mirror of
https://github.com/exoticorn/upkr.git
synced 2026-01-20 11:36:42 +01:00
implement heatmap calculation
This commit is contained in:
12
README.md
12
README.md
@@ -27,11 +27,13 @@ The 16 bit dos unpacker also uses some variations. (`upkr --x86`)
|
||||
```
|
||||
upkr [-l level(0-9)] [config options] <infile> [<outfile>]
|
||||
upkr -u [config options] <infile> [<outfile>]
|
||||
upkr --heatmap [config options] <infile> [<outfile>]
|
||||
upkr --margin [config options] <infile>
|
||||
|
||||
-l, --level N compression level 0-9
|
||||
-0, ..., -9 short form for setting compression level
|
||||
-u, --unpack unpack infile
|
||||
--heatmap calculate heatmap from compressed file
|
||||
--margin calculate margin for overlapped unpacking of a packed file
|
||||
|
||||
Config presets for specific unpackers:
|
||||
@@ -56,3 +58,13 @@ Config options to tailor output to specific optimized unpackers:
|
||||
--max-offset N
|
||||
--max-length N
|
||||
```
|
||||
|
||||
## Heatmap
|
||||
|
||||
By default, the `--heatmap` flag writes out the heatmap data as a binary file. The heatmap file is
|
||||
the same size as the unpacked data. Each byte can be interpreted like this:
|
||||
|
||||
```
|
||||
is_literal = byte & 1; // whether the byte was encoded as a literal (as opposed to a match)
|
||||
size_in_bits = 2.0 ** (((byte >> 1) - 64) / 8.0); // the size this byte takes up in the compressed data
|
||||
```
|
||||
|
||||
74
src/heatmap.rs
Normal file
74
src/heatmap.rs
Normal file
@@ -0,0 +1,74 @@
|
||||
/// Per-byte cost record for unpacked data.
///
/// Bytes are appended in unpack order via [`Heatmap::add_literal`] and
/// [`Heatmap::add_match`]. Every byte remembers the position of the literal
/// it was (directly or transitively) copied from, so that
/// [`Heatmap::finish`] can move cost between literals and their copies.
#[derive(Debug, Clone, Default)]
pub struct Heatmap {
    /// The unpacked bytes, in the order they were added.
    data: Vec<u8>,
    /// The cost attributed to each byte, as supplied by the caller.
    cost: Vec<f32>,
    /// For each byte, the position of the literal it originates from.
    /// A literal refers to its own position.
    literal_index: Vec<usize>,
}

impl Heatmap {
    /// Creates an empty heatmap.
    pub fn new() -> Heatmap {
        Heatmap::default()
    }

    /// Appends a literal `byte` that took `cost` to encode.
    pub fn add_literal(&mut self, byte: u8, cost: f32) {
        // A literal is its own origin, i.e. it refers to the slot it is about to occupy.
        let position = self.literal_index.len();
        self.data.push(byte);
        self.cost.push(cost);
        self.literal_index.push(position);
    }

    /// Appends a match of `length` bytes copied from `offset` bytes back.
    ///
    /// The total `cost` of the match is split evenly over its bytes.
    ///
    /// # Panics
    ///
    /// Panics if `length > 0` and `offset` is zero or larger than the number
    /// of bytes added so far.
    pub fn add_match(&mut self, offset: usize, length: usize, mut cost: f32) {
        cost /= length as f32;
        // Copy byte by byte so that overlapping matches (offset < length)
        // can read bytes pushed earlier in this same loop.
        for _ in 0..length {
            let source = self.data.len() - offset;
            self.data.push(self.data[source]);
            self.literal_index.push(self.literal_index[source]);
            self.cost.push(cost);
        }
    }

    /// Redistributes cost between each literal and the bytes copied from it.
    ///
    /// For every byte, `(literal cost - byte cost) / ref_count` is added to
    /// the byte and subtracted from its literal, where `ref_count` is the
    /// number of bytes (the literal included) originating from that literal.
    /// The sum of all costs is unchanged. All deltas are computed from the
    /// costs as they were before this call.
    pub fn finish(&mut self) {
        let len = self.literal_index.len();

        let mut ref_count = vec![0usize; len];
        for &index in &self.literal_index {
            ref_count[index] += 1;
        }

        // Preallocated so a literal index may point at any position, not only
        // at an earlier one (which is the case after `reverse`).
        let mut shifted = vec![0f32; len];
        for (position, (&index, &cost)) in
            self.literal_index.iter().zip(self.cost.iter()).enumerate()
        {
            let delta = (self.cost[index] - cost) / ref_count[index] as f32;
            shifted[position] += delta;
            shifted[index] -= delta;
        }

        for (cost, delta) in self.cost.iter_mut().zip(shifted) {
            *cost += delta;
        }
    }

    /// Reverses the order of the recorded bytes.
    ///
    /// Literal references are remapped so that each byte still refers to the
    /// same literal, now at its mirrored position.
    pub fn reverse(&mut self) {
        self.data.reverse();
        self.cost.reverse();
        self.literal_index.reverse();
        // Position `i` moves to `len - 1 - i`. Using `len - i` would be off by
        // one and would leave no byte pointing at itself.
        let last = self.data.len().saturating_sub(1);
        for index in self.literal_index.iter_mut() {
            *index = last - *index;
        }
    }

    /// Returns the number of bytes recorded.
    pub fn len(&self) -> usize {
        self.cost.len()
    }

    /// Returns `true` if no bytes have been recorded.
    pub fn is_empty(&self) -> bool {
        self.cost.is_empty()
    }

    /// Returns whether the byte at `index` was added as a literal.
    ///
    /// # Panics
    ///
    /// Panics if `index` is out of bounds.
    pub fn is_literal(&self, index: usize) -> bool {
        self.literal_index[index] == index
    }

    /// Returns the cost attributed to the byte at `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is out of bounds.
    pub fn cost(&self, index: usize) -> f32 {
        self.cost[index]
    }

    /// Returns the unpacked byte at `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is out of bounds.
    pub fn byte(&self, index: usize) -> u8 {
        self.data[index]
    }
}
|
||||
@@ -1,11 +1,13 @@
|
||||
mod context_state;
|
||||
mod greedy_packer;
|
||||
mod heatmap;
|
||||
mod lz;
|
||||
mod match_finder;
|
||||
mod parsing_packer;
|
||||
mod rans;
|
||||
|
||||
pub use lz::{calculate_margin, unpack, UnpackError};
|
||||
pub use heatmap::Heatmap;
|
||||
pub use lz::{calculate_margin, create_heatmap, unpack, UnpackError};
|
||||
|
||||
pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize);
|
||||
|
||||
|
||||
31
src/lz.rs
31
src/lz.rs
@@ -1,4 +1,5 @@
|
||||
use crate::context_state::ContextState;
|
||||
use crate::heatmap::Heatmap;
|
||||
use crate::rans::{EntropyCoder, RansDecoder};
|
||||
use crate::Config;
|
||||
use thiserror::Error;
|
||||
@@ -153,21 +154,32 @@ pub fn unpack(
|
||||
max_size: usize,
|
||||
) -> Result<Vec<u8>, UnpackError> {
|
||||
let mut result = vec![];
|
||||
let _ = unpack_internal(Some(&mut result), packed_data, config, max_size)?;
|
||||
let _ = unpack_internal(Some(&mut result), None, packed_data, config, max_size)?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn calculate_margin(packed_data: &[u8], config: &Config) -> Result<isize, UnpackError> {
|
||||
unpack_internal(None, packed_data, config, usize::MAX)
|
||||
unpack_internal(None, None, packed_data, config, usize::MAX)
|
||||
}
|
||||
|
||||
pub fn unpack_internal(
|
||||
pub fn create_heatmap(
|
||||
packed_data: &[u8],
|
||||
config: &Config,
|
||||
max_size: usize,
|
||||
) -> Result<Heatmap, UnpackError> {
|
||||
let mut heatmap = Heatmap::new();
|
||||
let _ = unpack_internal(None, Some(&mut heatmap), packed_data, config, max_size)?;
|
||||
Ok(heatmap)
|
||||
}
|
||||
|
||||
fn unpack_internal(
|
||||
mut result: Option<&mut Vec<u8>>,
|
||||
mut heatmap: Option<&mut Heatmap>,
|
||||
packed_data: &[u8],
|
||||
config: &Config,
|
||||
max_size: usize,
|
||||
) -> Result<isize, UnpackError> {
|
||||
let mut decoder = RansDecoder::new(packed_data, &config);
|
||||
let mut decoder = RansDecoder::new(packed_data, &config)?;
|
||||
let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, &config);
|
||||
let mut offset = usize::MAX;
|
||||
let mut position = 0usize;
|
||||
@@ -198,6 +210,7 @@ pub fn unpack_internal(
|
||||
}
|
||||
|
||||
loop {
|
||||
let prev_decoder = decoder.clone();
|
||||
margin = margin.max(position as isize - decoder.pos() as isize);
|
||||
let literal_base = position % config.parity_contexts * 256;
|
||||
if decoder.decode_with_context(&mut contexts.context_mut(literal_base))?
|
||||
@@ -231,6 +244,9 @@ pub fn unpack_internal(
|
||||
if offset > position {
|
||||
return Err(UnpackError::OffsetOutOfRange { offset, position });
|
||||
}
|
||||
if let Some(ref mut heatmap) = heatmap {
|
||||
heatmap.add_match(offset, length, decoder.cost(&prev_decoder));
|
||||
}
|
||||
if let Some(ref mut result) = result {
|
||||
for _ in 0..length {
|
||||
if result.len() < max_size {
|
||||
@@ -251,6 +267,9 @@ pub fn unpack_internal(
|
||||
context_index = (context_index << 1) | bit as usize;
|
||||
byte |= (bit as u8) << i;
|
||||
}
|
||||
if let Some(ref mut heatmap) = heatmap {
|
||||
heatmap.add_literal(byte, decoder.cost(&prev_decoder));
|
||||
}
|
||||
if let Some(ref mut result) = result {
|
||||
if result.len() < max_size {
|
||||
result.push(byte);
|
||||
@@ -261,6 +280,10 @@ pub fn unpack_internal(
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(heatmap) = heatmap {
|
||||
heatmap.finish();
|
||||
}
|
||||
|
||||
if position > max_size {
|
||||
return Err(UnpackError::OverSize {
|
||||
size: position,
|
||||
|
||||
60
src/main.rs
60
src/main.rs
@@ -9,6 +9,7 @@ fn main() -> Result<()> {
|
||||
let mut reverse = false;
|
||||
let mut unpack = false;
|
||||
let mut calculate_margin = false;
|
||||
let mut create_heatmap = false;
|
||||
let mut level = 2;
|
||||
let mut infile: Option<PathBuf> = None;
|
||||
let mut outfile: Option<PathBuf> = None;
|
||||
@@ -58,6 +59,7 @@ fn main() -> Result<()> {
|
||||
|
||||
Short('u') | Long("unpack") => unpack = true,
|
||||
Long("margin") => calculate_margin = true,
|
||||
Long("heatmap") => create_heatmap = true,
|
||||
Short('l') | Long("level") => level = parser.value()?.parse()?,
|
||||
Short(n) if n.is_ascii_digit() => level = n as u8 - b'0',
|
||||
Short('h') | Long("help") => print_help(0),
|
||||
@@ -73,15 +75,16 @@ fn main() -> Result<()> {
|
||||
}
|
||||
|
||||
let infile = infile.unwrap_or_else(|| print_help(1));
|
||||
let outfile = outfile.unwrap_or_else(|| {
|
||||
let mut name = infile.clone();
|
||||
if unpack {
|
||||
if name.extension().filter(|&e| e == "upk").is_some() {
|
||||
name.set_extension("");
|
||||
} else {
|
||||
name.set_extension("bin");
|
||||
enum OutFileType {
|
||||
Packed,
|
||||
Unpacked,
|
||||
Heatmap,
|
||||
}
|
||||
} else {
|
||||
let outfile = |tpe: OutFileType| {
|
||||
outfile.clone().unwrap_or_else(|| {
|
||||
let mut name = infile.clone();
|
||||
match tpe {
|
||||
OutFileType::Packed => {
|
||||
let mut filename = name
|
||||
.file_name()
|
||||
.unwrap_or_else(|| OsStr::new(""))
|
||||
@@ -89,17 +92,29 @@ fn main() -> Result<()> {
|
||||
filename.push(".upk");
|
||||
name.set_file_name(filename);
|
||||
}
|
||||
OutFileType::Unpacked => {
|
||||
if name.extension().filter(|&e| e == "upk").is_some() {
|
||||
name.set_extension("");
|
||||
} else {
|
||||
name.set_extension("bin");
|
||||
}
|
||||
}
|
||||
OutFileType::Heatmap => {
|
||||
name.set_extension("heatmap");
|
||||
}
|
||||
}
|
||||
name
|
||||
});
|
||||
})
|
||||
};
|
||||
|
||||
if config.parity_contexts != 1 && config.parity_contexts != 2 && config.parity_contexts != 4 {
|
||||
eprintln!("--parity has to be 1, 2, or 4");
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
if !unpack && !calculate_margin {
|
||||
if !unpack && !calculate_margin && !create_heatmap {
|
||||
let mut data = vec![];
|
||||
File::open(infile)?.read_to_end(&mut data)?;
|
||||
File::open(&infile)?.read_to_end(&mut data)?;
|
||||
if reverse {
|
||||
data.reverse();
|
||||
}
|
||||
@@ -126,10 +141,10 @@ fn main() -> Result<()> {
|
||||
packed_data.len(),
|
||||
packed_data.len() as f32 * 100. / data.len() as f32
|
||||
);
|
||||
File::create(outfile)?.write_all(&packed_data)?;
|
||||
File::create(outfile(OutFileType::Packed))?.write_all(&packed_data)?;
|
||||
} else {
|
||||
let mut data = vec![];
|
||||
File::open(infile)?.read_to_end(&mut data)?;
|
||||
File::open(&infile)?.read_to_end(&mut data)?;
|
||||
if reverse {
|
||||
data.reverse();
|
||||
}
|
||||
@@ -138,7 +153,22 @@ fn main() -> Result<()> {
|
||||
if reverse {
|
||||
unpacked_data.reverse();
|
||||
}
|
||||
File::create(outfile)?.write_all(&unpacked_data)?;
|
||||
File::create(outfile(OutFileType::Unpacked))?.write_all(&unpacked_data)?;
|
||||
}
|
||||
if create_heatmap {
|
||||
let mut heatmap = upkr::create_heatmap(&data, &config, max_unpacked_size)?;
|
||||
if reverse {
|
||||
heatmap.reverse();
|
||||
}
|
||||
let mut heatmap_bin = Vec::with_capacity(heatmap.len());
|
||||
for i in 0..heatmap.len() {
|
||||
let cost = (heatmap.cost(i).log2() * 8. + 64.)
|
||||
.round()
|
||||
.max(0.)
|
||||
.min(127.) as u8;
|
||||
heatmap_bin.push((cost << 1) | heatmap.is_literal(i) as u8);
|
||||
}
|
||||
File::create(outfile(OutFileType::Heatmap))?.write_all(&heatmap_bin)?;
|
||||
}
|
||||
if calculate_margin {
|
||||
println!("{}", upkr::calculate_margin(&data, &config)?);
|
||||
@@ -152,11 +182,13 @@ fn print_help(exit_code: i32) -> ! {
|
||||
eprintln!("Usage:");
|
||||
eprintln!(" upkr [-l level(0-9)] [config options] <infile> [<outfile>]");
|
||||
eprintln!(" upkr -u [config options] <infile> [<outfile>]");
|
||||
eprintln!(" upkr --heatmap [config options] <infile> [<outfile>]");
|
||||
eprintln!(" upkr --margin [config options] <infile>");
|
||||
eprintln!();
|
||||
eprintln!(" -l, --level N compression level 0-9");
|
||||
eprintln!(" -0, ..., -9 short form for setting compression level");
|
||||
eprintln!(" -u, --unpack unpack infile");
|
||||
eprintln!(" --heatmap calculate heatmap from compressed file");
|
||||
eprintln!(" --margin calculate margin for overlapped unpacking of a packed file");
|
||||
eprintln!();
|
||||
eprintln!("Version: {}", env!("CARGO_PKG_VERSION"));
|
||||
|
||||
24
src/rans.rs
24
src/rans.rs
@@ -148,6 +148,7 @@ impl EntropyCoder for CostCounter {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct RansDecoder<'a> {
|
||||
data: &'a [u8],
|
||||
pos: usize,
|
||||
@@ -166,8 +167,8 @@ const PROB_MASK: u32 = ONE_PROB - 1;
|
||||
pub struct UnexpectedEOF;
|
||||
|
||||
impl<'a> RansDecoder<'a> {
|
||||
pub fn new(data: &'a [u8], config: &Config) -> RansDecoder<'a> {
|
||||
RansDecoder {
|
||||
pub fn new(data: &'a [u8], config: &Config) -> Result<RansDecoder<'a>, UnexpectedEOF> {
|
||||
let mut decoder = RansDecoder {
|
||||
data,
|
||||
pos: 0,
|
||||
state: 0,
|
||||
@@ -176,7 +177,9 @@ impl<'a> RansDecoder<'a> {
|
||||
bits_left: 0,
|
||||
invert_bit_encoding: config.invert_bit_encoding,
|
||||
bitstream_is_big_endian: config.bitstream_is_big_endian,
|
||||
}
|
||||
};
|
||||
decoder.refill()?;
|
||||
Ok(decoder)
|
||||
}
|
||||
|
||||
pub fn pos(&self) -> usize {
|
||||
@@ -189,8 +192,7 @@ impl<'a> RansDecoder<'a> {
|
||||
Ok(bit)
|
||||
}
|
||||
|
||||
pub fn decode_bit(&mut self, prob: u16) -> Result<bool, UnexpectedEOF> {
|
||||
let prob = prob as u32;
|
||||
fn refill(&mut self) -> Result<(), UnexpectedEOF> {
|
||||
if self.use_bitstream {
|
||||
while self.state < 32768 {
|
||||
if self.bits_left == 0 {
|
||||
@@ -219,6 +221,13 @@ impl<'a> RansDecoder<'a> {
|
||||
self.pos += 1;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn decode_bit(&mut self, prob: u16) -> Result<bool, UnexpectedEOF> {
|
||||
self.refill()?;
|
||||
|
||||
let prob = prob as u32;
|
||||
|
||||
let bit = (self.state & PROB_MASK) < prob;
|
||||
|
||||
@@ -231,4 +240,9 @@ impl<'a> RansDecoder<'a> {
|
||||
|
||||
Ok(bit ^ self.invert_bit_encoding)
|
||||
}
|
||||
|
||||
/// Returns the cost of everything decoded between `prev` and `self`.
///
/// The value is `log2(prev.state) - log2(self.state)` plus 8 for every
/// position step `self.pos` has advanced past `prev.pos`.
/// NOTE(review): presumably a size estimate in bits; `prev` is expected to
/// be an earlier snapshot of this decoder (`prev.pos <= self.pos`).
pub fn cost(&self, prev: &RansDecoder) -> f32 {
    let state_delta = (prev.state as f32).log2() - (self.state as f32).log2();
    let consumed = (self.pos - prev.pos) as f32 * 8.;
    state_delta + consumed
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user