implement heatmap calculation

This commit is contained in:
2022-10-23 23:06:09 +02:00
parent c4fce626da
commit cab51e06ff
6 changed files with 189 additions and 32 deletions

View File

@@ -27,11 +27,13 @@ The 16 bit dos unpacker also uses some variations. (`upkr --x86`)
```
upkr [-l level(0-9)] [config options] <infile> [<outfile>]
upkr -u [config options] <infile> [<outfile>]
upkr --heatmap [config options] <infile> [<outfile>]
upkr --margin [config options] <infile>
-l, --level N compression level 0-9
-0, ..., -9 short form for setting compression level
-u, --unpack unpack infile
--heatmap calculate heatmap from compressed file
--margin calculate margin for overlapped unpacking of a packed file
Config presets for specific unpackers:
@@ -56,3 +58,13 @@ Config options to tailor output to specific optimized unpackers:
--max-offset N
--max-length N
```
## Heatmap
By default, the `--heatmap` flag writes out the heatmap data as a binary file. The heatmap file is
the same size as the unpacked data. Each byte can be interpreted like this:
```
is_literal = byte & 1; // whether the byte was encoded as a literal (as opposed to a match)
size_in_bits = 2.0 ** (((byte >> 1) - 64) / 8.0); // the size this byte takes up in the compressed data
```

74
src/heatmap.rs Normal file
View File

@@ -0,0 +1,74 @@
/// Per-byte cost information collected while decoding a packed stream.
///
/// Every unpacked byte gets one entry in each of the three parallel vectors
/// below, so all of them always have the same length.
#[derive(Debug, Clone, Default)]
pub struct Heatmap {
    /// The unpacked bytes, in the order they were added.
    data: Vec<u8>,
    /// Cost attributed to each byte, in the unit supplied by the caller.
    cost: Vec<f32>,
    /// For each byte, the position of the literal it was (transitively) copied
    /// from. A literal refers to itself.
    literal_index: Vec<usize>,
}

impl Heatmap {
    /// Creates an empty heatmap.
    pub fn new() -> Heatmap {
        Heatmap::default()
    }

    /// Records a byte that was encoded as a literal with the given `cost`.
    pub fn add_literal(&mut self, byte: u8, cost: f32) {
        // A literal is its own source, so it points at the slot it is about to occupy.
        let own_position = self.literal_index.len();
        self.data.push(byte);
        self.cost.push(cost);
        self.literal_index.push(own_position);
    }

    /// Records `length` bytes copied from `offset` bytes back.
    ///
    /// The total `cost` of the match is split evenly across the copied bytes.
    /// The copy is performed byte by byte, so the source range may overlap the
    /// bytes being appended (`offset < length`).
    ///
    /// # Panics
    ///
    /// Panics if `offset` is zero or larger than the number of bytes recorded
    /// so far.
    pub fn add_match(&mut self, offset: usize, length: usize, mut cost: f32) {
        cost /= length as f32;
        for _ in 0..length {
            let source = self.data.len() - offset;
            self.data.push(self.data[source]);
            // Inherit the source's literal so chains of matches resolve to the
            // original literal rather than to an intermediate copy.
            self.literal_index.push(self.literal_index[source]);
            self.cost.push(cost);
        }
    }

    /// Evens out the cost between each literal and the bytes copied from it.
    ///
    /// For every byte, `(literal cost - own cost) / reference count` is added
    /// to the byte and subtracted from its literal, where the reference count
    /// is the number of bytes (the literal included) that resolve to that
    /// literal. The total cost is unchanged by this redistribution.
    ///
    /// Must be called before [`Heatmap::reverse`]: it relies on every literal
    /// preceding the bytes that refer to it.
    pub fn finish(&mut self) {
        let mut ref_count = vec![0usize; self.literal_index.len()];
        for &index in &self.literal_index {
            ref_count[index] += 1;
        }

        // Deltas are accumulated separately so that every computation sees the
        // costs as they were before redistribution started.
        let mut shifted = Vec::with_capacity(self.cost.len());
        for (&index, &cost) in self.literal_index.iter().zip(self.cost.iter()) {
            let delta = (self.cost[index] - cost) / ref_count[index] as f32;
            shifted.push(delta);
            shifted[index] -= delta;
        }

        for (cost, delta) in self.cost.iter_mut().zip(shifted) {
            *cost += delta;
        }
    }

    /// Reverses the heatmap so that it describes the reversed data.
    ///
    /// The literal indices are remapped along with the entries, so
    /// [`Heatmap::is_literal`] keeps reporting the same bytes as literals.
    pub fn reverse(&mut self) {
        self.data.reverse();
        self.cost.reverse();
        self.literal_index.reverse();
        // Position `i` moves to `len - 1 - i`. Using `len - i` here would leave
        // every stored index one past its target and no byte would be
        // recognised as a literal any more.
        let last = self.literal_index.len().saturating_sub(1);
        for index in self.literal_index.iter_mut() {
            *index = last - *index;
        }
    }

    /// Returns the number of bytes recorded in the heatmap.
    pub fn len(&self) -> usize {
        self.cost.len()
    }

    /// Returns `true` if no bytes have been recorded.
    pub fn is_empty(&self) -> bool {
        self.cost.is_empty()
    }

    /// Returns whether the byte at `index` was encoded as a literal (as
    /// opposed to being part of a match).
    ///
    /// # Panics
    ///
    /// Panics if `index` is out of bounds.
    pub fn is_literal(&self, index: usize) -> bool {
        self.literal_index[index] == index
    }

    /// Returns the cost attributed to the byte at `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is out of bounds.
    pub fn cost(&self, index: usize) -> f32 {
        self.cost[index]
    }

    /// Returns the unpacked byte at `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is out of bounds.
    pub fn byte(&self, index: usize) -> u8 {
        self.data[index]
    }
}

View File

@@ -1,11 +1,13 @@
mod context_state;
mod greedy_packer;
mod heatmap;
mod lz;
mod match_finder;
mod parsing_packer;
mod rans;
pub use lz::{calculate_margin, unpack, UnpackError};
pub use heatmap::Heatmap;
pub use lz::{calculate_margin, create_heatmap, unpack, UnpackError};
pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize);

View File

@@ -1,4 +1,5 @@
use crate::context_state::ContextState;
use crate::heatmap::Heatmap;
use crate::rans::{EntropyCoder, RansDecoder};
use crate::Config;
use thiserror::Error;
@@ -153,21 +154,32 @@ pub fn unpack(
max_size: usize,
) -> Result<Vec<u8>, UnpackError> {
let mut result = vec![];
let _ = unpack_internal(Some(&mut result), packed_data, config, max_size)?;
let _ = unpack_internal(Some(&mut result), None, packed_data, config, max_size)?;
Ok(result)
}
pub fn calculate_margin(packed_data: &[u8], config: &Config) -> Result<isize, UnpackError> {
unpack_internal(None, packed_data, config, usize::MAX)
unpack_internal(None, None, packed_data, config, usize::MAX)
}
pub fn unpack_internal(
/// Decodes `packed_data` and returns the [`Heatmap`] collected along the way.
///
/// No unpacked output buffer is passed to `unpack_internal`; only the heatmap
/// is filled in. `config` and `max_size` are forwarded unchanged.
///
/// # Errors
///
/// Returns whatever [`UnpackError`] `unpack_internal` reports.
pub fn create_heatmap(
    packed_data: &[u8],
    config: &Config,
    max_size: usize,
) -> Result<Heatmap, UnpackError> {
    let mut collected = Heatmap::new();
    // The value returned on success is not needed here, only the side effect
    // of populating the heatmap.
    unpack_internal(None, Some(&mut collected), packed_data, config, max_size)?;
    Ok(collected)
}
fn unpack_internal(
mut result: Option<&mut Vec<u8>>,
mut heatmap: Option<&mut Heatmap>,
packed_data: &[u8],
config: &Config,
max_size: usize,
) -> Result<isize, UnpackError> {
let mut decoder = RansDecoder::new(packed_data, &config);
let mut decoder = RansDecoder::new(packed_data, &config)?;
let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, &config);
let mut offset = usize::MAX;
let mut position = 0usize;
@@ -198,6 +210,7 @@ pub fn unpack_internal(
}
loop {
let prev_decoder = decoder.clone();
margin = margin.max(position as isize - decoder.pos() as isize);
let literal_base = position % config.parity_contexts * 256;
if decoder.decode_with_context(&mut contexts.context_mut(literal_base))?
@@ -231,6 +244,9 @@ pub fn unpack_internal(
if offset > position {
return Err(UnpackError::OffsetOutOfRange { offset, position });
}
if let Some(ref mut heatmap) = heatmap {
heatmap.add_match(offset, length, decoder.cost(&prev_decoder));
}
if let Some(ref mut result) = result {
for _ in 0..length {
if result.len() < max_size {
@@ -251,6 +267,9 @@ pub fn unpack_internal(
context_index = (context_index << 1) | bit as usize;
byte |= (bit as u8) << i;
}
if let Some(ref mut heatmap) = heatmap {
heatmap.add_literal(byte, decoder.cost(&prev_decoder));
}
if let Some(ref mut result) = result {
if result.len() < max_size {
result.push(byte);
@@ -261,6 +280,10 @@ pub fn unpack_internal(
}
}
if let Some(heatmap) = heatmap {
heatmap.finish();
}
if position > max_size {
return Err(UnpackError::OverSize {
size: position,

View File

@@ -9,6 +9,7 @@ fn main() -> Result<()> {
let mut reverse = false;
let mut unpack = false;
let mut calculate_margin = false;
let mut create_heatmap = false;
let mut level = 2;
let mut infile: Option<PathBuf> = None;
let mut outfile: Option<PathBuf> = None;
@@ -58,6 +59,7 @@ fn main() -> Result<()> {
Short('u') | Long("unpack") => unpack = true,
Long("margin") => calculate_margin = true,
Long("heatmap") => create_heatmap = true,
Short('l') | Long("level") => level = parser.value()?.parse()?,
Short(n) if n.is_ascii_digit() => level = n as u8 - b'0',
Short('h') | Long("help") => print_help(0),
@@ -73,15 +75,16 @@ fn main() -> Result<()> {
}
let infile = infile.unwrap_or_else(|| print_help(1));
let outfile = outfile.unwrap_or_else(|| {
let mut name = infile.clone();
if unpack {
if name.extension().filter(|&e| e == "upk").is_some() {
name.set_extension("");
} else {
name.set_extension("bin");
enum OutFileType {
Packed,
Unpacked,
Heatmap,
}
} else {
let outfile = |tpe: OutFileType| {
outfile.clone().unwrap_or_else(|| {
let mut name = infile.clone();
match tpe {
OutFileType::Packed => {
let mut filename = name
.file_name()
.unwrap_or_else(|| OsStr::new(""))
@@ -89,17 +92,29 @@ fn main() -> Result<()> {
filename.push(".upk");
name.set_file_name(filename);
}
OutFileType::Unpacked => {
if name.extension().filter(|&e| e == "upk").is_some() {
name.set_extension("");
} else {
name.set_extension("bin");
}
}
OutFileType::Heatmap => {
name.set_extension("heatmap");
}
}
name
});
})
};
if config.parity_contexts != 1 && config.parity_contexts != 2 && config.parity_contexts != 4 {
eprintln!("--parity has to be 1, 2, or 4");
process::exit(1);
}
if !unpack && !calculate_margin {
if !unpack && !calculate_margin && !create_heatmap {
let mut data = vec![];
File::open(infile)?.read_to_end(&mut data)?;
File::open(&infile)?.read_to_end(&mut data)?;
if reverse {
data.reverse();
}
@@ -126,10 +141,10 @@ fn main() -> Result<()> {
packed_data.len(),
packed_data.len() as f32 * 100. / data.len() as f32
);
File::create(outfile)?.write_all(&packed_data)?;
File::create(outfile(OutFileType::Packed))?.write_all(&packed_data)?;
} else {
let mut data = vec![];
File::open(infile)?.read_to_end(&mut data)?;
File::open(&infile)?.read_to_end(&mut data)?;
if reverse {
data.reverse();
}
@@ -138,7 +153,22 @@ fn main() -> Result<()> {
if reverse {
unpacked_data.reverse();
}
File::create(outfile)?.write_all(&unpacked_data)?;
File::create(outfile(OutFileType::Unpacked))?.write_all(&unpacked_data)?;
}
if create_heatmap {
let mut heatmap = upkr::create_heatmap(&data, &config, max_unpacked_size)?;
if reverse {
heatmap.reverse();
}
let mut heatmap_bin = Vec::with_capacity(heatmap.len());
for i in 0..heatmap.len() {
let cost = (heatmap.cost(i).log2() * 8. + 64.)
.round()
.max(0.)
.min(127.) as u8;
heatmap_bin.push((cost << 1) | heatmap.is_literal(i) as u8);
}
File::create(outfile(OutFileType::Heatmap))?.write_all(&heatmap_bin)?;
}
if calculate_margin {
println!("{}", upkr::calculate_margin(&data, &config)?);
@@ -152,11 +182,13 @@ fn print_help(exit_code: i32) -> ! {
eprintln!("Usage:");
eprintln!(" upkr [-l level(0-9)] [config options] <infile> [<outfile>]");
eprintln!(" upkr -u [config options] <infile> [<outfile>]");
eprintln!(" upkr --heatmap [config options] <infile> [<outfile>]");
eprintln!(" upkr --margin [config options] <infile>");
eprintln!();
eprintln!(" -l, --level N compression level 0-9");
eprintln!(" -0, ..., -9 short form for setting compression level");
eprintln!(" -u, --unpack unpack infile");
eprintln!(" --heatmap calculate heatmap from compressed file");
eprintln!(" --margin calculate margin for overlapped unpacking of a packed file");
eprintln!();
eprintln!("Version: {}", env!("CARGO_PKG_VERSION"));

View File

@@ -148,6 +148,7 @@ impl EntropyCoder for CostCounter {
}
}
#[derive(Clone)]
pub struct RansDecoder<'a> {
data: &'a [u8],
pos: usize,
@@ -166,8 +167,8 @@ const PROB_MASK: u32 = ONE_PROB - 1;
pub struct UnexpectedEOF;
impl<'a> RansDecoder<'a> {
pub fn new(data: &'a [u8], config: &Config) -> RansDecoder<'a> {
RansDecoder {
pub fn new(data: &'a [u8], config: &Config) -> Result<RansDecoder<'a>, UnexpectedEOF> {
let mut decoder = RansDecoder {
data,
pos: 0,
state: 0,
@@ -176,7 +177,9 @@ impl<'a> RansDecoder<'a> {
bits_left: 0,
invert_bit_encoding: config.invert_bit_encoding,
bitstream_is_big_endian: config.bitstream_is_big_endian,
}
};
decoder.refill()?;
Ok(decoder)
}
pub fn pos(&self) -> usize {
@@ -189,8 +192,7 @@ impl<'a> RansDecoder<'a> {
Ok(bit)
}
pub fn decode_bit(&mut self, prob: u16) -> Result<bool, UnexpectedEOF> {
let prob = prob as u32;
fn refill(&mut self) -> Result<(), UnexpectedEOF> {
if self.use_bitstream {
while self.state < 32768 {
if self.bits_left == 0 {
@@ -219,6 +221,13 @@ impl<'a> RansDecoder<'a> {
self.pos += 1;
}
}
Ok(())
}
pub fn decode_bit(&mut self, prob: u16) -> Result<bool, UnexpectedEOF> {
self.refill()?;
let prob = prob as u32;
let bit = (self.state & PROB_MASK) < prob;
@@ -231,4 +240,9 @@ impl<'a> RansDecoder<'a> {
Ok(bit ^ self.invert_bit_encoding)
}
pub fn cost(&self, prev: &RansDecoder) -> f32 {
f32::log2(prev.state as f32) - f32::log2(self.state as f32)
+ (self.pos - prev.pos) as f32 * 8.
}
}