implement heatmap calculation

This commit is contained in:
2022-10-23 23:06:09 +02:00
parent c4fce626da
commit cab51e06ff
6 changed files with 189 additions and 32 deletions

View File

@@ -27,11 +27,13 @@ The 16 bit dos unpacker also uses some variations. (`upkr --x86`)
``` ```
upkr [-l level(0-9)] [config options] <infile> [<outfile>] upkr [-l level(0-9)] [config options] <infile> [<outfile>]
upkr -u [config options] <infile> [<outfile>] upkr -u [config options] <infile> [<outfile>]
upkr --heatmap [config options] <infile> [<outfile>]
upkr --margin [config options] <infile> upkr --margin [config options] <infile>
-l, --level N compression level 0-9 -l, --level N compression level 0-9
-0, ..., -9 short form for setting compression level -0, ..., -9 short form for setting compression level
-u, --unpack unpack infile -u, --unpack unpack infile
--heatmap calculate heatmap from compressed file
--margin calculate margin for overlapped unpacking of a packed file --margin calculate margin for overlapped unpacking of a packed file
Config presets for specific unpackers: Config presets for specific unpackers:
@@ -56,3 +58,13 @@ Config options to tailor output to specific optimized unpackers:
--max-offset N --max-offset N
--max-length N --max-length N
``` ```
## Heatmap
By default, the `--heatmap` flag writes out the heatmap data as a binary file. The heatmap file is
the same size as the unpacked data. Each byte can be interpreted like this:
```
is_literal = byte & 1; // whether the byte was encoded as a literal (as opposed to a match)
size_in_bits = 2.0 ** (((byte >> 1) - 64) / 8.0); // the size this byte takes up in the compressed data
```

74
src/heatmap.rs Normal file
View File

@@ -0,0 +1,74 @@
/// Per-byte cost map of an unpacked stream.
///
/// The map is built incrementally by replaying the unpacker's decisions with
/// [`Heatmap::add_literal`] and [`Heatmap::add_match`]. Every output byte gets
/// one entry in each of the three parallel vectors below, so they always have
/// the same length.
#[derive(Debug, Clone, Default)]
pub struct Heatmap {
    /// The unpacked bytes, in output order.
    data: Vec<u8>,
    /// Cost attributed to each byte, in whatever unit the caller passes in.
    cost: Vec<f32>,
    /// For each byte, the position of the literal it was (transitively) copied
    /// from. A literal refers to its own position.
    literal_index: Vec<usize>,
}

impl Heatmap {
    /// Creates an empty heatmap.
    pub fn new() -> Heatmap {
        Heatmap::default()
    }

    /// Appends a byte that was encoded as a literal, carrying the full `cost`.
    pub fn add_literal(&mut self, byte: u8, cost: f32) {
        self.data.push(byte);
        self.cost.push(cost);
        // A literal is its own source, which is what `is_literal` tests for.
        self.literal_index.push(self.literal_index.len());
    }

    /// Appends `length` bytes copied from `offset` bytes back, splitting
    /// `cost` evenly across them.
    ///
    /// The copy is done byte by byte so that overlapping matches
    /// (`offset < length`) replicate the bytes produced earlier in the same
    /// match. A `length` of zero appends nothing.
    ///
    /// # Panics
    ///
    /// Panics if `offset` is zero or larger than the number of bytes added so
    /// far while `length` is non-zero.
    pub fn add_match(&mut self, offset: usize, length: usize, mut cost: f32) {
        cost /= length as f32;
        for _ in 0..length {
            let source = self.data.len() - offset;
            self.data.push(self.data[source]);
            // Inherit the source's literal, so chains of matches still point
            // at the original literal.
            self.literal_index.push(self.literal_index[source]);
            self.cost.push(cost);
        }
    }

    /// Evens out the cost between each literal and the bytes copied from it.
    ///
    /// For every byte, `(literal cost - byte cost) / reference count` is added
    /// to the byte and subtracted from its literal, where the reference count
    /// is the number of bytes (the literal included) that point at that
    /// literal. All deltas are computed from the costs as they were before
    /// this call, and the total cost is left unchanged.
    pub fn finish(&mut self) {
        let len = self.literal_index.len();

        let mut ref_count = vec![0usize; len];
        for &index in &self.literal_index {
            ref_count[index] += 1;
        }

        // Deltas are accumulated separately so that every delta is derived
        // from the untouched input costs.
        let mut shifted = vec![0.0f32; len];
        for (position, (&index, &cost)) in
            self.literal_index.iter().zip(&self.cost).enumerate()
        {
            let delta = (self.cost[index] - cost) / ref_count[index] as f32;
            shifted[position] += delta;
            shifted[index] -= delta;
        }

        for (cost, delta) in self.cost.iter_mut().zip(shifted) {
            *cost += delta;
        }
    }

    /// Reverses the heatmap so that it lines up with reversed unpacked data.
    ///
    /// Literal references are remapped to the new positions, so
    /// [`Heatmap::is_literal`] keeps reporting the same bytes as literals.
    pub fn reverse(&mut self) {
        self.data.reverse();
        self.cost.reverse();
        self.literal_index.reverse();

        // The entry formerly at position `p` now lives at `len - 1 - p`, so
        // the stored references have to move the same way.
        let len = self.literal_index.len();
        for index in &mut self.literal_index {
            *index = len - 1 - *index;
        }
    }

    /// Number of bytes covered by the heatmap.
    pub fn len(&self) -> usize {
        self.cost.len()
    }

    /// Returns `true` if no bytes have been added yet.
    pub fn is_empty(&self) -> bool {
        self.cost.is_empty()
    }

    /// Whether the byte at `index` was encoded as a literal (as opposed to
    /// being copied by a match).
    ///
    /// # Panics
    ///
    /// Panics if `index` is out of bounds.
    pub fn is_literal(&self, index: usize) -> bool {
        self.literal_index[index] == index
    }

    /// Cost attributed to the byte at `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is out of bounds.
    pub fn cost(&self, index: usize) -> f32 {
        self.cost[index]
    }

    /// The unpacked byte at `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is out of bounds.
    pub fn byte(&self, index: usize) -> u8 {
        self.data[index]
    }
}

View File

@@ -1,11 +1,13 @@
mod context_state; mod context_state;
mod greedy_packer; mod greedy_packer;
mod heatmap;
mod lz; mod lz;
mod match_finder; mod match_finder;
mod parsing_packer; mod parsing_packer;
mod rans; mod rans;
pub use lz::{calculate_margin, unpack, UnpackError}; pub use heatmap::Heatmap;
pub use lz::{calculate_margin, create_heatmap, unpack, UnpackError};
pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize); pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize);

View File

@@ -1,4 +1,5 @@
use crate::context_state::ContextState; use crate::context_state::ContextState;
use crate::heatmap::Heatmap;
use crate::rans::{EntropyCoder, RansDecoder}; use crate::rans::{EntropyCoder, RansDecoder};
use crate::Config; use crate::Config;
use thiserror::Error; use thiserror::Error;
@@ -153,21 +154,32 @@ pub fn unpack(
max_size: usize, max_size: usize,
) -> Result<Vec<u8>, UnpackError> { ) -> Result<Vec<u8>, UnpackError> {
let mut result = vec![]; let mut result = vec![];
let _ = unpack_internal(Some(&mut result), packed_data, config, max_size)?; let _ = unpack_internal(Some(&mut result), None, packed_data, config, max_size)?;
Ok(result) Ok(result)
} }
pub fn calculate_margin(packed_data: &[u8], config: &Config) -> Result<isize, UnpackError> { pub fn calculate_margin(packed_data: &[u8], config: &Config) -> Result<isize, UnpackError> {
unpack_internal(None, packed_data, config, usize::MAX) unpack_internal(None, None, packed_data, config, usize::MAX)
} }
pub fn unpack_internal( pub fn create_heatmap(
packed_data: &[u8],
config: &Config,
max_size: usize,
) -> Result<Heatmap, UnpackError> {
let mut heatmap = Heatmap::new();
let _ = unpack_internal(None, Some(&mut heatmap), packed_data, config, max_size)?;
Ok(heatmap)
}
fn unpack_internal(
mut result: Option<&mut Vec<u8>>, mut result: Option<&mut Vec<u8>>,
mut heatmap: Option<&mut Heatmap>,
packed_data: &[u8], packed_data: &[u8],
config: &Config, config: &Config,
max_size: usize, max_size: usize,
) -> Result<isize, UnpackError> { ) -> Result<isize, UnpackError> {
let mut decoder = RansDecoder::new(packed_data, &config); let mut decoder = RansDecoder::new(packed_data, &config)?;
let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, &config); let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, &config);
let mut offset = usize::MAX; let mut offset = usize::MAX;
let mut position = 0usize; let mut position = 0usize;
@@ -198,6 +210,7 @@ pub fn unpack_internal(
} }
loop { loop {
let prev_decoder = decoder.clone();
margin = margin.max(position as isize - decoder.pos() as isize); margin = margin.max(position as isize - decoder.pos() as isize);
let literal_base = position % config.parity_contexts * 256; let literal_base = position % config.parity_contexts * 256;
if decoder.decode_with_context(&mut contexts.context_mut(literal_base))? if decoder.decode_with_context(&mut contexts.context_mut(literal_base))?
@@ -231,6 +244,9 @@ pub fn unpack_internal(
if offset > position { if offset > position {
return Err(UnpackError::OffsetOutOfRange { offset, position }); return Err(UnpackError::OffsetOutOfRange { offset, position });
} }
if let Some(ref mut heatmap) = heatmap {
heatmap.add_match(offset, length, decoder.cost(&prev_decoder));
}
if let Some(ref mut result) = result { if let Some(ref mut result) = result {
for _ in 0..length { for _ in 0..length {
if result.len() < max_size { if result.len() < max_size {
@@ -251,6 +267,9 @@ pub fn unpack_internal(
context_index = (context_index << 1) | bit as usize; context_index = (context_index << 1) | bit as usize;
byte |= (bit as u8) << i; byte |= (bit as u8) << i;
} }
if let Some(ref mut heatmap) = heatmap {
heatmap.add_literal(byte, decoder.cost(&prev_decoder));
}
if let Some(ref mut result) = result { if let Some(ref mut result) = result {
if result.len() < max_size { if result.len() < max_size {
result.push(byte); result.push(byte);
@@ -261,6 +280,10 @@ pub fn unpack_internal(
} }
} }
if let Some(heatmap) = heatmap {
heatmap.finish();
}
if position > max_size { if position > max_size {
return Err(UnpackError::OverSize { return Err(UnpackError::OverSize {
size: position, size: position,

View File

@@ -9,6 +9,7 @@ fn main() -> Result<()> {
let mut reverse = false; let mut reverse = false;
let mut unpack = false; let mut unpack = false;
let mut calculate_margin = false; let mut calculate_margin = false;
let mut create_heatmap = false;
let mut level = 2; let mut level = 2;
let mut infile: Option<PathBuf> = None; let mut infile: Option<PathBuf> = None;
let mut outfile: Option<PathBuf> = None; let mut outfile: Option<PathBuf> = None;
@@ -58,6 +59,7 @@ fn main() -> Result<()> {
Short('u') | Long("unpack") => unpack = true, Short('u') | Long("unpack") => unpack = true,
Long("margin") => calculate_margin = true, Long("margin") => calculate_margin = true,
Long("heatmap") => create_heatmap = true,
Short('l') | Long("level") => level = parser.value()?.parse()?, Short('l') | Long("level") => level = parser.value()?.parse()?,
Short(n) if n.is_ascii_digit() => level = n as u8 - b'0', Short(n) if n.is_ascii_digit() => level = n as u8 - b'0',
Short('h') | Long("help") => print_help(0), Short('h') | Long("help") => print_help(0),
@@ -73,15 +75,16 @@ fn main() -> Result<()> {
} }
let infile = infile.unwrap_or_else(|| print_help(1)); let infile = infile.unwrap_or_else(|| print_help(1));
let outfile = outfile.unwrap_or_else(|| { enum OutFileType {
let mut name = infile.clone(); Packed,
if unpack { Unpacked,
if name.extension().filter(|&e| e == "upk").is_some() { Heatmap,
name.set_extension("");
} else {
name.set_extension("bin");
} }
} else { let outfile = |tpe: OutFileType| {
outfile.clone().unwrap_or_else(|| {
let mut name = infile.clone();
match tpe {
OutFileType::Packed => {
let mut filename = name let mut filename = name
.file_name() .file_name()
.unwrap_or_else(|| OsStr::new("")) .unwrap_or_else(|| OsStr::new(""))
@@ -89,17 +92,29 @@ fn main() -> Result<()> {
filename.push(".upk"); filename.push(".upk");
name.set_file_name(filename); name.set_file_name(filename);
} }
OutFileType::Unpacked => {
if name.extension().filter(|&e| e == "upk").is_some() {
name.set_extension("");
} else {
name.set_extension("bin");
}
}
OutFileType::Heatmap => {
name.set_extension("heatmap");
}
}
name name
}); })
};
if config.parity_contexts != 1 && config.parity_contexts != 2 && config.parity_contexts != 4 { if config.parity_contexts != 1 && config.parity_contexts != 2 && config.parity_contexts != 4 {
eprintln!("--parity has to be 1, 2, or 4"); eprintln!("--parity has to be 1, 2, or 4");
process::exit(1); process::exit(1);
} }
if !unpack && !calculate_margin { if !unpack && !calculate_margin && !create_heatmap {
let mut data = vec![]; let mut data = vec![];
File::open(infile)?.read_to_end(&mut data)?; File::open(&infile)?.read_to_end(&mut data)?;
if reverse { if reverse {
data.reverse(); data.reverse();
} }
@@ -126,10 +141,10 @@ fn main() -> Result<()> {
packed_data.len(), packed_data.len(),
packed_data.len() as f32 * 100. / data.len() as f32 packed_data.len() as f32 * 100. / data.len() as f32
); );
File::create(outfile)?.write_all(&packed_data)?; File::create(outfile(OutFileType::Packed))?.write_all(&packed_data)?;
} else { } else {
let mut data = vec![]; let mut data = vec![];
File::open(infile)?.read_to_end(&mut data)?; File::open(&infile)?.read_to_end(&mut data)?;
if reverse { if reverse {
data.reverse(); data.reverse();
} }
@@ -138,7 +153,22 @@ fn main() -> Result<()> {
if reverse { if reverse {
unpacked_data.reverse(); unpacked_data.reverse();
} }
File::create(outfile)?.write_all(&unpacked_data)?; File::create(outfile(OutFileType::Unpacked))?.write_all(&unpacked_data)?;
}
if create_heatmap {
let mut heatmap = upkr::create_heatmap(&data, &config, max_unpacked_size)?;
if reverse {
heatmap.reverse();
}
let mut heatmap_bin = Vec::with_capacity(heatmap.len());
for i in 0..heatmap.len() {
let cost = (heatmap.cost(i).log2() * 8. + 64.)
.round()
.max(0.)
.min(127.) as u8;
heatmap_bin.push((cost << 1) | heatmap.is_literal(i) as u8);
}
File::create(outfile(OutFileType::Heatmap))?.write_all(&heatmap_bin)?;
} }
if calculate_margin { if calculate_margin {
println!("{}", upkr::calculate_margin(&data, &config)?); println!("{}", upkr::calculate_margin(&data, &config)?);
@@ -152,11 +182,13 @@ fn print_help(exit_code: i32) -> ! {
eprintln!("Usage:"); eprintln!("Usage:");
eprintln!(" upkr [-l level(0-9)] [config options] <infile> [<outfile>]"); eprintln!(" upkr [-l level(0-9)] [config options] <infile> [<outfile>]");
eprintln!(" upkr -u [config options] <infile> [<outfile>]"); eprintln!(" upkr -u [config options] <infile> [<outfile>]");
eprintln!(" upkr --heatmap [config options] <infile> [<outfile>]");
eprintln!(" upkr --margin [config options] <infile>"); eprintln!(" upkr --margin [config options] <infile>");
eprintln!(); eprintln!();
eprintln!(" -l, --level N compression level 0-9"); eprintln!(" -l, --level N compression level 0-9");
eprintln!(" -0, ..., -9 short form for setting compression level"); eprintln!(" -0, ..., -9 short form for setting compression level");
eprintln!(" -u, --unpack unpack infile"); eprintln!(" -u, --unpack unpack infile");
eprintln!(" --heatmap calculate heatmap from compressed file");
eprintln!(" --margin calculate margin for overlapped unpacking of a packed file"); eprintln!(" --margin calculate margin for overlapped unpacking of a packed file");
eprintln!(); eprintln!();
eprintln!("Version: {}", env!("CARGO_PKG_VERSION")); eprintln!("Version: {}", env!("CARGO_PKG_VERSION"));

View File

@@ -148,6 +148,7 @@ impl EntropyCoder for CostCounter {
} }
} }
#[derive(Clone)]
pub struct RansDecoder<'a> { pub struct RansDecoder<'a> {
data: &'a [u8], data: &'a [u8],
pos: usize, pos: usize,
@@ -166,8 +167,8 @@ const PROB_MASK: u32 = ONE_PROB - 1;
pub struct UnexpectedEOF; pub struct UnexpectedEOF;
impl<'a> RansDecoder<'a> { impl<'a> RansDecoder<'a> {
pub fn new(data: &'a [u8], config: &Config) -> RansDecoder<'a> { pub fn new(data: &'a [u8], config: &Config) -> Result<RansDecoder<'a>, UnexpectedEOF> {
RansDecoder { let mut decoder = RansDecoder {
data, data,
pos: 0, pos: 0,
state: 0, state: 0,
@@ -176,7 +177,9 @@ impl<'a> RansDecoder<'a> {
bits_left: 0, bits_left: 0,
invert_bit_encoding: config.invert_bit_encoding, invert_bit_encoding: config.invert_bit_encoding,
bitstream_is_big_endian: config.bitstream_is_big_endian, bitstream_is_big_endian: config.bitstream_is_big_endian,
} };
decoder.refill()?;
Ok(decoder)
} }
pub fn pos(&self) -> usize { pub fn pos(&self) -> usize {
@@ -189,8 +192,7 @@ impl<'a> RansDecoder<'a> {
Ok(bit) Ok(bit)
} }
pub fn decode_bit(&mut self, prob: u16) -> Result<bool, UnexpectedEOF> { fn refill(&mut self) -> Result<(), UnexpectedEOF> {
let prob = prob as u32;
if self.use_bitstream { if self.use_bitstream {
while self.state < 32768 { while self.state < 32768 {
if self.bits_left == 0 { if self.bits_left == 0 {
@@ -219,6 +221,13 @@ impl<'a> RansDecoder<'a> {
self.pos += 1; self.pos += 1;
} }
} }
Ok(())
}
pub fn decode_bit(&mut self, prob: u16) -> Result<bool, UnexpectedEOF> {
self.refill()?;
let prob = prob as u32;
let bit = (self.state & PROB_MASK) < prob; let bit = (self.state & PROB_MASK) < prob;
@@ -231,4 +240,9 @@ impl<'a> RansDecoder<'a> {
Ok(bit ^ self.invert_bit_encoding) Ok(bit ^ self.invert_bit_encoding)
} }
/// Cost accumulated by this decoder since the snapshot `prev` was taken.
///
/// The result is `log2(prev.state) - log2(self.state)` plus eight times the
/// advance of `pos` between the two decoders.
/// NOTE(review): presumably this is a size in bits, with `pos` counting
/// consumed input bytes — confirm against `refill`.
///
/// `prev` must be an earlier snapshot of this decoder; `self.pos - prev.pos`
/// underflows otherwise.
pub fn cost(&self, prev: &RansDecoder) -> f32 {
    let state_delta = (prev.state as f32).log2() - (self.state as f32).log2();
    let consumed = (self.pos - prev.pos) as f32 * 8.;
    state_delta + consumed
}
} }