mirror of
https://github.com/exoticorn/upkr.git
synced 2026-01-20 11:36:42 +01:00
implement heatmap calculation
This commit is contained in:
12
README.md
12
README.md
@@ -27,11 +27,13 @@ The 16 bit dos unpacker also uses some variations. (`upkr --x86`)
|
|||||||
```
|
```
|
||||||
upkr [-l level(0-9)] [config options] <infile> [<outfile>]
|
upkr [-l level(0-9)] [config options] <infile> [<outfile>]
|
||||||
upkr -u [config options] <infile> [<outfile>]
|
upkr -u [config options] <infile> [<outfile>]
|
||||||
|
upkr --heatmap [config options] <infile> [<outfile>]
|
||||||
upkr --margin [config options] <infile>
|
upkr --margin [config options] <infile>
|
||||||
|
|
||||||
-l, --level N compression level 0-9
|
-l, --level N compression level 0-9
|
||||||
-0, ..., -9 short form for setting compression level
|
-0, ..., -9 short form for setting compression level
|
||||||
-u, --unpack unpack infile
|
-u, --unpack unpack infile
|
||||||
|
--heatmap calculate heatmap from compressed file
|
||||||
--margin calculate margin for overlapped unpacking of a packed file
|
--margin calculate margin for overlapped unpacking of a packed file
|
||||||
|
|
||||||
Config presets for specific unpackers:
|
Config presets for specific unpackers:
|
||||||
@@ -56,3 +58,13 @@ Config options to tailor output to specific optimized unpackers:
|
|||||||
--max-offset N
|
--max-offset N
|
||||||
--max-length N
|
--max-length N
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Heatmap
|
||||||
|
|
||||||
|
By default, the `--heatmap` flag writes out the heatmap data as a binary file. The heatmap file is
|
||||||
|
the same size as the unpacked data. Each byte can be interpreted like this:
|
||||||
|
|
||||||
|
```
|
||||||
|
is_literal = byte & 1; // whether the byte was encoded as a literal (as opposed to a match)
|
||||||
|
size_in_bits = 2.0 ** (((byte >> 1) - 64) / 8.0); // the size this byte takes up in the compressed data
|
||||||
|
```
|
||||||
|
|||||||
74
src/heatmap.rs
Normal file
74
src/heatmap.rs
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
/// Per-byte cost information collected while decoding a packed stream.
///
/// All three vectors hold exactly one entry per unpacked byte and are always
/// kept at the same length by the methods below.
#[derive(Debug, Clone, Default)]
pub struct Heatmap {
    /// The unpacked bytes, in the order they were added.
    data: Vec<u8>,
    /// The cost attributed to each byte.
    cost: Vec<f32>,
    /// For each byte, the position of the literal it ultimately originates
    /// from. A literal refers to itself; a byte produced by a match inherits
    /// the entry of the byte it was copied from.
    literal_index: Vec<usize>,
}

impl Heatmap {
    /// Creates an empty heatmap.
    pub fn new() -> Heatmap {
        Heatmap::default()
    }

    /// Appends a single literal `byte` with the given `cost`.
    pub fn add_literal(&mut self, byte: u8, cost: f32) {
        let position = self.literal_index.len();
        self.data.push(byte);
        self.cost.push(cost);
        self.literal_index.push(position);
    }

    /// Appends `length` bytes copied from `offset` bytes back, spreading
    /// `cost` evenly over the copied bytes.
    ///
    /// The copy may overlap the bytes it is producing (`offset < length`),
    /// because every byte is read relative to the current end of the data.
    ///
    /// # Panics
    ///
    /// Panics if `offset` is zero or larger than the number of bytes added so
    /// far (and `length` is non-zero).
    pub fn add_match(&mut self, offset: usize, length: usize, mut cost: f32) {
        cost /= length as f32;
        for _ in 0..length {
            // All three vectors have the same length, so one source position
            // serves both the byte and its literal reference.
            let source = self.data.len() - offset;
            let byte = self.data[source];
            let literal = self.literal_index[source];
            self.data.push(byte);
            self.literal_index.push(literal);
            self.cost.push(cost);
        }
    }

    /// Redistributes cost between each literal and the bytes copied from it.
    ///
    /// For every byte, `delta = (cost of its source literal - its own cost)
    /// / number of bytes referring to that literal` is added to the byte and
    /// subtracted from the literal, so the total cost stays the same.
    ///
    /// This relies on every `literal_index` entry pointing at or before its
    /// own position, which holds for data built with [`Heatmap::add_literal`]
    /// and [`Heatmap::add_match`] as long as [`Heatmap::reverse`] has not been
    /// called yet.
    pub fn finish(&mut self) {
        let mut ref_count = vec![0usize; self.literal_index.len()];
        for &index in &self.literal_index {
            ref_count[index] += 1;
        }

        let mut shifted: Vec<f32> = Vec::with_capacity(self.cost.len());
        for (&index, &cost) in self.literal_index.iter().zip(self.cost.iter()) {
            let delta = (self.cost[index] - cost) / ref_count[index] as f32;
            shifted.push(delta);
            shifted[index] -= delta;
        }

        for (cost, delta) in self.cost.iter_mut().zip(shifted) {
            *cost += delta;
        }
    }

    /// Reverses the order of the bytes, keeping every literal reference
    /// pointing at the same literal as before.
    pub fn reverse(&mut self) {
        self.data.reverse();
        self.cost.reverse();
        self.literal_index.reverse();
        // Reversal moves the element at position `i` to `len - 1 - i`, so the
        // stored literal positions have to be mirrored the same way. Using
        // `len - i` here would be off by one: no byte would be recognised as
        // a literal any more and position 0 would map out of range.
        let last = self.data.len().saturating_sub(1);
        for index in self.literal_index.iter_mut() {
            *index = last - *index;
        }
    }

    /// Returns the number of bytes recorded in the heatmap.
    pub fn len(&self) -> usize {
        self.cost.len()
    }

    /// Returns `true` if no bytes have been recorded yet.
    pub fn is_empty(&self) -> bool {
        self.cost.is_empty()
    }

    /// Returns whether the byte at `index` was added as a literal (as opposed
    /// to being produced by a match).
    ///
    /// # Panics
    ///
    /// Panics if `index` is out of range.
    pub fn is_literal(&self, index: usize) -> bool {
        self.literal_index[index] == index
    }

    /// Returns the cost attributed to the byte at `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is out of range.
    pub fn cost(&self, index: usize) -> f32 {
        self.cost[index]
    }

    /// Returns the unpacked byte at `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is out of range.
    pub fn byte(&self, index: usize) -> u8 {
        self.data[index]
    }
}
|
||||||
@@ -1,11 +1,13 @@
|
|||||||
mod context_state;
|
mod context_state;
|
||||||
mod greedy_packer;
|
mod greedy_packer;
|
||||||
|
mod heatmap;
|
||||||
mod lz;
|
mod lz;
|
||||||
mod match_finder;
|
mod match_finder;
|
||||||
mod parsing_packer;
|
mod parsing_packer;
|
||||||
mod rans;
|
mod rans;
|
||||||
|
|
||||||
pub use lz::{calculate_margin, unpack, UnpackError};
|
pub use heatmap::Heatmap;
|
||||||
|
pub use lz::{calculate_margin, create_heatmap, unpack, UnpackError};
|
||||||
|
|
||||||
pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize);
|
pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize);
|
||||||
|
|
||||||
|
|||||||
31
src/lz.rs
31
src/lz.rs
@@ -1,4 +1,5 @@
|
|||||||
use crate::context_state::ContextState;
|
use crate::context_state::ContextState;
|
||||||
|
use crate::heatmap::Heatmap;
|
||||||
use crate::rans::{EntropyCoder, RansDecoder};
|
use crate::rans::{EntropyCoder, RansDecoder};
|
||||||
use crate::Config;
|
use crate::Config;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
@@ -153,21 +154,32 @@ pub fn unpack(
|
|||||||
max_size: usize,
|
max_size: usize,
|
||||||
) -> Result<Vec<u8>, UnpackError> {
|
) -> Result<Vec<u8>, UnpackError> {
|
||||||
let mut result = vec![];
|
let mut result = vec![];
|
||||||
let _ = unpack_internal(Some(&mut result), packed_data, config, max_size)?;
|
let _ = unpack_internal(Some(&mut result), None, packed_data, config, max_size)?;
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn calculate_margin(packed_data: &[u8], config: &Config) -> Result<isize, UnpackError> {
|
pub fn calculate_margin(packed_data: &[u8], config: &Config) -> Result<isize, UnpackError> {
|
||||||
unpack_internal(None, packed_data, config, usize::MAX)
|
unpack_internal(None, None, packed_data, config, usize::MAX)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn unpack_internal(
|
pub fn create_heatmap(
|
||||||
|
packed_data: &[u8],
|
||||||
|
config: &Config,
|
||||||
|
max_size: usize,
|
||||||
|
) -> Result<Heatmap, UnpackError> {
|
||||||
|
let mut heatmap = Heatmap::new();
|
||||||
|
let _ = unpack_internal(None, Some(&mut heatmap), packed_data, config, max_size)?;
|
||||||
|
Ok(heatmap)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn unpack_internal(
|
||||||
mut result: Option<&mut Vec<u8>>,
|
mut result: Option<&mut Vec<u8>>,
|
||||||
|
mut heatmap: Option<&mut Heatmap>,
|
||||||
packed_data: &[u8],
|
packed_data: &[u8],
|
||||||
config: &Config,
|
config: &Config,
|
||||||
max_size: usize,
|
max_size: usize,
|
||||||
) -> Result<isize, UnpackError> {
|
) -> Result<isize, UnpackError> {
|
||||||
let mut decoder = RansDecoder::new(packed_data, &config);
|
let mut decoder = RansDecoder::new(packed_data, &config)?;
|
||||||
let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, &config);
|
let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, &config);
|
||||||
let mut offset = usize::MAX;
|
let mut offset = usize::MAX;
|
||||||
let mut position = 0usize;
|
let mut position = 0usize;
|
||||||
@@ -198,6 +210,7 @@ pub fn unpack_internal(
|
|||||||
}
|
}
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
|
let prev_decoder = decoder.clone();
|
||||||
margin = margin.max(position as isize - decoder.pos() as isize);
|
margin = margin.max(position as isize - decoder.pos() as isize);
|
||||||
let literal_base = position % config.parity_contexts * 256;
|
let literal_base = position % config.parity_contexts * 256;
|
||||||
if decoder.decode_with_context(&mut contexts.context_mut(literal_base))?
|
if decoder.decode_with_context(&mut contexts.context_mut(literal_base))?
|
||||||
@@ -231,6 +244,9 @@ pub fn unpack_internal(
|
|||||||
if offset > position {
|
if offset > position {
|
||||||
return Err(UnpackError::OffsetOutOfRange { offset, position });
|
return Err(UnpackError::OffsetOutOfRange { offset, position });
|
||||||
}
|
}
|
||||||
|
if let Some(ref mut heatmap) = heatmap {
|
||||||
|
heatmap.add_match(offset, length, decoder.cost(&prev_decoder));
|
||||||
|
}
|
||||||
if let Some(ref mut result) = result {
|
if let Some(ref mut result) = result {
|
||||||
for _ in 0..length {
|
for _ in 0..length {
|
||||||
if result.len() < max_size {
|
if result.len() < max_size {
|
||||||
@@ -251,6 +267,9 @@ pub fn unpack_internal(
|
|||||||
context_index = (context_index << 1) | bit as usize;
|
context_index = (context_index << 1) | bit as usize;
|
||||||
byte |= (bit as u8) << i;
|
byte |= (bit as u8) << i;
|
||||||
}
|
}
|
||||||
|
if let Some(ref mut heatmap) = heatmap {
|
||||||
|
heatmap.add_literal(byte, decoder.cost(&prev_decoder));
|
||||||
|
}
|
||||||
if let Some(ref mut result) = result {
|
if let Some(ref mut result) = result {
|
||||||
if result.len() < max_size {
|
if result.len() < max_size {
|
||||||
result.push(byte);
|
result.push(byte);
|
||||||
@@ -261,6 +280,10 @@ pub fn unpack_internal(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(heatmap) = heatmap {
|
||||||
|
heatmap.finish();
|
||||||
|
}
|
||||||
|
|
||||||
if position > max_size {
|
if position > max_size {
|
||||||
return Err(UnpackError::OverSize {
|
return Err(UnpackError::OverSize {
|
||||||
size: position,
|
size: position,
|
||||||
|
|||||||
60
src/main.rs
60
src/main.rs
@@ -9,6 +9,7 @@ fn main() -> Result<()> {
|
|||||||
let mut reverse = false;
|
let mut reverse = false;
|
||||||
let mut unpack = false;
|
let mut unpack = false;
|
||||||
let mut calculate_margin = false;
|
let mut calculate_margin = false;
|
||||||
|
let mut create_heatmap = false;
|
||||||
let mut level = 2;
|
let mut level = 2;
|
||||||
let mut infile: Option<PathBuf> = None;
|
let mut infile: Option<PathBuf> = None;
|
||||||
let mut outfile: Option<PathBuf> = None;
|
let mut outfile: Option<PathBuf> = None;
|
||||||
@@ -58,6 +59,7 @@ fn main() -> Result<()> {
|
|||||||
|
|
||||||
Short('u') | Long("unpack") => unpack = true,
|
Short('u') | Long("unpack") => unpack = true,
|
||||||
Long("margin") => calculate_margin = true,
|
Long("margin") => calculate_margin = true,
|
||||||
|
Long("heatmap") => create_heatmap = true,
|
||||||
Short('l') | Long("level") => level = parser.value()?.parse()?,
|
Short('l') | Long("level") => level = parser.value()?.parse()?,
|
||||||
Short(n) if n.is_ascii_digit() => level = n as u8 - b'0',
|
Short(n) if n.is_ascii_digit() => level = n as u8 - b'0',
|
||||||
Short('h') | Long("help") => print_help(0),
|
Short('h') | Long("help") => print_help(0),
|
||||||
@@ -73,15 +75,16 @@ fn main() -> Result<()> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let infile = infile.unwrap_or_else(|| print_help(1));
|
let infile = infile.unwrap_or_else(|| print_help(1));
|
||||||
let outfile = outfile.unwrap_or_else(|| {
|
enum OutFileType {
|
||||||
let mut name = infile.clone();
|
Packed,
|
||||||
if unpack {
|
Unpacked,
|
||||||
if name.extension().filter(|&e| e == "upk").is_some() {
|
Heatmap,
|
||||||
name.set_extension("");
|
|
||||||
} else {
|
|
||||||
name.set_extension("bin");
|
|
||||||
}
|
}
|
||||||
} else {
|
let outfile = |tpe: OutFileType| {
|
||||||
|
outfile.clone().unwrap_or_else(|| {
|
||||||
|
let mut name = infile.clone();
|
||||||
|
match tpe {
|
||||||
|
OutFileType::Packed => {
|
||||||
let mut filename = name
|
let mut filename = name
|
||||||
.file_name()
|
.file_name()
|
||||||
.unwrap_or_else(|| OsStr::new(""))
|
.unwrap_or_else(|| OsStr::new(""))
|
||||||
@@ -89,17 +92,29 @@ fn main() -> Result<()> {
|
|||||||
filename.push(".upk");
|
filename.push(".upk");
|
||||||
name.set_file_name(filename);
|
name.set_file_name(filename);
|
||||||
}
|
}
|
||||||
|
OutFileType::Unpacked => {
|
||||||
|
if name.extension().filter(|&e| e == "upk").is_some() {
|
||||||
|
name.set_extension("");
|
||||||
|
} else {
|
||||||
|
name.set_extension("bin");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
OutFileType::Heatmap => {
|
||||||
|
name.set_extension("heatmap");
|
||||||
|
}
|
||||||
|
}
|
||||||
name
|
name
|
||||||
});
|
})
|
||||||
|
};
|
||||||
|
|
||||||
if config.parity_contexts != 1 && config.parity_contexts != 2 && config.parity_contexts != 4 {
|
if config.parity_contexts != 1 && config.parity_contexts != 2 && config.parity_contexts != 4 {
|
||||||
eprintln!("--parity has to be 1, 2, or 4");
|
eprintln!("--parity has to be 1, 2, or 4");
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if !unpack && !calculate_margin {
|
if !unpack && !calculate_margin && !create_heatmap {
|
||||||
let mut data = vec![];
|
let mut data = vec![];
|
||||||
File::open(infile)?.read_to_end(&mut data)?;
|
File::open(&infile)?.read_to_end(&mut data)?;
|
||||||
if reverse {
|
if reverse {
|
||||||
data.reverse();
|
data.reverse();
|
||||||
}
|
}
|
||||||
@@ -126,10 +141,10 @@ fn main() -> Result<()> {
|
|||||||
packed_data.len(),
|
packed_data.len(),
|
||||||
packed_data.len() as f32 * 100. / data.len() as f32
|
packed_data.len() as f32 * 100. / data.len() as f32
|
||||||
);
|
);
|
||||||
File::create(outfile)?.write_all(&packed_data)?;
|
File::create(outfile(OutFileType::Packed))?.write_all(&packed_data)?;
|
||||||
} else {
|
} else {
|
||||||
let mut data = vec![];
|
let mut data = vec![];
|
||||||
File::open(infile)?.read_to_end(&mut data)?;
|
File::open(&infile)?.read_to_end(&mut data)?;
|
||||||
if reverse {
|
if reverse {
|
||||||
data.reverse();
|
data.reverse();
|
||||||
}
|
}
|
||||||
@@ -138,7 +153,22 @@ fn main() -> Result<()> {
|
|||||||
if reverse {
|
if reverse {
|
||||||
unpacked_data.reverse();
|
unpacked_data.reverse();
|
||||||
}
|
}
|
||||||
File::create(outfile)?.write_all(&unpacked_data)?;
|
File::create(outfile(OutFileType::Unpacked))?.write_all(&unpacked_data)?;
|
||||||
|
}
|
||||||
|
if create_heatmap {
|
||||||
|
let mut heatmap = upkr::create_heatmap(&data, &config, max_unpacked_size)?;
|
||||||
|
if reverse {
|
||||||
|
heatmap.reverse();
|
||||||
|
}
|
||||||
|
let mut heatmap_bin = Vec::with_capacity(heatmap.len());
|
||||||
|
for i in 0..heatmap.len() {
|
||||||
|
let cost = (heatmap.cost(i).log2() * 8. + 64.)
|
||||||
|
.round()
|
||||||
|
.max(0.)
|
||||||
|
.min(127.) as u8;
|
||||||
|
heatmap_bin.push((cost << 1) | heatmap.is_literal(i) as u8);
|
||||||
|
}
|
||||||
|
File::create(outfile(OutFileType::Heatmap))?.write_all(&heatmap_bin)?;
|
||||||
}
|
}
|
||||||
if calculate_margin {
|
if calculate_margin {
|
||||||
println!("{}", upkr::calculate_margin(&data, &config)?);
|
println!("{}", upkr::calculate_margin(&data, &config)?);
|
||||||
@@ -152,11 +182,13 @@ fn print_help(exit_code: i32) -> ! {
|
|||||||
eprintln!("Usage:");
|
eprintln!("Usage:");
|
||||||
eprintln!(" upkr [-l level(0-9)] [config options] <infile> [<outfile>]");
|
eprintln!(" upkr [-l level(0-9)] [config options] <infile> [<outfile>]");
|
||||||
eprintln!(" upkr -u [config options] <infile> [<outfile>]");
|
eprintln!(" upkr -u [config options] <infile> [<outfile>]");
|
||||||
|
eprintln!(" upkr --heatmap [config options] <infile> [<outfile>]");
|
||||||
eprintln!(" upkr --margin [config options] <infile>");
|
eprintln!(" upkr --margin [config options] <infile>");
|
||||||
eprintln!();
|
eprintln!();
|
||||||
eprintln!(" -l, --level N compression level 0-9");
|
eprintln!(" -l, --level N compression level 0-9");
|
||||||
eprintln!(" -0, ..., -9 short form for setting compression level");
|
eprintln!(" -0, ..., -9 short form for setting compression level");
|
||||||
eprintln!(" -u, --unpack unpack infile");
|
eprintln!(" -u, --unpack unpack infile");
|
||||||
|
eprintln!(" --heatmap calculate heatmap from compressed file");
|
||||||
eprintln!(" --margin calculate margin for overlapped unpacking of a packed file");
|
eprintln!(" --margin calculate margin for overlapped unpacking of a packed file");
|
||||||
eprintln!();
|
eprintln!();
|
||||||
eprintln!("Version: {}", env!("CARGO_PKG_VERSION"));
|
eprintln!("Version: {}", env!("CARGO_PKG_VERSION"));
|
||||||
|
|||||||
24
src/rans.rs
24
src/rans.rs
@@ -148,6 +148,7 @@ impl EntropyCoder for CostCounter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
pub struct RansDecoder<'a> {
|
pub struct RansDecoder<'a> {
|
||||||
data: &'a [u8],
|
data: &'a [u8],
|
||||||
pos: usize,
|
pos: usize,
|
||||||
@@ -166,8 +167,8 @@ const PROB_MASK: u32 = ONE_PROB - 1;
|
|||||||
pub struct UnexpectedEOF;
|
pub struct UnexpectedEOF;
|
||||||
|
|
||||||
impl<'a> RansDecoder<'a> {
|
impl<'a> RansDecoder<'a> {
|
||||||
pub fn new(data: &'a [u8], config: &Config) -> RansDecoder<'a> {
|
pub fn new(data: &'a [u8], config: &Config) -> Result<RansDecoder<'a>, UnexpectedEOF> {
|
||||||
RansDecoder {
|
let mut decoder = RansDecoder {
|
||||||
data,
|
data,
|
||||||
pos: 0,
|
pos: 0,
|
||||||
state: 0,
|
state: 0,
|
||||||
@@ -176,7 +177,9 @@ impl<'a> RansDecoder<'a> {
|
|||||||
bits_left: 0,
|
bits_left: 0,
|
||||||
invert_bit_encoding: config.invert_bit_encoding,
|
invert_bit_encoding: config.invert_bit_encoding,
|
||||||
bitstream_is_big_endian: config.bitstream_is_big_endian,
|
bitstream_is_big_endian: config.bitstream_is_big_endian,
|
||||||
}
|
};
|
||||||
|
decoder.refill()?;
|
||||||
|
Ok(decoder)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn pos(&self) -> usize {
|
pub fn pos(&self) -> usize {
|
||||||
@@ -189,8 +192,7 @@ impl<'a> RansDecoder<'a> {
|
|||||||
Ok(bit)
|
Ok(bit)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn decode_bit(&mut self, prob: u16) -> Result<bool, UnexpectedEOF> {
|
fn refill(&mut self) -> Result<(), UnexpectedEOF> {
|
||||||
let prob = prob as u32;
|
|
||||||
if self.use_bitstream {
|
if self.use_bitstream {
|
||||||
while self.state < 32768 {
|
while self.state < 32768 {
|
||||||
if self.bits_left == 0 {
|
if self.bits_left == 0 {
|
||||||
@@ -219,6 +221,13 @@ impl<'a> RansDecoder<'a> {
|
|||||||
self.pos += 1;
|
self.pos += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn decode_bit(&mut self, prob: u16) -> Result<bool, UnexpectedEOF> {
|
||||||
|
self.refill()?;
|
||||||
|
|
||||||
|
let prob = prob as u32;
|
||||||
|
|
||||||
let bit = (self.state & PROB_MASK) < prob;
|
let bit = (self.state & PROB_MASK) < prob;
|
||||||
|
|
||||||
@@ -231,4 +240,9 @@ impl<'a> RansDecoder<'a> {
|
|||||||
|
|
||||||
Ok(bit ^ self.invert_bit_encoding)
|
Ok(bit ^ self.invert_bit_encoding)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Estimates the cost, in bits, of everything decoded since the `prev`
/// snapshot of this decoder was taken.
///
/// The estimate is the drop in `log2(state)` from `prev` to `self`, plus 8
/// for every step the read position advanced (presumably one step per input
/// byte; confirm against the refill logic).
pub fn cost(&self, prev: &RansDecoder) -> f32 {
    let state_bits_before = (prev.state as f32).log2();
    let state_bits_after = (self.state as f32).log2();
    let consumed_bits = (self.pos - prev.pos) as f32 * 8.;
    state_bits_before - state_bits_after + consumed_bits
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user