4 Commits

8 changed files with 98 additions and 28 deletions

2
Cargo.lock generated
View File

@@ -117,7 +117,7 @@ dependencies = [
[[package]] [[package]]
name = "upkr" name = "upkr"
version = "0.2.0-pre1" version = "0.2.0-pre2"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"cdivsufsort", "cdivsufsort",

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "upkr" name = "upkr"
version = "0.2.0-pre1" version = "0.2.0-pre2"
edition = "2021" edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@@ -21,7 +21,7 @@ pub fn pack(
if let Some(m) = match_finder.matches(pos).next() { if let Some(m) = match_finder.matches(pos).next() {
let max_offset = 1 << (m.length * 3 - 1).min(31); let max_offset = 1 << (m.length * 3 - 1).min(31);
let offset = pos - m.pos; let offset = pos - m.pos;
if offset < max_offset { if offset < max_offset && m.length >= config.min_length() {
lz::Op::Match { lz::Op::Match {
offset: offset as u32, offset: offset as u32,
len: m.length as u32, len: m.length as u32,
@@ -40,7 +40,7 @@ pub fn pack(
.zip(data[(pos - offset)..].iter()) .zip(data[(pos - offset)..].iter())
.take_while(|(a, b)| a == b) .take_while(|(a, b)| a == b)
.count(); .count();
if length > 0 { if length >= config.min_length() {
lz::Op::Match { lz::Op::Match {
offset: offset as u32, offset: offset as u32,
len: length as u32, len: length as u32,

View File

@@ -5,7 +5,7 @@ mod match_finder;
mod parsing_packer; mod parsing_packer;
mod rans; mod rans;
pub use lz::unpack; pub use lz::{calculate_margin, unpack};
pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize); pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize);
@@ -20,6 +20,9 @@ pub struct Config {
pub bitstream_is_big_endian: bool, pub bitstream_is_big_endian: bool,
pub simplified_prob_update: bool, pub simplified_prob_update: bool,
pub no_repeated_offsets: bool,
pub eof_in_length: bool,
} }
impl Default for Config { impl Default for Config {
@@ -35,6 +38,19 @@ impl Default for Config {
bitstream_is_big_endian: false, bitstream_is_big_endian: false,
simplified_prob_update: false, simplified_prob_update: false,
no_repeated_offsets: false,
eof_in_length: false,
}
}
}
impl Config {
pub fn min_length(&self) -> usize {
if self.eof_in_length {
2
} else {
1
} }
} }
} }

View File

@@ -25,7 +25,7 @@ impl Op {
} }
&Op::Match { offset, len } => { &Op::Match { offset, len } => {
encode_bit(coder, state, literal_base, config.is_match_bit); encode_bit(coder, state, literal_base, config.is_match_bit);
if !state.prev_was_match { if !state.prev_was_match && !config.no_repeated_offsets {
encode_bit( encode_bit(
coder, coder,
state, state,
@@ -33,18 +33,19 @@ impl Op {
(offset != state.last_offset) == config.new_offset_bit, (offset != state.last_offset) == config.new_offset_bit,
); );
} else { } else {
assert!(offset != state.last_offset); assert!(offset != state.last_offset || config.no_repeated_offsets);
} }
if offset != state.last_offset { if offset != state.last_offset || config.no_repeated_offsets {
encode_length( encode_length(
coder, coder,
state, state,
256 * state.parity_contexts + 1, 256 * state.parity_contexts + 1,
offset + 1, offset + if config.eof_in_length { 0 } else { 1 },
config, config,
); );
state.last_offset = offset; state.last_offset = offset;
} }
assert!(!config.eof_in_length || len > 1);
encode_length(coder, state, 256 * state.parity_contexts + 65, len, config); encode_length(coder, state, 256 * state.parity_contexts + 65, len, config);
state.prev_was_match = true; state.prev_was_match = true;
state.pos += len as usize; state.pos += len as usize;
@@ -60,15 +61,20 @@ pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState, config:
state.pos % state.parity_contexts * 256, state.pos % state.parity_contexts * 256,
config.is_match_bit, config.is_match_bit,
); );
if !state.prev_was_match { if !state.prev_was_match && !config.no_repeated_offsets {
encode_bit( encode_bit(
coder, coder,
state, state,
256 * state.parity_contexts, 256 * state.parity_contexts,
config.new_offset_bit, config.new_offset_bit ^ config.eof_in_length,
); );
} }
encode_length(coder, state, 256 * state.parity_contexts + 1, 1, config); if !config.eof_in_length || config.no_repeated_offsets {
encode_length(coder, state, 256 * state.parity_contexts + 1, 1, config);
}
if config.eof_in_length {
encode_length(coder, state, 256 * state.parity_contexts + 65, 1, config);
}
} }
fn encode_bit( fn encode_bit(
@@ -124,12 +130,27 @@ impl CoderState {
} }
} }
pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> { pub fn unpack(packed_data: &[u8], config: &Config) -> Vec<u8> {
let mut result = vec![];
let _ = unpack_internal(Some(&mut result), packed_data, config);
result
}
/// Decodes `packed_data` without storing any output and returns the margin value.
///
/// This delegates to `unpack_internal` with `None` as the output buffer, so the
/// packed stream is still walked with the given `config`, but no unpacked bytes
/// are collected; only the `isize` computed by `unpack_internal` is returned.
///
/// The command-line help describes this value as the margin for overlapped
/// unpacking of a packed file.
// NOTE(review): behaviour on malformed or truncated input is determined by
// `unpack_internal` and the decoder, not by this wrapper — confirm before
// exposing to untrusted data.
pub fn calculate_margin(packed_data: &[u8], config: &Config) -> isize {
unpack_internal(None, packed_data, config)
}
pub fn unpack_internal(
mut result: Option<&mut Vec<u8>>,
packed_data: &[u8],
config: &Config,
) -> isize {
let mut decoder = RansDecoder::new(packed_data, &config); let mut decoder = RansDecoder::new(packed_data, &config);
let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, &config); let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, &config);
let mut result = vec![];
let mut offset = 0; let mut offset = 0;
let mut position = 0usize;
let mut prev_was_match = false; let mut prev_was_match = false;
let mut margin = 0isize;
fn decode_length( fn decode_length(
decoder: &mut RansDecoder, decoder: &mut RansDecoder,
@@ -152,11 +173,13 @@ pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
} }
loop { loop {
let literal_base = result.len() % config.parity_contexts * 256; margin = margin.max(position as isize - decoder.pos() as isize);
let literal_base = position % config.parity_contexts * 256;
if decoder.decode_with_context(&mut contexts.context_mut(literal_base)) if decoder.decode_with_context(&mut contexts.context_mut(literal_base))
== config.is_match_bit == config.is_match_bit
{ {
if prev_was_match if config.no_repeated_offsets
|| prev_was_match
|| decoder || decoder
.decode_with_context(&mut contexts.context_mut(256 * config.parity_contexts)) .decode_with_context(&mut contexts.context_mut(256 * config.parity_contexts))
== config.new_offset_bit == config.new_offset_bit
@@ -166,7 +189,7 @@ pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
&mut contexts, &mut contexts,
256 * config.parity_contexts + 1, 256 * config.parity_contexts + 1,
&config, &config,
) - 1; ) - if config.eof_in_length { 0 } else { 1 };
if offset == 0 { if offset == 0 {
break; break;
} }
@@ -177,9 +200,15 @@ pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
256 * config.parity_contexts + 65, 256 * config.parity_contexts + 65,
&config, &config,
); );
for _ in 0..length { if config.eof_in_length && length == 1 {
result.push(result[result.len() - offset]); break;
} }
if let Some(ref mut result) = result {
for _ in 0..length {
result.push(result[result.len() - offset]);
}
}
position += length;
prev_was_match = true; prev_was_match = true;
} else { } else {
let mut context_index = 1; let mut context_index = 1;
@@ -190,10 +219,13 @@ pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
context_index = (context_index << 1) | bit as usize; context_index = (context_index << 1) | bit as usize;
byte |= (bit as u8) << i; byte |= (bit as u8) << i;
} }
result.push(byte); if let Some(ref mut result) = result {
result.push(byte);
}
position += 1;
prev_was_match = false; prev_was_match = false;
} }
} }
result margin + decoder.pos() as isize - position as isize
} }

View File

@@ -8,6 +8,7 @@ fn main() -> Result<()> {
let mut config = upkr::Config::default(); let mut config = upkr::Config::default();
let mut reverse = false; let mut reverse = false;
let mut unpack = false; let mut unpack = false;
let mut calculate_margin = false;
let mut level = 2; let mut level = 2;
let mut infile: Option<PathBuf> = None; let mut infile: Option<PathBuf> = None;
let mut outfile: Option<PathBuf> = None; let mut outfile: Option<PathBuf> = None;
@@ -28,12 +29,15 @@ fn main() -> Result<()> {
config.use_bitstream = true; config.use_bitstream = true;
config.bitstream_is_big_endian = true; config.bitstream_is_big_endian = true;
} }
Long("no-repeated-offsets") => config.no_repeated_offsets = true,
Long("eof-in-length") => config.eof_in_length = true,
Long("z80") => { Long("z80") => {
config.use_bitstream = true; config.use_bitstream = true;
config.bitstream_is_big_endian = true; config.bitstream_is_big_endian = true;
config.invert_bit_encoding = true; config.invert_bit_encoding = true;
config.simplified_prob_update = true; config.simplified_prob_update = true;
level = 9;
} }
Long("x86") => { Long("x86") => {
config.use_bitstream = true; config.use_bitstream = true;
@@ -42,6 +46,7 @@ fn main() -> Result<()> {
} }
Short('u') | Long("unpack") => unpack = true, Short('u') | Long("unpack") => unpack = true,
Long("margin") => calculate_margin = true,
Short('l') | Long("level") => level = parser.value()?.parse()?, Short('l') | Long("level") => level = parser.value()?.parse()?,
Short('h') | Long("help") => print_help(0), Short('h') | Long("help") => print_help(0),
Value(val) if infile.is_none() => infile = Some(val.try_into()?), Value(val) if infile.is_none() => infile = Some(val.try_into()?),
@@ -75,7 +80,7 @@ fn main() -> Result<()> {
process::exit(1); process::exit(1);
} }
if !unpack { if !unpack && !calculate_margin {
let mut data = vec![]; let mut data = vec![];
File::open(infile)?.read_to_end(&mut data)?; File::open(infile)?.read_to_end(&mut data)?;
if reverse { if reverse {
@@ -111,11 +116,16 @@ fn main() -> Result<()> {
if reverse { if reverse {
data.reverse(); data.reverse();
} }
let mut unpacked_data = upkr::unpack(&data, config); if unpack {
if reverse { let mut unpacked_data = upkr::unpack(&data, &config);
unpacked_data.reverse(); if reverse {
unpacked_data.reverse();
}
File::create(outfile)?.write_all(&unpacked_data)?;
}
if calculate_margin {
println!("{}", upkr::calculate_margin(&data, &config));
} }
File::create(outfile)?.write_all(&unpacked_data)?;
} }
Ok(()) Ok(())
@@ -125,9 +135,11 @@ fn print_help(exit_code: i32) -> ! {
eprintln!("Usage:"); eprintln!("Usage:");
eprintln!(" upkr [-l level(0-9)] [config options] <infile> [<outfile>]"); eprintln!(" upkr [-l level(0-9)] [config options] <infile> [<outfile>]");
eprintln!(" upkr -u [config options] <infile> [<outfile>]"); eprintln!(" upkr -u [config options] <infile> [<outfile>]");
eprintln!(" upkr --margin [config options] <infile>");
eprintln!(); eprintln!();
eprintln!(" -l, --level N compression level 0-9"); eprintln!(" -l, --level N compression level 0-9");
eprintln!(" -u, --unpack unpack infile"); eprintln!(" -u, --unpack unpack infile");
eprintln!(" --margin calculate margin for overlapped unpacking of a packed file");
eprintln!(); eprintln!();
eprintln!("Config presets for specific unpackers:"); eprintln!("Config presets for specific unpackers:");
eprintln!(" --z80 --big-endian-bitstream --invert-bit-encoding --simplified-prob-update"); eprintln!(" --z80 --big-endian-bitstream --invert-bit-encoding --simplified-prob-update");
@@ -145,5 +157,6 @@ fn print_help(exit_code: i32) -> ! {
eprintln!(" --invert-bit-encoding"); eprintln!(" --invert-bit-encoding");
eprintln!(" --simplified-prob-update"); eprintln!(" --simplified-prob-update");
eprintln!(" --big-endian-bitstream (implies --bitstream)"); eprintln!(" --big-endian-bitstream (implies --bitstream)");
eprintln!(" --no-repeated-offsets");
process::exit(exit_code); process::exit(exit_code);
} }

View File

@@ -110,6 +110,9 @@ fn parse(
max_arrivals: usize, max_arrivals: usize,
config: &crate::Config, config: &crate::Config,
) { ) {
if length < config.min_length() {
return;
}
cost_counter.reset(); cost_counter.reset();
let mut state = arrival.state.clone(); let mut state = arrival.state.clone();
let op = lz::Op::Match { let op = lz::Op::Match {

View File

@@ -149,6 +149,7 @@ impl EntropyCoder for CostCounter {
pub struct RansDecoder<'a> { pub struct RansDecoder<'a> {
data: &'a [u8], data: &'a [u8],
pos: usize,
state: u32, state: u32,
use_bitstream: bool, use_bitstream: bool,
byte: u8, byte: u8,
@@ -163,6 +164,7 @@ impl<'a> RansDecoder<'a> {
pub fn new(data: &'a [u8], config: &Config) -> RansDecoder<'a> { pub fn new(data: &'a [u8], config: &Config) -> RansDecoder<'a> {
RansDecoder { RansDecoder {
data, data,
pos: 0,
state: 0, state: 0,
use_bitstream: config.use_bitstream, use_bitstream: config.use_bitstream,
byte: 0, byte: 0,
@@ -172,6 +174,10 @@ impl<'a> RansDecoder<'a> {
} }
} }
/// Returns the decoder's current `pos` value.
///
/// In the bitstream read path, `pos` is the index of the next byte fetched
/// from `data`, and it is incremented by one after each byte is loaded.
// NOTE(review): whether the non-bitstream read path advances `pos` the same
// way is not visible in this view — confirm.
pub fn pos(&self) -> usize {
self.pos
}
pub fn decode_with_context(&mut self, context: &mut Context) -> bool { pub fn decode_with_context(&mut self, context: &mut Context) -> bool {
let bit = self.decode_bit(context.prob()); let bit = self.decode_bit(context.prob());
context.update(bit); context.update(bit);
@@ -183,8 +189,8 @@ impl<'a> RansDecoder<'a> {
if self.use_bitstream { if self.use_bitstream {
while self.state < 32768 { while self.state < 32768 {
if self.bits_left == 0 { if self.bits_left == 0 {
self.byte = self.data[0]; self.byte = self.data[self.pos];
self.data = &self.data[1..]; self.pos += 1;
self.bits_left = 8; self.bits_left = 8;
} }
if self.bitstream_is_big_endian { if self.bitstream_is_big_endian {