diff --git a/Cargo.lock b/Cargo.lock index 444dbe5..74cdb0c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -117,7 +117,7 @@ dependencies = [ [[package]] name = "upkr" -version = "0.2.0" +version = "0.2.0-pre1" dependencies = [ "anyhow", "cdivsufsort", diff --git a/Cargo.toml b/Cargo.toml index 16c1c8d..469f25a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "upkr" -version = "0.2.0" +version = "0.2.0-pre1" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/src/context_state.rs b/src/context_state.rs index 062d74d..9911ade 100644 --- a/src/context_state.rs +++ b/src/context_state.rs @@ -1,4 +1,7 @@ -use crate::rans::{PROB_BITS, ONE_PROB}; +use crate::{ + rans::{ONE_PROB, PROB_BITS}, + Config, +}; const INIT_PROB: u16 = 1 << (PROB_BITS - 1); const UPDATE_RATE: u32 = 4; @@ -7,6 +10,8 @@ const UPDATE_ADD: u32 = 8; #[derive(Clone)] pub struct ContextState { contexts: Vec<u8>, + invert_bit_encoding: bool, + simplified_prob_update: bool, } pub struct Context<'a> { @@ -15,9 +20,11 @@ pub struct Context<'a> { } impl ContextState { - pub fn new(size: usize) -> ContextState { + pub fn new(size: usize, config: &Config) -> ContextState { ContextState { contexts: vec![INIT_PROB as u8; size], + invert_bit_encoding: config.invert_bit_encoding, + simplified_prob_update: config.simplified_prob_update, } } @@ -33,10 +40,21 @@ impl<'a> Context<'a> { pub fn update(&mut self, bit: bool) { let old = self.state.contexts[self.index]; - self.state.contexts[self.index] = if bit { - old + ((ONE_PROB - old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8 + + self.state.contexts[self.index] = if self.state.simplified_prob_update { + let offset = if bit ^ self.state.invert_bit_encoding { + ONE_PROB as i32 >> UPDATE_RATE + } else { + 0 + }; + + (offset + old as i32 - ((old as i32 + UPDATE_ADD as i32) >> UPDATE_RATE)) as u8 } else { - old - ((old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8 + if bit ^ 
self.state.invert_bit_encoding { + old + ((ONE_PROB - old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8 + } else { + old - ((old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8 + } }; } } diff --git a/src/greedy_packer.rs b/src/greedy_packer.rs index c903063..8794039 100644 --- a/src/greedy_packer.rs +++ b/src/greedy_packer.rs @@ -9,8 +9,8 @@ pub fn pack( mut progress_callback: Option, ) -> Vec { let mut match_finder = MatchFinder::new(data); - let mut rans_coder = RansCoder::new(config.use_bitstream); - let mut state = lz::CoderState::new(config.parity_contexts); + let mut rans_coder = RansCoder::new(config); + let mut state = lz::CoderState::new(config); let mut pos = 0; while pos < data.len() { diff --git a/src/lib.rs b/src/lib.rs index c7a11a3..52a19c5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,10 +13,13 @@ pub struct Config { pub use_bitstream: bool, pub parity_contexts: usize, - pub invert_probs: bool, + pub invert_bit_encoding: bool, pub is_match_bit: bool, pub new_offset_bit: bool, pub continue_value_bit: bool, + + pub bitstream_is_big_endian: bool, + pub simplified_prob_update: bool, } impl Default for Config { @@ -25,10 +28,13 @@ impl Default for Config { use_bitstream: false, parity_contexts: 1, - invert_probs: false, + invert_bit_encoding: false, is_match_bit: true, new_offset_bit: true, continue_value_bit: true, + + bitstream_is_big_endian: false, + simplified_prob_update: false, } } } diff --git a/src/lz.rs b/src/lz.rs index f5806ee..f3b2770 100644 --- a/src/lz.rs +++ b/src/lz.rs @@ -102,20 +102,20 @@ fn encode_length( #[derive(Clone)] pub struct CoderState { contexts: ContextState, - parity_contexts: usize, last_offset: u32, prev_was_match: bool, pos: usize, + parity_contexts: usize, } impl CoderState { - pub fn new(parity_contexts: usize) -> CoderState { + pub fn new(config: &Config) -> CoderState { CoderState { - contexts: ContextState::new((1 + 255) * parity_contexts + 1 + 64 + 64), + contexts: ContextState::new((1 + 255) * config.parity_contexts + 1 
+ 64 + 64, config), last_offset: 0, - parity_contexts, prev_was_match: false, pos: 0, + parity_contexts: config.parity_contexts, } } @@ -125,8 +125,8 @@ impl CoderState { } pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> { - let mut decoder = RansDecoder::new(packed_data, config.use_bitstream); - let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64); + let mut decoder = RansDecoder::new(packed_data, &config); + let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, &config); let mut result = vec![]; let mut offset = 0; let mut prev_was_match = false; diff --git a/src/main.rs b/src/main.rs index 53c103e..ea6b20a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -22,7 +22,24 @@ fn main() -> Result<()> { Long("invert-is-match-bit") => config.is_match_bit = false, Long("invert-new-offset-bit") => config.new_offset_bit = false, Long("invert-continue-value-bit") => config.continue_value_bit = false, - Long("invert-probs") => config.invert_probs = true, + Long("invert-bit-encoding") => config.invert_bit_encoding = true, + Long("simplified-prob-update") => config.simplified_prob_update = true, + Long("big-endian-bitstream") => { + config.use_bitstream = true; + config.bitstream_is_big_endian = true; + } + + Long("z80") => { + config.use_bitstream = true; + config.bitstream_is_big_endian = true; + config.invert_bit_encoding = true; + config.simplified_prob_update = true; + } + Long("x86") => { + config.use_bitstream = true; + config.continue_value_bit = false; + config.is_match_bit = false; + } Short('u') | Long("unpack") => unpack = true, Short('l') | Long("level") => level = parser.value()?.parse()?, @@ -112,6 +129,10 @@ fn print_help(exit_code: i32) -> ! 
{ eprintln!(" -l, --level N compression level 0-9"); eprintln!(" -u, --unpack unpack infile"); eprintln!(); + eprintln!("Config presets for specific unpackers:"); + eprintln!(" --z80 --big-endian-bitstream --invert-bit-encoding --simplified-prob-update"); + eprintln!(" --x86 --bitstream --invert-is-match-bit --invert-continue-value-bit"); + eprintln!(); eprintln!("Config options (need to match when packing/unpacking):"); eprintln!(" -b, --bitstream bitstream mode"); eprintln!(" -p, --parity N use N (2/4) parity contexts"); @@ -121,6 +142,8 @@ fn print_help(exit_code: i32) -> ! { eprintln!(" --invert-is-match-bit"); eprintln!(" --invert-new-offset-bit"); eprintln!(" --invert-continue-value-bit"); - eprintln!(" --invert-probs"); + eprintln!(" --invert-bit-encoding"); + eprintln!(" --simplified-prob-update"); + eprintln!(" --big-endian-bitstream (implies --bitstream)"); process::exit(exit_code); } diff --git a/src/parsing_packer.rs b/src/parsing_packer.rs index 73cd63d..d4a7283 100644 --- a/src/parsing_packer.rs +++ b/src/parsing_packer.rs @@ -18,8 +18,8 @@ pub fn pack( ops.push(link.op); parse = link.prev.clone(); } - let mut state = lz::CoderState::new(config.parity_contexts); - let mut coder = RansCoder::new(config.use_bitstream); + let mut state = lz::CoderState::new(config); + let mut coder = RansCoder::new(config); for op in ops.into_iter().rev() { op.encode(&mut coder, &mut state, config); } @@ -136,13 +136,13 @@ fn parse( 0, Arrival { parse: None, - state: lz::CoderState::new(encoding_config.parity_contexts), + state: lz::CoderState::new(encoding_config), cost: 0.0, }, max_arrivals, ); - let cost_counter = &mut CostCounter::new(); + let cost_counter = &mut CostCounter::new(encoding_config); let mut best_per_offset = HashMap::new(); for pos in 0..data.len() { let match_length = |offset: usize| { diff --git a/src/rans.rs b/src/rans.rs index e81308a..ff8d770 100644 --- a/src/rans.rs +++ b/src/rans.rs @@ -1,4 +1,4 @@ -use crate::context_state::Context; +use 
crate::{context_state::Context, Config}; pub const PROB_BITS: u32 = 8; pub const ONE_PROB: u32 = 1 << PROB_BITS; @@ -15,20 +15,25 @@ pub trait EntropyCoder { pub struct RansCoder { bits: Vec<u16>, use_bitstream: bool, + bitstream_is_big_endian: bool, + invert_bit_encoding: bool, } impl EntropyCoder for RansCoder { fn encode_bit(&mut self, bit: bool, prob: u16) { assert!(prob < 32768); - self.bits.push(prob | ((bit as u16) << 15)); + self.bits + .push(prob | (((bit ^ self.invert_bit_encoding) as u16) << 15)); } } impl RansCoder { - pub fn new(use_bitstream: bool) -> RansCoder { + pub fn new(config: &Config) -> RansCoder { RansCoder { bits: Vec::new(), - use_bitstream, + use_bitstream: config.use_bitstream, + bitstream_is_big_endian: config.bitstream_is_big_endian, + invert_bit_encoding: config.invert_bit_encoding, } } @@ -38,18 +43,31 @@ impl RansCoder { let mut state = 1 << l_bits; let mut byte = 0u8; - let mut bit = 8; + let mut bit = if self.bitstream_is_big_endian { 0 } else { 8 }; let mut flush_state: Box<dyn FnMut(&mut u32)> = if self.use_bitstream { - Box::new(|state: &mut u32| { - bit -= 1; - byte |= ((*state & 1) as u8) << bit; - if bit == 0 { - buffer.push(byte); - byte = 0; - bit = 8; - } - *state >>= 1; - }) + if self.bitstream_is_big_endian { + Box::new(|state: &mut u32| { + byte |= ((*state & 1) as u8) << bit; + bit += 1; + if bit == 8 { + buffer.push(byte); + byte = 0; + bit = 0; + } + *state >>= 1; + }) + } else { + Box::new(|state: &mut u32| { + bit -= 1; + byte |= ((*state & 1) as u8) << bit; + if bit == 0 { + buffer.push(byte); + byte = 0; + bit = 8; + } + *state >>= 1; + }) + } } else { Box::new(|state: &mut u32| { buffer.push(*state as u8); @@ -91,10 +109,11 @@ impl RansCoder { pub struct CostCounter { cost: f64, log2_table: Vec<f64>, + invert_bit_encoding: bool, } impl CostCounter { - pub fn new() -> CostCounter { + pub fn new(config: &Config) -> CostCounter { let log2_table = (0..ONE_PROB) .map(|prob| { let inv_prob = ONE_PROB as f64 / prob as f64; @@ -104,6 +123,7 @@ impl 
CostCounter { CostCounter { cost: 0.0, log2_table, + invert_bit_encoding: config.invert_bit_encoding, } } @@ -118,7 +138,7 @@ impl CostCounter { impl EntropyCoder for CostCounter { fn encode_bit(&mut self, bit: bool, prob: u16) { - let prob = if bit { + let prob = if bit ^ self.invert_bit_encoding { prob as u32 } else { ONE_PROB - prob as u32 @@ -133,18 +153,22 @@ pub struct RansDecoder<'a> { use_bitstream: bool, byte: u8, bits_left: u8, + invert_bit_encoding: bool, + bitstream_is_big_endian: bool, } const PROB_MASK: u32 = ONE_PROB - 1; impl<'a> RansDecoder<'a> { - pub fn new(data: &'a [u8], use_bitstream: bool) -> RansDecoder<'a> { + pub fn new(data: &'a [u8], config: &Config) -> RansDecoder<'a> { RansDecoder { data, state: 0, - use_bitstream, + use_bitstream: config.use_bitstream, byte: 0, bits_left: 0, + invert_bit_encoding: config.invert_bit_encoding, + bitstream_is_big_endian: config.bitstream_is_big_endian, } } @@ -163,8 +187,13 @@ impl<'a> RansDecoder<'a> { self.data = &self.data[1..]; self.bits_left = 8; } - self.state = (self.state << 1) | (self.byte & 1) as u32; - self.byte >>= 1; + if self.bitstream_is_big_endian { + self.state = (self.state << 1) | (self.byte >> 7) as u32; + self.byte <<= 1; + } else { + self.state = (self.state << 1) | (self.byte & 1) as u32; + self.byte >>= 1; + } self.bits_left -= 1; } } else { @@ -183,6 +212,6 @@ impl<'a> RansDecoder<'a> { }; self.state = prob * (self.state >> PROB_BITS) + (self.state & PROB_MASK) - start; - bit + bit ^ self.invert_bit_encoding } }