From 23872b32228ee5558f16f034b580476d9987c4f6 Mon Sep 17 00:00:00 2001 From: Dennis Ranke Date: Sat, 24 Sep 2022 20:52:39 +0200 Subject: [PATCH] implement encoding options --- Cargo.lock | 2 +- src/greedy_packer.rs | 17 +++++++------ src/lib.rs | 25 ++++++++++--------- src/lz.rs | 56 +++++++++++++++++++++++++++++++------------ src/main.rs | 11 +++++++++ src/parsing_packer.rs | 30 +++++++++++------------ 6 files changed, 87 insertions(+), 54 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2d64cea..444dbe5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -117,7 +117,7 @@ dependencies = [ [[package]] name = "upkr" -version = "0.1.0" +version = "0.2.0" dependencies = [ "anyhow", "cdivsufsort", diff --git a/src/greedy_packer.rs b/src/greedy_packer.rs index a0f160a..c903063 100644 --- a/src/greedy_packer.rs +++ b/src/greedy_packer.rs @@ -1,17 +1,16 @@ -use crate::lz; use crate::match_finder::MatchFinder; use crate::rans::RansCoder; use crate::ProgressCallback; +use crate::{lz, Config}; pub fn pack( data: &[u8], - use_bitstream: bool, - parity_contexts: usize, + config: &Config, mut progress_callback: Option, ) -> Vec { let mut match_finder = MatchFinder::new(data); - let mut rans_coder = RansCoder::new(use_bitstream); - let mut state = lz::CoderState::new(parity_contexts); + let mut rans_coder = RansCoder::new(config.use_bitstream); + let mut state = lz::CoderState::new(config.parity_contexts); let mut pos = 0; while pos < data.len() { @@ -27,7 +26,7 @@ pub fn pack( offset: offset as u32, len: m.length as u32, } - .encode(&mut rans_coder, &mut state); + .encode(&mut rans_coder, &mut state, config); pos += m.length; encoded_match = true; } @@ -46,7 +45,7 @@ pub fn pack( offset: offset as u32, len: length as u32, } - .encode(&mut rans_coder, &mut state); + .encode(&mut rans_coder, &mut state, config); pos += length; encoded_match = true; } @@ -54,11 +53,11 @@ pub fn pack( } if !encoded_match { - lz::Op::Literal(data[pos]).encode(&mut rans_coder, &mut state); + lz::Op::Literal(data[pos]).encode(&mut rans_coder, &mut state, config); pos += 1; } } - lz::encode_eof(&mut rans_coder, &mut state); + lz::encode_eof(&mut rans_coder, &mut state, config); rans_coder.finish() } diff --git a/src/lib.rs b/src/lib.rs index b56d57c..c7a11a3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,6 +12,11 @@ pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize); pub struct Config { pub use_bitstream: bool, pub parity_contexts: usize, + + pub invert_probs: bool, + pub is_match_bit: bool, + pub new_offset_bit: bool, + pub continue_value_bit: bool, } impl Default for Config { @@ -19,6 +24,11 @@ impl Default for Config { Config { use_bitstream: false, parity_contexts: 1, + + invert_probs: false, + is_match_bit: true, + new_offset_bit: true, + continue_value_bit: true, } } } @@ -30,20 +40,9 @@ pub fn pack( progress_callback: Option, ) -> Vec { if level == 0 { - greedy_packer::pack( - data, - config.use_bitstream, - config.parity_contexts, - progress_callback, - ) + greedy_packer::pack(data, &config, progress_callback) } else { - parsing_packer::pack( - data, - level, - config.use_bitstream, - config.parity_contexts, - progress_callback, - ) + parsing_packer::pack(data, level, &config, progress_callback) } } diff --git a/src/lz.rs b/src/lz.rs index dd43aaa..f5806ee 100644 --- a/src/lz.rs +++ b/src/lz.rs @@ -1,5 +1,6 @@ use crate::context_state::ContextState; use crate::rans::{EntropyCoder, RansDecoder}; +use crate::Config; #[derive(Copy, Clone, Debug)] pub enum Op { @@ -8,11 +9,11 @@ pub enum Op { } impl Op { - pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState) { + pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) { let literal_base = state.pos % state.parity_contexts * 256; match self { &Op::Literal(lit) => { - encode_bit(coder, state, literal_base, false); + encode_bit(coder, state, literal_base, !config.is_match_bit); let mut context_index = 1; for i in (0..8).rev() { let bit = (lit >> i) & 1 != 0; @@ -23,22 +24,28 @@ impl Op { state.pos += 1; } &Op::Match { offset, len } => { - encode_bit(coder, state, literal_base, true); + encode_bit(coder, state, literal_base, config.is_match_bit); if !state.prev_was_match { encode_bit( coder, state, 256 * state.parity_contexts, - offset != state.last_offset, + (offset != state.last_offset) == config.new_offset_bit, ); } else { assert!(offset != state.last_offset); } if offset != state.last_offset { - encode_length(coder, state, 256 * state.parity_contexts + 1, offset + 1); + encode_length( + coder, + state, + 256 * state.parity_contexts + 1, + offset + 1, + config, + ); state.last_offset = offset; } - encode_length(coder, state, 256 * state.parity_contexts + 65, len); + encode_length(coder, state, 256 * state.parity_contexts + 65, len, config); state.prev_was_match = true; state.pos += len as usize; } @@ -46,12 +53,22 @@ impl Op { } } -pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState) { - encode_bit(coder, state, state.pos % state.parity_contexts * 256, true); +pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) { + encode_bit( + coder, + state, + state.pos % state.parity_contexts * 256, + config.is_match_bit, + ); if !state.prev_was_match { - encode_bit(coder, state, 256 * state.parity_contexts, true); + encode_bit( + coder, + state, + 256 * state.parity_contexts, + config.new_offset_bit, + ); } - encode_length(coder, state, 256 * state.parity_contexts + 1, 1); + encode_length(coder, state, 256 * state.parity_contexts + 1, 1, config); } fn encode_bit( @@ -68,17 +85,18 @@ fn encode_length( state: &mut CoderState, context_start: usize, mut value: u32, + config: &Config, ) { assert!(value >= 1); let mut context_index = context_start; while value >= 2 { - encode_bit(coder, state, context_index, true); + encode_bit(coder, state, context_index, config.continue_value_bit); encode_bit(coder, state, context_index + 1, value & 1 != 0); context_index += 2; value >>= 1; } - encode_bit(coder, state, context_index, false); + encode_bit(coder, state, context_index, !config.continue_value_bit); } #[derive(Clone)] @@ -106,7 +124,7 @@ impl CoderState { } } -pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec { +pub fn unpack(packed_data: &[u8], config: Config) -> Vec { let mut decoder = RansDecoder::new(packed_data, config.use_bitstream); let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64); let mut result = vec![]; @@ -117,10 +135,13 @@ pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec { decoder: &mut RansDecoder, contexts: &mut ContextState, mut context_index: usize, + config: &Config, ) -> usize { let mut length = 0; let mut bit_pos = 0; - while decoder.decode_with_context(&mut contexts.context_mut(context_index)) { + while decoder.decode_with_context(&mut contexts.context_mut(context_index)) + == config.continue_value_bit + { length |= (decoder.decode_with_context(&mut contexts.context_mut(context_index + 1)) as usize) << bit_pos; @@ -132,15 +153,19 @@ pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec { loop { let literal_base = result.len() % config.parity_contexts * 256; - if decoder.decode_with_context(&mut contexts.context_mut(literal_base)) { + if decoder.decode_with_context(&mut contexts.context_mut(literal_base)) + == config.is_match_bit + { if prev_was_match || decoder .decode_with_context(&mut contexts.context_mut(256 * config.parity_contexts)) + == config.new_offset_bit { offset = decode_length( &mut decoder, &mut contexts, 256 * config.parity_contexts + 1, + &config, ) - 1; if offset == 0 { break; @@ -150,6 +175,7 @@ pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec { &mut decoder, &mut contexts, 256 * config.parity_contexts + 65, + &config, ); for _ in 0..length { result.push(result[result.len() - offset]); diff --git a/src/main.rs b/src/main.rs index eb30b25..53c103e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -19,6 +19,11 @@ fn main() -> Result<()> { Short('b') | Long("bitstream") => config.use_bitstream = true, Short('p') | Long("parity") => config.parity_contexts = parser.value()?.parse()?, Short('r') | Long("reverse") => reverse = true, + Long("invert-is-match-bit") => config.is_match_bit = false, + Long("invert-new-offset-bit") => config.new_offset_bit = false, + Long("invert-continue-value-bit") => config.continue_value_bit = false, + Long("invert-probs") => config.invert_probs = true, + Short('u') | Long("unpack") => unpack = true, Short('l') | Long("level") => level = parser.value()?.parse()?, Short('h') | Long("help") => print_help(0), @@ -111,5 +116,11 @@ fn print_help(exit_code: i32) -> ! { eprintln!(" -b, --bitstream bitstream mode"); eprintln!(" -p, --parity N use N (2/4) parity contexts"); eprintln!(" -r, --reverse reverse input & output"); + eprintln!(); + eprintln!("Config options to tailor output to specific optimized unpackers:"); + eprintln!(" --invert-is-match-bit"); + eprintln!(" --invert-new-offset-bit"); + eprintln!(" --invert-continue-value-bit"); + eprintln!(" --invert-probs"); process::exit(exit_code); } diff --git a/src/parsing_packer.rs b/src/parsing_packer.rs index f110fe0..73cd63d 100644 --- a/src/parsing_packer.rs +++ b/src/parsing_packer.rs @@ -9,27 +9,21 @@ use crate::{lz, ProgressCallback}; pub fn pack( data: &[u8], level: u8, - use_bitstream: bool, - parity_contexts: usize, + config: &crate::Config, progress_cb: Option, ) -> Vec { - let mut parse = parse( - data, - Config::from_level(level), - parity_contexts, - progress_cb, - ); + let mut parse = parse(data, Config::from_level(level), config, progress_cb); let mut ops = vec![]; while let Some(link) = parse { ops.push(link.op); parse = link.prev.clone(); } - let mut state = lz::CoderState::new(parity_contexts); - let mut coder = RansCoder::new(use_bitstream); + let mut state = lz::CoderState::new(config.parity_contexts); + let mut coder = RansCoder::new(config.use_bitstream); for op in ops.into_iter().rev() { - op.encode(&mut coder, &mut state); + op.encode(&mut coder, &mut state, config); } - lz::encode_eof(&mut coder, &mut state); + lz::encode_eof(&mut coder, &mut state, config); coder.finish() } @@ -49,7 +43,7 @@ type Arrivals = HashMap>; fn parse( data: &[u8], config: Config, - parity_contexts: usize, + encoding_config: &crate::Config, mut progress_cb: Option, ) -> Option> { let mut match_finder = MatchFinder::new(data) @@ -114,6 +108,7 @@ fn parse( length: usize, arrival: &Arrival, max_arrivals: usize, + config: &crate::Config, ) { cost_counter.reset(); let mut state = arrival.state.clone(); @@ -121,7 +116,7 @@ fn parse( offset: offset as u32, len: length as u32, }; - op.encode(cost_counter, &mut state); + op.encode(cost_counter, &mut state, config); add_arrival( arrivals, pos + length, @@ -141,7 +136,7 @@ fn parse( 0, Arrival { parse: None, - state: lz::CoderState::new(parity_contexts), + state: lz::CoderState::new(encoding_config.parity_contexts), cost: 0.0, }, max_arrivals, @@ -197,6 +192,7 @@ fn parse( m.length, &arrival, max_arrivals, + encoding_config, ); if m.length >= config.greedy_size { break 'arrival_loop; @@ -220,6 +216,7 @@ fn parse( length, &arrival, max_arrivals, + encoding_config, ); found_last_offset |= offset as u32 == arrival.state.last_offset(); if offset < near_matches.len() { @@ -240,6 +237,7 @@ fn parse( length, &arrival, max_arrivals, + encoding_config, ); } } @@ -247,7 +245,7 @@ fn parse( cost_counter.reset(); let mut state = arrival.state; let op = lz::Op::Literal(data[pos]); - op.encode(cost_counter, &mut state); + op.encode(cost_counter, &mut state, encoding_config); add_arrival( &mut arrivals, pos + 1,