implement encoding options

This commit is contained in:
2022-09-24 20:52:39 +02:00
parent ced6cc8c32
commit 23872b3222
6 changed files with 87 additions and 54 deletions

2
Cargo.lock generated
View File

@@ -117,7 +117,7 @@ dependencies = [
[[package]] [[package]]
name = "upkr" name = "upkr"
version = "0.1.0" version = "0.2.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"cdivsufsort", "cdivsufsort",

View File

@@ -1,17 +1,16 @@
use crate::lz;
use crate::match_finder::MatchFinder; use crate::match_finder::MatchFinder;
use crate::rans::RansCoder; use crate::rans::RansCoder;
use crate::ProgressCallback; use crate::ProgressCallback;
use crate::{lz, Config};
pub fn pack( pub fn pack(
data: &[u8], data: &[u8],
use_bitstream: bool, config: &Config,
parity_contexts: usize,
mut progress_callback: Option<ProgressCallback>, mut progress_callback: Option<ProgressCallback>,
) -> Vec<u8> { ) -> Vec<u8> {
let mut match_finder = MatchFinder::new(data); let mut match_finder = MatchFinder::new(data);
let mut rans_coder = RansCoder::new(use_bitstream); let mut rans_coder = RansCoder::new(config.use_bitstream);
let mut state = lz::CoderState::new(parity_contexts); let mut state = lz::CoderState::new(config.parity_contexts);
let mut pos = 0; let mut pos = 0;
while pos < data.len() { while pos < data.len() {
@@ -27,7 +26,7 @@ pub fn pack(
offset: offset as u32, offset: offset as u32,
len: m.length as u32, len: m.length as u32,
} }
.encode(&mut rans_coder, &mut state); .encode(&mut rans_coder, &mut state, config);
pos += m.length; pos += m.length;
encoded_match = true; encoded_match = true;
} }
@@ -46,7 +45,7 @@ pub fn pack(
offset: offset as u32, offset: offset as u32,
len: length as u32, len: length as u32,
} }
.encode(&mut rans_coder, &mut state); .encode(&mut rans_coder, &mut state, config);
pos += length; pos += length;
encoded_match = true; encoded_match = true;
} }
@@ -54,11 +53,11 @@ pub fn pack(
} }
if !encoded_match { if !encoded_match {
lz::Op::Literal(data[pos]).encode(&mut rans_coder, &mut state); lz::Op::Literal(data[pos]).encode(&mut rans_coder, &mut state, config);
pos += 1; pos += 1;
} }
} }
lz::encode_eof(&mut rans_coder, &mut state); lz::encode_eof(&mut rans_coder, &mut state, config);
rans_coder.finish() rans_coder.finish()
} }

View File

@@ -12,6 +12,11 @@ pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize);
pub struct Config { pub struct Config {
pub use_bitstream: bool, pub use_bitstream: bool,
pub parity_contexts: usize, pub parity_contexts: usize,
pub invert_probs: bool,
pub is_match_bit: bool,
pub new_offset_bit: bool,
pub continue_value_bit: bool,
} }
impl Default for Config { impl Default for Config {
@@ -19,6 +24,11 @@ impl Default for Config {
Config { Config {
use_bitstream: false, use_bitstream: false,
parity_contexts: 1, parity_contexts: 1,
invert_probs: false,
is_match_bit: true,
new_offset_bit: true,
continue_value_bit: true,
} }
} }
} }
@@ -30,20 +40,9 @@ pub fn pack(
progress_callback: Option<ProgressCallback>, progress_callback: Option<ProgressCallback>,
) -> Vec<u8> { ) -> Vec<u8> {
if level == 0 { if level == 0 {
greedy_packer::pack( greedy_packer::pack(data, &config, progress_callback)
data,
config.use_bitstream,
config.parity_contexts,
progress_callback,
)
} else { } else {
parsing_packer::pack( parsing_packer::pack(data, level, &config, progress_callback)
data,
level,
config.use_bitstream,
config.parity_contexts,
progress_callback,
)
} }
} }

View File

@@ -1,5 +1,6 @@
use crate::context_state::ContextState; use crate::context_state::ContextState;
use crate::rans::{EntropyCoder, RansDecoder}; use crate::rans::{EntropyCoder, RansDecoder};
use crate::Config;
#[derive(Copy, Clone, Debug)] #[derive(Copy, Clone, Debug)]
pub enum Op { pub enum Op {
@@ -8,11 +9,11 @@ pub enum Op {
} }
impl Op { impl Op {
pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState) { pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) {
let literal_base = state.pos % state.parity_contexts * 256; let literal_base = state.pos % state.parity_contexts * 256;
match self { match self {
&Op::Literal(lit) => { &Op::Literal(lit) => {
encode_bit(coder, state, literal_base, false); encode_bit(coder, state, literal_base, !config.is_match_bit);
let mut context_index = 1; let mut context_index = 1;
for i in (0..8).rev() { for i in (0..8).rev() {
let bit = (lit >> i) & 1 != 0; let bit = (lit >> i) & 1 != 0;
@@ -23,22 +24,28 @@ impl Op {
state.pos += 1; state.pos += 1;
} }
&Op::Match { offset, len } => { &Op::Match { offset, len } => {
encode_bit(coder, state, literal_base, true); encode_bit(coder, state, literal_base, config.is_match_bit);
if !state.prev_was_match { if !state.prev_was_match {
encode_bit( encode_bit(
coder, coder,
state, state,
256 * state.parity_contexts, 256 * state.parity_contexts,
offset != state.last_offset, (offset != state.last_offset) == config.new_offset_bit,
); );
} else { } else {
assert!(offset != state.last_offset); assert!(offset != state.last_offset);
} }
if offset != state.last_offset { if offset != state.last_offset {
encode_length(coder, state, 256 * state.parity_contexts + 1, offset + 1); encode_length(
coder,
state,
256 * state.parity_contexts + 1,
offset + 1,
config,
);
state.last_offset = offset; state.last_offset = offset;
} }
encode_length(coder, state, 256 * state.parity_contexts + 65, len); encode_length(coder, state, 256 * state.parity_contexts + 65, len, config);
state.prev_was_match = true; state.prev_was_match = true;
state.pos += len as usize; state.pos += len as usize;
} }
@@ -46,12 +53,22 @@ impl Op {
} }
} }
pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState) { pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) {
encode_bit(coder, state, state.pos % state.parity_contexts * 256, true); encode_bit(
coder,
state,
state.pos % state.parity_contexts * 256,
config.is_match_bit,
);
if !state.prev_was_match { if !state.prev_was_match {
encode_bit(coder, state, 256 * state.parity_contexts, true); encode_bit(
coder,
state,
256 * state.parity_contexts,
config.new_offset_bit,
);
} }
encode_length(coder, state, 256 * state.parity_contexts + 1, 1); encode_length(coder, state, 256 * state.parity_contexts + 1, 1, config);
} }
fn encode_bit( fn encode_bit(
@@ -68,17 +85,18 @@ fn encode_length(
state: &mut CoderState, state: &mut CoderState,
context_start: usize, context_start: usize,
mut value: u32, mut value: u32,
config: &Config,
) { ) {
assert!(value >= 1); assert!(value >= 1);
let mut context_index = context_start; let mut context_index = context_start;
while value >= 2 { while value >= 2 {
encode_bit(coder, state, context_index, true); encode_bit(coder, state, context_index, config.continue_value_bit);
encode_bit(coder, state, context_index + 1, value & 1 != 0); encode_bit(coder, state, context_index + 1, value & 1 != 0);
context_index += 2; context_index += 2;
value >>= 1; value >>= 1;
} }
encode_bit(coder, state, context_index, false); encode_bit(coder, state, context_index, !config.continue_value_bit);
} }
#[derive(Clone)] #[derive(Clone)]
@@ -106,7 +124,7 @@ impl CoderState {
} }
} }
pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec<u8> { pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
let mut decoder = RansDecoder::new(packed_data, config.use_bitstream); let mut decoder = RansDecoder::new(packed_data, config.use_bitstream);
let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64); let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64);
let mut result = vec![]; let mut result = vec![];
@@ -117,10 +135,13 @@ pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec<u8> {
decoder: &mut RansDecoder, decoder: &mut RansDecoder,
contexts: &mut ContextState, contexts: &mut ContextState,
mut context_index: usize, mut context_index: usize,
config: &Config,
) -> usize { ) -> usize {
let mut length = 0; let mut length = 0;
let mut bit_pos = 0; let mut bit_pos = 0;
while decoder.decode_with_context(&mut contexts.context_mut(context_index)) { while decoder.decode_with_context(&mut contexts.context_mut(context_index))
== config.continue_value_bit
{
length |= (decoder.decode_with_context(&mut contexts.context_mut(context_index + 1)) length |= (decoder.decode_with_context(&mut contexts.context_mut(context_index + 1))
as usize) as usize)
<< bit_pos; << bit_pos;
@@ -132,15 +153,19 @@ pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec<u8> {
loop { loop {
let literal_base = result.len() % config.parity_contexts * 256; let literal_base = result.len() % config.parity_contexts * 256;
if decoder.decode_with_context(&mut contexts.context_mut(literal_base)) { if decoder.decode_with_context(&mut contexts.context_mut(literal_base))
== config.is_match_bit
{
if prev_was_match if prev_was_match
|| decoder || decoder
.decode_with_context(&mut contexts.context_mut(256 * config.parity_contexts)) .decode_with_context(&mut contexts.context_mut(256 * config.parity_contexts))
== config.new_offset_bit
{ {
offset = decode_length( offset = decode_length(
&mut decoder, &mut decoder,
&mut contexts, &mut contexts,
256 * config.parity_contexts + 1, 256 * config.parity_contexts + 1,
&config,
) - 1; ) - 1;
if offset == 0 { if offset == 0 {
break; break;
@@ -150,6 +175,7 @@ pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec<u8> {
&mut decoder, &mut decoder,
&mut contexts, &mut contexts,
256 * config.parity_contexts + 65, 256 * config.parity_contexts + 65,
&config,
); );
for _ in 0..length { for _ in 0..length {
result.push(result[result.len() - offset]); result.push(result[result.len() - offset]);

View File

@@ -19,6 +19,11 @@ fn main() -> Result<()> {
Short('b') | Long("bitstream") => config.use_bitstream = true, Short('b') | Long("bitstream") => config.use_bitstream = true,
Short('p') | Long("parity") => config.parity_contexts = parser.value()?.parse()?, Short('p') | Long("parity") => config.parity_contexts = parser.value()?.parse()?,
Short('r') | Long("reverse") => reverse = true, Short('r') | Long("reverse") => reverse = true,
Long("invert-is-match-bit") => config.is_match_bit = false,
Long("invert-new-offset-bit") => config.new_offset_bit = false,
Long("invert-continue-value-bit") => config.continue_value_bit = false,
Long("invert-probs") => config.invert_probs = true,
Short('u') | Long("unpack") => unpack = true, Short('u') | Long("unpack") => unpack = true,
Short('l') | Long("level") => level = parser.value()?.parse()?, Short('l') | Long("level") => level = parser.value()?.parse()?,
Short('h') | Long("help") => print_help(0), Short('h') | Long("help") => print_help(0),
@@ -111,5 +116,11 @@ fn print_help(exit_code: i32) -> ! {
eprintln!(" -b, --bitstream bitstream mode"); eprintln!(" -b, --bitstream bitstream mode");
eprintln!(" -p, --parity N use N (2/4) parity contexts"); eprintln!(" -p, --parity N use N (2/4) parity contexts");
eprintln!(" -r, --reverse reverse input & output"); eprintln!(" -r, --reverse reverse input & output");
eprintln!();
eprintln!("Config options to tailor output to specific optimized unpackers:");
eprintln!(" --invert-is-match-bit");
eprintln!(" --invert-new-offset-bit");
eprintln!(" --invert-continue-value-bit");
eprintln!(" --invert-probs");
process::exit(exit_code); process::exit(exit_code);
} }

View File

@@ -9,27 +9,21 @@ use crate::{lz, ProgressCallback};
pub fn pack( pub fn pack(
data: &[u8], data: &[u8],
level: u8, level: u8,
use_bitstream: bool, config: &crate::Config,
parity_contexts: usize,
progress_cb: Option<ProgressCallback>, progress_cb: Option<ProgressCallback>,
) -> Vec<u8> { ) -> Vec<u8> {
let mut parse = parse( let mut parse = parse(data, Config::from_level(level), config, progress_cb);
data,
Config::from_level(level),
parity_contexts,
progress_cb,
);
let mut ops = vec![]; let mut ops = vec![];
while let Some(link) = parse { while let Some(link) = parse {
ops.push(link.op); ops.push(link.op);
parse = link.prev.clone(); parse = link.prev.clone();
} }
let mut state = lz::CoderState::new(parity_contexts); let mut state = lz::CoderState::new(config.parity_contexts);
let mut coder = RansCoder::new(use_bitstream); let mut coder = RansCoder::new(config.use_bitstream);
for op in ops.into_iter().rev() { for op in ops.into_iter().rev() {
op.encode(&mut coder, &mut state); op.encode(&mut coder, &mut state, config);
} }
lz::encode_eof(&mut coder, &mut state); lz::encode_eof(&mut coder, &mut state, config);
coder.finish() coder.finish()
} }
@@ -49,7 +43,7 @@ type Arrivals = HashMap<usize, Vec<Arrival>>;
fn parse( fn parse(
data: &[u8], data: &[u8],
config: Config, config: Config,
parity_contexts: usize, encoding_config: &crate::Config,
mut progress_cb: Option<ProgressCallback>, mut progress_cb: Option<ProgressCallback>,
) -> Option<Rc<Parse>> { ) -> Option<Rc<Parse>> {
let mut match_finder = MatchFinder::new(data) let mut match_finder = MatchFinder::new(data)
@@ -114,6 +108,7 @@ fn parse(
length: usize, length: usize,
arrival: &Arrival, arrival: &Arrival,
max_arrivals: usize, max_arrivals: usize,
config: &crate::Config,
) { ) {
cost_counter.reset(); cost_counter.reset();
let mut state = arrival.state.clone(); let mut state = arrival.state.clone();
@@ -121,7 +116,7 @@ fn parse(
offset: offset as u32, offset: offset as u32,
len: length as u32, len: length as u32,
}; };
op.encode(cost_counter, &mut state); op.encode(cost_counter, &mut state, config);
add_arrival( add_arrival(
arrivals, arrivals,
pos + length, pos + length,
@@ -141,7 +136,7 @@ fn parse(
0, 0,
Arrival { Arrival {
parse: None, parse: None,
state: lz::CoderState::new(parity_contexts), state: lz::CoderState::new(encoding_config.parity_contexts),
cost: 0.0, cost: 0.0,
}, },
max_arrivals, max_arrivals,
@@ -197,6 +192,7 @@ fn parse(
m.length, m.length,
&arrival, &arrival,
max_arrivals, max_arrivals,
encoding_config,
); );
if m.length >= config.greedy_size { if m.length >= config.greedy_size {
break 'arrival_loop; break 'arrival_loop;
@@ -220,6 +216,7 @@ fn parse(
length, length,
&arrival, &arrival,
max_arrivals, max_arrivals,
encoding_config,
); );
found_last_offset |= offset as u32 == arrival.state.last_offset(); found_last_offset |= offset as u32 == arrival.state.last_offset();
if offset < near_matches.len() { if offset < near_matches.len() {
@@ -240,6 +237,7 @@ fn parse(
length, length,
&arrival, &arrival,
max_arrivals, max_arrivals,
encoding_config,
); );
} }
} }
@@ -247,7 +245,7 @@ fn parse(
cost_counter.reset(); cost_counter.reset();
let mut state = arrival.state; let mut state = arrival.state;
let op = lz::Op::Literal(data[pos]); let op = lz::Op::Literal(data[pos]);
op.encode(cost_counter, &mut state); op.encode(cost_counter, &mut state, encoding_config);
add_arrival( add_arrival(
&mut arrivals, &mut arrivals,
pos + 1, pos + 1,