implement encoding options

This commit is contained in:
2022-09-24 20:52:39 +02:00
parent ced6cc8c32
commit 23872b3222
6 changed files with 87 additions and 54 deletions

2
Cargo.lock generated
View File

@@ -117,7 +117,7 @@ dependencies = [
[[package]]
name = "upkr"
version = "0.1.0"
version = "0.2.0"
dependencies = [
"anyhow",
"cdivsufsort",

View File

@@ -1,17 +1,16 @@
use crate::lz;
use crate::match_finder::MatchFinder;
use crate::rans::RansCoder;
use crate::ProgressCallback;
use crate::{lz, Config};
pub fn pack(
data: &[u8],
use_bitstream: bool,
parity_contexts: usize,
config: &Config,
mut progress_callback: Option<ProgressCallback>,
) -> Vec<u8> {
let mut match_finder = MatchFinder::new(data);
let mut rans_coder = RansCoder::new(use_bitstream);
let mut state = lz::CoderState::new(parity_contexts);
let mut rans_coder = RansCoder::new(config.use_bitstream);
let mut state = lz::CoderState::new(config.parity_contexts);
let mut pos = 0;
while pos < data.len() {
@@ -27,7 +26,7 @@ pub fn pack(
offset: offset as u32,
len: m.length as u32,
}
.encode(&mut rans_coder, &mut state);
.encode(&mut rans_coder, &mut state, config);
pos += m.length;
encoded_match = true;
}
@@ -46,7 +45,7 @@ pub fn pack(
offset: offset as u32,
len: length as u32,
}
.encode(&mut rans_coder, &mut state);
.encode(&mut rans_coder, &mut state, config);
pos += length;
encoded_match = true;
}
@@ -54,11 +53,11 @@ pub fn pack(
}
if !encoded_match {
lz::Op::Literal(data[pos]).encode(&mut rans_coder, &mut state);
lz::Op::Literal(data[pos]).encode(&mut rans_coder, &mut state, config);
pos += 1;
}
}
lz::encode_eof(&mut rans_coder, &mut state);
lz::encode_eof(&mut rans_coder, &mut state, config);
rans_coder.finish()
}

View File

@@ -12,6 +12,11 @@ pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize);
/// Encoding options shared by the packers and the unpacker.
///
/// The three `*_bit` fields select which bit value represents a given
/// decision in the compressed stream, so the output can be tailored to a
/// specific unpacker implementation. The encoder and the decoder must be
/// given the same values.
pub struct Config {
/// Passed straight to `RansCoder::new` / `RansDecoder::new`; the CLI
/// describes it as "bitstream mode".
pub use_bitstream: bool,
/// Number of parity contexts. The literal context base is computed as
/// `pos % parity_contexts * 256`, so this must be non-zero.
pub parity_contexts: usize,
/// Set by the `--invert-probs` CLI flag.
/// NOTE(review): not read by any code visible in this part of the source —
/// confirm where (or whether) it is consumed.
pub invert_probs: bool,
/// Bit value written to mark a match (and EOF); a literal writes the
/// negation. The decoder treats a decoded bit equal to this value as a match.
pub is_match_bit: bool,
/// Bit value written when a match uses an offset different from the last
/// one; the negation means the last offset is reused.
pub new_offset_bit: bool,
/// Bit value written by `encode_length` while more bits of the value
/// follow; the negation terminates the value.
pub continue_value_bit: bool,
}
impl Default for Config {
@@ -19,6 +24,11 @@ impl Default for Config {
Config {
use_bitstream: false,
parity_contexts: 1,
invert_probs: false,
is_match_bit: true,
new_offset_bit: true,
continue_value_bit: true,
}
}
}
@@ -30,20 +40,9 @@ pub fn pack(
progress_callback: Option<ProgressCallback>,
) -> Vec<u8> {
if level == 0 {
greedy_packer::pack(
data,
config.use_bitstream,
config.parity_contexts,
progress_callback,
)
greedy_packer::pack(data, &config, progress_callback)
} else {
parsing_packer::pack(
data,
level,
config.use_bitstream,
config.parity_contexts,
progress_callback,
)
parsing_packer::pack(data, level, &config, progress_callback)
}
}

View File

@@ -1,5 +1,6 @@
use crate::context_state::ContextState;
use crate::rans::{EntropyCoder, RansDecoder};
use crate::Config;
#[derive(Copy, Clone, Debug)]
pub enum Op {
@@ -8,11 +9,11 @@ pub enum Op {
}
impl Op {
pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState) {
pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) {
let literal_base = state.pos % state.parity_contexts * 256;
match self {
&Op::Literal(lit) => {
encode_bit(coder, state, literal_base, false);
encode_bit(coder, state, literal_base, !config.is_match_bit);
let mut context_index = 1;
for i in (0..8).rev() {
let bit = (lit >> i) & 1 != 0;
@@ -23,22 +24,28 @@ impl Op {
state.pos += 1;
}
&Op::Match { offset, len } => {
encode_bit(coder, state, literal_base, true);
encode_bit(coder, state, literal_base, config.is_match_bit);
if !state.prev_was_match {
encode_bit(
coder,
state,
256 * state.parity_contexts,
offset != state.last_offset,
(offset != state.last_offset) == config.new_offset_bit,
);
} else {
assert!(offset != state.last_offset);
}
if offset != state.last_offset {
encode_length(coder, state, 256 * state.parity_contexts + 1, offset + 1);
encode_length(
coder,
state,
256 * state.parity_contexts + 1,
offset + 1,
config,
);
state.last_offset = offset;
}
encode_length(coder, state, 256 * state.parity_contexts + 65, len);
encode_length(coder, state, 256 * state.parity_contexts + 65, len, config);
state.prev_was_match = true;
state.pos += len as usize;
}
@@ -46,12 +53,22 @@ impl Op {
}
}
pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState) {
encode_bit(coder, state, state.pos % state.parity_contexts * 256, true);
pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) {
encode_bit(
coder,
state,
state.pos % state.parity_contexts * 256,
config.is_match_bit,
);
if !state.prev_was_match {
encode_bit(coder, state, 256 * state.parity_contexts, true);
encode_bit(
coder,
state,
256 * state.parity_contexts,
config.new_offset_bit,
);
}
encode_length(coder, state, 256 * state.parity_contexts + 1, 1);
encode_length(coder, state, 256 * state.parity_contexts + 1, 1, config);
}
fn encode_bit(
@@ -68,17 +85,18 @@ fn encode_length(
state: &mut CoderState,
context_start: usize,
mut value: u32,
config: &Config,
) {
assert!(value >= 1);
let mut context_index = context_start;
while value >= 2 {
encode_bit(coder, state, context_index, true);
encode_bit(coder, state, context_index, config.continue_value_bit);
encode_bit(coder, state, context_index + 1, value & 1 != 0);
context_index += 2;
value >>= 1;
}
encode_bit(coder, state, context_index, false);
encode_bit(coder, state, context_index, !config.continue_value_bit);
}
#[derive(Clone)]
@@ -106,7 +124,7 @@ impl CoderState {
}
}
pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec<u8> {
pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
let mut decoder = RansDecoder::new(packed_data, config.use_bitstream);
let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64);
let mut result = vec![];
@@ -117,10 +135,13 @@ pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec<u8> {
decoder: &mut RansDecoder,
contexts: &mut ContextState,
mut context_index: usize,
config: &Config,
) -> usize {
let mut length = 0;
let mut bit_pos = 0;
while decoder.decode_with_context(&mut contexts.context_mut(context_index)) {
while decoder.decode_with_context(&mut contexts.context_mut(context_index))
== config.continue_value_bit
{
length |= (decoder.decode_with_context(&mut contexts.context_mut(context_index + 1))
as usize)
<< bit_pos;
@@ -132,15 +153,19 @@ pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec<u8> {
loop {
let literal_base = result.len() % config.parity_contexts * 256;
if decoder.decode_with_context(&mut contexts.context_mut(literal_base)) {
if decoder.decode_with_context(&mut contexts.context_mut(literal_base))
== config.is_match_bit
{
if prev_was_match
|| decoder
.decode_with_context(&mut contexts.context_mut(256 * config.parity_contexts))
== config.new_offset_bit
{
offset = decode_length(
&mut decoder,
&mut contexts,
256 * config.parity_contexts + 1,
&config,
) - 1;
if offset == 0 {
break;
@@ -150,6 +175,7 @@ pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec<u8> {
&mut decoder,
&mut contexts,
256 * config.parity_contexts + 65,
&config,
);
for _ in 0..length {
result.push(result[result.len() - offset]);

View File

@@ -19,6 +19,11 @@ fn main() -> Result<()> {
Short('b') | Long("bitstream") => config.use_bitstream = true,
Short('p') | Long("parity") => config.parity_contexts = parser.value()?.parse()?,
Short('r') | Long("reverse") => reverse = true,
Long("invert-is-match-bit") => config.is_match_bit = false,
Long("invert-new-offset-bit") => config.new_offset_bit = false,
Long("invert-continue-value-bit") => config.continue_value_bit = false,
Long("invert-probs") => config.invert_probs = true,
Short('u') | Long("unpack") => unpack = true,
Short('l') | Long("level") => level = parser.value()?.parse()?,
Short('h') | Long("help") => print_help(0),
@@ -111,5 +116,11 @@ fn print_help(exit_code: i32) -> ! {
eprintln!(" -b, --bitstream bitstream mode");
eprintln!(" -p, --parity N use N (2/4) parity contexts");
eprintln!(" -r, --reverse reverse input & output");
eprintln!();
eprintln!("Config options to tailor output to specific optimized unpackers:");
eprintln!(" --invert-is-match-bit");
eprintln!(" --invert-new-offset-bit");
eprintln!(" --invert-continue-value-bit");
eprintln!(" --invert-probs");
process::exit(exit_code);
}

View File

@@ -9,27 +9,21 @@ use crate::{lz, ProgressCallback};
pub fn pack(
data: &[u8],
level: u8,
use_bitstream: bool,
parity_contexts: usize,
config: &crate::Config,
progress_cb: Option<ProgressCallback>,
) -> Vec<u8> {
let mut parse = parse(
data,
Config::from_level(level),
parity_contexts,
progress_cb,
);
let mut parse = parse(data, Config::from_level(level), config, progress_cb);
let mut ops = vec![];
while let Some(link) = parse {
ops.push(link.op);
parse = link.prev.clone();
}
let mut state = lz::CoderState::new(parity_contexts);
let mut coder = RansCoder::new(use_bitstream);
let mut state = lz::CoderState::new(config.parity_contexts);
let mut coder = RansCoder::new(config.use_bitstream);
for op in ops.into_iter().rev() {
op.encode(&mut coder, &mut state);
op.encode(&mut coder, &mut state, config);
}
lz::encode_eof(&mut coder, &mut state);
lz::encode_eof(&mut coder, &mut state, config);
coder.finish()
}
@@ -49,7 +43,7 @@ type Arrivals = HashMap<usize, Vec<Arrival>>;
fn parse(
data: &[u8],
config: Config,
parity_contexts: usize,
encoding_config: &crate::Config,
mut progress_cb: Option<ProgressCallback>,
) -> Option<Rc<Parse>> {
let mut match_finder = MatchFinder::new(data)
@@ -114,6 +108,7 @@ fn parse(
length: usize,
arrival: &Arrival,
max_arrivals: usize,
config: &crate::Config,
) {
cost_counter.reset();
let mut state = arrival.state.clone();
@@ -121,7 +116,7 @@ fn parse(
offset: offset as u32,
len: length as u32,
};
op.encode(cost_counter, &mut state);
op.encode(cost_counter, &mut state, config);
add_arrival(
arrivals,
pos + length,
@@ -141,7 +136,7 @@ fn parse(
0,
Arrival {
parse: None,
state: lz::CoderState::new(parity_contexts),
state: lz::CoderState::new(encoding_config.parity_contexts),
cost: 0.0,
},
max_arrivals,
@@ -197,6 +192,7 @@ fn parse(
m.length,
&arrival,
max_arrivals,
encoding_config,
);
if m.length >= config.greedy_size {
break 'arrival_loop;
@@ -220,6 +216,7 @@ fn parse(
length,
&arrival,
max_arrivals,
encoding_config,
);
found_last_offset |= offset as u32 == arrival.state.last_offset();
if offset < near_matches.len() {
@@ -240,6 +237,7 @@ fn parse(
length,
&arrival,
max_arrivals,
encoding_config,
);
}
}
@@ -247,7 +245,7 @@ fn parse(
cost_counter.reset();
let mut state = arrival.state;
let op = lz::Op::Literal(data[pos]);
op.encode(cost_counter, &mut state);
op.encode(cost_counter, &mut state, encoding_config);
add_arrival(
&mut arrivals,
pos + 1,