mirror of
https://github.com/exoticorn/upkr.git
synced 2026-01-20 11:36:42 +01:00
implement encoding options
This commit is contained in:
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -117,7 +117,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "upkr"
|
||||
version = "0.1.0"
|
||||
version = "0.2.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"cdivsufsort",
|
||||
|
||||
@@ -1,17 +1,16 @@
|
||||
use crate::lz;
|
||||
use crate::match_finder::MatchFinder;
|
||||
use crate::rans::RansCoder;
|
||||
use crate::ProgressCallback;
|
||||
use crate::{lz, Config};
|
||||
|
||||
pub fn pack(
|
||||
data: &[u8],
|
||||
use_bitstream: bool,
|
||||
parity_contexts: usize,
|
||||
config: &Config,
|
||||
mut progress_callback: Option<ProgressCallback>,
|
||||
) -> Vec<u8> {
|
||||
let mut match_finder = MatchFinder::new(data);
|
||||
let mut rans_coder = RansCoder::new(use_bitstream);
|
||||
let mut state = lz::CoderState::new(parity_contexts);
|
||||
let mut rans_coder = RansCoder::new(config.use_bitstream);
|
||||
let mut state = lz::CoderState::new(config.parity_contexts);
|
||||
|
||||
let mut pos = 0;
|
||||
while pos < data.len() {
|
||||
@@ -27,7 +26,7 @@ pub fn pack(
|
||||
offset: offset as u32,
|
||||
len: m.length as u32,
|
||||
}
|
||||
.encode(&mut rans_coder, &mut state);
|
||||
.encode(&mut rans_coder, &mut state, config);
|
||||
pos += m.length;
|
||||
encoded_match = true;
|
||||
}
|
||||
@@ -46,7 +45,7 @@ pub fn pack(
|
||||
offset: offset as u32,
|
||||
len: length as u32,
|
||||
}
|
||||
.encode(&mut rans_coder, &mut state);
|
||||
.encode(&mut rans_coder, &mut state, config);
|
||||
pos += length;
|
||||
encoded_match = true;
|
||||
}
|
||||
@@ -54,11 +53,11 @@ pub fn pack(
|
||||
}
|
||||
|
||||
if !encoded_match {
|
||||
lz::Op::Literal(data[pos]).encode(&mut rans_coder, &mut state);
|
||||
lz::Op::Literal(data[pos]).encode(&mut rans_coder, &mut state, config);
|
||||
pos += 1;
|
||||
}
|
||||
}
|
||||
|
||||
lz::encode_eof(&mut rans_coder, &mut state);
|
||||
lz::encode_eof(&mut rans_coder, &mut state, config);
|
||||
rans_coder.finish()
|
||||
}
|
||||
|
||||
25
src/lib.rs
25
src/lib.rs
@@ -12,6 +12,11 @@ pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize);
|
||||
pub struct Config {
|
||||
pub use_bitstream: bool,
|
||||
pub parity_contexts: usize,
|
||||
|
||||
pub invert_probs: bool,
|
||||
pub is_match_bit: bool,
|
||||
pub new_offset_bit: bool,
|
||||
pub continue_value_bit: bool,
|
||||
}
|
||||
|
||||
impl Default for Config {
|
||||
@@ -19,6 +24,11 @@ impl Default for Config {
|
||||
Config {
|
||||
use_bitstream: false,
|
||||
parity_contexts: 1,
|
||||
|
||||
invert_probs: false,
|
||||
is_match_bit: true,
|
||||
new_offset_bit: true,
|
||||
continue_value_bit: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -30,20 +40,9 @@ pub fn pack(
|
||||
progress_callback: Option<ProgressCallback>,
|
||||
) -> Vec<u8> {
|
||||
if level == 0 {
|
||||
greedy_packer::pack(
|
||||
data,
|
||||
config.use_bitstream,
|
||||
config.parity_contexts,
|
||||
progress_callback,
|
||||
)
|
||||
greedy_packer::pack(data, &config, progress_callback)
|
||||
} else {
|
||||
parsing_packer::pack(
|
||||
data,
|
||||
level,
|
||||
config.use_bitstream,
|
||||
config.parity_contexts,
|
||||
progress_callback,
|
||||
)
|
||||
parsing_packer::pack(data, level, &config, progress_callback)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
56
src/lz.rs
56
src/lz.rs
@@ -1,5 +1,6 @@
|
||||
use crate::context_state::ContextState;
|
||||
use crate::rans::{EntropyCoder, RansDecoder};
|
||||
use crate::Config;
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub enum Op {
|
||||
@@ -8,11 +9,11 @@ pub enum Op {
|
||||
}
|
||||
|
||||
impl Op {
|
||||
pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState) {
|
||||
pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) {
|
||||
let literal_base = state.pos % state.parity_contexts * 256;
|
||||
match self {
|
||||
&Op::Literal(lit) => {
|
||||
encode_bit(coder, state, literal_base, false);
|
||||
encode_bit(coder, state, literal_base, !config.is_match_bit);
|
||||
let mut context_index = 1;
|
||||
for i in (0..8).rev() {
|
||||
let bit = (lit >> i) & 1 != 0;
|
||||
@@ -23,22 +24,28 @@ impl Op {
|
||||
state.pos += 1;
|
||||
}
|
||||
&Op::Match { offset, len } => {
|
||||
encode_bit(coder, state, literal_base, true);
|
||||
encode_bit(coder, state, literal_base, config.is_match_bit);
|
||||
if !state.prev_was_match {
|
||||
encode_bit(
|
||||
coder,
|
||||
state,
|
||||
256 * state.parity_contexts,
|
||||
offset != state.last_offset,
|
||||
(offset != state.last_offset) == config.new_offset_bit,
|
||||
);
|
||||
} else {
|
||||
assert!(offset != state.last_offset);
|
||||
}
|
||||
if offset != state.last_offset {
|
||||
encode_length(coder, state, 256 * state.parity_contexts + 1, offset + 1);
|
||||
encode_length(
|
||||
coder,
|
||||
state,
|
||||
256 * state.parity_contexts + 1,
|
||||
offset + 1,
|
||||
config,
|
||||
);
|
||||
state.last_offset = offset;
|
||||
}
|
||||
encode_length(coder, state, 256 * state.parity_contexts + 65, len);
|
||||
encode_length(coder, state, 256 * state.parity_contexts + 65, len, config);
|
||||
state.prev_was_match = true;
|
||||
state.pos += len as usize;
|
||||
}
|
||||
@@ -46,12 +53,22 @@ impl Op {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState) {
|
||||
encode_bit(coder, state, state.pos % state.parity_contexts * 256, true);
|
||||
pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) {
|
||||
encode_bit(
|
||||
coder,
|
||||
state,
|
||||
state.pos % state.parity_contexts * 256,
|
||||
config.is_match_bit,
|
||||
);
|
||||
if !state.prev_was_match {
|
||||
encode_bit(coder, state, 256 * state.parity_contexts, true);
|
||||
encode_bit(
|
||||
coder,
|
||||
state,
|
||||
256 * state.parity_contexts,
|
||||
config.new_offset_bit,
|
||||
);
|
||||
}
|
||||
encode_length(coder, state, 256 * state.parity_contexts + 1, 1);
|
||||
encode_length(coder, state, 256 * state.parity_contexts + 1, 1, config);
|
||||
}
|
||||
|
||||
fn encode_bit(
|
||||
@@ -68,17 +85,18 @@ fn encode_length(
|
||||
state: &mut CoderState,
|
||||
context_start: usize,
|
||||
mut value: u32,
|
||||
config: &Config,
|
||||
) {
|
||||
assert!(value >= 1);
|
||||
|
||||
let mut context_index = context_start;
|
||||
while value >= 2 {
|
||||
encode_bit(coder, state, context_index, true);
|
||||
encode_bit(coder, state, context_index, config.continue_value_bit);
|
||||
encode_bit(coder, state, context_index + 1, value & 1 != 0);
|
||||
context_index += 2;
|
||||
value >>= 1;
|
||||
}
|
||||
encode_bit(coder, state, context_index, false);
|
||||
encode_bit(coder, state, context_index, !config.continue_value_bit);
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
@@ -106,7 +124,7 @@ impl CoderState {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec<u8> {
|
||||
pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
|
||||
let mut decoder = RansDecoder::new(packed_data, config.use_bitstream);
|
||||
let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64);
|
||||
let mut result = vec![];
|
||||
@@ -117,10 +135,13 @@ pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec<u8> {
|
||||
decoder: &mut RansDecoder,
|
||||
contexts: &mut ContextState,
|
||||
mut context_index: usize,
|
||||
config: &Config,
|
||||
) -> usize {
|
||||
let mut length = 0;
|
||||
let mut bit_pos = 0;
|
||||
while decoder.decode_with_context(&mut contexts.context_mut(context_index)) {
|
||||
while decoder.decode_with_context(&mut contexts.context_mut(context_index))
|
||||
== config.continue_value_bit
|
||||
{
|
||||
length |= (decoder.decode_with_context(&mut contexts.context_mut(context_index + 1))
|
||||
as usize)
|
||||
<< bit_pos;
|
||||
@@ -132,15 +153,19 @@ pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec<u8> {
|
||||
|
||||
loop {
|
||||
let literal_base = result.len() % config.parity_contexts * 256;
|
||||
if decoder.decode_with_context(&mut contexts.context_mut(literal_base)) {
|
||||
if decoder.decode_with_context(&mut contexts.context_mut(literal_base))
|
||||
== config.is_match_bit
|
||||
{
|
||||
if prev_was_match
|
||||
|| decoder
|
||||
.decode_with_context(&mut contexts.context_mut(256 * config.parity_contexts))
|
||||
== config.new_offset_bit
|
||||
{
|
||||
offset = decode_length(
|
||||
&mut decoder,
|
||||
&mut contexts,
|
||||
256 * config.parity_contexts + 1,
|
||||
&config,
|
||||
) - 1;
|
||||
if offset == 0 {
|
||||
break;
|
||||
@@ -150,6 +175,7 @@ pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec<u8> {
|
||||
&mut decoder,
|
||||
&mut contexts,
|
||||
256 * config.parity_contexts + 65,
|
||||
&config,
|
||||
);
|
||||
for _ in 0..length {
|
||||
result.push(result[result.len() - offset]);
|
||||
|
||||
11
src/main.rs
11
src/main.rs
@@ -19,6 +19,11 @@ fn main() -> Result<()> {
|
||||
Short('b') | Long("bitstream") => config.use_bitstream = true,
|
||||
Short('p') | Long("parity") => config.parity_contexts = parser.value()?.parse()?,
|
||||
Short('r') | Long("reverse") => reverse = true,
|
||||
Long("invert-is-match-bit") => config.is_match_bit = false,
|
||||
Long("invert-new-offset-bit") => config.new_offset_bit = false,
|
||||
Long("invert-continue-value-bit") => config.continue_value_bit = false,
|
||||
Long("invert-probs") => config.invert_probs = true,
|
||||
|
||||
Short('u') | Long("unpack") => unpack = true,
|
||||
Short('l') | Long("level") => level = parser.value()?.parse()?,
|
||||
Short('h') | Long("help") => print_help(0),
|
||||
@@ -111,5 +116,11 @@ fn print_help(exit_code: i32) -> ! {
|
||||
eprintln!(" -b, --bitstream bitstream mode");
|
||||
eprintln!(" -p, --parity N use N (2/4) parity contexts");
|
||||
eprintln!(" -r, --reverse reverse input & output");
|
||||
eprintln!();
|
||||
eprintln!("Config options to tailor output to specific optimized unpackers:");
|
||||
eprintln!(" --invert-is-match-bit");
|
||||
eprintln!(" --invert-new-offset-bit");
|
||||
eprintln!(" --invert-continue-value-bit");
|
||||
eprintln!(" --invert-probs");
|
||||
process::exit(exit_code);
|
||||
}
|
||||
|
||||
@@ -9,27 +9,21 @@ use crate::{lz, ProgressCallback};
|
||||
pub fn pack(
|
||||
data: &[u8],
|
||||
level: u8,
|
||||
use_bitstream: bool,
|
||||
parity_contexts: usize,
|
||||
config: &crate::Config,
|
||||
progress_cb: Option<ProgressCallback>,
|
||||
) -> Vec<u8> {
|
||||
let mut parse = parse(
|
||||
data,
|
||||
Config::from_level(level),
|
||||
parity_contexts,
|
||||
progress_cb,
|
||||
);
|
||||
let mut parse = parse(data, Config::from_level(level), config, progress_cb);
|
||||
let mut ops = vec![];
|
||||
while let Some(link) = parse {
|
||||
ops.push(link.op);
|
||||
parse = link.prev.clone();
|
||||
}
|
||||
let mut state = lz::CoderState::new(parity_contexts);
|
||||
let mut coder = RansCoder::new(use_bitstream);
|
||||
let mut state = lz::CoderState::new(config.parity_contexts);
|
||||
let mut coder = RansCoder::new(config.use_bitstream);
|
||||
for op in ops.into_iter().rev() {
|
||||
op.encode(&mut coder, &mut state);
|
||||
op.encode(&mut coder, &mut state, config);
|
||||
}
|
||||
lz::encode_eof(&mut coder, &mut state);
|
||||
lz::encode_eof(&mut coder, &mut state, config);
|
||||
coder.finish()
|
||||
}
|
||||
|
||||
@@ -49,7 +43,7 @@ type Arrivals = HashMap<usize, Vec<Arrival>>;
|
||||
fn parse(
|
||||
data: &[u8],
|
||||
config: Config,
|
||||
parity_contexts: usize,
|
||||
encoding_config: &crate::Config,
|
||||
mut progress_cb: Option<ProgressCallback>,
|
||||
) -> Option<Rc<Parse>> {
|
||||
let mut match_finder = MatchFinder::new(data)
|
||||
@@ -114,6 +108,7 @@ fn parse(
|
||||
length: usize,
|
||||
arrival: &Arrival,
|
||||
max_arrivals: usize,
|
||||
config: &crate::Config,
|
||||
) {
|
||||
cost_counter.reset();
|
||||
let mut state = arrival.state.clone();
|
||||
@@ -121,7 +116,7 @@ fn parse(
|
||||
offset: offset as u32,
|
||||
len: length as u32,
|
||||
};
|
||||
op.encode(cost_counter, &mut state);
|
||||
op.encode(cost_counter, &mut state, config);
|
||||
add_arrival(
|
||||
arrivals,
|
||||
pos + length,
|
||||
@@ -141,7 +136,7 @@ fn parse(
|
||||
0,
|
||||
Arrival {
|
||||
parse: None,
|
||||
state: lz::CoderState::new(parity_contexts),
|
||||
state: lz::CoderState::new(encoding_config.parity_contexts),
|
||||
cost: 0.0,
|
||||
},
|
||||
max_arrivals,
|
||||
@@ -197,6 +192,7 @@ fn parse(
|
||||
m.length,
|
||||
&arrival,
|
||||
max_arrivals,
|
||||
encoding_config,
|
||||
);
|
||||
if m.length >= config.greedy_size {
|
||||
break 'arrival_loop;
|
||||
@@ -220,6 +216,7 @@ fn parse(
|
||||
length,
|
||||
&arrival,
|
||||
max_arrivals,
|
||||
encoding_config,
|
||||
);
|
||||
found_last_offset |= offset as u32 == arrival.state.last_offset();
|
||||
if offset < near_matches.len() {
|
||||
@@ -240,6 +237,7 @@ fn parse(
|
||||
length,
|
||||
&arrival,
|
||||
max_arrivals,
|
||||
encoding_config,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -247,7 +245,7 @@ fn parse(
|
||||
cost_counter.reset();
|
||||
let mut state = arrival.state;
|
||||
let op = lz::Op::Literal(data[pos]);
|
||||
op.encode(cost_counter, &mut state);
|
||||
op.encode(cost_counter, &mut state, encoding_config);
|
||||
add_arrival(
|
||||
&mut arrivals,
|
||||
pos + 1,
|
||||
|
||||
Reference in New Issue
Block a user