mirror of
https://github.com/exoticorn/upkr.git
synced 2026-01-20 11:36:42 +01:00
implement encoding options
This commit is contained in:
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -117,7 +117,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "upkr"
|
name = "upkr"
|
||||||
version = "0.1.0"
|
version = "0.2.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"cdivsufsort",
|
"cdivsufsort",
|
||||||
|
|||||||
@@ -1,17 +1,16 @@
|
|||||||
use crate::lz;
|
|
||||||
use crate::match_finder::MatchFinder;
|
use crate::match_finder::MatchFinder;
|
||||||
use crate::rans::RansCoder;
|
use crate::rans::RansCoder;
|
||||||
use crate::ProgressCallback;
|
use crate::ProgressCallback;
|
||||||
|
use crate::{lz, Config};
|
||||||
|
|
||||||
pub fn pack(
|
pub fn pack(
|
||||||
data: &[u8],
|
data: &[u8],
|
||||||
use_bitstream: bool,
|
config: &Config,
|
||||||
parity_contexts: usize,
|
|
||||||
mut progress_callback: Option<ProgressCallback>,
|
mut progress_callback: Option<ProgressCallback>,
|
||||||
) -> Vec<u8> {
|
) -> Vec<u8> {
|
||||||
let mut match_finder = MatchFinder::new(data);
|
let mut match_finder = MatchFinder::new(data);
|
||||||
let mut rans_coder = RansCoder::new(use_bitstream);
|
let mut rans_coder = RansCoder::new(config.use_bitstream);
|
||||||
let mut state = lz::CoderState::new(parity_contexts);
|
let mut state = lz::CoderState::new(config.parity_contexts);
|
||||||
|
|
||||||
let mut pos = 0;
|
let mut pos = 0;
|
||||||
while pos < data.len() {
|
while pos < data.len() {
|
||||||
@@ -27,7 +26,7 @@ pub fn pack(
|
|||||||
offset: offset as u32,
|
offset: offset as u32,
|
||||||
len: m.length as u32,
|
len: m.length as u32,
|
||||||
}
|
}
|
||||||
.encode(&mut rans_coder, &mut state);
|
.encode(&mut rans_coder, &mut state, config);
|
||||||
pos += m.length;
|
pos += m.length;
|
||||||
encoded_match = true;
|
encoded_match = true;
|
||||||
}
|
}
|
||||||
@@ -46,7 +45,7 @@ pub fn pack(
|
|||||||
offset: offset as u32,
|
offset: offset as u32,
|
||||||
len: length as u32,
|
len: length as u32,
|
||||||
}
|
}
|
||||||
.encode(&mut rans_coder, &mut state);
|
.encode(&mut rans_coder, &mut state, config);
|
||||||
pos += length;
|
pos += length;
|
||||||
encoded_match = true;
|
encoded_match = true;
|
||||||
}
|
}
|
||||||
@@ -54,11 +53,11 @@ pub fn pack(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !encoded_match {
|
if !encoded_match {
|
||||||
lz::Op::Literal(data[pos]).encode(&mut rans_coder, &mut state);
|
lz::Op::Literal(data[pos]).encode(&mut rans_coder, &mut state, config);
|
||||||
pos += 1;
|
pos += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
lz::encode_eof(&mut rans_coder, &mut state);
|
lz::encode_eof(&mut rans_coder, &mut state, config);
|
||||||
rans_coder.finish()
|
rans_coder.finish()
|
||||||
}
|
}
|
||||||
|
|||||||
25
src/lib.rs
25
src/lib.rs
@@ -12,6 +12,11 @@ pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize);
|
|||||||
pub struct Config {
|
pub struct Config {
|
||||||
pub use_bitstream: bool,
|
pub use_bitstream: bool,
|
||||||
pub parity_contexts: usize,
|
pub parity_contexts: usize,
|
||||||
|
|
||||||
|
pub invert_probs: bool,
|
||||||
|
pub is_match_bit: bool,
|
||||||
|
pub new_offset_bit: bool,
|
||||||
|
pub continue_value_bit: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for Config {
|
impl Default for Config {
|
||||||
@@ -19,6 +24,11 @@ impl Default for Config {
|
|||||||
Config {
|
Config {
|
||||||
use_bitstream: false,
|
use_bitstream: false,
|
||||||
parity_contexts: 1,
|
parity_contexts: 1,
|
||||||
|
|
||||||
|
invert_probs: false,
|
||||||
|
is_match_bit: true,
|
||||||
|
new_offset_bit: true,
|
||||||
|
continue_value_bit: true,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -30,20 +40,9 @@ pub fn pack(
|
|||||||
progress_callback: Option<ProgressCallback>,
|
progress_callback: Option<ProgressCallback>,
|
||||||
) -> Vec<u8> {
|
) -> Vec<u8> {
|
||||||
if level == 0 {
|
if level == 0 {
|
||||||
greedy_packer::pack(
|
greedy_packer::pack(data, &config, progress_callback)
|
||||||
data,
|
|
||||||
config.use_bitstream,
|
|
||||||
config.parity_contexts,
|
|
||||||
progress_callback,
|
|
||||||
)
|
|
||||||
} else {
|
} else {
|
||||||
parsing_packer::pack(
|
parsing_packer::pack(data, level, &config, progress_callback)
|
||||||
data,
|
|
||||||
level,
|
|
||||||
config.use_bitstream,
|
|
||||||
config.parity_contexts,
|
|
||||||
progress_callback,
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
56
src/lz.rs
56
src/lz.rs
@@ -1,5 +1,6 @@
|
|||||||
use crate::context_state::ContextState;
|
use crate::context_state::ContextState;
|
||||||
use crate::rans::{EntropyCoder, RansDecoder};
|
use crate::rans::{EntropyCoder, RansDecoder};
|
||||||
|
use crate::Config;
|
||||||
|
|
||||||
#[derive(Copy, Clone, Debug)]
|
#[derive(Copy, Clone, Debug)]
|
||||||
pub enum Op {
|
pub enum Op {
|
||||||
@@ -8,11 +9,11 @@ pub enum Op {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Op {
|
impl Op {
|
||||||
pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState) {
|
pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) {
|
||||||
let literal_base = state.pos % state.parity_contexts * 256;
|
let literal_base = state.pos % state.parity_contexts * 256;
|
||||||
match self {
|
match self {
|
||||||
&Op::Literal(lit) => {
|
&Op::Literal(lit) => {
|
||||||
encode_bit(coder, state, literal_base, false);
|
encode_bit(coder, state, literal_base, !config.is_match_bit);
|
||||||
let mut context_index = 1;
|
let mut context_index = 1;
|
||||||
for i in (0..8).rev() {
|
for i in (0..8).rev() {
|
||||||
let bit = (lit >> i) & 1 != 0;
|
let bit = (lit >> i) & 1 != 0;
|
||||||
@@ -23,22 +24,28 @@ impl Op {
|
|||||||
state.pos += 1;
|
state.pos += 1;
|
||||||
}
|
}
|
||||||
&Op::Match { offset, len } => {
|
&Op::Match { offset, len } => {
|
||||||
encode_bit(coder, state, literal_base, true);
|
encode_bit(coder, state, literal_base, config.is_match_bit);
|
||||||
if !state.prev_was_match {
|
if !state.prev_was_match {
|
||||||
encode_bit(
|
encode_bit(
|
||||||
coder,
|
coder,
|
||||||
state,
|
state,
|
||||||
256 * state.parity_contexts,
|
256 * state.parity_contexts,
|
||||||
offset != state.last_offset,
|
(offset != state.last_offset) == config.new_offset_bit,
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
assert!(offset != state.last_offset);
|
assert!(offset != state.last_offset);
|
||||||
}
|
}
|
||||||
if offset != state.last_offset {
|
if offset != state.last_offset {
|
||||||
encode_length(coder, state, 256 * state.parity_contexts + 1, offset + 1);
|
encode_length(
|
||||||
|
coder,
|
||||||
|
state,
|
||||||
|
256 * state.parity_contexts + 1,
|
||||||
|
offset + 1,
|
||||||
|
config,
|
||||||
|
);
|
||||||
state.last_offset = offset;
|
state.last_offset = offset;
|
||||||
}
|
}
|
||||||
encode_length(coder, state, 256 * state.parity_contexts + 65, len);
|
encode_length(coder, state, 256 * state.parity_contexts + 65, len, config);
|
||||||
state.prev_was_match = true;
|
state.prev_was_match = true;
|
||||||
state.pos += len as usize;
|
state.pos += len as usize;
|
||||||
}
|
}
|
||||||
@@ -46,12 +53,22 @@ impl Op {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState) {
|
pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) {
|
||||||
encode_bit(coder, state, state.pos % state.parity_contexts * 256, true);
|
encode_bit(
|
||||||
|
coder,
|
||||||
|
state,
|
||||||
|
state.pos % state.parity_contexts * 256,
|
||||||
|
config.is_match_bit,
|
||||||
|
);
|
||||||
if !state.prev_was_match {
|
if !state.prev_was_match {
|
||||||
encode_bit(coder, state, 256 * state.parity_contexts, true);
|
encode_bit(
|
||||||
|
coder,
|
||||||
|
state,
|
||||||
|
256 * state.parity_contexts,
|
||||||
|
config.new_offset_bit,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
encode_length(coder, state, 256 * state.parity_contexts + 1, 1);
|
encode_length(coder, state, 256 * state.parity_contexts + 1, 1, config);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn encode_bit(
|
fn encode_bit(
|
||||||
@@ -68,17 +85,18 @@ fn encode_length(
|
|||||||
state: &mut CoderState,
|
state: &mut CoderState,
|
||||||
context_start: usize,
|
context_start: usize,
|
||||||
mut value: u32,
|
mut value: u32,
|
||||||
|
config: &Config,
|
||||||
) {
|
) {
|
||||||
assert!(value >= 1);
|
assert!(value >= 1);
|
||||||
|
|
||||||
let mut context_index = context_start;
|
let mut context_index = context_start;
|
||||||
while value >= 2 {
|
while value >= 2 {
|
||||||
encode_bit(coder, state, context_index, true);
|
encode_bit(coder, state, context_index, config.continue_value_bit);
|
||||||
encode_bit(coder, state, context_index + 1, value & 1 != 0);
|
encode_bit(coder, state, context_index + 1, value & 1 != 0);
|
||||||
context_index += 2;
|
context_index += 2;
|
||||||
value >>= 1;
|
value >>= 1;
|
||||||
}
|
}
|
||||||
encode_bit(coder, state, context_index, false);
|
encode_bit(coder, state, context_index, !config.continue_value_bit);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
@@ -106,7 +124,7 @@ impl CoderState {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec<u8> {
|
pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
|
||||||
let mut decoder = RansDecoder::new(packed_data, config.use_bitstream);
|
let mut decoder = RansDecoder::new(packed_data, config.use_bitstream);
|
||||||
let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64);
|
let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64);
|
||||||
let mut result = vec![];
|
let mut result = vec![];
|
||||||
@@ -117,10 +135,13 @@ pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec<u8> {
|
|||||||
decoder: &mut RansDecoder,
|
decoder: &mut RansDecoder,
|
||||||
contexts: &mut ContextState,
|
contexts: &mut ContextState,
|
||||||
mut context_index: usize,
|
mut context_index: usize,
|
||||||
|
config: &Config,
|
||||||
) -> usize {
|
) -> usize {
|
||||||
let mut length = 0;
|
let mut length = 0;
|
||||||
let mut bit_pos = 0;
|
let mut bit_pos = 0;
|
||||||
while decoder.decode_with_context(&mut contexts.context_mut(context_index)) {
|
while decoder.decode_with_context(&mut contexts.context_mut(context_index))
|
||||||
|
== config.continue_value_bit
|
||||||
|
{
|
||||||
length |= (decoder.decode_with_context(&mut contexts.context_mut(context_index + 1))
|
length |= (decoder.decode_with_context(&mut contexts.context_mut(context_index + 1))
|
||||||
as usize)
|
as usize)
|
||||||
<< bit_pos;
|
<< bit_pos;
|
||||||
@@ -132,15 +153,19 @@ pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec<u8> {
|
|||||||
|
|
||||||
loop {
|
loop {
|
||||||
let literal_base = result.len() % config.parity_contexts * 256;
|
let literal_base = result.len() % config.parity_contexts * 256;
|
||||||
if decoder.decode_with_context(&mut contexts.context_mut(literal_base)) {
|
if decoder.decode_with_context(&mut contexts.context_mut(literal_base))
|
||||||
|
== config.is_match_bit
|
||||||
|
{
|
||||||
if prev_was_match
|
if prev_was_match
|
||||||
|| decoder
|
|| decoder
|
||||||
.decode_with_context(&mut contexts.context_mut(256 * config.parity_contexts))
|
.decode_with_context(&mut contexts.context_mut(256 * config.parity_contexts))
|
||||||
|
== config.new_offset_bit
|
||||||
{
|
{
|
||||||
offset = decode_length(
|
offset = decode_length(
|
||||||
&mut decoder,
|
&mut decoder,
|
||||||
&mut contexts,
|
&mut contexts,
|
||||||
256 * config.parity_contexts + 1,
|
256 * config.parity_contexts + 1,
|
||||||
|
&config,
|
||||||
) - 1;
|
) - 1;
|
||||||
if offset == 0 {
|
if offset == 0 {
|
||||||
break;
|
break;
|
||||||
@@ -150,6 +175,7 @@ pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec<u8> {
|
|||||||
&mut decoder,
|
&mut decoder,
|
||||||
&mut contexts,
|
&mut contexts,
|
||||||
256 * config.parity_contexts + 65,
|
256 * config.parity_contexts + 65,
|
||||||
|
&config,
|
||||||
);
|
);
|
||||||
for _ in 0..length {
|
for _ in 0..length {
|
||||||
result.push(result[result.len() - offset]);
|
result.push(result[result.len() - offset]);
|
||||||
|
|||||||
11
src/main.rs
11
src/main.rs
@@ -19,6 +19,11 @@ fn main() -> Result<()> {
|
|||||||
Short('b') | Long("bitstream") => config.use_bitstream = true,
|
Short('b') | Long("bitstream") => config.use_bitstream = true,
|
||||||
Short('p') | Long("parity") => config.parity_contexts = parser.value()?.parse()?,
|
Short('p') | Long("parity") => config.parity_contexts = parser.value()?.parse()?,
|
||||||
Short('r') | Long("reverse") => reverse = true,
|
Short('r') | Long("reverse") => reverse = true,
|
||||||
|
Long("invert-is-match-bit") => config.is_match_bit = false,
|
||||||
|
Long("invert-new-offset-bit") => config.new_offset_bit = false,
|
||||||
|
Long("invert-continue-value-bit") => config.continue_value_bit = false,
|
||||||
|
Long("invert-probs") => config.invert_probs = true,
|
||||||
|
|
||||||
Short('u') | Long("unpack") => unpack = true,
|
Short('u') | Long("unpack") => unpack = true,
|
||||||
Short('l') | Long("level") => level = parser.value()?.parse()?,
|
Short('l') | Long("level") => level = parser.value()?.parse()?,
|
||||||
Short('h') | Long("help") => print_help(0),
|
Short('h') | Long("help") => print_help(0),
|
||||||
@@ -111,5 +116,11 @@ fn print_help(exit_code: i32) -> ! {
|
|||||||
eprintln!(" -b, --bitstream bitstream mode");
|
eprintln!(" -b, --bitstream bitstream mode");
|
||||||
eprintln!(" -p, --parity N use N (2/4) parity contexts");
|
eprintln!(" -p, --parity N use N (2/4) parity contexts");
|
||||||
eprintln!(" -r, --reverse reverse input & output");
|
eprintln!(" -r, --reverse reverse input & output");
|
||||||
|
eprintln!();
|
||||||
|
eprintln!("Config options to tailor output to specific optimized unpackers:");
|
||||||
|
eprintln!(" --invert-is-match-bit");
|
||||||
|
eprintln!(" --invert-new-offset-bit");
|
||||||
|
eprintln!(" --invert-continue-value-bit");
|
||||||
|
eprintln!(" --invert-probs");
|
||||||
process::exit(exit_code);
|
process::exit(exit_code);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,27 +9,21 @@ use crate::{lz, ProgressCallback};
|
|||||||
pub fn pack(
|
pub fn pack(
|
||||||
data: &[u8],
|
data: &[u8],
|
||||||
level: u8,
|
level: u8,
|
||||||
use_bitstream: bool,
|
config: &crate::Config,
|
||||||
parity_contexts: usize,
|
|
||||||
progress_cb: Option<ProgressCallback>,
|
progress_cb: Option<ProgressCallback>,
|
||||||
) -> Vec<u8> {
|
) -> Vec<u8> {
|
||||||
let mut parse = parse(
|
let mut parse = parse(data, Config::from_level(level), config, progress_cb);
|
||||||
data,
|
|
||||||
Config::from_level(level),
|
|
||||||
parity_contexts,
|
|
||||||
progress_cb,
|
|
||||||
);
|
|
||||||
let mut ops = vec![];
|
let mut ops = vec![];
|
||||||
while let Some(link) = parse {
|
while let Some(link) = parse {
|
||||||
ops.push(link.op);
|
ops.push(link.op);
|
||||||
parse = link.prev.clone();
|
parse = link.prev.clone();
|
||||||
}
|
}
|
||||||
let mut state = lz::CoderState::new(parity_contexts);
|
let mut state = lz::CoderState::new(config.parity_contexts);
|
||||||
let mut coder = RansCoder::new(use_bitstream);
|
let mut coder = RansCoder::new(config.use_bitstream);
|
||||||
for op in ops.into_iter().rev() {
|
for op in ops.into_iter().rev() {
|
||||||
op.encode(&mut coder, &mut state);
|
op.encode(&mut coder, &mut state, config);
|
||||||
}
|
}
|
||||||
lz::encode_eof(&mut coder, &mut state);
|
lz::encode_eof(&mut coder, &mut state, config);
|
||||||
coder.finish()
|
coder.finish()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -49,7 +43,7 @@ type Arrivals = HashMap<usize, Vec<Arrival>>;
|
|||||||
fn parse(
|
fn parse(
|
||||||
data: &[u8],
|
data: &[u8],
|
||||||
config: Config,
|
config: Config,
|
||||||
parity_contexts: usize,
|
encoding_config: &crate::Config,
|
||||||
mut progress_cb: Option<ProgressCallback>,
|
mut progress_cb: Option<ProgressCallback>,
|
||||||
) -> Option<Rc<Parse>> {
|
) -> Option<Rc<Parse>> {
|
||||||
let mut match_finder = MatchFinder::new(data)
|
let mut match_finder = MatchFinder::new(data)
|
||||||
@@ -114,6 +108,7 @@ fn parse(
|
|||||||
length: usize,
|
length: usize,
|
||||||
arrival: &Arrival,
|
arrival: &Arrival,
|
||||||
max_arrivals: usize,
|
max_arrivals: usize,
|
||||||
|
config: &crate::Config,
|
||||||
) {
|
) {
|
||||||
cost_counter.reset();
|
cost_counter.reset();
|
||||||
let mut state = arrival.state.clone();
|
let mut state = arrival.state.clone();
|
||||||
@@ -121,7 +116,7 @@ fn parse(
|
|||||||
offset: offset as u32,
|
offset: offset as u32,
|
||||||
len: length as u32,
|
len: length as u32,
|
||||||
};
|
};
|
||||||
op.encode(cost_counter, &mut state);
|
op.encode(cost_counter, &mut state, config);
|
||||||
add_arrival(
|
add_arrival(
|
||||||
arrivals,
|
arrivals,
|
||||||
pos + length,
|
pos + length,
|
||||||
@@ -141,7 +136,7 @@ fn parse(
|
|||||||
0,
|
0,
|
||||||
Arrival {
|
Arrival {
|
||||||
parse: None,
|
parse: None,
|
||||||
state: lz::CoderState::new(parity_contexts),
|
state: lz::CoderState::new(encoding_config.parity_contexts),
|
||||||
cost: 0.0,
|
cost: 0.0,
|
||||||
},
|
},
|
||||||
max_arrivals,
|
max_arrivals,
|
||||||
@@ -197,6 +192,7 @@ fn parse(
|
|||||||
m.length,
|
m.length,
|
||||||
&arrival,
|
&arrival,
|
||||||
max_arrivals,
|
max_arrivals,
|
||||||
|
encoding_config,
|
||||||
);
|
);
|
||||||
if m.length >= config.greedy_size {
|
if m.length >= config.greedy_size {
|
||||||
break 'arrival_loop;
|
break 'arrival_loop;
|
||||||
@@ -220,6 +216,7 @@ fn parse(
|
|||||||
length,
|
length,
|
||||||
&arrival,
|
&arrival,
|
||||||
max_arrivals,
|
max_arrivals,
|
||||||
|
encoding_config,
|
||||||
);
|
);
|
||||||
found_last_offset |= offset as u32 == arrival.state.last_offset();
|
found_last_offset |= offset as u32 == arrival.state.last_offset();
|
||||||
if offset < near_matches.len() {
|
if offset < near_matches.len() {
|
||||||
@@ -240,6 +237,7 @@ fn parse(
|
|||||||
length,
|
length,
|
||||||
&arrival,
|
&arrival,
|
||||||
max_arrivals,
|
max_arrivals,
|
||||||
|
encoding_config,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -247,7 +245,7 @@ fn parse(
|
|||||||
cost_counter.reset();
|
cost_counter.reset();
|
||||||
let mut state = arrival.state;
|
let mut state = arrival.state;
|
||||||
let op = lz::Op::Literal(data[pos]);
|
let op = lz::Op::Literal(data[pos]);
|
||||||
op.encode(cost_counter, &mut state);
|
op.encode(cost_counter, &mut state, encoding_config);
|
||||||
add_arrival(
|
add_arrival(
|
||||||
&mut arrivals,
|
&mut arrivals,
|
||||||
pos + 1,
|
pos + 1,
|
||||||
|
|||||||
Reference in New Issue
Block a user