3 Commits

11 changed files with 254 additions and 132 deletions

2
Cargo.lock generated
View File

@@ -117,7 +117,7 @@ dependencies = [
[[package]] [[package]]
name = "upkr" name = "upkr"
version = "0.1.0" version = "0.2.0-pre1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"cdivsufsort", "cdivsufsort",

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "upkr" name = "upkr"
version = "0.2.0" version = "0.2.0-pre1"
edition = "2021" edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@@ -6,10 +6,25 @@ test_riscv64: build/unpack_riscv64
qemu-riscv64 $< test_data.upk /tmp/out.bin qemu-riscv64 $< test_data.upk /tmp/out.bin
cmp test_data.bin /tmp/out.bin cmp test_data.bin /tmp/out.bin
build/unpack_riscv64.bin: unpack_riscv.S build/unpack_riscv64.o: unpack_riscv.S
mkdir -p build mkdir -p build
riscv64-linux-gnu-gcc -c -o build/unpack_riscv64.o $? riscv64-linux-gnu-gcc -c -o $@ $?
riscv64-linux-gnu-objcopy -O binary --only-section=.text build/unpack_riscv64.o $@
build/unpack_riscv64.bin: build/unpack_riscv64.o
riscv64-linux-gnu-objcopy -O binary --only-section=.text $? $@
disas-riscv64: build/unpack_riscv64.o
riscv64-linux-gnu-objdump -d $?
build/unpack_riscv32.o: unpack_riscv.S
mkdir -p build
riscv64-linux-gnu-gcc -march=rv32imc -mabi=ilp32 -c -o $@ $?
build/unpack_riscv32.bin: build/unpack_riscv32.o
riscv64-linux-gnu-objcopy -O binary --only-section=.text $? $@
disas-riscv32: build/unpack_riscv32.o
riscv64-linux-gnu-objdump -d $?
build/unpack_armv6m: ../c_unpacker/main.c unpack_armv6m.S build/unpack_armv6m: ../c_unpacker/main.c unpack_armv6m.S
mkdir -p build mkdir -p build
@@ -32,5 +47,5 @@ test_c: build/unpack_c
$< test_data.upk /tmp/out.bin $< test_data.upk /tmp/out.bin
cmp test_data.bin /tmp/out.bin cmp test_data.bin /tmp/out.bin
sizes: build/unpack_armv6m.bin build/unpack_riscv64.bin sizes: build/unpack_armv6m.bin build/unpack_riscv64.bin build/unpack_riscv32.bin
ls -l build/*.bin ls -l build/*.bin

View File

@@ -1,6 +1,6 @@
.section .text .section .text
#define FRAME_SIZE (256+64*4+4) #define FRAME_SIZE (256+32*4+4)
// x8 prob array ptr // x8 prob array ptr
// x9 prev was literal // x9 prev was literal
@@ -28,39 +28,39 @@ upkr_unpack:
jal upkr_decode_bit jal upkr_decode_bit
beqz x15, .Lliteral beqz x15, .Lliteral
li x14, 256 slli x14, x14, 8
beqz x9, .Lread_offset beqz x9, .Lread_offset_inc_x14
jal upkr_decode_bit jal upkr_decode_bit
beqz x15, .Lskip_offset bnez x15, .Lread_offset
.Lread_offset:
jal t3, upkr_decode_number
addi x12, x9, -1
beqz x12, .Ldone
.Lskip_offset: .Lfinished_offset:
li x14, 256+64 addi x14, x14, 64
jal t3, upkr_decode_number jal t3, upkr_decode_number
1: 1:
sub x15, x10, x12 add x14, x10, t0
lbu x15, (x15) lbu x14, (x14)
sb x15, (x10) .Lstore_byte:
sb x14, (x10)
addi x10, x10, 1 addi x10, x10, 1
addi x9, x9, -1 addi x9, x9, 1
bnez x9, 1b blt x9, x0, 1b
j .Lmainloop j .Lmainloop
.Lliteral: .Lliteral:
li x14, 1
1:
jal upkr_decode_bit jal upkr_decode_bit
addi x14, x14, -1
slli x14, x14, 1 slli x14, x14, 1
add x14, x14, x15 add x14, x14, x15
srli x9, x14, 8 srli x9, x14, 8
beqz x9, 1b beqz x9, .Lliteral
sb x14, 0(x10) j .Lstore_byte
addi x10, x10, 1
j .Lmainloop
.Lread_offset_inc_x14:
addi x14, x14, 1
.Lread_offset:
jal t3, upkr_decode_number
addi t0, x9, 1
bnez t0, .Lfinished_offset
.Ldone: .Ldone:
addi sp, sp, FRAME_SIZE addi sp, sp, FRAME_SIZE
mv x8, x17 mv x8, x17
@@ -68,16 +68,14 @@ upkr_unpack:
jr t4 jr t4
// x14 context index // x14 context index
// return: x9 decoded number // return: x9 negative decoded number
upkr_decode_number: upkr_decode_number:
mv t5, x14 mv t5, x14
li x9, 0 li x9, 0
li x8, 1 li x8, -1
1: 1:
addi x14, x14, 1
jal upkr_decode_bit jal upkr_decode_bit
beqz x15, 1f beqz x15, 1f
addi x14, x14, 1
jal upkr_decode_bit jal upkr_decode_bit
beqz x15, 2f beqz x15, 2f
add x9, x9, x8 add x9, x9, x8
@@ -99,46 +97,46 @@ upkr_load_byte:
// x11 in ptr // x11 in ptr
// x13 state // x13 state
// x14 context index // x14 context index
// return: x15 decoded bit // return:
// x14 context index + 1
// x15 decoded bit
upkr_decode_bit: upkr_decode_bit:
srli x15, x13, 12 srli x15, x13, 12
beqz x15, upkr_load_byte beqz x15, upkr_load_byte
mv t0, x9
mv t1, x14 mv t1, x14
mv t2, x10 mv t2, x10
add x14, x14, sp add x14, x14, sp
lbu x9, 0(x14) lbu x12, 0(x14)
andi x10, x13, 255 andi x10, x13, 255
sltu x15, x10, x9 sltu x15, x10, x12
srli x13, x13, 8 srli x13, x13, 8
beqz x15, .Lelse beqz x15, .Lelse
mul x13, x13, x9 mul x13, x13, x12
add x13, x13, x10 add x13, x13, x10
li x10, 256 + 8 li x10, 256 + 8
sub x10, x10, x9 sub x10, x10, x12
srli x10, x10, 4 srli x10, x10, 4
add x9, x9, x10 add x12, x12, x10
j .Lendif j .Lendif
.Lelse: .Lelse:
li x16, 256 li x16, 256
sub x16, x16, x9 sub x16, x16, x12
mul x13, x13, x16 mul x13, x13, x16
add x13, x13, x10 add x13, x13, x10
sub x13, x13, x9 sub x13, x13, x12
addi x10, x9, 8 addi x10, x12, 8
srli x10, x10, 4 srli x10, x10, 4
sub x9, x9, x10 sub x12, x12, x10
.Lendif: .Lendif:
sb x9, 0(x14) sb x12, 0(x14)
mv x9, t0 addi x14, t1, 1
mv x14, t1
mv x10, t2 mv x10, t2
ret ret

View File

@@ -1,4 +1,7 @@
use crate::rans::{PROB_BITS, ONE_PROB}; use crate::{
rans::{ONE_PROB, PROB_BITS},
Config,
};
const INIT_PROB: u16 = 1 << (PROB_BITS - 1); const INIT_PROB: u16 = 1 << (PROB_BITS - 1);
const UPDATE_RATE: u32 = 4; const UPDATE_RATE: u32 = 4;
@@ -7,6 +10,8 @@ const UPDATE_ADD: u32 = 8;
#[derive(Clone)] #[derive(Clone)]
pub struct ContextState { pub struct ContextState {
contexts: Vec<u8>, contexts: Vec<u8>,
invert_bit_encoding: bool,
simplified_prob_update: bool,
} }
pub struct Context<'a> { pub struct Context<'a> {
@@ -15,9 +20,11 @@ pub struct Context<'a> {
} }
impl ContextState { impl ContextState {
pub fn new(size: usize) -> ContextState { pub fn new(size: usize, config: &Config) -> ContextState {
ContextState { ContextState {
contexts: vec![INIT_PROB as u8; size], contexts: vec![INIT_PROB as u8; size],
invert_bit_encoding: config.invert_bit_encoding,
simplified_prob_update: config.simplified_prob_update,
} }
} }
@@ -33,10 +40,21 @@ impl<'a> Context<'a> {
pub fn update(&mut self, bit: bool) { pub fn update(&mut self, bit: bool) {
let old = self.state.contexts[self.index]; let old = self.state.contexts[self.index];
self.state.contexts[self.index] = if bit {
self.state.contexts[self.index] = if self.state.simplified_prob_update {
let offset = if bit ^ self.state.invert_bit_encoding {
ONE_PROB as i32 >> UPDATE_RATE
} else {
0
};
(offset + old as i32 - ((old as i32 + UPDATE_ADD as i32) >> UPDATE_RATE)) as u8
} else {
if bit ^ self.state.invert_bit_encoding {
old + ((ONE_PROB - old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8 old + ((ONE_PROB - old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8
} else { } else {
old - ((old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8 old - ((old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8
}
}; };
} }
} }

View File

@@ -1,17 +1,16 @@
use crate::lz;
use crate::match_finder::MatchFinder; use crate::match_finder::MatchFinder;
use crate::rans::RansCoder; use crate::rans::RansCoder;
use crate::ProgressCallback; use crate::ProgressCallback;
use crate::{lz, Config};
pub fn pack( pub fn pack(
data: &[u8], data: &[u8],
use_bitstream: bool, config: &Config,
parity_contexts: usize,
mut progress_callback: Option<ProgressCallback>, mut progress_callback: Option<ProgressCallback>,
) -> Vec<u8> { ) -> Vec<u8> {
let mut match_finder = MatchFinder::new(data); let mut match_finder = MatchFinder::new(data);
let mut rans_coder = RansCoder::new(use_bitstream); let mut rans_coder = RansCoder::new(config);
let mut state = lz::CoderState::new(parity_contexts); let mut state = lz::CoderState::new(config);
let mut pos = 0; let mut pos = 0;
while pos < data.len() { while pos < data.len() {
@@ -27,7 +26,7 @@ pub fn pack(
offset: offset as u32, offset: offset as u32,
len: m.length as u32, len: m.length as u32,
} }
.encode(&mut rans_coder, &mut state); .encode(&mut rans_coder, &mut state, config);
pos += m.length; pos += m.length;
encoded_match = true; encoded_match = true;
} }
@@ -46,7 +45,7 @@ pub fn pack(
offset: offset as u32, offset: offset as u32,
len: length as u32, len: length as u32,
} }
.encode(&mut rans_coder, &mut state); .encode(&mut rans_coder, &mut state, config);
pos += length; pos += length;
encoded_match = true; encoded_match = true;
} }
@@ -54,11 +53,11 @@ pub fn pack(
} }
if !encoded_match { if !encoded_match {
lz::Op::Literal(data[pos]).encode(&mut rans_coder, &mut state); lz::Op::Literal(data[pos]).encode(&mut rans_coder, &mut state, config);
pos += 1; pos += 1;
} }
} }
lz::encode_eof(&mut rans_coder, &mut state); lz::encode_eof(&mut rans_coder, &mut state, config);
rans_coder.finish() rans_coder.finish()
} }

View File

@@ -12,6 +12,14 @@ pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize);
pub struct Config { pub struct Config {
pub use_bitstream: bool, pub use_bitstream: bool,
pub parity_contexts: usize, pub parity_contexts: usize,
pub invert_bit_encoding: bool,
pub is_match_bit: bool,
pub new_offset_bit: bool,
pub continue_value_bit: bool,
pub bitstream_is_big_endian: bool,
pub simplified_prob_update: bool,
} }
impl Default for Config { impl Default for Config {
@@ -19,6 +27,14 @@ impl Default for Config {
Config { Config {
use_bitstream: false, use_bitstream: false,
parity_contexts: 1, parity_contexts: 1,
invert_bit_encoding: false,
is_match_bit: true,
new_offset_bit: true,
continue_value_bit: true,
bitstream_is_big_endian: false,
simplified_prob_update: false,
} }
} }
} }
@@ -30,20 +46,9 @@ pub fn pack(
progress_callback: Option<ProgressCallback>, progress_callback: Option<ProgressCallback>,
) -> Vec<u8> { ) -> Vec<u8> {
if level == 0 { if level == 0 {
greedy_packer::pack( greedy_packer::pack(data, &config, progress_callback)
data,
config.use_bitstream,
config.parity_contexts,
progress_callback,
)
} else { } else {
parsing_packer::pack( parsing_packer::pack(data, level, &config, progress_callback)
data,
level,
config.use_bitstream,
config.parity_contexts,
progress_callback,
)
} }
} }

View File

@@ -1,5 +1,6 @@
use crate::context_state::ContextState; use crate::context_state::ContextState;
use crate::rans::{EntropyCoder, RansDecoder}; use crate::rans::{EntropyCoder, RansDecoder};
use crate::Config;
#[derive(Copy, Clone, Debug)] #[derive(Copy, Clone, Debug)]
pub enum Op { pub enum Op {
@@ -8,11 +9,11 @@ pub enum Op {
} }
impl Op { impl Op {
pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState) { pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) {
let literal_base = state.pos % state.parity_contexts * 256; let literal_base = state.pos % state.parity_contexts * 256;
match self { match self {
&Op::Literal(lit) => { &Op::Literal(lit) => {
encode_bit(coder, state, literal_base, false); encode_bit(coder, state, literal_base, !config.is_match_bit);
let mut context_index = 1; let mut context_index = 1;
for i in (0..8).rev() { for i in (0..8).rev() {
let bit = (lit >> i) & 1 != 0; let bit = (lit >> i) & 1 != 0;
@@ -23,22 +24,28 @@ impl Op {
state.pos += 1; state.pos += 1;
} }
&Op::Match { offset, len } => { &Op::Match { offset, len } => {
encode_bit(coder, state, literal_base, true); encode_bit(coder, state, literal_base, config.is_match_bit);
if !state.prev_was_match { if !state.prev_was_match {
encode_bit( encode_bit(
coder, coder,
state, state,
256 * state.parity_contexts, 256 * state.parity_contexts,
offset != state.last_offset, (offset != state.last_offset) == config.new_offset_bit,
); );
} else { } else {
assert!(offset != state.last_offset); assert!(offset != state.last_offset);
} }
if offset != state.last_offset { if offset != state.last_offset {
encode_length(coder, state, 256 * state.parity_contexts + 1, offset + 1); encode_length(
coder,
state,
256 * state.parity_contexts + 1,
offset + 1,
config,
);
state.last_offset = offset; state.last_offset = offset;
} }
encode_length(coder, state, 256 * state.parity_contexts + 65, len); encode_length(coder, state, 256 * state.parity_contexts + 65, len, config);
state.prev_was_match = true; state.prev_was_match = true;
state.pos += len as usize; state.pos += len as usize;
} }
@@ -46,12 +53,22 @@ impl Op {
} }
} }
pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState) { pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) {
encode_bit(coder, state, state.pos % state.parity_contexts * 256, true); encode_bit(
coder,
state,
state.pos % state.parity_contexts * 256,
config.is_match_bit,
);
if !state.prev_was_match { if !state.prev_was_match {
encode_bit(coder, state, 256 * state.parity_contexts, true); encode_bit(
coder,
state,
256 * state.parity_contexts,
config.new_offset_bit,
);
} }
encode_length(coder, state, 256 * state.parity_contexts + 1, 1); encode_length(coder, state, 256 * state.parity_contexts + 1, 1, config);
} }
fn encode_bit( fn encode_bit(
@@ -68,36 +85,37 @@ fn encode_length(
state: &mut CoderState, state: &mut CoderState,
context_start: usize, context_start: usize,
mut value: u32, mut value: u32,
config: &Config,
) { ) {
assert!(value >= 1); assert!(value >= 1);
let mut context_index = context_start; let mut context_index = context_start;
while value >= 2 { while value >= 2 {
encode_bit(coder, state, context_index, true); encode_bit(coder, state, context_index, config.continue_value_bit);
encode_bit(coder, state, context_index + 1, value & 1 != 0); encode_bit(coder, state, context_index + 1, value & 1 != 0);
context_index += 2; context_index += 2;
value >>= 1; value >>= 1;
} }
encode_bit(coder, state, context_index, false); encode_bit(coder, state, context_index, !config.continue_value_bit);
} }
#[derive(Clone)] #[derive(Clone)]
pub struct CoderState { pub struct CoderState {
contexts: ContextState, contexts: ContextState,
parity_contexts: usize,
last_offset: u32, last_offset: u32,
prev_was_match: bool, prev_was_match: bool,
pos: usize, pos: usize,
parity_contexts: usize,
} }
impl CoderState { impl CoderState {
pub fn new(parity_contexts: usize) -> CoderState { pub fn new(config: &Config) -> CoderState {
CoderState { CoderState {
contexts: ContextState::new((1 + 255) * parity_contexts + 1 + 64 + 64), contexts: ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, config),
last_offset: 0, last_offset: 0,
parity_contexts,
prev_was_match: false, prev_was_match: false,
pos: 0, pos: 0,
parity_contexts: config.parity_contexts,
} }
} }
@@ -106,9 +124,9 @@ impl CoderState {
} }
} }
pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec<u8> { pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
let mut decoder = RansDecoder::new(packed_data, config.use_bitstream); let mut decoder = RansDecoder::new(packed_data, &config);
let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64); let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, &config);
let mut result = vec![]; let mut result = vec![];
let mut offset = 0; let mut offset = 0;
let mut prev_was_match = false; let mut prev_was_match = false;
@@ -117,10 +135,13 @@ pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec<u8> {
decoder: &mut RansDecoder, decoder: &mut RansDecoder,
contexts: &mut ContextState, contexts: &mut ContextState,
mut context_index: usize, mut context_index: usize,
config: &Config,
) -> usize { ) -> usize {
let mut length = 0; let mut length = 0;
let mut bit_pos = 0; let mut bit_pos = 0;
while decoder.decode_with_context(&mut contexts.context_mut(context_index)) { while decoder.decode_with_context(&mut contexts.context_mut(context_index))
== config.continue_value_bit
{
length |= (decoder.decode_with_context(&mut contexts.context_mut(context_index + 1)) length |= (decoder.decode_with_context(&mut contexts.context_mut(context_index + 1))
as usize) as usize)
<< bit_pos; << bit_pos;
@@ -132,15 +153,19 @@ pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec<u8> {
loop { loop {
let literal_base = result.len() % config.parity_contexts * 256; let literal_base = result.len() % config.parity_contexts * 256;
if decoder.decode_with_context(&mut contexts.context_mut(literal_base)) { if decoder.decode_with_context(&mut contexts.context_mut(literal_base))
== config.is_match_bit
{
if prev_was_match if prev_was_match
|| decoder || decoder
.decode_with_context(&mut contexts.context_mut(256 * config.parity_contexts)) .decode_with_context(&mut contexts.context_mut(256 * config.parity_contexts))
== config.new_offset_bit
{ {
offset = decode_length( offset = decode_length(
&mut decoder, &mut decoder,
&mut contexts, &mut contexts,
256 * config.parity_contexts + 1, 256 * config.parity_contexts + 1,
&config,
) - 1; ) - 1;
if offset == 0 { if offset == 0 {
break; break;
@@ -150,6 +175,7 @@ pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec<u8> {
&mut decoder, &mut decoder,
&mut contexts, &mut contexts,
256 * config.parity_contexts + 65, 256 * config.parity_contexts + 65,
&config,
); );
for _ in 0..length { for _ in 0..length {
result.push(result[result.len() - offset]); result.push(result[result.len() - offset]);

View File

@@ -19,6 +19,28 @@ fn main() -> Result<()> {
Short('b') | Long("bitstream") => config.use_bitstream = true, Short('b') | Long("bitstream") => config.use_bitstream = true,
Short('p') | Long("parity") => config.parity_contexts = parser.value()?.parse()?, Short('p') | Long("parity") => config.parity_contexts = parser.value()?.parse()?,
Short('r') | Long("reverse") => reverse = true, Short('r') | Long("reverse") => reverse = true,
Long("invert-is-match-bit") => config.is_match_bit = false,
Long("invert-new-offset-bit") => config.new_offset_bit = false,
Long("invert-continue-value-bit") => config.continue_value_bit = false,
Long("invert-bit-encoding") => config.invert_bit_encoding = true,
Long("simplified-prob-update") => config.simplified_prob_update = true,
Long("big-endian-bitstream") => {
config.use_bitstream = true;
config.bitstream_is_big_endian = true;
}
Long("z80") => {
config.use_bitstream = true;
config.bitstream_is_big_endian = true;
config.invert_bit_encoding = true;
config.simplified_prob_update = true;
}
Long("x86") => {
config.use_bitstream = true;
config.continue_value_bit = false;
config.is_match_bit = false;
}
Short('u') | Long("unpack") => unpack = true, Short('u') | Long("unpack") => unpack = true,
Short('l') | Long("level") => level = parser.value()?.parse()?, Short('l') | Long("level") => level = parser.value()?.parse()?,
Short('h') | Long("help") => print_help(0), Short('h') | Long("help") => print_help(0),
@@ -107,9 +129,21 @@ fn print_help(exit_code: i32) -> ! {
eprintln!(" -l, --level N compression level 0-9"); eprintln!(" -l, --level N compression level 0-9");
eprintln!(" -u, --unpack unpack infile"); eprintln!(" -u, --unpack unpack infile");
eprintln!(); eprintln!();
eprintln!("Config presets for specific unpackers:");
eprintln!(" --z80 --big-endian-bitstream --invert-bit-encoding --simplified-prob-update");
eprintln!(" --x86 --bitstream --invert-is-match-bit --invert-continue-value-bit");
eprintln!();
eprintln!("Config options (need to match when packing/unpacking):"); eprintln!("Config options (need to match when packing/unpacking):");
eprintln!(" -b, --bitstream bitstream mode"); eprintln!(" -b, --bitstream bitstream mode");
eprintln!(" -p, --parity N use N (2/4) parity contexts"); eprintln!(" -p, --parity N use N (2/4) parity contexts");
eprintln!(" -r, --reverse reverse input & output"); eprintln!(" -r, --reverse reverse input & output");
eprintln!();
eprintln!("Config options to tailor output to specific optimized unpackers:");
eprintln!(" --invert-is-match-bit");
eprintln!(" --invert-new-offset-bit");
eprintln!(" --invert-continue-value-bit");
eprintln!(" --invert-bit-encoding");
eprintln!(" --simplified-prob-update");
eprintln!(" --big-endian-bitstream (implies --bitstream)");
process::exit(exit_code); process::exit(exit_code);
} }

View File

@@ -9,27 +9,21 @@ use crate::{lz, ProgressCallback};
pub fn pack( pub fn pack(
data: &[u8], data: &[u8],
level: u8, level: u8,
use_bitstream: bool, config: &crate::Config,
parity_contexts: usize,
progress_cb: Option<ProgressCallback>, progress_cb: Option<ProgressCallback>,
) -> Vec<u8> { ) -> Vec<u8> {
let mut parse = parse( let mut parse = parse(data, Config::from_level(level), config, progress_cb);
data,
Config::from_level(level),
parity_contexts,
progress_cb,
);
let mut ops = vec![]; let mut ops = vec![];
while let Some(link) = parse { while let Some(link) = parse {
ops.push(link.op); ops.push(link.op);
parse = link.prev.clone(); parse = link.prev.clone();
} }
let mut state = lz::CoderState::new(parity_contexts); let mut state = lz::CoderState::new(config);
let mut coder = RansCoder::new(use_bitstream); let mut coder = RansCoder::new(config);
for op in ops.into_iter().rev() { for op in ops.into_iter().rev() {
op.encode(&mut coder, &mut state); op.encode(&mut coder, &mut state, config);
} }
lz::encode_eof(&mut coder, &mut state); lz::encode_eof(&mut coder, &mut state, config);
coder.finish() coder.finish()
} }
@@ -49,7 +43,7 @@ type Arrivals = HashMap<usize, Vec<Arrival>>;
fn parse( fn parse(
data: &[u8], data: &[u8],
config: Config, config: Config,
parity_contexts: usize, encoding_config: &crate::Config,
mut progress_cb: Option<ProgressCallback>, mut progress_cb: Option<ProgressCallback>,
) -> Option<Rc<Parse>> { ) -> Option<Rc<Parse>> {
let mut match_finder = MatchFinder::new(data) let mut match_finder = MatchFinder::new(data)
@@ -114,6 +108,7 @@ fn parse(
length: usize, length: usize,
arrival: &Arrival, arrival: &Arrival,
max_arrivals: usize, max_arrivals: usize,
config: &crate::Config,
) { ) {
cost_counter.reset(); cost_counter.reset();
let mut state = arrival.state.clone(); let mut state = arrival.state.clone();
@@ -121,7 +116,7 @@ fn parse(
offset: offset as u32, offset: offset as u32,
len: length as u32, len: length as u32,
}; };
op.encode(cost_counter, &mut state); op.encode(cost_counter, &mut state, config);
add_arrival( add_arrival(
arrivals, arrivals,
pos + length, pos + length,
@@ -141,13 +136,13 @@ fn parse(
0, 0,
Arrival { Arrival {
parse: None, parse: None,
state: lz::CoderState::new(parity_contexts), state: lz::CoderState::new(encoding_config),
cost: 0.0, cost: 0.0,
}, },
max_arrivals, max_arrivals,
); );
let cost_counter = &mut CostCounter::new(); let cost_counter = &mut CostCounter::new(encoding_config);
let mut best_per_offset = HashMap::new(); let mut best_per_offset = HashMap::new();
for pos in 0..data.len() { for pos in 0..data.len() {
let match_length = |offset: usize| { let match_length = |offset: usize| {
@@ -197,6 +192,7 @@ fn parse(
m.length, m.length,
&arrival, &arrival,
max_arrivals, max_arrivals,
encoding_config,
); );
if m.length >= config.greedy_size { if m.length >= config.greedy_size {
break 'arrival_loop; break 'arrival_loop;
@@ -220,6 +216,7 @@ fn parse(
length, length,
&arrival, &arrival,
max_arrivals, max_arrivals,
encoding_config,
); );
found_last_offset |= offset as u32 == arrival.state.last_offset(); found_last_offset |= offset as u32 == arrival.state.last_offset();
if offset < near_matches.len() { if offset < near_matches.len() {
@@ -240,6 +237,7 @@ fn parse(
length, length,
&arrival, &arrival,
max_arrivals, max_arrivals,
encoding_config,
); );
} }
} }
@@ -247,7 +245,7 @@ fn parse(
cost_counter.reset(); cost_counter.reset();
let mut state = arrival.state; let mut state = arrival.state;
let op = lz::Op::Literal(data[pos]); let op = lz::Op::Literal(data[pos]);
op.encode(cost_counter, &mut state); op.encode(cost_counter, &mut state, encoding_config);
add_arrival( add_arrival(
&mut arrivals, &mut arrivals,
pos + 1, pos + 1,

View File

@@ -1,4 +1,4 @@
use crate::context_state::Context; use crate::{context_state::Context, Config};
pub const PROB_BITS: u32 = 8; pub const PROB_BITS: u32 = 8;
pub const ONE_PROB: u32 = 1 << PROB_BITS; pub const ONE_PROB: u32 = 1 << PROB_BITS;
@@ -15,20 +15,25 @@ pub trait EntropyCoder {
pub struct RansCoder { pub struct RansCoder {
bits: Vec<u16>, bits: Vec<u16>,
use_bitstream: bool, use_bitstream: bool,
bitstream_is_big_endian: bool,
invert_bit_encoding: bool,
} }
impl EntropyCoder for RansCoder { impl EntropyCoder for RansCoder {
fn encode_bit(&mut self, bit: bool, prob: u16) { fn encode_bit(&mut self, bit: bool, prob: u16) {
assert!(prob < 32768); assert!(prob < 32768);
self.bits.push(prob | ((bit as u16) << 15)); self.bits
.push(prob | (((bit ^ self.invert_bit_encoding) as u16) << 15));
} }
} }
impl RansCoder { impl RansCoder {
pub fn new(use_bitstream: bool) -> RansCoder { pub fn new(config: &Config) -> RansCoder {
RansCoder { RansCoder {
bits: Vec::new(), bits: Vec::new(),
use_bitstream, use_bitstream: config.use_bitstream,
bitstream_is_big_endian: config.bitstream_is_big_endian,
invert_bit_encoding: config.invert_bit_encoding,
} }
} }
@@ -38,8 +43,20 @@ impl RansCoder {
let mut state = 1 << l_bits; let mut state = 1 << l_bits;
let mut byte = 0u8; let mut byte = 0u8;
let mut bit = 8; let mut bit = if self.bitstream_is_big_endian { 0 } else { 8 };
let mut flush_state: Box<dyn FnMut(&mut u32)> = if self.use_bitstream { let mut flush_state: Box<dyn FnMut(&mut u32)> = if self.use_bitstream {
if self.bitstream_is_big_endian {
Box::new(|state: &mut u32| {
byte |= ((*state & 1) as u8) << bit;
bit += 1;
if bit == 8 {
buffer.push(byte);
byte = 0;
bit = 0;
}
*state >>= 1;
})
} else {
Box::new(|state: &mut u32| { Box::new(|state: &mut u32| {
bit -= 1; bit -= 1;
byte |= ((*state & 1) as u8) << bit; byte |= ((*state & 1) as u8) << bit;
@@ -50,6 +67,7 @@ impl RansCoder {
} }
*state >>= 1; *state >>= 1;
}) })
}
} else { } else {
Box::new(|state: &mut u32| { Box::new(|state: &mut u32| {
buffer.push(*state as u8); buffer.push(*state as u8);
@@ -91,10 +109,11 @@ impl RansCoder {
pub struct CostCounter { pub struct CostCounter {
cost: f64, cost: f64,
log2_table: Vec<f64>, log2_table: Vec<f64>,
invert_bit_encoding: bool,
} }
impl CostCounter { impl CostCounter {
pub fn new() -> CostCounter { pub fn new(config: &Config) -> CostCounter {
let log2_table = (0..ONE_PROB) let log2_table = (0..ONE_PROB)
.map(|prob| { .map(|prob| {
let inv_prob = ONE_PROB as f64 / prob as f64; let inv_prob = ONE_PROB as f64 / prob as f64;
@@ -104,6 +123,7 @@ impl CostCounter {
CostCounter { CostCounter {
cost: 0.0, cost: 0.0,
log2_table, log2_table,
invert_bit_encoding: config.invert_bit_encoding,
} }
} }
@@ -118,7 +138,7 @@ impl CostCounter {
impl EntropyCoder for CostCounter { impl EntropyCoder for CostCounter {
fn encode_bit(&mut self, bit: bool, prob: u16) { fn encode_bit(&mut self, bit: bool, prob: u16) {
let prob = if bit { let prob = if bit ^ self.invert_bit_encoding {
prob as u32 prob as u32
} else { } else {
ONE_PROB - prob as u32 ONE_PROB - prob as u32
@@ -133,18 +153,22 @@ pub struct RansDecoder<'a> {
use_bitstream: bool, use_bitstream: bool,
byte: u8, byte: u8,
bits_left: u8, bits_left: u8,
invert_bit_encoding: bool,
bitstream_is_big_endian: bool,
} }
const PROB_MASK: u32 = ONE_PROB - 1; const PROB_MASK: u32 = ONE_PROB - 1;
impl<'a> RansDecoder<'a> { impl<'a> RansDecoder<'a> {
pub fn new(data: &'a [u8], use_bitstream: bool) -> RansDecoder<'a> { pub fn new(data: &'a [u8], config: &Config) -> RansDecoder<'a> {
RansDecoder { RansDecoder {
data, data,
state: 0, state: 0,
use_bitstream, use_bitstream: config.use_bitstream,
byte: 0, byte: 0,
bits_left: 0, bits_left: 0,
invert_bit_encoding: config.invert_bit_encoding,
bitstream_is_big_endian: config.bitstream_is_big_endian,
} }
} }
@@ -163,8 +187,13 @@ impl<'a> RansDecoder<'a> {
self.data = &self.data[1..]; self.data = &self.data[1..];
self.bits_left = 8; self.bits_left = 8;
} }
if self.bitstream_is_big_endian {
self.state = (self.state << 1) | (self.byte >> 7) as u32;
self.byte <<= 1;
} else {
self.state = (self.state << 1) | (self.byte & 1) as u32; self.state = (self.state << 1) | (self.byte & 1) as u32;
self.byte >>= 1; self.byte >>= 1;
}
self.bits_left -= 1; self.bits_left -= 1;
} }
} else { } else {
@@ -183,6 +212,6 @@ impl<'a> RansDecoder<'a> {
}; };
self.state = prob * (self.state >> PROB_BITS) + (self.state & PROB_MASK) - start; self.state = prob * (self.state >> PROB_BITS) + (self.state & PROB_MASK) - start;
bit bit ^ self.invert_bit_encoding
} }
} }