8 Commits

16 changed files with 832 additions and 187 deletions

58
Cargo.lock generated
View File

@@ -95,6 +95,24 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "proc-macro2"
version = "1.0.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7bd7356a8122b6c4a24a82b278680c73357984ca2fc79a0f9fa6dea7dced7c58"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
dependencies = [
"proc-macro2",
]
[[package]] [[package]]
name = "sacabase" name = "sacabase"
version = "2.0.0" version = "2.0.0"
@@ -104,6 +122,37 @@ dependencies = [
"num-traits", "num-traits",
] ]
[[package]]
name = "syn"
version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e90cde112c4b9690b8cbe810cba9ddd8bc1d7472e2cae317b69e9438c1cba7d2"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a99cb8c4b9a8ef0e7907cd3b617cc8dc04d571c4e73c8ae403d80ac160bb122"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a891860d3c8d66fec8e73ddb3765f90082374dbaaa833407b904a94f1a7eb43"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "time" name = "time"
version = "0.1.44" version = "0.1.44"
@@ -115,14 +164,21 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "unicode-ident"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd"
[[package]] [[package]]
name = "upkr" name = "upkr"
version = "0.1.0" version = "0.2.0-pre3"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"cdivsufsort", "cdivsufsort",
"lexopt", "lexopt",
"pbr", "pbr",
"thiserror",
] ]
[[package]] [[package]]

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "upkr" name = "upkr"
version = "0.2.0" version = "0.2.0-pre3"
edition = "2021" edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -9,4 +9,5 @@ edition = "2021"
cdivsufsort = "2" cdivsufsort = "2"
lexopt = "0.2.1" lexopt = "0.2.1"
anyhow = "1" anyhow = "1"
thiserror = "1.0.36"
pbr = "1" pbr = "1"

View File

@@ -6,10 +6,25 @@ test_riscv64: build/unpack_riscv64
qemu-riscv64 $< test_data.upk /tmp/out.bin qemu-riscv64 $< test_data.upk /tmp/out.bin
cmp test_data.bin /tmp/out.bin cmp test_data.bin /tmp/out.bin
build/unpack_riscv64.bin: unpack_riscv.S build/unpack_riscv64.o: unpack_riscv.S
mkdir -p build mkdir -p build
riscv64-linux-gnu-gcc -c -o build/unpack_riscv64.o $? riscv64-linux-gnu-gcc -c -o $@ $?
riscv64-linux-gnu-objcopy -O binary --only-section=.text build/unpack_riscv64.o $@
build/unpack_riscv64.bin: build/unpack_riscv64.o
riscv64-linux-gnu-objcopy -O binary --only-section=.text $? $@
disas-riscv64: build/unpack_riscv64.o
riscv64-linux-gnu-objdump -d $?
build/unpack_riscv32.o: unpack_riscv.S
mkdir -p build
riscv64-linux-gnu-gcc -march=rv32imc -mabi=ilp32 -c -o $@ $?
build/unpack_riscv32.bin: build/unpack_riscv32.o
riscv64-linux-gnu-objcopy -O binary --only-section=.text $? $@
disas-riscv32: build/unpack_riscv32.o
riscv64-linux-gnu-objdump -d $?
build/unpack_armv6m: ../c_unpacker/main.c unpack_armv6m.S build/unpack_armv6m: ../c_unpacker/main.c unpack_armv6m.S
mkdir -p build mkdir -p build
@@ -32,5 +47,5 @@ test_c: build/unpack_c
$< test_data.upk /tmp/out.bin $< test_data.upk /tmp/out.bin
cmp test_data.bin /tmp/out.bin cmp test_data.bin /tmp/out.bin
sizes: build/unpack_armv6m.bin build/unpack_riscv64.bin sizes: build/unpack_armv6m.bin build/unpack_riscv64.bin build/unpack_riscv32.bin
ls -l build/*.bin ls -l build/*.bin

View File

@@ -1,6 +1,6 @@
.section .text .section .text
#define FRAME_SIZE (256+64*4+4) #define FRAME_SIZE (256+32*4+4)
// x8 prob array ptr // x8 prob array ptr
// x9 prev was literal // x9 prev was literal
@@ -28,39 +28,39 @@ upkr_unpack:
jal upkr_decode_bit jal upkr_decode_bit
beqz x15, .Lliteral beqz x15, .Lliteral
li x14, 256 slli x14, x14, 8
beqz x9, .Lread_offset beqz x9, .Lread_offset_inc_x14
jal upkr_decode_bit jal upkr_decode_bit
beqz x15, .Lskip_offset bnez x15, .Lread_offset
.Lread_offset:
jal t3, upkr_decode_number
addi x12, x9, -1
beqz x12, .Ldone
.Lskip_offset: .Lfinished_offset:
li x14, 256+64 addi x14, x14, 64
jal t3, upkr_decode_number jal t3, upkr_decode_number
1: 1:
sub x15, x10, x12 add x14, x10, t0
lbu x15, (x15) lbu x14, (x14)
sb x15, (x10) .Lstore_byte:
sb x14, (x10)
addi x10, x10, 1 addi x10, x10, 1
addi x9, x9, -1 addi x9, x9, 1
bnez x9, 1b blt x9, x0, 1b
j .Lmainloop j .Lmainloop
.Lliteral: .Lliteral:
li x14, 1
1:
jal upkr_decode_bit jal upkr_decode_bit
addi x14, x14, -1
slli x14, x14, 1 slli x14, x14, 1
add x14, x14, x15 add x14, x14, x15
srli x9, x14, 8 srli x9, x14, 8
beqz x9, 1b beqz x9, .Lliteral
sb x14, 0(x10) j .Lstore_byte
addi x10, x10, 1
j .Lmainloop
.Lread_offset_inc_x14:
addi x14, x14, 1
.Lread_offset:
jal t3, upkr_decode_number
addi t0, x9, 1
bnez t0, .Lfinished_offset
.Ldone: .Ldone:
addi sp, sp, FRAME_SIZE addi sp, sp, FRAME_SIZE
mv x8, x17 mv x8, x17
@@ -68,16 +68,14 @@ upkr_unpack:
jr t4 jr t4
// x14 context index // x14 context index
// return: x9 decoded number // return: x9 negative decoded number
upkr_decode_number: upkr_decode_number:
mv t5, x14 mv t5, x14
li x9, 0 li x9, 0
li x8, 1 li x8, -1
1: 1:
addi x14, x14, 1
jal upkr_decode_bit jal upkr_decode_bit
beqz x15, 1f beqz x15, 1f
addi x14, x14, 1
jal upkr_decode_bit jal upkr_decode_bit
beqz x15, 2f beqz x15, 2f
add x9, x9, x8 add x9, x9, x8
@@ -99,46 +97,46 @@ upkr_load_byte:
// x11 in ptr // x11 in ptr
// x13 state // x13 state
// x14 context index // x14 context index
// return: x15 decoded bit // return:
// x14 context index + 1
// x15 decoded bit
upkr_decode_bit: upkr_decode_bit:
srli x15, x13, 12 srli x15, x13, 12
beqz x15, upkr_load_byte beqz x15, upkr_load_byte
mv t0, x9
mv t1, x14 mv t1, x14
mv t2, x10 mv t2, x10
add x14, x14, sp add x14, x14, sp
lbu x9, 0(x14) lbu x12, 0(x14)
andi x10, x13, 255 andi x10, x13, 255
sltu x15, x10, x9 sltu x15, x10, x12
srli x13, x13, 8 srli x13, x13, 8
beqz x15, .Lelse beqz x15, .Lelse
mul x13, x13, x9 mul x13, x13, x12
add x13, x13, x10 add x13, x13, x10
li x10, 256 + 8 li x10, 256 + 8
sub x10, x10, x9 sub x10, x10, x12
srli x10, x10, 4 srli x10, x10, 4
add x9, x9, x10 add x12, x12, x10
j .Lendif j .Lendif
.Lelse: .Lelse:
li x16, 256 li x16, 256
sub x16, x16, x9 sub x16, x16, x12
mul x13, x13, x16 mul x13, x13, x16
add x13, x13, x10 add x13, x13, x10
sub x13, x13, x9 sub x13, x13, x12
addi x10, x9, 8 addi x10, x12, 8
srli x10, x10, 4 srli x10, x10, 4
sub x9, x9, x10 sub x12, x12, x10
.Lendif: .Lendif:
sb x9, 0(x14) sb x12, 0(x14)
mv x9, t0 addi x14, t1, 1
mv x14, t1
mv x10, t2 mv x10, t2
ret ret

3
fuzz/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
target
corpus
artifacts

247
fuzz/Cargo.lock generated Normal file
View File

@@ -0,0 +1,247 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "anyhow"
version = "1.0.65"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "98161a4e3e2184da77bb14f02184cdd111e83bbbcc9979dfee3c44b9a85f5602"
[[package]]
name = "arbitrary"
version = "1.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f44124848854b941eafdb34f05b3bcf59472f643c7e151eba7c2b69daa469ed5"
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "cc"
version = "1.0.73"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
dependencies = [
"jobserver",
]
[[package]]
name = "cdivsufsort"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edefce019197609da416762da75bb000bbd2224b2d89a7e722c2296cbff79b8c"
dependencies = [
"cc",
"sacabase",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "crossbeam-channel"
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521"
dependencies = [
"cfg-if",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc"
dependencies = [
"cfg-if",
"once_cell",
]
[[package]]
name = "jobserver"
version = "0.1.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "068b1ee6743e4d11fb9c6a1e6064b3693a1b600e7f5f5988047d98b3dc9fb90b"
dependencies = [
"libc",
]
[[package]]
name = "lexopt"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "478ee9e62aaeaf5b140bd4138753d1f109765488581444218d3ddda43234f3e8"
[[package]]
name = "libc"
version = "0.2.133"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0f80d65747a3e43d1596c7c5492d95d5edddaabd45a7fcdb02b95f644164966"
[[package]]
name = "libfuzzer-sys"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae185684fe19814afd066da15a7cc41e126886c21282934225d9fc847582da58"
dependencies = [
"arbitrary",
"cc",
"once_cell",
]
[[package]]
name = "num-traits"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1"
[[package]]
name = "pbr"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff5751d87f7c00ae6403eb1fcbba229b9c76c9a30de8c1cf87182177b168cea2"
dependencies = [
"crossbeam-channel",
"libc",
"time",
"winapi",
]
[[package]]
name = "proc-macro2"
version = "1.0.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7bd7356a8122b6c4a24a82b278680c73357984ca2fc79a0f9fa6dea7dced7c58"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
dependencies = [
"proc-macro2",
]
[[package]]
name = "sacabase"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9883fc3d6ce3d78bb54d908602f8bc1f7b5f983afe601dabe083009d86267a84"
dependencies = [
"num-traits",
]
[[package]]
name = "syn"
version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e90cde112c4b9690b8cbe810cba9ddd8bc1d7472e2cae317b69e9438c1cba7d2"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a99cb8c4b9a8ef0e7907cd3b617cc8dc04d571c4e73c8ae403d80ac160bb122"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a891860d3c8d66fec8e73ddb3765f90082374dbaaa833407b904a94f1a7eb43"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "time"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"
dependencies = [
"libc",
"wasi",
"winapi",
]
[[package]]
name = "unicode-ident"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd"
[[package]]
name = "upkr"
version = "0.2.0-pre3"
dependencies = [
"anyhow",
"cdivsufsort",
"lexopt",
"pbr",
"thiserror",
]
[[package]]
name = "upkr-fuzz"
version = "0.0.0"
dependencies = [
"libfuzzer-sys",
"upkr",
]
[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

31
fuzz/Cargo.toml Normal file
View File

@@ -0,0 +1,31 @@
[package]
name = "upkr-fuzz"
version = "0.0.0"
authors = ["Automatically generated"]
publish = false
edition = "2018"
[package.metadata]
cargo-fuzz = true
[dependencies]
libfuzzer-sys = "0.4"
[dependencies.upkr]
path = ".."
# Prevent this from interfering with workspaces
[workspace]
members = ["."]
[[bin]]
name = "all_configs"
path = "fuzz_targets/all_configs.rs"
test = false
doc = false
[[bin]]
name = "unpack"
path = "fuzz_targets/unpack.rs"
test = false
doc = false

View File

@@ -0,0 +1,29 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
// Round-trip fuzz target: packs the input with a configuration derived from
// the input itself, unpacks the result with the same configuration and checks
// that the original payload comes back unchanged.
fuzz_target!(|data: &[u8]| {
    let mut config = upkr::Config::default();

    // Inputs longer than two bytes give up their first two bytes as option
    // flags; anything shorter is packed as-is with the default configuration
    // and level 1.
    let (level, data) = if data.len() > 2 {
        let (header, rest) = data.split_at(2);
        let (flags1, flags2) = (header[0], header[1]);
        let is_set = |flags: u8, mask: u8| (flags & mask) != 0;

        // First flag byte: one option per bit.
        config.use_bitstream = is_set(flags1, 1);
        config.parity_contexts = if is_set(flags1, 2) { 2 } else { 1 };
        config.invert_bit_encoding = is_set(flags1, 4);
        config.is_match_bit = is_set(flags1, 8);
        config.new_offset_bit = is_set(flags1, 16);
        config.continue_value_bit = is_set(flags1, 32);
        config.bitstream_is_big_endian = is_set(flags1, 64);
        config.simplified_prob_update = is_set(flags1, 128);

        // Second flag byte: bits 0-2 and 5 select options, bits 3-4 the level.
        config.eof_in_length = is_set(flags2, 1);
        config.max_offset = if is_set(flags2, 2) { 32 } else { usize::MAX };
        config.max_length = if is_set(flags2, 4) { 5 } else { usize::MAX };
        config.no_repeated_offsets = is_set(flags2, 32);

        ((flags2 >> 3) & 3, rest)
    } else {
        (1, data)
    };

    let packed = upkr::pack(data, level, &config, None);
    let unpacked = upkr::unpack(&packed, &config, 1024 * 1024).unwrap();
    assert!(unpacked == data);
});

View File

@@ -0,0 +1,6 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
// Unpack-only fuzz target: hands the raw fuzzer input to the unpacker using
// the default configuration and a 64 KiB output limit. The outcome (Ok or Err)
// is deliberately discarded.
fuzz_target!(|data: &[u8]| {
    let config = upkr::Config::default();
    let size_limit = 64 * 1024;
    let _ = upkr::unpack(data, &config, size_limit);
});

View File

@@ -1,4 +1,7 @@
use crate::rans::{PROB_BITS, ONE_PROB}; use crate::{
rans::{ONE_PROB, PROB_BITS},
Config,
};
const INIT_PROB: u16 = 1 << (PROB_BITS - 1); const INIT_PROB: u16 = 1 << (PROB_BITS - 1);
const UPDATE_RATE: u32 = 4; const UPDATE_RATE: u32 = 4;
@@ -7,6 +10,8 @@ const UPDATE_ADD: u32 = 8;
#[derive(Clone)] #[derive(Clone)]
pub struct ContextState { pub struct ContextState {
contexts: Vec<u8>, contexts: Vec<u8>,
invert_bit_encoding: bool,
simplified_prob_update: bool,
} }
pub struct Context<'a> { pub struct Context<'a> {
@@ -15,9 +20,11 @@ pub struct Context<'a> {
} }
impl ContextState { impl ContextState {
pub fn new(size: usize) -> ContextState { pub fn new(size: usize, config: &Config) -> ContextState {
ContextState { ContextState {
contexts: vec![INIT_PROB as u8; size], contexts: vec![INIT_PROB as u8; size],
invert_bit_encoding: config.invert_bit_encoding,
simplified_prob_update: config.simplified_prob_update,
} }
} }
@@ -33,10 +40,21 @@ impl<'a> Context<'a> {
pub fn update(&mut self, bit: bool) { pub fn update(&mut self, bit: bool) {
let old = self.state.contexts[self.index]; let old = self.state.contexts[self.index];
self.state.contexts[self.index] = if bit {
old + ((ONE_PROB - old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8 self.state.contexts[self.index] = if self.state.simplified_prob_update {
let offset = if bit ^ self.state.invert_bit_encoding {
ONE_PROB as i32 >> UPDATE_RATE
} else {
0
};
(offset + old as i32 - ((old as i32 + UPDATE_ADD as i32) >> UPDATE_RATE)) as u8
} else { } else {
old - ((old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8 if bit ^ self.state.invert_bit_encoding {
old + ((ONE_PROB - old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8
} else {
old - ((old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8
}
}; };
} }
} }

View File

@@ -1,17 +1,16 @@
use crate::lz;
use crate::match_finder::MatchFinder; use crate::match_finder::MatchFinder;
use crate::rans::RansCoder; use crate::rans::RansCoder;
use crate::ProgressCallback; use crate::ProgressCallback;
use crate::{lz, Config};
pub fn pack( pub fn pack(
data: &[u8], data: &[u8],
use_bitstream: bool, config: &Config,
parity_contexts: usize,
mut progress_callback: Option<ProgressCallback>, mut progress_callback: Option<ProgressCallback>,
) -> Vec<u8> { ) -> Vec<u8> {
let mut match_finder = MatchFinder::new(data); let mut match_finder = MatchFinder::new(data);
let mut rans_coder = RansCoder::new(use_bitstream); let mut rans_coder = RansCoder::new(config);
let mut state = lz::CoderState::new(parity_contexts); let mut state = lz::CoderState::new(config);
let mut pos = 0; let mut pos = 0;
while pos < data.len() { while pos < data.len() {
@@ -20,15 +19,16 @@ pub fn pack(
} }
let mut encoded_match = false; let mut encoded_match = false;
if let Some(m) = match_finder.matches(pos).next() { if let Some(m) = match_finder.matches(pos).next() {
let max_offset = 1 << (m.length * 3 - 1).min(31); let max_offset = config.max_offset.min(1 << (m.length * 3 - 1).min(31));
let offset = pos - m.pos; let offset = pos - m.pos;
if offset < max_offset { if offset < max_offset && m.length >= config.min_length() {
let length = m.length.min(config.max_length);
lz::Op::Match { lz::Op::Match {
offset: offset as u32, offset: offset as u32,
len: m.length as u32, len: length as u32,
} }
.encode(&mut rans_coder, &mut state); .encode(&mut rans_coder, &mut state, config);
pos += m.length; pos += length;
encoded_match = true; encoded_match = true;
} }
} }
@@ -40,13 +40,14 @@ pub fn pack(
.iter() .iter()
.zip(data[(pos - offset)..].iter()) .zip(data[(pos - offset)..].iter())
.take_while(|(a, b)| a == b) .take_while(|(a, b)| a == b)
.count(); .count()
if length > 0 { .min(config.max_length);
if length >= config.min_length() {
lz::Op::Match { lz::Op::Match {
offset: offset as u32, offset: offset as u32,
len: length as u32, len: length as u32,
} }
.encode(&mut rans_coder, &mut state); .encode(&mut rans_coder, &mut state, config);
pos += length; pos += length;
encoded_match = true; encoded_match = true;
} }
@@ -54,11 +55,11 @@ pub fn pack(
} }
if !encoded_match { if !encoded_match {
lz::Op::Literal(data[pos]).encode(&mut rans_coder, &mut state); lz::Op::Literal(data[pos]).encode(&mut rans_coder, &mut state, config);
pos += 1; pos += 1;
} }
} }
lz::encode_eof(&mut rans_coder, &mut state); lz::encode_eof(&mut rans_coder, &mut state, config);
rans_coder.finish() rans_coder.finish()
} }

View File

@@ -5,13 +5,28 @@ mod match_finder;
mod parsing_packer; mod parsing_packer;
mod rans; mod rans;
pub use lz::unpack; pub use lz::{calculate_margin, unpack, UnpackError};
pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize); pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize);
#[derive(Debug)]
pub struct Config { pub struct Config {
pub use_bitstream: bool, pub use_bitstream: bool,
pub parity_contexts: usize, pub parity_contexts: usize,
pub invert_bit_encoding: bool,
pub is_match_bit: bool,
pub new_offset_bit: bool,
pub continue_value_bit: bool,
pub bitstream_is_big_endian: bool,
pub simplified_prob_update: bool,
pub no_repeated_offsets: bool,
pub eof_in_length: bool,
pub max_offset: usize,
pub max_length: usize,
} }
impl Default for Config { impl Default for Config {
@@ -19,6 +34,30 @@ impl Default for Config {
Config { Config {
use_bitstream: false, use_bitstream: false,
parity_contexts: 1, parity_contexts: 1,
invert_bit_encoding: false,
is_match_bit: true,
new_offset_bit: true,
continue_value_bit: true,
bitstream_is_big_endian: false,
simplified_prob_update: false,
no_repeated_offsets: false,
eof_in_length: false,
max_offset: usize::MAX,
max_length: usize::MAX,
}
}
}
impl Config {
pub fn min_length(&self) -> usize {
if self.eof_in_length {
2
} else {
1
} }
} }
} }
@@ -26,24 +65,13 @@ impl Default for Config {
pub fn pack( pub fn pack(
data: &[u8], data: &[u8],
level: u8, level: u8,
config: Config, config: &Config,
progress_callback: Option<ProgressCallback>, progress_callback: Option<ProgressCallback>,
) -> Vec<u8> { ) -> Vec<u8> {
if level == 0 { if level == 0 {
greedy_packer::pack( greedy_packer::pack(data, config, progress_callback)
data,
config.use_bitstream,
config.parity_contexts,
progress_callback,
)
} else { } else {
parsing_packer::pack( parsing_packer::pack(data, level, config, progress_callback)
data,
level,
config.use_bitstream,
config.parity_contexts,
progress_callback,
)
} }
} }

179
src/lz.rs
View File

@@ -1,5 +1,7 @@
use crate::context_state::ContextState; use crate::context_state::ContextState;
use crate::rans::{EntropyCoder, RansDecoder}; use crate::rans::{EntropyCoder, RansDecoder};
use crate::Config;
use thiserror::Error;
#[derive(Copy, Clone, Debug)] #[derive(Copy, Clone, Debug)]
pub enum Op { pub enum Op {
@@ -8,11 +10,11 @@ pub enum Op {
} }
impl Op { impl Op {
pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState) { pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) {
let literal_base = state.pos % state.parity_contexts * 256; let literal_base = state.pos % state.parity_contexts * 256;
match self { match self {
&Op::Literal(lit) => { &Op::Literal(lit) => {
encode_bit(coder, state, literal_base, false); encode_bit(coder, state, literal_base, !config.is_match_bit);
let mut context_index = 1; let mut context_index = 1;
for i in (0..8).rev() { for i in (0..8).rev() {
let bit = (lit >> i) & 1 != 0; let bit = (lit >> i) & 1 != 0;
@@ -23,22 +25,30 @@ impl Op {
state.pos += 1; state.pos += 1;
} }
&Op::Match { offset, len } => { &Op::Match { offset, len } => {
encode_bit(coder, state, literal_base, true); encode_bit(coder, state, literal_base, config.is_match_bit);
if !state.prev_was_match { let mut new_offset = true;
if !state.prev_was_match && !config.no_repeated_offsets {
new_offset = offset != state.last_offset;
encode_bit( encode_bit(
coder, coder,
state, state,
256 * state.parity_contexts, 256 * state.parity_contexts,
offset != state.last_offset, new_offset == config.new_offset_bit,
); );
} else {
assert!(offset != state.last_offset);
} }
if offset != state.last_offset { assert!(offset as usize <= config.max_offset);
encode_length(coder, state, 256 * state.parity_contexts + 1, offset + 1); if new_offset {
encode_length(
coder,
state,
256 * state.parity_contexts + 1,
offset + if config.eof_in_length { 0 } else { 1 },
config,
);
state.last_offset = offset; state.last_offset = offset;
} }
encode_length(coder, state, 256 * state.parity_contexts + 65, len); assert!(len as usize >= config.min_length() && len as usize <= config.max_length);
encode_length(coder, state, 256 * state.parity_contexts + 65, len, config);
state.prev_was_match = true; state.prev_was_match = true;
state.pos += len as usize; state.pos += len as usize;
} }
@@ -46,12 +56,27 @@ impl Op {
} }
} }
pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState) { pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) {
encode_bit(coder, state, state.pos % state.parity_contexts * 256, true); encode_bit(
if !state.prev_was_match { coder,
encode_bit(coder, state, 256 * state.parity_contexts, true); state,
state.pos % state.parity_contexts * 256,
config.is_match_bit,
);
if !state.prev_was_match && !config.no_repeated_offsets {
encode_bit(
coder,
state,
256 * state.parity_contexts,
config.new_offset_bit ^ config.eof_in_length,
);
}
if !config.eof_in_length || state.prev_was_match || config.no_repeated_offsets {
encode_length(coder, state, 256 * state.parity_contexts + 1, 1, config);
}
if config.eof_in_length {
encode_length(coder, state, 256 * state.parity_contexts + 65, 1, config);
} }
encode_length(coder, state, 256 * state.parity_contexts + 1, 1);
} }
fn encode_bit( fn encode_bit(
@@ -68,36 +93,37 @@ fn encode_length(
state: &mut CoderState, state: &mut CoderState,
context_start: usize, context_start: usize,
mut value: u32, mut value: u32,
config: &Config,
) { ) {
assert!(value >= 1); assert!(value >= 1);
let mut context_index = context_start; let mut context_index = context_start;
while value >= 2 { while value >= 2 {
encode_bit(coder, state, context_index, true); encode_bit(coder, state, context_index, config.continue_value_bit);
encode_bit(coder, state, context_index + 1, value & 1 != 0); encode_bit(coder, state, context_index + 1, value & 1 != 0);
context_index += 2; context_index += 2;
value >>= 1; value >>= 1;
} }
encode_bit(coder, state, context_index, false); encode_bit(coder, state, context_index, !config.continue_value_bit);
} }
#[derive(Clone)] #[derive(Clone)]
pub struct CoderState { pub struct CoderState {
contexts: ContextState, contexts: ContextState,
parity_contexts: usize,
last_offset: u32, last_offset: u32,
prev_was_match: bool, prev_was_match: bool,
pos: usize, pos: usize,
parity_contexts: usize,
} }
impl CoderState { impl CoderState {
pub fn new(parity_contexts: usize) -> CoderState { pub fn new(config: &Config) -> CoderState {
CoderState { CoderState {
contexts: ContextState::new((1 + 255) * parity_contexts + 1 + 64 + 64), contexts: ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, config),
last_offset: 0, last_offset: 0,
parity_contexts,
prev_was_match: false, prev_was_match: false,
pos: 0, pos: 0,
parity_contexts: config.parity_contexts,
} }
} }
@@ -106,42 +132,89 @@ impl CoderState {
} }
} }
pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec<u8> { #[derive(Error, Debug)]
let mut decoder = RansDecoder::new(packed_data, config.use_bitstream); pub enum UnpackError {
let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64); #[error("match offset out of range: {offset} > {position}")]
OffsetOutOfRange { offset: usize, position: usize },
#[error("Unpacked data over size limit: {size} > {limit}")]
OverSize { size: usize, limit: usize },
#[error("Unexpected end of input data")]
UnexpectedEOF {
#[from]
source: crate::rans::UnexpectedEOF,
},
#[error("Overflow while reading value")]
ValueOverflow,
}
pub fn unpack(
packed_data: &[u8],
config: &Config,
max_size: usize,
) -> Result<Vec<u8>, UnpackError> {
let mut result = vec![]; let mut result = vec![];
let mut offset = 0; let _ = unpack_internal(Some(&mut result), packed_data, config, max_size)?;
Ok(result)
}
pub fn calculate_margin(packed_data: &[u8], config: &Config) -> Result<isize, UnpackError> {
unpack_internal(None, packed_data, config, usize::MAX)
}
pub fn unpack_internal(
mut result: Option<&mut Vec<u8>>,
packed_data: &[u8],
config: &Config,
max_size: usize,
) -> Result<isize, UnpackError> {
let mut decoder = RansDecoder::new(packed_data, &config);
let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, &config);
let mut offset = usize::MAX;
let mut position = 0usize;
let mut prev_was_match = false; let mut prev_was_match = false;
let mut margin = 0isize;
fn decode_length( fn decode_length(
decoder: &mut RansDecoder, decoder: &mut RansDecoder,
contexts: &mut ContextState, contexts: &mut ContextState,
mut context_index: usize, mut context_index: usize,
) -> usize { config: &Config,
) -> Result<usize, UnpackError> {
let mut length = 0; let mut length = 0;
let mut bit_pos = 0; let mut bit_pos = 0;
while decoder.decode_with_context(&mut contexts.context_mut(context_index)) { while decoder.decode_with_context(&mut contexts.context_mut(context_index))?
length |= (decoder.decode_with_context(&mut contexts.context_mut(context_index + 1)) == config.continue_value_bit
{
length |= (decoder.decode_with_context(&mut contexts.context_mut(context_index + 1))?
as usize) as usize)
<< bit_pos; << bit_pos;
bit_pos += 1; bit_pos += 1;
if bit_pos >= 32 {
return Err(UnpackError::ValueOverflow);
}
context_index += 2; context_index += 2;
} }
length | (1 << bit_pos) Ok(length | (1 << bit_pos))
} }
loop { loop {
let literal_base = result.len() % config.parity_contexts * 256; margin = margin.max(position as isize - decoder.pos() as isize);
if decoder.decode_with_context(&mut contexts.context_mut(literal_base)) { let literal_base = position % config.parity_contexts * 256;
if prev_was_match if decoder.decode_with_context(&mut contexts.context_mut(literal_base))?
== config.is_match_bit
{
if config.no_repeated_offsets
|| prev_was_match
|| decoder || decoder
.decode_with_context(&mut contexts.context_mut(256 * config.parity_contexts)) .decode_with_context(&mut contexts.context_mut(256 * config.parity_contexts))?
== config.new_offset_bit
{ {
offset = decode_length( offset = decode_length(
&mut decoder, &mut decoder,
&mut contexts, &mut contexts,
256 * config.parity_contexts + 1, 256 * config.parity_contexts + 1,
) - 1; &config,
)? - if config.eof_in_length { 0 } else { 1 };
if offset == 0 { if offset == 0 {
break; break;
} }
@@ -150,24 +223,50 @@ pub fn unpack(packed_data: &[u8], config: crate::Config) -> Vec<u8> {
&mut decoder, &mut decoder,
&mut contexts, &mut contexts,
256 * config.parity_contexts + 65, 256 * config.parity_contexts + 65,
); &config,
for _ in 0..length { )?;
result.push(result[result.len() - offset]); if config.eof_in_length && length == 1 {
break;
} }
if offset > position {
return Err(UnpackError::OffsetOutOfRange { offset, position });
}
if let Some(ref mut result) = result {
for _ in 0..length {
if result.len() < max_size {
result.push(result[result.len() - offset]);
} else {
break;
}
}
}
position += length;
prev_was_match = true; prev_was_match = true;
} else { } else {
let mut context_index = 1; let mut context_index = 1;
let mut byte = 0; let mut byte = 0;
for i in (0..8).rev() { for i in (0..8).rev() {
let bit = decoder let bit = decoder
.decode_with_context(&mut contexts.context_mut(literal_base + context_index)); .decode_with_context(&mut contexts.context_mut(literal_base + context_index))?;
context_index = (context_index << 1) | bit as usize; context_index = (context_index << 1) | bit as usize;
byte |= (bit as u8) << i; byte |= (bit as u8) << i;
} }
result.push(byte); if let Some(ref mut result) = result {
if result.len() < max_size {
result.push(byte);
}
}
position += 1;
prev_was_match = false; prev_was_match = false;
} }
} }
result if position > max_size {
return Err(UnpackError::OverSize {
size: position,
limit: max_size,
});
}
Ok(margin + decoder.pos() as isize - position as isize)
} }

View File

@@ -8,9 +8,11 @@ fn main() -> Result<()> {
let mut config = upkr::Config::default(); let mut config = upkr::Config::default();
let mut reverse = false; let mut reverse = false;
let mut unpack = false; let mut unpack = false;
let mut calculate_margin = false;
let mut level = 2; let mut level = 2;
let mut infile: Option<PathBuf> = None; let mut infile: Option<PathBuf> = None;
let mut outfile: Option<PathBuf> = None; let mut outfile: Option<PathBuf> = None;
let mut max_unpacked_size = 512 * 1024 * 1024;
let mut parser = lexopt::Parser::from_env(); let mut parser = lexopt::Parser::from_env();
while let Some(arg) = parser.next()? { while let Some(arg) = parser.next()? {
@@ -19,9 +21,41 @@ fn main() -> Result<()> {
Short('b') | Long("bitstream") => config.use_bitstream = true, Short('b') | Long("bitstream") => config.use_bitstream = true,
Short('p') | Long("parity") => config.parity_contexts = parser.value()?.parse()?, Short('p') | Long("parity") => config.parity_contexts = parser.value()?.parse()?,
Short('r') | Long("reverse") => reverse = true, Short('r') | Long("reverse") => reverse = true,
Long("invert-is-match-bit") => config.is_match_bit = false,
Long("invert-new-offset-bit") => config.new_offset_bit = false,
Long("invert-continue-value-bit") => config.continue_value_bit = false,
Long("invert-bit-encoding") => config.invert_bit_encoding = true,
Long("simplified-prob-update") => config.simplified_prob_update = true,
Long("big-endian-bitstream") => {
config.use_bitstream = true;
config.bitstream_is_big_endian = true;
}
Long("no-repeated-offsets") => config.no_repeated_offsets = true,
Long("eof-in-length") => config.eof_in_length = true,
Long("max-offset") => config.max_offset = parser.value()?.parse()?,
Long("max-length") => config.max_length = parser.value()?.parse()?,
Long("z80") => {
config.use_bitstream = true;
config.bitstream_is_big_endian = true;
config.invert_bit_encoding = true;
config.simplified_prob_update = true;
level = 9;
}
Long("x86") => {
config.use_bitstream = true;
config.continue_value_bit = false;
config.is_match_bit = false;
config.new_offset_bit = false;
}
Short('u') | Long("unpack") => unpack = true, Short('u') | Long("unpack") => unpack = true,
Long("margin") => calculate_margin = true,
Short('l') | Long("level") => level = parser.value()?.parse()?, Short('l') | Long("level") => level = parser.value()?.parse()?,
Short(n) if n.is_ascii_digit() => level = n as u8 - b'0',
Short('h') | Long("help") => print_help(0), Short('h') | Long("help") => print_help(0),
Long("max-unpacked-size") => max_unpacked_size = parser.value()?.parse()?,
Value(val) if infile.is_none() => infile = Some(val.try_into()?), Value(val) if infile.is_none() => infile = Some(val.try_into()?),
Value(val) if outfile.is_none() => outfile = Some(val.try_into()?), Value(val) if outfile.is_none() => outfile = Some(val.try_into()?),
_ => return Err(arg.unexpected().into()), _ => return Err(arg.unexpected().into()),
@@ -53,7 +87,7 @@ fn main() -> Result<()> {
process::exit(1); process::exit(1);
} }
if !unpack { if !unpack && !calculate_margin {
let mut data = vec![]; let mut data = vec![];
File::open(infile)?.read_to_end(&mut data)?; File::open(infile)?.read_to_end(&mut data)?;
if reverse { if reverse {
@@ -65,7 +99,7 @@ fn main() -> Result<()> {
let mut packed_data = upkr::pack( let mut packed_data = upkr::pack(
&data, &data,
level, level,
config, &config,
Some(&mut |pos| { Some(&mut |pos| {
pb.set(pos as u64); pb.set(pos as u64);
}), }),
@@ -89,11 +123,16 @@ fn main() -> Result<()> {
if reverse { if reverse {
data.reverse(); data.reverse();
} }
let mut unpacked_data = upkr::unpack(&data, config); if unpack {
if reverse { let mut unpacked_data = upkr::unpack(&data, &config, max_unpacked_size)?;
unpacked_data.reverse(); if reverse {
unpacked_data.reverse();
}
File::create(outfile)?.write_all(&unpacked_data)?;
}
if calculate_margin {
println!("{}", upkr::calculate_margin(&data, &config)?);
} }
File::create(outfile)?.write_all(&unpacked_data)?;
} }
Ok(()) Ok(())
@@ -103,13 +142,34 @@ fn print_help(exit_code: i32) -> ! {
eprintln!("Usage:"); eprintln!("Usage:");
eprintln!(" upkr [-l level(0-9)] [config options] <infile> [<outfile>]"); eprintln!(" upkr [-l level(0-9)] [config options] <infile> [<outfile>]");
eprintln!(" upkr -u [config options] <infile> [<outfile>]"); eprintln!(" upkr -u [config options] <infile> [<outfile>]");
eprintln!(" upkr --margin [config options] <infile>");
eprintln!(); eprintln!();
eprintln!(" -l, --level N compression level 0-9"); eprintln!(" -l, --level N compression level 0-9");
eprintln!(" -0, ..., -9 short form for setting compression level");
eprintln!(" -u, --unpack unpack infile"); eprintln!(" -u, --unpack unpack infile");
eprintln!(" --margin calculate margin for overlapped unpacking of a packed file");
eprintln!();
eprintln!("Config presets for specific unpackers:");
eprintln!(" --z80 --big-endian-bitstream --invert-bit-encoding --simplified-prob-update -9");
eprintln!(
" --x86 --bitstream --invert-is-match-bit --invert-continue-value-bit --invert-new-offset-bit"
);
eprintln!(); eprintln!();
eprintln!("Config options (need to match when packing/unpacking):"); eprintln!("Config options (need to match when packing/unpacking):");
eprintln!(" -b, --bitstream bitstream mode"); eprintln!(" -b, --bitstream bitstream mode");
eprintln!(" -p, --parity N use N (2/4) parity contexts"); eprintln!(" -p, --parity N use N (2/4) parity contexts");
eprintln!(" -r, --reverse reverse input & output"); eprintln!(" -r, --reverse reverse input & output");
eprintln!();
eprintln!("Config options to tailor output to specific optimized unpackers:");
eprintln!(" --invert-is-match-bit");
eprintln!(" --invert-new-offset-bit");
eprintln!(" --invert-continue-value-bit");
eprintln!(" --invert-bit-encoding");
eprintln!(" --simplified-prob-update");
eprintln!(" --big-endian-bitstream (implies --bitstream)");
eprintln!(" --no-repeated-offsets");
eprintln!(" --eof-in-length");
eprintln!(" --max-offset N");
eprintln!(" --max-length N");
process::exit(exit_code); process::exit(exit_code);
} }

View File

@@ -9,27 +9,21 @@ use crate::{lz, ProgressCallback};
pub fn pack( pub fn pack(
data: &[u8], data: &[u8],
level: u8, level: u8,
use_bitstream: bool, config: &crate::Config,
parity_contexts: usize,
progress_cb: Option<ProgressCallback>, progress_cb: Option<ProgressCallback>,
) -> Vec<u8> { ) -> Vec<u8> {
let mut parse = parse( let mut parse = parse(data, Config::from_level(level), config, progress_cb);
data,
Config::from_level(level),
parity_contexts,
progress_cb,
);
let mut ops = vec![]; let mut ops = vec![];
while let Some(link) = parse { while let Some(link) = parse {
ops.push(link.op); ops.push(link.op);
parse = link.prev.clone(); parse = link.prev.clone();
} }
let mut state = lz::CoderState::new(parity_contexts); let mut state = lz::CoderState::new(config);
let mut coder = RansCoder::new(use_bitstream); let mut coder = RansCoder::new(config);
for op in ops.into_iter().rev() { for op in ops.into_iter().rev() {
op.encode(&mut coder, &mut state); op.encode(&mut coder, &mut state, config);
} }
lz::encode_eof(&mut coder, &mut state); lz::encode_eof(&mut coder, &mut state, config);
coder.finish() coder.finish()
} }
@@ -49,7 +43,7 @@ type Arrivals = HashMap<usize, Vec<Arrival>>;
fn parse( fn parse(
data: &[u8], data: &[u8],
config: Config, config: Config,
parity_contexts: usize, encoding_config: &crate::Config,
mut progress_cb: Option<ProgressCallback>, mut progress_cb: Option<ProgressCallback>,
) -> Option<Rc<Parse>> { ) -> Option<Rc<Parse>> {
let mut match_finder = MatchFinder::new(data) let mut match_finder = MatchFinder::new(data)
@@ -111,17 +105,22 @@ fn parse(
cost_counter: &mut CostCounter, cost_counter: &mut CostCounter,
pos: usize, pos: usize,
offset: usize, offset: usize,
length: usize, mut length: usize,
arrival: &Arrival, arrival: &Arrival,
max_arrivals: usize, max_arrivals: usize,
config: &crate::Config,
) { ) {
if length < config.min_length() {
return;
}
length = length.min(config.max_length);
cost_counter.reset(); cost_counter.reset();
let mut state = arrival.state.clone(); let mut state = arrival.state.clone();
let op = lz::Op::Match { let op = lz::Op::Match {
offset: offset as u32, offset: offset as u32,
len: length as u32, len: length as u32,
}; };
op.encode(cost_counter, &mut state); op.encode(cost_counter, &mut state, config);
add_arrival( add_arrival(
arrivals, arrivals,
pos + length, pos + length,
@@ -141,13 +140,13 @@ fn parse(
0, 0,
Arrival { Arrival {
parse: None, parse: None,
state: lz::CoderState::new(parity_contexts), state: lz::CoderState::new(encoding_config),
cost: 0.0, cost: 0.0,
}, },
max_arrivals, max_arrivals,
); );
let cost_counter = &mut CostCounter::new(); let cost_counter = &mut CostCounter::new(encoding_config);
let mut best_per_offset = HashMap::new(); let mut best_per_offset = HashMap::new();
for pos in 0..data.len() { for pos in 0..data.len() {
let match_length = |offset: usize| { let match_length = |offset: usize| {
@@ -188,18 +187,21 @@ fn parse(
for m in match_finder.matches(pos) { for m in match_finder.matches(pos) {
closest_match = Some(closest_match.unwrap_or(0).max(m.pos)); closest_match = Some(closest_match.unwrap_or(0).max(m.pos));
let offset = pos - m.pos; let offset = pos - m.pos;
found_last_offset |= offset as u32 == arrival.state.last_offset(); if offset <= encoding_config.max_offset {
add_match( found_last_offset |= offset as u32 == arrival.state.last_offset();
&mut arrivals, add_match(
cost_counter, &mut arrivals,
pos, cost_counter,
offset, pos,
m.length, offset,
&arrival, m.length,
max_arrivals, &arrival,
); max_arrivals,
if m.length >= config.greedy_size { encoding_config,
break 'arrival_loop; );
if m.length >= config.greedy_size {
break 'arrival_loop;
}
} }
} }
@@ -210,6 +212,9 @@ fn parse(
&& closest_match.iter().all(|p| *p < match_pos) && closest_match.iter().all(|p| *p < match_pos)
{ {
let offset = pos - match_pos; let offset = pos - match_pos;
if offset > encoding_config.max_offset {
break;
}
let length = match_length(offset); let length = match_length(offset);
assert!(length > 0); assert!(length > 0);
add_match( add_match(
@@ -220,6 +225,7 @@ fn parse(
length, length,
&arrival, &arrival,
max_arrivals, max_arrivals,
encoding_config,
); );
found_last_offset |= offset as u32 == arrival.state.last_offset(); found_last_offset |= offset as u32 == arrival.state.last_offset();
if offset < near_matches.len() { if offset < near_matches.len() {
@@ -240,6 +246,7 @@ fn parse(
length, length,
&arrival, &arrival,
max_arrivals, max_arrivals,
encoding_config,
); );
} }
} }
@@ -247,7 +254,7 @@ fn parse(
cost_counter.reset(); cost_counter.reset();
let mut state = arrival.state; let mut state = arrival.state;
let op = lz::Op::Literal(data[pos]); let op = lz::Op::Literal(data[pos]);
op.encode(cost_counter, &mut state); op.encode(cost_counter, &mut state, encoding_config);
add_arrival( add_arrival(
&mut arrivals, &mut arrivals,
pos + 1, pos + 1,

View File

@@ -1,4 +1,5 @@
use crate::context_state::Context; use crate::{context_state::Context, Config};
use thiserror::Error;
pub const PROB_BITS: u32 = 8; pub const PROB_BITS: u32 = 8;
pub const ONE_PROB: u32 = 1 << PROB_BITS; pub const ONE_PROB: u32 = 1 << PROB_BITS;
@@ -15,20 +16,25 @@ pub trait EntropyCoder {
pub struct RansCoder { pub struct RansCoder {
bits: Vec<u16>, bits: Vec<u16>,
use_bitstream: bool, use_bitstream: bool,
bitstream_is_big_endian: bool,
invert_bit_encoding: bool,
} }
impl EntropyCoder for RansCoder { impl EntropyCoder for RansCoder {
fn encode_bit(&mut self, bit: bool, prob: u16) { fn encode_bit(&mut self, bit: bool, prob: u16) {
assert!(prob < 32768); assert!(prob < 32768);
self.bits.push(prob | ((bit as u16) << 15)); self.bits
.push(prob | (((bit ^ self.invert_bit_encoding) as u16) << 15));
} }
} }
impl RansCoder { impl RansCoder {
pub fn new(use_bitstream: bool) -> RansCoder { pub fn new(config: &Config) -> RansCoder {
RansCoder { RansCoder {
bits: Vec::new(), bits: Vec::new(),
use_bitstream, use_bitstream: config.use_bitstream,
bitstream_is_big_endian: config.bitstream_is_big_endian,
invert_bit_encoding: config.invert_bit_encoding,
} }
} }
@@ -38,18 +44,31 @@ impl RansCoder {
let mut state = 1 << l_bits; let mut state = 1 << l_bits;
let mut byte = 0u8; let mut byte = 0u8;
let mut bit = 8; let mut bit = if self.bitstream_is_big_endian { 0 } else { 8 };
let mut flush_state: Box<dyn FnMut(&mut u32)> = if self.use_bitstream { let mut flush_state: Box<dyn FnMut(&mut u32)> = if self.use_bitstream {
Box::new(|state: &mut u32| { if self.bitstream_is_big_endian {
bit -= 1; Box::new(|state: &mut u32| {
byte |= ((*state & 1) as u8) << bit; byte |= ((*state & 1) as u8) << bit;
if bit == 0 { bit += 1;
buffer.push(byte); if bit == 8 {
byte = 0; buffer.push(byte);
bit = 8; byte = 0;
} bit = 0;
*state >>= 1; }
}) *state >>= 1;
})
} else {
Box::new(|state: &mut u32| {
bit -= 1;
byte |= ((*state & 1) as u8) << bit;
if bit == 0 {
buffer.push(byte);
byte = 0;
bit = 8;
}
*state >>= 1;
})
}
} else { } else {
Box::new(|state: &mut u32| { Box::new(|state: &mut u32| {
buffer.push(*state as u8); buffer.push(*state as u8);
@@ -91,10 +110,11 @@ impl RansCoder {
pub struct CostCounter { pub struct CostCounter {
cost: f64, cost: f64,
log2_table: Vec<f64>, log2_table: Vec<f64>,
invert_bit_encoding: bool,
} }
impl CostCounter { impl CostCounter {
pub fn new() -> CostCounter { pub fn new(config: &Config) -> CostCounter {
let log2_table = (0..ONE_PROB) let log2_table = (0..ONE_PROB)
.map(|prob| { .map(|prob| {
let inv_prob = ONE_PROB as f64 / prob as f64; let inv_prob = ONE_PROB as f64 / prob as f64;
@@ -104,6 +124,7 @@ impl CostCounter {
CostCounter { CostCounter {
cost: 0.0, cost: 0.0,
log2_table, log2_table,
invert_bit_encoding: config.invert_bit_encoding,
} }
} }
@@ -118,7 +139,7 @@ impl CostCounter {
impl EntropyCoder for CostCounter { impl EntropyCoder for CostCounter {
fn encode_bit(&mut self, bit: bool, prob: u16) { fn encode_bit(&mut self, bit: bool, prob: u16) {
let prob = if bit { let prob = if bit ^ self.invert_bit_encoding {
prob as u32 prob as u32
} else { } else {
ONE_PROB - prob as u32 ONE_PROB - prob as u32
@@ -129,48 +150,73 @@ impl EntropyCoder for CostCounter {
pub struct RansDecoder<'a> { pub struct RansDecoder<'a> {
data: &'a [u8], data: &'a [u8],
pos: usize,
state: u32, state: u32,
use_bitstream: bool, use_bitstream: bool,
byte: u8, byte: u8,
bits_left: u8, bits_left: u8,
invert_bit_encoding: bool,
bitstream_is_big_endian: bool,
} }
const PROB_MASK: u32 = ONE_PROB - 1; const PROB_MASK: u32 = ONE_PROB - 1;
#[derive(Debug, Error)]
#[error("Unexpected end of input")]
pub struct UnexpectedEOF;
impl<'a> RansDecoder<'a> { impl<'a> RansDecoder<'a> {
pub fn new(data: &'a [u8], use_bitstream: bool) -> RansDecoder<'a> { pub fn new(data: &'a [u8], config: &Config) -> RansDecoder<'a> {
RansDecoder { RansDecoder {
data, data,
pos: 0,
state: 0, state: 0,
use_bitstream, use_bitstream: config.use_bitstream,
byte: 0, byte: 0,
bits_left: 0, bits_left: 0,
invert_bit_encoding: config.invert_bit_encoding,
bitstream_is_big_endian: config.bitstream_is_big_endian,
} }
} }
pub fn decode_with_context(&mut self, context: &mut Context) -> bool { pub fn pos(&self) -> usize {
let bit = self.decode_bit(context.prob()); self.pos
context.update(bit);
bit
} }
pub fn decode_bit(&mut self, prob: u16) -> bool { pub fn decode_with_context(&mut self, context: &mut Context) -> Result<bool, UnexpectedEOF> {
let bit = self.decode_bit(context.prob())?;
context.update(bit);
Ok(bit)
}
pub fn decode_bit(&mut self, prob: u16) -> Result<bool, UnexpectedEOF> {
let prob = prob as u32; let prob = prob as u32;
if self.use_bitstream { if self.use_bitstream {
while self.state < 32768 { while self.state < 32768 {
if self.bits_left == 0 { if self.bits_left == 0 {
self.byte = self.data[0]; if self.pos >= self.data.len() {
self.data = &self.data[1..]; return Err(UnexpectedEOF);
}
self.byte = self.data[self.pos];
self.pos += 1;
self.bits_left = 8; self.bits_left = 8;
} }
self.state = (self.state << 1) | (self.byte & 1) as u32; if self.bitstream_is_big_endian {
self.byte >>= 1; self.state = (self.state << 1) | (self.byte >> 7) as u32;
self.byte <<= 1;
} else {
self.state = (self.state << 1) | (self.byte & 1) as u32;
self.byte >>= 1;
}
self.bits_left -= 1; self.bits_left -= 1;
} }
} else { } else {
while self.state < 4096 { while self.state < 4096 {
self.state = (self.state << 8) | self.data[0] as u32; if self.pos >= self.data.len() {
self.data = &self.data[1..]; return Err(UnexpectedEOF);
}
self.state = (self.state << 8) | self.data[self.pos] as u32;
self.pos += 1;
} }
} }
@@ -183,6 +229,6 @@ impl<'a> RansDecoder<'a> {
}; };
self.state = prob * (self.state >> PROB_BITS) + (self.state & PROB_MASK) - start; self.state = prob * (self.state >> PROB_BITS) + (self.state & PROB_MASK) - start;
bit Ok(bit ^ self.invert_bit_encoding)
} }
} }