mirror of
https://github.com/exoticorn/upkr.git
synced 2026-01-20 19:46:42 +01:00
Compare commits
5 Commits
v0.2.0-pre
...
v0.2.0-pre
| Author | SHA1 | Date | |
|---|---|---|---|
| f6642f07c9 | |||
| 8715dede0e | |||
| b12c8f8d93 | |||
| af5fe898bf | |||
| 331857a711 |
58
Cargo.lock
generated
58
Cargo.lock
generated
@@ -95,6 +95,24 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7bd7356a8122b6c4a24a82b278680c73357984ca2fc79a0f9fa6dea7dced7c58"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sacabase"
|
||||
version = "2.0.0"
|
||||
@@ -104,6 +122,37 @@ dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.101"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e90cde112c4b9690b8cbe810cba9ddd8bc1d7472e2cae317b69e9438c1cba7d2"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.36"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0a99cb8c4b9a8ef0e7907cd3b617cc8dc04d571c4e73c8ae403d80ac160bb122"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.36"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a891860d3c8d66fec8e73ddb3765f90082374dbaaa833407b904a94f1a7eb43"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.1.44"
|
||||
@@ -115,14 +164,21 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd"
|
||||
|
||||
[[package]]
|
||||
name = "upkr"
|
||||
version = "0.2.0-pre1"
|
||||
version = "0.2.0-pre3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"cdivsufsort",
|
||||
"lexopt",
|
||||
"pbr",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "upkr"
|
||||
version = "0.2.0-pre1"
|
||||
version = "0.2.0-pre3"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
@@ -9,4 +9,5 @@ edition = "2021"
|
||||
cdivsufsort = "2"
|
||||
lexopt = "0.2.1"
|
||||
anyhow = "1"
|
||||
thiserror = "1.0.36"
|
||||
pbr = "1"
|
||||
3
fuzz/.gitignore
vendored
Normal file
3
fuzz/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
target
|
||||
corpus
|
||||
artifacts
|
||||
247
fuzz/Cargo.lock
generated
Normal file
247
fuzz/Cargo.lock
generated
Normal file
@@ -0,0 +1,247 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.65"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "98161a4e3e2184da77bb14f02184cdd111e83bbbcc9979dfee3c44b9a85f5602"
|
||||
|
||||
[[package]]
|
||||
name = "arbitrary"
|
||||
version = "1.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f44124848854b941eafdb34f05b3bcf59472f643c7e151eba7c2b69daa469ed5"
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.73"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
|
||||
dependencies = [
|
||||
"jobserver",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cdivsufsort"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "edefce019197609da416762da75bb000bbd2224b2d89a7e722c2296cbff79b8c"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"sacabase",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-channel"
|
||||
version = "0.5.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-utils"
|
||||
version = "0.8.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jobserver"
|
||||
version = "0.1.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "068b1ee6743e4d11fb9c6a1e6064b3693a1b600e7f5f5988047d98b3dc9fb90b"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lexopt"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "478ee9e62aaeaf5b140bd4138753d1f109765488581444218d3ddda43234f3e8"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.133"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c0f80d65747a3e43d1596c7c5492d95d5edddaabd45a7fcdb02b95f644164966"
|
||||
|
||||
[[package]]
|
||||
name = "libfuzzer-sys"
|
||||
version = "0.4.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ae185684fe19814afd066da15a7cc41e126886c21282934225d9fc847582da58"
|
||||
dependencies = [
|
||||
"arbitrary",
|
||||
"cc",
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.15.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1"
|
||||
|
||||
[[package]]
|
||||
name = "pbr"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ff5751d87f7c00ae6403eb1fcbba229b9c76c9a30de8c1cf87182177b168cea2"
|
||||
dependencies = [
|
||||
"crossbeam-channel",
|
||||
"libc",
|
||||
"time",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7bd7356a8122b6c4a24a82b278680c73357984ca2fc79a0f9fa6dea7dced7c58"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sacabase"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9883fc3d6ce3d78bb54d908602f8bc1f7b5f983afe601dabe083009d86267a84"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.101"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e90cde112c4b9690b8cbe810cba9ddd8bc1d7472e2cae317b69e9438c1cba7d2"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.36"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0a99cb8c4b9a8ef0e7907cd3b617cc8dc04d571c4e73c8ae403d80ac160bb122"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.36"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a891860d3c8d66fec8e73ddb3765f90082374dbaaa833407b904a94f1a7eb43"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.1.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"wasi",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd"
|
||||
|
||||
[[package]]
|
||||
name = "upkr"
|
||||
version = "0.2.0-pre3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"cdivsufsort",
|
||||
"lexopt",
|
||||
"pbr",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "upkr-fuzz"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"libfuzzer-sys",
|
||||
"upkr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.10.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu",
|
||||
"winapi-x86_64-pc-windows-gnu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
31
fuzz/Cargo.toml
Normal file
31
fuzz/Cargo.toml
Normal file
@@ -0,0 +1,31 @@
|
||||
[package]
|
||||
name = "upkr-fuzz"
|
||||
version = "0.0.0"
|
||||
authors = ["Automatically generated"]
|
||||
publish = false
|
||||
edition = "2018"
|
||||
|
||||
[package.metadata]
|
||||
cargo-fuzz = true
|
||||
|
||||
[dependencies]
|
||||
libfuzzer-sys = "0.4"
|
||||
|
||||
[dependencies.upkr]
|
||||
path = ".."
|
||||
|
||||
# Prevent this from interfering with workspaces
|
||||
[workspace]
|
||||
members = ["."]
|
||||
|
||||
[[bin]]
|
||||
name = "all_configs"
|
||||
path = "fuzz_targets/all_configs.rs"
|
||||
test = false
|
||||
doc = false
|
||||
|
||||
[[bin]]
|
||||
name = "unpack"
|
||||
path = "fuzz_targets/unpack.rs"
|
||||
test = false
|
||||
doc = false
|
||||
29
fuzz/fuzz_targets/all_configs.rs
Normal file
29
fuzz/fuzz_targets/all_configs.rs
Normal file
@@ -0,0 +1,29 @@
|
||||
#![no_main]
|
||||
use libfuzzer_sys::fuzz_target;
|
||||
|
||||
fuzz_target!(|data: &[u8]| {
|
||||
let mut config = upkr::Config::default();
|
||||
let mut level = 1;
|
||||
let mut data = data;
|
||||
if data.len() > 2 {
|
||||
let flags1 = data[0];
|
||||
let flags2 = data[1];
|
||||
data = &data[2..];
|
||||
config.use_bitstream = (flags1 & 1) != 0;
|
||||
config.parity_contexts = if (flags1 & 2) == 0 { 1 } else { 2 };
|
||||
config.invert_bit_encoding = (flags1 & 4) != 0;
|
||||
config.is_match_bit = (flags1 & 8) != 0;
|
||||
config.new_offset_bit = (flags1 & 16) != 0;
|
||||
config.continue_value_bit = (flags1 & 32) != 0;
|
||||
config.bitstream_is_big_endian = (flags1 & 64) != 0;
|
||||
config.simplified_prob_update = (flags1 & 128) != 0;
|
||||
config.no_repeated_offsets = (flags2 & 32) != 0;
|
||||
config.eof_in_length = (flags2 & 1) != 0;
|
||||
config.max_offset = if (flags2 & 2) == 0 { usize::MAX } else { 32 };
|
||||
config.max_length = if (flags2 & 4) == 0 { usize::MAX } else { 5 };
|
||||
level = (flags2 >> 3) & 3;
|
||||
}
|
||||
let packed = upkr::pack(data, level, &config, None);
|
||||
let unpacked = upkr::unpack(&packed, &config, 1024 * 1024).unwrap();
|
||||
assert!(unpacked == data);
|
||||
});
|
||||
6
fuzz/fuzz_targets/unpack.rs
Normal file
6
fuzz/fuzz_targets/unpack.rs
Normal file
@@ -0,0 +1,6 @@
|
||||
#![no_main]
|
||||
use libfuzzer_sys::fuzz_target;
|
||||
|
||||
fuzz_target!(|data: &[u8]| {
|
||||
let _ = upkr::unpack(data, &upkr::Config::default(), 64 * 1024);
|
||||
});
|
||||
@@ -19,15 +19,16 @@ pub fn pack(
|
||||
}
|
||||
let mut encoded_match = false;
|
||||
if let Some(m) = match_finder.matches(pos).next() {
|
||||
let max_offset = 1 << (m.length * 3 - 1).min(31);
|
||||
let max_offset = config.max_offset.min(1 << (m.length * 3 - 1).min(31));
|
||||
let offset = pos - m.pos;
|
||||
if offset < max_offset {
|
||||
if offset < max_offset && m.length >= config.min_length() {
|
||||
let length = m.length.min(config.max_length);
|
||||
lz::Op::Match {
|
||||
offset: offset as u32,
|
||||
len: m.length as u32,
|
||||
len: length as u32,
|
||||
}
|
||||
.encode(&mut rans_coder, &mut state, config);
|
||||
pos += m.length;
|
||||
pos += length;
|
||||
encoded_match = true;
|
||||
}
|
||||
}
|
||||
@@ -39,8 +40,9 @@ pub fn pack(
|
||||
.iter()
|
||||
.zip(data[(pos - offset)..].iter())
|
||||
.take_while(|(a, b)| a == b)
|
||||
.count();
|
||||
if length > 0 {
|
||||
.count()
|
||||
.min(config.max_length);
|
||||
if length >= config.min_length() {
|
||||
lz::Op::Match {
|
||||
offset: offset as u32,
|
||||
len: length as u32,
|
||||
|
||||
31
src/lib.rs
31
src/lib.rs
@@ -5,10 +5,11 @@ mod match_finder;
|
||||
mod parsing_packer;
|
||||
mod rans;
|
||||
|
||||
pub use lz::unpack;
|
||||
pub use lz::{calculate_margin, unpack, UnpackError};
|
||||
|
||||
pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize);
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Config {
|
||||
pub use_bitstream: bool,
|
||||
pub parity_contexts: usize,
|
||||
@@ -20,6 +21,12 @@ pub struct Config {
|
||||
|
||||
pub bitstream_is_big_endian: bool,
|
||||
pub simplified_prob_update: bool,
|
||||
|
||||
pub no_repeated_offsets: bool,
|
||||
pub eof_in_length: bool,
|
||||
|
||||
pub max_offset: usize,
|
||||
pub max_length: usize,
|
||||
}
|
||||
|
||||
impl Default for Config {
|
||||
@@ -35,6 +42,22 @@ impl Default for Config {
|
||||
|
||||
bitstream_is_big_endian: false,
|
||||
simplified_prob_update: false,
|
||||
|
||||
no_repeated_offsets: false,
|
||||
eof_in_length: false,
|
||||
|
||||
max_offset: usize::MAX,
|
||||
max_length: usize::MAX,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Config {
|
||||
pub fn min_length(&self) -> usize {
|
||||
if self.eof_in_length {
|
||||
2
|
||||
} else {
|
||||
1
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -42,13 +65,13 @@ impl Default for Config {
|
||||
pub fn pack(
|
||||
data: &[u8],
|
||||
level: u8,
|
||||
config: Config,
|
||||
config: &Config,
|
||||
progress_callback: Option<ProgressCallback>,
|
||||
) -> Vec<u8> {
|
||||
if level == 0 {
|
||||
greedy_packer::pack(data, &config, progress_callback)
|
||||
greedy_packer::pack(data, config, progress_callback)
|
||||
} else {
|
||||
parsing_packer::pack(data, level, &config, progress_callback)
|
||||
parsing_packer::pack(data, level, config, progress_callback)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
127
src/lz.rs
127
src/lz.rs
@@ -1,6 +1,7 @@
|
||||
use crate::context_state::ContextState;
|
||||
use crate::rans::{EntropyCoder, RansDecoder};
|
||||
use crate::Config;
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub enum Op {
|
||||
@@ -25,26 +26,28 @@ impl Op {
|
||||
}
|
||||
&Op::Match { offset, len } => {
|
||||
encode_bit(coder, state, literal_base, config.is_match_bit);
|
||||
if !state.prev_was_match {
|
||||
let mut new_offset = true;
|
||||
if !state.prev_was_match && !config.no_repeated_offsets {
|
||||
new_offset = offset != state.last_offset;
|
||||
encode_bit(
|
||||
coder,
|
||||
state,
|
||||
256 * state.parity_contexts,
|
||||
(offset != state.last_offset) == config.new_offset_bit,
|
||||
new_offset == config.new_offset_bit,
|
||||
);
|
||||
} else {
|
||||
assert!(offset != state.last_offset);
|
||||
}
|
||||
if offset != state.last_offset {
|
||||
assert!(offset as usize <= config.max_offset);
|
||||
if new_offset {
|
||||
encode_length(
|
||||
coder,
|
||||
state,
|
||||
256 * state.parity_contexts + 1,
|
||||
offset + 1,
|
||||
offset + if config.eof_in_length { 0 } else { 1 },
|
||||
config,
|
||||
);
|
||||
state.last_offset = offset;
|
||||
}
|
||||
assert!(len as usize >= config.min_length() && len as usize <= config.max_length);
|
||||
encode_length(coder, state, 256 * state.parity_contexts + 65, len, config);
|
||||
state.prev_was_match = true;
|
||||
state.pos += len as usize;
|
||||
@@ -60,15 +63,20 @@ pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState, config:
|
||||
state.pos % state.parity_contexts * 256,
|
||||
config.is_match_bit,
|
||||
);
|
||||
if !state.prev_was_match {
|
||||
if !state.prev_was_match && !config.no_repeated_offsets {
|
||||
encode_bit(
|
||||
coder,
|
||||
state,
|
||||
256 * state.parity_contexts,
|
||||
config.new_offset_bit,
|
||||
config.new_offset_bit ^ config.eof_in_length,
|
||||
);
|
||||
}
|
||||
encode_length(coder, state, 256 * state.parity_contexts + 1, 1, config);
|
||||
if !config.eof_in_length || state.prev_was_match || config.no_repeated_offsets {
|
||||
encode_length(coder, state, 256 * state.parity_contexts + 1, 1, config);
|
||||
}
|
||||
if config.eof_in_length {
|
||||
encode_length(coder, state, 256 * state.parity_contexts + 65, 1, config);
|
||||
}
|
||||
}
|
||||
|
||||
fn encode_bit(
|
||||
@@ -124,41 +132,81 @@ impl CoderState {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
|
||||
#[derive(Error, Debug)]
|
||||
pub enum UnpackError {
|
||||
#[error("match offset out of range: {offset} > {position}")]
|
||||
OffsetOutOfRange { offset: usize, position: usize },
|
||||
#[error("Unpacked data over size limit: {size} > {limit}")]
|
||||
OverSize { size: usize, limit: usize },
|
||||
#[error("Unexpected end of input data")]
|
||||
UnexpectedEOF {
|
||||
#[from]
|
||||
source: crate::rans::UnexpectedEOF,
|
||||
},
|
||||
#[error("Overflow while reading value")]
|
||||
ValueOverflow,
|
||||
}
|
||||
|
||||
pub fn unpack(
|
||||
packed_data: &[u8],
|
||||
config: &Config,
|
||||
max_size: usize,
|
||||
) -> Result<Vec<u8>, UnpackError> {
|
||||
let mut result = vec![];
|
||||
let _ = unpack_internal(Some(&mut result), packed_data, config, max_size)?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn calculate_margin(packed_data: &[u8], config: &Config) -> Result<isize, UnpackError> {
|
||||
unpack_internal(None, packed_data, config, usize::MAX)
|
||||
}
|
||||
|
||||
pub fn unpack_internal(
|
||||
mut result: Option<&mut Vec<u8>>,
|
||||
packed_data: &[u8],
|
||||
config: &Config,
|
||||
max_size: usize,
|
||||
) -> Result<isize, UnpackError> {
|
||||
let mut decoder = RansDecoder::new(packed_data, &config);
|
||||
let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, &config);
|
||||
let mut result = vec![];
|
||||
let mut offset = 0;
|
||||
let mut offset = usize::MAX;
|
||||
let mut position = 0usize;
|
||||
let mut prev_was_match = false;
|
||||
let mut margin = 0isize;
|
||||
|
||||
fn decode_length(
|
||||
decoder: &mut RansDecoder,
|
||||
contexts: &mut ContextState,
|
||||
mut context_index: usize,
|
||||
config: &Config,
|
||||
) -> usize {
|
||||
) -> Result<usize, UnpackError> {
|
||||
let mut length = 0;
|
||||
let mut bit_pos = 0;
|
||||
while decoder.decode_with_context(&mut contexts.context_mut(context_index))
|
||||
while decoder.decode_with_context(&mut contexts.context_mut(context_index))?
|
||||
== config.continue_value_bit
|
||||
{
|
||||
length |= (decoder.decode_with_context(&mut contexts.context_mut(context_index + 1))
|
||||
length |= (decoder.decode_with_context(&mut contexts.context_mut(context_index + 1))?
|
||||
as usize)
|
||||
<< bit_pos;
|
||||
bit_pos += 1;
|
||||
if bit_pos >= 32 {
|
||||
return Err(UnpackError::ValueOverflow);
|
||||
}
|
||||
context_index += 2;
|
||||
}
|
||||
length | (1 << bit_pos)
|
||||
Ok(length | (1 << bit_pos))
|
||||
}
|
||||
|
||||
loop {
|
||||
let literal_base = result.len() % config.parity_contexts * 256;
|
||||
if decoder.decode_with_context(&mut contexts.context_mut(literal_base))
|
||||
margin = margin.max(position as isize - decoder.pos() as isize);
|
||||
let literal_base = position % config.parity_contexts * 256;
|
||||
if decoder.decode_with_context(&mut contexts.context_mut(literal_base))?
|
||||
== config.is_match_bit
|
||||
{
|
||||
if prev_was_match
|
||||
if config.no_repeated_offsets
|
||||
|| prev_was_match
|
||||
|| decoder
|
||||
.decode_with_context(&mut contexts.context_mut(256 * config.parity_contexts))
|
||||
.decode_with_context(&mut contexts.context_mut(256 * config.parity_contexts))?
|
||||
== config.new_offset_bit
|
||||
{
|
||||
offset = decode_length(
|
||||
@@ -166,7 +214,7 @@ pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
|
||||
&mut contexts,
|
||||
256 * config.parity_contexts + 1,
|
||||
&config,
|
||||
) - 1;
|
||||
)? - if config.eof_in_length { 0 } else { 1 };
|
||||
if offset == 0 {
|
||||
break;
|
||||
}
|
||||
@@ -176,24 +224,49 @@ pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
|
||||
&mut contexts,
|
||||
256 * config.parity_contexts + 65,
|
||||
&config,
|
||||
);
|
||||
for _ in 0..length {
|
||||
result.push(result[result.len() - offset]);
|
||||
)?;
|
||||
if config.eof_in_length && length == 1 {
|
||||
break;
|
||||
}
|
||||
if offset > position {
|
||||
return Err(UnpackError::OffsetOutOfRange { offset, position });
|
||||
}
|
||||
if let Some(ref mut result) = result {
|
||||
for _ in 0..length {
|
||||
if result.len() < max_size {
|
||||
result.push(result[result.len() - offset]);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
position += length;
|
||||
prev_was_match = true;
|
||||
} else {
|
||||
let mut context_index = 1;
|
||||
let mut byte = 0;
|
||||
for i in (0..8).rev() {
|
||||
let bit = decoder
|
||||
.decode_with_context(&mut contexts.context_mut(literal_base + context_index));
|
||||
.decode_with_context(&mut contexts.context_mut(literal_base + context_index))?;
|
||||
context_index = (context_index << 1) | bit as usize;
|
||||
byte |= (bit as u8) << i;
|
||||
}
|
||||
result.push(byte);
|
||||
if let Some(ref mut result) = result {
|
||||
if result.len() < max_size {
|
||||
result.push(byte);
|
||||
}
|
||||
}
|
||||
position += 1;
|
||||
prev_was_match = false;
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
if position > max_size {
|
||||
return Err(UnpackError::OverSize {
|
||||
size: position,
|
||||
limit: max_size,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(margin + decoder.pos() as isize - position as isize)
|
||||
}
|
||||
|
||||
42
src/main.rs
42
src/main.rs
@@ -8,9 +8,11 @@ fn main() -> Result<()> {
|
||||
let mut config = upkr::Config::default();
|
||||
let mut reverse = false;
|
||||
let mut unpack = false;
|
||||
let mut calculate_margin = false;
|
||||
let mut level = 2;
|
||||
let mut infile: Option<PathBuf> = None;
|
||||
let mut outfile: Option<PathBuf> = None;
|
||||
let mut max_unpacked_size = 512 * 1024 * 1024;
|
||||
|
||||
let mut parser = lexopt::Parser::from_env();
|
||||
while let Some(arg) = parser.next()? {
|
||||
@@ -28,22 +30,32 @@ fn main() -> Result<()> {
|
||||
config.use_bitstream = true;
|
||||
config.bitstream_is_big_endian = true;
|
||||
}
|
||||
Long("no-repeated-offsets") => config.no_repeated_offsets = true,
|
||||
Long("eof-in-length") => config.eof_in_length = true,
|
||||
|
||||
Long("max-offset") => config.max_offset = parser.value()?.parse()?,
|
||||
Long("max-length") => config.max_length = parser.value()?.parse()?,
|
||||
|
||||
Long("z80") => {
|
||||
config.use_bitstream = true;
|
||||
config.bitstream_is_big_endian = true;
|
||||
config.invert_bit_encoding = true;
|
||||
config.simplified_prob_update = true;
|
||||
level = 9;
|
||||
}
|
||||
Long("x86") => {
|
||||
config.use_bitstream = true;
|
||||
config.continue_value_bit = false;
|
||||
config.is_match_bit = false;
|
||||
config.new_offset_bit = false;
|
||||
}
|
||||
|
||||
Short('u') | Long("unpack") => unpack = true,
|
||||
Long("margin") => calculate_margin = true,
|
||||
Short('l') | Long("level") => level = parser.value()?.parse()?,
|
||||
Short(n) if n.is_ascii_digit() => level = n as u8 - b'0',
|
||||
Short('h') | Long("help") => print_help(0),
|
||||
Long("max-unpacked-size") => max_unpacked_size = parser.value()?.parse()?,
|
||||
Value(val) if infile.is_none() => infile = Some(val.try_into()?),
|
||||
Value(val) if outfile.is_none() => outfile = Some(val.try_into()?),
|
||||
_ => return Err(arg.unexpected().into()),
|
||||
@@ -75,7 +87,7 @@ fn main() -> Result<()> {
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
if !unpack {
|
||||
if !unpack && !calculate_margin {
|
||||
let mut data = vec![];
|
||||
File::open(infile)?.read_to_end(&mut data)?;
|
||||
if reverse {
|
||||
@@ -87,7 +99,7 @@ fn main() -> Result<()> {
|
||||
let mut packed_data = upkr::pack(
|
||||
&data,
|
||||
level,
|
||||
config,
|
||||
&config,
|
||||
Some(&mut |pos| {
|
||||
pb.set(pos as u64);
|
||||
}),
|
||||
@@ -111,11 +123,16 @@ fn main() -> Result<()> {
|
||||
if reverse {
|
||||
data.reverse();
|
||||
}
|
||||
let mut unpacked_data = upkr::unpack(&data, config);
|
||||
if reverse {
|
||||
unpacked_data.reverse();
|
||||
if unpack {
|
||||
let mut unpacked_data = upkr::unpack(&data, &config, max_unpacked_size)?;
|
||||
if reverse {
|
||||
unpacked_data.reverse();
|
||||
}
|
||||
File::create(outfile)?.write_all(&unpacked_data)?;
|
||||
}
|
||||
if calculate_margin {
|
||||
println!("{}", upkr::calculate_margin(&data, &config)?);
|
||||
}
|
||||
File::create(outfile)?.write_all(&unpacked_data)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -125,13 +142,18 @@ fn print_help(exit_code: i32) -> ! {
|
||||
eprintln!("Usage:");
|
||||
eprintln!(" upkr [-l level(0-9)] [config options] <infile> [<outfile>]");
|
||||
eprintln!(" upkr -u [config options] <infile> [<outfile>]");
|
||||
eprintln!(" upkr --margin [config options] <infile>");
|
||||
eprintln!();
|
||||
eprintln!(" -l, --level N compression level 0-9");
|
||||
eprintln!(" -0, ..., -9 short form for setting compression level");
|
||||
eprintln!(" -u, --unpack unpack infile");
|
||||
eprintln!(" --margin calculate margin for overlapped unpacking of a packed file");
|
||||
eprintln!();
|
||||
eprintln!("Config presets for specific unpackers:");
|
||||
eprintln!(" --z80 --big-endian-bitstream --invert-bit-encoding --simplified-prob-update");
|
||||
eprintln!(" --x86 --bitstream --invert-is-match-bit --invert-continue-value-bit");
|
||||
eprintln!(" --z80 --big-endian-bitstream --invert-bit-encoding --simplified-prob-update -9");
|
||||
eprintln!(
|
||||
" --x86 --bitstream --invert-is-match-bit --invert-continue-value-bit --invert-new-offset-bit"
|
||||
);
|
||||
eprintln!();
|
||||
eprintln!("Config options (need to match when packing/unpacking):");
|
||||
eprintln!(" -b, --bitstream bitstream mode");
|
||||
@@ -145,5 +167,9 @@ fn print_help(exit_code: i32) -> ! {
|
||||
eprintln!(" --invert-bit-encoding");
|
||||
eprintln!(" --simplified-prob-update");
|
||||
eprintln!(" --big-endian-bitstream (implies --bitstream)");
|
||||
eprintln!(" --no-repeated-offsets");
|
||||
eprintln!(" --eof-in-length");
|
||||
eprintln!(" --max-offset N");
|
||||
eprintln!(" --max-length N");
|
||||
process::exit(exit_code);
|
||||
}
|
||||
|
||||
@@ -105,11 +105,15 @@ fn parse(
|
||||
cost_counter: &mut CostCounter,
|
||||
pos: usize,
|
||||
offset: usize,
|
||||
length: usize,
|
||||
mut length: usize,
|
||||
arrival: &Arrival,
|
||||
max_arrivals: usize,
|
||||
config: &crate::Config,
|
||||
) {
|
||||
if length < config.min_length() {
|
||||
return;
|
||||
}
|
||||
length = length.min(config.max_length);
|
||||
cost_counter.reset();
|
||||
let mut state = arrival.state.clone();
|
||||
let op = lz::Op::Match {
|
||||
@@ -183,19 +187,21 @@ fn parse(
|
||||
for m in match_finder.matches(pos) {
|
||||
closest_match = Some(closest_match.unwrap_or(0).max(m.pos));
|
||||
let offset = pos - m.pos;
|
||||
found_last_offset |= offset as u32 == arrival.state.last_offset();
|
||||
add_match(
|
||||
&mut arrivals,
|
||||
cost_counter,
|
||||
pos,
|
||||
offset,
|
||||
m.length,
|
||||
&arrival,
|
||||
max_arrivals,
|
||||
encoding_config,
|
||||
);
|
||||
if m.length >= config.greedy_size {
|
||||
break 'arrival_loop;
|
||||
if offset <= encoding_config.max_offset {
|
||||
found_last_offset |= offset as u32 == arrival.state.last_offset();
|
||||
add_match(
|
||||
&mut arrivals,
|
||||
cost_counter,
|
||||
pos,
|
||||
offset,
|
||||
m.length,
|
||||
&arrival,
|
||||
max_arrivals,
|
||||
encoding_config,
|
||||
);
|
||||
if m.length >= config.greedy_size {
|
||||
break 'arrival_loop;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -206,6 +212,9 @@ fn parse(
|
||||
&& closest_match.iter().all(|p| *p < match_pos)
|
||||
{
|
||||
let offset = pos - match_pos;
|
||||
if offset > encoding_config.max_offset {
|
||||
break;
|
||||
}
|
||||
let length = match_length(offset);
|
||||
assert!(length > 0);
|
||||
add_match(
|
||||
|
||||
37
src/rans.rs
37
src/rans.rs
@@ -1,4 +1,5 @@
|
||||
use crate::{context_state::Context, Config};
|
||||
use thiserror::Error;
|
||||
|
||||
pub const PROB_BITS: u32 = 8;
|
||||
pub const ONE_PROB: u32 = 1 << PROB_BITS;
|
||||
@@ -149,6 +150,7 @@ impl EntropyCoder for CostCounter {
|
||||
|
||||
pub struct RansDecoder<'a> {
|
||||
data: &'a [u8],
|
||||
pos: usize,
|
||||
state: u32,
|
||||
use_bitstream: bool,
|
||||
byte: u8,
|
||||
@@ -159,10 +161,15 @@ pub struct RansDecoder<'a> {
|
||||
|
||||
const PROB_MASK: u32 = ONE_PROB - 1;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
#[error("Unexpected end of input")]
|
||||
pub struct UnexpectedEOF;
|
||||
|
||||
impl<'a> RansDecoder<'a> {
|
||||
pub fn new(data: &'a [u8], config: &Config) -> RansDecoder<'a> {
|
||||
RansDecoder {
|
||||
data,
|
||||
pos: 0,
|
||||
state: 0,
|
||||
use_bitstream: config.use_bitstream,
|
||||
byte: 0,
|
||||
@@ -172,19 +179,26 @@ impl<'a> RansDecoder<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn decode_with_context(&mut self, context: &mut Context) -> bool {
|
||||
let bit = self.decode_bit(context.prob());
|
||||
context.update(bit);
|
||||
bit
|
||||
pub fn pos(&self) -> usize {
|
||||
self.pos
|
||||
}
|
||||
|
||||
pub fn decode_bit(&mut self, prob: u16) -> bool {
|
||||
pub fn decode_with_context(&mut self, context: &mut Context) -> Result<bool, UnexpectedEOF> {
|
||||
let bit = self.decode_bit(context.prob())?;
|
||||
context.update(bit);
|
||||
Ok(bit)
|
||||
}
|
||||
|
||||
pub fn decode_bit(&mut self, prob: u16) -> Result<bool, UnexpectedEOF> {
|
||||
let prob = prob as u32;
|
||||
if self.use_bitstream {
|
||||
while self.state < 32768 {
|
||||
if self.bits_left == 0 {
|
||||
self.byte = self.data[0];
|
||||
self.data = &self.data[1..];
|
||||
if self.pos >= self.data.len() {
|
||||
return Err(UnexpectedEOF);
|
||||
}
|
||||
self.byte = self.data[self.pos];
|
||||
self.pos += 1;
|
||||
self.bits_left = 8;
|
||||
}
|
||||
if self.bitstream_is_big_endian {
|
||||
@@ -198,8 +212,11 @@ impl<'a> RansDecoder<'a> {
|
||||
}
|
||||
} else {
|
||||
while self.state < 4096 {
|
||||
self.state = (self.state << 8) | self.data[0] as u32;
|
||||
self.data = &self.data[1..];
|
||||
if self.pos >= self.data.len() {
|
||||
return Err(UnexpectedEOF);
|
||||
}
|
||||
self.state = (self.state << 8) | self.data[self.pos] as u32;
|
||||
self.pos += 1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -212,6 +229,6 @@ impl<'a> RansDecoder<'a> {
|
||||
};
|
||||
self.state = prob * (self.state >> PROB_BITS) + (self.state & PROB_MASK) - start;
|
||||
|
||||
bit ^ self.invert_bit_encoding
|
||||
Ok(bit ^ self.invert_bit_encoding)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user