From f6642f07c9150e239d11c9807b05882143f29de2 Mon Sep 17 00:00:00 2001 From: Dennis Ranke Date: Tue, 27 Sep 2022 17:16:05 +0200 Subject: [PATCH] more config options, unpack error handling, fuzzing --- Cargo.lock | 58 +++++++- Cargo.toml | 3 +- fuzz/.gitignore | 3 + fuzz/Cargo.lock | 247 +++++++++++++++++++++++++++++++ fuzz/Cargo.toml | 31 ++++ fuzz/fuzz_targets/all_configs.rs | 29 ++++ fuzz/fuzz_targets/unpack.rs | 6 + src/greedy_packer.rs | 10 +- src/lib.rs | 15 +- src/lz.rs | 93 ++++++++---- src/main.rs | 23 ++- src/parsing_packer.rs | 34 +++-- src/rans.rs | 25 +++- 13 files changed, 515 insertions(+), 62 deletions(-) create mode 100644 fuzz/.gitignore create mode 100644 fuzz/Cargo.lock create mode 100644 fuzz/Cargo.toml create mode 100644 fuzz/fuzz_targets/all_configs.rs create mode 100644 fuzz/fuzz_targets/unpack.rs diff --git a/Cargo.lock b/Cargo.lock index d98035b..d05ac51 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -95,6 +95,24 @@ dependencies = [ "winapi", ] +[[package]] +name = "proc-macro2" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bd7356a8122b6c4a24a82b278680c73357984ca2fc79a0f9fa6dea7dced7c58" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +dependencies = [ + "proc-macro2", +] + [[package]] name = "sacabase" version = "2.0.0" @@ -104,6 +122,37 @@ dependencies = [ "num-traits", ] +[[package]] +name = "syn" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e90cde112c4b9690b8cbe810cba9ddd8bc1d7472e2cae317b69e9438c1cba7d2" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a99cb8c4b9a8ef0e7907cd3b617cc8dc04d571c4e73c8ae403d80ac160bb122" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a891860d3c8d66fec8e73ddb3765f90082374dbaaa833407b904a94f1a7eb43" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "time" version = "0.1.44" @@ -115,14 +164,21 @@ dependencies = [ "winapi", ] +[[package]] +name = "unicode-ident" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd" + [[package]] name = "upkr" -version = "0.2.0-pre2" +version = "0.2.0-pre3" dependencies = [ "anyhow", "cdivsufsort", "lexopt", "pbr", + "thiserror", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 7bde752..70df0cf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "upkr" -version = "0.2.0-pre2" +version = "0.2.0-pre3" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -9,4 +9,5 @@ edition = "2021" cdivsufsort = "2" lexopt = "0.2.1" anyhow = "1" +thiserror = "1.0.36" pbr = "1" \ No newline at end of file diff --git a/fuzz/.gitignore b/fuzz/.gitignore new file mode 100644 index 0000000..a092511 --- /dev/null +++ b/fuzz/.gitignore @@ -0,0 +1,3 @@ +target +corpus +artifacts diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock new file mode 100644 index 0000000..06c61ac --- /dev/null +++ b/fuzz/Cargo.lock @@ -0,0 +1,247 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "anyhow" +version = "1.0.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98161a4e3e2184da77bb14f02184cdd111e83bbbcc9979dfee3c44b9a85f5602" + +[[package]] +name = "arbitrary" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f44124848854b941eafdb34f05b3bcf59472f643c7e151eba7c2b69daa469ed5" + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "cc" +version = "1.0.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" +dependencies = [ + "jobserver", +] + +[[package]] +name = "cdivsufsort" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edefce019197609da416762da75bb000bbd2224b2d89a7e722c2296cbff79b8c" +dependencies = [ + "cc", + "sacabase", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "crossbeam-channel" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc" +dependencies = [ + "cfg-if", + "once_cell", +] + +[[package]] +name = "jobserver" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "068b1ee6743e4d11fb9c6a1e6064b3693a1b600e7f5f5988047d98b3dc9fb90b" +dependencies = [ + "libc", +] + +[[package]] +name = "lexopt" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "478ee9e62aaeaf5b140bd4138753d1f109765488581444218d3ddda43234f3e8" + +[[package]] +name = "libc" +version = "0.2.133" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0f80d65747a3e43d1596c7c5492d95d5edddaabd45a7fcdb02b95f644164966" + +[[package]] +name = "libfuzzer-sys" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae185684fe19814afd066da15a7cc41e126886c21282934225d9fc847582da58" +dependencies = [ + "arbitrary", + "cc", + "once_cell", +] + +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1" + +[[package]] +name = "pbr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff5751d87f7c00ae6403eb1fcbba229b9c76c9a30de8c1cf87182177b168cea2" +dependencies = [ + "crossbeam-channel", + "libc", + "time", + "winapi", +] + +[[package]] +name = "proc-macro2" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bd7356a8122b6c4a24a82b278680c73357984ca2fc79a0f9fa6dea7dced7c58" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "sacabase" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9883fc3d6ce3d78bb54d908602f8bc1f7b5f983afe601dabe083009d86267a84" +dependencies = [ + "num-traits", +] + +[[package]] +name = "syn" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e90cde112c4b9690b8cbe810cba9ddd8bc1d7472e2cae317b69e9438c1cba7d2" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a99cb8c4b9a8ef0e7907cd3b617cc8dc04d571c4e73c8ae403d80ac160bb122" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a891860d3c8d66fec8e73ddb3765f90082374dbaaa833407b904a94f1a7eb43" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "time" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" +dependencies = [ + "libc", + "wasi", + "winapi", +] + +[[package]] +name = "unicode-ident" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd" + +[[package]] +name = "upkr" +version = "0.2.0-pre3" +dependencies = [ + "anyhow", + "cdivsufsort", + "lexopt", + "pbr", + "thiserror", +] + +[[package]] +name = "upkr-fuzz" +version = "0.0.0" +dependencies = [ + "libfuzzer-sys", + "upkr", +] + +[[package]] +name = "wasi" +version = "0.10.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 0000000..ea90b5a --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "upkr-fuzz" +version = "0.0.0" +authors = ["Automatically generated"] +publish = false +edition = "2018" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" + +[dependencies.upkr] +path = ".." + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[[bin]] +name = "all_configs" +path = "fuzz_targets/all_configs.rs" +test = false +doc = false + +[[bin]] +name = "unpack" +path = "fuzz_targets/unpack.rs" +test = false +doc = false diff --git a/fuzz/fuzz_targets/all_configs.rs b/fuzz/fuzz_targets/all_configs.rs new file mode 100644 index 0000000..e082f07 --- /dev/null +++ b/fuzz/fuzz_targets/all_configs.rs @@ -0,0 +1,29 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + let mut config = upkr::Config::default(); + let mut level = 1; + let mut data = data; + if data.len() > 2 { + let flags1 = data[0]; + let flags2 = data[1]; + data = &data[2..]; + config.use_bitstream = (flags1 & 1) != 0; + config.parity_contexts = if (flags1 & 2) == 0 { 1 } else { 2 }; + config.invert_bit_encoding = (flags1 & 4) != 0; + config.is_match_bit = (flags1 & 8) != 0; + config.new_offset_bit = (flags1 & 16) != 0; + config.continue_value_bit = (flags1 & 32) != 0; + config.bitstream_is_big_endian = (flags1 & 64) != 0; + config.simplified_prob_update = (flags1 & 128) != 0; + config.no_repeated_offsets = (flags2 & 32) != 0; + config.eof_in_length = (flags2 & 1) != 0; + config.max_offset = if (flags2 & 2) == 0 { usize::MAX } else { 32 }; + config.max_length = if (flags2 & 4) == 0 { usize::MAX } else { 5 }; + level = (flags2 >> 3) & 3; + } + let packed = upkr::pack(data, level, &config, None); + let unpacked = upkr::unpack(&packed, &config, 1024 * 1024).unwrap(); + assert!(unpacked == data); +}); diff --git a/fuzz/fuzz_targets/unpack.rs b/fuzz/fuzz_targets/unpack.rs new file mode 100644 index 0000000..67e91d4 --- /dev/null +++ b/fuzz/fuzz_targets/unpack.rs @@ -0,0 +1,6 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + let _ = upkr::unpack(data, &upkr::Config::default(), 64 * 1024); +}); diff --git a/src/greedy_packer.rs b/src/greedy_packer.rs index 73cfd84..3b77d13 100644 --- a/src/greedy_packer.rs +++ b/src/greedy_packer.rs @@ -19,15 +19,16 @@ pub fn pack( } let mut encoded_match = false; if let Some(m) = match_finder.matches(pos).next() { - let max_offset = 1 << (m.length * 3 - 1).min(31); + let max_offset = config.max_offset.min(1 << (m.length * 3 - 1).min(31)); let offset = pos - m.pos; if offset < max_offset && m.length >= config.min_length() { + let length = m.length.min(config.max_length); lz::Op::Match { offset: offset as u32, - len: m.length as u32, + len: length as u32, } .encode(&mut rans_coder, &mut state, config); - pos += m.length; + pos += length; encoded_match = true; } } @@ -39,7 +40,8 @@ pub fn pack( .iter() .zip(data[(pos - offset)..].iter()) .take_while(|(a, b)| a == b) - .count(); + .count() + .min(config.max_length); if length >= config.min_length() { lz::Op::Match { offset: offset as u32, diff --git a/src/lib.rs b/src/lib.rs index 6bbcafd..7728f6b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,10 +5,11 @@ mod match_finder; mod parsing_packer; mod rans; -pub use lz::{calculate_margin, unpack}; +pub use lz::{calculate_margin, unpack, UnpackError}; pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize); +#[derive(Debug)] pub struct Config { pub use_bitstream: bool, pub parity_contexts: usize, @@ -23,6 +24,9 @@ pub struct Config { pub no_repeated_offsets: bool, pub eof_in_length: bool, + + pub max_offset: usize, + pub max_length: usize, } impl Default for Config { @@ -41,6 +45,9 @@ impl Default for Config { no_repeated_offsets: false, eof_in_length: false, + + max_offset: usize::MAX, + max_length: usize::MAX, } } } @@ -58,13 +65,13 @@ impl Config { pub fn pack( data: &[u8], level: u8, - config: Config, + config: &Config, progress_callback: Option, ) -> Vec { if level == 0 { - greedy_packer::pack(data, &config, progress_callback) + greedy_packer::pack(data, config, progress_callback) } else { - parsing_packer::pack(data, level, &config, progress_callback) + parsing_packer::pack(data, level, config, progress_callback) } } diff --git a/src/lz.rs b/src/lz.rs index d1ee613..21699cc 100644 --- a/src/lz.rs +++ b/src/lz.rs @@ -1,6 +1,7 @@ use crate::context_state::ContextState; use crate::rans::{EntropyCoder, RansDecoder}; use crate::Config; +use thiserror::Error; #[derive(Copy, Clone, Debug)] pub enum Op { @@ -25,17 +26,18 @@ impl Op { } &Op::Match { offset, len } => { encode_bit(coder, state, literal_base, config.is_match_bit); + let mut new_offset = true; if !state.prev_was_match && !config.no_repeated_offsets { + new_offset = offset != state.last_offset; encode_bit( coder, state, 256 * state.parity_contexts, - (offset != state.last_offset) == config.new_offset_bit, + new_offset == config.new_offset_bit, ); - } else { - assert!(offset != state.last_offset || config.no_repeated_offsets); } - if offset != state.last_offset || config.no_repeated_offsets { + assert!(offset as usize <= config.max_offset); + if new_offset { encode_length( coder, state, @@ -45,7 +47,7 @@ impl Op { ); state.last_offset = offset; } - assert!(!config.eof_in_length || len > 1); + assert!(len as usize >= config.min_length() && len as usize <= config.max_length); encode_length(coder, state, 256 * state.parity_contexts + 65, len, config); state.prev_was_match = true; state.pos += len as usize; @@ -69,7 +71,7 @@ pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState, config: config.new_offset_bit ^ config.eof_in_length, ); } - if !config.eof_in_length || config.no_repeated_offsets { + if !config.eof_in_length || state.prev_was_match || config.no_repeated_offsets { encode_length(coder, state, 256 * state.parity_contexts + 1, 1, config); } if config.eof_in_length { @@ -130,24 +132,44 @@ impl CoderState { } } -pub fn unpack(packed_data: &[u8], config: &Config) -> Vec { - let mut result = vec![]; - let _ = unpack_internal(Some(&mut result), packed_data, config); - result +#[derive(Error, Debug)] +pub enum UnpackError { + #[error("match offset out of range: {offset} > {position}")] + OffsetOutOfRange { offset: usize, position: usize }, + #[error("Unpacked data over size limit: {size} > {limit}")] + OverSize { size: usize, limit: usize }, + #[error("Unexpected end of input data")] + UnexpectedEOF { + #[from] + source: crate::rans::UnexpectedEOF, + }, + #[error("Overflow while reading value")] + ValueOverflow, } -pub fn calculate_margin(packed_data: &[u8], config: &Config) -> isize { - unpack_internal(None, packed_data, config) +pub fn unpack( + packed_data: &[u8], + config: &Config, + max_size: usize, +) -> Result, UnpackError> { + let mut result = vec![]; + let _ = unpack_internal(Some(&mut result), packed_data, config, max_size)?; + Ok(result) +} + +pub fn calculate_margin(packed_data: &[u8], config: &Config) -> Result { + unpack_internal(None, packed_data, config, usize::MAX) } pub fn unpack_internal( mut result: Option<&mut Vec>, packed_data: &[u8], config: &Config, -) -> isize { + max_size: usize, +) -> Result { let mut decoder = RansDecoder::new(packed_data, &config); let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, &config); - let mut offset = 0; + let mut offset = usize::MAX; let mut position = 0usize; let mut prev_was_match = false; let mut margin = 0isize; @@ -157,31 +179,34 @@ pub fn unpack_internal( contexts: &mut ContextState, mut context_index: usize, config: &Config, - ) -> usize { + ) -> Result { let mut length = 0; let mut bit_pos = 0; - while decoder.decode_with_context(&mut contexts.context_mut(context_index)) + while decoder.decode_with_context(&mut contexts.context_mut(context_index))? == config.continue_value_bit { - length |= (decoder.decode_with_context(&mut contexts.context_mut(context_index + 1)) + length |= (decoder.decode_with_context(&mut contexts.context_mut(context_index + 1))? as usize) << bit_pos; bit_pos += 1; + if bit_pos >= 32 { + return Err(UnpackError::ValueOverflow); + } context_index += 2; } - length | (1 << bit_pos) + Ok(length | (1 << bit_pos)) } loop { margin = margin.max(position as isize - decoder.pos() as isize); let literal_base = position % config.parity_contexts * 256; - if decoder.decode_with_context(&mut contexts.context_mut(literal_base)) + if decoder.decode_with_context(&mut contexts.context_mut(literal_base))? == config.is_match_bit { if config.no_repeated_offsets || prev_was_match || decoder - .decode_with_context(&mut contexts.context_mut(256 * config.parity_contexts)) + .decode_with_context(&mut contexts.context_mut(256 * config.parity_contexts))? == config.new_offset_bit { offset = decode_length( @@ -189,7 +214,7 @@ pub fn unpack_internal( &mut contexts, 256 * config.parity_contexts + 1, &config, - ) - if config.eof_in_length { 0 } else { 1 }; + )? - if config.eof_in_length { 0 } else { 1 }; if offset == 0 { break; } @@ -199,13 +224,20 @@ pub fn unpack_internal( &mut contexts, 256 * config.parity_contexts + 65, &config, - ); + )?; if config.eof_in_length && length == 1 { break; } + if offset > position { + return Err(UnpackError::OffsetOutOfRange { offset, position }); + } if let Some(ref mut result) = result { for _ in 0..length { - result.push(result[result.len() - offset]); + if result.len() < max_size { + result.push(result[result.len() - offset]); + } else { + break; + } } } position += length; @@ -215,17 +247,26 @@ pub fn unpack_internal( let mut byte = 0; for i in (0..8).rev() { let bit = decoder - .decode_with_context(&mut contexts.context_mut(literal_base + context_index)); + .decode_with_context(&mut contexts.context_mut(literal_base + context_index))?; context_index = (context_index << 1) | bit as usize; byte |= (bit as u8) << i; } if let Some(ref mut result) = result { - result.push(byte); + if result.len() < max_size { + result.push(byte); + } } position += 1; prev_was_match = false; } } - margin + decoder.pos() as isize - position as isize + if position > max_size { + return Err(UnpackError::OverSize { + size: position, + limit: max_size, + }); + } + + Ok(margin + decoder.pos() as isize - position as isize) } diff --git a/src/main.rs b/src/main.rs index d724657..8cb1d75 100644 --- a/src/main.rs +++ b/src/main.rs @@ -12,6 +12,7 @@ fn main() -> Result<()> { let mut level = 2; let mut infile: Option = None; let mut outfile: Option = None; + let mut max_unpacked_size = 512 * 1024 * 1024; let mut parser = lexopt::Parser::from_env(); while let Some(arg) = parser.next()? { @@ -32,6 +33,9 @@ fn main() -> Result<()> { Long("no-repeated-offsets") => config.no_repeated_offsets = true, Long("eof-in-length") => config.eof_in_length = true, + Long("max-offset") => config.max_offset = parser.value()?.parse()?, + Long("max-length") => config.max_length = parser.value()?.parse()?, + Long("z80") => { config.use_bitstream = true; config.bitstream_is_big_endian = true; @@ -43,12 +47,15 @@ fn main() -> Result<()> { config.use_bitstream = true; config.continue_value_bit = false; config.is_match_bit = false; + config.new_offset_bit = false; } Short('u') | Long("unpack") => unpack = true, Long("margin") => calculate_margin = true, Short('l') | Long("level") => level = parser.value()?.parse()?, + Short(n) if n.is_ascii_digit() => level = n as u8 - b'0', Short('h') | Long("help") => print_help(0), + Long("max-unpacked-size") => max_unpacked_size = parser.value()?.parse()?, Value(val) if infile.is_none() => infile = Some(val.try_into()?), Value(val) if outfile.is_none() => outfile = Some(val.try_into()?), _ => return Err(arg.unexpected().into()), @@ -92,7 +99,7 @@ fn main() -> Result<()> { let mut packed_data = upkr::pack( &data, level, - config, + &config, Some(&mut |pos| { pb.set(pos as u64); }), @@ -117,14 +124,14 @@ fn main() -> Result<()> { data.reverse(); } if unpack { - let mut unpacked_data = upkr::unpack(&data, &config); + let mut unpacked_data = upkr::unpack(&data, &config, max_unpacked_size)?; if reverse { unpacked_data.reverse(); } File::create(outfile)?.write_all(&unpacked_data)?; } if calculate_margin { - println!("{}", upkr::calculate_margin(&data, &config)); + println!("{}", upkr::calculate_margin(&data, &config)?); } } @@ -138,12 +145,15 @@ fn print_help(exit_code: i32) -> ! { eprintln!(" upkr --margin [config options] "); eprintln!(); eprintln!(" -l, --level N compression level 0-9"); + eprintln!(" -0, ..., -9 short form for setting compression level"); eprintln!(" -u, --unpack unpack infile"); eprintln!(" --margin calculate margin for overlapped unpacking of a packed file"); eprintln!(); eprintln!("Config presets for specific unpackers:"); - eprintln!(" --z80 --big-endian-bitstream --invert-bit-encoding --simplified-prob-update"); - eprintln!(" --x86 --bitstream --invert-is-match-bit --invert-continue-value-bit"); + eprintln!(" --z80 --big-endian-bitstream --invert-bit-encoding --simplified-prob-update -9"); + eprintln!( + " --x86 --bitstream --invert-is-match-bit --invert-continue-value-bit --invert-new-offset-bit" + ); eprintln!(); eprintln!("Config options (need to match when packing/unpacking):"); eprintln!(" -b, --bitstream bitstream mode"); @@ -158,5 +168,8 @@ fn print_help(exit_code: i32) -> ! { eprintln!(" --simplified-prob-update"); eprintln!(" --big-endian-bitstream (implies --bitstream)"); eprintln!(" --no-repeated-offsets"); + eprintln!(" --eof-in-length"); + eprintln!(" --max-offset N"); + eprintln!(" --max-length N"); process::exit(exit_code); } diff --git a/src/parsing_packer.rs b/src/parsing_packer.rs index c5e9728..9ab0f75 100644 --- a/src/parsing_packer.rs +++ b/src/parsing_packer.rs @@ -105,7 +105,7 @@ fn parse( cost_counter: &mut CostCounter, pos: usize, offset: usize, - length: usize, + mut length: usize, arrival: &Arrival, max_arrivals: usize, config: &crate::Config, @@ -113,6 +113,7 @@ fn parse( if length < config.min_length() { return; } + length = length.min(config.max_length); cost_counter.reset(); let mut state = arrival.state.clone(); let op = lz::Op::Match { @@ -186,19 +187,21 @@ fn parse( for m in match_finder.matches(pos) { closest_match = Some(closest_match.unwrap_or(0).max(m.pos)); let offset = pos - m.pos; - found_last_offset |= offset as u32 == arrival.state.last_offset(); - add_match( - &mut arrivals, - cost_counter, - pos, - offset, - m.length, - &arrival, - max_arrivals, - encoding_config, - ); - if m.length >= config.greedy_size { - break 'arrival_loop; + if offset <= encoding_config.max_offset { + found_last_offset |= offset as u32 == arrival.state.last_offset(); + add_match( + &mut arrivals, + cost_counter, + pos, + offset, + m.length, + &arrival, + max_arrivals, + encoding_config, + ); + if m.length >= config.greedy_size { + break 'arrival_loop; + } } } @@ -209,6 +212,9 @@ fn parse( && closest_match.iter().all(|p| *p < match_pos) { let offset = pos - match_pos; + if offset > encoding_config.max_offset { + break; + } let length = match_length(offset); assert!(length > 0); add_match( diff --git a/src/rans.rs b/src/rans.rs index 3ee61d0..d1a24b6 100644 --- a/src/rans.rs +++ b/src/rans.rs @@ -1,4 +1,5 @@ use crate::{context_state::Context, Config}; +use thiserror::Error; pub const PROB_BITS: u32 = 8; pub const ONE_PROB: u32 = 1 << PROB_BITS; @@ -160,6 +161,10 @@ pub struct RansDecoder<'a> { const PROB_MASK: u32 = ONE_PROB - 1; +#[derive(Debug, Error)] +#[error("Unexpected end of input")] +pub struct UnexpectedEOF; + impl<'a> RansDecoder<'a> { pub fn new(data: &'a [u8], config: &Config) -> RansDecoder<'a> { RansDecoder { @@ -178,17 +183,20 @@ impl<'a> RansDecoder<'a> { self.pos } - pub fn decode_with_context(&mut self, context: &mut Context) -> bool { - let bit = self.decode_bit(context.prob()); + pub fn decode_with_context(&mut self, context: &mut Context) -> Result { + let bit = self.decode_bit(context.prob())?; context.update(bit); - bit + Ok(bit) } - pub fn decode_bit(&mut self, prob: u16) -> bool { + pub fn decode_bit(&mut self, prob: u16) -> Result { let prob = prob as u32; if self.use_bitstream { while self.state < 32768 { if self.bits_left == 0 { + if self.pos >= self.data.len() { + return Err(UnexpectedEOF); + } self.byte = self.data[self.pos]; self.pos += 1; self.bits_left = 8; @@ -204,8 +212,11 @@ impl<'a> RansDecoder<'a> { } } else { while self.state < 4096 { - self.state = (self.state << 8) | self.data[0] as u32; - self.data = &self.data[1..]; + if self.pos >= self.data.len() { + return Err(UnexpectedEOF); + } + self.state = (self.state << 8) | self.data[self.pos] as u32; + self.pos += 1; } } @@ -218,6 +229,6 @@ impl<'a> RansDecoder<'a> { }; self.state = prob * (self.state >> PROB_BITS) + (self.state & PROB_MASK) - start; - bit ^ self.invert_bit_encoding + Ok(bit ^ self.invert_bit_encoding) } }