diff --git a/README.md.upk b/README.md.upk new file mode 100644 index 0000000..73338ac Binary files /dev/null and b/README.md.upk differ diff --git a/src/greedy_packer.rs b/src/greedy_packer.rs index 3b77d13..b1875e7 100644 --- a/src/greedy_packer.rs +++ b/src/greedy_packer.rs @@ -12,7 +12,7 @@ pub fn pack( let mut rans_coder = RansCoder::new(config); let mut state = lz::CoderState::new(config); - let mut pos = 0; + let mut pos = config.dictionary_size; while pos < data.len() { if let Some(ref mut cb) = progress_callback { cb(pos); diff --git a/src/lib.rs b/src/lib.rs index c3e4cd5..ec0e591 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -71,6 +71,9 @@ pub struct Config { pub max_offset: usize, /// The maximum match length value to encode when compressing. pub max_length: usize, + + /// Size of dictionary at the beginning of data (how many bytes to skip when compressing.) + pub dictionary_size: usize, } impl Default for Config { @@ -92,6 +95,8 @@ impl Default for Config { max_offset: usize::MAX, max_length: usize::MAX, + + dictionary_size: 0, } } } diff --git a/src/main.rs b/src/main.rs index ba4aff7..19460de 100644 --- a/src/main.rs +++ b/src/main.rs @@ -16,6 +16,7 @@ fn main() -> Result<()> { let mut level = 2; let mut infile: Option = None; let mut outfile: Option = None; + let mut dictionary: Option = None; let mut max_unpacked_size = 512 * 1024 * 1024; let mut parser = lexopt::Parser::from_env(); @@ -74,6 +75,7 @@ fn main() -> Result<()> { process::exit(0); } Long("max-unpacked-size") => max_unpacked_size = parser.value()?.parse()?, + Long("dictionary") => dictionary = Some(parser.value()?.try_into()?), Value(val) if infile.is_none() => infile = Some(val.try_into()?), Value(val) if outfile.is_none() => outfile = Some(val.try_into()?), _ => return Err(arg.unexpected().into()), @@ -94,6 +96,15 @@ fn main() -> Result<()> { data.reverse(); } + if let Some(dictionary) = dictionary { + let mut dict = vec![]; + File::open(dictionary)?.read_to_end(&mut dict)?; + config.dictionary_size = dict.len(); + // prepend dict + dict.append(&mut data); + data = dict; + } + #[cfg(feature = "terminal")] let mut packed_data = { let mut pb = pbr::ProgressBar::on(std::io::stderr(), data.len() as u64); diff --git a/src/parsing_packer.rs b/src/parsing_packer.rs index 9ab0f75..7769ef7 100644 --- a/src/parsing_packer.rs +++ b/src/parsing_packer.rs @@ -137,7 +137,7 @@ fn parse( } add_arrival( &mut arrivals, - 0, + encoding_config.dictionary_size, Arrival { parse: None, state: lz::CoderState::new(encoding_config), @@ -148,7 +148,7 @@ fn parse( let cost_counter = &mut CostCounter::new(encoding_config); let mut best_per_offset = HashMap::new(); - for pos in 0..data.len() { + for pos in encoding_config.dictionary_size..data.len() { let match_length = |offset: usize| { data[pos..] .iter()