From 130bf821fa185b54af83d36519b0136f9a934364 Mon Sep 17 00:00:00 2001 From: Dennis Ranke Date: Tue, 11 Mar 2025 10:52:24 +0100 Subject: [PATCH] implement dictionary support when packing --- README.md.upk | Bin 0 -> 1553 bytes src/greedy_packer.rs | 2 +- src/lib.rs | 5 +++++ src/main.rs | 11 +++++++++++ src/parsing_packer.rs | 4 ++-- 5 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 README.md.upk diff --git a/README.md.upk b/README.md.upk new file mode 100644 index 0000000000000000000000000000000000000000..73338acc3658760961ecdab170e6e62eb4c8f0a0 GIT binary patch literal 1553 zcmV+s2JZO^JDD1d9nD>zA*pGOEIl!o?Mf@CR<5EpzRImxqQx4sIltfMROEZVgq|*u zFd04NY@DnOf_;&zu;B;QFv|mNAmbXf5fOs|&&-r2xfor>U)~?Pr4>&;aaT~fP3!Yf zu8VdzOqxbT@V`H1{UClCUnCp8nXL}dHuq2CgK1J%sw;T_$s>z;u2>fLtz2a_iF<8&5)PGML!G zjSC4GHCMU?TvCmY*mj;zF9Vj}?YVjzs_cQ>rB^>Ul*uQTPVvp=z@uEjl+H%^W^SQ) zvjR$>H6^oUe@nIn{#%Y!?Rq-uFA#Cx1y$)l9{IGVh(cAxbyb3N^}vREk2cKV7MT*fb&L_l{5wib_`tq`~#Hb8q(= ziqUgXnfZ!3uwI`p5Es23k4?)yuvuL_!{gzb*VApIR(Y>g2mkn&nAt#qeW+~uIN>tA z9m5zPyLn7Qnn=2flU9T)K2@_kH5V(BS^D|g<}xtHq(KO92?Wx$G3`j@OlyK%FX!o0)uKBZBj^JJ3rI z@6xhsqM7px*-WuZMhqqj(d3F@{P+9+#9LpI4Hv5!n{k*qI=TL{hzYhqo_Pp)gD}he z{BlB2+zgl9vP6%KA8HQUKoLSK8L%e<=}fnFwTMmMzChDFRHrK$U>#ddr5ca|Ijn@* zi@xB}k8wu{y{D_I`l|qCjD)Wp?X>+a8i0)r`%AQTbUuzINUjRAi`7_m7agmqDZt67 z5mqASZ8T$ub86ecSr5@4vg$-a{(hF34qaeaw;v_Q~;uDE0agSb{)Ud zK}?+shjD7GFkim^J>s<5SGCUdjj(+co0V#ujp-YprFEMmXjqZrNobyY2#6J@x?PT@ zpF+=Y<3*ey*lXR+MSrT+2k7Jfay&~q$_1qHYB3RH+7}Siz6T39p93zd;`#EbX{1rp zX41(Bl09-%WAHHCZL)~{3|y_5F=Q_}gjk0q+PR(#mS;NOFOlNg)>n*c=5a@!GvnSn zQZZIv!45WyLhFl%-ZTw4oNQRX_@b8wga z&S1$2u@&Bsmoke{o=g%@k)|^@Oj1La7LTGQvm|X_DS?;flw`!!9JVR562Y5e+WUfu zME17?#`e;;$&X*YCH*g>Z@rFv1gg^{%3go=QEDK*xJ?- za7!D;;#cOtfE5>#-=iHoN7CwK*QAryEt;pl$Ey`0-k|shK^nkPg_)r> zawN~crUlNXe`~I!DO}N3KdX1;**{7wU7#Ks#fXWx>N01N=-t4v%_B Result<()> { let mut level = 2; let mut infile: Option = None; let mut outfile: Option = None; + let mut dictionary: Option = None; let mut max_unpacked_size = 512 * 1024 * 1024; let mut parser = lexopt::Parser::from_env(); @@ -74,6 +75,7 @@ fn main() -> Result<()> { process::exit(0); } Long("max-unpacked-size") => max_unpacked_size = parser.value()?.parse()?, + Long("dictionary") => dictionary = Some(parser.value()?.try_into()?), Value(val) if infile.is_none() => infile = Some(val.try_into()?), Value(val) if outfile.is_none() => outfile = Some(val.try_into()?), _ => return Err(arg.unexpected().into()), @@ -94,6 +96,15 @@ fn main() -> Result<()> { data.reverse(); } + if let Some(dictionary) = dictionary { + let mut dict = vec![]; + File::open(dictionary)?.read_to_end(&mut dict)?; + config.dictionary_size = dict.len(); + // prepend dict + dict.append(&mut data); + data = dict; + } + #[cfg(feature = "terminal")] let mut packed_data = { let mut pb = pbr::ProgressBar::on(std::io::stderr(), data.len() as u64); diff --git a/src/parsing_packer.rs b/src/parsing_packer.rs index 9ab0f75..7769ef7 100644 --- a/src/parsing_packer.rs +++ b/src/parsing_packer.rs @@ -137,7 +137,7 @@ fn parse( } add_arrival( &mut arrivals, - 0, + encoding_config.dictionary_size, Arrival { parse: None, state: lz::CoderState::new(encoding_config), @@ -148,7 +148,7 @@ fn parse( let cost_counter = &mut CostCounter::new(encoding_config); let mut best_per_offset = HashMap::new(); - for pos in 0..data.len() { + for pos in encoding_config.dictionary_size..data.len() { let match_length = |offset: usize| { data[pos..] .iter()