From 5e82c65c1821fcf85b97dd9f71e8fce7cddc93c3 Mon Sep 17 00:00:00 2001
From: Dennis Ranke
Date: Wed, 24 Nov 2021 22:35:24 +0100
Subject: [PATCH] a few speed and/or compression ratio improvements

---
 README.md             | 16 +++++----
 UNLICENSE             | 24 ++++++++++++++
 src/match_finder.rs   | 30 ++++++++++-------
 src/parsing_packer.rs | 77 +++++++++++++++++++++++++++++++++++++------
 4 files changed, 120 insertions(+), 27 deletions(-)
 create mode 100644 UNLICENSE

diff --git a/README.md b/README.md
index 4f2e5b8..b372148 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,14 @@
 # Upkr
 
 Upkr is a simple general purpose lz packer designed to be used in the [MicroW8](https://github.com/exoticorn/microw8) platform.
-The compressed format is base on [Shrinkler](https://github.com/askeksa/Shrinkler) with the main difference being that
-Upkr doesn't differnetiate between literals at odd or even addresses.
+The compressed format is loosely based on [Shrinkler](https://github.com/askeksa/Shrinkler), with the main difference being that
+Upkr doesn't differentiate between literals at odd or even addresses and that I went with rANS/rABS instead of a range coder.
 
-At this point, Upkr should be considered unstable - the exact format isn't finalized yet and still subject to change
-and only a very simple (but also very fast) greedy compressor is implemented. The compression ratio will be improved
-with a more thourough lz parse in the future, although even in the current state is is already similar to the
-DEFLATE compression algorithm.
\ No newline at end of file
+At this point, Upkr should still be considered unstable - the compressed format is not very likely to change, but I still want
+to keep that option open a little longer.
+
+## Inspirations:
+
+* Ferris' blog about his [C64 intro packer](https://yupferris.github.io/blog/2020/08/31/c64-4k-intro-packer-deep-dive.html)
+* [Shrinkler](https://github.com/askeksa/Shrinkler)
+* Ryg's [sample rANS implementation](https://github.com/rygorous/ryg_rans)
\ No newline at end of file
diff --git a/UNLICENSE b/UNLICENSE
new file mode 100644
index 0000000..00d2e13
--- /dev/null
+++ b/UNLICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <https://unlicense.org>
\ No newline at end of file
diff --git a/src/match_finder.rs b/src/match_finder.rs
index 9c68051..fc8e57a 100644
--- a/src/match_finder.rs
+++ b/src/match_finder.rs
@@ -6,7 +6,8 @@ pub struct MatchFinder {
     rev_suffixes: Vec,
     lcp: Vec,
-    max_matches: usize,
+    max_queue_size: usize,
+    max_matches_per_length: usize,
     patience: usize,
     max_length_diff: usize,
 }
 
@@ -42,8 +43,9 @@ impl MatchFinder {
             suffixes,
             rev_suffixes,
             lcp,
-            max_matches: 10,
-            patience: 10,
+            max_queue_size: 100,
+            max_matches_per_length: 5,
+            patience: 100,
             max_length_diff: 2,
         }
     }
@@ -57,8 +59,8 @@ impl MatchFinder {
             left_length: usize::MAX,
             right_index: index,
             right_length: usize::MAX,
-            current_length: 0,
-            matches_left: self.max_matches,
+            current_length: usize::MAX,
+            matches_left: 0,
             max_length: 0,
             queue: BinaryHeap::new(),
         };
@@ -93,15 +95,16 @@ impl<'a> Iterator for Matches<'a> {
     type Item = Match;
 
     fn next(&mut self) -> Option<Self::Item> {
-        if self.queue.is_empty() {
-            self.current_length = self.left_length.max(self.right_length);
+        if self.queue.is_empty() || self.matches_left == 0 {
+            self.queue.clear();
+            self.current_length = self.current_length.saturating_sub(1).min(self.left_length.max(self.right_length));
             self.max_length = self.max_length.max(self.current_length);
             if self.current_length < 2
                 || self.current_length + self.finder.max_length_diff < self.max_length
             {
                 return None;
             }
-            while self.matches_left > 0
+            while self.queue.len() < self.finder.max_queue_size
                 && (self.left_length == self.current_length
                     || self.right_length == self.current_length)
             {
@@ -109,13 +112,16 @@ impl<'a> Iterator for Matches<'a> {
                     self.add_to_queue(self.finder.suffixes[self.left_index]);
                     self.move_left();
                 }
-                if self.right_length == self.current_length && self.matches_left > 0 {
+                if self.right_length == self.current_length {
                     self.add_to_queue(self.finder.suffixes[self.right_index]);
                     self.move_right();
                 }
             }
+            self.matches_left = self.finder.max_matches_per_length;
         }
 
+        self.matches_left = self.matches_left.saturating_sub(1);
+
         self.queue.pop().map(|pos| Match {
             pos,
             length: self.current_length,
@@ -144,7 +150,10 @@ impl<'a> Matches<'a> {
 
     fn move_right(&mut self) {
         let mut patience = self.finder.patience;
-        while self.right_length > 0 && patience > 0 && self.right_index + 1 < self.finder.suffixes.len() {
+        while self.right_length > 0
+            && patience > 0
+            && self.right_index + 1 < self.finder.suffixes.len()
+        {
             self.right_index += 1;
             self.right_length = self
                 .right_length
@@ -162,6 +171,5 @@ impl<'a> Matches<'a> {
 
     fn add_to_queue(&mut self, pos: i32) {
         self.queue.push(pos as usize);
-        self.matches_left -= 1;
     }
 }
diff --git a/src/parsing_packer.rs b/src/parsing_packer.rs
index e3382d0..7837da7 100644
--- a/src/parsing_packer.rs
+++ b/src/parsing_packer.rs
@@ -1,9 +1,9 @@
 use std::collections::HashMap;
 use std::rc::Rc;
 
-use crate::match_finder::MatchFinder;
-use crate::rans::{RansCoder, CostCounter};
 use crate::lz;
+use crate::match_finder::MatchFinder;
+use crate::rans::{CostCounter, RansCoder};
 
 pub fn pack(data: &[u8]) -> Vec<u8> {
     let mut parse = parse(data);
@@ -34,10 +34,12 @@ struct Arrival {
 
 type Arrivals = HashMap<usize, Vec<Arrival>>;
 
-const MAX_ARRIVALS: usize = 16;
+const MAX_ARRIVALS: usize = 4;
 
 fn parse(data: &[u8]) -> Option> {
     let match_finder = MatchFinder::new(data);
+    let mut near_matches = [usize::MAX; 1024];
+    let mut last_seen = [usize::MAX; 256];
     let mut arrivals: Arrivals = HashMap::new();
 
     fn add_arrival(arrivals: &mut Arrivals, pos: usize, arrival: Arrival) {
@@ -54,10 +56,19 @@ fn parse(data: &[u8]) -> Option> {
             }
         }
     }
-    fn add_match(arrivals: &mut Arrivals, pos: usize, offset: usize, length: usize, arrival: &Arrival) {
+    fn add_match(
+        arrivals: &mut Arrivals,
+        pos: usize,
+        offset: usize,
+        length: usize,
+        arrival: &Arrival,
+    ) {
         let mut cost_counter = CostCounter(0.);
         let mut state = arrival.state.clone();
-        let op = lz::Op::Match { offset: offset as u32, len: length as u32 };
+        let op = lz::Op::Match {
+            offset: offset as u32,
+            len: length as u32,
+        };
         op.encode(&mut cost_counter, &mut state);
         add_arrival(
             arrivals,
@@ -81,20 +92,64 @@ fn parse(data: &[u8]) -> Option> {
             cost: 0.0,
         },
     );
 
+    let mut best_per_offset = HashMap::new();
     for pos in 0..data.len() {
-        for arrival in arrivals.remove(&pos).unwrap() {
+        let match_length = |offset: usize| {
+            data[pos..]
+                .iter()
+                .zip(data[(pos - offset)..].iter())
+                .take_while(|(a, b)| a == b)
+                .count()
+        };
+
+        let here_arrivals = if let Some(arr) = arrivals.remove(&pos) {
+            arr
+        } else {
+            continue;
+        };
+        best_per_offset.clear();
+        let mut best_cost = f64::MAX;
+        for arrival in &here_arrivals {
+            best_cost = best_cost.min(arrival.cost);
+            let per_offset = best_per_offset
+                .entry(arrival.state.last_offset())
+                .or_insert(f64::MAX);
+            *per_offset = per_offset.min(arrival.cost);
+        }
+
+        for arrival in here_arrivals {
+            if arrival.cost > (best_cost + 32.0).min(*best_per_offset.get(&arrival.state.last_offset()).unwrap()) {
+                continue;
+            }
             let mut found_last_offset = false;
+            let mut closest_match = None;
             for m in match_finder.matches(pos) {
+                closest_match = Some(closest_match.unwrap_or(0).max(m.pos));
                 let offset = pos - m.pos;
-                if offset as u32 == arrival.state.last_offset() {
-                    found_last_offset = true;
-                }
+                found_last_offset |= offset as u32 == arrival.state.last_offset();
                 add_match(&mut arrivals, pos, offset, m.length, &arrival);
             }
+
+            let mut near_matches_left = 4;
+            let mut match_pos = last_seen[data[pos] as usize];
+            while near_matches_left > 0
+                && match_pos != usize::MAX
+                && closest_match.iter().all(|p| *p < match_pos)
+            {
+                let offset = pos - match_pos;
+                let length = match_length(offset);
+                assert!(length > 0);
+                add_match(&mut arrivals, pos, offset, length, &arrival);
+                found_last_offset |= offset as u32 == arrival.state.last_offset();
+                if offset < near_matches.len() {
+                    match_pos = near_matches[match_pos % near_matches.len()];
+                }
+                near_matches_left -= 1;
+            }
+
             if !found_last_offset && arrival.state.last_offset() > 0 {
                 let offset = arrival.state.last_offset() as usize;
-                let length = data[pos..].iter().zip(data[(pos - offset)..].iter()).take_while(|(a, b)| a == b).count();
+                let length = match_length(offset);
                 if length > 0 {
                     add_match(&mut arrivals, pos, offset, length, &arrival);
                 }
@@ -117,6 +172,8 @@ fn parse(data: &[u8]) -> Option> {
                 },
             );
         }
+        near_matches[pos % near_matches.len()] = last_seen[data[pos] as usize];
+        last_seen[data[pos] as usize] = pos;
     }
     arrivals.remove(&data.len()).unwrap()[0].parse.clone()
 }