From 5e82c65c1821fcf85b97dd9f71e8fce7cddc93c3 Mon Sep 17 00:00:00 2001
From: Dennis Ranke
Date: Wed, 24 Nov 2021 22:35:24 +0100
Subject: [PATCH] a few speed and/or compression ratio improvements

---
 README.md             | 16 +++++----
 UNLICENSE             | 24 ++++++++++++++
 src/match_finder.rs   | 30 ++++++++++-------
 src/parsing_packer.rs | 77 +++++++++++++++++++++++++++++++++++++------
 4 files changed, 120 insertions(+), 27 deletions(-)
 create mode 100644 UNLICENSE

diff --git a/README.md b/README.md
index 4f2e5b8..b372148 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,14 @@
 # Upkr
 
 Upkr is a simple general purpose lz packer designed to be used in the [MicroW8](https://github.com/exoticorn/microw8) platform.
-The compressed format is base on [Shrinkler](https://github.com/askeksa/Shrinkler) with the main difference being that
-Upkr doesn't differnetiate between literals at odd or even addresses.
+The compressed format is loosely based on [Shrinkler](https://github.com/askeksa/Shrinkler), with the main difference being that
+Upkr doesn't differentiate between literals at odd or even addresses and that I went with rANS/rABS instead of a range coder.
 
-At this point, Upkr should be considered unstable - the exact format isn't finalized yet and still subject to change
-and only a very simple (but also very fast) greedy compressor is implemented. The compression ratio will be improved
-with a more thourough lz parse in the future, although even in the current state is is already similar to the
-DEFLATE compression algorithm.
\ No newline at end of file
+At this point, Upkr should still be considered unstable - the compressed format is not very likely to change, but I still want
+to keep that option open a little longer.
+
+## Inspirations:
+
+* Ferris' blog about his [C64 intro packer](https://yupferris.github.io/blog/2020/08/31/c64-4k-intro-packer-deep-dive.html)
+* [Shrinkler](https://github.com/askeksa/Shrinkler)
+* Ryg's [sample rANS implementation](https://github.com/rygorous/ryg_rans)
\ No newline at end of file
diff --git a/UNLICENSE b/UNLICENSE
new file mode 100644
index 0000000..00d2e13
--- /dev/null
+++ b/UNLICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <https://unlicense.org>
\ No newline at end of file
diff --git a/src/match_finder.rs b/src/match_finder.rs
index 9c68051..fc8e57a 100644
--- a/src/match_finder.rs
+++ b/src/match_finder.rs
@@ -6,7 +6,8 @@ pub struct MatchFinder {
     rev_suffixes: Vec,
     lcp: Vec,
-    max_matches: usize,
+    max_queue_size: usize,
+    max_matches_per_length: usize,
     patience: usize,
     max_length_diff: usize,
 }
 
@@ -42,8 +43,9 @@ impl MatchFinder {
             suffixes,
             rev_suffixes,
             lcp,
-            max_matches: 10,
-            patience: 10,
+            max_queue_size: 100,
+            max_matches_per_length: 5,
+            patience: 100,
             max_length_diff: 2,
         }
     }
@@ -57,8 +59,8 @@ impl MatchFinder {
             left_length: usize::MAX,
             right_index: index,
             right_length: usize::MAX,
-            current_length: 0,
-            matches_left: self.max_matches,
+            current_length: usize::MAX,
+            matches_left: 0,
             max_length: 0,
             queue: BinaryHeap::new(),
         };
@@ -93,15 +95,16 @@ impl<'a> Iterator for Matches<'a> {
     type Item = Match;
 
     fn next(&mut self) -> Option<Self::Item> {
-        if self.queue.is_empty() {
-            self.current_length = self.left_length.max(self.right_length);
+        if self.queue.is_empty() || self.matches_left == 0 {
+            self.queue.clear();
+            self.current_length = self.current_length.saturating_sub(1).min(self.left_length.max(self.right_length));
             self.max_length = self.max_length.max(self.current_length);
             if self.current_length < 2
                 || self.current_length + self.finder.max_length_diff < self.max_length
             {
                 return None;
             }
-            while self.matches_left > 0
+            while self.queue.len() < self.finder.max_queue_size
                 && (self.left_length == self.current_length
                     || self.right_length == self.current_length)
             {
@@ -109,13 +112,16 @@ impl<'a> Iterator for Matches<'a> {
                     self.add_to_queue(self.finder.suffixes[self.left_index]);
                     self.move_left();
                 }
-                if self.right_length == self.current_length && self.matches_left > 0 {
+                if self.right_length == self.current_length {
                     self.add_to_queue(self.finder.suffixes[self.right_index]);
                     self.move_right();
                 }
             }
+            self.matches_left = self.finder.max_matches_per_length;
         }
 
+        self.matches_left = self.matches_left.saturating_sub(1);
+
         self.queue.pop().map(|pos| Match {
             pos,
             length: self.current_length,
@@ -144,7 +150,10 @@ impl<'a> Matches<'a> {
 
     fn move_right(&mut self) {
         let mut patience = self.finder.patience;
-        while self.right_length > 0 && patience > 0 && self.right_index + 1 < self.finder.suffixes.len() {
+        while self.right_length > 0
+            && patience > 0
+            && self.right_index + 1 < self.finder.suffixes.len()
+        {
             self.right_index += 1;
             self.right_length = self
                 .right_length
@@ -162,6 +171,5 @@ impl<'a> Matches<'a> {
 
     fn add_to_queue(&mut self, pos: i32) {
         self.queue.push(pos as usize);
-        self.matches_left -= 1;
     }
 }
diff --git a/src/parsing_packer.rs b/src/parsing_packer.rs
index e3382d0..7837da7 100644
--- a/src/parsing_packer.rs
+++ b/src/parsing_packer.rs
@@ -1,9 +1,9 @@
 use std::collections::HashMap;
 use std::rc::Rc;
 
-use crate::match_finder::MatchFinder;
-use crate::rans::{RansCoder, CostCounter};
 use crate::lz;
+use crate::match_finder::MatchFinder;
+use crate::rans::{CostCounter, RansCoder};
 
 pub fn pack(data: &[u8]) -> Vec<u8> {
     let mut parse = parse(data);
@@ -34,10 +34,12 @@ struct Arrival {
 
 type Arrivals = HashMap<usize, Vec<Arrival>>;
 
-const MAX_ARRIVALS: usize = 16;
+const MAX_ARRIVALS: usize = 4;
 
 fn parse(data: &[u8]) -> Option> {
     let match_finder = MatchFinder::new(data);
+    let mut near_matches = [usize::MAX; 1024];
+    let mut last_seen = [usize::MAX; 256];
     let mut arrivals: Arrivals = HashMap::new();
 
     fn add_arrival(arrivals: &mut Arrivals, pos: usize, arrival: Arrival) {
@@ -54,10 +56,19 @@ fn parse(data: &[u8]) -> Option> {
             }
         }
     }
-    fn add_match(arrivals: &mut Arrivals, pos: usize, offset: usize, length: usize, arrival: &Arrival) {
+    fn add_match(
+        arrivals: &mut Arrivals,
+        pos: usize,
+        offset: usize,
+        length: usize,
+        arrival: &Arrival,
+    ) {
         let mut cost_counter = CostCounter(0.);
         let mut state = arrival.state.clone();
-        let op = lz::Op::Match { offset: offset as u32, len: length as u32 };
+        let op = lz::Op::Match {
+            offset: offset as u32,
+            len: length as u32,
+        };
         op.encode(&mut cost_counter, &mut state);
         add_arrival(
             arrivals,
@@ -81,20 +92,64 @@ fn parse(data: &[u8]) -> Option> {
             cost: 0.0,
         },
     );
 
+    let mut best_per_offset = HashMap::new();
     for pos in 0..data.len() {
-        for arrival in arrivals.remove(&pos).unwrap() {
+        let match_length = |offset: usize| {
+            data[pos..]
+                .iter()
+                .zip(data[(pos - offset)..].iter())
+                .take_while(|(a, b)| a == b)
+                .count()
+        };
+
+        let here_arrivals = if let Some(arr) = arrivals.remove(&pos) {
+            arr
+        } else {
+            continue;
+        };
+        best_per_offset.clear();
+        let mut best_cost = f64::MAX;
+        for arrival in &here_arrivals {
+            best_cost = best_cost.min(arrival.cost);
+            let per_offset = best_per_offset
+                .entry(arrival.state.last_offset())
+                .or_insert(f64::MAX);
+            *per_offset = per_offset.min(arrival.cost);
+        }
+
+        for arrival in here_arrivals {
+            if arrival.cost > (best_cost + 32.0).min(*best_per_offset.get(&arrival.state.last_offset()).unwrap()) {
+                continue;
+            }
             let mut found_last_offset = false;
+            let mut closest_match = None;
             for m in match_finder.matches(pos) {
+                closest_match = Some(closest_match.unwrap_or(0).max(m.pos));
                 let offset = pos - m.pos;
-                if offset as u32 == arrival.state.last_offset() {
-                    found_last_offset = true;
-                }
+                found_last_offset |= offset as u32 == arrival.state.last_offset();
                 add_match(&mut arrivals, pos, offset, m.length, &arrival);
             }
+
+            let mut near_matches_left = 4;
+            let mut match_pos = last_seen[data[pos] as usize];
+            while near_matches_left > 0
+                && match_pos != usize::MAX
+                && closest_match.iter().all(|p| *p < match_pos)
+            {
+                let offset = pos - match_pos;
+                let length = match_length(offset);
+                assert!(length > 0);
+                add_match(&mut arrivals, pos, offset, length, &arrival);
+                found_last_offset |= offset as u32 == arrival.state.last_offset();
+                if offset < near_matches.len() {
+                    match_pos = near_matches[match_pos % near_matches.len()];
+                }
+                near_matches_left -= 1;
+            }
+
             if !found_last_offset && arrival.state.last_offset() > 0 {
                 let offset = arrival.state.last_offset() as usize;
-                let length = data[pos..].iter().zip(data[(pos - offset)..].iter()).take_while(|(a, b)| a == b).count();
+                let length = match_length(offset);
                 if length > 0 {
                     add_match(&mut arrivals, pos, offset, length, &arrival);
                 }
@@ -117,6 +172,8 @@ fn parse(data: &[u8]) -> Option> {
                 },
             );
         }
+        near_matches[pos % near_matches.len()] = last_seen[data[pos] as usize];
+        last_seen[data[pos] as usize] = pos;
     }
     arrivals.remove(&data.len()).unwrap()[0].parse.clone()
 }