From 2eb8f230ba8d729b22c2c438a0770396f7591f41 Mon Sep 17 00:00:00 2001 From: Dennis Ranke Date: Wed, 26 Oct 2022 23:40:41 +0200 Subject: [PATCH] add documentation, make pbr optional as well --- Cargo.lock | 2 +- Cargo.toml | 7 +++++-- release/Makefile | 4 ++-- src/heatmap.rs | 25 +++++++++++++++++++++---- src/lz.rs | 42 ++++++++++++++++++++++++++++++++++++++++-- src/main.rs | 28 +++++++++++++++++----------- 6 files changed, 86 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5e18d65..2d3ea3a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -308,7 +308,7 @@ checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd" [[package]] name = "upkr" -version = "0.2.0" +version = "0.2.1" dependencies = [ "anyhow", "cdivsufsort", diff --git a/Cargo.toml b/Cargo.toml index b4a3ea3..46dc4cb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,15 +1,18 @@ [package] name = "upkr" -version = "0.2.0" +version = "0.2.1" edition = "2021" [profile.release] strip = "debuginfo" +[features] +terminal = ["crossterm", "pbr"] + [dependencies] cdivsufsort = "2" lexopt = "0.2.1" anyhow = "1" thiserror = "1.0.36" -pbr = "1" +pbr = { version = "1", optional = true } crossterm = { version = "0.25.0", default-features = false, optional = true } diff --git a/release/Makefile b/release/Makefile index 9bf64a6..f8c6faf 100644 --- a/release/Makefile +++ b/release/Makefile @@ -23,12 +23,12 @@ upkr-windows-$(VERSION).zip: upkr-windows/upkr.exe PHONY zip -r -9 $@ upkr-windows upkr-linux/upkr: - cargo build --target x86_64-unknown-linux-musl --release -F crossterm + cargo build --target x86_64-unknown-linux-musl --release -F terminal mkdir -p upkr-linux cp ../target/x86_64-unknown-linux-musl/release/upkr upkr-linux/ upkr-windows/upkr.exe: - cargo build --target x86_64-pc-windows-gnu --release -F crossterm + cargo build --target x86_64-pc-windows-gnu --release -F terminal mkdir -p upkr-windows cp ../target/x86_64-pc-windows-gnu/release/upkr.exe upkr-windows/ diff --git 
a/src/heatmap.rs b/src/heatmap.rs index efb6041..2698321 100644 --- a/src/heatmap.rs +++ b/src/heatmap.rs @@ -1,3 +1,14 @@ +/// Heatmap information about a compressed block of data. +/// +/// For each byte in the uncompressed data, the heatmap provides two pieces of information: +/// 1. whether this byte was encoded as a literal or as part of a match +/// 2. how many (fractional) bits were spent on encoding this byte +/// +/// For the sake of the heatmap, the cost of literals is spread out across all matches +/// that reference the literal. +/// +/// If the `terminal` feature is enabled, there is a function to write out the +/// heatmap as a colored hexdump. pub struct Heatmap { data: Vec, cost: Vec, @@ -5,7 +16,7 @@ pub struct Heatmap { } impl Heatmap { - pub fn new() -> Heatmap { + pub(crate) fn new() -> Heatmap { Heatmap { data: Vec::new(), cost: Vec::new(), @@ -13,13 +24,13 @@ impl Heatmap { } } - pub fn add_literal(&mut self, byte: u8, cost: f32) { + pub(crate) fn add_literal(&mut self, byte: u8, cost: f32) { self.data.push(byte); self.cost.push(cost); self.literal_index.push(self.literal_index.len()); } - pub fn add_match(&mut self, offset: usize, length: usize, mut cost: f32) { + pub(crate) fn add_match(&mut self, offset: usize, length: usize, mut cost: f32) { cost /= length as f32; for _ in 0..length { self.data.push(self.data[self.data.len() - offset]); @@ -29,7 +40,7 @@ impl Heatmap { } } - pub fn finish(&mut self) { + pub(crate) fn finish(&mut self) { let mut ref_count = vec![0usize; self.literal_index.len()]; for &index in &self.literal_index { ref_count[index] += 1; @@ -47,6 +58,7 @@ impl Heatmap { } } + /// Reverses the heatmap pub fn reverse(&mut self) { self.data.reverse(); self.cost.reverse(); @@ -56,23 +68,28 @@ impl Heatmap { } } + /// The number of (uncompressed) bytes of data in this heatmap pub fn len(&self) -> usize { self.cost.len() } + /// Returns whether the byte at `index` was encoded as a literal pub fn is_literal(&self, index: usize) 
-> bool { self.literal_index[index] == index } + /// Returns the cost of encoding the byte at `index` in (fractional) bits pub fn cost(&self, index: usize) -> f32 { self.cost[index] } + /// Returns the uncompressed data byte at `index` pub fn byte(&self, index: usize) -> u8 { self.data[index] } #[cfg(feature = "crossterm")] + /// Print the heatmap as a colored hexdump pub fn print_as_hex(&self) -> std::io::Result<()> { use crossterm::{ style::{Attribute, Color, Print, SetAttribute, SetBackgroundColor}, diff --git a/src/lz.rs b/src/lz.rs index d2f8e7a..37a504b 100644 --- a/src/lz.rs +++ b/src/lz.rs @@ -133,21 +133,47 @@ impl CoderState { } } +/// The error type for the uncompressing related functions #[derive(Error, Debug)] pub enum UnpackError { + /// a match offset pointing beyond the start of the unpacked data was encountered #[error("match offset out of range: {offset} > {position}")] - OffsetOutOfRange { offset: usize, position: usize }, + OffsetOutOfRange { + /// the match offset + offset: usize, + /// the current position in the uncompressed stream + position: usize, + }, + /// The passed size limit was exceeded #[error("Unpacked data over size limit: {size} > {limit}")] - OverSize { size: usize, limit: usize }, + OverSize { + /// the size of the uncompressed data + size: usize, + /// the size limit passed into the function + limit: usize, + }, + /// The end of the packed data was reached without an encoded EOF marker #[error("Unexpected end of input data")] UnexpectedEOF { #[from] + /// the underlying EOF error in the rANS decoder source: crate::rans::UnexpectedEOF, }, + /// An offset or length value was found that exceeded 32bit #[error("Overflow while reading value")] ValueOverflow, } +/// Uncompress a piece of compressed data +/// +/// Returns either the uncompressed data, or an `UnpackError` +/// +/// # Parameters +/// +/// - `packed_data`: the compressed data +/// - `config`: the exact compression format config used to compress the data +/// - 
`max_size`: the maximum size of uncompressed data to return. When this is exceeded, +/// `UnpackError::OverSize` is returned pub fn unpack( packed_data: &[u8], config: &Config, @@ -158,10 +184,22 @@ pub fn unpack( Ok(result) } +/// Calculates the minimum margin when overlapping buffers. +/// +/// Returns the minimum margin needed between the end of the compressed data and the +/// end of the uncompressed data when overlapping the two buffers to save on RAM. pub fn calculate_margin(packed_data: &[u8], config: &Config) -> Result { unpack_internal(None, None, packed_data, config, usize::MAX) } +/// Calculates a `Heatmap` from compressed data. +/// +/// # Parameters +/// +/// - `packed_data`: the compressed data +/// - `config`: the exact compression format config used to compress the data +/// - `max_size`: the maximum size of the heatmap to return. When this is exceeded, +/// `UnpackError::OverSize` is returned pub fn create_heatmap( packed_data: &[u8], config: &Config, diff --git a/src/main.rs b/src/main.rs index 6c02c5a..24ceb5a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -123,17 +123,23 @@ fn main() -> Result<()> { data.reverse(); } - let mut pb = pbr::ProgressBar::new(data.len() as u64); - pb.set_units(pbr::Units::Bytes); - let mut packed_data = upkr::pack( - &data, - level, - &config, - Some(&mut |pos| { - pb.set(pos as u64); - }), - ); - pb.finish(); + #[cfg(feature = "terminal")] + let mut packed_data = { + let mut pb = pbr::ProgressBar::new(data.len() as u64); + pb.set_units(pbr::Units::Bytes); + let packed_data = upkr::pack( + &data, + level, + &config, + Some(&mut |pos| { + pb.set(pos as u64); + }), + ); + pb.finish(); + packed_data + }; + #[cfg(not(feature = "terminal"))] + let mut packed_data = upkr::pack(&data, level, &config, None); if reverse { packed_data.reverse();