add documentation, make pbr optional as well

This commit is contained in:
2022-10-26 23:40:41 +02:00
parent 4eab36b9d9
commit 2eb8f230ba
6 changed files with 86 additions and 22 deletions

2
Cargo.lock generated
View File

@@ -308,7 +308,7 @@ checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd"
[[package]] [[package]]
name = "upkr" name = "upkr"
version = "0.2.0" version = "0.2.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"cdivsufsort", "cdivsufsort",

View File

@@ -1,15 +1,18 @@
[package] [package]
name = "upkr" name = "upkr"
version = "0.2.0" version = "0.2.1"
edition = "2021" edition = "2021"
[profile.release] [profile.release]
strip = "debuginfo" strip = "debuginfo"
[features]
terminal = ["crossterm", "pbr"]
[dependencies] [dependencies]
cdivsufsort = "2" cdivsufsort = "2"
lexopt = "0.2.1" lexopt = "0.2.1"
anyhow = "1" anyhow = "1"
thiserror = "1.0.36" thiserror = "1.0.36"
pbr = "1" pbr = { version = "1", optional = true }
crossterm = { version = "0.25.0", default-features = false, optional = true } crossterm = { version = "0.25.0", default-features = false, optional = true }

View File

@@ -23,12 +23,12 @@ upkr-windows-$(VERSION).zip: upkr-windows/upkr.exe PHONY
zip -r -9 $@ upkr-windows zip -r -9 $@ upkr-windows
upkr-linux/upkr: upkr-linux/upkr:
cargo build --target x86_64-unknown-linux-musl --release -F crossterm cargo build --target x86_64-unknown-linux-musl --release -F terminal
mkdir -p upkr-linux mkdir -p upkr-linux
cp ../target/x86_64-unknown-linux-musl/release/upkr upkr-linux/ cp ../target/x86_64-unknown-linux-musl/release/upkr upkr-linux/
upkr-windows/upkr.exe: upkr-windows/upkr.exe:
cargo build --target x86_64-pc-windows-gnu --release -F crossterm cargo build --target x86_64-pc-windows-gnu --release -F terminal
mkdir -p upkr-windows mkdir -p upkr-windows
cp ../target/x86_64-pc-windows-gnu/release/upkr.exe upkr-windows/ cp ../target/x86_64-pc-windows-gnu/release/upkr.exe upkr-windows/

View File

@@ -1,3 +1,14 @@
/// Heatmap information about a compressed block of data.
///
/// For each byte in the uncompressed data, the heatmap provides two pieces of information:
/// 1. whether this byte was encoded as a literal or as part of a match
/// 2. how many (fractional) bits were spent on encoding this byte
///
/// For the sake of the heatmap, the cost of each literal is spread out across all matches
/// that reference that literal.
///
/// If the `terminal` feature is enabled, there is a function to write out the
/// heatmap as a colored hexdump.
pub struct Heatmap { pub struct Heatmap {
data: Vec<u8>, data: Vec<u8>,
cost: Vec<f32>, cost: Vec<f32>,
@@ -5,7 +16,7 @@ pub struct Heatmap {
} }
impl Heatmap { impl Heatmap {
pub fn new() -> Heatmap { pub(crate) fn new() -> Heatmap {
Heatmap { Heatmap {
data: Vec::new(), data: Vec::new(),
cost: Vec::new(), cost: Vec::new(),
@@ -13,13 +24,13 @@ impl Heatmap {
} }
} }
pub fn add_literal(&mut self, byte: u8, cost: f32) { pub(crate) fn add_literal(&mut self, byte: u8, cost: f32) {
self.data.push(byte); self.data.push(byte);
self.cost.push(cost); self.cost.push(cost);
self.literal_index.push(self.literal_index.len()); self.literal_index.push(self.literal_index.len());
} }
pub fn add_match(&mut self, offset: usize, length: usize, mut cost: f32) { pub(crate) fn add_match(&mut self, offset: usize, length: usize, mut cost: f32) {
cost /= length as f32; cost /= length as f32;
for _ in 0..length { for _ in 0..length {
self.data.push(self.data[self.data.len() - offset]); self.data.push(self.data[self.data.len() - offset]);
@@ -29,7 +40,7 @@ impl Heatmap {
} }
} }
pub fn finish(&mut self) { pub(crate) fn finish(&mut self) {
let mut ref_count = vec![0usize; self.literal_index.len()]; let mut ref_count = vec![0usize; self.literal_index.len()];
for &index in &self.literal_index { for &index in &self.literal_index {
ref_count[index] += 1; ref_count[index] += 1;
@@ -47,6 +58,7 @@ impl Heatmap {
} }
} }
/// Reverses the heatmap
pub fn reverse(&mut self) { pub fn reverse(&mut self) {
self.data.reverse(); self.data.reverse();
self.cost.reverse(); self.cost.reverse();
@@ -56,23 +68,28 @@ impl Heatmap {
} }
} }
/// The number of (uncompressed) bytes of data in this heatmap
pub fn len(&self) -> usize { pub fn len(&self) -> usize {
self.cost.len() self.cost.len()
} }
/// Returns whether the byte at `index` was encoded as a literal
pub fn is_literal(&self, index: usize) -> bool { pub fn is_literal(&self, index: usize) -> bool {
self.literal_index[index] == index self.literal_index[index] == index
} }
/// Returns the cost of encoding the byte at `index` in (fractional) bits
pub fn cost(&self, index: usize) -> f32 { pub fn cost(&self, index: usize) -> f32 {
self.cost[index] self.cost[index]
} }
/// Returns the uncompressed data byte at `index`
pub fn byte(&self, index: usize) -> u8 { pub fn byte(&self, index: usize) -> u8 {
self.data[index] self.data[index]
} }
#[cfg(feature = "crossterm")] #[cfg(feature = "crossterm")]
/// Print the heatmap as a colored hexdump
pub fn print_as_hex(&self) -> std::io::Result<()> { pub fn print_as_hex(&self) -> std::io::Result<()> {
use crossterm::{ use crossterm::{
style::{Attribute, Color, Print, SetAttribute, SetBackgroundColor}, style::{Attribute, Color, Print, SetAttribute, SetBackgroundColor},

View File

@@ -133,21 +133,47 @@ impl CoderState {
} }
} }
/// The error type for the unpacking-related functions
#[derive(Error, Debug)] #[derive(Error, Debug)]
pub enum UnpackError { pub enum UnpackError {
/// a match offset pointing beyond the start of the unpacked data was encountered
#[error("match offset out of range: {offset} > {position}")] #[error("match offset out of range: {offset} > {position}")]
OffsetOutOfRange { offset: usize, position: usize }, OffsetOutOfRange {
/// the match offset
offset: usize,
/// the current position in the uncompressed stream
position: usize,
},
/// The passed size limit was exceeded
#[error("Unpacked data over size limit: {size} > {limit}")] #[error("Unpacked data over size limit: {size} > {limit}")]
OverSize { size: usize, limit: usize }, OverSize {
/// the size of the uncompressed data
size: usize,
/// the size limit passed into the function
limit: usize,
},
/// The end of the packed data was reached without an encoded EOF marker
#[error("Unexpected end of input data")] #[error("Unexpected end of input data")]
UnexpectedEOF { UnexpectedEOF {
#[from] #[from]
/// the underlying EOF error in the rANS decoder
source: crate::rans::UnexpectedEOF, source: crate::rans::UnexpectedEOF,
}, },
/// An offset or length value was found that exceeded 32 bits
#[error("Overflow while reading value")] #[error("Overflow while reading value")]
ValueOverflow, ValueOverflow,
} }
/// Uncompress a piece of compressed data
///
/// Returns either the uncompressed data, or an `UnpackError`
///
/// # Parameters
///
/// - `packed_data`: the compressed data
/// - `config`: the exact compression format config used to compress the data
/// - `max_size`: the maximum size of uncompressed data to return. When this is exceeded,
/// `UnpackError::OverSize` is returned
pub fn unpack( pub fn unpack(
packed_data: &[u8], packed_data: &[u8],
config: &Config, config: &Config,
@@ -158,10 +184,22 @@ pub fn unpack(
Ok(result) Ok(result)
} }
/// Calculates the minimum margin when overlapping buffers.
///
/// Returns the minimum margin needed between the end of the compressed data and the
/// end of the uncompressed data when overlapping the two buffers to save on RAM.
pub fn calculate_margin(packed_data: &[u8], config: &Config) -> Result<isize, UnpackError> { pub fn calculate_margin(packed_data: &[u8], config: &Config) -> Result<isize, UnpackError> {
unpack_internal(None, None, packed_data, config, usize::MAX) unpack_internal(None, None, packed_data, config, usize::MAX)
} }
/// Calculates a `Heatmap` from compressed data.
///
/// # Parameters
///
/// - `packed_data`: the compressed data
/// - `config`: the exact compression format config used to compress the data
/// - `max_size`: the maximum size of the heatmap to return. When this is exceeded,
/// `UnpackError::OverSize` is returned
pub fn create_heatmap( pub fn create_heatmap(
packed_data: &[u8], packed_data: &[u8],
config: &Config, config: &Config,

View File

@@ -123,17 +123,23 @@ fn main() -> Result<()> {
data.reverse(); data.reverse();
} }
let mut pb = pbr::ProgressBar::new(data.len() as u64); #[cfg(feature = "terminal")]
pb.set_units(pbr::Units::Bytes); let mut packed_data = {
let mut packed_data = upkr::pack( let mut pb = pbr::ProgressBar::new(data.len() as u64);
&data, pb.set_units(pbr::Units::Bytes);
level, let packed_data = upkr::pack(
&config, &data,
Some(&mut |pos| { level,
pb.set(pos as u64); &config,
}), Some(&mut |pos| {
); pb.set(pos as u64);
pb.finish(); }),
);
pb.finish();
packed_data
};
#[cfg(not(feature = "terminal"))]
let mut packed_data = upkr::pack(&data, level, &config, None);
if reverse { if reverse {
packed_data.reverse(); packed_data.reverse();