add documentation, make pbr optional as well

This commit is contained in:
2022-10-26 23:40:41 +02:00
parent 4eab36b9d9
commit 2eb8f230ba
6 changed files with 86 additions and 22 deletions

2
Cargo.lock generated
View File

@@ -308,7 +308,7 @@ checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd"
[[package]]
name = "upkr"
version = "0.2.0"
version = "0.2.1"
dependencies = [
"anyhow",
"cdivsufsort",

View File

@@ -1,15 +1,18 @@
[package]
name = "upkr"
version = "0.2.0"
version = "0.2.1"
edition = "2021"
[profile.release]
strip = "debuginfo"
[features]
terminal = ["crossterm", "pbr"]
[dependencies]
cdivsufsort = "2"
lexopt = "0.2.1"
anyhow = "1"
thiserror = "1.0.36"
pbr = "1"
pbr = { version = "1", optional = true }
crossterm = { version = "0.25.0", default-features = false, optional = true }

View File

@@ -23,12 +23,12 @@ upkr-windows-$(VERSION).zip: upkr-windows/upkr.exe PHONY
zip -r -9 $@ upkr-windows
upkr-linux/upkr:
cargo build --target x86_64-unknown-linux-musl --release -F crossterm
cargo build --target x86_64-unknown-linux-musl --release -F terminal
mkdir -p upkr-linux
cp ../target/x86_64-unknown-linux-musl/release/upkr upkr-linux/
upkr-windows/upkr.exe:
cargo build --target x86_64-pc-windows-gnu --release -F crossterm
cargo build --target x86_64-pc-windows-gnu --release -F terminal
mkdir -p upkr-windows
cp ../target/x86_64-pc-windows-gnu/release/upkr.exe upkr-windows/

View File

@@ -1,3 +1,14 @@
/// Heatmap information about a compressed block of data.
///
/// For each byte in the uncompressed data, the heatmap provides two pieces of information:
/// 1. whether this byte was encoded as a literal or as part of a match
/// 2. how many (fractional) bits were spent on encoding this byte
///
/// For the sake of the heatmap, the cost of each literal is spread out across all matches
/// that reference that literal.
///
/// If the `terminal` feature is enabled, there is a function to write out the
/// heatmap as a colored hexdump.
pub struct Heatmap {
data: Vec<u8>,
cost: Vec<f32>,
@@ -5,7 +16,7 @@ pub struct Heatmap {
}
impl Heatmap {
pub fn new() -> Heatmap {
pub(crate) fn new() -> Heatmap {
Heatmap {
data: Vec::new(),
cost: Vec::new(),
@@ -13,13 +24,13 @@ impl Heatmap {
}
}
/// Records a single literal byte together with the cost of encoding it.
///
/// `byte` is appended to `data` and `cost` to the per-byte cost list. The new
/// `literal_index` entry is set to its own position, which is the condition
/// `is_literal` tests for.
pub fn add_literal(&mut self, byte: u8, cost: f32) {
pub(crate) fn add_literal(&mut self, byte: u8, cost: f32) {
self.data.push(byte);
self.cost.push(cost);
// The length is read before the push, so the pushed value equals the slot it lands in.
self.literal_index.push(self.literal_index.len());
}
pub fn add_match(&mut self, offset: usize, length: usize, mut cost: f32) {
pub(crate) fn add_match(&mut self, offset: usize, length: usize, mut cost: f32) {
cost /= length as f32;
for _ in 0..length {
self.data.push(self.data[self.data.len() - offset]);
@@ -29,7 +40,7 @@ impl Heatmap {
}
}
pub fn finish(&mut self) {
pub(crate) fn finish(&mut self) {
let mut ref_count = vec![0usize; self.literal_index.len()];
for &index in &self.literal_index {
ref_count[index] += 1;
@@ -47,6 +58,7 @@ impl Heatmap {
}
}
/// Reverses the heatmap
pub fn reverse(&mut self) {
self.data.reverse();
self.cost.reverse();
@@ -56,23 +68,28 @@ impl Heatmap {
}
}
/// The number of (uncompressed) bytes of data in this heatmap
pub fn len(&self) -> usize {
self.cost.len()
}
/// Reports whether the byte at `index` was encoded as a literal.
///
/// A byte counts as a literal when its `literal_index` entry refers back to its
/// own position.
///
/// # Panics
///
/// Panics if `index` is out of bounds.
pub fn is_literal(&self, index: usize) -> bool {
    let origin = self.literal_index[index];
    origin == index
}
/// Returns the cost of encoding the byte at `index` in (fractional) bits
///
/// # Panics
///
/// Panics if `index` is out of bounds.
pub fn cost(&self, index: usize) -> f32 {
self.cost[index]
}
/// Returns the uncompressed data byte at `index`
///
/// # Panics
///
/// Panics if `index` is out of bounds.
pub fn byte(&self, index: usize) -> u8 {
self.data[index]
}
#[cfg(feature = "crossterm")]
/// Print the heatmap as a colored hexdump
pub fn print_as_hex(&self) -> std::io::Result<()> {
use crossterm::{
style::{Attribute, Color, Print, SetAttribute, SetBackgroundColor},

View File

@@ -133,21 +133,47 @@ impl CoderState {
}
}
/// The error type returned by the unpacking-related functions.
#[derive(Error, Debug)]
pub enum UnpackError {
/// A match offset pointing beyond the start of the unpacked data was encountered.
#[error("match offset out of range: {offset} > {position}")]
OffsetOutOfRange { offset: usize, position: usize },
OffsetOutOfRange {
/// The match offset.
offset: usize,
/// The current position in the uncompressed stream.
position: usize,
},
/// The size limit passed by the caller was exceeded.
#[error("Unpacked data over size limit: {size} > {limit}")]
OverSize { size: usize, limit: usize },
OverSize {
/// The size of the uncompressed data.
size: usize,
/// The size limit passed into the function.
limit: usize,
},
/// The end of the packed data was reached without an encoded EOF marker.
#[error("Unexpected end of input data")]
UnexpectedEOF {
#[from]
/// The underlying EOF error from the rANS decoder.
source: crate::rans::UnexpectedEOF,
},
/// An offset or length value was found that does not fit into 32 bits.
#[error("Overflow while reading value")]
ValueOverflow,
}
/// Uncompress a piece of compressed data
///
/// Returns either the uncompressed data, or an `UnpackError`
///
/// # Parameters
///
/// - `packed_data`: the compressed data
/// - `config`: the exact compression format config used to compress the data
/// - `max_size`: the maximum size of uncompressed data to return. When this is exceeded,
/// `UnpackError::OverSize` is returned
pub fn unpack(
packed_data: &[u8],
config: &Config,
@@ -158,10 +184,22 @@ pub fn unpack(
Ok(result)
}
/// Calculates the minimum margin when overlapping buffers.
///
/// Returns the minimum margin needed between the end of the compressed data and the
/// end of the uncompressed data when overlapping the two buffers to save on RAM.
///
/// # Parameters
///
/// - `packed_data`: the compressed data
/// - `config`: the compression format config, forwarded unchanged to the internal unpacker
///
/// # Errors
///
/// Any `UnpackError` produced by `unpack_internal` is returned unchanged.
pub fn calculate_margin(packed_data: &[u8], config: &Config) -> Result<isize, UnpackError> {
// NOTE(review): the trailing `usize::MAX` is presumably the output size limit, i.e.
// effectively "no limit" — confirm against the signature of `unpack_internal`.
unpack_internal(None, None, packed_data, config, usize::MAX)
}
/// Calculates a `Heatmap` from compressed data.
///
/// # Parameters
///
/// - `packed_data`: the compressed data
/// - `config`: the exact compression format config used to compress the data
/// - `max_size`: the maximum size of the heatmap to return. When this is exceeded,
/// `UnpackError::OverSize` is returned
pub fn create_heatmap(
packed_data: &[u8],
config: &Config,

View File

@@ -123,9 +123,11 @@ fn main() -> Result<()> {
data.reverse();
}
#[cfg(feature = "terminal")]
let mut packed_data = {
let mut pb = pbr::ProgressBar::new(data.len() as u64);
pb.set_units(pbr::Units::Bytes);
let mut packed_data = upkr::pack(
let packed_data = upkr::pack(
&data,
level,
&config,
@@ -134,6 +136,10 @@ fn main() -> Result<()> {
}),
);
pb.finish();
packed_data
};
#[cfg(not(feature = "terminal"))]
let mut packed_data = upkr::pack(&data, level, &config, None);
if reverse {
packed_data.reverse();