From 470b77834083d1b91d723e61ae4616f8824e3241 Mon Sep 17 00:00:00 2001 From: Dennis Ranke Date: Mon, 22 Nov 2021 23:14:24 +0100 Subject: [PATCH] change entropy coder from Range to rANS --- Cargo.lock | 14 ++++++ Cargo.toml | 2 + src/context_state.rs | 6 ++- src/lib.rs | 2 +- src/lz.rs | 10 ++-- src/main.rs | 45 +++++++++++++++--- src/range_coder.rs | 111 ------------------------------------------- src/rans.rs | 91 +++++++++++++++++++++++++++++++++++ 8 files changed, 155 insertions(+), 126 deletions(-) delete mode 100644 src/range_coder.rs create mode 100644 src/rans.rs diff --git a/Cargo.lock b/Cargo.lock index c241a73..d93dffd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "anyhow" +version = "1.0.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38d9ff5d688f1c13395289f67db01d4826b46dd694e7580accdc3e8430f2d98e" + [[package]] name = "autocfg" version = "1.0.1" @@ -33,6 +39,12 @@ dependencies = [ "autocfg", ] +[[package]] +name = "pico-args" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db8bcd96cb740d03149cbad5518db9fd87126a10ab519c011893b1754134c468" + [[package]] name = "sacabase" version = "2.0.0" @@ -46,5 +58,7 @@ dependencies = [ name = "upkr" version = "0.1.0" dependencies = [ + "anyhow", "cdivsufsort", + "pico-args", ] diff --git a/Cargo.toml b/Cargo.toml index 1bb3bfa..9269315 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,3 +7,5 @@ edition = "2021" [dependencies] cdivsufsort = "2" +pico-args = "0.4" +anyhow = "1" \ No newline at end of file diff --git a/src/context_state.rs b/src/context_state.rs index 81cb3e0..9906807 100644 --- a/src/context_state.rs +++ b/src/context_state.rs @@ -1,4 +1,6 @@ -const INIT_PROB: u16 = 0x8000; +use crate::rans::{PROB_BITS, ONE_PROB}; + +const INIT_PROB: u16 = 1 << (PROB_BITS - 1); const UPDATE_RATE: u32 = 4; #[derive(Clone)] @@ -31,7 +33,7 @@ impl<'a> Context<'a> { pub fn update(&mut self, bit: bool) { let old = self.state.contexts[self.index]; self.state.contexts[self.index] = if bit { - old + (((1 << 16) - old as u32) >> UPDATE_RATE) as u16 + old + ((ONE_PROB - old as u32) >> UPDATE_RATE) as u16 } else { old - (old >> UPDATE_RATE) }; diff --git a/src/lib.rs b/src/lib.rs index cdfe38c..aa735ce 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,7 +2,7 @@ mod context_state; mod greedy_packer; mod lz; mod match_finder; -mod range_coder; +mod rans; pub use greedy_packer::pack; pub use lz::unpack; \ No newline at end of file diff --git a/src/lz.rs b/src/lz.rs index 9c10439..6aa52b8 100644 --- a/src/lz.rs +++ b/src/lz.rs @@ -1,9 +1,9 @@ use crate::context_state::ContextState; -use crate::range_coder::{RangeCoder, RangeDecoder}; +use crate::rans::{RansCoder, RansDecoder}; pub struct LzCoder { contexts: ContextState, - range_coder: RangeCoder, + range_coder: RansCoder, last_offset: usize, } @@ -11,7 +11,7 @@ impl LzCoder { pub fn new() -> LzCoder { LzCoder { contexts: ContextState::new(1 + 255 + 1 + 64 + 64), - range_coder: RangeCoder::new(), + range_coder: RansCoder::new(), last_offset: 0, } } @@ -68,13 +68,13 @@ impl LzCoder { } pub fn unpack(packed_data: &[u8]) -> Vec { - let mut decoder = RangeDecoder::new(packed_data); + let mut decoder = RansDecoder::new(packed_data); let mut contexts = ContextState::new(1 + 255 + 1 + 64 + 64); let mut result = vec![]; let mut offset = 0; fn decode_length( - decoder: &mut RangeDecoder, + decoder: &mut RansDecoder, contexts: &mut ContextState, mut context_index: usize, ) -> usize { diff --git a/src/main.rs b/src/main.rs index d3a372b..f261b67 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,10 +1,41 @@ -fn main() { - let test_data = include_bytes!("../README.md"); +use std::{fs::File, path::PathBuf}; +use std::io::prelude::*; +use anyhow::{bail, Result}; - let packed = upkr::pack(test_data); - dbg!((test_data.len(), packed.len())); +fn main() -> Result<()> { + let mut args = pico_args::Arguments::from_env(); - let unpacked = upkr::unpack(&packed); - dbg!(unpacked.len()); - assert!(test_data == unpacked.as_slice()); + match args.subcommand()?.as_ref().map(|s| s.as_str()) { + None => print_help(), + Some("pack") => { + let infile = args.free_from_os_str::(|s| Ok(s.into()))?; + let outfile = args.free_from_os_str::(|s| Ok(s.into()))?; + + let mut data = vec![]; + File::open(infile)?.read_to_end(&mut data)?; + let packed_data = upkr::pack(&data); + File::create(outfile)?.write_all(&packed_data)?; + } + Some("unpack") => { + let infile = args.free_from_os_str::(|s| Ok(s.into()))?; + let outfile = args.free_from_os_str::(|s| Ok(s.into()))?; + + let mut data = vec![]; + File::open(infile)?.read_to_end(&mut data)?; + let packed_data = upkr::unpack(&data); + File::create(outfile)?.write_all(&packed_data)?; + } + Some(other) => { + bail!("Unknown subcommand '{}'", other); + } + } + + Ok(()) +} + +fn print_help() { + eprintln!("Usage:"); + eprintln!(" upkr pack "); + eprintln!(" upkr unpack "); + std::process::exit(1); } diff --git a/src/range_coder.rs b/src/range_coder.rs deleted file mode 100644 index 89ea633..0000000 --- a/src/range_coder.rs +++ /dev/null @@ -1,111 +0,0 @@ -use crate::context_state::Context; - -pub struct RangeCoder { - buffer: Vec, - low: u64, - range: u64, -} - -const TOTAL: u32 = 65536; - -impl RangeCoder { - pub fn new() -> RangeCoder { - RangeCoder { - buffer: vec![], - low: 0, - range: 1 << 40, - } - } - - pub fn encode_with_context(&mut self, bit: bool, context: &mut Context) { - self.encode_bit(bit, context.prob() as u32); - context.update(bit); - } - - pub fn encode_bit(&mut self, bit: bool, prob: u32) { - let (start, size) = if bit { (0, prob) } else { (prob, TOTAL - prob) }; - self.range /= TOTAL as u64; - self.low += start as u64 * self.range; - self.range *= size as u64; - - while (self.low >> 32) == (self.low + self.range - 1) >> 32 { - self.emit_byte(); - } - - if self.range < 1 << 24 { - self.emit_byte(); - self.emit_byte(); - self.range = (1 << 40) - self.low; - } - } - - pub fn finish(mut self) -> Vec { - while self.range < 1 << 32 { - self.emit_byte(); - } - self.low += 1 << 32; - self.emit_byte(); - self.buffer - } - - fn emit_byte(&mut self) { - self.buffer.push((self.low >> 32).try_into().unwrap()); - self.low = (self.low & 0xffffffff) << 8; - self.range *= 256; - } -} - -pub struct RangeDecoder<'a> { - data: &'a [u8], - code: u64, - low: u64, - range: u64, -} - -impl<'a> RangeDecoder<'a> { - pub fn new(data: &'a [u8]) -> RangeDecoder<'a> { - RangeDecoder { - data, - code: 0, - low: 0, - range: 1, - } - } - - pub fn decode_with_context(&mut self, context: &mut Context) -> bool { - let bit = self.decode_bit(context.prob() as u32); - context.update(bit); - bit - } - - pub fn decode_bit(&mut self, prob: u32) -> bool { - while self.low >> 32 == (self.low + self.range - 1) >> 32 { - self.append_byte(); - } - - if self.range < 1 << 24 { - self.append_byte(); - self.append_byte(); - self.range = (1 << 40) - self.low; - } - - let bit = (self.code - self.low) / (self.range / TOTAL as u64) < prob as u64; - - let (start, size) = if bit { (0, prob) } else { (prob, TOTAL - prob) }; - self.range /= TOTAL as u64; - self.low += start as u64 * self.range; - self.range *= size as u64; - - bit - } - - fn append_byte(&mut self) { - self.code = (self.code & 0xffffffff) << 8; - if !self.data.is_empty() { - self.code |= self.data[0] as u64; - self.data = &self.data[1..]; - } - self.low = (self.low & 0xffffffff) << 8; - self.range <<= 8; - } -} diff --git a/src/rans.rs b/src/rans.rs new file mode 100644 index 0000000..1c798b6 --- /dev/null +++ b/src/rans.rs @@ -0,0 +1,91 @@ +use crate::context_state::Context; + +const L_BITS: u32 = 16; +pub const PROB_BITS: u32 = 12; +pub const ONE_PROB: u32 = 1 << PROB_BITS; + +pub struct RansCoder(Vec); + +impl RansCoder { + pub fn new() -> RansCoder { + RansCoder(Vec::new()) + } + + pub fn encode_with_context(&mut self, bit: bool, context: &mut Context) { + self.encode_bit(bit, context.prob()); + context.update(bit); + } + + pub fn encode_bit(&mut self, bit: bool, prob: u16) { + assert!(prob < 32768); + self.0.push(prob | ((bit as u16) << 15)); + } + + pub fn finish(self) -> Vec { + let mut buffer = vec![]; + let mut state = 1 << L_BITS; + + const MAX_STATE_FACTOR: u32 = 1 << (L_BITS + 8 - PROB_BITS); + for step in self.0.into_iter().rev() { + let prob = step as u32 & 32767; + let (start, prob) = if step & 32768 != 0 { + (0, prob) + } else { + (prob, ONE_PROB - prob) + }; + let max_state = MAX_STATE_FACTOR * prob; + while state >= max_state { + buffer.push(state as u8); + state >>= 8; + } + state = ((state / prob) << PROB_BITS) + (state % prob) + start; + } + + while state > 0 { + buffer.push(state as u8); + state >>= 8; + } + + buffer.reverse(); + buffer + } +} + +pub struct RansDecoder<'a> { + data: &'a [u8], + state: u32, +} + +const PROB_MASK: u32 = ONE_PROB - 1; +const L: u32 = 1 << L_BITS; + +impl<'a> RansDecoder<'a> { + pub fn new(data: &'a [u8]) -> RansDecoder<'a> { + RansDecoder { data, state: 0 } + } + + pub fn decode_with_context(&mut self, context: &mut Context) -> bool { + let bit = self.decode_bit(context.prob()); + context.update(bit); + bit + } + + pub fn decode_bit(&mut self, prob: u16) -> bool { + let prob = prob as u32; + while self.state < L { + self.state = (self.state << 8) | self.data[0] as u32; + self.data = &self.data[1..]; + } + + let bit = (self.state & PROB_MASK) < prob; + + let (start, prob) = if bit { + (0, prob) + } else { + (prob, ONE_PROB - prob) + }; + self.state = prob * (self.state >> PROB_BITS) + (self.state & PROB_MASK) - start; + + bit + } +}