mirror of
https://github.com/exoticorn/upkr.git
synced 2026-01-20 11:36:42 +01:00
change entropy coder from Range to rANS
This commit is contained in:
14
Cargo.lock
generated
14
Cargo.lock
generated
@@ -2,6 +2,12 @@
|
|||||||
# It is not intended for manual editing.
|
# It is not intended for manual editing.
|
||||||
version = 3
|
version = 3
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anyhow"
|
||||||
|
version = "1.0.47"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "38d9ff5d688f1c13395289f67db01d4826b46dd694e7580accdc3e8430f2d98e"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "autocfg"
|
name = "autocfg"
|
||||||
version = "1.0.1"
|
version = "1.0.1"
|
||||||
@@ -33,6 +39,12 @@ dependencies = [
|
|||||||
"autocfg",
|
"autocfg",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pico-args"
|
||||||
|
version = "0.4.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "db8bcd96cb740d03149cbad5518db9fd87126a10ab519c011893b1754134c468"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sacabase"
|
name = "sacabase"
|
||||||
version = "2.0.0"
|
version = "2.0.0"
|
||||||
@@ -46,5 +58,7 @@ dependencies = [
|
|||||||
name = "upkr"
|
name = "upkr"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
"cdivsufsort",
|
"cdivsufsort",
|
||||||
|
"pico-args",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -7,3 +7,5 @@ edition = "2021"
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
cdivsufsort = "2"
|
cdivsufsort = "2"
|
||||||
|
pico-args = "0.4"
|
||||||
|
anyhow = "1"
|
||||||
@@ -1,4 +1,6 @@
|
|||||||
const INIT_PROB: u16 = 0x8000;
|
use crate::rans::{PROB_BITS, ONE_PROB};
|
||||||
|
|
||||||
|
const INIT_PROB: u16 = 1 << (PROB_BITS - 1);
|
||||||
const UPDATE_RATE: u32 = 4;
|
const UPDATE_RATE: u32 = 4;
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
@@ -31,7 +33,7 @@ impl<'a> Context<'a> {
|
|||||||
pub fn update(&mut self, bit: bool) {
|
pub fn update(&mut self, bit: bool) {
|
||||||
let old = self.state.contexts[self.index];
|
let old = self.state.contexts[self.index];
|
||||||
self.state.contexts[self.index] = if bit {
|
self.state.contexts[self.index] = if bit {
|
||||||
old + (((1 << 16) - old as u32) >> UPDATE_RATE) as u16
|
old + ((ONE_PROB - old as u32) >> UPDATE_RATE) as u16
|
||||||
} else {
|
} else {
|
||||||
old - (old >> UPDATE_RATE)
|
old - (old >> UPDATE_RATE)
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ mod context_state;
|
|||||||
mod greedy_packer;
|
mod greedy_packer;
|
||||||
mod lz;
|
mod lz;
|
||||||
mod match_finder;
|
mod match_finder;
|
||||||
mod range_coder;
|
mod rans;
|
||||||
|
|
||||||
pub use greedy_packer::pack;
|
pub use greedy_packer::pack;
|
||||||
pub use lz::unpack;
|
pub use lz::unpack;
|
||||||
10
src/lz.rs
10
src/lz.rs
@@ -1,9 +1,9 @@
|
|||||||
use crate::context_state::ContextState;
|
use crate::context_state::ContextState;
|
||||||
use crate::range_coder::{RangeCoder, RangeDecoder};
|
use crate::rans::{RansCoder, RansDecoder};
|
||||||
|
|
||||||
pub struct LzCoder {
|
pub struct LzCoder {
|
||||||
contexts: ContextState,
|
contexts: ContextState,
|
||||||
range_coder: RangeCoder,
|
range_coder: RansCoder,
|
||||||
last_offset: usize,
|
last_offset: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -11,7 +11,7 @@ impl LzCoder {
|
|||||||
pub fn new() -> LzCoder {
|
pub fn new() -> LzCoder {
|
||||||
LzCoder {
|
LzCoder {
|
||||||
contexts: ContextState::new(1 + 255 + 1 + 64 + 64),
|
contexts: ContextState::new(1 + 255 + 1 + 64 + 64),
|
||||||
range_coder: RangeCoder::new(),
|
range_coder: RansCoder::new(),
|
||||||
last_offset: 0,
|
last_offset: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -68,13 +68,13 @@ impl LzCoder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn unpack(packed_data: &[u8]) -> Vec<u8> {
|
pub fn unpack(packed_data: &[u8]) -> Vec<u8> {
|
||||||
let mut decoder = RangeDecoder::new(packed_data);
|
let mut decoder = RansDecoder::new(packed_data);
|
||||||
let mut contexts = ContextState::new(1 + 255 + 1 + 64 + 64);
|
let mut contexts = ContextState::new(1 + 255 + 1 + 64 + 64);
|
||||||
let mut result = vec![];
|
let mut result = vec![];
|
||||||
let mut offset = 0;
|
let mut offset = 0;
|
||||||
|
|
||||||
fn decode_length(
|
fn decode_length(
|
||||||
decoder: &mut RangeDecoder,
|
decoder: &mut RansDecoder,
|
||||||
contexts: &mut ContextState,
|
contexts: &mut ContextState,
|
||||||
mut context_index: usize,
|
mut context_index: usize,
|
||||||
) -> usize {
|
) -> usize {
|
||||||
|
|||||||
45
src/main.rs
45
src/main.rs
@@ -1,10 +1,41 @@
|
|||||||
fn main() {
|
use std::{fs::File, path::PathBuf};
|
||||||
let test_data = include_bytes!("../README.md");
|
use std::io::prelude::*;
|
||||||
|
use anyhow::{bail, Result};
|
||||||
|
|
||||||
let packed = upkr::pack(test_data);
|
fn main() -> Result<()> {
|
||||||
dbg!((test_data.len(), packed.len()));
|
let mut args = pico_args::Arguments::from_env();
|
||||||
|
|
||||||
let unpacked = upkr::unpack(&packed);
|
match args.subcommand()?.as_ref().map(|s| s.as_str()) {
|
||||||
dbg!(unpacked.len());
|
None => print_help(),
|
||||||
assert!(test_data == unpacked.as_slice());
|
Some("pack") => {
|
||||||
|
let infile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
|
||||||
|
let outfile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
|
||||||
|
|
||||||
|
let mut data = vec![];
|
||||||
|
File::open(infile)?.read_to_end(&mut data)?;
|
||||||
|
let packed_data = upkr::pack(&data);
|
||||||
|
File::create(outfile)?.write_all(&packed_data)?;
|
||||||
|
}
|
||||||
|
Some("unpack") => {
|
||||||
|
let infile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
|
||||||
|
let outfile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
|
||||||
|
|
||||||
|
let mut data = vec![];
|
||||||
|
File::open(infile)?.read_to_end(&mut data)?;
|
||||||
|
let packed_data = upkr::unpack(&data);
|
||||||
|
File::create(outfile)?.write_all(&packed_data)?;
|
||||||
|
}
|
||||||
|
Some(other) => {
|
||||||
|
bail!("Unknown subcommand '{}'", other);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn print_help() {
|
||||||
|
eprintln!("Usage:");
|
||||||
|
eprintln!(" upkr pack <infile> <outfile>");
|
||||||
|
eprintln!(" upkr unpack <infile> <outfile>");
|
||||||
|
std::process::exit(1);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,111 +0,0 @@
|
|||||||
use crate::context_state::Context;
|
|
||||||
|
|
||||||
pub struct RangeCoder {
|
|
||||||
buffer: Vec<u8>,
|
|
||||||
low: u64,
|
|
||||||
range: u64,
|
|
||||||
}
|
|
||||||
|
|
||||||
const TOTAL: u32 = 65536;
|
|
||||||
|
|
||||||
impl RangeCoder {
|
|
||||||
pub fn new() -> RangeCoder {
|
|
||||||
RangeCoder {
|
|
||||||
buffer: vec![],
|
|
||||||
low: 0,
|
|
||||||
range: 1 << 40,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn encode_with_context(&mut self, bit: bool, context: &mut Context) {
|
|
||||||
self.encode_bit(bit, context.prob() as u32);
|
|
||||||
context.update(bit);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn encode_bit(&mut self, bit: bool, prob: u32) {
|
|
||||||
let (start, size) = if bit { (0, prob) } else { (prob, TOTAL - prob) };
|
|
||||||
self.range /= TOTAL as u64;
|
|
||||||
self.low += start as u64 * self.range;
|
|
||||||
self.range *= size as u64;
|
|
||||||
|
|
||||||
while (self.low >> 32) == (self.low + self.range - 1) >> 32 {
|
|
||||||
self.emit_byte();
|
|
||||||
}
|
|
||||||
|
|
||||||
if self.range < 1 << 24 {
|
|
||||||
self.emit_byte();
|
|
||||||
self.emit_byte();
|
|
||||||
self.range = (1 << 40) - self.low;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn finish(mut self) -> Vec<u8> {
|
|
||||||
while self.range < 1 << 32 {
|
|
||||||
self.emit_byte();
|
|
||||||
}
|
|
||||||
self.low += 1 << 32;
|
|
||||||
self.emit_byte();
|
|
||||||
self.buffer
|
|
||||||
}
|
|
||||||
|
|
||||||
fn emit_byte(&mut self) {
|
|
||||||
self.buffer.push((self.low >> 32).try_into().unwrap());
|
|
||||||
self.low = (self.low & 0xffffffff) << 8;
|
|
||||||
self.range *= 256;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct RangeDecoder<'a> {
|
|
||||||
data: &'a [u8],
|
|
||||||
code: u64,
|
|
||||||
low: u64,
|
|
||||||
range: u64,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> RangeDecoder<'a> {
|
|
||||||
pub fn new(data: &'a [u8]) -> RangeDecoder<'a> {
|
|
||||||
RangeDecoder {
|
|
||||||
data,
|
|
||||||
code: 0,
|
|
||||||
low: 0,
|
|
||||||
range: 1,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn decode_with_context(&mut self, context: &mut Context) -> bool {
|
|
||||||
let bit = self.decode_bit(context.prob() as u32);
|
|
||||||
context.update(bit);
|
|
||||||
bit
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn decode_bit(&mut self, prob: u32) -> bool {
|
|
||||||
while self.low >> 32 == (self.low + self.range - 1) >> 32 {
|
|
||||||
self.append_byte();
|
|
||||||
}
|
|
||||||
|
|
||||||
if self.range < 1 << 24 {
|
|
||||||
self.append_byte();
|
|
||||||
self.append_byte();
|
|
||||||
self.range = (1 << 40) - self.low;
|
|
||||||
}
|
|
||||||
|
|
||||||
let bit = (self.code - self.low) / (self.range / TOTAL as u64) < prob as u64;
|
|
||||||
|
|
||||||
let (start, size) = if bit { (0, prob) } else { (prob, TOTAL - prob) };
|
|
||||||
self.range /= TOTAL as u64;
|
|
||||||
self.low += start as u64 * self.range;
|
|
||||||
self.range *= size as u64;
|
|
||||||
|
|
||||||
bit
|
|
||||||
}
|
|
||||||
|
|
||||||
fn append_byte(&mut self) {
|
|
||||||
self.code = (self.code & 0xffffffff) << 8;
|
|
||||||
if !self.data.is_empty() {
|
|
||||||
self.code |= self.data[0] as u64;
|
|
||||||
self.data = &self.data[1..];
|
|
||||||
}
|
|
||||||
self.low = (self.low & 0xffffffff) << 8;
|
|
||||||
self.range <<= 8;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
91
src/rans.rs
Normal file
91
src/rans.rs
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
use crate::context_state::Context;
|
||||||
|
|
||||||
|
const L_BITS: u32 = 16;
|
||||||
|
pub const PROB_BITS: u32 = 12;
|
||||||
|
pub const ONE_PROB: u32 = 1 << PROB_BITS;
|
||||||
|
|
||||||
|
pub struct RansCoder(Vec<u16>);
|
||||||
|
|
||||||
|
impl RansCoder {
|
||||||
|
pub fn new() -> RansCoder {
|
||||||
|
RansCoder(Vec::new())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn encode_with_context(&mut self, bit: bool, context: &mut Context) {
|
||||||
|
self.encode_bit(bit, context.prob());
|
||||||
|
context.update(bit);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn encode_bit(&mut self, bit: bool, prob: u16) {
|
||||||
|
assert!(prob < 32768);
|
||||||
|
self.0.push(prob | ((bit as u16) << 15));
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn finish(self) -> Vec<u8> {
|
||||||
|
let mut buffer = vec![];
|
||||||
|
let mut state = 1 << L_BITS;
|
||||||
|
|
||||||
|
const MAX_STATE_FACTOR: u32 = 1 << (L_BITS + 8 - PROB_BITS);
|
||||||
|
for step in self.0.into_iter().rev() {
|
||||||
|
let prob = step as u32 & 32767;
|
||||||
|
let (start, prob) = if step & 32768 != 0 {
|
||||||
|
(0, prob)
|
||||||
|
} else {
|
||||||
|
(prob, ONE_PROB - prob)
|
||||||
|
};
|
||||||
|
let max_state = MAX_STATE_FACTOR * prob;
|
||||||
|
while state >= max_state {
|
||||||
|
buffer.push(state as u8);
|
||||||
|
state >>= 8;
|
||||||
|
}
|
||||||
|
state = ((state / prob) << PROB_BITS) + (state % prob) + start;
|
||||||
|
}
|
||||||
|
|
||||||
|
while state > 0 {
|
||||||
|
buffer.push(state as u8);
|
||||||
|
state >>= 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
buffer.reverse();
|
||||||
|
buffer
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct RansDecoder<'a> {
|
||||||
|
data: &'a [u8],
|
||||||
|
state: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
const PROB_MASK: u32 = ONE_PROB - 1;
|
||||||
|
const L: u32 = 1 << L_BITS;
|
||||||
|
|
||||||
|
impl<'a> RansDecoder<'a> {
|
||||||
|
pub fn new(data: &'a [u8]) -> RansDecoder<'a> {
|
||||||
|
RansDecoder { data, state: 0 }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn decode_with_context(&mut self, context: &mut Context) -> bool {
|
||||||
|
let bit = self.decode_bit(context.prob());
|
||||||
|
context.update(bit);
|
||||||
|
bit
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn decode_bit(&mut self, prob: u16) -> bool {
|
||||||
|
let prob = prob as u32;
|
||||||
|
while self.state < L {
|
||||||
|
self.state = (self.state << 8) | self.data[0] as u32;
|
||||||
|
self.data = &self.data[1..];
|
||||||
|
}
|
||||||
|
|
||||||
|
let bit = (self.state & PROB_MASK) < prob;
|
||||||
|
|
||||||
|
let (start, prob) = if bit {
|
||||||
|
(0, prob)
|
||||||
|
} else {
|
||||||
|
(prob, ONE_PROB - prob)
|
||||||
|
};
|
||||||
|
self.state = prob * (self.state >> PROB_BITS) + (self.state & PROB_MASK) - start;
|
||||||
|
|
||||||
|
bit
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user