change entropy coder from Range to rANS

This commit is contained in:
2021-11-22 23:14:24 +01:00
parent 479a2e2c89
commit 470b778340
8 changed files with 155 additions and 126 deletions

View File

@@ -1,4 +1,6 @@
const INIT_PROB: u16 = 0x8000;
use crate::rans::{PROB_BITS, ONE_PROB};
const INIT_PROB: u16 = 1 << (PROB_BITS - 1);
const UPDATE_RATE: u32 = 4;
#[derive(Clone)]
@@ -31,7 +33,7 @@ impl<'a> Context<'a> {
pub fn update(&mut self, bit: bool) {
let old = self.state.contexts[self.index];
self.state.contexts[self.index] = if bit {
old + (((1 << 16) - old as u32) >> UPDATE_RATE) as u16
old + ((ONE_PROB - old as u32) >> UPDATE_RATE) as u16
} else {
old - (old >> UPDATE_RATE)
};

View File

@@ -2,7 +2,7 @@ mod context_state;
mod greedy_packer;
mod lz;
mod match_finder;
mod range_coder;
mod rans;
pub use greedy_packer::pack;
pub use lz::unpack;

View File

@@ -1,9 +1,9 @@
use crate::context_state::ContextState;
use crate::range_coder::{RangeCoder, RangeDecoder};
use crate::rans::{RansCoder, RansDecoder};
pub struct LzCoder {
contexts: ContextState,
range_coder: RangeCoder,
range_coder: RansCoder,
last_offset: usize,
}
@@ -11,7 +11,7 @@ impl LzCoder {
pub fn new() -> LzCoder {
LzCoder {
contexts: ContextState::new(1 + 255 + 1 + 64 + 64),
range_coder: RangeCoder::new(),
range_coder: RansCoder::new(),
last_offset: 0,
}
}
@@ -68,13 +68,13 @@ impl LzCoder {
}
pub fn unpack(packed_data: &[u8]) -> Vec<u8> {
let mut decoder = RangeDecoder::new(packed_data);
let mut decoder = RansDecoder::new(packed_data);
let mut contexts = ContextState::new(1 + 255 + 1 + 64 + 64);
let mut result = vec![];
let mut offset = 0;
fn decode_length(
decoder: &mut RangeDecoder,
decoder: &mut RansDecoder,
contexts: &mut ContextState,
mut context_index: usize,
) -> usize {

View File

@@ -1,10 +1,41 @@
fn main() {
let test_data = include_bytes!("../README.md");
use std::{fs::File, path::PathBuf};
use std::io::prelude::*;
use anyhow::{bail, Result};
let packed = upkr::pack(test_data);
dbg!((test_data.len(), packed.len()));
fn main() -> Result<()> {
let mut args = pico_args::Arguments::from_env();
let unpacked = upkr::unpack(&packed);
dbg!(unpacked.len());
assert!(test_data == unpacked.as_slice());
match args.subcommand()?.as_ref().map(|s| s.as_str()) {
None => print_help(),
Some("pack") => {
let infile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
let outfile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
let mut data = vec![];
File::open(infile)?.read_to_end(&mut data)?;
let packed_data = upkr::pack(&data);
File::create(outfile)?.write_all(&packed_data)?;
}
Some("unpack") => {
let infile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
let outfile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
let mut data = vec![];
File::open(infile)?.read_to_end(&mut data)?;
let packed_data = upkr::unpack(&data);
File::create(outfile)?.write_all(&packed_data)?;
}
Some(other) => {
bail!("Unknown subcommand '{}'", other);
}
}
Ok(())
}
/// Writes the command-line usage summary to stderr, then terminates the
/// process with exit status 1.
fn print_help() {
    const USAGE: [&str; 3] = [
        "Usage:",
        " upkr pack <infile> <outfile>",
        " upkr unpack <infile> <outfile>",
    ];
    for line in USAGE.iter() {
        eprintln!("{}", line);
    }
    std::process::exit(1);
}

View File

@@ -1,111 +0,0 @@
use crate::context_state::Context;
/// Binary range encoder that accumulates its output in an in-memory buffer.
pub struct RangeCoder {
// Bytes emitted so far.
buffer: Vec<u8>,
// Lower bound of the current coding interval; `emit_byte` outputs `low >> 32`.
low: u64,
// Width of the current coding interval.
range: u64,
}
/// Probability denominator: `prob` arguments are expressed as parts of `TOTAL`.
const TOTAL: u32 = 65536;
impl RangeCoder {
/// Creates an encoder with an empty buffer and the interval `[0, 1 << 40)`.
pub fn new() -> RangeCoder {
RangeCoder {
buffer: vec![],
low: 0,
range: 1 << 40,
}
}
/// Encodes `bit` using the probability reported by `context`, then updates
/// `context` with the bit that was coded.
pub fn encode_with_context(&mut self, bit: bool, context: &mut Context) {
self.encode_bit(bit, context.prob() as u32);
context.update(bit);
}
/// Encodes `bit`, where `prob` (out of `TOTAL`) is the share of the interval
/// assigned to a set bit.
pub fn encode_bit(&mut self, bit: bool, prob: u32) {
// A set bit takes the sub-interval [0, prob); a clear bit takes [prob, TOTAL).
let (start, size) = if bit { (0, prob) } else { (prob, TOTAL - prob) };
self.range /= TOTAL as u64;
self.low += start as u64 * self.range;
self.range *= size as u64;
// While the first and last value of the interval agree above bit 32,
// that byte is settled and is written out.
while (self.low >> 32) == (self.low + self.range - 1) >> 32 {
self.emit_byte();
}
// The interval has become narrow without its top byte settling: emit two
// bytes and reset the range so the interval ends at 1 << 40.
if self.range < 1 << 24 {
self.emit_byte();
self.emit_byte();
self.range = (1 << 40) - self.low;
}
}
/// Flushes the encoder and returns the encoded bytes.
///
/// Emits bytes while `range` is below `1 << 32`, then adds `1 << 32` to
/// `low` and emits one final byte.
pub fn finish(mut self) -> Vec<u8> {
while self.range < 1 << 32 {
self.emit_byte();
}
self.low += 1 << 32;
self.emit_byte();
self.buffer
}
/// Appends `low >> 32` to the buffer as one byte and shifts the interval up
/// by eight bits.
///
/// Panics (via `unwrap`) if `low >> 32` does not fit in a `u8`.
fn emit_byte(&mut self) {
self.buffer.push((self.low >> 32).try_into().unwrap());
// Keep only the low 32 bits of `low` before shifting.
self.low = (self.low & 0xffffffff) << 8;
self.range *= 256;
}
}
/// Binary range decoder reading from a borrowed byte slice.
pub struct RangeDecoder<'a> {
// Input bytes that have not been consumed yet.
data: &'a [u8],
// Window of input bytes shifted in so far by `append_byte`.
code: u64,
// Lower bound of the current coding interval.
low: u64,
// Width of the current coding interval.
range: u64,
}
impl<'a> RangeDecoder<'a> {
/// Creates a decoder over `data`. No input is read until the first
/// `decode_bit` call; the initial state is `code = 0`, `low = 0`, `range = 1`.
pub fn new(data: &'a [u8]) -> RangeDecoder<'a> {
RangeDecoder {
data,
code: 0,
low: 0,
range: 1,
}
}
/// Decodes one bit using the probability reported by `context`, then updates
/// `context` with the decoded bit.
pub fn decode_with_context(&mut self, context: &mut Context) -> bool {
let bit = self.decode_bit(context.prob() as u32);
context.update(bit);
bit
}
/// Decodes one bit, where `prob` (out of `TOTAL`) is the share of the
/// interval assigned to a set bit.
pub fn decode_bit(&mut self, prob: u32) -> bool {
// Pull in input while the first and last value of the interval agree
// above bit 32.
while self.low >> 32 == (self.low + self.range - 1) >> 32 {
self.append_byte();
}
// Narrow interval: read two more bytes and reset the range so the
// interval ends at 1 << 40.
if self.range < 1 << 24 {
self.append_byte();
self.append_byte();
self.range = (1 << 40) - self.low;
}
// The bit is set when the code's offset into the interval, measured in
// steps of range / TOTAL, is below `prob`.
let bit = (self.code - self.low) / (self.range / TOTAL as u64) < prob as u64;
// Narrow the interval to the sub-interval of the decoded bit:
// [0, prob) for a set bit, [prob, TOTAL) for a clear bit.
let (start, size) = if bit { (0, prob) } else { (prob, TOTAL - prob) };
self.range /= TOTAL as u64;
self.low += start as u64 * self.range;
self.range *= size as u64;
bit
}
/// Shifts `code`, `low` and `range` up by eight bits and ORs the next input
/// byte into `code`. When `data` is empty nothing is ORed in, so the new low
/// byte of `code` is zero.
fn append_byte(&mut self) {
self.code = (self.code & 0xffffffff) << 8;
if !self.data.is_empty() {
self.code |= self.data[0] as u64;
self.data = &self.data[1..];
}
self.low = (self.low & 0xffffffff) << 8;
self.range <<= 8;
}
}

91
src/rans.rs Normal file
View File

@@ -0,0 +1,91 @@
use crate::context_state::Context;
/// Bit position of the state threshold `1 << L_BITS`: the encoder state starts
/// at that value and the decoder reads input bytes while its state is below it.
const L_BITS: u32 = 16;
/// Number of bits used to express a probability.
pub const PROB_BITS: u32 = 12;
/// Probability denominator: a probability `p` stands for `p / ONE_PROB`.
pub const ONE_PROB: u32 = 1 << PROB_BITS;
/// Binary rANS encoder.
///
/// Encoding calls only record what has to be coded; the byte stream is
/// produced by [`RansCoder::finish`], which walks the recorded steps from last
/// to first.
///
/// Each recorded step is a `u16`: bit 15 holds the coded bit and bits 0..15
/// hold the probability (out of `ONE_PROB`) of a set bit.
pub struct RansCoder(Vec<u16>);

impl RansCoder {
    /// Bit of a recorded step that stores the coded bit itself.
    const BIT_FLAG: u16 = 1 << 15;

    /// Creates an encoder with no recorded steps.
    pub fn new() -> RansCoder {
        RansCoder(Vec::new())
    }

    /// Records `bit` using the probability reported by `context`, then updates
    /// `context` with the bit that was coded.
    pub fn encode_with_context(&mut self, bit: bool, context: &mut Context) {
        self.encode_bit(bit, context.prob());
        context.update(bit);
    }

    /// Records `bit`, where `prob` (out of `ONE_PROB`) is the probability of a
    /// set bit.
    ///
    /// # Panics
    ///
    /// Panics if `prob` is greater than `ONE_PROB`, if it does not fit in the
    /// 15 bits available for it, or if it assigns probability zero to `bit`
    /// (`prob == 0` for a set bit, `prob == ONE_PROB` for a clear bit).
    /// `finish` cannot code a zero-width interval: its renormalisation loop
    /// would never terminate.
    pub fn encode_bit(&mut self, bit: bool, prob: u16) {
        let prob_of_one = u32::from(prob);
        assert!(
            prob < Self::BIT_FLAG && prob_of_one <= ONE_PROB,
            "probability {} is out of range (maximum is {})",
            prob,
            ONE_PROB
        );
        let interval = if bit {
            prob_of_one
        } else {
            ONE_PROB - prob_of_one
        };
        assert!(
            interval > 0,
            "cannot encode bit {} with probability {}/{}: the bit is modelled as impossible",
            bit,
            prob,
            ONE_PROB
        );
        self.0.push(prob | (u16::from(bit) << 15));
    }

    /// Encodes all recorded steps and returns the resulting bytes.
    ///
    /// Steps are processed in reverse order and the bytes are pushed low byte
    /// first; the buffer is reversed once at the end.
    pub fn finish(self) -> Vec<u8> {
        // A step with interval width `size` may only be applied while the
        // state is below `MAX_STATE_FACTOR * size`.
        const MAX_STATE_FACTOR: u32 = 1 << (L_BITS + 8 - PROB_BITS);

        let mut buffer = vec![];
        let mut state: u32 = 1 << L_BITS;
        for step in self.0.into_iter().rev() {
            let prob_of_one = u32::from(step & !Self::BIT_FLAG);
            // A set bit owns [0, prob); a clear bit owns [prob, ONE_PROB).
            let (start, size) = if step & Self::BIT_FLAG != 0 {
                (0, prob_of_one)
            } else {
                (prob_of_one, ONE_PROB - prob_of_one)
            };
            // Renormalise: move low bytes of the state to the output until the
            // step can be applied. Truncation to `u8` is intentional.
            let max_state = MAX_STATE_FACTOR * size;
            while state >= max_state {
                buffer.push(state as u8);
                state >>= 8;
            }
            state = ((state / size) << PROB_BITS) + (state % size) + start;
        }
        // Flush whatever is left of the final state.
        while state > 0 {
            buffer.push(state as u8);
            state >>= 8;
        }
        buffer.reverse();
        buffer
    }
}
/// Binary rANS decoder reading from a borrowed byte slice.
pub struct RansDecoder<'a> {
    /// Input bytes that have not been consumed yet.
    data: &'a [u8],
    /// Current coder state; refilled from `data` whenever it drops below `L`.
    state: u32,
}

/// Mask selecting the low `PROB_BITS` bits of the state.
const PROB_MASK: u32 = ONE_PROB - 1;
/// Threshold below which the decoder state is refilled from the input.
const L: u32 = 1 << L_BITS;

impl<'a> RansDecoder<'a> {
    /// Creates a decoder over `data`. No input is read until the first
    /// `decode_bit` call.
    pub fn new(data: &'a [u8]) -> RansDecoder<'a> {
        RansDecoder { data, state: 0 }
    }

    /// Decodes one bit using the probability reported by `context`, then
    /// updates `context` with the decoded bit.
    ///
    /// # Panics
    ///
    /// Panics if the input runs out, see [`RansDecoder::decode_bit`].
    pub fn decode_with_context(&mut self, context: &mut Context) -> bool {
        let bit = self.decode_bit(context.prob());
        context.update(bit);
        bit
    }

    /// Decodes one bit, where `prob` (out of `ONE_PROB`) is the probability of
    /// a set bit.
    ///
    /// # Panics
    ///
    /// Panics if the state has to be refilled but the input is exhausted,
    /// which happens for empty or truncated data.
    pub fn decode_bit(&mut self, prob: u16) -> bool {
        let prob = u32::from(prob);
        // Refill: shift input bytes into the state until it reaches `L`.
        while self.state < L {
            let remaining = self.data;
            let (&byte, rest) = remaining
                .split_first()
                .expect("rANS input ended before decoding was complete");
            self.state = (self.state << 8) | u32::from(byte);
            self.data = rest;
        }

        // The low PROB_BITS bits of the state select the bit: values below
        // `prob` belong to a set bit, the remainder to a clear bit.
        let slot = self.state & PROB_MASK;
        let bit = slot < prob;
        let (start, size) = if bit {
            (0, prob)
        } else {
            (prob, ONE_PROB - prob)
        };
        self.state = size * (self.state >> PROB_BITS) + slot - start;
        bit
    }
}