7 Commits

12 changed files with 253 additions and 38 deletions

5
c_unpacker/.gitignore vendored Normal file
View File

@@ -0,0 +1,5 @@
unpack
unpack_bitstream
unpack_debug
*.upk

10
c_unpacker/Makefile Normal file
View File

@@ -0,0 +1,10 @@
# Default target: builds the two optimized unpacker variants.
# unpack_debug is not part of `all` and has to be requested explicitly.
all: unpack unpack_bitstream
# Optimized build of the default (byte-wise) decoder.
unpack: main.c unpack.c
cc -O2 -o unpack main.c unpack.c
# Same sources, but with UPKR_BITSTREAM defined, which selects the bit-wise
# state refill code path guarded by #ifdef UPKR_BITSTREAM in unpack.c.
unpack_bitstream: main.c unpack.c
cc -O2 -D UPKR_BITSTREAM -o unpack_bitstream main.c unpack.c
# Build of the default decoder with debug information (-g) and without -O2.
unpack_debug: main.c unpack.c
cc -g -o unpack_debug main.c unpack.c

25
c_unpacker/main.c Normal file
View File

@@ -0,0 +1,25 @@
#include <stdio.h>
#include <stdlib.h>
int upkr_unpack(void* destination, void* compressed_data);
/*
 * Command-line driver for the reference unpacker.
 *
 * Usage: <program> <compressed file> <output file>
 *
 * Reads at most 1 MiB from the file named by argv[1], unpacks it with
 * upkr_unpack() and writes the result to the file named by argv[2].
 *
 * Returns 0 on success and 1 if the arguments are missing, a buffer cannot
 * be allocated, or a file cannot be opened or written.
 *
 * NOTE: upkr_unpack() receives no buffer sizes, so data that unpacks to more
 * than the 1 MiB output buffer still overflows it. Never run this on
 * untrusted input.
 */
int main(int argn, char** argv) {
    enum { BUFFER_SIZE = 1024*1024 };
    void* input_buffer = NULL;
    void* output_buffer = NULL;
    FILE* in_file;
    FILE* out_file;
    int in_size;
    int out_size;
    size_t written;
    int exit_code = 1;

    if(argn < 3) {
        fprintf(stderr, "Usage: %s <compressed file> <output file>\n",
                argn > 0 ? argv[0] : "unpack");
        return 1;
    }

    /* The input buffer is zero-filled so that a decoder reading past the end
     * of a short file sees zeros instead of uninitialized heap memory. */
    input_buffer = calloc(1, BUFFER_SIZE);
    output_buffer = malloc(BUFFER_SIZE);
    if(input_buffer == NULL || output_buffer == NULL) {
        fprintf(stderr, "Out of memory\n");
        goto cleanup;
    }

    in_file = fopen(argv[1], "rb");
    if(in_file == NULL) {
        perror(argv[1]);
        goto cleanup;
    }
    in_size = (int)fread(input_buffer, 1, BUFFER_SIZE, in_file);
    fclose(in_file);
    printf("Compressed size: %d\n", in_size);

    out_size = upkr_unpack(output_buffer, input_buffer);
    printf("Uncompressed size: %d\n", out_size);

    out_file = fopen(argv[2], "wb");
    if(out_file == NULL) {
        perror(argv[2]);
        goto cleanup;
    }
    written = fwrite(output_buffer, 1, out_size, out_file);
    /* fclose() flushes buffered data, so its result matters as well. */
    if(fclose(out_file) != 0 || written != (size_t)out_size) {
        fprintf(stderr, "Failed to write %s\n", argv[2]);
        goto cleanup;
    }

    exit_code = 0;

cleanup:
    free(output_buffer);
    free(input_buffer);
    return exit_code;
}

4
c_unpacker/readme.txt Normal file
View File

@@ -0,0 +1,4 @@
A very simple unpacker in C, intended as a reference for people who want to implement their own unpacker.
It is absolutely not production ready: it makes no effort to ensure that the output buffer can actually
hold the uncompressed data.
!!! Never run on untrusted input !!!

95
c_unpacker/unpack.c Normal file
View File

@@ -0,0 +1,95 @@
/* Short fixed-purpose integer aliases used throughout the unpacker. */
typedef unsigned char u8;
typedef unsigned short u16;
/* unsigned long is at least 32 bits wide on every conforming C implementation. */
typedef unsigned long u32;
/* Read cursor into the compressed input; advanced as the decoder consumes bytes. */
u8* upkr_data_ptr;
/*
 * Adaptive probabilities, one per context, laid out the way upkr_unpack
 * indexes them:
 *   [0]              literal-vs-match flag
 *   [1..255]         literal bit-tree contexts
 *   [256]            "a new offset follows" flag
 *   [257..257+63]    contexts passed to upkr_decode_length for the offset
 *   [257+64..]       contexts passed to upkr_decode_length for the match length
 */
u8 upkr_probs[1 + 255 + 1 + 2*32 + 2*32];
#ifdef UPKR_BITSTREAM
/* Bit-wise variant: the state is refilled one bit at a time and fits in 16 bits. */
u16 upkr_state;
/* Input byte currently being consumed, least significant bit first. */
u8 upkr_current_byte;
/* Number of bits of upkr_current_byte that have not been consumed yet. */
int upkr_bits_left;
#else
/* Byte-wise variant: the state is refilled eight bits at a time. */
u32 upkr_state;
#endif
int upkr_decode_bit(int context_index) {
#ifdef UPKR_BITSTREAM
while(upkr_state < 32768) {
if(upkr_bits_left == 0) {
upkr_current_byte = *upkr_data_ptr++;
upkr_bits_left = 8;
}
upkr_state = (upkr_state << 1) + (upkr_current_byte & 1);
upkr_current_byte >>= 1;
--upkr_bits_left;
}
#else
while(upkr_state < 4096) {
upkr_state = (upkr_state << 8) | *upkr_data_ptr++;
}
#endif
int prob = upkr_probs[context_index];
int bit = (upkr_state & 255) < prob ? 1 : 0;
if(bit) {
upkr_state = prob * (upkr_state >> 8) + (upkr_state & 255);
upkr_probs[context_index] = prob + ((256 - prob + 8) >> 4);
} else {
upkr_state = (256 - prob) * (upkr_state >> 8) + (upkr_state & 255) - prob;
upkr_probs[context_index] = prob - ((prob + 8) >> 4);
}
return bit;
}
/*
 * Decodes a variable-length positive integer.
 *
 * Each round reads a "continue" flag from context_index; while it is set, one
 * payload bit is read from context_index + 1 and stored at the next higher
 * bit position, and the context index advances by two. Once the flag is
 * clear, an implicit top bit is placed above the payload bits, so the result
 * is always at least 1.
 */
int upkr_decode_length(int context_index) {
    int value = 0;
    int shift;
    for(shift = 0; upkr_decode_bit(context_index); ++shift, context_index += 2) {
        if(upkr_decode_bit(context_index + 1)) {
            value |= 1 << shift;
        }
    }
    return value | (1 << shift);
}
int upkr_unpack(void* destination, void* compressed_data) {
upkr_data_ptr = (u8*)compressed_data;
upkr_state = 0;
#ifdef UPKR_BITSTREAM
upkr_bits_left = 0;
#endif
for(int i = 0; i < sizeof(upkr_probs); ++i)
upkr_probs[i] = 128;
u8* write_ptr = (u8*)destination;
int prev_was_match = 0;
int offset = 0;
for(;;) {
if(upkr_decode_bit(0)) {
if(prev_was_match || upkr_decode_bit(256)) {
offset = upkr_decode_length(257) - 1;
if(offset == 0) {
break;
}
}
int length = upkr_decode_length(257 + 64);
while(length--) {
*write_ptr = write_ptr[-offset];
++write_ptr;
}
prev_was_match = 1;
} else {
int byte = 1;
while(byte < 256) {
int bit = upkr_decode_bit(byte);
byte = (byte << 1) + bit;
}
*write_ptr++ = byte;
prev_was_match = 0;
}
}
return write_ptr - (u8*)destination;
}

View File

@@ -6,7 +6,7 @@ const UPDATE_ADD: u32 = 8;
#[derive(Clone)]
pub struct ContextState {
contexts: Vec<u16>,
contexts: Vec<u8>,
}
pub struct Context<'a> {
@@ -17,7 +17,7 @@ pub struct Context<'a> {
impl ContextState {
pub fn new(size: usize) -> ContextState {
ContextState {
contexts: vec![INIT_PROB; size],
contexts: vec![INIT_PROB as u8; size],
}
}
@@ -28,15 +28,15 @@ impl ContextState {
impl<'a> Context<'a> {
pub fn prob(&self) -> u16 {
self.state.contexts[self.index]
self.state.contexts[self.index] as u16
}
pub fn update(&mut self, bit: bool) {
let old = self.state.contexts[self.index];
self.state.contexts[self.index] = if bit {
old + ((ONE_PROB - old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u16
old + ((ONE_PROB - old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8
} else {
old - ((old + UPDATE_ADD as u16) >> UPDATE_RATE)
old - ((old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8
};
}
}

View File

@@ -3,9 +3,13 @@ use crate::match_finder::MatchFinder;
use crate::rans::RansCoder;
use crate::ProgressCallback;
pub fn pack(data: &[u8], mut progress_callback: Option<ProgressCallback>) -> Vec<u8> {
pub fn pack(
data: &[u8],
use_bitstream: bool,
mut progress_callback: Option<ProgressCallback>,
) -> Vec<u8> {
let mut match_finder = MatchFinder::new(data);
let mut rans_coder = RansCoder::new();
let mut rans_coder = RansCoder::new(use_bitstream);
let mut state = lz::CoderState::new();
let mut pos = 0;

View File

@@ -2,17 +2,31 @@ mod context_state;
mod greedy_packer;
mod lz;
mod match_finder;
mod rans;
mod parsing_packer;
mod rans;
pub use lz::unpack;
pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize);
pub fn pack(data: &[u8], level: u8, progress_callback: Option<ProgressCallback>) -> Vec<u8> {
pub fn pack(
data: &[u8],
level: u8,
use_bitstream: bool,
progress_callback: Option<ProgressCallback>,
) -> Vec<u8> {
if level == 0 {
greedy_packer::pack(data, progress_callback)
greedy_packer::pack(data, use_bitstream, progress_callback)
} else {
parsing_packer::pack(data, level, progress_callback)
parsing_packer::pack(data, level, use_bitstream, progress_callback)
}
}
/// Estimates the size of packed data in bytes, with fractional precision.
///
/// The leading bytes of `data` are folded into a state value the same way the
/// byte-wise decoder refills its state (8 bits at a time until the state
/// reaches 4096). The result is the number of bytes left after that, plus the
/// bits still held in the state (`log2(state)`) converted to bytes.
///
/// Input that ends before the state reaches 4096 (including empty input) no
/// longer panics: whatever state was accumulated is used, and a state of zero
/// contributes nothing to the result.
///
/// NOTE(review): the refill threshold matches the byte-wise format only; it
/// looks like data packed in bitstream mode is not modelled here — confirm
/// against the callers.
pub fn compressed_size(data: &[u8]) -> f32 {
    // Lower bound of the normalized state range of the byte-wise decoder.
    const STATE_LOWER_BOUND: u32 = 4096;

    let mut state: u32 = 0;
    let mut consumed = 0;
    for &byte in data {
        if state >= STATE_LOWER_BOUND {
            break;
        }
        state = (state << 8) | u32::from(byte);
        consumed += 1;
    }

    let remaining = (data.len() - consumed) as f32;
    if state == 0 {
        // log2(0) is -inf; a stream without any set bit carries no state information.
        remaining
    } else {
        remaining + (state as f32).log2() / 8.
    }
}

View File

@@ -40,7 +40,9 @@ impl Op {
pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState) {
encode_bit(coder, state, 0, true);
encode_bit(coder, state, 256, true);
if !state.prev_was_match {
encode_bit(coder, state, 256, true);
}
encode_length(coder, state, 257, 1);
}
@@ -75,7 +77,7 @@ fn encode_length(
pub struct CoderState {
contexts: ContextState,
last_offset: u32,
prev_was_match: bool
prev_was_match: bool,
}
impl CoderState {
@@ -83,7 +85,7 @@ impl CoderState {
CoderState {
contexts: ContextState::new(1 + 255 + 1 + 64 + 64),
last_offset: 0,
prev_was_match: false
prev_was_match: false,
}
}
@@ -92,8 +94,8 @@ impl CoderState {
}
}
pub fn unpack(packed_data: &[u8]) -> Vec<u8> {
let mut decoder = RansDecoder::new(packed_data);
pub fn unpack(packed_data: &[u8], use_bitstream: bool) -> Vec<u8> {
let mut decoder = RansDecoder::new(packed_data, use_bitstream);
let mut contexts = ContextState::new(1 + 255 + 1 + 64 + 64);
let mut result = vec![];
let mut offset = 0;

View File

@@ -9,6 +9,7 @@ fn main() -> Result<()> {
None => print_help(),
Some("pack") => {
let level = args.opt_value_from_str(["-l", "--level"])?.unwrap_or(2u8);
let use_bitstream = args.contains(["-b", "--bitstream"]);
let infile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
let outfile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
@@ -21,6 +22,7 @@ fn main() -> Result<()> {
let packed_data = upkr::pack(
&data,
level,
use_bitstream,
Some(&mut |pos| {
pb.set(pos as u64);
}),
@@ -36,12 +38,14 @@ fn main() -> Result<()> {
File::create(outfile)?.write_all(&packed_data)?;
}
Some("unpack") => {
let use_bitstream = args.contains(["-b", "--bitstream"]);
let infile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
let outfile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
let mut data = vec![];
File::open(infile)?.read_to_end(&mut data)?;
let packed_data = upkr::unpack(&data);
let packed_data = upkr::unpack(&data, use_bitstream);
File::create(outfile)?.write_all(&packed_data)?;
}
Some(other) => {

View File

@@ -6,7 +6,7 @@ use crate::match_finder::MatchFinder;
use crate::rans::{CostCounter, RansCoder};
use crate::{lz, ProgressCallback};
pub fn pack(data: &[u8], level: u8, progress_cb: Option<ProgressCallback>) -> Vec<u8> {
pub fn pack(data: &[u8], level: u8, use_bitstream: bool, progress_cb: Option<ProgressCallback>) -> Vec<u8> {
let mut parse = parse(data, Config::from_level(level), progress_cb);
let mut ops = vec![];
while let Some(link) = parse {
@@ -14,7 +14,7 @@ pub fn pack(data: &[u8], level: u8, progress_cb: Option<ProgressCallback>) -> Ve
parse = link.prev.clone();
}
let mut state = lz::CoderState::new();
let mut coder = RansCoder::new();
let mut coder = RansCoder::new(use_bitstream);
for op in ops.into_iter().rev() {
op.encode(&mut coder, &mut state);
}

View File

@@ -1,6 +1,5 @@
use crate::context_state::Context;
const L_BITS: u32 = 12;
pub const PROB_BITS: u32 = 8;
pub const ONE_PROB: u32 = 1 << PROB_BITS;
@@ -13,43 +12,75 @@ pub trait EntropyCoder {
}
}
pub struct RansCoder(Vec<u16>);
pub struct RansCoder {
bits: Vec<u16>,
use_bitstream: bool,
}
impl EntropyCoder for RansCoder {
fn encode_bit(&mut self, bit: bool, prob: u16) {
assert!(prob < 32768);
self.0.push(prob | ((bit as u16) << 15));
self.bits.push(prob | ((bit as u16) << 15));
}
}
impl RansCoder {
pub fn new() -> RansCoder {
RansCoder(Vec::new())
pub fn new(use_bitstream: bool) -> RansCoder {
RansCoder {
bits: Vec::new(),
use_bitstream,
}
}
pub fn finish(self) -> Vec<u8> {
let mut buffer = vec![];
let mut state = 1 << L_BITS;
let l_bits: u32 = if self.use_bitstream { 15 } else { 12 };
let mut state = 1 << l_bits;
const MAX_STATE_FACTOR: u32 = 1 << (L_BITS + 8 - PROB_BITS);
for step in self.0.into_iter().rev() {
let mut byte = 0u8;
let mut bit = 8;
let mut flush_state: Box<dyn FnMut(&mut u32)> = if self.use_bitstream {
Box::new(|state: &mut u32| {
bit -= 1;
byte |= ((*state & 1) as u8) << bit;
if bit == 0 {
buffer.push(byte);
byte = 0;
bit = 8;
}
*state >>= 1;
})
} else {
Box::new(|state: &mut u32| {
buffer.push(*state as u8);
*state >>= 8;
})
};
let num_flush_bits = if self.use_bitstream { 1 } else { 8 };
let max_state_factor: u32 = 1 << (l_bits + num_flush_bits - PROB_BITS);
for step in self.bits.into_iter().rev() {
let prob = step as u32 & 32767;
let (start, prob) = if step & 32768 != 0 {
(0, prob)
} else {
(prob, ONE_PROB - prob)
};
let max_state = MAX_STATE_FACTOR * prob;
let max_state = max_state_factor * prob;
while state >= max_state {
buffer.push(state as u8);
state >>= 8;
flush_state(&mut state);
}
state = ((state / prob) << PROB_BITS) + (state % prob) + start;
}
while state > 0 {
buffer.push(state as u8);
state >>= 8;
flush_state(&mut state);
}
drop(flush_state);
if self.use_bitstream && byte != 0 {
buffer.push(byte);
}
buffer.reverse();
@@ -99,14 +130,22 @@ impl EntropyCoder for CostCounter {
pub struct RansDecoder<'a> {
data: &'a [u8],
state: u32,
use_bitstream: bool,
byte: u8,
bits_left: u8,
}
const PROB_MASK: u32 = ONE_PROB - 1;
const L: u32 = 1 << L_BITS;
impl<'a> RansDecoder<'a> {
pub fn new(data: &'a [u8]) -> RansDecoder<'a> {
RansDecoder { data, state: 0 }
pub fn new(data: &'a [u8], use_bitstream: bool) -> RansDecoder<'a> {
RansDecoder {
data,
state: 0,
use_bitstream,
byte: 0,
bits_left: 0,
}
}
pub fn decode_with_context(&mut self, context: &mut Context) -> bool {
@@ -117,9 +156,22 @@ impl<'a> RansDecoder<'a> {
pub fn decode_bit(&mut self, prob: u16) -> bool {
let prob = prob as u32;
while self.state < L {
self.state = (self.state << 8) | self.data[0] as u32;
self.data = &self.data[1..];
if self.use_bitstream {
while self.state < 32768 {
if self.bits_left == 0 {
self.byte = self.data[0];
self.data = &self.data[1..];
self.bits_left = 8;
}
self.state = (self.state << 1) | (self.byte & 1) as u32;
self.byte >>= 1;
self.bits_left -= 1;
}
} else {
while self.state < 4096 {
self.state = (self.state << 8) | self.data[0] as u32;
self.data = &self.data[1..];
}
}
let bit = (self.state & PROB_MASK) < prob;