7 Commits

10 changed files with 193 additions and 42 deletions

5
c_unpacker/.gitignore vendored Normal file
View File

@@ -0,0 +1,5 @@
unpack
unpack_bitstream
unpack_debug
*.upk

10
c_unpacker/Makefile Normal file
View File

@@ -0,0 +1,10 @@
# Builds the reference C unpacker.
#   unpack           - byte-wise rANS variant (default)
#   unpack_bitstream - bit-wise variant, compiled with UPKR_BITSTREAM defined
#   unpack_debug     - unoptimised build with debug symbols (not part of `all`)
# Recipe lines must start with a tab character.
.PHONY: all
all: unpack unpack_bitstream

unpack: main.c unpack.c
	cc -O2 -o unpack main.c unpack.c

unpack_bitstream: main.c unpack.c
	cc -O2 -D UPKR_BITSTREAM -o unpack_bitstream main.c unpack.c

unpack_debug: main.c unpack.c
	cc -g -o unpack_debug main.c unpack.c

25
c_unpacker/main.c Normal file
View File

@@ -0,0 +1,25 @@
#include <stdio.h>
#include <stdlib.h>
int upkr_unpack(void* destination, void* compressed_data);
/*
 * Command-line driver for the reference unpacker.
 *
 * Usage: <program> <infile> <outfile>
 *
 * Reads up to 1 MiB of packed data from argv[1], unpacks it with
 * upkr_unpack() into a 1 MiB output buffer and writes the result to argv[2].
 * Returns 0 on success and 1 on any usage, allocation or I/O error.
 *
 * Note: upkr_unpack() performs no bounds checking, so the fixed-size output
 * buffer is NOT protected against oversized or malicious input.
 */
int main(int argn, char** argv) {
    enum { BUFFER_SIZE = 1024*1024 };
    void* input_buffer = NULL;
    void* output_buffer = NULL;
    FILE* in_file = NULL;
    FILE* out_file = NULL;
    int in_size = 0;
    int out_size = 0;
    int exit_code = 1;

    /* Both file names are required; bail out before touching argv[1]/argv[2]. */
    if(argn < 3) {
        fprintf(stderr, "Usage: %s <infile> <outfile>\n", argn > 0 ? argv[0] : "unpack");
        return 1;
    }

    input_buffer = malloc(BUFFER_SIZE);
    output_buffer = malloc(BUFFER_SIZE);
    if(input_buffer == NULL || output_buffer == NULL) {
        fprintf(stderr, "Failed to allocate buffers\n");
        goto done;
    }

    in_file = fopen(argv[1], "rb");
    if(in_file == NULL) {
        perror(argv[1]);
        goto done;
    }
    in_size = (int)fread(input_buffer, 1, BUFFER_SIZE, in_file);
    if(ferror(in_file)) {
        perror(argv[1]);
        goto done;
    }
    fclose(in_file);
    in_file = NULL;
    printf("Compressed size: %d\n", in_size);

    out_size = upkr_unpack(output_buffer, input_buffer);
    printf("Uncompressed size: %d\n", out_size);

    out_file = fopen(argv[2], "wb");
    if(out_file == NULL) {
        perror(argv[2]);
        goto done;
    }
    if(fwrite(output_buffer, 1, (size_t)out_size, out_file) != (size_t)out_size) {
        perror(argv[2]);
        goto done;
    }
    /* fclose flushes buffered data, so its result decides whether the write succeeded. */
    if(fclose(out_file) != 0) {
        out_file = NULL;
        perror(argv[2]);
        goto done;
    }
    out_file = NULL;
    exit_code = 0;

done:
    if(in_file != NULL) {
        fclose(in_file);
    }
    if(out_file != NULL) {
        fclose(out_file);
    }
    free(output_buffer);
    free(input_buffer);
    return exit_code;
}

4
c_unpacker/readme.txt Normal file
View File

@@ -0,0 +1,4 @@
A very simple unpacker in C, intended as a reference for people who want to implement their own unpacker.
It is absolutely not production-ready: it makes no effort to ensure that the output buffer can actually
hold the uncompressed data.
!!! Never run on untrusted input !!!

98
c_unpacker/unpack.c Normal file
View File

@@ -0,0 +1,98 @@
/* Short integer aliases used throughout the unpacker. */
typedef unsigned char u8;
typedef unsigned short u16;
/* NOTE(review): `unsigned long` is wider than 32 bits on LP64 targets, so
 * "u32" is only a lower bound on the width here - confirm nothing relies on
 * exact 32-bit wrap-around. */
typedef unsigned long u32;
/* Read cursor into the compressed input; advanced as the decoder consumes bytes. */
u8* upkr_data_ptr;
/* One adaptive probability per context, laid out as used by upkr_unpack():
 *   [0]            literal-vs-match flag
 *   [1..255]       bit-tree nodes for the 8 bits of a literal
 *   [256]          "new offset follows" flag
 *   [257..320]     contexts for the offset value (2 per bit)
 *   [321..384]     contexts for the match length (2 per bit) */
u8 upkr_probs[1 + 255 + 1 + 2*32 + 2*32];
#ifdef UPKR_BITSTREAM
/* Bitstream variant: 16-bit decoder state, refilled one bit at a time. */
u16 upkr_state;
/* Byte currently being shifted out, most significant bit first. */
u8 upkr_current_byte;
/* Number of not-yet-consumed bits remaining in upkr_current_byte. */
int upkr_bits_left;
#else
/* Byte-wise variant: decoder state, refilled one byte at a time. */
u32 upkr_state;
#endif
/*
 * Decodes a single bit using the adaptive probability stored in
 * upkr_probs[context_index], then updates both the decoder state and that
 * probability.
 *
 * The stored value is the share (out of 256) of the state's low byte range
 * that maps to a 0 bit: a low byte below it decodes as 0, otherwise as 1.
 * Returns the decoded bit (0 or 1).
 */
int upkr_decode_bit(int context_index) {
    /* Refill the state from the input until it is back above the lower bound. */
#ifdef UPKR_BITSTREAM
    while(upkr_state < 32768) {
        if(!upkr_bits_left) {
            upkr_current_byte = *upkr_data_ptr;
            ++upkr_data_ptr;
            upkr_bits_left = 8;
        }
        /* Shift in the next input bit, most significant bit of the byte first. */
        upkr_state = (upkr_state << 1) + (upkr_current_byte >> 7);
        upkr_current_byte <<= 1;
        upkr_bits_left -= 1;
    }
#else
    while(upkr_state < 4096) {
        upkr_state <<= 8;
        upkr_state |= *upkr_data_ptr++;
    }
#endif

    int prob = upkr_probs[context_index];
    int low = upkr_state & 255;
    int bit = low >= prob;

    if(bit) {
        /* 1 bit: occupies the upper (256 - prob) slots; probability of 0 decays. */
        upkr_state = (256 - prob) * (upkr_state >> 8) + low - prob;
        upkr_probs[context_index] = prob - ((prob + 8) >> 4);
    } else {
        /* 0 bit: occupies the lower prob slots; probability of 0 grows. */
        upkr_state = prob * (upkr_state >> 8) + low;
        upkr_probs[context_index] = 16 + prob - ((prob + 8) >> 4);
    }
    return bit;
}
/*
 * Decodes a variable-length integer starting at context_index.
 *
 * Each round reads a "continue" flag from the current context; while it is
 * set, one payload bit is read from the following context and stored at the
 * next higher bit position, and the context index advances by two. When the
 * flag is clear, an implicit top bit is placed above the payload bits.
 * The result is therefore always >= 1.
 */
int upkr_decode_length(int context_index) {
    int value = 0;
    int shift;

    for(shift = 0; upkr_decode_bit(context_index); ++shift, context_index += 2) {
        if(upkr_decode_bit(context_index + 1)) {
            value |= 1 << shift;
        }
    }
    return value | (1 << shift);
}
int upkr_unpack(void* destination, void* compressed_data) {
upkr_data_ptr = (u8*)compressed_data;
upkr_state = 0;
#ifdef UPKR_BITSTREAM
upkr_bits_left = 0;
#endif
for(int i = 0; i < sizeof(upkr_probs); ++i)
upkr_probs[i] = 128;
u8* write_ptr = (u8*)destination;
int prev_was_match = 0;
int offset = 0;
for(;;) {
if(upkr_decode_bit(0)) {
if(prev_was_match || upkr_decode_bit(256)) {
offset = upkr_decode_length(257) - 1;
if(offset == 0) {
break;
}
}
int length = upkr_decode_length(257 + 64);
while(length--) {
*write_ptr = write_ptr[-offset];
++write_ptr;
}
prev_was_match = 1;
} else {
int byte = 1;
while(byte < 256) {
int bit = upkr_decode_bit(byte);
byte = (byte << 1) + bit;
}
*write_ptr++ = byte;
prev_was_match = 0;
}
}
return write_ptr - (u8*)destination;
}

View File

@@ -1,8 +1,8 @@
use crate::rans::{PROB_BITS, ONE_PROB};
use crate::rans::{ONE_PROB, PROB_BITS};
const INIT_PROB: u16 = 1 << (PROB_BITS - 1);
const UPDATE_RATE: u32 = 4;
const UPDATE_ADD: u32 = 8;
const UPDATE_RATE: i32 = 4;
const UPDATE_ADD: i32 = 8;
#[derive(Clone)]
pub struct ContextState {
@@ -33,10 +33,13 @@ impl<'a> Context<'a> {
pub fn update(&mut self, bit: bool) {
let old = self.state.contexts[self.index];
self.state.contexts[self.index] = if bit {
old + ((ONE_PROB - old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8
let offset = if !bit {
ONE_PROB as i32 >> UPDATE_RATE
} else {
old - ((old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8
0
};
self.state.contexts[self.index] =
(offset + old as i32 - ((old as i32 + UPDATE_ADD) >> UPDATE_RATE)) as u8;
}
}

View File

@@ -21,3 +21,12 @@ pub fn pack(
parsing_packer::pack(data, level, use_bitstream, progress_callback)
}
}
/// Estimates the size of packed `data` in bytes, with fractional precision.
///
/// The leading bytes are folded into a state value the same way a byte-wise
/// decoder loads its initial state (shift in bytes until the state reaches
/// 4096). The result is the number of remaining bytes plus the information
/// content of that state, `log2(state) / 8` bytes.
///
/// Input that ends before the state reaches 4096 no longer panics: the loop
/// stops at the end of the slice, and an all-zero or empty input yields `0.0`.
pub fn compressed_size(mut data: &[u8]) -> f32 {
    let mut state: u32 = 0;
    while state < 4096 {
        match data.split_first() {
            Some((&byte, rest)) => {
                state = (state << 8) | u32::from(byte);
                data = rest;
            }
            // Ran out of input before the state was fully loaded.
            None => break,
        }
    }
    if state == 0 {
        // Only reachable with the input exhausted; avoid log2(0) = -inf.
        return data.len() as f32;
    }
    data.len() as f32 + (state as f32).log2() / 8.
}

View File

@@ -9,32 +9,29 @@ pub enum Op {
impl Op {
pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState) {
let base_context = 256 * (state.pos & 3);
match self {
&Op::Literal(lit) => {
encode_bit(coder, state, base_context, false);
encode_bit(coder, state, 0, false);
let mut context_index = 1;
for i in (0..8).rev() {
let bit = (lit >> i) & 1 != 0;
encode_bit(coder, state, base_context + context_index, bit);
encode_bit(coder, state, context_index, bit);
context_index = (context_index << 1) | bit as usize;
}
state.pos += 1;
state.prev_was_match = false;
}
&Op::Match { offset, len } => {
encode_bit(coder, state, base_context, true);
encode_bit(coder, state, 0, true);
if !state.prev_was_match {
encode_bit(coder, state, 1024, offset != state.last_offset);
encode_bit(coder, state, 256, offset != state.last_offset);
} else {
assert!(offset != state.last_offset);
}
if offset != state.last_offset {
encode_length(coder, state, 1025, offset + 1);
encode_length(coder, state, 257, offset + 1);
state.last_offset = offset;
}
encode_length(coder, state, 1025 + 64, len);
state.pos += len as usize;
encode_length(coder, state, 257 + 64, len);
state.prev_was_match = true;
}
}
@@ -42,11 +39,11 @@ impl Op {
}
pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState) {
encode_bit(coder, state, 256 * (state.pos & 3), true);
encode_bit(coder, state, 0, true);
if !state.prev_was_match {
encode_bit(coder, state, 1024, true);
encode_bit(coder, state, 256, true);
}
encode_length(coder, state, 1025, 1);
encode_length(coder, state, 257, 1);
}
fn encode_bit(
@@ -80,16 +77,14 @@ fn encode_length(
pub struct CoderState {
contexts: ContextState,
last_offset: u32,
pos: usize,
prev_was_match: bool,
}
impl CoderState {
pub fn new() -> CoderState {
CoderState {
contexts: ContextState::new((1 + 255) * 4 + 1 + 64 + 64),
contexts: ContextState::new(1 + 255 + 1 + 64 + 64),
last_offset: 0,
pos: 0,
prev_was_match: false,
}
}
@@ -101,7 +96,7 @@ impl CoderState {
pub fn unpack(packed_data: &[u8], use_bitstream: bool) -> Vec<u8> {
let mut decoder = RansDecoder::new(packed_data, use_bitstream);
let mut contexts = ContextState::new((1 + 255) * 4 + 1 + 64 + 64);
let mut contexts = ContextState::new(1 + 255 + 1 + 64 + 64);
let mut result = vec![];
let mut offset = 0;
let mut prev_was_match = false;
@@ -124,15 +119,14 @@ pub fn unpack(packed_data: &[u8], use_bitstream: bool) -> Vec<u8> {
}
loop {
let base_context = 256 * (result.len() & 3);
if decoder.decode_with_context(&mut contexts.context_mut(base_context)) {
if prev_was_match || decoder.decode_with_context(&mut contexts.context_mut(1024)) {
offset = decode_length(&mut decoder, &mut contexts, 1025) - 1;
if decoder.decode_with_context(&mut contexts.context_mut(0)) {
if prev_was_match || decoder.decode_with_context(&mut contexts.context_mut(256)) {
offset = decode_length(&mut decoder, &mut contexts, 257) - 1;
if offset == 0 {
break;
}
}
let length = decode_length(&mut decoder, &mut contexts, 1025 + 64);
let length = decode_length(&mut decoder, &mut contexts, 257 + 64);
for _ in 0..length {
result.push(result[result.len() - offset]);
}
@@ -141,7 +135,7 @@ pub fn unpack(packed_data: &[u8], use_bitstream: bool) -> Vec<u8> {
let mut context_index = 1;
let mut byte = 0;
for i in (0..8).rev() {
let bit = decoder.decode_with_context(&mut contexts.context_mut(base_context + context_index));
let bit = decoder.decode_with_context(&mut contexts.context_mut(context_index));
context_index = (context_index << 1) | bit as usize;
byte |= (bit as u8) << i;
}

View File

@@ -58,7 +58,10 @@ fn main() -> Result<()> {
fn print_help() {
eprintln!("Usage:");
eprintln!(" upkr pack [-l level(0-9)] <infile> <outfile>");
eprintln!(" upkr unpack <infile> <outfile>");
eprintln!(" upkr pack [-b] [-l level(0-9)] <infile> <outfile>");
eprintln!(" upkr unpack [-b] <infile> <outfile>");
eprintln!();
eprintln!(" -b, --bitstream bitstream mode");
eprintln!(" -l, --level N compression level 0-9");
std::process::exit(1);
}

View File

@@ -38,15 +38,15 @@ impl RansCoder {
let mut state = 1 << l_bits;
let mut byte = 0u8;
let mut bit = 8;
let mut bit = 0;
let mut flush_state: Box<dyn FnMut(&mut u32)> = if self.use_bitstream {
Box::new(|state: &mut u32| {
bit -= 1;
byte |= ((*state & 1) as u8) << bit;
if bit == 0 {
bit += 1;
if bit == 8 {
buffer.push(byte);
byte = 0;
bit = 8;
bit = 0;
}
*state >>= 1;
})
@@ -61,7 +61,7 @@ impl RansCoder {
let max_state_factor: u32 = 1 << (l_bits + num_flush_bits - PROB_BITS);
for step in self.bits.into_iter().rev() {
let prob = step as u32 & 32767;
let (start, prob) = if step & 32768 != 0 {
let (start, prob) = if step & 32768 == 0 {
(0, prob)
} else {
(prob, ONE_PROB - prob)
@@ -118,7 +118,7 @@ impl CostCounter {
impl EntropyCoder for CostCounter {
fn encode_bit(&mut self, bit: bool, prob: u16) {
let prob = if bit {
let prob = if !bit {
prob as u32
} else {
ONE_PROB - prob as u32
@@ -163,8 +163,8 @@ impl<'a> RansDecoder<'a> {
self.data = &self.data[1..];
self.bits_left = 8;
}
self.state = (self.state << 1) | (self.byte & 1) as u32;
self.byte >>= 1;
self.state = (self.state << 1) | (self.byte >> 7) as u32;
self.byte <<= 1;
self.bits_left -= 1;
}
} else {
@@ -174,12 +174,12 @@ impl<'a> RansDecoder<'a> {
}
}
let bit = (self.state & PROB_MASK) < prob;
let bit = (self.state & PROB_MASK) >= prob;
let (start, prob) = if bit {
(0, prob)
} else {
(prob, ONE_PROB - prob)
} else {
(0, prob)
};
self.state = prob * (self.state >> PROB_BITS) + (self.state & PROB_MASK) - start;