3 Commits

Author SHA1 Message Date
4903ac3786 c unpacker now works 2022-09-09 00:47:33 +02:00
f817dc9254 first try of c decompressor, not working yet 2022-09-08 23:42:03 +02:00
d93aec186c add compressed_size function 2022-06-19 23:08:47 +02:00
7 changed files with 164 additions and 22 deletions

5
c_unpacker/.gitignore vendored Normal file
View File

@@ -0,0 +1,5 @@
unpack
unpack_bitstream
unpack_debug
*.upk

10
c_unpacker/Makefile Normal file
View File

@@ -0,0 +1,10 @@
# Build both flavours of the reference unpacker by default.
# `all` produces no file of its own, so it is declared phony: a stray file
# named `all` in this directory must not stop the default build from running.
.PHONY: all
all: unpack unpack_bitstream

# Default build: unpack.c refills its decoder state one input byte at a time.
unpack: main.c unpack.c
	cc -O2 -o unpack main.c unpack.c

# Same sources with UPKR_BITSTREAM defined: unpack.c refills one bit at a time.
unpack_bitstream: main.c unpack.c
	cc -O2 -D UPKR_BITSTREAM -o unpack_bitstream main.c unpack.c

# Unoptimised build with debug symbols; not part of `all`, build it explicitly.
unpack_debug: main.c unpack.c
	cc -g -o unpack_debug main.c unpack.c

25
c_unpacker/main.c Normal file
View File

@@ -0,0 +1,25 @@
#include <stdio.h>
#include <stdlib.h>
int upkr_unpack(void* destination, void* compressed_data);
/*
 * Command-line driver for the reference unpacker.
 *
 * Usage: unpack <compressed-input> <output>
 *
 * Reads up to 1 MiB of compressed data from argv[1], unpacks it with
 * upkr_unpack() into a 1 MiB output buffer, prints both sizes and writes the
 * unpacked bytes to argv[2].
 *
 * Returns 0 on success and 1 on a usage error, allocation failure or I/O
 * failure.
 *
 * NOTE: upkr_unpack() gets no buffer sizes, so this driver cannot stop the
 * decoder from writing past the 1 MiB output buffer. Only feed it trusted
 * input.
 */
int main(int argn, char** argv) {
    enum { BUFFER_SIZE = 1024 * 1024 };
    void* input_buffer = NULL;
    void* output_buffer = NULL;
    FILE* in_file = NULL;
    FILE* out_file = NULL;
    int in_size;
    int out_size;
    int exit_code = 1;

    /* Both paths are required; extra arguments are ignored as before. */
    if(argn < 3) {
        fprintf(stderr, "Usage: %s <compressed-input> <output>\n",
                argn > 0 ? argv[0] : "unpack");
        return 1;
    }

    input_buffer = malloc(BUFFER_SIZE);
    output_buffer = malloc(BUFFER_SIZE);
    if(input_buffer == NULL || output_buffer == NULL) {
        fprintf(stderr, "Out of memory\n");
        goto cleanup;
    }

    in_file = fopen(argv[1], "rb");
    if(in_file == NULL) {
        perror(argv[1]);
        goto cleanup;
    }
    in_size = (int)fread(input_buffer, 1, BUFFER_SIZE, in_file);
    if(ferror(in_file)) {
        perror(argv[1]);
        goto cleanup;
    }
    /* A full buffer with more data pending means the stream would be
       truncated; decoding a truncated stream reads past the buffer. */
    if(in_size == BUFFER_SIZE && fgetc(in_file) != EOF) {
        fprintf(stderr, "%s: input is larger than %d bytes\n", argv[1], (int)BUFFER_SIZE);
        goto cleanup;
    }
    fclose(in_file);
    in_file = NULL;

    printf("Compressed size: %d\n", in_size);
    out_size = upkr_unpack(output_buffer, input_buffer);
    printf("Uncompressed size: %d\n", out_size);

    out_file = fopen(argv[2], "wb");
    if(out_file == NULL) {
        perror(argv[2]);
        goto cleanup;
    }
    if(fwrite(output_buffer, 1, (size_t)out_size, out_file) != (size_t)out_size) {
        perror(argv[2]);
        goto cleanup;
    }
    /* fclose flushes buffered data, so a failure here means lost output. */
    if(fclose(out_file) != 0) {
        out_file = NULL;
        perror(argv[2]);
        goto cleanup;
    }
    out_file = NULL;
    exit_code = 0;

cleanup:
    if(in_file != NULL) {
        fclose(in_file);
    }
    if(out_file != NULL) {
        fclose(out_file);
    }
    free(output_buffer);
    free(input_buffer);
    return exit_code;
}

4
c_unpacker/readme.txt Normal file
View File

@@ -0,0 +1,4 @@
A very simple unpacker in C, intended as a reference for people who want to implement their own unpacker.
It is absolutely not production ready: it makes no effort to ensure that the output buffer can actually
hold the uncompressed data.
!!! Never run it on untrusted input !!!

95
c_unpacker/unpack.c Normal file
View File

@@ -0,0 +1,95 @@
/* Short integer type names used throughout the unpacker. */
typedef unsigned char u8;
typedef unsigned short u16;
/* NOTE(review): `unsigned long` is guaranteed to be at least 32 bits wide but
   may be wider (e.g. 64 bits on LP64 targets). The byte-wise decoder only
   refills while the state is below 4096 and shifts in 8 bits at a time, so
   the extra width should be harmless - confirm before relying on wrap-around. */
typedef unsigned long u32;
/* Read cursor into the compressed input; advanced whenever the decoder
   refills its state. */
u8* upkr_data_ptr;
/* One adaptive probability per context, as indexed by upkr_unpack:
     0          literal/match flag
     1..255     literal bits (index built from the bits decoded so far)
     256        "match uses a new offset" flag
     257..320   offset value (2 contexts per bit position, 32 positions)
     321..384   match length (2 contexts per bit position, 32 positions) */
u8 upkr_probs[1 + 255 + 1 + 2*32 + 2*32];
#ifdef UPKR_BITSTREAM
/* Bit-wise variant: 16-bit coder state, refilled one input bit at a time. */
u16 upkr_state;
/* Input byte currently being consumed bit by bit. */
u8 upkr_current_byte;
/* Number of not yet consumed bits left in upkr_current_byte. */
int upkr_bits_left;
#else
/* Byte-wise variant: coder state, refilled one whole input byte at a time. */
u32 upkr_state;
#endif
/*
 * Decodes a single bit from the compressed stream.
 *
 * context_index selects the entry of upkr_probs that models this bit.
 * The coder state is first refilled from upkr_data_ptr if it has dropped
 * below the variant's threshold, then the bit is extracted and both
 * upkr_state and upkr_probs[context_index] are updated.
 *
 * Returns the decoded bit, 0 or 1. No bounds checks are made on the input
 * pointer or on context_index.
 */
int upkr_decode_bit(int context_index) {
#ifdef UPKR_BITSTREAM
/* Bit-wise refill: shift input bits into the state, taking the least
   significant bit of each input byte first, until the state reaches 32768. */
while(upkr_state < 32768) {
if(upkr_bits_left == 0) {
upkr_current_byte = *upkr_data_ptr++;
upkr_bits_left = 8;
}
upkr_state = (upkr_state << 1) + (upkr_current_byte & 1);
upkr_current_byte >>= 1;
--upkr_bits_left;
}
#else
/* Byte-wise refill: append whole input bytes until the state reaches 4096. */
while(upkr_state < 4096) {
upkr_state = (upkr_state << 8) | *upkr_data_ptr++;
}
#endif
/* prob is the chance, in 1/256 units, that this bit is 1. */
int prob = upkr_probs[context_index];
/* The low 8 bits of the state select the symbol: values below prob mean 1. */
int bit = (upkr_state & 255) < prob ? 1 : 0;
if(bit) {
/* Shrink the state by the probability of a 1, keeping the low byte as the
   position inside the [0, prob) slot. */
upkr_state = prob * (upkr_state >> 8) + (upkr_state & 255);
/* Adapt: move the probability 1/16 of the way (rounded) towards 256. */
upkr_probs[context_index] = prob + ((256 - prob + 8) >> 4);
} else {
/* Same for a 0: scale by (256 - prob) and rebase the low byte, which is
   known to be >= prob here, to the start of the [prob, 256) slot. */
upkr_state = (256 - prob) * (upkr_state >> 8) + (upkr_state & 255) - prob;
/* Adapt: move the probability 1/16 of the way (rounded) towards 0. */
upkr_probs[context_index] = prob - ((prob + 8) >> 4);
}
return bit;
}
/*
 * Decodes a variable-length positive integer (used for offsets and lengths).
 *
 * The value is stored as pairs of bits: a "more bits follow" flag decoded
 * with context_index, then one payload bit decoded with context_index + 1.
 * Payload bits arrive least significant first and every pair uses the next
 * two contexts. When the flag is 0, an implicit 1 is placed above the
 * payload bits collected so far, so the result is always at least 1.
 *
 * context_index is the first context of the run to use.
 */
int upkr_decode_length(int context_index) {
    int value = 0;
    int shift;

    for(shift = 0; upkr_decode_bit(context_index); ++shift, context_index += 2) {
        if(upkr_decode_bit(context_index + 1)) {
            value |= 1 << shift;
        }
    }
    /* Terminating flag seen: add the implicit top bit. */
    return value | (1 << shift);
}
int upkr_unpack(void* destination, void* compressed_data) {
upkr_data_ptr = (u8*)compressed_data;
upkr_state = 0;
#ifdef UPKR_BITSTREAM
upkr_bits_left = 0;
#endif
for(int i = 0; i < sizeof(upkr_probs); ++i)
upkr_probs[i] = 128;
u8* write_ptr = (u8*)destination;
int prev_was_match = 0;
int offset = 0;
for(;;) {
if(upkr_decode_bit(0)) {
if(prev_was_match || upkr_decode_bit(256)) {
offset = upkr_decode_length(257) - 1;
if(offset == 0) {
break;
}
}
int length = upkr_decode_length(257 + 64);
while(length--) {
*write_ptr = write_ptr[-offset];
++write_ptr;
}
prev_was_match = 1;
} else {
u8 context_index = 1;
for(int i = 0; i < 8; ++i) {
int bit = upkr_decode_bit(context_index);
context_index = (context_index << 1) + bit;
}
*write_ptr++ = context_index;
prev_was_match = 0;
}
}
return write_ptr - (u8*)destination;
}

View File

@@ -21,3 +21,12 @@ pub fn pack(
parsing_packer::pack(data, level, use_bitstream, progress_callback) parsing_packer::pack(data, level, use_bitstream, progress_callback)
} }
} }
/// Estimates the size of a packed byte stream in bytes, with fractional precision.
///
/// The leading bytes of `data` are loaded into the initial decoder state in the
/// same way the byte-wise decoder does it (shift in whole bytes until the state
/// reaches 4096). The result is the number of bytes left after that, plus the
/// information held in the state itself (`log2(state)` bits, converted to bytes).
///
/// Input that ends before the state is fully loaded (including an empty slice)
/// no longer panics: the estimate is then based on whatever state could be
/// read, and an all-zero or empty prefix counts as `0.0`.
pub fn compressed_size(mut data: &[u8]) -> f32 {
    const STATE_LOWER_BOUND: u32 = 4096;

    let mut state: u32 = 0;
    while state < STATE_LOWER_BOUND {
        match data.split_first() {
            Some((&byte, rest)) => {
                state = (state << 8) | u32::from(byte);
                data = rest;
            }
            // Truncated stream: stop instead of indexing past the end.
            None => break,
        }
    }
    // `max(1)` keeps log2 finite when no non-zero state byte was available.
    data.len() as f32 + (state.max(1) as f32).log2() / 8.
}

View File

@@ -9,32 +9,29 @@ pub enum Op {
impl Op { impl Op {
pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState) { pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState) {
let base_context = 256 * (state.pos & 3);
match self { match self {
&Op::Literal(lit) => { &Op::Literal(lit) => {
encode_bit(coder, state, base_context, false); encode_bit(coder, state, 0, false);
let mut context_index = 1; let mut context_index = 1;
for i in (0..8).rev() { for i in (0..8).rev() {
let bit = (lit >> i) & 1 != 0; let bit = (lit >> i) & 1 != 0;
encode_bit(coder, state, base_context + context_index, bit); encode_bit(coder, state, context_index, bit);
context_index = (context_index << 1) | bit as usize; context_index = (context_index << 1) | bit as usize;
} }
state.pos += 1;
state.prev_was_match = false; state.prev_was_match = false;
} }
&Op::Match { offset, len } => { &Op::Match { offset, len } => {
encode_bit(coder, state, base_context, true); encode_bit(coder, state, 0, true);
if !state.prev_was_match { if !state.prev_was_match {
encode_bit(coder, state, 1024, offset != state.last_offset); encode_bit(coder, state, 256, offset != state.last_offset);
} else { } else {
assert!(offset != state.last_offset); assert!(offset != state.last_offset);
} }
if offset != state.last_offset { if offset != state.last_offset {
encode_length(coder, state, 1025, offset + 1); encode_length(coder, state, 257, offset + 1);
state.last_offset = offset; state.last_offset = offset;
} }
encode_length(coder, state, 1025 + 64, len); encode_length(coder, state, 257 + 64, len);
state.pos += len as usize;
state.prev_was_match = true; state.prev_was_match = true;
} }
} }
@@ -42,11 +39,11 @@ impl Op {
} }
pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState) { pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState) {
encode_bit(coder, state, 256 * (state.pos & 3), true); encode_bit(coder, state, 0, true);
if !state.prev_was_match { if !state.prev_was_match {
encode_bit(coder, state, 1024, true); encode_bit(coder, state, 256, true);
} }
encode_length(coder, state, 1025, 1); encode_length(coder, state, 257, 1);
} }
fn encode_bit( fn encode_bit(
@@ -80,16 +77,14 @@ fn encode_length(
pub struct CoderState { pub struct CoderState {
contexts: ContextState, contexts: ContextState,
last_offset: u32, last_offset: u32,
pos: usize,
prev_was_match: bool, prev_was_match: bool,
} }
impl CoderState { impl CoderState {
pub fn new() -> CoderState { pub fn new() -> CoderState {
CoderState { CoderState {
contexts: ContextState::new((1 + 255) * 4 + 1 + 64 + 64), contexts: ContextState::new(1 + 255 + 1 + 64 + 64),
last_offset: 0, last_offset: 0,
pos: 0,
prev_was_match: false, prev_was_match: false,
} }
} }
@@ -101,7 +96,7 @@ impl CoderState {
pub fn unpack(packed_data: &[u8], use_bitstream: bool) -> Vec<u8> { pub fn unpack(packed_data: &[u8], use_bitstream: bool) -> Vec<u8> {
let mut decoder = RansDecoder::new(packed_data, use_bitstream); let mut decoder = RansDecoder::new(packed_data, use_bitstream);
let mut contexts = ContextState::new((1 + 255) * 4 + 1 + 64 + 64); let mut contexts = ContextState::new(1 + 255 + 1 + 64 + 64);
let mut result = vec![]; let mut result = vec![];
let mut offset = 0; let mut offset = 0;
let mut prev_was_match = false; let mut prev_was_match = false;
@@ -124,15 +119,14 @@ pub fn unpack(packed_data: &[u8], use_bitstream: bool) -> Vec<u8> {
} }
loop { loop {
let base_context = 256 * (result.len() & 3); if decoder.decode_with_context(&mut contexts.context_mut(0)) {
if decoder.decode_with_context(&mut contexts.context_mut(base_context)) { if prev_was_match || decoder.decode_with_context(&mut contexts.context_mut(256)) {
if prev_was_match || decoder.decode_with_context(&mut contexts.context_mut(1024)) { offset = decode_length(&mut decoder, &mut contexts, 257) - 1;
offset = decode_length(&mut decoder, &mut contexts, 1025) - 1;
if offset == 0 { if offset == 0 {
break; break;
} }
} }
let length = decode_length(&mut decoder, &mut contexts, 1025 + 64); let length = decode_length(&mut decoder, &mut contexts, 257 + 64);
for _ in 0..length { for _ in 0..length {
result.push(result[result.len() - offset]); result.push(result[result.len() - offset]);
} }
@@ -141,7 +135,7 @@ pub fn unpack(packed_data: &[u8], use_bitstream: bool) -> Vec<u8> {
let mut context_index = 1; let mut context_index = 1;
let mut byte = 0; let mut byte = 0;
for i in (0..8).rev() { for i in (0..8).rev() {
let bit = decoder.decode_with_context(&mut contexts.context_mut(base_context + context_index)); let bit = decoder.decode_with_context(&mut contexts.context_mut(context_index));
context_index = (context_index << 1) | bit as usize; context_index = (context_index << 1) | bit as usize;
byte |= (bit as u8) << i; byte |= (bit as u8) << i;
} }