mirror of
https://github.com/exoticorn/upkr.git
synced 2026-01-20 19:46:42 +01:00
Compare commits
1 Commits
31fb91c629
...
parity-con
| Author | SHA1 | Date | |
|---|---|---|---|
| 7d40bb8123 |
66
Cargo.lock
generated
66
Cargo.lock
generated
@@ -62,12 +62,6 @@ version = "1.4.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "lexopt"
|
|
||||||
version = "0.2.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "478ee9e62aaeaf5b140bd4138753d1f109765488581444218d3ddda43234f3e8"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libc"
|
name = "libc"
|
||||||
version = "0.2.108"
|
version = "0.2.108"
|
||||||
@@ -96,22 +90,10 @@ dependencies = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "proc-macro2"
|
name = "pico-args"
|
||||||
version = "1.0.44"
|
version = "0.4.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7bd7356a8122b6c4a24a82b278680c73357984ca2fc79a0f9fa6dea7dced7c58"
|
checksum = "db8bcd96cb740d03149cbad5518db9fd87126a10ab519c011893b1754134c468"
|
||||||
dependencies = [
|
|
||||||
"unicode-ident",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "quote"
|
|
||||||
version = "1.0.21"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sacabase"
|
name = "sacabase"
|
||||||
@@ -122,37 +104,6 @@ dependencies = [
|
|||||||
"num-traits",
|
"num-traits",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "syn"
|
|
||||||
version = "1.0.101"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "e90cde112c4b9690b8cbe810cba9ddd8bc1d7472e2cae317b69e9438c1cba7d2"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
"quote",
|
|
||||||
"unicode-ident",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "thiserror"
|
|
||||||
version = "1.0.36"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "0a99cb8c4b9a8ef0e7907cd3b617cc8dc04d571c4e73c8ae403d80ac160bb122"
|
|
||||||
dependencies = [
|
|
||||||
"thiserror-impl",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "thiserror-impl"
|
|
||||||
version = "1.0.36"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "3a891860d3c8d66fec8e73ddb3765f90082374dbaaa833407b904a94f1a7eb43"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
"quote",
|
|
||||||
"syn",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "time"
|
name = "time"
|
||||||
version = "0.1.44"
|
version = "0.1.44"
|
||||||
@@ -164,21 +115,14 @@ dependencies = [
|
|||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "unicode-ident"
|
|
||||||
version = "1.0.4"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "upkr"
|
name = "upkr"
|
||||||
version = "0.2.0-pre3"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"cdivsufsort",
|
"cdivsufsort",
|
||||||
"lexopt",
|
|
||||||
"pbr",
|
"pbr",
|
||||||
"thiserror",
|
"pico-args",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|||||||
@@ -1,13 +1,12 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "upkr"
|
name = "upkr"
|
||||||
version = "0.2.0-pre3"
|
version = "0.1.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
cdivsufsort = "2"
|
cdivsufsort = "2"
|
||||||
lexopt = "0.2.1"
|
pico-args = "0.4"
|
||||||
anyhow = "1"
|
anyhow = "1"
|
||||||
thiserror = "1.0.36"
|
|
||||||
pbr = "1"
|
pbr = "1"
|
||||||
1
asm_unpackers/.gitignore
vendored
1
asm_unpackers/.gitignore
vendored
@@ -1 +0,0 @@
|
|||||||
/build/
|
|
||||||
@@ -1,51 +0,0 @@
|
|||||||
# Build, test and size-report rules for the assembly unpackers.
#
# The test targets link the C test driver against one unpacker
# implementation, run it (under qemu for the cross-built ones) on
# test_data.upk and compare the output with test_data.bin.

# These targets never create a file of the same name, so mark them phony;
# otherwise a stray file called e.g. `sizes` would silently disable them.
.PHONY: test_riscv64 disas-riscv64 disas-riscv32 test_armv6m test_c sizes

build/unpack_riscv64: ../c_unpacker/main.c unpack_riscv.S
	mkdir -p build
	riscv64-linux-gnu-gcc -g -static -o $@ $^

test_riscv64: build/unpack_riscv64
	qemu-riscv64 $< test_data.upk /tmp/out.bin
	cmp test_data.bin /tmp/out.bin

# $< (first prerequisite) is used instead of $? (prerequisites newer than
# the target): the commands always need the one input file.
build/unpack_riscv64.o: unpack_riscv.S
	mkdir -p build
	riscv64-linux-gnu-gcc -c -o $@ $<

build/unpack_riscv64.bin: build/unpack_riscv64.o
	riscv64-linux-gnu-objcopy -O binary --only-section=.text $< $@

disas-riscv64: build/unpack_riscv64.o
	riscv64-linux-gnu-objdump -d $<

build/unpack_riscv32.o: unpack_riscv.S
	mkdir -p build
	riscv64-linux-gnu-gcc -march=rv32imc -mabi=ilp32 -c -o $@ $<

build/unpack_riscv32.bin: build/unpack_riscv32.o
	riscv64-linux-gnu-objcopy -O binary --only-section=.text $< $@

disas-riscv32: build/unpack_riscv32.o
	riscv64-linux-gnu-objdump -d $<

build/unpack_armv6m: ../c_unpacker/main.c unpack_armv6m.S
	mkdir -p build
	arm-linux-gnueabihf-gcc -g -static -o $@ $^

test_armv6m: build/unpack_armv6m
	qemu-arm $< test_data.upk /tmp/out.bin
	cmp test_data.bin /tmp/out.bin

build/unpack_armv6m.bin: unpack_armv6m.S
	mkdir -p build
	arm-none-eabi-gcc -march=armv6-m -c -o build/unpack_armv6m.o $<
	arm-none-eabi-objcopy -O binary --only-section=.text build/unpack_armv6m.o $@

build/unpack_c: ../c_unpacker/main.c ../c_unpacker/unpack.c
	mkdir -p build
	gcc -g -o $@ $^

test_c: build/unpack_c
	$< test_data.upk /tmp/out.bin
	cmp test_data.bin /tmp/out.bin

sizes: build/unpack_armv6m.bin build/unpack_riscv64.bin build/unpack_riscv32.bin
	ls -l build/*.bin
|
|
||||||
@@ -1,99 +0,0 @@
|
|||||||
typedef unsigned char u8;
|
|
||||||
typedef unsigned short u16;
|
|
||||||
typedef unsigned long u32;
|
|
||||||
|
|
||||||
u8* upkr_data_ptr;
|
|
||||||
u8 upkr_probs[1 + 255 + 1 + 2*32 + 2*32];
|
|
||||||
#ifdef UPKR_BITSTREAM
|
|
||||||
u16 upkr_state;
|
|
||||||
u8 upkr_current_byte;
|
|
||||||
int upkr_bits_left;
|
|
||||||
#else
|
|
||||||
u32 upkr_state;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int upkr_decode_bit(int context_index) {
|
|
||||||
#ifdef UPKR_BITSTREAM
|
|
||||||
while(upkr_state < 32768) {
|
|
||||||
if(upkr_bits_left == 0) {
|
|
||||||
upkr_current_byte = *upkr_data_ptr++;
|
|
||||||
upkr_bits_left = 8;
|
|
||||||
}
|
|
||||||
upkr_state = (upkr_state << 1) + (upkr_current_byte & 1);
|
|
||||||
upkr_current_byte >>= 1;
|
|
||||||
--upkr_bits_left;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
while(upkr_state < 4096) {
|
|
||||||
upkr_state = (upkr_state << 8) | *upkr_data_ptr++;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int prob = upkr_probs[context_index];
|
|
||||||
int bit = (upkr_state & 255) < prob ? 1 : 0;
|
|
||||||
|
|
||||||
int tmp = prob;
|
|
||||||
if(!bit) {
|
|
||||||
tmp = 256 - tmp;
|
|
||||||
}
|
|
||||||
upkr_state = tmp * (upkr_state >> 8) + (upkr_state & 255);
|
|
||||||
tmp += (256 - tmp + 8) >> 4;
|
|
||||||
if(!bit) {
|
|
||||||
upkr_state -= prob;
|
|
||||||
tmp = 256 - tmp;
|
|
||||||
}
|
|
||||||
upkr_probs[context_index] = tmp;
|
|
||||||
|
|
||||||
return bit;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Decode a variable-length integer.
 *
 * The value is stored as a sequence of (continue, payload) bit pairs, each
 * pair using its own two contexts starting at context_index, followed by a
 * terminating zero "continue" bit. Payload bits are collected LSB first and
 * an implicit top bit is added above them, so the result is always >= 1.
 */
int upkr_decode_length(int context_index) {
    int value = 0;
    int shift = 0;

    for(;;) {
        if(!upkr_decode_bit(context_index)) {
            break;
        }
        int payload = upkr_decode_bit(context_index + 1);
        value |= payload << shift;
        ++shift;
        context_index += 2;
    }

    return value | (1 << shift);
}
|
|
||||||
|
|
||||||
void* upkr_unpack(void* destination, void* compressed_data) {
|
|
||||||
upkr_data_ptr = (u8*)compressed_data;
|
|
||||||
upkr_state = 0;
|
|
||||||
#ifdef UPKR_BITSTREAM
|
|
||||||
upkr_bits_left = 0;
|
|
||||||
#endif
|
|
||||||
for(int i = 0; i < sizeof(upkr_probs); ++i)
|
|
||||||
upkr_probs[i] = 128;
|
|
||||||
|
|
||||||
u8* write_ptr = (u8*)destination;
|
|
||||||
|
|
||||||
int prev_was_match = 0;
|
|
||||||
int offset = 0;
|
|
||||||
for(;;) {
|
|
||||||
if(upkr_decode_bit(0)) {
|
|
||||||
if(prev_was_match || upkr_decode_bit(256)) {
|
|
||||||
offset = upkr_decode_length(257) - 1;
|
|
||||||
if(offset == 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
int length = upkr_decode_length(257 + 64);
|
|
||||||
while(length--) {
|
|
||||||
*write_ptr = write_ptr[-offset];
|
|
||||||
++write_ptr;
|
|
||||||
}
|
|
||||||
prev_was_match = 1;
|
|
||||||
} else {
|
|
||||||
int byte = 1;
|
|
||||||
while(byte < 256) {
|
|
||||||
int bit = upkr_decode_bit(byte);
|
|
||||||
byte = (byte << 1) + bit;
|
|
||||||
}
|
|
||||||
*write_ptr++ = byte;
|
|
||||||
prev_was_match = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return write_ptr;
|
|
||||||
}
|
|
||||||
Binary file not shown.
@@ -1,162 +0,0 @@
|
|||||||
// armv6-m upkr unpacker by yrlf
|
|
||||||
// some optimizations by exoticorn
|
|
||||||
|
|
||||||
.syntax unified
|
|
||||||
.thumb
|
|
||||||
|
|
||||||
.section .text
|
|
||||||
|
|
||||||
#define ALIGNUP(n, align) (((n) + (align) - 1) & ~((align) - 1))
|
|
||||||
#define PROB_LEN (1 + 255 + 1 + 2*32 + 2*32)
|
|
||||||
#define FRAME_SIZE ALIGNUP(PROB_LEN, 4)
|
|
||||||
|
|
||||||
// auto upkr_unpack(uint8_t * out, uint8_t * in) -> tuple<uint8_t *, uint8_t *>
//
// Unpacks the stream at `in` to `out`. The probability table lives in a
// FRAME_SIZE-byte stack frame that is released before returning.
//
// Register roles:
//   r0 .. out_ptr (returned: one past the last byte written)
//   r1 .. in_ptr (returned: advanced past the bytes consumed)
//   r2 .. coder state
//   r3 .. negated match offset
//   r4 .. prev_was_literal (0 right after a match) / negated length from
//         upkr_decode_length, counted up to 0 by the copy loop
//   r5 .. context index passed to the subroutines (preserved by them)
//   r6 .. bit returned by upkr_decode_bit
//   r7 .. probs ptr
.global upkr_unpack
.type upkr_unpack, %function
upkr_unpack:
    push    { r4, r5, r6, r7, lr }
    sub     sp, sp, #FRAME_SIZE

    // Fill all PROB_LEN probabilities with 128. PROB_LEN does not fit an
    // 8-bit immediate, so it is built in two steps. The loop counts r2 down
    // to 0, which is also the required initial coder state.
    mov     r7, sp
    movs    r2, #255
    adds    r2, r2, #(PROB_LEN - 255)
    // r4 doubles as the fill value so that prev_was_literal starts out
    // non-zero. It is tested at .Lmatch, and leaving it at whatever the
    // caller had in r4 would skip the "new offset" flag bit if the very
    // first packet is a match and that value happened to be 0.
    movs    r4, #128
.Lclear:
    subs    r2, r2, #1
    strb    r4, [r7, r2]            // strb leaves the flags from subs intact
    bne     .Lclear

.Lloop:
    movs    r5, #0                  // context 0: literal / match selector
    bl      upkr_decode_bit
    beq     .Ldata                  // bit == 0 -> literal

.Lmatch:
    lsls    r5, r6, #8              // r6 == 1 here, so r5 = 256
    cmp     r4, #0
    beq     .Lnew_offset            // right after a match: offset always follows

    bl      upkr_decode_bit         // context 256: is a new offset coded?
    beq     .Lhave_offset           // no -> keep the previous offset in r3

.Lnew_offset:
    bl      upkr_decode_length      // pre-increments r5: offset contexts start at 257
    adds    r3, r4, #1              // r3 = -(value - 1) = -offset
    beq     .Lend                   // offset 0 marks the end of the stream

.Lhave_offset:
    adds    r5, r5, #64             // length contexts start at 257 + 64
    bl      upkr_decode_length      // r4 = -length

.Lcopy_loop:
    ldrb    r5, [r0, r3]            // r5 = out_ptr[-offset]
.Lstore:
    strb    r5, [r0]
    adds    r0, r0, #1
    adds    r4, r4, #1              // -length counts up; ends at 0 (= "was match")
    blt     .Lcopy_loop
    b       .Lloop

.Ldata:
    movs    r5, #1                  // leading 1 marks how many bits are decoded

.Ldata_loop:
    bl      upkr_decode_bit         // the bits so far are the context index
    adcs    r5, r5, r5              // r5 = r5 * 2 + bit (bit arrives in carry)
    lsrs    r4, r5, #8              // r4 becomes 1 once all 8 bits are in
    beq     .Ldata_loop
    b       .Lstore                 // stores the low byte; r4 ends up 2 (= "was literal")

.Lend:
    add     sp, sp, #FRAME_SIZE
    pop     { r4, r5, r6, r7, pc }
|
|
||||||
|
|
||||||
.type upkr_decode_length, %function
// Decodes a variable-length number and returns it negated.
//
// In:    r5 .. context index; the first context used is r5 + 1
//        r1, r2, r7 as required by upkr_decode_bit
// Out:   r4 .. -(decoded number)
//        flags as left by the final adds
// Saved: r0 (used as accumulator), r5, r6
upkr_decode_length:
    push    { r0, r5, r6, lr }

    movs    r0, #0                  // r0 = negated sum of the payload bits
    subs    r4, r0, #1              // r4 = -1: negated weight of the next bit
.Llength_loop:
    adds    r5, r5, #1
    bl      upkr_decode_bit         // "continue" bit
    beq     .Llength_done

    adds    r5, r5, #1
    bl      upkr_decode_bit         // payload bit
    beq     .Llength_shift
    adds    r0, r0, r4              // payload bit set: add its (negated) weight
.Llength_shift:
    lsls    r4, r4, #1
    b       .Llength_loop
.Llength_done:
    adds    r4, r4, r0              // add the implicit top bit

    pop     { r0, r5, r6, pc }
|
|
||||||
|
|
||||||
.type upkr_decode_bit, %function
// Decodes one bit with the probability at probs[r5] and adapts that
// probability.
//
// In:    r5 .. context index (preserved)
//        r7 .. probs ptr (preserved)
// InOut: r1 .. in_ptr, advanced by one per byte pulled into the state
//        r2 .. coder state
// Out:   r6 .. decoded bit
//        flags from the final lsrs: Z set when the bit is 0, carry equal
//        to the bit (callers rely on both via beq and adcs)
// Saved: r0 (prob / tmp), r3 (scratch)
upkr_fill_state:
    // state = (state << 8) | *in_ptr++, then retry the threshold check.
    lsls    r2, r2, #8
    ldrb    r6, [r1]
    adds    r1, r1, #1
    orrs    r2, r2, r6

upkr_decode_bit:
    lsrs    r6, r2, #12
    beq     upkr_fill_state         // state < 4096: pull in another byte

    // r1 is pushed only after any refill, so the pop below restores the
    // already advanced in_ptr.
    push    { r0, r1, r3, lr }

    ldrb    r0, [r7, r5]            // r0 = prob

    lsrs    r3, r2, #8              // r3 = state >> 8
    uxtb    r1, r2                  // r1 = state & 255

    subs    r6, r1, r0              // negative <=> low byte < prob <=> bit is 1
    blt     .Lbit_scale

    // bit == 0: with r1 = state - prob and r0 = -prob the shared
    // multiply-add below yields (256 - prob) * (state >> 8) + low - prob.
    subs    r1, r2, r0
    rsbs    r0, r0, #0
.Lbit_scale:

    muls    r3, r3, r0
    adds    r2, r1, r3              // new state

    // r0 += (x + 8) >> 4 with x = (-r0) & 255; the bit shifted out by the
    // lsrs is bit 3 of x and is added back through the carry as rounding.
    rsbs    r3, r0, #0
    uxtb    r3, r3
    lsrs    r3, r3, #4
    adcs    r0, r0, r3

    cmp     r6, #0
    blt     .Lbit_store

    rsbs    r0, r0, #0              // bit == 0: undo the earlier negation
.Lbit_store:

    strb    r0, [r7, r5]

    lsrs    r6, r6, #31             // sign bit of (low - prob) is the decoded bit
    pop     { r0, r1, r3, pc }
|
|
||||||
@@ -1,142 +0,0 @@
|
|||||||
.section .text
|
|
||||||
|
|
||||||
#define FRAME_SIZE (256+32*4+4)
|
|
||||||
|
|
||||||
// Register roles in upkr_unpack:
//   x9  .. prev-was-literal flag (0 right after a match); also receives
//          the negated number from upkr_decode_number
//   x10 .. out ptr
//   x11 .. in ptr
//   x13 .. coder state
//   x14 .. context index / byte being assembled
//   x15 .. bit returned by upkr_decode_bit
//   t0  .. negated match offset
//   t3  .. return address for upkr_decode_number
//   t4, x17, t6 .. saved ra, x8 and x9 of the caller
// The probability table is the FRAME_SIZE bytes at sp.

.global upkr_unpack
.type upkr_unpack, %function
upkr_unpack:
    // Stash the return address and the two callee-saved registers used.
    mv      t4, ra
    mv      x17, x8
    mv      t6, x9

    // Push FRAME_SIZE probability bytes of 128, one byte at a time.
    // x13 counts down to 0, which is the initial state; x9 keeps the
    // non-zero fill value, i.e. "previous was a literal".
    li      x13, FRAME_SIZE
    li      x9, 128
.Linit_probs:
    addi    sp, sp, -1
    sb      x9, 0(sp)
    addi    x13, x13, -1
    bnez    x13, .Linit_probs

.Lmainloop:
    li      x14, 0                  // context 0: literal / match selector
    jal     upkr_decode_bit         // returns x14 = 1
    beqz    x15, .Lliteral

    slli    x14, x14, 8             // x14 = 256
    beqz    x9, .Lread_offset_inc_x14   // after a match an offset always follows
    jal     upkr_decode_bit         // context 256; returns x14 = 257
    bnez    x15, .Lread_offset

.Lfinished_offset:
    addi    x14, x14, 64            // length contexts start at 257 + 64
    jal     t3, upkr_decode_number  // x9 = -length
.Lcopy_byte:
    add     x14, x10, t0            // source = out ptr - offset
    lbu     x14, (x14)
.Lstore_byte:
    sb      x14, (x10)
    addi    x10, x10, 1
    addi    x9, x9, 1               // counts -length up to 0
    blt     x9, x0, .Lcopy_byte
    j       .Lmainloop

.Lliteral:
    // x14 is context + 1 on entry and after every decode; undo that, then
    // shift the new bit in. The leading 1 reaches bit 8 after eight bits.
    jal     upkr_decode_bit
    addi    x14, x14, -1
    slli    x14, x14, 1
    add     x14, x14, x15
    srli    x9, x14, 8
    beqz    x9, .Lliteral
    j       .Lstore_byte            // stores the low byte; x9 becomes 2

.Lread_offset_inc_x14:
    addi    x14, x14, 1             // 256 -> 257
.Lread_offset:
    jal     t3, upkr_decode_number
    addi    t0, x9, 1               // t0 = -(number - 1) = -offset
    bnez    t0, .Lfinished_offset   // offset 0 marks the end of the stream

.Ldone:
    addi    sp, sp, FRAME_SIZE
    mv      x8, x17
    mv      x9, t6
    jr      t4
|
|
||||||
|
|
||||||
// x14 context index
|
|
||||||
// return: x9 negative decoded number
|
|
||||||
upkr_decode_number:
    // Called with `jal t3, ...` (returns through t3) because ra is needed
    // for the nested upkr_decode_bit calls.
    // In:  x14 = first context index (restored before returning)
    // Out: x9  = negated decoded number
    // Clobbers x8 and t5, plus whatever upkr_decode_bit clobbers.
    mv      t5, x14
    li      x9, 0                   // negated sum of the payload bits
    li      x8, -1                  // negated weight of the next payload bit
.Lnumber_loop:
    jal     upkr_decode_bit         // "continue" bit
    beqz    x15, .Lnumber_done
    jal     upkr_decode_bit         // payload bit
    beqz    x15, .Lnumber_shift
    add     x9, x9, x8
.Lnumber_shift:
    slli    x8, x8, 1
    j       .Lnumber_loop
.Lnumber_done:
    add     x9, x9, x8              // add the implicit top bit

    mv      x14, t5
    jr      t3
|
|
||||||
|
|
||||||
upkr_load_byte:
    // state = (state << 8) + *in_ptr++, then fall through to retry.
    lbu     x15, 0(x11)
    addi    x11, x11, 1
    slli    x13, x13, 8
    add     x13, x13, x15

// Decode one bit with the probability byte at sp + context index.
//   in:     x11 in ptr (advanced when more input is needed)
//           x13 state
//           x14 context index
//   return: x14 context index + 1
//           x15 decoded bit
//   x10 is used as scratch but restored; x12, x16, t1 and t2 are clobbered.
upkr_decode_bit:
    srli    x15, x13, 12
    beqz    x15, upkr_load_byte     // state < 4096: pull in another byte

    mv      t1, x14                 // keep the context index for the return value
    mv      t2, x10                 // x10 (out ptr) is borrowed as scratch

    add     x14, x14, sp            // x14 = address of the probability byte
    lbu     x12, 0(x14)             // x12 = prob

    andi    x10, x13, 255           // x10 = low byte of the state
    sltu    x15, x10, x12           // bit = low < prob
    srli    x13, x13, 8
    beqz    x15, .Lbit_zero

    // bit == 1: state = (state >> 8) * prob + low
    //           prob += (256 + 8 - prob) >> 4
    mul     x13, x13, x12
    add     x13, x13, x10
    li      x10, 256 + 8
    sub     x10, x10, x12
    srli    x10, x10, 4
    add     x12, x12, x10
    j       .Lbit_store

.Lbit_zero:
    // bit == 0: state = (state >> 8) * (256 - prob) + low - prob
    //           prob -= (prob + 8) >> 4
    li      x16, 256
    sub     x16, x16, x12
    mul     x13, x13, x16
    add     x13, x13, x10
    sub     x13, x13, x12
    addi    x10, x12, 8
    srli    x10, x10, 4
    sub     x12, x12, x10

.Lbit_store:

    sb      x12, 0(x14)

    addi    x14, t1, 1
    mv      x10, t2
    ret
|
|
||||||
5
c_unpacker/.gitignore
vendored
5
c_unpacker/.gitignore
vendored
@@ -1,5 +0,0 @@
|
|||||||
unpack
|
|
||||||
unpack_bitstream
|
|
||||||
unpack_debug
|
|
||||||
*.upk
|
|
||||||
|
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
# Builds the reference C unpacker in its byte-wise and bit-stream variants.

# `all` never creates a file called `all`, so declare it phony.
.PHONY: all

all: unpack unpack_bitstream

unpack: main.c unpack.c
	cc -O2 -o $@ $^

# Same sources, built with the bit-stream variant of the decoder enabled.
unpack_bitstream: main.c unpack.c
	cc -O2 -D UPKR_BITSTREAM -o $@ $^

# Unoptimised build with debug info; not part of `all`.
unpack_debug: main.c unpack.c
	cc -g -o $@ $^
|
|
||||||
@@ -1,33 +0,0 @@
|
|||||||
int upkr_decode_bit(int context_index) {
|
|
||||||
#ifdef UPKR_BITSTREAM
|
|
||||||
while(upkr_state < 32768) {
|
|
||||||
if(upkr_bits_left == 0) {
|
|
||||||
upkr_current_byte = *upkr_data_ptr++;
|
|
||||||
upkr_bits_left = 8;
|
|
||||||
}
|
|
||||||
upkr_state = (upkr_state << 1) + (upkr_current_byte & 1);
|
|
||||||
upkr_current_byte >>= 1;
|
|
||||||
--upkr_bits_left;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
while(upkr_state < 4096) {
|
|
||||||
upkr_state = (upkr_state << 8) | *upkr_data_ptr++;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int prob = upkr_probs[context_index];
|
|
||||||
int bit = (upkr_state & 255) < prob ? 1 : 0;
|
|
||||||
|
|
||||||
if(bit) {
|
|
||||||
prob = 256 - prob;
|
|
||||||
}
|
|
||||||
upkr_state -= prob * ((upkr_state >> 8) + (bit ^ 1));
|
|
||||||
prob -= (prob + 8) >> 4;
|
|
||||||
if(bit) {
|
|
||||||
prob = -prob;
|
|
||||||
}
|
|
||||||
upkr_probs[context_index] = prob;
|
|
||||||
|
|
||||||
return bit;
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
void* upkr_unpack(void* destination, void* compressed_data);
|
|
||||||
|
|
||||||
/*
 * Test driver: reads the packed file named by argv[1] (at most 1 MiB),
 * unpacks it into a 1 MiB buffer and writes the result to argv[2].
 *
 * Returns 0 on success and 1 on a usage, allocation or file error.
 * The unpacker itself does no bounds checking, so the output buffer can
 * still be overrun by data that unpacks to more than 1 MiB.
 */
int main(int argn, char** argv) {
    enum { BUFFER_SIZE = 1024*1024 };

    if(argn < 3) {
        fprintf(stderr, "Usage: %s <packed input file> <output file>\n",
                argn > 0 ? argv[0] : "unpack");
        return 1;
    }

    void* input_buffer = malloc(BUFFER_SIZE);
    void* output_buffer = malloc(BUFFER_SIZE);
    if(!input_buffer || !output_buffer) {
        fprintf(stderr, "Failed to allocate buffers\n");
        free(input_buffer);
        free(output_buffer);
        return 1;
    }

    FILE* in_file = fopen(argv[1], "rb");
    if(!in_file) {
        perror(argv[1]);
        free(input_buffer);
        free(output_buffer);
        return 1;
    }
    int in_size = fread(input_buffer, 1, BUFFER_SIZE, in_file);
    fclose(in_file);

    printf("Compressed size: %d\n", in_size);

    void* end_ptr = upkr_unpack(output_buffer, input_buffer);
    int out_size = (char*)end_ptr - (char*)output_buffer;

    printf("Uncompressed size: %d\n", out_size);

    int status = 0;
    FILE* out_file = fopen(argv[2], "wb");
    if(!out_file) {
        perror(argv[2]);
        status = 1;
    } else {
        /* A short write means the output file is incomplete. */
        if(fwrite(output_buffer, 1, out_size, out_file) != (size_t)out_size) {
            perror(argv[2]);
            status = 1;
        }
        if(fclose(out_file) != 0) {
            perror(argv[2]);
            status = 1;
        }
    }

    free(input_buffer);
    free(output_buffer);

    return status;
}
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
A very simple unpacker in C, as a reference for people wanting to implement their own unpacker.
It is absolutely not production ready: it makes no effort to ensure the output buffer can actually
hold the uncompressed data.
!!! Never run on untrusted input !!!
|
|
||||||
@@ -1,96 +0,0 @@
|
|||||||
typedef unsigned char u8;
|
|
||||||
typedef unsigned short u16;
|
|
||||||
typedef unsigned long u32;
|
|
||||||
|
|
||||||
u8* upkr_data_ptr;
|
|
||||||
u8 upkr_probs[1 + 255 + 1 + 2*32 + 2*32];
|
|
||||||
#ifdef UPKR_BITSTREAM
|
|
||||||
u16 upkr_state;
|
|
||||||
u8 upkr_current_byte;
|
|
||||||
int upkr_bits_left;
|
|
||||||
#else
|
|
||||||
u32 upkr_state;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int upkr_decode_bit(int context_index) {
|
|
||||||
#ifdef UPKR_BITSTREAM
|
|
||||||
while(upkr_state < 32768) {
|
|
||||||
if(upkr_bits_left == 0) {
|
|
||||||
upkr_current_byte = *upkr_data_ptr++;
|
|
||||||
upkr_bits_left = 8;
|
|
||||||
}
|
|
||||||
upkr_state = (upkr_state << 1) + (upkr_current_byte & 1);
|
|
||||||
upkr_current_byte >>= 1;
|
|
||||||
--upkr_bits_left;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
while(upkr_state < 4096) {
|
|
||||||
upkr_state = (upkr_state << 8) | *upkr_data_ptr++;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int prob = upkr_probs[context_index];
|
|
||||||
int bit = (upkr_state & 255) < prob ? 1 : 0;
|
|
||||||
|
|
||||||
if(bit) {
|
|
||||||
upkr_state = prob * (upkr_state >> 8) + (upkr_state & 255);
|
|
||||||
prob += (256 - prob + 8) >> 4;
|
|
||||||
} else {
|
|
||||||
upkr_state = (256 - prob) * (upkr_state >> 8) + (upkr_state & 255) - prob;
|
|
||||||
prob -= (prob + 8) >> 4;
|
|
||||||
}
|
|
||||||
upkr_probs[context_index] = prob;
|
|
||||||
|
|
||||||
return bit;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Decode a variable-length integer.
 *
 * The value is stored as a sequence of (continue, payload) bit pairs, each
 * pair using its own two contexts starting at context_index, followed by a
 * terminating zero "continue" bit. Payload bits are collected LSB first and
 * an implicit top bit is added above them, so the result is always >= 1.
 */
int upkr_decode_length(int context_index) {
    int value = 0;
    int shift = 0;

    for(;;) {
        if(!upkr_decode_bit(context_index)) {
            break;
        }
        int payload = upkr_decode_bit(context_index + 1);
        value |= payload << shift;
        ++shift;
        context_index += 2;
    }

    return value | (1 << shift);
}
|
|
||||||
|
|
||||||
void* upkr_unpack(void* destination, void* compressed_data) {
|
|
||||||
upkr_data_ptr = (u8*)compressed_data;
|
|
||||||
upkr_state = 0;
|
|
||||||
#ifdef UPKR_BITSTREAM
|
|
||||||
upkr_bits_left = 0;
|
|
||||||
#endif
|
|
||||||
for(int i = 0; i < sizeof(upkr_probs); ++i)
|
|
||||||
upkr_probs[i] = 128;
|
|
||||||
|
|
||||||
u8* write_ptr = (u8*)destination;
|
|
||||||
|
|
||||||
int prev_was_match = 0;
|
|
||||||
int offset = 0;
|
|
||||||
for(;;) {
|
|
||||||
if(upkr_decode_bit(0)) {
|
|
||||||
if(prev_was_match || upkr_decode_bit(256)) {
|
|
||||||
offset = upkr_decode_length(257) - 1;
|
|
||||||
if(offset == 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
int length = upkr_decode_length(257 + 64);
|
|
||||||
while(length--) {
|
|
||||||
*write_ptr = write_ptr[-offset];
|
|
||||||
++write_ptr;
|
|
||||||
}
|
|
||||||
prev_was_match = 1;
|
|
||||||
} else {
|
|
||||||
int byte = 1;
|
|
||||||
while(byte < 256) {
|
|
||||||
int bit = upkr_decode_bit(byte);
|
|
||||||
byte = (byte << 1) + bit;
|
|
||||||
}
|
|
||||||
*write_ptr++ = byte;
|
|
||||||
prev_was_match = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return write_ptr;
|
|
||||||
}
|
|
||||||
3
fuzz/.gitignore
vendored
3
fuzz/.gitignore
vendored
@@ -1,3 +0,0 @@
|
|||||||
target
|
|
||||||
corpus
|
|
||||||
artifacts
|
|
||||||
247
fuzz/Cargo.lock
generated
247
fuzz/Cargo.lock
generated
@@ -1,247 +0,0 @@
|
|||||||
# This file is automatically @generated by Cargo.
|
|
||||||
# It is not intended for manual editing.
|
|
||||||
version = 3
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "anyhow"
|
|
||||||
version = "1.0.65"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "98161a4e3e2184da77bb14f02184cdd111e83bbbcc9979dfee3c44b9a85f5602"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "arbitrary"
|
|
||||||
version = "1.1.6"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "f44124848854b941eafdb34f05b3bcf59472f643c7e151eba7c2b69daa469ed5"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "autocfg"
|
|
||||||
version = "1.1.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "cc"
|
|
||||||
version = "1.0.73"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
|
|
||||||
dependencies = [
|
|
||||||
"jobserver",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "cdivsufsort"
|
|
||||||
version = "2.0.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "edefce019197609da416762da75bb000bbd2224b2d89a7e722c2296cbff79b8c"
|
|
||||||
dependencies = [
|
|
||||||
"cc",
|
|
||||||
"sacabase",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "cfg-if"
|
|
||||||
version = "1.0.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "crossbeam-channel"
|
|
||||||
version = "0.5.6"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521"
|
|
||||||
dependencies = [
|
|
||||||
"cfg-if",
|
|
||||||
"crossbeam-utils",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "crossbeam-utils"
|
|
||||||
version = "0.8.11"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc"
|
|
||||||
dependencies = [
|
|
||||||
"cfg-if",
|
|
||||||
"once_cell",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "jobserver"
|
|
||||||
version = "0.1.25"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "068b1ee6743e4d11fb9c6a1e6064b3693a1b600e7f5f5988047d98b3dc9fb90b"
|
|
||||||
dependencies = [
|
|
||||||
"libc",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "lexopt"
|
|
||||||
version = "0.2.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "478ee9e62aaeaf5b140bd4138753d1f109765488581444218d3ddda43234f3e8"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "libc"
|
|
||||||
version = "0.2.133"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "c0f80d65747a3e43d1596c7c5492d95d5edddaabd45a7fcdb02b95f644164966"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "libfuzzer-sys"
|
|
||||||
version = "0.4.4"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "ae185684fe19814afd066da15a7cc41e126886c21282934225d9fc847582da58"
|
|
||||||
dependencies = [
|
|
||||||
"arbitrary",
|
|
||||||
"cc",
|
|
||||||
"once_cell",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "num-traits"
|
|
||||||
version = "0.2.15"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
|
|
||||||
dependencies = [
|
|
||||||
"autocfg",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "once_cell"
|
|
||||||
version = "1.15.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "pbr"
|
|
||||||
version = "1.0.4"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "ff5751d87f7c00ae6403eb1fcbba229b9c76c9a30de8c1cf87182177b168cea2"
|
|
||||||
dependencies = [
|
|
||||||
"crossbeam-channel",
|
|
||||||
"libc",
|
|
||||||
"time",
|
|
||||||
"winapi",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "proc-macro2"
|
|
||||||
version = "1.0.44"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "7bd7356a8122b6c4a24a82b278680c73357984ca2fc79a0f9fa6dea7dced7c58"
|
|
||||||
dependencies = [
|
|
||||||
"unicode-ident",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "quote"
|
|
||||||
version = "1.0.21"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "sacabase"
|
|
||||||
version = "2.0.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "9883fc3d6ce3d78bb54d908602f8bc1f7b5f983afe601dabe083009d86267a84"
|
|
||||||
dependencies = [
|
|
||||||
"num-traits",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "syn"
|
|
||||||
version = "1.0.101"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "e90cde112c4b9690b8cbe810cba9ddd8bc1d7472e2cae317b69e9438c1cba7d2"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
"quote",
|
|
||||||
"unicode-ident",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "thiserror"
|
|
||||||
version = "1.0.36"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "0a99cb8c4b9a8ef0e7907cd3b617cc8dc04d571c4e73c8ae403d80ac160bb122"
|
|
||||||
dependencies = [
|
|
||||||
"thiserror-impl",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "thiserror-impl"
|
|
||||||
version = "1.0.36"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "3a891860d3c8d66fec8e73ddb3765f90082374dbaaa833407b904a94f1a7eb43"
|
|
||||||
dependencies = [
|
|
||||||
"proc-macro2",
|
|
||||||
"quote",
|
|
||||||
"syn",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "time"
|
|
||||||
version = "0.1.44"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"
|
|
||||||
dependencies = [
|
|
||||||
"libc",
|
|
||||||
"wasi",
|
|
||||||
"winapi",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "unicode-ident"
|
|
||||||
version = "1.0.4"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "upkr"
|
|
||||||
version = "0.2.0-pre3"
|
|
||||||
dependencies = [
|
|
||||||
"anyhow",
|
|
||||||
"cdivsufsort",
|
|
||||||
"lexopt",
|
|
||||||
"pbr",
|
|
||||||
"thiserror",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "upkr-fuzz"
|
|
||||||
version = "0.0.0"
|
|
||||||
dependencies = [
|
|
||||||
"libfuzzer-sys",
|
|
||||||
"upkr",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "wasi"
|
|
||||||
version = "0.10.0+wasi-snapshot-preview1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "winapi"
|
|
||||||
version = "0.3.9"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
|
||||||
dependencies = [
|
|
||||||
"winapi-i686-pc-windows-gnu",
|
|
||||||
"winapi-x86_64-pc-windows-gnu",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "winapi-i686-pc-windows-gnu"
|
|
||||||
version = "0.4.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "winapi-x86_64-pc-windows-gnu"
|
|
||||||
version = "0.4.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
|
||||||
@@ -1,31 +0,0 @@
|
|||||||
[package]
|
|
||||||
name = "upkr-fuzz"
|
|
||||||
version = "0.0.0"
|
|
||||||
authors = ["Automatically generated"]
|
|
||||||
publish = false
|
|
||||||
edition = "2018"
|
|
||||||
|
|
||||||
[package.metadata]
|
|
||||||
cargo-fuzz = true
|
|
||||||
|
|
||||||
[dependencies]
|
|
||||||
libfuzzer-sys = "0.4"
|
|
||||||
|
|
||||||
[dependencies.upkr]
|
|
||||||
path = ".."
|
|
||||||
|
|
||||||
# Prevent this from interfering with workspaces
|
|
||||||
[workspace]
|
|
||||||
members = ["."]
|
|
||||||
|
|
||||||
[[bin]]
|
|
||||||
name = "all_configs"
|
|
||||||
path = "fuzz_targets/all_configs.rs"
|
|
||||||
test = false
|
|
||||||
doc = false
|
|
||||||
|
|
||||||
[[bin]]
|
|
||||||
name = "unpack"
|
|
||||||
path = "fuzz_targets/unpack.rs"
|
|
||||||
test = false
|
|
||||||
doc = false
|
|
||||||
@@ -1,29 +0,0 @@
|
|||||||
#![no_main]
|
|
||||||
use libfuzzer_sys::fuzz_target;
|
|
||||||
|
|
||||||
fuzz_target!(|data: &[u8]| {
|
|
||||||
let mut config = upkr::Config::default();
|
|
||||||
let mut level = 1;
|
|
||||||
let mut data = data;
|
|
||||||
if data.len() > 2 {
|
|
||||||
let flags1 = data[0];
|
|
||||||
let flags2 = data[1];
|
|
||||||
data = &data[2..];
|
|
||||||
config.use_bitstream = (flags1 & 1) != 0;
|
|
||||||
config.parity_contexts = if (flags1 & 2) == 0 { 1 } else { 2 };
|
|
||||||
config.invert_bit_encoding = (flags1 & 4) != 0;
|
|
||||||
config.is_match_bit = (flags1 & 8) != 0;
|
|
||||||
config.new_offset_bit = (flags1 & 16) != 0;
|
|
||||||
config.continue_value_bit = (flags1 & 32) != 0;
|
|
||||||
config.bitstream_is_big_endian = (flags1 & 64) != 0;
|
|
||||||
config.simplified_prob_update = (flags1 & 128) != 0;
|
|
||||||
config.no_repeated_offsets = (flags2 & 32) != 0;
|
|
||||||
config.eof_in_length = (flags2 & 1) != 0;
|
|
||||||
config.max_offset = if (flags2 & 2) == 0 { usize::MAX } else { 32 };
|
|
||||||
config.max_length = if (flags2 & 4) == 0 { usize::MAX } else { 5 };
|
|
||||||
level = (flags2 >> 3) & 3;
|
|
||||||
}
|
|
||||||
let packed = upkr::pack(data, level, &config, None);
|
|
||||||
let unpacked = upkr::unpack(&packed, &config, 1024 * 1024).unwrap();
|
|
||||||
assert!(unpacked == data);
|
|
||||||
});
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
#![no_main]
|
|
||||||
use libfuzzer_sys::fuzz_target;
|
|
||||||
|
|
||||||
fuzz_target!(|data: &[u8]| {
|
|
||||||
let _ = upkr::unpack(data, &upkr::Config::default(), 64 * 1024);
|
|
||||||
});
|
|
||||||
@@ -1,7 +1,4 @@
|
|||||||
use crate::{
|
use crate::rans::{PROB_BITS, ONE_PROB};
|
||||||
rans::{ONE_PROB, PROB_BITS},
|
|
||||||
Config,
|
|
||||||
};
|
|
||||||
|
|
||||||
const INIT_PROB: u16 = 1 << (PROB_BITS - 1);
|
const INIT_PROB: u16 = 1 << (PROB_BITS - 1);
|
||||||
const UPDATE_RATE: u32 = 4;
|
const UPDATE_RATE: u32 = 4;
|
||||||
@@ -10,8 +7,6 @@ const UPDATE_ADD: u32 = 8;
|
|||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct ContextState {
|
pub struct ContextState {
|
||||||
contexts: Vec<u8>,
|
contexts: Vec<u8>,
|
||||||
invert_bit_encoding: bool,
|
|
||||||
simplified_prob_update: bool,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Context<'a> {
|
pub struct Context<'a> {
|
||||||
@@ -20,11 +15,9 @@ pub struct Context<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl ContextState {
|
impl ContextState {
|
||||||
pub fn new(size: usize, config: &Config) -> ContextState {
|
pub fn new(size: usize) -> ContextState {
|
||||||
ContextState {
|
ContextState {
|
||||||
contexts: vec![INIT_PROB as u8; size],
|
contexts: vec![INIT_PROB as u8; size],
|
||||||
invert_bit_encoding: config.invert_bit_encoding,
|
|
||||||
simplified_prob_update: config.simplified_prob_update,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -40,21 +33,10 @@ impl<'a> Context<'a> {
|
|||||||
|
|
||||||
pub fn update(&mut self, bit: bool) {
|
pub fn update(&mut self, bit: bool) {
|
||||||
let old = self.state.contexts[self.index];
|
let old = self.state.contexts[self.index];
|
||||||
|
self.state.contexts[self.index] = if bit {
|
||||||
self.state.contexts[self.index] = if self.state.simplified_prob_update {
|
old + ((ONE_PROB - old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8
|
||||||
let offset = if bit ^ self.state.invert_bit_encoding {
|
|
||||||
ONE_PROB as i32 >> UPDATE_RATE
|
|
||||||
} else {
|
|
||||||
0
|
|
||||||
};
|
|
||||||
|
|
||||||
(offset + old as i32 - ((old as i32 + UPDATE_ADD as i32) >> UPDATE_RATE)) as u8
|
|
||||||
} else {
|
} else {
|
||||||
if bit ^ self.state.invert_bit_encoding {
|
old - ((old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8
|
||||||
old + ((ONE_PROB - old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8
|
|
||||||
} else {
|
|
||||||
old - ((old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,16 +1,16 @@
|
|||||||
|
use crate::lz;
|
||||||
use crate::match_finder::MatchFinder;
|
use crate::match_finder::MatchFinder;
|
||||||
use crate::rans::RansCoder;
|
use crate::rans::RansCoder;
|
||||||
use crate::ProgressCallback;
|
use crate::ProgressCallback;
|
||||||
use crate::{lz, Config};
|
|
||||||
|
|
||||||
pub fn pack(
|
pub fn pack(
|
||||||
data: &[u8],
|
data: &[u8],
|
||||||
config: &Config,
|
use_bitstream: bool,
|
||||||
mut progress_callback: Option<ProgressCallback>,
|
mut progress_callback: Option<ProgressCallback>,
|
||||||
) -> Vec<u8> {
|
) -> Vec<u8> {
|
||||||
let mut match_finder = MatchFinder::new(data);
|
let mut match_finder = MatchFinder::new(data);
|
||||||
let mut rans_coder = RansCoder::new(config);
|
let mut rans_coder = RansCoder::new(use_bitstream);
|
||||||
let mut state = lz::CoderState::new(config);
|
let mut state = lz::CoderState::new();
|
||||||
|
|
||||||
let mut pos = 0;
|
let mut pos = 0;
|
||||||
while pos < data.len() {
|
while pos < data.len() {
|
||||||
@@ -19,16 +19,15 @@ pub fn pack(
|
|||||||
}
|
}
|
||||||
let mut encoded_match = false;
|
let mut encoded_match = false;
|
||||||
if let Some(m) = match_finder.matches(pos).next() {
|
if let Some(m) = match_finder.matches(pos).next() {
|
||||||
let max_offset = config.max_offset.min(1 << (m.length * 3 - 1).min(31));
|
let max_offset = 1 << (m.length * 3 - 1).min(31);
|
||||||
let offset = pos - m.pos;
|
let offset = pos - m.pos;
|
||||||
if offset < max_offset && m.length >= config.min_length() {
|
if offset < max_offset {
|
||||||
let length = m.length.min(config.max_length);
|
|
||||||
lz::Op::Match {
|
lz::Op::Match {
|
||||||
offset: offset as u32,
|
offset: offset as u32,
|
||||||
len: length as u32,
|
len: m.length as u32,
|
||||||
}
|
}
|
||||||
.encode(&mut rans_coder, &mut state, config);
|
.encode(&mut rans_coder, &mut state);
|
||||||
pos += length;
|
pos += m.length;
|
||||||
encoded_match = true;
|
encoded_match = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -40,14 +39,13 @@ pub fn pack(
|
|||||||
.iter()
|
.iter()
|
||||||
.zip(data[(pos - offset)..].iter())
|
.zip(data[(pos - offset)..].iter())
|
||||||
.take_while(|(a, b)| a == b)
|
.take_while(|(a, b)| a == b)
|
||||||
.count()
|
.count();
|
||||||
.min(config.max_length);
|
if length > 0 {
|
||||||
if length >= config.min_length() {
|
|
||||||
lz::Op::Match {
|
lz::Op::Match {
|
||||||
offset: offset as u32,
|
offset: offset as u32,
|
||||||
len: length as u32,
|
len: length as u32,
|
||||||
}
|
}
|
||||||
.encode(&mut rans_coder, &mut state, config);
|
.encode(&mut rans_coder, &mut state);
|
||||||
pos += length;
|
pos += length;
|
||||||
encoded_match = true;
|
encoded_match = true;
|
||||||
}
|
}
|
||||||
@@ -55,11 +53,11 @@ pub fn pack(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !encoded_match {
|
if !encoded_match {
|
||||||
lz::Op::Literal(data[pos]).encode(&mut rans_coder, &mut state, config);
|
lz::Op::Literal(data[pos]).encode(&mut rans_coder, &mut state);
|
||||||
pos += 1;
|
pos += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
lz::encode_eof(&mut rans_coder, &mut state, config);
|
lz::encode_eof(&mut rans_coder, &mut state);
|
||||||
rans_coder.finish()
|
rans_coder.finish()
|
||||||
}
|
}
|
||||||
|
|||||||
70
src/lib.rs
70
src/lib.rs
@@ -5,81 +5,19 @@ mod match_finder;
|
|||||||
mod parsing_packer;
|
mod parsing_packer;
|
||||||
mod rans;
|
mod rans;
|
||||||
|
|
||||||
pub use lz::{calculate_margin, unpack, UnpackError};
|
pub use lz::unpack;
|
||||||
|
|
||||||
pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize);
|
pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize);
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct Config {
|
|
||||||
pub use_bitstream: bool,
|
|
||||||
pub parity_contexts: usize,
|
|
||||||
|
|
||||||
pub invert_bit_encoding: bool,
|
|
||||||
pub is_match_bit: bool,
|
|
||||||
pub new_offset_bit: bool,
|
|
||||||
pub continue_value_bit: bool,
|
|
||||||
|
|
||||||
pub bitstream_is_big_endian: bool,
|
|
||||||
pub simplified_prob_update: bool,
|
|
||||||
|
|
||||||
pub no_repeated_offsets: bool,
|
|
||||||
pub eof_in_length: bool,
|
|
||||||
|
|
||||||
pub max_offset: usize,
|
|
||||||
pub max_length: usize,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Default for Config {
|
|
||||||
fn default() -> Config {
|
|
||||||
Config {
|
|
||||||
use_bitstream: false,
|
|
||||||
parity_contexts: 1,
|
|
||||||
|
|
||||||
invert_bit_encoding: false,
|
|
||||||
is_match_bit: true,
|
|
||||||
new_offset_bit: true,
|
|
||||||
continue_value_bit: true,
|
|
||||||
|
|
||||||
bitstream_is_big_endian: false,
|
|
||||||
simplified_prob_update: false,
|
|
||||||
|
|
||||||
no_repeated_offsets: false,
|
|
||||||
eof_in_length: false,
|
|
||||||
|
|
||||||
max_offset: usize::MAX,
|
|
||||||
max_length: usize::MAX,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Config {
|
|
||||||
pub fn min_length(&self) -> usize {
|
|
||||||
if self.eof_in_length {
|
|
||||||
2
|
|
||||||
} else {
|
|
||||||
1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn pack(
|
pub fn pack(
|
||||||
data: &[u8],
|
data: &[u8],
|
||||||
level: u8,
|
level: u8,
|
||||||
config: &Config,
|
use_bitstream: bool,
|
||||||
progress_callback: Option<ProgressCallback>,
|
progress_callback: Option<ProgressCallback>,
|
||||||
) -> Vec<u8> {
|
) -> Vec<u8> {
|
||||||
if level == 0 {
|
if level == 0 {
|
||||||
greedy_packer::pack(data, config, progress_callback)
|
greedy_packer::pack(data, use_bitstream, progress_callback)
|
||||||
} else {
|
} else {
|
||||||
parsing_packer::pack(data, level, config, progress_callback)
|
parsing_packer::pack(data, level, use_bitstream, progress_callback)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn compressed_size(mut data: &[u8]) -> f32 {
|
|
||||||
let mut state = 0;
|
|
||||||
while state < 4096 {
|
|
||||||
state = (state << 8) | data[0] as u32;
|
|
||||||
data = &data[1..];
|
|
||||||
}
|
|
||||||
data.len() as f32 + (state as f32).log2() / 8.
|
|
||||||
}
|
|
||||||
|
|||||||
204
src/lz.rs
204
src/lz.rs
@@ -1,7 +1,5 @@
|
|||||||
use crate::context_state::ContextState;
|
use crate::context_state::ContextState;
|
||||||
use crate::rans::{EntropyCoder, RansDecoder};
|
use crate::rans::{EntropyCoder, RansDecoder};
|
||||||
use crate::Config;
|
|
||||||
use thiserror::Error;
|
|
||||||
|
|
||||||
#[derive(Copy, Clone, Debug)]
|
#[derive(Copy, Clone, Debug)]
|
||||||
pub enum Op {
|
pub enum Op {
|
||||||
@@ -10,73 +8,45 @@ pub enum Op {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Op {
|
impl Op {
|
||||||
pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) {
|
pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState) {
|
||||||
let literal_base = state.pos % state.parity_contexts * 256;
|
let base_context = 256 * (state.pos & 3);
|
||||||
match self {
|
match self {
|
||||||
&Op::Literal(lit) => {
|
&Op::Literal(lit) => {
|
||||||
encode_bit(coder, state, literal_base, !config.is_match_bit);
|
encode_bit(coder, state, base_context, false);
|
||||||
let mut context_index = 1;
|
let mut context_index = 1;
|
||||||
for i in (0..8).rev() {
|
for i in (0..8).rev() {
|
||||||
let bit = (lit >> i) & 1 != 0;
|
let bit = (lit >> i) & 1 != 0;
|
||||||
encode_bit(coder, state, literal_base + context_index, bit);
|
encode_bit(coder, state, base_context + context_index, bit);
|
||||||
context_index = (context_index << 1) | bit as usize;
|
context_index = (context_index << 1) | bit as usize;
|
||||||
}
|
}
|
||||||
state.prev_was_match = false;
|
|
||||||
state.pos += 1;
|
state.pos += 1;
|
||||||
|
state.prev_was_match = false;
|
||||||
}
|
}
|
||||||
&Op::Match { offset, len } => {
|
&Op::Match { offset, len } => {
|
||||||
encode_bit(coder, state, literal_base, config.is_match_bit);
|
encode_bit(coder, state, base_context, true);
|
||||||
let mut new_offset = true;
|
if !state.prev_was_match {
|
||||||
if !state.prev_was_match && !config.no_repeated_offsets {
|
encode_bit(coder, state, 1024, offset != state.last_offset);
|
||||||
new_offset = offset != state.last_offset;
|
} else {
|
||||||
encode_bit(
|
assert!(offset != state.last_offset);
|
||||||
coder,
|
|
||||||
state,
|
|
||||||
256 * state.parity_contexts,
|
|
||||||
new_offset == config.new_offset_bit,
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
assert!(offset as usize <= config.max_offset);
|
if offset != state.last_offset {
|
||||||
if new_offset {
|
encode_length(coder, state, 1025, offset + 1);
|
||||||
encode_length(
|
|
||||||
coder,
|
|
||||||
state,
|
|
||||||
256 * state.parity_contexts + 1,
|
|
||||||
offset + if config.eof_in_length { 0 } else { 1 },
|
|
||||||
config,
|
|
||||||
);
|
|
||||||
state.last_offset = offset;
|
state.last_offset = offset;
|
||||||
}
|
}
|
||||||
assert!(len as usize >= config.min_length() && len as usize <= config.max_length);
|
encode_length(coder, state, 1025 + 64, len);
|
||||||
encode_length(coder, state, 256 * state.parity_contexts + 65, len, config);
|
|
||||||
state.prev_was_match = true;
|
|
||||||
state.pos += len as usize;
|
state.pos += len as usize;
|
||||||
|
state.prev_was_match = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) {
|
pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState) {
|
||||||
encode_bit(
|
encode_bit(coder, state, 256 * (state.pos & 3), true);
|
||||||
coder,
|
if !state.prev_was_match {
|
||||||
state,
|
encode_bit(coder, state, 1024, true);
|
||||||
state.pos % state.parity_contexts * 256,
|
|
||||||
config.is_match_bit,
|
|
||||||
);
|
|
||||||
if !state.prev_was_match && !config.no_repeated_offsets {
|
|
||||||
encode_bit(
|
|
||||||
coder,
|
|
||||||
state,
|
|
||||||
256 * state.parity_contexts,
|
|
||||||
config.new_offset_bit ^ config.eof_in_length,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
if !config.eof_in_length || state.prev_was_match || config.no_repeated_offsets {
|
|
||||||
encode_length(coder, state, 256 * state.parity_contexts + 1, 1, config);
|
|
||||||
}
|
|
||||||
if config.eof_in_length {
|
|
||||||
encode_length(coder, state, 256 * state.parity_contexts + 65, 1, config);
|
|
||||||
}
|
}
|
||||||
|
encode_length(coder, state, 1025, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn encode_bit(
|
fn encode_bit(
|
||||||
@@ -93,37 +63,34 @@ fn encode_length(
|
|||||||
state: &mut CoderState,
|
state: &mut CoderState,
|
||||||
context_start: usize,
|
context_start: usize,
|
||||||
mut value: u32,
|
mut value: u32,
|
||||||
config: &Config,
|
|
||||||
) {
|
) {
|
||||||
assert!(value >= 1);
|
assert!(value >= 1);
|
||||||
|
|
||||||
let mut context_index = context_start;
|
let mut context_index = context_start;
|
||||||
while value >= 2 {
|
while value >= 2 {
|
||||||
encode_bit(coder, state, context_index, config.continue_value_bit);
|
encode_bit(coder, state, context_index, true);
|
||||||
encode_bit(coder, state, context_index + 1, value & 1 != 0);
|
encode_bit(coder, state, context_index + 1, value & 1 != 0);
|
||||||
context_index += 2;
|
context_index += 2;
|
||||||
value >>= 1;
|
value >>= 1;
|
||||||
}
|
}
|
||||||
encode_bit(coder, state, context_index, !config.continue_value_bit);
|
encode_bit(coder, state, context_index, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct CoderState {
|
pub struct CoderState {
|
||||||
contexts: ContextState,
|
contexts: ContextState,
|
||||||
last_offset: u32,
|
last_offset: u32,
|
||||||
prev_was_match: bool,
|
|
||||||
pos: usize,
|
pos: usize,
|
||||||
parity_contexts: usize,
|
prev_was_match: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl CoderState {
|
impl CoderState {
|
||||||
pub fn new(config: &Config) -> CoderState {
|
pub fn new() -> CoderState {
|
||||||
CoderState {
|
CoderState {
|
||||||
contexts: ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, config),
|
contexts: ContextState::new((1 + 255) * 4 + 1 + 64 + 64),
|
||||||
last_offset: 0,
|
last_offset: 0,
|
||||||
prev_was_match: false,
|
|
||||||
pos: 0,
|
pos: 0,
|
||||||
parity_contexts: config.parity_contexts,
|
prev_was_match: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -132,141 +99,56 @@ impl CoderState {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Error, Debug)]
|
pub fn unpack(packed_data: &[u8], use_bitstream: bool) -> Vec<u8> {
|
||||||
pub enum UnpackError {
|
let mut decoder = RansDecoder::new(packed_data, use_bitstream);
|
||||||
#[error("match offset out of range: {offset} > {position}")]
|
let mut contexts = ContextState::new((1 + 255) * 4 + 1 + 64 + 64);
|
||||||
OffsetOutOfRange { offset: usize, position: usize },
|
|
||||||
#[error("Unpacked data over size limit: {size} > {limit}")]
|
|
||||||
OverSize { size: usize, limit: usize },
|
|
||||||
#[error("Unexpected end of input data")]
|
|
||||||
UnexpectedEOF {
|
|
||||||
#[from]
|
|
||||||
source: crate::rans::UnexpectedEOF,
|
|
||||||
},
|
|
||||||
#[error("Overflow while reading value")]
|
|
||||||
ValueOverflow,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn unpack(
|
|
||||||
packed_data: &[u8],
|
|
||||||
config: &Config,
|
|
||||||
max_size: usize,
|
|
||||||
) -> Result<Vec<u8>, UnpackError> {
|
|
||||||
let mut result = vec![];
|
let mut result = vec![];
|
||||||
let _ = unpack_internal(Some(&mut result), packed_data, config, max_size)?;
|
let mut offset = 0;
|
||||||
Ok(result)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn calculate_margin(packed_data: &[u8], config: &Config) -> Result<isize, UnpackError> {
|
|
||||||
unpack_internal(None, packed_data, config, usize::MAX)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn unpack_internal(
|
|
||||||
mut result: Option<&mut Vec<u8>>,
|
|
||||||
packed_data: &[u8],
|
|
||||||
config: &Config,
|
|
||||||
max_size: usize,
|
|
||||||
) -> Result<isize, UnpackError> {
|
|
||||||
let mut decoder = RansDecoder::new(packed_data, &config);
|
|
||||||
let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, &config);
|
|
||||||
let mut offset = usize::MAX;
|
|
||||||
let mut position = 0usize;
|
|
||||||
let mut prev_was_match = false;
|
let mut prev_was_match = false;
|
||||||
let mut margin = 0isize;
|
|
||||||
|
|
||||||
fn decode_length(
|
fn decode_length(
|
||||||
decoder: &mut RansDecoder,
|
decoder: &mut RansDecoder,
|
||||||
contexts: &mut ContextState,
|
contexts: &mut ContextState,
|
||||||
mut context_index: usize,
|
mut context_index: usize,
|
||||||
config: &Config,
|
) -> usize {
|
||||||
) -> Result<usize, UnpackError> {
|
|
||||||
let mut length = 0;
|
let mut length = 0;
|
||||||
let mut bit_pos = 0;
|
let mut bit_pos = 0;
|
||||||
while decoder.decode_with_context(&mut contexts.context_mut(context_index))?
|
while decoder.decode_with_context(&mut contexts.context_mut(context_index)) {
|
||||||
== config.continue_value_bit
|
length |= (decoder.decode_with_context(&mut contexts.context_mut(context_index + 1))
|
||||||
{
|
|
||||||
length |= (decoder.decode_with_context(&mut contexts.context_mut(context_index + 1))?
|
|
||||||
as usize)
|
as usize)
|
||||||
<< bit_pos;
|
<< bit_pos;
|
||||||
bit_pos += 1;
|
bit_pos += 1;
|
||||||
if bit_pos >= 32 {
|
|
||||||
return Err(UnpackError::ValueOverflow);
|
|
||||||
}
|
|
||||||
context_index += 2;
|
context_index += 2;
|
||||||
}
|
}
|
||||||
Ok(length | (1 << bit_pos))
|
length | (1 << bit_pos)
|
||||||
}
|
}
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
margin = margin.max(position as isize - decoder.pos() as isize);
|
let base_context = 256 * (result.len() & 3);
|
||||||
let literal_base = position % config.parity_contexts * 256;
|
if decoder.decode_with_context(&mut contexts.context_mut(base_context)) {
|
||||||
if decoder.decode_with_context(&mut contexts.context_mut(literal_base))?
|
if prev_was_match || decoder.decode_with_context(&mut contexts.context_mut(1024)) {
|
||||||
== config.is_match_bit
|
offset = decode_length(&mut decoder, &mut contexts, 1025) - 1;
|
||||||
{
|
|
||||||
if config.no_repeated_offsets
|
|
||||||
|| prev_was_match
|
|
||||||
|| decoder
|
|
||||||
.decode_with_context(&mut contexts.context_mut(256 * config.parity_contexts))?
|
|
||||||
== config.new_offset_bit
|
|
||||||
{
|
|
||||||
offset = decode_length(
|
|
||||||
&mut decoder,
|
|
||||||
&mut contexts,
|
|
||||||
256 * config.parity_contexts + 1,
|
|
||||||
&config,
|
|
||||||
)? - if config.eof_in_length { 0 } else { 1 };
|
|
||||||
if offset == 0 {
|
if offset == 0 {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let length = decode_length(
|
let length = decode_length(&mut decoder, &mut contexts, 1025 + 64);
|
||||||
&mut decoder,
|
for _ in 0..length {
|
||||||
&mut contexts,
|
result.push(result[result.len() - offset]);
|
||||||
256 * config.parity_contexts + 65,
|
|
||||||
&config,
|
|
||||||
)?;
|
|
||||||
if config.eof_in_length && length == 1 {
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
if offset > position {
|
|
||||||
return Err(UnpackError::OffsetOutOfRange { offset, position });
|
|
||||||
}
|
|
||||||
if let Some(ref mut result) = result {
|
|
||||||
for _ in 0..length {
|
|
||||||
if result.len() < max_size {
|
|
||||||
result.push(result[result.len() - offset]);
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
position += length;
|
|
||||||
prev_was_match = true;
|
prev_was_match = true;
|
||||||
} else {
|
} else {
|
||||||
let mut context_index = 1;
|
let mut context_index = 1;
|
||||||
let mut byte = 0;
|
let mut byte = 0;
|
||||||
for i in (0..8).rev() {
|
for i in (0..8).rev() {
|
||||||
let bit = decoder
|
let bit = decoder.decode_with_context(&mut contexts.context_mut(base_context + context_index));
|
||||||
.decode_with_context(&mut contexts.context_mut(literal_base + context_index))?;
|
|
||||||
context_index = (context_index << 1) | bit as usize;
|
context_index = (context_index << 1) | bit as usize;
|
||||||
byte |= (bit as u8) << i;
|
byte |= (bit as u8) << i;
|
||||||
}
|
}
|
||||||
if let Some(ref mut result) = result {
|
result.push(byte);
|
||||||
if result.len() < max_size {
|
|
||||||
result.push(byte);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
position += 1;
|
|
||||||
prev_was_match = false;
|
prev_was_match = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if position > max_size {
|
result
|
||||||
return Err(UnpackError::OverSize {
|
|
||||||
size: position,
|
|
||||||
limit: max_size,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(margin + decoder.pos() as isize - position as isize)
|
|
||||||
}
|
}
|
||||||
|
|||||||
201
src/main.rs
201
src/main.rs
@@ -1,175 +1,64 @@
|
|||||||
use anyhow::Result;
|
use anyhow::{bail, Result};
|
||||||
use std::ffi::OsStr;
|
|
||||||
use std::io::prelude::*;
|
use std::io::prelude::*;
|
||||||
use std::process;
|
|
||||||
use std::{fs::File, path::PathBuf};
|
use std::{fs::File, path::PathBuf};
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
let mut config = upkr::Config::default();
|
let mut args = pico_args::Arguments::from_env();
|
||||||
let mut reverse = false;
|
|
||||||
let mut unpack = false;
|
|
||||||
let mut calculate_margin = false;
|
|
||||||
let mut level = 2;
|
|
||||||
let mut infile: Option<PathBuf> = None;
|
|
||||||
let mut outfile: Option<PathBuf> = None;
|
|
||||||
let mut max_unpacked_size = 512 * 1024 * 1024;
|
|
||||||
|
|
||||||
let mut parser = lexopt::Parser::from_env();
|
match args.subcommand()?.as_ref().map(|s| s.as_str()) {
|
||||||
while let Some(arg) = parser.next()? {
|
None => print_help(),
|
||||||
use lexopt::prelude::*;
|
Some("pack") => {
|
||||||
match arg {
|
let level = args.opt_value_from_str(["-l", "--level"])?.unwrap_or(2u8);
|
||||||
Short('b') | Long("bitstream") => config.use_bitstream = true,
|
let use_bitstream = args.contains(["-b", "--bitstream"]);
|
||||||
Short('p') | Long("parity") => config.parity_contexts = parser.value()?.parse()?,
|
|
||||||
Short('r') | Long("reverse") => reverse = true,
|
|
||||||
Long("invert-is-match-bit") => config.is_match_bit = false,
|
|
||||||
Long("invert-new-offset-bit") => config.new_offset_bit = false,
|
|
||||||
Long("invert-continue-value-bit") => config.continue_value_bit = false,
|
|
||||||
Long("invert-bit-encoding") => config.invert_bit_encoding = true,
|
|
||||||
Long("simplified-prob-update") => config.simplified_prob_update = true,
|
|
||||||
Long("big-endian-bitstream") => {
|
|
||||||
config.use_bitstream = true;
|
|
||||||
config.bitstream_is_big_endian = true;
|
|
||||||
}
|
|
||||||
Long("no-repeated-offsets") => config.no_repeated_offsets = true,
|
|
||||||
Long("eof-in-length") => config.eof_in_length = true,
|
|
||||||
|
|
||||||
Long("max-offset") => config.max_offset = parser.value()?.parse()?,
|
let infile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
|
||||||
Long("max-length") => config.max_length = parser.value()?.parse()?,
|
let outfile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
|
||||||
|
|
||||||
Long("z80") => {
|
let mut data = vec![];
|
||||||
config.use_bitstream = true;
|
File::open(infile)?.read_to_end(&mut data)?;
|
||||||
config.bitstream_is_big_endian = true;
|
|
||||||
config.invert_bit_encoding = true;
|
let mut pb = pbr::ProgressBar::new(data.len() as u64);
|
||||||
config.simplified_prob_update = true;
|
pb.set_units(pbr::Units::Bytes);
|
||||||
level = 9;
|
let packed_data = upkr::pack(
|
||||||
}
|
&data,
|
||||||
Long("x86") => {
|
level,
|
||||||
config.use_bitstream = true;
|
use_bitstream,
|
||||||
config.continue_value_bit = false;
|
Some(&mut |pos| {
|
||||||
config.is_match_bit = false;
|
pb.set(pos as u64);
|
||||||
config.new_offset_bit = false;
|
}),
|
||||||
}
|
);
|
||||||
|
pb.finish();
|
||||||
|
|
||||||
Short('u') | Long("unpack") => unpack = true,
|
println!(
|
||||||
Long("margin") => calculate_margin = true,
|
"Compressed {} bytes to {} bytes ({}%)",
|
||||||
Short('l') | Long("level") => level = parser.value()?.parse()?,
|
data.len(),
|
||||||
Short(n) if n.is_ascii_digit() => level = n as u8 - b'0',
|
packed_data.len(),
|
||||||
Short('h') | Long("help") => print_help(0),
|
packed_data.len() as f32 * 100. / data.len() as f32
|
||||||
Long("max-unpacked-size") => max_unpacked_size = parser.value()?.parse()?,
|
);
|
||||||
Value(val) if infile.is_none() => infile = Some(val.try_into()?),
|
File::create(outfile)?.write_all(&packed_data)?;
|
||||||
Value(val) if outfile.is_none() => outfile = Some(val.try_into()?),
|
|
||||||
_ => return Err(arg.unexpected().into()),
|
|
||||||
}
|
}
|
||||||
}
|
Some("unpack") => {
|
||||||
|
let use_bitstream = args.contains(["-b", "--bitstream"]);
|
||||||
|
|
||||||
let infile = infile.unwrap_or_else(|| print_help(1));
|
let infile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
|
||||||
let outfile = outfile.unwrap_or_else(|| {
|
let outfile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
|
||||||
let mut name = infile.clone();
|
|
||||||
if unpack {
|
let mut data = vec![];
|
||||||
if name.extension().filter(|&e| e == "upk").is_some() {
|
File::open(infile)?.read_to_end(&mut data)?;
|
||||||
name.set_extension("");
|
let packed_data = upkr::unpack(&data, use_bitstream);
|
||||||
} else {
|
File::create(outfile)?.write_all(&packed_data)?;
|
||||||
name.set_extension("bin");
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
let mut filename = name
|
|
||||||
.file_name()
|
|
||||||
.unwrap_or_else(|| OsStr::new(""))
|
|
||||||
.to_os_string();
|
|
||||||
filename.push(".upk");
|
|
||||||
name.set_file_name(filename);
|
|
||||||
}
|
}
|
||||||
name
|
Some(other) => {
|
||||||
});
|
bail!("Unknown subcommand '{}'", other);
|
||||||
|
|
||||||
if config.parity_contexts != 1 && config.parity_contexts != 2 && config.parity_contexts != 4 {
|
|
||||||
eprintln!("--parity has to be 1, 2, or 4");
|
|
||||||
process::exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
if !unpack && !calculate_margin {
|
|
||||||
let mut data = vec![];
|
|
||||||
File::open(infile)?.read_to_end(&mut data)?;
|
|
||||||
if reverse {
|
|
||||||
data.reverse();
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut pb = pbr::ProgressBar::new(data.len() as u64);
|
|
||||||
pb.set_units(pbr::Units::Bytes);
|
|
||||||
let mut packed_data = upkr::pack(
|
|
||||||
&data,
|
|
||||||
level,
|
|
||||||
&config,
|
|
||||||
Some(&mut |pos| {
|
|
||||||
pb.set(pos as u64);
|
|
||||||
}),
|
|
||||||
);
|
|
||||||
pb.finish();
|
|
||||||
|
|
||||||
if reverse {
|
|
||||||
packed_data.reverse();
|
|
||||||
}
|
|
||||||
|
|
||||||
println!(
|
|
||||||
"Compressed {} bytes to {} bytes ({}%)",
|
|
||||||
data.len(),
|
|
||||||
packed_data.len(),
|
|
||||||
packed_data.len() as f32 * 100. / data.len() as f32
|
|
||||||
);
|
|
||||||
File::create(outfile)?.write_all(&packed_data)?;
|
|
||||||
} else {
|
|
||||||
let mut data = vec![];
|
|
||||||
File::open(infile)?.read_to_end(&mut data)?;
|
|
||||||
if reverse {
|
|
||||||
data.reverse();
|
|
||||||
}
|
|
||||||
if unpack {
|
|
||||||
let mut unpacked_data = upkr::unpack(&data, &config, max_unpacked_size)?;
|
|
||||||
if reverse {
|
|
||||||
unpacked_data.reverse();
|
|
||||||
}
|
|
||||||
File::create(outfile)?.write_all(&unpacked_data)?;
|
|
||||||
}
|
|
||||||
if calculate_margin {
|
|
||||||
println!("{}", upkr::calculate_margin(&data, &config)?);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn print_help(exit_code: i32) -> ! {
|
fn print_help() {
|
||||||
eprintln!("Usage:");
|
eprintln!("Usage:");
|
||||||
eprintln!(" upkr [-l level(0-9)] [config options] <infile> [<outfile>]");
|
eprintln!(" upkr pack [-l level(0-9)] <infile> <outfile>");
|
||||||
eprintln!(" upkr -u [config options] <infile> [<outfile>]");
|
eprintln!(" upkr unpack <infile> <outfile>");
|
||||||
eprintln!(" upkr --margin [config options] <infile>");
|
std::process::exit(1);
|
||||||
eprintln!();
|
|
||||||
eprintln!(" -l, --level N compression level 0-9");
|
|
||||||
eprintln!(" -0, ..., -9 short form for setting compression level");
|
|
||||||
eprintln!(" -u, --unpack unpack infile");
|
|
||||||
eprintln!(" --margin calculate margin for overlapped unpacking of a packed file");
|
|
||||||
eprintln!();
|
|
||||||
eprintln!("Config presets for specific unpackers:");
|
|
||||||
eprintln!(" --z80 --big-endian-bitstream --invert-bit-encoding --simplified-prob-update -9");
|
|
||||||
eprintln!(
|
|
||||||
" --x86 --bitstream --invert-is-match-bit --invert-continue-value-bit --invert-new-offset-bit"
|
|
||||||
);
|
|
||||||
eprintln!();
|
|
||||||
eprintln!("Config options (need to match when packing/unpacking):");
|
|
||||||
eprintln!(" -b, --bitstream bitstream mode");
|
|
||||||
eprintln!(" -p, --parity N use N (2/4) parity contexts");
|
|
||||||
eprintln!(" -r, --reverse reverse input & output");
|
|
||||||
eprintln!();
|
|
||||||
eprintln!("Config options to tailor output to specific optimized unpackers:");
|
|
||||||
eprintln!(" --invert-is-match-bit");
|
|
||||||
eprintln!(" --invert-new-offset-bit");
|
|
||||||
eprintln!(" --invert-continue-value-bit");
|
|
||||||
eprintln!(" --invert-bit-encoding");
|
|
||||||
eprintln!(" --simplified-prob-update");
|
|
||||||
eprintln!(" --big-endian-bitstream (implies --bitstream)");
|
|
||||||
eprintln!(" --no-repeated-offsets");
|
|
||||||
eprintln!(" --eof-in-length");
|
|
||||||
eprintln!(" --max-offset N");
|
|
||||||
eprintln!(" --max-length N");
|
|
||||||
process::exit(exit_code);
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,24 +6,19 @@ use crate::match_finder::MatchFinder;
|
|||||||
use crate::rans::{CostCounter, RansCoder};
|
use crate::rans::{CostCounter, RansCoder};
|
||||||
use crate::{lz, ProgressCallback};
|
use crate::{lz, ProgressCallback};
|
||||||
|
|
||||||
pub fn pack(
|
pub fn pack(data: &[u8], level: u8, use_bitstream: bool, progress_cb: Option<ProgressCallback>) -> Vec<u8> {
|
||||||
data: &[u8],
|
let mut parse = parse(data, Config::from_level(level), progress_cb);
|
||||||
level: u8,
|
|
||||||
config: &crate::Config,
|
|
||||||
progress_cb: Option<ProgressCallback>,
|
|
||||||
) -> Vec<u8> {
|
|
||||||
let mut parse = parse(data, Config::from_level(level), config, progress_cb);
|
|
||||||
let mut ops = vec![];
|
let mut ops = vec![];
|
||||||
while let Some(link) = parse {
|
while let Some(link) = parse {
|
||||||
ops.push(link.op);
|
ops.push(link.op);
|
||||||
parse = link.prev.clone();
|
parse = link.prev.clone();
|
||||||
}
|
}
|
||||||
let mut state = lz::CoderState::new(config);
|
let mut state = lz::CoderState::new();
|
||||||
let mut coder = RansCoder::new(config);
|
let mut coder = RansCoder::new(use_bitstream);
|
||||||
for op in ops.into_iter().rev() {
|
for op in ops.into_iter().rev() {
|
||||||
op.encode(&mut coder, &mut state, config);
|
op.encode(&mut coder, &mut state);
|
||||||
}
|
}
|
||||||
lz::encode_eof(&mut coder, &mut state, config);
|
lz::encode_eof(&mut coder, &mut state);
|
||||||
coder.finish()
|
coder.finish()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -43,7 +38,6 @@ type Arrivals = HashMap<usize, Vec<Arrival>>;
|
|||||||
fn parse(
|
fn parse(
|
||||||
data: &[u8],
|
data: &[u8],
|
||||||
config: Config,
|
config: Config,
|
||||||
encoding_config: &crate::Config,
|
|
||||||
mut progress_cb: Option<ProgressCallback>,
|
mut progress_cb: Option<ProgressCallback>,
|
||||||
) -> Option<Rc<Parse>> {
|
) -> Option<Rc<Parse>> {
|
||||||
let mut match_finder = MatchFinder::new(data)
|
let mut match_finder = MatchFinder::new(data)
|
||||||
@@ -105,22 +99,17 @@ fn parse(
|
|||||||
cost_counter: &mut CostCounter,
|
cost_counter: &mut CostCounter,
|
||||||
pos: usize,
|
pos: usize,
|
||||||
offset: usize,
|
offset: usize,
|
||||||
mut length: usize,
|
length: usize,
|
||||||
arrival: &Arrival,
|
arrival: &Arrival,
|
||||||
max_arrivals: usize,
|
max_arrivals: usize,
|
||||||
config: &crate::Config,
|
|
||||||
) {
|
) {
|
||||||
if length < config.min_length() {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
length = length.min(config.max_length);
|
|
||||||
cost_counter.reset();
|
cost_counter.reset();
|
||||||
let mut state = arrival.state.clone();
|
let mut state = arrival.state.clone();
|
||||||
let op = lz::Op::Match {
|
let op = lz::Op::Match {
|
||||||
offset: offset as u32,
|
offset: offset as u32,
|
||||||
len: length as u32,
|
len: length as u32,
|
||||||
};
|
};
|
||||||
op.encode(cost_counter, &mut state, config);
|
op.encode(cost_counter, &mut state);
|
||||||
add_arrival(
|
add_arrival(
|
||||||
arrivals,
|
arrivals,
|
||||||
pos + length,
|
pos + length,
|
||||||
@@ -140,13 +129,13 @@ fn parse(
|
|||||||
0,
|
0,
|
||||||
Arrival {
|
Arrival {
|
||||||
parse: None,
|
parse: None,
|
||||||
state: lz::CoderState::new(encoding_config),
|
state: lz::CoderState::new(),
|
||||||
cost: 0.0,
|
cost: 0.0,
|
||||||
},
|
},
|
||||||
max_arrivals,
|
max_arrivals,
|
||||||
);
|
);
|
||||||
|
|
||||||
let cost_counter = &mut CostCounter::new(encoding_config);
|
let cost_counter = &mut CostCounter::new();
|
||||||
let mut best_per_offset = HashMap::new();
|
let mut best_per_offset = HashMap::new();
|
||||||
for pos in 0..data.len() {
|
for pos in 0..data.len() {
|
||||||
let match_length = |offset: usize| {
|
let match_length = |offset: usize| {
|
||||||
@@ -187,21 +176,18 @@ fn parse(
|
|||||||
for m in match_finder.matches(pos) {
|
for m in match_finder.matches(pos) {
|
||||||
closest_match = Some(closest_match.unwrap_or(0).max(m.pos));
|
closest_match = Some(closest_match.unwrap_or(0).max(m.pos));
|
||||||
let offset = pos - m.pos;
|
let offset = pos - m.pos;
|
||||||
if offset <= encoding_config.max_offset {
|
found_last_offset |= offset as u32 == arrival.state.last_offset();
|
||||||
found_last_offset |= offset as u32 == arrival.state.last_offset();
|
add_match(
|
||||||
add_match(
|
&mut arrivals,
|
||||||
&mut arrivals,
|
cost_counter,
|
||||||
cost_counter,
|
pos,
|
||||||
pos,
|
offset,
|
||||||
offset,
|
m.length,
|
||||||
m.length,
|
&arrival,
|
||||||
&arrival,
|
max_arrivals,
|
||||||
max_arrivals,
|
);
|
||||||
encoding_config,
|
if m.length >= config.greedy_size {
|
||||||
);
|
break 'arrival_loop;
|
||||||
if m.length >= config.greedy_size {
|
|
||||||
break 'arrival_loop;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -212,9 +198,6 @@ fn parse(
|
|||||||
&& closest_match.iter().all(|p| *p < match_pos)
|
&& closest_match.iter().all(|p| *p < match_pos)
|
||||||
{
|
{
|
||||||
let offset = pos - match_pos;
|
let offset = pos - match_pos;
|
||||||
if offset > encoding_config.max_offset {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
let length = match_length(offset);
|
let length = match_length(offset);
|
||||||
assert!(length > 0);
|
assert!(length > 0);
|
||||||
add_match(
|
add_match(
|
||||||
@@ -225,7 +208,6 @@ fn parse(
|
|||||||
length,
|
length,
|
||||||
&arrival,
|
&arrival,
|
||||||
max_arrivals,
|
max_arrivals,
|
||||||
encoding_config,
|
|
||||||
);
|
);
|
||||||
found_last_offset |= offset as u32 == arrival.state.last_offset();
|
found_last_offset |= offset as u32 == arrival.state.last_offset();
|
||||||
if offset < near_matches.len() {
|
if offset < near_matches.len() {
|
||||||
@@ -246,7 +228,6 @@ fn parse(
|
|||||||
length,
|
length,
|
||||||
&arrival,
|
&arrival,
|
||||||
max_arrivals,
|
max_arrivals,
|
||||||
encoding_config,
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -254,7 +235,7 @@ fn parse(
|
|||||||
cost_counter.reset();
|
cost_counter.reset();
|
||||||
let mut state = arrival.state;
|
let mut state = arrival.state;
|
||||||
let op = lz::Op::Literal(data[pos]);
|
let op = lz::Op::Literal(data[pos]);
|
||||||
op.encode(cost_counter, &mut state, encoding_config);
|
op.encode(cost_counter, &mut state);
|
||||||
add_arrival(
|
add_arrival(
|
||||||
&mut arrivals,
|
&mut arrivals,
|
||||||
pos + 1,
|
pos + 1,
|
||||||
|
|||||||
106
src/rans.rs
106
src/rans.rs
@@ -1,5 +1,4 @@
|
|||||||
use crate::{context_state::Context, Config};
|
use crate::context_state::Context;
|
||||||
use thiserror::Error;
|
|
||||||
|
|
||||||
pub const PROB_BITS: u32 = 8;
|
pub const PROB_BITS: u32 = 8;
|
||||||
pub const ONE_PROB: u32 = 1 << PROB_BITS;
|
pub const ONE_PROB: u32 = 1 << PROB_BITS;
|
||||||
@@ -16,25 +15,20 @@ pub trait EntropyCoder {
|
|||||||
pub struct RansCoder {
|
pub struct RansCoder {
|
||||||
bits: Vec<u16>,
|
bits: Vec<u16>,
|
||||||
use_bitstream: bool,
|
use_bitstream: bool,
|
||||||
bitstream_is_big_endian: bool,
|
|
||||||
invert_bit_encoding: bool,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl EntropyCoder for RansCoder {
|
impl EntropyCoder for RansCoder {
|
||||||
fn encode_bit(&mut self, bit: bool, prob: u16) {
|
fn encode_bit(&mut self, bit: bool, prob: u16) {
|
||||||
assert!(prob < 32768);
|
assert!(prob < 32768);
|
||||||
self.bits
|
self.bits.push(prob | ((bit as u16) << 15));
|
||||||
.push(prob | (((bit ^ self.invert_bit_encoding) as u16) << 15));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RansCoder {
|
impl RansCoder {
|
||||||
pub fn new(config: &Config) -> RansCoder {
|
pub fn new(use_bitstream: bool) -> RansCoder {
|
||||||
RansCoder {
|
RansCoder {
|
||||||
bits: Vec::new(),
|
bits: Vec::new(),
|
||||||
use_bitstream: config.use_bitstream,
|
use_bitstream,
|
||||||
bitstream_is_big_endian: config.bitstream_is_big_endian,
|
|
||||||
invert_bit_encoding: config.invert_bit_encoding,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -44,31 +38,18 @@ impl RansCoder {
|
|||||||
let mut state = 1 << l_bits;
|
let mut state = 1 << l_bits;
|
||||||
|
|
||||||
let mut byte = 0u8;
|
let mut byte = 0u8;
|
||||||
let mut bit = if self.bitstream_is_big_endian { 0 } else { 8 };
|
let mut bit = 8;
|
||||||
let mut flush_state: Box<dyn FnMut(&mut u32)> = if self.use_bitstream {
|
let mut flush_state: Box<dyn FnMut(&mut u32)> = if self.use_bitstream {
|
||||||
if self.bitstream_is_big_endian {
|
Box::new(|state: &mut u32| {
|
||||||
Box::new(|state: &mut u32| {
|
bit -= 1;
|
||||||
byte |= ((*state & 1) as u8) << bit;
|
byte |= ((*state & 1) as u8) << bit;
|
||||||
bit += 1;
|
if bit == 0 {
|
||||||
if bit == 8 {
|
buffer.push(byte);
|
||||||
buffer.push(byte);
|
byte = 0;
|
||||||
byte = 0;
|
bit = 8;
|
||||||
bit = 0;
|
}
|
||||||
}
|
*state >>= 1;
|
||||||
*state >>= 1;
|
})
|
||||||
})
|
|
||||||
} else {
|
|
||||||
Box::new(|state: &mut u32| {
|
|
||||||
bit -= 1;
|
|
||||||
byte |= ((*state & 1) as u8) << bit;
|
|
||||||
if bit == 0 {
|
|
||||||
buffer.push(byte);
|
|
||||||
byte = 0;
|
|
||||||
bit = 8;
|
|
||||||
}
|
|
||||||
*state >>= 1;
|
|
||||||
})
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
Box::new(|state: &mut u32| {
|
Box::new(|state: &mut u32| {
|
||||||
buffer.push(*state as u8);
|
buffer.push(*state as u8);
|
||||||
@@ -110,11 +91,10 @@ impl RansCoder {
|
|||||||
pub struct CostCounter {
|
pub struct CostCounter {
|
||||||
cost: f64,
|
cost: f64,
|
||||||
log2_table: Vec<f64>,
|
log2_table: Vec<f64>,
|
||||||
invert_bit_encoding: bool,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl CostCounter {
|
impl CostCounter {
|
||||||
pub fn new(config: &Config) -> CostCounter {
|
pub fn new() -> CostCounter {
|
||||||
let log2_table = (0..ONE_PROB)
|
let log2_table = (0..ONE_PROB)
|
||||||
.map(|prob| {
|
.map(|prob| {
|
||||||
let inv_prob = ONE_PROB as f64 / prob as f64;
|
let inv_prob = ONE_PROB as f64 / prob as f64;
|
||||||
@@ -124,7 +104,6 @@ impl CostCounter {
|
|||||||
CostCounter {
|
CostCounter {
|
||||||
cost: 0.0,
|
cost: 0.0,
|
||||||
log2_table,
|
log2_table,
|
||||||
invert_bit_encoding: config.invert_bit_encoding,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -139,7 +118,7 @@ impl CostCounter {
|
|||||||
|
|
||||||
impl EntropyCoder for CostCounter {
|
impl EntropyCoder for CostCounter {
|
||||||
fn encode_bit(&mut self, bit: bool, prob: u16) {
|
fn encode_bit(&mut self, bit: bool, prob: u16) {
|
||||||
let prob = if bit ^ self.invert_bit_encoding {
|
let prob = if bit {
|
||||||
prob as u32
|
prob as u32
|
||||||
} else {
|
} else {
|
||||||
ONE_PROB - prob as u32
|
ONE_PROB - prob as u32
|
||||||
@@ -150,73 +129,48 @@ impl EntropyCoder for CostCounter {
|
|||||||
|
|
||||||
pub struct RansDecoder<'a> {
|
pub struct RansDecoder<'a> {
|
||||||
data: &'a [u8],
|
data: &'a [u8],
|
||||||
pos: usize,
|
|
||||||
state: u32,
|
state: u32,
|
||||||
use_bitstream: bool,
|
use_bitstream: bool,
|
||||||
byte: u8,
|
byte: u8,
|
||||||
bits_left: u8,
|
bits_left: u8,
|
||||||
invert_bit_encoding: bool,
|
|
||||||
bitstream_is_big_endian: bool,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const PROB_MASK: u32 = ONE_PROB - 1;
|
const PROB_MASK: u32 = ONE_PROB - 1;
|
||||||
|
|
||||||
#[derive(Debug, Error)]
|
|
||||||
#[error("Unexpected end of input")]
|
|
||||||
pub struct UnexpectedEOF;
|
|
||||||
|
|
||||||
impl<'a> RansDecoder<'a> {
|
impl<'a> RansDecoder<'a> {
|
||||||
pub fn new(data: &'a [u8], config: &Config) -> RansDecoder<'a> {
|
pub fn new(data: &'a [u8], use_bitstream: bool) -> RansDecoder<'a> {
|
||||||
RansDecoder {
|
RansDecoder {
|
||||||
data,
|
data,
|
||||||
pos: 0,
|
|
||||||
state: 0,
|
state: 0,
|
||||||
use_bitstream: config.use_bitstream,
|
use_bitstream,
|
||||||
byte: 0,
|
byte: 0,
|
||||||
bits_left: 0,
|
bits_left: 0,
|
||||||
invert_bit_encoding: config.invert_bit_encoding,
|
|
||||||
bitstream_is_big_endian: config.bitstream_is_big_endian,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn pos(&self) -> usize {
|
pub fn decode_with_context(&mut self, context: &mut Context) -> bool {
|
||||||
self.pos
|
let bit = self.decode_bit(context.prob());
|
||||||
}
|
|
||||||
|
|
||||||
pub fn decode_with_context(&mut self, context: &mut Context) -> Result<bool, UnexpectedEOF> {
|
|
||||||
let bit = self.decode_bit(context.prob())?;
|
|
||||||
context.update(bit);
|
context.update(bit);
|
||||||
Ok(bit)
|
bit
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn decode_bit(&mut self, prob: u16) -> Result<bool, UnexpectedEOF> {
|
pub fn decode_bit(&mut self, prob: u16) -> bool {
|
||||||
let prob = prob as u32;
|
let prob = prob as u32;
|
||||||
if self.use_bitstream {
|
if self.use_bitstream {
|
||||||
while self.state < 32768 {
|
while self.state < 32768 {
|
||||||
if self.bits_left == 0 {
|
if self.bits_left == 0 {
|
||||||
if self.pos >= self.data.len() {
|
self.byte = self.data[0];
|
||||||
return Err(UnexpectedEOF);
|
self.data = &self.data[1..];
|
||||||
}
|
|
||||||
self.byte = self.data[self.pos];
|
|
||||||
self.pos += 1;
|
|
||||||
self.bits_left = 8;
|
self.bits_left = 8;
|
||||||
}
|
}
|
||||||
if self.bitstream_is_big_endian {
|
self.state = (self.state << 1) | (self.byte & 1) as u32;
|
||||||
self.state = (self.state << 1) | (self.byte >> 7) as u32;
|
self.byte >>= 1;
|
||||||
self.byte <<= 1;
|
|
||||||
} else {
|
|
||||||
self.state = (self.state << 1) | (self.byte & 1) as u32;
|
|
||||||
self.byte >>= 1;
|
|
||||||
}
|
|
||||||
self.bits_left -= 1;
|
self.bits_left -= 1;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
while self.state < 4096 {
|
while self.state < 4096 {
|
||||||
if self.pos >= self.data.len() {
|
self.state = (self.state << 8) | self.data[0] as u32;
|
||||||
return Err(UnexpectedEOF);
|
self.data = &self.data[1..];
|
||||||
}
|
|
||||||
self.state = (self.state << 8) | self.data[self.pos] as u32;
|
|
||||||
self.pos += 1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -229,6 +183,6 @@ impl<'a> RansDecoder<'a> {
|
|||||||
};
|
};
|
||||||
self.state = prob * (self.state >> PROB_BITS) + (self.state & PROB_MASK) - start;
|
self.state = prob * (self.state >> PROB_BITS) + (self.state & PROB_MASK) - start;
|
||||||
|
|
||||||
Ok(bit ^ self.invert_bit_encoding)
|
bit
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
3
z80_unpacker/.gitignore
vendored
3
z80_unpacker/.gitignore
vendored
@@ -1,3 +0,0 @@
|
|||||||
*.bin
|
|
||||||
*.tap
|
|
||||||
*.lst
|
|
||||||
@@ -1,11 +0,0 @@
|
|||||||
all: unpack.bin example/example.sna
|
|
||||||
|
|
||||||
# binary is positioned from ORG 0, not usable, just assembling to verify the syntax
|
|
||||||
unpack.bin: unpack.asm
|
|
||||||
sjasmplus --msg=war --lst --lstlab=sort --raw=unpack.bin unpack.asm
|
|
||||||
|
|
||||||
example/example.sna: unpack.asm example/example.asm
|
|
||||||
cd example && sjasmplus --msg=war --lst --lstlab=sort example.asm
|
|
||||||
|
|
||||||
clean:
|
|
||||||
$(RM) unpack.bin unpack.lst example/example.sna example/example.lst
|
|
||||||
@@ -1,100 +0,0 @@
|
|||||||
;; Example using upkr depacker for screens slideshow
|
|
||||||
OPT --syntax=abf
|
|
||||||
DEVICE ZXSPECTRUM48,$8FFF
|
|
||||||
|
|
||||||
ORG $9000
|
|
||||||
;; forward example data
|
|
||||||
compressed_scr_files.fwd: ; border color byte + upkr-packed .scr file
|
|
||||||
DB 1
|
|
||||||
INCBIN "screens/Grongy - ZX Spectrum (2022).scr.upk"
|
|
||||||
DB 7
|
|
||||||
INCBIN "screens/Schafft - Poison (2017).scr.upk"
|
|
||||||
DB 0
|
|
||||||
INCBIN "screens/diver - Mercenary 4. The Heaven's Devil (2014) (Forever 2014 Olympic Edition, 1).scr.upk"
|
|
||||||
DB 6
|
|
||||||
INCBIN "screens/diver - Back to Bjork (2015).scr.upk"
|
|
||||||
.e:
|
|
||||||
;; backward example data (unpacker goes from the end of the data!)
|
|
||||||
compressed_scr_files.rwd.e: EQU $-1 ; the final IX will point one byte ahead of "$" here
|
|
||||||
INCBIN "screens.reversed/diver - Back to Bjork (2015).scr.upk"
|
|
||||||
DB 6
|
|
||||||
INCBIN "screens.reversed/diver - Mercenary 4. The Heaven's Devil (2014) (Forever 2014 Olympic Edition, 1).scr.upk"
|
|
||||||
DB 0
|
|
||||||
INCBIN "screens.reversed/Schafft - Poison (2017).scr.upk"
|
|
||||||
DB 7
|
|
||||||
INCBIN "screens.reversed/Grongy - ZX Spectrum (2022).scr.upk"
|
|
||||||
compressed_scr_files.rwd: ; border color byte + upkr-packed .scr file (backward)
|
|
||||||
DB 1
|
|
||||||
|
|
||||||
start:
|
|
||||||
di
|
|
||||||
; OPT --zxnext
|
|
||||||
; nextreg 7,3 ; ZX Next: switch to 28Mhz
|
|
||||||
|
|
||||||
;;; FORWARD packed/unpacked data demo
|
|
||||||
ld ix,compressed_scr_files.fwd
|
|
||||||
.slideshow_loop.fwd:
|
|
||||||
; set BORDER for next image
|
|
||||||
ld a,(ix)
|
|
||||||
inc ix
|
|
||||||
out (254),a
|
|
||||||
; call unpack of next image directly into VRAM
|
|
||||||
ld de,$4000 ; target VRAM
|
|
||||||
exx
|
|
||||||
; IX = packed data, DE' = destination ($4000)
|
|
||||||
; returned IX will point right after the packed data
|
|
||||||
call fwd.upkr.unpack
|
|
||||||
; do some busy loop with CPU to delay between images
|
|
||||||
call delay
|
|
||||||
; check if all images were displayed, loop around from first one then
|
|
||||||
ld a,ixl
|
|
||||||
cp low compressed_scr_files.fwd.e
|
|
||||||
jr nz,.slideshow_loop.fwd
|
|
||||||
|
|
||||||
;;; BACKWARD packed/unpacked data demo
|
|
||||||
ld ix,compressed_scr_files.rwd
|
|
||||||
.slideshow_loop.rwd:
|
|
||||||
; set BORDER for next image
|
|
||||||
ld a,(ix)
|
|
||||||
dec ix
|
|
||||||
out (254),a
|
|
||||||
; call unpack of next image directly into VRAM
|
|
||||||
ld de,$5AFF ; target VRAM
|
|
||||||
exx
|
|
||||||
; IX = packed data, DE' = destination
|
|
||||||
; returned IX will point right ahead of the packed data
|
|
||||||
call rwd.upkr.unpack
|
|
||||||
; do some busy loop with CPU to delay between images
|
|
||||||
call delay
|
|
||||||
; check if all images were displayed, loop around from first one then
|
|
||||||
ld a,ixl
|
|
||||||
cp low compressed_scr_files.rwd.e
|
|
||||||
jr nz,.slideshow_loop.rwd
|
|
||||||
|
|
||||||
jr start
|
|
||||||
|
|
||||||
delay:
|
|
||||||
ld bc,$AA00
|
|
||||||
.delay:
|
|
||||||
.8 ex (sp),ix
|
|
||||||
dec c
|
|
||||||
jr nz,.delay
|
|
||||||
djnz .delay
|
|
||||||
ret
|
|
||||||
|
|
||||||
; include the depacker library, optionally putting probs array buffer near end of RAM
|
|
||||||
DEFINE UPKR_PROBS_ORIGIN $FA00 ; if not defined, array will be put after unpack code
|
|
||||||
|
|
||||||
MODULE fwd
|
|
||||||
INCLUDE "../unpack.asm"
|
|
||||||
ENDMODULE
|
|
||||||
|
|
||||||
MODULE rwd
|
|
||||||
DEFINE BACKWARDS_UNPACK ; defined to build backwards unpack
|
|
||||||
; initial IX points at last byte of compressed data
|
|
||||||
; initial DE' points at last byte of unpacked data
|
|
||||||
|
|
||||||
INCLUDE "../unpack.asm"
|
|
||||||
ENDMODULE
|
|
||||||
|
|
||||||
SAVESNA "example.sna",start
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,32 +0,0 @@
|
|||||||
Z80 asm implementation of C unpacker, code-size focused (not performance).
|
|
||||||
|
|
||||||
**ONLY BITSTREAM** variant is currently supported; make sure to use "-b" in the packer.
|
|
||||||
|
|
||||||
The project is expected to evolve further, including possible changes to the binary format; this is
|
|
||||||
the initial version of the Z80 unpacker, meant to explore if/how it works and how it can be improved further.
|
|
||||||
|
|
||||||
(copy full packer+depacker source to your project if you plan to use it, as future revisions
|
|
||||||
may be incompatible with files you will produce with current version)
|
|
||||||
|
|
||||||
Asm syntax is z00m's sjasmplus: https://github.com/z00m128/sjasmplus
|
|
||||||
|
|
||||||
A backward-direction unpacker was added as a compile-time option; see the example for both forward/backward
|
|
||||||
depacker in action.
|
|
||||||
|
|
||||||
The packed/unpacked data overlap has to be tested case by case; in the worst case the packed data
|
|
||||||
may need even more than 7 bytes to unpack the final byte, but usually 1-4 bytes suffice.
|
|
||||||
|
|
||||||
TODO:
|
|
||||||
- build bigger corpus of test data to benchmark future changes in algorithm/format (example and zx48.rom was used to do initial tests)
|
|
||||||
- maybe try to beat double-loop `decode_number` with different encoding format
|
|
||||||
- (@ped7g) Z80N version of unpacker for ZX Next devs
|
|
||||||
- (@exoticorn) add Z80 specific packer (to avoid confusion with original MicroW8 variant), and land it all to master branch, maybe in "z80" directory or something? (and overall decide how to organise+merge this upstream into main repo)
|
|
||||||
- (@exoticorn) add packer output reporting the possible packed/unpacked region overlap
|
|
||||||
|
|
||||||
DONE:
|
|
||||||
* review the non-bitstream variant to see whether it is feasible to implement it on Z80
|
|
||||||
- Ped7g: IMHO nope, the 12b x 8b MUL code would probably quickly cancel any gains from the simpler state update
|
|
||||||
* review first implementation to identify weak spots where the implementation can be shorter+faster
|
|
||||||
with acceptable small changes to the format
|
|
||||||
- Ped7g: the decode_bit settled down and now doesn't feel so confused and redundant, the code seems pretty on point to me, no obvious simplification from format change
|
|
||||||
- Ped7g: the decode_number double-loop is surprisingly resilient, especially in terms of code size I failed to beat it, speed wise only negligible gains
|
|
||||||
@@ -1,381 +0,0 @@
|
|||||||
;; https://github.com/exoticorn/upkr/blob/z80/c_unpacker/unpack.c - original C implementation
|
|
||||||
;; C source in comments ahead of asm - the C macros are removed to keep only bitstream variant
|
|
||||||
;;
|
|
||||||
;; initial version by Peter "Ped" Helcmanovsky (C) 2022, licensed same as upkr project ("unlicensed")
|
|
||||||
;; to assemble use z00m's sjasmplus: https://github.com/z00m128/sjasmplus
|
|
||||||
;;
|
|
||||||
;; you can define UPKR_PROBS_ORIGIN to specific 256 byte aligned address for probs array (320 bytes),
|
|
||||||
;; otherwise it will be positioned after the unpacker code (256 aligned)
|
|
||||||
;;
|
|
||||||
;; public API:
|
|
||||||
;;
|
|
||||||
;; upkr.unpack
|
|
||||||
;; IN: IX = packed data, DE' (shadow DE) = destination
|
|
||||||
;; OUT: IX = after packed data
|
|
||||||
;; modifies: all registers except IY, requires 10 bytes of stack space
|
|
||||||
;;
|
|
||||||
|
|
||||||
; DEFINE BACKWARDS_UNPACK ; uncomment to build backwards depacker (write_ptr--, upkr_data_ptr--)
|
|
||||||
; initial IX points at last byte of compressed data
|
|
||||||
; initial DE' points at last byte of unpacked data
|
|
||||||
|
|
||||||
; DEFINE UPKR_UNPACK_SPEED ; uncomment to get larger but faster unpack routine
|
|
||||||
|
|
||||||
; code size hint: if you put probs array just ahead of BASIC entry point, you will get BC
|
|
||||||
; initialised to probs.e by BASIC `USR` command and you can remove it from unpack init (-3B)
|
|
||||||
|
|
||||||
OPT push reset --syntax=abf
|
|
||||||
MODULE upkr
|
|
||||||
|
|
||||||
NUMBER_BITS EQU 16+15 ; context-bits per offset/length (16+15 for 16bit offsets/pointers)
|
|
||||||
; numbers (offsets/lengths) are encoded like: 1a1b1c1d1e0 = 0000'0000'001e'dbca
|
|
||||||
|
|
||||||
/*
|
|
||||||
u8* upkr_data_ptr;
|
|
||||||
u8 upkr_probs[1 + 255 + 1 + 2*32 + 2*32];
|
|
||||||
u16 upkr_state;
|
|
||||||
u8 upkr_current_byte;
|
|
||||||
int upkr_bits_left;
|
|
||||||
|
|
||||||
int upkr_unpack(void* destination, void* compressed_data) {
|
|
||||||
upkr_data_ptr = (u8*)compressed_data;
|
|
||||||
upkr_state = 0;
|
|
||||||
upkr_bits_left = 0;
|
|
||||||
for(int i = 0; i < sizeof(upkr_probs); ++i)
|
|
||||||
upkr_probs[i] = 128;
|
|
||||||
|
|
||||||
u8* write_ptr = (u8*)destination;
|
|
||||||
|
|
||||||
int prev_was_match = 0;
|
|
||||||
int offset = 0;
|
|
||||||
for(;;) {
|
|
||||||
if(upkr_decode_bit(0)) {
|
|
||||||
if(prev_was_match || upkr_decode_bit(256)) {
|
|
||||||
offset = upkr_decode_length(257) - 1;
|
|
||||||
if(offset == 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
int length = upkr_decode_length(257 + 64);
|
|
||||||
while(length--) {
|
|
||||||
*write_ptr = write_ptr[-offset];
|
|
||||||
++write_ptr;
|
|
||||||
}
|
|
||||||
prev_was_match = 1;
|
|
||||||
} else {
|
|
||||||
int byte = 1;
|
|
||||||
while(byte < 256) {
|
|
||||||
int bit = upkr_decode_bit(byte);
|
|
||||||
byte = (byte << 1) + bit;
|
|
||||||
}
|
|
||||||
*write_ptr++ = byte;
|
|
||||||
prev_was_match = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return write_ptr - (u8*)destination;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
; IN: IX = compressed_data, DE' = destination
|
|
||||||
unpack:
|
|
||||||
; ** reset probs to 0x80, also reset HL (state) to zero, and set BC to probs+context 0
|
|
||||||
ld hl,probs.c>>1
|
|
||||||
ld bc,probs.e
|
|
||||||
ld a,$80
|
|
||||||
.reset_probs:
|
|
||||||
dec bc
|
|
||||||
ld (bc),a ; will overwrite one extra byte after the array because of odd length
|
|
||||||
dec bc
|
|
||||||
ld (bc),a
|
|
||||||
dec l
|
|
||||||
jr nz,.reset_probs
|
|
||||||
exa
|
|
||||||
; BC = probs (context_index 0), state HL = 0, A' = 0x80 (no source bits left in upkr_current_byte)
|
|
||||||
|
|
||||||
; ** main loop to decompress data
|
|
||||||
; D = prev_was_match = uninitialised, literal is expected first => will reset D to "false"
|
|
||||||
; values for false/true of prev_was_match are: false = high(probs), true = 1 + high(probs)
|
|
||||||
.decompress_data:
|
|
||||||
ld c,0
|
|
||||||
call decode_bit ; if(upkr_decode_bit(0))
|
|
||||||
jr c,.copy_chunk
|
|
||||||
|
|
||||||
; * extract byte from compressed data (literal)
|
|
||||||
inc c ; C = byte = 1 (and also context_index)
|
|
||||||
.decode_byte:
|
|
||||||
call decode_bit ; bit = upkr_decode_bit(byte);
|
|
||||||
rl c ; byte = (byte << 1) + bit;
|
|
||||||
jr nc,.decode_byte ; while(byte < 256)
|
|
||||||
ld a,c
|
|
||||||
exx
|
|
||||||
ld (de),a ; *write_ptr++ = byte;
|
|
||||||
IFNDEF BACKWARDS_UNPACK : inc de : ELSE : dec de : ENDIF
|
|
||||||
exx
|
|
||||||
ld d,b ; prev_was_match = false
|
|
||||||
jr .decompress_data
|
|
||||||
|
|
||||||
; * copy chunk of already decompressed data (match)
|
|
||||||
.copy_chunk:
|
|
||||||
ld a,b
|
|
||||||
inc b ; context_index = 256
|
|
||||||
; if(prev_was_match || upkr_decode_bit(256)) {
|
|
||||||
; offset = upkr_decode_length(257) - 1;
|
|
||||||
; if (0 == offset) break;
|
|
||||||
; }
|
|
||||||
cp d ; CF = prev_was_match
|
|
||||||
call nc,decode_bit ; if not prev_was_match, then upkr_decode_bit(256)
|
|
||||||
jr nc,.keep_offset ; if neither, keep old offset
|
|
||||||
call decode_number ; context_index is already 257-1 as needed by decode_number
|
|
||||||
dec de ; offset = upkr_decode_length(257) - 1;
|
|
||||||
ld a,d
|
|
||||||
or e
|
|
||||||
ret z ; if(offset == 0) break
|
|
||||||
ld (.offset),de
|
|
||||||
.keep_offset:
|
|
||||||
; int length = upkr_decode_length(257 + 64);
|
|
||||||
; while(length--) {
|
|
||||||
; *write_ptr = write_ptr[-offset];
|
|
||||||
; ++write_ptr;
|
|
||||||
; }
|
|
||||||
; prev_was_match = 1;
|
|
||||||
ld c,low(257 + NUMBER_BITS - 1) ; context_index to second "number" set for lengths decoding
|
|
||||||
call decode_number ; length = upkr_decode_length(257 + 64);
|
|
||||||
push de
|
|
||||||
exx
|
|
||||||
IFNDEF BACKWARDS_UNPACK
|
|
||||||
; forward unpack (write_ptr++, upkr_data_ptr++)
|
|
||||||
ld h,d ; DE = write_ptr
|
|
||||||
ld l,e
|
|
||||||
.offset+*: ld bc,0
|
|
||||||
sbc hl,bc ; CF=0 from decode_number ; HL = write_ptr - offset
|
|
||||||
pop bc ; BC = length
|
|
||||||
ldir
|
|
||||||
ELSE
|
|
||||||
; backward unpack (write_ptr--, upkr_data_ptr--)
|
|
||||||
.offset+*: ld hl,0
|
|
||||||
add hl,de ; HL = write_ptr + offset
|
|
||||||
pop bc ; BC = length
|
|
||||||
lddr
|
|
||||||
ENDIF
|
|
||||||
exx
|
|
||||||
ld d,b ; prev_was_match = true
|
|
||||||
djnz .decompress_data ; adjust context_index back to 0..255 range, go to main loop
|
|
||||||
|
|
||||||
/*
|
|
||||||
int upkr_decode_bit(int context_index) {
|
|
||||||
while(upkr_state < 32768) {
|
|
||||||
if(upkr_bits_left == 0) {
|
|
||||||
upkr_current_byte = *upkr_data_ptr++;
|
|
||||||
upkr_bits_left = 8;
|
|
||||||
}
|
|
||||||
upkr_state = (upkr_state << 1) + (upkr_current_byte >> 7);
|
|
||||||
upkr_current_byte <<= 1;
|
|
||||||
--upkr_bits_left;
|
|
||||||
}
|
|
||||||
|
|
||||||
int prob = upkr_probs[context_index];
|
|
||||||
int bit = (upkr_state & 255) >= prob ? 1 : 0;
|
|
||||||
|
|
||||||
int prob_offset = 16;
|
|
||||||
int state_offset = 0;
|
|
||||||
int state_scale = prob;
|
|
||||||
if(bit) {
|
|
||||||
state_offset = -prob;
|
|
||||||
state_scale = 256 - prob;
|
|
||||||
prob_offset = 0;
|
|
||||||
}
|
|
||||||
upkr_state = state_offset + state_scale * (upkr_state >> 8) + (upkr_state & 255);
|
|
||||||
upkr_probs[context_index] = prob_offset + prob - ((prob + 8) >> 4);
|
|
||||||
|
|
||||||
return bit;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
inc_c_decode_bit:
|
|
||||||
; ++low(context_index) before decode_bit (to get -1B by two calls in decode_number)
|
|
||||||
inc c
|
|
||||||
decode_bit:
|
|
||||||
; HL = upkr_state
|
|
||||||
; IX = upkr_data_ptr
|
|
||||||
; BC = probs+context_index
|
|
||||||
; A' = upkr_current_byte (!!! init to 0x80 at start, not 0x00)
|
|
||||||
; preserves DE
|
|
||||||
; ** while (state < 32768) - initial check
|
|
||||||
push de
|
|
||||||
bit 7,h
|
|
||||||
jr nz,.state_b15_set
|
|
||||||
exa
|
|
||||||
; ** while body
|
|
||||||
.state_b15_zero:
|
|
||||||
; HL = upkr_state
|
|
||||||
; IX = upkr_data_ptr
|
|
||||||
; A = upkr_current_byte (init to 0x80 at start, not 0x00)
|
|
||||||
add a,a ; upkr_current_byte <<= 1; // and testing if(upkr_bits_left == 0)
|
|
||||||
jr nz,.has_bit ; CF=data, ZF=0 -> some bits + stop bit still available
|
|
||||||
; CF=1 (by stop bit)
|
|
||||||
ld a,(ix)
|
|
||||||
IFNDEF BACKWARDS_UNPACK : inc ix : ELSE : dec ix : ENDIF ; upkr_current_byte = *upkr_data_ptr++;
|
|
||||||
adc a,a ; CF=data, b0=1 as new stop bit
|
|
||||||
.has_bit:
|
|
||||||
adc hl,hl ; upkr_state = (upkr_state << 1) + (upkr_current_byte >> 7);
|
|
||||||
jp p,.state_b15_zero ; while (state < 32768)
|
|
||||||
exa
|
|
||||||
; ** set "bit"
|
|
||||||
.state_b15_set:
|
|
||||||
ld a,(bc) ; A = upkr_probs[context_index]
|
|
||||||
dec a ; prob is in ~7..249 range, never zero, safe to -1
|
|
||||||
cp l ; CF = bit = prob-1 < (upkr_state & 255) <=> prob <= (upkr_state & 255)
|
|
||||||
inc a
|
|
||||||
; ** adjust state
|
|
||||||
push bc
|
|
||||||
ld c,l ; C = (upkr_state & 255); (preserving the value)
|
|
||||||
push af
|
|
||||||
jr nc,.bit_is_0
|
|
||||||
neg ; A = -prob == (256-prob), CF=1 preserved
|
|
||||||
.bit_is_0:
|
|
||||||
ld d,0
|
|
||||||
ld e,a ; DE = state_scale ; prob || (256-prob)
|
|
||||||
ld l,d ; H:L = (upkr_state>>8) : 0
|
|
||||||
|
|
||||||
IFNDEF UPKR_UNPACK_SPEED
|
|
||||||
|
|
||||||
;; looped MUL for minimum unpack size
|
|
||||||
ld b,8 ; counter
|
|
||||||
.mulLoop:
|
|
||||||
add hl,hl
|
|
||||||
jr nc,.mul0
|
|
||||||
add hl,de
|
|
||||||
.mul0:
|
|
||||||
djnz .mulLoop ; until HL = state_scale * (upkr_state>>8), also BC becomes (upkr_state & 255)
|
|
||||||
|
|
||||||
ELSE
|
|
||||||
|
|
||||||
;;; unrolled MUL for better performance, +25 bytes unpack size
|
|
||||||
ld b,d
|
|
||||||
DUP 8
|
|
||||||
add hl,hl
|
|
||||||
jr nc,0_f
|
|
||||||
add hl,de
|
|
||||||
0:
|
|
||||||
EDUP
|
|
||||||
|
|
||||||
ENDIF
|
|
||||||
|
|
||||||
add hl,bc ; HL = state_scale * (upkr_state >> 8) + (upkr_state & 255)
|
|
||||||
pop af ; restore prob and CF=bit
|
|
||||||
jr nc,.bit_is_0_2
|
|
||||||
dec d ; DE = -prob (also D = bit ? $FF : $00)
|
|
||||||
add hl,de ; HL += -prob
|
|
||||||
; ^ this always preserves CF=1, because (state>>8) >= 128, state_scale: 7..250, prob: 7..250,
|
|
||||||
; so 7*128 > 250 and thus edge case `ADD hl=(7*128+0),de=(-250)` => CF=1
|
|
||||||
.bit_is_0_2:
|
|
||||||
; *** adjust probs[context_index]
|
|
||||||
rra ; + (bit<<4) ; part of -prob_offset, needs another -16
|
|
||||||
and $FC ; clear/keep correct bits to get desired (prob>>4) + extras, CF=0
|
|
||||||
rra
|
|
||||||
rra
|
|
||||||
rra ; A = (bit<<4) + (prob>>4), CF=(prob & 8)
|
|
||||||
adc a,-16 ; A = (bit<<4) - 16 + ((prob + 8)>>4) ; -prob_offset = (bit<<4) - 16
|
|
||||||
ld e,a
|
|
||||||
pop bc
|
|
||||||
ld a,(bc) ; A = prob (cheaper + shorter to re-read again from memory)
|
|
||||||
sub e ; A = 16 - (bit<<4) + prob - ((prob + 8)>>4) ; = prob_offset + prob - ((prob + 8)>>4)
|
|
||||||
ld (bc),a ; probs[context_index] = prob_offset + prob - ((prob + 8) >> 4);
|
|
||||||
add a,d ; restore CF = bit (D = bit ? $FF : $00 && A > 0)
|
|
||||||
pop de
|
|
||||||
ret
|
|
||||||
|
|
||||||
/*
|
|
||||||
int upkr_decode_length(int context_index) {
|
|
||||||
int length = 0;
|
|
||||||
int bit_pos = 0;
|
|
||||||
while(upkr_decode_bit(context_index)) {
|
|
||||||
length |= upkr_decode_bit(context_index + 1) << bit_pos++;
|
|
||||||
context_index += 2;
|
|
||||||
}
|
|
||||||
return length | (1 << bit_pos);
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
decode_number:
; Decodes one number into DE; follows the loop of the C reference (`upkr_decode_length`) quoted in the comment above this label.
; NOTE(review): inc_c_decode_bit is defined outside this block. The code below relies on it returning the
; decoded bit in CF and leaving DE intact; presumably it also advances C to the next context - confirm there.
|
|
||||||
; HL = upkr_state
|
|
||||||
; IX = upkr_data_ptr
|
|
||||||
; BC = probs+context_index-1
|
|
||||||
; A' = upkr_current_byte (!!! init to 0x80 at start, not 0x00)
|
|
||||||
; return length in DE, CF=0
|
|
||||||
ld de,$FFFF ; length = 0 with positional-stop-bit (the first `rr d : rr e` below turns this into $7FFF:
; the 0 now in bit 15 is the stop bit, the ones below it are filler that .fix_bit_pos shifts out)
|
|
||||||
or a ; CF=0 to skip getting data bit and use only `rr d : rr e` to fix init DE
|
|
||||||
.loop:
|
|
||||||
call c,inc_c_decode_bit ; get data bit, context_index + 1 / if CF=0 (first pass only) just add stop bit into DE init
|
|
||||||
rr d
|
|
||||||
rr e ; DE = length = (length >> 1) | (bit << 15); older bits and the stop bit move one position down
|
|
||||||
call inc_c_decode_bit ; context_index += 2 ; CF = "another data bit follows" flag (the `while` condition of the C reference)
|
|
||||||
jr c,.loop ; CF=1 here, so the conditional call at .loop is taken on every pass after the first
|
|
||||||
.fix_bit_pos:
; Entered with CF=0. DE holds the data bits at the top, then the 0 stop bit, then filler ones.
; Shift right until the stop bit drops out into CF; the data bits then sit at bit 0 upwards.
|
|
||||||
ccf ; NC will become this final `| (1 << bit_pos)` bit (first pass: CF 0 -> 1; later passes: CF 1 -> 0, so only zeroes follow it)
|
|
||||||
rr d
|
|
||||||
rr e ; CF = bit shifted out of E: 1 = filler, 0 = stop bit
|
|
||||||
jr c,.fix_bit_pos ; until stop bit is reached (all bits have landed in their correct positions)
|
|
||||||
ret ; return with CF=0 (important for unpack routine)
|
|
||||||
|
|
||||||
; assemble-time message: distance from label `unpack` (defined outside this chunk) to the current address, in decimal
DISPLAY "upkr.unpack total size: ",/D,$-unpack
|
|
||||||
|
|
||||||
; reserve space for probs array without emitting any machine code (using only EQU)
; symbols defined below: probs = start address, probs.real_c = unpadded size, probs.c = padded size, probs.e = end address
|
|
||||||
|
|
||||||
IFDEF UPKR_PROBS_ORIGIN ; if specific address is defined by user, move probs array there
|
|
||||||
probs: EQU ((UPKR_PROBS_ORIGIN) + 255) & -$100 ; probs array aligned to 256 (address rounded UP to the next multiple of 256)
|
|
||||||
ELSE
|
|
||||||
probs: EQU ($ + 255) & -$100 ; probs array aligned to 256 (first 256-aligned address at or after the current address)
|
|
||||||
ENDIF
|
|
||||||
.real_c: EQU 1 + 255 + 1 + 2*NUMBER_BITS ; real size of probs array
; NOTE(review): NUMBER_BITS is defined outside this chunk; the meaning of the individual terms is not visible here - confirm against the context layout
|
|
||||||
.c: EQU (.real_c + 1) & -2 ; padding to even size (required by init code): .real_c rounded up to the next even value
|
|
||||||
.e: EQU probs + .c ; first address past the padded probs array
|
|
||||||
|
|
||||||
; assemble-time message: where the array was placed and its padded size (probs.c)
DISPLAY "upkr.unpack probs array placed at: ",/A,probs,",\tsize: ",/A,probs.c
|
|
||||||
|
|
||||||
/*
|
|
||||||
archived: negligibly faster but +6B longer decode_number variant using HL' and BC' to
|
|
||||||
do `number|=(1<<bit_pos);` type of logic in single loop.
|
|
||||||
*/
|
|
||||||
; decode_number:
|
|
||||||
; exx
|
|
||||||
; ld bc,1
|
|
||||||
; ld l,b
|
|
||||||
; ld h,b ; HL = 0
|
|
||||||
; .loop
|
|
||||||
; exx
|
|
||||||
; inc c
|
|
||||||
; call decode_bit
|
|
||||||
; jr nc,.done
|
|
||||||
; inc c
|
|
||||||
; call decode_bit
|
|
||||||
; exx
|
|
||||||
; jr nc,.b0
|
|
||||||
; add hl,bc
|
|
||||||
; .b0:
|
|
||||||
; sla c
|
|
||||||
; rl b
|
|
||||||
; jr .loop
|
|
||||||
; .done:
|
|
||||||
; exx
|
|
||||||
; add hl,bc
|
|
||||||
; push hl
|
|
||||||
; exx
|
|
||||||
; pop de
|
|
||||||
; ret
|
|
||||||
|
|
||||||
/*
|
|
||||||
archived: possible LUT variant of updating probs value, requires 512-aligned 512B table (not tested)
|
|
||||||
*/
|
|
||||||
; this code replaces the part of decode_bit that starts at "; *** adjust probs[context_index]"; it is then followed by the existing `ld (bc),a : add a,d ...`
|
|
||||||
; ld c,a
|
|
||||||
; ld a,high(probs_update_table)/2 ; must be 512 aligned
|
|
||||||
; rla
|
|
||||||
; ld b,a
|
|
||||||
; ld a,(bc)
|
|
||||||
; pop bc
|
|
||||||
; -------------------------------------------
|
|
||||||
; probs_update_table: EQU probs-512
|
|
||||||
; -------------------------------------------
|
|
||||||
; the table generator is not obvious and probably not short either: almost certainly 20+ bytes, maybe even 30-40
|
|
||||||
|
|
||||||
ENDMODULE
|
|
||||||
OPT pop
|
|
||||||
Reference in New Issue
Block a user