mirror of
https://github.com/exoticorn/upkr.git
synced 2026-01-20 19:46:42 +01:00
Compare commits
46 Commits
v0.2.0-pre
...
literal_le
| Author | SHA1 | Date | |
|---|---|---|---|
| cac3922b0a | |||
| dd5b0043dd | |||
| 39c95598f2 | |||
| 3e31b37c1c | |||
| 83c023de45 | |||
| a46eb0e7f5 | |||
| 32cd8e5b6c | |||
| 90fa31ce1a | |||
| 31fb91c629 | |||
| e429f252a5 | |||
|
|
8a32e1384c | ||
|
|
9913dcf4bb | ||
|
|
a8fd3dc573 | ||
|
|
e1f9fa143a | ||
|
|
db1c7d2d14 | ||
|
|
c1ffd0e7ed | ||
|
|
00d084105a | ||
|
|
8e5298caee | ||
|
|
1fb29f3a1b | ||
| c8924456aa | |||
| 7b0e22f459 | |||
|
|
165f593a11 | ||
|
|
d4bce4bf7c | ||
|
|
b13fa05413 | ||
|
|
3c773aca8d | ||
| a5406deb30 | |||
|
|
9211544cb9 | ||
|
|
3fa9e0fa12 | ||
|
|
aa3fad4d80 | ||
|
|
6624940ed9 | ||
|
|
c3a9773e5c | ||
|
|
a75a35efb2 | ||
| 540a91d1ba | |||
| e7aaf1491a | |||
| a1dabaf7f9 | |||
| 75e375fb1f | |||
|
|
c7ea11bce3 | ||
|
|
02d20867ee | ||
|
|
511ddefc08 | ||
|
|
d30baaa91f | ||
|
|
919a892ef0 | ||
|
|
ea5c0b1b15 | ||
|
|
a19ec2abb7 | ||
|
|
7b051113e1 | ||
| f1f1c64a76 | |||
| 36cb6d77b5 |
@@ -3,7 +3,8 @@ name = "upkr"
|
|||||||
version = "0.2.0-pre3"
|
version = "0.2.0-pre3"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
[profile.release]
|
||||||
|
strip = "debuginfo"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
cdivsufsort = "2"
|
cdivsufsort = "2"
|
||||||
|
|||||||
@@ -39,6 +39,19 @@ build/unpack_armv6m.bin: unpack_armv6m.S
|
|||||||
arm-none-eabi-gcc -march=armv6-m -c -o build/unpack_armv6m.o $?
|
arm-none-eabi-gcc -march=armv6-m -c -o build/unpack_armv6m.o $?
|
||||||
arm-none-eabi-objcopy -O binary --only-section=.text build/unpack_armv6m.o $@
|
arm-none-eabi-objcopy -O binary --only-section=.text build/unpack_armv6m.o $@
|
||||||
|
|
||||||
|
build/unpack_arm32: ../c_unpacker/main.c unpack_arm32.S
|
||||||
|
mkdir -p build
|
||||||
|
arm-linux-gnueabihf-gcc -g -static -o $@ $^
|
||||||
|
|
||||||
|
test_arm32: build/unpack_arm32
|
||||||
|
qemu-arm $< test_data.upk /tmp/out.bin
|
||||||
|
cmp test_data.bin /tmp/out.bin
|
||||||
|
|
||||||
|
build/unpack_arm32.bin: unpack_arm32.S
|
||||||
|
mkdir -p build
|
||||||
|
arm-none-eabi-gcc -c -o build/unpack_arm32.o $?
|
||||||
|
arm-none-eabi-objcopy -O binary --only-section=.text build/unpack_arm32.o $@
|
||||||
|
|
||||||
build/unpack_c: ../c_unpacker/main.c ../c_unpacker/unpack.c
|
build/unpack_c: ../c_unpacker/main.c ../c_unpacker/unpack.c
|
||||||
mkdir -p build
|
mkdir -p build
|
||||||
gcc -g -o $@ $^
|
gcc -g -o $@ $^
|
||||||
@@ -47,5 +60,5 @@ test_c: build/unpack_c
|
|||||||
$< test_data.upk /tmp/out.bin
|
$< test_data.upk /tmp/out.bin
|
||||||
cmp test_data.bin /tmp/out.bin
|
cmp test_data.bin /tmp/out.bin
|
||||||
|
|
||||||
sizes: build/unpack_armv6m.bin build/unpack_riscv64.bin build/unpack_riscv32.bin
|
sizes: build/unpack_armv6m.bin build/unpack_riscv64.bin build/unpack_riscv32.bin build/unpack_arm32.bin
|
||||||
ls -l build/*.bin
|
ls -l build/*.bin
|
||||||
100
asm_unpackers/unpack_arm32.S
Normal file
100
asm_unpackers/unpack_arm32.S
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
.arm
|
||||||
|
|
||||||
|
.section .text
|
||||||
|
|
||||||
|
.global upkr_unpack
|
||||||
|
.type upkr_unpack, %function
|
||||||
|
// r0 .. out_ptr (returned)
|
||||||
|
// r1 .. in_ptr (returned)
|
||||||
|
// r2 .. state
|
||||||
|
// r3 .. offset
|
||||||
|
// r4 .. prev_was_literal / decode_length ret
|
||||||
|
// r5 .. context index
|
||||||
|
// r6 .. decode_length temp
|
||||||
|
// r7 .. probs ptr
|
||||||
|
// r8-r11 .. decode_bit temp
|
||||||
|
// r12 .. decode_length return address
|
||||||
|
upkr_unpack:
|
||||||
|
push { r3-r11, lr }
|
||||||
|
|
||||||
|
mov r2, #384
|
||||||
|
mov r3, #128
|
||||||
|
.Lclear:
|
||||||
|
subs r2, r2, #1
|
||||||
|
strb r3, [sp, -r2]
|
||||||
|
bne .Lclear
|
||||||
|
|
||||||
|
.Lloop:
|
||||||
|
mov r5, #0
|
||||||
|
bl upkr_decode_bit
|
||||||
|
bcc .Ldata
|
||||||
|
.Lmatch:
|
||||||
|
mov r5, #256
|
||||||
|
rsbs r6, r4, #0
|
||||||
|
blcc upkr_decode_bit
|
||||||
|
bcc .Lskip_offset
|
||||||
|
|
||||||
|
bl upkr_decode_length
|
||||||
|
adds r3, r4, #1
|
||||||
|
popeq { r3-r11, pc }
|
||||||
|
.Lskip_offset:
|
||||||
|
|
||||||
|
mov r5, #256+64
|
||||||
|
bl upkr_decode_length
|
||||||
|
.Lcopy_loop:
|
||||||
|
ldrb r5, [r0, r3]
|
||||||
|
.Lstore:
|
||||||
|
strb r5, [r0], #1
|
||||||
|
adds r4, r4, #1
|
||||||
|
blt .Lcopy_loop
|
||||||
|
b .Lloop
|
||||||
|
|
||||||
|
.Ldata:
|
||||||
|
mov r5, #1
|
||||||
|
|
||||||
|
.Ldata_loop:
|
||||||
|
bl upkr_decode_bit
|
||||||
|
adc r5, r5, r5
|
||||||
|
movs r4, r5, lsr #8
|
||||||
|
beq .Ldata_loop
|
||||||
|
b .Lstore
|
||||||
|
|
||||||
|
.type upkr_decode_length, %function
|
||||||
|
upkr_decode_length:
|
||||||
|
mov r12, lr
|
||||||
|
|
||||||
|
mov r4, #0
|
||||||
|
mvn r6, #0
|
||||||
|
.Lbit_loop:
|
||||||
|
bl upkr_decode_bit_inc
|
||||||
|
addcc r4, r4, r6
|
||||||
|
movcc pc, r12
|
||||||
|
|
||||||
|
bl upkr_decode_bit_inc
|
||||||
|
addcs r4, r4, r6
|
||||||
|
mov r6, r6, lsl #1
|
||||||
|
b .Lbit_loop
|
||||||
|
|
||||||
|
.type upkr_decode_bit, %function
|
||||||
|
upkr_decode_bit_inc:
|
||||||
|
add r5, r5, #1
|
||||||
|
upkr_decode_bit:
|
||||||
|
cmp r2, #4096
|
||||||
|
ldrltb r8, [r1], #1
|
||||||
|
orrlt r2, r8, r2, lsl#8
|
||||||
|
blt upkr_decode_bit
|
||||||
|
|
||||||
|
ldrb r8, [sp, -r5]
|
||||||
|
and r9, r2, #255
|
||||||
|
add r9, r9, #1
|
||||||
|
cmp r8, r9
|
||||||
|
rsbcs r8, r8, #256
|
||||||
|
mvn r9, r2, lsr#8
|
||||||
|
addcs r9, r9, #1
|
||||||
|
mla r2, r8, r9, r2
|
||||||
|
add r9, r8, #8
|
||||||
|
sub r8, r8, r9, lsr#4
|
||||||
|
rsbcs r8, r8, #256
|
||||||
|
strb r8, [sp, -r5]
|
||||||
|
mov pc, r14
|
||||||
|
|
||||||
@@ -1,13 +1,11 @@
|
|||||||
.section .text
|
.section .text
|
||||||
|
|
||||||
#define FRAME_SIZE (256+32*4+4)
|
|
||||||
|
|
||||||
// x8 prob array ptr
|
|
||||||
// x9 prev was literal
|
// x9 prev was literal
|
||||||
// x10 out ptr
|
// x10 out ptr
|
||||||
// x11 in ptr
|
// x11 in ptr
|
||||||
// x12 offset
|
// x12 offset
|
||||||
// x13 state
|
// x13 state
|
||||||
|
// x14 context index
|
||||||
|
|
||||||
.global upkr_unpack
|
.global upkr_unpack
|
||||||
.type upkr_unpack, %function
|
.type upkr_unpack, %function
|
||||||
@@ -15,11 +13,11 @@ upkr_unpack:
|
|||||||
mv t4, ra
|
mv t4, ra
|
||||||
mv x17, x8
|
mv x17, x8
|
||||||
mv t6, x9
|
mv t6, x9
|
||||||
li x13, FRAME_SIZE
|
li x9, 256 + 128
|
||||||
li x9, 128
|
mv x13, x9
|
||||||
1:
|
1:
|
||||||
addi sp, sp, -1
|
sub x8, sp, x13
|
||||||
sb x9, 0(sp)
|
sb x9, 0(x8)
|
||||||
addi x13, x13, -1
|
addi x13, x13, -1
|
||||||
bnez x13, 1b
|
bnez x13, 1b
|
||||||
|
|
||||||
@@ -35,7 +33,7 @@ upkr_unpack:
|
|||||||
|
|
||||||
.Lfinished_offset:
|
.Lfinished_offset:
|
||||||
addi x14, x14, 64
|
addi x14, x14, 64
|
||||||
jal t3, upkr_decode_number
|
jalr ra // jal upkr_decode_number
|
||||||
1:
|
1:
|
||||||
add x14, x10, t0
|
add x14, x10, t0
|
||||||
lbu x14, (x14)
|
lbu x14, (x14)
|
||||||
@@ -58,36 +56,14 @@ upkr_unpack:
|
|||||||
.Lread_offset_inc_x14:
|
.Lread_offset_inc_x14:
|
||||||
addi x14, x14, 1
|
addi x14, x14, 1
|
||||||
.Lread_offset:
|
.Lread_offset:
|
||||||
jal t3, upkr_decode_number
|
jalr ra // jal upkr_decode_number
|
||||||
addi t0, x9, 1
|
addi t0, x9, 1
|
||||||
bnez t0, .Lfinished_offset
|
bnez t0, .Lfinished_offset
|
||||||
.Ldone:
|
.Ldone:
|
||||||
addi sp, sp, FRAME_SIZE
|
|
||||||
mv x8, x17
|
mv x8, x17
|
||||||
mv x9, t6
|
mv x9, t6
|
||||||
jr t4
|
jr t4
|
||||||
|
|
||||||
// x14 context index
|
|
||||||
// return: x9 negtive decoded number
|
|
||||||
upkr_decode_number:
|
|
||||||
mv t5, x14
|
|
||||||
li x9, 0
|
|
||||||
li x8, -1
|
|
||||||
1:
|
|
||||||
jal upkr_decode_bit
|
|
||||||
beqz x15, 1f
|
|
||||||
jal upkr_decode_bit
|
|
||||||
beqz x15, 2f
|
|
||||||
add x9, x9, x8
|
|
||||||
2:
|
|
||||||
slli x8, x8, 1
|
|
||||||
j 1b
|
|
||||||
1:
|
|
||||||
add x9, x9, x8
|
|
||||||
|
|
||||||
mv x14, t5
|
|
||||||
jr t3
|
|
||||||
|
|
||||||
upkr_load_byte:
|
upkr_load_byte:
|
||||||
lbu x15, 0(x11)
|
lbu x15, 0(x11)
|
||||||
addi x11, x11, 1
|
addi x11, x11, 1
|
||||||
@@ -104,39 +80,52 @@ upkr_decode_bit:
|
|||||||
srli x15, x13, 12
|
srli x15, x13, 12
|
||||||
beqz x15, upkr_load_byte
|
beqz x15, upkr_load_byte
|
||||||
|
|
||||||
mv t1, x14
|
addi x14, x14, 1
|
||||||
mv t2, x10
|
|
||||||
|
|
||||||
add x14, x14, sp
|
sub t2, sp, x14
|
||||||
lbu x12, 0(x14)
|
lbu x12, (t2)
|
||||||
|
|
||||||
andi x10, x13, 255
|
andi x8, x13, 255
|
||||||
sltu x15, x10, x12
|
sltu x15, x8, x12
|
||||||
srli x13, x13, 8
|
beqz x15, 1f
|
||||||
beqz x15, .Lelse
|
xori x12, x12, 255
|
||||||
|
addi x12, x12, 1
|
||||||
|
1:
|
||||||
|
srli x8, x13, 8
|
||||||
|
addi x8, x8, 1
|
||||||
|
sub x8, x8, x15
|
||||||
|
mul x8, x8, x12
|
||||||
|
sub x13, x13, x8
|
||||||
|
|
||||||
mul x13, x13, x12
|
addi x8, x12, 8
|
||||||
add x13, x13, x10
|
srli x8, x8, 4
|
||||||
li x10, 256 + 8
|
sub x12, x12, x8
|
||||||
sub x10, x10, x12
|
beqz x15, 1f
|
||||||
srli x10, x10, 4
|
sub x12, x0, x12
|
||||||
add x12, x12, x10
|
1:
|
||||||
j .Lendif
|
|
||||||
|
|
||||||
.Lelse:
|
sb x12, (t2)
|
||||||
li x16, 256
|
|
||||||
sub x16, x16, x12
|
|
||||||
mul x13, x13, x16
|
|
||||||
add x13, x13, x10
|
|
||||||
sub x13, x13, x12
|
|
||||||
addi x10, x12, 8
|
|
||||||
srli x10, x10, 4
|
|
||||||
sub x12, x12, x10
|
|
||||||
|
|
||||||
.Lendif:
|
jalr ra
|
||||||
|
|
||||||
sb x12, 0(x14)
|
// x14 context index
|
||||||
|
// return: x9 negative decoded number
|
||||||
|
upkr_decode_number:
|
||||||
|
mv t3, ra
|
||||||
|
mv t5, x14
|
||||||
|
li x9, 0
|
||||||
|
li t1, -1
|
||||||
|
1:
|
||||||
|
jal upkr_decode_bit
|
||||||
|
beqz x15, 1f
|
||||||
|
jal upkr_decode_bit
|
||||||
|
beqz x15, 2f
|
||||||
|
add x9, x9, t1
|
||||||
|
2:
|
||||||
|
add t1, t1, t1
|
||||||
|
j 1b
|
||||||
|
1:
|
||||||
|
add x9, x9, t1
|
||||||
|
|
||||||
addi x14, t1, 1
|
mv x14, t5
|
||||||
mv x10, t2
|
jr t3
|
||||||
ret
|
|
||||||
|
|||||||
@@ -12,6 +12,8 @@ pub fn pack(
|
|||||||
let mut rans_coder = RansCoder::new(config);
|
let mut rans_coder = RansCoder::new(config);
|
||||||
let mut state = lz::CoderState::new(config);
|
let mut state = lz::CoderState::new(config);
|
||||||
|
|
||||||
|
let mut literal = vec![];
|
||||||
|
|
||||||
let mut pos = 0;
|
let mut pos = 0;
|
||||||
while pos < data.len() {
|
while pos < data.len() {
|
||||||
if let Some(ref mut cb) = progress_callback {
|
if let Some(ref mut cb) = progress_callback {
|
||||||
@@ -22,6 +24,10 @@ pub fn pack(
|
|||||||
let max_offset = config.max_offset.min(1 << (m.length * 3 - 1).min(31));
|
let max_offset = config.max_offset.min(1 << (m.length * 3 - 1).min(31));
|
||||||
let offset = pos - m.pos;
|
let offset = pos - m.pos;
|
||||||
if offset < max_offset && m.length >= config.min_length() {
|
if offset < max_offset && m.length >= config.min_length() {
|
||||||
|
if !literal.is_empty() {
|
||||||
|
lz::Op::Literal(literal).encode(&mut rans_coder, &mut state, config);
|
||||||
|
literal = vec![];
|
||||||
|
}
|
||||||
let length = m.length.min(config.max_length);
|
let length = m.length.min(config.max_length);
|
||||||
lz::Op::Match {
|
lz::Op::Match {
|
||||||
offset: offset as u32,
|
offset: offset as u32,
|
||||||
@@ -43,6 +49,10 @@ pub fn pack(
|
|||||||
.count()
|
.count()
|
||||||
.min(config.max_length);
|
.min(config.max_length);
|
||||||
if length >= config.min_length() {
|
if length >= config.min_length() {
|
||||||
|
if !literal.is_empty() {
|
||||||
|
lz::Op::Literal(literal).encode(&mut rans_coder, &mut state, config);
|
||||||
|
literal = vec![];
|
||||||
|
}
|
||||||
lz::Op::Match {
|
lz::Op::Match {
|
||||||
offset: offset as u32,
|
offset: offset as u32,
|
||||||
len: length as u32,
|
len: length as u32,
|
||||||
@@ -55,11 +65,14 @@ pub fn pack(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !encoded_match {
|
if !encoded_match {
|
||||||
lz::Op::Literal(data[pos]).encode(&mut rans_coder, &mut state, config);
|
literal.push(data[pos]);
|
||||||
pos += 1;
|
pos += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !literal.is_empty() {
|
||||||
|
lz::Op::Literal(literal).encode(&mut rans_coder, &mut state, config);
|
||||||
|
}
|
||||||
lz::encode_eof(&mut rans_coder, &mut state, config);
|
lz::encode_eof(&mut rans_coder, &mut state, config);
|
||||||
rans_coder.finish()
|
rans_coder.finish()
|
||||||
}
|
}
|
||||||
|
|||||||
167
src/lz.rs
167
src/lz.rs
@@ -3,36 +3,53 @@ use crate::rans::{EntropyCoder, RansDecoder};
|
|||||||
use crate::Config;
|
use crate::Config;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
#[derive(Copy, Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub enum Op {
|
pub enum Op {
|
||||||
Literal(u8),
|
Literal(Vec<u8>),
|
||||||
Match { offset: u32, len: u32 },
|
Match { offset: u32, len: u32 },
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Op {
|
impl Op {
|
||||||
pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) {
|
pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) {
|
||||||
let literal_base = state.pos % state.parity_contexts * 256;
|
|
||||||
match self {
|
match self {
|
||||||
&Op::Literal(lit) => {
|
&Op::Literal(ref lit) => {
|
||||||
encode_bit(coder, state, literal_base, !config.is_match_bit);
|
assert!(state.prev_was_match);
|
||||||
let mut context_index = 1;
|
encode_length(
|
||||||
for i in (0..8).rev() {
|
coder,
|
||||||
let bit = (lit >> i) & 1 != 0;
|
state,
|
||||||
encode_bit(coder, state, literal_base + context_index, bit);
|
256 + state.pos % state.parity_contexts * 320,
|
||||||
context_index = (context_index << 1) | bit as usize;
|
lit.len() as u32 + 1,
|
||||||
|
config,
|
||||||
|
);
|
||||||
|
for lit in lit {
|
||||||
|
let literal_base = state.pos % state.parity_contexts * 320;
|
||||||
|
let mut context_index = 1;
|
||||||
|
for i in (0..8).rev() {
|
||||||
|
let bit = (lit >> i) & 1 != 0;
|
||||||
|
encode_bit(coder, state, literal_base + context_index, bit);
|
||||||
|
context_index = (context_index << 1) | bit as usize;
|
||||||
|
}
|
||||||
|
state.pos += 1;
|
||||||
}
|
}
|
||||||
state.prev_was_match = false;
|
state.prev_was_match = false;
|
||||||
state.pos += 1;
|
|
||||||
}
|
}
|
||||||
&Op::Match { offset, len } => {
|
&Op::Match { offset, len } => {
|
||||||
encode_bit(coder, state, literal_base, config.is_match_bit);
|
if state.prev_was_match {
|
||||||
|
encode_length(
|
||||||
|
coder,
|
||||||
|
state,
|
||||||
|
256 + state.pos % state.parity_contexts * 320,
|
||||||
|
1,
|
||||||
|
config,
|
||||||
|
);
|
||||||
|
}
|
||||||
let mut new_offset = true;
|
let mut new_offset = true;
|
||||||
if !state.prev_was_match && !config.no_repeated_offsets {
|
if !state.prev_was_match && !config.no_repeated_offsets {
|
||||||
new_offset = offset != state.last_offset;
|
new_offset = offset != state.last_offset;
|
||||||
encode_bit(
|
encode_bit(
|
||||||
coder,
|
coder,
|
||||||
state,
|
state,
|
||||||
256 * state.parity_contexts,
|
320 * state.parity_contexts,
|
||||||
new_offset == config.new_offset_bit,
|
new_offset == config.new_offset_bit,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -41,14 +58,14 @@ impl Op {
|
|||||||
encode_length(
|
encode_length(
|
||||||
coder,
|
coder,
|
||||||
state,
|
state,
|
||||||
256 * state.parity_contexts + 1,
|
320 * state.parity_contexts + 1,
|
||||||
offset + if config.eof_in_length { 0 } else { 1 },
|
offset + if config.eof_in_length { 0 } else { 1 },
|
||||||
config,
|
config,
|
||||||
);
|
);
|
||||||
state.last_offset = offset;
|
state.last_offset = offset;
|
||||||
}
|
}
|
||||||
assert!(len as usize >= config.min_length() && len as usize <= config.max_length);
|
assert!(len as usize >= config.min_length() && len as usize <= config.max_length);
|
||||||
encode_length(coder, state, 256 * state.parity_contexts + 65, len, config);
|
encode_length(coder, state, 320 * state.parity_contexts + 65, len, config);
|
||||||
state.prev_was_match = true;
|
state.prev_was_match = true;
|
||||||
state.pos += len as usize;
|
state.pos += len as usize;
|
||||||
}
|
}
|
||||||
@@ -57,25 +74,28 @@ impl Op {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) {
|
pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) {
|
||||||
encode_bit(
|
if state.prev_was_match {
|
||||||
coder,
|
encode_length(
|
||||||
state,
|
coder,
|
||||||
state.pos % state.parity_contexts * 256,
|
state,
|
||||||
config.is_match_bit,
|
256 + state.pos % state.parity_contexts * 320,
|
||||||
);
|
1,
|
||||||
|
config,
|
||||||
|
);
|
||||||
|
}
|
||||||
if !state.prev_was_match && !config.no_repeated_offsets {
|
if !state.prev_was_match && !config.no_repeated_offsets {
|
||||||
encode_bit(
|
encode_bit(
|
||||||
coder,
|
coder,
|
||||||
state,
|
state,
|
||||||
256 * state.parity_contexts,
|
320 * state.parity_contexts,
|
||||||
config.new_offset_bit ^ config.eof_in_length,
|
config.new_offset_bit ^ config.eof_in_length,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
if !config.eof_in_length || state.prev_was_match || config.no_repeated_offsets {
|
if !config.eof_in_length || state.prev_was_match || config.no_repeated_offsets {
|
||||||
encode_length(coder, state, 256 * state.parity_contexts + 1, 1, config);
|
encode_length(coder, state, 320 * state.parity_contexts + 1, 1, config);
|
||||||
}
|
}
|
||||||
if config.eof_in_length {
|
if config.eof_in_length {
|
||||||
encode_length(coder, state, 256 * state.parity_contexts + 65, 1, config);
|
encode_length(coder, state, 320 * state.parity_contexts + 65, 1, config);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -119,9 +139,9 @@ pub struct CoderState {
|
|||||||
impl CoderState {
|
impl CoderState {
|
||||||
pub fn new(config: &Config) -> CoderState {
|
pub fn new(config: &Config) -> CoderState {
|
||||||
CoderState {
|
CoderState {
|
||||||
contexts: ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, config),
|
contexts: ContextState::new((64 + 256) * config.parity_contexts + 1 + 64 + 64, config),
|
||||||
last_offset: 0,
|
last_offset: 0,
|
||||||
prev_was_match: false,
|
prev_was_match: true,
|
||||||
pos: 0,
|
pos: 0,
|
||||||
parity_contexts: config.parity_contexts,
|
parity_contexts: config.parity_contexts,
|
||||||
}
|
}
|
||||||
@@ -168,7 +188,8 @@ pub fn unpack_internal(
|
|||||||
max_size: usize,
|
max_size: usize,
|
||||||
) -> Result<isize, UnpackError> {
|
) -> Result<isize, UnpackError> {
|
||||||
let mut decoder = RansDecoder::new(packed_data, &config);
|
let mut decoder = RansDecoder::new(packed_data, &config);
|
||||||
let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, &config);
|
let mut contexts =
|
||||||
|
ContextState::new((64 + 256) * config.parity_contexts + 1 + 64 + 64, &config);
|
||||||
let mut offset = usize::MAX;
|
let mut offset = usize::MAX;
|
||||||
let mut position = 0usize;
|
let mut position = 0usize;
|
||||||
let mut prev_was_match = false;
|
let mut prev_was_match = false;
|
||||||
@@ -199,50 +220,14 @@ pub fn unpack_internal(
|
|||||||
|
|
||||||
loop {
|
loop {
|
||||||
margin = margin.max(position as isize - decoder.pos() as isize);
|
margin = margin.max(position as isize - decoder.pos() as isize);
|
||||||
let literal_base = position % config.parity_contexts * 256;
|
let literal_length = decode_length(
|
||||||
if decoder.decode_with_context(&mut contexts.context_mut(literal_base))?
|
&mut decoder,
|
||||||
== config.is_match_bit
|
&mut contexts,
|
||||||
{
|
256 + position % config.parity_contexts * 320,
|
||||||
if config.no_repeated_offsets
|
config,
|
||||||
|| prev_was_match
|
)? - 1;
|
||||||
|| decoder
|
for _ in 0..literal_length {
|
||||||
.decode_with_context(&mut contexts.context_mut(256 * config.parity_contexts))?
|
let literal_base = position % config.parity_contexts * 320;
|
||||||
== config.new_offset_bit
|
|
||||||
{
|
|
||||||
offset = decode_length(
|
|
||||||
&mut decoder,
|
|
||||||
&mut contexts,
|
|
||||||
256 * config.parity_contexts + 1,
|
|
||||||
&config,
|
|
||||||
)? - if config.eof_in_length { 0 } else { 1 };
|
|
||||||
if offset == 0 {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let length = decode_length(
|
|
||||||
&mut decoder,
|
|
||||||
&mut contexts,
|
|
||||||
256 * config.parity_contexts + 65,
|
|
||||||
&config,
|
|
||||||
)?;
|
|
||||||
if config.eof_in_length && length == 1 {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if offset > position {
|
|
||||||
return Err(UnpackError::OffsetOutOfRange { offset, position });
|
|
||||||
}
|
|
||||||
if let Some(ref mut result) = result {
|
|
||||||
for _ in 0..length {
|
|
||||||
if result.len() < max_size {
|
|
||||||
result.push(result[result.len() - offset]);
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
position += length;
|
|
||||||
prev_was_match = true;
|
|
||||||
} else {
|
|
||||||
let mut context_index = 1;
|
let mut context_index = 1;
|
||||||
let mut byte = 0;
|
let mut byte = 0;
|
||||||
for i in (0..8).rev() {
|
for i in (0..8).rev() {
|
||||||
@@ -259,6 +244,46 @@ pub fn unpack_internal(
|
|||||||
position += 1;
|
position += 1;
|
||||||
prev_was_match = false;
|
prev_was_match = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if config.no_repeated_offsets
|
||||||
|
|| prev_was_match
|
||||||
|
|| decoder
|
||||||
|
.decode_with_context(&mut contexts.context_mut(320 * config.parity_contexts))?
|
||||||
|
== config.new_offset_bit
|
||||||
|
{
|
||||||
|
offset = decode_length(
|
||||||
|
&mut decoder,
|
||||||
|
&mut contexts,
|
||||||
|
320 * config.parity_contexts + 1,
|
||||||
|
&config,
|
||||||
|
)? - if config.eof_in_length { 0 } else { 1 };
|
||||||
|
if offset == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let length = decode_length(
|
||||||
|
&mut decoder,
|
||||||
|
&mut contexts,
|
||||||
|
320 * config.parity_contexts + 65,
|
||||||
|
&config,
|
||||||
|
)?;
|
||||||
|
if config.eof_in_length && length == 1 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if offset > position {
|
||||||
|
return Err(UnpackError::OffsetOutOfRange { offset, position });
|
||||||
|
}
|
||||||
|
if let Some(ref mut result) = result {
|
||||||
|
for _ in 0..length {
|
||||||
|
if result.len() < max_size {
|
||||||
|
result.push(result[result.len() - offset]);
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
position += length;
|
||||||
|
prev_was_match = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if position > max_size {
|
if position > max_size {
|
||||||
|
|||||||
@@ -49,6 +49,12 @@ fn main() -> Result<()> {
|
|||||||
config.is_match_bit = false;
|
config.is_match_bit = false;
|
||||||
config.new_offset_bit = false;
|
config.new_offset_bit = false;
|
||||||
}
|
}
|
||||||
|
Long("x86b") => {
|
||||||
|
config.use_bitstream = true;
|
||||||
|
config.continue_value_bit = false;
|
||||||
|
config.no_repeated_offsets = true;
|
||||||
|
level = 9;
|
||||||
|
}
|
||||||
|
|
||||||
Short('u') | Long("unpack") => unpack = true,
|
Short('u') | Long("unpack") => unpack = true,
|
||||||
Long("margin") => calculate_margin = true,
|
Long("margin") => calculate_margin = true,
|
||||||
@@ -154,6 +160,9 @@ fn print_help(exit_code: i32) -> ! {
|
|||||||
eprintln!(
|
eprintln!(
|
||||||
" --x86 --bitstream --invert-is-match-bit --invert-continue-value-bit --invert-new-offset-bit"
|
" --x86 --bitstream --invert-is-match-bit --invert-continue-value-bit --invert-new-offset-bit"
|
||||||
);
|
);
|
||||||
|
eprintln!(
|
||||||
|
" --x86b --bitstream --invert-continue-value-bit --no-repeated-offsets -9"
|
||||||
|
);
|
||||||
eprintln!();
|
eprintln!();
|
||||||
eprintln!("Config options (need to match when packing/unpacking):");
|
eprintln!("Config options (need to match when packing/unpacking):");
|
||||||
eprintln!(" -b, --bitstream bitstream mode");
|
eprintln!(" -b, --bitstream bitstream mode");
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ pub fn pack(
|
|||||||
let mut parse = parse(data, Config::from_level(level), config, progress_cb);
|
let mut parse = parse(data, Config::from_level(level), config, progress_cb);
|
||||||
let mut ops = vec![];
|
let mut ops = vec![];
|
||||||
while let Some(link) = parse {
|
while let Some(link) = parse {
|
||||||
ops.push(link.op);
|
ops.push(link.op.clone());
|
||||||
parse = link.prev.clone();
|
parse = link.prev.clone();
|
||||||
}
|
}
|
||||||
let mut state = lz::CoderState::new(config);
|
let mut state = lz::CoderState::new(config);
|
||||||
@@ -32,9 +32,15 @@ struct Parse {
|
|||||||
op: lz::Op,
|
op: lz::Op,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct LiteralPrefix {
|
||||||
|
arrival: Arrival,
|
||||||
|
prefix: Vec<u8>,
|
||||||
|
}
|
||||||
|
|
||||||
struct Arrival {
|
struct Arrival {
|
||||||
parse: Option<Rc<Parse>>,
|
parse: Option<Rc<Parse>>,
|
||||||
state: lz::CoderState,
|
state: lz::CoderState,
|
||||||
|
literal_prefix: Option<Box<LiteralPrefix>>,
|
||||||
cost: f64,
|
cost: f64,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -130,6 +136,7 @@ fn parse(
|
|||||||
op,
|
op,
|
||||||
})),
|
})),
|
||||||
state,
|
state,
|
||||||
|
literal_prefix: None,
|
||||||
cost: arrival.cost + cost_counter.cost(),
|
cost: arrival.cost + cost_counter.cost(),
|
||||||
},
|
},
|
||||||
max_arrivals,
|
max_arrivals,
|
||||||
@@ -141,6 +148,7 @@ fn parse(
|
|||||||
Arrival {
|
Arrival {
|
||||||
parse: None,
|
parse: None,
|
||||||
state: lz::CoderState::new(encoding_config),
|
state: lz::CoderState::new(encoding_config),
|
||||||
|
literal_prefix: None,
|
||||||
cost: 0.0,
|
cost: 0.0,
|
||||||
},
|
},
|
||||||
max_arrivals,
|
max_arrivals,
|
||||||
@@ -252,19 +260,26 @@ fn parse(
|
|||||||
}
|
}
|
||||||
|
|
||||||
cost_counter.reset();
|
cost_counter.reset();
|
||||||
let mut state = arrival.state;
|
let (arrival, mut prefix) = if let Some(prefix) = arrival.literal_prefix {
|
||||||
let op = lz::Op::Literal(data[pos]);
|
(prefix.arrival, prefix.prefix)
|
||||||
|
} else {
|
||||||
|
(arrival, vec![])
|
||||||
|
};
|
||||||
|
let mut state = arrival.state.clone();
|
||||||
|
prefix.push(data[pos]);
|
||||||
|
let op = lz::Op::Literal(prefix.clone());
|
||||||
op.encode(cost_counter, &mut state, encoding_config);
|
op.encode(cost_counter, &mut state, encoding_config);
|
||||||
add_arrival(
|
add_arrival(
|
||||||
&mut arrivals,
|
&mut arrivals,
|
||||||
pos + 1,
|
pos + 1,
|
||||||
Arrival {
|
Arrival {
|
||||||
parse: Some(Rc::new(Parse {
|
parse: Some(Rc::new(Parse {
|
||||||
prev: arrival.parse,
|
prev: arrival.parse.clone(),
|
||||||
op,
|
op,
|
||||||
})),
|
})),
|
||||||
state,
|
state,
|
||||||
cost: arrival.cost + cost_counter.cost(),
|
cost: arrival.cost + cost_counter.cost(),
|
||||||
|
literal_prefix: Some(Box::new(LiteralPrefix { arrival, prefix })),
|
||||||
},
|
},
|
||||||
max_arrivals,
|
max_arrivals,
|
||||||
);
|
);
|
||||||
|
|||||||
3
z80_unpacker/.gitignore
vendored
Normal file
3
z80_unpacker/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
*.bin
|
||||||
|
*.tap
|
||||||
|
*.lst
|
||||||
11
z80_unpacker/Makefile
Normal file
11
z80_unpacker/Makefile
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
all: unpack.bin example/example.sna
|
||||||
|
|
||||||
|
# binary is positioned from ORG 0, not usable, just assembling to verify the syntax
|
||||||
|
unpack.bin: unpack.asm
|
||||||
|
sjasmplus --msg=war --lst --lstlab=sort --raw=unpack.bin unpack.asm
|
||||||
|
|
||||||
|
example/example.sna: unpack.asm example/example.asm
|
||||||
|
cd example && sjasmplus --msg=war --lst --lstlab=sort example.asm
|
||||||
|
|
||||||
|
clean:
|
||||||
|
$(RM) unpack.bin unpack.lst example/example.sna example/example.lst
|
||||||
100
z80_unpacker/example/example.asm
Normal file
100
z80_unpacker/example/example.asm
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
;; Example using upkr depacker for screens slideshow
|
||||||
|
OPT --syntax=abf
|
||||||
|
DEVICE ZXSPECTRUM48,$8FFF
|
||||||
|
|
||||||
|
ORG $9000
|
||||||
|
;; forward example data
|
||||||
|
compressed_scr_files.fwd: ; border color byte + upkr-packed .scr file
|
||||||
|
DB 1
|
||||||
|
INCBIN "screens/Grongy - ZX Spectrum (2022).scr.upk"
|
||||||
|
DB 7
|
||||||
|
INCBIN "screens/Schafft - Poison (2017).scr.upk"
|
||||||
|
DB 0
|
||||||
|
INCBIN "screens/diver - Mercenary 4. The Heaven's Devil (2014) (Forever 2014 Olympic Edition, 1).scr.upk"
|
||||||
|
DB 6
|
||||||
|
INCBIN "screens/diver - Back to Bjork (2015).scr.upk"
|
||||||
|
.e:
|
||||||
|
;; backward example data (unpacker goes from the end of the data!)
|
||||||
|
compressed_scr_files.rwd.e: EQU $-1 ; the final IX will point one byte ahead of "$" here
|
||||||
|
INCBIN "screens.reversed/diver - Back to Bjork (2015).scr.upk"
|
||||||
|
DB 6
|
||||||
|
INCBIN "screens.reversed/diver - Mercenary 4. The Heaven's Devil (2014) (Forever 2014 Olympic Edition, 1).scr.upk"
|
||||||
|
DB 0
|
||||||
|
INCBIN "screens.reversed/Schafft - Poison (2017).scr.upk"
|
||||||
|
DB 7
|
||||||
|
INCBIN "screens.reversed/Grongy - ZX Spectrum (2022).scr.upk"
|
||||||
|
compressed_scr_files.rwd: ; border color byte + upkr-packed .scr file (backward)
|
||||||
|
DB 1
|
||||||
|
|
||||||
|
start:
|
||||||
|
di
|
||||||
|
; OPT --zxnext
|
||||||
|
; nextreg 7,3 ; ZX Next: switch to 28Mhz
|
||||||
|
|
||||||
|
;;; FORWARD packed/unpacked data demo
|
||||||
|
ld ix,compressed_scr_files.fwd
|
||||||
|
.slideshow_loop.fwd:
|
||||||
|
; set BORDER for next image
|
||||||
|
ld a,(ix)
|
||||||
|
inc ix
|
||||||
|
out (254),a
|
||||||
|
; call unpack of next image directly into VRAM
|
||||||
|
ld de,$4000 ; target VRAM
|
||||||
|
exx
|
||||||
|
; IX = packed data, DE' = destination ($4000)
|
||||||
|
; returned IX will point right after the packed data
|
||||||
|
call fwd.upkr.unpack
|
||||||
|
; do some busy loop with CPU to delay between images
|
||||||
|
call delay
|
||||||
|
; check if all images were displayed, loop around from first one then
|
||||||
|
ld a,ixl
|
||||||
|
cp low compressed_scr_files.fwd.e
|
||||||
|
jr nz,.slideshow_loop.fwd
|
||||||
|
|
||||||
|
;;; BACKWARD packed/unpacked data demo
|
||||||
|
ld ix,compressed_scr_files.rwd
|
||||||
|
.slideshow_loop.rwd:
|
||||||
|
; set BORDER for next image
|
||||||
|
ld a,(ix)
|
||||||
|
dec ix
|
||||||
|
out (254),a
|
||||||
|
; call unpack of next image directly into VRAM
|
||||||
|
ld de,$5AFF ; target VRAM
|
||||||
|
exx
|
||||||
|
; IX = packed data, DE' = destination
|
||||||
|
; returned IX will point right ahead of the packed data
|
||||||
|
call rwd.upkr.unpack
|
||||||
|
; do some busy loop with CPU to delay between images
|
||||||
|
call delay
|
||||||
|
; check if all images were displayed, loop around from first one then
|
||||||
|
ld a,ixl
|
||||||
|
cp low compressed_scr_files.rwd.e
|
||||||
|
jr nz,.slideshow_loop.rwd
|
||||||
|
|
||||||
|
jr start
|
||||||
|
|
||||||
|
delay:
|
||||||
|
ld bc,$AA00
|
||||||
|
.delay:
|
||||||
|
.8 ex (sp),ix
|
||||||
|
dec c
|
||||||
|
jr nz,.delay
|
||||||
|
djnz .delay
|
||||||
|
ret
|
||||||
|
|
||||||
|
; include the depacker library, optionally putting probs array buffer near end of RAM
|
||||||
|
DEFINE UPKR_PROBS_ORIGIN $FA00 ; if not defined, array will be put after unpack code
|
||||||
|
|
||||||
|
MODULE fwd
|
||||||
|
INCLUDE "../unpack.asm"
|
||||||
|
ENDMODULE
|
||||||
|
|
||||||
|
MODULE rwd
|
||||||
|
DEFINE BACKWARDS_UNPACK ; defined to build backwards unpack
|
||||||
|
; initial IX points at last byte of compressed data
|
||||||
|
; initial DE' points at last byte of unpacked data
|
||||||
|
|
||||||
|
INCLUDE "../unpack.asm"
|
||||||
|
ENDMODULE
|
||||||
|
|
||||||
|
SAVESNA "example.sna",start
|
||||||
BIN
z80_unpacker/example/example.sna
Normal file
BIN
z80_unpacker/example/example.sna
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
z80_unpacker/example/screens/Grongy - ZX Spectrum (2022).scr
Normal file
BIN
z80_unpacker/example/screens/Grongy - ZX Spectrum (2022).scr
Normal file
Binary file not shown.
BIN
z80_unpacker/example/screens/Grongy - ZX Spectrum (2022).scr.upk
Normal file
BIN
z80_unpacker/example/screens/Grongy - ZX Spectrum (2022).scr.upk
Normal file
Binary file not shown.
BIN
z80_unpacker/example/screens/Schafft - Poison (2017).scr
Normal file
BIN
z80_unpacker/example/screens/Schafft - Poison (2017).scr
Normal file
Binary file not shown.
BIN
z80_unpacker/example/screens/Schafft - Poison (2017).scr.upk
Normal file
BIN
z80_unpacker/example/screens/Schafft - Poison (2017).scr.upk
Normal file
Binary file not shown.
BIN
z80_unpacker/example/screens/diver - Back to Bjork (2015).scr
Normal file
BIN
z80_unpacker/example/screens/diver - Back to Bjork (2015).scr
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
32
z80_unpacker/readme.txt
Normal file
32
z80_unpacker/readme.txt
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
Z80 asm implementation of C unpacker, code-size focused (not performance).
|
||||||
|
|
||||||
|
**ONLY BITSTREAM** variant is currently supported, make sure to use "-b" in packer.
|
||||||
|
|
||||||
|
The project is expected to evolve further, including possible changes to the binary format; this is
|
||||||
|
the initial version of the Z80 unpacker to explore if/how it works and how it can be improved further.
|
||||||
|
|
||||||
|
(copy full packer+depacker source to your project if you plan to use it, as future revisions
|
||||||
|
may be incompatible with files you will produce with current version)
|
||||||
|
|
||||||
|
Asm syntax is z00m's sjasmplus: https://github.com/z00m128/sjasmplus
|
||||||
|
|
||||||
|
Backward direction unpacker added as compile-time option, see example for both forward/backward
|
||||||
|
depacker in action.
|
||||||
|
|
||||||
|
The packed/unpacked data-overlap has to be tested per-case, in the worst case the packed data
|
||||||
|
may need even more than 7 bytes to unpack final byte, but usually 1-4 bytes may suffice.
|
||||||
|
|
||||||
|
TODO:
|
||||||
|
- build a bigger corpus of test data to benchmark future changes in algorithm/format (example and zx48.rom were used to do initial tests)
|
||||||
|
- maybe try to beat double-loop `decode_number` with different encoding format
|
||||||
|
- (@ped7g) Z80N version of unpacker for ZX Next devs
|
||||||
|
- (@exoticorn) add Z80 specific packer (to avoid confusion with original MicroW8 variant), and land it all to master branch, maybe in "z80" directory or something? (and overall decide how to organise+merge this upstream into main repo)
|
||||||
|
- (@exoticorn) add to packer output with possible packed/unpacked region overlap
|
||||||
|
|
||||||
|
DONE:
|
||||||
|
* review non-bitstream variant, if it's feasible to try to implement it with Z80
|
||||||
|
- Ped7g: IMHO nope, the 12b x 8b MUL code would probably quickly cancel any gains from the simpler state update
|
||||||
|
* review first implementation to identify weak spots where the implementation can be shorter+faster
|
||||||
|
with acceptable small changes to the format
|
||||||
|
- Ped7g: the decode_bit settled down and now doesn't feel so confused and redundant, the code seems pretty on point to me, no obvious simplification from format change
|
||||||
|
- Ped7g: the decode_number double-loop is surprisingly resilient, especially in terms of code size I failed to beat it, speed wise only negligible gains
|
||||||
381
z80_unpacker/unpack.asm
Normal file
381
z80_unpacker/unpack.asm
Normal file
@@ -0,0 +1,381 @@
|
|||||||
|
;; https://github.com/exoticorn/upkr/blob/z80/c_unpacker/unpack.c - original C implementation
|
||||||
|
;; C source in comments ahead of asm - the C macros are removed to keep only bitstream variant
|
||||||
|
;;
|
||||||
|
;; initial version by Peter "Ped" Helcmanovsky (C) 2022, licensed same as upkr project ("unlicensed")
|
||||||
|
;; to assemble use z00m's sjasmplus: https://github.com/z00m128/sjasmplus
|
||||||
|
;;
|
||||||
|
;; you can define UPKR_PROBS_ORIGIN as a specific 256-byte-aligned address for the probs array (320 bytes),
|
||||||
|
;; otherwise it will be positioned after the unpacker code (256 aligned)
|
||||||
|
;;
|
||||||
|
;; public API:
|
||||||
|
;;
|
||||||
|
;; upkr.unpack
|
||||||
|
;; IN: IX = packed data, DE' (shadow DE) = destination
|
||||||
|
;; OUT: IX = after packed data
|
||||||
|
;; modifies: all registers except IY, requires 10 bytes of stack space
|
||||||
|
;;
|
||||||
|
|
||||||
|
; DEFINE BACKWARDS_UNPACK ; uncomment to build backwards depacker (write_ptr--, upkr_data_ptr--)
|
||||||
|
; initial IX points at last byte of compressed data
|
||||||
|
; initial DE' points at last byte of unpacked data
|
||||||
|
|
||||||
|
; DEFINE UPKR_UNPACK_SPEED ; uncomment to get larger but faster unpack routine
|
||||||
|
|
||||||
|
; code size hint: if you put probs array just ahead of BASIC entry point, you will get BC
|
||||||
|
; initialised to probs.e by BASIC `USR` command and you can remove it from unpack init (-3B)
|
||||||
|
|
||||||
|
OPT push reset --syntax=abf
|
||||||
|
MODULE upkr
|
||||||
|
|
||||||
|
NUMBER_BITS EQU 16+15 ; context-bits per offset/length (16+15 for 16bit offsets/pointers)
|
||||||
|
; numbers (offsets/lengths) are encoded like: 1a1b1c1d1e0 = 0000'0000'001e'dcba
|
||||||
|
|
||||||
|
/*
|
||||||
|
u8* upkr_data_ptr;
|
||||||
|
u8 upkr_probs[1 + 255 + 1 + 2*32 + 2*32];
|
||||||
|
u16 upkr_state;
|
||||||
|
u8 upkr_current_byte;
|
||||||
|
int upkr_bits_left;
|
||||||
|
|
||||||
|
int upkr_unpack(void* destination, void* compressed_data) {
|
||||||
|
upkr_data_ptr = (u8*)compressed_data;
|
||||||
|
upkr_state = 0;
|
||||||
|
upkr_bits_left = 0;
|
||||||
|
for(int i = 0; i < sizeof(upkr_probs); ++i)
|
||||||
|
upkr_probs[i] = 128;
|
||||||
|
|
||||||
|
u8* write_ptr = (u8*)destination;
|
||||||
|
|
||||||
|
int prev_was_match = 0;
|
||||||
|
int offset = 0;
|
||||||
|
for(;;) {
|
||||||
|
if(upkr_decode_bit(0)) {
|
||||||
|
if(prev_was_match || upkr_decode_bit(256)) {
|
||||||
|
offset = upkr_decode_length(257) - 1;
|
||||||
|
if(offset == 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int length = upkr_decode_length(257 + 64);
|
||||||
|
while(length--) {
|
||||||
|
*write_ptr = write_ptr[-offset];
|
||||||
|
++write_ptr;
|
||||||
|
}
|
||||||
|
prev_was_match = 1;
|
||||||
|
} else {
|
||||||
|
int byte = 1;
|
||||||
|
while(byte < 256) {
|
||||||
|
int bit = upkr_decode_bit(byte);
|
||||||
|
byte = (byte << 1) + bit;
|
||||||
|
}
|
||||||
|
*write_ptr++ = byte;
|
||||||
|
prev_was_match = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return write_ptr - (u8*)destination;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
; IN: IX = compressed_data, DE' = destination
|
||||||
|
unpack:
; Entry point of the depacker. Register roles used throughout the routine:
;   HL  = upkr_state                      IX = upkr_data_ptr
;   BC  = probs + context_index           A' = upkr_current_byte (with stop bit)
;   DE' = write_ptr                       D  = prev_was_match flag (between decode calls)
; Returns to the caller through `ret z` once a decoded offset of zero (end marker) is seen.
|
||||||
|
; ** reset probs to 0x80, also reset HL (state) to zero, and set BC to probs+context 0
|
||||||
|
ld hl,probs.c>>1 ; L = count of two-byte fill steps, H = 0 (count is below 256)
|
||||||
|
ld bc,probs.e ; BC = end of the (even-padded) probs array, filled downwards
|
||||||
|
ld a,$80
|
||||||
|
.reset_probs:
|
||||||
|
dec bc
|
||||||
|
ld (bc),a ; will overwrite one extra byte after the array because of odd length
|
||||||
|
dec bc
|
||||||
|
ld (bc),a
|
||||||
|
dec l ; only L counts down, H stays zero => HL = 0 when the loop ends
|
||||||
|
jr nz,.reset_probs
|
||||||
|
exa ; park A = $80 in A' as the "empty" upkr_current_byte (only the stop bit set)
|
||||||
|
; BC = probs (context_index 0), state HL = 0, A' = 0x80 (no source bits left in upkr_current_byte)
|
||||||
|
|
||||||
|
; ** main loop to decompress data
|
||||||
|
; D = prev_was_match = uninitialised, literal is expected first => will reset D to "false"
|
||||||
|
; values for false/true of prev_was_match are: false = high(probs), true = 1 + high(probs)
|
||||||
|
.decompress_data:
|
||||||
|
ld c,0
|
||||||
|
call decode_bit ; if(upkr_decode_bit(0))
|
||||||
|
jr c,.copy_chunk
|
||||||
|
|
||||||
|
; * extract byte from compressed data (literal)
|
||||||
|
inc c ; C = byte = 1 (and also context_index)
|
||||||
|
.decode_byte:
|
||||||
|
call decode_bit ; bit = upkr_decode_bit(byte);
|
||||||
|
rl c ; byte = (byte << 1) + bit;
|
||||||
|
jr nc,.decode_byte ; while(byte < 256)
; the initial 1 in C acts as a sentinel: it is shifted out into CF after eight data bits
|
||||||
|
ld a,c
|
||||||
|
exx
|
||||||
|
ld (de),a ; *write_ptr++ = byte;
|
||||||
|
IFNDEF BACKWARDS_UNPACK : inc de : ELSE : dec de : ENDIF
|
||||||
|
exx
|
||||||
|
ld d,b ; prev_was_match = false
|
||||||
|
jr .decompress_data
|
||||||
|
|
||||||
|
; * copy chunk of already decompressed data (match)
|
||||||
|
.copy_chunk:
|
||||||
|
ld a,b ; A = high(probs) = the "false" value of prev_was_match
|
||||||
|
inc b ; context_index = 256
|
||||||
|
; if(prev_was_match || upkr_decode_bit(256)) {
|
||||||
|
; offset = upkr_decode_length(257) - 1;
|
||||||
|
; if (0 == offset) break;
|
||||||
|
; }
|
||||||
|
cp d ; CF = prev_was_match
|
||||||
|
call nc,decode_bit ; if not prev_was_match, then upkr_decode_bit(256)
|
||||||
|
jr nc,.keep_offset ; if neither, keep old offset
|
||||||
|
call decode_number ; context_index is already 257-1 as needed by decode_number
|
||||||
|
dec de ; offset = upkr_decode_length(257) - 1;
|
||||||
|
ld a,d
|
||||||
|
or e
|
||||||
|
ret z ; if(offset == 0) break
|
||||||
|
ld (.offset),de ; self-modifying: patch the immediate operand at the `.offset` label below
|
||||||
|
.keep_offset:
|
||||||
|
; int length = upkr_decode_length(257 + 64);
|
||||||
|
; while(length--) {
|
||||||
|
; *write_ptr = write_ptr[-offset];
|
||||||
|
; ++write_ptr;
|
||||||
|
; }
|
||||||
|
; prev_was_match = 1;
|
||||||
|
ld c,low(257 + NUMBER_BITS - 1) ; context_index to second "number" set for lengths decoding
|
||||||
|
call decode_number ; length = upkr_decode_length(257 + 64);
|
||||||
|
push de ; pass length across the register-bank switch via stack
|
||||||
|
exx
|
||||||
|
IFNDEF BACKWARDS_UNPACK
|
||||||
|
; forward unpack (write_ptr++, upkr_data_ptr++)
|
||||||
|
ld h,d ; DE = write_ptr
|
||||||
|
ld l,e
|
||||||
|
.offset+*: ld bc,0 ; BC = offset (operand is overwritten by `ld (.offset),de` above)
|
||||||
|
sbc hl,bc ; CF=0 from decode_number ; HL = write_ptr - offset
|
||||||
|
pop bc ; BC = length
|
||||||
|
ldir
|
||||||
|
ELSE
|
||||||
|
; backward unpack (write_ptr--, upkr_data_ptr--)
|
||||||
|
.offset+*: ld hl,0 ; HL = offset (operand is overwritten by `ld (.offset),de` above)
|
||||||
|
add hl,de ; HL = write_ptr + offset
|
||||||
|
pop bc ; BC = length
|
||||||
|
lddr
|
||||||
|
ENDIF
|
||||||
|
exx
|
||||||
|
ld d,b ; prev_was_match = true
|
||||||
|
djnz .decompress_data ; adjust context_index back to 0..255 range, go to main loop
; NOTE(review): DJNZ is used as "dec b + jump"; it always jumps unless high(probs) == 1, in which case
; execution would fall through into the code that follows - confirm probs is never placed at $01xx
|
||||||
|
|
||||||
|
/*
|
||||||
|
int upkr_decode_bit(int context_index) {
|
||||||
|
while(upkr_state < 32768) {
|
||||||
|
if(upkr_bits_left == 0) {
|
||||||
|
upkr_current_byte = *upkr_data_ptr++;
|
||||||
|
upkr_bits_left = 8;
|
||||||
|
}
|
||||||
|
upkr_state = (upkr_state << 1) + (upkr_current_byte >> 7);
|
||||||
|
upkr_current_byte <<= 1;
|
||||||
|
--upkr_bits_left;
|
||||||
|
}
|
||||||
|
|
||||||
|
int prob = upkr_probs[context_index];
|
||||||
|
int bit = (upkr_state & 255) >= prob ? 1 : 0;
|
||||||
|
|
||||||
|
int prob_offset = 16;
|
||||||
|
int state_offset = 0;
|
||||||
|
int state_scale = prob;
|
||||||
|
if(bit) {
|
||||||
|
state_offset = -prob;
|
||||||
|
state_scale = 256 - prob;
|
||||||
|
prob_offset = 0;
|
||||||
|
}
|
||||||
|
upkr_state = state_offset + state_scale * (upkr_state >> 8) + (upkr_state & 255);
|
||||||
|
upkr_probs[context_index] = prob_offset + prob - ((prob + 8) >> 4);
|
||||||
|
|
||||||
|
return bit;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
inc_c_decode_bit:
|
||||||
|
; ++low(context_index) before decode_bit (to get -1B by two calls in decode_number)
|
||||||
|
inc c
|
||||||
|
decode_bit:
|
||||||
|
; HL = upkr_state
|
||||||
|
; IX = upkr_data_ptr
|
||||||
|
; BC = probs+context_index
|
||||||
|
; A' = upkr_current_byte (!!! init to 0x80 at start, not 0x00)
|
||||||
|
; preserves DE
; OUT: CF = decoded bit, HL = updated state, probs[context_index] adapted, BC preserved, A modified
; stack: three pushes (DE, BC, AF) on top of the return address
|
||||||
|
; ** while (state < 32768) - initial check
|
||||||
|
push de
|
||||||
|
bit 7,h
|
||||||
|
jr nz,.state_b15_set
|
||||||
|
exa ; bring upkr_current_byte from A' into A for the refill loop
|
||||||
|
; ** while body
|
||||||
|
.state_b15_zero:
|
||||||
|
; HL = upkr_state
|
||||||
|
; IX = upkr_data_ptr
|
||||||
|
; A = upkr_current_byte (init to 0x80 at start, not 0x00)
|
||||||
|
add a,a ; upkr_current_byte <<= 1; // and testing if(upkr_bits_left == 0)
|
||||||
|
jr nz,.has_bit ; CF=data, ZF=0 -> some bits + stop bit still available
|
||||||
|
; CF=1 (by stop bit)
|
||||||
|
ld a,(ix)
|
||||||
|
IFNDEF BACKWARDS_UNPACK : inc ix : ELSE : dec ix : ENDIF ; upkr_current_byte = *upkr_data_ptr++;
|
||||||
|
adc a,a ; CF=data, b0=1 as new stop bit
|
||||||
|
.has_bit:
|
||||||
|
adc hl,hl ; upkr_state = (upkr_state << 1) + (upkr_current_byte >> 7);
|
||||||
|
jp p,.state_b15_zero ; while (state < 32768)
|
||||||
|
exa ; store the remaining source bits back into A'
|
||||||
|
; ** set "bit"
|
||||||
|
.state_b15_set:
|
||||||
|
ld a,(bc) ; A = upkr_probs[context_index]
|
||||||
|
dec a ; prob is in ~7..249 range, never zero, safe to -1
|
||||||
|
cp l ; CF = bit = prob-1 < (upkr_state & 255) <=> prob <= (upkr_state & 255)
|
||||||
|
inc a ; A = prob again (INC does not change CF)
|
||||||
|
; ** adjust state
|
||||||
|
push bc
|
||||||
|
ld c,l ; C = (upkr_state & 255); (preserving the value)
|
||||||
|
push af ; keep A = prob and CF = bit for the second half of the update
|
||||||
|
jr nc,.bit_is_0
|
||||||
|
neg ; A = -prob == (256-prob), CF=1 preserved
|
||||||
|
.bit_is_0:
|
||||||
|
ld d,0
|
||||||
|
ld e,a ; DE = state_scale ; prob || (256-prob)
|
||||||
|
ld l,d ; H:L = (upkr_state>>8) : 0
|
||||||
|
|
||||||
|
IFNDEF UPKR_UNPACK_SPEED
|
||||||
|
|
||||||
|
;; looped MUL for minimum unpack size
|
||||||
|
ld b,8 ; counter
|
||||||
|
.mulLoop:
|
||||||
|
add hl,hl
|
||||||
|
jr nc,.mul0
|
||||||
|
add hl,de
|
||||||
|
.mul0:
|
||||||
|
djnz .mulLoop ; until HL = state_scale * (upkr_state>>8), also BC becomes (upkr_state & 255)
|
||||||
|
|
||||||
|
ELSE
|
||||||
|
|
||||||
|
;;; unrolled MUL for better performance, +25 bytes unpack size
|
||||||
|
ld b,d ; B = 0, so BC = (upkr_state & 255) just like after the DJNZ loop variant
|
||||||
|
DUP 8
|
||||||
|
add hl,hl
|
||||||
|
jr nc,0_f
|
||||||
|
add hl,de
|
||||||
|
0:
|
||||||
|
EDUP
|
||||||
|
|
||||||
|
ENDIF
|
||||||
|
|
||||||
|
add hl,bc ; HL = state_scale * (upkr_state >> 8) + (upkr_state & 255)
|
||||||
|
pop af ; restore prob and CF=bit
|
||||||
|
jr nc,.bit_is_0_2
|
||||||
|
dec d ; DE = -prob (also D = bit ? $FF : $00)
|
||||||
|
add hl,de ; HL += -prob
|
||||||
|
; ^ this always preserves CF=1, because (state>>8) >= 128, state_scale: 7..250, prob: 7..250,
|
||||||
|
; so 7*128 > 250 and thus edge case `ADD hl=(7*128+0),de=(-250)` => CF=1
|
||||||
|
.bit_is_0_2:
|
||||||
|
; *** adjust probs[context_index]
|
||||||
|
rra ; + (bit<<4) ; part of -prob_offset, needs another -16
|
||||||
|
and $FC ; clear/keep correct bits to get desired (prob>>4) + extras, CF=0
|
||||||
|
rra
|
||||||
|
rra
|
||||||
|
rra ; A = (bit<<4) + (prob>>4), CF=(prob & 8)
|
||||||
|
adc a,-16 ; A = (bit<<4) - 16 + ((prob + 8)>>4) ; -prob_offset = (bit<<4) - 16
|
||||||
|
ld e,a
|
||||||
|
pop bc ; BC = probs+context_index again
|
||||||
|
ld a,(bc) ; A = prob (cheaper + shorter to re-read again from memory)
|
||||||
|
sub e ; A = 16 - (bit<<4) + prob - ((prob + 8)>>4) ; = prob_offset + prob - ((prob + 8)>>4)
|
||||||
|
ld (bc),a ; probs[context_index] = prob_offset + prob - ((prob + 8) >> 4);
|
||||||
|
add a,d ; restore CF = bit (D = bit ? $FF : $00 && A > 0)
|
||||||
|
pop de ; restore caller's DE (pushed at entry)
|
||||||
|
ret
|
||||||
|
|
||||||
|
/*
|
||||||
|
int upkr_decode_length(int context_index) {
|
||||||
|
int length = 0;
|
||||||
|
int bit_pos = 0;
|
||||||
|
while(upkr_decode_bit(context_index)) {
|
||||||
|
length |= upkr_decode_bit(context_index + 1) << bit_pos++;
|
||||||
|
context_index += 2;
|
||||||
|
}
|
||||||
|
return length | (1 << bit_pos);
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
decode_number:
; Decodes one interleaved "continue bit / data bit" number (see upkr_decode_length in the C reference).
; Data bits are shifted into DE from the top; the $FFFF preload provides a run of 1s followed by a
; single 0 "stop bit", so the final loop knows when all bits have reached their proper position.
|
||||||
|
; HL = upkr_state
|
||||||
|
; IX = upkr_data_ptr
|
||||||
|
; BC = probs+context_index-1
|
||||||
|
; A' = upkr_current_byte (!!! init to 0x80 at start, not 0x00)
|
||||||
|
; return length in DE, CF=0
|
||||||
|
ld de,$FFFF ; length = 0 with positional-stop-bit
|
||||||
|
or a ; CF=0 to skip getting data bit and use only `rr d : rr e` to fix init DE
|
||||||
|
.loop:
|
||||||
|
call c,inc_c_decode_bit ; get data bit, context_index + 1 / if CF=0 just add stop bit into DE init
|
||||||
|
rr d
|
||||||
|
rr e ; DE = length = (length >> 1) | (bit << 15);
|
||||||
|
call inc_c_decode_bit ; context_index += 2
|
||||||
|
jr c,.loop ; CF = "continue" bit: 1 => another data bit follows
|
||||||
|
.fix_bit_pos:
|
||||||
|
ccf ; NC will become this final `| (1 << bit_pos)` bit
|
||||||
|
rr d
|
||||||
|
rr e
|
||||||
|
jr c,.fix_bit_pos ; until stop bit is reached (all bits did land to correct position)
|
||||||
|
ret ; return with CF=0 (important for unpack routine)
|
||||||
|
|
||||||
|
DISPLAY "upkr.unpack total size: ",/D,$-unpack
|
||||||
|
|
||||||
|
; reserve space for probs array without emitting any machine code (using only EQU)
|
||||||
|
|
||||||
|
; Layout of the probs array, defined purely through EQU symbols:
;   probs         = base address, rounded up to the next multiple of 256
;   probs.real_c  = number of contexts actually used
;   probs.c       = size rounded up to an even count (the init loop in `unpack` writes two bytes per step)
;   probs.e       = address just past the padded array (start value of BC in the init loop)
IFDEF UPKR_PROBS_ORIGIN ; if specific address is defined by user, move probs array there
|
||||||
|
probs: EQU ((UPKR_PROBS_ORIGIN) + 255) & -$100 ; probs array aligned to 256
|
||||||
|
ELSE
|
||||||
|
probs: EQU ($ + 255) & -$100 ; probs array aligned to 256
|
||||||
|
ENDIF
|
||||||
|
.real_c: EQU 1 + 255 + 1 + 2*NUMBER_BITS ; real size of probs array
|
||||||
|
.c: EQU (.real_c + 1) & -2 ; padding to even size (required by init code)
|
||||||
|
.e: EQU probs + .c
|
||||||
|
|
||||||
|
DISPLAY "upkr.unpack probs array placed at: ",/A,probs,",\tsize: ",/A,probs.c
|
||||||
|
|
||||||
|
/*
|
||||||
|
archived: negligibly faster but +6B longer decode_number variant using HL' and BC' to
|
||||||
|
do `number|=(1<<bit_pos);` type of logic in single loop.
|
||||||
|
*/
|
||||||
|
; decode_number:
|
||||||
|
; exx
|
||||||
|
; ld bc,1
|
||||||
|
; ld l,b
|
||||||
|
; ld h,b ; HL = 0
|
||||||
|
; .loop
|
||||||
|
; exx
|
||||||
|
; inc c
|
||||||
|
; call decode_bit
|
||||||
|
; jr nc,.done
|
||||||
|
; inc c
|
||||||
|
; call decode_bit
|
||||||
|
; exx
|
||||||
|
; jr nc,.b0
|
||||||
|
; add hl,bc
|
||||||
|
; .b0:
|
||||||
|
; sla c
|
||||||
|
; rl b
|
||||||
|
; jr .loop
|
||||||
|
; .done:
|
||||||
|
; exx
|
||||||
|
; add hl,bc
|
||||||
|
; push hl
|
||||||
|
; exx
|
||||||
|
; pop de
|
||||||
|
; ret
|
||||||
|
|
||||||
|
/*
|
||||||
|
archived: possible LUT variant of updating probs value, requires 512-aligned 512B table (not tested)
|
||||||
|
*/
|
||||||
|
; code is replacing decode_bit from "; *** adjust probs[context_index]", followed by `ld (bc),a : add a,d ...`
|
||||||
|
; ld c,a
|
||||||
|
; ld a,high(probs_update_table)/2 ; must be 512 aligned
|
||||||
|
; rla
|
||||||
|
; ld b,a
|
||||||
|
; ld a,(bc)
|
||||||
|
; pop bc
|
||||||
|
; -------------------------------------------
|
||||||
|
; probs_update_table: EQU probs-512
|
||||||
|
; -------------------------------------------
|
||||||
|
; table generator is not obvious and probably not short either, 20+ bytes almost for sure, maybe even 30-40
|
||||||
|
|
||||||
|
ENDMODULE
|
||||||
|
OPT pop
|
||||||
Reference in New Issue
Block a user