37 Commits
fuzz ... z80

Author SHA1 Message Date
48727040b3 Merge pull request #5 from ped7g/z80_ped7g
Z80 ped7g - further optimisations
2022-09-27 22:35:44 +02:00
Peter Helcmanovsky (Ped)
8a32e1384c z80_unpacker: readme.txt and comment update 2022-09-19 15:19:39 +02:00
Peter Helcmanovsky (Ped)
9913dcf4bb z80_unpacker: comment with possible LUT variant of updating probs value
missing 512 byte table generator, which doesn't look trivial to do
(especially in terms of code size).
Not tested, but it looks like a decent speed-up.
2022-09-19 14:31:00 +02:00
Peter Helcmanovsky (Ped)
a8fd3dc573 z80_unpacker: optimisation: -1B in decode_number (fwd 170B / rev 167B)
slightly slower code, ROM unpack is back to ~22.6s
2022-09-19 13:20:44 +02:00
Peter Helcmanovsky (Ped)
e1f9fa143a z80_unpacker: comment with caller size optimisation tip 2022-09-19 11:58:32 +02:00
Peter Helcmanovsky (Ped)
db1c7d2d14 z80_unpacker: optimisation: -1B in decode_number (fwd 171B / rev 168B) 2022-09-19 11:49:53 +02:00
Peter Helcmanovsky (Ped)
c1ffd0e7ed z80_unpacker: attempt for faster decode_number (+6B, ~1% faster) => not good
archived in comments for future reference
2022-09-19 11:42:56 +02:00
Peter Helcmanovsky (Ped)
00d084105a z80_unpacker: optimisation: -2B in backward unpack (fwd 172B / rev 169B)
backward was already -1B, so now the total difference is -3B.
2022-09-19 01:31:22 +02:00
Peter Helcmanovsky (Ped)
8e5298caee z80_unpacker: optimisation: -1B in decode_number = 172B (but +4T per length) 2022-09-19 01:09:21 +02:00
Peter Helcmanovsky (Ped)
1fb29f3a1b z80_unpacker: optimisation: -1B and -1T in decode_bit = 173B 2022-09-18 23:44:18 +02:00
c8924456aa -r reverses both input and output 2022-09-18 23:38:41 +02:00
7b0e22f459 Merge pull request #3 from ped7g/z80_ped7g
backward unpacker + example extended
2022-09-18 23:24:28 +02:00
Peter Helcmanovsky (Ped)
165f593a11 z80_unpacker: (codestyle) whitespace + temporary label rename 2022-09-18 23:04:37 +02:00
Peter Helcmanovsky (Ped)
d4bce4bf7c z80_unpacker: optimisation: -3B and ~-10T in decode_bit = 174B
unpack zx48.rom is now ~22.6s (from 23.0s)
(performance version is now 199 bytes, zx48.rom unpack 19.4s -> 19.0s)
2022-09-18 22:54:10 +02:00
Peter Helcmanovsky (Ped)
b13fa05413 z80_unpacker: add backward variant of unpacker + example extended 2022-09-18 00:23:14 +02:00
Peter Helcmanovsky (Ped)
3c773aca8d z80_unpacker: add performance variant of depacker 2022-09-16 03:38:03 +02:00
a5406deb30 Merge pull request #2 from ped7g/z80_ped7g
Z80 ped7g - few more optimisations for current variant of packer
2022-09-16 00:26:55 +02:00
Peter Helcmanovsky (Ped)
9211544cb9 z80_unpacker: add resulting snapshot file to example 2022-09-15 18:37:06 +02:00
Peter Helcmanovsky (Ped)
3fa9e0fa12 z80_unpacker: optimisations: 0B, -13T in decode_bit (stays 177B) 2022-09-15 18:22:33 +02:00
Peter Helcmanovsky (Ped)
aa3fad4d80 z80_unpacker: optimisations: -3B and ~-24T in decode_bit = 177B 2022-09-15 18:22:32 +02:00
Peter Helcmanovsky (Ped)
6624940ed9 z80_unpacker: optimisations: -2B and -27T in decode_bit = 180B 2022-09-15 18:22:32 +02:00
Peter Helcmanovsky (Ped)
c3a9773e5c z80_unpacker: optimisations: -1B in unpack implementation = 182B 2022-09-15 18:22:31 +02:00
Peter Helcmanovsky (Ped)
a75a35efb2 z80_unpacker: probs context-size for offset/length numbers as EQU 2022-09-15 18:22:27 +02:00
540a91d1ba forgot to add back -l 9 2022-09-15 00:18:30 +02:00
e7aaf1491a add old-prob-update to compare script, add reverse option 2022-09-14 23:51:38 +02:00
a1dabaf7f9 add simple script to compare compression of variants 2022-09-14 23:41:14 +02:00
75e375fb1f Merge branch 'ped7g-z80_ped7g' into z80 2022-09-14 09:03:28 +02:00
Peter Helcmanovsky (Ped)
c7ea11bce3 z80_unpacker: optimisations: -2B in unpack implementation = 183B 2022-09-14 01:44:04 +02:00
Peter Helcmanovsky (Ped)
02d20867ee z80_unpacker: optimisations: -2B in unpack implementation = 185B 2022-09-14 01:01:56 +02:00
Peter Helcmanovsky (Ped)
511ddefc08 z80_unpacker: optimisations: -4T per offset/length bit decoded
making the 256-alignment of probs array even more baked-in, but there
was no real chance to get rid of that any way
2022-09-14 00:01:51 +02:00
Peter Helcmanovsky (Ped)
d30baaa91f z80_unpacker: optimisations: -1B by keeping write_ptr in DE' 2022-09-13 23:57:59 +02:00
Peter Helcmanovsky (Ped)
919a892ef0 z80_unpacker: optimisations: -1B by decode_length returning CF=0 2022-09-13 23:25:03 +02:00
Peter Helcmanovsky (Ped)
ea5c0b1b15 z80_unpacker: optimisations: shorter >>4 in probs update 2022-09-13 23:15:18 +02:00
Peter Helcmanovsky (Ped)
a19ec2abb7 z80_unpacker: optimisations: remove .offset init
first offset is mandatory in packed data
2022-09-13 22:53:15 +02:00
Peter Helcmanovsky (Ped)
7b051113e1 z80_unpacker: initial working version with screen-slideshow example 2022-09-13 22:12:03 +02:00
f1f1c64a76 implement simplified prob update, update unpack.c 2022-09-10 12:01:42 +02:00
36cb6d77b5 BE bitstream, flip bit encoding 2022-09-10 11:31:09 +02:00
34 changed files with 658 additions and 442 deletions

View File

@@ -1 +0,0 @@
/build/

View File

@@ -1,30 +0,0 @@
# Build and test rules for the C reference unpacker and the ARMv6-M assembly
# unpacker. All build products go to build/, test output goes to /tmp/out.bin.

# These targets name actions, not files.
.PHONY: test_riscv64 test_armv6m test_c sizes

# C reference unpacker cross-compiled for riscv64 (run under qemu-user).
build/unpack_riscv64: ../c_unpacker/main.c ../c_unpacker/unpack.c
	mkdir -p build
	riscv64-linux-gnu-gcc -g -static -o $@ $^

test_riscv64: build/unpack_riscv64
	qemu-riscv64 $< test_data.upk /tmp/out.bin
	cmp test_data.bin /tmp/out.bin

# Assembly unpacker driven by the C main(), run under qemu-user.
build/unpack_armv6m: ../c_unpacker/main.c unpack_armv6m.S
	mkdir -p build
	arm-linux-gnueabihf-gcc -g -static -o $@ $^

test_armv6m: build/unpack_armv6m
	qemu-arm $< test_data.upk /tmp/out.bin
	cmp test_data.bin /tmp/out.bin

# Raw .text of the assembly unpacker, used only to measure its size.
# build/ must be created here as well: this rule can be the first one to run
# (e.g. `make sizes` on a clean tree). $< is used instead of $? so the source
# is always passed to the compiler, not just "prerequisites newer than target".
build/unpack_armv6m.bin: unpack_armv6m.S
	mkdir -p build
	arm-none-eabi-gcc -march=armv6-m -c -o build/unpack_armv6m.o $<
	arm-none-eabi-objcopy -O binary --only-section=.text build/unpack_armv6m.o $@

# Native build of the C reference unpacker.
build/unpack_c: ../c_unpacker/main.c ../c_unpacker/unpack.c
	mkdir -p build
	gcc -g -o $@ $^

test_c: build/unpack_c
	$< test_data.upk /tmp/out.bin
	cmp test_data.bin /tmp/out.bin

sizes: build/unpack_armv6m.bin
	ls -l build/*.bin

View File

@@ -1,99 +0,0 @@
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned long u32;
u8* upkr_data_ptr;
u8 upkr_probs[1 + 255 + 1 + 2*32 + 2*32];
#ifdef UPKR_BITSTREAM
u16 upkr_state;
u8 upkr_current_byte;
int upkr_bits_left;
#else
u32 upkr_state;
#endif
/*
 * Decodes one bit using the adaptive probability stored in
 * upkr_probs[context_index], then adapts that probability towards the
 * decoded value.
 *
 * The decoder state is refilled from upkr_data_ptr first: bit by bit
 * (least significant bit of every input byte first) when UPKR_BITSTREAM is
 * defined, byte by byte otherwise.
 *
 * Returns the decoded bit (0 or 1).
 */
int upkr_decode_bit(int context_index) {
#ifdef UPKR_BITSTREAM
    /* Shift in single bits until the state is at least 32768. */
    for(; upkr_state < 32768; --upkr_bits_left) {
        if(upkr_bits_left == 0) {
            upkr_current_byte = *upkr_data_ptr++;
            upkr_bits_left = 8;
        }
        upkr_state = (upkr_state << 1) + (upkr_current_byte & 1);
        upkr_current_byte >>= 1;
    }
#else
    /* Shift in whole bytes until the state is at least 4096. */
    while(upkr_state < 4096) {
        upkr_state = (upkr_state << 8) | *upkr_data_ptr++;
    }
#endif

    const int prob = upkr_probs[context_index];
    const int low = upkr_state & 255;
    /* The low byte of the state selects the bit: values below prob mean 1. */
    const int bit = low < prob;
    /* Width of the interval belonging to the decoded bit. */
    int scale = bit ? prob : 256 - prob;

    upkr_state = scale * (upkr_state >> 8) + low;
    /* Move the probability of the decoded value 1/16 of the way to 256. */
    scale += (256 - scale + 8) >> 4;
    if(bit) {
        upkr_probs[context_index] = scale;
    } else {
        /* The interval of a 0 bit starts at prob, so rebase the state. */
        upkr_state -= prob;
        upkr_probs[context_index] = 256 - scale;
    }
    return bit;
}
/*
 * Decodes a variable-length number. The stream alternates a "continue" bit
 * (context_index, context_index + 2, ...) with a data bit
 * (context_index + 1, context_index + 3, ...); data bits arrive least
 * significant first. When the continue bit is 0 an implicit top bit is added
 * above the collected data bits and the result is returned.
 */
int upkr_decode_length(int context_index) {
    int value = 0;
    int shift;

    for(shift = 0; upkr_decode_bit(context_index); ++shift) {
        if(upkr_decode_bit(context_index + 1)) {
            value |= 1 << shift;
        }
        context_index += 2;
    }
    return value | (1 << shift);
}
void* upkr_unpack(void* destination, void* compressed_data) {
upkr_data_ptr = (u8*)compressed_data;
upkr_state = 0;
#ifdef UPKR_BITSTREAM
upkr_bits_left = 0;
#endif
for(int i = 0; i < sizeof(upkr_probs); ++i)
upkr_probs[i] = 128;
u8* write_ptr = (u8*)destination;
int prev_was_match = 0;
int offset = 0;
for(;;) {
if(upkr_decode_bit(0)) {
if(prev_was_match || upkr_decode_bit(256)) {
offset = upkr_decode_length(257) - 1;
if(offset == 0) {
break;
}
}
int length = upkr_decode_length(257 + 64);
while(length--) {
*write_ptr = write_ptr[-offset];
++write_ptr;
}
prev_was_match = 1;
} else {
int byte = 1;
while(byte < 256) {
int bit = upkr_decode_bit(byte);
byte = (byte << 1) + bit;
}
*write_ptr++ = byte;
prev_was_match = 0;
}
}
return write_ptr;
}

Binary file not shown.

View File

@@ -1,162 +0,0 @@
// armv6-m upkr unpacker by yrlf
// some optimizations by exoticorn
.syntax unified
.thumb
.section .text
#define ALIGNUP(n, align) (((n) + (align) - 1) & ~((align) - 1))
#define PROB_LEN (1 + 255 + 1 + 2*32 + 2*32)
#define FRAME_SIZE ALIGNUP(PROB_LEN, 4)
// auto upkr_unpack(uint8_t * out, uint8_t * in) -> tuple<uint8_t *, uint8_t *>
//
// Unpacks one upkr stream. Unpacked bytes are written through r0, compressed
// bytes are consumed through r1 (inside upkr_decode_bit); both advanced
// pointers are left in r0/r1 on return. The PROB_LEN-byte probability table
// lives in a FRAME_SIZE-byte stack frame for the duration of the call.
.global upkr_unpack
.type upkr_unpack, %function
// r0 .. out_ptr (returned)
// r1 .. in_ptr (returned)
// r2 .. state
// r3 .. offset (kept negated: r3 = -offset, see "adds r3, r4, #1" below)
// r4 .. prev_was_literal / decode_length ret (0 right after a match, non-zero after a literal)
// r5 .. subroutine arg (preserved)
// r6 .. decode_bit ret
// r7 .. probs ptr
// NOTE(review): r4 is not initialised before its first test in .Lmatch; this
// presumably relies on every stream starting with a literal - confirm.
upkr_unpack:
push { r4, r5, r6, r7, lr }
sub sp, sp, #FRAME_SIZE // reserve the probability table on the stack
mov r7, sp // r7 = probs base
movs r2, #255
adds r2, r2, #(PROB_LEN - 255) // r2 = PROB_LEN, built in two steps
movs r3, #128 // initial probability of every context
.Lclear:
// Fill probs[PROB_LEN-1 .. 0] with 128. strb leaves the flags alone, so bne
// tests the result of subs; the loop exits with r2 == 0, the initial state.
subs r2, r2, #1
strb r3, [r7, r2]
bne .Lclear
.Lloop:
movs r5, #0 // context 0: literal/match flag
bl upkr_decode_bit
beq .Ldata // bit == 0 -> literal
.Lmatch:
// r6 = 1
lsls r5, r6, #8 // r5 = 256: context of the "new offset" flag
cmp r4, #0
beq 1f // previous op was a match: a new offset is mandatory
bl upkr_decode_bit
beq 2f // flag == 0: reuse the previous offset
1:
bl upkr_decode_length // contexts from 257 up; r4 = -(offset + 1)
adds r3, r4, #1 // r3 = -offset; Z set when offset == 0
beq .Lend // offset 0 marks the end of the stream
2:
adds r5, r5, #64 // r5 = 320: length contexts start at 321 (= 257 + 64)
bl upkr_decode_length // r4 = -length
.Lcopy_loop:
ldrb r5, [r0, r3] // r5 = out_ptr[-offset]
.Lstore:
// Shared tail: stores one byte for both matches and literals.
strb r5, [r0]
adds r0, r0, #1
adds r4, r4, #1 // match: counts -length up towards 0; literal: 1 -> 2
blt .Lcopy_loop // still negative: more match bytes to copy
b .Lloop // match leaves r4 == 0, literal leaves r4 != 0
.Ldata:
movs r5, #1 // literal accumulator with marker bit; doubles as context index
.Ldata_loop:
bl upkr_decode_bit
adcs r5, r5, r5 // r5 = 2 * r5 + carry; carry holds the bit decoded above
lsrs r4, r5, #8 // r4 = 1 once the marker reaches bit 8 (8 bits decoded), else 0
beq .Ldata_loop
b .Lstore // strb keeps only the low 8 bits of r5
.Lend:
add sp, sp, #FRAME_SIZE // release the probability table
pop { r4, r5, r6, r7, pc } // restore saved registers and return
.type upkr_decode_length, %function
// Decodes one variable-length number and returns it NEGATED in r4.
// As read by the loop below, the stream alternates a "continue" bit
// (1 = a data bit follows, 0 = stop) with data bits, least significant first;
// an implicit top bit is added when the loop stops. r5 is incremented before
// every upkr_decode_bit call, so the first context used is (r5 on entry) + 1.
// r0 .. -length tmp (saved)
// r1 ..
// r2 ..
// r3 ..
// r4 .. -length (returned)
// r5 .. context index (saved)
// r6 .. (saved)
// r7 ..
upkr_decode_length:
push { r0, r5, r6, lr }
movs r0, #0 // r0 = -(data bits collected so far)
subs r4, r0, #1 // r4 = -1 = -(1 << bit_pos) with bit_pos = 0
.Lbit_loop:
adds r5, r5, #1 // context of the "continue" bit
bl upkr_decode_bit
beq 1f // continue bit == 0: the number is complete
adds r5, r5, #1 // context of the data bit
bl upkr_decode_bit
beq 2f // data bit == 0: nothing to accumulate
adds r0, r0, r4 // data bit == 1: r0 -= (1 << bit_pos)
2:
lsls r4, r4, #1 // advance to the next bit position: r4 = -(1 << bit_pos)
b .Lbit_loop
1:
adds r4, r4, r0 // r4 = -((1 << bit_pos) + data bits)
pop { r0, r5, r6, pc } // restore the caller's r0/r5/r6 and return
.type upkr_decode_bit, %function
// Decodes one bit with the probability at probs[r5] and adapts that
// probability. Byte-wise state refill (state kept >= 4096).
// Returns the bit in r6; the final lsrs also leaves Z set when the bit is 0
// and the carry equal to the bit, which the callers branch / adcs on.
// upkr_fill_state sits in front of the entry point and falls through into it.
// r0 .. tmp / prob (saved)
// r1 .. in_ptr (modified)
// r2 .. state (modified)
// r3 .. scratch (saved)
// r4 ..
// r5 .. context index (preserved)
// r6 .. bit (returned)
// r7 .. probs ptr (preserved)
upkr_fill_state:
lsls r2, r2, #8 // state <<= 8
ldrb r6, [r1] // fetch the next compressed byte
adds r1, r1, #1
orrs r2, r2, r6 // state |= byte
upkr_decode_bit:
lsrs r6, r2, #12 // zero when state < 4096
beq upkr_fill_state // refill before r1 is saved, so the advanced in_ptr survives the pop
push { r0, r1, r3, lr }
ldrb r0, [r7, r5] // r0 = prob
lsrs r3, r2, #8 // r3 = state >> 8
uxtb r1, r2 // r1 = state & 255
subs r6, r1, r0 // r6 = (state & 255) - prob; negative means bit = 1
blt 1f
// bit = 0: new state = state - prob - prob * (state >> 8)
subs r1, r2, r0 // r1 = state - prob
rsbs r0, r0, #0 // r0 = -prob
1:
muls r3, r3, r0 // bit = 1: prob * (state >> 8); bit = 0: -prob * (state >> 8)
adds r2, r1, r3 // new state (bit = 1: (state & 255) + prob * (state >> 8))
// Probability update: r0 += (((-r0) & 255) + 8) >> 4. The rounding "+ 8" comes
// from the carry: lsrs #4 shifts bit 3 into C and adcs adds it back in.
rsbs r3, r0, #0
uxtb r3, r3
lsrs r3, r3, #4
adcs r0, r0, r3
cmp r6, #0
blt 1f
rsbs r0, r0, #0 // bit = 0: undo the negation so a positive prob is stored
1:
strb r0, [r7, r5] // probs[context] = updated prob
lsrs r6, r6, #31 // r6 = sign of the difference = decoded bit; sets Z and C for the caller
pop { r0, r1, r3, pc } // pop does not touch the flags

View File

@@ -1,33 +0,0 @@
/*
 * Decodes one bit using the adaptive probability stored in
 * upkr_probs[context_index] and adapts that probability afterwards.
 * The state is refilled first: bit-wise (least significant bit of each input
 * byte first) with UPKR_BITSTREAM, byte-wise otherwise.
 *
 * This variant folds both outcomes into one state update and one
 * probability update by working on the complementary probability.
 */
int upkr_decode_bit(int context_index) {
#ifdef UPKR_BITSTREAM
    /* Shift in single bits until the state is at least 32768. */
    for(; upkr_state < 32768; --upkr_bits_left) {
        if(upkr_bits_left == 0) {
            upkr_current_byte = *upkr_data_ptr++;
            upkr_bits_left = 8;
        }
        upkr_state = (upkr_state << 1) + (upkr_current_byte & 1);
        upkr_current_byte >>= 1;
    }
#else
    /* Shift in whole bytes until the state is at least 4096. */
    while(upkr_state < 4096) {
        upkr_state = (upkr_state << 8) | *upkr_data_ptr++;
    }
#endif

    const int stored = upkr_probs[context_index];
    const int bit = (upkr_state & 255) < stored;
    /* Probability of the value that was NOT decoded. */
    int other = bit ? 256 - stored : stored;

    /* bit = 1: state becomes stored * (state >> 8) + (state & 255)
       bit = 0: state becomes (256 - stored) * (state >> 8) + (state & 255) - stored */
    upkr_state -= other * ((upkr_state >> 8) + (bit ? 0 : 1));

    /* Shrink the complementary probability by 1/16 (rounded). */
    other -= (other + 8) >> 4;

    /* For bit = 1 the table holds 256 - other; storing -other into a u8
       yields the same value. */
    upkr_probs[context_index] = bit ? -other : other;
    return bit;
}

View File

@@ -1,7 +1,7 @@
#include <stdio.h>
#include <stdlib.h>
void* upkr_unpack(void* destination, void* compressed_data);
int upkr_unpack(void* destination, void* compressed_data);
int main(int argn, char** argv) {
void* input_buffer = malloc(1024*1024);
@@ -13,8 +13,7 @@ int main(int argn, char** argv) {
printf("Compressed size: %d\n", in_size);
void* end_ptr = upkr_unpack(output_buffer, input_buffer);
int out_size = (char*)end_ptr - (char*)output_buffer;
int out_size = upkr_unpack(output_buffer, input_buffer);
printf("Uncompressed size: %d\n", out_size);

View File

@@ -19,8 +19,8 @@ int upkr_decode_bit(int context_index) {
upkr_current_byte = *upkr_data_ptr++;
upkr_bits_left = 8;
}
upkr_state = (upkr_state << 1) + (upkr_current_byte & 1);
upkr_current_byte >>= 1;
upkr_state = (upkr_state << 1) + (upkr_current_byte >> 7);
upkr_current_byte <<= 1;
--upkr_bits_left;
}
#else
@@ -30,16 +30,18 @@ int upkr_decode_bit(int context_index) {
#endif
int prob = upkr_probs[context_index];
int bit = (upkr_state & 255) < prob ? 1 : 0;
int bit = (upkr_state & 255) >= prob ? 1 : 0;
int prob_offset = 16;
int state_offset = 0;
int state_scale = prob;
if(bit) {
upkr_state = prob * (upkr_state >> 8) + (upkr_state & 255);
prob += (256 - prob + 8) >> 4;
} else {
upkr_state = (256 - prob) * (upkr_state >> 8) + (upkr_state & 255) - prob;
prob -= (prob + 8) >> 4;
state_offset = -prob;
state_scale = 256 - prob;
prob_offset = 0;
}
upkr_probs[context_index] = prob;
upkr_state = state_offset + state_scale * (upkr_state >> 8) + (upkr_state & 255);
upkr_probs[context_index] = prob_offset + prob - ((prob + 8) >> 4);
return bit;
}
@@ -54,7 +56,7 @@ int upkr_decode_length(int context_index) {
return length | (1 << bit_pos);
}
void* upkr_unpack(void* destination, void* compressed_data) {
int upkr_unpack(void* destination, void* compressed_data) {
upkr_data_ptr = (u8*)compressed_data;
upkr_state = 0;
#ifdef UPKR_BITSTREAM
@@ -92,5 +94,5 @@ void* upkr_unpack(void* destination, void* compressed_data) {
}
}
return write_ptr;
return write_ptr - (u8*)destination;
}

50
compare-variants Executable file
View File

@@ -0,0 +1,50 @@
#!/usr/bin/env ruby
# Compares the packed size of every file in a directory across several
# git ref / packer option combinations.
#
# Usage: compare-variants <directory>

# Each entry is [git ref to check out, extra option(s) passed to upkr].
configs = [
  [:master, '-b'],
  [:z80, '-b'],
  [:z80, ['-b', '-r']],
  ['old-prob-update', '-b']
]

# Fail with a usage hint instead of a NoMethodError on nil when the
# directory argument is missing.
abort "Usage: #{$PROGRAM_NAME} <directory>" if ARGV.empty?

# Every entry of the directory except *.txt is used as test data.
files = Dir[ARGV[0] + '/*'].select {|f| !(f =~ /\.txt$/) }
# Names are shortened for the result table.
short_names = files.map {|f| File.basename(f)[..16] }
# One array of per-file results per configuration, in configs order.
results = []
# Prints the results gathered so far: a numbered legend of the
# configurations, a header row with the configuration numbers, then one row
# per file with one cell per configuration that already has a result for it.
#
# configs - list of configurations (printed via %s)
# names   - row labels, one per file
# results - one array per processed configuration, indexed like names
def print_results(configs, names, results)
  configs.each.with_index(1) do |config, number|
    print format("%d: %s\n", number, config)
  end

  header = configs.each_index.map { |col| format(" %-4d", col + 1) }.join
  print ' ' + header + "\n"

  names.each_with_index do |name, row|
    line = format("%16s", name)
    results.each do |column|
      cell = column[row]
      # Configurations that have not reached this file yet have no cell.
      line << format(" %-4s", cell) if cell
    end
    print line, "\n"
  end
end
# For every configuration: check out its git ref, then pack and unpack each
# test file with that revision of upkr. A cell holds the packed size, or
# 'ERR' when packing or unpacking failed or the round trip did not reproduce
# the input. The table is re-printed after every file as a progress report.
for config in configs
  ref = config[0].to_s
  raise "git checkout #{ref} failed" unless system('git', 'checkout', ref)
  options = Array(config[1])
  config_results = []
  results << config_results
  for file in files
    packed = system('cargo', 'run', '--release', 'pack', '-l', '9', *options, file, '/tmp/out.upk')
    unpacked = packed &&
      system('cargo', 'run', '--release', 'unpack', *options, '/tmp/out.upk', '/tmp/out.bin')
    # Compare raw bytes: binread avoids text-mode newline translation and
    # encoding handling, which must not influence a binary comparison.
    if unpacked && File.binread(file) == File.binread('/tmp/out.bin')
      config_results << File.size('/tmp/out.upk')
    else
      config_results << 'ERR'
    end
    print_results(configs, short_names, results)
  end
end

View File

@@ -1,8 +1,8 @@
use crate::rans::{PROB_BITS, ONE_PROB};
use crate::rans::{ONE_PROB, PROB_BITS};
const INIT_PROB: u16 = 1 << (PROB_BITS - 1);
const UPDATE_RATE: u32 = 4;
const UPDATE_ADD: u32 = 8;
const UPDATE_RATE: i32 = 4;
const UPDATE_ADD: i32 = 8;
#[derive(Clone)]
pub struct ContextState {
@@ -33,10 +33,13 @@ impl<'a> Context<'a> {
pub fn update(&mut self, bit: bool) {
let old = self.state.contexts[self.index];
self.state.contexts[self.index] = if bit {
old + ((ONE_PROB - old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8
let offset = if !bit {
ONE_PROB as i32 >> UPDATE_RATE
} else {
old - ((old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8
0
};
self.state.contexts[self.index] =
(offset + old as i32 - ((old as i32 + UPDATE_ADD) >> UPDATE_RATE)) as u8;
}
}

View File

@@ -6,12 +6,11 @@ use crate::ProgressCallback;
pub fn pack(
data: &[u8],
use_bitstream: bool,
parity_contexts: usize,
mut progress_callback: Option<ProgressCallback>,
) -> Vec<u8> {
let mut match_finder = MatchFinder::new(data);
let mut rans_coder = RansCoder::new(use_bitstream);
let mut state = lz::CoderState::new(parity_contexts);
let mut state = lz::CoderState::new();
let mut pos = 0;
while pos < data.len() {

View File

@@ -13,19 +13,12 @@ pub fn pack(
data: &[u8],
level: u8,
use_bitstream: bool,
parity_contexts: usize,
progress_callback: Option<ProgressCallback>,
) -> Vec<u8> {
if level == 0 {
greedy_packer::pack(data, use_bitstream, parity_contexts, progress_callback)
greedy_packer::pack(data, use_bitstream, progress_callback)
} else {
parsing_packer::pack(
data,
level,
use_bitstream,
parity_contexts,
progress_callback,
)
parsing_packer::pack(data, level, use_bitstream, progress_callback)
}
}

View File

@@ -9,49 +9,41 @@ pub enum Op {
impl Op {
pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState) {
let literal_base = state.pos % state.parity_contexts * 256;
match self {
&Op::Literal(lit) => {
encode_bit(coder, state, literal_base, false);
encode_bit(coder, state, 0, false);
let mut context_index = 1;
for i in (0..8).rev() {
let bit = (lit >> i) & 1 != 0;
encode_bit(coder, state, literal_base + context_index, bit);
encode_bit(coder, state, context_index, bit);
context_index = (context_index << 1) | bit as usize;
}
state.prev_was_match = false;
state.pos += 1;
}
&Op::Match { offset, len } => {
encode_bit(coder, state, literal_base, true);
encode_bit(coder, state, 0, true);
if !state.prev_was_match {
encode_bit(
coder,
state,
256 * state.parity_contexts,
offset != state.last_offset,
);
encode_bit(coder, state, 256, offset != state.last_offset);
} else {
assert!(offset != state.last_offset);
}
if offset != state.last_offset {
encode_length(coder, state, 256 * state.parity_contexts + 1, offset + 1);
encode_length(coder, state, 257, offset + 1);
state.last_offset = offset;
}
encode_length(coder, state, 256 * state.parity_contexts + 65, len);
encode_length(coder, state, 257 + 64, len);
state.prev_was_match = true;
state.pos += len as usize;
}
}
}
}
pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState) {
encode_bit(coder, state, state.pos % state.parity_contexts * 256, true);
encode_bit(coder, state, 0, true);
if !state.prev_was_match {
encode_bit(coder, state, 256 * state.parity_contexts, true);
encode_bit(coder, state, 256, true);
}
encode_length(coder, state, 256 * state.parity_contexts + 1, 1);
encode_length(coder, state, 257, 1);
}
fn encode_bit(
@@ -84,20 +76,16 @@ fn encode_length(
#[derive(Clone)]
pub struct CoderState {
contexts: ContextState,
parity_contexts: usize,
last_offset: u32,
prev_was_match: bool,
pos: usize,
}
impl CoderState {
pub fn new(parity_contexts: usize) -> CoderState {
pub fn new() -> CoderState {
CoderState {
contexts: ContextState::new((1 + 255) * parity_contexts + 1 + 64 + 64),
contexts: ContextState::new(1 + 255 + 1 + 64 + 64),
last_offset: 0,
parity_contexts,
prev_was_match: false,
pos: 0,
}
}
@@ -106,9 +94,9 @@ impl CoderState {
}
}
pub fn unpack(packed_data: &[u8], use_bitstream: bool, parity_contexts: usize) -> Vec<u8> {
pub fn unpack(packed_data: &[u8], use_bitstream: bool) -> Vec<u8> {
let mut decoder = RansDecoder::new(packed_data, use_bitstream);
let mut contexts = ContextState::new((1 + 255) * parity_contexts + 1 + 64 + 64);
let mut contexts = ContextState::new(1 + 255 + 1 + 64 + 64);
let mut result = vec![];
let mut offset = 0;
let mut prev_was_match = false;
@@ -131,17 +119,14 @@ pub fn unpack(packed_data: &[u8], use_bitstream: bool, parity_contexts: usize) -
}
loop {
let literal_base = result.len() % parity_contexts * 256;
if decoder.decode_with_context(&mut contexts.context_mut(literal_base)) {
if prev_was_match
|| decoder.decode_with_context(&mut contexts.context_mut(256 * parity_contexts))
{
offset = decode_length(&mut decoder, &mut contexts, 256 * parity_contexts + 1) - 1;
if decoder.decode_with_context(&mut contexts.context_mut(0)) {
if prev_was_match || decoder.decode_with_context(&mut contexts.context_mut(256)) {
offset = decode_length(&mut decoder, &mut contexts, 257) - 1;
if offset == 0 {
break;
}
}
let length = decode_length(&mut decoder, &mut contexts, 256 * parity_contexts + 65);
let length = decode_length(&mut decoder, &mut contexts, 257 + 64);
for _ in 0..length {
result.push(result[result.len() - offset]);
}
@@ -150,8 +135,7 @@ pub fn unpack(packed_data: &[u8], use_bitstream: bool, parity_contexts: usize) -
let mut context_index = 1;
let mut byte = 0;
for i in (0..8).rev() {
let bit = decoder
.decode_with_context(&mut contexts.context_mut(literal_base + context_index));
let bit = decoder.decode_with_context(&mut contexts.context_mut(context_index));
context_index = (context_index << 1) | bit as usize;
byte |= (bit as u8) << i;
}

View File

@@ -1,6 +1,5 @@
use anyhow::{bail, Result};
use std::io::prelude::*;
use std::process;
use std::{fs::File, path::PathBuf};
fn main() -> Result<()> {
@@ -11,34 +10,33 @@ fn main() -> Result<()> {
Some("pack") => {
let level = args.opt_value_from_str(["-l", "--level"])?.unwrap_or(2u8);
let use_bitstream = args.contains(["-b", "--bitstream"]);
let parity_contexts = args
.opt_value_from_str(["-p", "--parity"])?
.unwrap_or(1usize);
if parity_contexts != 1 && parity_contexts != 2 && parity_contexts != 4 {
eprintln!("--parity has to be 1, 2 or 4");
process::exit(1);
}
let reverse = args.contains(["-r", "--reverse"]);
let infile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
let outfile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
let mut data = vec![];
File::open(infile)?.read_to_end(&mut data)?;
if reverse {
data.reverse();
}
let mut pb = pbr::ProgressBar::new(data.len() as u64);
pb.set_units(pbr::Units::Bytes);
let packed_data = upkr::pack(
let mut packed_data = upkr::pack(
&data,
level,
use_bitstream,
parity_contexts,
Some(&mut |pos| {
pb.set(pos as u64);
}),
);
pb.finish();
if reverse {
packed_data.reverse();
}
println!(
"Compressed {} bytes to {} bytes ({}%)",
data.len(),
@@ -49,22 +47,21 @@ fn main() -> Result<()> {
}
Some("unpack") => {
let use_bitstream = args.contains(["-b", "--bitstream"]);
let parity_contexts = args
.opt_value_from_str(["-p", "--parity"])?
.unwrap_or(1usize);
if parity_contexts != 1 && parity_contexts != 2 && parity_contexts != 4 {
eprintln!("--parity has to be 1, 2 or 4");
process::exit(1);
}
let reverse = args.contains(["-r", "--reverse"]);
let infile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
let outfile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
let mut data = vec![];
File::open(infile)?.read_to_end(&mut data)?;
let packed_data = upkr::unpack(&data, use_bitstream, parity_contexts);
File::create(outfile)?.write_all(&packed_data)?;
if reverse {
data.reverse();
}
let mut unpacked_data = upkr::unpack(&data, use_bitstream);
if reverse {
unpacked_data.reverse();
}
File::create(outfile)?.write_all(&unpacked_data)?;
}
Some(other) => {
bail!("Unknown subcommand '{}'", other);
@@ -76,11 +73,10 @@ fn main() -> Result<()> {
fn print_help() {
eprintln!("Usage:");
eprintln!(" upkr pack [-b] [-l level(0-9)] [-p N] <infile> <outfile>");
eprintln!(" upkr unpack [-b] [-p N] <infile> <outfile>");
eprintln!(" upkr pack [-b] [-l level(0-9)] <infile> <outfile>");
eprintln!(" upkr unpack [-b] <infile> <outfile>");
eprintln!();
eprintln!(" -b, --bitstream bitstream mode");
eprintln!(" -l, --level N compression level 0-9");
eprintln!(" -p, --parity N use N (2/4) parity contexts");
process::exit(1);
std::process::exit(1);
}

View File

@@ -6,25 +6,14 @@ use crate::match_finder::MatchFinder;
use crate::rans::{CostCounter, RansCoder};
use crate::{lz, ProgressCallback};
pub fn pack(
data: &[u8],
level: u8,
use_bitstream: bool,
parity_contexts: usize,
progress_cb: Option<ProgressCallback>,
) -> Vec<u8> {
let mut parse = parse(
data,
Config::from_level(level),
parity_contexts,
progress_cb,
);
pub fn pack(data: &[u8], level: u8, use_bitstream: bool, progress_cb: Option<ProgressCallback>) -> Vec<u8> {
let mut parse = parse(data, Config::from_level(level), progress_cb);
let mut ops = vec![];
while let Some(link) = parse {
ops.push(link.op);
parse = link.prev.clone();
}
let mut state = lz::CoderState::new(parity_contexts);
let mut state = lz::CoderState::new();
let mut coder = RansCoder::new(use_bitstream);
for op in ops.into_iter().rev() {
op.encode(&mut coder, &mut state);
@@ -49,7 +38,6 @@ type Arrivals = HashMap<usize, Vec<Arrival>>;
fn parse(
data: &[u8],
config: Config,
parity_contexts: usize,
mut progress_cb: Option<ProgressCallback>,
) -> Option<Rc<Parse>> {
let mut match_finder = MatchFinder::new(data)
@@ -141,7 +129,7 @@ fn parse(
0,
Arrival {
parse: None,
state: lz::CoderState::new(parity_contexts),
state: lz::CoderState::new(),
cost: 0.0,
},
max_arrivals,

View File

@@ -38,15 +38,15 @@ impl RansCoder {
let mut state = 1 << l_bits;
let mut byte = 0u8;
let mut bit = 8;
let mut bit = 0;
let mut flush_state: Box<dyn FnMut(&mut u32)> = if self.use_bitstream {
Box::new(|state: &mut u32| {
bit -= 1;
byte |= ((*state & 1) as u8) << bit;
if bit == 0 {
bit += 1;
if bit == 8 {
buffer.push(byte);
byte = 0;
bit = 8;
bit = 0;
}
*state >>= 1;
})
@@ -61,7 +61,7 @@ impl RansCoder {
let max_state_factor: u32 = 1 << (l_bits + num_flush_bits - PROB_BITS);
for step in self.bits.into_iter().rev() {
let prob = step as u32 & 32767;
let (start, prob) = if step & 32768 != 0 {
let (start, prob) = if step & 32768 == 0 {
(0, prob)
} else {
(prob, ONE_PROB - prob)
@@ -118,7 +118,7 @@ impl CostCounter {
impl EntropyCoder for CostCounter {
fn encode_bit(&mut self, bit: bool, prob: u16) {
let prob = if bit {
let prob = if !bit {
prob as u32
} else {
ONE_PROB - prob as u32
@@ -163,8 +163,8 @@ impl<'a> RansDecoder<'a> {
self.data = &self.data[1..];
self.bits_left = 8;
}
self.state = (self.state << 1) | (self.byte & 1) as u32;
self.byte >>= 1;
self.state = (self.state << 1) | (self.byte >> 7) as u32;
self.byte <<= 1;
self.bits_left -= 1;
}
} else {
@@ -174,12 +174,12 @@ impl<'a> RansDecoder<'a> {
}
}
let bit = (self.state & PROB_MASK) < prob;
let bit = (self.state & PROB_MASK) >= prob;
let (start, prob) = if bit {
(0, prob)
} else {
(prob, ONE_PROB - prob)
} else {
(0, prob)
};
self.state = prob * (self.state >> PROB_BITS) + (self.state & PROB_MASK) - start;

3
z80_unpacker/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
*.bin
*.tap
*.lst

11
z80_unpacker/Makefile Normal file
View File

@@ -0,0 +1,11 @@
# "all" and "clean" name actions, not files: declare them phony so that a
# stray file called "all" or "clean" cannot turn them into no-ops.
.PHONY: all clean

all: unpack.bin example/example.sna

# binary is positioned from ORG 0, not usable, just assembling to verify the syntax
unpack.bin: unpack.asm
	sjasmplus --msg=war --lst --lstlab=sort --raw=unpack.bin unpack.asm

# the example includes ../unpack.asm, so it is rebuilt when the unpacker changes
example/example.sna: unpack.asm example/example.asm
	cd example && sjasmplus --msg=war --lst --lstlab=sort example.asm

clean:
	$(RM) unpack.bin unpack.lst example/example.sna example/example.lst

View File

@@ -0,0 +1,100 @@
;; Example using upkr depacker for screens slideshow
OPT --syntax=abf
DEVICE ZXSPECTRUM48,$8FFF
ORG $9000
;; forward example data
;; each record is one border color byte followed by the packed screen; the forward
;; slideshow loop reads the byte at IX, increments IX and unpacks from there
compressed_scr_files.fwd: ; border color byte + upkr-packed .scr file
DB 1
INCBIN "screens/Grongy - ZX Spectrum (2022).scr.upk"
DB 7
INCBIN "screens/Schafft - Poison (2017).scr.upk"
DB 0
INCBIN "screens/diver - Mercenary 4. The Heaven's Devil (2014) (Forever 2014 Olympic Edition, 1).scr.upk"
DB 6
INCBIN "screens/diver - Back to Bjork (2015).scr.upk"
.e: ; end of forward data, referenced as compressed_scr_files.fwd.e by the forward loop
;; backward example data (unpacker goes from the end of the data!)
;; records are mirrored: packed screen first, its border color byte right after it,
;; because the backward slideshow loop walks IX downwards from compressed_scr_files.rwd
compressed_scr_files.rwd.e: EQU $-1 ; the final IX will point one byte ahead of "$" here
INCBIN "screens.reversed/diver - Back to Bjork (2015).scr.upk"
DB 6
INCBIN "screens.reversed/diver - Mercenary 4. The Heaven's Devil (2014) (Forever 2014 Olympic Edition, 1).scr.upk"
DB 0
INCBIN "screens.reversed/Schafft - Poison (2017).scr.upk"
DB 7
INCBIN "screens.reversed/Grongy - ZX Spectrum (2022).scr.upk"
compressed_scr_files.rwd: ; border color byte + upkr-packed .scr file (backward)
DB 1
;; Entry point: endless slideshow, first unpacking every forward-packed screen, then every
;; backward-packed one, then starting over. No EI is issued anywhere in this routine.
start:
di
; OPT --zxnext
; nextreg 7,3 ; ZX Next: switch to 28Mhz
;;; FORWARD packed/unpacked data demo
ld ix,compressed_scr_files.fwd
.slideshow_loop.fwd:
; set BORDER for next image
ld a,(ix)
inc ix
out (254),a
; call unpack of next image directly into VRAM
ld de,$4000 ; target VRAM
exx ; move the destination into the shadow set (DE'), as the unpacker expects
; IX = packed data, DE' = destination ($4000)
; returned IX will point right after the packed data
call fwd.upkr.unpack
; do some busy loop with CPU to delay between images
call delay
; check if all images were displayed, loop around from first one then
; NOTE: only the low byte of IX is compared with the end label, so this assumes that
; no earlier record boundary has the same low address byte
ld a,ixl
cp low compressed_scr_files.fwd.e
jr nz,.slideshow_loop.fwd
;;; BACKWARD packed/unpacked data demo
ld ix,compressed_scr_files.rwd
.slideshow_loop.rwd:
; set BORDER for next image
ld a,(ix)
dec ix ; backward data is walked downwards: IX now points at the last packed byte
out (254),a
; call unpack of next image directly into VRAM
ld de,$5AFF ; target VRAM (last byte: $4000 + 6912 - 1, the unpacker writes downwards)
exx ; move the destination into the shadow set (DE')
; IX = packed data, DE' = destination
; returned IX will point right ahead of the packed data
call rwd.upkr.unpack
; do some busy loop with CPU to delay between images
call delay
; check if all images were displayed, loop around from first one then
; (same low-byte-only comparison as in the forward loop)
ld a,ixl
cp low compressed_scr_files.rwd.e
jr nz,.slideshow_loop.rwd
jr start ; restart the whole slideshow
;; Busy-wait between two images. Modifies BC and flags.
;; B = $AA outer passes, C = 0 so the inner loop runs 256 times per outer pass.
delay:
ld bc,$AA00
.delay:
; sjasmplus repeat prefix: the instruction is emitted 8 times; an even number of
; swaps leaves both IX and the top of the stack unchanged, it only burns cycles
.8 ex (sp),ix
dec c
jr nz,.delay
djnz .delay ; C is 0 again here, so the next pass is another 256 inner iterations
ret
; include the depacker library, optionally putting probs array buffer near end of RAM
; the unpacker source is assembled twice, each copy wrapped in its own MODULE, which gives
; the two entry points called from start: fwd.upkr.unpack and rwd.upkr.unpack
; NOTE(review): UPKR_PROBS_ORIGIN stays defined for both copies, so presumably both share
; the same probs buffer at $FA00 - confirm against unpack.asm
DEFINE UPKR_PROBS_ORIGIN $FA00 ; if not defined, array will be put after unpack code
MODULE fwd
INCLUDE "../unpack.asm"
ENDMODULE
MODULE rwd
DEFINE BACKWARDS_UNPACK ; defined to build backwards unpack
; initial IX points at last byte of compressed data
; initial DE' points at last byte of unpacked data
INCLUDE "../unpack.asm"
ENDMODULE
; write the snapshot file with "start" as its entry point
SAVESNA "example.sna",start

Binary file not shown.

32
z80_unpacker/readme.txt Normal file
View File

@@ -0,0 +1,32 @@
Z80 asm implementation of C unpacker, code-size focused (not performance).
**ONLY BITSTREAM** variant is currently supported, make sure to use "-b" in packer.
The project is expected to evolve further, including possible changes to the binary format; this is
the initial version of the Z80 unpacker, written to explore if/how it works and how it can be improved.
(copy full packer+depacker source to your project if you plan to use it, as future revisions
may be incompatible with files you will produce with current version)
Asm syntax is z00m's sjasmplus: https://github.com/z00m128/sjasmplus
Backward direction unpacker added as compile-time option, see example for both forward/backward
depacker in action.
The packed/unpacked data overlap has to be tested case by case: in the worst case more than 7 bytes
of packed data may be needed to unpack the final byte, but usually 1-4 bytes suffice.
TODO:
- build bigger corpus of test data to benchmark future changes in algorithm/format (example and zx48.rom was used to do initial tests)
- maybe try to beat double-loop `decode_number` with different encoding format
- (@ped7g) Z80N version of unpacker for ZX Next devs
- (@exoticorn) add Z80 specific packer (to avoid confusion with original MicroW8 variant), and land it all to master branch, maybe in "z80" directory or something? (and overall decide how to organise+merge this upstream into main repo)
- (@exoticorn) add to packer output with possible packed/unpacked region overlap
DONE:
* review non-bitstream variant, if it's feasible to try to implement it with Z80
- Ped7g: IMHO nope, the 12b x 8b MUL code would probably quickly cancel any gains from the simpler state update
* review first implementation to identify weak spots where the implementation can be shorter+faster
with acceptable small changes to the format
- Ped7g: the decode_bit settled down and now doesn't feel so confused and redundant, the code seems pretty on point to me, no obvious simplification from format change
- Ped7g: the decode_number double-loop is surprisingly resilient, especially in terms of code size I failed to beat it, speed wise only negligible gains

381
z80_unpacker/unpack.asm Normal file
View File

@@ -0,0 +1,381 @@
;; https://github.com/exoticorn/upkr/blob/z80/c_unpacker/unpack.c - original C implementation
;; C source in comments ahead of asm - the C macros are removed to keep only bitstream variant
;;
;; initial version by Peter "Ped" Helcmanovsky (C) 2022, licensed same as upkr project ("unlicensed")
;; to assemble use z00m's sjasmplus: https://github.com/z00m128/sjasmplus
;;
;; you can define UPKR_PROBS_ORIGIN to specific 256 byte aligned address for probs array (320 bytes),
;; otherwise it will be positioned after the unpacker code (256 aligned)
;;
;; public API:
;;
;; upkr.unpack
;; IN: IX = packed data, DE' (shadow DE) = destination
;; OUT: IX = after packed data
;; modifies: all registers except IY, requires 10 bytes of stack space
;;
; DEFINE BACKWARDS_UNPACK ; uncomment to build backwards depacker (write_ptr--, upkr_data_ptr--)
; initial IX points at last byte of compressed data
; initial DE' points at last byte of unpacked data
; DEFINE UPKR_UNPACK_SPEED ; uncomment to get larger but faster unpack routine
; code size hint: if you put probs array just ahead of BASIC entry point, you will get BC
; initialised to probs.e by BASIC `USR` command and you can remove it from unpack init (-3B)
OPT push reset --syntax=abf
MODULE upkr
NUMBER_BITS EQU 16+15 ; context-bits per offset/length (16+15 for 16bit offsets/pointers)
; numbers (offsets/lengths) are encoded like: 1a1b1c1d1e0 = 0000'0000'001e'dbca
/*
u8* upkr_data_ptr;
u8 upkr_probs[1 + 255 + 1 + 2*32 + 2*32];
u16 upkr_state;
u8 upkr_current_byte;
int upkr_bits_left;
int upkr_unpack(void* destination, void* compressed_data) {
upkr_data_ptr = (u8*)compressed_data;
upkr_state = 0;
upkr_bits_left = 0;
for(int i = 0; i < sizeof(upkr_probs); ++i)
upkr_probs[i] = 128;
u8* write_ptr = (u8*)destination;
int prev_was_match = 0;
int offset = 0;
for(;;) {
if(upkr_decode_bit(0)) {
if(prev_was_match || upkr_decode_bit(256)) {
offset = upkr_decode_length(257) - 1;
if(offset == 0) {
break;
}
}
int length = upkr_decode_length(257 + 64);
while(length--) {
*write_ptr = write_ptr[-offset];
++write_ptr;
}
prev_was_match = 1;
} else {
int byte = 1;
while(byte < 256) {
int bit = upkr_decode_bit(byte);
byte = (byte << 1) + bit;
}
*write_ptr++ = byte;
prev_was_match = 0;
}
}
return write_ptr - (u8*)destination;
}
*/
; upkr.unpack: decompresses an upkr bitstream (asm version of the C upkr_unpack() quoted above)
; IN: IX = compressed_data, DE' = destination
; (with BACKWARDS_UNPACK defined both pointers are decremented instead of incremented)
; Returns to the caller through the `ret z` in .copy_chunk, once an offset of 0 is decoded.
; Register roles in the main loop:
; HL = upkr_state, BC = probs + context_index, A' = upkr_current_byte (including stop bit),
; D = prev_was_match, DE' = write_ptr, IX = upkr_data_ptr
unpack:
; ** reset probs to 0x80, also reset HL (state) to zero, and set BC to probs+context 0
; only L is used as loop counter (two bytes are stored per pass); probs.c>>1 is below 256,
; so H is zero from the start and HL is 0 (= initial upkr_state) when the loop ends
ld hl,probs.c>>1
ld bc,probs.e
ld a,$80
.reset_probs:
dec bc
ld (bc),a ; first store hits the padding byte after the real (odd-sized) array, see probs.c
dec bc
ld (bc),a
dec l
jr nz,.reset_probs
; move the $80 into A', where decode_bit keeps upkr_current_byte
exa
; BC = probs (context_index 0), state HL = 0, A' = 0x80 (no source bits left in upkr_current_byte)
; ** main loop to decompress data
; D = prev_was_match = uninitialised, literal is expected first => will reset D to "false"
; values for false/true of prev_was_match are: false = high(probs), true = 1 + high(probs)
.decompress_data:
; B = high(probs) here, so C alone selects context_index 0..255
ld c,0
call decode_bit ; if(upkr_decode_bit(0))
jr c,.copy_chunk
; * extract byte from compressed data (literal)
inc c ; C = byte = 1 (and also context_index)
.decode_byte:
call decode_bit ; bit = upkr_decode_bit(byte);
rl c ; byte = (byte << 1) + bit;
; the leading 1 of `byte` is shifted out into CF after eight bits, leaving the literal in C
jr nc,.decode_byte ; while(byte < 256)
ld a,c
; switch to the shadow set to reach DE' = write_ptr
exx
ld (de),a ; *write_ptr++ = byte;
IFNDEF BACKWARDS_UNPACK : inc de : ELSE : dec de : ENDIF
exx
ld d,b ; prev_was_match = false
jr .decompress_data
; * copy chunk of already decompressed data (match)
.copy_chunk:
; A = high(probs) = the "false" value of prev_was_match, for the compare against D below
ld a,b
inc b ; context_index = 256
; if(prev_was_match || upkr_decode_bit(256)) {
; offset = upkr_decode_length(257) - 1;
; if (0 == offset) break;
; }
cp d ; CF = prev_was_match
call nc,decode_bit ; if not prev_was_match, then upkr_decode_bit(256)
jr nc,.keep_offset ; if neither, keep old offset
call decode_number ; context_index is already 257-1 as needed by decode_number
dec de ; offset = upkr_decode_length(257) - 1;
ld a,d
or e
ret z ; if(offset == 0) break
; store the new offset directly into the operand of the instruction labelled .offset below
; (self-modifying code); when the offset is kept, the previously stored value is reused
ld (.offset),de
.keep_offset:
; int length = upkr_decode_length(257 + 64);
; while(length--) {
; *write_ptr = write_ptr[-offset];
; ++write_ptr;
; }
; prev_was_match = 1;
; (in this asm version the second number set starts at context 257 + NUMBER_BITS, not 257 + 64)
ld c,low(257 + NUMBER_BITS - 1) ; context_index to second "number" set for lengths decoding
call decode_number ; length = upkr_decode_length(257 + 64);
; pass the length from DE of the main set to BC' of the shadow set through the stack
push de
exx
IFNDEF BACKWARDS_UNPACK
; forward unpack (write_ptr++, upkr_data_ptr++)
ld h,d ; DE = write_ptr
ld l,e
; the 0 below is a placeholder, overwritten at runtime by `ld (.offset),de` above
; NOTE(review): `label+*:` is presumably the sjasmplus SMC-offset label form pointing at the
; immediate operand - confirm against the sjasmplus documentation
.offset+*: ld bc,0
sbc hl,bc ; CF=0 from decode_number ; HL = write_ptr - offset
pop bc ; BC = length
; copy `length` bytes from HL to DE, both incrementing
ldir
ELSE
; backward unpack (write_ptr--, upkr_data_ptr--)
; the 0 below is a placeholder, overwritten at runtime by `ld (.offset),de` above
.offset+*: ld hl,0
add hl,de ; HL = write_ptr + offset
pop bc ; BC = length
; copy `length` bytes from HL to DE, both decrementing
lddr
ENDIF
exx
; B is still high(probs) + 1 here (set by `inc b` at .copy_chunk)
ld d,b ; prev_was_match = true
; djnz is used as "dec b + jump": it only loops while the decremented B is non-zero, so this
; relies on high(probs) != 0 (otherwise execution would fall through into inc_c_decode_bit)
djnz .decompress_data ; adjust context_index back to 0..255 range, go to main loop
/*
int upkr_decode_bit(int context_index) {
while(upkr_state < 32768) {
if(upkr_bits_left == 0) {
upkr_current_byte = *upkr_data_ptr++;
upkr_bits_left = 8;
}
upkr_state = (upkr_state << 1) + (upkr_current_byte >> 7);
upkr_current_byte <<= 1;
--upkr_bits_left;
}
int prob = upkr_probs[context_index];
int bit = (upkr_state & 255) >= prob ? 1 : 0;
int prob_offset = 16;
int state_offset = 0;
int state_scale = prob;
if(bit) {
state_offset = -prob;
state_scale = 256 - prob;
prob_offset = 0;
}
upkr_state = state_offset + state_scale * (upkr_state >> 8) + (upkr_state & 255);
upkr_probs[context_index] = prob_offset + prob - ((prob + 8) >> 4);
return bit;
}
*/
; inc_c_decode_bit / decode_bit: decode one bit using the adaptive probability stored at (BC)
; (asm version of the C upkr_decode_bit() quoted above)
; IN: HL = upkr_state, IX = upkr_data_ptr, BC = probs+context_index, A' = upkr_current_byte
; OUT: CF = decoded bit, HL = new upkr_state, byte at (BC) = updated probability,
; IX and A' advanced by the source bits consumed
; preserves BC (after the optional `inc c`) and DE, modifies A and flags
; stack: three pushes (DE, BC, AF) besides the return address
inc_c_decode_bit:
; ++low(context_index) before decode_bit (to get -1B by two calls in decode_number)
inc c
decode_bit:
; HL = upkr_state
; IX = upkr_data_ptr
; BC = probs+context_index
; A' = upkr_current_byte (!!! init to 0x80 at start, not 0x00)
; preserves DE
; ** while (state < 32768) - initial check
push de
bit 7,h
jr nz,.state_b15_set
; bring upkr_current_byte from A' into A for the refill loop
exa
; ** while body
.state_b15_zero:
; HL = upkr_state
; IX = upkr_data_ptr
; A = upkr_current_byte (init to 0x80 at start, not 0x00)
; the lowest set bit of A is a stop bit: when shifting leaves A == 0, the bit that just went
; into CF was the stop bit itself, i.e. no data bits are left and a new byte must be fetched
add a,a ; upkr_current_byte <<= 1; // and testing if(upkr_bits_left == 0)
jr nz,.has_bit ; CF=data, ZF=0 -> some bits + stop bit still available
; CF=1 (by stop bit)
ld a,(ix)
IFNDEF BACKWARDS_UNPACK : inc ix : ELSE : dec ix : ENDIF ; upkr_current_byte = *upkr_data_ptr++;
adc a,a ; CF=data, b0=1 as new stop bit
.has_bit:
adc hl,hl ; upkr_state = (upkr_state << 1) + (upkr_current_byte >> 7);
; the sign flag from the 16-bit adc reflects bit 15 of the new state
jp p,.state_b15_zero ; while (state < 32768)
; put upkr_current_byte back to A'
exa
; ** set "bit"
.state_b15_set:
ld a,(bc) ; A = upkr_probs[context_index]
dec a ; prob is in ~7..249 range, never zero, safe to -1
cp l ; CF = bit = prob-1 < (upkr_state & 255) <=> prob <= (upkr_state & 255)
; inc does not touch CF, so A = prob again with CF = bit still valid
inc a
; ** adjust state
; keep the probs pointer on stack, B and C are reused as multiply counter / low state byte
push bc
ld c,l ; C = (upkr_state & 255); (preserving the value)
; keep prob (A) and bit (CF) for the second half of the routine
push af
jr nc,.bit_is_0
neg ; A = -prob == (256-prob), CF=1 preserved
.bit_is_0:
ld d,0
ld e,a ; DE = state_scale ; prob || (256-prob)
ld l,d ; H:L = (upkr_state>>8) : 0
IFNDEF UPKR_UNPACK_SPEED
;; looped MUL for minimum unpack size
; shift-and-add 8x8 multiply: the multiplier bits leave H at the top while the product
; accumulates in HL from the bottom
ld b,8 ; counter
.mulLoop:
add hl,hl
jr nc,.mul0
add hl,de
.mul0:
djnz .mulLoop ; until HL = state_scale * (upkr_state>>8), also BC becomes (upkr_state & 255)
ELSE
;;; unrolled MUL for better performance, +25 bytes unpack size
; B = 0 (D is zero here), so BC = (upkr_state & 255) just like after the looped variant
ld b,d
DUP 8
add hl,hl
jr nc,0_f
add hl,de
0:
EDUP
ENDIF
add hl,bc ; HL = state_scale * (upkr_state >> 8) + (upkr_state & 255)
pop af ; restore prob and CF=bit
jr nc,.bit_is_0_2
; E already holds (256 - prob), so setting D to $FF makes DE the 16-bit value -prob
dec d ; DE = -prob (also D = bit ? $FF : $00)
add hl,de ; HL += -prob
; ^ this always preserves CF=1, because (state>>8) >= 128, state_scale: 7..250, prob: 7..250,
; so 7*128 > 250 and thus edge case `ADD hl=(7*128+0),de=(-250)` => CF=1
.bit_is_0_2:
; *** adjust probs[context_index]
; A = prob and CF = bit at this point; the shifts below build (bit<<4) + (prob>>4) in A
rra ; + (bit<<4) ; part of -prob_offset, needs another -16
and $FC ; clear/keep correct bits to get desired (prob>>4) + extras, CF=0
rra
rra
rra ; A = (bit<<4) + (prob>>4), CF=(prob & 8)
; adding CF = bit 3 of prob turns (prob>>4) into the rounded ((prob + 8)>>4)
adc a,-16 ; A = (bit<<4) - 16 + ((prob + 8)>>4) ; -prob_offset = (bit<<4) - 16
ld e,a
; restore BC = probs+context_index
pop bc
ld a,(bc) ; A = prob (cheaper + shorter to re-read again from memory)
sub e ; A = 16 - (bit<<4) + prob - ((prob + 8)>>4) ; = prob_offset + prob - ((prob + 8)>>4)
ld (bc),a ; probs[context_index] = prob_offset + prob - ((prob + 8) >> 4);
; D is $FF for bit = 1 and $00 for bit = 0; adding it to the non-zero A overflows only for $FF
add a,d ; restore CF = bit (D = bit ? $FF : $00 && A > 0)
; restore caller's DE (pop does not affect flags, CF = bit is returned)
pop de
ret
/*
int upkr_decode_length(int context_index) {
int length = 0;
int bit_pos = 0;
while(upkr_decode_bit(context_index)) {
length |= upkr_decode_bit(context_index + 1) << bit_pos++;
context_index += 2;
}
return length | (1 << bit_pos);
}
*/
; decode_number: decode one offset/length value (asm version of the C upkr_decode_length() above)
; The value is read as pairs of (continue flag, data bit); data bits are collected by rotating
; them into DE from the top, the first pass through .loop rotates in a zero marker bit, and the
; .fix_bit_pos loop keeps rotating until that marker drops out of bit 0, which leaves all data
; bits at their final positions with the `1 << bit_pos` bit directly above them.
; C is left pointing at the last context used, callers set C again before the next decode.
; NOTE(review): assumes valid input with at most 15 data bits per number - confirm with the packer
decode_number:
; HL = upkr_state
; IX = upkr_data_ptr
; BC = probs+context_index-1
; A' = upkr_current_byte (!!! init to 0x80 at start, not 0x00)
; return length in DE, CF=0
ld de,$FFFF ; length = 0 with positional-stop-bit
or a ; CF=0 to skip getting data bit and use only `rr d : rr e` to fix init DE
.loop:
; first pass: CF=0, the call is skipped and the rotation below inserts the zero marker;
; later passes arrive here with CF=1 (from `jr c,.loop`) and always decode a data bit
call c,inc_c_decode_bit ; get data bit, context_index + 1 / if CF=0 just add stop bit into DE init
rr d
rr e ; DE = length = (length >> 1) | (bit << 15);
; decode the continue flag: CF=1 means another data bit follows
call inc_c_decode_bit ; context_index += 2
jr c,.loop
.fix_bit_pos:
; first pass: CF=0 (last continue flag) becomes 1 = the top bit of the value;
; later passes: CF=1 (from `jr c` below) becomes 0 = zero padding above the value
ccf ; NC will become this final `| (1 << bit_pos)` bit
rr d
rr e
jr c,.fix_bit_pos ; until stop bit is reached (all bits did land to correct position)
ret ; return with CF=0 (important for unpack routine)
; report the size of the emitted unpacker code (from `unpack` up to here) at assembly time
DISPLAY "upkr.unpack total size: ",/D,$-unpack
; reserve space for probs array without emitting any machine code (using only EQU)
; the array has to start at a 256-byte boundary: the unpacker addresses contexts 0..255 by
; changing only the low byte of the pointer and contexts 256+ by incrementing the high byte
IFDEF UPKR_PROBS_ORIGIN ; if specific address is defined by user, move probs array there
; the user supplied address is rounded up to the next multiple of 256 (unless already aligned)
probs: EQU ((UPKR_PROBS_ORIGIN) + 255) & -$100 ; probs array aligned to 256
ELSE
; default placement: first 256-byte boundary at or after the end of the unpacker code
probs: EQU ($ + 255) & -$100 ; probs array aligned to 256
ENDIF
; contexts: 1 (literal/match flag) + 255 (literal bits) + 1 (new offset flag) + two number sets
.real_c: EQU 1 + 255 + 1 + 2*NUMBER_BITS ; real size of probs array
; the init loop in `unpack` stores two bytes per pass, so the size is rounded up to even
.c: EQU (.real_c + 1) & -2 ; padding to even size (required by init code)
; address just after the padded array, the init loop fills memory downwards from here
.e: EQU probs + .c
DISPLAY "upkr.unpack probs array placed at: ",/A,probs,",\tsize: ",/A,probs.c
/*
archived: negligibly faster but +6B longer decode_number variant using HL' and BC' to
do `number|=(1<<bit_pos);` type of logic in single loop.
*/
; decode_number:
; exx
; ld bc,1
; ld l,b
; ld h,b ; HL = 0
; .loop
; exx
; inc c
; call decode_bit
; jr nc,.done
; inc c
; call decode_bit
; exx
; jr nc,.b0
; add hl,bc
; .b0:
; sla c
; rl b
; jr .loop
; .done:
; exx
; add hl,bc
; push hl
; exx
; pop de
; ret
/*
archived: possible LUT variant of updating probs value, requires 512-aligned 512B table (not tested)
*/
; code is replacing decode_bit from "; *** adjust probs[context_index]", followed by `ld (bc),a : add a,d ...`
; ld c,a
; ld a,high(probs_update_table)/2 ; must be 512 aligned
; rla
; ld b,a
; ld a,(bc)
; pop bc
; -------------------------------------------
; probs_update_table: EQU probs-512
; -------------------------------------------
; table generator is not obvious and probably not short either, 20+ bytes almost for sure, maybe even 30-40
; close the `upkr` module namespace opened by `MODULE upkr`
ENDMODULE
; restore the assembler options saved by `OPT push` at the top of this file
OPT pop