27 Commits

Author SHA1 Message Date
c8924456aa -r reverses both input and output 2022-09-18 23:38:41 +02:00
7b0e22f459 Merge pull request #3 from ped7g/z80_ped7g
backward unpacker + example extended
2022-09-18 23:24:28 +02:00
Peter Helcmanovsky (Ped)
165f593a11 z80_unpacker: (codestyle) whitespace + temporary label rename 2022-09-18 23:04:37 +02:00
Peter Helcmanovsky (Ped)
d4bce4bf7c z80_unpacker: optimisation: -3B and ~-10T in decode_bit = 174B
unpack zx48.rom is now ~22.6s (from 23.0s)
(performance version is now 199 bytes, zx48.rom unpack 19.4s -> 19.0s)
2022-09-18 22:54:10 +02:00
Peter Helcmanovsky (Ped)
b13fa05413 z80_unpacker: add backward variant of unpacker + example extended 2022-09-18 00:23:14 +02:00
Peter Helcmanovsky (Ped)
3c773aca8d z80_unpacker: add performance variant of depacker 2022-09-16 03:38:03 +02:00
a5406deb30 Merge pull request #2 from ped7g/z80_ped7g
Z80 ped7g - few more optimisations for current variant of packer
2022-09-16 00:26:55 +02:00
Peter Helcmanovsky (Ped)
9211544cb9 z80_unpacker: add resulting snapshot file to example 2022-09-15 18:37:06 +02:00
Peter Helcmanovsky (Ped)
3fa9e0fa12 z80_unpacker: optimisations: 0B, -13T in decode_bit (stays 177B) 2022-09-15 18:22:33 +02:00
Peter Helcmanovsky (Ped)
aa3fad4d80 z80_unpacker: optimisations: -3B and ~-24T in decode_bit = 177B 2022-09-15 18:22:32 +02:00
Peter Helcmanovsky (Ped)
6624940ed9 z80_unpacker: optimisations: -2B and -27T in decode_bit = 180B 2022-09-15 18:22:32 +02:00
Peter Helcmanovsky (Ped)
c3a9773e5c z80_unpacker: optimisations: -1B in unpack implementation = 182B 2022-09-15 18:22:31 +02:00
Peter Helcmanovsky (Ped)
a75a35efb2 z80_unpacker: probs context-size for offset/length numbers as EQU 2022-09-15 18:22:27 +02:00
540a91d1ba forgot to add back -l 9 2022-09-15 00:18:30 +02:00
e7aaf1491a add old-prob-update to compare script, add reverse option 2022-09-14 23:51:38 +02:00
a1dabaf7f9 add simple script to compare compression of variants 2022-09-14 23:41:14 +02:00
75e375fb1f Merge branch 'ped7g-z80_ped7g' into z80 2022-09-14 09:03:28 +02:00
Peter Helcmanovsky (Ped)
c7ea11bce3 z80_unpacker: optimisations: -2B in unpack implementation = 183B 2022-09-14 01:44:04 +02:00
Peter Helcmanovsky (Ped)
02d20867ee z80_unpacker: optimisations: -2B in unpack implementation = 185B 2022-09-14 01:01:56 +02:00
Peter Helcmanovsky (Ped)
511ddefc08 z80_unpacker: optimisations: -4T per offset/length bit decoded
making the 256-alignment of probs array even more baked-in, but there
was no real chance to get rid of that any way
2022-09-14 00:01:51 +02:00
Peter Helcmanovsky (Ped)
d30baaa91f z80_unpacker: optimisations: -1B by keeping write_ptr in DE' 2022-09-13 23:57:59 +02:00
Peter Helcmanovsky (Ped)
919a892ef0 z80_unpacker: optimisations: -1B by decode_length returning CF=0 2022-09-13 23:25:03 +02:00
Peter Helcmanovsky (Ped)
ea5c0b1b15 z80_unpacker: optimisations: shorter >>4 in probs update 2022-09-13 23:15:18 +02:00
Peter Helcmanovsky (Ped)
a19ec2abb7 z80_unpacker: optimisations: remove .offset init
first offset is mandatory in packed data
2022-09-13 22:53:15 +02:00
Peter Helcmanovsky (Ped)
7b051113e1 z80_unpacker: initial working version with screen-slideshow example 2022-09-13 22:12:03 +02:00
f1f1c64a76 implement simplified prob update, update unpack.c 2022-09-10 12:01:42 +02:00
36cb6d77b5 BE bitstream, flip bit encoding 2022-09-10 11:31:09 +02:00
23 changed files with 561 additions and 33 deletions

View File

@@ -19,8 +19,8 @@ int upkr_decode_bit(int context_index) {
upkr_current_byte = *upkr_data_ptr++;
upkr_bits_left = 8;
}
upkr_state = (upkr_state << 1) + (upkr_current_byte & 1);
upkr_current_byte >>= 1;
upkr_state = (upkr_state << 1) + (upkr_current_byte >> 7);
upkr_current_byte <<= 1;
--upkr_bits_left;
}
#else
@@ -30,19 +30,18 @@ int upkr_decode_bit(int context_index) {
#endif
int prob = upkr_probs[context_index];
int bit = (upkr_state & 255) < prob ? 1 : 0;
int bit = (upkr_state & 255) >= prob ? 1 : 0;
int tmp = prob;
if(!bit) {
tmp = 256 - tmp;
int prob_offset = 16;
int state_offset = 0;
int state_scale = prob;
if(bit) {
state_offset = -prob;
state_scale = 256 - prob;
prob_offset = 0;
}
upkr_state = tmp * (upkr_state >> 8) + (upkr_state & 255);
tmp += (256 - tmp + 8) >> 4;
if(!bit) {
upkr_state -= prob;
tmp = 256 - tmp;
}
upkr_probs[context_index] = tmp;
upkr_state = state_offset + state_scale * (upkr_state >> 8) + (upkr_state & 255);
upkr_probs[context_index] = prob_offset + prob - ((prob + 8) >> 4);
return bit;
}

50
compare-variants Executable file
View File

@@ -0,0 +1,50 @@
#!/bin/env ruby
configs = [
[:master, '-b'],
[:z80, '-b'],
[:z80, ['-b', '-r']],
['old-prob-update', '-b']
]
files = Dir[ARGV[0] + '/*'].select {|f| !(f =~ /\.txt$/) }
short_names = files.map {|f| File.basename(f)[..16] }
results = []
def print_results(configs, names, results)
configs.each_with_index do |config, i|
printf "%d: %s\n", i + 1, config
end
print ' '
configs.each_index do |i|
printf " %-4d", i + 1
end
puts
names.each_with_index do |name, i|
printf "%16s", name
for res in results
res = res[i]
printf " %-4s", res if res
end
puts
end
end
for config in configs
raise unless system('git', 'checkout', config[0].to_s)
config_results = []
results << config_results
for file in files
if system(*['cargo', 'run', '--release', 'pack', '-l', '9', config[1], file, '/tmp/out.upk'].flatten) &&
system(*['cargo', 'run', '--release', 'unpack', config[1], '/tmp/out.upk', '/tmp/out.bin'].flatten) &&
File.read(file) == File.read('/tmp/out.bin')
size = File.size('/tmp/out.upk')
config_results << size
else
config_results << 'ERR'
end
print_results(configs, short_names, results)
end
end

View File

@@ -1,8 +1,8 @@
use crate::rans::{PROB_BITS, ONE_PROB};
use crate::rans::{ONE_PROB, PROB_BITS};
const INIT_PROB: u16 = 1 << (PROB_BITS - 1);
const UPDATE_RATE: u32 = 4;
const UPDATE_ADD: u32 = 8;
const UPDATE_RATE: i32 = 4;
const UPDATE_ADD: i32 = 8;
#[derive(Clone)]
pub struct ContextState {
@@ -33,10 +33,13 @@ impl<'a> Context<'a> {
pub fn update(&mut self, bit: bool) {
let old = self.state.contexts[self.index];
self.state.contexts[self.index] = if bit {
old + ((ONE_PROB - old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8
let offset = if !bit {
ONE_PROB as i32 >> UPDATE_RATE
} else {
old - ((old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8
0
};
self.state.contexts[self.index] =
(offset + old as i32 - ((old as i32 + UPDATE_ADD) >> UPDATE_RATE)) as u8;
}
}

View File

@@ -10,16 +10,20 @@ fn main() -> Result<()> {
Some("pack") => {
let level = args.opt_value_from_str(["-l", "--level"])?.unwrap_or(2u8);
let use_bitstream = args.contains(["-b", "--bitstream"]);
let reverse = args.contains(["-r", "--reverse"]);
let infile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
let outfile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
let mut data = vec![];
File::open(infile)?.read_to_end(&mut data)?;
if reverse {
data.reverse();
}
let mut pb = pbr::ProgressBar::new(data.len() as u64);
pb.set_units(pbr::Units::Bytes);
let packed_data = upkr::pack(
let mut packed_data = upkr::pack(
&data,
level,
use_bitstream,
@@ -29,6 +33,10 @@ fn main() -> Result<()> {
);
pb.finish();
if reverse {
packed_data.reverse();
}
println!(
"Compressed {} bytes to {} bytes ({}%)",
data.len(),
@@ -39,14 +47,21 @@ fn main() -> Result<()> {
}
Some("unpack") => {
let use_bitstream = args.contains(["-b", "--bitstream"]);
let reverse = args.contains(["-r", "--reverse"]);
let infile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
let outfile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
let mut data = vec![];
File::open(infile)?.read_to_end(&mut data)?;
let packed_data = upkr::unpack(&data, use_bitstream);
File::create(outfile)?.write_all(&packed_data)?;
if reverse {
data.reverse();
}
let mut unpacked_data = upkr::unpack(&data, use_bitstream);
if reverse {
unpacked_data.reverse();
}
File::create(outfile)?.write_all(&unpacked_data)?;
}
Some(other) => {
bail!("Unknown subcommand '{}'", other);

View File

@@ -38,15 +38,15 @@ impl RansCoder {
let mut state = 1 << l_bits;
let mut byte = 0u8;
let mut bit = 8;
let mut bit = 0;
let mut flush_state: Box<dyn FnMut(&mut u32)> = if self.use_bitstream {
Box::new(|state: &mut u32| {
bit -= 1;
byte |= ((*state & 1) as u8) << bit;
if bit == 0 {
bit += 1;
if bit == 8 {
buffer.push(byte);
byte = 0;
bit = 8;
bit = 0;
}
*state >>= 1;
})
@@ -61,7 +61,7 @@ impl RansCoder {
let max_state_factor: u32 = 1 << (l_bits + num_flush_bits - PROB_BITS);
for step in self.bits.into_iter().rev() {
let prob = step as u32 & 32767;
let (start, prob) = if step & 32768 != 0 {
let (start, prob) = if step & 32768 == 0 {
(0, prob)
} else {
(prob, ONE_PROB - prob)
@@ -118,7 +118,7 @@ impl CostCounter {
impl EntropyCoder for CostCounter {
fn encode_bit(&mut self, bit: bool, prob: u16) {
let prob = if bit {
let prob = if !bit {
prob as u32
} else {
ONE_PROB - prob as u32
@@ -163,8 +163,8 @@ impl<'a> RansDecoder<'a> {
self.data = &self.data[1..];
self.bits_left = 8;
}
self.state = (self.state << 1) | (self.byte & 1) as u32;
self.byte >>= 1;
self.state = (self.state << 1) | (self.byte >> 7) as u32;
self.byte <<= 1;
self.bits_left -= 1;
}
} else {
@@ -174,12 +174,12 @@ impl<'a> RansDecoder<'a> {
}
}
let bit = (self.state & PROB_MASK) < prob;
let bit = (self.state & PROB_MASK) >= prob;
let (start, prob) = if bit {
(0, prob)
} else {
(prob, ONE_PROB - prob)
} else {
(0, prob)
};
self.state = prob * (self.state >> PROB_BITS) + (self.state & PROB_MASK) - start;

3
z80_unpacker/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
*.bin
*.tap
*.lst

11
z80_unpacker/Makefile Normal file
View File

@@ -0,0 +1,11 @@
all: unpack.bin example/example.sna
# binary is positioned from ORG 0, not usable, just assembling to verify the syntax
unpack.bin: unpack.asm
sjasmplus --msg=war --lst --lstlab=sort --raw=unpack.bin unpack.asm
example/example.sna: unpack.asm example/example.asm
cd example && sjasmplus --msg=war --lst --lstlab=sort example.asm
clean:
$(RM) unpack.bin unpack.lst example/example.sna example/example.lst

View File

@@ -0,0 +1,100 @@
;; Example using upkr depacker for screens slideshow
OPT --syntax=abf
DEVICE ZXSPECTRUM48,$8FFF
ORG $9000
;; forward example data
compressed_scr_files.fwd: ; border color byte + upkr-packed .scr file
DB 1
INCBIN "screens/Grongy - ZX Spectrum (2022).scr.upk"
DB 7
INCBIN "screens/Schafft - Poison (2017).scr.upk"
DB 0
INCBIN "screens/diver - Mercenary 4. The Heaven's Devil (2014) (Forever 2014 Olympic Edition, 1).scr.upk"
DB 6
INCBIN "screens/diver - Back to Bjork (2015).scr.upk"
.e:
;; backward example data (unpacker goes from the end of the data!)
compressed_scr_files.rwd.e: EQU $-1 ; the final IX will point one byte ahead of "$" here
INCBIN "screens.reversed/diver - Back to Bjork (2015).scr.upk"
DB 6
INCBIN "screens.reversed/diver - Mercenary 4. The Heaven's Devil (2014) (Forever 2014 Olympic Edition, 1).scr.upk"
DB 0
INCBIN "screens.reversed/Schafft - Poison (2017).scr.upk"
DB 7
INCBIN "screens.reversed/Grongy - ZX Spectrum (2022).scr.upk"
compressed_scr_files.rwd: ; border color byte + upkr-packed .scr file (backward)
DB 1
start:
di
; OPT --zxnext
; nextreg 7,3 ; ZX Next: switch to 28Mhz
;;; FORWARD packed/unpacked data demo
ld ix,compressed_scr_files.fwd
.slideshow_loop.fwd:
; set BORDER for next image
ld a,(ix)
inc ix
out (254),a
; call unpack of next image directly into VRAM
ld de,$4000 ; target VRAM
exx
; IX = packed data, DE' = destination ($4000)
; returned IX will point right after the packed data
call fwd.upkr.unpack
; do some busy loop with CPU to delay between images
call delay
; check if all images were displayed, loop around from first one then
ld a,ixl
cp low compressed_scr_files.fwd.e
jr nz,.slideshow_loop.fwd
;;; BACKWARD packed/unpacked data demo
ld ix,compressed_scr_files.rwd
.slideshow_loop.rwd:
; set BORDER for next image
ld a,(ix)
dec ix
out (254),a
; call unpack of next image directly into VRAM
ld de,$5AFF ; target VRAM
exx
; IX = packed data, DE' = destination
; returned IX will point right ahead of the packed data
call rwd.upkr.unpack
; do some busy loop with CPU to delay between images
call delay
; check if all images were displayed, loop around from first one then
ld a,ixl
cp low compressed_scr_files.rwd.e
jr nz,.slideshow_loop.rwd
jr start
delay:
ld bc,$AA00
.delay:
.8 ex (sp),ix
dec c
jr nz,.delay
djnz .delay
ret
; include the depacker library, optionally putting probs array buffer near end of RAM
DEFINE UPKR_PROBS_ORIGIN $FA00 ; if not defined, array will be put after unpack code
MODULE fwd
INCLUDE "../unpack.asm"
ENDMODULE
MODULE rwd
DEFINE BACKWARDS_UNPACK ; defined to build backwards unpack
; initial IX points at last byte of compressed data
; initial DE' points at last byte of unpacked data
INCLUDE "../unpack.asm"
ENDMODULE
SAVESNA "example.sna",start

Binary file not shown.

19
z80_unpacker/readme.txt Normal file
View File

@@ -0,0 +1,19 @@
Z80 asm implementation of C unpacker, code-size focused (not performance).
**ONLY BITSTREAM** variant is currently supported, make sure to use "-b" in packer.
The project is expected to further evolve, including possible changes to binary format, this is
initial version of Z80 unpacker to explore if/how it works and how it can be improved further.
(copy full packer+depacker source to your project if you plan to use it, as future revisions
may be incompatible with files you will produce with current version)
Asm syntax is z00m's sjasmplus: https://github.com/z00m128/sjasmplus
TODO:
- build base corpus of test data to benchmark future changes in algorithm/format
- review first implementation to identify weak spots where the implementation can be shorter+faster
with acceptable small changes to the format
- review non-bitstream variant, if it's feasible to try to implement it with Z80
- (@ped7g) Z80N version of unpacker for ZX Next devs
- (@exoticorn) add Z80 specific packer (to avoid confusion with original MicroW8 variant), and land it all to master branch, maybe in "z80" directory or something? (and overall decide how to organise+merge this upstream into main repo)

328
z80_unpacker/unpack.asm Normal file
View File

@@ -0,0 +1,328 @@
;; https://github.com/exoticorn/upkr/blob/z80/c_unpacker/unpack.c - original C implementation
;; C source in comments ahead of asm - the C macros are removed to keep only bitstream variant
;;
;; initial version by Peter "Ped" Helcmanovsky (C) 2022, licensed same as upkr project ("unlicensed")
;; to assemble use z00m's sjasmplus: https://github.com/z00m128/sjasmplus
;;
;; you can define UPKR_PROBS_ORIGIN to specific 256 byte aligned address for probs array (386 bytes),
;; otherwise it will be positioned after the unpacker code (256 aligned)
;;
;; public API:
;;
;; upkr.unpack
;; IN: IX = packed data, DE' (shadow DE) = destination
;; OUT: IX = after packed data
;; modifies: all registers except IY, requires 10 bytes of stack space
;;
; DEFINE BACKWARDS_UNPACK ; uncomment to build backwards depacker
; initial IX points at last byte of compressed data
; initial DE' points at last byte of unpacked data
; DEFINE UPKR_UNPACK_SPEED ; uncomment to get larger but faster unpack routine
OPT push reset --syntax=abf
MODULE upkr
NUMBER_BITS EQU 16+15 ; context-bits per offset/length (16+15 for 16bit offsets/pointers)
; numbers (offsets/lengths) are encoded like: 1a1b1c1d1e0 = 0000'0000'001e'dbca
/*
u8* upkr_data_ptr;
u8 upkr_probs[1 + 255 + 1 + 2*32 + 2*32];
u16 upkr_state;
u8 upkr_current_byte;
int upkr_bits_left;
int upkr_unpack(void* destination, void* compressed_data) {
upkr_data_ptr = (u8*)compressed_data;
upkr_state = 0;
upkr_bits_left = 0;
for(int i = 0; i < sizeof(upkr_probs); ++i)
upkr_probs[i] = 128;
u8* write_ptr = (u8*)destination;
int prev_was_match = 0;
int offset = 0;
for(;;) {
if(upkr_decode_bit(0)) {
if(prev_was_match || upkr_decode_bit(256)) {
offset = upkr_decode_length(257) - 1;
if(offset == 0) {
break;
}
}
int length = upkr_decode_length(257 + 64);
while(length--) {
*write_ptr = write_ptr[-offset];
++write_ptr;
}
prev_was_match = 1;
} else {
int byte = 1;
while(byte < 256) {
int bit = upkr_decode_bit(byte);
byte = (byte << 1) + bit;
}
*write_ptr++ = byte;
prev_was_match = 0;
}
}
return write_ptr - (u8*)destination;
}
*/
; IN: IX = compressed_data, DE' = destination
unpack:
; ** reset probs to 0x80, also reset HL (state) to zero, and set BC to probs+context 0
ld hl,probs.c>>1
ld bc,probs.e
ld a,$80
.reset_probs:
dec bc
ld (bc),a ; will overwrite one extra byte after the array because of odd length
dec bc
ld (bc),a
dec l
jr nz,.reset_probs
exa
; BC = probs (context_index 0), state HL = 0, A' = 0x80 (no source bits left in upkr_current_byte)
; ** main loop to decompress data
; D = prev_was_match = uninitialised, literal is expected first => will reset D to "false"
; values for false/true of prev_was_match are: false = high(probs), true = 1 + high(probs)
.decompress_data:
ld c,0
call decode_bit ; if(upkr_decode_bit(0))
jr c,.copy_chunk
; * extract byte from compressed data (literal)
inc c ; C = byte = 1 (and also context_index)
.decode_byte:
call decode_bit ; bit = upkr_decode_bit(byte);
rl c ; byte = (byte << 1) + bit;
jr nc,.decode_byte ; while(byte < 256)
ld a,c
exx
ld (de),a ; *write_ptr++ = byte;
IFNDEF BACKWARDS_UNPACK : inc de : ELSE : dec de : ENDIF
exx
ld d,b ; prev_was_match = false
jr .decompress_data
; * copy chunk of already decompressed data (match)
.copy_chunk:
ld a,b
inc b ; context_index = 256
; if(prev_was_match || upkr_decode_bit(256)) {
; offset = upkr_decode_length(257) - 1;
; if (0 == offset) break;
; }
cp d ; CF = prev_was_match
call nc,decode_bit ; if not prev_was_match, then upkr_decode_bit(256)
jr nc,.keep_offset ; if neither, keep old offset
inc c ; context_index to first "number" set for offsets decoding (257)
call decode_number
dec de ; offset = upkr_decode_length(257) - 1;
ld a,d
or e
ret z ; if(offset == 0) break
ld (.offset),de
.keep_offset:
; int length = upkr_decode_length(257 + 64);
; while(length--) {
; *write_ptr = write_ptr[-offset];
; ++write_ptr;
; }
; prev_was_match = 1;
ld c,low(257 + NUMBER_BITS) ; context_index to second "number" set for lengths decoding
call decode_number ; length = upkr_decode_length(257 + 64);
push de
exx
ld h,d ; DE = write_ptr
ld l,e
.offset+*: ld bc,0
IFNDEF BACKWARDS_UNPACK
sbc hl,bc ; CF=0 from decode_number ; HL = write_ptr - offset
ELSE
add hl,bc ; HL = write_ptr + offset
ENDIF
pop bc ; BC = length
IFNDEF BACKWARDS_UNPACK : ldir : ELSE : lddr : ENDIF
exx
ld d,b ; prev_was_match = true
djnz .decompress_data ; adjust context_index back to 0..255 range, go to main loop
/*
int upkr_decode_bit(int context_index) {
while(upkr_state < 32768) {
if(upkr_bits_left == 0) {
upkr_current_byte = *upkr_data_ptr++;
upkr_bits_left = 8;
}
upkr_state = (upkr_state << 1) + (upkr_current_byte >> 7);
upkr_current_byte <<= 1;
--upkr_bits_left;
}
int prob = upkr_probs[context_index];
int bit = (upkr_state & 255) >= prob ? 1 : 0;
int prob_offset = 16;
int state_offset = 0;
int state_scale = prob;
if(bit) {
state_offset = -prob;
state_scale = 256 - prob;
prob_offset = 0;
}
upkr_state = state_offset + state_scale * (upkr_state >> 8) + (upkr_state & 255);
upkr_probs[context_index] = prob_offset + prob - ((prob + 8) >> 4);
return bit;
}
*/
decode_bit:
; HL = upkr_state
; IX = upkr_data_ptr
; BC = probs+context_index
; A' = upkr_current_byte (!!! init to 0x80 at start, not 0x00)
; preserves DE
; ** while (state < 32768) - initial check
push de
bit 7,h
jr nz,.state_b15_set
exa
; ** while body
.state_b15_zero:
; HL = upkr_state
; IX = upkr_data_ptr
; A = upkr_current_byte (init to 0x80 at start, not 0x00)
add a,a ; upkr_current_byte <<= 1; // and testing if(upkr_bits_left == 0)
jr nz,.has_bit ; CF=data, ZF=0 -> some bits + stop bit still available
; CF=1 (by stop bit)
ld a,(ix)
IFNDEF BACKWARDS_UNPACK : inc ix : ELSE : dec ix : ENDIF ; upkr_current_byte = *upkr_data_ptr++;
adc a,a ; CF=data, b0=1 as new stop bit
.has_bit:
adc hl,hl ; upkr_state = (upkr_state << 1) + (upkr_current_byte >> 7);
jp p,.state_b15_zero ; while (state < 32768)
exa
; ** set "bit"
.state_b15_set:
ld a,(bc) ; A = upkr_probs[context_index]
dec a ; prob is in ~7..249 range, never zero, safe to -1
cp l ; CF = bit = prob-1 < (upkr_state & 255) <=> prob <= (upkr_state & 255)
inc a
; ** adjust state
push bc
ld c,l ; C = (upkr_state & 255); (preserving the value)
push af
jr nc,.bit_is_0
neg ; A = -prob == (256-prob), CF=1 preserved
.bit_is_0:
ld d,0
ld e,a ; DE = state_scale ; prob || (256-prob)
ld l,d ; H:L = (upkr_state>>8) : 0
IFNDEF UPKR_UNPACK_SPEED
;; looped MUL for minimum unpack size
ld b,8 ; counter
.mulLoop:
add hl,hl
jr nc,.mul0
add hl,de
.mul0:
djnz .mulLoop ; until HL = state_scale * (upkr_state>>8), also BC becomes (upkr_state & 255)
ELSE
;;; unrolled MUL for better performance, +25 bytes unpack size
ld b,d
DUP 8
add hl,hl
jr nc,0_f
add hl,de
0:
EDUP
ENDIF
add hl,bc ; HL = state_scale * (upkr_state >> 8) + (upkr_state & 255)
pop af ; restore prob and CF=bit
jr nc,.bit_is_0_2
dec d ; DE = -prob (also D = bit ? $FF : $00)
add hl,de ; HL += -prob
; ^ this always preserves CF=1, because (state>>8) >= 128, state_scale: 7..250, prob: 7..250,
; so 7*128 > 250 and thus edge case `ADD hl=(7*128+0),de=(-250)` => CF=1
.bit_is_0_2:
; *** adjust probs[context_index]
ld e,a ; preserve prob
rra ; + (bit<<4) ; part of -prob_offset, needs another -16
and $FC ; clear/keep correct bits to get desired (prob>>4) + extras, CF=0
rra
rra
rra ; A = (bit<<4) + (prob>>4), CF=(prob & 8)
adc a,-16 ; A = (bit<<4) - 16 + ((prob + 8)>>4) ; -prob_offset = (bit<<4) - 16
sub e ; A = (bit<<4) - 16 + ((prob + 8)>>4) - prob ; = ((prob + 8)>>4) - prob_offset - prob
neg ; A = prob_offset + prob - ((prob + 8)>>4)
pop bc
ld (bc),a ; probs[context_index] = prob_offset + prob - ((prob + 8) >> 4);
add a,d ; restore CF = bit (D = bit ? $FF : $00 && A > 0)
pop de
ret
/*
int upkr_decode_length(int context_index) {
int length = 0;
int bit_pos = 0;
while(upkr_decode_bit(context_index)) {
length |= upkr_decode_bit(context_index + 1) << bit_pos++;
context_index += 2;
}
return length | (1 << bit_pos);
}
*/
decode_number:
; HL = upkr_state
; IX = upkr_data_ptr
; BC = probs+context_index
; A' = upkr_current_byte (!!! init to 0x80 at start, not 0x00)
; return length in DE, CF=0
ld de,$7FFF ; length = 0 with positional-stop-bit
jr .loop_entry
.loop:
inc c ; context_index + 1
call decode_bit
rr d
rr e ; DE = length = (length >> 1) | (bit << 15);
inc c ; context_index += 2
.loop_entry:
call decode_bit
jr c,.loop
.fix_bit_pos:
ccf ; NC will become this final `| (1 << bit_pos)` bit
rr d
rr e
jr c,.fix_bit_pos ; until stop bit is reached (all bits did land to correct position)
ret ; return with CF=0 (important for unpack routine)
DISPLAY "upkr.unpack total size: ",/D,$-unpack
; reserve space for probs array without emitting any machine code (using only EQU)
IFDEF UPKR_PROBS_ORIGIN ; if specific address is defined by user, move probs array there
probs: EQU ((UPKR_PROBS_ORIGIN) + 255) & -$100 ; probs array aligned to 256
ELSE
probs: EQU ($ + 255) & -$100 ; probs array aligned to 256
ENDIF
.real_c: EQU 1 + 255 + 1 + 2*NUMBER_BITS ; real size of probs array
.c: EQU (.real_c + 1) & -2 ; padding to even size (required by init code)
.e: EQU probs + .c
DISPLAY "upkr.unpack probs array placed at: ",/A,probs,",\tsize: ",/A,probs.c
ENDMODULE
OPT pop