mirror of
https://github.com/exoticorn/upkr.git
synced 2026-01-20 11:36:42 +01:00
Compare commits
25 Commits
old-prob-u
...
z80
| Author | SHA1 | Date | |
|---|---|---|---|
| 48727040b3 | |||
|
|
8a32e1384c | ||
|
|
9913dcf4bb | ||
|
|
a8fd3dc573 | ||
|
|
e1f9fa143a | ||
|
|
db1c7d2d14 | ||
|
|
c1ffd0e7ed | ||
|
|
00d084105a | ||
|
|
8e5298caee | ||
|
|
1fb29f3a1b | ||
| c8924456aa | |||
| 7b0e22f459 | |||
|
|
165f593a11 | ||
|
|
d4bce4bf7c | ||
|
|
b13fa05413 | ||
|
|
3c773aca8d | ||
| a5406deb30 | |||
|
|
9211544cb9 | ||
|
|
3fa9e0fa12 | ||
|
|
aa3fad4d80 | ||
|
|
6624940ed9 | ||
|
|
c3a9773e5c | ||
|
|
a75a35efb2 | ||
| 540a91d1ba | |||
| e7aaf1491a |
@@ -2,7 +2,9 @@
|
|||||||
|
|
||||||
configs = [
|
configs = [
|
||||||
[:master, '-b'],
|
[:master, '-b'],
|
||||||
[:z80, '-b']
|
[:z80, '-b'],
|
||||||
|
[:z80, ['-b', '-r']],
|
||||||
|
['old-prob-update', '-b']
|
||||||
]
|
]
|
||||||
|
|
||||||
files = Dir[ARGV[0] + '/*'].select {|f| !(f =~ /\.txt$/) }
|
files = Dir[ARGV[0] + '/*'].select {|f| !(f =~ /\.txt$/) }
|
||||||
@@ -34,8 +36,8 @@ for config in configs
|
|||||||
config_results = []
|
config_results = []
|
||||||
results << config_results
|
results << config_results
|
||||||
for file in files
|
for file in files
|
||||||
if system('cargo', 'run', '--release', 'pack', '-l', '9', config[1], file, '/tmp/out.upk') &&
|
if system(*['cargo', 'run', '--release', 'pack', '-l', '9', config[1], file, '/tmp/out.upk'].flatten) &&
|
||||||
system('cargo', 'run', '--release', 'unpack', config[1], '/tmp/out.upk', '/tmp/out.bin') &&
|
system(*['cargo', 'run', '--release', 'unpack', config[1], '/tmp/out.upk', '/tmp/out.bin'].flatten) &&
|
||||||
File.read(file) == File.read('/tmp/out.bin')
|
File.read(file) == File.read('/tmp/out.bin')
|
||||||
size = File.size('/tmp/out.upk')
|
size = File.size('/tmp/out.upk')
|
||||||
config_results << size
|
config_results << size
|
||||||
|
|||||||
21
src/main.rs
21
src/main.rs
@@ -10,16 +10,20 @@ fn main() -> Result<()> {
|
|||||||
Some("pack") => {
|
Some("pack") => {
|
||||||
let level = args.opt_value_from_str(["-l", "--level"])?.unwrap_or(2u8);
|
let level = args.opt_value_from_str(["-l", "--level"])?.unwrap_or(2u8);
|
||||||
let use_bitstream = args.contains(["-b", "--bitstream"]);
|
let use_bitstream = args.contains(["-b", "--bitstream"]);
|
||||||
|
let reverse = args.contains(["-r", "--reverse"]);
|
||||||
|
|
||||||
let infile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
|
let infile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
|
||||||
let outfile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
|
let outfile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
|
||||||
|
|
||||||
let mut data = vec![];
|
let mut data = vec![];
|
||||||
File::open(infile)?.read_to_end(&mut data)?;
|
File::open(infile)?.read_to_end(&mut data)?;
|
||||||
|
if reverse {
|
||||||
|
data.reverse();
|
||||||
|
}
|
||||||
|
|
||||||
let mut pb = pbr::ProgressBar::new(data.len() as u64);
|
let mut pb = pbr::ProgressBar::new(data.len() as u64);
|
||||||
pb.set_units(pbr::Units::Bytes);
|
pb.set_units(pbr::Units::Bytes);
|
||||||
let packed_data = upkr::pack(
|
let mut packed_data = upkr::pack(
|
||||||
&data,
|
&data,
|
||||||
level,
|
level,
|
||||||
use_bitstream,
|
use_bitstream,
|
||||||
@@ -29,6 +33,10 @@ fn main() -> Result<()> {
|
|||||||
);
|
);
|
||||||
pb.finish();
|
pb.finish();
|
||||||
|
|
||||||
|
if reverse {
|
||||||
|
packed_data.reverse();
|
||||||
|
}
|
||||||
|
|
||||||
println!(
|
println!(
|
||||||
"Compressed {} bytes to {} bytes ({}%)",
|
"Compressed {} bytes to {} bytes ({}%)",
|
||||||
data.len(),
|
data.len(),
|
||||||
@@ -39,14 +47,21 @@ fn main() -> Result<()> {
|
|||||||
}
|
}
|
||||||
Some("unpack") => {
|
Some("unpack") => {
|
||||||
let use_bitstream = args.contains(["-b", "--bitstream"]);
|
let use_bitstream = args.contains(["-b", "--bitstream"]);
|
||||||
|
let reverse = args.contains(["-r", "--reverse"]);
|
||||||
|
|
||||||
let infile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
|
let infile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
|
||||||
let outfile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
|
let outfile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
|
||||||
|
|
||||||
let mut data = vec![];
|
let mut data = vec![];
|
||||||
File::open(infile)?.read_to_end(&mut data)?;
|
File::open(infile)?.read_to_end(&mut data)?;
|
||||||
let packed_data = upkr::unpack(&data, use_bitstream);
|
if reverse {
|
||||||
File::create(outfile)?.write_all(&packed_data)?;
|
data.reverse();
|
||||||
|
}
|
||||||
|
let mut unpacked_data = upkr::unpack(&data, use_bitstream);
|
||||||
|
if reverse {
|
||||||
|
unpacked_data.reverse();
|
||||||
|
}
|
||||||
|
File::create(outfile)?.write_all(&unpacked_data)?;
|
||||||
}
|
}
|
||||||
Some(other) => {
|
Some(other) => {
|
||||||
bail!("Unknown subcommand '{}'", other);
|
bail!("Unknown subcommand '{}'", other);
|
||||||
|
|||||||
1
z80_unpacker/.gitignore
vendored
1
z80_unpacker/.gitignore
vendored
@@ -1,4 +1,3 @@
|
|||||||
*.bin
|
*.bin
|
||||||
*.tap
|
*.tap
|
||||||
*.sna
|
|
||||||
*.lst
|
*.lst
|
||||||
|
|||||||
@@ -3,7 +3,8 @@
|
|||||||
DEVICE ZXSPECTRUM48,$8FFF
|
DEVICE ZXSPECTRUM48,$8FFF
|
||||||
|
|
||||||
ORG $9000
|
ORG $9000
|
||||||
compressed_scr_files: ; border color byte + upkr-packed .scr file
|
;; forward example data
|
||||||
|
compressed_scr_files.fwd: ; border color byte + upkr-packed .scr file
|
||||||
DB 1
|
DB 1
|
||||||
INCBIN "screens/Grongy - ZX Spectrum (2022).scr.upk"
|
INCBIN "screens/Grongy - ZX Spectrum (2022).scr.upk"
|
||||||
DB 7
|
DB 7
|
||||||
@@ -13,37 +14,87 @@ compressed_scr_files: ; border color byte + upkr-packed .scr file
|
|||||||
DB 6
|
DB 6
|
||||||
INCBIN "screens/diver - Back to Bjork (2015).scr.upk"
|
INCBIN "screens/diver - Back to Bjork (2015).scr.upk"
|
||||||
.e:
|
.e:
|
||||||
|
;; backward example data (unpacker goes from the end of the data!)
|
||||||
|
compressed_scr_files.rwd.e: EQU $-1 ; the final IX will point one byte ahead of "$" here
|
||||||
|
INCBIN "screens.reversed/diver - Back to Bjork (2015).scr.upk"
|
||||||
|
DB 6
|
||||||
|
INCBIN "screens.reversed/diver - Mercenary 4. The Heaven's Devil (2014) (Forever 2014 Olympic Edition, 1).scr.upk"
|
||||||
|
DB 0
|
||||||
|
INCBIN "screens.reversed/Schafft - Poison (2017).scr.upk"
|
||||||
|
DB 7
|
||||||
|
INCBIN "screens.reversed/Grongy - ZX Spectrum (2022).scr.upk"
|
||||||
|
compressed_scr_files.rwd: ; border color byte + upkr-packed .scr file (backward)
|
||||||
|
DB 1
|
||||||
|
|
||||||
start:
|
start:
|
||||||
di
|
di
|
||||||
; OPT --zxnext
|
; OPT --zxnext
|
||||||
; nextreg 7,3 ; ZX Next: switch to 28Mhz
|
; nextreg 7,3 ; ZX Next: switch to 28Mhz
|
||||||
ld ix,compressed_scr_files
|
|
||||||
.slideshow_loop
|
;;; FORWARD packed/unpacked data demo
|
||||||
|
ld ix,compressed_scr_files.fwd
|
||||||
|
.slideshow_loop.fwd:
|
||||||
; set BORDER for next image
|
; set BORDER for next image
|
||||||
ldi a,(ix) ; fake: ld a,(ix) : inc ix
|
ld a,(ix)
|
||||||
|
inc ix
|
||||||
out (254),a
|
out (254),a
|
||||||
; call unpack of next image directly into VRAM
|
; call unpack of next image directly into VRAM
|
||||||
ld de,$4000 ; target VRAM
|
ld de,$4000 ; target VRAM
|
||||||
exx
|
exx
|
||||||
; IX = packed data, DE' = destination ($4000)
|
; IX = packed data, DE' = destination ($4000)
|
||||||
; returned IX will point right after the packed data
|
; returned IX will point right after the packed data
|
||||||
call upkr.unpack
|
call fwd.upkr.unpack
|
||||||
; do some busy loop with CPU to delay between images
|
; do some busy loop with CPU to delay between images
|
||||||
|
call delay
|
||||||
|
; check if all images were displayed, loop around from first one then
|
||||||
|
ld a,ixl
|
||||||
|
cp low compressed_scr_files.fwd.e
|
||||||
|
jr nz,.slideshow_loop.fwd
|
||||||
|
|
||||||
|
;;; BACKWARD packed/unpacked data demo
|
||||||
|
ld ix,compressed_scr_files.rwd
|
||||||
|
.slideshow_loop.rwd:
|
||||||
|
; set BORDER for next image
|
||||||
|
ld a,(ix)
|
||||||
|
dec ix
|
||||||
|
out (254),a
|
||||||
|
; call unpack of next image directly into VRAM
|
||||||
|
ld de,$5AFF ; target VRAM
|
||||||
|
exx
|
||||||
|
; IX = packed data, DE' = destination
|
||||||
|
; returned IX will point right ahead of the packed data
|
||||||
|
call rwd.upkr.unpack
|
||||||
|
; do some busy loop with CPU to delay between images
|
||||||
|
call delay
|
||||||
|
; check if all images were displayed, loop around from first one then
|
||||||
|
ld a,ixl
|
||||||
|
cp low compressed_scr_files.rwd.e
|
||||||
|
jr nz,.slideshow_loop.rwd
|
||||||
|
|
||||||
|
jr start
|
||||||
|
|
||||||
|
delay:
|
||||||
ld bc,$AA00
|
ld bc,$AA00
|
||||||
.delay:
|
.delay:
|
||||||
.8 ex (sp),ix
|
.8 ex (sp),ix
|
||||||
dec c
|
dec c
|
||||||
jr nz,.delay
|
jr nz,.delay
|
||||||
djnz .delay
|
djnz .delay
|
||||||
; check if all images were displayed, loop around from first one then
|
ret
|
||||||
ld a,ixl
|
|
||||||
cp low compressed_scr_files.e
|
|
||||||
jr z,start
|
|
||||||
jr .slideshow_loop
|
|
||||||
|
|
||||||
; include the depacker library, optionally putting probs array buffer near end of RAM
|
; include the depacker library, optionally putting probs array buffer near end of RAM
|
||||||
DEFINE UPKR_PROBS_ORIGIN $FA00 ; if not defined, array will be put after unpack code
|
DEFINE UPKR_PROBS_ORIGIN $FA00 ; if not defined, array will be put after unpack code
|
||||||
|
|
||||||
|
MODULE fwd
|
||||||
INCLUDE "../unpack.asm"
|
INCLUDE "../unpack.asm"
|
||||||
|
ENDMODULE
|
||||||
|
|
||||||
|
MODULE rwd
|
||||||
|
DEFINE BACKWARDS_UNPACK ; defined to build backwards unpack
|
||||||
|
; initial IX points at last byte of compressed data
|
||||||
|
; initial DE' points at last byte of unpacked data
|
||||||
|
|
||||||
|
INCLUDE "../unpack.asm"
|
||||||
|
ENDMODULE
|
||||||
|
|
||||||
SAVESNA "example.sna",start
|
SAVESNA "example.sna",start
|
||||||
|
|||||||
BIN
z80_unpacker/example/example.sna
Normal file
BIN
z80_unpacker/example/example.sna
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -10,10 +10,23 @@ may be incompatible with files you will produce with current version)
|
|||||||
|
|
||||||
Asm syntax is z00m's sjasmplus: https://github.com/z00m128/sjasmplus
|
Asm syntax is z00m's sjasmplus: https://github.com/z00m128/sjasmplus
|
||||||
|
|
||||||
|
Backward direction unpacker added as compile-time option, see example for both forward/backward
|
||||||
|
depacker in action.
|
||||||
|
|
||||||
|
The packed/unpacked data-overlap has to be tested per-case, in worst case the packed data
|
||||||
|
may need even more than 7 bytes to unpack final byte, but usually 1-4 bytes may suffice.
|
||||||
|
|
||||||
TODO:
|
TODO:
|
||||||
- build base corpus of test data to benchmark future changes in algorithm/format
|
- build bigger corpus of test data to benchmark future changes in algorithm/format (example and zx48.rom was used to do initial tests)
|
||||||
- review first implementation to identify weak spots where the implementation can be shorter+faster
|
- maybe try to beat double-loop `decode_number` with different encoding format
|
||||||
with acceptable small changes to the format
|
|
||||||
- review non-bitstream variant, if it's feasible to try to implement it with Z80
|
|
||||||
- (@ped7g) Z80N version of unpacker for ZX Next devs
|
- (@ped7g) Z80N version of unpacker for ZX Next devs
|
||||||
- (@exoticorn) add Z80 specific packer (to avoid confusion with original MicroW8 variant), and land it all to master branch, maybe in "z80" directory or something? (and overall decide how to organise+merge this upstream into main repo)
|
- (@exoticorn) add Z80 specific packer (to avoid confusion with original MicroW8 variant), and land it all to master branch, maybe in "z80" directory or something? (and overall decide how to organise+merge this upstream into main repo)
|
||||||
|
- (@exoticorn) add to packer output with possible packed/unpacked region overlap
|
||||||
|
|
||||||
|
DONE:
|
||||||
|
* review non-bitstream variant, if it's feasible to try to implement it with Z80
|
||||||
|
- Ped7g: IMHO nope, the 12b x 8b MUL code would probably quickly cancel any gains from the simpler state update
|
||||||
|
* review first implementation to identify weak spots where the implementation can be shorter+faster
|
||||||
|
with acceptable small changes to the format
|
||||||
|
- Ped7g: the decode_bit settled down and now doesn't feel so confused and redundant, the code seems pretty on point to me, no obvious simplification from format change
|
||||||
|
- Ped7g: the decode_number double-loop is surprisingly resilient, especially in terms of code size I failed to beat it, speed wise only negligible gains
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
;; initial version by Peter "Ped" Helcmanovsky (C) 2022, licensed same as upkr project ("unlicensed")
|
;; initial version by Peter "Ped" Helcmanovsky (C) 2022, licensed same as upkr project ("unlicensed")
|
||||||
;; to assemble use z00m's sjasmplus: https://github.com/z00m128/sjasmplus
|
;; to assemble use z00m's sjasmplus: https://github.com/z00m128/sjasmplus
|
||||||
;;
|
;;
|
||||||
;; you can define UPKR_PROBS_ORIGIN to specific 256 byte aligned address for probs array (386 bytes),
|
;; you can define UPKR_PROBS_ORIGIN to specific 256 byte aligned address for probs array (320 bytes),
|
||||||
;; otherwise it will be positioned after the unpacker code (256 aligned)
|
;; otherwise it will be positioned after the unpacker code (256 aligned)
|
||||||
;;
|
;;
|
||||||
;; public API:
|
;; public API:
|
||||||
@@ -12,12 +12,24 @@
|
|||||||
;; upkr.unpack
|
;; upkr.unpack
|
||||||
;; IN: IX = packed data, DE' (shadow DE) = destination
|
;; IN: IX = packed data, DE' (shadow DE) = destination
|
||||||
;; OUT: IX = after packed data
|
;; OUT: IX = after packed data
|
||||||
;; modifies: all registers except IY, requires 14 bytes of stack space
|
;; modifies: all registers except IY, requires 10 bytes of stack space
|
||||||
;;
|
;;
|
||||||
|
|
||||||
|
; DEFINE BACKWARDS_UNPACK ; uncomment to build backwards depacker (write_ptr--, upkr_data_ptr--)
|
||||||
|
; initial IX points at last byte of compressed data
|
||||||
|
; initial DE' points at last byte of unpacked data
|
||||||
|
|
||||||
|
; DEFINE UPKR_UNPACK_SPEED ; uncomment to get larger but faster unpack routine
|
||||||
|
|
||||||
|
; code size hint: if you put probs array just ahead of BASIC entry point, you will get BC
|
||||||
|
; initialised to probs.e by BASIC `USR` command and you can remove it from unpack init (-3B)
|
||||||
|
|
||||||
OPT push reset --syntax=abf
|
OPT push reset --syntax=abf
|
||||||
MODULE upkr
|
MODULE upkr
|
||||||
|
|
||||||
|
NUMBER_BITS EQU 16+15 ; context-bits per offset/length (16+15 for 16bit offsets/pointers)
|
||||||
|
; numbers (offsets/lengths) are encoded like: 1a1b1c1d1e0 = 0000'0000'001e'dbca
|
||||||
|
|
||||||
/*
|
/*
|
||||||
u8* upkr_data_ptr;
|
u8* upkr_data_ptr;
|
||||||
u8 upkr_probs[1 + 255 + 1 + 2*32 + 2*32];
|
u8 upkr_probs[1 + 255 + 1 + 2*32 + 2*32];
|
||||||
@@ -81,8 +93,8 @@ unpack:
|
|||||||
; BC = probs (context_index 0), state HL = 0, A' = 0x80 (no source bits left in upkr_current_byte)
|
; BC = probs (context_index 0), state HL = 0, A' = 0x80 (no source bits left in upkr_current_byte)
|
||||||
|
|
||||||
; ** main loop to decompress data
|
; ** main loop to decompress data
|
||||||
.decompress_data_reset_match:
|
; D = prev_was_match = uninitialised, literal is expected first => will reset D to "false"
|
||||||
ld d,0 ; prev_was_match = 0;
|
; values for false/true of prev_was_match are: false = high(probs), true = 1 + high(probs)
|
||||||
.decompress_data:
|
.decompress_data:
|
||||||
ld c,0
|
ld c,0
|
||||||
call decode_bit ; if(upkr_decode_bit(0))
|
call decode_bit ; if(upkr_decode_bit(0))
|
||||||
@@ -97,23 +109,23 @@ unpack:
|
|||||||
ld a,c
|
ld a,c
|
||||||
exx
|
exx
|
||||||
ld (de),a ; *write_ptr++ = byte;
|
ld (de),a ; *write_ptr++ = byte;
|
||||||
inc de
|
IFNDEF BACKWARDS_UNPACK : inc de : ELSE : dec de : ENDIF
|
||||||
exx
|
exx
|
||||||
jr .decompress_data_reset_match
|
ld d,b ; prev_was_match = false
|
||||||
|
jr .decompress_data
|
||||||
|
|
||||||
; * copy chunk of already decompressed data (match)
|
; * copy chunk of already decompressed data (match)
|
||||||
.copy_chunk:
|
.copy_chunk:
|
||||||
|
ld a,b
|
||||||
inc b ; context_index = 256
|
inc b ; context_index = 256
|
||||||
; if(prev_was_match || upkr_decode_bit(256)) {
|
; if(prev_was_match || upkr_decode_bit(256)) {
|
||||||
; offset = upkr_decode_length(257) - 1;
|
; offset = upkr_decode_length(257) - 1;
|
||||||
; if (0 == offset) break;
|
; if (0 == offset) break;
|
||||||
; }
|
; }
|
||||||
xor a
|
|
||||||
cp d ; CF = prev_was_match
|
cp d ; CF = prev_was_match
|
||||||
call nc,decode_bit ; if not prev_was_match, then upkr_decode_bit(256)
|
call nc,decode_bit ; if not prev_was_match, then upkr_decode_bit(256)
|
||||||
jr nc,.keep_offset ; if neither, keep old offset
|
jr nc,.keep_offset ; if neither, keep old offset
|
||||||
inc c
|
call decode_number ; context_index is already 257-1 as needed by decode_number
|
||||||
call decode_length
|
|
||||||
dec de ; offset = upkr_decode_length(257) - 1;
|
dec de ; offset = upkr_decode_length(257) - 1;
|
||||||
ld a,d
|
ld a,d
|
||||||
or e
|
or e
|
||||||
@@ -126,18 +138,27 @@ unpack:
|
|||||||
; ++write_ptr;
|
; ++write_ptr;
|
||||||
; }
|
; }
|
||||||
; prev_was_match = 1;
|
; prev_was_match = 1;
|
||||||
ld c,low(257+64) ; context_index = 257+64
|
ld c,low(257 + NUMBER_BITS - 1) ; context_index to second "number" set for lengths decoding
|
||||||
call decode_length ; length = upkr_decode_length(257 + 64);
|
call decode_number ; length = upkr_decode_length(257 + 64);
|
||||||
push de
|
push de
|
||||||
exx
|
exx
|
||||||
|
IFNDEF BACKWARDS_UNPACK
|
||||||
|
; forward unpack (write_ptr++, upkr_data_ptr++)
|
||||||
ld h,d ; DE = write_ptr
|
ld h,d ; DE = write_ptr
|
||||||
ld l,e
|
ld l,e
|
||||||
.offset+*: ld bc,0
|
.offset+*: ld bc,0
|
||||||
sbc hl,bc ; CF=0 from decode_length ; HL = write_ptr - offset
|
sbc hl,bc ; CF=0 from decode_number ; HL = write_ptr - offset
|
||||||
pop bc ; BC = length
|
pop bc ; BC = length
|
||||||
ldir
|
ldir
|
||||||
|
ELSE
|
||||||
|
; backward unpack (write_ptr--, upkr_data_ptr--)
|
||||||
|
.offset+*: ld hl,0
|
||||||
|
add hl,de ; HL = write_ptr + offset
|
||||||
|
pop bc ; BC = length
|
||||||
|
lddr
|
||||||
|
ENDIF
|
||||||
exx
|
exx
|
||||||
ld d,b ; prev_was_match = non-zero
|
ld d,b ; prev_was_match = true
|
||||||
djnz .decompress_data ; adjust context_index back to 0..255 range, go to main loop
|
djnz .decompress_data ; adjust context_index back to 0..255 range, go to main loop
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -169,6 +190,9 @@ int upkr_decode_bit(int context_index) {
|
|||||||
return bit;
|
return bit;
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
inc_c_decode_bit:
|
||||||
|
; ++low(context_index) before decode_bit (to get -1B by two calls in decode_number)
|
||||||
|
inc c
|
||||||
decode_bit:
|
decode_bit:
|
||||||
; HL = upkr_state
|
; HL = upkr_state
|
||||||
; IX = upkr_data_ptr
|
; IX = upkr_data_ptr
|
||||||
@@ -189,7 +213,7 @@ decode_bit:
|
|||||||
jr nz,.has_bit ; CF=data, ZF=0 -> some bits + stop bit still available
|
jr nz,.has_bit ; CF=data, ZF=0 -> some bits + stop bit still available
|
||||||
; CF=1 (by stop bit)
|
; CF=1 (by stop bit)
|
||||||
ld a,(ix)
|
ld a,(ix)
|
||||||
inc ix ; upkr_current_byte = *upkr_data_ptr++;
|
IFNDEF BACKWARDS_UNPACK : inc ix : ELSE : dec ix : ENDIF ; upkr_current_byte = *upkr_data_ptr++;
|
||||||
adc a,a ; CF=data, b0=1 as new stop bit
|
adc a,a ; CF=data, b0=1 as new stop bit
|
||||||
.has_bit:
|
.has_bit:
|
||||||
adc hl,hl ; upkr_state = (upkr_state << 1) + (upkr_current_byte >> 7);
|
adc hl,hl ; upkr_state = (upkr_state << 1) + (upkr_current_byte >> 7);
|
||||||
@@ -202,9 +226,8 @@ decode_bit:
|
|||||||
cp l ; CF = bit = prob-1 < (upkr_state & 255) <=> prob <= (upkr_state & 255)
|
cp l ; CF = bit = prob-1 < (upkr_state & 255) <=> prob <= (upkr_state & 255)
|
||||||
inc a
|
inc a
|
||||||
; ** adjust state
|
; ** adjust state
|
||||||
push af
|
push bc
|
||||||
push af
|
ld c,l ; C = (upkr_state & 255); (preserving the value)
|
||||||
push hl
|
|
||||||
push af
|
push af
|
||||||
jr nc,.bit_is_0
|
jr nc,.bit_is_0
|
||||||
neg ; A = -prob == (256-prob), CF=1 preserved
|
neg ; A = -prob == (256-prob), CF=1 preserved
|
||||||
@@ -212,38 +235,52 @@ decode_bit:
|
|||||||
ld d,0
|
ld d,0
|
||||||
ld e,a ; DE = state_scale ; prob || (256-prob)
|
ld e,a ; DE = state_scale ; prob || (256-prob)
|
||||||
ld l,d ; H:L = (upkr_state>>8) : 0
|
ld l,d ; H:L = (upkr_state>>8) : 0
|
||||||
ld a,8 ; counter
|
|
||||||
|
IFNDEF UPKR_UNPACK_SPEED
|
||||||
|
|
||||||
|
;; looped MUL for minimum unpack size
|
||||||
|
ld b,8 ; counter
|
||||||
.mulLoop:
|
.mulLoop:
|
||||||
add hl,hl
|
add hl,hl
|
||||||
jr nc,.mul0
|
jr nc,.mul0
|
||||||
add hl,de
|
add hl,de
|
||||||
.mul0:
|
.mul0:
|
||||||
dec a
|
djnz .mulLoop ; until HL = state_scale * (upkr_state>>8), also BC becomes (upkr_state & 255)
|
||||||
jr nz,.mulLoop ; until HL = state_scale * (upkr_state>>8)
|
|
||||||
pop af
|
ELSE
|
||||||
|
|
||||||
|
;;; unrolled MUL for better performance, +25 bytes unpack size
|
||||||
|
ld b,d
|
||||||
|
DUP 8
|
||||||
|
add hl,hl
|
||||||
|
jr nc,0_f
|
||||||
|
add hl,de
|
||||||
|
0:
|
||||||
|
EDUP
|
||||||
|
|
||||||
|
ENDIF
|
||||||
|
|
||||||
|
add hl,bc ; HL = state_scale * (upkr_state >> 8) + (upkr_state & 255)
|
||||||
|
pop af ; restore prob and CF=bit
|
||||||
jr nc,.bit_is_0_2
|
jr nc,.bit_is_0_2
|
||||||
dec d ; D = 0xFF (DE = -prob)
|
dec d ; DE = -prob (also D = bit ? $FF : $00)
|
||||||
add hl,de ; HL += -prob
|
add hl,de ; HL += -prob
|
||||||
.bit_is_0_2: ; HL = state_offset + state_scale * (upkr_state >> 8)
|
; ^ this always preserves CF=1, because (state>>8) >= 128, state_scale: 7..250, prob: 7..250,
|
||||||
pop de
|
; so 7*128 > 250 and thus edge case `ADD hl=(7*128+0),de=(-250)` => CF=1
|
||||||
ld d,0 ; DE = (upkr_state & 255)
|
.bit_is_0_2:
|
||||||
add hl,de ; HL = state_offset + state_scale * (upkr_state >> 8) + (upkr_state & 255) ; new upkr_state
|
|
||||||
; *** adjust probs[context_index]
|
; *** adjust probs[context_index]
|
||||||
pop af ; restore prob and bit
|
rra ; + (bit<<4) ; part of -prob_offset, needs another -16
|
||||||
|
and $FC ; clear/keep correct bits to get desired (prob>>4) + extras, CF=0
|
||||||
|
rra
|
||||||
|
rra
|
||||||
|
rra ; A = (bit<<4) + (prob>>4), CF=(prob & 8)
|
||||||
|
adc a,-16 ; A = (bit<<4) - 16 + ((prob + 8)>>4) ; -prob_offset = (bit<<4) - 16
|
||||||
ld e,a
|
ld e,a
|
||||||
jr c,.bit_is_1
|
pop bc
|
||||||
ld d,-16 ; 0xF0
|
ld a,(bc) ; A = prob (cheaper + shorter to re-read again from memory)
|
||||||
.bit_is_1: ; D:E = -prob_offset:prob, A = prob
|
sub e ; A = 16 - (bit<<4) + prob - ((prob + 8)>>4) ; = prob_offset + prob - ((prob + 8)>>4)
|
||||||
and $F8
|
ld (bc),a ; probs[context_index] = prob_offset + prob - ((prob + 8) >> 4);
|
||||||
rra
|
add a,d ; restore CF = bit (D = bit ? $FF : $00 && A > 0)
|
||||||
rra
|
|
||||||
rra
|
|
||||||
rra
|
|
||||||
adc a,d ; A = -prob_offset + ((prob + 8) >> 4)
|
|
||||||
neg
|
|
||||||
add a,e ; A = prob_offset + prob - ((prob + 8) >> 4)
|
|
||||||
ld (bc),a ; update probs[context_index]
|
|
||||||
pop af ; restore resulting CF = bit
|
|
||||||
pop de
|
pop de
|
||||||
ret
|
ret
|
||||||
|
|
||||||
@@ -258,22 +295,19 @@ int upkr_decode_length(int context_index) {
|
|||||||
return length | (1 << bit_pos);
|
return length | (1 << bit_pos);
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
decode_length:
|
decode_number:
|
||||||
; HL = upkr_state
|
; HL = upkr_state
|
||||||
; IX = upkr_data_ptr
|
; IX = upkr_data_ptr
|
||||||
; BC = probs+context_index
|
; BC = probs+context_index-1
|
||||||
; A' = upkr_current_byte (!!! init to 0x80 at start, not 0x00)
|
; A' = upkr_current_byte (!!! init to 0x80 at start, not 0x00)
|
||||||
; return length in DE, CF=0
|
; return length in DE, CF=0
|
||||||
ld de,$7FFF ; length = 0 with positional-stop-bit
|
ld de,$FFFF ; length = 0 with positional-stop-bit
|
||||||
jr .loop_entry
|
or a ; CF=0 to skip getting data bit and use only `rr d : rr e` to fix init DE
|
||||||
.loop:
|
.loop:
|
||||||
inc c ; context_index + 1
|
call c,inc_c_decode_bit ; get data bit, context_index + 1 / if CF=0 just add stop bit into DE init
|
||||||
call decode_bit
|
|
||||||
rr d
|
rr d
|
||||||
rr e ; DE = length = (length >> 1) | (bit << 15);
|
rr e ; DE = length = (length >> 1) | (bit << 15);
|
||||||
inc c ; context_index += 2
|
call inc_c_decode_bit ; context_index += 2
|
||||||
.loop_entry:
|
|
||||||
call decode_bit
|
|
||||||
jr c,.loop
|
jr c,.loop
|
||||||
.fix_bit_pos:
|
.fix_bit_pos:
|
||||||
ccf ; NC will become this final `| (1 << bit_pos)` bit
|
ccf ; NC will become this final `| (1 << bit_pos)` bit
|
||||||
@@ -287,15 +321,61 @@ decode_length:
|
|||||||
; reserve space for probs array without emitting any machine code (using only EQU)
|
; reserve space for probs array without emitting any machine code (using only EQU)
|
||||||
|
|
||||||
IFDEF UPKR_PROBS_ORIGIN ; if specific address is defined by user, move probs array there
|
IFDEF UPKR_PROBS_ORIGIN ; if specific address is defined by user, move probs array there
|
||||||
ORG UPKR_PROBS_ORIGIN
|
probs: EQU ((UPKR_PROBS_ORIGIN) + 255) & -$100 ; probs array aligned to 256
|
||||||
|
ELSE
|
||||||
|
probs: EQU ($ + 255) & -$100 ; probs array aligned to 256
|
||||||
ENDIF
|
ENDIF
|
||||||
|
.real_c: EQU 1 + 255 + 1 + 2*NUMBER_BITS ; real size of probs array
|
||||||
probs: EQU ($+255) & -$100 ; probs array aligned to 256
|
|
||||||
.real_c: EQU 1 + 255 + 1 + 2*32 + 2*32 ; real size of probs array
|
|
||||||
.c: EQU (.real_c + 1) & -2 ; padding to even size (required by init code)
|
.c: EQU (.real_c + 1) & -2 ; padding to even size (required by init code)
|
||||||
.e: EQU probs + .c
|
.e: EQU probs + .c
|
||||||
|
|
||||||
DISPLAY "upkr.unpack probs array placed at: ",/A,probs,",\tsize: ",/A,probs.c
|
DISPLAY "upkr.unpack probs array placed at: ",/A,probs,",\tsize: ",/A,probs.c
|
||||||
|
|
||||||
|
/*
|
||||||
|
archived: negligibly faster but +6B longer decode_number variant using HL' and BC' to
|
||||||
|
do `number|=(1<<bit_pos);` type of logic in single loop.
|
||||||
|
*/
|
||||||
|
; decode_number:
|
||||||
|
; exx
|
||||||
|
; ld bc,1
|
||||||
|
; ld l,b
|
||||||
|
; ld h,b ; HL = 0
|
||||||
|
; .loop
|
||||||
|
; exx
|
||||||
|
; inc c
|
||||||
|
; call decode_bit
|
||||||
|
; jr nc,.done
|
||||||
|
; inc c
|
||||||
|
; call decode_bit
|
||||||
|
; exx
|
||||||
|
; jr nc,.b0
|
||||||
|
; add hl,bc
|
||||||
|
; .b0:
|
||||||
|
; sla c
|
||||||
|
; rl b
|
||||||
|
; jr .loop
|
||||||
|
; .done:
|
||||||
|
; exx
|
||||||
|
; add hl,bc
|
||||||
|
; push hl
|
||||||
|
; exx
|
||||||
|
; pop de
|
||||||
|
; ret
|
||||||
|
|
||||||
|
/*
|
||||||
|
archived: possible LUT variant of updating probs value, requires 512-aligned 512B table (not tested)
|
||||||
|
*/
|
||||||
|
; code is replacing decode_bit from "; *** adjust probs[context_index]", followed by `ld (bc),a : add a,d ...`
|
||||||
|
; ld c,a
|
||||||
|
; ld a,high(probs_update_table)/2 ; must be 512 aligned
|
||||||
|
; rla
|
||||||
|
; ld b,a
|
||||||
|
; ld a,(bc)
|
||||||
|
; pop bc
|
||||||
|
; -------------------------------------------
|
||||||
|
; probs_update_table: EQU probs-512
|
||||||
|
; -------------------------------------------
|
||||||
|
; table generator is not obvious and probably not short either, 20+ bytes almost for sure, maybe even 30-40
|
||||||
|
|
||||||
ENDMODULE
|
ENDMODULE
|
||||||
OPT pop
|
OPT pop
|
||||||
|
|||||||
Reference in New Issue
Block a user