15 Commits

Author SHA1 Message Date
c8924456aa -r reverses both input and output 2022-09-18 23:38:41 +02:00
7b0e22f459 Merge pull request #3 from ped7g/z80_ped7g
backward unpacker + example extended
2022-09-18 23:24:28 +02:00
Peter Helcmanovsky (Ped)
165f593a11 z80_unpacker: (codestyle) whitespace + temporary label rename 2022-09-18 23:04:37 +02:00
Peter Helcmanovsky (Ped)
d4bce4bf7c z80_unpacker: optimisation: -3B and ~-10T in decode_bit = 174B
unpack zx48.rom is now ~22.6s (from 23.0s)
(performance version is now 199 bytes, zx48.rom unpack 19.4s -> 19.0s)
2022-09-18 22:54:10 +02:00
Peter Helcmanovsky (Ped)
b13fa05413 z80_unpacker: add backward variant of unpacker + example extended 2022-09-18 00:23:14 +02:00
Peter Helcmanovsky (Ped)
3c773aca8d z80_unpacker: add performance variant of depacker 2022-09-16 03:38:03 +02:00
a5406deb30 Merge pull request #2 from ped7g/z80_ped7g
Z80 ped7g - few more optimisations for current variant of packer
2022-09-16 00:26:55 +02:00
Peter Helcmanovsky (Ped)
9211544cb9 z80_unpacker: add resulting snapshot file to example 2022-09-15 18:37:06 +02:00
Peter Helcmanovsky (Ped)
3fa9e0fa12 z80_unpacker: optimisations: 0B, -13T in decode_bit (stays 177B) 2022-09-15 18:22:33 +02:00
Peter Helcmanovsky (Ped)
aa3fad4d80 z80_unpacker: optimisations: -3B and ~-24T in decode_bit = 177B 2022-09-15 18:22:32 +02:00
Peter Helcmanovsky (Ped)
6624940ed9 z80_unpacker: optimisations: -2B and -27T in decode_bit = 180B 2022-09-15 18:22:32 +02:00
Peter Helcmanovsky (Ped)
c3a9773e5c z80_unpacker: optimisations: -1B in unpack implementation = 182B 2022-09-15 18:22:31 +02:00
Peter Helcmanovsky (Ped)
a75a35efb2 z80_unpacker: probs context-size for offset/length numbers as EQU 2022-09-15 18:22:27 +02:00
540a91d1ba forgot to add back -l 9 2022-09-15 00:18:30 +02:00
e7aaf1491a add old-prob-update to compare script, add reverse option 2022-09-14 23:51:38 +02:00
10 changed files with 159 additions and 65 deletions

View File

@@ -2,7 +2,9 @@
configs = [
[:master, '-b'],
[:z80, '-b']
[:z80, '-b'],
[:z80, ['-b', '-r']],
['old-prob-update', '-b']
]
files = Dir[ARGV[0] + '/*'].select {|f| !(f =~ /\.txt$/) }
@@ -34,8 +36,8 @@ for config in configs
config_results = []
results << config_results
for file in files
if system('cargo', 'run', '--release', 'pack', '-l', '9', config[1], file, '/tmp/out.upk') &&
system('cargo', 'run', '--release', 'unpack', config[1], '/tmp/out.upk', '/tmp/out.bin') &&
if system(*['cargo', 'run', '--release', 'pack', '-l', '9', config[1], file, '/tmp/out.upk'].flatten) &&
system(*['cargo', 'run', '--release', 'unpack', config[1], '/tmp/out.upk', '/tmp/out.bin'].flatten) &&
File.read(file) == File.read('/tmp/out.bin')
size = File.size('/tmp/out.upk')
config_results << size

View File

@@ -10,16 +10,20 @@ fn main() -> Result<()> {
Some("pack") => {
let level = args.opt_value_from_str(["-l", "--level"])?.unwrap_or(2u8);
let use_bitstream = args.contains(["-b", "--bitstream"]);
let reverse = args.contains(["-r", "--reverse"]);
let infile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
let outfile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
let mut data = vec![];
File::open(infile)?.read_to_end(&mut data)?;
if reverse {
data.reverse();
}
let mut pb = pbr::ProgressBar::new(data.len() as u64);
pb.set_units(pbr::Units::Bytes);
let packed_data = upkr::pack(
let mut packed_data = upkr::pack(
&data,
level,
use_bitstream,
@@ -29,6 +33,10 @@ fn main() -> Result<()> {
);
pb.finish();
if reverse {
packed_data.reverse();
}
println!(
"Compressed {} bytes to {} bytes ({}%)",
data.len(),
@@ -39,14 +47,21 @@ fn main() -> Result<()> {
}
Some("unpack") => {
let use_bitstream = args.contains(["-b", "--bitstream"]);
let reverse = args.contains(["-r", "--reverse"]);
let infile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
let outfile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
let mut data = vec![];
File::open(infile)?.read_to_end(&mut data)?;
let packed_data = upkr::unpack(&data, use_bitstream);
File::create(outfile)?.write_all(&packed_data)?;
if reverse {
data.reverse();
}
let mut unpacked_data = upkr::unpack(&data, use_bitstream);
if reverse {
unpacked_data.reverse();
}
File::create(outfile)?.write_all(&unpacked_data)?;
}
Some(other) => {
bail!("Unknown subcommand '{}'", other);

View File

@@ -1,4 +1,3 @@
*.bin
*.tap
*.sna
*.lst

View File

@@ -3,7 +3,8 @@
DEVICE ZXSPECTRUM48,$8FFF
ORG $9000
compressed_scr_files: ; border color byte + upkr-packed .scr file
;; forward example data
compressed_scr_files.fwd: ; border color byte + upkr-packed .scr file
DB 1
INCBIN "screens/Grongy - ZX Spectrum (2022).scr.upk"
DB 7
@@ -13,37 +14,87 @@ compressed_scr_files: ; border color byte + upkr-packed .scr file
DB 6
INCBIN "screens/diver - Back to Bjork (2015).scr.upk"
.e:
;; backward example data (unpacker goes from the end of the data!)
compressed_scr_files.rwd.e: EQU $-1 ; the final IX will point one byte ahead of "$" here
INCBIN "screens.reversed/diver - Back to Bjork (2015).scr.upk"
DB 6
INCBIN "screens.reversed/diver - Mercenary 4. The Heaven's Devil (2014) (Forever 2014 Olympic Edition, 1).scr.upk"
DB 0
INCBIN "screens.reversed/Schafft - Poison (2017).scr.upk"
DB 7
INCBIN "screens.reversed/Grongy - ZX Spectrum (2022).scr.upk"
compressed_scr_files.rwd: ; border color byte + upkr-packed .scr file (backward)
DB 1
start:
di
; OPT --zxnext
; nextreg 7,3 ; ZX Next: switch to 28Mhz
ld ix,compressed_scr_files
.slideshow_loop
;;; FORWARD packed/unpacked data demo
ld ix,compressed_scr_files.fwd
.slideshow_loop.fwd:
; set BORDER for next image
ldi a,(ix) ; fake: ld a,(ix) : inc ix
ld a,(ix)
inc ix
out (254),a
; call unpack of next image directly into VRAM
ld de,$4000 ; target VRAM
exx
; IX = packed data, DE' = destination ($4000)
; returned IX will point right after the packed data
call upkr.unpack
call fwd.upkr.unpack
; do some busy loop with CPU to delay between images
call delay
; check if all images were displayed; if not, loop back to unpack the next one
ld a,ixl
cp low compressed_scr_files.fwd.e
jr nz,.slideshow_loop.fwd
;;; BACKWARD packed/unpacked data demo
ld ix,compressed_scr_files.rwd
.slideshow_loop.rwd:
; set BORDER for next image
ld a,(ix)
dec ix
out (254),a
; call unpack of next image directly into VRAM
ld de,$5AFF ; target VRAM
exx
; IX = packed data, DE' = destination
; returned IX will point right before the packed data (one byte below its lowest address)
call rwd.upkr.unpack
; do some busy loop with CPU to delay between images
call delay
; check if all images were displayed, loop around from first one then
ld a,ixl
cp low compressed_scr_files.rwd.e
jr nz,.slideshow_loop.rwd
jr start
delay:
ld bc,$AA00
.delay:
.8 ex (sp),ix
dec c
jr nz,.delay
djnz .delay
; check if all images were displayed, loop around from first one then
ld a,ixl
cp low compressed_scr_files.e
jr z,start
jr .slideshow_loop
ret
; include the depacker library, optionally putting probs array buffer near end of RAM
DEFINE UPKR_PROBS_ORIGIN $FA00 ; if not defined, array will be put after unpack code
MODULE fwd
INCLUDE "../unpack.asm"
ENDMODULE
MODULE rwd
DEFINE BACKWARDS_UNPACK ; defined to build backwards unpack
; initial IX points at last byte of compressed data
; initial DE' points at last byte of unpacked data
INCLUDE "../unpack.asm"
ENDMODULE
SAVESNA "example.sna",start

Binary file not shown.

View File

@@ -12,12 +12,21 @@
;; upkr.unpack
;; IN: IX = packed data, DE' (shadow DE) = destination
;; OUT: IX = after packed data (with BACKWARDS_UNPACK: one byte before the packed data)
;; modifies: all registers except IY, requires 14 bytes of stack space
;; modifies: all registers except IY, requires 10 bytes of stack space
;;
; DEFINE BACKWARDS_UNPACK ; uncomment to build backwards depacker
; initial IX points at last byte of compressed data
; initial DE' points at last byte of unpacked data
; DEFINE UPKR_UNPACK_SPEED ; uncomment to get larger but faster unpack routine
OPT push reset --syntax=abf
MODULE upkr
NUMBER_BITS EQU 16+15 ; context-bits per offset/length (16+15 for 16bit offsets/pointers)
; numbers (offsets/lengths) are encoded like: 1a1b1c1d1e0 = 0000'0000'001e'dbca
/*
u8* upkr_data_ptr;
u8 upkr_probs[1 + 255 + 1 + 2*32 + 2*32];
@@ -81,8 +90,8 @@ unpack:
; BC = probs (context_index 0), state HL = 0, A' = 0x80 (no source bits left in upkr_current_byte)
; ** main loop to decompress data
.decompress_data_reset_match:
ld d,0 ; prev_was_match = 0;
; D = prev_was_match = uninitialised, literal is expected first => will reset D to "false"
; values for false/true of prev_was_match are: false = high(probs), true = 1 + high(probs)
.decompress_data:
ld c,0
call decode_bit ; if(upkr_decode_bit(0))
@@ -97,23 +106,24 @@ unpack:
ld a,c
exx
ld (de),a ; *write_ptr++ = byte;
inc de
IFNDEF BACKWARDS_UNPACK : inc de : ELSE : dec de : ENDIF
exx
jr .decompress_data_reset_match
ld d,b ; prev_was_match = false
jr .decompress_data
; * copy chunk of already decompressed data (match)
.copy_chunk:
ld a,b
inc b ; context_index = 256
; if(prev_was_match || upkr_decode_bit(256)) {
; offset = upkr_decode_length(257) - 1;
; if (0 == offset) break;
; }
xor a
cp d ; CF = prev_was_match
call nc,decode_bit ; if not prev_was_match, then upkr_decode_bit(256)
jr nc,.keep_offset ; if neither, keep old offset
inc c
call decode_length
inc c ; context_index to first "number" set for offsets decoding (257)
call decode_number
dec de ; offset = upkr_decode_length(257) - 1;
ld a,d
or e
@@ -126,18 +136,22 @@ unpack:
; ++write_ptr;
; }
; prev_was_match = 1;
ld c,low(257+64) ; context_index = 257+64
call decode_length ; length = upkr_decode_length(257 + 64);
ld c,low(257 + NUMBER_BITS) ; context_index to second "number" set for lengths decoding
call decode_number ; length = upkr_decode_length(257 + 64);
push de
exx
ld h,d ; DE = write_ptr
ld l,e
.offset+*: ld bc,0
sbc hl,bc ; CF=0 from decode_length ; HL = write_ptr - offset
IFNDEF BACKWARDS_UNPACK
sbc hl,bc ; CF=0 from decode_number ; HL = write_ptr - offset
ELSE
add hl,bc ; HL = write_ptr + offset
ENDIF
pop bc ; BC = length
ldir
IFNDEF BACKWARDS_UNPACK : ldir : ELSE : lddr : ENDIF
exx
ld d,b ; prev_was_match = non-zero
ld d,b ; prev_was_match = true
djnz .decompress_data ; adjust context_index back to 0..255 range, go to main loop
/*
@@ -189,7 +203,7 @@ decode_bit:
jr nz,.has_bit ; CF=data, ZF=0 -> some bits + stop bit still available
; CF=1 (by stop bit)
ld a,(ix)
inc ix ; upkr_current_byte = *upkr_data_ptr++;
IFNDEF BACKWARDS_UNPACK : inc ix : ELSE : dec ix : ENDIF ; upkr_current_byte = *upkr_data_ptr++;
adc a,a ; CF=data, b0=1 as new stop bit
.has_bit:
adc hl,hl ; upkr_state = (upkr_state << 1) + (upkr_current_byte >> 7);
@@ -202,9 +216,8 @@ decode_bit:
cp l ; CF = bit = prob-1 < (upkr_state & 255) <=> prob <= (upkr_state & 255)
inc a
; ** adjust state
push af
push af
push hl
push bc
ld c,l ; C = (upkr_state & 255); (preserving the value)
push af
jr nc,.bit_is_0
neg ; A = -prob == (256-prob), CF=1 preserved
@@ -212,38 +225,52 @@ decode_bit:
ld d,0
ld e,a ; DE = state_scale ; prob || (256-prob)
ld l,d ; H:L = (upkr_state>>8) : 0
ld a,8 ; counter
IFNDEF UPKR_UNPACK_SPEED
;; looped MUL for minimum unpack size
ld b,8 ; counter
.mulLoop:
add hl,hl
jr nc,.mul0
add hl,de
.mul0:
dec a
jr nz,.mulLoop ; until HL = state_scale * (upkr_state>>8)
pop af
djnz .mulLoop ; until HL = state_scale * (upkr_state>>8), also BC becomes (upkr_state & 255)
ELSE
;;; unrolled MUL for better performance, +25 bytes unpack size
ld b,d
DUP 8
add hl,hl
jr nc,0_f
add hl,de
0:
EDUP
ENDIF
add hl,bc ; HL = state_scale * (upkr_state >> 8) + (upkr_state & 255)
pop af ; restore prob and CF=bit
jr nc,.bit_is_0_2
dec d ; D = 0xFF (DE = -prob)
dec d ; DE = -prob (also D = bit ? $FF : $00)
add hl,de ; HL += -prob
.bit_is_0_2: ; HL = state_offset + state_scale * (upkr_state >> 8)
pop de
ld d,0 ; DE = (upkr_state & 255)
add hl,de ; HL = state_offset + state_scale * (upkr_state >> 8) + (upkr_state & 255) ; new upkr_state
; ^ this always preserves CF=1, because (state>>8) >= 128, state_scale: 7..250, prob: 7..250,
; so 7*128 > 250 and thus edge case `ADD hl=(7*128+0),de=(-250)` => CF=1
.bit_is_0_2:
; *** adjust probs[context_index]
pop af ; restore prob and bit
ld e,a
jr c,.bit_is_1
ld d,-16 ; 0xF0
.bit_is_1: ; D:E = -prob_offset:prob, A = prob
and $F8
ld e,a ; preserve prob
rra ; + (bit<<4) ; part of -prob_offset, needs another -16
and $FC ; clear/keep correct bits to get desired (prob>>4) + extras, CF=0
rra
rra
rra
rra
adc a,d ; A = -prob_offset + ((prob + 8) >> 4)
neg
add a,e ; A = prob_offset + prob - ((prob + 8) >> 4)
ld (bc),a ; update probs[context_index]
pop af ; restore resulting CF = bit
rra ; A = (bit<<4) + (prob>>4), CF=(prob & 8)
adc a,-16 ; A = (bit<<4) - 16 + ((prob + 8)>>4) ; -prob_offset = (bit<<4) - 16
sub e ; A = (bit<<4) - 16 + ((prob + 8)>>4) - prob ; = ((prob + 8)>>4) - prob_offset - prob
neg ; A = prob_offset + prob - ((prob + 8)>>4)
pop bc
ld (bc),a ; probs[context_index] = prob_offset + prob - ((prob + 8) >> 4);
add a,d ; restore CF = bit (D = bit ? $FF : $00 && A > 0)
pop de
ret
@@ -258,7 +285,7 @@ int upkr_decode_length(int context_index) {
return length | (1 << bit_pos);
}
*/
decode_length:
decode_number:
; HL = upkr_state
; IX = upkr_data_ptr
; BC = probs+context_index
@@ -287,11 +314,11 @@ decode_length:
; reserve space for probs array without emitting any machine code (using only EQU)
IFDEF UPKR_PROBS_ORIGIN ; if specific address is defined by user, move probs array there
ORG UPKR_PROBS_ORIGIN
probs: EQU ((UPKR_PROBS_ORIGIN) + 255) & -$100 ; probs array aligned to 256
ELSE
probs: EQU ($ + 255) & -$100 ; probs array aligned to 256
ENDIF
probs: EQU ($+255) & -$100 ; probs array aligned to 256
.real_c: EQU 1 + 255 + 1 + 2*32 + 2*32 ; real size of probs array
.real_c: EQU 1 + 255 + 1 + 2*NUMBER_BITS ; real size of probs array
.c: EQU (.real_c + 1) & -2 ; padding to even size (required by init code)
.e: EQU probs + .c