Merge pull request #2 from ped7g/z80_ped7g

Z80 ped7g - a few more optimisations for the current variant of the packer
This commit is contained in:
2022-09-16 00:26:55 +02:00
committed by GitHub
3 changed files with 31 additions and 32 deletions

View File

@@ -1,4 +1,3 @@
*.bin *.bin
*.tap *.tap
*.sna
*.lst *.lst

Binary file not shown.

View File

@@ -12,12 +12,15 @@
;; upkr.unpack ;; upkr.unpack
;; IN: IX = packed data, DE' (shadow DE) = destination ;; IN: IX = packed data, DE' (shadow DE) = destination
;; OUT: IX = after packed data ;; OUT: IX = after packed data
;; modifies: all registers except IY, requires 14 bytes of stack space ;; modifies: all registers except IY, requires 10 bytes of stack space
;; ;;
OPT push reset --syntax=abf OPT push reset --syntax=abf
MODULE upkr MODULE upkr
NUMBER_BITS EQU 16+15 ; context-bits per offset/length (16+15 for 16bit offsets/pointers)
; numbers (offsets/lengths) are encoded like: 1a1b1c1d1e0 = 0000'0000'001e'dcba
/* /*
u8* upkr_data_ptr; u8* upkr_data_ptr;
u8 upkr_probs[1 + 255 + 1 + 2*32 + 2*32]; u8 upkr_probs[1 + 255 + 1 + 2*32 + 2*32];
@@ -81,8 +84,8 @@ unpack:
; BC = probs (context_index 0), state HL = 0, A' = 0x80 (no source bits left in upkr_current_byte) ; BC = probs (context_index 0), state HL = 0, A' = 0x80 (no source bits left in upkr_current_byte)
; ** main loop to decompress data ; ** main loop to decompress data
.decompress_data_reset_match: ; D = prev_was_match = uninitialised, literal is expected first => will reset D to "false"
ld d,0 ; prev_was_match = 0; ; values for false/true of prev_was_match are: false = high(probs), true = 1 + high(probs)
.decompress_data: .decompress_data:
ld c,0 ld c,0
call decode_bit ; if(upkr_decode_bit(0)) call decode_bit ; if(upkr_decode_bit(0))
@@ -99,21 +102,22 @@ unpack:
ld (de),a ; *write_ptr++ = byte; ld (de),a ; *write_ptr++ = byte;
inc de inc de
exx exx
jr .decompress_data_reset_match ld d,b ; prev_was_match = false
jr .decompress_data
; * copy chunk of already decompressed data (match) ; * copy chunk of already decompressed data (match)
.copy_chunk: .copy_chunk:
ld a,b
inc b ; context_index = 256 inc b ; context_index = 256
; if(prev_was_match || upkr_decode_bit(256)) { ; if(prev_was_match || upkr_decode_bit(256)) {
; offset = upkr_decode_length(257) - 1; ; offset = upkr_decode_length(257) - 1;
; if (0 == offset) break; ; if (0 == offset) break;
; } ; }
xor a
cp d ; CF = prev_was_match cp d ; CF = prev_was_match
call nc,decode_bit ; if not prev_was_match, then upkr_decode_bit(256) call nc,decode_bit ; if not prev_was_match, then upkr_decode_bit(256)
jr nc,.keep_offset ; if neither, keep old offset jr nc,.keep_offset ; if neither, keep old offset
inc c inc c ; context_index to first "number" set for offsets decoding (257)
call decode_length call decode_number
dec de ; offset = upkr_decode_length(257) - 1; dec de ; offset = upkr_decode_length(257) - 1;
ld a,d ld a,d
or e or e
@@ -126,18 +130,18 @@ unpack:
; ++write_ptr; ; ++write_ptr;
; } ; }
; prev_was_match = 1; ; prev_was_match = 1;
ld c,low(257+64) ; context_index = 257+64 ld c,low(257 + NUMBER_BITS) ; context_index to second "number" set for lengths decoding
call decode_length ; length = upkr_decode_length(257 + 64); call decode_number ; length = upkr_decode_length(257 + 64);
push de push de
exx exx
ld h,d ; DE = write_ptr ld h,d ; DE = write_ptr
ld l,e ld l,e
.offset+*: ld bc,0 .offset+*: ld bc,0
sbc hl,bc ; CF=0 from decode_length ; HL = write_ptr - offset sbc hl,bc ; CF=0 from decode_number ; HL = write_ptr - offset
pop bc ; BC = length pop bc ; BC = length
ldir ldir
exx exx
ld d,b ; prev_was_match = non-zero ld d,b ; prev_was_match = true
djnz .decompress_data ; adjust context_index back to 0..255 range, go to main loop djnz .decompress_data ; adjust context_index back to 0..255 range, go to main loop
/* /*
@@ -202,9 +206,8 @@ decode_bit:
cp l ; CF = bit = prob-1 < (upkr_state & 255) <=> prob <= (upkr_state & 255) cp l ; CF = bit = prob-1 < (upkr_state & 255) <=> prob <= (upkr_state & 255)
inc a inc a
; ** adjust state ; ** adjust state
push af push bc
push af ld c,l ; C = (upkr_state & 255); (preserving the value)
push hl
push af push af
jr nc,.bit_is_0 jr nc,.bit_is_0
neg ; A = -prob == (256-prob), CF=1 preserved neg ; A = -prob == (256-prob), CF=1 preserved
@@ -212,28 +215,23 @@ decode_bit:
ld d,0 ld d,0
ld e,a ; DE = state_scale ; prob || (256-prob) ld e,a ; DE = state_scale ; prob || (256-prob)
ld l,d ; H:L = (upkr_state>>8) : 0 ld l,d ; H:L = (upkr_state>>8) : 0
ld a,8 ; counter ld b,8 ; counter
.mulLoop: .mulLoop:
add hl,hl add hl,hl
jr nc,.mul0 jr nc,.mul0
add hl,de add hl,de
.mul0: .mul0:
dec a djnz .mulLoop ; until HL = state_scale * (upkr_state>>8), also BC becomes (upkr_state & 255)
jr nz,.mulLoop ; until HL = state_scale * (upkr_state>>8) add hl,bc ; HL = state_scale * (upkr_state >> 8) + (upkr_state & 255)
pop af pop af
ld d,-16 ; D = -prob_offset (-16 0xF0 when bit = 0)
jr nc,.bit_is_0_2 jr nc,.bit_is_0_2
dec d ; D = 0xFF (DE = -prob) ld d,b ; D = -prob_offset (0 when bit = 1) (also does fix following ADD)
add hl,de ; HL += -prob dec h
.bit_is_0_2: ; HL = state_offset + state_scale * (upkr_state >> 8) add hl,de ; HL += -prob (HL += (256 - prob) - 256)
pop de .bit_is_0_2: ; HL = state_offset + state_scale * (upkr_state >> 8) + (upkr_state & 255) ; new upkr_state
ld d,0 ; DE = (upkr_state & 255)
add hl,de ; HL = state_offset + state_scale * (upkr_state >> 8) + (upkr_state & 255) ; new upkr_state
; *** adjust probs[context_index] ; *** adjust probs[context_index]
pop af ; restore prob and bit ld e,a ; D:E = -prob_offset:prob, A = prob
ld e,a
jr c,.bit_is_1
ld d,-16 ; 0xF0
.bit_is_1: ; D:E = -prob_offset:prob, A = prob
and $F8 and $F8
rra rra
rra rra
@@ -242,8 +240,10 @@ decode_bit:
adc a,d ; A = -prob_offset + ((prob + 8) >> 4) adc a,d ; A = -prob_offset + ((prob + 8) >> 4)
neg neg
add a,e ; A = prob_offset + prob - ((prob + 8) >> 4) add a,e ; A = prob_offset + prob - ((prob + 8) >> 4)
pop bc
ld (bc),a ; update probs[context_index] ld (bc),a ; update probs[context_index]
pop af ; restore resulting CF = bit add a,d ; bit=0: A = 23..249, D = 240 -> CF=1 || bit=1: D=0 -> CF=0
ccf ; resulting CF = bit restored
pop de pop de
ret ret
@@ -258,7 +258,7 @@ int upkr_decode_length(int context_index) {
return length | (1 << bit_pos); return length | (1 << bit_pos);
} }
*/ */
decode_length: decode_number:
; HL = upkr_state ; HL = upkr_state
; IX = upkr_data_ptr ; IX = upkr_data_ptr
; BC = probs+context_index ; BC = probs+context_index
@@ -291,7 +291,7 @@ decode_length:
ENDIF ENDIF
probs: EQU ($+255) & -$100 ; probs array aligned to 256 probs: EQU ($+255) & -$100 ; probs array aligned to 256
.real_c: EQU 1 + 255 + 1 + 2*32 + 2*32 ; real size of probs array .real_c: EQU 1 + 255 + 1 + 2*NUMBER_BITS ; real size of probs array
.c: EQU (.real_c + 1) & -2 ; padding to even size (required by init code) .c: EQU (.real_c + 1) & -2 ; padding to even size (required by init code)
.e: EQU probs + .c .e: EQU probs + .c