From d4bce4bf7cb1095a797ca5853c0c01fcba4f603f Mon Sep 17 00:00:00 2001 From: "Peter Helcmanovsky (Ped)" Date: Sun, 18 Sep 2022 22:54:10 +0200 Subject: [PATCH] z80_unpacker: optimisation: -3B and ~-10T in decode_bit = 174B unpack zx48.rom is now ~22.6s (from 23.0s) (performance version is now 199 bytes, zx48.rom unpack 19.4s -> 19.0s) --- z80_unpacker/example/example.sna | Bin 49179 -> 49179 bytes z80_unpacker/unpack.asm | 31 +++++++++++++++---------------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/z80_unpacker/example/example.sna b/z80_unpacker/example/example.sna index 78df3d8a2cf9da82d8237271b0d006240e53706b..515f5d68690562b02a92e501cbcea636569d53df 100644 GIT binary patch delta 175 zcmbQ;z&yKwd4u>u#tV}r7KZU{eJEYE`NZ4c^oM*3XSYtSS!ge8z$7XeFaPY1yuAFm z50l@z9As*md~uD(w zP|{@GMM_Mnk0vi%C}n`fyo)CV|JO@!f=!b;Ee)bAKG~lJGA(|d+^G0Ve51i5mdPdi GmjeKk{!YRG delta 167 zcmbQ;z&yKwd4u>u#>7KZWdcqm=9`NZ4c^oM*3XLn4lS!gdN_Q8NTKvgpS*$;Uj z=z8nYe2}The)74620BWQEChJYDm^+aXz;W0rjYc_0Ew3sEW55KJu=|oIh*kaVwS`s upjjD{wH7HcX*`;|aG{h2b|WvI6#QQ=!3j1{>a;YF-rT$Bq0Hox{mTKZu}H!I diff --git a/z80_unpacker/unpack.asm b/z80_unpacker/unpack.asm index b7e5120..b1bcbce 100644 --- a/z80_unpacker/unpack.asm +++ b/z80_unpacker/unpack.asm @@ -250,27 +250,26 @@ decode_bit: ENDIF add hl,bc ; HL = state_scale * (upkr_state >> 8) + (upkr_state & 255) - pop af - ld d,-16 ; D = -prob_offset (-16 0xF0 when bit = 0) + pop af ; restore prob and CF=bit jr nc,.bit_is_0_2 - ld d,b ; D = -prob_offset (0 when bit = 1) (also does fix following ADD) - dec h - add hl,de ; HL += -prob (HL += (256 - prob) - 256) -.bit_is_0_2: ; HL = state_offset + state_scale * (upkr_state >> 8) + (upkr_state & 255) ; new upkr_state + dec d ; DE = -prob (also D = bit ? $FF : $00) + add hl,de ; HL += -prob + ; ^ this always preserves CF=1, because (state>>8) >= 128, state_scale: 7..250, prob: 7..250, + ; so 7*128 > 250 and thus edge case `ADD hl=(7*128+0),de=(-250)` => CF=1 +.bit_is_0_2: ; *** adjust probs[context_index] - ld e,a ; D:E = -prob_offset:prob, A = prob - and $F8 + ld e,a ; preserve prob + rra ; + (bit<<4) ; part of -prob_offset, needs another -16 + and $FC ; clear/keep correct bits to get desired (prob>>4) + extras, CF=0 rra rra - rra - rra - adc a,d ; A = -prob_offset + ((prob + 8) >> 4) - neg - add a,e ; A = prob_offset + prob - ((prob + 8) >> 4) + rra ; A = (bit<<4) + (prob>>4), CF=(prob & 8) + adc a,-16 ; A = (bit<<4) - 16 + ((prob + 8)>>4) ; -prob_offset = (bit<<4) - 16 + sub e ; A = (bit<<4) - 16 + ((prob + 8)>>4) - prob ; = ((prob + 8)>>4) - prob_offset - prob + neg ; A = prob_offset + prob - ((prob + 8)>>4) pop bc - ld (bc),a ; update probs[context_index] - add a,d ; bit=0: A = 23..249, D = 240 -> CF=1 || bit=1: D=0 -> CF=0 - ccf ; resulting CF = bit restored + ld (bc),a ; probs[context_index] = prob_offset + prob - ((prob + 8) >> 4); + add a,d ; restore CF = bit (D = bit ? $FF : $00 && A > 0) pop de ret