diff --git a/z80_unpacker/example/example.asm b/z80_unpacker/example/example.asm index 80fd790..e3e6570 100644 --- a/z80_unpacker/example/example.asm +++ b/z80_unpacker/example/example.asm @@ -3,7 +3,8 @@ DEVICE ZXSPECTRUM48,$8FFF ORG $9000 -compressed_scr_files: ; border color byte + upkr-packed .scr file + ;; forward example data +compressed_scr_files.fwd: ; border color byte + upkr-packed .scr file DB 1 INCBIN "screens/Grongy - ZX Spectrum (2022).scr.upk" DB 7 @@ -13,37 +14,87 @@ compressed_scr_files: ; border color byte + upkr-packed .scr file DB 6 INCBIN "screens/diver - Back to Bjork (2015).scr.upk" .e: + ;; backward example data (unpacker goes from the end of the data!) +compressed_scr_files.rwd.e: EQU $-1 ; the final IX will point one byte ahead of "$" here + INCBIN "screens.reversed/diver - Back to Bjork (2015).scr.upk" + DB 6 + INCBIN "screens.reversed/diver - Mercenary 4. The Heaven's Devil (2014) (Forever 2014 Olympic Edition, 1).scr.upk" + DB 0 + INCBIN "screens.reversed/Schafft - Poison (2017).scr.upk" + DB 7 + INCBIN "screens.reversed/Grongy - ZX Spectrum (2022).scr.upk" +compressed_scr_files.rwd: ; border color byte + upkr-packed .scr file (backward) + DB 1 start: di ; OPT --zxnext -; nextreg 7,3 ; ZX Next: switch to 28Mhz - ld ix,compressed_scr_files -.slideshow_loop +; nextreg 7,3 ; ZX Next: switch to 28Mhz + + ;;; FORWARD packed/unpacked data demo + ld ix,compressed_scr_files.fwd +.slideshow_loop.fwd: ; set BORDER for next image - ldi a,(ix) ; fake: ld a,(ix) : inc ix + ld a,(ix) + inc ix out (254),a ; call unpack of next image directly into VRAM - ld de,$4000 ; target VRAM + ld de,$4000 ; target VRAM exx ; IX = packed data, DE' = destination ($4000) ; returned IX will point right after the packed data - call upkr.unpack + call fwd.upkr.unpack ; do some busy loop with CPU to delay between images + call delay + ; check if all images were displayed, loop around from first one then + ld a,ixl + cp low compressed_scr_files.fwd.e + jr nz,.slideshow_loop.fwd + + ;;; BACKWARD packed/unpacked data demo + ld ix,compressed_scr_files.rwd +.slideshow_loop.rwd: + ; set BORDER for next image + ld a,(ix) + dec ix + out (254),a + ; call unpack of next image directly into VRAM + ld de,$5AFF ; target VRAM + exx + ; IX = packed data, DE' = destination + ; returned IX will point right ahead of the packed data + call rwd.upkr.unpack + ; do some busy loop with CPU to delay between images + call delay + ; check if all images were displayed, loop around from first one then + ld a,ixl + cp low compressed_scr_files.rwd.e + jr nz,.slideshow_loop.rwd + + jr start + +delay: ld bc,$AA00 .delay: .8 ex (sp),ix dec c jr nz,.delay djnz .delay - ; check if all images were displayed, loop around from first one then - ld a,ixl - cp low compressed_scr_files.e - jr z,start - jr .slideshow_loop + ret ; include the depacker library, optionally putting probs array buffer near end of RAM DEFINE UPKR_PROBS_ORIGIN $FA00 ; if not defined, array will be put after unpack code - INCLUDE "../unpack.asm" + + MODULE fwd + INCLUDE "../unpack.asm" + ENDMODULE + + MODULE rwd + DEFINE BACKWARDS_UNPACK ; defined to build backwards unpack + ; initial IX points at last byte of compressed data + ; initial DE' points at last byte of unpacked data + + INCLUDE "../unpack.asm" + ENDMODULE SAVESNA "example.sna",start diff --git a/z80_unpacker/example/example.sna b/z80_unpacker/example/example.sna index a1f9f56..515f5d6 100644 Binary files a/z80_unpacker/example/example.sna and b/z80_unpacker/example/example.sna differ diff --git a/z80_unpacker/example/screens.reversed/Grongy - ZX Spectrum (2022).scr.upk b/z80_unpacker/example/screens.reversed/Grongy - ZX Spectrum (2022).scr.upk new file mode 100644 index 0000000..0e8777c Binary files /dev/null and b/z80_unpacker/example/screens.reversed/Grongy - ZX Spectrum (2022).scr.upk differ diff --git a/z80_unpacker/example/screens.reversed/Schafft - Poison (2017).scr.upk b/z80_unpacker/example/screens.reversed/Schafft - Poison (2017).scr.upk new file mode 100644 index 0000000..32f5401 Binary files /dev/null and b/z80_unpacker/example/screens.reversed/Schafft - Poison (2017).scr.upk differ diff --git a/z80_unpacker/example/screens.reversed/diver - Back to Bjork (2015).scr.upk b/z80_unpacker/example/screens.reversed/diver - Back to Bjork (2015).scr.upk new file mode 100644 index 0000000..423885a Binary files /dev/null and b/z80_unpacker/example/screens.reversed/diver - Back to Bjork (2015).scr.upk differ diff --git a/z80_unpacker/example/screens.reversed/diver - Mercenary 4. The Heaven's Devil (2014) (Forever 2014 Olympic Edition, 1).scr.upk b/z80_unpacker/example/screens.reversed/diver - Mercenary 4. The Heaven's Devil (2014) (Forever 2014 Olympic Edition, 1).scr.upk new file mode 100644 index 0000000..5e2e2ae Binary files /dev/null and b/z80_unpacker/example/screens.reversed/diver - Mercenary 4. The Heaven's Devil (2014) (Forever 2014 Olympic Edition, 1).scr.upk differ diff --git a/z80_unpacker/unpack.asm b/z80_unpacker/unpack.asm index 6dc9097..26e47b7 100644 --- a/z80_unpacker/unpack.asm +++ b/z80_unpacker/unpack.asm @@ -15,6 +15,12 @@ ;; modifies: all registers except IY, requires 10 bytes of stack space ;; +; DEFINE BACKWARDS_UNPACK ; uncomment to build backwards depacker + ; initial IX points at last byte of compressed data + ; initial DE' points at last byte of unpacked data + +; DEFINE UPKR_UNPACK_SPEED ; uncomment to get larger but faster unpack routine + OPT push reset --syntax=abf MODULE upkr @@ -100,7 +106,7 @@ unpack: ld a,c exx ld (de),a ; *write_ptr++ = byte; - inc de + IFNDEF BACKWARDS_UNPACK : inc de : ELSE : dec de : ENDIF exx ld d,b ; prev_was_match = false jr .decompress_data @@ -137,9 +143,13 @@ unpack: ld h,d ; DE = write_ptr ld l,e .offset+*: ld bc,0 + IFNDEF BACKWARDS_UNPACK sbc hl,bc ; CF=0 from decode_number ; HL = write_ptr - offset + ELSE + add hl,bc ; HL = write_ptr + offset + ENDIF pop bc ; BC = length - ldir + IFNDEF BACKWARDS_UNPACK : ldir : ELSE : lddr : ENDIF exx ld d,b ; prev_was_match = true djnz .decompress_data ; adjust context_index back to 0..255 range, go to main loop @@ -193,7 +203,7 @@ decode_bit: jr nz,.has_bit ; CF=data, ZF=0 -> some bits + stop bit still available ; CF=1 (by stop bit) ld a,(ix) - inc ix ; upkr_current_byte = *upkr_data_ptr++; + IFNDEF BACKWARDS_UNPACK : inc ix : ELSE : dec ix : ENDIF ; upkr_current_byte = *upkr_data_ptr++; adc a,a ; CF=data, b0=1 as new stop bit .has_bit: adc hl,hl ; upkr_state = (upkr_state << 1) + (upkr_current_byte >> 7); @@ -215,6 +225,10 @@ decode_bit: ld d,0 ld e,a ; DE = state_scale ; prob || (256-prob) ld l,d ; H:L = (upkr_state>>8) : 0 + + IFNDEF UPKR_UNPACK_SPEED + + ;; looped MUL for minimum unpack size ld b,8 ; counter .mulLoop: add hl,hl @@ -222,28 +236,41 @@ decode_bit: add hl,de .mul0: djnz .mulLoop ; until HL = state_scale * (upkr_state>>8), also BC becomes (upkr_state & 255) + + ELSE + + ;;; unrolled MUL for better performance, +25 bytes unpack size + ld b,d + DUP 8 + add hl,hl + jr nc,0_f + add hl,de +0: + EDUP + + ENDIF + add hl,bc ; HL = state_scale * (upkr_state >> 8) + (upkr_state & 255) - pop af - ld d,-16 ; D = -prob_offset (-16 0xF0 when bit = 0) + pop af ; restore prob and CF=bit jr nc,.bit_is_0_2 - ld d,b ; D = -prob_offset (0 when bit = 1) (also does fix following ADD) - dec h - add hl,de ; HL += -prob (HL += (256 - prob) - 256) -.bit_is_0_2: ; HL = state_offset + state_scale * (upkr_state >> 8) + (upkr_state & 255) ; new upkr_state + dec d ; DE = -prob (also D = bit ? $FF : $00) + add hl,de ; HL += -prob + ; ^ this always preserves CF=1, because (state>>8) >= 128, state_scale: 7..250, prob: 7..250, + ; so 7*128 > 250 and thus edge case `ADD hl=(7*128+0),de=(-250)` => CF=1 +.bit_is_0_2: ; *** adjust probs[context_index] - ld e,a ; D:E = -prob_offset:prob, A = prob - and $F8 + ld e,a ; preserve prob + rra ; + (bit<<4) ; part of -prob_offset, needs another -16 + and $FC ; clear/keep correct bits to get desired (prob>>4) + extras, CF=0 rra rra - rra - rra - adc a,d ; A = -prob_offset + ((prob + 8) >> 4) - neg - add a,e ; A = prob_offset + prob - ((prob + 8) >> 4) + rra ; A = (bit<<4) + (prob>>4), CF=(prob & 8) + adc a,-16 ; A = (bit<<4) - 16 + ((prob + 8)>>4) ; -prob_offset = (bit<<4) - 16 + sub e ; A = (bit<<4) - 16 + ((prob + 8)>>4) - prob ; = ((prob + 8)>>4) - prob_offset - prob + neg ; A = prob_offset + prob - ((prob + 8)>>4) pop bc - ld (bc),a ; update probs[context_index] - add a,d ; bit=0: A = 23..249, D = 240 -> CF=1 || bit=1: D=0 -> CF=0 - ccf ; resulting CF = bit restored + ld (bc),a ; probs[context_index] = prob_offset + prob - ((prob + 8) >> 4); + add a,d ; restore CF = bit (D = bit ? $FF : $00 && A > 0) pop de ret @@ -287,12 +314,12 @@ decode_number: ; reserve space for probs array without emitting any machine code (using only EQU) IFDEF UPKR_PROBS_ORIGIN ; if specific address is defined by user, move probs array there - ORG UPKR_PROBS_ORIGIN +probs: EQU ((UPKR_PROBS_ORIGIN) + 255) & -$100 ; probs array aligned to 256 + ELSE +probs: EQU ($ + 255) & -$100 ; probs array aligned to 256 ENDIF - -probs: EQU ($+255) & -$100 ; probs array aligned to 256 -.real_c: EQU 1 + 255 + 1 + 2*NUMBER_BITS ; real size of probs array -.c: EQU (.real_c + 1) & -2 ; padding to even size (required by init code) +.real_c: EQU 1 + 255 + 1 + 2*NUMBER_BITS ; real size of probs array +.c: EQU (.real_c + 1) & -2 ; padding to even size (required by init code) .e: EQU probs + .c DISPLAY "upkr.unpack probs array placed at: ",/A,probs,",\tsize: ",/A,probs.c