Merge pull request #3 from ped7g/z80_ped7g

backward unpacker + example extended
2026-06-27 21:49:38 +02:00 · 2022-09-18 23:24:28 +02:00
parent a5406deb30 165f593a11
commit 7b0e22f459
7 changed files with 115 additions and 37 deletions
@@ -3,7 +3,8 @@
    DEVICE ZXSPECTRUM48,$8FFF

    ORG     $9000
-compressed_scr_files:       ; border color byte + upkr-packed .scr file
+  ;; forward example data
+compressed_scr_files.fwd:               ; border color byte + upkr-packed .scr file
    DB      1
    INCBIN  "screens/Grongy - ZX Spectrum (2022).scr.upk"
    DB      7
@@ -13,37 +14,87 @@ compressed_scr_files:       ; border color byte + upkr-packed .scr file
    DB      6
    INCBIN  "screens/diver - Back to Bjork (2015).scr.upk"
 .e:
+  ;; backward example data (unpacker goes from the end of the data!)
+compressed_scr_files.rwd.e: EQU $-1     ; the final IX will point one byte ahead of "$" here
+    INCBIN  "screens.reversed/diver - Back to Bjork (2015).scr.upk"
+    DB      6
+    INCBIN  "screens.reversed/diver - Mercenary 4. The Heaven's Devil (2014) (Forever 2014 Olympic Edition, 1).scr.upk"
+    DB      0
+    INCBIN  "screens.reversed/Schafft - Poison (2017).scr.upk"
+    DB      7
+    INCBIN  "screens.reversed/Grongy - ZX Spectrum (2022).scr.upk"
+compressed_scr_files.rwd:               ; border color byte + upkr-packed .scr file (backward)
+    DB      1

 start:
    di
 ;     OPT --zxnext
-;     nextreg 7,3                 ; ZX Next: switch to 28Mhz
-    ld      ix,compressed_scr_files
-.slideshow_loop
+;     nextreg 7,3                       ; ZX Next: switch to 28Mhz
+
+  ;;; FORWARD packed/unpacked data demo
+    ld      ix,compressed_scr_files.fwd
+.slideshow_loop.fwd:
  ; set BORDER for next image
-    ldi     a,(ix)              ; fake: ld a,(ix) : inc ix
+    ld      a,(ix)
+    inc     ix
    out     (254),a
  ; call unpack of next image directly into VRAM
-    ld      de,$4000            ; target VRAM
+    ld      de,$4000                    ; target VRAM
    exx
  ; IX = packed data, DE' = destination ($4000)
  ; returned IX will point right after the packed data
-    call    upkr.unpack
+    call    fwd.upkr.unpack
  ; do some busy loop with CPU to delay between images
+    call    delay
+  ; check if all images were displayed, loop around from first one then
+    ld      a,ixl
+    cp      low compressed_scr_files.fwd.e
+    jr      nz,.slideshow_loop.fwd
+
+  ;;; BACKWARD packed/unpacked data demo
+    ld      ix,compressed_scr_files.rwd
+.slideshow_loop.rwd:
+  ; set BORDER for next image
+    ld      a,(ix)
+    dec     ix
+    out     (254),a
+  ; call unpack of next image directly into VRAM
+    ld      de,$5AFF                    ; target VRAM
+    exx
+  ; IX = packed data, DE' = destination
+  ; returned IX will point right ahead of the packed data
+    call    rwd.upkr.unpack
+  ; do some busy loop with CPU to delay between images
+    call    delay
+  ; check if all images were displayed, loop around from first one then
+    ld      a,ixl
+    cp      low compressed_scr_files.rwd.e
+    jr      nz,.slideshow_loop.rwd
+
+    jr      start
+
+delay:
    ld      bc,$AA00
 .delay:
    .8 ex      (sp),ix
    dec     c
    jr      nz,.delay
    djnz    .delay
-  ; check if all images were displayed, loop around from first one then
-    ld      a,ixl
-    cp      low compressed_scr_files.e
-    jr      z,start
-    jr      .slideshow_loop
+    ret

  ; include the depacker library, optionally putting probs array buffer near end of RAM
    DEFINE  UPKR_PROBS_ORIGIN $FA00   ; if not defined, array will be put after unpack code
-    INCLUDE "../unpack.asm"
+
+    MODULE fwd
+        INCLUDE "../unpack.asm"
+    ENDMODULE
+
+    MODULE rwd
+        DEFINE BACKWARDS_UNPACK         ; defined to build backwards unpack
+                ; initial IX points at last byte of compressed data
+                ; initial DE' points at last byte of unpacked data
+
+        INCLUDE "../unpack.asm"
+    ENDMODULE

    SAVESNA "example.sna",start
@@ -15,6 +15,12 @@
 ;;         modifies: all registers except IY, requires 10 bytes of stack space
 ;;

+;     DEFINE BACKWARDS_UNPACK         ; uncomment to build backwards depacker
+            ; initial IX points at last byte of compressed data
+            ; initial DE' points at last byte of unpacked data
+
+;     DEFINE UPKR_UNPACK_SPEED        ; uncomment to get larger but faster unpack routine
+
    OPT push reset --syntax=abf
    MODULE upkr

@@ -100,7 +106,7 @@ unpack:
    ld      a,c
    exx
    ld      (de),a              ; *write_ptr++ = byte;
-    inc     de
+    IFNDEF BACKWARDS_UNPACK : inc de : ELSE : dec de : ENDIF
    exx
    ld      d,b                 ; prev_was_match = false
    jr      .decompress_data
@@ -137,9 +143,13 @@ unpack:
    ld      h,d                 ; DE = write_ptr
    ld      l,e
 .offset+*:  ld      bc,0
+    IFNDEF BACKWARDS_UNPACK
    sbc     hl,bc               ; CF=0 from decode_number ; HL = write_ptr - offset
+    ELSE
+    add     hl,bc               ; HL = write_ptr + offset
+    ENDIF
    pop     bc                  ; BC = length
-    ldir
+    IFNDEF BACKWARDS_UNPACK : ldir : ELSE : lddr : ENDIF
    exx
    ld      d,b                 ; prev_was_match = true
    djnz    .decompress_data    ; adjust context_index back to 0..255 range, go to main loop
@@ -193,7 +203,7 @@ decode_bit:
    jr      nz,.has_bit             ; CF=data, ZF=0 -> some bits + stop bit still available
  ; CF=1 (by stop bit)
    ld      a,(ix)
-    inc     ix                      ; upkr_current_byte = *upkr_data_ptr++;
+    IFNDEF BACKWARDS_UNPACK : inc ix : ELSE : dec ix : ENDIF    ; upkr_current_byte = *upkr_data_ptr++;
    adc     a,a                     ; CF=data, b0=1 as new stop bit
 .has_bit:
    adc     hl,hl                   ; upkr_state = (upkr_state << 1) + (upkr_current_byte >> 7);
@@ -215,6 +225,10 @@ decode_bit:
    ld      d,0
    ld      e,a                     ; DE = state_scale ; prob || (256-prob)
    ld      l,d                     ; H:L = (upkr_state>>8) : 0
+
+  IFNDEF UPKR_UNPACK_SPEED
+
+    ;; looped MUL for minimum unpack size
    ld      b,8                     ; counter
 .mulLoop:
    add     hl,hl
@@ -222,28 +236,41 @@ decode_bit:
    add     hl,de
 .mul0:
    djnz    .mulLoop                ; until HL = state_scale * (upkr_state>>8), also BC becomes (upkr_state & 255)
+
+  ELSE
+
+    ;;; unrolled MUL for better performance, +25 bytes unpack size
+    ld      b,d
+    DUP     8
+        add     hl,hl
+        jr      nc,0_f
+        add     hl,de
+0:
+    EDUP
+
+  ENDIF
+
    add     hl,bc                   ; HL = state_scale * (upkr_state >> 8) + (upkr_state & 255)
-    pop     af
-    ld      d,-16                   ; D = -prob_offset (-16 0xF0 when bit = 0)
+    pop     af                      ; restore prob and CF=bit
    jr      nc,.bit_is_0_2
-    ld      d,b                     ; D = -prob_offset (0 when bit = 1) (also does fix following ADD)
-    dec     h
-    add     hl,de                   ; HL += -prob (HL += (256 - prob) - 256)
-.bit_is_0_2:                        ; HL = state_offset + state_scale * (upkr_state >> 8) + (upkr_state & 255) ; new upkr_state
+    dec     d                       ; DE = -prob (also D = bit ? $FF : $00)
+    add     hl,de                   ; HL += -prob
+    ; ^ this always preserves CF=1, because (state>>8) >= 128, state_scale: 7..250, prob: 7..250,
+    ; so 7*128 > 250 and thus edge case `ADD hl=(7*128+0),de=(-250)` => CF=1
+.bit_is_0_2:
 ; *** adjust probs[context_index]
-    ld      e,a                     ; D:E = -prob_offset:prob, A = prob
-    and     $F8
+    ld      e,a                     ; preserve prob
+    rra                             ; + (bit<<4) ; part of -prob_offset, needs another -16
+    and     $FC                     ; clear/keep correct bits to get desired (prob>>4) + extras, CF=0
    rra
    rra
-    rra
-    rra
-    adc     a,d                     ; A = -prob_offset + ((prob + 8) >> 4)
-    neg
-    add     a,e                     ; A = prob_offset + prob - ((prob + 8) >> 4)
+    rra                             ; A = (bit<<4) + (prob>>4), CF=(prob & 8)
+    adc     a,-16                   ; A = (bit<<4) - 16 + ((prob + 8)>>4) ; -prob_offset = (bit<<4) - 16
+    sub     e                       ; A = (bit<<4) - 16 + ((prob + 8)>>4) - prob ; = ((prob + 8)>>4) - prob_offset - prob
+    neg                             ; A = prob_offset + prob - ((prob + 8)>>4)
    pop     bc
-    ld      (bc),a                  ; update probs[context_index]
-    add     a,d                     ; bit=0: A = 23..249, D = 240 -> CF=1 || bit=1: D=0 -> CF=0
-    ccf                             ; resulting CF = bit restored
+    ld      (bc),a                  ; probs[context_index] = prob_offset + prob - ((prob + 8) >> 4);
+    add     a,d                     ; restore CF = bit (D = bit ? $FF : $00 && A > 0)
    pop     de
    ret

@@ -287,12 +314,12 @@ decode_number:
    ; reserve space for probs array without emitting any machine code (using only EQU)

    IFDEF UPKR_PROBS_ORIGIN     ; if specific address is defined by user, move probs array there
-    ORG UPKR_PROBS_ORIGIN
+probs:      EQU ((UPKR_PROBS_ORIGIN) + 255) & -$100     ; probs array aligned to 256
+    ELSE
+probs:      EQU ($ + 255) & -$100                       ; probs array aligned to 256
    ENDIF
-
-probs:      EQU ($+255) & -$100                 ; probs array aligned to 256
-.real_c:    EQU 1 + 255 + 1 + 2*NUMBER_BITS     ; real size of probs array
-.c:         EQU (.real_c + 1) & -2              ; padding to even size (required by init code)
+.real_c:    EQU 1 + 255 + 1 + 2*NUMBER_BITS             ; real size of probs array
+.c:         EQU (.real_c + 1) & -2                      ; padding to even size (required by init code)
 .e:         EQU probs + .c

    DISPLAY "upkr.unpack probs array placed at: ",/A,probs,",\tsize: ",/A,probs.c