mirror of
https://github.com/exoticorn/upkr.git
synced 2026-01-20 19:46:42 +01:00
Compare commits
1 Commits
0c5ba4e32c
...
dictionary
| Author | SHA1 | Date | |
|---|---|---|---|
| 130bf821fa |
@@ -27,7 +27,6 @@ The 16 bit dos unpacker also uses some variations. (`upkr --x86`)
|
|||||||
* [Atari Lynx](https://github.com/42Bastian/new_bll/blob/master/demos/depacker/unupkr.asm)
|
* [Atari Lynx](https://github.com/42Bastian/new_bll/blob/master/demos/depacker/unupkr.asm)
|
||||||
* [Atari Jaguar](https://github.com/42Bastian/new_bjl/blob/main/exp/depacker/unupkr.js)
|
* [Atari Jaguar](https://github.com/42Bastian/new_bjl/blob/main/exp/depacker/unupkr.js)
|
||||||
* [8080, R800](https://github.com/ivagorRetrocomp/DeUpkr)
|
* [8080, R800](https://github.com/ivagorRetrocomp/DeUpkr)
|
||||||
* [6502](https://github.com/pfusik/upkr6502)
|
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
|
|||||||
BIN
README.md.upk
Normal file
BIN
README.md.upk
Normal file
Binary file not shown.
@@ -1,190 +0,0 @@
|
|||||||
;;; -*-asm-*-
|
|
||||||
;;; upkr unpacker for Atari Jaguar RISC.
|
|
||||||
|
|
||||||
;;; lyxass syntax
|
|
||||||
|
|
||||||
|
|
||||||
; input:
|
|
||||||
;;; R20 : packed buffer
|
|
||||||
;;; R21 : output buffer
|
|
||||||
;;; r30 : return address
|
|
||||||
;;;
|
|
||||||
;;; Register usage (destroyed!)
|
|
||||||
;;; r0-r17,r20,r21
|
|
||||||
;;;
|
|
||||||
|
|
||||||
DST REG 21
|
|
||||||
SRC REG 20
|
|
||||||
|
|
||||||
REGTOP 16
|
|
||||||
LR_save REG 99
|
|
||||||
LR_save2 REG 99
|
|
||||||
GETBIT REG 99
|
|
||||||
GETLENGTH REG 99
|
|
||||||
LITERAL REG 99
|
|
||||||
LOOP REG 99
|
|
||||||
index REG 99
|
|
||||||
bit_pos REG 99
|
|
||||||
state REG 99
|
|
||||||
prev_was_match REG 99
|
|
||||||
offset REG 99
|
|
||||||
prob reg 99
|
|
||||||
byte REG 99
|
|
||||||
PROBS reg 99
|
|
||||||
tmp2 reg 2
|
|
||||||
tmp1 REG 1
|
|
||||||
tmp0 REG 0
|
|
||||||
|
|
||||||
REGMAP
|
|
||||||
|
|
||||||
upkr_probs equ $200
|
|
||||||
|
|
||||||
SIZEOF_PROBS EQU 1+255+1+2*32+2*32
|
|
||||||
|
|
||||||
unupkr::
|
|
||||||
move LR,LR_save
|
|
||||||
moveq #0,tmp0
|
|
||||||
movei #upkr_probs,PROBS
|
|
||||||
bset #7,tmp0
|
|
||||||
movei #SIZEOF_PROBS,tmp2
|
|
||||||
move PROBS,tmp1
|
|
||||||
.init storeb tmp0,(tmp1)
|
|
||||||
subq #1,tmp2
|
|
||||||
jr pl,.init
|
|
||||||
addq #1,tmp1
|
|
||||||
|
|
||||||
moveq #0,offset
|
|
||||||
moveq #0,state
|
|
||||||
movei #getlength,GETLENGTH
|
|
||||||
movei #getbit,GETBIT
|
|
||||||
.looppc move PC,LOOP
|
|
||||||
addq #.loop-.looppc,LOOP
|
|
||||||
move pc,LITERAL
|
|
||||||
jr .start
|
|
||||||
addq #6,LITERAL
|
|
||||||
|
|
||||||
.literal
|
|
||||||
moveq #1,byte
|
|
||||||
move pc,LR
|
|
||||||
jr .into
|
|
||||||
addq #6,LR ; LR = .getbit
|
|
||||||
.getbit
|
|
||||||
addc byte,byte
|
|
||||||
.into
|
|
||||||
btst #8,byte
|
|
||||||
jump eq,(GETBIT)
|
|
||||||
move byte,index
|
|
||||||
|
|
||||||
storeb byte,(DST)
|
|
||||||
addq #1,DST
|
|
||||||
.start
|
|
||||||
moveq #0,prev_was_match
|
|
||||||
|
|
||||||
.loop
|
|
||||||
moveq #0,index
|
|
||||||
BL (GETBIT)
|
|
||||||
jump cc,(LITERAL)
|
|
||||||
addq #14,LR
|
|
||||||
cmpq #1,prev_was_match
|
|
||||||
jr eq,.newoff
|
|
||||||
shlq #8,r0
|
|
||||||
jump (GETBIT)
|
|
||||||
move r0,index
|
|
||||||
jr cc,.oldoff
|
|
||||||
shlq #8,r0
|
|
||||||
.newoff
|
|
||||||
addq #1,r0 ; r0 = 257
|
|
||||||
BL (GETLENGTH)
|
|
||||||
subq #1,r0
|
|
||||||
jump eq,(LR_save)
|
|
||||||
move r0,offset
|
|
||||||
|
|
||||||
.oldoff
|
|
||||||
movei #257+64,r0
|
|
||||||
BL (GETLENGTH)
|
|
||||||
|
|
||||||
move DST,r1
|
|
||||||
sub offset,r1
|
|
||||||
.cpymatch1
|
|
||||||
loadb (r1),r2
|
|
||||||
subq #1,r0
|
|
||||||
addqt #1,r1
|
|
||||||
storeb r2,(DST)
|
|
||||||
jr ne,.cpymatch1
|
|
||||||
addq #1,DST
|
|
||||||
|
|
||||||
jump (LOOP)
|
|
||||||
moveq #1,prev_was_match
|
|
||||||
|
|
||||||
getlength:
|
|
||||||
move LR,LR_save2
|
|
||||||
moveq #0,byte
|
|
||||||
move r0,index
|
|
||||||
moveq #0,bit_pos
|
|
||||||
move pc,LR
|
|
||||||
jump (GETBIT)
|
|
||||||
addq #6,LR
|
|
||||||
.gl
|
|
||||||
jr cc,.exit
|
|
||||||
addq #8,LR ; => return to "sh ..."
|
|
||||||
jump (GETBIT)
|
|
||||||
nop
|
|
||||||
sh bit_pos,r0
|
|
||||||
subq #1,bit_pos ; sh < 0 => shift left!
|
|
||||||
or r0,byte
|
|
||||||
jump (GETBIT)
|
|
||||||
subq #8,LR
|
|
||||||
.exit
|
|
||||||
moveq #1,r0
|
|
||||||
sh bit_pos,r0
|
|
||||||
jump (LR_save2)
|
|
||||||
or byte,r0
|
|
||||||
|
|
||||||
.newbyte:
|
|
||||||
loadb (SRC),r2
|
|
||||||
shlq #8,state
|
|
||||||
addq #1,SRC
|
|
||||||
or r2,state
|
|
||||||
getbit
|
|
||||||
move state,r2
|
|
||||||
move PROBS,r1
|
|
||||||
add index,r1 ; r1 = &probs[index]
|
|
||||||
shrq #12,r2
|
|
||||||
loadb (r1),prob
|
|
||||||
jr eq,.newbyte
|
|
||||||
move state,r2
|
|
||||||
move state,r0
|
|
||||||
shlq #24,r2
|
|
||||||
shrq #8,r0 ; sh
|
|
||||||
shrq #24,r2 ; sl
|
|
||||||
cmp prob,r2
|
|
||||||
addqt #1,index
|
|
||||||
jr cs,.one
|
|
||||||
mult prob,r0
|
|
||||||
|
|
||||||
;; state -= ((state >> 8) + 1)*prob
|
|
||||||
;; prob -= (prob+8)>>4
|
|
||||||
move prob,r2
|
|
||||||
add prob,r0
|
|
||||||
addq #8,r2
|
|
||||||
sub r0,state
|
|
||||||
shrq #4,r2
|
|
||||||
moveq #0,r0
|
|
||||||
jr .ret
|
|
||||||
sub r2,prob
|
|
||||||
|
|
||||||
.one
|
|
||||||
;; state = (state >> 8)*prob+(state & 0xff)
|
|
||||||
;; prob += (256 + 8 - prob) >> 4
|
|
||||||
move r2,state
|
|
||||||
movei #256+8,r2
|
|
||||||
add r0,state
|
|
||||||
sub prob,r2 ; 256-prob+8
|
|
||||||
shrq #4,r2
|
|
||||||
add r2,prob
|
|
||||||
|
|
||||||
moveq #3,r0
|
|
||||||
.ret
|
|
||||||
storeb prob,(r1)
|
|
||||||
jump (LR)
|
|
||||||
shrq #1,r0 ; C = r0 = decoded bit (r0 was 0 or 3)
|
|
||||||
@@ -1,217 +0,0 @@
|
|||||||
;;; -*-asm-*-
|
|
||||||
;;; upkr unpacker for Atari Jaguar RISC. (quick version)
|
|
||||||
|
|
||||||
;;; lyxass syntax
|
|
||||||
|
|
||||||
|
|
||||||
; input:
|
|
||||||
;;; R20 : packed buffer
|
|
||||||
;;; R21 : output buffer
|
|
||||||
;;; r30 : return address
|
|
||||||
;;;
|
|
||||||
;;; Register usage (destroyed!)
|
|
||||||
;;; r0-r17,r20,r21
|
|
||||||
;;;
|
|
||||||
|
|
||||||
DST REG 21
|
|
||||||
SRC REG 20
|
|
||||||
|
|
||||||
REGTOP 17
|
|
||||||
LR_save REG 99
|
|
||||||
LR_save2 REG 99
|
|
||||||
GETBIT REG 99
|
|
||||||
GETLENGTH REG 99
|
|
||||||
LITERAL REG 99
|
|
||||||
LOOP REG 99
|
|
||||||
index REG 99
|
|
||||||
bit_pos REG 99
|
|
||||||
state REG 99
|
|
||||||
prev_was_match REG 99
|
|
||||||
offset REG 99
|
|
||||||
prob reg 99
|
|
||||||
byte REG 99
|
|
||||||
ndata reg 99
|
|
||||||
PROBS reg 99
|
|
||||||
tmp2 reg 2
|
|
||||||
tmp1 REG 1
|
|
||||||
tmp0 REG 0
|
|
||||||
|
|
||||||
REGMAP
|
|
||||||
|
|
||||||
upkr_probs equ $200
|
|
||||||
|
|
||||||
SIZEOF_PROBS EQU 1+255+1+2*32+2*32
|
|
||||||
|
|
||||||
unupkr::
|
|
||||||
move LR,LR_save
|
|
||||||
movei #$80808080,tmp0
|
|
||||||
movei #upkr_probs,PROBS
|
|
||||||
movei #SIZEOF_PROBS,tmp2
|
|
||||||
move PROBS,tmp1
|
|
||||||
.init store tmp0,(tmp1)
|
|
||||||
subq #4,tmp2
|
|
||||||
jr pl,.init
|
|
||||||
addq #4,tmp1
|
|
||||||
|
|
||||||
loadb (SRC),ndata
|
|
||||||
addq #1,SRC
|
|
||||||
moveq #0,offset
|
|
||||||
moveq #0,state
|
|
||||||
movei #getlength,GETLENGTH
|
|
||||||
movei #getbit,GETBIT
|
|
||||||
.looppc move PC,LOOP
|
|
||||||
addq #.loop-.looppc,LOOP
|
|
||||||
move pc,LITERAL
|
|
||||||
jr .start
|
|
||||||
addq #6,LITERAL
|
|
||||||
|
|
||||||
.literal
|
|
||||||
moveq #1,byte
|
|
||||||
move pc,LR
|
|
||||||
jr .into
|
|
||||||
addq #6,LR ; LR = .getbit
|
|
||||||
.getbit
|
|
||||||
addc byte,byte
|
|
||||||
.into
|
|
||||||
btst #8,byte
|
|
||||||
jump eq,(GETBIT)
|
|
||||||
move byte,index
|
|
||||||
|
|
||||||
storeb byte,(DST)
|
|
||||||
addq #1,DST
|
|
||||||
.start
|
|
||||||
moveq #0,prev_was_match
|
|
||||||
|
|
||||||
.loop
|
|
||||||
moveq #0,index
|
|
||||||
BL (GETBIT)
|
|
||||||
jump cc,(LITERAL)
|
|
||||||
addq #14,LR
|
|
||||||
cmpq #1,prev_was_match
|
|
||||||
jr eq,.newoff
|
|
||||||
shlq #8,r0
|
|
||||||
jump (GETBIT)
|
|
||||||
move r0,index
|
|
||||||
jr cc,.oldoff
|
|
||||||
shlq #8,r0
|
|
||||||
.newoff
|
|
||||||
addq #1,r0 ; r0 = 257
|
|
||||||
BL (GETLENGTH)
|
|
||||||
subq #1,r0
|
|
||||||
move r0,offset
|
|
||||||
jump eq,(LR_save)
|
|
||||||
nop
|
|
||||||
.oldoff
|
|
||||||
movei #257+64,r0
|
|
||||||
BL (GETLENGTH)
|
|
||||||
|
|
||||||
move DST,r2
|
|
||||||
move DST,r1
|
|
||||||
or offset,r2
|
|
||||||
btst #0,r2
|
|
||||||
moveq #1,prev_was_match
|
|
||||||
jr ne,.cpymatch1
|
|
||||||
sub offset,r1
|
|
||||||
.cpymatch2
|
|
||||||
loadw (r1),r2
|
|
||||||
addqt #2,r1
|
|
||||||
subq #2,r0
|
|
||||||
storew r2,(DST)
|
|
||||||
jump eq,(LOOP)
|
|
||||||
addqt #2,DST
|
|
||||||
jr pl,.cpymatch2
|
|
||||||
nop
|
|
||||||
jump (LOOP)
|
|
||||||
subq #1,DST
|
|
||||||
|
|
||||||
.cpymatch1
|
|
||||||
loadb (r1),r2
|
|
||||||
subq #1,r0
|
|
||||||
addqt #1,r1
|
|
||||||
storeb r2,(DST)
|
|
||||||
jr ne,.cpymatch1
|
|
||||||
addq #1,DST
|
|
||||||
|
|
||||||
jump (LOOP)
|
|
||||||
//-> nop
|
|
||||||
|
|
||||||
getlength:
|
|
||||||
move LR,LR_save2
|
|
||||||
moveq #0,byte
|
|
||||||
move r0,index
|
|
||||||
moveq #0,bit_pos
|
|
||||||
move pc,LR
|
|
||||||
jump (GETBIT)
|
|
||||||
addq #6,LR
|
|
||||||
.gl
|
|
||||||
jr cc,.exit
|
|
||||||
addq #8,LR ; => return to "sh ..."
|
|
||||||
jump (GETBIT)
|
|
||||||
nop
|
|
||||||
sh bit_pos,r0
|
|
||||||
subq #1,bit_pos ; sh < 0 => shift left!
|
|
||||||
or r0,byte
|
|
||||||
jump (GETBIT)
|
|
||||||
subq #8,LR
|
|
||||||
.exit
|
|
||||||
moveq #1,r0
|
|
||||||
sh bit_pos,r0
|
|
||||||
jump (LR_save2)
|
|
||||||
or byte,r0
|
|
||||||
|
|
||||||
.newbyte:
|
|
||||||
move ndata,r2
|
|
||||||
shlq #8,state
|
|
||||||
loadb (SRC),ndata
|
|
||||||
or r2,state
|
|
||||||
addq #1,SRC
|
|
||||||
move state,r2
|
|
||||||
shrq #12,r2
|
|
||||||
jr ne,.done
|
|
||||||
move state,r2
|
|
||||||
jr .newbyte
|
|
||||||
getbit
|
|
||||||
move state,r2
|
|
||||||
move PROBS,r1
|
|
||||||
add index,r1 ; r1 = &probs[index]
|
|
||||||
shrq #12,r2
|
|
||||||
loadb (r1),prob
|
|
||||||
jr eq,.newbyte
|
|
||||||
move state,r2
|
|
||||||
.done
|
|
||||||
move state,r0
|
|
||||||
shlq #24,r2
|
|
||||||
shrq #8,r0 ; sh
|
|
||||||
shrq #24,r2 ; sl
|
|
||||||
cmp prob,r2
|
|
||||||
addqt #1,index
|
|
||||||
jr cs,.one
|
|
||||||
mult prob,r0
|
|
||||||
|
|
||||||
;; state -= ((state >> 8) + 1)*prob
|
|
||||||
;; prob -= (prob+8)>>4
|
|
||||||
move prob,r2
|
|
||||||
add prob,r0
|
|
||||||
addq #8,r2
|
|
||||||
sub r0,state
|
|
||||||
shrq #4,r2
|
|
||||||
moveq #0,r0
|
|
||||||
sub r2,prob
|
|
||||||
shrq #1,r0 ; C = 0, r0 = 0
|
|
||||||
jump (LR)
|
|
||||||
storeb prob,(r1)
|
|
||||||
|
|
||||||
.one
|
|
||||||
;; state = (state >> 8)*prob+(state & 0xff)
|
|
||||||
;; prob += (256 + 8 - prob) >> 4
|
|
||||||
move r2,state
|
|
||||||
movei #256+8,r2
|
|
||||||
add r0,state
|
|
||||||
sub prob,r2 ; 256-prob+8
|
|
||||||
shrq #4,r2
|
|
||||||
add r2,prob
|
|
||||||
|
|
||||||
moveq #3,r0
|
|
||||||
storeb prob,(r1)
|
|
||||||
jump (LR)
|
|
||||||
shrq #1,r0 ; C = 1, r0 = 1
|
|
||||||
@@ -1,56 +1,3 @@
|
|||||||
/*
|
|
||||||
A simple C unpacker for upkr compressed data.
|
|
||||||
|
|
||||||
This implements two variants, selected by the UPKR_BITSTREAM define:
|
|
||||||
- normal: faster and smaller on modern hardware as whole bytes are shifted into
|
|
||||||
the rANS state at a time, but requires 20bits for the state
|
|
||||||
- bitstream: only single bits are shifted into the rANS state at a time
|
|
||||||
which allows the state to always fit in 16bits which is a boon
|
|
||||||
on very old CPUs.
|
|
||||||
The encoder and decoder need to be configured to use the same variant.
|
|
||||||
|
|
||||||
upkr compressed data is a rANS byte-/bit-stream encoding a series of literal
|
|
||||||
byte values and back-references as probability encoded bits.
|
|
||||||
|
|
||||||
upkr_decode_bit reads one bit from the rANS stream, taking a probability context
|
|
||||||
as parameter. The probability context is a byte estimating the probability of
|
|
||||||
a bit encoded in this context being set. It is updated by upkr_decode_bit
|
|
||||||
after each decoded bit to reflect the observed past frequencies of on/off bits.
|
|
||||||
|
|
||||||
There are a number of different contexts used in the compressed format. The order in the
|
|
||||||
upkr_probs array is arbitrary, the only requirement for the unpacker is that all bits
|
|
||||||
that shared the same context while encoding also share the same context while decoding.
|
|
||||||
The contexts are:
|
|
||||||
- is match
|
|
||||||
- has offset
|
|
||||||
- literal bit N (0-7) with already decoded highest bits of literal == M (255 total)
|
|
||||||
- offset bit N (one less than max offset bits)
|
|
||||||
- has offset bit N (max offset bits)
|
|
||||||
- length bit N (one less than max length bits)
|
|
||||||
- has length bit N (max length bits)
|
|
||||||
|
|
||||||
Literal bytes are encoded from highest to lowest bit, with the bit position and
|
|
||||||
the already decoded bits as context.
|
|
||||||
|
|
||||||
Offset and length are encoded in an interlaced variant of Elias gamma coding. They
|
|
||||||
are encoded from lowest to highest bits. For each bit, first one bit is read in the
|
|
||||||
"has offset/length bit N" context. If this is set, offset/length bit N is read in its context
|
|
||||||
and the decoding continues with the next bit. If the "has bit N" is read as false, a
|
|
||||||
fixed 1 bit is added as the top bit at this position.
|
|
||||||
|
|
||||||
The highlevel decode loop then looks like this:
|
|
||||||
loop:
|
|
||||||
if read_bit(IS_MATCH):
|
|
||||||
if prev_was_match || read_bit(HAS_OFFSET):
|
|
||||||
offset = read_length_or_offset(OFFSET) - 1
|
|
||||||
if offset == 0:
|
|
||||||
break
|
|
||||||
length = read_length_or_offset(LENGTH)
|
|
||||||
copy_bytes_from_offset(length, offset)
|
|
||||||
else:
|
|
||||||
read_and_push(literal)
|
|
||||||
*/
|
|
||||||
|
|
||||||
typedef unsigned char u8;
|
typedef unsigned char u8;
|
||||||
typedef unsigned short u16;
|
typedef unsigned short u16;
|
||||||
typedef unsigned long u32;
|
typedef unsigned long u32;
|
||||||
@@ -67,7 +14,6 @@ u32 upkr_state;
|
|||||||
|
|
||||||
int upkr_decode_bit(int context_index) {
|
int upkr_decode_bit(int context_index) {
|
||||||
#ifdef UPKR_BITSTREAM
|
#ifdef UPKR_BITSTREAM
|
||||||
// shift in single bits until rANS state is >= 32768
|
|
||||||
while(upkr_state < 32768) {
|
while(upkr_state < 32768) {
|
||||||
if(upkr_bits_left == 0) {
|
if(upkr_bits_left == 0) {
|
||||||
upkr_current_byte = *upkr_data_ptr++;
|
upkr_current_byte = *upkr_data_ptr++;
|
||||||
@@ -78,7 +24,6 @@ int upkr_decode_bit(int context_index) {
|
|||||||
--upkr_bits_left;
|
--upkr_bits_left;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
// shift in a full byte until rANS state is >= 4096
|
|
||||||
while(upkr_state < 4096) {
|
while(upkr_state < 4096) {
|
||||||
upkr_state = (upkr_state << 8) | *upkr_data_ptr++;
|
upkr_state = (upkr_state << 8) | *upkr_data_ptr++;
|
||||||
}
|
}
|
||||||
@@ -87,8 +32,6 @@ int upkr_decode_bit(int context_index) {
|
|||||||
int prob = upkr_probs[context_index];
|
int prob = upkr_probs[context_index];
|
||||||
int bit = (upkr_state & 255) < prob ? 1 : 0;
|
int bit = (upkr_state & 255) < prob ? 1 : 0;
|
||||||
|
|
||||||
// rANS state and context probability update
|
|
||||||
// for the latter, add 1/16th (rounded) of the difference from either 0 or 256
|
|
||||||
if(bit) {
|
if(bit) {
|
||||||
upkr_state = prob * (upkr_state >> 8) + (upkr_state & 255);
|
upkr_state = prob * (upkr_state >> 8) + (upkr_state & 255);
|
||||||
prob += (256 - prob + 8) >> 4;
|
prob += (256 - prob + 8) >> 4;
|
||||||
@@ -117,7 +60,6 @@ void* upkr_unpack(void* destination, void* compressed_data) {
|
|||||||
#ifdef UPKR_BITSTREAM
|
#ifdef UPKR_BITSTREAM
|
||||||
upkr_bits_left = 0;
|
upkr_bits_left = 0;
|
||||||
#endif
|
#endif
|
||||||
// all contexts are initialized to 128 = equal probability of 0 and 1
|
|
||||||
for(int i = 0; i < sizeof(upkr_probs); ++i)
|
for(int i = 0; i < sizeof(upkr_probs); ++i)
|
||||||
upkr_probs[i] = 128;
|
upkr_probs[i] = 128;
|
||||||
|
|
||||||
@@ -126,13 +68,10 @@ void* upkr_unpack(void* destination, void* compressed_data) {
|
|||||||
int prev_was_match = 0;
|
int prev_was_match = 0;
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
for(;;) {
|
for(;;) {
|
||||||
// is match
|
|
||||||
if(upkr_decode_bit(0)) {
|
if(upkr_decode_bit(0)) {
|
||||||
// has offset
|
|
||||||
if(prev_was_match || upkr_decode_bit(256)) {
|
if(prev_was_match || upkr_decode_bit(256)) {
|
||||||
offset = upkr_decode_length(257) - 1;
|
offset = upkr_decode_length(257) - 1;
|
||||||
if(offset == 0) {
|
if(offset == 0) {
|
||||||
// a 0 offset signals the end of the compressed data
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -143,9 +82,6 @@ void* upkr_unpack(void* destination, void* compressed_data) {
|
|||||||
}
|
}
|
||||||
prev_was_match = 1;
|
prev_was_match = 1;
|
||||||
} else {
|
} else {
|
||||||
// byte contains the previously read bits and indicates the number of
|
|
||||||
// read bits by the set top bit. Therefore it can be directly used as the
|
|
||||||
// context index. The set top bit ends up at bit position 8 and is not stored.
|
|
||||||
int byte = 1;
|
int byte = 1;
|
||||||
while(byte < 256) {
|
while(byte < 256) {
|
||||||
int bit = upkr_decode_bit(byte);
|
int bit = upkr_decode_bit(byte);
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ pub fn pack(
|
|||||||
let mut rans_coder = RansCoder::new(config);
|
let mut rans_coder = RansCoder::new(config);
|
||||||
let mut state = lz::CoderState::new(config);
|
let mut state = lz::CoderState::new(config);
|
||||||
|
|
||||||
let mut pos = 0;
|
let mut pos = config.dictionary_size;
|
||||||
while pos < data.len() {
|
while pos < data.len() {
|
||||||
if let Some(ref mut cb) = progress_callback {
|
if let Some(ref mut cb) = progress_callback {
|
||||||
cb(pos);
|
cb(pos);
|
||||||
|
|||||||
@@ -71,6 +71,9 @@ pub struct Config {
|
|||||||
pub max_offset: usize,
|
pub max_offset: usize,
|
||||||
/// The maximum match length value to encode when compressing.
|
/// The maximum match length value to encode when compressing.
|
||||||
pub max_length: usize,
|
pub max_length: usize,
|
||||||
|
|
||||||
|
/// Size of dictionary at the beginning of data (how many bytes to skip when compressing.)
|
||||||
|
pub dictionary_size: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for Config {
|
impl Default for Config {
|
||||||
@@ -92,6 +95,8 @@ impl Default for Config {
|
|||||||
|
|
||||||
max_offset: usize::MAX,
|
max_offset: usize::MAX,
|
||||||
max_length: usize::MAX,
|
max_length: usize::MAX,
|
||||||
|
|
||||||
|
dictionary_size: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
11
src/main.rs
11
src/main.rs
@@ -16,6 +16,7 @@ fn main() -> Result<()> {
|
|||||||
let mut level = 2;
|
let mut level = 2;
|
||||||
let mut infile: Option<PathBuf> = None;
|
let mut infile: Option<PathBuf> = None;
|
||||||
let mut outfile: Option<PathBuf> = None;
|
let mut outfile: Option<PathBuf> = None;
|
||||||
|
let mut dictionary: Option<PathBuf> = None;
|
||||||
let mut max_unpacked_size = 512 * 1024 * 1024;
|
let mut max_unpacked_size = 512 * 1024 * 1024;
|
||||||
|
|
||||||
let mut parser = lexopt::Parser::from_env();
|
let mut parser = lexopt::Parser::from_env();
|
||||||
@@ -74,6 +75,7 @@ fn main() -> Result<()> {
|
|||||||
process::exit(0);
|
process::exit(0);
|
||||||
}
|
}
|
||||||
Long("max-unpacked-size") => max_unpacked_size = parser.value()?.parse()?,
|
Long("max-unpacked-size") => max_unpacked_size = parser.value()?.parse()?,
|
||||||
|
Long("dictionary") => dictionary = Some(parser.value()?.try_into()?),
|
||||||
Value(val) if infile.is_none() => infile = Some(val.try_into()?),
|
Value(val) if infile.is_none() => infile = Some(val.try_into()?),
|
||||||
Value(val) if outfile.is_none() => outfile = Some(val.try_into()?),
|
Value(val) if outfile.is_none() => outfile = Some(val.try_into()?),
|
||||||
_ => return Err(arg.unexpected().into()),
|
_ => return Err(arg.unexpected().into()),
|
||||||
@@ -94,6 +96,15 @@ fn main() -> Result<()> {
|
|||||||
data.reverse();
|
data.reverse();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(dictionary) = dictionary {
|
||||||
|
let mut dict = vec![];
|
||||||
|
File::open(dictionary)?.read_to_end(&mut dict)?;
|
||||||
|
config.dictionary_size = dict.len();
|
||||||
|
// prepend dict
|
||||||
|
dict.append(&mut data);
|
||||||
|
data = dict;
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(feature = "terminal")]
|
#[cfg(feature = "terminal")]
|
||||||
let mut packed_data = {
|
let mut packed_data = {
|
||||||
let mut pb = pbr::ProgressBar::on(std::io::stderr(), data.len() as u64);
|
let mut pb = pbr::ProgressBar::on(std::io::stderr(), data.len() as u64);
|
||||||
|
|||||||
@@ -137,7 +137,7 @@ fn parse(
|
|||||||
}
|
}
|
||||||
add_arrival(
|
add_arrival(
|
||||||
&mut arrivals,
|
&mut arrivals,
|
||||||
0,
|
encoding_config.dictionary_size,
|
||||||
Arrival {
|
Arrival {
|
||||||
parse: None,
|
parse: None,
|
||||||
state: lz::CoderState::new(encoding_config),
|
state: lz::CoderState::new(encoding_config),
|
||||||
@@ -148,7 +148,7 @@ fn parse(
|
|||||||
|
|
||||||
let cost_counter = &mut CostCounter::new(encoding_config);
|
let cost_counter = &mut CostCounter::new(encoding_config);
|
||||||
let mut best_per_offset = HashMap::new();
|
let mut best_per_offset = HashMap::new();
|
||||||
for pos in 0..data.len() {
|
for pos in encoding_config.dictionary_size..data.len() {
|
||||||
let match_length = |offset: usize| {
|
let match_length = |offset: usize| {
|
||||||
data[pos..]
|
data[pos..]
|
||||||
.iter()
|
.iter()
|
||||||
|
|||||||
Reference in New Issue
Block a user