2 Commits

52 changed files with 321 additions and 3868 deletions

319
Cargo.lock generated
View File

@@ -10,15 +10,9 @@ checksum = "38d9ff5d688f1c13395289f67db01d4826b46dd694e7580accdc3e8430f2d98e"
[[package]] [[package]]
name = "autocfg" name = "autocfg"
version = "1.1.0" version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]] [[package]]
name = "cc" name = "cc"
@@ -62,79 +56,17 @@ dependencies = [
"lazy_static", "lazy_static",
] ]
[[package]]
name = "crossterm"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e64e6c0fbe2c17357405f7c758c1ef960fce08bdfb2c03d88d2a18d7e09c4b67"
dependencies = [
"bitflags",
"crossterm_winapi",
"libc",
"mio",
"parking_lot",
"signal-hook",
"signal-hook-mio",
"winapi",
]
[[package]]
name = "crossterm_winapi"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ae1b35a484aa10e07fe0638d02301c5ad24de82d310ccbd2f3693da5f09bf1c"
dependencies = [
"winapi",
]
[[package]] [[package]]
name = "lazy_static" name = "lazy_static"
version = "1.4.0" version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "lexopt"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "478ee9e62aaeaf5b140bd4138753d1f109765488581444218d3ddda43234f3e8"
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.135" version = "0.2.108"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68783febc7782c6c5cb401fbda4de5a9898be1762314da0bb2c10ced61f18b0c" checksum = "8521a1b57e76b1ec69af7599e75e38e7b7fad6610f037db8c79b127201b5d119"
[[package]]
name = "lock_api"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "log"
version = "0.4.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
dependencies = [
"cfg-if",
]
[[package]]
name = "mio"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57ee1c23c7c63b0c9250c339ffdc69255f110b298b901b9f6c82547b7b87caaf"
dependencies = [
"libc",
"log",
"wasi 0.11.0+wasi-snapshot-preview1",
"windows-sys 0.36.1",
]
[[package]] [[package]]
name = "num-traits" name = "num-traits"
@@ -145,29 +77,6 @@ dependencies = [
"autocfg", "autocfg",
] ]
[[package]]
name = "parking_lot"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4dc9e0dc2adc1c69d09143aff38d3d30c5c3f0df0dad82e6d25547af174ebec0"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-sys 0.42.0",
]
[[package]] [[package]]
name = "pbr" name = "pbr"
version = "1.0.4" version = "1.0.4"
@@ -181,31 +90,10 @@ dependencies = [
] ]
[[package]] [[package]]
name = "proc-macro2" name = "pico-args"
version = "1.0.44" version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7bd7356a8122b6c4a24a82b278680c73357984ca2fc79a0f9fa6dea7dced7c58" checksum = "db8bcd96cb740d03149cbad5518db9fd87126a10ab519c011893b1754134c468"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
dependencies = [
"proc-macro2",
]
[[package]]
name = "redox_syscall"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
dependencies = [
"bitflags",
]
[[package]] [[package]]
name = "sacabase" name = "sacabase"
@@ -216,79 +104,6 @@ dependencies = [
"num-traits", "num-traits",
] ]
[[package]]
name = "scopeguard"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "signal-hook"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a253b5e89e2698464fc26b545c9edceb338e18a89effeeecfea192c3025be29d"
dependencies = [
"libc",
"signal-hook-registry",
]
[[package]]
name = "signal-hook-mio"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29ad2e15f37ec9a6cc544097b78a1ec90001e9f71b81338ca39f430adaca99af"
dependencies = [
"libc",
"mio",
"signal-hook",
]
[[package]]
name = "signal-hook-registry"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0"
dependencies = [
"libc",
]
[[package]]
name = "smallvec"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
[[package]]
name = "syn"
version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e90cde112c4b9690b8cbe810cba9ddd8bc1d7472e2cae317b69e9438c1cba7d2"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a99cb8c4b9a8ef0e7907cd3b617cc8dc04d571c4e73c8ae403d80ac160bb122"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a891860d3c8d66fec8e73ddb3765f90082374dbaaa833407b904a94f1a7eb43"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "time" name = "time"
version = "0.1.44" version = "0.1.44"
@@ -296,26 +111,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"
dependencies = [ dependencies = [
"libc", "libc",
"wasi 0.10.0+wasi-snapshot-preview1", "wasi",
"winapi", "winapi",
] ]
[[package]]
name = "unicode-ident"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd"
[[package]] [[package]]
name = "upkr" name = "upkr"
version = "0.2.2" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"cdivsufsort", "cdivsufsort",
"crossterm",
"lexopt",
"pbr", "pbr",
"thiserror", "pico-args",
] ]
[[package]] [[package]]
@@ -324,12 +131,6 @@ version = "0.10.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]] [[package]]
name = "winapi" name = "winapi"
version = "0.3.9" version = "0.3.9"
@@ -351,103 +152,3 @@ name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0" version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-sys"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2"
dependencies = [
"windows_aarch64_msvc 0.36.1",
"windows_i686_gnu 0.36.1",
"windows_i686_msvc 0.36.1",
"windows_x86_64_gnu 0.36.1",
"windows_x86_64_msvc 0.36.1",
]
[[package]]
name = "windows-sys"
version = "0.42.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc 0.42.0",
"windows_i686_gnu 0.42.0",
"windows_i686_msvc 0.42.0",
"windows_x86_64_gnu 0.42.0",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc 0.42.0",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.42.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e"
[[package]]
name = "windows_aarch64_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47"
[[package]]
name = "windows_aarch64_msvc"
version = "0.42.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4"
[[package]]
name = "windows_i686_gnu"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6"
[[package]]
name = "windows_i686_gnu"
version = "0.42.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7"
[[package]]
name = "windows_i686_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024"
[[package]]
name = "windows_i686_msvc"
version = "0.42.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246"
[[package]]
name = "windows_x86_64_gnu"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1"
[[package]]
name = "windows_x86_64_gnu"
version = "0.42.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.42.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028"
[[package]]
name = "windows_x86_64_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"
[[package]]
name = "windows_x86_64_msvc"
version = "0.42.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5"

View File

@@ -1,18 +1,12 @@
[package] [package]
name = "upkr" name = "upkr"
version = "0.2.2" version = "0.1.0"
edition = "2021" edition = "2021"
[profile.release] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
strip = "debuginfo"
[features]
terminal = ["crossterm", "pbr"]
[dependencies] [dependencies]
cdivsufsort = "2" cdivsufsort = "2"
lexopt = "0.2.1" pico-args = "0.4"
anyhow = "1" anyhow = "1"
thiserror = "1.0.36" pbr = "1"
pbr = { version = "1", optional = true }
crossterm = { version = "0.25.0", default-features = false, optional = true }

View File

@@ -2,83 +2,13 @@
Upkr is a simple general purpose lz packer designed to be used in the [MicroW8](https://github.com/exoticorn/microw8) platform. Upkr is a simple general purpose lz packer designed to be used in the [MicroW8](https://github.com/exoticorn/microw8) platform.
The compressed format is loosely based on [Shrinkler](https://github.com/askeksa/Shrinkler) with the main difference being that The compressed format is loosely based on [Shrinkler](https://github.com/askeksa/Shrinkler) with the main difference being that
Upkr doesn't differentiate between literals at odd or even addresses (by default) and that I went with rANS/rABS instead of a range coder. Upkr doesn't differentiate between literals at odd or even addresses and that I went with rANS/rABS instead of a range coder.
Compression rate is on par with Shrinkler. At this point, Upkr should still be considered unstable - the compressed format is not very likely to change but I still want
to keep that option open a little longer.
The differences compared to Shrinkler also make it interesting on 8-bit platforms. The z80 unpacker included in the release
is both about twice as fast and smaller than the Shrinkler unpacker.
## Inspirations: ## Inspirations:
* Ferris' blog about his [C64 intro packer](https://yupferris.github.io/blog/2020/08/31/c64-4k-intro-packer-deep-dive.html) * Ferris' blog about his [C64 intro packer](https://yupferris.github.io/blog/2020/08/31/c64-4k-intro-packer-deep-dive.html)
* [Shrinkler](https://github.com/askeksa/Shrinkler) * [Shrinkler](https://github.com/askeksa/Shrinkler)
* Ryg's [sample rANS implementation](https://github.com/rygorous/ryg_rans) * Ryg's [sample rANS implementation](https://github.com/rygorous/ryg_rans)
## Unpackers
The release includes a reference C unpacker, as well as some optimized asm unpackers (arm and riscv). The unpackers in
c_unpacker and asm_unpackers unpack the default upkr compressed format. The z80_unpacker
is based on some variations to the compressed format. (Use `upkr --z80` to select those variations.)
The 16 bit dos unpacker also uses some variations. (`upkr --x86`)
### More unpackers outside this repository
* [Atari Lynx](https://github.com/42Bastian/new_bll/blob/master/demos/depacker/unupkr.asm)
* [Atari Jaguar](https://github.com/42Bastian/new_bjl/blob/main/exp/depacker/unupkr.js)
* [8080, R800](https://github.com/ivagorRetrocomp/DeUpkr)
* [6502](https://github.com/pfusik/upkr6502)
## Usage
```
upkr [-l level(0-9)] [config options] <infile> [<outfile>]
upkr -u [config options] <infile> [<outfile>]
upkr --heatmap [config options] <infile> [<outfile>]
upkr --margin [config options] <infile>
-l, --level N compression level 0-9
-0, ..., -9 short form for setting compression level
-d, --decompress decompress infile
--heatmap calculate heatmap from compressed file
--raw-cost report raw cost of literals in heatmap
(the cost of literals is spread across all matches
that reference the literal by default.)
--hexdump print heatmap as colored hexdump
--margin calculate margin for overlapped unpacking of a packed file
When no infile is given, or the infile is '-', read from stdin.
When no outfile is given and reading from stdin, or when outfile is '-', write to stdout.
Config presets for specific unpackers:
--z80 --big-endian-bitstream --invert-bit-encoding --simplified-prob-update -9
--x86 --bitstream --invert-is-match-bit --invert-continue-value-bit --invert-new-offset-bit
--x86b --bitstream --invert-continue-value-bit --no-repeated-offsets -9
Config options (need to match when packing/unpacking):
-b, --bitstream bitstream mode
-p, --parity N use N (2/4) parity contexts
-r, --reverse reverse input & output
Config options to tailor output to specific optimized unpackers:
--invert-is-match-bit
--invert-new-offset-bit
--invert-continue-value-bit
--invert-bit-encoding
--simplified-prob-update
--big-endian-bitstream (implies --bitstream)
--no-repeated-offsets
--eof-in-length
--max-offset N
--max-length N
```
## Heatmap
By default, the `--heatmap` flag writes out the heatmap data as a binary file. The heatmap file is
the same size as the unpacked data. Each byte can be interpreted like this:
```
is_literal = byte & 1; // whether the byte was encoded as a literal (as opposed to a match)
size_in_bits = 2.0 ** (((byte >> 1) - 64) / 8.0); // the size this byte takes up in the compressed data
```

View File

@@ -1 +0,0 @@
/build/

View File

@@ -1,64 +0,0 @@
# --- RISC-V -----------------------------------------------------------------
# Static riscv64 test executable: the C driver linked with the assembly unpacker.
build/unpack_riscv64: ../c_unpacker/main.c unpack_riscv.S
mkdir -p build
riscv64-linux-gnu-gcc -g -static -o $@ $^
# Run the executable under qemu and compare its output with the reference file.
test_riscv64: build/unpack_riscv64
qemu-riscv64 $< test_data.upk /tmp/out.bin
cmp test_data.bin /tmp/out.bin
# Object file and raw .text image of the unpacker (64-bit build).
build/unpack_riscv64.o: unpack_riscv.S
mkdir -p build
riscv64-linux-gnu-gcc -c -o $@ $?
build/unpack_riscv64.bin: build/unpack_riscv64.o
riscv64-linux-gnu-objcopy -O binary --only-section=.text $? $@
disas-riscv64: build/unpack_riscv64.o
riscv64-linux-gnu-objdump -d $?
# Same source assembled for rv32imc / ilp32.
build/unpack_riscv32.o: unpack_riscv.S
mkdir -p build
riscv64-linux-gnu-gcc -march=rv32imc -mabi=ilp32 -c -o $@ $?
build/unpack_riscv32.bin: build/unpack_riscv32.o
riscv64-linux-gnu-objcopy -O binary --only-section=.text $? $@
disas-riscv32: build/unpack_riscv32.o
riscv64-linux-gnu-objdump -d $?
# --- ARMv6-M ----------------------------------------------------------------
# Test executable built with the Linux cross compiler and run under qemu-arm.
build/unpack_armv6m: ../c_unpacker/main.c unpack_armv6m.S
mkdir -p build
arm-linux-gnueabihf-gcc -g -static -o $@ $^
test_armv6m: build/unpack_armv6m
qemu-arm $< test_data.upk /tmp/out.bin
cmp test_data.bin /tmp/out.bin
# Raw .text image, assembled with the bare-metal toolchain for -march=armv6-m.
build/unpack_armv6m.bin: unpack_armv6m.S
mkdir -p build
arm-none-eabi-gcc -march=armv6-m -c -o build/unpack_armv6m.o $?
arm-none-eabi-objcopy -O binary --only-section=.text build/unpack_armv6m.o $@
# --- ARM32 ------------------------------------------------------------------
build/unpack_arm32: ../c_unpacker/main.c unpack_arm32.S
mkdir -p build
arm-linux-gnueabihf-gcc -g -static -o $@ $^
test_arm32: build/unpack_arm32
qemu-arm $< test_data.upk /tmp/out.bin
cmp test_data.bin /tmp/out.bin
build/unpack_arm32.bin: unpack_arm32.S
mkdir -p build
arm-none-eabi-gcc -c -o build/unpack_arm32.o $?
arm-none-eabi-objcopy -O binary --only-section=.text build/unpack_arm32.o $@
# --- Host build of the C reference unpacker ------------------------------------
build/unpack_c: ../c_unpacker/main.c ../c_unpacker/unpack.c
mkdir -p build
gcc -g -o $@ $^
test_c: build/unpack_c
$< test_data.upk /tmp/out.bin
cmp test_data.bin /tmp/out.bin
# List the sizes of all raw unpacker images.
sizes: build/unpack_armv6m.bin build/unpack_riscv64.bin build/unpack_riscv32.bin build/unpack_arm32.bin
ls -l build/*.bin

View File

@@ -1,99 +0,0 @@
/* Short integer aliases used by the unpacker. */
typedef unsigned char u8;
typedef unsigned short u16;
/* NOTE(review): unsigned long is at least 32 bits wide but may be wider (e.g. 64 bits
   on LP64 targets) -- confirm nothing depends on an exact 32-bit width. */
typedef unsigned long u32;
/* Read cursor into the compressed data; advanced by upkr_decode_bit. */
u8* upkr_data_ptr;
/* One adaptive probability byte per context, as indexed by upkr_unpack:
   0 = literal/match flag, 1..255 = literal bits, 256 = new-offset flag,
   257.. = offset value (2*32 entries), 257+64.. = length value (2*32 entries). */
u8 upkr_probs[1 + 255 + 1 + 2*32 + 2*32];
#ifdef UPKR_BITSTREAM
/* Bitstream variant: 16-bit state refilled one bit at a time from upkr_current_byte. */
u16 upkr_state;
u8 upkr_current_byte;
int upkr_bits_left;
#else
/* Default variant: state refilled one whole byte at a time. */
u32 upkr_state;
#endif
/*
 * Decode a single bit from the compressed stream.
 *
 * context_index selects the adaptive probability byte in upkr_probs. That
 * byte is the weight (out of 256) given to a 1 bit and is moved towards the
 * decoded value before returning. Input is pulled through upkr_data_ptr
 * whenever the state falls below its lower bound.
 *
 * Returns the decoded bit, 0 or 1.
 */
int upkr_decode_bit(int context_index) {
#ifdef UPKR_BITSTREAM
    /* Bit-wise refill: take input bits LSB first until bit 15 of the state is set. */
    while(upkr_state < 32768) {
        if(!upkr_bits_left) {
            upkr_current_byte = *upkr_data_ptr++;
            upkr_bits_left = 8;
        }
        upkr_state = (upkr_state << 1) + (upkr_current_byte & 1);
        upkr_current_byte >>= 1;
        --upkr_bits_left;
    }
#else
    /* Byte-wise refill: keep the state at 4096 or above. */
    while(upkr_state < 4096) {
        upkr_state = (upkr_state << 8) | *upkr_data_ptr++;
    }
#endif

    const int prob = upkr_probs[context_index];
    const int low = (int)(upkr_state & 255);

    if(low < prob) {
        /* Bit is 1: scale the state by prob and raise the probability. */
        upkr_state = prob * (upkr_state >> 8) + low;
        upkr_probs[context_index] = prob + ((256 - prob + 8) >> 4);
        return 1;
    }

    /* Bit is 0: scale by the complementary weight and lower the probability. */
    int inv = 256 - prob;
    upkr_state = inv * (upkr_state >> 8) + low - prob;
    inv += (256 - inv + 8) >> 4;
    upkr_probs[context_index] = 256 - inv;
    return 0;
}
/*
 * Decode a variable-length number.
 *
 * Bits come in pairs of contexts starting at context_index: a "continue"
 * flag followed by one payload bit, least significant bit first. When the
 * flag is 0 an implicit top bit is added above the payload bits read so far.
 *
 * Returns the decoded value (always >= 1).
 */
int upkr_decode_length(int context_index) {
    int value = 0;
    int shift;
    for(shift = 0; upkr_decode_bit(context_index); ++shift, context_index += 2) {
        if(upkr_decode_bit(context_index + 1)) {
            value |= 1 << shift;
        }
    }
    return value | (1 << shift);
}
void* upkr_unpack(void* destination, void* compressed_data) {
upkr_data_ptr = (u8*)compressed_data;
upkr_state = 0;
#ifdef UPKR_BITSTREAM
upkr_bits_left = 0;
#endif
for(int i = 0; i < sizeof(upkr_probs); ++i)
upkr_probs[i] = 128;
u8* write_ptr = (u8*)destination;
int prev_was_match = 0;
int offset = 0;
for(;;) {
if(upkr_decode_bit(0)) {
if(prev_was_match || upkr_decode_bit(256)) {
offset = upkr_decode_length(257) - 1;
if(offset == 0) {
break;
}
}
int length = upkr_decode_length(257 + 64);
while(length--) {
*write_ptr = write_ptr[-offset];
++write_ptr;
}
prev_was_match = 1;
} else {
int byte = 1;
while(byte < 256) {
int bit = upkr_decode_bit(byte);
byte = (byte << 1) + bit;
}
*write_ptr++ = byte;
prev_was_match = 0;
}
}
return write_ptr;
}

Binary file not shown.

View File

@@ -1,100 +0,0 @@
.arm
.section .text
.global upkr_unpack
.type upkr_unpack, %function
// r0 .. out_ptr (returned)
// r1 .. in_ptr (returned)
// r2 .. state
// r3 .. offset
// r4 .. prev_was_literal / decode_length ret
// r5 .. context index
// r6 .. decode_length temp
// r7 .. probs ptr
// r8-r11 .. decode_bit temp
// r12 .. decode_length return address
// Entry point (ARM32). Register roles are listed in the comment block above.
// Probability bytes are addressed as [sp - index]: index 0 is the byte at sp itself
// (inside the pushed-register area), all other indices lie below sp.
upkr_unpack:
push { r3-r11, lr }
// Store 128 to the probability bytes for indices 383..0; the countdown leaves r2 (state) at 0.
mov r2, #384
mov r3, #128
.Lclear:
subs r2, r2, #1
strb r3, [sp, -r2]
bne .Lclear
// Main loop: the bit for context 0 is returned in the carry flag; carry clear = literal.
.Lloop:
mov r5, #0
bl upkr_decode_bit
bcc .Ldata
// Match. rsbs sets carry only when r4 == 0 (the value a finished copy loop leaves
// behind), so the context-256 flag is decoded only when r4 != 0 (after a literal).
// A decoded 0 flag skips the offset and reuses r3.
// NOTE(review): r4 is not written before the first pass through here -- presumably a
// stream never starts with a match; confirm.
.Lmatch:
mov r5, #256
rsbs r6, r4, #0
blcc upkr_decode_bit
bcc .Lskip_offset
// New offset: r4 holds the negated value, so r3 = 1 - value. Zero ends the stream.
bl upkr_decode_length
adds r3, r4, #1
popeq { r3-r11, pc }
// Match length, returned negated in r4.
.Lskip_offset:
mov r5, #256+64
bl upkr_decode_length
// Copy from out_ptr + r3 while r4 counts up towards zero.
.Lcopy_loop:
ldrb r5, [r0, r3]
// Shared tail: store one byte and advance out_ptr. A literal arrives here with r4 = 1,
// so the blt falls through after a single store.
.Lstore:
strb r5, [r0], #1
adds r4, r4, #1
blt .Lcopy_loop
b .Lloop
// Literal: r5 starts at 1 and doubles as the context index; decoded bits are shifted
// in through the carry until bit 8 is set.
.Ldata:
mov r5, #1
.Ldata_loop:
bl upkr_decode_bit
adc r5, r5, r5
movs r4, r5, lsr #8
beq .Ldata_loop
b .Lstore
// Decodes a variable-length number using contexts r5+1, r5+2, ... (r5 is advanced by
// upkr_decode_bit_inc and not restored). Returns the negated value in r4.
// Uses r6 and r12 in addition to the registers used by upkr_decode_bit.
.type upkr_decode_length, %function
upkr_decode_length:
// lr is overwritten by the nested bl calls, so the return address is kept in r12.
mov r12, lr
mov r4, #0
// r6 = -(1 << n) for the current bit position n.
mvn r6, #0
.Lbit_loop:
// Continue flag: carry clear means done -- add the implicit top bit and return.
bl upkr_decode_bit_inc
addcc r4, r4, r6
movcc pc, r12
// Payload bit: subtract (1 << n) from the result when it is set.
bl upkr_decode_bit_inc
addcs r4, r4, r6
mov r6, r6, lsl #1
b .Lbit_loop
// Decodes one bit using the probability byte at [sp - r5] and returns it in the carry
// flag (no instruction after the final cmp updates the flags). Uses r8 and r9 as scratch.
// upkr_decode_bit_inc is the same routine entered one instruction earlier, so that the
// context index r5 is incremented first.
.type upkr_decode_bit, %function
upkr_decode_bit_inc:
add r5, r5, #1
upkr_decode_bit:
// Refill: while state < 4096, shift in the next input byte and re-check.
cmp r2, #4096
ldrltb r8, [r1], #1
orrlt r2, r8, r2, lsl#8
blt upkr_decode_bit
// r8 = prob; carry set when prob >= (state & 255) + 1, i.e. the low byte is below prob.
ldrb r8, [sp, -r5]
and r9, r2, #255
add r9, r9, #1
cmp r8, r9
// Carry set:   state -= (256 - prob) * (state >> 8)
// Carry clear: state -= prob * ((state >> 8) + 1)
rsbcs r8, r8, #256
mvn r9, r2, lsr#8
addcs r9, r9, #1
mla r2, r8, r9, r2
// Adapt: r8 -= (r8 + 8) >> 4, then mirrored back through 256 - r8 when carry is set.
add r9, r8, #8
sub r8, r8, r9, lsr#4
rsbcs r8, r8, #256
strb r8, [sp, -r5]
mov pc, r14

View File

@@ -1,162 +0,0 @@
// armv6-m upkr unpacker by yrlf
// some optimizations by exoticorn
.syntax unified
.thumb
.section .text
#define ALIGNUP(n, align) (((n) + (align) - 1) & ~((align) - 1))
#define PROB_LEN (1 + 255 + 1 + 2*32 + 2*32)
#define FRAME_SIZE ALIGNUP(PROB_LEN, 4)
// auto upkr_unpack(uint8_t * out, uint8_t * in) -> tuple<uint8_t *, uint8_t *>
.global upkr_unpack
.type upkr_unpack, %function
// r0 .. out_ptr (returned)
// r1 .. in_ptr (returned)
// r2 .. state
// r3 .. offset
// r4 .. prev_was_literal / decode_length ret
// r5 .. subroutine arg (preserved)
// r6 .. decode_bit ret
// r7 .. probs ptr
// Entry point (ARMv6-M). Reserves FRAME_SIZE bytes of stack for the probability table
// (r7 points at it) and releases them again at .Lend.
upkr_unpack:
push { r4, r5, r6, r7, lr }
sub sp, sp, #FRAME_SIZE
mov r7, sp
// r2 = PROB_LEN, built in two steps. The loop stores 128 to probs[PROB_LEN-1 .. 0]
// and leaves r2 (state) at 0.
movs r2, #255
adds r2, r2, #(PROB_LEN - 255)
movs r3, #128
.Lclear:
subs r2, r2, #1
strb r3, [r7, r2]
bne .Lclear
// Main loop: upkr_decode_bit returns with Z set when the bit is 0 (literal).
.Lloop:
movs r5, #0
bl upkr_decode_bit
beq .Ldata
.Lmatch:
// r6 = 1
// r5 = 256 is the context of the "new offset" flag. When r4 == 0 (left by a finished
// copy loop) the flag is not decoded and a new offset is always read.
// NOTE(review): r4 is not initialised before the first pass through here -- presumably
// a stream never begins with a match; confirm.
lsls r5, r6, #8
cmp r4, #0
beq 1f
bl upkr_decode_bit
beq 2f
1:
// New offset: r4 = -value, so r3 = 1 - value. Zero terminates the stream.
bl upkr_decode_length
adds r3, r4, #1
beq .Lend
2:
// The length contexts start 64 entries further on (upkr_decode_length restores r5).
adds r5, r5, #64
bl upkr_decode_length
// Copy from out_ptr + r3 while r4 counts up towards zero.
.Lcopy_loop:
ldrb r5, [r0, r3]
// Shared tail: a literal arrives here with r4 = 1, so the blt falls through after one store.
.Lstore:
strb r5, [r0]
adds r0, r0, #1
adds r4, r4, #1
blt .Lcopy_loop
b .Lloop
// Literal: r5 starts at 1 and doubles as the context index; bits are shifted in through
// the carry until bit 8 is set.
.Ldata:
movs r5, #1
.Ldata_loop:
bl upkr_decode_bit
adcs r5, r5, r5
lsrs r4, r5, #8
beq .Ldata_loop
b .Lstore
.Lend:
add sp, sp, #FRAME_SIZE
pop { r4, r5, r6, r7, pc }
.type upkr_decode_length, %function
// Decodes a variable-length number with contexts r5+1, r5+2, ... and returns its
// negation in r4. r0, r5 and r6 are restored on exit.
// r0 .. -length tmp (saved)
// r1 ..
// r2 ..
// r3 ..
// r4 .. -length (returned)
// r5 .. context index (saved)
// r6 .. (saved)
// r7 ..
upkr_decode_length:
push { r0, r5, r6, lr }
// r0 = accumulated payload bits (negated), r4 = -(1 << n) for the current position n.
movs r0, #0
subs r4, r0, #1
.Lbit_loop:
// Continue flag: a 0 bit finishes the number.
adds r5, r5, #1
bl upkr_decode_bit
beq 1f
// Payload bit: when set, subtract (1 << n) from the accumulator.
adds r5, r5, #1
bl upkr_decode_bit
beq 2f
adds r0, r0, r4
2:
lsls r4, r4, #1
b .Lbit_loop
1:
// Add the implicit top bit to the accumulated bits.
adds r4, r4, r0
pop { r0, r5, r6, pc }
.type upkr_decode_bit, %function
// Decodes one bit with the probability byte probs[r5]. On return r6 holds the bit,
// Z is set when the bit is 0 and the carry equals the bit (from the final lsrs).
// r0 .. tmp / prob (saved)
// r1 .. in_ptr (modified)
// r2 .. state (modified)
// r3 .. scratch (saved)
// r4 ..
// r5 .. context index (preserved)
// r6 .. bit (returned)
// r7 .. probs ptr (preserved)
// Refill path: state = (state << 8) | next input byte, then fall into the check below.
upkr_fill_state:
lsls r2, r2, #8
ldrb r6, [r1]
adds r1, r1, #1
orrs r2, r2, r6
upkr_decode_bit:
// state >> 12 is zero exactly when state < 4096.
lsrs r6, r2, #12
beq upkr_fill_state
// r1 is saved only after the refill, so the advanced in_ptr survives the pop.
push { r0, r1, r3, lr }
ldrb r0, [r7, r5]
lsrs r3, r2, #8
uxtb r1, r2
// r6 = (state & 255) - prob; negative means the bit is 1.
subs r6, r1, r0
blt 1f
// Bit 0: start from state - prob and multiply by -prob below.
subs r1, r2, r0
rsbs r0, r0, #0
1:
// Bit 1: state = (state & 255) + (state >> 8) * prob
// Bit 0: state = state - prob - (state >> 8) * prob
muls r3, r3, r0
adds r2, r1, r3
// Adapt: r0 += ((-r0) & 255) >> 4, rounded by the carry out of the shift.
rsbs r3, r0, #0
uxtb r3, r3
lsrs r3, r3, #4
adcs r0, r0, r3
// For a 0 bit r0 is negative at this point; negate it back before storing.
cmp r6, #0
blt 1f
rsbs r0, r0, #0
1:
strb r0, [r7, r5]
// Move the sign of r6 into bit 0 to form the result.
lsrs r6, r6, #31
pop { r0, r1, r3, pc }

View File

@@ -1,190 +0,0 @@
;;; -*-asm-*-
;;; ukpr unpacker for Atari Jaguar RISC.
;;; lyxass syntax
; input:
;;; R20 : packed buffer
;;; R21 : output buffer
;;; r30 : return address
;;;
;;; Register usage (destroyed!)
;;; r0-r17,r20,r21
;;;
DST REG 21
SRC REG 20
REGTOP 16
LR_save REG 99
LR_save2 REG 99
GETBIT REG 99
GETLENGTH REG 99
LITERAL REG 99
LOOP REG 99
index REG 99
bit_pos REG 99
state REG 99
prev_was_match REG 99
offset REG 99
prob reg 99
byte REG 99
PROBS reg 99
tmp2 reg 2
tmp1 REG 1
tmp0 REG 0
REGMAP
upkr_probs equ $200
SIZEOF_PROBS EQU 1+255+1+2*32+2*32
;; Entry point: fills the probability table with 128, caches routine addresses in
;; registers and runs the literal/match decode loop until an offset of 0 is decoded.
;; NOTE(review): the comments below assume the instruction after each jr/jump executes
;; as a delay slot, and that BL sets LR before jumping -- confirm against the Jaguar
;; RISC / lyxass documentation.
unupkr::
move LR,LR_save
moveq #0,tmp0
movei #upkr_probs,PROBS
;; tmp0 = 128 (bit 7 set in a cleared register)
bset #7,tmp0
movei #SIZEOF_PROBS,tmp2
move PROBS,tmp1
;; Byte-wise fill; tmp1 is advanced by the addq that follows the jr.
.init storeb tmp0,(tmp1)
subq #1,tmp2
jr pl,.init
addq #1,tmp1
moveq #0,offset
moveq #0,state
movei #getlength,GETLENGTH
movei #getbit,GETBIT
;; LOOP and LITERAL are computed from PC so that .loop and (presumably) .literal can be
;; reached with register jumps.
.looppc move PC,LOOP
addq #.loop-.looppc,LOOP
move pc,LITERAL
jr .start
addq #6,LITERAL
;; Literal: byte starts at 1 and is also used as the context index; decoded bits are
;; shifted in with addc until bit 8 is set, then the byte is stored.
.literal
moveq #1,byte
move pc,LR
jr .into
addq #6,LR ; LR = .getbit
.getbit
addc byte,byte
.into
btst #8,byte
jump eq,(GETBIT)
move byte,index
storeb byte,(DST)
addq #1,DST
.start
moveq #0,prev_was_match
;; Main loop: context 0 decides between literal (carry clear) and match.
.loop
moveq #0,index
BL (GETBIT)
jump cc,(LITERAL)
addq #14,LR
;; Match: directly after a match a new offset is always read; otherwise the flag at
;; context 256 is decoded and a 0 bit keeps the previous offset.
cmpq #1,prev_was_match
jr eq,.newoff
shlq #8,r0
jump (GETBIT)
move r0,index
jr cc,.oldoff
shlq #8,r0
.newoff
addq #1,r0 ; r0 = 257
BL (GETLENGTH)
;; offset = value - 1; zero means end of stream, return through LR_save.
subq #1,r0
jump eq,(LR_save)
move r0,offset
.oldoff
movei #257+64,r0
BL (GETLENGTH)
;; Copy r0 bytes from DST - offset, one byte per iteration.
move DST,r1
sub offset,r1
.cpymatch1
loadb (r1),r2
subq #1,r0
addqt #1,r1
storeb r2,(DST)
jr ne,.cpymatch1
addq #1,DST
jump (LOOP)
moveq #1,prev_was_match
;; Decodes a variable-length number. In: r0 = first context index. Out: r0 = value.
;; The caller's return address is kept in LR_save2 because LR is reused as the return
;; target for getbit and moved by +8/-8 to alternate between .gl and the "sh" line.
;; bit_pos counts downwards from 0, so "sh" shifts left (see the comment below).
getlength:
move LR,LR_save2
moveq #0,byte
move r0,index
moveq #0,bit_pos
move pc,LR
jump (GETBIT)
addq #6,LR
;; Continue flag: carry clear ends the number.
.gl
jr cc,.exit
addq #8,LR ; => return to "sh ..."
jump (GETBIT)
nop
;; Payload bit (r0 = 0 or 1) is shifted into place and merged into byte.
sh bit_pos,r0
subq #1,bit_pos ; sh < 0 => shift left!
or r0,byte
jump (GETBIT)
subq #8,LR
;; Add the implicit top bit: r0 = byte | (1 shifted by bit_pos).
.exit
moveq #1,r0
sh bit_pos,r0
jump (LR_save2)
or byte,r0
;; getbit: decodes one bit with the probability byte at PROBS + index, increments index
;; and returns through LR with r0 = bit. Uses r1 and r2 as scratch.
;; .newbyte is the refill path: state = (state << 8) | next input byte.
.newbyte:
loadb (SRC),r2
shlq #8,state
addq #1,SRC
or r2,state
getbit
move state,r2
move PROBS,r1
add index,r1 ; r1 = &probs[index]
;; state >> 12 is zero exactly when state < 4096, which triggers a refill.
shrq #12,r2
loadb (r1),prob
jr eq,.newbyte
move state,r2
;; r0 = state >> 8, r2 = state & 0xff
move state,r0
shlq #24,r2
shrq #8,r0 ; sh
shrq #24,r2 ; sl
cmp prob,r2
addqt #1,index
jr cs,.one
mult prob,r0
;; state -= ((state >> 8) + 1)*prob
;; prob -= (prob+8)>>4
move prob,r2
add prob,r0
addq #8,r2
sub r0,state
shrq #4,r2
moveq #0,r0
jr .ret
sub r2,prob
.one
;; state = (state >> 8)*prob+(state & 0xff)
;; prob += (256 + 8 - prob) >> 4
move r2,state
movei #256+8,r2
add r0,state
sub prob,r2 ; 256-prob+8
shrq #4,r2
add r2,prob
moveq #3,r0
.ret
storeb prob,(r1)
jump (LR)
shrq #1,r0 ; r0 = 0 or 1; carry presumably receives the bit shifted out (the decoded bit)

View File

@@ -1,217 +0,0 @@
;;; -*-asm-*-
;;; ukpr unpacker for Atari Jaguar RISC. (quick version)
;;; lyxass syntax
; input:
;;; R20 : packed buffer
;;; R21 : output buffer
;;; r30 : return address
;;;
;;; Register usage (destroyed!)
;;; r0-r17,r20,r21
;;;
DST REG 21
SRC REG 20
REGTOP 17
LR_save REG 99
LR_save2 REG 99
GETBIT REG 99
GETLENGTH REG 99
LITERAL REG 99
LOOP REG 99
index REG 99
bit_pos REG 99
state REG 99
prev_was_match REG 99
offset REG 99
prob reg 99
byte REG 99
ndata reg 99
PROBS reg 99
tmp2 reg 2
tmp1 REG 1
tmp0 REG 0
REGMAP
upkr_probs equ $200
SIZEOF_PROBS EQU 1+255+1+2*32+2*32
;; Entry point (quick version): fills the probability table four bytes at a time,
;; preloads the first input byte into ndata and runs the literal/match decode loop.
;; NOTE(review): the comments below assume the instruction after each jr/jump executes
;; as a delay slot, and that BL sets LR before jumping -- confirm against the Jaguar
;; RISC / lyxass documentation.
unupkr::
move LR,LR_save
movei #$80808080,tmp0
movei #upkr_probs,PROBS
movei #SIZEOF_PROBS,tmp2
move PROBS,tmp1
;; Fill with 128 using 32-bit stores; tmp1 is advanced by the addq after the jr.
.init store tmp0,(tmp1)
subq #4,tmp2
jr pl,.init
addq #4,tmp1
;; ndata always holds the next, not yet consumed input byte.
loadb (SRC),ndata
addq #1,SRC
moveq #0,offset
moveq #0,state
movei #getlength,GETLENGTH
movei #getbit,GETBIT
;; LOOP and LITERAL are computed from PC so that .loop and (presumably) .literal can be
;; reached with register jumps.
.looppc move PC,LOOP
addq #.loop-.looppc,LOOP
move pc,LITERAL
jr .start
addq #6,LITERAL
;; Literal: byte starts at 1 and is also used as the context index; decoded bits are
;; shifted in with addc until bit 8 is set, then the byte is stored.
.literal
moveq #1,byte
move pc,LR
jr .into
addq #6,LR ; LR = .getbit
.getbit
addc byte,byte
.into
btst #8,byte
jump eq,(GETBIT)
move byte,index
storeb byte,(DST)
addq #1,DST
.start
moveq #0,prev_was_match
;; Main loop: context 0 decides between literal (carry clear) and match.
.loop
moveq #0,index
BL (GETBIT)
jump cc,(LITERAL)
addq #14,LR
;; Match: directly after a match a new offset is always read; otherwise the flag at
;; context 256 is decoded and a 0 bit keeps the previous offset.
cmpq #1,prev_was_match
jr eq,.newoff
shlq #8,r0
jump (GETBIT)
move r0,index
jr cc,.oldoff
shlq #8,r0
.newoff
addq #1,r0 ; r0 = 257
BL (GETLENGTH)
;; offset = value - 1; zero means end of stream, return through LR_save.
subq #1,r0
move r0,offset
jump eq,(LR_save)
nop
.oldoff
movei #257+64,r0
BL (GETLENGTH)
;; Bit 0 of (DST | offset) selects the copy loop: byte-wise when it is set,
;; 16-bit word-wise when both DST and offset are even.
move DST,r2
move DST,r1
or offset,r2
btst #0,r2
moveq #1,prev_was_match
jr ne,.cpymatch1
sub offset,r1
;; Word copy: the count drops by 2 per iteration. If it goes negative, one byte too
;; many was written and DST is stepped back by 1 before returning to the loop.
.cpymatch2
loadw (r1),r2
addqt #2,r1
subq #2,r0
storew r2,(DST)
jump eq,(LOOP)
addqt #2,DST
jr pl,.cpymatch2
nop
jump (LOOP)
subq #1,DST
;; Byte copy.
.cpymatch1
loadb (r1),r2
subq #1,r0
addqt #1,r1
storeb r2,(DST)
jr ne,.cpymatch1
addq #1,DST
jump (LOOP)
//-> nop
;; Decodes a variable-length number. In: r0 = first context index. Out: r0 = value.
;; The caller's return address is kept in LR_save2 because LR is reused as the return
;; target for getbit and moved by +8/-8 to alternate between .gl and the "sh" line.
;; bit_pos counts downwards from 0, so "sh" shifts left (see the comment below).
getlength:
move LR,LR_save2
moveq #0,byte
move r0,index
moveq #0,bit_pos
move pc,LR
jump (GETBIT)
addq #6,LR
;; Continue flag: carry clear ends the number.
.gl
jr cc,.exit
addq #8,LR ; => return to "sh ..."
jump (GETBIT)
nop
;; Payload bit (r0 = 0 or 1) is shifted into place and merged into byte.
sh bit_pos,r0
subq #1,bit_pos ; sh < 0 => shift left!
or r0,byte
jump (GETBIT)
subq #8,LR
;; Add the implicit top bit: r0 = byte | (1 shifted by bit_pos).
.exit
moveq #1,r0
sh bit_pos,r0
jump (LR_save2)
or byte,r0
;; getbit: decodes one bit with the probability byte at PROBS + index, increments index
;; and returns through LR with r0 = bit. Uses r1 and r2 as scratch.
;; .newbyte is the refill path: it shifts the prefetched byte (ndata) into state,
;; fetches the following input byte into ndata and repeats while state >> 12 is zero.
.newbyte:
move ndata,r2
shlq #8,state
loadb (SRC),ndata
or r2,state
addq #1,SRC
move state,r2
shrq #12,r2
jr ne,.done
move state,r2
jr .newbyte
;; NOTE(review): the first instruction of getbit below also sits directly after the
;; "jr .newbyte" above -- presumably executed as its delay slot; it only rewrites r2.
getbit
move state,r2
move PROBS,r1
add index,r1 ; r1 = &probs[index]
shrq #12,r2
loadb (r1),prob
jr eq,.newbyte
move state,r2
.done
;; r0 = state >> 8, r2 = state & 0xff
move state,r0
shlq #24,r2
shrq #8,r0 ; sh
shrq #24,r2 ; sl
cmp prob,r2
addqt #1,index
jr cs,.one
mult prob,r0
;; state -= ((state >> 8) + 1)*prob
;; prob -= (prob+8)>>4
move prob,r2
add prob,r0
addq #8,r2
sub r0,state
shrq #4,r2
moveq #0,r0
sub r2,prob
shrq #1,r0 ; C = 0, r0 = 0
jump (LR)
storeb prob,(r1)
.one
;; state = (state >> 8)*prob+(state & 0xff)
;; prob += (256 + 8 - prob) >> 4
move r2,state
movei #256+8,r2
add r0,state
sub prob,r2 ; 256-prob+8
shrq #4,r2
add r2,prob
moveq #3,r0
storeb prob,(r1)
jump (LR)
shrq #1,r0 ; r0 = 1; NOTE(review): carry is presumably 1 here (bit shifted out of 3), not 0 -- callers branch on cc for a 0 bit

View File

@@ -1,131 +0,0 @@
.section .text
// x9 prev was literal
// x10 out ptr
// x11 in ptr
// x12 scratch in upkr_decode_bit (probability); the negated offset is kept in t0
// x13 state
// x14 context index
.global upkr_unpack
.type upkr_unpack, %function
// Entry point (RISC-V). x10 = output pointer, x11 = input pointer.
// ra, x8 and x9 are parked in t4, x17 and t6 and restored at .Ldone.
// Probability bytes are addressed as sp - index, i.e. they lie below sp.
upkr_unpack:
mv t4, ra
mv x17, x8
mv t6, x9
// x9 = 384: sb stores its low byte (128) as the fill value, and the non-zero value
// also serves as the initial "previous was literal" state. The loop writes
// sp-384 .. sp-1 and leaves x13 (state) at 0.
li x9, 256 + 128
mv x13, x9
1:
sub x8, sp, x13
sb x9, 0(x8)
addi x13, x13, -1
bnez x13, 1b
.Lmainloop:
// upkr_decode_bit pre-increments x14, so x14 is 1 after this call.
li x14, 0
jal upkr_decode_bit
beqz x15, .Lliteral
// Match: x14 = 256. With x9 == 0 (left by a finished copy loop) a new offset is always
// read; otherwise a flag bit decides.
slli x14, x14, 8
beqz x9, .Lread_offset_inc_x14
jal upkr_decode_bit
bnez x15, .Lread_offset
.Lfinished_offset:
// upkr_decode_bit returns with "jalr ra", which leaves ra pointing at the instruction
// after it -- upkr_decode_number -- so the jalr below calls upkr_decode_number.
addi x14, x14, 64
jalr ra // jal upkr_decode_number
// Copy loop: t0 = negated offset, x9 = negated length counting up to 0.
1:
add x14, x10, t0
lbu x14, (x14)
// Shared tail: a literal arrives here with x9 = 1, so the blt falls through after one store.
.Lstore_byte:
sb x14, (x10)
addi x10, x10, 1
addi x9, x9, 1
blt x9, x0, 1b
j .Lmainloop
// Literal: x14 holds the partial byte (starting at 1). The decrement undoes
// upkr_decode_bit's pre-increment before the bit is shifted in; done once bit 8 is set.
.Lliteral:
jal upkr_decode_bit
addi x14, x14, -1
slli x14, x14, 1
add x14, x14, x15
srli x9, x14, 8
beqz x9, .Lliteral
j .Lstore_byte
.Lread_offset_inc_x14:
addi x14, x14, 1
.Lread_offset:
jalr ra // jal upkr_decode_number
// t0 = 1 - value (x9 is the negated value); zero ends the stream.
addi t0, x9, 1
bnez t0, .Lfinished_offset
.Ldone:
mv x8, x17
mv x9, t6
jr t4
// Refill path: state = (state << 8) + next input byte, then fall into the check below.
upkr_load_byte:
lbu x15, 0(x11)
addi x11, x11, 1
slli x13, x13, 8
add x13, x13, x15
// x8 prob array ptr
// x11 in ptr
// x13 state
// x14 context index
// return:
// x14 context index + 1
// x15 decoded bit
// Also uses x8, x12 and t2 as scratch.
upkr_decode_bit:
// state >> 12 is zero exactly when state < 4096.
srli x15, x13, 12
beqz x15, upkr_load_byte
// The context index is incremented first; the probability byte lives at sp - x14.
addi x14, x14, 1
sub t2, sp, x14
lbu x12, (t2)
andi x8, x13, 255
// x15 = 1 when the low byte of the state is below prob.
sltu x15, x8, x12
beqz x15, 1f
// Bit 1: x12 = (prob ^ 255) + 1, the complementary weight.
xori x12, x12, 255
addi x12, x12, 1
1:
// state -= ((state >> 8) + 1 - bit) * x12
srli x8, x13, 8
addi x8, x8, 1
sub x8, x8, x15
mul x8, x8, x12
sub x13, x13, x8
// Adapt: x12 -= (x12 + 8) >> 4, negated again for a 1 bit before the low byte is stored.
addi x8, x12, 8
srli x8, x8, 4
sub x12, x12, x8
beqz x15, 1f
sub x12, x0, x12
1:
sb x12, (t2)
// Returns to the caller and leaves ra pointing at the next instruction
// (upkr_decode_number), which the main loop relies on.
jalr ra
// x14 context index
// return: x9 negative decoded number
// x14 is restored on exit; t1, t3 and t5 are used as scratch.
upkr_decode_number:
// The nested jal calls overwrite ra, so the return address is kept in t3.
mv t3, ra
mv t5, x14
li x9, 0
// t1 = -(1 << n) for the current bit position n.
li t1, -1
1:
// Continue flag: a 0 bit finishes the number.
jal upkr_decode_bit
beqz x15, 1f
// Payload bit: when set, subtract (1 << n) from the result.
jal upkr_decode_bit
beqz x15, 2f
add x9, x9, t1
2:
add t1, t1, t1
j 1b
1:
// Add the implicit top bit.
add x9, x9, t1
mv x14, t5
jr t3

View File

@@ -1,2 +0,0 @@
/target/
/upkr

127
c_library/Cargo.lock generated
View File

@@ -1,127 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "anyhow"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800"
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "cc"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
[[package]]
name = "cdivsufsort"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edefce019197609da416762da75bb000bbd2224b2d89a7e722c2296cbff79b8c"
dependencies = [
"cc",
"sacabase",
]
[[package]]
name = "lexopt"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "478ee9e62aaeaf5b140bd4138753d1f109765488581444218d3ddda43234f3e8"
[[package]]
name = "num-traits"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
dependencies = [
"autocfg",
]
[[package]]
name = "proc-macro2"
version = "1.0.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b"
dependencies = [
"proc-macro2",
]
[[package]]
name = "sacabase"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9883fc3d6ce3d78bb54d908602f8bc1f7b5f983afe601dabe083009d86267a84"
dependencies = [
"num-traits",
]
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "1.0.39"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5ab016db510546d856297882807df8da66a16fb8c4101cb8b30054b0d5b2d9c"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.39"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5420d42e90af0c38c3290abcca25b9b3bdf379fc9f55c528f53a269d9c9a267e"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "unicode-ident"
version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
[[package]]
name = "upkr"
version = "0.2.1"
dependencies = [
"anyhow",
"cdivsufsort",
"lexopt",
"thiserror",
]
[[package]]
name = "upkr_c"
version = "0.0.1"
dependencies = [
"upkr",
]

View File

@@ -1,17 +0,0 @@
[package]
name = "upkr_c"
version = "0.0.1"
edition = "2021"
[lib]
name = "upkr"
crate-type = ["staticlib"]
[profile.release]
opt-level = "s"
strip = "debuginfo"
lto = true
panic = "abort"
[dependencies]
upkr = { path="..", default-features=false }

View File

@@ -1,8 +0,0 @@
upkr: upkr.c upkr.h target/release/libupkr.a
gcc -O2 -Ltarget/release -o upkr upkr.c -lupkr -lm
strip upkr
target/release/libupkr.a: cargo
cargo build --release
.PHONY: cargo

View File

@@ -1,11 +0,0 @@
This is a simple example of compiling upkr to a library that can be linked into a
C program. It consists of a small Rust crate, which implements the C API and
compiles to a static library, and a matching C header file. As is, the Rust
crate offers two simple functions to compress/uncompress data with the default
upkr config.
The provided makefile will only work on Linux. Building the example upkr.c on
other platforms is left as an exercise for the reader ;)
On Windows you might have to make sure to install and use the correct Rust
toolchain version (mingw vs. msvc) to match your C compiler.

View File

@@ -1,42 +0,0 @@
use std::ffi::c_int;
// Single place that picks the upkr configuration used by both exported
// functions; change the value built here to use a different config.
fn config() -> upkr::Config {
    let default_config = upkr::Config::default();
    default_config
}
/// Compresses `input_size` bytes at `input_buffer` with the config from `config()`.
///
/// `compression_level` is clamped to 0..=9. At most `output_buffer_size` bytes
/// of the packed data are copied to `output_buffer`.
///
/// Returns the full size of the packed data, even when it did not fit, so the
/// caller can retry with a larger buffer.
///
/// A NULL pointer or a size of 0 is treated as an empty buffer on either side.
#[no_mangle]
pub extern "C" fn upkr_compress(
    output_buffer: *mut u8,
    output_buffer_size: usize,
    input_buffer: *const u8,
    input_size: usize,
    compression_level: c_int,
) -> usize {
    // `slice::from_raw_parts` requires a non-null pointer even for length 0,
    // so empty/NULL buffers coming from C never reach it.
    let input: &[u8] = if input_buffer.is_null() || input_size == 0 {
        &[]
    } else {
        unsafe { std::slice::from_raw_parts(input_buffer, input_size) }
    };
    let output: &mut [u8] = if output_buffer.is_null() || output_buffer_size == 0 {
        &mut []
    } else {
        unsafe { std::slice::from_raw_parts_mut(output_buffer, output_buffer_size) }
    };

    let packed_data = upkr::pack(input, compression_level.clamp(0, 9) as u8, &config(), None);

    let copy_size = packed_data.len().min(output.len());
    output[..copy_size].copy_from_slice(&packed_data[..copy_size]);
    packed_data.len()
}
/// Uncompresses `input_size` bytes at `input_buffer` with the config from `config()`.
///
/// Return value:
/// * `>= 0`: size of the unpacked data. When `upkr::unpack` reports
///   `OverSize`, this is the size it reports and nothing is copied.
/// * `-1`: any other unpack error; a message is printed to stderr.
///
/// A NULL pointer or a size of 0 is treated as an empty buffer on either side.
#[no_mangle]
pub extern "C" fn upkr_uncompress(output_buffer: *mut u8, output_buffer_size: usize, input_buffer: *const u8, input_size: usize) -> isize {
    // `slice::from_raw_parts` requires a non-null pointer even for length 0,
    // so empty/NULL buffers coming from C never reach it.
    let input: &[u8] = if input_buffer.is_null() || input_size == 0 {
        &[]
    } else {
        unsafe { std::slice::from_raw_parts(input_buffer, input_size) }
    };
    let output: &mut [u8] = if output_buffer.is_null() || output_buffer_size == 0 {
        &mut []
    } else {
        unsafe { std::slice::from_raw_parts_mut(output_buffer, output_buffer_size) }
    };

    match upkr::unpack(input, &config(), output.len()) {
        Ok(unpacked_data) => {
            // Clamp like upkr_compress does, so an oversized result can never
            // turn into a slice-index panic inside an extern "C" function.
            let copy_size = unpacked_data.len().min(output.len());
            output[..copy_size].copy_from_slice(&unpacked_data[..copy_size]);
            unpacked_data.len() as isize
        }
        Err(upkr::UnpackError::OverSize { size, .. }) => size as isize,
        Err(other) => {
            eprintln!("[upkr] compressed data corrupt: {}", other);
            -1
        }
    }
}

View File

@@ -1,99 +0,0 @@
#include "upkr.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Command line driver for the upkr C API example.
 *
 *   upkr [compress] [-0 .. -9] <file> [<out-file>]
 *   upkr [uncompress] <file> [<out-file>]
 *
 * Reads the whole input file, runs it through upkr_compress/upkr_uncompress
 * (retrying with a larger buffer when the result did not fit) and writes the
 * result to <out-file>, or to <file>.upk / <file>.unp when none is given.
 * Returns 0 on success and 1 on any error; messages are printed to stdout.
 */
int main(int argc, char** argv) {
    if(argc < 2) {
        fprintf(stdout, "Usage:\n upkr [compress] [-0 .. -9] <file> [<out-file>]\n upkr [uncompress] <file> [<out-file>]\n");
        return 1;
    }

    int argi = 1;
    int uncompress = 0;
    int compression_level = 4;

    /* optional mode word; compressing is the default */
    if(strcmp(argv[argi], "compress") == 0) {
        ++argi;
    } else if(strcmp(argv[argi], "uncompress") == 0) {
        uncompress = 1;
        ++argi;
    }

    /* optional "-<level>" switch */
    if(argi < argc && argv[argi][0] == '-') {
        compression_level = atoi(argv[argi] + 1);
        ++argi;
    }

    if(argi == argc) {
        fprintf(stdout, "input filename missing\n");
        return 1;
    }
    const char* input_name = argv[argi++];

    char* output_name;
    if(argi < argc) {
        output_name = argv[argi];
    } else {
        /* room for the 4 character extension and the terminating NUL */
        output_name = malloc(strlen(input_name) + 5);
        if(output_name == 0) {
            fprintf(stdout, "out of memory\n");
            return 1;
        }
        strcpy(output_name, input_name);
        strcat(output_name, uncompress ? ".unp" : ".upk");
    }

    FILE* file = fopen(input_name, "rb");
    if(file == 0) {
        /* print the name: `file` is NULL here and must not be passed to %s */
        fprintf(stdout, "failed to open input file '%s'\n", input_name);
        return 1;
    }

    long input_size = -1;
    if(fseek(file, 0, SEEK_END) == 0) {
        input_size = ftell(file);
    }
    if(input_size < 0) {
        fprintf(stdout, "error reading input file\n");
        fclose(file);
        return 1;
    }
    rewind(file);

    /* never ask for 0 bytes: malloc(0) may return NULL for an empty file */
    char* input_buffer = (char*)malloc(input_size > 0 ? (size_t)input_size : 1);
    if(input_buffer == 0) {
        fprintf(stdout, "out of memory\n");
        fclose(file);
        return 1;
    }

    long offset = 0;
    while(offset < input_size) {
        size_t read_size = fread(input_buffer + offset, 1, input_size - offset, file);
        if(read_size == 0) {
            fprintf(stdout, "error reading input file\n");
            fclose(file);
            return 1;
        }
        offset += (long)read_size;
    }
    fclose(file);

    /* first guess for the output size; grown below when it was too small */
    long output_buffer_size = input_size * 8;
    long output_size;
    char* output_buffer = (char*)malloc(output_buffer_size > 0 ? (size_t)output_buffer_size : 1);
    if(output_buffer == 0) {
        fprintf(stdout, "out of memory\n");
        return 1;
    }

    for(;;) {
        if(uncompress) {
            output_size = upkr_uncompress(output_buffer, output_buffer_size, input_buffer, input_size);
        } else {
            output_size = upkr_compress(output_buffer, output_buffer_size, input_buffer, input_size, compression_level);
        }
        if(output_size < 0) {
            return 1;
        }
        if(output_size <= output_buffer_size) {
            break;
        }
        /* the library reported the size it needs: grow and run again */
        char* grown_buffer = (char*)realloc(output_buffer, output_size);
        if(grown_buffer == 0) {
            fprintf(stdout, "out of memory\n");
            return 1;
        }
        output_buffer = grown_buffer;
        output_buffer_size = output_size;
    }

    file = fopen(output_name, "wb");
    if(file == 0) {
        fprintf(stdout, "failed to open output file '%s'\n", output_name);
        return 1;
    }

    offset = 0;
    while(offset < output_size) {
        size_t written_size = fwrite(output_buffer + offset, 1, output_size - offset, file);
        if(written_size == 0) {
            fprintf(stdout, "error writing output file\n");
            fclose(file);
            return 1;
        }
        offset += (long)written_size;
    }
    fclose(file);

    return 0;
}

View File

@@ -1,25 +0,0 @@
#ifndef UPKR_H_INCLUDED
#define UPKR_H_INCLUDED /* without this the #ifndef above never guards anything */

/* C API of the upkr example library. */

#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

// input_buffer/input_size: input data to compress
// output_buffer/output_buffer_size: buffer to compress into
// compression_level: 0-9
// returns the size of the compressed data, even if it didn't fit into the output buffer
size_t upkr_compress(void* output_buffer, size_t output_buffer_size, void* input_buffer, size_t input_size, int compression_level);

// input_buffer/input_size: compressed data
// output_buffer/output_buffer_size: buffer to uncompress into
// return value:
// >= 0 : size of uncompressed data, even if it didn't fit into the output buffer
// < 0 : input data corrupt, unable to decompress
ptrdiff_t upkr_uncompress(void* output_buffer, size_t output_buffer_size, void* input_buffer, size_t input_size);

#ifdef __cplusplus
}
#endif

#endif

View File

@@ -1,33 +0,0 @@
/*
 * Decodes one bit from the rANS stream using the probability stored in
 * upkr_probs[context_index], then adapts that probability.
 *
 * The state is refilled first: bit by bit (lowest bit of each input byte
 * first) when UPKR_BITSTREAM is defined, otherwise a whole byte at a time.
 * Returns the decoded bit (0 or 1).
 */
int upkr_decode_bit(int context_index) {
#ifdef UPKR_BITSTREAM
    /* shift in single bits until the state reaches 32768 */
    for(; upkr_state < 32768; --upkr_bits_left) {
        if(upkr_bits_left == 0) {
            upkr_current_byte = *upkr_data_ptr++;
            upkr_bits_left = 8;
        }
        upkr_state = (upkr_state << 1) + (upkr_current_byte & 1);
        upkr_current_byte >>= 1;
    }
#else
    /* shift in whole bytes until the state reaches 4096 */
    for(; upkr_state < 4096; )
        upkr_state = (upkr_state << 8) | *upkr_data_ptr++;
#endif

    int stored_prob = upkr_probs[context_index];
    int bit = (upkr_state & 255) < stored_prob;

    /* work with the probability of the bit that was NOT decoded as 0 ... */
    int prob = bit ? 256 - stored_prob : stored_prob;
    upkr_state -= prob * ((upkr_state >> 8) + (bit ^ 1));
    prob -= (prob + 8) >> 4;

    /* ... and store it back negated in the mirrored case */
    upkr_probs[context_index] = bit ? -prob : prob;
    return bit;
}

View File

@@ -1,7 +1,7 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
void* upkr_unpack(void* destination, void* compressed_data); int upkr_unpack(void* destination, void* compressed_data);
int main(int argn, char** argv) { int main(int argn, char** argv) {
void* input_buffer = malloc(1024*1024); void* input_buffer = malloc(1024*1024);
@@ -13,8 +13,7 @@ int main(int argn, char** argv) {
printf("Compressed size: %d\n", in_size); printf("Compressed size: %d\n", in_size);
void* end_ptr = upkr_unpack(output_buffer, input_buffer); int out_size = upkr_unpack(output_buffer, input_buffer);
int out_size = (char*)end_ptr - (char*)output_buffer;
printf("Uncompressed size: %d\n", out_size); printf("Uncompressed size: %d\n", out_size);

View File

@@ -1,56 +1,3 @@
/*
A simple C unpacker for upkr compressed data.
This implements two variants, selected by the UPKR_BITSTREAM define:
- normal: faster and smaller on modern hardware as whole bytes are shifted into
the rANS state at a time, but requires 20bits for the state
- bitstream: only single bits are shifted into the rANS state at a time
which allows the state to always fit in 16bits which is a boon
on very old CPUs.
The encoder and decoder need to be configured to use the same variant.
upkr compressed data is a rANS byte-/bit-stream encoding a series of literal
byte values and back-references as probability encoded bits.
upkr_decode_bit reads one bit from the rANS stream, taking a probability context
as parameter. The probability context is a byte estimating the probability of
a bit encoded in this context being set. It is updated by upkr_decode_bit
after each decoded bit to reflect the observed past frequencies of on/off bits.
There are a number of different contexts used in the compressed format. The order in the
upkr_probs array is arbitrary, the only requirement for the unpacker is that all bits
that shared the same context while encoding also share the same context while decoding.
The contexts are:
- is match
- has offset
- literal bit N (0-7) with already decoded highest bits of literal == M (255 total)
- offset bit N (one less than max offset bits)
- has offset bit N (max offset bits)
- length bit N (one less than max length bits)
- has length bit N (max length bits)
Literal bytes are encoded from highest to lowest bit, with the bit position and
the already decoded bits as context.
Offset and Length are encoded in an interlaced variant of Elias gamma coding. They
are encoded from lowest to highest bits. For each bit, first one bit is read in the
"has offset/length bit N" context. If this is set, offset/length bit N is read in its context
and the decoding continues with the next bit. If the "has bit N" is read as false, a
fixed 1 bit is added as the top bit at this position.
The highlevel decode loop then looks like this:
loop:
if read_bit(IS_MATCH):
if prev_was_match || read_bit(HAS_OFFSET):
offset = read_length_or_offset(OFFSET) - 1
if offset == 0:
break
length = read_length_or_offset(LENGTH)
copy_bytes_from_offset(length, offset)
else:
read_and_push(literal)
*/
typedef unsigned char u8; typedef unsigned char u8;
typedef unsigned short u16; typedef unsigned short u16;
typedef unsigned long u32; typedef unsigned long u32;
@@ -67,36 +14,34 @@ u32 upkr_state;
int upkr_decode_bit(int context_index) { int upkr_decode_bit(int context_index) {
#ifdef UPKR_BITSTREAM #ifdef UPKR_BITSTREAM
// shift in single bits until rANS state is >= 32768
while(upkr_state < 32768) { while(upkr_state < 32768) {
if(upkr_bits_left == 0) { if(upkr_bits_left == 0) {
upkr_current_byte = *upkr_data_ptr++; upkr_current_byte = *upkr_data_ptr++;
upkr_bits_left = 8; upkr_bits_left = 8;
} }
upkr_state = (upkr_state << 1) + (upkr_current_byte & 1); upkr_state = (upkr_state << 1) + (upkr_current_byte >> 7);
upkr_current_byte >>= 1; upkr_current_byte <<= 1;
--upkr_bits_left; --upkr_bits_left;
} }
#else #else
// shift in a full byte until rANS state is >= 4096
while(upkr_state < 4096) { while(upkr_state < 4096) {
upkr_state = (upkr_state << 8) | *upkr_data_ptr++; upkr_state = (upkr_state << 8) | *upkr_data_ptr++;
} }
#endif #endif
int prob = upkr_probs[context_index]; int prob = upkr_probs[context_index];
int bit = (upkr_state & 255) < prob ? 1 : 0; int bit = (upkr_state & 255) >= prob ? 1 : 0;
// rANS state and context probability update int prob_offset = 16;
// for the later, add 1/16th (rounded) of difference from either 0 or 256 int state_offset = 0;
int state_scale = prob;
if(bit) { if(bit) {
upkr_state = prob * (upkr_state >> 8) + (upkr_state & 255); state_offset = -prob;
prob += (256 - prob + 8) >> 4; state_scale = 256 - prob;
} else { prob_offset = 0;
upkr_state = (256 - prob) * (upkr_state >> 8) + (upkr_state & 255) - prob;
prob -= (prob + 8) >> 4;
} }
upkr_probs[context_index] = prob; upkr_state = state_offset + state_scale * (upkr_state >> 8) + (upkr_state & 255);
upkr_probs[context_index] = prob_offset + prob - ((prob + 8) >> 4);
return bit; return bit;
} }
@@ -111,13 +56,12 @@ int upkr_decode_length(int context_index) {
return length | (1 << bit_pos); return length | (1 << bit_pos);
} }
void* upkr_unpack(void* destination, void* compressed_data) { int upkr_unpack(void* destination, void* compressed_data) {
upkr_data_ptr = (u8*)compressed_data; upkr_data_ptr = (u8*)compressed_data;
upkr_state = 0; upkr_state = 0;
#ifdef UPKR_BITSTREAM #ifdef UPKR_BITSTREAM
upkr_bits_left = 0; upkr_bits_left = 0;
#endif #endif
// all contexts are initialized to 128 = equal probability of 0 and 1
for(int i = 0; i < sizeof(upkr_probs); ++i) for(int i = 0; i < sizeof(upkr_probs); ++i)
upkr_probs[i] = 128; upkr_probs[i] = 128;
@@ -126,13 +70,10 @@ void* upkr_unpack(void* destination, void* compressed_data) {
int prev_was_match = 0; int prev_was_match = 0;
int offset = 0; int offset = 0;
for(;;) { for(;;) {
// is match
if(upkr_decode_bit(0)) { if(upkr_decode_bit(0)) {
// has offset
if(prev_was_match || upkr_decode_bit(256)) { if(prev_was_match || upkr_decode_bit(256)) {
offset = upkr_decode_length(257) - 1; offset = upkr_decode_length(257) - 1;
if(offset == 0) { if(offset == 0) {
// a 0 offset signals the end of the compressed data
break; break;
} }
} }
@@ -143,9 +84,6 @@ void* upkr_unpack(void* destination, void* compressed_data) {
} }
prev_was_match = 1; prev_was_match = 1;
} else { } else {
// byte contains the previously read bits and indicates the number of
// read bits by the set top bit. Therefore it can be directly used as the
// context index. The set top bit ends up at bit position 8 and is not stored.
int byte = 1; int byte = 1;
while(byte < 256) { while(byte < 256) {
int bit = upkr_decode_bit(byte); int bit = upkr_decode_bit(byte);
@@ -156,5 +94,5 @@ void* upkr_unpack(void* destination, void* compressed_data) {
} }
} }
return write_ptr; return write_ptr - (u8*)destination;
} }

50
compare-variants Executable file
View File

@@ -0,0 +1,50 @@
#!/usr/bin/env ruby
# Packs every file of a directory with several upkr variants and prints the
# packed sizes side by side.
#
# Usage: compare-variants <directory>
abort "Usage: #{$0} <directory>" if ARGV.empty?

# One entry per variant: [git ref to check out, extra flag(s) passed to upkr]
configs = [
  [:master, '-b'],
  [:z80, '-b'],
  [:z80, ['-b', '-r']],
  ['old-prob-update', '-b']
]

# All files of the directory except *.txt
files = Dir[ARGV[0] + '/*'].select {|f| !(f =~ /\.txt$/) }
short_names = files.map {|f| File.basename(f)[..16] }

# One array of sizes (or 'ERR') per config, filled in while the script runs
results = []
# Prints a numbered legend of the configs followed by a table with one row per
# file name and one column per config.
#
# configs: list of configs, printed with their 1-based number
# names:   row labels, right-aligned in a 16 character column
# results: one array per config; results[c][i] is the cell for names[i],
#          cells that are not filled in yet are left out
def print_results(configs, names, results)
  configs.each_with_index do |config, i|
    printf "%d: %s\n", i + 1, config
  end

  # Pad the header by the width of the name column so that the column
  # numbers line up with the cells below them.
  printf "%16s", ''
  configs.each_index { |i| printf " %-4d", i + 1 }
  puts

  names.each_with_index do |name, i|
    printf "%16s", name
    results.each do |config_results|
      cell = config_results[i]
      printf " %-4s", cell if cell
    end
    puts
  end
end
# For every config: check out its git ref, then pack and unpack each file and
# record the packed size, or 'ERR' when a step fails or the round trip does
# not reproduce the input. The table is reprinted after every file.
configs.each do |git_ref, flags|
  raise unless system('git', 'checkout', git_ref.to_s)

  config_results = []
  results << config_results

  files.each do |file|
    pack_cmd = ['cargo', 'run', '--release', 'pack', '-l', '9', flags, file, '/tmp/out.upk'].flatten
    unpack_cmd = ['cargo', 'run', '--release', 'unpack', flags, '/tmp/out.upk', '/tmp/out.bin'].flatten

    round_trip_ok = system(*pack_cmd) &&
                    system(*unpack_cmd) &&
                    File.read(file) == File.read('/tmp/out.bin')

    config_results << (round_trip_ok ? File.size('/tmp/out.upk') : 'ERR')
    print_results(configs, short_names, results)
  end
end

View File

@@ -1,13 +0,0 @@
16 bit DOS executable stubs
---------------------------
by pestis and TomCat
unpack_x86_16_DOS.asm:
maximum compatibility, relocates unpacked code to normal start address
unpack_x86_16_DOS_no_relocation.asm:
saves some bytes by not relocating, unpacked code needs to be assembled to
start at 0x3FFE
unpack_x86_16_DOS_no_repeated_offset.asm:
removes support for repeated offsets, potentially at the cost of some compression ratio.
most likely only a win in very narrow circumstances around the 1kb mark

View File

@@ -1,160 +0,0 @@
; Contributions from pestis, TomCat and exoticorn
;
; This is the 16-bit DOS x86 decompression stub for upkr, which is designed for
; maximum compatibility: it relocates the compressed data so it can be
; decompressed starting at the normal .COM starting address. In other words,
; many of the already existing .COM files should be compressable using this
; stub.
;
; How to use:
; 1) Pack your intro using upkr into data.bin with the --x86 command line
; argument:
;
; $ upkr --x86 intro.com data.bin
;
; 2) Compile this .asm file using nasm (or any compatible assembler):
;
; $ nasm unpack_x86_16_DOS.asm -fbin -o intropck.com
;
; The packed size of the intro+stub is limited by max_len (see below) bytes.
;
; In specific cases, the unpacker stub can be further optimized to save a byte
; or two:
; 1) You can remove CLC before RET, if you don't mind carry being set upon
; program entry
; 2) You can also move PUSHA before PUSH SI and put POPA as the first
; operation of the compressed code.
; max_len: size limit in bytes for stub + packed data (see the header)
max_len equ 16384
; prog_start: address the stub and packed data are copied to. It is chosen so
; that the copy of upkr_unpack starts at 0x100+max_len+510.
prog_start equ (0x100+max_len+510+relocation-upkr_unpack)
; probs: start of the probability table, rounded up to a multiple of 256 so
; that the low byte of its address is 0
probs equ (((prog_start+max_len+510)+255)/256)*256
org 0x100
; This will be loaded at 0x100, but relocates the code and data to prog_start
relocation:
push si ; si = 0x100 at DOS start, so save it for later ret
pusha ; pusha to recall all registers before starting intro
push si ; for pop di to start writing the output
mov di, prog_start ; the depacker & data are relocated from 0x100 to prog_start
mov ch, max_len/512 ; only ch is set: the word count is max_len/2 plus whatever cl holds
; NOTE(review): the 510 in prog_start/probs presumably accounts for cl = 0xFF
; (255 extra words) at DOS start - confirm
rep movsw
jmp si ; jump to relocated upkr_unpack (si = 0x100 + 2 * words copied)
; upkr_unpack unpacks the code to 0x100 and runs it when done.
; Register use in the main loop:
; di = write pointer, si = di - current offset
; bx = address of the probability byte for the next decoded bit
; cx = 0 after a match, 1 after a literal
; dx = decoder state, bp = bit position in the input stream
upkr_unpack:
xchg ax, bp ; position in input bitstream (bp) = 0
cwd ; upkr_state (dx) = 0;
xchg ax, cx ; cx = 0x9XX
mov al, 128 ; for(int i = 0; i < sizeof(upkr_probs); ++i) upkr_probs[i] = 128;
rep stosb ; fills cx bytes starting at di, which rep movsw left at the end of the relocated copy
pop di ; u8* write_ptr = (u8*)destination;
.mainloop:
mov bx, probs
call upkr_decode_bit
jc .else ; if(upkr_decode_bit(0)) { -- carry set takes the literal path at .else
mov bh, (probs+256)/256 ; bx = probs+256 (bl is still 0)
jcxz .skip_call ; cx = 0: previous token was a match, go straight to the offset
call upkr_decode_bit
jc .skipoffset ; carry set: keep the previous offset, si is already in place
.skip_call:
stc
call upkr_decode_number ; offset = upkr_decode_length(258) - 1;
loop .notdone ; if(offset == 0)
popa
clc
ret ; pops the 0x100 pushed first in relocation
.notdone:
mov si, di
.sub:
dec si
loop .sub ; si = di - offset, leaves cx = 0
.skipoffset:
mov bl, 128 ; int length = upkr_decode_length(384);
call upkr_decode_number
rep movsb ; *write_ptr = write_ptr[-offset];
jmp .mainloop
.byteloop:
call upkr_decode_bit ; int bit = upkr_decode_bit(byte);
.else:
adc bl, bl ; byte = (byte << 1) + bit;
jnc .byteloop ; loop until the leading 1 is shifted out of bl
xchg ax, bx
stosb
inc si ; keep si at the same distance from di
mov cl, 1
jmp .mainloop ; prev_was_match = 0;
; upkr_decode_bit decodes one bit from the rANS entropy encoded bit stream.
; upkr_load_bit in front of it shifts one more input bit into dx and falls
; through; it is entered from upkr_decode_bit until bit 15 of dx is set.
; parameters:
; bx = memory address of the context probability
; dx = decoder state
; bp = bit position in input stream
; returns:
; dx = new decoder state
; bp = new bit position in input stream
; carry = bit
; trashes ax, updates the probability byte at [bx]
upkr_load_bit:
bt [compressed_data-relocation+prog_start], bp ; carry = bit number bp of the relocated packed data
inc bp
adc dx, dx ; state = state * 2 + that bit
upkr_decode_bit:
inc dx ; inc dx, dec dx is used to test the top (sign) bit of dx
dec dx
jns upkr_load_bit
movzx ax, byte [bx] ; u16 prob = upkr_probs[context_index]
neg byte [bx] ; [bx] = 256 - prob (mod 256)
push ax ; save prob, tmp = prob
cmp dl, al ; int bit = (upkr_state & 255) < prob ? 1 : 0; (carry = bit)
pushf ; save bit flags
jc .bit ; (skip if bit)
xchg [bx], al ; tmp = 256 - tmp;
.bit:
shr byte [bx], 4 ; upkr_probs[context_index] = tmp + (256 - tmp + 8) >> 4;
adc [bx], al ; adds tmp plus the rounding bit shifted out by shr
mul dh ; upkr_state = tmp * (upkr_state >> 8) + (upkr_state & 255);
mov dh, 0
add dx, ax
popf ; carry = bit again
pop ax
jc .bit2 ; (skip if bit)
neg byte [bx] ; tmp = 256 - tmp;
sub dx, ax ; upkr_state -= prob; note that this will also leave carry always unset, which is what we want
.bit2:
ret ; return the bit in carry
; upkr_decode_number loads a variable length encoded number (up to 16 bits) from
; the compressed stream. Only numbers 1..65535 can be encoded. If the encoded
; number has 4 bits and is 1ABC, it is encoded using a kind of an "interleaved
; elias code": 0A0B0C1. The 1 in the end implies that no more bits are coming.
; Bits are rotated into cx from the top; the carry passed in acts as a marker
; that .loop2 shifts back out to right-align the result.
; parameters:
; cx = must be 0
; bx = memory address of the context probability
; dx = decoder state
; bp = bit position in input stream
; carry = must be 1
; returns:
; cx = length
; dx = new decoder state
; bp = new bit position in input stream
; carry = 1
; trashes bl, ax
upkr_decode_number_loop:
inc bx
call upkr_decode_bit ; data bit -> carry
upkr_decode_number:
rcr cx, 1 ; rotate the carry (marker or data bit) into the top of cx
inc bx
call upkr_decode_bit ; "more bits" flag
jnc upkr_decode_number_loop ; 0 = there's more bits coming, 1 = no more bits
.loop2:
rcr cx, 1 ; first pass rotates in the terminating 1 as the top bit of the number
jnc .loop2 ; stop once the marker has been rotated out into carry
ret
compressed_data:
incbin "data.bin"

View File

@@ -1,151 +0,0 @@
; Contributions from pestis, TomCat and exoticorn
;
; This is the 16-bit DOS x86 decompression stub for upkr, which decompresses the
; code starting at address 0x3FFE (or whatever is defined by the entrypoint
; below). Thus, the packed code needs to be assembled with org 0x3FFE to work.
;
; How to use:
; 1) Put POPA as the first instruction of your compiled code and use org
; 0x3FFE
; 2) Pack your intro using upkr into data.bin with the --x86 command line
; argument:
;
; $ upkr --x86 intro.com data.bin
;
; 3) Compile this .asm file using nasm (or any compatible assembler) e.g.
;
; $ nasm unpack_x86_16_DOS_no_relocation.asm -fbin -o intropck.com
;
; In specific cases, the unpacker stub can be further optimized to save a byte
; or two:
; 1) If your stub+compressed code is 2k or smaller, you can save 1 byte by
; putting probs at 0x900 and initializing DI with SALC; XCHG AX, DI instead
; of MOV DI, probs
; 2) If you remove the PUSHA (and POPA in the compressed code), then you can
; assume the registers as follows: AX = 0x00XX, BX = probs + 0x1XX, CX = 0
; DX = (trash), SI = DI = right after your program, SP = as it was when the
; program started, flags = carry set
;
; Note that even with the PUSHA / POPA, carry will be set (!) unlike normal dos
; program.
; entry: address the unpacked program is written to and finally jumped to
entry equ 0x3FFE
probs equ entry - 0x1FE ; must be aligned to 256
org 0x100
; This is loaded at 0x100 and unpacks without relocating itself.
; Register use in the main loop:
; di = write pointer, si = di - current offset
; bx = address of the probability byte for the next decoded bit
; cx = 0 after a match, 1 after a literal
; dx = decoder state, bp = bit position in the input stream
upkr_unpack:
pusha
xchg ax, bp ; position in bitstream = 0
cwd ; upkr_state = 0;
mov di, probs
mov ax, 0x8080 ; for(int i = 0; i < sizeof(upkr_probs); ++i) upkr_probs[i] = 128;
; NOTE(review): cx is not set before rep stosw; filling exactly probs..entry
; (0x1FE bytes) needs cx = 0xFF on entry - confirm
rep stosw
push di ; write pointer after the fill; the final ret pops it and jumps there
.mainloop:
mov bx, probs
call upkr_decode_bit
jc .else ; if(upkr_decode_bit(0)) { -- carry set takes the literal path at .else
mov bh, (probs+256)/256 ; bx = probs+256 (bl is still 0)
jcxz .skip_call ; if(prev_was_match || upkr_decode_bit(257)) {
call upkr_decode_bit
jc .skipoffset ; carry set: keep the previous offset, si is already in place
.skip_call:
stc
call upkr_decode_number ; offset = upkr_decode_number(258) - 1;
mov si, di
loop .sub ; if(offset == 0)
ret ; offset 0: done, jump to the address pushed above
.sub:
dec si
loop .sub ; together with the loop above: si = di - offset, leaves cx = 0
.skipoffset:
mov bl, 128 ; int length = upkr_decode_number(384);
call upkr_decode_number
rep movsb ; *write_ptr = write_ptr[-offset];
jmp .mainloop
.byteloop:
call upkr_decode_bit ; int bit = upkr_decode_bit(byte);
.else:
adc bl, bl ; byte = (byte << 1) + bit;
jnc .byteloop ; loop until the leading 1 is shifted out of bl
xchg ax, bx
stosb
inc si ; keep si at the same distance from di
mov cl, 1
jmp .mainloop ; prev_was_match = 0;
; upkr_decode_bit decodes one bit from the rANS entropy encoded bit stream.
; upkr_load_bit in front of it shifts one more input bit into dx and falls
; through; it is entered from upkr_decode_bit until bit 15 of dx is set.
; parameters:
; bx = memory address of the context probability
; dx = decoder state
; bp = bit position in input stream
; returns:
; dx = new decoder state
; bp = new bit position in input stream
; carry = bit
; trashes ax, updates the probability byte at [bx]
upkr_load_bit:
bt [compressed_data], bp ; carry = bit number bp of the packed data
inc bp
adc dx, dx ; state = state * 2 + that bit
upkr_decode_bit:
inc dx
dec dx ; inc dx, dec dx is used to test the top (sign) bit of dx
jns upkr_load_bit
movzx ax, byte [bx] ; u16 prob = upkr_probs[context_index]
neg byte [bx] ; [bx] = 256 - prob (mod 256)
push ax ; save prob, tmp = prob
cmp dl, al ; int bit = (upkr_state & 255) < prob ? 1 : 0; (carry = bit)
pushf ; save bit flags
jc .bit ; (skip if bit)
xchg [bx], al ; tmp = 256 - tmp;
.bit:
shr byte [bx], 4 ; upkr_probs[context_index] = tmp + (256 - tmp + 8) >> 4;
adc [bx], al ; adds tmp plus the rounding bit shifted out by shr
mul dh ; upkr_state = tmp * (upkr_state >> 8) + (upkr_state & 255);
mov dh, 0
add dx, ax
popf ; carry = bit again
pop ax
jc .bit2 ; (skip if bit)
neg byte [bx] ; tmp = 256 - tmp;
sub dx, ax ; upkr_state -= prob; note that this will also leave carry always unset, which is what we want
.bit2:
ret ; flags = bit
; upkr_decode_number loads a variable length encoded number (up to 16 bits) from
; the compressed stream. Only numbers 1..65535 can be encoded. If the encoded
; number has 4 bits and is 1ABC, it is encoded using a kind of an "interleaved
; elias code": 0A0B0C1. The 1 in the end implies that no more bits are coming.
; Bits are rotated into cx from the top; the carry passed in acts as a marker
; that .loop2 shifts back out to right-align the result.
; parameters:
; cx = must be 0
; bx = memory address of the context probability
; dx = decoder state
; bp = bit position in input stream
; carry = must be 1
; returns:
; cx = length
; dx = new decoder state
; bp = new bit position in input stream
; carry = 1
; trashes bl, ax
upkr_decode_number_loop:
inc bx
call upkr_decode_bit ; data bit -> carry
upkr_decode_number:
rcr cx, 1 ; rotate the carry (marker or data bit) into the top of cx
inc bx
call upkr_decode_bit ; "more bits" flag
jnc upkr_decode_number_loop ; 0 = there's more bits coming, 1 = no more bits
.loop2:
rcr cx, 1 ; first pass rotates in the terminating 1 as the top bit of the number
jnc .loop2 ; stop once the marker has been rotated out into carry
ret
compressed_data:
incbin "data.bin"

View File

@@ -1,154 +0,0 @@
; Contributions from pestis, TomCat and exoticorn
;
; This is the 16-bit DOS x86 decompression stub for upkr, which is designed for
; the --no-repeated-offsets option of upkr. The decompression stub is slightly
; smaller, but the compressed data might be bigger, so you have to test if
; --no-repeated-offsets pays off in the end. This stub relocates the compressed
; data so it can be decompressed starting at the normal .COM starting address.
;
; How to use:
; 1) Pack your intro using upkr into data.bin with the --x86b command line
; argument: (notice the --x86b, not --x86!)
;
; $ upkr --x86b intro.com data.bin
;
; 2) Compile this .asm file using nasm (or any compatible assembler):
;
; $ nasm unpack_x86_16_DOS_no_repeated_offsets.asm -fbin -o intropck.com
;
; The packed size of the intro+stub is limited by max_len (see below) bytes.
;
; In specific cases, the unpacker stub can be further optimized to save a byte
; or two:
; 1) You can remove CLC before RET, if you don't mind carry being set upon
; program entry
; 2) You can also move PUSHA before PUSH SI and put POPA as the first
; operation of the compressed code.
; max_len: size limit in bytes for stub + packed data (see the header)
max_len equ 16384
; prog_start: address the stub and packed data are copied to. It is chosen so
; that the copy of upkr_unpack starts at 0x100+max_len+510.
prog_start equ (0x100+max_len+510+relocation-upkr_unpack)
; probs: start of the probability table, rounded up to a multiple of 256 so
; that the low byte of its address is 0
probs equ (((prog_start+max_len+510)+255)/256)*256
org 0x100
; This will be loaded at 0x100, but relocates the code and data to prog_start
relocation:
push si ; si = 0x100 at DOS start, so save it for later ret
pusha ; pusha to recall all registers before starting intro
push si ; for pop di to start writing the output
mov di, prog_start ; the depacker & data are relocated from 0x100 to prog_start
mov ch, max_len/512 ; only ch is set: the word count is max_len/2 plus whatever cl holds
; NOTE(review): the 510 in prog_start/probs presumably accounts for cl = 0xFF
; (255 extra words) at DOS start - confirm
rep movsw
jmp si ; jump to relocated upkr_unpack (si = 0x100 + 2 * words copied)
; upkr_unpack unpacks the code to 0x100 and runs it when done.
; This variant reads an offset for every match (no repeated offsets).
; Register use in the main loop:
; di = write pointer, si = source pointer of the current match
; bx = address of the probability byte for the next decoded bit
; cx = 0 between tokens
; dx = decoder state, bp = bit position in the input stream
upkr_unpack:
xchg ax, bp ; position in bitstream = 0
cwd ; upkr_state = 0;
xchg cx, ax ; cx > 0x0200
mov al, 128 ; for(int i = 0; i < sizeof(upkr_probs); ++i) upkr_probs[i] = 128;
rep stosb ; fills cx bytes starting at di, which rep movsw left at the end of the relocated copy
pop di ; u8* write_ptr = (u8*)destination;
.mainloop:
mov bx, probs
call upkr_decode_bit
jnc .else ; if(upkr_decode_bit(0)) { -- carry clear takes the literal path at .else
inc bh ; bx = probs+256
call upkr_decode_number ; offset = upkr_decode_number(258) - 1;
loop .notdone ; if(offset == 0)
popa
clc
ret ; pops the 0x100 pushed first in relocation
.notdone:
mov si, di
.sub:
dec si
loop .sub ; si = di - offset, leaves cx = 0
mov bl, 128 ; int length = upkr_decode_number(384);
call upkr_decode_number
rep movsb ; *write_ptr = write_ptr[-offset];
jmp .mainloop
.else:
inc bx ; bl = 1: the leading 1 that marks how many bits were read
.byteloop:
call upkr_decode_bit ; int bit = upkr_decode_bit(byte);
adc bl, bl ; byte = (byte << 1) + bit;
jnc .byteloop ; loop until the leading 1 is shifted out of bl
xchg ax, bx
stosb
jmp .mainloop ; prev_was_match = 0;
; upkr_decode_bit decodes one bit from the rANS entropy encoded bit stream.
; upkr_load_bit in front of it shifts one more input bit into dx and falls
; through; it is entered from upkr_decode_bit until bit 15 of dx is set.
; parameters:
; bx = memory address of the context probability
; dx = decoder state
; bp = bit position in input stream
; returns:
; dx = new decoder state
; bp = new bit position in input stream
; carry = bit
; trashes ax, updates the probability byte at [bx]
upkr_load_bit:
bt [compressed_data-relocation+prog_start], bp ; carry = bit number bp of the relocated packed data
inc bp
adc dx, dx ; state = state * 2 + that bit
upkr_decode_bit:
inc dx
dec dx ; or whatever other test for the top bit there is
jns upkr_load_bit
movzx ax, byte [bx] ; u16 prob = upkr_probs[context_index]
neg byte [bx] ; [bx] = 256 - prob (mod 256)
push ax ; save prob, tmp = prob
cmp dl, al ; int bit = (upkr_state & 255) < prob ? 1 : 0; (carry = bit)
pushf ; save bit flags
jc .bit ; (skip if bit)
xchg [bx], al ; tmp = 256 - tmp;
.bit:
shr byte [bx], 4 ; upkr_probs[context_index] = tmp + (256 - tmp + 8) >> 4;
adc [bx], al ; adds tmp plus the rounding bit shifted out by shr
mul dh ; upkr_state = tmp * (upkr_state >> 8) + (upkr_state & 255);
mov dh, 0
add dx, ax
popf ; carry = bit again
pop ax
jc .bit2 ; (skip if bit)
neg byte [bx] ; tmp = 256 - tmp;
sub dx, ax ; upkr_state -= prob; note that this will also leave carry always unset, which is what we want
.bit2:
ret ; flags = bit
; upkr_decode_number loads a variable length encoded number (up to 16 bits) from
; the compressed stream. Only numbers 1..65535 can be encoded. If the encoded
; number has 4 bits and is 1ABC, it is encoded using a kind of an "interleaved
; elias code": 0A0B0C1. The 1 in the end implies that no more bits are coming.
; Bits are rotated into cx from the top; the carry passed in acts as a marker
; that .loop2 shifts back out to right-align the result.
; parameters:
; cx = must be 0
; bx = memory address of the context probability
; dx = decoder state
; bp = bit position in input stream
; carry = must be 1
; returns:
; cx = length
; dx = new decoder state
; bp = new bit position in input stream
; carry = 1
; trashes bl, ax
upkr_decode_number_loop:
inc bx
call upkr_decode_bit ; data bit -> carry
upkr_decode_number:
rcr cx, 1 ; rotate the carry (marker or data bit) into the top of cx
inc bx
call upkr_decode_bit ; "more bits" flag
jnc upkr_decode_number_loop ; 0 = there's more bits coming, 1 = no more bits
.loop2:
rcr cx, 1 ; first pass rotates in the terminating 1 as the top bit of the number
jnc .loop2 ; stop once the marker has been rotated out into carry
ret
compressed_data:
incbin "data.bin"

3
fuzz/.gitignore vendored
View File

@@ -1,3 +0,0 @@
target
corpus
artifacts

247
fuzz/Cargo.lock generated
View File

@@ -1,247 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "anyhow"
version = "1.0.65"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "98161a4e3e2184da77bb14f02184cdd111e83bbbcc9979dfee3c44b9a85f5602"
[[package]]
name = "arbitrary"
version = "1.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f44124848854b941eafdb34f05b3bcf59472f643c7e151eba7c2b69daa469ed5"
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "cc"
version = "1.0.73"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
dependencies = [
"jobserver",
]
[[package]]
name = "cdivsufsort"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edefce019197609da416762da75bb000bbd2224b2d89a7e722c2296cbff79b8c"
dependencies = [
"cc",
"sacabase",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "crossbeam-channel"
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521"
dependencies = [
"cfg-if",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc"
dependencies = [
"cfg-if",
"once_cell",
]
[[package]]
name = "jobserver"
version = "0.1.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "068b1ee6743e4d11fb9c6a1e6064b3693a1b600e7f5f5988047d98b3dc9fb90b"
dependencies = [
"libc",
]
[[package]]
name = "lexopt"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "478ee9e62aaeaf5b140bd4138753d1f109765488581444218d3ddda43234f3e8"
[[package]]
name = "libc"
version = "0.2.133"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0f80d65747a3e43d1596c7c5492d95d5edddaabd45a7fcdb02b95f644164966"
[[package]]
name = "libfuzzer-sys"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae185684fe19814afd066da15a7cc41e126886c21282934225d9fc847582da58"
dependencies = [
"arbitrary",
"cc",
"once_cell",
]
[[package]]
name = "num-traits"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1"
[[package]]
name = "pbr"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff5751d87f7c00ae6403eb1fcbba229b9c76c9a30de8c1cf87182177b168cea2"
dependencies = [
"crossbeam-channel",
"libc",
"time",
"winapi",
]
[[package]]
name = "proc-macro2"
version = "1.0.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7bd7356a8122b6c4a24a82b278680c73357984ca2fc79a0f9fa6dea7dced7c58"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
dependencies = [
"proc-macro2",
]
[[package]]
name = "sacabase"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9883fc3d6ce3d78bb54d908602f8bc1f7b5f983afe601dabe083009d86267a84"
dependencies = [
"num-traits",
]
[[package]]
name = "syn"
version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e90cde112c4b9690b8cbe810cba9ddd8bc1d7472e2cae317b69e9438c1cba7d2"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a99cb8c4b9a8ef0e7907cd3b617cc8dc04d571c4e73c8ae403d80ac160bb122"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a891860d3c8d66fec8e73ddb3765f90082374dbaaa833407b904a94f1a7eb43"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "time"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"
dependencies = [
"libc",
"wasi",
"winapi",
]
[[package]]
name = "unicode-ident"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd"
[[package]]
name = "upkr"
version = "0.2.0-pre3"
dependencies = [
"anyhow",
"cdivsufsort",
"lexopt",
"pbr",
"thiserror",
]
[[package]]
name = "upkr-fuzz"
version = "0.0.0"
dependencies = [
"libfuzzer-sys",
"upkr",
]
[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

View File

@@ -1,31 +0,0 @@
[package]
name = "upkr-fuzz"
version = "0.0.0"
authors = ["Automatically generated"]
publish = false
edition = "2018"
[package.metadata]
cargo-fuzz = true
[dependencies]
libfuzzer-sys = "0.4"
[dependencies.upkr]
path = ".."
# Prevent this from interfering with workspaces
[workspace]
members = ["."]
[[bin]]
name = "all_configs"
path = "fuzz_targets/all_configs.rs"
test = false
doc = false
[[bin]]
name = "unpack"
path = "fuzz_targets/unpack.rs"
test = false
doc = false

View File

@@ -1,29 +0,0 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
fuzz_target!(|data: &[u8]| {
    // Round-trip check over the format variants: inputs longer than two bytes
    // give up their first two bytes as option flags, everything else is
    // packed unchanged with the default configuration at level 1.
    let (config, level, payload) = if data.len() > 2 {
        let (flags1, flags2) = (data[0], data[1]);
        let is_set = |byte: u8, mask: u8| (byte & mask) != 0;
        let config = upkr::Config {
            use_bitstream: is_set(flags1, 1),
            parity_contexts: if is_set(flags1, 2) { 2 } else { 1 },
            invert_bit_encoding: is_set(flags1, 4),
            is_match_bit: is_set(flags1, 8),
            new_offset_bit: is_set(flags1, 16),
            continue_value_bit: is_set(flags1, 32),
            bitstream_is_big_endian: is_set(flags1, 64),
            simplified_prob_update: is_set(flags1, 128),
            no_repeated_offsets: is_set(flags2, 32),
            eof_in_length: is_set(flags2, 1),
            max_offset: if is_set(flags2, 2) { 32 } else { usize::MAX },
            max_length: if is_set(flags2, 4) { 5 } else { usize::MAX },
            ..upkr::Config::default()
        };
        // Bits 3 and 4 of the second flag byte select the compression level (0..=3).
        (config, (flags2 >> 3) & 3, &data[2..])
    } else {
        (upkr::Config::default(), 1, data)
    };

    let packed = upkr::pack(payload, level, &config, None);
    let unpacked = upkr::unpack(&packed, &config, 1024 * 1024).unwrap();
    assert!(unpacked == payload);
});

View File

@@ -1,6 +0,0 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
fuzz_target!(|data: &[u8]| {
    // Feed the raw fuzz input to the decoder using the standard format.
    // The result (unpacked data or an error) is deliberately discarded.
    const MAX_UNPACKED_SIZE: usize = 64 * 1024;
    let config = upkr::Config::default();
    let _ = upkr::unpack(data, &config, MAX_UNPACKED_SIZE);
});

5
release/.gitignore vendored
View File

@@ -1,5 +0,0 @@
*.zip
*.tgz
upkr-linux/
upkr-windows/
upkr-windows-32/

View File

@@ -1,51 +0,0 @@
# Release packaging: builds the upkr binary for Linux and for 64/32-bit
# Windows and bundles each one with the README and the unpacker sources.

# Version string as printed by the tool itself; used in the archive names.
VERSION := $(shell cargo run --release -- --version)
# Default goal: remove earlier output, then build all three archives.
all: clean upkr-linux-$(VERSION).tgz upkr-windows-$(VERSION).zip upkr-windows-32-$(VERSION).zip
# Removes the staging directories and any previously built archives.
clean:
rm -rf upkr-linux
rm -f upkr-linux*.tgz
rm -rf upkr-windows
rm -rf upkr-windows-32
rm -f upkr-windows*.zip
# Linux archive: README plus the c/z80/asm unpacker directories exported from
# HEAD via git archive, packed together with the binary as a gzipped tarball.
upkr-linux-$(VERSION).tgz: upkr-linux/upkr PHONY
cp ../README.md upkr-linux
cd .. && git archive HEAD c_unpacker | tar -xC release/upkr-linux
cd .. && git archive HEAD z80_unpacker | tar -xC release/upkr-linux
cd .. && git archive HEAD asm_unpackers | tar -xC release/upkr-linux
tar czf $@ upkr-linux
# 64-bit Windows archive: same contents as the Linux one, packed as a zip.
upkr-windows-$(VERSION).zip: upkr-windows/upkr.exe PHONY
cp ../README.md upkr-windows/
cd .. && git archive HEAD c_unpacker | tar -xC release/upkr-windows
cd .. && git archive HEAD z80_unpacker | tar -xC release/upkr-windows
cd .. && git archive HEAD asm_unpackers | tar -xC release/upkr-windows
zip -r -9 $@ upkr-windows
# 32-bit Windows archive: same contents again, built from the i686 binary.
upkr-windows-32-$(VERSION).zip: upkr-windows-32/upkr.exe PHONY
cp ../README.md upkr-windows-32/
cd .. && git archive HEAD c_unpacker | tar -xC release/upkr-windows-32
cd .. && git archive HEAD z80_unpacker | tar -xC release/upkr-windows-32
cd .. && git archive HEAD asm_unpackers | tar -xC release/upkr-windows-32
zip -r -9 $@ upkr-windows-32
# Release binary for the x86_64 musl target with the "terminal" feature
# enabled, copied into the staging directory and stripped.
upkr-linux/upkr:
cargo build --target x86_64-unknown-linux-musl --release -F terminal
mkdir -p upkr-linux
cp ../target/x86_64-unknown-linux-musl/release/upkr upkr-linux/
strip upkr-linux/upkr
# Release binary for the x86_64 Windows GNU target, stripped with the mingw strip.
upkr-windows/upkr.exe:
cargo build --target x86_64-pc-windows-gnu --release -F terminal
mkdir -p upkr-windows
cp ../target/x86_64-pc-windows-gnu/release/upkr.exe upkr-windows/
x86_64-w64-mingw32-strip upkr-windows/upkr.exe
# Release binary for the i686 Windows GNU target, stripped with the mingw strip.
upkr-windows-32/upkr.exe:
cargo build --target i686-pc-windows-gnu --release -F terminal
mkdir -p upkr-windows-32
cp ../target/i686-pc-windows-gnu/release/upkr.exe upkr-windows-32/
i686-w64-mingw32-strip upkr-windows-32/upkr.exe
# Empty target with no recipe and no prerequisites; the archive rules list it
# as a prerequisite so that they are re-run on every invocation.
PHONY:

View File

@@ -1,17 +1,12 @@
use crate::{ use crate::rans::{ONE_PROB, PROB_BITS};
rans::{ONE_PROB, PROB_BITS},
Config,
};
const INIT_PROB: u16 = 1 << (PROB_BITS - 1); const INIT_PROB: u16 = 1 << (PROB_BITS - 1);
const UPDATE_RATE: u32 = 4; const UPDATE_RATE: i32 = 4;
const UPDATE_ADD: u32 = 8; const UPDATE_ADD: i32 = 8;
#[derive(Clone)] #[derive(Clone)]
pub struct ContextState { pub struct ContextState {
contexts: Vec<u8>, contexts: Vec<u8>,
invert_bit_encoding: bool,
simplified_prob_update: bool,
} }
pub struct Context<'a> { pub struct Context<'a> {
@@ -20,11 +15,9 @@ pub struct Context<'a> {
} }
impl ContextState { impl ContextState {
pub fn new(size: usize, config: &Config) -> ContextState { pub fn new(size: usize) -> ContextState {
ContextState { ContextState {
contexts: vec![INIT_PROB as u8; size], contexts: vec![INIT_PROB as u8; size],
invert_bit_encoding: config.invert_bit_encoding,
simplified_prob_update: config.simplified_prob_update,
} }
} }
@@ -40,21 +33,12 @@ impl<'a> Context<'a> {
pub fn update(&mut self, bit: bool) { pub fn update(&mut self, bit: bool) {
let old = self.state.contexts[self.index]; let old = self.state.contexts[self.index];
if bit {
self.state.contexts[self.index] = if self.state.simplified_prob_update { self.state.contexts[self.index] =
let offset = if bit ^ self.state.invert_bit_encoding { old - ((old as i32 + UPDATE_ADD) >> UPDATE_RATE) as u8;
ONE_PROB as i32 >> UPDATE_RATE
} else { } else {
0 self.state.contexts[self.index] =
}; old + (((ONE_PROB as i32 - old as i32) + UPDATE_ADD) >> UPDATE_RATE) as u8;
}
(offset + old as i32 - ((old as i32 + UPDATE_ADD as i32) >> UPDATE_RATE)) as u8
} else {
if bit ^ self.state.invert_bit_encoding {
old + ((ONE_PROB - old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8
} else {
old - ((old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8
}
};
} }
} }

View File

@@ -1,16 +1,16 @@
use crate::lz;
use crate::match_finder::MatchFinder; use crate::match_finder::MatchFinder;
use crate::rans::RansCoder; use crate::rans::RansCoder;
use crate::ProgressCallback; use crate::ProgressCallback;
use crate::{lz, Config};
pub fn pack( pub fn pack(
data: &[u8], data: &[u8],
config: &Config, use_bitstream: bool,
mut progress_callback: Option<ProgressCallback>, mut progress_callback: Option<ProgressCallback>,
) -> Vec<u8> { ) -> Vec<u8> {
let mut match_finder = MatchFinder::new(data); let mut match_finder = MatchFinder::new(data);
let mut rans_coder = RansCoder::new(config); let mut rans_coder = RansCoder::new(use_bitstream);
let mut state = lz::CoderState::new(config); let mut state = lz::CoderState::new();
let mut pos = 0; let mut pos = 0;
while pos < data.len() { while pos < data.len() {
@@ -19,16 +19,15 @@ pub fn pack(
} }
let mut encoded_match = false; let mut encoded_match = false;
if let Some(m) = match_finder.matches(pos).next() { if let Some(m) = match_finder.matches(pos).next() {
let max_offset = config.max_offset.min(1 << (m.length * 3 - 1).min(31)); let max_offset = 1 << (m.length * 3 - 1).min(31);
let offset = pos - m.pos; let offset = pos - m.pos;
if offset < max_offset && m.length >= config.min_length() { if offset < max_offset {
let length = m.length.min(config.max_length);
lz::Op::Match { lz::Op::Match {
offset: offset as u32, offset: offset as u32,
len: length as u32, len: m.length as u32,
} }
.encode(&mut rans_coder, &mut state, config); .encode(&mut rans_coder, &mut state);
pos += length; pos += m.length;
encoded_match = true; encoded_match = true;
} }
} }
@@ -40,14 +39,13 @@ pub fn pack(
.iter() .iter()
.zip(data[(pos - offset)..].iter()) .zip(data[(pos - offset)..].iter())
.take_while(|(a, b)| a == b) .take_while(|(a, b)| a == b)
.count() .count();
.min(config.max_length); if length > 0 {
if length >= config.min_length() {
lz::Op::Match { lz::Op::Match {
offset: offset as u32, offset: offset as u32,
len: length as u32, len: length as u32,
} }
.encode(&mut rans_coder, &mut state, config); .encode(&mut rans_coder, &mut state);
pos += length; pos += length;
encoded_match = true; encoded_match = true;
} }
@@ -55,11 +53,11 @@ pub fn pack(
} }
if !encoded_match { if !encoded_match {
lz::Op::Literal(data[pos]).encode(&mut rans_coder, &mut state, config); lz::Op::Literal(data[pos]).encode(&mut rans_coder, &mut state);
pos += 1; pos += 1;
} }
} }
lz::encode_eof(&mut rans_coder, &mut state, config); lz::encode_eof(&mut rans_coder, &mut state);
rans_coder.finish() rans_coder.finish()
} }

View File

@@ -1,208 +0,0 @@
/// Heatmap information about a compressed block of data.
///
/// For each byte in the uncompressed data, the heatmap provides two pieces of information:
/// 1. whether this byte was encoded as a literal or as part of a match
/// 2. how many (fractional) bits were spent on encoding this byte
///
/// For the sake of the heatmap, the cost of literals is spread out across all matches
/// that reference the literal.
///
/// If the `terminal` feature is enabled, there is a function to write out the
/// heatmap as a colored hexdump.
pub struct Heatmap {
    // The uncompressed bytes, in output order.
    data: Vec<u8>,
    // Cost in bits per byte; `finish` redistributes it between literals and matches.
    cost: Vec<f32>,
    // Copy of `cost` taken by `finish` before the redistribution.
    raw_cost: Vec<f32>,
    // For each byte, the index of the literal it was (transitively) copied from.
    // A byte is a literal exactly when this equals its own index.
    literal_index: Vec<usize>,
}

impl Heatmap {
    pub(crate) fn new() -> Heatmap {
        Heatmap {
            data: Vec::new(),
            cost: Vec::new(),
            raw_cost: Vec::new(),
            literal_index: Vec::new(),
        }
    }

    /// Appends a literal byte together with the bits spent on encoding it.
    pub(crate) fn add_literal(&mut self, byte: u8, cost: f32) {
        self.data.push(byte);
        self.cost.push(cost);
        self.literal_index.push(self.literal_index.len());
    }

    /// Appends `length` bytes copied from `offset` bytes back. The total `cost`
    /// of the match is split evenly across the copied bytes.
    pub(crate) fn add_match(&mut self, offset: usize, length: usize, mut cost: f32) {
        cost /= length as f32;
        for _ in 0..length {
            // Copy byte by byte so that overlapping matches (offset < length)
            // see the bytes appended earlier in this same loop.
            self.data.push(self.data[self.data.len() - offset]);
            self.literal_index
                .push(self.literal_index[self.literal_index.len() - offset]);
            self.cost.push(cost);
        }
    }

    /// Records the raw costs, then spreads the cost of each literal across
    /// all bytes that were copied from it.
    pub(crate) fn finish(&mut self) {
        self.raw_cost = self.cost.clone();

        // Number of bytes (the literal itself included) that trace back to each literal.
        let mut ref_count = vec![0usize; self.literal_index.len()];
        for &index in &self.literal_index {
            ref_count[index] += 1;
        }

        // Every byte takes over an equal share of the difference between its
        // literal's cost and its own cost; the literal gives up that share.
        let mut shifted = vec![];
        for (&index, &cost) in self.literal_index.iter().zip(self.cost.iter()) {
            let delta = (self.cost[index] - cost) / ref_count[index] as f32;
            shifted.push(delta);
            shifted[index] -= delta;
        }

        for (cost, delta) in self.cost.iter_mut().zip(shifted.into_iter()) {
            *cost += delta;
        }
    }

    /// Reverses the heatmap
    pub fn reverse(&mut self) {
        self.data.reverse();
        self.cost.reverse();
        // Keep the raw costs aligned with the reversed bytes as well.
        self.raw_cost.reverse();
        self.literal_index.reverse();
        // The byte at old index `i` now lives at `len - 1 - i`, so the stored
        // literal references have to be mirrored the same way. (Using
        // `len - i` here would make every `is_literal` query return false.)
        if let Some(last) = self.literal_index.len().checked_sub(1) {
            for index in self.literal_index.iter_mut() {
                *index = last - *index;
            }
        }
    }

    /// The number of (uncompressed) bytes of data in this heatmap
    pub fn len(&self) -> usize {
        self.cost.len()
    }

    /// Returns whether the heatmap contains no bytes at all
    pub fn is_empty(&self) -> bool {
        self.cost.is_empty()
    }

    /// Returns whether the byte at `index` was encoded as a literal
    pub fn is_literal(&self, index: usize) -> bool {
        self.literal_index[index] == index
    }

    /// Returns the cost of encoding the byte at `index` in (fractional) bits.
    /// The cost of literal bytes is spread across the matches that reference it.
    /// See `raw_cost` for the raw encoding cost of each byte.
    pub fn cost(&self, index: usize) -> f32 {
        self.cost[index]
    }

    /// Returns the raw cost of encoding the byte at `index` in (fractional) bits
    pub fn raw_cost(&self, index: usize) -> f32 {
        self.raw_cost[index]
    }

    /// Returns the uncompressed data byte at `index`
    pub fn byte(&self, index: usize) -> u8 {
        self.data[index]
    }

    #[cfg(feature = "crossterm")]
    /// Print the heatmap as a colored hexdump
    pub fn print_as_hex(&self) -> std::io::Result<()> {
        self.print_as_hex_internal(false)
    }

    #[cfg(feature = "crossterm")]
    /// Print the heatmap as a colored hexdump, based on `raw_cost`.
    pub fn print_as_hex_raw_cost(&self) -> std::io::Result<()> {
        self.print_as_hex_internal(true)
    }

    #[cfg(feature = "crossterm")]
    fn print_as_hex_internal(&self, report_raw_cost: bool) -> std::io::Result<()> {
        use crossterm::{
            style::{Attribute, Color, Print, SetAttribute, SetBackgroundColor},
            QueueableCommand,
        };
        use std::io::{stdout, Write};

        // Queues the background color (derived from the cost of byte `index`)
        // and the underline attribute (set for literals) on `out`.
        fn set_color(
            mut out: impl QueueableCommand,
            heatmap: &Heatmap,
            index: usize,
            num_colors: u16,
            report_raw_cost: bool,
        ) -> std::io::Result<()> {
            let cost = if report_raw_cost {
                heatmap.raw_cost(index)
            } else {
                heatmap.cost(index)
            };
            if num_colors < 256 {
                let colors = [
                    Color::Red,
                    Color::Yellow,
                    Color::Green,
                    Color::Cyan,
                    Color::Blue,
                    Color::DarkBlue,
                    Color::Black,
                ];
                // One palette step per halving of the cost, starting at 8 bits.
                let color_index = (3. - cost.log2())
                    .round()
                    .max(0.)
                    .min((colors.len() - 1) as f32) as usize;
                out.queue(SetBackgroundColor(colors[color_index]))?;
            } else {
                let colors = [
                    196, 166, 136, 106, 76, 46, 41, 36, 31, 26, 21, 20, 19, 18, 17, 16,
                ];
                // Same scale as above, at 2.5 palette steps per halving.
                let color_index = ((3. - cost.log2()) * 2.5)
                    .round()
                    .max(0.)
                    .min((colors.len() - 1) as f32) as usize;
                out.queue(SetBackgroundColor(Color::AnsiValue(colors[color_index])))?;
            }
            out.queue(SetAttribute(if heatmap.is_literal(index) {
                Attribute::Underlined
            } else {
                Attribute::NoUnderline
            }))?;
            Ok(())
        }

        let num_colors = crossterm::style::available_color_count();
        let term_width = crossterm::terminal::size()?.0.min(120) as usize;
        // Each byte takes four columns (three in the hex part, one in the text
        // part); eight columns are set aside for the row offset and separators.
        // Clamp to at least one byte per row: a terminal narrower than 12
        // columns would otherwise underflow the subtraction or produce a step
        // of zero, and `step_by(0)` panics.
        let bytes_per_row = (term_width.saturating_sub(8) / 4).max(1);

        for row_start in (0..self.data.len()).step_by(bytes_per_row) {
            let row_range = row_start..self.data.len().min(row_start + bytes_per_row);
            let mut out = stdout();

            out.queue(Print(&format!("{:04x} ", row_start)))?;

            // Hex column.
            for i in row_range.clone() {
                set_color(&mut out, self, i, num_colors, report_raw_cost)?;
                out.queue(Print(&format!("{:02x} ", self.data[i])))?;
            }

            // Pad a short final row so the text column stays aligned.
            let num_spaces = 1 + (bytes_per_row - (row_range.end - row_range.start)) * 3;
            let gap = " ".repeat(num_spaces);
            out.queue(SetAttribute(Attribute::Reset))?
                .queue(Print(&gap))?;

            // Text column: printable ASCII as-is, everything else as '.'.
            for i in row_range.clone() {
                set_color(&mut out, self, i, num_colors, report_raw_cost)?;
                let byte = self.data[i];
                if byte >= 32 && byte < 127 {
                    out.queue(Print(format!("{}", byte as char)))?;
                } else {
                    out.queue(Print("."))?;
                }
            }

            out.queue(SetAttribute(Attribute::Reset))?
                .queue(Print("\n"))?;
            out.flush()?;
        }

        Ok(())
    }
}

View File

@@ -1,142 +1,27 @@
#![deny(missing_docs)]
//! Compression and decompression of the upkr format and variants.
//!
//! Upkr is a compression format initially designed for the MicroW8 fantasy console,
//! with design goals being a competitive compression ratio, reasonably fast
//! decompression, low memory overhead and very small decompression code
//! when hand-optimized in assembler. (An optimized DOS executable decompressor is <140 bytes.)
mod context_state; mod context_state;
mod greedy_packer; mod greedy_packer;
mod heatmap;
mod lz; mod lz;
mod match_finder; mod match_finder;
mod parsing_packer; mod parsing_packer;
mod rans; mod rans;
pub use heatmap::Heatmap; pub use lz::unpack;
pub use lz::{calculate_margin, create_heatmap, unpack, UnpackError};
/// The type of a callback function to be given to the `pack` function.
///
/// It will be periodically called with the number of bytes of the input already processed.
pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize); pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize);
/// A configuration of which compression format variation to use.
///
/// Use `Config::default()` for the standard upkr format.
///
/// Compression format variants exist to help with micro-optimizations in uncompression
/// code on specific platforms.
#[derive(Debug)]
pub struct Config {
    /// Shift in bits from a bitstream into the rANS state, rather than whole bytes.
    /// This decreases the size of the rANS state to 16 bits which is very useful on
    /// 8 bit platforms.
    pub use_bitstream: bool,
    /// The number of parity contexts (usually 1, 2 or 4). This can improve compression
    /// on data that consists of regular groups of 2 or 4 bytes. One example is 32bit ARM
    /// code, where each instruction is 4 bytes, so `parity_contexts = 4` improves compression
    /// quite a bit. Defaults to `1`.
    pub parity_contexts: usize,

    /// Invert the encoding of bits in the rANS coder. `bit = state_lo >= prob` instead of
    /// `bit = state_lo < prob`.
    pub invert_bit_encoding: bool,
    /// The boolean value which encodes a match. Defaults to `true`.
    pub is_match_bit: bool,
    /// The boolean value which encodes a new offset (rather than re-using the previous offset).
    /// Defaults to `true`.
    pub new_offset_bit: bool,
    /// The boolean value which encodes that there are more bits coming for length/offset values.
    /// Defaults to `true`.
    pub continue_value_bit: bool,

    /// Reverses the bits in the bitstream.
    pub bitstream_is_big_endian: bool,
    /// A slightly less accurate, but slightly simpler variation of the prob update in the
    /// rANS coder, used for the z80 uncompressor.
    pub simplified_prob_update: bool,

    /// Disables support for re-using the last offset in the compression format.
    /// This might save a few bytes when working with very small data.
    pub no_repeated_offsets: bool,
    /// Standard upkr encodes the EOF marker in the offset. This encodes it in the match length
    /// instead.
    pub eof_in_length: bool,

    /// The maximum match offset value to encode when compressing.
    pub max_offset: usize,
    /// The maximum match length value to encode when compressing.
    pub max_length: usize,
}

impl Default for Config {
    /// The standard upkr format: byte-wise rANS input, a single parity
    /// context, `true` for all marker bits and no offset/length limits.
    fn default() -> Self {
        Self {
            use_bitstream: false,
            parity_contexts: 1,

            invert_bit_encoding: false,
            is_match_bit: true,
            new_offset_bit: true,
            continue_value_bit: true,

            bitstream_is_big_endian: false,
            simplified_prob_update: false,

            no_repeated_offsets: false,
            eof_in_length: false,

            max_offset: usize::MAX,
            max_length: usize::MAX,
        }
    }
}

impl Config {
    /// Shortest match length this variant encodes: 2 when the EOF marker is
    /// carried in the match length (`eof_in_length`), 1 otherwise.
    fn min_length(&self) -> usize {
        1 + usize::from(self.eof_in_length)
    }
}
/// Compresses the given data.
///
/// # Arguments
/// - `data`: The data to compress
/// - `level`: The compression level (0-9). Increasing the level by one roughly halves the
/// compression speed.
/// - `config`: The compression format variant to use.
/// - `progress_callback`: An optional callback which will periodically be called with
/// the number of bytes already processed.
///
/// # Example
/// ```rust
/// let compressed_data = upkr::pack(b"Hello, World! Yellow world!", 0, &upkr::Config::default(), None);
/// assert!(compressed_data.len() < 27);
/// ```
pub fn pack( pub fn pack(
data: &[u8], data: &[u8],
level: u8, level: u8,
config: &Config, use_bitstream: bool,
progress_callback: Option<ProgressCallback>, progress_callback: Option<ProgressCallback>,
) -> Vec<u8> { ) -> Vec<u8> {
if level == 0 { if level == 0 {
greedy_packer::pack(data, config, progress_callback) greedy_packer::pack(data, use_bitstream, progress_callback)
} else { } else {
parsing_packer::pack(data, level, config, progress_callback) parsing_packer::pack(data, level, use_bitstream, progress_callback)
} }
} }
/// Estimate the exact (fractional) size of upkr compressed data.
///
/// Note that this currently does NOT work for the bitstream variant.
pub fn compressed_size(mut data: &[u8]) -> f32 { pub fn compressed_size(mut data: &[u8]) -> f32 {
let mut state = 0; let mut state = 0;
while state < 4096 { while state < 4096 {

253
src/lz.rs
View File

@@ -1,8 +1,5 @@
use crate::context_state::ContextState; use crate::context_state::ContextState;
use crate::heatmap::Heatmap;
use crate::rans::{EntropyCoder, RansDecoder}; use crate::rans::{EntropyCoder, RansDecoder};
use crate::Config;
use thiserror::Error;
#[derive(Copy, Clone, Debug)] #[derive(Copy, Clone, Debug)]
pub enum Op { pub enum Op {
@@ -11,73 +8,42 @@ pub enum Op {
} }
impl Op { impl Op {
pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) { pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState) {
let literal_base = state.pos % state.parity_contexts * 256;
match self { match self {
&Op::Literal(lit) => { &Op::Literal(lit) => {
encode_bit(coder, state, literal_base, !config.is_match_bit); encode_bit(coder, state, 0, false);
let mut context_index = 1; let mut context_index = 1;
for i in (0..8).rev() { for i in (0..8).rev() {
let bit = (lit >> i) & 1 != 0; let bit = (lit >> i) & 1 != 0;
encode_bit(coder, state, literal_base + context_index, bit); encode_bit(coder, state, context_index, bit);
context_index = (context_index << 1) | bit as usize; context_index = (context_index << 1) | bit as usize;
} }
state.prev_was_match = false; state.prev_was_match = false;
state.pos += 1;
} }
&Op::Match { offset, len } => { &Op::Match { offset, len } => {
encode_bit(coder, state, literal_base, config.is_match_bit); encode_bit(coder, state, 0, true);
let mut new_offset = true; if !state.prev_was_match {
if !state.prev_was_match && !config.no_repeated_offsets { encode_bit(coder, state, 256, offset != state.last_offset);
new_offset = offset != state.last_offset; } else {
encode_bit( assert!(offset != state.last_offset);
coder,
state,
256 * state.parity_contexts,
new_offset == config.new_offset_bit,
);
} }
assert!(offset as usize <= config.max_offset); if offset != state.last_offset {
if new_offset { encode_length(coder, state, 257, offset + 1);
encode_length(
coder,
state,
256 * state.parity_contexts + 1,
offset + if config.eof_in_length { 0 } else { 1 },
config,
);
state.last_offset = offset; state.last_offset = offset;
} }
assert!(len as usize >= config.min_length() && len as usize <= config.max_length); encode_length(coder, state, 257 + 64, len);
encode_length(coder, state, 256 * state.parity_contexts + 65, len, config);
state.prev_was_match = true; state.prev_was_match = true;
state.pos += len as usize;
} }
} }
} }
} }
pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) { pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState) {
encode_bit( encode_bit(coder, state, 0, true);
coder, if !state.prev_was_match {
state, encode_bit(coder, state, 256, true);
state.pos % state.parity_contexts * 256,
config.is_match_bit,
);
if !state.prev_was_match && !config.no_repeated_offsets {
encode_bit(
coder,
state,
256 * state.parity_contexts,
config.new_offset_bit ^ config.eof_in_length,
);
}
if !config.eof_in_length || state.prev_was_match || config.no_repeated_offsets {
encode_length(coder, state, 256 * state.parity_contexts + 1, 1, config);
}
if config.eof_in_length {
encode_length(coder, state, 256 * state.parity_contexts + 65, 1, config);
} }
encode_length(coder, state, 257, 1);
} }
fn encode_bit( fn encode_bit(
@@ -94,18 +60,17 @@ fn encode_length(
state: &mut CoderState, state: &mut CoderState,
context_start: usize, context_start: usize,
mut value: u32, mut value: u32,
config: &Config,
) { ) {
assert!(value >= 1); assert!(value >= 1);
let mut context_index = context_start; let mut context_index = context_start;
while value >= 2 { while value >= 2 {
encode_bit(coder, state, context_index, config.continue_value_bit); encode_bit(coder, state, context_index, true);
encode_bit(coder, state, context_index + 1, value & 1 != 0); encode_bit(coder, state, context_index + 1, value & 1 != 0);
context_index += 2; context_index += 2;
value >>= 1; value >>= 1;
} }
encode_bit(coder, state, context_index, !config.continue_value_bit); encode_bit(coder, state, context_index, false);
} }
#[derive(Clone)] #[derive(Clone)]
@@ -113,18 +78,14 @@ pub struct CoderState {
contexts: ContextState, contexts: ContextState,
last_offset: u32, last_offset: u32,
prev_was_match: bool, prev_was_match: bool,
pos: usize,
parity_contexts: usize,
} }
impl CoderState { impl CoderState {
pub fn new(config: &Config) -> CoderState { pub fn new() -> CoderState {
CoderState { CoderState {
contexts: ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, config), contexts: ContextState::new(1 + 255 + 1 + 64 + 64),
last_offset: 0, last_offset: 0,
prev_was_match: false, prev_was_match: false,
pos: 0,
parity_contexts: config.parity_contexts,
} }
} }
@@ -133,201 +94,55 @@ impl CoderState {
} }
} }
/// The error type for the uncompressing related functions pub fn unpack(packed_data: &[u8], use_bitstream: bool) -> Vec<u8> {
#[derive(Error, Debug)] let mut decoder = RansDecoder::new(packed_data, use_bitstream);
pub enum UnpackError { let mut contexts = ContextState::new(1 + 255 + 1 + 64 + 64);
/// a match offset pointing beyond the start of the unpacked data was encountered
#[error("match offset out of range: {offset} > {position}")]
OffsetOutOfRange {
/// the match offset
offset: usize,
/// the current position in the uncompressed stream
position: usize,
},
/// The passed size limit was exceeded
#[error("Unpacked data over size limit: {size} > {limit}")]
OverSize {
/// the size of the uncompressed data
size: usize,
/// the size limit passed into the function
limit: usize,
},
/// The end of the packed data was reached without an encoded EOF marker
#[error("Unexpected end of input data")]
UnexpectedEOF {
#[from]
/// the underlying EOF error in the rANS decoder
source: crate::rans::UnexpectedEOF,
},
/// An offset or length value was found that exceeded 32bit
#[error("Overflow while reading value")]
ValueOverflow,
}
/// Uncompress a piece of compressed data
///
/// Returns either the uncompressed data, or an `UnpackError`
///
/// # Parameters
///
/// - `packed_data`: the compressed data
/// - `config`: the exact compression format config used to compress the data
/// - `max_size`: the maximum size of uncompressed data to return. When this is exceeded,
/// `UnpackError::OverSize` is returned
pub fn unpack(
packed_data: &[u8],
config: &Config,
max_size: usize,
) -> Result<Vec<u8>, UnpackError> {
let mut result = vec![]; let mut result = vec![];
let _ = unpack_internal(Some(&mut result), None, packed_data, config, max_size)?; let mut offset = 0;
Ok(result)
}
/// Calculates the minimum margin when overlapping buffers.
///
/// Returns the minimum margin needed between the end of the compressed data and the
/// end of the uncompressed data when overlapping the two buffers to save on RAM.
pub fn calculate_margin(packed_data: &[u8], config: &Config) -> Result<isize, UnpackError> {
unpack_internal(None, None, packed_data, config, usize::MAX)
}
/// Builds a `Heatmap` describing a piece of compressed data.
///
/// The packed stream is decoded by `unpack_internal` with no output buffer;
/// only the heatmap is filled in.
///
/// # Parameters
///
/// - `packed_data`: the compressed data
/// - `config`: the exact compression format config used to compress the data
/// - `max_size`: the maximum size of the heatmap to return. When this is exceeded,
///   `UnpackError::OverSize` is returned
///
/// # Errors
///
/// Any `UnpackError` raised while decoding `packed_data` is passed through.
pub fn create_heatmap(
    packed_data: &[u8],
    config: &Config,
    max_size: usize,
) -> Result<Heatmap, UnpackError> {
    let mut heatmap = Heatmap::new();
    // The margin value returned on success is of no interest here.
    unpack_internal(None, Some(&mut heatmap), packed_data, config, max_size)?;
    Ok(heatmap)
}
fn unpack_internal(
mut result: Option<&mut Vec<u8>>,
mut heatmap: Option<&mut Heatmap>,
packed_data: &[u8],
config: &Config,
max_size: usize,
) -> Result<isize, UnpackError> {
let mut decoder = RansDecoder::new(packed_data, &config)?;
let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, &config);
let mut offset = usize::MAX;
let mut position = 0usize;
let mut prev_was_match = false; let mut prev_was_match = false;
let mut margin = 0isize;
fn decode_length( fn decode_length(
decoder: &mut RansDecoder, decoder: &mut RansDecoder,
contexts: &mut ContextState, contexts: &mut ContextState,
mut context_index: usize, mut context_index: usize,
config: &Config, ) -> usize {
) -> Result<usize, UnpackError> {
let mut length = 0; let mut length = 0;
let mut bit_pos = 0; let mut bit_pos = 0;
while decoder.decode_with_context(&mut contexts.context_mut(context_index))? while decoder.decode_with_context(&mut contexts.context_mut(context_index)) {
== config.continue_value_bit length |= (decoder.decode_with_context(&mut contexts.context_mut(context_index + 1))
{
length |= (decoder.decode_with_context(&mut contexts.context_mut(context_index + 1))?
as usize) as usize)
<< bit_pos; << bit_pos;
bit_pos += 1; bit_pos += 1;
if bit_pos >= 32 {
return Err(UnpackError::ValueOverflow);
}
context_index += 2; context_index += 2;
} }
Ok(length | (1 << bit_pos)) length | (1 << bit_pos)
} }
loop { loop {
let prev_decoder = decoder.clone(); if decoder.decode_with_context(&mut contexts.context_mut(0)) {
margin = margin.max(position as isize - decoder.pos() as isize); if prev_was_match || decoder.decode_with_context(&mut contexts.context_mut(256)) {
let literal_base = position % config.parity_contexts * 256; offset = decode_length(&mut decoder, &mut contexts, 257) - 1;
if decoder.decode_with_context(&mut contexts.context_mut(literal_base))?
== config.is_match_bit
{
if config.no_repeated_offsets
|| prev_was_match
|| decoder
.decode_with_context(&mut contexts.context_mut(256 * config.parity_contexts))?
== config.new_offset_bit
{
offset = decode_length(
&mut decoder,
&mut contexts,
256 * config.parity_contexts + 1,
&config,
)? - if config.eof_in_length { 0 } else { 1 };
if offset == 0 { if offset == 0 {
break; break;
} }
} }
let length = decode_length( let length = decode_length(&mut decoder, &mut contexts, 257 + 64);
&mut decoder,
&mut contexts,
256 * config.parity_contexts + 65,
&config,
)?;
if config.eof_in_length && length == 1 {
break;
}
if offset > position {
return Err(UnpackError::OffsetOutOfRange { offset, position });
}
if let Some(ref mut heatmap) = heatmap {
heatmap.add_match(offset, length, decoder.cost(&prev_decoder));
}
if let Some(ref mut result) = result {
for _ in 0..length { for _ in 0..length {
if result.len() < max_size {
result.push(result[result.len() - offset]); result.push(result[result.len() - offset]);
} else {
break;
} }
}
}
position += length;
prev_was_match = true; prev_was_match = true;
} else { } else {
let mut context_index = 1; let mut context_index = 1;
let mut byte = 0; let mut byte = 0;
for i in (0..8).rev() { for i in (0..8).rev() {
let bit = decoder let bit = decoder.decode_with_context(&mut contexts.context_mut(context_index));
.decode_with_context(&mut contexts.context_mut(literal_base + context_index))?;
context_index = (context_index << 1) | bit as usize; context_index = (context_index << 1) | bit as usize;
byte |= (bit as u8) << i; byte |= (bit as u8) << i;
} }
if let Some(ref mut heatmap) = heatmap {
heatmap.add_literal(byte, decoder.cost(&prev_decoder));
}
if let Some(ref mut result) = result {
if result.len() < max_size {
result.push(byte); result.push(byte);
}
}
position += 1;
prev_was_match = false; prev_was_match = false;
} }
} }
if let Some(heatmap) = heatmap { result
heatmap.finish();
}
if position > max_size {
return Err(UnpackError::OverSize {
size: position,
limit: max_size,
});
}
Ok(margin + decoder.pos() as isize - position as isize)
} }

View File

@@ -1,300 +1,67 @@
use anyhow::Result; use anyhow::{bail, Result};
use std::ffi::OsStr;
use std::io::prelude::*; use std::io::prelude::*;
use std::process;
use std::{fs::File, path::PathBuf}; use std::{fs::File, path::PathBuf};
fn main() -> Result<()> { fn main() -> Result<()> {
let mut config = upkr::Config::default(); let mut args = pico_args::Arguments::from_env();
let mut reverse = false;
let mut unpack = false;
let mut calculate_margin = false;
let mut create_heatmap = false;
let mut report_raw_cost = false;
#[allow(unused_mut)]
let mut do_hexdump = false;
let mut level = 2;
let mut infile: Option<PathBuf> = None;
let mut outfile: Option<PathBuf> = None;
let mut max_unpacked_size = 512 * 1024 * 1024;
let mut parser = lexopt::Parser::from_env(); match args.subcommand()?.as_ref().map(|s| s.as_str()) {
while let Some(arg) = parser.next()? { None => print_help(),
use lexopt::prelude::*; Some("pack") => {
match arg { let level = args.opt_value_from_str(["-l", "--level"])?.unwrap_or(2u8);
Short('b') | Long("bitstream") => config.use_bitstream = true, let use_bitstream = args.contains(["-b", "--bitstream"]);
Short('p') | Long("parity") => config.parity_contexts = parser.value()?.parse()?,
Short('r') | Long("reverse") => reverse = true,
Long("invert-is-match-bit") => config.is_match_bit = false,
Long("invert-new-offset-bit") => config.new_offset_bit = false,
Long("invert-continue-value-bit") => config.continue_value_bit = false,
Long("invert-bit-encoding") => config.invert_bit_encoding = true,
Long("simplified-prob-update") => config.simplified_prob_update = true,
Long("big-endian-bitstream") => {
config.use_bitstream = true;
config.bitstream_is_big_endian = true;
}
Long("no-repeated-offsets") => config.no_repeated_offsets = true,
Long("eof-in-length") => config.eof_in_length = true,
Long("max-offset") => config.max_offset = parser.value()?.parse()?, let infile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
Long("max-length") => config.max_length = parser.value()?.parse()?, let outfile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
Long("z80") => { let mut data = vec![];
config.use_bitstream = true; File::open(infile)?.read_to_end(&mut data)?;
config.bitstream_is_big_endian = true;
config.invert_bit_encoding = true;
config.simplified_prob_update = true;
level = 9;
}
Long("x86") => {
config.use_bitstream = true;
config.continue_value_bit = false;
config.is_match_bit = false;
config.new_offset_bit = false;
}
Long("x86b") => {
config.use_bitstream = true;
config.continue_value_bit = false;
config.no_repeated_offsets = true;
level = 9;
}
Short('u') | Long("unpack") | Short('d') | Long("decompress") => unpack = true, let mut pb = pbr::ProgressBar::new(data.len() as u64);
Long("margin") => calculate_margin = true,
Long("heatmap") => create_heatmap = true,
Long("raw-cost") => report_raw_cost = true,
#[cfg(feature = "crossterm")]
Long("hexdump") => do_hexdump = true,
Short('l') | Long("level") => level = parser.value()?.parse()?,
Short(n) if n.is_ascii_digit() => level = n as u8 - b'0',
Short('h') | Long("help") => print_help(0),
Long("version") => {
println!("{}", env!("CARGO_PKG_VERSION"));
process::exit(0);
}
Long("max-unpacked-size") => max_unpacked_size = parser.value()?.parse()?,
Value(val) if infile.is_none() => infile = Some(val.try_into()?),
Value(val) if outfile.is_none() => outfile = Some(val.try_into()?),
_ => return Err(arg.unexpected().into()),
}
}
let infile = IoTarget::from_filename(infile);
let outfile = |tpe: OutFileType| infile.output(tpe, &outfile);
if config.parity_contexts != 1 && config.parity_contexts != 2 && config.parity_contexts != 4 {
eprintln!("--parity has to be 1, 2, or 4");
process::exit(1);
}
if !unpack && !calculate_margin && !create_heatmap {
let mut data = infile.read()?;
if reverse {
data.reverse();
}
#[cfg(feature = "terminal")]
let mut packed_data = {
let mut pb = pbr::ProgressBar::on(std::io::stderr(), data.len() as u64);
pb.set_units(pbr::Units::Bytes); pb.set_units(pbr::Units::Bytes);
let packed_data = upkr::pack( let packed_data = upkr::pack(
&data, &data,
level, level,
&config, use_bitstream,
Some(&mut |pos| { Some(&mut |pos| {
pb.set(pos as u64); pb.set(pos as u64);
}), }),
); );
pb.finish(); pb.finish();
eprintln!();
packed_data
};
#[cfg(not(feature = "terminal"))]
let mut packed_data = upkr::pack(&data, level, &config, None);
if reverse { println!(
packed_data.reverse();
}
eprintln!(
"Compressed {} bytes to {} bytes ({}%)", "Compressed {} bytes to {} bytes ({}%)",
data.len(), data.len(),
packed_data.len(), packed_data.len(),
packed_data.len() as f32 * 100. / data.len() as f32 packed_data.len() as f32 * 100. / data.len() as f32
); );
outfile(OutFileType::Packed).write(&packed_data)?; File::create(outfile)?.write_all(&packed_data)?;
} else {
let mut data = infile.read()?;
if reverse {
data.reverse();
} }
if unpack { Some("unpack") => {
let mut unpacked_data = upkr::unpack(&data, &config, max_unpacked_size)?; let use_bitstream = args.contains(["-b", "--bitstream"]);
if reverse {
unpacked_data.reverse(); let infile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
let outfile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
let mut data = vec![];
File::open(infile)?.read_to_end(&mut data)?;
let packed_data = upkr::unpack(&data, use_bitstream);
File::create(outfile)?.write_all(&packed_data)?;
} }
outfile(OutFileType::Unpacked).write(&unpacked_data)?; Some(other) => {
} bail!("Unknown subcommand '{}'", other);
if create_heatmap {
let mut heatmap = upkr::create_heatmap(&data, &config, max_unpacked_size)?;
if reverse {
heatmap.reverse();
}
match do_hexdump {
#[cfg(feature = "crossterm")]
true => {
if report_raw_cost {
heatmap.print_as_hex_raw_cost()?
} else {
heatmap.print_as_hex()?
}
}
_ => {
let mut heatmap_bin = Vec::with_capacity(heatmap.len());
for i in 0..heatmap.len() {
let cost = if report_raw_cost {
heatmap.raw_cost(i)
} else {
heatmap.cost(i)
};
let cost = (cost.log2() * 8. + 64.).round().max(0.).min(127.) as u8;
heatmap_bin.push((cost << 1) | heatmap.is_literal(i) as u8);
}
outfile(OutFileType::Heatmap).write(&heatmap_bin)?;
}
}
}
if calculate_margin {
println!("{}", upkr::calculate_margin(&data, &config)?);
} }
} }
Ok(()) Ok(())
} }
enum OutFileType { fn print_help() {
Packed,
Unpacked,
Heatmap,
}
/// Source or destination of the data handled by the command line tool.
enum IoTarget {
    /// Standard input when reading, standard output when writing.
    StdInOut,
    /// The file at the given path.
    File(PathBuf),
}

impl IoTarget {
    /// Chooses the target for an optional file name.
    ///
    /// A missing name and the special name `-` both select stdin/stdout;
    /// any other name selects that file.
    fn from_filename(filename: Option<PathBuf>) -> IoTarget {
        match filename {
            Some(path) if path.as_os_str() != "-" => IoTarget::File(path),
            _ => IoTarget::StdInOut,
        }
    }

    /// Reads the target to its end and returns all bytes.
    fn read(&self) -> Result<Vec<u8>> {
        let mut contents = Vec::new();
        match self {
            IoTarget::StdInOut => {
                std::io::stdin().read_to_end(&mut contents)?;
            }
            IoTarget::File(path) => {
                File::open(path)?.read_to_end(&mut contents)?;
            }
        }
        Ok(contents)
    }

    /// Writes all of `data` to the target; a file target is created (or truncated) first.
    fn write(&self, data: &[u8]) -> Result<()> {
        match self {
            IoTarget::StdInOut => std::io::stdout().write_all(data)?,
            IoTarget::File(path) => File::create(path)?.write_all(data)?,
        }
        Ok(())
    }

    /// Determines where output of kind `tpe` goes when `self` is the input.
    ///
    /// An explicit `outname` always wins (with `-` meaning stdout). Without
    /// one, stdin input maps to stdout, and a file input maps to a file whose
    /// name is derived from the input path:
    ///
    /// - `Packed`: `.upk` is appended to the file name
    /// - `Unpacked`: a `upk` extension is stripped, any other is replaced by `bin`
    /// - `Heatmap`: the extension is replaced by `heatmap`
    fn output(&self, tpe: OutFileType, outname: &Option<PathBuf>) -> IoTarget {
        if let Some(explicit) = outname {
            return IoTarget::from_filename(Some(explicit.clone()));
        }

        let input_path = match self {
            IoTarget::StdInOut => return IoTarget::StdInOut,
            IoTarget::File(path) => path,
        };

        let mut derived = input_path.clone();
        match tpe {
            OutFileType::Packed => {
                // Append rather than replace, so "a.bin" becomes "a.bin.upk".
                let mut filename = derived
                    .file_name()
                    .map(OsStr::to_os_string)
                    .unwrap_or_default();
                filename.push(".upk");
                derived.set_file_name(filename);
            }
            OutFileType::Unpacked => {
                let is_upk = derived.extension().map_or(false, |ext| ext == "upk");
                derived.set_extension(if is_upk { "" } else { "bin" });
            }
            OutFileType::Heatmap => {
                derived.set_extension("heatmap");
            }
        }
        IoTarget::File(derived)
    }
}
fn print_help(exit_code: i32) -> ! {
eprintln!("Usage:"); eprintln!("Usage:");
eprintln!(" upkr [-l level(0-9)] [config options] <infile> [<outfile>]"); eprintln!(" upkr pack [-b] [-l level(0-9)] <infile> <outfile>");
eprintln!(" upkr -u [config options] <infile> [<outfile>]"); eprintln!(" upkr unpack [-b] <infile> <outfile>");
eprintln!(" upkr --heatmap [config options] <infile> [<outfile>]");
eprintln!(" upkr --margin [config options] <infile>");
eprintln!(); eprintln!();
eprintln!(" -l, --level N compression level 0-9");
eprintln!(" -0, ..., -9 short form for setting compression level");
eprintln!(" -d, --decompress decompress infile");
eprintln!(" --heatmap calculate heatmap from compressed file");
eprintln!(" --raw-cost report raw cost of literals in heatmap");
#[cfg(feature = "crossterm")]
eprintln!(" --hexdump print heatmap as colored hexdump");
eprintln!(" --margin calculate margin for overlapped unpacking of a packed file");
eprintln!();
eprintln!("When no infile is given, or the infile is '-', read from stdin.");
eprintln!(
"When no outfile is given and reading from stdin, or when outfile is '-', write to stdout."
);
eprintln!();
eprintln!("Version: {}", env!("CARGO_PKG_VERSION"));
eprintln!();
eprintln!("Config presets for specific unpackers:");
eprintln!(" --z80 --big-endian-bitstream --invert-bit-encoding --simplified-prob-update -9");
eprintln!(
" --x86 --bitstream --invert-is-match-bit --invert-continue-value-bit --invert-new-offset-bit"
);
eprintln!(
" --x86b --bitstream --invert-continue-value-bit --no-repeated-offsets -9"
);
eprintln!();
eprintln!("Config options (need to match when packing/unpacking):");
eprintln!(" -b, --bitstream bitstream mode"); eprintln!(" -b, --bitstream bitstream mode");
eprintln!(" -p, --parity N use N (2/4) parity contexts"); eprintln!(" -l, --level N compression level 0-9");
eprintln!(" -r, --reverse reverse input & output"); std::process::exit(1);
eprintln!();
eprintln!("Config options to tailor output to specific optimized unpackers:");
eprintln!(" --invert-is-match-bit");
eprintln!(" --invert-new-offset-bit");
eprintln!(" --invert-continue-value-bit");
eprintln!(" --invert-bit-encoding");
eprintln!(" --simplified-prob-update");
eprintln!(" --big-endian-bitstream (implies --bitstream)");
eprintln!(" --no-repeated-offsets");
eprintln!(" --eof-in-length");
eprintln!(" --max-offset N");
eprintln!(" --max-length N");
process::exit(exit_code);
} }

View File

@@ -6,24 +6,19 @@ use crate::match_finder::MatchFinder;
use crate::rans::{CostCounter, RansCoder}; use crate::rans::{CostCounter, RansCoder};
use crate::{lz, ProgressCallback}; use crate::{lz, ProgressCallback};
pub fn pack( pub fn pack(data: &[u8], level: u8, use_bitstream: bool, progress_cb: Option<ProgressCallback>) -> Vec<u8> {
data: &[u8], let mut parse = parse(data, Config::from_level(level), progress_cb);
level: u8,
config: &crate::Config,
progress_cb: Option<ProgressCallback>,
) -> Vec<u8> {
let mut parse = parse(data, Config::from_level(level), config, progress_cb);
let mut ops = vec![]; let mut ops = vec![];
while let Some(link) = parse { while let Some(link) = parse {
ops.push(link.op); ops.push(link.op);
parse = link.prev.clone(); parse = link.prev.clone();
} }
let mut state = lz::CoderState::new(config); let mut state = lz::CoderState::new();
let mut coder = RansCoder::new(config); let mut coder = RansCoder::new(use_bitstream);
for op in ops.into_iter().rev() { for op in ops.into_iter().rev() {
op.encode(&mut coder, &mut state, config); op.encode(&mut coder, &mut state);
} }
lz::encode_eof(&mut coder, &mut state, config); lz::encode_eof(&mut coder, &mut state);
coder.finish() coder.finish()
} }
@@ -43,7 +38,6 @@ type Arrivals = HashMap<usize, Vec<Arrival>>;
fn parse( fn parse(
data: &[u8], data: &[u8],
config: Config, config: Config,
encoding_config: &crate::Config,
mut progress_cb: Option<ProgressCallback>, mut progress_cb: Option<ProgressCallback>,
) -> Option<Rc<Parse>> { ) -> Option<Rc<Parse>> {
let mut match_finder = MatchFinder::new(data) let mut match_finder = MatchFinder::new(data)
@@ -105,22 +99,17 @@ fn parse(
cost_counter: &mut CostCounter, cost_counter: &mut CostCounter,
pos: usize, pos: usize,
offset: usize, offset: usize,
mut length: usize, length: usize,
arrival: &Arrival, arrival: &Arrival,
max_arrivals: usize, max_arrivals: usize,
config: &crate::Config,
) { ) {
if length < config.min_length() {
return;
}
length = length.min(config.max_length);
cost_counter.reset(); cost_counter.reset();
let mut state = arrival.state.clone(); let mut state = arrival.state.clone();
let op = lz::Op::Match { let op = lz::Op::Match {
offset: offset as u32, offset: offset as u32,
len: length as u32, len: length as u32,
}; };
op.encode(cost_counter, &mut state, config); op.encode(cost_counter, &mut state);
add_arrival( add_arrival(
arrivals, arrivals,
pos + length, pos + length,
@@ -140,13 +129,13 @@ fn parse(
0, 0,
Arrival { Arrival {
parse: None, parse: None,
state: lz::CoderState::new(encoding_config), state: lz::CoderState::new(),
cost: 0.0, cost: 0.0,
}, },
max_arrivals, max_arrivals,
); );
let cost_counter = &mut CostCounter::new(encoding_config); let cost_counter = &mut CostCounter::new();
let mut best_per_offset = HashMap::new(); let mut best_per_offset = HashMap::new();
for pos in 0..data.len() { for pos in 0..data.len() {
let match_length = |offset: usize| { let match_length = |offset: usize| {
@@ -187,7 +176,6 @@ fn parse(
for m in match_finder.matches(pos) { for m in match_finder.matches(pos) {
closest_match = Some(closest_match.unwrap_or(0).max(m.pos)); closest_match = Some(closest_match.unwrap_or(0).max(m.pos));
let offset = pos - m.pos; let offset = pos - m.pos;
if offset <= encoding_config.max_offset {
found_last_offset |= offset as u32 == arrival.state.last_offset(); found_last_offset |= offset as u32 == arrival.state.last_offset();
add_match( add_match(
&mut arrivals, &mut arrivals,
@@ -197,13 +185,11 @@ fn parse(
m.length, m.length,
&arrival, &arrival,
max_arrivals, max_arrivals,
encoding_config,
); );
if m.length >= config.greedy_size { if m.length >= config.greedy_size {
break 'arrival_loop; break 'arrival_loop;
} }
} }
}
let mut near_matches_left = config.num_near_matches; let mut near_matches_left = config.num_near_matches;
let mut match_pos = last_seen[data[pos] as usize]; let mut match_pos = last_seen[data[pos] as usize];
@@ -212,9 +198,6 @@ fn parse(
&& closest_match.iter().all(|p| *p < match_pos) && closest_match.iter().all(|p| *p < match_pos)
{ {
let offset = pos - match_pos; let offset = pos - match_pos;
if offset > encoding_config.max_offset {
break;
}
let length = match_length(offset); let length = match_length(offset);
assert!(length > 0); assert!(length > 0);
add_match( add_match(
@@ -225,7 +208,6 @@ fn parse(
length, length,
&arrival, &arrival,
max_arrivals, max_arrivals,
encoding_config,
); );
found_last_offset |= offset as u32 == arrival.state.last_offset(); found_last_offset |= offset as u32 == arrival.state.last_offset();
if offset < near_matches.len() { if offset < near_matches.len() {
@@ -246,7 +228,6 @@ fn parse(
length, length,
&arrival, &arrival,
max_arrivals, max_arrivals,
encoding_config,
); );
} }
} }
@@ -254,7 +235,7 @@ fn parse(
cost_counter.reset(); cost_counter.reset();
let mut state = arrival.state; let mut state = arrival.state;
let op = lz::Op::Literal(data[pos]); let op = lz::Op::Literal(data[pos]);
op.encode(cost_counter, &mut state, encoding_config); op.encode(cost_counter, &mut state);
add_arrival( add_arrival(
&mut arrivals, &mut arrivals,
pos + 1, pos + 1,

View File

@@ -1,5 +1,4 @@
use crate::{context_state::Context, Config}; use crate::context_state::Context;
use thiserror::Error;
pub const PROB_BITS: u32 = 8; pub const PROB_BITS: u32 = 8;
pub const ONE_PROB: u32 = 1 << PROB_BITS; pub const ONE_PROB: u32 = 1 << PROB_BITS;
@@ -16,25 +15,20 @@ pub trait EntropyCoder {
pub struct RansCoder { pub struct RansCoder {
bits: Vec<u16>, bits: Vec<u16>,
use_bitstream: bool, use_bitstream: bool,
bitstream_is_big_endian: bool,
invert_bit_encoding: bool,
} }
impl EntropyCoder for RansCoder { impl EntropyCoder for RansCoder {
fn encode_bit(&mut self, bit: bool, prob: u16) { fn encode_bit(&mut self, bit: bool, prob: u16) {
assert!(prob < 32768); assert!(prob < 32768);
self.bits self.bits.push(prob | ((bit as u16) << 15));
.push(prob | (((bit ^ self.invert_bit_encoding) as u16) << 15));
} }
} }
impl RansCoder { impl RansCoder {
pub fn new(config: &Config) -> RansCoder { pub fn new(use_bitstream: bool) -> RansCoder {
RansCoder { RansCoder {
bits: Vec::new(), bits: Vec::new(),
use_bitstream: config.use_bitstream, use_bitstream,
bitstream_is_big_endian: config.bitstream_is_big_endian,
invert_bit_encoding: config.invert_bit_encoding,
} }
} }
@@ -44,9 +38,8 @@ impl RansCoder {
let mut state = 1 << l_bits; let mut state = 1 << l_bits;
let mut byte = 0u8; let mut byte = 0u8;
let mut bit = if self.bitstream_is_big_endian { 0 } else { 8 }; let mut bit = 0;
let mut flush_state: Box<dyn FnMut(&mut u32)> = if self.use_bitstream { let mut flush_state: Box<dyn FnMut(&mut u32)> = if self.use_bitstream {
if self.bitstream_is_big_endian {
Box::new(|state: &mut u32| { Box::new(|state: &mut u32| {
byte |= ((*state & 1) as u8) << bit; byte |= ((*state & 1) as u8) << bit;
bit += 1; bit += 1;
@@ -57,18 +50,6 @@ impl RansCoder {
} }
*state >>= 1; *state >>= 1;
}) })
} else {
Box::new(|state: &mut u32| {
bit -= 1;
byte |= ((*state & 1) as u8) << bit;
if bit == 0 {
buffer.push(byte);
byte = 0;
bit = 8;
}
*state >>= 1;
})
}
} else { } else {
Box::new(|state: &mut u32| { Box::new(|state: &mut u32| {
buffer.push(*state as u8); buffer.push(*state as u8);
@@ -80,7 +61,7 @@ impl RansCoder {
let max_state_factor: u32 = 1 << (l_bits + num_flush_bits - PROB_BITS); let max_state_factor: u32 = 1 << (l_bits + num_flush_bits - PROB_BITS);
for step in self.bits.into_iter().rev() { for step in self.bits.into_iter().rev() {
let prob = step as u32 & 32767; let prob = step as u32 & 32767;
let (start, prob) = if step & 32768 != 0 { let (start, prob) = if step & 32768 == 0 {
(0, prob) (0, prob)
} else { } else {
(prob, ONE_PROB - prob) (prob, ONE_PROB - prob)
@@ -110,11 +91,10 @@ impl RansCoder {
pub struct CostCounter { pub struct CostCounter {
cost: f64, cost: f64,
log2_table: Vec<f64>, log2_table: Vec<f64>,
invert_bit_encoding: bool,
} }
impl CostCounter { impl CostCounter {
pub fn new(config: &Config) -> CostCounter { pub fn new() -> CostCounter {
let log2_table = (0..ONE_PROB) let log2_table = (0..ONE_PROB)
.map(|prob| { .map(|prob| {
let inv_prob = ONE_PROB as f64 / prob as f64; let inv_prob = ONE_PROB as f64 / prob as f64;
@@ -124,7 +104,6 @@ impl CostCounter {
CostCounter { CostCounter {
cost: 0.0, cost: 0.0,
log2_table, log2_table,
invert_bit_encoding: config.invert_bit_encoding,
} }
} }
@@ -139,7 +118,7 @@ impl CostCounter {
impl EntropyCoder for CostCounter { impl EntropyCoder for CostCounter {
fn encode_bit(&mut self, bit: bool, prob: u16) { fn encode_bit(&mut self, bit: bool, prob: u16) {
let prob = if bit ^ self.invert_bit_encoding { let prob = if !bit {
prob as u32 prob as u32
} else { } else {
ONE_PROB - prob as u32 ONE_PROB - prob as u32
@@ -148,101 +127,62 @@ impl EntropyCoder for CostCounter {
} }
} }
#[derive(Clone)]
pub struct RansDecoder<'a> { pub struct RansDecoder<'a> {
data: &'a [u8], data: &'a [u8],
pos: usize,
state: u32, state: u32,
use_bitstream: bool, use_bitstream: bool,
byte: u8, byte: u8,
bits_left: u8, bits_left: u8,
invert_bit_encoding: bool,
bitstream_is_big_endian: bool,
} }
const PROB_MASK: u32 = ONE_PROB - 1; const PROB_MASK: u32 = ONE_PROB - 1;
#[derive(Debug, Error)]
#[error("Unexpected end of input")]
pub struct UnexpectedEOF;
impl<'a> RansDecoder<'a> { impl<'a> RansDecoder<'a> {
pub fn new(data: &'a [u8], config: &Config) -> Result<RansDecoder<'a>, UnexpectedEOF> { pub fn new(data: &'a [u8], use_bitstream: bool) -> RansDecoder<'a> {
let mut decoder = RansDecoder { RansDecoder {
data, data,
pos: 0,
state: 0, state: 0,
use_bitstream: config.use_bitstream, use_bitstream,
byte: 0, byte: 0,
bits_left: 0, bits_left: 0,
invert_bit_encoding: config.invert_bit_encoding, }
bitstream_is_big_endian: config.bitstream_is_big_endian,
};
decoder.refill()?;
Ok(decoder)
} }
pub fn pos(&self) -> usize { pub fn decode_with_context(&mut self, context: &mut Context) -> bool {
self.pos let bit = self.decode_bit(context.prob());
}
pub fn decode_with_context(&mut self, context: &mut Context) -> Result<bool, UnexpectedEOF> {
let bit = self.decode_bit(context.prob())?;
context.update(bit); context.update(bit);
Ok(bit) bit
} }
fn refill(&mut self) -> Result<(), UnexpectedEOF> { pub fn decode_bit(&mut self, prob: u16) -> bool {
let prob = prob as u32;
if self.use_bitstream { if self.use_bitstream {
while self.state < 32768 { while self.state < 32768 {
if self.bits_left == 0 { if self.bits_left == 0 {
if self.pos >= self.data.len() { self.byte = self.data[0];
return Err(UnexpectedEOF); self.data = &self.data[1..];
}
self.byte = self.data[self.pos];
self.pos += 1;
self.bits_left = 8; self.bits_left = 8;
} }
if self.bitstream_is_big_endian {
self.state = (self.state << 1) | (self.byte >> 7) as u32; self.state = (self.state << 1) | (self.byte >> 7) as u32;
self.byte <<= 1; self.byte <<= 1;
} else {
self.state = (self.state << 1) | (self.byte & 1) as u32;
self.byte >>= 1;
}
self.bits_left -= 1; self.bits_left -= 1;
} }
} else { } else {
while self.state < 4096 { while self.state < 4096 {
if self.pos >= self.data.len() { self.state = (self.state << 8) | self.data[0] as u32;
return Err(UnexpectedEOF); self.data = &self.data[1..];
} }
self.state = (self.state << 8) | self.data[self.pos] as u32;
self.pos += 1;
}
}
Ok(())
} }
pub fn decode_bit(&mut self, prob: u16) -> Result<bool, UnexpectedEOF> { let bit = (self.state & PROB_MASK) >= prob;
self.refill()?;
let prob = prob as u32;
let bit = (self.state & PROB_MASK) < prob;
let (start, prob) = if bit { let (start, prob) = if bit {
(0, prob)
} else {
(prob, ONE_PROB - prob) (prob, ONE_PROB - prob)
} else {
(0, prob)
}; };
self.state = prob * (self.state >> PROB_BITS) + (self.state & PROB_MASK) - start; self.state = prob * (self.state >> PROB_BITS) + (self.state & PROB_MASK) - start;
Ok(bit ^ self.invert_bit_encoding) bit
}
pub fn cost(&self, prev: &RansDecoder) -> f32 {
f32::log2(prev.state as f32) - f32::log2(self.state as f32)
+ (self.pos - prev.pos) as f32 * 8.
} }
} }

View File

@@ -1,3 +1,4 @@
*.bin *.bin
*.tap *.tap
*.sna
*.lst *.lst

View File

@@ -3,8 +3,7 @@
DEVICE ZXSPECTRUM48,$8FFF DEVICE ZXSPECTRUM48,$8FFF
ORG $9000 ORG $9000
;; forward example data compressed_scr_files: ; border color byte + upkr-packed .scr file
compressed_scr_files.fwd: ; border color byte + upkr-packed .scr file
DB 1 DB 1
INCBIN "screens/Grongy - ZX Spectrum (2022).scr.upk" INCBIN "screens/Grongy - ZX Spectrum (2022).scr.upk"
DB 7 DB 7
@@ -14,87 +13,37 @@ compressed_scr_files.fwd: ; border color byte + upkr-packed .scr f
DB 6 DB 6
INCBIN "screens/diver - Back to Bjork (2015).scr.upk" INCBIN "screens/diver - Back to Bjork (2015).scr.upk"
.e: .e:
;; backward example data (unpacker goes from the end of the data!)
compressed_scr_files.rwd.e: EQU $-1 ; the final IX will point one byte ahead of "$" here
INCBIN "screens.reversed/diver - Back to Bjork (2015).scr.upk"
DB 6
INCBIN "screens.reversed/diver - Mercenary 4. The Heaven's Devil (2014) (Forever 2014 Olympic Edition, 1).scr.upk"
DB 0
INCBIN "screens.reversed/Schafft - Poison (2017).scr.upk"
DB 7
INCBIN "screens.reversed/Grongy - ZX Spectrum (2022).scr.upk"
compressed_scr_files.rwd: ; border color byte + upkr-packed .scr file (backward)
DB 1
start: start:
di di
; OPT --zxnext ; OPT --zxnext
; nextreg 7,3 ; ZX Next: switch to 28Mhz ; nextreg 7,3 ; ZX Next: switch to 28Mhz
ld ix,compressed_scr_files
;;; FORWARD packed/unpacked data demo .slideshow_loop
ld ix,compressed_scr_files.fwd
.slideshow_loop.fwd:
; set BORDER for next image ; set BORDER for next image
ld a,(ix) ldi a,(ix) ; fake: ld a,(ix) : inc ix
inc ix
out (254),a out (254),a
; call unpack of next image directly into VRAM ; call unpack of next image directly into VRAM
ld de,$4000 ; target VRAM ld de,$4000 ; target VRAM
exx exx
; IX = packed data, DE' = destination ($4000) ; IX = packed data, DE' = destination ($4000)
; returned IX will point right after the packed data ; returned IX will point right after the packed data
call fwd.upkr.unpack call upkr.unpack
; do some busy loop with CPU to delay between images ; do some busy loop with CPU to delay between images
call delay
; check if all images were displayed, loop around from first one then
ld a,ixl
cp low compressed_scr_files.fwd.e
jr nz,.slideshow_loop.fwd
;;; BACKWARD packed/unpacked data demo
ld ix,compressed_scr_files.rwd
.slideshow_loop.rwd:
; set BORDER for next image
ld a,(ix)
dec ix
out (254),a
; call unpack of next image directly into VRAM
ld de,$5AFF ; target VRAM
exx
; IX = packed data, DE' = destination
; returned IX will point right ahead of the packed data
call rwd.upkr.unpack
; do some busy loop with CPU to delay between images
call delay
; check if all images were displayed, loop around from first one then
ld a,ixl
cp low compressed_scr_files.rwd.e
jr nz,.slideshow_loop.rwd
jr start
delay:
ld bc,$AA00 ld bc,$AA00
.delay: .delay:
.8 ex (sp),ix .8 ex (sp),ix
dec c dec c
jr nz,.delay jr nz,.delay
djnz .delay djnz .delay
ret ; check if all images were displayed, loop around from first one then
ld a,ixl
cp low compressed_scr_files.e
jr z,start
jr .slideshow_loop
; include the depacker library, optionally putting probs array buffer near end of RAM ; include the depacker library, optionally putting probs array buffer near end of RAM
DEFINE UPKR_PROBS_ORIGIN $FA00 ; if not defined, array will be put after unpack code DEFINE UPKR_PROBS_ORIGIN $FA00 ; if not defined, array will be put after unpack code
MODULE fwd
INCLUDE "../unpack.asm" INCLUDE "../unpack.asm"
ENDMODULE
MODULE rwd
DEFINE BACKWARDS_UNPACK ; defined to build backwards unpack
; initial IX points at last byte of compressed data
; initial DE' points at last byte of unpacked data
INCLUDE "../unpack.asm"
ENDMODULE
SAVESNA "example.sna",start SAVESNA "example.sna",start

Binary file not shown.

View File

@@ -10,23 +10,10 @@ may be incompatible with files you will produce with current version)
Asm syntax is z00m's sjasmplus: https://github.com/z00m128/sjasmplus Asm syntax is z00m's sjasmplus: https://github.com/z00m128/sjasmplus
A backward-direction unpacker is available as a compile-time option; see the example,
which shows both the forward and the backward depacker in action.
The overlap between packed and unpacked data has to be tested case by case: in the worst case
the packed data may need more than 7 bytes of margin to unpack the final byte, but usually 1-4 bytes suffice.
TODO: TODO:
- build bigger corpus of test data to benchmark future changes in algorithm/format (example and zx48.rom was used to do initial tests) - build base corpus of test data to benchmark future changes in algorithm/format
- maybe try to beat double-loop `decode_number` with different encoding format - review first implementation to identify weak spots where the implementation can be shorter+faster
with acceptable small changes to the format
- review non-bitstream variant, if it's feasible to try to implement it with Z80
- (@ped7g) Z80N version of unpacker for ZX Next devs - (@ped7g) Z80N version of unpacker for ZX Next devs
- (@exoticorn) add Z80 specific packer (to avoid confusion with original MicroW8 variant), and land it all to master branch, maybe in "z80" directory or something? (and overall decide how to organise+merge this upstream into main repo) - (@exoticorn) add Z80 specific packer (to avoid confusion with original MicroW8 variant), and land it all to master branch, maybe in "z80" directory or something? (and overall decide how to organise+merge this upstream into main repo)
- (@exoticorn) add to packer output with possible packed/unpacked region overlap
DONE:
* review non-bitstream variant, if it's feasible to try to implement it with Z80
- Ped7g: IMHO nope, the 12b x 8b MUL code would probably quickly cancel any gains from the simpler state update
* review first implementation to identify weak spots where the implementation can be shorter+faster
with acceptable small changes to the format
- Ped7g: the decode_bit settled down and now doesn't feel so confused and redundant, the code seems pretty on point to me, no obvious simplification from format change
- Ped7g: the decode_number double-loop is surprisingly resilient, especially in terms of code size I failed to beat it, speed wise only negligible gains

View File

@@ -4,7 +4,7 @@
;; initial version by Peter "Ped" Helcmanovsky (C) 2022, licensed same as upkr project ("unlicensed") ;; initial version by Peter "Ped" Helcmanovsky (C) 2022, licensed same as upkr project ("unlicensed")
;; to assemble use z00m's sjasmplus: https://github.com/z00m128/sjasmplus ;; to assemble use z00m's sjasmplus: https://github.com/z00m128/sjasmplus
;; ;;
;; you can define UPKR_PROBS_ORIGIN to specific 256 byte aligned address for probs array (320 bytes), ;; you can define UPKR_PROBS_ORIGIN to specific 256 byte aligned address for probs array (386 bytes),
;; otherwise it will be positioned after the unpacker code (256 aligned) ;; otherwise it will be positioned after the unpacker code (256 aligned)
;; ;;
;; public API: ;; public API:
@@ -12,24 +12,12 @@
;; upkr.unpack ;; upkr.unpack
;; IN: IX = packed data, DE' (shadow DE) = destination ;; IN: IX = packed data, DE' (shadow DE) = destination
;; OUT: IX = after packed data ;; OUT: IX = after packed data
;; modifies: all registers except IY, requires 10 bytes of stack space ;; modifies: all registers except IY, requires 14 bytes of stack space
;; ;;
; DEFINE BACKWARDS_UNPACK ; uncomment to build backwards depacker (write_ptr--, upkr_data_ptr--)
; initial IX points at last byte of compressed data
; initial DE' points at last byte of unpacked data
; DEFINE UPKR_UNPACK_SPEED ; uncomment to get larger but faster unpack routine
; code size hint: if you put probs array just ahead of BASIC entry point, you will get BC
; initialised to probs.e by BASIC `USR` command and you can remove it from unpack init (-3B)
OPT push reset --syntax=abf OPT push reset --syntax=abf
MODULE upkr MODULE upkr
NUMBER_BITS EQU 16+15 ; context-bits per offset/length (16+15 for 16bit offsets/pointers)
; numbers (offsets/lengths) are encoded like: 1a1b1c1d1e0 = 0000'0000'001e'dbca
/* /*
u8* upkr_data_ptr; u8* upkr_data_ptr;
u8 upkr_probs[1 + 255 + 1 + 2*32 + 2*32]; u8 upkr_probs[1 + 255 + 1 + 2*32 + 2*32];
@@ -93,8 +81,8 @@ unpack:
; BC = probs (context_index 0), state HL = 0, A' = 0x80 (no source bits left in upkr_current_byte) ; BC = probs (context_index 0), state HL = 0, A' = 0x80 (no source bits left in upkr_current_byte)
; ** main loop to decompress data ; ** main loop to decompress data
; D = prev_was_match = uninitialised, literal is expected first => will reset D to "false" .decompress_data_reset_match:
; values for false/true of prev_was_match are: false = high(probs), true = 1 + high(probs) ld d,0 ; prev_was_match = 0;
.decompress_data: .decompress_data:
ld c,0 ld c,0
call decode_bit ; if(upkr_decode_bit(0)) call decode_bit ; if(upkr_decode_bit(0))
@@ -109,23 +97,23 @@ unpack:
ld a,c ld a,c
exx exx
ld (de),a ; *write_ptr++ = byte; ld (de),a ; *write_ptr++ = byte;
IFNDEF BACKWARDS_UNPACK : inc de : ELSE : dec de : ENDIF inc de
exx exx
ld d,b ; prev_was_match = false jr .decompress_data_reset_match
jr .decompress_data
; * copy chunk of already decompressed data (match) ; * copy chunk of already decompressed data (match)
.copy_chunk: .copy_chunk:
ld a,b
inc b ; context_index = 256 inc b ; context_index = 256
; if(prev_was_match || upkr_decode_bit(256)) { ; if(prev_was_match || upkr_decode_bit(256)) {
; offset = upkr_decode_length(257) - 1; ; offset = upkr_decode_length(257) - 1;
; if (0 == offset) break; ; if (0 == offset) break;
; } ; }
xor a
cp d ; CF = prev_was_match cp d ; CF = prev_was_match
call nc,decode_bit ; if not prev_was_match, then upkr_decode_bit(256) call nc,decode_bit ; if not prev_was_match, then upkr_decode_bit(256)
jr nc,.keep_offset ; if neither, keep old offset jr nc,.keep_offset ; if neither, keep old offset
call decode_number ; context_index is already 257-1 as needed by decode_number inc c
call decode_length
dec de ; offset = upkr_decode_length(257) - 1; dec de ; offset = upkr_decode_length(257) - 1;
ld a,d ld a,d
or e or e
@@ -138,27 +126,18 @@ unpack:
; ++write_ptr; ; ++write_ptr;
; } ; }
; prev_was_match = 1; ; prev_was_match = 1;
ld c,low(257 + NUMBER_BITS - 1) ; context_index to second "number" set for lengths decoding ld c,low(257+64) ; context_index = 257+64
call decode_number ; length = upkr_decode_length(257 + 64); call decode_length ; length = upkr_decode_length(257 + 64);
push de push de
exx exx
IFNDEF BACKWARDS_UNPACK
; forward unpack (write_ptr++, upkr_data_ptr++)
ld h,d ; DE = write_ptr ld h,d ; DE = write_ptr
ld l,e ld l,e
.offset+*: ld bc,0 .offset+*: ld bc,0
sbc hl,bc ; CF=0 from decode_number ; HL = write_ptr - offset sbc hl,bc ; CF=0 from decode_length ; HL = write_ptr - offset
pop bc ; BC = length pop bc ; BC = length
ldir ldir
ELSE
; backward unpack (write_ptr--, upkr_data_ptr--)
.offset+*: ld hl,0
add hl,de ; HL = write_ptr + offset
pop bc ; BC = length
lddr
ENDIF
exx exx
ld d,b ; prev_was_match = true ld d,b ; prev_was_match = non-zero
djnz .decompress_data ; adjust context_index back to 0..255 range, go to main loop djnz .decompress_data ; adjust context_index back to 0..255 range, go to main loop
/* /*
@@ -190,9 +169,6 @@ int upkr_decode_bit(int context_index) {
return bit; return bit;
} }
*/ */
inc_c_decode_bit:
; ++low(context_index) before decode_bit (to get -1B by two calls in decode_number)
inc c
decode_bit: decode_bit:
; HL = upkr_state ; HL = upkr_state
; IX = upkr_data_ptr ; IX = upkr_data_ptr
@@ -213,7 +189,7 @@ decode_bit:
jr nz,.has_bit ; CF=data, ZF=0 -> some bits + stop bit still available jr nz,.has_bit ; CF=data, ZF=0 -> some bits + stop bit still available
; CF=1 (by stop bit) ; CF=1 (by stop bit)
ld a,(ix) ld a,(ix)
IFNDEF BACKWARDS_UNPACK : inc ix : ELSE : dec ix : ENDIF ; upkr_current_byte = *upkr_data_ptr++; inc ix ; upkr_current_byte = *upkr_data_ptr++;
adc a,a ; CF=data, b0=1 as new stop bit adc a,a ; CF=data, b0=1 as new stop bit
.has_bit: .has_bit:
adc hl,hl ; upkr_state = (upkr_state << 1) + (upkr_current_byte >> 7); adc hl,hl ; upkr_state = (upkr_state << 1) + (upkr_current_byte >> 7);
@@ -226,8 +202,9 @@ decode_bit:
cp l ; CF = bit = prob-1 < (upkr_state & 255) <=> prob <= (upkr_state & 255) cp l ; CF = bit = prob-1 < (upkr_state & 255) <=> prob <= (upkr_state & 255)
inc a inc a
; ** adjust state ; ** adjust state
push bc push af
ld c,l ; C = (upkr_state & 255); (preserving the value) push af
push hl
push af push af
jr nc,.bit_is_0 jr nc,.bit_is_0
neg ; A = -prob == (256-prob), CF=1 preserved neg ; A = -prob == (256-prob), CF=1 preserved
@@ -235,52 +212,38 @@ decode_bit:
ld d,0 ld d,0
ld e,a ; DE = state_scale ; prob || (256-prob) ld e,a ; DE = state_scale ; prob || (256-prob)
ld l,d ; H:L = (upkr_state>>8) : 0 ld l,d ; H:L = (upkr_state>>8) : 0
ld a,8 ; counter
IFNDEF UPKR_UNPACK_SPEED
;; looped MUL for minimum unpack size
ld b,8 ; counter
.mulLoop: .mulLoop:
add hl,hl add hl,hl
jr nc,.mul0 jr nc,.mul0
add hl,de add hl,de
.mul0: .mul0:
djnz .mulLoop ; until HL = state_scale * (upkr_state>>8), also BC becomes (upkr_state & 255) dec a
jr nz,.mulLoop ; until HL = state_scale * (upkr_state>>8)
ELSE pop af
;;; unrolled MUL for better performance, +25 bytes unpack size
ld b,d
DUP 8
add hl,hl
jr nc,0_f
add hl,de
0:
EDUP
ENDIF
add hl,bc ; HL = state_scale * (upkr_state >> 8) + (upkr_state & 255)
pop af ; restore prob and CF=bit
jr nc,.bit_is_0_2 jr nc,.bit_is_0_2
dec d ; DE = -prob (also D = bit ? $FF : $00) dec d ; D = 0xFF (DE = -prob)
add hl,de ; HL += -prob add hl,de ; HL += -prob
; ^ this always preserves CF=1, because (state>>8) >= 128, state_scale: 7..250, prob: 7..250, .bit_is_0_2: ; HL = state_offset + state_scale * (upkr_state >> 8)
; so 7*128 > 250 and thus edge case `ADD hl=(7*128+0),de=(-250)` => CF=1 pop de
.bit_is_0_2: ld d,0 ; DE = (upkr_state & 255)
add hl,de ; HL = state_offset + state_scale * (upkr_state >> 8) + (upkr_state & 255) ; new upkr_state
; *** adjust probs[context_index] ; *** adjust probs[context_index]
rra ; + (bit<<4) ; part of -prob_offset, needs another -16 pop af ; restore prob and bit
and $FC ; clear/keep correct bits to get desired (prob>>4) + extras, CF=0
rra
rra
rra ; A = (bit<<4) + (prob>>4), CF=(prob & 8)
adc a,-16 ; A = (bit<<4) - 16 + ((prob + 8)>>4) ; -prob_offset = (bit<<4) - 16
ld e,a ld e,a
pop bc jr c,.bit_is_1
ld a,(bc) ; A = prob (cheaper + shorter to re-read again from memory) ld d,-16 ; 0xF0
sub e ; A = 16 - (bit<<4) + prob - ((prob + 8)>>4) ; = prob_offset + prob - ((prob + 8)>>4) .bit_is_1: ; D:E = -prob_offset:prob, A = prob
ld (bc),a ; probs[context_index] = prob_offset + prob - ((prob + 8) >> 4); and $F8
add a,d ; restore CF = bit (D = bit ? $FF : $00 && A > 0) rra
rra
rra
rra
adc a,d ; A = -prob_offset + ((prob + 8) >> 4)
neg
add a,e ; A = prob_offset + prob - ((prob + 8) >> 4)
ld (bc),a ; update probs[context_index]
pop af ; restore resulting CF = bit
pop de pop de
ret ret
@@ -295,19 +258,22 @@ int upkr_decode_length(int context_index) {
return length | (1 << bit_pos); return length | (1 << bit_pos);
} }
*/ */
decode_number: decode_length:
; HL = upkr_state ; HL = upkr_state
; IX = upkr_data_ptr ; IX = upkr_data_ptr
; BC = probs+context_index-1 ; BC = probs+context_index
; A' = upkr_current_byte (!!! init to 0x80 at start, not 0x00) ; A' = upkr_current_byte (!!! init to 0x80 at start, not 0x00)
; return length in DE, CF=0 ; return length in DE, CF=0
ld de,$FFFF ; length = 0 with positional-stop-bit ld de,$7FFF ; length = 0 with positional-stop-bit
or a ; CF=0 to skip getting data bit and use only `rr d : rr e` to fix init DE jr .loop_entry
.loop: .loop:
call c,inc_c_decode_bit ; get data bit, context_index + 1 / if CF=0 just add stop bit into DE init inc c ; context_index + 1
call decode_bit
rr d rr d
rr e ; DE = length = (length >> 1) | (bit << 15); rr e ; DE = length = (length >> 1) | (bit << 15);
call inc_c_decode_bit ; context_index += 2 inc c ; context_index += 2
.loop_entry:
call decode_bit
jr c,.loop jr c,.loop
.fix_bit_pos: .fix_bit_pos:
ccf ; NC will become this final `| (1 << bit_pos)` bit ccf ; NC will become this final `| (1 << bit_pos)` bit
@@ -321,61 +287,15 @@ decode_number:
; reserve space for probs array without emitting any machine code (using only EQU) ; reserve space for probs array without emitting any machine code (using only EQU)
IFDEF UPKR_PROBS_ORIGIN ; if specific address is defined by user, move probs array there IFDEF UPKR_PROBS_ORIGIN ; if specific address is defined by user, move probs array there
probs: EQU ((UPKR_PROBS_ORIGIN) + 255) & -$100 ; probs array aligned to 256 ORG UPKR_PROBS_ORIGIN
ELSE
probs: EQU ($ + 255) & -$100 ; probs array aligned to 256
ENDIF ENDIF
.real_c: EQU 1 + 255 + 1 + 2*NUMBER_BITS ; real size of probs array
probs: EQU ($+255) & -$100 ; probs array aligned to 256
.real_c: EQU 1 + 255 + 1 + 2*32 + 2*32 ; real size of probs array
.c: EQU (.real_c + 1) & -2 ; padding to even size (required by init code) .c: EQU (.real_c + 1) & -2 ; padding to even size (required by init code)
.e: EQU probs + .c .e: EQU probs + .c
DISPLAY "upkr.unpack probs array placed at: ",/A,probs,",\tsize: ",/A,probs.c DISPLAY "upkr.unpack probs array placed at: ",/A,probs,",\tsize: ",/A,probs.c
/*
archived: negligibly faster but +6B longer decode_number variant using HL' and BC' to
do `number|=(1<<bit_pos);` type of logic in single loop.
*/
; decode_number:
; exx
; ld bc,1
; ld l,b
; ld h,b ; HL = 0
; .loop
; exx
; inc c
; call decode_bit
; jr nc,.done
; inc c
; call decode_bit
; exx
; jr nc,.b0
; add hl,bc
; .b0:
; sla c
; rl b
; jr .loop
; .done:
; exx
; add hl,bc
; push hl
; exx
; pop de
; ret
/*
archived: possible LUT variant of updating probs value, requires 512-aligned 512B table (not tested)
*/
; code is replacing decode_bit from "; *** adjust probs[context_index]", followed by `ld (bc),a : add a,d ...`
; ld c,a
; ld a,high(probs_update_table)/2 ; must be 512 aligned
; rla
; ld b,a
; ld a,(bc)
; pop bc
; -------------------------------------------
; probs_update_table: EQU probs-512
; -------------------------------------------
; table generator is not obvious and probably not short either, 20+ bytes almost for sure, maybe even 30-40
ENDMODULE ENDMODULE
OPT pop OPT pop