mirror of
https://github.com/exoticorn/upkr.git
synced 2026-01-20 19:46:42 +01:00
Compare commits
8 Commits
31fb91c629
...
v0.2.0
| Author | SHA1 | Date | |
|---|---|---|---|
| d7bdc8c1c7 | |||
| 887722a66b | |||
| 39c95598f2 | |||
| 3e31b37c1c | |||
| 83c023de45 | |||
| a46eb0e7f5 | |||
| 32cd8e5b6c | |||
| 90fa31ce1a |
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -172,7 +172,7 @@ checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd"
|
||||
|
||||
[[package]]
|
||||
name = "upkr"
|
||||
version = "0.2.0-pre3"
|
||||
version = "0.2.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"cdivsufsort",
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
[package]
|
||||
name = "upkr"
|
||||
version = "0.2.0-pre3"
|
||||
version = "0.2.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
[profile.release]
|
||||
strip = "debuginfo"
|
||||
|
||||
[dependencies]
|
||||
cdivsufsort = "2"
|
||||
|
||||
52
README.md
52
README.md
@@ -2,13 +2,57 @@
|
||||
|
||||
Upkr is a simple general purpose lz packer designed to be used in the [MicroW8](https://github.com/exoticorn/microw8) platform.
|
||||
The compressed format is loosely based on [Shrinkler](https://github.com/askeksa/Shrinkler) with the main difference being that
|
||||
Upkr doesn't differnetiate between literals at odd or even addresses and that I went with rANS/rABS instead of a range coder.
|
||||
Upkr doesn't differentiate between literals at odd or even addresses (by default) and that I went with rANS/rABS instead of a range coder.
|
||||
|
||||
At this point, Upkr should still be considered unstable - the compressed format is not very likely to change but I still want
|
||||
to keep that option open a little longer.
|
||||
Compression rate is on par with Shrinkler.
|
||||
|
||||
The differences compared to Shrinkler also make it interesting on 8bit platforms. The z80 unpacker included in the release
|
||||
is both about twice as fast and smaller than the Shrinkler unpacker.
|
||||
|
||||
## Inspirations:
|
||||
|
||||
* Ferris' blog about his [C64 intro packer](https://yupferris.github.io/blog/2020/08/31/c64-4k-intro-packer-deep-dive.html)
|
||||
* [Shrinkler](https://github.com/askeksa/Shrinkler)
|
||||
* Ryg's [sample rANS implementation](https://github.com/rygorous/ryg_rans)
|
||||
* Ryg's [sample rANS implementation](https://github.com/rygorous/ryg_rans)
|
||||
|
||||
## Unpackers
|
||||
|
||||
The release includes a reference c unpacker, as well as some optimized asm unpackers (arm and riscv). The unpackers in
|
||||
c_unpacker and asm_unpackers unpack the default upkr compressed format. The z80_unpacker
|
||||
is based on some variations to the compressed format. (Use `upkr --z80` to select those variations.)
|
||||
An optimized x86 (DOS) unpacker is currently being worked on out of tree.
|
||||
|
||||
## Usage
|
||||
|
||||
```
|
||||
upkr [-l level(0-9)] [config options] <infile> [<outfile>]
|
||||
upkr -u [config options] <infile> [<outfile>]
|
||||
upkr --margin [config options] <infile>
|
||||
|
||||
-l, --level N compression level 0-9
|
||||
-0, ..., -9 short form for setting compression level
|
||||
-u, --unpack unpack infile
|
||||
--margin calculate margin for overlapped unpacking of a packed file
|
||||
|
||||
Config presets for specific unpackers:
|
||||
--z80 --big-endian-bitstream --invert-bit-encoding --simplified-prob-update -9
|
||||
--x86 --bitstream --invert-is-match-bit --invert-continue-value-bit --invert-new-offset-bit
|
||||
--x86b --bitstream --invert-continue-value-bit --no-repeated-offsets -9
|
||||
|
||||
Config options (need to match when packing/unpacking):
|
||||
-b, --bitstream bitstream mode
|
||||
-p, --parity N use N (2/4) parity contexts
|
||||
-r, --reverse reverse input & output
|
||||
|
||||
Config options to tailor output to specific optimized unpackers:
|
||||
--invert-is-match-bit
|
||||
--invert-new-offset-bit
|
||||
--invert-continue-value-bit
|
||||
--invert-bit-encoding
|
||||
--simplified-prob-update
|
||||
--big-endian-bitstream (implies --bitstream)
|
||||
--no-repeated-offsets
|
||||
--eof-in-length
|
||||
--max-offset N
|
||||
--max-length N
|
||||
```
|
||||
|
||||
@@ -39,6 +39,19 @@ build/unpack_armv6m.bin: unpack_armv6m.S
|
||||
arm-none-eabi-gcc -march=armv6-m -c -o build/unpack_armv6m.o $?
|
||||
arm-none-eabi-objcopy -O binary --only-section=.text build/unpack_armv6m.o $@
|
||||
|
||||
build/unpack_arm32: ../c_unpacker/main.c unpack_arm32.S
|
||||
mkdir -p build
|
||||
arm-linux-gnueabihf-gcc -g -static -o $@ $^
|
||||
|
||||
test_arm32: build/unpack_arm32
|
||||
qemu-arm $< test_data.upk /tmp/out.bin
|
||||
cmp test_data.bin /tmp/out.bin
|
||||
|
||||
build/unpack_arm32.bin: unpack_arm32.S
|
||||
mkdir -p build
|
||||
arm-none-eabi-gcc -c -o build/unpack_arm32.o $?
|
||||
arm-none-eabi-objcopy -O binary --only-section=.text build/unpack_arm32.o $@
|
||||
|
||||
build/unpack_c: ../c_unpacker/main.c ../c_unpacker/unpack.c
|
||||
mkdir -p build
|
||||
gcc -g -o $@ $^
|
||||
@@ -47,5 +60,5 @@ test_c: build/unpack_c
|
||||
$< test_data.upk /tmp/out.bin
|
||||
cmp test_data.bin /tmp/out.bin
|
||||
|
||||
sizes: build/unpack_armv6m.bin build/unpack_riscv64.bin build/unpack_riscv32.bin
|
||||
sizes: build/unpack_armv6m.bin build/unpack_riscv64.bin build/unpack_riscv32.bin build/unpack_arm32.bin
|
||||
ls -l build/*.bin
|
||||
100
asm_unpackers/unpack_arm32.S
Normal file
100
asm_unpackers/unpack_arm32.S
Normal file
@@ -0,0 +1,100 @@
|
||||
.arm
|
||||
|
||||
.section .text
|
||||
|
||||
.global upkr_unpack
|
||||
.type upkr_unpack, %function
|
||||
// r0 .. out_ptr (returned)
|
||||
// r1 .. in_ptr (returned)
|
||||
// r2 .. state
|
||||
// r3 .. offset
|
||||
// r4 .. prev_was_literal / decode_length ret
|
||||
// r5 .. context index
|
||||
// r6 .. decode_length temp
|
||||
// r7 .. probs ptr
|
||||
// r8-r11 .. decode_bit temp
|
||||
// r12 .. decode_length return address
|
||||
upkr_unpack:
|
||||
push { r3-r11, lr }
|
||||
|
||||
mov r2, #384
|
||||
mov r3, #128
|
||||
.Lclear:
|
||||
subs r2, r2, #1
|
||||
strb r3, [sp, -r2]
|
||||
bne .Lclear
|
||||
|
||||
.Lloop:
|
||||
mov r5, #0
|
||||
bl upkr_decode_bit
|
||||
bcc .Ldata
|
||||
.Lmatch:
|
||||
mov r5, #256
|
||||
rsbs r6, r4, #0
|
||||
blcc upkr_decode_bit
|
||||
bcc .Lskip_offset
|
||||
|
||||
bl upkr_decode_length
|
||||
adds r3, r4, #1
|
||||
popeq { r3-r11, pc }
|
||||
.Lskip_offset:
|
||||
|
||||
mov r5, #256+64
|
||||
bl upkr_decode_length
|
||||
.Lcopy_loop:
|
||||
ldrb r5, [r0, r3]
|
||||
.Lstore:
|
||||
strb r5, [r0], #1
|
||||
adds r4, r4, #1
|
||||
blt .Lcopy_loop
|
||||
b .Lloop
|
||||
|
||||
.Ldata:
|
||||
mov r5, #1
|
||||
|
||||
.Ldata_loop:
|
||||
bl upkr_decode_bit
|
||||
adc r5, r5, r5
|
||||
movs r4, r5, lsr #8
|
||||
beq .Ldata_loop
|
||||
b .Lstore
|
||||
|
||||
.type upkr_decode_length, %function
|
||||
upkr_decode_length:
|
||||
mov r12, lr
|
||||
|
||||
mov r4, #0
|
||||
mvn r6, #0
|
||||
.Lbit_loop:
|
||||
bl upkr_decode_bit_inc
|
||||
addcc r4, r4, r6
|
||||
movcc pc, r12
|
||||
|
||||
bl upkr_decode_bit_inc
|
||||
addcs r4, r4, r6
|
||||
mov r6, r6, lsl #1
|
||||
b .Lbit_loop
|
||||
|
||||
.type upkr_decode_bit, %function
|
||||
upkr_decode_bit_inc:
|
||||
add r5, r5, #1
|
||||
upkr_decode_bit:
|
||||
cmp r2, #4096
|
||||
ldrltb r8, [r1], #1
|
||||
orrlt r2, r8, r2, lsl#8
|
||||
blt upkr_decode_bit
|
||||
|
||||
ldrb r8, [sp, -r5]
|
||||
and r9, r2, #255
|
||||
add r9, r9, #1
|
||||
cmp r8, r9
|
||||
rsbcs r8, r8, #256
|
||||
mvn r9, r2, lsr#8
|
||||
addcs r9, r9, #1
|
||||
mla r2, r8, r9, r2
|
||||
add r9, r8, #8
|
||||
sub r8, r8, r9, lsr#4
|
||||
rsbcs r8, r8, #256
|
||||
strb r8, [sp, -r5]
|
||||
mov pc, r14
|
||||
|
||||
@@ -1,13 +1,11 @@
|
||||
.section .text
|
||||
|
||||
#define FRAME_SIZE (256+32*4+4)
|
||||
|
||||
// x8 prob array ptr
|
||||
// x9 prev was literal
|
||||
// x10 out ptr
|
||||
// x11 in ptr
|
||||
// x12 offset
|
||||
// x13 state
|
||||
// x14 context index
|
||||
|
||||
.global upkr_unpack
|
||||
.type upkr_unpack, %function
|
||||
@@ -15,11 +13,11 @@ upkr_unpack:
|
||||
mv t4, ra
|
||||
mv x17, x8
|
||||
mv t6, x9
|
||||
li x13, FRAME_SIZE
|
||||
li x9, 128
|
||||
li x9, 256 + 128
|
||||
mv x13, x9
|
||||
1:
|
||||
addi sp, sp, -1
|
||||
sb x9, 0(sp)
|
||||
sub x8, sp, x13
|
||||
sb x9, 0(x8)
|
||||
addi x13, x13, -1
|
||||
bnez x13, 1b
|
||||
|
||||
@@ -35,7 +33,7 @@ upkr_unpack:
|
||||
|
||||
.Lfinished_offset:
|
||||
addi x14, x14, 64
|
||||
jal t3, upkr_decode_number
|
||||
jalr ra // jal upkr_decode_number
|
||||
1:
|
||||
add x14, x10, t0
|
||||
lbu x14, (x14)
|
||||
@@ -58,36 +56,14 @@ upkr_unpack:
|
||||
.Lread_offset_inc_x14:
|
||||
addi x14, x14, 1
|
||||
.Lread_offset:
|
||||
jal t3, upkr_decode_number
|
||||
jalr ra // jal upkr_decode_number
|
||||
addi t0, x9, 1
|
||||
bnez t0, .Lfinished_offset
|
||||
.Ldone:
|
||||
addi sp, sp, FRAME_SIZE
|
||||
mv x8, x17
|
||||
mv x9, t6
|
||||
jr t4
|
||||
|
||||
// x14 context index
|
||||
// return: x9 negtive decoded number
|
||||
upkr_decode_number:
|
||||
mv t5, x14
|
||||
li x9, 0
|
||||
li x8, -1
|
||||
1:
|
||||
jal upkr_decode_bit
|
||||
beqz x15, 1f
|
||||
jal upkr_decode_bit
|
||||
beqz x15, 2f
|
||||
add x9, x9, x8
|
||||
2:
|
||||
slli x8, x8, 1
|
||||
j 1b
|
||||
1:
|
||||
add x9, x9, x8
|
||||
|
||||
mv x14, t5
|
||||
jr t3
|
||||
|
||||
upkr_load_byte:
|
||||
lbu x15, 0(x11)
|
||||
addi x11, x11, 1
|
||||
@@ -104,39 +80,52 @@ upkr_decode_bit:
|
||||
srli x15, x13, 12
|
||||
beqz x15, upkr_load_byte
|
||||
|
||||
mv t1, x14
|
||||
mv t2, x10
|
||||
addi x14, x14, 1
|
||||
|
||||
add x14, x14, sp
|
||||
lbu x12, 0(x14)
|
||||
sub t2, sp, x14
|
||||
lbu x12, (t2)
|
||||
|
||||
andi x10, x13, 255
|
||||
sltu x15, x10, x12
|
||||
srli x13, x13, 8
|
||||
beqz x15, .Lelse
|
||||
andi x8, x13, 255
|
||||
sltu x15, x8, x12
|
||||
beqz x15, 1f
|
||||
xori x12, x12, 255
|
||||
addi x12, x12, 1
|
||||
1:
|
||||
srli x8, x13, 8
|
||||
addi x8, x8, 1
|
||||
sub x8, x8, x15
|
||||
mul x8, x8, x12
|
||||
sub x13, x13, x8
|
||||
|
||||
mul x13, x13, x12
|
||||
add x13, x13, x10
|
||||
li x10, 256 + 8
|
||||
sub x10, x10, x12
|
||||
srli x10, x10, 4
|
||||
add x12, x12, x10
|
||||
j .Lendif
|
||||
addi x8, x12, 8
|
||||
srli x8, x8, 4
|
||||
sub x12, x12, x8
|
||||
beqz x15, 1f
|
||||
sub x12, x0, x12
|
||||
1:
|
||||
|
||||
.Lelse:
|
||||
li x16, 256
|
||||
sub x16, x16, x12
|
||||
mul x13, x13, x16
|
||||
add x13, x13, x10
|
||||
sub x13, x13, x12
|
||||
addi x10, x12, 8
|
||||
srli x10, x10, 4
|
||||
sub x12, x12, x10
|
||||
|
||||
.Lendif:
|
||||
|
||||
sb x12, 0(x14)
|
||||
sb x12, (t2)
|
||||
|
||||
addi x14, t1, 1
|
||||
mv x10, t2
|
||||
ret
|
||||
jalr ra
|
||||
|
||||
// x14 context index
|
||||
// return: x9 negative decoded number
|
||||
upkr_decode_number:
|
||||
mv t3, ra
|
||||
mv t5, x14
|
||||
li x9, 0
|
||||
li t1, -1
|
||||
1:
|
||||
jal upkr_decode_bit
|
||||
beqz x15, 1f
|
||||
jal upkr_decode_bit
|
||||
beqz x15, 2f
|
||||
add x9, x9, t1
|
||||
2:
|
||||
add t1, t1, t1
|
||||
j 1b
|
||||
1:
|
||||
add x9, x9, t1
|
||||
|
||||
mv x14, t5
|
||||
jr t3
|
||||
|
||||
4
release/.gitignore
vendored
Normal file
4
release/.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
*.zip
|
||||
*.tgz
|
||||
upkr-linux/
|
||||
upkr-windows/
|
||||
35
release/Makefile
Normal file
35
release/Makefile
Normal file
@@ -0,0 +1,35 @@
|
||||
VERSION := $(shell cargo run --release -- --version)
|
||||
|
||||
all: clean upkr-linux-$(VERSION).tgz upkr-windows-$(VERSION).zip
|
||||
|
||||
clean:
|
||||
rm -rf upkr-linux
|
||||
rm -f upkr-linux*.tgz
|
||||
rm -rf upkr-windows
|
||||
rm -f upkr-windows*.zip
|
||||
|
||||
upkr-linux-$(VERSION).tgz: upkr-linux/upkr PHONY
|
||||
cp ../README.md upkr-linux
|
||||
cd .. && git archive HEAD c_unpacker | tar -xC release/upkr-linux
|
||||
cd .. && git archive HEAD z80_unpacker | tar -xC release/upkr-linux
|
||||
cd .. && git archive HEAD asm_unpackers | tar -xC release/upkr-linux
|
||||
tar czf $@ upkr-linux
|
||||
|
||||
upkr-windows-$(VERSION).zip: upkr-windows/upkr.exe PHONY
|
||||
cp ../README.md upkr-windows/
|
||||
cd .. && git archive HEAD c_unpacker | tar -xC release/upkr-windows
|
||||
cd .. && git archive HEAD z80_unpacker | tar -xC release/upkr-windows
|
||||
cd .. && git archive HEAD asm_unpackers | tar -xC release/upkr-windows
|
||||
zip -r -9 $@ upkr-windows
|
||||
|
||||
upkr-linux/upkr:
|
||||
cargo build --target x86_64-unknown-linux-musl --release
|
||||
mkdir -p upkr-linux
|
||||
cp ../target/x86_64-unknown-linux-musl/release/upkr upkr-linux/
|
||||
|
||||
upkr-windows/upkr.exe:
|
||||
cargo build --target x86_64-pc-windows-gnu --release
|
||||
mkdir -p upkr-windows
|
||||
cp ../target/x86_64-pc-windows-gnu/release/upkr.exe upkr-windows/
|
||||
|
||||
PHONY:
|
||||
15
src/main.rs
15
src/main.rs
@@ -49,12 +49,22 @@ fn main() -> Result<()> {
|
||||
config.is_match_bit = false;
|
||||
config.new_offset_bit = false;
|
||||
}
|
||||
Long("x86b") => {
|
||||
config.use_bitstream = true;
|
||||
config.continue_value_bit = false;
|
||||
config.no_repeated_offsets = true;
|
||||
level = 9;
|
||||
}
|
||||
|
||||
Short('u') | Long("unpack") => unpack = true,
|
||||
Long("margin") => calculate_margin = true,
|
||||
Short('l') | Long("level") => level = parser.value()?.parse()?,
|
||||
Short(n) if n.is_ascii_digit() => level = n as u8 - b'0',
|
||||
Short('h') | Long("help") => print_help(0),
|
||||
Long("version") => {
|
||||
println!("{}", env!("CARGO_PKG_VERSION"));
|
||||
process::exit(0);
|
||||
}
|
||||
Long("max-unpacked-size") => max_unpacked_size = parser.value()?.parse()?,
|
||||
Value(val) if infile.is_none() => infile = Some(val.try_into()?),
|
||||
Value(val) if outfile.is_none() => outfile = Some(val.try_into()?),
|
||||
@@ -149,11 +159,16 @@ fn print_help(exit_code: i32) -> ! {
|
||||
eprintln!(" -u, --unpack unpack infile");
|
||||
eprintln!(" --margin calculate margin for overlapped unpacking of a packed file");
|
||||
eprintln!();
|
||||
eprintln!("Version: {}", env!("CARGO_PKG_VERSION"));
|
||||
eprintln!();
|
||||
eprintln!("Config presets for specific unpackers:");
|
||||
eprintln!(" --z80 --big-endian-bitstream --invert-bit-encoding --simplified-prob-update -9");
|
||||
eprintln!(
|
||||
" --x86 --bitstream --invert-is-match-bit --invert-continue-value-bit --invert-new-offset-bit"
|
||||
);
|
||||
eprintln!(
|
||||
" --x86b --bitstream --invert-continue-value-bit --no-repeated-offsets -9"
|
||||
);
|
||||
eprintln!();
|
||||
eprintln!("Config options (need to match when packing/unpacking):");
|
||||
eprintln!(" -b, --bitstream bitstream mode");
|
||||
|
||||
Reference in New Issue
Block a user