2 Commits

9 changed files with 135 additions and 171 deletions

2
Cargo.lock generated
View File

@@ -172,7 +172,7 @@ checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd"
[[package]]
name = "upkr"
version = "0.2.0"
version = "0.2.0-pre3"
dependencies = [
"anyhow",
"cdivsufsort",

View File

@@ -1,6 +1,6 @@
[package]
name = "upkr"
version = "0.2.0"
version = "0.2.0-pre3"
edition = "2021"
[profile.release]

View File

@@ -2,57 +2,13 @@
Upkr is a simple general purpose lz packer designed to be used in the [MicroW8](https://github.com/exoticorn/microw8) platform.
The compressed format is loosely based on [Shrinkler](https://github.com/askeksa/Shrinkler) with the main difference being that
Upkr doesn't differentiate between literals at odd or even addresses (by default) and that I went with rANS/rABS instead of a range coder.
Upkr doesn't differentiate between literals at odd or even addresses and that I went with rANS/rABS instead of a range coder.
Compression rate is on par with Shrinkler.
The differences compared to Shrinkler also make it interesting on 8-bit platforms. The z80 unpacker included in the release
is both about twice as fast and smaller than the Shrinkler unpacker.
At this point, Upkr should still be considered unstable - the compressed format is not very likely to change but I still want
to keep that option open a little longer.
## Inspirations:
* Ferris' blog about his [C64 intro packer](https://yupferris.github.io/blog/2020/08/31/c64-4k-intro-packer-deep-dive.html)
* [Shrinkler](https://github.com/askeksa/Shrinkler)
* Ryg's [sample rANS implementation](https://github.com/rygorous/ryg_rans)
## Unpackers
The release includes a reference C unpacker, as well as some optimized asm unpackers (arm and riscv). The unpackers in
c_unpacker and asm_unpackers unpack the default upkr compressed format. The z80_unpacker
is based on some variations to the compressed format. (Use `upkr --z80` to select those variations.)
An optimized x86 (DOS) unpacker is currently being worked on out of tree.
## Usage
```
upkr [-l level(0-9)] [config options] <infile> [<outfile>]
upkr -u [config options] <infile> [<outfile>]
upkr --margin [config options] <infile>
-l, --level N compression level 0-9
-0, ..., -9 short form for setting compression level
-u, --unpack unpack infile
--margin calculate margin for overlapped unpacking of a packed file
Config presets for specific unpackers:
--z80 --big-endian-bitstream --invert-bit-encoding --simplified-prob-update -9
--x86 --bitstream --invert-is-match-bit --invert-continue-value-bit --invert-new-offset-bit
--x86b --bitstream --invert-continue-value-bit --no-repeated-offsets -9
Config options (need to match when packing/unpacking):
-b, --bitstream bitstream mode
-p, --parity N use N (2/4) parity contexts
-r, --reverse reverse input & output
Config options to tailor output to specific optimized unpackers:
--invert-is-match-bit
--invert-new-offset-bit
--invert-continue-value-bit
--invert-bit-encoding
--simplified-prob-update
--big-endian-bitstream (implies --bitstream)
--no-repeated-offsets
--eof-in-length
--max-offset N
--max-length N
```
* Ryg's [sample rANS implementation](https://github.com/rygorous/ryg_rans)

4
release/.gitignore vendored
View File

@@ -1,4 +0,0 @@
*.zip
*.tgz
upkr-linux/
upkr-windows/

View File

@@ -1,35 +0,0 @@
# Release packaging: builds the upkr binary for Linux and Windows and bundles
# each one with the README and the unpacker source directories.
# VERSION is whatever the upkr binary prints for `--version`.
VERSION := $(shell cargo run --release -- --version)
# Default target: remove previous output, then build both release archives.
all: clean upkr-linux-$(VERSION).tgz upkr-windows-$(VERSION).zip
# Delete the staging directories and any previously built archives.
clean:
rm -rf upkr-linux
rm -f upkr-linux*.tgz
rm -rf upkr-windows
rm -f upkr-windows*.zip
# Linux archive: copy the README into the staging directory, export the
# unpacker directories as committed at HEAD via `git archive`, then tar+gzip.
upkr-linux-$(VERSION).tgz: upkr-linux/upkr PHONY
cp ../README.md upkr-linux
cd .. && git archive HEAD c_unpacker | tar -xC release/upkr-linux
cd .. && git archive HEAD z80_unpacker | tar -xC release/upkr-linux
cd .. && git archive HEAD asm_unpackers | tar -xC release/upkr-linux
tar czf $@ upkr-linux
# Windows archive: same contents as the Linux one, packed with `zip -r -9`.
upkr-windows-$(VERSION).zip: upkr-windows/upkr.exe PHONY
cp ../README.md upkr-windows/
cd .. && git archive HEAD c_unpacker | tar -xC release/upkr-windows
cd .. && git archive HEAD z80_unpacker | tar -xC release/upkr-windows
cd .. && git archive HEAD asm_unpackers | tar -xC release/upkr-windows
zip -r -9 $@ upkr-windows
# Linux binary: built for the x86_64-unknown-linux-musl target and copied
# into the staging directory.
upkr-linux/upkr:
cargo build --target x86_64-unknown-linux-musl --release
mkdir -p upkr-linux
cp ../target/x86_64-unknown-linux-musl/release/upkr upkr-linux/
# Windows binary: built for the x86_64-pc-windows-gnu target and copied into
# the staging directory.
upkr-windows/upkr.exe:
cargo build --target x86_64-pc-windows-gnu --release
mkdir -p upkr-windows
cp ../target/x86_64-pc-windows-gnu/release/upkr.exe upkr-windows/
# Empty target with no prerequisites and no recipe. NOTE(review): listed as a
# prerequisite of the archive rules, presumably to force them to re-run on
# every invocation (make's "force target" idiom) - confirm.
PHONY:

View File

@@ -12,6 +12,8 @@ pub fn pack(
let mut rans_coder = RansCoder::new(config);
let mut state = lz::CoderState::new(config);
let mut literal = vec![];
let mut pos = 0;
while pos < data.len() {
if let Some(ref mut cb) = progress_callback {
@@ -22,6 +24,10 @@ pub fn pack(
let max_offset = config.max_offset.min(1 << (m.length * 3 - 1).min(31));
let offset = pos - m.pos;
if offset < max_offset && m.length >= config.min_length() {
if !literal.is_empty() {
lz::Op::Literal(literal).encode(&mut rans_coder, &mut state, config);
literal = vec![];
}
let length = m.length.min(config.max_length);
lz::Op::Match {
offset: offset as u32,
@@ -43,6 +49,10 @@ pub fn pack(
.count()
.min(config.max_length);
if length >= config.min_length() {
if !literal.is_empty() {
lz::Op::Literal(literal).encode(&mut rans_coder, &mut state, config);
literal = vec![];
}
lz::Op::Match {
offset: offset as u32,
len: length as u32,
@@ -55,11 +65,14 @@ pub fn pack(
}
if !encoded_match {
lz::Op::Literal(data[pos]).encode(&mut rans_coder, &mut state, config);
literal.push(data[pos]);
pos += 1;
}
}
if !literal.is_empty() {
lz::Op::Literal(literal).encode(&mut rans_coder, &mut state, config);
}
lz::encode_eof(&mut rans_coder, &mut state, config);
rans_coder.finish()
}

167
src/lz.rs
View File

@@ -3,36 +3,53 @@ use crate::rans::{EntropyCoder, RansDecoder};
use crate::Config;
use thiserror::Error;
#[derive(Copy, Clone, Debug)]
#[derive(Clone, Debug)]
pub enum Op {
Literal(u8),
Literal(Vec<u8>),
Match { offset: u32, len: u32 },
}
impl Op {
pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) {
let literal_base = state.pos % state.parity_contexts * 256;
match self {
&Op::Literal(lit) => {
encode_bit(coder, state, literal_base, !config.is_match_bit);
let mut context_index = 1;
for i in (0..8).rev() {
let bit = (lit >> i) & 1 != 0;
encode_bit(coder, state, literal_base + context_index, bit);
context_index = (context_index << 1) | bit as usize;
&Op::Literal(ref lit) => {
assert!(state.prev_was_match);
encode_length(
coder,
state,
256 + state.pos % state.parity_contexts * 320,
lit.len() as u32 + 1,
config,
);
for lit in lit {
let literal_base = state.pos % state.parity_contexts * 320;
let mut context_index = 1;
for i in (0..8).rev() {
let bit = (lit >> i) & 1 != 0;
encode_bit(coder, state, literal_base + context_index, bit);
context_index = (context_index << 1) | bit as usize;
}
state.pos += 1;
}
state.prev_was_match = false;
state.pos += 1;
}
&Op::Match { offset, len } => {
encode_bit(coder, state, literal_base, config.is_match_bit);
if state.prev_was_match {
encode_length(
coder,
state,
256 + state.pos % state.parity_contexts * 320,
1,
config,
);
}
let mut new_offset = true;
if !state.prev_was_match && !config.no_repeated_offsets {
new_offset = offset != state.last_offset;
encode_bit(
coder,
state,
256 * state.parity_contexts,
320 * state.parity_contexts,
new_offset == config.new_offset_bit,
);
}
@@ -41,14 +58,14 @@ impl Op {
encode_length(
coder,
state,
256 * state.parity_contexts + 1,
320 * state.parity_contexts + 1,
offset + if config.eof_in_length { 0 } else { 1 },
config,
);
state.last_offset = offset;
}
assert!(len as usize >= config.min_length() && len as usize <= config.max_length);
encode_length(coder, state, 256 * state.parity_contexts + 65, len, config);
encode_length(coder, state, 320 * state.parity_contexts + 65, len, config);
state.prev_was_match = true;
state.pos += len as usize;
}
@@ -57,25 +74,28 @@ impl Op {
}
pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) {
encode_bit(
coder,
state,
state.pos % state.parity_contexts * 256,
config.is_match_bit,
);
if state.prev_was_match {
encode_length(
coder,
state,
256 + state.pos % state.parity_contexts * 320,
1,
config,
);
}
if !state.prev_was_match && !config.no_repeated_offsets {
encode_bit(
coder,
state,
256 * state.parity_contexts,
320 * state.parity_contexts,
config.new_offset_bit ^ config.eof_in_length,
);
}
if !config.eof_in_length || state.prev_was_match || config.no_repeated_offsets {
encode_length(coder, state, 256 * state.parity_contexts + 1, 1, config);
encode_length(coder, state, 320 * state.parity_contexts + 1, 1, config);
}
if config.eof_in_length {
encode_length(coder, state, 256 * state.parity_contexts + 65, 1, config);
encode_length(coder, state, 320 * state.parity_contexts + 65, 1, config);
}
}
@@ -119,9 +139,9 @@ pub struct CoderState {
impl CoderState {
pub fn new(config: &Config) -> CoderState {
CoderState {
contexts: ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, config),
contexts: ContextState::new((64 + 256) * config.parity_contexts + 1 + 64 + 64, config),
last_offset: 0,
prev_was_match: false,
prev_was_match: true,
pos: 0,
parity_contexts: config.parity_contexts,
}
@@ -168,7 +188,8 @@ pub fn unpack_internal(
max_size: usize,
) -> Result<isize, UnpackError> {
let mut decoder = RansDecoder::new(packed_data, &config);
let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, &config);
let mut contexts =
ContextState::new((64 + 256) * config.parity_contexts + 1 + 64 + 64, &config);
let mut offset = usize::MAX;
let mut position = 0usize;
let mut prev_was_match = false;
@@ -199,50 +220,14 @@ pub fn unpack_internal(
loop {
margin = margin.max(position as isize - decoder.pos() as isize);
let literal_base = position % config.parity_contexts * 256;
if decoder.decode_with_context(&mut contexts.context_mut(literal_base))?
== config.is_match_bit
{
if config.no_repeated_offsets
|| prev_was_match
|| decoder
.decode_with_context(&mut contexts.context_mut(256 * config.parity_contexts))?
== config.new_offset_bit
{
offset = decode_length(
&mut decoder,
&mut contexts,
256 * config.parity_contexts + 1,
&config,
)? - if config.eof_in_length { 0 } else { 1 };
if offset == 0 {
break;
}
}
let length = decode_length(
&mut decoder,
&mut contexts,
256 * config.parity_contexts + 65,
&config,
)?;
if config.eof_in_length && length == 1 {
break;
}
if offset > position {
return Err(UnpackError::OffsetOutOfRange { offset, position });
}
if let Some(ref mut result) = result {
for _ in 0..length {
if result.len() < max_size {
result.push(result[result.len() - offset]);
} else {
break;
}
}
}
position += length;
prev_was_match = true;
} else {
let literal_length = decode_length(
&mut decoder,
&mut contexts,
256 + position % config.parity_contexts * 320,
config,
)? - 1;
for _ in 0..literal_length {
let literal_base = position % config.parity_contexts * 320;
let mut context_index = 1;
let mut byte = 0;
for i in (0..8).rev() {
@@ -259,6 +244,46 @@ pub fn unpack_internal(
position += 1;
prev_was_match = false;
}
if config.no_repeated_offsets
|| prev_was_match
|| decoder
.decode_with_context(&mut contexts.context_mut(320 * config.parity_contexts))?
== config.new_offset_bit
{
offset = decode_length(
&mut decoder,
&mut contexts,
320 * config.parity_contexts + 1,
&config,
)? - if config.eof_in_length { 0 } else { 1 };
if offset == 0 {
break;
}
}
let length = decode_length(
&mut decoder,
&mut contexts,
320 * config.parity_contexts + 65,
&config,
)?;
if config.eof_in_length && length == 1 {
break;
}
if offset > position {
return Err(UnpackError::OffsetOutOfRange { offset, position });
}
if let Some(ref mut result) = result {
for _ in 0..length {
if result.len() < max_size {
result.push(result[result.len() - offset]);
} else {
break;
}
}
}
position += length;
prev_was_match = true;
}
if position > max_size {

View File

@@ -61,10 +61,6 @@ fn main() -> Result<()> {
Short('l') | Long("level") => level = parser.value()?.parse()?,
Short(n) if n.is_ascii_digit() => level = n as u8 - b'0',
Short('h') | Long("help") => print_help(0),
Long("version") => {
println!("{}", env!("CARGO_PKG_VERSION"));
process::exit(0);
}
Long("max-unpacked-size") => max_unpacked_size = parser.value()?.parse()?,
Value(val) if infile.is_none() => infile = Some(val.try_into()?),
Value(val) if outfile.is_none() => outfile = Some(val.try_into()?),
@@ -159,8 +155,6 @@ fn print_help(exit_code: i32) -> ! {
eprintln!(" -u, --unpack unpack infile");
eprintln!(" --margin calculate margin for overlapped unpacking of a packed file");
eprintln!();
eprintln!("Version: {}", env!("CARGO_PKG_VERSION"));
eprintln!();
eprintln!("Config presets for specific unpackers:");
eprintln!(" --z80 --big-endian-bitstream --invert-bit-encoding --simplified-prob-update -9");
eprintln!(

View File

@@ -15,7 +15,7 @@ pub fn pack(
let mut parse = parse(data, Config::from_level(level), config, progress_cb);
let mut ops = vec![];
while let Some(link) = parse {
ops.push(link.op);
ops.push(link.op.clone());
parse = link.prev.clone();
}
let mut state = lz::CoderState::new(config);
@@ -32,9 +32,15 @@ struct Parse {
op: lz::Op,
}
struct LiteralPrefix {
arrival: Arrival,
prefix: Vec<u8>,
}
struct Arrival {
parse: Option<Rc<Parse>>,
state: lz::CoderState,
literal_prefix: Option<Box<LiteralPrefix>>,
cost: f64,
}
@@ -130,6 +136,7 @@ fn parse(
op,
})),
state,
literal_prefix: None,
cost: arrival.cost + cost_counter.cost(),
},
max_arrivals,
@@ -141,6 +148,7 @@ fn parse(
Arrival {
parse: None,
state: lz::CoderState::new(encoding_config),
literal_prefix: None,
cost: 0.0,
},
max_arrivals,
@@ -252,19 +260,26 @@ fn parse(
}
cost_counter.reset();
let mut state = arrival.state;
let op = lz::Op::Literal(data[pos]);
let (arrival, mut prefix) = if let Some(prefix) = arrival.literal_prefix {
(prefix.arrival, prefix.prefix)
} else {
(arrival, vec![])
};
let mut state = arrival.state.clone();
prefix.push(data[pos]);
let op = lz::Op::Literal(prefix.clone());
op.encode(cost_counter, &mut state, encoding_config);
add_arrival(
&mut arrivals,
pos + 1,
Arrival {
parse: Some(Rc::new(Parse {
prev: arrival.parse,
prev: arrival.parse.clone(),
op,
})),
state,
cost: arrival.cost + cost_counter.cost(),
literal_prefix: Some(Box::new(LiteralPrefix { arrival, prefix })),
},
max_arrivals,
);