mirror of
https://github.com/exoticorn/upkr.git
synced 2026-01-20 19:46:42 +01:00
Compare commits
4 Commits
v0.2.0-pre
...
8715dede0e
| Author | SHA1 | Date | |
|---|---|---|---|
| 8715dede0e | |||
| b12c8f8d93 | |||
| af5fe898bf | |||
| 331857a711 |
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -117,7 +117,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "upkr"
|
||||
version = "0.2.0-pre1"
|
||||
version = "0.2.0-pre2"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"cdivsufsort",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "upkr"
|
||||
version = "0.2.0-pre1"
|
||||
version = "0.2.0-pre2"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
@@ -21,7 +21,7 @@ pub fn pack(
|
||||
if let Some(m) = match_finder.matches(pos).next() {
|
||||
let max_offset = 1 << (m.length * 3 - 1).min(31);
|
||||
let offset = pos - m.pos;
|
||||
if offset < max_offset {
|
||||
if offset < max_offset && m.length >= config.min_length() {
|
||||
lz::Op::Match {
|
||||
offset: offset as u32,
|
||||
len: m.length as u32,
|
||||
@@ -40,7 +40,7 @@ pub fn pack(
|
||||
.zip(data[(pos - offset)..].iter())
|
||||
.take_while(|(a, b)| a == b)
|
||||
.count();
|
||||
if length > 0 {
|
||||
if length >= config.min_length() {
|
||||
lz::Op::Match {
|
||||
offset: offset as u32,
|
||||
len: length as u32,
|
||||
|
||||
18
src/lib.rs
18
src/lib.rs
@@ -5,7 +5,7 @@ mod match_finder;
|
||||
mod parsing_packer;
|
||||
mod rans;
|
||||
|
||||
pub use lz::unpack;
|
||||
pub use lz::{calculate_margin, unpack};
|
||||
|
||||
pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize);
|
||||
|
||||
@@ -20,6 +20,9 @@ pub struct Config {
|
||||
|
||||
pub bitstream_is_big_endian: bool,
|
||||
pub simplified_prob_update: bool,
|
||||
|
||||
pub no_repeated_offsets: bool,
|
||||
pub eof_in_length: bool,
|
||||
}
|
||||
|
||||
impl Default for Config {
|
||||
@@ -35,6 +38,19 @@ impl Default for Config {
|
||||
|
||||
bitstream_is_big_endian: false,
|
||||
simplified_prob_update: false,
|
||||
|
||||
no_repeated_offsets: false,
|
||||
eof_in_length: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Config {
|
||||
/// Returns the shortest match length the packers are allowed to emit.
///
/// This is 2 when `eof_in_length` is set and 1 otherwise. With
/// `eof_in_length`, a decoded match length of 1 makes the unpacker stop,
/// so real matches must be longer than that.
pub fn min_length(&self) -> usize {
|
||||
if self.eof_in_length {
|
||||
2
|
||||
} else {
|
||||
1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
64
src/lz.rs
64
src/lz.rs
@@ -25,7 +25,7 @@ impl Op {
|
||||
}
|
||||
&Op::Match { offset, len } => {
|
||||
encode_bit(coder, state, literal_base, config.is_match_bit);
|
||||
if !state.prev_was_match {
|
||||
if !state.prev_was_match && !config.no_repeated_offsets {
|
||||
encode_bit(
|
||||
coder,
|
||||
state,
|
||||
@@ -33,18 +33,19 @@ impl Op {
|
||||
(offset != state.last_offset) == config.new_offset_bit,
|
||||
);
|
||||
} else {
|
||||
assert!(offset != state.last_offset);
|
||||
assert!(offset != state.last_offset || config.no_repeated_offsets);
|
||||
}
|
||||
if offset != state.last_offset {
|
||||
if offset != state.last_offset || config.no_repeated_offsets {
|
||||
encode_length(
|
||||
coder,
|
||||
state,
|
||||
256 * state.parity_contexts + 1,
|
||||
offset + 1,
|
||||
offset + if config.eof_in_length { 0 } else { 1 },
|
||||
config,
|
||||
);
|
||||
state.last_offset = offset;
|
||||
}
|
||||
assert!(!config.eof_in_length || len > 1);
|
||||
encode_length(coder, state, 256 * state.parity_contexts + 65, len, config);
|
||||
state.prev_was_match = true;
|
||||
state.pos += len as usize;
|
||||
@@ -60,15 +61,20 @@ pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState, config:
|
||||
state.pos % state.parity_contexts * 256,
|
||||
config.is_match_bit,
|
||||
);
|
||||
if !state.prev_was_match {
|
||||
if !state.prev_was_match && !config.no_repeated_offsets {
|
||||
encode_bit(
|
||||
coder,
|
||||
state,
|
||||
256 * state.parity_contexts,
|
||||
config.new_offset_bit,
|
||||
config.new_offset_bit ^ config.eof_in_length,
|
||||
);
|
||||
}
|
||||
encode_length(coder, state, 256 * state.parity_contexts + 1, 1, config);
|
||||
if !config.eof_in_length || config.no_repeated_offsets {
|
||||
encode_length(coder, state, 256 * state.parity_contexts + 1, 1, config);
|
||||
}
|
||||
if config.eof_in_length {
|
||||
encode_length(coder, state, 256 * state.parity_contexts + 65, 1, config);
|
||||
}
|
||||
}
|
||||
|
||||
fn encode_bit(
|
||||
@@ -124,12 +130,27 @@ impl CoderState {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
|
||||
/// Unpacks `packed_data` according to `config` and returns the decoded bytes.
///
/// Delegates to `unpack_internal` with a fresh output buffer; the `isize`
/// that `unpack_internal` returns is deliberately discarded here.
pub fn unpack(packed_data: &[u8], config: &Config) -> Vec<u8> {
|
||||
let mut result = vec![];
|
||||
let _ = unpack_internal(Some(&mut result), packed_data, config);
|
||||
result
|
||||
}
|
||||
|
||||
/// Runs the decoder over `packed_data` without an output buffer (`None`) and
/// returns the `isize` computed by `unpack_internal`.
///
/// The command-line help describes this value as the margin for overlapped
/// unpacking of a packed file.
pub fn calculate_margin(packed_data: &[u8], config: &Config) -> isize {
|
||||
unpack_internal(None, packed_data, config)
|
||||
}
|
||||
|
||||
pub fn unpack_internal(
|
||||
mut result: Option<&mut Vec<u8>>,
|
||||
packed_data: &[u8],
|
||||
config: &Config,
|
||||
) -> isize {
|
||||
let mut decoder = RansDecoder::new(packed_data, &config);
|
||||
let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, &config);
|
||||
let mut result = vec![];
|
||||
let mut offset = 0;
|
||||
let mut position = 0usize;
|
||||
let mut prev_was_match = false;
|
||||
let mut margin = 0isize;
|
||||
|
||||
fn decode_length(
|
||||
decoder: &mut RansDecoder,
|
||||
@@ -152,11 +173,13 @@ pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
|
||||
}
|
||||
|
||||
loop {
|
||||
let literal_base = result.len() % config.parity_contexts * 256;
|
||||
margin = margin.max(position as isize - decoder.pos() as isize);
|
||||
let literal_base = position % config.parity_contexts * 256;
|
||||
if decoder.decode_with_context(&mut contexts.context_mut(literal_base))
|
||||
== config.is_match_bit
|
||||
{
|
||||
if prev_was_match
|
||||
if config.no_repeated_offsets
|
||||
|| prev_was_match
|
||||
|| decoder
|
||||
.decode_with_context(&mut contexts.context_mut(256 * config.parity_contexts))
|
||||
== config.new_offset_bit
|
||||
@@ -166,7 +189,7 @@ pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
|
||||
&mut contexts,
|
||||
256 * config.parity_contexts + 1,
|
||||
&config,
|
||||
) - 1;
|
||||
) - if config.eof_in_length { 0 } else { 1 };
|
||||
if offset == 0 {
|
||||
break;
|
||||
}
|
||||
@@ -177,9 +200,15 @@ pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
|
||||
256 * config.parity_contexts + 65,
|
||||
&config,
|
||||
);
|
||||
for _ in 0..length {
|
||||
result.push(result[result.len() - offset]);
|
||||
if config.eof_in_length && length == 1 {
|
||||
break;
|
||||
}
|
||||
if let Some(ref mut result) = result {
|
||||
for _ in 0..length {
|
||||
result.push(result[result.len() - offset]);
|
||||
}
|
||||
}
|
||||
position += length;
|
||||
prev_was_match = true;
|
||||
} else {
|
||||
let mut context_index = 1;
|
||||
@@ -190,10 +219,13 @@ pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
|
||||
context_index = (context_index << 1) | bit as usize;
|
||||
byte |= (bit as u8) << i;
|
||||
}
|
||||
result.push(byte);
|
||||
if let Some(ref mut result) = result {
|
||||
result.push(byte);
|
||||
}
|
||||
position += 1;
|
||||
prev_was_match = false;
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
margin + decoder.pos() as isize - position as isize
|
||||
}
|
||||
|
||||
23
src/main.rs
23
src/main.rs
@@ -8,6 +8,7 @@ fn main() -> Result<()> {
|
||||
let mut config = upkr::Config::default();
|
||||
let mut reverse = false;
|
||||
let mut unpack = false;
|
||||
let mut calculate_margin = false;
|
||||
let mut level = 2;
|
||||
let mut infile: Option<PathBuf> = None;
|
||||
let mut outfile: Option<PathBuf> = None;
|
||||
@@ -28,12 +29,15 @@ fn main() -> Result<()> {
|
||||
config.use_bitstream = true;
|
||||
config.bitstream_is_big_endian = true;
|
||||
}
|
||||
Long("no-repeated-offsets") => config.no_repeated_offsets = true,
|
||||
Long("eof-in-length") => config.eof_in_length = true,
|
||||
|
||||
Long("z80") => {
|
||||
config.use_bitstream = true;
|
||||
config.bitstream_is_big_endian = true;
|
||||
config.invert_bit_encoding = true;
|
||||
config.simplified_prob_update = true;
|
||||
level = 9;
|
||||
}
|
||||
Long("x86") => {
|
||||
config.use_bitstream = true;
|
||||
@@ -42,6 +46,7 @@ fn main() -> Result<()> {
|
||||
}
|
||||
|
||||
Short('u') | Long("unpack") => unpack = true,
|
||||
Long("margin") => calculate_margin = true,
|
||||
Short('l') | Long("level") => level = parser.value()?.parse()?,
|
||||
Short('h') | Long("help") => print_help(0),
|
||||
Value(val) if infile.is_none() => infile = Some(val.try_into()?),
|
||||
@@ -75,7 +80,7 @@ fn main() -> Result<()> {
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
if !unpack {
|
||||
if !unpack && !calculate_margin {
|
||||
let mut data = vec![];
|
||||
File::open(infile)?.read_to_end(&mut data)?;
|
||||
if reverse {
|
||||
@@ -111,11 +116,16 @@ fn main() -> Result<()> {
|
||||
if reverse {
|
||||
data.reverse();
|
||||
}
|
||||
let mut unpacked_data = upkr::unpack(&data, config);
|
||||
if reverse {
|
||||
unpacked_data.reverse();
|
||||
if unpack {
|
||||
let mut unpacked_data = upkr::unpack(&data, &config);
|
||||
if reverse {
|
||||
unpacked_data.reverse();
|
||||
}
|
||||
File::create(outfile)?.write_all(&unpacked_data)?;
|
||||
}
|
||||
if calculate_margin {
|
||||
println!("{}", upkr::calculate_margin(&data, &config));
|
||||
}
|
||||
File::create(outfile)?.write_all(&unpacked_data)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -125,9 +135,11 @@ fn print_help(exit_code: i32) -> ! {
|
||||
eprintln!("Usage:");
|
||||
eprintln!(" upkr [-l level(0-9)] [config options] <infile> [<outfile>]");
|
||||
eprintln!(" upkr -u [config options] <infile> [<outfile>]");
|
||||
eprintln!(" upkr --margin [config options] <infile>");
|
||||
eprintln!();
|
||||
eprintln!(" -l, --level N compression level 0-9");
|
||||
eprintln!(" -u, --unpack unpack infile");
|
||||
eprintln!(" --margin calculate margin for overlapped unpacking of a packed file");
|
||||
eprintln!();
|
||||
eprintln!("Config presets for specific unpackers:");
|
||||
eprintln!(" --z80 --big-endian-bitstream --invert-bit-encoding --simplified-prob-update");
|
||||
@@ -145,5 +157,6 @@ fn print_help(exit_code: i32) -> ! {
|
||||
eprintln!(" --invert-bit-encoding");
|
||||
eprintln!(" --simplified-prob-update");
|
||||
eprintln!(" --big-endian-bitstream (implies --bitstream)");
|
||||
eprintln!(" --no-repeated-offsets");
|
||||
process::exit(exit_code);
|
||||
}
|
||||
|
||||
@@ -110,6 +110,9 @@ fn parse(
|
||||
max_arrivals: usize,
|
||||
config: &crate::Config,
|
||||
) {
|
||||
if length < config.min_length() {
|
||||
return;
|
||||
}
|
||||
cost_counter.reset();
|
||||
let mut state = arrival.state.clone();
|
||||
let op = lz::Op::Match {
|
||||
|
||||
10
src/rans.rs
10
src/rans.rs
@@ -149,6 +149,7 @@ impl EntropyCoder for CostCounter {
|
||||
|
||||
pub struct RansDecoder<'a> {
|
||||
data: &'a [u8],
|
||||
pos: usize,
|
||||
state: u32,
|
||||
use_bitstream: bool,
|
||||
byte: u8,
|
||||
@@ -163,6 +164,7 @@ impl<'a> RansDecoder<'a> {
|
||||
pub fn new(data: &'a [u8], config: &Config) -> RansDecoder<'a> {
|
||||
RansDecoder {
|
||||
data,
|
||||
pos: 0,
|
||||
state: 0,
|
||||
use_bitstream: config.use_bitstream,
|
||||
byte: 0,
|
||||
@@ -172,6 +174,10 @@ impl<'a> RansDecoder<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the decoder's current `pos` value.
///
/// In the bitstream refill path, `pos` is the index into `data` of the next
/// byte to fetch and is incremented after each fetch.
/// NOTE(review): the non-bitstream refill path is not visible here —
/// presumably it advances `pos` the same way; confirm.
pub fn pos(&self) -> usize {
|
||||
self.pos
|
||||
}
|
||||
|
||||
pub fn decode_with_context(&mut self, context: &mut Context) -> bool {
|
||||
let bit = self.decode_bit(context.prob());
|
||||
context.update(bit);
|
||||
@@ -183,8 +189,8 @@ impl<'a> RansDecoder<'a> {
|
||||
if self.use_bitstream {
|
||||
while self.state < 32768 {
|
||||
if self.bits_left == 0 {
|
||||
self.byte = self.data[0];
|
||||
self.data = &self.data[1..];
|
||||
self.byte = self.data[self.pos];
|
||||
self.pos += 1;
|
||||
self.bits_left = 8;
|
||||
}
|
||||
if self.bitstream_is_big_endian {
|
||||
|
||||
Reference in New Issue
Block a user