mirror of
https://github.com/exoticorn/upkr.git
synced 2026-01-20 19:46:42 +01:00
Compare commits
2 Commits
v0.2.0-pre
...
8715dede0e
| Author | SHA1 | Date | |
|---|---|---|---|
| 8715dede0e | |||
| b12c8f8d93 |
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -117,7 +117,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "upkr"
|
||||
version = "0.2.0-pre1"
|
||||
version = "0.2.0-pre2"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"cdivsufsort",
|
||||
|
||||
@@ -21,7 +21,7 @@ pub fn pack(
|
||||
if let Some(m) = match_finder.matches(pos).next() {
|
||||
let max_offset = 1 << (m.length * 3 - 1).min(31);
|
||||
let offset = pos - m.pos;
|
||||
if offset < max_offset {
|
||||
if offset < max_offset && m.length >= config.min_length() {
|
||||
lz::Op::Match {
|
||||
offset: offset as u32,
|
||||
len: m.length as u32,
|
||||
@@ -40,7 +40,7 @@ pub fn pack(
|
||||
.zip(data[(pos - offset)..].iter())
|
||||
.take_while(|(a, b)| a == b)
|
||||
.count();
|
||||
if length > 0 {
|
||||
if length >= config.min_length() {
|
||||
lz::Op::Match {
|
||||
offset: offset as u32,
|
||||
len: length as u32,
|
||||
|
||||
14
src/lib.rs
14
src/lib.rs
@@ -5,7 +5,7 @@ mod match_finder;
|
||||
mod parsing_packer;
|
||||
mod rans;
|
||||
|
||||
pub use lz::unpack;
|
||||
pub use lz::{calculate_margin, unpack};
|
||||
|
||||
pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize);
|
||||
|
||||
@@ -22,6 +22,7 @@ pub struct Config {
|
||||
pub simplified_prob_update: bool,
|
||||
|
||||
pub no_repeated_offsets: bool,
|
||||
pub eof_in_length: bool,
|
||||
}
|
||||
|
||||
impl Default for Config {
|
||||
@@ -39,6 +40,17 @@ impl Default for Config {
|
||||
simplified_prob_update: false,
|
||||
|
||||
no_repeated_offsets: false,
|
||||
eof_in_length: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Config {
|
||||
pub fn min_length(&self) -> usize {
|
||||
if self.eof_in_length {
|
||||
2
|
||||
} else {
|
||||
1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
55
src/lz.rs
55
src/lz.rs
@@ -40,11 +40,12 @@ impl Op {
|
||||
coder,
|
||||
state,
|
||||
256 * state.parity_contexts + 1,
|
||||
offset + 1,
|
||||
offset + if config.eof_in_length { 0 } else { 1 },
|
||||
config,
|
||||
);
|
||||
state.last_offset = offset;
|
||||
}
|
||||
assert!(!config.eof_in_length || len > 1);
|
||||
encode_length(coder, state, 256 * state.parity_contexts + 65, len, config);
|
||||
state.prev_was_match = true;
|
||||
state.pos += len as usize;
|
||||
@@ -60,15 +61,20 @@ pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState, config:
|
||||
state.pos % state.parity_contexts * 256,
|
||||
config.is_match_bit,
|
||||
);
|
||||
if !state.prev_was_match {
|
||||
if !state.prev_was_match && !config.no_repeated_offsets {
|
||||
encode_bit(
|
||||
coder,
|
||||
state,
|
||||
256 * state.parity_contexts,
|
||||
config.new_offset_bit,
|
||||
config.new_offset_bit ^ config.eof_in_length,
|
||||
);
|
||||
}
|
||||
encode_length(coder, state, 256 * state.parity_contexts + 1, 1, config);
|
||||
if !config.eof_in_length || config.no_repeated_offsets {
|
||||
encode_length(coder, state, 256 * state.parity_contexts + 1, 1, config);
|
||||
}
|
||||
if config.eof_in_length {
|
||||
encode_length(coder, state, 256 * state.parity_contexts + 65, 1, config);
|
||||
}
|
||||
}
|
||||
|
||||
fn encode_bit(
|
||||
@@ -124,12 +130,27 @@ impl CoderState {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
|
||||
pub fn unpack(packed_data: &[u8], config: &Config) -> Vec<u8> {
|
||||
let mut result = vec![];
|
||||
let _ = unpack_internal(Some(&mut result), packed_data, config);
|
||||
result
|
||||
}
|
||||
|
||||
pub fn calculate_margin(packed_data: &[u8], config: &Config) -> isize {
|
||||
unpack_internal(None, packed_data, config)
|
||||
}
|
||||
|
||||
pub fn unpack_internal(
|
||||
mut result: Option<&mut Vec<u8>>,
|
||||
packed_data: &[u8],
|
||||
config: &Config,
|
||||
) -> isize {
|
||||
let mut decoder = RansDecoder::new(packed_data, &config);
|
||||
let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, &config);
|
||||
let mut result = vec![];
|
||||
let mut offset = 0;
|
||||
let mut position = 0usize;
|
||||
let mut prev_was_match = false;
|
||||
let mut margin = 0isize;
|
||||
|
||||
fn decode_length(
|
||||
decoder: &mut RansDecoder,
|
||||
@@ -152,7 +173,8 @@ pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
|
||||
}
|
||||
|
||||
loop {
|
||||
let literal_base = result.len() % config.parity_contexts * 256;
|
||||
margin = margin.max(position as isize - decoder.pos() as isize);
|
||||
let literal_base = position % config.parity_contexts * 256;
|
||||
if decoder.decode_with_context(&mut contexts.context_mut(literal_base))
|
||||
== config.is_match_bit
|
||||
{
|
||||
@@ -167,7 +189,7 @@ pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
|
||||
&mut contexts,
|
||||
256 * config.parity_contexts + 1,
|
||||
&config,
|
||||
) - 1;
|
||||
) - if config.eof_in_length { 0 } else { 1 };
|
||||
if offset == 0 {
|
||||
break;
|
||||
}
|
||||
@@ -178,9 +200,15 @@ pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
|
||||
256 * config.parity_contexts + 65,
|
||||
&config,
|
||||
);
|
||||
for _ in 0..length {
|
||||
result.push(result[result.len() - offset]);
|
||||
if config.eof_in_length && length == 1 {
|
||||
break;
|
||||
}
|
||||
if let Some(ref mut result) = result {
|
||||
for _ in 0..length {
|
||||
result.push(result[result.len() - offset]);
|
||||
}
|
||||
}
|
||||
position += length;
|
||||
prev_was_match = true;
|
||||
} else {
|
||||
let mut context_index = 1;
|
||||
@@ -191,10 +219,13 @@ pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
|
||||
context_index = (context_index << 1) | bit as usize;
|
||||
byte |= (bit as u8) << i;
|
||||
}
|
||||
result.push(byte);
|
||||
if let Some(ref mut result) = result {
|
||||
result.push(byte);
|
||||
}
|
||||
position += 1;
|
||||
prev_was_match = false;
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
margin + decoder.pos() as isize - position as isize
|
||||
}
|
||||
|
||||
21
src/main.rs
21
src/main.rs
@@ -8,6 +8,7 @@ fn main() -> Result<()> {
|
||||
let mut config = upkr::Config::default();
|
||||
let mut reverse = false;
|
||||
let mut unpack = false;
|
||||
let mut calculate_margin = false;
|
||||
let mut level = 2;
|
||||
let mut infile: Option<PathBuf> = None;
|
||||
let mut outfile: Option<PathBuf> = None;
|
||||
@@ -29,12 +30,14 @@ fn main() -> Result<()> {
|
||||
config.bitstream_is_big_endian = true;
|
||||
}
|
||||
Long("no-repeated-offsets") => config.no_repeated_offsets = true,
|
||||
Long("eof-in-length") => config.eof_in_length = true,
|
||||
|
||||
Long("z80") => {
|
||||
config.use_bitstream = true;
|
||||
config.bitstream_is_big_endian = true;
|
||||
config.invert_bit_encoding = true;
|
||||
config.simplified_prob_update = true;
|
||||
level = 9;
|
||||
}
|
||||
Long("x86") => {
|
||||
config.use_bitstream = true;
|
||||
@@ -43,6 +46,7 @@ fn main() -> Result<()> {
|
||||
}
|
||||
|
||||
Short('u') | Long("unpack") => unpack = true,
|
||||
Long("margin") => calculate_margin = true,
|
||||
Short('l') | Long("level") => level = parser.value()?.parse()?,
|
||||
Short('h') | Long("help") => print_help(0),
|
||||
Value(val) if infile.is_none() => infile = Some(val.try_into()?),
|
||||
@@ -76,7 +80,7 @@ fn main() -> Result<()> {
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
if !unpack {
|
||||
if !unpack && !calculate_margin {
|
||||
let mut data = vec![];
|
||||
File::open(infile)?.read_to_end(&mut data)?;
|
||||
if reverse {
|
||||
@@ -112,11 +116,16 @@ fn main() -> Result<()> {
|
||||
if reverse {
|
||||
data.reverse();
|
||||
}
|
||||
let mut unpacked_data = upkr::unpack(&data, config);
|
||||
if reverse {
|
||||
unpacked_data.reverse();
|
||||
if unpack {
|
||||
let mut unpacked_data = upkr::unpack(&data, &config);
|
||||
if reverse {
|
||||
unpacked_data.reverse();
|
||||
}
|
||||
File::create(outfile)?.write_all(&unpacked_data)?;
|
||||
}
|
||||
if calculate_margin {
|
||||
println!("{}", upkr::calculate_margin(&data, &config));
|
||||
}
|
||||
File::create(outfile)?.write_all(&unpacked_data)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -126,9 +135,11 @@ fn print_help(exit_code: i32) -> ! {
|
||||
eprintln!("Usage:");
|
||||
eprintln!(" upkr [-l level(0-9)] [config options] <infile> [<outfile>]");
|
||||
eprintln!(" upkr -u [config options] <infile> [<outfile>]");
|
||||
eprintln!(" upkr --margin [config options] <infile>");
|
||||
eprintln!();
|
||||
eprintln!(" -l, --level N compression level 0-9");
|
||||
eprintln!(" -u, --unpack unpack infile");
|
||||
eprintln!(" --margin calculate margin for overlapped unpacking of a packed file");
|
||||
eprintln!();
|
||||
eprintln!("Config presets for specific unpackers:");
|
||||
eprintln!(" --z80 --big-endian-bitstream --invert-bit-encoding --simplified-prob-update");
|
||||
|
||||
@@ -110,6 +110,9 @@ fn parse(
|
||||
max_arrivals: usize,
|
||||
config: &crate::Config,
|
||||
) {
|
||||
if length < config.min_length() {
|
||||
return;
|
||||
}
|
||||
cost_counter.reset();
|
||||
let mut state = arrival.state.clone();
|
||||
let op = lz::Op::Match {
|
||||
|
||||
10
src/rans.rs
10
src/rans.rs
@@ -149,6 +149,7 @@ impl EntropyCoder for CostCounter {
|
||||
|
||||
pub struct RansDecoder<'a> {
|
||||
data: &'a [u8],
|
||||
pos: usize,
|
||||
state: u32,
|
||||
use_bitstream: bool,
|
||||
byte: u8,
|
||||
@@ -163,6 +164,7 @@ impl<'a> RansDecoder<'a> {
|
||||
pub fn new(data: &'a [u8], config: &Config) -> RansDecoder<'a> {
|
||||
RansDecoder {
|
||||
data,
|
||||
pos: 0,
|
||||
state: 0,
|
||||
use_bitstream: config.use_bitstream,
|
||||
byte: 0,
|
||||
@@ -172,6 +174,10 @@ impl<'a> RansDecoder<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn pos(&self) -> usize {
|
||||
self.pos
|
||||
}
|
||||
|
||||
pub fn decode_with_context(&mut self, context: &mut Context) -> bool {
|
||||
let bit = self.decode_bit(context.prob());
|
||||
context.update(bit);
|
||||
@@ -183,8 +189,8 @@ impl<'a> RansDecoder<'a> {
|
||||
if self.use_bitstream {
|
||||
while self.state < 32768 {
|
||||
if self.bits_left == 0 {
|
||||
self.byte = self.data[0];
|
||||
self.data = &self.data[1..];
|
||||
self.byte = self.data[self.pos];
|
||||
self.pos += 1;
|
||||
self.bits_left = 8;
|
||||
}
|
||||
if self.bitstream_is_big_endian {
|
||||
|
||||
Reference in New Issue
Block a user