2 Commits

Author SHA1 Message Date
8715dede0e add --eof-in-length option 2022-09-26 23:41:17 +02:00
b12c8f8d93 add parameter to print out margin for overlapped unpacking 2022-09-25 23:44:03 +02:00
7 changed files with 86 additions and 23 deletions

2
Cargo.lock generated
View File

@@ -117,7 +117,7 @@ dependencies = [
[[package]]
name = "upkr"
version = "0.2.0-pre1"
version = "0.2.0-pre2"
dependencies = [
"anyhow",
"cdivsufsort",

View File

@@ -21,7 +21,7 @@ pub fn pack(
if let Some(m) = match_finder.matches(pos).next() {
let max_offset = 1 << (m.length * 3 - 1).min(31);
let offset = pos - m.pos;
if offset < max_offset {
if offset < max_offset && m.length >= config.min_length() {
lz::Op::Match {
offset: offset as u32,
len: m.length as u32,
@@ -40,7 +40,7 @@ pub fn pack(
.zip(data[(pos - offset)..].iter())
.take_while(|(a, b)| a == b)
.count();
if length > 0 {
if length >= config.min_length() {
lz::Op::Match {
offset: offset as u32,
len: length as u32,

View File

@@ -5,7 +5,7 @@ mod match_finder;
mod parsing_packer;
mod rans;
pub use lz::unpack;
pub use lz::{calculate_margin, unpack};
pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize);
@@ -22,6 +22,7 @@ pub struct Config {
pub simplified_prob_update: bool,
pub no_repeated_offsets: bool,
pub eof_in_length: bool,
}
impl Default for Config {
@@ -39,6 +40,17 @@ impl Default for Config {
simplified_prob_update: false,
no_repeated_offsets: false,
eof_in_length: false,
}
}
}
impl Config {
    /// Shortest match length that may be emitted under this configuration.
    ///
    /// Returns `2` when `eof_in_length` is enabled and `1` otherwise. With
    /// `eof_in_length`, a decoded length of 1 is what terminates the stream,
    /// so real matches have to be longer than that.
    pub fn min_length(&self) -> usize {
        // `false` converts to 0 and `true` to 1, giving 1 or 2 respectively.
        1 + usize::from(self.eof_in_length)
    }
}

View File

@@ -40,11 +40,12 @@ impl Op {
coder,
state,
256 * state.parity_contexts + 1,
offset + 1,
offset + if config.eof_in_length { 0 } else { 1 },
config,
);
state.last_offset = offset;
}
assert!(!config.eof_in_length || len > 1);
encode_length(coder, state, 256 * state.parity_contexts + 65, len, config);
state.prev_was_match = true;
state.pos += len as usize;
@@ -60,15 +61,20 @@ pub fn encode_eof(coder: &mut dyn EntropyCoder, state: &mut CoderState, config:
state.pos % state.parity_contexts * 256,
config.is_match_bit,
);
if !state.prev_was_match {
if !state.prev_was_match && !config.no_repeated_offsets {
encode_bit(
coder,
state,
256 * state.parity_contexts,
config.new_offset_bit,
config.new_offset_bit ^ config.eof_in_length,
);
}
encode_length(coder, state, 256 * state.parity_contexts + 1, 1, config);
if !config.eof_in_length || config.no_repeated_offsets {
encode_length(coder, state, 256 * state.parity_contexts + 1, 1, config);
}
if config.eof_in_length {
encode_length(coder, state, 256 * state.parity_contexts + 65, 1, config);
}
}
fn encode_bit(
@@ -124,12 +130,27 @@ impl CoderState {
}
}
pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
/// Decompresses `packed_data` using the stream settings in `config` and
/// returns the unpacked bytes.
///
/// This delegates to `unpack_internal` with an output buffer; the margin
/// value that function returns is not needed here and is discarded.
pub fn unpack(packed_data: &[u8], config: &Config) -> Vec<u8> {
    let mut unpacked = Vec::new();
    unpack_internal(Some(&mut unpacked), packed_data, config);
    unpacked
}
/// Calculates the margin for overlapped unpacking of `packed_data`.
///
/// Runs `unpack_internal` without an output buffer (`None`) and returns its
/// result unchanged, so no unpacked data is stored.
pub fn calculate_margin(packed_data: &[u8], config: &Config) -> isize {
unpack_internal(None, packed_data, config)
}
pub fn unpack_internal(
mut result: Option<&mut Vec<u8>>,
packed_data: &[u8],
config: &Config,
) -> isize {
let mut decoder = RansDecoder::new(packed_data, &config);
let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, &config);
let mut result = vec![];
let mut offset = 0;
let mut position = 0usize;
let mut prev_was_match = false;
let mut margin = 0isize;
fn decode_length(
decoder: &mut RansDecoder,
@@ -152,7 +173,8 @@ pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
}
loop {
let literal_base = result.len() % config.parity_contexts * 256;
margin = margin.max(position as isize - decoder.pos() as isize);
let literal_base = position % config.parity_contexts * 256;
if decoder.decode_with_context(&mut contexts.context_mut(literal_base))
== config.is_match_bit
{
@@ -167,7 +189,7 @@ pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
&mut contexts,
256 * config.parity_contexts + 1,
&config,
) - 1;
) - if config.eof_in_length { 0 } else { 1 };
if offset == 0 {
break;
}
@@ -178,9 +200,15 @@ pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
256 * config.parity_contexts + 65,
&config,
);
for _ in 0..length {
result.push(result[result.len() - offset]);
if config.eof_in_length && length == 1 {
break;
}
if let Some(ref mut result) = result {
for _ in 0..length {
result.push(result[result.len() - offset]);
}
}
position += length;
prev_was_match = true;
} else {
let mut context_index = 1;
@@ -191,10 +219,13 @@ pub fn unpack(packed_data: &[u8], config: Config) -> Vec<u8> {
context_index = (context_index << 1) | bit as usize;
byte |= (bit as u8) << i;
}
result.push(byte);
if let Some(ref mut result) = result {
result.push(byte);
}
position += 1;
prev_was_match = false;
}
}
result
margin + decoder.pos() as isize - position as isize
}

View File

@@ -8,6 +8,7 @@ fn main() -> Result<()> {
let mut config = upkr::Config::default();
let mut reverse = false;
let mut unpack = false;
let mut calculate_margin = false;
let mut level = 2;
let mut infile: Option<PathBuf> = None;
let mut outfile: Option<PathBuf> = None;
@@ -29,12 +30,14 @@ fn main() -> Result<()> {
config.bitstream_is_big_endian = true;
}
Long("no-repeated-offsets") => config.no_repeated_offsets = true,
Long("eof-in-length") => config.eof_in_length = true,
Long("z80") => {
config.use_bitstream = true;
config.bitstream_is_big_endian = true;
config.invert_bit_encoding = true;
config.simplified_prob_update = true;
level = 9;
}
Long("x86") => {
config.use_bitstream = true;
@@ -43,6 +46,7 @@ fn main() -> Result<()> {
}
Short('u') | Long("unpack") => unpack = true,
Long("margin") => calculate_margin = true,
Short('l') | Long("level") => level = parser.value()?.parse()?,
Short('h') | Long("help") => print_help(0),
Value(val) if infile.is_none() => infile = Some(val.try_into()?),
@@ -76,7 +80,7 @@ fn main() -> Result<()> {
process::exit(1);
}
if !unpack {
if !unpack && !calculate_margin {
let mut data = vec![];
File::open(infile)?.read_to_end(&mut data)?;
if reverse {
@@ -112,11 +116,16 @@ fn main() -> Result<()> {
if reverse {
data.reverse();
}
let mut unpacked_data = upkr::unpack(&data, config);
if reverse {
unpacked_data.reverse();
if unpack {
let mut unpacked_data = upkr::unpack(&data, &config);
if reverse {
unpacked_data.reverse();
}
File::create(outfile)?.write_all(&unpacked_data)?;
}
if calculate_margin {
println!("{}", upkr::calculate_margin(&data, &config));
}
File::create(outfile)?.write_all(&unpacked_data)?;
}
Ok(())
@@ -126,9 +135,11 @@ fn print_help(exit_code: i32) -> ! {
eprintln!("Usage:");
eprintln!(" upkr [-l level(0-9)] [config options] <infile> [<outfile>]");
eprintln!(" upkr -u [config options] <infile> [<outfile>]");
eprintln!(" upkr --margin [config options] <infile>");
eprintln!();
eprintln!(" -l, --level N compression level 0-9");
eprintln!(" -u, --unpack unpack infile");
eprintln!(" --margin calculate margin for overlapped unpacking of a packed file");
eprintln!();
eprintln!("Config presets for specific unpackers:");
eprintln!(" --z80 --big-endian-bitstream --invert-bit-encoding --simplified-prob-update");

View File

@@ -110,6 +110,9 @@ fn parse(
max_arrivals: usize,
config: &crate::Config,
) {
if length < config.min_length() {
return;
}
cost_counter.reset();
let mut state = arrival.state.clone();
let op = lz::Op::Match {

View File

@@ -149,6 +149,7 @@ impl EntropyCoder for CostCounter {
pub struct RansDecoder<'a> {
data: &'a [u8],
pos: usize,
state: u32,
use_bitstream: bool,
byte: u8,
@@ -163,6 +164,7 @@ impl<'a> RansDecoder<'a> {
pub fn new(data: &'a [u8], config: &Config) -> RansDecoder<'a> {
RansDecoder {
data,
pos: 0,
state: 0,
use_bitstream: config.use_bitstream,
byte: 0,
@@ -172,6 +174,10 @@ impl<'a> RansDecoder<'a> {
}
}
/// Returns the decoder's current index into the input data.
///
/// NOTE(review): only the bitstream read path is visible here, where the
/// index advances by one for each byte fetched — confirm the non-bitstream
/// path updates it the same way.
pub fn pos(&self) -> usize {
self.pos
}
pub fn decode_with_context(&mut self, context: &mut Context) -> bool {
let bit = self.decode_bit(context.prob());
context.update(bit);
@@ -183,8 +189,8 @@ impl<'a> RansDecoder<'a> {
if self.use_bitstream {
while self.state < 32768 {
if self.bits_left == 0 {
self.byte = self.data[0];
self.data = &self.data[1..];
self.byte = self.data[self.pos];
self.pos += 1;
self.bits_left = 8;
}
if self.bitstream_is_big_endian {