mirror of
https://github.com/exoticorn/upkr.git
synced 2026-01-20 11:36:42 +01:00
add compression levels
This commit is contained in:
12
src/main.rs
12
src/main.rs
@@ -8,20 +8,21 @@ fn main() -> Result<()> {
|
||||
match args.subcommand()?.as_ref().map(|s| s.as_str()) {
|
||||
None => print_help(),
|
||||
Some("pack") => {
|
||||
let fast = args.contains("--fast");
|
||||
let level = args.opt_value_from_str(["-l", "--level"])?.unwrap_or(2u8);
|
||||
|
||||
let infile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
|
||||
let outfile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
|
||||
|
||||
let mut data = vec![];
|
||||
File::open(infile)?.read_to_end(&mut data)?;
|
||||
let packed_data = if fast {
|
||||
let packed_data = if level == 0 {
|
||||
upkr::pack_fast(&data)
|
||||
} else {
|
||||
let mut pb = pbr::ProgressBar::new(data.len() as u64);
|
||||
pb.set_units(pbr::Units::Bytes);
|
||||
let packed_data = upkr::pack(
|
||||
&data,
|
||||
level,
|
||||
Some(&mut |pos| {
|
||||
pb.set(pos as u64);
|
||||
}),
|
||||
@@ -29,7 +30,12 @@ fn main() -> Result<()> {
|
||||
pb.finish();
|
||||
packed_data
|
||||
};
|
||||
println!("Compressed {} bytes to {} bytes ({}%)", data.len(), packed_data.len(), packed_data.len() as f32 * 100. / data.len() as f32);
|
||||
println!(
|
||||
"Compressed {} bytes to {} bytes ({}%)",
|
||||
data.len(),
|
||||
packed_data.len(),
|
||||
packed_data.len() as f32 * 100. / data.len() as f32
|
||||
);
|
||||
File::create(outfile)?.write_all(&packed_data)?;
|
||||
}
|
||||
Some("unpack") => {
|
||||
|
||||
@@ -45,14 +45,34 @@ impl MatchFinder {
|
||||
suffixes,
|
||||
rev_suffixes,
|
||||
lcp,
|
||||
max_queue_size: 1000,
|
||||
max_matches_per_length: 10,
|
||||
patience: 1000,
|
||||
max_length_diff: 4,
|
||||
max_queue_size: 100,
|
||||
max_matches_per_length: 5,
|
||||
patience: 100,
|
||||
max_length_diff: 2,
|
||||
queue: BinaryHeap::new()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_max_queue_size(mut self, v: usize) -> MatchFinder {
|
||||
self.max_queue_size = v;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_patience(mut self, v: usize) -> MatchFinder {
|
||||
self.patience = v;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_max_matches_per_length(mut self, v: usize) -> MatchFinder {
|
||||
self.max_matches_per_length = v;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_max_length_diff(mut self, v: usize) -> MatchFinder {
|
||||
self.max_length_diff = v;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn matches(&mut self, pos: usize) -> Matches {
|
||||
let index = self.rev_suffixes[pos] as usize;
|
||||
self.queue.clear();
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
use std::collections::HashMap;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::mem;
|
||||
use std::rc::Rc;
|
||||
|
||||
use crate::{ProgressCallback, lz};
|
||||
use crate::match_finder::MatchFinder;
|
||||
use crate::rans::{CostCounter, RansCoder};
|
||||
use crate::{lz, ProgressCallback};
|
||||
|
||||
pub fn pack(data: &[u8], progress_cb: Option<ProgressCallback>) -> Vec<u8> {
|
||||
let mut parse = parse(data, progress_cb);
|
||||
pub fn pack(data: &[u8], level: u8, progress_cb: Option<ProgressCallback>) -> Vec<u8> {
|
||||
let mut parse = parse(data, Config::from_level(level), progress_cb);
|
||||
let mut ops = vec![];
|
||||
while let Some(link) = parse {
|
||||
ops.push(link.op);
|
||||
@@ -34,26 +35,63 @@ struct Arrival {
|
||||
|
||||
type Arrivals = HashMap<usize, Vec<Arrival>>;
|
||||
|
||||
const MAX_ARRIVALS: usize = 256;
|
||||
|
||||
fn parse(data: &[u8], mut progress_cb: Option<ProgressCallback>) -> Option<Rc<Parse>> {
|
||||
let mut match_finder = MatchFinder::new(data);
|
||||
fn parse(
|
||||
data: &[u8],
|
||||
config: Config,
|
||||
mut progress_cb: Option<ProgressCallback>,
|
||||
) -> Option<Rc<Parse>> {
|
||||
let mut match_finder = MatchFinder::new(data)
|
||||
.with_max_queue_size(config.max_queue_size)
|
||||
.with_patience(config.patience)
|
||||
.with_max_matches_per_length(config.max_matches_per_length)
|
||||
.with_max_length_diff(config.max_length_diff);
|
||||
let mut near_matches = [usize::MAX; 1024];
|
||||
let mut last_seen = [usize::MAX; 256];
|
||||
|
||||
let max_arrivals = config.max_arrivals;
|
||||
|
||||
let mut arrivals: Arrivals = HashMap::new();
|
||||
fn add_arrival(arrivals: &mut Arrivals, pos: usize, arrival: Arrival) {
|
||||
let vec = arrivals.entry(pos).or_default();
|
||||
if vec.len() < MAX_ARRIVALS || vec[MAX_ARRIVALS - 1].cost > arrival.cost {
|
||||
vec.push(arrival);
|
||||
fn sort_arrivals(vec: &mut Vec<Arrival>, max_arrivals: usize) {
|
||||
if max_arrivals == 0 {
|
||||
return;
|
||||
}
|
||||
vec.sort_by(|a, b| {
|
||||
a.cost
|
||||
.partial_cmp(&b.cost)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
});
|
||||
if vec.len() > MAX_ARRIVALS {
|
||||
vec.pop();
|
||||
let mut seen_offsets = HashSet::new();
|
||||
let mut remaining = Vec::new();
|
||||
for arr in mem::replace(vec, Vec::new()) {
|
||||
if seen_offsets.insert(arr.state.last_offset()) {
|
||||
if vec.len() < max_arrivals {
|
||||
vec.push(arr);
|
||||
}
|
||||
} else {
|
||||
remaining.push(arr);
|
||||
}
|
||||
}
|
||||
for arr in remaining {
|
||||
if vec.len() >= max_arrivals {
|
||||
break;
|
||||
}
|
||||
vec.push(arr);
|
||||
}
|
||||
}
|
||||
|
||||
fn add_arrival(arrivals: &mut Arrivals, pos: usize, arrival: Arrival, max_arrivals: usize) {
|
||||
let vec = arrivals.entry(pos).or_default();
|
||||
if max_arrivals == 0 {
|
||||
if vec.is_empty() {
|
||||
vec.push(arrival);
|
||||
} else if vec[0].cost > arrival.cost {
|
||||
vec[0] = arrival;
|
||||
}
|
||||
return;
|
||||
}
|
||||
vec.push(arrival);
|
||||
if vec.len() > max_arrivals * 2 {
|
||||
sort_arrivals(vec, max_arrivals);
|
||||
}
|
||||
}
|
||||
fn add_match(
|
||||
@@ -63,6 +101,7 @@ fn parse(data: &[u8], mut progress_cb: Option<ProgressCallback>) -> Option<Rc<Pa
|
||||
offset: usize,
|
||||
length: usize,
|
||||
arrival: &Arrival,
|
||||
max_arrivals: usize,
|
||||
) {
|
||||
cost_counter.reset();
|
||||
let mut state = arrival.state.clone();
|
||||
@@ -82,6 +121,7 @@ fn parse(data: &[u8], mut progress_cb: Option<ProgressCallback>) -> Option<Rc<Pa
|
||||
state,
|
||||
cost: arrival.cost + cost_counter.cost(),
|
||||
},
|
||||
max_arrivals,
|
||||
);
|
||||
}
|
||||
add_arrival(
|
||||
@@ -92,6 +132,7 @@ fn parse(data: &[u8], mut progress_cb: Option<ProgressCallback>) -> Option<Rc<Pa
|
||||
state: lz::CoderState::new(),
|
||||
cost: 0.0,
|
||||
},
|
||||
max_arrivals,
|
||||
);
|
||||
|
||||
let cost_counter = &mut CostCounter::new();
|
||||
@@ -105,7 +146,8 @@ fn parse(data: &[u8], mut progress_cb: Option<ProgressCallback>) -> Option<Rc<Pa
|
||||
.count()
|
||||
};
|
||||
|
||||
let here_arrivals = if let Some(arr) = arrivals.remove(&pos) {
|
||||
let here_arrivals = if let Some(mut arr) = arrivals.remove(&pos) {
|
||||
sort_arrivals(&mut arr, max_arrivals);
|
||||
arr
|
||||
} else {
|
||||
continue;
|
||||
@@ -121,7 +163,12 @@ fn parse(data: &[u8], mut progress_cb: Option<ProgressCallback>) -> Option<Rc<Pa
|
||||
}
|
||||
|
||||
'arrival_loop: for arrival in here_arrivals {
|
||||
if arrival.cost > (best_cost + 16.0).min(*best_per_offset.get(&arrival.state.last_offset()).unwrap()) {
|
||||
if arrival.cost
|
||||
> (best_cost + config.max_cost_delta).min(
|
||||
*best_per_offset.get(&arrival.state.last_offset()).unwrap()
|
||||
+ config.max_offset_cost_delta,
|
||||
)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
let mut found_last_offset = false;
|
||||
@@ -130,13 +177,21 @@ fn parse(data: &[u8], mut progress_cb: Option<ProgressCallback>) -> Option<Rc<Pa
|
||||
closest_match = Some(closest_match.unwrap_or(0).max(m.pos));
|
||||
let offset = pos - m.pos;
|
||||
found_last_offset |= offset as u32 == arrival.state.last_offset();
|
||||
add_match(&mut arrivals, cost_counter, pos, offset, m.length, &arrival);
|
||||
if m.length > 64 {
|
||||
add_match(
|
||||
&mut arrivals,
|
||||
cost_counter,
|
||||
pos,
|
||||
offset,
|
||||
m.length,
|
||||
&arrival,
|
||||
max_arrivals,
|
||||
);
|
||||
if m.length >= config.greedy_size {
|
||||
break 'arrival_loop;
|
||||
}
|
||||
}
|
||||
|
||||
let mut near_matches_left = 8;
|
||||
let mut near_matches_left = config.num_near_matches;
|
||||
let mut match_pos = last_seen[data[pos] as usize];
|
||||
while near_matches_left > 0
|
||||
&& match_pos != usize::MAX
|
||||
@@ -145,7 +200,15 @@ fn parse(data: &[u8], mut progress_cb: Option<ProgressCallback>) -> Option<Rc<Pa
|
||||
let offset = pos - match_pos;
|
||||
let length = match_length(offset);
|
||||
assert!(length > 0);
|
||||
add_match(&mut arrivals, cost_counter, pos, offset, length, &arrival);
|
||||
add_match(
|
||||
&mut arrivals,
|
||||
cost_counter,
|
||||
pos,
|
||||
offset,
|
||||
length,
|
||||
&arrival,
|
||||
max_arrivals,
|
||||
);
|
||||
found_last_offset |= offset as u32 == arrival.state.last_offset();
|
||||
if offset < near_matches.len() {
|
||||
match_pos = near_matches[match_pos % near_matches.len()];
|
||||
@@ -157,7 +220,15 @@ fn parse(data: &[u8], mut progress_cb: Option<ProgressCallback>) -> Option<Rc<Pa
|
||||
let offset = arrival.state.last_offset() as usize;
|
||||
let length = match_length(offset);
|
||||
if length > 0 {
|
||||
add_match(&mut arrivals, cost_counter, pos, offset, length, &arrival);
|
||||
add_match(
|
||||
&mut arrivals,
|
||||
cost_counter,
|
||||
pos,
|
||||
offset,
|
||||
length,
|
||||
&arrival,
|
||||
max_arrivals,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -176,6 +247,7 @@ fn parse(data: &[u8], mut progress_cb: Option<ProgressCallback>) -> Option<Rc<Pa
|
||||
state,
|
||||
cost: arrival.cost + cost_counter.cost(),
|
||||
},
|
||||
max_arrivals,
|
||||
);
|
||||
}
|
||||
near_matches[pos % near_matches.len()] = last_seen[data[pos] as usize];
|
||||
@@ -186,3 +258,56 @@ fn parse(data: &[u8], mut progress_cb: Option<ProgressCallback>) -> Option<Rc<Pa
|
||||
}
|
||||
arrivals.remove(&data.len()).unwrap()[0].parse.clone()
|
||||
}
|
||||
|
||||
struct Config {
|
||||
max_arrivals: usize,
|
||||
max_cost_delta: f64,
|
||||
max_offset_cost_delta: f64,
|
||||
num_near_matches: usize,
|
||||
greedy_size: usize,
|
||||
max_queue_size: usize,
|
||||
patience: usize,
|
||||
max_matches_per_length: usize,
|
||||
max_length_diff: usize,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
fn from_level(level: u8) -> Config {
|
||||
let max_arrivals = match level {
|
||||
0..=1 => 0,
|
||||
2 => 2,
|
||||
3 => 4,
|
||||
4 => 8,
|
||||
5 => 16,
|
||||
6 => 32,
|
||||
7 => 64,
|
||||
8 => 96,
|
||||
_ => 128,
|
||||
};
|
||||
let (max_cost_delta, max_offset_cost_delta) = match level {
|
||||
0..=4 => (16.0, 0.0),
|
||||
5..=8 => (16.0, 4.0),
|
||||
_ => (16.0, 8.0),
|
||||
};
|
||||
let num_near_matches = level.saturating_sub(1) as usize;
|
||||
let greedy_size = 4 + level as usize * level as usize * 3;
|
||||
let max_length_diff = match level {
|
||||
0..=1 => 0,
|
||||
2..=3 => 1,
|
||||
4..=5 => 2,
|
||||
6..=7 => 3,
|
||||
_ => 4,
|
||||
};
|
||||
Config {
|
||||
max_arrivals,
|
||||
max_cost_delta,
|
||||
max_offset_cost_delta,
|
||||
num_near_matches,
|
||||
greedy_size,
|
||||
max_queue_size: level as usize * 100,
|
||||
patience: level as usize * 100,
|
||||
max_matches_per_length: level as usize,
|
||||
max_length_diff,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user