mirror of
https://github.com/exoticorn/upkr.git
synced 2026-01-20 19:46:42 +01:00
add compression levels
This commit is contained in:
12
src/main.rs
12
src/main.rs
@@ -8,20 +8,21 @@ fn main() -> Result<()> {
|
|||||||
match args.subcommand()?.as_ref().map(|s| s.as_str()) {
|
match args.subcommand()?.as_ref().map(|s| s.as_str()) {
|
||||||
None => print_help(),
|
None => print_help(),
|
||||||
Some("pack") => {
|
Some("pack") => {
|
||||||
let fast = args.contains("--fast");
|
let level = args.opt_value_from_str(["-l", "--level"])?.unwrap_or(2u8);
|
||||||
|
|
||||||
let infile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
|
let infile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
|
||||||
let outfile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
|
let outfile = args.free_from_os_str::<PathBuf, bool>(|s| Ok(s.into()))?;
|
||||||
|
|
||||||
let mut data = vec![];
|
let mut data = vec![];
|
||||||
File::open(infile)?.read_to_end(&mut data)?;
|
File::open(infile)?.read_to_end(&mut data)?;
|
||||||
let packed_data = if fast {
|
let packed_data = if level == 0 {
|
||||||
upkr::pack_fast(&data)
|
upkr::pack_fast(&data)
|
||||||
} else {
|
} else {
|
||||||
let mut pb = pbr::ProgressBar::new(data.len() as u64);
|
let mut pb = pbr::ProgressBar::new(data.len() as u64);
|
||||||
pb.set_units(pbr::Units::Bytes);
|
pb.set_units(pbr::Units::Bytes);
|
||||||
let packed_data = upkr::pack(
|
let packed_data = upkr::pack(
|
||||||
&data,
|
&data,
|
||||||
|
level,
|
||||||
Some(&mut |pos| {
|
Some(&mut |pos| {
|
||||||
pb.set(pos as u64);
|
pb.set(pos as u64);
|
||||||
}),
|
}),
|
||||||
@@ -29,7 +30,12 @@ fn main() -> Result<()> {
|
|||||||
pb.finish();
|
pb.finish();
|
||||||
packed_data
|
packed_data
|
||||||
};
|
};
|
||||||
println!("Compressed {} bytes to {} bytes ({}%)", data.len(), packed_data.len(), packed_data.len() as f32 * 100. / data.len() as f32);
|
println!(
|
||||||
|
"Compressed {} bytes to {} bytes ({}%)",
|
||||||
|
data.len(),
|
||||||
|
packed_data.len(),
|
||||||
|
packed_data.len() as f32 * 100. / data.len() as f32
|
||||||
|
);
|
||||||
File::create(outfile)?.write_all(&packed_data)?;
|
File::create(outfile)?.write_all(&packed_data)?;
|
||||||
}
|
}
|
||||||
Some("unpack") => {
|
Some("unpack") => {
|
||||||
|
|||||||
@@ -45,14 +45,34 @@ impl MatchFinder {
|
|||||||
suffixes,
|
suffixes,
|
||||||
rev_suffixes,
|
rev_suffixes,
|
||||||
lcp,
|
lcp,
|
||||||
max_queue_size: 1000,
|
max_queue_size: 100,
|
||||||
max_matches_per_length: 10,
|
max_matches_per_length: 5,
|
||||||
patience: 1000,
|
patience: 100,
|
||||||
max_length_diff: 4,
|
max_length_diff: 2,
|
||||||
queue: BinaryHeap::new()
|
queue: BinaryHeap::new()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Builder-style setter: takes the finder by value, overwrites its
/// `max_queue_size` field with `v`, and hands the finder back for chaining.
pub fn with_max_queue_size(mut self, v: usize) -> MatchFinder {
|
||||||
|
self.max_queue_size = v;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Builder-style setter: takes the finder by value, overwrites its
/// `patience` field with `v`, and hands the finder back for chaining.
pub fn with_patience(mut self, v: usize) -> MatchFinder {
|
||||||
|
self.patience = v;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Builder-style setter: takes the finder by value, overwrites its
/// `max_matches_per_length` field with `v`, and hands the finder back for chaining.
pub fn with_max_matches_per_length(mut self, v: usize) -> MatchFinder {
|
||||||
|
self.max_matches_per_length = v;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Builder-style setter: takes the finder by value, overwrites its
/// `max_length_diff` field with `v`, and hands the finder back for chaining.
pub fn with_max_length_diff(mut self, v: usize) -> MatchFinder {
|
||||||
|
self.max_length_diff = v;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
pub fn matches(&mut self, pos: usize) -> Matches {
|
pub fn matches(&mut self, pos: usize) -> Matches {
|
||||||
let index = self.rev_suffixes[pos] as usize;
|
let index = self.rev_suffixes[pos] as usize;
|
||||||
self.queue.clear();
|
self.queue.clear();
|
||||||
|
|||||||
@@ -1,12 +1,13 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::{HashMap, HashSet};
|
||||||
|
use std::mem;
|
||||||
use std::rc::Rc;
|
use std::rc::Rc;
|
||||||
|
|
||||||
use crate::{ProgressCallback, lz};
|
|
||||||
use crate::match_finder::MatchFinder;
|
use crate::match_finder::MatchFinder;
|
||||||
use crate::rans::{CostCounter, RansCoder};
|
use crate::rans::{CostCounter, RansCoder};
|
||||||
|
use crate::{lz, ProgressCallback};
|
||||||
|
|
||||||
pub fn pack(data: &[u8], progress_cb: Option<ProgressCallback>) -> Vec<u8> {
|
pub fn pack(data: &[u8], level: u8, progress_cb: Option<ProgressCallback>) -> Vec<u8> {
|
||||||
let mut parse = parse(data, progress_cb);
|
let mut parse = parse(data, Config::from_level(level), progress_cb);
|
||||||
let mut ops = vec![];
|
let mut ops = vec![];
|
||||||
while let Some(link) = parse {
|
while let Some(link) = parse {
|
||||||
ops.push(link.op);
|
ops.push(link.op);
|
||||||
@@ -34,27 +35,64 @@ struct Arrival {
|
|||||||
|
|
||||||
type Arrivals = HashMap<usize, Vec<Arrival>>;
|
type Arrivals = HashMap<usize, Vec<Arrival>>;
|
||||||
|
|
||||||
const MAX_ARRIVALS: usize = 256;
|
fn parse(
|
||||||
|
data: &[u8],
|
||||||
fn parse(data: &[u8], mut progress_cb: Option<ProgressCallback>) -> Option<Rc<Parse>> {
|
config: Config,
|
||||||
let mut match_finder = MatchFinder::new(data);
|
mut progress_cb: Option<ProgressCallback>,
|
||||||
|
) -> Option<Rc<Parse>> {
|
||||||
|
let mut match_finder = MatchFinder::new(data)
|
||||||
|
.with_max_queue_size(config.max_queue_size)
|
||||||
|
.with_patience(config.patience)
|
||||||
|
.with_max_matches_per_length(config.max_matches_per_length)
|
||||||
|
.with_max_length_diff(config.max_length_diff);
|
||||||
let mut near_matches = [usize::MAX; 1024];
|
let mut near_matches = [usize::MAX; 1024];
|
||||||
let mut last_seen = [usize::MAX; 256];
|
let mut last_seen = [usize::MAX; 256];
|
||||||
|
|
||||||
|
let max_arrivals = config.max_arrivals;
|
||||||
|
|
||||||
let mut arrivals: Arrivals = HashMap::new();
|
let mut arrivals: Arrivals = HashMap::new();
|
||||||
fn add_arrival(arrivals: &mut Arrivals, pos: usize, arrival: Arrival) {
|
fn sort_arrivals(vec: &mut Vec<Arrival>, max_arrivals: usize) {
|
||||||
let vec = arrivals.entry(pos).or_default();
|
if max_arrivals == 0 {
|
||||||
if vec.len() < MAX_ARRIVALS || vec[MAX_ARRIVALS - 1].cost > arrival.cost {
|
return;
|
||||||
vec.push(arrival);
|
}
|
||||||
vec.sort_by(|a, b| {
|
vec.sort_by(|a, b| {
|
||||||
a.cost
|
a.cost
|
||||||
.partial_cmp(&b.cost)
|
.partial_cmp(&b.cost)
|
||||||
.unwrap_or(std::cmp::Ordering::Equal)
|
.unwrap_or(std::cmp::Ordering::Equal)
|
||||||
});
|
});
|
||||||
if vec.len() > MAX_ARRIVALS {
|
let mut seen_offsets = HashSet::new();
|
||||||
vec.pop();
|
let mut remaining = Vec::new();
|
||||||
|
for arr in mem::replace(vec, Vec::new()) {
|
||||||
|
if seen_offsets.insert(arr.state.last_offset()) {
|
||||||
|
if vec.len() < max_arrivals {
|
||||||
|
vec.push(arr);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
remaining.push(arr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for arr in remaining {
|
||||||
|
if vec.len() >= max_arrivals {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
vec.push(arr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_arrival(arrivals: &mut Arrivals, pos: usize, arrival: Arrival, max_arrivals: usize) {
|
||||||
|
let vec = arrivals.entry(pos).or_default();
|
||||||
|
if max_arrivals == 0 {
|
||||||
|
if vec.is_empty() {
|
||||||
|
vec.push(arrival);
|
||||||
|
} else if vec[0].cost > arrival.cost {
|
||||||
|
vec[0] = arrival;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
vec.push(arrival);
|
||||||
|
if vec.len() > max_arrivals * 2 {
|
||||||
|
sort_arrivals(vec, max_arrivals);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
fn add_match(
|
fn add_match(
|
||||||
arrivals: &mut Arrivals,
|
arrivals: &mut Arrivals,
|
||||||
@@ -63,6 +101,7 @@ fn parse(data: &[u8], mut progress_cb: Option<ProgressCallback>) -> Option<Rc<Pa
|
|||||||
offset: usize,
|
offset: usize,
|
||||||
length: usize,
|
length: usize,
|
||||||
arrival: &Arrival,
|
arrival: &Arrival,
|
||||||
|
max_arrivals: usize,
|
||||||
) {
|
) {
|
||||||
cost_counter.reset();
|
cost_counter.reset();
|
||||||
let mut state = arrival.state.clone();
|
let mut state = arrival.state.clone();
|
||||||
@@ -82,6 +121,7 @@ fn parse(data: &[u8], mut progress_cb: Option<ProgressCallback>) -> Option<Rc<Pa
|
|||||||
state,
|
state,
|
||||||
cost: arrival.cost + cost_counter.cost(),
|
cost: arrival.cost + cost_counter.cost(),
|
||||||
},
|
},
|
||||||
|
max_arrivals,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
add_arrival(
|
add_arrival(
|
||||||
@@ -92,6 +132,7 @@ fn parse(data: &[u8], mut progress_cb: Option<ProgressCallback>) -> Option<Rc<Pa
|
|||||||
state: lz::CoderState::new(),
|
state: lz::CoderState::new(),
|
||||||
cost: 0.0,
|
cost: 0.0,
|
||||||
},
|
},
|
||||||
|
max_arrivals,
|
||||||
);
|
);
|
||||||
|
|
||||||
let cost_counter = &mut CostCounter::new();
|
let cost_counter = &mut CostCounter::new();
|
||||||
@@ -105,7 +146,8 @@ fn parse(data: &[u8], mut progress_cb: Option<ProgressCallback>) -> Option<Rc<Pa
|
|||||||
.count()
|
.count()
|
||||||
};
|
};
|
||||||
|
|
||||||
let here_arrivals = if let Some(arr) = arrivals.remove(&pos) {
|
let here_arrivals = if let Some(mut arr) = arrivals.remove(&pos) {
|
||||||
|
sort_arrivals(&mut arr, max_arrivals);
|
||||||
arr
|
arr
|
||||||
} else {
|
} else {
|
||||||
continue;
|
continue;
|
||||||
@@ -121,7 +163,12 @@ fn parse(data: &[u8], mut progress_cb: Option<ProgressCallback>) -> Option<Rc<Pa
|
|||||||
}
|
}
|
||||||
|
|
||||||
'arrival_loop: for arrival in here_arrivals {
|
'arrival_loop: for arrival in here_arrivals {
|
||||||
if arrival.cost > (best_cost + 16.0).min(*best_per_offset.get(&arrival.state.last_offset()).unwrap()) {
|
if arrival.cost
|
||||||
|
> (best_cost + config.max_cost_delta).min(
|
||||||
|
*best_per_offset.get(&arrival.state.last_offset()).unwrap()
|
||||||
|
+ config.max_offset_cost_delta,
|
||||||
|
)
|
||||||
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let mut found_last_offset = false;
|
let mut found_last_offset = false;
|
||||||
@@ -130,13 +177,21 @@ fn parse(data: &[u8], mut progress_cb: Option<ProgressCallback>) -> Option<Rc<Pa
|
|||||||
closest_match = Some(closest_match.unwrap_or(0).max(m.pos));
|
closest_match = Some(closest_match.unwrap_or(0).max(m.pos));
|
||||||
let offset = pos - m.pos;
|
let offset = pos - m.pos;
|
||||||
found_last_offset |= offset as u32 == arrival.state.last_offset();
|
found_last_offset |= offset as u32 == arrival.state.last_offset();
|
||||||
add_match(&mut arrivals, cost_counter, pos, offset, m.length, &arrival);
|
add_match(
|
||||||
if m.length > 64 {
|
&mut arrivals,
|
||||||
|
cost_counter,
|
||||||
|
pos,
|
||||||
|
offset,
|
||||||
|
m.length,
|
||||||
|
&arrival,
|
||||||
|
max_arrivals,
|
||||||
|
);
|
||||||
|
if m.length >= config.greedy_size {
|
||||||
break 'arrival_loop;
|
break 'arrival_loop;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut near_matches_left = 8;
|
let mut near_matches_left = config.num_near_matches;
|
||||||
let mut match_pos = last_seen[data[pos] as usize];
|
let mut match_pos = last_seen[data[pos] as usize];
|
||||||
while near_matches_left > 0
|
while near_matches_left > 0
|
||||||
&& match_pos != usize::MAX
|
&& match_pos != usize::MAX
|
||||||
@@ -145,7 +200,15 @@ fn parse(data: &[u8], mut progress_cb: Option<ProgressCallback>) -> Option<Rc<Pa
|
|||||||
let offset = pos - match_pos;
|
let offset = pos - match_pos;
|
||||||
let length = match_length(offset);
|
let length = match_length(offset);
|
||||||
assert!(length > 0);
|
assert!(length > 0);
|
||||||
add_match(&mut arrivals, cost_counter, pos, offset, length, &arrival);
|
add_match(
|
||||||
|
&mut arrivals,
|
||||||
|
cost_counter,
|
||||||
|
pos,
|
||||||
|
offset,
|
||||||
|
length,
|
||||||
|
&arrival,
|
||||||
|
max_arrivals,
|
||||||
|
);
|
||||||
found_last_offset |= offset as u32 == arrival.state.last_offset();
|
found_last_offset |= offset as u32 == arrival.state.last_offset();
|
||||||
if offset < near_matches.len() {
|
if offset < near_matches.len() {
|
||||||
match_pos = near_matches[match_pos % near_matches.len()];
|
match_pos = near_matches[match_pos % near_matches.len()];
|
||||||
@@ -157,7 +220,15 @@ fn parse(data: &[u8], mut progress_cb: Option<ProgressCallback>) -> Option<Rc<Pa
|
|||||||
let offset = arrival.state.last_offset() as usize;
|
let offset = arrival.state.last_offset() as usize;
|
||||||
let length = match_length(offset);
|
let length = match_length(offset);
|
||||||
if length > 0 {
|
if length > 0 {
|
||||||
add_match(&mut arrivals, cost_counter, pos, offset, length, &arrival);
|
add_match(
|
||||||
|
&mut arrivals,
|
||||||
|
cost_counter,
|
||||||
|
pos,
|
||||||
|
offset,
|
||||||
|
length,
|
||||||
|
&arrival,
|
||||||
|
max_arrivals,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -176,6 +247,7 @@ fn parse(data: &[u8], mut progress_cb: Option<ProgressCallback>) -> Option<Rc<Pa
|
|||||||
state,
|
state,
|
||||||
cost: arrival.cost + cost_counter.cost(),
|
cost: arrival.cost + cost_counter.cost(),
|
||||||
},
|
},
|
||||||
|
max_arrivals,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
near_matches[pos % near_matches.len()] = last_seen[data[pos] as usize];
|
near_matches[pos % near_matches.len()] = last_seen[data[pos] as usize];
|
||||||
@@ -186,3 +258,56 @@ fn parse(data: &[u8], mut progress_cb: Option<ProgressCallback>) -> Option<Rc<Pa
|
|||||||
}
|
}
|
||||||
arrivals.remove(&data.len()).unwrap()[0].parse.clone()
|
arrivals.remove(&data.len()).unwrap()[0].parse.clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Tuning parameters for the parser and its match finder, all derived from a
/// single compression level by [`Config::from_level`].
#[derive(Debug, Clone, PartialEq)]
struct Config {
    /// Cap on the arrivals kept per position; `0` keeps only the single
    /// cheapest arrival.
    max_arrivals: usize,
    /// An arrival is skipped once its cost exceeds the best cost at the
    /// position by more than this amount.
    max_cost_delta: f64,
    /// Slack added to the best cost recorded for the arrival's last offset
    /// before that bound is applied.
    max_offset_cost_delta: f64,
    /// How many nearby previous occurrences of the current byte are tried as
    /// match candidates per arrival.
    num_near_matches: usize,
    /// A match at least this long stops the search over further arrivals at
    /// the current position.
    greedy_size: usize,
    /// Forwarded to `MatchFinder::with_max_queue_size`.
    max_queue_size: usize,
    /// Forwarded to `MatchFinder::with_patience`.
    patience: usize,
    /// Forwarded to `MatchFinder::with_max_matches_per_length`.
    max_matches_per_length: usize,
    /// Forwarded to `MatchFinder::with_max_length_diff`.
    max_length_diff: usize,
}

impl Config {
    /// Builds the parameter set for compression level `level`.
    ///
    /// Table-driven values use the catch-all arm for every level past the last
    /// one listed; the arithmetic limits keep scaling with `level` itself.
    fn from_level(level: u8) -> Config {
        // Widen once, losslessly, for every limit that is computed arithmetically.
        let scale = usize::from(level);

        let max_arrivals = match level {
            0..=1 => 0,
            2 => 2,
            3 => 4,
            4 => 8,
            5 => 16,
            6 => 32,
            7 => 64,
            8 => 96,
            _ => 128,
        };
        let (max_cost_delta, max_offset_cost_delta) = match level {
            0..=4 => (16.0, 0.0),
            5..=8 => (16.0, 4.0),
            _ => (16.0, 8.0),
        };
        let max_length_diff = match level {
            0..=1 => 0,
            2..=3 => 1,
            4..=5 => 2,
            6..=7 => 3,
            _ => 4,
        };

        Config {
            max_arrivals,
            max_cost_delta,
            max_offset_cost_delta,
            // Saturating so that level 0 yields 0 rather than underflowing.
            num_near_matches: scale.saturating_sub(1),
            greedy_size: 4 + scale * scale * 3,
            max_queue_size: scale * 100,
            patience: scale * 100,
            max_matches_per_length: scale,
            max_length_diff,
        }
    }
}
|
||||||
|
|||||||
Reference in New Issue
Block a user