mirror of
https://github.com/exoticorn/upkr.git
synced 2026-01-20 11:36:42 +01:00
speed optimizations + progress bar
This commit is contained in:
@@ -3,7 +3,7 @@ use crate::match_finder::MatchFinder;
|
||||
use crate::rans::RansCoder;
|
||||
|
||||
pub fn pack(data: &[u8]) -> Vec<u8> {
|
||||
let match_finder = MatchFinder::new(data);
|
||||
let mut match_finder = MatchFinder::new(data);
|
||||
let mut rans_coder = RansCoder::new();
|
||||
let mut state = lz::CoderState::new();
|
||||
|
||||
|
||||
@@ -7,4 +7,6 @@ mod parsing_packer;
|
||||
|
||||
pub use greedy_packer::pack as pack_fast;
|
||||
pub use parsing_packer::pack;
|
||||
pub use lz::unpack;
|
||||
pub use lz::unpack;
|
||||
|
||||
/// Borrowed, mutable progress-reporting closure that receives a single `usize`.
///
/// Within this file, `parse` invokes it with `pos + 1` at the end of each iteration of its
/// loop over `0..data.len()`, and `main` passes a closure that forwards the value to a
/// progress bar via `pb.set(pos as u64)`.
pub type ProgressCallback<'a> = &'a mut dyn FnMut(usize);
|
||||
12
src/main.rs
12
src/main.rs
@@ -18,8 +18,18 @@ fn main() -> Result<()> {
|
||||
let packed_data = if fast {
|
||||
upkr::pack_fast(&data)
|
||||
} else {
|
||||
upkr::pack(&data)
|
||||
let mut pb = pbr::ProgressBar::new(data.len() as u64);
|
||||
pb.set_units(pbr::Units::Bytes);
|
||||
let packed_data = upkr::pack(
|
||||
&data,
|
||||
Some(&mut |pos| {
|
||||
pb.set(pos as u64);
|
||||
}),
|
||||
);
|
||||
pb.finish();
|
||||
packed_data
|
||||
};
|
||||
println!("Compressed {} bytes to {} bytes ({}%)", data.len(), packed_data.len(), packed_data.len() as f32 * 100. / data.len() as f32);
|
||||
File::create(outfile)?.write_all(&packed_data)?;
|
||||
}
|
||||
Some("unpack") => {
|
||||
|
||||
@@ -10,6 +10,8 @@ pub struct MatchFinder {
|
||||
max_matches_per_length: usize,
|
||||
patience: usize,
|
||||
max_length_diff: usize,
|
||||
|
||||
queue: BinaryHeap<usize>
|
||||
}
|
||||
|
||||
impl MatchFinder {
|
||||
@@ -43,15 +45,17 @@ impl MatchFinder {
|
||||
suffixes,
|
||||
rev_suffixes,
|
||||
lcp,
|
||||
max_queue_size: 100,
|
||||
max_matches_per_length: 5,
|
||||
patience: 100,
|
||||
max_length_diff: 2,
|
||||
max_queue_size: 1000,
|
||||
max_matches_per_length: 10,
|
||||
patience: 1000,
|
||||
max_length_diff: 4,
|
||||
queue: BinaryHeap::new()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn matches(&self, pos: usize) -> Matches {
|
||||
pub fn matches(&mut self, pos: usize) -> Matches {
|
||||
let index = self.rev_suffixes[pos] as usize;
|
||||
self.queue.clear();
|
||||
let mut matches = Matches {
|
||||
finder: self,
|
||||
pos_range: 0..pos,
|
||||
@@ -62,7 +66,6 @@ impl MatchFinder {
|
||||
current_length: usize::MAX,
|
||||
matches_left: 0,
|
||||
max_length: 0,
|
||||
queue: BinaryHeap::new(),
|
||||
};
|
||||
|
||||
matches.move_left();
|
||||
@@ -73,7 +76,7 @@ impl MatchFinder {
|
||||
}
|
||||
|
||||
pub struct Matches<'a> {
|
||||
finder: &'a MatchFinder,
|
||||
finder: &'a mut MatchFinder,
|
||||
pos_range: Range<usize>,
|
||||
left_index: usize,
|
||||
left_length: usize,
|
||||
@@ -82,7 +85,6 @@ pub struct Matches<'a> {
|
||||
current_length: usize,
|
||||
matches_left: usize,
|
||||
max_length: usize,
|
||||
queue: BinaryHeap<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -95,8 +97,8 @@ impl<'a> Iterator for Matches<'a> {
|
||||
type Item = Match;
|
||||
|
||||
fn next(&mut self) -> Option<Match> {
|
||||
if self.queue.is_empty() || self.matches_left == 0 {
|
||||
self.queue.clear();
|
||||
if self.finder.queue.is_empty() || self.matches_left == 0 {
|
||||
self.finder.queue.clear();
|
||||
self.current_length = self.current_length.saturating_sub(1).min(self.left_length.max(self.right_length));
|
||||
self.max_length = self.max_length.max(self.current_length);
|
||||
if self.current_length < 2
|
||||
@@ -104,16 +106,16 @@ impl<'a> Iterator for Matches<'a> {
|
||||
{
|
||||
return None;
|
||||
}
|
||||
while self.queue.len() < self.finder.max_queue_size
|
||||
while self.finder.queue.len() < self.finder.max_queue_size
|
||||
&& (self.left_length == self.current_length
|
||||
|| self.right_length == self.current_length)
|
||||
{
|
||||
if self.left_length == self.current_length {
|
||||
self.add_to_queue(self.finder.suffixes[self.left_index]);
|
||||
self.finder.queue.push(self.finder.suffixes[self.left_index] as usize);
|
||||
self.move_left();
|
||||
}
|
||||
if self.right_length == self.current_length {
|
||||
self.add_to_queue(self.finder.suffixes[self.right_index]);
|
||||
self.finder.queue.push(self.finder.suffixes[self.right_index] as usize);
|
||||
self.move_right();
|
||||
}
|
||||
}
|
||||
@@ -122,7 +124,7 @@ impl<'a> Iterator for Matches<'a> {
|
||||
|
||||
self.matches_left = self.matches_left.saturating_sub(1);
|
||||
|
||||
self.queue.pop().map(|pos| Match {
|
||||
self.finder.queue.pop().map(|pos| Match {
|
||||
pos,
|
||||
length: self.current_length,
|
||||
})
|
||||
@@ -168,8 +170,4 @@ impl<'a> Matches<'a> {
|
||||
}
|
||||
self.right_length = 0;
|
||||
}
|
||||
|
||||
fn add_to_queue(&mut self, pos: i32) {
|
||||
self.queue.push(pos as usize);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
use std::collections::HashMap;
|
||||
use std::rc::Rc;
|
||||
|
||||
use crate::lz;
|
||||
use crate::{ProgressCallback, lz};
|
||||
use crate::match_finder::MatchFinder;
|
||||
use crate::rans::{CostCounter, RansCoder};
|
||||
|
||||
pub fn pack(data: &[u8]) -> Vec<u8> {
|
||||
let mut parse = parse(data);
|
||||
pub fn pack(data: &[u8], progress_cb: Option<ProgressCallback>) -> Vec<u8> {
|
||||
let mut parse = parse(data, progress_cb);
|
||||
let mut ops = vec![];
|
||||
while let Some(link) = parse {
|
||||
ops.push(link.op);
|
||||
@@ -34,10 +34,10 @@ struct Arrival {
|
||||
|
||||
type Arrivals = HashMap<usize, Vec<Arrival>>;
|
||||
|
||||
const MAX_ARRIVALS: usize = 4;
|
||||
const MAX_ARRIVALS: usize = 256;
|
||||
|
||||
fn parse(data: &[u8]) -> Option<Rc<Parse>> {
|
||||
let match_finder = MatchFinder::new(data);
|
||||
fn parse(data: &[u8], mut progress_cb: Option<ProgressCallback>) -> Option<Rc<Parse>> {
|
||||
let mut match_finder = MatchFinder::new(data);
|
||||
let mut near_matches = [usize::MAX; 1024];
|
||||
let mut last_seen = [usize::MAX; 256];
|
||||
|
||||
@@ -58,18 +58,19 @@ fn parse(data: &[u8]) -> Option<Rc<Parse>> {
|
||||
}
|
||||
fn add_match(
|
||||
arrivals: &mut Arrivals,
|
||||
cost_counter: &mut CostCounter,
|
||||
pos: usize,
|
||||
offset: usize,
|
||||
length: usize,
|
||||
arrival: &Arrival,
|
||||
) {
|
||||
let mut cost_counter = CostCounter(0.);
|
||||
cost_counter.reset();
|
||||
let mut state = arrival.state.clone();
|
||||
let op = lz::Op::Match {
|
||||
offset: offset as u32,
|
||||
len: length as u32,
|
||||
};
|
||||
op.encode(&mut cost_counter, &mut state);
|
||||
op.encode(cost_counter, &mut state);
|
||||
add_arrival(
|
||||
arrivals,
|
||||
pos + length,
|
||||
@@ -79,7 +80,7 @@ fn parse(data: &[u8]) -> Option<Rc<Parse>> {
|
||||
op,
|
||||
})),
|
||||
state,
|
||||
cost: arrival.cost + cost_counter.0,
|
||||
cost: arrival.cost + cost_counter.cost(),
|
||||
},
|
||||
);
|
||||
}
|
||||
@@ -92,6 +93,8 @@ fn parse(data: &[u8]) -> Option<Rc<Parse>> {
|
||||
cost: 0.0,
|
||||
},
|
||||
);
|
||||
|
||||
let cost_counter = &mut CostCounter::new();
|
||||
let mut best_per_offset = HashMap::new();
|
||||
for pos in 0..data.len() {
|
||||
let match_length = |offset: usize| {
|
||||
@@ -117,8 +120,8 @@ fn parse(data: &[u8]) -> Option<Rc<Parse>> {
|
||||
*per_offset = per_offset.min(arrival.cost);
|
||||
}
|
||||
|
||||
for arrival in here_arrivals {
|
||||
if arrival.cost > (best_cost + 32.0).min(*best_per_offset.get(&arrival.state.last_offset()).unwrap()) {
|
||||
'arrival_loop: for arrival in here_arrivals {
|
||||
if arrival.cost > (best_cost + 16.0).min(*best_per_offset.get(&arrival.state.last_offset()).unwrap()) {
|
||||
continue;
|
||||
}
|
||||
let mut found_last_offset = false;
|
||||
@@ -127,10 +130,13 @@ fn parse(data: &[u8]) -> Option<Rc<Parse>> {
|
||||
closest_match = Some(closest_match.unwrap_or(0).max(m.pos));
|
||||
let offset = pos - m.pos;
|
||||
found_last_offset |= offset as u32 == arrival.state.last_offset();
|
||||
add_match(&mut arrivals, pos, offset, m.length, &arrival);
|
||||
add_match(&mut arrivals, cost_counter, pos, offset, m.length, &arrival);
|
||||
if m.length > 64 {
|
||||
break 'arrival_loop;
|
||||
}
|
||||
}
|
||||
|
||||
let mut near_matches_left = 4;
|
||||
let mut near_matches_left = 8;
|
||||
let mut match_pos = last_seen[data[pos] as usize];
|
||||
while near_matches_left > 0
|
||||
&& match_pos != usize::MAX
|
||||
@@ -139,7 +145,7 @@ fn parse(data: &[u8]) -> Option<Rc<Parse>> {
|
||||
let offset = pos - match_pos;
|
||||
let length = match_length(offset);
|
||||
assert!(length > 0);
|
||||
add_match(&mut arrivals, pos, offset, length, &arrival);
|
||||
add_match(&mut arrivals, cost_counter, pos, offset, length, &arrival);
|
||||
found_last_offset |= offset as u32 == arrival.state.last_offset();
|
||||
if offset < near_matches.len() {
|
||||
match_pos = near_matches[match_pos % near_matches.len()];
|
||||
@@ -151,14 +157,14 @@ fn parse(data: &[u8]) -> Option<Rc<Parse>> {
|
||||
let offset = arrival.state.last_offset() as usize;
|
||||
let length = match_length(offset);
|
||||
if length > 0 {
|
||||
add_match(&mut arrivals, pos, offset, length, &arrival);
|
||||
add_match(&mut arrivals, cost_counter, pos, offset, length, &arrival);
|
||||
}
|
||||
}
|
||||
|
||||
let mut cost_counter = CostCounter(0.);
|
||||
cost_counter.reset();
|
||||
let mut state = arrival.state;
|
||||
let op = lz::Op::Literal(data[pos]);
|
||||
op.encode(&mut cost_counter, &mut state);
|
||||
op.encode(cost_counter, &mut state);
|
||||
add_arrival(
|
||||
&mut arrivals,
|
||||
pos + 1,
|
||||
@@ -168,12 +174,15 @@ fn parse(data: &[u8]) -> Option<Rc<Parse>> {
|
||||
op,
|
||||
})),
|
||||
state,
|
||||
cost: arrival.cost + cost_counter.0,
|
||||
cost: arrival.cost + cost_counter.cost(),
|
||||
},
|
||||
);
|
||||
}
|
||||
near_matches[pos % near_matches.len()] = last_seen[data[pos] as usize];
|
||||
last_seen[data[pos] as usize] = pos;
|
||||
if let Some(ref mut cb) = progress_cb {
|
||||
cb(pos + 1);
|
||||
}
|
||||
}
|
||||
arrivals.remove(&data.len()).unwrap()[0].parse.clone()
|
||||
}
|
||||
|
||||
37
src/rans.rs
37
src/rans.rs
@@ -57,13 +57,42 @@ impl RansCoder {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct CostCounter(pub f64);
|
||||
/// `EntropyCoder` implementation that emits nothing and only sums a per-bit cost.
///
/// Each `encode_bit` call adds one `log2_table` entry to `cost`.
pub struct CostCounter {
|
||||
// Running total; read through `cost()` and zeroed by `reset()`.
cost: f64,
|
||||
// Entry `p` holds `log2(ONE_PROB / p)`; filled once in `new()` for `p` in `0..ONE_PROB`.
log2_table: Vec<f64>,
|
||||
}
|
||||
|
||||
impl CostCounter {
|
||||
/// Creates a counter with zero accumulated cost and a precomputed table of
/// `log2(ONE_PROB / prob)` for every `prob` in `0..ONE_PROB`.
///
/// The table has exactly `ONE_PROB` entries, so `ONE_PROB` itself is not a valid index.
pub fn new() -> CostCounter {
|
||||
let log2_table = (0..ONE_PROB)
|
||||
.map(|prob| {
|
||||
// For prob == 0 this is a floating-point division by zero, so entry 0 is not a
// finite cost (+infinity, assuming ONE_PROB is positive — TODO confirm).
let inv_prob = ONE_PROB as f64 / prob as f64;
|
||||
inv_prob.log2()
|
||||
})
|
||||
.collect();
|
||||
CostCounter {
|
||||
cost: 0.0,
|
||||
log2_table,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the cost accumulated since construction or the most recent `reset`.
pub fn cost(&self) -> f64 {
|
||||
self.cost
|
||||
}
|
||||
|
||||
/// Sets the accumulated cost back to zero; the lookup table is kept as is.
pub fn reset(&mut self) {
|
||||
self.cost = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
impl EntropyCoder for CostCounter {
|
||||
// NOTE(review): this span comes from a rendered diff without +/- markers. It appears to
// contain two variants of the body: one adding `log2(ONE_PROB / prob)` to `self.0`
// (tuple-struct field) and one adding a `log2_table` lookup to `self.cost`. Confirm against
// the repository which lines are current.
/// Adds the cost of coding `bit` to the running total.
///
/// `prob` is used as-is when `bit` is true and as `ONE_PROB - prob` when it is false.
fn encode_bit(&mut self, bit: bool, prob: u16) {
|
||||
let prob = if bit { prob as u32 } else { ONE_PROB - prob as u32 };
|
||||
let inv_prob = ONE_PROB as f64 / prob as f64;
|
||||
self.0 += inv_prob.log2();
|
||||
let prob = if bit {
|
||||
prob as u32
|
||||
} else {
|
||||
ONE_PROB - prob as u32
|
||||
};
|
||||
// NOTE(review): `log2_table` is built from `0..ONE_PROB` in `CostCounter::new`, so an
// index equal to ONE_PROB (bit == false with prob == 0) would be out of bounds, and
// index 0 yields a non-finite cost — confirm callers never pass prob == 0 or ONE_PROB.
self.cost += self.log2_table[prob as usize];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user