From 93b2bb60bd11bbd5351934d484e679aa231aad8e Mon Sep 17 00:00:00 2001 From: Dennis Ranke Date: Sat, 20 Nov 2021 23:36:16 +0100 Subject: [PATCH] implement upkr unpacker in wasm, use to load compressed base --- platform/loader.cwa | 122 ++++++++++++++++++++++++++++++++++-- src/lib.rs | 12 +++- uw8-tool/src/base_module.rs | 7 +++ uw8-tool/src/lib.rs | 5 ++ uw8-tool/src/main.rs | 14 ++--- web/src/main.js | 8 ++- 6 files changed, 149 insertions(+), 19 deletions(-) create mode 100644 uw8-tool/src/lib.rs diff --git a/platform/loader.cwa b/platform/loader.cwa index cc8b56b..9954709 100644 --- a/platform/loader.cwa +++ b/platform/loader.cwa @@ -1,8 +1,8 @@ -import "env.memory" memory(8); +import "env.memory" memory(9); export fn load_uw8(module_start: i32, module_end: i32, base_start: i32, base_end: i32) -> i32 { if ?module_start == 0 { - let defer length = module_end - module_start; + let lazy length = module_end - module_start; copy(base_end, module_start, length); return base_end + length; } @@ -14,7 +14,7 @@ export fn load_uw8(module_start: i32, module_end: i32, base_start: i32, base_end loop sections { if src < module_end & (base_start >= base_end | ?src <= ?base_start) { - let defer length2 = copy_section(dest, src); + let lazy length2 = copy_section(dest, src); dest = dest + length2; if base_start < base_end & ?src == ?base_start { base_start = base_start + section_size(base_start); @@ -24,7 +24,7 @@ export fn load_uw8(module_start: i32, module_end: i32, base_start: i32, base_end } if base_start < base_end { - let defer length3 = copy_section(dest, base_start); + let lazy length3 = copy_section(dest, base_start); dest = dest + length3; base_start = base_start + length3; branch sections; @@ -39,7 +39,7 @@ fn section_size(ptr: i32) -> i32 { let l = 0; let shift = 0; loop size { - let defer b = ?p; + let lazy b = ?p; l = l | ((b & 127) << shift); shift = shift + 7; p = p + 1; @@ -49,7 +49,7 @@ fn section_size(ptr: i32) -> i32 { } fn copy_section(dest: i32, src: i32) -> i32 { - let defer length = section_size(src); + let lazy length = section_size(src); copy(dest, src, length); length } @@ -61,4 +61,114 @@ fn copy(dest: i32, src: i32, len: i32) { branch_if len: bytes } } +} + +// upkr unpacker + +global mut upkr_src_ptr: i32 = 0; +global mut upkr_code: i64 = 0i64; +global mut upkr_low: i64 = 0i64; +global mut upkr_range: i64 = 0i64; + +// uncompress upkr compressed data at `src` into the buffer at `dest` +// returns the end of the uncompressed data +export fn uncompress(src_ptr: i32, dest_ptr: i32) -> i32 { + upkr_src_ptr = src_ptr; + upkr_code = 0i64; + upkr_low = 0i64; + upkr_range = 1i64; + + let offset: i32; + + let byte: i32; + + let i: i32; + loop init_contexts { + i!0x80000 = 0x8000; + branch_if (i := i + 4) < (256 + 1 + 128) * 4: init_contexts + } + + block finished { + loop unpack_loop { + if upkr_bit(0) { + if upkr_bit(256) { + offset = upkr_length(257) - 1; + branch_if !offset: finished + } + let length = upkr_length(257 + 64); + loop copy { + dest_ptr?0 = (dest_ptr - offset)?0; + dest_ptr = dest_ptr + 1; + branch_if (length := length - 1): copy; + } + } else { + // literal + i = 0; + byte = 1; + loop literal { + byte = (byte << 1) | upkr_bit(byte); + branch_if (i := i + 1) < 8: literal; + } + dest_ptr?0 = byte; + dest_ptr = dest_ptr + 1; + } + branch unpack_loop; + } + } + + dest_ptr +} + +fn upkr_length(context_index: i32) -> i32 { + let length: i32; + let bit_pos: i32; + loop bits { + if upkr_bit(context_index) { + length = length | (upkr_bit(context_index + 1) << bit_pos); + context_index = context_index + 2; + bit_pos = bit_pos + 1; + branch bits; + } + } + length | (1 << bit_pos) +} + +fn upkr_bit(context_index: i32) -> i32 { + let prob = ((context_index * 4)!0x80000) as i64; + + loop refill { + if upkr_low >> 32i64 == (upkr_low + upkr_range - 1i64) >> 32i64 { + upkr_append_byte(); + branch refill; + } + } + + if upkr_range < (1i64 << 24i64) { + upkr_append_byte(); + upkr_append_byte(); + upkr_range = (1i64 << 40i64) - upkr_low; + } + + let range = upkr_range / 65536i64; + let bit = (upkr_code - upkr_low) / range < prob; + + if bit { + upkr_range = range * prob; + prob = prob + (((1i64 << 16i64) - prob) >> 4i64); + } else { + upkr_low = upkr_low + range * prob; + upkr_range = range * (65536i64 - prob); + prob = prob - (prob >> 4i64); + } + + (context_index * 4)!0x80000 = prob as i32; + + bit +} + +fn upkr_append_byte() { + upkr_code = ((upkr_code & i64.extend_i32_u(-1)) << 8i64) | (?upkr_src_ptr) as i64; + upkr_src_ptr = upkr_src_ptr + 1; + upkr_low = (upkr_low & i64.extend_i32_u(-1)) << 8i64; + upkr_range = upkr_range << 8i64; } \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 45ec8d5..b6c3053 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -178,12 +178,20 @@ impl Loader { fn load(&mut self, module_data: &[u8]) -> Result> { let memory = self.memory.data_mut(&mut self.store); + let compressed_base_module = include_bytes!("../uw8-tool/base.upk"); + memory[..compressed_base_module.len()].copy_from_slice(compressed_base_module); + + let base_end = self.instance.get_typed_func::<(i32, i32), i32, _>(&mut self.store, "uncompress")?.call(&mut self.store, (0, 0x84000))? as u32 as usize; + + let memory = self.memory.data_mut(&mut self.store); + + let base_module = memory[0x84000..base_end].to_vec(); + let base_start = module_data.len(); memory[..base_start].copy_from_slice(module_data); - let base_module = include_bytes!("../uw8-tool/base1.wasm"); let base_end = base_start + base_module.len(); - memory[base_start..base_end].copy_from_slice(base_module); + memory[base_start..base_end].copy_from_slice(&base_module); let load_uw8 = self .instance diff --git a/uw8-tool/src/base_module.rs b/uw8-tool/src/base_module.rs index e7cd75b..3ae0903 100644 --- a/uw8-tool/src/base_module.rs +++ b/uw8-tool/src/base_module.rs @@ -192,6 +192,13 @@ impl BaseModule { File::create(path)?.write_all(&self.to_wasm())?; Ok(()) } + + pub fn create_binary(path: &Path) -> Result<()> { + let base1 = BaseModule::for_format_version(1)?.to_wasm(); + let data = upkr::pack(&base1); + File::create(path)?.write_all(&data)?; + Ok(()) + } } fn add_function( diff --git a/uw8-tool/src/lib.rs b/uw8-tool/src/lib.rs new file mode 100644 index 0000000..e25733f --- /dev/null +++ b/uw8-tool/src/lib.rs @@ -0,0 +1,5 @@ +mod base_module; +pub mod pack; + +pub use base_module::BaseModule; +pub use pack::{pack_file, unpack, unpack_file}; \ No newline at end of file diff --git a/uw8-tool/src/main.rs b/uw8-tool/src/main.rs index 766a1e0..b643e8e 100644 --- a/uw8-tool/src/main.rs +++ b/uw8-tool/src/main.rs @@ -1,10 +1,7 @@ -mod base_module; -mod pack; - use std::path::PathBuf; use anyhow::Result; -use base_module::BaseModule; +use uw8_tool::BaseModule; use pico_args::Arguments; fn main() -> Result<()> { @@ -13,20 +10,19 @@ fn main() -> Result<()> { if let Some(cmd) = args.subcommand()? { match cmd.as_str() { "make-base" => { - let version: u8 = args.free_from_str()?; - BaseModule::for_format_version(version)? - .write_to_file(format!("base{}.wasm", version))?; + let path: PathBuf = args.free_from_str()?; + BaseModule::create_binary(&path)?; } "pack" => { let version: u8 = args.opt_value_from_str(["-v", "--version"])?.unwrap_or(1); let source: PathBuf = args.free_from_str()?; let dest: PathBuf = args.free_from_str()?; - pack::pack_file(&source, &dest, version)?; + uw8_tool::pack_file(&source, &dest, version)?; } "unpack" => { let source: PathBuf = args.free_from_str()?; let dest: PathBuf = args.free_from_str()?; - pack::unpack_file(&source, &dest)?; + uw8_tool::unpack_file(&source, &dest)?; } _ => { eprintln!("Unknown subcommand '{}'", cmd); diff --git a/web/src/main.js b/web/src/main.js index d9d4427..9106c01 100644 --- a/web/src/main.js +++ b/web/src/main.js @@ -1,6 +1,6 @@ import loaderUrl from "data-url:../../platform/loader.wasm"; import platformUrl from "data-url:../../platform/platform.wasm"; -import baseUrl from "data-url:../../uw8-tool/base1.wasm"; +import baseUrl from "data-url:../../uw8-tool/base.upk"; async function loadWasm(url, imports) { let wasm_module = await (await fetch(url)).arrayBuffer(); @@ -66,9 +66,13 @@ async function runModule(data) { let loadMem = loaderImport.env.memory.buffer; let loader = await loadWasm(loaderUrl, loaderImport); - let baseModule = await (await fetch(baseUrl)).arrayBuffer(); + let packedBaseModule = await (await fetch(baseUrl)).arrayBuffer(); if (dataU8Array[0] != 0) { + new Uint8Array(loadMem).set(new Uint8Array(packedBaseModule)); + let baseEnd = loader.exports.uncompress(0, 0x84000); + let baseModule = loadMem.slice(0x84000, baseEnd); + new Uint8Array(loadMem).set(dataU8Array); new Uint8Array(loadMem).set(new Uint8Array(baseModule), data.byteLength);