1 Commits

Author SHA1 Message Date
130bf821fa implement dictionary support when packing 2025-03-11 10:53:40 +01:00
15 changed files with 239 additions and 701 deletions

375
Cargo.lock generated
View File

@@ -1,33 +1,30 @@
# This file is automatically @generated by Cargo. # This file is automatically @generated by Cargo.
# It is not intended for manual editing. # It is not intended for manual editing.
version = 4 version = 3
[[package]] [[package]]
name = "anyhow" name = "anyhow"
version = "1.0.98" version = "1.0.47"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" checksum = "38d9ff5d688f1c13395289f67db01d4826b46dd694e7580accdc3e8430f2d98e"
[[package]] [[package]]
name = "autocfg" name = "autocfg"
version = "1.5.0" version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]] [[package]]
name = "bitflags" name = "bitflags"
version = "2.9.1" version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]] [[package]]
name = "cc" name = "cc"
version = "1.2.29" version = "1.0.72"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c1599538de2394445747c8cf7935946e3cc27e9625f889d979bfb2aaf569362" checksum = "22a9137b95ea06864e018375b72adfb7db6e6f68cfc8df5a04d00288050485ee"
dependencies = [
"shlex",
]
[[package]] [[package]]
name = "cdivsufsort" name = "cdivsufsort"
@@ -41,104 +38,118 @@ dependencies = [
[[package]] [[package]]
name = "cfg-if" name = "cfg-if"
version = "1.0.1" version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]] [[package]]
name = "crossbeam-channel" name = "crossbeam-channel"
version = "0.5.15" version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4"
dependencies = [ dependencies = [
"cfg-if",
"crossbeam-utils", "crossbeam-utils",
] ]
[[package]] [[package]]
name = "crossbeam-utils" name = "crossbeam-utils"
version = "0.8.21" version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db"
dependencies = [
"cfg-if",
"lazy_static",
]
[[package]] [[package]]
name = "crossterm" name = "crossterm"
version = "0.29.0" version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b" checksum = "e64e6c0fbe2c17357405f7c758c1ef960fce08bdfb2c03d88d2a18d7e09c4b67"
dependencies = [ dependencies = [
"bitflags", "bitflags",
"document-features", "crossterm_winapi",
"parking_lot",
"rustix",
]
[[package]]
name = "document-features"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95249b50c6c185bee49034bcb378a49dc2b5dff0be90ff6616d31d64febab05d"
dependencies = [
"litrs",
]
[[package]]
name = "errno"
version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad"
dependencies = [
"libc", "libc",
"windows-sys", "mio",
"parking_lot",
"signal-hook",
"signal-hook-mio",
"winapi",
] ]
[[package]]
name = "crossterm_winapi"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ae1b35a484aa10e07fe0638d02301c5ad24de82d310ccbd2f3693da5f09bf1c"
dependencies = [
"winapi",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]] [[package]]
name = "lexopt" name = "lexopt"
version = "0.3.1" version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fa0e2a1fcbe2f6be6c42e342259976206b383122fc152e872795338b5a3f3a7" checksum = "478ee9e62aaeaf5b140bd4138753d1f109765488581444218d3ddda43234f3e8"
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.174" version = "0.2.135"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" checksum = "68783febc7782c6c5cb401fbda4de5a9898be1762314da0bb2c10ced61f18b0c"
[[package]]
name = "linux-raw-sys"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12"
[[package]]
name = "litrs"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4ce301924b7887e9d637144fdade93f9dfff9b60981d4ac161db09720d39aa5"
[[package]] [[package]]
name = "lock_api" name = "lock_api"
version = "0.4.13" version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df"
dependencies = [ dependencies = [
"autocfg", "autocfg",
"scopeguard", "scopeguard",
] ]
[[package]] [[package]]
name = "num-traits" name = "log"
version = "0.2.19" version = "0.4.17"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
dependencies = [
"cfg-if",
]
[[package]]
name = "mio"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57ee1c23c7c63b0c9250c339ffdc69255f110b298b901b9f6c82547b7b87caaf"
dependencies = [
"libc",
"log",
"wasi 0.11.0+wasi-snapshot-preview1",
"windows-sys 0.36.1",
]
[[package]]
name = "num-traits"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
dependencies = [ dependencies = [
"autocfg", "autocfg",
] ]
[[package]] [[package]]
name = "parking_lot" name = "parking_lot"
version = "0.12.4" version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
dependencies = [ dependencies = [
"lock_api", "lock_api",
"parking_lot_core", "parking_lot_core",
@@ -146,68 +157,56 @@ dependencies = [
[[package]] [[package]]
name = "parking_lot_core" name = "parking_lot_core"
version = "0.9.11" version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" checksum = "4dc9e0dc2adc1c69d09143aff38d3d30c5c3f0df0dad82e6d25547af174ebec0"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"libc", "libc",
"redox_syscall", "redox_syscall",
"smallvec", "smallvec",
"windows-targets 0.52.6", "windows-sys 0.42.0",
] ]
[[package]] [[package]]
name = "pbr" name = "pbr"
version = "1.1.1" version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed5827dfa0d69b6c92493d6c38e633bbaa5937c153d0d7c28bf12313f8c6d514" checksum = "ff5751d87f7c00ae6403eb1fcbba229b9c76c9a30de8c1cf87182177b168cea2"
dependencies = [ dependencies = [
"crossbeam-channel", "crossbeam-channel",
"libc", "libc",
"time",
"winapi", "winapi",
] ]
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.95" version = "1.0.44"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" checksum = "7bd7356a8122b6c4a24a82b278680c73357984ca2fc79a0f9fa6dea7dced7c58"
dependencies = [ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]] [[package]]
name = "quote" name = "quote"
version = "1.0.40" version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
] ]
[[package]] [[package]]
name = "redox_syscall" name = "redox_syscall"
version = "0.5.13" version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6" checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
dependencies = [ dependencies = [
"bitflags", "bitflags",
] ]
[[package]]
name = "rustix"
version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8"
dependencies = [
"bitflags",
"errno",
"libc",
"linux-raw-sys",
"windows-sys",
]
[[package]] [[package]]
name = "sacabase" name = "sacabase"
version = "2.0.0" version = "2.0.0"
@@ -219,27 +218,51 @@ dependencies = [
[[package]] [[package]]
name = "scopeguard" name = "scopeguard"
version = "1.2.0" version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]] [[package]]
name = "shlex" name = "signal-hook"
version = "1.3.0" version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" checksum = "a253b5e89e2698464fc26b545c9edceb338e18a89effeeecfea192c3025be29d"
dependencies = [
"libc",
"signal-hook-registry",
]
[[package]]
name = "signal-hook-mio"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29ad2e15f37ec9a6cc544097b78a1ec90001e9f71b81338ca39f430adaca99af"
dependencies = [
"libc",
"mio",
"signal-hook",
]
[[package]]
name = "signal-hook-registry"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0"
dependencies = [
"libc",
]
[[package]] [[package]]
name = "smallvec" name = "smallvec"
version = "1.15.1" version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
[[package]] [[package]]
name = "syn" name = "syn"
version = "2.0.104" version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" checksum = "e90cde112c4b9690b8cbe810cba9ddd8bc1d7472e2cae317b69e9438c1cba7d2"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@@ -248,18 +271,18 @@ dependencies = [
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "2.0.12" version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" checksum = "0a99cb8c4b9a8ef0e7907cd3b617cc8dc04d571c4e73c8ae403d80ac160bb122"
dependencies = [ dependencies = [
"thiserror-impl", "thiserror-impl",
] ]
[[package]] [[package]]
name = "thiserror-impl" name = "thiserror-impl"
version = "2.0.12" version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" checksum = "3a891860d3c8d66fec8e73ddb3765f90082374dbaaa833407b904a94f1a7eb43"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@@ -267,14 +290,25 @@ dependencies = [
] ]
[[package]] [[package]]
name = "unicode-ident" name = "time"
version = "1.0.18" version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"
dependencies = [
"libc",
"wasi 0.10.0+wasi-snapshot-preview1",
"winapi",
]
[[package]]
name = "unicode-ident"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcc811dc4066ac62f84f11307873c4850cb653bfa9b1719cee2bd2204a4bc5dd"
[[package]] [[package]]
name = "upkr" name = "upkr"
version = "0.2.3" version = "0.2.2"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"cdivsufsort", "cdivsufsort",
@@ -284,6 +318,18 @@ dependencies = [
"thiserror", "thiserror",
] ]
[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]] [[package]]
name = "winapi" name = "winapi"
version = "0.3.9" version = "0.3.9"
@@ -308,137 +354,100 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]] [[package]]
name = "windows-sys" name = "windows-sys"
version = "0.60.2" version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2"
dependencies = [ dependencies = [
"windows-targets 0.53.2", "windows_aarch64_msvc 0.36.1",
"windows_i686_gnu 0.36.1",
"windows_i686_msvc 0.36.1",
"windows_x86_64_gnu 0.36.1",
"windows_x86_64_msvc 0.36.1",
] ]
[[package]] [[package]]
name = "windows-targets" name = "windows-sys"
version = "0.52.6" version = "0.42.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7"
dependencies = [ dependencies = [
"windows_aarch64_gnullvm 0.52.6", "windows_aarch64_gnullvm",
"windows_aarch64_msvc 0.52.6", "windows_aarch64_msvc 0.42.0",
"windows_i686_gnu 0.52.6", "windows_i686_gnu 0.42.0",
"windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.42.0",
"windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.42.0",
"windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm",
"windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.42.0",
"windows_x86_64_msvc 0.52.6",
]
[[package]]
name = "windows-targets"
version = "0.53.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef"
dependencies = [
"windows_aarch64_gnullvm 0.53.0",
"windows_aarch64_msvc 0.53.0",
"windows_i686_gnu 0.53.0",
"windows_i686_gnullvm 0.53.0",
"windows_i686_msvc 0.53.0",
"windows_x86_64_gnu 0.53.0",
"windows_x86_64_gnullvm 0.53.0",
"windows_x86_64_msvc 0.53.0",
] ]
[[package]] [[package]]
name = "windows_aarch64_gnullvm" name = "windows_aarch64_gnullvm"
version = "0.52.6" version = "0.42.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e"
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.53.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764"
[[package]] [[package]]
name = "windows_aarch64_msvc" name = "windows_aarch64_msvc"
version = "0.52.6" version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47"
[[package]] [[package]]
name = "windows_aarch64_msvc" name = "windows_aarch64_msvc"
version = "0.53.0" version = "0.42.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4"
[[package]] [[package]]
name = "windows_i686_gnu" name = "windows_i686_gnu"
version = "0.52.6" version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6"
[[package]] [[package]]
name = "windows_i686_gnu" name = "windows_i686_gnu"
version = "0.53.0" version = "0.42.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_gnullvm"
version = "0.53.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11"
[[package]] [[package]]
name = "windows_i686_msvc" name = "windows_i686_msvc"
version = "0.52.6" version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024"
[[package]] [[package]]
name = "windows_i686_msvc" name = "windows_i686_msvc"
version = "0.53.0" version = "0.42.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246"
[[package]] [[package]]
name = "windows_x86_64_gnu" name = "windows_x86_64_gnu"
version = "0.52.6" version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1"
[[package]] [[package]]
name = "windows_x86_64_gnu" name = "windows_x86_64_gnu"
version = "0.53.0" version = "0.42.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed"
[[package]] [[package]]
name = "windows_x86_64_gnullvm" name = "windows_x86_64_gnullvm"
version = "0.52.6" version = "0.42.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.53.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57"
[[package]] [[package]]
name = "windows_x86_64_msvc" name = "windows_x86_64_msvc"
version = "0.52.6" version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"
[[package]] [[package]]
name = "windows_x86_64_msvc" name = "windows_x86_64_msvc"
version = "0.53.0" version = "0.42.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5"

View File

@@ -1,10 +1,7 @@
[package] [package]
name = "upkr" name = "upkr"
version = "0.2.3" version = "0.2.2"
edition = "2024" edition = "2021"
description = "Simple LZ packer with relatively small unpackers"
license = "Unlicense"
repository = "https://github.com/exoticorn/upkr"
[profile.release] [profile.release]
strip = "debuginfo" strip = "debuginfo"
@@ -14,8 +11,8 @@ terminal = ["crossterm", "pbr"]
[dependencies] [dependencies]
cdivsufsort = "2" cdivsufsort = "2"
lexopt = "0.3.1" lexopt = "0.2.1"
anyhow = "1" anyhow = "1"
thiserror = "2.0.12" thiserror = "1.0.36"
pbr = { version = "1", optional = true } pbr = { version = "1", optional = true }
crossterm = { version = "0.29.0", default-features = false, optional = true } crossterm = { version = "0.25.0", default-features = false, optional = true }

View File

@@ -27,7 +27,6 @@ The 16 bit dos unpacker also uses some variations. (`upkr --x86`)
* [Atari Lynx](https://github.com/42Bastian/new_bll/blob/master/demos/depacker/unupkr.asm) * [Atari Lynx](https://github.com/42Bastian/new_bll/blob/master/demos/depacker/unupkr.asm)
* [Atari Jaguar](https://github.com/42Bastian/new_bjl/blob/main/exp/depacker/unupkr.js) * [Atari Jaguar](https://github.com/42Bastian/new_bjl/blob/main/exp/depacker/unupkr.js)
* [8080, R800](https://github.com/ivagorRetrocomp/DeUpkr) * [8080, R800](https://github.com/ivagorRetrocomp/DeUpkr)
* [6502](https://github.com/pfusik/upkr6502)
## Usage ## Usage

BIN
README.md.upk Normal file

Binary file not shown.

View File

@@ -23,13 +23,6 @@ build/unpack_riscv32.o: unpack_riscv.S
build/unpack_riscv32.bin: build/unpack_riscv32.o build/unpack_riscv32.bin: build/unpack_riscv32.o
riscv64-linux-gnu-objcopy -O binary --only-section=.text $? $@ riscv64-linux-gnu-objcopy -O binary --only-section=.text $? $@
build/unpack_riscv32nc.o: unpack_riscv.S
mkdir -p build
riscv64-linux-gnu-gcc -march=rv32im -mabi=ilp32 -c -o $@ $?
build/unpack_riscv32nc.bin: build/unpack_riscv32nc.o
riscv64-linux-gnu-objcopy -O binary --only-section=.text $? $@
disas-riscv32: build/unpack_riscv32.o disas-riscv32: build/unpack_riscv32.o
riscv64-linux-gnu-objdump -d $? riscv64-linux-gnu-objdump -d $?
@@ -68,4 +61,4 @@ test_c: build/unpack_c
cmp test_data.bin /tmp/out.bin cmp test_data.bin /tmp/out.bin
sizes: build/unpack_armv6m.bin build/unpack_riscv64.bin build/unpack_riscv32.bin build/unpack_arm32.bin sizes: build/unpack_armv6m.bin build/unpack_riscv64.bin build/unpack_riscv32.bin build/unpack_arm32.bin
ls -l build/*.bin ls -l build/*.bin

View File

@@ -1,190 +0,0 @@
;;; -*-asm-*-
;;; upkr unpacker for Atari Jaguar RISC.
;;; lyxass syntax
; input:
;;; R20 : packed buffer
;;; R21 : output buffer
;;; r30 : return address
;;;
;;; Register usage (destroyed!)
;;; r0-r17,r20,r21
;;;
;;; Register aliases and constants for the unpacker.
;;; DST and SRC are pinned to r21/r20 (the documented output/input registers);
;;; tmp0-tmp2 alias r0-r2, which the routines below also use directly as r0-r2.
;;; NOTE(review): "REG 99" presumably lets lyxass auto-assign a free register
;;; counting down from REGTOP -- confirm against the lyxass documentation.
DST REG 21
SRC REG 20
REGTOP 16
LR_save REG 99
LR_save2 REG 99
GETBIT REG 99
GETLENGTH REG 99
LITERAL REG 99
LOOP REG 99
index REG 99
bit_pos REG 99
state REG 99
prev_was_match REG 99
offset REG 99
prob reg 99
byte REG 99
PROBS reg 99
tmp2 reg 2
tmp1 REG 1
tmp0 REG 0
REGMAP
;;; Base address of the probability table (one byte per context).
upkr_probs equ $200
;;; Number of probability contexts. As used by unupkr below: index 0 is the
;;; first bit of every token, 1..255 are literal contexts, 256 is the
;;; "new offset?" bit, 257.. is the offset number and 257+64.. the length number.
SIZEOF_PROBS EQU 1+255+1+2*32+2*32
;;; Entry point: unpack the upkr stream at SRC to DST.
;;; Returns through the address saved in LR_save once a decoded offset
;;; value minus one equals zero.
;;; Jumps have a delay slot here: the instruction following each jr/jump is
;;; executed as well, which the code below relies on throughout.
unupkr::
move LR,LR_save
;; Fill the probability table with $80 (bset #7 on a zeroed register).
;; The loop runs while the counter is still non-negative after the
;; decrement, i.e. SIZEOF_PROBS+1 bytes are written.
moveq #0,tmp0
movei #upkr_probs,PROBS
bset #7,tmp0
movei #SIZEOF_PROBS,tmp2
move PROBS,tmp1
.init storeb tmp0,(tmp1)
subq #1,tmp2
jr pl,.init
addq #1,tmp1
moveq #0,offset
moveq #0,state
;; Cache the addresses of the subroutines and local labels in registers so
;; they can be reached with register-indirect jumps.
movei #getlength,GETLENGTH
movei #getbit,GETBIT
.looppc move PC,LOOP
addq #.loop-.looppc,LOOP
;; LITERAL = address of .literal (three 2-byte instructions past "move pc").
move pc,LITERAL
jr .start
addq #6,LITERAL
;; Literal: start with byte = 1 and shift in decoded bits (via addc) until
;; bit 8 is set, using the partial byte as the probability context index.
.literal
moveq #1,byte
move pc,LR
jr .into
addq #6,LR ; LR = .getbit
.getbit
addc byte,byte
.into
btst #8,byte
jump eq,(GETBIT)
move byte,index
;; Bit 8 set: the low 8 bits of byte are the finished literal.
storeb byte,(DST)
addq #1,DST
.start
moveq #0,prev_was_match
;; Main loop: context 0 decides literal (carry clear) or match (carry set).
.loop
moveq #0,index
;; NOTE(review): BL is not defined in this file; presumably a lyxass
;; branch-and-link macro that sets LR to the following instruction -- confirm.
BL (GETBIT)
jump cc,(LITERAL)
;; Presumably advances LR to the "jr cc,.oldoff" below so that the plain
;; "jump (GETBIT)" returns there -- depends on the BL expansion, confirm.
addq #14,LR
;; Directly after a match a new offset is always read; otherwise context
;; 256 (r0 = 1 << 8) decides between a new offset and reusing the old one.
cmpq #1,prev_was_match
jr eq,.newoff
shlq #8,r0
jump (GETBIT)
move r0,index
jr cc,.oldoff
shlq #8,r0
.newoff
addq #1,r0 ; r0 = 257
BL (GETLENGTH)
;; offset = decoded value - 1; a result of zero terminates unpacking.
subq #1,r0
jump eq,(LR_save)
move r0,offset
.oldoff
;; Match length is decoded starting at context 257+64.
movei #257+64,r0
BL (GETLENGTH)
;; Copy r0 bytes from DST - offset to DST, one byte at a time.
move DST,r1
sub offset,r1
.cpymatch1
loadb (r1),r2
subq #1,r0
addqt #1,r1
storeb r2,(DST)
jr ne,.cpymatch1
addq #1,DST
jump (LOOP)
moveq #1,prev_was_match
;;; Decode one variable-length number (used for both offset and length).
;;; In:  r0 = index of the first probability context to use
;;; Out: r0 = decoded value
;;; Uses byte as accumulator and bit_pos as a non-positive shift count; the
;;; caller's return address is kept in LR_save2 because LR is reused for the
;;; getbit calls. Contexts advance because getbit increments index itself.
getlength:
move LR,LR_save2
moveq #0,byte
move r0,index
moveq #0,bit_pos
;; First "another bit follows?" flag; getbit returns to .gl.
move pc,LR
jump (GETBIT)
addq #6,LR
.gl
;; Carry clear: no more bits -> finish.
jr cc,.exit
addq #8,LR ; => return to "sh ..."
;; Read the data bit; getbit returns to the "sh" instruction below.
jump (GETBIT)
nop
;; Place the bit at the current position and accumulate it.
sh bit_pos,r0
subq #1,bit_pos ; sh < 0 => shift left!
or r0,byte
;; Read the next flag; LR is moved back so getbit returns to .gl.
jump (GETBIT)
subq #8,LR
.exit
;; Add the implicit top bit at the current position and return.
moveq #1,r0
sh bit_pos,r0
jump (LR_save2)
or byte,r0
;;; Refill: shift the next source byte into the low 8 bits of state, then
;;; fall through into getbit to re-test.
.newbyte:
loadb (SRC),r2
shlq #8,state
addq #1,SRC
or r2,state
;;; Decode one bit using the probability byte probs[index].
;;; In:  index = context index, LR = return address
;;; Out: r0 = decoded bit, carry flag = decoded bit; index is incremented;
;;;      state and probs[index] are updated. Clobbers r1, r2, prob.
getbit
move state,r2
move PROBS,r1
add index,r1 ; r1 = &probs[index]
;; While state >> 12 is zero, pull in another byte first.
shrq #12,r2
loadb (r1),prob
jr eq,.newbyte
move state,r2
;; r0 = state >> 8, r2 = state & 0xff
move state,r0
shlq #24,r2
shrq #8,r0 ; sh
shrq #24,r2 ; sl
;; Bit is 1 when (state & 0xff) < prob (carry set by the compare).
cmp prob,r2
addqt #1,index
jr cs,.one
;; Delay slot, executed on both paths: r0 = (state >> 8) * prob
mult prob,r0
;; state -= ((state >> 8) + 1)*prob
;; prob -= (prob+8)>>4
move prob,r2
add prob,r0
addq #8,r2
sub r0,state
shrq #4,r2
moveq #0,r0
jr .ret
sub r2,prob
.one
;; state = (state >> 8)*prob+(state & 0xff)
;; prob += (256 + 8 - prob) >> 4
move r2,state
movei #256+8,r2
add r0,state
sub prob,r2 ; 256-prob+8
shrq #4,r2
add r2,prob
moveq #3,r0
.ret
;; Write back the adapted probability and return. r0 is 0 (zero path) or
;; 3 (one path) here, so the shift in the delay slot produces both the
;; result register and the carry flag that callers test with cc/cs.
storeb prob,(r1)
jump (LR)
shrq #1,r0 ; zero path: C = 0, r0 = 0; one path: C = 1, r0 = 1

View File

@@ -1,217 +0,0 @@
;;; -*-asm-*-
;;; upkr unpacker for Atari Jaguar RISC. (quick version)
;;; lyxass syntax
; input:
;;; R20 : packed buffer
;;; R21 : output buffer
;;; r30 : return address
;;;
;;; Register usage (destroyed!)
;;; r0-r17,r20,r21
;;;
;;; Register aliases and constants for the quick unpacker.
;;; DST and SRC are pinned to r21/r20 (the documented output/input registers);
;;; tmp0-tmp2 alias r0-r2, which the routines below also use directly as r0-r2.
;;; ndata holds the next source byte, fetched one byte ahead of its use.
;;; NOTE(review): "REG 99" presumably lets lyxass auto-assign a free register
;;; counting down from REGTOP -- confirm against the lyxass documentation.
DST REG 21
SRC REG 20
REGTOP 17
LR_save REG 99
LR_save2 REG 99
GETBIT REG 99
GETLENGTH REG 99
LITERAL REG 99
LOOP REG 99
index REG 99
bit_pos REG 99
state REG 99
prev_was_match REG 99
offset REG 99
prob reg 99
byte REG 99
ndata reg 99
PROBS reg 99
tmp2 reg 2
tmp1 REG 1
tmp0 REG 0
REGMAP
;;; Base address of the probability table (one byte per context).
upkr_probs equ $200
;;; Number of probability contexts. As used by unupkr below: index 0 is the
;;; first bit of every token, 1..255 are literal contexts, 256 is the
;;; "new offset?" bit, 257.. is the offset number and 257+64.. the length number.
SIZEOF_PROBS EQU 1+255+1+2*32+2*32
;;; Entry point: unpack the upkr stream at SRC to DST (quick version).
;;; Returns through the address saved in LR_save once a decoded offset
;;; value minus one equals zero.
;;; Jumps have a delay slot here: the instruction following each jr/jump is
;;; executed as well, which the code below relies on throughout.
unupkr::
move LR,LR_save
;; Fill the probability table with $80 bytes, four at a time. The loop
;; stores whole 32-bit words while the counter is non-negative after the
;; decrement, so slightly more than SIZEOF_PROBS bytes are written.
movei #$80808080,tmp0
movei #upkr_probs,PROBS
movei #SIZEOF_PROBS,tmp2
move PROBS,tmp1
.init store tmp0,(tmp1)
subq #4,tmp2
jr pl,.init
addq #4,tmp1
;; Prefetch the first source byte; getbit always consumes ndata and then
;; loads the following byte.
loadb (SRC),ndata
addq #1,SRC
moveq #0,offset
moveq #0,state
;; Cache the addresses of the subroutines and local labels in registers so
;; they can be reached with register-indirect jumps.
movei #getlength,GETLENGTH
movei #getbit,GETBIT
.looppc move PC,LOOP
addq #.loop-.looppc,LOOP
;; LITERAL = address of .literal (three 2-byte instructions past "move pc").
move pc,LITERAL
jr .start
addq #6,LITERAL
;; Literal: start with byte = 1 and shift in decoded bits (via addc) until
;; bit 8 is set, using the partial byte as the probability context index.
.literal
moveq #1,byte
move pc,LR
jr .into
addq #6,LR ; LR = .getbit
.getbit
addc byte,byte
.into
btst #8,byte
jump eq,(GETBIT)
move byte,index
;; Bit 8 set: the low 8 bits of byte are the finished literal.
storeb byte,(DST)
addq #1,DST
.start
moveq #0,prev_was_match
;; Main loop: context 0 decides literal (carry clear) or match (carry set).
.loop
moveq #0,index
;; NOTE(review): BL is not defined in this file; presumably a lyxass
;; branch-and-link macro that sets LR to the following instruction -- confirm.
BL (GETBIT)
jump cc,(LITERAL)
;; Presumably advances LR to the "jr cc,.oldoff" below so that the plain
;; "jump (GETBIT)" returns there -- depends on the BL expansion, confirm.
addq #14,LR
;; Directly after a match a new offset is always read; otherwise context
;; 256 (r0 = 1 << 8) decides between a new offset and reusing the old one.
cmpq #1,prev_was_match
jr eq,.newoff
shlq #8,r0
jump (GETBIT)
move r0,index
jr cc,.oldoff
shlq #8,r0
.newoff
addq #1,r0 ; r0 = 257
BL (GETLENGTH)
;; offset = decoded value - 1; a result of zero terminates unpacking.
subq #1,r0
move r0,offset
jump eq,(LR_save)
nop
.oldoff
;; Match length is decoded starting at context 257+64.
movei #257+64,r0
BL (GETLENGTH)
;; Choose the copy loop: if bit 0 of (DST | offset) is set, copy bytewise,
;; otherwise both addresses are even and 16-bit copies are used.
;; r1 = DST - offset is the copy source (computed in the delay slot).
move DST,r2
move DST,r1
or offset,r2
btst #0,r2
moveq #1,prev_was_match
jr ne,.cpymatch1
sub offset,r1
;; Word copy: two bytes per iteration. Leaves directly when the count hits
;; zero; if the count goes negative (odd length) one byte too many was
;; written, so DST is stepped back by one before continuing.
.cpymatch2
loadw (r1),r2
addqt #2,r1
subq #2,r0
storew r2,(DST)
jump eq,(LOOP)
addqt #2,DST
jr pl,.cpymatch2
nop
jump (LOOP)
subq #1,DST
;; Byte copy: one byte per iteration until the count reaches zero.
.cpymatch1
loadb (r1),r2
subq #1,r0
addqt #1,r1
storeb r2,(DST)
jr ne,.cpymatch1
addq #1,DST
jump (LOOP)
;; NOTE(review): the nop below appears to be disabled, which would leave
;; the first instruction of getlength in the delay slot of the jump above
;; -- confirm how lyxass treats "//->".
//-> nop
;;; Decode one variable-length number (used for both offset and length).
;;; In:  r0 = index of the first probability context to use
;;; Out: r0 = decoded value
;;; Uses byte as accumulator and bit_pos as a non-positive shift count; the
;;; caller's return address is kept in LR_save2 because LR is reused for the
;;; getbit calls. Contexts advance because getbit increments index itself.
getlength:
move LR,LR_save2
moveq #0,byte
move r0,index
moveq #0,bit_pos
;; First "another bit follows?" flag; getbit returns to .gl.
move pc,LR
jump (GETBIT)
addq #6,LR
.gl
;; Carry clear: no more bits -> finish.
jr cc,.exit
addq #8,LR ; => return to "sh ..."
;; Read the data bit; getbit returns to the "sh" instruction below.
jump (GETBIT)
nop
;; Place the bit at the current position and accumulate it.
sh bit_pos,r0
subq #1,bit_pos ; sh < 0 => shift left!
or r0,byte
;; Read the next flag; LR is moved back so getbit returns to .gl.
jump (GETBIT)
subq #8,LR
.exit
;; Add the implicit top bit at the current position and return.
moveq #1,r0
sh bit_pos,r0
jump (LR_save2)
or byte,r0
;;; Refill: shift the prefetched byte (ndata) into the low 8 bits of state,
;;; prefetch the next source byte, and repeat while state >> 12 is still
;;; zero. Continues at .done, where prob and r1 are already set up by getbit.
.newbyte:
move ndata,r2
shlq #8,state
loadb (SRC),ndata
or r2,state
addq #1,SRC
move state,r2
shrq #12,r2
jr ne,.done
move state,r2
;; The delay slot of this jump is the first instruction of getbit
;; ("move state,r2").
jr .newbyte
;;; Decode one bit using the probability byte probs[index].
;;; In:  index = context index, LR = return address
;;; Out: r0 = decoded bit, carry flag = decoded bit; index is incremented;
;;;      state and probs[index] are updated. Clobbers r1, r2, prob.
getbit
move state,r2
move PROBS,r1
add index,r1 ; r1 = &probs[index]
;; If state >> 12 is zero, pull in more input first.
shrq #12,r2
loadb (r1),prob
jr eq,.newbyte
move state,r2
.done
;; r0 = state >> 8, r2 = state & 0xff
move state,r0
shlq #24,r2
shrq #8,r0 ; sh
shrq #24,r2 ; sl
;; Bit is 1 when (state & 0xff) < prob (carry set by the compare).
cmp prob,r2
addqt #1,index
jr cs,.one
;; Delay slot, executed on both paths: r0 = (state >> 8) * prob
mult prob,r0
;; state -= ((state >> 8) + 1)*prob
;; prob -= (prob+8)>>4
move prob,r2
add prob,r0
addq #8,r2
sub r0,state
shrq #4,r2
moveq #0,r0
sub r2,prob
shrq #1,r0 ; C = 0, r0 = 0
;; Return; the adapted probability is written back in the delay slot.
jump (LR)
storeb prob,(r1)
.one
;; state = (state >> 8)*prob+(state & 0xff)
;; prob += (256 + 8 - prob) >> 4
move r2,state
movei #256+8,r2
add r0,state
sub prob,r2 ; 256-prob+8
shrq #4,r2
add r2,prob
;; r0 = 3 so that the shift in the delay slot yields r0 = 1 with the
;; shifted-out bit in the carry flag, which callers test with cc/cs.
moveq #3,r0
storeb prob,(r1)
jump (LR)
shrq #1,r0 ; C = 1, r0 = 1

View File

@@ -1,56 +1,3 @@
/*
A simple C unpacker for upkr compressed data.
This implements two variants, selected by the UPKR_BITSTREAM define:
- normal: faster and smaller on modern hardware as whole bytes are shifted into
the rANS state at a time, but requires 20bits for the state
- bitstream: only single bits are shifted into the rANS state at a time
which allows the state to always fit in 16bits which is a boon
on very old CPUs.
The encoder and decoder need to be configured to use the same variant.
upkr compressed data is a rANS byte-/bit-stream encoding a series of literal
byte values and back-references as probability encoded bits.
upkr_decode_bit reads one bit from the rANS stream, taking a probability context
as parameter. The probability context is a byte estimating the probability of
a bit encoded in this context being set. It is updated by upkr_decode_bit
after each decoded bit to reflect the observed past frequencies of on/off bits.
There are a number of different contexts used in the compressed format. The order in the
upkr_probs array is arbitrary, the only requirement for the unpacker is that all bits
that shared the same context while encoding also share the same context while decoding.
The contexts are:
- is match
- has offset
- literal bit N (0-7) with already decoded highest bits of literal == M (255 total)
- offset bit N (one less than max offset bits)
- has offset bit N (max offset bits)
- length bit N (one less than max length bits)
- has length bit N (max length bits)
Literal bytes are encoded from highest to lowest bit, with the bit position and
the already decoded bits as context.
Offset and Length are encoded in an interlaced variant of Elias gamma coding. They
are encoded from lowest to highest bits. For each bit, first one bit is read in the
"has offset/length bit N" context. If this is set, offset/length bit N is read in its context
and the decoding continues with the next bit. If the "has bit N" is read as false, a
fixed 1 bit is added as the top bit at this position.
The high-level decode loop then looks like this:
loop:
if read_bit(IS_MATCH):
if prev_was_match || read_bit(HAS_OFFSET):
offset = read_length_or_offset(OFFSET) - 1
if offset == 0:
break
length = read_length_or_offset(LENGTH)
copy_bytes_from_offset(length, offset)
else:
read_and_push(literal)
*/
typedef unsigned char u8; typedef unsigned char u8;
typedef unsigned short u16; typedef unsigned short u16;
typedef unsigned long u32; typedef unsigned long u32;
@@ -67,7 +14,6 @@ u32 upkr_state;
int upkr_decode_bit(int context_index) { int upkr_decode_bit(int context_index) {
#ifdef UPKR_BITSTREAM #ifdef UPKR_BITSTREAM
// shift in single bits until rANS state is >= 32768
while(upkr_state < 32768) { while(upkr_state < 32768) {
if(upkr_bits_left == 0) { if(upkr_bits_left == 0) {
upkr_current_byte = *upkr_data_ptr++; upkr_current_byte = *upkr_data_ptr++;
@@ -78,7 +24,6 @@ int upkr_decode_bit(int context_index) {
--upkr_bits_left; --upkr_bits_left;
} }
#else #else
// shift in a full byte until rANS state is >= 4096
while(upkr_state < 4096) { while(upkr_state < 4096) {
upkr_state = (upkr_state << 8) | *upkr_data_ptr++; upkr_state = (upkr_state << 8) | *upkr_data_ptr++;
} }
@@ -87,8 +32,6 @@ int upkr_decode_bit(int context_index) {
int prob = upkr_probs[context_index]; int prob = upkr_probs[context_index];
int bit = (upkr_state & 255) < prob ? 1 : 0; int bit = (upkr_state & 255) < prob ? 1 : 0;
// rANS state and context probability update
// for the later, add 1/16th (rounded) of difference from either 0 or 256
if(bit) { if(bit) {
upkr_state = prob * (upkr_state >> 8) + (upkr_state & 255); upkr_state = prob * (upkr_state >> 8) + (upkr_state & 255);
prob += (256 - prob + 8) >> 4; prob += (256 - prob + 8) >> 4;
@@ -117,7 +60,6 @@ void* upkr_unpack(void* destination, void* compressed_data) {
#ifdef UPKR_BITSTREAM #ifdef UPKR_BITSTREAM
upkr_bits_left = 0; upkr_bits_left = 0;
#endif #endif
// all contexts are initialized to 128 = equal probability of 0 and 1
for(int i = 0; i < sizeof(upkr_probs); ++i) for(int i = 0; i < sizeof(upkr_probs); ++i)
upkr_probs[i] = 128; upkr_probs[i] = 128;
@@ -126,13 +68,10 @@ void* upkr_unpack(void* destination, void* compressed_data) {
int prev_was_match = 0; int prev_was_match = 0;
int offset = 0; int offset = 0;
for(;;) { for(;;) {
// is match
if(upkr_decode_bit(0)) { if(upkr_decode_bit(0)) {
// has offset
if(prev_was_match || upkr_decode_bit(256)) { if(prev_was_match || upkr_decode_bit(256)) {
offset = upkr_decode_length(257) - 1; offset = upkr_decode_length(257) - 1;
if(offset == 0) { if(offset == 0) {
// a 0 offset signals the end of the compressed data
break; break;
} }
} }
@@ -143,9 +82,6 @@ void* upkr_unpack(void* destination, void* compressed_data) {
} }
prev_was_match = 1; prev_was_match = 1;
} else { } else {
// byte contains the previously read bits and indicates the number of
// read bits by the set top bit. Therefore it can be directly used as the
// context index. The set top bit ends up at bit position 8 and is not stored.
int byte = 1; int byte = 1;
while(byte < 256) { while(byte < 256) {
int bit = upkr_decode_bit(byte); int bit = upkr_decode_bit(byte);

View File

@@ -1,6 +1,6 @@
use crate::{ use crate::{
Config,
rans::{ONE_PROB, PROB_BITS}, rans::{ONE_PROB, PROB_BITS},
Config,
}; };
const INIT_PROB: u16 = 1 << (PROB_BITS - 1); const INIT_PROB: u16 = 1 << (PROB_BITS - 1);
@@ -49,10 +49,12 @@ impl<'a> Context<'a> {
}; };
(offset + old as i32 - ((old as i32 + UPDATE_ADD as i32) >> UPDATE_RATE)) as u8 (offset + old as i32 - ((old as i32 + UPDATE_ADD as i32) >> UPDATE_RATE)) as u8
} else if bit ^ self.state.invert_bit_encoding {
old + ((ONE_PROB - old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8
} else { } else {
old - ((old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8 if bit ^ self.state.invert_bit_encoding {
old + ((ONE_PROB - old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8
} else {
old - ((old as u32 + UPDATE_ADD) >> UPDATE_RATE) as u8
}
}; };
} }
} }

View File

@@ -12,7 +12,7 @@ pub fn pack(
let mut rans_coder = RansCoder::new(config); let mut rans_coder = RansCoder::new(config);
let mut state = lz::CoderState::new(config); let mut state = lz::CoderState::new(config);
let mut pos = 0; let mut pos = config.dictionary_size;
while pos < data.len() { while pos < data.len() {
if let Some(ref mut cb) = progress_callback { if let Some(ref mut cb) = progress_callback {
cb(pos); cb(pos);

View File

@@ -77,11 +77,6 @@ impl Heatmap {
self.cost.len() self.cost.len()
} }
/// Returns whether the heatmap data is empty
pub fn is_empty(&self) -> bool {
self.cost.is_empty()
}
/// Returns whether the byte at `index` was encoded as a literal /// Returns whether the byte at `index` was encoded as a literal
pub fn is_literal(&self, index: usize) -> bool { pub fn is_literal(&self, index: usize) -> bool {
self.literal_index[index] == index self.literal_index[index] == index
@@ -119,10 +114,10 @@ impl Heatmap {
#[cfg(feature = "crossterm")] #[cfg(feature = "crossterm")]
fn print_as_hex_internal(&self, report_raw_cost: bool) -> std::io::Result<()> { fn print_as_hex_internal(&self, report_raw_cost: bool) -> std::io::Result<()> {
use crossterm::{ use crossterm::{
QueueableCommand,
style::{Attribute, Color, Print, SetAttribute, SetBackgroundColor}, style::{Attribute, Color, Print, SetAttribute, SetBackgroundColor},
QueueableCommand,
}; };
use std::io::{Write, stdout}; use std::io::{stdout, Write};
fn set_color( fn set_color(
mut out: impl QueueableCommand, mut out: impl QueueableCommand,

View File

@@ -71,6 +71,9 @@ pub struct Config {
pub max_offset: usize, pub max_offset: usize,
/// The maximum match length value to encode when compressing. /// The maximum match length value to encode when compressing.
pub max_length: usize, pub max_length: usize,
/// Size of dictionary at the beginning of data (how many bytes to skip when compressing.)
pub dictionary_size: usize,
} }
impl Default for Config { impl Default for Config {
@@ -92,6 +95,8 @@ impl Default for Config {
max_offset: usize::MAX, max_offset: usize::MAX,
max_length: usize::MAX, max_length: usize::MAX,
dictionary_size: 0,
} }
} }
} }

View File

@@ -1,7 +1,7 @@
use crate::Config;
use crate::context_state::ContextState; use crate::context_state::ContextState;
use crate::heatmap::Heatmap; use crate::heatmap::Heatmap;
use crate::rans::{EntropyCoder, RansDecoder}; use crate::rans::{EntropyCoder, RansDecoder};
use crate::Config;
use thiserror::Error; use thiserror::Error;
#[derive(Copy, Clone, Debug)] #[derive(Copy, Clone, Debug)]
@@ -13,8 +13,8 @@ pub enum Op {
impl Op { impl Op {
pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) { pub fn encode(&self, coder: &mut dyn EntropyCoder, state: &mut CoderState, config: &Config) {
let literal_base = state.pos % state.parity_contexts * 256; let literal_base = state.pos % state.parity_contexts * 256;
match *self { match self {
Op::Literal(lit) => { &Op::Literal(lit) => {
encode_bit(coder, state, literal_base, !config.is_match_bit); encode_bit(coder, state, literal_base, !config.is_match_bit);
let mut context_index = 1; let mut context_index = 1;
for i in (0..8).rev() { for i in (0..8).rev() {
@@ -25,7 +25,7 @@ impl Op {
state.prev_was_match = false; state.prev_was_match = false;
state.pos += 1; state.pos += 1;
} }
Op::Match { offset, len } => { &Op::Match { offset, len } => {
encode_bit(coder, state, literal_base, config.is_match_bit); encode_bit(coder, state, literal_base, config.is_match_bit);
let mut new_offset = true; let mut new_offset = true;
if !state.prev_was_match && !config.no_repeated_offsets { if !state.prev_was_match && !config.no_repeated_offsets {
@@ -217,8 +217,8 @@ fn unpack_internal(
config: &Config, config: &Config,
max_size: usize, max_size: usize,
) -> Result<isize, UnpackError> { ) -> Result<isize, UnpackError> {
let mut decoder = RansDecoder::new(packed_data, config)?; let mut decoder = RansDecoder::new(packed_data, &config)?;
let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, config); let mut contexts = ContextState::new((1 + 255) * config.parity_contexts + 1 + 64 + 64, &config);
let mut offset = usize::MAX; let mut offset = usize::MAX;
let mut position = 0usize; let mut position = 0usize;
let mut prev_was_match = false; let mut prev_was_match = false;
@@ -264,7 +264,7 @@ fn unpack_internal(
&mut decoder, &mut decoder,
&mut contexts, &mut contexts,
256 * config.parity_contexts + 1, 256 * config.parity_contexts + 1,
config, &config,
)? - if config.eof_in_length { 0 } else { 1 }; )? - if config.eof_in_length { 0 } else { 1 };
if offset == 0 { if offset == 0 {
break; break;
@@ -274,7 +274,7 @@ fn unpack_internal(
&mut decoder, &mut decoder,
&mut contexts, &mut contexts,
256 * config.parity_contexts + 65, 256 * config.parity_contexts + 65,
config, &config,
)?; )?;
if config.eof_in_length && length == 1 { if config.eof_in_length && length == 1 {
break; break;

View File

@@ -16,6 +16,7 @@ fn main() -> Result<()> {
let mut level = 2; let mut level = 2;
let mut infile: Option<PathBuf> = None; let mut infile: Option<PathBuf> = None;
let mut outfile: Option<PathBuf> = None; let mut outfile: Option<PathBuf> = None;
let mut dictionary: Option<PathBuf> = None;
let mut max_unpacked_size = 512 * 1024 * 1024; let mut max_unpacked_size = 512 * 1024 * 1024;
let mut parser = lexopt::Parser::from_env(); let mut parser = lexopt::Parser::from_env();
@@ -74,8 +75,9 @@ fn main() -> Result<()> {
process::exit(0); process::exit(0);
} }
Long("max-unpacked-size") => max_unpacked_size = parser.value()?.parse()?, Long("max-unpacked-size") => max_unpacked_size = parser.value()?.parse()?,
Value(val) if infile.is_none() => infile = Some(val.into()), Long("dictionary") => dictionary = Some(parser.value()?.try_into()?),
Value(val) if outfile.is_none() => outfile = Some(val.into()), Value(val) if infile.is_none() => infile = Some(val.try_into()?),
Value(val) if outfile.is_none() => outfile = Some(val.try_into()?),
_ => return Err(arg.unexpected().into()), _ => return Err(arg.unexpected().into()),
} }
} }
@@ -94,6 +96,15 @@ fn main() -> Result<()> {
data.reverse(); data.reverse();
} }
if let Some(dictionary) = dictionary {
let mut dict = vec![];
File::open(dictionary)?.read_to_end(&mut dict)?;
config.dictionary_size = dict.len();
// prepend dict
dict.append(&mut data);
data = dict;
}
#[cfg(feature = "terminal")] #[cfg(feature = "terminal")]
let mut packed_data = { let mut packed_data = {
let mut pb = pbr::ProgressBar::on(std::io::stderr(), data.len() as u64); let mut pb = pbr::ProgressBar::on(std::io::stderr(), data.len() as u64);
@@ -158,7 +169,7 @@ fn main() -> Result<()> {
} else { } else {
heatmap.cost(i) heatmap.cost(i)
}; };
let cost = (cost.log2() * 8. + 64.).round().clamp(0., 127.) as u8; let cost = (cost.log2() * 8. + 64.).round().max(0.).min(127.) as u8;
heatmap_bin.push((cost << 1) | heatmap.is_literal(i) as u8); heatmap_bin.push((cost << 1) | heatmap.is_literal(i) as u8);
} }
outfile(OutFileType::Heatmap).write(&heatmap_bin)?; outfile(OutFileType::Heatmap).write(&heatmap_bin)?;
@@ -272,9 +283,7 @@ fn print_help(exit_code: i32) -> ! {
eprintln!("Version: {}", env!("CARGO_PKG_VERSION")); eprintln!("Version: {}", env!("CARGO_PKG_VERSION"));
eprintln!(); eprintln!();
eprintln!("Config presets for specific unpackers:"); eprintln!("Config presets for specific unpackers:");
eprintln!( eprintln!(" --z80 --big-endian-bitstream --invert-bit-encoding --simplified-prob-update -9");
" --z80 --big-endian-bitstream --invert-bit-encoding --simplified-prob-update -9"
);
eprintln!( eprintln!(
" --x86 --bitstream --invert-is-match-bit --invert-continue-value-bit --invert-new-offset-bit" " --x86 --bitstream --invert-is-match-bit --invert-continue-value-bit --invert-new-offset-bit"
); );

View File

@@ -4,7 +4,7 @@ use std::rc::Rc;
use crate::match_finder::MatchFinder; use crate::match_finder::MatchFinder;
use crate::rans::{CostCounter, RansCoder}; use crate::rans::{CostCounter, RansCoder};
use crate::{ProgressCallback, lz}; use crate::{lz, ProgressCallback};
pub fn pack( pub fn pack(
data: &[u8], data: &[u8],
@@ -68,7 +68,7 @@ fn parse(
}); });
let mut seen_offsets = HashSet::new(); let mut seen_offsets = HashSet::new();
let mut remaining = Vec::new(); let mut remaining = Vec::new();
for arr in mem::take(vec) { for arr in mem::replace(vec, Vec::new()) {
if seen_offsets.insert(arr.state.last_offset()) { if seen_offsets.insert(arr.state.last_offset()) {
if vec.len() < max_arrivals { if vec.len() < max_arrivals {
vec.push(arr); vec.push(arr);
@@ -137,7 +137,7 @@ fn parse(
} }
add_arrival( add_arrival(
&mut arrivals, &mut arrivals,
0, encoding_config.dictionary_size,
Arrival { Arrival {
parse: None, parse: None,
state: lz::CoderState::new(encoding_config), state: lz::CoderState::new(encoding_config),
@@ -148,7 +148,7 @@ fn parse(
let cost_counter = &mut CostCounter::new(encoding_config); let cost_counter = &mut CostCounter::new(encoding_config);
let mut best_per_offset = HashMap::new(); let mut best_per_offset = HashMap::new();
for pos in 0..data.len() { for pos in encoding_config.dictionary_size..data.len() {
let match_length = |offset: usize| { let match_length = |offset: usize| {
data[pos..] data[pos..]
.iter() .iter()