add test abstractions and Freq test
This commit is contained in:
parent
256fad24fc
commit
d7d2125299
6 changed files with 295 additions and 35 deletions
208
Cargo.lock
generated
208
Cargo.lock
generated
|
|
@ -2,6 +2,214 @@
|
||||||
# It is not intended for manual editing.
|
# It is not intended for manual editing.
|
||||||
version = 4
|
version = 4
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "aho-corasick"
|
||||||
|
version = "1.1.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bitvec"
|
||||||
|
version = "1.0.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c"
|
||||||
|
dependencies = [
|
||||||
|
"funty",
|
||||||
|
"radium",
|
||||||
|
"tap",
|
||||||
|
"wyz",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cfg-if"
|
||||||
|
version = "1.0.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "env_logger"
|
||||||
|
version = "0.8.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3"
|
||||||
|
dependencies = [
|
||||||
|
"log",
|
||||||
|
"regex",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "funty"
|
||||||
|
version = "2.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "getrandom"
|
||||||
|
version = "0.2.16"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"libc",
|
||||||
|
"wasi",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libc"
|
||||||
|
version = "0.2.176"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "log"
|
||||||
|
version = "0.4.28"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "memchr"
|
||||||
|
version = "2.7.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proc-macro2"
|
||||||
|
version = "1.0.101"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quickcheck"
|
||||||
|
version = "1.0.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6"
|
||||||
|
dependencies = [
|
||||||
|
"env_logger",
|
||||||
|
"log",
|
||||||
|
"rand",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quickcheck_macros"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f71ee38b42f8459a88d3362be6f9b841ad2d5421844f61eb1c59c11bff3ac14a"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quote"
|
||||||
|
version = "1.0.41"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "radium"
|
||||||
|
version = "0.7.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rand"
|
||||||
|
version = "0.8.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
|
||||||
|
dependencies = [
|
||||||
|
"rand_core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rand_core"
|
||||||
|
version = "0.6.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
|
||||||
|
dependencies = [
|
||||||
|
"getrandom",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex"
|
||||||
|
version = "1.11.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8b5288124840bee7b386bc413c487869b360b2b4ec421ea56425128692f2a82c"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"memchr",
|
||||||
|
"regex-automata",
|
||||||
|
"regex-syntax",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-automata"
|
||||||
|
version = "0.4.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"memchr",
|
||||||
|
"regex-syntax",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-syntax"
|
||||||
|
version = "0.8.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "syn"
|
||||||
|
version = "2.0.106"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tap"
|
||||||
|
version = "1.0.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "text-compression"
|
name = "text-compression"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"bitvec",
|
||||||
|
"quickcheck",
|
||||||
|
"quickcheck_macros",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-ident"
|
||||||
|
version = "1.0.19"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wasi"
|
||||||
|
version = "0.11.1+wasi-snapshot-preview1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wyz"
|
||||||
|
version = "0.5.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
|
||||||
|
dependencies = [
|
||||||
|
"tap",
|
||||||
|
]
|
||||||
|
|
|
||||||
|
|
@ -4,3 +4,8 @@ version = "0.1.0"
|
||||||
edition = "2024"
|
edition = "2024"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
bitvec = "1.0.1"
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
quickcheck = "1"
|
||||||
|
quickcheck_macros = "1"
|
||||||
20
src/compression_scheme.rs
Normal file
20
src/compression_scheme.rs
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
use bitvec::{slice::BitSlice, vec::BitVec};
|
||||||
|
|
||||||
|
/// Interface for a compression scheme that turns bytes into a bit stream plus a header.
///
/// All methods are associated functions (no `self`); an implementor is used purely as a
/// type-level tag, e.g. `Scheme::encode(..)`.
pub trait CompressionScheme {
|
||||||
|
/// Side information returned by `encode` and passed back, by reference, to `decode`.
type Header;
|
||||||
|

||||||
|
/// Encodes `src`, writing the resulting bits into `buf` and returning the header.
///
/// NOTE(review): the trait does not say whether `buf` is appended to or overwritten;
/// `idempotent_on` always passes a freshly created, empty `BitVec`.
fn encode(src: &[u8], buf: &mut BitVec) -> Self::Header;
|
||||||
|
/// Decodes the bits in `src` with the help of `header`, writing the bytes into `buf`.
fn decode(src: &BitSlice, header: &Self::Header, buf: &mut Vec<u8>);
|
||||||
|

||||||
|
/// Size of `header`. The unit is not stated here — presumably bytes; TODO confirm.
fn header_size(header: &Self::Header) -> usize;
|
||||||
|

||||||
|
/// Round-trip check: returns `true` when decoding the encoded form of `src` yields `src`.
///
/// Despite the name, this tests `decode(encode(src)) == src`, not idempotence of a
/// single operation.
fn idempotent_on(src: &[u8]) -> bool {
|
||||||
|
// Encode into a fresh bit buffer, keeping the header for the decode step.
let mut buf_bits = BitVec::new();
|
||||||
|
let header = Self::encode(src, &mut buf_bits);
|
||||||
|

||||||
|
// Decode into a fresh byte buffer.
let mut buf_bytes = Vec::new();
|
||||||
|
Self::decode(&buf_bits, &header, &mut buf_bytes);
|
||||||
|

||||||
|
// The scheme round-trips on this input iff the decoded bytes equal the original.
src == buf_bytes
|
||||||
|
}
|
||||||
|
}
|
||||||
57
src/freq.rs
57
src/freq.rs
|
|
@ -8,6 +8,39 @@
|
||||||
|
|
||||||
use std::{array, fmt};
|
use std::{array, fmt};
|
||||||
|
|
||||||
|
use crate::CompressionScheme;
|
||||||
|
|
||||||
|
/// Field-less tag type that carries the `CompressionScheme` implementation below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||||
|
pub struct Freq;
|
||||||
|
|
||||||
|
// Unary-style bit code: each symbol produced by `Encoder` is written as `one_count`
// set bits followed by one cleared bit; the header is the encoder's `byte_ranking` table.
impl CompressionScheme for Freq {
|
||||||
|
/// The encoder's `byte_ranking` table: 256 byte values, indexed by run length in `decode`.
type Header = [u8; 256];
|
||||||
|

||||||
|
/// Writes one run of `true` bits plus a terminating `false` bit per item yielded by
/// the `Encoder`, and returns the encoder's `byte_ranking` as the header.
///
/// Bits are pushed onto the end of `buf`; `buf` is not cleared first.
fn encode(src: &[u8], buf: &mut bitvec::prelude::BitVec) -> Self::Header {
|
||||||
|
let encoder = Encoder::new(src);
|
||||||
|

||||||
|
// Iterate over a clone so `encoder` is still available for `byte_ranking` below.
// NOTE(review): `one_count` is presumably the byte's position in the ranking
// (via `byte_mapping`) — the `Iterator` impl is not visible here; confirm.
for Bits { one_count } in encoder.clone() {
|
||||||
|
for _ in 0..one_count {
|
||||||
|
buf.push(true);
|
||||||
|
}
|
||||||
|
// A single cleared bit terminates the run of ones.
buf.push(false);
|
||||||
|
}
|
||||||
|

||||||
|
encoder.byte_ranking
|
||||||
|
}
|
||||||
|

||||||
|
/// Reads runs of set bits terminated by a cleared bit and pushes `header[run_length]`
/// onto `buf` for each run.
///
/// # Panics
///
/// Panics (via `unwrap`) if a run has 256 or more set bits, since `header` has only
/// 256 entries.
fn decode(mut src: &bitvec::prelude::BitSlice, header: &Self::Header, buf: &mut Vec<u8>) {
|
||||||
|
// The index of the first cleared bit equals the number of set bits before it.
// The loop stops once no cleared bit remains, so trailing set bits without a
// terminator are ignored.
while let Some(one_count) = src.first_zero() {
|
||||||
|
buf.push(*header.get(one_count).unwrap());
|
||||||
|
// Skip the run and its terminating cleared bit.
src = &src[(one_count + 1)..];
|
||||||
|
}
|
||||||
|
}
|
||||||
|

||||||
|
/// Always returns 256, the number of `u8` entries in the header table; the argument
/// is not inspected.
fn header_size(_header: &Self::Header) -> usize {
|
||||||
|
256
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||||
pub struct Bits {
|
pub struct Bits {
|
||||||
pub one_count: u8,
|
pub one_count: u8,
|
||||||
|
|
@ -23,8 +56,9 @@ impl fmt::Display for Bits {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct Encoder<'src> {
|
struct Encoder<'src> {
|
||||||
src: &'src [u8],
|
src: &'src [u8],
|
||||||
|
byte_ranking: [u8; 256],
|
||||||
byte_mapping: [u8; 256],
|
byte_mapping: [u8; 256],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -40,27 +74,20 @@ impl<'src> Encoder<'src> {
|
||||||
clippy::cast_possible_truncation,
|
clippy::cast_possible_truncation,
|
||||||
reason = "`i` is in the range `0..256`"
|
reason = "`i` is in the range `0..256`"
|
||||||
)]
|
)]
|
||||||
let mut ranking: [u8; 256] = array::from_fn(|i| i as u8);
|
let mut byte_ranking: [u8; 256] = array::from_fn(|i| i as u8);
|
||||||
ranking.sort_by_key(|&byte| -i64::from(byte_counts[byte as usize]));
|
byte_ranking.sort_by_key(|&byte| -i64::from(byte_counts[byte as usize]));
|
||||||
|
|
||||||
#[allow(
|
#[allow(
|
||||||
clippy::cast_possible_truncation,
|
clippy::cast_possible_truncation,
|
||||||
reason = "`i` is in the range `0..256`"
|
reason = "`i` is in the range `0..256`"
|
||||||
)]
|
)]
|
||||||
let byte_mapping =
|
let byte_mapping =
|
||||||
array::from_fn(|i| ranking.iter().position(|b| *b == (i as u8)).unwrap() as u8);
|
array::from_fn(|i| byte_ranking.iter().position(|b| *b == (i as u8)).unwrap() as u8);
|
||||||
|
|
||||||
Self { src, byte_mapping }
|
Self {
|
||||||
}
|
src,
|
||||||
|
byte_ranking,
|
||||||
pub fn print_mapping(&self) {
|
byte_mapping,
|
||||||
for (one_count, byte) in self.byte_mapping.iter().zip(0u8..) {
|
|
||||||
println!(
|
|
||||||
"0x{byte:02x}: {:3} bit{s} ({:?})",
|
|
||||||
one_count + 1,
|
|
||||||
(byte as char),
|
|
||||||
s = if *one_count == 0 { "s" } else { "" },
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
31
src/main.rs
31
src/main.rs
|
|
@ -1,34 +1,25 @@
|
||||||
use std::{
|
use std::io::{self, Read};
|
||||||
env,
|
|
||||||
io::{self, Read},
|
|
||||||
};
|
|
||||||
|
|
||||||
|
mod compression_scheme;
|
||||||
mod freq;
|
mod freq;
|
||||||
mod rle;
|
mod rle;
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test;
|
||||||
|
|
||||||
|
pub use compression_scheme::CompressionScheme;
|
||||||
|
pub use freq::Freq;
|
||||||
|
|
||||||
fn main() -> Result<(), io::Error> {
|
fn main() -> Result<(), io::Error> {
|
||||||
let debug = env::args().any(|arg| arg == "--debug" || arg == "-d");
|
|
||||||
|
|
||||||
let mut buf = Vec::new();
|
let mut buf = Vec::new();
|
||||||
let len_src = io::stdin().read_to_end(&mut buf)?;
|
let len_src = io::stdin().read_to_end(&mut buf)?;
|
||||||
|
|
||||||
println!("Original: {len_src}");
|
println!("Original: {len_src}");
|
||||||
|
|
||||||
let len_rle = rle::Encoder::new(&buf).count() * 2;
|
// let len_rle = rle::Encoder::new(&buf).count() * 2;
|
||||||
println!(" Rle'd: {len_rle}");
|
// println!(" Rle'd: {len_rle}");
|
||||||
|
|
||||||
let encoder_freq = freq::Encoder::new(&buf);
|
// let len_freq = len_freq_table + len_freq_bits;
|
||||||
if debug {
|
// println!(" Freq'd: {len_freq}");
|
||||||
encoder_freq.print_mapping();
|
|
||||||
}
|
|
||||||
|
|
||||||
let len_freq_table = 256;
|
|
||||||
let len_freq_bits = encoder_freq
|
|
||||||
.map(|freq::Bits { one_count }| one_count as usize + 1)
|
|
||||||
.sum::<usize>()
|
|
||||||
.div_ceil(8);
|
|
||||||
let len_freq = len_freq_table + len_freq_bits;
|
|
||||||
println!(" Freq'd: {len_freq}");
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
||||||
9
src/test.rs
Normal file
9
src/test.rs
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
use quickcheck_macros::quickcheck;
|
||||||
|
|
||||||
|
use crate::{CompressionScheme, Freq, Rle};
|
||||||
|
|
||||||
|
// Property test: for each generated byte vector, `Freq` must round-trip, i.e.
// `Freq::idempotent_on` must return `true`.
// The by-value `Vec<u8>` parameter is presumably what the `#[quickcheck]` macro expects,
// hence the silenced lint — confirm.
// NOTE(review): this file's `use` line also imports `crate::Rle`, which the `main.rs`
// shown in this commit does not re-export — verify that the test module compiles.
#[allow(clippy::needless_pass_by_value)]
|
||||||
|
#[quickcheck]
|
||||||
|
fn freq_roundtrip(src: Vec<u8>) -> bool {
|
||||||
|
Freq::idempotent_on(&src)
|
||||||
|
}
|
||||||
Loading…
Reference in a new issue