add test abstractions and Freq test

This commit is contained in:
mehbark 2025-10-03 00:04:04 -04:00
parent 256fad24fc
commit d7d2125299
Signed by: mbk
GPG key ID: E333EC1335FFCCDB
6 changed files with 295 additions and 35 deletions

208
Cargo.lock generated
View file

@ -2,6 +2,214 @@
# It is not intended for manual editing. # It is not intended for manual editing.
version = 4 version = 4
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
name = "bitvec"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c"
dependencies = [
"funty",
"radium",
"tap",
"wyz",
]
[[package]]
name = "cfg-if"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9"
[[package]]
name = "env_logger"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3"
dependencies = [
"log",
"regex",
]
[[package]]
name = "funty"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
[[package]]
name = "getrandom"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "libc"
version = "0.2.176"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174"
[[package]]
name = "log"
version = "0.4.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432"
[[package]]
name = "memchr"
version = "2.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
[[package]]
name = "proc-macro2"
version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quickcheck"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6"
dependencies = [
"env_logger",
"log",
"rand",
]
[[package]]
name = "quickcheck_macros"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f71ee38b42f8459a88d3362be6f9b841ad2d5421844f61eb1c59c11bff3ac14a"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "quote"
version = "1.0.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1"
dependencies = [
"proc-macro2",
]
[[package]]
name = "radium"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom",
]
[[package]]
name = "regex"
version = "1.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b5288124840bee7b386bc413c487869b360b2b4ec421ea56425128692f2a82c"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001"
[[package]]
name = "syn"
version = "2.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "tap"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
[[package]] [[package]]
name = "text-compression" name = "text-compression"
version = "0.1.0" version = "0.1.0"
dependencies = [
"bitvec",
"quickcheck",
"quickcheck_macros",
]
[[package]]
name = "unicode-ident"
version = "1.0.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d"
[[package]]
name = "wasi"
version = "0.11.1+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
[[package]]
name = "wyz"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
dependencies = [
"tap",
]

View file

@ -4,3 +4,8 @@ version = "0.1.0"
edition = "2024" edition = "2024"
[dependencies] [dependencies]
bitvec = "1.0.1"
[dev-dependencies]
quickcheck = "1"
quickcheck_macros = "1"

20
src/compression_scheme.rs Normal file
View file

@ -0,0 +1,20 @@
use bitvec::{slice::BitSlice, vec::BitVec};
/// A compression scheme that turns bytes into a bit stream plus a header,
/// and back again.
///
/// Every function is an associated function (there is no `self`), so
/// implementors act purely as type-level selectors for an algorithm.
pub trait CompressionScheme {
    /// Side information returned by [`encode`](Self::encode) and handed back
    /// to [`decode`](Self::decode).
    type Header;

    /// Encodes `src`, writing the resulting bits into `buf`, and returns the
    /// header needed to decode them.
    fn encode(src: &[u8], buf: &mut BitVec) -> Self::Header;

    /// Decodes the bits in `src` using `header`, writing the recovered bytes
    /// into `buf`.
    fn decode(src: &BitSlice, header: &Self::Header, buf: &mut Vec<u8>);

    /// Reports the size of `header`.
    // NOTE(review): the unit is presumably bytes — confirm against callers.
    fn header_size(header: &Self::Header) -> usize;

    /// Returns `true` when encoding `src` and then decoding the result yields
    /// exactly `src` again (i.e. a round-trip check, despite the name).
    fn idempotent_on(src: &[u8]) -> bool {
        let mut encoded = BitVec::new();
        let header = Self::encode(src, &mut encoded);

        let mut decoded = Vec::new();
        Self::decode(&encoded, &header, &mut decoded);

        decoded.as_slice() == src
    }
}

View file

@ -8,6 +8,39 @@
use std::{array, fmt}; use std::{array, fmt};
use crate::CompressionScheme;
/// Field-less marker type naming the frequency-ranking compression scheme;
/// it carries no data and exists only to hang trait implementations on.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Freq;
impl CompressionScheme for Freq {
    /// Lookup table used by `decode`: index `r` holds the byte that is
    /// written as `r` one-bits followed by a single zero-bit.
    type Header = [u8; 256];

    /// Writes one unary code word per item yielded by the `Encoder` built
    /// over `src` (`one_count` ones, then a terminating zero) and returns the
    /// encoder's `byte_ranking` table as the header.
    // NOTE(review): each yielded `Bits` presumably corresponds to one source
    // byte's rank — the iterator impl is not visible here; confirm.
    fn encode(src: &[u8], buf: &mut bitvec::prelude::BitVec) -> Self::Header {
        let encoder = Encoder::new(src);
        // `[u8; 256]` is `Copy`, so grab the table first; the encoder itself
        // can then be consumed by the loop.
        let ranking = encoder.byte_ranking;

        for Bits { one_count } in encoder {
            let ones = usize::from(one_count);
            // Grow by `ones` set bits, then append the zero terminator.
            buf.resize(buf.len() + ones, true);
            buf.push(false);
        }

        ranking
    }

    /// Reads unary code words from `src` until no zero-bit remains, pushing
    /// `header[number_of_leading_ones]` onto `buf` for each one.
    ///
    /// # Panics
    ///
    /// Panics if a run of ones is 256 bits or longer, since that index is
    /// outside `header`.
    fn decode(src: &bitvec::prelude::BitSlice, header: &Self::Header, buf: &mut Vec<u8>) {
        let mut remaining = src;
        while let Some(rank) = remaining.first_zero() {
            let byte = header.get(rank).unwrap();
            buf.push(*byte);
            // Skip the ones just counted plus their zero terminator.
            remaining = &remaining[(rank + 1)..];
        }
    }

    /// Returns the number of entries in the header table (always 256).
    fn header_size(header: &Self::Header) -> usize {
        header.len()
    }
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Bits { pub struct Bits {
pub one_count: u8, pub one_count: u8,
@ -23,8 +56,9 @@ impl fmt::Display for Bits {
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Encoder<'src> { struct Encoder<'src> {
src: &'src [u8], src: &'src [u8],
byte_ranking: [u8; 256],
byte_mapping: [u8; 256], byte_mapping: [u8; 256],
} }
@ -40,27 +74,20 @@ impl<'src> Encoder<'src> {
clippy::cast_possible_truncation, clippy::cast_possible_truncation,
reason = "`i` is in the range `0..256`" reason = "`i` is in the range `0..256`"
)] )]
let mut ranking: [u8; 256] = array::from_fn(|i| i as u8); let mut byte_ranking: [u8; 256] = array::from_fn(|i| i as u8);
ranking.sort_by_key(|&byte| -i64::from(byte_counts[byte as usize])); byte_ranking.sort_by_key(|&byte| -i64::from(byte_counts[byte as usize]));
#[allow( #[allow(
clippy::cast_possible_truncation, clippy::cast_possible_truncation,
reason = "`i` is in the range `0..256`" reason = "`i` is in the range `0..256`"
)] )]
let byte_mapping = let byte_mapping =
array::from_fn(|i| ranking.iter().position(|b| *b == (i as u8)).unwrap() as u8); array::from_fn(|i| byte_ranking.iter().position(|b| *b == (i as u8)).unwrap() as u8);
Self { src, byte_mapping } Self {
} src,
byte_ranking,
pub fn print_mapping(&self) { byte_mapping,
for (one_count, byte) in self.byte_mapping.iter().zip(0u8..) {
println!(
"0x{byte:02x}: {:3} bit{s} ({:?})",
one_count + 1,
(byte as char),
s = if *one_count == 0 { "s" } else { "" },
);
} }
} }
} }

View file

@ -1,34 +1,25 @@
use std::{ use std::io::{self, Read};
env,
io::{self, Read},
};
mod compression_scheme;
mod freq; mod freq;
mod rle; mod rle;
#[cfg(test)]
mod test;
pub use compression_scheme::CompressionScheme;
pub use freq::Freq;
fn main() -> Result<(), io::Error> { fn main() -> Result<(), io::Error> {
let debug = env::args().any(|arg| arg == "--debug" || arg == "-d");
let mut buf = Vec::new(); let mut buf = Vec::new();
let len_src = io::stdin().read_to_end(&mut buf)?; let len_src = io::stdin().read_to_end(&mut buf)?;
println!("Original: {len_src}"); println!("Original: {len_src}");
let len_rle = rle::Encoder::new(&buf).count() * 2; // let len_rle = rle::Encoder::new(&buf).count() * 2;
println!(" Rle'd: {len_rle}"); // println!(" Rle'd: {len_rle}");
let encoder_freq = freq::Encoder::new(&buf); // let len_freq = len_freq_table + len_freq_bits;
if debug { // println!(" Freq'd: {len_freq}");
encoder_freq.print_mapping();
}
let len_freq_table = 256;
let len_freq_bits = encoder_freq
.map(|freq::Bits { one_count }| one_count as usize + 1)
.sum::<usize>()
.div_ceil(8);
let len_freq = len_freq_table + len_freq_bits;
println!(" Freq'd: {len_freq}");
Ok(()) Ok(())
} }

9
src/test.rs Normal file
View file

@ -0,0 +1,9 @@
use quickcheck_macros::quickcheck;
use crate::{CompressionScheme, Freq, Rle};
/// Property test: for any byte vector, encoding with [`Freq`] and decoding
/// the result must give back the original bytes.
#[allow(
    clippy::needless_pass_by_value,
    reason = "`#[quickcheck]` properties receive their generated arguments by value"
)]
#[quickcheck]
fn freq_roundtrip(src: Vec<u8>) -> bool {
    Freq::idempotent_on(&src)
}