From d7d21252993921db0fb844caebe159bbc2e6e579 Mon Sep 17 00:00:00 2001 From: mehbark Date: Fri, 3 Oct 2025 00:04:04 -0400 Subject: [PATCH] add test abstractions and `Freq` test --- Cargo.lock | 208 ++++++++++++++++++++++++++++++++++++++ Cargo.toml | 5 + src/compression_scheme.rs | 20 ++++ src/freq.rs | 57 ++++++++--- src/main.rs | 31 ++---- src/test.rs | 9 ++ 6 files changed, 295 insertions(+), 35 deletions(-) create mode 100644 src/compression_scheme.rs create mode 100644 src/test.rs diff --git a/Cargo.lock b/Cargo.lock index 3e9f65d..8943f21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,214 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + +[[package]] +name = "cfg-if" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" + +[[package]] +name = "env_logger" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3" +dependencies = [ + "log", + "regex", +] + +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "libc" +version = "0.2.176" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" + +[[package]] +name = "log" +version = "0.4.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "proc-macro2" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quickcheck" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6" +dependencies = [ + "env_logger", + "log", + "rand", +] + +[[package]] +name = "quickcheck_macros" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f71ee38b42f8459a88d3362be6f9b841ad2d5421844f61eb1c59c11bff3ac14a" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "quote" +version = "1.0.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "regex" +version = "1.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b5288124840bee7b386bc413c487869b360b2b4ec421ea56425128692f2a82c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" + +[[package]] +name = "syn" +version = "2.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "text-compression" version = "0.1.0" +dependencies = [ + "bitvec", + "quickcheck", + "quickcheck_macros", +] + +[[package]] +name = "unicode-ident" +version = "1.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] diff --git a/Cargo.toml b/Cargo.toml index bfe2552..821f53b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,3 +4,8 @@ version = "0.1.0" edition = "2024" [dependencies] +bitvec = "1.0.1" + +[dev-dependencies] +quickcheck = "1" +quickcheck_macros = "1" \ No newline at end of file diff --git a/src/compression_scheme.rs b/src/compression_scheme.rs new file mode 100644 index 0000000..3d89d86 --- /dev/null +++ b/src/compression_scheme.rs @@ -0,0 +1,20 @@ +use bitvec::{slice::BitSlice, vec::BitVec}; + +pub trait CompressionScheme { + type Header; + + fn encode(src: &[u8], buf: &mut BitVec) -> Self::Header; + fn decode(src: &BitSlice, header: &Self::Header, buf: &mut Vec); + + fn header_size(header: &Self::Header) -> usize; + + fn idempotent_on(src: &[u8]) -> bool { + let mut buf_bits = BitVec::new(); + let header = Self::encode(src, &mut buf_bits); + + let mut buf_bytes = Vec::new(); + Self::decode(&buf_bits, &header, &mut buf_bytes); + + src == buf_bytes + } +} diff --git a/src/freq.rs b/src/freq.rs index 269dae4..d38dca6 100644 --- a/src/freq.rs +++ b/src/freq.rs @@ -8,6 +8,39 @@ use std::{array, fmt}; +use crate::CompressionScheme; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct Freq; + +impl CompressionScheme for Freq { + type Header = [u8; 256]; + + fn encode(src: &[u8], buf: &mut bitvec::prelude::BitVec) -> Self::Header { + let encoder = Encoder::new(src); + + for Bits { one_count } in encoder.clone() { + for _ in 0..one_count { + buf.push(true); + } + buf.push(false); + } + + encoder.byte_ranking + } + + fn decode(mut src: &bitvec::prelude::BitSlice, header: &Self::Header, buf: &mut Vec) { + while let Some(one_count) = src.first_zero() { + buf.push(*header.get(one_count).unwrap()); + src = &src[(one_count + 1)..]; + } + } + + fn header_size(_header: &Self::Header) -> usize { + 256 + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct Bits { pub one_count: u8, @@ -23,8 +56,9 @@ impl fmt::Display for Bits { } #[derive(Debug, Clone)] -pub struct Encoder<'src> { +struct Encoder<'src> { src: &'src [u8], + byte_ranking: [u8; 256], byte_mapping: [u8; 256], } @@ -40,27 +74,20 @@ impl<'src> Encoder<'src> { clippy::cast_possible_truncation, reason = "`i` is in the range `0..256`" )] - let mut ranking: [u8; 256] = array::from_fn(|i| i as u8); - ranking.sort_by_key(|&byte| -i64::from(byte_counts[byte as usize])); + let mut byte_ranking: [u8; 256] = array::from_fn(|i| i as u8); + byte_ranking.sort_by_key(|&byte| -i64::from(byte_counts[byte as usize])); #[allow( clippy::cast_possible_truncation, reason = "`i` is in the range `0..256`" )] let byte_mapping = - array::from_fn(|i| ranking.iter().position(|b| *b == (i as u8)).unwrap() as u8); + array::from_fn(|i| byte_ranking.iter().position(|b| *b == (i as u8)).unwrap() as u8); - Self { src, byte_mapping } - } - - pub fn print_mapping(&self) { - for (one_count, byte) in self.byte_mapping.iter().zip(0u8..) { - println!( - "0x{byte:02x}: {:3} bit{s} ({:?})", - one_count + 1, - (byte as char), - s = if *one_count == 0 { "s" } else { "" }, - ); + Self { + src, + byte_ranking, + byte_mapping, } } } diff --git a/src/main.rs b/src/main.rs index 509ccd5..d57a2e8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,34 +1,25 @@ -use std::{ - env, - io::{self, Read}, -}; +use std::io::{self, Read}; +mod compression_scheme; mod freq; mod rle; +#[cfg(test)] +mod test; + +pub use compression_scheme::CompressionScheme; +pub use freq::Freq; fn main() -> Result<(), io::Error> { - let debug = env::args().any(|arg| arg == "--debug" || arg == "-d"); - let mut buf = Vec::new(); let len_src = io::stdin().read_to_end(&mut buf)?; println!("Original: {len_src}"); - let len_rle = rle::Encoder::new(&buf).count() * 2; - println!(" Rle'd: {len_rle}"); + // let len_rle = rle::Encoder::new(&buf).count() * 2; + // println!(" Rle'd: {len_rle}"); - let encoder_freq = freq::Encoder::new(&buf); - if debug { - encoder_freq.print_mapping(); - } - - let len_freq_table = 256; - let len_freq_bits = encoder_freq - .map(|freq::Bits { one_count }| one_count as usize + 1) - .sum::() - .div_ceil(8); - let len_freq = len_freq_table + len_freq_bits; - println!(" Freq'd: {len_freq}"); + // let len_freq = len_freq_table + len_freq_bits; + // println!(" Freq'd: {len_freq}"); Ok(()) } diff --git a/src/test.rs b/src/test.rs new file mode 100644 index 0000000..ecf36d2 --- /dev/null +++ b/src/test.rs @@ -0,0 +1,9 @@ +use quickcheck_macros::quickcheck; + +use crate::{CompressionScheme, Freq, Rle}; + +#[allow(clippy::needless_pass_by_value)] +#[quickcheck] +fn freq_roundtrip(src: Vec) -> bool { + Freq::idempotent_on(&src) +}