initial huffman; DRY main

This commit is contained in:
mehbark 2025-10-04 02:05:43 -04:00
parent fd0662b66d
commit 589e7b711f
Signed by: mbk
GPG key ID: E333EC1335FFCCDB
4 changed files with 192 additions and 11 deletions

View file

@ -9,7 +9,7 @@ pub trait CompressionScheme {
/// Encode some bytes into `buf`, returning a [`Header`][Self::Header].
///
/// This does not necessarily have to be deterministic,
/// This does not have to be deterministic,
/// but it **must** be decodable by [`decode`](Self::decode).
/// That is, [`decode`](Self::decode) ∘ [`encode`](Self::encode) = `id`.
fn encode(src: &[u8], buf: &mut BitVec) -> Self::Header;

158
src/huffman.rs Normal file
View file

@ -0,0 +1,158 @@
use std::{
collections::{BinaryHeap, HashMap},
fmt,
};
use bitvec::prelude::{BitSlice, BitVec};
use crate::CompressionScheme;
/// Marker type that selects Huffman coding as a [`CompressionScheme`].
///
/// It carries no data; everything needed for decoding travels in the header,
/// which is the code tree (or `None` when no tree could be built).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Huffman;
/// Huffman coding: the header is the code tree derived from the byte frequencies of the input.
impl CompressionScheme for Huffman {
    /// The code tree, or `None` when the input contained no bytes at all.
    type Header = Option<Node>;

    /// Tally how often every byte occurs in `src` and build the code tree from the tallies.
    ///
    /// NOTE: nothing is written to `buf` yet; only the tree is produced.
    fn encode(src: &[u8], buf: &mut BitVec) -> Self::Header {
        let counts = src
            .iter()
            .fold(HashMap::<u8, u32>::new(), |mut tally, &byte| {
                *tally.entry(byte).or_default() += 1;
                tally
            });
        Node::build(&counts)
    }

    /// Decoding is not implemented yet: a present tree reaches `todo!()`,
    /// while an absent tree leaves `buf` untouched.
    fn decode(src: &BitSlice, header: &Self::Header, buf: &mut Vec<u8>) {
        match header {
            Some(_tree) => todo!(),
            None => {}
        }
    }

    /// Size attributed to the header: the tree's `byte_size`, or zero without a tree.
    fn header_size(header: &Self::Header) -> usize {
        match header {
            Some(tree) => tree.byte_size(),
            None => 0,
        }
    }
}
/// A binary code tree: leaves carry a byte value, branches have exactly two subtrees.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Node {
/// A terminal node standing for a single byte value.
Leaf { byte: u8 },
/// An interior node owning its two children.
Branch { left: Box<Self>, right: Box<Self> },
}
impl Node {
    /// Build the code tree for the given byte frequencies.
    ///
    /// Returns `None` when `counts` is empty.
    fn build(counts: &HashMap<u8, u32>) -> Option<Self> {
        let weighted = WeightedNode::build(counts)?;
        Some(weighted.unburden())
    }

    /// Append to `buf` the bit sequence that this tree assigns to `byte`.
    ///
    /// Not implemented yet.
    ///
    /// # Panics
    /// Panics if called with a byte that was not passed to [`build`](Self::build).
    /// (At the moment it always panics via `todo!()`.)
    fn encode_byte(&self, byte: u8, buf: &mut BitVec) {
        let mut cursor = self;
        todo!()
    }

    /// Return the next encoded byte in the stream of bits.
    ///
    /// Not implemented yet.
    fn decode_byte(src: &BitSlice) -> Option<u8> {
        todo!()
    }

    /// Size of the tree, counting one unit for every node (leaf or branch).
    fn byte_size(&self) -> usize {
        let below = match self {
            Self::Leaf { .. } => 0,
            Self::Branch { left, right } => left.byte_size() + right.byte_size(),
        };
        1 + below
    }

    /// Render this subtree, one node per line, prefixed by `depth` spaces.
    ///
    /// Leaves print as `| <char>`; branches print their depth followed by `+`
    /// and then both children one level deeper.
    fn write_depth(&self, f: &mut fmt::Formatter<'_>, depth: usize) -> fmt::Result {
        (0..depth).try_for_each(|_| write!(f, " "))?;
        let (left, right) = match self {
            Self::Leaf { byte } => return writeln!(f, "| {:?}", *byte as char),
            Self::Branch { left, right } => (left, right),
        };
        writeln!(f, "{depth}+")?;
        left.write_depth(f, depth + 1)?;
        right.write_depth(f, depth + 1)
    }
}
impl fmt::Debug for Node {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.write_depth(f, 0)
}
}
/// A code-tree node that additionally records an occurrence count.
///
/// Used while constructing the tree; the counts are dropped afterwards.
#[derive(Clone, PartialEq, Eq)]
enum WeightedNode {
/// A single byte value together with how often it was counted.
Leaf {
byte: u8,
count: u32,
},
/// Two subtrees plus the count recorded for the pair when they were joined.
Branch {
count: u32,
left: Box<Self>,
right: Box<Self>,
},
}
impl WeightedNode {
    /// Build a weighted tree from byte frequencies.
    ///
    /// Every entry of `counts` becomes a leaf; the two nodes the priority queue
    /// yields first are then merged repeatedly until a single root remains.
    ///
    /// Returns `None` when `counts` is empty. A single entry yields a lone leaf.
    fn build(counts: &HashMap<u8, u32>) -> Option<Self> {
        let mut queue = BinaryHeap::new();
        for (&byte, &count) in counts {
            queue.push(Self::Leaf { byte, count });
        }
        loop {
            // An empty queue is only possible on the first iteration (no input bytes).
            let first = queue.pop()?;
            // Nothing left to merge with: `first` is the finished root.
            let Some(second) = queue.pop() else {
                return Some(first);
            };
            queue.push(Self::join(first, second));
        }
    }

    /// The occurrence count stored in this node.
    fn count(&self) -> u32 {
        match self {
            Self::Branch { count, .. } | Self::Leaf { count, .. } => *count,
        }
    }

    /// Combine two nodes under a new branch whose count is the sum of both.
    ///
    /// The sum saturates at `u32::MAX` rather than overflowing, so very large
    /// inputs neither panic (debug builds) nor wrap around to a tiny weight
    /// (release builds).
    fn join(left: Self, right: Self) -> Self {
        Self::Branch {
            count: left.count().saturating_add(right.count()),
            left: Box::new(left),
            right: Box::new(right),
        }
    }

    /// Strip the counts, leaving the plain code tree with the same shape.
    fn unburden(self) -> Node {
        match self {
            Self::Leaf { byte, .. } => Node::Leaf { byte },
            Self::Branch { left, right, .. } => Node::Branch {
                left: Box::new(left.unburden()),
                right: Box::new(right.unburden()),
            },
        }
    }
}
/// The partial order is simply the total order defined by the `Ord` impl.
impl PartialOrd for WeightedNode {
    fn partial_cmp(&self, rhs: &Self) -> Option<std::cmp::Ordering> {
        let ordering = Ord::cmp(self, rhs);
        Some(ordering)
    }
}
impl Ord for WeightedNode {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.count().cmp(&other.count()).reverse()
}
}

View file

@ -1,7 +1,12 @@
use std::io::{self, Read};
use std::{
env,
fmt::Debug,
io::{self, Read},
};
mod compression_scheme;
mod freq;
mod huffman;
mod rle;
#[cfg(test)]
mod test;
@ -9,9 +14,12 @@ mod test;
use bitvec::vec::BitVec;
pub use compression_scheme::CompressionScheme;
pub use freq::Freq;
pub use huffman::Huffman;
pub use rle::Rle;
fn main() -> Result<(), io::Error> {
let debug = env::args().any(|arg| arg == "--debug" || arg == "-d");
let mut buf = Vec::new();
let len_src = io::stdin().read_to_end(&mut buf)?;
@ -19,14 +27,23 @@ fn main() -> Result<(), io::Error> {
let mut bitbuf = BitVec::new();
let () = Rle::encode(&buf, &mut bitbuf);
let len_rle = bitbuf.len().div_ceil(8);
println!(" Rle'd: {len_rle}");
bitbuf.clear();
let header = Freq::encode(&buf, &mut bitbuf);
let len_freq = Freq::header_size(&header) + bitbuf.len().div_ceil(8);
println!(" Freq'd: {len_freq}");
run::<Rle, _>(&buf, &mut bitbuf, "rle", debug);
run::<Freq, _>(&buf, &mut bitbuf, "freq", debug);
run::<Huffman, _>(&buf, &mut bitbuf, "Huffman", debug);
Ok(())
}
/// Encode `buf` with `Scheme` and report the resulting size on stdout.
///
/// `bitbuf` is cleared first and then reused as the output buffer. The
/// reported size is the scheme's header size plus the encoded bits rounded up
/// to whole bytes. When `debug` is set, the header is also pretty-printed to
/// stderr.
fn run<Scheme, Header>(buf: &[u8], bitbuf: &mut BitVec, name: &str, debug: bool)
where
    Scheme: CompressionScheme<Header = Header>,
    Header: Debug,
{
    bitbuf.clear();
    let header = Scheme::encode(buf, bitbuf);

    let header_bytes = Scheme::header_size(&header);
    let payload_bytes = bitbuf.len().div_ceil(8);
    let total_bytes = header_bytes + payload_bytes;
    println!("{name}'d: {total_bytes}");

    if !debug {
        return;
    }
    eprintln!("{name} header: {header:#?}\n");
}

View file

@ -1,6 +1,6 @@
use quickcheck_macros::quickcheck;
use crate::{CompressionScheme, Freq, Rle};
use crate::{CompressionScheme, Freq, Huffman, Rle};
#[allow(clippy::needless_pass_by_value)]
#[quickcheck]
@ -13,3 +13,9 @@ fn roundtrip_freq(src: Vec<u8>) -> bool {
// Property: `Rle::idempotent_on` holds for arbitrary byte vectors.
// NOTE(review): despite the helper's name, this is presumably the
// encode-then-decode round-trip check — confirm against its definition.
fn roundtrip_rle(src: Vec<u8>) -> bool {
Rle::idempotent_on(&src)
}
// Property: `Huffman::idempotent_on` holds for arbitrary byte vectors.
// NOTE(review): despite the helper's name, this is presumably the
// encode-then-decode round-trip check — confirm against its definition.
// The lint is silenced presumably because quickcheck supplies owned
// arguments, so `src` is taken by value although it is only borrowed.
#[allow(clippy::needless_pass_by_value)]
#[quickcheck]
fn roundtrip_huffman(src: Vec<u8>) -> bool {
Huffman::idempotent_on(&src)
}