initial huffman; DRY main
This commit is contained in:
parent
fd0662b66d
commit
589e7b711f
4 changed files with 192 additions and 11 deletions
|
|
@ -9,7 +9,7 @@ pub trait CompressionScheme {
|
|||
|
||||
/// Encode some bytes into `buf`, returning a [`Header`][Self::Header].
|
||||
///
|
||||
/// This does not necessarily have to be deterministic,
|
||||
/// This does not have to be deterministic,
|
||||
/// but it **must** be decodable by [`decode`](Self::decode).
|
||||
/// That is, [`decode`](Self::decode) ∘ [`encode`](Self::encode) = `id`.
|
||||
fn encode(src: &[u8], buf: &mut BitVec) -> Self::Header;
|
||||
|
|
|
|||
158
src/huffman.rs
Normal file
158
src/huffman.rs
Normal file
|
|
@ -0,0 +1,158 @@
|
|||
use std::{
|
||||
collections::{BinaryHeap, HashMap},
|
||||
fmt,
|
||||
};
|
||||
|
||||
use bitvec::prelude::{BitSlice, BitVec};
|
||||
|
||||
use crate::CompressionScheme;
|
||||
|
||||
/// Marker type for the Huffman-coding compression scheme.
///
/// It carries no data; it exists so that [`CompressionScheme`] can be implemented for it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub struct Huffman;
|
||||
|
||||
impl CompressionScheme for Huffman {
|
||||
type Header = Option<Node>;
|
||||
|
||||
fn encode(src: &[u8], buf: &mut BitVec) -> Self::Header {
|
||||
let mut counts = HashMap::new();
|
||||
for byte in src {
|
||||
*counts.entry(*byte).or_default() += 1;
|
||||
}
|
||||
|
||||
let tree = Node::build(&counts)?;
|
||||
|
||||
Some(tree)
|
||||
}
|
||||
|
||||
fn decode(src: &BitSlice, header: &Self::Header, buf: &mut Vec<u8>) {
|
||||
if let Some(node) = header {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
fn header_size(header: &Self::Header) -> usize {
|
||||
header.as_ref().map_or(0, Node::byte_size)
|
||||
}
|
||||
}
|
||||
|
||||
/// A binary tree with bytes at its leaves, used as the header of the [`Huffman`] scheme.
///
/// The derived ordering depends on the declaration order of the variants and their fields.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum Node {
|
||||
/// A terminal node holding a single byte.
Leaf { byte: u8 },
|
||||
/// An internal node with exactly two subtrees.
Branch { left: Box<Self>, right: Box<Self> },
|
||||
}
|
||||
|
||||
impl Node {
|
||||
fn build(counts: &HashMap<u8, u32>) -> Option<Self> {
|
||||
WeightedNode::build(counts).map(WeightedNode::unburden)
|
||||
}
|
||||
|
||||
/// Write to `buf` the sequence of bits that this tree has assigned `byte`.
|
||||
///
|
||||
/// # Panics
|
||||
/// Panics if called with a byte that was not passed to [`build`].
|
||||
fn encode_byte(&self, byte: u8, buf: &mut BitVec) {
|
||||
let mut current = self;
|
||||
todo!()
|
||||
}
|
||||
|
||||
/// Return the next encoded byte in the stream of bits.
|
||||
fn decode_byte(src: &BitSlice) -> Option<u8> {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn byte_size(&self) -> usize {
|
||||
match self {
|
||||
Node::Leaf { .. } => 1,
|
||||
Node::Branch { left, right, .. } => 1 + left.byte_size() + right.byte_size(),
|
||||
}
|
||||
}
|
||||
|
||||
fn write_depth(&self, f: &mut fmt::Formatter<'_>, depth: usize) -> fmt::Result {
|
||||
for _ in 0..depth {
|
||||
write!(f, " ")?;
|
||||
}
|
||||
match self {
|
||||
Node::Leaf { byte } => writeln!(f, "| {:?}", *byte as char),
|
||||
Node::Branch { left, right } => {
|
||||
writeln!(f, "{depth}+")?;
|
||||
left.write_depth(f, depth + 1)?;
|
||||
right.write_depth(f, depth + 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for Node {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
self.write_depth(f, 0)
|
||||
}
|
||||
}
|
||||
|
||||
/// A tree node that additionally records a `count`, used while the tree is being built.
///
/// Equality is derived, so it compares every field of the whole subtree.
#[derive(Clone, PartialEq, Eq)]
|
||||
enum WeightedNode {
|
||||
/// A single byte together with its count.
Leaf {
|
||||
byte: u8,
|
||||
count: u32,
|
||||
},
|
||||
/// Two subtrees together with a count for the pair.
Branch {
|
||||
count: u32,
|
||||
left: Box<Self>,
|
||||
right: Box<Self>,
|
||||
},
|
||||
}
|
||||
|
||||
impl WeightedNode {
|
||||
fn build(counts: &HashMap<u8, u32>) -> Option<Self> {
|
||||
let mut queue = BinaryHeap::new();
|
||||
|
||||
for (&byte, &count) in counts {
|
||||
queue.push(WeightedNode::Leaf { byte, count });
|
||||
}
|
||||
|
||||
loop {
|
||||
let first = queue.pop()?;
|
||||
|
||||
let Some(second) = queue.pop() else {
|
||||
return Some(first);
|
||||
};
|
||||
|
||||
queue.push(WeightedNode::join(first, second));
|
||||
}
|
||||
}
|
||||
|
||||
fn count(&self) -> u32 {
|
||||
match self {
|
||||
Self::Branch { count, .. } | Self::Leaf { count, .. } => *count,
|
||||
}
|
||||
}
|
||||
|
||||
fn join(left: Self, right: Self) -> Self {
|
||||
Self::Branch {
|
||||
count: left.count() + right.count(),
|
||||
left: Box::new(left),
|
||||
right: Box::new(right),
|
||||
}
|
||||
}
|
||||
|
||||
fn unburden(self) -> Node {
|
||||
match self {
|
||||
WeightedNode::Leaf { byte, .. } => Node::Leaf { byte },
|
||||
WeightedNode::Branch { left, right, .. } => Node::Branch {
|
||||
left: Box::new(left.unburden()),
|
||||
right: Box::new(right.unburden()),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialOrd for WeightedNode {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for WeightedNode {
|
||||
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||||
self.count().cmp(&other.count()).reverse()
|
||||
}
|
||||
}
|
||||
35
src/main.rs
35
src/main.rs
|
|
@ -1,7 +1,12 @@
|
|||
use std::io::{self, Read};
|
||||
use std::{
|
||||
env,
|
||||
fmt::Debug,
|
||||
io::{self, Read},
|
||||
};
|
||||
|
||||
mod compression_scheme;
|
||||
mod freq;
|
||||
mod huffman;
|
||||
mod rle;
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
|
@ -9,9 +14,12 @@ mod test;
|
|||
use bitvec::vec::BitVec;
|
||||
pub use compression_scheme::CompressionScheme;
|
||||
pub use freq::Freq;
|
||||
pub use huffman::Huffman;
|
||||
pub use rle::Rle;
|
||||
|
||||
fn main() -> Result<(), io::Error> {
|
||||
let debug = env::args().any(|arg| arg == "--debug" || arg == "-d");
|
||||
|
||||
let mut buf = Vec::new();
|
||||
let len_src = io::stdin().read_to_end(&mut buf)?;
|
||||
|
||||
|
|
@ -19,14 +27,23 @@ fn main() -> Result<(), io::Error> {
|
|||
|
||||
let mut bitbuf = BitVec::new();
|
||||
|
||||
let () = Rle::encode(&buf, &mut bitbuf);
|
||||
let len_rle = bitbuf.len().div_ceil(8);
|
||||
println!(" Rle'd: {len_rle}");
|
||||
|
||||
bitbuf.clear();
|
||||
let header = Freq::encode(&buf, &mut bitbuf);
|
||||
let len_freq = Freq::header_size(&header) + bitbuf.len().div_ceil(8);
|
||||
println!(" Freq'd: {len_freq}");
|
||||
run::<Rle, _>(&buf, &mut bitbuf, "rle", debug);
|
||||
run::<Freq, _>(&buf, &mut bitbuf, "freq", debug);
|
||||
run::<Huffman, _>(&buf, &mut bitbuf, "Huffman", debug);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn run<Scheme, Header>(buf: &[u8], bitbuf: &mut BitVec, name: &str, debug: bool)
|
||||
where
|
||||
Scheme: CompressionScheme<Header = Header>,
|
||||
Header: Debug,
|
||||
{
|
||||
bitbuf.clear();
|
||||
let header = Scheme::encode(buf, bitbuf);
|
||||
let len_freq = Scheme::header_size(&header) + bitbuf.len().div_ceil(8);
|
||||
println!("{name}'d: {len_freq}");
|
||||
if debug {
|
||||
eprintln!("{name} header: {header:#?}\n");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
use quickcheck_macros::quickcheck;
|
||||
|
||||
use crate::{CompressionScheme, Freq, Rle};
|
||||
use crate::{CompressionScheme, Freq, Huffman, Rle};
|
||||
|
||||
#[allow(clippy::needless_pass_by_value)]
|
||||
#[quickcheck]
|
||||
|
|
@ -13,3 +13,9 @@ fn roundtrip_freq(src: Vec<u8>) -> bool {
|
|||
fn roundtrip_rle(src: Vec<u8>) -> bool {
|
||||
Rle::idempotent_on(&src)
|
||||
}
|
||||
|
||||
/// Property test: `Huffman::idempotent_on` must hold for arbitrary byte vectors.
///
/// NOTE(review): `idempotent_on` is defined outside this view; presumably it checks that
/// decoding the encoded input gives the input back. Confirm against its definition.
// The lint is silenced because the property takes `src` by value but only borrows it.
#[allow(clippy::needless_pass_by_value)]
|
||||
#[quickcheck]
|
||||
fn roundtrip_huffman(src: Vec<u8>) -> bool {
|
||||
Huffman::idempotent_on(&src)
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue