initial huffman; DRY main
This commit is contained in:
parent
fd0662b66d
commit
589e7b711f
4 changed files with 192 additions and 11 deletions
|
|
@ -9,7 +9,7 @@ pub trait CompressionScheme {
|
||||||
|
|
||||||
/// Encode some bytes into `buf`, returning a [`Header`][Self::Header].
|
/// Encode some bytes into `buf`, returning a [`Header`][Self::Header].
|
||||||
///
|
///
|
||||||
/// This does not necessarily have to be deterministic,
|
/// This does not have to be deterministic,
|
||||||
/// but it **must** be decodable by [`decode`](Self::decode).
|
/// but it **must** be decodable by [`decode`](Self::decode).
|
||||||
/// That is, [`decode`](Self::decode) ∘ [`encode`](Self::encode) = `id`.
|
/// That is, [`decode`](Self::decode) ∘ [`encode`](Self::encode) = `id`.
|
||||||
fn encode(src: &[u8], buf: &mut BitVec) -> Self::Header;
|
fn encode(src: &[u8], buf: &mut BitVec) -> Self::Header;
|
||||||
|
|
|
||||||
158
src/huffman.rs
Normal file
158
src/huffman.rs
Normal file
|
|
@ -0,0 +1,158 @@
|
||||||
|
use std::{
|
||||||
|
collections::{BinaryHeap, HashMap},
|
||||||
|
fmt,
|
||||||
|
};
|
||||||
|
|
||||||
|
use bitvec::prelude::{BitSlice, BitVec};
|
||||||
|
|
||||||
|
use crate::CompressionScheme;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||||
|
pub struct Huffman;
|
||||||
|
|
||||||
|
impl CompressionScheme for Huffman {
|
||||||
|
type Header = Option<Node>;
|
||||||
|
|
||||||
|
fn encode(src: &[u8], buf: &mut BitVec) -> Self::Header {
|
||||||
|
let mut counts = HashMap::new();
|
||||||
|
for byte in src {
|
||||||
|
*counts.entry(*byte).or_default() += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
let tree = Node::build(&counts)?;
|
||||||
|
|
||||||
|
Some(tree)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn decode(src: &BitSlice, header: &Self::Header, buf: &mut Vec<u8>) {
|
||||||
|
if let Some(node) = header {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn header_size(header: &Self::Header) -> usize {
|
||||||
|
header.as_ref().map_or(0, Node::byte_size)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub enum Node {
|
||||||
|
Leaf { byte: u8 },
|
||||||
|
Branch { left: Box<Self>, right: Box<Self> },
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Node {
|
||||||
|
fn build(counts: &HashMap<u8, u32>) -> Option<Self> {
|
||||||
|
WeightedNode::build(counts).map(WeightedNode::unburden)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write to `buf` the sequence of bits that this tree has assigned `byte`.
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
/// Panics if called with a byte that was not passed to [`build`].
|
||||||
|
fn encode_byte(&self, byte: u8, buf: &mut BitVec) {
|
||||||
|
let mut current = self;
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the next encoded byte in the stream of bits.
|
||||||
|
fn decode_byte(src: &BitSlice) -> Option<u8> {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn byte_size(&self) -> usize {
|
||||||
|
match self {
|
||||||
|
Node::Leaf { .. } => 1,
|
||||||
|
Node::Branch { left, right, .. } => 1 + left.byte_size() + right.byte_size(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_depth(&self, f: &mut fmt::Formatter<'_>, depth: usize) -> fmt::Result {
|
||||||
|
for _ in 0..depth {
|
||||||
|
write!(f, " ")?;
|
||||||
|
}
|
||||||
|
match self {
|
||||||
|
Node::Leaf { byte } => writeln!(f, "| {:?}", *byte as char),
|
||||||
|
Node::Branch { left, right } => {
|
||||||
|
writeln!(f, "{depth}+")?;
|
||||||
|
left.write_depth(f, depth + 1)?;
|
||||||
|
right.write_depth(f, depth + 1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Debug for Node {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
self.write_depth(f, 0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, PartialEq, Eq)]
|
||||||
|
enum WeightedNode {
|
||||||
|
Leaf {
|
||||||
|
byte: u8,
|
||||||
|
count: u32,
|
||||||
|
},
|
||||||
|
Branch {
|
||||||
|
count: u32,
|
||||||
|
left: Box<Self>,
|
||||||
|
right: Box<Self>,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
impl WeightedNode {
|
||||||
|
fn build(counts: &HashMap<u8, u32>) -> Option<Self> {
|
||||||
|
let mut queue = BinaryHeap::new();
|
||||||
|
|
||||||
|
for (&byte, &count) in counts {
|
||||||
|
queue.push(WeightedNode::Leaf { byte, count });
|
||||||
|
}
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let first = queue.pop()?;
|
||||||
|
|
||||||
|
let Some(second) = queue.pop() else {
|
||||||
|
return Some(first);
|
||||||
|
};
|
||||||
|
|
||||||
|
queue.push(WeightedNode::join(first, second));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn count(&self) -> u32 {
|
||||||
|
match self {
|
||||||
|
Self::Branch { count, .. } | Self::Leaf { count, .. } => *count,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn join(left: Self, right: Self) -> Self {
|
||||||
|
Self::Branch {
|
||||||
|
count: left.count() + right.count(),
|
||||||
|
left: Box::new(left),
|
||||||
|
right: Box::new(right),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn unburden(self) -> Node {
|
||||||
|
match self {
|
||||||
|
WeightedNode::Leaf { byte, .. } => Node::Leaf { byte },
|
||||||
|
WeightedNode::Branch { left, right, .. } => Node::Branch {
|
||||||
|
left: Box::new(left.unburden()),
|
||||||
|
right: Box::new(right.unburden()),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialOrd for WeightedNode {
|
||||||
|
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||||
|
Some(self.cmp(other))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Ord for WeightedNode {
|
||||||
|
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||||||
|
self.count().cmp(&other.count()).reverse()
|
||||||
|
}
|
||||||
|
}
|
||||||
35
src/main.rs
35
src/main.rs
|
|
@ -1,7 +1,12 @@
|
||||||
use std::io::{self, Read};
|
use std::{
|
||||||
|
env,
|
||||||
|
fmt::Debug,
|
||||||
|
io::{self, Read},
|
||||||
|
};
|
||||||
|
|
||||||
mod compression_scheme;
|
mod compression_scheme;
|
||||||
mod freq;
|
mod freq;
|
||||||
|
mod huffman;
|
||||||
mod rle;
|
mod rle;
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test;
|
mod test;
|
||||||
|
|
@ -9,9 +14,12 @@ mod test;
|
||||||
use bitvec::vec::BitVec;
|
use bitvec::vec::BitVec;
|
||||||
pub use compression_scheme::CompressionScheme;
|
pub use compression_scheme::CompressionScheme;
|
||||||
pub use freq::Freq;
|
pub use freq::Freq;
|
||||||
|
pub use huffman::Huffman;
|
||||||
pub use rle::Rle;
|
pub use rle::Rle;
|
||||||
|
|
||||||
fn main() -> Result<(), io::Error> {
|
fn main() -> Result<(), io::Error> {
|
||||||
|
let debug = env::args().any(|arg| arg == "--debug" || arg == "-d");
|
||||||
|
|
||||||
let mut buf = Vec::new();
|
let mut buf = Vec::new();
|
||||||
let len_src = io::stdin().read_to_end(&mut buf)?;
|
let len_src = io::stdin().read_to_end(&mut buf)?;
|
||||||
|
|
||||||
|
|
@ -19,14 +27,23 @@ fn main() -> Result<(), io::Error> {
|
||||||
|
|
||||||
let mut bitbuf = BitVec::new();
|
let mut bitbuf = BitVec::new();
|
||||||
|
|
||||||
let () = Rle::encode(&buf, &mut bitbuf);
|
run::<Rle, _>(&buf, &mut bitbuf, "rle", debug);
|
||||||
let len_rle = bitbuf.len().div_ceil(8);
|
run::<Freq, _>(&buf, &mut bitbuf, "freq", debug);
|
||||||
println!(" Rle'd: {len_rle}");
|
run::<Huffman, _>(&buf, &mut bitbuf, "Huffman", debug);
|
||||||
|
|
||||||
bitbuf.clear();
|
|
||||||
let header = Freq::encode(&buf, &mut bitbuf);
|
|
||||||
let len_freq = Freq::header_size(&header) + bitbuf.len().div_ceil(8);
|
|
||||||
println!(" Freq'd: {len_freq}");
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn run<Scheme, Header>(buf: &[u8], bitbuf: &mut BitVec, name: &str, debug: bool)
|
||||||
|
where
|
||||||
|
Scheme: CompressionScheme<Header = Header>,
|
||||||
|
Header: Debug,
|
||||||
|
{
|
||||||
|
bitbuf.clear();
|
||||||
|
let header = Scheme::encode(buf, bitbuf);
|
||||||
|
let len_freq = Scheme::header_size(&header) + bitbuf.len().div_ceil(8);
|
||||||
|
println!("{name}'d: {len_freq}");
|
||||||
|
if debug {
|
||||||
|
eprintln!("{name} header: {header:#?}\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
use quickcheck_macros::quickcheck;
|
use quickcheck_macros::quickcheck;
|
||||||
|
|
||||||
use crate::{CompressionScheme, Freq, Rle};
|
use crate::{CompressionScheme, Freq, Huffman, Rle};
|
||||||
|
|
||||||
#[allow(clippy::needless_pass_by_value)]
|
#[allow(clippy::needless_pass_by_value)]
|
||||||
#[quickcheck]
|
#[quickcheck]
|
||||||
|
|
@ -13,3 +13,9 @@ fn roundtrip_freq(src: Vec<u8>) -> bool {
|
||||||
fn roundtrip_rle(src: Vec<u8>) -> bool {
|
fn roundtrip_rle(src: Vec<u8>) -> bool {
|
||||||
Rle::idempotent_on(&src)
|
Rle::idempotent_on(&src)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Property test: `Huffman::idempotent_on` must hold for arbitrary byte vectors.
// The lint is allowed because the generated input arrives as an owned `Vec<u8>`
// (presumably a quickcheck requirement — same pattern as the sibling properties).
#[allow(clippy::needless_pass_by_value)]
|
||||||
|
#[quickcheck]
|
||||||
|
fn roundtrip_huffman(src: Vec<u8>) -> bool {
|
||||||
|
Huffman::idempotent_on(&src)
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue