finish huffman

This commit is contained in:
mehbark 2025-10-04 15:23:49 -04:00
parent 589e7b711f
commit 3a07eae54f
Signed by: mbk
GPG key ID: E333EC1335FFCCDB

View file

@ -21,12 +21,22 @@ impl CompressionScheme for Huffman {
let tree = Node::build(&counts)?;
for &byte in src {
tree.encode_byte(byte, buf);
}
Some(tree)
}
fn decode(src: &BitSlice, header: &Self::Header, buf: &mut Vec<u8>) {
if let Some(node) = header {
todo!()
/// Decode the Huffman-coded bits in `src`, appending every decoded byte to `buf`.
///
/// `header` is the optional code tree; when it is `None` the source was empty
/// and nothing is appended.
///
/// Decoding ends as soon as `decode_byte` cannot produce another byte, which
/// happens when the remaining bits run out before a leaf is reached.
fn decode(mut src: &BitSlice, header: &Self::Header, buf: &mut Vec<u8>) {
    let Some(header) = header else {
        // no header means an empty source
        return;
    };
    while let Some((byte, rest)) = header.decode_byte(src) {
        if rest.len() == src.len() {
            // A tree that is a lone leaf yields its byte without consuming any
            // bits, so looping again would never terminate and `buf` would grow
            // without bound. Stop instead of spinning.
            // NOTE(review): such a tree gives its byte a zero-length code, so
            // the number of bytes cannot be recovered from the bits alone;
            // confirm whether `Node::build` can ever return a lone leaf.
            break;
        }
        buf.push(byte);
        src = rest;
    }
}
@ -38,7 +48,7 @@ impl CompressionScheme for Huffman {
/// A node of the Huffman code tree used to encode and decode bytes.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Node {
/// A single byte value; reaching it ends the code for that byte.
Leaf { byte: u8 },
Branch { left: Box<Self>, right: Box<Self> },
/// An interior node: `path0` is the child taken for a `false` (0) bit and
/// `path1` the child taken for a `true` (1) bit.
Branch { path0: Box<Self>, path1: Box<Self> },
}
impl Node {
@ -52,18 +62,48 @@ impl Node {
/// Panics if called with a byte that was not passed to [`build`].
fn encode_byte(&self, byte: u8, buf: &mut BitVec) {
// Walk down from this node, appending one bit to `buf` per branch taken.
let mut current = self;
// NOTE(review): this `todo!()` precedes the loop, so as the text stands the
// loop below is never reached; it looks like a leftover stub line — confirm.
todo!()
loop {
match current {
// Reaching a leaf ends the code for this byte.
// NOTE(review): the leaf's byte is never compared with `byte`. A byte that
// is absent from the tree takes `path1` at every branch and stops at some
// other leaf without panicking, which differs from the doc comment above.
Node::Leaf { byte: _ } => break,
Node::Branch { path0, path1 } => {
// TODO: this contains check is wasteful. be less dumb?
// Descending into `path0` is recorded as a `false` bit, `path1` as `true`.
if path0.contains(byte) {
buf.push(false);
current = path0;
} else {
buf.push(true);
current = path1;
}
}
}
}
}
/// Return the next encoded byte in the stream of bits.
fn decode_byte(src: &BitSlice) -> Option<u8> {
todo!()
/// Decode one byte from the front of `src` by walking the tree from this node.
///
/// Every branch consumes one bit: a `true` bit descends into `path1`, a
/// `false` bit into `path0`. On reaching a leaf, returns that leaf's byte
/// together with the bits that were not consumed. Returns `None` when `src`
/// runs out before a leaf is reached.
///
/// If this node is itself a leaf, no bits are consumed and `src` is handed
/// back unchanged.
fn decode_byte<'a>(&self, src: &'a BitSlice) -> Option<(u8, &'a BitSlice)> {
    let mut node = self;
    let mut remaining = src;
    loop {
        match node {
            Node::Leaf { byte } => return Some((*byte, remaining)),
            Node::Branch { path0, path1 } => {
                // Bail out with `None` if there is no bit left to steer by.
                let bit = *remaining.first()?;
                remaining = &remaining[1..];
                // Reborrow through the `Box` to get at the child node.
                node = if bit { &**path1 } else { &**path0 };
            }
        }
    }
}
/// Report whether `byte` is stored in a leaf of the subtree rooted at this node.
fn contains(&self, byte: u8) -> bool {
    match self {
        Node::Leaf { byte: stored } => byte == *stored,
        // Search `path0` first, then `path1`; `any` stops at the first hit.
        Node::Branch { path0, path1 } => [path0, path1]
            .iter()
            .any(|child| child.contains(byte)),
    }
}
/// Count this node plus all of its descendants, one unit per node.
// NOTE(review): going by the name, the unit is presumably bytes of some
// serialized form of the tree — confirm against the caller.
fn byte_size(&self) -> usize {
match self {
Node::Leaf { .. } => 1,
// NOTE(review): two `Branch` arms appear here; the `left`/`right` one looks
// like the line from before the fields were renamed — confirm.
Node::Branch { left, right, .. } => 1 + left.byte_size() + right.byte_size(),
// One for the branch itself plus the sizes of both subtrees.
Node::Branch { path0, path1, .. } => 1 + path0.byte_size() + path1.byte_size(),
}
}
@ -73,10 +113,10 @@ impl Node {
}
match self {
Node::Leaf { byte } => writeln!(f, "| {:?}", *byte as char),
Node::Branch { left, right } => {
Node::Branch { path0, path1 } => {
writeln!(f, "{depth}+")?;
left.write_depth(f, depth + 1)?;
right.write_depth(f, depth + 1)
path0.write_depth(f, depth + 1)?;
path1.write_depth(f, depth + 1)
}
}
}
@ -96,8 +136,8 @@ enum WeightedNode {
},
Branch {
count: u32,
left: Box<Self>,
right: Box<Self>,
path0: Box<Self>,
path1: Box<Self>,
},
}
@ -126,20 +166,20 @@ impl WeightedNode {
}
}
/// Combine two subtrees under a new branch whose count is the sum of their counts.
// NOTE(review): both the `left`/`right` and the `path0`/`path1` forms of the
// signature and fields appear below; the former look like the lines from
// before the rename — confirm.
fn join(left: Self, right: Self) -> Self {
fn join(path0: Self, path1: Self) -> Self {
Self::Branch {
count: left.count() + right.count(),
left: Box::new(left),
right: Box::new(right),
count: path0.count() + path1.count(),
path0: Box::new(path0),
path1: Box::new(path1),
}
}
/// Convert this weighted tree into a plain [`Node`] tree, dropping the counts.
fn unburden(self) -> Node {
match self {
// A leaf keeps only its byte.
WeightedNode::Leaf { byte, .. } => Node::Leaf { byte },
// NOTE(review): the `left`/`right` arm below looks like the line from before
// the fields were renamed — confirm.
WeightedNode::Branch { left, right, .. } => Node::Branch {
left: Box::new(left.unburden()),
right: Box::new(right.unburden()),
// A branch is rebuilt from its two recursively converted children.
WeightedNode::Branch { path0, path1, .. } => Node::Branch {
path0: Box::new(path0.unburden()),
path1: Box::new(path1.unburden()),
},
}
}