finish huffman

This commit is contained in:
mehbark 2025-10-04 15:23:49 -04:00
parent 589e7b711f
commit 3a07eae54f
Signed by: mbk
GPG key ID: E333EC1335FFCCDB

View file

@ -21,12 +21,22 @@ impl CompressionScheme for Huffman {
let tree = Node::build(&counts)?; let tree = Node::build(&counts)?;
for &byte in src {
tree.encode_byte(byte, buf);
}
Some(tree) Some(tree)
} }
/// Decodes the Huffman-coded bits in `src`, appending every decoded byte to `buf`.
///
/// `header` is the code tree returned by the encoder. A missing header means the
/// encoded source was empty, in which case nothing is appended.
///
/// Decoding stops as soon as the remaining bits no longer form a complete code
/// (i.e. `decode_byte` returns `None`).
fn decode(mut src: &BitSlice, header: &Self::Header, buf: &mut Vec<u8>) {
    let Some(header) = header else {
        // no header means an empty source
        return;
    };
    while let Some((byte, rest)) = header.decode_byte(src) {
        buf.push(byte);
        if rest.len() == src.len() {
            // The tree is a lone leaf: its code is zero bits long, so
            // `decode_byte` succeeds forever without consuming input. Stop
            // instead of looping (and growing `buf`) without bound.
            // NOTE(review): the repeat count of a single-symbol source cannot be
            // recovered from the bit stream alone; storing the length in the
            // header (or never building a lone-leaf tree) is the real fix.
            break;
        }
        src = rest;
    }
}
@ -38,7 +48,7 @@ impl CompressionScheme for Huffman {
/// A Huffman code tree.
///
/// A `Leaf` holds one source byte. A `Branch` has two subtrees: `path0` is
/// followed for a `0` (`false`) bit and `path1` for a `1` (`true`) bit.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Node {
    /// A terminal node carrying the byte it decodes to.
    Leaf { byte: u8 },
    /// An inner node; `path0` is taken on a `false` bit, `path1` on a `true` bit.
    Branch { path0: Box<Self>, path1: Box<Self> },
}
impl Node { impl Node {
@ -52,18 +62,48 @@ impl Node {
/// Panics if called with a byte that was not passed to [`build`]. /// Panics if called with a byte that was not passed to [`build`].
fn encode_byte(&self, byte: u8, buf: &mut BitVec) { fn encode_byte(&self, byte: u8, buf: &mut BitVec) {
let mut current = self; let mut current = self;
todo!() loop {
match current {
Node::Leaf { byte: _ } => break,
Node::Branch { path0, path1 } => {
// TODO: this contains check is wasteful. be less dumb?
if path0.contains(byte) {
buf.push(false);
current = path0;
} else {
buf.push(true);
current = path1;
}
}
}
}
} }
/// Return the next encoded byte in the stream of bits. /// Return the next encoded byte in a stream of bits and the rest of the bits.
fn decode_byte(src: &BitSlice) -> Option<u8> { fn decode_byte<'a>(&self, src: &'a BitSlice) -> Option<(u8, &'a BitSlice)> {
todo!() match self {
Node::Leaf { byte } => Some((*byte, src)),
Node::Branch { path0, path1 } => {
if *src.first()? {
path1.decode_byte(&src[1..])
} else {
path0.decode_byte(&src[1..])
}
}
}
}
fn contains(&self, byte: u8) -> bool {
match self {
Node::Leaf { byte: b } => *b == byte,
Node::Branch { path0, path1 } => path0.contains(byte) || path1.contains(byte),
}
} }
fn byte_size(&self) -> usize { fn byte_size(&self) -> usize {
match self { match self {
Node::Leaf { .. } => 1, Node::Leaf { .. } => 1,
Node::Branch { left, right, .. } => 1 + left.byte_size() + right.byte_size(), Node::Branch { path0, path1, .. } => 1 + path0.byte_size() + path1.byte_size(),
} }
} }
@ -73,10 +113,10 @@ impl Node {
} }
match self { match self {
Node::Leaf { byte } => writeln!(f, "| {:?}", *byte as char), Node::Leaf { byte } => writeln!(f, "| {:?}", *byte as char),
Node::Branch { left, right } => { Node::Branch { path0, path1 } => {
writeln!(f, "{depth}+")?; writeln!(f, "{depth}+")?;
left.write_depth(f, depth + 1)?; path0.write_depth(f, depth + 1)?;
right.write_depth(f, depth + 1) path1.write_depth(f, depth + 1)
} }
} }
} }
@ -96,8 +136,8 @@ enum WeightedNode {
}, },
Branch { Branch {
count: u32, count: u32,
left: Box<Self>, path0: Box<Self>,
right: Box<Self>, path1: Box<Self>,
}, },
} }
@ -126,20 +166,20 @@ impl WeightedNode {
} }
} }
/// Combines two weighted subtrees under a new branch.
///
/// `path0` becomes the `0`-bit child and `path1` the `1`-bit child. The new
/// branch's count is the sum of both children's counts, saturating at
/// `u32::MAX` so that very large inputs neither panic (debug builds) nor wrap
/// around to a misleadingly small weight (release builds).
fn join(path0: Self, path1: Self) -> Self {
    Self::Branch {
        // Evaluated before the children are moved into their boxes below.
        count: path0.count().saturating_add(path1.count()),
        path0: Box::new(path0),
        path1: Box::new(path1),
    }
}
fn unburden(self) -> Node { fn unburden(self) -> Node {
match self { match self {
WeightedNode::Leaf { byte, .. } => Node::Leaf { byte }, WeightedNode::Leaf { byte, .. } => Node::Leaf { byte },
WeightedNode::Branch { left, right, .. } => Node::Branch { WeightedNode::Branch { path0, path1, .. } => Node::Branch {
left: Box::new(left.unburden()), path0: Box::new(path0.unburden()),
right: Box::new(right.unburden()), path1: Box::new(path1.unburden()),
}, },
} }
} }