From 3a07eae54ff7ffee98f134e5721cbad7b322a847 Mon Sep 17 00:00:00 2001
From: mehbark
Date: Sat, 4 Oct 2025 15:23:49 -0400
Subject: [PATCH] finish huffman

---
 src/huffman.rs | 82 +++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 61 insertions(+), 21 deletions(-)

diff --git a/src/huffman.rs b/src/huffman.rs
index 62070b0..061fc42 100644
--- a/src/huffman.rs
+++ b/src/huffman.rs
@@ -21,12 +21,22 @@ impl CompressionScheme for Huffman {
 
         let tree = Node::build(&counts)?;
 
+        for &byte in src {
+            tree.encode_byte(byte, buf);
+        }
+
         Some(tree)
     }
 
-    fn decode(src: &BitSlice, header: &Self::Header, buf: &mut Vec) {
-        if let Some(node) = header {
-            todo!()
+    fn decode(mut src: &BitSlice, header: &Self::Header, buf: &mut Vec) {
+        let Some(header) = header else {
+            // no header means an empty source
+            return;
+        };
+
+        while let Some((byte, rest)) = header.decode_byte(src) {
+            buf.push(byte);
+            src = rest;
         }
     }
 
@@ -38,7 +48,7 @@ impl CompressionScheme for Huffman {
 #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum Node {
     Leaf { byte: u8 },
-    Branch { left: Box, right: Box },
+    Branch { path0: Box, path1: Box },
 }
 
 impl Node {
@@ -52,18 +62,48 @@ impl Node {
     /// Panics if called with a byte that was not passed to [`build`].
     fn encode_byte(&self, byte: u8, buf: &mut BitVec) {
         let mut current = self;
-        todo!()
+        loop {
+            match current {
+                Node::Leaf { byte: _ } => break,
+                Node::Branch { path0, path1 } => {
+                    // TODO: this contains check is wasteful. be less dumb?
+                    if path0.contains(byte) {
+                        buf.push(false);
+                        current = path0;
+                    } else {
+                        buf.push(true);
+                        current = path1;
+                    }
+                }
+            }
+        }
     }
 
-    /// Return the next encoded byte in the stream of bits.
-    fn decode_byte(src: &BitSlice) -> Option {
-        todo!()
+    /// Return the next encoded byte in a stream of bits and the rest of the bits.
+    fn decode_byte<'a>(&self, src: &'a BitSlice) -> Option<(u8, &'a BitSlice)> {
+        match self {
+            Node::Leaf { byte } => Some((*byte, src)),
+            Node::Branch { path0, path1 } => {
+                if *src.first()? {
+                    path1.decode_byte(&src[1..])
+                } else {
+                    path0.decode_byte(&src[1..])
+                }
+            }
+        }
+    }
+
+    fn contains(&self, byte: u8) -> bool {
+        match self {
+            Node::Leaf { byte: b } => *b == byte,
+            Node::Branch { path0, path1 } => path0.contains(byte) || path1.contains(byte),
+        }
     }
 
     fn byte_size(&self) -> usize {
         match self {
             Node::Leaf { .. } => 1,
-            Node::Branch { left, right, .. } => 1 + left.byte_size() + right.byte_size(),
+            Node::Branch { path0, path1, .. } => 1 + path0.byte_size() + path1.byte_size(),
         }
     }
 
@@ -73,10 +113,10 @@ impl Node {
         }
         match self {
             Node::Leaf { byte } => writeln!(f, "| {:?}", *byte as char),
-            Node::Branch { left, right } => {
+            Node::Branch { path0, path1 } => {
                 writeln!(f, "{depth}+")?;
-                left.write_depth(f, depth + 1)?;
-                right.write_depth(f, depth + 1)
+                path0.write_depth(f, depth + 1)?;
+                path1.write_depth(f, depth + 1)
             }
         }
     }
@@ -96,8 +136,8 @@ enum WeightedNode {
     },
     Branch {
         count: u32,
-        left: Box,
-        right: Box,
+        path0: Box,
+        path1: Box,
     },
 }
 
@@ -126,20 +166,20 @@ impl WeightedNode {
         }
     }
 
-    fn join(left: Self, right: Self) -> Self {
+    fn join(path0: Self, path1: Self) -> Self {
         Self::Branch {
-            count: left.count() + right.count(),
-            left: Box::new(left),
-            right: Box::new(right),
+            count: path0.count() + path1.count(),
+            path0: Box::new(path0),
+            path1: Box::new(path1),
         }
     }
 
     fn unburden(self) -> Node {
         match self {
             WeightedNode::Leaf { byte, .. } => Node::Leaf { byte },
-            WeightedNode::Branch { left, right, .. } => Node::Branch {
-                left: Box::new(left.unburden()),
-                right: Box::new(right.unburden()),
+            WeightedNode::Branch { path0, path1, .. } => Node::Branch {
+                path0: Box::new(path0.unburden()),
+                path1: Box::new(path1.unburden()),
             },
         }
     }