speed up Huffman encoding

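We compute the bits for each byte in advance: a 256-entry lookup table is built once after the tree is constructed, so encoding each input byte becomes a table lookup instead of a tree walk.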
2025-10-05 12:57:48 -04:00 · 2025-10-05 12:57:48 -04:00 · fb28bb882e
commit fb28bb882e
parent efd3abef9e
1 changed file with 10 additions and 5 deletions
--- a/src/huffman.rs
+++ b/src/huffman.rs
@@ -1,4 +1,5 @@
 use std::{
+    array,
    collections::{BinaryHeap, HashMap},
    fmt,
 };
@@ -21,8 +22,15 @@ impl CompressionScheme for Huffman {

        let tree = Node::build(&counts)?;

+        #[allow(clippy::cast_possible_truncation, reason = "byte is in 0..256")]
+        let byte_mapping: [_; 256] = array::from_fn(|byte| {
+            let mut buf = BitVec::new();
+            tree.encode_byte(byte as u8, &mut buf);
+            buf
+        });
+
        for &byte in src {
-            tree.encode_byte(byte, buf);
+            buf.extend_from_bitslice(&byte_mapping[byte as usize]);
        }

        Some(tree)
@@ -62,16 +70,13 @@ impl Node {
    }

    /// Write to `buf` the sequence of bits that this tree has assigned `byte`.
-    ///
-    /// # Panics
-    /// Panics if called with a byte that was not passed to [`build`].
    fn encode_byte(&self, byte: u8, buf: &mut BitVec) {
        let mut current = self;
        loop {
            match current {
                Node::Leaf { byte: _ } => break,
                Node::Branch { path0, path1 } => {
-                    // TODO: this contains check is wasteful. be less dumb?
+                    // wasteful, but we precompute a table, so it's fine :D
                    if path0.contains(byte) {
                        buf.push(false);
                        current = path0;