speed up Huffman encoding

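We precompute the bit sequence for every possible byte value (a 256-entry table) and look each input byte up in that table, instead of walking the tree once per input byte.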
This commit is contained in:
mehbark 2025-10-05 12:57:48 -04:00
parent efd3abef9e
commit fb28bb882e

View file

@ -1,4 +1,5 @@
use std::{ use std::{
array,
collections::{BinaryHeap, HashMap}, collections::{BinaryHeap, HashMap},
fmt, fmt,
}; };
@ -21,8 +22,15 @@ impl CompressionScheme for Huffman {
let tree = Node::build(&counts)?; let tree = Node::build(&counts)?;
#[allow(clippy::cast_possible_truncation, reason = "byte is in 0..256")]
let byte_mapping: [_; 256] = array::from_fn(|byte| {
let mut buf = BitVec::new();
tree.encode_byte(byte as u8, &mut buf);
buf
});
for &byte in src { for &byte in src {
tree.encode_byte(byte, buf); buf.extend_from_bitslice(&byte_mapping[byte as usize]);
} }
Some(tree) Some(tree)
@ -62,16 +70,13 @@ impl Node {
} }
/// Write to `buf` the sequence of bits that this tree has assigned `byte`. /// Write to `buf` the sequence of bits that this tree has assigned `byte`.
///
/// # Panics
/// Panics if called with a byte that was not passed to [`build`].
fn encode_byte(&self, byte: u8, buf: &mut BitVec) { fn encode_byte(&self, byte: u8, buf: &mut BitVec) {
let mut current = self; let mut current = self;
loop { loop {
match current { match current {
Node::Leaf { byte: _ } => break, Node::Leaf { byte: _ } => break,
Node::Branch { path0, path1 } => { Node::Branch { path0, path1 } => {
// TODO: this contains check is wasteful. be less dumb? // wasteful, but we precompute a table, so it's fine :D
if path0.contains(byte) { if path0.contains(byte) {
buf.push(false); buf.push(false);
current = path0; current = path0;