speed up Huffman encoding

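We precompute the bit sequence for every possible byte value in a 256-entry table, so encoding each source byte becomes a table lookup instead of a fresh walk of the tree.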
This commit is contained in:
mehbark 2025-10-05 12:57:48 -04:00
parent efd3abef9e
commit fb28bb882e

View file

@ -1,4 +1,5 @@
use std::{
array,
collections::{BinaryHeap, HashMap},
fmt,
};
@ -21,8 +22,15 @@ impl CompressionScheme for Huffman {
let tree = Node::build(&counts)?;
#[allow(clippy::cast_possible_truncation, reason = "byte is in 0..256")]
let byte_mapping: [_; 256] = array::from_fn(|byte| {
let mut buf = BitVec::new();
tree.encode_byte(byte as u8, &mut buf);
buf
});
for &byte in src {
tree.encode_byte(byte, buf);
buf.extend_from_bitslice(&byte_mapping[byte as usize]);
}
Some(tree)
@ -62,16 +70,13 @@ impl Node {
}
/// Write to `buf` the sequence of bits that this tree has assigned `byte`.
///
/// # Panics
/// Panics if called with a byte that was not passed to [`build`].
fn encode_byte(&self, byte: u8, buf: &mut BitVec) {
let mut current = self;
loop {
match current {
Node::Leaf { byte: _ } => break,
Node::Branch { path0, path1 } => {
// TODO: this contains check is wasteful. be less dumb?
// wasteful, but we precompute a table, so it's fine :D
if path0.contains(byte) {
buf.push(false);
current = path0;