Avoid creating an intermediate HashMap: `ngram_freqs` now returns an iterator of probabilities instead of collecting them into a map

This commit is contained in:
mehbark 2026-04-16 12:58:31 -04:00
parent 6a2f388985
commit 62fa7ccd41
Signed by: mbk
GPG key ID: E333EC1335FFCCDB

View file

@ -10,12 +10,12 @@ fn main() {
for n in 1..=20 { for n in 1..=20 {
let freqs = ngram_freqs(n, &src); let freqs = ngram_freqs(n, &src);
let bits = entropy(freqs.values().copied()); let bits = entropy(freqs);
println!("{n:2}: {bits:6.3} bits, {:.3} bits/letter", bits / n as f64); println!("{n:2}: {bits:6.3} bits, {:.3} bits/letter", bits / n as f64);
} }
} }
fn ngram_freqs(n: usize, src: &[u8]) -> HashMap<Box<[u8]>, f64> { fn ngram_freqs(n: usize, src: &[u8]) -> impl Iterator<Item = f64> {
assert!(n > 0); assert!(n > 0);
let mut counts: HashMap<Box<[u8]>, u64> = HashMap::new(); let mut counts: HashMap<Box<[u8]>, u64> = HashMap::new();
@ -27,9 +27,8 @@ fn ngram_freqs(n: usize, src: &[u8]) -> HashMap<Box<[u8]>, f64> {
let total_count: u64 = counts.values().sum(); let total_count: u64 = counts.values().sum();
counts counts
.into_iter() .into_values()
.map(|(k, v)| (k, v as f64 / total_count as f64)) .map(move |p| p as f64 / total_count as f64)
.collect()
} }
fn entropy(probs: impl Iterator<Item = f64>) -> f64 { fn entropy(probs: impl Iterator<Item = f64>) -> f64 {