avoid creating intermediate hashmap

This commit is contained in:
mehbark 2026-04-16 12:58:31 -04:00
parent 6a2f388985
commit 62fa7ccd41
Signed by: mbk
GPG key ID: E333EC1335FFCCDB

View file

@ -10,12 +10,12 @@ fn main() {
for n in 1..=20 {
let freqs = ngram_freqs(n, &src);
let bits = entropy(freqs.values().copied());
let bits = entropy(freqs);
println!("{n:2}: {bits:6.3} bits, {:.3} bits/letter", bits / n as f64);
}
}
fn ngram_freqs(n: usize, src: &[u8]) -> HashMap<Box<[u8]>, f64> {
fn ngram_freqs(n: usize, src: &[u8]) -> impl Iterator<Item = f64> {
assert!(n > 0);
let mut counts: HashMap<Box<[u8]>, u64> = HashMap::new();
@ -27,9 +27,8 @@ fn ngram_freqs(n: usize, src: &[u8]) -> HashMap<Box<[u8]>, f64> {
let total_count: u64 = counts.values().sum();
counts
.into_iter()
.map(|(k, v)| (k, v as f64 / total_count as f64))
.collect()
.into_values()
.map(move |p| p as f64 / total_count as f64)
}
fn entropy(probs: impl Iterator<Item = f64>) -> f64 {