reuse hashmap

This commit is contained in:
mehbark 2026-04-16 14:04:13 -04:00
parent 154501dcf5
commit c3c60e38dd
Signed by: mbk
GPG key ID: E333EC1335FFCCDB
2 changed files with 12 additions and 6 deletions

View file

@@ -4,3 +4,6 @@ version = "0.1.0"
edition = "2024" edition = "2024"
[dependencies] [dependencies]
[profile.release]
debug = true

View file

@@ -8,8 +8,9 @@ fn main() {
let mut src = Vec::new(); let mut src = Vec::new();
io::stdin().read_to_end(&mut src).unwrap(); io::stdin().read_to_end(&mut src).unwrap();
let mut table: HashMap<Box<[u8]>, u64> = HashMap::new();
for n in 1..=20 { for n in 1..=20 {
let (ngrams, freqs) = ngram_freqs(n, &src); let (ngrams, freqs) = ngram_freqs(n, &src, &mut table);
let bits = entropy(freqs); let bits = entropy(freqs);
println!( println!(
"{n:2}: {bits:6.3} bits, {:.3} bits/letter ({ngrams:7} unique ngrams)", "{n:2}: {bits:6.3} bits, {:.3} bits/letter ({ngrams:7} unique ngrams)",
@@ -18,10 +19,14 @@ fn main() {
} }
} }
fn ngram_freqs(n: usize, src: &[u8]) -> (usize, impl Iterator<Item = f64>) { fn ngram_freqs(
n: usize,
src: &[u8],
counts: &mut HashMap<Box<[u8]>, u64>,
) -> (usize, impl Iterator<Item = f64>) {
assert!(n > 0); assert!(n > 0);
let mut counts: HashMap<Box<[u8]>, u64> = HashMap::new(); counts.clear();
for ngram in src.windows(n) { for ngram in src.windows(n) {
*counts.entry(Box::from(ngram)).or_default() += 1; *counts.entry(Box::from(ngram)).or_default() += 1;
@@ -31,9 +36,7 @@ fn ngram_freqs(n: usize, src: &[u8]) -> (usize, impl Iterator<Item = f64>) {
( (
counts.len(), counts.len(),
counts counts.values().map(move |p| *p as f64 / total_count as f64),
.into_values()
.map(move |p| p as f64 / total_count as f64),
) )
} }