diff --git a/Cargo.toml b/Cargo.toml index 844c6d1..5607953 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,3 +4,6 @@ version = "0.1.0" edition = "2024" [dependencies] + +[profile.release] +debug = true diff --git a/src/main.rs b/src/main.rs index 02cc1ea..b553187 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,8 +8,9 @@ fn main() { let mut src = Vec::new(); io::stdin().read_to_end(&mut src).unwrap(); + let mut table: HashMap<Box<[u8]>, u64> = HashMap::new(); for n in 1..=20 { - let (ngrams, freqs) = ngram_freqs(n, &src); + let (ngrams, freqs) = ngram_freqs(n, &src, &mut table); let bits = entropy(freqs); println!( "{n:2}: {bits:6.3} bits, {:.3} bits/letter ({ngrams:7} unique ngrams)", @@ -18,10 +19,14 @@ fn main() { } } -fn ngram_freqs(n: usize, src: &[u8]) -> (usize, impl Iterator<Item = f64>) { +fn ngram_freqs( + n: usize, + src: &[u8], + counts: &mut HashMap<Box<[u8]>, u64>, +) -> (usize, impl Iterator<Item = f64>) { assert!(n > 0); - let mut counts: HashMap<Box<[u8]>, u64> = HashMap::new(); + counts.clear(); for ngram in src.windows(n) { *counts.entry(Box::from(ngram)).or_default() += 1; @@ -31,9 +36,7 @@ fn ngram_freqs(n: usize, src: &[u8]) -> (usize, impl Iterator<Item = f64>) { ( counts.len(), - counts - .into_values() - .map(move |p| p as f64 / total_count as f64), + counts.values().map(move |p| *p as f64 / total_count as f64), ) }