diff --git a/.gitignore b/.gitignore index ea8c4bf..a3ccf3a 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /target +flamegraph.svg diff --git a/src/main.rs b/src/main.rs index 5c68322..02cc1ea 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,13 +9,16 @@ fn main() { io::stdin().read_to_end(&mut src).unwrap(); for n in 1..=20 { - let freqs = ngram_freqs(n, &src); + let (ngrams, freqs) = ngram_freqs(n, &src); let bits = entropy(freqs); - println!("{n:2}: {bits:6.3} bits, {:.3} bits/letter", bits / n as f64); + println!( + "{n:2}: {bits:6.3} bits, {:.3} bits/letter ({ngrams:7} unique ngrams)", + bits / n as f64 + ); } } -fn ngram_freqs(n: usize, src: &[u8]) -> impl Iterator { +fn ngram_freqs(n: usize, src: &[u8]) -> (usize, impl Iterator) { assert!(n > 0); let mut counts: HashMap, u64> = HashMap::new(); @@ -26,9 +29,12 @@ fn ngram_freqs(n: usize, src: &[u8]) -> impl Iterator { let total_count: u64 = counts.values().sum(); - counts - .into_values() - .map(move |p| p as f64 / total_count as f64) + ( + counts.len(), + counts + .into_values() + .map(move |p| p as f64 / total_count as f64), + ) } fn entropy(probs: impl Iterator) -> f64 {