diff --git a/src/main.rs b/src/main.rs
index 71e0ef5..1a2fe33 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,4 @@
+#![allow(clippy::cast_precision_loss)]
 use std::{
     collections::HashMap,
     io::{self, Read},
@@ -7,9 +8,10 @@ fn main() {
     let mut src = Vec::new();
     io::stdin().read_to_end(&mut src).unwrap();
 
-    for n in 1..2 {
+    for n in 1..=20 {
         let freqs = ngram_freqs(n, &src);
-        println!("{}", freqs.values().sum::<f64>());
+        let bits = entropy(freqs.values().copied());
+        println!("{n:2}: {bits:6.3} bits, {:.3} bits/letter", bits / n as f64);
     }
 }
 
@@ -29,3 +31,7 @@ fn ngram_freqs(n: usize, src: &[u8]) -> HashMap<Vec<u8>, f64> {
         .map(|(k, v)| (k, v as f64 / total_count as f64))
         .collect()
 }
+
+fn entropy(probs: impl Iterator<Item = f64>) -> f64 {
+    -probs.map(|prob| prob * prob.log2()).sum::<f64>()
+}