initial slow version

This commit is contained in:
mehbark 2026-04-16 12:55:18 -04:00
parent 3d622f86c6
commit 6a2f388985
Signed by: mbk
GPG key ID: E333EC1335FFCCDB

View file

@ -1,3 +1,4 @@
#![allow(clippy::cast_precision_loss)]
use std::{ use std::{
collections::HashMap, collections::HashMap,
io::{self, Read}, io::{self, Read},
@ -7,9 +8,10 @@ fn main() {
let mut src = Vec::new(); let mut src = Vec::new();
io::stdin().read_to_end(&mut src).unwrap(); io::stdin().read_to_end(&mut src).unwrap();
for n in 1..2 { for n in 1..=20 {
let freqs = ngram_freqs(n, &src); let freqs = ngram_freqs(n, &src);
println!("{}", freqs.values().sum::<f64>()); let bits = entropy(freqs.values().copied());
println!("{n:2}: {bits:6.3} bits, {:.3} bits/letter", bits / n as f64);
} }
} }
@ -29,3 +31,7 @@ fn ngram_freqs(n: usize, src: &[u8]) -> HashMap<Box<[u8]>, f64> {
.map(|(k, v)| (k, v as f64 / total_count as f64)) .map(|(k, v)| (k, v as f64 / total_count as f64))
.collect() .collect()
} }
/// Computes the Shannon entropy, in bits, of a discrete probability distribution.
///
/// `probs` yields the probability of each outcome. Outcomes with probability
/// exactly zero contribute nothing, following the convention that
/// `0 * log2(0)` is taken to be `0`; without this, a single zero would turn the
/// whole result into NaN (`0.0 * -inf`).
///
/// Returns `-Σ p * log2(p)`. The result is never negative zero, so an empty or
/// single-outcome distribution formats as `0.000` rather than `-0.000`.
/// Invalid inputs (negative or NaN probabilities) still propagate as NaN
/// instead of being silently ignored.
fn entropy(probs: impl Iterator<Item = f64>) -> f64 {
    let bits: f64 = probs
        // Skip only exact zeros; NaN compares unequal to 0.0 and is kept on purpose.
        .filter(|&prob| prob != 0.0)
        .map(|prob| -prob * prob.log2())
        .sum();
    // `-0.0 == 0.0` is true, so this maps both signed zeros to positive zero.
    if bits == 0.0 {
        0.0
    } else {
        bits
    }
}