correct sums of freqs

This commit is contained in:
mehbark 2026-04-15 23:18:23 -04:00
commit 3d622f86c6
Signed by: mbk
GPG key ID: E333EC1335FFCCDB
4 changed files with 45 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
/target

7
Cargo.lock generated Normal file
View file

@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "ngram-counter"
version = "0.1.0"

6
Cargo.toml Normal file
View file

@ -0,0 +1,6 @@
[package]
name = "ngram-counter"
version = "0.1.0"
edition = "2024"
[dependencies]

31
src/main.rs Normal file
View file

@ -0,0 +1,31 @@
use std::{
collections::HashMap,
io::{self, Read},
};
/// Reads all of standard input as raw bytes and, for every n-gram length in
/// `1..2` (currently just `n = 1`), prints the sum of the relative frequencies
/// returned by `ngram_freqs` on its own line.
///
/// # Panics
///
/// Panics if reading from standard input fails.
fn main() {
    let mut input = Vec::new();
    io::stdin().read_to_end(&mut input).unwrap();

    (1..2).for_each(|n| {
        // Summing the frequencies is a sanity check on the normalisation.
        let total: f64 = ngram_freqs(n, &input).values().sum();
        println!("{}", total);
    });
}
/// Computes the relative frequency of every byte n-gram of length `n` in `src`.
///
/// Each overlapping window of `n` consecutive bytes counts as one occurrence.
/// The returned map associates every distinct n-gram with its occurrence count
/// divided by the total number of windows, so the values sum to 1 (up to
/// floating-point rounding) whenever at least one window exists.
///
/// If `src` holds fewer than `n` bytes there are no windows and the returned
/// map is empty.
///
/// # Panics
///
/// Panics if `n` is zero.
fn ngram_freqs(n: usize, src: &[u8]) -> HashMap<Box<[u8]>, f64> {
    assert!(n > 0);

    // Count with keys borrowed from `src` so that a repeated n-gram does not
    // cost a heap allocation per occurrence; owned keys are only created for
    // the distinct n-grams when the result map is built below.
    let mut counts: HashMap<&[u8], u64> = HashMap::new();
    for ngram in src.windows(n) {
        *counts.entry(ngram).or_default() += 1;
    }

    let total_count: u64 = counts.values().sum();
    counts
        .into_iter()
        .map(|(ngram, count)| (Box::from(ngram), count as f64 / total_count as f64))
        .collect()
}