From 3d622f86c6d0c23759db3cb1984f05bee40e9343 Mon Sep 17 00:00:00 2001
From: mehbark
Date: Wed, 15 Apr 2026 23:18:23 -0400
Subject: [PATCH] correct sums of freqs

---
 .gitignore  |  1 +
 Cargo.lock  |  7 +++++++
 Cargo.toml  |  6 ++++++
 src/main.rs | 31 +++++++++++++++++++++++++++++++
 4 files changed, 45 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Cargo.lock
 create mode 100644 Cargo.toml
 create mode 100644 src/main.rs

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..ee60d4c
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,7 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "ngram-counter"
+version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..844c6d1
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,6 @@
+[package]
+name = "ngram-counter"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..71e0ef5
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,31 @@
+use std::{
+    collections::HashMap,
+    io::{self, Read},
+};
+
+fn main() {
+    let mut src = Vec::new();
+    io::stdin().read_to_end(&mut src).unwrap();
+
+    for n in 1..2 {
+        let freqs = ngram_freqs(n, &src);
+        println!("{}", freqs.values().sum::<f64>());
+    }
+}
+
+fn ngram_freqs(n: usize, src: &[u8]) -> HashMap<Box<[u8]>, f64> {
+    assert!(n > 0);
+
+    let mut counts: HashMap<Box<[u8]>, u64> = HashMap::new();
+
+    for ngram in src.windows(n) {
+        *counts.entry(Box::from(ngram)).or_default() += 1;
+    }
+
+    let total_count: u64 = counts.values().sum();
+
+    counts
+        .into_iter()
+        .map(|(k, v)| (k, v as f64 / total_count as f64))
+        .collect()
+}