correct sums of freqs
This commit is contained in:
commit
3d622f86c6
4 changed files with 45 additions and 0 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
/target
|
||||||
7
Cargo.lock
generated
Normal file
7
Cargo.lock
generated
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
# This file is automatically @generated by Cargo.
|
||||||
|
# It is not intended for manual editing.
|
||||||
|
version = 4
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ngram-counter"
|
||||||
|
version = "0.1.0"
|
||||||
6
Cargo.toml
Normal file
6
Cargo.toml
Normal file
|
|
@ -0,0 +1,6 @@
|
||||||
|
[package]
|
||||||
|
name = "ngram-counter"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2024"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
31
src/main.rs
Normal file
31
src/main.rs
Normal file
|
|
@ -0,0 +1,31 @@
|
||||||
|
use std::{
|
||||||
|
collections::HashMap,
|
||||||
|
io::{self, Read},
|
||||||
|
};
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let mut src = Vec::new();
|
||||||
|
io::stdin().read_to_end(&mut src).unwrap();
|
||||||
|
|
||||||
|
for n in 1..2 {
|
||||||
|
let freqs = ngram_freqs(n, &src);
|
||||||
|
println!("{}", freqs.values().sum::<f64>());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Computes the relative frequency of every distinct `n`-byte window of `src`.
///
/// Windows overlap: each position in `src` that has at least `n` bytes
/// remaining contributes one window. The returned map associates each
/// distinct window with its occurrence count divided by the total number of
/// windows. When `src` is shorter than `n` there are no windows and the map
/// is empty.
///
/// # Panics
///
/// Panics if `n` is zero.
fn ngram_freqs(n: usize, src: &[u8]) -> HashMap<Box<[u8]>, f64> {
    assert!(n > 0);

    // Tally how many times each distinct window occurs.
    let tally = src
        .windows(n)
        .fold(HashMap::<Box<[u8]>, u64>::new(), |mut acc, window| {
            *acc.entry(window.into()).or_insert(0) += 1;
            acc
        });

    // The denominator is the total number of windows seen.
    let denominator = tally.values().copied().sum::<u64>() as f64;

    // An empty tally never enters the loop, so no division by zero occurs.
    let mut freqs = HashMap::with_capacity(tally.len());
    for (gram, hits) in tally {
        freqs.insert(gram, hits as f64 / denominator);
    }
    freqs
}
|
||||||
Loading…
Reference in a new issue