Avoid creating an intermediate HashMap
This commit is contained in:
parent
6a2f388985
commit
62fa7ccd41
1 changed file with 4 additions and 5 deletions
|
|
@@ -10,12 +10,12 @@ fn main() {
|
||||||
|
|
||||||
for n in 1..=20 {
|
for n in 1..=20 {
|
||||||
let freqs = ngram_freqs(n, &src);
|
let freqs = ngram_freqs(n, &src);
|
||||||
let bits = entropy(freqs.values().copied());
|
let bits = entropy(freqs);
|
||||||
println!("{n:2}: {bits:6.3} bits, {:.3} bits/letter", bits / n as f64);
|
println!("{n:2}: {bits:6.3} bits, {:.3} bits/letter", bits / n as f64);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn ngram_freqs(n: usize, src: &[u8]) -> HashMap<Box<[u8]>, f64> {
|
fn ngram_freqs(n: usize, src: &[u8]) -> impl Iterator<Item = f64> {
|
||||||
assert!(n > 0);
|
assert!(n > 0);
|
||||||
|
|
||||||
let mut counts: HashMap<Box<[u8]>, u64> = HashMap::new();
|
let mut counts: HashMap<Box<[u8]>, u64> = HashMap::new();
|
||||||
|
|
@@ -27,9 +27,8 @@ fn ngram_freqs(n: usize, src: &[u8]) -> HashMap<Box<[u8]>, f64> {
|
||||||
let total_count: u64 = counts.values().sum();
|
let total_count: u64 = counts.values().sum();
|
||||||
|
|
||||||
counts
|
counts
|
||||||
.into_iter()
|
.into_values()
|
||||||
.map(|(k, v)| (k, v as f64 / total_count as f64))
|
.map(move |p| p as f64 / total_count as f64)
|
||||||
.collect()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn entropy(probs: impl Iterator<Item = f64>) -> f64 {
|
fn entropy(probs: impl Iterator<Item = f64>) -> f64 {
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue