reuse hashmap
This commit is contained in:
parent
154501dcf5
commit
c3c60e38dd
2 changed files with 12 additions and 6 deletions
|
|
@@ -4,3 +4,6 @@ version = "0.1.0"
|
||||||
edition = "2024"
|
edition = "2024"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|
||||||
|
[profile.release]
|
||||||
|
debug = true
|
||||||
|
|
|
||||||
15
src/main.rs
15
src/main.rs
|
|
@@ -8,8 +8,9 @@ fn main() {
|
||||||
let mut src = Vec::new();
|
let mut src = Vec::new();
|
||||||
io::stdin().read_to_end(&mut src).unwrap();
|
io::stdin().read_to_end(&mut src).unwrap();
|
||||||
|
|
||||||
|
let mut table: HashMap<Box<[u8]>, u64> = HashMap::new();
|
||||||
for n in 1..=20 {
|
for n in 1..=20 {
|
||||||
let (ngrams, freqs) = ngram_freqs(n, &src);
|
let (ngrams, freqs) = ngram_freqs(n, &src, &mut table);
|
||||||
let bits = entropy(freqs);
|
let bits = entropy(freqs);
|
||||||
println!(
|
println!(
|
||||||
"{n:2}: {bits:6.3} bits, {:.3} bits/letter ({ngrams:7} unique ngrams)",
|
"{n:2}: {bits:6.3} bits, {:.3} bits/letter ({ngrams:7} unique ngrams)",
|
||||||
|
|
@@ -18,10 +19,14 @@ fn main() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn ngram_freqs(n: usize, src: &[u8]) -> (usize, impl Iterator<Item = f64>) {
|
fn ngram_freqs(
|
||||||
|
n: usize,
|
||||||
|
src: &[u8],
|
||||||
|
counts: &mut HashMap<Box<[u8]>, u64>,
|
||||||
|
) -> (usize, impl Iterator<Item = f64>) {
|
||||||
assert!(n > 0);
|
assert!(n > 0);
|
||||||
|
|
||||||
let mut counts: HashMap<Box<[u8]>, u64> = HashMap::new();
|
counts.clear();
|
||||||
|
|
||||||
for ngram in src.windows(n) {
|
for ngram in src.windows(n) {
|
||||||
*counts.entry(Box::from(ngram)).or_default() += 1;
|
*counts.entry(Box::from(ngram)).or_default() += 1;
|
||||||
|
|
@@ -31,9 +36,7 @@ fn ngram_freqs(n: usize, src: &[u8]) -> (usize, impl Iterator<Item = f64>) {
|
||||||
|
|
||||||
(
|
(
|
||||||
counts.len(),
|
counts.len(),
|
||||||
counts
|
counts.values().map(move |p| *p as f64 / total_count as f64),
|
||||||
.into_values()
|
|
||||||
.map(move |p| p as f64 / total_count as f64),
|
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue