let you specify min and max ngram length

This commit is contained in:
mehbark 2026-04-16 14:56:52 -04:00
parent 17f4e37faa
commit 0f3cab9430
Signed by: mbk
GPG key ID: E333EC1335FFCCDB

View file

@ -1,15 +1,45 @@
#![allow(clippy::cast_precision_loss)] #![allow(clippy::cast_precision_loss)]
use std::{ use std::{
collections::HashMap, collections::HashMap,
env,
io::{self, Read}, io::{self, Read},
process,
}; };
/// One-line usage summary, printed to stderr before exiting with status 1
/// when a command-line argument cannot be parsed as a `usize`.
///
/// The three accepted invocation forms are: no arguments, a single length
/// `N`, or a `MIN MAX` pair.
const USAGE: &str = "usage: (ngram-entropy) | (ngram-entropy N) | (ngram-entropy MIN MAX)";
fn main() { fn main() {
let arg1 = env::args().nth(1);
let arg2 = env::args().nth(2);
let (min, max) = match (arg1, arg2) {
(Some(n), None) => {
if let Ok(n) = n.parse::<usize>() {
(n, n)
} else {
eprintln!("{USAGE}");
process::exit(1);
}
}
(Some(min), Some(max)) => {
if let (Ok(min), Ok(max)) = (min.parse::<usize>(), max.parse::<usize>()) {
(min, max)
} else {
eprintln!("{USAGE}");
process::exit(1);
}
}
_ => (1, 20),
};
let mut src = Vec::new(); let mut src = Vec::new();
io::stdin().read_to_end(&mut src).unwrap(); io::stdin().read_to_end(&mut src).unwrap();
let mut table: HashMap<&[u8], u64> = HashMap::new(); let mut table: HashMap<&[u8], u64> = HashMap::new();
for n in 1..=20 {
for n in min..=max {
let (ngrams, freqs) = ngram_freqs(n, &src, &mut table); let (ngrams, freqs) = ngram_freqs(n, &src, &mut table);
let bits = entropy(freqs); let bits = entropy(freqs);
println!( println!(