let you specify min and max ngram length
This commit is contained in:
parent
17f4e37faa
commit
0f3cab9430
1 changed file with 31 additions and 1 deletion
32
src/main.rs
32
src/main.rs
|
|
@ -1,15 +1,45 @@
|
||||||
#![allow(clippy::cast_precision_loss)]
|
#![allow(clippy::cast_precision_loss)]
|
||||||
use std::{
|
use std::{
|
||||||
collections::HashMap,
|
collections::HashMap,
|
||||||
|
env,
|
||||||
io::{self, Read},
|
io::{self, Read},
|
||||||
|
process,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Usage summary that `main` prints to stderr (via `eprintln!`) right before
/// exiting with status 1 when a command-line argument fails to parse as a
/// `usize`.
///
/// The three forms it lists match the argument handling in `main`: no
/// arguments (falls back to the range 1..=20), a single `N` (used as both
/// the minimum and the maximum), or an explicit `MIN MAX` pair.
const USAGE: &str = "\
|
||||||
|
usage: (ngram-entropy) | (ngram-entropy N) | (ngram-entropy MIN MAX)\
|
||||||
|
";
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
|
let arg1 = env::args().nth(1);
|
||||||
|
let arg2 = env::args().nth(2);
|
||||||
|
|
||||||
|
let (min, max) = match (arg1, arg2) {
|
||||||
|
(Some(n), None) => {
|
||||||
|
if let Ok(n) = n.parse::<usize>() {
|
||||||
|
(n, n)
|
||||||
|
} else {
|
||||||
|
eprintln!("{USAGE}");
|
||||||
|
process::exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(Some(min), Some(max)) => {
|
||||||
|
if let (Ok(min), Ok(max)) = (min.parse::<usize>(), max.parse::<usize>()) {
|
||||||
|
(min, max)
|
||||||
|
} else {
|
||||||
|
eprintln!("{USAGE}");
|
||||||
|
process::exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => (1, 20),
|
||||||
|
};
|
||||||
|
|
||||||
let mut src = Vec::new();
|
let mut src = Vec::new();
|
||||||
io::stdin().read_to_end(&mut src).unwrap();
|
io::stdin().read_to_end(&mut src).unwrap();
|
||||||
|
|
||||||
let mut table: HashMap<&[u8], u64> = HashMap::new();
|
let mut table: HashMap<&[u8], u64> = HashMap::new();
|
||||||
for n in 1..=20 {
|
|
||||||
|
for n in min..=max {
|
||||||
let (ngrams, freqs) = ngram_freqs(n, &src, &mut table);
|
let (ngrams, freqs) = ngram_freqs(n, &src, &mut table);
|
||||||
let bits = entropy(freqs);
|
let bits = entropy(freqs);
|
||||||
println!(
|
println!(
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue