diff --git a/Cargo.lock b/Cargo.lock index 19a838c..b3ceb2e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -121,20 +121,14 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" -[[package]] -name = "either" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" - [[package]] name = "fast-markov-chain" version = "0.1.0" dependencies = [ "bstr", "clap", - "itertools", "rand", + "rustc-hash", ] [[package]] @@ -161,15 +155,6 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" -[[package]] -name = "itertools" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" -dependencies = [ - "either", -] - [[package]] name = "libc" version = "0.2.171" @@ -257,6 +242,12 @@ version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + [[package]] name = "serde" version = "1.0.219" diff --git a/Cargo.toml b/Cargo.toml index 6a00187..7a58cb2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,5 +6,5 @@ edition = "2024" [dependencies] bstr = "1.11.3" clap = { version = "4.5.32", features = ["derive"] } -itertools = "0.14.0" rand = "0.9.0" +rustc-hash = "2.1.1" diff --git a/src/main.rs b/src/main.rs index cb76ac0..6850bcf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,4 @@ use std::{ - collections::HashMap, error::Error, io::{self, Read, Write}, process, @@ -8,9 +7,11 @@ use std::{ use bstr::ByteSlice; use clap::Parser; use rand::seq::IndexedRandom; +use rustc_hash::FxHashMap; #[derive(clap::Parser)] struct Args { + // TODO: multi-word prefix first_word: String, #[arg(short, long)] count: usize, @@ -41,14 +42,11 @@ fn main() -> Result<(), Box<dyn Error>> { // it's probably fine to make a big ol vec of the words… let words: Vec<_> = buf.words().collect(); - let mut freq: HashMap<&[&str], HashMap<&[&str], usize>> = HashMap::new(); + let mut freq: FxHashMap<&[&str], FxHashMap<&[&str], usize>> = FxHashMap::default(); for window in words.windows(order as usize) { - let [words @ .., _] = window else { - unreachable!() - }; *freq - .entry(words) + .entry(&window[..window.len() - 1]) .or_default() .entry(&window[1..]) .or_default() += 1;