diff --git a/Cargo.lock b/Cargo.lock index b845814..59c9470 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -114,16 +114,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" [[package]] -name = "env_logger" -version = "0.10.2" +name = "env_filter" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580" +checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" dependencies = [ - "humantime", - "is-terminal", "log", "regex", - "termcolor", +] + +[[package]] +name = "env_logger" +version = "0.11.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3716d7a920fb4fac5d84e9d4bce8ceb321e9414b4409da61b07b75c1e3d0697" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "jiff", + "log", ] [[package]] @@ -168,29 +178,6 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "hermit-abi" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbd780fe5cc30f81464441920d82ac8740e2e46b29a6fad543ddd075229ce37e" - -[[package]] -name = "humantime" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" - -[[package]] -name = "is-terminal" -version = "0.4.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" -dependencies = [ - "hermit-abi", - "libc", - "windows-sys", -] - [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -204,10 +191,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] -name = "libc" -version = "0.2.171" +name = "jiff" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" +checksum = "c102670231191d07d37a35af3eb77f1f0dbf7a71be51a962dcd57ea607be7260" +dependencies = [ + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde", +] + +[[package]] +name = "jiff-static" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cdde31a9d349f1b1f51a0b3714a5940ac022976f4b49485fc04be052b183b4c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] name = "libsqlite3-sys" @@ -244,13 +249,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] -name = "pretty_env_logger" -version = "0.5.0" +name = "portable-atomic" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "865724d4dbe39d9f3dd3b52b88d859d66bcb2d6a0acfd5ea68a65fb66d4bdc1c" +checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" + +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" dependencies = [ - "env_logger", - "log", + "portable-atomic", ] [[package]] @@ -363,11 +373,12 @@ name = "smogon-stats" version = "0.1.0" dependencies = [ "clap", + "env_logger", "log", - "pretty_env_logger", "rusqlite", "serde", "serde_json", + "thiserror", ] [[package]] @@ -388,12 +399,23 @@ dependencies = [ ] [[package]] -name = "termcolor" -version = "1.4.1" +name = "thiserror" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" dependencies = [ - "winapi-util", + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -414,15 +436,6 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" -[[package]] -name = "winapi-util" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" -dependencies = [ - "windows-sys", -] - [[package]] name = "windows-sys" version = "0.59.0" diff --git a/Cargo.toml b/Cargo.toml index f6b3d22..151630b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,11 +2,14 @@ name = "smogon-stats" version = "0.1.0" edition = "2024" +authors = ["mehbark <terezi@pyrope.net>"] +description = "turn https://smogon.com/stats/ chaos json files into https://sqlite.org databases" [dependencies] clap = { version = "4.5.32", features = ["derive"] } +env_logger = "0.11.7" log = "0.4.27" -pretty_env_logger = "0.5.0" rusqlite = "0.34.0" serde = { version = "1.0.219", features = ["derive"] } serde_json = "1.0.140" +thiserror = "2.0.12" diff --git a/src/main.rs b/src/main.rs index 64f7a62..8031bae 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,22 +1,82 @@ // https://www.smogon.com/stats/2025-02/chaos/ -use std::{collections::HashMap, error::Error, fs::File, path::PathBuf}; +use std::{ + collections::HashMap, + fs::{self, File}, + io, + path::PathBuf, + process, +}; use clap::Parser; use rusqlite::Connection; use serde::Deserialize; +use thiserror::Error; -fn main() -> Result<(), Box<dyn Error>> { - let Args { +fn main() { + env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("warn")).init(); + let config = Config::parse(); + if let Err(e) = run(config) { + log::error!("{e}"); + process::exit(1); + }; +} + +fn run( + Config { input_file, output_file, - } = Args::parse(); - pretty_env_logger::init_timed(); + }: Config, +) -> Result<(), AppError> { + if fs::exists(&output_file)? { + log::warn!("{output_file:?} already exists: creating the db will probably fail"); + } + + // parsing the stats is slow but doing it first avoids creating empty tables + log::info!("parsing stats"); + let input_file = File::open(input_file)?; + let stats: Stats = serde_json::from_reader(input_file)?; + + log::info!( + "meta: {}: {} battles, cutoff: {}", + stats.info.metagame, + stats.info.battle_count, + stats.info.cutoff + ); log::info!("opening connection"); let mut conn = Connection::open(output_file)?; + + create_tables(&mut conn)?; + insert_stats(&mut conn, &stats)?; + + Ok(()) +} + +#[derive(clap::Parser)] +#[command(about)] +struct Config { + #[arg()] + input_file: PathBuf, + #[arg(short, long = "output")] + output_file: PathBuf, +} + +#[derive(Error, Debug)] +enum AppError { + #[error("Error creating sqlite db: {0}")] + Sql(#[from] rusqlite::Error), + #[error("IO error: {0}")] + Io(#[from] io::Error), + #[error("Error reading JSON: {0}")] + Deserialize(#[from] serde_json::Error), +} + +fn create_tables(conn: &mut Connection) -> rusqlite::Result<()> { log::info!("creating tables"); - // TODO: checks and counters, specifically KO or switch + // we could do IF NOT EXISTS, but these are meant to be ephemeral+read-only; + // i'd like to be able to evolve and improve the schema, so y'all'll have to + // take the ten seconds to delete and remake the db SORRY! things'll change! conn.execute_batch( " BEGIN; @@ -58,32 +118,24 @@ fn main() -> Result<(), Box<dyn Error>> { ); COMMIT; ", - )?; - - log::info!("parsing stats"); - let input_file = File::open(input_file)?; - let stats: Stats = serde_json::from_reader(input_file)?; - log::info!( - "meta: {}: {} battles, cutoff: {}", - stats.info.metagame, - stats.info.battle_count, - stats.info.cutoff - ); - - insert_stats(&mut conn, &stats)?; - - Ok(()) + ) } fn insert_stats(conn: &mut Connection, stats: &Stats) -> rusqlite::Result<()> { + let mon_count = stats.data.len(); + let mon_count_digits = mon_count.to_string().len(); + for (i, (mon, data)) in stats.data.iter().enumerate() { - log::debug!("Processing mon #{i} ({mon})…"); + log::debug!( + "Processing mon {:mon_count_digits$}/{mon_count} ({mon})", + i + 1 + ); let tx = conn.transaction()?; // normalizing with mon_count gives us data that is much easier to work - // with. for example, if pikachu has 10k raw count and thunderbolt is used - // 9k times (weighted), we'd like 0.9 so that we can say pikachu runs - // thunderbolt 90% of the time. + // with. for example, if pikachu has 10k count (weighted) and thunderbolt + // is used 9k times (weighted), we'd like 0.9 so that we can say pikachu + // runs thunderbolt 90% of the time. // HACK: we get the weighted mon count by summing the ability usage // there's is 1000% a better way to do this @@ -144,14 +196,6 @@ fn insert_stats(conn: &mut Connection, stats: &Stats) -> rusqlite::Result<()> { Ok(()) } -#[derive(clap::Parser)] -struct Args { - #[arg()] - input_file: PathBuf, - #[arg(short, long = "output")] - output_file: PathBuf, -} - #[derive(Debug, Deserialize, PartialEq, Eq, Hash, Clone, Copy)] #[serde(rename_all = "lowercase")] enum Type {