Compare commits

...

2 commits

3 changed files with 363 additions and 79 deletions

325
Cargo.lock generated
View file

@ -11,6 +11,12 @@ dependencies = [
"memchr", "memchr",
] ]
[[package]]
name = "allocator-api2"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
[[package]] [[package]]
name = "anstream" name = "anstream"
version = "0.6.18" version = "0.6.18"
@ -61,12 +67,30 @@ dependencies = [
"windows-sys", "windows-sys",
] ]
[[package]]
name = "autocfg"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
[[package]] [[package]]
name = "bitflags" name = "bitflags"
version = "2.9.0" version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd"
[[package]]
name = "bumpalo"
version = "3.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]] [[package]]
name = "clap" name = "clap"
version = "4.5.32" version = "4.5.32"
@ -114,18 +138,34 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
[[package]] [[package]]
name = "env_logger" name = "env_filter"
version = "0.10.2" version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580" checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0"
dependencies = [ dependencies = [
"humantime",
"is-terminal",
"log", "log",
"regex", "regex",
"termcolor",
] ]
[[package]]
name = "env_logger"
version = "0.11.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3716d7a920fb4fac5d84e9d4bce8ceb321e9414b4409da61b07b75c1e3d0697"
dependencies = [
"anstream",
"anstyle",
"env_filter",
"jiff",
"log",
]
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]] [[package]]
name = "fallible-iterator" name = "fallible-iterator"
version = "0.3.0" version = "0.3.0"
@ -138,18 +178,53 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
[[package]]
name = "float-cmp"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b09cf3155332e944990140d967ff5eceb70df778b34f77d8075db46e4704e6d8"
dependencies = [
"num-traits",
]
[[package]] [[package]]
name = "foldhash" name = "foldhash"
version = "0.1.5" version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
[[package]]
name = "getrandom"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0"
dependencies = [
"cfg-if",
"js-sys",
"libc",
"r-efi",
"wasi",
"wasm-bindgen",
]
[[package]]
name = "halfbrown"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa2c385c6df70fd180bbb673d93039dbd2cd34e41d782600bdf6e1ca7bce39aa"
dependencies = [
"hashbrown",
"serde",
]
[[package]] [[package]]
name = "hashbrown" name = "hashbrown"
version = "0.15.2" version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289"
dependencies = [ dependencies = [
"allocator-api2",
"equivalent",
"foldhash", "foldhash",
] ]
@ -168,29 +243,6 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "hermit-abi"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fbd780fe5cc30f81464441920d82ac8740e2e46b29a6fad543ddd075229ce37e"
[[package]]
name = "humantime"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f"
[[package]]
name = "is-terminal"
version = "0.4.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9"
dependencies = [
"hermit-abi",
"libc",
"windows-sys",
]
[[package]] [[package]]
name = "is_terminal_polyfill" name = "is_terminal_polyfill"
version = "1.70.1" version = "1.70.1"
@ -203,6 +255,40 @@ version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
[[package]]
name = "jiff"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c102670231191d07d37a35af3eb77f1f0dbf7a71be51a962dcd57ea607be7260"
dependencies = [
"jiff-static",
"log",
"portable-atomic",
"portable-atomic-util",
"serde",
]
[[package]]
name = "jiff-static"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4cdde31a9d349f1b1f51a0b3714a5940ac022976f4b49485fc04be052b183b4c"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "js-sys"
version = "0.3.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f"
dependencies = [
"once_cell",
"wasm-bindgen",
]
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.171" version = "0.2.171"
@ -231,6 +317,15 @@ version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "num-traits"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
]
[[package]] [[package]]
name = "once_cell" name = "once_cell"
version = "1.21.1" version = "1.21.1"
@ -244,13 +339,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]] [[package]]
name = "pretty_env_logger" name = "portable-atomic"
version = "0.5.0" version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "865724d4dbe39d9f3dd3b52b88d859d66bcb2d6a0acfd5ea68a65fb66d4bdc1c" checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e"
[[package]]
name = "portable-atomic-util"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507"
dependencies = [ dependencies = [
"env_logger", "portable-atomic",
"log",
] ]
[[package]] [[package]]
@ -271,6 +371,32 @@ dependencies = [
"proc-macro2", "proc-macro2",
] ]
[[package]]
name = "r-efi"
version = "5.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5"
[[package]]
name = "ref-cast"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a0ae411dbe946a674d89546582cea4ba2bb8defac896622d6496f14c23ba5cf"
dependencies = [
"ref-cast-impl",
]
[[package]]
name = "ref-cast-impl"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "regex" name = "regex"
version = "1.11.1" version = "1.11.1"
@ -352,6 +478,27 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "simd-json"
version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10b5602e4f1f7d358956f94cac1eff59220f34cf9e26d49f5fde5acef851cbed"
dependencies = [
"getrandom",
"halfbrown",
"ref-cast",
"serde",
"serde_json",
"simdutf8",
"value-trait",
]
[[package]]
name = "simdutf8"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e"
[[package]] [[package]]
name = "smallvec" name = "smallvec"
version = "1.14.0" version = "1.14.0"
@ -363,11 +510,12 @@ name = "smogon-stats"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"clap", "clap",
"env_logger",
"log", "log",
"pretty_env_logger",
"rusqlite", "rusqlite",
"serde", "serde",
"serde_json", "simd-json",
"thiserror",
] ]
[[package]] [[package]]
@ -388,12 +536,23 @@ dependencies = [
] ]
[[package]] [[package]]
name = "termcolor" name = "thiserror"
version = "1.4.1" version = "2.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708"
dependencies = [ dependencies = [
"winapi-util", "thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "2.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d"
dependencies = [
"proc-macro2",
"quote",
"syn",
] ]
[[package]] [[package]]
@ -408,6 +567,18 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "value-trait"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0508fce11ad19e0aab49ce20b6bec7f8f82902ded31df1c9fc61b90f0eb396b8"
dependencies = [
"float-cmp",
"halfbrown",
"itoa",
"ryu",
]
[[package]] [[package]]
name = "vcpkg" name = "vcpkg"
version = "0.2.15" version = "0.2.15"
@ -415,12 +586,69 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]] [[package]]
name = "winapi-util" name = "wasi"
version = "0.1.9" version = "0.14.2+wasi-0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
dependencies = [ dependencies = [
"windows-sys", "wit-bindgen-rt",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
dependencies = [
"cfg-if",
"once_cell",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6"
dependencies = [
"bumpalo",
"log",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d"
dependencies = [
"unicode-ident",
] ]
[[package]] [[package]]
@ -495,3 +723,12 @@ name = "windows_x86_64_msvc"
version = "0.52.6" version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "wit-bindgen-rt"
version = "0.39.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
dependencies = [
"bitflags",
]

View file

@ -2,11 +2,14 @@
name = "smogon-stats" name = "smogon-stats"
version = "0.1.0" version = "0.1.0"
edition = "2024" edition = "2024"
authors = ["mehbark <terezi@pyrope.net>"]
description = "turn https://smogon.com/stats/ chaos json files into https://sqlite.org databases"
[dependencies] [dependencies]
clap = { version = "4.5.32", features = ["derive"] } clap = { version = "4.5.32", features = ["derive"] }
env_logger = "0.11.7"
log = "0.4.27" log = "0.4.27"
pretty_env_logger = "0.5.0"
rusqlite = "0.34.0" rusqlite = "0.34.0"
serde = { version = "1.0.219", features = ["derive"] } serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.140" simd-json = "0.15.0"
thiserror = "2.0.12"

View file

@ -1,22 +1,82 @@
// https://www.smogon.com/stats/2025-02/chaos/ // https://www.smogon.com/stats/2025-02/chaos/
use std::{collections::HashMap, error::Error, fs::File, path::PathBuf}; use std::{
collections::HashMap,
fs::{self, File},
io,
path::PathBuf,
process,
};
use clap::Parser; use clap::Parser;
use rusqlite::Connection; use rusqlite::Connection;
use serde::Deserialize; use serde::Deserialize;
use thiserror::Error;
fn main() -> Result<(), Box<dyn Error>> { fn main() {
let Args { env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("warn")).init();
let config = Config::parse();
if let Err(e) = run(config) {
log::error!("{e}");
process::exit(1);
};
}
fn run(
Config {
input_file, input_file,
output_file, output_file,
} = Args::parse(); }: Config,
pretty_env_logger::init_timed(); ) -> Result<(), AppError> {
if fs::exists(&output_file)? {
log::warn!("{output_file:?} already exists: creating the db will probably fail");
}
// parsing the stats is slow but doing it first avoids creating empty tables
log::info!("parsing stats");
let input_file = File::open(input_file)?;
let stats: Stats = simd_json::from_reader(input_file)?;
log::info!(
"meta: {}: {} battles, cutoff: {}",
stats.info.metagame,
stats.info.battle_count,
stats.info.cutoff
);
log::info!("opening connection"); log::info!("opening connection");
let mut conn = Connection::open(output_file)?; let mut conn = Connection::open(output_file)?;
create_tables(&mut conn)?;
insert_stats(&mut conn, &stats)?;
Ok(())
}
#[derive(clap::Parser)]
#[command(about)]
struct Config {
#[arg()]
input_file: PathBuf,
#[arg(short, long = "output")]
output_file: PathBuf,
}
#[derive(Error, Debug)]
enum AppError {
#[error("Error creating sqlite db: {0}")]
Sql(#[from] rusqlite::Error),
#[error("IO error: {0}")]
Io(#[from] io::Error),
#[error("Error reading JSON: {0}")]
Deserialize(#[from] simd_json::Error),
}
fn create_tables(conn: &mut Connection) -> rusqlite::Result<()> {
log::info!("creating tables"); log::info!("creating tables");
// TODO: checks and counters, specifically KO or switch // we could do IF NOT EXISTS, but these are meant to be ephemeral+read-only;
// i'd like to be able to evolve and improve the schema, so y'all'll have to
// take the ten seconds to delete and remake the db SORRY! things'll change!
conn.execute_batch( conn.execute_batch(
" "
BEGIN; BEGIN;
@ -58,32 +118,24 @@ fn main() -> Result<(), Box<dyn Error>> {
); );
COMMIT; COMMIT;
", ",
)?; )
log::info!("parsing stats");
let input_file = File::open(input_file)?;
let stats: Stats = serde_json::from_reader(input_file)?;
log::info!(
"meta: {}: {} battles, cutoff: {}",
stats.info.metagame,
stats.info.battle_count,
stats.info.cutoff
);
insert_stats(&mut conn, &stats)?;
Ok(())
} }
fn insert_stats(conn: &mut Connection, stats: &Stats) -> rusqlite::Result<()> { fn insert_stats(conn: &mut Connection, stats: &Stats) -> rusqlite::Result<()> {
let mon_count = stats.data.len();
let mon_count_digits = mon_count.to_string().len();
for (i, (mon, data)) in stats.data.iter().enumerate() { for (i, (mon, data)) in stats.data.iter().enumerate() {
log::debug!("Processing mon #{i} ({mon})…"); log::debug!(
"Processing mon {:mon_count_digits$}/{mon_count} ({mon})",
i + 1
);
let tx = conn.transaction()?; let tx = conn.transaction()?;
// normalizing with mon_count gives us data that is much easier to work // normalizing with mon_count gives us data that is much easier to work
// with. for example, if pikachu has 10k raw count and thunderbolt is used // with. for example, if pikachu has 10k count (weighted) and thunderbolt
// 9k times (weighted), we'd like 0.9 so that we can say pikachu runs // is used 9k times (weighted), we'd like 0.9 so that we can say pikachu
// thunderbolt 90% of the time. // runs thunderbolt 90% of the time.
// HACK: we get the weighted mon count by summing the ability usage // HACK: we get the weighted mon count by summing the ability usage
// there's is 1000% a better way to do this // there's is 1000% a better way to do this
@ -144,14 +196,6 @@ fn insert_stats(conn: &mut Connection, stats: &Stats) -> rusqlite::Result<()> {
Ok(()) Ok(())
} }
#[derive(clap::Parser)]
struct Args {
#[arg()]
input_file: PathBuf,
#[arg(short, long = "output")]
output_file: PathBuf,
}
#[derive(Debug, Deserialize, PartialEq, Eq, Hash, Clone, Copy)] #[derive(Debug, Deserialize, PartialEq, Eq, Hash, Clone, Copy)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
enum Type { enum Type {