use std::{ collections::HashMap, fs::{self, File}, io, path::PathBuf, process, }; use clap::Parser; use rusqlite::Connection; use serde::Deserialize; use thiserror::Error; fn main() { env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("warn")).init(); let config = Config::parse(); if let Err(e) = run(config) { log::error!("{e}"); process::exit(1); }; } fn run( Config { input_file, output_file, }: Config, ) -> Result<(), AppError> { if fs::exists(&output_file)? { log::warn!("{output_file:?} already exists: creating the db will probably fail"); } // parsing the stats is slow but doing it first avoids creating empty tables log::info!("parsing stats"); let input_file = File::open(input_file)?; let stats: Stats = simd_json::from_reader(input_file)?; log::info!( "meta: {}: {} battles, cutoff: {}", stats.info.metagame, stats.info.battle_count, stats.info.cutoff ); log::info!("opening connection"); let mut conn = Connection::open(output_file)?; create_tables(&mut conn)?; insert_stats(&mut conn, &stats)?; Ok(()) } #[derive(clap::Parser)] #[command(about)] struct Config { #[arg()] input_file: PathBuf, // TODO: non-utf-8 (the problem is saving the database) #[arg(short, long = "output")] output_file: PathBuf, } #[derive(Error, Debug)] enum AppError { #[error("Error creating sqlite db: {0}")] Sql(#[from] rusqlite::Error), #[error("IO error: {0}")] Io(#[from] io::Error), #[error("Error reading JSON: {0}")] Deserialize(#[from] simd_json::Error), } fn create_tables(conn: &mut Connection) -> rusqlite::Result<()> { log::info!("creating tables"); // we could do IF NOT EXISTS, but these are meant to be ephemeral+read-only; // i'd like to be able to evolve and improve the schema, so y'all'll have to // take the ten seconds to delete and remake the db SORRY! things'll change! conn.execute_batch( " BEGIN; CREATE TABLE mon ( name STRING NOT NULL, usage REAL, viability_ceiling REAL ); CREATE TABLE ability ( mon STRING NOT NULL, name STRING NOT NULL, usage REAL NOT NULL ); CREATE TABLE move ( mon STRING NOT NULL, name STRING NOT NULL, usage REAL NOT NULL ); CREATE TABLE item ( mon STRING NOT NULL, name STRING NOT NULL, usage REAL NOT NULL ); CREATE TABLE tera ( mon STRING NOT NULL, type STRING NOT NULL, usage REAL NOT NULL ); CREATE TABLE team ( mon STRING NOT NULL, mate STRING NOT NULL, usage REAL NOT NULL ); CREATE TABLE cc ( mon STRING NOT NULL, opp STRING NOT NULL, percentage REAL NOT NULL, stddev REAL NOT NULL ); COMMIT; ", ) } fn insert_stats(conn: &mut Connection, stats: &Stats) -> rusqlite::Result<()> { let mon_count = stats.data.len(); let mon_count_digits = mon_count.to_string().len(); let tx = conn.transaction()?; for (i, (mon, data)) in stats.data.iter().enumerate() { log::debug!( "Processing mon {:mon_count_digits$}/{mon_count} ({mon})", i + 1 ); // normalizing with mon_count gives us data that is much easier to work // with. for example, if pikachu has 10k count (weighted) and thunderbolt // is used 9k times (weighted), we'd like 0.9 so that we can say pikachu // runs thunderbolt 90% of the time. // HACK: we get the weighted mon count by summing the ability usage // there's is 1000% a better way to do this let mon_count: f32 = data.abilities.values().sum(); tx.execute( "INSERT INTO mon VALUES (?1, ?2, ?3)", ( mon, data.usage, data.viability_ceiling.as_ref().map(|x| x[1]), ), )?; for (ability, count) in &data.abilities { tx.execute( "INSERT INTO ability VALUES (?1, ?2, ?3)", (mon, ability, count / mon_count), )?; } for (r#move, count) in &data.moves { tx.execute( "INSERT INTO move VALUES (?1, ?2, ?3)", (mon, r#move, count / mon_count), )?; } for (item, count) in &data.items { tx.execute( "INSERT INTO item VALUES (?1, ?2, ?3)", (mon, item, count / mon_count), )?; } if let Some(tera) = &data.tera { for (tera, count) in tera { tx.execute( "INSERT INTO tera VALUES (?1, ?2, ?3)", ( mon, format!("{tera:?}").to_ascii_lowercase(), count / mon_count, ), )?; } } for (mate, count) in &data.teammates { tx.execute( "INSERT INTO team VALUES (?1, ?2, ?3)", (mon, mate, count / mon_count), )?; } for (opp, (_, percentage, stddev)) in &data.checks_and_counters { tx.execute( "INSERT INTO cc VALUES (?1, ?2, ?3, ?4)", (mon, opp, percentage, stddev), )?; } } tx.commit()?; Ok(()) } #[derive(Debug, Deserialize, PartialEq, Eq, Hash, Clone, Copy)] #[serde(rename_all = "lowercase")] enum Type { Normal, Fire, Water, Electric, Grass, Ice, Fighting, Poison, Ground, Flying, Psychic, Bug, Rock, Ghost, Dragon, Dark, Steel, Fairy, Stellar, // idk either man Nothing, } #[derive(Debug, Deserialize)] struct Stats { info: Info, data: HashMap<Box<str>, Data>, } #[derive(Debug, Deserialize)] struct Info { metagame: Box<str>, cutoff: f64, #[serde(rename = "number of battles")] battle_count: i64, } type Counts = HashMap<Box<str>, f32>; #[derive(Debug, Deserialize)] #[serde(rename_all = "PascalCase")] struct Data { #[serde(rename = "Viability Ceiling")] viability_ceiling: Option<Box<[u32]>>, abilities: Counts, items: Counts, moves: Counts, #[serde(rename = "Tera Types")] tera: Option<HashMap<Type, f32>>, // i'm just not going to include happiness sorry teammates: Counts, #[serde(rename = "Checks and Counters")] checks_and_counters: HashMap<Box<str>, (f32, f32, f32)>, #[serde(rename = "usage")] usage: Option<f32>, }