// smogon-stats/src/main.rs
//
// 262 lines
// 6.7 KiB
// Rust

use std::{
collections::HashMap,
fs::{self, File},
io,
path::PathBuf,
process,
};
use clap::Parser;
use rusqlite::Connection;
use serde::Deserialize;
use thiserror::Error;
/// Program entry point.
///
/// Sets up `env_logger` (defaulting to the `warn` filter when the environment
/// does not configure one), parses the command line into a [`Config`], and
/// hands it to [`run`]. Any error from `run` is logged and the process exits
/// with status 1.
fn main() {
    let log_env = env_logger::Env::default().default_filter_or("warn");
    env_logger::Builder::from_env(log_env).init();

    match run(Config::parse()) {
        Ok(()) => {}
        Err(e) => {
            log::error!("{e}");
            process::exit(1);
        }
    }
}
/// Reads the stats JSON named by `config` and writes it into a SQLite database.
///
/// Steps, in order: warn if the output path already exists, parse the input
/// file into [`Stats`], open the database, create the tables, insert the data.
///
/// # Errors
///
/// Returns [`AppError`] when an I/O operation, the JSON parse, or any SQLite
/// call fails.
fn run(config: Config) -> Result<(), AppError> {
    let Config {
        input_file,
        output_file,
    } = config;

    let output_already_present = fs::exists(&output_file)?;
    if output_already_present {
        log::warn!("{output_file:?} already exists: creating the db will probably fail");
    }

    // Parsing is the slow part, but doing it before opening the database means
    // a bad input file never leaves empty tables behind.
    log::info!("parsing stats");
    let stats: Stats = simd_json::from_reader(File::open(input_file)?)?;
    let info = &stats.info;
    log::info!(
        "meta: {}: {} battles, cutoff: {}",
        info.metagame,
        info.battle_count,
        info.cutoff
    );

    log::info!("opening connection");
    let mut conn = Connection::open(output_file)?;
    create_tables(&mut conn)?;
    insert_stats(&mut conn, &stats)?;
    Ok(())
}
// Command-line arguments, parsed by clap's derive API.
//
// NOTE: plain `//` comments are used here on purpose: clap's derive turns
// `///` doc comments into help text, which would change the `--help` output.
#[derive(clap::Parser)]
#[command(about)]
struct Config {
// Positional argument: path of the stats JSON file that `run` opens and parses.
#[arg()]
input_file: PathBuf,
// TODO: non-utf-8 (the problem is saving the database)
// Required option (`-o` / `--output`): path of the SQLite database that `run`
// opens and fills.
#[arg(short, long = "output")]
output_file: PathBuf,
}
/// Every failure `run` can report; `main` logs it via its `Display` text.
///
/// Each variant wraps the underlying library error and is created through the
/// `#[from]` conversion, so `?` on the matching error type produces it.
#[derive(Error, Debug)]
enum AppError {
/// A rusqlite call failed (opening the database, creating tables, inserting).
#[error("Error creating sqlite db: {0}")]
Sql(#[from] rusqlite::Error),
/// A filesystem operation failed (checking the output path, opening the input).
#[error("IO error: {0}")]
Io(#[from] io::Error),
/// The input file could not be parsed into `Stats`.
#[error("Error reading JSON: {0}")]
Deserialize(#[from] simd_json::Error),
}
/// Creates the (empty) schema: `mon`, `ability`, `move`, `item`, `tera`,
/// `team` and `cc`.
///
/// All tables are created inside one transaction. If any statement fails
/// (for example because a table already exists) the transaction is rolled
/// back when it is dropped, so the connection is never left with a dangling
/// open transaction or a half-created schema.
///
/// Text columns are declared `TEXT`. The previous `STRING` declaration is not
/// a text type in SQLite: it resolves to NUMERIC affinity, which would coerce
/// any numeric-looking name into a number on insert.
///
/// # Errors
///
/// Returns the rusqlite error if the transaction cannot be started, a
/// `CREATE TABLE` fails, or the commit fails.
fn create_tables(conn: &mut Connection) -> rusqlite::Result<()> {
    log::info!("creating tables");
    // we could do IF NOT EXISTS, but these are meant to be ephemeral+read-only;
    // i'd like to be able to evolve and improve the schema, so y'all'll have to
    // take the ten seconds to delete and remake the db SORRY! things'll change!
    let tx = conn.transaction()?;
    tx.execute_batch(
        "
        CREATE TABLE mon (
            name TEXT NOT NULL,
            usage REAL,
            viability_ceiling REAL
        );
        CREATE TABLE ability (
            mon TEXT NOT NULL,
            name TEXT NOT NULL,
            usage REAL NOT NULL
        );
        CREATE TABLE move (
            mon TEXT NOT NULL,
            name TEXT NOT NULL,
            usage REAL NOT NULL
        );
        CREATE TABLE item (
            mon TEXT NOT NULL,
            name TEXT NOT NULL,
            usage REAL NOT NULL
        );
        CREATE TABLE tera (
            mon TEXT NOT NULL,
            type TEXT NOT NULL,
            usage REAL NOT NULL
        );
        CREATE TABLE team (
            mon TEXT NOT NULL,
            mate TEXT NOT NULL,
            usage REAL NOT NULL
        );
        CREATE TABLE cc (
            mon TEXT NOT NULL,
            opp TEXT NOT NULL,
            percentage REAL NOT NULL,
            stddev REAL NOT NULL
        );
        ",
    )?;
    tx.commit()
}
/// Writes every mon in `stats` into the tables made by `create_tables`.
///
/// Everything happens in a single transaction, so either all rows are stored
/// or (on error) none are. The tables must already exist: the INSERT
/// statements are prepared up front, before any row is written.
///
/// Ability, move, item, tera and teammate counts are stored as a fraction of
/// the mon's weighted count (see the HACK note below); checks-and-counters
/// values are stored as given.
///
/// # Errors
///
/// Returns the rusqlite error if the transaction cannot be started, a
/// statement cannot be prepared or executed, or the commit fails.
fn insert_stats(conn: &mut Connection, stats: &Stats) -> rusqlite::Result<()> {
    let mon_count = stats.data.len();
    let mon_count_digits = mon_count.to_string().len();
    let tx = conn.transaction()?;
    {
        // Prepare each INSERT once instead of re-parsing the SQL for every row.
        // The statements borrow `tx`, so they live in this inner scope and are
        // dropped before `tx.commit()` consumes the transaction.
        let mut insert_mon = tx.prepare_cached("INSERT INTO mon VALUES (?1, ?2, ?3)")?;
        let mut insert_ability = tx.prepare_cached("INSERT INTO ability VALUES (?1, ?2, ?3)")?;
        let mut insert_move = tx.prepare_cached("INSERT INTO move VALUES (?1, ?2, ?3)")?;
        let mut insert_item = tx.prepare_cached("INSERT INTO item VALUES (?1, ?2, ?3)")?;
        let mut insert_tera = tx.prepare_cached("INSERT INTO tera VALUES (?1, ?2, ?3)")?;
        let mut insert_team = tx.prepare_cached("INSERT INTO team VALUES (?1, ?2, ?3)")?;
        let mut insert_cc = tx.prepare_cached("INSERT INTO cc VALUES (?1, ?2, ?3, ?4)")?;

        for (i, (mon, data)) in stats.data.iter().enumerate() {
            log::debug!(
                "Processing mon {:mon_count_digits$}/{mon_count} ({mon})",
                i + 1
            );
            // normalizing with the weighted count gives us data that is much
            // easier to work with. for example, if pikachu has 10k count
            // (weighted) and thunderbolt is used 9k times (weighted), we'd like
            // 0.9 so that we can say pikachu runs thunderbolt 90% of the time.
            // HACK: we get the weighted mon count by summing the ability usage;
            // there's 1000% a better way to do this
            // NOTE(review): if the abilities are empty or sum to zero, the
            // ratios below are NaN/inf; how SQLite treats those against the
            // NOT NULL columns should be confirmed.
            let weighted_count: f32 = data.abilities.values().sum();

            // Only the element at index 1 is stored. `get` (instead of `[1]`)
            // stores NULL rather than panicking when the array is too short.
            let viability_ceiling = data
                .viability_ceiling
                .as_deref()
                .and_then(|ceiling| ceiling.get(1).copied());
            insert_mon.execute((mon, data.usage, viability_ceiling))?;

            for (ability, count) in &data.abilities {
                insert_ability.execute((mon, ability, count / weighted_count))?;
            }
            for (r#move, count) in &data.moves {
                insert_move.execute((mon, r#move, count / weighted_count))?;
            }
            for (item, count) in &data.items {
                insert_item.execute((mon, item, count / weighted_count))?;
            }
            if let Some(tera) = &data.tera {
                for (tera_type, count) in tera {
                    // The type is stored as its lower-cased variant name.
                    insert_tera.execute((
                        mon,
                        format!("{tera_type:?}").to_ascii_lowercase(),
                        count / weighted_count,
                    ))?;
                }
            }
            for (mate, count) in &data.teammates {
                insert_team.execute((mon, mate, count / weighted_count))?;
            }
            // The first tuple element is not stored.
            for (opp, (_, percentage, stddev)) in &data.checks_and_counters {
                insert_cc.execute((mon, opp, percentage, stddev))?;
            }
        }
    }
    tx.commit()
}
/// A type name used as the key of the `tera` map in `Data`.
///
/// Deserialized from the lower-cased variant name (`"fire"`, `"water"`, ...).
/// `insert_stats` writes the lower-cased `Debug` name into the `tera` table.
#[derive(Debug, Deserialize, PartialEq, Eq, Hash, Clone, Copy)]
#[serde(rename_all = "lowercase")]
enum Type {
Normal,
Fire,
Water,
Electric,
Grass,
Ice,
Fighting,
Poison,
Ground,
Flying,
Psychic,
Bug,
Rock,
Ghost,
Dragon,
Dark,
Steel,
Fairy,
Stellar,
// idk either man
// NOTE(review): deserialized from the key "nothing"; what it means in the
// source data is not visible here — TODO confirm.
Nothing,
}
/// Top-level shape of the input JSON file parsed by `run`.
#[derive(Debug, Deserialize)]
struct Stats {
/// Metadata about the file; `run` logs it after parsing.
info: Info,
/// Per-mon statistics, keyed by the name `insert_stats` stores as the mon.
data: HashMap<Box<str>, Data>,
}
/// The `info` object of the input JSON; only used for the log line in `run`.
#[derive(Debug, Deserialize)]
struct Info {
/// Read from the JSON key `metagame`.
metagame: Box<str>,
/// Read from the JSON key `cutoff`.
cutoff: f64,
/// Read from the JSON key `number of battles`.
#[serde(rename = "number of battles")]
battle_count: i64,
}
/// A name -> count map; `insert_stats` divides each count by the mon's
/// weighted count before storing it.
type Counts = HashMap<Box<str>, f32>;
/// Statistics for a single mon (one value of the `data` map in `Stats`).
///
/// Fields without an explicit `rename` are read from the PascalCase form of
/// the field name (`Abilities`, `Items`, `Moves`, `Teammates`).
#[derive(Debug, Deserialize)]
#[serde(rename_all = "PascalCase")]
struct Data {
/// Optional array; `insert_stats` stores only the element at index 1.
#[serde(rename = "Viability Ceiling")]
viability_ceiling: Option<Box<[u32]>>,
/// Ability name -> count. The sum of these values is what `insert_stats`
/// uses as the mon's weighted count.
abilities: Counts,
/// Item name -> count.
items: Counts,
/// Move name -> count.
moves: Counts,
/// Tera type -> count; optional in the input.
#[serde(rename = "Tera Types")]
tera: Option<HashMap<Type, f32>>,
// i'm just not going to include happiness sorry
/// Teammate name -> count.
teammates: Counts,
/// Opponent name -> 3-tuple. `insert_stats` ignores the first element and
/// stores the second as `percentage` and the third as `stddev`.
#[serde(rename = "Checks and Counters")]
checks_and_counters: HashMap<Box<str>, (f32, f32, f32)>,
/// Optional. Renamed explicitly because the key is lower-case `usage`,
/// not the `Usage` that `rename_all` would otherwise expect.
#[serde(rename = "usage")]
usage: Option<f32>,
}