normalize usage

This commit is contained in:
mehbark 2025-03-24 18:02:33 -04:00
parent c91ba1e0b1
commit 2f661e09c5

View file

@ -18,11 +18,12 @@ fn main() -> Result<(), Box<dyn Error>> {
log::info!("opening connection"); log::info!("opening connection");
let mut conn = Connection::open(output_file)?; let mut conn = Connection::open(output_file)?;
log::info!("creating tables"); log::info!("creating tables");
// TODO: checks and counters (actually very cool)
conn.execute_batch( conn.execute_batch(
" "
BEGIN; BEGIN;
CREATE TABLE usage ( CREATE TABLE mon (
mon STRING, name STRING,
usage REAL NOT NULL usage REAL NOT NULL
); );
CREATE TABLE ability ( CREATE TABLE ability (
@ -62,32 +63,54 @@ fn main() -> Result<(), Box<dyn Error>> {
for (i, (mon, data)) in stats.data.iter().enumerate() { for (i, (mon, data)) in stats.data.iter().enumerate() {
log::debug!("Processing mon #{i} ({mon})…"); log::debug!("Processing mon #{i} ({mon})…");
let tx = conn.transaction()?; let tx = conn.transaction()?;
tx.execute("INSERT INTO usage VALUES (?1, ?2)", (mon, data.usage))?;
// normalizing with mon_count gives us data that is much easier to work
// with. for example, if pikachu has 10k raw count and thunderbolt is used
// 9k times (weighted), we'd like 0.9 so that we can say pikachu runs
// thunderbolt 90% of the time.
// HACK: we get the weighted mon count by summing the ability usage
// there is 1000% a better way to do this
let mon_count: f32 = data.abilities.values().sum();
tx.execute("INSERT INTO mon VALUES (?1, ?2)", (mon, data.usage))?;
for (ability, count) in &data.abilities { for (ability, count) in &data.abilities {
tx.execute( tx.execute(
"INSERT INTO ability VALUES (?1, ?2, ?3)", "INSERT INTO ability VALUES (?1, ?2, ?3)",
(mon, ability, count), (mon, ability, count / mon_count),
)?; )?;
} }
for (r#move, count) in &data.moves { for (r#move, count) in &data.moves {
tx.execute("INSERT INTO move VALUES (?1, ?2, ?3)", (mon, r#move, count))?; tx.execute(
"INSERT INTO move VALUES (?1, ?2, ?3)",
(mon, r#move, count / mon_count),
)?;
} }
for (item, count) in &data.items { for (item, count) in &data.items {
tx.execute("INSERT INTO item VALUES (?1, ?2, ?3)", (mon, item, count))?; tx.execute(
"INSERT INTO item VALUES (?1, ?2, ?3)",
(mon, item, count / mon_count),
)?;
} }
for (tera, count) in &data.tera { for (tera, count) in &data.tera {
tx.execute( tx.execute(
"INSERT INTO tera VALUES (?1, ?2, ?3)", "INSERT INTO tera VALUES (?1, ?2, ?3)",
(mon, format!("{tera:?}").to_ascii_lowercase(), count), (
mon,
format!("{tera:?}").to_ascii_lowercase(),
count / mon_count,
),
)?; )?;
} }
for (mate, count) in &data.teammates { for (mate, count) in &data.teammates {
tx.execute("INSERT INTO team VALUES (?1, ?2, ?3)", (mon, mate, count))?; tx.execute(
"INSERT INTO team VALUES (?1, ?2, ?3)",
(mon, mate, count / mon_count),
)?;
} }
tx.commit()?; tx.commit()?;
} }