parse vars, nums, and assts

This commit is contained in:
mehbark 2026-01-01 23:38:28 -05:00
parent bb086751df
commit e2bdfc58cd
5 changed files with 229 additions and 25 deletions

83
Cargo.lock generated
View file

@ -37,6 +37,12 @@ dependencies = [
"yansi",
]
[[package]]
name = "beef"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1"
[[package]]
name = "bitflags"
version = "1.3.2"
@ -66,7 +72,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ba4a05c9ce83b07de31b31c874e87c069881ac4355db9e752e3a55c11ec75a6"
dependencies = [
"hashbrown",
"regex-automata",
"regex-automata 0.3.9",
"serde",
"stacker",
"unicode-ident",
@ -105,6 +111,12 @@ version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff"
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "foldhash"
version = "0.1.5"
@ -145,6 +157,40 @@ version = "0.2.178"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091"
[[package]]
name = "logos"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a790d11254054e5dc83902dba85d253ff06ceb0cfafb12be8773435cb9dfb4f4"
dependencies = [
"logos-derive",
]
[[package]]
name = "logos-codegen"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f60337c43a38313b58871f8d5d76872b8e17aa9d51fad494b5e76092c0ce05f5"
dependencies = [
"beef",
"fnv",
"proc-macro2",
"quote",
"regex-automata 0.4.13",
"regex-syntax 0.8.8",
"rustc_version",
"syn",
]
[[package]]
name = "logos-derive"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d151b2ae667f69e10b8738f5cac0c746faa22b2e15ea7e83b55476afec3767dc"
dependencies = [
"logos-codegen",
]
[[package]]
name = "memchr"
version = "2.7.6"
@ -185,6 +231,7 @@ version = "0.1.0"
dependencies = [
"ariadne",
"chumsky",
"logos",
]
[[package]]
@ -204,7 +251,18 @@ checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
"regex-syntax 0.7.5",
]
[[package]]
name = "regex-automata"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax 0.8.8",
]
[[package]]
@ -213,6 +271,27 @@ version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
[[package]]
name = "regex-syntax"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
[[package]]
name = "rustc_version"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
dependencies = [
"semver",
]
[[package]]
name = "semver"
version = "1.0.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2"
[[package]]
name = "serde"
version = "1.0.228"

View file

@ -6,3 +6,4 @@ edition = "2024"
[dependencies]
ariadne = { version = "0.6.0", features = ["auto-color"]}
chumsky = { version = "0.12.0", features = ["pratt"] }
logos = "0.16.0"

View file

@ -27,4 +27,6 @@ pub enum Ast {
App(Box<Ast>, Box<Ast>),
/// `x + y` (only builtins, sorry)
BinOp(Box<Ast>, fn(Val, Val) -> Val, Box<Ast>),
/// `3`
Num(f64),
}

View file

@ -1,8 +1,6 @@
use std::env;
use chumsky::Parser;
use crate::parser::parser;
use crate::parser::parse;
mod ast;
mod map;
@ -12,6 +10,6 @@ mod val;
/// Entry point: takes the program text from the first command-line argument,
/// parses it, and pretty-prints the result with `{:#?}`.
///
/// Panics (via `expect`) when no argument is supplied.
fn main() {
let src = &env::args().nth(1).expect("give me an argument now");
// NOTE(review): two `let res` bindings follow. This looks like the removed/added
// pair of a diff hunk, in which case only the `parse(src)` line is live — confirm.
let res = parser().parse(src);
let res = parse(src);
println!("{res:#?}");
}

View file

@ -1,33 +1,157 @@
use std::{fmt, process};
use ariadne::{Color, Label, Report, ReportKind, Source};
use chumsky::{
error::{RichPattern, RichReason},
input::{Stream, ValueInput},
prelude::*,
text,
};
use crate::ast::{Ast, Ident};
pub fn parser<'a>() -> impl Parser<'a, &'a str, Vec<Ast>, extra::Err<Rich<'a, char>>> {
recursive(|puyo| {
use logos::Logos;
/// A lexical token produced by the `logos`-generated lexer.
///
/// Whitespace and `#` line comments are skipped between tokens. Variants that
/// carry a `&'a str` borrow the matched text directly from the source string.
#[derive(Logos, Debug, Clone, PartialEq, Eq, Hash)]
#[logos(skip r"[ \t\n\r\f]+")]
// A comment runs from `#` up to, but not including, the end of the line. The
// newline itself is left to the whitespace rule above, so a comment on the last
// line is still skipped when the input has no trailing newline.
#[logos(skip r"#[^\n]*")]
enum Token<'a> {
    /// `:`
    #[token(":")]
    Colon,
    /// `;`
    #[token(";")]
    Semicolon,
    /// `:=`
    #[token(":=")]
    Set,
    /// `(`
    #[token("(")]
    OpenParen,
    /// `)`
    #[token(")")]
    CloseParen,
    /// `[`
    #[token("[")]
    OpenBracket,
    /// `]`
    #[token("]")]
    CloseBracket,
    /// `{`
    #[token("{")]
    OpenBrace,
    /// `}`
    #[token("}")]
    CloseBrace,
    /// `^`
    #[token("^")]
    Return,
    /// A single-character operator: one of `+ * / % = -`.
    #[regex(r"[+*/%=-]")]
    BinOp(&'a str),
    /// The `fn` keyword.
    #[token("fn")]
    Fn,
    /// An identifier: a letter or `_`, followed by letters, digits, `_` or `'`.
    #[regex(r"[a-zA-Z_][a-zA-Z_0-9']*")]
    Var(&'a str),
    /// A numeric literal with an optional leading sign, kept as source text.
    ///
    /// NOTE(review): because the sign is part of the literal, input such as `x-1`
    /// presumably lexes as `Var`, `Num("-1")` rather than as a subtraction — confirm
    /// this is intended.
    #[regex(r"[+-]?(\d+|\d+\.\d*|\d*\.\d+)")]
    Num(&'a str),
    /// Stand-in for input the lexer could not match. It has no pattern of its
    /// own; `parse` substitutes it for lexer errors.
    Error,
}
impl fmt::Display for Token<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Token::Colon => write!(f, ":"),
Token::Semicolon => write!(f, ";"),
Token::Set => write!(f, ":="),
Token::OpenParen => write!(f, "("),
Token::CloseParen => write!(f, ")"),
Token::OpenBracket => write!(f, "["),
Token::CloseBracket => write!(f, "]"),
Token::OpenBrace => write!(f, "{{"),
Token::CloseBrace => write!(f, "}}"),
Token::Return => write!(f, "^"),
Token::Fn => write!(f, "fn"),
Token::BinOp(x) | Token::Var(x) | Token::Num(x) => write!(f, "{x}"),
Token::Error => write!(f, "ERROR"),
}
}
}
/// Builds the placeholder node used where a real AST node could not be produced.
///
/// The placeholder is a local variable named `!invalid`; `!` is not accepted by
/// the identifier pattern of the lexer, so the name cannot come from source text.
fn error_ast() -> Ast {
    let placeholder = String::from("!invalid");
    Ast::Var(Ident::Local(placeholder))
}
/// Builds the chumsky parser that turns a stream of `Token`s into a `Vec<Ast>`,
/// reporting problems as `Rich` errors.
///
/// The input type `I` is any `ValueInput` whose tokens are `Token<'src>` and whose
/// spans are `SimpleSpan`.
///
/// NOTE(review): this block mixes character-level combinators (`text::ident()`,
/// `just('^')`, `.padded()`) with token-level ones (`just(Token::…)`, `select!`).
/// It looks like removed and added lines of a diff shown together — confirm which
/// lines are live before relying on the comments below.
fn parser<'tokens, 'src: 'tokens, I>()
-> impl Parser<'tokens, I, Vec<Ast>, extra::Err<Rich<'tokens, Token<'src>>>>
where
I: ValueInput<'tokens, Token = Token<'src>, Span = SimpleSpan>,
{
recursive(|expr| {
// An identifier is either `^name` (`Ident::Return`) or a bare `name`
// (`Ident::Local`).
let ident = choice((
text::ident().map(|s: &str| Ident::Local(s.to_owned())),
just('^')
.padded()
.ignore_then(text::ident().map(|s: &str| Ident::Return(s.to_owned()))),
just(Token::Return).ignore_then(select! {
Token::Var(s) => Ident::Return(s.to_owned())
}),
select! {
Token::Var(s) => Ident::Local(s.to_owned()),
},
));
// let semicolon = just(Token::Semicolon).repeated().at_least(1);
choice((
// text::keyword("fn")
// .padded()
// .ignore_then(args().delimited_by(just('('), just(')')))
// .map(|(inputs, outputs)| Ast::Fn {
// inputs,
// outputs,
// body: vec![],
// }),
// Assignment: `ident := expr` becomes `Ast::Set`.
ident
.clone()
.then_ignore(just(Token::Set))
.then(expr)
.map(|(id, val)| Ast::Set(id, Box::new(val))),
// A lone identifier is a variable reference.
ident.map(Ast::Var),
// Number literal: the token text is parsed as `f64`. On failure an error is
// emitted and the `error_ast()` placeholder is produced in its place.
select! {
Token::Num(n) => n,
}
.validate(|n, e, emitter| match n.parse::<f64>() {
Ok(n) => Ast::Num(n),
Err(err) => {
emitter.emit(Rich::custom(e.span(), format!("Invalid number: {err}")));
error_ast()
}
}),
))
.padded()
.repeated()
.collect()
})
// Each expression must be followed by one or more `;`; the sequence of such
// terminated expressions is collected into the result.
.then_ignore(just(Token::Semicolon).repeated().at_least(1))
.repeated()
.collect()
}
// yeah i copy-pasted myself lol
pub fn parse(src: &str) -> Vec<Ast> {
let token_iter = Token::lexer(src).spanned().map(|(tok, span)| match tok {
Ok(tok) => (tok, span.into()),
Err(()) => (Token::Error, span.into()),
});
let token_stream =
Stream::from_iter(token_iter).map((0..src.len()).into(), |(t, s): (_, _)| (t, s));
let source_filename = "input";
match parser().parse(token_stream).into_result() {
Ok(insts) => insts,
Err(errs) => {
for err in errs {
Report::build(
ReportKind::Error,
(source_filename, err.span().into_range()),
)
.with_config(ariadne::Config::new().with_index_type(ariadne::IndexType::Byte))
.with_label(
Label::new((source_filename, err.span().into_range()))
.with_message(err.reason())
.with_color(Color::Red),
)
.finish()
.eprint((source_filename, Source::from(&src)))
.unwrap();
}
process::exit(2);
}
}
}