From e2bdfc58cdc49dcc7475447f9a58556e862a59e2 Mon Sep 17 00:00:00 2001 From: mehbark Date: Thu, 1 Jan 2026 23:38:28 -0500 Subject: [PATCH] parse vars, nums, and assts --- Cargo.lock | 83 +++++++++++++++++++++++++- Cargo.toml | 1 + src/ast.rs | 2 + src/main.rs | 6 +- src/parser.rs | 162 ++++++++++++++++++++++++++++++++++++++++++++------ 5 files changed, 229 insertions(+), 25 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3afaca5..58c81c4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -37,6 +37,12 @@ dependencies = [ "yansi", ] +[[package]] +name = "beef" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" + [[package]] name = "bitflags" version = "1.3.2" @@ -66,7 +72,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ba4a05c9ce83b07de31b31c874e87c069881ac4355db9e752e3a55c11ec75a6" dependencies = [ "hashbrown", - "regex-automata", + "regex-automata 0.3.9", "serde", "stacker", "unicode-ident", @@ -105,6 +111,12 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff" +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "foldhash" version = "0.1.5" @@ -145,6 +157,40 @@ version = "0.2.178" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" +[[package]] +name = "logos" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a790d11254054e5dc83902dba85d253ff06ceb0cfafb12be8773435cb9dfb4f4" +dependencies = [ + "logos-derive", +] + +[[package]] +name = "logos-codegen" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60337c43a38313b58871f8d5d76872b8e17aa9d51fad494b5e76092c0ce05f5" +dependencies = [ + "beef", + "fnv", + "proc-macro2", + "quote", + "regex-automata 0.4.13", + "regex-syntax 0.8.8", + "rustc_version", + "syn", +] + +[[package]] +name = "logos-derive" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d151b2ae667f69e10b8738f5cac0c746faa22b2e15ea7e83b55476afec3767dc" +dependencies = [ + "logos-codegen", +] + [[package]] name = "memchr" version = "2.7.6" @@ -185,6 +231,7 @@ version = "0.1.0" dependencies = [ "ariadne", "chumsky", + "logos", ] [[package]] @@ -204,7 +251,18 @@ checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.7.5", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.8.8", ] [[package]] @@ -213,6 +271,27 @@ version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + [[package]] name = "serde" version = "1.0.228" diff --git a/Cargo.toml b/Cargo.toml index 3f3a0f6..8d6ae4e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,3 +6,4 @@ edition = "2024" [dependencies] ariadne = { version = "0.6.0", features = ["auto-color"]} chumsky = { version = "0.12.0", features = ["pratt"] } +logos = "0.16.0" diff --git a/src/ast.rs b/src/ast.rs index a9ad004..e85b72c 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -27,4 +27,6 @@ pub enum Ast { App(Box, Box), /// `x + y` (only builtins, sorry) BinOp(Box, fn(Val, Val) -> Val, Box), + /// `3` + Num(f64), } diff --git a/src/main.rs b/src/main.rs index ddc2ae9..af147a7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,8 +1,6 @@ use std::env; -use chumsky::Parser; - -use crate::parser::parser; +use crate::parser::parse; mod ast; mod map; @@ -12,6 +10,6 @@ mod val; fn main() { let src = &env::args().nth(1).expect("give me an argument now"); - let res = parser().parse(src); + let res = parse(src); println!("{res:#?}"); } diff --git a/src/parser.rs b/src/parser.rs index c10180f..0adfed3 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,33 +1,157 @@ +use std::{fmt, process}; + +use ariadne::{Color, Label, Report, ReportKind, Source}; use chumsky::{ - error::{RichPattern, RichReason}, + input::{Stream, ValueInput}, prelude::*, - text, }; use crate::ast::{Ast, Ident}; -pub fn parser<'a>() -> impl Parser<'a, &'a str, Vec, extra::Err>> { - recursive(|puyo| { +use logos::Logos; + +#[derive(Logos, Debug, Clone, PartialEq, Eq, Hash)] +#[logos(skip r"[ \t\n\r\f]+")] +#[logos(skip r"#[^\n]*?\n")] +enum Token<'a> { + #[token(":")] + Colon, + + #[token(";")] + Semicolon, + + #[token(":=")] + Set, + + #[token("(")] + OpenParen, + #[token(")")] + CloseParen, + + #[token("[")] + OpenBracket, + #[token("]")] + CloseBracket, + + #[token("{")] + OpenBrace, + #[token("}")] + CloseBrace, + + #[token("^")] + Return, + + #[regex(r"[+*/%=-]")] + BinOp(&'a str), + + #[token("fn")] + Fn, + + #[regex(r"[a-zA-Z_][a-zA-Z_0-9']*")] + Var(&'a str), + + #[regex(r"[+-]?(\d+|\d+\.\d*|\d*\.\d+)")] + Num(&'a str), + + Error, +} + +impl fmt::Display for Token<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Token::Colon => write!(f, ":"), + Token::Semicolon => write!(f, ";"), + Token::Set => write!(f, ":="), + Token::OpenParen => write!(f, "("), + Token::CloseParen => write!(f, ")"), + Token::OpenBracket => write!(f, "["), + Token::CloseBracket => write!(f, "]"), + Token::OpenBrace => write!(f, "{{"), + Token::CloseBrace => write!(f, "}}"), + Token::Return => write!(f, "^"), + Token::Fn => write!(f, "fn"), + Token::BinOp(x) | Token::Var(x) | Token::Num(x) => write!(f, "{x}"), + Token::Error => write!(f, "ERROR"), + } + } +} + +fn error_ast() -> Ast { + Ast::Var(Ident::Local("!invalid".to_owned())) +} + +fn parser<'tokens, 'src: 'tokens, I>() +-> impl Parser<'tokens, I, Vec, extra::Err>>> +where + I: ValueInput<'tokens, Token = Token<'src>, Span = SimpleSpan>, +{ + recursive(|expr| { let ident = choice(( - text::ident().map(|s: &str| Ident::Local(s.to_owned())), - just('^') - .padded() - .ignore_then(text::ident().map(|s: &str| Ident::Return(s.to_owned()))), + just(Token::Return).ignore_then(select! { + Token::Var(s) => Ident::Return(s.to_owned()) + }), + select! { + Token::Var(s) => Ident::Local(s.to_owned()), + }, )); + // let semicolon = just(Token::Semicolon).repeated().at_least(1); + choice(( - // text::keyword("fn") - // .padded() - // .ignore_then(args().delimited_by(just('('), just(')'))) - // .map(|(inputs, outputs)| Ast::Fn { - // inputs, - // outputs, - // body: vec![], - // }), + ident + .clone() + .then_ignore(just(Token::Set)) + .then(expr) + .map(|(id, val)| Ast::Set(id, Box::new(val))), ident.map(Ast::Var), + select! { + Token::Num(n) => n, + } + .validate(|n, e, emitter| match n.parse::() { + Ok(n) => Ast::Num(n), + Err(err) => { + emitter.emit(Rich::custom(e.span(), format!("Invalid number: {err}"))); + error_ast() + } + }), )) - .padded() - .repeated() - .collect() }) + .then_ignore(just(Token::Semicolon).repeated().at_least(1)) + .repeated() + .collect() +} + +// yeah i copy-pasted myself lol +pub fn parse(src: &str) -> Vec { + let token_iter = Token::lexer(src).spanned().map(|(tok, span)| match tok { + Ok(tok) => (tok, span.into()), + Err(()) => (Token::Error, span.into()), + }); + + let token_stream = + Stream::from_iter(token_iter).map((0..src.len()).into(), |(t, s): (_, _)| (t, s)); + + let source_filename = "input"; + + match parser().parse(token_stream).into_result() { + Ok(insts) => insts, + Err(errs) => { + for err in errs { + Report::build( + ReportKind::Error, + (source_filename, err.span().into_range()), + ) + .with_config(ariadne::Config::new().with_index_type(ariadne::IndexType::Byte)) + .with_label( + Label::new((source_filename, err.span().into_range())) + .with_message(err.reason()) + .with_color(Color::Red), + ) + .finish() + .eprint((source_filename, Source::from(&src))) + .unwrap(); + } + process::exit(2); + } + } }