diff --git a/Cargo.lock b/Cargo.lock index 8886c5a..eaf9ea6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,28 +2,114 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "ahash" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217" +dependencies = [ + "const-random", +] + [[package]] name = "anyhow" version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61604a8f862e1d5c3229fdd78f8b02c68dcf73a4c4b05fd636d12240aaa242c1" +[[package]] +name = "ariadne" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7080ae01b2f0c312065d4914cd0f0de045eb8832e9415b355106a6cff3073cb4" +dependencies = [ + "yansi", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chumsky" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2d3efff85e8572b1c3fa0127706af58c4fff8458f8d9436d54b1e97573c7a3f" +dependencies = [ + "ahash", +] + +[[package]] +name = "const-random" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f590d95d011aa80b063ffe3253422ed5aa462af4e9867d43ce8337562bac77c4" +dependencies = [ + "const-random-macro", + "proc-macro-hack", +] + +[[package]] +name = "const-random-macro" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "615f6e27d000a2bffbc7f2f6a8669179378fa27ee4d0a509e985dfc0a7defb40" +dependencies = [ + "getrandom", + "lazy_static", + "proc-macro-hack", + "tiny-keccak", +] + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "getrandom" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "hwas" version = "0.1.0" dependencies = [ "anyhow", + "ariadne", + "chumsky", "nom", "wasm-encoder", "wasmparser", ] +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + [[package]] name = "leb128" version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67" +[[package]] +name = "libc" +version = "0.2.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "869d572136620d55835903746bcb5cdc54cb2851fd0aeec53220b4bb65ef3013" + [[package]] name = "memchr" version = "2.4.1" @@ -47,12 +133,33 @@ dependencies = [ "version_check", ] +[[package]] +name = "proc-macro-hack" +version = "0.5.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "version_check" version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" +[[package]] +name = "wasi" +version = "0.10.2+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + [[package]] name = "wasm-encoder" version = "0.8.0" @@ -67,3 +174,9 @@ name = "wasmparser" version = "0.81.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "98930446519f63d00a836efdc22f67766ceae8dbcc1571379f2bcabc6b2b9abc" + +[[package]] +name = "yansi" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fc79f4a1e39857fc00c3f662cbf2651c771f00e9c15fe2abc341806bd46bd71" diff --git a/Cargo.toml b/Cargo.toml index a922053..8d70333 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,4 +9,6 @@ edition = "2021" nom = "7" wasmparser = "0.81" wasm-encoder = "0.8" -anyhow = "1" \ No newline at end of file +anyhow = "1" +chumsky = "0.5" +ariadne = "0.1" \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 7d232c6..ad7ddd5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,6 +6,7 @@ mod ast; mod constfold; mod emit; mod parser; +mod parser2; mod typecheck; fn main() -> Result<()> { @@ -17,6 +18,10 @@ fn main() -> Result<()> { let mut input = String::new(); File::open(&filename)?.read_to_string(&mut input)?; + if let Err(_) = parser2::parse(&input) { + bail!("Parse failed"); + } + let mut script = match parser::parse(input.as_str()) { Ok(script) => script, Err(err) => { diff --git a/src/parser2.rs b/src/parser2.rs new file mode 100644 index 0000000..20d79bc --- /dev/null +++ b/src/parser2.rs @@ -0,0 +1,568 @@ +use ariadne::{Color, Fmt, Label, Report, ReportKind, Source}; +use chumsky::{prelude::*, stream::Stream}; +use std::fmt; + +pub type Span = std::ops::Range; + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +enum Token { + Import, + Export, + Fn, + Let, + Memory, + Global, + Mut, + Loop, + BranchIf, + Ident(String), + Str(String), + Int(i32), + Float(String), + Op(String), + Ctrl(char), +} + +impl fmt::Display for Token { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Token::Import => write!(f, "import"), + Token::Export => write!(f, "export"), + Token::Fn => write!(f, "fn"), + Token::Let => write!(f, "let"), + Token::Memory => write!(f, "memory"), + Token::Global => write!(f, "global"), + Token::Mut => write!(f, "mut"), + Token::Loop => write!(f, "loop"), + Token::BranchIf => write!(f, "branch_if"), + Token::Ident(s) => write!(f, "{}", s), + Token::Str(s) => write!(f, "{:?}", s), + Token::Int(v) => write!(f, "{}", v), + Token::Float(v) => write!(f, "{}", v), + Token::Op(s) => write!(f, "{}", s), + Token::Ctrl(c) => write!(f, "{}", c), + } + } +} + +pub fn parse(source: &str) -> Result<(), ()> { + let tokens = match lexer().parse(source) { + Ok(tokens) => tokens, + Err(errors) => { + report_errors( + errors + .into_iter() + .map(|e| e.map(|c| c.to_string())) + .collect(), + source, + ); + return Err(()); + } + }; + + let source_len = source.chars().count(); + let script = match script_parser().parse(Stream::from_iter( + source_len..source_len + 1, + tokens.into_iter(), + )) { + Ok(script) => script, + Err(errors) => { + report_errors( + errors + .into_iter() + .map(|e| e.map(|t| t.to_string())) + .collect(), + source, + ); + return Err(()); + } + }; + dbg!(script); + Ok(()) +} + +fn report_errors(errors: Vec>, source: &str) { + for error in errors { + let report = Report::build(ReportKind::Error, (), error.span().start()); + + let report = match error.reason() { + chumsky::error::SimpleReason::Unclosed { span, delimiter } => report + .with_message(format!( + "Unclosed delimiter {}", + delimiter.fg(Color::Yellow) + )) + .with_label( + Label::new(span.clone()) + .with_message(format!( + "Unclosed delimiter {}", + delimiter.fg(Color::Yellow) + )) + .with_color(Color::Yellow), + ) + .with_label( + Label::new(error.span()) + .with_message(format!( + "Must be closed before this {}", + error + .found() + .unwrap_or(&"end of file".to_string()) + .fg(Color::Red) + )) + .with_color(Color::Red), + ), + chumsky::error::SimpleReason::Unexpected => report + .with_message(format!( + "{}, expected one of {}", + if error.found().is_some() { + "Unexpected token in input" + } else { + "Unexpted end of input" + }, + if error.expected().len() == 0 { + "end of input".to_string() + } else { + error + .expected() + .map(|x| x.to_string()) + .collect::>() + .join(", ") + } + )) + .with_label( + Label::new(error.span()) + .with_message(format!( + "Unexpected token {}", + error + .found() + .unwrap_or(&"end of file".to_string()) + .fg(Color::Red) + )) + .with_color(Color::Red), + ), + chumsky::error::SimpleReason::Custom(msg) => report.with_message(msg).with_label( + Label::new(error.span()) + .with_message(format!("{}", msg.fg(Color::Red))) + .with_color(Color::Red), + ), + }; + + report.finish().eprint(Source::from(source)).unwrap(); + } +} + +fn lexer() -> impl Parser, Error = Simple> { + let float = text::int(10) + .chain::(just('.').chain(text::digits(10))) + .collect::() + .map(Token::Float); + + let int = text::int(10).map(|s: String| Token::Int(s.parse().unwrap())); + + let str_ = just('"') + .ignore_then(filter(|c| *c != '"').repeated()) + .then_ignore(just('"')) + .collect::() + .map(Token::Str); + + let op = one_of("+-*/%&^|<=>".chars()) + .repeated() + .at_least(1) + .or(just(':').chain(just('='))) + .collect::() + .map(Token::Op); + + let ctrl = one_of("(){};,:?!".chars()).map(Token::Ctrl); + + let ident = text::ident().map(|ident: String| match ident.as_str() { + "import" => Token::Import, + "export" => Token::Export, + "fn" => Token::Fn, + "let" => Token::Let, + "memory" => Token::Memory, + "global" => Token::Global, + "mut" => Token::Mut, + "loop" => Token::Loop, + "branch_if" => Token::BranchIf, + _ => Token::Ident(ident), + }); + + let single_line = + seq::<_, _, Simple>("//".chars()).then_ignore(take_until(text::newline())); + + let multi_line = + seq::<_, _, Simple>("/*".chars()).then_ignore(take_until(seq("*/".chars()))); + + let comment = single_line.or(multi_line); + + let token = float + .or(int) + .or(str_) + .or(op) + .or(ctrl) + .or(ident) + .recover_with(skip_then_retry_until([])); + + token + .map_with_span(|tok, span| (tok, span)) + .padded() + .padded_by(comment.padded().repeated()) + .repeated() +} + +mod ast { + use super::Span; + + #[derive(Debug)] + pub struct Script { + pub imports: Vec, + pub global_vars: Vec, + pub functions: Vec, + } + + #[derive(Debug)] + pub enum TopLevelItem { + Import(Import), + GlobalVar(GlobalVar), + Function(Function), + } + + #[derive(Debug)] + pub struct Import { + pub span: Span, + pub import: String, + pub type_: ImportType, + } + + #[derive(Debug)] + pub enum ImportType { + Memory(u32), + Variable { + name: String, + type_: Type, + mutable: bool, + }, + // Function { name: String, params: Vec, result: Option } + } + + #[derive(Debug)] + pub struct GlobalVar { + pub span: Span, + pub name: String, + pub type_: Type, + } + + #[derive(Debug)] + pub struct Function { + pub span: Span, + pub export: bool, + pub name: String, + pub params: Vec<(String, Type)>, + pub type_: Option, + pub body: Block, + } + + #[derive(Debug)] + pub struct Block { + pub statements: Vec, + pub final_expression: Option>, + } + + impl Block { + pub fn type_(&self) -> Option { + self.final_expression.as_ref().and_then(|e| e.type_) + } + } + + #[derive(Debug)] + pub struct MemoryLocation { + pub span: Span, + pub size: MemSize, + pub left: Box, + pub right: Box, + } + + #[derive(Debug)] + pub struct LocalVariable { + pub span: Span, + pub name: String, + pub type_: Option, + pub value: Option, + pub defer: bool, + } + + #[derive(Debug)] + pub struct Expression { + pub type_: Option, + pub expr: Expr, + pub span: Span, + } + + #[derive(Debug)] + pub enum Expr { + I32Const(i32), + F32Const(f32), + Variable(String), + Let { + name: String, + type_: Option, + value: Option>, + defer: bool, + }, + Poke { + mem_location: MemoryLocation, + value: Box, + }, + Loop { + label: String, + block: Box, + }, + BranchIf { + condition: Box, + label: String, + }, + BinOp { + op: BinOp, + left: Box, + right: Box, + }, + LocalTee { + name: String, + value: Box, + }, + Cast { + value: Box, + type_: Type, + }, + FuncCall { + name: String, + params: Vec, + }, + Select { + condition: Box, + if_true: Box, + if_false: Box, + }, + Error, + } + + impl Expr { + pub fn with_span(self, span: Span) -> Expression { + Expression { + type_: None, + expr: self, + span: span, + } + } + } + + #[derive(Debug, Clone, Copy)] + pub enum BinOp { + Add, + Sub, + Mul, + Div, + Rem, + And, + Or, + Xor, + Eq, + Ne, + Gt, + Ge, + Lt, + Le, + } + + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + pub enum MemSize { + Byte, + Word, + } + + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] + pub enum Type { + I32, + I64, + F32, + F64, + } +} + +fn map_token( + f: impl Fn(&Token) -> Option + 'static + Clone, +) -> impl Parser> + Clone { + filter_map(move |span, tok: Token| { + if let Some(output) = f(&tok) { + Ok(output) + } else { + Err(Simple::expected_input_found(span, Vec::new(), Some(tok))) + } + }) +} + +fn block_parser() -> impl Parser> + Clone { + recursive(|block| { + let expression = recursive(|expression| { + let val = map_token(|tok| match tok { + Token::Int(v) => Some(ast::Expr::I32Const(*v)), + Token::Float(v) => Some(ast::Expr::F32Const(v.parse().unwrap())), + _ => None, + }) + .labelled("value"); + + let variable = filter_map(|span, tok| match tok { + Token::Ident(id) => Ok(ast::Expr::Variable(id)), + _ => Err(Simple::expected_input_found(span, Vec::new(), Some(tok))), + }) + .labelled("variable"); + + let ident = filter_map(|span, tok| match tok { + Token::Ident(id) => Ok(id), + _ => Err(Simple::expected_input_found(span, Vec::new(), Some(tok))), + }) + .labelled("identifier"); + + let local_tee = ident + .then(just(Token::Op(":=".to_string())).ignore_then(expression.clone())) + .map(|(name, expr)| ast::Expr::LocalTee { + name, + value: Box::new(expr), + }); + + let loop_expr = just(Token::Loop) + .ignore_then(ident) + .then( + block + .clone() + .delimited_by(Token::Ctrl('{'), Token::Ctrl('}')), + ) + .map(|(label, block)| ast::Expr::Loop { + label, + block: Box::new(block), + }); + + let branch_if = just(Token::BranchIf) + .ignore_then(expression.clone()) + .then_ignore(just(Token::Ctrl(':'))) + .then(ident) + .map(|(condition, label)| ast::Expr::BranchIf { + condition: Box::new(condition), + label, + }); + + let atom = val + .or(variable) + .or(local_tee) + .or(loop_expr) + .or(branch_if) + .map_with_span(|expr, span| expr.with_span(span)) + .or(expression + .clone() + .delimited_by(Token::Ctrl('('), Token::Ctrl(')'))) + .recover_with(nested_delimiters( + Token::Ctrl('('), + Token::Ctrl(')'), + [(Token::Ctrl('{'), Token::Ctrl('}'))], + |span| ast::Expr::Error.with_span(span), + )); + + atom + }); + + expression + .clone() + .then_ignore(just(Token::Ctrl(';'))) + .repeated() + .then(expression.clone().or_not()) + .map(|(statements, final_expression)| ast::Block { + statements, + final_expression: final_expression.map(|e| Box::new(e)), + }) + }) +} + +fn type_parser() -> impl Parser> + Clone { + filter_map(|span, tok| match tok { + Token::Ident(id) if id == "i32" => Ok(ast::Type::I32), + Token::Ident(id) if id == "i64" => Ok(ast::Type::I64), + Token::Ident(id) if id == "f32" => Ok(ast::Type::F32), + Token::Ident(id) if id == "f64" => Ok(ast::Type::F64), + _ => Err(Simple::expected_input_found( + span, + vec![ + Token::Ident("i32".into()), + Token::Ident("i64".into()), + Token::Ident("f32".into()), + Token::Ident("f64".into()), + ], + Some(tok), + )), + }) +} + +fn top_level_item_parser() -> impl Parser> + Clone { + let integer = map_token(|tok| match tok { + Token::Int(v) => Some(*v), + _ => None, + }); + + let string = map_token(|tok| match tok { + Token::Str(s) => Some(s.clone()), + _ => None, + }); + + let identifier = map_token(|tok| match tok { + Token::Ident(id) => Some(id.clone()), + _ => None, + }); + + let import_memory = just(Token::Memory) + .ignore_then(integer.delimited_by(Token::Ctrl('('), Token::Ctrl(')'))) + .map(|min_size| ast::ImportType::Memory(min_size as u32)); + + let import_global = just(Token::Global) + .ignore_then(just(Token::Mut).or_not()) + .then(identifier) + .then_ignore(just(Token::Ctrl(':'))) + .then(type_parser()) + .map(|((mut_opt, name), type_)| ast::ImportType::Variable { + mutable: mut_opt.is_some(), + name, + type_, + }); + + let import = just(Token::Import) + .ignore_then(string) + .then(import_memory.or(import_global)) + .then_ignore(just(Token::Ctrl(';'))) + .map_with_span(|(import, type_), span| { + ast::TopLevelItem::Import(ast::Import { + span, + import, + type_, + }) + }); + + import +} + +fn script_parser() -> impl Parser> + Clone { + top_level_item_parser() + .repeated() + .then_ignore(end()) + .map(|items| { + let mut script = ast::Script { + imports: Vec::new(), + global_vars: Vec::new(), + functions: Vec::new(), + }; + for item in items { + match item { + ast::TopLevelItem::Import(i) => script.imports.push(i), + ast::TopLevelItem::GlobalVar(v) => script.global_vars.push(v), + ast::TopLevelItem::Function(f) => script.functions.push(f), + } + } + script + }) +}