try out chumsky parser lib - wip

2021-10-30 23:52:09 +02:00
parent 2267eed21c
commit 7ef98f0e96
4 changed files with 689 additions and 1 deletion

Cargo.lock (generated, +113)

@@ -2,28 +2,114 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "ahash"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217"
dependencies = [
"const-random",
]
[[package]]
name = "anyhow"
version = "1.0.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61604a8f862e1d5c3229fdd78f8b02c68dcf73a4c4b05fd636d12240aaa242c1"
[[package]]
name = "ariadne"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7080ae01b2f0c312065d4914cd0f0de045eb8832e9415b355106a6cff3073cb4"
dependencies = [
"yansi",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chumsky"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2d3efff85e8572b1c3fa0127706af58c4fff8458f8d9436d54b1e97573c7a3f"
dependencies = [
"ahash",
]
[[package]]
name = "const-random"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f590d95d011aa80b063ffe3253422ed5aa462af4e9867d43ce8337562bac77c4"
dependencies = [
"const-random-macro",
"proc-macro-hack",
]
[[package]]
name = "const-random-macro"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "615f6e27d000a2bffbc7f2f6a8669179378fa27ee4d0a509e985dfc0a7defb40"
dependencies = [
"getrandom",
"lazy_static",
"proc-macro-hack",
"tiny-keccak",
]
[[package]]
name = "crunchy"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
[[package]]
name = "getrandom"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "hwas"
version = "0.1.0"
dependencies = [
"anyhow",
"ariadne",
"chumsky",
"nom",
"wasm-encoder",
"wasmparser",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "leb128"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67"
[[package]]
name = "libc"
version = "0.2.105"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "869d572136620d55835903746bcb5cdc54cb2851fd0aeec53220b4bb65ef3013"
[[package]]
name = "memchr"
version = "2.4.1"
@@ -47,12 +133,33 @@ dependencies = [
"version_check",
]
[[package]]
name = "proc-macro-hack"
version = "0.5.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
[[package]]
name = "tiny-keccak"
version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237"
dependencies = [
"crunchy",
]
[[package]]
name = "version_check"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe"
[[package]]
name = "wasi"
version = "0.10.2+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
[[package]]
name = "wasm-encoder"
version = "0.8.0"
@@ -67,3 +174,9 @@ name = "wasmparser"
version = "0.81.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "98930446519f63d00a836efdc22f67766ceae8dbcc1571379f2bcabc6b2b9abc"
[[package]]
name = "yansi"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fc79f4a1e39857fc00c3f662cbf2651c771f00e9c15fe2abc341806bd46bd71"

Cargo.toml

@@ -10,3 +10,5 @@ nom = "7"
wasmparser = "0.81"
wasm-encoder = "0.8"
anyhow = "1"
chumsky = "0.5"
ariadne = "0.1"

src/main.rs

@@ -6,6 +6,7 @@ mod ast;
mod constfold;
mod emit;
mod parser;
mod parser2;
mod typecheck;
fn main() -> Result<()> {
@@ -17,6 +18,10 @@ fn main() -> Result<()> {
let mut input = String::new();
File::open(&filename)?.read_to_string(&mut input)?;
// Run the new chumsky-based front end alongside the existing parser; for now a
// parse failure simply aborts.
if parser2::parse(&input).is_err() {
bail!("Parse failed");
}
let mut script = match parser::parse(input.as_str()) {
Ok(script) => script,
Err(err) => {

src/parser2.rs (new file, +568)

@@ -0,0 +1,568 @@
use ariadne::{Color, Fmt, Label, Report, ReportKind, Source};
use chumsky::{prelude::*, stream::Stream};
use std::fmt;
pub type Span = std::ops::Range<usize>;
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
enum Token {
Import,
Export,
Fn,
Let,
Memory,
Global,
Mut,
Loop,
BranchIf,
Ident(String),
Str(String),
Int(i32),
Float(String),
Op(String),
Ctrl(char),
}
impl fmt::Display for Token {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Token::Import => write!(f, "import"),
Token::Export => write!(f, "export"),
Token::Fn => write!(f, "fn"),
Token::Let => write!(f, "let"),
Token::Memory => write!(f, "memory"),
Token::Global => write!(f, "global"),
Token::Mut => write!(f, "mut"),
Token::Loop => write!(f, "loop"),
Token::BranchIf => write!(f, "branch_if"),
Token::Ident(s) => write!(f, "{}", s),
Token::Str(s) => write!(f, "{:?}", s),
Token::Int(v) => write!(f, "{}", v),
Token::Float(v) => write!(f, "{}", v),
Token::Op(s) => write!(f, "{}", s),
Token::Ctrl(c) => write!(f, "{}", c),
}
}
}
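/// Entry point for the chumsky front end: lex the source into spanned tokens,
/// then run the token-level script parser over them. Errors from either stage
/// are pretty-printed with ariadne and collapsed into `Err(())`.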
pub fn parse(source: &str) -> Result<(), ()> {
let tokens = match lexer().parse(source) {
Ok(tokens) => tokens,
Err(errors) => {
report_errors(
errors
.into_iter()
.map(|e| e.map(|c| c.to_string()))
.collect(),
source,
);
return Err(());
}
};
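// chumsky's Stream needs an end-of-input span; point it just past the last character.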
let source_len = source.chars().count();
let script = match script_parser().parse(Stream::from_iter(
source_len..source_len + 1,
tokens.into_iter(),
)) {
Ok(script) => script,
Err(errors) => {
report_errors(
errors
.into_iter()
.map(|e| e.map(|t| t.to_string()))
.collect(),
source,
);
return Err(());
}
};
dbg!(script);
Ok(())
}
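/// Renders chumsky `Simple` errors as ariadne reports, with dedicated messages
/// for unclosed delimiters, unexpected tokens or end of input, and custom errors.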
fn report_errors(errors: Vec<Simple<String>>, source: &str) {
for error in errors {
let report = Report::build(ReportKind::Error, (), error.span().start());
let report = match error.reason() {
chumsky::error::SimpleReason::Unclosed { span, delimiter } => report
.with_message(format!(
"Unclosed delimiter {}",
delimiter.fg(Color::Yellow)
))
.with_label(
Label::new(span.clone())
.with_message(format!(
"Unclosed delimiter {}",
delimiter.fg(Color::Yellow)
))
.with_color(Color::Yellow),
)
.with_label(
Label::new(error.span())
.with_message(format!(
"Must be closed before this {}",
error
.found()
.unwrap_or(&"end of file".to_string())
.fg(Color::Red)
))
.with_color(Color::Red),
),
chumsky::error::SimpleReason::Unexpected => report
.with_message(format!(
"{}, expected one of {}",
if error.found().is_some() {
"Unexpected token in input"
} else {
"Unexpted end of input"
},
if error.expected().len() == 0 {
"end of input".to_string()
} else {
error
.expected()
.map(|x| x.to_string())
.collect::<Vec<_>>()
.join(", ")
}
))
.with_label(
Label::new(error.span())
.with_message(format!(
"Unexpected token {}",
error
.found()
.unwrap_or(&"end of file".to_string())
.fg(Color::Red)
))
.with_color(Color::Red),
),
chumsky::error::SimpleReason::Custom(msg) => report.with_message(msg).with_label(
Label::new(error.span())
.with_message(format!("{}", msg.fg(Color::Red)))
.with_color(Color::Red),
),
};
report.finish().eprint(Source::from(source)).unwrap();
}
}
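/// Character-level lexer producing `(Token, Span)` pairs. Handles integers,
/// floats, strings, operators (including `:=`), control characters and
/// identifiers/keywords, and treats `//` and `/* ... */` comments as padding.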
fn lexer() -> impl Parser<char, Vec<(Token, Span)>, Error = Simple<char>> {
let float = text::int(10)
.chain::<char, _, _>(just('.').chain(text::digits(10)))
.collect::<String>()
.map(Token::Float);
let int = text::int(10).map(|s: String| Token::Int(s.parse().unwrap()));
let str_ = just('"')
.ignore_then(filter(|c| *c != '"').repeated())
.then_ignore(just('"'))
.collect::<String>()
.map(Token::Str);
let op = one_of("+-*/%&^|<=>".chars())
.repeated()
.at_least(1)
.or(just(':').chain(just('=')))
.collect::<String>()
.map(Token::Op);
let ctrl = one_of("(){};,:?!".chars()).map(Token::Ctrl);
let ident = text::ident().map(|ident: String| match ident.as_str() {
"import" => Token::Import,
"export" => Token::Export,
"fn" => Token::Fn,
"let" => Token::Let,
"memory" => Token::Memory,
"global" => Token::Global,
"mut" => Token::Mut,
"loop" => Token::Loop,
"branch_if" => Token::BranchIf,
_ => Token::Ident(ident),
});
let single_line =
seq::<_, _, Simple<char>>("//".chars()).then_ignore(take_until(text::newline()));
let multi_line =
seq::<_, _, Simple<char>>("/*".chars()).then_ignore(take_until(seq("*/".chars())));
let comment = single_line.or(multi_line);
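// A token is any of the alternatives below; on an unrecognised character the
// lexer skips it and retries, so one bad character doesn't abort lexing.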
let token = float
.or(int)
.or(str_)
.or(op)
.or(ctrl)
.or(ident)
.recover_with(skip_then_retry_until([]));
token
.map_with_span(|tok, span| (tok, span))
.padded()
.padded_by(comment.padded().repeated())
.repeated()
}
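/// AST types built by the token-level parsers below; kept in a local module
/// while this chumsky-based front end is still work in progress.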
mod ast {
use super::Span;
#[derive(Debug)]
pub struct Script {
pub imports: Vec<Import>,
pub global_vars: Vec<GlobalVar>,
pub functions: Vec<Function>,
}
#[derive(Debug)]
pub enum TopLevelItem {
Import(Import),
GlobalVar(GlobalVar),
Function(Function),
}
#[derive(Debug)]
pub struct Import {
pub span: Span,
pub import: String,
pub type_: ImportType,
}
#[derive(Debug)]
pub enum ImportType {
Memory(u32),
Variable {
name: String,
type_: Type,
mutable: bool,
},
// Function { name: String, params: Vec<Type>, result: Option<Type> }
}
#[derive(Debug)]
pub struct GlobalVar {
pub span: Span,
pub name: String,
pub type_: Type,
}
#[derive(Debug)]
pub struct Function {
pub span: Span,
pub export: bool,
pub name: String,
pub params: Vec<(String, Type)>,
pub type_: Option<Type>,
pub body: Block,
}
#[derive(Debug)]
pub struct Block {
pub statements: Vec<Expression>,
pub final_expression: Option<Box<Expression>>,
}
impl Block {
pub fn type_(&self) -> Option<Type> {
self.final_expression.as_ref().and_then(|e| e.type_)
}
}
#[derive(Debug)]
pub struct MemoryLocation {
pub span: Span,
pub size: MemSize,
pub left: Box<Expression>,
pub right: Box<Expression>,
}
#[derive(Debug)]
pub struct LocalVariable {
pub span: Span,
pub name: String,
pub type_: Option<Type>,
pub value: Option<Expression>,
pub defer: bool,
}
#[derive(Debug)]
pub struct Expression {
pub type_: Option<Type>,
pub expr: Expr,
pub span: Span,
}
#[derive(Debug)]
pub enum Expr {
I32Const(i32),
F32Const(f32),
Variable(String),
Let {
name: String,
type_: Option<Type>,
value: Option<Box<Expression>>,
defer: bool,
},
Poke {
mem_location: MemoryLocation,
value: Box<Expression>,
},
Loop {
label: String,
block: Box<Block>,
},
BranchIf {
condition: Box<Expression>,
label: String,
},
BinOp {
op: BinOp,
left: Box<Expression>,
right: Box<Expression>,
},
LocalTee {
name: String,
value: Box<Expression>,
},
Cast {
value: Box<Expression>,
type_: Type,
},
FuncCall {
name: String,
params: Vec<Expression>,
},
Select {
condition: Box<Expression>,
if_true: Box<Expression>,
if_false: Box<Expression>,
},
Error,
}
impl Expr {
pub fn with_span(self, span: Span) -> Expression {
Expression {
type_: None,
expr: self,
span,
}
}
}
#[derive(Debug, Clone, Copy)]
pub enum BinOp {
Add,
Sub,
Mul,
Div,
Rem,
And,
Or,
Xor,
Eq,
Ne,
Gt,
Ge,
Lt,
Le,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MemSize {
Byte,
Word,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
pub enum Type {
I32,
I64,
F32,
F64,
}
}
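/// Lifts a `Token -> Option<O>` function into a token parser: `Some` becomes a
/// successful parse, `None` is reported as unexpected input at that span.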
fn map_token<O>(
f: impl Fn(&Token) -> Option<O> + 'static + Clone,
) -> impl Parser<Token, O, Error = Simple<Token>> + Clone {
filter_map(move |span, tok: Token| {
if let Some(output) = f(&tok) {
Ok(output)
} else {
Err(Simple::expected_input_found(span, Vec::new(), Some(tok)))
}
})
}
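/// Parses a block body: expressions separated by `;` with an optional trailing
/// expression whose type, if any, becomes the type of the block.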
fn block_parser() -> impl Parser<Token, ast::Block, Error = Simple<Token>> + Clone {
recursive(|block| {
let expression = recursive(|expression| {
let val = map_token(|tok| match tok {
Token::Int(v) => Some(ast::Expr::I32Const(*v)),
Token::Float(v) => Some(ast::Expr::F32Const(v.parse().unwrap())),
_ => None,
})
.labelled("value");
let variable = filter_map(|span, tok| match tok {
Token::Ident(id) => Ok(ast::Expr::Variable(id)),
_ => Err(Simple::expected_input_found(span, Vec::new(), Some(tok))),
})
.labelled("variable");
let ident = filter_map(|span, tok| match tok {
Token::Ident(id) => Ok(id),
_ => Err(Simple::expected_input_found(span, Vec::new(), Some(tok))),
})
.labelled("identifier");
let local_tee = ident
.then(just(Token::Op(":=".to_string())).ignore_then(expression.clone()))
.map(|(name, expr)| ast::Expr::LocalTee {
name,
value: Box::new(expr),
});
let loop_expr = just(Token::Loop)
.ignore_then(ident)
.then(
block
.clone()
.delimited_by(Token::Ctrl('{'), Token::Ctrl('}')),
)
.map(|(label, block)| ast::Expr::Loop {
label,
block: Box::new(block),
});
let branch_if = just(Token::BranchIf)
.ignore_then(expression.clone())
.then_ignore(just(Token::Ctrl(':')))
.then(ident)
.map(|(condition, label)| ast::Expr::BranchIf {
condition: Box::new(condition),
label,
});
// Try local_tee before variable: both begin with an identifier, and a bare
// variable match would otherwise leave the trailing `:=` unparsed.
let atom = val
.or(local_tee)
.or(variable)
.or(loop_expr)
.or(branch_if)
.map_with_span(|expr, span| expr.with_span(span))
.or(expression
.clone()
.delimited_by(Token::Ctrl('('), Token::Ctrl(')')))
.recover_with(nested_delimiters(
Token::Ctrl('('),
Token::Ctrl(')'),
[(Token::Ctrl('{'), Token::Ctrl('}'))],
|span| ast::Expr::Error.with_span(span),
));
atom
});
expression
.clone()
.then_ignore(just(Token::Ctrl(';')))
.repeated()
.then(expression.clone().or_not())
.map(|(statements, final_expression)| ast::Block {
statements,
final_expression: final_expression.map(Box::new),
})
})
}
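/// Accepts exactly the primitive value types: `i32`, `i64`, `f32` and `f64`.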
fn type_parser() -> impl Parser<Token, ast::Type, Error = Simple<Token>> + Clone {
filter_map(|span, tok| match tok {
Token::Ident(id) if id == "i32" => Ok(ast::Type::I32),
Token::Ident(id) if id == "i64" => Ok(ast::Type::I64),
Token::Ident(id) if id == "f32" => Ok(ast::Type::F32),
Token::Ident(id) if id == "f64" => Ok(ast::Type::F64),
_ => Err(Simple::expected_input_found(
span,
vec![
Token::Ident("i32".into()),
Token::Ident("i64".into()),
Token::Ident("f32".into()),
Token::Ident("f64".into()),
],
Some(tok),
)),
})
}
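/// Top-level items. For now only `import` declarations are parsed (imported
/// memories and globals); functions and global definitions are still to do.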
fn top_level_item_parser() -> impl Parser<Token, ast::TopLevelItem, Error = Simple<Token>> + Clone {
let integer = map_token(|tok| match tok {
Token::Int(v) => Some(*v),
_ => None,
});
let string = map_token(|tok| match tok {
Token::Str(s) => Some(s.clone()),
_ => None,
});
let identifier = map_token(|tok| match tok {
Token::Ident(id) => Some(id.clone()),
_ => None,
});
let import_memory = just(Token::Memory)
.ignore_then(integer.delimited_by(Token::Ctrl('('), Token::Ctrl(')')))
.map(|min_size| ast::ImportType::Memory(min_size as u32));
let import_global = just(Token::Global)
.ignore_then(just(Token::Mut).or_not())
.then(identifier)
.then_ignore(just(Token::Ctrl(':')))
.then(type_parser())
.map(|((mut_opt, name), type_)| ast::ImportType::Variable {
mutable: mut_opt.is_some(),
name,
type_,
});
let import = just(Token::Import)
.ignore_then(string)
.then(import_memory.or(import_global))
.then_ignore(just(Token::Ctrl(';')))
.map_with_span(|(import, type_), span| {
ast::TopLevelItem::Import(ast::Import {
span,
import,
type_,
})
});
import
}
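/// Parses a whole script: top-level items until end of input, sorted into the
/// `Script` struct's imports, globals and functions.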
fn script_parser() -> impl Parser<Token, ast::Script, Error = Simple<Token>> + Clone {
top_level_item_parser()
.repeated()
.then_ignore(end())
.map(|items| {
let mut script = ast::Script {
imports: Vec::new(),
global_vars: Vec::new(),
functions: Vec::new(),
};
for item in items {
match item {
ast::TopLevelItem::Import(i) => script.imports.push(i),
ast::TopLevelItem::GlobalVar(v) => script.global_vars.push(v),
ast::TopLevelItem::Function(f) => script.functions.push(f),
}
}
script
})
}
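
For reference, a minimal sketch of a test module that could sit at the bottom of src/parser2.rs; the module, test names, and input strings here are illustrative additions (not part of this commit) and only exercise the import-a-memory form that the WIP parser currently accepts:

#[cfg(test)]
mod tests {
    use super::parse;

    // `import "mem" memory(1);` imports a memory with a minimum size,
    // the one top-level form top_level_item_parser handles so far.
    #[test]
    fn parses_memory_import() {
        assert!(parse(r#"import "mem" memory(1);"#).is_ok());
    }

    // Anything the token-level parser rejects comes back as Err(()) after the
    // ariadne report has been printed to stderr.
    #[test]
    fn rejects_malformed_import() {
        assert!(parse("import ;").is_err());
    }
}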