From e6a6fd1535384990a1ac9ac4a727f3362fa36aa9 Mon Sep 17 00:00:00 2001 From: Dennis Ranke Date: Sun, 24 Oct 2021 20:24:24 +0200 Subject: [PATCH] initial commit, correctly parses simple example --- .gitignore | 1 + Cargo.lock | 39 +++++ Cargo.toml | 9 + src/ast.rs | 132 +++++++++++++++ src/main.rs | 11 ++ src/parser.rs | 445 ++++++++++++++++++++++++++++++++++++++++++++++++++ test.hw | 9 + 7 files changed, 646 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 src/ast.rs create mode 100644 src/main.rs create mode 100644 src/parser.rs create mode 100644 test.hw diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..7d62d00 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,39 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "hwas" +version = "0.1.0" +dependencies = [ + "nom", +] + +[[package]] +name = "memchr" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" + +[[package]] +name = "minimal-lexical" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c64630dcdd71f1a64c435f54885086a0de5d6a12d104d69b165fb7d5286d677" + +[[package]] +name = "nom" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffd9d26838a953b4af82cbeb9f1592c6798916983959be223a7124e992742c1" +dependencies = [ + "memchr", + "minimal-lexical", + "version_check", +] + +[[package]] +name = "version_check" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..32379b7 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "hwas" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +nom = "7" diff --git a/src/ast.rs b/src/ast.rs new file mode 100644 index 0000000..81dd5a3 --- /dev/null +++ b/src/ast.rs @@ -0,0 +1,132 @@ +#[derive(Debug, Clone, Copy)] +pub struct Position(pub usize); + +#[derive(Debug)] +pub struct Script<'a> { + pub items: Vec>, +} + +#[derive(Debug)] +pub enum TopLevelItem<'a> { + GlobalVar(GlobalVar<'a>), + Function(Function<'a>), +} + +#[derive(Debug)] +pub struct GlobalVar<'a> { + pub position: Position, + pub visibility: Visibility, + pub name: &'a str, + pub type_: Type, +} + +#[derive(Debug)] +pub struct Function<'a> { + pub position: Position, + pub visibility: Visibility, + pub name: &'a str, + pub params: Vec<(&'a str, Type)>, + pub type_: Option, + pub body: Block<'a>, +} + +#[derive(Debug)] +pub struct Block<'a> { + pub statements: Vec>, + pub final_expression: Option>, +} + +#[derive(Debug)] +pub enum Statement<'a> { + LocalVariable(LocalVariable<'a>), + Poke { + mem_location: MemoryLocation<'a>, + value: Expression<'a>, + }, + Expression(Expression<'a>), +} + +#[derive(Debug)] +pub struct MemoryLocation<'a> { + pub position: Position, + pub size: MemSize, + pub left: Expression<'a>, + pub right: Expression<'a>, +} + +#[derive(Debug)] +pub struct LocalVariable<'a> { + pub position: Position, + pub name: &'a str, + pub type_: Option, + pub value: Option>, +} + +#[derive(Debug)] +pub enum Expression<'a> { + I32Const(i32), + Variable { + position: Position, + name: &'a str, + }, + Loop { + position: Position, + label: &'a str, + block: Box>, + }, + BranchIf { + position: Position, + condition: Box>, + label: &'a str, + }, + BinOp { + position: Position, + op: BinOp, + left: Box>, + right: Box>, + }, + LocalTee { + position: Position, + name: &'a str, + value: Box>, + }, +} + +#[derive(Debug)] +pub enum BinOp { + Add, + Sub, + Mul, + Div, + Rem, + And, + Or, + Xor, + Eq, + Ne, + Gt, + Ge, + Lt, + Le, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum MemSize { + Byte, + Word, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Visibility { + Local, + Export, + Import, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Type { + I32, + I64, + F32, + F64, +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..2ce1c7c --- /dev/null +++ b/src/main.rs @@ -0,0 +1,11 @@ +mod parser; +mod ast; + +fn main() { + let input = include_str!("../test.hw"); + let result = parser::parse(input); + match result { + Ok(script) => {dbg!(script);}, + Err(err) => println!("error: {}", nom::error::convert_error(input, err)) + } +} diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..c3c8658 --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,445 @@ +use crate::ast; +use nom::{ + branch::alt, + bytes::complete::tag, + character::complete::{alpha1, alphanumeric1, char, digit1, multispace0}, + combinator::{self, cut, map, map_res, not, opt, peek, recognize, success, value}, + error::VerboseError, + multi::{fold_many0, many0, separated_list0}, + sequence::{delimited, pair, preceded, separated_pair, terminated}, + Finish, +}; + +type IResult<'a, O> = nom::IResult<&'a str, O, VerboseError<&'a str>>; + +pub fn parse(s: &str) -> Result> { + let (_, script) = combinator::all_consuming(terminated(script, multispace0))(s).finish()?; + Ok(script) +} + +fn script(s: &str) -> IResult { + let (s, items) = many0(top_level_item)(s)?; + Ok((s, ast::Script { items })) +} + +fn top_level_item(s: &str) -> IResult { + alt(( + map(function, ast::TopLevelItem::Function), + map(global_var, ast::TopLevelItem::GlobalVar), + ))(s) +} + +fn global_var(s: &str) -> IResult { + let (s, vis) = visibility(s)?; + let (s, position) = ws(position)(s)?; + let (s, name) = identifier(s)?; + let (s, type_) = preceded(ws(char(':')), type_)(s)?; + let (s, _) = ws(char(';'))(s)?; + + Ok(( + s, + ast::GlobalVar { + position, + visibility: vis, + name: name, + type_, + }, + )) +} + +fn function(s: &str) -> IResult { + let (s, vis) = visibility(s)?; + let (s, _) = ws(tag("fn"))(s)?; + cut(move |s| { + let (s, position) = ws(position)(s)?; + let (s, name) = identifier(s)?; + let (s, params) = delimited( + ws(char('(')), + separated_list0( + ws(char(',')), + pair(map(identifier, |i| i), preceded(ws(tag(":")), type_)), + ), + ws(char(')')), + )(s)?; + let (s, type_) = opt(preceded(ws(tag("->")), type_))(s)?; + let (s, body) = block(s)?; + + Ok(( + s, + ast::Function { + position, + visibility: vis, + name: name, + params, + type_, + body, + }, + )) + })(s) +} + +fn block(s: &str) -> IResult { + let (s, (statements, final_expression)) = delimited( + ws(char('{')), + pair(many0(statement), opt(expression)), + ws(char('}')), + )(s)?; + Ok(( + s, + ast::Block { + statements, + final_expression, + }, + )) +} + +fn statement(s: &str) -> IResult { + alt(( + map(local_var, ast::Statement::LocalVariable), + map( + terminated(expression, ws(char(';'))), + ast::Statement::Expression, + ), + map( + terminated(block_expression, not(peek(ws(char('}'))))), + ast::Statement::Expression, + ), + map( + terminated( + pair(mem_location, preceded(ws(char('=')), expression)), + ws(char(';')), + ), + |(mem_location, value)| ast::Statement::Poke { + mem_location, + value, + }, + ), + ))(s) +} + +fn local_var(s: &str) -> IResult { + let (s, _) = ws(tag("let"))(s)?; + let (s, position) = ws(position)(s)?; + let (s, name) = identifier(s)?; + let (s, type_) = opt(preceded(ws(char(':')), type_))(s)?; + let (s, value) = opt(preceded(ws(char('=')), expression))(s)?; + let (s, _) = ws(char(';'))(s)?; + + Ok(( + s, + ast::LocalVariable { + position, + name: name, + type_, + value, + }, + )) +} + +fn mem_location(s: &str) -> IResult { + let (s, position) = ws(position)(s)?; + let (s, left) = expression(s)?; + let (s, size) = map(ws(alt((char('?'), char('!')))), |op| match op { + '?' => ast::MemSize::Byte, + '!' => ast::MemSize::Word, + _ => unreachable!(), + })(s)?; + let (s, right) = expression(s)?; + + Ok(( + s, + ast::MemoryLocation { + position, + size, + left, + right, + }, + )) +} + +fn expression(s: &str) -> IResult { + expression_cmp(s) +} + +fn expression_atom(s: &str) -> IResult { + alt(( + branch_if, + block_expression, + map( + separated_pair(pair(ws(position), identifier), ws(tag(":=")), expression), + |((position, name), value)| ast::Expression::LocalTee { + position, + name: name, + value: Box::new(value), + }, + ), + map(integer, |v| ast::Expression::I32Const(v)), + map(ws(pair(position, identifier)), |(position, name)| { + ast::Expression::Variable { + position, + name: name, + } + }), + delimited(ws(char('(')), cut(expression), ws(char(')'))), + ))(s) +} + +fn branch_if(s: &str) -> IResult { + let (s, position) = ws(position)(s)?; + let (s, _) = tag("branch_if")(s)?; + cut(move |s| { + let (s, condition) = expression(s)?; + let (s, _) = ws(char(':'))(s)?; + let (s, label) = identifier(s)?; + + Ok(( + s, + ast::Expression::BranchIf { + position, + condition: Box::new(condition), + label: label, + }, + )) + })(s) +} + +fn expression_product(s: &str) -> IResult { + let (s, mut init) = map(expression_atom, Some)(s)?; + fold_many0( + pair( + ws(pair(position, alt((char('*'), char('/'), char('%'))))), + expression_atom, + ), + move || init.take().unwrap(), + |left, ((position, op), right)| { + let op = match op { + '*' => ast::BinOp::Mul, + '/' => ast::BinOp::Div, + '%' => ast::BinOp::Rem, + _ => unreachable!(), + }; + ast::Expression::BinOp { + position, + op, + left: Box::new(left), + right: Box::new(right), + } + }, + )(s) +} + +fn expression_sum(s: &str) -> IResult { + let (s, mut init) = map(expression_product, Some)(s)?; + fold_many0( + pair( + ws(pair(position, alt((char('+'), char('-'))))), + expression_product, + ), + move || init.take().unwrap(), + |left, ((position, op), right)| { + let op = if op == '+' { + ast::BinOp::Add + } else { + ast::BinOp::Sub + }; + ast::Expression::BinOp { + position, + op, + left: Box::new(left), + right: Box::new(right), + } + }, + )(s) +} + +fn expression_bit(s: &str) -> IResult { + let (s, mut init) = map(expression_sum, Some)(s)?; + fold_many0( + pair( + ws(pair(position, alt((char('&'), char('|'), char('^'))))), + expression_sum, + ), + move || init.take().unwrap(), + |left, ((position, op), right)| { + let op = match op { + '&' => ast::BinOp::And, + '|' => ast::BinOp::Or, + '^' => ast::BinOp::Xor, + _ => unreachable!(), + }; + ast::Expression::BinOp { + position, + op, + left: Box::new(left), + right: Box::new(right), + } + }, + )(s) +} + +fn expression_cmp(s: &str) -> IResult { + let (s, mut init) = map(expression_bit, Some)(s)?; + fold_many0( + pair( + ws(pair( + position, + alt(( + tag("=="), + tag("!="), + tag("<="), + tag("<"), + tag(">="), + tag(">"), + )), + )), + expression_bit, + ), + move || init.take().unwrap(), + |left, ((position, op), right)| { + let op = match op { + "==" => ast::BinOp::Eq, + "!=" => ast::BinOp::Ne, + "<=" => ast::BinOp::Le, + "<" => ast::BinOp::Lt, + ">=" => ast::BinOp::Ge, + ">" => ast::BinOp::Gt, + _ => unreachable!(), + }; + ast::Expression::BinOp { + position, + op, + left: Box::new(left), + right: Box::new(right), + } + }, + )(s) +} + +fn block_expression(s: &str) -> IResult { + loop_(s) +} + +fn loop_(s: &str) -> IResult { + let (s, position) = ws(position)(s)?; + let (s, _) = tag("loop")(s)?; + cut(move |s| { + let (s, label) = identifier(s)?; + let (s, block) = block(s)?; + + Ok(( + s, + ast::Expression::Loop { + position, + label: label, + block: Box::new(block), + }, + )) + })(s) +} + +fn integer(s: &str) -> IResult { + ws(map_res( + recognize(pair(opt(char('-')), digit1)), + |n: &str| n.parse::(), + ))(s) +} + +fn visibility(s: &str) -> IResult { + ws(alt(( + value(ast::Visibility::Export, tag("export")), + value(ast::Visibility::Import, tag("import")), + success(ast::Visibility::Local), + )))(s) +} + +fn type_(s: &str) -> IResult { + ws(alt(( + value(ast::Type::I32, tag("i32")), + value(ast::Type::I64, tag("i64")), + value(ast::Type::F32, tag("f32")), + value(ast::Type::F64, tag("f64")), + )))(s) +} + +fn identifier(s: &str) -> IResult<&str> { + ws(recognize(pair( + alt((alpha1, tag("_"))), + many0(alt((alphanumeric1, tag("_")))), + )))(s) +} + +fn position(s: &str) -> IResult { + Ok((s, ast::Position(s.len()))) +} + +fn ws<'a, F: 'a, O>(inner: F) -> impl FnMut(&'a str) -> IResult +where + F: FnMut(&'a str) -> IResult<'a, O>, +{ + preceded(multispace0, inner) +} + +#[cfg(test)] +mod test { + use nom::combinator::all_consuming; + + #[test] + fn identifier() { + all_consuming(super::identifier)("_froobaz123").unwrap(); + } + + #[test] + fn type_() { + all_consuming(super::type_)("i32").unwrap(); + all_consuming(super::type_)("i64").unwrap(); + all_consuming(super::type_)("f32").unwrap(); + all_consuming(super::type_)("f64").unwrap(); + } + + #[test] + fn integer() { + all_consuming(super::integer)("123").unwrap(); + all_consuming(super::integer)("-123").unwrap(); + } + + #[test] + fn local_var() { + all_consuming(super::local_var)("let foo: i32;").unwrap(); + all_consuming(super::local_var)("let bar = 42;").unwrap(); + } + + #[test] + fn function() { + all_consuming(super::function)("export fn foo(a: i32, b: f32) -> i32 { let x = 42; x }") + .unwrap(); + } + + #[test] + fn loop_() { + all_consuming(super::loop_)("loop foo { 42 }").unwrap(); + all_consuming(super::loop_)("loop foo { i?64 = (i % 320 + time / 10) ^ (i / 320); }") + .unwrap(); + } + + #[test] + fn block() { + all_consuming(super::block)("{loop frame {}}").unwrap(); + } + + #[test] + fn expression() { + all_consuming(super::expression)("foo + 2 * (bar ^ 23)").unwrap(); + all_consuming(super::expression)("i := i + 1").unwrap(); + all_consuming(super::expression)("(i := i + 1)").unwrap(); + } + + #[test] + fn poke() { + all_consuming(super::statement)("i?64 = (i % 320 + time / 10) ^ (i / 320);").unwrap(); + } + + #[test] + fn branch_if() { + all_consuming(super::branch_if)("branch_if (i := i + 1) < 10: foo").unwrap(); + } +} diff --git a/test.hw b/test.hw new file mode 100644 index 0000000..bf6bb62 --- /dev/null +++ b/test.hw @@ -0,0 +1,9 @@ +import time: i32; + +export fn tic() { + let i = 0; + loop frame { + i?64 = (i % 320 + time / 10) ^ (i / 320); + branch_if (i := i + 1) < 320*256: frame; + } +}