diff --git a/Cargo.lock b/Cargo.lock index 7d62d00..dca4237 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7,8 +7,16 @@ name = "hwas" version = "0.1.0" dependencies = [ "nom", + "wasm-encoder", + "wasmparser", ] +[[package]] +name = "leb128" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67" + [[package]] name = "memchr" version = "2.4.1" @@ -37,3 +45,18 @@ name = "version_check" version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" + +[[package]] +name = "wasm-encoder" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db0c351632e46cc06a58a696a6c11e4cf90cad4b9f8f07a0b59128d616c29bb0" +dependencies = [ + "leb128", +] + +[[package]] +name = "wasmparser" +version = "0.81.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98930446519f63d00a836efdc22f67766ceae8dbcc1571379f2bcabc6b2b9abc" diff --git a/Cargo.toml b/Cargo.toml index 32379b7..e7ffaac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,3 +7,5 @@ edition = "2021" [dependencies] nom = "7" +wasmparser = "0.81" +wasm-encoder = "0.8" \ No newline at end of file diff --git a/src/ast.rs b/src/ast.rs index cbe2cee..5f6f84b 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -3,20 +3,35 @@ pub struct Position(pub usize); #[derive(Debug)] pub struct Script<'a> { + pub imports: Vec>, pub global_vars: Vec>, pub functions: Vec> } #[derive(Debug)] pub enum TopLevelItem<'a> { + Import(Import<'a>), GlobalVar(GlobalVar<'a>), Function(Function<'a>), } +#[derive(Debug)] +pub struct Import<'a> { + pub position: Position, + pub import: &'a str, + pub type_: ImportType<'a> +} + +#[derive(Debug)] +pub enum ImportType<'a> { + Memory(u32), + Variable {name: &'a str, type_: Type}, + // Function { name: &'a str, params: Vec, result: Option } +} + #[derive(Debug)] pub struct GlobalVar<'a> { pub position: Position, - pub visibility: Visibility, pub name: &'a str, pub type_: Type, } @@ -24,7 +39,7 @@ pub struct GlobalVar<'a> { #[derive(Debug)] pub struct Function<'a> { pub position: Position, - pub visibility: Visibility, + pub export: bool, pub name: &'a str, pub params: Vec<(&'a str, Type)>, pub type_: Option, @@ -37,6 +52,12 @@ pub struct Block<'a> { pub final_expression: Option>, } +impl<'a> Block<'a> { + pub fn type_(&self) -> Option { + self.final_expression.as_ref().and_then(|e| e.type_) + } +} + #[derive(Debug)] pub enum Statement<'a> { LocalVariable(LocalVariable<'a>), @@ -130,14 +151,7 @@ pub enum MemSize { Word, } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Visibility { - Local, - Export, - Import, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] pub enum Type { I32, I64, diff --git a/src/constfold.rs b/src/constfold.rs index a1e89a9..2a2756b 100644 --- a/src/constfold.rs +++ b/src/constfold.rs @@ -46,7 +46,6 @@ fn fold_expr(expr: &mut ast::Expression) { } => { fold_expr(left); fold_expr(right); - dbg!(&left.expr, &right.expr); match (&left.expr, &right.expr) { (&ast::Expr::I32Const(left), &ast::Expr::I32Const(right)) => { let result = match op { diff --git a/src/emit.rs b/src/emit.rs new file mode 100644 index 0000000..fe2be25 --- /dev/null +++ b/src/emit.rs @@ -0,0 +1,314 @@ +use std::collections::HashMap; + +use wasm_encoder::{ + BlockType, CodeSection, EntityType, Export, ExportSection, Function, FunctionSection, + GlobalType, ImportSection, Instruction, MemArg, MemoryType, Module, TypeSection, ValType, +}; + +use crate::ast; + +pub fn emit(script: &ast::Script) -> Vec { + let mut module = Module::new(); + + let function_types = collect_function_types(script); + { + let mut types = TypeSection::new(); + let mut type_vec: Vec<_> = function_types.iter().map(|(k, v)| (*v, k)).collect(); + type_vec.sort(); + for (_, (params, result)) in type_vec { + let params: Vec<_> = params.iter().cloned().map(map_type).collect(); + let result: Vec<_> = result.iter().cloned().map(map_type).collect(); + types.function(params, result); + } + module.section(&types); + } + + let mut globals: HashMap<&str, u32> = HashMap::new(); + + { + let mut imports = ImportSection::new(); + + for import in &script.imports { + let (module, name) = if let Some(dot_index) = import.import.find('.') { + ( + &import.import[..dot_index], + Some(&import.import[(dot_index + 1)..]), + ) + } else { + (import.import, None) + }; + let type_: EntityType = match import.type_ { + ast::ImportType::Memory(min_size) => MemoryType { + minimum: min_size as u64, + maximum: None, + memory64: false, + } + .into(), + ast::ImportType::Variable { type_, name } => { + globals.insert(name, globals.len() as u32); + GlobalType { + val_type: map_type(type_), + mutable: false, + } + .into() + } + }; + imports.import(module, name, type_); + } + + module.section(&imports); + } + + { + let mut functions = FunctionSection::new(); + let mut exports = ExportSection::new(); + let mut code = CodeSection::new(); + + for (index, func) in script.functions.iter().enumerate() { + let type_ = *function_types.get(&function_type_key(func)).unwrap(); + functions.function(type_ as u32); + if func.export { + exports.export(func.name, Export::Function(index as u32)); + } + + code.function(&emit_function(func, &globals)); + } + + module.section(&functions); + module.section(&exports); + module.section(&code); + } + + module.finish() +} + +type FunctionTypeKey = (Vec, Option); + +fn collect_function_types(script: &ast::Script) -> HashMap { + let mut types: HashMap = HashMap::new(); + + for func in &script.functions { + let index = types.len(); + types + .entry(function_type_key(func)) + .or_insert_with(|| index); + } + + types +} + +fn function_type_key(func: &ast::Function) -> FunctionTypeKey { + let param_types: Vec<_> = func.params.iter().map(|(_, type_)| *type_).collect(); + (param_types, func.type_) +} + +struct FunctionContext<'a> { + function: &'a mut Function, + globals: &'a HashMap<&'a str, u32>, + locals: &'a HashMap<&'a str, u32>, + labels: Vec, +} + +fn emit_function(func: &ast::Function, globals: &HashMap<&str, u32>) -> Function { + let mut locals = Vec::new(); + collect_locals(&func.body, &mut locals); + locals.sort_by_key(|(_, t)| *t); + + let mut function = Function::new_with_locals_types(locals.iter().map(|(_, t)| map_type(*t))); + + let locals: HashMap<&str, u32> = locals + .into_iter() + .enumerate() + .map(|(index, (name, _))| (name, index as u32)) + .collect(); + + let mut context = FunctionContext { + function: &mut function, + globals, + locals: &locals, + labels: vec![], + }; + + emit_block(&mut context, &func.body); + if func.type_.is_none() && func.body.type_().is_some() { + function.instruction(&Instruction::Drop); + } + function.instruction(&Instruction::End); + + function +} + +fn collect_locals<'a>(block: &ast::Block<'a>, locals: &mut Vec<(&'a str, ast::Type)>) { + for stmt in &block.statements { + match stmt { + ast::Statement::LocalVariable(v) => { + locals.push((v.name, v.type_.unwrap())); + if let Some(ref value) = v.value { + collect_locals_expr(value, locals); + } + } + ast::Statement::Expression(e) => collect_locals_expr(e, locals), + ast::Statement::Poke { + mem_location, + value, + .. + } => { + collect_locals_expr(&mem_location.left, locals); + collect_locals_expr(value, locals); + } + } + } + if let Some(ref expr) = block.final_expression { + collect_locals_expr(expr, locals); + } +} + +fn collect_locals_expr<'a>(expr: &ast::Expression<'a>, locals: &mut Vec<(&'a str, ast::Type)>) { + match &expr.expr { + ast::Expr::Variable { .. } | ast::Expr::I32Const(_) => (), + ast::Expr::BinOp { left, right, .. } => { + collect_locals_expr(left, locals); + collect_locals_expr(right, locals); + } + ast::Expr::BranchIf { condition, .. } => collect_locals_expr(condition, locals), + ast::Expr::LocalTee { value, .. } => collect_locals_expr(value, locals), + ast::Expr::Loop { block, .. } => collect_locals(block, locals), + } +} + +fn emit_block(ctx: &mut FunctionContext, block: &ast::Block) { + for stmt in &block.statements { + match stmt { + ast::Statement::Expression(e) => { + emit_expression(ctx, e); + if e.type_.is_some() { + ctx.function.instruction(&Instruction::Drop); + } + } + ast::Statement::LocalVariable(v) => { + if let Some(ref val) = v.value { + emit_expression(ctx, val); + ctx.function + .instruction(&Instruction::LocalSet(*ctx.locals.get(v.name).unwrap())); + } + } + ast::Statement::Poke { + mem_location, + value, + .. + } => { + emit_expression(ctx, value); + emit_expression(ctx, &mem_location.left); + let offset = if let ast::Expr::I32Const(v) = mem_location.right.expr { + v as u32 as u64 + } else { + unreachable!() + }; + ctx.function.instruction(&match mem_location.size { + ast::MemSize::Byte => Instruction::I32Store8(MemArg { + align: 0, + memory_index: 0, + offset, + }), + ast::MemSize::Word => Instruction::I32Store(MemArg { + align: 2, + memory_index: 0, + offset, + }), + }); + } + } + } + if let Some(ref expr) = block.final_expression { + emit_expression(ctx, expr); + } +} + +fn emit_expression(ctx: &mut FunctionContext, expr: &ast::Expression) { + match &expr.expr { + ast::Expr::BinOp { + left, op, right, .. + } => { + emit_expression(ctx, left); + emit_expression(ctx, right); + use ast::BinOp::*; + use ast::Type::*; + ctx.function.instruction(&match (left.type_.unwrap(), op) { + (I32, Add) => Instruction::I32Add, + (I32, Sub) => Instruction::I32Sub, + (I32, Mul) => Instruction::I32Mul, + (I32, Div) => Instruction::I32DivS, + (I32, Rem) => Instruction::I32RemS, + (I32, And) => Instruction::I32And, + (I32, Or) => Instruction::I32Or, + (I32, Xor) => Instruction::I32Xor, + (I32, Eq) => Instruction::I32Eq, + (I32, Ne) => Instruction::I32Neq, + (I32, Lt) => Instruction::I32LtS, + (I32, Le) => Instruction::I32LeS, + (I32, Gt) => Instruction::I32GtS, + (I32, Ge) => Instruction::I32GeS, + + (I64, _) => todo!(), + (F32, _) => todo!(), + (F64, _) => todo!(), + }); + } + ast::Expr::BranchIf { + condition, label, .. + } => { + emit_expression(ctx, condition); + let depth = ctx + .labels + .iter() + .rev() + .enumerate() + .find(|(_, l)| l == label) + .unwrap() + .0; + ctx.function.instruction(&Instruction::BrIf(depth as u32)); + } + ast::Expr::I32Const(v) => { + ctx.function.instruction(&Instruction::I32Const(*v)); + } + ast::Expr::LocalTee { name, value, .. } => { + emit_expression(ctx, value); + let index = ctx.locals.get(*name).unwrap(); + ctx.function.instruction(&Instruction::LocalTee(*index)); + } + ast::Expr::Loop { label, block, .. } => { + ctx.labels.push(label.to_string()); + ctx.function + .instruction(&Instruction::Loop(map_block_type(block.type_()))); + emit_block(ctx, block); + ctx.labels.pop(); + ctx.function.instruction(&Instruction::End); + } + ast::Expr::Variable { name, .. } => { + if let Some(index) = ctx.locals.get(*name) { + ctx.function.instruction(&Instruction::LocalGet(*index)); + } else if let Some(index) = ctx.globals.get(*name) { + ctx.function.instruction(&Instruction::GlobalGet(*index)); + } else { + unreachable!() + } + } + } +} + +fn map_type(t: ast::Type) -> ValType { + match t { + ast::Type::I32 => ValType::I32, + ast::Type::I64 => ValType::I64, + ast::Type::F32 => ValType::F32, + ast::Type::F64 => ValType::F64, + } +} + +fn map_block_type(t: Option) -> BlockType { + if let Some(t) = t { + BlockType::Result(map_type(t)) + } else { + BlockType::Empty + } +} diff --git a/src/main.rs b/src/main.rs index de0a3c6..4250b06 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,12 @@ +use std::fs::File; +use std::io::prelude::*; + mod parser; mod ast; mod typecheck; mod constfold; +mod emit; + fn main() { let input = include_str!("../test.hw"); @@ -10,7 +15,9 @@ fn main() { Ok(mut script) => { constfold::fold_script(&mut script); typecheck::tc_script(&mut script).unwrap(); - dbg!(script); + let wasm = emit::emit(&script); + let mut file = File::create("test.wasm").unwrap(); + file.write_all(&wasm).unwrap(); }, Err(err) => println!("error: {}", nom::error::convert_error(input, err)) } diff --git a/src/parser.rs b/src/parser.rs index f3f349d..15401fd 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2,10 +2,10 @@ use crate::ast; use nom::{ branch::alt, bytes::complete::tag, - character::complete::{alpha1, alphanumeric1, char, digit1, multispace0}, - combinator::{self, cut, map, map_res, not, opt, peek, recognize, success, value}, + character::complete::{alpha1, alphanumeric1, char, digit1, multispace0, none_of}, + combinator::{self, cut, map, map_res, not, opt, peek, recognize, value}, error::VerboseError, - multi::{fold_many0, many0, separated_list0}, + multi::{fold_many0, many0, many1, separated_list0}, sequence::{delimited, pair, preceded, separated_pair, terminated}, Finish, }; @@ -19,10 +19,12 @@ pub fn parse(s: &str) -> Result> { fn script(s: &str) -> IResult { let (s, items) = many0(top_level_item)(s)?; + let mut imports = vec![]; let mut global_vars = vec![]; let mut functions = vec![]; for item in items { match item { + ast::TopLevelItem::Import(i) => imports.push(i), ast::TopLevelItem::GlobalVar(v) => global_vars.push(v), ast::TopLevelItem::Function(f) => functions.push(f), } @@ -30,6 +32,7 @@ fn script(s: &str) -> IResult { Ok(( s, ast::Script { + imports, global_vars, functions, }, @@ -38,13 +41,50 @@ fn script(s: &str) -> IResult { fn top_level_item(s: &str) -> IResult { alt(( + map(import, ast::TopLevelItem::Import), map(function, ast::TopLevelItem::Function), map(global_var, ast::TopLevelItem::GlobalVar), ))(s) } +fn import(s: &str) -> IResult { + let (s, position) = ws(position)(s)?; + let (s, _) = tag("import")(s)?; + let (s, import) = ws(delimited( + char('"'), + recognize(many1(none_of("\""))), + char('"'), + ))(s)?; + let (s, type_) = alt(( + map_res( + preceded( + ws(tag("memory")), + delimited(ws(char('(')), ws(digit1), ws(char(')'))), + ), + |num| num.parse().map(ast::ImportType::Memory), + ), + map( + preceded( + ws(tag("global")), + pair(identifier, preceded(ws(char(':')), type_)), + ), + |(name, type_)| ast::ImportType::Variable { name, type_ }, + ), + ))(s)?; + let (s, _) = ws(char(';'))(s)?; + + Ok(( + s, + ast::Import { + position, + import, + type_, + }, + )) +} + fn global_var(s: &str) -> IResult { - let (s, vis) = visibility(s)?; + let (s, _) = ws(tag("global"))(s)?; let (s, position) = ws(position)(s)?; let (s, name) = identifier(s)?; let (s, type_) = preceded(ws(char(':')), type_)(s)?; @@ -54,7 +94,6 @@ fn global_var(s: &str) -> IResult { s, ast::GlobalVar { position, - visibility: vis, name: name, type_, }, @@ -62,7 +101,7 @@ fn global_var(s: &str) -> IResult { } fn function(s: &str) -> IResult { - let (s, vis) = visibility(s)?; + let (s, export) = map(ws(opt(tag("export"))), |e| e.is_some())(s)?; let (s, _) = ws(tag("fn"))(s)?; cut(move |s| { let (s, position) = ws(position)(s)?; @@ -82,7 +121,7 @@ fn function(s: &str) -> IResult { s, ast::Function { position, - visibility: vis, + export, name: name, params, type_, @@ -361,14 +400,6 @@ fn integer(s: &str) -> IResult { ))(s) } -fn visibility(s: &str) -> IResult { - ws(alt(( - value(ast::Visibility::Export, tag("export")), - value(ast::Visibility::Import, tag("import")), - success(ast::Visibility::Local), - )))(s) -} - fn type_(s: &str) -> IResult { ws(alt(( value(ast::Type::I32, tag("i32")), diff --git a/src/typecheck.rs b/src/typecheck.rs index 2ef6f42..b9d979e 100644 --- a/src/typecheck.rs +++ b/src/typecheck.rs @@ -19,6 +19,22 @@ pub fn tc_script(script: &mut ast::Script) -> Result<()> { local_vars: HashMap::new(), }; + for import in &script.imports { + match import.type_ { + ast::ImportType::Variable { name, type_ } => { + if context.global_vars.contains_key(name) { + return Err(Error { + position: import.position, + message: "Duplicate global variable".into(), + }); + } + context.global_vars.insert(name, type_); + } + // ast::ImportType::Function { .. } => todo!(), + ast::ImportType::Memory( .. ) => () + } + } + for v in &script.global_vars { if context.global_vars.contains_key(v.name) { return Err(Error { diff --git a/test.hw b/test.hw index bf6bb62..e8ebf03 100644 --- a/test.hw +++ b/test.hw @@ -1,4 +1,5 @@ -import time: i32; +import "uw8.ram" memory(2); +import "uw8.time" global time: i32; export fn tic() { let i = 0;