From 4cfc7ae8a80db58160e6089e4daf4c56f247c919 Mon Sep 17 00:00:00 2001 From: Dennis Ranke Date: Fri, 6 May 2022 01:23:31 +0200 Subject: [PATCH] add support for escaped strings + (multi-)char literals --- examples/microw8/print.cwa | 15 ++++++++ src/parser.rs | 73 +++++++++++++++++++++++++++++++++++--- 2 files changed, 84 insertions(+), 4 deletions(-) create mode 100644 examples/microw8/print.cwa diff --git a/examples/microw8/print.cwa b/examples/microw8/print.cwa new file mode 100644 index 0000000..7af0633 --- /dev/null +++ b/examples/microw8/print.cwa @@ -0,0 +1,15 @@ +import "env.memory" memory(4); +import "env.printString" fn printString(i32); +import "env.printChar" fn printChar(i32); + +export fn upd() { + printChar(12); + printChar('Test'); + printChar('\x1f\x10\x10'); + printChar('abc\n'); + printString(0); +} + +data 0 { + "\x0e\x64\"Colors!!!\"\x0e\1\0" +} \ No newline at end of file diff --git a/src/parser.rs b/src/parser.rs index 548d380..50fb894 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -288,11 +288,32 @@ fn lexer() -> impl Parser, Error = LexerError> { .map_err(|err| LexerError::custom(span, err.to_string())) }); - let str_ = just('"') - .ignore_then(filter(|c| *c != '"').repeated()) - .then_ignore(just('"')) + let str_ = just('\\') + .then(any()) + .map(|t| vec![t.0, t.1]) + .or(none_of("\"").map(|c| vec![c])) + .repeated() + .flatten() + .delimited_by(just('"'), just('"')) .collect::() - .map(Token::Str); + .map(|s| Token::Str(parse_string_escapes(s))); + + let char_ = just('\\') + .then(any()) + .map(|t| vec![t.0, t.1]) + .or(none_of("\'").map(|c| vec![c])) + .repeated() + .flatten() + .delimited_by(just('\''), just('\'')) + .collect::() + .map(|s| { + let s = parse_string_escapes(s); + let mut value = 0; + for (i, c) in s.chars().enumerate() { + value |= (c as u32) << (i * 8); + } + Token::Int(value as i32) + }); let op = one_of("+-*/%&^|<=>#") .repeated() @@ -344,6 +365,7 @@ fn lexer() -> impl Parser, Error = LexerError> { .or(int_float) 
/// Expands backslash escape sequences in the raw contents of a string or char literal.
///
/// Supported escapes:
/// - `\0`, `\n`, `\r`, `\t`: NUL, line feed, carriage return and tab.
/// - `\xHH`: the character with code point `0xHH`, where both `H` are hex digits
///   (upper or lower case). Values of `0x80` and above become U+0080..=U+00FF, which
///   take two bytes in the UTF-8 encoded result.
/// - `\<any other char>`: that character itself (this covers `\\`, `\"` and `\'`).
///
/// Malformed input is never an error. An `\x` that is not followed by two hex digits is
/// kept as the literal text `\x` and the characters after it are processed normally, so
/// an escape sequence directly following it is still decoded. A lone trailing backslash
/// is kept as-is.
fn parse_string_escapes(s: String) -> String {
    // The result is never longer than the input, so one allocation is enough.
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            result.push(c);
            continue;
        }
        match chars.next() {
            Some('0') => result.push('\0'),
            Some('n') => result.push('\n'),
            Some('r') => result.push('\r'),
            Some('t') => result.push('\t'),
            Some('x') => {
                // Inspect the two digits on a copy of the iterator so that a malformed
                // escape consumes nothing beyond the `\x` itself.
                let mut lookahead = chars.clone();
                let high = lookahead.next().and_then(|digit| digit.to_digit(16));
                let low = lookahead.next().and_then(|digit| digit.to_digit(16));
                // `to_digit` only accepts real hex digits; unlike `from_str_radix` it
                // does not treat a leading `+` as part of the number.
                let decoded = match (high, low) {
                    (Some(high), Some(low)) => char::from_u32((high << 4) | low),
                    _ => None,
                };
                match decoded {
                    Some(ch) => {
                        result.push(ch);
                        chars = lookahead;
                    }
                    None => result.push_str("\\x"),
                }
            }
            Some(other) => result.push(other),
            // A backslash at the very end of the input has nothing to escape.
            None => result.push('\\'),
        }
    }
    result
}