From 8fddfc639f5d0a4d09312b18d9374a9521389837 Mon Sep 17 00:00:00 2001
From: gluax 
Date: Wed, 23 Feb 2022 15:56:30 -0800
Subject: [PATCH 1/4] lexer errors so far

---
 errors/src/parser/parser_errors.rs | 80 +++++++++++++++++++
 parser/src/tokenizer/lexer.rs | 123 +++++++++++++++--------------
 parser/src/tokenizer/mod.rs | 63 ++++++++-------
 parser/src/tokenizer/token.rs | 2 +
 4 files changed, 177 insertions(+), 91 deletions(-)

diff --git a/errors/src/parser/parser_errors.rs b/errors/src/parser/parser_errors.rs
index 17757a94ba..9d860947da 100644
--- a/errors/src/parser/parser_errors.rs
+++ b/errors/src/parser/parser_errors.rs
@@ -223,4 +223,84 @@ create_errors!(
         msg: "Array dimensions specified as a tuple cannot be empty.",
         help: None,
     }
+
+    /// When an empty input tendril was expected but not found.
+    @backtraced
+    lexer_empty_input_tendril {
+        args: (),
+        msg: "",
+        help: None,
+    }
+
+    /// When an integer is started with a leading zero.
+    @backtraced
+    lexer_eat_integer_leading_zero {
+        args: (),
+        msg: "",
+        help: None,
+    }
+
+    /// When a valid escape character is expected but not found.
+    @backtraced
+    lexer_expected_valid_escaped_char {
+        args: (),
+        msg: "",
+        help: None,
+    }
+
+    /// When a string is not properly closed.
+    @backtraced
+    lexer_string_not_closed {
+        args: (),
+        msg: "",
+        help: None,
+    }
+
+    /// When an illegal escaped character is provided.
+    @backtraced
+    lexer_invalid_escaped_char {
+        args: (),
+        msg: "",
+        help: None,
+    }
+
+    /// When a char is not properly closed.
+    @backtraced
+    lexer_char_not_closed {
+        args: (),
+        msg: "",
+        help: None,
+    }
+
+    /// When an invalid character is provided.
+    @backtraced
+    lexer_invalid_char {
+        args: (),
+        msg: "",
+        help: None,
+    }
+
+    /// When a block comment is empty.
+    @backtraced
+    lexer_empty_block_comment {
+        args: (),
+        msg: "",
+        help: None,
+    }
+
+    /// When a block comment is not closed before end of file.
+    @backtraced
+    lexer_block_comment_does_not_close_before_eof {
+        args: (),
+        msg: "",
+        help: None,
+    }
+
+    /// When the lexer could not lex some text.
+    @backtraced
+    could_not_lex {
+        args: (),
+        msg: "",
+        help: None,
+    }
 );
diff --git a/parser/src/tokenizer/lexer.rs b/parser/src/tokenizer/lexer.rs
index 37e899a03f..d693f65843 100644
--- a/parser/src/tokenizer/lexer.rs
+++ b/parser/src/tokenizer/lexer.rs
@@ -15,6 +15,7 @@
 // along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
 
 use crate::tokenizer::{Char, Token};
+use leo_errors::{Result, ParserError};
 use leo_span::{Span, Symbol};
 
 use serde::{Deserialize, Serialize};
@@ -147,13 +148,13 @@ impl Token {
     /// Returns a tuple: [(integer length, integer token)] if an integer can be eaten, otherwise returns [`None`].
     /// An integer can be eaten if its bytes are at the front of the given `input_tendril` string.
     ///
-    fn eat_integer(input_tendril: &StrTendril) -> (usize, Option<Token>) {
+    fn eat_integer(input_tendril: &StrTendril) -> Result<(usize, Token)> {
         if input_tendril.is_empty() {
-            return (0, None);
+            return Err(ParserError::lexer_empty_input_tendril().into());
         }
         let input = input_tendril[..].as_bytes();
         if !input[0].is_ascii_digit() {
-            return (0, None);
+            return Err(ParserError::lexer_eat_integer_leading_zero().into());
        }
        let mut i = 1;
        let mut is_hex = false;
@@ -173,7 +174,7 @@ impl Token {
             i += 1;
         }
 
-        (i, Some(Token::Int(input_tendril.subtendril(0, i as u32))))
+        Ok((i, Token::Int(input_tendril.subtendril(0, i as u32))))
     }
 
     /// Returns the number of bytes in an emoji via a bit mask. 
@@ -197,13 +198,13 @@ impl Token {
     /// Returns a tuple: [(token length, token)] if the next token can be eaten, otherwise returns [`None`].
     /// The next token can be eaten if the bytes at the front of the given `input_tendril` string can be scanned into a token.
     ///
-    pub(crate) fn eat(input_tendril: StrTendril) -> (usize, Option<Token>) {
+    pub(crate) fn eat(input_tendril: StrTendril) -> Result<(usize, Token)> {
         if input_tendril.is_empty() {
-            return (0, None);
+            return Err(ParserError::lexer_empty_input_tendril().into());
         }
         let input = input_tendril[..].as_bytes();
         match input[0] {
-            x if x.is_ascii_whitespace() => return (1, None),
+            x if x.is_ascii_whitespace() => return Ok((1, Token::WhiteSpace)),
             b'"' => {
                 let mut i = 1;
                 let mut len: u8 = 1;
@@ -270,7 +271,7 @@ impl Token {
                             unicode = false;
                             string.push(character.into());
                         }
-                        None => return (0, None),
+                        None => return Err(ParserError::lexer_expected_valid_escaped_char().into()),
                     }
                 }
 
@@ -282,10 +283,10 @@ impl Token {
                 }
 
                 if i == input.len() || !end {
-                    return (0, None);
+                    return Err(ParserError::lexer_string_not_closed().into());
                 }
 
-                return (i + 1, Some(Token::StringLit(string)));
+                return Ok((i + 1, Token::StringLit(string)));
             }
             b'\'' => {
                 let mut i = 1;
@@ -310,7 +311,7 @@ impl Token {
                         if input[i + 1] == b'{' {
                             unicode = true;
                         } else {
-                            return (0, None);
+                            return Err(ParserError::lexer_invalid_escaped_char().into());
                         }
                     } else {
                         escaped = true;
@@ -323,12 +324,12 @@ impl Token {
                 }
 
                 if !end {
-                    return (0, None);
+                    return Err(ParserError::lexer_string_not_closed().into());
                 }
 
                 return match Self::eat_char(input_tendril.subtendril(1, (i - 1) as u32), escaped, hex, unicode) {
-                    Some(character) => (i + 1, Some(Token::CharLit(character))),
-                    None => (0, None),
+                    Some(character) => Ok((i + 1, Token::CharLit(character))),
+                    None => Err(ParserError::lexer_invalid_char().into()),
                 };
             }
             x if x.is_ascii_digit() => {
@@ -336,119 +337,119 @@ impl Token {
                 return Self::eat_integer(&input_tendril);
             }
             b'!' => {
                 if let Some(len) = eat(input, "!=") {
-                    return (len, Some(Token::NotEq));
+                    return Ok((len, Token::NotEq));
                 }
-                return (1, Some(Token::Not));
+                return Ok((1, Token::Not));
             }
             b'?' 
=> { - return (1, Some(Token::Question)); + return Ok((1, Token::Question)); } b'&' => { if let Some(len) = eat(input, "&&") { - return (len, Some(Token::And)); + return Ok((len, Token::And)); } - return (1, Some(Token::Ampersand)); + return Ok((1, Token::Ampersand)); } - b'(' => return (1, Some(Token::LeftParen)), - b')' => return (1, Some(Token::RightParen)), - b'_' => return (1, Some(Token::Underscore)), + b'(' => return Ok((1, Token::LeftParen)), + b')' => return Ok((1, Token::RightParen)), + b'_' => return Ok((1, Token::Underscore)), b'*' => { if let Some(len) = eat(input, "**") { if let Some(inner_len) = eat(&input[len..], "=") { - return (len + inner_len, Some(Token::ExpEq)); + return Ok((len + inner_len, Token::ExpEq)); } - return (len, Some(Token::Exp)); + return Ok((len, Token::Exp)); } else if let Some(len) = eat(input, "*=") { - return (len, Some(Token::MulEq)); + return Ok((len, Token::MulEq)); } - return (1, Some(Token::Mul)); + return Ok((1, Token::Mul)); } b'+' => { if let Some(len) = eat(input, "+=") { - return (len, Some(Token::AddEq)); + return Ok((len, Token::AddEq)); } - return (1, Some(Token::Add)); + return Ok((1, Token::Add)); } - b',' => return (1, Some(Token::Comma)), + b',' => return Ok((1, Token::Comma)), b'-' => { if let Some(len) = eat(input, "->") { - return (len, Some(Token::Arrow)); + return Ok((len, Token::Arrow)); } else if let Some(len) = eat(input, "-=") { - return (len, Some(Token::MinusEq)); + return Ok((len, Token::MinusEq)); } - return (1, Some(Token::Minus)); + return Ok((1, Token::Minus)); } b'.' => { if let Some(len) = eat(input, "...") { - return (len, Some(Token::DotDotDot)); + return Ok((len, Token::DotDotDot)); } else if let Some(len) = eat(input, "..") { - return (len, Some(Token::DotDot)); + return Ok((len, Token::DotDot)); } - return (1, Some(Token::Dot)); + return Ok((1, Token::Dot)); } b'/' => { if eat(input, "//").is_some() { let eol = input.iter().position(|x| *x == b'\n'); let len = if let Some(eol) = eol { eol + 1 } else { input.len() }; - return (len, Some(Token::CommentLine(input_tendril.subtendril(0, len as u32)))); + return Ok((len, Token::CommentLine(input_tendril.subtendril(0, len as u32)))); } else if eat(input, "/*").is_some() { if input.is_empty() { - return (0, None); + return Err(ParserError::lexer_empty_block_comment().into()); } let eol = input.windows(2).skip(2).position(|x| x[0] == b'*' && x[1] == b'/'); let len = if let Some(eol) = eol { eol + 4 } else { - return (0, None); + return Err(ParserError::lexer_block_comment_does_not_close_before_eof().into()); }; - return (len, Some(Token::CommentBlock(input_tendril.subtendril(0, len as u32)))); + return Ok((len, Token::CommentBlock(input_tendril.subtendril(0, len as u32)))); } else if let Some(len) = eat(input, "/=") { - return (len, Some(Token::DivEq)); + return Ok((len, Token::DivEq)); } - return (1, Some(Token::Div)); + return Ok((1, Token::Div)); } b':' => { if let Some(len) = eat(input, "::") { - return (len, Some(Token::DoubleColon)); + return Ok((len, Token::DoubleColon)); } else { - return (1, Some(Token::Colon)); + return Ok((1, Token::Colon)); } } - b';' => return (1, Some(Token::Semicolon)), + b';' => return Ok((1, Token::Semicolon)), b'<' => { if let Some(len) = eat(input, "<=") { - return (len, Some(Token::LtEq)); + return Ok((len, Token::LtEq)); } - return (1, Some(Token::Lt)); + return Ok((1, Token::Lt)); } b'>' => { if let Some(len) = eat(input, ">=") { - return (len, Some(Token::GtEq)); + return Ok((len, Token::GtEq)); } - return (1, Some(Token::Gt)); + 
return Ok((1, Token::Gt));
             }
             b'=' => {
                 if let Some(len) = eat(input, "==") {
-                    return (len, Some(Token::Eq));
+                    return Ok((len, Token::Eq));
                 }
-                return (1, Some(Token::Assign));
+                return Ok((1, Token::Assign));
             }
-            b'@' => return (1, Some(Token::At)),
-            b'[' => return (1, Some(Token::LeftSquare)),
-            b']' => return (1, Some(Token::RightSquare)),
-            b'{' => return (1, Some(Token::LeftCurly)),
-            b'}' => return (1, Some(Token::RightCurly)),
+            b'@' => return Ok((1, Token::At)),
+            b'[' => return Ok((1, Token::LeftSquare)),
+            b']' => return Ok((1, Token::RightSquare)),
+            b'{' => return Ok((1, Token::LeftCurly)),
+            b'}' => return Ok((1, Token::RightCurly)),
             b'|' => {
                 if let Some(len) = eat(input, "||") {
-                    return (len, Some(Token::Or));
+                    return Ok((len, Token::Or));
                 }
             }
             _ => (),
         }
         if let Some(ident) = eat_identifier(&input_tendril) {
-            return (
+            return Ok((
                 ident.len(),
-                Some(match &*ident {
+                match &*ident {
                     x if x.starts_with("aleo1") => Token::AddressLit(ident),
                     "address" => Token::Address,
                     "as" => Token::As,
@@ -486,11 +487,11 @@ impl Token {
                     "u64" => Token::U64,
                     "u128" => Token::U128,
                     _ => Token::Ident(Symbol::intern(&ident)),
-                }),
-            );
+                },
+            ));
         }
 
-        (0, None)
+        Err(ParserError::could_not_lex().into())
     }
 }
diff --git a/parser/src/tokenizer/mod.rs b/parser/src/tokenizer/mod.rs
index 52fded2834..4b6d16b85e 100644
--- a/parser/src/tokenizer/mod.rs
+++ b/parser/src/tokenizer/mod.rs
@@ -28,21 +28,42 @@ pub(crate) use self::token::*;
 pub(crate) mod lexer;
 pub(crate) use self::lexer::*;
 
-use leo_errors::{LeoError, ParserError};
+use leo_errors:: {ParserError, Result};
 use leo_span::Span;
 
 use tendril::StrTendril;
 
 /// Creates a new vector of spanned tokens from a given file path and source code text.
-pub(crate) fn tokenize(path: &str, input: StrTendril) -> Result<Vec<SpannedToken>, LeoError> {
+pub(crate) fn tokenize(path: &str, input: StrTendril) -> Result<Vec<SpannedToken>> {
     let path = Arc::new(path.to_string());
     let mut tokens = vec![];
     let mut index = 0usize;
     let mut line_no = 1usize;
     let mut line_start = 0usize;
     while input.len() > index {
-        match Token::eat(input.subtendril(index as u32, (input.len() - index) as u32)) {
-            (token_len, Some(token)) => {
+        match Token::eat(input.subtendril(index as u32, (input.len() - index) as u32))? {
+            (token_len, Token::WhiteSpace) => {
+                if token_len == 0 && index == input.len() {
+                    break;
+                } else if token_len == 0 {
+                    return Err(ParserError::unexpected_token(
+                        &input[index..].chars().next().unwrap(),
+                        &Span::new(
+                            line_no,
+                            line_no,
+                            index - line_start + 1,
+                            index - line_start + 2,
+                            path,
+                            input.subtendril(
+                                line_start as u32,
+                                input[line_start..].find('\n').unwrap_or_else(|| input.len()) as u32,
+                            ),
+                        ),
+                    )
+                    .into());
+                }
+            }
+            (token_len, token) => {
                 let mut span = Span::new(
                     line_no,
                     line_no,
@@ -79,32 +100,14 @@ pub(crate) fn tokenize(path: &str, input: StrTendril) -> Result<Vec<SpannedToken>, LeoError> {
                 index += token_len;
             }
-            (token_len, None) => {
-                if token_len == 0 && index == input.len() {
-                    break;
-                } else if token_len == 0 {
-                    return Err(ParserError::unexpected_token(
-                        &input[index..].chars().next().unwrap(),
-                        &Span::new(
-                            line_no,
-                            line_no,
-                            index - line_start + 1,
-                            index - line_start + 2,
-                            path,
-                            input.subtendril(
-                                line_start as u32,
-                                input[line_start..].find('\n').unwrap_or_else(|| input.len()) as u32,
-                            ),
-                        ),
-                    )
-                    .into());
-                }
-                if input.as_bytes()[index] == b'\n' {
-                    line_no += 1;
-                    line_start = index + token_len;
-                }
-                index += token_len;
-            }
+            // (token_len, None) => {
+            //     if input.as_bytes()[index] == b'\n' {
+            //         line_no += 1;
+            //         line_start = index + token_len;
+            //     }
+            //     index += token_len;
+
+            // }
         }
     }
     Ok(tokens)
diff --git a/parser/src/tokenizer/token.rs b/parser/src/tokenizer/token.rs
index ce3347beb0..d2850ed39f 100644
--- a/parser/src/tokenizer/token.rs
+++ b/parser/src/tokenizer/token.rs
@@ -59,6 +59,7 @@ pub enum Token {
     False,
     AddressLit(#[serde(with = "leo_span::tendril_json")] StrTendril),
     CharLit(Char),
+    WhiteSpace,
 
     // Symbols
     At,
@@ -258,6 +259,7 @@ impl fmt::Display for Token {
             False => write!(f, "false"),
             AddressLit(s) => write!(f, "{}", s),
             CharLit(s) => write!(f, "{}", s),
+            WhiteSpace => write!(f, "whitespace"), 
 
             At => write!(f, "@"),

From 7f218553129453cca77174a78c16214ef6050765 Mon Sep 17 00:00:00 2001
From: gluax 
Date: Wed, 23 Feb 2022 16:14:53 -0800
Subject: [PATCH 2/4] fix infinite loop, check if tests work

---
 parser/src/tokenizer/mod.rs | 15 ++--
 .../expression/literal/char_fail.leo.out | 70 +++++++++----------
 .../expression/literal/string_fail.leo.out | 14 ++--
 3 files changed, 48 insertions(+), 51 deletions(-)

diff --git a/parser/src/tokenizer/mod.rs b/parser/src/tokenizer/mod.rs
index 4b6d16b85e..c882546cc3 100644
--- a/parser/src/tokenizer/mod.rs
+++ b/parser/src/tokenizer/mod.rs
@@ -62,6 +62,11 @@ pub(crate) fn tokenize(path: &str, input: StrTendril) -> Result<Vec<SpannedToken>> {
                     break;
                 }
+                if input.as_bytes()[index] == b'\n' {
+                    line_no += 1;
+                    line_start = index + token_len;
+                }
+                index += token_len;
             }
             (token_len, token) => {
                 let mut span = Span::new(
@@ -100,14 +105,6 @@ pub(crate) fn tokenize(path: &str, input: StrTendril) -> Result<Vec<SpannedToken>> {
                 }
             }
-            // (token_len, None) => {
-            //     if input.as_bytes()[index] == b'\n' {
-            //         line_no += 1;
-            //         line_start = index + token_len;
-            //     }
-            //     index += token_len;
-
-            // }
         }
     }
     Ok(tokens)
@@ -232,7 +229,7 @@ mod tests {
     fn test_spans() {
         create_session_if_not_set_then(|_| {
             let raw = r#"
-    test
+ppp test
             // test
             test /* test */
diff --git a/tests/expectations/parser/parser/expression/literal/char_fail.leo.out b/tests/expectations/parser/parser/expression/literal/char_fail.leo.out
index 46390f4fb9..8bc453a975 100644
--- a/tests/expectations/parser/parser/expression/literal/char_fail.leo.out
+++ b/tests/expectations/parser/parser/expression/literal/char_fail.leo.out
@@ -2,38 +2,38 @@ namespace: Token
 expectation: Fail
 outputs:
- - "Error 
[EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\x9A'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\x7'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\x7g'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\xz'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\x80'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\xc1'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\xc2'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\xDF'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\xC0'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\xe0'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\x9f'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | 'abcdefg'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\t\\t'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\a'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\z'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\A'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\Z'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\1'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\9'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\*'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\x'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\u'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\u{bbbbb}\\u{aaaa}'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\uz'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\u1'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\u123'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\u{2764z'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\u{276g}'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\u00000000'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\u01000000'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '\\u9999999'\n | ^" - - "Error [EPAR0370000]: '\n --> test:1:1\n |\n 1 | '😭😂😘'\n | ^" + - "Error [EPAR0370027]: " + - "Error [EPAR0370027]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370028]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370028]: " + - "Error [EPAR0370028]: " + - "Error [EPAR0370028]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370030]: " + - "Error [EPAR0370028]: " + - "Error [EPAR0370028]: " + - "Error [EPAR0370028]: " + - "Error [EPAR0370030]: " diff --git a/tests/expectations/parser/parser/expression/literal/string_fail.leo.out b/tests/expectations/parser/parser/expression/literal/string_fail.leo.out index 4d8c97fc71..f97f766501 100644 --- a/tests/expectations/parser/parser/expression/literal/string_fail.leo.out +++ b/tests/expectations/parser/parser/expression/literal/string_fail.leo.out @@ -2,10 +2,10 @@ namespace: Token 
expectation: Fail
 outputs:
- - "Error [EPAR0370000]: \"\n --> test:1:1\n |\n 1 | \"Hello world!\n | ^"
- - "Error [EPAR0370000]: \"\n --> test:1:1\n |\n 1 | \"\\\"\n | ^"
- - "Error [EPAR0370000]: \"\n --> test:1:1\n |\n 1 | \"\\l\"\n | ^"
- - "Error [EPAR0370000]: \"\n --> test:1:1\n |\n 1 | \"\\uaaa\"\n | ^"
- - "Error [EPAR0370000]: \"\n --> test:1:1\n |\n 1 | \"\\u\"\n | ^"
- - "Error [EPAR0370000]: \"\n --> test:1:1\n |\n 1 | \"\\xFF\"\n | ^"
- - "Error [EPAR0370000]: \"\n --> test:1:1\n |\n 1 | \"\\x\"\n | ^"
+ - "Error [EPAR0370027]: "
+ - "Error [EPAR0370027]: "
+ - "Error [EPAR0370026]: "
+ - "Error [EPAR0370027]: "
+ - "Error [EPAR0370027]: "
+ - "Error [EPAR0370026]: "
+ - "Error [EPAR0370027]: "

From c0b0e28ee20d9c070bd07ca7c22774dd30cc2967 Mon Sep 17 00:00:00 2001
From: gluax 
Date: Fri, 25 Feb 2022 11:23:59 -0800
Subject: [PATCH 3/4] error messages and test regen

---
 errors/src/parser/parser_errors.rs | 42 +++++------
 parser/src/tokenizer/lexer.rs | 16 ++---
 parser/src/tokenizer/mod.rs | 2 +-
 .../expression/literal/char_fail.leo.out | 70 +++++++++----------
 .../expression/literal/string_fail.leo.out | 14 ++--
 5 files changed, 68 insertions(+), 76 deletions(-)

diff --git a/errors/src/parser/parser_errors.rs b/errors/src/parser/parser_errors.rs
index 9d860947da..d89cd5407f 100644
--- a/errors/src/parser/parser_errors.rs
+++ b/errors/src/parser/parser_errors.rs
@@ -16,7 +16,7 @@
 
 use crate::create_errors;
 
-use std::fmt::Display;
+use std::fmt::{Debug, Display};
 
 create_errors!(
     /// ParserError enum that represents all the errors for the `leo-parser` crate.
@@ -228,55 +228,47 @@ create_errors!(
     /// When an empty input tendril was expected but not found.
     @backtraced
     lexer_empty_input_tendril {
         args: (),
-        msg: "",
+        msg: "Expected more characters to lex but found none.",
         help: None,
     }
 
     /// When an integer is started with a leading zero.
     @backtraced
     lexer_eat_integer_leading_zero {
-        args: (),
-        msg: "",
+        args: (input: impl Display),
+        msg: format!("Tried to eat integer but found a leading zero on {}.", input),
         help: None,
     }
 
     /// When a valid escape character is expected but not found.
     @backtraced
     lexer_expected_valid_escaped_char {
-        args: (),
-        msg: "",
+        args: (input: impl Display),
+        msg: format!("Expected a valid escape character but found {}.", input),
         help: None,
     }
 
     /// When a string is not properly closed.
     @backtraced
     lexer_string_not_closed {
-        args: (),
-        msg: "",
+        args: (input: impl Display),
+        msg: format!("Expected a closed string but found {}.", input),
         help: None,
     }
 
-    /// When an illegal escaped character is provided.
-    @backtraced
-    lexer_invalid_escaped_char {
-        args: (),
-        msg: "",
-        help: None,
-    }
-
     /// When a char is not properly closed.
     @backtraced
     lexer_char_not_closed {
-        args: (),
-        msg: "",
+        args: (input: impl Display),
+        msg: format!("Expected a closed char but found {}.", input),
         help: None,
     }
 
     /// When an invalid character is provided.
     @backtraced
     lexer_invalid_char {
-        args: (),
-        msg: "",
+        args: (input: impl Display),
+        msg: format!("Expected valid character but found {}.", input),
         help: None,
     }
 
     /// When a block comment is empty.
     @backtraced
     lexer_empty_block_comment {
         args: (),
-        msg: "",
+        msg: "Empty block comment.",
         help: None,
     }
 
     /// When a block comment is not closed before end of file.
     @backtraced
     lexer_block_comment_does_not_close_before_eof {
-        args: (),
-        msg: "",
+        args: (input: impl Display),
+        msg: format!("Block comment does not close with content: {}.", input),
         help: None,
     }
 
     /// When the lexer could not lex some text.
@backtraced could_not_lex { - args: (), - msg: "", + args: (input: impl Display), + msg: format!("Could not lex the following content: {}.", input), help: None, } ); diff --git a/parser/src/tokenizer/lexer.rs b/parser/src/tokenizer/lexer.rs index d693f65843..29ce21ec03 100644 --- a/parser/src/tokenizer/lexer.rs +++ b/parser/src/tokenizer/lexer.rs @@ -154,7 +154,7 @@ impl Token { } let input = input_tendril[..].as_bytes(); if !input[0].is_ascii_digit() { - return Err(ParserError::lexer_eat_integer_leading_zero().into()); + return Err(ParserError::lexer_eat_integer_leading_zero(String::from_utf8_lossy(input)).into()); } let mut i = 1; let mut is_hex = false; @@ -271,7 +271,7 @@ impl Token { unicode = false; string.push(character.into()); } - None => return Err(ParserError::lexer_expected_valid_escaped_char().into()), + None => return Err(ParserError::lexer_expected_valid_escaped_char(input_tendril.subtendril(start as u32, len as u32)).into()), } } @@ -283,7 +283,7 @@ impl Token { } if i == input.len() || !end { - return Err(ParserError::lexer_string_not_closed().into()); + return Err(ParserError::lexer_string_not_closed(String::from_utf8_lossy(&input[0..i])).into()); } return Ok((i + 1, Token::StringLit(string))); @@ -311,7 +311,7 @@ impl Token { if input[i + 1] == b'{' { unicode = true; } else { - return Err(ParserError::lexer_invalid_escaped_char().into()); + return Err(ParserError::lexer_expected_valid_escaped_char(input[i]).into()); } } else { escaped = true; @@ -324,12 +324,12 @@ impl Token { } if !end { - return Err(ParserError::lexer_string_not_closed().into()); + return Err(ParserError::lexer_char_not_closed(String::from_utf8_lossy(&input[0..i])).into()); } return match Self::eat_char(input_tendril.subtendril(1, (i - 1) as u32), escaped, hex, unicode) { Some(character) => Ok((i + 1, Token::CharLit(character))), - None => Err(ParserError::lexer_invalid_char().into()), + None => Err(ParserError::lexer_invalid_char(String::from_utf8_lossy(&input[0..i-1])).into()), }; } x if x.is_ascii_digit() => { @@ -400,7 +400,7 @@ impl Token { let len = if let Some(eol) = eol { eol + 4 } else { - return Err(ParserError::lexer_block_comment_does_not_close_before_eof().into()); + return Err(ParserError::lexer_block_comment_does_not_close_before_eof(String::from_utf8_lossy(&input[0..])).into()); }; return Ok((len, Token::CommentBlock(input_tendril.subtendril(0, len as u32)))); } else if let Some(len) = eat(input, "/=") { @@ -491,7 +491,7 @@ impl Token { )); } - Err(ParserError::could_not_lex().into()) + Err(ParserError::could_not_lex(String::from_utf8_lossy(&input[0..])).into()) } } diff --git a/parser/src/tokenizer/mod.rs b/parser/src/tokenizer/mod.rs index c882546cc3..e334d60371 100644 --- a/parser/src/tokenizer/mod.rs +++ b/parser/src/tokenizer/mod.rs @@ -214,7 +214,7 @@ mod tests { .unwrap(); let mut output = String::new(); for SpannedToken { token, .. 
} in tokens.iter() { - output += &format!("{} ", token.to_string()); + output += &format!("{} ", token); } assert_eq!( diff --git a/tests/expectations/parser/parser/expression/literal/char_fail.leo.out b/tests/expectations/parser/parser/expression/literal/char_fail.leo.out index 8bc453a975..4642d0114d 100644 --- a/tests/expectations/parser/parser/expression/literal/char_fail.leo.out +++ b/tests/expectations/parser/parser/expression/literal/char_fail.leo.out @@ -2,38 +2,38 @@ namespace: Token expectation: Fail outputs: - - "Error [EPAR0370027]: " - - "Error [EPAR0370027]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370028]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370028]: " - - "Error [EPAR0370028]: " - - "Error [EPAR0370028]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370030]: " - - "Error [EPAR0370028]: " - - "Error [EPAR0370028]: " - - "Error [EPAR0370028]: " - - "Error [EPAR0370030]: " + - "Error [EPAR0370028]: Expected a closed char but found '\\'." + - "Error [EPAR0370028]: Expected a closed char but found 'a." + - "Error [EPAR0370029]: Expected valid character but found ." + - "Error [EPAR0370029]: Expected valid character but found '\\x9." + - "Error [EPAR0370029]: Expected valid character but found '\\x." + - "Error [EPAR0370029]: Expected valid character but found '\\x7." + - "Error [EPAR0370029]: Expected valid character but found '\\x." + - "Error [EPAR0370029]: Expected valid character but found '\\x8." + - "Error [EPAR0370029]: Expected valid character but found '\\xc." + - "Error [EPAR0370029]: Expected valid character but found '\\xc." + - "Error [EPAR0370029]: Expected valid character but found '\\xD." + - "Error [EPAR0370029]: Expected valid character but found '\\xC." + - "Error [EPAR0370029]: Expected valid character but found '\\xe." + - "Error [EPAR0370029]: Expected valid character but found '\\x9." + - "Error [EPAR0370029]: Expected valid character but found 'abcdef." + - "Error [EPAR0370029]: Expected valid character but found '\\t\\." + - "Error [EPAR0370029]: Expected valid character but found '\\." + - "Error [EPAR0370029]: Expected valid character but found '\\." + - "Error [EPAR0370029]: Expected valid character but found '\\." + - "Error [EPAR0370029]: Expected valid character but found '\\." + - "Error [EPAR0370029]: Expected valid character but found '\\." + - "Error [EPAR0370029]: Expected valid character but found '\\." + - "Error [EPAR0370029]: Expected valid character but found '\\." + - "Error [EPAR0370029]: Expected valid character but found '\\." + - "Error [EPAR0370026]: Expected a valid escape character but found 117." + - "Error [EPAR0370029]: Expected valid character but found '\\u{bbbbb}\\u{aaaa." + - "Error [EPAR0370026]: Expected a valid escape character but found 117." + - "Error [EPAR0370026]: Expected a valid escape character but found 117." + - "Error [EPAR0370026]: Expected a valid escape character but found 117." 
+ - "Error [EPAR0370029]: Expected valid character but found '\\u{2764." + - "Error [EPAR0370029]: Expected valid character but found '\\u{276g." + - "Error [EPAR0370026]: Expected a valid escape character but found 117." + - "Error [EPAR0370026]: Expected a valid escape character but found 117." + - "Error [EPAR0370026]: Expected a valid escape character but found 117." + - "Error [EPAR0370029]: Expected valid character but found '😭😂�." diff --git a/tests/expectations/parser/parser/expression/literal/string_fail.leo.out b/tests/expectations/parser/parser/expression/literal/string_fail.leo.out index f97f766501..2b1fcd8d31 100644 --- a/tests/expectations/parser/parser/expression/literal/string_fail.leo.out +++ b/tests/expectations/parser/parser/expression/literal/string_fail.leo.out @@ -2,10 +2,10 @@ namespace: Token expectation: Fail outputs: - - "Error [EPAR0370027]: " - - "Error [EPAR0370027]: " - - "Error [EPAR0370026]: " - - "Error [EPAR0370027]: " - - "Error [EPAR0370027]: " - - "Error [EPAR0370026]: " - - "Error [EPAR0370027]: " + - "Error [EPAR0370027]: Expected a closed string but found \"Hello world!." + - "Error [EPAR0370027]: Expected a closed string but found \"\\\"." + - "Error [EPAR0370026]: Expected a valid escape character but found \\l." + - "Error [EPAR0370027]: Expected a closed string but found \"\\uaaa\"." + - "Error [EPAR0370027]: Expected a closed string but found \"\\u\"." + - "Error [EPAR0370026]: Expected a valid escape character but found \\xFF." + - "Error [EPAR0370027]: Expected a closed string but found \"\\x\"." From 02c44b26d8d8c5c9abc84026ad940f5f97e6425c Mon Sep 17 00:00:00 2001 From: gluax Date: Fri, 25 Feb 2022 13:35:08 -0800 Subject: [PATCH 4/4] fix whitespace discrep --- errors/src/parser/parser_errors.rs | 48 +++++++++++++++--------------- parser/src/tokenizer/lexer.rs | 16 +++++++--- parser/src/tokenizer/mod.rs | 20 +++++++++---- parser/src/tokenizer/token.rs | 2 +- 4 files changed, 51 insertions(+), 35 deletions(-) diff --git a/errors/src/parser/parser_errors.rs b/errors/src/parser/parser_errors.rs index d89cd5407f..15cbb19241 100644 --- a/errors/src/parser/parser_errors.rs +++ b/errors/src/parser/parser_errors.rs @@ -235,64 +235,64 @@ create_errors!( /// When an integer is started with a leading zero. @backtraced lexer_eat_integer_leading_zero { - args: (input: impl Display), - msg: format!("Tried to eat integer but found a leading zero on {}.", input), - help: None, + args: (input: impl Display), + msg: format!("Tried to eat integer but found a leading zero on {}.", input), + help: None, } /// When an integer is started with a leading zero. @backtraced lexer_expected_valid_escaped_char { - args: (input: impl Display), - msg: format!("Expected a valid escape character but found {}.", input), - help: None, + args: (input: impl Display), + msg: format!("Expected a valid escape character but found {}.", input), + help: None, } /// When a string is not properly closed. @backtraced lexer_string_not_closed { - args: (input: impl Display), - msg: format!("Expected a closed string but found {}.", input), - help: None, + args: (input: impl Display), + msg: format!("Expected a closed string but found {}.", input), + help: None, } /// When a string is not properly closed. 
@backtraced
     lexer_char_not_closed {
-        args: (input: impl Display),
-        msg: format!("Expected a closed char but found {}.", input),
-        help: None,
+        args: (input: impl Display),
+        msg: format!("Expected a closed char but found {}.", input),
+        help: None,
     }
 
     /// When an invalid character is provided.
     @backtraced
     lexer_invalid_char {
-        args: (input: impl Display),
-        msg: format!("Expected valid character but found {}.", input),
-        help: None,
+        args: (input: impl Display),
+        msg: format!("Expected valid character but found {}.", input),
+        help: None,
     }
 
     /// When a block comment is empty.
     @backtraced
     lexer_empty_block_comment {
-        args: (),
-        msg: "Empty block comment.",
-        help: None,
+        args: (),
+        msg: "Empty block comment.",
+        help: None,
     }
 
     /// When a block comment is not closed before end of file.
     @backtraced
     lexer_block_comment_does_not_close_before_eof {
-        args: (input: impl Display),
-        msg: format!("Block comment does not close with content: {}.", input),
-        help: None,
+        args: (input: impl Display),
+        msg: format!("Block comment does not close with content: {}.", input),
+        help: None,
     }
 
     /// When the lexer could not lex some text.
     @backtraced
     could_not_lex {
-        args: (input: impl Display),
-        msg: format!("Could not lex the following content: {}.", input),
-        help: None,
+        args: (input: impl Display),
+        msg: format!("Could not lex the following content: {}.", input),
+        help: None,
     }
 );
diff --git a/parser/src/tokenizer/lexer.rs b/parser/src/tokenizer/lexer.rs
index 29ce21ec03..be73b78d95 100644
--- a/parser/src/tokenizer/lexer.rs
+++ b/parser/src/tokenizer/lexer.rs
@@ -15,7 +15,7 @@
 // along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
 
 use crate::tokenizer::{Char, Token};
-use leo_errors::{Result, ParserError};
+use leo_errors::{ParserError, Result};
 use leo_span::{Span, Symbol};
 
 use serde::{Deserialize, Serialize};
@@ -271,7 +271,12 @@ impl Token {
                             unicode = false;
                             string.push(character.into());
                         }
-                        None => return Err(ParserError::lexer_expected_valid_escaped_char(input_tendril.subtendril(start as u32, len as u32)).into()),
+                        None => {
+                            return Err(ParserError::lexer_expected_valid_escaped_char(
+                                input_tendril.subtendril(start as u32, len as u32),
+                            )
+                            .into())
+                        }
                     }
                 }
 
@@ -329,7 +334,7 @@ impl Token {
 
                 return match Self::eat_char(input_tendril.subtendril(1, (i - 1) as u32), escaped, hex, unicode) {
                     Some(character) => Ok((i + 1, Token::CharLit(character))),
-                    None => Err(ParserError::lexer_invalid_char(String::from_utf8_lossy(&input[0..i-1])).into()),
+                    None => Err(ParserError::lexer_invalid_char(String::from_utf8_lossy(&input[0..i - 1])).into()),
                 };
             }
             x if x.is_ascii_digit() => {
@@ -400,7 +405,10 @@ impl Token {
                 let len = if let Some(eol) = eol {
                     eol + 4
                 } else {
-                    return Err(ParserError::lexer_block_comment_does_not_close_before_eof(String::from_utf8_lossy(&input[0..])).into());
+                    return Err(ParserError::lexer_block_comment_does_not_close_before_eof(
+                        String::from_utf8_lossy(&input[0..]),
+                    )
+                    .into());
                 };
                 return Ok((len, Token::CommentBlock(input_tendril.subtendril(0, len as u32))));
             } else if let Some(len) = eat(input, "/=") {
diff --git a/parser/src/tokenizer/mod.rs b/parser/src/tokenizer/mod.rs
index e334d60371..f48b3222e4 100644
--- a/parser/src/tokenizer/mod.rs
+++ b/parser/src/tokenizer/mod.rs
@@ -28,7 +28,7 @@ pub(crate) use self::token::*;
 pub(crate) mod lexer;
 pub(crate) use self::lexer::*;
 
-use leo_errors:: {ParserError, Result};
+use leo_errors::{ParserError, Result};
 use leo_span::Span;
 
 use tendril::StrTendril;
@@ -42,8 +42,8 @@ pub(crate) fn tokenize(path: &str, input: StrTendril) -> Result<Vec<SpannedToken>> {
     while input.len() > index {
         match Token::eat(input.subtendril(index as u32, (input.len() - index) as u32))? {
-            (token_len, Token::WhiteSpace) => {
-                if token_len == 0 && index == input.len() {
+            (token_len, Token::WhiteSpace) => {
+                if token_len == 0 && index == input.len() {
                     break;
                 } else if token_len == 0 {
                     return Err(ParserError::unexpected_token(
@@ -62,12 +62,20 @@ pub(crate) fn tokenize(path: &str, input: StrTendril) -> Result<Vec<SpannedToken>> {
                     .into());
                 }
-                if input.as_bytes()[index] == b'\n' {
-                    line_no += 1;
-                    line_start = index + token_len;
-                }
-                index += token_len;
+
+                if input.as_bytes()[index] == b'\n' {
+                    line_no += 1;
+                    line_start = index + token_len;
+                }
+
+                index += token_len;
             }
             (token_len, token) => {
                 let mut span = Span::new(
diff --git a/parser/src/tokenizer/token.rs b/parser/src/tokenizer/token.rs
index d2850ed39f..2ee7c55ff7 100644
--- a/parser/src/tokenizer/token.rs
+++ b/parser/src/tokenizer/token.rs
@@ -259,7 +259,7 @@ impl fmt::Display for Token {
             False => write!(f, "false"),
             AddressLit(s) => write!(f, "{}", s),
             CharLit(s) => write!(f, "{}", s),
-            WhiteSpace => write!(f, "whitespace"), 
+            WhiteSpace => write!(f, "whitespace"),
 
             At => write!(f, "@"),
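
The pattern this series converges on — lexing functions returning `Result<(usize, Token)>` instead of `(usize, Option<Token>)`, with a dedicated `Token::WhiteSpace` variant standing in for the old `(1, None)` "skip one byte" case — can be sketched standalone. Below is a minimal illustration of that control flow; the `Token`, `LexError`, `eat`, and `tokenize` names are toy stand-ins for this sketch, not Leo's real types or API:

```rust
// Sketch of a Result-based lexing loop in the style adopted by this series.
#[derive(Debug, PartialEq)]
enum Token {
    Int(String),
    Plus,
    WhiteSpace, // replaces the old `(1, None)` "skip" convention
}

#[derive(Debug)]
enum LexError {
    EmptyInput,
    CouldNotLex(String),
}

/// Returns how many bytes were consumed and the token they form,
/// or a specific error instead of an ambiguous `(0, None)`.
fn eat(input: &str) -> Result<(usize, Token), LexError> {
    let bytes = input.as_bytes();
    match bytes.first().copied() {
        None => Err(LexError::EmptyInput),
        Some(b) if b.is_ascii_whitespace() => Ok((1, Token::WhiteSpace)),
        Some(b'+') => Ok((1, Token::Plus)),
        Some(b) if b.is_ascii_digit() => {
            let len = bytes.iter().take_while(|b| b.is_ascii_digit()).count();
            Ok((len, Token::Int(input[..len].to_string())))
        }
        _ => Err(LexError::CouldNotLex(input.to_string())),
    }
}

fn tokenize(mut input: &str) -> Result<Vec<Token>, LexError> {
    let mut tokens = Vec::new();
    while !input.is_empty() {
        // `?` propagates lexer errors upward, mirroring `Token::eat(...)?`
        // in the patched tokenizer loop in mod.rs.
        let (len, token) = eat(input)?;
        if token != Token::WhiteSpace {
            tokens.push(token);
        }
        // Always advance past consumed bytes, including whitespace —
        // skipping this for the WhiteSpace arm is the infinite loop
        // that patch 2 of the series fixes.
        input = &input[len..];
    }
    Ok(tokens)
}

fn main() {
    assert!(matches!(tokenize("1 + 23"), Ok(t) if t.len() == 3));
    assert!(tokenize("1 $ 2").is_err()); // '$' surfaces as CouldNotLex
}
```

The design point the sketch illustrates: once the lexer returns `Result`, "nothing matched" stops being silently conflated with "skip this byte", so the tokenizer loop can no longer loop forever on a zero-length result — every outcome is either a token with a positive length, a whitespace skip, or a typed error carrying the offending text.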