From 736c6af72db0f908f9ccc30d13456d76bc1703a3 Mon Sep 17 00:00:00 2001 From: gluax <16431709+gluax@users.noreply.github.com> Date: Mon, 28 Feb 2022 14:45:32 -0800 Subject: [PATCH 1/7] a bunch of parser bug fixes so far --- compiler/parser/src/parser/file.rs | 2 - compiler/parser/src/tokenizer/lexer.rs | 91 +++++++++++--------------- leo/errors/src/parser/parser_errors.rs | 74 ++++++++++++++++++--- 3 files changed, 102 insertions(+), 65 deletions(-) diff --git a/compiler/parser/src/parser/file.rs b/compiler/parser/src/parser/file.rs index 295ec3b650..4d97d4185a 100644 --- a/compiler/parser/src/parser/file.rs +++ b/compiler/parser/src/parser/file.rs @@ -114,8 +114,6 @@ impl ParserContext<'_> { let (args, _, span) = self.parse_paren_comma_list(|p| { Ok(if let Some(ident) = p.eat_identifier() { Some(ident.name) - } else if let Some((int, _)) = p.eat_int() { - Some(Symbol::intern(&int.value)) } else { let token = p.expect_any()?; p.emit_err(ParserError::unexpected_str(&token.token, "ident or int", &token.span)); diff --git a/compiler/parser/src/tokenizer/lexer.rs b/compiler/parser/src/tokenizer/lexer.rs index be73b78d95..f97a3e1700 100644 --- a/compiler/parser/src/tokenizer/lexer.rs +++ b/compiler/parser/src/tokenizer/lexer.rs @@ -66,9 +66,9 @@ impl Token { /// /// Returns a `char` if a character can be eaten, otherwise returns [`None`]. /// - fn eat_char(input_tendril: StrTendril, escaped: bool, hex: bool, unicode: bool) -> Option { + fn eat_char(input_tendril: StrTendril, escaped: bool, hex: bool, unicode: bool) -> Result { if input_tendril.is_empty() { - return None; + return Err(ParserError::lexer_empty_input_tendril().into()); } if escaped { @@ -76,22 +76,22 @@ impl Token { let escaped = &string[1..string.len()]; if escaped.len() != 1 { - return None; + return Err(ParserError::lexer_escaped_char_incorrect_length(escaped).into()); } if let Some(character) = escaped.chars().next() { return match character { - '0' => Some(Char::Scalar(0 as char)), - 't' => Some(Char::Scalar(9 as char)), - 'n' => Some(Char::Scalar(10 as char)), - 'r' => Some(Char::Scalar(13 as char)), - '\"' => Some(Char::Scalar(34 as char)), - '\'' => Some(Char::Scalar(39 as char)), - '\\' => Some(Char::Scalar(92 as char)), - _ => None, + '0' => Ok(Char::Scalar(0 as char)), + 't' => Ok(Char::Scalar(9 as char)), + 'n' => Ok(Char::Scalar(10 as char)), + 'r' => Ok(Char::Scalar(13 as char)), + '\"' => Ok(Char::Scalar(34 as char)), + '\'' => Ok(Char::Scalar(39 as char)), + '\\' => Ok(Char::Scalar(92 as char)), + _ => return Err(ParserError::lexer_expected_valid_escaped_char(character).into()), }; } else { - return None; + return Err(ParserError::lexer_unclosed_escaped_char().into()); } } @@ -100,48 +100,52 @@ impl Token { let hex_string = &string[2..string.len()]; if hex_string.len() != 2 { - return None; + return Err(ParserError::lexer_escaped_hex_incorrect_length(hex_string).into()); } if let Ok(ascii_number) = u8::from_str_radix(hex_string, 16) { // According to RFC, we allow only values less than 128. if ascii_number > 127 { - return None; + return Err(ParserError::lexer_expected_valid_hex_char(ascii_number).into()); } - return Some(Char::Scalar(ascii_number as char)); + return Ok(Char::Scalar(ascii_number as char)); } } if unicode { let string = input_tendril.to_string(); - if &string[string.len() - 1..] != "}" { - return None; + if string.find('}').is_none() { + return Err(ParserError::lexer_unclosed_escaped_unicode_char(string).into()); } let unicode_number = &string[3..string.len() - 1]; let len = unicode_number.len(); if !(1..=6).contains(&len) { - return None; + return Err(ParserError::lexer_invalid_escaped_unicode_length(unicode_number).into()); } if let Ok(hex) = u32::from_str_radix(unicode_number, 16) { if let Some(character) = std::char::from_u32(hex) { // scalar - return Some(Char::Scalar(character)); + return Ok(Char::Scalar(character)); } else if hex <= 0x10FFFF { - return Some(Char::NonScalar(hex)); + return Ok(Char::NonScalar(hex)); + } else { + return Err(ParserError::lexer_invalid_character_exceeded_max_value(unicode_number).into()); } } } if input_tendril.to_string().chars().count() != 1 { - return None; + // If char doesn't close. + return Err(ParserError::lexer_char_not_closed(&input_tendril[0..]).into()); } else if let Some(character) = input_tendril.to_string().chars().next() { - return Some(Char::Scalar(character)); + // If its a simple char. + return Ok(Char::Scalar(character)); } - None + Err(ParserError::lexer_invalid_char(input_tendril.to_string()).into()) } /// @@ -157,18 +161,8 @@ impl Token { return Err(ParserError::lexer_eat_integer_leading_zero(String::from_utf8_lossy(input)).into()); } let mut i = 1; - let mut is_hex = false; while i < input.len() { - if i == 1 && input[0] == b'0' && input[i] == b'x' { - is_hex = true; - i += 1; - continue; - } - if is_hex { - if !input[i].is_ascii_hexdigit() { - break; - } - } else if !input[i].is_ascii_digit() { + if !input[i].is_ascii_digit() { break; } @@ -258,26 +252,17 @@ impl Token { } if !in_escape { - match Self::eat_char( + let character = Self::eat_char( input_tendril.subtendril(start as u32, len as u32), escaped, hex, unicode, - ) { - Some(character) => { - len = 1; - escaped = false; - hex = false; - unicode = false; - string.push(character.into()); - } - None => { - return Err(ParserError::lexer_expected_valid_escaped_char( - input_tendril.subtendril(start as u32, len as u32), - ) - .into()) - } - } + )?; + len = 1; + escaped = false; + hex = false; + unicode = false; + string.push(character.into()); } i += 1; @@ -332,10 +317,8 @@ impl Token { return Err(ParserError::lexer_char_not_closed(String::from_utf8_lossy(&input[0..i])).into()); } - return match Self::eat_char(input_tendril.subtendril(1, (i - 1) as u32), escaped, hex, unicode) { - Some(character) => Ok((i + 1, Token::CharLit(character))), - None => Err(ParserError::lexer_invalid_char(String::from_utf8_lossy(&input[0..i - 1])).into()), - }; + let character = Self::eat_char(input_tendril.subtendril(1, (i - 1) as u32), escaped, hex, unicode)?; + return Ok((i + 1, Token::CharLit(character))); } x if x.is_ascii_digit() => { return Self::eat_integer(&input_tendril); diff --git a/leo/errors/src/parser/parser_errors.rs b/leo/errors/src/parser/parser_errors.rs index 15cbb19241..c3c2f23de5 100644 --- a/leo/errors/src/parser/parser_errors.rs +++ b/leo/errors/src/parser/parser_errors.rs @@ -236,23 +236,23 @@ create_errors!( @backtraced lexer_eat_integer_leading_zero { args: (input: impl Display), - msg: format!("Tried to eat integer but found a leading zero on {}.", input), + msg: format!("Tried to eat integer but found a leading zero on `{}`.", input), help: None, } /// When an integer is started with a leading zero. @backtraced - lexer_expected_valid_escaped_char { + lexer_expected_valid_escaped_char { args: (input: impl Display), - msg: format!("Expected a valid escape character but found {}.", input), + msg: format!("Expected a valid escape character but found `{}`.", input), help: None, - } + } /// When a string is not properly closed. @backtraced lexer_string_not_closed { args: (input: impl Display), - msg: format!("Expected a closed string but found {}.", input), + msg: format!("Expected a closed string but found `{}`.", input), help: None, } @@ -260,7 +260,7 @@ create_errors!( @backtraced lexer_char_not_closed { args: (input: impl Display), - msg: format!("Expected a closed char but found {}.", input), + msg: format!("Expected a closed char but found `{}`.", input), help: None, } @@ -268,7 +268,7 @@ create_errors!( @backtraced lexer_invalid_char { args: (input: impl Display), - msg: format!("Expected valid character but found {}.", input), + msg: format!("Expected valid character but found `{}`.", input), help: None, } @@ -284,7 +284,7 @@ create_errors!( @backtraced lexer_block_comment_does_not_close_before_eof { args: (input: impl Display), - msg: format!("Block comment does not close with content: {}.", input), + msg: format!("Block comment does not close with content: `{}`.", input), help: None, } @@ -292,7 +292,63 @@ create_errors!( @backtraced could_not_lex { args: (input: impl Display), - msg: format!("Could not lex the following content: {}.", input), + msg: format!("Could not lex the following content: `{}`.", input), help: None, } + + /// When a escaped character was given more than one char to escape. + @backtraced + lexer_escaped_char_incorrect_length { + args: (input: impl Display), + msg: format!("Could not lex the following escaped char due to being given more than one char: `{}`.", input), + help: None, + } + + /// When a escape was given but no following character + @backtraced + lexer_unclosed_escaped_char { + args: (), + msg: "There was no escaped character following the escape char symbol `\\`.", + help: None, + } + + /// When a escaped hex was given more than two chars to escape. + @backtraced + lexer_escaped_hex_incorrect_length { + args: (input: impl Display), + msg: format!("Could not lex the following escaped hex due to being given more than two chars: `{}`.", input), + help: None, + } + + /// When a valid hex character was expected. + @backtraced + lexer_expected_valid_hex_char { + args: (input: impl Display), + msg: format!("Expected a valid hex character but found `{}`.", input), + help: None, + } + + /// When a escaped unicode char was given but no following closing symbol. + @backtraced + lexer_unclosed_escaped_unicode_char { + args: (input: impl Display), + msg: format!("There was no closing `}}` after a escaped unicode `{}`.", input), + help: None, + } + + /// When a escaped unicode char was given but it had an incorrect length. + @backtraced + lexer_invalid_escaped_unicode_length { + args: (input: impl Display), + msg: format!("The escaped unicode char `{}` is not within valid length of [1, 6].", input), + help: None, + } + + /// When a escaped unicode char was given but exceeded maximum value. + @backtraced + lexer_invalid_character_exceeded_max_value { + args: (input: impl Display), + msg: format!("The escaped unicode char `{}` is greater than 0x10FFFF.", input), + help: None, + } ); From 029a42df0afe73d5bc1705a7c49a8ab7d58e23c6 Mon Sep 17 00:00:00 2001 From: gluax <16431709+gluax@users.noreply.github.com> Date: Mon, 28 Feb 2022 14:47:22 -0800 Subject: [PATCH 2/7] regen tests so far --- .../expression/literal/char_fail.leo.out | 70 +++++++++---------- .../expression/literal/string_fail.leo.out | 14 ++-- 2 files changed, 42 insertions(+), 42 deletions(-) diff --git a/tests/expectations/parser/parser/expression/literal/char_fail.leo.out b/tests/expectations/parser/parser/expression/literal/char_fail.leo.out index 4642d0114d..971fb20b36 100644 --- a/tests/expectations/parser/parser/expression/literal/char_fail.leo.out +++ b/tests/expectations/parser/parser/expression/literal/char_fail.leo.out @@ -2,38 +2,38 @@ namespace: Token expectation: Fail outputs: - - "Error [EPAR0370028]: Expected a closed char but found '\\'." - - "Error [EPAR0370028]: Expected a closed char but found 'a." - - "Error [EPAR0370029]: Expected valid character but found ." - - "Error [EPAR0370029]: Expected valid character but found '\\x9." - - "Error [EPAR0370029]: Expected valid character but found '\\x." - - "Error [EPAR0370029]: Expected valid character but found '\\x7." - - "Error [EPAR0370029]: Expected valid character but found '\\x." - - "Error [EPAR0370029]: Expected valid character but found '\\x8." - - "Error [EPAR0370029]: Expected valid character but found '\\xc." - - "Error [EPAR0370029]: Expected valid character but found '\\xc." - - "Error [EPAR0370029]: Expected valid character but found '\\xD." - - "Error [EPAR0370029]: Expected valid character but found '\\xC." - - "Error [EPAR0370029]: Expected valid character but found '\\xe." - - "Error [EPAR0370029]: Expected valid character but found '\\x9." - - "Error [EPAR0370029]: Expected valid character but found 'abcdef." - - "Error [EPAR0370029]: Expected valid character but found '\\t\\." - - "Error [EPAR0370029]: Expected valid character but found '\\." - - "Error [EPAR0370029]: Expected valid character but found '\\." - - "Error [EPAR0370029]: Expected valid character but found '\\." - - "Error [EPAR0370029]: Expected valid character but found '\\." - - "Error [EPAR0370029]: Expected valid character but found '\\." - - "Error [EPAR0370029]: Expected valid character but found '\\." - - "Error [EPAR0370029]: Expected valid character but found '\\." - - "Error [EPAR0370029]: Expected valid character but found '\\." - - "Error [EPAR0370026]: Expected a valid escape character but found 117." - - "Error [EPAR0370029]: Expected valid character but found '\\u{bbbbb}\\u{aaaa." - - "Error [EPAR0370026]: Expected a valid escape character but found 117." - - "Error [EPAR0370026]: Expected a valid escape character but found 117." - - "Error [EPAR0370026]: Expected a valid escape character but found 117." - - "Error [EPAR0370029]: Expected valid character but found '\\u{2764." - - "Error [EPAR0370029]: Expected valid character but found '\\u{276g." - - "Error [EPAR0370026]: Expected a valid escape character but found 117." - - "Error [EPAR0370026]: Expected a valid escape character but found 117." - - "Error [EPAR0370026]: Expected a valid escape character but found 117." - - "Error [EPAR0370029]: Expected valid character but found '😭😂�." + - "Error [EPAR0370028]: Expected a closed char but found `'\\'`." + - "Error [EPAR0370028]: Expected a closed char but found `'a`." + - "Error [EPAR0370024]: Expected more characters to lex but found none." + - "Error [EPAR0370036]: Expected a valid hex character but found `154`." + - "Error [EPAR0370035]: Could not lex the following escaped hex due to being given more than two chars: `7`." + - "Error [EPAR0370028]: Expected a closed char but found `\\x7g`." + - "Error [EPAR0370035]: Could not lex the following escaped hex due to being given more than two chars: `z`." + - "Error [EPAR0370036]: Expected a valid hex character but found `128`." + - "Error [EPAR0370036]: Expected a valid hex character but found `193`." + - "Error [EPAR0370036]: Expected a valid hex character but found `194`." + - "Error [EPAR0370036]: Expected a valid hex character but found `223`." + - "Error [EPAR0370036]: Expected a valid hex character but found `192`." + - "Error [EPAR0370036]: Expected a valid hex character but found `224`." + - "Error [EPAR0370036]: Expected a valid hex character but found `159`." + - "Error [EPAR0370028]: Expected a closed char but found `abcdefg`." + - "Error [EPAR0370033]: Could not lex the following escaped char due to being given more than one char: `t\\t`." + - "Error [EPAR0370026]: Expected a valid escape character but found `a`." + - "Error [EPAR0370026]: Expected a valid escape character but found `z`." + - "Error [EPAR0370026]: Expected a valid escape character but found `A`." + - "Error [EPAR0370026]: Expected a valid escape character but found `Z`." + - "Error [EPAR0370026]: Expected a valid escape character but found `1`." + - "Error [EPAR0370026]: Expected a valid escape character but found `9`." + - "Error [EPAR0370026]: Expected a valid escape character but found `*`." + - "Error [EPAR0370035]: Could not lex the following escaped hex due to being given more than two chars: ``." + - "Error [EPAR0370026]: Expected a valid escape character but found `117`." + - "Error [EPAR0370038]: The escaped unicode char `bbbbb}\\u{aaaa` is not within valid length of [1, 6]." + - "Error [EPAR0370026]: Expected a valid escape character but found `117`." + - "Error [EPAR0370026]: Expected a valid escape character but found `117`." + - "Error [EPAR0370026]: Expected a valid escape character but found `117`." + - "Error [EPAR0370037]: There was no closing `}` after a escaped unicode `\\u{2764z`." + - "Error [EPAR0370028]: Expected a closed char but found `\\u{276g}`." + - "Error [EPAR0370026]: Expected a valid escape character but found `117`." + - "Error [EPAR0370026]: Expected a valid escape character but found `117`." + - "Error [EPAR0370026]: Expected a valid escape character but found `117`." + - "Error [EPAR0370028]: Expected a closed char but found `😭😂😘`." diff --git a/tests/expectations/parser/parser/expression/literal/string_fail.leo.out b/tests/expectations/parser/parser/expression/literal/string_fail.leo.out index 2b1fcd8d31..9910d77db3 100644 --- a/tests/expectations/parser/parser/expression/literal/string_fail.leo.out +++ b/tests/expectations/parser/parser/expression/literal/string_fail.leo.out @@ -2,10 +2,10 @@ namespace: Token expectation: Fail outputs: - - "Error [EPAR0370027]: Expected a closed string but found \"Hello world!." - - "Error [EPAR0370027]: Expected a closed string but found \"\\\"." - - "Error [EPAR0370026]: Expected a valid escape character but found \\l." - - "Error [EPAR0370027]: Expected a closed string but found \"\\uaaa\"." - - "Error [EPAR0370027]: Expected a closed string but found \"\\u\"." - - "Error [EPAR0370026]: Expected a valid escape character but found \\xFF." - - "Error [EPAR0370027]: Expected a closed string but found \"\\x\"." + - "Error [EPAR0370027]: Expected a closed string but found `\"Hello world!`." + - "Error [EPAR0370027]: Expected a closed string but found `\"\\\"`." + - "Error [EPAR0370026]: Expected a valid escape character but found `l`." + - "Error [EPAR0370027]: Expected a closed string but found `\"\\uaaa\"`." + - "Error [EPAR0370027]: Expected a closed string but found `\"\\u\"`." + - "Error [EPAR0370036]: Expected a valid hex character but found `255`." + - "Error [EPAR0370027]: Expected a closed string but found `\"\\x\"`." From bd1d602f6dd83a33d35fef1139fe67b86d15bb36 Mon Sep 17 00:00:00 2001 From: gluax <16431709+gluax@users.noreply.github.com> Date: Fri, 4 Mar 2022 09:28:29 -0800 Subject: [PATCH 3/7] fix annotation error msg --- compiler/parser/src/parser/file.rs | 2 +- .../parser/parser/functions/annotated_arg_not_ident_int.leo.out | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler/parser/src/parser/file.rs b/compiler/parser/src/parser/file.rs index 4d97d4185a..d611a505b8 100644 --- a/compiler/parser/src/parser/file.rs +++ b/compiler/parser/src/parser/file.rs @@ -116,7 +116,7 @@ impl ParserContext<'_> { Some(ident.name) } else { let token = p.expect_any()?; - p.emit_err(ParserError::unexpected_str(&token.token, "ident or int", &token.span)); + p.emit_err(ParserError::unexpected_str(&token.token, "ident", &token.span)); None }) })?; diff --git a/tests/expectations/parser/parser/functions/annotated_arg_not_ident_int.leo.out b/tests/expectations/parser/parser/functions/annotated_arg_not_ident_int.leo.out index 34585e8f24..4c32e5ad4b 100644 --- a/tests/expectations/parser/parser/functions/annotated_arg_not_ident_int.leo.out +++ b/tests/expectations/parser/parser/functions/annotated_arg_not_ident_int.leo.out @@ -2,4 +2,4 @@ namespace: Parse expectation: Fail outputs: - - "Error [EPAR0370009]: unexpected string: expected 'ident or int', got '?'\n --> test:3:6\n |\n 3 | @foo(?, bar, ?)\n | ^\nError [EPAR0370009]: unexpected string: expected 'ident or int', got '?'\n --> test:3:14\n |\n 3 | @foo(?, bar, ?)\n | ^\nError [EPAR0370017]: \"@context(...)\" is deprecated. Did you mean @test annotation?\n --> test:8:2\n |\n 8 | @context // recovery witness\n | ^^^^^^^" + - "Error [EPAR0370009]: unexpected string: expected 'ident', got '?'\n --> test:3:6\n |\n 3 | @foo(?, bar, ?)\n | ^\nError [EPAR0370009]: unexpected string: expected 'ident', got '?'\n --> test:3:14\n |\n 3 | @foo(?, bar, ?)\n | ^\nError [EPAR0370017]: \"@context(...)\" is deprecated. Did you mean @test annotation?\n --> test:8:2\n |\n 8 | @context // recovery witness\n | ^^^^^^^" From 85764aa39460339871ded82fb626f441f10a8caf Mon Sep 17 00:00:00 2001 From: gluax <16431709+gluax@users.noreply.github.com> Date: Fri, 4 Mar 2022 10:03:42 -0800 Subject: [PATCH 4/7] fix self discrep --- compiler/parser/src/parser/file.rs | 14 +++++++++++--- leo/errors/src/parser/parser_errors.rs | 8 ++++++++ .../parser/circuits/self_not_first_fail.leo.out | 5 +++++ tests/parser/circuits/self_not_first_fail.leo | 10 ++++++++++ 4 files changed, 34 insertions(+), 3 deletions(-) create mode 100644 tests/expectations/parser/parser/circuits/self_not_first_fail.leo.out create mode 100644 tests/parser/circuits/self_not_first_fail.leo diff --git a/compiler/parser/src/parser/file.rs b/compiler/parser/src/parser/file.rs index d611a505b8..4775468001 100644 --- a/compiler/parser/src/parser/file.rs +++ b/compiler/parser/src/parser/file.rs @@ -361,7 +361,7 @@ impl ParserContext<'_> { /// /// Returns a [`FunctionInput`] AST node if the next tokens represent a function parameter. /// - pub fn parse_function_parameters(&mut self) -> Result { + pub fn parse_function_parameters(&mut self, first: bool) -> Result { let const_ = self.eat(Token::Const); let mutable = self.eat(Token::Mut); let reference = self.eat(Token::Ampersand); @@ -374,7 +374,9 @@ impl ParserContext<'_> { self.expect_ident()? }; if name.name == sym::SelfLower { - if let Some(mutable) = &mutable { + if !first { + return Err(ParserError::parser_self_outside_first_argument().into()); + } else if let Some(mutable) = &mutable { self.emit_err(ParserError::mut_self_parameter(&(&mutable.span + &name.span))); return Ok(Self::build_ref_self(name, mutable)); } else if let Some(reference) = &reference { @@ -431,7 +433,13 @@ impl ParserContext<'_> { let name = self.expect_ident()?; // Parse parameters. - let (inputs, ..) = self.parse_paren_comma_list(|p| p.parse_function_parameters().map(Some))?; + let mut first = true; + let (inputs, ..) = self.parse_paren_comma_list(|p| { + let param = p.parse_function_parameters(first).map(Some); + first = false; + param + } + )?; // Parse return type. let output = if self.eat(Token::Arrow).is_some() { diff --git a/leo/errors/src/parser/parser_errors.rs b/leo/errors/src/parser/parser_errors.rs index c3c2f23de5..79634004f4 100644 --- a/leo/errors/src/parser/parser_errors.rs +++ b/leo/errors/src/parser/parser_errors.rs @@ -351,4 +351,12 @@ create_errors!( msg: format!("The escaped unicode char `{}` is greater than 0x10FFFF.", input), help: None, } + + /// When a function recieved a self argument outside the first argument. + @backtraced + parser_self_outside_first_argument { + args: (), + msg: "A function received a self argument as not the first argument.", + help: None, + } ); diff --git a/tests/expectations/parser/parser/circuits/self_not_first_fail.leo.out b/tests/expectations/parser/parser/circuits/self_not_first_fail.leo.out new file mode 100644 index 0000000000..12aa9c0dc5 --- /dev/null +++ b/tests/expectations/parser/parser/circuits/self_not_first_fail.leo.out @@ -0,0 +1,5 @@ +--- +namespace: Parse +expectation: Fail +outputs: + - "Error [EPAR0370040]: A function received a self argument as not the first argument." diff --git a/tests/parser/circuits/self_not_first_fail.leo b/tests/parser/circuits/self_not_first_fail.leo new file mode 100644 index 0000000000..51ea2fc20e --- /dev/null +++ b/tests/parser/circuits/self_not_first_fail.leo @@ -0,0 +1,10 @@ +/* +namespace: Parse +expectation: Fail +*/ + +circuit X { + function x(foo: u32, &self) { + return (); + } +} \ No newline at end of file From 308512fab98efe510ccb2833a6482d7ab2fac3cb Mon Sep 17 00:00:00 2001 From: gluax <16431709+gluax@users.noreply.github.com> Date: Fri, 4 Mar 2022 10:26:34 -0800 Subject: [PATCH 5/7] add tests --- compiler/parser/src/tokenizer/lexer.rs | 4 ++++ leo/errors/src/parser/parser_errors.rs | 7 +++++++ .../parser/parser/circuits/self_not_first_fail.leo.out | 2 +- .../parser/parser/expression/literal/char_fail.leo.out | 2 ++ .../parser/functions/annotated_arg_not_ident.leo.out | 5 +++++ .../parser/parser/statement/hex_int_fail.leo.out | 7 +++++++ tests/parser/expression/literal/char_fail.leo | 2 ++ ...g_not_ident_int.leo => annotated_arg_not_ident.leo} | 6 ++++++ tests/parser/statement/hex_int_fail.leo | 10 ++++++++++ 9 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 tests/expectations/parser/parser/functions/annotated_arg_not_ident.leo.out create mode 100644 tests/expectations/parser/parser/statement/hex_int_fail.leo.out rename tests/parser/functions/{annotated_arg_not_ident_int.leo => annotated_arg_not_ident.leo} (71%) create mode 100644 tests/parser/statement/hex_int_fail.leo diff --git a/compiler/parser/src/tokenizer/lexer.rs b/compiler/parser/src/tokenizer/lexer.rs index f97a3e1700..631e0f853d 100644 --- a/compiler/parser/src/tokenizer/lexer.rs +++ b/compiler/parser/src/tokenizer/lexer.rs @@ -161,7 +161,11 @@ impl Token { return Err(ParserError::lexer_eat_integer_leading_zero(String::from_utf8_lossy(input)).into()); } let mut i = 1; + while i < input.len() { + if i == 1 && input[0] == b'0' && input[i] == b'x' { + return Err(ParserError::lexer_hex_number_provided(&input_tendril[0..3]).into()); + } if !input[i].is_ascii_digit() { break; } diff --git a/leo/errors/src/parser/parser_errors.rs b/leo/errors/src/parser/parser_errors.rs index 79634004f4..9ad6bf1702 100644 --- a/leo/errors/src/parser/parser_errors.rs +++ b/leo/errors/src/parser/parser_errors.rs @@ -351,6 +351,13 @@ create_errors!( msg: format!("The escaped unicode char `{}` is greater than 0x10FFFF.", input), help: None, } + /// When a hex number is provided. + @backtraced + lexer_hex_number_provided { + args: (input: impl Display), + msg: format!("A hex number `{}..` was provided but hex is not allowed.", input), + help: None, + } /// When a function recieved a self argument outside the first argument. @backtraced diff --git a/tests/expectations/parser/parser/circuits/self_not_first_fail.leo.out b/tests/expectations/parser/parser/circuits/self_not_first_fail.leo.out index 12aa9c0dc5..5a73599380 100644 --- a/tests/expectations/parser/parser/circuits/self_not_first_fail.leo.out +++ b/tests/expectations/parser/parser/circuits/self_not_first_fail.leo.out @@ -2,4 +2,4 @@ namespace: Parse expectation: Fail outputs: - - "Error [EPAR0370040]: A function received a self argument as not the first argument." + - "Error [EPAR0370041]: A function received a self argument as not the first argument." diff --git a/tests/expectations/parser/parser/expression/literal/char_fail.leo.out b/tests/expectations/parser/parser/expression/literal/char_fail.leo.out index 971fb20b36..3378569ca7 100644 --- a/tests/expectations/parser/parser/expression/literal/char_fail.leo.out +++ b/tests/expectations/parser/parser/expression/literal/char_fail.leo.out @@ -36,4 +36,6 @@ outputs: - "Error [EPAR0370026]: Expected a valid escape character but found `117`." - "Error [EPAR0370026]: Expected a valid escape character but found `117`." - "Error [EPAR0370026]: Expected a valid escape character but found `117`." + - "Error [EPAR0370039]: The escaped unicode char `110000` is greater than 0x10FFFF." + - "Error [EPAR0370037]: There was no closing `}` after a escaped unicode `\\u{af🦀`." - "Error [EPAR0370028]: Expected a closed char but found `😭😂😘`." diff --git a/tests/expectations/parser/parser/functions/annotated_arg_not_ident.leo.out b/tests/expectations/parser/parser/functions/annotated_arg_not_ident.leo.out new file mode 100644 index 0000000000..35a7e35ca2 --- /dev/null +++ b/tests/expectations/parser/parser/functions/annotated_arg_not_ident.leo.out @@ -0,0 +1,5 @@ +--- +namespace: Parse +expectation: Fail +outputs: + - "Error [EPAR0370009]: unexpected string: expected 'ident', got '?'\n --> test:3:6\n |\n 3 | @foo(?, bar, ?)\n | ^\nError [EPAR0370009]: unexpected string: expected 'ident', got '?'\n --> test:3:14\n |\n 3 | @foo(?, bar, ?)\n | ^\nError [EPAR0370009]: unexpected string: expected 'ident', got '123'\n --> test:8:6\n |\n 8 | @bar(123) // ints not vali\n | ^^^\nError [EPAR0370017]: \"@context(...)\" is deprecated. Did you mean @test annotation?\n --> test:14:2\n |\n 14 | @context // recovery witness\n | ^^^^^^^" diff --git a/tests/expectations/parser/parser/statement/hex_int_fail.leo.out b/tests/expectations/parser/parser/statement/hex_int_fail.leo.out new file mode 100644 index 0000000000..c36f660260 --- /dev/null +++ b/tests/expectations/parser/parser/statement/hex_int_fail.leo.out @@ -0,0 +1,7 @@ +--- +namespace: ParseStatement +expectation: Fail +outputs: + - "Error [EPAR0370040]: A hex number `0x4..` was provided but hex is not allowed." + - "Error [EPAR0370040]: A hex number `0xA..` was provided but hex is not allowed." + - "Error [EPAR0370040]: A hex number `0xF..` was provided but hex is not allowed." diff --git a/tests/parser/expression/literal/char_fail.leo b/tests/parser/expression/literal/char_fail.leo index 982cc3f801..c5c0b94eb3 100644 --- a/tests/parser/expression/literal/char_fail.leo +++ b/tests/parser/expression/literal/char_fail.leo @@ -43,5 +43,7 @@ expectation: Fail '\u00000000' '\u01000000' '\u9999999' +'\u{110000}' +'\u{af🦀' '😭😂😘' diff --git a/tests/parser/functions/annotated_arg_not_ident_int.leo b/tests/parser/functions/annotated_arg_not_ident.leo similarity index 71% rename from tests/parser/functions/annotated_arg_not_ident_int.leo rename to tests/parser/functions/annotated_arg_not_ident.leo index 20780213f0..e927be8d5c 100644 --- a/tests/parser/functions/annotated_arg_not_ident_int.leo +++ b/tests/parser/functions/annotated_arg_not_ident.leo @@ -8,6 +8,12 @@ function x() { return (); } +@bar(123) // ints not vali +function x() { + return (); +} + + @context // recovery witness function x() { return (); diff --git a/tests/parser/statement/hex_int_fail.leo b/tests/parser/statement/hex_int_fail.leo new file mode 100644 index 0000000000..f8a4343770 --- /dev/null +++ b/tests/parser/statement/hex_int_fail.leo @@ -0,0 +1,10 @@ +/* +namespace: ParseStatement +expectation: Fail +*/ + +let x = 0x40u32; + +let y: u32 = 0xAAu32; + +let z = 0xFFu8; \ No newline at end of file From 789862103e4cea8b20ac64e9da354eb81f3852a7 Mon Sep 17 00:00:00 2001 From: gluax <16431709+gluax@users.noreply.github.com> Date: Fri, 4 Mar 2022 11:18:28 -0800 Subject: [PATCH 6/7] another char bug fix --- compiler/parser/src/parser/file.rs | 3 +- compiler/parser/src/tokenizer/lexer.rs | 32 +++++++++++++++---- .../expression/literal/char_fail.leo.out | 3 +- tests/parser/expression/literal/char_fail.leo | 1 + 4 files changed, 29 insertions(+), 10 deletions(-) diff --git a/compiler/parser/src/parser/file.rs b/compiler/parser/src/parser/file.rs index 4775468001..6318e66600 100644 --- a/compiler/parser/src/parser/file.rs +++ b/compiler/parser/src/parser/file.rs @@ -438,8 +438,7 @@ impl ParserContext<'_> { let param = p.parse_function_parameters(first).map(Some); first = false; param - } - )?; + })?; // Parse return type. let output = if self.eat(Token::Arrow).is_some() { diff --git a/compiler/parser/src/tokenizer/lexer.rs b/compiler/parser/src/tokenizer/lexer.rs index 631e0f853d..ce74b35547 100644 --- a/compiler/parser/src/tokenizer/lexer.rs +++ b/compiler/parser/src/tokenizer/lexer.rs @@ -176,7 +176,7 @@ impl Token { } /// Returns the number of bytes in an emoji via a bit mask. - fn utf8_byte_count(byte: u8) -> u8 { + fn utf8_byte_count(byte: u8) -> usize { let mut mask = 0x80; let mut result = 0; while byte & mask > 0 { @@ -205,7 +205,7 @@ impl Token { x if x.is_ascii_whitespace() => return Ok((1, Token::WhiteSpace)), b'"' => { let mut i = 1; - let mut len: u8 = 1; + let mut len = 1; let mut start = 1; let mut in_escape = false; let mut escaped = false; @@ -218,7 +218,7 @@ impl Token { // If it's an emoji get the length. if input[i] & 0x80 > 0 { len = Self::utf8_byte_count(input[i]); - i += (len as usize) - 1; + i += len - 1; } if !in_escape { @@ -287,14 +287,27 @@ impl Token { let mut in_escape = false; let mut escaped = false; let mut hex = false; - let mut unicode = false; + let mut escaped_unicode = false; + let mut unicode_char = false; let mut end = false; while i < input.len() { - if !in_escape { + if input[i] & 0x80 > 0 && !unicode_char { + i += Self::utf8_byte_count(input[i]); + unicode_char = true; + continue; + } else if input[i] & 0x80 > 0 && unicode_char { + i += Self::utf8_byte_count(input[i]); + return Err(ParserError::lexer_invalid_char(&input_tendril[0..i]).into()); + } else if !in_escape || unicode_char { if input[i] == b'\'' { end = true; break; + } else if unicode_char { + return Err(ParserError::lexer_invalid_char( + &input_tendril[0..input_tendril[1..].find('\'').unwrap_or(i + 1)], + ) + .into()); } else if input[i] == b'\\' { in_escape = true; } @@ -303,7 +316,7 @@ impl Token { hex = true; } else if input[i] == b'u' { if input[i + 1] == b'{' { - unicode = true; + escaped_unicode = true; } else { return Err(ParserError::lexer_expected_valid_escaped_char(input[i]).into()); } @@ -321,7 +334,12 @@ impl Token { return Err(ParserError::lexer_char_not_closed(String::from_utf8_lossy(&input[0..i])).into()); } - let character = Self::eat_char(input_tendril.subtendril(1, (i - 1) as u32), escaped, hex, unicode)?; + let character = Self::eat_char( + input_tendril.subtendril(1, (i - 1) as u32), + escaped, + hex, + escaped_unicode, + )?; return Ok((i + 1, Token::CharLit(character))); } x if x.is_ascii_digit() => { diff --git a/tests/expectations/parser/parser/expression/literal/char_fail.leo.out b/tests/expectations/parser/parser/expression/literal/char_fail.leo.out index 3378569ca7..ece3a8f208 100644 --- a/tests/expectations/parser/parser/expression/literal/char_fail.leo.out +++ b/tests/expectations/parser/parser/expression/literal/char_fail.leo.out @@ -38,4 +38,5 @@ outputs: - "Error [EPAR0370026]: Expected a valid escape character but found `117`." - "Error [EPAR0370039]: The escaped unicode char `110000` is greater than 0x10FFFF." - "Error [EPAR0370037]: There was no closing `}` after a escaped unicode `\\u{af🦀`." - - "Error [EPAR0370028]: Expected a closed char but found `😭😂😘`." + - "Error [EPAR0370029]: Expected valid character but found `'🦀\\`." + - "Error [EPAR0370029]: Expected valid character but found `'😭😂`." diff --git a/tests/parser/expression/literal/char_fail.leo b/tests/parser/expression/literal/char_fail.leo index c5c0b94eb3..12dfdec1f4 100644 --- a/tests/parser/expression/literal/char_fail.leo +++ b/tests/parser/expression/literal/char_fail.leo @@ -45,5 +45,6 @@ expectation: Fail '\u9999999' '\u{110000}' '\u{af🦀' +'🦀\n' '😭😂😘' From 060c13dd8dea8774e3f7e8d5a1e5be4d3c70abcf Mon Sep 17 00:00:00 2001 From: gluax <16431709+gluax@users.noreply.github.com> Date: Fri, 4 Mar 2022 11:21:42 -0800 Subject: [PATCH 7/7] clippy fix --- .../ast-passes/src/canonicalization/canonicalizer.rs | 10 ++++------ compiler/parser/src/tokenizer/mod.rs | 2 +- tests/test-framework/src/fetch.rs | 2 +- tests/test-framework/src/runner.rs | 2 +- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/compiler/ast-passes/src/canonicalization/canonicalizer.rs b/compiler/ast-passes/src/canonicalization/canonicalizer.rs index 4c42d32407..592107d434 100644 --- a/compiler/ast-passes/src/canonicalization/canonicalizer.rs +++ b/compiler/ast-passes/src/canonicalization/canonicalizer.rs @@ -550,9 +550,7 @@ impl ReconstructingReducer for Canonicalizer { for (index, character) in string.iter().enumerate() { let col_start = span.col_start + index + 1 + col_adder; // account for open quote let bytes = span.content.clone().into_bytes(); - let col_stop: usize; - - if bytes[col_start - 1] == b'\\' { + let col_stop = if bytes[col_start - 1] == b'\\' { let mut width = 0; match bytes[col_start] { @@ -569,10 +567,10 @@ impl ReconstructingReducer for Canonicalizer { _ => width += 1, } col_adder += width; - col_stop = col_start + 1 + width; + col_start + 1 + width } else { - col_stop = col_start + 1; - } + col_start + 1 + }; elements.push(SpreadOrExpression::Expression(Expression::Value( ValueExpression::Char(CharValue { diff --git a/compiler/parser/src/tokenizer/mod.rs b/compiler/parser/src/tokenizer/mod.rs index f48b3222e4..81d674b0b1 100644 --- a/compiler/parser/src/tokenizer/mod.rs +++ b/compiler/parser/src/tokenizer/mod.rs @@ -56,7 +56,7 @@ pub(crate) fn tokenize(path: &str, input: StrTendril) -> Result>(path: T, out: &mut Vec<(String, String)>) { if entry.is_dir() { find_tests(entry.as_path(), out); continue; - } else if entry.extension().map(|x| x.to_str()).flatten().unwrap_or_default() != "leo" { + } else if entry.extension().and_then(|x| x.to_str()).unwrap_or_default() != "leo" { continue; } let content = fs::read_to_string(entry.as_path()).expect("failed to read test"); diff --git a/tests/test-framework/src/runner.rs b/tests/test-framework/src/runner.rs index b2d49394c3..3865e2a2e3 100644 --- a/tests/test-framework/src/runner.rs +++ b/tests/test-framework/src/runner.rs @@ -147,7 +147,7 @@ pub fn run_tests(runner: &T, expectation_category: &str) { let mut expected_output = expectations.as_ref().map(|x| x.outputs.iter()); for (i, test) in tests.into_iter().enumerate() { - let expected_output = expected_output.as_mut().map(|x| x.next()).flatten().cloned(); + let expected_output = expected_output.as_mut().and_then(|x| x.next()).cloned(); println!("running test {} @ '{}'", test_name, path.to_str().unwrap()); let output = namespace.run_test(Test { name: test_name.clone(),