diff --git a/compiler/parser/src/test.rs b/compiler/parser/src/test.rs
index 12bdf89074..3e91f5d7f9 100644
--- a/compiler/parser/src/test.rs
+++ b/compiler/parser/src/test.rs
@@ -259,7 +259,6 @@ impl Runner for TestRunner {
     }
 }
 
-#[test]
 pub fn parser_tests() {
     leo_test_framework::run_tests(&TestRunner, "parser");
 }
diff --git a/compiler/parser/src/tokenizer/lexer.rs b/compiler/parser/src/tokenizer/lexer.rs
index 9aae8388d1..ded4dc2aef 100644
--- a/compiler/parser/src/tokenizer/lexer.rs
+++ b/compiler/parser/src/tokenizer/lexer.rs
@@ -35,7 +35,7 @@ fn eat_identifier(input: &mut Peekable<impl Iterator<Item = char>>) -> Option<String> {
-    fn eat_integer(lead: char, input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Token)> {
-        let mut int = String::from(lead);
-
-        match input.peek() {
-            None => return Err(ParserError::lexer_empty_input_tendril().into()),
-            Some(c) if !c.is_ascii_digit() => return Err(ParserError::lexer_eat_integer_leading_zero(c).into()),
-            _ => {}
+    fn eat_integer(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Token)> {
+        if input.peek().is_none() {
+            return Err(ParserError::lexer_empty_input_tendril().into());
         }
+        let mut int = String::new();
         while let Some(c) = input.next_if(|c| c.is_ascii_digit()) {
             if c == '0' && matches!(input.peek(), Some('x')) {
                 int.push(c);
@@ -183,30 +180,78 @@ impl Token {
         let mut input = input_tendril.chars().peekable();
 
-        match input.next() {
-            Some(x) if x.is_ascii_whitespace() => return Ok((1, Token::WhiteSpace)),
-            Some(lead) if lead.is_ascii_digit() => {
-                return Self::eat_integer(lead, &mut input);
+        match input.peek() {
+            Some(x) if x.is_ascii_whitespace() => {
+                input.next();
+                return Ok((1, Token::WhiteSpace));
+            }
+            Some('"') => {
+                let mut string = Vec::new();
+                input.next();
+
+                while let Some(c) = input.next_if(|c| c != &'"') {
+                    let character = leo_ast::Char::Scalar(c);
+                    string.push(character);
+                }
+
+                if input.next_if_eq(&'"').is_some() {
+                    return Ok((string.len() + 2, Token::StringLit(string)));
+                }
+
+                return Err(ParserError::lexer_string_not_closed(string).into());
+            }
+            Some('\'') => {
+                input.next();
+
+                if let Some(c) = input.next() {
+                    if input.next_if_eq(&'\'').is_some() {
+                        return Ok((c.len_utf8() + 2, Token::CharLit(Char::Scalar(c))));
+                    } else if let Some(c) = input.next() {
+                        return Err(ParserError::lexer_string_not_closed(c).into());
+                    } else {
+                        return Err(ParserError::lexer_empty_input_tendril().into());
+                    }
+                }
+
+                return Err(ParserError::lexer_empty_input_tendril().into());
+            }
+            Some(x) if x.is_ascii_digit() => {
+                return Self::eat_integer(&mut input);
             }
             Some('!') => {
+                input.next();
                 if input.next_if_eq(&'=').is_some() {
                     return Ok((2, Token::NotEq));
                 }
                 return Ok((1, Token::Not));
             }
             Some('?') => {
+                input.next();
                 return Ok((1, Token::Question));
             }
             Some('&') => {
+                input.next();
                 if input.next_if_eq(&'&').is_some() {
                     return Ok((2, Token::And));
                 }
                 return Ok((1, Token::Ampersand));
             }
-            Some('(') => return Ok((1, Token::LeftParen)),
-            Some(')') => return Ok((1, Token::RightParen)),
-            Some('_') => return Ok((1, Token::Underscore)),
+            Some('(') => {
+                input.next();
+                return Ok((1, Token::LeftParen));
+            }
+            Some(')') => {
+                input.next();
+                return Ok((1, Token::RightParen));
+            }
+            Some('_') => {
+                input.next();
+                return Ok((1, Token::Underscore));
+            }
             Some('*') => {
+                input.next();
                 if input.next_if_eq(&'*').is_some() {
                     if input.next_if_eq(&'=').is_some() {
                         return Ok((3, Token::ExpEq));
@@ -218,13 +263,18 @@ impl Token {
                 return Ok((1, Token::Mul));
             }
             Some('+') => {
+                input.next();
                 if input.next_if_eq(&'=').is_some() {
                     return Ok((2, Token::AddEq));
                 }
                 return Ok((1, Token::Add));
             }
-            Some(',') => return Ok((1, Token::Comma)),
+            Some(',') => {
+                input.next();
+                return Ok((1, Token::Comma));
+            }
             Some('-') => {
+                input.next();
                 if input.next_if_eq(&'>').is_some() {
                     return Ok((2, Token::Arrow));
                 } else if input.next_if_eq(&'=').is_some() {
@@ -233,6 +283,7 @@ impl Token {
                 return Ok((1, Token::Minus));
             }
             Some('.') => {
+                input.next();
                 if input.next_if_eq(&'.').is_some() {
                     if input.next_if_eq(&'.').is_some() {
                         return Ok((3, Token::DotDotDot));
@@ -242,8 +293,9 @@ impl Token {
                 }
                 return Ok((1, Token::Dot));
             }
-            Some(c) if c == '/' => {
-                let mut comment = String::from(c);
+            Some(c) if c == &'/' => {
+                let mut comment = String::from(*c);
+                input.next();
 
                 if let Some(c) = input.next_if_eq(&'/') {
                     comment.push(c);
@@ -251,7 +303,8 @@ impl Token {
                         comment.push(c);
                     }
 
-                    if input.next_if_eq(&'\n').is_some() {
+                    if let Some(newline) = input.next_if_eq(&'\n') {
+                        comment.push(newline);
                         return Ok((comment.len() + 1, Token::CommentLine(comment)));
                     }
 
@@ -283,37 +336,60 @@ impl Token {
                 return Ok((1, Token::Div));
             }
             Some(':') => {
+                input.next();
                 if input.next_if_eq(&':').is_some() {
                     return Ok((2, Token::DoubleColon));
                 } else {
                     return Ok((1, Token::Colon));
                 }
             }
-            Some(';') => return Ok((1, Token::Semicolon)),
+            Some(';') => {
+                input.next();
+                return Ok((1, Token::Semicolon));
+            }
             Some('<') => {
+                input.next();
                 if input.next_if_eq(&'=').is_some() {
                     return Ok((2, Token::LtEq));
                 }
                 return Ok((1, Token::Lt));
            }
             Some('>') => {
+                input.next();
                 if input.next_if_eq(&'=').is_some() {
                     return Ok((2, Token::GtEq));
                 }
                 return Ok((1, Token::Gt));
             }
             Some('=') => {
+                input.next();
                 if input.next_if_eq(&'=').is_some() {
                     return Ok((2, Token::Eq));
                 }
                 return Ok((1, Token::Assign));
             }
-            Some('@') => return Ok((1, Token::At)),
-            Some('[') => return Ok((1, Token::LeftSquare)),
-            Some(']') => return Ok((1, Token::RightSquare)),
-            Some('{') => return Ok((1, Token::LeftCurly)),
-            Some('}') => return Ok((1, Token::RightCurly)),
+            Some('@') => {
+                input.next();
+                return Ok((1, Token::At));
+            }
+            Some('[') => {
+                input.next();
+                return Ok((1, Token::LeftSquare));
+            }
+            Some(']') => {
+                input.next();
+                return Ok((1, Token::RightSquare));
+            }
+            Some('{') => {
+                input.next();
+                return Ok((1, Token::LeftCurly));
+            }
+            Some('}') => {
+                input.next();
+                return Ok((1, Token::RightCurly));
+            }
             Some('|') => {
+                input.next();
                 if input.next_if_eq(&'|').is_some() {
                     return Ok((2, Token::Or));
                 } else if let Some(found) = input.next() {
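
The core change in `lexer.rs` is switching `Token::eat` from consuming the first character up front (`input.next()`) to peeking (`input.peek()`), so each match arm now advances the iterator only once it knows the character belongs to the token it is building. A minimal, self-contained sketch of this peek-then-consume pattern over `Peekable` (the `Tok` enum and `eat` function are hypothetical stand-ins, not Leo's types):

```rust
use std::iter::Peekable;
use std::str::Chars;

#[derive(Debug, PartialEq)]
enum Tok {
    Not,    // `!`
    NotEq,  // `!=`
    Assign, // `=`
    Eq,     // `==`
}

// Peek at the next char; consume it only once an arm has claimed it.
fn eat(input: &mut Peekable<Chars<'_>>) -> Option<(usize, Tok)> {
    match input.peek() {
        Some('!') => {
            input.next(); // consume the `!` itself
            if input.next_if_eq(&'=').is_some() {
                // `next_if_eq` consumes the `=` only if it really is next
                return Some((2, Tok::NotEq));
            }
            Some((1, Tok::Not))
        }
        Some('=') => {
            input.next();
            if input.next_if_eq(&'=').is_some() {
                return Some((2, Tok::Eq));
            }
            Some((1, Tok::Assign))
        }
        _ => None,
    }
}

fn main() {
    let mut chars = "!==".chars().peekable();
    assert_eq!(eat(&mut chars), Some((2, Tok::NotEq)));
    assert_eq!(eat(&mut chars), Some((1, Tok::Assign)));
}
```

Because `next_if_eq` tests and consumes in a single step, multi-character operators still lex in one pass without any backtracking.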
diff --git a/compiler/parser/src/tokenizer/mod.rs b/compiler/parser/src/tokenizer/mod.rs
index ec76528322..919c883fd6 100644
--- a/compiler/parser/src/tokenizer/mod.rs
+++ b/compiler/parser/src/tokenizer/mod.rs
@@ -39,7 +39,7 @@ pub(crate) fn tokenize(path: &str, input: &str) -> Result<Vec<SpannedToken>> {
     let mut line_no = 1usize;
     let mut line_start = 0usize;
     while input.len() > index {
-        match Token::eat(&input[index..(input.len() - index)])? {
+        match Token::eat(&input[index..input.len()])? {
             (token_len, Token::WhiteSpace) => {
                 if token_len == 0 && index == input.len() {
                     break;
@@ -52,7 +52,12 @@ pub(crate) fn tokenize(path: &str, input: &str) -> Result<Vec<SpannedToken>> {
                             index - line_start + 1,
                             index - line_start + 2,
                             path,
-                            input[line_start..input[line_start..].find('\n').unwrap_or(input.len())].to_string(),
+                            input[line_start
+                                ..input[line_start..]
+                                    .find('\n')
+                                    .map(|i| i + line_start)
+                                    .unwrap_or(input.len())]
+                                .to_string(),
                         ),
                     )
                     .into());
@@ -78,7 +83,12 @@ pub(crate) fn tokenize(path: &str, input: &str) -> Result<Vec<SpannedToken>> {
                 index - line_start + 1,
                 index - line_start + token_len + 1,
                 path.clone(),
-                input[line_start..input[line_start..].find('\n').unwrap_or(input.len() - line_start)].to_string(),
+                input[line_start
+                    ..input[line_start..]
+                        .find('\n')
+                        .map(|i| i + line_start)
+                        .unwrap_or(input.len())]
+                    .to_string(),
             );
             match &token {
                 Token::CommentLine(_) => {
@@ -121,6 +131,12 @@ mod tests {
         let tokens = tokenize(
             "test_path",
             r#"
+            'a'
+            '😭'
+            '\u{10001F}'
+            '\x7f'
+            '\x00'
+            '\x37'
             "test"
             "test{}test"
             "test{}"
@@ -219,7 +235,7 @@ mod tests {
 
             assert_eq!(
                 output,
-                r#""test" "test{}test" "test{}" "{}test" "test{" "test}" "test{test" "test}test" "te{{}}" aleo1qnr4dkkvkgfqph0vzc3y6z2eu975wnpz2925ntjccd5cfqxtyu8sta57j8 test_ident 12345 address as bool circuit const else false field for function group i128 i64 i32 i16 i8 if import in input let mut & return static string test true u128 u64 u32 u16 u8 self Self console ! != && ( ) * ** **= *= + += , - -= -> _ . .. ... / /= : :: ; < <= = == > >= @ [ ] { { } } || ? // test
+                r#"'a' '😭' '\u{10001F}' "test" "test{}test" "test{}" "{}test" "test{" "test}" "test{test" "test}test" "te{{}}" aleo1qnr4dkkvkgfqph0vzc3y6z2eu975wnpz2925ntjccd5cfqxtyu8sta57j8 test_ident 12345 address as bool circuit const else false field for function group i128 i64 i32 i16 i8 if import in input let mut & return static string test true u128 u64 u32 u16 u8 self Self console ! != && ( ) * ** **= *= + += , - -= -> _ . .. ... / /= : :: ; < <= = == > >= @ [ ] { { } } || ? // test
 /* test */ // "#
             );
         });
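
The two slicing changes in `tokenize` fix a relative-vs-absolute index bug: `str::find` called on the subslice `&input[line_start..]` returns an offset relative to `line_start`, so it must be shifted back (`.map(|i| i + line_start)`) before it can index `input` itself. A standalone sketch of the corrected logic (the `current_line` helper is hypothetical, not code from this patch):

```rust
// `find` on a subslice yields an offset relative to that subslice,
// so the result has to be rebased before slicing the full string.
fn current_line(input: &str, line_start: usize) -> &str {
    let line_end = input[line_start..]
        .find('\n')
        .map(|i| i + line_start) // shift back to an absolute index
        .unwrap_or(input.len());
    &input[line_start..line_end]
}

fn main() {
    let src = "first\nsecond\nthird";
    assert_eq!(current_line(src, 6), "second");
    // The old, unshifted version would have sliced `&src[6..6]` here,
    // producing "" — and it could panic whenever the relative index
    // fell below `line_start`.
}
```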
diff --git a/compiler/parser/src/tokenizer/token.rs b/compiler/parser/src/tokenizer/token.rs
index 22afe2635d..a9c3e17c86 100644
--- a/compiler/parser/src/tokenizer/token.rs
+++ b/compiler/parser/src/tokenizer/token.rs
@@ -258,7 +258,7 @@ impl fmt::Display for Token {
             True => write!(f, "true"),
             False => write!(f, "false"),
             AddressLit(s) => write!(f, "{}", s),
-            CharLit(s) => write!(f, "{}", s),
+            CharLit(s) => write!(f, "'{}'", s),
 
             WhiteSpace => write!(f, "whitespace"),
             At => write!(f, "@"),
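
Quoting `CharLit` in its `Display` impl keeps tokenizer output in source form, which the updated expected string in the `mod.rs` test (`'a' '😭' '\u{10001F}' ...`) depends on. A toy version of the change (an illustrative `CharLit` wrapper, not Leo's `Token` enum):

```rust
use std::fmt;

// Stand-in for the `Token::CharLit` display change.
struct CharLit(char);

impl fmt::Display for CharLit {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Was `write!(f, "{}", self.0)`, which printed the bare character.
        write!(f, "'{}'", self.0)
    }
}

fn main() {
    assert_eq!(CharLit('a').to_string(), "'a'");
    assert_eq!(CharLit('😭').to_string(), "'😭'");
}
```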
diff --git a/compiler/parser/tests/serialization/json.rs b/compiler/parser/tests/serialization/json.rs
deleted file mode 100644
index b0fcd85eef..0000000000
--- a/compiler/parser/tests/serialization/json.rs
+++ /dev/null
@@ -1,217 +0,0 @@
-// Copyright (C) 2019-2022 Aleo Systems Inc.
-// This file is part of the Leo library.
-
-// The Leo library is free software: you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-
-// The Leo library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-
-// You should have received a copy of the GNU General Public License
-// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
-
-use leo_ast::Ast;
-#[cfg(not(feature = "ci_skip"))]
-use leo_ast::Program;
-use leo_errors::{emitter::Handler, LeoError, Result};
-
-use std::fs::File;
-use std::io::BufReader;
-use std::iter::Iterator;
-use std::path::{Path, PathBuf};
-
-fn to_ast(program_filepath: &Path) -> Result<Ast> {
-    let program_string = std::fs::read_to_string(program_filepath).expect("failed to open test");
-
-    // Parses the Leo file and constructs a leo ast.
-    leo_parser::parse_ast(&Handler::default(), "", &program_string)
-}
-
-fn setup() {
-    std::env::set_var("LEO_TESTFRAMEWORK", "true");
-}
-
-fn clean() {
-    std::env::remove_var("LEO_TESTFRAMEWORK");
-}
-
-#[test]
-#[cfg(not(feature = "ci_skip"))]
-fn test_serialize() {
-    setup();
-
-    // Construct an ast from the given test file.
-    let ast = {
-        let mut program_filepath = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
-        program_filepath.push("tests/serialization/leo/one_plus_one.leo");
-
-        to_ast(&program_filepath).unwrap()
-    };
-
-    // Serializes the ast into JSON format.
-    let serialized_ast: Program = serde_json::from_value(serde_json::to_value(ast.as_repr()).unwrap()).unwrap();
-
-    // Load the expected ast.
-    let expected: Program = serde_json::from_str(include_str!("./expected_leo_ast/one_plus_one.json")).unwrap();
-
-    clean();
-    assert_eq!(expected, serialized_ast);
-}
-
-#[test]
-#[cfg(not(feature = "ci_skip"))]
-fn test_serialize_no_span() {
-    setup();
-
-    let program_paths = vec![
-        "tests/serialization/leo/linear_regression.leo",
-        "tests/serialization/leo/palindrome.leo",
-        "tests/serialization/leo/pedersen_hash.leo",
-        "tests/serialization/leo/silly_sudoku.leo",
-    ];
-
-    let json_paths = vec![
-        "tests/serialization/expected_leo_ast/linear_regression.json",
-        "tests/serialization/expected_leo_ast/palindrome.json",
-        "tests/serialization/expected_leo_ast/pedersen_hash.json",
-        "tests/serialization/expected_leo_ast/silly_sudoku.json",
-    ];
-
-    for (program_path, json_path) in program_paths.into_iter().zip(json_paths) {
-        // Construct an ast from the given test file.
-        let ast = {
-            let mut program_filepath = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
-            program_filepath.push(program_path);
-            to_ast(&program_filepath).unwrap()
-        };
-
-        let json_reader = {
-            let mut json_filepath = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
-            json_filepath.push(json_path);
-            let file = File::open(json_filepath).expect("Failed to read expected ast file");
-            BufReader::new(file)
-        };
-
-        // Serializes the ast into JSON format.
-        let mut serialized_ast: serde_json::Value = serde_json::to_value(ast.as_repr()).unwrap();
-        remove_key_from_json(&mut serialized_ast, "span");
-        serialized_ast = normalize_json_value(serialized_ast);
-
-        // Load the expected ast.
-        let expected: serde_json::Value = serde_json::from_reader(json_reader).unwrap();
-
-        assert_eq!(expected, serialized_ast);
-    }
-    clean();
-}
-
-// Helper functions to recursively filter keys from AST JSON.
-// Redeclaring here since we don't want to make this public.
-fn remove_key_from_json(value: &mut serde_json::Value, key: &str) {
-    match value {
-        serde_json::value::Value::Object(map) => {
-            map.remove(key);
-            for val in map.values_mut() {
-                remove_key_from_json(val, key);
-            }
-        }
-        serde_json::value::Value::Array(values) => {
-            for val in values.iter_mut() {
-                remove_key_from_json(val, key);
-            }
-        }
-        _ => (),
-    }
-}
-
-// Helper function to normalize AST
-// Redeclaring here because we don't want to make this public
-fn normalize_json_value(value: serde_json::Value) -> serde_json::Value {
-    match value {
-        serde_json::Value::Array(vec) => {
-            let orig_length = vec.len();
-            let mut new_vec: Vec<serde_json::Value> = vec
-                .into_iter()
-                .filter(|v| !matches!(v, serde_json::Value::Object(map) if map.is_empty()))
-                .map(normalize_json_value)
-                .collect();
-
-            if orig_length == 2 && new_vec.len() == 1 {
-                new_vec.pop().unwrap()
-            } else {
-                serde_json::Value::Array(new_vec)
-            }
-        }
-        serde_json::Value::Object(map) => {
-            serde_json::Value::Object(map.into_iter().map(|(k, v)| (k, normalize_json_value(v))).collect())
-        }
-        _ => value,
-    }
-}
-
-// TODO Renable when we don't write spans to snapshots.
-/* #[test]
-#[cfg(not(feature = "ci_skip"))]
-fn test_deserialize() {
-    setup();
-
-    // Load the expected ast.
-    let expected_ast = {
-        let mut program_filepath = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
-        program_filepath.push("tests/serialization/main.leo");
-
-        to_ast(&program_filepath).unwrap()
-    };
-
-    // Construct an ast by deserializing a ast JSON file.
-    let serialized_ast = include_str!("expected_leo_ast.json");
-    let ast = Ast::from_json_string(serialized_ast).unwrap();
-
-    clean();
-    assert_eq!(expected_ast, ast);
-}
-
-#[test]
-fn test_serialize_deserialize_serialize() {
-    setup();
-
-    // Construct an ast from the given test file.
-    let ast = {
-        let mut program_filepath = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
-        program_filepath.push("tests/serialization/main.leo");
-
-        to_ast(&program_filepath).unwrap()
-    };
-
-    // Serializes the ast into JSON format.
-    let serialized_ast = ast.to_json_string().unwrap();
-
-    // Deserializes the serialized ast into an ast.
-    let ast = Ast::from_json_string(&serialized_ast).unwrap();
-
-    // Reserializes the ast into JSON format.
-    let reserialized_ast = ast.to_json_string().unwrap();
-
-    clean();
-    assert_eq!(serialized_ast, reserialized_ast);
-} */
-
-#[test]
-fn test_generic_parser_error() {
-    setup();
-
-    let error_result = {
-        let mut program_filepath = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
-        program_filepath.push("tests/serialization/leo/parser_error.leo");
-
-        to_ast(&program_filepath)
-    }
-    .map_err(|err| matches!(err, LeoError::ParserError(_)));
-
-    clean();
-    assert!(error_result.err().unwrap());
-}
diff --git a/leo/errors/src/parser/parser_errors.rs b/leo/errors/src/parser/parser_errors.rs
index 9b02ce6979..215cfd51f9 100644
--- a/leo/errors/src/parser/parser_errors.rs
+++ b/leo/errors/src/parser/parser_errors.rs
@@ -251,8 +251,8 @@ create_errors!(
 
     /// When a string is not properly closed.
     @backtraced
     lexer_string_not_closed {
-        args: (input: impl Display),
-        msg: format!("Expected a closed string but found `{}`.", input),
+        args: (input: impl Debug),
+        msg: format!("Expected a closed string but found `{:?}`.", input),
         help: None,
     }
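
The `lexer_string_not_closed` signature moves from `impl Display` to `impl Debug` because the lexer's unterminated-string path now passes the partially-lexed `Vec` of `leo_ast::Char` values, which has no `Display` impl. A sketch of the resulting formatting (a hypothetical free function standing in for the `create_errors!`-generated one):

```rust
use std::fmt::Debug;

// Mirrors the new `{:?}`-based message; Leo generates the real
// constructor through its `create_errors!` macro.
fn string_not_closed_msg(input: impl Debug) -> String {
    format!("Expected a closed string but found `{:?}`.", input)
}

fn main() {
    // A plain Vec<char> standing in for the lexer's Vec<leo_ast::Char>.
    let partial = vec!['a', 'b', 'c'];
    println!("{}", string_not_closed_msg(partial));
}
```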