tokenizing almost working

This commit is contained in:
gluax 2022-03-15 17:13:02 -07:00
parent 3d1cc9a735
commit 5034294d09
6 changed files with 124 additions and 250 deletions

View File

@ -259,7 +259,6 @@ impl Runner for TestRunner {
}
}
/// Test entry point: hands a `TestRunner` to `leo_test_framework::run_tests`
/// together with the name `"parser"`.
// NOTE(review): `"parser"` presumably selects which group of test cases the
// framework runs — confirm against `leo_test_framework`.
#[test]
pub fn parser_tests() {
leo_test_framework::run_tests(&TestRunner, "parser");
}

View File

@ -35,7 +35,7 @@ fn eat_identifier(input: &mut Peekable<impl Iterator<Item = char>>) -> Option<St
}
let mut ident = String::new();
while let Some(c) = input.next_if(|c| c.is_ascii_alphabetic()) {
while let Some(c) = input.next_if(|c| c.is_ascii_alphanumeric() || c == &'_') {
ident.push(c);
}
Some(ident)
@ -133,15 +133,12 @@ impl Token {
/// Returns a tuple: [(integer length, integer token)] if an integer can be eaten, otherwise returns [`None`].
/// An integer can be eaten if its bytes are at the front of the given `input_tendril` string.
///
fn eat_integer(lead: char, input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Token)> {
let mut int = String::from(lead);
match input.peek() {
None => return Err(ParserError::lexer_empty_input_tendril().into()),
Some(c) if !c.is_ascii_digit() => return Err(ParserError::lexer_eat_integer_leading_zero(c).into()),
_ => {}
fn eat_integer(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Token)> {
if input.peek().is_none() {
return Err(ParserError::lexer_empty_input_tendril().into());
}
let mut int = String::new();
while let Some(c) = input.next_if(|c| c.is_ascii_digit()) {
if c == '0' && matches!(input.peek(), Some('x')) {
int.push(c);
@ -183,30 +180,78 @@ impl Token {
let mut input = input_tendril.chars().peekable();
match input.next() {
Some(x) if x.is_ascii_whitespace() => return Ok((1, Token::WhiteSpace)),
Some(lead) if lead.is_ascii_digit() => {
return Self::eat_integer(lead, &mut input);
match input.peek() {
Some(x) if x.is_ascii_whitespace() => {
input.next();
return Ok((1, Token::WhiteSpace));
}
Some('"') => {
let mut string = Vec::new();
input.next();
while let Some(c) = input.next_if(|c| c != &'"') {
let character = leo_ast::Char::Scalar(c);
string.push(character);
}
if input.next_if_eq(&'"').is_some() {
return Ok((string.len() + 2, Token::StringLit(string)));
}
return Err(ParserError::lexer_string_not_closed(string).into());
}
Some('\'') => {
input.next();
if let Some(c) = input.next() {
dbg!(&c);
if input.next_if_eq(&'\'').is_some() {
input.next();
return Ok((c.len_utf8() + 2, Token::CharLit(Char::Scalar(c))));
} else if let Some(c) = input.next() {
return Err(ParserError::lexer_string_not_closed(c).into());
} else {
return Err(ParserError::lexer_empty_input_tendril().into());
}
}
return Err(ParserError::lexer_empty_input_tendril().into());
}
Some(x) if x.is_ascii_digit() => {
return Self::eat_integer(&mut input);
}
Some('!') => {
input.next();
if input.next_if_eq(&'=').is_some() {
return Ok((2, Token::NotEq));
}
return Ok((1, Token::Not));
}
Some('?') => {
input.next();
return Ok((1, Token::Question));
}
Some('&') => {
input.next();
if input.next_if_eq(&'&').is_some() {
return Ok((2, Token::And));
}
return Ok((1, Token::Ampersand));
}
Some('(') => return Ok((1, Token::LeftParen)),
Some(')') => return Ok((1, Token::RightParen)),
Some('_') => return Ok((1, Token::Underscore)),
Some('(') => {
input.next();
return Ok((1, Token::LeftParen));
}
Some(')') => {
input.next();
return Ok((1, Token::RightParen));
}
Some('_') => {
input.next();
return Ok((1, Token::Underscore));
}
Some('*') => {
input.next();
if input.next_if_eq(&'*').is_some() {
if input.next_if_eq(&'=').is_some() {
return Ok((3, Token::ExpEq));
@ -218,13 +263,18 @@ impl Token {
return Ok((1, Token::Mul));
}
Some('+') => {
input.next();
if input.next_if_eq(&'=').is_some() {
return Ok((2, Token::AddEq));
}
return Ok((1, Token::Add));
}
Some(',') => return Ok((1, Token::Comma)),
Some(',') => {
input.next();
return Ok((1, Token::Comma));
}
Some('-') => {
input.next();
if input.next_if_eq(&'>').is_some() {
return Ok((2, Token::Arrow));
} else if input.next_if_eq(&'=').is_some() {
@ -233,6 +283,7 @@ impl Token {
return Ok((1, Token::Minus));
}
Some('.') => {
input.next();
if input.next_if_eq(&'.').is_some() {
if input.next_if_eq(&'.').is_some() {
return Ok((3, Token::DotDotDot));
@ -242,8 +293,9 @@ impl Token {
}
return Ok((1, Token::Dot));
}
Some(c) if c == '/' => {
let mut comment = String::from(c);
Some(c) if c == &'/' => {
let mut comment = String::from(*c);
input.next();
if let Some(c) = input.next_if_eq(&'/') {
comment.push(c);
@ -251,7 +303,8 @@ impl Token {
comment.push(c);
}
if input.next_if_eq(&'\n').is_some() {
if let Some(newline) = input.next_if_eq(&'\n') {
comment.push(newline);
return Ok((comment.len() + 1, Token::CommentLine(comment)));
}
@ -283,37 +336,60 @@ impl Token {
return Ok((1, Token::Div));
}
Some(':') => {
input.next();
if input.next_if_eq(&':').is_some() {
return Ok((2, Token::DoubleColon));
} else {
return Ok((1, Token::Colon));
}
}
Some(';') => return Ok((1, Token::Semicolon)),
Some(';') => {
input.next();
return Ok((1, Token::Semicolon));
}
Some('<') => {
input.next();
if input.next_if_eq(&'=').is_some() {
return Ok((2, Token::LtEq));
}
return Ok((1, Token::Lt));
}
Some('>') => {
input.next();
if input.next_if_eq(&'=').is_some() {
return Ok((2, Token::GtEq));
}
return Ok((1, Token::Gt));
}
Some('=') => {
input.next();
if input.next_if_eq(&'=').is_some() {
return Ok((2, Token::Eq));
}
return Ok((1, Token::Assign));
}
Some('@') => return Ok((1, Token::At)),
Some('[') => return Ok((1, Token::LeftSquare)),
Some(']') => return Ok((1, Token::RightSquare)),
Some('{') => return Ok((1, Token::LeftCurly)),
Some('}') => return Ok((1, Token::RightCurly)),
Some('@') => {
input.next();
return Ok((1, Token::At));
}
Some('[') => {
input.next();
return Ok((1, Token::LeftSquare));
}
Some(']') => {
input.next();
return Ok((1, Token::RightSquare));
}
Some('{') => {
input.next();
return Ok((1, Token::LeftCurly));
}
Some('}') => {
input.next();
return Ok((1, Token::RightCurly));
}
Some('|') => {
input.next();
if input.next_if_eq(&'|').is_some() {
return Ok((2, Token::Or));
} else if let Some(found) = input.next() {

View File

@ -39,7 +39,7 @@ pub(crate) fn tokenize(path: &str, input: &str) -> Result<Vec<SpannedToken>> {
let mut line_no = 1usize;
let mut line_start = 0usize;
while input.len() > index {
match Token::eat(&input[index..(input.len() - index)])? {
match Token::eat(&input[index..input.len()])? {
(token_len, Token::WhiteSpace) => {
if token_len == 0 && index == input.len() {
break;
@ -52,7 +52,12 @@ pub(crate) fn tokenize(path: &str, input: &str) -> Result<Vec<SpannedToken>> {
index - line_start + 1,
index - line_start + 2,
path,
input[line_start..input[line_start..].find('\n').unwrap_or(input.len())].to_string(),
input[line_start
..input[line_start..]
.find('\n')
.map(|i| i + line_start)
.unwrap_or(input.len())]
.to_string(),
),
)
.into());
@ -78,7 +83,12 @@ pub(crate) fn tokenize(path: &str, input: &str) -> Result<Vec<SpannedToken>> {
index - line_start + 1,
index - line_start + token_len + 1,
path.clone(),
input[line_start..input[line_start..].find('\n').unwrap_or(input.len() - line_start)].to_string(),
input[line_start
..input[line_start..]
.find('\n')
.map(|i| i + line_start)
.unwrap_or(input.len())]
.to_string(),
);
match &token {
Token::CommentLine(_) => {
@ -121,6 +131,12 @@ mod tests {
let tokens = tokenize(
"test_path",
r#"
'a'
'😭'
'\u{10001F}'
'\x7f'
'\x00'
'\x37'
"test"
"test{}test"
"test{}"
@ -219,7 +235,7 @@ mod tests {
assert_eq!(
output,
r#""test" "test{}test" "test{}" "{}test" "test{" "test}" "test{test" "test}test" "te{{}}" aleo1qnr4dkkvkgfqph0vzc3y6z2eu975wnpz2925ntjccd5cfqxtyu8sta57j8 test_ident 12345 address as bool circuit const else false field for function group i128 i64 i32 i16 i8 if import in input let mut & return static string test true u128 u64 u32 u16 u8 self Self console ! != && ( ) * ** **= *= + += , - -= -> _ . .. ... / /= : :: ; < <= = == > >= @ [ ] { { } } || ? // test
r#"'a' '😭' '\u{10001F}' "test" "test{}test" "test{}" "{}test" "test{" "test}" "test{test" "test}test" "te{{}}" aleo1qnr4dkkvkgfqph0vzc3y6z2eu975wnpz2925ntjccd5cfqxtyu8sta57j8 test_ident 12345 address as bool circuit const else false field for function group i128 i64 i32 i16 i8 if import in input let mut & return static string test true u128 u64 u32 u16 u8 self Self console ! != && ( ) * ** **= *= + += , - -= -> _ . .. ... / /= : :: ; < <= = == > >= @ [ ] { { } } || ? // test
/* test */ // "#
);
});

View File

@ -258,7 +258,7 @@ impl fmt::Display for Token {
True => write!(f, "true"),
False => write!(f, "false"),
AddressLit(s) => write!(f, "{}", s),
CharLit(s) => write!(f, "{}", s),
CharLit(s) => write!(f, "'{}'", s),
WhiteSpace => write!(f, "whitespace"),
At => write!(f, "@"),

View File

@ -1,217 +0,0 @@
// Copyright (C) 2019-2022 Aleo Systems Inc.
// This file is part of the Leo library.
// The Leo library is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// The Leo library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
use leo_ast::Ast;
#[cfg(not(feature = "ci_skip"))]
use leo_ast::Program;
use leo_errors::{emitter::Handler, LeoError, Result};
use std::fs::File;
use std::io::BufReader;
use std::iter::Iterator;
use std::path::{Path, PathBuf};
/// Reads the Leo program at `program_filepath` and parses it into an [`Ast`].
///
/// # Panics
///
/// Panics with "failed to open test" when the file cannot be read.
///
/// # Errors
///
/// Any error reported by `leo_parser::parse_ast` is returned to the caller.
fn to_ast(program_filepath: &Path) -> Result<Ast> {
    let source = std::fs::read_to_string(program_filepath).expect("failed to open test");

    // Every parse gets its own default handler. The second argument is left
    // empty (presumably the path shown in diagnostics — confirm against
    // `leo_parser::parse_ast`).
    let handler = Handler::default();
    leo_parser::parse_ast(&handler, "", &source)
}
/// Sets the `LEO_TESTFRAMEWORK` environment variable to `"true"`.
// NOTE(review): environment variables are process-wide, and the tests in this
// file toggle the same variable — confirm this cannot race when tests run in
// parallel.
fn setup() {
std::env::set_var("LEO_TESTFRAMEWORK", "true");
}
/// Removes the `LEO_TESTFRAMEWORK` environment variable again, undoing `setup`.
fn clean() {
std::env::remove_var("LEO_TESTFRAMEWORK");
}
/// Checks that the AST parsed from `one_plus_one.leo`, pushed through a JSON
/// value and read back as a `Program`, equals the stored expected snapshot.
#[test]
#[cfg(not(feature = "ci_skip"))]
fn test_serialize() {
    setup();

    // Parse the fixture program, located relative to the crate root.
    let program_filepath =
        PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/serialization/leo/one_plus_one.leo");
    let ast = to_ast(&program_filepath).unwrap();

    // Turn the AST into a JSON value, then deserialize that value as a `Program`.
    let ast_json = serde_json::to_value(ast.as_repr()).unwrap();
    let serialized_ast: Program = serde_json::from_value(ast_json).unwrap();

    // The expected program is embedded into the test binary at compile time.
    let expected: Program = serde_json::from_str(include_str!("./expected_leo_ast/one_plus_one.json")).unwrap();

    // Reset the environment flag before asserting so a failed comparison
    // does not leave it set.
    clean();
    assert_eq!(expected, serialized_ast);
}
/// Checks that each sample program's AST, serialized to JSON with every
/// `"span"` key removed and then normalized, equals its expected JSON snapshot.
#[test]
#[cfg(not(feature = "ci_skip"))]
fn test_serialize_no_span() {
    setup();

    // (Leo program, expected JSON snapshot) pairs, both relative to the crate root.
    let cases = [
        (
            "tests/serialization/leo/linear_regression.leo",
            "tests/serialization/expected_leo_ast/linear_regression.json",
        ),
        (
            "tests/serialization/leo/palindrome.leo",
            "tests/serialization/expected_leo_ast/palindrome.json",
        ),
        (
            "tests/serialization/leo/pedersen_hash.leo",
            "tests/serialization/expected_leo_ast/pedersen_hash.json",
        ),
        (
            "tests/serialization/leo/silly_sudoku.leo",
            "tests/serialization/expected_leo_ast/silly_sudoku.json",
        ),
    ];

    for &(program_path, json_path) in cases.iter() {
        // Construct an ast from the given test file.
        let program_filepath = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(program_path);
        let ast = to_ast(&program_filepath).unwrap();

        // Open the expected snapshot for this program.
        let json_filepath = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(json_path);
        let snapshot_file = File::open(json_filepath).expect("Failed to read expected ast file");
        let json_reader = BufReader::new(snapshot_file);

        // Serialize the ast to JSON, drop every "span" entry, then normalize.
        let mut spanless = serde_json::to_value(ast.as_repr()).unwrap();
        remove_key_from_json(&mut spanless, "span");
        let serialized_ast = normalize_json_value(spanless);

        // Load the expected ast and compare.
        let expected: serde_json::Value = serde_json::from_reader(json_reader).unwrap();
        assert_eq!(expected, serialized_ast);
    }

    clean();
}
// Helper functions to recursively filter keys from AST JSON.
// Redeclaring here since we don't want to make this public.
fn remove_key_from_json(value: &mut serde_json::Value, key: &str) {
match value {
serde_json::value::Value::Object(map) => {
map.remove(key);
for val in map.values_mut() {
remove_key_from_json(val, key);
}
}
serde_json::value::Value::Array(values) => {
for val in values.iter_mut() {
remove_key_from_json(val, key);
}
}
_ => (),
}
}
// Helper function to normalize AST
// Redeclaring here because we don't want to make this public
fn normalize_json_value(value: serde_json::Value) -> serde_json::Value {
match value {
serde_json::Value::Array(vec) => {
let orig_length = vec.len();
let mut new_vec: Vec<serde_json::Value> = vec
.into_iter()
.filter(|v| !matches!(v, serde_json::Value::Object(map) if map.is_empty()))
.map(normalize_json_value)
.collect();
if orig_length == 2 && new_vec.len() == 1 {
new_vec.pop().unwrap()
} else {
serde_json::Value::Array(new_vec)
}
}
serde_json::Value::Object(map) => {
serde_json::Value::Object(map.into_iter().map(|(k, v)| (k, normalize_json_value(v))).collect())
}
_ => value,
}
}
// TODO Re-enable when we don't write spans to snapshots.
/* #[test]
#[cfg(not(feature = "ci_skip"))]
fn test_deserialize() {
setup();
// Load the expected ast.
let expected_ast = {
let mut program_filepath = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
program_filepath.push("tests/serialization/main.leo");
to_ast(&program_filepath).unwrap()
};
// Construct an ast by deserializing a ast JSON file.
let serialized_ast = include_str!("expected_leo_ast.json");
let ast = Ast::from_json_string(serialized_ast).unwrap();
clean();
assert_eq!(expected_ast, ast);
}
#[test]
fn test_serialize_deserialize_serialize() {
setup();
// Construct an ast from the given test file.
let ast = {
let mut program_filepath = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
program_filepath.push("tests/serialization/main.leo");
to_ast(&program_filepath).unwrap()
};
// Serializes the ast into JSON format.
let serialized_ast = ast.to_json_string().unwrap();
// Deserializes the serialized ast into an ast.
let ast = Ast::from_json_string(&serialized_ast).unwrap();
// Reserializes the ast into JSON format.
let reserialized_ast = ast.to_json_string().unwrap();
clean();
assert_eq!(serialized_ast, reserialized_ast);
} */
/// Checks that parsing `parser_error.leo` fails, and that the failure is a
/// `LeoError::ParserError`.
#[test]
fn test_generic_parser_error() {
    setup();

    let program_filepath =
        PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/serialization/leo/parser_error.leo");

    // `None` means the parse unexpectedly succeeded; `Some(flag)` records
    // whether the reported error is the parser variant.
    let is_parser_error = match to_ast(&program_filepath) {
        Err(err) => Some(matches!(err, LeoError::ParserError(_))),
        Ok(_) => None,
    };

    // Reset the environment flag before asserting so a failure does not leave it set.
    clean();
    assert!(is_parser_error.unwrap());
}

View File

@ -251,8 +251,8 @@ create_errors!(
/// When a string is not properly closed.
@backtraced
lexer_string_not_closed {
args: (input: impl Display),
msg: format!("Expected a closed string but found `{}`.", input),
args: (input: impl Debug),
msg: format!("Expected a closed string but found `{:?}`.", input),
help: None,
}