mirror of https://github.com/ProvableHQ/leo.git
synced 2024-11-24 07:48:04 +03:00

tokenizing almost working

This commit is contained in:
parent 3d1cc9a735
commit 5034294d09
@@ -259,7 +259,6 @@ impl Runner for TestRunner {
    }
}

#[test]
pub fn parser_tests() {
    leo_test_framework::run_tests(&TestRunner, "parser");
}
@@ -35,7 +35,7 @@ fn eat_identifier(input: &mut Peekable<impl Iterator<Item = char>>) -> Option<St
    }

    let mut ident = String::new();
    while let Some(c) = input.next_if(|c| c.is_ascii_alphabetic()) {
    while let Some(c) = input.next_if(|c| c.is_ascii_alphanumeric() || c == &'_') {
        ident.push(c);
    }
    Some(ident)
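Note: the change above widens the identifier tail from letters only to letters, digits, and underscores. A minimal self-contained sketch of the revised helper; the leading-alphabetic guard before the loop is an assumption, since the hunk truncates the surrounding context:

    use std::iter::Peekable;

    /// Eats an identifier: a leading ASCII letter followed by any mix of
    /// ASCII letters, digits, and underscores.
    fn eat_identifier(input: &mut Peekable<impl Iterator<Item = char>>) -> Option<String> {
        // Assumed guard: bail out unless the next char can start an identifier.
        input.peek().filter(|c| c.is_ascii_alphabetic())?;

        let mut ident = String::new();
        while let Some(c) = input.next_if(|c| c.is_ascii_alphanumeric() || c == &'_') {
            ident.push(c);
        }
        Some(ident)
    }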
@@ -133,15 +133,12 @@ impl Token {
    /// Returns a tuple: [(integer length, integer token)] if an integer can be eaten, otherwise returns [`None`].
    /// An integer can be eaten if its bytes are at the front of the given `input_tendril` string.
    ///
    fn eat_integer(lead: char, input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Token)> {
        let mut int = String::from(lead);

        match input.peek() {
            None => return Err(ParserError::lexer_empty_input_tendril().into()),
            Some(c) if !c.is_ascii_digit() => return Err(ParserError::lexer_eat_integer_leading_zero(c).into()),
            _ => {}
    fn eat_integer(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Token)> {
        if input.peek().is_none() {
            return Err(ParserError::lexer_empty_input_tendril().into());
        }

        let mut int = String::new();
        while let Some(c) = input.next_if(|c| c.is_ascii_digit()) {
            if c == '0' && matches!(input.peek(), Some('x')) {
                int.push(c);
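The rewrite drops the separate `lead` parameter (the caller now only peeks, so the first digit is still in the iterator) and replaces the up-front digit check with a `next_if` loop. A self-contained sketch with stand-in types; the hunk cuts off inside the hex-prefix branch, so its handling below is an assumption:

    use std::iter::Peekable;

    enum Tok { Int(String) }

    fn eat_integer(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Tok), String> {
        if input.peek().is_none() {
            return Err("empty input".to_string());
        }

        let mut int = String::new();
        while let Some(c) = input.next_if(|c| c.is_ascii_digit()) {
            if c == '0' && matches!(input.peek(), Some('x')) {
                // The real branch is truncated in the diff; rejecting the
                // `0x` prefix here is an assumption for this sketch.
                return Err("hex literals not handled".to_string());
            }
            int.push(c);
        }
        // Length consumed equals the number of digits eaten.
        Ok((int.len(), Tok::Int(int)))
    }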
@@ -183,30 +180,78 @@ impl Token {

        let mut input = input_tendril.chars().peekable();

        match input.next() {
            Some(x) if x.is_ascii_whitespace() => return Ok((1, Token::WhiteSpace)),
            Some(lead) if lead.is_ascii_digit() => {
                return Self::eat_integer(lead, &mut input);
        match input.peek() {
            Some(x) if x.is_ascii_whitespace() => {
                input.next();
                return Ok((1, Token::WhiteSpace));
            }
            Some('"') => {
                let mut string = Vec::new();
                input.next();

                while let Some(c) = input.next_if(|c| c != &'"') {
                    let character = leo_ast::Char::Scalar(c);
                    string.push(character);
                }

                if input.next_if_eq(&'"').is_some() {
                    return Ok((string.len() + 2, Token::StringLit(string)));
                }

                return Err(ParserError::lexer_string_not_closed(string).into());
            }
            Some('\'') => {
                input.next();

                if let Some(c) = input.next() {
                    dbg!(&c);
                    if input.next_if_eq(&'\'').is_some() {
                        input.next();
                        return Ok((c.len_utf8() + 2, Token::CharLit(Char::Scalar(c))));
                    } else if let Some(c) = input.next() {
                        return Err(ParserError::lexer_string_not_closed(c).into());
                    } else {
                        return Err(ParserError::lexer_empty_input_tendril().into());
                    }
                }

                return Err(ParserError::lexer_empty_input_tendril().into());
            }
            Some(x) if x.is_ascii_digit() => {
                return Self::eat_integer(&mut input);
            }
            Some('!') => {
                input.next();
                if input.next_if_eq(&'=').is_some() {
                    return Ok((2, Token::NotEq));
                }
                return Ok((1, Token::Not));
            }
            Some('?') => {
                input.next();
                return Ok((1, Token::Question));
            }
            Some('&') => {
                input.next();
                if input.next_if_eq(&'&').is_some() {
                    return Ok((2, Token::And));
                }
                return Ok((1, Token::Ampersand));
            }
            Some('(') => return Ok((1, Token::LeftParen)),
            Some(')') => return Ok((1, Token::RightParen)),
            Some('_') => return Ok((1, Token::Underscore)),
            Some('(') => {
                input.next();
                return Ok((1, Token::LeftParen));
            }
            Some(')') => {
                input.next();
                return Ok((1, Token::RightParen));
            }
            Some('_') => {
                input.next();
                return Ok((1, Token::Underscore));
            }
            Some('*') => {
                input.next();
                if input.next_if_eq(&'*').is_some() {
                    if input.next_if_eq(&'=').is_some() {
                        return Ok((3, Token::ExpEq));
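The dispatch now peeks instead of consuming, so each arm decides for itself how much to eat. For quoted literals the shape is: consume the opening quote, collect until the closing quote, then require the closer. A self-contained sketch of the string-literal arm with stand-in result types (the real code wraps each char in leo_ast::Char::Scalar and builds a Token::StringLit):

    use std::iter::Peekable;

    fn eat_string_lit(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, String), String> {
        input.next(); // consume the opening quote the caller peeked

        let mut string = Vec::new();
        while let Some(c) = input.next_if(|c| c != &'"') {
            string.push(c);
        }

        if input.next_if_eq(&'"').is_some() {
            // +2 counts both quote characters; len() is the number of chars eaten.
            return Ok((string.len() + 2, string.into_iter().collect()));
        }
        Err(format!("string not closed: {:?}", string))
    }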
@@ -218,13 +263,18 @@ impl Token {
                return Ok((1, Token::Mul));
            }
            Some('+') => {
                input.next();
                if input.next_if_eq(&'=').is_some() {
                    return Ok((2, Token::AddEq));
                }
                return Ok((1, Token::Add));
            }
            Some(',') => return Ok((1, Token::Comma)),
            Some(',') => {
                input.next();
                return Ok((1, Token::Comma));
            }
            Some('-') => {
                input.next();
                if input.next_if_eq(&'>').is_some() {
                    return Ok((2, Token::Arrow));
                } else if input.next_if_eq(&'=').is_some() {
@@ -233,6 +283,7 @@ impl Token {
                return Ok((1, Token::Minus));
            }
            Some('.') => {
                input.next();
                if input.next_if_eq(&'.').is_some() {
                    if input.next_if_eq(&'.').is_some() {
                        return Ok((3, Token::DotDotDot));
@@ -242,8 +293,9 @@ impl Token {
                }
                return Ok((1, Token::Dot));
            }
            Some(c) if c == '/' => {
                let mut comment = String::from(c);
            Some(c) if c == &'/' => {
                let mut comment = String::from(*c);
                input.next();
                if let Some(c) = input.next_if_eq(&'/') {
                    comment.push(c);

@@ -251,7 +303,8 @@ impl Token {
                    comment.push(c);
                }

                if input.next_if_eq(&'\n').is_some() {
                if let Some(newline) = input.next_if_eq(&'\n') {
                    comment.push(newline);
                    return Ok((comment.len() + 1, Token::CommentLine(comment)));
                }

@@ -283,37 +336,60 @@ impl Token {
                return Ok((1, Token::Div));
            }
            Some(':') => {
                input.next();
                if input.next_if_eq(&':').is_some() {
                    return Ok((2, Token::DoubleColon));
                } else {
                    return Ok((1, Token::Colon));
                }
            }
            Some(';') => return Ok((1, Token::Semicolon)),
            Some(';') => {
                input.next();
                return Ok((1, Token::Semicolon));
            }
            Some('<') => {
                input.next();
                if input.next_if_eq(&'=').is_some() {
                    return Ok((2, Token::LtEq));
                }
                return Ok((1, Token::Lt));
            }
            Some('>') => {
                input.next();
                if input.next_if_eq(&'=').is_some() {
                    return Ok((2, Token::GtEq));
                }
                return Ok((1, Token::Gt));
            }
            Some('=') => {
                input.next();
                if input.next_if_eq(&'=').is_some() {
                    return Ok((2, Token::Eq));
                }
                return Ok((1, Token::Assign));
            }
            Some('@') => return Ok((1, Token::At)),
            Some('[') => return Ok((1, Token::LeftSquare)),
            Some(']') => return Ok((1, Token::RightSquare)),
            Some('{') => return Ok((1, Token::LeftCurly)),
            Some('}') => return Ok((1, Token::RightCurly)),
            Some('@') => {
                input.next();
                return Ok((1, Token::At));
            }
            Some('[') => {
                input.next();
                return Ok((1, Token::LeftSquare));
            }
            Some(']') => {
                input.next();
                return Ok((1, Token::RightSquare));
            }
            Some('{') => {
                input.next();
                return Ok((1, Token::LeftCurly));
            }
            Some('}') => {
                input.next();
                return Ok((1, Token::RightCurly));
            }
            Some('|') => {
                input.next();
                if input.next_if_eq(&'|').is_some() {
                    return Ok((2, Token::Or));
                } else if let Some(found) = input.next() {
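Every single-character arm now follows the same shape: consume the peeked character, then use `next_if_eq` to opportunistically extend into a two- or three-character operator. A runnable sketch of that pattern in isolation, with a stand-in token type:

    use std::iter::Peekable;

    #[derive(Debug, PartialEq)]
    enum Op { Mul, MulEq, Exp, ExpEq }

    fn eat_star(input: &mut Peekable<impl Iterator<Item = char>>) -> (usize, Op) {
        input.next(); // consume the peeked '*'
        if input.next_if_eq(&'*').is_some() {
            if input.next_if_eq(&'=').is_some() {
                return (3, Op::ExpEq); // **=
            }
            return (2, Op::Exp); // **
        }
        if input.next_if_eq(&'=').is_some() {
            return (2, Op::MulEq); // *=
        }
        (1, Op::Mul) // *
    }

    fn main() {
        let mut it = "**=".chars().peekable();
        assert_eq!(eat_star(&mut it), (3, Op::ExpEq));
    }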
@@ -39,7 +39,7 @@ pub(crate) fn tokenize(path: &str, input: &str) -> Result<Vec<SpannedToken>> {
    let mut line_no = 1usize;
    let mut line_start = 0usize;
    while input.len() > index {
        match Token::eat(&input[index..(input.len() - index)])? {
        match Token::eat(&input[index..input.len()])? {
            (token_len, Token::WhiteSpace) => {
                if token_len == 0 && index == input.len() {
                    break;
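The one-line fix here is easy to miss: the old upper bound `input.len() - index` shrinks as `index` grows, so the lexer was fed a remainder truncated from both ends, and the range inverts (a panic) once `index` passes the halfway point. A tiny demonstration:

    fn main() {
        let input = "let x = 1;"; // len() == 10
        let index = 4;

        // Old bound: truncates the tail, and would panic for index > 5.
        assert_eq!(&input[index..(input.len() - index)], "x ");

        // New bound: the full remainder, equivalent to &input[index..].
        assert_eq!(&input[index..input.len()], "x = 1;");
    }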
@@ -52,7 +52,12 @@ pub(crate) fn tokenize(path: &str, input: &str) -> Result<Vec<SpannedToken>> {
                        index - line_start + 1,
                        index - line_start + 2,
                        path,
                        input[line_start..input[line_start..].find('\n').unwrap_or(input.len())].to_string(),
                        input[line_start
                            ..input[line_start..]
                                .find('\n')
                                .map(|i| i + line_start)
                                .unwrap_or(input.len())]
                            .to_string(),
                    ),
                )
                .into());
@@ -78,7 +83,12 @@ pub(crate) fn tokenize(path: &str, input: &str) -> Result<Vec<SpannedToken>> {
                    index - line_start + 1,
                    index - line_start + token_len + 1,
                    path.clone(),
                    input[line_start..input[line_start..].find('\n').unwrap_or(input.len() - line_start)].to_string(),
                    input[line_start
                        ..input[line_start..]
                            .find('\n')
                            .map(|i| i + line_start)
                            .unwrap_or(input.len())]
                        .to_string(),
                );
                match &token {
                    Token::CommentLine(_) => {
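Both error paths above get the same fix: `input[line_start..].find('\n')` returns an offset relative to `line_start`, so using it directly as an end bound into `input` selects the wrong span; `.map(|i| i + line_start)` converts it back to an absolute index (and the second hunk's `unwrap_or` fallback likewise becomes the absolute `input.len()`). A small demonstration of the difference:

    fn main() {
        let input = "first\nsecond\nthird";
        let line_start = 6; // start of "second"

        // Relative offset of the newline that ends the current line.
        let rel = input[line_start..].find('\n').unwrap(); // 6

        // Buggy: relative offset used as an absolute end bound.
        assert_eq!(&input[line_start..rel], "");

        // Fixed: map the offset back into absolute coordinates.
        let end = input[line_start..]
            .find('\n')
            .map(|i| i + line_start)
            .unwrap_or(input.len());
        assert_eq!(&input[line_start..end], "second");
    }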
@@ -121,6 +131,12 @@ mod tests {
        let tokens = tokenize(
            "test_path",
            r#"
            'a'
            '😭'
            '\u{10001F}'
            '\x7f'
            '\x00'
            '\x37'
            "test"
            "test{}test"
            "test{}"
@@ -219,7 +235,7 @@ mod tests {

        assert_eq!(
            output,
            r#""test" "test{}test" "test{}" "{}test" "test{" "test}" "test{test" "test}test" "te{{}}" aleo1qnr4dkkvkgfqph0vzc3y6z2eu975wnpz2925ntjccd5cfqxtyu8sta57j8 test_ident 12345 address as bool circuit const else false field for function group i128 i64 i32 i16 i8 if import in input let mut & return static string test true u128 u64 u32 u16 u8 self Self console ! != && ( ) * ** **= *= + += , - -= -> _ . .. ... / /= : :: ; < <= = == > >= @ [ ] { { } } || ? // test
            r#"'a' '😭' '\u{10001F}' "test" "test{}test" "test{}" "{}test" "test{" "test}" "test{test" "test}test" "te{{}}" aleo1qnr4dkkvkgfqph0vzc3y6z2eu975wnpz2925ntjccd5cfqxtyu8sta57j8 test_ident 12345 address as bool circuit const else false field for function group i128 i64 i32 i16 i8 if import in input let mut & return static string test true u128 u64 u32 u16 u8 self Self console ! != && ( ) * ** **= *= + += , - -= -> _ . .. ... / /= : :: ; < <= = == > >= @ [ ] { { } } || ? // test
/* test */ // "#
        );
    });
@@ -258,7 +258,7 @@ impl fmt::Display for Token {
            True => write!(f, "true"),
            False => write!(f, "false"),
            AddressLit(s) => write!(f, "{}", s),
            CharLit(s) => write!(f, "{}", s),
            CharLit(s) => write!(f, "'{}'", s),
            WhiteSpace => write!(f, "whitespace"),

            At => write!(f, "@"),
@@ -1,217 +0,0 @@
// Copyright (C) 2019-2022 Aleo Systems Inc.
// This file is part of the Leo library.

// The Leo library is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// The Leo library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.

use leo_ast::Ast;
#[cfg(not(feature = "ci_skip"))]
use leo_ast::Program;
use leo_errors::{emitter::Handler, LeoError, Result};

use std::fs::File;
use std::io::BufReader;
use std::iter::Iterator;
use std::path::{Path, PathBuf};

fn to_ast(program_filepath: &Path) -> Result<Ast> {
    let program_string = std::fs::read_to_string(program_filepath).expect("failed to open test");

    // Parses the Leo file and constructs a leo ast.
    leo_parser::parse_ast(&Handler::default(), "", &program_string)
}

fn setup() {
    std::env::set_var("LEO_TESTFRAMEWORK", "true");
}

fn clean() {
    std::env::remove_var("LEO_TESTFRAMEWORK");
}

#[test]
#[cfg(not(feature = "ci_skip"))]
fn test_serialize() {
    setup();

    // Construct an ast from the given test file.
    let ast = {
        let mut program_filepath = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        program_filepath.push("tests/serialization/leo/one_plus_one.leo");

        to_ast(&program_filepath).unwrap()
    };

    // Serializes the ast into JSON format.
    let serialized_ast: Program = serde_json::from_value(serde_json::to_value(ast.as_repr()).unwrap()).unwrap();

    // Load the expected ast.
    let expected: Program = serde_json::from_str(include_str!("./expected_leo_ast/one_plus_one.json")).unwrap();

    clean();
    assert_eq!(expected, serialized_ast);
}

#[test]
#[cfg(not(feature = "ci_skip"))]
fn test_serialize_no_span() {
    setup();

    let program_paths = vec![
        "tests/serialization/leo/linear_regression.leo",
        "tests/serialization/leo/palindrome.leo",
        "tests/serialization/leo/pedersen_hash.leo",
        "tests/serialization/leo/silly_sudoku.leo",
    ];

    let json_paths = vec![
        "tests/serialization/expected_leo_ast/linear_regression.json",
        "tests/serialization/expected_leo_ast/palindrome.json",
        "tests/serialization/expected_leo_ast/pedersen_hash.json",
        "tests/serialization/expected_leo_ast/silly_sudoku.json",
    ];

    for (program_path, json_path) in program_paths.into_iter().zip(json_paths) {
        // Construct an ast from the given test file.
        let ast = {
            let mut program_filepath = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
            program_filepath.push(program_path);
            to_ast(&program_filepath).unwrap()
        };

        let json_reader = {
            let mut json_filepath = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
            json_filepath.push(json_path);
            let file = File::open(json_filepath).expect("Failed to read expected ast file");
            BufReader::new(file)
        };

        // Serializes the ast into JSON format.
        let mut serialized_ast: serde_json::Value = serde_json::to_value(ast.as_repr()).unwrap();
        remove_key_from_json(&mut serialized_ast, "span");
        serialized_ast = normalize_json_value(serialized_ast);

        // Load the expected ast.
        let expected: serde_json::Value = serde_json::from_reader(json_reader).unwrap();

        assert_eq!(expected, serialized_ast);
    }
    clean();
}

// Helper functions to recursively filter keys from AST JSON.
// Redeclaring here since we don't want to make this public.
fn remove_key_from_json(value: &mut serde_json::Value, key: &str) {
    match value {
        serde_json::value::Value::Object(map) => {
            map.remove(key);
            for val in map.values_mut() {
                remove_key_from_json(val, key);
            }
        }
        serde_json::value::Value::Array(values) => {
            for val in values.iter_mut() {
                remove_key_from_json(val, key);
            }
        }
        _ => (),
    }
}

// Helper function to normalize AST
// Redeclaring here because we don't want to make this public
fn normalize_json_value(value: serde_json::Value) -> serde_json::Value {
    match value {
        serde_json::Value::Array(vec) => {
            let orig_length = vec.len();
            let mut new_vec: Vec<serde_json::Value> = vec
                .into_iter()
                .filter(|v| !matches!(v, serde_json::Value::Object(map) if map.is_empty()))
                .map(normalize_json_value)
                .collect();

            if orig_length == 2 && new_vec.len() == 1 {
                new_vec.pop().unwrap()
            } else {
                serde_json::Value::Array(new_vec)
            }
        }
        serde_json::Value::Object(map) => {
            serde_json::Value::Object(map.into_iter().map(|(k, v)| (k, normalize_json_value(v))).collect())
        }
        _ => value,
    }
}

// TODO Renable when we don't write spans to snapshots.
/* #[test]
#[cfg(not(feature = "ci_skip"))]
fn test_deserialize() {
    setup();

    // Load the expected ast.
    let expected_ast = {
        let mut program_filepath = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        program_filepath.push("tests/serialization/main.leo");

        to_ast(&program_filepath).unwrap()
    };

    // Construct an ast by deserializing a ast JSON file.
    let serialized_ast = include_str!("expected_leo_ast.json");
    let ast = Ast::from_json_string(serialized_ast).unwrap();

    clean();
    assert_eq!(expected_ast, ast);
}

#[test]
fn test_serialize_deserialize_serialize() {
    setup();

    // Construct an ast from the given test file.
    let ast = {
        let mut program_filepath = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        program_filepath.push("tests/serialization/main.leo");

        to_ast(&program_filepath).unwrap()
    };

    // Serializes the ast into JSON format.
    let serialized_ast = ast.to_json_string().unwrap();

    // Deserializes the serialized ast into an ast.
    let ast = Ast::from_json_string(&serialized_ast).unwrap();

    // Reserializes the ast into JSON format.
    let reserialized_ast = ast.to_json_string().unwrap();

    clean();
    assert_eq!(serialized_ast, reserialized_ast);
} */

#[test]
fn test_generic_parser_error() {
    setup();

    let error_result = {
        let mut program_filepath = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        program_filepath.push("tests/serialization/leo/parser_error.leo");

        to_ast(&program_filepath)
    }
    .map_err(|err| matches!(err, LeoError::ParserError(_)));

    clean();
    assert!(error_result.err().unwrap());
}
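The file removed above defined two small helpers that recur in snapshot testing: `remove_key_from_json` walks a `serde_json::Value` and deletes a key everywhere it appears, and `normalize_json_value` collapses the two-element arrays left half-empty by that removal. A usage sketch, runnable alongside those two definitions; the sample JSON below is invented for illustration:

    fn main() {
        let mut ast = serde_json::json!({
            "function": {
                "name": "main",
                "span": { "line_start": 1, "col_start": 0 },
                "statements": [ { "span": {} }, { "return": "1 + 1" } ]
            }
        });

        // Strip every "span" key, then collapse the leftover empty objects.
        remove_key_from_json(&mut ast, "span");
        let ast = normalize_json_value(ast);

        assert_eq!(
            ast,
            serde_json::json!({
                "function": { "name": "main", "statements": { "return": "1 + 1" } }
            })
        );
    }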
@@ -251,8 +251,8 @@ create_errors!(
    /// When a string is not properly closed.
    @backtraced
    lexer_string_not_closed {
        args: (input: impl Display),
        msg: format!("Expected a closed string but found `{}`.", input),
        args: (input: impl Debug),
        msg: format!("Expected a closed string but found `{:?}`.", input),
        help: None,
    }
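The `Display` to `Debug` bound change is forced by the new lexer: `lexer_string_not_closed` is now handed the `Vec` of characters collected so far (see the string-literal arm above), and `Vec<T>` implements `Debug` but not `Display`. A stand-in illustration of the resulting message:

    use std::fmt::Debug;

    fn not_closed_message(input: impl Debug) -> String {
        format!("Expected a closed string but found `{:?}`.", input)
    }

    fn main() {
        // e.g. the characters collected from an unterminated `"test`
        println!("{}", not_closed_message(vec!['t', 'e', 's', 't']));
        // Expected a closed string but found `['t', 'e', 's', 't']`.
    }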