From 3593529c4d5062900e0d90e9c437f3132334705f Mon Sep 17 00:00:00 2001
From: gluax <16431709+gluax@users.noreply.github.com>
Date: Fri, 21 Jan 2022 12:32:09 -0800
Subject: [PATCH 01/14] migrate parser
---
parser/Cargo.toml | 10 +-
parser/README.md | 4 +-
parser/benches/leo_ast.rs | 40 +-
parser/examples/parser.rs | 18 +-
parser/src/lib.rs | 71 +-
parser/src/parser/context.rs | 150 +-
parser/src/parser/expression.rs | 282 ++--
parser/src/parser/file.rs | 392 +++---
parser/src/parser/mod.rs | 23 +-
parser/src/parser/statement.rs | 163 +--
parser/src/parser/type_.rs | 77 +-
parser/src/test.rs | 186 ++-
parser/src/tokenizer/lexer.rs | 12 +-
parser/src/tokenizer/mod.rs | 104 +-
parser/src/tokenizer/token.rs | 75 +-
parser/tests/mod.rs | 17 -
.../expected_leo_ast/linear_regression.json | 1162 ----------------
.../expected_leo_ast/one_plus_one.json | 100 --
.../expected_leo_ast/palindrome.json | 1189 ----------------
.../expected_leo_ast/pedersen_hash.json | 299 ----
.../expected_leo_ast/silly_sudoku.json | 1209 -----------------
parser/tests/serialization/json.rs | 6 +-
.../serialization/leo/deprecated_error.leo | 7 -
.../serialization/leo/linear_regression.leo | 65 -
.../tests/serialization/leo/one_plus_one.leo | 3 -
parser/tests/serialization/leo/palindrome.leo | 59 -
.../tests/serialization/leo/parser_error.leo | 1 -
.../tests/serialization/leo/pedersen_hash.leo | 25 -
.../tests/serialization/leo/silly_sudoku.leo | 71 -
parser/tests/serialization/mod.rs | 17 -
30 files changed, 921 insertions(+), 4916 deletions(-)
delete mode 100644 parser/tests/mod.rs
delete mode 100644 parser/tests/serialization/expected_leo_ast/linear_regression.json
delete mode 100644 parser/tests/serialization/expected_leo_ast/one_plus_one.json
delete mode 100644 parser/tests/serialization/expected_leo_ast/palindrome.json
delete mode 100644 parser/tests/serialization/expected_leo_ast/pedersen_hash.json
delete mode 100644 parser/tests/serialization/expected_leo_ast/silly_sudoku.json
delete mode 100644 parser/tests/serialization/leo/deprecated_error.leo
delete mode 100644 parser/tests/serialization/leo/linear_regression.leo
delete mode 100644 parser/tests/serialization/leo/one_plus_one.leo
delete mode 100644 parser/tests/serialization/leo/palindrome.leo
delete mode 100644 parser/tests/serialization/leo/parser_error.leo
delete mode 100644 parser/tests/serialization/leo/pedersen_hash.leo
delete mode 100644 parser/tests/serialization/leo/silly_sudoku.leo
delete mode 100644 parser/tests/serialization/mod.rs
diff --git a/parser/Cargo.toml b/parser/Cargo.toml
index 560f072eb0..e16765ebcf 100644
--- a/parser/Cargo.toml
+++ b/parser/Cargo.toml
@@ -16,7 +16,7 @@ categories = [ "cryptography::cryptocurrencies", "web-programming" ]
include = [ "Cargo.toml", "src", "README.md", "LICENSE.md" ]
license = "GPL-3.0"
edition = "2021"
-rust-version = "1.56.1"
+rust-version = "1.56"
[[bench]]
name = "leo_ast"
@@ -31,6 +31,14 @@ version = "1.5.3"
path = "../errors"
version = "1.5.3"
+[dependencies.leo-input]
+path = "../input"
+version = "1.5.1"
+
+[dependencies.leo-span]
+path = "../span"
+version = "1.5.3"
+
[dependencies.lazy_static]
version = "1.3.0"
diff --git a/parser/README.md b/parser/README.md
index c09aff1736..b683f9b4fb 100644
--- a/parser/README.md
+++ b/parser/README.md
@@ -30,7 +30,8 @@ Bolded ones are also keywords.
#### Symbols
- At
- Not
-- And
+- And (`&&`)
+- Ampersand (`&`)
- Or
- Eq
- NotEq
@@ -98,7 +99,6 @@ Bolded ones are also keywords.
- **If**
- **In**
- **Let**
-- **Mut**
- **Return**
- **Static**
- **String**
diff --git a/parser/benches/leo_ast.rs b/parser/benches/leo_ast.rs
index 0777665e0d..9394de2b6b 100644
--- a/parser/benches/leo_ast.rs
+++ b/parser/benches/leo_ast.rs
@@ -1,4 +1,4 @@
-// Copyright (C) 2019-2021 Aleo Systems Inc.
+// Copyright (C) 2019-2022 Aleo Systems Inc.
// This file is part of the Leo library.
// The Leo library is free software: you can redistribute it and/or modify
@@ -14,55 +14,51 @@
// You should have received a copy of the GNU General Public License
// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
+use leo_ast::Ast;
+use leo_errors::emitter::Handler;
+use leo_span::symbol::create_session_if_not_set_then;
+
use criterion::{criterion_group, criterion_main, Criterion};
use std::time::Duration;
-fn bench_big_if_else(c: &mut Criterion) {
- let program_string = include_str!("./big_if_else.leo");
- let ast = leo_parser::parse_ast("./big_if_else.leo", program_string).expect("failed to parse benchmark");
+fn parse_ast(path: &str, input: &str) -> Ast {
+ create_session_if_not_set_then(|_| {
+ leo_parser::parse_ast(&Handler::default(), path, input).expect("failed to parse benchmark")
+ })
+}
+fn bench_big_if_else(c: &mut Criterion) {
+ let ast = parse_ast("./big_if_else.leo", include_str!("./big_if_else.leo"));
c.bench_function("Ast::big_if_else", |b| b.iter(|| &ast));
}
fn bench_big_ternary(c: &mut Criterion) {
- let program_string = include_str!("./big_ternary.leo");
- let ast = leo_parser::parse_ast("./big_ternary.leo", program_string).expect("failed to parse benchmark");
-
+ let ast = parse_ast("./big_ternary.leo", include_str!("./big_ternary.leo"));
c.bench_function("Ast::big_ternary", |b| b.iter(|| &ast));
}
fn bench_big_circuit(c: &mut Criterion) {
- let program_string = include_str!("./big_circuit.leo");
- let ast = leo_parser::parse_ast("./big_circuit.leo", program_string).expect("failed to parse benchmark");
-
+ let ast = parse_ast("./big_circuit.leo", include_str!("./big_circuit.leo"));
c.bench_function("Ast::big_circuit", |b| b.iter(|| &ast));
}
fn bench_long_expr(c: &mut Criterion) {
- let program_string = include_str!("./long_expr.leo");
- let ast = leo_parser::parse_ast("./long_expr.leo", program_string).expect("failed to parse benchmark");
-
+ let ast = parse_ast("./long_expr.leo", include_str!("./long_expr.leo"));
c.bench_function("Ast::long_expr", |b| b.iter(|| &ast));
}
fn bench_long_array(c: &mut Criterion) {
- let program_string = include_str!("./long_array.leo");
- let ast = leo_parser::parse_ast("./long_array.leo", program_string).expect("failed to parse benchmark");
-
+ let ast = parse_ast("./long_array.leo", include_str!("./long_array.leo"));
c.bench_function("Ast::long_array", |b| b.iter(|| &ast));
}
fn bench_many_foos(c: &mut Criterion) {
- let program_string = include_str!("./many_foos.leo");
- let ast = leo_parser::parse_ast("./many_foos.leo", program_string).expect("failed to parse benchmark");
-
+ let ast = parse_ast("./many_foos.leo", include_str!("./many_foos.leo"));
c.bench_function("Ast::many_foos", |b| b.iter(|| &ast));
}
fn bench_many_assigns(c: &mut Criterion) {
- let program_string = include_str!("./many_assigns.leo");
- let ast = leo_parser::parse_ast("./many_assigns.leo", program_string).expect("failed to parse benchmark");
-
+ let ast = parse_ast("./many_assigns.leo", include_str!("./many_assigns.leo"));
c.bench_function("Ast::many_assigns", |b| b.iter(|| &ast));
}
diff --git a/parser/examples/parser.rs b/parser/examples/parser.rs
index 04cb943625..801646f27f 100644
--- a/parser/examples/parser.rs
+++ b/parser/examples/parser.rs
@@ -1,4 +1,4 @@
-// Copyright (C) 2019-2021 Aleo Systems Inc.
+// Copyright (C) 2019-2022 Aleo Systems Inc.
// This file is part of the Leo library.
// The Leo library is free software: you can redistribute it and/or modify
@@ -15,7 +15,9 @@
// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
use leo_ast::Ast;
-use leo_errors::Result;
+use leo_errors::{emitter::Handler, Result};
+use leo_span::symbol::create_session_if_not_set_then;
+
use std::{env, fs, path::Path};
fn to_leo_tree(filepath: &Path) -> Result<String> {
@@ -23,12 +25,12 @@ fn to_leo_tree(filepath: &Path) -> Result {
let program_filepath = filepath.to_path_buf();
let program_string = fs::read_to_string(&program_filepath).expect("failed to open input file");
- // Parses the Leo file and constructs an ast.
- let ast = leo_parser::parse_ast(filepath.to_str().unwrap(), &program_string)?;
-
- let serialized_leo_ast = Ast::to_json_string(&ast).expect("serialization failed");
-
- Ok(serialized_leo_ast)
+ // Parses the Leo file constructing an ast which is then serialized.
+ create_session_if_not_set_then(|_| {
+ let handler = Handler::default();
+ let ast = leo_parser::parse_ast(&handler, filepath.to_str().unwrap(), &program_string)?;
+ Ok(Ast::to_json_string(&ast).expect("serialization failed"))
+ })
}
fn main() -> Result<()> {
diff --git a/parser/src/lib.rs b/parser/src/lib.rs
index 7b1a0d83aa..21a1001bde 100644
--- a/parser/src/lib.rs
+++ b/parser/src/lib.rs
@@ -1,4 +1,4 @@
-// Copyright (C) 2019-2021 Aleo Systems Inc.
+// Copyright (C) 2019-2022 Aleo Systems Inc.
// This file is part of the Leo library.
// The Leo library is free software: you can redistribute it and/or modify
@@ -23,19 +23,82 @@
#![doc = include_str!("../README.md")]
pub(crate) mod tokenizer;
+use leo_input::LeoInputParser;
pub use tokenizer::KEYWORD_TOKENS;
pub(crate) use tokenizer::*;
pub mod parser;
pub use parser::*;
-use leo_ast::Ast;
+use leo_ast::{Ast, Input};
+use leo_errors::emitter::Handler;
use leo_errors::Result;
#[cfg(test)]
mod test;
/// Creates a new AST from a given file path and source code text.
-pub fn parse_ast<T: AsRef<str>, Y: AsRef<str>>(path: T, source: Y) -> Result<Ast> {
- Ok(Ast::new(parser::parse(path.as_ref(), source.as_ref())?))
+pub fn parse_ast<T: AsRef<str>, Y: AsRef<str>>(handler: &Handler, path: T, source: Y) -> Result<Ast> {
+ Ok(Ast::new(parser::parse(handler, path.as_ref(), source.as_ref())?))
+}
+
+/// Parses program input from from the input file path and state file path
+pub fn parse_program_input<T: AsRef<str>, Y: AsRef<str>, T2: AsRef<str>, Y2: AsRef<str>>(
+ input_string: T,
+ input_path: Y,
+ state_string: T2,
+ state_path: Y2,
+) -> Result<Input> {
+ let input_syntax_tree = LeoInputParser::parse_file(input_string.as_ref()).map_err(|mut e| {
+ e.set_path(
+ input_path.as_ref(),
+ &input_string
+ .as_ref()
+ .lines()
+ .map(|x| x.to_string())
+ .collect::<Vec<String>>()[..],
+ );
+
+ e
+ })?;
+ let state_syntax_tree = LeoInputParser::parse_file(state_string.as_ref()).map_err(|mut e| {
+ e.set_path(
+ state_path.as_ref(),
+ &state_string
+ .as_ref()
+ .lines()
+ .map(|x| x.to_string())
+ .collect::<Vec<String>>()[..],
+ );
+
+ e
+ })?;
+
+ let mut input = Input::new();
+ input.parse_input(input_syntax_tree).map_err(|mut e| {
+ e.set_path(
+ input_path.as_ref(),
+ &input_string
+ .as_ref()
+ .lines()
+ .map(|x| x.to_string())
+ .collect::<Vec<String>>()[..],
+ );
+
+ e
+ })?;
+ input.parse_state(state_syntax_tree).map_err(|mut e| {
+ e.set_path(
+ state_path.as_ref(),
+ &state_string
+ .as_ref()
+ .lines()
+ .map(|x| x.to_string())
+ .collect::<Vec<String>>()[..],
+ );
+
+ e
+ })?;
+
+ Ok(input)
}
diff --git a/parser/src/parser/context.rs b/parser/src/parser/context.rs
index af64bd9c43..c125e2a591 100644
--- a/parser/src/parser/context.rs
+++ b/parser/src/parser/context.rs
@@ -1,4 +1,4 @@
-// Copyright (C) 2019-2021 Aleo Systems Inc.
+// Copyright (C) 2019-2022 Aleo Systems Inc.
// This file is part of the Leo library.
// The Leo library is free software: you can redistribute it and/or modify
@@ -14,42 +14,48 @@
// You should have received a copy of the GNU General Public License
// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
-use std::{borrow::Cow, unimplemented};
-
use crate::{assert_no_whitespace, tokenizer::*, Token, KEYWORD_TOKENS};
+
use leo_ast::*;
-use leo_errors::{LeoError, ParserError, Result, Span};
+use leo_errors::emitter::Handler;
+use leo_errors::{LeoError, ParserError, Result};
+use leo_span::{Span, Symbol};
+
+use std::{borrow::Cow, unimplemented};
use tendril::format_tendril;
/// Stores a program in tokenized format plus additional context.
/// May be converted into a [`Program`] AST by parsing all tokens.
-pub struct ParserContext {
+pub struct ParserContext<'a> {
+ #[allow(dead_code)]
+ pub(crate) handler: &'a Handler,
tokens: Vec<SpannedToken>,
end_span: Span,
// true if parsing an expression for an if statement -- means circuit inits are not legal
pub(crate) fuzzy_struct_state: bool,
}
-impl Iterator for ParserContext {
+impl Iterator for ParserContext<'_> {
type Item = SpannedToken;
fn next(&mut self) -> Option<SpannedToken> {
- self.tokens.pop()
+ self.bump()
}
}
-impl ParserContext {
+impl<'a> ParserContext<'a> {
///
/// Returns a new [`ParserContext`] type given a vector of tokens.
///
- pub fn new(mut tokens: Vec<SpannedToken>) -> Self {
+ pub fn new(handler: &'a Handler, mut tokens: Vec<SpannedToken>) -> Self {
tokens.reverse();
// todo: performance optimization here: drain filter
tokens = tokens
.into_iter()
.filter(|x| !matches!(x.token, Token::CommentLine(_) | Token::CommentBlock(_)))
.collect();
- ParserContext {
+ Self {
+ handler,
end_span: tokens
.iter()
.find(|x| !x.span.content.trim().is_empty())
@@ -60,6 +66,16 @@ impl ParserContext {
}
}
+ /// Returns the current token if there is one.
+ pub fn curr(&self) -> Option<&SpannedToken> {
+ self.tokens.last()
+ }
+
+ /// Emit the error `err`.
+ pub(crate) fn emit_err(&self, err: ParserError) {
+ self.handler.emit_err(err.into());
+ }
+
///
/// Returns an unexpected end of function [`SyntaxError`].
///
@@ -75,12 +91,15 @@ impl ParserContext {
}
///
- /// Returns a reference to the next token or error if it does not exist.
+ /// Returns a reference to the next SpannedToken or error if it does not exist.
///
pub fn peek(&self) -> Result<&SpannedToken> {
- self.tokens.last().ok_or_else(|| self.eof())
+ self.curr().ok_or_else(|| self.eof())
}
+ ///
+ /// Returns a reference to the next Token.
+ ///
pub fn peek_token(&self) -> Cow<'_, Token> {
self.tokens
.last()
@@ -112,14 +131,19 @@ impl ParserContext {
!self.tokens.is_empty()
}
+ /// Advances the current token.
+ pub fn bump(&mut self) -> Option<SpannedToken> {
+ self.tokens.pop()
+ }
+
///
/// Removes the next token if it exists and returns it, or [None] if
/// the next token does not exist.
///
pub fn eat(&mut self, token: Token) -> Option<SpannedToken> {
- if let Some(SpannedToken { token: inner, .. }) = self.tokens.last() {
+ if let Some(SpannedToken { token: inner, .. }) = self.curr() {
if &token == inner {
- return self.tokens.pop();
+ return self.bump();
}
}
None
@@ -139,13 +163,12 @@ impl ParserContext {
pub fn eat_identifier(&mut self) -> Option<Identifier> {
if let Some(SpannedToken {
token: Token::Ident(_), ..
- }) = self.tokens.last()
+ }) = self.curr()
{
- let token = self.tokens.pop().unwrap();
if let SpannedToken {
token: Token::Ident(name),
span,
- } = token
+ } = self.bump().unwrap()
{
return Some(Identifier { name, span });
} else {
@@ -186,6 +209,21 @@ impl ParserContext {
})
}
+ /// Returns `true` if the next token is Function or if it is a Const followed by Function.
+ /// Returns `false` otherwise.
+ pub fn peek_is_function(&self) -> Result<bool> {
+ let first = &self.peek()?.token;
+ let next = if self.tokens.len() >= 2 {
+ &self.peek_next()?.token
+ } else {
+ return Ok(false);
+ };
+ Ok(matches!(
+ (first, next),
+ (Token::Function | Token::At, _) | (Token::Const, Token::Function)
+ ))
+ }
+
///
/// Removes the next two tokens if they are a pair of [`GroupCoordinate`] and returns them,
/// or [None] if the next token is not a [`GroupCoordinate`].
@@ -263,13 +301,12 @@ impl ParserContext {
pub fn eat_int(&mut self) -> Option<(PositiveNumber, Span)> {
if let Some(SpannedToken {
token: Token::Int(_), ..
- }) = self.tokens.last()
+ }) = self.curr()
{
- let token = self.tokens.pop().unwrap();
if let SpannedToken {
token: Token::Int(value),
span,
- } = token
+ } = self.bump().unwrap()
{
return Some((PositiveNumber { value }, span));
} else {
@@ -284,9 +321,9 @@ impl ParserContext {
/// the next token does not exist.
///
pub fn eat_any(&mut self, token: &[Token]) -> Option<SpannedToken> {
- if let Some(SpannedToken { token: inner, .. }) = self.tokens.last() {
+ if let Some(SpannedToken { token: inner, .. }) = self.curr() {
if token.iter().any(|x| x == inner) {
- return self.tokens.pop();
+ return self.bump();
}
}
None
@@ -296,9 +333,9 @@ impl ParserContext {
/// Returns the span of the next token if it is equal to the given [`Token`], or error.
///
pub fn expect(&mut self, token: Token) -> Result<Span> {
- if let Some(SpannedToken { token: inner, span }) = self.tokens.last() {
+ if let Some(SpannedToken { token: inner, span }) = self.curr() {
if &token == inner {
- Ok(self.tokens.pop().unwrap().span)
+ Ok(self.bump().unwrap().span)
} else {
Err(ParserError::unexpected(inner, token, span).into())
}
@@ -311,9 +348,9 @@ impl ParserContext {
/// Returns the span of the next token if it is equal to one of the given [`Token`]s, or error.
///
pub fn expect_oneof(&mut self, token: &[Token]) -> Result<SpannedToken> {
- if let Some(SpannedToken { token: inner, span }) = self.tokens.last() {
+ if let Some(SpannedToken { token: inner, span }) = self.curr() {
if token.iter().any(|x| x == inner) {
- Ok(self.tokens.pop().unwrap())
+ Ok(self.bump().unwrap())
} else {
return Err(ParserError::unexpected(
inner,
@@ -334,27 +371,25 @@ impl ParserContext {
pub fn expect_loose_identifier(&mut self) -> Result<Identifier> {
if let Some(token) = self.eat_any(KEYWORD_TOKENS) {
return Ok(Identifier {
- name: token.token.to_string().into(),
+ name: token.token.keyword_to_symbol().unwrap(),
span: token.span,
});
}
if let Some((int, span)) = self.eat_int() {
- return Ok(Identifier { name: int.value, span });
+ let name = Symbol::intern(&int.value);
+ return Ok(Identifier { name, span });
}
self.expect_ident()
}
- ///
/// Returns the [`Identifier`] of the next token if it is an [`Identifier`], or error.
- ///
pub fn expect_ident(&mut self) -> Result<Identifier> {
- if let Some(SpannedToken { token: inner, span }) = self.tokens.last() {
+ if let Some(SpannedToken { token: inner, span }) = self.curr() {
if let Token::Ident(_) = inner {
- let token = self.tokens.pop().unwrap();
if let SpannedToken {
token: Token::Ident(name),
span,
- } = token
+ } = self.bump().unwrap()
{
Ok(Identifier { name, span })
} else {
@@ -378,4 +413,53 @@ impl ParserContext {
Err(self.eof())
}
}
+
+ /// Parses a list of `T`s using `inner`
+ /// The opening and closing delimiters are `bra` and `ket`,
+ /// and elements in the list are separated by `sep`.
+ /// When `(list, true)` is returned, `sep` was a terminator.
+ pub(super) fn parse_list<T>(
+ &mut self,
+ open: Token,
+ close: Token,
+ sep: Token,
+ mut inner: impl FnMut(&mut Self) -> Result