Merge branch 'tokenizer' of github.com:AleoHQ/leo into tokenizer

Protryon 2021-03-07 06:27:57 -08:00
commit 1f49d8bc44
9 changed files with 245 additions and 11 deletions

View File

@ -110,7 +110,7 @@ impl fmt::Display for FormattedError {
write!(
f,
"{indent }--> {path}{line_start}:{start}\n\
"{indent }--> {path} {line_start}:{start}\n\
{indent } |\n",
indent = INDENT,
path = path,
@ -132,7 +132,7 @@ impl fmt::Display for FormattedError {
write!(
f,
"{indent } | {underline}\n\
"{indent } | {underline}\n\
{indent } |\n\
{indent } = {message}",
indent = INDENT,

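The two write! calls above build the source-location header and the underline of a formatted error. As a rough illustration of the layout they produce, here is a minimal sketch, not Leo's FormattedError type; the path and position values are made up, and INDENT is assumed to be a few spaces like the constant referenced above.

// A minimal sketch of the "-->" location header that the Display impl above
// assembles with write!. The example values are hypothetical.
const INDENT: &str = "    ";

fn location_header(path: &str, line_start: usize, start: usize) -> String {
    format!(
        "{indent}--> {path} {line_start}:{start}\n{indent} |\n",
        indent = INDENT,
        path = path,
        line_start = line_start,
        start = start,
    )
}

fn main() {
    // Prints something like:
    //     --> main.leo 110:7
    //      |
    print!("{}", location_header("main.leo", 110, 7));
}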
View File

@ -163,7 +163,6 @@ impl ParserContext {
/// Removes the next two tokens if they are a pair of [`GroupCoordinate`] and returns them,
/// or [None] if the next token is not a [`GroupCoordinate`].
///
// kinda hacky, we're not LALR(1) for groups...
pub fn eat_group_partial(&mut self) -> Option<(GroupCoordinate, GroupCoordinate, Span)> {
let mut i = self.tokens.len() - 1;
let start_span = self.tokens.get(i)?.span.clone();

View File

@ -32,17 +32,40 @@ const INT_TYPES: &[Token] = &[
];
impl ParserContext {
///
/// Returns an [`Expression`] AST node if the next tokens represent an expression.
/// Includes circuit init expressions.
///
pub fn parse_expression(&mut self) -> SyntaxResult<Expression> {
// Store current parser state.
let prior_fuzzy_state = self.fuzzy_struct_state;
// Allow circuit init expressions.
self.fuzzy_struct_state = false;
// Parse expression.
let result = self.parse_expression_fuzzy();
// Restore prior parser state.
self.fuzzy_struct_state = prior_fuzzy_state;
result
}
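parse_expression saves fuzzy_struct_state, clears it so circuit init expressions are allowed inside the sub-expression, and restores it on the way out (the flag is set back to true while parsing if conditions further down in this diff). A minimal, self-contained sketch of that save/restore pattern; ToyParser and its field name are illustrative, not Leo's API.

// A toy sketch of the save/restore pattern used by parse_expression above.
struct ToyParser {
    // Mirrors `fuzzy_struct_state`: when true, circuit init expressions are
    // disallowed (e.g. while parsing an `if` condition, where `{` must start the block).
    disallow_circuit_init: bool,
}

impl ToyParser {
    fn parse_expression(&mut self) -> String {
        // Store current parser state.
        let prior = self.disallow_circuit_init;
        // Allow circuit init expressions inside this sub-expression.
        self.disallow_circuit_init = false;
        let result = self.parse_inner();
        // Restore prior parser state before returning.
        self.disallow_circuit_init = prior;
        result
    }

    fn parse_inner(&mut self) -> String {
        format!("circuit init allowed: {}", !self.disallow_circuit_init)
    }
}

fn main() {
    let mut p = ToyParser { disallow_circuit_init: true };
    println!("{}", p.parse_expression()); // circuit init allowed: true
    assert!(p.disallow_circuit_init);     // flag restored afterwards
}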
///
/// Returns an [`Expression`] AST node if the next tokens represent
/// a ternary expression. Circuit init expressions may or may not be allowed,
/// depending on the current parser state.
///
/// Otherwise, tries to parse the next token using [`parse_or_expression`].
///
pub fn parse_expression_fuzzy(&mut self) -> SyntaxResult<Expression> {
// Check if we are parsing a ternary expression.
let if_token = self.eat(Token::If);
// Try to parse the next expression. Try BinaryOperation::Or.
let mut expr = self.parse_or_expression()?;
// Parse the rest of the ternary expression.
if self.eat(Token::Question).is_some() {
let if_true = self.parse_expression()?;
self.expect(Token::Colon)?;
@ -60,6 +83,12 @@ impl ParserContext {
Ok(expr)
}
///
/// Returns an [`Expression`] AST node if the next tokens represent
/// a binary or expression.
///
/// Otherwise, tries to parse the next token using [`parse_and_expression`].
///
pub fn parse_or_expression(&mut self) -> SyntaxResult<Expression> {
let mut expr = self.parse_and_expression()?;
while self.eat(Token::Or).is_some() {
@ -74,6 +103,12 @@ impl ParserContext {
Ok(expr)
}
///
/// Returns an [`Expression`] AST node if the next tokens represent a
/// binary and expression.
///
/// Otherwise, tries to parse the next token using [`parse_bit_or_expression`].
///
pub fn parse_and_expression(&mut self) -> SyntaxResult<Expression> {
let mut expr = self.parse_bit_or_expression()?;
while self.eat(Token::And).is_some() {
@ -88,6 +123,12 @@ impl ParserContext {
Ok(expr)
}
///
/// Returns an [`Expression`] AST node if the next tokens represent a
/// binary bitwise or expression.
///
/// Otherwise, tries to parse the next token using [`parse_bit_xor_expression`].
///
pub fn parse_bit_or_expression(&mut self) -> SyntaxResult<Expression> {
let mut expr = self.parse_bit_xor_expression()?;
while self.eat(Token::BitOr).is_some() {
@ -102,6 +143,12 @@ impl ParserContext {
Ok(expr)
}
///
/// Returns an [`Expression`] AST node if the next tokens represent a
/// binary bitwise xor expression.
///
/// Otherwise, tries to parse the next token using [`parse_bit_and_expression`].
///
pub fn parse_bit_xor_expression(&mut self) -> SyntaxResult<Expression> {
let mut expr = self.parse_bit_and_expression()?;
while self.eat(Token::BitXor).is_some() {
@ -116,6 +163,12 @@ impl ParserContext {
Ok(expr)
}
///
/// Returns an [`Expression`] AST node if the next tokens represent a
/// binary bitwise and expression.
///
/// Otherwise, tries to parse the next token using [`parse_eq_expression`].
///
pub fn parse_bit_and_expression(&mut self) -> SyntaxResult<Expression> {
let mut expr = self.parse_eq_expression()?;
while self.eat(Token::BitAnd).is_some() {
@ -130,6 +183,12 @@ impl ParserContext {
Ok(expr)
}
///
/// Returns an [`Expression`] AST node if the next tokens represent a
/// binary equals or not equals expression.
///
/// Otherwise, tries to parse the next token using [`parse_rel_expression`].
///
pub fn parse_eq_expression(&mut self) -> SyntaxResult<Expression> {
let mut expr = self.parse_rel_expression()?;
while let Some(SpannedToken { token: op, .. }) = self.eat_any(&[Token::Eq, Token::NotEq]) {
@ -148,6 +207,12 @@ impl ParserContext {
Ok(expr)
}
///
/// Returns an [`Expression`] AST node if the next tokens represent a
/// binary relational expression: less than, less than or equals, greater than, greater than or equals.
///
/// Otherwise, tries to parse the next token using [`parse_shift_expression`].
///
pub fn parse_rel_expression(&mut self) -> SyntaxResult<Expression> {
let mut expr = self.parse_shift_expression()?;
while let Some(SpannedToken { token: op, .. }) = self.eat_any(&[Token::Lt, Token::LtEq, Token::Gt, Token::GtEq])
@ -169,10 +234,16 @@ impl ParserContext {
Ok(expr)
}
///
/// Returns an [`Expression`] AST node if the next tokens represent a
/// binary shift expression.
///
/// Otherwise, tries to parse the next token using [`parse_add_sub_expression`].
///
pub fn parse_shift_expression(&mut self) -> SyntaxResult<Expression> {
let mut expr = self.parse_add_expression()?;
let mut expr = self.parse_add_sub_expression()?;
while let Some(SpannedToken { token: op, .. }) = self.eat_any(&[Token::Shl, Token::Shr, Token::ShrSigned]) {
let right = self.parse_add_expression()?;
let right = self.parse_add_sub_expression()?;
expr = Expression::Binary(BinaryExpression {
span: expr.span() + right.span(),
op: match op {
@ -188,10 +259,16 @@ impl ParserContext {
Ok(expr)
}
pub fn parse_add_expression(&mut self) -> SyntaxResult<Expression> {
let mut expr = self.parse_multiply_expression()?;
///
/// Returns an [`Expression`] AST node if the next tokens represent a
/// binary addition or subtraction expression.
///
/// Otherwise, tries to parse the next token using [`parse_mul_div_mod_expression`].
///
pub fn parse_add_sub_expression(&mut self) -> SyntaxResult<Expression> {
let mut expr = self.parse_mul_div_mod_expression()?;
while let Some(SpannedToken { token: op, .. }) = self.eat_any(&[Token::Add, Token::Minus]) {
let right = self.parse_multiply_expression()?;
let right = self.parse_mul_div_mod_expression()?;
expr = Expression::Binary(BinaryExpression {
span: expr.span() + right.span(),
op: match op {
@ -206,7 +283,13 @@ impl ParserContext {
Ok(expr)
}
pub fn parse_multiply_expression(&mut self) -> SyntaxResult<Expression> {
///
/// Returns an [`Expression`] AST node if the next tokens represent a
/// binary multiplication, division, or modulus expression.
///
/// Otherwise, tries to parse the next token using [`parse_exp_expression`].
///
pub fn parse_mul_div_mod_expression(&mut self) -> SyntaxResult<Expression> {
let mut expr = self.parse_exp_expression()?;
while let Some(SpannedToken { token: op, .. }) = self.eat_any(&[Token::Mul, Token::Div, Token::Mod]) {
let right = self.parse_exp_expression()?;
@ -225,6 +308,12 @@ impl ParserContext {
Ok(expr)
}
///
/// Returns an [`Expression`] AST node if the next tokens represent a
/// binary exponentiation expression.
///
/// Otherwise, tries to parse the next token using [`parse_cast_expression`].
///
pub fn parse_exp_expression(&mut self) -> SyntaxResult<Expression> {
let mut exprs = vec![];
exprs.push(self.parse_cast_expression()?);
@ -244,6 +333,12 @@ impl ParserContext {
Ok(expr)
}
///
/// Returns an [`Expression`] AST node if the next tokens represent a
/// type cast expression.
///
/// Otherwise, tries to parse the next token using [`parse_unary_expression`].
///
pub fn parse_cast_expression(&mut self) -> SyntaxResult<Expression> {
let mut expr = self.parse_unary_expression()?;
while self.eat(Token::As).is_some() {
@ -257,6 +352,12 @@ impl ParserContext {
Ok(expr)
}
///
/// Returns an [`Expression`] AST node if the next tokens represent a
/// unary not, negate, or bitwise not expression.
///
/// Otherwise, tries to parse the next token using [`parse_access_expression`].
///
pub fn parse_unary_expression(&mut self) -> SyntaxResult<Expression> {
let mut ops = vec![];
while let Some(token) = self.eat_any(&[Token::Not, Token::Minus, Token::BitNot]) {
@ -278,6 +379,12 @@ impl ParserContext {
Ok(inner)
}
///
/// Returns an [`Expression`] AST node if the next tokens represent an
/// array access, circuit member access, function call, or static function call expression.
///
/// Otherwise, tries to parse the next token using [`parse_primary_expression`].
///
pub fn parse_access_expression(&mut self) -> SyntaxResult<Expression> {
let mut expr = self.parse_primary_expression()?;
while let Some(token) = self.eat_any(&[Token::LeftSquare, Token::Dot, Token::LeftParen, Token::DoubleColon]) {
@ -377,6 +484,12 @@ impl ParserContext {
Ok(expr)
}
///
/// Returns a [`SpreadOrExpression`] AST node if the next tokens represent a
/// spread or an expression.
///
/// This method should only be called in the context of an array access expression.
///
pub fn parse_spread_or_expression(&mut self) -> SyntaxResult<SpreadOrExpression> {
Ok(if self.eat(Token::DotDotDot).is_some() {
SpreadOrExpression::Spread(self.parse_expression()?)
@ -385,6 +498,10 @@ impl ParserContext {
})
}
///
/// Returns an [`Expression`] AST node if the next tokens represent a
/// circuit initialization expression.
///
pub fn parse_circuit_init(&mut self, ident: Identifier) -> SyntaxResult<Expression> {
self.expect(Token::LeftCurly)?;
let mut members = vec![];
@ -419,6 +536,14 @@ impl ParserContext {
}))
}
///
/// Returns an [`Expression`] AST node if the next token is a primary expression:
/// - Scalar types: field, group, unsigned integer, signed integer, boolean, address
/// - Aggregate types: array, tuple
/// - Identifiers: variables, keywords
///
/// Returns an expression error if the token cannot be matched.
///
pub fn parse_primary_expression(&mut self) -> SyntaxResult<Expression> {
let SpannedToken { token, span } = self.expect_any()?;
Ok(match token {

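The chain parse_or_expression → parse_and_expression → parse_bit_or_expression → ... → parse_exp_expression documented above encodes operator precedence by plain recursive descent: each level first parses the next tighter level, then folds its own operators left to right. A toy sketch of two such levels over a hand-built token list; Tok, Toy and the arithmetic evaluation are illustrative, not Leo's parser, which builds BinaryExpression nodes instead.

// A toy sketch of precedence-by-layering: parse_add defers to parse_mul,
// which defers to parse_primary, and each level folds its own operators.
#[derive(Clone, Copy, Debug, PartialEq)]
enum Tok { Num(i64), Plus, Star }

struct Toy { tokens: Vec<Tok>, pos: usize }

impl Toy {
    fn eat(&mut self, t: Tok) -> bool {
        if self.tokens.get(self.pos) == Some(&t) { self.pos += 1; true } else { false }
    }

    // Loosest level shown here: addition.
    fn parse_add(&mut self) -> i64 {
        let mut value = self.parse_mul();
        while self.eat(Tok::Plus) {
            let right = self.parse_mul();
            value += right;
        }
        value
    }

    // Tighter level: multiplication, which defers to the primary level.
    fn parse_mul(&mut self) -> i64 {
        let mut value = self.parse_primary();
        while self.eat(Tok::Star) {
            let right = self.parse_primary();
            value *= right;
        }
        value
    }

    fn parse_primary(&mut self) -> i64 {
        match self.tokens[self.pos] {
            Tok::Num(n) => { self.pos += 1; n }
            other => panic!("expected a number, found {:?}", other),
        }
    }
}

fn main() {
    // 2 + 3 * 4 groups as 2 + (3 * 4) because parse_add defers to parse_mul first.
    let mut p = Toy {
        tokens: vec![Tok::Num(2), Tok::Plus, Tok::Num(3), Tok::Star, Tok::Num(4)],
        pos: 0,
    };
    assert_eq!(p.parse_add(), 14);
    println!("ok");
}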
View File

@ -19,6 +19,9 @@ use crate::KEYWORD_TOKENS;
use super::*;
impl ParserContext {
///
/// Returns a [`Program`] AST if all tokens can be consumed and represent a valid Leo program.
///
pub fn parse_program(&mut self) -> SyntaxResult<Program> {
let mut imports = vec![];
let mut circuits = IndexMap::new();
@ -74,6 +77,9 @@ impl ParserContext {
})
}
///
/// Returns an [`Annotation`] AST node if the next tokens represent a supported annotation.
///
pub fn parse_annotation(&mut self) -> SyntaxResult<Annotation> {
let start = self.expect(Token::At)?;
let name = self.expect_ident()?;
@ -115,6 +121,10 @@ impl ParserContext {
})
}
///
/// Returns a vector of [`PackageAccess`] AST nodes if the next tokens represent package access
/// expressions within an import statement.
///
pub fn parse_package_accesses(&mut self) -> SyntaxResult<Vec<PackageAccess>> {
let mut out = vec![];
self.expect(Token::LeftParen)?;
@ -129,6 +139,10 @@ impl ParserContext {
Ok(out)
}
///
/// Returns a [`PackageAccess`] AST node if the next tokens represent a package access expression
/// within an import statement.
///
pub fn parse_package_access(&mut self) -> SyntaxResult<PackageAccess> {
if let Some(SpannedToken { span, .. }) = self.eat(Token::Mul) {
Ok(PackageAccess::Star(span))
@ -160,6 +174,9 @@ impl ParserContext {
}
}
///
/// Returns an [`Identifier`] AST node if the next tokens represent a valid package name.
///
pub fn parse_package_name(&mut self) -> SyntaxResult<Identifier> {
// Build the package name, starting with valid characters up to a dash `-` (Token::Minus).
let mut base = self.expect_loose_identifier()?;
@ -212,6 +229,10 @@ impl ParserContext {
Ok(base)
}
///
/// Returns a [`PackageOrPackages`] AST node if the next tokens represent a valid package import
/// with accesses.
///
pub fn parse_package_or_packages(&mut self) -> SyntaxResult<PackageOrPackages> {
let package_name = self.parse_package_name()?;
self.expect(Token::Dot)?;
@ -232,6 +253,9 @@ impl ParserContext {
}
}
///
/// Returns an [`ImportStatement`] AST node if the next tokens represent an import statement.
///
pub fn parse_import(&mut self) -> SyntaxResult<ImportStatement> {
self.expect(Token::Import)?;
let package_or_packages = self.parse_package_or_packages()?;
@ -242,6 +266,10 @@ impl ParserContext {
})
}
///
/// Returns a [`CircuitMember`] AST node if the next tokens represent a circuit member variable
/// or circuit member function.
///
pub fn parse_circuit_member(&mut self) -> SyntaxResult<CircuitMember> {
let peeked = &self.peek()?.token;
if peeked == &Token::Function || peeked == &Token::At {
@ -257,6 +285,10 @@ impl ParserContext {
}
}
///
/// Returns an [`(Identifier, Circuit)`] tuple of AST nodes if the next tokens represent a
/// circuit name and definition statement.
///
pub fn parse_circuit(&mut self) -> SyntaxResult<(Identifier, Circuit)> {
self.expect(Token::Circuit)?;
let name = self.expect_ident()?;
@ -272,6 +304,9 @@ impl ParserContext {
}))
}
///
/// Returns a [`FunctionInput`] AST node if the next tokens represent a function parameter.
///
pub fn parse_function_input(&mut self) -> SyntaxResult<FunctionInput> {
if let Some(token) = self.eat(Token::Input) {
return Ok(FunctionInput::InputKeyword(InputKeyword { span: token.span }));
@ -308,6 +343,10 @@ impl ParserContext {
}))
}
///
/// Returns an [`(Identifier, Function)`] tuple of AST nodes if the next tokens represent a
/// function name and function definition.
///
pub fn parse_function(&mut self) -> SyntaxResult<(Identifier, Function)> {
let mut annotations = vec![];
while self.peek()?.token == Token::At {

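parse_program collects imports, circuits, and functions into the maps initialized above; the body is truncated here, but the usual shape of such a routine is a loop that peeks at the next token and routes to parse_import, parse_circuit, or parse_function. A toy sketch of that shape; the token and declaration types are illustrative, not Leo's.

// A toy sketch of a top-level dispatch loop: peek at the next token, route to
// the matching parse routine, and collect results until the tokens run out.
use std::collections::VecDeque;

#[derive(Debug, Clone)]
enum Tok { Import, Circuit, Function, Ident(String) }

#[derive(Debug)]
enum Decl { Import(String), Circuit(String), Function(String) }

fn parse_program(mut tokens: VecDeque<Tok>) -> Result<Vec<Decl>, String> {
    let mut decls = Vec::new();
    while let Some(tok) = tokens.front().cloned() {
        match tok {
            Tok::Import => { tokens.pop_front(); decls.push(Decl::Import(expect_ident(&mut tokens)?)); }
            Tok::Circuit => { tokens.pop_front(); decls.push(Decl::Circuit(expect_ident(&mut tokens)?)); }
            Tok::Function => { tokens.pop_front(); decls.push(Decl::Function(expect_ident(&mut tokens)?)); }
            other => return Err(format!("unexpected top-level token: {:?}", other)),
        }
    }
    Ok(decls)
}

fn expect_ident(tokens: &mut VecDeque<Tok>) -> Result<String, String> {
    match tokens.pop_front() {
        Some(Tok::Ident(name)) => Ok(name),
        other => Err(format!("expected an identifier, found {:?}", other)),
    }
}

fn main() {
    let toks: VecDeque<Tok> = vec![
        Tok::Function, Tok::Ident("main".into()),
        Tok::Circuit, Tok::Ident("Point".into()),
    ].into_iter().collect();
    println!("{:?}", parse_program(toks).unwrap());
}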
View File

@ -35,6 +35,10 @@ const ASSIGN_TOKENS: &[Token] = &[
];
impl ParserContext {
///
/// Returns the base [`Identifier`] AST node if the given [`Expression`] AST node represents a
/// valid assignee access. Any accesses are appended to the given `accesses` vector.
///
pub fn construct_assignee_access(expr: Expression, accesses: &mut Vec<AssigneeAccess>) -> SyntaxResult<Identifier> {
let identifier;
match expr {
@ -63,6 +67,9 @@ impl ParserContext {
Ok(identifier)
}
///
/// Returns an [`Assignee`] AST node from the given [`Expression`] AST node with accesses.
///
pub fn construct_assignee(expr: Expression) -> SyntaxResult<Assignee> {
let expr_span = expr.span().clone();
let mut accesses = vec![];
@ -75,6 +82,9 @@ impl ParserContext {
})
}
///
/// Returns a [`Statement`] AST node if the next tokens represent a statement.
///
pub fn parse_statement(&mut self) -> SyntaxResult<Statement> {
match &self.peek()?.token {
Token::Return => Ok(Statement::Return(self.parse_return_statement()?)),
@ -124,6 +134,9 @@ impl ParserContext {
}
}
///
/// Returns a [`Block`] AST node if the next tokens represent a block of statements.
///
pub fn parse_block(&mut self) -> SyntaxResult<Block> {
let start = self.expect(Token::LeftCurly)?;
let mut statements = vec![];
@ -142,6 +155,9 @@ impl ParserContext {
}
}
///
/// Returns a [`ReturnStatement`] AST node if the next tokens represent a return statement.
///
pub fn parse_return_statement(&mut self) -> SyntaxResult<ReturnStatement> {
let start = self.expect(Token::Return)?;
let expr = self.parse_expression()?;
@ -153,6 +169,9 @@ impl ParserContext {
})
}
///
/// Returns a [`ConditionalStatement`] AST node if the next tokens represent a conditional statement.
///
pub fn parse_conditional_statement(&mut self) -> SyntaxResult<ConditionalStatement> {
let start = self.expect(Token::If)?;
self.fuzzy_struct_state = true;
@ -173,6 +192,9 @@ impl ParserContext {
})
}
///
/// Returns an [`IterationStatement`] AST node if the next tokens represent an iteration statement.
///
pub fn parse_for_statement(&mut self) -> SyntaxResult<IterationStatement> {
let start_span = self.expect(Token::For)?;
let ident = self.expect_ident()?;
@ -193,6 +215,9 @@ impl ParserContext {
})
}
///
/// Returns a [`FormattedString`] AST node if the next tokens represent a formatted string.
///
pub fn parse_formatted_string(&mut self) -> SyntaxResult<FormattedString> {
let start_span;
let parts = match self.expect_any()? {
@ -224,6 +249,9 @@ impl ParserContext {
})
}
///
/// Returns a [`ConsoleStatement`] AST node if the next tokens represent a console statement.
///
pub fn parse_console_statement(&mut self) -> SyntaxResult<ConsoleStatement> {
let keyword = self.expect(Token::Console)?;
self.expect(Token::Dot)?;
@ -254,6 +282,10 @@ impl ParserContext {
})
}
///
/// Returns a [`VariableName`] AST node if the next tokens represent a variable name,
/// optionally preceded by the `mut` keyword.
///
pub fn parse_variable_name(&mut self) -> SyntaxResult<VariableName> {
let mutable = self.eat(Token::Mut);
let name = self.expect_ident()?;
@ -267,6 +299,9 @@ impl ParserContext {
})
}
///
/// Returns a [`DefinitionStatement`] AST node if the next tokens represent a definition statement.
///
pub fn parse_definition_statement(&mut self) -> SyntaxResult<DefinitionStatement> {
let declare = self.expect_oneof(&[Token::Let, Token::Const])?;
let mut variable_names = vec![];

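construct_assignee_access is what lets the statement parser reuse the expression parser for the left-hand side of an assignment: the already-parsed expression is unwound, each array or member access is pushed onto the accesses vector, and the base Identifier is returned. A toy sketch of that idea over a simplified expression type; Expr and Access are illustrative, not Leo's AST.

// A toy sketch of peeling accesses off a parsed expression such as `a.b[0]`
// so it can be reused as an assignment target.
#[derive(Debug)]
enum Expr {
    Identifier(String),
    Member { base: Box<Expr>, name: String },
    Index { base: Box<Expr>, index: usize },
}

#[derive(Debug, PartialEq)]
enum Access { Member(String), Index(usize) }

fn construct_assignee_access(expr: Expr, accesses: &mut Vec<Access>) -> Result<String, String> {
    match expr {
        Expr::Identifier(name) => Ok(name),
        Expr::Member { base, name } => {
            // Recurse into the base first so accesses come out innermost-first.
            let ident = construct_assignee_access(*base, accesses)?;
            accesses.push(Access::Member(name));
            Ok(ident)
        }
        Expr::Index { base, index } => {
            let ident = construct_assignee_access(*base, accesses)?;
            accesses.push(Access::Index(index));
            Ok(ident)
        }
    }
}

fn main() {
    // Represents `a.b[0]`.
    let expr = Expr::Index {
        base: Box::new(Expr::Member {
            base: Box::new(Expr::Identifier("a".into())),
            name: "b".into(),
        }),
        index: 0,
    };
    let mut accesses = Vec::new();
    let ident = construct_assignee_access(expr, &mut accesses).unwrap();
    assert_eq!(ident, "a");
    assert_eq!(accesses, vec![Access::Member("b".into()), Access::Index(0)]);
    println!("base: {}, accesses: {:?}", ident, accesses);
}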
View File

@ -34,6 +34,9 @@ const TYPE_TOKENS: &[Token] = &[
];
impl ParserContext {
///
/// Returns an [`IntegerType`] AST node if the given token is a supported integer type, or [`None`] otherwise.
///
pub fn token_to_int_type(token: Token) -> Option<IntegerType> {
Some(match token {
Token::I8 => IntegerType::I8,
@ -50,6 +53,9 @@ impl ParserContext {
})
}
///
/// Returns an [`ArrayDimensions`] AST node if the next tokens represent dimensions for an array type.
///
pub fn parse_array_dimensions(&mut self) -> SyntaxResult<ArrayDimensions> {
Ok(if let Some((int, _)) = self.eat_int() {
ArrayDimensions(vec![int])
@ -72,6 +78,10 @@ impl ParserContext {
})
}
///
/// Returns a [`(Type, Span)`] tuple if the next tokens represent a type. The span
/// covers the tokens that were parsed.
///
pub fn parse_type(&mut self) -> SyntaxResult<(Type, Span)> {
Ok(if let Some(token) = self.eat(Token::BigSelf) {
(Type::SelfType, token.span)

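parse_array_dimensions treats a single integer literal as one dimension; the else branch is truncated in this hunk, and the multi-dimensional form is assumed below to be a parenthesized, comma-separated list of integers, which the diff does not show. A toy sketch under that assumption; Tok and the helper are illustrative, not Leo's parser.

// A toy sketch of the two-way branch in parse_array_dimensions:
// a single integer, or an assumed parenthesized list of integers.
// The sketch expects well-formed demo input and does not guard against
// running out of tokens.
#[derive(Debug, Clone)]
enum Tok { Int(u64), LeftParen, RightParen, Comma }

fn parse_array_dimensions(tokens: &mut Vec<Tok>) -> Result<Vec<u64>, String> {
    match tokens.first().cloned() {
        Some(Tok::Int(n)) => { tokens.remove(0); Ok(vec![n]) }
        Some(Tok::LeftParen) => {
            tokens.remove(0);
            let mut dims = Vec::new();
            loop {
                match tokens.remove(0) {
                    Tok::Int(n) => dims.push(n),
                    other => return Err(format!("expected an integer, found {:?}", other)),
                }
                match tokens.remove(0) {
                    Tok::Comma => continue,
                    Tok::RightParen => break,
                    other => return Err(format!("expected ',' or ')', found {:?}", other)),
                }
            }
            Ok(dims)
        }
        other => Err(format!("expected array dimensions, found {:?}", other)),
    }
}

fn main() {
    let mut single = vec![Tok::Int(3)];
    assert_eq!(parse_array_dimensions(&mut single).unwrap(), vec![3]);

    let mut multi = vec![Tok::LeftParen, Tok::Int(3), Tok::Comma, Tok::Int(2), Tok::RightParen];
    assert_eq!(parse_array_dimensions(&mut multi).unwrap(), vec![3, 2]);
    println!("ok");
}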
View File

@ -20,6 +20,10 @@ use serde::{Deserialize, Serialize};
use std::fmt;
///
/// Returns the remaining bytes of the given input if the input starts with the given string,
/// otherwise returns [`None`].
///
fn eat<'a>(input: &'a [u8], wanted: &str) -> Option<&'a [u8]> {
let wanted = wanted.as_bytes();
if input.len() < wanted.len() {
@ -31,6 +35,10 @@ fn eat<'a>(input: &'a [u8], wanted: &str) -> Option<&'a [u8]> {
None
}
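The middle of eat is elided by this hunk; from the signature and the way a byte-oriented tokenizer uses such a helper, it most likely compares wanted against the front of input and hands back the remaining bytes. A sketch under that assumption; the prefix comparison and the returned remainder are not confirmed by the diff.

// A sketch of a byte-prefix eat helper: if `input` starts with `wanted`,
// return the bytes after it, otherwise None.
fn eat<'a>(input: &'a [u8], wanted: &str) -> Option<&'a [u8]> {
    let wanted = wanted.as_bytes();
    if input.len() < wanted.len() {
        return None;
    }
    if &input[..wanted.len()] == wanted {
        return Some(&input[wanted.len()..]);
    }
    None
}

fn main() {
    assert_eq!(eat(b"let x", "let"), Some(&b" x"[..]));
    assert_eq!(eat(b"letx", "mut"), None);
    println!("ok");
}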
///
/// Returns a reference to the bytes of an identifier and the remaining bytes from the given input.
/// Returns [`None`] if the bytes do not represent an identifier.
///
fn eat_identifier(input: &[u8]) -> Option<(&[u8], &[u8])> {
if input.is_empty() {
return None;
@ -49,6 +57,10 @@ fn eat_identifier(input: &[u8]) -> Option<(&[u8], &[u8])> {
}
impl Token {
///
/// Returns the remaining bytes and an integer [`Token`] parsed from the given input.
/// The token is [`None`] if the bytes do not represent a number.
///
fn gobble_int(input: &[u8]) -> (&[u8], Option<Token>) {
if input.is_empty() {
return (input, None);
@ -80,6 +92,10 @@ impl Token {
)
}
///
/// Returns the remaining bytes and the next [`Token`] parsed from the given input.
/// The token is [`None`] if the bytes do not represent a token.
///
pub(crate) fn gobble(input: &[u8]) -> (&[u8], Option<Token>) {
if input.is_empty() {
return (input, None);
@ -365,7 +381,11 @@ impl fmt::Debug for SpannedToken {
}
}
pub(crate) fn validate_address(address: &str) -> bool {
///
/// Returns `true` if the given string looks like an Aleo address.
/// This method DOES NOT check if the address is valid on-chain.
///
pub(crate) fn check_address(address: &str) -> bool {
// "aleo1" (LOWERCASE_LETTER | ASCII_DIGIT){58}
if !address.starts_with("aleo1") || address.len() != 63 {
return false;

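check_address (renamed from validate_address) performs only the shape check described by its comment: the literal prefix "aleo1" followed by 58 lowercase letters or ASCII digits, 63 characters in total, with no on-chain validation. A sketch of that check, assuming the character test is ASCII-only, which the diff does not show.

// A sketch of the "aleo1" (LOWERCASE_LETTER | ASCII_DIGIT){58} shape check.
// Like the original, it does NOT verify that the address is valid on-chain.
fn check_address(address: &str) -> bool {
    if !address.starts_with("aleo1") || address.len() != 63 {
        return false;
    }
    address[5..]
        .chars()
        .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit())
}

fn main() {
    // A string with the right prefix, length, and character set passes the check.
    let looks_ok = format!("aleo1{}", "q".repeat(58));
    assert!(check_address(&looks_ok));
    assert!(!check_address("aleo1tooshort"));
    assert!(!check_address(&format!("aleo1{}", "Q".repeat(58)))); // uppercase rejected
    println!("ok");
}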
View File

@ -64,7 +64,7 @@ pub(crate) fn tokenize(path: &str, source: &str) -> Result<Vec<SpannedToken>, To
span.line_stop = line_no;
}
Token::AddressLit(address) => {
if !validate_address(address) {
if !check_address(address) {
return Err(TokenError::invalid_address_lit(address, &span));
}
}

View File

@ -17,6 +17,7 @@
use serde::{Deserialize, Serialize};
use std::fmt;
/// Parts of a formatted string for logging to the console.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum FormattedStringPart {
Const(String),
@ -32,6 +33,7 @@ impl fmt::Display for FormattedStringPart {
}
}
/// Represents all valid Leo syntax tokens.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum Token {
FormattedString(Vec<FormattedStringPart>),
@ -129,6 +131,7 @@ pub enum Token {
Question,
}
/// Represents all valid Leo keyword tokens.
pub const KEYWORD_TOKENS: &[Token] = &[
Token::Address,
Token::As,
@ -167,6 +170,9 @@ pub const KEYWORD_TOKENS: &[Token] = &[
];
impl Token {
///
/// Returns `true` if this token is a Leo keyword.
///
pub fn is_keyword(&self) -> bool {
KEYWORD_TOKENS.iter().any(|x| x == self)
}
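is_keyword is just a linear scan of the KEYWORD_TOKENS slice. The same pattern in miniature; Tok and KEYWORDS are illustrative stand-ins for Leo's Token and KEYWORD_TOKENS.

// A minimal sketch of the const-slice keyword check used above.
#[derive(Debug, PartialEq)]
enum Tok { Circuit, Function, If, Identifier(&'static str) }

const KEYWORDS: &[Tok] = &[Tok::Circuit, Tok::Function, Tok::If];

impl Tok {
    fn is_keyword(&self) -> bool {
        KEYWORDS.iter().any(|k| k == self)
    }
}

fn main() {
    assert!(Tok::If.is_keyword());
    assert!(!Tok::Identifier("main").is_keyword());
    println!("ok");
}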