Merge pull request #1748 from AleoHQ/parser-refactor

Parser: store current + previous tokens
Collin Chin 2022-04-19 12:55:46 -07:00 committed by GitHub
commit f002637d73
22 changed files with 496 additions and 640 deletions

View File

@ -18,7 +18,7 @@ use serde::{Deserialize, Serialize};
use std::fmt;
/// The sort of bindings to introduce, either `let` or `const`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum Declare {
/// This is a `const` binding.
Const,

View File

@ -18,56 +18,103 @@ use crate::{assert_no_whitespace, tokenizer::*, Token, KEYWORD_TOKENS};
use leo_ast::*;
use leo_errors::emitter::Handler;
use leo_errors::{LeoError, ParserError, Result};
use leo_errors::{ParserError, Result};
use leo_span::{Span, Symbol};
use std::{borrow::Cow, unreachable};
use std::fmt::Display;
use std::mem;
/// Stores a program in tokenized format plus additional context.
/// May be converted into a [`Program`] AST by parsing all tokens.
pub struct ParserContext<'a> {
#[allow(dead_code)]
/// Handler used to side-channel emit errors from the parser.
pub(crate) handler: &'a Handler,
/// All un-bumped tokens.
tokens: Vec<SpannedToken>,
end_span: Span,
/// The current token, i.e., if `p.tokens = ['3', '*', '4']`,
/// then after a `p.bump()`, we'll have `p.token = '3'`.
pub(crate) token: SpannedToken,
/// The previous token, i.e., if `p.tokens = ['3', '*', '4']`,
/// then after two `p.bump()`s, we'll have `p.token = '*'` and `p.prev_token = '3'`.
pub(crate) prev_token: SpannedToken,
// true if parsing an expression for if and loop statements -- means circuit inits are not legal
pub(crate) disallow_circuit_construction: bool,
}
impl Iterator for ParserContext<'_> {
type Item = SpannedToken;
fn next(&mut self) -> Option<SpannedToken> {
self.bump()
}
/// HACK(Centril): Place to store a dummy EOF.
/// Exists to appease borrow checker for now.
dummy_eof: SpannedToken,
}
impl<'a> ParserContext<'a> {
///
/// Returns a new [`ParserContext`] type given a vector of tokens.
///
pub fn new(handler: &'a Handler, mut tokens: Vec<SpannedToken>) -> Self {
// Strip out comments.
tokens.retain(|x| !matches!(x.token, Token::CommentLine(_) | Token::CommentBlock(_)));
// For performance we reverse so that we get cheap `.pop()`s.
tokens.reverse();
// todo: performance optimization here: drain filter
tokens = tokens
.into_iter()
.filter(|x| !matches!(x.token, Token::CommentLine(_) | Token::CommentBlock(_)))
.collect();
Self {
let token = SpannedToken::dummy();
let dummy_eof = SpannedToken {
token: Token::Eof,
span: token.span.clone(),
};
let mut p = Self {
handler,
end_span: tokens
.iter()
.find(|x| !x.span.content.trim().is_empty())
.map(|x| x.span.clone())
.unwrap_or_default(),
tokens,
disallow_circuit_construction: false,
}
dummy_eof,
prev_token: token.clone(),
token,
tokens,
};
p.bump();
p
}
/// Returns the current token if there is one.
pub fn peek_option(&self) -> Option<&SpannedToken> {
self.tokens.last()
/// Advances the parser cursor by one token.
///
/// So e.g., if we had `previous = A`, `current = B`, and `tokens = [C, D, E]`,
/// then after `p.bump()`, the state will be `previous = B`, `current = C`, and `tokens = [D, E]`.
pub fn bump(&mut self) {
// Probably a bug (infinite loop), as the previous token was already EOF.
if let Token::Eof = self.prev_token.token {
panic!("attempted to bump the parser past EOF (may be stuck in a loop)");
}
// Extract next token, or `Eof` if there was none.
let next_token = self.tokens.pop().unwrap_or_else(|| SpannedToken {
token: Token::Eof,
span: self.token.span.clone(),
});
// Set the new token.
self.prev_token = mem::replace(&mut self.token, next_token);
}
/// Checks whether the current token is `token`.
pub fn check(&self, tok: &Token) -> bool {
&self.token.token == tok
}
/// Removes the next token if it exists and returns it, or [None] if
/// the next token does not exist.
pub fn eat(&mut self, token: &Token) -> bool {
self.check(token).then(|| self.bump()).is_some()
}
/// Look-ahead `dist` tokens of `self.token` and get access to that token there.
/// When `dist == 0` then the current token is looked at.
pub fn look_ahead<'s, R>(&'s self, dist: usize, looker: impl FnOnce(&'s SpannedToken) -> R) -> R {
if dist == 0 {
return looker(&self.token);
}
let idx = match self.tokens.len().checked_sub(dist) {
None => return looker(&self.dummy_eof),
Some(idx) => idx,
};
looker(self.tokens.get(idx).unwrap_or_else(|| &self.dummy_eof))
}
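
To make the new cursor model above concrete, here is a minimal standalone sketch of the same `token`/`prev_token`/`bump` pattern. The `Cursor` and `Tok` types are illustrative stand-ins, not the real `ParserContext` or `SpannedToken`:

use std::mem;

#[derive(Clone, Debug, PartialEq)]
enum Tok { Int(u64), Plus, Eof }

struct Cursor {
    tokens: Vec<Tok>, // reversed so `pop()` cheaply yields the next token
    token: Tok,       // the current token
    prev_token: Tok,  // the most recently bumped token
}

impl Cursor {
    fn new(mut tokens: Vec<Tok>) -> Self {
        tokens.reverse();
        let mut c = Cursor { tokens, token: Tok::Eof, prev_token: Tok::Eof };
        c.bump(); // load the first token into `token`
        c
    }

    /// Advance the cursor: `prev_token <- token <- next (or Eof)`.
    fn bump(&mut self) {
        let next = self.tokens.pop().unwrap_or(Tok::Eof);
        self.prev_token = mem::replace(&mut self.token, next);
    }

    /// Consume the current token only if it equals `t`.
    fn eat(&mut self, t: &Tok) -> bool {
        let hit = &self.token == t;
        if hit { self.bump(); }
        hit
    }

    /// Peek `dist` tokens past the current one without consuming anything.
    fn look_ahead(&self, dist: usize) -> &Tok {
        if dist == 0 { return &self.token; }
        self.tokens.len().checked_sub(dist)
            .and_then(|i| self.tokens.get(i))
            .unwrap_or(&Tok::Eof)
    }
}

fn main() {
    let mut c = Cursor::new(vec![Tok::Int(3), Tok::Plus, Tok::Int(4)]);
    assert_eq!(c.token, Tok::Int(3));
    assert_eq!(c.look_ahead(1), &Tok::Plus);
    assert!(c.eat(&Tok::Int(3)));
    assert_eq!(c.prev_token, Tok::Int(3)); // spans of just-consumed tokens come from here
    assert_eq!(c.token, Tok::Plus);
}

Keeping `prev_token` around is what lets methods like `expect` below return `self.prev_token.span` for the token they just consumed.
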
/// Emit the error `err`.
@ -75,302 +122,153 @@ impl<'a> ParserContext<'a> {
self.handler.emit_err(err.into());
}
///
/// Returns an unexpected end of function [`SyntaxError`].
///
pub fn eof(&self) -> LeoError {
ParserError::unexpected_eof(&self.end_span).into()
}
///
/// Returns a reference to the next SpannedToken or error if it does not exist.
///
pub fn peek_next(&self) -> Result<&SpannedToken> {
self.tokens.get(self.tokens.len() - 2).ok_or_else(|| self.eof())
}
///
/// Returns a reference to the current SpannedToken or error if it does not exist.
///
pub fn peek(&self) -> Result<&SpannedToken> {
self.tokens.last().ok_or_else(|| self.eof())
}
///
/// Returns a reference to the next Token.
///
pub fn peek_token(&self) -> Cow<'_, Token> {
self.peek_option()
.map(|x| &x.token)
.map(Cow::Borrowed)
.unwrap_or_else(|| Cow::Owned(Token::Eof))
}
///
/// Returns true if the next token exists.
///
pub fn has_next(&self) -> bool {
!self.tokens.is_empty()
!matches!(self.token.token, Token::Eof)
}
/// Advances the current token.
pub fn bump(&mut self) -> Option<SpannedToken> {
self.tokens.pop()
/// At the previous token, return and make an identifier with `name`.
fn mk_ident_prev(&self, name: Symbol) -> Identifier {
let span = self.prev_token.span.clone();
Identifier { name, span }
}
///
/// Removes the next token if it exists and returns it, or [None] if
/// the next token does not exist.
///
pub fn eat(&mut self, token: Token) -> Option<SpannedToken> {
if let Some(SpannedToken { token: inner, .. }) = self.peek_option() {
if &token == inner {
return self.bump();
}
}
None
}
///
/// Appends a token to the back of the vector.
///
pub fn backtrack(&mut self, token: SpannedToken) {
self.tokens.push(token);
}
///
/// Removes the next token if it is a [`Token::Ident(_)`] and returns it, or [None] if
/// the next token is not a [`Token::Ident(_)`] or if the next token does not exist.
///
/// Eats the next token if it's an identifier and returns it.
pub fn eat_identifier(&mut self) -> Option<Identifier> {
if let Some(SpannedToken {
token: Token::Ident(_), ..
}) = self.peek_option()
{
if let SpannedToken {
token: Token::Ident(name),
span,
} = self.bump().unwrap()
{
return Some(Identifier { name, span });
} else {
unreachable!("eat_identifier_ shouldn't produce this")
}
if let Token::Ident(name) = self.token.token {
self.bump();
return Some(self.mk_ident_prev(name));
}
None
}
/// Expects an identifier, "loosely" speaking, or errors.
///
/// This could be either a keyword, integer, or a normal identifier.
pub fn expect_loose_identifier(&mut self) -> Result<Identifier> {
if self.eat_any(KEYWORD_TOKENS) {
return Ok(self.mk_ident_prev(self.prev_token.token.keyword_to_symbol().unwrap()));
}
if let Some(int) = self.eat_int() {
return Ok(self.mk_ident_prev(Symbol::intern(&int.value)));
}
self.expect_ident()
}
/// Expects an [`Identifier`], or errors.
pub fn expect_ident(&mut self) -> Result<Identifier> {
self.eat_identifier()
.ok_or_else(|| ParserError::unexpected_str(&self.token.token, "ident", &self.token.span).into())
}
///
/// Returns a reference to the next token if it is a [`GroupCoordinate`], or [None] if
/// the next token is not a [`GroupCoordinate`].
///
fn peek_group_coordinate(&self, i: &mut usize) -> Option<GroupCoordinate> {
*i = i.checked_sub(1)?;
let token = self.tokens.get(*i)?;
Some(match &token.token {
Token::Add => GroupCoordinate::SignHigh,
Token::Minus => match self.tokens.get(i.checked_sub(1)?) {
Some(SpannedToken {
token: Token::Int(value),
span,
}) => {
*i -= 1;
GroupCoordinate::Number(format!("-{}", value), span.clone())
}
_ => GroupCoordinate::SignLow,
},
Token::Underscore => GroupCoordinate::Inferred,
Token::Int(value) => GroupCoordinate::Number(value.clone(), token.span.clone()),
_ => return None,
})
fn peek_group_coordinate(&self, dist: &mut usize) -> Option<GroupCoordinate> {
let (advanced, gc) = self.look_ahead(*dist, |t0| match &t0.token {
Token::Add => Some((1, GroupCoordinate::SignHigh)),
Token::Minus => self.look_ahead(*dist + 1, |t1| match &t1.token {
Token::Int(value) => Some((2, GroupCoordinate::Number(format!("-{}", value), t1.span.clone()))),
_ => Some((1, GroupCoordinate::SignLow)),
}),
Token::Underscore => Some((1, GroupCoordinate::Inferred)),
Token::Int(value) => Some((1, GroupCoordinate::Number(value.clone(), t0.span.clone()))),
_ => None,
})?;
*dist += advanced;
Some(gc)
}
/// Returns `true` if the next token is Function or if it is a Const followed by Function.
/// Returns `false` otherwise.
pub fn peek_is_function(&self) -> Result<bool> {
let first = &self.peek()?.token;
let next = if self.tokens.len() >= 2 {
&self.peek_next()?.token
} else {
return Ok(false);
};
Ok(matches!(
(first, next),
pub fn peek_is_function(&self) -> bool {
matches!(
(&self.token.token, self.look_ahead(1, |t| &t.token)),
(Token::Function, _) | (Token::Const, Token::Function)
))
)
}
///
/// Removes the next two tokens if they are a pair of [`GroupCoordinate`] and returns them,
/// or [None] if the next token is not a [`GroupCoordinate`].
///
pub fn eat_group_partial(&mut self) -> Option<Result<(GroupCoordinate, GroupCoordinate, Span)>> {
let mut i = self.tokens.len();
let start_span = self.tokens.get(i.checked_sub(1)?)?.span.clone();
let first = self.peek_group_coordinate(&mut i)?;
i = i.checked_sub(1)?;
if !matches!(
self.tokens.get(i),
Some(SpannedToken {
token: Token::Comma,
..
})
) {
return None;
pub fn eat_group_partial(&mut self) -> Option<Result<GroupTuple>> {
assert!(self.check(&Token::LeftParen)); // `(`.
// Peek at first gc.
let start_span = &self.token.span;
let mut dist = 1; // 0th is `(` so 1st is first gc's start.
let first_gc = self.peek_group_coordinate(&mut dist)?;
let check_ahead = |d, token: &_| self.look_ahead(d, |t| (&t.token == token).then(|| t.span.clone()));
// Peek at `,`.
check_ahead(dist, &Token::Comma)?;
dist += 1; // Standing at `,` so advance one for next gc's start.
// Peek at second gc.
let second_gc = self.peek_group_coordinate(&mut dist)?;
// Peek at `)`.
let right_paren_span = check_ahead(dist, &Token::RightParen)?;
dist += 1; // Standing at `)` so advance one for 'group'.
// Peek at `group`.
let end_span = check_ahead(dist, &Token::Group)?;
dist += 1; // Standing at `group` so advance one past it.
let gt = GroupTuple {
span: start_span + &end_span,
x: first_gc,
y: second_gc,
};
// Eat everything so that this isn't just peeking.
for _ in 0..dist {
self.bump();
}
let second = self.peek_group_coordinate(&mut i)?;
i = i.checked_sub(1)?;
let right_paren_span = if let Some(SpannedToken {
token: Token::RightParen,
span,
}) = self.tokens.get(i)
{
span.clone()
} else {
return None;
};
i = i.checked_sub(1)?;
let end_span = if let Some(SpannedToken {
token: Token::Group,
span,
}) = self.tokens.get(i)
{
span.clone()
} else {
return None;
};
self.tokens.drain(i..);
if let Err(e) = assert_no_whitespace(
&right_paren_span,
&end_span,
&format!("({},{})", first, second),
"group",
) {
if let Err(e) = assert_no_whitespace(&right_paren_span, &end_span, &format!("({},{})", gt.x, gt.y), "group") {
return Some(Err(e));
}
Some(Ok((first, second, start_span + end_span)))
Some(Ok(gt))
}
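
`eat_group_partial` above only commits (bumps) once the entire `(x, y)group` shape has been confirmed by look-ahead, so a failed match leaves the cursor untouched. A standalone sketch of that peek-then-commit idea, using plain `char` tokens rather than the real token stream:

fn eat_pair(tokens: &mut Vec<char>) -> Option<(char, char)> {
    // Peek: require exactly `( x , y )` ahead before consuming anything.
    let shape_ok = tokens.len() >= 5
        && tokens[0] == '('
        && tokens[2] == ','
        && tokens[4] == ')';
    if !shape_ok {
        return None; // leave the input untouched so other parses can still try
    }
    let pair = (tokens[1], tokens[3]);
    tokens.drain(..5); // commit: consume all five peeked tokens at once
    Some(pair)
}

fn main() {
    let mut toks: Vec<char> = "(a,b)".chars().collect();
    assert_eq!(eat_pair(&mut toks), Some(('a', 'b')));
    assert!(toks.is_empty());
}
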
///
/// Removes the next token if it is a [`Token::Int(_)`] and returns it, or [None] if
/// the next token is not a [`Token::Int(_)`] or if the next token does not exist.
///
pub fn eat_int(&mut self) -> Option<(PositiveNumber, Span)> {
if let Some(SpannedToken {
token: Token::Int(_), ..
}) = self.peek_option()
{
if let SpannedToken {
token: Token::Int(value),
span,
} = self.bump().unwrap()
{
return Some((PositiveNumber { value }, span));
} else {
unreachable!("eat_int_ shouldn't produce this")
}
/// Eats the next token if it is a [`Token::Int(_)`] and returns it.
pub fn eat_int(&mut self) -> Option<PositiveNumber> {
if let Token::Int(value) = &self.token.token {
let value = value.clone();
self.bump();
return Some(PositiveNumber { value });
}
None
}
///
/// Removes the next token if it exists and returns it, or [None] if
/// the next token does not exist.
///
pub fn eat_any(&mut self, token: &[Token]) -> Option<SpannedToken> {
if let Some(SpannedToken { token: inner, .. }) = self.peek_option() {
if token.iter().any(|x| x == inner) {
return self.bump();
}
}
None
/// Eats any of the given `tokens`, returning `true` if anything was eaten.
pub fn eat_any(&mut self, tokens: &[Token]) -> bool {
tokens.iter().any(|x| self.check(x)).then(|| self.bump()).is_some()
}
///
/// Returns the span of the next token if it is equal to the given [`Token`], or error.
///
pub fn expect(&mut self, token: Token) -> Result<Span> {
if let Some(SpannedToken { token: inner, span }) = self.peek_option() {
if &token == inner {
Ok(self.bump().unwrap().span)
} else {
Err(ParserError::unexpected(inner, token, span).into())
}
/// Returns an unexpected error at the current token.
fn unexpected<T>(&self, expected: impl Display) -> Result<T> {
Err(ParserError::unexpected(&self.token.token, expected, &self.token.span).into())
}
/// Eats the expected `token`, or errors.
pub fn expect(&mut self, token: &Token) -> Result<Span> {
if self.eat(token) {
Ok(self.prev_token.span.clone())
} else {
Err(self.eof())
self.unexpected(token)
}
}
///
/// Returns the span of the next token if it is equal to one of the given [`Token`]s, or error.
///
pub fn expect_oneof(&mut self, token: &[Token]) -> Result<SpannedToken> {
if let Some(SpannedToken { token: inner, span }) = self.peek_option() {
if token.iter().any(|x| x == inner) {
Ok(self.bump().unwrap())
} else {
return Err(ParserError::unexpected(
inner,
token.iter().map(|x| format!("'{}'", x)).collect::<Vec<_>>().join(", "),
span,
)
.into());
}
/// Eats one of the expected `tokens`, or errors.
pub fn expect_any(&mut self, tokens: &[Token]) -> Result<Span> {
if self.eat_any(tokens) {
Ok(self.prev_token.span.clone())
} else {
Err(self.eof())
}
}
///
/// Returns the [`Identifier`] of the next token if it is a keyword,
/// [`Token::Int(_)`], or an [`Identifier`], or error.
///
pub fn expect_loose_identifier(&mut self) -> Result<Identifier> {
if let Some(token) = self.eat_any(KEYWORD_TOKENS) {
return Ok(Identifier {
name: token.token.keyword_to_symbol().unwrap(),
span: token.span,
});
}
if let Some((int, span)) = self.eat_int() {
let name = Symbol::intern(&int.value);
return Ok(Identifier { name, span });
}
self.expect_ident()
}
/// Returns the [`Identifier`] of the next token if it is an [`Identifier`], or error.
pub fn expect_ident(&mut self) -> Result<Identifier> {
if let Some(SpannedToken { token: inner, span }) = self.peek_option() {
if let Token::Ident(_) = inner {
if let SpannedToken {
token: Token::Ident(name),
span,
} = self.bump().unwrap()
{
Ok(Identifier { name, span })
} else {
unreachable!("expect_ident_ shouldn't produce this")
}
} else {
Err(ParserError::unexpected_str(inner, "ident", span).into())
}
} else {
Err(self.eof())
}
}
///
/// Returns the next token if it exists or return end of function.
///
pub fn expect_any(&mut self) -> Result<SpannedToken> {
if let Some(x) = self.tokens.pop() {
Ok(x)
} else {
Err(self.eof())
self.unexpected(tokens.iter().map(|x| format!("'{}'", x)).collect::<Vec<_>>().join(", "))
}
}
@ -389,16 +287,15 @@ impl<'a> ParserContext<'a> {
let mut trailing = false;
// Parse opening delimiter.
let open_span = self.expect(open)?;
let open_span = self.expect(&open)?;
while self.peek()?.token != close {
while !self.check(&close) {
// Parse the element. We allow inner parser recovery through the `Option`.
if let Some(elem) = inner(self)? {
list.push(elem);
}
// Parse the separator.
if self.eat(sep.clone()).is_none() {
if !self.eat(&sep) {
trailing = false;
break;
}
@ -407,9 +304,9 @@ impl<'a> ParserContext<'a> {
}
// Parse closing delimiter.
let close_span = self.expect(close)?;
let span = open_span + self.expect(&close)?;
Ok((list, trailing, open_span + close_span))
Ok((list, trailing, span))
}
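
The delimited-list helper above follows the usual shape: expect the opening token, then repeat `element`, `separator` until the closing token, remembering whether the last separator was trailing. A rough standalone sketch of that loop, with `char` tokens and a string error type standing in for the real ones:

fn parse_list(tokens: &mut Vec<char>) -> Result<(Vec<char>, bool), String> {
    // Expect the opening `(`.
    if tokens.first() != Some(&'(') {
        return Err("expected '('".into());
    }
    tokens.remove(0);
    let (mut items, mut trailing) = (Vec::new(), false);
    while tokens.first() != Some(&')') {
        // Parse one element (here: any single non-delimiter char).
        let elem = *tokens.first().ok_or("unexpected end of input")?;
        tokens.remove(0);
        items.push(elem);
        // Parse the separator; without one, the list must end here.
        if tokens.first() == Some(&',') {
            tokens.remove(0);
            trailing = true;
        } else {
            trailing = false;
            break;
        }
    }
    // Expect the closing `)`.
    if tokens.first() != Some(&')') {
        return Err("expected ')'".into());
    }
    tokens.remove(0);
    Ok((items, trailing))
}

fn main() {
    let mut toks: Vec<char> = "(a,b,)".chars().collect();
    assert_eq!(parse_list(&mut toks), Ok((vec!['a', 'b'], true)));
}
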
/// Parse a list separated by `,` and delimited by parens.
@ -422,6 +319,6 @@ impl<'a> ParserContext<'a> {
/// Returns true if the current token is `(`.
pub(super) fn peek_is_left_par(&self) -> bool {
matches!(self.peek_option().map(|t| &t.token), Some(Token::LeftParen))
matches!(self.token.token, Token::LeftParen)
}
}

View File

@ -55,20 +55,18 @@ impl ParserContext<'_> {
result
}
///
/// Returns an [`Expression`] AST node if the next tokens represent
/// a ternary expression. May or may not include circuit init expressions.
///
/// Otherwise, tries to parse the next token using [`parse_disjunctive_expression`].
///
pub fn parse_conditional_expression(&mut self) -> Result<Expression> {
// Try to parse the next expression. Try BinaryOperation::Or.
let mut expr = self.parse_disjunctive_expression()?;
// Parse the rest of the ternary expression.
if self.eat(Token::Question).is_some() {
if self.eat(&Token::Question) {
let if_true = self.parse_expression()?;
self.expect(Token::Colon)?;
self.expect(&Token::Colon)?;
let if_false = self.parse_conditional_expression()?;
expr = Expression::Ternary(TernaryExpression {
span: expr.span() + if_false.span(),
@ -94,12 +92,11 @@ impl ParserContext<'_> {
/// The `token` is translated to `op` in the AST.
fn parse_bin_expr(
&mut self,
token: Token,
op: BinaryOperation,
tokens: &[Token],
mut f: impl FnMut(&mut Self) -> Result<Expression>,
) -> Result<Expression> {
let mut expr = f(self)?;
while self.eat(token.clone()).is_some() {
while let Some(op) = self.eat_bin_op(tokens) {
expr = Self::bin_expr(expr, f(self)?, op);
}
Ok(expr)
@ -110,7 +107,7 @@ impl ParserContext<'_> {
///
/// Otherwise, tries to parse the next token using [`parse_conjunctive_expression`].
pub fn parse_disjunctive_expression(&mut self) -> Result<Expression> {
self.parse_bin_expr(Token::Or, BinaryOperation::Or, Self::parse_conjunctive_expression)
self.parse_bin_expr(&[Token::Or], Self::parse_conjunctive_expression)
}
/// Returns an [`Expression`] AST node if the next tokens represent a
@ -118,7 +115,27 @@ impl ParserContext<'_> {
///
/// Otherwise, tries to parse the next token using [`parse_equality_expression`].
pub fn parse_conjunctive_expression(&mut self) -> Result<Expression> {
self.parse_bin_expr(Token::And, BinaryOperation::And, Self::parse_equality_expression)
self.parse_bin_expr(&[Token::And], Self::parse_equality_expression)
}
/// Eats one of binary operators matching any in `tokens`.
fn eat_bin_op(&mut self, tokens: &[Token]) -> Option<BinaryOperation> {
self.eat_any(tokens).then(|| match &self.prev_token.token {
Token::Eq => BinaryOperation::Eq,
Token::NotEq => BinaryOperation::Ne,
Token::Lt => BinaryOperation::Lt,
Token::LtEq => BinaryOperation::Le,
Token::Gt => BinaryOperation::Gt,
Token::GtEq => BinaryOperation::Ge,
Token::Add => BinaryOperation::Add,
Token::Minus => BinaryOperation::Sub,
Token::Mul => BinaryOperation::Mul,
Token::Div => BinaryOperation::Div,
Token::Or => BinaryOperation::Or,
Token::And => BinaryOperation::And,
Token::Exp => BinaryOperation::Pow,
_ => unreachable!("`eat_bin_op` shouldn't produce this"),
})
}
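
With `eat_bin_op`, each precedence level collapses into a single `parse_bin_expr` call that pairs a token set with the next-tighter level. A self-contained sketch of the same left-associative pattern, using toy `Tok` and `Expr` types with only `+` and `*`:

#[derive(Clone, Copy, PartialEq, Debug)]
enum Tok { Num(i64), Plus, Star, Eof }

#[derive(Debug)]
enum Expr { Num(i64), Bin(Box<Expr>, char, Box<Expr>) }

struct P { toks: Vec<Tok>, i: usize }

impl P {
    fn cur(&self) -> Tok { *self.toks.get(self.i).unwrap_or(&Tok::Eof) }
    fn bump(&mut self) { self.i += 1; }

    /// One precedence level: keep folding while the current token is in `ops`.
    fn parse_bin(&mut self, ops: &[Tok], mut next: impl FnMut(&mut Self) -> Expr) -> Expr {
        let mut lhs = next(self);
        while ops.contains(&self.cur()) {
            let op = if self.cur() == Tok::Plus { '+' } else { '*' };
            self.bump();
            lhs = Expr::Bin(Box::new(lhs), op, Box::new(next(self)));
        }
        lhs
    }

    fn parse_add(&mut self) -> Expr { self.parse_bin(&[Tok::Plus], Self::parse_mul) }
    fn parse_mul(&mut self) -> Expr { self.parse_bin(&[Tok::Star], Self::parse_atom) }
    fn parse_atom(&mut self) -> Expr {
        match self.cur() {
            Tok::Num(n) => { self.bump(); Expr::Num(n) }
            _ => panic!("expected a number"),
        }
    }
}

fn main() {
    // `1 + 2 * 3` parses as `1 + (2 * 3)` because each level defers to the tighter one.
    let toks = vec![Tok::Num(1), Tok::Plus, Tok::Num(2), Tok::Star, Tok::Num(3), Tok::Eof];
    let mut p = P { toks, i: 0 };
    println!("{:?}", p.parse_add());
}
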
/// Returns an [`Expression`] AST node if the next tokens represent a
@ -127,13 +144,8 @@ impl ParserContext<'_> {
/// Otherwise, tries to parse the next token using [`parse_ordering_expression`].
pub fn parse_equality_expression(&mut self) -> Result<Expression> {
let mut expr = self.parse_ordering_expression()?;
if let Some(SpannedToken { token: op, .. }) = self.eat_any(&[Token::Eq, Token::NotEq]) {
if let Some(op) = self.eat_bin_op(&[Token::Eq, Token::NotEq]) {
let right = self.parse_ordering_expression()?;
let op = match op {
Token::Eq => BinaryOperation::Eq,
Token::NotEq => BinaryOperation::Ne,
_ => unreachable!("parse_equality_expression_ shouldn't produce this"),
};
expr = Self::bin_expr(expr, right, op);
}
Ok(expr)
@ -144,20 +156,10 @@ impl ParserContext<'_> {
///
/// Otherwise, tries to parse the next token using [`parse_shift_expression`].
pub fn parse_ordering_expression(&mut self) -> Result<Expression> {
let mut expr = self.parse_additive_expression()?;
while let Some(SpannedToken { token: op, .. }) = self.eat_any(&[Token::Lt, Token::LtEq, Token::Gt, Token::GtEq])
{
let right = self.parse_additive_expression()?;
let op = match op {
Token::Lt => BinaryOperation::Lt,
Token::LtEq => BinaryOperation::Le,
Token::Gt => BinaryOperation::Gt,
Token::GtEq => BinaryOperation::Ge,
_ => unreachable!("parse_ordering_expression_ shouldn't produce this"),
};
expr = Self::bin_expr(expr, right, op);
}
Ok(expr)
self.parse_bin_expr(
&[Token::Lt, Token::LtEq, Token::Gt, Token::GtEq],
Self::parse_additive_expression,
)
}
/// Returns an [`Expression`] AST node if the next tokens represent a
@ -165,17 +167,7 @@ impl ParserContext<'_> {
///
/// Otherwise, tries to parse the next token using [`parse_mul_div_pow_expression`].
pub fn parse_additive_expression(&mut self) -> Result<Expression> {
let mut expr = self.parse_multiplicative_expression()?;
while let Some(SpannedToken { token: op, .. }) = self.eat_any(&[Token::Add, Token::Minus]) {
let right = self.parse_multiplicative_expression()?;
let op = match op {
Token::Add => BinaryOperation::Add,
Token::Minus => BinaryOperation::Sub,
_ => unreachable!("parse_additive_expression_ shouldn't produce this"),
};
expr = Self::bin_expr(expr, right, op);
}
Ok(expr)
self.parse_bin_expr(&[Token::Add, Token::Minus], Self::parse_multiplicative_expression)
}
/// Returns an [`Expression`] AST node if the next tokens represent a
@ -183,17 +175,7 @@ impl ParserContext<'_> {
///
/// Otherwise, tries to parse the next token using [`parse_exponential_expression`].
pub fn parse_multiplicative_expression(&mut self) -> Result<Expression> {
let mut expr = self.parse_exponential_expression()?;
while let Some(SpannedToken { token: op, .. }) = self.eat_any(&[Token::Mul, Token::Div]) {
let right = self.parse_exponential_expression()?;
let op = match op {
Token::Mul => BinaryOperation::Mul,
Token::Div => BinaryOperation::Div,
_ => unreachable!("parse_multiplicative_expression_ shouldn't produce this"),
};
expr = Self::bin_expr(expr, right, op);
}
Ok(expr)
self.parse_bin_expr(&[Token::Mul, Token::Div], Self::parse_exponential_expression)
}
/// Returns an [`Expression`] AST node if the next tokens represent a
@ -203,121 +185,86 @@ impl ParserContext<'_> {
pub fn parse_exponential_expression(&mut self) -> Result<Expression> {
let mut expr = self.parse_unary_expression()?;
if self.eat(Token::Exp).is_some() {
if let Some(op) = self.eat_bin_op(&[Token::Exp]) {
let right = self.parse_exponential_expression()?;
expr = Self::bin_expr(expr, right, BinaryOperation::Pow);
expr = Self::bin_expr(expr, right, op);
}
Ok(expr)
}
///
/// Returns an [`Expression`] AST node if the next tokens represent a
/// unary not, negate, or bitwise not expression.
///
/// Otherwise, tries to parse the next token using [`parse_postfix_expression`].
///
pub fn parse_unary_expression(&mut self) -> Result<Expression> {
let mut ops = Vec::new();
while let Some(token) = self.eat_any(&[Token::Not, Token::Minus]) {
ops.push(token);
}
let mut inner = self.parse_postfix_expression()?;
for op in ops.into_iter().rev() {
let operation = match op.token {
while self.eat_any(&[Token::Not, Token::Minus]) {
let operation = match self.prev_token.token {
Token::Not => UnaryOperation::Not,
Token::Minus => UnaryOperation::Negate,
_ => unreachable!("parse_unary_expression_ shouldn't produce this"),
};
ops.push((operation, self.prev_token.span.clone()));
}
let mut inner = self.parse_postfix_expression()?;
for (op, op_span) in ops.into_iter().rev() {
inner = Expression::Unary(UnaryExpression {
span: &op.span + inner.span(),
op: operation,
span: &op_span + inner.span(),
op,
inner: Box::new(inner),
});
}
Ok(inner)
}
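
The unary parser above records each prefix operator (with its span), parses the operand, and then folds the operators back on in reverse so the innermost one binds first. A tiny illustrative evaluator showing why the reverse fold is needed (negation only, no spans or AST):

fn parse_unary(tokens: &[char]) -> i64 {
    // Collect the leading `-` operators in source order.
    let ops: Vec<char> = tokens.iter().copied().take_while(|c| *c == '-').collect();
    let operand = tokens[ops.len()].to_digit(10).expect("expected a digit") as i64;
    // Apply them innermost-first, i.e. in reverse of how they were read.
    ops.into_iter().rev().fold(operand, |acc, _minus| -acc)
}

fn main() {
    let toks: Vec<char> = "--3".chars().collect();
    assert_eq!(parse_unary(&toks), 3); // -(-(3))
}
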
///
/// Returns an [`Expression`] AST node if the next tokens represent an
/// array access, circuit member access, function call, or static function call expression.
///
/// Otherwise, tries to parse the next token using [`parse_primary_expression`].
///
pub fn parse_postfix_expression(&mut self) -> Result<Expression> {
// We don't directly parse named types and identifiers here, as
// the ABNF states. Rather, the primary expression already
// handles those. The ABNF is more specific for language reasons.
let mut expr = self.parse_primary_expression()?;
while let Some(token) = self.eat_any(&[Token::Dot, Token::LeftParen]) {
match token.token {
Token::Dot => {
let next = self.peek()?;
return Err(ParserError::unexpected_str(&next.token, "int or ident", &next.span).into());
}
Token::LeftParen => {
let mut arguments = Vec::new();
let end_span;
loop {
if let Some(end) = self.eat(Token::RightParen) {
end_span = end.span;
break;
}
arguments.push(self.parse_expression()?);
if self.eat(Token::Comma).is_none() {
end_span = self.expect(Token::RightParen)?;
break;
}
}
expr = Expression::Call(CallExpression {
span: expr.span() + &end_span,
function: Box::new(expr),
arguments,
});
}
_ => unreachable!("parse_postfix_expression_ shouldn't produce this"),
loop {
if self.eat(&Token::Dot) {
let curr = &self.token;
return Err(ParserError::unexpected_str(&curr.token, "int or ident", &curr.span).into());
}
if !self.check(&Token::LeftParen) {
break;
}
let (arguments, _, span) = self.parse_paren_comma_list(|p| p.parse_expression().map(Some))?;
expr = Expression::Call(CallExpression {
span: expr.span() + &span,
function: Box::new(expr),
arguments,
});
}
Ok(expr)
}
///
/// Returns an [`Expression`] AST node if the next tokens represent a
/// tuple initialization expression or an affine group literal.
///
pub fn parse_tuple_expression(&mut self, span: &Span) -> Result<Expression> {
if let Some((left, right, span)) = self.eat_group_partial().transpose()? {
pub fn parse_tuple_expression(&mut self) -> Result<Expression> {
if let Some(gt) = self.eat_group_partial().transpose()? {
return Ok(Expression::Value(ValueExpression::Group(Box::new(GroupValue::Tuple(
GroupTuple {
span,
x: left,
y: right,
},
gt,
)))));
}
let mut args = Vec::new();
let end_span;
loop {
let end = self.eat(Token::RightParen);
if let Some(end) = end {
end_span = end.span;
break;
}
let expr = self.parse_expression()?;
args.push(expr);
if self.eat(Token::Comma).is_none() {
end_span = self.expect(Token::RightParen)?;
break;
}
}
if args.len() == 1 {
Ok(args.remove(0))
let (mut tuple, trailing, span) = self.parse_paren_comma_list(|p| p.parse_expression().map(Some))?;
if !trailing && tuple.len() == 1 {
Ok(tuple.remove(0))
} else {
Err(ParserError::unexpected("A tuple expression.", "A valid expression.", &(span + &end_span)).into())
Err(ParserError::unexpected("A tuple expression.", "A valid expression.", &span).into())
}
}
///
/// Returns an [`Expression`] AST node if the next token is a primary expression:
/// - Literals: field, group, unsigned integer, signed integer, boolean, address
/// - Aggregate types: array, tuple
@ -325,38 +272,37 @@ impl ParserContext<'_> {
/// - self
///
/// Returns an expression error if the token cannot be matched.
///
pub fn parse_primary_expression(&mut self) -> Result<Expression> {
let SpannedToken { token, span } = self.expect_any()?;
if let Token::LeftParen = self.token.token {
return self.parse_tuple_expression();
}
let SpannedToken { token, span } = self.token.clone();
self.bump();
Ok(match token {
Token::Int(value) => {
let type_ = self.eat_any(INT_TYPES);
match type_ {
Some(SpannedToken {
token: Token::Field,
span: type_span,
}) => {
assert_no_whitespace(&span, &type_span, &value, "field")?;
Expression::Value(ValueExpression::Field(value, span + type_span))
let suffix_span = self.token.span.clone();
let full_span = &span + &suffix_span;
let assert_no_whitespace = |x| assert_no_whitespace(&span, &suffix_span, &value, x);
match self.eat_any(INT_TYPES).then(|| &self.prev_token.token) {
// Literal followed by `field`, e.g., `42field`.
Some(Token::Field) => {
assert_no_whitespace("field")?;
Expression::Value(ValueExpression::Field(value, full_span))
}
Some(SpannedToken {
token: Token::Group,
span: type_span,
}) => {
assert_no_whitespace(&span, &type_span, &value, "group")?;
Expression::Value(ValueExpression::Group(Box::new(GroupValue::Single(
value,
span + type_span,
))))
// Literal followed by `group`, e.g., `42group`.
Some(Token::Group) => {
assert_no_whitespace("group")?;
Expression::Value(ValueExpression::Group(Box::new(GroupValue::Single(value, full_span))))
}
Some(SpannedToken { token, span: type_span }) => {
assert_no_whitespace(&span, &type_span, &value, &token.to_string())?;
Expression::Value(ValueExpression::Integer(
Self::token_to_int_type(token).expect("unknown int type token"),
value,
span + type_span,
))
// Literal followed by other type suffix, e.g., `42u8`.
Some(suffix) => {
assert_no_whitespace(&suffix.to_string())?;
let int_ty = Self::token_to_int_type(suffix).expect("unknown int type token");
Expression::Value(ValueExpression::Integer(int_ty, value, full_span))
}
// Just literal and no suffix.
None => Expression::Value(ValueExpression::Implicit(value, span)),
}
}
@ -368,7 +314,6 @@ impl ParserContext<'_> {
span,
})),
Token::StringLit(value) => Expression::Value(ValueExpression::String(value, span)),
Token::LeftParen => self.parse_tuple_expression(&span)?,
Token::Ident(name) => {
let ident = Identifier { name, span };
Expression::Identifier(ident)

View File

@ -27,11 +27,10 @@ impl ParserContext<'_> {
let mut functions = IndexMap::new();
while self.has_next() {
let token = self.peek()?;
match &token.token {
Token::Ident(sym::test) => return Err(ParserError::test_function(&token.span).into()),
match &self.token.token {
Token::Ident(sym::test) => return Err(ParserError::test_function(&self.token.span).into()),
// Const functions share the first token with the global Const.
Token::Const if self.peek_is_function()? => {
Token::Const if self.peek_is_function() => {
let (id, function) = self.parse_function_declaration()?;
functions.insert(id, function);
}
@ -39,7 +38,7 @@ impl ParserContext<'_> {
let (id, function) = self.parse_function_declaration()?;
functions.insert(id, function);
}
_ => return Err(Self::unexpected_item(token).into()),
_ => return Err(Self::unexpected_item(&self.token).into()),
}
}
Ok(Program {
@ -65,12 +64,12 @@ impl ParserContext<'_> {
/// Returns a [`ParamMode`] AST node if the next tokens represent a function parameter mode.
///
pub fn parse_function_parameter_mode(&mut self) -> Result<ParamMode> {
let public = self.eat(Token::Public);
let constant = self.eat(Token::Constant);
let const_ = self.eat(Token::Const);
let public = self.eat(&Token::Public).then(|| self.prev_token.span.clone());
let constant = self.eat(&Token::Constant).then(|| self.prev_token.span.clone());
let const_ = self.eat(&Token::Const).then(|| self.prev_token.span.clone());
if const_.is_some() {
self.emit_err(ParserError::const_parameter_or_input(&const_.as_ref().unwrap().span));
if let Some(span) = &const_ {
self.emit_err(ParserError::const_parameter_or_input(span));
}
match (public, constant, const_) {
@ -79,10 +78,10 @@ impl ParserContext<'_> {
(None, None, None) => Ok(ParamMode::Private),
(Some(_), None, None) => Ok(ParamMode::Public),
(Some(m1), Some(m2), None) | (Some(m1), None, Some(m2)) | (None, Some(m1), Some(m2)) => {
Err(ParserError::inputs_multiple_variable_types_specified(&(m1.span + m2.span)).into())
Err(ParserError::inputs_multiple_variable_types_specified(&(m1 + m2)).into())
}
(Some(m1), Some(m2), Some(m3)) => {
Err(ParserError::inputs_multiple_variable_types_specified(&(m1.span + m2.span + m3.span)).into())
Err(ParserError::inputs_multiple_variable_types_specified(&(m1 + m2 + m3)).into())
}
}
}
@ -90,9 +89,9 @@ impl ParserContext<'_> {
///
/// Returns a [`FunctionInput`] AST node if the next tokens represent a function parameter.
///
pub fn parse_function_parameters(&mut self) -> Result<FunctionInput> {
pub fn parse_function_parameter(&mut self) -> Result<FunctionInput> {
let mode = self.parse_function_parameter_mode()?;
let mutable = self.eat(Token::Mut);
let mutable = self.eat(&Token::Mut).then(|| self.prev_token.clone());
let name = self.expect_ident()?;
@ -100,7 +99,7 @@ impl ParserContext<'_> {
self.emit_err(ParserError::mut_function_input(&(&mutable.span + &name.span)));
}
self.expect(Token::Colon)?;
self.expect(&Token::Colon)?;
let type_ = self.parse_type()?.0;
Ok(FunctionInput::Variable(FunctionInputVariable::new(
name.clone(),
@ -114,17 +113,17 @@ impl ParserContext<'_> {
/// and function definition.
pub fn parse_function_declaration(&mut self) -> Result<(Identifier, Function)> {
// Parse optional const modifier.
let const_ = self.eat(Token::Const).is_some();
let const_ = self.eat(&Token::Const);
// Parse `function IDENT`.
let start = self.expect(Token::Function)?;
let start = self.expect(&Token::Function)?;
let name = self.expect_ident()?;
// Parse parameters.
let (inputs, ..) = self.parse_paren_comma_list(|p| p.parse_function_parameters().map(Some))?;
let (inputs, ..) = self.parse_paren_comma_list(|p| p.parse_function_parameter().map(Some))?;
// Parse return type.
let output = if self.eat(Token::Arrow).is_some() {
let output = if self.eat(&Token::Arrow) {
Some(self.parse_type()?.0)
} else {
None

View File

@ -24,11 +24,10 @@ impl ParserContext<'_> {
let mut sections = Vec::new();
while self.has_next() {
let token = self.peek()?;
if matches!(token.token, Token::LeftSquare) {
if self.check(&Token::LeftSquare) {
sections.push(self.parse_section()?);
} else {
return Err(ParserError::unexpected_token(token.token.clone(), &token.span).into());
return Err(ParserError::unexpected_token(self.token.token.clone(), &self.token.span).into());
}
}
@ -42,17 +41,12 @@ impl ParserContext<'_> {
/// `
/// Returns [`Section`].
pub fn parse_section(&mut self) -> Result<Section> {
self.expect(Token::LeftSquare)?;
self.expect(&Token::LeftSquare)?;
let section = self.expect_ident()?;
self.expect(Token::RightSquare)?;
self.expect(&Token::RightSquare)?;
let mut definitions = Vec::new();
while let Some(SpannedToken {
token: Token::Const | Token::Constant | Token::Public | Token::Ident(_),
..
}) = self.peek_option()
{
while let Token::Const | Token::Constant | Token::Public | Token::Ident(_) = self.token.token {
definitions.push(self.parse_input_definition()?);
}
@ -70,11 +64,11 @@ impl ParserContext<'_> {
let mode = self.parse_function_parameter_mode()?;
let name = self.expect_ident()?;
self.expect(Token::Colon)?;
self.expect(&Token::Colon)?;
let (type_, span) = self.parse_type()?;
self.expect(Token::Assign)?;
self.expect(&Token::Assign)?;
let value = self.parse_primary_expression()?;
self.expect(Token::Semicolon)?;
self.expect(&Token::Semicolon)?;
Ok(Definition {
mode,

View File

@ -52,7 +52,7 @@ impl ParserContext<'_> {
/// Returns a [`Statement`] AST node if the next tokens represent a statement.
///
pub fn parse_statement(&mut self) -> Result<Statement> {
match &self.peek()?.token {
match &self.token.token {
Token::Return => Ok(Statement::Return(self.parse_return_statement()?)),
Token::If => Ok(Statement::Conditional(self.parse_conditional_statement()?)),
Token::For => Ok(Statement::Iteration(Box::new(self.parse_loop_statement()?))),
@ -69,21 +69,19 @@ impl ParserContext<'_> {
pub fn parse_assign_statement(&mut self) -> Result<Statement> {
let expr = self.parse_expression()?;
if let Some(operator) = self.eat_any(ASSIGN_TOKENS) {
if self.eat_any(ASSIGN_TOKENS) {
let value = self.parse_expression()?;
let assignee = Self::construct_assignee(expr)?;
self.expect(Token::Semicolon)?;
self.expect(&Token::Semicolon)?;
Ok(Statement::Assign(Box::new(AssignStatement {
span: &assignee.span + value.span(),
assignee,
operation: match operator.token {
Token::Assign => AssignOperation::Assign,
_ => unreachable!("parse_assign_statement_ shouldn't produce this"),
},
// Currently only `=` so this is alright.
operation: AssignOperation::Assign,
value,
})))
} else {
self.expect(Token::Semicolon)?;
self.expect(&Token::Semicolon)?;
Ok(Statement::Expression(ExpressionStatement {
span: expr.span().clone(),
expression: expr,
@ -91,50 +89,40 @@ impl ParserContext<'_> {
}
}
///
/// Returns a [`Block`] AST node if the next tokens represent a block of statements.
///
pub fn parse_block(&mut self) -> Result<Block> {
let start = self.expect(Token::LeftCurly)?;
let start = self.expect(&Token::LeftCurly)?;
let mut statements = Vec::new();
loop {
match self.eat(Token::RightCurly) {
None => {
statements.push(self.parse_statement()?);
}
Some(end) => {
return Ok(Block {
span: start + end.span,
statements,
});
}
if self.eat(&Token::RightCurly) {
return Ok(Block {
span: &start + &self.prev_token.span,
statements,
});
}
statements.push(self.parse_statement()?);
}
}
///
/// Returns a [`ReturnStatement`] AST node if the next tokens represent a return statement.
///
pub fn parse_return_statement(&mut self) -> Result<ReturnStatement> {
let start = self.expect(Token::Return)?;
let expr = self.parse_expression()?;
self.expect(Token::Semicolon)?;
Ok(ReturnStatement {
span: &start + expr.span(),
expression: expr,
})
let start = self.expect(&Token::Return)?;
let expression = self.parse_expression()?;
self.expect(&Token::Semicolon)?;
let span = &start + expression.span();
Ok(ReturnStatement { span, expression })
}
/// Returns a [`ConditionalStatement`] AST node if the next tokens represent a conditional statement.
pub fn parse_conditional_statement(&mut self) -> Result<ConditionalStatement> {
let start = self.expect(Token::If)?;
let start = self.expect(&Token::If)?;
self.disallow_circuit_construction = true;
let expr = self.parse_conditional_expression()?;
self.disallow_circuit_construction = false;
let body = self.parse_block()?;
let next = if self.eat(Token::Else).is_some() {
let next = if self.eat(&Token::Else) {
let s = self.parse_statement()?;
if !matches!(s, Statement::Block(_) | Statement::Conditional(_)) {
self.emit_err(ParserError::unexpected_statement(&s, "Block or Conditional", s.span()));
@ -154,14 +142,14 @@ impl ParserContext<'_> {
/// Returns an [`IterationStatement`] AST node if the next tokens represent an iteration statement.
pub fn parse_loop_statement(&mut self) -> Result<IterationStatement> {
let start_span = self.expect(Token::For)?;
let start_span = self.expect(&Token::For)?;
let ident = self.expect_ident()?;
self.expect(Token::In)?;
self.expect(&Token::In)?;
// Parse iteration range.
let start = self.parse_expression()?;
self.expect(Token::DotDot)?;
let inclusive = self.eat(Token::Assign).is_some();
self.expect(&Token::DotDot)?;
let inclusive = self.eat(&Token::Assign);
self.disallow_circuit_construction = true;
let stop = self.parse_conditional_expression()?;
self.disallow_circuit_construction = false;
@ -183,7 +171,8 @@ impl ParserContext<'_> {
let mut string = None;
let (parameters, _, span) = self.parse_paren_comma_list(|p| {
if string.is_none() {
let SpannedToken { token, span } = p.expect_any()?;
p.bump();
let SpannedToken { token, span } = p.prev_token.clone();
string = Some(match token {
Token::StringLit(chars) => chars,
_ => {
@ -206,14 +195,14 @@ impl ParserContext<'_> {
/// Returns a [`ConsoleStatement`] AST node if the next tokens represent a console statement.
pub fn parse_console_statement(&mut self) -> Result<ConsoleStatement> {
let keyword = self.expect(Token::Console)?;
self.expect(Token::Dot)?;
let keyword = self.expect(&Token::Console)?;
self.expect(&Token::Dot)?;
let function = self.expect_ident()?;
let function = match function.name {
sym::assert => {
self.expect(Token::LeftParen)?;
self.expect(&Token::LeftParen)?;
let expr = self.parse_expression()?;
self.expect(Token::RightParen)?;
self.expect(&Token::RightParen)?;
ConsoleFunction::Assert(expr)
}
sym::error => ConsoleFunction::Error(self.parse_console_args()?),
@ -228,7 +217,7 @@ impl ParserContext<'_> {
ConsoleFunction::Log(self.parse_console_args()?)
}
};
self.expect(Token::Semicolon)?;
self.expect(&Token::Semicolon)?;
Ok(ConsoleStatement {
span: &keyword + function.span(),
@ -238,29 +227,34 @@ impl ParserContext<'_> {
/// Returns a [`VariableName`] AST node if the next tokens represent a variable name with
/// valid keywords.
pub fn parse_variable_name(&mut self, span: &SpannedToken) -> Result<VariableName> {
let mutable = self.eat(Token::Mut);
if let Some(mutable) = &mutable {
self.emit_err(ParserError::let_mut_statement(&(&mutable.span + &span.span)));
pub fn parse_variable_name(&mut self, decl_ty: Declare, span: &Span) -> Result<VariableName> {
if self.eat(&Token::Mut) {
self.emit_err(ParserError::let_mut_statement(&(&self.prev_token.span + span)));
}
let name = self.expect_ident()?;
Ok(VariableName {
span: name.span.clone(),
mutable: matches!(span.token, Token::Let),
mutable: matches!(decl_ty, Declare::Let),
identifier: name,
})
}
/// Returns a [`DefinitionStatement`] AST node if the next tokens represent a definition statement.
pub fn parse_definition_statement(&mut self) -> Result<DefinitionStatement> {
let declare = self.expect_oneof(&[Token::Let, Token::Const])?;
self.expect_any(&[Token::Let, Token::Const])?;
let decl_span = self.prev_token.span.clone();
let decl_type = match &self.prev_token.token {
Token::Let => Declare::Let,
Token::Const => Declare::Const,
_ => unreachable!("parse_definition_statement_ shouldn't produce this"),
};
dbg!();
// Parse variable names.
let variable_names = if self.peek_is_left_par() {
let vars = self
.parse_paren_comma_list(|p| p.parse_variable_name(&declare).map(Some))
.parse_paren_comma_list(|p| p.parse_variable_name(decl_type, &decl_span).map(Some))
.map(|(vars, ..)| vars)?;
if vars.len() == 1 {
@ -269,26 +263,22 @@ impl ParserContext<'_> {
vars
} else {
vec![self.parse_variable_name(&declare)?]
vec![self.parse_variable_name(decl_type, &decl_span)?]
};
// Parse an optional type ascription.
let type_ = self
.eat(Token::Colon)
.map(|_| self.parse_type().map(|t| t.0))
.eat(&Token::Colon)
.then(|| self.parse_type().map(|t| t.0))
.transpose()?;
self.expect(Token::Assign)?;
self.expect(&Token::Assign)?;
let expr = self.parse_expression()?;
self.expect(Token::Semicolon)?;
self.expect(&Token::Semicolon)?;
Ok(DefinitionStatement {
span: &declare.span + expr.span(),
declaration_type: match declare.token {
Token::Let => Declare::Let,
Token::Const => Declare::Const,
_ => unreachable!("parse_definition_statement_ shouldn't produce this"),
},
span: &decl_span + expr.span(),
declaration_type: decl_type,
variable_names,
type_,
value: expr,

View File

@ -39,7 +39,7 @@ impl ParserContext<'_> {
///
/// Returns a [`IntegerType`] AST node if the given token is a supported integer type, or [`None`].
///
pub fn token_to_int_type(token: Token) -> Option<IntegerType> {
pub fn token_to_int_type(token: &Token) -> Option<IntegerType> {
Some(match token {
Token::I8 => IntegerType::I8,
Token::I16 => IntegerType::I16,
@ -62,9 +62,9 @@ impl ParserContext<'_> {
let span = ident.span.clone();
(Type::Identifier(ident), span)
} else {
let token = self.expect_oneof(TYPE_TOKENS)?;
let span = self.expect_any(TYPE_TOKENS)?;
(
match token.token {
match &self.prev_token.token {
Token::Field => Type::Field,
Token::Group => Type::Group,
Token::Address => Type::Address,
@ -72,7 +72,7 @@ impl ParserContext<'_> {
Token::Char => Type::Char,
x => Type::IntegerType(Self::token_to_int_type(x).expect("invalid int type")),
},
token.span,
span,
)
})
}

View File

@ -56,7 +56,8 @@ fn not_fully_consumed(tokens: &mut ParserContext) -> Result<(), String> {
}
let mut out = "did not consume all input: ".to_string();
while tokens.has_next() {
out.push_str(&tokens.expect_any().unwrap().to_string());
tokens.bump();
out.push_str(&tokens.prev_token.to_string());
out.push('\n');
}
Err(out)

View File

@ -434,6 +434,16 @@ pub struct SpannedToken {
pub span: Span,
}
impl SpannedToken {
/// Returns a dummy token at a dummy span.
pub fn dummy() -> Self {
Self {
token: Token::Question,
span: Span::dummy(),
}
}
}
impl fmt::Display for SpannedToken {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "'{}' @ ", self.token.to_string().trim())?;

View File

@ -20,6 +20,7 @@
//! separated by whitespace.
pub(crate) mod token;
use std::iter;
use std::sync::Arc;
pub use self::token::KEYWORD_TOKENS;
@ -33,69 +34,79 @@ use leo_span::Span;
/// Creates a new vector of spanned tokens from a given file path and source code text.
pub(crate) fn tokenize(path: &str, input: &str) -> Result<Vec<SpannedToken>> {
tokenize_iter(path, input).collect()
}
/// Yields spanned tokens from a given file path and source code text.
pub(crate) fn tokenize_iter<'a>(path: &'a str, input: &'a str) -> impl 'a + Iterator<Item = Result<SpannedToken>> {
let path = Arc::new(path.to_string());
let mut tokens = vec![];
let mut index = 0usize;
let mut line_no = 1usize;
let mut line_start = 0usize;
while input.len() > index {
match Token::eat(&input[index..])? {
(token_len, Token::WhiteSpace) => {
let bytes = input.as_bytes();
if bytes[index] == 0x000D && matches!(bytes.get(index + 1), Some(0x000A)) {
// Check carriage return followed by newline.
line_no += 1;
line_start = index + token_len;
index += token_len;
} else if matches!(bytes[index], 0x000A | 0x000D) {
// Check new-line or carriage-return
line_no += 1;
line_start = index + token_len;
}
index += token_len;
}
(token_len, token) => {
let mut span = Span::new(
line_no,
line_no,
index - line_start + 1,
index - line_start + token_len + 1,
path.clone(),
input[line_start
..input[line_start..]
.find('\n')
.map(|i| i + line_start)
.unwrap_or(input.len())]
.to_string(),
);
match &token {
Token::CommentLine(_) => {
iter::from_fn(move || {
while input.len() > index {
let token = match Token::eat(&input[index..]) {
Err(e) => return Some(Err(e.into())),
Ok(t) => t,
};
match token {
(token_len, Token::WhiteSpace) => {
let bytes = input.as_bytes();
if bytes[index] == 0x000D && matches!(bytes.get(index + 1), Some(0x000A)) {
// Check carriage return followed by newline.
line_no += 1;
line_start = index + token_len;
index += token_len;
} else if matches!(bytes[index], 0x000A | 0x000D) {
// Check new-line or carriage-return
line_no += 1;
line_start = index + token_len;
}
Token::CommentBlock(block) => {
let line_ct = block.chars().filter(|x| *x == '\n').count();
line_no += line_ct;
if line_ct > 0 {
let last_line_index = block.rfind('\n').unwrap();
line_start = index + last_line_index + 1;
span.col_stop = index + token_len - line_start + 1;
}
span.line_stop = line_no;
}
Token::AddressLit(address) => {
if !check_address(address) {
return Err(ParserError::invalid_address_lit(address, &span).into());
}
}
_ => (),
index += token_len;
}
(token_len, token) => {
let mut span = Span::new(
line_no,
line_no,
index - line_start + 1,
index - line_start + token_len + 1,
path.clone(),
input[line_start
..input[line_start..]
.find('\n')
.map(|i| i + line_start)
.unwrap_or(input.len())]
.to_string(),
);
match &token {
Token::CommentLine(_) => {
line_no += 1;
line_start = index + token_len;
}
Token::CommentBlock(block) => {
let line_ct = block.chars().filter(|x| *x == '\n').count();
line_no += line_ct;
if line_ct > 0 {
let last_line_index = block.rfind('\n').unwrap();
line_start = index + last_line_index + 1;
span.col_stop = index + token_len - line_start + 1;
}
span.line_stop = line_no;
}
Token::AddressLit(address) if !check_address(address) => {
return Some(Err(ParserError::invalid_address_lit(address, &span).into()));
}
_ => (),
}
index += token_len;
return Some(Ok(SpannedToken { token, span }));
}
tokens.push(SpannedToken { token, span });
index += token_len;
}
}
}
Ok(tokens)
None
})
}
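
`tokenize` now simply collects `tokenize_iter`, which produces tokens lazily via `iter::from_fn` instead of pushing them all into a `Vec` up front. A standalone sketch of that eager-to-lazy shape, with a trivial whitespace splitter standing in for the real lexer:

use std::iter;

fn tokenize_iter<'a>(input: &'a str) -> impl Iterator<Item = &'a str> + 'a {
    let mut rest = input.trim_start();
    iter::from_fn(move || {
        if rest.is_empty() {
            return None; // end of input, the iterator is exhausted
        }
        let end = rest.find(char::is_whitespace).unwrap_or(rest.len());
        let (tok, tail) = rest.split_at(end);
        rest = tail.trim_start();
        Some(tok)
    })
}

fn main() {
    // Callers that want the old behavior just collect; others can stop early.
    let all: Vec<_> = tokenize_iter("let x = 3").collect();
    assert_eq!(all, ["let", "x", "=", "3"]);
}
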
#[cfg(test)]

View File

@ -295,7 +295,7 @@ impl fmt::Display for Token {
Public => write!(f, "public"),
Return => write!(f, "return"),
Type => write!(f, "type"),
Eof => write!(f, ""),
Eof => write!(f, "<eof>"),
}
}
}

View File

@ -25,6 +25,9 @@ use serde::Deserialize;
/// This is used in many spots throughout the rest of the Leo crates.
#[derive(Clone, Debug, Default, Deserialize, Eq, Hash, PartialEq)]
pub struct Span {
// TODO(Centril): All of could be optimized to just `{ lo: u32, hi: u32 }`,
// i.e. 8 bytes by indexing into a global source map of all files concatenated.
// That would also give us `Copy` which is quite nice!
/// The line number where the error started.
pub line_start: usize,
/// The line number where the error stopped.
@ -64,6 +67,12 @@ impl Span {
content,
}
}
/// Generates a dummy span with all defaults.
/// Should only be used in temporary situations.
pub fn dummy() -> Self {
Self::new(0, 0, 0, 0, <_>::default(), <_>::default())
}
}
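
A minimal sketch of the compact layout the TODO above describes -- two `u32` byte offsets into a hypothetical global source map -- which is not something this commit implements:

/// Hypothetical 8-byte span: byte offsets into one concatenated source map.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct ByteSpan {
    lo: u32, // inclusive start offset in the global source map
    hi: u32, // exclusive end offset
}

fn main() {
    // Being `Copy` means spans could be passed around freely, as the TODO notes.
    let s = ByteSpan { lo: 10, hi: 14 };
    let t = s; // copies; `s` is still usable
    assert_eq!(s, t);
}
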
impl Serialize for Span {

View File

@ -10,7 +10,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 12
path: ""
content: "(+, _)group"
@ -22,7 +22,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 12
path: ""
content: "(_, -)group"
@ -34,7 +34,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 12
path: ""
content: "(+, -)group"
@ -46,7 +46,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 12
path: ""
content: "(-, +)group"
@ -58,7 +58,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 12
path: ""
content: "(+, +)group"
@ -70,7 +70,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 12
path: ""
content: "(-, -)group"
@ -82,7 +82,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 12
path: ""
content: "(_, _)group"
@ -112,7 +112,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 16
path: ""
content: "(123,-456)group"
@ -142,7 +142,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 16
path: ""
content: "(-123,456)group"
@ -172,7 +172,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 16
path: ""
content: "(-123,456)group"
@ -193,7 +193,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 14
path: ""
content: "(123, _)group"
@ -214,7 +214,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 14
path: ""
content: "(123, -)group"
@ -235,7 +235,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 14
path: ""
content: "(123, -)group"
@ -256,7 +256,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 14
path: ""
content: "(123, +)group"
@ -277,7 +277,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 14
path: ""
content: "(123, +)group"
@ -298,7 +298,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 14
path: ""
content: "(123, -)group"
@ -319,7 +319,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 14
path: ""
content: "(123, _)group"
@ -340,7 +340,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 14
path: ""
content: "(+, 345)group"
@ -361,7 +361,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 14
path: ""
content: "(_, 345)group"
@ -382,7 +382,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 14
path: ""
content: "(+, 345)group"
@ -403,7 +403,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 14
path: ""
content: "(-, 345)group"
@ -424,7 +424,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 14
path: ""
content: "(+, 345)group"
@ -445,7 +445,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 14
path: ""
content: "(-, 345)group"
@ -466,7 +466,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 14
path: ""
content: "(_, 345)group"
@ -496,7 +496,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 16
path: ""
content: "(123, 456)group"
@ -526,7 +526,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 16
path: ""
content: "(123, 456)group"
@ -556,7 +556,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 16
path: ""
content: "(123, 456)group"
@ -586,7 +586,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 16
path: ""
content: "(123, 456)group"
@ -616,7 +616,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 16
path: ""
content: "(123, 456)group"
@ -646,7 +646,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 16
path: ""
content: "(123, 456)group"
@ -676,7 +676,7 @@ outputs:
span:
line_start: 1
line_stop: 1
col_start: 2
col_start: 1
col_stop: 16
path: ""
content: "(123, 456)group"

View File

@ -11,6 +11,6 @@ outputs:
- "Error [EPAR0370005]: expected A valid expression. -- got 'A tuple expression.'\n --> test:1:1\n |\n 1 | (123,456u8)group\n | ^^^^^^^^^^^"
- "Error [EPAR0370005]: expected A valid expression. -- got 'A tuple expression.'\n --> test:1:1\n |\n 1 | (123,456field)group\n | ^^^^^^^^^^^^^^"
- "Error [EPAR0370004]: Unexpected white space between terms (123,456) and group\n --> test:1:10\n |\n 1 | (123, 456) group\n | ^"
- "did not consume all input: 'group' @ 1:8-13\n"
- "Error [EPAR0370005]: expected A valid expression. -- got 'A tuple expression.'\n --> test:1:1\n |\n 1 | (123, )group\n | ^^^^^^^"
- "Error [EPAR0370005]: expected A valid expression. -- got 'A tuple expression.'\n --> test:1:1\n |\n 1 | (123, 456, 789)group\n | ^^^^^^^^^^^^^^^"
- "Error [EPAR0370005]: expected A valid expression. -- got 'A tuple expression.'\n --> test:1:1\n |\n 1 | (123, 456)bool\n | ^^^^^^^^^^"

View File

@ -20,7 +20,7 @@ outputs:
- "Error [EPAR0370009]: unexpected string: expected 'expression', got '>'\n --> test:1:1\n |\n 1 | >\n | ^"
- "Error [EPAR0370009]: unexpected string: expected 'expression', got '>='\n --> test:1:1\n |\n 1 | >=\n | ^^"
- "Error [EPAR0370009]: unexpected string: expected 'expression', got '+'\n --> test:1:1\n |\n 1 | +\n | ^"
- "Error [EPAR0370003]: unexpected EOF\n --> test:1:1\n |\n 1 | -\n | ^"
- "Error [EPAR0370009]: unexpected string: expected 'expression', got '<eof>'\n --> test:1:1\n |\n 1 | -\n | ^"
- "Error [EPAR0370009]: unexpected string: expected 'expression', got '*'\n --> test:1:1\n |\n 1 | *\n | ^"
- "Error [EPAR0370009]: unexpected string: expected 'expression', got '**'\n --> test:1:1\n |\n 1 | **\n | ^^"
- "Error [EPAR0370009]: unexpected string: expected 'expression', got '/'\n --> test:1:1\n |\n 1 | /\n | ^"
@ -30,7 +30,7 @@ outputs:
- "Error [EPAR0370009]: unexpected string: expected 'expression', got '*'\n --> test:1:1\n |\n 1 | *=\n | ^"
- "Error [EPAR0370009]: unexpected string: expected 'expression', got '/'\n --> test:1:1\n |\n 1 | /=\n | ^"
- "Error [EPAR0370009]: unexpected string: expected 'expression', got '**'\n --> test:1:1\n |\n 1 | **=\n | ^^"
- "Error [EPAR0370003]: unexpected EOF\n --> test:1:1\n |\n 1 | (\n | ^"
- "Error [EPAR0370009]: unexpected string: expected 'expression', got '<eof>'\n --> test:1:1\n |\n 1 | (\n | ^"
- "Error [EPAR0370009]: unexpected string: expected 'expression', got ')'\n --> test:1:1\n |\n 1 | )\n | ^"
- "Error [EPAR0370009]: unexpected string: expected 'expression', got '['\n --> test:1:1\n |\n 1 | [\n | ^"
- "Error [EPAR0370009]: unexpected string: expected 'expression', got ']'\n --> test:1:1\n |\n 1 | ]\n | ^"

View File

@ -99,7 +99,7 @@ outputs:
span:
line_start: 7
line_stop: 7
col_start: 22
col_start: 21
col_stop: 32
path: ""
content: "constant d: group = (0, 1)group;"
@ -234,7 +234,7 @@ outputs:
span:
line_start: 14
line_stop: 14
col_start: 14
col_start: 13
col_stop: 24
path: ""
content: "r3: group = (0, 1)group;"

View File

@ -99,7 +99,7 @@ outputs:
span:
line_start: 7
line_stop: 7
col_start: 20
col_start: 19
col_stop: 30
path: ""
content: "public d: group = (0, 1)group; "
@ -234,7 +234,7 @@ outputs:
span:
line_start: 14
line_stop: 14
col_start: 14
col_start: 13
col_stop: 24
path: ""
content: "r3: group = (0, 1)group; "

View File

@ -29,15 +29,15 @@ outputs:
- "Error [EPAR0370005]: expected 'i8', 'i16', 'i32', 'i64', 'i128', 'u8', 'u16', 'u32', 'u64', 'u128', 'field', 'group', 'address', 'bool', 'char' -- got 'const'\n --> test:1:8\n |\n 1 | let x: const = expr;\n | ^^^^^"
- "Error [EPAR0370005]: expected 'i8', 'i16', 'i32', 'i64', 'i128', 'u8', 'u16', 'u32', 'u64', 'u128', 'field', 'group', 'address', 'bool', 'char' -- got 'let'\n --> test:1:10\n |\n 1 | const x: let = expr;\n | ^^^"
- "Error [EPAR0370005]: expected 'i8', 'i16', 'i32', 'i64', 'i128', 'u8', 'u16', 'u32', 'u64', 'u128', 'field', 'group', 'address', 'bool', 'char' -- got 'mut'\n --> test:1:8\n |\n 1 | let x: mut = expr;\n | ^^^"
- "Error [EPAR0370003]: unexpected EOF\n --> test:1:1\n |\n 1 | let\n | ^^^"
- "Error [EPAR0370003]: unexpected EOF\n --> test:1:5\n |\n 1 | let x\n | ^"
- "Error [EPAR0370003]: unexpected EOF\n --> test:1:6\n |\n 1 | let x:\n | ^"
- "Error [EPAR0370009]: unexpected string: expected 'ident', got '<eof>'\n --> test:1:1\n |\n 1 | let\n | ^^^"
- "Error [EPAR0370005]: expected = -- got '<eof>'\n --> test:1:5\n |\n 1 | let x\n | ^"
- "Error [EPAR0370005]: expected 'i8', 'i16', 'i32', 'i64', 'i128', 'u8', 'u16', 'u32', 'u64', 'u128', 'field', 'group', 'address', 'bool', 'char' -- got '<eof>'\n --> test:1:6\n |\n 1 | let x:\n | ^"
- "Error [EPAR0370005]: expected ) -- got ']'\n --> test:1:14\n |\n 1 | let x = (a, y]);\n | ^"
- "Error [EPAR0370009]: unexpected string: expected 'ident', got '='\n --> test:1:5\n |\n 1 | let = 1u8;\n | ^"
- "Error [EPAR0370009]: unexpected string: expected 'ident', got ';'\n --> test:1:4\n |\n 1 | let;\n | ^"
- "Error [EPAR0370005]: expected = -- got '1'\n --> test:1:7\n |\n 1 | let x 1u8;\n | ^"
- "Error [EPAR0370005]: expected = -- got ';'\n --> test:1:10\n |\n 1 | let x: u8;\n | ^"
- "Error [EPAR0370003]: unexpected EOF\n --> test:1:8\n |\n 1 | let x: u8\n | ^^"
- "Error [EPAR0370005]: expected = -- got '<eof>'\n --> test:1:8\n |\n 1 | let x: u8\n | ^^"
- "Error [EPAR0370005]: expected 'i8', 'i16', 'i32', 'i64', 'i128', 'u8', 'u16', 'u32', 'u64', 'u128', 'field', 'group', 'address', 'bool', 'char' -- got '='\n --> test:1:8\n |\n 1 | let x: = 1;\n | ^"
- "Error [EPAR0370005]: expected 'i8', 'i16', 'i32', 'i64', 'i128', 'u8', 'u16', 'u32', 'u64', 'u128', 'field', 'group', 'address', 'bool', 'char' -- got '['\n --> test:1:8\n |\n 1 | let x: [u8] = 1;\n | ^"
- "Error [EPAR0370005]: expected 'i8', 'i16', 'i32', 'i64', 'i128', 'u8', 'u16', 'u32', 'u64', 'u128', 'field', 'group', 'address', 'bool', 'char' -- got '['\n --> test:1:8\n |\n 1 | let x: [u8;\n | ^"

View File

@ -2,6 +2,6 @@
namespace: ParseStatement
expectation: Fail
outputs:
- "Error [EPAR0370003]: unexpected EOF\n --> test:1:1\n |\n 1 | return\n | ^^^^^^"
- "Error [EPAR0370003]: unexpected EOF\n --> test:1:8\n |\n 1 | return 5\n | ^"
- "Error [EPAR0370009]: unexpected string: expected 'expression', got '<eof>'\n --> test:1:1\n |\n 1 | return\n | ^^^^^^"
- "Error [EPAR0370005]: expected ; -- got '<eof>'\n --> test:1:8\n |\n 1 | return 5\n | ^"
- "Error [EPAR0370009]: unexpected string: expected 'expression', got 'if'\n --> test:2:1\n |\n 2 | if x {}\n | ^^"

View File

@ -8,7 +8,7 @@ outputs:
- "Error [EPAR0370009]: unexpected string: expected 'expression', got ','\n --> test:1:1\n |\n 1 | , x = 10u8;\n | ^"
- "Error [EPAR0370009]: unexpected string: expected 'expression', got '['\n --> test:1:1\n |\n 1 | [ x = 10u8;\n | ^"
- "Error [EPAR0370009]: unexpected string: expected 'expression', got ']'\n --> test:1:1\n |\n 1 | ] x = 10u8;\n | ^"
- "Error [EPAR0370003]: unexpected EOF\n --> test:1:11\n |\n 1 | { x = 10u8;\n | ^"
- "Error [EPAR0370009]: unexpected string: expected 'expression', got '<eof>'\n --> test:1:11\n |\n 1 | { x = 10u8;\n | ^"
- "Error [EPAR0370009]: unexpected string: expected 'expression', got '}'\n --> test:1:1\n |\n 1 | } x = 10u8;\n | ^"
- "Error [EPAR0370005]: expected ) -- got '='\n --> test:1:5\n |\n 1 | ( x = 10u8;\n | ^"
- "Error [EPAR0370009]: unexpected string: expected 'expression', got ')'\n --> test:1:1\n |\n 1 | ) x = 10u8;\n | ^"

View File

@ -145,4 +145,4 @@ mut
return
type
type

View File

@ -9,4 +9,4 @@ function x(x: u32, public y: i32) {
function x(public x: u32, y: i32) {
return 0;
}
}