mirror of https://github.com/ProvableHQ/leo.git

lexer errors so far

Commit: 8fddfc639f
Parent: 1312c5d036
@@ -223,4 +223,84 @@ create_errors!(
         msg: "Array dimensions specified as a tuple cannot be empty.",
         help: None,
     }
+
+    /// When an empty input tendril was expected but not found.
+    @backtraced
+    lexer_empty_input_tendril {
+        args: (),
+        msg: "",
+        help: None,
+    }
+
+    /// When an integer is started with a leading zero.
+    @backtraced
+    lexer_eat_integer_leading_zero {
+        args: (),
+        msg: "",
+        help: None,
+    }
+
+    /// When a valid escaped character was expected but not found.
+    @backtraced
+    lexer_expected_valid_escaped_char {
+        args: (),
+        msg: "",
+        help: None,
+    }
+
+    /// When a string is not properly closed.
+    @backtraced
+    lexer_string_not_closed {
+        args: (),
+        msg: "",
+        help: None,
+    }
+
+    /// When an illegal escaped character is provided.
+    @backtraced
+    lexer_invalid_escaped_char {
+        args: (),
+        msg: "",
+        help: None,
+    }
+
+    /// When a character literal is not properly closed.
+    @backtraced
+    lexer_char_not_closed {
+        args: (),
+        msg: "",
+        help: None,
+    }
+
+    /// When a character literal is invalid.
+    @backtraced
+    lexer_invalid_char {
+        args: (),
+        msg: "",
+        help: None,
+    }
+
+    /// When a block comment is empty.
+    @backtraced
+    lexer_empty_block_comment {
+        args: (),
+        msg: "",
+        help: None,
+    }
+
+    /// When a block comment is not closed before end of file.
+    @backtraced
+    lexer_block_comment_does_not_close_before_eof {
+        args: (),
+        msg: "",
+        help: None,
+    }
+
+    /// When the lexer could not lex some text.
+    @backtraced
+    could_not_lex {
+        args: (),
+        msg: "",
+        help: None,
+    }
 );
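
Each entry above is consumed by the create_errors! macro in leo_errors, which turns it into a backtraced constructor on ParserError; the msg fields are still empty placeholders at this stage of the work. As a rough, self-contained analogy (the real macro also assigns error codes and captures backtraces; the type shapes below are simplified stand-ins, not the real leo_errors types), this is the shape the call sites in the rest of the diff rely on:

    // Simplified stand-ins, not the real leo_errors types.
    #[derive(Debug)]
    pub struct ParserError { message: &'static str, help: Option<&'static str> }

    #[derive(Debug)]
    pub struct LeoError(ParserError);

    // `.into()` at the lexer call sites relies on a conversion like this.
    impl From<ParserError> for LeoError {
        fn from(e: ParserError) -> Self { LeoError(e) }
    }

    pub type Result<T> = std::result::Result<T, LeoError>;

    impl ParserError {
        // One generated constructor per macro entry; `args: ()` means
        // the message interpolates no arguments.
        pub fn lexer_string_not_closed() -> Self {
            ParserError { message: "", help: None }
        }
    }

    // A lexer call site then propagates it as:
    // return Err(ParserError::lexer_string_not_closed().into());
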
@@ -15,6 +15,7 @@
 // along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
 
 use crate::tokenizer::{Char, Token};
+use leo_errors::{Result, ParserError};
 use leo_span::{Span, Symbol};
 
 use serde::{Deserialize, Serialize};
@@ -147,13 +148,13 @@ impl Token {
     /// Returns a tuple: [(integer length, integer token)] if an integer can be eaten, otherwise returns [`None`].
     /// An integer can be eaten if its bytes are at the front of the given `input_tendril` string.
     ///
-    fn eat_integer(input_tendril: &StrTendril) -> (usize, Option<Token>) {
+    fn eat_integer(input_tendril: &StrTendril) -> Result<(usize, Token)> {
         if input_tendril.is_empty() {
-            return (0, None);
+            return Err(ParserError::lexer_empty_input_tendril().into());
         }
         let input = input_tendril[..].as_bytes();
         if !input[0].is_ascii_digit() {
-            return (0, None);
+            return Err(ParserError::lexer_eat_integer_leading_zero().into());
         }
         let mut i = 1;
         let mut is_hex = false;
@@ -173,7 +174,7 @@ impl Token {
 
             i += 1;
         }
-        (i, Some(Token::Int(input_tendril.subtendril(0, i as u32))))
+        Ok((i, Token::Int(input_tendril.subtendril(0, i as u32))))
     }
 
     /// Returns the number of bytes in an emoji via a bit mask.
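
The payoff of the new Result signature shows up at the call sites: the old API could not distinguish "no integer here" from "bad integer", since both came back as (0, None). A minimal before/after sketch, assuming a caller inside Token::eat (the actual call sits in the digit branch of the big match further down; the surrounding names are from this diff):

    // Before: ambiguous sentinel value.
    // let (len, token) = Self::eat_integer(&input_tendril);
    // if len == 0 { /* empty input? leading zero? the caller can't tell */ }

    // After: `?` forwards a specific ParserError up to the tokenizer loop.
    let (len, token) = Self::eat_integer(&input_tendril)?;
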
@@ -197,13 +198,13 @@ impl Token {
     /// Returns a tuple: [(token length, token)] if the next token can be eaten, otherwise returns [`None`].
     /// The next token can be eaten if the bytes at the front of the given `input_tendril` string can be scanned into a token.
     ///
-    pub(crate) fn eat(input_tendril: StrTendril) -> (usize, Option<Token>) {
+    pub(crate) fn eat(input_tendril: StrTendril) -> Result<(usize, Token)> {
         if input_tendril.is_empty() {
-            return (0, None);
+            return Err(ParserError::lexer_empty_input_tendril().into());
         }
         let input = input_tendril[..].as_bytes();
         match input[0] {
-            x if x.is_ascii_whitespace() => return (1, None),
+            x if x.is_ascii_whitespace() => return Ok((1, Token::WhiteSpace)),
             b'"' => {
                 let mut i = 1;
                 let mut len: u8 = 1;
@@ -270,7 +271,7 @@ impl Token {
                             unicode = false;
                             string.push(character.into());
                         }
-                        None => return (0, None),
+                        None => return Err(ParserError::lexer_expected_valid_escaped_char().into()),
                     }
                 }
 
@@ -282,10 +283,10 @@ impl Token {
                 }
 
                 if i == input.len() || !end {
-                    return (0, None);
+                    return Err(ParserError::lexer_string_not_closed().into());
                 }
 
-                return (i + 1, Some(Token::StringLit(string)));
+                return Ok((i + 1, Token::StringLit(string)));
             }
             b'\'' => {
                 let mut i = 1;
@@ -310,7 +311,7 @@ impl Token {
                     if input[i + 1] == b'{' {
                         unicode = true;
                     } else {
-                        return (0, None);
+                        return Err(ParserError::lexer_invalid_escaped_char().into());
                     }
                 } else {
                     escaped = true;
@@ -323,12 +324,12 @@ impl Token {
                 }
 
                 if !end {
-                    return (0, None);
+                    return Err(ParserError::lexer_string_not_closed().into());
                 }
 
                 return match Self::eat_char(input_tendril.subtendril(1, (i - 1) as u32), escaped, hex, unicode) {
-                    Some(character) => (i + 1, Some(Token::CharLit(character))),
-                    None => (0, None),
+                    Some(character) => Ok((i + 1, Token::CharLit(character))),
+                    None => Err(ParserError::lexer_invalid_char().into()),
                 };
             }
             x if x.is_ascii_digit() => {
@@ -336,119 +337,119 @@ impl Token {
             }
             b'!' => {
                 if let Some(len) = eat(input, "!=") {
-                    return (len, Some(Token::NotEq));
+                    return Ok((len, Token::NotEq));
                 }
-                return (1, Some(Token::Not));
+                return Ok((1, Token::Not));
             }
             b'?' => {
-                return (1, Some(Token::Question));
+                return Ok((1, Token::Question));
             }
             b'&' => {
                 if let Some(len) = eat(input, "&&") {
-                    return (len, Some(Token::And));
+                    return Ok((len, Token::And));
                 }
-                return (1, Some(Token::Ampersand));
+                return Ok((1, Token::Ampersand));
             }
-            b'(' => return (1, Some(Token::LeftParen)),
-            b')' => return (1, Some(Token::RightParen)),
-            b'_' => return (1, Some(Token::Underscore)),
+            b'(' => return Ok((1, Token::LeftParen)),
+            b')' => return Ok((1, Token::RightParen)),
+            b'_' => return Ok((1, Token::Underscore)),
             b'*' => {
                 if let Some(len) = eat(input, "**") {
                     if let Some(inner_len) = eat(&input[len..], "=") {
-                        return (len + inner_len, Some(Token::ExpEq));
+                        return Ok((len + inner_len, Token::ExpEq));
                     }
-                    return (len, Some(Token::Exp));
+                    return Ok((len, Token::Exp));
                 } else if let Some(len) = eat(input, "*=") {
-                    return (len, Some(Token::MulEq));
+                    return Ok((len, Token::MulEq));
                 }
-                return (1, Some(Token::Mul));
+                return Ok((1, Token::Mul));
             }
             b'+' => {
                 if let Some(len) = eat(input, "+=") {
-                    return (len, Some(Token::AddEq));
+                    return Ok((len, Token::AddEq));
                 }
-                return (1, Some(Token::Add));
+                return Ok((1, Token::Add));
             }
-            b',' => return (1, Some(Token::Comma)),
+            b',' => return Ok((1, Token::Comma)),
             b'-' => {
                 if let Some(len) = eat(input, "->") {
-                    return (len, Some(Token::Arrow));
+                    return Ok((len, Token::Arrow));
                 } else if let Some(len) = eat(input, "-=") {
-                    return (len, Some(Token::MinusEq));
+                    return Ok((len, Token::MinusEq));
                 }
-                return (1, Some(Token::Minus));
+                return Ok((1, Token::Minus));
            }
             b'.' => {
                 if let Some(len) = eat(input, "...") {
-                    return (len, Some(Token::DotDotDot));
+                    return Ok((len, Token::DotDotDot));
                 } else if let Some(len) = eat(input, "..") {
-                    return (len, Some(Token::DotDot));
+                    return Ok((len, Token::DotDot));
                 }
-                return (1, Some(Token::Dot));
+                return Ok((1, Token::Dot));
             }
             b'/' => {
                 if eat(input, "//").is_some() {
                     let eol = input.iter().position(|x| *x == b'\n');
                     let len = if let Some(eol) = eol { eol + 1 } else { input.len() };
-                    return (len, Some(Token::CommentLine(input_tendril.subtendril(0, len as u32))));
+                    return Ok((len, Token::CommentLine(input_tendril.subtendril(0, len as u32))));
                 } else if eat(input, "/*").is_some() {
                     if input.is_empty() {
-                        return (0, None);
+                        return Err(ParserError::lexer_empty_block_comment().into());
                     }
                     let eol = input.windows(2).skip(2).position(|x| x[0] == b'*' && x[1] == b'/');
                     let len = if let Some(eol) = eol {
                         eol + 4
                     } else {
-                        return (0, None);
+                        return Err(ParserError::lexer_block_comment_does_not_close_before_eof().into());
                     };
-                    return (len, Some(Token::CommentBlock(input_tendril.subtendril(0, len as u32))));
+                    return Ok((len, Token::CommentBlock(input_tendril.subtendril(0, len as u32))));
                 } else if let Some(len) = eat(input, "/=") {
-                    return (len, Some(Token::DivEq));
+                    return Ok((len, Token::DivEq));
                 }
-                return (1, Some(Token::Div));
+                return Ok((1, Token::Div));
             }
             b':' => {
                 if let Some(len) = eat(input, "::") {
-                    return (len, Some(Token::DoubleColon));
+                    return Ok((len, Token::DoubleColon));
                 } else {
-                    return (1, Some(Token::Colon));
+                    return Ok((1, Token::Colon));
                 }
             }
-            b';' => return (1, Some(Token::Semicolon)),
+            b';' => return Ok((1, Token::Semicolon)),
             b'<' => {
                 if let Some(len) = eat(input, "<=") {
-                    return (len, Some(Token::LtEq));
+                    return Ok((len, Token::LtEq));
                 }
-                return (1, Some(Token::Lt));
+                return Ok((1, Token::Lt));
             }
             b'>' => {
                 if let Some(len) = eat(input, ">=") {
-                    return (len, Some(Token::GtEq));
+                    return Ok((len, Token::GtEq));
                 }
-                return (1, Some(Token::Gt));
+                return Ok((1, Token::Gt));
             }
             b'=' => {
                 if let Some(len) = eat(input, "==") {
-                    return (len, Some(Token::Eq));
+                    return Ok((len, Token::Eq));
                 }
-                return (1, Some(Token::Assign));
+                return Ok((1, Token::Assign));
             }
-            b'@' => return (1, Some(Token::At)),
-            b'[' => return (1, Some(Token::LeftSquare)),
-            b']' => return (1, Some(Token::RightSquare)),
-            b'{' => return (1, Some(Token::LeftCurly)),
-            b'}' => return (1, Some(Token::RightCurly)),
+            b'@' => return Ok((1, Token::At)),
+            b'[' => return Ok((1, Token::LeftSquare)),
+            b']' => return Ok((1, Token::RightSquare)),
+            b'{' => return Ok((1, Token::LeftCurly)),
+            b'}' => return Ok((1, Token::RightCurly)),
             b'|' => {
                 if let Some(len) = eat(input, "||") {
-                    return (len, Some(Token::Or));
+                    return Ok((len, Token::Or));
                 }
             }
             _ => (),
         }
         if let Some(ident) = eat_identifier(&input_tendril) {
-            return (
+            return Ok((
                 ident.len(),
-                Some(match &*ident {
+                match &*ident {
                     x if x.starts_with("aleo1") => Token::AddressLit(ident),
                     "address" => Token::Address,
                     "as" => Token::As,
@@ -486,11 +487,11 @@ impl Token {
                     "u64" => Token::U64,
                     "u128" => Token::U128,
                     _ => Token::Ident(Symbol::intern(&ident)),
-                }),
-            );
+                },
+            ));
         }
 
-        (0, None)
+        Err(ParserError::could_not_lex().into())
     }
 }
 
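
Throughout the match above, the free function eat (a helper defined elsewhere in this file, distinct from Token::eat) tests for a literal operator prefix and returns its length, which is why multi-byte operators like "!=" and "**=" are tried before their one-byte prefixes. A minimal stand-in with the behavior those branches depend on (a sketch, not the crate's exact definition):

    /// Returns Some(length of `flag`) when `input` starts with `flag`,
    /// and None otherwise, so the caller can fall through to shorter tokens.
    fn eat(input: &[u8], flag: &str) -> Option<usize> {
        if input.starts_with(flag.as_bytes()) {
            Some(flag.len())
        } else {
            None
        }
    }
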
@@ -28,21 +28,42 @@ pub(crate) use self::token::*;
 pub(crate) mod lexer;
 pub(crate) use self::lexer::*;
 
-use leo_errors::{LeoError, ParserError};
+use leo_errors::{ParserError, Result};
 use leo_span::Span;
 
 use tendril::StrTendril;
 
 /// Creates a new vector of spanned tokens from a given file path and source code text.
-pub(crate) fn tokenize(path: &str, input: StrTendril) -> Result<Vec<SpannedToken>, LeoError> {
+pub(crate) fn tokenize(path: &str, input: StrTendril) -> Result<Vec<SpannedToken>> {
     let path = Arc::new(path.to_string());
     let mut tokens = vec![];
     let mut index = 0usize;
     let mut line_no = 1usize;
     let mut line_start = 0usize;
     while input.len() > index {
-        match Token::eat(input.subtendril(index as u32, (input.len() - index) as u32)) {
-            (token_len, Some(token)) => {
+        match Token::eat(input.subtendril(index as u32, (input.len() - index) as u32))? {
+            (token_len, Token::WhiteSpace) => {
+                if token_len == 0 && index == input.len() {
+                    break;
+                } else if token_len == 0 {
+                    return Err(ParserError::unexpected_token(
+                        &input[index..].chars().next().unwrap(),
+                        &Span::new(
+                            line_no,
+                            line_no,
+                            index - line_start + 1,
+                            index - line_start + 2,
+                            path,
+                            input.subtendril(
+                                line_start as u32,
+                                input[line_start..].find('\n').unwrap_or_else(|| input.len()) as u32,
+                            ),
+                        ),
+                    )
+                    .into());
+                }
+            }
+            (token_len, token) => {
                 let mut span = Span::new(
                     line_no,
                     line_no,
@@ -79,32 +100,14 @@ pub(crate) fn tokenize(path: &str, input: StrTendril) -> Result<Vec<SpannedToken
                 tokens.push(SpannedToken { token, span });
                 index += token_len;
             }
-            (token_len, None) => {
-                if token_len == 0 && index == input.len() {
-                    break;
-                } else if token_len == 0 {
-                    return Err(ParserError::unexpected_token(
-                        &input[index..].chars().next().unwrap(),
-                        &Span::new(
-                            line_no,
-                            line_no,
-                            index - line_start + 1,
-                            index - line_start + 2,
-                            path,
-                            input.subtendril(
-                                line_start as u32,
-                                input[line_start..].find('\n').unwrap_or_else(|| input.len()) as u32,
-                            ),
-                        ),
-                    )
-                    .into());
-                }
-                if input.as_bytes()[index] == b'\n' {
-                    line_no += 1;
-                    line_start = index + token_len;
-                }
-                index += token_len;
-            }
+            // (token_len, None) => {
+            //     if input.as_bytes()[index] == b'\n' {
+            //         line_no += 1;
+            //         line_start = index + token_len;
+            //     }
+            //     index += token_len;
+
+            // }
         }
     }
     Ok(tokens)
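
With both halves in place, tokenize returns Result<Vec<SpannedToken>> and every lexer failure carries a ParserError instead of collapsing into (0, None). A hypothetical in-crate call site (tokenize is pub(crate); the file name and source text below are made up for illustration, and rendering span with {} assumes leo_span::Span implements Display):

    fn lex_example() -> Result<()> {
        // StrTendril implements From<&str>, so .into() converts the source text.
        let tokens = tokenize("test.leo", "let x = 1;".into())?;
        for SpannedToken { token, span } in &tokens {
            println!("{} at {}", token, span);
        }
        Ok(())
    }
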
@@ -59,6 +59,7 @@ pub enum Token {
     False,
     AddressLit(#[serde(with = "leo_span::tendril_json")] StrTendril),
     CharLit(Char),
+    WhiteSpace,
 
     // Symbols
     At,
@@ -258,6 +259,7 @@ impl fmt::Display for Token {
             False => write!(f, "false"),
             AddressLit(s) => write!(f, "{}", s),
             CharLit(s) => write!(f, "{}", s),
+            WhiteSpace => write!(f, "whitespace"),
 
             At => write!(f, "@"),
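
The Display arm gives the new WhiteSpace variant a printable form like every other token, which matters wherever diagnostics format a token with {}. A quick check of what this hunk establishes (to_string comes for free from fmt::Display):

    assert_eq!(Token::WhiteSpace.to_string(), "whitespace");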