mirror of
https://github.com/ProvableHQ/leo.git
synced 2024-11-10 13:24:15 +03:00
most things moved to a char parser
This commit is contained in:
parent
f886890e2b
commit
3d1cc9a735
@@ -16,21 +16,19 @@
 use serde::{Deserialize, Serialize};
 use std::fmt;
-use tendril::StrTendril;
 
 /// A number string guaranteed to be positive by the pest grammar.
 #[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq, Hash)]
 pub struct PositiveNumber {
     /// The string representation of the positive number.
     // FIXME(Centril): This should become an `u128`.
-    #[serde(with = "leo_span::tendril_json")]
-    pub value: StrTendril,
+    pub value: String,
 }
 
 impl PositiveNumber {
     /// Returns `true` if this number is zero.
     pub fn is_zero(&self) -> bool {
-        self.value.as_ref().eq("0")
+        self.value.eq("0")
     }
 }
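Side note on the serde attribute that disappears above: `String` serializes natively, while `StrTendril` needed the `leo_span::tendril_json` adapter. A minimal standalone sketch (serde_json used only for illustration) of the struct after the change:

    use serde::{Deserialize, Serialize};

    #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
    pub struct PositiveNumber {
        /// The string representation of the positive number.
        pub value: String,
    }

    fn main() {
        // String round-trips through JSON with no custom `with` module.
        let n = PositiveNumber { value: "42".into() };
        let json = serde_json::to_string(&n).unwrap();
        assert_eq!(json, r#"{"value":"42"}"#);
        let back: PositiveNumber = serde_json::from_str(&json).unwrap();
        assert_eq!(n, back);
    }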
@@ -14,8 +14,6 @@
 // You should have received a copy of the GNU General Public License
 // along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
 
-use tendril::StrTendril;
-
 use super::*;
 use crate::{Char, CharValue};
@@ -24,37 +22,21 @@ use crate::{Char, CharValue};
 pub enum ValueExpression {
     // todo: deserialize values here
     /// An address literal, e.g., `aleo1qnr4dkkvkgfqph0vzc3y6z2eu975wnpz2925ntjccd5cfqxtyu8sta57j8`.
-    Address(
-        #[serde(with = "leo_span::tendril_json")] StrTendril,
-        #[serde(with = "leo_span::span_json")] Span,
-    ),
+    Address(String, #[serde(with = "leo_span::span_json")] Span),
     /// A boolean literal, either `true` or `false`.
-    Boolean(
-        #[serde(with = "leo_span::tendril_json")] StrTendril,
-        #[serde(with = "leo_span::span_json")] Span,
-    ),
+    Boolean(String, #[serde(with = "leo_span::span_json")] Span),
     /// A char literal, e.g., `'a'`, representing a single unicode code point.
     Char(CharValue),
     /// A field literal, e.g., `42field`.
     /// That is, a signed number followed by the keyword `field`.
-    Field(
-        #[serde(with = "leo_span::tendril_json")] StrTendril,
-        #[serde(with = "leo_span::span_json")] Span,
-    ),
+    Field(String, #[serde(with = "leo_span::span_json")] Span),
     /// A group literal, either product or affine.
     /// For example, `42group` or `(12, 52)group`.
     Group(Box<GroupValue>),
     /// A negated non-integer literal, e.g., `-4.2`.
-    Implicit(
-        #[serde(with = "leo_span::tendril_json")] StrTendril,
-        #[serde(with = "leo_span::span_json")] Span,
-    ),
+    Implicit(String, #[serde(with = "leo_span::span_json")] Span),
     /// An integer literal, e.g., `42`.
-    Integer(
-        IntegerType,
-        #[serde(with = "leo_span::tendril_json")] StrTendril,
-        #[serde(with = "leo_span::span_json")] Span,
-    ),
+    Integer(IntegerType, String, #[serde(with = "leo_span::span_json")] Span),
     /// A string literal, e.g., `"foobar"`.
     String(Vec<Char>, #[serde(with = "leo_span::span_json")] Span),
 }
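The variants above keep a `#[serde(with = ...)]` adapter on the `Span` field only. For readers unfamiliar with per-field adapters on tuple variants, here is a self-contained sketch; the `Span` struct and `span_json` module below are simplified stand-ins, not Leo's real ones:

    use serde::{Deserialize, Serialize};

    #[derive(Debug, PartialEq)]
    struct Span { start: u32, stop: u32 }

    // Simplified stand-in for leo_span::span_json: store the span as [start, stop].
    mod span_json {
        use super::Span;
        use serde::{Deserialize, Deserializer, Serialize, Serializer};

        pub fn serialize<S: Serializer>(span: &Span, ser: S) -> Result<S::Ok, S::Error> {
            (span.start, span.stop).serialize(ser)
        }

        pub fn deserialize<'de, D: Deserializer<'de>>(de: D) -> Result<Span, D::Error> {
            let (start, stop) = <(u32, u32)>::deserialize(de)?;
            Ok(Span { start, stop })
        }
    }

    #[derive(Debug, PartialEq, Serialize, Deserialize)]
    enum ValueExpression {
        // The String needs no adapter; only the Span goes through span_json.
        Address(String, #[serde(with = "span_json")] Span),
    }

    fn main() {
        let v = ValueExpression::Address("aleo1...".into(), Span { start: 0, stop: 7 });
        let json = serde_json::to_string(&v).unwrap();
        let back: ValueExpression = serde_json::from_str(&json).unwrap();
        assert_eq!(v, back);
    }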
@@ -18,16 +18,12 @@ use leo_span::Span;
 use serde::{Deserialize, Serialize};
 use std::fmt;
-use tendril::StrTendril;
 
 /// A coordinate in an affine group literal.
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
 pub enum GroupCoordinate {
     /// A number, e.g., `42`.
-    Number(
-        #[serde(with = "leo_span::tendril_json")] StrTendril,
-        #[serde(with = "leo_span::span_json")] Span,
-    ),
+    Number(String, #[serde(with = "leo_span::span_json")] Span),
     /// A sign high recovery, i.e., `+`.
     SignHigh,
     /// A sign low recovery, i.e., `-`.
@@ -19,16 +19,12 @@ use leo_span::Span;
 use serde::{Deserialize, Serialize};
 use std::fmt;
-use tendril::StrTendril;
 
 /// A group literal.
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
 pub enum GroupValue {
     /// Product group literal, e.g., `42group`.
-    Single(
-        #[serde(with = "leo_span::tendril_json")] StrTendril,
-        #[serde(with = "leo_span::span_json")] Span,
-    ),
+    Single(String, #[serde(with = "leo_span::span_json")] Span),
     /// An affine group literal with (x, y) coordinates.
     Tuple(GroupTuple),
 }
@@ -38,22 +38,20 @@ impl TryFrom<(Type, Expression)> for InputValue {
         Ok(match value {
             (type_, Expression::Value(value)) => {
                 match (type_, value) {
-                    (Type::Address, ValueExpression::Address(value, _)) => Self::Address(value.to_string()),
+                    (Type::Address, ValueExpression::Address(value, _)) => Self::Address(value),
                     (Type::Boolean, ValueExpression::Boolean(value, span)) => {
                         let bool_value = value.parse::<bool>().map_err(|_| ParserError::unexpected_eof(&span))?; // TODO: change error
                         Self::Boolean(bool_value)
                     }
                     (Type::Char, ValueExpression::Char(value)) => Self::Char(value),
                     (Type::Field, ValueExpression::Field(value, _) | ValueExpression::Implicit(value, _)) => {
-                        Self::Field(value.to_string())
+                        Self::Field(value)
                     }
                     (Type::Group, ValueExpression::Group(value)) => Self::Group(*value),
-                    (Type::IntegerType(type_), ValueExpression::Implicit(value, _)) => {
-                        Self::Integer(type_, value.to_string())
-                    }
+                    (Type::IntegerType(type_), ValueExpression::Implicit(value, _)) => Self::Integer(type_, value),
                     (Type::IntegerType(expected), ValueExpression::Integer(actual, value, span)) => {
                         if expected == actual {
-                            Self::Integer(expected, value.to_string())
+                            Self::Integer(expected, value)
                         } else {
                             return Err(InputError::unexpected_type(expected.to_string(), actual, &span).into());
                         }
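Now that literals own a `String`, the conversion arms can move the value straight into the result instead of re-allocating via `to_string()`. A rough standalone sketch of the same move-instead-of-clone pattern, with placeholder enums rather than Leo's real types:

    #[derive(Debug, PartialEq)]
    enum Literal { Address(String), Integer(String) }

    #[derive(Debug, PartialEq)]
    enum InputValue { Address(String), Integer(i64) }

    impl TryFrom<Literal> for InputValue {
        type Error = String;

        fn try_from(lit: Literal) -> Result<Self, Self::Error> {
            Ok(match lit {
                // The String moves; no extra allocation.
                Literal::Address(value) => InputValue::Address(value),
                Literal::Integer(value) => InputValue::Integer(value.parse().map_err(|e| format!("{e}"))?),
            })
        }
    }

    fn main() {
        let v = InputValue::try_from(Literal::Address("aleo1abc".into())).unwrap();
        assert_eq!(v, InputValue::Address("aleo1abc".into()));
    }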
@@ -22,7 +22,6 @@ use leo_errors::{LeoError, ParserError, Result};
 use leo_span::{Span, Symbol};
 
 use std::{borrow::Cow, unreachable};
-use tendril::format_tendril;
 
 /// Stores a program in tokenized format plus additional context.
 /// May be converted into a [`Program`] AST by parsing all tokens.
@@ -182,7 +181,7 @@ impl<'a> ParserContext<'a> {
                     return None;
                 }
                 *i -= 1;
-                GroupCoordinate::Number(format_tendril!("-{}", value), span.clone())
+                GroupCoordinate::Number(format!("-{}", value), span.clone())
             }
             _ => GroupCoordinate::SignLow,
         },
@@ -19,8 +19,6 @@ use super::*;
 use leo_errors::{ParserError, Result};
 use leo_span::sym;
 
-use tendril::format_tendril;
-
 const INT_TYPES: &[Token] = &[
     Token::I8,
     Token::I16,
@@ -253,17 +251,10 @@ impl ParserContext<'_> {
             // hack for const signed integer overflow issues
             if matches!(operation, UnaryOperation::Negate) {
                 if let Expression::Value(ValueExpression::Integer(type_, value, span)) = inner {
-                    inner = Expression::Value(ValueExpression::Integer(
-                        type_,
-                        format_tendril!("-{}", value),
-                        &op.span + &span,
-                    ));
+                    inner = Expression::Value(ValueExpression::Integer(type_, format!("-{}", value), &op.span + &span));
                     continue;
                 } else if let Expression::Value(ValueExpression::Implicit(value, span)) = inner {
-                    inner = Expression::Value(ValueExpression::Implicit(
-                        format_tendril!("-{}", value),
-                        &op.span + &span,
-                    ));
+                    inner = Expression::Value(ValueExpression::Implicit(format!("-{}", value), &op.span + &span));
                     continue;
                 }
             }
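The "hack" comment above exists because the most negative value of a signed type has no positive counterpart, so the sign must be folded into the literal's text before parsing. A standalone illustration:

    fn main() {
        // 128 overflows i8, so parsing the digits first and negating after would fail:
        assert!("128".parse::<i8>().is_err());
        // Folding the minus sign into the literal text keeps i8::MIN parseable:
        assert_eq!("-128".parse::<i8>(), Ok(-128i8));
    }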
@@ -51,14 +51,14 @@ pub(crate) fn assert_no_whitespace(left_span: &Span, right_span: &Span, left: &s
 
 /// Creates a new program from a given file path and source code text.
 pub fn parse(handler: &Handler, path: &str, source: &str) -> Result<Program> {
-    let mut tokens = ParserContext::new(handler, crate::tokenize(path, source.into())?);
+    let mut tokens = ParserContext::new(handler, crate::tokenize(path, source)?);
 
     tokens.parse_program()
 }
 
 /// Parses an input file at the given file `path` and `source` code text.
 pub fn parse_input(handler: &Handler, path: &str, source: &str) -> Result<ParsedInputFile> {
-    let mut tokens = ParserContext::new(handler, crate::tokenize(path, source.into())?);
+    let mut tokens = ParserContext::new(handler, crate::tokenize(path, source)?);
 
     tokens.parse_input()
 }
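Dropping the `.into()` also means no `StrTendril` is built at the entry points. A trivial sketch of why a `&str` parameter accepts both borrowed and owned sources without conversion:

    fn parse(source: &str) -> usize {
        source.len() // stand-in for real parsing
    }

    fn main() {
        let owned = String::from("function main() {}");
        assert_eq!(parse(&owned), 18); // &String coerces to &str
        assert_eq!(parse("function main() {}"), 18); // literals work directly
    }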
@@ -35,7 +35,7 @@ impl Namespace for TokenNamespace {
 
     fn run_test(&self, test: Test) -> Result<Value, String> {
         create_session_if_not_set_then(|_| {
-            tokenizer::tokenize("test", test.content.into())
+            tokenizer::tokenize("test", &test.content)
                 .map(|tokens| {
                     Value::String(
                         tokens
@@ -80,7 +80,7 @@ fn implicit_value_expr() -> Expression {
 }
 
 fn tokenize(test: Test) -> Result<Vec<SpannedToken>, String> {
-    tokenizer::tokenize("test", test.content.into()).map_err(|x| x.to_string())
+    tokenizer::tokenize("test", &test.content).map_err(|x| x.to_string())
 }
 
 fn all_are_comments(tokens: &[SpannedToken]) -> bool {
@@ -21,52 +21,31 @@ use leo_span::{Span, Symbol};
 use serde::{Deserialize, Serialize};
-use tendril::StrTendril;
-
-use std::fmt;
-
-///
-/// Returns the length of the given `wanted` string if the string can be eaten, otherwise returns [`None`].
-/// A string can be eaten if its bytes are at the front of the given `input` array.
-///
-fn eat(input: &[u8], wanted: &str) -> Option<usize> {
-    let wanted = wanted.as_bytes();
-    if input.len() < wanted.len() {
-        return None;
-    }
-    if &input[0..wanted.len()] == wanted {
-        return Some(wanted.len());
-    }
-    None
-}
+use std::{fmt, iter::Peekable};
 
 ///
 /// Returns a new `StrTendril` string if an identifier can be eaten, otherwise returns [`None`].
 /// An identifier can be eaten if its bytes are at the front of the given `input_tendril` string.
 ///
-fn eat_identifier(input_tendril: &StrTendril) -> Option<StrTendril> {
-    if input_tendril.is_empty() {
-        return None;
-    }
-    let input = input_tendril.as_bytes();
-
-    if !input[0].is_ascii_alphabetic() {
-        return None;
+fn eat_identifier(input: &mut Peekable<impl Iterator<Item = char>>) -> Option<String> {
+    match input.peek() {
+        None => return None,
+        Some(c) if !c.is_ascii_alphabetic() => return None,
+        _ => {}
     }
 
-    let mut i = 1usize;
-    while i < input.len() {
-        if !input[i].is_ascii_alphanumeric() && input[i] != b'_' {
-            break;
-        }
-        i += 1;
+    let mut ident = String::new();
+    while let Some(c) = input.next_if(|c| c.is_ascii_alphabetic()) {
+        ident.push(c);
     }
-    Some(input_tendril.subtendril(0, i as u32))
+    Some(ident)
 }
 
 impl Token {
     ///
     /// Returns a `char` if a character can be eaten, otherwise returns [`None`].
     ///
-    fn eat_char(input_tendril: StrTendril, escaped: bool, hex: bool, unicode: bool) -> Result<Char> {
+    fn _eat_char(input_tendril: StrTendril, escaped: bool, hex: bool, unicode: bool) -> Result<Char> {
         if input_tendril.is_empty() {
             return Err(ParserError::lexer_empty_input_tendril().into());
         }
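The new `eat_identifier` shows the core pattern of the char parser: `Peekable::next_if` consumes a character only when a predicate holds, so no index bookkeeping is needed. A standalone sketch of the same idea:

    use std::iter::Peekable;

    // Consume a run of ASCII digits; the iterator itself tracks progress.
    fn eat_digits(input: &mut Peekable<impl Iterator<Item = char>>) -> String {
        let mut out = String::new();
        while let Some(c) = input.next_if(|c| c.is_ascii_digit()) {
            out.push(c);
        }
        out
    }

    fn main() {
        let mut it = "123abc".chars().peekable();
        assert_eq!(eat_digits(&mut it), "123");
        assert_eq!(it.next(), Some('a')); // the non-digit was not consumed
    }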
@@ -154,34 +133,30 @@ impl Token {
     /// Returns a tuple: [(integer length, integer token)] if an integer can be eaten, otherwise returns [`None`].
     /// An integer can be eaten if its bytes are at the front of the given `input_tendril` string.
     ///
-    fn eat_integer(input_tendril: &StrTendril) -> Result<(usize, Token)> {
-        if input_tendril.is_empty() {
-            return Err(ParserError::lexer_empty_input_tendril().into());
-        }
-        let input = input_tendril.as_bytes();
-        if !input[0].is_ascii_digit() {
-            return Err(ParserError::lexer_eat_integer_leading_zero(String::from_utf8_lossy(input)).into());
-        }
-        let mut i = 1;
+    fn eat_integer(lead: char, input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Token)> {
+        let mut int = String::from(lead);
 
-        while i < input.len() {
-            if i == 1 && input[0] == b'0' && input[i] == b'x' {
-                return Err(ParserError::lexer_hex_number_provided(
-                    &input_tendril[0..input_tendril.find('\n').unwrap_or(i) + 1],
-                )
-                .into());
-            }
-            if !input[i].is_ascii_digit() {
-                break;
+        match input.peek() {
+            None => return Err(ParserError::lexer_empty_input_tendril().into()),
+            Some(c) if !c.is_ascii_digit() => return Err(ParserError::lexer_eat_integer_leading_zero(c).into()),
+            _ => {}
         }
 
+        while let Some(c) = input.next_if(|c| c.is_ascii_digit()) {
+            if c == '0' && matches!(input.peek(), Some('x')) {
+                int.push(c);
+                int.push(input.next().unwrap());
+                return Err(ParserError::lexer_hex_number_provided(int).into());
             }
 
-            i += 1;
+            int.push(c);
         }
-        Ok((i, Token::Int(input_tendril.subtendril(0, i as u32))))
+
+        Ok((int.len(), Token::Int(int)))
     }
 
     /// Returns the number of bytes in an utf-8 encoding that starts with this byte.
-    fn utf8_byte_count(byte: u8) -> usize {
+    fn _utf8_byte_count(byte: u8) -> usize {
         let mut mask = 0x80;
         let mut result = 0;
         while byte & mask > 0 {
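The rewritten `eat_integer` rejects hex literals by peeking one char past a leading `0`. A standalone sketch of that lookahead in the same `Peekable` style:

    fn looks_like_hex(s: &str) -> bool {
        let mut it = s.chars().peekable();
        // A '0' immediately followed by 'x' marks a hex literal.
        matches!(it.next(), Some('0')) && matches!(it.peek(), Some('x'))
    }

    fn main() {
        assert!(looks_like_hex("0x2a"));
        assert!(!looks_like_hex("42"));
        assert!(!looks_like_hex("x42"));
    }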
@@ -201,284 +176,155 @@ impl Token {
     /// Returns a tuple: [(token length, token)] if the next token can be eaten, otherwise returns [`None`].
     /// The next token can be eaten if the bytes at the front of the given `input_tendril` string can be scanned into a token.
     ///
-    pub(crate) fn eat(input_tendril: StrTendril) -> Result<(usize, Token)> {
+    pub(crate) fn eat(input_tendril: &str) -> Result<(usize, Token)> {
         if input_tendril.is_empty() {
             return Err(ParserError::lexer_empty_input_tendril().into());
         }
-        let input = input_tendril.as_bytes();
-        match input[0] {
-            x if x.is_ascii_whitespace() => return Ok((1, Token::WhiteSpace)),
-            b'"' => {
-                let mut i = 1;
-                let mut len = 1;
-                let mut start = 1;
-                let mut in_escape = false;
-                let mut escaped = false;
-                let mut hex = false;
-                let mut unicode = false;
-                let mut end = false;
-                let mut string = Vec::new();
-
-                while i < input.len() {
-                    // Get the length of the utf-8 encoding here
-                    // and position i at the last byte.
-                    if input[i] & 0x80 > 0 {
-                        len = Self::utf8_byte_count(input[i]);
-                        i += len;
+        let mut input = input_tendril.chars().peekable();
 
-                        if unicode {
-                            return Err(
-                                ParserError::lexer_emoji_inside_escaped_unicode_char(&input_tendril[0..i]).into(),
-                            );
-                        }
-
-                        continue;
-                    }
-
-                    if !in_escape {
-                        if input[i] == b'"' {
-                            end = true;
-                            break;
-                        } else if input[i] == b'\\' {
-                            in_escape = true;
-                            start = i;
-                            i += 1;
-                            continue;
-                        }
-                    } else {
-                        len += 1;
-
-                        match input[i] {
-                            b'x' => {
-                                hex = true;
-                            }
-                            b'u' => {
-                                unicode = true;
-                            }
-                            b'}' if unicode => {
-                                in_escape = false;
-                            }
-                            _ if !hex && !unicode => {
-                                escaped = true;
-                                in_escape = false;
-                            }
-                            _ if hex && len == 4 => {
-                                in_escape = false;
-                            }
-                            _ => {}
-                        }
-                    }
-
-                    if !in_escape {
-                        let character = Self::eat_char(
-                            input_tendril.subtendril(start as u32, len as u32),
-                            escaped,
-                            hex,
-                            unicode,
-                        )?;
-                        len = 1;
-                        escaped = false;
-                        hex = false;
-                        unicode = false;
-                        string.push(character.into());
-                    }
-
-                    i += 1;
-
-                    if !escaped && !hex && !unicode {
-                        start = i;
-                    }
-                }
-
-                if i == input.len() || !end {
-                    return Err(ParserError::lexer_string_not_closed(String::from_utf8_lossy(&input[0..i])).into());
-                }
-
-                return Ok((i + 1, Token::StringLit(string)));
+        match input.next() {
+            Some(x) if x.is_ascii_whitespace() => return Ok((1, Token::WhiteSpace)),
+            Some(lead) if lead.is_ascii_digit() => {
+                return Self::eat_integer(lead, &mut input);
             }
-            b'\'' => {
-                let mut i = 1;
-                let mut in_escape = false;
-                let mut escaped = false;
-                let mut hex = false;
-                let mut escaped_unicode = false;
-                let mut unicode_char = false;
-                let mut end = false;
-
-                while i < input.len() {
-                    if input[i] & 0x80 > 0 && !unicode_char {
-                        i += Self::utf8_byte_count(input[i]);
-                        unicode_char = true;
-                        continue;
-                    } else if input[i] & 0x80 > 0 && unicode_char {
-                        i += Self::utf8_byte_count(input[i]);
-                        return Err(ParserError::lexer_invalid_char(&input_tendril[0..i]).into());
-                    } else if !in_escape || unicode_char {
-                        if input[i] == b'\'' {
-                            end = true;
-                            break;
-                        } else if unicode_char {
-                            return Err(ParserError::lexer_invalid_char(
-                                // grab the contents of everything between the '' if possible.
-                                // else just show the character right before stuff went wrong.
-                                &input_tendril[0..input_tendril[1..].find('\'').unwrap_or(i - 1) + 1],
-                            )
-                            .into());
-                        } else if input[i] == b'\\' {
-                            in_escape = true;
-                        }
-                    } else {
-                        if input[i] == b'x' {
-                            hex = true;
-                        } else if input[i] == b'u' {
-                            let one_ahead = input.get(i + 1);
-                            if matches!(one_ahead, Some(b'{')) {
-                                escaped_unicode = true;
-                            } else if one_ahead.is_some() {
-                                return Err(ParserError::lexer_expected_valid_escaped_char(input[i + 1]).into());
-                            } else {
-                                return Err(ParserError::lexer_expected_valid_escaped_char(input[i]).into());
-                            }
-                        } else {
-                            escaped = true;
-                        }
-
-                        in_escape = false;
-                    }
-
-                    i += 1;
-                }
-
-                if !end {
-                    return Err(ParserError::lexer_char_not_closed(String::from_utf8_lossy(&input[0..i])).into());
-                }
-
-                let character = Self::eat_char(
-                    input_tendril.subtendril(1, (i - 1) as u32),
-                    escaped,
-                    hex,
-                    escaped_unicode,
-                )?;
-                return Ok((i + 1, Token::CharLit(character)));
-            }
-            x if x.is_ascii_digit() => {
-                return Self::eat_integer(&input_tendril);
-            }
-            b'!' => {
-                if let Some(len) = eat(input, "!=") {
-                    return Ok((len, Token::NotEq));
+            Some('!') => {
+                if input.next_if_eq(&'=').is_some() {
+                    return Ok((2, Token::NotEq));
                 }
                 return Ok((1, Token::Not));
             }
-            b'?' => {
+            Some('?') => {
                 return Ok((1, Token::Question));
             }
-            b'&' => {
-                if let Some(len) = eat(input, "&&") {
-                    return Ok((len, Token::And));
+            Some('&') => {
+                if input.next_if_eq(&'&').is_some() {
+                    return Ok((2, Token::And));
                 }
                 return Ok((1, Token::Ampersand));
             }
-            b'(' => return Ok((1, Token::LeftParen)),
-            b')' => return Ok((1, Token::RightParen)),
-            b'_' => return Ok((1, Token::Underscore)),
-            b'*' => {
-                if let Some(len) = eat(input, "**") {
-                    if let Some(inner_len) = eat(&input[len..], "=") {
-                        return Ok((len + inner_len, Token::ExpEq));
+            Some('(') => return Ok((1, Token::LeftParen)),
+            Some(')') => return Ok((1, Token::RightParen)),
+            Some('_') => return Ok((1, Token::Underscore)),
+            Some('*') => {
+                if input.next_if_eq(&'*').is_some() {
+                    if input.next_if_eq(&'=').is_some() {
+                        return Ok((3, Token::ExpEq));
                     }
-                    return Ok((len, Token::Exp));
-                } else if let Some(len) = eat(input, "*=") {
-                    return Ok((len, Token::MulEq));
+                    return Ok((2, Token::Exp));
+                } else if input.next_if_eq(&'=').is_some() {
+                    return Ok((2, Token::MulEq));
                 }
                 return Ok((1, Token::Mul));
             }
-            b'+' => {
-                if let Some(len) = eat(input, "+=") {
-                    return Ok((len, Token::AddEq));
+            Some('+') => {
+                if input.next_if_eq(&'=').is_some() {
+                    return Ok((2, Token::AddEq));
                 }
                 return Ok((1, Token::Add));
            }
-            b',' => return Ok((1, Token::Comma)),
-            b'-' => {
-                if let Some(len) = eat(input, "->") {
-                    return Ok((len, Token::Arrow));
-                } else if let Some(len) = eat(input, "-=") {
-                    return Ok((len, Token::MinusEq));
+            Some(',') => return Ok((1, Token::Comma)),
+            Some('-') => {
+                if input.next_if_eq(&'>').is_some() {
+                    return Ok((2, Token::Arrow));
+                } else if input.next_if_eq(&'=').is_some() {
+                    return Ok((2, Token::MinusEq));
                 }
                 return Ok((1, Token::Minus));
             }
-            b'.' => {
-                if let Some(len) = eat(input, "...") {
-                    return Ok((len, Token::DotDotDot));
-                } else if let Some(len) = eat(input, "..") {
-                    return Ok((len, Token::DotDot));
+            Some('.') => {
+                if input.next_if_eq(&'.').is_some() {
+                    if input.next_if_eq(&'.').is_some() {
+                        return Ok((3, Token::DotDotDot));
+                    } else {
+                        return Ok((2, Token::DotDot));
+                    }
                 }
                 return Ok((1, Token::Dot));
             }
-            b'/' => {
-                if eat(input, "//").is_some() {
-                    let eol = input.iter().position(|x| *x == b'\n');
-                    let len = if let Some(eol) = eol { eol + 1 } else { input.len() };
-                    return Ok((len, Token::CommentLine(input_tendril.subtendril(0, len as u32))));
-                } else if eat(input, "/*").is_some() {
-                    if input.is_empty() {
+            Some(c) if c == '/' => {
+                let mut comment = String::from(c);
+                if let Some(c) = input.next_if_eq(&'/') {
+                    comment.push(c);
+
+                    while let Some(c) = input.next_if(|c| c != &'\n') {
+                        comment.push(c);
+                    }
+
+                    if input.next_if_eq(&'\n').is_some() {
+                        return Ok((comment.len() + 1, Token::CommentLine(comment)));
+                    }
+
+                    return Ok((comment.len(), Token::CommentLine(comment)));
+                } else if let Some(c) = input.next_if_eq(&'*') {
+                    comment.push(c);
+
+                    if input.peek().is_none() {
                         return Err(ParserError::lexer_empty_block_comment().into());
                     }
-                    let eol = input.windows(2).skip(2).position(|x| x[0] == b'*' && x[1] == b'/');
-                    let len = if let Some(eol) = eol {
-                        eol + 4
-                    } else {
-                        return Err(ParserError::lexer_block_comment_does_not_close_before_eof(
-                            String::from_utf8_lossy(&input[0..]),
-                        )
-                        .into());
-                    };
-                    return Ok((len, Token::CommentBlock(input_tendril.subtendril(0, len as u32))));
-                } else if let Some(len) = eat(input, "/=") {
-                    return Ok((len, Token::DivEq));
+
+                    let mut ended = false;
+                    while let Some(c) = input.next() {
+                        comment.push(c);
+                        if c == '*' && input.next_if_eq(&'/').is_some() {
+                            comment.push('/');
+                            ended = true;
+                            break;
+                        }
+                    }
+
+                    if !ended {
+                        return Err(ParserError::lexer_block_comment_does_not_close_before_eof(comment).into());
+                    }
+                    return Ok((comment.len() + 4, Token::CommentBlock(comment)));
+                } else if input.next_if_eq(&'=').is_some() {
+                    return Ok((2, Token::DivEq));
                 }
                 return Ok((1, Token::Div));
             }
-            b':' => {
-                if let Some(len) = eat(input, "::") {
-                    return Ok((len, Token::DoubleColon));
+            Some(':') => {
+                if input.next_if_eq(&':').is_some() {
+                    return Ok((2, Token::DoubleColon));
                 } else {
                     return Ok((1, Token::Colon));
                 }
             }
-            b';' => return Ok((1, Token::Semicolon)),
-            b'<' => {
-                if let Some(len) = eat(input, "<=") {
-                    return Ok((len, Token::LtEq));
+            Some(';') => return Ok((1, Token::Semicolon)),
+            Some('<') => {
+                if input.next_if_eq(&'=').is_some() {
+                    return Ok((2, Token::LtEq));
                 }
                 return Ok((1, Token::Lt));
             }
-            b'>' => {
-                if let Some(len) = eat(input, ">=") {
-                    return Ok((len, Token::GtEq));
+            Some('>') => {
+                if input.next_if_eq(&'=').is_some() {
+                    return Ok((2, Token::GtEq));
                 }
                 return Ok((1, Token::Gt));
             }
-            b'=' => {
-                if let Some(len) = eat(input, "==") {
-                    return Ok((len, Token::Eq));
+            Some('=') => {
+                if input.next_if_eq(&'=').is_some() {
+                    return Ok((2, Token::Eq));
                 }
                 return Ok((1, Token::Assign));
             }
-            b'@' => return Ok((1, Token::At)),
-            b'[' => return Ok((1, Token::LeftSquare)),
-            b']' => return Ok((1, Token::RightSquare)),
-            b'{' => return Ok((1, Token::LeftCurly)),
-            b'}' => return Ok((1, Token::RightCurly)),
-            b'|' => {
-                if let Some(len) = eat(input, "||") {
-                    return Ok((len, Token::Or));
+            Some('@') => return Ok((1, Token::At)),
+            Some('[') => return Ok((1, Token::LeftSquare)),
+            Some(']') => return Ok((1, Token::RightSquare)),
+            Some('{') => return Ok((1, Token::LeftCurly)),
+            Some('}') => return Ok((1, Token::RightCurly)),
+            Some('|') => {
+                if input.next_if_eq(&'|').is_some() {
+                    return Ok((2, Token::Or));
+                } else if let Some(found) = input.next() {
+                    return Err(ParserError::lexer_expected_but_found(found, '|').into());
+                } else {
+                    return Err(ParserError::lexer_empty_input_tendril().into());
                 }
             }
             _ => (),
         }
-        if let Some(ident) = eat_identifier(&input_tendril) {
+        if let Some(ident) = eat_identifier(&mut input) {
             return Ok((
                 ident.len(),
                 match &*ident {
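The multi-char operator arms above all follow one shape: commit to the first char with `next()`, then extend greedily with `next_if_eq`. A reduced standalone sketch of that dispatch:

    use std::iter::Peekable;

    #[derive(Debug, PartialEq)]
    enum Tok { Not, NotEq, Assign, Eq }

    fn eat_op(input: &mut Peekable<impl Iterator<Item = char>>) -> Option<(usize, Tok)> {
        match input.next()? {
            // One char of lookahead decides between `!` and `!=`.
            '!' => Some(if input.next_if_eq(&'=').is_some() { (2, Tok::NotEq) } else { (1, Tok::Not) }),
            '=' => Some(if input.next_if_eq(&'=').is_some() { (2, Tok::Eq) } else { (1, Tok::Assign) }),
            _ => None,
        }
    }

    fn main() {
        assert_eq!(eat_op(&mut "!=".chars().peekable()), Some((2, Tok::NotEq)));
        assert_eq!(eat_op(&mut "=5".chars().peekable()), Some((1, Tok::Assign)));
    }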
@@ -523,7 +369,7 @@ impl Token {
         ));
     }
 
-        Err(ParserError::could_not_lex(String::from_utf8_lossy(&input[0..])).into())
+        Err(ParserError::could_not_lex(input.collect::<String>()).into())
     }
 }
@@ -31,17 +31,15 @@ pub(crate) use self::lexer::*;
 use leo_errors::{ParserError, Result};
 use leo_span::Span;
 
-use tendril::StrTendril;
-
 /// Creates a new vector of spanned tokens from a given file path and source code text.
-pub(crate) fn tokenize(path: &str, input: StrTendril) -> Result<Vec<SpannedToken>> {
+pub(crate) fn tokenize(path: &str, input: &str) -> Result<Vec<SpannedToken>> {
     let path = Arc::new(path.to_string());
     let mut tokens = vec![];
     let mut index = 0usize;
     let mut line_no = 1usize;
     let mut line_start = 0usize;
     while input.len() > index {
-        match Token::eat(input.subtendril(index as u32, (input.len() - index) as u32))? {
+        match Token::eat(&input[index..(input.len() - index)])? {
             (token_len, Token::WhiteSpace) => {
                 if token_len == 0 && index == input.len() {
                     break;
@@ -54,10 +52,7 @@ pub(crate) fn tokenize(path: &str, input: StrTendril) -> Result<Vec<SpannedToken
                         index - line_start + 1,
                         index - line_start + 2,
                         path,
-                        input.subtendril(
-                            line_start as u32,
-                            input[line_start..].find('\n').unwrap_or(input.len()) as u32,
-                        ),
+                        input[line_start..input[line_start..].find('\n').unwrap_or(input.len())].to_string(),
                     ),
                 )
                 .into());
@@ -83,10 +78,7 @@ pub(crate) fn tokenize(path: &str, input: StrTendril) -> Result<Vec<SpannedToken
                     index - line_start + 1,
                     index - line_start + token_len + 1,
                     path.clone(),
-                    input.subtendril(
-                        line_start as u32,
-                        input[line_start..].find('\n').unwrap_or(input.len() - line_start) as u32,
-                    ),
+                    input[line_start..input[line_start..].find('\n').unwrap_or(input.len() - line_start)].to_string(),
                 );
                 match &token {
                     Token::CommentLine(_) => {
@@ -18,7 +18,6 @@ use leo_span::{sym, Symbol};
 
 use serde::{Deserialize, Serialize};
 use std::fmt;
-use tendril::StrTendril;
 
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 pub enum Char {
@@ -50,14 +49,14 @@ impl fmt::Display for Char {
 pub enum Token {
     // Lexical Grammar
     // Literals
-    CommentLine(#[serde(with = "leo_span::tendril_json")] StrTendril),
-    CommentBlock(#[serde(with = "leo_span::tendril_json")] StrTendril),
+    CommentLine(String),
+    CommentBlock(String),
     StringLit(Vec<leo_ast::Char>),
     Ident(Symbol),
-    Int(#[serde(with = "leo_span::tendril_json")] StrTendril),
+    Int(String),
     True,
     False,
-    AddressLit(#[serde(with = "leo_span::tendril_json")] StrTendril),
+    AddressLit(String),
     CharLit(Char),
     WhiteSpace,
@@ -398,4 +398,13 @@ create_errors!(
         msg: "do not put parens around single dimension array size",
         help: None,
     }
+
+    /// For when a user puts parens around a single defined variable.
+    @backtraced
+    lexer_expected_but_found {
+        args: (found: impl Display, expected: impl Display),
+        msg: format!("Found the char `{}`, but expected `{}`", found, expected),
+        help: None,
+    }
 );
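Judging only from the `msg` format string above, the new error should render like this (a sketch, not the real `create_errors!` expansion):

    fn main() {
        let (found, expected) = ('-', '|');
        let msg = format!("Found the char `{}`, but expected `{}`", found, expected);
        assert_eq!(msg, "Found the char `-`, but expected `|`");
    }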
@@ -20,7 +20,6 @@ use std::{fmt, sync::Arc, usize};
 
 use serde::ser::{Serialize, SerializeStruct, Serializer};
 use serde::Deserialize;
-use tendril::StrTendril;
 
 /// The span type which tracks where formatted errors originate from in a Leo file.
 /// This is used in many spots throughout the rest of the Leo crates.
@@ -36,9 +35,8 @@ pub struct Span {
     pub col_stop: usize,
     /// The path to the Leo file containing the error.
     pub path: Arc<String>,
-    #[serde(with = "crate::tendril_json")]
     /// The content of the line(s) that the span is found on.
-    pub content: StrTendril,
+    pub content: String,
 }
 
 impl Span {
@@ -55,7 +53,7 @@ impl Span {
         col_start: usize,
         col_stop: usize,
         path: Arc<String>,
-        content: StrTendril,
+        content: String,
     ) -> Self {
         Self {
             line_start,
@@ -156,7 +154,7 @@ impl std::ops::Add for Span {
                 new_content.push(format!("{:<1$}...", " ", other.col_start + 4));
             }
         }
-        let new_content = new_content.join("\n").into();
+        let new_content = new_content.join("\n");
         if self.line_start < other.line_stop {
             Span {
                 line_start: self.line_start,
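The dropped `.into()` falls out of `Span.content` becoming a `String`: `join` already returns one. A trivial confirmation:

    fn main() {
        let lines = vec!["let x = 1;".to_string(), "let y = 2;".to_string()];
        let content: String = lines.join("\n"); // no .into() needed anymore
        assert_eq!(content, "let x = 1;\nlet y = 2;");
    }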