most things moved to a char parser

gluax 2022-03-15 14:53:42 -07:00
parent f886890e2b
commit 3d1cc9a735
14 changed files with 169 additions and 365 deletions

View File

@@ -16,21 +16,19 @@
use serde::{Deserialize, Serialize};
use std::fmt;
use tendril::StrTendril;
/// A number string guaranteed to be positive by the pest grammar.
#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq, Hash)]
pub struct PositiveNumber {
/// The string representation of the positive number.
// FIXME(Centril): This should become a `u128`.
#[serde(with = "leo_span::tendril_json")]
pub value: StrTendril,
pub value: String,
}
impl PositiveNumber {
/// Returns `true` if this number is zero.
pub fn is_zero(&self) -> bool {
self.value.as_ref().eq("0")
self.value.eq("0")
}
}
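
A quick sketch, not part of the commit, of what the `String`-backed field buys: the serde derives apply directly with no `leo_span::tendril_json` adapter, and `is_zero` is a plain string comparison. The `serde_json` round-trip below is assumed only for illustration.

use serde::{Deserialize, Serialize};

#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq, Hash)]
pub struct PositiveNumber {
    /// The string representation of the positive number.
    pub value: String,
}

fn main() {
    let n = PositiveNumber { value: "0".into() };
    // `is_zero` above reduces to exactly this comparison.
    assert!(n.value.eq("0"));
    // Derived serde impls handle `String` out of the box.
    let json = serde_json::to_string(&n).unwrap();
    assert_eq!(json, r#"{"value":"0"}"#);
}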

View File

@@ -14,8 +14,6 @@
// You should have received a copy of the GNU General Public License
// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
use tendril::StrTendril;
use super::*;
use crate::{Char, CharValue};
@@ -24,37 +22,21 @@ use crate::{Char, CharValue};
pub enum ValueExpression {
// todo: deserialize values here
/// An address literal, e.g., `aleo1qnr4dkkvkgfqph0vzc3y6z2eu975wnpz2925ntjccd5cfqxtyu8sta57j8`.
Address(
#[serde(with = "leo_span::tendril_json")] StrTendril,
#[serde(with = "leo_span::span_json")] Span,
),
Address(String, #[serde(with = "leo_span::span_json")] Span),
/// A boolean literal, either `true` or `false`.
Boolean(
#[serde(with = "leo_span::tendril_json")] StrTendril,
#[serde(with = "leo_span::span_json")] Span,
),
Boolean(String, #[serde(with = "leo_span::span_json")] Span),
/// A char literal, e.g., `'a'`, representing a single unicode code point.
Char(CharValue),
/// A field literal, e.g., `42field`.
/// That is, a signed number followed by the keyword `field`.
Field(
#[serde(with = "leo_span::tendril_json")] StrTendril,
#[serde(with = "leo_span::span_json")] Span,
),
Field(String, #[serde(with = "leo_span::span_json")] Span),
/// A group literal, either product or affine.
/// For example, `42group` or `(12, 52)group`.
Group(Box<GroupValue>),
/// A negated non-integer literal, e.g., `-4.2`.
Implicit(
#[serde(with = "leo_span::tendril_json")] StrTendril,
#[serde(with = "leo_span::span_json")] Span,
),
Implicit(String, #[serde(with = "leo_span::span_json")] Span),
/// An integer literal, e.g., `42`.
Integer(
IntegerType,
#[serde(with = "leo_span::tendril_json")] StrTendril,
#[serde(with = "leo_span::span_json")] Span,
),
Integer(IntegerType, String, #[serde(with = "leo_span::span_json")] Span),
/// A string literal, e.g., `"foobar"`.
String(Vec<Char>, #[serde(with = "leo_span::span_json")] Span),
}

View File

@@ -18,16 +18,12 @@ use leo_span::Span;
use serde::{Deserialize, Serialize};
use std::fmt;
use tendril::StrTendril;
/// A coordinate in an affine group literal.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum GroupCoordinate {
/// A number, e.g., `42`.
Number(
#[serde(with = "leo_span::tendril_json")] StrTendril,
#[serde(with = "leo_span::span_json")] Span,
),
Number(String, #[serde(with = "leo_span::span_json")] Span),
/// A sign high recovery, i.e., `+`.
SignHigh,
/// A sign low recovery, i.e., `-`.

View File

@@ -19,16 +19,12 @@ use leo_span::Span;
use serde::{Deserialize, Serialize};
use std::fmt;
use tendril::StrTendril;
/// A group literal.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum GroupValue {
/// Product group literal, e.g., `42group`.
Single(
#[serde(with = "leo_span::tendril_json")] StrTendril,
#[serde(with = "leo_span::span_json")] Span,
),
Single(String, #[serde(with = "leo_span::span_json")] Span),
/// An affine group literal with (x, y) coordinates.
Tuple(GroupTuple),
}

View File

@@ -38,22 +38,20 @@ impl TryFrom<(Type, Expression)> for InputValue {
Ok(match value {
(type_, Expression::Value(value)) => {
match (type_, value) {
(Type::Address, ValueExpression::Address(value, _)) => Self::Address(value.to_string()),
(Type::Address, ValueExpression::Address(value, _)) => Self::Address(value),
(Type::Boolean, ValueExpression::Boolean(value, span)) => {
let bool_value = value.parse::<bool>().map_err(|_| ParserError::unexpected_eof(&span))?; // TODO: change error
Self::Boolean(bool_value)
}
(Type::Char, ValueExpression::Char(value)) => Self::Char(value),
(Type::Field, ValueExpression::Field(value, _) | ValueExpression::Implicit(value, _)) => {
Self::Field(value.to_string())
Self::Field(value)
}
(Type::Group, ValueExpression::Group(value)) => Self::Group(*value),
(Type::IntegerType(type_), ValueExpression::Implicit(value, _)) => {
Self::Integer(type_, value.to_string())
}
(Type::IntegerType(type_), ValueExpression::Implicit(value, _)) => Self::Integer(type_, value),
(Type::IntegerType(expected), ValueExpression::Integer(actual, value, span)) => {
if expected == actual {
Self::Integer(expected, value.to_string())
Self::Integer(expected, value)
} else {
return Err(InputError::unexpected_type(expected.to_string(), actual, &span).into());
}

View File

@ -22,7 +22,6 @@ use leo_errors::{LeoError, ParserError, Result};
use leo_span::{Span, Symbol};
use std::{borrow::Cow, unreachable};
use tendril::format_tendril;
/// Stores a program in tokenized format plus additional context.
/// May be converted into a [`Program`] AST by parsing all tokens.
@@ -182,7 +181,7 @@ impl<'a> ParserContext<'a> {
return None;
}
*i -= 1;
GroupCoordinate::Number(format_tendril!("-{}", value), span.clone())
GroupCoordinate::Number(format!("-{}", value), span.clone())
}
_ => GroupCoordinate::SignLow,
},

View File

@ -19,8 +19,6 @@ use super::*;
use leo_errors::{ParserError, Result};
use leo_span::sym;
use tendril::format_tendril;
const INT_TYPES: &[Token] = &[
Token::I8,
Token::I16,
@@ -253,17 +251,10 @@ impl ParserContext<'_> {
// hack for const signed integer overflow issues
if matches!(operation, UnaryOperation::Negate) {
if let Expression::Value(ValueExpression::Integer(type_, value, span)) = inner {
inner = Expression::Value(ValueExpression::Integer(
type_,
format_tendril!("-{}", value),
&op.span + &span,
));
inner = Expression::Value(ValueExpression::Integer(type_, format!("-{}", value), &op.span + &span));
continue;
} else if let Expression::Value(ValueExpression::Implicit(value, span)) = inner {
inner = Expression::Value(ValueExpression::Implicit(
format_tendril!("-{}", value),
&op.span + &span,
));
inner = Expression::Value(ValueExpression::Implicit(format!("-{}", value), &op.span + &span));
continue;
}
}
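
A standalone sketch of this constant-negation fold, with simplified stand-in types rather than the commit's: instead of wrapping an integer literal in a negate node, the parser prepends `-` to the literal's text, so a constant like `-128i8` is later parsed as a single in-range value instead of negating an out-of-range `128i8`.

#[derive(Debug, PartialEq)]
enum Expr {
    Int(String),        // stand-in for ValueExpression::Integer
    Negate(Box<Expr>),  // the node the fold avoids creating
}

fn fold_negation(inner: Expr) -> Expr {
    match inner {
        // Fold the sign into the literal text, as the parser above does with format!.
        Expr::Int(value) => Expr::Int(format!("-{}", value)),
        other => Expr::Negate(Box::new(other)),
    }
}

fn main() {
    assert_eq!(
        fold_negation(Expr::Int("128".into())),
        Expr::Int("-128".into())
    );
}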

View File

@@ -51,14 +51,14 @@ pub(crate) fn assert_no_whitespace(left_span: &Span, right_span: &Span, left: &s
/// Creates a new program from a given file path and source code text.
pub fn parse(handler: &Handler, path: &str, source: &str) -> Result<Program> {
let mut tokens = ParserContext::new(handler, crate::tokenize(path, source.into())?);
let mut tokens = ParserContext::new(handler, crate::tokenize(path, source)?);
tokens.parse_program()
}
/// Parses an input file at the given file `path` and `source` code text.
pub fn parse_input(handler: &Handler, path: &str, source: &str) -> Result<ParsedInputFile> {
let mut tokens = ParserContext::new(handler, crate::tokenize(path, source.into())?);
let mut tokens = ParserContext::new(handler, crate::tokenize(path, source)?);
tokens.parse_input()
}

View File

@@ -35,7 +35,7 @@ impl Namespace for TokenNamespace {
fn run_test(&self, test: Test) -> Result<Value, String> {
create_session_if_not_set_then(|_| {
tokenizer::tokenize("test", test.content.into())
tokenizer::tokenize("test", &test.content)
.map(|tokens| {
Value::String(
tokens
@@ -80,7 +80,7 @@ fn implicit_value_expr() -> Expression {
}
fn tokenize(test: Test) -> Result<Vec<SpannedToken>, String> {
tokenizer::tokenize("test", test.content.into()).map_err(|x| x.to_string())
tokenizer::tokenize("test", &test.content).map_err(|x| x.to_string())
}
fn all_are_comments(tokens: &[SpannedToken]) -> bool {

View File

@@ -21,52 +21,31 @@ use leo_span::{Span, Symbol};
use serde::{Deserialize, Serialize};
use tendril::StrTendril;
use std::fmt;
///
/// Returns the length of the given `wanted` string if the string can be eaten, otherwise returns [`None`].
/// A string can be eaten if its bytes are at the front of the given `input` array.
///
fn eat(input: &[u8], wanted: &str) -> Option<usize> {
let wanted = wanted.as_bytes();
if input.len() < wanted.len() {
return None;
}
if &input[0..wanted.len()] == wanted {
return Some(wanted.len());
}
None
}
use std::{fmt, iter::Peekable};
///
/// Returns a new `String` if an identifier can be eaten, otherwise returns [`None`].
/// An identifier can be eaten if its characters are at the front of the given `input` iterator.
///
fn eat_identifier(input_tendril: &StrTendril) -> Option<StrTendril> {
if input_tendril.is_empty() {
return None;
}
let input = input_tendril.as_bytes();
if !input[0].is_ascii_alphabetic() {
return None;
fn eat_identifier(input: &mut Peekable<impl Iterator<Item = char>>) -> Option<String> {
match input.peek() {
None => return None,
Some(c) if !c.is_ascii_alphabetic() => return None,
_ => {}
}
let mut i = 1usize;
while i < input.len() {
if !input[i].is_ascii_alphanumeric() && input[i] != b'_' {
break;
}
i += 1;
let mut ident = String::new();
while let Some(c) = input.next_if(|c| c.is_ascii_alphanumeric() || c == &'_') {
ident.push(c);
}
Some(input_tendril.subtendril(0, i as u32))
Some(ident)
}
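
A usage sketch, not in the commit: `Peekable::next_if` consumes a character only when the predicate holds, so after eating an identifier the iterator is parked exactly on the first non-identifier character. This relies on the loop above accepting digits and underscores after the leading letter.

fn main() {
    let mut input = "foo_bar1 = 2".chars().peekable();
    assert_eq!(eat_identifier(&mut input), Some("foo_bar1".to_string()));
    // The terminating space is still unconsumed.
    assert_eq!(input.peek(), Some(&' '));

    // Identifiers must start with an ASCII letter.
    let mut not_ident = "1abc".chars().peekable();
    assert_eq!(eat_identifier(&mut not_ident), None);
}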
impl Token {
///
/// Returns a `char` if a character can be eaten, otherwise returns [`None`].
///
fn eat_char(input_tendril: StrTendril, escaped: bool, hex: bool, unicode: bool) -> Result<Char> {
fn _eat_char(input_tendril: StrTendril, escaped: bool, hex: bool, unicode: bool) -> Result<Char> {
if input_tendril.is_empty() {
return Err(ParserError::lexer_empty_input_tendril().into());
}
@@ -154,34 +133,30 @@ impl Token {
/// Returns a tuple: [(integer length, integer token)] if an integer can be eaten, otherwise returns an error.
/// An integer can be eaten if its digits are at the front of the given `input` iterator.
///
fn eat_integer(input_tendril: &StrTendril) -> Result<(usize, Token)> {
if input_tendril.is_empty() {
return Err(ParserError::lexer_empty_input_tendril().into());
}
let input = input_tendril.as_bytes();
if !input[0].is_ascii_digit() {
return Err(ParserError::lexer_eat_integer_leading_zero(String::from_utf8_lossy(input)).into());
}
let mut i = 1;
fn eat_integer(lead: char, input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Token)> {
let mut int = String::from(lead);
while i < input.len() {
if i == 1 && input[0] == b'0' && input[i] == b'x' {
return Err(ParserError::lexer_hex_number_provided(
&input_tendril[0..input_tendril.find('\n').unwrap_or(i) + 1],
)
.into());
}
if !input[i].is_ascii_digit() {
break;
match input.peek() {
None => return Err(ParserError::lexer_empty_input_tendril().into()),
Some(c) if !c.is_ascii_digit() => return Err(ParserError::lexer_eat_integer_leading_zero(c).into()),
_ => {}
}
while let Some(c) = input.next_if(|c| c.is_ascii_digit()) {
if c == '0' && matches!(input.peek(), Some('x')) {
int.push(c);
int.push(input.next().unwrap());
return Err(ParserError::lexer_hex_number_provided(int).into());
}
i += 1;
int.push(c);
}
Ok((i, Token::Int(input_tendril.subtendril(0, i as u32))))
Ok((int.len(), Token::Int(int)))
}
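
A self-contained sketch of the digit-eating loop (the helper name is illustrative, not the commit's): the lead digit is passed in separately because `Token::eat` has already pulled it off the iterator, and `next_if` stops cleanly at the first non-digit.

use std::iter::Peekable;

fn eat_digits(lead: char, input: &mut Peekable<impl Iterator<Item = char>>) -> String {
    let mut int = String::from(lead);
    while let Some(c) = input.next_if(|c| c.is_ascii_digit()) {
        int.push(c);
    }
    int
}

fn main() {
    let mut input = "42field".chars().peekable();
    let lead = input.next().unwrap(); // consumed by the caller, as in Token::eat
    assert_eq!(eat_digits(lead, &mut input), "42");
    // The type suffix is left for the parser.
    assert_eq!(input.collect::<String>(), "field");
}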
/// Returns the number of bytes in a UTF-8 encoding that starts with this byte.
fn utf8_byte_count(byte: u8) -> usize {
fn _utf8_byte_count(byte: u8) -> usize {
let mut mask = 0x80;
let mut result = 0;
while byte & mask > 0 {
@@ -201,284 +176,155 @@ impl Token {
/// Returns a tuple: [(token length, token)] if the next token can be eaten, otherwise returns [`None`].
/// The next token can be eaten if the bytes at the front of the given `input_tendril` string can be scanned into a token.
///
pub(crate) fn eat(input_tendril: StrTendril) -> Result<(usize, Token)> {
pub(crate) fn eat(input_tendril: &str) -> Result<(usize, Token)> {
if input_tendril.is_empty() {
return Err(ParserError::lexer_empty_input_tendril().into());
}
let input = input_tendril.as_bytes();
match input[0] {
x if x.is_ascii_whitespace() => return Ok((1, Token::WhiteSpace)),
b'"' => {
let mut i = 1;
let mut len = 1;
let mut start = 1;
let mut in_escape = false;
let mut escaped = false;
let mut hex = false;
let mut unicode = false;
let mut end = false;
let mut string = Vec::new();
while i < input.len() {
// Get the length of the utf-8 encoding here
// and position i at the last byte.
if input[i] & 0x80 > 0 {
len = Self::utf8_byte_count(input[i]);
i += len;
let mut input = input_tendril.chars().peekable();
if unicode {
return Err(
ParserError::lexer_emoji_inside_escaped_unicode_char(&input_tendril[0..i]).into(),
);
}
continue;
}
if !in_escape {
if input[i] == b'"' {
end = true;
break;
} else if input[i] == b'\\' {
in_escape = true;
start = i;
i += 1;
continue;
}
} else {
len += 1;
match input[i] {
b'x' => {
hex = true;
}
b'u' => {
unicode = true;
}
b'}' if unicode => {
in_escape = false;
}
_ if !hex && !unicode => {
escaped = true;
in_escape = false;
}
_ if hex && len == 4 => {
in_escape = false;
}
_ => {}
}
}
if !in_escape {
let character = Self::eat_char(
input_tendril.subtendril(start as u32, len as u32),
escaped,
hex,
unicode,
)?;
len = 1;
escaped = false;
hex = false;
unicode = false;
string.push(character.into());
}
i += 1;
if !escaped && !hex && !unicode {
start = i;
}
}
if i == input.len() || !end {
return Err(ParserError::lexer_string_not_closed(String::from_utf8_lossy(&input[0..i])).into());
}
return Ok((i + 1, Token::StringLit(string)));
match input.next() {
Some(x) if x.is_ascii_whitespace() => return Ok((1, Token::WhiteSpace)),
Some(lead) if lead.is_ascii_digit() => {
return Self::eat_integer(lead, &mut input);
}
b'\'' => {
let mut i = 1;
let mut in_escape = false;
let mut escaped = false;
let mut hex = false;
let mut escaped_unicode = false;
let mut unicode_char = false;
let mut end = false;
while i < input.len() {
if input[i] & 0x80 > 0 && !unicode_char {
i += Self::utf8_byte_count(input[i]);
unicode_char = true;
continue;
} else if input[i] & 0x80 > 0 && unicode_char {
i += Self::utf8_byte_count(input[i]);
return Err(ParserError::lexer_invalid_char(&input_tendril[0..i]).into());
} else if !in_escape || unicode_char {
if input[i] == b'\'' {
end = true;
break;
} else if unicode_char {
return Err(ParserError::lexer_invalid_char(
// grab the contents of everything between the '' if possible.
// else just show the character right before stuff went wrong.
&input_tendril[0..input_tendril[1..].find('\'').unwrap_or(i - 1) + 1],
)
.into());
} else if input[i] == b'\\' {
in_escape = true;
}
} else {
if input[i] == b'x' {
hex = true;
} else if input[i] == b'u' {
let one_ahead = input.get(i + 1);
if matches!(one_ahead, Some(b'{')) {
escaped_unicode = true;
} else if one_ahead.is_some() {
return Err(ParserError::lexer_expected_valid_escaped_char(input[i + 1]).into());
} else {
return Err(ParserError::lexer_expected_valid_escaped_char(input[i]).into());
}
} else {
escaped = true;
}
in_escape = false;
}
i += 1;
}
if !end {
return Err(ParserError::lexer_char_not_closed(String::from_utf8_lossy(&input[0..i])).into());
}
let character = Self::eat_char(
input_tendril.subtendril(1, (i - 1) as u32),
escaped,
hex,
escaped_unicode,
)?;
return Ok((i + 1, Token::CharLit(character)));
}
x if x.is_ascii_digit() => {
return Self::eat_integer(&input_tendril);
}
b'!' => {
if let Some(len) = eat(input, "!=") {
return Ok((len, Token::NotEq));
Some('!') => {
if input.next_if_eq(&'=').is_some() {
return Ok((2, Token::NotEq));
}
return Ok((1, Token::Not));
}
b'?' => {
Some('?') => {
return Ok((1, Token::Question));
}
b'&' => {
if let Some(len) = eat(input, "&&") {
return Ok((len, Token::And));
Some('&') => {
if input.next_if_eq(&'&').is_some() {
return Ok((2, Token::And));
}
return Ok((1, Token::Ampersand));
}
b'(' => return Ok((1, Token::LeftParen)),
b')' => return Ok((1, Token::RightParen)),
b'_' => return Ok((1, Token::Underscore)),
b'*' => {
if let Some(len) = eat(input, "**") {
if let Some(inner_len) = eat(&input[len..], "=") {
return Ok((len + inner_len, Token::ExpEq));
Some('(') => return Ok((1, Token::LeftParen)),
Some(')') => return Ok((1, Token::RightParen)),
Some('_') => return Ok((1, Token::Underscore)),
Some('*') => {
if input.next_if_eq(&'*').is_some() {
if input.next_if_eq(&'=').is_some() {
return Ok((3, Token::ExpEq));
}
return Ok((len, Token::Exp));
} else if let Some(len) = eat(input, "*=") {
return Ok((len, Token::MulEq));
return Ok((2, Token::Exp));
} else if input.next_if_eq(&'=').is_some() {
return Ok((2, Token::MulEq));
}
return Ok((1, Token::Mul));
}
b'+' => {
if let Some(len) = eat(input, "+=") {
return Ok((len, Token::AddEq));
Some('+') => {
if input.next_if_eq(&'=').is_some() {
return Ok((2, Token::AddEq));
}
return Ok((1, Token::Add));
}
b',' => return Ok((1, Token::Comma)),
b'-' => {
if let Some(len) = eat(input, "->") {
return Ok((len, Token::Arrow));
} else if let Some(len) = eat(input, "-=") {
return Ok((len, Token::MinusEq));
Some(',') => return Ok((1, Token::Comma)),
Some('-') => {
if input.next_if_eq(&'>').is_some() {
return Ok((2, Token::Arrow));
} else if input.next_if_eq(&'=').is_some() {
return Ok((2, Token::MinusEq));
}
return Ok((1, Token::Minus));
}
b'.' => {
if let Some(len) = eat(input, "...") {
return Ok((len, Token::DotDotDot));
} else if let Some(len) = eat(input, "..") {
return Ok((len, Token::DotDot));
Some('.') => {
if input.next_if_eq(&'.').is_some() {
if input.next_if_eq(&'.').is_some() {
return Ok((3, Token::DotDotDot));
} else {
return Ok((2, Token::DotDot));
}
}
return Ok((1, Token::Dot));
}
b'/' => {
if eat(input, "//").is_some() {
let eol = input.iter().position(|x| *x == b'\n');
let len = if let Some(eol) = eol { eol + 1 } else { input.len() };
return Ok((len, Token::CommentLine(input_tendril.subtendril(0, len as u32))));
} else if eat(input, "/*").is_some() {
if input.is_empty() {
Some(c) if c == '/' => {
let mut comment = String::from(c);
if let Some(c) = input.next_if_eq(&'/') {
comment.push(c);
while let Some(c) = input.next_if(|c| c != &'\n') {
comment.push(c);
}
if input.next_if_eq(&'\n').is_some() {
return Ok((comment.len() + 1, Token::CommentLine(comment)));
}
return Ok((comment.len(), Token::CommentLine(comment)));
} else if let Some(c) = input.next_if_eq(&'*') {
comment.push(c);
if input.peek().is_none() {
return Err(ParserError::lexer_empty_block_comment().into());
}
let eol = input.windows(2).skip(2).position(|x| x[0] == b'*' && x[1] == b'/');
let len = if let Some(eol) = eol {
eol + 4
} else {
return Err(ParserError::lexer_block_comment_does_not_close_before_eof(
String::from_utf8_lossy(&input[0..]),
)
.into());
};
return Ok((len, Token::CommentBlock(input_tendril.subtendril(0, len as u32))));
} else if let Some(len) = eat(input, "/=") {
return Ok((len, Token::DivEq));
let mut ended = false;
while let Some(c) = input.next() {
comment.push(c);
if c == '*' && input.next_if_eq(&'/').is_some() {
comment.push('/');
ended = true;
break;
}
}
if !ended {
return Err(ParserError::lexer_block_comment_does_not_close_before_eof(comment).into());
}
return Ok((comment.len(), Token::CommentBlock(comment)));
} else if input.next_if_eq(&'=').is_some() {
return Ok((2, Token::DivEq));
}
return Ok((1, Token::Div));
}
b':' => {
if let Some(len) = eat(input, "::") {
return Ok((len, Token::DoubleColon));
Some(':') => {
if input.next_if_eq(&':').is_some() {
return Ok((2, Token::DoubleColon));
} else {
return Ok((1, Token::Colon));
}
}
b';' => return Ok((1, Token::Semicolon)),
b'<' => {
if let Some(len) = eat(input, "<=") {
return Ok((len, Token::LtEq));
Some(';') => return Ok((1, Token::Semicolon)),
Some('<') => {
if input.next_if_eq(&'=').is_some() {
return Ok((2, Token::LtEq));
}
return Ok((1, Token::Lt));
}
b'>' => {
if let Some(len) = eat(input, ">=") {
return Ok((len, Token::GtEq));
Some('>') => {
if input.next_if_eq(&'=').is_some() {
return Ok((2, Token::GtEq));
}
return Ok((1, Token::Gt));
}
b'=' => {
if let Some(len) = eat(input, "==") {
return Ok((len, Token::Eq));
Some('=') => {
if input.next_if_eq(&'=').is_some() {
return Ok((2, Token::Eq));
}
return Ok((1, Token::Assign));
}
b'@' => return Ok((1, Token::At)),
b'[' => return Ok((1, Token::LeftSquare)),
b']' => return Ok((1, Token::RightSquare)),
b'{' => return Ok((1, Token::LeftCurly)),
b'}' => return Ok((1, Token::RightCurly)),
b'|' => {
if let Some(len) = eat(input, "||") {
return Ok((len, Token::Or));
Some('@') => return Ok((1, Token::At)),
Some('[') => return Ok((1, Token::LeftSquare)),
Some(']') => return Ok((1, Token::RightSquare)),
Some('{') => return Ok((1, Token::LeftCurly)),
Some('}') => return Ok((1, Token::RightCurly)),
Some('|') => {
if input.next_if_eq(&'|').is_some() {
return Ok((2, Token::Or));
} else if let Some(found) = input.next() {
return Err(ParserError::lexer_expected_but_found(found, '|').into());
} else {
return Err(ParserError::lexer_empty_input_tendril().into());
}
}
_ => (),
}
if let Some(ident) = eat_identifier(&input_tendril) {
if let Some(ident) = eat_identifier(&mut input) {
return Ok((
ident.len(),
match &*ident {
@@ -523,7 +369,7 @@ impl Token {
));
}
Err(ParserError::could_not_lex(String::from_utf8_lossy(&input[0..])).into())
Err(ParserError::could_not_lex(input.collect::<String>()).into())
}
}
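
The operator arms above all follow one pattern: consume the lead character, then use `next_if_eq` to speculatively extend it into a longer operator, returning how many characters were taken. A standalone sketch of that pattern, with illustrative token names rather than the commit's:

use std::iter::Peekable;
use std::str::Chars;

// Lex `*`, `*=`, `**`, or `**=`, assuming the caller already consumed the leading '*'.
fn lex_star(input: &mut Peekable<Chars<'_>>) -> (usize, &'static str) {
    if input.next_if_eq(&'*').is_some() {
        if input.next_if_eq(&'=').is_some() {
            return (3, "ExpEq");
        }
        return (2, "Exp");
    } else if input.next_if_eq(&'=').is_some() {
        return (2, "MulEq");
    }
    (1, "Mul")
}

fn main() {
    let mut input = "**= rhs".chars().peekable();
    input.next(); // the leading '*' is eaten by the dispatch match, as above
    assert_eq!(lex_star(&mut input), (3, "ExpEq"));
}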

View File

@@ -31,17 +31,15 @@ pub(crate) use self::lexer::*;
use leo_errors::{ParserError, Result};
use leo_span::Span;
use tendril::StrTendril;
/// Creates a new vector of spanned tokens from a given file path and source code text.
pub(crate) fn tokenize(path: &str, input: StrTendril) -> Result<Vec<SpannedToken>> {
pub(crate) fn tokenize(path: &str, input: &str) -> Result<Vec<SpannedToken>> {
let path = Arc::new(path.to_string());
let mut tokens = vec![];
let mut index = 0usize;
let mut line_no = 1usize;
let mut line_start = 0usize;
while input.len() > index {
match Token::eat(input.subtendril(index as u32, (input.len() - index) as u32))? {
match Token::eat(&input[index..])? {
(token_len, Token::WhiteSpace) => {
if token_len == 0 && index == input.len() {
break;
@@ -54,10 +52,7 @@ pub(crate) fn tokenize(path: &str, input: StrTendril) -> Result<Vec<SpannedToken
index - line_start + 1,
index - line_start + 2,
path,
input.subtendril(
line_start as u32,
input[line_start..].find('\n').unwrap_or(input.len()) as u32,
),
input[line_start..line_start + input[line_start..].find('\n').unwrap_or(input.len() - line_start)].to_string(),
),
)
.into());
@@ -83,10 +78,7 @@ pub(crate) fn tokenize(path: &str, input: StrTendril) -> Result<Vec<SpannedToken
index - line_start + 1,
index - line_start + token_len + 1,
path.clone(),
input.subtendril(
line_start as u32,
input[line_start..].find('\n').unwrap_or(input.len() - line_start) as u32,
),
input[line_start..line_start + input[line_start..].find('\n').unwrap_or(input.len() - line_start)].to_string(),
);
match &token {
Token::CommentLine(_) => {

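A trimmed-down sketch of the driver loop in `tokenize`, with spans and error handling elided: `Token::eat` reports how many characters it consumed, and the driver re-slices the remaining `&str` on each iteration instead of carving tendril substrings. The toy lexer here is an assumption for illustration; it must always consume at least one byte or the loop would never terminate.

fn token_lengths(input: &str, eat: impl Fn(&str) -> usize) -> Vec<usize> {
    let mut index = 0;
    let mut lengths = Vec::new();
    while index < input.len() {
        let len = eat(&input[index..]); // stand-in for Token::eat
        lengths.push(len);
        index += len;
    }
    lengths
}

fn main() {
    // Toy "lexer": one token per whitespace-delimited chunk, including the space.
    let eat = |s: &str| s.find(' ').map(|i| i + 1).unwrap_or(s.len());
    assert_eq!(token_lengths("let x = 1", eat), vec![4, 2, 2, 1]);
}
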
View File

@@ -18,7 +18,6 @@ use leo_span::{sym, Symbol};
use serde::{Deserialize, Serialize};
use std::fmt;
use tendril::StrTendril;
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum Char {
@@ -50,14 +49,14 @@ impl fmt::Display for Char {
pub enum Token {
// Lexical Grammar
// Literals
CommentLine(#[serde(with = "leo_span::tendril_json")] StrTendril),
CommentBlock(#[serde(with = "leo_span::tendril_json")] StrTendril),
CommentLine(String),
CommentBlock(String),
StringLit(Vec<leo_ast::Char>),
Ident(Symbol),
Int(#[serde(with = "leo_span::tendril_json")] StrTendril),
Int(String),
True,
False,
AddressLit(#[serde(with = "leo_span::tendril_json")] StrTendril),
AddressLit(String),
CharLit(Char),
WhiteSpace,

View File

@@ -398,4 +398,13 @@ create_errors!(
msg: "do not put parens around single dimension array size",
help: None,
}
/// For when the lexer found a character other than the one it expected.
@backtraced
lexer_expected_but_found {
args: (found: impl Display, expected: impl Display),
msg: format!("Found the char `{}`, but expected `{}`", found, expected),
help: None,
}
);

View File

@@ -20,7 +20,6 @@ use std::{fmt, sync::Arc, usize};
use serde::ser::{Serialize, SerializeStruct, Serializer};
use serde::Deserialize;
use tendril::StrTendril;
/// The span type which tracks where formatted errors originate from in a Leo file.
/// This is used in many spots throughout the rest of the Leo crates.
@@ -36,9 +35,8 @@ pub struct Span {
pub col_stop: usize,
/// The path to the Leo file containing the error.
pub path: Arc<String>,
#[serde(with = "crate::tendril_json")]
/// The content of the line(s) that the span is found on.
pub content: StrTendril,
pub content: String,
}
impl Span {
@@ -55,7 +53,7 @@ impl Span {
col_start: usize,
col_stop: usize,
path: Arc<String>,
content: StrTendril,
content: String,
) -> Self {
Self {
line_start,
@@ -156,7 +154,7 @@ impl std::ops::Add for Span {
new_content.push(format!("{:<1$}...", " ", other.col_start + 4));
}
}
let new_content = new_content.join("\n").into();
let new_content = new_content.join("\n");
if self.line_start < other.line_stop {
Span {
line_start: self.line_start,