mirror of
https://github.com/AleoHQ/leo.git
synced 2024-12-20 08:01:42 +03:00
add string type to tokenizer + lexer
This commit is contained in:
parent
d43605538d
commit
84c0a82008
@ -375,7 +375,7 @@ impl ParserContext<'_> {
|
|||||||
Token::True => Expression::Value(ValueExpression::Boolean("true".into(), span)),
|
Token::True => Expression::Value(ValueExpression::Boolean("true".into(), span)),
|
||||||
Token::False => Expression::Value(ValueExpression::Boolean("false".into(), span)),
|
Token::False => Expression::Value(ValueExpression::Boolean("false".into(), span)),
|
||||||
Token::AddressLit(value) => Expression::Value(ValueExpression::Address(value, span)),
|
Token::AddressLit(value) => Expression::Value(ValueExpression::Address(value, span)),
|
||||||
Token::StringLit(value) => Expression::Value(ValueExpression::String(value, span)),
|
Token::StaticString(value) => Expression::Value(ValueExpression::String(value, span)),
|
||||||
Token::Ident(name) => {
|
Token::Ident(name) => {
|
||||||
let ident = Identifier { name, span };
|
let ident = Identifier { name, span };
|
||||||
Expression::Identifier(ident)
|
Expression::Identifier(ident)
|
||||||
|
@ -154,7 +154,7 @@ impl ParserContext<'_> {
|
|||||||
p.bump();
|
p.bump();
|
||||||
let SpannedToken { token, span } = p.prev_token.clone();
|
let SpannedToken { token, span } = p.prev_token.clone();
|
||||||
string = Some(match token {
|
string = Some(match token {
|
||||||
Token::StringLit(chars) => chars,
|
Token::StaticString(chars) => chars,
|
||||||
_ => {
|
_ => {
|
||||||
p.emit_err(ParserError::unexpected_str(token, "formatted string", span));
|
p.emit_err(ParserError::unexpected_str(token, "formatted string", span));
|
||||||
Vec::new()
|
Vec::new()
|
||||||
|
@ -23,6 +23,7 @@ pub(super) const TYPE_TOKENS: &[Token] = &[
|
|||||||
Token::Field,
|
Token::Field,
|
||||||
Token::Group,
|
Token::Group,
|
||||||
Token::Scalar,
|
Token::Scalar,
|
||||||
|
Token::String,
|
||||||
Token::I8,
|
Token::I8,
|
||||||
Token::I16,
|
Token::I16,
|
||||||
Token::I32,
|
Token::I32,
|
||||||
@ -64,6 +65,7 @@ impl ParserContext<'_> {
|
|||||||
Token::Field => Type::Field,
|
Token::Field => Type::Field,
|
||||||
Token::Group => Type::Group,
|
Token::Group => Type::Group,
|
||||||
Token::Scalar => Type::Scalar,
|
Token::Scalar => Type::Scalar,
|
||||||
|
Token::String => Type::String,
|
||||||
x => Type::IntegerType(Self::token_to_int_type(x).expect("invalid int type")),
|
x => Type::IntegerType(Self::token_to_int_type(x).expect("invalid int type")),
|
||||||
},
|
},
|
||||||
span,
|
span,
|
||||||
|
@ -39,113 +39,114 @@ fn is_bidi_override(c: char) -> bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Token {
|
impl Token {
|
||||||
// Eats the parts of the unicode character after \u.
|
// todo: remove this unused code or reference https://github.com/Geal/nom/blob/main/examples/string.rs
|
||||||
fn eat_unicode_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
|
// // Eats the parts of the unicode character after \u.
|
||||||
let mut unicode = String::new();
|
// fn eat_unicode_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
|
||||||
// Account for the chars '\' and 'u'.
|
// let mut unicode = String::new();
|
||||||
let mut len = 2;
|
// // Account for the chars '\' and 'u'.
|
||||||
|
// let mut len = 2;
|
||||||
|
//
|
||||||
|
// if input.next_if_eq(&'{').is_some() {
|
||||||
|
// len += 1;
|
||||||
|
// } else if let Some(c) = input.next() {
|
||||||
|
// return Err(ParserError::lexer_unopened_escaped_unicode_char(c).into());
|
||||||
|
// } else {
|
||||||
|
// return Err(ParserError::lexer_empty_input_tendril().into());
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// while let Some(c) = input.next_if(|c| c != &'}') {
|
||||||
|
// len += 1;
|
||||||
|
// unicode.push(c);
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// if input.next_if_eq(&'}').is_some() {
|
||||||
|
// len += 1;
|
||||||
|
// } else {
|
||||||
|
// return Err(ParserError::lexer_unclosed_escaped_unicode_char(unicode).into());
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// // Max of 6 digits.
|
||||||
|
// // Minimum of 1 digit.
|
||||||
|
// if unicode.len() > 6 || unicode.is_empty() {
|
||||||
|
// return Err(ParserError::lexer_invalid_escaped_unicode_length(unicode).into());
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// if let Ok(hex) = u32::from_str_radix(&unicode, 16) {
|
||||||
|
// if let Some(character) = std::char::from_u32(hex) {
|
||||||
|
// Ok((len, Char::Scalar(character)))
|
||||||
|
// } else if hex <= 0x10FFFF {
|
||||||
|
// Ok((len, Char::NonScalar(hex)))
|
||||||
|
// } else {
|
||||||
|
// Err(ParserError::lexer_invalid_character_exceeded_max_value(unicode).into())
|
||||||
|
// }
|
||||||
|
// } else {
|
||||||
|
// Err(ParserError::lexer_expected_valid_hex_char(unicode).into())
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
if input.next_if_eq(&'{').is_some() {
|
// // Eats the parts of the hex character after \x.
|
||||||
len += 1;
|
// fn eat_hex_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
|
||||||
} else if let Some(c) = input.next() {
|
// let mut hex = String::new();
|
||||||
return Err(ParserError::lexer_unopened_escaped_unicode_char(c).into());
|
// // Account for the chars '\' and 'x'.
|
||||||
} else {
|
// let mut len = 2;
|
||||||
return Err(ParserError::lexer_empty_input_tendril().into());
|
//
|
||||||
}
|
// // First hex character.
|
||||||
|
// if let Some(c) = input.next_if(|c| c != &'\'') {
|
||||||
|
// len += 1;
|
||||||
|
// hex.push(c);
|
||||||
|
// } else if let Some(c) = input.next() {
|
||||||
|
// return Err(ParserError::lexer_expected_valid_hex_char(c).into());
|
||||||
|
// } else {
|
||||||
|
// return Err(ParserError::lexer_empty_input_tendril().into());
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// // Second hex character.
|
||||||
|
// if let Some(c) = input.next_if(|c| c != &'\'') {
|
||||||
|
// len += 1;
|
||||||
|
// hex.push(c);
|
||||||
|
// } else if let Some(c) = input.next() {
|
||||||
|
// return Err(ParserError::lexer_expected_valid_hex_char(c).into());
|
||||||
|
// } else {
|
||||||
|
// return Err(ParserError::lexer_empty_input_tendril().into());
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// if let Ok(ascii_number) = u8::from_str_radix(&hex, 16) {
|
||||||
|
// // According to RFC, we allow only values less than 128.
|
||||||
|
// if ascii_number > 127 {
|
||||||
|
// return Err(ParserError::lexer_expected_valid_hex_char(hex).into());
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// Ok((len, Char::Scalar(ascii_number as char)))
|
||||||
|
// } else {
|
||||||
|
// Err(ParserError::lexer_expected_valid_hex_char(hex).into())
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
while let Some(c) = input.next_if(|c| c != &'}') {
|
// fn eat_escaped_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
|
||||||
len += 1;
|
// match input.next() {
|
||||||
unicode.push(c);
|
// None => Err(ParserError::lexer_empty_input_tendril().into()),
|
||||||
}
|
// // Length of 2 to account the '\'.
|
||||||
|
// Some('0') => Ok((2, Char::Scalar(0 as char))),
|
||||||
|
// Some('t') => Ok((2, Char::Scalar(9 as char))),
|
||||||
|
// Some('n') => Ok((2, Char::Scalar(10 as char))),
|
||||||
|
// Some('r') => Ok((2, Char::Scalar(13 as char))),
|
||||||
|
// Some('\"') => Ok((2, Char::Scalar(34 as char))),
|
||||||
|
// Some('\'') => Ok((2, Char::Scalar(39 as char))),
|
||||||
|
// Some('\\') => Ok((2, Char::Scalar(92 as char))),
|
||||||
|
// Some('u') => Self::eat_unicode_char(input),
|
||||||
|
// Some('x') => Self::eat_hex_char(input),
|
||||||
|
// Some(c) => Err(ParserError::lexer_expected_valid_escaped_char(c).into()),
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
if input.next_if_eq(&'}').is_some() {
|
// /// Returns a `char` if a character can be eaten, otherwise returns [`None`].
|
||||||
len += 1;
|
// fn eat_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
|
||||||
} else {
|
// match input.next() {
|
||||||
return Err(ParserError::lexer_unclosed_escaped_unicode_char(unicode).into());
|
// None => Err(ParserError::lexer_empty_input_tendril().into()),
|
||||||
}
|
// Some('\\') => Self::eat_escaped_char(input),
|
||||||
|
// Some(c) => Ok((c.len_utf8(), Char::Scalar(c))),
|
||||||
// Max of 6 digits.
|
// }
|
||||||
// Minimum of 1 digit.
|
// }
|
||||||
if unicode.len() > 6 || unicode.is_empty() {
|
|
||||||
return Err(ParserError::lexer_invalid_escaped_unicode_length(unicode).into());
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Ok(hex) = u32::from_str_radix(&unicode, 16) {
|
|
||||||
if let Some(character) = std::char::from_u32(hex) {
|
|
||||||
Ok((len, Char::Scalar(character)))
|
|
||||||
} else if hex <= 0x10FFFF {
|
|
||||||
Ok((len, Char::NonScalar(hex)))
|
|
||||||
} else {
|
|
||||||
Err(ParserError::lexer_invalid_character_exceeded_max_value(unicode).into())
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
Err(ParserError::lexer_expected_valid_hex_char(unicode).into())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Eats the parts of the hex character after \x.
|
|
||||||
fn eat_hex_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
|
|
||||||
let mut hex = String::new();
|
|
||||||
// Account for the chars '\' and 'x'.
|
|
||||||
let mut len = 2;
|
|
||||||
|
|
||||||
// First hex character.
|
|
||||||
if let Some(c) = input.next_if(|c| c != &'\'') {
|
|
||||||
len += 1;
|
|
||||||
hex.push(c);
|
|
||||||
} else if let Some(c) = input.next() {
|
|
||||||
return Err(ParserError::lexer_expected_valid_hex_char(c).into());
|
|
||||||
} else {
|
|
||||||
return Err(ParserError::lexer_empty_input_tendril().into());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Second hex character.
|
|
||||||
if let Some(c) = input.next_if(|c| c != &'\'') {
|
|
||||||
len += 1;
|
|
||||||
hex.push(c);
|
|
||||||
} else if let Some(c) = input.next() {
|
|
||||||
return Err(ParserError::lexer_expected_valid_hex_char(c).into());
|
|
||||||
} else {
|
|
||||||
return Err(ParserError::lexer_empty_input_tendril().into());
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Ok(ascii_number) = u8::from_str_radix(&hex, 16) {
|
|
||||||
// According to RFC, we allow only values less than 128.
|
|
||||||
if ascii_number > 127 {
|
|
||||||
return Err(ParserError::lexer_expected_valid_hex_char(hex).into());
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok((len, Char::Scalar(ascii_number as char)))
|
|
||||||
} else {
|
|
||||||
Err(ParserError::lexer_expected_valid_hex_char(hex).into())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn eat_escaped_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
|
|
||||||
match input.next() {
|
|
||||||
None => Err(ParserError::lexer_empty_input_tendril().into()),
|
|
||||||
// Length of 2 to account the '\'.
|
|
||||||
Some('0') => Ok((2, Char::Scalar(0 as char))),
|
|
||||||
Some('t') => Ok((2, Char::Scalar(9 as char))),
|
|
||||||
Some('n') => Ok((2, Char::Scalar(10 as char))),
|
|
||||||
Some('r') => Ok((2, Char::Scalar(13 as char))),
|
|
||||||
Some('\"') => Ok((2, Char::Scalar(34 as char))),
|
|
||||||
Some('\'') => Ok((2, Char::Scalar(39 as char))),
|
|
||||||
Some('\\') => Ok((2, Char::Scalar(92 as char))),
|
|
||||||
Some('u') => Self::eat_unicode_char(input),
|
|
||||||
Some('x') => Self::eat_hex_char(input),
|
|
||||||
Some(c) => Err(ParserError::lexer_expected_valid_escaped_char(c).into()),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns a `char` if a character can be eaten, otherwise returns [`None`].
|
|
||||||
fn eat_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
|
|
||||||
match input.next() {
|
|
||||||
None => Err(ParserError::lexer_empty_input_tendril().into()),
|
|
||||||
Some('\\') => Self::eat_escaped_char(input),
|
|
||||||
Some(c) => Ok((c.len_utf8(), Char::Scalar(c))),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns a tuple: [(integer length, integer token)] if an integer can be eaten, otherwise returns [`None`].
|
/// Returns a tuple: [(integer length, integer token)] if an integer can be eaten, otherwise returns [`None`].
|
||||||
/// An integer can be eaten if its bytes are at the front of the given `input_tendril` string.
|
/// An integer can be eaten if its bytes are at the front of the given `input_tendril` string.
|
||||||
@ -183,27 +184,25 @@ impl Token {
|
|||||||
return Ok((1, Token::WhiteSpace));
|
return Ok((1, Token::WhiteSpace));
|
||||||
}
|
}
|
||||||
Some('"') => {
|
Some('"') => {
|
||||||
let mut string: Vec<leo_ast::Char> = Vec::new();
|
let mut string = String::from("\"");
|
||||||
input.next();
|
|
||||||
|
|
||||||
let mut len = 0;
|
let mut ended = false;
|
||||||
while let Some(c) = input.peek() {
|
while let Some(c) = input.next() {
|
||||||
if is_bidi_override(*c) {
|
if is_bidi_override(c) {
|
||||||
return Err(ParserError::lexer_bidi_override().into());
|
return Err(ParserError::lexer_bidi_override().into());
|
||||||
}
|
}
|
||||||
if c == &'"' {
|
string.push(c);
|
||||||
|
if c == '"' {
|
||||||
|
ended = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
let (char_len, character) = Self::eat_char(&mut input)?;
|
|
||||||
len += char_len;
|
|
||||||
string.push(character.into());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if input.next_if_eq(&'"').is_some() {
|
if !ended {
|
||||||
return Ok((len + 2, Token::StringLit(string)));
|
return Err(ParserError::lexer_string_not_closed(string).into());
|
||||||
}
|
}
|
||||||
|
|
||||||
return Err(ParserError::lexer_string_not_closed(leo_ast::Chars(string)).into());
|
return Ok((string.len(), Token::StaticString(string)));
|
||||||
}
|
}
|
||||||
Some(x) if x.is_ascii_digit() => {
|
Some(x) if x.is_ascii_digit() => {
|
||||||
return Self::eat_integer(&mut input);
|
return Self::eat_integer(&mut input);
|
||||||
|
@ -50,7 +50,7 @@ pub enum Token {
|
|||||||
// Literals
|
// Literals
|
||||||
CommentLine(String),
|
CommentLine(String),
|
||||||
CommentBlock(String),
|
CommentBlock(String),
|
||||||
StringLit(Vec<leo_ast::Char>),
|
StaticString(String),
|
||||||
Ident(Symbol),
|
Ident(Symbol),
|
||||||
Int(String),
|
Int(String),
|
||||||
True,
|
True,
|
||||||
@ -96,6 +96,7 @@ pub enum Token {
|
|||||||
Field,
|
Field,
|
||||||
Group,
|
Group,
|
||||||
Scalar,
|
Scalar,
|
||||||
|
String,
|
||||||
I8,
|
I8,
|
||||||
I16,
|
I16,
|
||||||
I32,
|
I32,
|
||||||
@ -150,6 +151,7 @@ pub const KEYWORD_TOKENS: &[Token] = &[
|
|||||||
Token::Public,
|
Token::Public,
|
||||||
Token::Return,
|
Token::Return,
|
||||||
Token::Scalar,
|
Token::Scalar,
|
||||||
|
Token::String,
|
||||||
Token::True,
|
Token::True,
|
||||||
Token::U8,
|
Token::U8,
|
||||||
Token::U16,
|
Token::U16,
|
||||||
@ -189,6 +191,7 @@ impl Token {
|
|||||||
Token::Public => sym::Public,
|
Token::Public => sym::Public,
|
||||||
Token::Return => sym::Return,
|
Token::Return => sym::Return,
|
||||||
Token::Scalar => sym::scalar,
|
Token::Scalar => sym::scalar,
|
||||||
|
Token::String => sym::string,
|
||||||
Token::True => sym::True,
|
Token::True => sym::True,
|
||||||
Token::U8 => sym::u8,
|
Token::U8 => sym::u8,
|
||||||
Token::U16 => sym::u16,
|
Token::U16 => sym::u16,
|
||||||
@ -206,13 +209,7 @@ impl fmt::Display for Token {
|
|||||||
match self {
|
match self {
|
||||||
CommentLine(s) => write!(f, "{}", s),
|
CommentLine(s) => write!(f, "{}", s),
|
||||||
CommentBlock(s) => write!(f, "{}", s),
|
CommentBlock(s) => write!(f, "{}", s),
|
||||||
StringLit(string) => {
|
StaticString(s) => write!(f, "{}", s),
|
||||||
write!(f, "\"")?;
|
|
||||||
for character in string.iter() {
|
|
||||||
write!(f, "{}", character)?;
|
|
||||||
}
|
|
||||||
write!(f, "\"")
|
|
||||||
}
|
|
||||||
Ident(s) => write!(f, "{}", s),
|
Ident(s) => write!(f, "{}", s),
|
||||||
Int(s) => write!(f, "{}", s),
|
Int(s) => write!(f, "{}", s),
|
||||||
True => write!(f, "true"),
|
True => write!(f, "true"),
|
||||||
@ -255,6 +252,7 @@ impl fmt::Display for Token {
|
|||||||
Field => write!(f, "field"),
|
Field => write!(f, "field"),
|
||||||
Group => write!(f, "group"),
|
Group => write!(f, "group"),
|
||||||
Scalar => write!(f, "scalar"),
|
Scalar => write!(f, "scalar"),
|
||||||
|
String => write!(f, "string"),
|
||||||
I8 => write!(f, "i8"),
|
I8 => write!(f, "i8"),
|
||||||
I16 => write!(f, "i16"),
|
I16 => write!(f, "i16"),
|
||||||
I32 => write!(f, "i32"),
|
I32 => write!(f, "i32"),
|
||||||
|
@ -137,6 +137,7 @@ symbols! {
|
|||||||
scalar,
|
scalar,
|
||||||
Star: "*",
|
Star: "*",
|
||||||
std,
|
std,
|
||||||
|
string,
|
||||||
Struct: "struct",
|
Struct: "struct",
|
||||||
test,
|
test,
|
||||||
True: "true",
|
True: "true",
|
||||||
|
Loading…
Reference in New Issue
Block a user