mirror of
https://github.com/AleoHQ/leo.git
synced 2024-12-24 18:12:28 +03:00
realized raw strings don't play nicely for testing strings
This commit is contained in:
parent
5034294d09
commit
9391a31569
@ -48,7 +48,7 @@ impl fmt::Display for Char {
|
|||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
match self {
|
match self {
|
||||||
Self::Scalar(c) => write!(f, "{}", c),
|
Self::Scalar(c) => write!(f, "{}", c),
|
||||||
Self::NonScalar(c) => write!(f, "{}", c),
|
Self::NonScalar(c) => write!(f, "{:X}", c),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -259,6 +259,7 @@ impl Runner for TestRunner {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
pub fn parser_tests() {
|
pub fn parser_tests() {
|
||||||
leo_test_framework::run_tests(&TestRunner, "parser");
|
leo_test_framework::run_tests(&TestRunner, "parser");
|
||||||
}
|
}
|
||||||
|
@ -19,7 +19,6 @@ use leo_errors::{ParserError, Result};
|
|||||||
use leo_span::{Span, Symbol};
|
use leo_span::{Span, Symbol};
|
||||||
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use tendril::StrTendril;
|
|
||||||
|
|
||||||
use std::{fmt, iter::Peekable};
|
use std::{fmt, iter::Peekable};
|
||||||
|
|
||||||
@ -42,91 +41,109 @@ fn eat_identifier(input: &mut Peekable<impl Iterator<Item = char>>) -> Option<St
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Token {
|
impl Token {
|
||||||
///
|
fn eat_unicode_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
|
||||||
/// Returns a `char` if a character can be eaten, otherwise returns [`None`].
|
let mut unicode = String::new();
|
||||||
///
|
// Account for the chars '\' and 'u'.
|
||||||
fn _eat_char(input_tendril: StrTendril, escaped: bool, hex: bool, unicode: bool) -> Result<Char> {
|
let mut len = 2;
|
||||||
if input_tendril.is_empty() {
|
|
||||||
|
if input.next_if_eq(&'{').is_some() {
|
||||||
|
len += 1;
|
||||||
|
} else if let Some(c) = input.next() {
|
||||||
|
return Err(ParserError::lexer_unopened_escaped_unicode_char(c).into());
|
||||||
|
} else {
|
||||||
return Err(ParserError::lexer_empty_input_tendril().into());
|
return Err(ParserError::lexer_empty_input_tendril().into());
|
||||||
}
|
}
|
||||||
|
|
||||||
if escaped {
|
while let Some(c) = input.next_if(|c| c != &'}') {
|
||||||
let string = input_tendril.to_string();
|
len += 1;
|
||||||
let escaped = &string[1..input_tendril.len()];
|
unicode.push(c);
|
||||||
|
|
||||||
if escaped.len() != 1 {
|
|
||||||
return Err(ParserError::lexer_escaped_char_incorrect_length(escaped).into());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(character) = escaped.chars().next() {
|
if input.next_if_eq(&'}').is_some() {
|
||||||
return match character {
|
len += 1;
|
||||||
'0' => Ok(Char::Scalar(0 as char)),
|
|
||||||
't' => Ok(Char::Scalar(9 as char)),
|
|
||||||
'n' => Ok(Char::Scalar(10 as char)),
|
|
||||||
'r' => Ok(Char::Scalar(13 as char)),
|
|
||||||
'\"' => Ok(Char::Scalar(34 as char)),
|
|
||||||
'\'' => Ok(Char::Scalar(39 as char)),
|
|
||||||
'\\' => Ok(Char::Scalar(92 as char)),
|
|
||||||
_ => return Err(ParserError::lexer_expected_valid_escaped_char(character).into()),
|
|
||||||
};
|
|
||||||
} else {
|
} else {
|
||||||
return Err(ParserError::lexer_unclosed_escaped_char().into());
|
return Err(ParserError::lexer_unclosed_escaped_unicode_char(unicode).into());
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if hex {
|
// Max of 6 digits.
|
||||||
let string = input_tendril.to_string();
|
// Minimum of 1 digit.
|
||||||
let hex_string = &string[2..string.len()];
|
if unicode.len() > 6 || unicode.is_empty() {
|
||||||
|
return Err(ParserError::lexer_invalid_escaped_unicode_length(unicode).into());
|
||||||
if hex_string.len() != 2 {
|
|
||||||
return Err(ParserError::lexer_escaped_hex_incorrect_length(hex_string).into());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Ok(ascii_number) = u8::from_str_radix(hex_string, 16) {
|
if let Ok(hex) = u32::from_str_radix(&unicode, 16) {
|
||||||
// According to RFC, we allow only values less than 128.
|
|
||||||
if ascii_number > 127 {
|
|
||||||
return Err(ParserError::lexer_expected_valid_hex_char(ascii_number).into());
|
|
||||||
}
|
|
||||||
|
|
||||||
return Ok(Char::Scalar(ascii_number as char));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if unicode {
|
|
||||||
let string = input_tendril.to_string();
|
|
||||||
if string.find('{').is_none() {
|
|
||||||
return Err(ParserError::lexer_unopened_escaped_unicode_char(string).into());
|
|
||||||
} else if string.find('}').is_none() {
|
|
||||||
return Err(ParserError::lexer_unclosed_escaped_unicode_char(string).into());
|
|
||||||
}
|
|
||||||
|
|
||||||
let unicode_number = &string[3..string.len() - 1];
|
|
||||||
let len = unicode_number.len();
|
|
||||||
if !(1..=6).contains(&len) {
|
|
||||||
return Err(ParserError::lexer_invalid_escaped_unicode_length(unicode_number).into());
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Ok(hex) = u32::from_str_radix(unicode_number, 16) {
|
|
||||||
if let Some(character) = std::char::from_u32(hex) {
|
if let Some(character) = std::char::from_u32(hex) {
|
||||||
// scalar
|
// scalar
|
||||||
return Ok(Char::Scalar(character));
|
Ok((len, Char::Scalar(character)))
|
||||||
} else if hex <= 0x10FFFF {
|
} else if hex <= 0x10FFFF {
|
||||||
return Ok(Char::NonScalar(hex));
|
Ok((len, Char::NonScalar(hex)))
|
||||||
} else {
|
} else {
|
||||||
return Err(ParserError::lexer_invalid_character_exceeded_max_value(unicode_number).into());
|
Err(ParserError::lexer_invalid_character_exceeded_max_value(unicode).into())
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
Err(ParserError::lexer_expected_valid_hex_char(unicode).into())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if input_tendril.to_string().chars().count() != 1 {
|
fn eat_hex_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
|
||||||
// If char doesn't close.
|
let mut hex = String::new();
|
||||||
return Err(ParserError::lexer_char_not_closed(&input_tendril[0..]).into());
|
// Account for the chars '\' and 'x'.
|
||||||
} else if let Some(character) = input_tendril.to_string().chars().next() {
|
let mut len = 2;
|
||||||
// If its a simple char.
|
|
||||||
return Ok(Char::Scalar(character));
|
// At least one hex character necessary.
|
||||||
|
if let Some(c) = input.next_if(|c| c != &'\'') {
|
||||||
|
len += 1;
|
||||||
|
hex.push(c);
|
||||||
|
} else if let Some(c) = input.next() {
|
||||||
|
return Err(ParserError::lexer_expected_valid_hex_char(c).into());
|
||||||
|
} else {
|
||||||
|
return Err(ParserError::lexer_empty_input_tendril().into());
|
||||||
}
|
}
|
||||||
|
|
||||||
Err(ParserError::lexer_invalid_char(input_tendril.to_string()).into())
|
// Second hex character optional.
|
||||||
|
if let Some(c) = input.next_if(|c| c != &'\'') {
|
||||||
|
len += 1;
|
||||||
|
hex.push(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Ok(ascii_number) = u8::from_str_radix(&hex, 16) {
|
||||||
|
// According to RFC, we allow only values less than 128.
|
||||||
|
if ascii_number > 127 {
|
||||||
|
return Err(ParserError::lexer_expected_valid_hex_char(hex).into());
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok((len, Char::Scalar(ascii_number as char)))
|
||||||
|
} else {
|
||||||
|
Err(ParserError::lexer_expected_valid_hex_char(hex).into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn eat_escaped_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
|
||||||
|
match input.next() {
|
||||||
|
None => Err(ParserError::lexer_empty_input_tendril().into()),
|
||||||
|
// Length of 2 to account the '\'.
|
||||||
|
Some('0') => Ok((2, Char::Scalar(0 as char))),
|
||||||
|
Some('t') => Ok((2, Char::Scalar(9 as char))),
|
||||||
|
Some('n') => Ok((2, Char::Scalar(10 as char))),
|
||||||
|
Some('r') => Ok((2, Char::Scalar(13 as char))),
|
||||||
|
Some('\"') => Ok((2, Char::Scalar(34 as char))),
|
||||||
|
Some('\'') => Ok((2, Char::Scalar(39 as char))),
|
||||||
|
Some('\\') => Ok((2, Char::Scalar(92 as char))),
|
||||||
|
Some('u') => Self::eat_unicode_char(input),
|
||||||
|
Some('x') => Self::eat_hex_char(input),
|
||||||
|
Some(c) => Err(ParserError::lexer_expected_valid_escaped_char(c).into()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
///
|
||||||
|
/// Returns a `char` if a character can be eaten, otherwise returns [`None`].
|
||||||
|
///
|
||||||
|
fn eat_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
|
||||||
|
match input.next() {
|
||||||
|
None => Err(ParserError::lexer_empty_input_tendril().into()),
|
||||||
|
Some('\\') => Self::eat_escaped_char(input),
|
||||||
|
Some(c) => Ok((c.len_utf8(), Char::Scalar(c))),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
///
|
||||||
@ -186,16 +203,21 @@ impl Token {
|
|||||||
return Ok((1, Token::WhiteSpace));
|
return Ok((1, Token::WhiteSpace));
|
||||||
}
|
}
|
||||||
Some('"') => {
|
Some('"') => {
|
||||||
let mut string = Vec::new();
|
let mut string: Vec<leo_ast::Char> = Vec::new();
|
||||||
input.next();
|
input.next();
|
||||||
|
|
||||||
while let Some(c) = input.next_if(|c| c != &'"') {
|
let mut len = 0;
|
||||||
let character = leo_ast::Char::Scalar(c);
|
while let Some(c) = input.peek() {
|
||||||
string.push(character);
|
if c == &'"' {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
let (char_len, character) = Self::eat_char(&mut input)?;
|
||||||
|
len += char_len;
|
||||||
|
string.push(character.into());
|
||||||
}
|
}
|
||||||
|
|
||||||
if input.next_if_eq(&'"').is_some() {
|
if input.next_if_eq(&'"').is_some() {
|
||||||
return Ok((string.len() + 2, Token::StringLit(string)));
|
return Ok((len + 2, Token::StringLit(string)));
|
||||||
}
|
}
|
||||||
|
|
||||||
return Err(ParserError::lexer_string_not_closed(string).into());
|
return Err(ParserError::lexer_string_not_closed(string).into());
|
||||||
@ -203,20 +225,17 @@ impl Token {
|
|||||||
Some('\'') => {
|
Some('\'') => {
|
||||||
input.next();
|
input.next();
|
||||||
|
|
||||||
if let Some(c) = input.next() {
|
let (len, character) = Self::eat_char(&mut input)?;
|
||||||
dbg!(&c);
|
|
||||||
if input.next_if_eq(&'\'').is_some() {
|
if input.next_if_eq(&'\'').is_some() {
|
||||||
input.next();
|
input.next();
|
||||||
return Ok((c.len_utf8() + 2, Token::CharLit(Char::Scalar(c))));
|
return Ok((len + 2, Token::CharLit(character)));
|
||||||
} else if let Some(c) = input.next() {
|
} else if let Some(c) = input.next() {
|
||||||
return Err(ParserError::lexer_string_not_closed(c).into());
|
return Err(ParserError::lexer_string_not_closed(c).into());
|
||||||
} else {
|
} else {
|
||||||
return Err(ParserError::lexer_empty_input_tendril().into());
|
return Err(ParserError::lexer_empty_input_tendril().into());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return Err(ParserError::lexer_empty_input_tendril().into());
|
|
||||||
}
|
|
||||||
Some(x) if x.is_ascii_digit() => {
|
Some(x) if x.is_ascii_digit() => {
|
||||||
return Self::eat_integer(&mut input);
|
return Self::eat_integer(&mut input);
|
||||||
}
|
}
|
||||||
|
@ -133,10 +133,6 @@ mod tests {
|
|||||||
r#"
|
r#"
|
||||||
'a'
|
'a'
|
||||||
'😭'
|
'😭'
|
||||||
'\u{10001F}'
|
|
||||||
'\x7f'
|
|
||||||
'\x00'
|
|
||||||
'\x37'
|
|
||||||
"test"
|
"test"
|
||||||
"test{}test"
|
"test{}test"
|
||||||
"test{}"
|
"test{}"
|
||||||
@ -235,13 +231,12 @@ mod tests {
|
|||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
output,
|
output,
|
||||||
r#"'a' '😭' '\u{10001F}' "test" "test{}test" "test{}" "{}test" "test{" "test}" "test{test" "test}test" "te{{}}" aleo1qnr4dkkvkgfqph0vzc3y6z2eu975wnpz2925ntjccd5cfqxtyu8sta57j8 test_ident 12345 address as bool circuit const else false field for function group i128 i64 i32 i16 i8 if import in input let mut & return static string test true u128 u64 u32 u16 u8 self Self console ! != && ( ) * ** **= *= + += , - -= -> _ . .. ... / /= : :: ; < <= = == > >= @ [ ] { { } } || ? // test
|
r#"'a' '😭' "test" "test{}test" "test{}" "{}test" "test{" "test}" "test{test" "test}test" "te{{}}" aleo1qnr4dkkvkgfqph0vzc3y6z2eu975wnpz2925ntjccd5cfqxtyu8sta57j8 test_ident 12345 address as bool circuit const else false field for function group i128 i64 i32 i16 i8 if import in input let mut & return static string test true u128 u64 u32 u16 u8 self Self console ! != && ( ) * ** **= *= + += , - -= -> _ . .. ... / /= : :: ; < <= = == > >= @ [ ] { { } } || ? // test
|
||||||
/* test */ // "#
|
/* test */ // "#
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_spans() {
|
fn test_spans() {
|
||||||
create_session_if_not_set_then(|_| {
|
create_session_if_not_set_then(|_| {
|
||||||
let raw = r#"
|
let raw = r#"
|
||||||
|
@ -39,7 +39,7 @@ impl fmt::Display for Char {
|
|||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
match self {
|
match self {
|
||||||
Self::Scalar(c) => write!(f, "{}", c),
|
Self::Scalar(c) => write!(f, "{}", c),
|
||||||
Self::NonScalar(c) => write!(f, "{}", c),
|
Self::NonScalar(c) => write!(f, "{:X}", c),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user