realized raw strings don't play nicely for testing strings

This commit is contained in:
gluax 2022-03-16 12:27:11 -07:00
parent 5034294d09
commit 9391a31569
5 changed files with 109 additions and 94 deletions

View File

@ -48,7 +48,7 @@ impl fmt::Display for Char {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self { match self {
Self::Scalar(c) => write!(f, "{}", c), Self::Scalar(c) => write!(f, "{}", c),
Self::NonScalar(c) => write!(f, "{}", c), Self::NonScalar(c) => write!(f, "{:X}", c),
} }
} }
} }

View File

@ -259,6 +259,7 @@ impl Runner for TestRunner {
} }
} }
#[test]
pub fn parser_tests() { pub fn parser_tests() {
leo_test_framework::run_tests(&TestRunner, "parser"); leo_test_framework::run_tests(&TestRunner, "parser");
} }

View File

@ -19,7 +19,6 @@ use leo_errors::{ParserError, Result};
use leo_span::{Span, Symbol}; use leo_span::{Span, Symbol};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use tendril::StrTendril;
use std::{fmt, iter::Peekable}; use std::{fmt, iter::Peekable};
@ -42,91 +41,109 @@ fn eat_identifier(input: &mut Peekable<impl Iterator<Item = char>>) -> Option<St
} }
impl Token { impl Token {
/// fn eat_unicode_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
/// Returns a `char` if a character can be eaten, otherwise returns [`None`]. let mut unicode = String::new();
/// // Account for the chars '\' and 'u'.
fn _eat_char(input_tendril: StrTendril, escaped: bool, hex: bool, unicode: bool) -> Result<Char> { let mut len = 2;
if input_tendril.is_empty() {
if input.next_if_eq(&'{').is_some() {
len += 1;
} else if let Some(c) = input.next() {
return Err(ParserError::lexer_unopened_escaped_unicode_char(c).into());
} else {
return Err(ParserError::lexer_empty_input_tendril().into()); return Err(ParserError::lexer_empty_input_tendril().into());
} }
if escaped { while let Some(c) = input.next_if(|c| c != &'}') {
let string = input_tendril.to_string(); len += 1;
let escaped = &string[1..input_tendril.len()]; unicode.push(c);
if escaped.len() != 1 {
return Err(ParserError::lexer_escaped_char_incorrect_length(escaped).into());
} }
if let Some(character) = escaped.chars().next() { if input.next_if_eq(&'}').is_some() {
return match character { len += 1;
'0' => Ok(Char::Scalar(0 as char)),
't' => Ok(Char::Scalar(9 as char)),
'n' => Ok(Char::Scalar(10 as char)),
'r' => Ok(Char::Scalar(13 as char)),
'\"' => Ok(Char::Scalar(34 as char)),
'\'' => Ok(Char::Scalar(39 as char)),
'\\' => Ok(Char::Scalar(92 as char)),
_ => return Err(ParserError::lexer_expected_valid_escaped_char(character).into()),
};
} else { } else {
return Err(ParserError::lexer_unclosed_escaped_char().into()); return Err(ParserError::lexer_unclosed_escaped_unicode_char(unicode).into());
}
} }
if hex { // Max of 6 digits.
let string = input_tendril.to_string(); // Minimum of 1 digit.
let hex_string = &string[2..string.len()]; if unicode.len() > 6 || unicode.is_empty() {
return Err(ParserError::lexer_invalid_escaped_unicode_length(unicode).into());
if hex_string.len() != 2 {
return Err(ParserError::lexer_escaped_hex_incorrect_length(hex_string).into());
} }
if let Ok(ascii_number) = u8::from_str_radix(hex_string, 16) { if let Ok(hex) = u32::from_str_radix(&unicode, 16) {
// According to RFC, we allow only values less than 128.
if ascii_number > 127 {
return Err(ParserError::lexer_expected_valid_hex_char(ascii_number).into());
}
return Ok(Char::Scalar(ascii_number as char));
}
}
if unicode {
let string = input_tendril.to_string();
if string.find('{').is_none() {
return Err(ParserError::lexer_unopened_escaped_unicode_char(string).into());
} else if string.find('}').is_none() {
return Err(ParserError::lexer_unclosed_escaped_unicode_char(string).into());
}
let unicode_number = &string[3..string.len() - 1];
let len = unicode_number.len();
if !(1..=6).contains(&len) {
return Err(ParserError::lexer_invalid_escaped_unicode_length(unicode_number).into());
}
if let Ok(hex) = u32::from_str_radix(unicode_number, 16) {
if let Some(character) = std::char::from_u32(hex) { if let Some(character) = std::char::from_u32(hex) {
// scalar // scalar
return Ok(Char::Scalar(character)); Ok((len, Char::Scalar(character)))
} else if hex <= 0x10FFFF { } else if hex <= 0x10FFFF {
return Ok(Char::NonScalar(hex)); Ok((len, Char::NonScalar(hex)))
} else { } else {
return Err(ParserError::lexer_invalid_character_exceeded_max_value(unicode_number).into()); Err(ParserError::lexer_invalid_character_exceeded_max_value(unicode).into())
} }
} else {
Err(ParserError::lexer_expected_valid_hex_char(unicode).into())
} }
} }
if input_tendril.to_string().chars().count() != 1 { fn eat_hex_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
// If char doesn't close. let mut hex = String::new();
return Err(ParserError::lexer_char_not_closed(&input_tendril[0..]).into()); // Account for the chars '\' and 'x'.
} else if let Some(character) = input_tendril.to_string().chars().next() { let mut len = 2;
// If its a simple char.
return Ok(Char::Scalar(character)); // At least one hex character necessary.
if let Some(c) = input.next_if(|c| c != &'\'') {
len += 1;
hex.push(c);
} else if let Some(c) = input.next() {
return Err(ParserError::lexer_expected_valid_hex_char(c).into());
} else {
return Err(ParserError::lexer_empty_input_tendril().into());
} }
Err(ParserError::lexer_invalid_char(input_tendril.to_string()).into()) // Second hex character optional.
if let Some(c) = input.next_if(|c| c != &'\'') {
len += 1;
hex.push(c);
}
if let Ok(ascii_number) = u8::from_str_radix(&hex, 16) {
// According to RFC, we allow only values less than 128.
if ascii_number > 127 {
return Err(ParserError::lexer_expected_valid_hex_char(hex).into());
}
Ok((len, Char::Scalar(ascii_number as char)))
} else {
Err(ParserError::lexer_expected_valid_hex_char(hex).into())
}
}
fn eat_escaped_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
match input.next() {
None => Err(ParserError::lexer_empty_input_tendril().into()),
// Length of 2 to account the '\'.
Some('0') => Ok((2, Char::Scalar(0 as char))),
Some('t') => Ok((2, Char::Scalar(9 as char))),
Some('n') => Ok((2, Char::Scalar(10 as char))),
Some('r') => Ok((2, Char::Scalar(13 as char))),
Some('\"') => Ok((2, Char::Scalar(34 as char))),
Some('\'') => Ok((2, Char::Scalar(39 as char))),
Some('\\') => Ok((2, Char::Scalar(92 as char))),
Some('u') => Self::eat_unicode_char(input),
Some('x') => Self::eat_hex_char(input),
Some(c) => Err(ParserError::lexer_expected_valid_escaped_char(c).into()),
}
}
///
/// Returns a `char` if a character can be eaten, otherwise returns [`None`].
///
fn eat_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
match input.next() {
None => Err(ParserError::lexer_empty_input_tendril().into()),
Some('\\') => Self::eat_escaped_char(input),
Some(c) => Ok((c.len_utf8(), Char::Scalar(c))),
}
} }
/// ///
@ -186,16 +203,21 @@ impl Token {
return Ok((1, Token::WhiteSpace)); return Ok((1, Token::WhiteSpace));
} }
Some('"') => { Some('"') => {
let mut string = Vec::new(); let mut string: Vec<leo_ast::Char> = Vec::new();
input.next(); input.next();
while let Some(c) = input.next_if(|c| c != &'"') { let mut len = 0;
let character = leo_ast::Char::Scalar(c); while let Some(c) = input.peek() {
string.push(character); if c == &'"' {
break;
}
let (char_len, character) = Self::eat_char(&mut input)?;
len += char_len;
string.push(character.into());
} }
if input.next_if_eq(&'"').is_some() { if input.next_if_eq(&'"').is_some() {
return Ok((string.len() + 2, Token::StringLit(string))); return Ok((len + 2, Token::StringLit(string)));
} }
return Err(ParserError::lexer_string_not_closed(string).into()); return Err(ParserError::lexer_string_not_closed(string).into());
@ -203,20 +225,17 @@ impl Token {
Some('\'') => { Some('\'') => {
input.next(); input.next();
if let Some(c) = input.next() { let (len, character) = Self::eat_char(&mut input)?;
dbg!(&c);
if input.next_if_eq(&'\'').is_some() { if input.next_if_eq(&'\'').is_some() {
input.next(); input.next();
return Ok((c.len_utf8() + 2, Token::CharLit(Char::Scalar(c)))); return Ok((len + 2, Token::CharLit(character)));
} else if let Some(c) = input.next() { } else if let Some(c) = input.next() {
return Err(ParserError::lexer_string_not_closed(c).into()); return Err(ParserError::lexer_string_not_closed(c).into());
} else { } else {
return Err(ParserError::lexer_empty_input_tendril().into()); return Err(ParserError::lexer_empty_input_tendril().into());
} }
} }
return Err(ParserError::lexer_empty_input_tendril().into());
}
Some(x) if x.is_ascii_digit() => { Some(x) if x.is_ascii_digit() => {
return Self::eat_integer(&mut input); return Self::eat_integer(&mut input);
} }

View File

@ -133,10 +133,6 @@ mod tests {
r#" r#"
'a' 'a'
'😭' '😭'
'\u{10001F}'
'\x7f'
'\x00'
'\x37'
"test" "test"
"test{}test" "test{}test"
"test{}" "test{}"
@ -235,13 +231,12 @@ mod tests {
assert_eq!( assert_eq!(
output, output,
r#"'a' '😭' '\u{10001F}' "test" "test{}test" "test{}" "{}test" "test{" "test}" "test{test" "test}test" "te{{}}" aleo1qnr4dkkvkgfqph0vzc3y6z2eu975wnpz2925ntjccd5cfqxtyu8sta57j8 test_ident 12345 address as bool circuit const else false field for function group i128 i64 i32 i16 i8 if import in input let mut & return static string test true u128 u64 u32 u16 u8 self Self console ! != && ( ) * ** **= *= + += , - -= -> _ . .. ... / /= : :: ; < <= = == > >= @ [ ] { { } } || ? // test r#"'a' '😭' "test" "test{}test" "test{}" "{}test" "test{" "test}" "test{test" "test}test" "te{{}}" aleo1qnr4dkkvkgfqph0vzc3y6z2eu975wnpz2925ntjccd5cfqxtyu8sta57j8 test_ident 12345 address as bool circuit const else false field for function group i128 i64 i32 i16 i8 if import in input let mut & return static string test true u128 u64 u32 u16 u8 self Self console ! != && ( ) * ** **= *= + += , - -= -> _ . .. ... / /= : :: ; < <= = == > >= @ [ ] { { } } || ? // test
/* test */ // "# /* test */ // "#
); );
}); });
} }
#[test]
fn test_spans() { fn test_spans() {
create_session_if_not_set_then(|_| { create_session_if_not_set_then(|_| {
let raw = r#" let raw = r#"

View File

@ -39,7 +39,7 @@ impl fmt::Display for Char {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self { match self {
Self::Scalar(c) => write!(f, "{}", c), Self::Scalar(c) => write!(f, "{}", c),
Self::NonScalar(c) => write!(f, "{}", c), Self::NonScalar(c) => write!(f, "{:X}", c),
} }
} }
} }