added character parsing

This commit is contained in:
damirka 2021-05-19 23:36:53 +03:00
parent 32bd282030
commit 39b61a0669
3 changed files with 138 additions and 11 deletions

View File

@ -690,7 +690,7 @@ impl ParserContext {
Token::False => Expression::Value(ValueExpression::Boolean("false".into(), span)),
Token::AddressLit(value) => Expression::Value(ValueExpression::Address(value, span)),
Token::CharLit(value) => Expression::Value(ValueExpression::Char(value, span)),
Token::QuotedString(value) => Expression::Value(ValueExpression::String(value, span)),
Token::StringLiteral(value) => Expression::Value(ValueExpression::String(value, span)),
Token::LeftParen => self.parse_tuple_expression(&span)?,
Token::LeftSquare => self.parse_array_expression(&span)?,
Token::Ident(name) => {

View File

@ -61,6 +61,140 @@ fn eat_identifier(input_tendril: &StrTendril) -> Option<StrTendril> {
}
impl Token {
///
/// Eats a backtick-delimited string literal from the start of `input_tendril`.
///
/// The first character is assumed to be the opening backtick and is skipped without being
/// inspected. Characters are then collected until an unescaped closing backtick is found,
/// decoding escape sequences on the way: `\0`, `\t`, `\n`, `\r`, `\"`, `\'`, `\\`,
/// `\x` followed by exactly two hex digits (at most `7F`), and `\u{...}` with one to six
/// hex digits.
///
/// Returns the number of bytes consumed (both backticks included) together with a
/// `Token::StringLiteral`, or `(0, None)` when the input is empty, the literal is not
/// terminated, or an escape sequence is malformed.
///
fn eat_string(input_tendril: &StrTendril) -> (usize, Option<Token>) {
    // Iterate over decoded characters rather than raw bytes so that multi-byte UTF-8
    // characters are collected as a single `char`. The indices are still byte offsets.
    let input = &input_tendril[..];
    let mut collect: Vec<char> = Vec::new();
    // An empty input yields no items here and falls through to the final `(0, None)`.
    let mut iter = input.char_indices().skip(1);

    // `while let` (not `for`) because the escape helpers advance the same iterator.
    while let Some((i, symbol)) = iter.next() {
        match symbol {
            // The closing backtick is a single byte, so `i + 1` is the consumed byte length.
            '`' => return (i + 1, Some(Token::StringLiteral(collect))),
            // A backslash always starts an escape sequence.
            '\\' => match Token::eat_escape(&mut iter) {
                Some(character) => collect.push(character),
                None => return (0, None),
            },
            _ => collect.push(symbol),
        }
    }

    // Ran out of input before finding the closing backtick.
    (0, None)
}

///
/// Decodes one escape sequence; `iter` must be positioned right after the backslash.
/// Returns [`None`] if the input ends early or the escape is unknown or malformed.
///
fn eat_escape(iter: &mut impl Iterator<Item = (usize, char)>) -> Option<char> {
    let (_, escaped) = iter.next()?;
    match escaped {
        '0' => Some('\0'),
        't' => Some('\t'),
        'n' => Some('\n'),
        'r' => Some('\r'),
        '"' => Some('"'),
        '\'' => Some('\''),
        '\\' => Some('\\'),
        '`' => Some('`'), // TODO: REMOVE LATER
        // \x0F
        'x' => Token::eat_hex_escape(iter),
        // \u{1-6 hex digits}
        'u' => Token::eat_unicode_escape(iter),
        _ => None,
    }
}

///
/// Decodes the two hex digits of a `\xHH` escape. Only values up to `7F` are accepted.
///
fn eat_hex_escape(iter: &mut impl Iterator<Item = (usize, char)>) -> Option<char> {
    let (_, high) = iter.next()?;
    let (_, low) = iter.next()?;
    // `to_digit` accepts hex digits only, so a sign character such as `+` is rejected.
    let value = high.to_digit(16)? * 16 + low.to_digit(16)?;
    if value <= 0x7F {
        std::char::from_u32(value)
    } else {
        None
    }
}

///
/// Decodes the `{H..H}` part of a `\u{...}` escape (one to six hex digits) into the
/// character with that code point. Returns [`None`] for a missing brace, no digits,
/// more than six digits, a non-hex character, or a value that is not a valid `char`.
///
fn eat_unicode_escape(iter: &mut impl Iterator<Item = (usize, char)>) -> Option<char> {
    match iter.next() {
        Some((_, '{')) => {}
        _ => return None,
    }

    let mut digits = String::with_capacity(6);
    for (_, symbol) in iter {
        match symbol {
            // Interpret the collected digits as a code point; surrogates and values above
            // `10FFFF` are rejected by `from_u32`, an empty digit list by `from_str_radix`.
            '}' => {
                return u32::from_str_radix(&digits, 16)
                    .ok()
                    .and_then(std::char::from_u32);
            }
            digit if digit.is_ascii_hexdigit() && digits.len() < 6 => digits.push(digit),
            _ => return None,
        }
    }

    // Input ended before the closing brace.
    None
}
///
/// Returns a new `StrTendril` string if a character can be eaten, otherwise returns [`None`].
///
@ -236,14 +370,7 @@ impl Token {
match input[0] {
x if x.is_ascii_whitespace() => return (1, None),
b'`' => {
let mut collect: Vec<char> = Vec::new();
for (i, char_bytes) in input.iter().enumerate().skip(1) {
if *char_bytes == b'`' {
return (i + 1, Some(Token::QuotedString(collect)));
}
collect.push(std::char::from_u32(*char_bytes as u32).unwrap());
}
return Self::eat_string(&input_tendril);
}
b'"' => {
let mut i = 1;

View File

@ -42,7 +42,7 @@ pub enum Token {
CommentLine(#[serde(with = "leo_ast::common::tendril_json")] StrTendril),
CommentBlock(#[serde(with = "leo_ast::common::tendril_json")] StrTendril),
FormatString(Vec<FormatStringPart>),
QuotedString(Vec<char>),
StringLiteral(Vec<char>),
Ident(#[serde(with = "leo_ast::common::tendril_json")] StrTendril),
Int(#[serde(with = "leo_ast::common::tendril_json")] StrTendril),
True,
@ -216,7 +216,7 @@ impl fmt::Display for Token {
}
write!(f, "\"")
}
QuotedString(content) => {
StringLiteral(content) => {
write!(f, "\"")?;
for character in content {
write!(f, "{}", character)?;