string parsing leverages eat_char

This commit is contained in:
gluax 2021-05-22 15:53:30 -04:00
parent 329b330c15
commit efc83205f2
2 changed files with 98 additions and 20 deletions

View File

@ -148,7 +148,7 @@ char_types = {
} }
// Declared in values/char_value.rs // Declared in values/char_value.rs
value_char = { "\'" ~ char_types ~ "\'" } value_char = ${ "\'" ~ char_types ~ "\'" }
// Declared in values/integer_value.rs // Declared in values/integer_value.rs
value_integer = { value_integer_signed | value_integer_unsigned} value_integer = { value_integer_signed | value_integer_unsigned}

View File

@ -62,10 +62,12 @@ fn eat_identifier(input_tendril: &StrTendril) -> Option<StrTendril> {
impl Token { impl Token {
/// ///
/// Returns a new `Token::CharLit` if an character can be eaten, otherwise returns [`None`]. /// Returns a `char` if an character can be eaten, otherwise returns [`None`].
/// ///
fn eat_char(input_tendril: StrTendril, escaped: bool, hex: bool, unicode: bool) -> Option<Token> { fn eat_char(input_tendril: StrTendril, escaped: bool, hex: bool, unicode: bool) -> Option<char> {
println!("it {} e {} h {} u {}", input_tendril, escaped, hex, unicode);
if input_tendril.is_empty() { if input_tendril.is_empty() {
println!("ne");
return None; return None;
} }
@ -79,13 +81,13 @@ impl Token {
if let Some(character) = escaped.chars().next() { if let Some(character) = escaped.chars().next() {
return match character { return match character {
'0' => Some(Token::CharLit(0 as char)), '0' => Some(0 as char),
't' => Some(Token::CharLit(9 as char)), 't' => Some(9 as char),
'n' => Some(Token::CharLit(10 as char)), 'n' => Some(10 as char),
'r' => Some(Token::CharLit(13 as char)), 'r' => Some(13 as char),
'\"' => Some(Token::CharLit(34 as char)), '\"' => Some(34 as char),
'\'' => Some(Token::CharLit(39 as char)), '\'' => Some(39 as char),
'\\' => Some(Token::CharLit(92 as char)), '\\' => Some(92 as char),
_ => None, _ => None,
}; };
} else { } else {
@ -102,7 +104,7 @@ impl Token {
} }
if let Ok(ascii_number) = u8::from_str_radix(&hex_string, 16) { if let Ok(ascii_number) = u8::from_str_radix(&hex_string, 16) {
return Some(Token::CharLit(ascii_number as char)); return Some(ascii_number as char);
} }
} }
@ -112,13 +114,14 @@ impl Token {
if let Ok(hex) = u32::from_str_radix(&unicode_number, 16) { if let Ok(hex) = u32::from_str_radix(&unicode_number, 16) {
if let Some(character) = std::char::from_u32(hex) { if let Some(character) = std::char::from_u32(hex) {
return Some(Token::CharLit(character)); return Some(character);
} }
} }
} }
println!("itcs {:?}", input_tendril.to_string().chars());
if let Some(character) = input_tendril.to_string().chars().next() { if let Some(character) = input_tendril.to_string().chars().next() {
return Some(Token::CharLit(character)); return Some(character);
} }
None None
@ -168,6 +171,84 @@ impl Token {
let input = input_tendril[..].as_bytes(); let input = input_tendril[..].as_bytes();
match input[0] { match input[0] {
x if x.is_ascii_whitespace() => return (1, None), x if x.is_ascii_whitespace() => return (1, None),
b'`' => {
let mut i = 1;
let mut len: u32 = 1;
let mut start = 1;
let mut in_escape = false;
let mut escaped = false;
let mut hex = false;
let mut unicode = false;
let mut end = false;
let mut string = Vec::new();
while i < input.len() {
if !in_escape {
if input[i] == b'`' {
end = true;
break;
} else if input[i] == b'\\' {
in_escape = true;
start = i;
i += 1;
continue;
}
} else {
len += 1;
match input[i] {
b'x' => {
hex = true;
}
b'u' => {
unicode = true;
}
b'}' if unicode => {
in_escape = false;
}
_ if !hex && !unicode => {
escaped = true;
in_escape = false;
}
_ if hex && len == 4 => {
println!("len 4");
in_escape = false;
}
_ => {}
}
}
if !in_escape {
match Self::eat_char(
input_tendril.subtendril(start as u32, len as u32),
escaped,
hex,
unicode,
) {
Some(character) => {
len = 1;
escaped = false;
hex = false;
unicode = false;
string.push(character);
}
None => return (0, None),
}
}
i += 1;
if !escaped && !hex && !unicode {
start = i;
}
}
if !end {
return (0, None);
}
return (i + 1, Some(Token::StringLiteral(string)));
}
b'"' => { b'"' => {
let mut i = 1; let mut i = 1;
let mut in_escape = false; let mut in_escape = false;
@ -248,13 +329,10 @@ impl Token {
return (0, None); return (0, None);
} }
let result = Self::eat_char(input_tendril.subtendril(1, (i - 1) as u32), escaped, hex, unicode); return match Self::eat_char(input_tendril.subtendril(1, (i - 1) as u32), escaped, hex, unicode) {
Some(character) => (i + 1, Some(Token::CharLit(character))),
if result.is_none() { None => (0, None),
return (0, None); };
}
return (i + 1, result);
} }
x if x.is_ascii_digit() => { x if x.is_ascii_digit() => {
return Self::eat_integer(&input_tendril); return Self::eat_integer(&input_tendril);