a bunch of parser bug fixes so far

This commit is contained in:
gluax 2022-02-28 14:45:32 -08:00
parent 9730f90361
commit 736c6af72d
3 changed files with 102 additions and 65 deletions

View File

@ -114,8 +114,6 @@ impl ParserContext<'_> {
let (args, _, span) = self.parse_paren_comma_list(|p| {
Ok(if let Some(ident) = p.eat_identifier() {
Some(ident.name)
} else if let Some((int, _)) = p.eat_int() {
Some(Symbol::intern(&int.value))
} else {
let token = p.expect_any()?;
p.emit_err(ParserError::unexpected_str(&token.token, "ident or int", &token.span));

View File

@ -66,9 +66,9 @@ impl Token {
///
/// Returns a [`Char`] if a character can be eaten, otherwise returns an error.
///
fn eat_char(input_tendril: StrTendril, escaped: bool, hex: bool, unicode: bool) -> Option<Char> {
fn eat_char(input_tendril: StrTendril, escaped: bool, hex: bool, unicode: bool) -> Result<Char> {
if input_tendril.is_empty() {
return None;
return Err(ParserError::lexer_empty_input_tendril().into());
}
if escaped {
@ -76,22 +76,22 @@ impl Token {
let escaped = &string[1..string.len()];
if escaped.len() != 1 {
return None;
return Err(ParserError::lexer_escaped_char_incorrect_length(escaped).into());
}
if let Some(character) = escaped.chars().next() {
return match character {
'0' => Some(Char::Scalar(0 as char)),
't' => Some(Char::Scalar(9 as char)),
'n' => Some(Char::Scalar(10 as char)),
'r' => Some(Char::Scalar(13 as char)),
'\"' => Some(Char::Scalar(34 as char)),
'\'' => Some(Char::Scalar(39 as char)),
'\\' => Some(Char::Scalar(92 as char)),
_ => None,
'0' => Ok(Char::Scalar(0 as char)),
't' => Ok(Char::Scalar(9 as char)),
'n' => Ok(Char::Scalar(10 as char)),
'r' => Ok(Char::Scalar(13 as char)),
'\"' => Ok(Char::Scalar(34 as char)),
'\'' => Ok(Char::Scalar(39 as char)),
'\\' => Ok(Char::Scalar(92 as char)),
_ => return Err(ParserError::lexer_expected_valid_escaped_char(character).into()),
};
} else {
return None;
return Err(ParserError::lexer_unclosed_escaped_char().into());
}
}
@ -100,48 +100,52 @@ impl Token {
let hex_string = &string[2..string.len()];
if hex_string.len() != 2 {
return None;
return Err(ParserError::lexer_escaped_hex_incorrect_length(hex_string).into());
}
if let Ok(ascii_number) = u8::from_str_radix(hex_string, 16) {
// According to RFC, we allow only values less than 128.
if ascii_number > 127 {
return None;
return Err(ParserError::lexer_expected_valid_hex_char(ascii_number).into());
}
return Some(Char::Scalar(ascii_number as char));
return Ok(Char::Scalar(ascii_number as char));
}
}
if unicode {
let string = input_tendril.to_string();
if &string[string.len() - 1..] != "}" {
return None;
if string.find('}').is_none() {
return Err(ParserError::lexer_unclosed_escaped_unicode_char(string).into());
}
let unicode_number = &string[3..string.len() - 1];
let len = unicode_number.len();
if !(1..=6).contains(&len) {
return None;
return Err(ParserError::lexer_invalid_escaped_unicode_length(unicode_number).into());
}
if let Ok(hex) = u32::from_str_radix(unicode_number, 16) {
if let Some(character) = std::char::from_u32(hex) {
// scalar
return Some(Char::Scalar(character));
return Ok(Char::Scalar(character));
} else if hex <= 0x10FFFF {
return Some(Char::NonScalar(hex));
return Ok(Char::NonScalar(hex));
} else {
return Err(ParserError::lexer_invalid_character_exceeded_max_value(unicode_number).into());
}
}
}
if input_tendril.to_string().chars().count() != 1 {
return None;
// If char doesn't close.
return Err(ParserError::lexer_char_not_closed(&input_tendril[0..]).into());
} else if let Some(character) = input_tendril.to_string().chars().next() {
return Some(Char::Scalar(character));
// If it's a simple char.
return Ok(Char::Scalar(character));
}
None
Err(ParserError::lexer_invalid_char(input_tendril.to_string()).into())
}
///
@ -157,18 +161,8 @@ impl Token {
return Err(ParserError::lexer_eat_integer_leading_zero(String::from_utf8_lossy(input)).into());
}
let mut i = 1;
let mut is_hex = false;
while i < input.len() {
if i == 1 && input[0] == b'0' && input[i] == b'x' {
is_hex = true;
i += 1;
continue;
}
if is_hex {
if !input[i].is_ascii_hexdigit() {
break;
}
} else if !input[i].is_ascii_digit() {
if !input[i].is_ascii_digit() {
break;
}
@ -258,26 +252,17 @@ impl Token {
}
if !in_escape {
match Self::eat_char(
let character = Self::eat_char(
input_tendril.subtendril(start as u32, len as u32),
escaped,
hex,
unicode,
) {
Some(character) => {
len = 1;
escaped = false;
hex = false;
unicode = false;
string.push(character.into());
}
None => {
return Err(ParserError::lexer_expected_valid_escaped_char(
input_tendril.subtendril(start as u32, len as u32),
)
.into())
}
}
)?;
len = 1;
escaped = false;
hex = false;
unicode = false;
string.push(character.into());
}
i += 1;
@ -332,10 +317,8 @@ impl Token {
return Err(ParserError::lexer_char_not_closed(String::from_utf8_lossy(&input[0..i])).into());
}
return match Self::eat_char(input_tendril.subtendril(1, (i - 1) as u32), escaped, hex, unicode) {
Some(character) => Ok((i + 1, Token::CharLit(character))),
None => Err(ParserError::lexer_invalid_char(String::from_utf8_lossy(&input[0..i - 1])).into()),
};
let character = Self::eat_char(input_tendril.subtendril(1, (i - 1) as u32), escaped, hex, unicode)?;
return Ok((i + 1, Token::CharLit(character)));
}
x if x.is_ascii_digit() => {
return Self::eat_integer(&input_tendril);

View File

@ -236,23 +236,23 @@ create_errors!(
@backtraced
lexer_eat_integer_leading_zero {
args: (input: impl Display),
msg: format!("Tried to eat integer but found a leading zero on {}.", input),
msg: format!("Tried to eat integer but found a leading zero on `{}`.", input),
help: None,
}
/// When a valid escaped character was expected.
@backtraced
lexer_expected_valid_escaped_char {
lexer_expected_valid_escaped_char {
args: (input: impl Display),
msg: format!("Expected a valid escape character but found {}.", input),
msg: format!("Expected a valid escape character but found `{}`.", input),
help: None,
}
}
/// When a string is not properly closed.
@backtraced
lexer_string_not_closed {
args: (input: impl Display),
msg: format!("Expected a closed string but found {}.", input),
msg: format!("Expected a closed string but found `{}`.", input),
help: None,
}
@ -260,7 +260,7 @@ create_errors!(
@backtraced
lexer_char_not_closed {
args: (input: impl Display),
msg: format!("Expected a closed char but found {}.", input),
msg: format!("Expected a closed char but found `{}`.", input),
help: None,
}
@ -268,7 +268,7 @@ create_errors!(
@backtraced
lexer_invalid_char {
args: (input: impl Display),
msg: format!("Expected valid character but found {}.", input),
msg: format!("Expected valid character but found `{}`.", input),
help: None,
}
@ -284,7 +284,7 @@ create_errors!(
@backtraced
lexer_block_comment_does_not_close_before_eof {
args: (input: impl Display),
msg: format!("Block comment does not close with content: {}.", input),
msg: format!("Block comment does not close with content: `{}`.", input),
help: None,
}
@ -292,7 +292,63 @@ create_errors!(
@backtraced
could_not_lex {
args: (input: impl Display),
msg: format!("Could not lex the following content: {}.", input),
msg: format!("Could not lex the following content: `{}`.", input),
help: None,
}
/// When an escaped character was given more than one char to escape.
@backtraced
lexer_escaped_char_incorrect_length {
args: (input: impl Display),
msg: format!("Could not lex the following escaped char due to being given more than one char: `{}`.", input),
help: None,
}
/// When an escape was given but no character followed it.
@backtraced
lexer_unclosed_escaped_char {
args: (),
msg: "There was no escaped character following the escape char symbol `\\`.",
help: None,
}
/// When an escaped hex was given more than two chars to escape.
@backtraced
lexer_escaped_hex_incorrect_length {
args: (input: impl Display),
msg: format!("Could not lex the following escaped hex due to being given more than two chars: `{}`.", input),
help: None,
}
/// When a valid hex character was expected.
@backtraced
lexer_expected_valid_hex_char {
args: (input: impl Display),
msg: format!("Expected a valid hex character but found `{}`.", input),
help: None,
}
/// When an escaped unicode char was given but no following closing symbol.
@backtraced
lexer_unclosed_escaped_unicode_char {
args: (input: impl Display),
msg: format!("There was no closing `}}` after a escaped unicode `{}`.", input),
help: None,
}
/// When an escaped unicode char was given but it had an incorrect length.
@backtraced
lexer_invalid_escaped_unicode_length {
args: (input: impl Display),
msg: format!("The escaped unicode char `{}` is not within valid length of [1, 6].", input),
help: None,
}
/// When an escaped unicode char was given but exceeded maximum value.
@backtraced
lexer_invalid_character_exceeded_max_value {
args: (input: impl Display),
msg: format!("The escaped unicode char `{}` is greater than 0x10FFFF.", input),
help: None,
}
);