mirror of
https://github.com/ProvableHQ/leo.git
synced 2024-12-24 18:52:58 +03:00
a bunch of parser bug fixes so far
This commit is contained in:
parent
9730f90361
commit
736c6af72d
@ -114,8 +114,6 @@ impl ParserContext<'_> {
|
||||
let (args, _, span) = self.parse_paren_comma_list(|p| {
|
||||
Ok(if let Some(ident) = p.eat_identifier() {
|
||||
Some(ident.name)
|
||||
} else if let Some((int, _)) = p.eat_int() {
|
||||
Some(Symbol::intern(&int.value))
|
||||
} else {
|
||||
let token = p.expect_any()?;
|
||||
p.emit_err(ParserError::unexpected_str(&token.token, "ident or int", &token.span));
|
||||
|
@ -66,9 +66,9 @@ impl Token {
|
||||
///
|
||||
/// Returns a `char` if a character can be eaten, otherwise returns [`None`].
|
||||
///
|
||||
fn eat_char(input_tendril: StrTendril, escaped: bool, hex: bool, unicode: bool) -> Option<Char> {
|
||||
fn eat_char(input_tendril: StrTendril, escaped: bool, hex: bool, unicode: bool) -> Result<Char> {
|
||||
if input_tendril.is_empty() {
|
||||
return None;
|
||||
return Err(ParserError::lexer_empty_input_tendril().into());
|
||||
}
|
||||
|
||||
if escaped {
|
||||
@ -76,22 +76,22 @@ impl Token {
|
||||
let escaped = &string[1..string.len()];
|
||||
|
||||
if escaped.len() != 1 {
|
||||
return None;
|
||||
return Err(ParserError::lexer_escaped_char_incorrect_length(escaped).into());
|
||||
}
|
||||
|
||||
if let Some(character) = escaped.chars().next() {
|
||||
return match character {
|
||||
'0' => Some(Char::Scalar(0 as char)),
|
||||
't' => Some(Char::Scalar(9 as char)),
|
||||
'n' => Some(Char::Scalar(10 as char)),
|
||||
'r' => Some(Char::Scalar(13 as char)),
|
||||
'\"' => Some(Char::Scalar(34 as char)),
|
||||
'\'' => Some(Char::Scalar(39 as char)),
|
||||
'\\' => Some(Char::Scalar(92 as char)),
|
||||
_ => None,
|
||||
'0' => Ok(Char::Scalar(0 as char)),
|
||||
't' => Ok(Char::Scalar(9 as char)),
|
||||
'n' => Ok(Char::Scalar(10 as char)),
|
||||
'r' => Ok(Char::Scalar(13 as char)),
|
||||
'\"' => Ok(Char::Scalar(34 as char)),
|
||||
'\'' => Ok(Char::Scalar(39 as char)),
|
||||
'\\' => Ok(Char::Scalar(92 as char)),
|
||||
_ => return Err(ParserError::lexer_expected_valid_escaped_char(character).into()),
|
||||
};
|
||||
} else {
|
||||
return None;
|
||||
return Err(ParserError::lexer_unclosed_escaped_char().into());
|
||||
}
|
||||
}
|
||||
|
||||
@ -100,48 +100,52 @@ impl Token {
|
||||
let hex_string = &string[2..string.len()];
|
||||
|
||||
if hex_string.len() != 2 {
|
||||
return None;
|
||||
return Err(ParserError::lexer_escaped_hex_incorrect_length(hex_string).into());
|
||||
}
|
||||
|
||||
if let Ok(ascii_number) = u8::from_str_radix(hex_string, 16) {
|
||||
// According to RFC, we allow only values less than 128.
|
||||
if ascii_number > 127 {
|
||||
return None;
|
||||
return Err(ParserError::lexer_expected_valid_hex_char(ascii_number).into());
|
||||
}
|
||||
|
||||
return Some(Char::Scalar(ascii_number as char));
|
||||
return Ok(Char::Scalar(ascii_number as char));
|
||||
}
|
||||
}
|
||||
|
||||
if unicode {
|
||||
let string = input_tendril.to_string();
|
||||
if &string[string.len() - 1..] != "}" {
|
||||
return None;
|
||||
if string.find('}').is_none() {
|
||||
return Err(ParserError::lexer_unclosed_escaped_unicode_char(string).into());
|
||||
}
|
||||
|
||||
let unicode_number = &string[3..string.len() - 1];
|
||||
let len = unicode_number.len();
|
||||
if !(1..=6).contains(&len) {
|
||||
return None;
|
||||
return Err(ParserError::lexer_invalid_escaped_unicode_length(unicode_number).into());
|
||||
}
|
||||
|
||||
if let Ok(hex) = u32::from_str_radix(unicode_number, 16) {
|
||||
if let Some(character) = std::char::from_u32(hex) {
|
||||
// scalar
|
||||
return Some(Char::Scalar(character));
|
||||
return Ok(Char::Scalar(character));
|
||||
} else if hex <= 0x10FFFF {
|
||||
return Some(Char::NonScalar(hex));
|
||||
return Ok(Char::NonScalar(hex));
|
||||
} else {
|
||||
return Err(ParserError::lexer_invalid_character_exceeded_max_value(unicode_number).into());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if input_tendril.to_string().chars().count() != 1 {
|
||||
return None;
|
||||
// If char doesn't close.
|
||||
return Err(ParserError::lexer_char_not_closed(&input_tendril[0..]).into());
|
||||
} else if let Some(character) = input_tendril.to_string().chars().next() {
|
||||
return Some(Char::Scalar(character));
|
||||
// If it's a simple char.
|
||||
return Ok(Char::Scalar(character));
|
||||
}
|
||||
|
||||
None
|
||||
Err(ParserError::lexer_invalid_char(input_tendril.to_string()).into())
|
||||
}
|
||||
|
||||
///
|
||||
@ -157,18 +161,8 @@ impl Token {
|
||||
return Err(ParserError::lexer_eat_integer_leading_zero(String::from_utf8_lossy(input)).into());
|
||||
}
|
||||
let mut i = 1;
|
||||
let mut is_hex = false;
|
||||
while i < input.len() {
|
||||
if i == 1 && input[0] == b'0' && input[i] == b'x' {
|
||||
is_hex = true;
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
if is_hex {
|
||||
if !input[i].is_ascii_hexdigit() {
|
||||
break;
|
||||
}
|
||||
} else if !input[i].is_ascii_digit() {
|
||||
if !input[i].is_ascii_digit() {
|
||||
break;
|
||||
}
|
||||
|
||||
@ -258,26 +252,17 @@ impl Token {
|
||||
}
|
||||
|
||||
if !in_escape {
|
||||
match Self::eat_char(
|
||||
let character = Self::eat_char(
|
||||
input_tendril.subtendril(start as u32, len as u32),
|
||||
escaped,
|
||||
hex,
|
||||
unicode,
|
||||
) {
|
||||
Some(character) => {
|
||||
len = 1;
|
||||
escaped = false;
|
||||
hex = false;
|
||||
unicode = false;
|
||||
string.push(character.into());
|
||||
}
|
||||
None => {
|
||||
return Err(ParserError::lexer_expected_valid_escaped_char(
|
||||
input_tendril.subtendril(start as u32, len as u32),
|
||||
)
|
||||
.into())
|
||||
}
|
||||
}
|
||||
)?;
|
||||
len = 1;
|
||||
escaped = false;
|
||||
hex = false;
|
||||
unicode = false;
|
||||
string.push(character.into());
|
||||
}
|
||||
|
||||
i += 1;
|
||||
@ -332,10 +317,8 @@ impl Token {
|
||||
return Err(ParserError::lexer_char_not_closed(String::from_utf8_lossy(&input[0..i])).into());
|
||||
}
|
||||
|
||||
return match Self::eat_char(input_tendril.subtendril(1, (i - 1) as u32), escaped, hex, unicode) {
|
||||
Some(character) => Ok((i + 1, Token::CharLit(character))),
|
||||
None => Err(ParserError::lexer_invalid_char(String::from_utf8_lossy(&input[0..i - 1])).into()),
|
||||
};
|
||||
let character = Self::eat_char(input_tendril.subtendril(1, (i - 1) as u32), escaped, hex, unicode)?;
|
||||
return Ok((i + 1, Token::CharLit(character)));
|
||||
}
|
||||
x if x.is_ascii_digit() => {
|
||||
return Self::eat_integer(&input_tendril);
|
||||
|
@ -236,23 +236,23 @@ create_errors!(
|
||||
@backtraced
|
||||
lexer_eat_integer_leading_zero {
|
||||
args: (input: impl Display),
|
||||
msg: format!("Tried to eat integer but found a leading zero on {}.", input),
|
||||
msg: format!("Tried to eat integer but found a leading zero on `{}`.", input),
|
||||
help: None,
|
||||
}
|
||||
|
||||
/// When an escape sequence contains a character that is not a valid escape character.
|
||||
@backtraced
|
||||
lexer_expected_valid_escaped_char {
|
||||
lexer_expected_valid_escaped_char {
|
||||
args: (input: impl Display),
|
||||
msg: format!("Expected a valid escape character but found {}.", input),
|
||||
msg: format!("Expected a valid escape character but found `{}`.", input),
|
||||
help: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// When a string is not properly closed.
|
||||
@backtraced
|
||||
lexer_string_not_closed {
|
||||
args: (input: impl Display),
|
||||
msg: format!("Expected a closed string but found {}.", input),
|
||||
msg: format!("Expected a closed string but found `{}`.", input),
|
||||
help: None,
|
||||
}
|
||||
|
||||
@ -260,7 +260,7 @@ create_errors!(
|
||||
@backtraced
|
||||
lexer_char_not_closed {
|
||||
args: (input: impl Display),
|
||||
msg: format!("Expected a closed char but found {}.", input),
|
||||
msg: format!("Expected a closed char but found `{}`.", input),
|
||||
help: None,
|
||||
}
|
||||
|
||||
@ -268,7 +268,7 @@ create_errors!(
|
||||
@backtraced
|
||||
lexer_invalid_char {
|
||||
args: (input: impl Display),
|
||||
msg: format!("Expected valid character but found {}.", input),
|
||||
msg: format!("Expected valid character but found `{}`.", input),
|
||||
help: None,
|
||||
}
|
||||
|
||||
@ -284,7 +284,7 @@ create_errors!(
|
||||
@backtraced
|
||||
lexer_block_comment_does_not_close_before_eof {
|
||||
args: (input: impl Display),
|
||||
msg: format!("Block comment does not close with content: {}.", input),
|
||||
msg: format!("Block comment does not close with content: `{}`.", input),
|
||||
help: None,
|
||||
}
|
||||
|
||||
@ -292,7 +292,63 @@ create_errors!(
|
||||
@backtraced
|
||||
could_not_lex {
|
||||
args: (input: impl Display),
|
||||
msg: format!("Could not lex the following content: {}.", input),
|
||||
msg: format!("Could not lex the following content: `{}`.", input),
|
||||
help: None,
|
||||
}
|
||||
|
||||
/// When an escaped character was given more than one char to escape.
|
||||
@backtraced
|
||||
lexer_escaped_char_incorrect_length {
|
||||
args: (input: impl Display),
|
||||
msg: format!("Could not lex the following escaped char due to being given more than one char: `{}`.", input),
|
||||
help: None,
|
||||
}
|
||||
|
||||
/// When an escape was given but no character followed it.
|
||||
@backtraced
|
||||
lexer_unclosed_escaped_char {
|
||||
args: (),
|
||||
msg: "There was no escaped character following the escape char symbol `\\`.",
|
||||
help: None,
|
||||
}
|
||||
|
||||
/// When an escaped hex was given more than two chars to escape.
|
||||
@backtraced
|
||||
lexer_escaped_hex_incorrect_length {
|
||||
args: (input: impl Display),
|
||||
msg: format!("Could not lex the following escaped hex due to being given more than two chars: `{}`.", input),
|
||||
help: None,
|
||||
}
|
||||
|
||||
/// When a valid hex character was expected.
|
||||
@backtraced
|
||||
lexer_expected_valid_hex_char {
|
||||
args: (input: impl Display),
|
||||
msg: format!("Expected a valid hex character but found `{}`.", input),
|
||||
help: None,
|
||||
}
|
||||
|
||||
/// When an escaped unicode char was given but no following closing symbol.
|
||||
@backtraced
|
||||
lexer_unclosed_escaped_unicode_char {
|
||||
args: (input: impl Display),
|
||||
msg: format!("There was no closing `}}` after a escaped unicode `{}`.", input),
|
||||
help: None,
|
||||
}
|
||||
|
||||
/// When an escaped unicode char was given but it had an incorrect length.
|
||||
@backtraced
|
||||
lexer_invalid_escaped_unicode_length {
|
||||
args: (input: impl Display),
|
||||
msg: format!("The escaped unicode char `{}` is not within valid length of [1, 6].", input),
|
||||
help: None,
|
||||
}
|
||||
|
||||
/// When an escaped unicode char was given but exceeded the maximum value.
|
||||
@backtraced
|
||||
lexer_invalid_character_exceeded_max_value {
|
||||
args: (input: impl Display),
|
||||
msg: format!("The escaped unicode char `{}` is greater than 0x10FFFF.", input),
|
||||
help: None,
|
||||
}
|
||||
);
|
||||
|
Loading…
Reference in New Issue
Block a user