code cleanup, added tests, marked dangerous code for review

0rphon 2022-03-09 15:16:52 -08:00
parent 874edd8fcc
commit 3c258bf3a4
13 changed files with 70 additions and 34 deletions

View File

@@ -550,7 +550,7 @@ impl ReconstructingReducer for Canonicalizer {
         for (index, character) in string.iter().enumerate() {
             let col_start = span.col_start + index + 1 + col_adder; // account for open quote
             let bytes = span.content.clone().into_bytes();
-            let col_stop = if bytes[col_start - 1] == b'\\' {
+            let col_stop = if bytes[col_start - 1] == b'\\' { // 0rphon
                 let mut width = 0;
                 match bytes[col_start] {
@@ -558,7 +558,7 @@ impl ReconstructingReducer for Canonicalizer {
                     b'u' => {
                         width += 1;
                         let mut index = 1;
-                        while bytes[col_start + index] != b'}' {
+                        while bytes[col_start + index] != b'}' { // 0rphon
                             width += 1;
                             index += 1;
                         }
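The `// 0rphon` markers flag unchecked byte indexing: `bytes[col_start - 1]` underflows when `col_start` is 0, and both lookups can run past the end of the content. A minimal sketch of a bounds-checked lookup, with `line` and `col_start` as hypothetical stand-ins for the real span fields:

```rust
// Sketch only: a lookup that cannot underflow or index out of bounds,
// unlike the direct `bytes[col_start - 1]` access marked above.
fn byte_at(line: &str, col_start: usize) -> Option<u8> {
    let bytes = line.as_bytes();
    // `checked_sub` guards the `- 1`, `get` guards the upper bound.
    col_start.checked_sub(1).and_then(|i| bytes.get(i)).copied()
}

fn main() {
    assert_eq!(byte_at(r"a\nb", 2), Some(b'\\'));
    assert_eq!(byte_at("abc", 0), None); // underflow avoided
    assert_eq!(byte_at("abc", 99), None); // out-of-bounds avoided
}
```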

View File

@@ -48,7 +48,7 @@ impl Importer {
        let pretty_package = package.join(".");
        let resolved_package =
-            match wrapped_resolver.resolve_package(&package.iter().map(|x| &**x).collect::<Vec<_>>()[..], &span)? {
+            match wrapped_resolver.resolve_package(&package.iter().map(|x| &**x).collect::<Vec<_>>(), &span)? {
                Some(x) => x,
                None => return Err(AstError::unresolved_import(pretty_package, &span).into()),
            };
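The only change here is dropping the redundant full-range slice: a `&Vec<T>` already coerces to `&[T]` at the call site. A self-contained sketch of that coercion; `resolve` below is a hypothetical helper, not the real `resolve_package`:

```rust
// A &Vec<&str> coerces to &[&str] at the call site, so `[..]` adds nothing.
fn resolve(segments: &[&str]) -> String {
    segments.join("::")
}

fn main() {
    let package = vec!["std".to_string(), "fmt".to_string()];
    let segments: Vec<&str> = package.iter().map(|x| &**x).collect();
    assert_eq!(resolve(&segments[..]), "std::fmt"); // explicit slice
    assert_eq!(resolve(&segments), "std::fmt"); // deref coercion, same thing
}
```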

View File

@@ -32,7 +32,7 @@ fn eat(input: &[u8], wanted: &str) -> Option<usize> {
     if input.len() < wanted.len() {
         return None;
     }
-    if &input[0..wanted.len()] == wanted {
+    if &input[..wanted.len()] == wanted { // 0rphon
         return Some(wanted.len());
     }
     None
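`eat` answers one question: does `input` start with the bytes of `wanted`? The commit only trims the redundant `0..` lower bound; the `starts_with` form below is an equivalent sketch for comparison, not what the lexer itself uses:

```rust
// Equivalent prefix check without manual slicing; `starts_with` already
// handles the case where `input` is shorter than `wanted`.
fn eat(input: &[u8], wanted: &str) -> Option<usize> {
    if input.starts_with(wanted.as_bytes()) {
        Some(wanted.len())
    } else {
        None
    }
}

fn main() {
    assert_eq!(eat(b"0xbfield", "0x"), Some(2));
    assert_eq!(eat(b"field", "0x"), None);
}
```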
@@ -70,7 +70,7 @@ impl Token {
        if escaped {
            let string = input_tendril.to_string();
-            let escaped = &string[1..string.len()];
+            let escaped = &string[1..string.len()]; // 0rphon
            if escaped.len() != 1 {
                return Err(ParserError::lexer_escaped_char_incorrect_length(escaped).into());
@@ -92,7 +92,7 @@ impl Token {
        if hex {
            let string = input_tendril.to_string();
-            let hex_string = &string[2..string.len()];
+            let hex_string = &string[2..string.len()]; // 0rphon
            if hex_string.len() != 2 {
                return Err(ParserError::lexer_escaped_hex_incorrect_length(hex_string).into());
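Both marked lines slice the escape body out of the string by byte offset, which panics if the input is shorter than the offset or the offset lands inside a multi-byte character. A hedged sketch of the non-panicking alternative via `str::get`; the helper name is illustrative, not the lexer's:

```rust
// `str::get` returns None instead of panicking when the range is out of
// bounds or not on a char boundary, so short or multi-byte inputs are safe.
fn escaped_hex_body(escape: &str) -> Option<&str> {
    escape.get(2..) // skip the leading `\x`
}

fn main() {
    assert_eq!(escaped_hex_body(r"\x7F"), Some("7F"));
    assert_eq!(escaped_hex_body(r"\x"), Some(""));
    assert_eq!(escaped_hex_body(r"\"), None); // would panic with `&s[2..]`
}
```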
@@ -114,7 +114,7 @@ impl Token {
            let unicode_number = &string[3..string.len() - 1];
            let len = unicode_number.len();
-            if !(1..=6).contains(&len) {
+            if !(1..=6).contains(&len) { // 0rphon
                return Err(ParserError::lexer_invalid_escaped_unicode_length(unicode_number).into());
            } else if let Ok(hex) = u32::from_str_radix(unicode_number, 16) {
                if let Some(character) = std::char::from_u32(hex) {
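The flow above is: take the digits between `\u{` and `}`, require 1 to 6 of them, parse them as hex, and let `char::from_u32` reject surrogates and anything above 0x10FFFF. A compact standalone sketch of that pipeline (not the lexer's own code):

```rust
// Validate and decode the body of a `\u{...}` escape: 1..=6 hex digits
// that name a real Unicode scalar value.
fn decode_unicode_escape(digits: &str) -> Option<char> {
    if !(1..=6).contains(&digits.len()) {
        return None;
    }
    let value = u32::from_str_radix(digits, 16).ok()?;
    char::from_u32(value) // None for surrogates and values > 0x10FFFF
}

fn main() {
    assert_eq!(decode_unicode_escape("2764"), Some('❤'));
    assert_eq!(decode_unicode_escape("110000"), None); // above 0x10FFFF
    assert_eq!(decode_unicode_escape("bbbbbbb"), None); // 7 digits, too long
}
```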
@@ -130,7 +130,7 @@ impl Token {
        if input_tendril.to_string().chars().count() != 1 {
            // If char doesn't close.
-            return Err(ParserError::lexer_char_not_closed(&input_tendril[0..]).into());
+            return Err(ParserError::lexer_char_not_closed(&input_tendril).into());
        } else if let Some(character) = input_tendril.to_string().chars().next() {
            // If its a simple char.
            return Ok(Char::Scalar(character));
@@ -159,7 +159,7 @@ impl Token {
        while i < input.len() {
            if i == 1 && input[0] == b'0' && input[i] == b'x' {
-                return Err(ParserError::lexer_hex_number_provided(&input_tendril[0..3]).into());
+                return Err(ParserError::lexer_hex_number_provided(&input_tendril[..3]).into());
            }
            if !input[i].is_ascii_digit() {
                break;
@@ -276,7 +276,7 @@ impl Token {
            }
            if i == input.len() || !end {
-                return Err(ParserError::lexer_string_not_closed(String::from_utf8_lossy(&input[0..i])).into());
+                return Err(ParserError::lexer_string_not_closed(String::from_utf8_lossy(&input[..i])).into()); // 0rphon
            }
            return Ok((i + 1, Token::StringLit(string)));
@@ -297,14 +297,14 @@ impl Token {
                    continue;
                } else if input[i] & 0x80 > 0 && unicode_char {
                    i += Self::utf8_byte_count(input[i]);
-                    return Err(ParserError::lexer_invalid_char(&input_tendril[0..i]).into());
+                    return Err(ParserError::lexer_invalid_char(&input_tendril[..i]).into());
                } else if !in_escape || unicode_char {
                    if input[i] == b'\'' {
                        end = true;
                        break;
                    } else if unicode_char {
                        return Err(ParserError::lexer_invalid_char(
-                            &input_tendril[0..input_tendril[1..].find('\'').unwrap_or(i + 1)],
+                            &input_tendril[..input_tendril[1..].find('\'').unwrap_or(i + 1)],
                        )
                        .into());
                    } else if input[i] == b'\\' {
@@ -330,7 +330,7 @@ impl Token {
            }
            if !end {
-                return Err(ParserError::lexer_char_not_closed(String::from_utf8_lossy(&input[0..i])).into());
+                return Err(ParserError::lexer_char_not_closed(String::from_utf8_lossy(&input[..i])).into());
            }
            let character = Self::eat_char(
@@ -407,7 +407,7 @@ impl Token {
                eol + 4
            } else {
                return Err(ParserError::lexer_block_comment_does_not_close_before_eof(
-                    String::from_utf8_lossy(&input[0..]),
+                    String::from_utf8_lossy(&input),
                )
                .into());
            };
@@ -500,7 +500,7 @@ impl Token {
            ));
        }
-        Err(ParserError::could_not_lex(String::from_utf8_lossy(&input[0..])).into())
+        Err(ParserError::could_not_lex(String::from_utf8_lossy(&input)).into())
    }
}
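Several of these error paths render a partial byte slice back into text with `String::from_utf8_lossy`, which never fails: bytes that are not valid UTF-8 come out as U+FFFD rather than aborting the error report. A small illustration using only `std`:

```rust
fn main() {
    // A char literal cut off in the middle of a multi-byte character.
    let input: &[u8] = &[b'\'', 0xF0, 0x9F];
    // `from_utf8_lossy` never fails: the truncated sequence becomes U+FFFD,
    // so the error message can always be built from a partial slice.
    let shown = String::from_utf8_lossy(&input[..2]);
    assert_eq!(shown, "'\u{FFFD}");
}
```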

View File

@@ -259,7 +259,7 @@ ppp test
            let token_raw = token.token.to_string();
            let start = line_indicies.get(token.span.line_start - 1).unwrap();
            let stop = line_indicies.get(token.span.line_stop - 1).unwrap();
-            let original = &raw[*start + token.span.col_start - 1..*stop + token.span.col_stop - 1];
+            let original = &raw[*start + token.span.col_start - 1..*stop + token.span.col_stop - 1]; // 0rphon
            assert_eq!(original, &token_raw);
        }
    })

View File

@@ -195,7 +195,7 @@ impl TryFrom<&Path> for Manifest {
            if line.starts_with("remote") {
                let remote = line
                    .split('=') // Split the line as 'remote' = '"{author}/{package_name}"'
-                    .collect::<Vec<&str>>()[1]; // Fetch just '"{author}/{package_name}"'
+                    .nth(1).unwrap(); // Fetch just '"{author}/{package_name}"'
                old_remote_format = Some(remote);
                // Retain the old remote format if the `manifest_refactor_remote` is not enabled
@@ -238,8 +238,8 @@ impl TryFrom<&Path> for Manifest {
                // Fetch the author from the old remote.
                let remote_author = old_remote
                    .split('/') // Split the old remote as '"{author}' and '{package_name}"'
-                    .collect::<Vec<&str>>()[0] // Fetch just the '"{author}'
-                    .replace(&['\"', ' '][..], ""); // Remove the quotes from the author string
+                    .nth(0).unwrap() // Fetch just the '"{author}'
+                    .replace(['\"', ' '], ""); // Remove the quotes from the author string
                // Construct the new remote section.
                let new_remote = format!(
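Both Manifest changes swap the `collect::<Vec<&str>>()[n]` indexing for `nth(n)` on the split iterator, avoiding the intermediate vector (the `unwrap` keeps the same panic-on-missing behavior). A hedged sketch of the same parsing; `parse_remote` is a hypothetical helper and the line format is taken from the comments above:

```rust
// Pull the author out of a manifest line like: remote = "{author}/{package_name}"
fn parse_remote(line: &str) -> Option<String> {
    let remote = line.split('=').nth(1)?; // '"{author}/{package_name}"'
    let author = remote.split('/').next()?; // '"{author}'
    Some(author.replace(&['"', ' '][..], "")) // strip quotes and spaces
}

fn main() {
    let line = r#"remote = "aleo/hello-world""#;
    assert_eq!(parse_remote(line).as_deref(), Some("aleo"));
}
```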

View File

@@ -90,7 +90,7 @@ impl Serialize for Span {
        } else {
            state.serialize_field("path", "")?;
        }
-        state.serialize_field("content", &self.content[..])?;
+        state.serialize_field("content", self.content.as_ref())?;
        state.end()
    }
}

View File

@@ -21,7 +21,7 @@ use tendril::StrTendril;
 /// Serialization for the StrTendril type.
 pub fn serialize<S: Serializer>(tendril: &StrTendril, serializer: S) -> Result<S::Ok, S::Error> {
-    serializer.serialize_str(&tendril[..])
+    serializer.serialize_str(&tendril)
 }
 /// Deserialization for the StrTendril type.
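This change and the `Span` one above rely on the same fact: `StrTendril` dereferences to `str`, so a plain reference serializes as a string without an explicit `[..]` re-slice. A hedged sketch of the serde `with`-module pattern being patched, using a stand-in `MyTendril` type instead of pulling in the `tendril` crate (serde and serde_json assumed available):

```rust
use serde::Serializer;
use std::ops::Deref;

// Stand-in for StrTendril: any type that derefs to `str` works the same way.
struct MyTendril(String);

impl Deref for MyTendril {
    type Target = str;
    fn deref(&self) -> &str {
        &self.0
    }
}

// Mirrors the patched function: deref coercion turns `&MyTendril` into `&str`.
fn serialize<S: Serializer>(tendril: &MyTendril, serializer: S) -> Result<S::Ok, S::Error> {
    serializer.serialize_str(tendril)
}

fn main() {
    let mut out = Vec::new();
    serialize(&MyTendril("hello".into()), &mut serde_json::Serializer::new(&mut out)).unwrap();
    assert_eq!(String::from_utf8(out).unwrap(), "\"hello\"");
}
```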

View File

@@ -3,12 +3,15 @@ namespace: Token
 expectation: Fail
 outputs:
 - "Error [EPAR0370028]: Expected a closed char but found `'\\'`."
+- "Error [EPAR0370032]: Could not lex the following content: `\\`."
+- "Error [EPAR0370028]: Expected a closed char but found `'\\`."
+- "Error [EPAR0370032]: Could not lex the following content: `\\n`."
 - "Error [EPAR0370028]: Expected a closed char but found `'a`."
 - "Error [EPAR0370024]: Expected more characters to lex but found none."
-- "Error [EPAR0370036]: Expected a valid hex character but found `154`."
 - "Error [EPAR0370035]: Could not lex the following escaped hex due to being given more than two chars: `7`."
-- "Error [EPAR0370028]: Expected a closed char but found `\\x7g`."
 - "Error [EPAR0370035]: Could not lex the following escaped hex due to being given more than two chars: `z`."
+- "Error [EPAR0370036]: Expected a valid hex character but found `154`."
+- "Error [EPAR0370028]: Expected a closed char but found `\\x7g`."
 - "Error [EPAR0370036]: Expected a valid hex character but found `128`."
 - "Error [EPAR0370036]: Expected a valid hex character but found `193`."
 - "Error [EPAR0370036]: Expected a valid hex character but found `194`."
@@ -17,7 +20,6 @@ outputs:
 - "Error [EPAR0370036]: Expected a valid hex character but found `224`."
 - "Error [EPAR0370036]: Expected a valid hex character but found `159`."
 - "Error [EPAR0370028]: Expected a closed char but found `abcdefg`."
-- "Error [EPAR0370033]: Could not lex the following escaped char due to being given more than one char: `t\\t`."
 - "Error [EPAR0370026]: Expected a valid escape character but found `a`."
 - "Error [EPAR0370026]: Expected a valid escape character but found `z`."
 - "Error [EPAR0370026]: Expected a valid escape character but found `A`."
@@ -27,16 +29,20 @@ outputs:
 - "Error [EPAR0370026]: Expected a valid escape character but found `*`."
 - "Error [EPAR0370035]: Could not lex the following escaped hex due to being given more than two chars: ``."
 - "Error [EPAR0370026]: Expected a valid escape character but found `117`."
-- "Error [EPAR0370038]: The escaped unicode char `bbbbb}\\u{aaaa` is not within valid length of [1, 6]."
+- "Error [EPAR0370033]: Could not lex the following escaped char due to being given more than one char: `t\\t`."
 - "Error [EPAR0370026]: Expected a valid escape character but found `117`."
 - "Error [EPAR0370026]: Expected a valid escape character but found `117`."
+- "Error [EPAR0370024]: Expected more characters to lex but found none."
+- "Error [EPAR0370029]: Expected valid character but found `'🦀\\`."
 - "Error [EPAR0370026]: Expected a valid escape character but found `117`."
+- "Error [EPAR0370024]: Expected more characters to lex but found none."
+- "Error [EPAR0370024]: Expected more characters to lex but found none."
+- "Error [EPAR0370037]: There was no closing `}` after a escaped unicode `\\u{af🦀`."
 - "Error [EPAR0370037]: There was no closing `}` after a escaped unicode `\\u{2764z`."
 - "Error [EPAR0370028]: Expected a closed char but found `\\u{276g}`."
 - "Error [EPAR0370026]: Expected a valid escape character but found `117`."
 - "Error [EPAR0370026]: Expected a valid escape character but found `117`."
 - "Error [EPAR0370026]: Expected a valid escape character but found `117`."
 - "Error [EPAR0370039]: The escaped unicode char `110000` is greater than 0x10FFFF."
-- "Error [EPAR0370037]: There was no closing `}` after a escaped unicode `\\u{af🦀`."
-- "Error [EPAR0370029]: Expected valid character but found `'🦀\\`."
+- "Error [EPAR0370038]: The escaped unicode char `bbbbb}\\u{aaaa` is not within valid length of [1, 6]."
 - "Error [EPAR0370029]: Expected valid character but found `'😭😂`."

View File

@@ -0,0 +1,7 @@
+---
+namespace: Token
+expectation: Fail
+outputs:
+- "Error [EPAR0370040]: A hex number `0xb..` was provided but hex is not allowed."
+- "Error [EPAR0370040]: A hex number `0xb..` was provided but hex is not allowed."
+- "Error [EPAR0370040]: A hex number `0xb..` was provided but hex is not allowed."

View File

@@ -10,3 +10,6 @@ outputs:
 - "Error [EPAR0370027]: Expected a closed string but found `\"\\u\"`."
 - "Error [EPAR0370036]: Expected a valid hex character but found `255`."
 - "Error [EPAR0370027]: Expected a closed string but found `\"\\x\"`."
+- "Error [EPAR0370028]: Expected a closed char but found `\\x\" `."
+- "Error [EPAR0370028]: Expected a closed char but found `\\x\" `."
+- "Error [EPAR0370027]: Expected a closed string but found `\"\\u{af🦀\"`."

View File

@@ -4,15 +4,18 @@ expectation: Fail
 */
 '\'
+\
+'\
+\n
 'a
 ''
-'\x9A'
 '\x7'
-'\x7g'
 '\xz'
+'\x9A'
+'\x7g'
 '\x80'
 '\xc1'
 '\xc2'
@@ -23,7 +26,6 @@ expectation: Fail
 'abcdefg'
-'\t\t'
 '\a'
 '\z'
 '\A'
@@ -33,18 +35,22 @@ expectation: Fail
 '\*'
 '\x'
 '\u'
-'\u{bbbbb}\u{aaaa}'
+'\t\t'
 '\uz'
 '\u1'
-'' // 0rphon
+'\u};
+'🦀\n'
 '\u123'
-'' //0rphon
-'' //0rphon
+'🦀1🦀'
+'\u6🦀}'
+'\u{af🦀'
 '\u{2764z'
 '\u{276g}'
-'\u9999999'
 '\u00000000'
 '\u01000000'
+'\u9999999'
 '\u{110000}'
-'\u{af🦀'
-'🦀\n'
+'\u{bbbbb}\u{aaaa}'
 '😭😂😘'

View File

@@ -0,0 +1,8 @@
+/*
+namespace: Token
+expectation: Fail
+*/
+0xb
+0xb // 0rphon 0x
+0xbfield

View File

@@ -18,3 +18,9 @@ expectation: Fail
 "\xFF"
 "\x"
-"\x" // 0rphon
-"\x" // 0rphon
+"\u}"
+"\u6🦀}"
+"\u{af🦀"