From 3c258bf3a4ac91f1a2215512d24312d75934e652 Mon Sep 17 00:00:00 2001 From: 0rphon <59403052+0rphon@users.noreply.github.com> Date: Wed, 9 Mar 2022 15:16:52 -0800 Subject: [PATCH] code cleanup, added tests, marked dangerous code for review --- .../src/canonicalization/canonicalizer.rs | 4 ++-- .../src/import_resolution/importer.rs | 2 +- compiler/parser/src/tokenizer/lexer.rs | 24 +++++++++---------- compiler/parser/src/tokenizer/mod.rs | 2 +- leo/package/src/root/manifest.rs | 6 ++--- leo/span/src/span.rs | 2 +- leo/span/src/tendril_json.rs | 2 +- .../expression/literal/char_fail.leo.out | 18 +++++++++----- .../expression/literal/int_fail.leo.out | 7 ++++++ .../expression/literal/string_fail.leo.out | 3 +++ tests/parser/expression/literal/char_fail.leo | 20 ++++++++++------ tests/parser/expression/literal/int_fail.leo | 8 +++++++ .../parser/expression/literal/string_fail.leo | 6 +++++ 13 files changed, 70 insertions(+), 34 deletions(-) create mode 100644 tests/expectations/parser/parser/expression/literal/int_fail.leo.out create mode 100644 tests/parser/expression/literal/int_fail.leo diff --git a/compiler/ast-passes/src/canonicalization/canonicalizer.rs b/compiler/ast-passes/src/canonicalization/canonicalizer.rs index 592107d434..8ce1e000dd 100644 --- a/compiler/ast-passes/src/canonicalization/canonicalizer.rs +++ b/compiler/ast-passes/src/canonicalization/canonicalizer.rs @@ -550,7 +550,7 @@ impl ReconstructingReducer for Canonicalizer { for (index, character) in string.iter().enumerate() { let col_start = span.col_start + index + 1 + col_adder; // account for open quote let bytes = span.content.clone().into_bytes(); - let col_stop = if bytes[col_start - 1] == b'\\' { + let col_stop = if bytes[col_start - 1] == b'\\' { // 0rphon let mut width = 0; match bytes[col_start] { @@ -558,7 +558,7 @@ impl ReconstructingReducer for Canonicalizer { b'u' => { width += 1; let mut index = 1; - while bytes[col_start + index] != b'}' { + while bytes[col_start + index] != b'}' { // 0rphon width += 1; index += 1; } diff --git a/compiler/ast-passes/src/import_resolution/importer.rs b/compiler/ast-passes/src/import_resolution/importer.rs index 808341433e..2b8fd967f4 100644 --- a/compiler/ast-passes/src/import_resolution/importer.rs +++ b/compiler/ast-passes/src/import_resolution/importer.rs @@ -48,7 +48,7 @@ impl Importer { let pretty_package = package.join("."); let resolved_package = - match wrapped_resolver.resolve_package(&package.iter().map(|x| &**x).collect::>()[..], &span)? { + match wrapped_resolver.resolve_package(&package.iter().map(|x| &**x).collect::>(), &span)? { Some(x) => x, None => return Err(AstError::unresolved_import(pretty_package, &span).into()), }; diff --git a/compiler/parser/src/tokenizer/lexer.rs b/compiler/parser/src/tokenizer/lexer.rs index 07222593d1..36f8e1acab 100644 --- a/compiler/parser/src/tokenizer/lexer.rs +++ b/compiler/parser/src/tokenizer/lexer.rs @@ -32,7 +32,7 @@ fn eat(input: &[u8], wanted: &str) -> Option { if input.len() < wanted.len() { return None; } - if &input[0..wanted.len()] == wanted { + if &input[..wanted.len()] == wanted { // 0rphon return Some(wanted.len()); } None @@ -70,7 +70,7 @@ impl Token { if escaped { let string = input_tendril.to_string(); - let escaped = &string[1..string.len()]; + let escaped = &string[1..string.len()]; // 0rphon if escaped.len() != 1 { return Err(ParserError::lexer_escaped_char_incorrect_length(escaped).into()); @@ -92,7 +92,7 @@ impl Token { if hex { let string = input_tendril.to_string(); - let hex_string = &string[2..string.len()]; + let hex_string = &string[2..string.len()]; // 0rphon if hex_string.len() != 2 { return Err(ParserError::lexer_escaped_hex_incorrect_length(hex_string).into()); @@ -114,7 +114,7 @@ impl Token { let unicode_number = &string[3..string.len() - 1]; let len = unicode_number.len(); - if !(1..=6).contains(&len) { + if !(1..=6).contains(&len) { // 0rphon return Err(ParserError::lexer_invalid_escaped_unicode_length(unicode_number).into()); } else if let Ok(hex) = u32::from_str_radix(unicode_number, 16) { if let Some(character) = std::char::from_u32(hex) { @@ -130,7 +130,7 @@ impl Token { if input_tendril.to_string().chars().count() != 1 { // If char doesn't close. - return Err(ParserError::lexer_char_not_closed(&input_tendril[0..]).into()); + return Err(ParserError::lexer_char_not_closed(&input_tendril).into()); } else if let Some(character) = input_tendril.to_string().chars().next() { // If its a simple char. return Ok(Char::Scalar(character)); @@ -159,7 +159,7 @@ impl Token { while i < input.len() { if i == 1 && input[0] == b'0' && input[i] == b'x' { - return Err(ParserError::lexer_hex_number_provided(&input_tendril[0..3]).into()); + return Err(ParserError::lexer_hex_number_provided(&input_tendril[..3]).into()); } if !input[i].is_ascii_digit() { break; @@ -276,7 +276,7 @@ impl Token { } if i == input.len() || !end { - return Err(ParserError::lexer_string_not_closed(String::from_utf8_lossy(&input[0..i])).into()); + return Err(ParserError::lexer_string_not_closed(String::from_utf8_lossy(&input[..i])).into()); // 0rphon } return Ok((i + 1, Token::StringLit(string))); @@ -297,14 +297,14 @@ impl Token { continue; } else if input[i] & 0x80 > 0 && unicode_char { i += Self::utf8_byte_count(input[i]); - return Err(ParserError::lexer_invalid_char(&input_tendril[0..i]).into()); + return Err(ParserError::lexer_invalid_char(&input_tendril[..i]).into()); } else if !in_escape || unicode_char { if input[i] == b'\'' { end = true; break; } else if unicode_char { return Err(ParserError::lexer_invalid_char( - &input_tendril[0..input_tendril[1..].find('\'').unwrap_or(i + 1)], + &input_tendril[..input_tendril[1..].find('\'').unwrap_or(i + 1)], ) .into()); } else if input[i] == b'\\' { @@ -330,7 +330,7 @@ impl Token { } if !end { - return Err(ParserError::lexer_char_not_closed(String::from_utf8_lossy(&input[0..i])).into()); + return Err(ParserError::lexer_char_not_closed(String::from_utf8_lossy(&input[..i])).into()); } let character = Self::eat_char( @@ -407,7 +407,7 @@ impl Token { eol + 4 } else { return Err(ParserError::lexer_block_comment_does_not_close_before_eof( - String::from_utf8_lossy(&input[0..]), + String::from_utf8_lossy(&input), ) .into()); }; @@ -500,7 +500,7 @@ impl Token { )); } - Err(ParserError::could_not_lex(String::from_utf8_lossy(&input[0..])).into()) + Err(ParserError::could_not_lex(String::from_utf8_lossy(&input)).into()) } } diff --git a/compiler/parser/src/tokenizer/mod.rs b/compiler/parser/src/tokenizer/mod.rs index 4ffdc96026..2289704a83 100644 --- a/compiler/parser/src/tokenizer/mod.rs +++ b/compiler/parser/src/tokenizer/mod.rs @@ -259,7 +259,7 @@ ppp test let token_raw = token.token.to_string(); let start = line_indicies.get(token.span.line_start - 1).unwrap(); let stop = line_indicies.get(token.span.line_stop - 1).unwrap(); - let original = &raw[*start + token.span.col_start - 1..*stop + token.span.col_stop - 1]; + let original = &raw[*start + token.span.col_start - 1..*stop + token.span.col_stop - 1]; // 0rphon assert_eq!(original, &token_raw); } }) diff --git a/leo/package/src/root/manifest.rs b/leo/package/src/root/manifest.rs index c72624bc96..378d080e65 100644 --- a/leo/package/src/root/manifest.rs +++ b/leo/package/src/root/manifest.rs @@ -195,7 +195,7 @@ impl TryFrom<&Path> for Manifest { if line.starts_with("remote") { let remote = line .split('=') // Split the line as 'remote' = '"{author}/{package_name}"' - .collect::>()[1]; // Fetch just '"{author}/{package_name}"' + .nth(1).unwrap(); // Fetch just '"{author}/{package_name}"' old_remote_format = Some(remote); // Retain the old remote format if the `manifest_refactor_remote` is not enabled @@ -238,8 +238,8 @@ impl TryFrom<&Path> for Manifest { // Fetch the author from the old remote. let remote_author = old_remote .split('/') // Split the old remote as '"{author}' and '{package_name}"' - .collect::>()[0] // Fetch just the '"{author}' - .replace(&['\"', ' '][..], ""); // Remove the quotes from the author string + .nth(0).unwrap() // Fetch just the '"{author}' + .replace(['\"', ' '], ""); // Remove the quotes from the author string // Construct the new remote section. let new_remote = format!( diff --git a/leo/span/src/span.rs b/leo/span/src/span.rs index 50f2d32080..be3db2dacd 100644 --- a/leo/span/src/span.rs +++ b/leo/span/src/span.rs @@ -90,7 +90,7 @@ impl Serialize for Span { } else { state.serialize_field("path", "")?; } - state.serialize_field("content", &self.content[..])?; + state.serialize_field("content", self.content.as_ref())?; state.end() } } diff --git a/leo/span/src/tendril_json.rs b/leo/span/src/tendril_json.rs index d9a7290f0a..9a788e56a3 100644 --- a/leo/span/src/tendril_json.rs +++ b/leo/span/src/tendril_json.rs @@ -21,7 +21,7 @@ use tendril::StrTendril; /// Serialization for the StrTendril type. pub fn serialize(tendril: &StrTendril, serializer: S) -> Result { - serializer.serialize_str(&tendril[..]) + serializer.serialize_str(&tendril) } /// Deserialization for the StrTendril type. diff --git a/tests/expectations/parser/parser/expression/literal/char_fail.leo.out b/tests/expectations/parser/parser/expression/literal/char_fail.leo.out index ece3a8f208..8137ce1700 100644 --- a/tests/expectations/parser/parser/expression/literal/char_fail.leo.out +++ b/tests/expectations/parser/parser/expression/literal/char_fail.leo.out @@ -3,12 +3,15 @@ namespace: Token expectation: Fail outputs: - "Error [EPAR0370028]: Expected a closed char but found `'\\'`." + - "Error [EPAR0370032]: Could not lex the following content: `\\`." + - "Error [EPAR0370028]: Expected a closed char but found `'\\`." + - "Error [EPAR0370032]: Could not lex the following content: `\\n`." - "Error [EPAR0370028]: Expected a closed char but found `'a`." - "Error [EPAR0370024]: Expected more characters to lex but found none." - - "Error [EPAR0370036]: Expected a valid hex character but found `154`." - "Error [EPAR0370035]: Could not lex the following escaped hex due to being given more than two chars: `7`." - - "Error [EPAR0370028]: Expected a closed char but found `\\x7g`." - "Error [EPAR0370035]: Could not lex the following escaped hex due to being given more than two chars: `z`." + - "Error [EPAR0370036]: Expected a valid hex character but found `154`." + - "Error [EPAR0370028]: Expected a closed char but found `\\x7g`." - "Error [EPAR0370036]: Expected a valid hex character but found `128`." - "Error [EPAR0370036]: Expected a valid hex character but found `193`." - "Error [EPAR0370036]: Expected a valid hex character but found `194`." @@ -17,7 +20,6 @@ outputs: - "Error [EPAR0370036]: Expected a valid hex character but found `224`." - "Error [EPAR0370036]: Expected a valid hex character but found `159`." - "Error [EPAR0370028]: Expected a closed char but found `abcdefg`." - - "Error [EPAR0370033]: Could not lex the following escaped char due to being given more than one char: `t\\t`." - "Error [EPAR0370026]: Expected a valid escape character but found `a`." - "Error [EPAR0370026]: Expected a valid escape character but found `z`." - "Error [EPAR0370026]: Expected a valid escape character but found `A`." @@ -27,16 +29,20 @@ outputs: - "Error [EPAR0370026]: Expected a valid escape character but found `*`." - "Error [EPAR0370035]: Could not lex the following escaped hex due to being given more than two chars: ``." - "Error [EPAR0370026]: Expected a valid escape character but found `117`." - - "Error [EPAR0370038]: The escaped unicode char `bbbbb}\\u{aaaa` is not within valid length of [1, 6]." + - "Error [EPAR0370033]: Could not lex the following escaped char due to being given more than one char: `t\\t`." - "Error [EPAR0370026]: Expected a valid escape character but found `117`." - "Error [EPAR0370026]: Expected a valid escape character but found `117`." + - "Error [EPAR0370024]: Expected more characters to lex but found none." + - "Error [EPAR0370029]: Expected valid character but found `'🦀\\`." - "Error [EPAR0370026]: Expected a valid escape character but found `117`." + - "Error [EPAR0370024]: Expected more characters to lex but found none." + - "Error [EPAR0370024]: Expected more characters to lex but found none." + - "Error [EPAR0370037]: There was no closing `}` after a escaped unicode `\\u{af🦀`." - "Error [EPAR0370037]: There was no closing `}` after a escaped unicode `\\u{2764z`." - "Error [EPAR0370028]: Expected a closed char but found `\\u{276g}`." - "Error [EPAR0370026]: Expected a valid escape character but found `117`." - "Error [EPAR0370026]: Expected a valid escape character but found `117`." - "Error [EPAR0370026]: Expected a valid escape character but found `117`." - "Error [EPAR0370039]: The escaped unicode char `110000` is greater than 0x10FFFF." - - "Error [EPAR0370037]: There was no closing `}` after a escaped unicode `\\u{af🦀`." - - "Error [EPAR0370029]: Expected valid character but found `'🦀\\`." + - "Error [EPAR0370038]: The escaped unicode char `bbbbb}\\u{aaaa` is not within valid length of [1, 6]." - "Error [EPAR0370029]: Expected valid character but found `'😭😂`." diff --git a/tests/expectations/parser/parser/expression/literal/int_fail.leo.out b/tests/expectations/parser/parser/expression/literal/int_fail.leo.out new file mode 100644 index 0000000000..65e9f563b7 --- /dev/null +++ b/tests/expectations/parser/parser/expression/literal/int_fail.leo.out @@ -0,0 +1,7 @@ +--- +namespace: Token +expectation: Fail +outputs: + - "Error [EPAR0370040]: A hex number `0xb..` was provided but hex is not allowed." + - "Error [EPAR0370040]: A hex number `0xb..` was provided but hex is not allowed." + - "Error [EPAR0370040]: A hex number `0xb..` was provided but hex is not allowed." diff --git a/tests/expectations/parser/parser/expression/literal/string_fail.leo.out b/tests/expectations/parser/parser/expression/literal/string_fail.leo.out index 7b8f745fad..17c704ae04 100644 --- a/tests/expectations/parser/parser/expression/literal/string_fail.leo.out +++ b/tests/expectations/parser/parser/expression/literal/string_fail.leo.out @@ -10,3 +10,6 @@ outputs: - "Error [EPAR0370027]: Expected a closed string but found `\"\\u\"`." - "Error [EPAR0370036]: Expected a valid hex character but found `255`." - "Error [EPAR0370027]: Expected a closed string but found `\"\\x\"`." + - "Error [EPAR0370028]: Expected a closed char but found `\\x\" `." + - "Error [EPAR0370028]: Expected a closed char but found `\\x\" `." + - "Error [EPAR0370027]: Expected a closed string but found `\"\\u{af🦀\"`." diff --git a/tests/parser/expression/literal/char_fail.leo b/tests/parser/expression/literal/char_fail.leo index 12dfdec1f4..1b3c896f29 100644 --- a/tests/parser/expression/literal/char_fail.leo +++ b/tests/parser/expression/literal/char_fail.leo @@ -4,15 +4,18 @@ expectation: Fail */ '\' +\ +'\ +\n 'a '' -'\x9A' '\x7' -'\x7g' '\xz' +'\x9A' +'\x7g' '\x80' '\xc1' '\xc2' @@ -23,7 +26,6 @@ expectation: Fail 'abcdefg' -'\t\t' '\a' '\z' '\A' @@ -33,18 +35,22 @@ expectation: Fail '\*' '\x' '\u' +'\t\t' -'\u{bbbbb}\u{aaaa}' '\uz' '\u1' +'' // 0rphon '\u}; +'🦀\n' '\u123' +'' //0rphon '🦀1🦀' +'' //0rphon '\u6🦀}' +'\u{af🦀' '\u{2764z' '\u{276g}' +'\u9999999' '\u00000000' '\u01000000' -'\u9999999' '\u{110000}' -'\u{af🦀' -'🦀\n' +'\u{bbbbb}\u{aaaa}' '😭😂😘' diff --git a/tests/parser/expression/literal/int_fail.leo b/tests/parser/expression/literal/int_fail.leo new file mode 100644 index 0000000000..d4155676a9 --- /dev/null +++ b/tests/parser/expression/literal/int_fail.leo @@ -0,0 +1,8 @@ +/* +namespace: Token +expectation: Fail +*/ + +0xb +0xb // 0rphon 0x +0xbfield \ No newline at end of file diff --git a/tests/parser/expression/literal/string_fail.leo b/tests/parser/expression/literal/string_fail.leo index 41545172dd..c4a783461b 100644 --- a/tests/parser/expression/literal/string_fail.leo +++ b/tests/parser/expression/literal/string_fail.leo @@ -18,3 +18,9 @@ expectation: Fail "\xFF" "\x" + +"\x" // 0rphon "\u}" + +"\x" // 0rphon "\u6🦀}" + +"\u{af🦀"