From cce0f597dea4d432e5915c988286d11bf6b97915 Mon Sep 17 00:00:00 2001
From: gluax <jonathan.t.pavlik@gmail.com>
Date: Fri, 21 May 2021 12:33:39 -0400
Subject: [PATCH] Revert "refactor for better errors, code shouldn't have been
 in the lexer"

This reverts commit 1e1486f4efe7b34001edf09ab11c0fc921433c0a.
---
 parser/src/errors/syntax.rs                   |  40 -----
 parser/src/parser/expression.rs               | 118 +-------------
 parser/src/tokenizer/lexer.rs                 | 144 +++++++++++++++---
 parser/src/tokenizer/token.rs                 |   2 +-
 .../compiler/char/invalid_char.leo.out        |   2 +-
 .../parser/expression/literal/char.leo.out    |  16 +-
 .../expression/literal/char_fail.leo.out      |   8 +-
 .../expression/literal/char_parse.leo.out     |  18 ---
 tests/parser/expression/literal/char.leo      |   2 -
 tests/parser/expression/literal/char_fail.leo |   6 +-
 .../parser/expression/literal/char_parse.leo  |   2 -
 11 files changed, 142 insertions(+), 216 deletions(-)
diff --git a/parser/src/errors/syntax.rs b/parser/src/errors/syntax.rs
index b1062527e9..98fbc6bad8 100644
--- a/parser/src/errors/syntax.rs
+++ b/parser/src/errors/syntax.rs
@@ -48,46 +48,6 @@ impl SyntaxError {
         SyntaxError::Error(FormattedError::new_from_span(message, span))
     }
 
-    pub fn invalid_char(character: Vec<u8>, span: &Span) -> Self {
-        Self::new_from_span(format!("Invalid character '{:?}'", character), span)
-    }
-
-    pub fn invalid_empty_char(span: &Span) -> Self {
-        Self::new_from_span("Empty character '' is not valid".to_string(), span)
-    }
-
-    pub fn invalid_escaped_char(character: char, span: &Span) -> Self {
-        Self::new_from_span(format!("Invalid escape character '\\{}'", character), span)
-    }
-
-    pub fn invalid_hex_char(character: Vec<u8>, span: &Span) -> Self {
-        Self::new_from_span(format!("Invalid singe hex character '\\x{:?}'", character), span)
-    }
-
-    pub fn invalid_hex_single_char(character: char, span: &Span) -> Self {
-        Self::new_from_span(
-            format!(
-                "Invalid singe hex character '\\x{}', expected '\\x0{}",
-                character, character
-            ),
-            span,
-        )
-    }
-
-    pub fn invalid_unicode_char(character: Vec<u8>, escaped: bool, span: &Span) -> Self {
-        if escaped {
-            return Self::new_from_span(
-                format!("Invalid unicode escaped character '\\u{{{:?}}}'", character),
-                span,
-            );
-        }
-
-        Self::new_from_span(
-            format!("Invalid unicode symbol character '\\u{{{:?}}}'", character),
-            span,
-        )
-    }
-
     pub fn invalid_import_list(span: &Span) -> Self {
         Self::new_from_span("Cannot import empty list".to_string(), span)
     }
diff --git a/parser/src/parser/expression.rs b/parser/src/parser/expression.rs
index beeec9541b..40ca52e2bd 100644
--- a/parser/src/parser/expression.rs
+++ b/parser/src/parser/expression.rs
@@ -14,7 +14,7 @@
 // You should have received a copy of the GNU General Public License
 // along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
 
-use tendril::{format_tendril, StrTendril};
+use tendril::format_tendril;
 
 use super::*;
 
@@ -643,120 +643,6 @@ impl ParserContext {
         }
     }
 
-    ///
-    /// Returns a character if it is a valid character that can be parsed.
-    ///
-    fn parse_char(&mut self, input_tendril: StrTendril, span: Span) -> SyntaxResult<Expression> {
-        if input_tendril.is_empty() {
-            return Err(SyntaxError::invalid_empty_char(&span));
-        }
-
-        let input = input_tendril[..].as_bytes();
-        let mut i = 0;
-        let mut escaped = false;
-        let mut hex = false;
-        let mut unicode = false;
-        let mut characters: Vec<u8> = vec![];
-
-        while i < input.len() {
-            if !escaped {
-                if input[i] == b'{' {
-                    i += 1;
-                    characters.clear();
-                    continue;
-                }
-
-                if input[i] == b'}' {
-                    i += 1;
-                    continue;
-                }
-            } else {
-                escaped = false;
-                characters.clear();
-
-                match input[i] {
-                    b'0' => characters.push(0),
-                    b't' => characters.push(9),
-                    b'n' => characters.push(10),
-                    b'r' => characters.push(13),
-                    b'\"' => characters.push(34),
-                    b'\'' => characters.push(39),
-                    b'\\' => characters.push(92),
-                    b'x' => {
-                        hex = true;
-
-                        i += 1;
-                        continue;
-                    }
-                    b'u' => {
-                        unicode = true;
-                    }
-                    _ => {
-                        return Err(SyntaxError::invalid_escaped_char(input[i] as char, &span));
-                    }
-                }
-
-                i += 1;
-
-                continue;
-            }
-
-            if input[i] == b'\\' {
-                escaped = true;
-            }
-
-            characters.push(input[i]);
-            i += 1;
-        }
-
-        return match characters.len() {
-            1 | 2 | 3 | 4 | 5 | 6 if unicode => {
-                if let Ok(string) = std::str::from_utf8(&characters[..]) {
-                    if let Ok(hex) = u32::from_str_radix(&string, 16) {
-                        if hex <= 0x10FFFF {
-                            if let Some(unicode_char) = std::char::from_u32(hex) {
-                                return Ok(Expression::Value(ValueExpression::Char(unicode_char, span)));
-                            }
-                        }
-                    }
-                }
-
-                Err(SyntaxError::invalid_unicode_char(characters, true, &span))
-            }
-            1 => {
-                if hex {
-                    return Err(SyntaxError::invalid_hex_single_char(characters[0] as char, &span));
-                } else if escaped {
-                    return Err(SyntaxError::invalid_escaped_char(characters[0] as char, &span));
-                }
-
-                Ok(Expression::Value(ValueExpression::Char(characters[0] as char, span)))
-            }
-            2 if hex => {
-                if let Ok(string) = std::str::from_utf8(&characters[..]) {
-                    if let Ok(number) = u8::from_str_radix(&string, 16) {
-                        if number <= 127 {
-                            return Ok(Expression::Value(ValueExpression::Char(number as char, span)));
-                        }
-                    }
-                }
-
-                Err(SyntaxError::invalid_hex_char(characters, &span))
-            }
-            3 | 4 => {
-                // direct unicode symbol
-                if let Ok(string) = std::str::from_utf8(&characters[..]) {
-                    if let Some(character) = string.chars().next() {
-                        return Ok(Expression::Value(ValueExpression::Char(character, span)));
-                    }
-                }
-
-                Err(SyntaxError::invalid_unicode_char(characters, false, &span))
-            }
-            _ => Err(SyntaxError::invalid_char(characters, &span)),
-        };
-    }
-
     ///
     /// Returns an [`Expression`] AST node if the next token is a primary expression:
     /// - Literals: field, group, unsigned integer, signed integer, boolean, address
@@ -803,7 +689,7 @@ impl ParserContext {
             Token::True => Expression::Value(ValueExpression::Boolean("true".into(), span)),
             Token::False => Expression::Value(ValueExpression::Boolean("false".into(), span)),
             Token::AddressLit(value) => Expression::Value(ValueExpression::Address(value, span)),
-            Token::CharLit(value) => self.parse_char(value, span)?,
+            Token::CharLit(value) => Expression::Value(ValueExpression::Char(value, span)),
             Token::LeftParen => self.parse_tuple_expression(&span)?,
             Token::LeftSquare => self.parse_array_expression(&span)?,
             Token::Ident(name) => {
diff --git a/parser/src/tokenizer/lexer.rs b/parser/src/tokenizer/lexer.rs
index 178a4432ad..2a2850a71c 100644
--- a/parser/src/tokenizer/lexer.rs
+++ b/parser/src/tokenizer/lexer.rs
@@ -61,6 +61,132 @@ fn eat_identifier(input_tendril: &StrTendril) -> Option<StrTendril> {
 }
 
 impl Token {
+    ///
+    /// Returns a tuple: [(bytes consumed, `Token::CharLit`)] if a character literal can be eaten, otherwise returns `(0, None)`.
+    ///
+    fn eat_char(input_tendril: &StrTendril) -> (usize, Option<Token>) {
+        // Probably better to move this logic to a parse_char.
+        // Would give better errors, and isolates logic from lexer.
+        // Lexer can just return content between single quotes.
+        if input_tendril.is_empty() {
+            return (0, None);
+        }
+
+        let input = input_tendril[..].as_bytes();
+        let mut i = 1;
+        let mut escaped = false;
+        let mut hex = false;
+        let mut unicode = false;
+        let mut last = false;
+        let mut characters: Vec<u8> = vec![];
+
+        while i < input.len() {
+            if !escaped {
+                if input[i] == b'\'' {
+                    last = true;
+                    i += 1;
+                    break;
+                }
+
+                if input[i] == b'{' {
+                    i += 1;
+                    characters.clear();
+                    continue;
+                }
+
+                if input[i] == b'}' {
+                    i += 1;
+                    continue;
+                }
+            } else {
+                escaped = false;
+                characters.clear();
+
+                match input[i] {
+                    b'0' => characters.push(0),
+                    b't' => characters.push(9),
+                    b'n' => characters.push(10),
+                    b'r' => characters.push(13),
+                    b'\"' => characters.push(34),
+                    b'\'' => characters.push(39),
+                    b'\\' => characters.push(92),
+                    b'x' => {
+                        hex = true;
+
+                        i += 1;
+                        continue;
+                    }
+                    b'u' => {
+                        unicode = true;
+                    }
+                    _ => {
+                        return (0, None);
+                    }
+                }
+
+                i += 1;
+
+                continue;
+            }
+
+            if input[i] == b'\\' {
+                escaped = true;
+            }
+
+            characters.push(input[i]);
+            i += 1;
+        }
+
+        if !last {
+            return (0, None);
+        }
+
+        return match characters.len() {
+            1 | 2 | 3 | 4 | 5 if unicode => {
+                if let Ok(string) = std::str::from_utf8(&characters[..]) {
+                    if let Ok(hex) = u32::from_str_radix(&string, 16) {
+                        if hex <= 0x10FFFF {
+                            if let Some(unicode_char) = std::char::from_u32(hex) {
+                                return (i, Some(Token::CharLit(unicode_char)));
+                            }
+                        }
+                    }
+                }
+
+                (0, None)
+            }
+            1 => {
+                if hex {
+                    return (0, None);
+                }
+
+                (i, Some(Token::CharLit(characters[0] as char)))
+            }
+            2 if hex => {
+                if let Ok(string) = std::str::from_utf8(&characters[..]) {
+                    if let Ok(number) = u8::from_str_radix(&string, 16) {
+                        if number <= 127 {
+                            return (i, Some(Token::CharLit(number as char)));
+                        }
+                    }
+                }
+
+                (0, None)
+            }
+            3 | 4 => {
+                // direct unicode symbol
+                if let Ok(string) = std::str::from_utf8(&characters[..]) {
+                    if let Some(character) = string.chars().next() {
+                        return (i, Some(Token::CharLit(character)));
+                    }
+                }
+
+                (0, None)
+            }
+            _ => (0, None),
+        };
+    }
+
     ///
     /// Returns a tuple: [(integer length, integer token)] if an integer can be eaten, otherwise returns [`None`].
     /// An integer can be eaten if its bytes are at the front of the given `input_tendril` string.
@@ -151,23 +277,7 @@ impl Token {
                 return (i + 1, Some(Token::FormatString(segments)));
             }
             b'\'' => {
-                let mut i = 1;
-                let mut end = false;
-
-                while i < input.len() {
-                    if input[i] == b'\'' {
-                        end = true;
-                        break;
-                    }
-
-                    i += 1;
-                }
-
-                if !end {
-                    return (0, None);
-                }
-
-                return (i + 1, Some(Token::CharLit(input_tendril.subtendril(1, (i - 1) as u32))));
+                return Self::eat_char(&input_tendril);
             }
             x if x.is_ascii_digit() => {
                 return Self::eat_integer(&input_tendril);
diff --git a/parser/src/tokenizer/token.rs b/parser/src/tokenizer/token.rs
index 5863041ae9..d7da426589 100644
--- a/parser/src/tokenizer/token.rs
+++ b/parser/src/tokenizer/token.rs
@@ -47,7 +47,7 @@ pub enum Token {
     True,
     False,
     AddressLit(#[serde(with = "leo_ast::common::tendril_json")] StrTendril),
-    CharLit(#[serde(with = "leo_ast::common::tendril_json")] StrTendril),
+    CharLit(char),
 
     At,
 
diff --git a/tests/expectations/compiler/compiler/char/invalid_char.leo.out b/tests/expectations/compiler/compiler/char/invalid_char.leo.out
index 8613ddc150..c529e663ae 100644
--- a/tests/expectations/compiler/compiler/char/invalid_char.leo.out
+++ b/tests/expectations/compiler/compiler/char/invalid_char.leo.out
@@ -2,4 +2,4 @@
 namespace: Compile
 expectation: Fail
 outputs:
-  - "    --> compiler-test:4:23\n     |\n   4 |     const not_valid = '';\n     |                       ^^\n     |\n     = Empty character '' is not valid"
+  - "    --> compiler-test:4:23\n     |\n   4 |     const not_valid = '';\n     |                       ^\n     |\n     = unexpected token: '''"
diff --git a/tests/expectations/parser/parser/expression/literal/char.leo.out b/tests/expectations/parser/parser/expression/literal/char.leo.out
index 0ce29e5695..d6e0fd1fc1 100644
--- a/tests/expectations/parser/parser/expression/literal/char.leo.out
+++ b/tests/expectations/parser/parser/expression/literal/char.leo.out
@@ -4,14 +4,12 @@ expectation: Pass
 outputs:
   - "'a' @ 1:1-4"
   - "'Z' @ 1:1-4"
-  - "'\\\"' @ 1:1-5"
-  - "'\\t' @ 1:1-5"
-  - "'\\r' @ 1:1-5"
-  - "'\\0' @ 1:1-5"
-  - "'\\u{2764}' @ 1:1-11"
-  - "'\\u{306E}' @ 1:1-11"
-  - "'\\u{10FFFF}' @ 1:1-13"
+  - "'\"' @ 1:1-5"
+  - "'' @ 1:1-5"
+  - "'' @ 1:1-5"
+  - "'\u0000' @ 1:1-5"
+  - "'❤' @ 1:1-11"
+  - "'の' @ 1:1-11"
   - "'❤' @ 1:1-6"
   - "'の' @ 1:1-6"
-  - "'\\x0F' @ 1:1-7"
-  - "'\\x2A' @ 1:1-7"
+  - "'*' @ 1:1-7"
diff --git a/tests/expectations/parser/parser/expression/literal/char_fail.leo.out b/tests/expectations/parser/parser/expression/literal/char_fail.leo.out
index 4c71c693af..fb3cc4bdcc 100644
--- a/tests/expectations/parser/parser/expression/literal/char_fail.leo.out
+++ b/tests/expectations/parser/parser/expression/literal/char_fail.leo.out
@@ -2,9 +2,7 @@
 namespace: ParseExpression
 expectation: Fail
 outputs:
-  - "    --> test:1:1\n     |\n   1 | '\\'\n     | ^^^\n     |\n     = Invalid escape character '\\\\'"
+  - "    --> test:1:1\n     |\n   1 | '\\'\n     | ^\n     |\n     = unexpected token: '''"
   - "    --> test:1:1\n     |\n   1 | 'a\n     | ^\n     |\n     = unexpected token: '''"
-  - "    --> test:1:1\n     |\n   1 | ''\n     | ^^\n     |\n     = Empty character '' is not valid"
-  - "    --> test:1:1\n     |\n   1 | '\\x9'\n     | ^^^^^\n     |\n     = Invalid singe hex character '\\x9', expected '\\x09"
-  - "    --> test:1:1\n     |\n   1 | '\\x80'\n     | ^^^^^^\n     |\n     = Invalid singe hex character '\\x[56, 48]'"
-  - "    --> test:1:1\n     |\n   1 | '\\u{9999999}'\n     | ^^^^^^^^^^^^^\n     |\n     = Invalid character '[57, 57, 57, 57, 57, 57, 57]'"
+  - "    --> test:1:1\n     |\n   1 | ''\n     | ^\n     |\n     = unexpected token: '''"
+  - "    --> test:1:1\n     |\n   1 | '\\x9'\n     | ^\n     |\n     = unexpected token: '''"
diff --git a/tests/expectations/parser/parser/expression/literal/char_parse.leo.out b/tests/expectations/parser/parser/expression/literal/char_parse.leo.out
index 5ed19fa02e..3b080cd0a6 100644
--- a/tests/expectations/parser/parser/expression/literal/char_parse.leo.out
+++ b/tests/expectations/parser/parser/expression/literal/char_parse.leo.out
@@ -74,15 +74,6 @@ outputs:
           col_stop: 11
           path: test
           content: "'\\u{306E}'"
-  - Value:
-      Char:
-        - 􏿿
-        - line_start: 1
-          line_stop: 1
-          col_start: 1
-          col_stop: 13
-          path: test
-          content: "'\\u{10FFFF}'"
   - Value:
       Char:
         - ❤
@@ -101,15 +92,6 @@ outputs:
           col_stop: 6
           path: test
           content: "'の'"
-  - Value:
-      Char:
-        - "\u000f"
-        - line_start: 1
-          line_stop: 1
-          col_start: 1
-          col_stop: 7
-          path: test
-          content: "'\\x0F'"
   - Value:
       Char:
         - "*"
diff --git a/tests/parser/expression/literal/char.leo b/tests/parser/expression/literal/char.leo
index 71babf34f3..5ea47f7dbf 100644
--- a/tests/parser/expression/literal/char.leo
+++ b/tests/parser/expression/literal/char.leo
@@ -11,8 +11,6 @@ expectation: Pass
 '\0'
 '\u{2764}'
 '\u{306E}'
-'\u{10FFFF}'
 '❤'
 'の'
-'\x0F'
 '\x2A'
\ No newline at end of file
diff --git a/tests/parser/expression/literal/char_fail.leo b/tests/parser/expression/literal/char_fail.leo
index 5991d04148..565c6f3922 100644
--- a/tests/parser/expression/literal/char_fail.leo
+++ b/tests/parser/expression/literal/char_fail.leo
@@ -9,8 +9,4 @@ expectation: Fail
 
 ''
 
-'\x9'
-
-'\x80'
-
-'\u{9999999}'
\ No newline at end of file
+'\x9'
\ No newline at end of file
diff --git a/tests/parser/expression/literal/char_parse.leo b/tests/parser/expression/literal/char_parse.leo
index 3c22c813ce..515f6b10f3 100644
--- a/tests/parser/expression/literal/char_parse.leo
+++ b/tests/parser/expression/literal/char_parse.leo
@@ -11,8 +11,6 @@ expectation: Pass
 '\0'
 '\u{2764}'
 '\u{306E}'
-'\u{10FFFF}'
 '❤'
 'の'
-'\x0F'
 '\x2A'
\ No newline at end of file