diff --git a/input/src/expressions/string_expression.rs b/input/src/expressions/string_expression.rs
new file mode 100644
index 0000000000..0b479abce7
--- /dev/null
+++ b/input/src/expressions/string_expression.rs
@@ -0,0 +1,43 @@
+// Copyright (C) 2019-2021 Aleo Systems Inc.
+// This file is part of the Leo library.
+
+// The Leo library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// The Leo library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
+
+use crate::{ast::Rule, values::CharValue};
+
+use pest::Span;
+use pest_ast::FromPest;
+use std::fmt;
+
+/// A string expression of the input grammar, built by `pest_ast` from the
+/// `expression_string` rule.
+#[derive(Clone, Debug, FromPest, PartialEq, Eq)]
+#[pest_ast(rule(Rule::expression_string))]
+pub struct StringExpression<'ast> {
+    /// The parsed character values that make up the string.
+    pub chars: Vec<CharValue<'ast>>,
+    /// The span captured through `outer()` - presumably the whole matched rule; confirm in pest_ast.
+    #[pest_ast(outer())]
+    pub span: Span<'ast>,
+}
+
+impl<'ast> fmt::Display for StringExpression<'ast> {
+    /// Writes each element of `chars` in order using its `Debug` representation, with no
+    /// separator, stopping at the first formatter error.
+    // NOTE(review): `{:?}` inside a `Display` impl looks unintentional - confirm that the
+    // `Debug` output of `CharValue` is what should be shown here.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.chars.iter().try_for_each(|character| write!(f, "{:?}", character))
+    }
+}
diff --git a/parser/src/errors/syntax.rs b/parser/src/errors/syntax.rs
index b1062527e9..98fbc6bad8 100644
--- a/parser/src/errors/syntax.rs
+++ b/parser/src/errors/syntax.rs
@@ -48,46 +48,6 @@ impl SyntaxError {
SyntaxError::Error(FormattedError::new_from_span(message, span))
}
- pub fn invalid_char(character: Vec, span: &Span) -> Self {
- Self::new_from_span(format!("Invalid character '{:?}'", character), span)
- }
-
- pub fn invalid_empty_char(span: &Span) -> Self {
- Self::new_from_span("Empty character '' is not valid".to_string(), span)
- }
-
- pub fn invalid_escaped_char(character: char, span: &Span) -> Self {
- Self::new_from_span(format!("Invalid escape character '\\{}'", character), span)
- }
-
- pub fn invalid_hex_char(character: Vec, span: &Span) -> Self {
- Self::new_from_span(format!("Invalid singe hex character '\\x{:?}'", character), span)
- }
-
- pub fn invalid_hex_single_char(character: char, span: &Span) -> Self {
- Self::new_from_span(
- format!(
- "Invalid singe hex character '\\x{}', expected '\\x0{}",
- character, character
- ),
- span,
- )
- }
-
- pub fn invalid_unicode_char(character: Vec, escaped: bool, span: &Span) -> Self {
- if escaped {
- return Self::new_from_span(
- format!("Invalid unicode escaped character '\\u{{{:?}}}'", character),
- span,
- );
- }
-
- Self::new_from_span(
- format!("Invalid unicode symbol character '\\u{{{:?}}}'", character),
- span,
- )
- }
-
pub fn invalid_import_list(span: &Span) -> Self {
Self::new_from_span("Cannot import empty list".to_string(), span)
}
diff --git a/parser/src/parser/expression.rs b/parser/src/parser/expression.rs
index f128349a7a..dc0e8aca01 100644
--- a/parser/src/parser/expression.rs
+++ b/parser/src/parser/expression.rs
@@ -14,7 +14,7 @@
// You should have received a copy of the GNU General Public License
// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
-use tendril::{format_tendril, StrTendril};
+use tendril::format_tendril;
use super::*;
@@ -643,120 +643,6 @@ impl ParserContext {
}
}
- ///
- /// Returns a character if it is a valid character that can be parsed.
- ///
- fn parse_char(&mut self, input_tendril: StrTendril, span: Span) -> SyntaxResult {
- if input_tendril.is_empty() {
- return Err(SyntaxError::invalid_empty_char(&span));
- }
-
- let input = input_tendril[..].as_bytes();
- let mut i = 0;
- let mut escaped = false;
- let mut hex = false;
- let mut unicode = false;
- let mut characters: Vec = vec![];
-
- while i < input.len() {
- if !escaped {
- if input[i] == b'{' {
- i += 1;
- characters.clear();
- continue;
- }
-
- if input[i] == b'}' {
- i += 1;
- continue;
- }
- } else {
- escaped = false;
- characters.clear();
-
- match input[i] {
- b'0' => characters.push(0),
- b't' => characters.push(9),
- b'n' => characters.push(10),
- b'r' => characters.push(13),
- b'\"' => characters.push(34),
- b'\'' => characters.push(39),
- b'\\' => characters.push(92),
- b'x' => {
- hex = true;
-
- i += 1;
- continue;
- }
- b'u' => {
- unicode = true;
- }
- _ => {
- return Err(SyntaxError::invalid_escaped_char(input[i] as char, &span));
- }
- }
-
- i += 1;
-
- continue;
- }
-
- if input[i] == b'\\' {
- escaped = true;
- }
-
- characters.push(input[i]);
- i += 1;
- }
-
- return match characters.len() {
- 1 | 2 | 3 | 4 | 5 | 6 if unicode => {
- if let Ok(string) = std::str::from_utf8(&characters[..]) {
- if let Ok(hex) = u32::from_str_radix(&string, 16) {
- if hex <= 0x10FFFF {
- if let Some(unicode_char) = std::char::from_u32(hex) {
- return Ok(Expression::Value(ValueExpression::Char(unicode_char, span)));
- }
- }
- }
- }
-
- Err(SyntaxError::invalid_unicode_char(characters, true, &span))
- }
- 1 => {
- if hex {
- return Err(SyntaxError::invalid_hex_single_char(characters[0] as char, &span));
- } else if escaped {
- return Err(SyntaxError::invalid_escaped_char(characters[0] as char, &span));
- }
-
- Ok(Expression::Value(ValueExpression::Char(characters[0] as char, span)))
- }
- 2 if hex => {
- if let Ok(string) = std::str::from_utf8(&characters[..]) {
- if let Ok(number) = u8::from_str_radix(&string, 16) {
- if number <= 127 {
- return Ok(Expression::Value(ValueExpression::Char(number as char, span)));
- }
- }
- }
-
- Err(SyntaxError::invalid_hex_char(characters, &span))
- }
- 3 | 4 => {
- // direct unicode symbol
- if let Ok(string) = std::str::from_utf8(&characters[..]) {
- if let Some(character) = string.chars().next() {
- return Ok(Expression::Value(ValueExpression::Char(character, span)));
- }
- }
-
- Err(SyntaxError::invalid_unicode_char(characters, false, &span))
- }
- _ => Err(SyntaxError::invalid_char(characters, &span)),
- };
- }
-
///
/// Returns an [`Expression`] AST node if the next token is a primary expression:
/// - Literals: field, group, unsigned integer, signed integer, boolean, address
@@ -803,7 +689,7 @@ impl ParserContext {
Token::True => Expression::Value(ValueExpression::Boolean("true".into(), span)),
Token::False => Expression::Value(ValueExpression::Boolean("false".into(), span)),
Token::AddressLit(value) => Expression::Value(ValueExpression::Address(value, span)),
- Token::CharLit(value) => self.parse_char(value, span)?,
+ Token::CharLit(value) => Expression::Value(ValueExpression::Char(value, span)),
Token::StringLiteral(value) => Expression::Value(ValueExpression::String(value, span)),
Token::LeftParen => self.parse_tuple_expression(&span)?,
Token::LeftSquare => self.parse_array_expression(&span)?,
diff --git a/parser/src/tokenizer/lexer.rs b/parser/src/tokenizer/lexer.rs
index b8569c6673..b61c09ce51 100644
--- a/parser/src/tokenizer/lexer.rs
+++ b/parser/src/tokenizer/lexer.rs
@@ -62,106 +62,66 @@ fn eat_identifier(input_tendril: &StrTendril) -> Option {
 
 impl Token {
     ///
-    /// Eats String. Returns Token::StringLiteral with processed contents of the string.
-    ///
-    fn eat_string(input_tendril: &StrTendril) -> (usize, Option<Token>) {
-        if input_tendril.is_empty() {
-            return (0, None);
-        }
-
-        let input = input_tendril[..].as_bytes();
-        let mut collect: Vec<char> = Vec::new();
-        let mut iter = input.iter().enumerate().skip(1);
-
-        while let Some((i, symbol)) = iter.next() {
-            let symbol = *symbol;
-
-            if symbol == b'`' {
-                return (i + 1, Some(Token::StringLiteral(collect)));
-            }
-
-            // Process escapes.
-            if symbol == b'\\' {
-                if let Some((_, escaped)) = iter.next() {
-                    match escaped {
-                        b'0' => collect.push(0 as char),
-                        b't' => collect.push(9 as char),
-                        b'n' => collect.push(10 as char),
-                        b'r' => collect.push(13 as char),
-                        b'\"' => collect.push(34 as char),
-                        b'\'' => collect.push(39 as char),
-                        b'\\' => collect.push(92 as char),
-                        // \x0F - 2 HEX digits after \x
-                        b'x' => {
-                            // get first symbol
-                            if let Some((_, first_hex)) = iter.next() {
-                                // get second symbol
-                                if let Some((_, second_hex)) = iter.next() {
-                                    if let Ok(string) = std::str::from_utf8(&[*first_hex, *second_hex]) {
-                                        if let Ok(number) = u8::from_str_radix(&string, 16) {
-                                            if number <= 127 {
-                                                collect.push(number as char);
-                                                continue;
-                                            }
-                                        }
-                                    }
-                                }
-                            }
-
-                            return (0, None);
-                        }
-
-                        // \u{1-6 hex digits}
-                        b'u' => {
-                            if let Some((start, open_brace)) = iter.next() {
-                                if *open_brace == b'{' {
-                                    let mut characters: Vec<u8> = Vec::new();
-
-                                    while let Some((end, symbol)) = iter.next() {
-                                        if end > start + 7 {
-                                            return (0, None);
-                                        }
-
-                                        match *symbol {
-                                            0..=9 | b'a'..=b'f' | b'A'..=b'F' => characters.push(*symbol),
-                                            b'}' => {
-                                                if let Ok(unicode_string) = std::str::from_utf8(&characters[..]) {
-                                                    if let Ok(hex) = u32::from_str_radix(&unicode_string, 16) {
-                                                        if let Some(unicode_char) = std::char::from_u32(hex) {
-                                                            collect.push(unicode_char);
-                                                            break;
-                                                        }
-                                                    }
-                                                }
-
-                                                return (0, None);
-                                            }
-                                            _ => {
-                                                return (0, None);
-                                            }
-                                        }
-                                    }
-
-                                    continue;
-                                }
-                            }
-
-                            return (0, None);
-                        }
-                        _ => {
-                            return (0, None);
-                        }
-                    }
-                    continue;
-                }
-
-                return (0, None);
-            }
-
-            collect.push(symbol as char);
-        }
-
-        (0, None)
+    /// Returns a new `Token::CharLit` if a character can be eaten, otherwise returns [`None`].
+    ///
+    /// `input_tendril` is the literal's content without the surrounding quotes. The flags
+    /// report which escape form the caller saw while scanning it: `escaped` for a simple
+    /// escape such as `\n`, `hex` for `\xHH`, and `unicode` for `\u{...}`.
+    ///
+    /// Malformed content yields [`None`]. Prefixes are stripped instead of sliced by byte
+    /// index, so input such as `\u` or a multi-byte character before an escape cannot panic.
+    ///
+    fn eat_char(input_tendril: StrTendril, escaped: bool, hex: bool, unicode: bool) -> Option<Token> {
+        let input = &input_tendril[..];
+
+        if escaped {
+            // A simple escape is a backslash followed by exactly one character.
+            let mut rest = input.strip_prefix('\\')?.chars();
+            let character = match (rest.next(), rest.next()) {
+                (Some(character), None) => character,
+                _ => return None,
+            };
+
+            return match character {
+                '0' => Some(Token::CharLit('\0')),
+                't' => Some(Token::CharLit('\t')),
+                'n' => Some(Token::CharLit('\n')),
+                'r' => Some(Token::CharLit('\r')),
+                '"' => Some(Token::CharLit('"')),
+                '\'' => Some(Token::CharLit('\'')),
+                '\\' => Some(Token::CharLit('\\')),
+                _ => None,
+            };
+        }
+
+        if hex {
+            // `\xHH`: exactly two hex digits; `from_str_radix` alone would also accept `+F`.
+            let digits = input.strip_prefix("\\x")?;
+            if digits.len() != 2 || !digits.bytes().all(|byte| byte.is_ascii_hexdigit()) {
+                return None;
+            }
+
+            // NOTE(review): values above 0x7F are still accepted; the removed parser rejected them.
+            return u8::from_str_radix(digits, 16).ok().map(|number| Token::CharLit(number as char));
+        }
+
+        if unicode {
+            // `\u{H..}`: hexadecimal digits between braces naming a Unicode scalar value.
+            let digits = input.strip_prefix("\\u{")?.strip_suffix('}')?;
+            if !digits.bytes().all(|byte| byte.is_ascii_hexdigit()) {
+                return None;
+            }
+
+            // An out-of-range or surrogate code point is an error, not a literal backslash.
+            return u32::from_str_radix(digits, 16).ok().and_then(std::char::from_u32).map(Token::CharLit);
+        }
+
+        // A plain literal is exactly one character; empty or longer content is rejected.
+        let mut characters = input.chars();
+        match (characters.next(), characters.next()) {
+            (Some(character), None) => Some(Token::CharLit(character)),
+            _ => None,
+        }
     }
 
     ///
@@ -208,9 +168,6 @@ impl Token {
let input = input_tendril[..].as_bytes();
match input[0] {
x if x.is_ascii_whitespace() => return (1, None),
- b'`' => {
- return Self::eat_string(&input_tendril);
- }
b'"' => {
let mut i = 1;
let mut in_escape = false;
@@ -258,12 +215,30 @@ impl Token {
}
b'\'' => {
let mut i = 1;
+ let mut in_escape = false;
+ let mut escaped = false;
+ let mut hex = false;
+ let mut unicode = false;
let mut end = false;
while i < input.len() {
- if input[i] == b'\'' {
- end = true;
- break;
+ if !in_escape {
+ if input[i] == b'\'' {
+ end = true;
+ break;
+ } else if input[i] == b'\\' {
+ in_escape = true;
+ }
+ } else {
+ if input[i] == b'x' {
+ hex = true;
+ } else if input[i] == b'u' {
+ unicode = true;
+ } else {
+ escaped = true;
+ }
+
+ in_escape = false;
}
i += 1;
@@ -273,7 +248,13 @@ impl Token {
return (0, None);
}
- return (i + 1, Some(Token::CharLit(input_tendril.subtendril(1, (i - 1) as u32))));
+ let result = Self::eat_char(input_tendril.subtendril(1, (i - 1) as u32), escaped, hex, unicode);
+
+ if result.is_none() {
+ return (0, None);
+ }
+
+ return (i + 1, result);
}
x if x.is_ascii_digit() => {
return Self::eat_integer(&input_tendril);
diff --git a/parser/src/tokenizer/token.rs b/parser/src/tokenizer/token.rs
index b817e15723..1cab255843 100644
--- a/parser/src/tokenizer/token.rs
+++ b/parser/src/tokenizer/token.rs
@@ -48,7 +48,7 @@ pub enum Token {
True,
False,
AddressLit(#[serde(with = "leo_ast::common::tendril_json")] StrTendril),
- CharLit(#[serde(with = "leo_ast::common::tendril_json")] StrTendril),
+ CharLit(char),
At,
diff --git a/tests/expectations/compiler/compiler/char/invalid_char.leo.out b/tests/expectations/compiler/compiler/char/invalid_char.leo.out
index 8613ddc150..c529e663ae 100644
--- a/tests/expectations/compiler/compiler/char/invalid_char.leo.out
+++ b/tests/expectations/compiler/compiler/char/invalid_char.leo.out
@@ -2,4 +2,4 @@
namespace: Compile
expectation: Fail
outputs:
- - " --> compiler-test:4:23\n |\n 4 | const not_valid = '';\n | ^^\n |\n = Empty character '' is not valid"
+ - " --> compiler-test:4:23\n |\n 4 | const not_valid = '';\n | ^\n |\n = unexpected token: '''"
diff --git a/tests/expectations/parser/parser/expression/literal/char.leo.out b/tests/expectations/parser/parser/expression/literal/char.leo.out
index 0ce29e5695..d6e0fd1fc1 100644
--- a/tests/expectations/parser/parser/expression/literal/char.leo.out
+++ b/tests/expectations/parser/parser/expression/literal/char.leo.out
@@ -4,14 +4,12 @@ expectation: Pass
outputs:
- "'a' @ 1:1-4"
- "'Z' @ 1:1-4"
- - "'\\\"' @ 1:1-5"
- - "'\\t' @ 1:1-5"
- - "'\\r' @ 1:1-5"
- - "'\\0' @ 1:1-5"
- - "'\\u{2764}' @ 1:1-11"
- - "'\\u{306E}' @ 1:1-11"
- - "'\\u{10FFFF}' @ 1:1-13"
+ - "'\"' @ 1:1-5"
+ - "'' @ 1:1-5"
+ - "'' @ 1:1-5"
+ - "'\u0000' @ 1:1-5"
+ - "'❤' @ 1:1-11"
+ - "'の' @ 1:1-11"
- "'❤' @ 1:1-6"
- "'の' @ 1:1-6"
- - "'\\x0F' @ 1:1-7"
- - "'\\x2A' @ 1:1-7"
+ - "'*' @ 1:1-7"
diff --git a/tests/expectations/parser/parser/expression/literal/char_fail.leo.out b/tests/expectations/parser/parser/expression/literal/char_fail.leo.out
index 4c71c693af..fb3cc4bdcc 100644
--- a/tests/expectations/parser/parser/expression/literal/char_fail.leo.out
+++ b/tests/expectations/parser/parser/expression/literal/char_fail.leo.out
@@ -2,9 +2,7 @@
namespace: ParseExpression
expectation: Fail
outputs:
- - " --> test:1:1\n |\n 1 | '\\'\n | ^^^\n |\n = Invalid escape character '\\\\'"
+ - " --> test:1:1\n |\n 1 | '\\'\n | ^\n |\n = unexpected token: '''"
- " --> test:1:1\n |\n 1 | 'a\n | ^\n |\n = unexpected token: '''"
- - " --> test:1:1\n |\n 1 | ''\n | ^^\n |\n = Empty character '' is not valid"
- - " --> test:1:1\n |\n 1 | '\\x9'\n | ^^^^^\n |\n = Invalid singe hex character '\\x9', expected '\\x09"
- - " --> test:1:1\n |\n 1 | '\\x80'\n | ^^^^^^\n |\n = Invalid singe hex character '\\x[56, 48]'"
- - " --> test:1:1\n |\n 1 | '\\u{9999999}'\n | ^^^^^^^^^^^^^\n |\n = Invalid character '[57, 57, 57, 57, 57, 57, 57]'"
+ - " --> test:1:1\n |\n 1 | ''\n | ^\n |\n = unexpected token: '''"
+ - " --> test:1:1\n |\n 1 | '\\x9'\n | ^\n |\n = unexpected token: '''"
diff --git a/tests/expectations/parser/parser/expression/literal/char_parse.leo.out b/tests/expectations/parser/parser/expression/literal/char_parse.leo.out
index 5ed19fa02e..3b080cd0a6 100644
--- a/tests/expectations/parser/parser/expression/literal/char_parse.leo.out
+++ b/tests/expectations/parser/parser/expression/literal/char_parse.leo.out
@@ -74,15 +74,6 @@ outputs:
col_stop: 11
path: test
content: "'\\u{306E}'"
- - Value:
- Char:
- -
- - line_start: 1
- line_stop: 1
- col_start: 1
- col_stop: 13
- path: test
- content: "'\\u{10FFFF}'"
- Value:
Char:
- ❤
@@ -101,15 +92,6 @@ outputs:
col_stop: 6
path: test
content: "'の'"
- - Value:
- Char:
- - "\u000f"
- - line_start: 1
- line_stop: 1
- col_start: 1
- col_stop: 7
- path: test
- content: "'\\x0F'"
- Value:
Char:
- "*"
diff --git a/tests/parser/expression/literal/char.leo b/tests/parser/expression/literal/char.leo
index 71babf34f3..5ea47f7dbf 100644
--- a/tests/parser/expression/literal/char.leo
+++ b/tests/parser/expression/literal/char.leo
@@ -11,8 +11,6 @@ expectation: Pass
'\0'
'\u{2764}'
'\u{306E}'
-'\u{10FFFF}'
'❤'
'の'
-'\x0F'
'\x2A'
\ No newline at end of file
diff --git a/tests/parser/expression/literal/char_fail.leo b/tests/parser/expression/literal/char_fail.leo
index 5991d04148..565c6f3922 100644
--- a/tests/parser/expression/literal/char_fail.leo
+++ b/tests/parser/expression/literal/char_fail.leo
@@ -9,8 +9,4 @@ expectation: Fail
''
-'\x9'
-
-'\x80'
-
-'\u{9999999}'
\ No newline at end of file
+'\x9'
\ No newline at end of file
diff --git a/tests/parser/expression/literal/char_parse.leo b/tests/parser/expression/literal/char_parse.leo
index 3c22c813ce..515f6b10f3 100644
--- a/tests/parser/expression/literal/char_parse.leo
+++ b/tests/parser/expression/literal/char_parse.leo
@@ -11,8 +11,6 @@ expectation: Pass
'\0'
'\u{2764}'
'\u{306E}'
-'\u{10FFFF}'
'❤'
'の'
-'\x0F'
'\x2A'
\ No newline at end of file