mirror of
https://github.com/AleoHQ/leo.git
synced 2024-12-25 10:32:13 +03:00
merge remote
This commit is contained in:
commit
329b330c15
39
input/src/expressions/string_expression.rs
Normal file
39
input/src/expressions/string_expression.rs
Normal file
@ -0,0 +1,39 @@
|
||||
// Copyright (C) 2019-2021 Aleo Systems Inc.
|
||||
// This file is part of the Leo library.
|
||||
|
||||
// The Leo library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// The Leo library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
use crate::{ast::Rule, values::CharValue};
|
||||
|
||||
use pest::Span;
|
||||
use pest_ast::FromPest;
|
||||
use std::fmt;
|
||||
|
||||
#[derive(Clone, Debug, FromPest, PartialEq, Eq)]
|
||||
#[pest_ast(rule(Rule::expression_string))]
|
||||
pub struct StringExpression<'ast> {
|
||||
pub chars: Vec<CharValue<'ast>>,
|
||||
#[pest_ast(outer())]
|
||||
pub span: Span<'ast>,
|
||||
}
|
||||
|
||||
impl<'ast> fmt::Display for StringExpression<'ast> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
for character in self.chars.iter() {
|
||||
write!(f, "{:?}", character)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
@ -48,46 +48,6 @@ impl SyntaxError {
|
||||
SyntaxError::Error(FormattedError::new_from_span(message, span))
|
||||
}
|
||||
|
||||
pub fn invalid_char(character: Vec<u8>, span: &Span) -> Self {
|
||||
Self::new_from_span(format!("Invalid character '{:?}'", character), span)
|
||||
}
|
||||
|
||||
pub fn invalid_empty_char(span: &Span) -> Self {
|
||||
Self::new_from_span("Empty character '' is not valid".to_string(), span)
|
||||
}
|
||||
|
||||
pub fn invalid_escaped_char(character: char, span: &Span) -> Self {
|
||||
Self::new_from_span(format!("Invalid escape character '\\{}'", character), span)
|
||||
}
|
||||
|
||||
pub fn invalid_hex_char(character: Vec<u8>, span: &Span) -> Self {
|
||||
Self::new_from_span(format!("Invalid singe hex character '\\x{:?}'", character), span)
|
||||
}
|
||||
|
||||
pub fn invalid_hex_single_char(character: char, span: &Span) -> Self {
|
||||
Self::new_from_span(
|
||||
format!(
|
||||
"Invalid singe hex character '\\x{}', expected '\\x0{}",
|
||||
character, character
|
||||
),
|
||||
span,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn invalid_unicode_char(character: Vec<u8>, escaped: bool, span: &Span) -> Self {
|
||||
if escaped {
|
||||
return Self::new_from_span(
|
||||
format!("Invalid unicode escaped character '\\u{{{:?}}}'", character),
|
||||
span,
|
||||
);
|
||||
}
|
||||
|
||||
Self::new_from_span(
|
||||
format!("Invalid unicode symbol character '\\u{{{:?}}}'", character),
|
||||
span,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn invalid_import_list(span: &Span) -> Self {
|
||||
Self::new_from_span("Cannot import empty list".to_string(), span)
|
||||
}
|
||||
|
@ -14,7 +14,7 @@
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
use tendril::{format_tendril, StrTendril};
|
||||
use tendril::format_tendril;
|
||||
|
||||
use super::*;
|
||||
|
||||
@ -643,120 +643,6 @@ impl ParserContext {
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Returns a character if it is a valid character that can be parsed.
|
||||
///
|
||||
fn parse_char(&mut self, input_tendril: StrTendril, span: Span) -> SyntaxResult<Expression> {
|
||||
if input_tendril.is_empty() {
|
||||
return Err(SyntaxError::invalid_empty_char(&span));
|
||||
}
|
||||
|
||||
let input = input_tendril[..].as_bytes();
|
||||
let mut i = 0;
|
||||
let mut escaped = false;
|
||||
let mut hex = false;
|
||||
let mut unicode = false;
|
||||
let mut characters: Vec<u8> = vec![];
|
||||
|
||||
while i < input.len() {
|
||||
if !escaped {
|
||||
if input[i] == b'{' {
|
||||
i += 1;
|
||||
characters.clear();
|
||||
continue;
|
||||
}
|
||||
|
||||
if input[i] == b'}' {
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
escaped = false;
|
||||
characters.clear();
|
||||
|
||||
match input[i] {
|
||||
b'0' => characters.push(0),
|
||||
b't' => characters.push(9),
|
||||
b'n' => characters.push(10),
|
||||
b'r' => characters.push(13),
|
||||
b'\"' => characters.push(34),
|
||||
b'\'' => characters.push(39),
|
||||
b'\\' => characters.push(92),
|
||||
b'x' => {
|
||||
hex = true;
|
||||
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
b'u' => {
|
||||
unicode = true;
|
||||
}
|
||||
_ => {
|
||||
return Err(SyntaxError::invalid_escaped_char(input[i] as char, &span));
|
||||
}
|
||||
}
|
||||
|
||||
i += 1;
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if input[i] == b'\\' {
|
||||
escaped = true;
|
||||
}
|
||||
|
||||
characters.push(input[i]);
|
||||
i += 1;
|
||||
}
|
||||
|
||||
return match characters.len() {
|
||||
1 | 2 | 3 | 4 | 5 | 6 if unicode => {
|
||||
if let Ok(string) = std::str::from_utf8(&characters[..]) {
|
||||
if let Ok(hex) = u32::from_str_radix(&string, 16) {
|
||||
if hex <= 0x10FFFF {
|
||||
if let Some(unicode_char) = std::char::from_u32(hex) {
|
||||
return Ok(Expression::Value(ValueExpression::Char(unicode_char, span)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Err(SyntaxError::invalid_unicode_char(characters, true, &span))
|
||||
}
|
||||
1 => {
|
||||
if hex {
|
||||
return Err(SyntaxError::invalid_hex_single_char(characters[0] as char, &span));
|
||||
} else if escaped {
|
||||
return Err(SyntaxError::invalid_escaped_char(characters[0] as char, &span));
|
||||
}
|
||||
|
||||
Ok(Expression::Value(ValueExpression::Char(characters[0] as char, span)))
|
||||
}
|
||||
2 if hex => {
|
||||
if let Ok(string) = std::str::from_utf8(&characters[..]) {
|
||||
if let Ok(number) = u8::from_str_radix(&string, 16) {
|
||||
if number <= 127 {
|
||||
return Ok(Expression::Value(ValueExpression::Char(number as char, span)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Err(SyntaxError::invalid_hex_char(characters, &span))
|
||||
}
|
||||
3 | 4 => {
|
||||
// direct unicode symbol
|
||||
if let Ok(string) = std::str::from_utf8(&characters[..]) {
|
||||
if let Some(character) = string.chars().next() {
|
||||
return Ok(Expression::Value(ValueExpression::Char(character, span)));
|
||||
}
|
||||
}
|
||||
|
||||
Err(SyntaxError::invalid_unicode_char(characters, false, &span))
|
||||
}
|
||||
_ => Err(SyntaxError::invalid_char(characters, &span)),
|
||||
};
|
||||
}
|
||||
|
||||
///
|
||||
/// Returns an [`Expression`] AST node if the next token is a primary expression:
|
||||
/// - Literals: field, group, unsigned integer, signed integer, boolean, address
|
||||
@ -803,7 +689,7 @@ impl ParserContext {
|
||||
Token::True => Expression::Value(ValueExpression::Boolean("true".into(), span)),
|
||||
Token::False => Expression::Value(ValueExpression::Boolean("false".into(), span)),
|
||||
Token::AddressLit(value) => Expression::Value(ValueExpression::Address(value, span)),
|
||||
Token::CharLit(value) => self.parse_char(value, span)?,
|
||||
Token::CharLit(value) => Expression::Value(ValueExpression::Char(value, span)),
|
||||
Token::StringLiteral(value) => Expression::Value(ValueExpression::String(value, span)),
|
||||
Token::LeftParen => self.parse_tuple_expression(&span)?,
|
||||
Token::LeftSquare => self.parse_array_expression(&span)?,
|
||||
|
@ -62,106 +62,66 @@ fn eat_identifier(input_tendril: &StrTendril) -> Option<StrTendril> {
|
||||
|
||||
impl Token {
|
||||
///
|
||||
/// Eats String. Returns Token::StringLiteral with processed contents of the string.
|
||||
/// Returns a new `Token::CharLit` if an character can be eaten, otherwise returns [`None`].
|
||||
///
|
||||
fn eat_string(input_tendril: &StrTendril) -> (usize, Option<Token>) {
|
||||
fn eat_char(input_tendril: StrTendril, escaped: bool, hex: bool, unicode: bool) -> Option<Token> {
|
||||
if input_tendril.is_empty() {
|
||||
return (0, None);
|
||||
return None;
|
||||
}
|
||||
|
||||
let input = input_tendril[..].as_bytes();
|
||||
let mut collect: Vec<char> = Vec::new();
|
||||
let mut iter = input.iter().enumerate().skip(1);
|
||||
if escaped {
|
||||
let string = input_tendril.to_string();
|
||||
let escaped = &string[1..string.len()];
|
||||
|
||||
while let Some((i, symbol)) = iter.next() {
|
||||
let symbol = *symbol;
|
||||
|
||||
if symbol == b'`' {
|
||||
return (i + 1, Some(Token::StringLiteral(collect)));
|
||||
if escaped.len() != 1 {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Process escapes.
|
||||
if symbol == b'\\' {
|
||||
if let Some((_, escaped)) = iter.next() {
|
||||
match escaped {
|
||||
b'0' => collect.push(0 as char),
|
||||
b't' => collect.push(9 as char),
|
||||
b'n' => collect.push(10 as char),
|
||||
b'r' => collect.push(13 as char),
|
||||
b'\"' => collect.push(34 as char),
|
||||
b'\'' => collect.push(39 as char),
|
||||
b'\\' => collect.push(92 as char),
|
||||
// \x0F - 2 HEX digits after \x
|
||||
b'x' => {
|
||||
// get first symbol
|
||||
if let Some((_, first_hex)) = iter.next() {
|
||||
// get second symbol
|
||||
if let Some((_, second_hex)) = iter.next() {
|
||||
if let Ok(string) = std::str::from_utf8(&[*first_hex, *second_hex]) {
|
||||
if let Ok(number) = u8::from_str_radix(&string, 16) {
|
||||
if number <= 127 {
|
||||
collect.push(number as char);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if let Some(character) = escaped.chars().next() {
|
||||
return match character {
|
||||
'0' => Some(Token::CharLit(0 as char)),
|
||||
't' => Some(Token::CharLit(9 as char)),
|
||||
'n' => Some(Token::CharLit(10 as char)),
|
||||
'r' => Some(Token::CharLit(13 as char)),
|
||||
'\"' => Some(Token::CharLit(34 as char)),
|
||||
'\'' => Some(Token::CharLit(39 as char)),
|
||||
'\\' => Some(Token::CharLit(92 as char)),
|
||||
_ => None,
|
||||
};
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
return (0, None);
|
||||
}
|
||||
if hex {
|
||||
let string = input_tendril.to_string();
|
||||
let hex_string = &string[2..string.len()];
|
||||
|
||||
// \u{1-6 hex digits}
|
||||
b'u' => {
|
||||
if let Some((start, open_brace)) = iter.next() {
|
||||
if *open_brace == b'{' {
|
||||
let mut characters: Vec<u8> = Vec::new();
|
||||
if hex_string.len() != 2 {
|
||||
return None;
|
||||
}
|
||||
|
||||
while let Some((end, symbol)) = iter.next() {
|
||||
if end > start + 7 {
|
||||
return (0, None);
|
||||
}
|
||||
if let Ok(ascii_number) = u8::from_str_radix(&hex_string, 16) {
|
||||
return Some(Token::CharLit(ascii_number as char));
|
||||
}
|
||||
}
|
||||
|
||||
match *symbol {
|
||||
0..=9 | b'a'..=b'f' | b'A'..=b'F' => characters.push(*symbol),
|
||||
b'}' => {
|
||||
if let Ok(unicode_string) = std::str::from_utf8(&characters[..]) {
|
||||
if let Ok(hex) = u32::from_str_radix(&unicode_string, 16) {
|
||||
if let Some(unicode_char) = std::char::from_u32(hex) {
|
||||
collect.push(unicode_char);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if unicode {
|
||||
let string = input_tendril.to_string();
|
||||
let unicode_number = &string[3..string.len() - 1];
|
||||
|
||||
return (0, None);
|
||||
}
|
||||
_ => {
|
||||
return (0, None);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
return (0, None);
|
||||
}
|
||||
_ => {
|
||||
return (0, None);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
if let Ok(hex) = u32::from_str_radix(&unicode_number, 16) {
|
||||
if let Some(character) = std::char::from_u32(hex) {
|
||||
return Some(Token::CharLit(character));
|
||||
}
|
||||
|
||||
return (0, None);
|
||||
}
|
||||
|
||||
collect.push(symbol as char);
|
||||
}
|
||||
|
||||
(0, None)
|
||||
if let Some(character) = input_tendril.to_string().chars().next() {
|
||||
return Some(Token::CharLit(character));
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
///
|
||||
@ -208,9 +168,6 @@ impl Token {
|
||||
let input = input_tendril[..].as_bytes();
|
||||
match input[0] {
|
||||
x if x.is_ascii_whitespace() => return (1, None),
|
||||
b'`' => {
|
||||
return Self::eat_string(&input_tendril);
|
||||
}
|
||||
b'"' => {
|
||||
let mut i = 1;
|
||||
let mut in_escape = false;
|
||||
@ -258,12 +215,30 @@ impl Token {
|
||||
}
|
||||
b'\'' => {
|
||||
let mut i = 1;
|
||||
let mut in_escape = false;
|
||||
let mut escaped = false;
|
||||
let mut hex = false;
|
||||
let mut unicode = false;
|
||||
let mut end = false;
|
||||
|
||||
while i < input.len() {
|
||||
if input[i] == b'\'' {
|
||||
end = true;
|
||||
break;
|
||||
if !in_escape {
|
||||
if input[i] == b'\'' {
|
||||
end = true;
|
||||
break;
|
||||
} else if input[i] == b'\\' {
|
||||
in_escape = true;
|
||||
}
|
||||
} else {
|
||||
if input[i] == b'x' {
|
||||
hex = true;
|
||||
} else if input[i] == b'u' {
|
||||
unicode = true;
|
||||
} else {
|
||||
escaped = true;
|
||||
}
|
||||
|
||||
in_escape = false;
|
||||
}
|
||||
|
||||
i += 1;
|
||||
@ -273,7 +248,13 @@ impl Token {
|
||||
return (0, None);
|
||||
}
|
||||
|
||||
return (i + 1, Some(Token::CharLit(input_tendril.subtendril(1, (i - 1) as u32))));
|
||||
let result = Self::eat_char(input_tendril.subtendril(1, (i - 1) as u32), escaped, hex, unicode);
|
||||
|
||||
if result.is_none() {
|
||||
return (0, None);
|
||||
}
|
||||
|
||||
return (i + 1, result);
|
||||
}
|
||||
x if x.is_ascii_digit() => {
|
||||
return Self::eat_integer(&input_tendril);
|
||||
|
@ -48,7 +48,7 @@ pub enum Token {
|
||||
True,
|
||||
False,
|
||||
AddressLit(#[serde(with = "leo_ast::common::tendril_json")] StrTendril),
|
||||
CharLit(#[serde(with = "leo_ast::common::tendril_json")] StrTendril),
|
||||
CharLit(char),
|
||||
|
||||
At,
|
||||
|
||||
|
@ -2,4 +2,4 @@
|
||||
namespace: Compile
|
||||
expectation: Fail
|
||||
outputs:
|
||||
- " --> compiler-test:4:23\n |\n 4 | const not_valid = '';\n | ^^\n |\n = Empty character '' is not valid"
|
||||
- " --> compiler-test:4:23\n |\n 4 | const not_valid = '';\n | ^\n |\n = unexpected token: '''"
|
||||
|
@ -4,14 +4,12 @@ expectation: Pass
|
||||
outputs:
|
||||
- "'a' @ 1:1-4"
|
||||
- "'Z' @ 1:1-4"
|
||||
- "'\\\"' @ 1:1-5"
|
||||
- "'\\t' @ 1:1-5"
|
||||
- "'\\r' @ 1:1-5"
|
||||
- "'\\0' @ 1:1-5"
|
||||
- "'\\u{2764}' @ 1:1-11"
|
||||
- "'\\u{306E}' @ 1:1-11"
|
||||
- "'\\u{10FFFF}' @ 1:1-13"
|
||||
- "'\"' @ 1:1-5"
|
||||
- "'' @ 1:1-5"
|
||||
- "'' @ 1:1-5"
|
||||
- "'\u0000' @ 1:1-5"
|
||||
- "'❤' @ 1:1-11"
|
||||
- "'の' @ 1:1-11"
|
||||
- "'❤' @ 1:1-6"
|
||||
- "'の' @ 1:1-6"
|
||||
- "'\\x0F' @ 1:1-7"
|
||||
- "'\\x2A' @ 1:1-7"
|
||||
- "'*' @ 1:1-7"
|
||||
|
@ -2,9 +2,7 @@
|
||||
namespace: ParseExpression
|
||||
expectation: Fail
|
||||
outputs:
|
||||
- " --> test:1:1\n |\n 1 | '\\'\n | ^^^\n |\n = Invalid escape character '\\\\'"
|
||||
- " --> test:1:1\n |\n 1 | '\\'\n | ^\n |\n = unexpected token: '''"
|
||||
- " --> test:1:1\n |\n 1 | 'a\n | ^\n |\n = unexpected token: '''"
|
||||
- " --> test:1:1\n |\n 1 | ''\n | ^^\n |\n = Empty character '' is not valid"
|
||||
- " --> test:1:1\n |\n 1 | '\\x9'\n | ^^^^^\n |\n = Invalid singe hex character '\\x9', expected '\\x09"
|
||||
- " --> test:1:1\n |\n 1 | '\\x80'\n | ^^^^^^\n |\n = Invalid singe hex character '\\x[56, 48]'"
|
||||
- " --> test:1:1\n |\n 1 | '\\u{9999999}'\n | ^^^^^^^^^^^^^\n |\n = Invalid character '[57, 57, 57, 57, 57, 57, 57]'"
|
||||
- " --> test:1:1\n |\n 1 | ''\n | ^\n |\n = unexpected token: '''"
|
||||
- " --> test:1:1\n |\n 1 | '\\x9'\n | ^\n |\n = unexpected token: '''"
|
||||
|
@ -74,15 +74,6 @@ outputs:
|
||||
col_stop: 11
|
||||
path: test
|
||||
content: "'\\u{306E}'"
|
||||
- Value:
|
||||
Char:
|
||||
-
|
||||
- line_start: 1
|
||||
line_stop: 1
|
||||
col_start: 1
|
||||
col_stop: 13
|
||||
path: test
|
||||
content: "'\\u{10FFFF}'"
|
||||
- Value:
|
||||
Char:
|
||||
- ❤
|
||||
@ -101,15 +92,6 @@ outputs:
|
||||
col_stop: 6
|
||||
path: test
|
||||
content: "'の'"
|
||||
- Value:
|
||||
Char:
|
||||
- "\u000f"
|
||||
- line_start: 1
|
||||
line_stop: 1
|
||||
col_start: 1
|
||||
col_stop: 7
|
||||
path: test
|
||||
content: "'\\x0F'"
|
||||
- Value:
|
||||
Char:
|
||||
- "*"
|
||||
|
@ -11,8 +11,6 @@ expectation: Pass
|
||||
'\0'
|
||||
'\u{2764}'
|
||||
'\u{306E}'
|
||||
'\u{10FFFF}'
|
||||
'❤'
|
||||
'の'
|
||||
'\x0F'
|
||||
'\x2A'
|
@ -9,8 +9,4 @@ expectation: Fail
|
||||
|
||||
''
|
||||
|
||||
'\x9'
|
||||
|
||||
'\x80'
|
||||
|
||||
'\u{9999999}'
|
||||
'\x9'
|
@ -11,8 +11,6 @@ expectation: Pass
|
||||
'\0'
|
||||
'\u{2764}'
|
||||
'\u{306E}'
|
||||
'\u{10FFFF}'
|
||||
'❤'
|
||||
'の'
|
||||
'\x0F'
|
||||
'\x2A'
|
Loading…
Reference in New Issue
Block a user