Merge pull request #974 from AleoHQ/feature/string-parsing

[Feature] String parsing
This commit is contained in:
Alessandro Coglio 2021-05-24 10:51:34 -07:00 committed by GitHub
commit f0525792a9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
44 changed files with 1265 additions and 124 deletions

View File

@ -220,6 +220,9 @@ impl<'a> FromAst<'a, leo_ast::ValueExpression> for Constant<'a> {
value: ConstValue::Int(ConstInt::parse(int_type, value, span)?),
}
}
String(_str_type, _value) => {
unimplemented!("strings do not exist on ASG level")
}
})
}
}

View File

@ -19,7 +19,7 @@ use crate::{Identifier, Span};
use serde::{Deserialize, Serialize};
use tendril::StrTendril;
#[derive(Clone, Serialize, Deserialize, PartialEq, Eq)]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct Annotation {
pub span: Span,
pub name: Identifier,

View File

@ -35,6 +35,12 @@ impl ReducerError {
ReducerError::Error(FormattedError::new_from_span(message, span))
}
/// Builds a `ReducerError` for a tendril that could not be converted into a single `char`.
pub fn failed_to_convert_tendril_to_char(tendril: String, span: &Span) -> Self {
    Self::new_from_span(format!("Failed to convert tendril `{}` to char", tendril), span)
}
pub fn impossible_console_assert_call(span: &Span) -> Self {
let message = "Console::Assert cannot be matched here, its handled in another case.".to_string();

View File

@ -33,6 +33,7 @@ pub enum ValueExpression {
#[serde(with = "crate::common::tendril_json")] StrTendril,
Span,
),
String(Vec<char>, Span),
}
impl fmt::Display for ValueExpression {
@ -46,6 +47,12 @@ impl fmt::Display for ValueExpression {
Implicit(implicit, _) => write!(f, "{}", implicit),
Integer(value, type_, _) => write!(f, "{}{}", value, type_),
Group(group) => write!(f, "{}", group),
String(char_vec, _) => {
for character in char_vec {
write!(f, "{}", character)?
}
Ok(())
}
}
}
}
@ -59,7 +66,8 @@ impl Node for ValueExpression {
| Char(_, span)
| Field(_, span)
| Implicit(_, span)
| Integer(_, _, span) => span,
| Integer(_, _, span)
| String(_, span) => span,
Group(group) => match &**group {
GroupValue::Single(_, span) | GroupValue::Tuple(GroupTuple { span, .. }) => span,
},
@ -74,7 +82,8 @@ impl Node for ValueExpression {
| Char(_, span)
| Field(_, span)
| Implicit(_, span)
| Integer(_, _, span) => *span = new_span,
| Integer(_, _, span)
| String(_, span) => *span = new_span,
Group(group) => match &mut **group {
GroupValue::Single(_, span) | GroupValue::Tuple(GroupTuple { span, .. }) => *span = new_span,
},

View File

@ -17,7 +17,7 @@
use crate::{ArrayDimensions, GroupValue};
use leo_input::{
errors::InputParserError,
expressions::{ArrayInitializerExpression, ArrayInlineExpression, Expression, TupleExpression},
expressions::{ArrayInitializerExpression, ArrayInlineExpression, Expression, StringExpression, TupleExpression},
types::{ArrayType, DataType, IntegerType, TupleType, Type},
values::{
Address,
@ -115,11 +115,59 @@ impl InputValue {
(Type::Array(array_type), Expression::ArrayInitializer(initializer)) => {
InputValue::from_array_initializer(array_type, initializer)
}
(Type::Array(array_type), Expression::StringExpression(string)) => {
InputValue::from_string(array_type, string)
}
(Type::Tuple(tuple_type), Expression::Tuple(tuple)) => InputValue::from_tuple(tuple_type, tuple),
(type_, expression) => Err(InputParserError::expression_type_mismatch(type_, expression)),
}
}
///
/// Returns a new `InputValue` from the given `ArrayType` and `StringExpression`.
///
/// A string input is modeled as a one-dimensional `[char; N]` array: the outer
/// array dimension must equal the string's character count, and
/// multi-dimensional array types are rejected.
///
pub(crate) fn from_string(mut array_type: ArrayType, string: StringExpression) -> Result<Self, InputParserError> {
// Create a new `ArrayDimensions` type from the input array_type dimensions.
let array_dimensions_type = ArrayDimensions::from(array_type.dimensions.clone());
// Convert the array dimensions to usize.
let array_dimensions = parse_array_dimensions(array_dimensions_type, &array_type.span)?;
// Return an error if the outer array dimension does not equal the number of array elements.
// NOTE(review): `array_dimensions[0]` assumes at least one dimension is
// present — presumably guaranteed by the grammar; confirm upstream.
if array_dimensions[0] != string.chars.len() {
return Err(InputParserError::invalid_string_length(
array_dimensions[0],
string.chars.len(),
&string.span,
));
}
// Consume the outer dimension before inspecting the element type.
array_type.dimensions = array_type.dimensions.next_dimension();
let inner_array_type = if array_dimensions.len() == 1 {
// This is a single array
*array_type.type_
} else {
// This is a multi-dimensional array; strings may only be one-dimensional.
return Err(InputParserError::invalid_string_dimensions(&array_type.span));
};
// Convert each parsed character token into a char-typed `InputValue` element.
let mut elements = Vec::with_capacity(string.chars.len());
for character in string.chars.into_iter() {
let element = InputValue::from_expression(
inner_array_type.clone(),
Expression::Value(Value::Char(CharValue {
value: character.clone(),
span: character.span().clone(),
})),
)?;
elements.push(element)
}
Ok(InputValue::Array(elements))
}
///
/// Returns a new `InputValue` from the given `ArrayType` and `ArrayInlineExpression`.
///

View File

@ -485,6 +485,20 @@ impl ReconstructingReducer for Canonicalizer {
}
}
/// Canonicalizes a string literal into an inline array of chars:
/// one `Char` element per character, each carrying the string's span.
fn reduce_string(&mut self, string: &[char], span: &Span) -> Result<Expression, ReducerError> {
    let elements = string
        .iter()
        .map(|c| SpreadOrExpression::Expression(Expression::Value(ValueExpression::Char(*c, span.clone()))))
        .collect();
    Ok(Expression::ArrayInline(ArrayInlineExpression {
        elements,
        span: span.clone(),
    }))
}
fn reduce_array_init(
&mut self,
array_init: &ArrayInitExpression,

View File

@ -51,7 +51,7 @@ impl<R: ReconstructingReducer> ReconstructingDirector<R> {
pub fn reduce_expression(&mut self, expression: &Expression) -> Result<Expression, ReducerError> {
let new = match expression {
Expression::Identifier(identifier) => Expression::Identifier(self.reduce_identifier(&identifier)?),
Expression::Value(value) => Expression::Value(self.reduce_value(&value)?),
Expression::Value(value) => self.reduce_value(&value)?,
Expression::Binary(binary) => Expression::Binary(self.reduce_binary(&binary)?),
Expression::Unary(unary) => Expression::Unary(self.reduce_unary(&unary)?),
Expression::Ternary(ternary) => Expression::Ternary(self.reduce_ternary(&ternary)?),
@ -100,12 +100,17 @@ impl<R: ReconstructingReducer> ReconstructingDirector<R> {
self.reducer.reduce_group_value(group_value, new)
}
pub fn reduce_value(&mut self, value: &ValueExpression) -> Result<ValueExpression, ReducerError> {
/// Delegates reduction of a string literal to the configured reducer.
pub fn reduce_string(&mut self, string: &[char], span: &Span) -> Result<Expression, ReducerError> {
self.reducer.reduce_string(string, span)
}
pub fn reduce_value(&mut self, value: &ValueExpression) -> Result<Expression, ReducerError> {
let new = match value {
ValueExpression::Group(group_value) => {
ValueExpression::Group(Box::new(self.reduce_group_value(&group_value)?))
Expression::Value(ValueExpression::Group(Box::new(self.reduce_group_value(&group_value)?)))
}
_ => value.clone(),
ValueExpression::String(string, span) => self.reduce_string(&string, &span)?,
_ => Expression::Value(value.clone()),
};
self.reducer.reduce_value(value, new)

View File

@ -51,11 +51,14 @@ pub trait ReconstructingReducer {
Ok(new)
}
fn reduce_value(
&mut self,
_value: &ValueExpression,
new: ValueExpression,
) -> Result<ValueExpression, ReducerError> {
/// Default reduction of a string literal: keep it as a `ValueExpression::String`
/// (implementations such as the canonicalizer may instead expand it).
fn reduce_string(&mut self, string: &[char], span: &Span) -> Result<Expression, ReducerError> {
Ok(Expression::Value(ValueExpression::String(
string.to_vec(),
span.clone(),
)))
}
/// Default reduction of a value expression: pass the already-reduced expression through.
fn reduce_value(&mut self, _value: &ValueExpression, new: Expression) -> Result<Expression, ReducerError> {
Ok(new)
}

View File

@ -18,14 +18,43 @@ use crate::{Expression, Node, Span};
use serde::{Deserialize, Serialize};
use std::fmt;
use tendril::StrTendril;
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize, Debug)]
pub enum FormatStringPart {
Const(#[serde(with = "crate::common::tendril_json")] StrTendril),
Const(char),
Container,
}
impl FormatStringPart {
/// Splits a raw string (as chars) into console format-string parts:
/// literal `Const` characters and `{}` `Container` placeholders.
pub fn from_string(string: Vec<char>) -> Vec<Self> {
let mut parts = Vec::new();
// True after seeing `{`, while looking for the matching `}`.
let mut in_container = false;
let mut i = 0;
while i < string.len() {
let character = string[i];
match character {
// Start of a potential `{}` container.
'{' if !in_container => in_container = true,
// `{}` completed: emit a container placeholder.
'}' if in_container => {
in_container = false;
parts.push(FormatStringPart::Container);
}
// `{` was not followed by `}`: emit the `{` as a literal, then
// reprocess the current character (`continue` skips `i += 1`).
_ if in_container => {
in_container = false;
parts.push(FormatStringPart::Const('{'));
continue;
}
// Ordinary literal character.
_ => parts.push(FormatStringPart::Const(character)),
}
i += 1;
}
// NOTE(review): a trailing unmatched `{` at the end of the string is
// silently dropped, and `{{` re-enters container state after emitting a
// literal `{` — confirm this matches the intended escaping rules.
parts
}
}
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize, Debug)]
pub struct FormatString {
pub parts: Vec<FormatStringPart>,
@ -41,8 +70,8 @@ impl fmt::Display for FormatString {
self.parts
.iter()
.map(|x| match x {
FormatStringPart::Const(x) => x,
FormatStringPart::Container => "{}",
FormatStringPart::Const(x) => x.to_string(),
FormatStringPart::Container => "{}".to_string(),
})
.collect::<Vec<_>>()
.join("")

View File

@ -51,8 +51,8 @@ impl<'a, F: PrimeField, G: GroupType<F>> ConstrainedProgram<'a, F, G> {
let mut parameters = executed_containers.iter();
for part in formatted.parts.iter() {
match part {
FormatStringPart::Const(c) => out.push(&**c),
FormatStringPart::Container => out.push(&**parameters.next().unwrap()),
FormatStringPart::Const(c) => out.push(c.to_string()),
FormatStringPart::Container => out.push(parameters.next().unwrap().to_string()),
}
}

View File

@ -152,9 +152,7 @@ impl<R: ReconstructingReducer, O: CombinerOptions> CombineAstAsgDirector<R, O> {
asg: &AsgExpression,
) -> Result<AstExpression, ReducerError> {
let new = match (ast, asg) {
(AstExpression::Value(value), AsgExpression::Constant(const_)) => {
AstExpression::Value(self.reduce_value(&value, &const_)?)
}
(AstExpression::Value(value), AsgExpression::Constant(const_)) => self.reduce_value(&value, &const_)?,
(AstExpression::Binary(ast), AsgExpression::Binary(asg)) => {
AstExpression::Binary(self.reduce_binary(&ast, &asg)?)
}
@ -404,7 +402,7 @@ impl<R: ReconstructingReducer, O: CombinerOptions> CombineAstAsgDirector<R, O> {
self.ast_reducer.reduce_unary(ast, inner, ast.op.clone())
}
pub fn reduce_value(&mut self, ast: &ValueExpression, asg: &AsgConstant) -> Result<ValueExpression, ReducerError> {
pub fn reduce_value(&mut self, ast: &ValueExpression, asg: &AsgConstant) -> Result<AstExpression, ReducerError> {
let mut new = ast.clone();
if self.options.type_inference_enabled() {
@ -436,15 +434,19 @@ impl<R: ReconstructingReducer, O: CombinerOptions> CombineAstAsgDirector<R, O> {
ConstValue::Char(_) => {
if let Some(c) = tendril.chars().next() {
new = ValueExpression::Char(c, span.clone());
} else {
return Err(ReducerError::failed_to_convert_tendril_to_char(
tendril.to_string(),
span,
));
}
// TODO RETURN ERR
}
_ => unimplemented!(), // impossible?
}
}
}
self.ast_reducer.reduce_value(ast, new)
self.ast_reducer.reduce_value(ast, AstExpression::Value(new))
}
pub fn reduce_variable_ref(

View File

@ -99,14 +99,22 @@ impl<'a, F: PrimeField, G: GroupType<F>> fmt::Display for ConstrainedValue<'a, F
// Data type wrappers
ConstrainedValue::Array(ref array) => {
write!(f, "[")?;
for (i, e) in array.iter().enumerate() {
write!(f, "{}", e)?;
if i < array.len() - 1 {
write!(f, ", ")?;
if matches!(array[0], ConstrainedValue::Char(_)) {
for character in array {
write!(f, "{}", character)?;
}
Ok(())
} else {
write!(f, "[")?;
for (i, e) in array.iter().enumerate() {
write!(f, "{}", e)?;
if i < array.len() - 1 {
write!(f, ", ")?;
}
}
write!(f, "]")
}
write!(f, "]")
}
ConstrainedValue::Tuple(ref tuple) => {
let values = tuple.iter().map(|x| x.to_string()).collect::<Vec<_>>().join(", ");

View File

@ -95,3 +95,16 @@ fn test_illegal_array_range_fail() {
let program = parse_program(program_string);
assert!(program.is_err());
}
#[test]
fn test_string_transformation() {
// Compile the fixture program and check its constraints are satisfied.
let program_string = include_str!("string_transformation.leo");
let program = parse_program(program_string).unwrap();
assert_satisfied(program);
// The canonicalized AST must match the stored JSON snapshot exactly.
let ast = parse_program_ast(program_string);
let expected_json = include_str!("string_transformation.json");
let expected_ast: Ast = Ast::from_json_string(expected_json).expect("Unable to parse json.");
assert_eq!(expected_ast, ast);
}

View File

@ -0,0 +1,309 @@
{
"name": "",
"expected_input": [],
"imports": [],
"circuits": {},
"global_consts": {},
"functions": {
"{\"name\":\"main\",\"span\":\"{\\\"line_start\\\":1,\\\"line_stop\\\":1,\\\"col_start\\\":10,\\\"col_stop\\\":14,\\\"path\\\":\\\"\\\",\\\"content\\\":\\\"function main() {\\\"}\"}": {
"annotations": [],
"identifier": "{\"name\":\"main\",\"span\":\"{\\\"line_start\\\":1,\\\"line_stop\\\":1,\\\"col_start\\\":10,\\\"col_stop\\\":14,\\\"path\\\":\\\"\\\",\\\"content\\\":\\\"function main() {\\\"}\"}",
"input": [],
"output": {
"Tuple": []
},
"block": {
"statements": [
{
"Definition": {
"declaration_type": "Let",
"variable_names": [
{
"mutable": true,
"identifier": "{\"name\":\"s\",\"span\":\"{\\\"line_start\\\":2,\\\"line_stop\\\":2,\\\"col_start\\\":9,\\\"col_stop\\\":10,\\\"path\\\":\\\"\\\",\\\"content\\\":\\\" let s = \\\\\\\"Hello, World!\\\\\\\";\\\"}\"}",
"span": {
"line_start": 2,
"line_stop": 2,
"col_start": 9,
"col_stop": 10,
"path": "",
"content": " let s = \"Hello, World!\";"
}
}
],
"type_": {
"Array": [
"Char",
[
{
"value": "13"
}
]
]
},
"value": {
"ArrayInline": {
"elements": [
{
"Expression": {
"Value": {
"Char": [
"H",
{
"line_start": 2,
"line_stop": 2,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " let s = \"Hello, World!\";"
}
]
}
}
},
{
"Expression": {
"Value": {
"Char": [
"e",
{
"line_start": 2,
"line_stop": 2,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " let s = \"Hello, World!\";"
}
]
}
}
},
{
"Expression": {
"Value": {
"Char": [
"l",
{
"line_start": 2,
"line_stop": 2,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " let s = \"Hello, World!\";"
}
]
}
}
},
{
"Expression": {
"Value": {
"Char": [
"l",
{
"line_start": 2,
"line_stop": 2,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " let s = \"Hello, World!\";"
}
]
}
}
},
{
"Expression": {
"Value": {
"Char": [
"o",
{
"line_start": 2,
"line_stop": 2,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " let s = \"Hello, World!\";"
}
]
}
}
},
{
"Expression": {
"Value": {
"Char": [
",",
{
"line_start": 2,
"line_stop": 2,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " let s = \"Hello, World!\";"
}
]
}
}
},
{
"Expression": {
"Value": {
"Char": [
" ",
{
"line_start": 2,
"line_stop": 2,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " let s = \"Hello, World!\";"
}
]
}
}
},
{
"Expression": {
"Value": {
"Char": [
"W",
{
"line_start": 2,
"line_stop": 2,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " let s = \"Hello, World!\";"
}
]
}
}
},
{
"Expression": {
"Value": {
"Char": [
"o",
{
"line_start": 2,
"line_stop": 2,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " let s = \"Hello, World!\";"
}
]
}
}
},
{
"Expression": {
"Value": {
"Char": [
"r",
{
"line_start": 2,
"line_stop": 2,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " let s = \"Hello, World!\";"
}
]
}
}
},
{
"Expression": {
"Value": {
"Char": [
"l",
{
"line_start": 2,
"line_stop": 2,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " let s = \"Hello, World!\";"
}
]
}
}
},
{
"Expression": {
"Value": {
"Char": [
"d",
{
"line_start": 2,
"line_stop": 2,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " let s = \"Hello, World!\";"
}
]
}
}
},
{
"Expression": {
"Value": {
"Char": [
"!",
{
"line_start": 2,
"line_stop": 2,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " let s = \"Hello, World!\";"
}
]
}
}
}
],
"span": {
"line_start": 2,
"line_stop": 2,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " let s = \"Hello, World!\";"
}
}
},
"span": {
"line_start": 2,
"line_stop": 2,
"col_start": 5,
"col_stop": 28,
"path": "",
"content": " let s = \"Hello, World!\";"
}
}
}
],
"span": {
"line_start": 1,
"line_stop": 3,
"col_start": 17,
"col_stop": 2,
"path": "",
"content": "function main() {\n...\n}"
}
},
"span": {
"line_start": 1,
"line_stop": 3,
"col_start": 1,
"col_stop": 2,
"path": "",
"content": "function main() {\n...\n}"
}
}
}
}

View File

@ -0,0 +1,3 @@
function main() {
let s = "Hello, World!";
}

View File

@ -1027,11 +1027,283 @@
"content": " const n = 'a';"
}
}
},
{
"Definition": {
"declaration_type": "Const",
"variable_names": [
{
"mutable": false,
"identifier": "{\"name\":\"o\",\"span\":\"{\\\"line_start\\\":24,\\\"line_stop\\\":24,\\\"col_start\\\":9,\\\"col_stop\\\":10,\\\"path\\\":\\\"\\\",\\\"content\\\":\\\" const o = \\\\\\\"Hello, World!\\\\\\\";\\\"}\"}",
"span": {
"line_start": 24,
"line_stop": 24,
"col_start": 9,
"col_stop": 10,
"path": "",
"content": " const o = \"Hello, World!\";"
}
}
],
"type_": {
"Array": [
"Char",
[
{
"value": "13"
}
]
]
},
"value": {
"ArrayInline": {
"elements": [
{
"Expression": {
"Value": {
"Char": [
"H",
{
"line_start": 24,
"line_stop": 24,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " const o = \"Hello, World!\";"
}
]
}
}
},
{
"Expression": {
"Value": {
"Char": [
"e",
{
"line_start": 24,
"line_stop": 24,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " const o = \"Hello, World!\";"
}
]
}
}
},
{
"Expression": {
"Value": {
"Char": [
"l",
{
"line_start": 24,
"line_stop": 24,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " const o = \"Hello, World!\";"
}
]
}
}
},
{
"Expression": {
"Value": {
"Char": [
"l",
{
"line_start": 24,
"line_stop": 24,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " const o = \"Hello, World!\";"
}
]
}
}
},
{
"Expression": {
"Value": {
"Char": [
"o",
{
"line_start": 24,
"line_stop": 24,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " const o = \"Hello, World!\";"
}
]
}
}
},
{
"Expression": {
"Value": {
"Char": [
",",
{
"line_start": 24,
"line_stop": 24,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " const o = \"Hello, World!\";"
}
]
}
}
},
{
"Expression": {
"Value": {
"Char": [
" ",
{
"line_start": 24,
"line_stop": 24,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " const o = \"Hello, World!\";"
}
]
}
}
},
{
"Expression": {
"Value": {
"Char": [
"W",
{
"line_start": 24,
"line_stop": 24,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " const o = \"Hello, World!\";"
}
]
}
}
},
{
"Expression": {
"Value": {
"Char": [
"o",
{
"line_start": 24,
"line_stop": 24,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " const o = \"Hello, World!\";"
}
]
}
}
},
{
"Expression": {
"Value": {
"Char": [
"r",
{
"line_start": 24,
"line_stop": 24,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " const o = \"Hello, World!\";"
}
]
}
}
},
{
"Expression": {
"Value": {
"Char": [
"l",
{
"line_start": 24,
"line_stop": 24,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " const o = \"Hello, World!\";"
}
]
}
}
},
{
"Expression": {
"Value": {
"Char": [
"d",
{
"line_start": 24,
"line_stop": 24,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " const o = \"Hello, World!\";"
}
]
}
}
},
{
"Expression": {
"Value": {
"Char": [
"!",
{
"line_start": 24,
"line_stop": 24,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " const o = \"Hello, World!\";"
}
]
}
}
}
],
"span": {
"line_start": 24,
"line_stop": 24,
"col_start": 13,
"col_stop": 28,
"path": "",
"content": " const o = \"Hello, World!\";"
}
}
},
"span": {
"line_start": 24,
"line_stop": 24,
"col_start": 3,
"col_stop": 28,
"path": "",
"content": " const o = \"Hello, World!\";"
}
}
}
],
"span": {
"line_start": 9,
"line_stop": 24,
"line_stop": 25,
"col_start": 17,
"col_stop": 2,
"path": "",
@ -1040,11 +1312,11 @@
},
"span": {
"line_start": 9,
"line_stop": 24,
"line_stop": 25,
"col_start": 1,
"col_stop": 2,
"path": "",
"content": "function main() {\n...\n}\n\n\n\n\n\n\n\n\n\n\n\n\n"
"content": "function main() {\n...\n}\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
}
}
}

View File

@ -21,4 +21,5 @@ function main() {
const l = (1u8, 1u8, true);
const m = Foo {};
const n = 'a';
const o = "Hello, World!";
}

View File

@ -89,6 +89,21 @@ impl InputParserError {
Self::new_from_span(message, span)
}
/// Error for a string type declared as a multi-dimensional char array;
/// string literals may only map to one-dimensional `[char; N]` arrays.
pub fn invalid_string_dimensions(span: &Span) -> Self {
    // Fixed typo in the user-facing message: "defintion" -> "definition".
    let message = "String type definition of a char array should not be multi-dimensional".to_string();
    Self::new_from_span(message, span)
}
/// Error for a string literal whose length does not match the declared
/// `[char; N]` array size (`expected` = declared size, `received` = string length).
pub fn invalid_string_length(expected: usize, received: usize, span: &Span) -> Self {
let message = format!(
"Expected size of char array `{}` to match string size instead received `{}`",
expected, received
);
Self::new_from_span(message, span)
}
pub fn implicit_type(data_type: DataType, implicit: NumberValue) -> Self {
let message = format!("expected `{}`, found `{}`", data_type, implicit);

View File

@ -25,6 +25,7 @@ use std::fmt;
pub enum Expression<'ast> {
ArrayInitializer(ArrayInitializerExpression<'ast>),
ArrayInline(ArrayInlineExpression<'ast>),
StringExpression(StringExpression<'ast>),
Tuple(TupleExpression<'ast>),
Value(Value<'ast>),
}
@ -34,6 +35,7 @@ impl<'ast> Expression<'ast> {
match self {
Expression::ArrayInitializer(expression) => &expression.span,
Expression::ArrayInline(expression) => &expression.span,
Expression::StringExpression(string) => &string.span,
Expression::Tuple(tuple) => &tuple.span,
Expression::Value(value) => value.span(),
}
@ -56,6 +58,7 @@ impl<'ast> fmt::Display for Expression<'ast> {
write!(f, "array [{}]", values)
}
Expression::StringExpression(ref string) => write!(f, "{}", string),
Expression::Tuple(ref tuple) => {
let values = tuple
.expressions

View File

@ -25,3 +25,6 @@ pub use expression::*;
pub mod tuple_expression;
pub use tuple_expression::*;
pub mod string_expression;
pub use string_expression::*;

View File

@ -0,0 +1,41 @@
// Copyright (C) 2019-2021 Aleo Systems Inc.
// This file is part of the Leo library.
// The Leo library is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// The Leo library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
use crate::{ast::Rule, values::CharTypes};
use pest::Span;
use pest_ast::FromPest;
use std::fmt;
/// A string literal in an input file, produced by the `expression_string`
/// grammar rule; stored as the sequence of parsed character tokens.
#[derive(Clone, Debug, FromPest, PartialEq, Eq)]
#[pest_ast(rule(Rule::expression_string))]
pub struct StringExpression<'ast> {
// One `CharTypes` token per character of the string.
pub chars: Vec<CharTypes<'ast>>,
// Source span covering the whole quoted string.
#[pest_ast(outer())]
pub span: Span<'ast>,
}
impl<'ast> fmt::Display for StringExpression<'ast> {
/// Renders the string surrounded by double quotes.
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "\"")?;
for character in self.chars.iter() {
// NOTE(review): `{:?}` prints the Debug form of each `CharTypes`
// variant, not the character itself — confirm Display output is as intended.
write!(f, "{:?}", character)?;
}
write!(f, "\"")
}
}

View File

@ -148,7 +148,7 @@ char_types = {
}
// Declared in values/char_value.rs
value_char = { "\'" ~ char_types ~ "\'" }
value_char = ${ "\'" ~ char_types ~ "\'" }
// Declared in values/integer_value.rs
value_integer = { value_integer_signed | value_integer_unsigned}
@ -200,12 +200,16 @@ expression_array_initializer = { "[" ~ expression ~ ";" ~ array_dimensions ~ "]"
expression_array_inline = { "[" ~ NEWLINE* ~ inline_array_inner ~ NEWLINE* ~ "]"}
inline_array_inner = _{ (expression ~ ("," ~ NEWLINE* ~ expression)*)? }
// Declared in expressions/string_expression.rs
expression_string = ${ "\"" ~ (!"\"" ~ char_types)+ ~ "\"" }
// Declared in expressions/expression.rs
expression = {
value
| expression_tuple
| expression_array_inline
| expression_array_initializer
| expression_string
}
expression_tuple = { "(" ~ expression ~ ("," ~ expression)+ ~")" }

View File

@ -67,6 +67,17 @@ pub enum CharTypes<'ast> {
Unicode(UnicodeChar<'ast>),
}
impl<'ast> CharTypes<'ast> {
/// Returns the source span of the underlying character token,
/// regardless of which variant holds it.
pub fn span(&self) -> &Span<'ast> {
match self {
CharTypes::Basic(value) => &value.span,
CharTypes::Escaped(value) => &value.span,
CharTypes::Hex(value) => &value.span,
CharTypes::Unicode(value) => &value.span,
}
}
}
impl<'ast> CharTypes<'ast> {
pub fn inner(self) -> Result<char, InputParserError> {
match self {
@ -78,14 +89,23 @@ impl<'ast> CharTypes<'ast> {
Err(InputParserError::invalid_char(character.value, &character.span))
}
Self::Escaped(character) => {
if let Some(character) = character.value.chars().nth(1) {
return Ok(character);
if let Some(inner) = character.value.chars().nth(1) {
return match inner {
'0' => Ok(0 as char),
't' => Ok(9 as char),
'n' => Ok(10 as char),
'r' => Ok(13 as char),
'\"' => Ok(34 as char),
'\'' => Ok(39 as char),
'\\' => Ok(92 as char),
_ => Err(InputParserError::invalid_char(character.value, &character.span)),
};
}
Err(InputParserError::invalid_char(character.value, &character.span))
}
Self::Hex(character) => {
let hex_string_number = character.value[3..character.value.len()].to_string();
let hex_string_number = character.value[2..character.value.len()].to_string();
if let Ok(number) = u8::from_str_radix(&hex_string_number, 16) {
if number < 127 {
return Ok(number as char);

View File

@ -690,6 +690,7 @@ impl ParserContext {
Token::False => Expression::Value(ValueExpression::Boolean("false".into(), span)),
Token::AddressLit(value) => Expression::Value(ValueExpression::Address(value, span)),
Token::CharLit(value) => Expression::Value(ValueExpression::Char(value, span)),
Token::StringLiteral(value) => Expression::Value(ValueExpression::String(value, span)),
Token::LeftParen => self.parse_tuple_expression(&span)?,
Token::LeftSquare => self.parse_array_expression(&span)?,
Token::Ident(name) => {

View File

@ -226,16 +226,19 @@ impl ParserContext {
///
pub fn parse_formatted_string(&mut self) -> SyntaxResult<FormatString> {
let start_span;
let parts = match self.expect_any()? {
let string = match self.expect_any()? {
SpannedToken {
token: Token::FormatString(parts),
token: Token::StringLiteral(chars),
span,
} => {
start_span = span;
parts
chars
}
SpannedToken { token, span } => return Err(SyntaxError::unexpected_str(&token, "formatted string", &span)),
};
let parts = FormatStringPart::from_string(string);
let mut parameters = Vec::new();
while self.eat(Token::Comma).is_some() {
let param = self.parse_expression()?;
@ -243,13 +246,7 @@ impl ParserContext {
}
Ok(FormatString {
parts: parts
.into_iter()
.map(|x| match x {
crate::FormatStringPart::Const(value) => FormatStringPart::Const(value),
crate::FormatStringPart::Container => FormatStringPart::Container,
})
.collect(),
parts,
span: &start_span + parameters.last().map(|x| x.span()).unwrap_or(&start_span),
parameters,
})

View File

@ -14,7 +14,7 @@
// You should have received a copy of the GNU General Public License
// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.
use crate::tokenizer::{FormatStringPart, Token};
use crate::tokenizer::Token;
use leo_ast::Span;
use serde::{Deserialize, Serialize};
use tendril::StrTendril;
@ -62,9 +62,9 @@ fn eat_identifier(input_tendril: &StrTendril) -> Option<StrTendril> {
impl Token {
///
/// Returns a new `Token::CharLit` if a character can be eaten, otherwise returns [`None`].
/// Returns a `char` if a character can be eaten, otherwise returns [`None`].
///
fn eat_char(input_tendril: StrTendril, escaped: bool, hex: bool, unicode: bool) -> Option<Token> {
fn eat_char(input_tendril: StrTendril, escaped: bool, hex: bool, unicode: bool) -> Option<char> {
if input_tendril.is_empty() {
return None;
}
@ -79,13 +79,13 @@ impl Token {
if let Some(character) = escaped.chars().next() {
return match character {
'0' => Some(Token::CharLit(0 as char)),
't' => Some(Token::CharLit(9 as char)),
'n' => Some(Token::CharLit(10 as char)),
'r' => Some(Token::CharLit(13 as char)),
'\"' => Some(Token::CharLit(34 as char)),
'\'' => Some(Token::CharLit(39 as char)),
'\\' => Some(Token::CharLit(92 as char)),
'0' => Some(0 as char),
't' => Some(9 as char),
'n' => Some(10 as char),
'r' => Some(13 as char),
'\"' => Some(34 as char),
'\'' => Some(39 as char),
'\\' => Some(92 as char),
_ => None,
};
} else {
@ -102,7 +102,12 @@ impl Token {
}
if let Ok(ascii_number) = u8::from_str_radix(&hex_string, 16) {
return Some(Token::CharLit(ascii_number as char));
// According to RFC, we allow only values less than 128.
if ascii_number > 127 {
return None;
}
return Some(ascii_number as char);
}
}
@ -112,13 +117,13 @@ impl Token {
if let Ok(hex) = u32::from_str_radix(&unicode_number, 16) {
if let Some(character) = std::char::from_u32(hex) {
return Some(Token::CharLit(character));
return Some(character);
}
}
}
if let Some(character) = input_tendril.to_string().chars().next() {
return Some(Token::CharLit(character));
return Some(character);
}
None
@ -170,48 +175,80 @@ impl Token {
x if x.is_ascii_whitespace() => return (1, None),
b'"' => {
let mut i = 1;
let mut len: u32 = 1;
let mut start = 1;
let mut in_escape = false;
let mut start = 1usize;
let mut segments = Vec::new();
let mut escaped = false;
let mut hex = false;
let mut unicode = false;
let mut end = false;
let mut string = Vec::new();
while i < input.len() {
if !in_escape {
if input[i] == b'"' {
end = true;
break;
}
if input[i] == b'\\' {
in_escape = !in_escape;
} else if i < input.len() - 1 && input[i] == b'{' {
if i < input.len() - 2 && input[i + 1] == b'{' {
i += 2;
continue;
} else if input[i + 1] != b'}' {
i += 1;
continue;
}
if start < i {
segments.push(FormatStringPart::Const(
input_tendril.subtendril(start as u32, (i - start) as u32),
));
}
segments.push(FormatStringPart::Container);
start = i + 2;
i = start;
} else if input[i] == b'\\' {
in_escape = true;
start = i;
i += 1;
continue;
}
} else {
in_escape = false;
len += 1;
match input[i] {
b'x' => {
hex = true;
}
b'u' => {
unicode = true;
}
b'}' if unicode => {
in_escape = false;
}
_ if !hex && !unicode => {
escaped = true;
in_escape = false;
}
_ if hex && len == 4 => {
in_escape = false;
}
_ => {}
}
}
if !in_escape {
match Self::eat_char(
input_tendril.subtendril(start as u32, len as u32),
escaped,
hex,
unicode,
) {
Some(character) => {
len = 1;
escaped = false;
hex = false;
unicode = false;
string.push(character);
}
None => return (0, None),
}
}
i += 1;
if !escaped && !hex && !unicode {
start = i;
}
}
if i == input.len() {
if i == input.len() || i == 1 || !end {
return (0, None);
}
if start < i {
segments.push(FormatStringPart::Const(
input_tendril.subtendril(start as u32, (i - start) as u32),
));
}
return (i + 1, Some(Token::FormatString(segments)));
return (i + 1, Some(Token::StringLiteral(string)));
}
b'\'' => {
let mut i = 1;
@ -248,13 +285,10 @@ impl Token {
return (0, None);
}
let result = Self::eat_char(input_tendril.subtendril(1, (i - 1) as u32), escaped, hex, unicode);
if result.is_none() {
return (0, None);
}
return (i + 1, result);
return match Self::eat_char(input_tendril.subtendril(1, (i - 1) as u32), escaped, hex, unicode) {
Some(character) => (i + 1, Some(Token::CharLit(character))),
None => (0, None),
};
}
x if x.is_ascii_digit() => {
return Self::eat_integer(&input_tendril);

View File

@ -259,6 +259,6 @@ mod tests {
let original = &raw[*start + token.span.col_start - 1..*stop + token.span.col_stop - 1];
assert_eq!(original, &token_raw);
}
println!("{}", serde_json::to_string_pretty(&tokens).unwrap());
// println!("{}", serde_json::to_string_pretty(&tokens).unwrap());
}
}

View File

@ -18,22 +18,6 @@ use serde::{Deserialize, Serialize};
use std::fmt;
use tendril::StrTendril;
/// Parts of a formatted string for logging to the console.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum FormatStringPart {
Const(#[serde(with = "leo_ast::common::tendril_json")] StrTendril),
Container,
}
impl fmt::Display for FormatStringPart {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
FormatStringPart::Const(c) => write!(f, "{}", c),
FormatStringPart::Container => write!(f, "{{}}"),
}
}
}
/// Represents all valid Leo syntax tokens.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum Token {
@ -41,7 +25,7 @@ pub enum Token {
// Literals
CommentLine(#[serde(with = "leo_ast::common::tendril_json")] StrTendril),
CommentBlock(#[serde(with = "leo_ast::common::tendril_json")] StrTendril),
FormatString(Vec<FormatStringPart>),
StringLiteral(Vec<char>),
Ident(#[serde(with = "leo_ast::common::tendril_json")] StrTendril),
Int(#[serde(with = "leo_ast::common::tendril_json")] StrTendril),
True,
@ -207,11 +191,10 @@ impl fmt::Display for Token {
match self {
CommentLine(s) => write!(f, "{}", s),
CommentBlock(s) => write!(f, "{}", s),
FormatString(parts) => {
// todo escapes
StringLiteral(content) => {
write!(f, "\"")?;
for part in parts.iter() {
part.fmt(f)?;
for character in content {
write!(f, "{}", character)?;
}
write!(f, "\"")
}

View File

@ -20,6 +20,8 @@ const field_test: field = 2;
const use_another_const = basic + 1;
const foo = Foo { width: 10, height: 20 };
const uno = uno();
const character = 'a';
const hello = "Hello, World!";
circuit Foo {
width: u32,
@ -47,5 +49,7 @@ function main(a: u32) -> bool {
&& use_another_const == 9u32 // use another const test
&& foo.width == 10u32 // circuit test
&& foo.height == 20u32
&& uno == 1u32; // function test
&& uno == 1u32 // function test
&& character == 'a' // char test
&& hello == "Hello, World!";
}

View File

@ -0,0 +1,22 @@
/*
namespace: Compile
expectation: Pass
input_file:
- inputs/string_out.in
*/
// Circuit holding a single 13-character string member.
// Strings in Leo lower to fixed-size char arrays, so `s1` is a [char; 13];
// this fixture checks that a string value can be stored in a circuit field.
circuit Foo {
    s1: [char; 13];
}
// Returns true iff the given 13-char string equals "Hello, World!".
// Exercises passing a string ([char; 13]) as a function argument and
// comparing it against a string literal with `==`.
function takes_string(s: [char; 13]) -> bool {
    return s == "Hello, World!";
}
// Entry point: exercises strings as circuit members, function arguments,
// and conditional (ternary) branch values. Echoes the input back through
// a circuit field when it equals "Hello, World!", otherwise returns the
// 13-char fallback literal "abcdefghjklmn".
function main(s1: [char; 13]) -> [char; 13] {
    // Store the string in a circuit, then read it back via f.s1 below.
    let f = Foo { s1 };
    // Call result `b` is intentionally unused — the call itself is what
    // is under test (string as a function argument).
    let b = takes_string(s1);
    // Ternary over string values; both branches must be [char; 13].
    let result = f.s1 == "Hello, World!" ? s1 : "abcdefghjklmn";
    return result;
}

View File

@ -0,0 +1,15 @@
/*
namespace: Compile
expectation: Pass
input_file:
- inputs/string.in
- inputs/weird.in
*/
// Entry point: exercises string equality/inequality, range slicing, and
// concatenation. Inputs come from inputs/string.in and inputs/weird.in
// (s1 is a 13-char string, s2 a 4-char string).
function main(s1: [char; 13], s2: [char; 4]) -> bool {
    let hello: [char; 13] = "Hello, World!";
    let part1 = "Good";
    let part2 = " dog!";
    // Strings are [char; N] arrays, so array-spread syntax concatenates:
    // "Good" (4 chars) ++ " dog!" (5 chars) -> [char; 9].
    let concat: [char; 9] = [...part1, ...part2];
    // s2[1..3] slices the middle two characters (e.g. "es" out of "test").
    return hello == s1 && "nope" != s2 && "es" == s2[1..3] && concat == "Good dog!";
}

View File

@ -0,0 +1,6 @@
[main]
s1: [char; 13] = "Hello, World!";
s2: [char; 4] = "test";
[registers]
out: bool = true;

View File

@ -0,0 +1,5 @@
[main]
s1: [char; 13] = "Hello, World!";
[registers]
out: [char; 13] = "Hello, World!";

View File

@ -0,0 +1,6 @@
[main]
s1: [char; 13] = "\"ello, World\"";
s2: [char; 4] = "\u{2764}\x2A\x09\u{2764}";
[registers]
out: bool = true;

View File

@ -33,7 +33,7 @@ outputs:
registers:
r:
type: char
value: "\n"
value: "*"
- input_file: inputs/unicode.in
output:
registers:

View File

@ -2,4 +2,4 @@
namespace: Compile
expectation: Fail
outputs:
- " --> compiler-test:4:17\n |\n 4 | console.log(\"\", 1u32);\n | ^^^^^^^^\n |\n = function call expected 1 arguments, got 2"
- " --> compiler-test:4:17\n |\n 4 | console.log(\"\", 1u32);\n | ^\n |\n = unexpected token: '\"'"

View File

@ -0,0 +1,18 @@
---
namespace: Compile
expectation: Pass
outputs:
- circuit:
num_public_variables: 0
num_private_variables: 141
num_constraints: 115
at: 145ada587c833434abb89c3349d19e06365fda3eb9b2a227046a78469e3ca313
bt: f2945a3bc1beaee407bb4ec35303115a93a8c68886d97011cd65ec6d899664e8
ct: 10b997b6341b3cf811cb7b0fdb891f91006d41c50e9f9566ff92f92816153dfc
output:
- input_file: inputs/string_out.in
output:
registers:
out:
type: "[char; 13]"
value: "Hello, World!"

View File

@ -0,0 +1,24 @@
---
namespace: Compile
expectation: Pass
outputs:
- circuit:
num_public_variables: 0
num_private_variables: 92
num_constraints: 75
at: c03384068dd2b9bd528c65e301960d5c4f61bf94949c9080f28a4bc57f81e856
bt: 7f653d4004b13eee112bbefcf900b0c8201524129423cdda82799c18fdcfb3f7
ct: bce0d8a64cf530613e693412358d17d231ec8516108ab4ee5a47ddf185a471fa
output:
- input_file: inputs/string.in
output:
registers:
out:
type: bool
value: "true"
- input_file: inputs/weird.in
output:
registers:
out:
type: bool
value: "false"

View File

@ -1,5 +1,5 @@
---
namespace: ParseExpression
namespace: Token
expectation: Fail
outputs:
- " --> test:1:1\n |\n 1 | '\\'\n | ^\n |\n = unexpected token: '''"

View File

@ -0,0 +1,12 @@
---
namespace: Token
expectation: Fail
outputs:
- " --> test:1:1\n |\n 1 | \"\"\n | ^\n |\n = unexpected token: '\"'"
- " --> test:1:1\n |\n 1 | \"Hello world!\n | ^\n |\n = unexpected token: '\"'"
- " --> test:1:1\n |\n 1 | \"\\\"\n | ^\n |\n = unexpected token: '\"'"
- " --> test:1:1\n |\n 1 | \"\\l\"\n | ^\n |\n = unexpected token: '\"'"
- " --> test:1:1\n |\n 1 | \"\\uaaa\"\n | ^\n |\n = unexpected token: '\"'"
- " --> test:1:1\n |\n 1 | \"\\u\"\n | ^\n |\n = unexpected token: '\"'"
- " --> test:1:1\n |\n 1 | \"\\xFF\"\n | ^\n |\n = unexpected token: '\"'"
- " --> test:1:1\n |\n 1 | \"\\x\"\n | ^\n |\n = unexpected token: '\"'"

View File

@ -0,0 +1,160 @@
---
namespace: ParseExpression
expectation: Pass
outputs:
- Value:
String:
- - s
- t
- r
- i
- n
- g
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 9
path: test
content: "\"string\""
- Value:
String:
- - a
- n
- o
- t
- h
- e
- r
- " "
- "{"
- " "
- "}"
- " "
- s
- t
- r
- i
- n
- g
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 21
path: test
content: "\"another { } string\""
- Value:
String:
- - "{"
- " "
- "]"
- " "
- "["
- " "
- ;
- " "
- a
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 12
path: test
content: "\"{ ] [ ; a\""
- Value:
String:
- - ࿺
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 10
path: test
content: "\"\\u{FFA}\""
- Value:
String:
- - 򯫺
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 12
path: test
content: "\"\\u{afafa}\""
- Value:
String:
- - 꾯
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 11
path: test
content: "\"\\u{afaf}\""
- Value:
String:
- - ૺ
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 10
path: test
content: "\"\\u{afa}\""
- Value:
String:
- - ¯
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 9
path: test
content: "\"\\u{af}\""
- Value:
String:
- - "\n"
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 8
path: test
content: "\"\\u{a}\""
- Value:
String:
- - "\n"
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 7
path: test
content: "\"\\x0A\""
- Value:
String:
- - 
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 7
path: test
content: "\"\\x7F\""
- Value:
String:
- - a
- a
- " "
- "\\"
- " "
- "\""
- " "
- " "
- "\n"
- " "
- a
- a
- " "
- "\t"
- " "
- "\r"
- " "
- " "
- "\u0000"
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 28
path: test
content: "\"aa \\\\ \\\" \\n aa \\t \\r \\0\""

View File

@ -1,5 +1,5 @@
/*
namespace: ParseExpression
namespace: Token
expectation: Fail
*/

View File

@ -0,0 +1,20 @@
/*
namespace: Token
expectation: Fail
*/
""
"Hello world!
"\"
"\l"
"\uaaa"
"\u"
"\xFF"
"\x"

View File

@ -0,0 +1,23 @@
/*
namespace: ParseExpression
expectation: Pass
*/
"string"
"another { } string"
"{ ] [ ; a"
"\u{FFA}"
"\u{afafa}"
"\u{afaf}"
"\u{afa}"
"\u{af}"
"\u{a}"
"\x0A"
"\x7F"
"aa \\ \" \n aa \t \r \0"