From 32bd282030cfb0af22ee3793baeb7386b2bd0c6e Mon Sep 17 00:00:00 2001 From: damirka Date: Tue, 18 May 2021 18:56:55 +0300 Subject: [PATCH 01/20] 1st draft for string parsing and ast type - currently uses back quotes "`" for strings, change later - ast -> asg unimplemented, strings need to be processed on canonicalization stage --- asg/src/expression/constant.rs | 3 + ast/src/expression/value.rs | 13 +++- parser/src/parser/expression.rs | 1 + parser/src/tokenizer/lexer.rs | 10 +++ parser/src/tokenizer/mod.rs | 2 +- parser/src/tokenizer/token.rs | 8 +++ .../parser/parser/expression/string.leo.out | 63 +++++++++++++++++++ tests/parser/expression/string.leo | 10 +++ 8 files changed, 107 insertions(+), 3 deletions(-) create mode 100644 tests/expectations/parser/parser/expression/string.leo.out create mode 100644 tests/parser/expression/string.leo diff --git a/asg/src/expression/constant.rs b/asg/src/expression/constant.rs index 99077a2756..42a3ca8e45 100644 --- a/asg/src/expression/constant.rs +++ b/asg/src/expression/constant.rs @@ -220,6 +220,9 @@ impl<'a> FromAst<'a, leo_ast::ValueExpression> for Constant<'a> { value: ConstValue::Int(ConstInt::parse(int_type, value, span)?), } } + String(_str_type, _value) => { + unimplemented!("strings do not exist on ASG level") + } }) } } diff --git a/ast/src/expression/value.rs b/ast/src/expression/value.rs index 865df9235a..e153801cbd 100644 --- a/ast/src/expression/value.rs +++ b/ast/src/expression/value.rs @@ -33,6 +33,7 @@ pub enum ValueExpression { #[serde(with = "crate::common::tendril_json")] StrTendril, Span, ), + String(Vec, Span), } impl fmt::Display for ValueExpression { @@ -46,6 +47,12 @@ impl fmt::Display for ValueExpression { Implicit(implicit, _) => write!(f, "{}", implicit), Integer(value, type_, _) => write!(f, "{}{}", value, type_), Group(group) => write!(f, "{}", group), + String(char_vec, _) => { + for character in char_vec { + write!(f, "{}", character)? 
+ } + Ok(()) + } } } } @@ -59,7 +66,8 @@ impl Node for ValueExpression { | Char(_, span) | Field(_, span) | Implicit(_, span) - | Integer(_, _, span) => span, + | Integer(_, _, span) // => span, + | String(_, span) => span, Group(group) => match &**group { GroupValue::Single(_, span) | GroupValue::Tuple(GroupTuple { span, .. }) => span, }, @@ -74,7 +82,8 @@ impl Node for ValueExpression { | Char(_, span) | Field(_, span) | Implicit(_, span) - | Integer(_, _, span) => *span = new_span, + | Integer(_, _, span) // => *span = new_span, + | String(_, span) => *span = new_span, Group(group) => match &mut **group { GroupValue::Single(_, span) | GroupValue::Tuple(GroupTuple { span, .. }) => *span = new_span, }, diff --git a/parser/src/parser/expression.rs b/parser/src/parser/expression.rs index 40ca52e2bd..8647da9ea5 100644 --- a/parser/src/parser/expression.rs +++ b/parser/src/parser/expression.rs @@ -690,6 +690,7 @@ impl ParserContext { Token::False => Expression::Value(ValueExpression::Boolean("false".into(), span)), Token::AddressLit(value) => Expression::Value(ValueExpression::Address(value, span)), Token::CharLit(value) => Expression::Value(ValueExpression::Char(value, span)), + Token::QuotedString(value) => Expression::Value(ValueExpression::String(value, span)), Token::LeftParen => self.parse_tuple_expression(&span)?, Token::LeftSquare => self.parse_array_expression(&span)?, Token::Ident(name) => { diff --git a/parser/src/tokenizer/lexer.rs b/parser/src/tokenizer/lexer.rs index 9984f9dab2..8042e6fa23 100644 --- a/parser/src/tokenizer/lexer.rs +++ b/parser/src/tokenizer/lexer.rs @@ -235,6 +235,16 @@ impl Token { let input = input_tendril[..].as_bytes(); match input[0] { x if x.is_ascii_whitespace() => return (1, None), + b'`' => { + let mut collect: Vec = Vec::new(); + for (i, char_bytes) in input.iter().enumerate().skip(1) { + if *char_bytes == b'`' { + return (i + 1, Some(Token::QuotedString(collect))); + } + + collect.push(std::char::from_u32(*char_bytes as 
u32).unwrap()); + } + } b'"' => { let mut i = 1; let mut in_escape = false; diff --git a/parser/src/tokenizer/mod.rs b/parser/src/tokenizer/mod.rs index 9637eabbdb..adfcbb22e5 100644 --- a/parser/src/tokenizer/mod.rs +++ b/parser/src/tokenizer/mod.rs @@ -259,6 +259,6 @@ mod tests { let original = &raw[*start + token.span.col_start - 1..*stop + token.span.col_stop - 1]; assert_eq!(original, &token_raw); } - println!("{}", serde_json::to_string_pretty(&tokens).unwrap()); + // println!("{}", serde_json::to_string_pretty(&tokens).unwrap()); } } diff --git a/parser/src/tokenizer/token.rs b/parser/src/tokenizer/token.rs index d7da426589..3a9e3026b4 100644 --- a/parser/src/tokenizer/token.rs +++ b/parser/src/tokenizer/token.rs @@ -42,6 +42,7 @@ pub enum Token { CommentLine(#[serde(with = "leo_ast::common::tendril_json")] StrTendril), CommentBlock(#[serde(with = "leo_ast::common::tendril_json")] StrTendril), FormatString(Vec), + QuotedString(Vec), Ident(#[serde(with = "leo_ast::common::tendril_json")] StrTendril), Int(#[serde(with = "leo_ast::common::tendril_json")] StrTendril), True, @@ -215,6 +216,13 @@ impl fmt::Display for Token { } write!(f, "\"") } + QuotedString(content) => { + write!(f, "\"")?; + for character in content { + write!(f, "{}", character)?; + } + write!(f, "\"") + } Ident(s) => write!(f, "{}", s), Int(s) => write!(f, "{}", s), True => write!(f, "true"), diff --git a/tests/expectations/parser/parser/expression/string.leo.out b/tests/expectations/parser/parser/expression/string.leo.out new file mode 100644 index 0000000000..4fc7fe22ef --- /dev/null +++ b/tests/expectations/parser/parser/expression/string.leo.out @@ -0,0 +1,63 @@ +--- +namespace: ParseExpression +expectation: Pass +outputs: + - Value: + String: + - - s + - t + - r + - i + - n + - g + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 9 + path: test + content: "`string`" + - Value: + String: + - - a + - n + - o + - t + - h + - e + - r + - " " + - "{" + - " " + - "}" + - " " + - s + 
- t + - r + - i + - n + - g + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 21 + path: test + content: "`another { } string`" + - Value: + String: + - - "{" + - " " + - "\\" + - " " + - "]" + - " " + - "[" + - " " + - ; + - " " + - a + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 14 + path: test + content: "`{ \\ ] [ ; a`" diff --git a/tests/parser/expression/string.leo b/tests/parser/expression/string.leo new file mode 100644 index 0000000000..efd8cec921 --- /dev/null +++ b/tests/parser/expression/string.leo @@ -0,0 +1,10 @@ +/* +namespace: ParseExpression +expectation: Pass +*/ + +`string` + +`another { } string` + +`{ \ ] [ ; a` From 39b61a066977225e86ae4249eee8588f5627fb15 Mon Sep 17 00:00:00 2001 From: damirka Date: Wed, 19 May 2021 23:36:53 +0300 Subject: [PATCH 02/20] added character parsing --- parser/src/parser/expression.rs | 2 +- parser/src/tokenizer/lexer.rs | 143 ++++++++++++++++++++++++++++++-- parser/src/tokenizer/token.rs | 4 +- 3 files changed, 138 insertions(+), 11 deletions(-) diff --git a/parser/src/parser/expression.rs b/parser/src/parser/expression.rs index 8647da9ea5..dc0e8aca01 100644 --- a/parser/src/parser/expression.rs +++ b/parser/src/parser/expression.rs @@ -690,7 +690,7 @@ impl ParserContext { Token::False => Expression::Value(ValueExpression::Boolean("false".into(), span)), Token::AddressLit(value) => Expression::Value(ValueExpression::Address(value, span)), Token::CharLit(value) => Expression::Value(ValueExpression::Char(value, span)), - Token::QuotedString(value) => Expression::Value(ValueExpression::String(value, span)), + Token::StringLiteral(value) => Expression::Value(ValueExpression::String(value, span)), Token::LeftParen => self.parse_tuple_expression(&span)?, Token::LeftSquare => self.parse_array_expression(&span)?, Token::Ident(name) => { diff --git a/parser/src/tokenizer/lexer.rs b/parser/src/tokenizer/lexer.rs index 8042e6fa23..f24e91b8f9 100644 --- a/parser/src/tokenizer/lexer.rs +++ 
b/parser/src/tokenizer/lexer.rs @@ -61,6 +61,140 @@ fn eat_identifier(input_tendril: &StrTendril) -> Option { } impl Token { + /// + /// Eats symbol. Used for string literals and char literals + /// + fn eat_string(input_tendril: &StrTendril) -> (usize, Option) { + if input_tendril.is_empty() { + return (0, None); + } + + let input = input_tendril[..].as_bytes(); + + // let mut prev = b'`'; + // let mut escaped = false; + let mut collect: Vec = Vec::new(); + + let mut iter = input.iter().enumerate().skip(1); + + while let Some((i, symbol)) = iter.next() { + let symbol = *symbol; + + if symbol == b'`' { + return (i + 1, Some(Token::StringLiteral(collect))); + } + + // Process escapes. + if symbol == b'\\' { + if let Some((_, escaped)) = iter.next() { + match escaped { + b'0' => collect.push(0 as char), + b't' => collect.push(9 as char), + b'n' => collect.push(10 as char), + b'r' => collect.push(13 as char), + b'\"' => collect.push(34 as char), + b'\'' => collect.push(39 as char), + b'\\' => collect.push(92 as char), + b'`' => collect.push(b'`' as char), // TODO: REMOVE LATER + // \x0F + b'x' => { + if let Some((_, first_hex)) = iter.next() { + // peak first symbol + if let Some((_, second_hex)) = iter.next() { + // peak second symbol + if let Ok(string) = std::str::from_utf8(&[*first_hex, *second_hex]) { + if let Ok(number) = u8::from_str_radix(&string, 16) { + if number <= 127 { + collect.push(number as char); + continue; + } + } + } + } + } + + return (0, None); + } + + // \u{1-6 hex digits} + b'u' => { + if let Some((start, open_brace)) = iter.next() { + if *open_brace == b'{' { + let mut unicode: Vec = Vec::new(); + + while let Some((end, symbol)) = iter.next() { + if end > start + 7 { + return (0, None); + } + + match *symbol { + 0..=9 | b'a'..=b'f' | b'A'..=b'F' => unicode.push(*symbol), + b'}' => { + if let Ok(string) = std::str::from_utf8(&unicode[..]) { + if let Some(character) = string.chars().next() { + collect.push(character); + break; + } + } + + return 
(0, None); + } + _ => { + return (0, None); + } + } + } + + continue; + } + } + + return (0, None); + } + _ => { + return (0, None); + } + } + continue; + } else { + return (0, None); + } + } + + collect.push(symbol as char); + + // Backslash is always escape - changes logic. + // if *symbol == b'\\' && !escaped { + // escaped = true; + // continue; + // } + + // // If last double quote is not escaped - end token. + // if *symbol == b'`' && !escaped { + // return (i + 1, Some(Token::StringLiteral(collect))); + // } + + // // If we need to escape, there are special symbols that get translated. + // if escaped { + // match *symbol { + // b'0' => collect.push(0 as char), + // b't' => collect.push(9 as char), + // b'n' => collect.push(10 as char), + // b'r' => collect.push(13 as char), + // b'\"' => collect.push(34 as char), + // b'\'' => collect.push(39 as char), + // b'\\' => collect.push(92 as char), + // _ => collect.push(*symbol as char) + // }; + + // escaped = false; + // continue; + // } + } + + (0, None) + } + /// /// Returns a new `StrTendril` string if an character can be eaten, otherwise returns [`None`]. 
/// @@ -236,14 +370,7 @@ impl Token { match input[0] { x if x.is_ascii_whitespace() => return (1, None), b'`' => { - let mut collect: Vec = Vec::new(); - for (i, char_bytes) in input.iter().enumerate().skip(1) { - if *char_bytes == b'`' { - return (i + 1, Some(Token::QuotedString(collect))); - } - - collect.push(std::char::from_u32(*char_bytes as u32).unwrap()); - } + return Self::eat_string(&input_tendril); } b'"' => { let mut i = 1; diff --git a/parser/src/tokenizer/token.rs b/parser/src/tokenizer/token.rs index 3a9e3026b4..1cab255843 100644 --- a/parser/src/tokenizer/token.rs +++ b/parser/src/tokenizer/token.rs @@ -42,7 +42,7 @@ pub enum Token { CommentLine(#[serde(with = "leo_ast::common::tendril_json")] StrTendril), CommentBlock(#[serde(with = "leo_ast::common::tendril_json")] StrTendril), FormatString(Vec), - QuotedString(Vec), + StringLiteral(Vec), Ident(#[serde(with = "leo_ast::common::tendril_json")] StrTendril), Int(#[serde(with = "leo_ast::common::tendril_json")] StrTendril), True, @@ -216,7 +216,7 @@ impl fmt::Display for Token { } write!(f, "\"") } - QuotedString(content) => { + StringLiteral(content) => { write!(f, "\"")?; for character in content { write!(f, "{}", character)?; From 2f9fde8a0f667e76f90554335ec17c83fff50f76 Mon Sep 17 00:00:00 2001 From: damirka Date: Wed, 19 May 2021 23:42:19 +0300 Subject: [PATCH 03/20] removes commented blocks --- parser/src/tokenizer/lexer.rs | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/parser/src/tokenizer/lexer.rs b/parser/src/tokenizer/lexer.rs index f24e91b8f9..0367ac0c87 100644 --- a/parser/src/tokenizer/lexer.rs +++ b/parser/src/tokenizer/lexer.rs @@ -162,34 +162,6 @@ impl Token { } collect.push(symbol as char); - - // Backslash is always escape - changes logic. - // if *symbol == b'\\' && !escaped { - // escaped = true; - // continue; - // } - - // // If last double quote is not escaped - end token. 
- // if *symbol == b'`' && !escaped { - // return (i + 1, Some(Token::StringLiteral(collect))); - // } - - // // If we need to escape, there are special symbols that get translated. - // if escaped { - // match *symbol { - // b'0' => collect.push(0 as char), - // b't' => collect.push(9 as char), - // b'n' => collect.push(10 as char), - // b'r' => collect.push(13 as char), - // b'\"' => collect.push(34 as char), - // b'\'' => collect.push(39 as char), - // b'\\' => collect.push(92 as char), - // _ => collect.push(*symbol as char) - // }; - - // escaped = false; - // continue; - // } } (0, None) From 737985034b59a77496eac11f785d3a71fcb0f31a Mon Sep 17 00:00:00 2001 From: damirka Date: Wed, 19 May 2021 23:46:47 +0300 Subject: [PATCH 04/20] fix tests --- .../parser/parser/expression/string.leo.out | 98 ++++++++++++++++++- tests/parser/expression/string.leo | 13 ++- 2 files changed, 106 insertions(+), 5 deletions(-) diff --git a/tests/expectations/parser/parser/expression/string.leo.out b/tests/expectations/parser/parser/expression/string.leo.out index 4fc7fe22ef..5240d494bc 100644 --- a/tests/expectations/parser/parser/expression/string.leo.out +++ b/tests/expectations/parser/parser/expression/string.leo.out @@ -45,8 +45,6 @@ outputs: - Value: String: - - "{" - - " " - - "\\" - " " - "]" - " " @@ -58,6 +56,98 @@ outputs: - line_start: 1 line_stop: 1 col_start: 1 - col_stop: 14 + col_stop: 12 path: test - content: "`{ \\ ] [ ; a`" + content: "`{ ] [ ; a`" + - Value: + String: + - - a + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 13 + path: test + content: "`\\u{afafaf}`" + - Value: + String: + - - a + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 12 + path: test + content: "`\\u{afafa}`" + - Value: + String: + - - a + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 11 + path: test + content: "`\\u{afaf}`" + - Value: + String: + - - a + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 10 + path: test + content: "`\\u{afa}`" + 
- Value: + String: + - - a + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 9 + path: test + content: "`\\u{af}`" + - Value: + String: + - - a + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 8 + path: test + content: "`\\u{a}`" + - Value: + String: + - - "\n" + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 7 + path: test + content: "`\\x0A`" + - Value: + String: + - - a + - a + - " " + - "`" + - " " + - "\\" + - " " + - "\"" + - " " + - " " + - "\n" + - " " + - a + - a + - " " + - "\t" + - " " + - "\r" + - " " + - " " + - "\u0000" + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 31 + path: test + content: "`aa \\` \\\\ \\\" \\n aa \\t \\r \\0`" diff --git a/tests/parser/expression/string.leo b/tests/parser/expression/string.leo index efd8cec921..6af1a2ed34 100644 --- a/tests/parser/expression/string.leo +++ b/tests/parser/expression/string.leo @@ -7,4 +7,15 @@ expectation: Pass `another { } string` -`{ \ ] [ ; a` +`{ ] [ ; a` + +`\u{afafaf}` +`\u{afafa}` +`\u{afaf}` +`\u{afa}` +`\u{af}` +`\u{a}` + +`\x0A` + +`aa \` \\ \" \n aa \t \r \0` From ed03a385b58f5d698f824427219dc61ddca87971 Mon Sep 17 00:00:00 2001 From: damirka Date: Wed, 19 May 2021 23:59:54 +0300 Subject: [PATCH 05/20] fix unicode chars --- parser/src/tokenizer/lexer.rs | 14 ++++++++------ .../parser/parser/expression/string.leo.out | 16 ++++++++-------- tests/parser/expression/string.leo | 2 +- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/parser/src/tokenizer/lexer.rs b/parser/src/tokenizer/lexer.rs index 0367ac0c87..b966d48591 100644 --- a/parser/src/tokenizer/lexer.rs +++ b/parser/src/tokenizer/lexer.rs @@ -120,7 +120,7 @@ impl Token { b'u' => { if let Some((start, open_brace)) = iter.next() { if *open_brace == b'{' { - let mut unicode: Vec = Vec::new(); + let mut characters: Vec = Vec::new(); while let Some((end, symbol)) = iter.next() { if end > start + 7 { @@ -128,12 +128,14 @@ impl Token { } match *symbol { - 0..=9 | b'a'..=b'f' | 
b'A'..=b'F' => unicode.push(*symbol), + 0..=9 | b'a'..=b'f' | b'A'..=b'F' => characters.push(*symbol), b'}' => { - if let Ok(string) = std::str::from_utf8(&unicode[..]) { - if let Some(character) = string.chars().next() { - collect.push(character); - break; + if let Ok(unicode_string) = std::str::from_utf8(&characters[..]) { + if let Ok(hex) = u32::from_str_radix(&unicode_string, 16) { + if let Some(unicode_char) = std::char::from_u32(hex) { + collect.push(unicode_char); + break; + } } } diff --git a/tests/expectations/parser/parser/expression/string.leo.out b/tests/expectations/parser/parser/expression/string.leo.out index 5240d494bc..70b43b4443 100644 --- a/tests/expectations/parser/parser/expression/string.leo.out +++ b/tests/expectations/parser/parser/expression/string.leo.out @@ -61,16 +61,16 @@ outputs: content: "`{ ] [ ; a`" - Value: String: - - - a + - - ࿺ - line_start: 1 line_stop: 1 col_start: 1 - col_stop: 13 + col_stop: 10 path: test - content: "`\\u{afafaf}`" + content: "`\\u{FFA}`" - Value: String: - - - a + - - 򯫺 - line_start: 1 line_stop: 1 col_start: 1 @@ -79,7 +79,7 @@ outputs: content: "`\\u{afafa}`" - Value: String: - - - a + - - 꾯 - line_start: 1 line_stop: 1 col_start: 1 @@ -88,7 +88,7 @@ outputs: content: "`\\u{afaf}`" - Value: String: - - - a + - - ૺ - line_start: 1 line_stop: 1 col_start: 1 @@ -97,7 +97,7 @@ outputs: content: "`\\u{afa}`" - Value: String: - - - a + - - ¯ - line_start: 1 line_stop: 1 col_start: 1 @@ -106,7 +106,7 @@ outputs: content: "`\\u{af}`" - Value: String: - - - a + - - "\n" - line_start: 1 line_stop: 1 col_start: 1 diff --git a/tests/parser/expression/string.leo b/tests/parser/expression/string.leo index 6af1a2ed34..47f75535df 100644 --- a/tests/parser/expression/string.leo +++ b/tests/parser/expression/string.leo @@ -9,7 +9,7 @@ expectation: Pass `{ ] [ ; a` -`\u{afafaf}` +`\u{FFA}` `\u{afafa}` `\u{afaf}` `\u{afa}` From f404c426eda27731be6f60b0186929d06f9ba488 Mon Sep 17 00:00:00 2001 From: damirka Date: Thu, 20 May 
2021 00:33:39 +0300 Subject: [PATCH 06/20] fix comments --- ast/src/expression/value.rs | 4 ++-- parser/src/tokenizer/lexer.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ast/src/expression/value.rs b/ast/src/expression/value.rs index e153801cbd..09e190b96f 100644 --- a/ast/src/expression/value.rs +++ b/ast/src/expression/value.rs @@ -66,7 +66,7 @@ impl Node for ValueExpression { | Char(_, span) | Field(_, span) | Implicit(_, span) - | Integer(_, _, span) // => span, + | Integer(_, _, span) | String(_, span) => span, Group(group) => match &**group { GroupValue::Single(_, span) | GroupValue::Tuple(GroupTuple { span, .. }) => span, @@ -82,7 +82,7 @@ impl Node for ValueExpression { | Char(_, span) | Field(_, span) | Implicit(_, span) - | Integer(_, _, span) // => *span = new_span, + | Integer(_, _, span) | String(_, span) => *span = new_span, Group(group) => match &mut **group { GroupValue::Single(_, span) | GroupValue::Tuple(GroupTuple { span, .. }) => *span = new_span, diff --git a/parser/src/tokenizer/lexer.rs b/parser/src/tokenizer/lexer.rs index 96c6807bb6..da94586407 100644 --- a/parser/src/tokenizer/lexer.rs +++ b/parser/src/tokenizer/lexer.rs @@ -62,7 +62,7 @@ fn eat_identifier(input_tendril: &StrTendril) -> Option { impl Token { /// - /// Eats symbol. Used for string literals and char literals + /// Eats String. Returns Token::StringLiteral with processed contents of the string. 
/// fn eat_string(input_tendril: &StrTendril) -> (usize, Option) { if input_tendril.is_empty() { From 86fc23942ba85d3b16576c360039d146b12c4dbb Mon Sep 17 00:00:00 2001 From: damirka Date: Thu, 20 May 2021 00:36:40 +0300 Subject: [PATCH 07/20] more comment fixes --- parser/src/tokenizer/lexer.rs | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/parser/src/tokenizer/lexer.rs b/parser/src/tokenizer/lexer.rs index da94586407..d7d3512946 100644 --- a/parser/src/tokenizer/lexer.rs +++ b/parser/src/tokenizer/lexer.rs @@ -70,11 +70,7 @@ impl Token { } let input = input_tendril[..].as_bytes(); - - // let mut prev = b'`'; - // let mut escaped = false; let mut collect: Vec = Vec::new(); - let mut iter = input.iter().enumerate().skip(1); while let Some((i, symbol)) = iter.next() { @@ -95,13 +91,12 @@ impl Token { b'\"' => collect.push(34 as char), b'\'' => collect.push(39 as char), b'\\' => collect.push(92 as char), - b'`' => collect.push(b'`' as char), // TODO: REMOVE LATER - // \x0F + // \x0F - 2 HEX digits after \x b'x' => { + // get first symbol if let Some((_, first_hex)) = iter.next() { - // peak first symbol + // get second symbol if let Some((_, second_hex)) = iter.next() { - // peak second symbol if let Ok(string) = std::str::from_utf8(&[*first_hex, *second_hex]) { if let Ok(number) = u8::from_str_radix(&string, 16) { if number <= 127 { @@ -158,9 +153,9 @@ impl Token { } } continue; - } else { - return (0, None); } + + return (0, None); } collect.push(symbol as char); From 4fe91f0f391d938232f8ad89f293b89d5ab4f789 Mon Sep 17 00:00:00 2001 From: gluax Date: Thu, 20 May 2021 12:39:36 -0400 Subject: [PATCH 08/20] string canonicalization to char array --- ast/src/reducer/canonicalization.rs | 14 ++++++++++++++ ast/src/reducer/reconstructing_director.rs | 13 +++++++++---- ast/src/reducer/reconstructing_reducer.rs | 13 ++++++++----- compiler/src/phases/reducing_director.rs | 8 +++----- parser/src/tokenizer/lexer.rs | 2 +- 5 files changed, 35 
insertions(+), 15 deletions(-) diff --git a/ast/src/reducer/canonicalization.rs b/ast/src/reducer/canonicalization.rs index 7237d88d69..674c55ac5f 100644 --- a/ast/src/reducer/canonicalization.rs +++ b/ast/src/reducer/canonicalization.rs @@ -485,6 +485,20 @@ impl ReconstructingReducer for Canonicalizer { } } + fn reduce_string(&mut self, string: &[char], span: &Span) -> Result { + let mut elements = Vec::new(); + for character in string { + elements.push(SpreadOrExpression::Expression(Expression::Value( + ValueExpression::Char(*character, span.clone()), + ))); + } + + Ok(Expression::ArrayInline(ArrayInlineExpression { + elements, + span: span.clone(), + })) + } + fn reduce_array_init( &mut self, array_init: &ArrayInitExpression, diff --git a/ast/src/reducer/reconstructing_director.rs b/ast/src/reducer/reconstructing_director.rs index 942eaec3aa..bb727b4186 100644 --- a/ast/src/reducer/reconstructing_director.rs +++ b/ast/src/reducer/reconstructing_director.rs @@ -51,7 +51,7 @@ impl ReconstructingDirector { pub fn reduce_expression(&mut self, expression: &Expression) -> Result { let new = match expression { Expression::Identifier(identifier) => Expression::Identifier(self.reduce_identifier(&identifier)?), - Expression::Value(value) => Expression::Value(self.reduce_value(&value)?), + Expression::Value(value) => self.reduce_value(&value)?, Expression::Binary(binary) => Expression::Binary(self.reduce_binary(&binary)?), Expression::Unary(unary) => Expression::Unary(self.reduce_unary(&unary)?), Expression::Ternary(ternary) => Expression::Ternary(self.reduce_ternary(&ternary)?), @@ -100,12 +100,17 @@ impl ReconstructingDirector { self.reducer.reduce_group_value(group_value, new) } - pub fn reduce_value(&mut self, value: &ValueExpression) -> Result { + pub fn reduce_string(&mut self, string: &[char], span: &Span) -> Result { + self.reducer.reduce_string(string, span) + } + + pub fn reduce_value(&mut self, value: &ValueExpression) -> Result { let new = match value { 
ValueExpression::Group(group_value) => { - ValueExpression::Group(Box::new(self.reduce_group_value(&group_value)?)) + Expression::Value(ValueExpression::Group(Box::new(self.reduce_group_value(&group_value)?))) } - _ => value.clone(), + ValueExpression::String(string, span) => self.reduce_string(&string, &span)?, + _ => Expression::Value(value.clone()), }; self.reducer.reduce_value(value, new) diff --git a/ast/src/reducer/reconstructing_reducer.rs b/ast/src/reducer/reconstructing_reducer.rs index 36c8972728..0af238d2b8 100644 --- a/ast/src/reducer/reconstructing_reducer.rs +++ b/ast/src/reducer/reconstructing_reducer.rs @@ -51,11 +51,14 @@ pub trait ReconstructingReducer { Ok(new) } - fn reduce_value( - &mut self, - _value: &ValueExpression, - new: ValueExpression, - ) -> Result { + fn reduce_string(&mut self, string: &[char], span: &Span) -> Result { + Ok(Expression::Value(ValueExpression::String( + string.to_vec(), + span.clone(), + ))) + } + + fn reduce_value(&mut self, _value: &ValueExpression, new: Expression) -> Result { Ok(new) } diff --git a/compiler/src/phases/reducing_director.rs b/compiler/src/phases/reducing_director.rs index 0bec948eb3..0138ca6d67 100644 --- a/compiler/src/phases/reducing_director.rs +++ b/compiler/src/phases/reducing_director.rs @@ -152,9 +152,7 @@ impl CombineAstAsgDirector { asg: &AsgExpression, ) -> Result { let new = match (ast, asg) { - (AstExpression::Value(value), AsgExpression::Constant(const_)) => { - AstExpression::Value(self.reduce_value(&value, &const_)?) - } + (AstExpression::Value(value), AsgExpression::Constant(const_)) => self.reduce_value(&value, &const_)?, (AstExpression::Binary(ast), AsgExpression::Binary(asg)) => { AstExpression::Binary(self.reduce_binary(&ast, &asg)?) 
} @@ -404,7 +402,7 @@ impl CombineAstAsgDirector { self.ast_reducer.reduce_unary(ast, inner, ast.op.clone()) } - pub fn reduce_value(&mut self, ast: &ValueExpression, asg: &AsgConstant) -> Result { + pub fn reduce_value(&mut self, ast: &ValueExpression, asg: &AsgConstant) -> Result { let mut new = ast.clone(); if self.options.type_inference_enabled() { @@ -444,7 +442,7 @@ impl CombineAstAsgDirector { } } - self.ast_reducer.reduce_value(ast, new) + self.ast_reducer.reduce_value(ast, AstExpression::Value(new)) } pub fn reduce_variable_ref( diff --git a/parser/src/tokenizer/lexer.rs b/parser/src/tokenizer/lexer.rs index d7d3512946..5f77eefb72 100644 --- a/parser/src/tokenizer/lexer.rs +++ b/parser/src/tokenizer/lexer.rs @@ -154,7 +154,7 @@ impl Token { } continue; } - + return (0, None); } From c4a1bd69557f00aa29afceebf6caad91396f8c05 Mon Sep 17 00:00:00 2001 From: gluax Date: Thu, 20 May 2021 12:50:49 -0400 Subject: [PATCH 09/20] merge and print strings like strings, rather than arrays --- compiler/src/value/value.rs | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/compiler/src/value/value.rs b/compiler/src/value/value.rs index 1bb94ed34b..645fb5555f 100644 --- a/compiler/src/value/value.rs +++ b/compiler/src/value/value.rs @@ -99,14 +99,22 @@ impl<'a, F: PrimeField, G: GroupType> fmt::Display for ConstrainedValue<'a, F // Data type wrappers ConstrainedValue::Array(ref array) => { - write!(f, "[")?; - for (i, e) in array.iter().enumerate() { - write!(f, "{}", e)?; - if i < array.len() - 1 { - write!(f, ", ")?; + if matches!(array[0], ConstrainedValue::Char(_)) { + for character in array { + write!(f, "{}", character)?; } + + Ok(()) + } else { + write!(f, "[")?; + for (i, e) in array.iter().enumerate() { + write!(f, "{}", e)?; + if i < array.len() - 1 { + write!(f, ", ")?; + } + } + write!(f, "]") } - write!(f, "]") } ConstrainedValue::Tuple(ref tuple) => { let values = tuple.iter().map(|x| x.to_string()).collect::>().join(", "); 
From ca59ff3177ae34ceba2151b977d9fbe398583684 Mon Sep 17 00:00:00 2001 From: gluax Date: Thu, 20 May 2021 14:30:12 -0400 Subject: [PATCH 10/20] tests for now should be changed to use input strings when they are in --- compiler/tests/canonicalization/mod.rs | 13 + .../string_transformation.json | 310 ++++++++++++++++++ .../string_transformation.leo | 3 + compiler/tests/type_inference/basic.json | 278 +++++++++++++++- compiler/tests/type_inference/basic.leo | 1 + .../global_consts/global_const_types.leo | 6 +- tests/compiler/string/circuit.leo | 22 ++ tests/compiler/string/equality.leo | 11 + tests/compiler/string/inputs/string.in | 6 + tests/compiler/string/inputs/string_out.in | 6 + .../compiler/compiler/string/circuit.leo.out | 18 + .../compiler/compiler/string/equality.leo.out | 18 + 12 files changed, 688 insertions(+), 4 deletions(-) create mode 100644 compiler/tests/canonicalization/string_transformation.json create mode 100644 compiler/tests/canonicalization/string_transformation.leo create mode 100644 tests/compiler/string/circuit.leo create mode 100644 tests/compiler/string/equality.leo create mode 100644 tests/compiler/string/inputs/string.in create mode 100644 tests/compiler/string/inputs/string_out.in create mode 100644 tests/expectations/compiler/compiler/string/circuit.leo.out create mode 100644 tests/expectations/compiler/compiler/string/equality.leo.out diff --git a/compiler/tests/canonicalization/mod.rs b/compiler/tests/canonicalization/mod.rs index f420b2370c..5a86dcf4b2 100644 --- a/compiler/tests/canonicalization/mod.rs +++ b/compiler/tests/canonicalization/mod.rs @@ -95,3 +95,16 @@ fn test_illegal_array_range_fail() { let program = parse_program(program_string); assert!(program.is_err()); } + +#[test] +fn test_string_transformation() { + let program_string = include_str!("string_transformation.leo"); + let program = parse_program(program_string).unwrap(); + assert_satisfied(program); + + let ast = parse_program_ast(program_string); + let 
expected_json = include_str!("string_transformation.json"); + let expected_ast: Ast = Ast::from_json_string(expected_json).expect("Unable to parse json."); + + assert_eq!(expected_ast, ast); +} diff --git a/compiler/tests/canonicalization/string_transformation.json b/compiler/tests/canonicalization/string_transformation.json new file mode 100644 index 0000000000..03744195c0 --- /dev/null +++ b/compiler/tests/canonicalization/string_transformation.json @@ -0,0 +1,310 @@ +{ + "name": "", + "expected_input": [], + "imports": [], + "circuits": {}, + "global_consts": {}, + "functions": { + "{\"name\":\"main\",\"span\":\"{\\\"line_start\\\":1,\\\"line_stop\\\":1,\\\"col_start\\\":10,\\\"col_stop\\\":14,\\\"path\\\":\\\"\\\",\\\"content\\\":\\\"function main() {\\\"}\"}": { + "annotations": [], + "identifier": "{\"name\":\"main\",\"span\":\"{\\\"line_start\\\":1,\\\"line_stop\\\":1,\\\"col_start\\\":10,\\\"col_stop\\\":14,\\\"path\\\":\\\"\\\",\\\"content\\\":\\\"function main() {\\\"}\"}", + "input": [], + "output": { + "Tuple": [] + }, + "block": { + "statements": [ + { + "Definition": { + "declaration_type": "Let", + "variable_names": [ + { + "mutable": true, + "identifier": "{\"name\":\"s\",\"span\":\"{\\\"line_start\\\":2,\\\"line_stop\\\":2,\\\"col_start\\\":9,\\\"col_stop\\\":10,\\\"path\\\":\\\"\\\",\\\"content\\\":\\\" let s = `Hello, World!`;\\\"}\"}", + "span": { + "line_start": 2, + "line_stop": 2, + "col_start": 9, + "col_stop": 10, + "path": "", + "content": " let s = `Hello, World!`;" + } + } + ], + "type_": { + "Array": [ + "Char", + [ + { + "value": "13" + } + ] + ] + }, + "value": { + "ArrayInline": { + "elements": [ + { + "Expression": { + "Value": { + "Char": [ + "H", + { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = `Hello, World!`;" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "e", + { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + 
"path": "", + "content": " let s = `Hello, World!`;" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "l", + { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = `Hello, World!`;" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "l", + { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = `Hello, World!`;" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "o", + { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = `Hello, World!`;" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + ",", + { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = `Hello, World!`;" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + " ", + { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = `Hello, World!`;" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "W", + { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = `Hello, World!`;" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "o", + { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = `Hello, World!`;" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "r", + { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = `Hello, World!`;" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "l", + { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = `Hello, World!`;" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "d", 
+ { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = `Hello, World!`;" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "!", + { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = `Hello, World!`;" + } + ] + } + } + } + ], + "span": { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = `Hello, World!`;" + } + } + }, + "span": { + "line_start": 2, + "line_stop": 2, + "col_start": 5, + "col_stop": 28, + "path": "", + "content": " let s = `Hello, World!`;" + } + } + } + ], + "span": { + "line_start": 1, + "line_stop": 3, + "col_start": 17, + "col_stop": 2, + "path": "", + "content": "function main() {\n...\n}" + } + }, + "span": { + "line_start": 1, + "line_stop": 3, + "col_start": 1, + "col_stop": 2, + "path": "", + "content": "function main() {\n...\n}" + } + } + } + } + \ No newline at end of file diff --git a/compiler/tests/canonicalization/string_transformation.leo b/compiler/tests/canonicalization/string_transformation.leo new file mode 100644 index 0000000000..0f2e78e1a9 --- /dev/null +++ b/compiler/tests/canonicalization/string_transformation.leo @@ -0,0 +1,3 @@ +function main() { + let s = `Hello, World!`; +} \ No newline at end of file diff --git a/compiler/tests/type_inference/basic.json b/compiler/tests/type_inference/basic.json index 2c6b796cb9..fdd3bf2e15 100644 --- a/compiler/tests/type_inference/basic.json +++ b/compiler/tests/type_inference/basic.json @@ -1027,11 +1027,283 @@ "content": " const n = 'a';" } } + }, + { + "Definition": { + "declaration_type": "Const", + "variable_names": [ + { + "mutable": false, + "identifier": "{\"name\":\"o\",\"span\":\"{\\\"line_start\\\":24,\\\"line_stop\\\":24,\\\"col_start\\\":9,\\\"col_stop\\\":10,\\\"path\\\":\\\"\\\",\\\"content\\\":\\\" const o = `Hello, World!`;\\\"}\"}", + "span": { + "line_start": 24, + 
"line_stop": 24, + "col_start": 9, + "col_stop": 10, + "path": "", + "content": " const o = `Hello, World!`;" + } + } + ], + "type_": { + "Array": [ + "Char", + [ + { + "value": "13" + } + ] + ] + }, + "value": { + "ArrayInline": { + "elements": [ + { + "Expression": { + "Value": { + "Char": [ + "H", + { + "line_start": 24, + "line_stop": 24, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " const o = `Hello, World!`;" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "e", + { + "line_start": 24, + "line_stop": 24, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " const o = `Hello, World!`;" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "l", + { + "line_start": 24, + "line_stop": 24, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " const o = `Hello, World!`;" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "l", + { + "line_start": 24, + "line_stop": 24, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " const o = `Hello, World!`;" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "o", + { + "line_start": 24, + "line_stop": 24, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " const o = `Hello, World!`;" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + ",", + { + "line_start": 24, + "line_stop": 24, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " const o = `Hello, World!`;" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + " ", + { + "line_start": 24, + "line_stop": 24, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " const o = `Hello, World!`;" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "W", + { + "line_start": 24, + "line_stop": 24, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " const o = `Hello, World!`;" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "o", + 
{ + "line_start": 24, + "line_stop": 24, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " const o = `Hello, World!`;" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "r", + { + "line_start": 24, + "line_stop": 24, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " const o = `Hello, World!`;" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "l", + { + "line_start": 24, + "line_stop": 24, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " const o = `Hello, World!`;" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "d", + { + "line_start": 24, + "line_stop": 24, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " const o = `Hello, World!`;" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "!", + { + "line_start": 24, + "line_stop": 24, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " const o = `Hello, World!`;" + } + ] + } + } + } + ], + "span": { + "line_start": 24, + "line_stop": 24, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " const o = `Hello, World!`;" + } + } + }, + "span": { + "line_start": 24, + "line_stop": 24, + "col_start": 3, + "col_stop": 28, + "path": "", + "content": " const o = `Hello, World!`;" + } + } } ], "span": { "line_start": 9, - "line_stop": 24, + "line_stop": 25, "col_start": 17, "col_stop": 2, "path": "", @@ -1040,11 +1312,11 @@ }, "span": { "line_start": 9, - "line_stop": 24, + "line_stop": 25, "col_start": 1, "col_stop": 2, "path": "", - "content": "function main() {\n...\n}\n\n\n\n\n\n\n\n\n\n\n\n\n" + "content": "function main() {\n...\n}\n\n\n\n\n\n\n\n\n\n\n\n\n\n" } } } diff --git a/compiler/tests/type_inference/basic.leo b/compiler/tests/type_inference/basic.leo index 330daeae20..f3dae7a734 100644 --- a/compiler/tests/type_inference/basic.leo +++ b/compiler/tests/type_inference/basic.leo @@ -21,4 +21,5 @@ function main() { const l = (1u8, 1u8, true); 
const m = Foo {}; const n = 'a'; + const o = `Hello, World!`; } \ No newline at end of file diff --git a/tests/compiler/global_consts/global_const_types.leo b/tests/compiler/global_consts/global_const_types.leo index 5b454dbfc7..5fba7a9316 100644 --- a/tests/compiler/global_consts/global_const_types.leo +++ b/tests/compiler/global_consts/global_const_types.leo @@ -20,6 +20,8 @@ const field_test: field = 2; const use_another_const = basic + 1; const foo = Foo { width: 10, height: 20 }; const uno = uno(); +const character = 'a'; +const hello = `Hello, World!`; circuit Foo { width: u32, @@ -47,5 +49,7 @@ function main(a: u32) -> bool { && use_another_const == 9u32 // use another const test && foo.width == 10u32 // circuit test && foo.height == 20u32 - && uno == 1u32; // function test + && uno == 1u32 // function test + && character == 'a' // char test + && hello == `Hello, World!`; } diff --git a/tests/compiler/string/circuit.leo b/tests/compiler/string/circuit.leo new file mode 100644 index 0000000000..43326849f2 --- /dev/null +++ b/tests/compiler/string/circuit.leo @@ -0,0 +1,22 @@ +/* +namespace: Compile +expectation: Pass +input_file: + - inputs/string_out.in +*/ + +circuit Foo { + s1: [char; 13]; +} + +function takes_string(s: [char; 13]) -> bool { + return s == `Hello, World!`; +} + +function main(s1: [char; 13]) -> [char; 13] { + let f = Foo { s1 }; + let b = takes_string(s1); + + let result = f.s1 == `Hello, World!` ? 
s1 : `abcdefghjklmn`; + return result; +} \ No newline at end of file diff --git a/tests/compiler/string/equality.leo b/tests/compiler/string/equality.leo new file mode 100644 index 0000000000..460941b18b --- /dev/null +++ b/tests/compiler/string/equality.leo @@ -0,0 +1,11 @@ +/* +namespace: Compile +expectation: Pass +input_file: + - inputs/string.in +*/ + +function main(s1: [char; 13], s2: [char; 4]) -> bool { + let hello: [char; 13] = `Hello, World!`; + return hello == s1 && `nope` != s2; +} \ No newline at end of file diff --git a/tests/compiler/string/inputs/string.in b/tests/compiler/string/inputs/string.in new file mode 100644 index 0000000000..c0d4b5605b --- /dev/null +++ b/tests/compiler/string/inputs/string.in @@ -0,0 +1,6 @@ +[main] +s1: [char; 13] = ['H', 'e', 'l', 'l', 'o', ',', '\u{20}', 'W', 'o', 'r', 'l', 'd', '!']; +s2: [char; 4] = ['t', 'e', 's', 't']; + +[registers] +out: bool = true; \ No newline at end of file diff --git a/tests/compiler/string/inputs/string_out.in b/tests/compiler/string/inputs/string_out.in new file mode 100644 index 0000000000..e3417677af --- /dev/null +++ b/tests/compiler/string/inputs/string_out.in @@ -0,0 +1,6 @@ +[main] +s1: [char; 13] = ['H', 'e', 'l', 'l', 'o', ',', '\u{20}', 'W', 'o', 'r', 'l', 'd', '!']; +s2: [char; 4] = ['t', 'e', 's', 't']; + +[registers] +out: [char; 13] = ['H', 'e', 'l', 'l', 'o', ',', '\u{20}', 'W', 'o', 'r', 'l', 'd', '!']; \ No newline at end of file diff --git a/tests/expectations/compiler/compiler/string/circuit.leo.out b/tests/expectations/compiler/compiler/string/circuit.leo.out new file mode 100644 index 0000000000..b3a7b67aa3 --- /dev/null +++ b/tests/expectations/compiler/compiler/string/circuit.leo.out @@ -0,0 +1,18 @@ +--- +namespace: Compile +expectation: Pass +outputs: + - circuit: + num_public_variables: 0 + num_private_variables: 141 + num_constraints: 115 + at: 145ada587c833434abb89c3349d19e06365fda3eb9b2a227046a78469e3ca313 + bt: 
f2945a3bc1beaee407bb4ec35303115a93a8c68886d97011cd65ec6d899664e8 + ct: 10b997b6341b3cf811cb7b0fdb891f91006d41c50e9f9566ff92f92816153dfc + output: + - input_file: inputs/string_out.in + output: + registers: + out: + type: "[char; 13]" + value: "Hello, World!" diff --git a/tests/expectations/compiler/compiler/string/equality.leo.out b/tests/expectations/compiler/compiler/string/equality.leo.out new file mode 100644 index 0000000000..21cc0616d7 --- /dev/null +++ b/tests/expectations/compiler/compiler/string/equality.leo.out @@ -0,0 +1,18 @@ +--- +namespace: Compile +expectation: Pass +outputs: + - circuit: + num_public_variables: 0 + num_private_variables: 84 + num_constraints: 67 + at: da464aeb42d53f56ff26141c802d2a769477763766c5746e603c5326b01790bb + bt: 6b03d4cb03e7bf9cf8ec746ee3410578d8ac51a29e56f9090d8e27a4ddf16c64 + ct: ebcd3f740af33d9c3ab2c5e4189709be8d73fab149e788734705cad488a4208c + output: + - input_file: inputs/string.in + output: + registers: + out: + type: bool + value: "true" From c8e63a21d6d83f35326c4c46a5c836a44884d5af Mon Sep 17 00:00:00 2001 From: gluax Date: Thu, 20 May 2021 20:10:00 -0400 Subject: [PATCH 11/20] change pest according to suggestion fixes it --- input/src/leo-input.pest | 2 +- tests/compiler/string/inputs/string.in | 2 +- tests/compiler/string/inputs/string_out.in | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/input/src/leo-input.pest b/input/src/leo-input.pest index a500de21fb..b072fb63d8 100644 --- a/input/src/leo-input.pest +++ b/input/src/leo-input.pest @@ -148,7 +148,7 @@ char_types = { } // Declared in values/char_value.rs -value_char = { "\'" ~ char_types ~ "\'" } +value_char = ${ "\'" ~ char_types ~ "\'" } // Declared in values/integer_value.rs value_integer = { value_integer_signed | value_integer_unsigned} diff --git a/tests/compiler/string/inputs/string.in b/tests/compiler/string/inputs/string.in index c0d4b5605b..8787f7f0a3 100644 --- a/tests/compiler/string/inputs/string.in +++ 
b/tests/compiler/string/inputs/string.in @@ -1,5 +1,5 @@ [main] -s1: [char; 13] = ['H', 'e', 'l', 'l', 'o', ',', '\u{20}', 'W', 'o', 'r', 'l', 'd', '!']; +s1: [char; 13] = ['H', 'e', 'l', 'l', 'o', ',', ' ', 'W', 'o', 'r', 'l', 'd', '!']; s2: [char; 4] = ['t', 'e', 's', 't']; [registers] diff --git a/tests/compiler/string/inputs/string_out.in b/tests/compiler/string/inputs/string_out.in index e3417677af..be00dfe2c0 100644 --- a/tests/compiler/string/inputs/string_out.in +++ b/tests/compiler/string/inputs/string_out.in @@ -1,6 +1,6 @@ [main] -s1: [char; 13] = ['H', 'e', 'l', 'l', 'o', ',', '\u{20}', 'W', 'o', 'r', 'l', 'd', '!']; +s1: [char; 13] = ['H', 'e', 'l', 'l', 'o', ',', ' ', 'W', 'o', 'r', 'l', 'd', '!']; s2: [char; 4] = ['t', 'e', 's', 't']; [registers] -out: [char; 13] = ['H', 'e', 'l', 'l', 'o', ',', '\u{20}', 'W', 'o', 'r', 'l', 'd', '!']; \ No newline at end of file +out: [char; 13] = ['H', 'e', 'l', 'l', 'o', ',', ' ', 'W', 'o', 'r', 'l', 'd', '!']; \ No newline at end of file From ddbdd485d23b695b7d81249d95764f3e873e81b4 Mon Sep 17 00:00:00 2001 From: gluax Date: Sat, 22 May 2021 10:41:54 -0400 Subject: [PATCH 12/20] Revert "change pest according to suggestion fixes it" This reverts commit c8e63a21d6d83f35326c4c46a5c836a44884d5af. 
--- input/src/leo-input.pest | 2 +- tests/compiler/string/inputs/string.in | 2 +- tests/compiler/string/inputs/string_out.in | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/input/src/leo-input.pest b/input/src/leo-input.pest index b072fb63d8..a500de21fb 100644 --- a/input/src/leo-input.pest +++ b/input/src/leo-input.pest @@ -148,7 +148,7 @@ char_types = { } // Declared in values/char_value.rs -value_char = ${ "\'" ~ char_types ~ "\'" } +value_char = { "\'" ~ char_types ~ "\'" } // Declared in values/integer_value.rs value_integer = { value_integer_signed | value_integer_unsigned} diff --git a/tests/compiler/string/inputs/string.in b/tests/compiler/string/inputs/string.in index 8787f7f0a3..c0d4b5605b 100644 --- a/tests/compiler/string/inputs/string.in +++ b/tests/compiler/string/inputs/string.in @@ -1,5 +1,5 @@ [main] -s1: [char; 13] = ['H', 'e', 'l', 'l', 'o', ',', ' ', 'W', 'o', 'r', 'l', 'd', '!']; +s1: [char; 13] = ['H', 'e', 'l', 'l', 'o', ',', '\u{20}', 'W', 'o', 'r', 'l', 'd', '!']; s2: [char; 4] = ['t', 'e', 's', 't']; [registers] diff --git a/tests/compiler/string/inputs/string_out.in b/tests/compiler/string/inputs/string_out.in index be00dfe2c0..e3417677af 100644 --- a/tests/compiler/string/inputs/string_out.in +++ b/tests/compiler/string/inputs/string_out.in @@ -1,6 +1,6 @@ [main] -s1: [char; 13] = ['H', 'e', 'l', 'l', 'o', ',', ' ', 'W', 'o', 'r', 'l', 'd', '!']; +s1: [char; 13] = ['H', 'e', 'l', 'l', 'o', ',', '\u{20}', 'W', 'o', 'r', 'l', 'd', '!']; s2: [char; 4] = ['t', 'e', 's', 't']; [registers] -out: [char; 13] = ['H', 'e', 'l', 'l', 'o', ',', ' ', 'W', 'o', 'r', 'l', 'd', '!']; \ No newline at end of file +out: [char; 13] = ['H', 'e', 'l', 'l', 'o', ',', '\u{20}', 'W', 'o', 'r', 'l', 'd', '!']; \ No newline at end of file From efc83205f202f282f694aaef8b31fe6602bbbfab Mon Sep 17 00:00:00 2001 From: gluax Date: Sat, 22 May 2021 15:53:30 -0400 Subject: [PATCH 13/20] string parsing leverages eat_char --- input/src/leo-input.pest 
| 2 +- parser/src/tokenizer/lexer.rs | 116 ++++++++++++++++++++++++++++------ 2 files changed, 98 insertions(+), 20 deletions(-) diff --git a/input/src/leo-input.pest b/input/src/leo-input.pest index a500de21fb..b072fb63d8 100644 --- a/input/src/leo-input.pest +++ b/input/src/leo-input.pest @@ -148,7 +148,7 @@ char_types = { } // Declared in values/char_value.rs -value_char = { "\'" ~ char_types ~ "\'" } +value_char = ${ "\'" ~ char_types ~ "\'" } // Declared in values/integer_value.rs value_integer = { value_integer_signed | value_integer_unsigned} diff --git a/parser/src/tokenizer/lexer.rs b/parser/src/tokenizer/lexer.rs index b61c09ce51..9da887ac15 100644 --- a/parser/src/tokenizer/lexer.rs +++ b/parser/src/tokenizer/lexer.rs @@ -62,10 +62,12 @@ fn eat_identifier(input_tendril: &StrTendril) -> Option { impl Token { /// - /// Returns a new `Token::CharLit` if an character can be eaten, otherwise returns [`None`]. + /// Returns a `char` if an character can be eaten, otherwise returns [`None`]. 
/// - fn eat_char(input_tendril: StrTendril, escaped: bool, hex: bool, unicode: bool) -> Option { + fn eat_char(input_tendril: StrTendril, escaped: bool, hex: bool, unicode: bool) -> Option { + println!("it {} e {} h {} u {}", input_tendril, escaped, hex, unicode); if input_tendril.is_empty() { + println!("ne"); return None; } @@ -79,13 +81,13 @@ impl Token { if let Some(character) = escaped.chars().next() { return match character { - '0' => Some(Token::CharLit(0 as char)), - 't' => Some(Token::CharLit(9 as char)), - 'n' => Some(Token::CharLit(10 as char)), - 'r' => Some(Token::CharLit(13 as char)), - '\"' => Some(Token::CharLit(34 as char)), - '\'' => Some(Token::CharLit(39 as char)), - '\\' => Some(Token::CharLit(92 as char)), + '0' => Some(0 as char), + 't' => Some(9 as char), + 'n' => Some(10 as char), + 'r' => Some(13 as char), + '\"' => Some(34 as char), + '\'' => Some(39 as char), + '\\' => Some(92 as char), _ => None, }; } else { @@ -102,7 +104,7 @@ impl Token { } if let Ok(ascii_number) = u8::from_str_radix(&hex_string, 16) { - return Some(Token::CharLit(ascii_number as char)); + return Some(ascii_number as char); } } @@ -112,13 +114,14 @@ impl Token { if let Ok(hex) = u32::from_str_radix(&unicode_number, 16) { if let Some(character) = std::char::from_u32(hex) { - return Some(Token::CharLit(character)); + return Some(character); } } } + println!("itcs {:?}", input_tendril.to_string().chars()); if let Some(character) = input_tendril.to_string().chars().next() { - return Some(Token::CharLit(character)); + return Some(character); } None @@ -168,6 +171,84 @@ impl Token { let input = input_tendril[..].as_bytes(); match input[0] { x if x.is_ascii_whitespace() => return (1, None), + b'`' => { + let mut i = 1; + let mut len: u32 = 1; + let mut start = 1; + let mut in_escape = false; + let mut escaped = false; + let mut hex = false; + let mut unicode = false; + let mut end = false; + let mut string = Vec::new(); + + while i < input.len() { + if !in_escape { + if 
input[i] == b'`' { + end = true; + break; + } else if input[i] == b'\\' { + in_escape = true; + start = i; + i += 1; + continue; + } + } else { + len += 1; + + match input[i] { + b'x' => { + hex = true; + } + b'u' => { + unicode = true; + } + b'}' if unicode => { + in_escape = false; + } + _ if !hex && !unicode => { + escaped = true; + in_escape = false; + } + _ if hex && len == 4 => { + println!("len 4"); + in_escape = false; + } + _ => {} + } + } + + if !in_escape { + match Self::eat_char( + input_tendril.subtendril(start as u32, len as u32), + escaped, + hex, + unicode, + ) { + Some(character) => { + len = 1; + escaped = false; + hex = false; + unicode = false; + string.push(character); + } + None => return (0, None), + } + } + + i += 1; + + if !escaped && !hex && !unicode { + start = i; + } + } + + if !end { + return (0, None); + } + + return (i + 1, Some(Token::StringLiteral(string))); + } b'"' => { let mut i = 1; let mut in_escape = false; @@ -248,13 +329,10 @@ impl Token { return (0, None); } - let result = Self::eat_char(input_tendril.subtendril(1, (i - 1) as u32), escaped, hex, unicode); - - if result.is_none() { - return (0, None); - } - - return (i + 1, result); + return match Self::eat_char(input_tendril.subtendril(1, (i - 1) as u32), escaped, hex, unicode) { + Some(character) => (i + 1, Some(Token::CharLit(character))), + None => (0, None), + }; } x if x.is_ascii_digit() => { return Self::eat_integer(&input_tendril); From 8cb1dc6e30dc34075e210ca9d72ee67b9291a139 Mon Sep 17 00:00:00 2001 From: gluax Date: Sat, 22 May 2021 15:57:25 -0400 Subject: [PATCH 14/20] remove debug stmts --- parser/src/tokenizer/lexer.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/parser/src/tokenizer/lexer.rs b/parser/src/tokenizer/lexer.rs index 9da887ac15..b85df00bab 100644 --- a/parser/src/tokenizer/lexer.rs +++ b/parser/src/tokenizer/lexer.rs @@ -65,9 +65,7 @@ impl Token { /// Returns a `char` if an character can be eaten, otherwise returns [`None`]. 
/// fn eat_char(input_tendril: StrTendril, escaped: bool, hex: bool, unicode: bool) -> Option { - println!("it {} e {} h {} u {}", input_tendril, escaped, hex, unicode); if input_tendril.is_empty() { - println!("ne"); return None; } @@ -119,7 +117,6 @@ impl Token { } } - println!("itcs {:?}", input_tendril.to_string().chars()); if let Some(character) = input_tendril.to_string().chars().next() { return Some(character); } @@ -211,7 +208,6 @@ impl Token { in_escape = false; } _ if hex && len == 4 => { - println!("len 4"); in_escape = false; } _ => {} From 1c57eb41961bcd751cf492e1cfe8b03d1ef18efe Mon Sep 17 00:00:00 2001 From: gluax Date: Sat, 22 May 2021 17:57:05 -0400 Subject: [PATCH 15/20] input string syntax --- ast/src/input/input_value.rs | 50 +++++- input/src/errors/parser.rs | 15 ++ input/src/expressions/expression.rs | 3 + input/src/expressions/mod.rs | 3 + input/src/expressions/string_expression.rs | 8 +- input/src/leo-input.pest | 4 + input/src/values/char_types.rs | 26 ++- parser/src/tokenizer/lexer.rs | 2 +- tests/compiler/string/circuit.leo | 2 +- tests/compiler/string/equality.leo | 1 + tests/compiler/string/inputs/string.in | 4 +- tests/compiler/string/inputs/string_out.in | 6 +- tests/compiler/string/inputs/weird.in | 6 + .../expression/literal/char_fail.leo.out | 2 +- .../expression/literal/string copy.leo.out | 151 ++++++++++++++++++ .../parser/expression/literal/string.leo.out | 15 ++ .../expression/literal/string_fail.leo.out | 6 + .../expression/literal/string_parse.leo.out | 151 ++++++++++++++++++ tests/parser/expression/literal/char_fail.leo | 2 +- tests/parser/expression/literal/string.leo | 21 +++ .../parser/expression/literal/string_fail.leo | 8 + .../{string.leo => literal/string_parse.leo} | 0 22 files changed, 470 insertions(+), 16 deletions(-) create mode 100644 tests/compiler/string/inputs/weird.in create mode 100644 tests/expectations/parser/parser/expression/literal/string copy.leo.out create mode 100644 
tests/expectations/parser/parser/expression/literal/string.leo.out create mode 100644 tests/expectations/parser/parser/expression/literal/string_fail.leo.out create mode 100644 tests/expectations/parser/parser/expression/literal/string_parse.leo.out create mode 100644 tests/parser/expression/literal/string.leo create mode 100644 tests/parser/expression/literal/string_fail.leo rename tests/parser/expression/{string.leo => literal/string_parse.leo} (100%) diff --git a/ast/src/input/input_value.rs b/ast/src/input/input_value.rs index b076ae6428..a90f6fb85f 100644 --- a/ast/src/input/input_value.rs +++ b/ast/src/input/input_value.rs @@ -17,7 +17,7 @@ use crate::{ArrayDimensions, GroupValue}; use leo_input::{ errors::InputParserError, - expressions::{ArrayInitializerExpression, ArrayInlineExpression, Expression, TupleExpression}, + expressions::{ArrayInitializerExpression, ArrayInlineExpression, Expression, StringExpression, TupleExpression}, types::{ArrayType, DataType, IntegerType, TupleType, Type}, values::{ Address, @@ -115,11 +115,59 @@ impl InputValue { (Type::Array(array_type), Expression::ArrayInitializer(initializer)) => { InputValue::from_array_initializer(array_type, initializer) } + (Type::Array(array_type), Expression::StringExpression(string)) => { + InputValue::from_string(array_type, string) + } (Type::Tuple(tuple_type), Expression::Tuple(tuple)) => InputValue::from_tuple(tuple_type, tuple), (type_, expression) => Err(InputParserError::expression_type_mismatch(type_, expression)), } } + /// + /// Returns a new `InputValue` from the given `ArrayType` and `StringExpression`. + /// + pub(crate) fn from_string(mut array_type: ArrayType, string: StringExpression) -> Result { + // Create a new `ArrayDimensions` type from the input array_type dimensions. + let array_dimensions_type = ArrayDimensions::from(array_type.dimensions.clone()); + + // Convert the array dimensions to usize. 
+ let array_dimensions = parse_array_dimensions(array_dimensions_type, &array_type.span)?; + + // Return an error if the outer array dimension does not equal the number of array elements. + if array_dimensions[0] != string.chars.len() { + return Err(InputParserError::invalid_string_length( + array_dimensions[0], + string.chars.len(), + &string.span, + )); + } + + array_type.dimensions = array_type.dimensions.next_dimension(); + + let inner_array_type = if array_dimensions.len() == 1 { + // This is a single array + *array_type.type_ + } else { + // This is a multi-dimensional array + return Err(InputParserError::invalid_string_dimensions(&array_type.span)); + }; + + let mut elements = Vec::with_capacity(string.chars.len()); + for character in string.chars.into_iter() { + let element = InputValue::from_expression( + inner_array_type.clone(), + Expression::Value(Value::Char(CharValue { + value: character.clone(), + span: character.span().clone(), + })), + )?; + + elements.push(element) + } + + Ok(InputValue::Array(elements)) + } + /// /// Returns a new `InputValue` from the given `ArrayType` and `ArrayInlineExpression`. 
/// diff --git a/input/src/errors/parser.rs b/input/src/errors/parser.rs index 093c55dc8c..5776856942 100644 --- a/input/src/errors/parser.rs +++ b/input/src/errors/parser.rs @@ -89,6 +89,21 @@ impl InputParserError { Self::new_from_span(message, span) } + pub fn invalid_string_dimensions(span: &Span) -> Self { + let message = "String type definition of a char array should not be multi-dimensional".to_string(); + + Self::new_from_span(message, span) + } + + pub fn invalid_string_length(expected: usize, received: usize, span: &Span) -> Self { + let message = format!( + "Expected size of char array `{}` to match string size instead received `{}`", + expected, received + ); + + Self::new_from_span(message, span) + } + pub fn implicit_type(data_type: DataType, implicit: NumberValue) -> Self { let message = format!("expected `{}`, found `{}`", data_type, implicit); diff --git a/input/src/expressions/expression.rs b/input/src/expressions/expression.rs index 4694307b58..6f4397800a 100644 --- a/input/src/expressions/expression.rs +++ b/input/src/expressions/expression.rs @@ -25,6 +25,7 @@ pub enum Expression<'ast> { ArrayInitializer(ArrayInitializerExpression<'ast>), ArrayInline(ArrayInlineExpression<'ast>), + StringExpression(StringExpression<'ast>), Tuple(TupleExpression<'ast>), Value(Value<'ast>), } @@ -34,6 +35,7 @@ impl<'ast> Expression<'ast> { match self { Expression::ArrayInitializer(expression) => &expression.span, Expression::ArrayInline(expression) => &expression.span, + Expression::StringExpression(string) => &string.span, Expression::Tuple(tuple) => &tuple.span, Expression::Value(value) => value.span(), } @@ -56,6 +58,7 @@ impl<'ast> fmt::Display for Expression<'ast> { write!(f, "array [{}]", values) } + Expression::StringExpression(ref string) => write!(f, "{}", string), Expression::Tuple(ref tuple) => { let values = tuple .expressions diff --git a/input/src/expressions/mod.rs b/input/src/expressions/mod.rs index 09e4554026..18a1505dd5 100644 ---
a/input/src/expressions/mod.rs +++ b/input/src/expressions/mod.rs @@ -25,3 +25,6 @@ pub use expression::*; pub mod tuple_expression; pub use tuple_expression::*; + +pub mod string_expression; +pub use string_expression::*; diff --git a/input/src/expressions/string_expression.rs b/input/src/expressions/string_expression.rs index 0b479abce7..8c4b1cc38c 100644 --- a/input/src/expressions/string_expression.rs +++ b/input/src/expressions/string_expression.rs @@ -14,7 +14,7 @@ // You should have received a copy of the GNU General Public License // along with the Leo library. If not, see . -use crate::{ast::Rule, values::CharValue}; +use crate::{ast::Rule, values::CharTypes}; use pest::Span; use pest_ast::FromPest; @@ -23,17 +23,19 @@ use std::fmt; #[derive(Clone, Debug, FromPest, PartialEq, Eq)] #[pest_ast(rule(Rule::expression_string))] pub struct StringExpression<'ast> { - pub chars: Vec>, + pub chars: Vec>, #[pest_ast(outer())] pub span: Span<'ast>, } impl<'ast> fmt::Display for StringExpression<'ast> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "\"")?; + for character in self.chars.iter() { write!(f, "{:?}", character)?; } - Ok(()) + write!(f, "\"") } } diff --git a/input/src/leo-input.pest b/input/src/leo-input.pest index b072fb63d8..cbfdb02b9b 100644 --- a/input/src/leo-input.pest +++ b/input/src/leo-input.pest @@ -200,12 +200,16 @@ expression_array_initializer = { "[" ~ expression ~ ";" ~ array_dimensions ~ "]" expression_array_inline = { "[" ~ NEWLINE* ~ inline_array_inner ~ NEWLINE* ~ "]"} inline_array_inner = _{ (expression ~ ("," ~ NEWLINE* ~ expression)*)? 
} +// Declared in expressions/string_expression.rs +expression_string = ${ "\"" ~ (!"\"" ~ char_types)+ ~ "\"" } + // Declared in expressions/expression.rs expression = { value | expression_tuple | expression_array_inline | expression_array_initializer + | expression_string } expression_tuple = { "(" ~ expression ~ ("," ~ expression)+ ~")" } diff --git a/input/src/values/char_types.rs b/input/src/values/char_types.rs index 314f4f5c9d..a83c315324 100644 --- a/input/src/values/char_types.rs +++ b/input/src/values/char_types.rs @@ -67,6 +67,17 @@ pub enum CharTypes<'ast> { Unicode(UnicodeChar<'ast>), } +impl<'ast> CharTypes<'ast> { + pub fn span(&self) -> &Span<'ast> { + match self { + CharTypes::Basic(value) => &value.span, + CharTypes::Escaped(value) => &value.span, + CharTypes::Hex(value) => &value.span, + CharTypes::Unicode(value) => &value.span, + } + } +} + impl<'ast> CharTypes<'ast> { pub fn inner(self) -> Result { match self { @@ -78,14 +89,23 @@ impl<'ast> CharTypes<'ast> { Err(InputParserError::invalid_char(character.value, &character.span)) } Self::Escaped(character) => { - if let Some(character) = character.value.chars().nth(1) { - return Ok(character); + if let Some(inner) = character.value.chars().nth(1) { + return match inner { + '0' => Ok(0 as char), + 't' => Ok(9 as char), + 'n' => Ok(10 as char), + 'r' => Ok(13 as char), + '\"' => Ok(34 as char), + '\'' => Ok(39 as char), + '\\' => Ok(92 as char), + _ => Err(InputParserError::invalid_char(character.value, &character.span)), + }; } Err(InputParserError::invalid_char(character.value, &character.span)) } Self::Hex(character) => { - let hex_string_number = character.value[3..character.value.len()].to_string(); + let hex_string_number = character.value[2..character.value.len()].to_string(); if let Ok(number) = u8::from_str_radix(&hex_string_number, 16) { if number < 127 { return Ok(number as char); diff --git a/parser/src/tokenizer/lexer.rs b/parser/src/tokenizer/lexer.rs index b85df00bab..d0e370e88c 
100644 --- a/parser/src/tokenizer/lexer.rs +++ b/parser/src/tokenizer/lexer.rs @@ -239,7 +239,7 @@ impl Token { } } - if !end { + if i == input.len() || i == 1 || !end { return (0, None); } diff --git a/tests/compiler/string/circuit.leo b/tests/compiler/string/circuit.leo index 43326849f2..4d25067c3b 100644 --- a/tests/compiler/string/circuit.leo +++ b/tests/compiler/string/circuit.leo @@ -9,7 +9,7 @@ circuit Foo { s1: [char; 13]; } -function takes_string(s: [char; 13]) -> bool { +function takes_string(s: [char; 13]) -> [char; 13] { return s == `Hello, World!`; } diff --git a/tests/compiler/string/equality.leo b/tests/compiler/string/equality.leo index 460941b18b..1de6273686 100644 --- a/tests/compiler/string/equality.leo +++ b/tests/compiler/string/equality.leo @@ -3,6 +3,7 @@ namespace: Compile expectation: Pass input_file: - inputs/string.in + - inputs/weird.in */ function main(s1: [char; 13], s2: [char; 4]) -> bool { diff --git a/tests/compiler/string/inputs/string.in b/tests/compiler/string/inputs/string.in index c0d4b5605b..caf5c3fbb3 100644 --- a/tests/compiler/string/inputs/string.in +++ b/tests/compiler/string/inputs/string.in @@ -1,6 +1,6 @@ [main] -s1: [char; 13] = ['H', 'e', 'l', 'l', 'o', ',', '\u{20}', 'W', 'o', 'r', 'l', 'd', '!']; -s2: [char; 4] = ['t', 'e', 's', 't']; +s1: [char; 13] = "Hello, World!"; +s2: [char; 4] = "test"; [registers] out: bool = true; \ No newline at end of file diff --git a/tests/compiler/string/inputs/string_out.in b/tests/compiler/string/inputs/string_out.in index e3417677af..71cb6e7380 100644 --- a/tests/compiler/string/inputs/string_out.in +++ b/tests/compiler/string/inputs/string_out.in @@ -1,6 +1,6 @@ [main] -s1: [char; 13] = ['H', 'e', 'l', 'l', 'o', ',', '\u{20}', 'W', 'o', 'r', 'l', 'd', '!']; -s2: [char; 4] = ['t', 'e', 's', 't']; +s1: [char; 13] = "Hello, World!"; +s2: [char; 4] = "test": [registers] -out: [char; 13] = ['H', 'e', 'l', 'l', 'o', ',', '\u{20}', 'W', 'o', 'r', 'l', 'd', '!']; \ No newline at end of 
file +out: [char; 13] = "Hello, World!"; \ No newline at end of file diff --git a/tests/compiler/string/inputs/weird.in b/tests/compiler/string/inputs/weird.in new file mode 100644 index 0000000000..321bed9bae --- /dev/null +++ b/tests/compiler/string/inputs/weird.in @@ -0,0 +1,6 @@ +[main] +s1: [char; 13] = "\"ello, World\""; +s2: [char; 4] = "\u{2764}\x2A\x09\u{2764}"; + +[registers] +out: bool = true; \ No newline at end of file diff --git a/tests/expectations/parser/parser/expression/literal/char_fail.leo.out b/tests/expectations/parser/parser/expression/literal/char_fail.leo.out index fb3cc4bdcc..ae4e3247ba 100644 --- a/tests/expectations/parser/parser/expression/literal/char_fail.leo.out +++ b/tests/expectations/parser/parser/expression/literal/char_fail.leo.out @@ -1,5 +1,5 @@ --- -namespace: ParseExpression +namespace: Token expectation: Fail outputs: - " --> test:1:1\n |\n 1 | '\\'\n | ^\n |\n = unexpected token: '''" diff --git a/tests/expectations/parser/parser/expression/literal/string copy.leo.out b/tests/expectations/parser/parser/expression/literal/string copy.leo.out new file mode 100644 index 0000000000..7402c5eb45 --- /dev/null +++ b/tests/expectations/parser/parser/expression/literal/string copy.leo.out @@ -0,0 +1,151 @@ +--- +namespace: ParseExpression +expectation: Pass +outputs: + - Value: + String: + - - s + - t + - r + - i + - n + - g + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 9 + path: test + content: "`string`" + - Value: + String: + - - a + - n + - o + - t + - h + - e + - r + - " " + - "{" + - " " + - "}" + - " " + - s + - t + - r + - i + - n + - g + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 21 + path: test + content: "`another { } string`" + - Value: + String: + - - "{" + - " " + - "]" + - " " + - "[" + - " " + - ; + - " " + - a + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 12 + path: test + content: "`{ ] [ ; a`" + - Value: + String: + - - ࿺ + - line_start: 1 + line_stop: 1 + col_start: 1 + 
col_stop: 10 + path: test + content: "`\\u{FFA}`" + - Value: + String: + - - 򯫺 + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 12 + path: test + content: "`\\u{afafa}`" + - Value: + String: + - - 꾯 + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 11 + path: test + content: "`\\u{afaf}`" + - Value: + String: + - - ૺ + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 10 + path: test + content: "`\\u{afa}`" + - Value: + String: + - - ¯ + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 9 + path: test + content: "`\\u{af}`" + - Value: + String: + - - "\n" + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 8 + path: test + content: "`\\u{a}`" + - Value: + String: + - - "\n" + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 7 + path: test + content: "`\\x0A`" + - Value: + String: + - - a + - a + - " " + - "\\" + - " " + - "\"" + - " " + - " " + - "\n" + - " " + - a + - a + - " " + - "\t" + - " " + - "\r" + - " " + - " " + - "\u0000" + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 28 + path: test + content: "`aa \\\\ \\\" \\n aa \\t \\r \\0`" diff --git a/tests/expectations/parser/parser/expression/literal/string.leo.out b/tests/expectations/parser/parser/expression/literal/string.leo.out new file mode 100644 index 0000000000..c78bafb527 --- /dev/null +++ b/tests/expectations/parser/parser/expression/literal/string.leo.out @@ -0,0 +1,15 @@ +--- +namespace: Token +expectation: Pass +outputs: + - "'\"string\"' @ 1:1-9" + - "'\"another { } string\"' @ 1:1-21" + - "'\"{ ] [ ; a\"' @ 1:1-12" + - "'\"࿺\"' @ 1:1-10" + - "'\"򯫺\"' @ 1:1-12" + - "'\"꾯\"' @ 1:1-11" + - "'\"ૺ\"' @ 1:1-10" + - "'\"¯\"' @ 1:1-9" + - "'\"\n\"' @ 1:1-8" + - "'\"\n\"' @ 1:1-7" + - "'\"aa \\ \" \n aa \t \r \u0000\"' @ 1:1-28" diff --git a/tests/expectations/parser/parser/expression/literal/string_fail.leo.out b/tests/expectations/parser/parser/expression/literal/string_fail.leo.out new file mode 100644 index 0000000000..84f3088ba8 --- 
/dev/null +++ b/tests/expectations/parser/parser/expression/literal/string_fail.leo.out @@ -0,0 +1,6 @@ +--- +namespace: Token +expectation: Fail +outputs: + - " --> test:1:1\n |\n 1 | ``\n | ^\n |\n = unexpected token: '`'" + - " --> test:1:1\n |\n 1 | `Hello world!\n | ^\n |\n = unexpected token: '`'" diff --git a/tests/expectations/parser/parser/expression/literal/string_parse.leo.out b/tests/expectations/parser/parser/expression/literal/string_parse.leo.out new file mode 100644 index 0000000000..7402c5eb45 --- /dev/null +++ b/tests/expectations/parser/parser/expression/literal/string_parse.leo.out @@ -0,0 +1,151 @@ +--- +namespace: ParseExpression +expectation: Pass +outputs: + - Value: + String: + - - s + - t + - r + - i + - n + - g + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 9 + path: test + content: "`string`" + - Value: + String: + - - a + - n + - o + - t + - h + - e + - r + - " " + - "{" + - " " + - "}" + - " " + - s + - t + - r + - i + - n + - g + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 21 + path: test + content: "`another { } string`" + - Value: + String: + - - "{" + - " " + - "]" + - " " + - "[" + - " " + - ; + - " " + - a + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 12 + path: test + content: "`{ ] [ ; a`" + - Value: + String: + - - ࿺ + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 10 + path: test + content: "`\\u{FFA}`" + - Value: + String: + - - 򯫺 + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 12 + path: test + content: "`\\u{afafa}`" + - Value: + String: + - - 꾯 + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 11 + path: test + content: "`\\u{afaf}`" + - Value: + String: + - - ૺ + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 10 + path: test + content: "`\\u{afa}`" + - Value: + String: + - - ¯ + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 9 + path: test + content: "`\\u{af}`" + - Value: + String: + - - "\n" + - line_start: 1 + line_stop: 1 
+ col_start: 1 + col_stop: 8 + path: test + content: "`\\u{a}`" + - Value: + String: + - - "\n" + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 7 + path: test + content: "`\\x0A`" + - Value: + String: + - - a + - a + - " " + - "\\" + - " " + - "\"" + - " " + - " " + - "\n" + - " " + - a + - a + - " " + - "\t" + - " " + - "\r" + - " " + - " " + - "\u0000" + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 28 + path: test + content: "`aa \\\\ \\\" \\n aa \\t \\r \\0`" diff --git a/tests/parser/expression/literal/char_fail.leo b/tests/parser/expression/literal/char_fail.leo index 565c6f3922..d69ae92ffc 100644 --- a/tests/parser/expression/literal/char_fail.leo +++ b/tests/parser/expression/literal/char_fail.leo @@ -1,5 +1,5 @@ /* -namespace: ParseExpression +namespace: Token expectation: Fail */ diff --git a/tests/parser/expression/literal/string.leo b/tests/parser/expression/literal/string.leo new file mode 100644 index 0000000000..75d7405b3b --- /dev/null +++ b/tests/parser/expression/literal/string.leo @@ -0,0 +1,21 @@ +/* +namespace: Token +expectation: Pass +*/ + +`string` + +`another { } string` + +`{ ] [ ; a` + +`\u{FFA}` +`\u{afafa}` +`\u{afaf}` +`\u{afa}` +`\u{af}` +`\u{a}` + +`\x0A` + +`aa \\ \" \n aa \t \r \0` diff --git a/tests/parser/expression/literal/string_fail.leo b/tests/parser/expression/literal/string_fail.leo new file mode 100644 index 0000000000..68b7f07416 --- /dev/null +++ b/tests/parser/expression/literal/string_fail.leo @@ -0,0 +1,8 @@ +/* +namespace: Token +expectation: Fail +*/ + +`` + +`Hello world! 
\ No newline at end of file diff --git a/tests/parser/expression/string.leo b/tests/parser/expression/literal/string_parse.leo similarity index 100% rename from tests/parser/expression/string.leo rename to tests/parser/expression/literal/string_parse.leo From bce10cc88530441a5bbafdb4201b776af10162f8 Mon Sep 17 00:00:00 2001 From: gluax Date: Sat, 22 May 2021 18:21:11 -0400 Subject: [PATCH 16/20] quick test fix --- tests/compiler/string/circuit.leo | 2 +- tests/compiler/string/inputs/string_out.in | 1 - tests/expectations/compiler/compiler/char/circuit.leo.out | 2 +- .../expectations/compiler/compiler/string/equality.leo.out | 6 ++++++ 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/compiler/string/circuit.leo b/tests/compiler/string/circuit.leo index 4d25067c3b..43326849f2 100644 --- a/tests/compiler/string/circuit.leo +++ b/tests/compiler/string/circuit.leo @@ -9,7 +9,7 @@ circuit Foo { s1: [char; 13]; } -function takes_string(s: [char; 13]) -> [char; 13] { +function takes_string(s: [char; 13]) -> bool { return s == `Hello, World!`; } diff --git a/tests/compiler/string/inputs/string_out.in b/tests/compiler/string/inputs/string_out.in index 71cb6e7380..b35cb990e7 100644 --- a/tests/compiler/string/inputs/string_out.in +++ b/tests/compiler/string/inputs/string_out.in @@ -1,6 +1,5 @@ [main] s1: [char; 13] = "Hello, World!"; -s2: [char; 4] = "test": [registers] out: [char; 13] = "Hello, World!"; \ No newline at end of file diff --git a/tests/expectations/compiler/compiler/char/circuit.leo.out b/tests/expectations/compiler/compiler/char/circuit.leo.out index 9a578e2249..1f6342d9b3 100644 --- a/tests/expectations/compiler/compiler/char/circuit.leo.out +++ b/tests/expectations/compiler/compiler/char/circuit.leo.out @@ -33,7 +33,7 @@ outputs: registers: r: type: char - value: "\n" + value: "*" - input_file: inputs/unicode.in output: registers: diff --git a/tests/expectations/compiler/compiler/string/equality.leo.out 
b/tests/expectations/compiler/compiler/string/equality.leo.out index 21cc0616d7..53a3a8c78e 100644 --- a/tests/expectations/compiler/compiler/string/equality.leo.out +++ b/tests/expectations/compiler/compiler/string/equality.leo.out @@ -16,3 +16,9 @@ outputs: out: type: bool value: "true" + - input_file: inputs/weird.in + output: + registers: + out: + type: bool + value: "false" From 7145a751d91aa117d2f1d21a8517ce6ff56b068f Mon Sep 17 00:00:00 2001 From: gluax Date: Sat, 22 May 2021 21:15:07 -0400 Subject: [PATCH 17/20] console.log refactor, concat test... has asg or TIPhase reducer bug --- ast/src/annotation.rs | 2 +- ast/src/errors/reducer.rs | 6 + .../statements/console/formatted_string.rs | 37 +- compiler/src/console/format.rs | 4 +- compiler/src/phases/reducing_director.rs | 8 +- .../string_transformation.json | 587 +++++++++--------- .../string_transformation.leo | 2 +- compiler/tests/type_inference/basic.json | 34 +- compiler/tests/type_inference/basic.leo | 2 +- parser/src/parser/statement.rs | 17 +- parser/src/tokenizer/lexer.rs | 51 +- parser/src/tokenizer/token.rs | 25 - .../global_consts/global_const_types.leo | 4 +- tests/compiler/string/circuit.leo | 4 +- tests/compiler/string/equality.leo | 7 +- .../expression/literal/string_fail.leo.out | 4 +- .../expression/literal/string_parse.leo.out | 22 +- tests/parser/expression/literal/string.leo | 22 +- .../parser/expression/literal/string_fail.leo | 4 +- .../expression/literal/string_parse.leo | 22 +- 20 files changed, 417 insertions(+), 447 deletions(-) diff --git a/ast/src/annotation.rs b/ast/src/annotation.rs index 5720e55a4b..f58ff5ed7c 100644 --- a/ast/src/annotation.rs +++ b/ast/src/annotation.rs @@ -19,7 +19,7 @@ use crate::{Identifier, Span}; use serde::{Deserialize, Serialize}; use tendril::StrTendril; -#[derive(Clone, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] pub struct Annotation { pub span: Span, pub name: Identifier, diff --git 
a/ast/src/errors/reducer.rs b/ast/src/errors/reducer.rs index e8b582af85..eda2e8464a 100644 --- a/ast/src/errors/reducer.rs +++ b/ast/src/errors/reducer.rs @@ -35,6 +35,12 @@ impl ReducerError { ReducerError::Error(FormattedError::new_from_span(message, span)) } + pub fn failed_to_convert_tendril_to_char(tendril: String, span: &Span) -> Self { + let message = format!("Failed to convert tendril `{}` to char", tendril); + + Self::new_from_span(message, span) + } + pub fn impossible_console_assert_call(span: &Span) -> Self { let message = "Console::Assert cannot be matched here, its handled in another case.".to_string(); diff --git a/ast/src/statements/console/formatted_string.rs b/ast/src/statements/console/formatted_string.rs index 5013b8cb93..11c177ef7c 100644 --- a/ast/src/statements/console/formatted_string.rs +++ b/ast/src/statements/console/formatted_string.rs @@ -18,14 +18,43 @@ use crate::{Expression, Node, Span}; use serde::{Deserialize, Serialize}; use std::fmt; -use tendril::StrTendril; #[derive(Clone, PartialEq, Eq, Serialize, Deserialize, Debug)] pub enum FormatStringPart { - Const(#[serde(with = "crate::common::tendril_json")] StrTendril), + Const(char), Container, } +impl FormatStringPart { + pub fn from_string(string: Vec) -> Vec { + let mut parts = Vec::new(); + let mut in_container = false; + let mut i = 0; + + while i < string.len() { + let character = string[i]; + + match character { + '{' if !in_container => in_container = true, + '}' if in_container => { + in_container = false; + parts.push(FormatStringPart::Container); + } + _ if in_container => { + in_container = false; + parts.push(FormatStringPart::Const('{')); + continue; + } + _ => parts.push(FormatStringPart::Const(character)), + } + + i += 1; + } + + parts + } +} + #[derive(Clone, PartialEq, Eq, Serialize, Deserialize, Debug)] pub struct FormatString { pub parts: Vec, @@ -41,8 +70,8 @@ impl fmt::Display for FormatString { self.parts .iter() .map(|x| match x { - FormatStringPart::Const(x) 
=> x, - FormatStringPart::Container => "{}", + FormatStringPart::Const(x) => x.to_string(), + FormatStringPart::Container => "{}".to_string(), }) .collect::>() .join("") diff --git a/compiler/src/console/format.rs b/compiler/src/console/format.rs index 342cb8d7fe..7372fa69d2 100644 --- a/compiler/src/console/format.rs +++ b/compiler/src/console/format.rs @@ -51,8 +51,8 @@ impl<'a, F: PrimeField, G: GroupType> ConstrainedProgram<'a, F, G> { let mut parameters = executed_containers.iter(); for part in formatted.parts.iter() { match part { - FormatStringPart::Const(c) => out.push(&**c), - FormatStringPart::Container => out.push(&**parameters.next().unwrap()), + FormatStringPart::Const(c) => out.push(c.to_string()), + FormatStringPart::Container => out.push(parameters.next().unwrap().to_string()), } } diff --git a/compiler/src/phases/reducing_director.rs b/compiler/src/phases/reducing_director.rs index 0138ca6d67..1b54168d35 100644 --- a/compiler/src/phases/reducing_director.rs +++ b/compiler/src/phases/reducing_director.rs @@ -116,8 +116,10 @@ impl CombineAstAsgDirector { } pub fn reduce_type(&mut self, ast: &AstType, asg: &AsgType, span: &Span) -> Result { + println!("Hellllloooo"); let new = match (ast, asg) { (AstType::Array(ast_type, ast_dimensions), AsgType::Array(asg_type, asg_dimensions)) => { + println!("astd {}, asgd {}", ast_dimensions, asg_dimensions); if self.options.type_inference_enabled() { AstType::Array( Box::new(self.reduce_type(ast_type, asg_type, span)?), @@ -434,8 +436,12 @@ impl CombineAstAsgDirector { ConstValue::Char(_) => { if let Some(c) = tendril.chars().next() { new = ValueExpression::Char(c, span.clone()); + } else { + return Err(ReducerError::failed_to_convert_tendril_to_char( + tendril.to_string(), + span, + )); } - // TODO RETURN ERR } _ => unimplemented!(), // impossible? 
} diff --git a/compiler/tests/canonicalization/string_transformation.json b/compiler/tests/canonicalization/string_transformation.json index 03744195c0..9abfbb842f 100644 --- a/compiler/tests/canonicalization/string_transformation.json +++ b/compiler/tests/canonicalization/string_transformation.json @@ -1,310 +1,309 @@ { - "name": "", - "expected_input": [], - "imports": [], - "circuits": {}, - "global_consts": {}, - "functions": { - "{\"name\":\"main\",\"span\":\"{\\\"line_start\\\":1,\\\"line_stop\\\":1,\\\"col_start\\\":10,\\\"col_stop\\\":14,\\\"path\\\":\\\"\\\",\\\"content\\\":\\\"function main() {\\\"}\"}": { - "annotations": [], - "identifier": "{\"name\":\"main\",\"span\":\"{\\\"line_start\\\":1,\\\"line_stop\\\":1,\\\"col_start\\\":10,\\\"col_stop\\\":14,\\\"path\\\":\\\"\\\",\\\"content\\\":\\\"function main() {\\\"}\"}", - "input": [], - "output": { - "Tuple": [] - }, - "block": { - "statements": [ - { - "Definition": { - "declaration_type": "Let", - "variable_names": [ - { - "mutable": true, - "identifier": "{\"name\":\"s\",\"span\":\"{\\\"line_start\\\":2,\\\"line_stop\\\":2,\\\"col_start\\\":9,\\\"col_stop\\\":10,\\\"path\\\":\\\"\\\",\\\"content\\\":\\\" let s = `Hello, World!`;\\\"}\"}", - "span": { - "line_start": 2, - "line_stop": 2, - "col_start": 9, - "col_stop": 10, - "path": "", - "content": " let s = `Hello, World!`;" - } + "name": "", + "expected_input": [], + "imports": [], + "circuits": {}, + "global_consts": {}, + "functions": { + "{\"name\":\"main\",\"span\":\"{\\\"line_start\\\":1,\\\"line_stop\\\":1,\\\"col_start\\\":10,\\\"col_stop\\\":14,\\\"path\\\":\\\"\\\",\\\"content\\\":\\\"function main() {\\\"}\"}": { + "annotations": [], + "identifier": "{\"name\":\"main\",\"span\":\"{\\\"line_start\\\":1,\\\"line_stop\\\":1,\\\"col_start\\\":10,\\\"col_stop\\\":14,\\\"path\\\":\\\"\\\",\\\"content\\\":\\\"function main() {\\\"}\"}", + "input": [], + "output": { + "Tuple": [] + }, + "block": { + "statements": [ + { + "Definition": { + 
"declaration_type": "Let", + "variable_names": [ + { + "mutable": true, + "identifier": "{\"name\":\"s\",\"span\":\"{\\\"line_start\\\":2,\\\"line_stop\\\":2,\\\"col_start\\\":9,\\\"col_stop\\\":10,\\\"path\\\":\\\"\\\",\\\"content\\\":\\\" let s = \\\\\\\"Hello, World!\\\\\\\";\\\"}\"}", + "span": { + "line_start": 2, + "line_stop": 2, + "col_start": 9, + "col_stop": 10, + "path": "", + "content": " let s = \"Hello, World!\";" } - ], - "type_": { - "Array": [ - "Char", - [ - { - "value": "13" - } - ] - ] - }, - "value": { - "ArrayInline": { - "elements": [ - { - "Expression": { - "Value": { - "Char": [ - "H", - { - "line_start": 2, - "line_stop": 2, - "col_start": 13, - "col_stop": 28, - "path": "", - "content": " let s = `Hello, World!`;" - } - ] - } - } - }, - { - "Expression": { - "Value": { - "Char": [ - "e", - { - "line_start": 2, - "line_stop": 2, - "col_start": 13, - "col_stop": 28, - "path": "", - "content": " let s = `Hello, World!`;" - } - ] - } - } - }, - { - "Expression": { - "Value": { - "Char": [ - "l", - { - "line_start": 2, - "line_stop": 2, - "col_start": 13, - "col_stop": 28, - "path": "", - "content": " let s = `Hello, World!`;" - } - ] - } - } - }, - { - "Expression": { - "Value": { - "Char": [ - "l", - { - "line_start": 2, - "line_stop": 2, - "col_start": 13, - "col_stop": 28, - "path": "", - "content": " let s = `Hello, World!`;" - } - ] - } - } - }, - { - "Expression": { - "Value": { - "Char": [ - "o", - { - "line_start": 2, - "line_stop": 2, - "col_start": 13, - "col_stop": 28, - "path": "", - "content": " let s = `Hello, World!`;" - } - ] - } - } - }, - { - "Expression": { - "Value": { - "Char": [ - ",", - { - "line_start": 2, - "line_stop": 2, - "col_start": 13, - "col_stop": 28, - "path": "", - "content": " let s = `Hello, World!`;" - } - ] - } - } - }, - { - "Expression": { - "Value": { - "Char": [ - " ", - { - "line_start": 2, - "line_stop": 2, - "col_start": 13, - "col_stop": 28, - "path": "", - "content": " let s = `Hello, World!`;" 
- } - ] - } - } - }, - { - "Expression": { - "Value": { - "Char": [ - "W", - { - "line_start": 2, - "line_stop": 2, - "col_start": 13, - "col_stop": 28, - "path": "", - "content": " let s = `Hello, World!`;" - } - ] - } - } - }, - { - "Expression": { - "Value": { - "Char": [ - "o", - { - "line_start": 2, - "line_stop": 2, - "col_start": 13, - "col_stop": 28, - "path": "", - "content": " let s = `Hello, World!`;" - } - ] - } - } - }, - { - "Expression": { - "Value": { - "Char": [ - "r", - { - "line_start": 2, - "line_stop": 2, - "col_start": 13, - "col_stop": 28, - "path": "", - "content": " let s = `Hello, World!`;" - } - ] - } - } - }, - { - "Expression": { - "Value": { - "Char": [ - "l", - { - "line_start": 2, - "line_stop": 2, - "col_start": 13, - "col_stop": 28, - "path": "", - "content": " let s = `Hello, World!`;" - } - ] - } - } - }, - { - "Expression": { - "Value": { - "Char": [ - "d", - { - "line_start": 2, - "line_stop": 2, - "col_start": 13, - "col_stop": 28, - "path": "", - "content": " let s = `Hello, World!`;" - } - ] - } - } - }, - { - "Expression": { - "Value": { - "Char": [ - "!", - { - "line_start": 2, - "line_stop": 2, - "col_start": 13, - "col_stop": 28, - "path": "", - "content": " let s = `Hello, World!`;" - } - ] - } - } - } - ], - "span": { - "line_start": 2, - "line_stop": 2, - "col_start": 13, - "col_stop": 28, - "path": "", - "content": " let s = `Hello, World!`;" - } - } - }, - "span": { - "line_start": 2, - "line_stop": 2, - "col_start": 5, - "col_stop": 28, - "path": "", - "content": " let s = `Hello, World!`;" } + ], + "type_": { + "Array": [ + "Char", + [ + { + "value": "13" + } + ] + ] + }, + "value": { + "ArrayInline": { + "elements": [ + { + "Expression": { + "Value": { + "Char": [ + "H", + { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = \"Hello, World!\";" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "e", + { + "line_start": 2, + 
"line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = \"Hello, World!\";" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "l", + { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = \"Hello, World!\";" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "l", + { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = \"Hello, World!\";" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "o", + { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = \"Hello, World!\";" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + ",", + { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = \"Hello, World!\";" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + " ", + { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = \"Hello, World!\";" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "W", + { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = \"Hello, World!\";" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "o", + { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = \"Hello, World!\";" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "r", + { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = \"Hello, World!\";" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "l", + { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = \"Hello, World!\";" 
+ } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "d", + { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = \"Hello, World!\";" + } + ] + } + } + }, + { + "Expression": { + "Value": { + "Char": [ + "!", + { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = \"Hello, World!\";" + } + ] + } + } + } + ], + "span": { + "line_start": 2, + "line_stop": 2, + "col_start": 13, + "col_stop": 28, + "path": "", + "content": " let s = \"Hello, World!\";" + } + } + }, + "span": { + "line_start": 2, + "line_stop": 2, + "col_start": 5, + "col_stop": 28, + "path": "", + "content": " let s = \"Hello, World!\";" } } - ], - "span": { - "line_start": 1, - "line_stop": 3, - "col_start": 17, - "col_stop": 2, - "path": "", - "content": "function main() {\n...\n}" } - }, + ], "span": { "line_start": 1, "line_stop": 3, - "col_start": 1, + "col_start": 17, "col_stop": 2, "path": "", "content": "function main() {\n...\n}" } + }, + "span": { + "line_start": 1, + "line_stop": 3, + "col_start": 1, + "col_stop": 2, + "path": "", + "content": "function main() {\n...\n}" } } } - \ No newline at end of file +} diff --git a/compiler/tests/canonicalization/string_transformation.leo b/compiler/tests/canonicalization/string_transformation.leo index 0f2e78e1a9..e3830d38ee 100644 --- a/compiler/tests/canonicalization/string_transformation.leo +++ b/compiler/tests/canonicalization/string_transformation.leo @@ -1,3 +1,3 @@ function main() { - let s = `Hello, World!`; + let s = "Hello, World!"; } \ No newline at end of file diff --git a/compiler/tests/type_inference/basic.json b/compiler/tests/type_inference/basic.json index fdd3bf2e15..82a82842ee 100644 --- a/compiler/tests/type_inference/basic.json +++ b/compiler/tests/type_inference/basic.json @@ -1034,14 +1034,14 @@ "variable_names": [ { "mutable": false, - "identifier": 
"{\"name\":\"o\",\"span\":\"{\\\"line_start\\\":24,\\\"line_stop\\\":24,\\\"col_start\\\":9,\\\"col_stop\\\":10,\\\"path\\\":\\\"\\\",\\\"content\\\":\\\" const o = `Hello, World!`;\\\"}\"}", + "identifier": "{\"name\":\"o\",\"span\":\"{\\\"line_start\\\":24,\\\"line_stop\\\":24,\\\"col_start\\\":9,\\\"col_stop\\\":10,\\\"path\\\":\\\"\\\",\\\"content\\\":\\\" const o = \\\\\\\"Hello, World!\\\\\\\";\\\"}\"}", "span": { "line_start": 24, "line_stop": 24, "col_start": 9, "col_stop": 10, "path": "", - "content": " const o = `Hello, World!`;" + "content": " const o = \"Hello, World!\";" } } ], @@ -1069,7 +1069,7 @@ "col_start": 13, "col_stop": 28, "path": "", - "content": " const o = `Hello, World!`;" + "content": " const o = \"Hello, World!\";" } ] } @@ -1086,7 +1086,7 @@ "col_start": 13, "col_stop": 28, "path": "", - "content": " const o = `Hello, World!`;" + "content": " const o = \"Hello, World!\";" } ] } @@ -1103,7 +1103,7 @@ "col_start": 13, "col_stop": 28, "path": "", - "content": " const o = `Hello, World!`;" + "content": " const o = \"Hello, World!\";" } ] } @@ -1120,7 +1120,7 @@ "col_start": 13, "col_stop": 28, "path": "", - "content": " const o = `Hello, World!`;" + "content": " const o = \"Hello, World!\";" } ] } @@ -1137,7 +1137,7 @@ "col_start": 13, "col_stop": 28, "path": "", - "content": " const o = `Hello, World!`;" + "content": " const o = \"Hello, World!\";" } ] } @@ -1154,7 +1154,7 @@ "col_start": 13, "col_stop": 28, "path": "", - "content": " const o = `Hello, World!`;" + "content": " const o = \"Hello, World!\";" } ] } @@ -1171,7 +1171,7 @@ "col_start": 13, "col_stop": 28, "path": "", - "content": " const o = `Hello, World!`;" + "content": " const o = \"Hello, World!\";" } ] } @@ -1188,7 +1188,7 @@ "col_start": 13, "col_stop": 28, "path": "", - "content": " const o = `Hello, World!`;" + "content": " const o = \"Hello, World!\";" } ] } @@ -1205,7 +1205,7 @@ "col_start": 13, "col_stop": 28, "path": "", - "content": " const o = `Hello, World!`;" + 
"content": " const o = \"Hello, World!\";" } ] } @@ -1222,7 +1222,7 @@ "col_start": 13, "col_stop": 28, "path": "", - "content": " const o = `Hello, World!`;" + "content": " const o = \"Hello, World!\";" } ] } @@ -1239,7 +1239,7 @@ "col_start": 13, "col_stop": 28, "path": "", - "content": " const o = `Hello, World!`;" + "content": " const o = \"Hello, World!\";" } ] } @@ -1256,7 +1256,7 @@ "col_start": 13, "col_stop": 28, "path": "", - "content": " const o = `Hello, World!`;" + "content": " const o = \"Hello, World!\";" } ] } @@ -1273,7 +1273,7 @@ "col_start": 13, "col_stop": 28, "path": "", - "content": " const o = `Hello, World!`;" + "content": " const o = \"Hello, World!\";" } ] } @@ -1286,7 +1286,7 @@ "col_start": 13, "col_stop": 28, "path": "", - "content": " const o = `Hello, World!`;" + "content": " const o = \"Hello, World!\";" } } }, @@ -1296,7 +1296,7 @@ "col_start": 3, "col_stop": 28, "path": "", - "content": " const o = `Hello, World!`;" + "content": " const o = \"Hello, World!\";" } } } diff --git a/compiler/tests/type_inference/basic.leo b/compiler/tests/type_inference/basic.leo index f3dae7a734..66b714a602 100644 --- a/compiler/tests/type_inference/basic.leo +++ b/compiler/tests/type_inference/basic.leo @@ -21,5 +21,5 @@ function main() { const l = (1u8, 1u8, true); const m = Foo {}; const n = 'a'; - const o = `Hello, World!`; + const o = "Hello, World!"; } \ No newline at end of file diff --git a/parser/src/parser/statement.rs b/parser/src/parser/statement.rs index e83dfa732f..dcdeb943d3 100644 --- a/parser/src/parser/statement.rs +++ b/parser/src/parser/statement.rs @@ -226,16 +226,19 @@ impl ParserContext { /// pub fn parse_formatted_string(&mut self) -> SyntaxResult { let start_span; - let parts = match self.expect_any()? { + let string = match self.expect_any()? 
{ SpannedToken { - token: Token::FormatString(parts), + token: Token::StringLiteral(chars), span, } => { start_span = span; - parts + chars } SpannedToken { token, span } => return Err(SyntaxError::unexpected_str(&token, "formatted string", &span)), }; + + let parts = FormatStringPart::from_string(string); + let mut parameters = Vec::new(); while self.eat(Token::Comma).is_some() { let param = self.parse_expression()?; @@ -243,13 +246,7 @@ impl ParserContext { } Ok(FormatString { - parts: parts - .into_iter() - .map(|x| match x { - crate::FormatStringPart::Const(value) => FormatStringPart::Const(value), - crate::FormatStringPart::Container => FormatStringPart::Container, - }) - .collect(), + parts, span: &start_span + parameters.last().map(|x| x.span()).unwrap_or(&start_span), parameters, }) diff --git a/parser/src/tokenizer/lexer.rs b/parser/src/tokenizer/lexer.rs index d0e370e88c..048929f064 100644 --- a/parser/src/tokenizer/lexer.rs +++ b/parser/src/tokenizer/lexer.rs @@ -14,7 +14,7 @@ // You should have received a copy of the GNU General Public License // along with the Leo library. If not, see . 
-use crate::tokenizer::{FormatStringPart, Token}; +use crate::tokenizer::Token; use leo_ast::Span; use serde::{Deserialize, Serialize}; use tendril::StrTendril; @@ -168,7 +168,7 @@ impl Token { let input = input_tendril[..].as_bytes(); match input[0] { x if x.is_ascii_whitespace() => return (1, None), - b'`' => { + b'"' => { let mut i = 1; let mut len: u32 = 1; let mut start = 1; @@ -181,7 +181,7 @@ impl Token { while i < input.len() { if !in_escape { - if input[i] == b'`' { + if input[i] == b'"' { end = true; break; } else if input[i] == b'\\' { @@ -245,51 +245,6 @@ impl Token { return (i + 1, Some(Token::StringLiteral(string))); } - b'"' => { - let mut i = 1; - let mut in_escape = false; - let mut start = 1usize; - let mut segments = Vec::new(); - while i < input.len() { - if !in_escape { - if input[i] == b'"' { - break; - } - if input[i] == b'\\' { - in_escape = !in_escape; - } else if i < input.len() - 1 && input[i] == b'{' { - if i < input.len() - 2 && input[i + 1] == b'{' { - i += 2; - continue; - } else if input[i + 1] != b'}' { - i += 1; - continue; - } - if start < i { - segments.push(FormatStringPart::Const( - input_tendril.subtendril(start as u32, (i - start) as u32), - )); - } - segments.push(FormatStringPart::Container); - start = i + 2; - i = start; - continue; - } - } else { - in_escape = false; - } - i += 1; - } - if i == input.len() { - return (0, None); - } - if start < i { - segments.push(FormatStringPart::Const( - input_tendril.subtendril(start as u32, (i - start) as u32), - )); - } - return (i + 1, Some(Token::FormatString(segments))); - } b'\'' => { let mut i = 1; let mut in_escape = false; diff --git a/parser/src/tokenizer/token.rs b/parser/src/tokenizer/token.rs index 1cab255843..11d9e1d153 100644 --- a/parser/src/tokenizer/token.rs +++ b/parser/src/tokenizer/token.rs @@ -18,22 +18,6 @@ use serde::{Deserialize, Serialize}; use std::fmt; use tendril::StrTendril; -/// Parts of a formatted string for logging to the console. 
-#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] -pub enum FormatStringPart { - Const(#[serde(with = "leo_ast::common::tendril_json")] StrTendril), - Container, -} - -impl fmt::Display for FormatStringPart { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - FormatStringPart::Const(c) => write!(f, "{}", c), - FormatStringPart::Container => write!(f, "{{}}"), - } - } -} - /// Represents all valid Leo syntax tokens. #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub enum Token { @@ -41,7 +25,6 @@ pub enum Token { // Literals CommentLine(#[serde(with = "leo_ast::common::tendril_json")] StrTendril), CommentBlock(#[serde(with = "leo_ast::common::tendril_json")] StrTendril), - FormatString(Vec), StringLiteral(Vec), Ident(#[serde(with = "leo_ast::common::tendril_json")] StrTendril), Int(#[serde(with = "leo_ast::common::tendril_json")] StrTendril), @@ -208,14 +191,6 @@ impl fmt::Display for Token { match self { CommentLine(s) => write!(f, "{}", s), CommentBlock(s) => write!(f, "{}", s), - FormatString(parts) => { - // todo escapes - write!(f, "\"")?; - for part in parts.iter() { - part.fmt(f)?; - } - write!(f, "\"") - } StringLiteral(content) => { write!(f, "\"")?; for character in content { diff --git a/tests/compiler/global_consts/global_const_types.leo b/tests/compiler/global_consts/global_const_types.leo index 5fba7a9316..8550672aeb 100644 --- a/tests/compiler/global_consts/global_const_types.leo +++ b/tests/compiler/global_consts/global_const_types.leo @@ -21,7 +21,7 @@ const use_another_const = basic + 1; const foo = Foo { width: 10, height: 20 }; const uno = uno(); const character = 'a'; -const hello = `Hello, World!`; +const hello = "Hello, World!"; circuit Foo { width: u32, @@ -51,5 +51,5 @@ function main(a: u32) -> bool { && foo.height == 20u32 && uno == 1u32 // function test && character == 'a' // char test - && hello == `Hello, World!`; + && hello == "Hello, World!"; } diff --git 
a/tests/compiler/string/circuit.leo b/tests/compiler/string/circuit.leo index 43326849f2..9905975b15 100644 --- a/tests/compiler/string/circuit.leo +++ b/tests/compiler/string/circuit.leo @@ -10,13 +10,13 @@ circuit Foo { } function takes_string(s: [char; 13]) -> bool { - return s == `Hello, World!`; + return s == "Hello, World!"; } function main(s1: [char; 13]) -> [char; 13] { let f = Foo { s1 }; let b = takes_string(s1); - let result = f.s1 == `Hello, World!` ? s1 : `abcdefghjklmn`; + let result = f.s1 == "Hello, World!" ? s1 : "abcdefghjklmn"; return result; } \ No newline at end of file diff --git a/tests/compiler/string/equality.leo b/tests/compiler/string/equality.leo index 1de6273686..b9a7e69c4a 100644 --- a/tests/compiler/string/equality.leo +++ b/tests/compiler/string/equality.leo @@ -7,6 +7,9 @@ input_file: */ function main(s1: [char; 13], s2: [char; 4]) -> bool { - let hello: [char; 13] = `Hello, World!`; - return hello == s1 && `nope` != s2; + let hello: [char; 13] = "Hello, World!"; + let part1 = "Good"; + let part2 = " dog!"; + let concat: [char; 9] = [...part1, ...part2]; + return hello == s1 && "nope" != s2 && "es" == s2[1..3] && concat == "Good dog!"; } \ No newline at end of file diff --git a/tests/expectations/parser/parser/expression/literal/string_fail.leo.out b/tests/expectations/parser/parser/expression/literal/string_fail.leo.out index 84f3088ba8..5a0cbfcf88 100644 --- a/tests/expectations/parser/parser/expression/literal/string_fail.leo.out +++ b/tests/expectations/parser/parser/expression/literal/string_fail.leo.out @@ -2,5 +2,5 @@ namespace: Token expectation: Fail outputs: - - " --> test:1:1\n |\n 1 | ``\n | ^\n |\n = unexpected token: '`'" - - " --> test:1:1\n |\n 1 | `Hello world!\n | ^\n |\n = unexpected token: '`'" + - " --> test:1:1\n |\n 1 | \"\"\n | ^\n |\n = unexpected token: '\"'" + - " --> test:1:1\n |\n 1 | \"Hello world!\n | ^\n |\n = unexpected token: '\"'" diff --git 
a/tests/expectations/parser/parser/expression/literal/string_parse.leo.out b/tests/expectations/parser/parser/expression/literal/string_parse.leo.out index 7402c5eb45..b149109755 100644 --- a/tests/expectations/parser/parser/expression/literal/string_parse.leo.out +++ b/tests/expectations/parser/parser/expression/literal/string_parse.leo.out @@ -15,7 +15,7 @@ outputs: col_start: 1 col_stop: 9 path: test - content: "`string`" + content: "\"string\"" - Value: String: - - a @@ -41,7 +41,7 @@ outputs: col_start: 1 col_stop: 21 path: test - content: "`another { } string`" + content: "\"another { } string\"" - Value: String: - - "{" @@ -58,7 +58,7 @@ outputs: col_start: 1 col_stop: 12 path: test - content: "`{ ] [ ; a`" + content: "\"{ ] [ ; a\"" - Value: String: - - ࿺ @@ -67,7 +67,7 @@ outputs: col_start: 1 col_stop: 10 path: test - content: "`\\u{FFA}`" + content: "\"\\u{FFA}\"" - Value: String: - - 򯫺 @@ -76,7 +76,7 @@ outputs: col_start: 1 col_stop: 12 path: test - content: "`\\u{afafa}`" + content: "\"\\u{afafa}\"" - Value: String: - - 꾯 @@ -85,7 +85,7 @@ outputs: col_start: 1 col_stop: 11 path: test - content: "`\\u{afaf}`" + content: "\"\\u{afaf}\"" - Value: String: - - ૺ @@ -94,7 +94,7 @@ outputs: col_start: 1 col_stop: 10 path: test - content: "`\\u{afa}`" + content: "\"\\u{afa}\"" - Value: String: - - ¯ @@ -103,7 +103,7 @@ outputs: col_start: 1 col_stop: 9 path: test - content: "`\\u{af}`" + content: "\"\\u{af}\"" - Value: String: - - "\n" @@ -112,7 +112,7 @@ outputs: col_start: 1 col_stop: 8 path: test - content: "`\\u{a}`" + content: "\"\\u{a}\"" - Value: String: - - "\n" @@ -121,7 +121,7 @@ outputs: col_start: 1 col_stop: 7 path: test - content: "`\\x0A`" + content: "\"\\x0A\"" - Value: String: - - a @@ -148,4 +148,4 @@ outputs: col_start: 1 col_stop: 28 path: test - content: "`aa \\\\ \\\" \\n aa \\t \\r \\0`" + content: "\"aa \\\\ \\\" \\n aa \\t \\r \\0\"" diff --git a/tests/parser/expression/literal/string.leo b/tests/parser/expression/literal/string.leo 
index 75d7405b3b..604d27a5e4 100644 --- a/tests/parser/expression/literal/string.leo +++ b/tests/parser/expression/literal/string.leo @@ -3,19 +3,19 @@ namespace: Token expectation: Pass */ -`string` +"string" -`another { } string` +"another { } string" -`{ ] [ ; a` +"{ ] [ ; a" -`\u{FFA}` -`\u{afafa}` -`\u{afaf}` -`\u{afa}` -`\u{af}` -`\u{a}` +"\u{FFA}" +"\u{afafa}" +"\u{afaf}" +"\u{afa}" +"\u{af}" +"\u{a}" -`\x0A` +"\x0A" -`aa \\ \" \n aa \t \r \0` +"aa \\ \" \n aa \t \r \0" diff --git a/tests/parser/expression/literal/string_fail.leo b/tests/parser/expression/literal/string_fail.leo index 68b7f07416..84ea369f22 100644 --- a/tests/parser/expression/literal/string_fail.leo +++ b/tests/parser/expression/literal/string_fail.leo @@ -3,6 +3,6 @@ namespace: Token expectation: Fail */ -`` +"" -`Hello world! \ No newline at end of file +"Hello world! \ No newline at end of file diff --git a/tests/parser/expression/literal/string_parse.leo b/tests/parser/expression/literal/string_parse.leo index 6e87bb69c1..b4e0e10422 100644 --- a/tests/parser/expression/literal/string_parse.leo +++ b/tests/parser/expression/literal/string_parse.leo @@ -3,19 +3,19 @@ namespace: ParseExpression expectation: Pass */ -`string` +"string" -`another { } string` +"another { } string" -`{ ] [ ; a` +"{ ] [ ; a" -`\u{FFA}` -`\u{afafa}` -`\u{afaf}` -`\u{afa}` -`\u{af}` -`\u{a}` +"\u{FFA}" +"\u{afafa}" +"\u{afaf}" +"\u{afa}" +"\u{af}" +"\u{a}" -`\x0A` +"\x0A" -`aa \\ \" \n aa \t \r \0` +"aa \\ \" \n aa \t \r \0" From 91f27bc046695ad4ce25d453053202f3351a3841 Mon Sep 17 00:00:00 2001 From: gluax Date: Sat, 22 May 2021 22:26:24 -0400 Subject: [PATCH 18/20] explicit type resolves issue, will make bug for implict --- compiler/src/phases/reducing_director.rs | 2 -- .../compiler/console/log_parameter_fail_none.leo.out | 2 +- .../compiler/compiler/string/equality.leo.out | 10 +++++----- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/compiler/src/phases/reducing_director.rs 
b/compiler/src/phases/reducing_director.rs index 1b54168d35..6c22253cb3 100644 --- a/compiler/src/phases/reducing_director.rs +++ b/compiler/src/phases/reducing_director.rs @@ -116,10 +116,8 @@ impl CombineAstAsgDirector { } pub fn reduce_type(&mut self, ast: &AstType, asg: &AsgType, span: &Span) -> Result { - println!("Hellllloooo"); let new = match (ast, asg) { (AstType::Array(ast_type, ast_dimensions), AsgType::Array(asg_type, asg_dimensions)) => { - println!("astd {}, asgd {}", ast_dimensions, asg_dimensions); if self.options.type_inference_enabled() { AstType::Array( Box::new(self.reduce_type(ast_type, asg_type, span)?), diff --git a/tests/expectations/compiler/compiler/console/log_parameter_fail_none.leo.out b/tests/expectations/compiler/compiler/console/log_parameter_fail_none.leo.out index af6f8935d0..56c7a9e3b8 100644 --- a/tests/expectations/compiler/compiler/console/log_parameter_fail_none.leo.out +++ b/tests/expectations/compiler/compiler/console/log_parameter_fail_none.leo.out @@ -2,4 +2,4 @@ namespace: Compile expectation: Fail outputs: - - " --> compiler-test:4:17\n |\n 4 | console.log(\"\", 1u32);\n | ^^^^^^^^\n |\n = function call expected 1 arguments, got 2" + - " --> compiler-test:4:17\n |\n 4 | console.log(\"\", 1u32);\n | ^\n |\n = unexpected token: '\"'" diff --git a/tests/expectations/compiler/compiler/string/equality.leo.out b/tests/expectations/compiler/compiler/string/equality.leo.out index 53a3a8c78e..21015d1a1e 100644 --- a/tests/expectations/compiler/compiler/string/equality.leo.out +++ b/tests/expectations/compiler/compiler/string/equality.leo.out @@ -4,11 +4,11 @@ expectation: Pass outputs: - circuit: num_public_variables: 0 - num_private_variables: 84 - num_constraints: 67 - at: da464aeb42d53f56ff26141c802d2a769477763766c5746e603c5326b01790bb - bt: 6b03d4cb03e7bf9cf8ec746ee3410578d8ac51a29e56f9090d8e27a4ddf16c64 - ct: ebcd3f740af33d9c3ab2c5e4189709be8d73fab149e788734705cad488a4208c + num_private_variables: 92 + num_constraints: 75 + 
at: c03384068dd2b9bd528c65e301960d5c4f61bf94949c9080f28a4bc57f81e856 + bt: 7f653d4004b13eee112bbefcf900b0c8201524129423cdda82799c18fdcfb3f7 + ct: bce0d8a64cf530613e693412358d17d231ec8516108ab4ee5a47ddf185a471fa output: - input_file: inputs/string.in output: From 497b039d60a5f95d24661a098fbe4dc2e43d9020 Mon Sep 17 00:00:00 2001 From: damirka Date: Mon, 24 May 2021 13:10:28 +0300 Subject: [PATCH 19/20] added few more test cases for strings --- .../expression/literal/string copy.leo.out | 151 ------------------ .../parser/expression/literal/string.leo.out | 15 -- .../expression/literal/string_fail.leo.out | 4 + .../parser/parser/expression/string.leo.out | 151 ------------------ tests/parser/expression/literal/string.leo | 21 --- .../parser/expression/literal/string_fail.leo | 10 +- 6 files changed, 13 insertions(+), 339 deletions(-) delete mode 100644 tests/expectations/parser/parser/expression/literal/string copy.leo.out delete mode 100644 tests/expectations/parser/parser/expression/literal/string.leo.out delete mode 100644 tests/expectations/parser/parser/expression/string.leo.out delete mode 100644 tests/parser/expression/literal/string.leo diff --git a/tests/expectations/parser/parser/expression/literal/string copy.leo.out b/tests/expectations/parser/parser/expression/literal/string copy.leo.out deleted file mode 100644 index 7402c5eb45..0000000000 --- a/tests/expectations/parser/parser/expression/literal/string copy.leo.out +++ /dev/null @@ -1,151 +0,0 @@ ---- -namespace: ParseExpression -expectation: Pass -outputs: - - Value: - String: - - - s - - t - - r - - i - - n - - g - - line_start: 1 - line_stop: 1 - col_start: 1 - col_stop: 9 - path: test - content: "`string`" - - Value: - String: - - - a - - n - - o - - t - - h - - e - - r - - " " - - "{" - - " " - - "}" - - " " - - s - - t - - r - - i - - n - - g - - line_start: 1 - line_stop: 1 - col_start: 1 - col_stop: 21 - path: test - content: "`another { } string`" - - Value: - String: - - - "{" - - " " - - "]" 
- - " " - - "[" - - " " - - ; - - " " - - a - - line_start: 1 - line_stop: 1 - col_start: 1 - col_stop: 12 - path: test - content: "`{ ] [ ; a`" - - Value: - String: - - - ࿺ - - line_start: 1 - line_stop: 1 - col_start: 1 - col_stop: 10 - path: test - content: "`\\u{FFA}`" - - Value: - String: - - - 򯫺 - - line_start: 1 - line_stop: 1 - col_start: 1 - col_stop: 12 - path: test - content: "`\\u{afafa}`" - - Value: - String: - - - 꾯 - - line_start: 1 - line_stop: 1 - col_start: 1 - col_stop: 11 - path: test - content: "`\\u{afaf}`" - - Value: - String: - - - ૺ - - line_start: 1 - line_stop: 1 - col_start: 1 - col_stop: 10 - path: test - content: "`\\u{afa}`" - - Value: - String: - - - ¯ - - line_start: 1 - line_stop: 1 - col_start: 1 - col_stop: 9 - path: test - content: "`\\u{af}`" - - Value: - String: - - - "\n" - - line_start: 1 - line_stop: 1 - col_start: 1 - col_stop: 8 - path: test - content: "`\\u{a}`" - - Value: - String: - - - "\n" - - line_start: 1 - line_stop: 1 - col_start: 1 - col_stop: 7 - path: test - content: "`\\x0A`" - - Value: - String: - - - a - - a - - " " - - "\\" - - " " - - "\"" - - " " - - " " - - "\n" - - " " - - a - - a - - " " - - "\t" - - " " - - "\r" - - " " - - " " - - "\u0000" - - line_start: 1 - line_stop: 1 - col_start: 1 - col_stop: 28 - path: test - content: "`aa \\\\ \\\" \\n aa \\t \\r \\0`" diff --git a/tests/expectations/parser/parser/expression/literal/string.leo.out b/tests/expectations/parser/parser/expression/literal/string.leo.out deleted file mode 100644 index c78bafb527..0000000000 --- a/tests/expectations/parser/parser/expression/literal/string.leo.out +++ /dev/null @@ -1,15 +0,0 @@ ---- -namespace: Token -expectation: Pass -outputs: - - "'\"string\"' @ 1:1-9" - - "'\"another { } string\"' @ 1:1-21" - - "'\"{ ] [ ; a\"' @ 1:1-12" - - "'\"࿺\"' @ 1:1-10" - - "'\"򯫺\"' @ 1:1-12" - - "'\"꾯\"' @ 1:1-11" - - "'\"ૺ\"' @ 1:1-10" - - "'\"¯\"' @ 1:1-9" - - "'\"\n\"' @ 1:1-8" - - "'\"\n\"' @ 1:1-7" - - "'\"aa \\ \" \n aa \t \r 
\u0000\"' @ 1:1-28" diff --git a/tests/expectations/parser/parser/expression/literal/string_fail.leo.out b/tests/expectations/parser/parser/expression/literal/string_fail.leo.out index 5a0cbfcf88..db6d067a51 100644 --- a/tests/expectations/parser/parser/expression/literal/string_fail.leo.out +++ b/tests/expectations/parser/parser/expression/literal/string_fail.leo.out @@ -4,3 +4,7 @@ expectation: Fail outputs: - " --> test:1:1\n |\n 1 | \"\"\n | ^\n |\n = unexpected token: '\"'" - " --> test:1:1\n |\n 1 | \"Hello world!\n | ^\n |\n = unexpected token: '\"'" + - " --> test:1:1\n |\n 1 | \"\\\"\n | ^\n |\n = unexpected token: '\"'" + - " --> test:1:1\n |\n 1 | \"\\l\"\n | ^\n |\n = unexpected token: '\"'" + - " --> test:1:1\n |\n 1 | \"\\uaaa\"\n | ^\n |\n = unexpected token: '\"'" + - " --> test:1:1\n |\n 1 | \"\\u\"\n | ^\n |\n = unexpected token: '\"'" diff --git a/tests/expectations/parser/parser/expression/string.leo.out b/tests/expectations/parser/parser/expression/string.leo.out deleted file mode 100644 index 7402c5eb45..0000000000 --- a/tests/expectations/parser/parser/expression/string.leo.out +++ /dev/null @@ -1,151 +0,0 @@ ---- -namespace: ParseExpression -expectation: Pass -outputs: - - Value: - String: - - - s - - t - - r - - i - - n - - g - - line_start: 1 - line_stop: 1 - col_start: 1 - col_stop: 9 - path: test - content: "`string`" - - Value: - String: - - - a - - n - - o - - t - - h - - e - - r - - " " - - "{" - - " " - - "}" - - " " - - s - - t - - r - - i - - n - - g - - line_start: 1 - line_stop: 1 - col_start: 1 - col_stop: 21 - path: test - content: "`another { } string`" - - Value: - String: - - - "{" - - " " - - "]" - - " " - - "[" - - " " - - ; - - " " - - a - - line_start: 1 - line_stop: 1 - col_start: 1 - col_stop: 12 - path: test - content: "`{ ] [ ; a`" - - Value: - String: - - - ࿺ - - line_start: 1 - line_stop: 1 - col_start: 1 - col_stop: 10 - path: test - content: "`\\u{FFA}`" - - Value: - String: - - - 򯫺 - - line_start: 1 - line_stop: 
1 - col_start: 1 - col_stop: 12 - path: test - content: "`\\u{afafa}`" - - Value: - String: - - - 꾯 - - line_start: 1 - line_stop: 1 - col_start: 1 - col_stop: 11 - path: test - content: "`\\u{afaf}`" - - Value: - String: - - - ૺ - - line_start: 1 - line_stop: 1 - col_start: 1 - col_stop: 10 - path: test - content: "`\\u{afa}`" - - Value: - String: - - - ¯ - - line_start: 1 - line_stop: 1 - col_start: 1 - col_stop: 9 - path: test - content: "`\\u{af}`" - - Value: - String: - - - "\n" - - line_start: 1 - line_stop: 1 - col_start: 1 - col_stop: 8 - path: test - content: "`\\u{a}`" - - Value: - String: - - - "\n" - - line_start: 1 - line_stop: 1 - col_start: 1 - col_stop: 7 - path: test - content: "`\\x0A`" - - Value: - String: - - - a - - a - - " " - - "\\" - - " " - - "\"" - - " " - - " " - - "\n" - - " " - - a - - a - - " " - - "\t" - - " " - - "\r" - - " " - - " " - - "\u0000" - - line_start: 1 - line_stop: 1 - col_start: 1 - col_stop: 28 - path: test - content: "`aa \\\\ \\\" \\n aa \\t \\r \\0`" diff --git a/tests/parser/expression/literal/string.leo b/tests/parser/expression/literal/string.leo deleted file mode 100644 index 604d27a5e4..0000000000 --- a/tests/parser/expression/literal/string.leo +++ /dev/null @@ -1,21 +0,0 @@ -/* -namespace: Token -expectation: Pass -*/ - -"string" - -"another { } string" - -"{ ] [ ; a" - -"\u{FFA}" -"\u{afafa}" -"\u{afaf}" -"\u{afa}" -"\u{af}" -"\u{a}" - -"\x0A" - -"aa \\ \" \n aa \t \r \0" diff --git a/tests/parser/expression/literal/string_fail.leo b/tests/parser/expression/literal/string_fail.leo index 84ea369f22..e2d900aa89 100644 --- a/tests/parser/expression/literal/string_fail.leo +++ b/tests/parser/expression/literal/string_fail.leo @@ -5,4 +5,12 @@ expectation: Fail "" -"Hello world! \ No newline at end of file +"Hello world! 
+ +"\" + +"\l" + +"\uaaa" + +"\u" From a3416c1e3eaa4f6ca22a74989ba53ac4febc5f8d Mon Sep 17 00:00:00 2001 From: damirka Date: Mon, 24 May 2021 15:13:33 +0300 Subject: [PATCH 20/20] added max value for \x7F escape --- parser/src/tokenizer/lexer.rs | 5 +++++ .../parser/parser/expression/literal/string_fail.leo.out | 2 ++ .../parser/expression/literal/string_parse.leo.out | 9 +++++++++ tests/parser/expression/literal/string_fail.leo | 4 ++++ tests/parser/expression/literal/string_parse.leo | 2 ++ 5 files changed, 22 insertions(+) diff --git a/parser/src/tokenizer/lexer.rs b/parser/src/tokenizer/lexer.rs index 048929f064..16210073d9 100644 --- a/parser/src/tokenizer/lexer.rs +++ b/parser/src/tokenizer/lexer.rs @@ -102,6 +102,11 @@ impl Token { } if let Ok(ascii_number) = u8::from_str_radix(&hex_string, 16) { + // According to RFC, we allow only values less than 128. + if ascii_number > 127 { + return None; + } + return Some(ascii_number as char); } } diff --git a/tests/expectations/parser/parser/expression/literal/string_fail.leo.out b/tests/expectations/parser/parser/expression/literal/string_fail.leo.out index db6d067a51..d97d2cda74 100644 --- a/tests/expectations/parser/parser/expression/literal/string_fail.leo.out +++ b/tests/expectations/parser/parser/expression/literal/string_fail.leo.out @@ -8,3 +8,5 @@ outputs: - " --> test:1:1\n |\n 1 | \"\\l\"\n | ^\n |\n = unexpected token: '\"'" - " --> test:1:1\n |\n 1 | \"\\uaaa\"\n | ^\n |\n = unexpected token: '\"'" - " --> test:1:1\n |\n 1 | \"\\u\"\n | ^\n |\n = unexpected token: '\"'" + - " --> test:1:1\n |\n 1 | \"\\xFF\"\n | ^\n |\n = unexpected token: '\"'" + - " --> test:1:1\n |\n 1 | \"\\x\"\n | ^\n |\n = unexpected token: '\"'" diff --git a/tests/expectations/parser/parser/expression/literal/string_parse.leo.out b/tests/expectations/parser/parser/expression/literal/string_parse.leo.out index b149109755..5fd13277dd 100644 --- a/tests/expectations/parser/parser/expression/literal/string_parse.leo.out +++ 
b/tests/expectations/parser/parser/expression/literal/string_parse.leo.out @@ -122,6 +122,15 @@ outputs: col_stop: 7 path: test content: "\"\\x0A\"" + - Value: + String: + - -  + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 7 + path: test + content: "\"\\x7F\"" - Value: String: - - a diff --git a/tests/parser/expression/literal/string_fail.leo b/tests/parser/expression/literal/string_fail.leo index e2d900aa89..3015bf53c3 100644 --- a/tests/parser/expression/literal/string_fail.leo +++ b/tests/parser/expression/literal/string_fail.leo @@ -14,3 +14,7 @@ expectation: Fail "\uaaa" "\u" + +"\xFF" + +"\x" diff --git a/tests/parser/expression/literal/string_parse.leo b/tests/parser/expression/literal/string_parse.leo index b4e0e10422..f23e65908c 100644 --- a/tests/parser/expression/literal/string_parse.leo +++ b/tests/parser/expression/literal/string_parse.leo @@ -18,4 +18,6 @@ expectation: Pass "\x0A" +"\x7F" + "aa \\ \" \n aa \t \r \0"