From 32bd282030cfb0af22ee3793baeb7386b2bd0c6e Mon Sep 17 00:00:00 2001 From: damirka Date: Tue, 18 May 2021 18:56:55 +0300 Subject: [PATCH] 1st draft for string parsing and ast type - currently uses back quotes "`" for strings, change later - ast -> asg unimplemented, strings need to be processed on canonicalization stage --- asg/src/expression/constant.rs | 3 + ast/src/expression/value.rs | 13 +++- parser/src/parser/expression.rs | 1 + parser/src/tokenizer/lexer.rs | 10 +++ parser/src/tokenizer/mod.rs | 2 +- parser/src/tokenizer/token.rs | 8 +++ .../parser/parser/expression/string.leo.out | 63 +++++++++++++++++++ tests/parser/expression/string.leo | 10 +++ 8 files changed, 107 insertions(+), 3 deletions(-) create mode 100644 tests/expectations/parser/parser/expression/string.leo.out create mode 100644 tests/parser/expression/string.leo diff --git a/asg/src/expression/constant.rs b/asg/src/expression/constant.rs index 99077a2756..42a3ca8e45 100644 --- a/asg/src/expression/constant.rs +++ b/asg/src/expression/constant.rs @@ -220,6 +220,9 @@ impl<'a> FromAst<'a, leo_ast::ValueExpression> for Constant<'a> { value: ConstValue::Int(ConstInt::parse(int_type, value, span)?), } } + String(_str_type, _value) => { + unimplemented!("strings do not exist on ASG level") + } }) } } diff --git a/ast/src/expression/value.rs b/ast/src/expression/value.rs index 865df9235a..e153801cbd 100644 --- a/ast/src/expression/value.rs +++ b/ast/src/expression/value.rs @@ -33,6 +33,7 @@ pub enum ValueExpression { #[serde(with = "crate::common::tendril_json")] StrTendril, Span, ), + String(Vec, Span), } impl fmt::Display for ValueExpression { @@ -46,6 +47,12 @@ impl fmt::Display for ValueExpression { Implicit(implicit, _) => write!(f, "{}", implicit), Integer(value, type_, _) => write!(f, "{}{}", value, type_), Group(group) => write!(f, "{}", group), + String(char_vec, _) => { + for character in char_vec { + write!(f, "{}", character)? + } + Ok(()) + } } } } @@ -59,7 +66,8 @@ impl Node for ValueExpression { | Char(_, span) | Field(_, span) | Implicit(_, span) - | Integer(_, _, span) => span, + | Integer(_, _, span) // => span, + | String(_, span) => span, Group(group) => match &**group { GroupValue::Single(_, span) | GroupValue::Tuple(GroupTuple { span, .. }) => span, }, @@ -74,7 +82,8 @@ impl Node for ValueExpression { | Char(_, span) | Field(_, span) | Implicit(_, span) - | Integer(_, _, span) => *span = new_span, + | Integer(_, _, span) // => *span = new_span, + | String(_, span) => *span = new_span, Group(group) => match &mut **group { GroupValue::Single(_, span) | GroupValue::Tuple(GroupTuple { span, .. }) => *span = new_span, }, diff --git a/parser/src/parser/expression.rs b/parser/src/parser/expression.rs index 40ca52e2bd..8647da9ea5 100644 --- a/parser/src/parser/expression.rs +++ b/parser/src/parser/expression.rs @@ -690,6 +690,7 @@ impl ParserContext { Token::False => Expression::Value(ValueExpression::Boolean("false".into(), span)), Token::AddressLit(value) => Expression::Value(ValueExpression::Address(value, span)), Token::CharLit(value) => Expression::Value(ValueExpression::Char(value, span)), + Token::QuotedString(value) => Expression::Value(ValueExpression::String(value, span)), Token::LeftParen => self.parse_tuple_expression(&span)?, Token::LeftSquare => self.parse_array_expression(&span)?, Token::Ident(name) => { diff --git a/parser/src/tokenizer/lexer.rs b/parser/src/tokenizer/lexer.rs index 9984f9dab2..8042e6fa23 100644 --- a/parser/src/tokenizer/lexer.rs +++ b/parser/src/tokenizer/lexer.rs @@ -235,6 +235,16 @@ impl Token { let input = input_tendril[..].as_bytes(); match input[0] { x if x.is_ascii_whitespace() => return (1, None), + b'`' => { + let mut collect: Vec = Vec::new(); + for (i, char_bytes) in input.iter().enumerate().skip(1) { + if *char_bytes == b'`' { + return (i + 1, Some(Token::QuotedString(collect))); + } + + collect.push(std::char::from_u32(*char_bytes as u32).unwrap()); + } + } b'"' => { let mut i = 1; let mut in_escape = false; diff --git a/parser/src/tokenizer/mod.rs b/parser/src/tokenizer/mod.rs index 9637eabbdb..adfcbb22e5 100644 --- a/parser/src/tokenizer/mod.rs +++ b/parser/src/tokenizer/mod.rs @@ -259,6 +259,6 @@ mod tests { let original = &raw[*start + token.span.col_start - 1..*stop + token.span.col_stop - 1]; assert_eq!(original, &token_raw); } - println!("{}", serde_json::to_string_pretty(&tokens).unwrap()); + // println!("{}", serde_json::to_string_pretty(&tokens).unwrap()); } } diff --git a/parser/src/tokenizer/token.rs b/parser/src/tokenizer/token.rs index d7da426589..3a9e3026b4 100644 --- a/parser/src/tokenizer/token.rs +++ b/parser/src/tokenizer/token.rs @@ -42,6 +42,7 @@ pub enum Token { CommentLine(#[serde(with = "leo_ast::common::tendril_json")] StrTendril), CommentBlock(#[serde(with = "leo_ast::common::tendril_json")] StrTendril), FormatString(Vec), + QuotedString(Vec), Ident(#[serde(with = "leo_ast::common::tendril_json")] StrTendril), Int(#[serde(with = "leo_ast::common::tendril_json")] StrTendril), True, @@ -215,6 +216,13 @@ impl fmt::Display for Token { } write!(f, "\"") } + QuotedString(content) => { + write!(f, "\"")?; + for character in content { + write!(f, "{}", character)?; + } + write!(f, "\"") + } Ident(s) => write!(f, "{}", s), Int(s) => write!(f, "{}", s), True => write!(f, "true"), diff --git a/tests/expectations/parser/parser/expression/string.leo.out b/tests/expectations/parser/parser/expression/string.leo.out new file mode 100644 index 0000000000..4fc7fe22ef --- /dev/null +++ b/tests/expectations/parser/parser/expression/string.leo.out @@ -0,0 +1,63 @@ +--- +namespace: ParseExpression +expectation: Pass +outputs: + - Value: + String: + - - s + - t + - r + - i + - n + - g + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 9 + path: test + content: "`string`" + - Value: + String: + - - a + - n + - o + - t + - h + - e + - r + - " " + - "{" + - " " + - "}" + - " " + - s + - t + - r + - i + - n + - g + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 21 + path: test + content: "`another { } string`" + - Value: + String: + - - "{" + - " " + - "\\" + - " " + - "]" + - " " + - "[" + - " " + - ; + - " " + - a + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 14 + path: test + content: "`{ \\ ] [ ; a`" diff --git a/tests/parser/expression/string.leo b/tests/parser/expression/string.leo new file mode 100644 index 0000000000..efd8cec921 --- /dev/null +++ b/tests/parser/expression/string.leo @@ -0,0 +1,10 @@ +/* +namespace: ParseExpression +expectation: Pass +*/ + +`string` + +`another { } string` + +`{ \ ] [ ; a`