diff --git a/asg/src/expression/constant.rs b/asg/src/expression/constant.rs index 52f6cea11f..157812052b 100644 --- a/asg/src/expression/constant.rs +++ b/asg/src/expression/constant.rs @@ -132,7 +132,11 @@ impl<'a> FromAst<'a, leo_ast::ValueExpression> for Constant<'a> { Constant { parent: Cell::new(None), span: Some(span.clone()), - value: ConstValue::Field(value.parse().map_err(|_| AsgConvertError::invalid_char(&value, span))?), + value: ConstValue::Char( + value + .parse::<char>() + .map_err(|_| AsgConvertError::invalid_char(&value, span))?, + ), } } Field(value, span) => { diff --git a/parser/src/parser/expression.rs b/parser/src/parser/expression.rs index 028f506a04..40ca52e2bd 100644 --- a/parser/src/parser/expression.rs +++ b/parser/src/parser/expression.rs @@ -689,6 +689,7 @@ impl ParserContext { Token::True => Expression::Value(ValueExpression::Boolean("true".into(), span)), Token::False => Expression::Value(ValueExpression::Boolean("false".into(), span)), Token::AddressLit(value) => Expression::Value(ValueExpression::Address(value, span)), + Token::CharLit(value) => Expression::Value(ValueExpression::Char(value, span)), Token::LeftParen => self.parse_tuple_expression(&span)?, Token::LeftSquare => self.parse_array_expression(&span)?, Token::Ident(name) => { diff --git a/parser/src/parser/type_.rs b/parser/src/parser/type_.rs index 5a3eae2ec1..7604770f38 100644 --- a/parser/src/parser/type_.rs +++ b/parser/src/parser/type_.rs @@ -31,6 +31,7 @@ const TYPE_TOKENS: &[Token] = &[ Token::Group, Token::Address, Token::Bool, + Token::Char, ]; impl ParserContext { @@ -117,6 +118,7 @@ impl ParserContext { Token::Group => Type::Group, Token::Address => Type::Address, Token::Bool => Type::Boolean, + Token::Char => Type::Char, x => Type::IntegerType(Self::token_to_int_type(x).expect("invalid int type")), }, token.span, diff --git a/parser/src/tokenizer/lexer.rs b/parser/src/tokenizer/lexer.rs index 404e3c1b00..f318735164 100644 --- a/parser/src/tokenizer/lexer.rs +++ 
b/parser/src/tokenizer/lexer.rs @@ -45,6 +45,7 @@ fn eat_identifier(input_tendril: &StrTendril) -> Option { return None; } let input = input_tendril[..].as_bytes(); + if !input[0].is_ascii_alphabetic() { return None; } @@ -149,6 +150,55 @@ impl Token { } return (i + 1, Some(Token::FormatString(segments))); } + b'\'' => { + if input[1] == b'\'' { + return (0, None); + } + + let mut i = 1; + let mut in_escape = false; + let mut character = String::new(); + while i < input.len() { + if !in_escape { + if input[i] == b'\'' { + break; + } + if input[i] == b'\\' { + in_escape = !in_escape; + } else { + character.push(input[i] as char); + } + } else { + in_escape = false; + if input[i] == b'u' { + i += 2; + let mut j = i; + let mut size = 0; + while input[j] != b'}' { + j += 1; + size += 1; + } + let hex_string_number: String = input_tendril.subtendril(i as u32, size).to_string(); + if let Ok(hex) = u32::from_str_radix(&hex_string_number, 16) { + if let Some(unicode) = std::char::from_u32(hex) { + i = j; + character = unicode.to_string(); + } + } else { + return (0, None); + } + } else { + character.push(input[i] as char); + } + } + i += 1; + } + if i == input.len() { + return (0, None); + } + + return (i + 1, Some(Token::CharLit(character.into()))); + } x if x.is_ascii_digit() => { return Self::eat_integer(&input_tendril); } @@ -310,6 +360,7 @@ impl Token { "address" => Token::Address, "as" => Token::As, "bool" => Token::Bool, + "char" => Token::Char, "circuit" => Token::Circuit, "console" => Token::Console, "const" => Token::Const, diff --git a/parser/src/tokenizer/token.rs b/parser/src/tokenizer/token.rs index f5359e83ec..5863041ae9 100644 --- a/parser/src/tokenizer/token.rs +++ b/parser/src/tokenizer/token.rs @@ -47,6 +47,7 @@ pub enum Token { True, False, AddressLit(#[serde(with = "leo_ast::common::tendril_json")] StrTendril), + CharLit(#[serde(with = "leo_ast::common::tendril_json")] StrTendril), At, @@ -104,6 +105,7 @@ pub enum Token { Group, Bool, Address, + Char, 
BigSelf, // primary expresion @@ -156,6 +158,7 @@ pub const KEYWORD_TOKENS: &[Token] = &[ Token::Address, Token::As, Token::Bool, + Token::Char, Token::Circuit, Token::Console, Token::Const, @@ -217,6 +220,7 @@ impl fmt::Display for Token { True => write!(f, "true"), False => write!(f, "false"), AddressLit(s) => write!(f, "{}", s), + CharLit(s) => write!(f, "{}", s), At => write!(f, "@"), @@ -271,6 +275,7 @@ impl fmt::Display for Token { Group => write!(f, "group"), Bool => write!(f, "bool"), Address => write!(f, "address"), + Char => write!(f, "char"), BigSelf => write!(f, "Self"), Input => write!(f, "input"), diff --git a/tests/expectations/parser/parser/expression/literal/char.leo.out b/tests/expectations/parser/parser/expression/literal/char.leo.out new file mode 100644 index 0000000000..126cebd1b8 --- /dev/null +++ b/tests/expectations/parser/parser/expression/literal/char.leo.out @@ -0,0 +1,10 @@ +--- +namespace: Token +expectation: Pass +outputs: + - "'a' @ 1:1-4" + - "'A' @ 1:1-4" + - "''' @ 1:1-5" + - "'\\' @ 1:1-5" + - "'n' @ 1:1-5" + - "'❤' @ 1:1-11" diff --git a/tests/expectations/parser/parser/expression/literal/char_fail.leo.out b/tests/expectations/parser/parser/expression/literal/char_fail.leo.out new file mode 100644 index 0000000000..0becec81b8 --- /dev/null +++ b/tests/expectations/parser/parser/expression/literal/char_fail.leo.out @@ -0,0 +1,7 @@ +--- +namespace: ParseExpression +expectation: Fail +outputs: + - " --> test:1:1\n |\n 1 | '\\'\n | ^\n |\n = unexpected token: '''" + - " --> test:1:1\n |\n 1 | 'a\n | ^\n |\n = unexpected token: '''" + - " --> test:1:1\n |\n 1 | ''\n | ^\n |\n = unexpected token: '''" diff --git a/tests/expectations/parser/parser/expression/literal/char_parse.leo.out b/tests/expectations/parser/parser/expression/literal/char_parse.leo.out new file mode 100644 index 0000000000..4b4e1aa952 --- /dev/null +++ b/tests/expectations/parser/parser/expression/literal/char_parse.leo.out @@ -0,0 +1,67 @@ +--- +namespace: 
ParseExpression +expectation: Pass +outputs: + - Value: + Char: + - a + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 4 + path: test + content: "'a'" + - Value: + Char: + - b + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 4 + path: test + content: "'b'" + - Value: + Char: + - "\"" + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 5 + path: test + content: "'\\\"'" + - Value: + Char: + - t + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 5 + path: test + content: "'\\t'" + - Value: + Char: + - r + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 5 + path: test + content: "'\\r'" + - Value: + Char: + - "0" + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 5 + path: test + content: "'\\0'" + - Value: + Char: + - ❤ + - line_start: 1 + line_stop: 1 + col_start: 1 + col_stop: 11 + path: test + content: "'\\u{2764}'" diff --git a/tests/parser/expression/literal/char.leo b/tests/parser/expression/literal/char.leo new file mode 100644 index 0000000000..d2719e893a --- /dev/null +++ b/tests/parser/expression/literal/char.leo @@ -0,0 +1,11 @@ +/* +namespace: Token +expectation: Pass +*/ + +'a' +'A' +'\'' +'\\' +'\n' +'\u{2764}' \ No newline at end of file diff --git a/tests/parser/expression/literal/char_fail.leo b/tests/parser/expression/literal/char_fail.leo new file mode 100644 index 0000000000..3a4b06903e --- /dev/null +++ b/tests/parser/expression/literal/char_fail.leo @@ -0,0 +1,10 @@ +/* +namespace: ParseExpression +expectation: Fail +*/ + +'\' + +'a + +'' diff --git a/tests/parser/expression/literal/char_parse.leo b/tests/parser/expression/literal/char_parse.leo new file mode 100644 index 0000000000..e784384f51 --- /dev/null +++ b/tests/parser/expression/literal/char_parse.leo @@ -0,0 +1,12 @@ +/* +namespace: ParseExpression +expectation: Pass +*/ + +'a' +'b' +'\"' +'\t' +'\r' +'\0' +'\u{2764}' \ No newline at end of file