diff --git a/compiler/parser/src/parser/expression.rs b/compiler/parser/src/parser/expression.rs
index 487daf41ea..c01d374b1f 100644
--- a/compiler/parser/src/parser/expression.rs
+++ b/compiler/parser/src/parser/expression.rs
@@ -375,7 +375,7 @@ impl ParserContext<'_> {
             Token::True => Expression::Value(ValueExpression::Boolean("true".into(), span)),
             Token::False => Expression::Value(ValueExpression::Boolean("false".into(), span)),
             Token::AddressLit(value) => Expression::Value(ValueExpression::Address(value, span)),
-            Token::StringLit(value) => Expression::Value(ValueExpression::String(value, span)),
+            Token::StaticString(value) => Expression::Value(ValueExpression::String(value, span)),
             Token::Ident(name) => {
                 let ident = Identifier { name, span };
                 Expression::Identifier(ident)
diff --git a/compiler/parser/src/parser/statement.rs b/compiler/parser/src/parser/statement.rs
index 4274a95153..738eac9da9 100644
--- a/compiler/parser/src/parser/statement.rs
+++ b/compiler/parser/src/parser/statement.rs
@@ -154,7 +154,7 @@ impl ParserContext<'_> {
             p.bump();
             let SpannedToken { token, span } = p.prev_token.clone();
             string = Some(match token {
-                Token::StringLit(chars) => chars,
+                Token::StaticString(chars) => chars,
                 _ => {
                     p.emit_err(ParserError::unexpected_str(token, "formatted string", span));
                     Vec::new()
diff --git a/compiler/parser/src/parser/type_.rs b/compiler/parser/src/parser/type_.rs
index 5b9b9662e5..5568a8ca3f 100644
--- a/compiler/parser/src/parser/type_.rs
+++ b/compiler/parser/src/parser/type_.rs
@@ -23,6 +23,7 @@ pub(super) const TYPE_TOKENS: &[Token] = &[
    Token::Field,
    Token::Group,
    Token::Scalar,
+   Token::String,
    Token::I8,
    Token::I16,
    Token::I32,
@@ -64,6 +65,7 @@ impl ParserContext<'_> {
                Token::Field => Type::Field,
                Token::Group => Type::Group,
                Token::Scalar => Type::Scalar,
+               Token::String => Type::String,
                x => Type::IntegerType(Self::token_to_int_type(x).expect("invalid int type")),
            },
            span,
diff --git a/compiler/parser/src/tokenizer/lexer.rs b/compiler/parser/src/tokenizer/lexer.rs
index 6c295873ff..e3c1ec0380 100644
--- a/compiler/parser/src/tokenizer/lexer.rs
+++ b/compiler/parser/src/tokenizer/lexer.rs
@@ -39,113 +39,114 @@ fn is_bidi_override(c: char) -> bool {
 }
 
 impl Token {
-    // Eats the parts of the unicode character after \u.
-    fn eat_unicode_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
-        let mut unicode = String::new();
-        // Account for the chars '\' and 'u'.
-        let mut len = 2;
+    // todo: remove this unused code or reference https://github.com/Geal/nom/blob/main/examples/string.rs
+    // // Eats the parts of the unicode character after \u.
+    // fn eat_unicode_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
+    //     let mut unicode = String::new();
+    //     // Account for the chars '\' and 'u'.
+    //     let mut len = 2;
+    //
+    //     if input.next_if_eq(&'{').is_some() {
+    //         len += 1;
+    //     } else if let Some(c) = input.next() {
+    //         return Err(ParserError::lexer_unopened_escaped_unicode_char(c).into());
+    //     } else {
+    //         return Err(ParserError::lexer_empty_input_tendril().into());
+    //     }
+    //
+    //     while let Some(c) = input.next_if(|c| c != &'}') {
+    //         len += 1;
+    //         unicode.push(c);
+    //     }
+    //
+    //     if input.next_if_eq(&'}').is_some() {
+    //         len += 1;
+    //     } else {
+    //         return Err(ParserError::lexer_unclosed_escaped_unicode_char(unicode).into());
+    //     }
+    //
+    //     // Max of 6 digits.
+    //     // Minimum of 1 digit.
+    //     if unicode.len() > 6 || unicode.is_empty() {
+    //         return Err(ParserError::lexer_invalid_escaped_unicode_length(unicode).into());
+    //     }
+    //
+    //     if let Ok(hex) = u32::from_str_radix(&unicode, 16) {
+    //         if let Some(character) = std::char::from_u32(hex) {
+    //             Ok((len, Char::Scalar(character)))
+    //         } else if hex <= 0x10FFFF {
+    //             Ok((len, Char::NonScalar(hex)))
+    //         } else {
+    //             Err(ParserError::lexer_invalid_character_exceeded_max_value(unicode).into())
+    //         }
+    //     } else {
+    //         Err(ParserError::lexer_expected_valid_hex_char(unicode).into())
+    //     }
+    // }
 
-        if input.next_if_eq(&'{').is_some() {
-            len += 1;
-        } else if let Some(c) = input.next() {
-            return Err(ParserError::lexer_unopened_escaped_unicode_char(c).into());
-        } else {
-            return Err(ParserError::lexer_empty_input_tendril().into());
-        }
+    // // Eats the parts of the hex character after \x.
+    // fn eat_hex_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
+    //     let mut hex = String::new();
+    //     // Account for the chars '\' and 'x'.
+    //     let mut len = 2;
+    //
+    //     // First hex character.
+    //     if let Some(c) = input.next_if(|c| c != &'\'') {
+    //         len += 1;
+    //         hex.push(c);
+    //     } else if let Some(c) = input.next() {
+    //         return Err(ParserError::lexer_expected_valid_hex_char(c).into());
+    //     } else {
+    //         return Err(ParserError::lexer_empty_input_tendril().into());
+    //     }
+    //
+    //     // Second hex character.
+    //     if let Some(c) = input.next_if(|c| c != &'\'') {
+    //         len += 1;
+    //         hex.push(c);
+    //     } else if let Some(c) = input.next() {
+    //         return Err(ParserError::lexer_expected_valid_hex_char(c).into());
+    //     } else {
+    //         return Err(ParserError::lexer_empty_input_tendril().into());
+    //     }
+    //
+    //     if let Ok(ascii_number) = u8::from_str_radix(&hex, 16) {
+    //         // According to RFC, we allow only values less than 128.
+    //         if ascii_number > 127 {
+    //             return Err(ParserError::lexer_expected_valid_hex_char(hex).into());
+    //         }
+    //
+    //         Ok((len, Char::Scalar(ascii_number as char)))
+    //     } else {
+    //         Err(ParserError::lexer_expected_valid_hex_char(hex).into())
+    //     }
+    // }
 
-        while let Some(c) = input.next_if(|c| c != &'}') {
-            len += 1;
-            unicode.push(c);
-        }
+    // fn eat_escaped_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
+    //     match input.next() {
+    //         None => Err(ParserError::lexer_empty_input_tendril().into()),
+    //         // Length of 2 to account the '\'.
+    //         Some('0') => Ok((2, Char::Scalar(0 as char))),
+    //         Some('t') => Ok((2, Char::Scalar(9 as char))),
+    //         Some('n') => Ok((2, Char::Scalar(10 as char))),
+    //         Some('r') => Ok((2, Char::Scalar(13 as char))),
+    //         Some('\"') => Ok((2, Char::Scalar(34 as char))),
+    //         Some('\'') => Ok((2, Char::Scalar(39 as char))),
+    //         Some('\\') => Ok((2, Char::Scalar(92 as char))),
+    //         Some('u') => Self::eat_unicode_char(input),
+    //         Some('x') => Self::eat_hex_char(input),
+    //         Some(c) => Err(ParserError::lexer_expected_valid_escaped_char(c).into()),
+    //     }
+    // }
 
-        if input.next_if_eq(&'}').is_some() {
-            len += 1;
-        } else {
-            return Err(ParserError::lexer_unclosed_escaped_unicode_char(unicode).into());
-        }
-
-        // Max of 6 digits.
-        // Minimum of 1 digit.
-        if unicode.len() > 6 || unicode.is_empty() {
-            return Err(ParserError::lexer_invalid_escaped_unicode_length(unicode).into());
-        }
-
-        if let Ok(hex) = u32::from_str_radix(&unicode, 16) {
-            if let Some(character) = std::char::from_u32(hex) {
-                Ok((len, Char::Scalar(character)))
-            } else if hex <= 0x10FFFF {
-                Ok((len, Char::NonScalar(hex)))
-            } else {
-                Err(ParserError::lexer_invalid_character_exceeded_max_value(unicode).into())
-            }
-        } else {
-            Err(ParserError::lexer_expected_valid_hex_char(unicode).into())
-        }
-    }
-
-    // Eats the parts of the hex character after \x.
-    fn eat_hex_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
-        let mut hex = String::new();
-        // Account for the chars '\' and 'x'.
-        let mut len = 2;
-
-        // First hex character.
-        if let Some(c) = input.next_if(|c| c != &'\'') {
-            len += 1;
-            hex.push(c);
-        } else if let Some(c) = input.next() {
-            return Err(ParserError::lexer_expected_valid_hex_char(c).into());
-        } else {
-            return Err(ParserError::lexer_empty_input_tendril().into());
-        }
-
-        // Second hex character.
-        if let Some(c) = input.next_if(|c| c != &'\'') {
-            len += 1;
-            hex.push(c);
-        } else if let Some(c) = input.next() {
-            return Err(ParserError::lexer_expected_valid_hex_char(c).into());
-        } else {
-            return Err(ParserError::lexer_empty_input_tendril().into());
-        }
-
-        if let Ok(ascii_number) = u8::from_str_radix(&hex, 16) {
-            // According to RFC, we allow only values less than 128.
-            if ascii_number > 127 {
-                return Err(ParserError::lexer_expected_valid_hex_char(hex).into());
-            }
-
-            Ok((len, Char::Scalar(ascii_number as char)))
-        } else {
-            Err(ParserError::lexer_expected_valid_hex_char(hex).into())
-        }
-    }
-
-    fn eat_escaped_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
-        match input.next() {
-            None => Err(ParserError::lexer_empty_input_tendril().into()),
-            // Length of 2 to account the '\'.
-            Some('0') => Ok((2, Char::Scalar(0 as char))),
-            Some('t') => Ok((2, Char::Scalar(9 as char))),
-            Some('n') => Ok((2, Char::Scalar(10 as char))),
-            Some('r') => Ok((2, Char::Scalar(13 as char))),
-            Some('\"') => Ok((2, Char::Scalar(34 as char))),
-            Some('\'') => Ok((2, Char::Scalar(39 as char))),
-            Some('\\') => Ok((2, Char::Scalar(92 as char))),
-            Some('u') => Self::eat_unicode_char(input),
-            Some('x') => Self::eat_hex_char(input),
-            Some(c) => Err(ParserError::lexer_expected_valid_escaped_char(c).into()),
-        }
-    }
-
-    /// Returns a `char` if a character can be eaten, otherwise returns [`None`].
-    fn eat_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
-        match input.next() {
-            None => Err(ParserError::lexer_empty_input_tendril().into()),
-            Some('\\') => Self::eat_escaped_char(input),
-            Some(c) => Ok((c.len_utf8(), Char::Scalar(c))),
-        }
-    }
+    // /// Returns a `char` if a character can be eaten, otherwise returns [`None`].
+    // fn eat_char(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Char)> {
+    //     match input.next() {
+    //         None => Err(ParserError::lexer_empty_input_tendril().into()),
+    //         Some('\\') => Self::eat_escaped_char(input),
+    //         Some(c) => Ok((c.len_utf8(), Char::Scalar(c))),
+    //     }
+    // }
 
     /// Returns a tuple: [(integer length, integer token)] if an integer can be eaten, otherwise returns [`None`].
     /// An integer can be eaten if its bytes are at the front of the given `input_tendril` string.
@@ -183,27 +184,25 @@ impl Token {
                return Ok((1, Token::WhiteSpace));
            }
            Some('"') => {
-               let mut string: Vec<leo_ast::Char> = Vec::new();
-               input.next();
+               let mut string = String::from("\"");
 
-               let mut len = 0;
-               while let Some(c) = input.peek() {
-                   if is_bidi_override(*c) {
+               let mut ended = false;
+               while let Some(c) = input.next() {
+                   if is_bidi_override(c) {
                        return Err(ParserError::lexer_bidi_override().into());
                    }
-                   if c == &'"' {
+                   string.push(c);
+                   if c == '"' {
+                       ended = true;
                        break;
                    }
-                   let (char_len, character) = Self::eat_char(&mut input)?;
-                   len += char_len;
-                   string.push(character.into());
                }
 
-               if input.next_if_eq(&'"').is_some() {
-                   return Ok((len + 2, Token::StringLit(string)));
+               if !ended {
+                   return Err(ParserError::lexer_string_not_closed(string).into());
                }
 
-               return Err(ParserError::lexer_string_not_closed(leo_ast::Chars(string)).into());
+               return Ok((string.len(), Token::StaticString(string)));
            }
            Some(x) if x.is_ascii_digit() => {
                return Self::eat_integer(&mut input);
diff --git a/compiler/parser/src/tokenizer/token.rs b/compiler/parser/src/tokenizer/token.rs
index 1c3460b9db..1b215d5cc9 100644
--- a/compiler/parser/src/tokenizer/token.rs
+++ b/compiler/parser/src/tokenizer/token.rs
@@ -50,7 +50,7 @@ pub enum Token {
    // Literals
    CommentLine(String),
    CommentBlock(String),
-   StringLit(Vec<leo_ast::Char>),
+   StaticString(String),
    Ident(Symbol),
    Int(String),
    True,
@@ -96,6 +96,7 @@ pub enum Token {
    Field,
    Group,
    Scalar,
+   String,
    I8,
    I16,
    I32,
@@ -150,6 +151,7 @@ pub const KEYWORD_TOKENS: &[Token] = &[
    Token::Public,
    Token::Return,
    Token::Scalar,
+   Token::String,
    Token::True,
    Token::U8,
    Token::U16,
@@ -189,6 +191,7 @@ impl Token {
            Token::Public => sym::Public,
            Token::Return => sym::Return,
            Token::Scalar => sym::scalar,
+           Token::String => sym::string,
            Token::True => sym::True,
            Token::U8 => sym::u8,
            Token::U16 => sym::u16,
@@ -206,13 +209,7 @@ impl fmt::Display for Token {
        match self {
            CommentLine(s) => write!(f, "{}", s),
            CommentBlock(s) => write!(f, "{}", s),
-           StringLit(string) => {
-               write!(f, "\"")?;
-               for character in string.iter() {
-                   write!(f, "{}", character)?;
-               }
-               write!(f, "\"")
-           }
+           StaticString(s) => write!(f, "{}", s),
            Ident(s) => write!(f, "{}", s),
            Int(s) => write!(f, "{}", s),
            True => write!(f, "true"),
@@ -255,6 +252,7 @@ impl fmt::Display for Token {
            Field => write!(f, "field"),
            Group => write!(f, "group"),
            Scalar => write!(f, "scalar"),
+           String => write!(f, "string"),
            I8 => write!(f, "i8"),
            I16 => write!(f, "i16"),
            I32 => write!(f, "i32"),
diff --git a/leo/span/src/symbol.rs b/leo/span/src/symbol.rs
index b56374c0c0..ef147e3322 100644
--- a/leo/span/src/symbol.rs
+++ b/leo/span/src/symbol.rs
@@ -137,6 +137,7 @@ symbols! {
    scalar,
    Star: "*",
    std,
+   string,
    Struct: "struct",
    test,
    True: "true",
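
Illustrative sketch (not part of the patch): the lexer.rs change stores a string literal verbatim, quotes included, in a plain String and tokenizes it as StaticString, instead of unescaping it into a vector of characters. The helper below is hypothetical — the function name, error type, and bidi-override character set are illustrative and are not Leo's actual lexer API; it only mirrors the shape of the new loop.

// Minimal, self-contained mirror of the new static-string rule (assumed names).
fn lex_static_string(input: &str) -> Result<(usize, String), String> {
    let mut chars = input.chars();
    // Consume the opening quote and keep it in the buffer, so the token text
    // round-trips exactly as written in the source.
    if chars.next() != Some('"') {
        return Err("expected an opening quote".into());
    }
    let mut string = String::from("\"");

    let mut ended = false;
    for c in chars {
        // The real lexer rejects Unicode bidi-override characters via
        // is_bidi_override; this range check is an approximation for the sketch.
        if ('\u{202A}'..='\u{202E}').contains(&c) || ('\u{2066}'..='\u{2069}').contains(&c) {
            return Err("bidi override character in string literal".into());
        }
        string.push(c);
        if c == '"' {
            ended = true;
            break;
        }
    }

    if !ended {
        return Err(format!("string is not closed: {string}"));
    }
    // The reported token length is the byte length of the literal, both quotes included.
    Ok((string.len(), string))
}

fn main() {
    assert_eq!(lex_static_string("\"hello\""), Ok((7, String::from("\"hello\""))));
    assert!(lex_static_string("\"unterminated").is_err());
    println!("static string sketch ok");
}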