char lexer/tokenize/parsing done

This commit is contained in:
gluax 2021-05-12 15:33:37 -04:00
parent 819fc9087f
commit 90f9215a0b
11 changed files with 181 additions and 1 deletions

View File

@ -132,7 +132,11 @@ impl<'a> FromAst<'a, leo_ast::ValueExpression> for Constant<'a> {
Constant {
parent: Cell::new(None),
span: Some(span.clone()),
value: ConstValue::Field(value.parse().map_err(|_| AsgConvertError::invalid_char(&value, span))?),
value: ConstValue::Char(
value
.parse::<char>()
.map_err(|_| AsgConvertError::invalid_char(&value, span))?,
),
}
}
Field(value, span) => {

View File

@ -689,6 +689,7 @@ impl ParserContext {
Token::True => Expression::Value(ValueExpression::Boolean("true".into(), span)),
Token::False => Expression::Value(ValueExpression::Boolean("false".into(), span)),
Token::AddressLit(value) => Expression::Value(ValueExpression::Address(value, span)),
Token::CharLit(value) => Expression::Value(ValueExpression::Char(value, span)),
Token::LeftParen => self.parse_tuple_expression(&span)?,
Token::LeftSquare => self.parse_array_expression(&span)?,
Token::Ident(name) => {

View File

@ -31,6 +31,7 @@ const TYPE_TOKENS: &[Token] = &[
Token::Group,
Token::Address,
Token::Bool,
Token::Char,
];
impl ParserContext {
@ -117,6 +118,7 @@ impl ParserContext {
Token::Group => Type::Group,
Token::Address => Type::Address,
Token::Bool => Type::Boolean,
Token::Char => Type::Char,
x => Type::IntegerType(Self::token_to_int_type(x).expect("invalid int type")),
},
token.span,

View File

@ -45,6 +45,7 @@ fn eat_identifier(input_tendril: &StrTendril) -> Option<StrTendril> {
return None;
}
let input = input_tendril[..].as_bytes();
if !input[0].is_ascii_alphabetic() {
return None;
}
@ -149,6 +150,55 @@ impl Token {
}
return (i + 1, Some(Token::FormatString(segments)));
}
// Lex a character literal. `input` is positioned at the opening `'`.
// Returns (bytes consumed, Some(Token::CharLit)) on success, or (0, None) on malformed input.
b'\'' => {
// Reject the empty literal `''` outright.
// NOTE(review): `input[1]` panics if the slice ends right after the opening quote
// (a lone `'` at EOF) — confirm the caller guarantees at least two bytes here.
if input[1] == b'\'' {
return (0, None);
}
let mut i = 1;
let mut in_escape = false;
let mut character = String::new();
// Scan forward until the closing quote, tracking a one-byte escape state.
while i < input.len() {
if !in_escape {
if input[i] == b'\'' {
break;
}
if input[i] == b'\\' {
in_escape = !in_escape;
} else {
// Raw byte cast to char: a non-ASCII UTF-8 char is accumulated byte-by-byte
// (each continuation byte becomes its own `char` until overwritten below).
character.push(input[i] as char);
}
} else {
in_escape = false;
// `\u{XXXX}` unicode escape; any other escaped byte is kept verbatim,
// so `'\n'` tokenizes as the single char `n` — escape resolution happens
// in a later phase (matches the Token expectation file: "'n' @ 1:1-5").
if input[i] == b'u' {
// Skip past `u{`.
// NOTE(review): assumes input[i+1] is `{` without checking — TODO confirm.
i += 2;
let mut j = i;
let mut size = 0;
// NOTE(review): unbounded scan — a literal with a missing `}` (e.g. `'\u{27`)
// walks `j` past the end of `input` and panics; consider bounding by input.len().
while input[j] != b'}' {
j += 1;
size += 1;
}
let hex_string_number: String = input_tendril.subtendril(i as u32, size).to_string();
if let Ok(hex) = u32::from_str_radix(&hex_string_number, 16) {
if let Some(unicode) = std::char::from_u32(hex) {
i = j;
// Replaces (not appends to) the accumulated text with the decoded char.
character = unicode.to_string();
}
// NOTE(review): when from_u32 returns None (surrogate / out-of-range code
// point) `i` is NOT advanced to `j`, so scanning resumes inside the hex
// digits instead of erroring — verify this is intended.
} else {
return (0, None);
}
} else {
character.push(input[i] as char);
}
}
i += 1;
}
// Ran off the end of the input without finding a closing quote.
if i == input.len() {
return (0, None);
}
// `i + 1` also consumes the closing quote.
return (i + 1, Some(Token::CharLit(character.into())));
}
x if x.is_ascii_digit() => {
return Self::eat_integer(&input_tendril);
}
@ -310,6 +360,7 @@ impl Token {
"address" => Token::Address,
"as" => Token::As,
"bool" => Token::Bool,
"char" => Token::Char,
"circuit" => Token::Circuit,
"console" => Token::Console,
"const" => Token::Const,

View File

@ -47,6 +47,7 @@ pub enum Token {
True,
False,
AddressLit(#[serde(with = "leo_ast::common::tendril_json")] StrTendril),
CharLit(#[serde(with = "leo_ast::common::tendril_json")] StrTendril),
At,
@ -104,6 +105,7 @@ pub enum Token {
Group,
Bool,
Address,
Char,
BigSelf,
// primary expression
@ -156,6 +158,7 @@ pub const KEYWORD_TOKENS: &[Token] = &[
Token::Address,
Token::As,
Token::Bool,
Token::Char,
Token::Circuit,
Token::Console,
Token::Const,
@ -217,6 +220,7 @@ impl fmt::Display for Token {
True => write!(f, "true"),
False => write!(f, "false"),
AddressLit(s) => write!(f, "{}", s),
CharLit(s) => write!(f, "{}", s),
At => write!(f, "@"),
@ -271,6 +275,7 @@ impl fmt::Display for Token {
Group => write!(f, "group"),
Bool => write!(f, "bool"),
Address => write!(f, "address"),
Char => write!(f, "char"),
BigSelf => write!(f, "Self"),
Input => write!(f, "input"),

View File

@ -0,0 +1,10 @@
---
namespace: Token
expectation: Pass
outputs:
- "'a' @ 1:1-4"
- "'A' @ 1:1-4"
- "''' @ 1:1-5"
- "'\\' @ 1:1-5"
- "'n' @ 1:1-5"
- "'❤' @ 1:1-11"

View File

@ -0,0 +1,7 @@
---
namespace: ParseExpression
expectation: Fail
outputs:
- " --> test:1:1\n |\n 1 | '\\'\n | ^\n |\n = unexpected token: '''"
- " --> test:1:1\n |\n 1 | 'a\n | ^\n |\n = unexpected token: '''"
- " --> test:1:1\n |\n 1 | ''\n | ^\n |\n = unexpected token: '''"

View File

@ -0,0 +1,67 @@
---
namespace: ParseExpression
expectation: Pass
outputs:
- Value:
Char:
- a
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 4
path: test
content: "'a'"
- Value:
Char:
- b
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 4
path: test
content: "'b'"
- Value:
Char:
- "\""
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 5
path: test
content: "'\\\"'"
- Value:
Char:
- t
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 5
path: test
content: "'\\t'"
- Value:
Char:
- r
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 5
path: test
content: "'\\r'"
- Value:
Char:
- "0"
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 5
path: test
content: "'\\0'"
- Value:
Char:
- ❤
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 11
path: test
content: "'\\u{2764}'"

View File

@ -0,0 +1,11 @@
/*
namespace: Token
expectation: Pass
*/
'a'
'A'
'\''
'\\'
'\n'
'\u{2764}'

View File

@ -0,0 +1,10 @@
/*
namespace: ParseExpression
expectation: Fail
*/
'\'
'a
''

View File

@ -0,0 +1,12 @@
/*
namespace: ParseExpression
expectation: Pass
*/
'a'
'b'
'\"'
'\t'
'\r'
'\0'
'\u{2764}'