chars done, should merge in compiler tests

This commit is contained in:
gluax 2021-05-13 17:25:33 -04:00
parent 53cd6e2a12
commit 733bc90bbe
10 changed files with 317 additions and 197 deletions

View File

@ -1,4 +1,4 @@
function main() {
const heart: char = '\u{2764}';
const Hiragana = '\u{306E}';
const heart: char = '';
const Hiragana = '';
}

View File

@ -42,6 +42,16 @@ fn test_basic() {
assert_satisfied(program);
}
#[test]
fn test_circuit() {
    // Parse `circuit.leo` together with the char input file; the test panics on a parse
    // error, otherwise the parsed program is handed to `assert_satisfied`.
    let source = include_str!("circuit.leo");
    let char_input = include_str!("input/char.in");

    assert_satisfied(parse_program_with_input(source, char_input).unwrap());
}
#[test]
fn test_escapes() {
let program_string = include_str!("escapes.leo");
@ -67,11 +77,17 @@ fn test_function() {
}
#[test]
fn test_circuit() {
let program_string = include_str!("circuit.leo");
let char_input_string = include_str!("input/char.in");
let program = parse_program_with_input(program_string, char_input_string).unwrap();
fn test_octal() {
let program_string = include_str!("octal.leo");
let program = parse_program(program_string).unwrap();
assert_satisfied(program);
}
#[test]
fn test_unicode() {
    // Parse `unicode.leo` (no input file); the test panics on a parse error, otherwise the
    // parsed program is handed to `assert_satisfied`.
    let source = include_str!("unicode.leo");

    assert_satisfied(parse_program(source).unwrap());
}

View File

@ -0,0 +1,4 @@
// Fixture for octal character escapes: `\xO` followed by three digits.
function main() {
// Octal 011 is 9, the tab character; this one carries an explicit `char` annotation.
const tab: char = '\xO011';
// Octal 172 is 122, the letter 'z'; declared without a type annotation.
const z = '\xO172';
}

View File

@ -0,0 +1,4 @@
// Fixture for unicode character escapes: `\u{...}` with a hexadecimal code point.
function main() {
// U+2764 (heavy black heart); this one carries an explicit `char` annotation.
const heart: char = '\u{2764}';
// U+306E (hiragana "no"); declared without a type annotation.
const Hiragana = '\u{306E}';
}

View File

@ -137,10 +137,15 @@ number_positive = @{ ASCII_DIGIT+ }
// ANY is equivalent to '\u{00}'..'\u{10FFFF}'
basic_char = { ANY }
escaped_char = @{ "\\" ~ ("\"" | "\'" | "\\" | "/" | "b" | "f" | "n" | "r" | "t") }
hex_char = @{ "\\" ~ "u" ~ "{" ~ ASCII_HEX_DIGIT{4} ~ "}" }
hex_char = @{ "\\" ~ "x" ~ "H" ~ ASCII_HEX_DIGIT{1, 2} }
// Octal escape: a backslash, `x`, `O`, then exactly three digits (e.g. \xO011).
// NOTE(review): ASCII_DIGIT also admits 8 and 9, which are not octal digits; such input
// matches this rule and is presumably rejected later when the digits are parsed in base 8 -
// confirm whether ASCII_OCT_DIGIT is wanted here.
octal_char = @{ "\\" ~ "x" ~ "O" ~ ASCII_DIGIT{3} }
// Unicode escape: a backslash, `u`, then one to six hex digits wrapped in braces (e.g. \u{2764}).
unicode_char = @{ "\\" ~ "u" ~ "{" ~ ASCII_HEX_DIGIT{1, 6} ~ "}" }
// Every form a character may take. The choice is ordered, so the escape forms are tried
// before basic_char, which accepts any single character.
char_types = {
escaped_char
| unicode_char
| hex_char
| octal_char
| basic_char
}

View File

@ -49,12 +49,32 @@ pub struct HexChar<'ast> {
pub span: Span<'ast>,
}
/// AST node built from a match of the `octal_char` grammar rule.
#[derive(Clone, Debug, FromPest, PartialEq, Eq)]
#[pest_ast(rule(Rule::octal_char))]
pub struct OctalChar<'ast> {
/// Text of the matched span, produced by `span_into_string`.
/// Presumably the whole escape including its `\xO` prefix - the conversion code elsewhere
/// skips the first three bytes before parsing the digits.
#[pest_ast(outer(with(span_into_string)))]
pub value: String,
/// Span of the match in the parsed input.
#[pest_ast(outer())]
pub span: Span<'ast>,
}
/// AST node built from a match of the `unicode_char` grammar rule.
#[derive(Clone, Debug, FromPest, PartialEq, Eq)]
#[pest_ast(rule(Rule::unicode_char))]
pub struct UnicodeChar<'ast> {
/// Text of the matched span, produced by `span_into_string`.
/// Presumably the whole escape including `\u{` and `}` - the conversion code elsewhere
/// skips the first three bytes and the final byte before parsing the digits.
#[pest_ast(outer(with(span_into_string)))]
pub value: String,
/// Span of the match in the parsed input.
#[pest_ast(outer())]
pub span: Span<'ast>,
}
/// AST node built from a match of the `char_types` grammar rule: one variant per kind of
/// character the rule can contain.
#[derive(Clone, Debug, FromPest, PartialEq, Eq)]
#[pest_ast(rule(Rule::char_types))]
pub enum CharTypes<'ast> {
/// A character written literally.
Basic(BasicChar<'ast>),
/// A backslash escape.
Escaped(EscapedChar<'ast>),
/// A hexadecimal escape.
Hex(HexChar<'ast>),
/// An octal escape.
Octal(OctalChar<'ast>),
/// A unicode code point escape.
Unicode(UnicodeChar<'ast>),
}
impl<'ast> CharTypes<'ast> {
@ -75,8 +95,28 @@ impl<'ast> CharTypes<'ast> {
Err(InputParserError::invalid_char(character.value, &character.span))
}
Self::Hex(character) => {
let hex_string_number = character.value[3..=6].to_string();
if let Ok(hex) = u32::from_str_radix(&hex_string_number, 16) {
let hex_string_number = character.value[3..character.value.len()].to_string();
if let Ok(number) = u8::from_str_radix(&hex_string_number, 16) {
if number < 127 {
return Ok(number as char);
}
}
Err(InputParserError::invalid_char(character.value, &character.span))
}
Self::Octal(character) => {
let octal_string_number = character.value[3..character.value.len()].to_string();
if let Ok(number) = u8::from_str_radix(&octal_string_number, 8) {
if number < 127 {
return Ok(number as char);
}
}
Err(InputParserError::invalid_char(character.value, &character.span))
}
Self::Unicode(character) => {
let unicode_string_number = character.value[3..=character.value.len() - 2].to_string();
if let Ok(hex) = u32::from_str_radix(&unicode_string_number, 16) {
if let Some(unicode) = std::char::from_u32(hex) {
return Ok(unicode);
}

View File

@ -61,6 +61,162 @@ fn eat_identifier(input_tendril: &StrTendril) -> Option<StrTendril> {
}
impl Token {
///
/// Returns a tuple: [(character literal length, character token)] if a character literal can be
/// eaten, otherwise returns [(0, `None`)].
///
/// Scanning starts at byte 1, so the first byte of `input_tendril` is taken to be the opening
/// quote. The returned length is a byte count that includes both quotes.
///
/// The text between the quotes is decoded according to the escape it starts with:
/// * no escape: exactly one UTF-8 encoded character;
/// * `\0`, `\t`, `\n`, `\r`, `\"`, `\'`, `\\`: the corresponding control or punctuation character;
/// * `\xH`: one or two hex digits with a value below 127;
/// * `\xO`: exactly three octal digits with a value below 127;
/// * `\u`: one to six hex digits naming a valid unicode scalar value.
///
/// Unescaped `{` and `}` are skipped rather than required, so they never reach the decoder.
/// Anything else (unterminated literal, unknown escape, malformed digits, more than one
/// character) is rejected.
///
fn eat_char(input_tendril: &StrTendril) -> (usize, Option<Token>) {
    // How the bytes collected between the quotes must be interpreted.
    enum Escape {
        Plain,
        Hex,
        Octal,
        Unicode,
    }

    // Parses `digits` as an unsigned number in `radix`. Empty input, signs and any byte that
    // is not a digit of that radix are rejected (`from_str_radix` alone would accept a `+`).
    fn parse_digits(digits: &[u8], radix: u32) -> Option<u32> {
        if digits.is_empty() || !digits.iter().all(|&digit| (digit as char).is_digit(radix)) {
            return None;
        }
        let text = std::str::from_utf8(digits).ok()?;
        u32::from_str_radix(text, radix).ok()
    }

    // Turns the collected bytes into the character they denote, or `None` if they are malformed.
    fn decode(kind: Escape, bytes: &[u8]) -> Option<char> {
        match kind {
            Escape::Hex => {
                if bytes.len() > 2 {
                    return None;
                }
                parse_digits(bytes, 16)
                    .filter(|&number| number < 127)
                    .and_then(std::char::from_u32)
            }
            Escape::Octal => {
                if bytes.len() != 3 {
                    return None;
                }
                parse_digits(bytes, 8)
                    .filter(|&number| number < 127)
                    .and_then(std::char::from_u32)
            }
            Escape::Unicode => {
                if bytes.len() > 6 {
                    return None;
                }
                parse_digits(bytes, 16).and_then(std::char::from_u32)
            }
            Escape::Plain => {
                // A literal holds exactly one character, whatever its UTF-8 byte length.
                let mut chars = std::str::from_utf8(bytes).ok()?.chars();
                match (chars.next(), chars.next()) {
                    (Some(only), None) => Some(only),
                    _ => None,
                }
            }
        }
    }

    if input_tendril.is_empty() {
        return (0, None);
    }

    let input = input_tendril[..].as_bytes();
    let mut i = 1;
    let mut escaped = false;
    let mut closed = false;
    let mut kind = Escape::Plain;
    let mut characters: Vec<u8> = vec![];

    while i < input.len() {
        let byte = input[i];

        if escaped {
            // The byte after a backslash selects the escape; whatever was collected so far
            // is discarded and the interpretation starts over.
            escaped = false;
            characters.clear();
            kind = Escape::Plain;
            match byte {
                b'0' => characters.push(0),
                b't' => characters.push(9),
                b'n' => characters.push(10),
                b'r' => characters.push(13),
                b'\"' => characters.push(34),
                b'\'' => characters.push(39),
                b'\\' => characters.push(92),
                b'x' => {
                    i += 1;
                    // Checked access: the input may end right after `\x`.
                    kind = match input.get(i).copied() {
                        Some(b'H') => Escape::Hex,
                        Some(b'O') => Escape::Octal,
                        _ => return (0, None),
                    };
                }
                b'u' => kind = Escape::Unicode,
                _ => return (0, None),
            }
            i += 1;
            continue;
        }

        match byte {
            b'\'' => {
                closed = true;
                i += 1;
                break;
            }
            b'{' => characters.clear(),
            b'}' => {}
            b'\\' => escaped = true,
            _ => characters.push(byte),
        }
        i += 1;
    }

    if !closed {
        return (0, None);
    }

    match decode(kind, &characters) {
        Some(character) => (i, Some(Token::CharLit(character))),
        None => (0, None),
    }
}
///
/// Returns a tuple: [(integer length, integer token)] if an integer can be eaten, otherwise returns [`None`].
/// An integer can be eaten if its bytes are at the front of the given `input_tendril` string.
@ -151,184 +307,7 @@ impl Token {
return (i + 1, Some(Token::FormatString(segments)));
}
b'\'' => {
let mut i = 1;
let mut escaped = false;
let mut hex = false;
let mut octal = false;
let mut unicode = true;
let mut characters: Vec<u8> = vec![];
while i < input.len() {
if !escaped {
if input[i] == b'\'' {
i += 1;
break;
}
if input[i] == b'{' {
i += 1;
characters.clear();
continue;
}
if input[i] == b'}' {
i += 1;
continue;
}
} else {
escaped = false;
characters.clear();
match input[i] {
b'0' => characters.push(0),
b't' => characters.push(9),
b'n' => characters.push(10),
b'r' => characters.push(13),
b'\"' => characters.push(34),
b'\'' => characters.push(39),
b'\\' => characters.push(92),
b'x' => {
i += 1;
match input[i] {
b'H' => {
hex = true;
}
b'O' => {
octal = true;
}
_ => {
return (0, None);
}
}
i += 1;
continue;
}
b'u' => {
unicode = true;
}
_ => {
return (0, None);
}
}
i += 1;
continue;
}
if input[i] == b'\\' {
escaped = true;
}
characters.push(input[i]);
i += 1;
}
if i == input.len() {
return (0, None);
}
return match characters.len() {
1 => {
if hex {
if let Ok(string) = std::str::from_utf8(&characters[..]) {
if let Ok(number) = u8::from_str_radix(&string, 16) {
if number < 127 {
return (i, Some(Token::CharLit(number as char)));
}
}
}
}
(i, Some(Token::CharLit(characters[0] as char)))
}
2 => {
if hex {
if let Ok(string) = std::str::from_utf8(&characters[..]) {
if let Ok(number) = u8::from_str_radix(&string, 16) {
if number < 127 {
return (i, Some(Token::CharLit(number as char)));
}
}
}
}
if unicode {
if let Ok(string) = std::str::from_utf8(&characters[..]) {
if let Some(character) = string.chars().next() {
return (i, Some(Token::CharLit(character)));
}
}
}
(0, None)
}
3 => {
if let Ok(string) = std::str::from_utf8(&characters[..]) {
if octal {
if let Ok(number) = u8::from_str_radix(&string, 8) {
if number < 127 {
return (i, Some(Token::CharLit(number as char)));
}
}
}
if let Some(character) = string.chars().next() {
return (i, Some(Token::CharLit(character)));
}
}
(0, None)
}
4 | 5 | 6 => {
if let Ok(unicode_string) = std::str::from_utf8(&characters[..]) {
if let Ok(hex) = u32::from_str_radix(&unicode_string, 16) {
if let Some(unicode_char) = std::char::from_u32(hex) {
return (i, Some(Token::CharLit(unicode_char)));
}
}
}
(0, None)
}
_ => (0, None),
};
// while i < input.len() {
// if !in_escape {
// if input[i] == b'\'' {
// break;
// }
// if input[i] == b'\\' {
// in_escape = !in_escape;
// } else {
// character.push(input[i] as char);
// }
// } else {
// in_escape = false;
// if input[i] == b'u' {
// i += 2;
// let mut j = i;
// let mut size = 0;
// while input[j] != b'}' {
// j += 1;
// size += 1;
// }
// let hex_string_number: String = input_tendril.subtendril(i as u32, size).to_string();
// if let Ok(hex) = u32::from_str_radix(&hex_string_number, 16) {
// if let Some(unicode) = std::char::from_u32(hex) {
// i = j;
// character = unicode.to_string();
// }
// } else {
// return (0, None);
// }
// } else {
// character.push(input[i] as char);
// }
// }
// i += 1;
// }
return Self::eat_char(&input_tendril);
}
x if x.is_ascii_digit() => {
return Self::eat_integer(&input_tendril);

View File

@ -3,8 +3,16 @@ namespace: Token
expectation: Pass
outputs:
- "'a' @ 1:1-4"
- "'A' @ 1:1-4"
- "''' @ 1:1-5"
- "'\\' @ 1:1-5"
- "'n' @ 1:1-5"
- "'Z' @ 1:1-4"
- "'\"' @ 1:1-5"
- "'' @ 1:1-5"
- "'' @ 1:1-5"
- "'\u0000' @ 1:1-5"
- "'❤' @ 1:1-11"
- "'の' @ 1:1-11"
- "'❤' @ 1:1-6"
- "'の' @ 1:1-6"
- "'*' @ 1:1-8"
- "'' @ 1:1-7"
- "'' @ 1:1-9"
- "'z' @ 1:1-9"

View File

@ -13,13 +13,13 @@ outputs:
content: "'a'"
- Value:
Char:
- b
- Z
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 4
path: test
content: "'b'"
content: "'Z'"
- Value:
Char:
- "\""
@ -31,7 +31,7 @@ outputs:
content: "'\\\"'"
- Value:
Char:
- t
- "\t"
- line_start: 1
line_stop: 1
col_start: 1
@ -40,7 +40,7 @@ outputs:
content: "'\\t'"
- Value:
Char:
- r
- "\r"
- line_start: 1
line_stop: 1
col_start: 1
@ -49,7 +49,7 @@ outputs:
content: "'\\r'"
- Value:
Char:
- "0"
- "\u0000"
- line_start: 1
line_stop: 1
col_start: 1
@ -65,3 +65,66 @@ outputs:
col_stop: 11
path: test
content: "'\\u{2764}'"
- Value:
Char:
- の
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 11
path: test
content: "'\\u{306E}'"
- Value:
Char:
- ❤
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 6
path: test
content: "'❤'"
- Value:
Char:
- の
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 6
path: test
content: "'の'"
- Value:
Char:
- "*"
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 8
path: test
content: "'\\xH2A'"
- Value:
Char:
- "\t"
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 7
path: test
content: "'\\xH9'"
- Value:
Char:
- "\t"
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 9
path: test
content: "'\\xO011'"
- Value:
Char:
- z
- line_start: 1
line_stop: 1
col_start: 1
col_stop: 9
path: test
content: "'\\xO172'"

View File

@ -12,6 +12,7 @@ expectation: Pass
'\u{2764}'
'\u{306E}'
'❤'
'の'
'\xH2A'
'\xH9'
'\xO011'