From 24d14bef8651ab055ae1261e75d01c5a20941361 Mon Sep 17 00:00:00 2001 From: imaqtkatt Date: Thu, 11 Jan 2024 10:37:21 -0300 Subject: [PATCH] Add upper unicode escape syntax --- src/term/parser/lexer.rs | 7 ++++--- tests/golden_tests/run_file/world.hvm | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/term/parser/lexer.rs b/src/term/parser/lexer.rs index 1f74016b..3b45b234 100644 --- a/src/term/parser/lexer.rs +++ b/src/term/parser/lexer.rs @@ -36,7 +36,8 @@ pub enum Token { #[regex(r#""([^"\\]|\\t|\\u|\\n|\\")*""#, |lex| normalized_string(lex).ok())] Str(String), - #[regex(r#"'(.|\\t|\\u[0-9a-fA-F]{1,8}|\\n|\\')'"#, normalized_char)] + #[regex(r#"'\\U[0-9a-fA-F]{1,8}'"#, normalized_char, priority = 2)] + #[regex(r#"'(.|\\t|\\u[0-9a-fA-F]{1,4}|\\n|\\')'"#, normalized_char)] Char(u64), #[token("#")] @@ -144,7 +145,7 @@ fn normalized_string(lexer: &mut Lexer) -> Result '\\' => match chars.next() { Some('n') => s.push('\n'), Some('t') => s.push('\t'), - Some('u') => { + Some('u') | Some('U') => { let hex = chars.take(8).collect::<String>(); let hex_val = u32::from_str_radix(&hex, 16)?; let char = char::from_u32(hex_val).unwrap_or(char::REPLACEMENT_CHARACTER); @@ -218,7 +219,7 @@ fn normalized_char(lexer: &mut Lexer) -> Option { Some('n') => '\n', Some('t') => '\t', Some('\'') => '\'', - Some('u') => { + Some('u') | Some('U') => { let hex = chars.take(8).collect::<String>(); let hex_val = u32::from_str_radix(&hex, 16).unwrap(); char::from_u32(hex_val).unwrap_or(char::REPLACEMENT_CHARACTER) diff --git a/tests/golden_tests/run_file/world.hvm b/tests/golden_tests/run_file/world.hvm index b6584edd..3d3b0d52 100644 --- a/tests/golden_tests/run_file/world.hvm +++ b/tests/golden_tests/run_file/world.hvm @@ -1 +1 @@ -main = (1, @#str s ('\u1F30E', s)) \ No newline at end of file +main = (1, @#str s ('\U1F30E', s)) \ No newline at end of file