From 9cb3a405d85f1824ad54e07abd49fd0b2007fde6 Mon Sep 17 00:00:00 2001 From: Louis Gesbert Date: Thu, 19 Aug 2021 11:13:04 +0200 Subject: [PATCH] Fix lexing of money tokens MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - The Polish lexer was wrongly adapted from the American one: the code to trim the prefix `$` char was kept instead of trimming the postfix `PLN` - Anyway, the regex isn't anchored to the end of the input, so trimming postfix units is not needed - And the trimming for `€` was wrong anyway: it assumed `€` is only 1 byte long in UTF-8 --- compiler/surface/lexer_fr.ml | 5 ++--- compiler/surface/lexer_pl.ml | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/compiler/surface/lexer_fr.ml b/compiler/surface/lexer_fr.ml index bc16197a..c7e059ac 100644 --- a/compiler/surface/lexer_fr.ml +++ b/compiler/surface/lexer_fr.ml @@ -318,9 +318,8 @@ let rec lex_code (lexbuf : lexbuf) : token = DAY | digit, Star (digit | hspace), Opt (',', Rep (digit, 0 .. 2)), Star hspace, 0x20AC -> let extract_parts = R.regexp "([0-9]([0-9 ]*[0-9]|))(,([0-9]{0,2})|)" in - let full_str = Utf8.lexeme lexbuf in - let only_numbers_str = String.trim (String.sub full_str 0 (String.length full_str - 1)) in - let parts = R.get_substring (R.exec ~rex:extract_parts only_numbers_str) in + let str = Utf8.lexeme lexbuf in + let parts = R.get_substring (R.exec ~rex:extract_parts str) in (* Integer literal*) let units = parts 1 in let remove_spaces = R.regexp " " in diff --git a/compiler/surface/lexer_pl.ml b/compiler/surface/lexer_pl.ml index 010171a6..5b7c0a47 100644 --- a/compiler/surface/lexer_pl.ml +++ b/compiler/surface/lexer_pl.ml @@ -314,9 +314,8 @@ let rec lex_code (lexbuf : lexbuf) : token = DAY | digit, Star (digit | ','), Opt ('.', Rep (digit, 0 .. 
2)), Star hspace, "PLN" -> let extract_parts = R.regexp "([0-9]([0-9,]*[0-9]|))(.([0-9]{0,2})|)" in - let full_str = Utf8.lexeme lexbuf in - let only_numbers_str = String.trim (String.sub full_str 1 (String.length full_str - 1)) in - let parts = R.get_substring (R.exec ~rex:extract_parts only_numbers_str) in + let str = Utf8.lexeme lexbuf in + let parts = R.get_substring (R.exec ~rex:extract_parts str) in (* Integer literal*) let units = parts 1 in let remove_commas = R.regexp "," in