Fix lexing of money tokens

- The Polish lexer was wrongly adapted from the American one: the code to trim
  the prefix `$` char was kept instead of trimming the postfix `PLN`
- In any case, the regex is not required to match up to the end of the input,
  so trimming postfix units is not needed
- And the trimming for `€` was wrong anyway: it assumed `€` is only 1 byte long
  in UTF-8, whereas it is encoded on 3 bytes
This commit is contained in:
Louis Gesbert 2021-08-19 11:13:04 +02:00
parent 7817e9a855
commit 9cb3a405d8
2 changed files with 4 additions and 6 deletions

View File

@ -318,9 +318,8 @@ let rec lex_code (lexbuf : lexbuf) : token =
DAY
| digit, Star (digit | hspace), Opt (',', Rep (digit, 0 .. 2)), Star hspace, 0x20AC ->
let extract_parts = R.regexp "([0-9]([0-9 ]*[0-9]|))(,([0-9]{0,2})|)" in
let full_str = Utf8.lexeme lexbuf in
let only_numbers_str = String.trim (String.sub full_str 0 (String.length full_str - 1)) in
let parts = R.get_substring (R.exec ~rex:extract_parts only_numbers_str) in
let str = Utf8.lexeme lexbuf in
let parts = R.get_substring (R.exec ~rex:extract_parts str) in
(* Integer literal*)
let units = parts 1 in
let remove_spaces = R.regexp " " in

View File

@ -314,9 +314,8 @@ let rec lex_code (lexbuf : lexbuf) : token =
DAY
| digit, Star (digit | ','), Opt ('.', Rep (digit, 0 .. 2)), Star hspace, "PLN" ->
let extract_parts = R.regexp "([0-9]([0-9,]*[0-9]|))(.([0-9]{0,2})|)" in
let full_str = Utf8.lexeme lexbuf in
let only_numbers_str = String.trim (String.sub full_str 1 (String.length full_str - 1)) in
let parts = R.get_substring (R.exec ~rex:extract_parts only_numbers_str) in
let str = Utf8.lexeme lexbuf in
let parts = R.get_substring (R.exec ~rex:extract_parts str) in
(* Integer literal*)
let units = parts 1 in
let remove_commas = R.regexp "," in