diff --git a/catala.opam b/catala.opam index 9f412eb6..9ed57673 100644 --- a/catala.opam +++ b/catala.opam @@ -32,6 +32,7 @@ depends: [ "js_of_ocaml-ppx" {>= "3.8.0"} "camomile" {>= "1.0.2"} "odoc" {with-doc} + "cppo" {>= "1"} ] build: [ ["dune" "subst"] {dev} diff --git a/compiler/surface/lexer.cppo.ml b/compiler/surface/lexer.cppo.ml index af358784..b795980c 100644 --- a/compiler/surface/lexer.cppo.ml +++ b/compiler/surface/lexer.cppo.ml @@ -522,7 +522,17 @@ let rec lex_code (lexbuf : lexbuf) : token = L.update_acc lexbuf; DAY | MX_MONEY_AMOUNT - | MX_DECIMAL_LITERAL + | Plus digit, MS_DECIMAL_SEPARATOR, Star digit -> + let rex = + Re.(compile @@ whole_string @@ seq [ + group (rep1 digit); + str MS_DECIMAL_SEPARATOR; + group (rep digit) + ]) in + let dec_parts = R.get_substring (R.exec ~rex (Utf8.lexeme lexbuf)) in + L.update_acc lexbuf; + DECIMAL_LITERAL + (Runtime.integer_of_string (dec_parts 1), Runtime.integer_of_string (dec_parts 2)) | "<=@" -> L.update_acc lexbuf; LESSER_EQUAL_DATE diff --git a/compiler/surface/lexer_en.cppo.ml b/compiler/surface/lexer_en.cppo.ml index 836cf648..b938bef2 100644 --- a/compiler/surface/lexer_en.cppo.ml +++ b/compiler/surface/lexer_en.cppo.ml @@ -85,7 +85,10 @@ #define MS_TRUE "true" #define MS_FALSE "false" +(* Specific delimiters *) + #define MR_MONEY_OP_SUFFIX '$' +#define MS_DECIMAL_SEPARATOR "." (* Builtins *) @@ -108,8 +111,13 @@ (* More complex cases *) #define MX_MONEY_AMOUNT \ - 0x24, Star hspace, digit, Star (digit | ','), Opt ('.', Rep (digit, 0 .. 2)) -> \ - let extract_parts = R.regexp "([0-9]([0-9,]*[0-9]|))(.([0-9]{0,2})|)" in \ + 0x24, Star hspace, digit, Star (digit | ','), Opt (MS_DECIMAL_SEPARATOR, Rep (digit, 0 .. 2)) -> \ + let extract_parts = \ + Re.(compile @@ seq [ \ + group (seq [ digit; opt (seq [ rep (alt [digit; char ',']); digit]) ]); \ + opt (seq [ str MS_DECIMAL_SEPARATOR; group (repn digit 0 (Some 2))]) \ + ]) \ + in \ let full_str = Utf8.lexeme lexbuf in \ let only_numbers_str = String.trim (String.sub full_str 1 (String.length full_str - 1)) in \ let parts = R.get_substring (R.exec ~rex:extract_parts only_numbers_str) in \ @@ -120,16 +128,7 @@ Runtime.integer_of_string (R.substitute ~rex:remove_commas ~subst:(fun _ -> "") units) \ in \ let cents = \ - try Runtime.integer_of_string (parts 4) with Not_found -> Runtime.integer_of_int 0 \ + try Runtime.integer_of_string (parts 2) with Not_found -> Runtime.integer_of_int 0 \ in \ L.update_acc lexbuf; \ MONEY_AMOUNT (units, cents) - -#define MX_DECIMAL_LITERAL \ - Plus digit, '.', Star digit -> \ - let extract_code_title = R.regexp "([0-9]+)\\.([0-9]*)" in \ - let dec_parts = R.get_substring (R.exec ~rex:extract_code_title (Utf8.lexeme lexbuf)) in \ - (* Integer literal*) \ - L.update_acc lexbuf; \ - DECIMAL_LITERAL \ - (Runtime.integer_of_string (dec_parts 1), Runtime.integer_of_string (dec_parts 2)) diff --git a/compiler/surface/lexer_fr.cppo.ml b/compiler/surface/lexer_fr.cppo.ml index 4494f6b6..56c6848d 100644 --- a/compiler/surface/lexer_fr.cppo.ml +++ b/compiler/surface/lexer_fr.cppo.ml @@ -92,7 +92,10 @@ #define MS_TRUE "vrai" #define MS_FALSE "faux" +(* Specific delimiters *) + #define MR_MONEY_OP_SUFFIX 0x20AC (* The euro sign *) +#define MS_DECIMAL_SEPARATOR "," (* Builtins *) @@ -115,8 +118,13 @@ (* More complex cases *) #define MX_MONEY_AMOUNT \ - digit, Star (digit | hspace), Opt (',', Rep (digit, 0 .. 2)), Star hspace, 0x20AC -> \ - let extract_parts = R.regexp "([0-9]([0-9 ]*[0-9]|))(,([0-9]{0,2})|)" in \ + digit, Star (digit | hspace), Opt (MS_DECIMAL_SEPARATOR, Rep (digit, 0 .. 2)), Star hspace, 0x20AC -> \ + let extract_parts = \ + Re.(compile @@ seq [ \ + group (seq [ digit; opt (seq [ rep (alt [digit; char ' ']); digit]) ]); \ + opt (seq [ str MS_DECIMAL_SEPARATOR; group (repn digit 0 (Some 2))]) \ + ]) \ + in \ let str = Utf8.lexeme lexbuf in \ let parts = R.get_substring (R.exec ~rex:extract_parts str) in \ (* Integer literal*) \ @@ -126,16 +134,7 @@ Runtime.integer_of_string (R.substitute ~rex:remove_spaces ~subst:(fun _ -> "") units) \ in \ let cents = \ - try Runtime.integer_of_string (parts 4) with Not_found -> Runtime.integer_of_int 0 \ + try Runtime.integer_of_string (parts 2) with Not_found -> Runtime.integer_of_int 0 \ in \ L.update_acc lexbuf; \ MONEY_AMOUNT (units, cents) - -#define MX_DECIMAL_LITERAL \ - Plus digit, ',', Star digit -> \ - let extract_code_title = R.regexp "([0-9]+),([0-9]*)" in \ - let dec_parts = R.get_substring (R.exec ~rex:extract_code_title (Utf8.lexeme lexbuf)) in \ - (* Integer literal*) \ - L.update_acc lexbuf; \ - DECIMAL_LITERAL \ - (Runtime.integer_of_string (dec_parts 1), Runtime.integer_of_string (dec_parts 2)) diff --git a/compiler/surface/lexer_pl.cppo.ml b/compiler/surface/lexer_pl.cppo.ml index ffe1540d..dd7e0a0a 100644 --- a/compiler/surface/lexer_pl.cppo.ml +++ b/compiler/surface/lexer_pl.cppo.ml @@ -90,7 +90,10 @@ #define MS_TRUE "prawda" #define MS_FALSE "falsz" +(* Specific delimiters *) + #define MR_MONEY_OP_SUFFIX '$' +#define MS_DECIMAL_SEPARATOR "." (* Builtins *) @@ -113,8 +116,13 @@ (* More complex cases *) #define MX_MONEY_AMOUNT \ - digit, Star (digit | ','), Opt ('.', Rep (digit, 0 .. 2)), Star hspace, "PLN" -> \ - let extract_parts = R.regexp "([0-9]([0-9,]*[0-9]|))(.([0-9]{0,2})|)" in \ + digit, Star (digit | ','), Opt (MS_DECIMAL_SEPARATOR, Rep (digit, 0 .. 2)), Star hspace, "PLN" -> \ + let extract_parts = \ + Re.(compile @@ seq [ \ + group (seq [ digit; opt (seq [ rep (alt [digit; char ',']); digit]) ]); \ + opt (seq [ str MS_DECIMAL_SEPARATOR; group (repn digit 0 (Some 2))]) \ + ]) \ + in \ let str = Utf8.lexeme lexbuf in \ let parts = R.get_substring (R.exec ~rex:extract_parts str) in \ (* Integer literal*) \ @@ -124,16 +132,7 @@ Runtime.integer_of_string (R.substitute ~rex:remove_commas ~subst:(fun _ -> "") units) \ in \ let cents = \ - try Runtime.integer_of_string (parts 4) with Not_found -> Runtime.integer_of_int 0 \ + try Runtime.integer_of_string (parts 2) with Not_found -> Runtime.integer_of_int 0 \ in \ L.update_acc lexbuf; \ MONEY_AMOUNT (units, cents) - -#define MX_DECIMAL_LITERAL \ - Plus digit, '.', Star digit -> \ - let extract_code_title = R.regexp "([0-9]+)\\.([0-9]*)" in \ - let dec_parts = R.get_substring (R.exec ~rex:extract_code_title (Utf8.lexeme lexbuf)) in \ - (* Integer literal*) \ - L.update_acc lexbuf; \ - DECIMAL_LITERAL \ - (Runtime.integer_of_string (dec_parts 1), Runtime.integer_of_string (dec_parts 2))