diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 81d77720..50fd8e88 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -54,6 +54,11 @@ jobs: eval $(opam env) make build + - name: Run tests + run: | + eval $(opam env) + make tests + - name: Make examples run: | eval $(opam env) diff --git a/src/catala/parsing/lexer.ml b/src/catala/parsing/lexer.ml index cd046d9c..da94d2a5 100644 --- a/src/catala/parsing/lexer.ml +++ b/src/catala/parsing/lexer.ml @@ -22,19 +22,42 @@ let code_string_acc : string ref = ref "" let update_acc (lexbuf : lexbuf) : unit = code_string_acc := !code_string_acc ^ Utf8.lexeme lexbuf +let raise_lexer_error (loc : Pos.t) (token : string) (msg : string) = + Errors.raise_spanned_error (Printf.sprintf "Parsing error on token \"%s\": %s" token msg) loc + +let token_list_language_agnostic : (string * token) list = + [ + ("->", ARROW); + (".", DOT); + ("<=", LESSER_EQUAL); + (">=", GREATER_EQUAL); + (">", GREATER); + ("!=", NOT_EQUAL); + ("=", EQUAL); + ("(", LPAREN); + (")", RPAREN); + ("+", PLUS); + ("-", MINUS); + ("*", MULT); + ("/", DIV); + ("|", VERTICAL); + (":", COLON); + ("--", ALT); + ] + let token_list : (string * token) list = [ ("scope", SCOPE); - ("|", CONSEQUENCE); + ("]", CONSEQUENCE); ("data", DATA); - ("depends on", DEPENDS); - ("declaration", DECLARATION); - ("context", CONTEXT); + ("fun of", DEPENDS); + ("new", DECLARATION); + ("param", CONTEXT); ("decreasing", DECREASING); ("increasing", INCREASING); ("of", OF); - ("collection", COLLECTION); - ("enumeration", ENUM); + ("set", COLLECTION); + ("enum", ENUM); ("int", INTEGER); ("amount", MONEY); ("text", TEXT); @@ -42,21 +65,21 @@ let token_list : (string * token) list = ("date", DATE); ("boolean", BOOLEAN); ("sum", SUM); - ("fulfilled", FILLED); + ("ok", FILLED); ("def", DEFINITION); ("equals", DEFINED_AS); ("match", MATCH); - ("with pattern", WITH); - ("?", UNDER_CONDITION); + ("with", WITH); + ("[", UNDER_CONDITION); ("if", IF); ("then", THEN); ("else", ELSE); - ("content", CONTENT); - ("structure", STRUCT); - ("optional", OPTIONAL); - ("assertion", ASSERTION); + ("type", CONTENT); + ("struct", STRUCT); + ("option", OPTIONAL); + ("assert", ASSERTION); ("varies", VARIES); - ("with", WITH_V); + ("with parameter", WITH_V); ("for", FOR); ("all", ALL); ("we have", WE_HAVE); @@ -75,7 +98,7 @@ let token_list : (string * token) list = ("true", TRUE); ("false", FALSE); ] - @ Lexer_fr.token_list_language_agnostic + @ token_list_language_agnostic let rec lex_code (lexbuf : lexbuf) : token = match%sedlex lexbuf with @@ -97,13 +120,13 @@ let rec lex_code (lexbuf : lexbuf) : token = | "data" -> update_acc lexbuf; DATA - | "depends on" -> + | "fun of" -> update_acc lexbuf; DEPENDS - | "declaration" -> + | "new" -> update_acc lexbuf; DECLARATION - | "context" -> + | "param" -> update_acc lexbuf; CONTEXT | "decreasing" -> @@ -115,10 +138,10 @@ let rec lex_code (lexbuf : lexbuf) : token = | "of" -> update_acc lexbuf; OF - | "collection" -> + | "set" -> update_acc lexbuf; COLLECTION - | "enumeration" -> + | "enum" -> update_acc lexbuf; ENUM | "int" -> @@ -142,22 +165,28 @@ let rec lex_code (lexbuf : lexbuf) : token = | "sum" -> update_acc lexbuf; SUM - | "fulfilled" -> + | "ok" -> update_acc lexbuf; FILLED | "def" -> update_acc lexbuf; DEFINITION - | "=" -> + | ":=" -> update_acc lexbuf; DEFINED_AS + | "varies" -> + update_acc lexbuf; + VARIES + | "with" -> + update_acc lexbuf; + WITH_V | "match" -> update_acc lexbuf; MATCH - | "with pattern" -> + | "with" -> update_acc lexbuf; WITH - | "?" -> + | "[" -> update_acc lexbuf; UNDER_CONDITION | "if" -> @@ -172,24 +201,18 @@ let rec lex_code (lexbuf : lexbuf) : token = | "condition" -> update_acc lexbuf; CONDITION - | "content" -> + | "type" -> update_acc lexbuf; CONTENT | "structure" -> update_acc lexbuf; STRUCT - | "optional" -> + | "option" -> update_acc lexbuf; OPTIONAL - | "assertion" -> + | "assert" -> update_acc lexbuf; ASSERTION - | "varies" -> - update_acc lexbuf; - VARIES - | "with" -> - update_acc lexbuf; - WITH_V | "for" -> update_acc lexbuf; FOR @@ -233,7 +256,7 @@ let rec lex_code (lexbuf : lexbuf) : token = | "not" -> update_acc lexbuf; NOT - | "|" -> + | "]" -> update_acc lexbuf; CONSEQUENCE | "number" -> @@ -333,7 +356,7 @@ let rec lex_code (lexbuf : lexbuf) : token = (* Integer literal*) update_acc lexbuf; INT_LITERAL (int_of_string (Utf8.lexeme lexbuf)) - | _ -> Lexer_fr.raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unknown token" + | _ -> raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unknown token" let rec lex_law (lexbuf : lexbuf) : token = match%sedlex lexbuf with @@ -363,9 +386,7 @@ let rec lex_law (lexbuf : lexbuf) : token = let pos = lexing_positions lexbuf in if Filename.extension name = ".pdf" then LAW_INCLUDE (Ast.PdfFile ((name, pos), pages)) else if Filename.extension name = ".catala" then LAW_INCLUDE (Ast.CatalaFile (name, pos)) - else - Lexer_fr.raise_lexer_error (lexing_positions lexbuf) name - "this type of file cannot be included" + else raise_lexer_error (lexing_positions lexbuf) name "this type of file cannot be included" | "@@", Plus (Compl '@'), "@@", Star '+' -> let extract_code_title = R.regexp "@@([^@]+)@@([\\+]*)" in let get_match = R.get_substring (R.exec ~rex:extract_code_title (Utf8.lexeme lexbuf)) in @@ -394,6 +415,6 @@ let rec lex_law (lexbuf : lexbuf) : token = LAW_ARTICLE (title, None, None) | Plus (Compl ('@' | '/' | '\n')) -> LAW_TEXT (Utf8.lexeme lexbuf) - | _ -> Lexer_fr.raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unknown token" + | _ -> raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unknown token" let lexer lexbuf = if !is_code then lex_code lexbuf else lex_law lexbuf diff --git a/src/catala/parsing/lexer_en.ml b/src/catala/parsing/lexer_en.ml index 4602cd2a..44be7e2a 100644 --- a/src/catala/parsing/lexer_en.ml +++ b/src/catala/parsing/lexer_en.ml @@ -14,14 +14,9 @@ open Parser open Sedlexing +module L = Lexer module R = Re.Pcre -let is_code : bool ref = ref false - -let code_string_acc : string ref = ref "" - -let update_acc (lexbuf : lexbuf) : unit = code_string_acc := !code_string_acc ^ Utf8.lexeme lexbuf - let token_list_en : (string * token) list = [ ("scope", SCOPE); @@ -75,178 +70,178 @@ let token_list_en : (string * token) list = ("true", TRUE); ("false", FALSE); ] - @ Lexer_fr.token_list_language_agnostic + @ L.token_list_language_agnostic let rec lex_code_en (lexbuf : lexbuf) : token = match%sedlex lexbuf with | white_space -> (* Whitespaces *) - update_acc lexbuf; + L.update_acc lexbuf; lex_code_en lexbuf | '#', Star (Compl '\n'), '\n' -> (* Comments *) - update_acc lexbuf; + L.update_acc lexbuf; lex_code_en lexbuf | "*/" -> (* End of code section *) - is_code := false; - END_CODE !code_string_acc + L.is_code := false; + END_CODE !L.code_string_acc | "scope" -> - update_acc lexbuf; + L.update_acc lexbuf; SCOPE | "data" -> - update_acc lexbuf; + L.update_acc lexbuf; DATA | "depends on" -> - update_acc lexbuf; + L.update_acc lexbuf; DEPENDS | "declaration" -> - update_acc lexbuf; + L.update_acc lexbuf; DECLARATION | "context" -> - update_acc lexbuf; + L.update_acc lexbuf; CONTEXT | "decreasing" -> - update_acc lexbuf; + L.update_acc lexbuf; DECREASING | "increasing" -> - update_acc lexbuf; + L.update_acc lexbuf; INCREASING | "of" -> - update_acc lexbuf; + L.update_acc lexbuf; OF | "collection" -> - update_acc lexbuf; + L.update_acc lexbuf; COLLECTION | "enumeration" -> - update_acc lexbuf; + L.update_acc lexbuf; ENUM | "integer" -> - update_acc lexbuf; + L.update_acc lexbuf; INTEGER | "amount" -> - update_acc lexbuf; + L.update_acc lexbuf; MONEY | "text" -> - update_acc lexbuf; + L.update_acc lexbuf; TEXT | "decimal" -> - update_acc lexbuf; + L.update_acc lexbuf; DECIMAL | "date" -> - update_acc lexbuf; + L.update_acc lexbuf; DATE | "boolean" -> - update_acc lexbuf; + L.update_acc lexbuf; BOOLEAN | "sum" -> - update_acc lexbuf; + L.update_acc lexbuf; SUM | "fulfilled" -> - update_acc lexbuf; + L.update_acc lexbuf; FILLED | "definition" -> - update_acc lexbuf; + L.update_acc lexbuf; DEFINITION | "equals" -> - update_acc lexbuf; + L.update_acc lexbuf; DEFINED_AS | "match" -> - update_acc lexbuf; + L.update_acc lexbuf; MATCH | "with pattern" -> - update_acc lexbuf; + L.update_acc lexbuf; WITH | "under condition" -> - update_acc lexbuf; + L.update_acc lexbuf; UNDER_CONDITION | "if" -> - update_acc lexbuf; + L.update_acc lexbuf; IF | "consequence" -> - update_acc lexbuf; + L.update_acc lexbuf; CONSEQUENCE | "then" -> - update_acc lexbuf; + L.update_acc lexbuf; THEN | "else" -> - update_acc lexbuf; + L.update_acc lexbuf; ELSE | "condition" -> - update_acc lexbuf; + L.update_acc lexbuf; CONDITION | "content" -> - update_acc lexbuf; + L.update_acc lexbuf; CONTENT | "structure" -> - update_acc lexbuf; + L.update_acc lexbuf; STRUCT | "optional" -> - update_acc lexbuf; + L.update_acc lexbuf; OPTIONAL | "assertion" -> - update_acc lexbuf; + L.update_acc lexbuf; ASSERTION | "varies" -> - update_acc lexbuf; + L.update_acc lexbuf; VARIES | "with" -> - update_acc lexbuf; + L.update_acc lexbuf; WITH_V | "for" -> - update_acc lexbuf; + L.update_acc lexbuf; FOR | "all" -> - update_acc lexbuf; + L.update_acc lexbuf; ALL | "we have" -> - update_acc lexbuf; + L.update_acc lexbuf; WE_HAVE | "fixed" -> - update_acc lexbuf; + L.update_acc lexbuf; FIXED | "by" -> - update_acc lexbuf; + L.update_acc lexbuf; BY | "rule" -> (* 0xE8 is è *) - update_acc lexbuf; + L.update_acc lexbuf; RULE | "exists" -> - update_acc lexbuf; + L.update_acc lexbuf; EXISTS | "in" -> - update_acc lexbuf; + L.update_acc lexbuf; IN | "such" -> - update_acc lexbuf; + L.update_acc lexbuf; SUCH | "that" -> - update_acc lexbuf; + L.update_acc lexbuf; THAT | "now" -> - update_acc lexbuf; + L.update_acc lexbuf; NOW | "and" -> - update_acc lexbuf; + L.update_acc lexbuf; AND | "or" -> - update_acc lexbuf; + L.update_acc lexbuf; OR | "not" -> - update_acc lexbuf; + L.update_acc lexbuf; NOT | "number" -> - update_acc lexbuf; + L.update_acc lexbuf; CARDINAL | "true" -> - update_acc lexbuf; + L.update_acc lexbuf; TRUE | "false" -> - update_acc lexbuf; + L.update_acc lexbuf; FALSE | "year" -> - update_acc lexbuf; + L.update_acc lexbuf; YEAR | 0x24, Star white_space, '0' .. '9', Star ('0' .. '9' | ','), Opt ('.', Rep ('0' .. '9', 0 .. 2)) -> @@ -259,93 +254,99 @@ let rec lex_code_en (lexbuf : lexbuf) : token = let remove_commas = R.regexp "," in let units = int_of_string (R.substitute ~rex:remove_commas ~subst:(fun _ -> "") units) in let cents = try int_of_string (parts 4) with Not_found -> 0 in - update_acc lexbuf; + L.update_acc lexbuf; MONEY_AMOUNT (units, cents) | Plus '0' .. '9', '.', Star '0' .. '9' -> let extract_code_title = R.regexp "([0-9]+)\\.([0-9]*)" in let dec_parts = R.get_substring (R.exec ~rex:extract_code_title (Utf8.lexeme lexbuf)) in (* Integer literal*) - update_acc lexbuf; + L.update_acc lexbuf; DECIMAL_LITERAL (int_of_string (dec_parts 1), int_of_string (dec_parts 2)) | "->" -> - update_acc lexbuf; + L.update_acc lexbuf; ARROW | '.' -> - update_acc lexbuf; + L.update_acc lexbuf; DOT | "<=" -> - update_acc lexbuf; + L.update_acc lexbuf; LESSER_EQUAL | '<' -> - update_acc lexbuf; + L.update_acc lexbuf; LESSER | ">=" -> - update_acc lexbuf; + L.update_acc lexbuf; GREATER_EQUAL | '>' -> - update_acc lexbuf; + L.update_acc lexbuf; GREATER | "!=" -> - update_acc lexbuf; + L.update_acc lexbuf; NOT_EQUAL | '=' -> - update_acc lexbuf; + L.update_acc lexbuf; EQUAL | '(' -> - update_acc lexbuf; + L.update_acc lexbuf; LPAREN | ')' -> - update_acc lexbuf; + L.update_acc lexbuf; RPAREN | '+' -> - update_acc lexbuf; + L.update_acc lexbuf; PLUS | '-' -> - update_acc lexbuf; + L.update_acc lexbuf; MINUS | '*' -> - update_acc lexbuf; + L.update_acc lexbuf; MULT | '%' -> - update_acc lexbuf; + L.update_acc lexbuf; PERCENT | '/' -> - update_acc lexbuf; + L.update_acc lexbuf; DIV | '|' -> - update_acc lexbuf; + L.update_acc lexbuf; VERTICAL | ':' -> - update_acc lexbuf; + L.update_acc lexbuf; COLON | "--" -> - update_acc lexbuf; + L.update_acc lexbuf; ALT | uppercase, Star (uppercase | lowercase | '0' .. '9' | '_' | '\'') -> (* Name of constructor *) - update_acc lexbuf; + L.update_acc lexbuf; CONSTRUCTOR (Utf8.lexeme lexbuf) | lowercase, Star (lowercase | uppercase | '0' .. '9' | '_' | '\'') -> (* Name of variable *) - update_acc lexbuf; + L.update_acc lexbuf; IDENT (Utf8.lexeme lexbuf) | Plus '0' .. '9' -> (* Integer literal*) - update_acc lexbuf; + L.update_acc lexbuf; INT_LITERAL (int_of_string (Utf8.lexeme lexbuf)) - | _ -> Lexer_fr.raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unknown token" + | _ -> L.raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unknown token" let rec lex_law_en (lexbuf : lexbuf) : token = match%sedlex lexbuf with | '\n' -> lex_law_en lexbuf | "/*" -> - is_code := true; - code_string_acc := ""; + L.is_code := true; + L.code_string_acc := ""; BEGIN_CODE | eof -> EOF - | "@@", Star white_space, "Master file", Star white_space, "@@" -> MASTER_FILE - | "@@", Star white_space, "Begin metadata", Star white_space, "@@" -> BEGIN_METADATA - | "@@", Star white_space, "End metadata", Star white_space, "@@" -> END_METADATA + | "@@", Star white_space, "Master file", Star white_space, "@@" -> + Cli.debug_print "A1"; + MASTER_FILE + | "@@", Star white_space, "Begin metadata", Star white_space, "@@" -> + Cli.debug_print "A1"; + BEGIN_METADATA + | "@@", Star white_space, "End metadata", Star white_space, "@@" -> + Cli.debug_print "A1"; + END_METADATA | ( "@@", Star white_space, "Include:", @@ -363,9 +364,7 @@ let rec lex_law_en (lexbuf : lexbuf) : token = let pos = lexing_positions lexbuf in if Filename.extension name = ".pdf" then LAW_INCLUDE (Ast.PdfFile ((name, pos), pages)) else if Filename.extension name = ".catala" then LAW_INCLUDE (Ast.CatalaFile (name, pos)) - else - Lexer_fr.raise_lexer_error (lexing_positions lexbuf) name - "this type of file cannot be included" + else L.raise_lexer_error (lexing_positions lexbuf) name "this type of file cannot be included" | "@@", Plus (Compl '@'), "@@", Star '+' -> let extract_code_title = R.regexp "@@([^@]+)@@([\\+]*)" in let get_match = R.get_substring (R.exec ~rex:extract_code_title (Utf8.lexeme lexbuf)) in @@ -394,6 +393,6 @@ let rec lex_law_en (lexbuf : lexbuf) : token = LAW_ARTICLE (title, None, None) | Plus (Compl ('@' | '/' | '\n')) -> LAW_TEXT (Utf8.lexeme lexbuf) - | _ -> Lexer_fr.raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unknown token" + | _ -> L.raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unknown token" -let lexer_en lexbuf = if !is_code then lex_code_en lexbuf else lex_law_en lexbuf +let lexer_en lexbuf = if !L.is_code then lex_code_en lexbuf else lex_law_en lexbuf diff --git a/src/catala/parsing/lexer_fr.ml b/src/catala/parsing/lexer_fr.ml index 753a962c..277accd6 100644 --- a/src/catala/parsing/lexer_fr.ml +++ b/src/catala/parsing/lexer_fr.ml @@ -14,43 +14,9 @@ open Parser open Sedlexing +module L = Lexer module R = Re.Pcre -let is_code : bool ref = ref false - -let code_string_acc : string ref = ref "" - -let raise_lexer_error (loc : Pos.t) (token : string) (msg : string) = - Errors.raise_spanned_error (Printf.sprintf "Parsing error on token \"%s\": %s" token msg) loc - -let rec lex_code_as_string (lexbuf : lexbuf) (acc : string) : token = - match%sedlex lexbuf with - | "*/" -> END_CODE (acc ^ Utf8.lexeme lexbuf) - | any -> lex_code_as_string lexbuf (acc ^ Utf8.lexeme lexbuf) - | _ -> raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unexpected token" - -let update_acc (lexbuf : lexbuf) = code_string_acc := !code_string_acc ^ Utf8.lexeme lexbuf - -let token_list_language_agnostic : (string * token) list = - [ - ("->", ARROW); - (".", DOT); - ("<=", LESSER_EQUAL); - (">=", GREATER_EQUAL); - (">", GREATER); - ("!=", NOT_EQUAL); - ("=", EQUAL); - ("(", LPAREN); - (")", RPAREN); - ("+", PLUS); - ("-", MINUS); - ("*", MULT); - ("/", DIV); - ("|", VERTICAL); - (":", COLON); - ("--", ALT); - ] - let token_list_fr : (string * token) list = [ ("champ d'application", SCOPE); @@ -104,184 +70,184 @@ let token_list_fr : (string * token) list = ("vrai", TRUE); ("faux", FALSE); ] - @ token_list_language_agnostic + @ L.token_list_language_agnostic let rec lex_code_fr (lexbuf : lexbuf) : token = match%sedlex lexbuf with | white_space | '\n' -> (* Whitespaces *) - update_acc lexbuf; + L.update_acc lexbuf; lex_code_fr lexbuf | '#', Star (Compl '\n'), '\n' -> (* Comments *) - update_acc lexbuf; + L.update_acc lexbuf; lex_code_fr lexbuf | "*/" -> (* End of code section *) - is_code := false; - END_CODE !code_string_acc + L.is_code := false; + END_CODE !L.code_string_acc | "champ d\'application" -> - update_acc lexbuf; + L.update_acc lexbuf; SCOPE | "donn", 0xE9, "e" -> (* 0xE9 is é *) - update_acc lexbuf; + L.update_acc lexbuf; DATA | "d", 0xE9, "pend de" -> - update_acc lexbuf; + L.update_acc lexbuf; DEPENDS | "d", 0xE9, "claration" -> - update_acc lexbuf; + L.update_acc lexbuf; DECLARATION | "contexte" -> - update_acc lexbuf; + L.update_acc lexbuf; CONTEXT | "d", 0xE9, "croissant" -> - update_acc lexbuf; + L.update_acc lexbuf; DECREASING | "croissant" -> - update_acc lexbuf; + L.update_acc lexbuf; INCREASING | "de" -> - update_acc lexbuf; + L.update_acc lexbuf; OF | "collection" -> - update_acc lexbuf; + L.update_acc lexbuf; COLLECTION | 0xE9, "num", 0xE9, "ration" -> - update_acc lexbuf; + L.update_acc lexbuf; ENUM | "entier" -> - update_acc lexbuf; + L.update_acc lexbuf; INTEGER | "montant" -> - update_acc lexbuf; + L.update_acc lexbuf; MONEY | "texte" -> - update_acc lexbuf; + L.update_acc lexbuf; TEXT | "d", 0xE9, "cimal" -> - update_acc lexbuf; + L.update_acc lexbuf; DECIMAL | "date" -> - update_acc lexbuf; + L.update_acc lexbuf; DATE | "bool", 0xE9, "en" -> - update_acc lexbuf; + L.update_acc lexbuf; BOOLEAN | "somme" -> - update_acc lexbuf; + L.update_acc lexbuf; SUM | "rempli" -> - update_acc lexbuf; + L.update_acc lexbuf; FILLED | "d", 0xE9, "finition" -> (* 0xE9 is é *) - update_acc lexbuf; + L.update_acc lexbuf; DEFINITION | 0xE9, "gal ", 0x00E0 -> (* 0xE9 is é *) - update_acc lexbuf; + L.update_acc lexbuf; DEFINED_AS | "selon" -> - update_acc lexbuf; + L.update_acc lexbuf; MATCH | "sous forme" -> - update_acc lexbuf; + L.update_acc lexbuf; WITH | "sous condition" -> - update_acc lexbuf; + L.update_acc lexbuf; UNDER_CONDITION | "si" -> - update_acc lexbuf; + L.update_acc lexbuf; IF | "cons", 0xE9, "quence" -> (* 0xE9 is é *) - update_acc lexbuf; + L.update_acc lexbuf; CONSEQUENCE | "alors" -> (* 0xE9 is é *) - update_acc lexbuf; + L.update_acc lexbuf; THEN | "sinon" -> - update_acc lexbuf; + L.update_acc lexbuf; ELSE | "condition" -> - update_acc lexbuf; + L.update_acc lexbuf; CONDITION | "contenu" -> - update_acc lexbuf; + L.update_acc lexbuf; CONTENT | "structure" -> - update_acc lexbuf; + L.update_acc lexbuf; STRUCT | "optionnel" -> - update_acc lexbuf; + L.update_acc lexbuf; OPTIONAL | "assertion" -> - update_acc lexbuf; + L.update_acc lexbuf; ASSERTION | "varie" -> - update_acc lexbuf; + L.update_acc lexbuf; VARIES | "avec" -> - update_acc lexbuf; + L.update_acc lexbuf; WITH_V | "pour" -> - update_acc lexbuf; + L.update_acc lexbuf; FOR | "tout" -> - update_acc lexbuf; + L.update_acc lexbuf; ALL | "on a" -> - update_acc lexbuf; + L.update_acc lexbuf; WE_HAVE | "fix", 0xE9 -> (* 0xE9 is é *) - update_acc lexbuf; + L.update_acc lexbuf; FIXED | "par" -> - update_acc lexbuf; + L.update_acc lexbuf; BY | "r", 0xE8, "gle" -> (* 0xE8 is è *) - update_acc lexbuf; + L.update_acc lexbuf; RULE | "existe" -> - update_acc lexbuf; + L.update_acc lexbuf; EXISTS | "dans" -> - update_acc lexbuf; + L.update_acc lexbuf; IN | "tel" -> - update_acc lexbuf; + L.update_acc lexbuf; SUCH | "que" -> - update_acc lexbuf; + L.update_acc lexbuf; THAT | "maintenant" -> - update_acc lexbuf; + L.update_acc lexbuf; NOW | "et" -> - update_acc lexbuf; + L.update_acc lexbuf; AND | "ou" -> - update_acc lexbuf; + L.update_acc lexbuf; OR | "non" -> - update_acc lexbuf; + L.update_acc lexbuf; NOT | "nombre" -> - update_acc lexbuf; + L.update_acc lexbuf; CARDINAL | "vrai" -> - update_acc lexbuf; + L.update_acc lexbuf; TRUE | "faux" -> - update_acc lexbuf; + L.update_acc lexbuf; FALSE | "an" -> - update_acc lexbuf; + L.update_acc lexbuf; YEAR | ( '0' .. '9', Star ('0' .. '9' | white_space), @@ -297,88 +263,88 @@ let rec lex_code_fr (lexbuf : lexbuf) : token = let remove_spaces = R.regexp " " in let units = int_of_string (R.substitute ~rex:remove_spaces ~subst:(fun _ -> "") units) in let cents = try int_of_string (parts 4) with Not_found -> 0 in - update_acc lexbuf; + L.update_acc lexbuf; MONEY_AMOUNT (units, cents) | Plus '0' .. '9', ',', Star '0' .. '9' -> let extract_code_title = R.regexp "([0-9]+),([0-9]*)" in let dec_parts = R.get_substring (R.exec ~rex:extract_code_title (Utf8.lexeme lexbuf)) in (* Integer literal*) - update_acc lexbuf; + L.update_acc lexbuf; DECIMAL_LITERAL (int_of_string (dec_parts 1), int_of_string (dec_parts 2)) | "->" -> - update_acc lexbuf; + L.update_acc lexbuf; ARROW | '.' -> - update_acc lexbuf; + L.update_acc lexbuf; DOT | "<=" -> - update_acc lexbuf; + L.update_acc lexbuf; LESSER_EQUAL | '<' -> - update_acc lexbuf; + L.update_acc lexbuf; LESSER | ">=" -> - update_acc lexbuf; + L.update_acc lexbuf; GREATER_EQUAL | '>' -> - update_acc lexbuf; + L.update_acc lexbuf; GREATER | "!=" -> - update_acc lexbuf; + L.update_acc lexbuf; NOT_EQUAL | '=' -> - update_acc lexbuf; + L.update_acc lexbuf; EQUAL | '(' -> - update_acc lexbuf; + L.update_acc lexbuf; LPAREN | ')' -> - update_acc lexbuf; + L.update_acc lexbuf; RPAREN | '+' -> - update_acc lexbuf; + L.update_acc lexbuf; PLUS | '-' -> - update_acc lexbuf; + L.update_acc lexbuf; MINUS | '*' -> - update_acc lexbuf; + L.update_acc lexbuf; MULT | '%' -> - update_acc lexbuf; + L.update_acc lexbuf; PERCENT | '/' -> - update_acc lexbuf; + L.update_acc lexbuf; DIV | '|' -> - update_acc lexbuf; + L.update_acc lexbuf; VERTICAL | ':' -> - update_acc lexbuf; + L.update_acc lexbuf; COLON | "--" -> - update_acc lexbuf; + L.update_acc lexbuf; ALT | uppercase, Star (uppercase | lowercase | '0' .. '9' | '_' | '\'') -> (* Name of constructor *) - update_acc lexbuf; + L.update_acc lexbuf; CONSTRUCTOR (Utf8.lexeme lexbuf) | lowercase, Star (lowercase | uppercase | '0' .. '9' | '_' | '\'') -> (* Name of variable *) - update_acc lexbuf; + L.update_acc lexbuf; IDENT (Utf8.lexeme lexbuf) | Plus '0' .. '9' -> (* Integer literal*) - update_acc lexbuf; + L.update_acc lexbuf; INT_LITERAL (int_of_string (Utf8.lexeme lexbuf)) - | _ -> raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unknown token" + | _ -> L.raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unknown token" let rec lex_law_fr (lexbuf : lexbuf) : token = match%sedlex lexbuf with | '\n' -> lex_law_fr lexbuf | "/*" -> - is_code := true; - code_string_acc := ""; + L.is_code := true; + L.code_string_acc := ""; BEGIN_CODE | eof -> EOF | "@@", Star white_space, "Fichier ma", 0x00EE, "tre", Star white_space, "@@" -> @@ -408,7 +374,7 @@ let rec lex_law_fr (lexbuf : lexbuf) : token = if R.pmatch ~rex:jorftext name then LAW_INCLUDE (Ast.LegislativeText (name, pos)) else if Filename.extension name = ".pdf" then LAW_INCLUDE (Ast.PdfFile ((name, pos), pages)) else if Filename.extension name = ".catala" then LAW_INCLUDE (Ast.CatalaFile (name, pos)) - else raise_lexer_error (lexing_positions lexbuf) name "this type of file cannot be included" + else L.raise_lexer_error (lexing_positions lexbuf) name "this type of file cannot be included" | "@@", Plus (Compl '@'), "@@", Star '+' -> let extract_code_title = R.regexp "@@([^@]+)@@([\\+]*)" in let get_match = R.get_substring (R.exec ~rex:extract_code_title (Utf8.lexeme lexbuf)) in @@ -445,6 +411,7 @@ let rec lex_law_fr (lexbuf : lexbuf) : token = LAW_ARTICLE (title, article_id, article_expiration_date) | Plus (Compl ('@' | '/' | '\n')) -> LAW_TEXT (Utf8.lexeme lexbuf) - | _ -> raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unknown token" + | _ -> L.raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unknown token" -let lexer_fr (lexbuf : lexbuf) : token = if !is_code then lex_code_fr lexbuf else lex_law_fr lexbuf +let lexer_fr (lexbuf : lexbuf) : token = + if !L.is_code then lex_code_fr lexbuf else lex_law_fr lexbuf diff --git a/src/catala/parsing/parser_driver.ml b/src/catala/parsing/parser_driver.ml index 20715787..5a59ab22 100644 --- a/src/catala/parsing/parser_driver.ml +++ b/src/catala/parsing/parser_driver.ml @@ -131,7 +131,7 @@ let sedlex_with_menhir (lexer' : lexbuf -> Parser.token) (token_list : (string * in try loop lexer token_list lexbuf None (target_rule (fst @@ Sedlexing.lexing_positions lexbuf)) with Sedlexing.MalFormed | Sedlexing.InvalidCodepoint _ -> - Lexer_fr.raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "malformed token" + Lexer.raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "malformed token" let rec parse_source_files (source_files : string list) (language : Cli.frontend_lang) : Ast.program = diff --git a/src/catala/pos.ml b/src/catala/pos.ml index d2aa90ba..d0734fe1 100644 --- a/src/catala/pos.ml +++ b/src/catala/pos.ml @@ -112,11 +112,11 @@ let retrieve_loc_text (pos : t) : string = | None -> [] in let pos_lines = get_lines 1 in - let spaces = int_of_float (floor (log (float_of_int eline))) in + let spaces = int_of_float (log10 (float_of_int eline)) + 1 in close_in oc; Cli.print_with_style blue_style "%*s--> %s\n%s" spaces "" filename (Cli.add_prefix_to_each_line - (Printf.sprintf "\n%s\n" (String.concat "\n" pos_lines)) + (Printf.sprintf "\n%s" (String.concat "\n" pos_lines)) (fun i -> let cur_line = sline - include_extra_count + i - 1 in if diff --git a/tests/Makefile b/tests/Makefile index edd038fe..9f3e5388 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -31,6 +31,6 @@ test_scope/sub_scope.catala: $(call interpret_with_scope_and_compare,nv,A) $(call interpret_with_scope_and_compare,nv,B) test_scope/sub_sub_scope.catala: - $(call interpret_with_scope_and_compare,en,A) - $(call interpret_with_scope_and_compare,en,B) - $(call interpret_with_scope_and_compare,en,C) + $(call interpret_with_scope_and_compare,nv,A) + $(call interpret_with_scope_and_compare,nv,B) + $(call interpret_with_scope_and_compare,nv,C) diff --git a/tests/test_bool/test_bool.catala b/tests/test_bool/test_bool.catala index 59f58bc0..b68624cc 100644 --- a/tests/test_bool/test_bool.catala +++ b/tests/test_bool/test_bool.catala @@ -1,10 +1,10 @@ /* -declaration scope TestBool : - context foo content bool - context bar content int +new scope TestBool : + param foo type bool + param bar type int scope TestBool : - def bar = 1 - def foo ? bar >= 0 |= true - def foo ? bar < 0 |= false + def bar := 1 + def foo [ bar >= 0 ] := true + def foo [ bar < 0 ] := false */ diff --git a/tests/test_func/func.catala b/tests/test_func/func.catala index d27b2b1e..da0cc496 100644 --- a/tests/test_func/func.catala +++ b/tests/test_func/func.catala @@ -1,14 +1,14 @@ /* -declaration scope S: - context f content int depends on int - context x content int - context b content bool +new scope S: + param f type int fun of int + param x type int + param b type bool scope S: - def f of x ? (x >= x) |= x + x - def f of x ? not b |= x * x + def f of x [ (x >= x) ] := x + x + def f of x [ not b ] := x * x - def b = false + def b := false - def x = f of 3 + def x := f of 3 */ diff --git a/tests/test_scope/scope.catala b/tests/test_scope/scope.catala index ffb53cd4..6e8e4a21 100644 --- a/tests/test_scope/scope.catala +++ b/tests/test_scope/scope.catala @@ -1,13 +1,13 @@ /* -declaration scope A: - context a content int - context b content dec - context c content bool +new scope A: + param a type int + param b type dec + param c type bool scope A: - def c = false - def a ? c |= 42 - def a ? not c |= 0 - def b ? not c |= 1337 - def b ? not c |= 0 + def c := false + def a [ c ] := 42 + def a [ not c ] := 0 + def b [ not c ] := 1337 + def b [ not c ] := 0 */ diff --git a/tests/test_scope/scope.catala.A.out b/tests/test_scope/scope.catala.A.out index 0b300d79..89e5dcf4 100644 --- a/tests/test_scope/scope.catala.A.out +++ b/tests/test_scope/scope.catala.A.out @@ -3,20 +3,17 @@ [ERROR] The conflict concerns this variable b [ERROR] --> test_scope/scope.catala [ERROR] | -[ERROR] 4 | context b content dec -[ERROR] | ^ -[ERROR] | +[ERROR] 4 | param b type dec +[ERROR] | ^ [ERROR] [ERROR] This justification is true: [ERROR] --> test_scope/scope.catala [ERROR] | -[ERROR] 11 | def b ? not c |= 1337 +[ERROR] 11 | def b [ not c ] := 1337 [ERROR] | ^^^^^ -[ERROR] | -[ERROR] +[ERROR] [ERROR] This justification is true: [ERROR] --> test_scope/scope.catala -[ERROR] | -[ERROR] 12 | def b ? not c |= 0 +[ERROR] | +[ERROR] 12 | def b [ not c ] := 0 [ERROR] | ^^^^^ -[ERROR] | diff --git a/tests/test_scope/sub_scope.catala b/tests/test_scope/sub_scope.catala index 329657d9..2e91b8f2 100644 --- a/tests/test_scope/sub_scope.catala +++ b/tests/test_scope/sub_scope.catala @@ -1,22 +1,22 @@ /* -declaration scope A: - context a content int - context b content bool - context a_base content int +new scope A: + param a type int + param b type bool + param a_base type int -declaration scope B: - context a content int - context b content bool - context scopeA scope A - context scopeAbis scope A +new scope B: + param a type int + param b type bool + param scopeA scope A + param scopeAbis scope A scope A: - def a_base = 1 - def a = -1 - def b = a > 0 + def a_base := 1 + def a := -1 + def b := a > 0 scope B: - def a = 42 - def b = scopeA.b - def scopeA.a ? a > 0 |= scopeA.a_base + def a := 42 + def b := scopeA.b + def scopeA.a [ a > 0 ] := scopeA.a_base */ diff --git a/tests/test_scope/sub_sub_scope.catala b/tests/test_scope/sub_sub_scope.catala index e92e2017..70172d94 100644 --- a/tests/test_scope/sub_sub_scope.catala +++ b/tests/test_scope/sub_sub_scope.catala @@ -1,28 +1,28 @@ /* -declaration scope A: - context x content integer - context u content boolean +new scope A: + param x type int + param u type bool -declaration scope B: - context a scope A - context y content integer +new scope B: + param a scope A + param y type int -declaration scope C: - context a scope A - context b scope B - context z content integer +new scope C: + param a scope A + param b scope B + param z type int scope A: - definition x equals 0 - definition u equals true + def x := 0 + def u := true scope B: - definition a.x under condition a.u consequence equals 1 - definition y under condition a.x = 1 consequence equals 1 - definition y under condition a.x + 1 = 2 consequence equals 1 + def a.x [ a.u ] := 1 + def y [ a.x = 1 ] := 1 + def y [ a.x + 1 = 2 ] := 1 scope C: - definition a.x equals 2 - definition b.y equals 3 - definition z equals 2 + def a.x := 2 + def b.y := 3 + def z := 2 */ diff --git a/tests/test_scope/sub_sub_scope.catala.B.out b/tests/test_scope/sub_sub_scope.catala.B.out index 461ee88e..1dba4f6e 100644 --- a/tests/test_scope/sub_sub_scope.catala.B.out +++ b/tests/test_scope/sub_sub_scope.catala.B.out @@ -1,22 +1,19 @@ [ERROR] Default logic conflict, multiple justifications are true but are not related by a precedence [ERROR] [ERROR] The conflict concerns this variable y +[ERROR] --> test_scope/sub_sub_scope.catala +[ERROR] | +[ERROR] 8 | param y type int +[ERROR] | ^ +[ERROR] +[ERROR] This justification is true: [ERROR] --> test_scope/sub_sub_scope.catala [ERROR] | -[ERROR] 8 | context y content integer -[ERROR] | ^ +[ERROR] 21 | def y [ a.x = 1 ] := 1 +[ERROR] | ^^^^^^^ +[ERROR] +[ERROR] This justification is true: +[ERROR] --> test_scope/sub_sub_scope.catala [ERROR] | -[ERROR] -[ERROR] This justification is true: -[ERROR] --> test_scope/sub_sub_scope.catala -[ERROR] | -[ERROR] 21 | definition y under condition a.x = 1 consequence equals 1 -[ERROR] | ^^^^^^^ -[ERROR] | -[ERROR] -[ERROR] This justification is true: -[ERROR] --> test_scope/sub_sub_scope.catala -[ERROR] | -[ERROR] 22 | definition y under condition a.x + 1 = 2 consequence equals 1 -[ERROR] | ^^^^^^^^^^^ -[ERROR] | +[ERROR] 22 | def y [ a.x + 1 = 2 ] := 1 +[ERROR] | ^^^^^^^^^^^