Cleaned lexing code and rationalized non-verbose syntax

This commit is contained in:
Denis Merigoux 2020-08-08 18:32:44 +02:00
parent df25d5d94d
commit 0f43975182
14 changed files with 330 additions and 344 deletions

View File

@ -54,6 +54,11 @@ jobs:
eval $(opam env)
make build
- name: Run tests
run: |
eval $(opam env)
make tests
- name: Make examples
run: |
eval $(opam env)

View File

@ -22,19 +22,42 @@ let code_string_acc : string ref = ref ""
let update_acc (lexbuf : lexbuf) : unit = code_string_acc := !code_string_acc ^ Utf8.lexeme lexbuf
let raise_lexer_error (loc : Pos.t) (token : string) (msg : string) =
Errors.raise_spanned_error (Printf.sprintf "Parsing error on token \"%s\": %s" token msg) loc
let token_list_language_agnostic : (string * token) list =
[
("->", ARROW);
(".", DOT);
("<=", LESSER_EQUAL);
(">=", GREATER_EQUAL);
(">", GREATER);
("!=", NOT_EQUAL);
("=", EQUAL);
("(", LPAREN);
(")", RPAREN);
("+", PLUS);
("-", MINUS);
("*", MULT);
("/", DIV);
("|", VERTICAL);
(":", COLON);
("--", ALT);
]
let token_list : (string * token) list =
[
("scope", SCOPE);
("|", CONSEQUENCE);
("]", CONSEQUENCE);
("data", DATA);
("depends on", DEPENDS);
("declaration", DECLARATION);
("context", CONTEXT);
("fun of", DEPENDS);
("new", DECLARATION);
("param", CONTEXT);
("decreasing", DECREASING);
("increasing", INCREASING);
("of", OF);
("collection", COLLECTION);
("enumeration", ENUM);
("set", COLLECTION);
("enum", ENUM);
("int", INTEGER);
("amount", MONEY);
("text", TEXT);
@ -42,21 +65,21 @@ let token_list : (string * token) list =
("date", DATE);
("boolean", BOOLEAN);
("sum", SUM);
("fulfilled", FILLED);
("ok", FILLED);
("def", DEFINITION);
("equals", DEFINED_AS);
("match", MATCH);
("with pattern", WITH);
("?", UNDER_CONDITION);
("with", WITH);
("[", UNDER_CONDITION);
("if", IF);
("then", THEN);
("else", ELSE);
("content", CONTENT);
("structure", STRUCT);
("optional", OPTIONAL);
("assertion", ASSERTION);
("type", CONTENT);
("struct", STRUCT);
("option", OPTIONAL);
("assert", ASSERTION);
("varies", VARIES);
("with", WITH_V);
("with parameter", WITH_V);
("for", FOR);
("all", ALL);
("we have", WE_HAVE);
@ -75,7 +98,7 @@ let token_list : (string * token) list =
("true", TRUE);
("false", FALSE);
]
@ Lexer_fr.token_list_language_agnostic
@ token_list_language_agnostic
let rec lex_code (lexbuf : lexbuf) : token =
match%sedlex lexbuf with
@ -97,13 +120,13 @@ let rec lex_code (lexbuf : lexbuf) : token =
| "data" ->
update_acc lexbuf;
DATA
| "depends on" ->
| "fun of" ->
update_acc lexbuf;
DEPENDS
| "declaration" ->
| "new" ->
update_acc lexbuf;
DECLARATION
| "context" ->
| "param" ->
update_acc lexbuf;
CONTEXT
| "decreasing" ->
@ -115,10 +138,10 @@ let rec lex_code (lexbuf : lexbuf) : token =
| "of" ->
update_acc lexbuf;
OF
| "collection" ->
| "set" ->
update_acc lexbuf;
COLLECTION
| "enumeration" ->
| "enum" ->
update_acc lexbuf;
ENUM
| "int" ->
@ -142,22 +165,28 @@ let rec lex_code (lexbuf : lexbuf) : token =
| "sum" ->
update_acc lexbuf;
SUM
| "fulfilled" ->
| "ok" ->
update_acc lexbuf;
FILLED
| "def" ->
update_acc lexbuf;
DEFINITION
| "=" ->
| ":=" ->
update_acc lexbuf;
DEFINED_AS
| "varies" ->
update_acc lexbuf;
VARIES
| "with" ->
update_acc lexbuf;
WITH_V
| "match" ->
update_acc lexbuf;
MATCH
| "with pattern" ->
| "with" ->
update_acc lexbuf;
WITH
| "?" ->
| "[" ->
update_acc lexbuf;
UNDER_CONDITION
| "if" ->
@ -172,24 +201,18 @@ let rec lex_code (lexbuf : lexbuf) : token =
| "condition" ->
update_acc lexbuf;
CONDITION
| "content" ->
| "type" ->
update_acc lexbuf;
CONTENT
| "structure" ->
update_acc lexbuf;
STRUCT
| "optional" ->
| "option" ->
update_acc lexbuf;
OPTIONAL
| "assertion" ->
| "assert" ->
update_acc lexbuf;
ASSERTION
| "varies" ->
update_acc lexbuf;
VARIES
| "with" ->
update_acc lexbuf;
WITH_V
| "for" ->
update_acc lexbuf;
FOR
@ -233,7 +256,7 @@ let rec lex_code (lexbuf : lexbuf) : token =
| "not" ->
update_acc lexbuf;
NOT
| "|" ->
| "]" ->
update_acc lexbuf;
CONSEQUENCE
| "number" ->
@ -333,7 +356,7 @@ let rec lex_code (lexbuf : lexbuf) : token =
(* Integer literal*)
update_acc lexbuf;
INT_LITERAL (int_of_string (Utf8.lexeme lexbuf))
| _ -> Lexer_fr.raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unknown token"
| _ -> raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unknown token"
let rec lex_law (lexbuf : lexbuf) : token =
match%sedlex lexbuf with
@ -363,9 +386,7 @@ let rec lex_law (lexbuf : lexbuf) : token =
let pos = lexing_positions lexbuf in
if Filename.extension name = ".pdf" then LAW_INCLUDE (Ast.PdfFile ((name, pos), pages))
else if Filename.extension name = ".catala" then LAW_INCLUDE (Ast.CatalaFile (name, pos))
else
Lexer_fr.raise_lexer_error (lexing_positions lexbuf) name
"this type of file cannot be included"
else raise_lexer_error (lexing_positions lexbuf) name "this type of file cannot be included"
| "@@", Plus (Compl '@'), "@@", Star '+' ->
let extract_code_title = R.regexp "@@([^@]+)@@([\\+]*)" in
let get_match = R.get_substring (R.exec ~rex:extract_code_title (Utf8.lexeme lexbuf)) in
@ -394,6 +415,6 @@ let rec lex_law (lexbuf : lexbuf) : token =
LAW_ARTICLE (title, None, None)
| Plus (Compl ('@' | '/' | '\n')) -> LAW_TEXT (Utf8.lexeme lexbuf)
| _ -> Lexer_fr.raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unknown token"
| _ -> raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unknown token"
let lexer lexbuf = if !is_code then lex_code lexbuf else lex_law lexbuf

View File

@ -14,14 +14,9 @@
open Parser
open Sedlexing
module L = Lexer
module R = Re.Pcre
let is_code : bool ref = ref false
let code_string_acc : string ref = ref ""
let update_acc (lexbuf : lexbuf) : unit = code_string_acc := !code_string_acc ^ Utf8.lexeme lexbuf
let token_list_en : (string * token) list =
[
("scope", SCOPE);
@ -75,178 +70,178 @@ let token_list_en : (string * token) list =
("true", TRUE);
("false", FALSE);
]
@ Lexer_fr.token_list_language_agnostic
@ L.token_list_language_agnostic
let rec lex_code_en (lexbuf : lexbuf) : token =
match%sedlex lexbuf with
| white_space ->
(* Whitespaces *)
update_acc lexbuf;
L.update_acc lexbuf;
lex_code_en lexbuf
| '#', Star (Compl '\n'), '\n' ->
(* Comments *)
update_acc lexbuf;
L.update_acc lexbuf;
lex_code_en lexbuf
| "*/" ->
(* End of code section *)
is_code := false;
END_CODE !code_string_acc
L.is_code := false;
END_CODE !L.code_string_acc
| "scope" ->
update_acc lexbuf;
L.update_acc lexbuf;
SCOPE
| "data" ->
update_acc lexbuf;
L.update_acc lexbuf;
DATA
| "depends on" ->
update_acc lexbuf;
L.update_acc lexbuf;
DEPENDS
| "declaration" ->
update_acc lexbuf;
L.update_acc lexbuf;
DECLARATION
| "context" ->
update_acc lexbuf;
L.update_acc lexbuf;
CONTEXT
| "decreasing" ->
update_acc lexbuf;
L.update_acc lexbuf;
DECREASING
| "increasing" ->
update_acc lexbuf;
L.update_acc lexbuf;
INCREASING
| "of" ->
update_acc lexbuf;
L.update_acc lexbuf;
OF
| "collection" ->
update_acc lexbuf;
L.update_acc lexbuf;
COLLECTION
| "enumeration" ->
update_acc lexbuf;
L.update_acc lexbuf;
ENUM
| "integer" ->
update_acc lexbuf;
L.update_acc lexbuf;
INTEGER
| "amount" ->
update_acc lexbuf;
L.update_acc lexbuf;
MONEY
| "text" ->
update_acc lexbuf;
L.update_acc lexbuf;
TEXT
| "decimal" ->
update_acc lexbuf;
L.update_acc lexbuf;
DECIMAL
| "date" ->
update_acc lexbuf;
L.update_acc lexbuf;
DATE
| "boolean" ->
update_acc lexbuf;
L.update_acc lexbuf;
BOOLEAN
| "sum" ->
update_acc lexbuf;
L.update_acc lexbuf;
SUM
| "fulfilled" ->
update_acc lexbuf;
L.update_acc lexbuf;
FILLED
| "definition" ->
update_acc lexbuf;
L.update_acc lexbuf;
DEFINITION
| "equals" ->
update_acc lexbuf;
L.update_acc lexbuf;
DEFINED_AS
| "match" ->
update_acc lexbuf;
L.update_acc lexbuf;
MATCH
| "with pattern" ->
update_acc lexbuf;
L.update_acc lexbuf;
WITH
| "under condition" ->
update_acc lexbuf;
L.update_acc lexbuf;
UNDER_CONDITION
| "if" ->
update_acc lexbuf;
L.update_acc lexbuf;
IF
| "consequence" ->
update_acc lexbuf;
L.update_acc lexbuf;
CONSEQUENCE
| "then" ->
update_acc lexbuf;
L.update_acc lexbuf;
THEN
| "else" ->
update_acc lexbuf;
L.update_acc lexbuf;
ELSE
| "condition" ->
update_acc lexbuf;
L.update_acc lexbuf;
CONDITION
| "content" ->
update_acc lexbuf;
L.update_acc lexbuf;
CONTENT
| "structure" ->
update_acc lexbuf;
L.update_acc lexbuf;
STRUCT
| "optional" ->
update_acc lexbuf;
L.update_acc lexbuf;
OPTIONAL
| "assertion" ->
update_acc lexbuf;
L.update_acc lexbuf;
ASSERTION
| "varies" ->
update_acc lexbuf;
L.update_acc lexbuf;
VARIES
| "with" ->
update_acc lexbuf;
L.update_acc lexbuf;
WITH_V
| "for" ->
update_acc lexbuf;
L.update_acc lexbuf;
FOR
| "all" ->
update_acc lexbuf;
L.update_acc lexbuf;
ALL
| "we have" ->
update_acc lexbuf;
L.update_acc lexbuf;
WE_HAVE
| "fixed" ->
update_acc lexbuf;
L.update_acc lexbuf;
FIXED
| "by" ->
update_acc lexbuf;
L.update_acc lexbuf;
BY
| "rule" ->
(* 0xE8 is è *)
update_acc lexbuf;
L.update_acc lexbuf;
RULE
| "exists" ->
update_acc lexbuf;
L.update_acc lexbuf;
EXISTS
| "in" ->
update_acc lexbuf;
L.update_acc lexbuf;
IN
| "such" ->
update_acc lexbuf;
L.update_acc lexbuf;
SUCH
| "that" ->
update_acc lexbuf;
L.update_acc lexbuf;
THAT
| "now" ->
update_acc lexbuf;
L.update_acc lexbuf;
NOW
| "and" ->
update_acc lexbuf;
L.update_acc lexbuf;
AND
| "or" ->
update_acc lexbuf;
L.update_acc lexbuf;
OR
| "not" ->
update_acc lexbuf;
L.update_acc lexbuf;
NOT
| "number" ->
update_acc lexbuf;
L.update_acc lexbuf;
CARDINAL
| "true" ->
update_acc lexbuf;
L.update_acc lexbuf;
TRUE
| "false" ->
update_acc lexbuf;
L.update_acc lexbuf;
FALSE
| "year" ->
update_acc lexbuf;
L.update_acc lexbuf;
YEAR
| 0x24, Star white_space, '0' .. '9', Star ('0' .. '9' | ','), Opt ('.', Rep ('0' .. '9', 0 .. 2))
->
@ -259,93 +254,99 @@ let rec lex_code_en (lexbuf : lexbuf) : token =
let remove_commas = R.regexp "," in
let units = int_of_string (R.substitute ~rex:remove_commas ~subst:(fun _ -> "") units) in
let cents = try int_of_string (parts 4) with Not_found -> 0 in
update_acc lexbuf;
L.update_acc lexbuf;
MONEY_AMOUNT (units, cents)
| Plus '0' .. '9', '.', Star '0' .. '9' ->
let extract_code_title = R.regexp "([0-9]+)\\.([0-9]*)" in
let dec_parts = R.get_substring (R.exec ~rex:extract_code_title (Utf8.lexeme lexbuf)) in
(* Integer literal*)
update_acc lexbuf;
L.update_acc lexbuf;
DECIMAL_LITERAL (int_of_string (dec_parts 1), int_of_string (dec_parts 2))
| "->" ->
update_acc lexbuf;
L.update_acc lexbuf;
ARROW
| '.' ->
update_acc lexbuf;
L.update_acc lexbuf;
DOT
| "<=" ->
update_acc lexbuf;
L.update_acc lexbuf;
LESSER_EQUAL
| '<' ->
update_acc lexbuf;
L.update_acc lexbuf;
LESSER
| ">=" ->
update_acc lexbuf;
L.update_acc lexbuf;
GREATER_EQUAL
| '>' ->
update_acc lexbuf;
L.update_acc lexbuf;
GREATER
| "!=" ->
update_acc lexbuf;
L.update_acc lexbuf;
NOT_EQUAL
| '=' ->
update_acc lexbuf;
L.update_acc lexbuf;
EQUAL
| '(' ->
update_acc lexbuf;
L.update_acc lexbuf;
LPAREN
| ')' ->
update_acc lexbuf;
L.update_acc lexbuf;
RPAREN
| '+' ->
update_acc lexbuf;
L.update_acc lexbuf;
PLUS
| '-' ->
update_acc lexbuf;
L.update_acc lexbuf;
MINUS
| '*' ->
update_acc lexbuf;
L.update_acc lexbuf;
MULT
| '%' ->
update_acc lexbuf;
L.update_acc lexbuf;
PERCENT
| '/' ->
update_acc lexbuf;
L.update_acc lexbuf;
DIV
| '|' ->
update_acc lexbuf;
L.update_acc lexbuf;
VERTICAL
| ':' ->
update_acc lexbuf;
L.update_acc lexbuf;
COLON
| "--" ->
update_acc lexbuf;
L.update_acc lexbuf;
ALT
| uppercase, Star (uppercase | lowercase | '0' .. '9' | '_' | '\'') ->
(* Name of constructor *)
update_acc lexbuf;
L.update_acc lexbuf;
CONSTRUCTOR (Utf8.lexeme lexbuf)
| lowercase, Star (lowercase | uppercase | '0' .. '9' | '_' | '\'') ->
(* Name of variable *)
update_acc lexbuf;
L.update_acc lexbuf;
IDENT (Utf8.lexeme lexbuf)
| Plus '0' .. '9' ->
(* Integer literal*)
update_acc lexbuf;
L.update_acc lexbuf;
INT_LITERAL (int_of_string (Utf8.lexeme lexbuf))
| _ -> Lexer_fr.raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unknown token"
| _ -> L.raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unknown token"
let rec lex_law_en (lexbuf : lexbuf) : token =
match%sedlex lexbuf with
| '\n' -> lex_law_en lexbuf
| "/*" ->
is_code := true;
code_string_acc := "";
L.is_code := true;
L.code_string_acc := "";
BEGIN_CODE
| eof -> EOF
| "@@", Star white_space, "Master file", Star white_space, "@@" -> MASTER_FILE
| "@@", Star white_space, "Begin metadata", Star white_space, "@@" -> BEGIN_METADATA
| "@@", Star white_space, "End metadata", Star white_space, "@@" -> END_METADATA
| "@@", Star white_space, "Master file", Star white_space, "@@" ->
Cli.debug_print "A1";
MASTER_FILE
| "@@", Star white_space, "Begin metadata", Star white_space, "@@" ->
Cli.debug_print "A1";
BEGIN_METADATA
| "@@", Star white_space, "End metadata", Star white_space, "@@" ->
Cli.debug_print "A1";
END_METADATA
| ( "@@",
Star white_space,
"Include:",
@ -363,9 +364,7 @@ let rec lex_law_en (lexbuf : lexbuf) : token =
let pos = lexing_positions lexbuf in
if Filename.extension name = ".pdf" then LAW_INCLUDE (Ast.PdfFile ((name, pos), pages))
else if Filename.extension name = ".catala" then LAW_INCLUDE (Ast.CatalaFile (name, pos))
else
Lexer_fr.raise_lexer_error (lexing_positions lexbuf) name
"this type of file cannot be included"
else L.raise_lexer_error (lexing_positions lexbuf) name "this type of file cannot be included"
| "@@", Plus (Compl '@'), "@@", Star '+' ->
let extract_code_title = R.regexp "@@([^@]+)@@([\\+]*)" in
let get_match = R.get_substring (R.exec ~rex:extract_code_title (Utf8.lexeme lexbuf)) in
@ -394,6 +393,6 @@ let rec lex_law_en (lexbuf : lexbuf) : token =
LAW_ARTICLE (title, None, None)
| Plus (Compl ('@' | '/' | '\n')) -> LAW_TEXT (Utf8.lexeme lexbuf)
| _ -> Lexer_fr.raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unknown token"
| _ -> L.raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unknown token"
let lexer_en lexbuf = if !is_code then lex_code_en lexbuf else lex_law_en lexbuf
let lexer_en lexbuf = if !L.is_code then lex_code_en lexbuf else lex_law_en lexbuf

View File

@ -14,43 +14,9 @@
open Parser
open Sedlexing
module L = Lexer
module R = Re.Pcre
let is_code : bool ref = ref false
let code_string_acc : string ref = ref ""
let raise_lexer_error (loc : Pos.t) (token : string) (msg : string) =
Errors.raise_spanned_error (Printf.sprintf "Parsing error on token \"%s\": %s" token msg) loc
let rec lex_code_as_string (lexbuf : lexbuf) (acc : string) : token =
match%sedlex lexbuf with
| "*/" -> END_CODE (acc ^ Utf8.lexeme lexbuf)
| any -> lex_code_as_string lexbuf (acc ^ Utf8.lexeme lexbuf)
| _ -> raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unexpected token"
let update_acc (lexbuf : lexbuf) = code_string_acc := !code_string_acc ^ Utf8.lexeme lexbuf
let token_list_language_agnostic : (string * token) list =
[
("->", ARROW);
(".", DOT);
("<=", LESSER_EQUAL);
(">=", GREATER_EQUAL);
(">", GREATER);
("!=", NOT_EQUAL);
("=", EQUAL);
("(", LPAREN);
(")", RPAREN);
("+", PLUS);
("-", MINUS);
("*", MULT);
("/", DIV);
("|", VERTICAL);
(":", COLON);
("--", ALT);
]
let token_list_fr : (string * token) list =
[
("champ d'application", SCOPE);
@ -104,184 +70,184 @@ let token_list_fr : (string * token) list =
("vrai", TRUE);
("faux", FALSE);
]
@ token_list_language_agnostic
@ L.token_list_language_agnostic
let rec lex_code_fr (lexbuf : lexbuf) : token =
match%sedlex lexbuf with
| white_space | '\n' ->
(* Whitespaces *)
update_acc lexbuf;
L.update_acc lexbuf;
lex_code_fr lexbuf
| '#', Star (Compl '\n'), '\n' ->
(* Comments *)
update_acc lexbuf;
L.update_acc lexbuf;
lex_code_fr lexbuf
| "*/" ->
(* End of code section *)
is_code := false;
END_CODE !code_string_acc
L.is_code := false;
END_CODE !L.code_string_acc
| "champ d\'application" ->
update_acc lexbuf;
L.update_acc lexbuf;
SCOPE
| "donn", 0xE9, "e" ->
(* 0xE9 is é *)
update_acc lexbuf;
L.update_acc lexbuf;
DATA
| "d", 0xE9, "pend de" ->
update_acc lexbuf;
L.update_acc lexbuf;
DEPENDS
| "d", 0xE9, "claration" ->
update_acc lexbuf;
L.update_acc lexbuf;
DECLARATION
| "contexte" ->
update_acc lexbuf;
L.update_acc lexbuf;
CONTEXT
| "d", 0xE9, "croissant" ->
update_acc lexbuf;
L.update_acc lexbuf;
DECREASING
| "croissant" ->
update_acc lexbuf;
L.update_acc lexbuf;
INCREASING
| "de" ->
update_acc lexbuf;
L.update_acc lexbuf;
OF
| "collection" ->
update_acc lexbuf;
L.update_acc lexbuf;
COLLECTION
| 0xE9, "num", 0xE9, "ration" ->
update_acc lexbuf;
L.update_acc lexbuf;
ENUM
| "entier" ->
update_acc lexbuf;
L.update_acc lexbuf;
INTEGER
| "montant" ->
update_acc lexbuf;
L.update_acc lexbuf;
MONEY
| "texte" ->
update_acc lexbuf;
L.update_acc lexbuf;
TEXT
| "d", 0xE9, "cimal" ->
update_acc lexbuf;
L.update_acc lexbuf;
DECIMAL
| "date" ->
update_acc lexbuf;
L.update_acc lexbuf;
DATE
| "bool", 0xE9, "en" ->
update_acc lexbuf;
L.update_acc lexbuf;
BOOLEAN
| "somme" ->
update_acc lexbuf;
L.update_acc lexbuf;
SUM
| "rempli" ->
update_acc lexbuf;
L.update_acc lexbuf;
FILLED
| "d", 0xE9, "finition" ->
(* 0xE9 is é *)
update_acc lexbuf;
L.update_acc lexbuf;
DEFINITION
| 0xE9, "gal ", 0x00E0 ->
(* 0xE9 is é *)
update_acc lexbuf;
L.update_acc lexbuf;
DEFINED_AS
| "selon" ->
update_acc lexbuf;
L.update_acc lexbuf;
MATCH
| "sous forme" ->
update_acc lexbuf;
L.update_acc lexbuf;
WITH
| "sous condition" ->
update_acc lexbuf;
L.update_acc lexbuf;
UNDER_CONDITION
| "si" ->
update_acc lexbuf;
L.update_acc lexbuf;
IF
| "cons", 0xE9, "quence" ->
(* 0xE9 is é *)
update_acc lexbuf;
L.update_acc lexbuf;
CONSEQUENCE
| "alors" ->
(* 0xE9 is é *)
update_acc lexbuf;
L.update_acc lexbuf;
THEN
| "sinon" ->
update_acc lexbuf;
L.update_acc lexbuf;
ELSE
| "condition" ->
update_acc lexbuf;
L.update_acc lexbuf;
CONDITION
| "contenu" ->
update_acc lexbuf;
L.update_acc lexbuf;
CONTENT
| "structure" ->
update_acc lexbuf;
L.update_acc lexbuf;
STRUCT
| "optionnel" ->
update_acc lexbuf;
L.update_acc lexbuf;
OPTIONAL
| "assertion" ->
update_acc lexbuf;
L.update_acc lexbuf;
ASSERTION
| "varie" ->
update_acc lexbuf;
L.update_acc lexbuf;
VARIES
| "avec" ->
update_acc lexbuf;
L.update_acc lexbuf;
WITH_V
| "pour" ->
update_acc lexbuf;
L.update_acc lexbuf;
FOR
| "tout" ->
update_acc lexbuf;
L.update_acc lexbuf;
ALL
| "on a" ->
update_acc lexbuf;
L.update_acc lexbuf;
WE_HAVE
| "fix", 0xE9 ->
(* 0xE9 is é *)
update_acc lexbuf;
L.update_acc lexbuf;
FIXED
| "par" ->
update_acc lexbuf;
L.update_acc lexbuf;
BY
| "r", 0xE8, "gle" ->
(* 0xE8 is è *)
update_acc lexbuf;
L.update_acc lexbuf;
RULE
| "existe" ->
update_acc lexbuf;
L.update_acc lexbuf;
EXISTS
| "dans" ->
update_acc lexbuf;
L.update_acc lexbuf;
IN
| "tel" ->
update_acc lexbuf;
L.update_acc lexbuf;
SUCH
| "que" ->
update_acc lexbuf;
L.update_acc lexbuf;
THAT
| "maintenant" ->
update_acc lexbuf;
L.update_acc lexbuf;
NOW
| "et" ->
update_acc lexbuf;
L.update_acc lexbuf;
AND
| "ou" ->
update_acc lexbuf;
L.update_acc lexbuf;
OR
| "non" ->
update_acc lexbuf;
L.update_acc lexbuf;
NOT
| "nombre" ->
update_acc lexbuf;
L.update_acc lexbuf;
CARDINAL
| "vrai" ->
update_acc lexbuf;
L.update_acc lexbuf;
TRUE
| "faux" ->
update_acc lexbuf;
L.update_acc lexbuf;
FALSE
| "an" ->
update_acc lexbuf;
L.update_acc lexbuf;
YEAR
| ( '0' .. '9',
Star ('0' .. '9' | white_space),
@ -297,88 +263,88 @@ let rec lex_code_fr (lexbuf : lexbuf) : token =
let remove_spaces = R.regexp " " in
let units = int_of_string (R.substitute ~rex:remove_spaces ~subst:(fun _ -> "") units) in
let cents = try int_of_string (parts 4) with Not_found -> 0 in
update_acc lexbuf;
L.update_acc lexbuf;
MONEY_AMOUNT (units, cents)
| Plus '0' .. '9', ',', Star '0' .. '9' ->
let extract_code_title = R.regexp "([0-9]+),([0-9]*)" in
let dec_parts = R.get_substring (R.exec ~rex:extract_code_title (Utf8.lexeme lexbuf)) in
(* Integer literal*)
update_acc lexbuf;
L.update_acc lexbuf;
DECIMAL_LITERAL (int_of_string (dec_parts 1), int_of_string (dec_parts 2))
| "->" ->
update_acc lexbuf;
L.update_acc lexbuf;
ARROW
| '.' ->
update_acc lexbuf;
L.update_acc lexbuf;
DOT
| "<=" ->
update_acc lexbuf;
L.update_acc lexbuf;
LESSER_EQUAL
| '<' ->
update_acc lexbuf;
L.update_acc lexbuf;
LESSER
| ">=" ->
update_acc lexbuf;
L.update_acc lexbuf;
GREATER_EQUAL
| '>' ->
update_acc lexbuf;
L.update_acc lexbuf;
GREATER
| "!=" ->
update_acc lexbuf;
L.update_acc lexbuf;
NOT_EQUAL
| '=' ->
update_acc lexbuf;
L.update_acc lexbuf;
EQUAL
| '(' ->
update_acc lexbuf;
L.update_acc lexbuf;
LPAREN
| ')' ->
update_acc lexbuf;
L.update_acc lexbuf;
RPAREN
| '+' ->
update_acc lexbuf;
L.update_acc lexbuf;
PLUS
| '-' ->
update_acc lexbuf;
L.update_acc lexbuf;
MINUS
| '*' ->
update_acc lexbuf;
L.update_acc lexbuf;
MULT
| '%' ->
update_acc lexbuf;
L.update_acc lexbuf;
PERCENT
| '/' ->
update_acc lexbuf;
L.update_acc lexbuf;
DIV
| '|' ->
update_acc lexbuf;
L.update_acc lexbuf;
VERTICAL
| ':' ->
update_acc lexbuf;
L.update_acc lexbuf;
COLON
| "--" ->
update_acc lexbuf;
L.update_acc lexbuf;
ALT
| uppercase, Star (uppercase | lowercase | '0' .. '9' | '_' | '\'') ->
(* Name of constructor *)
update_acc lexbuf;
L.update_acc lexbuf;
CONSTRUCTOR (Utf8.lexeme lexbuf)
| lowercase, Star (lowercase | uppercase | '0' .. '9' | '_' | '\'') ->
(* Name of variable *)
update_acc lexbuf;
L.update_acc lexbuf;
IDENT (Utf8.lexeme lexbuf)
| Plus '0' .. '9' ->
(* Integer literal*)
update_acc lexbuf;
L.update_acc lexbuf;
INT_LITERAL (int_of_string (Utf8.lexeme lexbuf))
| _ -> raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unknown token"
| _ -> L.raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unknown token"
let rec lex_law_fr (lexbuf : lexbuf) : token =
match%sedlex lexbuf with
| '\n' -> lex_law_fr lexbuf
| "/*" ->
is_code := true;
code_string_acc := "";
L.is_code := true;
L.code_string_acc := "";
BEGIN_CODE
| eof -> EOF
| "@@", Star white_space, "Fichier ma", 0x00EE, "tre", Star white_space, "@@" ->
@ -408,7 +374,7 @@ let rec lex_law_fr (lexbuf : lexbuf) : token =
if R.pmatch ~rex:jorftext name then LAW_INCLUDE (Ast.LegislativeText (name, pos))
else if Filename.extension name = ".pdf" then LAW_INCLUDE (Ast.PdfFile ((name, pos), pages))
else if Filename.extension name = ".catala" then LAW_INCLUDE (Ast.CatalaFile (name, pos))
else raise_lexer_error (lexing_positions lexbuf) name "this type of file cannot be included"
else L.raise_lexer_error (lexing_positions lexbuf) name "this type of file cannot be included"
| "@@", Plus (Compl '@'), "@@", Star '+' ->
let extract_code_title = R.regexp "@@([^@]+)@@([\\+]*)" in
let get_match = R.get_substring (R.exec ~rex:extract_code_title (Utf8.lexeme lexbuf)) in
@ -445,6 +411,7 @@ let rec lex_law_fr (lexbuf : lexbuf) : token =
LAW_ARTICLE (title, article_id, article_expiration_date)
| Plus (Compl ('@' | '/' | '\n')) -> LAW_TEXT (Utf8.lexeme lexbuf)
| _ -> raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unknown token"
| _ -> L.raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "unknown token"
let lexer_fr (lexbuf : lexbuf) : token = if !is_code then lex_code_fr lexbuf else lex_law_fr lexbuf
let lexer_fr (lexbuf : lexbuf) : token =
if !L.is_code then lex_code_fr lexbuf else lex_law_fr lexbuf

View File

@ -131,7 +131,7 @@ let sedlex_with_menhir (lexer' : lexbuf -> Parser.token) (token_list : (string *
in
try loop lexer token_list lexbuf None (target_rule (fst @@ Sedlexing.lexing_positions lexbuf))
with Sedlexing.MalFormed | Sedlexing.InvalidCodepoint _ ->
Lexer_fr.raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "malformed token"
Lexer.raise_lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf) "malformed token"
let rec parse_source_files (source_files : string list) (language : Cli.frontend_lang) : Ast.program
=

View File

@ -112,11 +112,11 @@ let retrieve_loc_text (pos : t) : string =
| None -> []
in
let pos_lines = get_lines 1 in
let spaces = int_of_float (floor (log (float_of_int eline))) in
let spaces = int_of_float (log10 (float_of_int eline)) + 1 in
close_in oc;
Cli.print_with_style blue_style "%*s--> %s\n%s" spaces "" filename
(Cli.add_prefix_to_each_line
(Printf.sprintf "\n%s\n" (String.concat "\n" pos_lines))
(Printf.sprintf "\n%s" (String.concat "\n" pos_lines))
(fun i ->
let cur_line = sline - include_extra_count + i - 1 in
if

View File

@ -31,6 +31,6 @@ test_scope/sub_scope.catala:
$(call interpret_with_scope_and_compare,nv,A)
$(call interpret_with_scope_and_compare,nv,B)
test_scope/sub_sub_scope.catala:
$(call interpret_with_scope_and_compare,en,A)
$(call interpret_with_scope_and_compare,en,B)
$(call interpret_with_scope_and_compare,en,C)
$(call interpret_with_scope_and_compare,nv,A)
$(call interpret_with_scope_and_compare,nv,B)
$(call interpret_with_scope_and_compare,nv,C)

View File

@ -1,10 +1,10 @@
/*
declaration scope TestBool :
context foo content bool
context bar content int
new scope TestBool :
param foo type bool
param bar type int
scope TestBool :
def bar = 1
def foo ? bar >= 0 |= true
def foo ? bar < 0 |= false
def bar := 1
def foo [ bar >= 0 ] := true
def foo [ bar < 0 ] := false
*/

View File

@ -1,14 +1,14 @@
/*
declaration scope S:
context f content int depends on int
context x content int
context b content bool
new scope S:
param f type int fun of int
param x type int
param b type bool
scope S:
def f of x ? (x >= x) |= x + x
def f of x ? not b |= x * x
def f of x [ (x >= x) ] := x + x
def f of x [ not b ] := x * x
def b = false
def b := false
def x = f of 3
def x := f of 3
*/

View File

@ -1,13 +1,13 @@
/*
declaration scope A:
context a content int
context b content dec
context c content bool
new scope A:
param a type int
param b type dec
param c type bool
scope A:
def c = false
def a ? c |= 42
def a ? not c |= 0
def b ? not c |= 1337
def b ? not c |= 0
def c := false
def a [ c ] := 42
def a [ not c ] := 0
def b [ not c ] := 1337
def b [ not c ] := 0
*/

View File

@ -3,20 +3,17 @@
[ERROR] The conflict concerns this variable b
[ERROR] --> test_scope/scope.catala
[ERROR] |
[ERROR] 4 | context b content dec
[ERROR] | ^
[ERROR] |
[ERROR] 4 | param b type dec
[ERROR] | ^
[ERROR]
[ERROR] This justification is true:
[ERROR] --> test_scope/scope.catala
[ERROR] |
[ERROR] 11 | def b ? not c |= 1337
[ERROR] 11 | def b [ not c ] := 1337
[ERROR] | ^^^^^
[ERROR] |
[ERROR]
[ERROR]
[ERROR] This justification is true:
[ERROR] --> test_scope/scope.catala
[ERROR] |
[ERROR] 12 | def b ? not c |= 0
[ERROR] |
[ERROR] 12 | def b [ not c ] := 0
[ERROR] | ^^^^^
[ERROR] |

View File

@ -1,22 +1,22 @@
/*
declaration scope A:
context a content int
context b content bool
context a_base content int
new scope A:
param a type int
param b type bool
param a_base type int
declaration scope B:
context a content int
context b content bool
context scopeA scope A
context scopeAbis scope A
new scope B:
param a type int
param b type bool
param scopeA scope A
param scopeAbis scope A
scope A:
def a_base = 1
def a = -1
def b = a > 0
def a_base := 1
def a := -1
def b := a > 0
scope B:
def a = 42
def b = scopeA.b
def scopeA.a ? a > 0 |= scopeA.a_base
def a := 42
def b := scopeA.b
def scopeA.a [ a > 0 ] := scopeA.a_base
*/

View File

@ -1,28 +1,28 @@
/*
declaration scope A:
context x content integer
context u content boolean
new scope A:
param x type int
param u type bool
declaration scope B:
context a scope A
context y content integer
new scope B:
param a scope A
param y type int
declaration scope C:
context a scope A
context b scope B
context z content integer
new scope C:
param a scope A
param b scope B
param z type int
scope A:
definition x equals 0
definition u equals true
def x := 0
def u := true
scope B:
definition a.x under condition a.u consequence equals 1
definition y under condition a.x = 1 consequence equals 1
definition y under condition a.x + 1 = 2 consequence equals 1
def a.x [ a.u ] := 1
def y [ a.x = 1 ] := 1
def y [ a.x + 1 = 2 ] := 1
scope C:
definition a.x equals 2
definition b.y equals 3
definition z equals 2
def a.x := 2
def b.y := 3
def z := 2
*/

View File

@ -1,22 +1,19 @@
[ERROR] Default logic conflict, multiple justifications are true but are not related by a precedence
[ERROR]
[ERROR] The conflict concerns this variable y
[ERROR] --> test_scope/sub_sub_scope.catala
[ERROR] |
[ERROR] 8 | param y type int
[ERROR] | ^
[ERROR]
[ERROR] This justification is true:
[ERROR] --> test_scope/sub_sub_scope.catala
[ERROR] |
[ERROR] 8 | context y content integer
[ERROR] | ^
[ERROR] 21 | def y [ a.x = 1 ] := 1
[ERROR] | ^^^^^^^
[ERROR]
[ERROR] This justification is true:
[ERROR] --> test_scope/sub_sub_scope.catala
[ERROR] |
[ERROR]
[ERROR] This justification is true:
[ERROR] --> test_scope/sub_sub_scope.catala
[ERROR] |
[ERROR] 21 | definition y under condition a.x = 1 consequence equals 1
[ERROR] | ^^^^^^^
[ERROR] |
[ERROR]
[ERROR] This justification is true:
[ERROR] --> test_scope/sub_sub_scope.catala
[ERROR] |
[ERROR] 22 | definition y under condition a.x + 1 = 2 consequence equals 1
[ERROR] | ^^^^^^^^^^^
[ERROR] |
[ERROR] 22 | def y [ a.x + 1 = 2 ] := 1
[ERROR] | ^^^^^^^^^^^