catala/compiler/surface/parser.mly

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

686 lines
17 KiB
OCaml
Raw Normal View History

(*
2020-04-16 18:47:35 +03:00
This file is part of the Catala compiler, a specification language for tax and social benefits
computation rules.
Copyright (C) 2020 Inria, contributors: Denis Merigoux <denis.merigoux@inria.fr>,
Emile Rolley <emile.rolley@tuta.io>
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*)
%{
2022-11-21 12:46:17 +03:00
open Catala_utils
%}
%parameter<Localisation: sig
2021-08-19 19:26:06 +03:00
val lex_builtin: string -> Ast.builtin_expression option
end>
2020-04-03 23:34:11 +03:00
(* The token is returned for every line of law text, make them right-associative
so that we concat them efficiently as much as possible. *)
%right LAW_TEXT
(* Precedence of expression constructions *)
%right top_expr
%right ALT
%right let_expr IS
%right AND OR XOR (* Desugaring enforces proper parens later on *)
%nonassoc GREATER GREATER_EQUAL LESSER LESSER_EQUAL EQUAL NOT_EQUAL
%left PLUS MINUS PLUSPLUS
%left MULT DIV
%right apply OF CONTAINS FOR SUCH WITH
%right unop_expr
%right CONTENT
%nonassoc UIDENT
%left DOT
(* Types of all rules, in order. Without this, Menhir type errors are nearly
impossible to debug because of inlining *)
%type<Ast.uident Marked.pos> addpos(UIDENT)
%type<Pos.t> pos(CONDITION)
%type<Ast.primitive_typ> typ_base
%type<Ast.base_typ_data> typ
%type<Ast.uident Marked.pos> uident
%type<Ast.lident Marked.pos> lident
%type<Ast.scope_var> scope_var
%type<Ast.path * Ast.uident Marked.pos> quident
%type<Ast.path * Ast.lident Marked.pos> qlident
%type<Ast.expression> expression
%type<Ast.naked_expression> naked_expression
%type<Ast.lident Marked.pos * expression> struct_content_field
%type<Ast.naked_expression> struct_or_enum_inject
%type<Ast.literal_number> num_literal
%type<Ast.literal_unit> unit_literal
%type<Ast.literal> literal
%type<(Ast.lident Marked.pos * expression) list> scope_call_args
%type<bool> minmax
%type<Ast.unop> unop
%type<Ast.binop> binop
%type<Ast.match_case_pattern> constructor_binding
%type<Ast.match_case> match_arm
%type<Ast.expression> condition_consequence
%type<Ast.scope_var Marked.pos * Ast.lident Marked.pos option> rule_expr
%type<bool> rule_consequence
%type<Ast.rule> rule
%type<Ast.lident Marked.pos> definition_parameters
%type<Ast.lident Marked.pos> label
%type<Ast.lident Marked.pos> state
%type<Ast.exception_to> exception_to
%type<Ast.definition> definition
%type<Ast.variation_typ> variation_type
%type<Ast.scope_use_item> assertion
%type<Ast.scope_use_item> scope_item
%type<Ast.lident Marked.pos * Ast.base_typ Marked.pos> struct_scope_base
%type<Ast.base_typ_data Marked.pos> struct_scope_func
%type<Ast.struct_decl_field> struct_scope
%type<Ast.io_input> scope_decl_item_attribute_input
%type<bool> scope_decl_item_attribute_output
%type<Ast.scope_decl_context_io> scope_decl_item_attribute
%type<Ast.scope_decl_context_item> scope_decl_item
%type<Ast.enum_decl_case> enum_decl_line
%type<Ast.code_item> code_item
%type<Ast.code_block> code
%type<Ast.code_block * string Marked.pos> metadata_block
%type<Ast.law_heading> law_heading
%type<string> law_text
%type<Ast.law_structure> source_file_item
%type<Ast.law_structure list> source_file
%start source_file
%%
let pos(x) ==
| x ; { Pos.from_lpos $loc }
let addpos(x) ==
| ~=x ; { x, Pos.from_lpos $loc(x) }
let typ_base :=
| INTEGER ; { Integer }
| BOOLEAN ; { Boolean }
| MONEY ; { Money }
| DURATION ; { Duration }
| TEXT ; { Text }
| DECIMAL ; { Decimal }
| DATE ; { Date }
| c = quident ; { let path, uid = c in Named (path, uid) }
2020-04-14 12:46:48 +03:00
let typ :=
| t = typ_base ; <Primitive>
| COLLECTION ; t = addpos(typ) ; <Collection>
2020-03-08 07:01:26 +03:00
let uident ==
| ~ = addpos(UIDENT) ; <>
let lident :=
| i = LIDENT ; {
match Localisation.lex_builtin i with
| Some _ ->
Errors.raise_spanned_error
(Pos.from_lpos $sloc)
"Reserved builtin name"
| None ->
(i, Pos.from_lpos $sloc)
}
let scope_var ==
| b = separated_nonempty_list(DOT, addpos(LIDENT)) ; <>
let quident :=
| uid = uident ; DOT ; quid = quident ; {
let path, quid = quid in uid :: path, quid
}
| id = uident ; { [], id }
let qlident :=
| uid = uident ; DOT ; qlid = qlident ; {
let path, lid = qlid in uid :: path, lid
}
| id = lident ; { [], id }
let expression :=
| e = addpos(naked_expression) ; <>
let naked_expression ==
| id = addpos(LIDENT) ; {
match Localisation.lex_builtin (Marked.unmark id) with
| Some b -> Builtin b
| None -> Ident ([], id)
}
| uid = uident ; DOT ; qlid = qlident ; {
let path, lid = qlid in Ident (uid :: path, lid)
}
| l = literal ; {
Literal l
}
| LPAREN ; e = expression ; RPAREN ; <Paren>
| e = expression ;
DOT ; i = addpos(qlident) ; <Dotted>
| CARDINAL ; {
Builtin Cardinal
}
| DECIMAL ; {
Builtin ToDecimal
}
| MONEY ; {
Builtin ToMoney
Make scopes directly callable Quite a few changes are included here, some of which have some extra implications visible in the language: - adds the `Scope of { -- input_v: value; ... }` construct in the language - handle it down the pipeline: * `ScopeCall` in the surface AST * `EScopeCall` in desugared and scopelang * expressions are now traversed to detect dependencies between scopes * transformed into a normal function call in dcalc - defining a scope now implicitely defines a structure with the same name, with the output variables of the scope defined as fields. This allows us to type the return value from a scope call and access its fields easily. * the implications are mostly in surface/name_resolution.ml code-wise * the `Scope_out` struct that was defined in scope_to_dcalc is no longer needed/used and the fields are no longer renamed (changes some outputs; the explicit suffix for variables with multiple states is ignored as well) * one benefit is that disambiguation works just like for structures when there are conflicts on field names * however, it's now a conflict if a scope and a structure have the same name (side-note: issues with conflicting enum / struct names or scope variables / subscope names were silent and are now properly reported) - you can consequently use scope names as types for variables as well. Writing literals is not allowed though, they can only be obtained by calling the scope. Remaining TODOs: - context variables are not handled properly at the moment - error handling on invalid calls - tests show a small error message regression; lots of examples will need tweaking to avoid scope/struct name or struct fields / output variable conflicts - add a `->` syntax to make struct field access distinct from scope output var access, enforced with typing. This is expected to reduce confusion of users and add a little typing precision. - document the new syntax & implications (tutorial, cheat-sheet) - a consequence of the changes is that subscope variables also can now be typed. A possible future evolution / simplification would be to rewrite subscopes as explicit scope calls early in the pipeline. That could also allow to manipulate them as expressions (bind them in let-ins, return them...)
2022-10-21 16:47:17 +03:00
}
| LBRACKET ; l = separated_list(SEMICOLON, expression) ; RBRACKET ;
<ArrayLit>
| e = struct_or_enum_inject ; <>
| e1 = expression ;
OF ;
e2 = expression ; {
FunCall (e1, e2)
} %prec apply
| OUTPUT ; OF ;
c = addpos(quident) ;
fields = option(scope_call_args) ; {
let fields = Option.value ~default:[] fields in
ScopeCall (c, fields)
2020-04-14 20:13:20 +03:00
}
| e = expression ;
WITH ; c = constructor_binding ; {
TestMatchCase (e, (c, Pos.from_lpos $sloc))
}
| e1 = expression ;
CONTAINS ;
e2 = expression ; {
MemCollection (e2, e1)
} %prec apply
| SUM ; typ = addpos(typ_base) ;
OF ; coll = expression ; {
CollectionOp (AggregateSum { typ = Marked.unmark typ }, coll)
} %prec apply
| f = expression ;
FOR ; i = lident ;
AMONG ; coll = expression ; {
CollectionOp (Map {f = i, f}, coll)
} %prec apply
| max = minmax ;
OF ; coll = expression ;
OR ; IF ; COLLECTION ; EMPTY ; THEN ;
default = expression ; {
CollectionOp (AggregateExtremum { max; default }, coll)
} %prec apply
| op = addpos(unop) ; e = expression ; {
Unop (op, e)
} %prec unop_expr
| e1 = expression ;
binop = addpos(binop) ;
e2 = expression ; {
Binop (binop, e1, e2)
}
| EXISTS ; i = lident ;
AMONG ; coll = expression ;
SUCH ; THAT ; predicate = expression ; {
CollectionOp (Exists {predicate = i, predicate}, coll)
} %prec let_expr
| FOR ; ALL ; i = lident ;
AMONG ; coll = expression ;
WE_HAVE ; predicate = expression ; {
CollectionOp (Forall {predicate = i, predicate}, coll)
} %prec let_expr
| MATCH ; e = expression ;
WITH ;
arms = addpos(nonempty_list(addpos(preceded(ALT, match_arm)))) ; {
MatchWith (e, arms)
}
| IF ; e1 = expression ;
THEN ; e2 = expression ;
ELSE ; e3 = expression ; {
IfThenElse (e1, e2, e3)
} %prec let_expr
| LET ; id = lident ;
DEFINED_AS ; e1 = expression ;
IN ; e2 = expression ; {
LetIn (id, e1, e2)
} %prec let_expr
| i = lident ;
AMONG ; coll = expression ;
SUCH ; THAT ; f = expression ; {
CollectionOp (Filter {f = i, f}, coll)
} %prec top_expr
| fmap = expression ;
FOR ; i = lident ;
AMONG ; coll = expression ;
SUCH ; THAT ; ffilt = expression ; {
CollectionOp (Map {f = i, fmap}, (CollectionOp (Filter {f = i, ffilt}, coll), Pos.from_lpos $loc))
} %prec top_expr
| i = lident ;
AMONG ; coll = expression ;
SUCH ; THAT ; f = expression ;
IS ; max = minmax ;
OR ; IF ; COLLECTION ; EMPTY ; THEN ; default = expression ; {
CollectionOp (AggregateArgExtremum { max; default; f = i, f }, coll)
} %prec top_expr
let struct_content_field :=
| field = lident ; COLON ; e = expression ; <>
let struct_or_enum_inject ==
| uid = addpos(quident) ;
data = option(preceded(CONTENT,expression)) ; {
EnumInject(uid, data)
}
| c = addpos(quident) ;
LBRACE ;
fields = nonempty_list(preceded(ALT, struct_content_field)) ;
RBRACE ; {
StructLit(c, fields)
}
let num_literal ==
| d = INT_LITERAL ; <Int>
| d = DECIMAL_LITERAL ; {
let (d1, d2) = d in Dec (d1, d2)
}
2020-04-14 20:13:20 +03:00
let unit_literal ==
| PERCENT ; { Percent }
| YEAR ; { Year}
| MONTH ; { Month }
| DAY ; { Day }
2020-04-11 19:16:15 +03:00
let literal :=
| l = addpos(num_literal); u = option(addpos(unit_literal)) ; <LNumber>
| money = MONEY_AMOUNT ; {
let (units, cents) = money in
LMoneyAmount {
money_amount_units = units;
money_amount_cents = cents;
}
}
| BAR ; d = DATE_LITERAL ; BAR ; {
2022-07-21 15:14:22 +03:00
let (y,m,d) = d in
LDate {
literal_date_year = y;
literal_date_month = m;
literal_date_day = d;
}
}
| TRUE ; { LBool true }
| FALSE ; { LBool false }
let scope_call_args ==
| WITH_V ;
LBRACE ;
fields = list(preceded (ALT, struct_content_field)) ;
RBRACE ; {
fields
}
let minmax ==
| MAXIMUM ; { true }
| MINIMUM ; { false }
2022-12-12 18:02:07 +03:00
let unop ==
| NOT ; { Not }
| k = MINUS ; <Minus>
let binop ==
| k = MULT ; <Mult>
| k = DIV ; <Div>
| k = PLUS ; <Add>
| k = MINUS ; <Sub>
| PLUSPLUS ; { Concat }
| k = LESSER ; <Lt>
| k = LESSER_EQUAL ; <Lte>
| k = GREATER ; <Gt>
| k = GREATER_EQUAL ; <Gte>
| EQUAL ; { Eq }
| NOT_EQUAL ; { Neq }
| AND ; { And }
| OR ; { Or }
| XOR ; { Xor }
let constructor_binding :=
| uid = addpos(quident) ; OF ; lid = lident ; {
([uid], Some lid)
}
| uid = addpos(quident) ; {
([uid], None)
} %prec apply
2020-03-08 08:06:32 +03:00
let match_arm :=
| WILDCARD ; COLON ; ~ = expression ; <WildCard>
%prec ALT
| pat = addpos(constructor_binding) ;
COLON ; e = expression ; {
MatchCase {
match_case_pattern = pat;
match_case_expr = e;
}
} %prec ALT
let condition_consequence :=
| UNDER_CONDITION ; c = expression ; CONSEQUENCE ; <>
let rule_expr :=
| i = addpos(scope_var) ; p = option(definition_parameters) ; <>
2020-04-14 20:16:40 +03:00
let rule_consequence :=
| flag = option(NOT); FILLED ; {
None = flag
}
2020-08-07 11:57:57 +03:00
let rule :=
| label = option(label) ;
except = option(exception_to) ;
RULE ;
name_and_param = rule_expr ;
cond = option(condition_consequence) ;
state = option(state) ;
consequence = addpos(rule_consequence) ; {
let (name, param_applied) = name_and_param in
let cons : bool Marked.pos = consequence in
let rule_exception = match except with
| None -> NotAnException
| Some x -> x
in
{
rule_label = label;
rule_exception_to = rule_exception;
rule_parameter = param_applied;
rule_condition = cond;
rule_name = name;
rule_id = Shared_ast.RuleName.fresh
(String.concat "." (List.map (fun i -> Marked.unmark i) (Marked.unmark name)),
Pos.from_lpos $sloc);
rule_consequence = cons;
rule_state = state;
}
}
let definition_parameters :=
| OF ; i = lident ; <>
let label :=
| LABEL ; i = lident ; <>
let state :=
| STATE ; s = lident ; <>
let exception_to :=
| EXCEPTION ; i = option(lident) ; {
match i with
| None -> UnlabeledException
| Some x -> ExceptionToLabel x
}
let definition :=
| label = option(label);
except = option(exception_to) ;
DEFINITION ;
name = addpos(scope_var) ;
param = option(definition_parameters) ;
state = option(state) ;
cond = option(condition_consequence) ;
DEFINED_AS ;
e = expression ; {
let def_exception = match except with
| None -> NotAnException
| Some x -> x
in
{
definition_label = label;
definition_exception_to = def_exception;
definition_name = name;
definition_parameter = param;
definition_condition = cond;
definition_id =
Shared_ast.RuleName.fresh
(String.concat "." (List.map (fun i -> Marked.unmark i) (Marked.unmark name)),
Pos.from_lpos $sloc);
definition_expr = e;
definition_state = state;
}
}
let variation_type :=
| INCREASING ; { Increasing }
| DECREASING ; { Decreasing }
let assertion :=
| cond = option(condition_consequence) ;
base = expression ; {
(Assertion {
assertion_condition = cond;
assertion_content = base;
})
}
| FIXED ; q = addpos(scope_var) ; BY ; i = lident ; {
MetaAssertion (FixedBy (q, i))
}
| VARIES ; q = addpos(scope_var) ;
WITH_V ; e = expression ;
t = option(addpos(variation_type)) ; {
MetaAssertion (VariesWith (q, e, t))
}
2020-03-08 08:06:32 +03:00
let scope_item :=
| r = rule ; <Rule>
| d = definition ; <Definition>
| ASSERTION ; contents = assertion ; <>
2020-04-14 12:46:48 +03:00
let struct_scope_base :=
| DATA ; i = lident ;
CONTENT ; t = addpos(typ) ; {
let t, pos = t in
(i, (Data t, pos))
}
| pos = pos(CONDITION) ; i = lident ; {
(i, (Condition, pos))
}
2020-04-03 23:58:34 +03:00
let struct_scope_func ==
| DEPENDS ; t = addpos(typ) ; <>
2020-04-03 23:58:34 +03:00
let struct_scope :=
| name_and_typ = struct_scope_base ;
func_typ = option(struct_scope_func) ; {
let (name, typ) = name_and_typ in
let (typ, typ_pos) = typ in
{
struct_decl_field_name = name;
2020-04-14 13:34:09 +03:00
struct_decl_field_typ = match func_typ with
| None -> (Base typ, typ_pos)
| Some (arg_typ, arg_pos) ->
Func {
arg_typ = (Data arg_typ, arg_pos);
return_typ = (typ, typ_pos);
}, Pos.from_lpos $sloc ;
}
}
2020-04-03 23:58:34 +03:00
let scope_decl_item_attribute_input :=
| CONTEXT ; { Context }
| INPUT ; { Input }
let scope_decl_item_attribute_output :=
| OUTPUT ; { true }
| { false }
let scope_decl_item_attribute :=
| input = addpos(scope_decl_item_attribute_input) ;
output = addpos(scope_decl_item_attribute_output) ; {
{
scope_decl_context_io_input = input;
scope_decl_context_io_output = output
}
}
| INTERNAL ; {
{
scope_decl_context_io_input = (Internal, Pos.from_lpos $sloc);
scope_decl_context_io_output = (false, Pos.from_lpos $sloc)
}
}
| OUTPUT ; {
{
scope_decl_context_io_input = (Internal, Pos.from_lpos $sloc);
scope_decl_context_io_output = (true, Pos.from_lpos $sloc)
}
}
let scope_decl_item :=
| attr = scope_decl_item_attribute ;
i = lident ;
CONTENT ; t = addpos(typ) ;
func_typ = option(struct_scope_func) ;
states = list(state) ; {
ContextData {
scope_decl_context_item_name = i;
scope_decl_context_item_attribute = attr;
2020-05-14 22:19:46 +03:00
scope_decl_context_item_typ =
2022-02-28 16:33:07 +03:00
(let (typ, typ_pos) = t in
match func_typ with
| None -> (Base (Data typ), typ_pos)
| Some (arg_typ, arg_pos) ->
Func {
arg_typ = (Data arg_typ, arg_pos);
return_typ = (Data typ, typ_pos);
}, Pos.from_lpos $sloc);
2022-02-28 16:33:07 +03:00
scope_decl_context_item_states = states;
}
}
| i = lident ; SCOPE ; c = uident ; {
ContextScope{
scope_decl_context_scope_name = i;
scope_decl_context_scope_sub_scope = c;
scope_decl_context_scope_attribute = {
scope_decl_context_io_input = (Internal, Pos.from_lpos $sloc);
scope_decl_context_io_output = (false, Pos.from_lpos $sloc);
};
}
}
| attr = scope_decl_item_attribute ;
i = lident ;
pos_condition = pos(CONDITION) ;
func_typ = option(struct_scope_func) ;
states = list(state) ; {
ContextData {
scope_decl_context_item_name = i;
scope_decl_context_item_attribute = attr;
scope_decl_context_item_typ =
2022-02-28 16:33:07 +03:00
(match func_typ with
| None -> (Base (Condition), pos_condition)
| Some (arg_typ, arg_pos) ->
Func {
arg_typ = (Data arg_typ, arg_pos);
return_typ = (Condition, pos_condition);
}, Pos.from_lpos $sloc);
2022-02-28 16:33:07 +03:00
scope_decl_context_item_states = states;
}
}
2020-04-03 23:58:34 +03:00
let enum_decl_line :=
| ALT ; c = uident ;
t = option(preceded(CONTENT,addpos(typ))) ; {
{
enum_decl_case_name = c;
enum_decl_case_typ =
Option.map (fun (t, t_pos) -> Base (Data t), t_pos) t;
}
}
2020-04-14 12:46:48 +03:00
let code_item :=
| SCOPE ; c = uident ;
e = option(preceded(UNDER_CONDITION,expression)) ;
COLON ; items = nonempty_list(addpos(scope_item)) ; {
ScopeUse {
scope_use_name = c;
scope_use_condition = e;
2020-05-14 22:19:46 +03:00
scope_use_items = items;
}
}
| DECLARATION ; STRUCT ; c = uident ;
COLON ; scopes = list(addpos(struct_scope)) ; {
StructDecl {
struct_decl_name = c;
2020-05-14 22:19:46 +03:00
struct_decl_fields = scopes;
}
}
| DECLARATION ; SCOPE ; c = uident ;
COLON ; context = nonempty_list(addpos(scope_decl_item)) ; {
ScopeDecl {
scope_decl_name = c;
scope_decl_context = context;
}
}
| DECLARATION ; ENUM ; c = uident ;
COLON ; cases = list(addpos(enum_decl_line)) ; {
EnumDecl {
enum_decl_name = c;
2020-04-14 13:34:09 +03:00
enum_decl_cases = cases;
}
}
let code :=
| code = list(addpos(code_item)) ; <>
let metadata_block :=
| BEGIN_METADATA ; option(law_text) ;
~ = code ;
text = END_CODE ; {
(code, (text, Pos.from_lpos $sloc))
}
let law_heading :=
| title = LAW_HEADING ; {
2022-09-07 18:14:22 +03:00
let (title, id, is_archive, precedence) = title in {
law_heading_name = (title, Pos.from_lpos $sloc);
law_heading_id = id;
2022-09-07 18:14:22 +03:00
law_heading_is_archive = is_archive;
law_heading_precedence = precedence;
}
}
let law_text :=
| lines = nonempty_list(LAW_TEXT) ; { String.trim (String.concat "" lines) }
let source_file_item :=
| text = law_text ; { LawText text }
| BEGIN_CODE ;
~ = code ;
text = END_CODE ; {
CodeBlock (code, (text, Pos.from_lpos $sloc), false)
}
| heading = law_heading ; {
LawHeading (heading, [])
}
| code = metadata_block ; {
let (code, source_repr) = code in
CodeBlock (code, source_repr, true)
}
| BEGIN_DIRECTIVE ; LAW_INCLUDE ; COLON ;
args = nonempty_list(DIRECTIVE_ARG) ;
page = option(AT_PAGE) ;
END_DIRECTIVE ; {
let filename = String.trim (String.concat "" args) in
let pos = Pos.from_lpos $sloc in
2023-01-04 18:29:21 +03:00
let jorftext = Re.Pcre.regexp "(JORFARTI\\d{12}|LEGIARTI\\d{12}|CETATEXT\\d{12})" in
if Re.Pcre.pmatch ~rex:jorftext filename && page = None then
LawInclude (Ast.LegislativeText (filename, pos))
else if Filename.extension filename = ".pdf" || page <> None then
LawInclude (Ast.PdfFile ((filename, pos), page))
else
LawInclude (Ast.CatalaFile (filename, pos))
}
let source_file :=
| hd = source_file_item ; tl = source_file ; { hd::tl }
| EOF ; { [] }