catala/src/lawspec/parsing/parser.mly

462 lines
12 KiB
OCaml
Raw Normal View History

(*
This file is part of the Lawspec compiler, a specification language for tax and social benefits
computation rules.
2020-04-15 16:33:21 +03:00
Copyright (C) 2020 Inria, contributor: Denis Merigoux <denis.merigoux@inria.fr>
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*)
%{
2020-03-08 03:52:31 +03:00
open Ast
open Parse_utils
%}
%token EOF
2020-03-08 03:52:31 +03:00
%token<string> LAW_ARTICLE
%token<string> LAW_CODE
%token<string> LAW_TEXT
2020-03-08 06:28:45 +03:00
%token<string> CONSTRUCTOR IDENT
%token<string> END_CODE
2020-03-08 07:27:46 +03:00
%token<int> INT_LITERAL
2020-04-11 19:16:15 +03:00
%token<int * int> DECIMAL_LITERAL
2020-04-03 23:58:34 +03:00
%token BEGIN_CODE
%token COLON ALT DATA
%token OF INTEGER COLLECTION
%token RULE CONDITION DEFINED_AS
2020-03-08 07:12:12 +03:00
%token EXISTS IN SUCH THAT NOW LESSER GREATER
2020-04-03 23:34:11 +03:00
%token DOT AND OR LPAREN RPAREN OPTIONAL EQUAL
2020-04-14 20:13:20 +03:00
%token CARDINAL LESSER_EQUAL GREATER_EQUAL
2020-04-03 23:58:34 +03:00
%token ASSERTION FIXED BY YEAR
2020-03-08 08:06:32 +03:00
%token PLUS MINUS MULT DIV MATCH WITH VARIES_WITH
2020-04-11 19:16:15 +03:00
%token FOR ALL WE_HAVE INCREASING DECREASING
%token NOT BOOLEAN PERCENT ARROW
2020-04-14 20:13:20 +03:00
%token FIELD FILLED EURO NOT_EQUAL DEFINITION
2020-04-03 23:58:34 +03:00
%token STRUCT CONTENT IF THEN DEPENDS DECLARATION
2020-04-11 19:16:15 +03:00
%token CONTEXT INCLUDES ENUM ELSE DATE SUM
%token BEGIN_METADATA END_METADATA MONEY DECIMAL
%token UNDER_CONDITION CONSEQUENCE
2020-04-03 23:34:11 +03:00
%type <Ast.source_file> source_file
%start source_file
%%
2020-04-03 23:58:34 +03:00
typ_base:
2020-04-14 12:46:48 +03:00
| INTEGER { (Integer, mk_position $sloc) }
| BOOLEAN { (Boolean, mk_position $sloc) }
| MONEY { (Money, mk_position $sloc) }
| DECIMAL { (Decimal, mk_position $sloc) }
| DATE { (Date, mk_position $sloc) }
| c = constructor {
let (s, _) = c in
(Named s, mk_position $sloc)
}
collection_marked:
| COLLECTION { mk_position $sloc }
optional_marked:
| OPTIONAL { mk_position $sloc }
2020-03-08 07:01:26 +03:00
2020-04-03 23:58:34 +03:00
typ:
2020-04-14 12:46:48 +03:00
| collection = option(collection_marked) t = typ_base optional = option(optional_marked) {
(Data {
typ_data_collection = collection;
typ_data_optional = optional;
typ_data_base = t;
}, mk_position $sloc)
}
2020-03-08 07:01:26 +03:00
qident:
2020-04-14 18:58:36 +03:00
| i = ident { let (i, i_pos) = i in ([Ident i, i_pos], mk_position $sloc) }
| i = ident DOT q = qident {
let (i, i_pos) = i in
let (q, _) = q in
((Ident i, i_pos)::q, mk_position $sloc)
}
| c = constructor DOT q = qident {
let (c, c_pos) = c in
let (q, _) = q in
((Constructor c, c_pos)::q, mk_position $sloc)
}
atomic_expression:
2020-04-14 20:13:20 +03:00
| q = qident { let (q, q_pos) = q in (Qident q, q_pos) }
| l = literal { let (l, l_pos) = l in (Literal l, l_pos) }
| LPAREN e = expression RPAREN { e }
small_expression:
2020-04-14 20:13:20 +03:00
| e = atomic_expression { e }
| e = small_expression ARROW c = constructor {
(Project (e, c), mk_position $sloc)
}
constructor_payload:
2020-04-14 20:13:20 +03:00
| CONTENT e = small_expression { e }
2020-03-08 07:01:26 +03:00
primitive_expression:
2020-04-14 20:13:20 +03:00
| e = small_expression { e }
| NOW { (Builtin Now, mk_position $sloc) }
| CARDINAL {
(Builtin Cardinal, mk_position $sloc)
}
| c = constructor p = option(constructor_payload) {
(Inject (c, p), mk_position $sloc)
}
2020-03-08 08:06:32 +03:00
2020-04-11 19:16:15 +03:00
num_literal:
2020-04-14 20:13:20 +03:00
| d = INT_LITERAL { (Int d, mk_position $sloc) }
| d = DECIMAL_LITERAL {
let (d1, d2) = d in
(Dec (d1, d2), mk_position $sloc)
}
unit_literal:
| PERCENT { (Percent, mk_position $sloc) }
| EURO { (Euro, mk_position $sloc) }
| YEAR { (Year, mk_position $sloc)}
2020-04-11 19:16:15 +03:00
literal:
2020-04-14 20:13:20 +03:00
| l = num_literal u = option(unit_literal) {
((l, u), mk_position $sloc)
}
2020-03-08 07:27:46 +03:00
2020-04-14 20:13:20 +03:00
compare_op:
| LESSER { (Lt, mk_position $sloc) }
| LESSER_EQUAL { (Lte, mk_position $sloc) }
| GREATER { (Gt, mk_position $sloc) }
| GREATER_EQUAL { (Gte, mk_position $sloc) }
| EQUAL { (Eq, mk_position $sloc) }
| NOT_EQUAL { (Neq, mk_position $sloc) }
2020-03-08 07:01:26 +03:00
2020-04-11 23:55:43 +03:00
aggregate_func:
2020-04-14 20:13:20 +03:00
| SUM { (AggregateSum, mk_position $sloc) }
| CARDINAL { (AggregateCount, mk_position $sloc)}
2020-04-11 23:55:43 +03:00
aggregate:
2020-04-14 20:13:20 +03:00
| func = aggregate_func FOR i = ident IN e1 = primitive_expression
OF e2 = base_expression {
(Aggregate (func, i, e1, e2), mk_position $sloc)
}
2020-04-11 23:55:43 +03:00
2020-03-08 07:12:12 +03:00
base_expression:
2020-04-14 20:13:20 +03:00
| e = primitive_expression { e }
| ag = aggregate { ag }
| e1 = primitive_expression OF e2 = base_expression {
(FunCall (e1, e2), mk_position $sloc)
}
| e = primitive_expression WITH c= constructor {
(TestMatchCase (e, c), mk_position $sloc)
}
| e1 = primitive_expression IN e2 = base_expression {
(MemCollection (e1, e2), mk_position $sloc)
}
2020-03-08 07:12:12 +03:00
2020-03-08 08:06:32 +03:00
mult_op:
2020-04-14 20:13:20 +03:00
| MULT { (Mult, mk_position $sloc) }
| DIV { (Div, mk_position $sloc) }
2020-03-08 08:06:32 +03:00
mult_expression:
2020-04-14 20:13:20 +03:00
| base_expression { (Foo (), mk_position $sloc) }
| base_expression mult_op mult_expression { (Foo (), mk_position $sloc) }
2020-03-08 08:06:32 +03:00
sum_op:
2020-04-14 20:13:20 +03:00
| PLUS { (Add, mk_position $sloc) }
| MINUS { (Sub, mk_position $sloc) }
sum_unop:
| MINUS { (Minus, mk_position $sloc) }
2020-03-08 08:06:32 +03:00
sum_expression:
2020-04-14 20:13:20 +03:00
| e = mult_expression { e }
| e1 = mult_expression binop = sum_op e2 = sum_expression {
(Binop (binop, e1, e2), mk_position $sloc)
}
| unop = sum_unop e = sum_expression { (Unop (unop, e), mk_position $sloc) }
2020-03-08 07:12:12 +03:00
logical_op:
2020-04-14 20:13:20 +03:00
| AND { (And, mk_position $sloc) }
| OR { (Or, mk_position $sloc) }
2020-03-08 07:01:26 +03:00
2020-03-08 08:49:29 +03:00
logical_unop:
2020-04-14 20:13:20 +03:00
| NOT { (Not, mk_position $sloc) }
2020-03-08 08:49:29 +03:00
2020-03-08 07:01:26 +03:00
compare_expression:
2020-04-14 20:13:20 +03:00
| e = sum_expression { e }
| e1 = sum_expression binop = compare_op e2 = compare_expression {
(Binop (binop, e1, e2), mk_position $sloc)
}
2020-03-08 07:01:26 +03:00
2020-03-08 07:12:12 +03:00
logical_expression:
2020-04-14 20:13:20 +03:00
| e = compare_expression { e }
| unop = logical_unop e = compare_expression { (Unop (unop, e), mk_position $sloc) }
| e1 = compare_expression binop = logical_op e2 = logical_expression {
(Binop (binop, e1, e2), mk_position $sloc)
}
2020-03-08 07:01:26 +03:00
2020-03-08 08:06:32 +03:00
optional_binding:
2020-04-14 20:13:20 +03:00
| { ([], None)}
| OF i = ident {([], Some i)}
| OF c = constructor cs_and_i = constructor_binding {
let (cs, i) = cs_and_i in
(c::cs, i)
}
2020-03-08 08:49:29 +03:00
constructor_binding:
2020-04-14 20:13:20 +03:00
| c = constructor cs_and_i = optional_binding {
let (cs, i) = cs_and_i in
(c::cs, i)
}
2020-03-08 08:06:32 +03:00
match_arm:
2020-04-14 20:13:20 +03:00
| pat = constructor_binding COLON e = logical_expression {
({
(* DM 14/04/2020 : I can't have the $sloc in constructor_binding... *)
match_case_pattern = (pat, mk_position $sloc);
match_case_expr = e;
}, mk_position $sloc)
}
2020-03-08 08:06:32 +03:00
match_arms:
2020-04-14 20:13:20 +03:00
| ALT a = match_arm arms = match_arms {
let (arms, _) = arms in
(a::arms, mk_position $sloc)
}
| { ([], mk_position $sloc)}
2020-03-08 08:06:32 +03:00
forall_prefix:
2020-04-14 20:13:20 +03:00
| FOR ALL i = ident IN e = primitive_expression WE_HAVE {
(i, e)
}
exists_prefix:
| EXISTS i = ident IN e = primitive_expression SUCH THAT {
(i, e)
}
2020-03-08 07:01:26 +03:00
expression:
2020-04-14 20:13:20 +03:00
| i_in_e1 = exists_prefix e2 = expression {
let (i,e1) = i_in_e1 in
(Exists (i, e1, e2), mk_position $sloc)
}
| i_in_e1 = forall_prefix e2 = expression {
let (i,e1) = i_in_e1 in
(Forall (i, e1, e2), mk_position $sloc)
}
| MATCH e = primitive_expression WITH arms = match_arms {
(MatchWith (e, arms), mk_position $sloc)
}
| IF e1 = expression THEN e2 = expression ELSE e3 = base_expression {
(IfThenElse (e1, e2, e3), mk_position $sloc)
}
| e = logical_expression { e }
2020-03-08 07:01:26 +03:00
condition:
2020-04-14 18:58:36 +03:00
| UNDER_CONDITION e = expression { e }
2020-03-08 07:01:26 +03:00
2020-04-07 14:59:52 +03:00
condition_consequence:
2020-04-14 18:58:36 +03:00
| cond = condition CONSEQUENCE { cond }
2020-04-14 20:16:40 +03:00
rule_expr:
| i = qident p = option(definition_parameters) { (i, p) }
2020-04-03 23:34:11 +03:00
rule:
2020-04-15 16:24:30 +03:00
| name_and_param = rule_expr cond = option(condition_consequence)
FILLED {
2020-04-14 20:16:40 +03:00
let (name, param_applied) = name_and_param in
2020-04-14 20:13:20 +03:00
({
2020-04-15 16:24:30 +03:00
rule_parameter = param_applied;
2020-04-14 20:13:20 +03:00
rule_condition = cond;
2020-04-14 20:16:40 +03:00
rule_name = name;
2020-04-14 20:13:20 +03:00
}, mk_position $sloc)
}
2020-03-08 08:49:29 +03:00
2020-04-03 23:34:11 +03:00
definition_parameters:
2020-04-14 20:13:20 +03:00
| OF i = ident { i }
2020-04-03 23:34:11 +03:00
definition:
2020-04-14 20:13:20 +03:00
| name = qident param = option(definition_parameters)
cond = option(condition_consequence) DEFINED_AS e = expression {
({
definition_name = name;
definition_parameter = param;
definition_condition = cond;
definition_expr = e;
}, mk_position $sloc)
}
2020-03-08 06:28:45 +03:00
2020-03-08 08:06:32 +03:00
variation_type:
2020-04-14 18:58:36 +03:00
| INCREASING { (Increasing, mk_position $sloc) }
| DECREASING { (Decreasing, mk_position $sloc) }
2020-03-08 08:06:32 +03:00
2020-04-10 19:46:06 +03:00
assertion_base:
2020-04-14 18:58:36 +03:00
| e = expression { let (e, _) = e in (Assert e, mk_position $sloc) }
| q = qident FIXED BY i = ident { (FixedBy (q, i), mk_position $sloc) }
| q = qident VARIES_WITH e = base_expression t = option(variation_type) {
(VariesWith (q, e, t), mk_position $sloc)
}
2020-04-10 19:46:06 +03:00
assertion:
2020-04-14 18:58:36 +03:00
| cond = option(condition_consequence) base = assertion_base {
(cond, base)
}
2020-03-08 08:06:32 +03:00
2020-04-03 23:34:11 +03:00
application_field_item:
2020-04-14 20:13:20 +03:00
| RULE r = rule {
let (r, _) = r in (Rule r, mk_position $sloc)
}
| DEFINITION d = definition {
let (d, _) = d in (Definition d, mk_position $sloc)
}
2020-04-14 18:58:36 +03:00
| ASSERTION contents = assertion {
(let (cond, cont) = contents in Assertion {
assertion_condition = cond;
assertion_content = cont;
}, mk_position $sloc)
}
2020-03-08 06:28:45 +03:00
2020-04-14 12:46:48 +03:00
ident:
| i = IDENT { (i, mk_position $sloc) }
condition_pos:
| CONDITION { mk_position $sloc }
2020-04-03 23:58:34 +03:00
struct_field_base:
2020-04-14 12:46:48 +03:00
| DATA i= ident CONTENT t = typ {
(i, t)
}
| pos = condition_pos i = ident {
(i, (Condition, pos))
}
2020-04-03 23:58:34 +03:00
struct_field_func:
2020-04-14 13:34:09 +03:00
| DEPENDS OF t = typ { t }
2020-04-03 23:58:34 +03:00
struct_field:
2020-04-14 13:34:09 +03:00
| name_and_typ = struct_field_base func_typ = option(struct_field_func) {
2020-04-14 12:46:48 +03:00
let (name, typ) = name_and_typ in
2020-04-14 13:34:09 +03:00
let (typ, typ_pos) = typ in
2020-04-14 12:46:48 +03:00
({
struct_decl_field_name = name;
2020-04-14 13:34:09 +03:00
struct_decl_field_typ = match func_typ with
| None -> (Base typ, typ_pos)
| Some (return_typ, return_pos) -> (Func {
arg_typ = (typ, typ_pos);
return_typ = (return_typ, return_pos);
}, mk_position $sloc) ;
2020-04-14 12:46:48 +03:00
}, mk_position $sloc)
}
2020-04-03 23:58:34 +03:00
field_decl_item:
| CONTEXT i = ident CONTENT t = typ func_typ = option(struct_field_func) { ({
field_decl_context_item_name = i;
field_decl_context_item_typ =
let (typ, typ_pos) = t in
match func_typ with
| None -> (Base typ, typ_pos)
| Some (return_typ, return_pos) -> (Func {
arg_typ = (typ, typ_pos);
return_typ = (return_typ, return_pos);
}, mk_position $sloc);
}, mk_position $sloc) }
2020-04-03 23:58:34 +03:00
field_decl_include:
| c1 = constructor DOT i1 = ident EQUAL c2 = constructor DOT i2 = ident {
({
parent_field_name = c1;
parent_field_context_item = i1 ;
sub_field_name = c2;
sub_field_context_item = i2;
}, mk_position $sloc)
}
2020-04-07 14:59:52 +03:00
field_decl_includes_context:
| CONTEXT join = nonempty_list(field_decl_include) { join }
2020-04-03 23:58:34 +03:00
field_decl_includes:
| INCLUDES FIELD c = constructor context = option(field_decl_includes_context) {
({
field_decl_include_sub_field = c;
field_decl_include_joins = match context with
| None -> []
| Some context -> context
}, mk_position $sloc)
}
2020-04-03 23:58:34 +03:00
2020-04-10 13:55:18 +03:00
enum_decl_line_payload:
2020-04-14 13:34:09 +03:00
| CONTENT t = typ { let (t, t_pos) = t in (Base t, t_pos) }
2020-04-10 13:55:18 +03:00
enum_decl_line:
2020-04-14 13:34:09 +03:00
| ALT c = constructor t = option(enum_decl_line_payload) { ({
enum_decl_case_name = c;
enum_decl_case_typ = t;
}, mk_position $sloc) }
2020-04-14 12:46:48 +03:00
constructor:
| c = CONSTRUCTOR { (c, mk_position $sloc) }
2020-04-10 13:55:18 +03:00
2020-03-08 06:28:45 +03:00
code_item:
2020-04-14 18:58:36 +03:00
| FIELD c = constructor COLON items = nonempty_list(application_field_item) {
(FieldUse {
field_use_name = c;
field_use_items = items;
}, mk_position $sloc)
2020-04-14 12:46:48 +03:00
}
| DECLARATION STRUCT c = constructor COLON fields = list(struct_field) {
(StructDecl {
struct_decl_name = c;
struct_decl_fields = fields;
}, mk_position $sloc)
}
| DECLARATION FIELD c = constructor COLON context = nonempty_list(field_decl_item)
includes = list(field_decl_includes) {
(FieldDecl {
field_decl_name = c;
field_decl_context = context;
field_decl_includes = includes;
}, mk_position $sloc)
2020-04-14 12:46:48 +03:00
}
2020-04-14 13:34:09 +03:00
| DECLARATION ENUM c = constructor COLON cases = nonempty_list(enum_decl_line) {
(EnumDecl {
enum_decl_name = c;
enum_decl_cases = cases;
}, mk_position $sloc)
2020-04-14 12:01:31 +03:00
}
2020-03-08 06:28:45 +03:00
code:
2020-04-14 12:01:31 +03:00
| code = list(code_item) { (code, mk_position $sloc) }
2020-03-08 06:28:45 +03:00
2020-04-10 13:02:05 +03:00
metadata_block:
2020-04-14 12:01:31 +03:00
| BEGIN_CODE code_and_pos = code text = END_CODE END_METADATA {
let (code, pos) = code_and_pos in
(code, (text, pos))
}
2020-04-10 13:02:05 +03:00
source_file_item:
2020-03-08 03:52:31 +03:00
| title = LAW_ARTICLE { LawArticle title }
| code = LAW_CODE { LawCode code }
| text = LAW_TEXT { LawText text }
2020-04-14 12:01:31 +03:00
| BEGIN_METADATA code = metadata_block {
let (code, source_repr) = code in
MetadataBlock (code, source_repr)
}
| BEGIN_CODE code_and_pos = code text = END_CODE {
let (code, pos) = code_and_pos in
CodeBlock (code, (text, pos))
}
source_file:
2020-03-08 03:52:31 +03:00
| i = source_file_item f = source_file { i::f }
| EOF { [] }