Merge pull request #111 from AltGr/localised-builtins

Remove parser tokens for builtin functions
Denis Merigoux 2021-05-04 17:53:03 +02:00 committed by GitHub
commit 144046b9c5
16 changed files with 547 additions and 1458 deletions

View File

@ -91,9 +91,30 @@ You can look at the
[online OCaml documentation](https://catala-lang.org/ocaml_docs/) for the
different modules' interfaces as well as high-level architecture documentation.
Please note that the `ocamlformat` version the this project use is `0.18.0`.
Please note that the `ocamlformat` version this project uses is `0.18.0`.
Using another version may cause spurious diffs to appear in your pull requests.
### Example: adding a builtin function
The language provides a limited number of builtin functions, which are sometimes
needed for things that can't easily be expressed in Catala itself; if you need
more, here is how one can be added (a sketch of the typical changes follows this
list):
- Choose a name wisely. Be ready to patch any code that already used the name
for scope parameters, variables or structure fields, since it won't compile
anymore.
- Add an element to the `builtin_expression` type in `surface/ast.ml(i)`
- Add your builtin to the `builtins` list in `surface/lexer.ml`, with proper
translations in each of the language-specific modules `surface/lexer_en.ml`,
`surface/lexer_fr.ml`, etc.
- The rest can all be done by following the type errors downstream:
- Add a corresponding element to the lower-level AST in `dcalc/ast.ml(i)`, type `unop`
- Extend the translation accordingly in `surface/desugaring.ml`
- Extend the printer (`dcalc/print.ml`) and the typer with correct type
information (`dcalc/typing.ml`)
- Finally, provide the implementations:
* in `lcalc/to_ocaml.ml`, function `format_unop`
* in `dcalc/interpreter.ml`, function `evaluate_operator`
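To make the first two steps concrete, here is a minimal sketch of the shape of
those changes, using a hypothetical `round_money` builtin (the surface name, its
translations and the `RoundMoney` constructor are made up for illustration; the
real edits go in `surface/ast.ml(i)`, `surface/lexer.ml` and the
language-specific lexers):

```ocaml
(* surface/ast.ml(i): one new constructor on the builtin type *)
type builtin_expression =
  | Cardinal
  | IntToDec
  | GetDay
  | GetMonth
  | GetYear
  | RoundMoney (* hypothetical addition *)

(* surface/lexer_en.ml (and similarly surface/lexer.ml, surface/lexer_fr.ml, ...):
   one new entry in the localised association list consumed by the parser *)
let builtins : (string * builtin_expression) list =
  [
    ("integer_to_decimal", IntToDec);
    ("get_day", GetDay);
    ("get_month", GetMonth);
    ("get_year", GetYear);
    ("round_money", RoundMoney); (* hypothetical addition *)
  ]
```

The remaining steps (the `dcalc` AST, desugaring, printer, typer, interpreter and
OCaml backend) are then driven by the compiler's type errors, as listed above.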
## Internationalization
The Catala language should be adapted to any legislative text that follows a

View File

@ -12,19 +12,31 @@
or implied. See the License for the specific language governing permissions and limitations under
the License. *)
(** Abstract syntax tree built by the Catala parser *)
[@@@ocaml.warning "-7"]
open Utils
(** {1 Visitor classes for programs} *)
(** To allow for quick traversal and/or modification of this AST structure, we provide a
{{:https://en.wikipedia.org/wiki/Visitor_pattern} visitor design pattern}. This feature is
implemented via {{:https://gitlab.inria.fr/fpottier/visitors} François Pottier's OCaml visitors
library}. *)
(** {1 Type definitions} *)
type constructor = (string[@opaque])
[@@deriving
visitors { variety = "map"; name = "constructor_map"; nude = true },
visitors { variety = "iter"; name = "constructor_iter"; nude = true }]
(** Constructors are CamelCase *)
type ident = (string[@opaque])
[@@deriving
visitors { variety = "map"; name = "ident_map"; nude = true },
visitors { variety = "iter"; name = "ident_iter"; nude = true }]
(** Idents are snake_case *)
type qident = ident Pos.marked list
[@@deriving
@ -262,6 +274,7 @@ and expression =
| ArrayLit of expression Pos.marked list
| Ident of ident
| Dotted of expression Pos.marked * constructor Pos.marked option * ident Pos.marked
(** Dotted is for both struct field projection and sub-scope variables *)
[@@deriving
visitors
{

View File

@ -1,966 +0,0 @@
(* This file is part of the Catala compiler, a specification language for tax and social benefits
computation rules. Copyright (C) 2020 Inria, contributor: Denis Merigoux
<denis.merigoux@inria.fr>
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
or implied. See the License for the specific language governing permissions and limitations under
the License. *)
(** Abstract syntax tree built by the Catala parser *)
open Utils
(** {1 Type definitions} *)
type constructor = string
(** Constructors are CamelCase *)
type ident = string
(** Idents are snake_case *)
type qident = ident Pos.marked list
type primitive_typ =
| Integer
| Decimal
| Boolean
| Money
| Duration
| Text
| Date
| Named of constructor
type base_typ_data = Primitive of primitive_typ | Collection of base_typ_data Pos.marked
type base_typ = Condition | Data of base_typ_data
type func_typ = { arg_typ : base_typ Pos.marked; return_typ : base_typ Pos.marked }
type typ = Base of base_typ | Func of func_typ
type struct_decl_field = {
struct_decl_field_name : ident Pos.marked;
struct_decl_field_typ : typ Pos.marked;
}
type struct_decl = {
struct_decl_name : constructor Pos.marked;
struct_decl_fields : struct_decl_field Pos.marked list;
}
type enum_decl_case = {
enum_decl_case_name : constructor Pos.marked;
enum_decl_case_typ : typ Pos.marked option;
}
type enum_decl = {
enum_decl_name : constructor Pos.marked;
enum_decl_cases : enum_decl_case Pos.marked list;
}
type match_case_pattern =
(constructor Pos.marked option * constructor Pos.marked) list * ident Pos.marked option
type op_kind =
| KInt (** No suffix *)
| KDec (** Suffix: [.] *)
| KMoney (** Suffix: [$] *)
| KDate (** Suffix: [@] *)
| KDuration (** Suffix: [^] *)
type binop =
| And
| Or
| Xor
| Add of op_kind
| Sub of op_kind
| Mult of op_kind
| Div of op_kind
| Lt of op_kind
| Lte of op_kind
| Gt of op_kind
| Gte of op_kind
| Eq
| Neq
type unop = Not | Minus of op_kind
type builtin_expression = Cardinal | IntToDec | GetDay | GetMonth | GetYear
type literal_date = {
literal_date_day : (int[@opaque]) Pos.marked;
literal_date_month : (int[@opaque]) Pos.marked;
literal_date_year : (int[@opaque]) Pos.marked;
}
type literal_number =
| Int of (Runtime.integer[@opaque])
| Dec of (Runtime.integer[@opaque]) * (Runtime.integer[@opaque])
type literal_unit = Percent | Year | Month | Day
type money_amount = {
money_amount_units : (Runtime.integer[@opaque]);
money_amount_cents : (Runtime.integer[@opaque]);
}
type literal =
| LNumber of literal_number Pos.marked * literal_unit Pos.marked option
| LBool of bool
| LMoneyAmount of money_amount
| LDate of literal_date
type aggregate_func =
| AggregateSum of primitive_typ
| AggregateCount
| AggregateExtremum of bool (* true if max *) * primitive_typ * expression Pos.marked
| AggregateArgExtremum of bool (* true if max *) * primitive_typ * expression Pos.marked
and collection_op = Exists | Forall | Aggregate of aggregate_func | Map | Filter
and match_case = {
match_case_pattern : match_case_pattern Pos.marked;
match_case_expr : expression Pos.marked;
}
and match_cases = match_case Pos.marked list
and expression =
| MatchWith of expression Pos.marked * match_cases Pos.marked
| IfThenElse of expression Pos.marked * expression Pos.marked * expression Pos.marked
| Binop of binop Pos.marked * expression Pos.marked * expression Pos.marked
| Unop of unop Pos.marked * expression Pos.marked
| CollectionOp of
collection_op Pos.marked * ident Pos.marked * expression Pos.marked * expression Pos.marked
| MemCollection of expression Pos.marked * expression Pos.marked
| TestMatchCase of expression Pos.marked * match_case_pattern Pos.marked
| FunCall of expression Pos.marked * expression Pos.marked
| Builtin of builtin_expression
| Literal of literal
| EnumInject of
constructor Pos.marked option * constructor Pos.marked * expression Pos.marked option
| StructLit of constructor Pos.marked * (ident Pos.marked * expression Pos.marked) list
| ArrayLit of expression Pos.marked list
| Ident of ident
| Dotted of expression Pos.marked * constructor Pos.marked option * ident Pos.marked
(** Dotted is for both struct field projection and sub-scope variables *)
type exception_to = NotAnException | UnlabeledException | ExceptionToLabel of ident Pos.marked
type rule = {
rule_label : ident Pos.marked option;
rule_exception_to : exception_to;
rule_parameter : ident Pos.marked option;
rule_condition : expression Pos.marked option;
rule_name : qident Pos.marked;
rule_consequence : (bool[@opaque]) Pos.marked;
}
type definition = {
definition_label : ident Pos.marked option;
definition_exception_to : exception_to;
definition_name : qident Pos.marked;
definition_parameter : ident Pos.marked option;
definition_condition : expression Pos.marked option;
definition_expr : expression Pos.marked;
}
type variation_typ = Increasing | Decreasing
type meta_assertion =
| FixedBy of qident Pos.marked * ident Pos.marked
| VariesWith of qident Pos.marked * expression Pos.marked * variation_typ Pos.marked option
type assertion = {
assertion_condition : expression Pos.marked option;
assertion_content : expression Pos.marked;
}
type scope_use_item =
| Rule of rule
| Definition of definition
| Assertion of assertion
| MetaAssertion of meta_assertion
type scope_use = {
scope_use_condition : expression Pos.marked option;
scope_use_name : constructor Pos.marked;
scope_use_items : scope_use_item Pos.marked list;
}
type scope_decl_context_scope = {
scope_decl_context_scope_name : ident Pos.marked;
scope_decl_context_scope_sub_scope : constructor Pos.marked;
}
type scope_decl_context_data = {
scope_decl_context_item_name : ident Pos.marked;
scope_decl_context_item_typ : typ Pos.marked;
}
type scope_decl_context_item =
| ContextData of scope_decl_context_data
| ContextScope of scope_decl_context_scope
type scope_decl = {
scope_decl_name : constructor Pos.marked;
scope_decl_context : scope_decl_context_item Pos.marked list;
}
type code_item =
| ScopeUse of scope_use
| ScopeDecl of scope_decl
| StructDecl of struct_decl
| EnumDecl of enum_decl
type code_block = code_item Pos.marked list
type source_repr = (string[@opaque]) Pos.marked
type law_article = {
law_article_name : (string[@opaque]) Pos.marked;
law_article_id : (string[@opaque]) option;
law_article_expiration_date : (string[@opaque]) option;
law_article_precedence : (int[@opaque]);
}
type law_include =
| PdfFile of (string[@opaque]) Pos.marked * (int[@opaque]) option
| CatalaFile of (string[@opaque]) Pos.marked
| LegislativeText of (string[@opaque]) Pos.marked
type law_article_item = LawText of (string[@opaque]) | CodeBlock of code_block * source_repr
type law_heading = { law_heading_name : (string[@opaque]); law_heading_precedence : (int[@opaque]) }
type law_structure =
| LawInclude of law_include
| LawHeading of law_heading * law_structure list
| LawArticle of law_article * law_article_item list
| MetadataBlock of code_block * source_repr
| IntermediateText of (string[@opaque])
type program_item = LawStructure of law_structure
type program = { program_items : program_item list; program_source_files : (string[@opaque]) list }
type source_file_or_master =
| SourceFile of program_item list
| MasterFile of string Pos.marked list
(** {1 Visitor classes for programs} *)
(** To allow for quick traversal and/or modification of this AST structure, we provide a
{{:https://en.wikipedia.org/wiki/Visitor_pattern} visitor design pattern}. This feature is
implemented via {{:https://gitlab.inria.fr/fpottier/visitors} François Pottier's OCaml visitors
library}. *)
(** {2 Program map visitor} *)
class virtual ['self] program_map :
object ('self)
method visit_Add : 'monomorphic. 'env -> op_kind -> binop
method visit_Aggregate : 'monomorphic. 'env -> aggregate_func -> collection_op
method visit_AggregateArgExtremum :
'monomorphic. 'env -> bool -> primitive_typ -> expression Pos.marked -> aggregate_func
method visit_AggregateCount : 'monomorphic. 'env -> aggregate_func
method visit_AggregateExtremum :
'monomorphic. 'env -> bool -> primitive_typ -> expression Pos.marked -> aggregate_func
method visit_AggregateSum : 'monomorphic. 'env -> primitive_typ -> aggregate_func
method visit_And : 'monomorphic. 'env -> binop
method visit_ArrayLit : 'monomorphic. 'env -> expression Pos.marked list -> expression
method visit_Assertion : 'monomorphic. 'env -> assertion -> scope_use_item
method visit_Base : 'monomorphic. 'env -> base_typ -> typ
method visit_Binop :
'monomorphic.
'env -> binop Pos.marked -> expression Pos.marked -> expression Pos.marked -> expression
method visit_Boolean : 'monomorphic. 'env -> primitive_typ
method visit_Builtin : 'monomorphic. 'env -> builtin_expression -> expression
method visit_Cardinal : 'monomorphic. 'env -> builtin_expression
method visit_CatalaFile : 'monomorphic. 'env -> string Pos.marked -> law_include
method visit_CodeBlock :
'monomorphic. 'env -> code_block -> string Pos.marked -> law_article_item
method visit_Collection : 'monomorphic. 'env -> base_typ_data Pos.marked -> base_typ_data
method visit_CollectionOp :
'monomorphic.
'env ->
collection_op Pos.marked ->
ident Pos.marked ->
expression Pos.marked ->
expression Pos.marked ->
expression
method visit_Condition : 'monomorphic. 'env -> base_typ
method visit_ContextData :
'monomorphic. 'env -> scope_decl_context_data -> scope_decl_context_item
method visit_ContextScope :
'monomorphic. 'env -> scope_decl_context_scope -> scope_decl_context_item
method visit_Data : 'monomorphic. 'env -> base_typ_data -> base_typ
method visit_Date : 'monomorphic. 'env -> primitive_typ
method visit_Day : 'monomorphic. 'env -> literal_unit
method visit_Dec : 'monomorphic. 'env -> Runtime.integer -> Runtime.integer -> literal_number
method visit_Decimal : 'monomorphic. 'env -> primitive_typ
method visit_Decreasing : 'monomorphic. 'env -> variation_typ
method visit_Definition : 'monomorphic. 'env -> definition -> scope_use_item
method visit_Div : 'monomorphic. 'env -> op_kind -> binop
method visit_Dotted :
'monomorphic.
'env ->
expression Pos.marked ->
constructor Pos.marked option ->
ident Pos.marked ->
expression
method visit_Duration : 'monomorphic. 'env -> primitive_typ
method visit_EnumDecl : 'monomorphic. 'env -> enum_decl -> code_item
method visit_EnumInject :
'monomorphic.
'env ->
constructor Pos.marked option ->
constructor Pos.marked ->
expression Pos.marked option ->
expression
method visit_Eq : 'monomorphic. 'env -> binop
method visit_ExceptionToLabel : 'monomorphic. 'env -> ident Pos.marked -> exception_to
method visit_Exists : 'monomorphic. 'env -> collection_op
method visit_Filter : 'monomorphic. 'env -> collection_op
method visit_FixedBy :
'monomorphic. 'env -> qident Pos.marked -> ident Pos.marked -> meta_assertion
method visit_Forall : 'monomorphic. 'env -> collection_op
method visit_FunCall :
'monomorphic. 'env -> expression Pos.marked -> expression Pos.marked -> expression
method visit_Func : 'monomorphic. 'env -> func_typ -> typ
method visit_GetDay : 'monomorphic. 'env -> builtin_expression
method visit_GetMonth : 'monomorphic. 'env -> builtin_expression
method visit_GetYear : 'monomorphic. 'env -> builtin_expression
method visit_Gt : 'monomorphic. 'env -> op_kind -> binop
method visit_Gte : 'monomorphic. 'env -> op_kind -> binop
method visit_Ident : 'monomorphic. 'env -> ident -> expression
method visit_IfThenElse :
'monomorphic.
'env -> expression Pos.marked -> expression Pos.marked -> expression Pos.marked -> expression
method visit_Increasing : 'monomorphic. 'env -> variation_typ
method visit_Int : 'monomorphic. 'env -> Runtime.integer -> literal_number
method visit_IntToDec : 'monomorphic. 'env -> builtin_expression
method visit_Integer : 'monomorphic. 'env -> primitive_typ
method visit_IntermediateText : 'monomorphic. 'env -> string -> law_structure
method visit_KDate : 'monomorphic. 'env -> op_kind
method visit_KDec : 'monomorphic. 'env -> op_kind
method visit_KDuration : 'monomorphic. 'env -> op_kind
method visit_KInt : 'monomorphic. 'env -> op_kind
method visit_KMoney : 'monomorphic. 'env -> op_kind
method visit_LBool : 'monomorphic. 'env -> bool -> literal
method visit_LDate : 'monomorphic. 'env -> literal_date -> literal
method visit_LMoneyAmount : 'monomorphic. 'env -> money_amount -> literal
method visit_LNumber :
'monomorphic. 'env -> literal_number Pos.marked -> literal_unit Pos.marked option -> literal
method visit_LawArticle :
'monomorphic. 'env -> law_article -> law_article_item list -> law_structure
method visit_LawHeading :
'monomorphic. 'env -> law_heading -> law_structure list -> law_structure
method visit_LawInclude : 'monomorphic. 'env -> law_include -> law_structure
method visit_LawStructure : 'monomorphic. 'env -> law_structure -> program_item
method visit_LawText : 'monomorphic. 'env -> string -> law_article_item
method visit_LegislativeText : 'monomorphic. 'env -> string Pos.marked -> law_include
method visit_Literal : 'monomorphic. 'env -> literal -> expression
method visit_Lt : 'monomorphic. 'env -> op_kind -> binop
method visit_Lte : 'monomorphic. 'env -> op_kind -> binop
method visit_Map : 'monomorphic. 'env -> collection_op
method visit_MatchWith :
'monomorphic. 'env -> expression Pos.marked -> match_cases Pos.marked -> expression
method visit_MemCollection :
'monomorphic. 'env -> expression Pos.marked -> expression Pos.marked -> expression
method visit_MetaAssertion : 'monomorphic. 'env -> meta_assertion -> scope_use_item
method visit_MetadataBlock :
'monomorphic. 'env -> code_block -> string Pos.marked -> law_structure
method visit_Minus : 'monomorphic. 'env -> op_kind -> unop
method visit_Money : 'monomorphic. 'env -> primitive_typ
method visit_Month : 'monomorphic. 'env -> literal_unit
method visit_Mult : 'monomorphic. 'env -> op_kind -> binop
method visit_Named : 'monomorphic. 'env -> constructor -> primitive_typ
method visit_Neq : 'monomorphic. 'env -> binop
method visit_Not : 'monomorphic. 'env -> unop
method visit_NotAnException : 'monomorphic. 'env -> exception_to
method visit_Or : 'monomorphic. 'env -> binop
method visit_Xor : 'monomorphic. 'env -> binop
method visit_PdfFile : 'monomorphic. 'env -> string Pos.marked -> int option -> law_include
method visit_Percent : 'monomorphic. 'env -> literal_unit
method visit_Primitive : 'monomorphic. 'env -> primitive_typ -> base_typ_data
method visit_Rule : 'monomorphic. 'env -> rule -> scope_use_item
method visit_ScopeDecl : 'monomorphic. 'env -> scope_decl -> code_item
method visit_ScopeUse : 'monomorphic. 'env -> scope_use -> code_item
method visit_StructDecl : 'monomorphic. 'env -> struct_decl -> code_item
method visit_StructLit :
'monomorphic.
'env ->
constructor Pos.marked ->
(ident Pos.marked * expression Pos.marked) list ->
expression
method visit_Sub : 'monomorphic. 'env -> op_kind -> binop
method visit_TestMatchCase :
'monomorphic. 'env -> expression Pos.marked -> match_case_pattern Pos.marked -> expression
method visit_Text : 'monomorphic. 'env -> primitive_typ
method visit_UnlabeledException : 'monomorphic. 'env -> exception_to
method visit_Unop : 'monomorphic. 'env -> unop Pos.marked -> expression Pos.marked -> expression
method visit_VariesWith :
'monomorphic.
'env ->
qident Pos.marked ->
expression Pos.marked ->
variation_typ Pos.marked option ->
meta_assertion
method visit_Year : 'monomorphic. 'env -> literal_unit
method visit_aggregate_func : 'monomorphic. 'env -> aggregate_func -> aggregate_func
method visit_assertion : 'monomorphic. 'env -> assertion -> assertion
method visit_base_typ : 'monomorphic. 'env -> base_typ -> base_typ
method visit_base_typ_data : 'monomorphic. 'env -> base_typ_data -> base_typ_data
method visit_binop : 'monomorphic. 'env -> binop -> binop
method visit_builtin_expression : 'monomorphic. 'env -> builtin_expression -> builtin_expression
method visit_code_block :
'monomorphic. 'env -> code_item Pos.marked list -> code_item Pos.marked list
method visit_code_item : 'monomorphic. 'env -> code_item -> code_item
method visit_collection_op : 'monomorphic. 'env -> collection_op -> collection_op
method visit_constructor : 'monomorphic. 'env -> constructor -> constructor
method visit_definition : 'monomorphic. 'env -> definition -> definition
method visit_enum_decl : 'monomorphic. 'env -> enum_decl -> enum_decl
method visit_enum_decl_case : 'monomorphic. 'env -> enum_decl_case -> enum_decl_case
method visit_exception_to : 'monomorphic. 'env -> exception_to -> exception_to
method visit_expression : 'monomorphic. 'env -> expression -> expression
method visit_func_typ : 'monomorphic. 'env -> func_typ -> func_typ
method visit_ident : 'monomorphic. 'env -> ident -> ident
method visit_law_article : 'monomorphic. 'env -> law_article -> law_article
method visit_law_article_item : 'monomorphic. 'env -> law_article_item -> law_article_item
method visit_law_heading : 'monomorphic. 'env -> law_heading -> law_heading
method visit_law_include : 'monomorphic. 'env -> law_include -> law_include
method visit_law_structure : 'monomorphic. 'env -> law_structure -> law_structure
method visit_literal : 'monomorphic. 'env -> literal -> literal
method visit_literal_date : 'monomorphic. 'env -> literal_date -> literal_date
method visit_literal_number : 'monomorphic. 'env -> literal_number -> literal_number
method visit_literal_unit : 'monomorphic. 'env -> literal_unit -> literal_unit
method visit_marked : 'a. ('env -> 'a -> 'a) -> 'env -> 'a Pos.marked -> 'a Pos.marked
method visit_match_case : 'monomorphic. 'env -> match_case -> match_case
method visit_match_case_pattern :
'monomorphic.
'env ->
(constructor Pos.marked option * constructor Pos.marked) list * ident Pos.marked option ->
(constructor Pos.marked option * constructor Pos.marked) list * ident Pos.marked option
method visit_match_cases : 'monomorphic. 'env -> match_cases -> match_cases
method visit_meta_assertion : 'monomorphic. 'env -> meta_assertion -> meta_assertion
method visit_money_amount : 'monomorphic. 'env -> money_amount -> money_amount
method visit_op_kind : 'monomorphic. 'env -> op_kind -> op_kind
method visit_primitive_typ : 'monomorphic. 'env -> primitive_typ -> primitive_typ
method visit_program : 'monomorphic. 'env -> program -> program
method visit_program_item : 'monomorphic. 'env -> program_item -> program_item
method visit_qident : 'monomorphic. 'env -> ident Pos.marked list -> ident Pos.marked list
method visit_rule : 'monomorphic. 'env -> rule -> rule
method visit_scope_decl : 'monomorphic. 'env -> scope_decl -> scope_decl
method visit_scope_decl_context_data :
'monomorphic. 'env -> scope_decl_context_data -> scope_decl_context_data
method visit_scope_decl_context_item :
'monomorphic. 'env -> scope_decl_context_item -> scope_decl_context_item
method visit_scope_decl_context_scope :
'monomorphic. 'env -> scope_decl_context_scope -> scope_decl_context_scope
method visit_scope_use : 'monomorphic. 'env -> scope_use -> scope_use
method visit_scope_use_item : 'monomorphic. 'env -> scope_use_item -> scope_use_item
method visit_source_repr : 'monomorphic. 'env -> string Pos.marked -> string Pos.marked
method visit_struct_decl : 'monomorphic. 'env -> struct_decl -> struct_decl
method visit_struct_decl_field : 'monomorphic. 'env -> struct_decl_field -> struct_decl_field
method visit_typ : 'monomorphic. 'env -> typ -> typ
method visit_unop : 'monomorphic. 'env -> unop -> unop
method visit_variation_typ : 'monomorphic. 'env -> variation_typ -> variation_typ
end
(** {2 Program iter visitor} *)
class virtual ['self] program_iter :
object ('self)
method visit_Add : 'monomorphic. 'env -> op_kind -> unit
method visit_Aggregate : 'monomorphic. 'env -> aggregate_func -> unit
method visit_AggregateArgExtremum :
'monomorphic. 'env -> bool -> primitive_typ -> expression Pos.marked -> unit
method visit_AggregateCount : 'monomorphic. 'env -> unit
method visit_AggregateExtremum :
'monomorphic. 'env -> bool -> primitive_typ -> expression Pos.marked -> unit
method visit_AggregateSum : 'monomorphic. 'env -> primitive_typ -> unit
method visit_And : 'monomorphic. 'env -> unit
method visit_ArrayLit : 'monomorphic. 'env -> expression Pos.marked list -> unit
method visit_Assertion : 'monomorphic. 'env -> assertion -> unit
method visit_Base : 'monomorphic. 'env -> base_typ -> unit
method visit_Binop :
'monomorphic.
'env -> binop Pos.marked -> expression Pos.marked -> expression Pos.marked -> unit
method visit_Boolean : 'monomorphic. 'env -> unit
method visit_Builtin : 'monomorphic. 'env -> builtin_expression -> unit
method visit_Cardinal : 'monomorphic. 'env -> unit
method visit_CatalaFile : 'monomorphic. 'env -> string Pos.marked -> unit
method visit_CodeBlock : 'monomorphic. 'env -> code_block -> string Pos.marked -> unit
method visit_Collection : 'monomorphic. 'env -> base_typ_data Pos.marked -> unit
method visit_CollectionOp :
'monomorphic.
'env ->
collection_op Pos.marked ->
ident Pos.marked ->
expression Pos.marked ->
expression Pos.marked ->
unit
method visit_Condition : 'monomorphic. 'env -> unit
method visit_ContextData : 'monomorphic. 'env -> scope_decl_context_data -> unit
method visit_ContextScope : 'monomorphic. 'env -> scope_decl_context_scope -> unit
method visit_Data : 'monomorphic. 'env -> base_typ_data -> unit
method visit_Date : 'monomorphic. 'env -> unit
method visit_Day : 'monomorphic. 'env -> unit
method visit_Dec : 'monomorphic. 'env -> Runtime.integer -> Runtime.integer -> unit
method visit_Decimal : 'monomorphic. 'env -> unit
method visit_Decreasing : 'monomorphic. 'env -> unit
method visit_Definition : 'monomorphic. 'env -> definition -> unit
method visit_Div : 'monomorphic. 'env -> op_kind -> unit
method visit_Dotted :
'monomorphic.
'env -> expression Pos.marked -> constructor Pos.marked option -> ident Pos.marked -> unit
method visit_Duration : 'monomorphic. 'env -> unit
method visit_EnumDecl : 'monomorphic. 'env -> enum_decl -> unit
method visit_EnumInject :
'monomorphic.
'env ->
constructor Pos.marked option ->
constructor Pos.marked ->
expression Pos.marked option ->
unit
method visit_Eq : 'monomorphic. 'env -> unit
method visit_ExceptionToLabel : 'monomorphic. 'env -> ident Pos.marked -> unit
method visit_Exists : 'monomorphic. 'env -> unit
method visit_Filter : 'monomorphic. 'env -> unit
method visit_FixedBy : 'monomorphic. 'env -> qident Pos.marked -> ident Pos.marked -> unit
method visit_Forall : 'monomorphic. 'env -> unit
method visit_FunCall :
'monomorphic. 'env -> expression Pos.marked -> expression Pos.marked -> unit
method visit_Func : 'monomorphic. 'env -> func_typ -> unit
method visit_GetDay : 'monomorphic. 'env -> unit
method visit_GetMonth : 'monomorphic. 'env -> unit
method visit_GetYear : 'monomorphic. 'env -> unit
method visit_Gt : 'monomorphic. 'env -> op_kind -> unit
method visit_Gte : 'monomorphic. 'env -> op_kind -> unit
method visit_Ident : 'monomorphic. 'env -> ident -> unit
method visit_IfThenElse :
'monomorphic.
'env -> expression Pos.marked -> expression Pos.marked -> expression Pos.marked -> unit
method visit_Increasing : 'monomorphic. 'env -> unit
method visit_Int : 'monomorphic. 'env -> Runtime.integer -> unit
method visit_IntToDec : 'monomorphic. 'env -> unit
method visit_Integer : 'monomorphic. 'env -> unit
method visit_IntermediateText : 'monomorphic. 'env -> string -> unit
method visit_KDate : 'monomorphic. 'env -> unit
method visit_KDec : 'monomorphic. 'env -> unit
method visit_KDuration : 'monomorphic. 'env -> unit
method visit_KInt : 'monomorphic. 'env -> unit
method visit_KMoney : 'monomorphic. 'env -> unit
method visit_LBool : 'monomorphic. 'env -> bool -> unit
method visit_LDate : 'monomorphic. 'env -> literal_date -> unit
method visit_LMoneyAmount : 'monomorphic. 'env -> money_amount -> unit
method visit_LNumber :
'monomorphic. 'env -> literal_number Pos.marked -> literal_unit Pos.marked option -> unit
method visit_LawArticle : 'monomorphic. 'env -> law_article -> law_article_item list -> unit
method visit_LawHeading : 'monomorphic. 'env -> law_heading -> law_structure list -> unit
method visit_LawInclude : 'monomorphic. 'env -> law_include -> unit
method visit_LawStructure : 'monomorphic. 'env -> law_structure -> unit
method visit_LawText : 'monomorphic. 'env -> string -> unit
method visit_LegislativeText : 'monomorphic. 'env -> string Pos.marked -> unit
method visit_Literal : 'monomorphic. 'env -> literal -> unit
method visit_Lt : 'monomorphic. 'env -> op_kind -> unit
method visit_Lte : 'monomorphic. 'env -> op_kind -> unit
method visit_Map : 'monomorphic. 'env -> unit
method visit_MatchWith :
'monomorphic. 'env -> expression Pos.marked -> match_cases Pos.marked -> unit
method visit_MemCollection :
'monomorphic. 'env -> expression Pos.marked -> expression Pos.marked -> unit
method visit_MetaAssertion : 'monomorphic. 'env -> meta_assertion -> unit
method visit_MetadataBlock : 'monomorphic. 'env -> code_block -> string Pos.marked -> unit
method visit_Minus : 'monomorphic. 'env -> op_kind -> unit
method visit_Money : 'monomorphic. 'env -> unit
method visit_Month : 'monomorphic. 'env -> unit
method visit_Mult : 'monomorphic. 'env -> op_kind -> unit
method visit_Named : 'monomorphic. 'env -> constructor -> unit
method visit_Neq : 'monomorphic. 'env -> unit
method visit_Not : 'monomorphic. 'env -> unit
method visit_NotAnException : 'monomorphic. 'env -> unit
method visit_Or : 'monomorphic. 'env -> unit
method visit_Xor : 'monomorphic. 'env -> unit
method visit_PdfFile : 'monomorphic. 'env -> string Pos.marked -> int option -> unit
method visit_Percent : 'monomorphic. 'env -> unit
method visit_Primitive : 'monomorphic. 'env -> primitive_typ -> unit
method visit_Rule : 'monomorphic. 'env -> rule -> unit
method visit_ScopeDecl : 'monomorphic. 'env -> scope_decl -> unit
method visit_ScopeUse : 'monomorphic. 'env -> scope_use -> unit
method visit_StructDecl : 'monomorphic. 'env -> struct_decl -> unit
method visit_StructLit :
'monomorphic.
'env -> constructor Pos.marked -> (ident Pos.marked * expression Pos.marked) list -> unit
method visit_Sub : 'monomorphic. 'env -> op_kind -> unit
method visit_TestMatchCase :
'monomorphic. 'env -> expression Pos.marked -> match_case_pattern Pos.marked -> unit
method visit_Text : 'monomorphic. 'env -> unit
method visit_UnlabeledException : 'monomorphic. 'env -> unit
method visit_Unop : 'monomorphic. 'env -> unop Pos.marked -> expression Pos.marked -> unit
method visit_VariesWith :
'monomorphic.
'env -> qident Pos.marked -> expression Pos.marked -> variation_typ Pos.marked option -> unit
method visit_Year : 'monomorphic. 'env -> unit
method visit_aggregate_func : 'monomorphic. 'env -> aggregate_func -> unit
method visit_assertion : 'monomorphic. 'env -> assertion -> unit
method visit_base_typ : 'monomorphic. 'env -> base_typ -> unit
method visit_base_typ_data : 'monomorphic. 'env -> base_typ_data -> unit
method visit_binop : 'monomorphic. 'env -> binop -> unit
method visit_builtin_expression : 'monomorphic. 'env -> builtin_expression -> unit
method visit_code_block : 'monomorphic. 'env -> code_item Pos.marked list -> unit
method visit_code_item : 'monomorphic. 'env -> code_item -> unit
method visit_collection_op : 'monomorphic. 'env -> collection_op -> unit
method visit_constructor : 'monomorphic. 'env -> constructor -> unit
method visit_definition : 'monomorphic. 'env -> definition -> unit
method visit_enum_decl : 'monomorphic. 'env -> enum_decl -> unit
method visit_enum_decl_case : 'monomorphic. 'env -> enum_decl_case -> unit
method visit_exception_to : 'monomorphic. 'env -> exception_to -> unit
method visit_expression : 'monomorphic. 'env -> expression -> unit
method visit_func_typ : 'monomorphic. 'env -> func_typ -> unit
method visit_ident : 'monomorphic. 'env -> ident -> unit
method visit_law_article : 'monomorphic. 'env -> law_article -> unit
method visit_law_article_item : 'monomorphic. 'env -> law_article_item -> unit
method visit_law_heading : 'monomorphic. 'env -> law_heading -> unit
method visit_law_include : 'monomorphic. 'env -> law_include -> unit
method visit_law_structure : 'monomorphic. 'env -> law_structure -> unit
method visit_literal : 'monomorphic. 'env -> literal -> unit
method visit_literal_date : 'monomorphic. 'env -> literal_date -> unit
method visit_literal_number : 'monomorphic. 'env -> literal_number -> unit
method visit_literal_unit : 'monomorphic. 'env -> literal_unit -> unit
method visit_marked : 'a. ('env -> 'a -> unit) -> 'env -> 'a Pos.marked -> unit
method visit_match_case : 'monomorphic. 'env -> match_case -> unit
method visit_match_case_pattern :
'monomorphic.
'env ->
(constructor Pos.marked option * constructor Pos.marked) list * ident Pos.marked option ->
unit
method visit_match_cases : 'monomorphic. 'env -> match_cases -> unit
method visit_meta_assertion : 'monomorphic. 'env -> meta_assertion -> unit
method visit_money_amount : 'monomorphic. 'env -> money_amount -> unit
method visit_op_kind : 'monomorphic. 'env -> op_kind -> unit
method visit_primitive_typ : 'monomorphic. 'env -> primitive_typ -> unit
method visit_program : 'monomorphic. 'env -> program -> unit
method visit_program_item : 'monomorphic. 'env -> program_item -> unit
method visit_qident : 'monomorphic. 'env -> ident Pos.marked list -> unit
method visit_rule : 'monomorphic. 'env -> rule -> unit
method visit_scope_decl : 'monomorphic. 'env -> scope_decl -> unit
method visit_scope_decl_context_data : 'monomorphic. 'env -> scope_decl_context_data -> unit
method visit_scope_decl_context_item : 'monomorphic. 'env -> scope_decl_context_item -> unit
method visit_scope_decl_context_scope : 'monomorphic. 'env -> scope_decl_context_scope -> unit
method visit_scope_use : 'monomorphic. 'env -> scope_use -> unit
method visit_scope_use_item : 'monomorphic. 'env -> scope_use_item -> unit
method visit_source_repr : 'monomorphic. 'env -> string Pos.marked -> unit
method visit_struct_decl : 'monomorphic. 'env -> struct_decl -> unit
method visit_struct_decl_field : 'monomorphic. 'env -> struct_decl_field -> unit
method visit_typ : 'monomorphic. 'env -> typ -> unit
method visit_unop : 'monomorphic. 'env -> unop -> unit
method visit_variation_typ : 'monomorphic. 'env -> variation_typ -> unit
end
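(* Not part of this diff: a minimal usage sketch of the generated visitors listed
   above, assuming it is written inside (or opens) [Surface.Ast]. Inheriting the
   map visitor and overriding a single method rewrites every identifier of a
   parsed program while leaving the rest of the tree untouched. *)
let prefix_all_idents (p : program) : program =
  let v =
    object
      inherit [_] program_map
      method! visit_ident _env i = "renamed_" ^ i
    end
  in
  v#visit_program () p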

View File

@ -15,8 +15,13 @@
(pps sedlex.ppx visitors.ppx)))
(menhir
(modules parser)
(flags --table))
(modules tokens)
(flags --only-tokens))
(menhir
(modules tokens parser)
(merge_into parser)
(flags --external-tokens Tokens --table))
(documentation
(package catala)
@ -27,17 +32,19 @@
(rule
(with-stdout-to
parser.messages.new
(run menhir %{dep:parser.mly} --list-errors)))
(run menhir %{dep:tokens.mly} %{dep:parser.mly} --base parser --list-errors)))
(rule
(with-stdout-to
parser_errors.ml
(run menhir %{dep:parser.mly} --compile-errors %{dep:parser.messages})))
(run menhir %{dep:tokens.mly} %{dep:parser.mly} --base parser
--compile-errors %{dep:parser.messages})))
(rule
(with-stdout-to
parser.messages.updated
(run menhir %{dep:parser.mly} --update-errors %{dep:parser.messages})))
(run menhir %{dep:tokens.mly} %{dep:parser.mly} --base parser
--update-errors %{dep:parser.messages})))
(rule
(alias update-parser-messages)

View File

@ -14,7 +14,7 @@
(** Concise syntax with English abbreviated keywords. *)
open Parser
open Tokens
open Sedlexing
open Utils
open Lexer_common
@ -130,6 +130,10 @@ let token_list : (string * token) list =
]
@ token_list_language_agnostic
(** Localised builtin functions *)
let builtins : (string * Ast.builtin_expression) list =
[ ("int_to_dec", IntToDec); ("get_day", GetDay); ("get_month", GetMonth); ("get_year", GetYear) ]
(** Main lexing function used in a code block *)
let rec lex_code (lexbuf : lexbuf) : token =
let prev_lexeme = Utf8.lexeme lexbuf in
@ -298,18 +302,6 @@ let rec lex_code (lexbuf : lexbuf) : token =
| "|]" ->
update_acc lexbuf;
CONSEQUENCE
| "int_to_dec" ->
update_acc lexbuf;
INT_TO_DEC
| "get_day" ->
update_acc lexbuf;
GET_DAY
| "get_month" ->
update_acc lexbuf;
GET_MONTH
| "get_year" ->
update_acc lexbuf;
GET_YEAR
| "maximum" ->
update_acc lexbuf;
MAXIMUM
@ -595,3 +587,21 @@ let lex_law (lexbuf : lexbuf) : token =
(** Entry point of the lexer, distributes to {!val: lex_code} or {!val: lex_law} depending on {!val:
is_code}. *)
let lexer (lexbuf : lexbuf) : token = if !is_code then lex_code lexbuf else lex_law lexbuf
module type LocalisedLexer = sig
val token_list : (string * Tokens.token) list
(** Same as {!val: token_list_language_agnostic}, but with tokens specialized to a given language. *)
val builtins : (string * Ast.builtin_expression) list
(** Associative list mapping strings to their corresponding builtins *)
val lex_code : Sedlexing.lexbuf -> Tokens.token
(** Main lexing function used in code blocks *)
val lex_law : Sedlexing.lexbuf -> Tokens.token
(** Main lexing function used outside code blocks *)
val lexer : Sedlexing.lexbuf -> Tokens.token
(** Entry point of the lexer, distributes to {!val: lex_code} or {!val: lex_law} depending on
{!val: Surface.Lexer.is_code}. *)
end

View File

@ -12,7 +12,9 @@
or implied. See the License for the specific language governing permissions and limitations under
the License. *)
(** Concise syntax with English abbreviated keywords. *)
(** Basic lexer with abbreviated syntax, and support for localised implementations. *)
(** Auxiliary functions used by all lexers. *)
val is_code : bool ref
(** Boolean reference, used by the lexer as the mutable state to distinguish whether it is lexing
@ -29,20 +31,28 @@ val update_acc : Sedlexing.lexbuf -> unit
val raise_lexer_error : Utils.Pos.t -> string -> 'a
(** Error-generating helper *)
val token_list_language_agnostic : (string * Parser.token) list
val token_list_language_agnostic : (string * Tokens.token) list
(** Associative list matching each punctuation string part of the Catala syntax with its {!module:
Surface.Parser} token. Same for all the input languages (English, French, etc.) *)
val token_list : (string * Parser.token) list
(** Same as {!val: token_list_language_agnostic}, but with tokens whose string varies with the input
language. *)
module type LocalisedLexer = sig
val token_list : (string * Tokens.token) list
(** Same as {!val: token_list_language_agnostic}, but with tokens whose string varies with the
input language. *)
val lex_code : Sedlexing.lexbuf -> Parser.token
(** Main lexing function used in a code block *)
val builtins : (string * Ast.builtin_expression) list
(** Associative list mapping strings to their corresponding builtins *)
val lex_law : Sedlexing.lexbuf -> Parser.token
(** Main lexing function used outside code blocks *)
val lex_code : Sedlexing.lexbuf -> Tokens.token
(** Main lexing function used in a code block *)
val lexer : Sedlexing.lexbuf -> Parser.token
(** Entry point of the lexer, distributes to {!val: lex_code} or {!val: lex_law} depending on {!val:
is_code}. *)
val lex_law : Sedlexing.lexbuf -> Tokens.token
(** Main lexing function used outside code blocks *)
val lexer : Sedlexing.lexbuf -> Tokens.token
(** Entry point of the lexer, distributes to {!val: lex_code} or {!val: lex_law} depending on
{!val: is_code}. *)
end
include LocalisedLexer
(** Concise syntax with English abbreviated keywords. *)

View File

@ -12,7 +12,7 @@
or implied. See the License for the specific language governing permissions and limitations under
the License. *)
open Parser
open Tokens
open Sedlexing
module R = Re.Pcre

View File

@ -15,5 +15,5 @@
val calc_precedence : string -> int
(** Calculates the precedence according to a matched regex of the form '[#]+' *)
val get_law_heading : Sedlexing.lexbuf -> Parser.token
val get_law_heading : Sedlexing.lexbuf -> Tokens.token
(** Gets the [LAW_HEADING] token from the current [lexbuf] *)

View File

@ -12,7 +12,7 @@
or implied. See the License for the specific language governing permissions and limitations under
the License. *)
open Parser
open Tokens
open Sedlexing
open Utils
open Lexer_common
@ -21,7 +21,7 @@ module R = Re.Pcre
(** Same as {!val: Surface.Lexer.token_list_language_agnostic}, but with tokens specialized to
English. *)
let token_list_en : (string * token) list =
let token_list : (string * token) list =
[
("scope", SCOPE);
("consequence", CONSEQUENCE);
@ -85,19 +85,28 @@ let token_list_en : (string * token) list =
]
@ L.token_list_language_agnostic
(** Localised builtin functions *)
let builtins : (string * Ast.builtin_expression) list =
[
("integer_to_decimal", IntToDec);
("get_day", GetDay);
("get_month", GetMonth);
("get_year", GetYear);
]
(** Main lexing function used in code blocks *)
let rec lex_code_en (lexbuf : lexbuf) : token =
let rec lex_code (lexbuf : lexbuf) : token =
let prev_lexeme = Utf8.lexeme lexbuf in
let prev_pos = lexing_positions lexbuf in
match%sedlex lexbuf with
| white_space ->
(* Whitespaces *)
L.update_acc lexbuf;
lex_code_en lexbuf
lex_code lexbuf
| '#', Star (Compl '\n'), '\n' ->
(* Comments *)
L.update_acc lexbuf;
lex_code_en lexbuf
lex_code lexbuf
| "```" ->
(* End of code section *)
L.is_code := false;
@ -253,18 +262,6 @@ let rec lex_code_en (lexbuf : lexbuf) : token =
| "not" ->
L.update_acc lexbuf;
NOT
| "integer_to_decimal" ->
L.update_acc lexbuf;
INT_TO_DEC
| "get_day" ->
L.update_acc lexbuf;
GET_DAY
| "get_month" ->
L.update_acc lexbuf;
GET_MONTH
| "get_year" ->
L.update_acc lexbuf;
GET_YEAR
| "maximum" ->
L.update_acc lexbuf;
MAXIMUM
@ -487,7 +484,7 @@ let rec lex_code_en (lexbuf : lexbuf) : token =
| _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme
(** Main lexing function used outside code blocks *)
let lex_law_en (lexbuf : lexbuf) : token =
let lex_law (lexbuf : lexbuf) : token =
let prev_lexeme = Utf8.lexeme lexbuf in
let prev_pos = lexing_positions lexbuf in
match%sedlex lexbuf with
@ -548,7 +545,6 @@ let lex_law_en (lexbuf : lexbuf) : token =
| Plus (Compl ('/' | '#' | '`' | '>')) -> LAW_TEXT (Utf8.lexeme lexbuf)
| _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme
(** Entry point of the lexer, distributes to {!val: lex_code_en} or {!val: lex_law_en} depending on
{!val: Surface.Lexer.is_code}. *)
let lexer_en (lexbuf : lexbuf) : token =
if !L.is_code then lex_code_en lexbuf else lex_law_en lexbuf
(** Entry point of the lexer, distributes to {!val: lex_code} or {!val: lex_law} depending on {!val:
Surface.Lexer.is_code}. *)
let lexer (lexbuf : lexbuf) : token = if !L.is_code then lex_code lexbuf else lex_law lexbuf

View File

@ -12,16 +12,4 @@
or implied. See the License for the specific language governing permissions and limitations under
the License. *)
val token_list_en : (string * Parser.token) list
(** Same as {!val: Surface.Lexer.token_list_language_agnostic}, but with tokens specialized to
English. *)
val lex_code_en : Sedlexing.lexbuf -> Parser.token
(** Main lexing function used in code blocks *)
val lex_law_en : Sedlexing.lexbuf -> Parser.token
(** Main lexing function used outside code blocks *)
val lexer_en : Sedlexing.lexbuf -> Parser.token
(** Entry point of the lexer, distributes to {!val: lex_code_en} or {!val: lex_law_en} depending on
{!val: Surface.Lexer.is_code}. *)
include Lexer.LocalisedLexer

View File

@ -12,7 +12,7 @@
or implied. See the License for the specific language governing permissions and limitations under
the License. *)
open Parser
open Tokens
open Sedlexing
open Utils
open Lexer_common
@ -21,7 +21,7 @@ module R = Re.Pcre
(** Same as {!val: Surface.Lexer.token_list_language_agnostic}, but with tokens specialized to
French. *)
let token_list_fr : (string * token) list =
let token_list : (string * token) list =
[
("champ d'application", SCOPE);
("conséquence", CONSEQUENCE);
@ -83,19 +83,27 @@ let token_list_fr : (string * token) list =
]
@ L.token_list_language_agnostic
let builtins : (string * Ast.builtin_expression) list =
[
("entier_vers_décimal", Ast.IntToDec);
("accès_jour", Ast.GetDay);
("accès_mois", Ast.GetMonth);
("accès_année", Ast.GetYear);
]
(** Main lexing function used in code blocks *)
let rec lex_code_fr (lexbuf : lexbuf) : token =
let rec lex_code (lexbuf : lexbuf) : token =
let prev_lexeme = Utf8.lexeme lexbuf in
let prev_pos = lexing_positions lexbuf in
match%sedlex lexbuf with
| white_space | '\n' ->
(* Whitespaces *)
L.update_acc lexbuf;
lex_code_fr lexbuf
lex_code lexbuf
| '#', Star (Compl '\n'), '\n' ->
(* Comments *)
L.update_acc lexbuf;
lex_code_fr lexbuf
lex_code lexbuf
| "```" ->
(* End of code section *)
L.is_code := false;
@ -275,18 +283,6 @@ let rec lex_code_fr (lexbuf : lexbuf) : token =
| "initial" ->
L.update_acc lexbuf;
INIT
| "entier_vers_d", 0xE9, "cimal" ->
L.update_acc lexbuf;
INT_TO_DEC
| "acc", 0xE8, "s_jour" ->
L.update_acc lexbuf;
GET_DAY
| "acc", 0xE8, "s_mois" ->
L.update_acc lexbuf;
GET_MONTH
| "acc", 0xE8, "s_ann", 0xE9, "e" ->
L.update_acc lexbuf;
GET_YEAR
| "vrai" ->
L.update_acc lexbuf;
TRUE
@ -494,7 +490,7 @@ let rec lex_code_fr (lexbuf : lexbuf) : token =
| _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme
(** Main lexing function used outside code blocks *)
let lex_law_fr (lexbuf : lexbuf) : token =
let lex_law (lexbuf : lexbuf) : token =
let prev_lexeme = Utf8.lexeme lexbuf in
let prev_pos = lexing_positions lexbuf in
match%sedlex lexbuf with
@ -564,7 +560,6 @@ let lex_law_fr (lexbuf : lexbuf) : token =
| Plus (Compl ('/' | '#' | '`' | '>')) -> LAW_TEXT (Utf8.lexeme lexbuf)
| _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme
(** Entry point of the lexer, distributes to {!val: lex_code_fr} or {!val: lex_law_fr} depending on
{!val: Surface.Lexer.is_code}. *)
let lexer_fr (lexbuf : lexbuf) : token =
if !L.is_code then lex_code_fr lexbuf else lex_law_fr lexbuf
(** Entry point of the lexer, distributes to {!val: lex_code} or {!val: lex_law} depending on {!val:
Surface.Lexer.is_code}. *)
let lexer (lexbuf : lexbuf) : token = if !L.is_code then lex_code lexbuf else lex_law lexbuf

View File

@ -12,16 +12,4 @@
or implied. See the License for the specific language governing permissions and limitations under
the License. *)
val token_list_fr : (string * Parser.token) list
(** Same as {!val: Surface.Lexer.token_list_language_agnostic}, but with tokens specialized to
French. *)
val lex_code_fr : Sedlexing.lexbuf -> Parser.token
(** Main lexing function used in code blocks *)
val lex_law_fr : Sedlexing.lexbuf -> Parser.token
(** Main lexing function used outside code blocks *)
val lexer_fr : Sedlexing.lexbuf -> Parser.token
(** Entry point of the lexer, distributes to {!val: lex_code_fr} or {!val: lex_law_fr} depending on
{!val: Surface.Lexer.is_code}. *)
include Lexer.LocalisedLexer

File diff suppressed because it is too large

View File

@ -17,49 +17,12 @@
*)
%{
open Ast
open Utils
%}
%token EOF
%token<string * string option * string option * int> LAW_ARTICLE
%token<string * int> LAW_HEADING
%token<Ast.law_include> LAW_INCLUDE
%token<string> LAW_TEXT
%token<string> CONSTRUCTOR IDENT
%token<string> END_CODE
%token<Runtime.integer> INT_LITERAL
%token TRUE FALSE
%token<Runtime.integer * Runtime.integer> DECIMAL_LITERAL
%token<Runtime.integer * Runtime.integer> MONEY_AMOUNT
%token BEGIN_CODE TEXT MASTER_FILE
%token COLON ALT DATA VERTICAL
%token OF INTEGER COLLECTION
%token RULE CONDITION DEFINED_AS
%token LESSER GREATER LESSER_EQUAL GREATER_EQUAL
%token LESSER_DEC GREATER_DEC LESSER_EQUAL_DEC GREATER_EQUAL_DEC
%token LESSER_MONEY GREATER_MONEY LESSER_EQUAL_MONEY GREATER_EQUAL_MONEY
%token LESSER_DATE GREATER_DATE LESSER_EQUAL_DATE GREATER_EQUAL_DATE
%token LESSER_DURATION GREATER_DURATION LESSER_EQUAL_DURATION GREATER_EQUAL_DURATION
%token EXISTS IN SUCH THAT
%token DOT AND OR XOR LPAREN RPAREN EQUAL
%token CARDINAL ASSERTION FIXED BY YEAR MONTH DAY
%token PLUS MINUS MULT DIV
%token PLUSDEC MINUSDEC MULTDEC DIVDEC
%token PLUSMONEY MINUSMONEY MULTMONEY DIVMONEY
%token MINUSDATE PLUSDATE PLUSDURATION MINUSDURATION
%token MATCH WITH VARIES WITH_V
%token FOR ALL WE_HAVE INCREASING DECREASING
%token NOT BOOLEAN PERCENT DURATION
%token SCOPE FILLED NOT_EQUAL DEFINITION
%token STRUCT CONTENT IF THEN DEPENDS DECLARATION
%token CONTEXT ENUM ELSE DATE SUM
%token BEGIN_METADATA END_METADATA MONEY DECIMAL
%token UNDER_CONDITION CONSEQUENCE LBRACKET RBRACKET
%token LABEL EXCEPTION LSQUARE RSQUARE SEMICOLON
%token INT_TO_DEC MAXIMUM MINIMUM INIT
%token GET_DAY GET_MONTH GET_YEAR
%token FILTER MAP
%parameter<Localisation: sig
val builtins: (string * Ast.builtin_expression) list
end>
%type <Ast.source_file_or_master> source_file_or_master
@ -98,7 +61,9 @@ qident:
}
atomic_expression:
| q = ident { let (q, q_pos) = q in (Ident q, q_pos) }
| q = IDENT {
(try Builtin (List.assoc q Localisation.builtins) with Not_found -> Ident q),
Pos.from_lpos $sloc }
| l = literal { let (l, l_pos) = l in (Literal l, l_pos) }
| LPAREN e = expression RPAREN { e }
@ -135,18 +100,6 @@ struct_or_enum_inject:
(Builtin Cardinal, Pos.from_lpos $sloc)
}
| INT_TO_DEC {
(Builtin IntToDec, Pos.from_lpos $sloc)
}
| GET_DAY {
(Builtin GetDay, Pos.from_lpos $sloc)
}
| GET_MONTH {
(Builtin GetMonth, Pos.from_lpos $sloc)
}
| GET_YEAR {
(Builtin GetYear, Pos.from_lpos $sloc)
}
| e = struct_or_enum_inject {
e
}
@ -473,7 +426,13 @@ struct_or_enum_inject:
}
ident:
| i = IDENT { (i, Pos.from_lpos $sloc) }
| i = IDENT {
if List.mem_assoc i Localisation.builtins then
Errors.raise_spanned_error
(Printf.sprintf "Reserved builtin name")
(Pos.from_lpos $sloc)
else (i, Pos.from_lpos $sloc)
}
condition_pos:
| CONDITION { Pos.from_lpos $sloc }

View File

@ -16,16 +16,9 @@
open Sedlexing
open Utils
module I = Parser.MenhirInterpreter
(** {1 Internal functions} *)
(** Returns the state number from the Menhir environment *)
let state (env : 'semantic_value I.env) : int =
match Lazy.force (I.stack env) with
| MenhirLib.General.Nil -> 0
| MenhirLib.General.Cons (Element (s, _, _, _), _) -> I.number s
(** Three-way minimum *)
let minimum a b c = min a (min b c)
@ -113,106 +106,133 @@ let raise_parser_error (error_loc : Pos.t) (last_good_loc : Pos.t option) (token
| None -> []
| Some last_good_loc -> [ (Some "Last good token:", last_good_loc) ]))
(** Usage: [fail lexbuf env token_list last_input_needed]
module ParserAux (LocalisedLexer : Lexer.LocalisedLexer) = struct
include Parser.Make (LocalisedLexer)
module I = MenhirInterpreter
Raises an error with meaningful hints about what the parsing error was. [lexbuf] is the lexing
buffer state at the failure point, [env] is the Menhir environment and [last_input_needed] is
the last checkpoint of a valid Menhir state before the parsing error. [token_list] is provided
by things like {!val: Surface.Lexer.token_list_language_agnostic} and is used to provide
suggestions of the tokens acceptable at the failure point *)
let fail (lexbuf : lexbuf) (env : 'semantic_value I.env) (token_list : (string * Parser.token) list)
(last_input_needed : 'semantic_value I.env option) : 'a =
let wrong_token = Utf8.lexeme lexbuf in
let acceptable_tokens, last_positions =
match last_input_needed with
| Some last_input_needed ->
( List.filter
(fun (_, t) ->
I.acceptable (I.input_needed last_input_needed) t (fst (lexing_positions lexbuf)))
token_list,
Some (I.positions last_input_needed) )
| None -> (token_list, None)
in
let similar_acceptable_tokens =
List.sort
(fun (x, _) (y, _) ->
let truncated_x =
if String.length wrong_token <= String.length x then
String.sub x 0 (String.length wrong_token)
else x
in
let truncated_y =
if String.length wrong_token <= String.length y then
String.sub y 0 (String.length wrong_token)
else y
in
let levx = levenshtein_distance truncated_x wrong_token in
let levy = levenshtein_distance truncated_y wrong_token in
if levx = levy then String.length x - String.length y else levx - levy)
acceptable_tokens
in
let similar_token_msg =
if List.length similar_acceptable_tokens = 0 then None
else
Some
(Printf.sprintf "did you mean %s?"
(String.concat ", or maybe "
(List.map
(fun (ts, _) -> Cli.print_with_style syntax_hints_style "\"%s\"" ts)
similar_acceptable_tokens)))
in
(* The parser has suspended itself because of a syntax error. Stop. *)
let custom_menhir_message =
match Parser_errors.message (state env) with
| exception Not_found ->
"Message: " ^ Cli.print_with_style syntax_hints_style "%s" "unexpected token"
| msg ->
"Message: "
^ Cli.print_with_style syntax_hints_style "%s" (String.trim (String.uncapitalize_ascii msg))
in
let msg =
match similar_token_msg with
| None -> custom_menhir_message
| Some similar_token_msg ->
Printf.sprintf "%s\nAutosuggestion: %s" custom_menhir_message similar_token_msg
in
raise_parser_error
(Pos.from_lpos (lexing_positions lexbuf))
(Option.map Pos.from_lpos last_positions)
(Utf8.lexeme lexbuf) msg
(** Returns the state number from the Menhir environment *)
let state (env : 'semantic_value I.env) : int =
match Lazy.force (I.stack env) with
| MenhirLib.General.Nil -> 0
| MenhirLib.General.Cons (Element (s, _, _, _), _) -> I.number s
(** Main parsing loop *)
let rec loop (next_token : unit -> Parser.token * Lexing.position * Lexing.position)
(token_list : (string * Parser.token) list) (lexbuf : lexbuf)
(last_input_needed : 'semantic_value I.env option) (checkpoint : 'semantic_value I.checkpoint) :
Ast.source_file_or_master =
match checkpoint with
| I.InputNeeded env ->
let token = next_token () in
let checkpoint = I.offer checkpoint token in
loop next_token token_list lexbuf (Some env) checkpoint
| I.Shifting _ | I.AboutToReduce _ ->
let checkpoint = I.resume checkpoint in
loop next_token token_list lexbuf last_input_needed checkpoint
| I.HandlingError env -> fail lexbuf env token_list last_input_needed
| I.Accepted v -> v
| I.Rejected ->
(* Cannot happen as we stop at syntax error immediately *)
assert false
(** Usage: [fail lexbuf env token_list last_input_needed]
(** Stub that wraps the parsing main loop and handles the Menhir/Sedlex type difference for
[lexbuf]. *)
let sedlex_with_menhir (lexer' : lexbuf -> Parser.token) (token_list : (string * Parser.token) list)
(target_rule : Lexing.position -> 'semantic_value I.checkpoint) (lexbuf : lexbuf) :
Ast.source_file_or_master =
let lexer : unit -> Parser.token * Lexing.position * Lexing.position =
with_tokenizer lexer' lexbuf
in
try loop lexer token_list lexbuf None (target_rule (fst @@ Sedlexing.lexing_positions lexbuf))
with Sedlexing.MalFormed | Sedlexing.InvalidCodepoint _ ->
Lexer.raise_lexer_error (Pos.from_lpos (lexing_positions lexbuf)) (Utf8.lexeme lexbuf)
Raises an error with meaningful hints about what the parsing error was. [lexbuf] is the lexing
buffer state at the failure point, [env] is the Menhir environment and [last_input_needed] is
the last checkpoint of a valid Menhir state before the parsing error. [token_list] is provided
by things like {!val: Surface.Lexer.token_list_language_agnostic} and is used to provide
suggestions of the tokens acceptable at the failure point *)
let fail (lexbuf : lexbuf) (env : 'semantic_value I.env)
(token_list : (string * Tokens.token) list) (last_input_needed : 'semantic_value I.env option)
: 'a =
let wrong_token = Utf8.lexeme lexbuf in
let acceptable_tokens, last_positions =
match last_input_needed with
| Some last_input_needed ->
( List.filter
(fun (_, t) ->
I.acceptable (I.input_needed last_input_needed) t (fst (lexing_positions lexbuf)))
token_list,
Some (I.positions last_input_needed) )
| None -> (token_list, None)
in
let similar_acceptable_tokens =
List.sort
(fun (x, _) (y, _) ->
let truncated_x =
if String.length wrong_token <= String.length x then
String.sub x 0 (String.length wrong_token)
else x
in
let truncated_y =
if String.length wrong_token <= String.length y then
String.sub y 0 (String.length wrong_token)
else y
in
let levx = levenshtein_distance truncated_x wrong_token in
let levy = levenshtein_distance truncated_y wrong_token in
if levx = levy then String.length x - String.length y else levx - levy)
acceptable_tokens
in
let similar_token_msg =
if List.length similar_acceptable_tokens = 0 then None
else
Some
(Printf.sprintf "did you mean %s?"
(String.concat ", or maybe "
(List.map
(fun (ts, _) -> Cli.print_with_style syntax_hints_style "\"%s\"" ts)
similar_acceptable_tokens)))
in
(* The parser has suspended itself because of a syntax error. Stop. *)
let custom_menhir_message =
match Parser_errors.message (state env) with
| exception Not_found ->
"Message: " ^ Cli.print_with_style syntax_hints_style "%s" "unexpected token"
| msg ->
"Message: "
^ Cli.print_with_style syntax_hints_style "%s"
(String.trim (String.uncapitalize_ascii msg))
in
let msg =
match similar_token_msg with
| None -> custom_menhir_message
| Some similar_token_msg ->
Printf.sprintf "%s\nAutosuggestion: %s" custom_menhir_message similar_token_msg
in
raise_parser_error
(Pos.from_lpos (lexing_positions lexbuf))
(Option.map Pos.from_lpos last_positions)
(Utf8.lexeme lexbuf) msg
(** {1 Parsing multiple files}*)
(** Main parsing loop *)
let rec loop (next_token : unit -> Tokens.token * Lexing.position * Lexing.position)
(token_list : (string * Tokens.token) list) (lexbuf : lexbuf)
(last_input_needed : 'semantic_value I.env option) (checkpoint : 'semantic_value I.checkpoint)
: Ast.source_file_or_master =
match checkpoint with
| I.InputNeeded env ->
let token = next_token () in
let checkpoint = I.offer checkpoint token in
loop next_token token_list lexbuf (Some env) checkpoint
| I.Shifting _ | I.AboutToReduce _ ->
let checkpoint = I.resume checkpoint in
loop next_token token_list lexbuf last_input_needed checkpoint
| I.HandlingError env -> fail lexbuf env token_list last_input_needed
| I.Accepted v -> v
| I.Rejected ->
(* Cannot happen as we stop at syntax error immediately *)
assert false
(** Stub that wraps the parsing main loop and handles the Menhir/Sedlex type difference for
[lexbuf]. *)
let sedlex_with_menhir (lexer' : lexbuf -> Tokens.token)
(token_list : (string * Tokens.token) list)
(target_rule : Lexing.position -> 'semantic_value I.checkpoint) (lexbuf : lexbuf) :
Ast.source_file_or_master =
let lexer : unit -> Tokens.token * Lexing.position * Lexing.position =
with_tokenizer lexer' lexbuf
in
try loop lexer token_list lexbuf None (target_rule (fst @@ Sedlexing.lexing_positions lexbuf))
with Sedlexing.MalFormed | Sedlexing.InvalidCodepoint _ ->
Lexer.raise_lexer_error (Pos.from_lpos (lexing_positions lexbuf)) (Utf8.lexeme lexbuf)
let commands_or_includes (lexbuf : lexbuf) : Ast.source_file_or_master =
sedlex_with_menhir LocalisedLexer.lexer LocalisedLexer.token_list
Incremental.source_file_or_master lexbuf
end
module Parser_NonVerbose = ParserAux (Lexer)
module Parser_En = ParserAux (Lexer_en)
module Parser_Fr = ParserAux (Lexer_fr)
let localised_parser : Cli.frontend_lang -> lexbuf -> Ast.source_file_or_master = function
| `NonVerbose -> Parser_NonVerbose.commands_or_includes
| `En -> Parser_En.commands_or_includes
| `Fr -> Parser_Fr.commands_or_includes
(** {1 Parsing multiple files} *)
(** Parses a single source file *)
let rec parse_source_file (source_file : Pos.input_file) (language : Cli.frontend_lang) :
@ -231,21 +251,7 @@ let rec parse_source_file (source_file : Pos.input_file) (language : Cli.fronten
let source_file_name = match source_file with FileName s -> s | Contents _ -> "stdin" in
Sedlexing.set_filename lexbuf source_file_name;
Parse_utils.current_file := source_file_name;
let lexer_lang =
match language with
| `Fr -> Lexer_fr.lexer_fr
| `En -> Lexer_en.lexer_en
| `NonVerbose -> Lexer.lexer
in
let token_list_lang =
match language with
| `Fr -> Lexer_fr.token_list_fr
| `En -> Lexer_en.token_list_en
| `NonVerbose -> Lexer.token_list
in
let commands_or_includes =
sedlex_with_menhir lexer_lang token_list_lang Parser.Incremental.source_file_or_master lexbuf
in
let commands_or_includes = localised_parser language lexbuf in
(match input with Some input -> close_in input | None -> ());
match commands_or_includes with
| Ast.SourceFile commands ->

View File

@ -0,0 +1,62 @@
(*
This file is part of the Catala compiler, a specification language for tax and social benefits
computation rules.
Copyright (C) 2020 Inria, contributor: Denis Merigoux <denis.merigoux@inria.fr>
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*)
%{
open Ast
%}
%token EOF
%token<string * string option * string option * int> LAW_ARTICLE
%token<string * int> LAW_HEADING
%token<Ast.law_include> LAW_INCLUDE
%token<string> LAW_TEXT
%token<string> CONSTRUCTOR IDENT
%token<string> END_CODE
%token<Runtime.integer> INT_LITERAL
%token TRUE FALSE
%token<Runtime.integer * Runtime.integer> DECIMAL_LITERAL
%token<Runtime.integer * Runtime.integer> MONEY_AMOUNT
%token BEGIN_CODE TEXT MASTER_FILE
%token COLON ALT DATA VERTICAL
%token OF INTEGER COLLECTION
%token RULE CONDITION DEFINED_AS
%token LESSER GREATER LESSER_EQUAL GREATER_EQUAL
%token LESSER_DEC GREATER_DEC LESSER_EQUAL_DEC GREATER_EQUAL_DEC
%token LESSER_MONEY GREATER_MONEY LESSER_EQUAL_MONEY GREATER_EQUAL_MONEY
%token LESSER_DATE GREATER_DATE LESSER_EQUAL_DATE GREATER_EQUAL_DATE
%token LESSER_DURATION GREATER_DURATION LESSER_EQUAL_DURATION GREATER_EQUAL_DURATION
%token EXISTS IN SUCH THAT
%token DOT AND OR XOR LPAREN RPAREN EQUAL
%token CARDINAL ASSERTION FIXED BY YEAR MONTH DAY
%token PLUS MINUS MULT DIV
%token PLUSDEC MINUSDEC MULTDEC DIVDEC
%token PLUSMONEY MINUSMONEY MULTMONEY DIVMONEY
%token MINUSDATE PLUSDATE PLUSDURATION MINUSDURATION
%token MATCH WITH VARIES WITH_V
%token FOR ALL WE_HAVE INCREASING DECREASING
%token NOT BOOLEAN PERCENT DURATION
%token SCOPE FILLED NOT_EQUAL DEFINITION
%token STRUCT CONTENT IF THEN DEPENDS DECLARATION
%token CONTEXT ENUM ELSE DATE SUM
%token BEGIN_METADATA END_METADATA MONEY DECIMAL
%token UNDER_CONDITION CONSEQUENCE LBRACKET RBRACKET
%token LABEL EXCEPTION LSQUARE RSQUARE SEMICOLON
%token MAXIMUM MINIMUM INIT
%token FILTER MAP
%%