2022-03-08 17:03:14 +03:00
|
|
|
(* This file is part of the Catala compiler, a specification language for tax
   and social benefits computation rules. Copyright (C) 2020 Inria,
   contributors: Denis Merigoux <denis.merigoux@inria.fr>, Emile Rolley
   <emile.rolley@tuta.io>

   Licensed under the Apache License, Version 2.0 (the "License"); you may not
   use this file except in compliance with the License. You may obtain a copy of
   the License at

   http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
   WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
   License for the specific language governing permissions and limitations under
   the License. *)
|
|
|
|
|
2024-09-25 16:20:45 +03:00
|
|
|
open Catala_utils
|
|
|
|
|
2021-05-26 18:39:39 +03:00
|
|
|
(** Auxiliary functions used by all lexers. *)
|
|
|
|
|
2024-09-25 16:20:45 +03:00
|
|
|
type lexing_context = Law | Raw | Code | Directive | Directive_args | Inactive
(** The different contexts the lexer can be in. The current one is stored in
    {!val:context}.

    NOTE(review): the exact meaning of each constructor is not visible from
    this interface. Presumably [Law] is the text outside code blocks and
    [Code] the inside of a code block; confirm the others against the lexer
    implementations. *)
|
|
|
|
|
|
|
|
val with_lexing_context : File.t -> (unit -> 'a) -> 'a
|
|
|
|
(** [with_lexing_context file f] initialises the lexing context during the
    call of the supplied function [f], which is required for using the lexer,
    and returns the result of [f ()]. Calls can be nested. Upon termination,
    emits a warning if the lexer is not in a consistent state ([Law] context,
    no pending code content).

    NOTE(review): [file] is presumably the file being lexed; how it is used is
    not visible from this interface, confirm against the implementation. *)
|
2021-05-26 18:39:39 +03:00
|
|
|
|
2021-08-17 16:49:48 +03:00
|
|
|
val context : lexing_context ref
|
2022-03-08 17:03:14 +03:00
|
|
|
(** Reference holding the current {!type:lexing_context}. It is used by the
    lexer as the mutable state to distinguish whether it is lexing code or
    law. *)
|
2021-08-17 16:49:48 +03:00
|
|
|
|
2024-09-25 16:20:45 +03:00
|
|
|
val update_acc : Sedlexing.lexbuf -> unit
|
|
|
|
(** [update_acc lexbuf] updates the current code buffer with the current
    lexeme of [lexbuf]. The underlying buffer is used to accumulate the string
    representation of the body of code being lexed. This string representation
    is used in the literate programming backends to faithfully capture the
    spacing pattern of the original program. See {!val:flush_acc} to retrieve
    the accumulated contents. *)
|
2021-05-26 18:39:39 +03:00
|
|
|
|
2024-09-25 16:20:45 +03:00
|
|
|
val flush_acc : unit -> string
|
|
|
|
(** Flushes the code buffer and returns its contents as a string (see
    {!val:update_acc}, which fills that buffer). *)
|
2021-05-26 18:39:39 +03:00
|
|
|
|
2024-07-30 16:20:51 +03:00
|
|
|
exception Lexing_error of (Catala_utils.Pos.t * string)
(** Lexing exception carrying a source position and a string.

    NOTE(review): where this exception is raised, and whether the string is an
    error message or the offending lexeme, is not visible from this interface;
    confirm against the implementation. *)
|
|
|
|
|
2022-11-21 12:46:17 +03:00
|
|
|
val raise_lexer_error : Catala_utils.Pos.t -> string -> 'a
|
2021-05-26 18:39:39 +03:00
|
|
|
(** Error-generating helper. It takes a source position and a string; its
    fully polymorphic result type indicates that it does not return normally.

    NOTE(review): the role of the string argument (error message vs. offending
    token) and the exact error raised are not visible from this interface;
    confirm against the implementation. *)
|
|
|
|
|
|
|
|
val token_list_language_agnostic : (string * Tokens.token) list
|
2022-03-08 17:03:14 +03:00
|
|
|
(** Association list matching each punctuation string that is part of the
    Catala syntax with its {!Surface.Parser} token. This list is the same for
    all the input languages (English, French, etc.). *)
|
2021-05-26 18:39:39 +03:00
|
|
|
|
2021-03-09 22:57:41 +03:00
|
|
|
val calc_precedence : string -> int
|
|
|
|
(** Calculates the precedence according to a matched regex of the form:
    '[#]+'.

    NOTE(review): presumably the precedence is derived from the number of #
    characters in the matched string; confirm against the implementation. *)
|
|
|
|
|
2021-04-30 10:59:09 +03:00
|
|
|
val get_law_heading : Sedlexing.lexbuf -> Tokens.token
|
2021-03-09 22:57:41 +03:00
|
|
|
(** [get_law_heading lexbuf] gets the [LAW_HEADING] token from the current
    [lexbuf]. *)
|
2021-05-26 18:39:39 +03:00
|
|
|
|
2023-09-11 17:44:35 +03:00
|
|
|
(** Simplified tokens for dependency extraction. The comment next to each
    constructor shows the shape of the source line it stands for. *)
|
|
|
|
type line_token =
|
|
|
|
| LINE_TEST of string (* line of the form: ```catala-test { id = xx }; NOTE(review): the string presumably carries the id, confirm *)
|
|
|
|
| LINE_INLINE_TEST (* line of the form: ```catala-test-inline *)
|
|
|
|
| LINE_BLOCK_END (* closing fence of a block: ``` *)
|
|
|
|
| LINE_INCLUDE of string (* line of the form: > Include foo.catala_en; NOTE(review): the string is presumably the included file name, confirm *)
|
2023-12-01 17:24:54 +03:00
|
|
|
| LINE_MODULE_DEF of string * bool (* line of the form: > Module Xxx [external]; NOTE(review): presumably the module name and whether "external" is present, confirm *)
|
2023-09-11 17:44:35 +03:00
|
|
|
| LINE_MODULE_USE of string (* line of the form: > Using Xxx [as Yyy]; NOTE(review): the string is presumably the used module name, confirm *)
|
|
|
|
| LINE_ANY (* anything else *)
|
|
|
|
|
2021-05-26 18:39:39 +03:00
|
|
|
(** Signature gathering the lexing entry points whose tokens depend on the
    input language.

    NOTE(review): presumably implemented once per supported input language;
    confirm against the lexer modules. *)
module type LocalisedLexer = sig
|
|
|
|
val token_list : (string * Tokens.token) list
|
2022-04-14 12:18:26 +03:00
|
|
|
(** Same as {!val:Surface.Lexer_common.token_list_language_agnostic}, but
    with tokens whose string varies with the input language. *)
|
2021-05-26 18:39:39 +03:00
|
|
|
|
2021-08-19 19:26:06 +03:00
|
|
|
val lex_builtin : string -> Ast.builtin_expression option
|
|
|
|
(** Simple lexer for builtins.

    NOTE(review): presumably returns [None] when the string is not a builtin
    name; confirm against the implementations. *)
|
2021-05-26 18:39:39 +03:00
|
|
|
|
|
|
|
val lex_code : Sedlexing.lexbuf -> Tokens.token
|
|
|
|
(** Main lexing function used in a code block. *)
|
|
|
|
|
|
|
|
|
val lex_law : Sedlexing.lexbuf -> Tokens.token
|
|
|
|
(** Main lexing function used outside code blocks. *)
|
|
|
|
|
|
|
|
|
val lexer : Sedlexing.lexbuf -> Tokens.token
|
2022-04-14 12:18:26 +03:00
|
|
|
(** Entry point of the lexer, distributes to {!val:lex_code} or
    {!val:lex_law} depending on the current
    {!val:Surface.Lexer_common.context}. *)
|
2023-09-11 17:44:35 +03:00
|
|
|
|
|
|
|
val lex_line : Sedlexing.lexbuf -> (string * line_token) option
|
2023-09-19 19:21:14 +03:00
|
|
|
(** Low-level lexer intended for dependency extraction. The whole line
    (including ["\n"]) is always returned together with the token. [None] for
    EOF. *)
|
2021-05-26 18:39:39 +03:00
|
|
|
end
|