(* This file is part of the Catala compiler, a specification language for tax
   and social benefits computation rules. Copyright (C) 2020 Inria,
   contributors: Denis Merigoux <denis.merigoux@inria.fr>, Emile Rolley
   <emile.rolley@tuta.io>

   Licensed under the Apache License, Version 2.0 (the "License"); you may not
   use this file except in compliance with the License. You may obtain a copy of
   the License at

   http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
   WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
   License for the specific language governing permissions and limitations under
   the License. *)

(** Wrapping module around parser and lexer that offers the
    {!Parser_driver.parse_source_file} API. *)

open Sedlexing
open Catala_utils

(** After parsing, heading structure is completely flat because of the
    [source_file_item] rule. We need to tree-i-fy the flat structure by looking
    at the precedence of the law headings. *)
let rec law_struct_list_to_tree (f : Ast.law_structure list) :
    Ast.law_structure list =
  match f with
  | [] -> []
  | [item] -> [item]
  | first_item :: rest -> (
    let rest_tree = law_struct_list_to_tree rest in
    match rest_tree with
    | [] -> assert false (* there should be at least one rest element *)
    | rest_head :: rest_tail -> (
      match first_item with
      | CodeBlock _ | LawText _ | LawInclude _ | ModuleDef _ | ModuleUse _ ->
        (* if an article or an include is just before a new heading, then we
           don't merge it with what comes next *)
        first_item :: rest_head :: rest_tail
      | LawHeading (heading, _) ->
        (* here we have encountered a heading, which is going to "gobble"
           everything in the [rest_tree] until it finds a heading of at least
           the same precedence *)
        let rec split_rest_tree (rest_tree : Ast.law_structure list) :
            Ast.law_structure list * Ast.law_structure list =
          match rest_tree with
          | [] -> [], []
          | LawHeading (new_heading, _) :: _
            when new_heading.law_heading_precedence
                 <= heading.law_heading_precedence ->
            (* we stop gobbling *)
            [], rest_tree
          | first :: after ->
            (* we continue gobbling *)
            let after_gobbled, after_out = split_rest_tree after in
            first :: after_gobbled, after_out
        in
        let gobbled, rest_out = split_rest_tree rest_tree in
        LawHeading (heading, gobbled) :: rest_out))
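
(* A worked sketch of the tree-ification: writing headings with their
   precedence in brackets and eliding the other constructor payloads, the flat
   post-parsing list

     [LawHeading A[1]; LawText x; LawHeading B[2]; LawText y; LawHeading C[1]]

   is rebuilt (processing right to left) as

     [LawHeading A[1] [LawText x; LawHeading B[2] [LawText y]]; LawHeading C[1]]

   [B], having a strictly greater precedence number (deeper nesting) than [A],
   is gobbled by it, while [C], of equal precedence, stops the gobbling. *)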

module ParserAux (LocalisedLexer : Lexer_common.LocalisedLexer) = struct
  include Parser.Make (LocalisedLexer)
  module I = MenhirInterpreter

  (** Returns the state number from the Menhir environment *)
  let state (env : 'semantic_value I.env) : int =
    match Lazy.force (I.stack env) with
    | MenhirLib.General.Nil -> 0
    | MenhirLib.General.Cons (Element (s, _, _, _), _) -> I.number s

  let register_parsing_error
      (lexbuf : lexbuf)
      (env : 'semantic_value I.env)
      (acceptable_tokens : (string * Tokens.token) list)
      (similar_candidate_tokens : string list) : 'a =
    (* The parser has suspended itself because of a syntax error. *)
    let custom_menhir_message ppf =
      (match Parser_errors.message (state env) with
      | exception Not_found -> Format.fprintf ppf "@{<yellow>unexpected token@}"
      | msg ->
        Format.fprintf ppf "@{<yellow>@<1>%s@} @[<hov>%a@]" "»"
          Format.pp_print_text
          (String.trim (String.uncapitalize_ascii msg)));
      if acceptable_tokens <> [] then
        Format.fprintf ppf "@\n@[<hov>Those are valid at this point:@ %a@]"
          (Format.pp_print_list
             ~pp_sep:(fun ppf () -> Format.fprintf ppf ",@ ")
             (fun ppf string -> Format.fprintf ppf "@{<yellow>\"%s\"@}" string))
          (List.map (fun (s, _) -> s) acceptable_tokens)
    in
    let suggestion =
      if similar_candidate_tokens = [] then None
      else Some similar_candidate_tokens
    in
    let error_loc = Pos.from_lpos (lexing_positions lexbuf) in
    let wrong_token = Utf8.lexeme lexbuf in
    let msg = custom_menhir_message in
    Message.delayed_error ~kind:Parsing () ?suggestion ~pos:error_loc
      "@[<hov>Syntax error at %a:@ %t@]"
      (fun ppf string -> Format.fprintf ppf "@{<yellow>\"%s\"@}" string)
      wrong_token msg

  (** Computes the list of tokens acceptable by the parser in state [env], and
      among them the ones closest to the erroneous lexeme; the acceptable
      tokens similar to the lexeme are sorted first. *)
  let sorted_candidate_tokens lexbuf token_list env =
    let acceptable_tokens =
      List.filter_map
        (fun ((_, t) as elt) ->
          if I.acceptable (I.input_needed env) t (fst (lexing_positions lexbuf))
          then Some elt
          else None)
        token_list
    in
    let lexeme = Utf8.lexeme lexbuf in
    let similar_acceptable_tokens =
      Suggestions.best_candidates (List.map fst acceptable_tokens) lexeme
    in
    let module S = Set.Make (String) in
    let s_toks = S.of_list similar_acceptable_tokens in
    let sorted_acceptable_tokens =
      List.sort
        (fun (s, _) _ -> if S.mem s s_toks then -1 else 1)
        acceptable_tokens
    in
    similar_acceptable_tokens, sorted_acceptable_tokens
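
  (* Illustrative sketch (made-up lexeme and tokens): if the erroneous lexeme
     is "scoppe" and the acceptable tokens are ["condition"; "scope"; "date"],
     [Suggestions.best_candidates] would typically retain ["scope"], and the
     sort above moves it to the front so that it is displayed first. The exact
     candidates depend on [Suggestions]'s edit-distance heuristic. *)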

  (** Bounded, circular buffer over the token stream, used to replay tokens
      when looking for error recovery candidates *)
  type 'a ring_buffer = {
    curr_idx : int;
    start : int ref;
    stop : int ref;
    max_size : int;
    feed : unit -> 'a;
    data : 'a array;
  }

  let next ({ curr_idx; start; stop; max_size; feed; data } as buff) =
    let next_idx = succ curr_idx mod max_size in
    if curr_idx = !stop then (
      let new_elt = feed () in
      data.(curr_idx) <- new_elt;
      let size = ((!stop - !start + max_size) mod max_size) + 1 in
      stop := succ !stop mod max_size;
      let is_full = size = max_size in
      if is_full then
        (* buffer will get full: start is also moved *)
        start := succ !start mod max_size;
      { buff with curr_idx = next_idx }, new_elt)
    else
      let elt = data.(curr_idx) in
      { buff with curr_idx = next_idx }, elt

  let create ?(max_size = 20) feed v =
    {
      curr_idx = 0;
      start = ref 0;
      stop = ref 0;
      feed;
      data = Array.make max_size v;
      max_size;
    }
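
  (* A minimal sketch of the buffering discipline, assuming an integer counter
     as the [feed] function. The record is functional (only [start]/[stop] are
     shared), so keeping an older copy of the buffer and calling [next] on it
     replays already-buffered elements instead of feeding new ones. *)
  let _ring_buffer_sketch () =
    let counter = ref 0 in
    let feed () =
      incr counter;
      !counter
    in
    let buf0 = create ~max_size:4 feed 0 in
    let buf1, x1 = next buf0 in
    (* first read is at the write head: feeds, x1 = 1 *)
    let _buf2, x2 = next buf1 in
    (* still at the write head: feeds again, x2 = 2 *)
    let _, x1' = next buf0 in
    (* replay from the old position: x1' = 1, nothing fed *)
    assert (x1 = 1 && x2 = 2 && x1' = 1)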

  (** Advances the parser from [checkpoint], feeding it up to [max_step] tokens
      from [lexer_buffer]; returns the number of tokens consumed before the
      parser errors out, accepts, or reaches the step limit. *)
  let progress ?(max_step = 10) lexer_buffer env checkpoint : int =
    let rec loop nth_step lexer_buffer env checkpoint =
      if nth_step >= max_step then nth_step
      else
        match checkpoint with
        | I.InputNeeded env ->
          let new_lexer_buffer, token = next lexer_buffer in
          let checkpoint = I.offer checkpoint token in
          loop (succ nth_step) new_lexer_buffer env checkpoint
        | I.Shifting _ | I.AboutToReduce _ ->
          let checkpoint = I.resume checkpoint in
          loop nth_step lexer_buffer env checkpoint
        | I.HandlingError (_ : _ I.env) | I.Accepted _ | I.Rejected -> nth_step
    in
    loop 0 lexer_buffer env checkpoint

  (** From the error state [env], tries to continue the parse as if each of
      the [acceptable_tokens] had been inserted (plus one attempt without any
      insertion), replaying the buffered tokens. Returns the checkpoint that
      lets the parser progress the furthest, if any progresses significantly. *)
  let recover_parsing_error lexer_buffer env acceptable_tokens =
    let candidates_checkpoints =
      let without_token = I.input_needed env in
      let make_with_token tok =
        let l, r = I.positions env in
        let checkpoint = I.input_needed env in
        I.offer checkpoint (tok, l, r)
      in
      without_token :: List.map make_with_token acceptable_tokens
    in
    let threshold = min 10 lexer_buffer.max_size in
    let rec iterate ((curr_max_progress, _) as acc) = function
      | [] -> acc
      | cp :: t ->
        (* no candidate can progress more than [threshold] steps: stop early *)
        if curr_max_progress >= threshold then acc
        else
          let cp_progress = progress ~max_step:threshold lexer_buffer env cp in
          if cp_progress > curr_max_progress then iterate (cp_progress, cp) t
          else iterate acc t
    in
    let best_progress, best_cp =
      let dummy_cp = I.input_needed env in
      iterate (-1, dummy_cp) candidates_checkpoints
    in
    (* We do not consider paths where progress isn't significant *)
    if best_progress < 2 then None else Some best_cp

  (** Main parsing loop *)
  let loop
      (lexer_buffer :
        (Tokens.token * Lexing.position * Lexing.position) ring_buffer)
      (token_list : (string * Tokens.token) list)
      (lexbuf : lexbuf)
      (last_input_needed : 'semantic_value I.env option)
      (checkpoint : 'semantic_value I.checkpoint) : Ast.source_file =
    let rec loop
        (lexer_buffer :
          (Tokens.token * Lexing.position * Lexing.position) ring_buffer)
        (token_list : (string * Tokens.token) list)
        (lexbuf : lexbuf)
        (last_input_needed : 'semantic_value I.env option)
        (checkpoint : 'semantic_value I.checkpoint) : Ast.source_file =
      match checkpoint with
      | I.InputNeeded env ->
        let new_lexer_buffer, token = next lexer_buffer in
        let checkpoint = I.offer checkpoint token in
        loop new_lexer_buffer token_list lexbuf (Some env) checkpoint
      | I.Shifting _ | I.AboutToReduce _ ->
        let checkpoint = I.resume checkpoint in
        loop lexer_buffer token_list lexbuf last_input_needed checkpoint
      | I.HandlingError (env : 'semantic_value I.env) -> (
        let similar_candidate_tokens, sorted_acceptable_tokens =
          sorted_candidate_tokens lexbuf token_list env
        in
        register_parsing_error lexbuf env sorted_acceptable_tokens
          similar_candidate_tokens;
        let best_effort_checkpoint =
          recover_parsing_error lexer_buffer env
            (List.map snd sorted_acceptable_tokens)
        in
        match best_effort_checkpoint with
        | None ->
          (* No reasonable solution, aborting *)
          []
        | Some best_effort_checkpoint ->
          loop lexer_buffer token_list lexbuf last_input_needed
            best_effort_checkpoint)
      | I.Accepted v -> v
      | I.Rejected -> []
    in
    loop lexer_buffer token_list lexbuf last_input_needed checkpoint

  (** Stub that wraps the parsing main loop and handles the Menhir/Sedlex type
      difference for [lexbuf]. *)
  let sedlex_with_menhir
      (lexer' : lexbuf -> Tokens.token)
      (token_list : (string * Tokens.token) list)
      (target_rule : Lexing.position -> 'semantic_value I.checkpoint)
      (lexbuf : lexbuf) : Ast.source_file =
    let lexer_buffer :
        (Tokens.token * Lexing.position * Lexing.position) ring_buffer =
      let feed = with_tokenizer lexer' lexbuf in
      create feed Lexing.(Tokens.EOF, dummy_pos, dummy_pos)
    in
    try
      let target_rule =
        target_rule (fst @@ Sedlexing.lexing_positions lexbuf)
      in
      loop lexer_buffer token_list lexbuf None target_rule
    with Lexer_common.Lexing_error (pos, token) ->
      (* The encapsulating [Message.with_delayed_errors] will raise an
         exception: we are safe returning a dummy value. *)
      Message.delayed_error ~kind:Lexing [] ~pos
        "Parsing error after token \"%s\": what comes after is unknown" token

  let commands_or_includes (lexbuf : lexbuf) : Ast.source_file =
    sedlex_with_menhir LocalisedLexer.lexer LocalisedLexer.token_list
      Incremental.source_file lexbuf
end

module Parser_En = ParserAux (Lexer_en)
module Parser_Fr = ParserAux (Lexer_fr)
module Parser_Pl = ParserAux (Lexer_pl)

let localised_parser : Global.backend_lang -> lexbuf -> Ast.source_file =
  function
  | En -> Parser_En.commands_or_includes
  | Fr -> Parser_Fr.commands_or_includes
  | Pl -> Parser_Pl.commands_or_includes
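
(* Usage sketch: [localised_parser En] selects the parser whose keywords match
   the English surface syntax. In practice the language is inferred from the
   file extension (e.g. ".catala_en") through [Cli.file_lang]; see
   [parse_source] below. *)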

(** Lightweight lexer for dependency extraction *)

let lines (file : File.t) (language : Global.backend_lang) =
  let lex_line =
    match language with
    | En -> Lexer_en.lex_line
    | Fr -> Lexer_fr.lex_line
    | Pl -> Lexer_pl.lex_line
  in
  let input = open_in file in
  try
    let lexbuf = Sedlexing.Utf8.from_channel input in
    Sedlexing.set_filename lexbuf file;
    let rec aux () =
      match lex_line lexbuf with
      | Some (str, tok) ->
        Seq.Cons ((str, tok, Sedlexing.lexing_bytes_positions lexbuf), aux)
      | None ->
        close_in input;
        Seq.Nil
    in
    Seq.once aux
  with exc ->
    let bt = Printexc.get_raw_backtrace () in
    close_in input;
    Printexc.raise_with_backtrace exc bt
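
(* Hypothetical usage sketch (file name assumed): scanning a source line by
   line without running the full parser, e.g. to collect '> Use' dependencies:

     lines "tutorial.catala_en" En
     |> Seq.iter (fun (raw_line, _token, _positions) -> print_string raw_line)

   The returned sequence is ephemeral ([Seq.once]): it reads from the open
   channel and must be consumed at most once. *)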

(** {1 Parsing multiple files} *)

let lexbuf_file lexbuf =
  (fst (Sedlexing.lexing_positions lexbuf)).Lexing.pos_fname

let with_sedlex_file file f =
  let ic = open_in file in
  let lexbuf = Sedlexing.Utf8.from_channel ic in
  Sedlexing.set_filename lexbuf file;
  Fun.protect ~finally:(fun () -> close_in ic) (fun () -> f lexbuf)

let with_sedlex_source source_file f =
  match source_file with
  | Global.FileName file -> with_sedlex_file file f
  | Global.Contents (str, file) ->
    let lexbuf = Sedlexing.Utf8.from_string str in
    Sedlexing.set_filename lexbuf file;
    f lexbuf
  | Global.Stdin file ->
    let lexbuf = Sedlexing.Utf8.from_channel stdin in
    Sedlexing.set_filename lexbuf file;
    f lexbuf
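
(* Sketch: the three kinds of input sources are handled uniformly; for
   instance, an in-memory buffer (contents and attached file name are
   assumptions here) can be parsed with:

     with_sedlex_source
       (Global.Contents ("# Some law text\n", "inline.catala_en"))
       (localised_parser En)
*)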

(** Parses a single source file *)
let rec parse_source ?resolve_included_file (lexbuf : Sedlexing.lexbuf) :
    Ast.program =
  let source_file_name = lexbuf_file lexbuf in
  Message.debug "Parsing %a" File.format source_file_name;
  let language = Cli.file_lang source_file_name in
  let commands = localised_parser language lexbuf in
  let program =
    expand_includes ?resolve_included_file source_file_name commands
  in
  {
    program with
    program_source_files = source_file_name :: program.Ast.program_source_files;
    program_lang = language;
  }

(** Expands the include directives in a parsing result, thus parsing new source
    files *)
and expand_includes
    ?(resolve_included_file = fun path -> Catala_utils.Global.FileName path)
    (source_file : string)
    (commands : Ast.law_structure list) : Ast.program =
  let language = Cli.file_lang source_file in
  let rprg =
    List.fold_left
      (fun acc command ->
        let join_module_names name_opt =
          match acc.Ast.program_module, name_opt with
          | opt, None | None, opt -> opt
          | Some id1, Some id2 ->
            Message.error ~kind:Parsing
              ~extra_pos:
                ["", Mark.get id1.module_name; "", Mark.get id2.module_name]
              "Multiple definitions of the module name"
        in
        match command with
        | Ast.ModuleDef (id, is_external) ->
          {
            acc with
            Ast.program_module =
              join_module_names
                (Some { module_name = id; module_external = is_external });
            Ast.program_items = command :: acc.Ast.program_items;
          }
        | Ast.ModuleUse (mod_use_name, alias) ->
          let mod_use_alias = Option.value ~default:mod_use_name alias in
          {
            acc with
            Ast.program_used_modules =
              { mod_use_name; mod_use_alias } :: acc.Ast.program_used_modules;
            Ast.program_items = command :: acc.Ast.program_items;
          }
        | Ast.LawInclude (Ast.CatalaFile inc_file) ->
          let source_dir = Filename.dirname source_file in
          let sub_source = File.(source_dir / Mark.remove inc_file) in
          let pos = Mark.get inc_file in
          if File.check_file sub_source = None then
            Message.delayed_error ~kind:Parsing ~pos acc
              "Included file '%s' is not a regular file or does not exist."
              sub_source
          else
            let sub_source = resolve_included_file sub_source in
            with_sedlex_source sub_source
            @@ fun lexbuf ->
            let includ_program = parse_source ~resolve_included_file lexbuf in
            let () =
              includ_program.Ast.program_module
              |> Option.iter
                 @@ fun id ->
                 Message.error ~kind:Parsing
                   ~extra_pos:
                     [
                       "File include", Mark.get inc_file;
                       "Module declaration", Mark.get id.Ast.module_name;
                     ]
                   "A file that declares a module cannot be used through the \
                    raw '@{<yellow>> Include@}'@ directive.@ You should use it \
                    as a module with@ '@{<yellow>> Use @{<blue>%s@}@}'@ \
                    instead."
                   (Mark.remove id.Ast.module_name)
            in
            {
              Ast.program_module = acc.program_module;
              Ast.program_source_files =
                List.rev_append includ_program.program_source_files
                  acc.Ast.program_source_files;
              Ast.program_items =
                List.rev_append includ_program.program_items
                  acc.Ast.program_items;
              Ast.program_used_modules =
                List.rev_append includ_program.program_used_modules
                  acc.Ast.program_used_modules;
              Ast.program_lang = language;
            }
        | Ast.LawHeading (heading, commands') ->
          let {
            Ast.program_module;
            Ast.program_items = commands';
            Ast.program_source_files = new_sources;
            Ast.program_used_modules = new_used_modules;
            Ast.program_lang = _;
          } =
            expand_includes source_file commands'
          in
          {
            Ast.program_module = join_module_names program_module;
            Ast.program_source_files =
              List.rev_append new_sources acc.Ast.program_source_files;
            Ast.program_items =
              Ast.LawHeading (heading, commands') :: acc.Ast.program_items;
            Ast.program_used_modules =
              List.rev_append new_used_modules acc.Ast.program_used_modules;
            Ast.program_lang = language;
          }
        | i -> { acc with Ast.program_items = i :: acc.Ast.program_items })
      {
        Ast.program_module = None;
        Ast.program_source_files = [];
        Ast.program_items = [];
        Ast.program_used_modules = [];
        Ast.program_lang = language;
      }
      commands
  in
  {
    Ast.program_lang = language;
    Ast.program_module = rprg.Ast.program_module;
    Ast.program_source_files = List.rev rprg.Ast.program_source_files;
    Ast.program_items = List.rev rprg.Ast.program_items;
    Ast.program_used_modules = List.rev rprg.Ast.program_used_modules;
  }
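
(* Sketch of include expansion: assuming [main.catala_en] contains the
   directive '> Include: art1.catala_en' and the sibling file exists, parsing
   the former recursively parses the latter, splices its items into
   [program_items] and records both files in [program_source_files]. A module
   declaration inside an included file is rejected above: modules must be
   brought in with a '> Use' directive instead. *)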

(** {2 Handling interfaces} *)

(** Returns the module dependencies ('> Use' directives) of a parsed program,
    along with its interface: the declarations and toplevel definitions found
    in metadata code blocks, with the bodies of toplevel definitions
    stripped. *)
let get_interface program =
  let rec filter (req, acc) = function
    | Ast.LawInclude _ | Ast.LawText _ | Ast.ModuleDef _ -> req, acc
    | Ast.LawHeading (_, str) -> List.fold_left filter (req, acc) str
    | Ast.ModuleUse (mod_use_name, alias) ->
      ( {
          Ast.mod_use_name;
          mod_use_alias = Option.value ~default:mod_use_name alias;
        }
        :: req,
        acc )
    | Ast.CodeBlock (code, _, true) ->
      ( req,
        List.fold_left
          (fun acc -> function
            | Ast.ScopeUse _, _ -> acc
            | ((Ast.ScopeDecl _ | StructDecl _ | EnumDecl _), _) as e ->
              e :: acc
            | Ast.Topdef def, m ->
              (Ast.Topdef { def with topdef_expr = None }, m) :: acc)
          acc code )
    | Ast.CodeBlock (_, _, false) ->
      (* Non-metadata blocks are ignored *)
      req, acc
  in
  let req, acc = List.fold_left filter ([], []) program.Ast.program_items in
  List.rev req, List.rev acc

(** {1 API} *)

let check_modname program source_file =
  match program.Ast.program_module, source_file with
  | ( Some { module_name = mname, pos; _ },
      (Global.FileName file | Global.Contents (_, file) | Global.Stdin file) )
    when not File.(equal mname Filename.(remove_extension (basename file))) ->
    Message.error ~kind:Parsing ~pos
      "Module declared as@ @{<blue>%s@},@ which@ does@ not@ match@ the@ file@ \
       name@ %a.@ Rename the module to@ @{<blue>%s@}@ or@ the@ file@ to@ %a."
      mname File.format file
      (String.capitalize_ascii Filename.(remove_extension (basename file)))
      File.format
      File.((dirname file / mname) ^ Filename.extension file)
  | _ -> ()
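
(* For example (hypothetical names): a program declaring '> Module Tutorial'
   is accepted when read from [Tutorial.catala_en], but rejected when read
   from [other.catala_en], with a hint to rename either the module or the
   file. *)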

let load_interface ?default_module_name source_file =
  let program = with_sedlex_source source_file parse_source in
  check_modname program source_file;
  let modname =
    match program.Ast.program_module, default_module_name with
    | Some mname, _ -> mname
    | None, Some n ->
      {
        module_name =
          n, Pos.from_info (Global.input_src_file source_file) 0 0 0 0;
        module_external = false;
      }
    | None, None ->
      Message.error ~kind:Parsing
        "%a doesn't define a module name. It should contain a '@{<cyan>> \
         Module %s@}' directive."
        File.format
        (Global.input_src_file source_file)
        (match source_file with
        | FileName s ->
          String.capitalize_ascii Filename.(basename (remove_extension s))
        | _ -> "Module_name")
  in
  let used_modules, intf = get_interface program in
  {
    Ast.intf_modname = modname;
    Ast.intf_code = intf;
    Ast.intf_submodules = used_modules;
  }

let parse_top_level_file
    ?resolve_included_file
    (source_file : File.t Global.input_src) : Ast.program =
  Message.with_delayed_errors
  @@ fun () ->
  let program =
    with_sedlex_source source_file (parse_source ?resolve_included_file)
  in
  check_modname program source_file;
  {
    program with
    Ast.program_items = law_struct_list_to_tree program.Ast.program_items;
  }