mirror of
https://github.com/CatalaLang/catala.git
synced 2024-11-08 07:51:43 +03:00
Add non verbose lexing
This commit is contained in:
parent
15c1df9fa7
commit
84835b0a79
@ -21,6 +21,7 @@ depends: [
|
||||
"cmdliner" {>= "1.0.4"}
|
||||
"re" {>= "1.9.0"}
|
||||
"dune" {build}
|
||||
"ocamlgraph" {>= "1.8.8"}
|
||||
]
|
||||
build: [
|
||||
["dune" "subst"] {pinned}
|
||||
|
@ -27,6 +27,7 @@
|
||||
(cmdliner (>= 1.0.4))
|
||||
(re (>= 1.9.0))
|
||||
(dune (and :build ))
|
||||
(ocamlgraph (>= 1.8.8))
|
||||
)
|
||||
)
|
||||
|
||||
|
@ -45,9 +45,14 @@ let language =
|
||||
Arg.(
|
||||
value
|
||||
& opt (some string) None
|
||||
& info [ "l"; "language" ] ~docv:"LANG" ~doc:"Input language among: en, fr (default fr)")
|
||||
& info [ "l"; "language" ] ~docv:"LANG" ~doc:"Input language among: en, fr, nv (default nv)")
|
||||
|
||||
type language_option = Fr | En
|
||||
type language_option = [ `Fr | `En | `NonVerbose ]
|
||||
|
||||
type reduced_lang_option = [ `Fr | `En ]
|
||||
|
||||
let reduce_lang (lang : language_option) : reduced_lang_option =
|
||||
match lang with `En | `NonVerbose -> `En | `Fr -> `Fr
|
||||
|
||||
let output =
|
||||
Arg.(
|
||||
|
@ -25,13 +25,13 @@ let driver (source_file : string) (debug : bool) (wrap_weaved_output : bool)
|
||||
let language =
|
||||
match language with
|
||||
| Some l ->
|
||||
if l = "fr" then Cli.Fr
|
||||
else if l = "en" then Cli.En
|
||||
if l = "fr" then `Fr
|
||||
else if l = "en" then `En
|
||||
else begin
|
||||
Cli.error_print (Printf.sprintf "The selected language (%s) is not supported by Catala" l);
|
||||
exit 1
|
||||
end
|
||||
| None -> Cli.Fr
|
||||
| None -> `NonVerbose
|
||||
in
|
||||
let backend =
|
||||
if backend = "Makefile" then Cli.Makefile
|
||||
@ -68,8 +68,8 @@ let driver (source_file : string) (debug : bool) (wrap_weaved_output : bool)
|
||||
try
|
||||
let weaved_output =
|
||||
match backend with
|
||||
| Cli.Latex -> Latex.ast_to_latex program language
|
||||
| Cli.Html -> Html.ast_to_html program pygmentize_loc language
|
||||
| Cli.Latex -> Latex.ast_to_latex program (Cli.reduce_lang language)
|
||||
| Cli.Html -> Html.ast_to_html program pygmentize_loc (Cli.reduce_lang language)
|
||||
| _ -> assert false
|
||||
in
|
||||
let output_file =
|
||||
|
@ -24,6 +24,7 @@ let pre_html (s : string) = s
|
||||
|
||||
let wrap_html (code : string) (source_files : string list) (custom_pygments : string option)
|
||||
(language : Cli.language_option) : string =
|
||||
let language = C.reduce_lang language in
|
||||
let pygments = match custom_pygments with Some p -> p | None -> "pygmentize" in
|
||||
let css_file = Filename.temp_file "catala_css_pygments" "" in
|
||||
let pygments_args = [| "-f"; "html"; "-S"; "colorful"; "-a"; ".catala-code" |] in
|
||||
@ -56,15 +57,15 @@ let wrap_html (code : string) (source_files : string list) (custom_pygments : st
|
||||
%s"
|
||||
css_as_string
|
||||
( match language with
|
||||
| C.Fr -> "Implémentation de texte législatif"
|
||||
| C.En -> "Legislative text implementation" )
|
||||
(match language with C.Fr -> "Document généré par" | C.En -> "Document generated by")
|
||||
| `Fr -> "Implémentation de texte législatif"
|
||||
| `En -> "Legislative text implementation" )
|
||||
(match language with `Fr -> "Document généré par" | `En -> "Document generated by")
|
||||
( match Build_info.V1.version () with
|
||||
| None -> "n/a"
|
||||
| Some v -> Build_info.V1.Version.to_string v )
|
||||
( match language with
|
||||
| C.Fr -> "Fichiers sources tissés dans ce document"
|
||||
| C.En -> "Source files weaved in this document" )
|
||||
| `Fr -> "Fichiers sources tissés dans ce document"
|
||||
| `En -> "Source files weaved in this document" )
|
||||
(String.concat "\n"
|
||||
(List.map
|
||||
(fun filename ->
|
||||
@ -76,14 +77,12 @@ let wrap_html (code : string) (source_files : string list) (custom_pygments : st
|
||||
in
|
||||
Printf.sprintf "<li><tt>%s</tt>, %s %s</li>"
|
||||
(pre_html (Filename.basename filename))
|
||||
( match language with
|
||||
| C.Fr -> "dernière modification le"
|
||||
| C.En -> "last modification" )
|
||||
(match language with `Fr -> "dernière modification le" | `En -> "last modification")
|
||||
ftime)
|
||||
source_files))
|
||||
code
|
||||
|
||||
let pygmentize_code (c : string Pos.marked) (language : C.language_option)
|
||||
let pygmentize_code (c : string Pos.marked) (language : C.reduced_lang_option)
|
||||
(custom_pygments : string option) : string =
|
||||
C.debug_print (Printf.sprintf "Pygmenting the code chunk %s" (Pos.to_string (Pos.get_position c)));
|
||||
let temp_file_in = Filename.temp_file "catala_html_pygments" "in" in
|
||||
@ -92,7 +91,7 @@ let pygmentize_code (c : string Pos.marked) (language : C.language_option)
|
||||
Printf.fprintf oc "%s" (Pos.unmark c);
|
||||
close_out oc;
|
||||
let pygments = match custom_pygments with Some p -> p | None -> "pygmentize" in
|
||||
let pygments_lexer = match language with C.Fr -> "catala_fr" | C.En -> "catala_en" in
|
||||
let pygments_lexer = match language with `Fr -> "catala_fr" | `En -> "catala_en" in
|
||||
let pygments_args =
|
||||
[|
|
||||
"-l";
|
||||
@ -122,7 +121,7 @@ let pygmentize_code (c : string Pos.marked) (language : C.language_option)
|
||||
type program_state = InsideArticle | OutsideArticle
|
||||
|
||||
let program_item_to_html (i : A.program_item) (custom_pygments : string option)
|
||||
(language : C.language_option) (state : program_state) : string * program_state =
|
||||
(language : C.reduced_lang_option) (state : program_state) : string * program_state =
|
||||
let closing_div =
|
||||
(* First we terminate the div of the previous article if need be *)
|
||||
match (i, state) with
|
||||
@ -146,7 +145,7 @@ let program_item_to_html (i : A.program_item) (custom_pygments : string option)
|
||||
P.sprintf
|
||||
"<div class='article-container'>\n\n<div class='article-title'><a href='%s'>%s</a></div>"
|
||||
( match (a.law_article_id, language) with
|
||||
| Some id, C.Fr ->
|
||||
| Some id, `Fr ->
|
||||
let ltime = Unix.localtime (Unix.time ()) in
|
||||
P.sprintf "https://beta.legifrance.gouv.fr/codes/id/%s/%d-%02d-%02d" id
|
||||
(1900 + ltime.Unix.tm_year) (ltime.Unix.tm_mon + 1) ltime.Unix.tm_mday
|
||||
@ -179,7 +178,7 @@ let program_item_to_html (i : A.program_item) (custom_pygments : string option)
|
||||
(closing_div ^ item_string, new_state)
|
||||
|
||||
let ast_to_html (program : A.program) (custom_pygments : string option)
|
||||
(language : C.language_option) : string =
|
||||
(language : C.reduced_lang_option) : string =
|
||||
let i_s, _ =
|
||||
List.fold_left
|
||||
(fun (acc, state) i ->
|
||||
|
@ -33,6 +33,7 @@ let pre_latexify (s : string) =
|
||||
|
||||
let wrap_latex (code : string) (source_files : string list) (custom_pygments : string option)
|
||||
(language : Cli.language_option) =
|
||||
let language = C.reduce_lang language in
|
||||
Printf.sprintf
|
||||
"\\documentclass[11pt, a4paper]{article}\n\n\
|
||||
\\usepackage[T1]{fontenc}\n\
|
||||
@ -76,20 +77,20 @@ let wrap_latex (code : string) (source_files : string list) (custom_pygments : s
|
||||
\\[\\star\\star\\star\\]\\\\\n\
|
||||
%s\n\n\
|
||||
\\end{document}"
|
||||
(match language with C.Fr -> "french" | C.En -> "english")
|
||||
(match language with `Fr -> "french" | `En -> "english")
|
||||
( match custom_pygments with
|
||||
| None -> ""
|
||||
| Some p -> Printf.sprintf "\\renewcommand{\\MintedPygmentize}{%s}" p )
|
||||
( match language with
|
||||
| C.Fr -> "Implémentation de texte législatif"
|
||||
| C.En -> "Legislative text implementation" )
|
||||
(match language with C.Fr -> "Document généré par" | C.En -> "Document generated by")
|
||||
| `Fr -> "Implémentation de texte législatif"
|
||||
| `En -> "Legislative text implementation" )
|
||||
(match language with `Fr -> "Document généré par" | `En -> "Document generated by")
|
||||
( match Build_info.V1.version () with
|
||||
| None -> "n/a"
|
||||
| Some v -> Build_info.V1.Version.to_string v )
|
||||
( match language with
|
||||
| C.Fr -> "Fichiers sources tissés dans ce document"
|
||||
| C.En -> "Source files weaved in this document" )
|
||||
| `Fr -> "Fichiers sources tissés dans ce document"
|
||||
| `En -> "Source files weaved in this document" )
|
||||
(String.concat ","
|
||||
(List.map
|
||||
(fun filename ->
|
||||
@ -101,9 +102,7 @@ let wrap_latex (code : string) (source_files : string list) (custom_pygments : s
|
||||
in
|
||||
Printf.sprintf "\\item\\texttt{%s}, %s %s"
|
||||
(pre_latexify (Filename.basename filename))
|
||||
( match language with
|
||||
| C.Fr -> "dernière modification le"
|
||||
| C.En -> "last modification" )
|
||||
(match language with `Fr -> "dernière modification le" | `En -> "last modification")
|
||||
ftime)
|
||||
source_files))
|
||||
code
|
||||
@ -123,7 +122,7 @@ let math_syms_replace (c : string) : string =
|
||||
in
|
||||
R.substitute ~rex:syms ~subst:syms2cmd c
|
||||
|
||||
let program_item_to_latex (i : A.program_item) (language : C.language_option) : string =
|
||||
let program_item_to_latex (i : A.program_item) (language : C.reduced_lang_option) : string =
|
||||
match i with
|
||||
| A.LawHeading (title, precedence) ->
|
||||
P.sprintf "\\%ssection*{%s}"
|
||||
@ -138,10 +137,10 @@ let program_item_to_latex (i : A.program_item) (language : C.language_option) :
|
||||
\\end{minted}"
|
||||
(pre_latexify (Filename.basename (Pos.get_file (Pos.get_position c))))
|
||||
(Pos.get_start_line (Pos.get_position c))
|
||||
(match language with C.Fr -> "catala_fr" | C.En -> "catala_en")
|
||||
(match language with `Fr -> "catala_fr" | `En -> "catala_en")
|
||||
(math_syms_replace (Pos.unmark c))
|
||||
| A.MetadataBlock (_, c) ->
|
||||
let metadata_title = match language with C.Fr -> "Métadonnées" | C.En -> "Metadata" in
|
||||
let metadata_title = match language with `Fr -> "Métadonnées" | `En -> "Metadata" in
|
||||
P.sprintf
|
||||
"\\begin{tcolorbox}[colframe=OliveGreen, breakable, \
|
||||
title=\\textcolor{black}{\\texttt{%s}},title after \
|
||||
@ -153,7 +152,7 @@ let program_item_to_latex (i : A.program_item) (language : C.language_option) :
|
||||
metadata_title metadata_title
|
||||
(Pos.get_start_line (Pos.get_position c))
|
||||
(pre_latexify (Filename.basename (Pos.get_file (Pos.get_position c))))
|
||||
(match language with C.Fr -> "catala_fr" | C.En -> "catala_en")
|
||||
(match language with `Fr -> "catala_fr" | `En -> "catala_en")
|
||||
(math_syms_replace (Pos.unmark c))
|
||||
| A.LawInclude (A.PdfFile ((file, _), page)) ->
|
||||
let label = file ^ match page with None -> "" | Some p -> P.sprintf "_page_%d," p in
|
||||
@ -165,5 +164,5 @@ let program_item_to_latex (i : A.program_item) (language : C.language_option) :
|
||||
file label
|
||||
| A.LawInclude (A.CatalaFile _ | A.LegislativeText _) -> ""
|
||||
|
||||
let ast_to_latex (program : A.program) (language : C.language_option) : string =
|
||||
let ast_to_latex (program : A.program) (language : C.reduced_lang_option) : string =
|
||||
String.concat "\n\n" (List.map (fun i -> program_item_to_latex i language) program.program_items)
|
||||
|
397
src/catala/parsing/lexer.ml
Normal file
397
src/catala/parsing/lexer.ml
Normal file
@ -0,0 +1,397 @@
|
||||
(* This file is part of the Catala compiler, a specification language for tax and social benefits
|
||||
computation rules. Copyright (C) 2020 Inria, contributor: Denis Merigoux
|
||||
<denis.merigoux@inria.fr>
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
in compliance with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
or implied. See the License for the specific language governing permissions and limitations under
|
||||
the License. *)
|
||||
|
||||
open Parser
|
||||
open Sedlexing
|
||||
module R = Re.Pcre
|
||||
|
||||
let is_code : bool ref = ref false
|
||||
|
||||
let code_string_acc : string ref = ref ""
|
||||
|
||||
let update_acc (lexbuf : lexbuf) : unit = code_string_acc := !code_string_acc ^ Utf8.lexeme lexbuf
|
||||
|
||||
let token_list : (string * token) list =
|
||||
[
|
||||
("scope", SCOPE);
|
||||
("~", CONSEQUENCE);
|
||||
("data", DATA);
|
||||
("depends on", DEPENDS);
|
||||
("declaration", DECLARATION);
|
||||
("context", CONTEXT);
|
||||
("decreasing", DECREASING);
|
||||
("increasing", INCREASING);
|
||||
("of", OF);
|
||||
("collection", COLLECTION);
|
||||
("enumeration", ENUM);
|
||||
("integer", INTEGER);
|
||||
("amount", MONEY);
|
||||
("text", TEXT);
|
||||
("decimal", DECIMAL);
|
||||
("date", DATE);
|
||||
("boolean", BOOLEAN);
|
||||
("sum", SUM);
|
||||
("fulfilled", FILLED);
|
||||
("def", DEFINITION);
|
||||
("equals", DEFINED_AS);
|
||||
("match", MATCH);
|
||||
("with pattern", WITH);
|
||||
("?", UNDER_CONDITION);
|
||||
("if", IF);
|
||||
("then", THEN);
|
||||
("else", ELSE);
|
||||
("content", CONTENT);
|
||||
("structure", STRUCT);
|
||||
("optional", OPTIONAL);
|
||||
("assertion", ASSERTION);
|
||||
("varies", VARIES);
|
||||
("with", WITH_V);
|
||||
("for", FOR);
|
||||
("all", ALL);
|
||||
("we have", WE_HAVE);
|
||||
("fixed", FIXED);
|
||||
("by", BY);
|
||||
("rule", RULE);
|
||||
("exists", EXISTS);
|
||||
("such", SUCH);
|
||||
("that", THAT);
|
||||
("now", NOW);
|
||||
("and", AND);
|
||||
("or", OR);
|
||||
("not", NOT);
|
||||
("number", CARDINAL);
|
||||
("year", YEAR);
|
||||
("true", TRUE);
|
||||
("false", FALSE);
|
||||
]
|
||||
@ Lexer_fr.token_list_language_agnostic
|
||||
|
||||
let rec lex_code (lexbuf : lexbuf) : token =
|
||||
match%sedlex lexbuf with
|
||||
| white_space ->
|
||||
(* Whitespaces *)
|
||||
update_acc lexbuf;
|
||||
lex_code lexbuf
|
||||
| '#', Star (Compl '\n'), '\n' ->
|
||||
(* Comments *)
|
||||
update_acc lexbuf;
|
||||
lex_code lexbuf
|
||||
| "*/" ->
|
||||
(* End of code section *)
|
||||
is_code := false;
|
||||
END_CODE !code_string_acc
|
||||
| "scope" ->
|
||||
update_acc lexbuf;
|
||||
SCOPE
|
||||
| "data" ->
|
||||
update_acc lexbuf;
|
||||
DATA
|
||||
| "depends on" ->
|
||||
update_acc lexbuf;
|
||||
DEPENDS
|
||||
| "declaration" ->
|
||||
update_acc lexbuf;
|
||||
DECLARATION
|
||||
| "context" ->
|
||||
update_acc lexbuf;
|
||||
CONTEXT
|
||||
| "decreasing" ->
|
||||
update_acc lexbuf;
|
||||
DECREASING
|
||||
| "increasing" ->
|
||||
update_acc lexbuf;
|
||||
INCREASING
|
||||
| "of" ->
|
||||
update_acc lexbuf;
|
||||
OF
|
||||
| "collection" ->
|
||||
update_acc lexbuf;
|
||||
COLLECTION
|
||||
| "enumeration" ->
|
||||
update_acc lexbuf;
|
||||
ENUM
|
||||
| "int" ->
|
||||
update_acc lexbuf;
|
||||
INTEGER
|
||||
| "amount" ->
|
||||
update_acc lexbuf;
|
||||
MONEY
|
||||
| "text" ->
|
||||
update_acc lexbuf;
|
||||
TEXT
|
||||
| "dec" ->
|
||||
update_acc lexbuf;
|
||||
DECIMAL
|
||||
| "date" ->
|
||||
update_acc lexbuf;
|
||||
DATE
|
||||
| "bool" ->
|
||||
update_acc lexbuf;
|
||||
BOOLEAN
|
||||
| "sum" ->
|
||||
update_acc lexbuf;
|
||||
SUM
|
||||
| "fulfilled" ->
|
||||
update_acc lexbuf;
|
||||
FILLED
|
||||
| "def" ->
|
||||
update_acc lexbuf;
|
||||
DEFINITION
|
||||
| "=" ->
|
||||
update_acc lexbuf;
|
||||
DEFINED_AS
|
||||
| "match" ->
|
||||
update_acc lexbuf;
|
||||
MATCH
|
||||
| "with pattern" ->
|
||||
update_acc lexbuf;
|
||||
WITH
|
||||
| "?" ->
|
||||
update_acc lexbuf;
|
||||
UNDER_CONDITION
|
||||
| "if" ->
|
||||
update_acc lexbuf;
|
||||
IF
|
||||
| "~" ->
|
||||
update_acc lexbuf;
|
||||
CONSEQUENCE
|
||||
| "then" ->
|
||||
update_acc lexbuf;
|
||||
THEN
|
||||
| "else" ->
|
||||
update_acc lexbuf;
|
||||
ELSE
|
||||
| "condition" ->
|
||||
update_acc lexbuf;
|
||||
CONDITION
|
||||
| "content" ->
|
||||
update_acc lexbuf;
|
||||
CONTENT
|
||||
| "structure" ->
|
||||
update_acc lexbuf;
|
||||
STRUCT
|
||||
| "optional" ->
|
||||
update_acc lexbuf;
|
||||
OPTIONAL
|
||||
| "assertion" ->
|
||||
update_acc lexbuf;
|
||||
ASSERTION
|
||||
| "varies" ->
|
||||
update_acc lexbuf;
|
||||
VARIES
|
||||
| "with" ->
|
||||
update_acc lexbuf;
|
||||
WITH_V
|
||||
| "for" ->
|
||||
update_acc lexbuf;
|
||||
FOR
|
||||
| "all" ->
|
||||
update_acc lexbuf;
|
||||
ALL
|
||||
| "we have" ->
|
||||
update_acc lexbuf;
|
||||
WE_HAVE
|
||||
| "fixed" ->
|
||||
update_acc lexbuf;
|
||||
FIXED
|
||||
| "by" ->
|
||||
update_acc lexbuf;
|
||||
BY
|
||||
| "rule" ->
|
||||
(* 0xE8 is è *)
|
||||
update_acc lexbuf;
|
||||
RULE
|
||||
| "exists" ->
|
||||
update_acc lexbuf;
|
||||
EXISTS
|
||||
| "in" ->
|
||||
update_acc lexbuf;
|
||||
IN
|
||||
| "such" ->
|
||||
update_acc lexbuf;
|
||||
SUCH
|
||||
| "that" ->
|
||||
update_acc lexbuf;
|
||||
THAT
|
||||
| "now" ->
|
||||
update_acc lexbuf;
|
||||
NOW
|
||||
| "and" ->
|
||||
update_acc lexbuf;
|
||||
AND
|
||||
| "or" ->
|
||||
update_acc lexbuf;
|
||||
OR
|
||||
| "not" ->
|
||||
update_acc lexbuf;
|
||||
NOT
|
||||
| "number" ->
|
||||
update_acc lexbuf;
|
||||
CARDINAL
|
||||
| "true" ->
|
||||
update_acc lexbuf;
|
||||
TRUE
|
||||
| "false" ->
|
||||
update_acc lexbuf;
|
||||
FALSE
|
||||
| "year" ->
|
||||
update_acc lexbuf;
|
||||
YEAR
|
||||
| 0x24, Star white_space, '0' .. '9', Star ('0' .. '9' | ','), Opt ('.', Rep ('0' .. '9', 0 .. 2))
|
||||
->
|
||||
let extract_parts = R.regexp "([0-9]([0-9,]*[0-9]|))(.([0-9]{0,2})|)" in
|
||||
let full_str = Utf8.lexeme lexbuf in
|
||||
let only_numbers_str = String.trim (String.sub full_str 1 (String.length full_str - 1)) in
|
||||
let parts = R.get_substring (R.exec ~rex:extract_parts only_numbers_str) in
|
||||
(* Integer literal*)
|
||||
let units = parts 1 in
|
||||
let remove_commas = R.regexp "," in
|
||||
let units = int_of_string (R.substitute ~rex:remove_commas ~subst:(fun _ -> "") units) in
|
||||
let cents = try int_of_string (parts 4) with Not_found -> 0 in
|
||||
update_acc lexbuf;
|
||||
MONEY_AMOUNT (units, cents)
|
||||
| Plus '0' .. '9', '.', Star '0' .. '9' ->
|
||||
let extract_code_title = R.regexp "([0-9]+)\\.([0-9]*)" in
|
||||
let dec_parts = R.get_substring (R.exec ~rex:extract_code_title (Utf8.lexeme lexbuf)) in
|
||||
(* Integer literal*)
|
||||
update_acc lexbuf;
|
||||
DECIMAL_LITERAL (int_of_string (dec_parts 1), int_of_string (dec_parts 2))
|
||||
| "->" ->
|
||||
update_acc lexbuf;
|
||||
ARROW
|
||||
| '.' ->
|
||||
update_acc lexbuf;
|
||||
DOT
|
||||
| "<=" ->
|
||||
update_acc lexbuf;
|
||||
LESSER_EQUAL
|
||||
| '<' ->
|
||||
update_acc lexbuf;
|
||||
LESSER
|
||||
| ">=" ->
|
||||
update_acc lexbuf;
|
||||
GREATER_EQUAL
|
||||
| '>' ->
|
||||
update_acc lexbuf;
|
||||
GREATER
|
||||
| "!=" ->
|
||||
update_acc lexbuf;
|
||||
NOT_EQUAL
|
||||
| '=' ->
|
||||
update_acc lexbuf;
|
||||
EQUAL
|
||||
| '(' ->
|
||||
update_acc lexbuf;
|
||||
LPAREN
|
||||
| ')' ->
|
||||
update_acc lexbuf;
|
||||
RPAREN
|
||||
| '+' ->
|
||||
update_acc lexbuf;
|
||||
PLUS
|
||||
| '-' ->
|
||||
update_acc lexbuf;
|
||||
MINUS
|
||||
| '*' ->
|
||||
update_acc lexbuf;
|
||||
MULT
|
||||
| '%' ->
|
||||
update_acc lexbuf;
|
||||
PERCENT
|
||||
| '/' ->
|
||||
update_acc lexbuf;
|
||||
DIV
|
||||
| '|' ->
|
||||
update_acc lexbuf;
|
||||
VERTICAL
|
||||
| ':' ->
|
||||
update_acc lexbuf;
|
||||
COLON
|
||||
| "--" ->
|
||||
update_acc lexbuf;
|
||||
ALT
|
||||
| uppercase, Star (uppercase | lowercase | '0' .. '9' | '_' | '\'') ->
|
||||
(* Name of constructor *)
|
||||
update_acc lexbuf;
|
||||
CONSTRUCTOR (Utf8.lexeme lexbuf)
|
||||
| lowercase, Star (lowercase | uppercase | '0' .. '9' | '_' | '\'') ->
|
||||
(* Name of variable *)
|
||||
update_acc lexbuf;
|
||||
IDENT (Utf8.lexeme lexbuf)
|
||||
| Plus '0' .. '9' ->
|
||||
(* Integer literal*)
|
||||
update_acc lexbuf;
|
||||
INT_LITERAL (int_of_string (Utf8.lexeme lexbuf))
|
||||
| _ -> Errors.lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf)
|
||||
|
||||
let rec lex_law (lexbuf : lexbuf) : token =
|
||||
match%sedlex lexbuf with
|
||||
| '\n' -> lex_law lexbuf
|
||||
| "/*" ->
|
||||
is_code := true;
|
||||
code_string_acc := "";
|
||||
BEGIN_CODE
|
||||
| eof -> EOF
|
||||
| "@@", Star white_space, "Master file", Star white_space, "@@" -> MASTER_FILE
|
||||
| "@@", Star white_space, "Begin metadata", Star white_space, "@@" -> BEGIN_METADATA
|
||||
| "@@", Star white_space, "End metadata", Star white_space, "@@" -> END_METADATA
|
||||
| ( "@@",
|
||||
Star white_space,
|
||||
"Include:",
|
||||
Star white_space,
|
||||
Plus (Compl '@'),
|
||||
Star white_space,
|
||||
Opt ('@', Star white_space, "p.", Star white_space, Plus '0' .. '9', Star white_space),
|
||||
"@@" ) ->
|
||||
let extract_components =
|
||||
R.regexp "@@\\s*Include\\:\\s*([^@]+)\\s*(@\\s*p\\.\\s*([0-9]+)|)@@"
|
||||
in
|
||||
let get_component = R.get_substring (R.exec ~rex:extract_components (Utf8.lexeme lexbuf)) in
|
||||
let name = get_component 1 in
|
||||
let pages = try Some (int_of_string (get_component 3)) with Not_found -> None in
|
||||
let pos = lexing_positions lexbuf in
|
||||
if Filename.extension name = ".pdf" then LAW_INCLUDE (Ast.PdfFile ((name, pos), pages))
|
||||
else if Filename.extension name = ".catala" then LAW_INCLUDE (Ast.CatalaFile (name, pos))
|
||||
else Errors.lexer_error (lexing_positions lexbuf) "this type of file cannot be included"
|
||||
| "@@", Plus (Compl '@'), "@@", Star '+' ->
|
||||
let extract_code_title = R.regexp "@@([^@]+)@@([\\+]*)" in
|
||||
let get_match = R.get_substring (R.exec ~rex:extract_code_title (Utf8.lexeme lexbuf)) in
|
||||
let get_new_lines = R.regexp "\n" in
|
||||
let new_lines_count =
|
||||
try Array.length (R.extract ~rex:get_new_lines (Utf8.lexeme lexbuf)) with Not_found -> 0
|
||||
in
|
||||
for _i = 1 to new_lines_count do
|
||||
new_line lexbuf
|
||||
done;
|
||||
let law_title = get_match 1 in
|
||||
let precedence = String.length (get_match 2) in
|
||||
|
||||
LAW_HEADING (law_title, precedence)
|
||||
| "@", Plus (Compl '@'), "@" ->
|
||||
let extract_article_title = R.regexp "@([^@]+)@" in
|
||||
let title = R.get_substring (R.exec ~rex:extract_article_title (Utf8.lexeme lexbuf)) 1 in
|
||||
let get_new_lines = R.regexp "\n" in
|
||||
let new_lines_count =
|
||||
try Array.length (R.extract ~rex:get_new_lines (Utf8.lexeme lexbuf)) with Not_found -> 0
|
||||
in
|
||||
(* the -1 is here to compensate for Sedlex's automatic newline detection around token *)
|
||||
for _i = 1 to new_lines_count - 1 do
|
||||
new_line lexbuf
|
||||
done;
|
||||
|
||||
LAW_ARTICLE (title, None, None)
|
||||
| Plus (Compl ('@' | '/' | '\n')) -> LAW_TEXT (Utf8.lexeme lexbuf)
|
||||
| _ -> Errors.lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf)
|
||||
|
||||
let lexer lexbuf = if !is_code then lex_code lexbuf else lex_law lexbuf
|
@ -142,10 +142,16 @@ let rec parse_source_files (source_files : string list) (language : Cli.language
|
||||
try
|
||||
Parse_utils.current_file := source_file;
|
||||
let lexer_lang =
|
||||
match language with Cli.Fr -> Lexer_fr.lexer_fr | Cli.En -> Lexer_en.lexer_en
|
||||
match language with
|
||||
| `Fr -> Lexer_fr.lexer_fr
|
||||
| `En -> Lexer_en.lexer_en
|
||||
| `NonVerbose -> Lexer.lexer
|
||||
in
|
||||
let token_list_lang =
|
||||
match language with Cli.Fr -> Lexer_fr.token_list_fr | Cli.En -> Lexer_en.token_list_en
|
||||
match language with
|
||||
| `Fr -> Lexer_fr.token_list_fr
|
||||
| `En -> Lexer_en.token_list_en
|
||||
| `NonVerbose -> Lexer.token_list
|
||||
in
|
||||
let commands_or_includes =
|
||||
sedlex_with_menhir lexer_lang token_list_lang Parser.Incremental.source_file_or_master
|
||||
|
@ -175,7 +175,7 @@ let driver (file : string) (debug : bool) (client_id : string) (client_secret :
|
||||
if debug then Catala.Cli.debug_flag := true;
|
||||
let access_token = Api.get_token client_id client_secret in
|
||||
(* LegiFrance is only supported for French texts *)
|
||||
let program = Catala.Parser_driver.parse_source_files [ file ] Catala.Cli.Fr in
|
||||
let program = Catala.Parser_driver.parse_source_files [ file ] `Fr in
|
||||
let article_text_acc =
|
||||
List.fold_left
|
||||
(fun article_text_acc item ->
|
||||
|
7
tests/non_verbose.catala
Normal file
7
tests/non_verbose.catala
Normal file
@ -0,0 +1,7 @@
|
||||
/*
|
||||
declaration scope A:
|
||||
context x content int
|
||||
|
||||
scope A:
|
||||
def x ? true ~= 42
|
||||
*/
|
Loading…
Reference in New Issue
Block a user