This commit is contained in:
Bartosz Woźniak 2021-05-09 22:55:50 +02:00
parent 37c8f8da64
commit db46b1c444
21 changed files with 709 additions and 30 deletions

View File

@ -124,7 +124,7 @@ general-to-specifics statutes order. Therefore, there exists multiple versions
of the Catala surface syntax, adapted to the language of the legislative text.
Currently, Catala supports English and French legislative text via the
`--language=en` or `--language=fr` option.
`--language=en`, `--language=fr` or `--language=pl` option.
Technically, support for new languages can be added via a new lexer. If you want
to add a new language, you can start from

View File

@ -119,7 +119,8 @@ augmented with the Catala plugin, simply enter
sudo make pygments
This will execute the
script `syntax_highlighting/fr/pygments/set_up_pygments.sh` and
script `syntax_highlighting/fr/pygments/set_up_pygments.sh`,
`syntax_highlighting/pl/pygments/set_up_pygments.sh` and
`syntax_highlighting/en/pygments/set_up_pygments.sh`.
The scripts patch your `pigmentize` executable, used for instance by the `minted` LaTeX package.

View File

@ -58,6 +58,7 @@ install:
SYNTAX_HIGHLIGHTING_FR=${CURDIR}/syntax_highlighting/fr
SYNTAX_HIGHLIGHTING_EN=${CURDIR}/syntax_highlighting/en
SYNTAX_HIGHLIGHTING_PL=${CURDIR}/syntax_highlighting/pl
pygmentize_fr: $(SYNTAX_HIGHLIGHTING_FR)/set_up_pygments.sh
chmod +x $<
@ -67,6 +68,10 @@ pygmentize_en: $(SYNTAX_HIGHLIGHTING_EN)/set_up_pygments.sh
chmod +x $<
sudo $<
pygmentize_pl: $(SYNTAX_HIGHLIGHTING_PL)/set_up_pygments.sh
chmod +x $<
sudo $<
#> pygments : Extends your pygmentize executable with Catala lexers
pygments: pygmentize_fr pygmentize_en
@ -78,6 +83,10 @@ atom_en: ${CURDIR}/syntax_highlighting/en/setup_atom.sh
chmod +x $<
$<
atom_pl: ${CURDIR}/syntax_highlighting/pl/setup_atom.sh
chmod +x $<
$<
atom_nv: ${CURDIR}/syntax_highlighting/nv/setup_atom.sh
chmod +x $<
$<
@ -93,6 +102,10 @@ vscode_en: ${CURDIR}/syntax_highlighting/en/setup_vscode.sh
chmod +x $<
$<
vscode_pl: ${CURDIR}/syntax_highlighting/pl/setup_vscode.sh
chmod +x $<
$<
vscode_nv: ${CURDIR}/syntax_highlighting/nv/setup_vscode.sh
chmod +x $<
$<
@ -222,7 +235,7 @@ clean:
$(MAKE) -C $(CODE_GENERAL_IMPOTS_DIR) clean
inspect:
gitinspector -f ml,mli,mly,iro,tex,catala,catala_en,catala_fr,md,fst,mld --grading
gitinspector -f ml,mli,mly,iro,tex,catala,catala_en,catala_pl,catala_fr,md,fst,mld --grading
##########################################
# Special targets

View File

@ -99,7 +99,7 @@ To build and run the example, create a `Makefile` in `foo/`
with the following contents:
```Makefile
CATALA_LANG=en # or fr if your source code is in French
CATALA_LANG=en # or fr/pl if your source code is in French/Polish
SRC=foo.catala
include ../Makefile.common.mk

14
examples/polish_taxes_pl/.gitignore vendored Normal file
View File

@ -0,0 +1,14 @@
*.aux
*.dvi
*.fdb_latexmk
*.fls
*.log
*.out
*.fls
*.tex
*.pdf
_minted*
*.toc
*.pyg
*.d
*.ml

View File

@ -0,0 +1,4 @@
CATALA_LANG=pl
SRC=polish_tax_code.catala_pl
include ../Makefile.common.mk

View File

@ -0,0 +1,24 @@
## Rozdzial 3
## Art. 7. 1. Stawki podatku wynoszą:
### 1) od umowy sprzedaży:
#### a) nieruchomości, rzeczy ruchomych, prawa użytkowania wieczystego, własnościowego spółdzielczego prawa do lokalu mieszkalnego, spółdzielczego prawa do lokalu użytkowego oraz wynikających z przepisów prawa spółdzielczego: prawa do domu jednorodzinnego oraz prawa do lokalu w małym domu mieszkalnym 2%,
> Begin metadata
```catala
deklaracja zakres UmowaSprzedazy:
kontekst kwota zawartosc pieniadze
kontekst stawka zawartosc dziesietna
kontekst podatek zawartosc pieniadze
zakres UmowaSprzedazy:
definicja podatek wynosi
kwota *$ stawka
zakres UmowaSprzedazy:
etykieta a7_u1_p1_ppa
definicja stawka wynosi 2%
```
> End metadata

View File

@ -0,0 +1,3 @@
# Master file
> Include: podatek_od_czynnosci_cywilnoprawnych/rozdzial_3.catala_pl

View File

@ -0,0 +1,23 @@
> Include: ../podatek_od_czynnosci_cywilnoprawnych/rozdzial_3.catala_en
## [Test - Art. 7 ustęp 1 punkt 1]
```catala
deklaracja zakres Test_A7_U1_P1_PPa:
kontekst sprzedaz zakres UmowaSprzedazy
zakres Test_A7_U1_P1_PPa:
definicja sprzedaz.kwota wynosi $100
asercja sprzedaz.podatek = $2
deklaracja zakres Test_A7_U1_P1_PPb:
kontekst sprzedaz zakres UmowaSprzedazy
zakres Test_A7_U1_P1_PPb:
definicja sprzedaz.kwota wynosi $100
definicja sprzedaz.inne_prawa_majatkowe wynosi prawda
asercja sprzedaz.podatek = $1
```

View File

@ -16,6 +16,7 @@ scp grammar.html $1/
scp catala.html $1/
scp _build/default/src/catala/catala_web.bc.js $1/playground/
scp examples/tutorial_en/tutorial_en.catala_en $1/playground/
# TODO BW: PL
scp examples/tutoriel_fr/tutoriel_fr.catala_fr $1/playground/
cat examples/allocations_familiales/prologue.catala_fr > allocations_familiales.catala_fr
cat examples/allocations_familiales/decrets_divers.catala_fr >> allocations_familiales.catala_fr
@ -29,5 +30,6 @@ mv -f allocations_familiales_fixed.catala_fr allocations_familiales.catala_fr
scp allocations_familiales.catala_fr $1/playground/
rm allocations_familiales.catala_fr
scp syntax_highlighting/en/ace/mode-catala_en.js $1/playground/
# TODO BW: PL
scp syntax_highlighting/fr/ace/mode-catala_fr.js $1/playground/
scp french_law_js/french_law.js $1/french_law.js

View File

@ -98,7 +98,8 @@ let format_lit (fmt : Format.formatter) (l : lit Pos.marked) : unit =
| LMoney e -> (
match !Utils.Cli.locale_lang with
| `En -> Format.fprintf fmt "$%s" (Runtime.money_to_string e)
| `Fr -> Format.fprintf fmt "%s €" (Runtime.money_to_string e))
| `Fr -> Format.fprintf fmt "%s €" (Runtime.money_to_string e)
| `Pl -> Format.fprintf fmt "%s PLN" (Runtime.money_to_string e))
| LDate d -> Format.fprintf fmt "%s" (Runtime.date_to_string d)
| LDuration d -> Format.fprintf fmt "%s" (Runtime.duration_to_string d)

View File

@ -35,6 +35,7 @@ let driver (source_file : Pos.input_file) (debug : bool) (dcalc : bool) (unstyle
| Some l ->
if l = "fr" then `Fr
else if l = "en" then `En
else if l = "pl" then `Pl
else if l = "non-verbose" then `NonVerbose
else
Errors.raise_error

View File

@ -38,7 +38,8 @@ let format_lit (fmt : Format.formatter) (l : lit Pos.marked) : unit =
| LMoney e -> (
match !Utils.Cli.locale_lang with
| `En -> Format.fprintf fmt "$%s" (Runtime.money_to_string e)
| `Fr -> Format.fprintf fmt "%s €" (Runtime.money_to_string e))
| `Fr -> Format.fprintf fmt "%s €" (Runtime.money_to_string e)
| `Pl -> Format.fprintf fmt "%s PLN" (Runtime.money_to_string e))
| LDate d -> Format.fprintf fmt "%s" (Runtime.date_to_string d)
| LDuration d -> Format.fprintf fmt "%s" (Runtime.duration_to_string d)

View File

@ -71,12 +71,14 @@ let wrap_html (source_files : string list) (language : Cli.backend_lang) (fmt :
css_as_string
(match language with
| `Fr -> "Implémentation de texte législatif"
| `En -> "Legislative text implementation")
(match language with `Fr -> "Document généré par" | `En -> "Document generated by")
| `En -> "Legislative text implementation"
| `Pl -> "Implementowanie tekstów legislacyjnych")
(match language with `Fr -> "Document généré par" | `En -> "Document generated by" | `Pl -> "Dokument wygenerowany przez")
Utils.Cli.version
(match language with
| `Fr -> "Fichiers sources tissés dans ce document"
| `En -> "Source files weaved in this document")
| `En -> "Source files weaved in this document"
| `Pl -> "Pliki źródłowe w tym dokumencie")
(String.concat "\n"
(List.map
(fun filename ->
@ -90,7 +92,8 @@ let wrap_html (source_files : string list) (language : Cli.backend_lang) (fmt :
(pre_html (Filename.basename filename))
(match language with
| `Fr -> "dernière modification le"
| `En -> "last modification")
| `En -> "last modification"
| `Pl -> "ostatnia modyfikacja")
ftime)
source_files));
wrapped fmt
@ -104,7 +107,7 @@ let pygmentize_code (c : string Pos.marked) (language : C.backend_lang) : string
Printf.fprintf oc "%s" (Pos.unmark c);
close_out oc;
let pygments = "pygmentize" in
let pygments_lexer = match language with `Fr -> "catala_fr" | `En -> "catala_en" in
let pygments_lexer = match language with `Fr -> "catala_fr" | `En -> "catala_en" | `Pl -> "catala_pl" in
let pygments_args =
[|
"-l";

View File

@ -80,15 +80,17 @@ let wrap_latex (source_files : string list) (language : C.backend_lang) (fmt : F
%s : \n\
\\begin{itemize}%s\\end{itemize}\n\n\
\\[\\star\\star\\star\\]\\\\\n"
(match language with `Fr -> "french" | `En -> "english")
(match language with `Fr -> "french" | `En -> "english" | `Pl -> "polish")
(match language with
| `Fr -> "Implémentation de texte législatif"
| `En -> "Legislative text implementation")
(match language with `Fr -> "Document généré par" | `En -> "Document generated by")
| `En -> "Legislative text implementation"
| `Pl -> "Implementacja tekstów legislacyjnych")
(match language with `Fr -> "Document généré par" | `En -> "Document generated by" | `Pl -> "Dokument wygenerowany przez")
Utils.Cli.version
(match language with
| `Fr -> "Fichiers sources tissés dans ce document"
| `En -> "Source files weaved in this document")
| `En -> "Source files weaved in this document"
| `Pl -> "Pliki źródłowe w tym dokumencie")
(String.concat ","
(List.map
(fun filename ->
@ -102,7 +104,8 @@ let wrap_latex (source_files : string list) (language : C.backend_lang) (fmt : F
(pre_latexify (Filename.basename filename))
(match language with
| `Fr -> "dernière modification le"
| `En -> "last modification")
| `En -> "last modification"
| `Pl -> "ostatnia modyfikacja")
ftime)
source_files));
wrapped fmt;
@ -137,7 +140,7 @@ let law_article_item_to_latex (language : C.backend_lang) (fmt : Format.formatte
\\end{minted}"
(pre_latexify (Filename.basename (Pos.get_file (Pos.get_position c))))
(Pos.get_start_line (Pos.get_position c) - 1)
(match language with `Fr -> "catala_fr" | `En -> "catala_en")
(match language with `Fr -> "catala_fr" | `En -> "catala_en" | `Pl -> "catala_pl")
(math_syms_replace (Pos.unmark c))
let rec law_structure_to_latex (language : C.backend_lang) (fmt : Format.formatter)
@ -171,7 +174,7 @@ let rec law_structure_to_latex (language : C.backend_lang) (fmt : Format.formatt
(law_article_item_to_latex language)
fmt children
| A.MetadataBlock (_, c) ->
let metadata_title = match language with `Fr -> "Métadonnées" | `En -> "Metadata" in
let metadata_title = match language with `Fr -> "Métadonnées" | `En -> "Metadata" | `Pl -> "Metadane" in
Format.fprintf fmt
"\\begin{tcolorbox}[colframe=OliveGreen, breakable, \
title=\\textcolor{black}{\\texttt{%s}},title after \
@ -183,7 +186,7 @@ let rec law_structure_to_latex (language : C.backend_lang) (fmt : Format.formatt
metadata_title metadata_title
(Pos.get_start_line (Pos.get_position c) - 1)
(pre_latexify (Filename.basename (Pos.get_file (Pos.get_position c))))
(match language with `Fr -> "catala_fr" | `En -> "catala_en")
(match language with `Fr -> "catala_fr" | `En -> "catala_en" | `Pl -> "catala_pl")
(math_syms_replace (Pos.unmark c))
| A.IntermediateText t -> Format.fprintf fmt "%s" (pre_latexify t)

View File

@ -32,19 +32,37 @@
(rule
(with-stdout-to
parser.messages.new
(run menhir %{dep:tokens.mly} %{dep:parser.mly} --base parser --list-errors)))
(run
menhir
%{dep:tokens.mly}
%{dep:parser.mly}
--base
parser
--list-errors)))
(rule
(with-stdout-to
parser_errors.ml
(run menhir %{dep:tokens.mly} %{dep:parser.mly} --base parser
--compile-errors %{dep:parser.messages})))
(run
menhir
%{dep:tokens.mly}
%{dep:parser.mly}
--base
parser
--compile-errors
%{dep:parser.messages})))
(rule
(with-stdout-to
parser.messages.updated
(run menhir %{dep:tokens.mly} %{dep:parser.mly} --base parser
--update-errors %{dep:parser.messages})))
(run
menhir
%{dep:tokens.mly}
%{dep:parser.mly}
--base
parser
--update-errors
%{dep:parser.messages})))
(rule
(alias update-parser-messages)

View File

@ -0,0 +1,550 @@
(* This file is part of the Catala compiler, a specification language for tax and social benefits
computation rules. Copyright (C) 2020 Inria, contributor: Denis Merigoux
<denis.merigoux@inria.fr>
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
or implied. See the License for the specific language governing permissions and limitations under
the License. *)
open Tokens
open Sedlexing
open Utils
open Lexer_common
module L = Lexer
module R = Re.Pcre
(** Same as {!val: Surface.Lexer.token_list_language_agnostic}, but with tokens specialized to
Polish. *)
let token_list : (string * token) list =
[
("zakres", SCOPE);
("konsekwencja", CONSEQUENCE);
("data", DATA);
("zalezy od", DEPENDS);
("deklaracja", DECLARATION);
("kontekst", CONTEXT);
("malejacy", DECREASING);
("rosnacy", INCREASING);
("z", OF);
("kolekcja", COLLECTION);
("enumeracja", ENUM);
("calkowita", INTEGER);
("pieniadze", MONEY);
("tekst", TEXT);
("dziesietna", DECIMAL);
("czas", DATE);
("czas trwania", DURATION);
("zerojedynkowy", BOOLEAN);
("suma", SUM);
("spelnione", FILLED);
("definicja", DEFINITION);
("etykieta", LABEL);
("wyjatek", EXCEPTION);
("wynosi", DEFINED_AS);
("pasuje", MATCH);
("ze wzorem", WITH);
("pod warunkiem", UNDER_CONDITION);
("jezeli", IF);
("wtedy", THEN);
("inaczej", ELSE);
("zawartosc", CONTENT);
("struktura", STRUCT);
("asercja", ASSERTION);
("rozna", VARIES);
("with", WITH_V);
("dla", FOR);
("wszystkie", ALL);
("mamy", WE_HAVE);
("staloprzecinkowa", FIXED);
("przez", BY);
("zasada", RULE);
("istnieje", EXISTS);
("takie ze", SUCH);
("to", THAT);
("i", AND);
("lub", OR);
("xor", XOR);
("nie", NOT);
("maximum", MAXIMUM);
("minimum", MINIMUM);
("filtr", FILTER);
("mapuj", MAP);
("poczatkowy", INIT);
("liczba", CARDINAL);
("rok", YEAR);
("miesiac", MONTH);
("dzien", DAY);
("prawda", TRUE);
("falsz", FALSE);
]
@ L.token_list_language_agnostic
(** Localised builtin functions *)
let builtins : (string * Ast.builtin_expression) list =
[
("integer_to_decimal", IntToDec);
("get_day", GetDay);
("get_month", GetMonth);
("get_year", GetYear);
]
(** Main lexing function used in code blocks *)
let rec lex_code (lexbuf : lexbuf) : token =
let prev_lexeme = Utf8.lexeme lexbuf in
let prev_pos = lexing_positions lexbuf in
match%sedlex lexbuf with
| white_space ->
(* Whitespaces *)
L.update_acc lexbuf;
lex_code lexbuf
| '#', Star (Compl '\n'), '\n' ->
(* Comments *)
L.update_acc lexbuf;
lex_code lexbuf
| "```" ->
(* End of code section *)
L.is_code := false;
END_CODE !L.code_string_acc
| "zakres" ->
L.update_acc lexbuf;
SCOPE
| "data" ->
L.update_acc lexbuf;
DATA
| "zalezy od" ->
L.update_acc lexbuf;
DEPENDS
| "deklaracja" ->
L.update_acc lexbuf;
DECLARATION
| "kontekst" ->
L.update_acc lexbuf;
CONTEXT
| "malejacy" ->
L.update_acc lexbuf;
DECREASING
| "rosnacy" ->
L.update_acc lexbuf;
INCREASING
| "z" ->
L.update_acc lexbuf;
OF
| "kolekcja" ->
L.update_acc lexbuf;
COLLECTION
| "enumeracja" ->
L.update_acc lexbuf;
ENUM
| "calkowita" ->
L.update_acc lexbuf;
INTEGER
| "pieniadze" ->
L.update_acc lexbuf;
MONEY
| "tekst" ->
L.update_acc lexbuf;
TEXT
| "dziesietna" ->
L.update_acc lexbuf;
DECIMAL
| "czas" ->
L.update_acc lexbuf;
DATE
| "czas trwania" ->
L.update_acc lexbuf;
DURATION
| "zerojedynkowy" ->
L.update_acc lexbuf;
BOOLEAN
| "suma" ->
L.update_acc lexbuf;
SUM
| "spelnione" ->
L.update_acc lexbuf;
FILLED
| "definicja" ->
L.update_acc lexbuf;
DEFINITION
| "etykieta" ->
L.update_acc lexbuf;
LABEL
| "wyjatek" ->
L.update_acc lexbuf;
EXCEPTION
| "wynosi" ->
L.update_acc lexbuf;
DEFINED_AS
| "pasuje" ->
L.update_acc lexbuf;
MATCH
| "ze wzorem" ->
L.update_acc lexbuf;
WITH
| "pod warunkiem" ->
L.update_acc lexbuf;
UNDER_CONDITION
| "jezeli" ->
L.update_acc lexbuf;
IF
| "konsekwencja" ->
L.update_acc lexbuf;
CONSEQUENCE
| "wtedy" ->
L.update_acc lexbuf;
THEN
| "inaczej" ->
L.update_acc lexbuf;
ELSE
| "condition" ->
L.update_acc lexbuf;
CONDITION
| "zawartosc" ->
L.update_acc lexbuf;
CONTENT
| "struktura" ->
L.update_acc lexbuf;
STRUCT
| "asercja" ->
L.update_acc lexbuf;
ASSERTION
| "rozna" ->
L.update_acc lexbuf;
VARIES
| "wraz z" ->
L.update_acc lexbuf;
WITH_V
| "dla" ->
L.update_acc lexbuf;
FOR
| "wszystkie" ->
L.update_acc lexbuf;
ALL
| "mamy" ->
L.update_acc lexbuf;
WE_HAVE
| "staloprzecinkowa" ->
L.update_acc lexbuf;
FIXED
| "przez" ->
L.update_acc lexbuf;
BY
| "zasada" ->
(* 0xE8 is è *)
L.update_acc lexbuf;
RULE
| "istnieje" ->
L.update_acc lexbuf;
EXISTS
| "in" ->
L.update_acc lexbuf;
IN
| "takie ze" ->
L.update_acc lexbuf;
SUCH
| "to" ->
L.update_acc lexbuf;
THAT
| "i" ->
L.update_acc lexbuf;
AND
| "lub" ->
L.update_acc lexbuf;
OR
| "xor" ->
L.update_acc lexbuf;
XOR
| "nie" ->
L.update_acc lexbuf;
NOT
| "maximum" ->
L.update_acc lexbuf;
MAXIMUM
| "minimum" ->
L.update_acc lexbuf;
MINIMUM
| "filtr" ->
L.update_acc lexbuf;
FILTER
| "mapuj" ->
L.update_acc lexbuf;
MAP
| "poczatkowy" ->
L.update_acc lexbuf;
INIT
| "liczba" ->
L.update_acc lexbuf;
CARDINAL
| "prawda" ->
L.update_acc lexbuf;
TRUE
| "falsz" ->
L.update_acc lexbuf;
FALSE
| "rok" ->
L.update_acc lexbuf;
YEAR
| "miesiac" ->
L.update_acc lexbuf;
MONTH
| "dzien" ->
L.update_acc lexbuf;
DAY
| 0x24, Star white_space, '0' .. '9', Star ('0' .. '9' | ','), Opt ('.', Rep ('0' .. '9', 0 .. 2))
->
let extract_parts = R.regexp "([0-9]([0-9,]*[0-9]|))(.([0-9]{0,2})|)" in
let full_str = Utf8.lexeme lexbuf in
let only_numbers_str = String.trim (String.sub full_str 1 (String.length full_str - 1)) in
let parts = R.get_substring (R.exec ~rex:extract_parts only_numbers_str) in
(* Integer literal*)
let units = parts 1 in
let remove_commas = R.regexp "," in
let units =
Runtime.integer_of_string (R.substitute ~rex:remove_commas ~subst:(fun _ -> "") units)
in
let cents =
try Runtime.integer_of_string (parts 4) with Not_found -> Runtime.integer_of_int 0
in
L.update_acc lexbuf;
MONEY_AMOUNT (units, cents)
| Plus '0' .. '9', '.', Star '0' .. '9' ->
let extract_code_title = R.regexp "([0-9]+)\\.([0-9]*)" in
let dec_parts = R.get_substring (R.exec ~rex:extract_code_title (Utf8.lexeme lexbuf)) in
(* Integer literal*)
L.update_acc lexbuf;
DECIMAL_LITERAL
(Runtime.integer_of_string (dec_parts 1), Runtime.integer_of_string (dec_parts 2))
| "<=@" ->
L.update_acc lexbuf;
LESSER_EQUAL_DATE
| "<@" ->
L.update_acc lexbuf;
LESSER_DATE
| ">=@" ->
L.update_acc lexbuf;
GREATER_EQUAL_DATE
| ">@" ->
L.update_acc lexbuf;
GREATER_DATE
| "-@" ->
L.update_acc lexbuf;
MINUSDATE
| "+@" ->
L.update_acc lexbuf;
PLUSDATE
| "<=^" ->
L.update_acc lexbuf;
LESSER_EQUAL_DURATION
| "<^" ->
L.update_acc lexbuf;
LESSER_DURATION
| ">=^" ->
L.update_acc lexbuf;
GREATER_EQUAL_DURATION
| ">^" ->
L.update_acc lexbuf;
GREATER_DURATION
| "+^" ->
L.update_acc lexbuf;
PLUSDURATION
| "-^" ->
L.update_acc lexbuf;
MINUSDURATION
| "<=", 0x24 ->
L.update_acc lexbuf;
LESSER_EQUAL_MONEY
| '<', 0x24 ->
L.update_acc lexbuf;
LESSER_MONEY
| ">=", 0x24 ->
L.update_acc lexbuf;
GREATER_EQUAL_MONEY
| '>', 0x24 ->
L.update_acc lexbuf;
GREATER_MONEY
| '+', 0x24 ->
L.update_acc lexbuf;
PLUSMONEY
| '-', 0x24 ->
L.update_acc lexbuf;
MINUSMONEY
| '*', 0x24 ->
L.update_acc lexbuf;
MULTMONEY
| '/', 0x24 ->
L.update_acc lexbuf;
DIVMONEY
| "<=." ->
L.update_acc lexbuf;
LESSER_EQUAL_DEC
| "<." ->
L.update_acc lexbuf;
LESSER_DEC
| ">=." ->
L.update_acc lexbuf;
GREATER_EQUAL_DEC
| ">." ->
L.update_acc lexbuf;
GREATER_DEC
| "+." ->
L.update_acc lexbuf;
PLUSDEC
| "-." ->
L.update_acc lexbuf;
MINUSDEC
| "*." ->
L.update_acc lexbuf;
MULTDEC
| "/." ->
L.update_acc lexbuf;
DIVDEC
| "<=" ->
L.update_acc lexbuf;
LESSER_EQUAL
| '<' ->
L.update_acc lexbuf;
LESSER
| ">=" ->
L.update_acc lexbuf;
GREATER_EQUAL
| '>' ->
L.update_acc lexbuf;
GREATER
| '+' ->
L.update_acc lexbuf;
PLUS
| '-' ->
L.update_acc lexbuf;
MINUS
| '*' ->
L.update_acc lexbuf;
MULT
| '/' ->
L.update_acc lexbuf;
DIV
| "!=" ->
L.update_acc lexbuf;
NOT_EQUAL
| '=' ->
L.update_acc lexbuf;
EQUAL
| '%' ->
L.update_acc lexbuf;
PERCENT
| '(' ->
L.update_acc lexbuf;
LPAREN
| ')' ->
L.update_acc lexbuf;
RPAREN
| '{' ->
L.update_acc lexbuf;
LBRACKET
| '}' ->
L.update_acc lexbuf;
RBRACKET
| '[' ->
L.update_acc lexbuf;
LSQUARE
| ']' ->
L.update_acc lexbuf;
RSQUARE
| '|' ->
L.update_acc lexbuf;
VERTICAL
| ':' ->
L.update_acc lexbuf;
COLON
| ';' ->
L.update_acc lexbuf;
SEMICOLON
| "--" ->
L.update_acc lexbuf;
ALT
| '.' ->
L.update_acc lexbuf;
DOT
| uppercase, Star (uppercase | lowercase | '0' .. '9' | '_' | '\'') ->
(* Name of constructor *)
L.update_acc lexbuf;
CONSTRUCTOR (Utf8.lexeme lexbuf)
| lowercase, Star (lowercase | uppercase | '0' .. '9' | '_' | '\'') ->
(* Name of variable *)
L.update_acc lexbuf;
IDENT (Utf8.lexeme lexbuf)
| Plus '0' .. '9' ->
(* Integer literal*)
L.update_acc lexbuf;
INT_LITERAL (Runtime.integer_of_string (Utf8.lexeme lexbuf))
| _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme
(** Main lexing function used outside code blocks *)
let lex_law (lexbuf : lexbuf) : token =
let prev_lexeme = Utf8.lexeme lexbuf in
let prev_pos = lexing_positions lexbuf in
match%sedlex lexbuf with
| "```catala" ->
L.is_code := true;
L.code_string_acc := "";
BEGIN_CODE
| eof -> EOF
| '#', Star white_space, "Master file" -> MASTER_FILE
| '>', Star white_space, "Begin metadata" -> BEGIN_METADATA
| '>', Star white_space, "End metadata" -> END_METADATA
| ( '>',
Star white_space,
"Include:",
Star white_space,
Plus (Compl ('@' | '\n')),
Star white_space,
Opt ('@', Star white_space, "p.", Star white_space, Plus '0' .. '9', Star white_space),
'\n' ) ->
let extract_components =
R.regexp ">\\s*Include\\:\\s*([^@\\n]+)\\s*(@\\s*p\\.\\s*([0-9]+)|)"
in
let get_component = R.get_substring (R.exec ~rex:extract_components (Utf8.lexeme lexbuf)) in
let name = get_component 1 in
let pages = try Some (int_of_string (get_component 3)) with Not_found -> None in
let pos = lexing_positions lexbuf in
if Filename.extension name = ".pdf" then
LAW_INCLUDE (Ast.PdfFile ((name, Pos.from_lpos pos), pages))
else LAW_INCLUDE (Ast.CatalaFile (name, Pos.from_lpos pos))
| '#', Plus '#', Star white_space, Plus (Compl ('[' | ']' | '\n')), Star white_space, '\n' ->
get_law_heading lexbuf
| ( '#',
Plus '#',
Star white_space,
'[',
Star white_space,
Plus (Compl ']'),
Star white_space,
']',
'\n' ) ->
let extract_article_title = R.regexp "([#]+)\\s*\\[([^\\]]+)\\]" in
let get_substring =
R.get_substring (R.exec ~rex:extract_article_title (Utf8.lexeme lexbuf))
in
let title = get_substring 2 in
let get_new_lines = R.regexp "\n" in
let new_lines_count =
try Array.length (R.extract ~rex:get_new_lines (Utf8.lexeme lexbuf)) with Not_found -> 0
in
(* the -1 is here to compensate for Sedlex's automatic newline detection around token *)
for _i = 1 to new_lines_count - 1 do
new_line lexbuf
done;
let precedence = calc_precedence (get_substring 1) in
LAW_ARTICLE (title, None, None, precedence)
| Plus (Compl ('/' | '#' | '`' | '>')) -> LAW_TEXT (Utf8.lexeme lexbuf)
| _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme
(** Entry point of the lexer, distributes to {!val: lex_code} or {!val: lex_law} depending of {!val:
Surface.Lexer.is_code}. *)
let lexer (lexbuf : lexbuf) : token = if !L.is_code then lex_code lexbuf else lex_law lexbuf

View File

@ -0,0 +1,15 @@
(* This file is part of the Catala compiler, a specification language for tax and social benefits
computation rules. Copyright (C) 2020 Inria, contributor: Denis Merigoux
<denis.merigoux@inria.fr>
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
or implied. See the License for the specific language governing permissions and limitations under
the License. *)
include Lexer.LocalisedLexer

View File

@ -226,11 +226,13 @@ end
module Parser_NonVerbose = ParserAux (Lexer)
module Parser_En = ParserAux (Lexer_en)
module Parser_Fr = ParserAux (Lexer_fr)
module Parser_Pl = ParserAux (Lexer_pl)
let localised_parser : Cli.frontend_lang -> lexbuf -> Ast.source_file_or_master = function
| `NonVerbose -> Parser_NonVerbose.commands_or_includes
| `En -> Parser_En.commands_or_includes
| `Fr -> Parser_Fr.commands_or_includes
| `Pl -> Parser_Pl.commands_or_includes
(** {1 Parsing multiple files} *)

View File

@ -12,12 +12,12 @@
or implied. See the License for the specific language governing permissions and limitations under
the License. *)
type frontend_lang = [ `Fr | `En | `NonVerbose ]
type frontend_lang = [ `Fr | `En | `NonVerbose | `Pl]
type backend_lang = [ `Fr | `En ]
type backend_lang = [ `Fr | `En | `Pl ]
let to_backend_lang (lang : frontend_lang) : backend_lang =
match lang with `En | `NonVerbose -> `En | `Fr -> `Fr
match lang with `En | `NonVerbose -> `En | `Fr -> `Fr | `Pl -> `Pl
(** Source files to be compiled *)
let source_files : string list ref = ref []
@ -57,7 +57,8 @@ let unstyled = Arg.(value & flag & info [ "unstyled" ] ~doc:"Removes styling fro
let optimize = Arg.(value & flag & info [ "optimize"; "O" ] ~doc:"Run compiler optimizations")
let trace_opt =
Arg.(value & flag & info [ "trace"; "t" ] ~doc:"Displays a trace of the interpreter's computation")
Arg.(
value & flag & info [ "trace"; "t" ] ~doc:"Displays a trace of the interpreter's computation")
let wrap_weaved_output =
Arg.(

View File

@ -12,9 +12,9 @@
or implied. See the License for the specific language governing permissions and limitations under
the License. *)
type frontend_lang = [ `En | `Fr | `NonVerbose ]
type frontend_lang = [ `En | `Fr | `NonVerbose | `Pl ]
type backend_lang = [ `En | `Fr ]
type backend_lang = [ `En | `Fr | `Pl ]
val to_backend_lang : frontend_lang -> backend_lang