This commit is contained in:
Bartosz Woźniak 2021-05-09 22:55:50 +02:00
parent 37c8f8da64
commit db46b1c444
21 changed files with 709 additions and 30 deletions

View File

@ -124,7 +124,7 @@ general-to-specifics statutes order. Therefore, there exists multiple versions
of the Catala surface syntax, adapted to the language of the legislative text. of the Catala surface syntax, adapted to the language of the legislative text.
Currently, Catala supports English and French legislative text via the Currently, Catala supports English and French legislative text via the
`--language=en` or `--language=fr` option. `--language=en`, `--language=fr` or `--language=pl` option.
Technically, support for new languages can be added via a new lexer. If you want Technically, support for new languages can be added via a new lexer. If you want
to add a new language, you can start from to add a new language, you can start from

View File

@ -119,7 +119,8 @@ augmented with the Catala plugin, simply enter
sudo make pygments sudo make pygments
This will execute the This will execute the
script `syntax_highlighting/fr/pygments/set_up_pygments.sh` and script `syntax_highlighting/fr/pygments/set_up_pygments.sh`,
`syntax_highlighting/pl/pygments/set_up_pygments.sh` and
`syntax_highlighting/en/pygments/set_up_pygments.sh`. `syntax_highlighting/en/pygments/set_up_pygments.sh`.
The scripts patch your `pigmentize` executable, used for instance by the `minted` LaTeX package. The scripts patch your `pigmentize` executable, used for instance by the `minted` LaTeX package.

View File

@ -58,6 +58,7 @@ install:
SYNTAX_HIGHLIGHTING_FR=${CURDIR}/syntax_highlighting/fr SYNTAX_HIGHLIGHTING_FR=${CURDIR}/syntax_highlighting/fr
SYNTAX_HIGHLIGHTING_EN=${CURDIR}/syntax_highlighting/en SYNTAX_HIGHLIGHTING_EN=${CURDIR}/syntax_highlighting/en
SYNTAX_HIGHLIGHTING_PL=${CURDIR}/syntax_highlighting/pl
pygmentize_fr: $(SYNTAX_HIGHLIGHTING_FR)/set_up_pygments.sh pygmentize_fr: $(SYNTAX_HIGHLIGHTING_FR)/set_up_pygments.sh
chmod +x $< chmod +x $<
@ -67,6 +68,10 @@ pygmentize_en: $(SYNTAX_HIGHLIGHTING_EN)/set_up_pygments.sh
chmod +x $< chmod +x $<
sudo $< sudo $<
pygmentize_pl: $(SYNTAX_HIGHLIGHTING_PL)/set_up_pygments.sh
chmod +x $<
sudo $<
#> pygments : Extends your pygmentize executable with Catala lexers #> pygments : Extends your pygmentize executable with Catala lexers
pygments: pygmentize_fr pygmentize_en pygments: pygmentize_fr pygmentize_en
@ -78,6 +83,10 @@ atom_en: ${CURDIR}/syntax_highlighting/en/setup_atom.sh
chmod +x $< chmod +x $<
$< $<
atom_pl: ${CURDIR}/syntax_highlighting/pl/setup_atom.sh
chmod +x $<
$<
atom_nv: ${CURDIR}/syntax_highlighting/nv/setup_atom.sh atom_nv: ${CURDIR}/syntax_highlighting/nv/setup_atom.sh
chmod +x $< chmod +x $<
$< $<
@ -93,6 +102,10 @@ vscode_en: ${CURDIR}/syntax_highlighting/en/setup_vscode.sh
chmod +x $< chmod +x $<
$< $<
vscode_pl: ${CURDIR}/syntax_highlighting/pl/setup_vscode.sh
chmod +x $<
$<
vscode_nv: ${CURDIR}/syntax_highlighting/nv/setup_vscode.sh vscode_nv: ${CURDIR}/syntax_highlighting/nv/setup_vscode.sh
chmod +x $< chmod +x $<
$< $<
@ -222,7 +235,7 @@ clean:
$(MAKE) -C $(CODE_GENERAL_IMPOTS_DIR) clean $(MAKE) -C $(CODE_GENERAL_IMPOTS_DIR) clean
inspect: inspect:
gitinspector -f ml,mli,mly,iro,tex,catala,catala_en,catala_fr,md,fst,mld --grading gitinspector -f ml,mli,mly,iro,tex,catala,catala_en,catala_pl,catala_fr,md,fst,mld --grading
########################################## ##########################################
# Special targets # Special targets

View File

@ -99,7 +99,7 @@ To build and run the example, create a `Makefile` in `foo/`
with the following contents: with the following contents:
```Makefile ```Makefile
CATALA_LANG=en # or fr if your source code is in French CATALA_LANG=en # or fr/pl if your source code is in French/Polish
SRC=foo.catala SRC=foo.catala
include ../Makefile.common.mk include ../Makefile.common.mk

14
examples/polish_taxes_pl/.gitignore vendored Normal file
View File

@ -0,0 +1,14 @@
*.aux
*.dvi
*.fdb_latexmk
*.fls
*.log
*.out
*.fls
*.tex
*.pdf
_minted*
*.toc
*.pyg
*.d
*.ml

View File

@ -0,0 +1,4 @@
CATALA_LANG=pl
SRC=polish_tax_code.catala_pl
include ../Makefile.common.mk

View File

@ -0,0 +1,24 @@
## Rozdzial 3
## Art. 7. 1. Stawki podatku wynoszą:
### 1) od umowy sprzedaży:
#### a) nieruchomości, rzeczy ruchomych, prawa użytkowania wieczystego, własnościowego spółdzielczego prawa do lokalu mieszkalnego, spółdzielczego prawa do lokalu użytkowego oraz wynikających z przepisów prawa spółdzielczego: prawa do domu jednorodzinnego oraz prawa do lokalu w małym domu mieszkalnym 2%,
> Begin metadata
```catala
deklaracja zakres UmowaSprzedazy:
kontekst kwota zawartosc pieniadze
kontekst stawka zawartosc dziesietna
kontekst podatek zawartosc pieniadze
zakres UmowaSprzedazy:
definicja podatek wynosi
kwota *$ stawka
zakres UmowaSprzedazy:
etykieta a7_u1_p1_ppa
definicja stawka wynosi 2%
```
> End metadata

View File

@ -0,0 +1,3 @@
# Master file
> Include: podatek_od_czynnosci_cywilnoprawnych/rozdzial_3.catala_pl

View File

@ -0,0 +1,23 @@
> Include: ../podatek_od_czynnosci_cywilnoprawnych/rozdzial_3.catala_en
## [Test - Art. 7 ustęp 1 punkt 1]
```catala
deklaracja zakres Test_A7_U1_P1_PPa:
kontekst sprzedaz zakres UmowaSprzedazy
zakres Test_A7_U1_P1_PPa:
definicja sprzedaz.kwota wynosi $100
asercja sprzedaz.podatek = $2
deklaracja zakres Test_A7_U1_P1_PPb:
kontekst sprzedaz zakres UmowaSprzedazy
zakres Test_A7_U1_P1_PPb:
definicja sprzedaz.kwota wynosi $100
definicja sprzedaz.inne_prawa_majatkowe wynosi prawda
asercja sprzedaz.podatek = $1
```

View File

@ -16,6 +16,7 @@ scp grammar.html $1/
scp catala.html $1/ scp catala.html $1/
scp _build/default/src/catala/catala_web.bc.js $1/playground/ scp _build/default/src/catala/catala_web.bc.js $1/playground/
scp examples/tutorial_en/tutorial_en.catala_en $1/playground/ scp examples/tutorial_en/tutorial_en.catala_en $1/playground/
# TODO BW: PL
scp examples/tutoriel_fr/tutoriel_fr.catala_fr $1/playground/ scp examples/tutoriel_fr/tutoriel_fr.catala_fr $1/playground/
cat examples/allocations_familiales/prologue.catala_fr > allocations_familiales.catala_fr cat examples/allocations_familiales/prologue.catala_fr > allocations_familiales.catala_fr
cat examples/allocations_familiales/decrets_divers.catala_fr >> allocations_familiales.catala_fr cat examples/allocations_familiales/decrets_divers.catala_fr >> allocations_familiales.catala_fr
@ -29,5 +30,6 @@ mv -f allocations_familiales_fixed.catala_fr allocations_familiales.catala_fr
scp allocations_familiales.catala_fr $1/playground/ scp allocations_familiales.catala_fr $1/playground/
rm allocations_familiales.catala_fr rm allocations_familiales.catala_fr
scp syntax_highlighting/en/ace/mode-catala_en.js $1/playground/ scp syntax_highlighting/en/ace/mode-catala_en.js $1/playground/
# TODO BW: PL
scp syntax_highlighting/fr/ace/mode-catala_fr.js $1/playground/ scp syntax_highlighting/fr/ace/mode-catala_fr.js $1/playground/
scp french_law_js/french_law.js $1/french_law.js scp french_law_js/french_law.js $1/french_law.js

View File

@ -98,7 +98,8 @@ let format_lit (fmt : Format.formatter) (l : lit Pos.marked) : unit =
| LMoney e -> ( | LMoney e -> (
match !Utils.Cli.locale_lang with match !Utils.Cli.locale_lang with
| `En -> Format.fprintf fmt "$%s" (Runtime.money_to_string e) | `En -> Format.fprintf fmt "$%s" (Runtime.money_to_string e)
| `Fr -> Format.fprintf fmt "%s €" (Runtime.money_to_string e)) | `Fr -> Format.fprintf fmt "%s €" (Runtime.money_to_string e)
| `Pl -> Format.fprintf fmt "%s PLN" (Runtime.money_to_string e))
| LDate d -> Format.fprintf fmt "%s" (Runtime.date_to_string d) | LDate d -> Format.fprintf fmt "%s" (Runtime.date_to_string d)
| LDuration d -> Format.fprintf fmt "%s" (Runtime.duration_to_string d) | LDuration d -> Format.fprintf fmt "%s" (Runtime.duration_to_string d)

View File

@ -35,6 +35,7 @@ let driver (source_file : Pos.input_file) (debug : bool) (dcalc : bool) (unstyle
| Some l -> | Some l ->
if l = "fr" then `Fr if l = "fr" then `Fr
else if l = "en" then `En else if l = "en" then `En
else if l = "pl" then `Pl
else if l = "non-verbose" then `NonVerbose else if l = "non-verbose" then `NonVerbose
else else
Errors.raise_error Errors.raise_error

View File

@ -38,7 +38,8 @@ let format_lit (fmt : Format.formatter) (l : lit Pos.marked) : unit =
| LMoney e -> ( | LMoney e -> (
match !Utils.Cli.locale_lang with match !Utils.Cli.locale_lang with
| `En -> Format.fprintf fmt "$%s" (Runtime.money_to_string e) | `En -> Format.fprintf fmt "$%s" (Runtime.money_to_string e)
| `Fr -> Format.fprintf fmt "%s €" (Runtime.money_to_string e)) | `Fr -> Format.fprintf fmt "%s €" (Runtime.money_to_string e)
| `Pl -> Format.fprintf fmt "%s PLN" (Runtime.money_to_string e))
| LDate d -> Format.fprintf fmt "%s" (Runtime.date_to_string d) | LDate d -> Format.fprintf fmt "%s" (Runtime.date_to_string d)
| LDuration d -> Format.fprintf fmt "%s" (Runtime.duration_to_string d) | LDuration d -> Format.fprintf fmt "%s" (Runtime.duration_to_string d)

View File

@ -71,12 +71,14 @@ let wrap_html (source_files : string list) (language : Cli.backend_lang) (fmt :
css_as_string css_as_string
(match language with (match language with
| `Fr -> "Implémentation de texte législatif" | `Fr -> "Implémentation de texte législatif"
| `En -> "Legislative text implementation") | `En -> "Legislative text implementation"
(match language with `Fr -> "Document généré par" | `En -> "Document generated by") | `Pl -> "Implementowanie tekstów legislacyjnych")
(match language with `Fr -> "Document généré par" | `En -> "Document generated by" | `Pl -> "Dokument wygenerowany przez")
Utils.Cli.version Utils.Cli.version
(match language with (match language with
| `Fr -> "Fichiers sources tissés dans ce document" | `Fr -> "Fichiers sources tissés dans ce document"
| `En -> "Source files weaved in this document") | `En -> "Source files weaved in this document"
| `Pl -> "Pliki źródłowe w tym dokumencie")
(String.concat "\n" (String.concat "\n"
(List.map (List.map
(fun filename -> (fun filename ->
@ -90,7 +92,8 @@ let wrap_html (source_files : string list) (language : Cli.backend_lang) (fmt :
(pre_html (Filename.basename filename)) (pre_html (Filename.basename filename))
(match language with (match language with
| `Fr -> "dernière modification le" | `Fr -> "dernière modification le"
| `En -> "last modification") | `En -> "last modification"
| `Pl -> "ostatnia modyfikacja")
ftime) ftime)
source_files)); source_files));
wrapped fmt wrapped fmt
@ -104,7 +107,7 @@ let pygmentize_code (c : string Pos.marked) (language : C.backend_lang) : string
Printf.fprintf oc "%s" (Pos.unmark c); Printf.fprintf oc "%s" (Pos.unmark c);
close_out oc; close_out oc;
let pygments = "pygmentize" in let pygments = "pygmentize" in
let pygments_lexer = match language with `Fr -> "catala_fr" | `En -> "catala_en" in let pygments_lexer = match language with `Fr -> "catala_fr" | `En -> "catala_en" | `Pl -> "catala_pl" in
let pygments_args = let pygments_args =
[| [|
"-l"; "-l";

View File

@ -80,15 +80,17 @@ let wrap_latex (source_files : string list) (language : C.backend_lang) (fmt : F
%s : \n\ %s : \n\
\\begin{itemize}%s\\end{itemize}\n\n\ \\begin{itemize}%s\\end{itemize}\n\n\
\\[\\star\\star\\star\\]\\\\\n" \\[\\star\\star\\star\\]\\\\\n"
(match language with `Fr -> "french" | `En -> "english") (match language with `Fr -> "french" | `En -> "english" | `Pl -> "polish")
(match language with (match language with
| `Fr -> "Implémentation de texte législatif" | `Fr -> "Implémentation de texte législatif"
| `En -> "Legislative text implementation") | `En -> "Legislative text implementation"
(match language with `Fr -> "Document généré par" | `En -> "Document generated by") | `Pl -> "Implementacja tekstów legislacyjnych")
(match language with `Fr -> "Document généré par" | `En -> "Document generated by" | `Pl -> "Dokument wygenerowany przez")
Utils.Cli.version Utils.Cli.version
(match language with (match language with
| `Fr -> "Fichiers sources tissés dans ce document" | `Fr -> "Fichiers sources tissés dans ce document"
| `En -> "Source files weaved in this document") | `En -> "Source files weaved in this document"
| `Pl -> "Pliki źródłowe w tym dokumencie")
(String.concat "," (String.concat ","
(List.map (List.map
(fun filename -> (fun filename ->
@ -102,7 +104,8 @@ let wrap_latex (source_files : string list) (language : C.backend_lang) (fmt : F
(pre_latexify (Filename.basename filename)) (pre_latexify (Filename.basename filename))
(match language with (match language with
| `Fr -> "dernière modification le" | `Fr -> "dernière modification le"
| `En -> "last modification") | `En -> "last modification"
| `Pl -> "ostatnia modyfikacja")
ftime) ftime)
source_files)); source_files));
wrapped fmt; wrapped fmt;
@ -137,7 +140,7 @@ let law_article_item_to_latex (language : C.backend_lang) (fmt : Format.formatte
\\end{minted}" \\end{minted}"
(pre_latexify (Filename.basename (Pos.get_file (Pos.get_position c)))) (pre_latexify (Filename.basename (Pos.get_file (Pos.get_position c))))
(Pos.get_start_line (Pos.get_position c) - 1) (Pos.get_start_line (Pos.get_position c) - 1)
(match language with `Fr -> "catala_fr" | `En -> "catala_en") (match language with `Fr -> "catala_fr" | `En -> "catala_en" | `Pl -> "catala_pl")
(math_syms_replace (Pos.unmark c)) (math_syms_replace (Pos.unmark c))
let rec law_structure_to_latex (language : C.backend_lang) (fmt : Format.formatter) let rec law_structure_to_latex (language : C.backend_lang) (fmt : Format.formatter)
@ -171,7 +174,7 @@ let rec law_structure_to_latex (language : C.backend_lang) (fmt : Format.formatt
(law_article_item_to_latex language) (law_article_item_to_latex language)
fmt children fmt children
| A.MetadataBlock (_, c) -> | A.MetadataBlock (_, c) ->
let metadata_title = match language with `Fr -> "Métadonnées" | `En -> "Metadata" in let metadata_title = match language with `Fr -> "Métadonnées" | `En -> "Metadata" | `Pl -> "Metadane" in
Format.fprintf fmt Format.fprintf fmt
"\\begin{tcolorbox}[colframe=OliveGreen, breakable, \ "\\begin{tcolorbox}[colframe=OliveGreen, breakable, \
title=\\textcolor{black}{\\texttt{%s}},title after \ title=\\textcolor{black}{\\texttt{%s}},title after \
@ -183,7 +186,7 @@ let rec law_structure_to_latex (language : C.backend_lang) (fmt : Format.formatt
metadata_title metadata_title metadata_title metadata_title
(Pos.get_start_line (Pos.get_position c) - 1) (Pos.get_start_line (Pos.get_position c) - 1)
(pre_latexify (Filename.basename (Pos.get_file (Pos.get_position c)))) (pre_latexify (Filename.basename (Pos.get_file (Pos.get_position c))))
(match language with `Fr -> "catala_fr" | `En -> "catala_en") (match language with `Fr -> "catala_fr" | `En -> "catala_en" | `Pl -> "catala_pl")
(math_syms_replace (Pos.unmark c)) (math_syms_replace (Pos.unmark c))
| A.IntermediateText t -> Format.fprintf fmt "%s" (pre_latexify t) | A.IntermediateText t -> Format.fprintf fmt "%s" (pre_latexify t)

View File

@ -32,19 +32,37 @@
(rule (rule
(with-stdout-to (with-stdout-to
parser.messages.new parser.messages.new
(run menhir %{dep:tokens.mly} %{dep:parser.mly} --base parser --list-errors))) (run
menhir
%{dep:tokens.mly}
%{dep:parser.mly}
--base
parser
--list-errors)))
(rule (rule
(with-stdout-to (with-stdout-to
parser_errors.ml parser_errors.ml
(run menhir %{dep:tokens.mly} %{dep:parser.mly} --base parser (run
--compile-errors %{dep:parser.messages}))) menhir
%{dep:tokens.mly}
%{dep:parser.mly}
--base
parser
--compile-errors
%{dep:parser.messages})))
(rule (rule
(with-stdout-to (with-stdout-to
parser.messages.updated parser.messages.updated
(run menhir %{dep:tokens.mly} %{dep:parser.mly} --base parser (run
--update-errors %{dep:parser.messages}))) menhir
%{dep:tokens.mly}
%{dep:parser.mly}
--base
parser
--update-errors
%{dep:parser.messages})))
(rule (rule
(alias update-parser-messages) (alias update-parser-messages)

View File

@ -0,0 +1,550 @@
(* This file is part of the Catala compiler, a specification language for tax and social benefits
computation rules. Copyright (C) 2020 Inria, contributor: Denis Merigoux
<denis.merigoux@inria.fr>
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
or implied. See the License for the specific language governing permissions and limitations under
the License. *)
open Tokens
open Sedlexing
open Utils
open Lexer_common
module L = Lexer
module R = Re.Pcre
(** Same as {!val: Surface.Lexer.token_list_language_agnostic}, but with tokens specialized to
Polish. *)
let token_list : (string * token) list =
[
("zakres", SCOPE);
("konsekwencja", CONSEQUENCE);
("data", DATA);
("zalezy od", DEPENDS);
("deklaracja", DECLARATION);
("kontekst", CONTEXT);
("malejacy", DECREASING);
("rosnacy", INCREASING);
("z", OF);
("kolekcja", COLLECTION);
("enumeracja", ENUM);
("calkowita", INTEGER);
("pieniadze", MONEY);
("tekst", TEXT);
("dziesietna", DECIMAL);
("czas", DATE);
("czas trwania", DURATION);
("zerojedynkowy", BOOLEAN);
("suma", SUM);
("spelnione", FILLED);
("definicja", DEFINITION);
("etykieta", LABEL);
("wyjatek", EXCEPTION);
("wynosi", DEFINED_AS);
("pasuje", MATCH);
("ze wzorem", WITH);
("pod warunkiem", UNDER_CONDITION);
("jezeli", IF);
("wtedy", THEN);
("inaczej", ELSE);
("zawartosc", CONTENT);
("struktura", STRUCT);
("asercja", ASSERTION);
("rozna", VARIES);
("with", WITH_V);
("dla", FOR);
("wszystkie", ALL);
("mamy", WE_HAVE);
("staloprzecinkowa", FIXED);
("przez", BY);
("zasada", RULE);
("istnieje", EXISTS);
("takie ze", SUCH);
("to", THAT);
("i", AND);
("lub", OR);
("xor", XOR);
("nie", NOT);
("maximum", MAXIMUM);
("minimum", MINIMUM);
("filtr", FILTER);
("mapuj", MAP);
("poczatkowy", INIT);
("liczba", CARDINAL);
("rok", YEAR);
("miesiac", MONTH);
("dzien", DAY);
("prawda", TRUE);
("falsz", FALSE);
]
@ L.token_list_language_agnostic
(** Localised builtin functions *)
let builtins : (string * Ast.builtin_expression) list =
[
("integer_to_decimal", IntToDec);
("get_day", GetDay);
("get_month", GetMonth);
("get_year", GetYear);
]
(** Main lexing function used in code blocks *)
let rec lex_code (lexbuf : lexbuf) : token =
let prev_lexeme = Utf8.lexeme lexbuf in
let prev_pos = lexing_positions lexbuf in
match%sedlex lexbuf with
| white_space ->
(* Whitespaces *)
L.update_acc lexbuf;
lex_code lexbuf
| '#', Star (Compl '\n'), '\n' ->
(* Comments *)
L.update_acc lexbuf;
lex_code lexbuf
| "```" ->
(* End of code section *)
L.is_code := false;
END_CODE !L.code_string_acc
| "zakres" ->
L.update_acc lexbuf;
SCOPE
| "data" ->
L.update_acc lexbuf;
DATA
| "zalezy od" ->
L.update_acc lexbuf;
DEPENDS
| "deklaracja" ->
L.update_acc lexbuf;
DECLARATION
| "kontekst" ->
L.update_acc lexbuf;
CONTEXT
| "malejacy" ->
L.update_acc lexbuf;
DECREASING
| "rosnacy" ->
L.update_acc lexbuf;
INCREASING
| "z" ->
L.update_acc lexbuf;
OF
| "kolekcja" ->
L.update_acc lexbuf;
COLLECTION
| "enumeracja" ->
L.update_acc lexbuf;
ENUM
| "calkowita" ->
L.update_acc lexbuf;
INTEGER
| "pieniadze" ->
L.update_acc lexbuf;
MONEY
| "tekst" ->
L.update_acc lexbuf;
TEXT
| "dziesietna" ->
L.update_acc lexbuf;
DECIMAL
| "czas" ->
L.update_acc lexbuf;
DATE
| "czas trwania" ->
L.update_acc lexbuf;
DURATION
| "zerojedynkowy" ->
L.update_acc lexbuf;
BOOLEAN
| "suma" ->
L.update_acc lexbuf;
SUM
| "spelnione" ->
L.update_acc lexbuf;
FILLED
| "definicja" ->
L.update_acc lexbuf;
DEFINITION
| "etykieta" ->
L.update_acc lexbuf;
LABEL
| "wyjatek" ->
L.update_acc lexbuf;
EXCEPTION
| "wynosi" ->
L.update_acc lexbuf;
DEFINED_AS
| "pasuje" ->
L.update_acc lexbuf;
MATCH
| "ze wzorem" ->
L.update_acc lexbuf;
WITH
| "pod warunkiem" ->
L.update_acc lexbuf;
UNDER_CONDITION
| "jezeli" ->
L.update_acc lexbuf;
IF
| "konsekwencja" ->
L.update_acc lexbuf;
CONSEQUENCE
| "wtedy" ->
L.update_acc lexbuf;
THEN
| "inaczej" ->
L.update_acc lexbuf;
ELSE
| "condition" ->
L.update_acc lexbuf;
CONDITION
| "zawartosc" ->
L.update_acc lexbuf;
CONTENT
| "struktura" ->
L.update_acc lexbuf;
STRUCT
| "asercja" ->
L.update_acc lexbuf;
ASSERTION
| "rozna" ->
L.update_acc lexbuf;
VARIES
| "wraz z" ->
L.update_acc lexbuf;
WITH_V
| "dla" ->
L.update_acc lexbuf;
FOR
| "wszystkie" ->
L.update_acc lexbuf;
ALL
| "mamy" ->
L.update_acc lexbuf;
WE_HAVE
| "staloprzecinkowa" ->
L.update_acc lexbuf;
FIXED
| "przez" ->
L.update_acc lexbuf;
BY
| "zasada" ->
(* 0xE8 is è *)
L.update_acc lexbuf;
RULE
| "istnieje" ->
L.update_acc lexbuf;
EXISTS
| "in" ->
L.update_acc lexbuf;
IN
| "takie ze" ->
L.update_acc lexbuf;
SUCH
| "to" ->
L.update_acc lexbuf;
THAT
| "i" ->
L.update_acc lexbuf;
AND
| "lub" ->
L.update_acc lexbuf;
OR
| "xor" ->
L.update_acc lexbuf;
XOR
| "nie" ->
L.update_acc lexbuf;
NOT
| "maximum" ->
L.update_acc lexbuf;
MAXIMUM
| "minimum" ->
L.update_acc lexbuf;
MINIMUM
| "filtr" ->
L.update_acc lexbuf;
FILTER
| "mapuj" ->
L.update_acc lexbuf;
MAP
| "poczatkowy" ->
L.update_acc lexbuf;
INIT
| "liczba" ->
L.update_acc lexbuf;
CARDINAL
| "prawda" ->
L.update_acc lexbuf;
TRUE
| "falsz" ->
L.update_acc lexbuf;
FALSE
| "rok" ->
L.update_acc lexbuf;
YEAR
| "miesiac" ->
L.update_acc lexbuf;
MONTH
| "dzien" ->
L.update_acc lexbuf;
DAY
| 0x24, Star white_space, '0' .. '9', Star ('0' .. '9' | ','), Opt ('.', Rep ('0' .. '9', 0 .. 2))
->
let extract_parts = R.regexp "([0-9]([0-9,]*[0-9]|))(.([0-9]{0,2})|)" in
let full_str = Utf8.lexeme lexbuf in
let only_numbers_str = String.trim (String.sub full_str 1 (String.length full_str - 1)) in
let parts = R.get_substring (R.exec ~rex:extract_parts only_numbers_str) in
(* Integer literal*)
let units = parts 1 in
let remove_commas = R.regexp "," in
let units =
Runtime.integer_of_string (R.substitute ~rex:remove_commas ~subst:(fun _ -> "") units)
in
let cents =
try Runtime.integer_of_string (parts 4) with Not_found -> Runtime.integer_of_int 0
in
L.update_acc lexbuf;
MONEY_AMOUNT (units, cents)
| Plus '0' .. '9', '.', Star '0' .. '9' ->
let extract_code_title = R.regexp "([0-9]+)\\.([0-9]*)" in
let dec_parts = R.get_substring (R.exec ~rex:extract_code_title (Utf8.lexeme lexbuf)) in
(* Integer literal*)
L.update_acc lexbuf;
DECIMAL_LITERAL
(Runtime.integer_of_string (dec_parts 1), Runtime.integer_of_string (dec_parts 2))
| "<=@" ->
L.update_acc lexbuf;
LESSER_EQUAL_DATE
| "<@" ->
L.update_acc lexbuf;
LESSER_DATE
| ">=@" ->
L.update_acc lexbuf;
GREATER_EQUAL_DATE
| ">@" ->
L.update_acc lexbuf;
GREATER_DATE
| "-@" ->
L.update_acc lexbuf;
MINUSDATE
| "+@" ->
L.update_acc lexbuf;
PLUSDATE
| "<=^" ->
L.update_acc lexbuf;
LESSER_EQUAL_DURATION
| "<^" ->
L.update_acc lexbuf;
LESSER_DURATION
| ">=^" ->
L.update_acc lexbuf;
GREATER_EQUAL_DURATION
| ">^" ->
L.update_acc lexbuf;
GREATER_DURATION
| "+^" ->
L.update_acc lexbuf;
PLUSDURATION
| "-^" ->
L.update_acc lexbuf;
MINUSDURATION
| "<=", 0x24 ->
L.update_acc lexbuf;
LESSER_EQUAL_MONEY
| '<', 0x24 ->
L.update_acc lexbuf;
LESSER_MONEY
| ">=", 0x24 ->
L.update_acc lexbuf;
GREATER_EQUAL_MONEY
| '>', 0x24 ->
L.update_acc lexbuf;
GREATER_MONEY
| '+', 0x24 ->
L.update_acc lexbuf;
PLUSMONEY
| '-', 0x24 ->
L.update_acc lexbuf;
MINUSMONEY
| '*', 0x24 ->
L.update_acc lexbuf;
MULTMONEY
| '/', 0x24 ->
L.update_acc lexbuf;
DIVMONEY
| "<=." ->
L.update_acc lexbuf;
LESSER_EQUAL_DEC
| "<." ->
L.update_acc lexbuf;
LESSER_DEC
| ">=." ->
L.update_acc lexbuf;
GREATER_EQUAL_DEC
| ">." ->
L.update_acc lexbuf;
GREATER_DEC
| "+." ->
L.update_acc lexbuf;
PLUSDEC
| "-." ->
L.update_acc lexbuf;
MINUSDEC
| "*." ->
L.update_acc lexbuf;
MULTDEC
| "/." ->
L.update_acc lexbuf;
DIVDEC
| "<=" ->
L.update_acc lexbuf;
LESSER_EQUAL
| '<' ->
L.update_acc lexbuf;
LESSER
| ">=" ->
L.update_acc lexbuf;
GREATER_EQUAL
| '>' ->
L.update_acc lexbuf;
GREATER
| '+' ->
L.update_acc lexbuf;
PLUS
| '-' ->
L.update_acc lexbuf;
MINUS
| '*' ->
L.update_acc lexbuf;
MULT
| '/' ->
L.update_acc lexbuf;
DIV
| "!=" ->
L.update_acc lexbuf;
NOT_EQUAL
| '=' ->
L.update_acc lexbuf;
EQUAL
| '%' ->
L.update_acc lexbuf;
PERCENT
| '(' ->
L.update_acc lexbuf;
LPAREN
| ')' ->
L.update_acc lexbuf;
RPAREN
| '{' ->
L.update_acc lexbuf;
LBRACKET
| '}' ->
L.update_acc lexbuf;
RBRACKET
| '[' ->
L.update_acc lexbuf;
LSQUARE
| ']' ->
L.update_acc lexbuf;
RSQUARE
| '|' ->
L.update_acc lexbuf;
VERTICAL
| ':' ->
L.update_acc lexbuf;
COLON
| ';' ->
L.update_acc lexbuf;
SEMICOLON
| "--" ->
L.update_acc lexbuf;
ALT
| '.' ->
L.update_acc lexbuf;
DOT
| uppercase, Star (uppercase | lowercase | '0' .. '9' | '_' | '\'') ->
(* Name of constructor *)
L.update_acc lexbuf;
CONSTRUCTOR (Utf8.lexeme lexbuf)
| lowercase, Star (lowercase | uppercase | '0' .. '9' | '_' | '\'') ->
(* Name of variable *)
L.update_acc lexbuf;
IDENT (Utf8.lexeme lexbuf)
| Plus '0' .. '9' ->
(* Integer literal*)
L.update_acc lexbuf;
INT_LITERAL (Runtime.integer_of_string (Utf8.lexeme lexbuf))
| _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme
(** Main lexing function used outside code blocks *)
let lex_law (lexbuf : lexbuf) : token =
let prev_lexeme = Utf8.lexeme lexbuf in
let prev_pos = lexing_positions lexbuf in
match%sedlex lexbuf with
| "```catala" ->
L.is_code := true;
L.code_string_acc := "";
BEGIN_CODE
| eof -> EOF
| '#', Star white_space, "Master file" -> MASTER_FILE
| '>', Star white_space, "Begin metadata" -> BEGIN_METADATA
| '>', Star white_space, "End metadata" -> END_METADATA
| ( '>',
Star white_space,
"Include:",
Star white_space,
Plus (Compl ('@' | '\n')),
Star white_space,
Opt ('@', Star white_space, "p.", Star white_space, Plus '0' .. '9', Star white_space),
'\n' ) ->
let extract_components =
R.regexp ">\\s*Include\\:\\s*([^@\\n]+)\\s*(@\\s*p\\.\\s*([0-9]+)|)"
in
let get_component = R.get_substring (R.exec ~rex:extract_components (Utf8.lexeme lexbuf)) in
let name = get_component 1 in
let pages = try Some (int_of_string (get_component 3)) with Not_found -> None in
let pos = lexing_positions lexbuf in
if Filename.extension name = ".pdf" then
LAW_INCLUDE (Ast.PdfFile ((name, Pos.from_lpos pos), pages))
else LAW_INCLUDE (Ast.CatalaFile (name, Pos.from_lpos pos))
| '#', Plus '#', Star white_space, Plus (Compl ('[' | ']' | '\n')), Star white_space, '\n' ->
get_law_heading lexbuf
| ( '#',
Plus '#',
Star white_space,
'[',
Star white_space,
Plus (Compl ']'),
Star white_space,
']',
'\n' ) ->
let extract_article_title = R.regexp "([#]+)\\s*\\[([^\\]]+)\\]" in
let get_substring =
R.get_substring (R.exec ~rex:extract_article_title (Utf8.lexeme lexbuf))
in
let title = get_substring 2 in
let get_new_lines = R.regexp "\n" in
let new_lines_count =
try Array.length (R.extract ~rex:get_new_lines (Utf8.lexeme lexbuf)) with Not_found -> 0
in
(* the -1 is here to compensate for Sedlex's automatic newline detection around token *)
for _i = 1 to new_lines_count - 1 do
new_line lexbuf
done;
let precedence = calc_precedence (get_substring 1) in
LAW_ARTICLE (title, None, None, precedence)
| Plus (Compl ('/' | '#' | '`' | '>')) -> LAW_TEXT (Utf8.lexeme lexbuf)
| _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme
(** Entry point of the lexer, distributes to {!val: lex_code} or {!val: lex_law} depending of {!val:
Surface.Lexer.is_code}. *)
let lexer (lexbuf : lexbuf) : token = if !L.is_code then lex_code lexbuf else lex_law lexbuf

View File

@ -0,0 +1,15 @@
(* This file is part of the Catala compiler, a specification language for tax and social benefits
computation rules. Copyright (C) 2020 Inria, contributor: Denis Merigoux
<denis.merigoux@inria.fr>
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
or implied. See the License for the specific language governing permissions and limitations under
the License. *)
include Lexer.LocalisedLexer

View File

@ -226,11 +226,13 @@ end
module Parser_NonVerbose = ParserAux (Lexer) module Parser_NonVerbose = ParserAux (Lexer)
module Parser_En = ParserAux (Lexer_en) module Parser_En = ParserAux (Lexer_en)
module Parser_Fr = ParserAux (Lexer_fr) module Parser_Fr = ParserAux (Lexer_fr)
module Parser_Pl = ParserAux (Lexer_pl)
let localised_parser : Cli.frontend_lang -> lexbuf -> Ast.source_file_or_master = function let localised_parser : Cli.frontend_lang -> lexbuf -> Ast.source_file_or_master = function
| `NonVerbose -> Parser_NonVerbose.commands_or_includes | `NonVerbose -> Parser_NonVerbose.commands_or_includes
| `En -> Parser_En.commands_or_includes | `En -> Parser_En.commands_or_includes
| `Fr -> Parser_Fr.commands_or_includes | `Fr -> Parser_Fr.commands_or_includes
| `Pl -> Parser_Pl.commands_or_includes
(** {1 Parsing multiple files} *) (** {1 Parsing multiple files} *)

View File

@ -12,12 +12,12 @@
or implied. See the License for the specific language governing permissions and limitations under or implied. See the License for the specific language governing permissions and limitations under
the License. *) the License. *)
type frontend_lang = [ `Fr | `En | `NonVerbose ] type frontend_lang = [ `Fr | `En | `NonVerbose | `Pl]
type backend_lang = [ `Fr | `En ] type backend_lang = [ `Fr | `En | `Pl ]
let to_backend_lang (lang : frontend_lang) : backend_lang = let to_backend_lang (lang : frontend_lang) : backend_lang =
match lang with `En | `NonVerbose -> `En | `Fr -> `Fr match lang with `En | `NonVerbose -> `En | `Fr -> `Fr | `Pl -> `Pl
(** Source files to be compiled *) (** Source files to be compiled *)
let source_files : string list ref = ref [] let source_files : string list ref = ref []
@ -57,7 +57,8 @@ let unstyled = Arg.(value & flag & info [ "unstyled" ] ~doc:"Removes styling fro
let optimize = Arg.(value & flag & info [ "optimize"; "O" ] ~doc:"Run compiler optimizations") let optimize = Arg.(value & flag & info [ "optimize"; "O" ] ~doc:"Run compiler optimizations")
let trace_opt = let trace_opt =
Arg.(value & flag & info [ "trace"; "t" ] ~doc:"Displays a trace of the interpreter's computation") Arg.(
value & flag & info [ "trace"; "t" ] ~doc:"Displays a trace of the interpreter's computation")
let wrap_weaved_output = let wrap_weaved_output =
Arg.( Arg.(

View File

@ -12,9 +12,9 @@
or implied. See the License for the specific language governing permissions and limitations under or implied. See the License for the specific language governing permissions and limitations under
the License. *) the License. *)
type frontend_lang = [ `En | `Fr | `NonVerbose ] type frontend_lang = [ `En | `Fr | `NonVerbose | `Pl ]
type backend_lang = [ `En | `Fr ] type backend_lang = [ `En | `Fr | `Pl ]
val to_backend_lang : frontend_lang -> backend_lang val to_backend_lang : frontend_lang -> backend_lang