mirror of
https://github.com/CatalaLang/catala.git
synced 2024-09-19 00:15:39 +03:00
lexer
This commit is contained in:
parent
37c8f8da64
commit
db46b1c444
@ -124,7 +124,7 @@ general-to-specifics statutes order. Therefore, there exists multiple versions
|
|||||||
of the Catala surface syntax, adapted to the language of the legislative text.
|
of the Catala surface syntax, adapted to the language of the legislative text.
|
||||||
|
|
||||||
Currently, Catala supports English and French legislative text via the
|
Currently, Catala supports English and French legislative text via the
|
||||||
`--language=en` or `--language=fr` option.
|
`--language=en`, `--language=fr` or `--language=pl` option.
|
||||||
|
|
||||||
Technically, support for new languages can be added via a new lexer. If you want
|
Technically, support for new languages can be added via a new lexer. If you want
|
||||||
to add a new language, you can start from
|
to add a new language, you can start from
|
||||||
|
@ -119,7 +119,8 @@ augmented with the Catala plugin, simply enter
|
|||||||
sudo make pygments
|
sudo make pygments
|
||||||
|
|
||||||
This will execute the
|
This will execute the
|
||||||
script `syntax_highlighting/fr/pygments/set_up_pygments.sh` and
|
script `syntax_highlighting/fr/pygments/set_up_pygments.sh`,
|
||||||
|
`syntax_highlighting/pl/pygments/set_up_pygments.sh` and
|
||||||
`syntax_highlighting/en/pygments/set_up_pygments.sh`.
|
`syntax_highlighting/en/pygments/set_up_pygments.sh`.
|
||||||
|
|
||||||
The scripts patch your `pigmentize` executable, used for instance by the `minted` LaTeX package.
|
The scripts patch your `pigmentize` executable, used for instance by the `minted` LaTeX package.
|
||||||
|
15
Makefile
15
Makefile
@ -58,6 +58,7 @@ install:
|
|||||||
|
|
||||||
SYNTAX_HIGHLIGHTING_FR=${CURDIR}/syntax_highlighting/fr
|
SYNTAX_HIGHLIGHTING_FR=${CURDIR}/syntax_highlighting/fr
|
||||||
SYNTAX_HIGHLIGHTING_EN=${CURDIR}/syntax_highlighting/en
|
SYNTAX_HIGHLIGHTING_EN=${CURDIR}/syntax_highlighting/en
|
||||||
|
SYNTAX_HIGHLIGHTING_PL=${CURDIR}/syntax_highlighting/pl
|
||||||
|
|
||||||
pygmentize_fr: $(SYNTAX_HIGHLIGHTING_FR)/set_up_pygments.sh
|
pygmentize_fr: $(SYNTAX_HIGHLIGHTING_FR)/set_up_pygments.sh
|
||||||
chmod +x $<
|
chmod +x $<
|
||||||
@ -67,6 +68,10 @@ pygmentize_en: $(SYNTAX_HIGHLIGHTING_EN)/set_up_pygments.sh
|
|||||||
chmod +x $<
|
chmod +x $<
|
||||||
sudo $<
|
sudo $<
|
||||||
|
|
||||||
|
pygmentize_pl: $(SYNTAX_HIGHLIGHTING_PL)/set_up_pygments.sh
|
||||||
|
chmod +x $<
|
||||||
|
sudo $<
|
||||||
|
|
||||||
#> pygments : Extends your pygmentize executable with Catala lexers
|
#> pygments : Extends your pygmentize executable with Catala lexers
|
||||||
pygments: pygmentize_fr pygmentize_en
|
pygments: pygmentize_fr pygmentize_en
|
||||||
|
|
||||||
@ -78,6 +83,10 @@ atom_en: ${CURDIR}/syntax_highlighting/en/setup_atom.sh
|
|||||||
chmod +x $<
|
chmod +x $<
|
||||||
$<
|
$<
|
||||||
|
|
||||||
|
atom_pl: ${CURDIR}/syntax_highlighting/pl/setup_atom.sh
|
||||||
|
chmod +x $<
|
||||||
|
$<
|
||||||
|
|
||||||
atom_nv: ${CURDIR}/syntax_highlighting/nv/setup_atom.sh
|
atom_nv: ${CURDIR}/syntax_highlighting/nv/setup_atom.sh
|
||||||
chmod +x $<
|
chmod +x $<
|
||||||
$<
|
$<
|
||||||
@ -93,6 +102,10 @@ vscode_en: ${CURDIR}/syntax_highlighting/en/setup_vscode.sh
|
|||||||
chmod +x $<
|
chmod +x $<
|
||||||
$<
|
$<
|
||||||
|
|
||||||
|
vscode_pl: ${CURDIR}/syntax_highlighting/pl/setup_vscode.sh
|
||||||
|
chmod +x $<
|
||||||
|
$<
|
||||||
|
|
||||||
vscode_nv: ${CURDIR}/syntax_highlighting/nv/setup_vscode.sh
|
vscode_nv: ${CURDIR}/syntax_highlighting/nv/setup_vscode.sh
|
||||||
chmod +x $<
|
chmod +x $<
|
||||||
$<
|
$<
|
||||||
@ -222,7 +235,7 @@ clean:
|
|||||||
$(MAKE) -C $(CODE_GENERAL_IMPOTS_DIR) clean
|
$(MAKE) -C $(CODE_GENERAL_IMPOTS_DIR) clean
|
||||||
|
|
||||||
inspect:
|
inspect:
|
||||||
gitinspector -f ml,mli,mly,iro,tex,catala,catala_en,catala_fr,md,fst,mld --grading
|
gitinspector -f ml,mli,mly,iro,tex,catala,catala_en,catala_pl,catala_fr,md,fst,mld --grading
|
||||||
|
|
||||||
##########################################
|
##########################################
|
||||||
# Special targets
|
# Special targets
|
||||||
|
@ -99,7 +99,7 @@ To build and run the example, create a `Makefile` in `foo/`
|
|||||||
with the following contents:
|
with the following contents:
|
||||||
|
|
||||||
```Makefile
|
```Makefile
|
||||||
CATALA_LANG=en # or fr if your source code is in French
|
CATALA_LANG=en # or fr/pl if your source code is in French/Polish
|
||||||
SRC=foo.catala
|
SRC=foo.catala
|
||||||
|
|
||||||
include ../Makefile.common.mk
|
include ../Makefile.common.mk
|
||||||
|
14
examples/polish_taxes_pl/.gitignore
vendored
Normal file
14
examples/polish_taxes_pl/.gitignore
vendored
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
*.aux
|
||||||
|
*.dvi
|
||||||
|
*.fdb_latexmk
|
||||||
|
*.fls
|
||||||
|
*.log
|
||||||
|
*.out
|
||||||
|
*.fls
|
||||||
|
*.tex
|
||||||
|
*.pdf
|
||||||
|
_minted*
|
||||||
|
*.toc
|
||||||
|
*.pyg
|
||||||
|
*.d
|
||||||
|
*.ml
|
4
examples/polish_taxes_pl/Makefile
Normal file
4
examples/polish_taxes_pl/Makefile
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
CATALA_LANG=pl
|
||||||
|
SRC=polish_tax_code.catala_pl
|
||||||
|
|
||||||
|
include ../Makefile.common.mk
|
@ -0,0 +1,24 @@
|
|||||||
|
## Rozdzial 3
|
||||||
|
|
||||||
|
## Art. 7. 1. Stawki podatku wynoszą:
|
||||||
|
|
||||||
|
### 1) od umowy sprzedaży:
|
||||||
|
#### a) nieruchomości, rzeczy ruchomych, prawa użytkowania wieczystego, własnościowego spółdzielczego prawa do lokalu mieszkalnego, spółdzielczego prawa do lokalu użytkowego oraz wynikających z przepisów prawa spółdzielczego: prawa do domu jednorodzinnego oraz prawa do lokalu w małym domu mieszkalnym – 2%,
|
||||||
|
|
||||||
|
> Begin metadata
|
||||||
|
```catala
|
||||||
|
deklaracja zakres UmowaSprzedazy:
|
||||||
|
kontekst kwota zawartosc pieniadze
|
||||||
|
kontekst stawka zawartosc dziesietna
|
||||||
|
kontekst podatek zawartosc pieniadze
|
||||||
|
|
||||||
|
zakres UmowaSprzedazy:
|
||||||
|
definicja podatek wynosi
|
||||||
|
kwota *$ stawka
|
||||||
|
|
||||||
|
zakres UmowaSprzedazy:
|
||||||
|
etykieta a7_u1_p1_ppa
|
||||||
|
definicja stawka wynosi 2%
|
||||||
|
```
|
||||||
|
> End metadata
|
||||||
|
|
3
examples/polish_taxes_pl/polish_taxes_pl.catala_pl
Normal file
3
examples/polish_taxes_pl/polish_taxes_pl.catala_pl
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# Master file
|
||||||
|
|
||||||
|
> Include: podatek_od_czynnosci_cywilnoprawnych/rozdzial_3.catala_pl
|
23
examples/polish_taxes_pl/tests/test_a7_u1_p1.catala_pl
Normal file
23
examples/polish_taxes_pl/tests/test_a7_u1_p1.catala_pl
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
> Include: ../podatek_od_czynnosci_cywilnoprawnych/rozdzial_3.catala_en
|
||||||
|
|
||||||
|
## [Test - Art. 7 ustęp 1 punkt 1]
|
||||||
|
|
||||||
|
```catala
|
||||||
|
deklaracja zakres Test_A7_U1_P1_PPa:
|
||||||
|
kontekst sprzedaz zakres UmowaSprzedazy
|
||||||
|
|
||||||
|
zakres Test_A7_U1_P1_PPa:
|
||||||
|
definicja sprzedaz.kwota wynosi $100
|
||||||
|
|
||||||
|
asercja sprzedaz.podatek = $2
|
||||||
|
|
||||||
|
|
||||||
|
deklaracja zakres Test_A7_U1_P1_PPb:
|
||||||
|
kontekst sprzedaz zakres UmowaSprzedazy
|
||||||
|
|
||||||
|
zakres Test_A7_U1_P1_PPb:
|
||||||
|
definicja sprzedaz.kwota wynosi $100
|
||||||
|
definicja sprzedaz.inne_prawa_majatkowe wynosi prawda
|
||||||
|
|
||||||
|
asercja sprzedaz.podatek = $1
|
||||||
|
```
|
@ -16,6 +16,7 @@ scp grammar.html $1/
|
|||||||
scp catala.html $1/
|
scp catala.html $1/
|
||||||
scp _build/default/src/catala/catala_web.bc.js $1/playground/
|
scp _build/default/src/catala/catala_web.bc.js $1/playground/
|
||||||
scp examples/tutorial_en/tutorial_en.catala_en $1/playground/
|
scp examples/tutorial_en/tutorial_en.catala_en $1/playground/
|
||||||
|
# TODO BW: PL
|
||||||
scp examples/tutoriel_fr/tutoriel_fr.catala_fr $1/playground/
|
scp examples/tutoriel_fr/tutoriel_fr.catala_fr $1/playground/
|
||||||
cat examples/allocations_familiales/prologue.catala_fr > allocations_familiales.catala_fr
|
cat examples/allocations_familiales/prologue.catala_fr > allocations_familiales.catala_fr
|
||||||
cat examples/allocations_familiales/decrets_divers.catala_fr >> allocations_familiales.catala_fr
|
cat examples/allocations_familiales/decrets_divers.catala_fr >> allocations_familiales.catala_fr
|
||||||
@ -29,5 +30,6 @@ mv -f allocations_familiales_fixed.catala_fr allocations_familiales.catala_fr
|
|||||||
scp allocations_familiales.catala_fr $1/playground/
|
scp allocations_familiales.catala_fr $1/playground/
|
||||||
rm allocations_familiales.catala_fr
|
rm allocations_familiales.catala_fr
|
||||||
scp syntax_highlighting/en/ace/mode-catala_en.js $1/playground/
|
scp syntax_highlighting/en/ace/mode-catala_en.js $1/playground/
|
||||||
|
# TODO BW: PL
|
||||||
scp syntax_highlighting/fr/ace/mode-catala_fr.js $1/playground/
|
scp syntax_highlighting/fr/ace/mode-catala_fr.js $1/playground/
|
||||||
scp french_law_js/french_law.js $1/french_law.js
|
scp french_law_js/french_law.js $1/french_law.js
|
||||||
|
@ -98,7 +98,8 @@ let format_lit (fmt : Format.formatter) (l : lit Pos.marked) : unit =
|
|||||||
| LMoney e -> (
|
| LMoney e -> (
|
||||||
match !Utils.Cli.locale_lang with
|
match !Utils.Cli.locale_lang with
|
||||||
| `En -> Format.fprintf fmt "$%s" (Runtime.money_to_string e)
|
| `En -> Format.fprintf fmt "$%s" (Runtime.money_to_string e)
|
||||||
| `Fr -> Format.fprintf fmt "%s €" (Runtime.money_to_string e))
|
| `Fr -> Format.fprintf fmt "%s €" (Runtime.money_to_string e)
|
||||||
|
| `Pl -> Format.fprintf fmt "%s PLN" (Runtime.money_to_string e))
|
||||||
| LDate d -> Format.fprintf fmt "%s" (Runtime.date_to_string d)
|
| LDate d -> Format.fprintf fmt "%s" (Runtime.date_to_string d)
|
||||||
| LDuration d -> Format.fprintf fmt "%s" (Runtime.duration_to_string d)
|
| LDuration d -> Format.fprintf fmt "%s" (Runtime.duration_to_string d)
|
||||||
|
|
||||||
|
@ -35,6 +35,7 @@ let driver (source_file : Pos.input_file) (debug : bool) (dcalc : bool) (unstyle
|
|||||||
| Some l ->
|
| Some l ->
|
||||||
if l = "fr" then `Fr
|
if l = "fr" then `Fr
|
||||||
else if l = "en" then `En
|
else if l = "en" then `En
|
||||||
|
else if l = "pl" then `Pl
|
||||||
else if l = "non-verbose" then `NonVerbose
|
else if l = "non-verbose" then `NonVerbose
|
||||||
else
|
else
|
||||||
Errors.raise_error
|
Errors.raise_error
|
||||||
|
@ -38,7 +38,8 @@ let format_lit (fmt : Format.formatter) (l : lit Pos.marked) : unit =
|
|||||||
| LMoney e -> (
|
| LMoney e -> (
|
||||||
match !Utils.Cli.locale_lang with
|
match !Utils.Cli.locale_lang with
|
||||||
| `En -> Format.fprintf fmt "$%s" (Runtime.money_to_string e)
|
| `En -> Format.fprintf fmt "$%s" (Runtime.money_to_string e)
|
||||||
| `Fr -> Format.fprintf fmt "%s €" (Runtime.money_to_string e))
|
| `Fr -> Format.fprintf fmt "%s €" (Runtime.money_to_string e)
|
||||||
|
| `Pl -> Format.fprintf fmt "%s PLN" (Runtime.money_to_string e))
|
||||||
| LDate d -> Format.fprintf fmt "%s" (Runtime.date_to_string d)
|
| LDate d -> Format.fprintf fmt "%s" (Runtime.date_to_string d)
|
||||||
| LDuration d -> Format.fprintf fmt "%s" (Runtime.duration_to_string d)
|
| LDuration d -> Format.fprintf fmt "%s" (Runtime.duration_to_string d)
|
||||||
|
|
||||||
|
@ -71,12 +71,14 @@ let wrap_html (source_files : string list) (language : Cli.backend_lang) (fmt :
|
|||||||
css_as_string
|
css_as_string
|
||||||
(match language with
|
(match language with
|
||||||
| `Fr -> "Implémentation de texte législatif"
|
| `Fr -> "Implémentation de texte législatif"
|
||||||
| `En -> "Legislative text implementation")
|
| `En -> "Legislative text implementation"
|
||||||
(match language with `Fr -> "Document généré par" | `En -> "Document generated by")
|
| `Pl -> "Implementowanie tekstów legislacyjnych")
|
||||||
|
(match language with `Fr -> "Document généré par" | `En -> "Document generated by" | `Pl -> "Dokument wygenerowany przez")
|
||||||
Utils.Cli.version
|
Utils.Cli.version
|
||||||
(match language with
|
(match language with
|
||||||
| `Fr -> "Fichiers sources tissés dans ce document"
|
| `Fr -> "Fichiers sources tissés dans ce document"
|
||||||
| `En -> "Source files weaved in this document")
|
| `En -> "Source files weaved in this document"
|
||||||
|
| `Pl -> "Pliki źródłowe w tym dokumencie")
|
||||||
(String.concat "\n"
|
(String.concat "\n"
|
||||||
(List.map
|
(List.map
|
||||||
(fun filename ->
|
(fun filename ->
|
||||||
@ -90,7 +92,8 @@ let wrap_html (source_files : string list) (language : Cli.backend_lang) (fmt :
|
|||||||
(pre_html (Filename.basename filename))
|
(pre_html (Filename.basename filename))
|
||||||
(match language with
|
(match language with
|
||||||
| `Fr -> "dernière modification le"
|
| `Fr -> "dernière modification le"
|
||||||
| `En -> "last modification")
|
| `En -> "last modification"
|
||||||
|
| `Pl -> "ostatnia modyfikacja")
|
||||||
ftime)
|
ftime)
|
||||||
source_files));
|
source_files));
|
||||||
wrapped fmt
|
wrapped fmt
|
||||||
@ -104,7 +107,7 @@ let pygmentize_code (c : string Pos.marked) (language : C.backend_lang) : string
|
|||||||
Printf.fprintf oc "%s" (Pos.unmark c);
|
Printf.fprintf oc "%s" (Pos.unmark c);
|
||||||
close_out oc;
|
close_out oc;
|
||||||
let pygments = "pygmentize" in
|
let pygments = "pygmentize" in
|
||||||
let pygments_lexer = match language with `Fr -> "catala_fr" | `En -> "catala_en" in
|
let pygments_lexer = match language with `Fr -> "catala_fr" | `En -> "catala_en" | `Pl -> "catala_pl" in
|
||||||
let pygments_args =
|
let pygments_args =
|
||||||
[|
|
[|
|
||||||
"-l";
|
"-l";
|
||||||
|
@ -80,15 +80,17 @@ let wrap_latex (source_files : string list) (language : C.backend_lang) (fmt : F
|
|||||||
%s : \n\
|
%s : \n\
|
||||||
\\begin{itemize}%s\\end{itemize}\n\n\
|
\\begin{itemize}%s\\end{itemize}\n\n\
|
||||||
\\[\\star\\star\\star\\]\\\\\n"
|
\\[\\star\\star\\star\\]\\\\\n"
|
||||||
(match language with `Fr -> "french" | `En -> "english")
|
(match language with `Fr -> "french" | `En -> "english" | `Pl -> "polish")
|
||||||
(match language with
|
(match language with
|
||||||
| `Fr -> "Implémentation de texte législatif"
|
| `Fr -> "Implémentation de texte législatif"
|
||||||
| `En -> "Legislative text implementation")
|
| `En -> "Legislative text implementation"
|
||||||
(match language with `Fr -> "Document généré par" | `En -> "Document generated by")
|
| `Pl -> "Implementacja tekstów legislacyjnych")
|
||||||
|
(match language with `Fr -> "Document généré par" | `En -> "Document generated by" | `Pl -> "Dokument wygenerowany przez")
|
||||||
Utils.Cli.version
|
Utils.Cli.version
|
||||||
(match language with
|
(match language with
|
||||||
| `Fr -> "Fichiers sources tissés dans ce document"
|
| `Fr -> "Fichiers sources tissés dans ce document"
|
||||||
| `En -> "Source files weaved in this document")
|
| `En -> "Source files weaved in this document"
|
||||||
|
| `Pl -> "Pliki źródłowe w tym dokumencie")
|
||||||
(String.concat ","
|
(String.concat ","
|
||||||
(List.map
|
(List.map
|
||||||
(fun filename ->
|
(fun filename ->
|
||||||
@ -102,7 +104,8 @@ let wrap_latex (source_files : string list) (language : C.backend_lang) (fmt : F
|
|||||||
(pre_latexify (Filename.basename filename))
|
(pre_latexify (Filename.basename filename))
|
||||||
(match language with
|
(match language with
|
||||||
| `Fr -> "dernière modification le"
|
| `Fr -> "dernière modification le"
|
||||||
| `En -> "last modification")
|
| `En -> "last modification"
|
||||||
|
| `Pl -> "ostatnia modyfikacja")
|
||||||
ftime)
|
ftime)
|
||||||
source_files));
|
source_files));
|
||||||
wrapped fmt;
|
wrapped fmt;
|
||||||
@ -137,7 +140,7 @@ let law_article_item_to_latex (language : C.backend_lang) (fmt : Format.formatte
|
|||||||
\\end{minted}"
|
\\end{minted}"
|
||||||
(pre_latexify (Filename.basename (Pos.get_file (Pos.get_position c))))
|
(pre_latexify (Filename.basename (Pos.get_file (Pos.get_position c))))
|
||||||
(Pos.get_start_line (Pos.get_position c) - 1)
|
(Pos.get_start_line (Pos.get_position c) - 1)
|
||||||
(match language with `Fr -> "catala_fr" | `En -> "catala_en")
|
(match language with `Fr -> "catala_fr" | `En -> "catala_en" | `Pl -> "catala_pl")
|
||||||
(math_syms_replace (Pos.unmark c))
|
(math_syms_replace (Pos.unmark c))
|
||||||
|
|
||||||
let rec law_structure_to_latex (language : C.backend_lang) (fmt : Format.formatter)
|
let rec law_structure_to_latex (language : C.backend_lang) (fmt : Format.formatter)
|
||||||
@ -171,7 +174,7 @@ let rec law_structure_to_latex (language : C.backend_lang) (fmt : Format.formatt
|
|||||||
(law_article_item_to_latex language)
|
(law_article_item_to_latex language)
|
||||||
fmt children
|
fmt children
|
||||||
| A.MetadataBlock (_, c) ->
|
| A.MetadataBlock (_, c) ->
|
||||||
let metadata_title = match language with `Fr -> "Métadonnées" | `En -> "Metadata" in
|
let metadata_title = match language with `Fr -> "Métadonnées" | `En -> "Metadata" | `Pl -> "Metadane" in
|
||||||
Format.fprintf fmt
|
Format.fprintf fmt
|
||||||
"\\begin{tcolorbox}[colframe=OliveGreen, breakable, \
|
"\\begin{tcolorbox}[colframe=OliveGreen, breakable, \
|
||||||
title=\\textcolor{black}{\\texttt{%s}},title after \
|
title=\\textcolor{black}{\\texttt{%s}},title after \
|
||||||
@ -183,7 +186,7 @@ let rec law_structure_to_latex (language : C.backend_lang) (fmt : Format.formatt
|
|||||||
metadata_title metadata_title
|
metadata_title metadata_title
|
||||||
(Pos.get_start_line (Pos.get_position c) - 1)
|
(Pos.get_start_line (Pos.get_position c) - 1)
|
||||||
(pre_latexify (Filename.basename (Pos.get_file (Pos.get_position c))))
|
(pre_latexify (Filename.basename (Pos.get_file (Pos.get_position c))))
|
||||||
(match language with `Fr -> "catala_fr" | `En -> "catala_en")
|
(match language with `Fr -> "catala_fr" | `En -> "catala_en" | `Pl -> "catala_pl")
|
||||||
(math_syms_replace (Pos.unmark c))
|
(math_syms_replace (Pos.unmark c))
|
||||||
| A.IntermediateText t -> Format.fprintf fmt "%s" (pre_latexify t)
|
| A.IntermediateText t -> Format.fprintf fmt "%s" (pre_latexify t)
|
||||||
|
|
||||||
|
@ -32,19 +32,37 @@
|
|||||||
(rule
|
(rule
|
||||||
(with-stdout-to
|
(with-stdout-to
|
||||||
parser.messages.new
|
parser.messages.new
|
||||||
(run menhir %{dep:tokens.mly} %{dep:parser.mly} --base parser --list-errors)))
|
(run
|
||||||
|
menhir
|
||||||
|
%{dep:tokens.mly}
|
||||||
|
%{dep:parser.mly}
|
||||||
|
--base
|
||||||
|
parser
|
||||||
|
--list-errors)))
|
||||||
|
|
||||||
(rule
|
(rule
|
||||||
(with-stdout-to
|
(with-stdout-to
|
||||||
parser_errors.ml
|
parser_errors.ml
|
||||||
(run menhir %{dep:tokens.mly} %{dep:parser.mly} --base parser
|
(run
|
||||||
--compile-errors %{dep:parser.messages})))
|
menhir
|
||||||
|
%{dep:tokens.mly}
|
||||||
|
%{dep:parser.mly}
|
||||||
|
--base
|
||||||
|
parser
|
||||||
|
--compile-errors
|
||||||
|
%{dep:parser.messages})))
|
||||||
|
|
||||||
(rule
|
(rule
|
||||||
(with-stdout-to
|
(with-stdout-to
|
||||||
parser.messages.updated
|
parser.messages.updated
|
||||||
(run menhir %{dep:tokens.mly} %{dep:parser.mly} --base parser
|
(run
|
||||||
--update-errors %{dep:parser.messages})))
|
menhir
|
||||||
|
%{dep:tokens.mly}
|
||||||
|
%{dep:parser.mly}
|
||||||
|
--base
|
||||||
|
parser
|
||||||
|
--update-errors
|
||||||
|
%{dep:parser.messages})))
|
||||||
|
|
||||||
(rule
|
(rule
|
||||||
(alias update-parser-messages)
|
(alias update-parser-messages)
|
||||||
|
550
src/catala/surface/lexer_pl.ml
Normal file
550
src/catala/surface/lexer_pl.ml
Normal file
@ -0,0 +1,550 @@
|
|||||||
|
(* This file is part of the Catala compiler, a specification language for tax and social benefits
|
||||||
|
computation rules. Copyright (C) 2020 Inria, contributor: Denis Merigoux
|
||||||
|
<denis.merigoux@inria.fr>
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||||
|
in compliance with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
|
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||||
|
or implied. See the License for the specific language governing permissions and limitations under
|
||||||
|
the License. *)
|
||||||
|
|
||||||
|
open Tokens
|
||||||
|
open Sedlexing
|
||||||
|
open Utils
|
||||||
|
open Lexer_common
|
||||||
|
module L = Lexer
|
||||||
|
module R = Re.Pcre
|
||||||
|
|
||||||
|
(** Same as {!val: Surface.Lexer.token_list_language_agnostic}, but with tokens specialized to
|
||||||
|
Polish. *)
|
||||||
|
let token_list : (string * token) list =
|
||||||
|
[
|
||||||
|
("zakres", SCOPE);
|
||||||
|
("konsekwencja", CONSEQUENCE);
|
||||||
|
("data", DATA);
|
||||||
|
("zalezy od", DEPENDS);
|
||||||
|
("deklaracja", DECLARATION);
|
||||||
|
("kontekst", CONTEXT);
|
||||||
|
("malejacy", DECREASING);
|
||||||
|
("rosnacy", INCREASING);
|
||||||
|
("z", OF);
|
||||||
|
("kolekcja", COLLECTION);
|
||||||
|
("enumeracja", ENUM);
|
||||||
|
("calkowita", INTEGER);
|
||||||
|
("pieniadze", MONEY);
|
||||||
|
("tekst", TEXT);
|
||||||
|
("dziesietna", DECIMAL);
|
||||||
|
("czas", DATE);
|
||||||
|
("czas trwania", DURATION);
|
||||||
|
("zerojedynkowy", BOOLEAN);
|
||||||
|
("suma", SUM);
|
||||||
|
("spelnione", FILLED);
|
||||||
|
("definicja", DEFINITION);
|
||||||
|
("etykieta", LABEL);
|
||||||
|
("wyjatek", EXCEPTION);
|
||||||
|
("wynosi", DEFINED_AS);
|
||||||
|
("pasuje", MATCH);
|
||||||
|
("ze wzorem", WITH);
|
||||||
|
("pod warunkiem", UNDER_CONDITION);
|
||||||
|
("jezeli", IF);
|
||||||
|
("wtedy", THEN);
|
||||||
|
("inaczej", ELSE);
|
||||||
|
("zawartosc", CONTENT);
|
||||||
|
("struktura", STRUCT);
|
||||||
|
("asercja", ASSERTION);
|
||||||
|
("rozna", VARIES);
|
||||||
|
("with", WITH_V);
|
||||||
|
("dla", FOR);
|
||||||
|
("wszystkie", ALL);
|
||||||
|
("mamy", WE_HAVE);
|
||||||
|
("staloprzecinkowa", FIXED);
|
||||||
|
("przez", BY);
|
||||||
|
("zasada", RULE);
|
||||||
|
("istnieje", EXISTS);
|
||||||
|
("takie ze", SUCH);
|
||||||
|
("to", THAT);
|
||||||
|
("i", AND);
|
||||||
|
("lub", OR);
|
||||||
|
("xor", XOR);
|
||||||
|
("nie", NOT);
|
||||||
|
("maximum", MAXIMUM);
|
||||||
|
("minimum", MINIMUM);
|
||||||
|
("filtr", FILTER);
|
||||||
|
("mapuj", MAP);
|
||||||
|
("poczatkowy", INIT);
|
||||||
|
("liczba", CARDINAL);
|
||||||
|
("rok", YEAR);
|
||||||
|
("miesiac", MONTH);
|
||||||
|
("dzien", DAY);
|
||||||
|
("prawda", TRUE);
|
||||||
|
("falsz", FALSE);
|
||||||
|
]
|
||||||
|
@ L.token_list_language_agnostic
|
||||||
|
|
||||||
|
(** Localised builtin functions *)
|
||||||
|
let builtins : (string * Ast.builtin_expression) list =
|
||||||
|
[
|
||||||
|
("integer_to_decimal", IntToDec);
|
||||||
|
("get_day", GetDay);
|
||||||
|
("get_month", GetMonth);
|
||||||
|
("get_year", GetYear);
|
||||||
|
]
|
||||||
|
|
||||||
|
(** Main lexing function used in code blocks *)
|
||||||
|
let rec lex_code (lexbuf : lexbuf) : token =
|
||||||
|
let prev_lexeme = Utf8.lexeme lexbuf in
|
||||||
|
let prev_pos = lexing_positions lexbuf in
|
||||||
|
match%sedlex lexbuf with
|
||||||
|
| white_space ->
|
||||||
|
(* Whitespaces *)
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
lex_code lexbuf
|
||||||
|
| '#', Star (Compl '\n'), '\n' ->
|
||||||
|
(* Comments *)
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
lex_code lexbuf
|
||||||
|
| "```" ->
|
||||||
|
(* End of code section *)
|
||||||
|
L.is_code := false;
|
||||||
|
END_CODE !L.code_string_acc
|
||||||
|
| "zakres" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
SCOPE
|
||||||
|
| "data" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
DATA
|
||||||
|
| "zalezy od" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
DEPENDS
|
||||||
|
| "deklaracja" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
DECLARATION
|
||||||
|
| "kontekst" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
CONTEXT
|
||||||
|
| "malejacy" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
DECREASING
|
||||||
|
| "rosnacy" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
INCREASING
|
||||||
|
| "z" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
OF
|
||||||
|
| "kolekcja" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
COLLECTION
|
||||||
|
| "enumeracja" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
ENUM
|
||||||
|
| "calkowita" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
INTEGER
|
||||||
|
| "pieniadze" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
MONEY
|
||||||
|
| "tekst" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
TEXT
|
||||||
|
| "dziesietna" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
DECIMAL
|
||||||
|
| "czas" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
DATE
|
||||||
|
| "czas trwania" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
DURATION
|
||||||
|
| "zerojedynkowy" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
BOOLEAN
|
||||||
|
| "suma" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
SUM
|
||||||
|
| "spelnione" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
FILLED
|
||||||
|
| "definicja" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
DEFINITION
|
||||||
|
| "etykieta" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
LABEL
|
||||||
|
| "wyjatek" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
EXCEPTION
|
||||||
|
| "wynosi" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
DEFINED_AS
|
||||||
|
| "pasuje" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
MATCH
|
||||||
|
| "ze wzorem" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
WITH
|
||||||
|
| "pod warunkiem" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
UNDER_CONDITION
|
||||||
|
| "jezeli" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
IF
|
||||||
|
| "konsekwencja" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
CONSEQUENCE
|
||||||
|
| "wtedy" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
THEN
|
||||||
|
| "inaczej" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
ELSE
|
||||||
|
| "condition" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
CONDITION
|
||||||
|
| "zawartosc" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
CONTENT
|
||||||
|
| "struktura" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
STRUCT
|
||||||
|
| "asercja" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
ASSERTION
|
||||||
|
| "rozna" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
VARIES
|
||||||
|
| "wraz z" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
WITH_V
|
||||||
|
| "dla" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
FOR
|
||||||
|
| "wszystkie" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
ALL
|
||||||
|
| "mamy" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
WE_HAVE
|
||||||
|
| "staloprzecinkowa" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
FIXED
|
||||||
|
| "przez" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
BY
|
||||||
|
| "zasada" ->
|
||||||
|
(* 0xE8 is è *)
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
RULE
|
||||||
|
| "istnieje" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
EXISTS
|
||||||
|
| "in" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
IN
|
||||||
|
| "takie ze" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
SUCH
|
||||||
|
| "to" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
THAT
|
||||||
|
| "i" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
AND
|
||||||
|
| "lub" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
OR
|
||||||
|
| "xor" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
XOR
|
||||||
|
| "nie" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
NOT
|
||||||
|
| "maximum" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
MAXIMUM
|
||||||
|
| "minimum" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
MINIMUM
|
||||||
|
| "filtr" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
FILTER
|
||||||
|
| "mapuj" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
MAP
|
||||||
|
| "poczatkowy" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
INIT
|
||||||
|
| "liczba" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
CARDINAL
|
||||||
|
| "prawda" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
TRUE
|
||||||
|
| "falsz" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
FALSE
|
||||||
|
| "rok" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
YEAR
|
||||||
|
| "miesiac" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
MONTH
|
||||||
|
| "dzien" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
DAY
|
||||||
|
| 0x24, Star white_space, '0' .. '9', Star ('0' .. '9' | ','), Opt ('.', Rep ('0' .. '9', 0 .. 2))
|
||||||
|
->
|
||||||
|
let extract_parts = R.regexp "([0-9]([0-9,]*[0-9]|))(.([0-9]{0,2})|)" in
|
||||||
|
let full_str = Utf8.lexeme lexbuf in
|
||||||
|
let only_numbers_str = String.trim (String.sub full_str 1 (String.length full_str - 1)) in
|
||||||
|
let parts = R.get_substring (R.exec ~rex:extract_parts only_numbers_str) in
|
||||||
|
(* Integer literal*)
|
||||||
|
let units = parts 1 in
|
||||||
|
let remove_commas = R.regexp "," in
|
||||||
|
let units =
|
||||||
|
Runtime.integer_of_string (R.substitute ~rex:remove_commas ~subst:(fun _ -> "") units)
|
||||||
|
in
|
||||||
|
let cents =
|
||||||
|
try Runtime.integer_of_string (parts 4) with Not_found -> Runtime.integer_of_int 0
|
||||||
|
in
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
MONEY_AMOUNT (units, cents)
|
||||||
|
| Plus '0' .. '9', '.', Star '0' .. '9' ->
|
||||||
|
let extract_code_title = R.regexp "([0-9]+)\\.([0-9]*)" in
|
||||||
|
let dec_parts = R.get_substring (R.exec ~rex:extract_code_title (Utf8.lexeme lexbuf)) in
|
||||||
|
(* Integer literal*)
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
DECIMAL_LITERAL
|
||||||
|
(Runtime.integer_of_string (dec_parts 1), Runtime.integer_of_string (dec_parts 2))
|
||||||
|
| "<=@" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
LESSER_EQUAL_DATE
|
||||||
|
| "<@" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
LESSER_DATE
|
||||||
|
| ">=@" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
GREATER_EQUAL_DATE
|
||||||
|
| ">@" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
GREATER_DATE
|
||||||
|
| "-@" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
MINUSDATE
|
||||||
|
| "+@" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
PLUSDATE
|
||||||
|
| "<=^" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
LESSER_EQUAL_DURATION
|
||||||
|
| "<^" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
LESSER_DURATION
|
||||||
|
| ">=^" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
GREATER_EQUAL_DURATION
|
||||||
|
| ">^" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
GREATER_DURATION
|
||||||
|
| "+^" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
PLUSDURATION
|
||||||
|
| "-^" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
MINUSDURATION
|
||||||
|
| "<=", 0x24 ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
LESSER_EQUAL_MONEY
|
||||||
|
| '<', 0x24 ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
LESSER_MONEY
|
||||||
|
| ">=", 0x24 ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
GREATER_EQUAL_MONEY
|
||||||
|
| '>', 0x24 ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
GREATER_MONEY
|
||||||
|
| '+', 0x24 ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
PLUSMONEY
|
||||||
|
| '-', 0x24 ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
MINUSMONEY
|
||||||
|
| '*', 0x24 ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
MULTMONEY
|
||||||
|
| '/', 0x24 ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
DIVMONEY
|
||||||
|
| "<=." ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
LESSER_EQUAL_DEC
|
||||||
|
| "<." ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
LESSER_DEC
|
||||||
|
| ">=." ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
GREATER_EQUAL_DEC
|
||||||
|
| ">." ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
GREATER_DEC
|
||||||
|
| "+." ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
PLUSDEC
|
||||||
|
| "-." ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
MINUSDEC
|
||||||
|
| "*." ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
MULTDEC
|
||||||
|
| "/." ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
DIVDEC
|
||||||
|
| "<=" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
LESSER_EQUAL
|
||||||
|
| '<' ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
LESSER
|
||||||
|
| ">=" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
GREATER_EQUAL
|
||||||
|
| '>' ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
GREATER
|
||||||
|
| '+' ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
PLUS
|
||||||
|
| '-' ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
MINUS
|
||||||
|
| '*' ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
MULT
|
||||||
|
| '/' ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
DIV
|
||||||
|
| "!=" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
NOT_EQUAL
|
||||||
|
| '=' ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
EQUAL
|
||||||
|
| '%' ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
PERCENT
|
||||||
|
| '(' ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
LPAREN
|
||||||
|
| ')' ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
RPAREN
|
||||||
|
| '{' ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
LBRACKET
|
||||||
|
| '}' ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
RBRACKET
|
||||||
|
| '[' ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
LSQUARE
|
||||||
|
| ']' ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
RSQUARE
|
||||||
|
| '|' ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
VERTICAL
|
||||||
|
| ':' ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
COLON
|
||||||
|
| ';' ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
SEMICOLON
|
||||||
|
| "--" ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
ALT
|
||||||
|
| '.' ->
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
DOT
|
||||||
|
| uppercase, Star (uppercase | lowercase | '0' .. '9' | '_' | '\'') ->
|
||||||
|
(* Name of constructor *)
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
CONSTRUCTOR (Utf8.lexeme lexbuf)
|
||||||
|
| lowercase, Star (lowercase | uppercase | '0' .. '9' | '_' | '\'') ->
|
||||||
|
(* Name of variable *)
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
IDENT (Utf8.lexeme lexbuf)
|
||||||
|
| Plus '0' .. '9' ->
|
||||||
|
(* Integer literal*)
|
||||||
|
L.update_acc lexbuf;
|
||||||
|
INT_LITERAL (Runtime.integer_of_string (Utf8.lexeme lexbuf))
|
||||||
|
| _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme
|
||||||
|
|
||||||
|
(** Main lexing function used outside code blocks *)
|
||||||
|
let lex_law (lexbuf : lexbuf) : token =
|
||||||
|
let prev_lexeme = Utf8.lexeme lexbuf in
|
||||||
|
let prev_pos = lexing_positions lexbuf in
|
||||||
|
match%sedlex lexbuf with
|
||||||
|
| "```catala" ->
|
||||||
|
L.is_code := true;
|
||||||
|
L.code_string_acc := "";
|
||||||
|
|
||||||
|
BEGIN_CODE
|
||||||
|
| eof -> EOF
|
||||||
|
| '#', Star white_space, "Master file" -> MASTER_FILE
|
||||||
|
| '>', Star white_space, "Begin metadata" -> BEGIN_METADATA
|
||||||
|
| '>', Star white_space, "End metadata" -> END_METADATA
|
||||||
|
| ( '>',
|
||||||
|
Star white_space,
|
||||||
|
"Include:",
|
||||||
|
Star white_space,
|
||||||
|
Plus (Compl ('@' | '\n')),
|
||||||
|
Star white_space,
|
||||||
|
Opt ('@', Star white_space, "p.", Star white_space, Plus '0' .. '9', Star white_space),
|
||||||
|
'\n' ) ->
|
||||||
|
let extract_components =
|
||||||
|
R.regexp ">\\s*Include\\:\\s*([^@\\n]+)\\s*(@\\s*p\\.\\s*([0-9]+)|)"
|
||||||
|
in
|
||||||
|
let get_component = R.get_substring (R.exec ~rex:extract_components (Utf8.lexeme lexbuf)) in
|
||||||
|
let name = get_component 1 in
|
||||||
|
let pages = try Some (int_of_string (get_component 3)) with Not_found -> None in
|
||||||
|
let pos = lexing_positions lexbuf in
|
||||||
|
if Filename.extension name = ".pdf" then
|
||||||
|
LAW_INCLUDE (Ast.PdfFile ((name, Pos.from_lpos pos), pages))
|
||||||
|
else LAW_INCLUDE (Ast.CatalaFile (name, Pos.from_lpos pos))
|
||||||
|
| '#', Plus '#', Star white_space, Plus (Compl ('[' | ']' | '\n')), Star white_space, '\n' ->
|
||||||
|
get_law_heading lexbuf
|
||||||
|
| ( '#',
|
||||||
|
Plus '#',
|
||||||
|
Star white_space,
|
||||||
|
'[',
|
||||||
|
Star white_space,
|
||||||
|
Plus (Compl ']'),
|
||||||
|
Star white_space,
|
||||||
|
']',
|
||||||
|
'\n' ) ->
|
||||||
|
let extract_article_title = R.regexp "([#]+)\\s*\\[([^\\]]+)\\]" in
|
||||||
|
let get_substring =
|
||||||
|
R.get_substring (R.exec ~rex:extract_article_title (Utf8.lexeme lexbuf))
|
||||||
|
in
|
||||||
|
let title = get_substring 2 in
|
||||||
|
let get_new_lines = R.regexp "\n" in
|
||||||
|
let new_lines_count =
|
||||||
|
try Array.length (R.extract ~rex:get_new_lines (Utf8.lexeme lexbuf)) with Not_found -> 0
|
||||||
|
in
|
||||||
|
(* the -1 is here to compensate for Sedlex's automatic newline detection around token *)
|
||||||
|
for _i = 1 to new_lines_count - 1 do
|
||||||
|
new_line lexbuf
|
||||||
|
done;
|
||||||
|
let precedence = calc_precedence (get_substring 1) in
|
||||||
|
|
||||||
|
LAW_ARTICLE (title, None, None, precedence)
|
||||||
|
| Plus (Compl ('/' | '#' | '`' | '>')) -> LAW_TEXT (Utf8.lexeme lexbuf)
|
||||||
|
| _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme
|
||||||
|
|
||||||
|
(** Entry point of the lexer, distributes to {!val: lex_code} or {!val: lex_law} depending of {!val:
|
||||||
|
Surface.Lexer.is_code}. *)
|
||||||
|
let lexer (lexbuf : lexbuf) : token = if !L.is_code then lex_code lexbuf else lex_law lexbuf
|
15
src/catala/surface/lexer_pl.mli
Normal file
15
src/catala/surface/lexer_pl.mli
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
(* This file is part of the Catala compiler, a specification language for tax and social benefits
|
||||||
|
computation rules. Copyright (C) 2020 Inria, contributor: Denis Merigoux
|
||||||
|
<denis.merigoux@inria.fr>
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||||
|
in compliance with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||||
|
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||||
|
or implied. See the License for the specific language governing permissions and limitations under
|
||||||
|
the License. *)
|
||||||
|
|
||||||
|
include Lexer.LocalisedLexer
|
@ -226,11 +226,13 @@ end
|
|||||||
module Parser_NonVerbose = ParserAux (Lexer)
|
module Parser_NonVerbose = ParserAux (Lexer)
|
||||||
module Parser_En = ParserAux (Lexer_en)
|
module Parser_En = ParserAux (Lexer_en)
|
||||||
module Parser_Fr = ParserAux (Lexer_fr)
|
module Parser_Fr = ParserAux (Lexer_fr)
|
||||||
|
module Parser_Pl = ParserAux (Lexer_pl)
|
||||||
|
|
||||||
let localised_parser : Cli.frontend_lang -> lexbuf -> Ast.source_file_or_master = function
|
let localised_parser : Cli.frontend_lang -> lexbuf -> Ast.source_file_or_master = function
|
||||||
| `NonVerbose -> Parser_NonVerbose.commands_or_includes
|
| `NonVerbose -> Parser_NonVerbose.commands_or_includes
|
||||||
| `En -> Parser_En.commands_or_includes
|
| `En -> Parser_En.commands_or_includes
|
||||||
| `Fr -> Parser_Fr.commands_or_includes
|
| `Fr -> Parser_Fr.commands_or_includes
|
||||||
|
| `Pl -> Parser_Pl.commands_or_includes
|
||||||
|
|
||||||
(** {1 Parsing multiple files} *)
|
(** {1 Parsing multiple files} *)
|
||||||
|
|
||||||
|
@ -12,12 +12,12 @@
|
|||||||
or implied. See the License for the specific language governing permissions and limitations under
|
or implied. See the License for the specific language governing permissions and limitations under
|
||||||
the License. *)
|
the License. *)
|
||||||
|
|
||||||
type frontend_lang = [ `Fr | `En | `NonVerbose ]
|
type frontend_lang = [ `Fr | `En | `NonVerbose | `Pl]
|
||||||
|
|
||||||
type backend_lang = [ `Fr | `En ]
|
type backend_lang = [ `Fr | `En | `Pl ]
|
||||||
|
|
||||||
let to_backend_lang (lang : frontend_lang) : backend_lang =
|
let to_backend_lang (lang : frontend_lang) : backend_lang =
|
||||||
match lang with `En | `NonVerbose -> `En | `Fr -> `Fr
|
match lang with `En | `NonVerbose -> `En | `Fr -> `Fr | `Pl -> `Pl
|
||||||
|
|
||||||
(** Source files to be compiled *)
|
(** Source files to be compiled *)
|
||||||
let source_files : string list ref = ref []
|
let source_files : string list ref = ref []
|
||||||
@ -57,7 +57,8 @@ let unstyled = Arg.(value & flag & info [ "unstyled" ] ~doc:"Removes styling fro
|
|||||||
let optimize = Arg.(value & flag & info [ "optimize"; "O" ] ~doc:"Run compiler optimizations")
|
let optimize = Arg.(value & flag & info [ "optimize"; "O" ] ~doc:"Run compiler optimizations")
|
||||||
|
|
||||||
let trace_opt =
|
let trace_opt =
|
||||||
Arg.(value & flag & info [ "trace"; "t" ] ~doc:"Displays a trace of the interpreter's computation")
|
Arg.(
|
||||||
|
value & flag & info [ "trace"; "t" ] ~doc:"Displays a trace of the interpreter's computation")
|
||||||
|
|
||||||
let wrap_weaved_output =
|
let wrap_weaved_output =
|
||||||
Arg.(
|
Arg.(
|
||||||
|
@ -12,9 +12,9 @@
|
|||||||
or implied. See the License for the specific language governing permissions and limitations under
|
or implied. See the License for the specific language governing permissions and limitations under
|
||||||
the License. *)
|
the License. *)
|
||||||
|
|
||||||
type frontend_lang = [ `En | `Fr | `NonVerbose ]
|
type frontend_lang = [ `En | `Fr | `NonVerbose | `Pl ]
|
||||||
|
|
||||||
type backend_lang = [ `En | `Fr ]
|
type backend_lang = [ `En | `Fr | `Pl ]
|
||||||
|
|
||||||
val to_backend_lang : frontend_lang -> backend_lang
|
val to_backend_lang : frontend_lang -> backend_lang
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user