mirror of
https://github.com/CatalaLang/catala.git
synced 2024-11-08 07:51:43 +03:00
lexer
This commit is contained in:
parent
37c8f8da64
commit
db46b1c444
@ -124,7 +124,7 @@ general-to-specifics statutes order. Therefore, there exists multiple versions
|
||||
of the Catala surface syntax, adapted to the language of the legislative text.
|
||||
|
||||
Currently, Catala supports English and French legislative text via the
|
||||
`--language=en` or `--language=fr` option.
|
||||
`--language=en`, `--language=fr` or `--language=pl` option.
|
||||
|
||||
Technically, support for new languages can be added via a new lexer. If you want
|
||||
to add a new language, you can start from
|
||||
|
@ -119,7 +119,8 @@ augmented with the Catala plugin, simply enter
|
||||
sudo make pygments
|
||||
|
||||
This will execute the
|
||||
script `syntax_highlighting/fr/pygments/set_up_pygments.sh` and
|
||||
script `syntax_highlighting/fr/pygments/set_up_pygments.sh`,
|
||||
`syntax_highlighting/pl/pygments/set_up_pygments.sh` and
|
||||
`syntax_highlighting/en/pygments/set_up_pygments.sh`.
|
||||
|
||||
The scripts patch your `pigmentize` executable, used for instance by the `minted` LaTeX package.
|
||||
|
15
Makefile
15
Makefile
@ -58,6 +58,7 @@ install:
|
||||
|
||||
SYNTAX_HIGHLIGHTING_FR=${CURDIR}/syntax_highlighting/fr
|
||||
SYNTAX_HIGHLIGHTING_EN=${CURDIR}/syntax_highlighting/en
|
||||
SYNTAX_HIGHLIGHTING_PL=${CURDIR}/syntax_highlighting/pl
|
||||
|
||||
pygmentize_fr: $(SYNTAX_HIGHLIGHTING_FR)/set_up_pygments.sh
|
||||
chmod +x $<
|
||||
@ -67,6 +68,10 @@ pygmentize_en: $(SYNTAX_HIGHLIGHTING_EN)/set_up_pygments.sh
|
||||
chmod +x $<
|
||||
sudo $<
|
||||
|
||||
pygmentize_pl: $(SYNTAX_HIGHLIGHTING_PL)/set_up_pygments.sh
|
||||
chmod +x $<
|
||||
sudo $<
|
||||
|
||||
#> pygments : Extends your pygmentize executable with Catala lexers
|
||||
pygments: pygmentize_fr pygmentize_en
|
||||
|
||||
@ -78,6 +83,10 @@ atom_en: ${CURDIR}/syntax_highlighting/en/setup_atom.sh
|
||||
chmod +x $<
|
||||
$<
|
||||
|
||||
atom_pl: ${CURDIR}/syntax_highlighting/pl/setup_atom.sh
|
||||
chmod +x $<
|
||||
$<
|
||||
|
||||
atom_nv: ${CURDIR}/syntax_highlighting/nv/setup_atom.sh
|
||||
chmod +x $<
|
||||
$<
|
||||
@ -93,6 +102,10 @@ vscode_en: ${CURDIR}/syntax_highlighting/en/setup_vscode.sh
|
||||
chmod +x $<
|
||||
$<
|
||||
|
||||
vscode_pl: ${CURDIR}/syntax_highlighting/pl/setup_vscode.sh
|
||||
chmod +x $<
|
||||
$<
|
||||
|
||||
vscode_nv: ${CURDIR}/syntax_highlighting/nv/setup_vscode.sh
|
||||
chmod +x $<
|
||||
$<
|
||||
@ -222,7 +235,7 @@ clean:
|
||||
$(MAKE) -C $(CODE_GENERAL_IMPOTS_DIR) clean
|
||||
|
||||
inspect:
|
||||
gitinspector -f ml,mli,mly,iro,tex,catala,catala_en,catala_fr,md,fst,mld --grading
|
||||
gitinspector -f ml,mli,mly,iro,tex,catala,catala_en,catala_pl,catala_fr,md,fst,mld --grading
|
||||
|
||||
##########################################
|
||||
# Special targets
|
||||
|
@ -99,7 +99,7 @@ To build and run the example, create a `Makefile` in `foo/`
|
||||
with the following contents:
|
||||
|
||||
```Makefile
|
||||
CATALA_LANG=en # or fr if your source code is in French
|
||||
CATALA_LANG=en # or fr/pl if your source code is in French/Polish
|
||||
SRC=foo.catala
|
||||
|
||||
include ../Makefile.common.mk
|
||||
|
14
examples/polish_taxes_pl/.gitignore
vendored
Normal file
14
examples/polish_taxes_pl/.gitignore
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
*.aux
|
||||
*.dvi
|
||||
*.fdb_latexmk
|
||||
*.fls
|
||||
*.log
|
||||
*.out
|
||||
*.fls
|
||||
*.tex
|
||||
*.pdf
|
||||
_minted*
|
||||
*.toc
|
||||
*.pyg
|
||||
*.d
|
||||
*.ml
|
4
examples/polish_taxes_pl/Makefile
Normal file
4
examples/polish_taxes_pl/Makefile
Normal file
@ -0,0 +1,4 @@
|
||||
CATALA_LANG=pl
|
||||
SRC=polish_tax_code.catala_pl
|
||||
|
||||
include ../Makefile.common.mk
|
@ -0,0 +1,24 @@
|
||||
## Rozdzial 3
|
||||
|
||||
## Art. 7. 1. Stawki podatku wynoszą:
|
||||
|
||||
### 1) od umowy sprzedaży:
|
||||
#### a) nieruchomości, rzeczy ruchomych, prawa użytkowania wieczystego, własnościowego spółdzielczego prawa do lokalu mieszkalnego, spółdzielczego prawa do lokalu użytkowego oraz wynikających z przepisów prawa spółdzielczego: prawa do domu jednorodzinnego oraz prawa do lokalu w małym domu mieszkalnym – 2%,
|
||||
|
||||
> Begin metadata
|
||||
```catala
|
||||
deklaracja zakres UmowaSprzedazy:
|
||||
kontekst kwota zawartosc pieniadze
|
||||
kontekst stawka zawartosc dziesietna
|
||||
kontekst podatek zawartosc pieniadze
|
||||
|
||||
zakres UmowaSprzedazy:
|
||||
definicja podatek wynosi
|
||||
kwota *$ stawka
|
||||
|
||||
zakres UmowaSprzedazy:
|
||||
etykieta a7_u1_p1_ppa
|
||||
definicja stawka wynosi 2%
|
||||
```
|
||||
> End metadata
|
||||
|
3
examples/polish_taxes_pl/polish_taxes_pl.catala_pl
Normal file
3
examples/polish_taxes_pl/polish_taxes_pl.catala_pl
Normal file
@ -0,0 +1,3 @@
|
||||
# Master file
|
||||
|
||||
> Include: podatek_od_czynnosci_cywilnoprawnych/rozdzial_3.catala_pl
|
23
examples/polish_taxes_pl/tests/test_a7_u1_p1.catala_pl
Normal file
23
examples/polish_taxes_pl/tests/test_a7_u1_p1.catala_pl
Normal file
@ -0,0 +1,23 @@
|
||||
> Include: ../podatek_od_czynnosci_cywilnoprawnych/rozdzial_3.catala_en
|
||||
|
||||
## [Test - Art. 7 ustęp 1 punkt 1]
|
||||
|
||||
```catala
|
||||
deklaracja zakres Test_A7_U1_P1_PPa:
|
||||
kontekst sprzedaz zakres UmowaSprzedazy
|
||||
|
||||
zakres Test_A7_U1_P1_PPa:
|
||||
definicja sprzedaz.kwota wynosi $100
|
||||
|
||||
asercja sprzedaz.podatek = $2
|
||||
|
||||
|
||||
deklaracja zakres Test_A7_U1_P1_PPb:
|
||||
kontekst sprzedaz zakres UmowaSprzedazy
|
||||
|
||||
zakres Test_A7_U1_P1_PPb:
|
||||
definicja sprzedaz.kwota wynosi $100
|
||||
definicja sprzedaz.inne_prawa_majatkowe wynosi prawda
|
||||
|
||||
asercja sprzedaz.podatek = $1
|
||||
```
|
@ -16,6 +16,7 @@ scp grammar.html $1/
|
||||
scp catala.html $1/
|
||||
scp _build/default/src/catala/catala_web.bc.js $1/playground/
|
||||
scp examples/tutorial_en/tutorial_en.catala_en $1/playground/
|
||||
# TODO BW: PL
|
||||
scp examples/tutoriel_fr/tutoriel_fr.catala_fr $1/playground/
|
||||
cat examples/allocations_familiales/prologue.catala_fr > allocations_familiales.catala_fr
|
||||
cat examples/allocations_familiales/decrets_divers.catala_fr >> allocations_familiales.catala_fr
|
||||
@ -29,5 +30,6 @@ mv -f allocations_familiales_fixed.catala_fr allocations_familiales.catala_fr
|
||||
scp allocations_familiales.catala_fr $1/playground/
|
||||
rm allocations_familiales.catala_fr
|
||||
scp syntax_highlighting/en/ace/mode-catala_en.js $1/playground/
|
||||
# TODO BW: PL
|
||||
scp syntax_highlighting/fr/ace/mode-catala_fr.js $1/playground/
|
||||
scp french_law_js/french_law.js $1/french_law.js
|
||||
|
@ -98,7 +98,8 @@ let format_lit (fmt : Format.formatter) (l : lit Pos.marked) : unit =
|
||||
| LMoney e -> (
|
||||
match !Utils.Cli.locale_lang with
|
||||
| `En -> Format.fprintf fmt "$%s" (Runtime.money_to_string e)
|
||||
| `Fr -> Format.fprintf fmt "%s €" (Runtime.money_to_string e))
|
||||
| `Fr -> Format.fprintf fmt "%s €" (Runtime.money_to_string e)
|
||||
| `Pl -> Format.fprintf fmt "%s PLN" (Runtime.money_to_string e))
|
||||
| LDate d -> Format.fprintf fmt "%s" (Runtime.date_to_string d)
|
||||
| LDuration d -> Format.fprintf fmt "%s" (Runtime.duration_to_string d)
|
||||
|
||||
|
@ -35,6 +35,7 @@ let driver (source_file : Pos.input_file) (debug : bool) (dcalc : bool) (unstyle
|
||||
| Some l ->
|
||||
if l = "fr" then `Fr
|
||||
else if l = "en" then `En
|
||||
else if l = "pl" then `Pl
|
||||
else if l = "non-verbose" then `NonVerbose
|
||||
else
|
||||
Errors.raise_error
|
||||
|
@ -38,7 +38,8 @@ let format_lit (fmt : Format.formatter) (l : lit Pos.marked) : unit =
|
||||
| LMoney e -> (
|
||||
match !Utils.Cli.locale_lang with
|
||||
| `En -> Format.fprintf fmt "$%s" (Runtime.money_to_string e)
|
||||
| `Fr -> Format.fprintf fmt "%s €" (Runtime.money_to_string e))
|
||||
| `Fr -> Format.fprintf fmt "%s €" (Runtime.money_to_string e)
|
||||
| `Pl -> Format.fprintf fmt "%s PLN" (Runtime.money_to_string e))
|
||||
| LDate d -> Format.fprintf fmt "%s" (Runtime.date_to_string d)
|
||||
| LDuration d -> Format.fprintf fmt "%s" (Runtime.duration_to_string d)
|
||||
|
||||
|
@ -71,12 +71,14 @@ let wrap_html (source_files : string list) (language : Cli.backend_lang) (fmt :
|
||||
css_as_string
|
||||
(match language with
|
||||
| `Fr -> "Implémentation de texte législatif"
|
||||
| `En -> "Legislative text implementation")
|
||||
(match language with `Fr -> "Document généré par" | `En -> "Document generated by")
|
||||
| `En -> "Legislative text implementation"
|
||||
| `Pl -> "Implementowanie tekstów legislacyjnych")
|
||||
(match language with `Fr -> "Document généré par" | `En -> "Document generated by" | `Pl -> "Dokument wygenerowany przez")
|
||||
Utils.Cli.version
|
||||
(match language with
|
||||
| `Fr -> "Fichiers sources tissés dans ce document"
|
||||
| `En -> "Source files weaved in this document")
|
||||
| `En -> "Source files weaved in this document"
|
||||
| `Pl -> "Pliki źródłowe w tym dokumencie")
|
||||
(String.concat "\n"
|
||||
(List.map
|
||||
(fun filename ->
|
||||
@ -90,7 +92,8 @@ let wrap_html (source_files : string list) (language : Cli.backend_lang) (fmt :
|
||||
(pre_html (Filename.basename filename))
|
||||
(match language with
|
||||
| `Fr -> "dernière modification le"
|
||||
| `En -> "last modification")
|
||||
| `En -> "last modification"
|
||||
| `Pl -> "ostatnia modyfikacja")
|
||||
ftime)
|
||||
source_files));
|
||||
wrapped fmt
|
||||
@ -104,7 +107,7 @@ let pygmentize_code (c : string Pos.marked) (language : C.backend_lang) : string
|
||||
Printf.fprintf oc "%s" (Pos.unmark c);
|
||||
close_out oc;
|
||||
let pygments = "pygmentize" in
|
||||
let pygments_lexer = match language with `Fr -> "catala_fr" | `En -> "catala_en" in
|
||||
let pygments_lexer = match language with `Fr -> "catala_fr" | `En -> "catala_en" | `Pl -> "catala_pl" in
|
||||
let pygments_args =
|
||||
[|
|
||||
"-l";
|
||||
|
@ -80,15 +80,17 @@ let wrap_latex (source_files : string list) (language : C.backend_lang) (fmt : F
|
||||
%s : \n\
|
||||
\\begin{itemize}%s\\end{itemize}\n\n\
|
||||
\\[\\star\\star\\star\\]\\\\\n"
|
||||
(match language with `Fr -> "french" | `En -> "english")
|
||||
(match language with `Fr -> "french" | `En -> "english" | `Pl -> "polish")
|
||||
(match language with
|
||||
| `Fr -> "Implémentation de texte législatif"
|
||||
| `En -> "Legislative text implementation")
|
||||
(match language with `Fr -> "Document généré par" | `En -> "Document generated by")
|
||||
| `En -> "Legislative text implementation"
|
||||
| `Pl -> "Implementacja tekstów legislacyjnych")
|
||||
(match language with `Fr -> "Document généré par" | `En -> "Document generated by" | `Pl -> "Dokument wygenerowany przez")
|
||||
Utils.Cli.version
|
||||
(match language with
|
||||
| `Fr -> "Fichiers sources tissés dans ce document"
|
||||
| `En -> "Source files weaved in this document")
|
||||
| `En -> "Source files weaved in this document"
|
||||
| `Pl -> "Pliki źródłowe w tym dokumencie")
|
||||
(String.concat ","
|
||||
(List.map
|
||||
(fun filename ->
|
||||
@ -102,7 +104,8 @@ let wrap_latex (source_files : string list) (language : C.backend_lang) (fmt : F
|
||||
(pre_latexify (Filename.basename filename))
|
||||
(match language with
|
||||
| `Fr -> "dernière modification le"
|
||||
| `En -> "last modification")
|
||||
| `En -> "last modification"
|
||||
| `Pl -> "ostatnia modyfikacja")
|
||||
ftime)
|
||||
source_files));
|
||||
wrapped fmt;
|
||||
@ -137,7 +140,7 @@ let law_article_item_to_latex (language : C.backend_lang) (fmt : Format.formatte
|
||||
\\end{minted}"
|
||||
(pre_latexify (Filename.basename (Pos.get_file (Pos.get_position c))))
|
||||
(Pos.get_start_line (Pos.get_position c) - 1)
|
||||
(match language with `Fr -> "catala_fr" | `En -> "catala_en")
|
||||
(match language with `Fr -> "catala_fr" | `En -> "catala_en" | `Pl -> "catala_pl")
|
||||
(math_syms_replace (Pos.unmark c))
|
||||
|
||||
let rec law_structure_to_latex (language : C.backend_lang) (fmt : Format.formatter)
|
||||
@ -171,7 +174,7 @@ let rec law_structure_to_latex (language : C.backend_lang) (fmt : Format.formatt
|
||||
(law_article_item_to_latex language)
|
||||
fmt children
|
||||
| A.MetadataBlock (_, c) ->
|
||||
let metadata_title = match language with `Fr -> "Métadonnées" | `En -> "Metadata" in
|
||||
let metadata_title = match language with `Fr -> "Métadonnées" | `En -> "Metadata" | `Pl -> "Metadane" in
|
||||
Format.fprintf fmt
|
||||
"\\begin{tcolorbox}[colframe=OliveGreen, breakable, \
|
||||
title=\\textcolor{black}{\\texttt{%s}},title after \
|
||||
@ -183,7 +186,7 @@ let rec law_structure_to_latex (language : C.backend_lang) (fmt : Format.formatt
|
||||
metadata_title metadata_title
|
||||
(Pos.get_start_line (Pos.get_position c) - 1)
|
||||
(pre_latexify (Filename.basename (Pos.get_file (Pos.get_position c))))
|
||||
(match language with `Fr -> "catala_fr" | `En -> "catala_en")
|
||||
(match language with `Fr -> "catala_fr" | `En -> "catala_en" | `Pl -> "catala_pl")
|
||||
(math_syms_replace (Pos.unmark c))
|
||||
| A.IntermediateText t -> Format.fprintf fmt "%s" (pre_latexify t)
|
||||
|
||||
|
@ -32,19 +32,37 @@
|
||||
(rule
|
||||
(with-stdout-to
|
||||
parser.messages.new
|
||||
(run menhir %{dep:tokens.mly} %{dep:parser.mly} --base parser --list-errors)))
|
||||
(run
|
||||
menhir
|
||||
%{dep:tokens.mly}
|
||||
%{dep:parser.mly}
|
||||
--base
|
||||
parser
|
||||
--list-errors)))
|
||||
|
||||
(rule
|
||||
(with-stdout-to
|
||||
parser_errors.ml
|
||||
(run menhir %{dep:tokens.mly} %{dep:parser.mly} --base parser
|
||||
--compile-errors %{dep:parser.messages})))
|
||||
(run
|
||||
menhir
|
||||
%{dep:tokens.mly}
|
||||
%{dep:parser.mly}
|
||||
--base
|
||||
parser
|
||||
--compile-errors
|
||||
%{dep:parser.messages})))
|
||||
|
||||
(rule
|
||||
(with-stdout-to
|
||||
parser.messages.updated
|
||||
(run menhir %{dep:tokens.mly} %{dep:parser.mly} --base parser
|
||||
--update-errors %{dep:parser.messages})))
|
||||
(run
|
||||
menhir
|
||||
%{dep:tokens.mly}
|
||||
%{dep:parser.mly}
|
||||
--base
|
||||
parser
|
||||
--update-errors
|
||||
%{dep:parser.messages})))
|
||||
|
||||
(rule
|
||||
(alias update-parser-messages)
|
||||
|
550
src/catala/surface/lexer_pl.ml
Normal file
550
src/catala/surface/lexer_pl.ml
Normal file
@ -0,0 +1,550 @@
|
||||
(* This file is part of the Catala compiler, a specification language for tax and social benefits
|
||||
computation rules. Copyright (C) 2020 Inria, contributor: Denis Merigoux
|
||||
<denis.merigoux@inria.fr>
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
in compliance with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
or implied. See the License for the specific language governing permissions and limitations under
|
||||
the License. *)
|
||||
|
||||
open Tokens
|
||||
open Sedlexing
|
||||
open Utils
|
||||
open Lexer_common
|
||||
module L = Lexer
|
||||
module R = Re.Pcre
|
||||
|
||||
(** Same as {!val: Surface.Lexer.token_list_language_agnostic}, but with tokens specialized to
|
||||
Polish. *)
|
||||
let token_list : (string * token) list =
|
||||
[
|
||||
("zakres", SCOPE);
|
||||
("konsekwencja", CONSEQUENCE);
|
||||
("data", DATA);
|
||||
("zalezy od", DEPENDS);
|
||||
("deklaracja", DECLARATION);
|
||||
("kontekst", CONTEXT);
|
||||
("malejacy", DECREASING);
|
||||
("rosnacy", INCREASING);
|
||||
("z", OF);
|
||||
("kolekcja", COLLECTION);
|
||||
("enumeracja", ENUM);
|
||||
("calkowita", INTEGER);
|
||||
("pieniadze", MONEY);
|
||||
("tekst", TEXT);
|
||||
("dziesietna", DECIMAL);
|
||||
("czas", DATE);
|
||||
("czas trwania", DURATION);
|
||||
("zerojedynkowy", BOOLEAN);
|
||||
("suma", SUM);
|
||||
("spelnione", FILLED);
|
||||
("definicja", DEFINITION);
|
||||
("etykieta", LABEL);
|
||||
("wyjatek", EXCEPTION);
|
||||
("wynosi", DEFINED_AS);
|
||||
("pasuje", MATCH);
|
||||
("ze wzorem", WITH);
|
||||
("pod warunkiem", UNDER_CONDITION);
|
||||
("jezeli", IF);
|
||||
("wtedy", THEN);
|
||||
("inaczej", ELSE);
|
||||
("zawartosc", CONTENT);
|
||||
("struktura", STRUCT);
|
||||
("asercja", ASSERTION);
|
||||
("rozna", VARIES);
|
||||
("with", WITH_V);
|
||||
("dla", FOR);
|
||||
("wszystkie", ALL);
|
||||
("mamy", WE_HAVE);
|
||||
("staloprzecinkowa", FIXED);
|
||||
("przez", BY);
|
||||
("zasada", RULE);
|
||||
("istnieje", EXISTS);
|
||||
("takie ze", SUCH);
|
||||
("to", THAT);
|
||||
("i", AND);
|
||||
("lub", OR);
|
||||
("xor", XOR);
|
||||
("nie", NOT);
|
||||
("maximum", MAXIMUM);
|
||||
("minimum", MINIMUM);
|
||||
("filtr", FILTER);
|
||||
("mapuj", MAP);
|
||||
("poczatkowy", INIT);
|
||||
("liczba", CARDINAL);
|
||||
("rok", YEAR);
|
||||
("miesiac", MONTH);
|
||||
("dzien", DAY);
|
||||
("prawda", TRUE);
|
||||
("falsz", FALSE);
|
||||
]
|
||||
@ L.token_list_language_agnostic
|
||||
|
||||
(** Localised builtin functions *)
|
||||
let builtins : (string * Ast.builtin_expression) list =
|
||||
[
|
||||
("integer_to_decimal", IntToDec);
|
||||
("get_day", GetDay);
|
||||
("get_month", GetMonth);
|
||||
("get_year", GetYear);
|
||||
]
|
||||
|
||||
(** Main lexing function used in code blocks *)
|
||||
let rec lex_code (lexbuf : lexbuf) : token =
|
||||
let prev_lexeme = Utf8.lexeme lexbuf in
|
||||
let prev_pos = lexing_positions lexbuf in
|
||||
match%sedlex lexbuf with
|
||||
| white_space ->
|
||||
(* Whitespaces *)
|
||||
L.update_acc lexbuf;
|
||||
lex_code lexbuf
|
||||
| '#', Star (Compl '\n'), '\n' ->
|
||||
(* Comments *)
|
||||
L.update_acc lexbuf;
|
||||
lex_code lexbuf
|
||||
| "```" ->
|
||||
(* End of code section *)
|
||||
L.is_code := false;
|
||||
END_CODE !L.code_string_acc
|
||||
| "zakres" ->
|
||||
L.update_acc lexbuf;
|
||||
SCOPE
|
||||
| "data" ->
|
||||
L.update_acc lexbuf;
|
||||
DATA
|
||||
| "zalezy od" ->
|
||||
L.update_acc lexbuf;
|
||||
DEPENDS
|
||||
| "deklaracja" ->
|
||||
L.update_acc lexbuf;
|
||||
DECLARATION
|
||||
| "kontekst" ->
|
||||
L.update_acc lexbuf;
|
||||
CONTEXT
|
||||
| "malejacy" ->
|
||||
L.update_acc lexbuf;
|
||||
DECREASING
|
||||
| "rosnacy" ->
|
||||
L.update_acc lexbuf;
|
||||
INCREASING
|
||||
| "z" ->
|
||||
L.update_acc lexbuf;
|
||||
OF
|
||||
| "kolekcja" ->
|
||||
L.update_acc lexbuf;
|
||||
COLLECTION
|
||||
| "enumeracja" ->
|
||||
L.update_acc lexbuf;
|
||||
ENUM
|
||||
| "calkowita" ->
|
||||
L.update_acc lexbuf;
|
||||
INTEGER
|
||||
| "pieniadze" ->
|
||||
L.update_acc lexbuf;
|
||||
MONEY
|
||||
| "tekst" ->
|
||||
L.update_acc lexbuf;
|
||||
TEXT
|
||||
| "dziesietna" ->
|
||||
L.update_acc lexbuf;
|
||||
DECIMAL
|
||||
| "czas" ->
|
||||
L.update_acc lexbuf;
|
||||
DATE
|
||||
| "czas trwania" ->
|
||||
L.update_acc lexbuf;
|
||||
DURATION
|
||||
| "zerojedynkowy" ->
|
||||
L.update_acc lexbuf;
|
||||
BOOLEAN
|
||||
| "suma" ->
|
||||
L.update_acc lexbuf;
|
||||
SUM
|
||||
| "spelnione" ->
|
||||
L.update_acc lexbuf;
|
||||
FILLED
|
||||
| "definicja" ->
|
||||
L.update_acc lexbuf;
|
||||
DEFINITION
|
||||
| "etykieta" ->
|
||||
L.update_acc lexbuf;
|
||||
LABEL
|
||||
| "wyjatek" ->
|
||||
L.update_acc lexbuf;
|
||||
EXCEPTION
|
||||
| "wynosi" ->
|
||||
L.update_acc lexbuf;
|
||||
DEFINED_AS
|
||||
| "pasuje" ->
|
||||
L.update_acc lexbuf;
|
||||
MATCH
|
||||
| "ze wzorem" ->
|
||||
L.update_acc lexbuf;
|
||||
WITH
|
||||
| "pod warunkiem" ->
|
||||
L.update_acc lexbuf;
|
||||
UNDER_CONDITION
|
||||
| "jezeli" ->
|
||||
L.update_acc lexbuf;
|
||||
IF
|
||||
| "konsekwencja" ->
|
||||
L.update_acc lexbuf;
|
||||
CONSEQUENCE
|
||||
| "wtedy" ->
|
||||
L.update_acc lexbuf;
|
||||
THEN
|
||||
| "inaczej" ->
|
||||
L.update_acc lexbuf;
|
||||
ELSE
|
||||
| "condition" ->
|
||||
L.update_acc lexbuf;
|
||||
CONDITION
|
||||
| "zawartosc" ->
|
||||
L.update_acc lexbuf;
|
||||
CONTENT
|
||||
| "struktura" ->
|
||||
L.update_acc lexbuf;
|
||||
STRUCT
|
||||
| "asercja" ->
|
||||
L.update_acc lexbuf;
|
||||
ASSERTION
|
||||
| "rozna" ->
|
||||
L.update_acc lexbuf;
|
||||
VARIES
|
||||
| "wraz z" ->
|
||||
L.update_acc lexbuf;
|
||||
WITH_V
|
||||
| "dla" ->
|
||||
L.update_acc lexbuf;
|
||||
FOR
|
||||
| "wszystkie" ->
|
||||
L.update_acc lexbuf;
|
||||
ALL
|
||||
| "mamy" ->
|
||||
L.update_acc lexbuf;
|
||||
WE_HAVE
|
||||
| "staloprzecinkowa" ->
|
||||
L.update_acc lexbuf;
|
||||
FIXED
|
||||
| "przez" ->
|
||||
L.update_acc lexbuf;
|
||||
BY
|
||||
| "zasada" ->
|
||||
(* 0xE8 is è *)
|
||||
L.update_acc lexbuf;
|
||||
RULE
|
||||
| "istnieje" ->
|
||||
L.update_acc lexbuf;
|
||||
EXISTS
|
||||
| "in" ->
|
||||
L.update_acc lexbuf;
|
||||
IN
|
||||
| "takie ze" ->
|
||||
L.update_acc lexbuf;
|
||||
SUCH
|
||||
| "to" ->
|
||||
L.update_acc lexbuf;
|
||||
THAT
|
||||
| "i" ->
|
||||
L.update_acc lexbuf;
|
||||
AND
|
||||
| "lub" ->
|
||||
L.update_acc lexbuf;
|
||||
OR
|
||||
| "xor" ->
|
||||
L.update_acc lexbuf;
|
||||
XOR
|
||||
| "nie" ->
|
||||
L.update_acc lexbuf;
|
||||
NOT
|
||||
| "maximum" ->
|
||||
L.update_acc lexbuf;
|
||||
MAXIMUM
|
||||
| "minimum" ->
|
||||
L.update_acc lexbuf;
|
||||
MINIMUM
|
||||
| "filtr" ->
|
||||
L.update_acc lexbuf;
|
||||
FILTER
|
||||
| "mapuj" ->
|
||||
L.update_acc lexbuf;
|
||||
MAP
|
||||
| "poczatkowy" ->
|
||||
L.update_acc lexbuf;
|
||||
INIT
|
||||
| "liczba" ->
|
||||
L.update_acc lexbuf;
|
||||
CARDINAL
|
||||
| "prawda" ->
|
||||
L.update_acc lexbuf;
|
||||
TRUE
|
||||
| "falsz" ->
|
||||
L.update_acc lexbuf;
|
||||
FALSE
|
||||
| "rok" ->
|
||||
L.update_acc lexbuf;
|
||||
YEAR
|
||||
| "miesiac" ->
|
||||
L.update_acc lexbuf;
|
||||
MONTH
|
||||
| "dzien" ->
|
||||
L.update_acc lexbuf;
|
||||
DAY
|
||||
| 0x24, Star white_space, '0' .. '9', Star ('0' .. '9' | ','), Opt ('.', Rep ('0' .. '9', 0 .. 2))
|
||||
->
|
||||
let extract_parts = R.regexp "([0-9]([0-9,]*[0-9]|))(.([0-9]{0,2})|)" in
|
||||
let full_str = Utf8.lexeme lexbuf in
|
||||
let only_numbers_str = String.trim (String.sub full_str 1 (String.length full_str - 1)) in
|
||||
let parts = R.get_substring (R.exec ~rex:extract_parts only_numbers_str) in
|
||||
(* Integer literal*)
|
||||
let units = parts 1 in
|
||||
let remove_commas = R.regexp "," in
|
||||
let units =
|
||||
Runtime.integer_of_string (R.substitute ~rex:remove_commas ~subst:(fun _ -> "") units)
|
||||
in
|
||||
let cents =
|
||||
try Runtime.integer_of_string (parts 4) with Not_found -> Runtime.integer_of_int 0
|
||||
in
|
||||
L.update_acc lexbuf;
|
||||
MONEY_AMOUNT (units, cents)
|
||||
| Plus '0' .. '9', '.', Star '0' .. '9' ->
|
||||
let extract_code_title = R.regexp "([0-9]+)\\.([0-9]*)" in
|
||||
let dec_parts = R.get_substring (R.exec ~rex:extract_code_title (Utf8.lexeme lexbuf)) in
|
||||
(* Integer literal*)
|
||||
L.update_acc lexbuf;
|
||||
DECIMAL_LITERAL
|
||||
(Runtime.integer_of_string (dec_parts 1), Runtime.integer_of_string (dec_parts 2))
|
||||
| "<=@" ->
|
||||
L.update_acc lexbuf;
|
||||
LESSER_EQUAL_DATE
|
||||
| "<@" ->
|
||||
L.update_acc lexbuf;
|
||||
LESSER_DATE
|
||||
| ">=@" ->
|
||||
L.update_acc lexbuf;
|
||||
GREATER_EQUAL_DATE
|
||||
| ">@" ->
|
||||
L.update_acc lexbuf;
|
||||
GREATER_DATE
|
||||
| "-@" ->
|
||||
L.update_acc lexbuf;
|
||||
MINUSDATE
|
||||
| "+@" ->
|
||||
L.update_acc lexbuf;
|
||||
PLUSDATE
|
||||
| "<=^" ->
|
||||
L.update_acc lexbuf;
|
||||
LESSER_EQUAL_DURATION
|
||||
| "<^" ->
|
||||
L.update_acc lexbuf;
|
||||
LESSER_DURATION
|
||||
| ">=^" ->
|
||||
L.update_acc lexbuf;
|
||||
GREATER_EQUAL_DURATION
|
||||
| ">^" ->
|
||||
L.update_acc lexbuf;
|
||||
GREATER_DURATION
|
||||
| "+^" ->
|
||||
L.update_acc lexbuf;
|
||||
PLUSDURATION
|
||||
| "-^" ->
|
||||
L.update_acc lexbuf;
|
||||
MINUSDURATION
|
||||
| "<=", 0x24 ->
|
||||
L.update_acc lexbuf;
|
||||
LESSER_EQUAL_MONEY
|
||||
| '<', 0x24 ->
|
||||
L.update_acc lexbuf;
|
||||
LESSER_MONEY
|
||||
| ">=", 0x24 ->
|
||||
L.update_acc lexbuf;
|
||||
GREATER_EQUAL_MONEY
|
||||
| '>', 0x24 ->
|
||||
L.update_acc lexbuf;
|
||||
GREATER_MONEY
|
||||
| '+', 0x24 ->
|
||||
L.update_acc lexbuf;
|
||||
PLUSMONEY
|
||||
| '-', 0x24 ->
|
||||
L.update_acc lexbuf;
|
||||
MINUSMONEY
|
||||
| '*', 0x24 ->
|
||||
L.update_acc lexbuf;
|
||||
MULTMONEY
|
||||
| '/', 0x24 ->
|
||||
L.update_acc lexbuf;
|
||||
DIVMONEY
|
||||
| "<=." ->
|
||||
L.update_acc lexbuf;
|
||||
LESSER_EQUAL_DEC
|
||||
| "<." ->
|
||||
L.update_acc lexbuf;
|
||||
LESSER_DEC
|
||||
| ">=." ->
|
||||
L.update_acc lexbuf;
|
||||
GREATER_EQUAL_DEC
|
||||
| ">." ->
|
||||
L.update_acc lexbuf;
|
||||
GREATER_DEC
|
||||
| "+." ->
|
||||
L.update_acc lexbuf;
|
||||
PLUSDEC
|
||||
| "-." ->
|
||||
L.update_acc lexbuf;
|
||||
MINUSDEC
|
||||
| "*." ->
|
||||
L.update_acc lexbuf;
|
||||
MULTDEC
|
||||
| "/." ->
|
||||
L.update_acc lexbuf;
|
||||
DIVDEC
|
||||
| "<=" ->
|
||||
L.update_acc lexbuf;
|
||||
LESSER_EQUAL
|
||||
| '<' ->
|
||||
L.update_acc lexbuf;
|
||||
LESSER
|
||||
| ">=" ->
|
||||
L.update_acc lexbuf;
|
||||
GREATER_EQUAL
|
||||
| '>' ->
|
||||
L.update_acc lexbuf;
|
||||
GREATER
|
||||
| '+' ->
|
||||
L.update_acc lexbuf;
|
||||
PLUS
|
||||
| '-' ->
|
||||
L.update_acc lexbuf;
|
||||
MINUS
|
||||
| '*' ->
|
||||
L.update_acc lexbuf;
|
||||
MULT
|
||||
| '/' ->
|
||||
L.update_acc lexbuf;
|
||||
DIV
|
||||
| "!=" ->
|
||||
L.update_acc lexbuf;
|
||||
NOT_EQUAL
|
||||
| '=' ->
|
||||
L.update_acc lexbuf;
|
||||
EQUAL
|
||||
| '%' ->
|
||||
L.update_acc lexbuf;
|
||||
PERCENT
|
||||
| '(' ->
|
||||
L.update_acc lexbuf;
|
||||
LPAREN
|
||||
| ')' ->
|
||||
L.update_acc lexbuf;
|
||||
RPAREN
|
||||
| '{' ->
|
||||
L.update_acc lexbuf;
|
||||
LBRACKET
|
||||
| '}' ->
|
||||
L.update_acc lexbuf;
|
||||
RBRACKET
|
||||
| '[' ->
|
||||
L.update_acc lexbuf;
|
||||
LSQUARE
|
||||
| ']' ->
|
||||
L.update_acc lexbuf;
|
||||
RSQUARE
|
||||
| '|' ->
|
||||
L.update_acc lexbuf;
|
||||
VERTICAL
|
||||
| ':' ->
|
||||
L.update_acc lexbuf;
|
||||
COLON
|
||||
| ';' ->
|
||||
L.update_acc lexbuf;
|
||||
SEMICOLON
|
||||
| "--" ->
|
||||
L.update_acc lexbuf;
|
||||
ALT
|
||||
| '.' ->
|
||||
L.update_acc lexbuf;
|
||||
DOT
|
||||
| uppercase, Star (uppercase | lowercase | '0' .. '9' | '_' | '\'') ->
|
||||
(* Name of constructor *)
|
||||
L.update_acc lexbuf;
|
||||
CONSTRUCTOR (Utf8.lexeme lexbuf)
|
||||
| lowercase, Star (lowercase | uppercase | '0' .. '9' | '_' | '\'') ->
|
||||
(* Name of variable *)
|
||||
L.update_acc lexbuf;
|
||||
IDENT (Utf8.lexeme lexbuf)
|
||||
| Plus '0' .. '9' ->
|
||||
(* Integer literal*)
|
||||
L.update_acc lexbuf;
|
||||
INT_LITERAL (Runtime.integer_of_string (Utf8.lexeme lexbuf))
|
||||
| _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme
|
||||
|
||||
(** Main lexing function used outside code blocks *)
|
||||
let lex_law (lexbuf : lexbuf) : token =
|
||||
let prev_lexeme = Utf8.lexeme lexbuf in
|
||||
let prev_pos = lexing_positions lexbuf in
|
||||
match%sedlex lexbuf with
|
||||
| "```catala" ->
|
||||
L.is_code := true;
|
||||
L.code_string_acc := "";
|
||||
|
||||
BEGIN_CODE
|
||||
| eof -> EOF
|
||||
| '#', Star white_space, "Master file" -> MASTER_FILE
|
||||
| '>', Star white_space, "Begin metadata" -> BEGIN_METADATA
|
||||
| '>', Star white_space, "End metadata" -> END_METADATA
|
||||
| ( '>',
|
||||
Star white_space,
|
||||
"Include:",
|
||||
Star white_space,
|
||||
Plus (Compl ('@' | '\n')),
|
||||
Star white_space,
|
||||
Opt ('@', Star white_space, "p.", Star white_space, Plus '0' .. '9', Star white_space),
|
||||
'\n' ) ->
|
||||
let extract_components =
|
||||
R.regexp ">\\s*Include\\:\\s*([^@\\n]+)\\s*(@\\s*p\\.\\s*([0-9]+)|)"
|
||||
in
|
||||
let get_component = R.get_substring (R.exec ~rex:extract_components (Utf8.lexeme lexbuf)) in
|
||||
let name = get_component 1 in
|
||||
let pages = try Some (int_of_string (get_component 3)) with Not_found -> None in
|
||||
let pos = lexing_positions lexbuf in
|
||||
if Filename.extension name = ".pdf" then
|
||||
LAW_INCLUDE (Ast.PdfFile ((name, Pos.from_lpos pos), pages))
|
||||
else LAW_INCLUDE (Ast.CatalaFile (name, Pos.from_lpos pos))
|
||||
| '#', Plus '#', Star white_space, Plus (Compl ('[' | ']' | '\n')), Star white_space, '\n' ->
|
||||
get_law_heading lexbuf
|
||||
| ( '#',
|
||||
Plus '#',
|
||||
Star white_space,
|
||||
'[',
|
||||
Star white_space,
|
||||
Plus (Compl ']'),
|
||||
Star white_space,
|
||||
']',
|
||||
'\n' ) ->
|
||||
let extract_article_title = R.regexp "([#]+)\\s*\\[([^\\]]+)\\]" in
|
||||
let get_substring =
|
||||
R.get_substring (R.exec ~rex:extract_article_title (Utf8.lexeme lexbuf))
|
||||
in
|
||||
let title = get_substring 2 in
|
||||
let get_new_lines = R.regexp "\n" in
|
||||
let new_lines_count =
|
||||
try Array.length (R.extract ~rex:get_new_lines (Utf8.lexeme lexbuf)) with Not_found -> 0
|
||||
in
|
||||
(* the -1 is here to compensate for Sedlex's automatic newline detection around token *)
|
||||
for _i = 1 to new_lines_count - 1 do
|
||||
new_line lexbuf
|
||||
done;
|
||||
let precedence = calc_precedence (get_substring 1) in
|
||||
|
||||
LAW_ARTICLE (title, None, None, precedence)
|
||||
| Plus (Compl ('/' | '#' | '`' | '>')) -> LAW_TEXT (Utf8.lexeme lexbuf)
|
||||
| _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme
|
||||
|
||||
(** Entry point of the lexer, distributes to {!val: lex_code} or {!val: lex_law} depending of {!val:
|
||||
Surface.Lexer.is_code}. *)
|
||||
let lexer (lexbuf : lexbuf) : token = if !L.is_code then lex_code lexbuf else lex_law lexbuf
|
15
src/catala/surface/lexer_pl.mli
Normal file
15
src/catala/surface/lexer_pl.mli
Normal file
@ -0,0 +1,15 @@
|
||||
(* This file is part of the Catala compiler, a specification language for tax and social benefits
|
||||
computation rules. Copyright (C) 2020 Inria, contributor: Denis Merigoux
|
||||
<denis.merigoux@inria.fr>
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
|
||||
in compliance with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
or implied. See the License for the specific language governing permissions and limitations under
|
||||
the License. *)
|
||||
|
||||
include Lexer.LocalisedLexer
|
@ -226,11 +226,13 @@ end
|
||||
module Parser_NonVerbose = ParserAux (Lexer)
|
||||
module Parser_En = ParserAux (Lexer_en)
|
||||
module Parser_Fr = ParserAux (Lexer_fr)
|
||||
module Parser_Pl = ParserAux (Lexer_pl)
|
||||
|
||||
let localised_parser : Cli.frontend_lang -> lexbuf -> Ast.source_file_or_master = function
|
||||
| `NonVerbose -> Parser_NonVerbose.commands_or_includes
|
||||
| `En -> Parser_En.commands_or_includes
|
||||
| `Fr -> Parser_Fr.commands_or_includes
|
||||
| `Pl -> Parser_Pl.commands_or_includes
|
||||
|
||||
(** {1 Parsing multiple files} *)
|
||||
|
||||
|
@ -12,12 +12,12 @@
|
||||
or implied. See the License for the specific language governing permissions and limitations under
|
||||
the License. *)
|
||||
|
||||
type frontend_lang = [ `Fr | `En | `NonVerbose ]
|
||||
type frontend_lang = [ `Fr | `En | `NonVerbose | `Pl]
|
||||
|
||||
type backend_lang = [ `Fr | `En ]
|
||||
type backend_lang = [ `Fr | `En | `Pl ]
|
||||
|
||||
let to_backend_lang (lang : frontend_lang) : backend_lang =
|
||||
match lang with `En | `NonVerbose -> `En | `Fr -> `Fr
|
||||
match lang with `En | `NonVerbose -> `En | `Fr -> `Fr | `Pl -> `Pl
|
||||
|
||||
(** Source files to be compiled *)
|
||||
let source_files : string list ref = ref []
|
||||
@ -57,7 +57,8 @@ let unstyled = Arg.(value & flag & info [ "unstyled" ] ~doc:"Removes styling fro
|
||||
let optimize = Arg.(value & flag & info [ "optimize"; "O" ] ~doc:"Run compiler optimizations")
|
||||
|
||||
let trace_opt =
|
||||
Arg.(value & flag & info [ "trace"; "t" ] ~doc:"Displays a trace of the interpreter's computation")
|
||||
Arg.(
|
||||
value & flag & info [ "trace"; "t" ] ~doc:"Displays a trace of the interpreter's computation")
|
||||
|
||||
let wrap_weaved_output =
|
||||
Arg.(
|
||||
|
@ -12,9 +12,9 @@
|
||||
or implied. See the License for the specific language governing permissions and limitations under
|
||||
the License. *)
|
||||
|
||||
type frontend_lang = [ `En | `Fr | `NonVerbose ]
|
||||
type frontend_lang = [ `En | `Fr | `NonVerbose | `Pl ]
|
||||
|
||||
type backend_lang = [ `En | `Fr ]
|
||||
type backend_lang = [ `En | `Fr | `Pl ]
|
||||
|
||||
val to_backend_lang : frontend_lang -> backend_lang
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user