Merge pull request #117 from wozniakpl/polish

Polish language support
This commit is contained in:
Denis Merigoux 2021-05-19 16:52:16 +02:00 committed by GitHub
commit a6ba0290ed
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
39 changed files with 1399 additions and 95 deletions

View File

@ -124,7 +124,7 @@ general-to-specifics statutes order. Therefore, there exists multiple versions
of the Catala surface syntax, adapted to the language of the legislative text.
Currently, Catala supports English and French legislative text via the
`--language=en` or `--language=fr` option.
`--language=en`, `--language=fr` or `--language=pl` option.
Technically, support for new languages can be added via a new lexer. If you want
to add a new language, you can start from

View File

@ -119,7 +119,8 @@ augmented with the Catala plugin, simply enter
sudo make pygments
This will execute the
script `syntax_highlighting/fr/pygments/set_up_pygments.sh` and
script `syntax_highlighting/fr/pygments/set_up_pygments.sh`,
`syntax_highlighting/pl/pygments/set_up_pygments.sh` and
`syntax_highlighting/en/pygments/set_up_pygments.sh`.
The scripts patch your `pigmentize` executable, used for instance by the `minted` LaTeX package.

View File

@ -58,6 +58,7 @@ install:
SYNTAX_HIGHLIGHTING_FR=${CURDIR}/syntax_highlighting/fr
SYNTAX_HIGHLIGHTING_EN=${CURDIR}/syntax_highlighting/en
SYNTAX_HIGHLIGHTING_PL=${CURDIR}/syntax_highlighting/pl
pygmentize_fr: $(SYNTAX_HIGHLIGHTING_FR)/set_up_pygments.sh
chmod +x $<
@ -67,8 +68,12 @@ pygmentize_en: $(SYNTAX_HIGHLIGHTING_EN)/set_up_pygments.sh
chmod +x $<
sudo $<
pygmentize_pl: $(SYNTAX_HIGHLIGHTING_PL)/set_up_pygments.sh
chmod +x $<
sudo $<
#> pygments : Extends your pygmentize executable with Catala lexers
pygments: pygmentize_fr pygmentize_en
pygments: pygmentize_fr pygmentize_en pygmentize_pl
atom_fr: ${CURDIR}/syntax_highlighting/fr/setup_atom.sh
chmod +x $<
@ -78,6 +83,10 @@ atom_en: ${CURDIR}/syntax_highlighting/en/setup_atom.sh
chmod +x $<
$<
atom_pl: ${CURDIR}/syntax_highlighting/pl/setup_atom.sh
chmod +x $<
$<
atom_nv: ${CURDIR}/syntax_highlighting/nv/setup_atom.sh
chmod +x $<
$<
@ -93,6 +102,11 @@ vscode_en: ${CURDIR}/syntax_highlighting/en/setup_vscode.sh
chmod +x $<
$<
# TODO
# vscode_pl: ${CURDIR}/syntax_highlighting/pl/setup_vscode.sh
# chmod +x $<
# $<
vscode_nv: ${CURDIR}/syntax_highlighting/nv/setup_vscode.sh
chmod +x $<
$<
@ -110,6 +124,7 @@ CODE_GENERAL_IMPOTS_DIR=$(EXAMPLES_DIR)/code_general_impots
US_TAX_CODE_DIR=$(EXAMPLES_DIR)/us_tax_code
TUTORIAL_EN_DIR=$(EXAMPLES_DIR)/tutorial_en
TUTORIEL_FR_DIR=$(EXAMPLES_DIR)/tutoriel_fr
POLISH_TAXES_DIR=$(EXAMPLES_DIR)/polish_taxes
literate_allocations_familiales: build
@ -132,9 +147,13 @@ literate_tutoriel_fr: build
$(MAKE) -C $(TUTORIEL_FR_DIR) tutoriel_fr.tex
$(MAKE) -C $(TUTORIEL_FR_DIR) tutoriel_fr.html
literate_polish_taxes: build
$(MAKE) -C $(POLISH_TAXES_DIR) polish_taxes.tex
$(MAKE) -C $(POLISH_TAXES_DIR) polish_taxes.html
#> literate_examples : Builds the .tex and .html versions of the examples code. Needs pygments to be installed and patched with Catala.
literate_examples: literate_allocations_familiales literate_code_general_impots \
literate_us_tax_code literate_tutorial_en literate_tutoriel_fr
literate_us_tax_code literate_tutorial_en literate_tutoriel_fr literate_polish_taxes
##########################################
# Execute test suite
@ -219,10 +238,11 @@ clean:
$(MAKE) -C $(US_TAX_CODE_DIR) clean
$(MAKE) -C $(TUTORIEL_FR_DIR) clean
$(MAKE) -C $(TUTORIAL_EN_DIR) clean
$(MAKE) -C $(POLISH_TAXES_DIR) clean
$(MAKE) -C $(CODE_GENERAL_IMPOTS_DIR) clean
inspect:
gitinspector -f ml,mli,mly,iro,tex,catala,catala_en,catala_fr,md,fst,mld --grading
gitinspector -f ml,mli,mly,iro,tex,catala,catala_en,catala_pl,catala_fr,md,fst,mld --grading
##########################################
# Special targets

View File

@ -44,6 +44,13 @@ TEST_FILES_SCOPES_FR=$(foreach TEST_FILE,$(TEST_FILES),\
) \
)
tests: $(TEST_FILES_SCOPES_EN) $(TEST_FILES_SCOPES_FR)
TEST_FILES_SCOPES_PL=$(foreach TEST_FILE,$(TEST_FILES),\
$(foreach TEST_SCOPE,\
$(shell grep -Po "deklaracja zakres [^:]*" $(TEST_FILE) | cut -d " " -f 3), \
$(word 1,$(subst /, ,$(TEST_FILE))).$(word 1,$(subst ., ,$(word 3,$(subst /, ,$(TEST_FILE))))).$(TEST_SCOPE).run \
) \
)
tests: $(TEST_FILES_SCOPES_EN) $(TEST_FILES_SCOPES_FR) $(TEST_FILES_SCOPES_PL)
.FORCE:

View File

@ -99,7 +99,7 @@ To build and run the example, create a `Makefile` in `foo/`
with the following contents:
```Makefile
CATALA_LANG=en # or fr if your source code is in French
# Use fr or pl instead if your source code is in French or Polish
CATALA_LANG=en
SRC=foo.catala
include ../Makefile.common.mk

14
examples/polish_taxes/.gitignore vendored Normal file
View File

@ -0,0 +1,14 @@
*.aux
*.dvi
*.fdb_latexmk
*.fls
*.log
*.out
*.tex
*.pdf
_minted*
*.toc
*.pyg
*.d
*.ml

View File

@ -0,0 +1,4 @@
# Catala surface language of this example: Polish.
CATALA_LANG=pl
# Name of the master source file of this example.
# NOTE(review): the top-level Makefile asks this directory for polish_taxes.tex
# and polish_taxes.html; confirm that SRC matches the master file name on disk
# and the stem those targets are derived from.
SRC=polish_tax_code.catala_pl
# Shared rules for the examples; presumably consumes CATALA_LANG and SRC set above.
include ../Makefile.common.mk

View File

@ -0,0 +1,144 @@
### Rozdział 3
#### Art. 7.
1. Stawki podatku wynoszą:
1) od umowy sprzedaży:
a) nieruchomości, rzeczy ruchomych, prawa użytkowania wieczystego, własnościowego spółdzielczego prawa do lokalu mieszkalnego, spółdzielczego prawa do lokalu użytkowego oraz wynikających z przepisów prawa spółdzielczego: prawa do domu jednorodzinnego oraz prawa do lokalu w małym domu mieszkalnym 2%,
```catala
deklaracja zakres UmowaSprzedazy:
kontekst kwota typu pieniądze
kontekst inne_prawa_majatkowe warunek
kontekst stawka_podatku typu dziesiętny
kontekst podatek typu pieniądze
zakres UmowaSprzedazy:
definicja podatek wynosi
kwota *$ stawka_podatku
zakres UmowaSprzedazy:
etykieta a7_u1_p1_ppa
definicja stawka_podatku wynosi 2%
```
b) innych praw majątkowych 1%;
```catala
zakres UmowaSprzedazy:
wyjątek a7_u1_p1_ppa
definicja stawka_podatku pod warunkiem
inne_prawa_majatkowe
konsekwencja wynosi 1%
```
2) od umów zamiany, dożywocia, o dział spadku, o zniesienie współwłasności oraz darowizny:
a) przy przeniesieniu własności nieruchomości, rzeczy ruchomych, prawa użytkowania wieczystego, własnościowego spółdzielczego prawa do lokalu
mieszkalnego, spółdzielczego prawa do lokalu użytkowego oraz wynikających z przepisów prawa spółdzielczego: prawa do domu jednorodzinnego oraz prawa do lokalu w małym domu mieszkalnym 2%,
```catala
deklaracja zakres UmowaZamianyLubDarowizny:
kontekst kwota typu pieniądze
kontekst inne_prawa_majatkowe warunek
kontekst stawka_podatku typu dziesiętny
kontekst podatek typu pieniądze
zakres UmowaZamianyLubDarowizny:
definicja podatek wynosi
kwota *$ stawka_podatku
zakres UmowaZamianyLubDarowizny:
etykieta a7_u1_p2_ppa
definicja stawka_podatku wynosi 2%
```
b) przy przeniesieniu własności innych praw majątkowych 1%;
```catala
zakres UmowaZamianyLubDarowizny:
wyjątek a7_u1_p2_ppa
definicja stawka_podatku pod warunkiem
inne_prawa_majatkowe
konsekwencja wynosi 1%
```
3) od umowy ustanowienia odpłatnego użytkowania, w tym nieprawidłowego, oraz odpłatnej służebności 1%, z zastrzeżeniem ust. 5;
```catala
deklaracja zakres OdplatneUzytkowanie:
kontekst kwota typu pieniądze
kontekst stawka_podatku typu dziesiętny
kontekst podatek typu pieniądze
zakres OdplatneUzytkowanie:
definicja podatek wynosi
kwota *$ stawka_podatku
zakres OdplatneUzytkowanie:
definicja stawka_podatku wynosi 1%
```
4) od umowy pożyczki oraz depozytu nieprawidłowego 0,5%, z zastrzeżeniem ust. 5;
```catala
deklaracja zakres PozyczkaLubDepozytNieprawidlowy:
kontekst kwota typu pieniądze
kontekst stawka_podatku typu dziesiętny
kontekst podatek typu pieniądze
zakres PozyczkaLubDepozytNieprawidlowy:
definicja podatek wynosi
kwota *$ stawka_podatku # TODO: extract somehow? this exists in every declaration
zakres PozyczkaLubDepozytNieprawidlowy:
definicja stawka_podatku wynosi 0.5%
```
7) od ustanowienia hipoteki:
a) na zabezpieczenie wierzytelności istniejących od kwoty zabezpieczonej wierzytelności 0,1%,
```catala
deklaracja zakres Hipoteka:
kontekst kwota typu pieniądze
kontekst wysokosc_nieustalona warunek
kontekst stawka_podatku typu dziesiętny
kontekst podatek typu pieniądze
zakres Hipoteka:
etykieta a7_u1_p7_ppa
definicja podatek wynosi
kwota *$ stawka_podatku
zakres Hipoteka:
definicja stawka_podatku wynosi 0.1%
```
b) na zabezpieczenie wierzytelności o wysokości nieustalonej 19 zł;
```catala
zakres Hipoteka:
wyjątek a7_u1_p7_ppa
definicja podatek pod warunkiem
wysokosc_nieustalona
konsekwencja wynosi 19 PLN
```
9) od umowy spółki 0,5%.
```catala
deklaracja zakres UmowaSpolki:
kontekst kwota typu pieniądze
kontekst stawka_podatku typu dziesiętny
kontekst podatek typu pieniądze
zakres UmowaSpolki:
definicja podatek wynosi
kwota *$ stawka_podatku
zakres UmowaSpolki:
definicja stawka_podatku wynosi 0.5%
```

View File

@ -0,0 +1 @@
## Ustawa z dnia 9 września 2000 r. o podatku od czynności cywilnoprawnych

View File

@ -0,0 +1,3 @@
> Include: podatek_od_czynnosci_cywilnoprawnych/wstep.catala_pl
> Include: podatek_od_czynnosci_cywilnoprawnych/rozdzial_3.catala_pl

View File

@ -0,0 +1,23 @@
> Include: ../podatek_od_czynnosci_cywilnoprawnych/rozdzial_3.catala_pl
## [Test - Art. 7 ustęp 1 punkt 1]
```catala
deklaracja zakres Test_A7_U1_P1_PPa:
kontekst sprzedaz zakres UmowaSprzedazy
zakres Test_A7_U1_P1_PPa:
definicja sprzedaz.kwota wynosi 100 PLN
asercja sprzedaz.podatek = 2 PLN
deklaracja zakres Test_A7_U1_P1_PPb:
kontekst sprzedaz zakres UmowaSprzedazy
zakres Test_A7_U1_P1_PPb:
definicja sprzedaz.kwota wynosi 100 PLN
definicja sprzedaz.inne_prawa_majatkowe wynosi prawda
asercja sprzedaz.podatek = 1 PLN
```

View File

@ -0,0 +1,23 @@
> Include: ../podatek_od_czynnosci_cywilnoprawnych/rozdzial_3.catala_pl
## [Test - Art. 7 ustęp 1 punkt 2]
```catala
deklaracja zakres Test_A7_U1_P2_PPa:
kontekst sprzedaz zakres UmowaZamianyLubDarowizny
zakres Test_A7_U1_P2_PPa:
definicja sprzedaz.kwota wynosi 100 PLN
asercja sprzedaz.podatek = 2 PLN
deklaracja zakres Test_A7_U1_P2_PPb:
kontekst sprzedaz zakres UmowaZamianyLubDarowizny
zakres Test_A7_U1_P2_PPb:
definicja sprzedaz.kwota wynosi 100 PLN
definicja sprzedaz.inne_prawa_majatkowe wynosi prawda
asercja sprzedaz.podatek = 1 PLN
```

View File

@ -0,0 +1,13 @@
> Include: ../podatek_od_czynnosci_cywilnoprawnych/rozdzial_3.catala_pl
## [Test - Art. 7 ustęp 1 punkt 3]
```catala
deklaracja zakres Test_A7_U1_P3:
kontekst sprzedaz zakres OdplatneUzytkowanie
zakres Test_A7_U1_P3:
definicja sprzedaz.kwota wynosi 100 PLN
asercja sprzedaz.podatek = 1 PLN
```

View File

@ -0,0 +1,13 @@
> Include: ../podatek_od_czynnosci_cywilnoprawnych/rozdzial_3.catala_pl
## [Test - Art. 7 ustęp 1 punkt 4]
```catala
deklaracja zakres Test_A7_U1_P4:
kontekst sprzedaz zakres PozyczkaLubDepozytNieprawidlowy
zakres Test_A7_U1_P4:
definicja sprzedaz.kwota wynosi 200 PLN
asercja sprzedaz.podatek = 1 PLN
```

View File

@ -0,0 +1,13 @@
> Include: ../podatek_od_czynnosci_cywilnoprawnych/rozdzial_3.catala_pl
## [Test - Art. 7 ustęp 1 punkt 7]
```catala
# Art. 7 ust. 1 pkt 7 lit. a: mortgage securing an existing claim,
# taxed at 0.1% of the secured amount (1000 PLN -> 1 PLN).
deklaracja zakres Test_A7_U1_P7:
  kontekst sprzedaz zakres Hipoteka
zakres Test_A7_U1_P7:
  definicja sprzedaz.kwota wynosi 1000 PLN
  asercja sprzedaz.podatek = 1 PLN
```

View File

@ -0,0 +1,13 @@
> Include: ../podatek_od_czynnosci_cywilnoprawnych/rozdzial_3.catala_pl
## [Test - Art. 7 ustęp 1 punkt 9]
```catala
deklaracja zakres Test_A7_U1_P9:
kontekst sprzedaz zakres UmowaSpolki
zakres Test_A7_U1_P9:
definicja sprzedaz.kwota wynosi 1000 PLN
asercja sprzedaz.podatek = 5 PLN
```

View File

@ -30,4 +30,5 @@ scp allocations_familiales.catala_fr $1/playground/
rm allocations_familiales.catala_fr
scp syntax_highlighting/en/ace/mode-catala_en.js $1/playground/
scp syntax_highlighting/fr/ace/mode-catala_fr.js $1/playground/
# TODO: PL
scp french_law_js/french_law.js $1/french_law.js

View File

@ -98,7 +98,8 @@ let format_lit (fmt : Format.formatter) (l : lit Pos.marked) : unit =
| LMoney e -> (
match !Utils.Cli.locale_lang with
| `En -> Format.fprintf fmt "$%s" (Runtime.money_to_string e)
| `Fr -> Format.fprintf fmt "%s €" (Runtime.money_to_string e))
| `Fr -> Format.fprintf fmt "%s €" (Runtime.money_to_string e)
| `Pl -> Format.fprintf fmt "%s PLN" (Runtime.money_to_string e))
| LDate d -> Format.fprintf fmt "%s" (Runtime.date_to_string d)
| LDuration d -> Format.fprintf fmt "%s" (Runtime.duration_to_string d)

View File

@ -35,6 +35,7 @@ let driver (source_file : Pos.input_file) (debug : bool) (dcalc : bool) (unstyle
| Some l ->
if l = "fr" then `Fr
else if l = "en" then `En
else if l = "pl" then `Pl
else if l = "non-verbose" then `NonVerbose
else
Errors.raise_error

View File

@ -38,7 +38,8 @@ let format_lit (fmt : Format.formatter) (l : lit Pos.marked) : unit =
| LMoney e -> (
match !Utils.Cli.locale_lang with
| `En -> Format.fprintf fmt "$%s" (Runtime.money_to_string e)
| `Fr -> Format.fprintf fmt "%s €" (Runtime.money_to_string e))
| `Fr -> Format.fprintf fmt "%s €" (Runtime.money_to_string e)
| `Pl -> Format.fprintf fmt "%s PLN" (Runtime.money_to_string e))
| LDate d -> Format.fprintf fmt "%s" (Runtime.date_to_string d)
| LDuration d -> Format.fprintf fmt "%s" (Runtime.duration_to_string d)

View File

@ -71,12 +71,17 @@ let wrap_html (source_files : string list) (language : Cli.backend_lang) (fmt :
css_as_string
(match language with
| `Fr -> "Implémentation de texte législatif"
| `En -> "Legislative text implementation")
(match language with `Fr -> "Document généré par" | `En -> "Document generated by")
| `En -> "Legislative text implementation"
| `Pl -> "Implementacja tekstu legislacyjnego")
(match language with
| `Fr -> "Document généré par"
| `En -> "Document generated by"
| `Pl -> "Dokument wygenerowany przez")
Utils.Cli.version
(match language with
| `Fr -> "Fichiers sources tissés dans ce document"
| `En -> "Source files weaved in this document")
| `En -> "Source files weaved in this document"
| `Pl -> "Pliki źródłowe w tym dokumencie")
(String.concat "\n"
(List.map
(fun filename ->
@ -90,7 +95,8 @@ let wrap_html (source_files : string list) (language : Cli.backend_lang) (fmt :
(pre_html (Filename.basename filename))
(match language with
| `Fr -> "dernière modification le"
| `En -> "last modification")
| `En -> "last modification"
| `Pl -> "ostatnia modyfikacja")
ftime)
source_files));
wrapped fmt
@ -104,7 +110,9 @@ let pygmentize_code (c : string Pos.marked) (language : C.backend_lang) : string
Printf.fprintf oc "%s" (Pos.unmark c);
close_out oc;
let pygments = "pygmentize" in
let pygments_lexer = match language with `Fr -> "catala_fr" | `En -> "catala_en" in
let pygments_lexer =
match language with `Fr -> "catala_fr" | `En -> "catala_en" | `Pl -> "catala_pl"
in
let pygments_args =
[|
"-l";

View File

@ -43,10 +43,10 @@ let wrap_latex (source_files : string list) (language : C.backend_lang) (fmt : F
"\\documentclass[11pt, a4paper]{article}\n\n\
\\usepackage[T1]{fontenc}\n\
\\usepackage[utf8]{inputenc}\n\
\\usepackage{amssymb}\n\
\\usepackage[%s]{babel}\n\
\\usepackage{lmodern}\n\
\\usepackage{minted}\n\
\\usepackage{amssymb}\n\
\\usepackage{newunicodechar}\n\
\\usepackage{textcomp}\n\
\\usepackage[hidelinks]{hyperref}\n\
@ -80,15 +80,20 @@ let wrap_latex (source_files : string list) (language : C.backend_lang) (fmt : F
%s : \n\
\\begin{itemize}%s\\end{itemize}\n\n\
\\[\\star\\star\\star\\]\\\\\n"
(match language with `Fr -> "french" | `En -> "english")
(match language with `Fr -> "french" | `En -> "english" | `Pl -> "polish")
(match language with
| `Fr -> "Implémentation de texte législatif"
| `En -> "Legislative text implementation")
(match language with `Fr -> "Document généré par" | `En -> "Document generated by")
| `En -> "Legislative text implementation"
| `Pl -> "Implementacja tekstów legislacyjnych")
(match language with
| `Fr -> "Document généré par"
| `En -> "Document generated by"
| `Pl -> "Dokument wygenerowany przez")
Utils.Cli.version
(match language with
| `Fr -> "Fichiers sources tissés dans ce document"
| `En -> "Source files weaved in this document")
| `En -> "Source files weaved in this document"
| `Pl -> "Pliki źródłowe w tym dokumencie")
(String.concat ","
(List.map
(fun filename ->
@ -102,7 +107,8 @@ let wrap_latex (source_files : string list) (language : C.backend_lang) (fmt : F
(pre_latexify (Filename.basename filename))
(match language with
| `Fr -> "dernière modification le"
| `En -> "last modification")
| `En -> "last modification"
| `Pl -> "ostatnia modyfikacja")
ftime)
source_files));
wrapped fmt;
@ -159,10 +165,12 @@ let rec law_structure_to_latex (language : C.backend_lang) (fmt : Format.formatt
\\end{minted}"
(pre_latexify (Filename.basename (Pos.get_file (Pos.get_position c))))
(Pos.get_start_line (Pos.get_position c) - 1)
(match language with `Fr -> "catala_fr" | `En -> "catala_en")
(match language with `Fr -> "catala_fr" | `En -> "catala_en" | `Pl -> "catala_pl")
(math_syms_replace (Pos.unmark c))
| A.CodeBlock (_, c, true) ->
let metadata_title = match language with `Fr -> "Métadonnées" | `En -> "Metadata" in
let metadata_title =
match language with `Fr -> "Métadonnées" | `En -> "Metadata" | `Pl -> "Metadane"
in
Format.fprintf fmt
"\\begin{tcolorbox}[colframe=OliveGreen, breakable, \
title=\\textcolor{black}{\\texttt{%s}},title after \
@ -174,7 +182,7 @@ let rec law_structure_to_latex (language : C.backend_lang) (fmt : Format.formatt
metadata_title metadata_title
(Pos.get_start_line (Pos.get_position c) - 1)
(pre_latexify (Filename.basename (Pos.get_file (Pos.get_position c))))
(match language with `Fr -> "catala_fr" | `En -> "catala_en")
(match language with `Fr -> "catala_fr" | `En -> "catala_en" | `Pl -> "catala_pl")
(math_syms_replace (Pos.unmark c))
(** {1 API} *)

View File

@ -0,0 +1,527 @@
(* This file is part of the Catala compiler, a specification language for tax and social benefits
computation rules. Copyright (C) 2020 Inria, contributor: Denis Merigoux
<denis.merigoux@inria.fr>
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
or implied. See the License for the specific language governing permissions and limitations under
the License. *)
open Tokens
open Sedlexing
open Utils
open Lexer_common
module L = Lexer
module R = Re.Pcre
(** Same as {!val: Surface.Lexer.token_list_language_agnostic}, but with tokens specialized to
    Polish.

    Association list from the concrete Polish keyword to the token it is lexed as. Every keyword
    recognised by {!val: lex_code} has an entry here, followed by the language-agnostic entries
    of {!val: Surface.Lexer.token_list_language_agnostic}. *)
let token_list : (string * token) list =
  [
    ("zakres", SCOPE);
    ("konsekwencja", CONSEQUENCE);
    ("data", DATA);
    ("zalezy od", DEPENDS);
    ("deklaracja", DECLARATION);
    ("kontekst", CONTEXT);
    ("malejacy", DECREASING);
    ("rosnacy", INCREASING);
    ("z", OF);
    ("kolekcja", COLLECTION);
    ("enumeracja", ENUM);
    ("calkowita", INTEGER);
    ("pieniądze", MONEY);
    ("tekst", TEXT);
    ("dziesiętny", DECIMAL);
    ("czas", DATE);
    ("czas trwania", DURATION);
    ("zerojedynkowy", BOOLEAN);
    ("suma", SUM);
    ("spelnione", FILLED);
    ("definicja", DEFINITION);
    ("etykieta", LABEL);
    ("wyjątek", EXCEPTION);
    ("wynosi", DEFINED_AS);
    ("pasuje", MATCH);
    ("ze wzorem", WITH);
    ("pod warunkiem", UNDER_CONDITION);
    (* "warunek" is lexed as CONDITION by lex_code; keep the table in sync with the lexer. *)
    ("warunek", CONDITION);
    ("jezeli", IF);
    ("wtedy", THEN);
    ("inaczej", ELSE);
    ("typu", CONTENT);
    ("struktura", STRUCT);
    ("asercja", ASSERTION);
    ("rozna", VARIES);
    ("wraz z", WITH_V);
    ("dla", FOR);
    ("wszystkie", ALL);
    ("mamy", WE_HAVE);
    ("staloprzecinkowa", FIXED);
    ("przez", BY);
    ("zasada", RULE);
    ("istnieje", EXISTS);
    (* "in" is lexed as IN by lex_code; keep the table in sync with the lexer. *)
    ("in", IN);
    ("takie ze", SUCH);
    ("to", THAT);
    ("i", AND);
    ("lub", OR);
    ("xor", XOR);
    ("nie", NOT);
    ("maximum", MAXIMUM);
    ("minimum", MINIMUM);
    ("filtr", FILTER);
    ("mapuj", MAP);
    ("poczatkowy", INIT);
    ("liczba", CARDINAL);
    ("rok", YEAR);
    ("miesiac", MONTH);
    ("dzien", DAY);
    ("prawda", TRUE);
    ("falsz", FALSE);
  ]
  @ L.token_list_language_agnostic
(** Builtin functions available in Polish programs, as an association list from the identifier
    written in source code to the builtin it denotes. The identifiers listed here are the English
    ones. *)
let builtins : (string * Ast.builtin_expression) list =
  let conversions = [ ("integer_to_decimal", Ast.IntToDec) ] in
  let date_accessors =
    [ ("get_day", Ast.GetDay); ("get_month", Ast.GetMonth); ("get_year", Ast.GetYear) ]
  in
  conversions @ date_accessors
(** Main lexing function used in code blocks.

    Reads one token of Polish Catala code from [lexbuf]. Whitespace and [#] line comments are
    accumulated through [L.update_acc] and skipped by recursing. The closing code fence clears
    [L.is_code] and returns [END_CODE] carrying the accumulated code string. Any input that no
    rule matches is reported with [L.raise_lexer_error], using the position and lexeme that were
    current before this call. *)
let rec lex_code (lexbuf : lexbuf) : token =
  let prev_lexeme = Utf8.lexeme lexbuf in
  let prev_pos = lexing_positions lexbuf in
  match%sedlex lexbuf with
  | white_space ->
      (* Whitespaces *)
      L.update_acc lexbuf;
      lex_code lexbuf
  | '#', Star (Compl '\n'), '\n' ->
      (* Comments *)
      L.update_acc lexbuf;
      lex_code lexbuf
  | "```" ->
      (* End of code section *)
      L.is_code := false;
      END_CODE !L.code_string_acc
  | "zakres" ->
      L.update_acc lexbuf;
      SCOPE
  | "data" ->
      L.update_acc lexbuf;
      DATA
  | "zalezy od" ->
      L.update_acc lexbuf;
      DEPENDS
  | "deklaracja" ->
      L.update_acc lexbuf;
      DECLARATION
  | "kontekst" ->
      L.update_acc lexbuf;
      CONTEXT
  | "malejacy" ->
      L.update_acc lexbuf;
      DECREASING
  | "rosnacy" ->
      L.update_acc lexbuf;
      INCREASING
  | "z" ->
      L.update_acc lexbuf;
      OF
  | "kolekcja" ->
      L.update_acc lexbuf;
      COLLECTION
  | "enumeracja" ->
      L.update_acc lexbuf;
      ENUM
  | "calkowita" ->
      L.update_acc lexbuf;
      INTEGER
  | "pieni", 0x0105, "dze" ->
      (* 0x0105 is a with ogonek *)
      L.update_acc lexbuf;
      MONEY
  | "tekst" ->
      L.update_acc lexbuf;
      TEXT
  | "dziesi", 0x0119, "tny" ->
      (* 0x0119 is e with ogonek *)
      L.update_acc lexbuf;
      DECIMAL
  | "czas" ->
      L.update_acc lexbuf;
      DATE
  | "czas trwania" ->
      L.update_acc lexbuf;
      DURATION
  | "zerojedynkowy" ->
      L.update_acc lexbuf;
      BOOLEAN
  | "suma" ->
      L.update_acc lexbuf;
      SUM
  | "spelnione" ->
      L.update_acc lexbuf;
      FILLED
  | "definicja" ->
      L.update_acc lexbuf;
      DEFINITION
  | "etykieta" ->
      L.update_acc lexbuf;
      LABEL
  | "wyj", 0x0105, "tek" ->
      (* 0x0105 is a with ogonek *)
      L.update_acc lexbuf;
      EXCEPTION
  | "wynosi" ->
      L.update_acc lexbuf;
      DEFINED_AS
  | "pasuje" ->
      L.update_acc lexbuf;
      MATCH
  | "ze wzorem" ->
      L.update_acc lexbuf;
      WITH
  | "pod warunkiem" ->
      L.update_acc lexbuf;
      UNDER_CONDITION
  | "jezeli" ->
      L.update_acc lexbuf;
      IF
  | "konsekwencja" ->
      L.update_acc lexbuf;
      CONSEQUENCE
  | "wtedy" ->
      L.update_acc lexbuf;
      THEN
  | "inaczej" ->
      L.update_acc lexbuf;
      ELSE
  | "warunek" ->
      L.update_acc lexbuf;
      CONDITION
  | "typu" ->
      L.update_acc lexbuf;
      CONTENT
  | "struktura" ->
      L.update_acc lexbuf;
      STRUCT
  | "asercja" ->
      L.update_acc lexbuf;
      ASSERTION
  | "rozna" ->
      L.update_acc lexbuf;
      VARIES
  | "wraz z" ->
      L.update_acc lexbuf;
      WITH_V
  | "dla" ->
      L.update_acc lexbuf;
      FOR
  | "wszystkie" ->
      L.update_acc lexbuf;
      ALL
  | "mamy" ->
      L.update_acc lexbuf;
      WE_HAVE
  | "staloprzecinkowa" ->
      L.update_acc lexbuf;
      FIXED
  | "przez" ->
      L.update_acc lexbuf;
      BY
  | "zasada" ->
      L.update_acc lexbuf;
      RULE
  | "istnieje" ->
      L.update_acc lexbuf;
      EXISTS
  | "in" ->
      L.update_acc lexbuf;
      IN
  | "takie ze" ->
      L.update_acc lexbuf;
      SUCH
  | "to" ->
      L.update_acc lexbuf;
      THAT
  | "i" ->
      L.update_acc lexbuf;
      AND
  | "lub" ->
      L.update_acc lexbuf;
      OR
  | "xor" ->
      L.update_acc lexbuf;
      XOR
  | "nie" ->
      L.update_acc lexbuf;
      NOT
  | "maximum" ->
      L.update_acc lexbuf;
      MAXIMUM
  | "minimum" ->
      L.update_acc lexbuf;
      MINIMUM
  | "filtr" ->
      L.update_acc lexbuf;
      FILTER
  | "mapuj" ->
      L.update_acc lexbuf;
      MAP
  | "poczatkowy" ->
      L.update_acc lexbuf;
      INIT
  | "liczba" ->
      L.update_acc lexbuf;
      CARDINAL
  | "prawda" ->
      L.update_acc lexbuf;
      TRUE
  | "falsz" ->
      L.update_acc lexbuf;
      FALSE
  | "rok" ->
      L.update_acc lexbuf;
      YEAR
  | "miesiac" ->
      L.update_acc lexbuf;
      MONTH
  | "dzien" ->
      L.update_acc lexbuf;
      DAY
  | ( Star white_space,
      '0' .. '9',
      Star ('0' .. '9' | ','),
      Opt ('.', Rep ('0' .. '9', 0 .. 2)),
      Star white_space,
      "PLN" ) ->
      (* Money literal: digits with optional comma separators, an optional fractional part of at
         most two digits, then the PLN suffix. The dot is escaped so that the fractional group
         can only match a real decimal point, never the blank that precedes the suffix. *)
      let extract_parts = R.regexp "([0-9]([0-9,]*[0-9]|))(\\.([0-9]{0,2})|)" in
      let full_str = Utf8.lexeme lexbuf in
      (* The currency marker is a three-byte suffix here, not a one-character prefix: remove it
         from the end and trim the surrounding blanks. Dropping the first character instead would
         lose the leading digit of a literal that is not preceded by whitespace. *)
      let suffix_len = String.length "PLN" in
      let only_numbers_str =
        String.trim (String.sub full_str 0 (String.length full_str - suffix_len))
      in
      let parts = R.get_substring (R.exec ~rex:extract_parts only_numbers_str) in
      (* Integer part, with the comma separators removed *)
      let units = parts 1 in
      let remove_commas = R.regexp "," in
      let units =
        Runtime.integer_of_string (R.substitute ~rex:remove_commas ~subst:(fun _ -> "") units)
      in
      (* Fractional part: zero when it is absent or when the dot is followed by no digit *)
      let cents =
        match parts 4 with
        | "" -> Runtime.integer_of_int 0
        | digits -> Runtime.integer_of_string digits
        | exception Not_found -> Runtime.integer_of_int 0
      in
      L.update_acc lexbuf;
      MONEY_AMOUNT (units, cents)
  | Plus '0' .. '9', '.', Star '0' .. '9' ->
      let extract_code_title = R.regexp "([0-9]+)\\.([0-9]*)" in
      let dec_parts = R.get_substring (R.exec ~rex:extract_code_title (Utf8.lexeme lexbuf)) in
      (* Decimal literal *)
      L.update_acc lexbuf;
      DECIMAL_LITERAL
        (Runtime.integer_of_string (dec_parts 1), Runtime.integer_of_string (dec_parts 2))
  | "<=@" ->
      L.update_acc lexbuf;
      LESSER_EQUAL_DATE
  | "<@" ->
      L.update_acc lexbuf;
      LESSER_DATE
  | ">=@" ->
      L.update_acc lexbuf;
      GREATER_EQUAL_DATE
  | ">@" ->
      L.update_acc lexbuf;
      GREATER_DATE
  | "-@" ->
      L.update_acc lexbuf;
      MINUSDATE
  | "+@" ->
      L.update_acc lexbuf;
      PLUSDATE
  | "<=^" ->
      L.update_acc lexbuf;
      LESSER_EQUAL_DURATION
  | "<^" ->
      L.update_acc lexbuf;
      LESSER_DURATION
  | ">=^" ->
      L.update_acc lexbuf;
      GREATER_EQUAL_DURATION
  | ">^" ->
      L.update_acc lexbuf;
      GREATER_DURATION
  | "+^" ->
      L.update_acc lexbuf;
      PLUSDURATION
  | "-^" ->
      L.update_acc lexbuf;
      MINUSDURATION
  | "<=", 0x24 ->
      (* 0x24 is the dollar sign, which marks the money operators *)
      L.update_acc lexbuf;
      LESSER_EQUAL_MONEY
  | '<', 0x24 ->
      L.update_acc lexbuf;
      LESSER_MONEY
  | ">=", 0x24 ->
      L.update_acc lexbuf;
      GREATER_EQUAL_MONEY
  | '>', 0x24 ->
      L.update_acc lexbuf;
      GREATER_MONEY
  | '+', 0x24 ->
      L.update_acc lexbuf;
      PLUSMONEY
  | '-', 0x24 ->
      L.update_acc lexbuf;
      MINUSMONEY
  | '*', 0x24 ->
      L.update_acc lexbuf;
      MULTMONEY
  | '/', 0x24 ->
      L.update_acc lexbuf;
      DIVMONEY
  | "<=." ->
      L.update_acc lexbuf;
      LESSER_EQUAL_DEC
  | "<." ->
      L.update_acc lexbuf;
      LESSER_DEC
  | ">=." ->
      L.update_acc lexbuf;
      GREATER_EQUAL_DEC
  | ">." ->
      L.update_acc lexbuf;
      GREATER_DEC
  | "+." ->
      L.update_acc lexbuf;
      PLUSDEC
  | "-." ->
      L.update_acc lexbuf;
      MINUSDEC
  | "*." ->
      L.update_acc lexbuf;
      MULTDEC
  | "/." ->
      L.update_acc lexbuf;
      DIVDEC
  | "<=" ->
      L.update_acc lexbuf;
      LESSER_EQUAL
  | '<' ->
      L.update_acc lexbuf;
      LESSER
  | ">=" ->
      L.update_acc lexbuf;
      GREATER_EQUAL
  | '>' ->
      L.update_acc lexbuf;
      GREATER
  | '+' ->
      L.update_acc lexbuf;
      PLUS
  | '-' ->
      L.update_acc lexbuf;
      MINUS
  | '*' ->
      L.update_acc lexbuf;
      MULT
  | '/' ->
      L.update_acc lexbuf;
      DIV
  | "!=" ->
      L.update_acc lexbuf;
      NOT_EQUAL
  | '=' ->
      L.update_acc lexbuf;
      EQUAL
  | '%' ->
      L.update_acc lexbuf;
      PERCENT
  | '(' ->
      L.update_acc lexbuf;
      LPAREN
  | ')' ->
      L.update_acc lexbuf;
      RPAREN
  | '{' ->
      L.update_acc lexbuf;
      LBRACKET
  | '}' ->
      L.update_acc lexbuf;
      RBRACKET
  | '[' ->
      L.update_acc lexbuf;
      LSQUARE
  | ']' ->
      L.update_acc lexbuf;
      RSQUARE
  | '|' ->
      L.update_acc lexbuf;
      VERTICAL
  | ':' ->
      L.update_acc lexbuf;
      COLON
  | ';' ->
      L.update_acc lexbuf;
      SEMICOLON
  | "--" ->
      L.update_acc lexbuf;
      ALT
  | '.' ->
      L.update_acc lexbuf;
      DOT
  | uppercase, Star (uppercase | lowercase | '0' .. '9' | '_' | '\'') ->
      (* Name of constructor *)
      L.update_acc lexbuf;
      CONSTRUCTOR (Utf8.lexeme lexbuf)
  | lowercase, Star (lowercase | uppercase | '0' .. '9' | '_' | '\'') ->
      (* Name of variable *)
      L.update_acc lexbuf;
      IDENT (Utf8.lexeme lexbuf)
  | Plus '0' .. '9' ->
      (* Integer literal *)
      L.update_acc lexbuf;
      INT_LITERAL (Runtime.integer_of_string (Utf8.lexeme lexbuf))
  | _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme
(** Main lexing function used outside code blocks.

    Reads one token of law text from [lexbuf]: the opening code fence (which sets [L.is_code] and
    resets [L.code_string_acc]), the metadata markers, an include directive, a heading or a run
    of plain law text. An include whose file name has the [.pdf] extension yields an
    [Ast.PdfFile] carrying the optional page number; any other include yields an
    [Ast.CatalaFile]. Input that no rule matches is reported with [L.raise_lexer_error]. *)
let lex_law (lexbuf : lexbuf) : token =
  let prev_lexeme = Utf8.lexeme lexbuf in
  let prev_pos = lexing_positions lexbuf in
  match%sedlex lexbuf with
  | "```catala" ->
      L.is_code := true;
      L.code_string_acc := "";
      BEGIN_CODE
  | eof -> EOF
  | '>', Star white_space, "Poczatek metadanych" -> BEGIN_METADATA
  | '>', Star white_space, "Koniec metadanych" -> END_METADATA
  | ( '>',
      Star white_space,
      "Include:",
      Star white_space,
      Plus (Compl ('@' | '\n')),
      Star white_space,
      Opt ('@', Star white_space, "p.", Star white_space, Plus '0' .. '9', Star white_space),
      '\n' ) ->
      let extract_components =
        R.regexp ">\\s*Include\\:\\s*([^@\\n]+)\\s*(@\\s*p\\.\\s*([0-9]+)|)"
      in
      let get_component = R.get_substring (R.exec ~rex:extract_components (Utf8.lexeme lexbuf)) in
      (* The path group is greedy and also captures the blanks that separate the path from the
         page marker or from the end of line; trim them so that the extension test below and the
         recorded file name see the bare path. *)
      let name = String.trim (get_component 1) in
      (* The page group is absent when the directive carries no page marker *)
      let pages = try Some (int_of_string (get_component 3)) with Not_found -> None in
      let pos = lexing_positions lexbuf in
      if Filename.extension name = ".pdf" then
        LAW_INCLUDE (Ast.PdfFile ((name, Pos.from_lpos pos), pages))
      else LAW_INCLUDE (Ast.CatalaFile (name, Pos.from_lpos pos))
  | Plus '#', Star white_space, Plus (Compl '\n'), Star white_space, '\n' -> get_law_heading lexbuf
  | Plus (Compl ('#' | '`' | '>')) -> LAW_TEXT (Utf8.lexeme lexbuf)
  | _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme
(** Lexer entry point. Dispatches on the current value of {!val: Surface.Lexer.is_code}: inside a
    code block the next token is read by {!val: lex_code}, otherwise by {!val: lex_law}. *)
let lexer (lexbuf : lexbuf) : token =
  match !L.is_code with true -> lex_code lexbuf | false -> lex_law lexbuf

View File

@ -0,0 +1,15 @@
(* This file is part of the Catala compiler, a specification language for tax and social benefits
computation rules. Copyright (C) 2020 Inria, contributor: Denis Merigoux
<denis.merigoux@inria.fr>
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
or implied. See the License for the specific language governing permissions and limitations under
the License. *)
include Lexer.LocalisedLexer

View File

@ -225,11 +225,13 @@ end
module Parser_NonVerbose = ParserAux (Lexer)
module Parser_En = ParserAux (Lexer_en)
module Parser_Fr = ParserAux (Lexer_fr)
module Parser_Pl = ParserAux (Lexer_pl)
let localised_parser : Cli.frontend_lang -> lexbuf -> Ast.source_file = function
| `NonVerbose -> Parser_NonVerbose.commands_or_includes
| `En -> Parser_En.commands_or_includes
| `Fr -> Parser_Fr.commands_or_includes
| `Pl -> Parser_Pl.commands_or_includes
(** {1 Parsing multiple files} *)

View File

@ -12,12 +12,12 @@
or implied. See the License for the specific language governing permissions and limitations under
the License. *)
type frontend_lang = [ `Fr | `En | `NonVerbose ]
type frontend_lang = [ `Fr | `En | `NonVerbose | `Pl ]
type backend_lang = [ `Fr | `En ]
type backend_lang = [ `Fr | `En | `Pl ]
let to_backend_lang (lang : frontend_lang) : backend_lang =
match lang with `En | `NonVerbose -> `En | `Fr -> `Fr
match lang with `En | `NonVerbose -> `En | `Fr -> `Fr | `Pl -> `Pl
(** Source files to be compiled *)
let source_files : string list ref = ref []

View File

@ -12,9 +12,9 @@
or implied. See the License for the specific language governing permissions and limitations under
the License. *)
type frontend_lang = [ `En | `Fr | `NonVerbose ]
type frontend_lang = [ `En | `Fr | `NonVerbose | `Pl ]
type backend_lang = [ `En | `Fr ]
type backend_lang = [ `En | `Fr | `Pl ]
val to_backend_lang : frontend_lang -> backend_lang

View File

@ -150,30 +150,14 @@ contexts [] {
main : context {
: inline_push {
regex \= (\#\#)
styles [] = .law_title;
: pop {
regex \= (@@[\+]*)
styles [] = .law_title;
}
: pattern {
regex \= (.)
styles [] = .law_title;
}
: pattern {
regex \= ([\#]+.*)
styles [] = .law_title;
}
: inline_push {
regex \= (@)
styles [] = .law_subtitle;
: pop {
regex \= (@)
styles [] = .law_subtitle;
}
: pattern {
regex \= (.)
styles [] = .law_subtitle;
}
: pattern {
regex \= ([\#]+\s*\[[^\]]\s*].*)
styles [] = .law_subtitle;
}
: pattern {

View File

@ -150,30 +150,14 @@ contexts [] {
main : context {
: inline_push {
regex \= (\#\#)
styles [] = .law_title;
: pop {
regex \= (\n)
styles [] = .law_title;
}
: pattern {
regex \= (.)
styles [] = .law_title;
}
: pattern {
regex \= ([\#]+.*)
styles [] = .law_title;
}
: inline_push {
regex \= (@)
styles [] = .law_subtitle;
: pop {
regex \= (@)
styles [] = .law_subtitle;
}
: pattern {
regex \= (.)
styles [] = .law_subtitle;
}
: pattern {
regex \= ([\#]+\s*\[[^\]]\s*].*)
styles [] = .law_subtitle;
}
: pattern {

View File

@ -25,7 +25,7 @@ class CatalaFrLexer(RegexLexer):
(u'(```)', bygroups(Text), 'root'),
(u'(\\s*\\#.*$)', bygroups(Comment.Single)),
(u'(contexte)(\\s+)([a-z\xe9\xe8\xe0\xe2\xf9\xee\xf4\xea\u0153\xe7][a-z\xe9\xe8\xe0\xe2\xf9\xee\xf4\xea\u0153\xe7A-Z\xc9\xc8\xc0\xc2\xd9\xce\xd4\xca\u0152\xc70-9_\\\']*)',
bygroups(Keyword.Declaration, String, Name.Variable)),
bygroups(Keyword.Declaration, Text, Name.Variable)),
(u'\\b(selon|sous\\s+forme|fix\xe9|par|d\xe9croissante|croissante|varie|avec|on\\s+a|dans|tel\\s+que|existe|pour|tout|de|si|alors|sinon|initial)\\b', bygroups(Keyword.Reserved)),
(u'\\b(champ\\s+d\'application|si\\s+et\\s+seulement\\s+si|d\xe9pend\\s+de|d\xe9claration|inclus|collection|contenu|optionnel|structure|\xe9num\xe9ration|contexte|r\xe8gle|sous\\s+condition|condition|donn\xe9e|cons\xe9quence|rempli|\xe9gal\\s+\xe0|assertion|d\xe9finition|\xe9tiquette|exception)\\b', bygroups(Keyword.Declaration)),
(u'(\\|[0-9]+/[0-9]+/[0-9]+\\|)', bygroups(Number.Integer)),

View File

@ -150,30 +150,14 @@ contexts [] {
main : context {
: inline_push {
regex \= ([\#]+)
styles [] = .law_title;
: pop {
regex \= (\n)
styles [] = .law_title;
}
: pattern {
regex \= (.)
styles [] = .law_title;
}
: pattern {
regex \= ([\#]+.*)
styles [] = .law_title;
}
: inline_push {
regex \= ([\#]+\s*\[[^\]]\s*])
styles [] = .law_subtitle;
: pop {
regex \= (\n)
styles [] = .law_subtitle;
}
: pattern {
regex \= (.)
styles [] = .law_subtitle;
}
: pattern {
regex \= ([\#]+\s*\[[^\]]\s*].*)
styles [] = .law_subtitle;
}
: pattern {

View File

@ -0,0 +1,138 @@
'fileTypes' : [
'catala_pl'
]
'name' : 'catala_pl'
'patterns' : [
{
'include' : '#main'
}
]
'scopeName' : 'source.catala_pl'
'uuid' : ''
'repository' : {
'main' : {
  # Entry-point rules: everything outside a ```catala fence is legislative text.
  'patterns' : [
    # Bracketed headings (e.g. "## [ ... ]") are subtitles. This rule must come
    # before the generic heading rule below: both start on the same '#', and the
    # first rule listed wins, so the catch-all title rule used to shadow it.
    # The bracket body is now [^\]]* instead of a single character.
    {
      'match' : '([\\#]+\\s*\\[[^\\]]*\\s*].*)'
      'name' : 'markup.heading.subtitle.catala_pl'
    }
    # Any other line starting with one or more '#' is a title.
    {
      'match' : '([\\#]+.*)'
      'name' : 'markup.heading.title.catala_pl'
    }
    # Every remaining character except a backtick (U+0060) is plain law text.
    {
      'match' : '([^\\x{0060}])'
      'name' : 'entity.law.catala_pl'
    }
    # A ```catala fence switches to the '#code' rules until the closing ```.
    {
      'begin' : '(```catala)'
      'beginCaptures' : {
        '1' : {
          'name' : 'comment.block.documentation.catala_pl'
        }
      }
      'patterns' : [
        {
          'include' : '#code'
        }
      ]
      'end' : '(```)'
      'endCaptures' : {
        '1' : {
          'name' : 'comment.block.documentation.catala_pl'
        }
      }
    }
  ]
}
'code' : {
  # Rules applied inside a ```catala ... ``` fence. Order matters: when two rules
  # can match at the same position, the one listed first wins.
  'patterns' : [
    # Line comment.
    {
      'match' : '(\\s*\\#.*$)'
      'name' : 'comment.line.catala_pl'
    }
    # "kontekst <identifier>": keyword, whitespace, then the declared name.
    {
      'match' : '(kontekst)(\\s+)([a-z\\x{0105}\\x{0107}\\x{0119}\\x{0142}\\x{0144}\\x{00f3}\\x{015b}\\x{017c}\\x{017a}][a-z\\x{0105}\\x{0107}\\x{0119}\\x{0142}\\x{0144}\\x{00f3}\\x{015b}\\x{017c}\\x{017a}A-Z\\x{0104}\\x{0106}\\x{0118}\\x{0141}\\x{0143}\\x{00d3}\\x{015a}\\x{017b}\\x{0179}0-9_\\\']*)'
      'captures' : {
        '1' : {
          'name' : 'keyword.other.catala_pl'
        }
        '2' : {
          'name' : 'text.catala_pl'
        }
        '3' : {
          'name' : 'entity.name.function.catala_pl'
        }
      }
    }
    # Expression-level keywords.
    {
      'match' : '\\b(pasuje|ze\\s+wzorem|staloprzecinkowa|przez|malejacy|rosnacy|rozna|wraz z|mamy|w|takich ze|istnieje|dla|wszystkie|z|jezeli|wtedy|inaczej|poczatkowy)\\b'
      'name' : 'keyword.control.catala_pl'
    }
    # Declaration / rule-level keywords.
    # Fixes: "pod warunkiem" was misspelled "warunkuem", and the escape for the
    # letter U+0105 in "wyjatek" lacked the doubled backslash used everywhere else.
    {
      'match' : '\\b(zakres|zalezy\\s+od|deklaracja|kolekcja|typu|opcjonalny|struktura|enumeracja|kontekst|zasada|pod\\s+warunkiem|czas|konsekwencja|spelnione|wynosi|asercja|definicja|etykieta|wyj\\x{0105}tek)\\b'
      'name' : 'keyword.other.catala_pl'
    }
    # Date literal of the form |n/n/n|.
    {
      'match' : '(\\|[0-9]+/[0-9]+/[0-9]+\\|)'
      'name' : 'constant.numeric.catala_pl'
    }
    # Boolean literals.
    {
      'match' : '\\b(prawda|falsz)\\b'
      'name' : 'constant.catala_pl'
    }
    # Integer or comma-separated decimal literal.
    # Fix: the fractional part used the class [0.9] (only '0', '.', '9')
    # instead of the digit range [0-9].
    {
      'match' : '\\b([0-9]+(,[0-9]*|))\\b'
      'name' : 'constant.numeric.catala_pl'
    }
    # Punctuation.
    {
      'match' : '(\\-\\-|\\;|\\.|\\,|\\:|\\(|\\)|\\[|\\]|\\{|\\})'
      'name' : 'punctuation.catala_pl'
    }
    # Symbolic operators (alternation order preserved from the original rule).
    {
      'match' : '(\\-\\>|\\+\\.|\\+\\@|\\+\\^|\\+\\$|\\+|\\-\\.|\\-\\@|\\-\\^|\\-\\$|\\-|\\*\\.|\\*\\@|\\*\\^|\\*\\$|\\*|/\\.|/\\@|/\\^|/\\$|/|\\!|>\\.|>=\\.|<=\\.|<\\.|>\\@|>=\\@|<=\\@|<\\@|>\\$|>=\\$|<=\\$|<\\$|>\\^|>=\\^|<=\\^|<\\^|>|>=|<=|<|=|\\$|%)'
      'name' : 'keyword.operator.catala_pl'
    }
    # Word operators. Fix: these were part of the unanchored alternation above,
    # so "i", "nie", "lub", ... matched the first letters of any identifier that
    # starts with them; word boundaries restrict them to whole words.
    {
      'match' : '\\b(nie|lub|xor|i|rok|miesiac|dzien)\\b'
      'name' : 'keyword.operator.catala_pl'
    }
    # Built-in type names.
    {
      'match' : '\\b(calkowita|zerojedynkowy|czas|czas trwania|pieni\\x{0105}dze|warunek|tekst|dziesi\\x{0119}tny|suma)\\b'
      'name' : 'support.type.catala_pl'
    }
    # Capitalised identifier, dot, lowercase identifier.
    {
      'match' : '\\b([A-Z\\x{0104}\\x{0106}\\x{0118}\\x{0141}\\x{0143}\\x{00d3}\\x{015a}\\x{017b}\\x{0179}][a-z\\x{0105}\\x{0107}\\x{0119}\\x{0142}\\x{0144}\\x{00f3}\\x{015b}\\x{017c}\\x{017a}A-Z\\x{0104}\\x{0106}\\x{0118}\\x{0141}\\x{0143}\\x{00d3}\\x{015a}\\x{017b}\\x{0179}0-9_\\\']*)(\\.)([a-z\\x{0105}\\x{0107}\\x{0119}\\x{0142}\\x{0144}\\x{00f3}\\x{015b}\\x{017c}\\x{017a}][a-z\\x{0105}\\x{0107}\\x{0119}\\x{0142}\\x{0144}\\x{00f3}\\x{015b}\\x{017c}\\x{017a}A-Z\\x{0104}\\x{0106}\\x{0118}\\x{0141}\\x{0143}\\x{00d3}\\x{015a}\\x{017b}\\x{0179}0-9_\\\']*)\\b'
      'captures' : {
        '1' : {
          'name' : 'entity.name.class.catala_pl'
        }
        '2' : {
          'name' : 'punctuation.catala_pl'
        }
        '3' : {
          'name' : 'entity.name.function.catala_pl'
        }
      }
    }
    # Lowercase identifier, dot, then a (possibly dotted) lowercase path.
    {
      'match' : '\\b([a-z\\x{0105}\\x{0107}\\x{0119}\\x{0142}\\x{0144}\\x{00f3}\\x{015b}\\x{017c}\\x{017a}][a-z\\x{0105}\\x{0107}\\x{0119}\\x{0142}\\x{0144}\\x{00f3}\\x{015b}\\x{017c}\\x{017a}A-Z\\x{0104}\\x{0106}\\x{0118}\\x{0141}\\x{0143}\\x{00d3}\\x{015a}\\x{017b}\\x{0179}0-9_\\\']*)(\\.)([a-z\\x{0105}\\x{0107}\\x{0119}\\x{0142}\\x{0144}\\x{00f3}\\x{015b}\\x{017c}\\x{017a}][a-z\\x{0105}\\x{0107}\\x{0119}\\x{0142}\\x{0144}\\x{00f3}\\x{015b}\\x{017c}\\x{017a}A-Z\\x{0104}\\x{0106}\\x{0118}\\x{0141}\\x{0143}\\x{00d3}\\x{015a}\\x{017b}\\x{0179}0-9_\\\'\\.]*)\\b'
      'captures' : {
        '1' : {
          'name' : 'entity.name.function.catala_pl'
        }
        '2' : {
          'name' : 'punctuation.catala_pl'
        }
        '3' : {
          'name' : 'meta.variable_id.catala_pl'
        }
      }
    }
    # Bare lowercase identifier.
    {
      'match' : '\\b([a-z\\x{0105}\\x{0107}\\x{0119}\\x{0142}\\x{0144}\\x{00f3}\\x{015b}\\x{017c}\\x{017a}][a-z\\x{0105}\\x{0107}\\x{0119}\\x{0142}\\x{0144}\\x{00f3}\\x{015b}\\x{017c}\\x{017a}A-Z\\x{0104}\\x{0106}\\x{0118}\\x{0141}\\x{0143}\\x{00d3}\\x{015a}\\x{017b}\\x{0179}0-9_\\\']*)\\b'
      'name' : 'entity.name.function.catala_pl'
    }
    # Bare capitalised identifier.
    {
      'match' : '\\b([A-Z\\x{0104}\\x{0106}\\x{0118}\\x{0141}\\x{0143}\\x{00d3}\\x{015a}\\x{017b}\\x{0179}][a-z\\x{0105}\\x{0107}\\x{0119}\\x{0142}\\x{0144}\\x{00f3}\\x{015b}\\x{017c}\\x{017a}A-Z\\x{0104}\\x{0106}\\x{0118}\\x{0141}\\x{0143}\\x{00d3}\\x{015a}\\x{017b}\\x{0179}0-9_\\\']*)\\b'
      'name' : 'entity.name.class.catala_pl'
    }
  ]
}
}

View File

@ -0,0 +1,18 @@
{
"name": "language-catala-pl",
"version": "1.0.0",
"description": "Polish syntax highlighting for the Catala language",
"engines": {
"atom": "*"
},
"repository": {
"type": "git",
"url": "https://gitlab.inria.fr/verifisc/catala.git"
},
"bugs": {
"url": "https://gitlab.inria.fr/verifisc/catala/issues"
},
"dependencies": {},
"license": "Apache-2.0"
}

View File

@ -0,0 +1,258 @@
# This file is part of the Catala compiler, a specification language for tax and social benefits
# computation rules. Copyright (C) 2020 Inria, contributor: Denis Merigoux
# <denis.merigoux@inria.fr>
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# Grammar identifier; the generated lexers carry it as a suffix or name
# (e.g. the TextMate scopes end in ".catala_pl").
name = catala_pl
# File extension(s) this grammar is associated with.
file_extensions [] = catala_pl;
################################################################
## Styles
################################################################
# Style table: each entry gives a token category a colour plus the scope names
# it is exported under (textmate_scope for the Atom grammar, pygments_scope for
# the Pygments lexer). The parse contexts refer to these by their dotted name.
styles [] {
# Line comments inside Catala code blocks.
.comment : style {
color = light_green
italic = true
textmate_scope = comment.line
pygments_scope = Comment.Single
}
# Trailing part of a dotted lowercase path (third group of "a.b.c").
.sc_id : style {
textmate_scope = meta.variable_id
pygments_scope = String
color = violet_red
}
# Lowercase-initial identifiers.
.sc_id_def : style {
textmate_scope = entity.name.function
pygments_scope = Name.Variable
color = orange
}
# Uppercase-initial identifiers.
.cc_id : style {
color = light_blue
textmate_scope = entity.name.class
pygments_scope = Name.Class
}
# Heading lines in the legislative text.
.law_title : style {
color = yellow
italic = false
textmate_scope = markup.heading.title
pygments_scope = Generic.Heading
}
# Bracketed heading lines; shares the Pygments scope of .law_title, so the two
# are only distinguishable in the TextMate output.
.law_subtitle : style {
color = gold
italic = true
textmate_scope = markup.heading.subtitle
pygments_scope = Generic.Heading
}
# Plain legislative text outside code fences.
.law : style {
color = light_yellow
italic = false
textmate_scope = entity.law
pygments_scope = String
}
# The opening and closing code fences.
.code_delimiter : style {
color = grey
italic = false
textmate_scope = comment.block.documentation
pygments_scope = String
}
# Built-in type names.
.primitive : style {
color = purple
textmate_scope = support.type
pygments_scope = Keyword.Type
}
# Whitespace captured between "kontekst" and the identifier that follows it.
.whitespace : style {
textmate_scope = text
pygments_scope = String
}
# Expression-level keywords.
.keyword_expression : style {
color = pink
textmate_scope = keyword.control
pygments_scope = Keyword.Reserved
}
# Declaration / rule-level keywords.
.keyword_rule : style {
color = cyan
textmate_scope = keyword.other
pygments_scope = Keyword.Declaration
}
# Punctuation; the only style here that also sets an ace_scope.
.punctuation : style {
color = red_2
ace_scope = punctuation
textmate_scope = punctuation
pygments_scope = Operator
}
# Boolean literals.
.literal : style {
color = green
textmate_scope = constant
pygments_scope = Keyword.Constant
}
# Numeric and date literals.
.literal_numeric : style {
color = violet
textmate_scope = constant.numeric
pygments_scope = Number.Integer
}
# Operators; shares the Pygments scope of .punctuation.
.operator : style {
color = brown
textmate_scope = keyword.operator
pygments_scope = Operator
}
}
#################################################
## Parse contexts
#################################################
# Reusable regex fragments, referenced from the contexts below as $${NAME}.
# Character-class bodies: ASCII letters plus the Polish diacritic letters.
__LOWER_CHARS \= a-ząćęłńóśżź
__UPPER_CHARS \= A-ZĄĆĘŁŃÓŚŻŹ
# Identifier starting with a lowercase letter, followed by letters, digits, '_' or '\''.
__SC_ID \= [$${__LOWER_CHARS}][$${__LOWER_CHARS}$${__UPPER_CHARS}0-9_\']*
# Same as __SC_ID but the tail may also contain '.', so it spans a dotted path.
__SC_ID_COMPOSED \= [$${__LOWER_CHARS}][$${__LOWER_CHARS}$${__UPPER_CHARS}0-9_\'\.]*
# Identifier starting with an uppercase letter.
__CC_ID \= [$${__UPPER_CHARS}][$${__LOWER_CHARS}$${__UPPER_CHARS}0-9_\']*
contexts [] {

##############################################
## Main Context - Entry point context
##############################################

   main : context {

      # Bracketed headings are subtitles. This pattern must precede the generic
      # heading pattern below, which matches every line starting with '#' and
      # previously made this one unreachable. The bracket body is now [^\]]*
      # rather than a single character.
      : pattern {
         regex \= ([\#]+\s*\[[^\]]*\s*].*)
         styles [] = .law_subtitle;
      }

      # Any other heading line.
      : pattern {
         regex \= ([\#]+.*)
         styles [] = .law_title;
      }

      # Every remaining character except a backtick is law text.
      : pattern {
         regex \= ([^`])
         styles [] = .law;
      }

      # Opening fence: switch to the code context.
      : push {
         regex \= (```catala)
         styles [] = .code_delimiter;
         context [] = code;
      }
   }

##############################################
## Code Context - inside a ```catala fence
##############################################

   code : context {

      # Closing fence: return to the previous context.
      : pop {
         regex \= (```)
         styles [] = .code_delimiter;
      }

      # Line comment.
      : pattern {
         regex \= (\s*\#.*$)
         styles [] = .comment ;
      }

      # "kontekst <identifier>".
      : pattern {
         regex \= (kontekst)(\s+)($${__SC_ID})
         styles [] = .keyword_rule, .whitespace, .sc_id_def;
      }

      # Expression-level keywords.
      : pattern {
         regex \= \b(pasuje|ze\s+wzorem|staloprzecinkowa|przez|malejacy|rosnacy|rozna|wraz z|mamy|w|takich ze|istnieje|dla|wszystkie|z|jezeli|wtedy|inaczej|poczatkowy)\b
         styles [] = .keyword_expression ;
      }

      # Declaration / rule-level keywords.
      # Fix: "pod warunkiem" was misspelled "warunkuem".
      : pattern {
         regex \= \b(zakres|zalezy\s+od|deklaracja|kolekcja|typu|opcjonalny|struktura|enumeracja|kontekst|zasada|pod\s+warunkiem|czas|konsekwencja|spelnione|wynosi|asercja|definicja|etykieta|wyjątek)\b
         styles [] = .keyword_rule ;
      }

      # Date literal of the form |n/n/n|.
      : pattern {
         regex \= (\|[0-9]+/[0-9]+/[0-9]+\|)
         styles [] = .literal_numeric ;
      }

      # Boolean literals.
      : pattern {
         regex \= \b(prawda|falsz)\b
         styles [] = .literal ;
      }

      # Integer or comma-separated decimal literal.
      # Fix: the fractional part used [0.9] instead of the digit range [0-9].
      : pattern {
         regex \= \b([0-9]+(,[0-9]*|))\b
         styles [] = .literal_numeric ;
      }

      # Punctuation.
      : pattern {
         regex \= (\-\-|\;|\.|\,|\:|\(|\)|\[|\]|\{|\})
         styles [] = .punctuation;
      }

      # Symbolic operators (alternation order preserved).
      : pattern {
         regex \= (\-\>|\+\.|\+\@|\+\^|\+\$|\+|\-\.|\-\@|\-\^|\-\$|\-|\*\.|\*\@|\*\^|\*\$|\*|/\.|/\@|/\^|/\$|/|\!|>\.|>=\.|<=\.|<\.|>\@|>=\@|<=\@|<\@|>\$|>=\$|<=\$|<\$|>\^|>=\^|<=\^|<\^|>|>=|<=|<|=|\$|%)
         styles [] = .operator;
      }

      # Word operators. Fix: these used to sit in the unanchored alternation
      # above, so "i", "nie", "lub", ... matched the leading letters of any
      # identifier starting with them. Word boundaries limit them to whole words.
      : pattern {
         regex \= \b(nie|lub|xor|i|rok|miesiac|dzien)\b
         styles [] = .operator;
      }

      # Built-in type names.
      : pattern {
         regex \= \b(calkowita|zerojedynkowy|czas|czas trwania|pieniądze|warunek|tekst|dziesiętny|suma)\b
         styles [] = .primitive;
      }

      # Capitalised identifier, dot, lowercase identifier.
      : pattern {
         regex \= \b($${__CC_ID})(\.)($${__SC_ID})\b
         styles [] = .cc_id, .punctuation, .sc_id_def ;
      }

      # Lowercase identifier, dot, dotted lowercase path.
      : pattern {
         regex \= \b($${__SC_ID})(\.)($${__SC_ID_COMPOSED})\b
         styles [] = .sc_id_def, .punctuation, .sc_id ;
      }

      # Bare lowercase identifier.
      : pattern {
         regex \= \b($${__SC_ID})\b
         styles [] = .sc_id_def ;
      }

      # Bare capitalised identifier.
      : pattern {
         regex \= \b($${__CC_ID})\b
         styles [] = .cc_id ;
      }
   }
}

View File

@ -0,0 +1,3 @@
catala_pl_lexer.egg-info
__pycache__

View File

@ -0,0 +1,55 @@
from pygments.lexer import RegexLexer, bygroups
from pygments.token import *
import re
__all__=['CatalaPlLexer']
class CatalaPlLexer(RegexLexer):
    """Pygments lexer for Catala files that use the Polish keyword set.

    Lexing starts in the ``root`` state, which treats input as legislative
    text. An opening code fence (three backticks followed by ``catala``)
    enters the ``code`` state, where keywords, literals, operators and
    identifiers are tokenised; a closing fence goes back to ``root``.
    ``main__1`` and ``main__2`` colour the remainder of a heading line and
    return to ``root`` at the newline.

    Rules within a state are tried in order, so the more specific patterns
    are listed before the generic identifier and catch-all rules.
    """

    name = 'CatalaPl'
    aliases = ['catala_pl']
    filenames = ['*.catala_pl']
    flags = re.MULTILINE | re.UNICODE

    tokens = {
        'root' : [
            (u'(\\#\\#)', bygroups(Generic.Heading), 'main__1'),
            # Backslashes are doubled like in every other pattern: the previous
            # single-backslash form relied on invalid string escapes, which
            # Python warns about. The resulting regex is unchanged.
            (u'([\\#]+\\s*\\[[^\\]]\\s*])', bygroups(Generic.Heading), 'main__2'),
            (u'([^`\\n\\r])', bygroups(Text)),
            (u'(```catala)', bygroups(Text), 'code'),
            ('(\n|\r|\r\n)', Text),
            ('.', Text),
        ],
        'code' : [
            (u'(```)', bygroups(Text), 'root'),
            (u'(\\s*\\#.*$)', bygroups(Comment.Single)),
            (u'(kontekst)(\\s+)([a-z\u0105\u0107\u0119\u0142\u0144\xf3\u015b\u017c\u017a][a-z\u0105\u0107\u0119\u0142\u0144\xf3\u015b\u017c\u017aA-Z\u0104\u0106\u0118\u0141\u0143\xd3\u015a\u017b\u01790-9_\\\']*)', bygroups(Keyword.Declaration, Text, Name.Variable)),
            (u'\\b(pasuje|ze\\s+wzorem|staloprzecinkowa|przez|malejacy|rosnacy|rozna|wraz z|mamy|w|takich ze|istnieje|dla|wszystkie|z|jezeli|wtedy|inaczej|poczatkowy)\\b', bygroups(Keyword.Reserved)),
            # "pod warunkiem" was previously misspelled "warunkuem".
            (u'\\b(zakres|zalezy\\s+od|deklaracja|kolekcja|typu|opcjonalny|struktura|enumeracja|kontekst|zasada|pod\\s+warunkiem|czas|konsekwencja|spelnione|wynosi|asercja|definicja|etykieta|wyj\u0105tek)\\b', bygroups(Keyword.Declaration)),
            (u'(\\|[0-9]+/[0-9]+/[0-9]+\\|)', bygroups(Number.Integer)),
            (u'\\b(prawda|falsz)\\b', bygroups(Keyword.Constant)),
            # The fractional part is a digit range; it used to be the class
            # [0.9], which only accepts '0', '.' and '9'.
            (u'\\b([0-9]+(,[0-9]*|))\\b', bygroups(Number.Integer)),
            (u'(\\-\\-|\\;|\\.|\\,|\\:|\\(|\\)|\\[|\\]|\\{|\\})', bygroups(Operator)),
            # Symbolic operators (alternation order preserved).
            (u'(\\-\\>|\\+\\.|\\+\\@|\\+\\^|\\+\\$|\\+|\\-\\.|\\-\\@|\\-\\^|\\-\\$|\\-|\\*\\.|\\*\\@|\\*\\^|\\*\\$|\\*|/\\.|/\\@|/\\^|/\\$|/|\\!|>\\.|>=\\.|<=\\.|<\\.|>\\@|>=\\@|<=\\@|<\\@|>\\$|>=\\$|<=\\$|<\\$|>\\^|>=\\^|<=\\^|<\\^|>|>=|<=|<|=|\\$|%)', bygroups(Operator)),
            # Word operators need word boundaries: without them "i", "nie",
            # "lub", ... consumed the first letters of any identifier that
            # begins with them, because this rule runs before the identifier
            # rules below.
            (u'\\b(nie|lub|xor|i|rok|miesiac|dzien)\\b', bygroups(Operator)),
            (u'\\b(calkowita|zerojedynkowy|czas|czas trwania|pieni\u0105dze|warunek|tekst|dziesi\u0119tny|suma)\\b', bygroups(Keyword.Type)),
            (u'\\b([A-Z\u0104\u0106\u0118\u0141\u0143\xd3\u015a\u017b\u0179][a-z\u0105\u0107\u0119\u0142\u0144\xf3\u015b\u017c\u017aA-Z\u0104\u0106\u0118\u0141\u0143\xd3\u015a\u017b\u01790-9_\\\']*)(\\.)([a-z\u0105\u0107\u0119\u0142\u0144\xf3\u015b\u017c\u017a][a-z\u0105\u0107\u0119\u0142\u0144\xf3\u015b\u017c\u017aA-Z\u0104\u0106\u0118\u0141\u0143\xd3\u015a\u017b\u01790-9_\\\']*)\\b', bygroups(Name.Class, Operator, Name.Variable)),
            (u'\\b([a-z\u0105\u0107\u0119\u0142\u0144\xf3\u015b\u017c\u017a][a-z\u0105\u0107\u0119\u0142\u0144\xf3\u015b\u017c\u017aA-Z\u0104\u0106\u0118\u0141\u0143\xd3\u015a\u017b\u01790-9_\\\']*)(\\.)([a-z\u0105\u0107\u0119\u0142\u0144\xf3\u015b\u017c\u017a][a-z\u0105\u0107\u0119\u0142\u0144\xf3\u015b\u017c\u017aA-Z\u0104\u0106\u0118\u0141\u0143\xd3\u015a\u017b\u01790-9_\\\'\\.]*)\\b', bygroups(Name.Variable, Operator, Text)),
            (u'\\b([a-z\u0105\u0107\u0119\u0142\u0144\xf3\u015b\u017c\u017a][a-z\u0105\u0107\u0119\u0142\u0144\xf3\u015b\u017c\u017aA-Z\u0104\u0106\u0118\u0141\u0143\xd3\u015a\u017b\u01790-9_\\\']*)\\b', bygroups(Name.Variable)),
            (u'\\b([A-Z\u0104\u0106\u0118\u0141\u0143\xd3\u015a\u017b\u0179][a-z\u0105\u0107\u0119\u0142\u0144\xf3\u015b\u017c\u017aA-Z\u0104\u0106\u0118\u0141\u0143\xd3\u015a\u017b\u01790-9_\\\']*)\\b', bygroups(Name.Class)),
            ('(\n|\r|\r\n)', Text),
            ('.', Text),
        ],
        'main__1': [
            (u'(\n)', bygroups(Generic.Heading), 'root'),
            (u'(.)', bygroups(Generic.Heading)),
            ('(\n|\r|\r\n)', Text),
            ('.', Text),
        ],
        'main__2': [
            (u'(\n)', bygroups(Generic.Heading), 'root'),
            (u'(.)', bygroups(Generic.Heading)),
            ('(\n|\r|\r\n)', Text),
            ('.', Text),
        ]
    }

View File

@ -0,0 +1,10 @@
from setuptools import setup, find_packages
# Package metadata for the Polish Catala lexer. The entry point below is
# declared in the "pygments.lexers" group under the name catala_pl_lexer and
# points at the CatalaPlLexer class in the catala_pl_lexer.lexer module.
setup(
name='catala_pl_lexer',
# Package list is discovered from the directory containing this file.
packages=find_packages(),
entry_points="""
[pygments.lexers]
catala_pl_lexer = catala_pl_lexer.lexer:CatalaPlLexer
""",
)

View File

@ -0,0 +1,4 @@
#! /usr/bin/env bash
# Installs the Polish Catala Pygments lexer in development mode.
#
# The script first moves to its own directory so it can be invoked from
# anywhere, then runs setup.py from the pygments/ subdirectory.

# Abort if the script directory cannot be entered: otherwise the next line
# would resolve "pygments" against the caller's working directory and could
# run an unrelated setup.py.
cd "$(dirname "$0")" || exit 1
cd pygments && python3 setup.py develop

View File

@ -0,0 +1,10 @@
#! /usr/bin/env bash
# Links the Polish Catala Atom grammar (the atom/ directory next to this
# script) into the user's Atom packages directory as "catala_pl".

FILE=~/.atom/packages/catala_pl
# Resolve the real location of this script. Every expansion is quoted so a
# checkout path containing spaces does not get split into several arguments.
SCRIPT=$(realpath "$0")
SCRIPTPATH=$(dirname "$SCRIPT")
# Only create the link when no symlink exists there yet.
if [ ! -L "$FILE" ]; then
    echo "Creating link"
    ln -s -f "$SCRIPTPATH/atom" "$FILE"
fi