lexer

2024-11-08 07:51:43 +03:00 · 2021-05-09 22:55:50 +02:00 · 2021-05-09 22:55:50 +02:00 · db46b1c444
commit db46b1c444
parent 37c8f8da64
21 changed files with 709 additions and 30 deletions
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -124,7 +124,7 @@ general-to-specifics statutes order. Therefore, there exists multiple versions
 of the Catala surface syntax, adapted to the language of the legislative text.

 Currently, Catala supports English and French legislative text via the
-`--language=en` or `--language=fr` option.
+`--language=en`, `--language=fr` or `--language=pl`  option.

 Technically, support for new languages can be added via a new lexer. If you want
 to add a new language, you can start from
--- a/INSTALL.md
+++ b/INSTALL.md
@ -119,7 +119,8 @@ augmented with the Catala plugin, simply enter
    sudo make pygments

 This will execute the
-script `syntax_highlighting/fr/pygments/set_up_pygments.sh` and
+script `syntax_highlighting/fr/pygments/set_up_pygments.sh`,
+`syntax_highlighting/pl/pygments/set_up_pygments.sh` and
 `syntax_highlighting/en/pygments/set_up_pygments.sh`.

 The scripts patch your `pigmentize` executable, used for instance by the `minted` LaTeX package.
--- a/15
+++ b/15
@ -58,6 +58,7 @@ install:

 SYNTAX_HIGHLIGHTING_FR=${CURDIR}/syntax_highlighting/fr
 SYNTAX_HIGHLIGHTING_EN=${CURDIR}/syntax_highlighting/en
+SYNTAX_HIGHLIGHTING_PL=${CURDIR}/syntax_highlighting/pl

 pygmentize_fr: $(SYNTAX_HIGHLIGHTING_FR)/set_up_pygments.sh
 	chmod +x $<
@ -67,6 +68,10 @@ pygmentize_en: $(SYNTAX_HIGHLIGHTING_EN)/set_up_pygments.sh
 	chmod +x $<
 	sudo $<

+pygmentize_pl: $(SYNTAX_HIGHLIGHTING_PL)/set_up_pygments.sh
+	chmod +x $<
+	sudo $<
+
 #> pygments				: Extends your pygmentize executable with Catala lexers
 pygments: pygmentize_fr pygmentize_en

@ -78,6 +83,10 @@ atom_en: ${CURDIR}/syntax_highlighting/en/setup_atom.sh
 	chmod +x $<
 	$<

+atom_pl: ${CURDIR}/syntax_highlighting/pl/setup_atom.sh
+	chmod +x $<
+	$<
+
 atom_nv: ${CURDIR}/syntax_highlighting/nv/setup_atom.sh
 	chmod +x $<
 	$<
@ -93,6 +102,10 @@ vscode_en: ${CURDIR}/syntax_highlighting/en/setup_vscode.sh
 	chmod +x $<
 	$<

+vscode_pl: ${CURDIR}/syntax_highlighting/pl/setup_vscode.sh
+	chmod +x $<
+	$<
+
 vscode_nv: ${CURDIR}/syntax_highlighting/nv/setup_vscode.sh
 	chmod +x $<
 	$<
@ -222,7 +235,7 @@ clean:
 	$(MAKE) -C $(CODE_GENERAL_IMPOTS_DIR) clean

 inspect:
-	gitinspector -f ml,mli,mly,iro,tex,catala,catala_en,catala_fr,md,fst,mld --grading
+	gitinspector -f ml,mli,mly,iro,tex,catala,catala_en,catala_pl,catala_fr,md,fst,mld --grading

 ##########################################
 # Special targets
--- a/examples/README.md
+++ b/examples/README.md
@ -99,7 +99,7 @@ To build and run the example, create a `Makefile` in `foo/`
 with the following contents:

 ```Makefile
-CATALA_LANG=en # or fr if your source code is in French
+CATALA_LANG=en # or fr/pl if your source code is in French/Polish
 SRC=foo.catala

 include ../Makefile.common.mk
--- a/examples/polish_taxes_pl/.gitignore
+++ b/examples/polish_taxes_pl/.gitignore
@ -0,0 +1,14 @@
+*.aux
+*.dvi
+*.fdb_latexmk
+*.fls
+*.log
+*.out
+*.fls
+*.tex
+*.pdf
+_minted*
+*.toc
+*.pyg
+*.d
+*.ml
--- a/examples/polish_taxes_pl/Makefile
+++ b/examples/polish_taxes_pl/Makefile
@ -0,0 +1,4 @@
+CATALA_LANG=pl
+SRC=polish_tax_code.catala_pl
+
+include ../Makefile.common.mk
--- a/examples/polish_taxes_pl/podatek_od_czynnosci_cywilnoprawnych/rozdzial_3.catala_pl
+++ b/examples/polish_taxes_pl/podatek_od_czynnosci_cywilnoprawnych/rozdzial_3.catala_pl
@ -0,0 +1,24 @@
+## Rozdzial 3
+
+## Art. 7. 1. Stawki podatku wynoszą:
+
+### 1) od umowy sprzedaży:
+#### a) nieruchomości, rzeczy ruchomych, prawa użytkowania wieczystego, własnościowego spółdzielczego prawa do lokalu mieszkalnego, spółdzielczego prawa do lokalu użytkowego oraz wynikających z przepisów prawa spółdzielczego: prawa do domu jednorodzinnego oraz prawa do lokalu w małym domu mieszkalnym – 2%,
+
+> Begin metadata
+```catala
+deklaracja zakres UmowaSprzedazy:
+  kontekst kwota zawartosc pieniadze
+  kontekst stawka zawartosc dziesietna
+  kontekst podatek zawartosc pieniadze
+
+zakres UmowaSprzedazy:
+  definicja podatek wynosi
+    kwota *$ stawka
+
+zakres UmowaSprzedazy:
+  etykieta a7_u1_p1_ppa
+  definicja stawka wynosi 2%
+```
+> End metadata
+
--- a/examples/polish_taxes_pl/polish_taxes_pl.catala_pl
+++ b/examples/polish_taxes_pl/polish_taxes_pl.catala_pl
@ -0,0 +1,3 @@
+# Master file
+
+> Include: podatek_od_czynnosci_cywilnoprawnych/rozdzial_3.catala_pl
--- a/examples/polish_taxes_pl/tests/test_a7_u1_p1.catala_pl
+++ b/examples/polish_taxes_pl/tests/test_a7_u1_p1.catala_pl
@ -0,0 +1,23 @@
+> Include: ../podatek_od_czynnosci_cywilnoprawnych/rozdzial_3.catala_en
+
+## [Test - Art. 7 ustęp 1 punkt 1]
+
+```catala
+deklaracja zakres Test_A7_U1_P1_PPa:
+  kontekst sprzedaz zakres UmowaSprzedazy
+
+zakres Test_A7_U1_P1_PPa:
+  definicja sprzedaz.kwota wynosi $100
+
+  asercja sprzedaz.podatek = $2
+
+
+deklaracja zakres Test_A7_U1_P1_PPb:
+  kontekst sprzedaz zakres UmowaSprzedazy
+
+zakres Test_A7_U1_P1_PPb:
+  definicja sprzedaz.kwota wynosi $100
+  definicja sprzedaz.inne_prawa_majatkowe wynosi prawda
+
+  asercja sprzedaz.podatek = $1
+```
--- a/generate_website_assets.sh
+++ b/generate_website_assets.sh
@ -16,6 +16,7 @@ scp grammar.html $1/
 scp catala.html $1/
 scp _build/default/src/catala/catala_web.bc.js $1/playground/
 scp examples/tutorial_en/tutorial_en.catala_en $1/playground/
+# TODO BW: PL
 scp examples/tutoriel_fr/tutoriel_fr.catala_fr $1/playground/
 cat examples/allocations_familiales/prologue.catala_fr > allocations_familiales.catala_fr
 cat examples/allocations_familiales/decrets_divers.catala_fr >> allocations_familiales.catala_fr
@ -29,5 +30,6 @@ mv -f allocations_familiales_fixed.catala_fr allocations_familiales.catala_fr
 scp allocations_familiales.catala_fr $1/playground/
 rm allocations_familiales.catala_fr
 scp syntax_highlighting/en/ace/mode-catala_en.js $1/playground/
+# TODO BW: PL
 scp syntax_highlighting/fr/ace/mode-catala_fr.js $1/playground/
 scp french_law_js/french_law.js $1/french_law.js
--- a/src/catala/dcalc/print.ml
+++ b/src/catala/dcalc/print.ml
@ -98,7 +98,8 @@ let format_lit (fmt : Format.formatter) (l : lit Pos.marked) : unit =
  | LMoney e -> (
      match !Utils.Cli.locale_lang with
      | `En -> Format.fprintf fmt "$%s" (Runtime.money_to_string e)
-      | `Fr -> Format.fprintf fmt "%s €" (Runtime.money_to_string e))
+      | `Fr -> Format.fprintf fmt "%s €" (Runtime.money_to_string e)
+      | `Pl -> Format.fprintf fmt "%s PLN" (Runtime.money_to_string e))
  | LDate d -> Format.fprintf fmt "%s" (Runtime.date_to_string d)
  | LDuration d -> Format.fprintf fmt "%s" (Runtime.duration_to_string d)

--- a/src/catala/driver.ml
+++ b/src/catala/driver.ml
@ -35,6 +35,7 @@ let driver (source_file : Pos.input_file) (debug : bool) (dcalc : bool) (unstyle
      | Some l ->
          if l = "fr" then `Fr
          else if l = "en" then `En
+          else if l = "pl" then `Pl
          else if l = "non-verbose" then `NonVerbose
          else
            Errors.raise_error
--- a/src/catala/lcalc/print.ml
+++ b/src/catala/lcalc/print.ml
@ -38,7 +38,8 @@ let format_lit (fmt : Format.formatter) (l : lit Pos.marked) : unit =
  | LMoney e -> (
      match !Utils.Cli.locale_lang with
      | `En -> Format.fprintf fmt "$%s" (Runtime.money_to_string e)
-      | `Fr -> Format.fprintf fmt "%s €" (Runtime.money_to_string e))
+      | `Fr -> Format.fprintf fmt "%s €" (Runtime.money_to_string e)
+      | `Pl -> Format.fprintf fmt "%s PLN" (Runtime.money_to_string e))
  | LDate d -> Format.fprintf fmt "%s" (Runtime.date_to_string d)
  | LDuration d -> Format.fprintf fmt "%s" (Runtime.duration_to_string d)

--- a/src/catala/literate/html.ml
+++ b/src/catala/literate/html.ml
@ -71,12 +71,14 @@ let wrap_html (source_files : string list) (language : Cli.backend_lang) (fmt :
    css_as_string
    (match language with
    | `Fr -> "Implémentation de texte législatif"
-    | `En -> "Legislative text implementation")
-    (match language with `Fr -> "Document généré par" | `En -> "Document generated by")
+    | `En -> "Legislative text implementation"
+    | `Pl -> "Implementowanie tekstów legislacyjnych")
+    (match language with `Fr -> "Document généré par" | `En -> "Document generated by" | `Pl -> "Dokument wygenerowany przez")
    Utils.Cli.version
    (match language with
    | `Fr -> "Fichiers sources tissés dans ce document"
-    | `En -> "Source files weaved in this document")
+    | `En -> "Source files weaved in this document"
+    | `Pl -> "Pliki źródłowe w tym dokumencie")
    (String.concat "\n"
       (List.map
          (fun filename ->
@ -90,7 +92,8 @@ let wrap_html (source_files : string list) (language : Cli.backend_lang) (fmt :
              (pre_html (Filename.basename filename))
              (match language with
              | `Fr -> "dernière modification le"
-              | `En -> "last modification")
+              | `En -> "last modification"
+              | `Pl -> "ostatnia modyfikacja")
              ftime)
          source_files));
  wrapped fmt
@ -104,7 +107,7 @@ let pygmentize_code (c : string Pos.marked) (language : C.backend_lang) : string
  Printf.fprintf oc "%s" (Pos.unmark c);
  close_out oc;
  let pygments = "pygmentize" in
-  let pygments_lexer = match language with `Fr -> "catala_fr" | `En -> "catala_en" in
+  let pygments_lexer = match language with `Fr -> "catala_fr" | `En -> "catala_en" | `Pl -> "catala_pl" in
  let pygments_args =
    [|
      "-l";
--- a/src/catala/literate/latex.ml
+++ b/src/catala/literate/latex.ml
@ -80,15 +80,17 @@ let wrap_latex (source_files : string list) (language : C.backend_lang) (fmt : F
     %s : \n\
     \\begin{itemize}%s\\end{itemize}\n\n\
     \\[\\star\\star\\star\\]\\\\\n"
-    (match language with `Fr -> "french" | `En -> "english")
+    (match language with `Fr -> "french" | `En -> "english" | `Pl -> "polish")
    (match language with
    | `Fr -> "Implémentation de texte législatif"
-    | `En -> "Legislative text implementation")
-    (match language with `Fr -> "Document généré par" | `En -> "Document generated by")
+    | `En -> "Legislative text implementation"
+    | `Pl -> "Implementacja tekstów legislacyjnych")
+    (match language with `Fr -> "Document généré par" | `En -> "Document generated by" | `Pl -> "Dokument wygenerowany przez")
    Utils.Cli.version
    (match language with
    | `Fr -> "Fichiers sources tissés dans ce document"
-    | `En -> "Source files weaved in this document")
+    | `En -> "Source files weaved in this document"
+    | `Pl -> "Pliki źródłowe w tym dokumencie")
    (String.concat ","
       (List.map
          (fun filename ->
@ -102,7 +104,8 @@ let wrap_latex (source_files : string list) (language : C.backend_lang) (fmt : F
              (pre_latexify (Filename.basename filename))
              (match language with
              | `Fr -> "dernière modification le"
-              | `En -> "last modification")
+              | `En -> "last modification"
+              | `Pl -> "ostatnia modyfikacja")
              ftime)
          source_files));
  wrapped fmt;
@ -137,7 +140,7 @@ let law_article_item_to_latex (language : C.backend_lang) (fmt : Format.formatte
         \\end{minted}"
        (pre_latexify (Filename.basename (Pos.get_file (Pos.get_position c))))
        (Pos.get_start_line (Pos.get_position c) - 1)
-        (match language with `Fr -> "catala_fr" | `En -> "catala_en")
+        (match language with `Fr -> "catala_fr" | `En -> "catala_en" | `Pl -> "catala_pl")
        (math_syms_replace (Pos.unmark c))

 let rec law_structure_to_latex (language : C.backend_lang) (fmt : Format.formatter)
@ -171,7 +174,7 @@ let rec law_structure_to_latex (language : C.backend_lang) (fmt : Format.formatt
        (law_article_item_to_latex language)
        fmt children
  | A.MetadataBlock (_, c) ->
-      let metadata_title = match language with `Fr -> "Métadonnées" | `En -> "Metadata" in
+      let metadata_title = match language with `Fr -> "Métadonnées" | `En -> "Metadata" | `Pl -> "Metadane" in
      Format.fprintf fmt
        "\\begin{tcolorbox}[colframe=OliveGreen, breakable, \
         title=\\textcolor{black}{\\texttt{%s}},title after \
@ -183,7 +186,7 @@ let rec law_structure_to_latex (language : C.backend_lang) (fmt : Format.formatt
        metadata_title metadata_title
        (Pos.get_start_line (Pos.get_position c) - 1)
        (pre_latexify (Filename.basename (Pos.get_file (Pos.get_position c))))
-        (match language with `Fr -> "catala_fr" | `En -> "catala_en")
+        (match language with `Fr -> "catala_fr" | `En -> "catala_en" | `Pl -> "catala_pl")
        (math_syms_replace (Pos.unmark c))
  | A.IntermediateText t -> Format.fprintf fmt "%s" (pre_latexify t)

--- a/src/catala/surface/dune
+++ b/src/catala/surface/dune
@ -32,19 +32,37 @@
 (rule
 (with-stdout-to
  parser.messages.new
-  (run menhir %{dep:tokens.mly} %{dep:parser.mly} --base parser --list-errors)))
+  (run
+   menhir
+   %{dep:tokens.mly}
+   %{dep:parser.mly}
+   --base
+   parser
+   --list-errors)))

 (rule
 (with-stdout-to
  parser_errors.ml
-  (run menhir %{dep:tokens.mly} %{dep:parser.mly} --base parser
-    --compile-errors %{dep:parser.messages})))
+  (run
+   menhir
+   %{dep:tokens.mly}
+   %{dep:parser.mly}
+   --base
+   parser
+   --compile-errors
+   %{dep:parser.messages})))

 (rule
 (with-stdout-to
  parser.messages.updated
-  (run menhir %{dep:tokens.mly} %{dep:parser.mly} --base parser
-    --update-errors %{dep:parser.messages})))
+  (run
+   menhir
+   %{dep:tokens.mly}
+   %{dep:parser.mly}
+   --base
+   parser
+   --update-errors
+   %{dep:parser.messages})))

 (rule
 (alias update-parser-messages)
--- a/src/catala/surface/lexer_pl.ml
+++ b/src/catala/surface/lexer_pl.ml
@ -0,0 +1,550 @@
+(* This file is part of the Catala compiler, a specification language for tax and social benefits
+   computation rules. Copyright (C) 2020 Inria, contributor: Denis Merigoux
+   <denis.merigoux@inria.fr>
+
+   Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+   in compliance with the License. You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software distributed under the License
+   is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+   or implied. See the License for the specific language governing permissions and limitations under
+   the License. *)
+
+open Tokens
+open Sedlexing
+open Utils
+open Lexer_common
+module L = Lexer
+module R = Re.Pcre
+
+(** Same as {!val: Surface.Lexer.token_list_language_agnostic}, but with tokens specialized to
+    Polish. *)
+let token_list : (string * token) list =
+  [
+    ("zakres", SCOPE);
+    ("konsekwencja", CONSEQUENCE);
+    ("data", DATA);
+    ("zalezy od", DEPENDS);
+    ("deklaracja", DECLARATION);
+    ("kontekst", CONTEXT);
+    ("malejacy", DECREASING);
+    ("rosnacy", INCREASING);
+    ("z", OF);
+    ("kolekcja", COLLECTION);
+    ("enumeracja", ENUM);
+    ("calkowita", INTEGER);
+    ("pieniadze", MONEY);
+    ("tekst", TEXT);
+    ("dziesietna", DECIMAL);
+    ("czas", DATE);
+    ("czas trwania", DURATION);
+    ("zerojedynkowy", BOOLEAN);
+    ("suma", SUM);
+    ("spelnione", FILLED);
+    ("definicja", DEFINITION);
+    ("etykieta", LABEL);
+    ("wyjatek", EXCEPTION);
+    ("wynosi", DEFINED_AS);
+    ("pasuje", MATCH);
+    ("ze wzorem", WITH);
+    ("pod warunkiem", UNDER_CONDITION);
+    ("jezeli", IF);
+    ("wtedy", THEN);
+    ("inaczej", ELSE);
+    ("zawartosc", CONTENT);
+    ("struktura", STRUCT);
+    ("asercja", ASSERTION);
+    ("rozna", VARIES);
+    ("with", WITH_V);
+    ("dla", FOR);
+    ("wszystkie", ALL);
+    ("mamy", WE_HAVE);
+    ("staloprzecinkowa", FIXED);
+    ("przez", BY);
+    ("zasada", RULE);
+    ("istnieje", EXISTS);
+    ("takie ze", SUCH);
+    ("to", THAT);
+    ("i", AND);
+    ("lub", OR);
+    ("xor", XOR);
+    ("nie", NOT);
+    ("maximum", MAXIMUM);
+    ("minimum", MINIMUM);
+    ("filtr", FILTER);
+    ("mapuj", MAP);
+    ("poczatkowy", INIT);
+    ("liczba", CARDINAL);
+    ("rok", YEAR);
+    ("miesiac", MONTH);
+    ("dzien", DAY);
+    ("prawda", TRUE);
+    ("falsz", FALSE);
+  ]
+  @ L.token_list_language_agnostic
+
+(** Localised builtin functions *)
+let builtins : (string * Ast.builtin_expression) list =
+  [
+    ("integer_to_decimal", IntToDec);
+    ("get_day", GetDay);
+    ("get_month", GetMonth);
+    ("get_year", GetYear);
+  ]
+
+(** Main lexing function used in code blocks *)
+let rec lex_code (lexbuf : lexbuf) : token =
+  let prev_lexeme = Utf8.lexeme lexbuf in
+  let prev_pos = lexing_positions lexbuf in
+  match%sedlex lexbuf with
+  | white_space ->
+      (* Whitespaces *)
+      L.update_acc lexbuf;
+      lex_code lexbuf
+  | '#', Star (Compl '\n'), '\n' ->
+      (* Comments *)
+      L.update_acc lexbuf;
+      lex_code lexbuf
+  | "```" ->
+      (* End of code section *)
+      L.is_code := false;
+      END_CODE !L.code_string_acc
+  | "zakres" ->
+      L.update_acc lexbuf;
+      SCOPE
+  | "data" ->
+      L.update_acc lexbuf;
+      DATA
+  | "zalezy od" ->
+      L.update_acc lexbuf;
+      DEPENDS
+  | "deklaracja" ->
+      L.update_acc lexbuf;
+      DECLARATION
+  | "kontekst" ->
+      L.update_acc lexbuf;
+      CONTEXT
+  | "malejacy" ->
+      L.update_acc lexbuf;
+      DECREASING
+  | "rosnacy" ->
+      L.update_acc lexbuf;
+      INCREASING
+  | "z" ->
+      L.update_acc lexbuf;
+      OF
+  | "kolekcja" ->
+      L.update_acc lexbuf;
+      COLLECTION
+  | "enumeracja" ->
+      L.update_acc lexbuf;
+      ENUM
+  | "calkowita" ->
+      L.update_acc lexbuf;
+      INTEGER
+  | "pieniadze" ->
+      L.update_acc lexbuf;
+      MONEY
+  | "tekst" ->
+      L.update_acc lexbuf;
+      TEXT
+  | "dziesietna" ->
+      L.update_acc lexbuf;
+      DECIMAL
+  | "czas" ->
+      L.update_acc lexbuf;
+      DATE
+  | "czas trwania" ->
+      L.update_acc lexbuf;
+      DURATION
+  | "zerojedynkowy" ->
+      L.update_acc lexbuf;
+      BOOLEAN
+  | "suma" ->
+      L.update_acc lexbuf;
+      SUM
+  | "spelnione" ->
+      L.update_acc lexbuf;
+      FILLED
+  | "definicja" ->
+      L.update_acc lexbuf;
+      DEFINITION
+  | "etykieta" ->
+      L.update_acc lexbuf;
+      LABEL
+  | "wyjatek" ->
+      L.update_acc lexbuf;
+      EXCEPTION
+  | "wynosi" ->
+      L.update_acc lexbuf;
+      DEFINED_AS
+  | "pasuje" ->
+      L.update_acc lexbuf;
+      MATCH
+  | "ze wzorem" ->
+      L.update_acc lexbuf;
+      WITH
+  | "pod warunkiem" ->
+      L.update_acc lexbuf;
+      UNDER_CONDITION
+  | "jezeli" ->
+      L.update_acc lexbuf;
+      IF
+  | "konsekwencja" ->
+      L.update_acc lexbuf;
+      CONSEQUENCE
+  | "wtedy" ->
+      L.update_acc lexbuf;
+      THEN
+  | "inaczej" ->
+      L.update_acc lexbuf;
+      ELSE
+  | "condition" ->
+      L.update_acc lexbuf;
+      CONDITION
+  | "zawartosc" ->
+      L.update_acc lexbuf;
+      CONTENT
+  | "struktura" ->
+      L.update_acc lexbuf;
+      STRUCT
+  | "asercja" ->
+      L.update_acc lexbuf;
+      ASSERTION
+  | "rozna" ->
+      L.update_acc lexbuf;
+      VARIES
+  | "wraz z" ->
+      L.update_acc lexbuf;
+      WITH_V
+  | "dla" ->
+      L.update_acc lexbuf;
+      FOR
+  | "wszystkie" ->
+      L.update_acc lexbuf;
+      ALL
+  | "mamy" ->
+      L.update_acc lexbuf;
+      WE_HAVE
+  | "staloprzecinkowa" ->
+      L.update_acc lexbuf;
+      FIXED
+  | "przez" ->
+      L.update_acc lexbuf;
+      BY
+  | "zasada" ->
+      (* 0xE8 is è *)
+      L.update_acc lexbuf;
+      RULE
+  | "istnieje" ->
+      L.update_acc lexbuf;
+      EXISTS
+  | "in" ->
+      L.update_acc lexbuf;
+      IN
+  | "takie ze" ->
+      L.update_acc lexbuf;
+      SUCH
+  | "to" ->
+      L.update_acc lexbuf;
+      THAT
+  | "i" ->
+      L.update_acc lexbuf;
+      AND
+  | "lub" ->
+      L.update_acc lexbuf;
+      OR
+  | "xor" ->
+      L.update_acc lexbuf;
+      XOR
+  | "nie" ->
+      L.update_acc lexbuf;
+      NOT
+  | "maximum" ->
+      L.update_acc lexbuf;
+      MAXIMUM
+  | "minimum" ->
+      L.update_acc lexbuf;
+      MINIMUM
+  | "filtr" ->
+      L.update_acc lexbuf;
+      FILTER
+  | "mapuj" ->
+      L.update_acc lexbuf;
+      MAP
+  | "poczatkowy" ->
+      L.update_acc lexbuf;
+      INIT
+  | "liczba" ->
+      L.update_acc lexbuf;
+      CARDINAL
+  | "prawda" ->
+      L.update_acc lexbuf;
+      TRUE
+  | "falsz" ->
+      L.update_acc lexbuf;
+      FALSE
+  | "rok" ->
+      L.update_acc lexbuf;
+      YEAR
+  | "miesiac" ->
+      L.update_acc lexbuf;
+      MONTH
+  | "dzien" ->
+      L.update_acc lexbuf;
+      DAY
+  | 0x24, Star white_space, '0' .. '9', Star ('0' .. '9' | ','), Opt ('.', Rep ('0' .. '9', 0 .. 2))
+    ->
+      let extract_parts = R.regexp "([0-9]([0-9,]*[0-9]|))(.([0-9]{0,2})|)" in
+      let full_str = Utf8.lexeme lexbuf in
+      let only_numbers_str = String.trim (String.sub full_str 1 (String.length full_str - 1)) in
+      let parts = R.get_substring (R.exec ~rex:extract_parts only_numbers_str) in
+      (* Integer literal*)
+      let units = parts 1 in
+      let remove_commas = R.regexp "," in
+      let units =
+        Runtime.integer_of_string (R.substitute ~rex:remove_commas ~subst:(fun _ -> "") units)
+      in
+      let cents =
+        try Runtime.integer_of_string (parts 4) with Not_found -> Runtime.integer_of_int 0
+      in
+      L.update_acc lexbuf;
+      MONEY_AMOUNT (units, cents)
+  | Plus '0' .. '9', '.', Star '0' .. '9' ->
+      let extract_code_title = R.regexp "([0-9]+)\\.([0-9]*)" in
+      let dec_parts = R.get_substring (R.exec ~rex:extract_code_title (Utf8.lexeme lexbuf)) in
+      (* Integer literal*)
+      L.update_acc lexbuf;
+      DECIMAL_LITERAL
+        (Runtime.integer_of_string (dec_parts 1), Runtime.integer_of_string (dec_parts 2))
+  | "<=@" ->
+      L.update_acc lexbuf;
+      LESSER_EQUAL_DATE
+  | "<@" ->
+      L.update_acc lexbuf;
+      LESSER_DATE
+  | ">=@" ->
+      L.update_acc lexbuf;
+      GREATER_EQUAL_DATE
+  | ">@" ->
+      L.update_acc lexbuf;
+      GREATER_DATE
+  | "-@" ->
+      L.update_acc lexbuf;
+      MINUSDATE
+  | "+@" ->
+      L.update_acc lexbuf;
+      PLUSDATE
+  | "<=^" ->
+      L.update_acc lexbuf;
+      LESSER_EQUAL_DURATION
+  | "<^" ->
+      L.update_acc lexbuf;
+      LESSER_DURATION
+  | ">=^" ->
+      L.update_acc lexbuf;
+      GREATER_EQUAL_DURATION
+  | ">^" ->
+      L.update_acc lexbuf;
+      GREATER_DURATION
+  | "+^" ->
+      L.update_acc lexbuf;
+      PLUSDURATION
+  | "-^" ->
+      L.update_acc lexbuf;
+      MINUSDURATION
+  | "<=", 0x24 ->
+      L.update_acc lexbuf;
+      LESSER_EQUAL_MONEY
+  | '<', 0x24 ->
+      L.update_acc lexbuf;
+      LESSER_MONEY
+  | ">=", 0x24 ->
+      L.update_acc lexbuf;
+      GREATER_EQUAL_MONEY
+  | '>', 0x24 ->
+      L.update_acc lexbuf;
+      GREATER_MONEY
+  | '+', 0x24 ->
+      L.update_acc lexbuf;
+      PLUSMONEY
+  | '-', 0x24 ->
+      L.update_acc lexbuf;
+      MINUSMONEY
+  | '*', 0x24 ->
+      L.update_acc lexbuf;
+      MULTMONEY
+  | '/', 0x24 ->
+      L.update_acc lexbuf;
+      DIVMONEY
+  | "<=." ->
+      L.update_acc lexbuf;
+      LESSER_EQUAL_DEC
+  | "<." ->
+      L.update_acc lexbuf;
+      LESSER_DEC
+  | ">=." ->
+      L.update_acc lexbuf;
+      GREATER_EQUAL_DEC
+  | ">." ->
+      L.update_acc lexbuf;
+      GREATER_DEC
+  | "+." ->
+      L.update_acc lexbuf;
+      PLUSDEC
+  | "-." ->
+      L.update_acc lexbuf;
+      MINUSDEC
+  | "*." ->
+      L.update_acc lexbuf;
+      MULTDEC
+  | "/." ->
+      L.update_acc lexbuf;
+      DIVDEC
+  | "<=" ->
+      L.update_acc lexbuf;
+      LESSER_EQUAL
+  | '<' ->
+      L.update_acc lexbuf;
+      LESSER
+  | ">=" ->
+      L.update_acc lexbuf;
+      GREATER_EQUAL
+  | '>' ->
+      L.update_acc lexbuf;
+      GREATER
+  | '+' ->
+      L.update_acc lexbuf;
+      PLUS
+  | '-' ->
+      L.update_acc lexbuf;
+      MINUS
+  | '*' ->
+      L.update_acc lexbuf;
+      MULT
+  | '/' ->
+      L.update_acc lexbuf;
+      DIV
+  | "!=" ->
+      L.update_acc lexbuf;
+      NOT_EQUAL
+  | '=' ->
+      L.update_acc lexbuf;
+      EQUAL
+  | '%' ->
+      L.update_acc lexbuf;
+      PERCENT
+  | '(' ->
+      L.update_acc lexbuf;
+      LPAREN
+  | ')' ->
+      L.update_acc lexbuf;
+      RPAREN
+  | '{' ->
+      L.update_acc lexbuf;
+      LBRACKET
+  | '}' ->
+      L.update_acc lexbuf;
+      RBRACKET
+  | '[' ->
+      L.update_acc lexbuf;
+      LSQUARE
+  | ']' ->
+      L.update_acc lexbuf;
+      RSQUARE
+  | '|' ->
+      L.update_acc lexbuf;
+      VERTICAL
+  | ':' ->
+      L.update_acc lexbuf;
+      COLON
+  | ';' ->
+      L.update_acc lexbuf;
+      SEMICOLON
+  | "--" ->
+      L.update_acc lexbuf;
+      ALT
+  | '.' ->
+      L.update_acc lexbuf;
+      DOT
+  | uppercase, Star (uppercase | lowercase | '0' .. '9' | '_' | '\'') ->
+      (* Name of constructor *)
+      L.update_acc lexbuf;
+      CONSTRUCTOR (Utf8.lexeme lexbuf)
+  | lowercase, Star (lowercase | uppercase | '0' .. '9' | '_' | '\'') ->
+      (* Name of variable *)
+      L.update_acc lexbuf;
+      IDENT (Utf8.lexeme lexbuf)
+  | Plus '0' .. '9' ->
+      (* Integer literal*)
+      L.update_acc lexbuf;
+      INT_LITERAL (Runtime.integer_of_string (Utf8.lexeme lexbuf))
+  | _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme
+
+(** Main lexing function used outside code blocks *)
+let lex_law (lexbuf : lexbuf) : token =
+  let prev_lexeme = Utf8.lexeme lexbuf in
+  let prev_pos = lexing_positions lexbuf in
+  match%sedlex lexbuf with
+  | "```catala" ->
+      L.is_code := true;
+      L.code_string_acc := "";
+
+      BEGIN_CODE
+  | eof -> EOF
+  | '#', Star white_space, "Master file" -> MASTER_FILE
+  | '>', Star white_space, "Begin metadata" -> BEGIN_METADATA
+  | '>', Star white_space, "End metadata" -> END_METADATA
+  | ( '>',
+      Star white_space,
+      "Include:",
+      Star white_space,
+      Plus (Compl ('@' | '\n')),
+      Star white_space,
+      Opt ('@', Star white_space, "p.", Star white_space, Plus '0' .. '9', Star white_space),
+      '\n' ) ->
+      let extract_components =
+        R.regexp ">\\s*Include\\:\\s*([^@\\n]+)\\s*(@\\s*p\\.\\s*([0-9]+)|)"
+      in
+      let get_component = R.get_substring (R.exec ~rex:extract_components (Utf8.lexeme lexbuf)) in
+      let name = get_component 1 in
+      let pages = try Some (int_of_string (get_component 3)) with Not_found -> None in
+      let pos = lexing_positions lexbuf in
+      if Filename.extension name = ".pdf" then
+        LAW_INCLUDE (Ast.PdfFile ((name, Pos.from_lpos pos), pages))
+      else LAW_INCLUDE (Ast.CatalaFile (name, Pos.from_lpos pos))
+  | '#', Plus '#', Star white_space, Plus (Compl ('[' | ']' | '\n')), Star white_space, '\n' ->
+      get_law_heading lexbuf
+  | ( '#',
+      Plus '#',
+      Star white_space,
+      '[',
+      Star white_space,
+      Plus (Compl ']'),
+      Star white_space,
+      ']',
+      '\n' ) ->
+      let extract_article_title = R.regexp "([#]+)\\s*\\[([^\\]]+)\\]" in
+      let get_substring =
+        R.get_substring (R.exec ~rex:extract_article_title (Utf8.lexeme lexbuf))
+      in
+      let title = get_substring 2 in
+      let get_new_lines = R.regexp "\n" in
+      let new_lines_count =
+        try Array.length (R.extract ~rex:get_new_lines (Utf8.lexeme lexbuf)) with Not_found -> 0
+      in
+      (* the -1 is here to compensate for Sedlex's automatic newline detection around token *)
+      for _i = 1 to new_lines_count - 1 do
+        new_line lexbuf
+      done;
+      let precedence = calc_precedence (get_substring 1) in
+
+      LAW_ARTICLE (title, None, None, precedence)
+  | Plus (Compl ('/' | '#' | '`' | '>')) -> LAW_TEXT (Utf8.lexeme lexbuf)
+  | _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme
+
+(** Entry point of the lexer, distributes to {!val: lex_code} or {!val: lex_law} depending of {!val:
+    Surface.Lexer.is_code}. *)
+let lexer (lexbuf : lexbuf) : token = if !L.is_code then lex_code lexbuf else lex_law lexbuf
--- a/src/catala/surface/lexer_pl.mli
+++ b/src/catala/surface/lexer_pl.mli
@ -0,0 +1,15 @@
+(* This file is part of the Catala compiler, a specification language for tax and social benefits
+   computation rules. Copyright (C) 2020 Inria, contributor: Denis Merigoux
+   <denis.merigoux@inria.fr>
+
+   Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+   in compliance with the License. You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software distributed under the License
+   is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+   or implied. See the License for the specific language governing permissions and limitations under
+   the License. *)
+
+include Lexer.LocalisedLexer
--- a/src/catala/surface/parser_driver.ml
+++ b/src/catala/surface/parser_driver.ml
@ -226,11 +226,13 @@ end
 module Parser_NonVerbose = ParserAux (Lexer)
 module Parser_En = ParserAux (Lexer_en)
 module Parser_Fr = ParserAux (Lexer_fr)
+module Parser_Pl = ParserAux (Lexer_pl)

 let localised_parser : Cli.frontend_lang -> lexbuf -> Ast.source_file_or_master = function
  | `NonVerbose -> Parser_NonVerbose.commands_or_includes
  | `En -> Parser_En.commands_or_includes
  | `Fr -> Parser_Fr.commands_or_includes
+  | `Pl -> Parser_Pl.commands_or_includes

 (** {1 Parsing multiple files} *)

--- a/src/catala/utils/cli.ml
+++ b/src/catala/utils/cli.ml
@ -12,12 +12,12 @@
   or implied. See the License for the specific language governing permissions and limitations under
   the License. *)

-type frontend_lang = [ `Fr | `En | `NonVerbose ]
+type frontend_lang = [ `Fr | `En | `NonVerbose | `Pl]

-type backend_lang = [ `Fr | `En ]
+type backend_lang = [ `Fr | `En | `Pl ]

 let to_backend_lang (lang : frontend_lang) : backend_lang =
-  match lang with `En | `NonVerbose -> `En | `Fr -> `Fr
+  match lang with `En | `NonVerbose -> `En | `Fr -> `Fr | `Pl -> `Pl

 (** Source files to be compiled *)
 let source_files : string list ref = ref []
@ -57,7 +57,8 @@ let unstyled = Arg.(value & flag & info [ "unstyled" ] ~doc:"Removes styling fro
 let optimize = Arg.(value & flag & info [ "optimize"; "O" ] ~doc:"Run compiler optimizations")

 let trace_opt =
-  Arg.(value & flag & info [ "trace"; "t" ] ~doc:"Displays a trace of the interpreter's computation")
+  Arg.(
+    value & flag & info [ "trace"; "t" ] ~doc:"Displays a trace of the interpreter's computation")

 let wrap_weaved_output =
  Arg.(
--- a/src/catala/utils/cli.mli
+++ b/src/catala/utils/cli.mli
@ -12,9 +12,9 @@
   or implied. See the License for the specific language governing permissions and limitations under
   the License. *)

-type frontend_lang = [ `En | `Fr | `NonVerbose ]
+type frontend_lang = [ `En | `Fr | `NonVerbose | `Pl ]

-type backend_lang = [ `En | `Fr ]
+type backend_lang = [ `En | `Fr | `Pl ]

 val to_backend_lang : frontend_lang -> backend_lang