Add non-verbose lexing

Nicolas Chataing 2020-08-03 18:07:09 +02:00
parent 15c1df9fa7
commit 84835b0a79
10 changed files with 452 additions and 37 deletions

View File

@@ -21,6 +21,7 @@ depends: [
"cmdliner" {>= "1.0.4"}
"re" {>= "1.9.0"}
"dune" {build}
"ocamlgraph" {>= "1.8.8"}
]
build: [
["dune" "subst"] {pinned}

View File

@@ -27,6 +27,7 @@
(cmdliner (>= 1.0.4))
(re (>= 1.9.0))
(dune (and :build ))
(ocamlgraph (>= 1.8.8))
)
)

View File

@@ -45,9 +45,14 @@ let language =
Arg.(
value
& opt (some string) None
& info [ "l"; "language" ] ~docv:"LANG" ~doc:"Input language among: en, fr (default fr)")
& info [ "l"; "language" ] ~docv:"LANG" ~doc:"Input language among: en, fr, nv (default nv)")
type language_option = Fr | En
type language_option = [ `Fr | `En | `NonVerbose ]
type reduced_lang_option = [ `Fr | `En ]
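(* Weaving backends (LaTeX/HTML) have no dedicated non-verbose strings: [reduce_lang] folds `NonVerbose back onto `En for them. *)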
let reduce_lang (lang : language_option) : reduced_lang_option =
match lang with `En | `NonVerbose -> `En | `Fr -> `Fr
let output =
Arg.(

View File

@@ -25,13 +25,13 @@ let driver (source_file : string) (debug : bool) (wrap_weaved_output : bool)
let language =
match language with
| Some l ->
if l = "fr" then Cli.Fr
else if l = "en" then Cli.En
if l = "fr" then `Fr
else if l = "en" then `En
else begin
Cli.error_print (Printf.sprintf "The selected language (%s) is not supported by Catala" l);
exit 1
end
| None -> Cli.Fr
| None -> `NonVerbose
in
let backend =
if backend = "Makefile" then Cli.Makefile
@@ -68,8 +68,8 @@ let driver (source_file : string) (debug : bool) (wrap_weaved_output : bool)
try
let weaved_output =
match backend with
| Cli.Latex -> Latex.ast_to_latex program language
| Cli.Html -> Html.ast_to_html program pygmentize_loc language
| Cli.Latex -> Latex.ast_to_latex program (Cli.reduce_lang language)
| Cli.Html -> Html.ast_to_html program pygmentize_loc (Cli.reduce_lang language)
| _ -> assert false
in
let output_file =

View File

@@ -24,6 +24,7 @@ let pre_html (s : string) = s
let wrap_html (code : string) (source_files : string list) (custom_pygments : string option)
(language : Cli.language_option) : string =
let language = C.reduce_lang language in
let pygments = match custom_pygments with Some p -> p | None -> "pygmentize" in
let css_file = Filename.temp_file "catala_css_pygments" "" in
let pygments_args = [| "-f"; "html"; "-S"; "colorful"; "-a"; ".catala-code" |] in
@@ -56,15 +57,15 @@ let wrap_html (code : string) (source_files : string list) (custom_pygments : st
%s"
css_as_string
( match language with
| C.Fr -> "Implémentation de texte législatif"
| C.En -> "Legislative text implementation" )
(match language with C.Fr -> "Document généré par" | C.En -> "Document generated by")
| `Fr -> "Implémentation de texte législatif"
| `En -> "Legislative text implementation" )
(match language with `Fr -> "Document généré par" | `En -> "Document generated by")
( match Build_info.V1.version () with
| None -> "n/a"
| Some v -> Build_info.V1.Version.to_string v )
( match language with
| C.Fr -> "Fichiers sources tissés dans ce document"
| C.En -> "Source files weaved in this document" )
| `Fr -> "Fichiers sources tissés dans ce document"
| `En -> "Source files weaved in this document" )
(String.concat "\n"
(List.map
(fun filename ->
@@ -76,14 +77,12 @@ let wrap_html (code : string) (source_files : string list) (custom_pygments : st
in
Printf.sprintf "<li><tt>%s</tt>, %s %s</li>"
(pre_html (Filename.basename filename))
( match language with
| C.Fr -> "dernière modification le"
| C.En -> "last modification" )
(match language with `Fr -> "dernière modification le" | `En -> "last modification")
ftime)
source_files))
code
let pygmentize_code (c : string Pos.marked) (language : C.language_option)
let pygmentize_code (c : string Pos.marked) (language : C.reduced_lang_option)
(custom_pygments : string option) : string =
C.debug_print (Printf.sprintf "Pygmenting the code chunk %s" (Pos.to_string (Pos.get_position c)));
let temp_file_in = Filename.temp_file "catala_html_pygments" "in" in
@@ -92,7 +91,7 @@ let pygmentize_code (c : string Pos.marked) (language : C.language_option)
Printf.fprintf oc "%s" (Pos.unmark c);
close_out oc;
let pygments = match custom_pygments with Some p -> p | None -> "pygmentize" in
let pygments_lexer = match language with C.Fr -> "catala_fr" | C.En -> "catala_en" in
let pygments_lexer = match language with `Fr -> "catala_fr" | `En -> "catala_en" in
let pygments_args =
[|
"-l";
@@ -122,7 +121,7 @@ let pygmentize_code (c : string Pos.marked) (language : C.language_option)
type program_state = InsideArticle | OutsideArticle
let program_item_to_html (i : A.program_item) (custom_pygments : string option)
(language : C.language_option) (state : program_state) : string * program_state =
(language : C.reduced_lang_option) (state : program_state) : string * program_state =
let closing_div =
(* First we terminate the div of the previous article if need be *)
match (i, state) with
@@ -146,7 +145,7 @@ let program_item_to_html (i : A.program_item) (custom_pygments : string option)
P.sprintf
"<div class='article-container'>\n\n<div class='article-title'><a href='%s'>%s</a></div>"
( match (a.law_article_id, language) with
| Some id, C.Fr ->
| Some id, `Fr ->
let ltime = Unix.localtime (Unix.time ()) in
P.sprintf "https://beta.legifrance.gouv.fr/codes/id/%s/%d-%02d-%02d" id
(1900 + ltime.Unix.tm_year) (ltime.Unix.tm_mon + 1) ltime.Unix.tm_mday
@@ -179,7 +178,7 @@ let program_item_to_html (i : A.program_item) (custom_pygments : string option)
(closing_div ^ item_string, new_state)
let ast_to_html (program : A.program) (custom_pygments : string option)
(language : C.language_option) : string =
(language : C.reduced_lang_option) : string =
let i_s, _ =
List.fold_left
(fun (acc, state) i ->

View File

@@ -33,6 +33,7 @@ let pre_latexify (s : string) =
let wrap_latex (code : string) (source_files : string list) (custom_pygments : string option)
(language : Cli.language_option) =
let language = C.reduce_lang language in
Printf.sprintf
"\\documentclass[11pt, a4paper]{article}\n\n\
\\usepackage[T1]{fontenc}\n\
@@ -76,20 +77,20 @@ let wrap_latex (code : string) (source_files : string list) (custom_pygments : s
\\[\\star\\star\\star\\]\\\\\n\
%s\n\n\
\\end{document}"
(match language with C.Fr -> "french" | C.En -> "english")
(match language with `Fr -> "french" | `En -> "english")
( match custom_pygments with
| None -> ""
| Some p -> Printf.sprintf "\\renewcommand{\\MintedPygmentize}{%s}" p )
( match language with
| C.Fr -> "Implémentation de texte législatif"
| C.En -> "Legislative text implementation" )
(match language with C.Fr -> "Document généré par" | C.En -> "Document generated by")
| `Fr -> "Implémentation de texte législatif"
| `En -> "Legislative text implementation" )
(match language with `Fr -> "Document généré par" | `En -> "Document generated by")
( match Build_info.V1.version () with
| None -> "n/a"
| Some v -> Build_info.V1.Version.to_string v )
( match language with
| C.Fr -> "Fichiers sources tissés dans ce document"
| C.En -> "Source files weaved in this document" )
| `Fr -> "Fichiers sources tissés dans ce document"
| `En -> "Source files weaved in this document" )
(String.concat ","
(List.map
(fun filename ->
@@ -101,9 +102,7 @@ let wrap_latex (code : string) (source_files : string list) (custom_pygments : s
in
Printf.sprintf "\\item\\texttt{%s}, %s %s"
(pre_latexify (Filename.basename filename))
( match language with
| C.Fr -> "dernière modification le"
| C.En -> "last modification" )
(match language with `Fr -> "dernière modification le" | `En -> "last modification")
ftime)
source_files))
code
@@ -123,7 +122,7 @@ let math_syms_replace (c : string) : string =
in
R.substitute ~rex:syms ~subst:syms2cmd c
let program_item_to_latex (i : A.program_item) (language : C.language_option) : string =
let program_item_to_latex (i : A.program_item) (language : C.reduced_lang_option) : string =
match i with
| A.LawHeading (title, precedence) ->
P.sprintf "\\%ssection*{%s}"
@@ -138,10 +137,10 @@ let program_item_to_latex (i : A.program_item) (language : C.language_option) :
\\end{minted}"
(pre_latexify (Filename.basename (Pos.get_file (Pos.get_position c))))
(Pos.get_start_line (Pos.get_position c))
(match language with C.Fr -> "catala_fr" | C.En -> "catala_en")
(match language with `Fr -> "catala_fr" | `En -> "catala_en")
(math_syms_replace (Pos.unmark c))
| A.MetadataBlock (_, c) ->
let metadata_title = match language with C.Fr -> "Métadonnées" | C.En -> "Metadata" in
let metadata_title = match language with `Fr -> "Métadonnées" | `En -> "Metadata" in
P.sprintf
"\\begin{tcolorbox}[colframe=OliveGreen, breakable, \
title=\\textcolor{black}{\\texttt{%s}},title after \
@@ -153,7 +152,7 @@ let program_item_to_latex (i : A.program_item) (language : C.language_option) :
metadata_title metadata_title
(Pos.get_start_line (Pos.get_position c))
(pre_latexify (Filename.basename (Pos.get_file (Pos.get_position c))))
(match language with C.Fr -> "catala_fr" | C.En -> "catala_en")
(match language with `Fr -> "catala_fr" | `En -> "catala_en")
(math_syms_replace (Pos.unmark c))
| A.LawInclude (A.PdfFile ((file, _), page)) ->
let label = file ^ match page with None -> "" | Some p -> P.sprintf "_page_%d," p in
@@ -165,5 +164,5 @@ let program_item_to_latex (i : A.program_item) (language : C.language_option) :
file label
| A.LawInclude (A.CatalaFile _ | A.LegislativeText _) -> ""
let ast_to_latex (program : A.program) (language : C.language_option) : string =
let ast_to_latex (program : A.program) (language : C.reduced_lang_option) : string =
String.concat "\n\n" (List.map (fun i -> program_item_to_latex i language) program.program_items)

src/catala/parsing/lexer.ml (new file, 397 lines)
View File

@@ -0,0 +1,397 @@
(* This file is part of the Catala compiler, a specification language for tax and social benefits
computation rules. Copyright (C) 2020 Inria, contributor: Denis Merigoux
<denis.merigoux@inria.fr>
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
or implied. See the License for the specific language governing permissions and limitations under
the License. *)
open Parser
open Sedlexing
module R = Re.Pcre
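(* The lexer works in two modes: [lex_law] scans the legislative text and [lex_code] scans the contents of "/*" ... "*/" blocks. [code_string_acc] accumulates the raw text of the current code block so that END_CODE can carry it for weaving. *)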
let is_code : bool ref = ref false
let code_string_acc : string ref = ref ""
let update_acc (lexbuf : lexbuf) : unit = code_string_acc := !code_string_acc ^ Utf8.lexeme lexbuf
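(* Keyword table of the non-verbose dialect, handed to the parser driver together with the lexer (see the parser_driver change below). *)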
let token_list : (string * token) list =
[
("scope", SCOPE);
("~", CONSEQUENCE);
("data", DATA);
("depends on", DEPENDS);
("declaration", DECLARATION);
("context", CONTEXT);
("decreasing", DECREASING);
("increasing", INCREASING);
("of", OF);
("collection", COLLECTION);
("enumeration", ENUM);
("integer", INTEGER);
("amount", MONEY);
("text", TEXT);
("decimal", DECIMAL);
("date", DATE);
("boolean", BOOLEAN);
("sum", SUM);
("fulfilled", FILLED);
("def", DEFINITION);
("equals", DEFINED_AS);
("match", MATCH);
("with pattern", WITH);
("?", UNDER_CONDITION);
("if", IF);
("then", THEN);
("else", ELSE);
("content", CONTENT);
("structure", STRUCT);
("optional", OPTIONAL);
("assertion", ASSERTION);
("varies", VARIES);
("with", WITH_V);
("for", FOR);
("all", ALL);
("we have", WE_HAVE);
("fixed", FIXED);
("by", BY);
("rule", RULE);
("exists", EXISTS);
("such", SUCH);
("that", THAT);
("now", NOW);
("and", AND);
("or", OR);
("not", NOT);
("number", CARDINAL);
("year", YEAR);
("true", TRUE);
("false", FALSE);
]
@ Lexer_fr.token_list_language_agnostic
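(* Lexer for code blocks. Every matched lexeme is also appended to [code_string_acc] through [update_acc] so the whole block is echoed back in END_CODE. *)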
let rec lex_code (lexbuf : lexbuf) : token =
match%sedlex lexbuf with
| white_space ->
(* Whitespaces *)
update_acc lexbuf;
lex_code lexbuf
| '#', Star (Compl '\n'), '\n' ->
(* Comments *)
update_acc lexbuf;
lex_code lexbuf
| "*/" ->
(* End of code section *)
is_code := false;
END_CODE !code_string_acc
| "scope" ->
update_acc lexbuf;
SCOPE
| "data" ->
update_acc lexbuf;
DATA
| "depends on" ->
update_acc lexbuf;
DEPENDS
| "declaration" ->
update_acc lexbuf;
DECLARATION
| "context" ->
update_acc lexbuf;
CONTEXT
| "decreasing" ->
update_acc lexbuf;
DECREASING
| "increasing" ->
update_acc lexbuf;
INCREASING
| "of" ->
update_acc lexbuf;
OF
| "collection" ->
update_acc lexbuf;
COLLECTION
| "enumeration" ->
update_acc lexbuf;
ENUM
| "int" ->
update_acc lexbuf;
INTEGER
| "amount" ->
update_acc lexbuf;
MONEY
| "text" ->
update_acc lexbuf;
TEXT
| "dec" ->
update_acc lexbuf;
DECIMAL
| "date" ->
update_acc lexbuf;
DATE
| "bool" ->
update_acc lexbuf;
BOOLEAN
| "sum" ->
update_acc lexbuf;
SUM
| "fulfilled" ->
update_acc lexbuf;
FILLED
| "def" ->
update_acc lexbuf;
DEFINITION
| "=" ->
update_acc lexbuf;
DEFINED_AS
| "match" ->
update_acc lexbuf;
MATCH
| "with pattern" ->
update_acc lexbuf;
WITH
| "?" ->
update_acc lexbuf;
UNDER_CONDITION
| "if" ->
update_acc lexbuf;
IF
| "~" ->
update_acc lexbuf;
CONSEQUENCE
| "then" ->
update_acc lexbuf;
THEN
| "else" ->
update_acc lexbuf;
ELSE
| "condition" ->
update_acc lexbuf;
CONDITION
| "content" ->
update_acc lexbuf;
CONTENT
| "structure" ->
update_acc lexbuf;
STRUCT
| "optional" ->
update_acc lexbuf;
OPTIONAL
| "assertion" ->
update_acc lexbuf;
ASSERTION
| "varies" ->
update_acc lexbuf;
VARIES
| "with" ->
update_acc lexbuf;
WITH_V
| "for" ->
update_acc lexbuf;
FOR
| "all" ->
update_acc lexbuf;
ALL
| "we have" ->
update_acc lexbuf;
WE_HAVE
| "fixed" ->
update_acc lexbuf;
FIXED
| "by" ->
update_acc lexbuf;
BY
| "rule" ->
update_acc lexbuf;
RULE
| "exists" ->
update_acc lexbuf;
EXISTS
| "in" ->
update_acc lexbuf;
IN
| "such" ->
update_acc lexbuf;
SUCH
| "that" ->
update_acc lexbuf;
THAT
| "now" ->
update_acc lexbuf;
NOW
| "and" ->
update_acc lexbuf;
AND
| "or" ->
update_acc lexbuf;
OR
| "not" ->
update_acc lexbuf;
NOT
| "number" ->
update_acc lexbuf;
CARDINAL
| "true" ->
update_acc lexbuf;
TRUE
| "false" ->
update_acc lexbuf;
FALSE
| "year" ->
update_acc lexbuf;
YEAR
| 0x24, Star white_space, '0' .. '9', Star ('0' .. '9' | ','), Opt ('.', Rep ('0' .. '9', 0 .. 2))
->
let extract_parts = R.regexp "([0-9]([0-9,]*[0-9]|))(.([0-9]{0,2})|)" in
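(* e.g. "$ 1,234.56": group 1 captures the units "1,234" (commas stripped below), group 4 the cents "56", giving MONEY_AMOUNT (1234, 56) *)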
let full_str = Utf8.lexeme lexbuf in
let only_numbers_str = String.trim (String.sub full_str 1 (String.length full_str - 1)) in
let parts = R.get_substring (R.exec ~rex:extract_parts only_numbers_str) in
(* Money amount literal: units before the decimal point, cents after *)
let units = parts 1 in
let remove_commas = R.regexp "," in
let units = int_of_string (R.substitute ~rex:remove_commas ~subst:(fun _ -> "") units) in
let cents = try int_of_string (parts 4) with Not_found -> 0 in
update_acc lexbuf;
MONEY_AMOUNT (units, cents)
| Plus '0' .. '9', '.', Star '0' .. '9' ->
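(* e.g. "3.14" yields DECIMAL_LITERAL (3, 14) *)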
let extract_dec_parts = R.regexp "([0-9]+)\\.([0-9]*)" in
let dec_parts = R.get_substring (R.exec ~rex:extract_dec_parts (Utf8.lexeme lexbuf)) in
(* Decimal literal *)
update_acc lexbuf;
DECIMAL_LITERAL (int_of_string (dec_parts 1), int_of_string (dec_parts 2))
| "->" ->
update_acc lexbuf;
ARROW
| '.' ->
update_acc lexbuf;
DOT
| "<=" ->
update_acc lexbuf;
LESSER_EQUAL
| '<' ->
update_acc lexbuf;
LESSER
| ">=" ->
update_acc lexbuf;
GREATER_EQUAL
| '>' ->
update_acc lexbuf;
GREATER
| "!=" ->
update_acc lexbuf;
NOT_EQUAL
| '=' ->
update_acc lexbuf;
EQUAL
| '(' ->
update_acc lexbuf;
LPAREN
| ')' ->
update_acc lexbuf;
RPAREN
| '+' ->
update_acc lexbuf;
PLUS
| '-' ->
update_acc lexbuf;
MINUS
| '*' ->
update_acc lexbuf;
MULT
| '%' ->
update_acc lexbuf;
PERCENT
| '/' ->
update_acc lexbuf;
DIV
| '|' ->
update_acc lexbuf;
VERTICAL
| ':' ->
update_acc lexbuf;
COLON
| "--" ->
update_acc lexbuf;
ALT
| uppercase, Star (uppercase | lowercase | '0' .. '9' | '_' | '\'') ->
(* Name of constructor *)
update_acc lexbuf;
CONSTRUCTOR (Utf8.lexeme lexbuf)
| lowercase, Star (lowercase | uppercase | '0' .. '9' | '_' | '\'') ->
(* Name of variable *)
update_acc lexbuf;
IDENT (Utf8.lexeme lexbuf)
| Plus '0' .. '9' ->
(* Integer literal *)
update_acc lexbuf;
INT_LITERAL (int_of_string (Utf8.lexeme lexbuf))
| _ -> Errors.lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf)
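(* Lexer for the surrounding law text: "@"-delimited markers introduce headings, articles and inclusions; anything else is plain law text. *)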
let rec lex_law (lexbuf : lexbuf) : token =
match%sedlex lexbuf with
| '\n' -> lex_law lexbuf
| "/*" ->
is_code := true;
code_string_acc := "";
BEGIN_CODE
| eof -> EOF
| "@@", Star white_space, "Master file", Star white_space, "@@" -> MASTER_FILE
| "@@", Star white_space, "Begin metadata", Star white_space, "@@" -> BEGIN_METADATA
| "@@", Star white_space, "End metadata", Star white_space, "@@" -> END_METADATA
| ( "@@",
Star white_space,
"Include:",
Star white_space,
Plus (Compl '@'),
Star white_space,
Opt ('@', Star white_space, "p.", Star white_space, Plus '0' .. '9', Star white_space),
"@@" ) ->
let extract_components =
R.regexp "@@\\s*Include\\:\\s*([^@]+)\\s*(@\\s*p\\.\\s*([0-9]+)|)@@"
in
let get_component = R.get_substring (R.exec ~rex:extract_components (Utf8.lexeme lexbuf)) in
let name = get_component 1 in
let pages = try Some (int_of_string (get_component 3)) with Not_found -> None in
let pos = lexing_positions lexbuf in
if Filename.extension name = ".pdf" then LAW_INCLUDE (Ast.PdfFile ((name, pos), pages))
else if Filename.extension name = ".catala" then LAW_INCLUDE (Ast.CatalaFile (name, pos))
else Errors.lexer_error (lexing_positions lexbuf) "this type of file cannot be included"
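(* Law headings: "@@Some title@@" followed by an optional run of '+' whose length gives the heading precedence. *)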
| "@@", Plus (Compl '@'), "@@", Star '+' ->
let extract_code_title = R.regexp "@@([^@]+)@@([\\+]*)" in
let get_match = R.get_substring (R.exec ~rex:extract_code_title (Utf8.lexeme lexbuf)) in
let get_new_lines = R.regexp "\n" in
let new_lines_count =
try Array.length (R.extract ~rex:get_new_lines (Utf8.lexeme lexbuf)) with Not_found -> 0
in
for _i = 1 to new_lines_count do
new_line lexbuf
done;
let law_title = get_match 1 in
let precedence = String.length (get_match 2) in
LAW_HEADING (law_title, precedence)
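(* Law articles: "@Article title@". *)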
| "@", Plus (Compl '@'), "@" ->
let extract_article_title = R.regexp "@([^@]+)@" in
let title = R.get_substring (R.exec ~rex:extract_article_title (Utf8.lexeme lexbuf)) 1 in
let get_new_lines = R.regexp "\n" in
let new_lines_count =
try Array.length (R.extract ~rex:get_new_lines (Utf8.lexeme lexbuf)) with Not_found -> 0
in
(* the -1 compensates for Sedlex's automatic newline detection around the token *)
for _i = 1 to new_lines_count - 1 do
new_line lexbuf
done;
LAW_ARTICLE (title, None, None)
| Plus (Compl ('@' | '/' | '\n')) -> LAW_TEXT (Utf8.lexeme lexbuf)
| _ -> Errors.lexer_error (lexing_positions lexbuf) (Utf8.lexeme lexbuf)
let lexer lexbuf = if !is_code then lex_code lexbuf else lex_law lexbuf

View File

@@ -142,10 +142,16 @@ let rec parse_source_files (source_files : string list) (language : Cli.language
try
Parse_utils.current_file := source_file;
let lexer_lang =
match language with Cli.Fr -> Lexer_fr.lexer_fr | Cli.En -> Lexer_en.lexer_en
match language with
| `Fr -> Lexer_fr.lexer_fr
| `En -> Lexer_en.lexer_en
| `NonVerbose -> Lexer.lexer
in
let token_list_lang =
match language with Cli.Fr -> Lexer_fr.token_list_fr | Cli.En -> Lexer_en.token_list_en
match language with
| `Fr -> Lexer_fr.token_list_fr
| `En -> Lexer_en.token_list_en
| `NonVerbose -> Lexer.token_list
in
let commands_or_includes =
sedlex_with_menhir lexer_lang token_list_lang Parser.Incremental.source_file_or_master

View File

@@ -175,7 +175,7 @@ let driver (file : string) (debug : bool) (client_id : string) (client_secret :
if debug then Catala.Cli.debug_flag := true;
let access_token = Api.get_token client_id client_secret in
(* LegiFrance is only supported for French texts *)
let program = Catala.Parser_driver.parse_source_files [ file ] Catala.Cli.Fr in
let program = Catala.Parser_driver.parse_source_files [ file ] `Fr in
let article_text_acc =
List.fold_left
(fun article_text_acc item ->

tests/non_verbose.catala (new file, 7 lines)
View File

@@ -0,0 +1,7 @@
/*
declaration scope A:
context x content int
scope A:
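# non-verbose keywords: "def" = definition, "?" = under condition, "~" = consequence, "=" = defined as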
def x ? true ~= 42
*/