Embed the Pygments lexers in Catala (#428)

This commit is contained in:
Denis Merigoux 2023-03-14 10:25:52 +01:00 committed by GitHub
commit 43ad27037a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 126974 additions and 7635 deletions

View File

@ -73,15 +73,13 @@ Next, install all the packages that Catala depends on with
This should ensure everything is set up for developing on the Catala compiler!
The Python dependencies are installed inside a local virtual environment
(`venv`), so for things like syntax coloration in Catala's literate output to
work, you will have to run
(`venv`). The Makefile rules will use it automatically when building the syntax
cheat-sheet, for example, but if you need to otherwise colorise Catala code, or
use generated Python code, you should run the following command once in every
new shell session:
. _python_venv/bin/activate
from the catala directory to enable it. This needs to be done in every new shell
session, unless you use the predefined `make` rules which already account for
that.
**Warning**: the `make dependencies` command does not include the `z3`
dependency required to enable the proof platform feature of Catala. If you wish
to enable support for the `Proof` command of the Catala compiler, you should

View File

@ -40,6 +40,7 @@ depends: [
"visitors" {>= "20200210"}
"zarith" {>= "1.12"}
"zarith_stubs_js" {>= "v0.14.1"}
"crunch" {>= "3.0.0"}
"alcotest" {with-test & >= "1.5.0"}
"odoc" {with-doc}
"ocamlformat" {cataladevmode & = "0.21.0"}

View File

@ -33,6 +33,13 @@ type backend_option_builtin =
type 'a backend_option = [ backend_option_builtin | `Plugin of 'a ]
(** Associates a {!type: Cli.backend_lang} with its string represtation. *)
let languages = ["en", En; "fr", Fr; "pl", Pl]
let language_code =
let rl = List.map (fun (a, b) -> b, a) languages in
fun l -> List.assoc l rl
let backend_option_to_string = function
| `Interpret -> "Interpret"
| `Makefile -> "Makefile"

View File

@ -33,6 +33,11 @@ type backend_option_builtin =
type 'a backend_option = [ backend_option_builtin | `Plugin of 'a ]
val languages : (string * backend_lang) list
val language_code : backend_lang -> string
(** Returns the lowercase two-letter language code *)
val backend_option_to_string : string backend_option -> string
(** [backend_option_to_string backend] returns the string representation of the
given [backend].*)

View File

@ -26,6 +26,12 @@ let finally f k =
f ();
r
let temp_file pfx sfx =
let f = Filename.temp_file pfx sfx in
if not !Cli.debug_flag then
at_exit (fun () -> try Sys.remove f with _ -> ());
f
let with_out_channel filename f =
let oc = open_out filename in
finally (fun () -> close_out oc) (fun () -> f oc)
@ -60,3 +66,45 @@ let get_out_channel ~source_file ~output_file ?ext () =
let get_formatter_of_out_channel ~source_file ~output_file ?ext () =
let f, with_ = get_out_channel ~source_file ~output_file ?ext () in
f, fun fmt -> with_ (fun oc -> with_formatter_of_out_channel oc fmt)
let with_temp_file pfx sfx ?contents f =
let filename = temp_file pfx sfx in
finally (fun () -> Sys.remove filename)
@@ fun () ->
Option.iter
(fun contents ->
with_out_channel filename (fun oc -> output_string oc contents))
contents;
f filename
let contents filename =
with_in_channel filename (fun ic ->
really_input_string ic (in_channel_length ic))
let process_out ?check_exit cmd args =
let check_exit =
let default n =
if n <> 0 then
Printf.ksprintf failwith "Sub-process %s returned with status %d" cmd n
in
Option.value check_exit ~default
in
let aargs = Array.of_list (cmd :: args) in
let ic =
try Unix.open_process_args_in cmd aargs
with Unix.Unix_error (Unix.ENOENT, _, _) ->
Printf.ksprintf failwith "ERROR: program %s not found" cmd
in
let buf = Buffer.create 4096 in
finally (fun () ->
match Unix.close_process_in ic with
| Unix.WEXITED n -> check_exit n
| Unix.WSIGNALED n | Unix.WSTOPPED n ->
Printf.ksprintf failwith "Sub-process %s was killed (%d)" cmd n)
@@ fun () ->
try
while true do
Buffer.add_channel buf ic 4096
done;
assert false
with End_of_file -> Buffer.contents buf

View File

@ -60,3 +60,22 @@ val get_formatter_of_out_channel :
(** [get_output_format ~source_file ~output_file ?ext ()] returns the infered
filename and its corresponding [with_formatter_of_out_channel] function. If
the [output_file] is equal to [Some "-"] returns a wrapper around [stdout]. *)
val temp_file : string -> string -> string
(** Like [Filename.temp_file], but registers the file for deletion at program
exit unless Cli.debug_flag is set. *)
val with_temp_file :
string -> string -> ?contents:string -> (string -> 'a) -> 'a
(** Creates a temp file (with prefix and suffix like [temp_file], optionally
with the given contents, for the lifetime of the supplied function, then
remove it unconditionally *)
val contents : string -> string
(** Reads the contents of a file as a string *)
val process_out : ?check_exit:(int -> unit) -> string -> string list -> string
(** [process_out cmd args] executes the given command with the specified
arguments, and returns the stdout of the process as a string. [check_exit]
is called on the return code of the sub-process, the default is to fail on
anything but 0. *)

View File

@ -17,9 +17,6 @@
open Catala_utils
(** Associates a {!type: Cli.backend_lang} with its string represtation. *)
let languages = ["en", Cli.En; "fr", Cli.Fr; "pl", Cli.Pl]
(** Associates a file extension with its corresponding {!type: Cli.backend_lang}
string representation. *)
let extensions = [".catala_fr", "fr"; ".catala_en", "en"; ".catala_pl", "pl"]
@ -59,7 +56,7 @@ let driver source_file (options : Cli.options) : int =
try List.assoc ext extensions with Not_found -> ext)
in
let language =
try List.assoc l languages
try List.assoc l Cli.languages
with Not_found ->
Errors.raise_error
"The selected language (%s) is not supported by Catala" l

View File

@ -3,6 +3,21 @@
(public_name catala.literate)
(libraries re catala_utils surface ubase uutf))
(rule
(target pygment_lexers.ml)
(action
(progn
(copy
../../syntax_highlighting/en/pygments/catala_en_lexer/lexer.py
lexer_en.py)
(copy
../../syntax_highlighting/fr/pygments/catala_fr_lexer/lexer.py
lexer_fr.py)
(copy
../../syntax_highlighting/pl/pygments/catala_pl_lexer/lexer.py
lexer_pl.py)
(run ocaml-crunch -e py -m plain -o %{target} .))))
(documentation
(package catala)
(mld_files literate))

View File

@ -30,13 +30,6 @@ module C = Cli
(** Converts double lines into HTML newlines. *)
let pre_html (s : string) = String.trim (run_pandoc s `Html)
(** Raise an error if pygments cannot be found *)
let raise_failed_pygments (command : string) (error_code : int) : 'a =
Errors.raise_error
"Weaving to HTML failed: pygmentize command \"%s\" returned with error \
code %d"
command error_code
(** Partial application allowing to remove first code lines of
[<td class="code">] and [<td class="linenos">] generated HTML. Basically,
remove all code block first lines. *)
@ -57,28 +50,19 @@ let wrap_html
(language : Cli.backend_lang)
(fmt : Format.formatter)
(wrapped : Format.formatter -> unit) : unit =
let pygments = "pygmentize" in
let css_file = Filename.temp_file "catala_css_pygments" "" in
let pygments_args =
[| "-f"; "html"; "-S"; "colorful"; "-a"; ".catala-code" |]
let css_as_string =
call_pygmentize ["-f"; "html"; "-S"; "default"; "-a"; ".catala-code"]
in
let cmd =
Format.sprintf "%s %s > %s" pygments
(String.concat " " (Array.to_list pygments_args))
css_file
in
let return_code = Sys.command cmd in
if return_code <> 0 then raise_failed_pygments cmd return_code;
let oc = open_in css_file in
let css_as_string = really_input_string oc (in_channel_length oc) in
close_in oc;
Format.fprintf fmt
"<head>\n\
"<!DOCTYPE html>\n\
<html>\n\
<head>\n\
<style>\n\
%s\n\
</style>\n\
<meta http-equiv='Content-Type' content='text/html; charset=utf-8'/>\n\
</head>\n\
<body>\n\
<h1>%s<br />\n\
<small>%s Catala version %s</small>\n\
</h1>\n\
@ -110,46 +94,29 @@ let wrap_html
(literal_last_modification language)
ftime)
source_files));
wrapped fmt
wrapped fmt;
Format.fprintf fmt "</body>\n</html>\n"
(** Performs syntax highlighting on a piece of code by using Pygments and the
special Catala lexer. *)
let pygmentize_code (c : string Marked.pos) (language : C.backend_lang) : string
=
let pygmentize_code (c : string Marked.pos) (lang : C.backend_lang) : string =
C.debug_print "Pygmenting the code chunk %s"
(Pos.to_string (Marked.get_mark c));
let temp_file_in = Filename.temp_file "catala_html_pygments" "in" in
let temp_file_out = Filename.temp_file "catala_html_pygments" "out" in
let oc = open_out temp_file_in in
Printf.fprintf oc "%s" (Marked.unmark c);
close_out oc;
let pygments = "pygmentize" in
let pygments_lexer = get_language_extension language in
let pygments_args =
[|
"-l";
pygments_lexer;
"-f";
"html";
"-O";
"style=colorful,anchorlinenos=True,lineanchors=\""
^ String.to_ascii (Pos.get_file (Marked.get_mark c))
^ "\",linenos=table,linenostart="
^ string_of_int (Pos.get_start_line (Marked.get_mark c));
"-o";
temp_file_out;
temp_file_in;
|]
let output =
File.with_temp_file "catala_html_pygments" "in" ~contents:(Marked.unmark c)
@@ fun temp_file_in ->
call_pygmentize ~lang
[
"-f";
"html";
"-O";
"anchorlinenos=True,lineanchors="
^ String.to_ascii (Pos.get_file (Marked.get_mark c))
^ ",linenos=table,linenostart="
^ string_of_int (Pos.get_start_line (Marked.get_mark c));
temp_file_in;
]
in
let cmd =
Format.asprintf "%s %s" pygments
(String.concat " " (Array.to_list pygments_args))
in
let return_code = Sys.command cmd in
if return_code <> 0 then raise_failed_pygments cmd return_code;
let oc = open_in temp_file_out in
let output = really_input_string oc (in_channel_length oc) in
close_in oc;
(* Remove code blocks delimiters needed by [Pygments]. *)
let trimmed_output =
output |> remove_cb_first_lines |> remove_cb_last_lines

View File

@ -59,7 +59,8 @@ let wrap_latex
\usepackage{fontspec}
\usepackage[hidelinks]{hyperref}
%s
\usepackage{minted}
\usepackage{fancyvrb}
\usepackage{color}
\usepackage{longtable}
\usepackage{booktabs,tabularx}
\usepackage{newunicodechar}
@ -122,8 +123,10 @@ let wrap_latex
\newunicodechar{}{$\rightarrow$}
\newunicodechar{}{$\neq$}
\newcommand*\FancyVerbStartString{\PYG{l+s}{```catala}}
\newcommand*\FancyVerbStopString{\PYG{l+s}{```}}
%s
\newcommand*\FancyVerbStartString{\PY{l+s}{```catala}}
\newcommand*\FancyVerbStopString{\PY{l+s}{```}}
\fvset{
numbers=left,
@ -157,6 +160,7 @@ codes={\catcode`\$=3\catcode`\^=7}
(match language with Fr -> "\\setmainfont{Marianne}" | _ -> "")
(* for France, we use the official font of the French state design system
https://gouvfr.atlassian.net/wiki/spaces/DB/pages/223019527/Typographie+-+Typography *)
(call_pygmentize ["-f"; "latex"; "-S"; "default"])
(literal_title language)
(literal_generated_by language)
Cli.version
@ -185,6 +189,24 @@ codes={\catcode`\$=3\catcode`\^=7}
(** {1 Weaving} *)
let code_block ~meta lang fmt (code, pos) =
(* Pygments does'nt allow to specify multiple 'verboptions' (escaping bug ?)
so we call it with "nowrap" and write the FancyVrb wrapper ourselves. *)
let pygmentized_code =
let contents = String.concat "" ["```catala\n"; code; "```"] in
File.with_temp_file "catala_latex_pygments" "in" ~contents
@@ fun temp_file_in ->
call_pygmentize ~lang ["-f"; "latex"; "-O"; "nowrap=true"; temp_file_in]
in
Format.fprintf fmt
{latex|\begin{Verbatim}[commandchars=\\\{\},numbers=left,firstnumber=%d,stepnumber=1,label={\hspace*{\fill}\texttt{%s}}%s]|latex}
(Pos.get_start_line pos + 1)
(pre_latexify (Filename.basename (Pos.get_file pos)))
(if meta then ",numbersep=9mm" else "");
Format.pp_print_newline fmt ();
Format.pp_print_string fmt pygmentized_code;
Format.pp_print_string fmt "\\end{Verbatim}\n"
let rec law_structure_to_latex
(language : C.backend_lang)
(print_only_law : bool)
@ -228,15 +250,7 @@ let rec law_structure_to_latex
let block_content = Marked.unmark c in
check_exceeding_lines start_line filename block_content;
update_lines_of_code c;
Format.fprintf fmt
"\\begin{minted}[label={\\hspace*{\\fill}\\texttt{%s}},firstnumber=%d]{%s}\n\
```catala\n\
%s```\n\
\\end{minted}"
(pre_latexify (Filename.basename (Pos.get_file (Marked.get_mark c))))
(Pos.get_start_line (Marked.get_mark c) + 1)
(get_language_extension language)
(Marked.unmark c)
code_block ~meta:false language fmt c
| A.CodeBlock (_, c, true) when not print_only_law ->
let metadata_title =
match language with
@ -253,15 +267,11 @@ let rec law_structure_to_latex
"\\begin{tcolorbox}[colframe=OliveGreen, breakable, \
title=\\textcolor{black}{\\texttt{%s}},title after \
break=\\textcolor{black}{\\texttt{%s}},before skip=1em, after skip=1em]\n\
\\begin{minted}[numbersep=9mm, firstnumber=%d, \
label={\\hspace*{\\fill}\\texttt{%s}}]{%s}\n\
```catala\n\
%s```\n\
\\end{minted}\n\
%a\n\
\\end{tcolorbox}"
metadata_title metadata_title start_line (pre_latexify filename)
(get_language_extension language)
block_content
metadata_title metadata_title
(code_block ~meta:true language)
c
| A.CodeBlock _ -> ()
(** {1 API} *)

View File

@ -124,3 +124,25 @@ let check_exceeding_lines
ANSITerminal.[red]
"%s"
String.(sub s max_len (len_s - max_len)))))
let call_pygmentize ?lang args =
let cmd = "pygmentize" in
let check_exit n =
if n <> 0 then
Errors.raise_error
"Weaving failed: pygmentize command %S returned with error code %d"
(String.concat " " (cmd :: args))
n
in
match lang with
| None -> File.process_out ~check_exit cmd args
| Some lang ->
let lexer_py =
let lexer_fname = "lexer_" ^ Cli.language_code lang ^ ".py" in
match Pygment_lexers.read lexer_fname with
| None -> failwith "Pygments lexer not found for this language"
| Some lexer -> lexer
in
File.with_temp_file "pygments_lexer_" ".py" ~contents:lexer_py
@@ fun pyg_lexer ->
File.process_out ~check_exit cmd ("-l" :: pyg_lexer :: "-x" :: args)

View File

@ -46,3 +46,8 @@ val run_pandoc : string -> [ `Html | `Latex ] -> string
val check_exceeding_lines : ?max_len:int -> int -> string -> string -> unit
(** [check_exceeding_lines ~max_len start_line filename content] prints a
warning message for each lines of [content] exceeding [max_len] characters. *)
val call_pygmentize : ?lang:Cli.backend_lang -> string list -> string
(** Calls the [pygmentize] command with the given arguments, and returns the
results as a string. If [lang] is specified, the proper arguments for the
Catala lexer are already passed. *)

134325
french_law/js/french_law.js generated

File diff suppressed because one or more lines are too long

View File

@ -3,9 +3,9 @@ from pygments.token import *
import re
__all__=['CatalaEnLexer']
__all__=['CustomLexer']
class CatalaEnLexer(RegexLexer):
class CustomLexer(RegexLexer):
name = 'CatalaEn'
aliases = ['catala_en']
filenames = ['*.catala_en']

View File

@ -8,4 +8,4 @@ version = "0.8"
dependencies = ["pygments"]
[project.entry-points."pygments.lexers"]
catala-en-lexer = "catala_en_lexer.lexer:CatalaEnLexer"
catala-en-lexer = "catala_en_lexer.lexer:CustomLexer"

View File

@ -3,9 +3,9 @@ from pygments.token import *
import re
__all__=['CatalaFrLexer']
__all__=['CustomLexer']
class CatalaFrLexer(RegexLexer):
class CustomLexer(RegexLexer):
name = 'CatalaFr'
aliases = ['catala_fr']
filenames = ['*.catala_fr']

View File

@ -8,4 +8,4 @@ version = "0.8"
dependencies = ["pygments"]
[project.entry-points."pygments.lexers"]
catala-fr-lexer = "catala_fr_lexer.lexer:CatalaFrLexer"
catala-fr-lexer = "catala_fr_lexer.lexer:CustomLexer"

View File

@ -3,9 +3,9 @@ from pygments.token import *
import re
__all__=['CatalaPlLexer']
__all__=['CustomLexer']
class CatalaPlLexer(RegexLexer):
class CustomLexer(RegexLexer):
name = 'CatalaPl'
aliases = ['catala_pl']
filenames = ['*.catala_pl']

View File

@ -8,4 +8,4 @@ version = "0.8"
dependencies = ["pygments"]
[project.entry-points."pygments.lexers"]
catala-pl-lexer = "catala_pl_lexer.lexer:CatalaPlLexer"
catala-pl-lexer = "catala_pl_lexer.lexer:CustomLexer"