LaTeX literate output: handle pygments coloration from within Catala

This leverages the embedded lexer already used for HTML output, and uses the
LaTeX pygments backend to colorise code directly, without the need for `minted`.
This commit is contained in:
Louis Gesbert 2023-03-13 18:33:15 +01:00
parent 5282aec400
commit f1e44619e0
7 changed files with 126 additions and 84 deletions

View File

@ -73,15 +73,13 @@ Next, install all the packages that Catala depends on with
This should ensure everything is set up for developing on the Catala compiler!
The Python dependencies are installed inside a local virtual environment
(`venv`), so for things like syntax coloration in Catala's literate output to
work, you will have to run
(`venv`). The Makefile rules will use it automatically when building the syntax
cheat-sheet, for example, but if you need to otherwise colorise Catala code, or
use generated Python code, you should run the following command once in every
new shell session:
. _python_venv/bin/activate
from the catala directory to enable it. This needs to be done in every new shell
session, unless you use the predefined `make` rules which already account for
that.
**Warning**: the `make dependencies` command does not include the `z3`
dependency required to enable the proof platform feature of Catala. If you wish
to enable support for the `Proof` command of the Catala compiler, you should

View File

@ -66,3 +66,45 @@ let get_out_channel ~source_file ~output_file ?ext () =
(** Same arguments as [get_out_channel]. Returns the first component of
    [get_out_channel]'s result unchanged, paired with a wrapper that passes the
    channel it is given, together with the caller's callback, to
    [with_formatter_of_out_channel]. *)
let get_formatter_of_out_channel ~source_file ~output_file ?ext () =
  let target, with_channel =
    get_out_channel ~source_file ~output_file ?ext ()
  in
  let with_formatter k =
    with_channel (fun chan -> with_formatter_of_out_channel chan k)
  in
  target, with_formatter
(** [with_temp_file pfx sfx ?contents f] obtains a file name from
    [temp_file pfx sfx], writes [contents] into it when provided, and returns
    [f filename]. The file is deleted with [Sys.remove] by the cleanup action
    handed to [finally]. *)
let with_temp_file pfx sfx ?contents f =
  let path = temp_file pfx sfx in
  let cleanup () = Sys.remove path in
  let body () =
    (* Only touch the file when there is something to write. *)
    (match contents with
     | None -> ()
     | Some text -> with_out_channel path (fun oc -> output_string oc text));
    f path
  in
  finally cleanup body
(** [contents filename] opens [filename] through [with_in_channel] and returns
    its whole content, sized by [in_channel_length], as a string. *)
let contents filename =
  let read_all ic =
    let len = in_channel_length ic in
    really_input_string ic len
  in
  with_in_channel filename read_all
(** [process_out ?check_exit cmd args] runs [cmd] with arguments [args] and
    returns everything the sub-process wrote on its standard output.

    [check_exit] receives the exit code once the process has been closed; by
    default, any non-zero code raises [Failure]. [Failure] is also raised when
    starting the program fails with [ENOENT], or when the sub-process was
    signalled or stopped. *)
let process_out ?check_exit cmd args =
  let check_exit =
    match check_exit with
    | Some check -> check
    | None ->
      fun status ->
        if status <> 0 then
          Printf.ksprintf failwith "Sub-process %s returned with status %d" cmd
            status
  in
  (* By convention the program name is also the first element of argv. *)
  let argv = Array.of_list (cmd :: args) in
  let ic =
    match Unix.open_process_args_in cmd argv with
    | ic -> ic
    | exception Unix.Unix_error (Unix.ENOENT, _, _) ->
      Printf.ksprintf failwith "ERROR: program %s not found" cmd
  in
  let buf = Buffer.create 4096 in
  (* Cleanup action: close the pipe, wait for the process, check its status. *)
  let reap () =
    match Unix.close_process_in ic with
    | Unix.WEXITED status -> check_exit status
    | Unix.WSIGNALED signal | Unix.WSTOPPED signal ->
      Printf.ksprintf failwith "Sub-process %s was killed (%d)" cmd signal
  in
  (* [Buffer.add_channel] raises [End_of_file] when fewer than the requested
     number of characters remain, but still stores what it could read, so the
     buffer is complete when the exception is caught. *)
  let rec drain () =
    match Buffer.add_channel buf ic 4096 with
    | () -> drain ()
    | exception End_of_file -> Buffer.contents buf
  in
  finally reap drain

View File

@ -64,3 +64,18 @@ val get_formatter_of_out_channel :
val temp_file : string -> string -> string
(** Like [Filename.temp_file], but registers the file for deletion at program
    exit unless [Cli.debug_flag] is set. *)
val with_temp_file :
string -> string -> ?contents:string -> (string -> 'a) -> 'a
(** [with_temp_file pfx sfx ?contents f] creates a temp file (with prefix and
    suffix like [temp_file]), optionally filled with the given [contents], for
    the lifetime of the supplied function [f], which is called on the file
    name. The file is then removed unconditionally. *)
val contents : string -> string
(** [contents filename] reads the whole contents of the file [filename] and
    returns them as a string. *)
val process_out : ?check_exit:(int -> unit) -> string -> string list -> string
(** [process_out cmd args] executes the given command with the specified
    arguments, and returns the stdout of the process as a string. [check_exit]
    is called on the return code of the sub-process, the default is to fail on
    anything but 0.

    @raise Failure
      if the program cannot be found, if the sub-process was signalled or
      stopped, or (with the default [check_exit]) if it returned a non-zero
      code. *)

View File

@ -30,13 +30,6 @@ module C = Cli
(** Runs [s] through [run_pandoc] with the [`Html] target and strips the
    leading and trailing whitespace of the result. NOTE(review): the previous
    comment described this as converting double lines into HTML newlines;
    that is presumably pandoc's doing and cannot be confirmed from here. *)
let pre_html (s : string) = run_pandoc s `Html |> String.trim
(** Aborts through [Errors.raise_error] with a message naming the
    [pygmentize] command line [cmd] and the error code [status] it returned. *)
let raise_failed_pygments (cmd : string) (status : int) : 'a =
  Errors.raise_error
    "Weaving to HTML failed: pygmentize command \"%s\" returned with error \
     code %d"
    cmd status
(** Partial application allowing to remove first code lines of
[<td class="code">] and [<td class="linenos">] generated HTML. Basically,
remove all code block first lines. *)
@ -57,21 +50,9 @@ let wrap_html
(language : Cli.backend_lang)
(fmt : Format.formatter)
(wrapped : Format.formatter -> unit) : unit =
let pygments = "pygmentize" in
let css_file = File.temp_file "catala_css_pygments" "" in
let pygments_args =
[| "-f"; "html"; "-S"; "colorful"; "-a"; ".catala-code" |]
let css_as_string =
call_pygmentize ["-f"; "html"; "-S"; "default"; "-a"; ".catala-code"]
in
let cmd =
Format.sprintf "%s %s > %s" pygments
(String.concat " " (Array.to_list pygments_args))
css_file
in
let return_code = Sys.command cmd in
if return_code <> 0 then raise_failed_pygments cmd return_code;
let oc = open_in css_file in
let css_as_string = really_input_string oc (in_channel_length oc) in
close_in oc;
Format.fprintf fmt
"<!DOCTYPE html>\n\
<html>\n\
@ -118,54 +99,24 @@ let wrap_html
(** Performs syntax highlighting on a piece of code by using Pygments and the
special Catala lexer. *)
let pygmentize_code (c : string Marked.pos) (language : C.backend_lang) : string
=
let pygmentize_code (c : string Marked.pos) (lang : C.backend_lang) : string =
C.debug_print "Pygmenting the code chunk %s"
(Pos.to_string (Marked.get_mark c));
let pyg_lexer =
let lexer_fname = "lexer_" ^ Cli.language_code language ^ ".py" in
match Pygment_lexers.read lexer_fname with
| None -> failwith "Pygments lexer not found for this language"
| Some parser ->
let f = File.temp_file "pygments_lexer_" ".py" in
File.with_out_channel f (fun oc -> output_string oc parser);
f
let output =
File.with_temp_file "catala_html_pygments" "in" ~contents:(Marked.unmark c)
@@ fun temp_file_in ->
call_pygmentize ~lang
[
"-f";
"html";
"-O";
"anchorlinenos=True,lineanchors="
^ String.to_ascii (Pos.get_file (Marked.get_mark c))
^ ",linenos=table,linenostart="
^ string_of_int (Pos.get_start_line (Marked.get_mark c));
temp_file_in;
]
in
let temp_file_in = File.temp_file "catala_html_pygments" "in" in
let temp_file_out = File.temp_file "catala_html_pygments" "out" in
let oc = open_out temp_file_in in
Printf.fprintf oc "%s" (Marked.unmark c);
close_out oc;
let pygments = "pygmentize" in
let pygments_args =
[|
"-l";
pyg_lexer;
"-x";
"-f";
"html";
"-O";
"style=colorful,anchorlinenos=True,lineanchors=\""
^ String.to_ascii (Pos.get_file (Marked.get_mark c))
^ "\",linenos=table,linenostart="
^ string_of_int (Pos.get_start_line (Marked.get_mark c));
"-o";
temp_file_out;
temp_file_in;
|]
in
let cmd =
Format.asprintf "%s %s" pygments
(String.concat " " (Array.to_list pygments_args))
in
let return_code = Sys.command cmd in
Sys.remove temp_file_in;
Sys.remove pyg_lexer;
if return_code <> 0 then raise_failed_pygments cmd return_code;
let oc = open_in temp_file_out in
let output = really_input_string oc (in_channel_length oc) in
close_in oc;
Sys.remove temp_file_out;
(* Remove code blocks delimiters needed by [Pygments]. *)
let trimmed_output =
output |> remove_cb_first_lines |> remove_cb_last_lines

View File

@ -59,7 +59,8 @@ let wrap_latex
\usepackage{fontspec}
\usepackage[hidelinks]{hyperref}
%s
\usepackage{minted}
\usepackage{fancyvrb}
\usepackage{color}
\usepackage{longtable}
\usepackage{booktabs,tabularx}
\usepackage{newunicodechar}
@ -122,8 +123,10 @@ let wrap_latex
\newunicodechar{}{$\rightarrow$}
\newunicodechar{}{$\neq$}
\newcommand*\FancyVerbStartString{\PYG{l+s}{```catala}}
\newcommand*\FancyVerbStopString{\PYG{l+s}{```}}
%s
\newcommand*\FancyVerbStartString{\PY{l+s}{```catala}}
\newcommand*\FancyVerbStopString{\PY{l+s}{```}}
\fvset{
numbers=left,
@ -157,6 +160,7 @@ codes={\catcode`\$=3\catcode`\^=7}
(match language with Fr -> "\\setmainfont{Marianne}" | _ -> "")
(* for France, we use the official font of the French state design system
https://gouvfr.atlassian.net/wiki/spaces/DB/pages/223019527/Typographie+-+Typography *)
(call_pygmentize ["-f"; "latex"; "-S"; "default"])
(literal_title language)
(literal_generated_by language)
Cli.version
@ -186,17 +190,22 @@ codes={\catcode`\$=3\catcode`\^=7}
(** {1 Weaving} *)
let code_block ~meta lang fmt (code, pos) =
let opts = if meta then "numbersep=9mm, " else "" in
(* Pygments doesn't allow specifying multiple 'verboptions' (escaping bug?),
so we call it with "nowrap" and write the FancyVrb wrapper ourselves. *)
let pygmentized_code =
let contents = String.concat "" ["```catala\n"; code; "```"] in
File.with_temp_file "catala_latex_pygments" "in" ~contents
@@ fun temp_file_in ->
call_pygmentize ~lang ["-f"; "latex"; "-O"; "nowrap=true"; temp_file_in]
in
Format.fprintf fmt
"\\begin{minted}[label={\\hspace*{\\fill}\\texttt{%s}},%sfirstnumber=%d]{%s}\n\
```catala\n\
%s```\n\
\\end{minted}"
(pre_latexify (Filename.basename (Pos.get_file pos)))
opts
{latex|\begin{Verbatim}[commandchars=\\\{\},numbers=left,firstnumber=%d,stepnumber=1,label={\hspace*{\fill}\texttt{%s}}%s]|latex}
(Pos.get_start_line pos + 1)
(get_language_extension lang)
code
(pre_latexify (Filename.basename (Pos.get_file pos)))
(if meta then ",numbersep=9mm" else "");
Format.pp_print_newline fmt ();
Format.pp_print_string fmt pygmentized_code;
Format.pp_print_string fmt "\\end{Verbatim}\n"
let rec law_structure_to_latex
(language : C.backend_lang)

View File

@ -124,3 +124,25 @@ let check_exceeding_lines
ANSITerminal.[red]
"%s"
String.(sub s max_len (len_s - max_len)))))
(** [call_pygmentize ?lang args] runs [pygmentize] with [args] through
    [File.process_out] and returns its standard output.

    When [lang] is given, the embedded lexer ["lexer_<code>.py"] is read from
    [Pygment_lexers], written to a temporary file, and ["-l"; file; "-x"] is
    prepended to [args]. A missing lexer raises [Failure]; a non-zero exit
    code is reported through [Errors.raise_error]. *)
let call_pygmentize ?lang args =
  let cmd = "pygmentize" in
  let check_exit = function
    | 0 -> ()
    | n ->
      (* The reported command line lists the caller's [args] only, not the
         lexer options added below. *)
      Errors.raise_error
        "Weaving failed: pygmentize command %S returned with error code %d"
        (String.concat " " (cmd :: args))
        n
  in
  let run extra_args = File.process_out ~check_exit cmd (extra_args @ args) in
  match lang with
  | None -> run []
  | Some lang ->
    let lexer_name = "lexer_" ^ Cli.language_code lang ^ ".py" in
    (match Pygment_lexers.read lexer_name with
     | None -> failwith "Pygments lexer not found for this language"
     | Some lexer_py ->
       File.with_temp_file "pygments_lexer_" ".py" ~contents:lexer_py
         (fun pyg_lexer -> run ["-l"; pyg_lexer; "-x"]))

View File

@ -46,3 +46,8 @@ val run_pandoc : string -> [ `Html | `Latex ] -> string
val check_exceeding_lines : ?max_len:int -> int -> string -> string -> unit
(** [check_exceeding_lines ~max_len start_line filename content] prints a
    warning message for each line of [content] exceeding [max_len] characters. *)
val call_pygmentize : ?lang:Cli.backend_lang -> string list -> string
(** [call_pygmentize ?lang args] calls the [pygmentize] command with the given
    arguments, and returns its standard output as a string. If [lang] is
    specified, the arguments selecting the embedded Catala lexer for that
    language ([-l <lexer file> -x]) are passed ahead of [args]. An error is
    raised if [pygmentize] returns a non-zero code, or if no lexer is available
    for [lang]. *)