Embed the Pygments Catala lexer

This enables the HTML output to work without a custom Pygments installation (or
having the proper venv activated).
This commit is contained in:
Louis Gesbert 2023-03-13 13:40:10 +01:00
parent b75910d087
commit 508992de33
15 changed files with 126856 additions and 7560 deletions

View File

@ -40,6 +40,7 @@ depends: [
"visitors" {>= "20200210"}
"zarith" {>= "1.12"}
"zarith_stubs_js" {>= "v0.14.1"}
"crunch" {>= "3.0.0"}
"alcotest" {with-test & >= "1.5.0"}
"odoc" {with-doc}
"ocamlformat" {cataladevmode & = "0.21.0"}

View File

@ -33,6 +33,13 @@ type backend_option_builtin =
type 'a backend_option = [ backend_option_builtin | `Plugin of 'a ]
(** Associates each lowercase two-letter language code with the corresponding
{!type: Cli.backend_lang} value. *)
let languages = ["en", En; "fr", Fr; "pl", Pl]
(** Reverse lookup in [languages]: returns the code string associated with a
    language. The swapped association list is built once, when this definition
    is evaluated, and shared by every call.
    @raise Not_found if the language has no entry in [languages]. *)
let language_code =
  let code_by_language =
    List.map (fun (code, language) -> language, code) languages
  in
  fun language -> List.assoc language code_by_language
let backend_option_to_string = function
| `Interpret -> "Interpret"
| `Makefile -> "Makefile"

View File

@ -33,6 +33,11 @@ type backend_option_builtin =
type 'a backend_option = [ backend_option_builtin | `Plugin of 'a ]
val languages : (string * backend_lang) list
(** Association list from each two-letter language code to the corresponding
[backend_lang]. *)

val language_code : backend_lang -> string
(** Returns the lowercase two-letter language code of the given language, as
listed in {!val: languages}.
@raise Not_found if the language has no entry in {!val: languages}. *)
val backend_option_to_string : string backend_option -> string
(** [backend_option_to_string backend] returns the string representation of the
given [backend]. *)

View File

@ -26,6 +26,12 @@ let finally f k =
f ();
r
(** [temp_file pfx sfx] creates a temporary file like [Filename.temp_file pfx
    sfx] and returns its name. Unless [Cli.debug_flag] is set, the file is
    registered for removal when the program exits. *)
let temp_file pfx sfx =
  let f = Filename.temp_file pfx sfx in
  if not !Cli.debug_flag then
    at_exit (fun () ->
        (* Best-effort cleanup: the file may already have been removed by the
           caller, so failures of [Sys.remove] are ignored. Only [Sys_error]
           is caught, so that unrelated exceptions are not silently
           swallowed. *)
        try Sys.remove f with Sys_error _ -> ());
  f
(** [with_out_channel filename f] opens [filename] for writing and applies [f]
    to the resulting channel. Closing the channel is delegated to [finally],
    which receives the closing action and the call to [f]. *)
let with_out_channel filename f =
  let oc = open_out filename in
  let close () = close_out oc in
  let write () = f oc in
  finally close write

View File

@ -60,3 +60,7 @@ val get_formatter_of_out_channel :
(** [get_output_format ~source_file ~output_file ?ext ()] returns the inferred
filename and its corresponding [with_formatter_of_out_channel] function. If
the [output_file] is equal to [Some "-"] returns a wrapper around [stdout]. *)
val temp_file : string -> string -> string
(** [temp_file pfx sfx] is like [Filename.temp_file pfx sfx], but registers the
created file for deletion at program exit unless [Cli.debug_flag] is set. *)

View File

@ -17,9 +17,6 @@
open Catala_utils
(** Associates each two-letter language code with the corresponding
{!type: Cli.backend_lang} value. *)
let languages = ["en", Cli.En; "fr", Cli.Fr; "pl", Cli.Pl]
(** Associates a Catala source file extension with the two-letter language code
(the string representation of the corresponding {!type: Cli.backend_lang}). *)
let extensions = [".catala_fr", "fr"; ".catala_en", "en"; ".catala_pl", "pl"]
@ -59,7 +56,7 @@ let driver source_file (options : Cli.options) : int =
try List.assoc ext extensions with Not_found -> ext)
in
let language =
try List.assoc l languages
try List.assoc l Cli.languages
with Not_found ->
Errors.raise_error
"The selected language (%s) is not supported by Catala" l

View File

@ -3,6 +3,21 @@
(public_name catala.literate)
(libraries re catala_utils surface ubase uutf))
; Generates pygment_lexers.ml, an OCaml module embedding the Pygments lexer
; sources for the en, fr and pl languages.
(rule
(target pygment_lexers.ml)
(action
(progn
; Copy each language's lexer.py into this directory under a per-language
; name (lexer_en.py, lexer_fr.py, lexer_pl.py).
(copy
../../syntax_highlighting/en/pygments/catala_en_lexer/lexer.py
lexer_en.py)
(copy
../../syntax_highlighting/fr/pygments/catala_fr_lexer/lexer.py
lexer_fr.py)
(copy
../../syntax_highlighting/pl/pygments/catala_pl_lexer/lexer.py
lexer_pl.py)
; Run ocaml-crunch on the current directory, writing the generated module to
; the rule target.
; NOTE(review): "-e py" presumably restricts the embedded files to the .py
; extension and "-m plain" presumably selects the plain output mode; confirm
; against the ocaml-crunch documentation.
(run ocaml-crunch -e py -m plain -o %{target} .))))
(documentation
(package catala)
(mld_files literate))

View File

@ -58,7 +58,7 @@ let wrap_html
(fmt : Format.formatter)
(wrapped : Format.formatter -> unit) : unit =
let pygments = "pygmentize" in
let css_file = Filename.temp_file "catala_css_pygments" "" in
let css_file = File.temp_file "catala_css_pygments" "" in
let pygments_args =
[| "-f"; "html"; "-S"; "colorful"; "-a"; ".catala-code" |]
in
@ -73,12 +73,15 @@ let wrap_html
let css_as_string = really_input_string oc (in_channel_length oc) in
close_in oc;
Format.fprintf fmt
"<head>\n\
"<!DOCTYPE html>\n\
<html>\n\
<head>\n\
<style>\n\
%s\n\
</style>\n\
<meta http-equiv='Content-Type' content='text/html; charset=utf-8'/>\n\
</head>\n\
<body>\n\
<h1>%s<br />\n\
<small>%s Catala version %s</small>\n\
</h1>\n\
@ -110,7 +113,8 @@ let wrap_html
(literal_last_modification language)
ftime)
source_files));
wrapped fmt
wrapped fmt;
Format.fprintf fmt "</body>\n</html>\n"
(** Performs syntax highlighting on a piece of code by using Pygments and the
special Catala lexer. *)
@ -118,17 +122,26 @@ let pygmentize_code (c : string Marked.pos) (language : C.backend_lang) : string
=
C.debug_print "Pygmenting the code chunk %s"
(Pos.to_string (Marked.get_mark c));
let temp_file_in = Filename.temp_file "catala_html_pygments" "in" in
let temp_file_out = Filename.temp_file "catala_html_pygments" "out" in
let pyg_lexer =
let lexer_fname = "lexer_" ^ Cli.language_code language ^ ".py" in
match Pygment_lexers.read lexer_fname with
| None -> failwith "Pygments lexer not found for this language"
| Some parser ->
let f = File.temp_file "pygments_lexer_" ".py" in
File.with_out_channel f (fun oc -> output_string oc parser);
f
in
let temp_file_in = File.temp_file "catala_html_pygments" "in" in
let temp_file_out = File.temp_file "catala_html_pygments" "out" in
let oc = open_out temp_file_in in
Printf.fprintf oc "%s" (Marked.unmark c);
close_out oc;
let pygments = "pygmentize" in
let pygments_lexer = get_language_extension language in
let pygments_args =
[|
"-l";
pygments_lexer;
pyg_lexer;
"-x";
"-f";
"html";
"-O";
@ -146,10 +159,13 @@ let pygmentize_code (c : string Marked.pos) (language : C.backend_lang) : string
(String.concat " " (Array.to_list pygments_args))
in
let return_code = Sys.command cmd in
Sys.remove temp_file_in;
Sys.remove pyg_lexer;
if return_code <> 0 then raise_failed_pygments cmd return_code;
let oc = open_in temp_file_out in
let output = really_input_string oc (in_channel_length oc) in
close_in oc;
Sys.remove temp_file_out;
(* Remove code blocks delimiters needed by [Pygments]. *)
let trimmed_output =
output |> remove_cb_first_lines |> remove_cb_last_lines

134325
french_law/js/french_law.js generated

File diff suppressed because one or more lines are too long

View File

@ -3,9 +3,9 @@ from pygments.token import *
import re
__all__=['CatalaEnLexer']
__all__=['CustomLexer']
class CatalaEnLexer(RegexLexer):
class CustomLexer(RegexLexer):
name = 'CatalaEn'
aliases = ['catala_en']
filenames = ['*.catala_en']

View File

@ -8,4 +8,4 @@ version = "0.8"
dependencies = ["pygments"]
[project.entry-points."pygments.lexers"]
catala-en-lexer = "catala_en_lexer.lexer:CatalaEnLexer"
catala-en-lexer = "catala_en_lexer.lexer:CustomLexer"

View File

@ -3,9 +3,9 @@ from pygments.token import *
import re
__all__=['CatalaFrLexer']
__all__=['CustomLexer']
class CatalaFrLexer(RegexLexer):
class CustomLexer(RegexLexer):
name = 'CatalaFr'
aliases = ['catala_fr']
filenames = ['*.catala_fr']

View File

@ -8,4 +8,4 @@ version = "0.8"
dependencies = ["pygments"]
[project.entry-points."pygments.lexers"]
catala-fr-lexer = "catala_fr_lexer.lexer:CatalaFrLexer"
catala-fr-lexer = "catala_fr_lexer.lexer:CustomLexer"

View File

@ -3,9 +3,9 @@ from pygments.token import *
import re
__all__=['CatalaPlLexer']
__all__=['CustomLexer']
class CatalaPlLexer(RegexLexer):
class CustomLexer(RegexLexer):
name = 'CatalaPl'
aliases = ['catala_pl']
filenames = ['*.catala_pl']

View File

@ -8,4 +8,4 @@ version = "0.8"
dependencies = ["pygments"]
[project.entry-points."pygments.lexers"]
catala-pl-lexer = "catala_pl_lexer.lexer:CatalaPlLexer"
catala-pl-lexer = "catala_pl_lexer.lexer:CustomLexer"