mirror of
https://github.com/CatalaLang/catala.git
synced 2024-11-08 07:51:43 +03:00
Fix lexing of verbatim blocks
Catala doesn't interpret them at all, but it needs to refrain from interpreting its contents as markdown (titles, etc.)
This commit is contained in:
parent
caf9135b4e
commit
33ce233a29
@ -765,6 +765,31 @@ let rec lex_directive (lexbuf : lexbuf) : token =
|
||||
END_DIRECTIVE
|
||||
| _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme
|
||||
|
||||
let lex_raw (lexbuf : lexbuf) : token =
|
||||
let prev_lexeme = Utf8.lexeme lexbuf in
|
||||
let ((_, start_pos) as prev_pos) = lexing_positions lexbuf in
|
||||
let at_bol = Lexing.(start_pos.pos_bol = start_pos.pos_cnum) in
|
||||
if at_bol then
|
||||
match%sedlex lexbuf with
|
||||
| eof -> EOF
|
||||
| "```", Star hspace, ('\n' | eof) ->
|
||||
L.context := Law;
|
||||
LAW_TEXT (Utf8.lexeme lexbuf)
|
||||
| _ -> (
|
||||
(* Nested match for lower priority; `_` matches length 0 so we effectively retry the
|
||||
sub-match at the same point *)
|
||||
let lexbuf = lexbuf in
|
||||
(* workaround sedlex bug, see https://github.com/ocaml-community/sedlex/issues/12
|
||||
(fixed in 3.1) *)
|
||||
match%sedlex lexbuf with
|
||||
| Star (Compl '\n'), ('\n' | eof) -> LAW_TEXT (Utf8.lexeme lexbuf)
|
||||
| _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme)
|
||||
else
|
||||
match%sedlex lexbuf with
|
||||
| eof -> EOF
|
||||
| Star (Compl '\n'), ('\n' | eof) -> LAW_TEXT (Utf8.lexeme lexbuf)
|
||||
| _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme
|
||||
|
||||
(** Main lexing function used outside code blocks *)
|
||||
let lex_law (lexbuf : lexbuf) : token =
|
||||
let prev_lexeme = Utf8.lexeme lexbuf in
|
||||
@ -781,6 +806,9 @@ let lex_law (lexbuf : lexbuf) : token =
|
||||
L.context := Code;
|
||||
Buffer.clear L.code_buffer;
|
||||
BEGIN_METADATA
|
||||
| "```", Star (idchar | '-') ->
|
||||
L.context := Raw;
|
||||
LAW_TEXT (Utf8.lexeme lexbuf)
|
||||
| '>' ->
|
||||
L.context := Directive;
|
||||
BEGIN_DIRECTIVE
|
||||
@ -790,7 +818,8 @@ let lex_law (lexbuf : lexbuf) : token =
|
||||
(* Nested match for lower priority; `_` matches length 0 so we effectively retry the
|
||||
sub-match at the same point *)
|
||||
let lexbuf = lexbuf in
|
||||
(* workaround sedlex bug, see https://github.com/ocaml-community/sedlex/issues/12 *)
|
||||
(* workaround sedlex bug, see https://github.com/ocaml-community/sedlex/issues/12
|
||||
(fixed in 3.1) *)
|
||||
match%sedlex lexbuf with
|
||||
| Star (Compl '\n'), ('\n' | eof) -> LAW_TEXT (Utf8.lexeme lexbuf)
|
||||
| _ -> L.raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme)
|
||||
@ -805,6 +834,7 @@ let lex_law (lexbuf : lexbuf) : token =
|
||||
let lexer (lexbuf : lexbuf) : token =
|
||||
match !L.context with
|
||||
| Law -> lex_law lexbuf
|
||||
| Raw -> lex_raw lexbuf
|
||||
| Code -> lex_code lexbuf
|
||||
| Directive -> lex_directive lexbuf
|
||||
| Directive_args -> lex_directive_args lexbuf
|
||||
|
@ -42,7 +42,7 @@ let get_law_heading (lexbuf : lexbuf) : token =
|
||||
let precedence = calc_precedence (String.trim (R.get_substring rex 1)) in
|
||||
LAW_HEADING (title, article_id, is_archive, precedence)
|
||||
|
||||
type lexing_context = Law | Code | Directive | Directive_args
|
||||
type lexing_context = Law | Raw | Code | Directive | Directive_args
|
||||
|
||||
(** Boolean reference, used by the lexer as the mutable state to distinguish
|
||||
whether it is lexing code or law. *)
|
||||
|
@ -17,7 +17,7 @@
|
||||
|
||||
(** Auxiliary functions used by all lexers. *)
|
||||
|
||||
type lexing_context = Law | Code | Directive | Directive_args
|
||||
type lexing_context = Law | Raw | Code | Directive | Directive_args
|
||||
|
||||
val context : lexing_context ref
|
||||
(** Reference, used by the lexer as the mutable state to distinguish whether it
|
||||
|
Loading…
Reference in New Issue
Block a user