refactor(lexer): manage the new markdown syntax

This commit is contained in:
EmileRolley 2021-03-09 22:04:36 +01:00
parent 05f60b72f4
commit e45f416be1
2 changed files with 30 additions and 30 deletions

View File

@ -17,6 +17,7 @@
open Parser
open Sedlexing
open Utils
open Lexer_common
module R = Re.Pcre
(** Boolean reference, used by the lexer as the mutable state to distinguish whether it is lexing
@ -142,7 +143,7 @@ let rec lex_code (lexbuf : lexbuf) : token =
(* Comments *)
update_acc lexbuf;
lex_code lexbuf
| "*/" ->
| "```" ->
(* End of code section *)
is_code := false;
END_CODE !code_string_acc
@ -535,24 +536,24 @@ let lex_law (lexbuf : lexbuf) : token =
let prev_lexeme = Utf8.lexeme lexbuf in
let prev_pos = lexing_positions lexbuf in
match%sedlex lexbuf with
| "/*" ->
| "```catala" ->
is_code := true;
code_string_acc := "";
BEGIN_CODE
| eof -> EOF
| "@@", Star white_space, "Master file", Star white_space, "@@" -> MASTER_FILE
| "@@", Star white_space, "Begin metadata", Star white_space, "@@" -> BEGIN_METADATA
| "@@", Star white_space, "End metadata", Star white_space, "@@" -> END_METADATA
| ( "@@",
| '#', Star white_space, "Master file" -> MASTER_FILE
| '>', Star white_space, "Begin metadata" -> BEGIN_METADATA
| '>', Star white_space, "End metadata" -> END_METADATA
| ( '>',
Star white_space,
"Include:",
Star white_space,
Plus (Compl '@'),
Plus (Compl ('@' | '\n')),
Star white_space,
Opt ('@', Star white_space, "p.", Star white_space, Plus '0' .. '9', Star white_space),
"@@" ) ->
'\n' ) ->
let extract_components =
R.regexp "@@\\s*Include\\:\\s*([^@]+)\\s*(@\\s*p\\.\\s*([0-9]+)|)@@"
R.regexp ">\\s*Include\\:\\s*([^@\\n]+)\\s*(@\\s*p\\.\\s*([0-9]+)|)"
in
let get_component = R.get_substring (R.exec ~rex:extract_components (Utf8.lexeme lexbuf)) in
let name = get_component 1 in
@ -561,23 +562,22 @@ let lex_law (lexbuf : lexbuf) : token =
if Filename.extension name = ".pdf" then
LAW_INCLUDE (Ast.PdfFile ((name, Pos.from_lpos pos), pages))
else LAW_INCLUDE (Ast.CatalaFile (name, Pos.from_lpos pos))
| "@@", Plus (Compl '@'), "@@", Star '+' ->
let extract_code_title = R.regexp "@@([^@]+)@@([\\+]*)" in
let get_match = R.get_substring (R.exec ~rex:extract_code_title (Utf8.lexeme lexbuf)) in
let get_new_lines = R.regexp "\n" in
let new_lines_count =
try Array.length (R.extract ~rex:get_new_lines (Utf8.lexeme lexbuf)) with Not_found -> 0
| '#', Plus '#', Star white_space, Plus (Compl ('[' | ']' | '\n')), Star white_space, '\n' ->
get_law_heading lexbuf
| ( '#',
Plus '#',
Star white_space,
'[',
Star white_space,
Plus (Compl ']'),
Star white_space,
']',
'\n' ) ->
let extract_article_title = R.regexp "([#]+)\\s*\\[([^@]+)\\]" in
let get_substring =
R.get_substring (R.exec ~rex:extract_article_title (Utf8.lexeme lexbuf))
in
for _i = 1 to new_lines_count do
new_line lexbuf
done;
let law_title = get_match 1 in
let precedence = String.length (get_match 2) in
LAW_HEADING (law_title, precedence)
| "@", Plus (Compl '@'), "@" ->
let extract_article_title = R.regexp "@([^@]+)@" in
let title = R.get_substring (R.exec ~rex:extract_article_title (Utf8.lexeme lexbuf)) 1 in
let title = get_substring 2 in
let get_new_lines = R.regexp "\n" in
let new_lines_count =
try Array.length (R.extract ~rex:get_new_lines (Utf8.lexeme lexbuf)) with Not_found -> 0
@ -586,11 +586,12 @@ let lex_law (lexbuf : lexbuf) : token =
for _i = 1 to new_lines_count - 1 do
new_line lexbuf
done;
let precedence = calc_precedence (get_substring 1) in
LAW_ARTICLE (title, None, None, 1)
| Plus (Compl ('@' | '/')) -> LAW_TEXT (Utf8.lexeme lexbuf)
LAW_ARTICLE (title, None, None, precedence)
| Plus (Compl ('/' | '#' | '`' | '>')) -> LAW_TEXT (Utf8.lexeme lexbuf)
| _ -> raise_lexer_error (Pos.from_lpos prev_pos) prev_lexeme
(** Entry point of the lexer, distributes to {!val: lex_code} or {!val: lex_law} depending of {!val:
is_code}. *)
let lexer lexbuf = if !is_code then lex_code lexbuf else lex_law lexbuf
let lexer (lexbuf : lexbuf) : token = if !is_code then lex_code lexbuf else lex_law lexbuf

View File

@ -15,9 +15,8 @@
open Parser
open Sedlexing
module R = Re.Pcre
module L = Lexer
(* Calculates the precedence according a matched regex of the form : '[#]+'.
(* Calculates the precedence according a {!val: matched_regex} of the form : '[#]+'.
@note -2 because both [LAW_ARTICLE] and [LAW_HEADING] start with at least "##" and the number of
'#' remaining corresponds to the precedence. *)