Better legifrance inclusion

This commit is contained in:
Denis Merigoux 2023-01-04 16:29:21 +01:00
parent 27cf705441
commit d2ce111fc2
No known key found for this signature in database
GPG Key ID: EE99DCFA365C3EE3
5 changed files with 21 additions and 87 deletions

View File

@ -151,7 +151,8 @@ codes={\catcode`\$=3\catcode`\^=7}
\tableofcontents
\[\star\star\star\]
\clearpage|latex}
\clearpage
|latex}
(match language with Fr -> "french" | En -> "english" | Pl -> "polish")
(match language with Fr -> "\\setmainfont{Marianne}" | _ -> "")
(* for France, we use the official font of the French state design system

View File

@ -662,7 +662,7 @@ let source_file_item :=
END_DIRECTIVE ; {
let filename = String.trim (String.concat "" args) in
let pos = Pos.from_lpos $sloc in
let jorftext = Re.Pcre.regexp "JORFTEXT\\d{12}" in
let jorftext = Re.Pcre.regexp "(JORFARTI\\d{12}|LEGIARTI\\d{12}|CETATEXT\\d{12})" in
if Re.Pcre.pmatch ~rex:jorftext filename && page = None then
LawInclude (Ast.LegislativeText (filename, pos))
else if Filename.extension filename = ".pdf" || page <> None then

View File

@ -191,13 +191,6 @@ let raise_article_parsing_error
(Yojson.Basic.to_string json);
exit 1
(** JSON value describing a law excerpt, as produced by
    [retrieve_law_excerpt]. *)
type law_excerpt = Yojson.Basic.t
(** [retrieve_law_excerpt access_token text_id] builds a ["consult/jorfPart"]
    request whose ["textCid"] parameter is [text_id], hands it to
    [run_request] and returns the result. *)
let retrieve_law_excerpt (access_token : string) (text_id : string) :
    law_excerpt =
  let parameters = [ "textCid", text_id ] in
  make_request access_token "consult/jorfPart" parameters |> run_request
let get_article_id (article : article) : string =
try
article.content
@ -237,6 +230,18 @@ let get_article_text (article : article) : string =
with Yojson.Basic.Util.Type_error (msg, obj) ->
raise_article_parsing_error article.content msg obj
(** [get_article_title article] returns the string found in the ["titre"]
    field of the object that holds the article payload. That object is stored
    under ["text"] for [CETATEXT] articles and under ["article"] for
    [LEGIARTI] and [JORFARTI] ones.

    A [Yojson.Basic.Util.Type_error] raised while walking the JSON is handed
    over to [raise_article_parsing_error] together with the article content. *)
let get_article_title (article : article) : string =
  let module J = Yojson.Basic.Util in
  (* Name of the top-level field wrapping the payload for this kind of id. *)
  let payload_key =
    match article.typ with
    | CETATEXT -> "text"
    | LEGIARTI | JORFARTI -> "article"
  in
  try J.to_string (J.member "titre" (J.member payload_key article.content))
  with J.Type_error (msg, obj) ->
    raise_article_parsing_error article.content msg obj
let get_article_expiration_date (article : article) : Unix.tm =
try
let article_id = get_article_id article in
@ -284,50 +289,3 @@ let get_article_new_version (article : article) : string =
|> Yojson.Basic.Util.to_string
with Yojson.Basic.Util.Type_error (msg, obj) ->
raise_article_parsing_error article.content msg obj)
(** [get_law_excerpt_title json] returns the string held by the ["title"]
    field of [json]. *)
let get_law_excerpt_title (json : law_excerpt) : string =
  let module J = Yojson.Basic.Util in
  J.to_string (J.member "title" json)
(** One article of a law excerpt, as built by [get_law_excerpt_articles]:
    [id] and [num] are the article's ["id"] and ["num"] JSON fields, and
    [content] is its ["content"] field after going through [clean_html]. *)
type law_excerpt_article = { id : string; num : string; content : string }
(** [clean_html s] turns the HTML fragment [s] into plain text.

    Every line-break tag ([<br/>], [<br />] and the slash-less [<br>]),
    together with the whitespace surrounding it, is replaced by a single
    newline. Every remaining tag is then deleted and the result is trimmed
    with [String.trim]. *)
let clean_html (s : string) : string =
  (* The closing slash is optional: a bare [<br>] used to fall through to the
     generic tag removal below, which glued the two lines together. *)
  let new_line = Re.Pcre.regexp "\\s*\\<br\\s*\\/?\\>\\s*" in
  let s = Re.Pcre.substitute ~rex:new_line ~subst:(fun _ -> "\n") s in
  let tag = Re.Pcre.regexp "\\<[^\\>]+\\>" in
  let s = Re.Pcre.substitute ~rex:tag ~subst:(fun _ -> "") s in
  String.trim s
(** [get_law_excerpt_articles json] reads the ["articles"] list of [json] and
    returns one [law_excerpt_article] per entry, ordered by article number.

    Articles whose ["num"] field parses as an integer are ordered numerically
    and come first. Articles with any other number (which previously made the
    sort fail with [Failure "int_of_string"]) come after them, ordered with
    [String.compare]. The ["content"] field goes through [clean_html].

    @raise Yojson.Basic.Util.Type_error
      if ["articles"] is not a list, or if an article's ["id"], ["num"] or
      ["content"] field is not a string. *)
let get_law_excerpt_articles (json : law_excerpt) : law_excerpt_article list =
  let module J = Yojson.Basic.Util in
  let string_field name article = article |> J.member name |> J.to_string in
  (* Total order on article numbers: integers first (numerically), then
     everything else (lexicographically). *)
  let compare_nums n1 n2 =
    match int_of_string_opt n1, int_of_string_opt n2 with
    | Some i1, Some i2 -> compare i1 i2
    | Some _, None -> -1
    | None, Some _ -> 1
    | None, None -> String.compare n1 n2
  in
  json
  |> J.member "articles"
  |> J.to_list
  (* Decode each article once, instead of re-reading the JSON on every
     comparison performed by the sort. *)
  |> List.map (fun article ->
         {
           id = string_field "id" article;
           num = string_field "num" article;
           content = clean_html (string_field "content" article);
         })
  |> List.stable_sort
       (fun (a1 : law_excerpt_article) (a2 : law_excerpt_article) ->
         compare_nums a1.num a2.num)

View File

@ -42,26 +42,12 @@ val retrieve_article : access_token -> article_id -> article
(** [retrieve_article token article_id] returns the article from the LegiFrance
API.*)
(** Abstract handle on a law excerpt, obtained from [retrieve_law_excerpt]. *)
type law_excerpt
val retrieve_law_excerpt : access_token -> string -> law_excerpt
(** [retrieve_law_excerpt token excerpt_id] returns a whole excerpt of a
    legislative statute from the LegiFrance API (["consult/jorfPart"]
    endpoint, with [excerpt_id] sent as ["textCid"]). [excerpt_id] should be
    of the form ["JORFTEXT000033736934"]. *)
(**{2 Manipulating API objects}*)
(**{3 Articles}*)
val get_article_id : article -> string
val get_article_text : article -> string
val get_article_title : article -> string
(** [get_article_title article] returns the ["titre"] field of the article
    payload. *)
val get_article_expiration_date : article -> Unix.tm
val get_article_new_version : article -> string
(**{3 Law excerpts}*)
val get_law_excerpt_title : law_excerpt -> string
(** [get_law_excerpt_title excerpt] returns the ["title"] field of [excerpt]. *)

(** One article of a law excerpt: its ["id"] and ["num"] fields, and its
    ["content"] field with the HTML markup removed. *)
type law_excerpt_article = { id : string; num : string; content : string }

val get_law_excerpt_articles : law_excerpt -> law_excerpt_article list
(** [get_law_excerpt_articles excerpt] returns the articles listed in the
    ["articles"] field of [excerpt], sorted by increasing article number. *)

View File

@ -165,25 +165,14 @@ let compare_to_versions
let include_legislative_text
(id : string * Pos.t)
(access_token : Api.access_token) : string =
let excerpt = Api.retrieve_law_excerpt access_token (fst id) in
let title = "#" ^ Api.get_law_excerpt_title excerpt in
let excerpts = Api.get_law_excerpt_articles excerpt in
let text_to_return =
String.concat "\n\n"
(List.map (fun article -> article.Api.content) excerpts)
in
let articles =
List.map
(fun article ->
Printf.sprintf "## Article %s|%s@\n%s" article.Api.num article.Api.id
article.Api.content)
excerpts
in
let to_insert = title ^ "\n\n" ^ String.concat "\n\n" articles in
let pos = snd id in
let id = Api.parse_id (fst id) in
let article = Api.retrieve_article access_token id in
let text_to_return = Api.get_article_text article in
let to_insert = text_to_return in
Cli.debug_format "Position: %s" (Pos.to_string_short pos);
let file = Pos.get_file pos in
let include_line = Pos.get_end_line pos in
let include_line = Pos.get_start_line pos in
let ic = open_in file in
let new_file = file ^ ".new" in
Cli.warning_print "LégiFrance inclusion detected, writing new contents to %s"