Refactor suggestions

This commit is contained in:
vbot 2024-06-20 15:38:21 +02:00
parent 41caecef63
commit 441dd54ec3
No known key found for this signature in database
GPG Key ID: A102739F983C6C72
6 changed files with 60 additions and 63 deletions

View File

@ -48,51 +48,34 @@ let levenshtein_distance (s : string) (t : string) : int =
d.(m).(n) d.(m).(n)
(*We create a list composed by strings that satisfy the following rule : they module M = Stdlib.Map.Make (Int)
have the same levenshtein distance, which is the minimum distance between the
reference word "keyword" and all the strings in "candidates" (with the let compute_candidates (candidates : string list) (word : string) :
condition that this minimum is equal to or less than one third of the length string list M.t =
of keyword + 1, in order to get suggestions close to "keyword")*) List.fold_left
let suggestion_minimum_levenshtein_distance_association (fun m candidate ->
(candidates : string list) let distance = levenshtein_distance word candidate in
(keyword : string) : string list = M.update distance
let rec strings_minimum_levenshtein_distance (function None -> Some [candidate] | Some l -> Some (candidate :: l))
(minimum : int) m)
(result : string list) M.empty candidates
(candidates' : string list) : string list =
(*As we iterate through the "candidates'" list, we create a list "result" let best_candidates candidates word =
with all strings that have the last minimum levenshtein distance found let candidates = compute_candidates candidates word in
("minimum").*) M.choose_opt candidates |> function None -> [] | Some (_, l) -> List.rev l
match candidates' with
(*When a new minimum levenshtein distance is found, the new result list is let sorted_candidates ?(max_elements = 5) suggs given =
our new element "current_string" followed by strings that have the same let rec sub acc n = function
minimum distance. It will be the "result" list if there is no levenshtein | [] -> List.rev acc
distance smaller than this new minimum.*) | x :: t when n > 0 -> sub (x :: acc) (pred n) t
| current_string :: tail -> | _ -> List.rev acc
let current_levenshtein_distance =
levenshtein_distance current_string keyword
in
if current_levenshtein_distance < minimum then
strings_minimum_levenshtein_distance current_levenshtein_distance
[current_string] tail
(*The "result" list is updated (we append "current_string" to "result")
when a new string shares the same minimum levenshtein distance
"minimum"*)
else if current_levenshtein_distance = minimum then
strings_minimum_levenshtein_distance minimum
(result @ [current_string])
tail
(*If a levenshtein distance greater than the minimum is found, "result"
doesn't change*)
else strings_minimum_levenshtein_distance minimum result tail
(*The "result" list is returned at the end of the "candidates'" list.*)
| [] -> result
in in
strings_minimum_levenshtein_distance let candidates =
(1 + (String.length keyword / 3)) List.map
(*In order to select suggestions that are not too far away from the (fun (_, l) -> List.rev l)
keyword*) (M.bindings (compute_candidates suggs given))
[] candidates in
List.concat candidates |> sub [] max_elements
let format (ppf : Format.formatter) (suggestions_list : string list) = let format (ppf : Format.formatter) (suggestions_list : string list) =
match suggestions_list with match suggestions_list with

View File

@ -15,9 +15,20 @@
License for the specific language governing permissions and limitations under License for the specific language governing permissions and limitations under
the License. *) the License. *)
val suggestion_minimum_levenshtein_distance_association : val levenshtein_distance : string -> string -> int
string list -> string -> string list (** [levenshtein_distance w1 w2] computes the levenshtein distance separating
(**Returns a list of the closest words into {!name:candidates} to the keyword [w1] from [w2]. *)
{!name:keyword}*)
val best_candidates : string list -> string -> string list
(** [best_candidates suggestions word] returns the subset of elements in
[suggestions] that minimize the levenshtein distance to [word]. Multiple
candidates that have a same distance is possible. *)
val sorted_candidates :
?max_elements:int -> string list -> string -> string list
(** [sorted_candidates ?max_elements suggestions word] sorts the [suggestions]
list and retain at most [max_elements] (defaults to 5). This list is ordered
by their levenshtein distance to [word], i.e., the first elements are the
most similar. *)
val format : Format.formatter -> string list -> unit val format : Format.formatter -> string list -> unit

View File

@ -138,8 +138,7 @@ let raise_error_cons_not_found
(constructor : string Mark.pos) = (constructor : string Mark.pos) =
let constructors = Ident.Map.keys ctxt.local.constructor_idmap in let constructors = Ident.Map.keys ctxt.local.constructor_idmap in
let closest_constructors = let closest_constructors =
Suggestions.suggestion_minimum_levenshtein_distance_association constructors Suggestions.best_candidates constructors (Mark.remove constructor)
(Mark.remove constructor)
in in
Message.error Message.error
~pos_msg:(fun ppf -> Format.fprintf ppf "Here is your code :") ~pos_msg:(fun ppf -> Format.fprintf ppf "Here is your code :")

View File

@ -618,7 +618,10 @@ and typecheck_expr_top_down :
"Variable @{<yellow>%s@} is not a declared output of scope %a." "Variable @{<yellow>%s@} is not a declared output of scope %a."
field A.ScopeName.format scope_out field A.ScopeName.format scope_out
~suggestion: ~suggestion:
(List.map A.StructField.to_string (A.StructField.Map.keys str)) (Suggestions.sorted_candidates
(List.map A.StructField.to_string
(A.StructField.Map.keys str))
field)
| None -> | None ->
Message.error Message.error
~extra_pos: ~extra_pos:
@ -629,7 +632,10 @@ and typecheck_expr_top_down :
"Field@ @{<yellow>\"%s\"@}@ does@ not@ belong@ to@ structure@ \ "Field@ @{<yellow>\"%s\"@}@ does@ not@ belong@ to@ structure@ \
@{<yellow>\"%a\"@}." @{<yellow>\"%a\"@}."
field A.StructName.format name field A.StructName.format name
~suggestion:(A.Ident.Map.keys ctx.ctx_struct_fields)) ~suggestion:
(Suggestions.sorted_candidates
(A.Ident.Map.keys ctx.ctx_struct_fields)
field))
in in
try A.StructName.Map.find name candidate_structs try A.StructName.Map.find name candidate_structs
with A.StructName.Map.Not_found _ -> with A.StructName.Map.Not_found _ ->

View File

@ -105,15 +105,16 @@ module ParserAux (LocalisedLexer : Lexer_common.LocalisedLexer) = struct
let sorted_candidate_tokens lexbuf token_list env = let sorted_candidate_tokens lexbuf token_list env =
let acceptable_tokens = let acceptable_tokens =
List.filter List.filter_map
(fun (_, t) -> (fun ((_, t) as elt) ->
I.acceptable (I.input_needed env) t (fst (lexing_positions lexbuf))) if I.acceptable (I.input_needed env) t (fst (lexing_positions lexbuf))
then Some elt
else None)
token_list token_list
in in
let lexeme = Utf8.lexeme lexbuf in
let similar_acceptable_tokens = let similar_acceptable_tokens =
Suggestions.suggestion_minimum_levenshtein_distance_association Suggestions.best_candidates (List.map fst acceptable_tokens) lexeme
(List.map (fun (s, _) -> s) acceptable_tokens)
(Utf8.lexeme lexbuf)
in in
let module S = Set.Make (String) in let module S = Set.Make (String) in
let s_toks = S.of_list similar_acceptable_tokens in let s_toks = S.of_list similar_acceptable_tokens in

View File

@ -50,11 +50,8 @@ $ catala test-scope A
│ │ ‾ │ │ ‾
├─ Article ├─ Article
│ Maybe you wanted to write : "field0", or "field1", or "field2", │ Maybe you wanted to write : "field0", or "field2", or "field10",
│ or "field3", or "field4", or "field5", or "field6", or "field7", │ or "field1", or "field3" ?
│ or "field8", or "field9", or "field10", or "field11", or "field12",
│ or "field13", or "field14", or "field15", or "field16", or "field17",
│ or "field18", or "field19", or "o" ?
└─ └─
#return code 123# #return code 123#
``` ```