mirror of
https://github.com/CatalaLang/catala.git
synced 2024-11-08 07:51:43 +03:00
Refactor suggestions
This commit is contained in:
parent
41caecef63
commit
441dd54ec3
@ -48,51 +48,34 @@ let levenshtein_distance (s : string) (t : string) : int =
|
|||||||
|
|
||||||
d.(m).(n)
|
d.(m).(n)
|
||||||
|
|
||||||
(*We create a list composed by strings that satisfy the following rule : they
|
module M = Stdlib.Map.Make (Int)
|
||||||
have the same levenshtein distance, which is the minimum distance between the
|
|
||||||
reference word "keyword" and all the strings in "candidates" (with the
|
let compute_candidates (candidates : string list) (word : string) :
|
||||||
condition that this minimum is equal to or less than one third of the length
|
string list M.t =
|
||||||
of keyword + 1, in order to get suggestions close to "keyword")*)
|
List.fold_left
|
||||||
let suggestion_minimum_levenshtein_distance_association
|
(fun m candidate ->
|
||||||
(candidates : string list)
|
let distance = levenshtein_distance word candidate in
|
||||||
(keyword : string) : string list =
|
M.update distance
|
||||||
let rec strings_minimum_levenshtein_distance
|
(function None -> Some [candidate] | Some l -> Some (candidate :: l))
|
||||||
(minimum : int)
|
m)
|
||||||
(result : string list)
|
M.empty candidates
|
||||||
(candidates' : string list) : string list =
|
|
||||||
(*As we iterate through the "candidates'" list, we create a list "result"
|
let best_candidates candidates word =
|
||||||
with all strings that have the last minimum levenshtein distance found
|
let candidates = compute_candidates candidates word in
|
||||||
("minimum").*)
|
M.choose_opt candidates |> function None -> [] | Some (_, l) -> List.rev l
|
||||||
match candidates' with
|
|
||||||
(*When a new minimum levenshtein distance is found, the new result list is
|
let sorted_candidates ?(max_elements = 5) suggs given =
|
||||||
our new element "current_string" followed by strings that have the same
|
let rec sub acc n = function
|
||||||
minimum distance. It will be the "result" list if there is no levenshtein
|
| [] -> List.rev acc
|
||||||
distance smaller than this new minimum.*)
|
| x :: t when n > 0 -> sub (x :: acc) (pred n) t
|
||||||
| current_string :: tail ->
|
| _ -> List.rev acc
|
||||||
let current_levenshtein_distance =
|
|
||||||
levenshtein_distance current_string keyword
|
|
||||||
in
|
in
|
||||||
if current_levenshtein_distance < minimum then
|
let candidates =
|
||||||
strings_minimum_levenshtein_distance current_levenshtein_distance
|
List.map
|
||||||
[current_string] tail
|
(fun (_, l) -> List.rev l)
|
||||||
(*The "result" list is updated (we append "current_string" to "result")
|
(M.bindings (compute_candidates suggs given))
|
||||||
when a new string shares the same minimum levenshtein distance
|
|
||||||
"minimum"*)
|
|
||||||
else if current_levenshtein_distance = minimum then
|
|
||||||
strings_minimum_levenshtein_distance minimum
|
|
||||||
(result @ [current_string])
|
|
||||||
tail
|
|
||||||
(*If a levenshtein distance greater than the minimum is found, "result"
|
|
||||||
doesn't change*)
|
|
||||||
else strings_minimum_levenshtein_distance minimum result tail
|
|
||||||
(*The "result" list is returned at the end of the "candidates'" list.*)
|
|
||||||
| [] -> result
|
|
||||||
in
|
in
|
||||||
strings_minimum_levenshtein_distance
|
List.concat candidates |> sub [] max_elements
|
||||||
(1 + (String.length keyword / 3))
|
|
||||||
(*In order to select suggestions that are not too far away from the
|
|
||||||
keyword*)
|
|
||||||
[] candidates
|
|
||||||
|
|
||||||
let format (ppf : Format.formatter) (suggestions_list : string list) =
|
let format (ppf : Format.formatter) (suggestions_list : string list) =
|
||||||
match suggestions_list with
|
match suggestions_list with
|
||||||
|
@ -15,9 +15,20 @@
|
|||||||
License for the specific language governing permissions and limitations under
|
License for the specific language governing permissions and limitations under
|
||||||
the License. *)
|
the License. *)
|
||||||
|
|
||||||
val suggestion_minimum_levenshtein_distance_association :
|
val levenshtein_distance : string -> string -> int
|
||||||
string list -> string -> string list
|
(** [levenshtein_distance w1 w2] computes the levenshtein distance separating
|
||||||
(**Returns a list of the closest words into {!name:candidates} to the keyword
|
[w1] from [w2]. *)
|
||||||
{!name:keyword}*)
|
|
||||||
|
val best_candidates : string list -> string -> string list
|
||||||
|
(** [best_candidates suggestions word] returns the subset of elements in
|
||||||
|
[suggestions] that minimize the levenshtein distance to [word]. Multiple
|
||||||
|
candidates that have a same distance is possible. *)
|
||||||
|
|
||||||
|
val sorted_candidates :
|
||||||
|
?max_elements:int -> string list -> string -> string list
|
||||||
|
(** [sorted_candidates ?max_elements suggestions word] sorts the [suggestions]
|
||||||
|
list and retain at most [max_elements] (defaults to 5). This list is ordered
|
||||||
|
by their levenshtein distance to [word], i.e., the first elements are the
|
||||||
|
most similar. *)
|
||||||
|
|
||||||
val format : Format.formatter -> string list -> unit
|
val format : Format.formatter -> string list -> unit
|
||||||
|
@ -138,8 +138,7 @@ let raise_error_cons_not_found
|
|||||||
(constructor : string Mark.pos) =
|
(constructor : string Mark.pos) =
|
||||||
let constructors = Ident.Map.keys ctxt.local.constructor_idmap in
|
let constructors = Ident.Map.keys ctxt.local.constructor_idmap in
|
||||||
let closest_constructors =
|
let closest_constructors =
|
||||||
Suggestions.suggestion_minimum_levenshtein_distance_association constructors
|
Suggestions.best_candidates constructors (Mark.remove constructor)
|
||||||
(Mark.remove constructor)
|
|
||||||
in
|
in
|
||||||
Message.error
|
Message.error
|
||||||
~pos_msg:(fun ppf -> Format.fprintf ppf "Here is your code :")
|
~pos_msg:(fun ppf -> Format.fprintf ppf "Here is your code :")
|
||||||
|
@ -618,7 +618,10 @@ and typecheck_expr_top_down :
|
|||||||
"Variable @{<yellow>%s@} is not a declared output of scope %a."
|
"Variable @{<yellow>%s@} is not a declared output of scope %a."
|
||||||
field A.ScopeName.format scope_out
|
field A.ScopeName.format scope_out
|
||||||
~suggestion:
|
~suggestion:
|
||||||
(List.map A.StructField.to_string (A.StructField.Map.keys str))
|
(Suggestions.sorted_candidates
|
||||||
|
(List.map A.StructField.to_string
|
||||||
|
(A.StructField.Map.keys str))
|
||||||
|
field)
|
||||||
| None ->
|
| None ->
|
||||||
Message.error
|
Message.error
|
||||||
~extra_pos:
|
~extra_pos:
|
||||||
@ -629,7 +632,10 @@ and typecheck_expr_top_down :
|
|||||||
"Field@ @{<yellow>\"%s\"@}@ does@ not@ belong@ to@ structure@ \
|
"Field@ @{<yellow>\"%s\"@}@ does@ not@ belong@ to@ structure@ \
|
||||||
@{<yellow>\"%a\"@}."
|
@{<yellow>\"%a\"@}."
|
||||||
field A.StructName.format name
|
field A.StructName.format name
|
||||||
~suggestion:(A.Ident.Map.keys ctx.ctx_struct_fields))
|
~suggestion:
|
||||||
|
(Suggestions.sorted_candidates
|
||||||
|
(A.Ident.Map.keys ctx.ctx_struct_fields)
|
||||||
|
field))
|
||||||
in
|
in
|
||||||
try A.StructName.Map.find name candidate_structs
|
try A.StructName.Map.find name candidate_structs
|
||||||
with A.StructName.Map.Not_found _ ->
|
with A.StructName.Map.Not_found _ ->
|
||||||
|
@ -105,15 +105,16 @@ module ParserAux (LocalisedLexer : Lexer_common.LocalisedLexer) = struct
|
|||||||
|
|
||||||
let sorted_candidate_tokens lexbuf token_list env =
|
let sorted_candidate_tokens lexbuf token_list env =
|
||||||
let acceptable_tokens =
|
let acceptable_tokens =
|
||||||
List.filter
|
List.filter_map
|
||||||
(fun (_, t) ->
|
(fun ((_, t) as elt) ->
|
||||||
I.acceptable (I.input_needed env) t (fst (lexing_positions lexbuf)))
|
if I.acceptable (I.input_needed env) t (fst (lexing_positions lexbuf))
|
||||||
|
then Some elt
|
||||||
|
else None)
|
||||||
token_list
|
token_list
|
||||||
in
|
in
|
||||||
|
let lexeme = Utf8.lexeme lexbuf in
|
||||||
let similar_acceptable_tokens =
|
let similar_acceptable_tokens =
|
||||||
Suggestions.suggestion_minimum_levenshtein_distance_association
|
Suggestions.best_candidates (List.map fst acceptable_tokens) lexeme
|
||||||
(List.map (fun (s, _) -> s) acceptable_tokens)
|
|
||||||
(Utf8.lexeme lexbuf)
|
|
||||||
in
|
in
|
||||||
let module S = Set.Make (String) in
|
let module S = Set.Make (String) in
|
||||||
let s_toks = S.of_list similar_acceptable_tokens in
|
let s_toks = S.of_list similar_acceptable_tokens in
|
||||||
|
@ -50,11 +50,8 @@ $ catala test-scope A
|
|||||||
│ │ ‾
|
│ │ ‾
|
||||||
├─ Article
|
├─ Article
|
||||||
│
|
│
|
||||||
│ Maybe you wanted to write : "field0", or "field1", or "field2",
|
│ Maybe you wanted to write : "field0", or "field2", or "field10",
|
||||||
│ or "field3", or "field4", or "field5", or "field6", or "field7",
|
│ or "field1", or "field3" ?
|
||||||
│ or "field8", or "field9", or "field10", or "field11", or "field12",
|
|
||||||
│ or "field13", or "field14", or "field15", or "field16", or "field17",
|
|
||||||
│ or "field18", or "field19", or "o" ?
|
|
||||||
└─
|
└─
|
||||||
#return code 123#
|
#return code 123#
|
||||||
```
|
```
|
||||||
|
Loading…
Reference in New Issue
Block a user