Refactor suggestions mechanism (#636)

This commit is contained in:
vbot 2024-06-21 11:43:52 +02:00 committed by GitHub
commit b11b9f4918
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 144 additions and 75 deletions

View File

@ -48,59 +48,54 @@ let levenshtein_distance (s : string) (t : string) : int =
d.(m).(n)
(*We create a list composed by strings that satisfy the following rule : they
have the same levenshtein distance, which is the minimum distance between the
reference word "keyword" and all the strings in "candidates" (with the
condition that this minimum is equal to or less than one third of the length
of keyword + 1, in order to get suggestions close to "keyword")*)
let suggestion_minimum_levenshtein_distance_association
(candidates : string list)
(keyword : string) : string list =
let rec strings_minimum_levenshtein_distance
(minimum : int)
(result : string list)
(candidates' : string list) : string list =
(*As we iterate through the "candidates'" list, we create a list "result"
with all strings that have the last minimum levenshtein distance found
("minimum").*)
match candidates' with
(*When a new minimum levenshtein distance is found, the new result list is
our new element "current_string" followed by strings that have the same
minimum distance. It will be the "result" list if there is no levenshtein
distance smaller than this new minimum.*)
| current_string :: tail ->
let current_levenshtein_distance =
levenshtein_distance current_string keyword
in
if current_levenshtein_distance < minimum then
strings_minimum_levenshtein_distance current_levenshtein_distance
[current_string] tail
(*The "result" list is updated (we append "current_string" to "result")
when a new string shares the same minimum levenshtein distance
"minimum"*)
else if current_levenshtein_distance = minimum then
strings_minimum_levenshtein_distance minimum
(result @ [current_string])
tail
(*If a levenshtein distance greater than the minimum is found, "result"
doesn't change*)
else strings_minimum_levenshtein_distance minimum result tail
(*The "result" list is returned at the end of the "candidates'" list.*)
| [] -> result
in
strings_minimum_levenshtein_distance
(1 + (String.length keyword / 3))
(*In order to select suggestions that are not too far away from the
keyword*)
[] candidates
module M = Stdlib.Map.Make (Int)
let format (ppf : Format.formatter) (suggestions_list : string list) =
match suggestions_list with
let compute_candidates (candidates : string list) (word : string) :
string list M.t =
List.fold_left
(fun m candidate ->
let distance = levenshtein_distance word candidate in
M.update distance
(function None -> Some [candidate] | Some l -> Some (candidate :: l))
m)
M.empty candidates
let best_candidates candidates word =
let candidates = compute_candidates candidates word in
M.choose_opt candidates |> function None -> [] | Some (_, l) -> List.rev l
let sorted_candidates ?(max_elements = 5) suggs given =
let rec sub acc n = function
| [] -> List.rev acc
| x :: t when n > 0 -> sub (x :: acc) (pred n) t
| _ -> List.rev acc
in
let candidates =
List.map
(fun (_, l) -> List.rev l)
(M.bindings (compute_candidates suggs given))
in
List.concat candidates |> sub [] max_elements
let format ppf suggs =
let open Format in
let pp_elt elt = fprintf ppf "@{<yellow>\"%s\"@}" elt in
let rec loop = function
| [] -> assert false
| [h] ->
pp_elt h;
pp_print_string ppf "?"
| [h; t] ->
pp_elt h;
fprintf ppf "@ or@ ";
loop [t]
| h :: t ->
pp_elt h;
fprintf ppf ",@ ";
loop t
in
match suggs with
| [] -> ()
| _ :: _ ->
Format.pp_print_string ppf "Maybe you wanted to write : ";
Format.pp_print_list
~pp_sep:(fun ppf () -> Format.fprintf ppf ",@ or ")
(fun ppf string -> Format.fprintf ppf "@{<yellow>\"%s\"@}" string)
ppf suggestions_list;
Format.pp_print_string ppf " ?"
| suggs ->
pp_print_string ppf "Maybe you wanted to write: ";
loop suggs

View File

@ -15,9 +15,20 @@
License for the specific language governing permissions and limitations under
the License. *)
val suggestion_minimum_levenshtein_distance_association :
string list -> string -> string list
(**Returns a list of the closest words into {!name:candidates} to the keyword
{!name:keyword}*)
val levenshtein_distance : string -> string -> int
(** [levenshtein_distance w1 w2] computes the levenshtein distance separating
[w1] from [w2]. *)
val best_candidates : string list -> string -> string list
(** [best_candidates suggestions word] returns the subset of elements in
[suggestions] that minimize the levenshtein distance to [word]. Multiple
candidates that have a same distance is possible. *)
val sorted_candidates :
?max_elements:int -> string list -> string -> string list
(** [sorted_candidates ?max_elements suggestions word] sorts the [suggestions]
list and retain at most [max_elements] (defaults to 5). This list is ordered
by their levenshtein distance to [word], i.e., the first elements are the
most similar. *)
val format : Format.formatter -> string list -> unit

View File

@ -138,8 +138,7 @@ let raise_error_cons_not_found
(constructor : string Mark.pos) =
let constructors = Ident.Map.keys ctxt.local.constructor_idmap in
let closest_constructors =
Suggestions.suggestion_minimum_levenshtein_distance_association constructors
(Mark.remove constructor)
Suggestions.best_candidates constructors (Mark.remove constructor)
in
Message.error
~pos_msg:(fun ppf -> Format.fprintf ppf "Here is your code :")

View File

@ -618,7 +618,10 @@ and typecheck_expr_top_down :
"Variable @{<yellow>%s@} is not a declared output of scope %a."
field A.ScopeName.format scope_out
~suggestion:
(List.map A.StructField.to_string (A.StructField.Map.keys str))
(Suggestions.sorted_candidates
(List.map A.StructField.to_string
(A.StructField.Map.keys str))
field)
| None ->
Message.error
~extra_pos:
@ -629,7 +632,10 @@ and typecheck_expr_top_down :
"Field@ @{<yellow>\"%s\"@}@ does@ not@ belong@ to@ structure@ \
@{<yellow>\"%a\"@}."
field A.StructName.format name
~suggestion:(A.Ident.Map.keys ctx.ctx_struct_fields))
~suggestion:
(Suggestions.sorted_candidates
(A.Ident.Map.keys ctx.ctx_struct_fields)
field))
in
try A.StructName.Map.find name candidate_structs
with A.StructName.Map.Not_found _ ->

View File

@ -105,15 +105,16 @@ module ParserAux (LocalisedLexer : Lexer_common.LocalisedLexer) = struct
let sorted_candidate_tokens lexbuf token_list env =
let acceptable_tokens =
List.filter
(fun (_, t) ->
I.acceptable (I.input_needed env) t (fst (lexing_positions lexbuf)))
List.filter_map
(fun ((_, t) as elt) ->
if I.acceptable (I.input_needed env) t (fst (lexing_positions lexbuf))
then Some elt
else None)
token_list
in
let lexeme = Utf8.lexeme lexbuf in
let similar_acceptable_tokens =
Suggestions.suggestion_minimum_levenshtein_distance_association
(List.map (fun (s, _) -> s) acceptable_tokens)
(Utf8.lexeme lexbuf)
Suggestions.best_candidates (List.map fst acceptable_tokens) lexeme
in
let module S = Set.Make (String) in
let s_toks = S.of_list similar_acceptable_tokens in

View File

@ -24,7 +24,7 @@ $ catala test-scope A
│ 8 │ definition wrong_definition = 1
│ │ ‾
│ Maybe you wanted to write : "." ?
│ Maybe you wanted to write: "."?
└─
#return code 123#
```

View File

@ -0,0 +1,57 @@
###Article
```catala
declaration structure S:
data field0 content integer
data field1 content integer
data field2 content integer
data field3 content integer
data field4 content integer
data field5 content integer
data field6 content integer
data field7 content integer
data field8 content integer
data field9 content integer
data field10 content integer
data field11 content integer
data field12 content integer
data field13 content integer
data field14 content integer
data field15 content integer
data field16 content integer
data field17 content integer
data field18 content integer
data field19 content integer
declaration scope A:
input wrong_definition content S
output o content integer
scope A:
definition o equals wrong_definition.field20
```
```catala-test-inline
$ catala test-scope A
┌─[ERROR]─
│ Field "field20" does not belong to structure "S".
├─➤ tests/default/bad/verbose_errors.catala_en:31.22-31.46:
│ │
│ 31 │ definition o equals wrong_definition.field20
│ │ ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾
├─ Article
│ Structure definition
├─➤ tests/default/bad/verbose_errors.catala_en:4.23-4.24:
│ │
│ 4 │ declaration structure S:
│ │ ‾
├─ Article
│ Maybe you wanted to write: "field0", "field2", "field10", "field1" or
│ "field3"?
└─
#return code 123#
```

View File

@ -29,7 +29,7 @@ $ catala test-scope A
│ │ ‾‾‾‾‾
├─ Article
│ Maybe you wanted to write : "Case1", or "Case2" ?
│ Maybe you wanted to write: "Case1" or "Case2"?
└─
#return code 123#
```

View File

@ -25,7 +25,7 @@ $ catala Typecheck
│ │ ‾‾‾‾‾
├─ Article
│ Maybe you wanted to write : "Case1" ?
│ Maybe you wanted to write: "Case1"?
└─
#return code 123#
```

View File

@ -30,7 +30,7 @@ $ catala test-scope A
│ 12 │ definitoin i equals 3.
│ │ ‾‾‾‾‾‾‾‾‾‾
│ Maybe you wanted to write : "definition" ?
│ Maybe you wanted to write: "definition"?
└─
┌─[ERROR (2/2)]─
@ -46,7 +46,7 @@ $ catala test-scope A
│ 16 │ definition i equal money of (decimal of x)
│ │ ‾‾‾‾‾
│ Maybe you wanted to write : "equals" ?
│ Maybe you wanted to write: "equals"?
└─
#return code 123#
```

View File

@ -21,7 +21,7 @@ $ catala test-scope A
│ 5 │ output i contents decimal
│ │ ‾‾‾‾‾‾‾‾
│ Maybe you wanted to write : "content" ?
│ Maybe you wanted to write: "content"?
└─
#return code 123#
```

View File

@ -31,7 +31,7 @@ $ catala dcalc -s Titi
│ 2 │ declaration scope Toto:
│ │ ‾‾‾‾
│ Maybe you wanted to write : "bar", or "baz", or "foo" ?
│ Maybe you wanted to write: "bar", "baz" or "foo"?
└─
#return code 123#
```