mirror of
https://github.com/CatalaLang/catala.git
synced 2024-11-08 07:51:43 +03:00
Refactor suggestions mechanism (#636)
This commit is contained in:
commit
b11b9f4918
@ -48,59 +48,54 @@ let levenshtein_distance (s : string) (t : string) : int =
|
||||
|
||||
d.(m).(n)
|
||||
|
||||
(*We create a list composed by strings that satisfy the following rule : they
|
||||
have the same levenshtein distance, which is the minimum distance between the
|
||||
reference word "keyword" and all the strings in "candidates" (with the
|
||||
condition that this minimum is equal to or less than one third of the length
|
||||
of keyword + 1, in order to get suggestions close to "keyword")*)
|
||||
let suggestion_minimum_levenshtein_distance_association
|
||||
(candidates : string list)
|
||||
(keyword : string) : string list =
|
||||
let rec strings_minimum_levenshtein_distance
|
||||
(minimum : int)
|
||||
(result : string list)
|
||||
(candidates' : string list) : string list =
|
||||
(*As we iterate through the "candidates'" list, we create a list "result"
|
||||
with all strings that have the last minimum levenshtein distance found
|
||||
("minimum").*)
|
||||
match candidates' with
|
||||
(*When a new minimum levenshtein distance is found, the new result list is
|
||||
our new element "current_string" followed by strings that have the same
|
||||
minimum distance. It will be the "result" list if there is no levenshtein
|
||||
distance smaller than this new minimum.*)
|
||||
| current_string :: tail ->
|
||||
let current_levenshtein_distance =
|
||||
levenshtein_distance current_string keyword
|
||||
in
|
||||
if current_levenshtein_distance < minimum then
|
||||
strings_minimum_levenshtein_distance current_levenshtein_distance
|
||||
[current_string] tail
|
||||
(*The "result" list is updated (we append "current_string" to "result")
|
||||
when a new string shares the same minimum levenshtein distance
|
||||
"minimum"*)
|
||||
else if current_levenshtein_distance = minimum then
|
||||
strings_minimum_levenshtein_distance minimum
|
||||
(result @ [current_string])
|
||||
tail
|
||||
(*If a levenshtein distance greater than the minimum is found, "result"
|
||||
doesn't change*)
|
||||
else strings_minimum_levenshtein_distance minimum result tail
|
||||
(*The "result" list is returned at the end of the "candidates'" list.*)
|
||||
| [] -> result
|
||||
in
|
||||
strings_minimum_levenshtein_distance
|
||||
(1 + (String.length keyword / 3))
|
||||
(*In order to select suggestions that are not too far away from the
|
||||
keyword*)
|
||||
[] candidates
|
||||
module M = Stdlib.Map.Make (Int)
|
||||
|
||||
let format (ppf : Format.formatter) (suggestions_list : string list) =
|
||||
match suggestions_list with
|
||||
let compute_candidates (candidates : string list) (word : string) :
|
||||
string list M.t =
|
||||
List.fold_left
|
||||
(fun m candidate ->
|
||||
let distance = levenshtein_distance word candidate in
|
||||
M.update distance
|
||||
(function None -> Some [candidate] | Some l -> Some (candidate :: l))
|
||||
m)
|
||||
M.empty candidates
|
||||
|
||||
let best_candidates candidates word =
|
||||
let candidates = compute_candidates candidates word in
|
||||
M.choose_opt candidates |> function None -> [] | Some (_, l) -> List.rev l
|
||||
|
||||
let sorted_candidates ?(max_elements = 5) suggs given =
|
||||
let rec sub acc n = function
|
||||
| [] -> List.rev acc
|
||||
| x :: t when n > 0 -> sub (x :: acc) (pred n) t
|
||||
| _ -> List.rev acc
|
||||
in
|
||||
let candidates =
|
||||
List.map
|
||||
(fun (_, l) -> List.rev l)
|
||||
(M.bindings (compute_candidates suggs given))
|
||||
in
|
||||
List.concat candidates |> sub [] max_elements
|
||||
|
||||
let format ppf suggs =
|
||||
let open Format in
|
||||
let pp_elt elt = fprintf ppf "@{<yellow>\"%s\"@}" elt in
|
||||
let rec loop = function
|
||||
| [] -> assert false
|
||||
| [h] ->
|
||||
pp_elt h;
|
||||
pp_print_string ppf "?"
|
||||
| [h; t] ->
|
||||
pp_elt h;
|
||||
fprintf ppf "@ or@ ";
|
||||
loop [t]
|
||||
| h :: t ->
|
||||
pp_elt h;
|
||||
fprintf ppf ",@ ";
|
||||
loop t
|
||||
in
|
||||
match suggs with
|
||||
| [] -> ()
|
||||
| _ :: _ ->
|
||||
Format.pp_print_string ppf "Maybe you wanted to write : ";
|
||||
Format.pp_print_list
|
||||
~pp_sep:(fun ppf () -> Format.fprintf ppf ",@ or ")
|
||||
(fun ppf string -> Format.fprintf ppf "@{<yellow>\"%s\"@}" string)
|
||||
ppf suggestions_list;
|
||||
Format.pp_print_string ppf " ?"
|
||||
| suggs ->
|
||||
pp_print_string ppf "Maybe you wanted to write: ";
|
||||
loop suggs
|
||||
|
@ -15,9 +15,20 @@
|
||||
License for the specific language governing permissions and limitations under
|
||||
the License. *)
|
||||
|
||||
val suggestion_minimum_levenshtein_distance_association :
|
||||
string list -> string -> string list
|
||||
(**Returns a list of the closest words into {!name:candidates} to the keyword
|
||||
{!name:keyword}*)
|
||||
val levenshtein_distance : string -> string -> int
|
||||
(** [levenshtein_distance w1 w2] computes the levenshtein distance separating
|
||||
[w1] from [w2]. *)
|
||||
|
||||
val best_candidates : string list -> string -> string list
|
||||
(** [best_candidates suggestions word] returns the subset of elements in
|
||||
[suggestions] that minimize the levenshtein distance to [word]. Multiple
|
||||
candidates that have a same distance is possible. *)
|
||||
|
||||
val sorted_candidates :
|
||||
?max_elements:int -> string list -> string -> string list
|
||||
(** [sorted_candidates ?max_elements suggestions word] sorts the [suggestions]
|
||||
list and retain at most [max_elements] (defaults to 5). This list is ordered
|
||||
by their levenshtein distance to [word], i.e., the first elements are the
|
||||
most similar. *)
|
||||
|
||||
val format : Format.formatter -> string list -> unit
|
||||
|
@ -138,8 +138,7 @@ let raise_error_cons_not_found
|
||||
(constructor : string Mark.pos) =
|
||||
let constructors = Ident.Map.keys ctxt.local.constructor_idmap in
|
||||
let closest_constructors =
|
||||
Suggestions.suggestion_minimum_levenshtein_distance_association constructors
|
||||
(Mark.remove constructor)
|
||||
Suggestions.best_candidates constructors (Mark.remove constructor)
|
||||
in
|
||||
Message.error
|
||||
~pos_msg:(fun ppf -> Format.fprintf ppf "Here is your code :")
|
||||
|
@ -618,7 +618,10 @@ and typecheck_expr_top_down :
|
||||
"Variable @{<yellow>%s@} is not a declared output of scope %a."
|
||||
field A.ScopeName.format scope_out
|
||||
~suggestion:
|
||||
(List.map A.StructField.to_string (A.StructField.Map.keys str))
|
||||
(Suggestions.sorted_candidates
|
||||
(List.map A.StructField.to_string
|
||||
(A.StructField.Map.keys str))
|
||||
field)
|
||||
| None ->
|
||||
Message.error
|
||||
~extra_pos:
|
||||
@ -629,7 +632,10 @@ and typecheck_expr_top_down :
|
||||
"Field@ @{<yellow>\"%s\"@}@ does@ not@ belong@ to@ structure@ \
|
||||
@{<yellow>\"%a\"@}."
|
||||
field A.StructName.format name
|
||||
~suggestion:(A.Ident.Map.keys ctx.ctx_struct_fields))
|
||||
~suggestion:
|
||||
(Suggestions.sorted_candidates
|
||||
(A.Ident.Map.keys ctx.ctx_struct_fields)
|
||||
field))
|
||||
in
|
||||
try A.StructName.Map.find name candidate_structs
|
||||
with A.StructName.Map.Not_found _ ->
|
||||
|
@ -105,15 +105,16 @@ module ParserAux (LocalisedLexer : Lexer_common.LocalisedLexer) = struct
|
||||
|
||||
let sorted_candidate_tokens lexbuf token_list env =
|
||||
let acceptable_tokens =
|
||||
List.filter
|
||||
(fun (_, t) ->
|
||||
I.acceptable (I.input_needed env) t (fst (lexing_positions lexbuf)))
|
||||
List.filter_map
|
||||
(fun ((_, t) as elt) ->
|
||||
if I.acceptable (I.input_needed env) t (fst (lexing_positions lexbuf))
|
||||
then Some elt
|
||||
else None)
|
||||
token_list
|
||||
in
|
||||
let lexeme = Utf8.lexeme lexbuf in
|
||||
let similar_acceptable_tokens =
|
||||
Suggestions.suggestion_minimum_levenshtein_distance_association
|
||||
(List.map (fun (s, _) -> s) acceptable_tokens)
|
||||
(Utf8.lexeme lexbuf)
|
||||
Suggestions.best_candidates (List.map fst acceptable_tokens) lexeme
|
||||
in
|
||||
let module S = Set.Make (String) in
|
||||
let s_toks = S.of_list similar_acceptable_tokens in
|
||||
|
@ -24,7 +24,7 @@ $ catala test-scope A
|
||||
│ 8 │ definition wrong_definition = 1
|
||||
│ │ ‾
|
||||
│
|
||||
│ Maybe you wanted to write : "." ?
|
||||
│ Maybe you wanted to write: "."?
|
||||
└─
|
||||
#return code 123#
|
||||
```
|
||||
|
57
tests/default/bad/verbose_errors.catala_en
Normal file
57
tests/default/bad/verbose_errors.catala_en
Normal file
@ -0,0 +1,57 @@
|
||||
###Article
|
||||
|
||||
```catala
|
||||
declaration structure S:
|
||||
data field0 content integer
|
||||
data field1 content integer
|
||||
data field2 content integer
|
||||
data field3 content integer
|
||||
data field4 content integer
|
||||
data field5 content integer
|
||||
data field6 content integer
|
||||
data field7 content integer
|
||||
data field8 content integer
|
||||
data field9 content integer
|
||||
data field10 content integer
|
||||
data field11 content integer
|
||||
data field12 content integer
|
||||
data field13 content integer
|
||||
data field14 content integer
|
||||
data field15 content integer
|
||||
data field16 content integer
|
||||
data field17 content integer
|
||||
data field18 content integer
|
||||
data field19 content integer
|
||||
|
||||
declaration scope A:
|
||||
input wrong_definition content S
|
||||
output o content integer
|
||||
|
||||
scope A:
|
||||
definition o equals wrong_definition.field20
|
||||
```
|
||||
|
||||
```catala-test-inline
|
||||
$ catala test-scope A
|
||||
┌─[ERROR]─
|
||||
│
|
||||
│ Field "field20" does not belong to structure "S".
|
||||
│
|
||||
├─➤ tests/default/bad/verbose_errors.catala_en:31.22-31.46:
|
||||
│ │
|
||||
│ 31 │ definition o equals wrong_definition.field20
|
||||
│ │ ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾
|
||||
├─ Article
|
||||
│
|
||||
│ Structure definition
|
||||
├─➤ tests/default/bad/verbose_errors.catala_en:4.23-4.24:
|
||||
│ │
|
||||
│ 4 │ declaration structure S:
|
||||
│ │ ‾
|
||||
├─ Article
|
||||
│
|
||||
│ Maybe you wanted to write: "field0", "field2", "field10", "field1" or
|
||||
│ "field3"?
|
||||
└─
|
||||
#return code 123#
|
||||
```
|
@ -29,7 +29,7 @@ $ catala test-scope A
|
||||
│ │ ‾‾‾‾‾
|
||||
├─ Article
|
||||
│
|
||||
│ Maybe you wanted to write : "Case1", or "Case2" ?
|
||||
│ Maybe you wanted to write: "Case1" or "Case2"?
|
||||
└─
|
||||
#return code 123#
|
||||
```
|
||||
|
@ -25,7 +25,7 @@ $ catala Typecheck
|
||||
│ │ ‾‾‾‾‾
|
||||
├─ Article
|
||||
│
|
||||
│ Maybe you wanted to write : "Case1" ?
|
||||
│ Maybe you wanted to write: "Case1"?
|
||||
└─
|
||||
#return code 123#
|
||||
```
|
||||
|
@ -30,7 +30,7 @@ $ catala test-scope A
|
||||
│ 12 │ definitoin i equals 3.
|
||||
│ │ ‾‾‾‾‾‾‾‾‾‾
|
||||
│
|
||||
│ Maybe you wanted to write : "definition" ?
|
||||
│ Maybe you wanted to write: "definition"?
|
||||
└─
|
||||
|
||||
┌─[ERROR (2/2)]─
|
||||
@ -46,7 +46,7 @@ $ catala test-scope A
|
||||
│ 16 │ definition i equal money of (decimal of x)
|
||||
│ │ ‾‾‾‾‾
|
||||
│
|
||||
│ Maybe you wanted to write : "equals" ?
|
||||
│ Maybe you wanted to write: "equals"?
|
||||
└─
|
||||
#return code 123#
|
||||
```
|
||||
|
@ -21,7 +21,7 @@ $ catala test-scope A
|
||||
│ 5 │ output i contents decimal
|
||||
│ │ ‾‾‾‾‾‾‾‾
|
||||
│
|
||||
│ Maybe you wanted to write : "content" ?
|
||||
│ Maybe you wanted to write: "content"?
|
||||
└─
|
||||
#return code 123#
|
||||
```
|
||||
|
@ -31,7 +31,7 @@ $ catala dcalc -s Titi
|
||||
│ 2 │ declaration scope Toto:
|
||||
│ │ ‾‾‾‾
|
||||
│
|
||||
│ Maybe you wanted to write : "bar", or "baz", or "foo" ?
|
||||
│ Maybe you wanted to write: "bar", "baz" or "foo"?
|
||||
└─
|
||||
#return code 123#
|
||||
```
|
||||
|
Loading…
Reference in New Issue
Block a user