(** [levenshtein_distance s t] computes the Levenshtein (edit) distance between
    [s] and [t], i.e. the minimum number of single-character deletions,
    insertions and substitutions needed to turn [s] into [t]. It is used to
    provide suggestions in error messages.

    The computation fills a [(String.length s + 1) x (String.length t + 1)]
    matrix, so both time and memory are proportional to the product of the two
    lengths. *)
let levenshtein_distance (s : string) (t : string) : int =
  let three_way_minimum a b c = min a (min b c) in
  let m = String.length s and n = String.length t in
  (* for all i and j, d.(i).(j) will hold the Levenshtein distance between the
     first i characters of s and the first j characters of t *)
  let d = Array.make_matrix (m + 1) (n + 1) 0 in
  (* the distance of any prefix of s to an empty second string: i deletions *)
  for i = 0 to m do
    d.(i).(0) <- i
  done;
  (* the distance of an empty first string to any prefix of t: j insertions *)
  for j = 0 to n do
    d.(0).(j) <- j
  done;
  for j = 1 to n do
    for i = 1 to m do
      if Char.equal s.[i - 1] t.[j - 1] then
        (* same last character: no operation required *)
        d.(i).(j) <- d.(i - 1).(j - 1)
      else
        d.(i).(j) <-
          three_way_minimum
            (d.(i - 1).(j) + 1) (* a deletion *)
            (d.(i).(j - 1) + 1) (* an insertion *)
            (d.(i - 1).(j - 1) + 1) (* a substitution *)
    done
  done;
  (* the bottom-right cell compares the two full strings *)
  d.(m).(n)
(** [suggestion_minimum_levenshtein_distance_association string_list keyword]
    selects, among [string_list], the candidates that are closest to [keyword]
    according to {!levenshtein_distance}.

    All returned strings share the same distance to [keyword], which is the
    minimum distance found over [string_list]. Candidates are only kept when
    that minimum is at most [1 + (String.length keyword / 3)], so that the
    suggestions stay close to [keyword]. The relative order of [string_list] is
    preserved in the result.

    @return
      [Some suggestions] with a non-empty list, or [None] when no candidate is
      close enough (including when [string_list] is empty). *)
let suggestion_minimum_levenshtein_distance_association
    (string_list : string list)
    (keyword : string) : string list option =
  (* Walks through [remaining] while maintaining [minimum], the smallest
     distance seen so far (initially the acceptance threshold), and
     [rev_result], the strings found at exactly that distance, most recent
     first. Building the list in reverse avoids a quadratic append. *)
  let rec strings_minimum_levenshtein_distance
      (minimum : int)
      (rev_result : string list)
      (remaining : string list) : string list =
    match remaining with
    | [] ->
      (* End of the candidates: restore the input order. *)
      List.rev rev_result
    | current_string :: tail ->
      let current_levenshtein_distance =
        levenshtein_distance current_string keyword
      in
      if current_levenshtein_distance < minimum then
        (* A strictly smaller distance was found: earlier candidates are
           discarded and the new minimum starts a fresh result list. *)
        strings_minimum_levenshtein_distance current_levenshtein_distance
          [current_string] tail
      else if current_levenshtein_distance = minimum then
        (* Same distance as the current minimum: keep this candidate too. *)
        strings_minimum_levenshtein_distance minimum
          (current_string :: rev_result)
          tail
      else
        (* Farther than the current minimum: the result does not change. *)
        strings_minimum_levenshtein_distance minimum rev_result tail
  in
  let suggestions =
    strings_minimum_levenshtein_distance
      (* Initial threshold, in order to select suggestions that are not too
         far away from the keyword. *)
      (1 + (String.length keyword / 3))
      [] string_list
  in
  match suggestions with [] -> None | _ :: _ -> Some suggestions
(** [display suggestions_list ppf] prints the suggestions on [ppf].

    Nothing is printed when [suggestions_list] is empty. Otherwise the text
    [Maybe you wanted to write : ] is printed, followed by every suggestion
    between double quotes and wrapped in a [yellow] semantic tag. Suggestions
    are separated by a comma, a break hint and the word [or]. How the tag is
    rendered depends on the tag handling configured on [ppf]. *)
let display (suggestions_list : string list) (ppf : Format.formatter) =
  match suggestions_list with
  | [] -> ()
  | _ :: _ ->
    Format.pp_print_string ppf "Maybe you wanted to write : ";
    Format.pp_print_list
      ~pp_sep:(fun ppf () -> Format.fprintf ppf ",@,or ")
      (fun ppf string -> Format.fprintf ppf "@{<yellow>\"%s\"@}" string)
      ppf suggestions_list