From 36c8a7effc83b9d9e8cb889eda076f5caade3ec6 Mon Sep 17 00:00:00 2001
From: Louis Gesbert
Date: Mon, 18 Mar 2024 17:38:10 +0100
Subject: [PATCH] Sort strings in natural order when they contain numbers

Seeing results sorted as 1 10 11 2 doesn't look nice.
---
 compiler/catala_utils/string.ml  | 54 ++++++++++++++++++++++++++++++++
 compiler/catala_utils/string.mli |  6 ++++++
 2 files changed, 60 insertions(+)

diff --git a/compiler/catala_utils/string.ml b/compiler/catala_utils/string.ml
index 57311837..44dc5e6f 100644
--- a/compiler/catala_utils/string.ml
+++ b/compiler/catala_utils/string.ml
@@ -68,7 +68,61 @@ module Arg = struct
   include Stdlib.String
 
   let format = format
+
+  (* Natural-order comparison: maximal runs of ASCII digits are compared as
+     numbers, any other character by its code. Digit runs are compared
+     textually (count of significant digits, then digit by digit) instead of
+     being converted to [int], so arbitrarily long runs cannot overflow. Runs
+     with the same numeric value are then ordered by their total length, i.e.
+     by their number of leading zeros, so that only identical strings compare
+     equal. *)
+  let compare s1 s2 =
+    let len1 = length s1 in
+    let len2 = length s2 in
+    let is_digit = function '0' .. '9' -> true | _ -> false in
+    let is_zero c = Char.equal c '0' in
+    (* Index of the first character of [s], at or after [i], that does not
+       satisfy [p]; [length s] if there is none *)
+    let rec skip p s i =
+      if i < length s && p (get s i) then skip p s (i + 1) else i
+    in
+    (* Lexicographic comparison of the [n] characters starting at [j1] in [s1]
+       and at [j2] in [s2] *)
+    let rec compare_sub j1 j2 n =
+      if n <= 0 then 0
+      else
+        match Char.compare (get s1 j1) (get s2 j2) with
+        | 0 -> compare_sub (j1 + 1) (j2 + 1) (n - 1)
+        | c -> c
+    in
+    let rec aux i1 i2 =
+      if i1 >= len1 then if i2 >= len2 then 0 else -1
+      else if i2 >= len2 then 1
+      else
+        match get s1 i1, get s2 i2 with
+        | '0' .. '9', '0' .. '9' -> (
+          (* [i, e) is the whole digit run, [z, e) the same run without its
+             leading zeros *)
+          let e1 = skip is_digit s1 i1 in
+          let e2 = skip is_digit s2 i2 in
+          let z1 = skip is_zero s1 i1 in
+          let z2 = skip is_zero s2 i2 in
+          match Int.compare (e1 - z1) (e2 - z2) with
+          | 0 -> (
+            match compare_sub z1 z2 (e1 - z1) with
+            | 0 -> (
+              match Int.compare (e1 - i1) (e2 - i2) with
+              | 0 -> aux e1 e2
+              | n -> n)
+            | n -> n)
+          | n -> n)
+        | c1, c2 -> (
+          match Char.compare c1 c2 with 0 -> aux (i1 + 1) (i2 + 1) | n -> n)
+    in
+    aux 0 0
 end
 
+let compare = Arg.compare
+
 module Set = Set.Make (Arg)
 module Map = Map.Make (Arg)
diff --git a/compiler/catala_utils/string.mli b/compiler/catala_utils/string.mli
index 7c9abf74..ee7248be 100644
--- a/compiler/catala_utils/string.mli
+++ b/compiler/catala_utils/string.mli
@@ -20,6 +20,12 @@ module Map : Map.S with type key = string
 (** Helper functions used for string manipulation.
 *)
 
+val compare : string -> string -> int
+(** String comparison with natural ordering of numbers within strings: runs of
+    ASCII digits are compared by numeric value (e.g. ["x2"] sorts before
+    ["x10"]); runs with equal value are ordered by length, so ["1"] sorts
+    before ["01"]. Only identical strings compare equal. *)
+
 val to_ascii : string -> string
 (** Removes all non-ASCII diacritics from a string by converting them to their
     base letter in the Latin alphabet. *)