Define Catala_utils.String as an overlay to stdlib string

This commit is contained in:
Louis Gesbert 2022-11-21 11:17:42 +01:00
parent 660e5775de
commit 9fc4c0c10c
8 changed files with 39 additions and 43 deletions

View File

@ -14,39 +14,38 @@
License for the specific language governing permissions and limitations under
the License. *)
include Stdlib.String
let to_ascii : string -> string = Ubase.from_utf8
let is_uppercase_ascii (c : char) : bool =
let c = Char.code c in
(* 'A' <= c && c <= 'Z' *)
0x41 <= c && c <= 0x5b
let is_uppercase_ascii = function 'A'..'Z' -> true | _ -> false
let begins_with_uppercase (s : string) : bool =
if "" = s then false else is_uppercase_ascii (to_ascii s).[0]
"" <> s && is_uppercase_ascii (get (to_ascii s) 0)
let to_snake_case (s : string) : string =
let out = ref "" in
to_ascii s
|> String.iteri (fun i c ->
|> iteri (fun i c ->
out :=
!out
^ (if is_uppercase_ascii c && 0 <> i then "_" else "")
^ String.lowercase_ascii (String.make 1 c));
^ lowercase_ascii (make 1 c));
!out
let to_camel_case (s : string) : string =
let last_was_underscore = ref false in
let out = ref "" in
to_ascii s
|> String.iteri (fun i c ->
|> iteri (fun i c ->
let is_underscore = c = '_' in
let c_string = String.make 1 c in
let c_string = make 1 c in
out :=
!out
^
if is_underscore then ""
else if !last_was_underscore || 0 = i then
String.uppercase_ascii c_string
uppercase_ascii c_string
else c_string;
last_was_underscore := is_underscore);
!out

View File

@ -14,6 +14,8 @@
License for the specific language governing permissions and limitations under
the License. *)
include module type of Stdlib.String
(** Helper functions used for string manipulation. *)
val to_ascii : string -> string

View File

@ -17,7 +17,6 @@
open Catala_utils
open Shared_ast
open Ast
open String_common
module D = Dcalc.Ast
let find_struct (s : StructName.t) (ctx : decl_ctx) : typ StructField.Map.t =
@ -141,8 +140,8 @@ let avoid_keywords (s : string) : string =
let format_struct_name (fmt : Format.formatter) (v : StructName.t) : unit =
Format.asprintf "%a" StructName.format_t v
|> to_ascii
|> to_snake_case
|> String.to_ascii
|> String.to_snake_case
|> avoid_keywords
|> Format.fprintf fmt "%s"
@ -152,8 +151,8 @@ let format_to_module_name
(match name with
| `Ename v -> Format.asprintf "%a" EnumName.format_t v
| `Sname v -> Format.asprintf "%a" StructName.format_t v)
|> to_ascii
|> to_snake_case
|> String.to_ascii
|> String.to_snake_case
|> avoid_keywords
|> String.split_on_char '_'
|> List.map String.capitalize_ascii
@ -167,18 +166,18 @@ let format_struct_field_name
| Some sname ->
Format.fprintf fmt "%a.%s" format_to_module_name (`Sname sname)
| None -> Format.fprintf fmt "%s")
(avoid_keywords (to_ascii (Format.asprintf "%a" StructField.format_t v)))
(avoid_keywords (String.to_ascii (Format.asprintf "%a" StructField.format_t v)))
let format_enum_name (fmt : Format.formatter) (v : EnumName.t) : unit =
Format.fprintf fmt "%s"
(avoid_keywords
(to_snake_case (to_ascii (Format.asprintf "%a" EnumName.format_t v))))
(String.to_snake_case (String.to_ascii (Format.asprintf "%a" EnumName.format_t v))))
let format_enum_cons_name (fmt : Format.formatter) (v : EnumConstructor.t) :
unit =
Format.fprintf fmt "%s"
(avoid_keywords
(to_ascii (Format.asprintf "%a" EnumConstructor.format_t v)))
(String.to_ascii (Format.asprintf "%a" EnumConstructor.format_t v)))
let rec typ_embedding_name (fmt : Format.formatter) (ty : typ) : unit =
match Marked.unmark ty with
@ -222,16 +221,16 @@ let rec format_typ (fmt : Format.formatter) (typ : typ) : unit =
| TAny -> Format.fprintf fmt "_"
let format_var (fmt : Format.formatter) (v : 'm Var.t) : unit =
let lowercase_name = to_snake_case (to_ascii (Bindlib.name_of v)) in
let lowercase_name = String.to_snake_case (String.to_ascii (Bindlib.name_of v)) in
let lowercase_name =
Re.Pcre.substitute ~rex:(Re.Pcre.regexp "\\.")
~subst:(fun _ -> "_dot_")
lowercase_name
in
let lowercase_name = avoid_keywords (to_ascii lowercase_name) in
let lowercase_name = avoid_keywords (String.to_ascii lowercase_name) in
if
List.mem lowercase_name ["handle_default"; "handle_default_opt"]
|| begins_with_uppercase (Bindlib.name_of v)
|| String.begins_with_uppercase (Bindlib.name_of v)
then Format.fprintf fmt "%s" lowercase_name
else if lowercase_name = "_" then Format.fprintf fmt "%s" lowercase_name
else (

View File

@ -133,7 +133,7 @@ let pygmentize_code (c : string Marked.pos) (language : C.backend_lang) : string
"html";
"-O";
"style=colorful,anchorlinenos=True,lineanchors=\""
^ String_common.to_ascii (Pos.get_file (Marked.get_mark c))
^ String.to_ascii (Pos.get_file (Marked.get_mark c))
^ "\",linenos=table,linenostart="
^ string_of_int (Pos.get_start_line (Marked.get_mark c));
"-o";
@ -160,7 +160,7 @@ let pygmentize_code (c : string Marked.pos) (language : C.backend_lang) : string
let sanitize_html_href str =
str
|> String_common.to_ascii
|> String.to_ascii
|> R.substitute ~rex:(R.regexp "[' '°\"]") ~subst:(function _ -> "%20")
let rec law_structure_to_html

View File

@ -20,7 +20,6 @@
open Catala_utils
open Shared_ast
open String_common
open Lcalc
open Lcalc.Ast
open Lcalc.To_ocaml
@ -43,8 +42,8 @@ module To_jsoo = struct
(v : StructField.t) : unit =
let s =
Format.asprintf "%a" StructField.format_t v
|> to_ascii
|> to_snake_case
|> String.to_ascii
|> String.to_snake_case
|> avoid_keywords
|> to_camel_case
in
@ -118,17 +117,17 @@ module To_jsoo = struct
let format_var_camel_case (fmt : Format.formatter) (v : 'm Var.t) : unit =
let lowercase_name =
Bindlib.name_of v
|> to_ascii
|> to_snake_case
|> String.to_ascii
|> String.to_snake_case
|> Re.Pcre.substitute ~rex:(Re.Pcre.regexp "\\.") ~subst:(fun _ ->
"_dot_")
|> to_ascii
|> String.to_ascii
|> avoid_keywords
|> to_camel_case
in
if
List.mem lowercase_name ["handle_default"; "handle_default_opt"]
|| begins_with_uppercase (Bindlib.name_of v)
|| String.begins_with_uppercase (Bindlib.name_of v)
then Format.fprintf fmt "%s" lowercase_name
else if lowercase_name = "_" then Format.fprintf fmt "%s" lowercase_name
else Format.fprintf fmt "%s_" lowercase_name

View File

@ -21,7 +21,6 @@ let name = "json_schema"
let extension = "_schema.json"
open Catala_utils
open String_common
open Shared_ast
open Lcalc.Ast
open Lcalc.To_ocaml
@ -41,8 +40,8 @@ module To_json = struct
(v : StructField.t) : unit =
let s =
Format.asprintf "%a" StructField.format_t v
|> to_ascii
|> to_snake_case
|> String.to_ascii
|> String.to_snake_case
|> avoid_keywords
|> to_camel_case
in

View File

@ -18,7 +18,6 @@
open Catala_utils
open Shared_ast
open Ast
open String_common
module Runtime = Runtime_ocaml.Runtime
module D = Dcalc.Ast
module L = Lcalc.Ast
@ -125,23 +124,23 @@ let avoid_keywords (s : string) : string =
let format_struct_name (fmt : Format.formatter) (v : StructName.t) : unit =
Format.fprintf fmt "%s"
(avoid_keywords
(to_camel_case (to_ascii (Format.asprintf "%a" StructName.format_t v))))
(String.to_camel_case (String.to_ascii (Format.asprintf "%a" StructName.format_t v))))
let format_struct_field_name (fmt : Format.formatter) (v : StructField.t) : unit
=
Format.fprintf fmt "%s"
(avoid_keywords (to_ascii (Format.asprintf "%a" StructField.format_t v)))
(avoid_keywords (String.to_ascii (Format.asprintf "%a" StructField.format_t v)))
let format_enum_name (fmt : Format.formatter) (v : EnumName.t) : unit =
Format.fprintf fmt "%s"
(avoid_keywords
(to_camel_case (to_ascii (Format.asprintf "%a" EnumName.format_t v))))
(String.to_camel_case (String.to_ascii (Format.asprintf "%a" EnumName.format_t v))))
let format_enum_cons_name (fmt : Format.formatter) (v : EnumConstructor.t) :
unit =
Format.fprintf fmt "%s"
(avoid_keywords
(to_ascii (Format.asprintf "%a" EnumConstructor.format_t v)))
(String.to_ascii (Format.asprintf "%a" EnumConstructor.format_t v)))
let typ_needs_parens (e : typ) : bool =
match Marked.unmark e with TArrow _ | TArray _ -> true | _ -> false
@ -179,10 +178,10 @@ let rec format_typ (fmt : Format.formatter) (typ : typ) : unit =
let format_name_cleaned (fmt : Format.formatter) (s : string) : unit =
s
|> to_ascii
|> to_snake_case
|> String.to_ascii
|> String.to_snake_case
|> Re.Pcre.substitute ~rex:(Re.Pcre.regexp "\\.") ~subst:(fun _ -> "_dot_")
|> to_ascii
|> String.to_ascii
|> avoid_keywords
|> Format.fprintf fmt "%s"

View File

@ -15,7 +15,6 @@
the License. *)
open Catala_utils
open String_common
open Definitions
let typ_needs_parens (ty : typ) : bool =
@ -27,7 +26,7 @@ let uid_list (fmt : Format.formatter) (infos : Uid.MarkedString.info list) :
~pp_sep:(fun fmt () -> Format.pp_print_char fmt '.')
(fun fmt info ->
Cli.format_with_style
(if begins_with_uppercase (Marked.unmark info) then [ANSITerminal.red]
(if String.begins_with_uppercase (Marked.unmark info) then [ANSITerminal.red]
else [])
fmt
(Uid.MarkedString.to_string info))