refactor(compiler): remove the camomile dependency due to the new Utils.String_common module based on Ubase

This commit is contained in:
Emile Rolley 2022-08-03 17:02:13 +02:00
parent 03aebf7f1c
commit d85812109c
18 changed files with 151 additions and 212 deletions

View File

@ -1,29 +1,29 @@
{ lib
, pkgs
, fetchFromGitHub
, buildDunePackage
, alcotest
, ansiterminal
, sedlex
, menhir
, unionfind
, bindlib
, cmdliner_1_1_0
, re
, zarith
, zarith_stubs_js
, ocamlgraph
, calendar
, visitors
, benchmark
, bindlib
, buildDunePackage
, calendar
, cmdliner_1_1_0
, cppo
, fetchFromGitHub
, js_of_ocaml
, js_of_ocaml-ppx
, camomile
, cppo
, ppx_deriving
, z3
, alcotest
, ppx_yojson_conv
, menhir
, menhirLib ? null #for nixos-unstable compatibility.
, ocamlgraph
, pkgs
, ppx_deriving
, ppx_yojson_conv
, re
, sedlex
, ubase
, unionfind
, visitors
, z3
, zarith
, zarith_stubs_js
}:
buildDunePackage rec {
@ -37,34 +37,30 @@ buildDunePackage rec {
useDune2 = true;
propagatedBuildInputs = [
alcotest
ansiterminal
sedlex
menhir
menhirLib
cmdliner_1_1_0
re
zarith
zarith_stubs_js
ocamlgraph
calendar
visitors
benchmark
bindlib
calendar
camomile
cmdliner_1_1_0
cppo
js_of_ocaml
js_of_ocaml-ppx
ppx_yojson_conv
camomile
cppo
z3
menhir
menhirLib
ocamlgraph
pkgs.z3
ppx_deriving
alcotest
ppx_yojson_conv
re
sedlex
ubase
unionfind
bindlib
visitors
z3
zarith
zarith_stubs_js
] ++ (if isNull menhirLib then [ ] else [ menhirLib ]);
doCheck = true;

View File

@ -21,7 +21,6 @@ depends: [
"benchmark" {>= "1.6"}
"bindlib" {>= "5.0.1"}
"calendar" {>= "2.04"}
"camomile" {>= "1.0.2"}
"cmdliner" {>= "1.1.0"}
"cppo" {>= "1"}
"dune" {>= "2.8"}

View File

@ -1,7 +1,7 @@
(library
(name dcalc)
(public_name catala.dcalc)
(libraries bindlib unionFind utils re camomile catala.runtime_ocaml)
(libraries bindlib unionFind utils re ubase catala.runtime_ocaml)
(preprocess
(pps visitors.ppx)))

View File

@ -16,22 +16,11 @@
open Utils
open Ast
open String_common
(** [typ_needs_parens e] is [true] when [e] is built with the [TArrow] or
    [TArray] constructor, and [false] for every other type. Presumably used by
    the printers to decide whether to parenthesize a type -- confirm against
    callers. *)
let typ_needs_parens (e : typ) : bool =
  match e with
  | TArrow _ -> true
  | TArray _ -> true
  | _ -> false
(** [is_uppercase x] looks up the Unicode general category of [x]: it is [true]
    for [`Lu] and [false] for any other category. If the category lookup
    raises, the result is [true]. *)
let is_uppercase (x : CamomileLibraryDefault.Camomile.UChar.t) : bool =
  match CamomileLibraryDefault.Camomile.UCharInfo.general_category x with
  | `Lu -> true
  | _ -> false
  | exception _ -> true
(** [begins_with_uppercase s] applies [is_uppercase] to the UTF-8 character
    found at index 0 of [s]. *)
let begins_with_uppercase (s : string) : bool =
  is_uppercase (CamomileLibraryDefault.Camomile.UTF8.get s 0)
let format_uid_list
(fmt : Format.formatter)
(infos : Uid.MarkedString.info list) : unit =

View File

@ -18,11 +18,6 @@
open Utils
(** {1 Helpers} *)
val is_uppercase : CamomileLibraryDefault.Camomile.UChar.t -> bool
val begins_with_uppercase : string -> bool
(** {1 Common syntax highlighting helpers}*)
val format_base_type : Format.formatter -> string -> unit

View File

@ -1,81 +0,0 @@
(* This file is part of the Catala compiler, a specification language for tax
and social benefits computation rules. Copyright (C) 2021 Inria, contributor:
Denis Merigoux <denis.merigoux@inria.fr>
Licensed under the Apache License, Version 2.0 (the "License"); you may not
use this file except in compliance with the License. You may obtain a copy of
the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
License for the specific language governing permissions and limitations under
the License. *)
(** [to_ascii s] walks the UTF-8 characters of [s] and builds an ASCII string:
    - accented Latin-1 variants of A, C, E, I, O, U (both cases) listed below
      are replaced by their base letter;
    - any other code point above 127 is replaced by ['_'];
    - ASCII characters are copied unchanged.

    The result is accumulated in a [Buffer] rather than by repeated string
    concatenation, which was quadratic in the length of [s]. *)
let to_ascii (s : string) : string =
  let buf = Buffer.create (String.length s) in
  CamomileLibraryDefault.Camomile.UTF8.iter
    (fun c ->
      let code = CamomileLibraryDefault.Camomile.UChar.uint_code c in
      match code with
      | 0xc7 -> Buffer.add_char buf 'C'
      | 0xe7 -> Buffer.add_char buf 'c'
      | c when c >= 0xc0 && c <= 0xc6 -> Buffer.add_char buf 'A'
      | c when c >= 0xe0 && c <= 0xe6 -> Buffer.add_char buf 'a'
      | c when c >= 0xc8 && c <= 0xcb -> Buffer.add_char buf 'E'
      | c when c >= 0xe8 && c <= 0xeb -> Buffer.add_char buf 'e'
      | c when c >= 0xcc && c <= 0xcf -> Buffer.add_char buf 'I'
      | c when c >= 0xec && c <= 0xef -> Buffer.add_char buf 'i'
      | c when c >= 0xd2 && c <= 0xd6 -> Buffer.add_char buf 'O'
      | c when c >= 0xf2 && c <= 0xf6 -> Buffer.add_char buf 'o'
      | c when c >= 0xd9 && c <= 0xdc -> Buffer.add_char buf 'U'
      | c when c >= 0xf9 && c <= 0xfc -> Buffer.add_char buf 'u'
      | _ ->
        (* ASCII stops at 127: code point 128 must be masked too, otherwise a
           non-ASCII byte would leak into the output. *)
        if code > 127 then Buffer.add_char buf '_'
        else
          Buffer.add_char buf (CamomileLibraryDefault.Camomile.UChar.char_of c))
    s;
  Buffer.contents buf
(** [to_lowercase s] converts a CamlCase identifier into snake_case: every
    character for which [Dcalc.Print.is_uppercase] holds, except the very first
    one, is preceded by ['_'], and every character is lowercased with
    [Char.lowercase_ascii].

    Each character goes through [UChar.char_of], so [s] is expected to have
    been reduced to single-byte characters beforehand (e.g. with [to_ascii]).

    The result is accumulated in a [Buffer] instead of repeated [^]
    concatenation, which was quadratic in the length of [s]. *)
let to_lowercase (s : string) : string =
  let buf = Buffer.create (String.length s) in
  let is_first = ref true in
  CamomileLibraryDefault.Camomile.UTF8.iter
    (fun c ->
      let is_uppercase = Dcalc.Print.is_uppercase c in
      if is_uppercase && not !is_first then Buffer.add_char buf '_';
      Buffer.add_char buf
        (Char.lowercase_ascii (CamomileLibraryDefault.Camomile.UChar.char_of c));
      is_first := false)
    s;
  Buffer.contents buf
(** [to_uppercase s] converts a snake_case identifier into CamlCase:
    underscores are dropped, and the first character as well as every character
    directly following an underscore is uppercased with [Char.uppercase_ascii].

    Each character goes through [UChar.char_of], so [s] is expected to have
    been reduced to single-byte characters beforehand (e.g. with [to_ascii]).

    The result is accumulated in a [Buffer] instead of repeated [^]
    concatenation, which was quadratic in the length of [s]. *)
let to_uppercase (s : string) : string =
  let buf = Buffer.create (String.length s) in
  (* [true] for the first character and right after an underscore. *)
  let capitalize_next = ref true in
  CamomileLibraryDefault.Camomile.UTF8.iter
    (fun c ->
      let is_underscore =
        c = CamomileLibraryDefault.Camomile.UChar.of_char '_'
      in
      (* Converted unconditionally, as before, so that a character that cannot
         be represented as a [char] is still reported by [char_of]. *)
      let ch = CamomileLibraryDefault.Camomile.UChar.char_of c in
      if not is_underscore then
        Buffer.add_char buf
          (if !capitalize_next then Char.uppercase_ascii ch else ch);
      capitalize_next := is_underscore)
    s;
  Buffer.contents buf

View File

@ -1,27 +0,0 @@
(* This file is part of the Catala compiler, a specification language for tax
and social benefits computation rules. Copyright (C) 2021 Inria, contributor:
Denis Merigoux <denis.merigoux@inria.fr>
Licensed under the Apache License, Version 2.0 (the "License"); you may not
use this file except in compliance with the License. You may obtain a copy of
the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
License for the specific language governing permissions and limitations under
the License. *)
(** Helper functions common to all Catala compiler backends *)
val to_ascii : string -> string
(** Removes all non-ASCII diacritics from a string by converting them to their
base letter in the Latin alphabet *)
val to_lowercase : string -> string
(** Converts CamlCase into snake_case *)
val to_uppercase : string -> string
(** Converts snake_case into CamlCase *)

View File

@ -1,7 +1,7 @@
(library
(name lcalc)
(public_name catala.lcalc)
(libraries bindlib dcalc scopelang catala.runtime_ocaml)
(libraries bindlib ubase dcalc scopelang catala.runtime_ocaml)
(preprocess
(pps visitors.ppx)))

View File

@ -17,18 +17,6 @@
open Utils
open Ast
(** [is_uppercase x] is [true] when the Unicode general category of [x] is
    [`Lu], [false] for every other category, and [true] when the category
    lookup raises. *)
let is_uppercase (x : CamomileLibraryDefault.Camomile.UChar.t) : bool =
  let open CamomileLibraryDefault.Camomile in
  match UCharInfo.general_category x with
  | `Lu -> true
  | _ -> false
  | exception _ -> true
(** [begins_with_uppercase s] tells whether the UTF-8 character read at index 0
    of [s] satisfies [is_uppercase]. *)
let begins_with_uppercase (s : string) : bool =
  CamomileLibraryDefault.Camomile.UTF8.get s 0 |> is_uppercase
(** {b Note:} (EmileRolley) seems to be factorizable with
Dcalc.Print.format_lit. *)
let format_lit (fmt : Format.formatter) (l : lit Marked.pos) : unit =

View File

@ -16,11 +16,6 @@
open Utils
(** {1 Helpers} *)
val is_uppercase : CamomileLibraryDefault.Camomile.UChar.t -> bool
val begins_with_uppercase : string -> bool
(** {1 Formatters} *)
val format_lit : Format.formatter -> Ast.lit Marked.pos -> unit

View File

@ -16,7 +16,7 @@
open Utils
open Ast
open Backends
open String_common
module D = Dcalc.Ast
let find_struct (s : D.StructName.t) (ctx : D.decl_ctx) :
@ -148,7 +148,7 @@ let avoid_keywords (s : string) : string =
let format_struct_name (fmt : Format.formatter) (v : Dcalc.Ast.StructName.t) :
unit =
Format.asprintf "%a" Dcalc.Ast.StructName.format_t v
|> to_ascii |> to_lowercase |> avoid_keywords |> Format.fprintf fmt "%s"
|> to_ascii |> to_snake_case |> avoid_keywords |> Format.fprintf fmt "%s"
let format_to_module_name
(fmt : Format.formatter)
@ -156,7 +156,7 @@ let format_to_module_name
(match name with
| `Ename v -> Format.asprintf "%a" D.EnumName.format_t v
| `Sname v -> Format.asprintf "%a" D.StructName.format_t v)
|> to_ascii |> to_lowercase |> avoid_keywords |> String.split_on_char '_'
|> to_ascii |> to_snake_case |> avoid_keywords |> String.split_on_char '_'
|> List.map String.capitalize_ascii
|> String.concat "" |> Format.fprintf fmt "%s"
@ -175,7 +175,7 @@ let format_enum_name (fmt : Format.formatter) (v : Dcalc.Ast.EnumName.t) : unit
=
Format.fprintf fmt "%s"
(avoid_keywords
(to_lowercase
(to_snake_case
(to_ascii (Format.asprintf "%a" Dcalc.Ast.EnumName.format_t v))))
let format_enum_cons_name
@ -237,7 +237,7 @@ let rec format_typ (fmt : Format.formatter) (typ : Dcalc.Ast.typ Marked.pos) :
| TAny -> Format.fprintf fmt "_"
let format_var (fmt : Format.formatter) (v : 'm var) : unit =
let lowercase_name = to_lowercase (to_ascii (Bindlib.name_of v)) in
let lowercase_name = to_snake_case (to_ascii (Bindlib.name_of v)) in
let lowercase_name =
Re.Pcre.substitute ~rex:(Re.Pcre.regexp "\\.")
~subst:(fun _ -> "_dot_")
@ -246,10 +246,12 @@ let format_var (fmt : Format.formatter) (v : 'm var) : unit =
let lowercase_name = avoid_keywords (to_ascii lowercase_name) in
if
List.mem lowercase_name ["handle_default"; "handle_default_opt"]
|| Dcalc.Print.begins_with_uppercase (Bindlib.name_of v)
|| begins_with_uppercase (Bindlib.name_of v)
then Format.fprintf fmt "%s" lowercase_name
else if lowercase_name = "_" then Format.fprintf fmt "%s" lowercase_name
else Format.fprintf fmt "%s_" lowercase_name
else (
Cli.debug_print "lowercase_name: %s " lowercase_name;
Format.fprintf fmt "%s_" lowercase_name)
let needs_parens (e : 'm marked_expr) : bool =
match Marked.unmark e with

View File

@ -184,7 +184,7 @@ let check_exceeding_lines
(content : string) =
content |> String.split_on_char '\n'
|> List.iteri (fun i s ->
if CamomileLibrary.UTF8.length s > max_len then (
if String.length s > max_len then (
Cli.warning_print "The line %s in %s is exceeding %s characters:"
(Cli.with_style
ANSITerminal.[Bold; yellow]

View File

@ -19,9 +19,9 @@
the associated [js_of_ocaml] wrapper. *)
open Utils
open String_common
open Lcalc
open Lcalc.Ast
open Lcalc.Backends
open Lcalc.To_ocaml
module D = Dcalc.Ast
@ -42,7 +42,7 @@ module To_jsoo = struct
(v : Dcalc.Ast.StructFieldName.t) : unit =
let s =
Format.asprintf "%a" Dcalc.Ast.StructFieldName.format_t v
|> to_ascii |> to_lowercase |> avoid_keywords |> to_camel_case
|> to_ascii |> to_snake_case |> avoid_keywords |> to_camel_case
in
Format.fprintf fmt "%s" s
@ -128,14 +128,14 @@ module To_jsoo = struct
let format_var_camel_case (fmt : Format.formatter) (v : 'm var) : unit =
let lowercase_name =
Bindlib.name_of v |> to_ascii |> to_lowercase
Bindlib.name_of v |> to_ascii |> to_snake_case
|> Re.Pcre.substitute ~rex:(Re.Pcre.regexp "\\.") ~subst:(fun _ ->
"_dot_")
|> to_ascii |> avoid_keywords |> to_camel_case
in
if
List.mem lowercase_name ["handle_default"; "handle_default_opt"]
|| Dcalc.Print.begins_with_uppercase (Bindlib.name_of v)
|| begins_with_uppercase (Bindlib.name_of v)
then Format.fprintf fmt "%s" lowercase_name
else if lowercase_name = "_" then Format.fprintf fmt "%s" lowercase_name
else Format.fprintf fmt "%s_" lowercase_name

View File

@ -21,8 +21,8 @@ let name = "json_schema"
let extension = "_schema.json"
open Utils
open String_common
open Lcalc.Ast
open Lcalc.Backends
open Lcalc.To_ocaml
module D = Dcalc.Ast
@ -40,7 +40,7 @@ module To_json = struct
(v : Dcalc.Ast.StructFieldName.t) : unit =
let s =
Format.asprintf "%a" Dcalc.Ast.StructFieldName.format_t v
|> to_ascii |> to_lowercase |> avoid_keywords |> to_camel_case
|> to_ascii |> to_snake_case |> avoid_keywords |> to_camel_case
in
Format.fprintf fmt "%s" s

View File

@ -17,7 +17,7 @@
open Utils
open Ast
open Lcalc.Backends
open String_common
module Runtime = Runtime_ocaml.Runtime
module D = Dcalc.Ast
module L = Lcalc.Ast
@ -131,7 +131,7 @@ let format_struct_name (fmt : Format.formatter) (v : Dcalc.Ast.StructName.t) :
unit =
Format.fprintf fmt "%s"
(avoid_keywords
(to_uppercase
(to_camel_case
(to_ascii (Format.asprintf "%a" Dcalc.Ast.StructName.format_t v))))
let format_struct_field_name
@ -145,7 +145,7 @@ let format_enum_name (fmt : Format.formatter) (v : Dcalc.Ast.EnumName.t) : unit
=
Format.fprintf fmt "%s"
(avoid_keywords
(to_uppercase
(to_camel_case
(to_ascii (Format.asprintf "%a" Dcalc.Ast.EnumName.format_t v))))
let format_enum_cons_name
@ -193,14 +193,9 @@ let rec format_typ (fmt : Format.formatter) (typ : Dcalc.Ast.typ Marked.pos) :
| TAny -> Format.fprintf fmt "Any"
let format_name_cleaned (fmt : Format.formatter) (s : string) : unit =
let lowercase_name = to_lowercase (to_ascii s) in
let lowercase_name =
Re.Pcre.substitute ~rex:(Re.Pcre.regexp "\\.")
~subst:(fun _ -> "_dot_")
lowercase_name
in
let lowercase_name = avoid_keywords (to_ascii lowercase_name) in
Format.fprintf fmt "%s" lowercase_name
s |> to_ascii |> to_snake_case
|> Re.Pcre.substitute ~rex:(Re.Pcre.regexp "\\.") ~subst:(fun _ -> "_dot_")
|> to_ascii |> avoid_keywords |> Format.fprintf fmt "%s"
module StringMap = Map.Make (String)
module IntMap = Map.Make (Int)

View File

@ -1,7 +1,7 @@
(library
(name utils)
(public_name catala.utils)
(libraries cmdliner ANSITerminal re))
(libraries cmdliner ubase ANSITerminal re))
(documentation
(package catala)

View File

@ -0,0 +1,51 @@
(* This file is part of the Catala compiler, a specification language for tax
and social benefits computation rules. Copyright (C) 2020 Inria, contributor:
Denis Merigoux <denis.merigoux@inria.fr>, Emile Rolley <emile.rolley@tuta.io>
Licensed under the Apache License, Version 2.0 (the "License"); you may not
use this file except in compliance with the License. You may obtain a copy of
the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
License for the specific language governing permissions and limitations under
the License. *)
(** [to_ascii s] is [Ubase.from_utf8 s], called with its default optional
    arguments. *)
let to_ascii (s : string) : string = Ubase.from_utf8 s
(** [is_uppercase c] is [true] if and only if [c] is one of the ASCII letters
    ['A'..'Z'].

    The upper bound is ['Z'] (0x5a); 0x5b is ['['] and must not be accepted. *)
let is_uppercase (c : char) : bool =
  match c with 'A' .. 'Z' -> true | _ -> false
(** [begins_with_uppercase s] converts [s] with [to_ascii] and returns whether
    the first character of the result satisfies [is_uppercase].

    Returns [false] when [s] is empty, and also when the ASCII conversion
    itself is empty (NOTE(review): assumed possible for inputs made only of
    characters that the conversion drops -- confirm against Ubase), instead of
    raising [Invalid_argument] on the out-of-bounds access. *)
let begins_with_uppercase (s : string) : bool =
  if "" = s then false
  else
    match to_ascii s with
    | "" -> false
    | ascii -> is_uppercase ascii.[0]
(** [to_snake_case s] converts [s] to ASCII with [to_ascii], then turns
    CamlCase into snake_case: every character satisfying [is_uppercase], except
    the one at index 0, is preceded by ['_'], and every character is lowercased
    with [Char.lowercase_ascii].

    The result is accumulated in a [Buffer] instead of repeated [^]
    concatenation, which was quadratic in the length of [s]. *)
let to_snake_case (s : string) : string =
  let ascii = to_ascii s in
  let buf = Buffer.create (String.length ascii) in
  String.iteri
    (fun i c ->
      if is_uppercase c && 0 <> i then Buffer.add_char buf '_';
      Buffer.add_char buf (Char.lowercase_ascii c))
    ascii;
  Buffer.contents buf
(** [to_camel_case s] converts [s] to ASCII with [to_ascii], then turns
    snake_case into CamlCase: underscores are dropped, and the character at
    index 0 as well as every character directly following an underscore is
    uppercased with [Char.uppercase_ascii].

    The result is accumulated in a [Buffer] instead of repeated [^]
    concatenation, which was quadratic in the length of [s]. *)
let to_camel_case (s : string) : string =
  let ascii = to_ascii s in
  let buf = Buffer.create (String.length ascii) in
  let last_was_underscore = ref false in
  String.iteri
    (fun i c ->
      let is_underscore = Char.equal c '_' in
      if not is_underscore then
        Buffer.add_char buf
          (if !last_was_underscore || 0 = i then Char.uppercase_ascii c else c);
      last_was_underscore := is_underscore)
    ascii;
  Buffer.contents buf

View File

@ -0,0 +1,37 @@
(* This file is part of the Catala compiler, a specification language for tax
and social benefits computation rules. Copyright (C) 2020 Inria, contributor:
Denis Merigoux <denis.merigoux@inria.fr>, Emile Rolley <emile.rolley@tuta.io>
Licensed under the Apache License, Version 2.0 (the "License"); you may not
use this file except in compliance with the License. You may obtain a copy of
the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
License for the specific language governing permissions and limitations under
the License. *)
(** Helper functions manipulating strings common to all Catala compiler
internals. *)
val to_ascii : string -> string
(** Removes all non-ASCII diacritics from a string by converting them to their
base letter in the Latin alphabet. *)
val is_uppercase : char -> bool
(** [is_uppercase c] returns [true] if [c] is in the set ['A'...'Z']. *)
val begins_with_uppercase : string -> bool
(** [begins_with_uppercase s] returns [true] if the first letter of [s], once
converted to ASCII, satisfies [is_uppercase]. Returns [false] if [s] is empty. *)
val to_snake_case : string -> string
(** Converts CamlCase into snake_case after removing all diacritics on
Latin letters. *)
val to_camel_case : string -> string
(** Converts snake_case into CamlCase after removing all diacritics on
Latin letters. *)