2022-08-03 18:02:13 +03:00
|
|
|
(* This file is part of the Catala compiler, a specification language for tax
|
|
|
|
and social benefits computation rules. Copyright (C) 2020 Inria, contributor:
|
|
|
|
Denis Merigoux <denis.merigoux@inria.fr>, Emile Rolley <emile.rolley@tuta.io>
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
|
|
|
use this file except in compliance with the License. You may obtain a copy of
|
|
|
|
the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
|
|
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
|
|
License for the specific language governing permissions and limitations under
|
|
|
|
the License. *)
|
|
|
|
|
2022-11-21 13:17:42 +03:00
|
|
|
include Stdlib.String
|
|
|
|
|
2022-08-03 18:02:13 +03:00
|
|
|
(* [to_ascii s] is [Ubase.from_utf8 s], constrained to the plain
   [string -> string] type.
   NOTE(review): presumably this transliterates UTF-8 text to a close ASCII
   form (e.g. dropping diacritics) -- confirm against the Ubase
   documentation. *)
let to_ascii : string -> string = Ubase.from_utf8
|
2022-11-24 20:00:45 +03:00
|
|
|
(* [is_uppercase_ascii c] is [true] iff [c] lies in the ASCII range
   ['A'..'Z']; every other character (lowercase, digits, non-ASCII bytes)
   yields [false]. *)
let is_uppercase_ascii c = 'A' <= c && c <= 'Z'
|
2022-08-03 18:02:13 +03:00
|
|
|
|
|
|
|
(* [begins_with_uppercase s] is [true] iff the transliteration of [s] computed
   by [to_ascii] starts with a letter in ['A'..'Z']. It is [false] when [s],
   or its transliteration, is empty. *)
let begins_with_uppercase (s : string) : bool =
  "" <> s
  &&
  (* Guard the string that is actually indexed: a non-empty [s] is not
     guaranteed to stay non-empty once transliterated, and [get _ 0] raises
     [Invalid_argument] on an empty string. *)
  let ascii = to_ascii s in
  "" <> ascii && is_uppercase_ascii (get ascii 0)
|
2022-08-03 18:02:13 +03:00
|
|
|
|
|
|
|
(* [to_snake_case s] transliterates [s] with [to_ascii], lowercases every
   character, and inserts a ['_'] in front of each uppercase ASCII letter that
   is neither the first character nor already preceded by a ['_']. *)
let to_snake_case (s : string) : string =
  let ascii = to_ascii s in
  let out = Buffer.create (2 * length ascii) in
  iteri
    (fun i c ->
      (* The look-behind must read [ascii], the string being iterated: byte
         offsets of the original [s] stop lining up with it as soon as
         transliteration changes the byte length of a character, which would
         give a wrong answer or an out-of-bounds [get]. *)
      if is_uppercase_ascii c && 0 <> i && get ascii (i - 1) <> '_' then
        Buffer.add_char out '_';
      Buffer.add_char out (Char.lowercase_ascii c))
    ascii;
  Buffer.contents out
|
2022-08-03 18:02:13 +03:00
|
|
|
|
|
|
|
(* [to_camel_case s] transliterates [s] with [to_ascii], drops every ['_'], and
   uppercases (ASCII only) the first kept character as well as each kept
   character that directly follows one or more ['_']. *)
let to_camel_case (s : string) : string =
  let out = Buffer.create (length s) in
  (* The accumulator tells whether the next emitted character must be
     capitalised; it starts at [true] so that the first one is. *)
  let step capitalise = function
    | '_' -> true
    | c ->
      Buffer.add_char out (if capitalise then Char.uppercase_ascii c else c);
      false
  in
  let (_ : bool) = fold_left step true (to_ascii s) in
  Buffer.contents out
|
2022-11-22 22:57:59 +03:00
|
|
|
|
Add overloaded operators for the common operations
This uses the same disambiguation mechanism put in place for
structures, calling the typer on individual rules on the desugared AST
to propagate types, in order to resolve ambiguous operators like `+`
to their strongly typed counterparts (`+!`, `+.`, `+$`, `+@`, `+^`) in
the translation to scopelang.
The patch includes some normalisation of the definition of all the
operators, and classifies them based on their typing policy instead of
their arity. It also adds a little more flexibility:
- a couple new operators, like `-` on date and duration
- optional type annotation on some aggregation constructions
The `Shared_ast` lib is also lightly restructured, with the `Expr`
module split into `Type`, `Operator` and `Expr`.
2022-11-29 11:47:53 +03:00
|
|
|
(* [remove_prefix ~prefix s] is [s] without its leading [prefix] when [s]
   starts with it, and [s] itself otherwise. *)
let remove_prefix ~prefix s =
  match starts_with ~prefix s with
  | false -> s
  | true ->
    let skip = length prefix in
    sub s skip (length s - skip)
|
|
|
|
|
2024-06-21 16:41:44 +03:00
|
|
|
(* [trim_end s] is [s] without its trailing blanks, a blank being one of
   [' '], ['\x0c'], ['\n'], ['\r'] or ['\t']. When there is nothing to strip,
   [s] itself is returned rather than a copy. *)
let trim_end s =
  let is_blank = function
    | ' ' | '\x0c' | '\n' | '\r' | '\t' -> true
    | _ -> false
  in
  let total = length s in
  (* [keep] is the length of the prefix to retain; shrink it while the last
     retained character is a blank. *)
  let keep = ref total in
  while !keep > 0 && is_blank (get s (!keep - 1)) do
    decr keep
  done;
  if !keep = total then s else sub s 0 !keep
|
|
|
|
|
2023-07-07 15:48:53 +03:00
|
|
|
(* [width s] estimates the number of columns taken by the UTF-8 string [s]:
   a tab byte counts for 8 columns, any other decoded character for 1.

   Note: this should do, but remains incorrect for combined unicode characters
   that display as one (e.g. `e` + postfix `'`). We should switch to Uuseg at
   some point *)
let width s =
  let total = length s in
  let cols = ref 0 in
  let pos = ref 0 in
  while !pos < total do
    if get s !pos = '\t' then begin
      cols := !cols + 8;
      incr pos
    end
    else begin
      incr cols;
      (* Jump over all the bytes of the character decoded at [!pos]. *)
      pos := !pos + Uchar.utf_decode_length (get_utf_8_uchar s !pos)
    end
  done;
  !cols
|
|
|
|
|
2023-07-12 12:48:46 +03:00
|
|
|
(* [format ppf s] prints [s] on the formatter [ppf] through
   [Format.pp_print_as], passing [width s] as the length the pretty-printer
   should account for instead of the byte length of [s]. *)
let format ppf s = Format.pp_print_as ppf (width s) s
|
2023-07-11 18:10:00 +03:00
|
|
|
|
2023-07-12 12:48:46 +03:00
|
|
|
(* Ordered-type argument handed to the [Set.Make] and [Map.Make] functors in
   this file: [Stdlib.String] plus the [format] printer defined in this file
   and a digit-aware [compare]. *)
module Arg = struct
  include Stdlib.String

  let format = format

  (* [compare s1 s2] compares the two strings left to right. Where both
     strings have an ASCII digit at the current position, the two maximal
     digit runs starting there are compared as numbers (so ["a2"] sorts before
     ["a10"]); numerically equal runs are then ordered by raw length, i.e. the
     one with fewer leading zeros first. Everywhere else characters are
     compared with [Char.compare], and a strict prefix sorts first.

     Digit runs are compared textually rather than being accumulated into an
     [int]: native integer arithmetic wraps around silently, which would
     scramble the order of runs too long to fit in an [int]. *)
  let compare s1 s2 =
    let len1 = length s1 in
    let len2 = length s2 in
    let is_digit = function '0' .. '9' -> true | _ -> false in
    (* First index in [i, stop) of [s] whose character does not satisfy [p],
       or [stop] if there is none. *)
    let rec skip p s stop i =
      if i < stop && p (get s i) then skip p s stop (i + 1) else i
    in
    (* Lexicographic comparison of [n] characters of [s1] from [j1] against
       [n] characters of [s2] from [j2]. *)
    let rec compare_digits j1 j2 n =
      if n = 0 then 0
      else
        match Char.compare (get s1 j1) (get s2 j2) with
        | 0 -> compare_digits (j1 + 1) (j2 + 1) (n - 1)
        | c -> c
    in
    let rec aux i1 i2 =
      if i1 >= len1 then if i2 >= len2 then 0 else -1
      else if i2 >= len2 then 1
      else
        match get s1 i1, get s2 i2 with
        | '0' .. '9', '0' .. '9' -> (
          (* [e1], [e2]: ends of the digit runs; [z1], [z2]: their first
             significant (non leading-zero) digit. *)
          let e1 = skip is_digit s1 len1 i1 in
          let e2 = skip is_digit s2 len2 i2 in
          let z1 = skip (Char.equal '0') s1 e1 i1 in
          let z2 = skip (Char.equal '0') s2 e2 i2 in
          (* More significant digits means a greater number; with as many,
             the digit-wise order is the numeric order. *)
          match Int.compare (e1 - z1) (e2 - z2) with
          | 0 -> (
            match compare_digits z1 z2 (e1 - z1) with
            | 0 -> (
              (* Same value: break the tie on the raw length of the runs. *)
              match Int.compare (e1 - i1) (e2 - i2) with
              | 0 -> aux e1 e2
              | n -> n)
            | n -> n)
          | n -> n)
        | c1, c2 -> (
          match Char.compare c1 c2 with 0 -> aux (i1 + 1) (i2 + 1) | n -> n)
    in
    aux 0 0
end
|
|
|
|
|
2024-03-18 19:38:10 +03:00
|
|
|
(* Rebinds the top-level [compare] to [Arg.compare], shadowing the [compare]
   brought into scope by [include Stdlib.String]. *)
let compare = Arg.compare
|
2024-05-24 15:26:44 +03:00
|
|
|
(* [hash t] is [Hash.raw t]. [Hash] is not defined in this file; refer to that
   module for the hashing scheme and the result type. *)
let hash t = Hash.raw t
|
2024-03-18 19:38:10 +03:00
|
|
|
|
2023-07-12 12:48:46 +03:00
|
|
|
(* Sets of strings, with elements ordered by [Arg.compare]. *)
module Set = Set.Make (Arg)
|
|
|
|
(* Maps keyed by strings, with keys ordered by [Arg.compare]. *)
module Map = Map.Make (Arg)
|