2020-11-23 13:42:29 +03:00
|
|
|
(* This file is part of the Catala compiler, a specification language for tax
|
|
|
|
and social benefits computation rules. Copyright (C) 2020 Inria, contributor:
|
|
|
|
Denis Merigoux <denis.merigoux@inria.fr>
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
|
|
|
use this file except in compliance with the License. You may obtain a copy of
|
|
|
|
the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
|
|
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
|
|
License for the specific language governing permissions and limitations under
|
|
|
|
the License. *)
|
|
|
|
|
2020-12-14 20:09:38 +03:00
|
|
|
(** Global identifier factories using a generative functor *)
|
|
|
|
|
|
|
|
(** The information carried in global identifiers *)
|
2020-11-23 13:42:29 +03:00
|
|
|
module type Info = sig
|
|
|
|
type info
(** The type of the information carried by an identifier *)
|
|
|
|
|
2022-08-17 17:14:14 +03:00
|
|
|
val to_string : info -> string
(** Renders the information as a string *)
|
2022-11-21 12:46:17 +03:00
|
|
|
val format : Format.formatter -> info -> unit
(** Prints the information on the given formatter *)
|
2022-08-25 13:09:51 +03:00
|
|
|
|
|
|
|
|
val equal : info -> info -> bool
|
|
|
|
(** Equality test on the information; it disregards position *)
|
|
|
|
|
|
|
|
|
|
val compare : info -> info -> int
|
|
|
|
(** Ordering on the information; it disregards position *)
|
2024-05-24 15:26:44 +03:00
|
|
|
|
|
|
|
|
val hash : info -> Hash.t
|
|
|
|
(** Hash of the information; it disregards position *)
|
2020-11-23 13:42:29 +03:00
|
|
|
end
|
|
|
|
|
2023-05-17 16:44:57 +03:00
|
|
|
module MarkedString : Info with type info = string Mark.pos
|
2020-12-14 20:09:38 +03:00
|
|
|
(** The only kind of information carried in Catala identifiers is the original
    string of the identifier annotated with the position where it is declared
    or used. As specified by the [Info] signature, [equal], [compare] and
    [hash] disregard that position. *)
|
2020-11-23 13:42:29 +03:00
|
|
|
|
2020-12-14 20:09:38 +03:00
|
|
|
(** Identifiers have abstract types, but are comparable so they can be used as
|
|
|
|
keys in maps or sets. Their underlying information can be retrieved at any
|
|
|
|
time. *)
|
2020-11-23 13:42:29 +03:00
|
|
|
module type Id = sig
|
|
|
|
type t
(** The abstract type of identifiers *)
|
|
|
|
type info
(** The type of the information attached to each identifier *)
|
|
|
|
|
|
|
|
|
|
val fresh : info -> t
(** [fresh info] returns a new identifier carrying [info] *)
|
|
|
|
val get_info : t -> info
(** Retrieves the information carried by the identifier *)
|
|
|
|
val compare : t -> t -> int
(** Ordering on identifiers, which lets them be used as keys in [Set] and
    [Map] *)
|
2022-08-22 19:53:30 +03:00
|
|
|
val equal : t -> t -> bool
(** Equality test on identifiers *)
|
2023-07-12 12:48:46 +03:00
|
|
|
val format : Format.formatter -> t -> unit
(** Prints the identifier on the given formatter *)
|
2023-11-20 18:01:06 +03:00
|
|
|
val to_string : t -> string
(** Renders the identifier as a string *)
|
2024-05-24 15:26:44 +03:00
|
|
|
|
|
|
|
|
val id : t -> int
|
|
|
|
(** Returns the unique ID of the identifier *)
|
|
|
|
|
|
|
|
|
|
val hash : t -> Hash.t
|
|
|
|
(** While [id] returns a unique ID valid for a given Uid instance within a
    given run of catala, this is a raw hash of the identifier string.
    Therefore, it may collide within a given program, but remains meaningful
    across separate compilations. *)
|
2022-11-21 11:57:41 +03:00
|
|
|
|
2022-11-21 12:12:45 +03:00
|
|
|
module Set : Set.S with type elt = t
(** Sets of identifiers *)
|
|
|
|
module Map : Map.S with type key = t
(** Maps keyed by identifiers *)
|
2020-11-23 13:42:29 +03:00
|
|
|
end
|
|
|
|
|
2023-09-01 11:43:46 +03:00
|
|
|
(** Used to define a consistent specific style when printing the different kinds
    of uids *)
|
|
|
|
module type Style = sig
|
|
|
|
val style : Ocolor_types.style
(** The style to use when printing uids of the corresponding kind *)
|
|
|
|
end
|
|
|
|
|
2020-12-14 20:09:38 +03:00
|
|
|
(** This is the generative functor that ensures that two modules resulting from
    two different calls to [Make] will be viewed as different types [t] by the
    OCaml typechecker. Prevents mixing up different sorts of identifiers.

    [X] supplies the type of the information carried by the identifiers (the
    result satisfies [info = X.info]); the anonymous [Style] argument supplies
    the printing style; the final [()] argument is what makes the functor
    generative. *)
|
2023-11-20 18:01:06 +03:00
|
|
|
module Make (X : Info) (_ : Style) () : Id with type info = X.info
|
2022-11-21 12:11:51 +03:00
|
|
|
|
2022-11-21 12:12:45 +03:00
|
|
|
(** Shortcut for creating a kind of uids over marked strings: the resulting
    identifiers carry a [MarkedString.info]. Like [Make], it takes a [Style]
    argument followed by a final [()] argument. *)
|
2023-11-20 18:01:06 +03:00
|
|
|
module Gen (_ : Style) () : Id with type info = MarkedString.info
|
2023-08-30 18:49:29 +03:00
|
|
|
|
|
|
|
(** {2 Handling of Uids with additional path information} *)
|
|
|
|
|
2023-11-20 18:01:06 +03:00
|
|
|
(** Identifiers of the [Module] kind, carrying marked strings. A [Path.t] is a
    list of such identifiers. *)
module Module : Id with type info = MarkedString.info
|
2023-08-30 18:49:29 +03:00
|
|
|
|
|
|
|
module Path : sig
|
|
|
|
type t = Module.t list
(** A path is a list of [Module] identifiers *)
|
|
|
|
|
|
|
|
|
|
val to_string : t -> string
(** Renders the path as a string *)
|
|
|
|
val format : Format.formatter -> t -> unit
(** Prints the path on the given formatter *)
|
|
|
|
val equal : t -> t -> bool
(** Equality test on paths *)
|
|
|
|
val compare : t -> t -> int
(** Ordering on paths *)
|
2024-05-24 18:24:14 +03:00
|
|
|
|
|
|
|
|
val strip : t -> t -> t
|
|
|
|
(** [strip pfx p] removes [pfx] from the start of [p]. If [p] doesn't start
    with [pfx], it is returned unchanged. *)
|
2023-08-30 18:49:29 +03:00
|
|
|
end
|
|
|
|
|
|
|
|
(** Same as [Gen] but also registers path information *)
|
2023-11-20 18:01:06 +03:00
|
|
|
module Gen_qualified (_ : Style) () : sig
|
2023-08-30 18:49:29 +03:00
|
|
|
include Id with type info = Path.t * MarkedString.info
(** The information of these identifiers pairs a path with a marked string *)
|
|
|
|
|
|
|
|
|
|
val fresh : Path.t -> MarkedString.info -> t
(** [fresh path info] returns a new identifier, taking the path and the marked
    string as separate arguments. Replaces the [fresh] included from [Id]. *)
|
|
|
|
val path : t -> Path.t
(** Returns the path component of the identifier *)
|
|
|
|
val get_info : t -> MarkedString.info
(** Returns only the marked string of the identifier, without its path.
    Replaces the [get_info] included from [Id]. *)
|
Implement safe renaming of idents for backend printing
Previously we had some heuristics in the backends trying to achieve this with a
lot of holes ; this should be much more solid, relying on `Bindlib` to do the
correct renamings.
**Note1**: it's not plugged into the backends other than OCaml at the moment.
**Note2**: the related, obsolete heuristics haven't been cleaned out yet
**Note3**: we conservatively suppose a single namespace at the moment. This is
required for e.g. Python, but it forces vars named like struct fields to be
renamed, which is more verbose in e.g. OCaml. The renaming engine could be
improved to support different namespaces, with a way to select how to route the
different kinds of identifiers into them.
Similarly, customisation for what needs to be uppercase or lowercase is not
available yet.
**Note4**: besides excluding keywords, we should also be careful to exclude (or
namespace):
- the idents used in the runtime (e.g. `o_add_int_int`)
- the dynamically generated idents (e.g. `embed_*`)
**Note5**: module names themselves aren't handled yet. The reason is that they
must be discoverable by the user, and even need to match the filenames, etc. In
other words, imagine that `Mod` is a keyword in the target language. You can't
rename a module called `Mod` to `Mod1` without knowing the whole module context,
because that would destroy the mapping for a module already called `Mod1`.
A reliable solution would be to translate all module names to e.g.
`CatalaModule_*`, which we can assume will never conflict with any built-in, and
forbid idents starting with that prefix. We may also want to restrict their
names to ASCII ? Currently we use a projection, but what if I have two modules
called `Là` and `La` ?
2024-08-05 18:08:36 +03:00
|
|
|
|
2024-05-24 18:24:14 +03:00
|
|
|
val hash : strip:Path.t -> t -> Hash.t
|
Implement safe renaming of idents for backend printing
Previously we had some heuristics in the backends trying to achieve this with a
lot of holes ; this should be much more solid, relying on `Bindlib` to do the
correct renamings.
**Note1**: it's not plugged into the backends other than OCaml at the moment.
**Note2**: the related, obsolete heuristics haven't been cleaned out yet
**Note3**: we conservatively suppose a single namespace at the moment. This is
required for e.g. Python, but it forces vars named like struct fields to be
renamed, which is more verbose in e.g. OCaml. The renaming engine could be
improved to support different namespaces, with a way to select how to route the
different kinds of identifiers into them.
Similarly, customisation for what needs to be uppercase or lowercase is not
available yet.
**Note4**: besides excluding keywords, we should also be careful to exclude (or
namespace):
- the idents used in the runtime (e.g. `o_add_int_int`)
- the dynamically generated idents (e.g. `embed_*`)
**Note5**: module names themselves aren't handled yet. The reason is that they
must be discoverable by the user, and even need to match the filenames, etc. In
other words, imagine that `Mod` is a keyword in the target language. You can't
rename a module called `Mod` to `Mod1` without knowing the whole module context,
because that would destroy the mapping for a module already called `Mod1`.
A reliable solution would be to translate all module names to e.g.
`CatalaModule_*`, which we can assume will never conflict with any built-in, and
forbid idents starting with that prefix. We may also want to restrict their
names to ASCII ? Currently we use a projection, but what if I have two modules
called `Là` and `La` ?
2024-08-05 18:08:36 +03:00
|
|
|
(** [hash ~strip t] hashes the identifier [t]; the prefix [strip] is stripped
    from the start of its path before hashing. Replaces the [hash] included
    from [Id]. *)
|
2023-08-30 18:49:29 +03:00
|
|
|
end
|