From e33ea8d55c0616498eaa8a98c5852340ad1c5af6 Mon Sep 17 00:00:00 2001 From: Denis Merigoux Date: Mon, 14 Dec 2020 18:09:38 +0100 Subject: [PATCH] Finished documentation --- Makefile | 9 +++-- README.md | 7 +++- src/catala/catala_surface/surface.mld | 3 ++ src/catala/default_calculus/ast.ml | 8 ++++ src/catala/default_calculus/dcalc.mld | 41 +++++++++++++++++++- src/catala/default_calculus/interpreter.ml | 28 ++++++++----- src/catala/default_calculus/typing.ml | 16 ++++++++ src/catala/index.mld | 9 ++++- src/catala/literate_programming/dune | 4 ++ src/catala/literate_programming/html.ml | 12 +++++- src/catala/literate_programming/latex.ml | 11 ++++++ src/catala/literate_programming/literate.mld | 9 +++++ src/catala/scope_language/ast.ml | 2 + src/catala/scope_language/scopelang.mld | 6 ++- src/catala/utils/dune | 4 ++ src/catala/utils/errors.ml | 7 ++++ src/catala/utils/uid.mli | 10 +++++ src/catala/utils/utils.mld | 38 ++++++++++++++++++ 18 files changed, 206 insertions(+), 18 deletions(-) create mode 100644 src/catala/literate_programming/literate.mld create mode 100644 src/catala/utils/utils.mld diff --git a/Makefile b/Makefile index 3347bf4d..8aadc270 100644 --- a/Makefile +++ b/Makefile @@ -44,10 +44,11 @@ build: $(MAKE) format dune build -doc: build +doc: dune build @doc + ln -sf $(PWD)/_build/default/_doc/_html/index.html doc/odoc.html -install: build +install: dune build @install ########################################## @@ -175,5 +176,5 @@ inspect: # Special targets ########################################## .PHONY: inspect clean all literate_examples english allocations_familiales pygments \ - install build format install-dependencies install-dependencies-ocaml \ - catala.html + install build doc format install-dependencies install-dependencies-ocaml \ + catala.html diff --git a/README.md b/README.md index 02661c79..fe8333aa 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,12 @@ See [the dedicated readme](doc/formalization/README.md). ### Compiler documentation -See [the dedicated readme](src/README.md). +The compiler documentation is auto-generated from its source code using +`dune` and `odoc`. Use + + make doc + +to generate the documentation, then open the `doc/odoc.html` file in any browser. ## License diff --git a/src/catala/catala_surface/surface.mld b/src/catala/catala_surface/surface.mld index 394f656e..4cdf7de1 100644 --- a/src/catala/catala_surface/surface.mld +++ b/src/catala/catala_surface/surface.mld @@ -8,6 +8,9 @@ The module describing the abstract syntax tree is: {!modules: Surface.Ast} +This representation can also be weaved into literate programming outputs +using the {{:literate.html} literate programming modules}. + {1 Lexing } Relevant modules: diff --git a/src/catala/default_calculus/ast.ml b/src/catala/default_calculus/ast.ml index 36c83bd2..5080a7dc 100644 --- a/src/catala/default_calculus/ast.ml +++ b/src/catala/default_calculus/ast.ml @@ -15,6 +15,10 @@ module Pos = Utils.Pos module Uid = Utils.Uid +(** Abstract syntax tree for the default calculus *) + +(** {1 Abstract syntax tree} *) + type typ_lit = TBool | TUnit | TInt | TRat | TMoney | TDate | TDuration type typ = @@ -68,6 +72,8 @@ type unop = type operator = Binop of binop | Unop of unop +(** The expressions use the {{:https://lepigre.fr/ocaml-bindlib/} Bindlib} library, based on + higher-order abstract syntax*) type expr = | EVar of expr Bindlib.var Pos.marked | ETuple of (expr Pos.marked * Uid.MarkedString.info option) list @@ -86,6 +92,8 @@ type expr = | EDefault of expr Pos.marked * expr Pos.marked * expr Pos.marked list | EIfThenElse of expr Pos.marked * expr Pos.marked * expr Pos.marked +(** {1 Variable helpers} *) + module Var = struct type t = expr Bindlib.var diff --git a/src/catala/default_calculus/dcalc.mld b/src/catala/default_calculus/dcalc.mld index 9c7bf6da..8fba41c9 100644 --- a/src/catala/default_calculus/dcalc.mld +++ b/src/catala/default_calculus/dcalc.mld @@ -1 +1,40 @@ -{0 Default calculus} \ No newline at end of file +{0 Default calculus} + +This representation is the fourth in the compilation chain +(see {{: index.html#architecture} Architecture}). Its main difference +with the previous {{: desugared.html} desugared representation} is that scopes +have been lowered into regular functions, and enums and structs have been +lowered to sum and product types. + +The module describing the abstract syntax tree is: + +{!modules: Dcalc.Ast} + +Printing helpers can be found in {!module: Dcalc.Print}. + +This intermediate representation corresponds to the default calculus +presented in the {{: https://github.com/CatalaLang/catala/raw/master/doc/formalization/formalization.pdf} +Catala formalization}. + +{1 Typing } + +Related modules: + +{!modules: Dcalc.Typing} + +This representation is where the typing is performed. Indeed, {!module: Dcalc.Typing} +implements the classical {{: https://en.wikipedia.org/wiki/Hindley%E2%80%93Milner_type_system} W algorithm} +corresponding to a Hindley-Milner type system, without type constraints. + +{1 Interpreter} + +Related modules: + +{!modules: Dcalc.Interpreter} + +Since this representation is currently the last of the compilation chain, +an {!module: Dcalc.Interpreter} module is provided to match the execution +semantics of the default calculus. + +Later, translations to a regular lambda calculus and/or a simple imperative +language are bound to be added. \ No newline at end of file diff --git a/src/catala/default_calculus/interpreter.ml b/src/catala/default_calculus/interpreter.ml index 3d4420bc..3ecc5afc 100644 --- a/src/catala/default_calculus/interpreter.ml +++ b/src/catala/default_calculus/interpreter.ml @@ -12,14 +12,30 @@ or implied. See the License for the specific language governing permissions and limitations under the License. *) +(** Reference interpreter for the default calculus *) + module Pos = Utils.Pos module Errors = Utils.Errors module Cli = Utils.Cli module A = Ast +(** {1 Helpers} *) + let is_empty_error (e : A.expr Pos.marked) : bool = match Pos.unmark e with ELit LEmptyError -> true | _ -> false +let empty_thunked_term : Ast.expr Pos.marked = + let silent = Ast.Var.make ("_", Pos.no_pos) in + Bindlib.unbox + (Ast.make_abs + (Array.of_list [ silent ]) + (Bindlib.box (Ast.ELit Ast.LEmptyError, Pos.no_pos)) + Pos.no_pos + [ (Ast.TLit Ast.TUnit, Pos.no_pos) ] + Pos.no_pos) + +(** {1 Evaluation} *) + let evaluate_operator (op : A.operator Pos.marked) (args : A.expr Pos.marked list) : A.expr Pos.marked = Pos.same_pos_as @@ -289,16 +305,10 @@ let rec evaluate_expr (e : A.expr Pos.marked) : A.expr Pos.marked = term was well-typed)" (Pos.get_position e') ) -let empty_thunked_term : Ast.expr Pos.marked = - let silent = Ast.Var.make ("_", Pos.no_pos) in - Bindlib.unbox - (Ast.make_abs - (Array.of_list [ silent ]) - (Bindlib.box (Ast.ELit Ast.LEmptyError, Pos.no_pos)) - Pos.no_pos - [ (Ast.TLit Ast.TUnit, Pos.no_pos) ] - Pos.no_pos) +(** {1 API} *) +(** Interpret a program. This function expects an expression typed as a function whose argument are + all thunked. The function is executed by providing for each argument a thunked empty default. *) let interpret_program (e : Ast.expr Pos.marked) : (Ast.Var.t * Ast.expr Pos.marked) list = match Pos.unmark (evaluate_expr e) with | Ast.EAbs (_, binder, taus) -> ( diff --git a/src/catala/default_calculus/typing.ml b/src/catala/default_calculus/typing.ml index 2a58f21f..9640a43a 100644 --- a/src/catala/default_calculus/typing.ml +++ b/src/catala/default_calculus/typing.ml @@ -20,6 +20,10 @@ module Errors = Utils.Errors module A = Ast module Cli = Utils.Cli +(** {1 Types and unification} *) + +(** We do not reuse {!type: Dcalc.Ast.typ} because we have to include a new [TAny] variant. Indeed, + error terms can have any type and this has to be captured by the type sytem. *) type typ = | TLit of A.typ_lit | TArrow of typ Pos.marked UnionFind.elem * typ Pos.marked UnionFind.elem @@ -42,6 +46,7 @@ let rec format_typ (fmt : Format.formatter) (ty : typ Pos.marked UnionFind.elem) ts | TArrow (t1, t2) -> Format.fprintf fmt "%a → %a" format_typ t1 format_typ t2 +(** Raises an error if unification cannot be performed *) let rec unify (t1 : typ Pos.marked UnionFind.elem) (t2 : typ Pos.marked UnionFind.elem) : unit = (* Cli.debug_print (Format.asprintf "Unifying %a and %a" format_typ t1 format_typ t2); *) let t1_repr = UnionFind.get (UnionFind.find t1) in @@ -76,6 +81,9 @@ let rec unify (t1 : typ Pos.marked UnionFind.elem) (t2 : typ Pos.marked UnionFin (Some (Format.asprintf "Type %a coming from expression:" format_typ t2), t2_pos); ] +(** Operators have a single type, instead of being polymorphic with constraints. This allows us to + have a simpler type system, while we argue the syntactic burden of operator annotations helps + the programmer visualize the type flow in the code. *) let op_type (op : A.operator Pos.marked) : typ Pos.marked UnionFind.elem = let pos = Pos.get_position op in let bt = UnionFind.make (TLit TBool, pos) in @@ -134,8 +142,11 @@ let rec typ_to_ast (ty : typ Pos.marked UnionFind.elem) : A.typ Pos.marked = | TAny -> A.TLit A.TUnit) (UnionFind.get (UnionFind.find ty)) +(** {1 Double-directed typing} *) + type env = typ Pos.marked A.VarMap.t +(** Infers the most permissive type from an expression *) let rec typecheck_expr_bottom_up (env : env) (e : A.expr Pos.marked) : typ Pos.marked UnionFind.elem = match Pos.unmark e with @@ -244,6 +255,7 @@ let rec typecheck_expr_bottom_up (env : env) (e : A.expr Pos.marked) : typ Pos.m typecheck_expr_top_down env e' (UnionFind.make (Pos.same_pos_as (TLit TBool) e')); UnionFind.make (Pos.same_pos_as (TLit TUnit) e') +(** Checks whether the expression can be typed with the provided type *) and typecheck_expr_top_down (env : env) (e : A.expr Pos.marked) (tau : typ Pos.marked UnionFind.elem) : unit = match Pos.unmark e with @@ -372,9 +384,13 @@ and typecheck_expr_top_down (env : env) (e : A.expr Pos.marked) typecheck_expr_top_down env e' (UnionFind.make (Pos.same_pos_as (TLit TBool) e')); unify tau (UnionFind.make (Pos.same_pos_as (TLit TUnit) e')) +(** {1 API} *) + +(* Infer the type of an expression *) let infer_type (e : A.expr Pos.marked) : A.typ Pos.marked = let ty = typecheck_expr_bottom_up A.VarMap.empty e in typ_to_ast ty +(** Typechecks an expression given an expected type *) let check_type (e : A.expr Pos.marked) (tau : A.typ Pos.marked) = typecheck_expr_top_down A.VarMap.empty e (UnionFind.make (Pos.map_under_mark ast_to_typ tau)) diff --git a/src/catala/index.mld b/src/catala/index.mld index 25a82ab9..38401fb8 100644 --- a/src/catala/index.mld +++ b/src/catala/index.mld @@ -61,6 +61,13 @@ More documentation can be found on each intermediate representations here. {li {{: dcalc.html} The default calculus}} } +The main compilation chain is defined in: + +{!modules: Catala.Driver} + Last, two more modules contain additional features for the compiler: -{!modules: Utils Literate } \ No newline at end of file +{ul +{li {{: literate.html} Literate programming}} +{li {{: utils.html} Compiler utilities}} +} \ No newline at end of file diff --git a/src/catala/literate_programming/dune b/src/catala/literate_programming/dune index 8925f858..7a7de45b 100644 --- a/src/catala/literate_programming/dune +++ b/src/catala/literate_programming/dune @@ -2,3 +2,7 @@ (name literate) (public_name catala.literate) (libraries re utils surface)) + +(documentation + (package catala) + (mld_files literate)) diff --git a/src/catala/literate_programming/html.ml b/src/catala/literate_programming/html.ml index 68b28c83..61bad4fd 100644 --- a/src/catala/literate_programming/html.ml +++ b/src/catala/literate_programming/html.ml @@ -23,17 +23,24 @@ module P = Printf module R = Re.Pcre module C = Cli +(** {1 Helpers} *) + +(** Converts double lines into HTML newlines. *) let pre_html (s : string) = let s = String.trim s in let doublenewline = R.regexp "\n\n" in let s = R.substitute ~rex:doublenewline ~subst:(fun _ -> "
\n") s in s +(** Raise an error if pygments cannot be found *) let raise_failed_pygments (command : string) (error_code : int) : 'a = Errors.raise_error (Printf.sprintf "Weaving to HTML failed: pygmentize command \"%s\" returned with error code %d" command error_code) +(** Usage: [wrap_html source_files custom_pygments language fmt wrapped] + + Prints an HTML complete page structure around the [wrapped] content. *) let wrap_html (source_files : string list) (custom_pygments : string option) (language : Cli.backend_lang) (fmt : Format.formatter) (wrapped : Format.formatter -> unit) : unit = @@ -91,6 +98,7 @@ let wrap_html (source_files : string list) (custom_pygments : string option) source_files)); wrapped fmt +(** Performs syntax highlighting on a piece of code by using Pygments and the special Catala lexer. *) let pygmentize_code (c : string Pos.marked) (language : C.backend_lang) (custom_pygments : string option) : string = C.debug_print (Printf.sprintf "Pygmenting the code chunk %s" (Pos.to_string (Pos.get_position c))); @@ -125,7 +133,7 @@ let pygmentize_code (c : string Pos.marked) (language : C.backend_lang) close_in oc; output -type program_state = InsideArticle | OutsideArticle +(** {1 Weaving} *) let law_article_item_to_html (custom_pygments : string option) (language : C.backend_lang) (fmt : Format.formatter) (i : A.law_article_item) : unit = @@ -189,6 +197,8 @@ let program_item_to_html (custom_pygments : string option) (language : C.backend (fmt : Format.formatter) (i : A.program_item) : unit = match i with A.LawStructure s -> law_structure_to_html custom_pygments language fmt s +(** {1 API} *) + let ast_to_html (custom_pygments : string option) (language : C.backend_lang) (fmt : Format.formatter) (program : A.program) : unit = Format.pp_print_list diff --git a/src/catala/literate_programming/latex.ml b/src/catala/literate_programming/latex.ml index 416dc8a8..d1252eab 100644 --- a/src/catala/literate_programming/latex.ml +++ b/src/catala/literate_programming/latex.ml @@ -22,6 +22,9 @@ module A = Surface.Ast module R = Re.Pcre module C = Cli +(** {1 Helpers} *) + +(** Espaces various LaTeX-sensitive characters *) let pre_latexify (s : string) = let percent = R.regexp "%" in let s = R.substitute ~rex:percent ~subst:(fun _ -> "\\%") s in @@ -33,6 +36,9 @@ let pre_latexify (s : string) = let s = R.substitute ~rex:underscore ~subst:(fun _ -> "\\_") s in s +(** Usage: [wrap_latex source_files custom_pygments language fmt wrapped] + + Prints an LaTeX complete documùent structure around the [wrapped] content. *) let wrap_latex (source_files : string list) (custom_pygments : string option) (language : C.backend_lang) (fmt : Format.formatter) (wrapped : Format.formatter -> unit) = Format.fprintf fmt @@ -107,6 +113,7 @@ let wrap_latex (source_files : string list) (custom_pygments : string option) wrapped fmt; Format.fprintf fmt "\n\n\\end{document}" +(** Replaces math operators by their nice unicode counterparts *) let math_syms_replace (c : string) : string = let date = "\\d\\d/\\d\\d/\\d\\d\\d\\d" in let syms = R.regexp (date ^ "|!=|<=|>=|--|->|\\*|/") in @@ -122,6 +129,8 @@ let math_syms_replace (c : string) : string = in R.substitute ~rex:syms ~subst:syms2cmd c +(** {1 Weaving} *) + let law_article_item_to_latex (language : C.backend_lang) (fmt : Format.formatter) (i : A.law_article_item) : unit = match i with @@ -187,6 +196,8 @@ let program_item_to_latex (language : C.backend_lang) (fmt : Format.formatter) ( : unit = match i with A.LawStructure law_s -> law_structure_to_latex language fmt law_s +(** {1 API} *) + let ast_to_latex (language : C.backend_lang) (fmt : Format.formatter) (program : A.program) : unit = Format.pp_print_list ~pp_sep:(fun fmt () -> Format.fprintf fmt "\n\n") diff --git a/src/catala/literate_programming/literate.mld b/src/catala/literate_programming/literate.mld new file mode 100644 index 00000000..f76aff78 --- /dev/null +++ b/src/catala/literate_programming/literate.mld @@ -0,0 +1,9 @@ +{0 Literate programming} + +Related modules: + +{!modules: Literate.Html Literate.Latex} + +These module take the {{:surface.html} surface representation} of the Catala +program and process it into different literate programming outputs, like +an HTML page or a LaTeX document. \ No newline at end of file diff --git a/src/catala/scope_language/ast.ml b/src/catala/scope_language/ast.ml index b219f074..1d8b69bd 100644 --- a/src/catala/scope_language/ast.ml +++ b/src/catala/scope_language/ast.ml @@ -81,6 +81,8 @@ type typ = | TEnum of EnumName.t | TArrow of typ Pos.marked * typ Pos.marked +(** The expressions use the {{:https://lepigre.fr/ocaml-bindlib/} Bindlib} library, based on + higher-order abstract syntax*) type expr = | ELocation of location | EVar of expr Bindlib.var Pos.marked diff --git a/src/catala/scope_language/scopelang.mld b/src/catala/scope_language/scopelang.mld index addaf813..e5fc89bf 100644 --- a/src/catala/scope_language/scopelang.mld +++ b/src/catala/scope_language/scopelang.mld @@ -1,6 +1,6 @@ {0 The scope language } -This representation is the second in the compilation chain +This representation is the third in the compilation chain (see {{: index.html#architecture} Architecture}). Its main difference with the previous {{: desugared.html} desugared representation} is that inside a scope, the definitions are ordered according to their computational @@ -13,6 +13,10 @@ The module describing the abstract syntax tree is: Printing helpers can be found in {!module: Scopelang.Print}. +This intermediate representation corresponds to the scope language +presented in the {{: https://github.com/CatalaLang/catala/raw/master/doc/formalization/formalization.pdf} +Catala formalization}. + {1 Translation to the default calculus} Related modules: diff --git a/src/catala/utils/dune b/src/catala/utils/dune index ee8dc89b..6c2f8ca2 100644 --- a/src/catala/utils/dune +++ b/src/catala/utils/dune @@ -2,3 +2,7 @@ (name utils) (public_name catala.utils) (libraries cmdliner dune-build-info ANSITerminal)) + +(documentation + (package catala) + (mld_files utils)) diff --git a/src/catala/utils/errors.ml b/src/catala/utils/errors.ml index 7f4e5407..5bb220f6 100644 --- a/src/catala/utils/errors.ml +++ b/src/catala/utils/errors.ml @@ -14,7 +14,12 @@ (** Error formatting and helper functions *) +(** {1 Error exception and printing} *) + exception StructuredError of (string * (string option * Pos.t) list) +(** The payload of the expression is a main error message, with a list of secondary positions + related to the error, each carrying an optional secondary message to describe what is pointed by + the position. *) let print_structured_error (msg : string) (pos : (string option * Pos.t) list) : string = Printf.sprintf "%s%s%s" msg @@ -27,6 +32,8 @@ let print_structured_error (msg : string) (pos : (string option * Pos.t) list) : (Pos.retrieve_loc_text pos)) pos)) +(** {1 Error exception and printing} *) + let raise_spanned_error (msg : string) ?(span_msg : string option) (span : Pos.t) : 'a = raise (StructuredError (msg, [ (span_msg, span) ])) diff --git a/src/catala/utils/uid.mli b/src/catala/utils/uid.mli index 7c9c8242..f8ec123c 100644 --- a/src/catala/utils/uid.mli +++ b/src/catala/utils/uid.mli @@ -12,6 +12,9 @@ or implied. See the License for the specific language governing permissions and limitations under the License. *) +(** Global identifiers factories using a generative functor *) + +(** The information carried in global identifiers *) module type Info = sig type info @@ -19,7 +22,11 @@ module type Info = sig end module MarkedString : Info with type info = string Pos.marked +(** The only kind of information carried in Catala identifiers is the original string of the + identifier annotated with the position where it is declared or used. *) +(** Identifiers have abstract types, but are comparable so they can be used as keys in maps or sets. + Their underlying information can be retrieved at any time. *) module type Id = sig type t @@ -36,4 +43,7 @@ module type Id = sig val hash : t -> int end +(** This is the generative functor that ensures that two modules resulting from two different calls + to [Make] will be viewed as different types [t] by the OCaml typechecker. Prevents mixing up + different sorts of identifiers. *) module Make (X : Info) () : Id with type info = X.info diff --git a/src/catala/utils/utils.mld b/src/catala/utils/utils.mld new file mode 100644 index 00000000..ba7a92a1 --- /dev/null +++ b/src/catala/utils/utils.mld @@ -0,0 +1,38 @@ +{0 Compiler utilities} + +{1 Unique identifiers} + +Related modules: + +{!modules: Utils.Uid} + +In {{: desugared.html} the desugared representation} or in the +{{: scopelang.html} the scope language}, a number of things are named using +global identifiers. These identifiers use OCaml's type system to statically +distinguish e.g. a scope identifier from a struct identifier. + +The {!module: Utils.Uid} module provides a generative functor whose output is +a fresh sort of global identifiers. + +{1 Source code positions} + +Related modules: + +{!modules: Utils.Pos} + +This module is used throughout the compiler to annotate the abstract syntax +trees with information about the position of the element in the original source +code. These annotations are critical to produce readable error messages. + +{1 Error messages} + +Related modules: + +{!modules: Utils.Errors} + +Error handling is critical in a compiler. The Catala compiler uses an architecture +of error messages inspired by the Rust compiler, where error messages all +correspond to the same exception. This exception carries messages and positions +that are displayed in the end in a nicely-formatted error message. + +Hence, all error thrown by the compiler should use {!module: Utils.Errors} \ No newline at end of file