Finished documentation

This commit is contained in:
Denis Merigoux 2020-12-14 18:09:38 +01:00
parent ef38a6f5be
commit e33ea8d55c
18 changed files with 206 additions and 18 deletions

View File

@ -44,10 +44,11 @@ build:
$(MAKE) format
dune build
doc: build
doc:
dune build @doc
ln -sf $(PWD)/_build/default/_doc/_html/index.html doc/odoc.html
install: build
install:
dune build @install
##########################################
@ -175,5 +176,5 @@ inspect:
# Special targets
##########################################
.PHONY: inspect clean all literate_examples english allocations_familiales pygments \
install build format install-dependencies install-dependencies-ocaml \
catala.html
install build doc format install-dependencies install-dependencies-ocaml \
catala.html

View File

@ -71,7 +71,12 @@ See [the dedicated readme](doc/formalization/README.md).
### Compiler documentation
See [the dedicated readme](src/README.md).
The compiler documentation is auto-generated from its source code using
`dune` and `odoc`. Use
make doc
to generate the documentation, then open the `doc/odoc.html` file in any browser.
## License

View File

@ -8,6 +8,9 @@ The module describing the abstract syntax tree is:
{!modules: Surface.Ast}
This representation can also be weaved into literate programming outputs
using the {{:literate.html} literate programming modules}.
{1 Lexing }
Relevant modules:

View File

@ -15,6 +15,10 @@
module Pos = Utils.Pos
module Uid = Utils.Uid
(** Abstract syntax tree for the default calculus *)
(** {1 Abstract syntax tree} *)
type typ_lit = TBool | TUnit | TInt | TRat | TMoney | TDate | TDuration
type typ =
@ -68,6 +72,8 @@ type unop =
type operator = Binop of binop | Unop of unop
(** The expressions use the {{:https://lepigre.fr/ocaml-bindlib/} Bindlib} library, based on
higher-order abstract syntax*)
type expr =
| EVar of expr Bindlib.var Pos.marked
| ETuple of (expr Pos.marked * Uid.MarkedString.info option) list
@ -86,6 +92,8 @@ type expr =
| EDefault of expr Pos.marked * expr Pos.marked * expr Pos.marked list
| EIfThenElse of expr Pos.marked * expr Pos.marked * expr Pos.marked
(** {1 Variable helpers} *)
module Var = struct
type t = expr Bindlib.var

View File

@ -1 +1,40 @@
{0 Default calculus}
{0 Default calculus}
This representation is the fourth in the compilation chain
(see {{: index.html#architecture} Architecture}). Its main difference
with the previous {{: desugared.html} desugared representation} is that scopes
have been lowered into regular functions, and enums and structs have been
lowered to sum and product types.
The module describing the abstract syntax tree is:
{!modules: Dcalc.Ast}
Printing helpers can be found in {!module: Dcalc.Print}.
This intermediate representation corresponds to the default calculus
presented in the {{: https://github.com/CatalaLang/catala/raw/master/doc/formalization/formalization.pdf}
Catala formalization}.
{1 Typing }
Related modules:
{!modules: Dcalc.Typing}
This representation is where the typing is performed. Indeed, {!module: Dcalc.Typing}
implements the classical {{: https://en.wikipedia.org/wiki/Hindley%E2%80%93Milner_type_system} W algorithm}
corresponding to a Hindley-Milner type system, without type constraints.
{1 Interpreter}
Related modules:
{!modules: Dcalc.Interpreter}
Since this representation is currently the last of the compilation chain,
an {!module: Dcalc.Interpreter} module is provided to match the execution
semantics of the default calculus.
Later, translations to a regular lambda calculus and/or a simple imperative
language are bound to be added.

View File

@ -12,14 +12,30 @@
or implied. See the License for the specific language governing permissions and limitations under
the License. *)
(** Reference interpreter for the default calculus *)
module Pos = Utils.Pos
module Errors = Utils.Errors
module Cli = Utils.Cli
module A = Ast
(** {1 Helpers} *)
let is_empty_error (e : A.expr Pos.marked) : bool =
match Pos.unmark e with ELit LEmptyError -> true | _ -> false
let empty_thunked_term : Ast.expr Pos.marked =
let silent = Ast.Var.make ("_", Pos.no_pos) in
Bindlib.unbox
(Ast.make_abs
(Array.of_list [ silent ])
(Bindlib.box (Ast.ELit Ast.LEmptyError, Pos.no_pos))
Pos.no_pos
[ (Ast.TLit Ast.TUnit, Pos.no_pos) ]
Pos.no_pos)
(** {1 Evaluation} *)
let evaluate_operator (op : A.operator Pos.marked) (args : A.expr Pos.marked list) :
A.expr Pos.marked =
Pos.same_pos_as
@ -289,16 +305,10 @@ let rec evaluate_expr (e : A.expr Pos.marked) : A.expr Pos.marked =
term was well-typed)"
(Pos.get_position e') )
let empty_thunked_term : Ast.expr Pos.marked =
let silent = Ast.Var.make ("_", Pos.no_pos) in
Bindlib.unbox
(Ast.make_abs
(Array.of_list [ silent ])
(Bindlib.box (Ast.ELit Ast.LEmptyError, Pos.no_pos))
Pos.no_pos
[ (Ast.TLit Ast.TUnit, Pos.no_pos) ]
Pos.no_pos)
(** {1 API} *)
(** Interpret a program. This function expects an expression typed as a function whose argument are
all thunked. The function is executed by providing for each argument a thunked empty default. *)
let interpret_program (e : Ast.expr Pos.marked) : (Ast.Var.t * Ast.expr Pos.marked) list =
match Pos.unmark (evaluate_expr e) with
| Ast.EAbs (_, binder, taus) -> (

View File

@ -20,6 +20,10 @@ module Errors = Utils.Errors
module A = Ast
module Cli = Utils.Cli
(** {1 Types and unification} *)
(** We do not reuse {!type: Dcalc.Ast.typ} because we have to include a new [TAny] variant. Indeed,
error terms can have any type and this has to be captured by the type sytem. *)
type typ =
| TLit of A.typ_lit
| TArrow of typ Pos.marked UnionFind.elem * typ Pos.marked UnionFind.elem
@ -42,6 +46,7 @@ let rec format_typ (fmt : Format.formatter) (ty : typ Pos.marked UnionFind.elem)
ts
| TArrow (t1, t2) -> Format.fprintf fmt "%a → %a" format_typ t1 format_typ t2
(** Raises an error if unification cannot be performed *)
let rec unify (t1 : typ Pos.marked UnionFind.elem) (t2 : typ Pos.marked UnionFind.elem) : unit =
(* Cli.debug_print (Format.asprintf "Unifying %a and %a" format_typ t1 format_typ t2); *)
let t1_repr = UnionFind.get (UnionFind.find t1) in
@ -76,6 +81,9 @@ let rec unify (t1 : typ Pos.marked UnionFind.elem) (t2 : typ Pos.marked UnionFin
(Some (Format.asprintf "Type %a coming from expression:" format_typ t2), t2_pos);
]
(** Operators have a single type, instead of being polymorphic with constraints. This allows us to
have a simpler type system, while we argue the syntactic burden of operator annotations helps
the programmer visualize the type flow in the code. *)
let op_type (op : A.operator Pos.marked) : typ Pos.marked UnionFind.elem =
let pos = Pos.get_position op in
let bt = UnionFind.make (TLit TBool, pos) in
@ -134,8 +142,11 @@ let rec typ_to_ast (ty : typ Pos.marked UnionFind.elem) : A.typ Pos.marked =
| TAny -> A.TLit A.TUnit)
(UnionFind.get (UnionFind.find ty))
(** {1 Double-directed typing} *)
type env = typ Pos.marked A.VarMap.t
(** Infers the most permissive type from an expression *)
let rec typecheck_expr_bottom_up (env : env) (e : A.expr Pos.marked) : typ Pos.marked UnionFind.elem
=
match Pos.unmark e with
@ -244,6 +255,7 @@ let rec typecheck_expr_bottom_up (env : env) (e : A.expr Pos.marked) : typ Pos.m
typecheck_expr_top_down env e' (UnionFind.make (Pos.same_pos_as (TLit TBool) e'));
UnionFind.make (Pos.same_pos_as (TLit TUnit) e')
(** Checks whether the expression can be typed with the provided type *)
and typecheck_expr_top_down (env : env) (e : A.expr Pos.marked)
(tau : typ Pos.marked UnionFind.elem) : unit =
match Pos.unmark e with
@ -372,9 +384,13 @@ and typecheck_expr_top_down (env : env) (e : A.expr Pos.marked)
typecheck_expr_top_down env e' (UnionFind.make (Pos.same_pos_as (TLit TBool) e'));
unify tau (UnionFind.make (Pos.same_pos_as (TLit TUnit) e'))
(** {1 API} *)
(* Infer the type of an expression *)
let infer_type (e : A.expr Pos.marked) : A.typ Pos.marked =
let ty = typecheck_expr_bottom_up A.VarMap.empty e in
typ_to_ast ty
(** Typechecks an expression given an expected type *)
let check_type (e : A.expr Pos.marked) (tau : A.typ Pos.marked) =
typecheck_expr_top_down A.VarMap.empty e (UnionFind.make (Pos.map_under_mark ast_to_typ tau))

View File

@ -61,6 +61,13 @@ More documentation can be found on each intermediate representations here.
{li {{: dcalc.html} The default calculus}}
}
The main compilation chain is defined in:
{!modules: Catala.Driver}
Last, two more modules contain additional features for the compiler:
{!modules: Utils Literate }
{ul
{li {{: literate.html} Literate programming}}
{li {{: utils.html} Compiler utilities}}
}

View File

@ -2,3 +2,7 @@
(name literate)
(public_name catala.literate)
(libraries re utils surface))
(documentation
(package catala)
(mld_files literate))

View File

@ -23,17 +23,24 @@ module P = Printf
module R = Re.Pcre
module C = Cli
(** {1 Helpers} *)
(** Converts double lines into HTML newlines. *)
let pre_html (s : string) =
let s = String.trim s in
let doublenewline = R.regexp "\n\n" in
let s = R.substitute ~rex:doublenewline ~subst:(fun _ -> "<br/>\n") s in
s
(** Raise an error if pygments cannot be found *)
let raise_failed_pygments (command : string) (error_code : int) : 'a =
Errors.raise_error
(Printf.sprintf "Weaving to HTML failed: pygmentize command \"%s\" returned with error code %d"
command error_code)
(** Usage: [wrap_html source_files custom_pygments language fmt wrapped]
Prints an HTML complete page structure around the [wrapped] content. *)
let wrap_html (source_files : string list) (custom_pygments : string option)
(language : Cli.backend_lang) (fmt : Format.formatter) (wrapped : Format.formatter -> unit) :
unit =
@ -91,6 +98,7 @@ let wrap_html (source_files : string list) (custom_pygments : string option)
source_files));
wrapped fmt
(** Performs syntax highlighting on a piece of code by using Pygments and the special Catala lexer. *)
let pygmentize_code (c : string Pos.marked) (language : C.backend_lang)
(custom_pygments : string option) : string =
C.debug_print (Printf.sprintf "Pygmenting the code chunk %s" (Pos.to_string (Pos.get_position c)));
@ -125,7 +133,7 @@ let pygmentize_code (c : string Pos.marked) (language : C.backend_lang)
close_in oc;
output
type program_state = InsideArticle | OutsideArticle
(** {1 Weaving} *)
let law_article_item_to_html (custom_pygments : string option) (language : C.backend_lang)
(fmt : Format.formatter) (i : A.law_article_item) : unit =
@ -189,6 +197,8 @@ let program_item_to_html (custom_pygments : string option) (language : C.backend
(fmt : Format.formatter) (i : A.program_item) : unit =
match i with A.LawStructure s -> law_structure_to_html custom_pygments language fmt s
(** {1 API} *)
let ast_to_html (custom_pygments : string option) (language : C.backend_lang)
(fmt : Format.formatter) (program : A.program) : unit =
Format.pp_print_list

View File

@ -22,6 +22,9 @@ module A = Surface.Ast
module R = Re.Pcre
module C = Cli
(** {1 Helpers} *)
(** Espaces various LaTeX-sensitive characters *)
let pre_latexify (s : string) =
let percent = R.regexp "%" in
let s = R.substitute ~rex:percent ~subst:(fun _ -> "\\%") s in
@ -33,6 +36,9 @@ let pre_latexify (s : string) =
let s = R.substitute ~rex:underscore ~subst:(fun _ -> "\\_") s in
s
(** Usage: [wrap_latex source_files custom_pygments language fmt wrapped]
Prints an LaTeX complete documùent structure around the [wrapped] content. *)
let wrap_latex (source_files : string list) (custom_pygments : string option)
(language : C.backend_lang) (fmt : Format.formatter) (wrapped : Format.formatter -> unit) =
Format.fprintf fmt
@ -107,6 +113,7 @@ let wrap_latex (source_files : string list) (custom_pygments : string option)
wrapped fmt;
Format.fprintf fmt "\n\n\\end{document}"
(** Replaces math operators by their nice unicode counterparts *)
let math_syms_replace (c : string) : string =
let date = "\\d\\d/\\d\\d/\\d\\d\\d\\d" in
let syms = R.regexp (date ^ "|!=|<=|>=|--|->|\\*|/") in
@ -122,6 +129,8 @@ let math_syms_replace (c : string) : string =
in
R.substitute ~rex:syms ~subst:syms2cmd c
(** {1 Weaving} *)
let law_article_item_to_latex (language : C.backend_lang) (fmt : Format.formatter)
(i : A.law_article_item) : unit =
match i with
@ -187,6 +196,8 @@ let program_item_to_latex (language : C.backend_lang) (fmt : Format.formatter) (
: unit =
match i with A.LawStructure law_s -> law_structure_to_latex language fmt law_s
(** {1 API} *)
let ast_to_latex (language : C.backend_lang) (fmt : Format.formatter) (program : A.program) : unit =
Format.pp_print_list
~pp_sep:(fun fmt () -> Format.fprintf fmt "\n\n")

View File

@ -0,0 +1,9 @@
{0 Literate programming}
Related modules:
{!modules: Literate.Html Literate.Latex}
These module take the {{:surface.html} surface representation} of the Catala
program and process it into different literate programming outputs, like
an HTML page or a LaTeX document.

View File

@ -81,6 +81,8 @@ type typ =
| TEnum of EnumName.t
| TArrow of typ Pos.marked * typ Pos.marked
(** The expressions use the {{:https://lepigre.fr/ocaml-bindlib/} Bindlib} library, based on
higher-order abstract syntax*)
type expr =
| ELocation of location
| EVar of expr Bindlib.var Pos.marked

View File

@ -1,6 +1,6 @@
{0 The scope language }
This representation is the second in the compilation chain
This representation is the third in the compilation chain
(see {{: index.html#architecture} Architecture}). Its main difference
with the previous {{: desugared.html} desugared representation} is that inside
a scope, the definitions are ordered according to their computational
@ -13,6 +13,10 @@ The module describing the abstract syntax tree is:
Printing helpers can be found in {!module: Scopelang.Print}.
This intermediate representation corresponds to the scope language
presented in the {{: https://github.com/CatalaLang/catala/raw/master/doc/formalization/formalization.pdf}
Catala formalization}.
{1 Translation to the default calculus}
Related modules:

View File

@ -2,3 +2,7 @@
(name utils)
(public_name catala.utils)
(libraries cmdliner dune-build-info ANSITerminal))
(documentation
(package catala)
(mld_files utils))

View File

@ -14,7 +14,12 @@
(** Error formatting and helper functions *)
(** {1 Error exception and printing} *)
exception StructuredError of (string * (string option * Pos.t) list)
(** The payload of the expression is a main error message, with a list of secondary positions
related to the error, each carrying an optional secondary message to describe what is pointed by
the position. *)
let print_structured_error (msg : string) (pos : (string option * Pos.t) list) : string =
Printf.sprintf "%s%s%s" msg
@ -27,6 +32,8 @@ let print_structured_error (msg : string) (pos : (string option * Pos.t) list) :
(Pos.retrieve_loc_text pos))
pos))
(** {1 Error exception and printing} *)
let raise_spanned_error (msg : string) ?(span_msg : string option) (span : Pos.t) : 'a =
raise (StructuredError (msg, [ (span_msg, span) ]))

View File

@ -12,6 +12,9 @@
or implied. See the License for the specific language governing permissions and limitations under
the License. *)
(** Global identifiers factories using a generative functor *)
(** The information carried in global identifiers *)
module type Info = sig
type info
@ -19,7 +22,11 @@ module type Info = sig
end
module MarkedString : Info with type info = string Pos.marked
(** The only kind of information carried in Catala identifiers is the original string of the
identifier annotated with the position where it is declared or used. *)
(** Identifiers have abstract types, but are comparable so they can be used as keys in maps or sets.
Their underlying information can be retrieved at any time. *)
module type Id = sig
type t
@ -36,4 +43,7 @@ module type Id = sig
val hash : t -> int
end
(** This is the generative functor that ensures that two modules resulting from two different calls
to [Make] will be viewed as different types [t] by the OCaml typechecker. Prevents mixing up
different sorts of identifiers. *)
module Make (X : Info) () : Id with type info = X.info

View File

@ -0,0 +1,38 @@
{0 Compiler utilities}
{1 Unique identifiers}
Related modules:
{!modules: Utils.Uid}
In {{: desugared.html} the desugared representation} or in the
{{: scopelang.html} the scope language}, a number of things are named using
global identifiers. These identifiers use OCaml's type system to statically
distinguish e.g. a scope identifier from a struct identifier.
The {!module: Utils.Uid} module provides a generative functor whose output is
a fresh sort of global identifiers.
{1 Source code positions}
Related modules:
{!modules: Utils.Pos}
This module is used throughout the compiler to annotate the abstract syntax
trees with information about the position of the element in the original source
code. These annotations are critical to produce readable error messages.
{1 Error messages}
Related modules:
{!modules: Utils.Errors}
Error handling is critical in a compiler. The Catala compiler uses an architecture
of error messages inspired by the Rust compiler, where error messages all
correspond to the same exception. This exception carries messages and positions
that are displayed in the end in a nicely-formatted error message.
Hence, all error thrown by the compiler should use {!module: Utils.Errors}