\documentclass[11pt,a4paper]{article}
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage[english]{babel}
\usepackage{fullpage}
\usepackage{lmodern}
\usepackage{amsmath,amssymb}
\usepackage{mathpartir}
\usepackage{turnstile}
\usepackage{fancyvrb}
\usepackage{xcolor}
\usepackage{booktabs}
\usepackage{csquotes}
\usepackage{biblatex}
\addbibresource{catala.bib}
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newcommand{\sref}[1]{\S\ref{sec:#1}}
%% Syntax
\newcommand{\synvar}[1]{\ensuremath{#1}}
\newcommand{\synkeyword}[1]{\textcolor{red!60!black}{\texttt{#1}}}
\newcommand{\synpunct}[1]{\textcolor{black!40!white}{\texttt{#1}}}
\newcommand{\synname}[1]{\ensuremath{\mathsf{#1}}}
\newcommand{\synbool}{\synkeyword{bool}}
\newcommand{\synnum}{\synkeyword{num}}
\newcommand{\syndate}{\synkeyword{date}}
\newcommand{\synvec}{\synkeyword{vec~}}
\newcommand{\synopt}{\synkeyword{opt~}}
\newcommand{\synrule}{\synkeyword{rule~}}
\newcommand{\synlet}{\synkeyword{let~}}
\newcommand{\synin}{\synkeyword{~in~}}
\newcommand{\synif}{\synkeyword{if~}}
\newcommand{\synthen}{\synkeyword{~then~}}
\newcommand{\synelse}{\synkeyword{~else~}}
\newcommand{\syncall}{\synkeyword{call~}}
\newcommand{\synscope}{\synkeyword{scope~}}
\newcommand{\synequal}{\synpunct{~=~}}
\newcommand{\synjust}{~\synpunct{:\raisebox{-0.9pt}{-}}~}
\newcommand{\syntyped}{~\synpunct{:}~}
\newcommand{\syncomma}{\synpunct{,}}
\newcommand{\syndot}{\synpunct{.}~}
\newcommand{\synunit}{\synpunct{()}}
\newcommand{\synunitt}{\synkeyword{unit}}
\newcommand{\syntrue}{\synkeyword{true}}
\newcommand{\synfalse}{\synkeyword{false}}
\newcommand{\synop}{\synpunct{\odot}}
\newcommand{\synlambda}{\synpunct{$\lambda$}~}
\newcommand{\synand}{\synpunct{\wedge}}
\newcommand{\synor}{\synpunct{\vee}}
\newcommand{\synlparen}{\synpunct{(}}
\newcommand{\synrparen}{\synpunct{)}}
\newcommand{\synlsquare}{\synpunct{[}}
\newcommand{\synrsquare}{\synpunct{]}}
\newcommand{\synlbracket}{\synpunct{\{}}
\newcommand{\synrbracket}{\synpunct{\}}}
\newcommand{\synlangle}{\synpunct{$\langle$}}
\newcommand{\synrangle}{\synpunct{$\rangle$}}
\newcommand{\synmid}{\synpunct{~$|$~}}
\newcommand{\synemptydefault}{\synvar{\varnothing}}
\newcommand{\synerror}{\synvar{\circledast}}
\newcommand{\synstar}{\synpunct{~$*$~}}
\newcommand{\synvardef}{\synkeyword{definition~}}
\newcommand{\synscopecall}{\synkeyword{scope\_call~}}
\newcommand{\synlarrow}{~\synpunct{$\leftarrow$}~}
\newcommand{\synarrow}{~\synpunct{$\rightarrow$}~}
\newcommand{\synellipsis}{\synpunct{,$\ldots$,}}
\newcommand{\synlistellipsis}{\synpunct{;$\ldots$;}}
\newcommand{\syndef}{$ ::= $}
\newcommand{\synalt}{\;$|$\;}
\newcommand{\synhole}{\synvar{\cdot}}
\newcommand{\syncrashifempty}{\synkeyword{crash\_if\_empty}}
\newcommand{\synnone}{\texttt{None}}
\newcommand{\synsome}{\texttt{Some}~}
\newcommand{\synmatch}{\synkeyword{match}~}
\newcommand{\synwith}{~\synkeyword{with}~}
\newcommand{\synoption}{\;\texttt{option}}
\newcommand{\synraise}{\synkeyword{raise}\;}
\newcommand{\synemptyerror}{\texttt{EmptyError}}
\newcommand{\synconflicterror}{\texttt{ConflictError}}
\newcommand{\syntry}{\synkeyword{try}\;}
\newcommand{\synlist}{\;\texttt{list}}
%% Typing
\newcommand{\typctx}[1]{\textcolor{orange!90!black}{\ensuremath{#1}}}
\newcommand{\typempty}{\typctx{\varnothing}}
\newcommand{\typcomma}{\typctx{,\;}}
\newcommand{\typvdash}{\typctx{\;\vdash\;}}
\newcommand{\typcolon}{\typctx{\;:\;}}
\newcommand{\typlpar}{\typctx{(}}
\newcommand{\typrpar}{\typctx{)}}
%% Evaluation
\newcommand{\exctx}[1]{\textcolor{blue!80!black}{\ensuremath{#1}}}
\newcommand{\exeemptysubdefaults}{\exctx{\mathsf{empty\_count}}}
\newcommand{\execonflictsubdefaults}{\exctx{\mathsf{conflict\_count}}}
\newcommand{\Omegaarg}{\Omega_{arg}}
\newcommand{\excaller}{\exctx{\complement}}
\newcommand{\excomma}{\exctx{,}\;}
\newcommand{\exvdash}{\;\exctx{\vdash}\;}
\newcommand{\exempty}{\exctx{\varnothing}}
\newcommand{\exemptyv}{\exctx{\varnothing_v}}
\newcommand{\exemptyarg}{\exctx{\varnothing_{arg}}}
\newcommand{\exvarmap}{\exctx{~\mapsto~}}
\newcommand{\exscopemap}{\exctx{~\rightarrowtail~}}
\newcommand{\exArrow}{\exctx{~\Rrightarrow~}}
\newcommand{\exeq}{\exctx{\;=\;}}
\newcommand{\exeval}{\exctx{\;\longrightarrow\;}}
\newcommand{\exevalstar}{\exctx{\;\longrightarrow^*\;}}
\newcommand{\exat}{\exctx{\texttt{\;@\;}}}
\newcommand{\exsemicolon}{\exctx{;~}}
\newcommand{\excomp}{\dashrightarrow}
%% Reduction of the scope language
\newcommand{\redctx}[1]{\textcolor{green!50!black}{\ensuremath{#1}}}
\newcommand{\reduces}{\redctx{~\rightsquigarrow~}}
\newcommand{\redvdash}{\redctx{\;\vdash\;}}
\newcommand{\redturnstile}[1]{\;\ensuremath{\redctx{\vdash}_{#1}}\;\;}
\newcommand{\redcomma}{\redctx{,\;}}
\newcommand{\redsc}{\redctx{;\;}}
\newcommand{\redcolon}{\redctx{\;:\;}}
\newcommand{\redempty}{\redctx{\varnothing}}
\newcommand{\redproduce}{\;\redctx{\Rrightarrow}\;}
\newcommand{\redellipsis}{\redctx{,\ldots,~}}
\newcommand{\redlparen}{\redctx{(}}
\newcommand{\redrparen}{\redctx{)}}
\newcommand{\redequal}{\redctx{~=~}}
\newcommand{\redinit}{\redctx{\mathsf{init\_subvars}}}
%% Reduction of the defaults
\newcommand{\compctx}[1]{\textcolor{yellow!70!black}{\ensuremath{#1}}}
\newcommand{\compkeyword}[1]{\textcolor{yellow!60!black}{\texttt{#1}}}
\newcommand{\compiles}{\ensuremath{~\compctx{\rightrightarrows}~}}
\newcommand{\compnormal}{\compkeyword{normal}}
\newcommand{\compdefault}{\compkeyword{default}}
\newcommand{\compcons}{\compkeyword{cons}}
\newcommand{\compvdash}{\compctx{\;\vdash\;}}
\newcommand{\compok}{\;\;\compkeyword{ok}}
\title{Formalization of the Catala language}
\date{November 2020}
\author{Denis Merigoux, Nicolas Chataing}
\begin{document}
\maketitle
\tableofcontents
\section{Introduction}
Tax law defines how taxes should be computed, depending on various characteristics of a fiscal household.
Government agencies around the world use computer programs to compute the law, which are derived from the local tax law. Translating tax law into an unambiguous computer program is tricky because the law is subject to interpretations and ambiguities. The goal of the Catala domain-specific language is to provide a way to clearly express the interpretation chosen for the computer program, and display it close to the law it is supposed to model. To complete this goal, our language needs some kind of \emph{locality} property that enables cutting the computer program in bits that match the way the legislative text is structured. This subject has been extensively studied by Lawsky \cite{lawsky2017, lawsky2018, lawsky2020form}, whose work has greatly inspired our approach. The structure exhibited by Lawsky follows a kind of non-monotonic logic called default logic \cite{Reiter1987}. Indeed, unlike traditional programming, when the law defines a value for a variable, it does so in a \emph{base case} that applies only if no \emph{exceptions} apply. To determine the value of a variable, one needs to first consider all the exceptions that could modify the base case. It is this precise behavior which we intend to capture when defining the semantics of Catala. \section{Default calculus} We choose to present the core of Catala as a lambda-calculus augmented by a special \enquote{default} expression. This special expression enables dealing with the logical structure underlying tax law. Our lambda-calculus has only unit and boolean values, but this base could be enriched with more complex values and traditional lambda-calculus extensions (such as algebraic data types or $\Lambda$-polymorphism). 
\subsection{Syntax}
\label{sec:defaultcalc:syntax}
\begin{center}
\begin{tabular}{lrrll}
Type&\synvar{\tau}&\syndef&\synbool\synalt\synunitt&boolean and unit types\\
&&\synalt&\synvar{\tau}\synarrow\synvar{\tau}&function type \\
&&&&\\
Expression&\synvar{e}&\syndef&\synvar{x}\synalt\syntrue\synalt\synfalse\synalt\synunit&variable, literal\\
&&\synalt&\synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot\synvar{e}\synalt\synvar{e}\;\synvar{e}&$\lambda$-calculus\\
&&\synalt&\synvar{d}&default term\\
&&&&\\
Default&\synvar{d}&\syndef&\synlangle $[\synvar{e}^*] \synmid\synvar{e}\synjust\synvar{e}$\synrangle&default term\\
&&\synalt&\synerror&conflict error term\\
&&\synalt&\synemptydefault&empty error term\\
\end{tabular}
\end{center}
Compared to the regular lambda calculus, we add a construction coming from default logic.
Particularly, we focus on a subset of default logic called categorical, prioritized default logic \cite{Brewka2000}.
In this setting, a default is a logical rule of the form $A \synjust B$ where $A$ is the justification of the rule and $B$ is the consequence.
The rule can only be applied if $A$ is consistent with the current knowledge $W$: from $A\wedge W$, one should not derive $\bot$.
If multiple rules $A \synjust B_1$ and $A \synjust B_2$ can be applied at the same time, then only one of them is applied through an explicit ordering of the rules.
To incorporate this form of logic inside our programming language, we set $A$ to be an expression that can be evaluated to \syntrue{} or \synfalse{}, and $B$ the expression that the default should reduce to if $A$ is true.
If $A$ is false, then we look for other rules of lesser priority to apply.
This priority is encoded through a syntactic tree data structure\footnote{Thanks to Pierre-Évariste Dagand for this insight.}.
A node of the tree contains a base case to consider, but first a list of higher-priority exceptions that don't have a particular ordering between them.
This structure is sufficient to model the base case/exceptions structure of the law, and in particular the fact that exceptions are not always prioritized in the legislative text.
In the term \synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid\synvar{e_{\text{just}}}\synjust \synvar{e_{\text{cons}}} \synrangle, \synvar{e_{\text{just}}} is the justification $A$, \synvar{e_{\text{cons}}} is the consequence $B$ and \synvar{e_1}\synellipsis\synvar{e_n} are the list of exceptions to be considered first.
Of course, this evaluation scheme can fail if no more rules can be applied, or if two or more exceptions of the same priority have their justification evaluate to \syntrue{}.
The error terms \synerror{} and \synemptydefault{} encode these failure cases.
Note that if a Catala program correctly derived from a legislative source evaluates to \synerror{} or \synemptydefault{}, this could mean a flaw in the law itself.
\synemptydefault{} means that the law did not specify what happens in a given situation, while \synerror{} means that two or more rules specified in the law conflict with each other on a given situation.
\subsection{Typing}
\label{sec:defaultcalc:typing}
Our typing strategy is an extension of the simply-typed lambda calculus.
The typing judgment \fbox{$\typctx{\Gamma}\typvdash\synvar{e}\typcolon\synvar{\tau}$} reads as \enquote{under context $\typctx{\Gamma}$, expression $\synvar{e}$ has type $\synvar{\tau}$}.
\begin{center}
\begin{tabular}{lrrll}
Typing context&\typctx{\Gamma}&\syndef&\typempty&empty context\\
(unordered map)&&\synalt&\typctx{\Gamma}\typcomma\synvar{x}\typcolon\synvar{\tau}&typed variable\\
\end{tabular}
\end{center}
We start with the usual rules of simply-typed lambda calculus.
\begin{mathpar}
\inferrule[T-UnitLit]{}{
\typctx{\Gamma}\typvdash\synunit\typcolon\synunitt
}
\inferrule[T-TrueLit]{}{
\typctx{\Gamma}\typvdash\syntrue\typcolon\synbool
}
\inferrule[T-FalseLit]{}{
\typctx{\Gamma}\typvdash\synfalse\typcolon\synbool
}
\inferrule[T-Var]{}{
\typctx{\Gamma}\typcomma\synvar{x}\typcolon\synvar{\tau}\typvdash\synvar{x}\typcolon\synvar{\tau}
}
\inferrule[T-Abs]
{\typctx{\Gamma}\typcomma\synvar{x}\typcolon\synvar{\tau}\typvdash\synvar{e}\typcolon\synvar{\tau'}}
{\typctx{\Gamma}\typvdash\synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot\synvar{e}\typcolon\synvar{\tau}\synarrow\synvar{\tau'}}
\inferrule[T-App]
{
\typctx{\Gamma}\typvdash\synvar{e_1}\typcolon\synvar{\tau_2}\synarrow\synvar{\tau_1}\\
\typctx{\Gamma}\typvdash\synvar{e_2}\typcolon\synvar{\tau_2}
}
{\typctx{\Gamma}\typvdash\synvar{e_1}\;\synvar{e_2}\typcolon\synvar{\tau_1}}
\end{mathpar}
Then we move to the special default terms.
First, the error terms that stand for any type.
\begin{mathpar}
\inferrule[ConflictError]{}{\typctx{\Gamma}\typvdash\synerror\typcolon\synvar{\tau}}
\inferrule[EmptyError]{}{\typctx{\Gamma}\typvdash\synemptydefault\typcolon\synvar{\tau}}
\end{mathpar}
Now the interesting part for the default terms.
As mentioned earlier, the justification \synvar{e_{\text{just}}} is a boolean, while \synvar{e_{\text{cons}}} can evaluate to any value.
\TirName{T-Default} specifies how the tree structure of the default should be typed.
\begin{mathpar}
\inferrule[T-Default]
{
\typctx{\Gamma}\typvdash\synvar{e_1}\typcolon\synvar{\tau}\\
\cdots\\
\typctx{\Gamma}\typvdash\synvar{e_n}\typcolon\synvar{\tau}\\
\typctx{\Gamma}\typvdash\synvar{e_{\text{just}}}\typcolon\synbool\\
\typctx{\Gamma}\typvdash\synvar{e_{\text{cons}}}\typcolon\synvar{\tau}
}
{\typctx{\Gamma}\typvdash\synlangle \synvar{e_1}\synellipsis\synvar{e_n}\synmid \synvar{e_{\text{just}}}\synjust\synvar{e_{\text{cons}}}\synrangle\typcolon\synvar{\tau}}
\end{mathpar}
The situation becomes more complex in the presence of functions.
Indeed, we want our default expressions to depend on parameters.
By only allowing \synvar{e_{\text{just}}} to be \synbool{}, we force the user to declare the parameters in a \synlambda that wraps the default from the outside.
Using this scheme, all the expressions inside the tree structure of the default will depend on the same bound variable \synvar{x}.
\subsection{Evaluation}
We give this default calculus small-step, structured operational semantics.
The one-step reduction judgment is of the form \fbox{\synvar{e}\exeval\synvar{e'}}.
In our simple language, values are just booleans, functions or error terms.
We use evaluation contexts to efficiently describe the evaluation order.
Evaluation contexts are expressions with a hole indicating the sub-term currently being reduced.
\begin{center}
\begin{tabular}{lrrll}
Values&\synvar{v}&\syndef&\synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot\synvar{e}&functions\\
&&\synalt&\syntrue\synalt\synfalse & booleans\\
&&\synalt&\synerror\synalt\synemptydefault&errors\\
Evaluation &\synvar{C_\lambda}&\syndef&\synhole\;\synvar{e}\synalt\synvar{v}\;\synhole&function application evaluation\\
contexts&&\synalt&\synlangle$[\synvar{v}^*]$\synmid\synhole\synjust\synvar{e}\synrangle&default justification evaluation\\
&&\synalt&\synlangle$[\synvar{v}^*]$\synmid\syntrue\synjust\synhole \synrangle&default consequence evaluation\\
&\synvar{C}&\syndef&\synvar{C_\lambda}&regular contexts\\
&&\synalt&\synlangle$[\synvar{v}^*]$\syncomma\synhole\syncomma$[\synvar{e}^*]$\synmid \synvar{e}\synjust\synvar{e}\synrangle&default exceptions evaluation\\
\end{tabular}
\end{center}
We choose a call-by-value reduction strategy.
First, we present the usual reduction rules for beta-reduction and evaluation inside a context hole.
Note that \TirName{D-Context} does not deal with error terms, which will have a special treatment for error propagation later.
\begin{mathpar}
\inferrule[D-Context]
{\synvar{e}\exeval\synvar{e'}\\
e'\notin\{\synerror,\synemptydefault\}}
{\synvar{C}[\synvar{e}]\exeval\synvar{C}[\synvar{e'}]}
\inferrule[D-$\beta$]{}{
(\synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot\synvar{e})\;\synvar{v}
\exeval\synvar{e}[\synvar{x}\mapsto\synvar{v}]
}
\end{mathpar}
Now we have to describe how the default terms reduce.
First, we consider the list of exceptions to the default, \synvar{e_1}\synellipsis\synvar{e_n}, that should be all evaluated (left to right), according to the sub-default evaluation context.
Then, we consider all the values yielded by the exception evaluation and define two functions over these values.
Let $\exeemptysubdefaults(\synvar{v_1}\synellipsis\synvar{v_n})$ return the number of empty error terms \synemptydefault{} among the exception values.
We then case analyze on this count:
\begin{itemize}
\item if $\exeemptysubdefaults(\synvar{v_1}\synellipsis\synvar{v_n}) = n$, then none of the exceptions apply and we evaluate the base case;
\item if $\exeemptysubdefaults(\synvar{v_1}\synellipsis\synvar{v_n}) = n - 1$, then only one of the exceptions applies and we return its corresponding value;
\item if $\exeemptysubdefaults(\synvar{v_1}\synellipsis\synvar{v_n}) < n - 1$, then two or more exceptions apply and we raise a conflict error \synerror.
\end{itemize}
\begin{mathpar}
\inferrule[D-DefaultFalseNoExceptions]
{}
{\synlangle \synemptydefault{}\synellipsis\synemptydefault{}\synmid\synfalse\synjust \synvar{e} \synrangle\exeval \synemptydefault{}}
\inferrule[D-DefaultTrueNoExceptions]
{}
{\synlangle \synemptydefault{}\synellipsis\synemptydefault{}\synmid\syntrue\synjust \synvar{v}\synrangle\exeval \synvar{v}}
\inferrule[D-DefaultOneException]
{}
{\synlangle \synemptydefault\synellipsis\synemptydefault\syncomma\synvar{v}\syncomma\synemptydefault\synellipsis\synemptydefault \synmid \synvar{e_1}\synjust \synvar{e_2} \synrangle\exeval \synvar{v}}
\inferrule[D-DefaultExceptionsConflict]
{\exeemptysubdefaults(\synvar{v_1}\synellipsis\synvar{v_n}) < n - 1}
{\synlangle \synvar{v_1}\synellipsis\synvar{v_n}\synmid \synvar{e_1}\synjust \synvar{e_2} \synrangle\exeval \synerror}
\end{mathpar}
% NOTE(review): the source was garbled here — content was lost between the
% premise of D-DefaultExceptionsConflict and the middle of the scope-language
% example figure. The error-propagation rules, the section header (with the
% label sec:scope:example referenced later) and the opening of the figure are
% reconstructed below from the surrounding text, which refers to
% \TirName{D-ContextEmptyError} and to lines 1--8 of
% Fig.~\ref{fig:simplescopeprogram}. Please check against the original.
Finally, we describe how the error terms propagate.
The conflict error \synerror{} propagates through all evaluation contexts, aborting the evaluation.
The empty error \synemptydefault{} propagates only through the regular contexts \synvar{C_\lambda}: inside the exception list of a default, empty errors are not propagated but counted by the rules above.
\begin{mathpar}
\inferrule[D-ContextConflictError]
{}
{\synvar{C}[\synerror]\exeval\synerror}
\inferrule[D-ContextEmptyError]
{}
{\synvar{C_\lambda}[\synemptydefault{}]\exeval\synemptydefault{}}
\end{mathpar}
\section{The scope language}
\subsection{Example}
\label{sec:scope:example}
Catala programs are organized in \emph{scopes}, which group rules defining scope variables and may call other scopes as sub-scopes.
Fig.~\ref{fig:simplescopeprogram} shows a simple illustrative program written in the scope language.
\begin{figure}
\begin{Verbatim}[frame=lines,label=Simple scope program, numbers=left, framesep=10pt, samepage=true]
scope X:
  rule a = < true :- 0 >
  rule b = < true :- a + 1 >

scope Y:
  rule X_1[a] = < true :- 42 >
  call X_1
  rule c = < X_1[b] != 43 :- false | X_1[b] == 43 :- true >
\end{Verbatim}
\caption{Illustrative program written in the scope language\label{fig:simplescopeprogram}}
\end{figure}
Considered alone, the execution of \Verb+X+ is simple: \Verb+a+ and \Verb+b+ are defined by a single default whose justification is \Verb+true+.
Hence, \Verb+a+ should evaluate to \Verb+0+ and \Verb+b+ should evaluate to \Verb+1+.
Now, consider scope \Verb+Y+.
It defines a single variable \Verb+c+ with two defaults at line 8, but the justifications for these two defaults use the result of the evaluation (line 7) of variable \Verb+b+ of the sub-scope \Verb+X_1+.
Line 6 shows an example of providing an \enquote{argument} to the subscope call.
The execution goes like this: at line 7 when calling the sub-scope, \Verb+X_1[a]+ has two defaults, one coming from line 2, the other coming from line 6.
Because the caller has priority over the callee, the default from line 6 wins and \Verb+X_1[a]+ evaluates to \Verb+42+.
Consequently, \Verb+X_1[b]+ evaluates to \Verb+43+.
This triggers the second default in the list of line 8: the exception evaluates first, but does not apply.
Then, the base case applies, and evaluates \Verb+c+ to \Verb+true+.
The goal is to provide an encoding of the scope language into the lambda calculus that is compatible with this intuitive description of how scopes should evaluate.
To get a high-level picture of the translation, we first show what the previous simple program will translate to, using ML-like syntax for the target default calculus in Fig.~\ref{fig:simpledefaultprogram}.
\begin{figure}
\begin{Verbatim}[frame=lines,label=Simple default program, numbers=left, framesep=10pt, samepage=true]
let X (a: unit -> int) (b: unit -> int) : (int * int) =
  let a : int = < a () | < true :- 0 >> in
  let b : int = < b () | < true :- a + 1 >> in
  (a, b)

let Y (c: unit -> bool) : bool =
  let X_1[a] : unit -> int = fun () -> < true :- 42 > in
  let X_1[b] : unit -> int = fun () -> EmptyError in
  let (X_1[a], X_1[b]) : int * int = X(X_1[a], X_1[b]) in
  let c : bool = < c () | < X_1[b] != 43 :- false | X_1[b] == 43 :- true >> in
  c
\end{Verbatim}
\caption{Default calculus program resulting from the compilation of Fig.~\ref{fig:simplescopeprogram} \label{fig:simpledefaultprogram}}
\end{figure}
We start unravelling this translation with the scope \Verb+X+.
\Verb+X+ has been turned into a function whose arguments are all the local variables of the scope.
However, the arguments have the thunked type \Verb+unit -> int+ rather than simply \Verb+int+.
Indeed, we want the arguments of \Verb+X+ (line 1) to be the default expressions supplied by the caller of \Verb+X+, which are considered as exceptions to the base expression defining the local variables of \Verb+X+ (lines 2 and 3).
After the merging of scope-local and scope-arguments defaults, we apply \Verb+()+ to the thunk to force evaluation and get back the value.
Finally, \Verb+X+ returns the tuple of all its local variables (line 4).
The translation of \Verb+Y+ exhibits the pattern for sub-scope calls.
Line 7 translates the assignment of the sub-scope argument \Verb+X_1[a]+.
Before calling \Verb+X_1+ (line 8), the other argument \Verb+X_1[b]+ is initialized to the neutral \synemptydefault{} that will be ignored at execution because \Verb+X+ provides more defaults for \Verb+b+.
The sub-scope call is translated to a regular function call (line 9).
The results of the call are then used in the two defaults for \Verb+c+ (line 10), which have been turned into a default tree taking into account the possible input for \Verb+c+.
\subsection{Formalization of the translation}
\label{sec:scope:formalization}
The main judgment of reduction from scope language to default calculus is \fbox{$\synvar{P}\redvdash\synvar{\sigma}\reduces\synvar{e}\redproduce\redctx{\Delta_\mathrm{own}}$}, which reduces a scope declaration to a function in the default calculus, while providing the list of its own variables.
\begin{center} \begin{tabular}{lrrll} Translation context&\redctx{\Delta}&\syndef&\redempty&empty context\\ (unordered map)&&\synalt&\redctx{\Delta_\mathrm{own}}\redcomma\redctx{\Delta_\mathrm{sub}}&own and sub-scopes contexts\\ &\redctx{\Delta_\mathrm{own}}&\syndef&\redempty\synalt\redctx{\Delta_\mathrm{own}}\redcomma\synvar{a}\redcolon\synvar{\tau}&typed scope variable\\ &\redctx{\Delta_\mathrm{sub}}&\syndef&\redempty\synalt\redctx{\Delta_\mathrm{sub}}\redcomma $\synvar{S}_\synvar{n}$\synlsquare\synvar{a}\synrsquare\redcolon\synvar{\tau}&typed sub-scope variable\\ \end{tabular} \end{center} The translation context \redctx{\Delta} is similar to the typing context \typctx{\Gamma} of the default calculus, but it only takes into account the new scope-related location. At any point, \redctx{\Delta} will contain the scope locations defined (and usable in expressions) so far. \redctx{\Delta} is divided in \redctx{\Delta_\mathrm{own}} and \redctx{\Delta_\mathrm{sub}}, which contain respectively the scope's own variables and the variables of its sub-scopes. We will describe the translation from top to bottom, in order to keep the big picture in mind. We will assume the default calculus has been expanded with the usual ML \synlet \synin construction, as well as tuples. Here is the top-level rule for translating scopes. 
\begin{mathpar}
\inferrule[T-Scope]{
\synvar{P}\redsc\redempty\redturnstile{\synvar{S}} \synvar{r_1}\synellipsis\synvar{r_n} \reduces \synvar{e} \redproduce
\synvar{a_1}\redcolon\synvar{\tau_1}\redellipsis\synvar{a_m}\redcolon\synvar{\tau_m}\redcomma \redctx{\Delta_\mathrm{sub}}
}{
\synvar{P}\redvdash\synscope\synvar{S}\syntyped\synvar{r_1}\synellipsis\synvar{r_n}\reduces\\
\synlet\synvar{S}\;\synlparen\synvar{a_1}\syntyped\synunitt\synarrow\synvar{\tau_1}\synrparen\;\cdots\;
\synlparen\synvar{a_m}\syntyped\synunitt\synarrow\synvar{\tau_m}\synrparen\syntyped
\synlparen\synvar{\tau_1}\synstar\cdots\synstar\synvar{\tau_m}\synrparen\synequal
\synvar{e}[\synhole\mapsto\synlparen\synvar{a_1}\synellipsis\synvar{a_m}\synrparen]\redproduce\\
\synvar{a_1}\redcolon\synvar{\tau_1}\redellipsis\synvar{a_m}\redcolon\synvar{\tau_m}
}
\end{mathpar}
This rule has a lot to unpack, but it is just the formal description of the translation scheme described earlier.
To translate scope declaration \synvar{S} with associated rules \synvar{r_1}\synellipsis\synvar{r_n}, we use a helper judgment \fbox{\synvar{P}\redsc\redctx{\Delta}\redturnstile{\synvar{S}} \synvar{r_1}\synellipsis\synvar{r_n} \reduces \synvar{e} \redproduce \redctx{\Delta'}} which reads as \enquote{%
given a program \synvar{P} and a translation context \redctx{\Delta}, the rules \synvar{r_1}\synellipsis\synvar{r_n} belonging to scope \synvar{S} translate to the expression \synvar{e}, producing a new translation context \redctx{\Delta'}}.
In this \TirName{T-Scope} rule, we isolate in the resulting \redctx{\Delta'} all the scope variables \synvar{a_1},\ldots,\synvar{a_m} from the sub-scope variables.
Indeed, those variables will be the arguments and the return values of the function corresponding to the scope \synvar{S}.
The expression \synvar{e} that stands for rules \synvar{r_1}\synellipsis\synvar{r_n} is a series of \synlet bindings, the last one finishing by a hole (\synhole).
We use this hole as a placeholder to be filled with the return value of the function, which is the tuple \synlparen\synvar{a_1}\synellipsis\synvar{a_n}\synrparen. Note that in accordance to the translation scheme and the need for a delayed evaluation of defaults, the arguments of \synvar{S} have a thunked type. \begin{mathpar} \inferrule[T-Rules]{ \synvar{P}\redsc\redctx{\Delta}\redturnstile{\synvar{S}} \synvar{r_1} \reduces \synvar{e_1} \redproduce\redctx{\Delta'}\\ \synvar{P}\redsc\redctx{\Delta'}\redturnstile{\synvar{S}} \synvar{r_2}\synellipsis\synvar{r_n} \reduces \synvar{e_2} \redproduce\redctx{\Delta''} }{ \synvar{P}\redsc\redctx{\Delta}\redturnstile{\synvar{S}} \synvar{r_1}\synellipsis\synvar{r_n} \reduces \synvar{e_1}[\synhole\mapsto \synvar{e_2}] \redproduce\redctx{\Delta''} } \end{mathpar} The translation of the sequence of rules consists of chaining the different \synlet \synin expressions together with the same hole (\synhole{}) substitution as the previous rule. Now, we can define the translation for individual rules, starting with the definitions of scope variables. 
\begin{mathpar}
\inferrule[T-DefScopeVar]{
\synvar{a}\notin\redctx{\Delta}\\
\redctx{\Delta}\typvdash
\synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid
\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synrangle\typcolon\synvar{\tau}
}{
\synvar{P}\redsc\redctx{\Delta}\redturnstile{\synvar{S}}
\synrule\synvar{a}\syntyped\synvar{\tau}\synequal
\synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid
\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synrangle
\reduces \\
\synlet a\syntyped\synvar{\tau}\synequal
\synlangle a\;\synunit\synmid
\synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid
\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synrangle
\synrangle\synin\synhole
\redproduce\synvar{a}\redcolon\synvar{\tau}\redcomma\redctx{\Delta}
}
\end{mathpar}
The premise of \TirName{T-DefScopeVar}, $\synvar{a}\notin\redctx{\Delta}$, indicates that our scope language allows each scope variable to be defined only once, with one default tree.
This single default tree can incorporate multiple prioritized definitions of the same variable scattered around various legislative articles, but we assume in our scope language that these scattered definitions have been already collected.
Therefore, the ordering of rules is very important in our scope language, because it should be compatible with the dependency graph of the scope locations.
As the underlying default calculus is decidable and does not allow fixpoint definitions, the dependency graph of the scope locations should not be cyclic and therefore the topological ordering of its nodes should correspond to the order of the rules inside the scope declaration.
This dependency ordering is enforced by the premise \redctx{\Delta}\typvdash\synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synrangle\typcolon\synvar{\tau}, which seeds the typing judgment of \sref{defaultcalc:typing} with \redctx{\Delta} (the scope locations defined so far).
Since scope variables are also arguments of the scope, \TirName{T-DefScopeVar} redefines \synvar{a} by merging the new default tree with the default expression \synvar{a} of type \synunitt\synarrow\synvar{\tau} passed as an argument to \synvar{S}.
This merging is done by defining the incoming argument as an exception to the scope-local expression.
This translation scheme ensures that the caller always has priority over the callee.
The evaluation of the incoming arguments is forced by applying \synunit, yielding a value of type \synvar{\tau} for \synvar{a}.
Now that we have presented the translation scheme for rules defining scope variables, we can switch to the translation of sub-scope variables definitions and calls.
We will start with the rules that define sub-scope variables, prior to calling the associated sub-scope.
\begin{mathpar}
\inferrule[T-DefSubScopeVar]{
S\neq S'\\
\synvar{S'}_\synvar{n}\synlsquare\synvar{a}\synrsquare\notin\redctx{\Delta}\\
\redctx{\Delta}\typvdash\synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid
\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synrangle
\typcolon\synvar{\tau}
}{
\synvar{P}\redsc\redctx{\Delta}\redturnstile{\synvar{S}}
\synrule\synvar{S'}_\synvar{n}\synlsquare\synvar{a}\synrsquare\syntyped\synvar{\tau}\synequal
\synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid
\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synrangle
\reduces \\
\synlet \synvar{S'}_\synvar{n}\synlsquare\synvar{a}\synrsquare\syntyped\synunitt\synarrow\synvar{\tau}\synequal
\synlambda \synlparen\synunit\syntyped\synunitt\synrparen\syndot
\synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid
\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synrangle
\synin\synhole\redproduce\\\synvar{S'}_\synvar{n}\synlsquare\synvar{a}\synrsquare\redcolon\synunitt\synarrow\synvar{\tau}\redcomma\redctx{\Delta}
}
\end{mathpar}
This rule is very similar to \TirName{T-DefScopeVar}, and actually simpler.
The premise $S\neq S'$ means that a scope $S$ cannot have a recursive definition; it cannot call into itself and define sub-scope variables of its own scope.
Note that $\synvar{S'}_\synvar{n}$\synlsquare\synvar{a}\synrsquare\redcolon\synunitt\synarrow\synvar{\tau} is added to \redctx{\Delta} in the final part of the judgment; $\synvar{S'}_\synvar{n}$\synlsquare\synvar{a}\synrsquare{} has been defined as a sub-scope argument but not as a value that can be used by the scope yet, its type is \synunitt\synarrow\synvar{\tau} and not \synvar{\tau}.
When all the arguments of sub-scope \synvar{S'} have been defined using \TirName{T-DefSubScopeVar}, the sub-scope itself can be called.
\begin{mathpar}
\inferrule[T-SubScopeCall]{
S\neq S'\\
P(S') = \sigma'\\
P\redvdash\sigma'\reduces e'\redproduce
\synvar{a'_1}\redcolon\synvar{\tau'_1}\redellipsis\synvar{a'_n}\redcolon\synvar{\tau'_n}\redcomma\redctx{\Delta'_\mathrm{sub}}\\
\redinit\redlparen\redctx{\Delta}\redsc
\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_1}\synrsquare
\redellipsis\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_n}\synrsquare\redrparen
\redequal\synvar{e_\mathrm{init}}
}{
\synvar{P}\redsc\redctx{\Delta}\redturnstile{\synvar{S}}\syncall
\synvar{S'}_\synvar{n}\reduces
\synvar{e_\mathrm{init}}[\synhole\mapsto
\synlet\synlparen
\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_1}\synrsquare\synellipsis
\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_n}\synrsquare\synrparen
\syntyped
\synlparen
\synvar{\tau'_1}\synstar\cdots\synstar\synvar{\tau'_n}\synrparen\synequal\\
\synvar{e'}\;\synlparen\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_1}\synrsquare\synrparen
\cdots\synlparen\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_n}\synrsquare\synrparen
\synin\synhole\;]\redproduce
\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_1}\synrsquare\redcolon\synvar{\tau'_1}\redellipsis
\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_n}\synrsquare\redcolon\synvar{\tau'_n}\redcomma
\redctx{\Delta}
}
\end{mathpar}
Again, this rule has a lot to unpack, but
is meant as a generalization of the translation scheme illustrated in \sref{scope:example}.
Let us start with the premises.
As earlier, $S\neq S'$ means that scope declarations cannot be recursive.
Next, we fetch the declaration \synvar{\sigma'} of \synvar{S'} inside the program $P$.
\synvar{\sigma'} is reduced into the function expression \synvar{e'}, whose arguments correspond to the scope variables of \synvar{S'}: \synvar{a'_1}\synellipsis\synvar{a'_n}.
Then, we need to define all the arguments necessary to call \synvar{e'}.
Some of these arguments have been defined earlier in the translation, and they were added to \redctx{\Delta}.
But some arguments may not have been defined yet, and it is precisely the job of the \redinit{} helper to produce the \synvar{e_\mathrm{init}} expression to define those missing arguments with the \synemptydefault{} value.
The conclusion of \TirName{T-SubScopeCall} defines the reduction of \syncall$\synvar{S'}_\synvar{n}$.
After \synvar{e_\mathrm{init}}, we translate the sub-scope call to the default calculus call of the corresponding expression \synvar{e'}, which takes as arguments the defaults and returns the corresponding values after evaluation.
Finally, the new translation context produced is \redctx{\Delta} augmented with all the variables of sub-scope \synvar{S'}, which are available for use in later definitions of the scope.
The last item we need to define in order to complete the translation is \redinit{}.
Its definition is quite simple, since it produces an expression that defines to \synemptydefault{} all the variables of a list that are not present in \redctx{\Delta}.
\begin{mathpar} \inferrule[T-InitSubVarsInDelta]{ \synvar{S'}_\synvar{n}\synlsquare\synvar{a_1}\synrsquare\redcolon\synvar{\tau_1} \in\redctx{\Delta}\\ \redinit\redlparen\redctx{\Delta}\redsc \synvar{S'}_\synvar{n}\synlsquare\synvar{a_2}\synrsquare\redellipsis \synvar{S'}_\synvar{n}\synlsquare\synvar{a_n}\synrsquare\redrparen \redequal\synvar{e} }{ \redinit\redlparen\redctx{\Delta}\redsc \synvar{S'}_\synvar{n}\synlsquare\synvar{a_1}\synrsquare\redellipsis \synvar{S'}_\synvar{n}\synlsquare\synvar{a_n}\synrsquare\redrparen \redequal\synvar{e} } \inferrule[T-InitSubVarsNotInDelta]{ \synvar{S'}_\synvar{n}\synlsquare\synvar{a_1}\synrsquare\redcolon\synvar{\tau_1} \notin\redctx{\Delta}\\ \redinit\redlparen\redctx{\Delta}\redsc \synvar{S'}_\synvar{n}\synlsquare\synvar{a_2}\synrsquare\redellipsis \synvar{S'}_\synvar{n}\synlsquare\synvar{a_n}\synrsquare\redrparen \redequal\synvar{e'} }{ \redinit\redlparen\redctx{\Delta}\redsc \synvar{S'}_\synvar{n}\synlsquare\synvar{a_1}\synrsquare\redellipsis \synvar{S'}_\synvar{n}\synlsquare\synvar{a_n}\synrsquare\redrparen \redequal\\ \synlet\synvar{S'}_\synvar{n}\synlsquare\synvar{a_1}\synrsquare \syntyped\synunitt\synarrow\synvar{\tau_1}\synequal \synlambda\synlparen\synunit\syntyped\synunitt\synrparen\syndot\synemptydefault\synin \synvar{e} } \inferrule[T-InitSubVarsEmpty]{}{ \redinit\redlparen\redctx{\Delta}\redrparen \redequal\synhole } \end{mathpar} \section{From default calculus to lambda calculus} \subsection{Using exceptions} The default calculus is a solid semantic foundation for the Catala language, but it is not a good compilation target since default logic cannot be shallowly embedded easily in mainstream programming languages. Hence, we propose a compilation scheme whose goal is to eliminate default terms and empty error terms (\synemptydefault) from the default calculus, leaving us with the semantics of a regular lambda calculus.
The conflict error term (\synerror) is less problematic since its semantics correspond to an early exit from the program. In order to lower the default term to a lambda calculus term, we need to extend the traditional lambda calculus with several classic extensions: algebraic data types and recursive data types (lists). Indeed, we need an optional accumulator to emulate the exception count that triggers rules like \TirName{D-DefaultExceptionsConflict} and \TirName{D-DefaultOneException}. The empty error term has a complex propagation rule (\TirName{D-ContextEmptyError}) that naturally maps to a catchable exception. Hence, in this translation scheme, we assume that the target lambda calculus has support for exceptions. Combining these features, we propose a translation of the default term. This translation relies on a small runtime function \texttt{process\_exceptions}, whose body implements the semantics of the default calculus. \begin{align*} \texttt{process\_exceptions}\quad&\syntyped&& \synlparen\synunitt\synarrow\synvar{\tau}\synrparen\synlist\synarrow\synvar{\tau}\synoption\\ \texttt{process\_exceptions}\quad&\triangleq&& \texttt{fold\_left}\;\synlparen\synlambda\synlparen\synvar{a}\syntyped \synvar{\tau}\synoption\synrparen\; \synlparen\synvar{e'}\syntyped \synunitt\synarrow\synvar{\tau}\synrparen\syndot\\ &&&\quad\synlet\synvar{e'}\syntyped\synvar{\tau}\synoption\synequal\\ &&&\quad\quad\syntry \synsome\synlparen\synvar{e'}\synunit\synrparen \synwith\synemptyerror\synarrow \synnone\\ &&&\quad\!\!\!\!\synin\\ &&&\quad\synmatch\synlparen\synvar{a}\syncomma\;\synvar{e'}\synrparen\synwith\\ &&&\quad\quad\synmid\synlparen\synnone\syncomma\;\synvar{e'}\synrparen\synarrow\synvar{e'}\\ &&&\quad\quad\synmid\synlparen\synsome\synvar{a}\syncomma\;\synnone\synrparen\synarrow \synsome\synvar{a}\\ &&&\quad\quad\synmid\synlparen\synsome\synvar{a}\syncomma\;\synsome \synvar{e'}\synrparen\synarrow \synraise\synconflicterror \synrparen\;\synnone \end{align*} Note that the
\synemptyerror{} exception is caught within \texttt{process\_exceptions}, making it the only place in the output code where this exception can be caught. This is consistent with the evaluation context of the default exceptions, which is the only evaluation context that belongs to $C$ but not to $C_\lambda$. We can now proceed to the formal translation rules defining the compilation judgment $\fbox{\synvar{e}\compiles\synvar{e'}}$ where \synvar{e} is an expression of the default calculus and $\synvar{e'}$ is an expression of the target lambda calculus enriched with algebraic data types and exceptions. \begin{mathpar} \inferrule[C-Default]{ \synvar{e_1}\compiles\synvar{e_1'}\\ \cdots\\ \synvar{e_n}\compiles\synvar{e_n'}\\ \synvar{e_\mathrm{just}}\compiles\synvar{e_\mathrm{just}'}\\ \synvar{e_\mathrm{cons}}\compiles\synvar{e_\mathrm{cons}'}\\ }{ \synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid\synvar{e_\mathrm{just}} \synjust\synvar{e_\mathrm{cons}}\synrangle\compiles\\ \synlet\synvar{r_\mathrm{exceptions}}\synequal \texttt{process\_exceptions}\;\synlsquare\synlambda\synvar{\_}\synarrow\synvar{e_1'} \synlistellipsis\synlambda\synvar{\_}\synarrow\synvar{e_n'}\synrsquare\synin\\\synmatch \synvar{r_\mathrm{exceptions}}\synwith\synsome\synvar{e'}\synarrow \synvar{e'}\synmid\synnone\synarrow \synif\synvar{e_\mathrm{just}'}\synthen\synvar{e_\mathrm{cons}'}\synelse\synraise\synemptyerror } \inferrule[C-EmptyError]{}{ \synemptydefault\compiles\synraise\synemptyerror } \inferrule[C-ConflictError]{}{ \synerror\compiles\synraise\synconflicterror } \inferrule[C-Var]{}{\synvar{x}\compiles\synvar{x}} \inferrule[C-Literal]{\synvar{e}\in\{\synunit,\;\syntrue,\;\synfalse\}}{ \synvar{e}\compiles\synvar{e} } \inferrule[C-Abs]{ \synvar{e}\compiles\synvar{e'} }{ \synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot\synvar{e}\compiles \synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot\synvar{e'} } \inferrule[C-App]{
\synvar{e_1}\compiles\synvar{e_1'}\\ \synvar{e_2}\compiles\synvar{e_2'} }{ \synvar{e_1}\;\synvar{e_2}\compiles\synvar{e_1'}\;\synvar{e_2'} } \end{mathpar} We overload the \exeval{} notation as the stepping judgment both in the default calculus and the target lambda calculus. Similarly, we overload the typing judgment \typvdash. We prove the correctness theorem below by induction on the default calculus expression \synvar{e}, starting by applying an inversion lemma to the judgment \synvar{e}\compiles\synvar{e'}. \paragraph{Theorem (type preservation)} \textit{If \synvar{e}\compiles\synvar{e'} and \typempty\typvdash\synvar{e}\typcolon\synvar{\tau}, then \typempty\typvdash\synvar{e'}\typcolon\synvar{\tau}.} The proof can be carried out by induction on \synvar{e} without any trouble. The most difficult part is to check the correct typing of \texttt{process\_exceptions} in the lambda calculus. $\blacksquare$ \paragraph{Theorem (translation correctness)} \textit{If \synvar{e}\compiles\synvar{e'} and \synvar{e}\exevalstar\synvar{v}, then either $\synvar{v}=\synemptydefault$, $\synvar{v}=\synerror$ or there exists a lambda calculus value \synvar{v'} such that \synvar{v}\compiles\synvar{v'} and \synvar{e'}\exevalstar\synvar{v'}.} \begin{itemize} \item Rules \TirName{C-EmptyError}, \TirName{C-ConflictError}, \TirName{C-Var} and \TirName{C-Literal} yield an immediate conclusion. \item For rule \TirName{C-Abs} with $\synvar{e}=\synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot\synvar{e_1}$, we apply the induction hypothesis on \synvar{e_1} and conclude since functions are values ($\synvar{v'}=\synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot\synvar{e_1'}$). \item For rule \TirName{C-App} with $\synvar{e} = \synvar{e_1}\;\synvar{e_2}$, we apply the induction hypothesis on \synvar{e_1} and \synvar{e_2}. If either \synvar{e_1} or \synvar{e_2} evaluates to \synemptydefault, we can apply \TirName{D-ContextEmptyError} and conclude.
Similarly, if either \synvar{e_1} or \synvar{e_2} evaluates to \synerror, we can apply \TirName{D-ContextConflictError} and conclude. Let us now suppose that we are not in one of those cases, and that \synvar{e_1}\exevalstar\synvar{v_1} and \synvar{e_2}\exevalstar\synvar{v_2}. \synvar{v_1} and \synvar{v_2} are not error terms, so we can apply \TirName{C-Literal} or \TirName{C-Abs} to get \synvar{v_1'} and \synvar{v_2'} such that \synvar{v_1}\compiles\synvar{v_1'} and \synvar{v_2}\compiles\synvar{v_2'}. By the induction hypothesis applied on \synvar{e_1} and \synvar{e_2}, we know that \synvar{e_1'}\exevalstar\synvar{v_1'} and \synvar{e_2'}\exevalstar\synvar{v_2'}. To recap, we have $\synvar{e_1}\;\synvar{e_2}\exevalstar\synvar{v_1}\;\synvar{v_2}$, $\synvar{e_1'}\;\synvar{e_2'}\exevalstar\synvar{v_1'}\;\synvar{v_2'}$ and by \TirName{C-App}, $\synvar{v_1}\;\synvar{v_2}\compiles\synvar{v_1'}\;\synvar{v_2'}$. We also know that $\synvar{v_1}\;\synvar{v_2}\exevalstar\synvar{v}$ beginning with a $\beta$-reduction. We can then apply the induction hypothesis a third time on $\synvar{v_1}\;\synvar{v_2}$ to get the \synvar{v'} on which we conclude. \item Let us consider now \TirName{C-Default} with $e=\synlangle\synvar{e_1}\synellipsis \synvar{e_n}\synmid\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synrangle$. If any of the sub-terms evaluates to \synerror{}, then $\synvar{e}$ evaluates to $\synerror$ (\TirName{D-ContextConflictError}) and we conclude. Now, we perform a case analysis on the number of exceptions that do not evaluate to \synemptydefault{}. \begin{itemize} \item If more than one exception does not evaluate to \synemptydefault{}, then \synvar{e} evaluates to \synerror{} by \TirName{D-DefaultExceptionsConflict} and we conclude.
\item If one and only one exception $\synvar{e_i}$ does not evaluate to \synemptydefault{}, then we apply the induction hypothesis and get a pair $(\synvar{v_i},\synvar{v_i'})$ such that $\synvar{e_i}\exevalstar\synvar{v_i}$, $\synvar{e_i'}\exevalstar\synvar{v_i'}$ and $\synvar{v_i}\compiles\synvar{v_i'}$. Then, we claim that the \synvar{r_\mathrm{exceptions}} result of \texttt{process\_exceptions} will evaluate to \synsome\synvar{v_i'} in the lambda calculus translation of \synvar{e}. Indeed, along the $\texttt{fold\_left}$ the accumulator \synvar{a} will remain \synnone{} as it encounters all the \synvar{e_j'} that raise the \synemptyerror{} caught inside the fold function. The accumulator will then encounter \synvar{e_i'}, which yields \synvar{v_i'}, and pick up its value. By following the rest of the translated code, \synvar{e'} will evaluate to \synvar{v_i'} and we can conclude. \item If all exceptions evaluate to \synemptydefault{}, then we claim that the \synvar{r_\mathrm{exceptions}} result will evaluate to \synnone{}. Indeed, the accumulator will stay at its original \synnone{} value during the whole fold process of the exceptions. We now look at the evaluation of \synvar{e_\mathrm{just}}. If it evaluates to \synemptydefault{} or \synerror{}, then the whole expression evaluates to the error term (\TirName{D-ContextConflictError} and \TirName{D-ContextEmptyError}) and we conclude. By type safety of the default calculus, \synvar{e_\mathrm{just}} evaluates to either $\synvar{v_\mathrm{just}} =\syntrue{}$ or \synfalse{}. By type preservation of the translation, \synvar{e_\mathrm{just}'} evaluates to either $\synvar{v_\mathrm{just}'} =\syntrue{}$ or \synfalse{}. By application of the induction hypothesis on \synvar{e_\mathrm{just}}, we know that \synvar{v_\mathrm{just}}\compiles\synvar{v_\mathrm{just}'} and by inversion on the \compiles{} judgment, we conclude $\synvar{v_\mathrm{just}}=\synvar{v_\mathrm{just}'}$.
From there, if $\synvar{v_\mathrm{just}'} =\syntrue{}$, then we conclude by applying the induction hypothesis on \synvar{e_\mathrm{cons}}. If $\synvar{v_\mathrm{just}'} =\synfalse{}$, then the default evaluates to \synemptydefault{} and we conclude. $\blacksquare$ \end{itemize} \end{itemize} % \subsection{An alternative compilation scheme} % While perfectly correct, this maximalist translation scheme would entail % a lot of redundant branching in the generated code, which would hinder the % resulting program's performance. Because Catala aims at providing % production-ready high-performance code that can scale to computations % concerning millions of households, we need to be more clever to reduce the % number of generated branches. % Let's look back at the code of Fig.~\ref{fig:simpledefaultprogram}. One way % we can avoid having to propagate errors all the time in the program is to % contain empty error propagation inside each scope variable. Concretely, % that means enforcing a crashing error each time a scope variable evaluates % to an empty error term. This gives us a new default calculus program, % Fig.~\ref{fig:noerrordefaultprogram}, which differs % from Fig.~\ref{fig:simpledefaultprogram} by the addition of \syncrashifempty{} % calls to wrap up each scope variable definition.
% \begin{figure} % \begin{Verbatim}[frame=lines,label=Simple default program without error propagation, numbers=left, framesep=10pt, samepage=true] % let X (a: unit -> int) (b: unit -> int) : (int * int) = % let a : int = crash_if_empty < a () | < true :- 0 >> in % let b : int = crash_if_empty < b () | < true :- a + 1 >> in % (a, b) % let Y (c: unit -> bool) : bool = % let X_1[a] : unit -> int = fun () -> < true :- 42 > in % let X_1[b] : unit -> int = fun () -> EmptyError in % let (X_1[a], X_1[b]) : int * int = X(X_1[a], X_1[b]) in % let c : bool = crash_if_empty % < c () | < X_1[b] != 43 :- false | X_1[b] == 43 :- true >> % in % c % \end{Verbatim} % \caption{Alternative to Fig.~\ref{fig:simpledefaultprogram} with error containment % \label{fig:noerrordefaultprogram}} % \end{figure} % We can model \syncrashifempty{} as a special operator of the default % calculus governed by the following rules: % \begin{mathpar} % \inferrule[T-CrashIfEmpty]{ % \typctx{\Gamma}\typvdash\synvar{e}\typcolon\synvar{\tau} % }{ % \typctx{\Gamma}\typvdash\syncrashifempty\;\synvar{e}\typcolon\synvar{\tau} % } % \inferrule[D-CrashIfEmptyError]{}{ % \syncrashifempty\;\synemptydefault\exeval\synerror % } % \inferrule[D-CrashIfEmptyOK]{ % \synvar{e}\neq \synemptydefault % }{ % \syncrashifempty\;\synvar{e}\exeval \synvar{e} % } % \inferrule[C-CrashIfEmpty]{ % \synvar{e}\compiles\synvar{e'} % }{ % \syncrashifempty\;\synvar{e}\compiles\synmatch\synvar{e'}\synwith % \synnone\;\synarrow\synerror\synmid\synsome\synvar{e'}\synarrow % \synvar{e'} % } % \end{mathpar} % The addition of \syncrashifempty{} to the default calculus allow us to prevent % \synemptydefault{} to leak beyond this special operator call. Hence, we can % apply our \synvar{\tau} to \synvar{\tau} \texttt{option} transformation scheme % locally rather than globally on all terms of the program. 
More specifically, % we can assume that after it has been defined, a scope variable has type % \synvar{\tau} rather than \synvar{\tau} \texttt{option} in our translated program. % \begin{figure}[htb] % \begin{Verbatim}[frame=lines,label=Simple lambda calculus program, numbers=left, framesep=10pt, samepage=true] % let crash_if_empty x = match x with Some x -> x | None -> raise Error % let X (a: unit -> int option) (b: unit -> int option) : (int * int) = % let a : int = crash_if_empty (match a () with % | Some x -> Some x % | None -> if true then Some 0 else None)) % in % let b : int = crash_if_empty (match b () with % | Some x -> Some x % | None -> if true then Some (a + 1) else None) % in % (a, b) % let Y (c: unit -> bool option) : bool = % let X_1[a] : unit -> int = fun () -> if true then Some 42 else None in % let X_1[b] : unit -> int = fun () -> None in % let (X_1[a], X_1[b]) : int * int = X(X_1[a], X_1[b]) in % let c : bool = crash_if_empty (match c () with % | Some x -> Some x % | None -> (match (if X_1[b] != 43 then Some false else None) with % | Some x -> Some x % | None -> if X_1[b] == 43 then true else None)) % in % c % \end{Verbatim} % \caption{Translation of Fig.~\ref{fig:noerrordefaultprogram} to lambda calculus % \label{fig:lambdaprogram}} % \end{figure} % Fig.~\ref{fig:lambdaprogram} shows the result of the compilation of the code in % Fig.~\ref{fig:noerrordefaultprogram} according to our locally-restricted % compilation scheme. The \texttt{process\_exceptions} functions has been % partially evaluated and specialized to the example for readability. % This compilation mode helps minimizing the number of branching required % at execution time, but comes a the price of being very specialized to the % exact shape of the programs that we wish to compile. Indeed, we had to % know which function parameter types to change from \synvar{\tau} to % \synvar{\tau} \texttt{option}: here, only the thunked arguments of the % scopes. 
We claim that the correctness of the local application of our compilation % scheme can be validated by a mere typechecking pass over the resulting % lambda-calculus program. % Last, we want to discuss further the insertion of \syncrashifempty{} calls. % Inserting these calls effectively changes the semantics of the default calculus % program. By preventing the \synemptydefault{} error to propagate and be later % caught by an exception of a default, we change the behavior of the program. % Hence, it is the program of Fig.~\ref{fig:noerrordefaultprogram} that % should be taken as a reference, and not the program of % Fig.~\ref{fig:simpledefaultprogram}. % We chose to insert the \syncrashifempty{} calls before each scope variable % definition because it corresponds to the following high-level behavior: % each scope variable should be defined at execution time by one rule coming % from the source Catala program. Indeed, if no rules from the source % Catala program were to apply for a particular scope variable, then this % scope variable would evaluate to \synemptydefault{} in the default calculus. % Contrary to tax rules DSL like \cite{merigoux:hal-02936606} that have a special % \texttt{undefined} value (similar to \synemptydefault{} or the null pointer), % we wanted Catala not to reproduce the billion-dollar mistake and force the % programmer to rely on user-defined option types to deal with missing data % situations. Alternatively, the Catala programmer can also define an additional % base case rule in the source program defining a user-chosen default value % for a particular scope-variable. \printbibliography \end{document}