% catala/doc/formalization/formalization.tex
% 2021-02-07 22:38:04 +01:00
%
% 1172 lines
% 56 KiB
% TeX
%
\documentclass[11pt,a4paper]{article}
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage[english]{babel}
\usepackage{fullpage}
\usepackage{lmodern}
\usepackage{amsmath,amssymb}
\usepackage{mathpartir}
\usepackage{turnstile}
\usepackage{fancyvrb}
\usepackage{xcolor}
\usepackage{booktabs}
\usepackage{csquotes}
\usepackage{biblatex}
\addbibresource{catala.bib}
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newcommand{\sref}[1]{\S\ref{sec:#1}}
%% Syntax
\newcommand{\synvar}[1]{\ensuremath{#1}}
\newcommand{\synkeyword}[1]{\textcolor{red!60!black}{\texttt{#1}}}
\newcommand{\synpunct}[1]{\textcolor{black!40!white}{\texttt{#1}}}
\newcommand{\synname}[1]{\ensuremath{\mathsf{#1}}}
\newcommand{\synbool}{\synkeyword{bool}}
\newcommand{\synnum}{\synkeyword{num}}
\newcommand{\syndate}{\synkeyword{date}}
\newcommand{\synvec}{\synkeyword{vec~}}
\newcommand{\synopt}{\synkeyword{opt~}}
\newcommand{\synrule}{\synkeyword{rule~}}
\newcommand{\synlet}{\synkeyword{let~}}
\newcommand{\synin}{\synkeyword{~in~}}
\newcommand{\synif}{\synkeyword{if~}}
\newcommand{\synthen}{\synkeyword{~then~}}
\newcommand{\synelse}{\synkeyword{~else~}}
\newcommand{\syncall}{\synkeyword{call~}}
\newcommand{\synscope}{\synkeyword{scope~}}
\newcommand{\synequal}{\synpunct{~=~}}
\newcommand{\synjust}{~\synpunct{:\raisebox{-0.9pt}{-}}~}
\newcommand{\syntyped}{~\synpunct{:}~}
\newcommand{\syncomma}{\synpunct{,}}
\newcommand{\syndot}{\synpunct{.}~}
\newcommand{\synunit}{\synpunct{()}}
\newcommand{\synunitt}{\synkeyword{unit}}
\newcommand{\syntrue}{\synkeyword{true}}
\newcommand{\synfalse}{\synkeyword{false}}
\newcommand{\synop}{\synpunct{\odot}}
\newcommand{\synlambda}{\synpunct{$\lambda$}~}
\newcommand{\synand}{\synpunct{\wedge}}
\newcommand{\synor}{\synpunct{\vee}}
\newcommand{\synlparen}{\synpunct{(}}
\newcommand{\synrparen}{\synpunct{)}}
\newcommand{\synlsquare}{\synpunct{[}}
\newcommand{\synrsquare}{\synpunct{]}}
\newcommand{\synlbracket}{\synpunct{\{}}
\newcommand{\synrbracket}{\synpunct{\}}}
\newcommand{\synlangle}{\synpunct{$\langle$}}
\newcommand{\synrangle}{\synpunct{$\rangle$}}
\newcommand{\synmid}{\synpunct{~$|$~}}
\newcommand{\synemptydefault}{\synvar{\varnothing}}
\newcommand{\synerror}{\synvar{\circledast}}
\newcommand{\synstar}{\synpunct{~$*$~}}
\newcommand{\synvardef}{\synkeyword{definition~}}
\newcommand{\synscopecall}{\synkeyword{scope\_call~}}
\newcommand{\synlarrow}{~\synpunct{$\leftarrow$}~}
\newcommand{\synarrow}{~\synpunct{$\rightarrow$}~}
\newcommand{\synellipsis}{\synpunct{,$\ldots$,}}
\newcommand{\synlistellipsis}{\synpunct{;$\ldots$;}}
\newcommand{\syndef}{$ ::= $}
\newcommand{\synalt}{\;$|$\;}
\newcommand{\synhole}{\synvar{\cdot}}
\newcommand{\syncrashifempty}{\synkeyword{crash\_if\_empty}}
\newcommand{\synnone}{\texttt{None}}
\newcommand{\synsome}{\texttt{Some}~}
\newcommand{\synmatch}{\synkeyword{match}~}
\newcommand{\synwith}{~\synkeyword{with}~}
\newcommand{\synoption}{\;\texttt{option}}
\newcommand{\synraise}{\synkeyword{raise}\;}
\newcommand{\synemptyerror}{\texttt{EmptyError}}
\newcommand{\synconflicterror}{\texttt{ConflictError}}
\newcommand{\syntry}{\synkeyword{try}\;}
\newcommand{\synlist}{\;\texttt{list}}
%% Typing
\newcommand{\typctx}[1]{\textcolor{orange!90!black}{\ensuremath{#1}}}
\newcommand{\typempty}{\typctx{\varnothing}}
\newcommand{\typcomma}{\typctx{,\;}}
\newcommand{\typvdash}{\typctx{\;\vdash\;}}
\newcommand{\typcolon}{\typctx{\;:\;}}
\newcommand{\typlpar}{\typctx{(}}
\newcommand{\typrpar}{\typctx{)}}
%% Evaluation
\newcommand{\exctx}[1]{\textcolor{blue!80!black}{\ensuremath{#1}}}
\newcommand{\exeemptysubdefaults}{\exctx{\mathsf{empty\_count}}}
\newcommand{\execonflictsubdefaults}{\exctx{\mathsf{conflict\_count}}}
\newcommand{\Omegaarg}{\Omega_{arg}}
\newcommand{\excaller}{\exctx{\complement}}
\newcommand{\excomma}{\exctx{,}\;}
\newcommand{\exvdash}{\;\exctx{\vdash}\;}
\newcommand{\exempty}{\exctx{\varnothing}}
\newcommand{\exemptyv}{\exctx{\varnothing_v}}
\newcommand{\exemptyarg}{\exctx{\varnothing_{arg}}}
\newcommand{\exvarmap}{\exctx{~\mapsto~}}
\newcommand{\exscopemap}{\exctx{~\rightarrowtail~}}
\newcommand{\exArrow}{\exctx{~\Rrightarrow~}}
\newcommand{\exeq}{\exctx{\;=\;}}
\newcommand{\exeval}{\exctx{\;\longrightarrow\;}}
\newcommand{\exevalstar}{\exctx{\;\longrightarrow^*\;}}
\newcommand{\exat}{\exctx{\texttt{\;@\;}}}
\newcommand{\exsemicolon}{\exctx{;~}}
\newcommand{\excomp}{\dashrightarrow}
%% Reduction of the scope language
\newcommand{\redctx}[1]{\textcolor{green!50!black}{\ensuremath{#1}}}
\newcommand{\reduces}{\redctx{~\rightsquigarrow~}}
\newcommand{\redvdash}{\redctx{\;\vdash\;}}
\newcommand{\redturnstile}[1]{\;\ensuremath{\redctx{\vdash}_{#1}}\;\;}
\newcommand{\redcomma}{\redctx{,\;}}
\newcommand{\redsc}{\redctx{;\;}}
\newcommand{\redcolon}{\redctx{\;:\;}}
\newcommand{\redempty}{\redctx{\varnothing}}
\newcommand{\redproduce}{\;\redctx{\Rrightarrow}\;}
\newcommand{\redellipsis}{\redctx{,\ldots,~}}
\newcommand{\redlparen}{\redctx{(}}
\newcommand{\redrparen}{\redctx{)}}
\newcommand{\redequal}{\redctx{~=~}}
\newcommand{\redinit}{\redctx{\mathsf{init\_subvars}}}
%% Reduction of the defaults
\newcommand{\compctx}[1]{\textcolor{yellow!70!black}{\ensuremath{#1}}}
\newcommand{\compkeyword}[1]{\textcolor{yellow!60!black}{\texttt{#1}}}
\newcommand{\compiles}{\ensuremath{~\compctx{\rightrightarrows}~}}
\newcommand{\compnormal}{\compkeyword{normal}}
\newcommand{\compdefault}{\compkeyword{default}}
\newcommand{\compcons}{\compkeyword{cons}}
\newcommand{\compvdash}{\compctx{\;\vdash\;}}
\newcommand{\compok}{\;\;\compkeyword{ok}}
\title{Formalization of the Catala language}
\date{November 2020}
\author{Denis Merigoux, Nicolas Chataing}
\begin{document}
\maketitle
\tableofcontents
\section{Introduction}
Tax law defines how taxes should be computed, depending on various characteristics
of a fiscal household. Government agencies around the world use computer
programs to compute taxes; these programs are derived from the local tax law. Translating
tax law into an unambiguous computer program is tricky because the law is subject to
interpretations and ambiguities. The goal of the Catala domain-specific language
is to provide a way to clearly express the interpretation chosen for the
computer program, and display it close to the law it is supposed to model.
To complete this goal, our language needs some kind of \emph{locality} property
that enables cutting the computer program in bits that match the way the
legislative text is structured. This subject has been extensively studied by
Lawsky \cite{lawsky2017, lawsky2018, lawsky2020form}, whose work has greatly
inspired our approach.
The structure exhibited by Lawsky follows a kind of non-monotonic logic called
default logic \cite{Reiter1987}. Indeed, unlike traditional programming, when the law defines
a value for a variable, it does so in a \emph{base case} that applies only if
no \emph{exceptions} apply. To determine the value of a variable, one needs to
first consider all the exceptions that could modify the base case.
It is this precise behavior which we intend to capture when defining the semantics
of Catala.
\section{Default calculus}
We choose to present the core of Catala as a lambda-calculus augmented by a special
\enquote{default} expression. This special expression enables dealing with
the logical structure underlying tax law. Our lambda-calculus has only unit and
boolean values, but this base could be enriched with more complex values and traditional
lambda-calculus extensions (such as algebraic data types or $\Lambda$-polymorphism).
\subsection{Syntax}
\label{sec:defaultcalc:syntax}
\begin{center}
\begin{tabular}{lrrll}
Type&\synvar{\tau}&\syndef&\synbool\synalt\synunitt&boolean and unit types\\
&&\synalt&\synvar{\tau}\synarrow\synvar{\tau}&function type \\
&&&&\\
Expression&\synvar{e}&\syndef&\synvar{x}\synalt\syntrue\synalt\synfalse\synalt\synunit&variable, literal\\
&&\synalt&\synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot\synvar{e}\synalt\synvar{e}\;\synvar{e}&$\lambda$-calculus\\
&&\synalt&\synvar{d}&default term\\
&&&&\\
Default&\synvar{d}&\syndef&\synlangle $[\synvar{e}^*] \synmid\synvar{e}\synjust\synvar{e}$\synrangle&default term\\
&&\synalt&\synerror&conflict error term\\
&&\synalt&\synemptydefault&empty error term\\
\end{tabular}
\end{center}
Compared to the regular lambda calculus, we add a construction coming from
default logic. Particularly, we focus on a subset of default logic called
categorical, prioritized default logic \cite{Brewka2000}.
In this setting, a default is a logical
rule of the form $A \synjust B$ where $A$ is the justification of the rule and
$B$ is the consequence. The rule can only be applied if $A$ is consistent with
the current knowledge $W$: from $A\wedge W$, one should not derive $\bot$.
If multiple rules $A \synjust B_1$ and $A \synjust B_2$
can be applied at the same time, then only one of them is applied through
an explicit ordering of the rules.
To incorporate this form of logic inside our programming language, we set $A$ to
be an expression that can be evaluated to \syntrue{} or \synfalse{}, and $B$
the expression that the default should reduce to if $A$ is true. If $A$ is false,
then we look up for other rules of lesser priority to apply. This priority
is encoded through a syntactic tree data structure\footnote{Thanks to Pierre-Évariste Dagand for this insight.}.
A node of the tree contains a base case to consider, but first a list of higher-priority
exceptions that don't have a particular ordering between them. This structure is
sufficient to model the base case/exceptions structure of the law, and in particular
the fact that exceptions are not always prioritized in the legislative text.
In the term \synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid\synvar{e_{\text{just}}}\synjust
\synvar{e_{\text{cons}}} \synrangle, \synvar{e_{\text{just}}}
is the justification $A$, \synvar{e_{\text{cons}}} is the consequence $B$ and
\synvar{e_1}\synellipsis\synvar{e_n} are the list of exceptions to be considered first.
Of course, this evaluation scheme can fail if no more
rules can be applied, or if two or more exceptions of the same priority have their
justification evaluate to \syntrue{}. The error terms \synerror{} and \synemptydefault{}
encode these failure cases. Note that if a Catala program correctly derived from a legislative
source evaluates to \synerror{} or \synemptydefault{}, this could mean a flaw in the
law itself. \synemptydefault{} means that the law did not specify what happens
in a given situation, while \synerror{} means that two or more rules specified in
the law conflict with each other on a given situation.
\subsection{Typing}
\label{sec:defaultcalc:typing}
Our typing strategy is an extension of the simply-typed lambda calculus.
The typing judgment \fbox{$\typctx{\Gamma}\typvdash\synvar{e}\typcolon\synvar{\tau}$} reads as
\enquote{under context $\typctx{\Gamma}$, expression $\synvar{e}$ has type $\synvar{\tau}$}.
\begin{center}
\begin{tabular}{lrrll}
Typing context&\typctx{\Gamma}&\syndef&\typempty&empty context\\
(unordered map)&&\synalt&\typctx{\Gamma}\typcomma\synvar{x}\typcolon\synvar{\tau}&typed variable\\
\end{tabular}
\end{center}
We start by the usual rules of simply-typed lambda calculus.
\begin{mathpar}
\inferrule[T-UnitLit]{}{
\typctx{\Gamma}\typvdash\synunit\syntyped\synunitt
}
\inferrule[T-TrueLit]{}{
\typctx{\Gamma}\typvdash\syntrue\syntyped\synbool
}
\inferrule[T-FalseLit]{}{
\typctx{\Gamma}\typvdash\synfalse\syntyped\synbool
}
\inferrule[T-Var]{}{
\typctx{\Gamma}\typcomma\synvar{x}\typcolon\synvar{\tau}\typvdash\synvar{x}\syntyped\synvar{\tau}
}
\inferrule[T-Abs]
{\typctx{\Gamma}\typcomma\synvar{x}\typcolon\synvar{\tau}\typvdash\synvar{e}\typcolon\synvar{\tau'}}
{\typctx{\Gamma}\typvdash\synlambda\synlparen\synvar{x}\syntyped{\tau}\synrparen\syndot\synvar{e}\typcolon\synvar{\tau}\synarrow\synvar{\tau'}}
\inferrule[T-App]
{
\typctx{\Gamma}\typvdash\synvar{e_1}\typcolon\synvar{\tau_2}\synarrow\synvar{\tau_1}\\
\typctx{\Gamma}\typvdash\synvar{e_2}\typcolon\synvar{\tau_2}
}
{\typctx{\Gamma}\typvdash\synvar{e_1}\;\synvar{e_2}\typcolon\synvar{\tau_1}}
\end{mathpar}
Then we move to the special default terms. First, the error terms that stand for
any type.
\begin{mathpar}
\inferrule[ConflictError]{}{\typctx{\Gamma}\typvdash\synerror\typcolon\synvar{\tau}}
\inferrule[EmptyError]{}{\typctx{\Gamma}\typvdash\synemptydefault\typcolon\synvar{\tau}}
\end{mathpar}
Now the interesting part for the default terms. As mentioned earlier, the
justification \synvar{e_{\text{just}}} is a boolean, while \synvar{e_{\text{cons}}}
can evaluate to any value. \TirName{T-Default} specifies how the tree structure
of the default should be typed.
\begin{mathpar}
\inferrule[T-Default]
{
\typctx{\Gamma}\typvdash\synvar{e_1}\typcolon{\tau}\\
\cdots\\
\typctx{\Gamma}\typvdash\synvar{e_n}\typcolon{\tau}\\
\typctx{\Gamma}\typvdash\synvar{e_{\text{just}}}\typcolon\synbool\\
\typctx{\Gamma}\typvdash\synvar{e_{\text{cons}}}\typcolon\synvar{\tau}
}
{\typctx{\Gamma}\typvdash\synlangle
\synvar{e_1}\synellipsis\synvar{e_n}\synmid
\synvar{e_{\text{just}}}\synjust\synvar{e_{\text{cons}}}\synrangle\typcolon\synvar{\tau}}
\end{mathpar}
The situation becomes more complex in the presence of functions. Indeed, we want
our default expressions to depend on parameters. By only allowing \synvar{e_{\text{just}}}
to be \synbool{}, we force the user to declare the parameters in a \synlambda
that wraps the default from the outside. Using this scheme, all the expressions
inside the tree structure of the default will depend on the same bound variable
\synvar{x}.
\subsection{Evaluation}
We give this default calculus small-step, structured operational semantics. The
one-step reduction judgment is of the form \fbox{\synvar{e}\exeval\synvar{e'}}.
In our simple language, values are just booleans, functions or error terms.
We use evaluation contexts to efficiently describe the evaluation order.
Evaluation contexts are expressions with a hole indicating the sub-term
currently being reduced.
\begin{center}
\begin{tabular}{lrrll}
Values&\synvar{v}&\syndef&\synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot\synvar{e}&functions\\
&&\synalt&\syntrue\synalt\synfalse & booleans\\
&&\synalt&\synerror\synalt\synemptydefault&errors\\
Evaluation &\synvar{C_\lambda}&\syndef&\synhole\;\synvar{e}\synalt\synvar{v}\;\synhole&function application evaluation\\
contexts&&\synalt&\synlangle$[\synvar{v}^*]$\synmid\synhole\synjust\synvar{e}\synrangle&default justification evaluation\\
&&\synalt&\synlangle$[\synvar{v}^*]$\synmid\syntrue\synjust\synhole \synrangle&default consequence evaluation\\
&\synvar{C}&\syndef&\synvar{C_\lambda}&regular contexts\\
&&\synalt&\synlangle$[\synvar{v}^*]$\syncomma\synhole\syncomma$[\synvar{e}^*]$\synmid
\synvar{e}\synjust\synvar{e}\synrangle&default exceptions evaluation\\
\end{tabular}
\end{center}
We choose a call-by-value reduction strategy.
First, we present the usual reduction rules for beta-reduction
and evaluation inside a context hole. Note that \TirName{D-Context} does not
deal with error terms, which will have a special treatment for error propagation
later.
\begin{mathpar}
\inferrule[D-Context]
{\synvar{e}\exeval\synvar{e'}\\ e'\notin\{\synerror,\synemptydefault\}}
{\synvar{C}[\synvar{e}]\exeval\synvar{C}[\synvar{e'}]}
\inferrule[D-$\beta$]{}{
(\synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot{e})\;\synvar{v}
\exeval\synvar{e}[\synvar{x}\mapsto\synvar{v}]
}
\end{mathpar}
Now we have to describe how the default terms reduce. First, we consider
the list of exceptions to the default,
\synvar{e_1}\synellipsis\synvar{e_n}, that should be all evaluated (left to right),
according to the sub-default evaluation context. Then, we consider all the
values yielded by the exception evaluation and define a counting function over these
values. Let $\exeemptysubdefaults(\synvar{v_1}\synellipsis\synvar{v_n})$ denote
the number of empty error terms \synemptydefault{} among the exception values.
We then case analyze on this count:
\begin{itemize}
\item if $\exeemptysubdefaults(\synvar{v_1}\synellipsis\synvar{v_n}) =n$, then
none of the exceptions apply and we evaluate the base case;
\item if $\exeemptysubdefaults(\synvar{v_1}\synellipsis\synvar{v_n}) =n - 1$,
then only one of the exceptions applies and we return its corresponding value;
\item if $\exeemptysubdefaults(\synvar{v_1}\synellipsis\synvar{v_n}) < n - 1$,
then two or more exceptions apply and we raise a conflict error \synerror.
\end{itemize}
\begin{mathpar}
\inferrule[D-DefaultFalseNoExceptions]
{}
{\synlangle \synemptydefault{}\synellipsis\synemptydefault{}\synmid\synfalse\synjust \synvar{e} \synrangle\exeval \synemptydefault{}}
\inferrule[D-DefaultTrueNoExceptions]
{}
{\synlangle \synemptydefault{}\synellipsis\synemptydefault{}\synmid\syntrue\synjust \synvar{v}\synrangle\exeval v}
\inferrule[D-DefaultOneException]
{}
{\synlangle \synemptydefault\synellipsis\synemptydefault\syncomma\synvar{v}\syncomma\synemptydefault\synellipsis\synemptydefault
\synmid \synvar{e_1}\synjust \synvar{e_2}
\synrangle\exeval \synvar{v}}
\inferrule[D-DefaultExceptionsConflict]
{\exeemptysubdefaults(\synvar{v_1}\synellipsis\synvar{v_n}) <n - 1}
{\synlangle \synvar{v_1}\synellipsis\synvar{v_n}\synmid
\synvar{e_1}\synjust \synvar{e_2}\synrangle\exeval \synerror{}}
\end{mathpar}
When none of the exceptions apply, we can suppose that the justification of the default
is already reduced to a value \synvar{v}, which should be a boolean by virtue of
typing. If \synvar{v} is
\syntrue{}, then this rule applies and we reduce to the consequence. If it is
\synfalse{}, then the base case does not apply either and we throw an empty
default error.
Last, we need to define how our error terms propagate. Because the rules for
sub-default evaluation have to count the number of error terms in the list
of sub-defaults, we cannot always immediately propagate the error term \synemptydefault{} in
all the evaluation contexts as is usually done. Rather, we rely on the
distinction between the $\lambda$-calculus evaluation contexts $\synvar{C_\lambda}$
and the sub-default evaluation context. Hence the following rules for error
propagation:
\begin{mathpar}
\inferrule[D-ContextEmptyError]
{\synvar{e}\exeval\synemptydefault}
{\synvar{C_\lambda}[\synvar{e}]\exeval\synemptydefault}
\inferrule[D-ContextConflictError]
{\synvar{e}\exeval\synerror}
{\synvar{C}[\synvar{e}]\exeval\synerror}
\end{mathpar}
\section{Scope language}
Our core default calculus provides a value language adapted to the drafting style
of tax law. Each article of the law will provide one or more rules encoded as
defaults. But how to collect those defaults into a single expression that
will compute the result that we want? How to reuse existing rules in different
contexts?
These questions point out the lack of an abstraction structure adapted to
the legislative drafting style. Indeed, our \synlambda functions are not
convenient to compose together the rules scattered around the legislative text.
Moreover, the abstractions defined in the legislative text exhibit a behavior
quite different from \synlambda functions.
First, the blurred limits between abstraction units.
In the legislative text, objects and data are referred to in a free variable style.
It is up to us to put the necessary bindings for these free variables, but
it is not trivial to do so. For that, one needs to define the perimeter of
each abstraction unit, a legislative \emph{scope}, which might encompass multiple
articles.
Second, the confusion between local variables and function parameters. The
base-case vs. exception structure of the law also extends between legislative
scopes. For instance, a scope $A$ can define a variable $x$ to have value $a$, but
another legislative scope $B$ can \emph{call into} $A$ while specifying that
$x$ should be $b$. In this setting, $B$ defines an exception for $x$, that
should be dealt with using our default calculus.
Based on these two characteristics, we propose a high-level \emph{scope language},
semantically defined by its encoding in the default calculus.
\subsection{Syntax}
A scope $S$ is a legislative abstraction unit that can encompass multiple
articles. $S$ is comprised of multiple rules that define a scope variable $a$
to a certain expression under a condition that characterizes the base case or
the exception.
$S$ can also call into another scope $S'$, as a function can call
into another. These calls are scattered in the legislative texts and have
to be identified by the programmer. Since $S$ can call $S'$ multiple times
with different \enquote{parameters}, we have to distinguish between these
sub-calls and give them different names $\synvar{S'}_1$,
$\synvar{S'}_2$, etc. A program $P$ is
a list of scope declarations $\sigma$.
\begin{center}
\begin{tabular}{lrrll}
Scope name&\synvar{S}&&&\\
Scope call identifier&\synvar{n}&&&\\
Location&\synvar{\ell}&\syndef&\synvar{a}&scope variable\\
&&\synalt&$\synvar{S}_\synvar{n}$\synlsquare\synvar{a}\synrsquare&sub-scope call variable\\
Expression&\synvar{e}&\syndef&\synvar{\ell}&location\\
&&\synalt&$\cdots$&default calculus expressions\\
&&&&\\
Rule&\synvar{r}&\syndef&\synrule\synvar{\ell}\syntyped\synvar{\tau}\synequal\synlangle
$[\synvar{e}^*]$\synmid\synvar{e}\synjust
\synvar{e}\synrangle
&Location definition\\
&&\synalt&\syncall$\synvar{S}_\synvar{n}$&sub-scope call\\
Scope declaration&\synvar{\sigma}&\syndef&\synscope\synvar{S}\syntyped $[\synvar{r}^*]$&\\
Program&\synvar{P}&\syndef&$[\sigma^*]$&\\
\end{tabular}
\end{center}
\subsection{Running example}
\label{sec:scope:example}
Let's illustrate how the scope language plays out with a simple program
that calls a sub-scope, with Fig.~\ref{fig:simplescopeprogram}.
\begin{figure}
\begin{Verbatim}[frame=lines,label=Simple scope program, numbers=left, framesep=10pt, samepage=true]
scope X:
rule a = < true :- 0 >
rule b = < true :- a + 1 >

scope Y:
rule X_1[a] = < true :- 42 >
call X_1
rule c = < X_1[b] != 43 :- false | X_1[b] == 43 :- true >
\end{Verbatim}
\caption{Illustrative program written in the scope language\label{fig:simplescopeprogram}}
\end{figure}
Considered alone, the execution of \Verb+X+ is simple: \Verb+a+ and \Verb+b+ are defined by
a single default whose justification is \Verb+true+. Hence, \Verb+a+ should evaluate
to \Verb+0+ and \Verb+b+ should evaluate to \Verb+1+.
Now, consider scope \Verb+Y+. It defines a single variable \Verb+c+ with two defaults
line 8, but the justifications for these two defaults use the result of
the evaluation (line 7) of variable \Verb+b+ of the sub-scope \Verb+X_1+.
Line 6 shows an example of providing an \enquote{argument} to the subscope call.
The execution goes like this: at line 7 when calling the sub-scope,
\Verb+X_1[a]+ has two defaults, one coming from line 2, the other coming
from line 6. Because the caller has priority over the callee, the default from line
6 wins and \Verb+X_1[a]+ evaluates to \Verb+42+. Consequently,
\Verb+X_1[b]+ evaluates to \Verb+43+.
This triggers the second default in the list of line 8: the exception
evaluates first, but does not apply. Then, the base case applies,
and evaluates \Verb+c+ to \Verb+true+.
The goal is to provide an encoding of the scope language
into the lambda calculus that is compatible with this intuitive description
of how scopes should evaluate. To get a high-level
picture of the translation, we first show what the previous simple program will translate
to, using ML-like syntax for the target default calculus in Fig.~\ref{fig:simpledefaultprogram}.
\begin{figure}
\begin{Verbatim}[frame=lines,label=Simple default program, numbers=left, framesep=10pt, samepage=true]
let X (a: unit -> int) (b: unit -> int) : (int * int) =
let a : int = < a () | < true :- 0 >> in
let b : int = < b () | < true :- a + 1 >> in
(a, b)

let Y (c: unit -> bool) : bool =
let X_1[a] : unit -> int = fun () -> < true :- 42 > in
let X_1[b] : unit -> int = fun () -> EmptyError in
let (X_1[a], X_1[b]) : int * int = X(X_1[a], X_1[b]) in
let c : bool = < c () | < X_1[b] != 43 :- false | X_1[b] == 43 :- true >> in
c
\end{Verbatim}
\caption{Default calculus program resulting from the compilation of Fig.~\ref{fig:simplescopeprogram}
\label{fig:simpledefaultprogram}}
\end{figure}
We start unravelling this translation with the scope \Verb+X+. \Verb+X+ has
been turned into a function whose arguments are all the local variables of the
scope. However, the arguments have type \Verb+unit -> <type>+. Indeed, we want the
arguments of \Verb+X+ (line 1) to be the default expression supplied by the caller of
\Verb+X+, which are considered as exceptions to the base
expression defining the local variables of \Verb+X+ (lines 2 and 3).
After the merging of scope-local and
scope-arguments defaults, we apply
\Verb+()+ to the thunk to force evaluation and get back the value.
Finally, \Verb+X+ returns the tuple of all its local variables (line 4).
The translation of \Verb+Y+ exhibits the pattern for sub-scope calls.
Line 7 translates the assignment of the sub-scope argument \Verb+X_1[a]+.
Before calling \Verb+X_1+ (line 8), the other argument \Verb+X_1[b]+ is
initialized to the neutral \synemptydefault{} that will be ignored at execution
because \Verb+X+ provides more defaults for \Verb+b+.
The sub-scope call is translated to a regular
function call (line 9). The results of the call are then used in the two defaults
for \Verb+c+ (line 10), which have been turned into a default tree taking into
account the possible input for \Verb+c+.
\subsection{Formalization of the translation}
\label{sec:scope:formalization}
The main judgment of reduction from scope language to default calculus is
\fbox{$\synvar{P}\redvdash\synvar{\sigma}\reduces\synvar{e}\redproduce\redctx{\Delta_\mathrm{own}}$}, which reduces
a scope declaration to a function in the default calculus, while providing the
list of its own variables.
\begin{center}
\begin{tabular}{lrrll}
Translation context&\redctx{\Delta}&\syndef&\redempty&empty context\\
(unordered map)&&\synalt&\redctx{\Delta_\mathrm{own}}\redcomma\redctx{\Delta_\mathrm{sub}}&own and sub-scopes contexts\\
&\redctx{\Delta_\mathrm{own}}&\syndef&\redempty\synalt\redctx{\Delta_\mathrm{own}}\redcomma\synvar{a}\redcolon\synvar{\tau}&typed scope variable\\
&\redctx{\Delta_\mathrm{sub}}&\syndef&\redempty\synalt\redctx{\Delta_\mathrm{sub}}\redcomma
$\synvar{S}_\synvar{n}$\synlsquare\synvar{a}\synrsquare\redcolon\synvar{\tau}&typed sub-scope variable\\
\end{tabular}
\end{center}
The translation context \redctx{\Delta} is similar to the typing context \typctx{\Gamma} of
the default calculus, but it only takes into account the new scope-related locations. At any
point, \redctx{\Delta} will contain the scope locations defined (and usable in expressions) so far.
\redctx{\Delta} is divided into \redctx{\Delta_\mathrm{own}} and \redctx{\Delta_\mathrm{sub}},
which contain respectively the scope's own variables and the variables of its
sub-scopes.
We will describe
the translation from top to bottom, in order to keep the big picture in mind.
We will assume the default calculus has been expanded with the usual ML
\synlet \synin construction, as well as tuples. Here is the top-level rule for
translating scopes.
\begin{mathpar}
\inferrule[T-Scope]{
\synvar{P}\redsc\redempty\redturnstile{\synvar{S}} \synvar{r_1}\synellipsis\synvar{r_n}
\reduces \synvar{e} \redproduce
\synvar{a_1}\redcolon\synvar{\tau_1}\redellipsis\synvar{a_m}\redcolon\synvar{\tau_m}\redcomma
\redctx{\Delta_\mathrm{sub}}
}{
\synvar{P}\redvdash\synscope\synvar{S}\syntyped\synvar{r_1}\synellipsis\synvar{r_n}\reduces\\
\synlet\synvar{S}\;\synlparen\synvar{a_1}\syntyped\synunitt\synarrow\synvar{\tau_1}\synrparen\;\cdots\;
\synlparen\synvar{a_m}\syntyped\synunitt\synarrow\synvar{\tau_m}\synrparen\syntyped
\synlparen\synvar{\tau_1}\synstar\cdots\synstar\synvar{\tau_m}\synrparen\synequal
\synvar{e}[\synhole\mapsto\synlparen\synvar{a_1}\synellipsis\synvar{a_m}\synrparen]\redproduce\\
\synvar{a_1}\redcolon\synvar{\tau_1}\redellipsis\synvar{a_m}\redcolon\synvar{\tau_m}
}
\end{mathpar}
This rule has a lot to unpack, but it is just the formal description of the
translation scheme described earlier. To translate scope declaration \synvar{S}
with associated rules \synvar{r_1}\synellipsis\synvar{r_n}, we use a helper
judgment \fbox{\synvar{P}\redsc\redctx{\Delta}\redturnstile{\synvar{S}} \synvar{r_1}\synellipsis\synvar{r_n}
\reduces \synvar{e} \redproduce \redctx{\Delta'}} which reads as \enquote{%
given a program \synvar{P} and a translation context \redctx{\Delta},
the rules \synvar{r_1}\synellipsis\synvar{r_n} belonging to scope \synvar{S}
translate to the expression \synvar{e}, producing a new translation context
\redctx{\Delta'}}.
In this \TirName{T-Scope} rule, we isolate in the resulting \redctx{\Delta'} all the
scope variables \synvar{a_1},\ldots,\synvar{a_m} from the sub-scope variables.
Indeed, those variables will be the arguments and the return values of the function
corresponding to the scope \synvar{S}. The expression \synvar{e} that stands
for rules \synvar{r_1}\synellipsis\synvar{r_n} is a series of \synlet bindings,
the last one finishing by a hole (\synhole). We use this hole as a placeholder
to be filled with the return value of the function, which is the tuple
\synlparen\synvar{a_1}\synellipsis\synvar{a_m}\synrparen. Note that in accordance
with the translation scheme and the need for a delayed evaluation of defaults,
the arguments of \synvar{S} have a thunked type.
\begin{mathpar}
\inferrule[T-Rules]{
\synvar{P}\redsc\redctx{\Delta}\redturnstile{\synvar{S}} \synvar{r_1}
\reduces \synvar{e_1} \redproduce\redctx{\Delta'}\\
\synvar{P}\redsc\redctx{\Delta'}\redturnstile{\synvar{S}} \synvar{r_2}\synellipsis\synvar{r_n}
\reduces \synvar{e_2} \redproduce\redctx{\Delta''}
}{
\synvar{P}\redsc\redctx{\Delta}\redturnstile{\synvar{S}} \synvar{r_1}\synellipsis\synvar{r_n}
\reduces \synvar{e_1}[\synhole\mapsto \synvar{e_2}] \redproduce\redctx{\Delta''}
}
\end{mathpar}
The translation of the sequence of rules consists of chaining the different
\synlet \synin expressions together with the same hole (\synhole{}) substitution as the
previous rule. Now, we can define the translation for individual rules, starting
with the definitions of scope variables.
\begin{mathpar}
\inferrule[T-DefScopeVar]{
\synvar{a}\notin\redctx{\Delta}\\
\redctx{\Delta}\typvdash
\synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid
\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synrangle\typcolon\synvar{\tau}
}{
\synvar{P}\redsc\redctx{\Delta}\redturnstile{\synvar{S}}
\synrule\synvar{a}\syntyped\synvar{\tau}\synequal
\synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid
\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synrangle
\reduces \\
\synlet a\syntyped\synvar{\tau}\synequal
\synlangle a\;\synunit\synmid
\synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid
\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synrangle
\synrangle\synin\synhole
\redproduce\synvar{a}\redcolon\synvar{\tau}\redcomma\redctx{\Delta}
}
\end{mathpar}
The premise of \TirName{T-DefScopeVar}, $\synvar{a}\notin\redctx{\Delta}$, indicates
that our scope language allows each scope variable to be defined only once, with
one default tree. This single default tree can incorporate multiple prioritized definitions
of the same variable scattered around various legislative articles, but we
assume in our scope language that these scattered definitions have been
already collected. Therefore, the ordering of rules is very important in our
scope language, because it should be compatible with the dependency graph
of the scope locations. As the underlying default calculus is decidable and
does not allow fixpoint definitions, the dependency graph of the scope locations
should not be cyclic and therefore the topological ordering of its nodes
should correspond to the order of the rules inside the scope declaration. This
dependency ordering is enforced by the premise
\redctx{\Delta}\typvdash\synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synrangle\typcolon\synvar{\tau},
which seeds the typing judgment of \sref{defaultcalc:typing} with \redctx{\Delta}
(the scope locations defined so far).
Since scope variables are also arguments of the scope, \TirName{T-DefScopeVar}
redefines \synvar{a} by merging the new default tree with the default expression
\synvar{a} of type \synunitt\synarrow\synvar{\tau} passed as an argument to \synvar{S}.
This merging is done by defining the incoming argument as an exception to the
scope-local expression. This translation scheme ensures that the caller always
has priority over the callee. The evaluation of the incoming arguments is forced by applying \synunit,
yielding a value of type \synvar{\tau} for \synvar{a}.
Now that we have presented the
translation scheme for rules defining scope variables, we can switch to the
translation of sub-scope variables definitions and calls. We will start by
the rules that define sub-scope variables, prior to calling the associated
sub-scope.
\begin{mathpar}
\inferrule[T-DefSubScopeVar]{
S\neq S'\\
\synvar{S'}_\synvar{n}\synlsquare\synvar{a}\synrsquare\notin\redctx{\Delta}\\
\redctx{\Delta}\typvdash\synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid
\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synrangle
\typcolon\synvar{\tau}
}{
\synvar{P}\redsc\redctx{\Delta}\redturnstile{\synvar{S}}
\synrule\synvar{S'}_\synvar{n}\synlsquare\synvar{a}\synrsquare\syntyped\synvar{\tau}\synequal
\synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid
\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synrangle
\reduces \\
\synlet \synvar{S'}_\synvar{n}\synlsquare\synvar{a}\synrsquare\syntyped\synunitt\synarrow\synvar{\tau}\synequal
\synlambda \synlparen\synunit\syntyped\synunitt\synrparen\syndot
\synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid
\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synrangle
\synin\synhole\redproduce\\\synvar{S'}_\synvar{n}\synlsquare\synvar{a}\synrsquare\redcolon\synunitt\synarrow\synvar{\tau}\redcomma\redctx{\Delta}
}
\end{mathpar}
This rule is very similar to \TirName{T-DefScopeVar}, and actually simpler.
The premise $S\neq S'$ means that a scope
$S$ cannot have a recursive definition; it cannot call into itself and define
sub-scope variables of its own scope. Note that
$\synvar{S'}_\synvar{n}$\synlsquare\synvar{a}\synrsquare\redcolon\synunitt\synarrow\synvar{\tau}
is added to \redctx{\Delta} in the final part of the judgment;
$\synvar{S'}_\synvar{n}$\synlsquare\synvar{a}\synrsquare{}
has been defined as a sub-scope argument but not as a value that can be used by the
scope yet, its type is \synunitt\synarrow\synvar{\tau} and not \synvar{\tau}.
When all the arguments of
sub-scope \synvar{S'} have been defined using \TirName{T-DefSubScopeVar},
the sub-scope itself can be called.
\begin{mathpar}
\inferrule[T-SubScopeCall]{
S\neq S'\\
P(S') = \sigma'\\
P\redvdash\sigma'\reduces e'\redproduce
\synvar{a'_1}\redcolon\synvar{\tau'_1}\redellipsis\synvar{a'_n}\redcolon\synvar{\tau'_n}\redcomma\redctx{\Delta'_\mathrm{sub}}\\
\redinit\redlparen\redctx{\Delta}\redsc
\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_1}\synrsquare
\redellipsis\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_n}\synrsquare\redrparen
\redequal\synvar{e_\mathrm{init}}
}{
\synvar{P}\redsc\redctx{\Delta}\redturnstile{\synvar{S}}\syncall
\synvar{S'}_\synvar{n}\reduces
\synvar{e_\mathrm{init}}[\synhole\mapsto
\synlet\synlparen \synvar{S'}_\synvar{n}\synlsquare\synvar{a'_1}\synrsquare\synellipsis
\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_n}\synrsquare\synrparen
\syntyped \synlparen \synvar{\tau'_1}\synstar\cdots\synstar\synvar{\tau'_n}\synrparen\synequal\\
\synvar{e'}\;\synlparen\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_1}\synrsquare\synrparen
\cdots\synlparen\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_n}\synrsquare\synrparen
\synin\synhole\;]\redproduce
\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_1}\synrsquare\redcolon\synvar{\tau'_1}\redellipsis
\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_n}\synrsquare\redcolon\synvar{\tau'_n}\redcomma
\redctx{\Delta}
}
\end{mathpar}
Again, this rule has a lot to unpack, but is meant as a generalization of the
translation scheme illustrated in \sref{scope:example}. Let us start with
the premises. As earlier, $S\neq S'$ means that scope declarations cannot be
recursive. Next, we fetch the declaration \synvar{\sigma'} of \synvar{S'} inside
the program $P$. \synvar{\sigma'} is reduced into the function expression \synvar{e'},
whose arguments correspond to the scope variables of \synvar{S'}:
\synvar{a'_1}\synellipsis\synvar{a'_n}. Then, we need to define all the arguments
necessary to call \synvar{e'}. Some of these arguments have been defined earlier
in the translation, and they were added to \redctx{\Delta}. But some arguments
may not have been defined yet, and it is precisely the job of the \redinit{}
helper to produce the \synvar{e_\mathrm{init}} expression that defines those
missing arguments with the \synemptydefault{} value.
The conclusion of \TirName{T-SubScopeCall} defines the reduction of
\syncall$\synvar{S'}_\synvar{n}$. After \synvar{e_\mathrm{init}},
we translate the sub-scope call to the default calculus call of the corresponding expression \synvar{e'},
which takes as arguments the defaults and returns the corresponding values after
evaluation. Finally, the new translation context produced is \redctx{\Delta}
augmented with all the variables of sub-scope \synvar{S'}, which are available
for use in later definitions of the scope.
The last item we need to define in order to complete the translation is \redinit{}.
Its definition is quite simple: it produces an expression that initializes to \synemptydefault{}
all the variables of a given list that are not already present in \redctx{\Delta}.
\begin{mathpar}
\inferrule[T-InitSubVarsInDelta]{
\synvar{S'}_\synvar{n}\synlsquare\synvar{a_1}\synrsquare\redcolon\synvar{\tau_1}
\in\redctx{\Delta}\\
\redinit\redlparen\redctx{\Delta}\redsc
\synvar{S'}_\synvar{n}\synlsquare\synvar{a_2}\synrsquare\redellipsis
\synvar{S'}_\synvar{n}\synlsquare\synvar{a_n}\synrsquare\redrparen
\redequal\synvar{e}
}{
\redinit\redlparen\redctx{\Delta}\redsc
\synvar{S'}_\synvar{n}\synlsquare\synvar{a_1}\synrsquare\redellipsis
\synvar{S'}_\synvar{n}\synlsquare\synvar{a_n}\synrsquare\redrparen
\redequal\synvar{e}
}
\inferrule[T-InitSubVarsNotInDelta]{
\synvar{S'}_\synvar{n}\synlsquare\synvar{a_1}\synrsquare\redcolon\synvar{\tau_1}
\notin\redctx{\Delta}\\
\redinit\redlparen\redctx{\Delta}\redsc
\synvar{S'}_\synvar{n}\synlsquare\synvar{a_2}\synrsquare\redellipsis
\synvar{S'}_\synvar{n}\synlsquare\synvar{a_n}\synrsquare\redrparen
\redequal\synvar{e'}
}{
\redinit\redlparen\redctx{\Delta}\redsc
\synvar{S'}_\synvar{n}\synlsquare\synvar{a_1}\synrsquare\redellipsis
\synvar{S'}_\synvar{n}\synlsquare\synvar{a_n}\synrsquare\redrparen
\redequal\\
\synlet\synvar{S'}_\synvar{n}\synlsquare\synvar{a_1}\synrsquare
\syntyped\synunitt\synarrow\synvar{\tau_1}\synequal
\synlambda\synlparen\synunit\syntyped\synunitt\synrparen\syndot\synemptydefault\synin \synvar{e'}
}
\inferrule[T-InitSubVarsEmpty]{}{
\redinit\redlparen\redctx{\Delta}\redrparen
\redequal\synhole
}
\end{mathpar}
\section{From default calculus to lambda calculus}
\subsection{Using exceptions}
The default calculus is a solid semantic foundation for the Catala language,
but it is not a good compilation target since default logic cannot be shallowly
embedded easily in mainstream programming languages. Hence, we propose a
compilation scheme whose goal is to eliminate default terms and empty error
terms (\synemptydefault) from the default calculus, leaving us with the semantics of a
regular lambda calculus. The conflict error term (\synerror) is less problematic
since its semantics correspond to an early exit from the program.
In order to lower the default term to a lambda calculus term, we need to extend
the traditional lambda calculus with several classic extensions: algebraic
data types and recursive data types (lists). Indeed, we need an optional
accumulator to emulate the exception count that triggers rules like
\TirName{D-DefaultExceptionsConflict} and \TirName{D-DefaultOneException}.
The empty error term has a complex propagation rule (\TirName{D-ContextEmptyError})
that naturally maps to a catchable exception. Hence, in this translation scheme, we
assume that the target lambda calculus has support for exceptions. Combining these
features, we propose a translation of the default term. This translation relies
on a small runtime function \texttt{process\_exceptions}, whose body
implements the semantics of the default calculus.
\begin{align*}
\texttt{process\_exceptions}\quad&\syntyped&&
\synlparen\synunitt\synarrow\synvar{\tau}\synrparen\synlist\synarrow\synvar{\tau}\synoption\\
\texttt{process\_exceptions}\quad&\triangleq&&
\texttt{fold\_left}\;\synlparen\synlambda\synlparen\synvar{a}\syntyped
\synvar{\tau}\synoption\synrparen\;
\synlparen\synvar{e'}\syntyped
\synunitt\synarrow\synvar{\tau}\synrparen\syndot\\
&&&\quad\synlet\synvar{e'}\syntyped\synvar{\tau}\synoption\synequal\\
&&&\quad\quad\syntry
\synsome\synlparen\synvar{e'}\synunit\synrparen
\synwith\synemptyerror\synarrow \synnone\\
&&&\quad\!\!\!\!\synin\\
&&&\quad\synmatch\synlparen\synvar{a}\syncomma\;\synvar{e'}\synrparen\synwith\\
&&&\quad\quad\synmid\synlparen\synnone\syncomma\;\synvar{e'}\synrparen\synarrow\synvar{e'}\\
&&&\quad\quad\synmid\synlparen\synsome\synvar{a}\syncomma\;\synnone\synrparen\synarrow
\synsome\synvar{a}\\
&&&\quad\quad\synmid\synlparen\synsome\synvar{a}\syncomma\;\synsome
\synvar{e'}\synrparen\synarrow \synraise\synconflicterror \synrparen\;\synnone
\end{align*}
Note that the \synemptyerror{} exception is caught within \texttt{process\_exceptions},
making it the only place in the output code where this exception can be caught.
This is consistent with the evaluation context of the default exceptions,
which is the only evaluation context that belongs to $C$ but not to $C_\lambda$.
We can now proceed to the formal translation rules defining the compilation
judgment $\fbox{\synvar{e}\compiles\synvar{e'}}$ where \synvar{e} is an
expression of the default calculus and $\synvar{e'}$ is an expression
of the target lambda calculus enriched with algebraic data types and exceptions.
\begin{mathpar}
\inferrule[C-Default]{
\synvar{e_1}\compiles\synvar{e_1'}\\
\cdots\\
\synvar{e_n}\compiles\synvar{e_n'}\\
\synvar{e_\mathrm{just}}\compiles\synvar{e_\mathrm{just}'}\\
\synvar{e_\mathrm{cons}}\compiles\synvar{e_\mathrm{cons}'}\\
}{
\synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid\synvar{e_\mathrm{just}}
\synjust\synvar{e_\mathrm{cons}}\synrangle\compiles\\
\synlet\synvar{r_\mathrm{exceptions}}\synequal
\texttt{process\_exceptions}\;\synlsquare\synlambda\synvar{\_}\synarrow\synvar{e_1'}
\synlistellipsis\synlambda\synvar{\_}\synarrow\synvar{e_n'}\synrsquare\synin\\\synmatch
\synvar{r_\mathrm{exceptions}}\synwith\synsome\synvar{e'}\synarrow
\synvar{e'}\synmid\synnone\synarrow
\synif\synvar{e_\mathrm{just}'}\synthen\synvar{e_\mathrm{cons}'}\synelse\synraise\synemptyerror
}
\inferrule[C-EmptyError]{}{
\synemptydefault\compiles\synraise\synemptyerror
}
\inferrule[C-ConflictError]{}{
\synerror\compiles\synraise\synconflicterror
}
\inferrule[C-Var]{}{\synvar{x}\compiles\synvar{x}}
\inferrule[C-Literal]{\synvar{e}\in\{\synunit,\;\syntrue,\;\synfalse\}}{
\synvar{e}\compiles\synvar{e}
}
\inferrule[C-Abs]{
\synvar{e}\compiles\synvar{e'}
}{
\synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot\synvar{e}\compiles
\synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot\synvar{e'}
}
\inferrule[C-App]{
\synvar{e_1}\compiles\synvar{e_1'}\\
\synvar{e_2}\compiles\synvar{e_2'}
}{
\synvar{e_1}\;\synvar{e_2}\compiles\synvar{e_1'}\;\synvar{e_2'}
}
\end{mathpar}
We overload the \exeval{} notation as
the stepping judgment both in the default calculus and the target lambda
calculus. Similarly, we overload the typing judgment \typvdash.
We will prove the translation correctness theorem below by induction on the default calculus
expression \synvar{e}, starting by applying an inversion lemma on the judgment \synvar{e}\compiles\synvar{e'}.
\paragraph{Theorem (type preservation)} \textit{If \synvar{e}\compiles\synvar{e'}
and \typempty\typvdash\synvar{e}\typcolon\synvar{\tau}, then
\typempty\typvdash\synvar{e'}\typcolon\synvar{\tau}}
The proof can be carried out by induction on \synvar{e} without any trouble.
The most difficult part is to check the correct typing of \texttt{process\_exceptions}
in the lambda calculus $\blacksquare$
\paragraph{Theorem (translation correctness)} \textit{If \synvar{e}\compiles\synvar{e'}
and \synvar{e}\exevalstar\synvar{v}, then
either $\synvar{v}=\synemptydefault$, $\synvar{v}=\synerror$ or
there exists a lambda calculus value \synvar{v'} such that
\synvar{v}\compiles\synvar{v'} and \synvar{e'}\exevalstar\synvar{v'}.}
\begin{itemize}
\item Rules \TirName{C-EmptyError}, \TirName{C-ConflictError}, \TirName{C-Var} and
\TirName{C-Literal} yield an immediate conclusion.
\item For rule \TirName{C-Abs} with
$\synvar{e}=\synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot\synvar{e_1}$,
we apply the induction hypothesis on \synvar{e_1} and conclude since functions
are values ($\synvar{v'}=\synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot\synvar{e_1'}$).
\item For rule \TirName{C-App} with $\synvar{e} = \synvar{e_1}\;\synvar{e_2}$, we
apply the induction hypothesis on \synvar{e_1} and \synvar{e_2}. If either
\synvar{e_1} or \synvar{e_2} evaluate to \synemptydefault, we can apply
\TirName{D-ContextEmptyError} and conclude. Similarly, if either
\synvar{e_1} or \synvar{e_2} evaluate to \synerror, we can apply
\TirName{D-ContextConflictError} and conclude. Let us now suppose that we are not
in one of those cases, and that \synvar{e_1}\exevalstar\synvar{v_1} and
\synvar{e_2}\exevalstar\synvar{v_2}. \synvar{v_1} and \synvar{v_2} are not
error terms, so we can apply \TirName{C-Literal} or \TirName{C-Abs} to
get \synvar{v_1'} and \synvar{v_2'} such that \synvar{v_1}\compiles\synvar{v_1'}
and \synvar{v_2}\compiles\synvar{v_2'}. By the induction hypothesis applied on
\synvar{e_1} and \synvar{e_2}, we know that \synvar{e_1'}\exevalstar\synvar{v_1'}
and \synvar{e_2'}\exevalstar\synvar{v_2'}.
To recap, we have $\synvar{e_1}\;\synvar{e_2}\exevalstar\synvar{v_1}\;\synvar{v_2}$,
$\synvar{e_1'}\;\synvar{e_2'}\exevalstar\synvar{v_1'}\;\synvar{v_2'}$ and
by \TirName{C-App}, $\synvar{v_1}\;\synvar{v_2}\compiles\synvar{v_1'}\;\synvar{v_2'}$.
We also know that $\synvar{v_1}\;\synvar{v_2}\exevalstar\synvar{v}$, beginning
with a $\beta$-reduction. We can then apply the induction hypothesis a third time
on $\synvar{v_1}\;\synvar{v_2}$ to get the \synvar{v'} on which we conclude.
\item Let us consider now \TirName{C-Default} with $e=\synlangle\synvar{e_1}\synellipsis
\synvar{e_n}\synmid\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synrangle$.
If any of the sub-terms
evaluates to \synerror{}, then $\synvar{e}$ evaluates to $\synerror$
(\TirName{D-ContextConflictError}) and we conclude. Now, we case analyse on the
number of exceptions that don't evaluate to \synemptydefault{}.
\begin{itemize}
\item If more than one exception does not evaluate to \synemptydefault{},
then \synvar{e} evaluates to \synerror{} by \TirName{D-DefaultExceptionsConflict}
and we conclude.
\item If one and only one exception $\synvar{e_i}$ does not evaluate to \synemptydefault{},
then we apply the induction hypothesis and get a couple $(\synvar{v_i},\synvar{v_i'})$ such
that $\synvar{e_i}\exevalstar\synvar{v_i}$, $\synvar{e_i'}\exevalstar\synvar{v_i'}$
and $\synvar{v_i}\compiles\synvar{v_i'}$. Then, we claim that the
\synvar{r_\mathrm{exceptions}} result of \texttt{process\_exceptions} will
evaluate to \synsome\synvar{v_i'} in the lambda calculus translation of \synvar{e}.
Indeed, along the $\texttt{fold\_left}$ the accumulator \synvar{a} will remain
\synnone{} as it encounters all the \synvar{e_j'} that raise the \synemptyerror{}
caught inside the fold function. The accumulator will then encounter \synvar{e_i'},
which yields \synvar{v_i'}, and picks up its value. By following the rest of the
translated code, \synvar{e'} will evaluate to \synvar{v_i'} and we can
conclude.
\item If all exceptions evaluate to \synemptydefault{}, then we claim that
the \synvar{r_\mathrm{exceptions}} result will
evaluate to \synnone{}. Indeed, the accumulator will stay at its original
\synnone{} value during the whole fold process of the exceptions.
We now look at the evaluation of \synvar{e_\mathrm{just}}. If it evaluates
to \synemptydefault{} or \synerror{}, then the whole expression evaluates
to the error term (\TirName{D-ContextConflictError} and \TirName{D-ContextEmptyError})
and we conclude. By type safety of the default calculus, \synvar{e_\mathrm{just}}
evaluates to either $\synvar{v_\mathrm{just}} =\syntrue{}$ or \synfalse{}. By type preservation of the
translation \synvar{e_\mathrm{just}'} evaluates to
either $\synvar{v_\mathrm{just'}} =\syntrue{}$ or \synfalse{}. By application of the induction hypothesis
on \synvar{e_\mathrm{just}}, we know that
\synvar{v_\mathrm{just}}\compiles\synvar{v_\mathrm{just}'} and by
inversion on the \compiles{} judgment, we conclude $\synvar{v_\mathrm{just}}=\synvar{v_\mathrm{just}'}$.
From there, if $\synvar{v_\mathrm{just'}} =\syntrue{}$, then we conclude
by applying the induction hypothesis on \synvar{e_\mathrm{cons}}. If
$\synvar{v_\mathrm{just'}} =\synfalse{}$, then the default evaluates to
\synemptydefault{} and we conclude $\blacksquare$
\end{itemize}
\end{itemize}
% \subsection{An alternative compilation scheme}
% While perfectly correct, this maximalist translation scheme would entail
% a lot of redundant branching in the generated code, which would hinder the
% resulting program's performance. Because Catala aims at providing
% production-ready high-performance code that can scale to computations
% concerning millions of households, we need to be more clever to reduce the
% number of generated branches.
% Let's look back at the code of Fig.~\ref{fig:simpledefaultprogram}. One way
% we can avoid having to propagate errors all the time in the program is to
% contain empty error propagation inside each scope variable. Concretely,
% that means enforcing a crashing error each time a scope variable evaluates
% to an empty error term. This gives us a new default calculus program,
% Fig~\ref{fig:noerrordefaultprogram}, which differs
% from Fig.~\ref{fig:simpledefaultprogram} by the addition of \syncrashifempty{}
% calls to wrap up each scope variable definition.
% \begin{figure}
% \begin{Verbatim}[frame=lines,label=Simple default program without error propagation, numbers=left, framesep=10pt, samepage=true]
% let X (a: unit -> int) (b: unit -> int) : (int * int) =
% let a : int = crash_if_empty < a () | < true :- 0 >> in
% let b : int = crash_if_empty < b () | < true :- a + 1 >> in
% (a, b)
% let Y (c: unit -> bool) : bool =
% let X_1[a] : unit -> int = fun () -> < true :- 42 > in
% let X_1[b] : unit -> int = fun () -> EmptyError in
% let (X_1[a], X_1[b]) : int * int = X(X_1[a], X_1[b]) in
% let c : bool = crash_if_empty
% < c () | < X_1[b] != 43 :- false | X_1[b] == 43 :- true >>
% in
% c
% \end{Verbatim}
% \caption{Alternative to Fig.~\ref{fig:simpledefaultprogram} with error containment
% \label{fig:noerrordefaultprogram}}
% \end{figure}
% We can model \syncrashifempty{} as a special operator of the default
% calculus governed by the following rules:
% \begin{mathpar}
% \inferrule[T-CrashIfEmpty]{
% \typctx{\Gamma}\typvdash\synvar{e}\typcolon\synvar{\tau}
% }{
% \typctx{\Gamma}\typvdash\syncrashifempty\;\synvar{e}\typcolon\synvar{\tau}
% }
% \inferrule[D-CrashIfEmptyError]{}{
% \syncrashifempty\;\synemptydefault\exeval\synerror
% }
% \inferrule[D-CrashIfEmptyOK]{
% \synvar{e}\neq \synemptydefault
% }{
% \syncrashifempty\;\synvar{e}\exeval \synvar{e}
% }
% \inferrule[C-CrashIfEmpty]{
% \synvar{e}\compiles\synvar{e'}
% }{
% \syncrashifempty\;\synvar{e}\compiles\synmatch\synvar{e'}\synwith
% \synnone\;\synarrow\synerror\synmid\synsome\synvar{e'}\synarrow
% \synvar{e'}
% }
% \end{mathpar}
% The addition of \syncrashifempty{} to the default calculus allows us to prevent
% \synemptydefault{} from leaking beyond this special operator call. Hence, we can
% apply our \synvar{\tau} to \synvar{\tau} \texttt{option} transformation scheme
% locally rather than globally on all terms of the program. More specifically,
% we can assume that after it has been defined, a scope variable has type
% \synvar{\tau} rather than \synvar{\tau} \texttt{option} in our translated program.
% \begin{figure}[htb]
% \begin{Verbatim}[frame=lines,label=Simple lambda calculus program, numbers=left, framesep=10pt, samepage=true]
% let crash_if_empty x = match x with Some x -> x | None -> raise Error
% let X (a: unit -> int option) (b: unit -> int option) : (int * int) =
% let a : int = crash_if_empty (match a () with
% | Some x -> Some x
% | None -> if true then Some 0 else None))
% in
% let b : int = crash_if_empty (match b () with
% | Some x -> Some x
% | None -> if true then Some (a + 1) else None)
% in
% (a, b)
% let Y (c: unit -> bool option) : bool =
% let X_1[a] : unit -> int = fun () -> if true then Some 42 else None in
% let X_1[b] : unit -> int = fun () -> None in
% let (X_1[a], X_1[b]) : int * int = X(X_1[a], X_1[b]) in
% let c : bool = crash_if_empty (match c () with
% | Some x -> Some x
% | None -> (match (if X_1[b] != 43 then Some false else None) with
% | Some x -> Some x
% | None -> if X_1[b] == 43 then true else None))
% in
% c
% \end{Verbatim}
% \caption{Translation of Fig.~\ref{fig:noerrordefaultprogram} to lambda calculus
% \label{fig:lambdaprogram}}
% \end{figure}
% Fig.~\ref{fig:lambdaprogram} shows the result of the compilation of the code in
% Fig.~\ref{fig:noerrordefaultprogram} according to our locally-restricted
% compilation scheme. The \texttt{process\_exceptions} function has been
% partially evaluated and specialized to the example for readability.
% This compilation mode helps minimize the amount of branching required
% at execution time, but comes at the price of being very specialized to the
% exact shape of the programs that we wish to compile. Indeed, we had to
% know which function parameter types to change from \synvar{\tau} to
% \synvar{\tau} \texttt{option}: here, only the thunked arguments of the
% scopes. We claim that the correctness of the local application of our compilation
% scheme can be validated by a mere typechecking pass over the resulting
% lambda-calculus program.
% Last, we want to discuss further the insertion of \syncrashifempty{} calls.
% Inserting these calls effectively changes the semantics of the default calculus
% program. By preventing the \synemptydefault{} error to propagate and be later
% caught by an exception of a default, we change the behavior of the program.
% Hence, it is the program of Fig.~\ref{fig:noerrordefaultprogram} that
% should be taken as a reference, and not the program of
% Fig.~\ref{fig:simpledefaultprogram}.
% We chose to insert the \syncrashifempty{} calls before each scope variable
% definition because it corresponds to the following high-level behavior:
% each scope variable should be defined at execution time by one rule coming
% from the source Catala program. Indeed, if no rules from the source
% Catala program were to apply for a particular scope variable, then this
% scope variable would evaluate to \synemptydefault{} in the default calculus.
% Contrary to tax rules DSL like \cite{merigoux:hal-02936606} that have a special
% \texttt{undefined} value (similar to \synemptydefault{} or the null pointer),
% we wanted Catala not to reproduce the billion-dollar mistake and force the
% programmer to rely on user-defined option types to deal with missing data
% situations. Alternatively, the Catala programmer can also define an additional
% base case rule in the source program defining a user-chosen default value
% for a particular scope-variable.
\printbibliography
\end{document}