\documentclass[11pt,a4paper]{article}
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage[english]{babel}
\usepackage{fullpage}
\usepackage{lmodern}
\usepackage{amsmath,amssymb}
\usepackage{mathpartir}
\usepackage{turnstile}
\usepackage{fancyvrb}
\usepackage{xcolor}
\usepackage{booktabs}
\usepackage{csquotes}
\usepackage{biblatex}
\addbibresource{catala.bib}
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newcommand{\sref}[1]{\S\ref{sec:#1}}
%% Syntax
\newcommand{\synvar}[1]{\ensuremath{#1}}
\newcommand{\synkeyword}[1]{\textcolor{red!60!black}{\texttt{#1}}}
\newcommand{\synpunct}[1]{\textcolor{black!40!white}{\texttt{#1}}}
\newcommand{\synname}[1]{\ensuremath{\mathsf{#1}}}
\newcommand{\synbool}{\synkeyword{bool}}
\newcommand{\synnum}{\synkeyword{num}}
\newcommand{\syndate}{\synkeyword{date}}
\newcommand{\synvec}{\synkeyword{vec~}}
\newcommand{\synopt}{\synkeyword{opt~}}
\newcommand{\synrule}{\synkeyword{rule~}}
\newcommand{\synlet}{\synkeyword{let~}}
\newcommand{\synin}{\synkeyword{~in~}}
\newcommand{\synif}{\synkeyword{if~}}
\newcommand{\synthen}{\synkeyword{~then~}}
\newcommand{\synelse}{\synkeyword{~else~}}
\newcommand{\syncall}{\synkeyword{call~}}
\newcommand{\synscope}{\synkeyword{scope~}}
\newcommand{\synequal}{\synpunct{~=~}}
\newcommand{\synjust}{~\synpunct{:\raisebox{-0.9pt}{-}}~}
\newcommand{\syntyped}{~\synpunct{:}~}
\newcommand{\syncomma}{\synpunct{,}}
\newcommand{\syndot}{\synpunct{.}~}
\newcommand{\synunit}{\synpunct{()}}
\newcommand{\synunitt}{\synkeyword{unit}}
\newcommand{\syntrue}{\synkeyword{true}}
\newcommand{\synfalse}{\synkeyword{false}}
\newcommand{\synop}{\synpunct{\odot}}
\newcommand{\synlambda}{\synpunct{$\lambda$}~}
\newcommand{\synand}{\synpunct{\wedge}}
\newcommand{\synor}{\synpunct{\vee}}
\newcommand{\synlparen}{\synpunct{(}}
\newcommand{\synrparen}{\synpunct{)}}
\newcommand{\synlsquare}{\synpunct{[}}
\newcommand{\synrsquare}{\synpunct{]}}
\newcommand{\synlbracket}{\synpunct{\{}}
\newcommand{\synrbracket}{\synpunct{\}}}
\newcommand{\synlangle}{\synpunct{$\langle$}}
\newcommand{\synrangle}{\synpunct{$\rangle$}}
\newcommand{\synmid}{\synpunct{~$|$~}}
\newcommand{\synemptydefault}{\synvar{\varnothing}}
\newcommand{\synerror}{\synvar{\circledast}}
\newcommand{\synstar}{\synpunct{~$*$~}}
\newcommand{\synvardef}{\synkeyword{definition~}}
\newcommand{\synscopecall}{\synkeyword{scope\_call~}}
\newcommand{\synlarrow}{~\synpunct{$\leftarrow$}~}
\newcommand{\synarrow}{~\synpunct{$\rightarrow$}~}
\newcommand{\synellipsis}{\synpunct{,$\ldots$,}}
\newcommand{\synlistellipsis}{\synpunct{;$\ldots$;}}
\newcommand{\syndef}{$ ::= $}
\newcommand{\synalt}{\;$|$\;}
\newcommand{\synhole}{\synvar{\cdot}}
\newcommand{\syncrashifempty}{\synkeyword{crash\_if\_empty}}
\newcommand{\synnone}{\texttt{None}}
\newcommand{\synsome}{\texttt{Some}~}
\newcommand{\synmatch}{\synkeyword{match}~}
\newcommand{\synwith}{~\synkeyword{with}~}
\newcommand{\synoption}{\;\texttt{option}}
\newcommand{\synraise}{\synkeyword{raise}\;}
\newcommand{\synemptyerror}{\texttt{EmptyError}}
\newcommand{\synconflicterror}{\texttt{ConflictError}}
\newcommand{\syntry}{\synkeyword{try}\;}
\newcommand{\synlist}{\;\texttt{list}}
%% Typing
\newcommand{\typctx}[1]{\textcolor{orange!90!black}{\ensuremath{#1}}}
\newcommand{\typempty}{\typctx{\varnothing}}
\newcommand{\typcomma}{\typctx{,\;}}
\newcommand{\typvdash}{\typctx{\;\vdash\;}}
\newcommand{\typcolon}{\typctx{\;:\;}}
\newcommand{\typlpar}{\typctx{(}}
\newcommand{\typrpar}{\typctx{)}}
%% Evaluation
\newcommand{\exctx}[1]{\textcolor{blue!80!black}{\ensuremath{#1}}}
\newcommand{\exeemptysubdefaults}{\exctx{\mathsf{empty\_count}}}
\newcommand{\execonflictsubdefaults}{\exctx{\mathsf{conflict\_count}}}
\newcommand{\Omegaarg}{\Omega_{arg}}
\newcommand{\excaller}{\exctx{\complement}}
\newcommand{\excomma}{\exctx{,}\;}
\newcommand{\exvdash}{\;\exctx{\vdash}\;}
\newcommand{\exempty}{\exctx{\varnothing}}
\newcommand{\exemptyv}{\exctx{\varnothing_v}}
\newcommand{\exemptyarg}{\exctx{\varnothing_{arg}}}
\newcommand{\exvarmap}{\exctx{~\mapsto~}}
\newcommand{\exscopemap}{\exctx{~\rightarrowtail~}}
\newcommand{\exArrow}{\exctx{~\Rrightarrow~}}
\newcommand{\exeq}{\exctx{\;=\;}}
\newcommand{\exeval}{\exctx{\;\longrightarrow\;}}
\newcommand{\exevalstar}{\exctx{\;\longrightarrow^*\;}}
\newcommand{\exat}{\exctx{\texttt{\;@\;}}}
\newcommand{\exsemicolon}{\exctx{;~}}
\newcommand{\excomp}{\dashrightarrow}
%% Reduction of the scope language
\newcommand{\redctx}[1]{\textcolor{green!50!black}{\ensuremath{#1}}}
\newcommand{\reduces}{\redctx{~\rightsquigarrow~}}
\newcommand{\redvdash}{\redctx{\;\vdash\;}}
\newcommand{\redturnstile}[1]{\;\ensuremath{\redctx{\vdash}_{#1}}\;\;}
\newcommand{\redcomma}{\redctx{,\;}}
\newcommand{\redsc}{\redctx{;\;}}
\newcommand{\redcolon}{\redctx{\;:\;}}
\newcommand{\redempty}{\redctx{\varnothing}}
\newcommand{\redproduce}{\;\redctx{\Rrightarrow}\;}
\newcommand{\redellipsis}{\redctx{,\ldots,~}}
\newcommand{\redlparen}{\redctx{(}}
\newcommand{\redrparen}{\redctx{)}}
\newcommand{\redequal}{\redctx{~=~}}
\newcommand{\redinit}{\redctx{\mathsf{init\_subvars}}}
%% Reduction of the defaults
\newcommand{\compctx}[1]{\textcolor{yellow!70!black}{\ensuremath{#1}}}
\newcommand{\compkeyword}[1]{\textcolor{yellow!60!black}{\texttt{#1}}}
\newcommand{\compiles}{\ensuremath{~\compctx{\rightrightarrows}~}}
\newcommand{\compnormal}{\compkeyword{normal}}
\newcommand{\compdefault}{\compkeyword{default}}
\newcommand{\compcons}{\compkeyword{cons}}
\newcommand{\compvdash}{\compctx{\;\vdash\;}}
\newcommand{\compok}{\;\;\compkeyword{ok}}
\title{Formalization of the Catala language}
\date{November 2020}
\author{Denis Merigoux, Nicolas Chataing}
\begin{document}
\maketitle
\tableofcontents
\section{Introduction}
Tax law defines how taxes should be computed, depending on various characteristics of a fiscal household.
Government agencies around the world use computer programs to compute the law, which are derived from the local tax law. Translating tax law into an unambiguous computer program is tricky because the law is subject to interpretations and ambiguities. The goal of the Catala domain-specific language is to provide a way to clearly express the interpretation chosen for the computer program, and display it close to the law it is supposed to model. To complete this goal, our language needs some kind of \emph{locality} property that enables cutting the computer program in bits that match the way the legislative text is structured. This subject has been extensively studied by Lawsky \cite{lawsky2017, lawsky2018, lawsky2020form}, whose work has greatly inspired our approach. The structure exhibited by Lawsky follows a kind of non-monotonic logic called default logic \cite{Reiter1987}. Indeed, unlike traditional programming, when the law defines a value for a variable, it does so in a \emph{base case} that applies only if no \emph{exceptions} apply. To determine the value of a variable, one needs to first consider all the exceptions that could modify the base case. It is this precise behavior which we intend to capture when defining the semantics of Catala. \section{Default calculus} We choose to present the core of Catala as a lambda-calculus augmented by a special \enquote{default} expression. This special expression enables dealing with the logical structure underlying tax law. Our lambda-calculus has only unit and boolean values, but this base could be enriched with more complex values and traditional lambda-calculus extensions (such as algebraic data types or $\Lambda$-polymorphism). 
\subsection{Syntax}
\label{sec:defaultcalc:syntax}
\begin{center}
\begin{tabular}{lrrll}
Type&\synvar{\tau}&\syndef&\synbool\synalt\synunitt&boolean and unit types\\
&&\synalt&\synvar{\tau}\synarrow\synvar{\tau}&function type \\
&&&&\\
Expression&\synvar{e}&\syndef&\synvar{x}\synalt\syntrue\synalt\synfalse\synalt\synunit&variable, literal\\
&&\synalt&\synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot\synvar{e}\synalt\synvar{e}\;\synvar{e}&$\lambda$-calculus\\
&&\synalt&\synvar{d}&default term\\
&&&&\\
Default&\synvar{d}&\syndef&\synlangle $[\synvar{e}^*] \synmid\synvar{e}\synjust\synvar{e}$\synrangle&default term\\
&&\synalt&\synerror&conflict error term\\
&&\synalt&\synemptydefault&empty error term\\
\end{tabular}
\end{center}
Compared to the regular lambda calculus, we add a construction coming from default logic.
Particularly, we focus on a subset of default logic called categorical, prioritized default logic \cite{Brewka2000}.
In this setting, a default is a logical rule of the form $A \synjust B$ where $A$ is the justification of the rule and $B$ is the consequence.
The rule can only be applied if $A$ is consistent with the current knowledge $W$: from $A\wedge W$, one should not derive $\bot$.
If multiple rules $A \synjust B_1$ and $A \synjust B_2$ can be applied at the same time, then only one of them is applied through an explicit ordering of the rules.
To incorporate this form of logic inside our programming language, we set $A$ to be an expression that can be evaluated to \syntrue{} or \synfalse{}, and $B$ the expression that the default should reduce to if $A$ is true.
If $A$ is false, then we look for other rules of lesser priority to apply.
This priority is encoded through a syntactic tree data structure\footnote{Thanks to Pierre-Évariste Dagand for this insight.}.
A node of the tree contains a base case to consider, but first a list of higher-priority exceptions that don't have a particular ordering between them.
This structure is sufficient to model the base case/exceptions structure of the law, and in particular the fact that exceptions are not always prioritized in the legislative text.
In the term \synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid\synvar{e_{\text{just}}}\synjust \synvar{e_{\text{cons}}} \synrangle, \synvar{e_{\text{just}}} is the justification $A$, \synvar{e_{\text{cons}}} is the consequence $B$ and \synvar{e_1}\synellipsis\synvar{e_n} are the list of exceptions to be considered first.
Of course, this evaluation scheme can fail if no more rules can be applied, or if two or more exceptions of the same priority have their justification evaluate to \syntrue{}.
The error terms \synerror{} and \synemptydefault{} encode these failure cases.
Note that if a Catala program correctly derived from a legislative source evaluates to \synerror{} or \synemptydefault{}, this could mean a flaw in the law itself.
\synemptydefault{} means that the law did not specify what happens in a given situation, while \synerror{} means that two or more rules specified in the law conflict with each other on a given situation.
\subsection{Typing}
\label{sec:defaultcalc:typing}
Our typing strategy is an extension of the simply-typed lambda calculus.
The typing judgment \fbox{$\typctx{\Gamma}\typvdash\synvar{e}\typcolon\synvar{\tau}$} reads as \enquote{under context $\typctx{\Gamma}$, expression $\synvar{e}$ has type $\synvar{\tau}$}.
\begin{center}
\begin{tabular}{lrrll}
Typing context&\typctx{\Gamma}&\syndef&\typempty&empty context\\
(unordered map)&&\synalt&\typctx{\Gamma}\typcomma\synvar{x}\typcolon\synvar{\tau}&typed variable\\
\end{tabular}
\end{center}
We start with the usual rules of simply-typed lambda calculus.
\begin{mathpar}
\inferrule[T-UnitLit]{}{
\typctx{\Gamma}\typvdash\synunit\typcolon\synunitt
}
\inferrule[T-TrueLit]{}{
\typctx{\Gamma}\typvdash\syntrue\typcolon\synbool
}
\inferrule[T-FalseLit]{}{
\typctx{\Gamma}\typvdash\synfalse\typcolon\synbool
}
\inferrule[T-Var]{}{
\typctx{\Gamma}\typcomma\synvar{x}\typcolon\synvar{\tau}\typvdash\synvar{x}\typcolon\synvar{\tau}
}
\inferrule[T-Abs]
{\typctx{\Gamma}\typcomma\synvar{x}\typcolon\synvar{\tau}\typvdash\synvar{e}\typcolon\synvar{\tau'}}
{\typctx{\Gamma}\typvdash\synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot\synvar{e}\typcolon\synvar{\tau}\synarrow\synvar{\tau'}}
\inferrule[T-App]
{
\typctx{\Gamma}\typvdash\synvar{e_1}\typcolon\synvar{\tau_2}\synarrow\synvar{\tau_1}\\
\typctx{\Gamma}\typvdash\synvar{e_2}\typcolon\synvar{\tau_2}
}
{\typctx{\Gamma}\typvdash\synvar{e_1}\;\synvar{e_2}\typcolon\synvar{\tau_1}}
\end{mathpar}
Then we move to the special default terms.
First, the error terms that stand for any type.
\begin{mathpar}
\inferrule[ConflictError]{}{\typctx{\Gamma}\typvdash\synerror\typcolon\synvar{\tau}}
\inferrule[EmptyError]{}{\typctx{\Gamma}\typvdash\synemptydefault\typcolon\synvar{\tau}}
\end{mathpar}
Now the interesting part for the default terms.
As mentioned earlier, the justification \synvar{e_{\text{just}}} is a boolean, while \synvar{e_{\text{cons}}} can evaluate to any value.
\TirName{T-Default} specifies how the tree structure of the default should be typed.
\begin{mathpar}
\inferrule[T-Default]
{
\typctx{\Gamma}\typvdash\synvar{e_1}\typcolon\synvar{\tau}\\
\cdots\\
\typctx{\Gamma}\typvdash\synvar{e_n}\typcolon\synvar{\tau}\\
\typctx{\Gamma}\typvdash\synvar{e_{\text{just}}}\typcolon\synbool\\
\typctx{\Gamma}\typvdash\synvar{e_{\text{cons}}}\typcolon\synvar{\tau}
}
{\typctx{\Gamma}\typvdash\synlangle \synvar{e_1}\synellipsis\synvar{e_n}\synmid \synvar{e_{\text{just}}}\synjust\synvar{e_{\text{cons}}}\synrangle\typcolon\synvar{\tau}}
\end{mathpar}
The situation becomes more complex in the presence of functions.
Indeed, we want our default expressions to depend on parameters.
By only allowing \synvar{e_{\text{just}}} to be \synbool{}, we force the user to declare the parameters in a \synlambda that wraps the default from the outside.
Using this scheme, all the expressions inside the tree structure of the default will depend on the same bound variable \synvar{x}.
\subsection{Evaluation}
We give this default calculus small-step, structured operational semantics.
The one-step reduction judgment is of the form \fbox{\synvar{e}\exeval\synvar{e'}}.
In our simple language, values are just booleans, functions or error terms.
We use evaluation contexts to efficiently describe the evaluation order.
Evaluation contexts are expressions with a hole indicating the sub-term currently being reduced.
\begin{center}
\begin{tabular}{lrrll}
Values&\synvar{v}&\syndef&\synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot\synvar{e}&functions\\
&&\synalt&\syntrue\synalt\synfalse & booleans\\
&&\synalt&\synerror\synalt\synemptydefault&errors\\
Evaluation &\synvar{C_\lambda}&\syndef&\synhole\;\synvar{e}\synalt\synvar{v}\;\synhole&function application evaluation\\
contexts&&\synalt&\synlangle$[\synvar{v}^*]$\synmid\synhole\synjust\synvar{e}\synrangle&default justification evaluation\\
&&\synalt&\synlangle$[\synvar{v}^*]$\synmid\syntrue\synjust\synhole \synrangle&default consequence evaluation\\
&\synvar{C}&\syndef&\synvar{C_\lambda}&regular contexts\\
&&\synalt&\synlangle$[\synvar{v}^*]$\syncomma\synhole\syncomma$[\synvar{e}^*]$\synmid \synvar{e}\synjust\synvar{e}\synrangle&default exceptions evaluation\\
\end{tabular}
\end{center}
We choose a call-by-value reduction strategy.
First, we present the usual reduction rules for beta-reduction and evaluation inside a context hole.
Note that \TirName{D-Context} does not deal with error terms, which will have a special treatment for error propagation later.
\begin{mathpar}
\inferrule[D-Context]
{\synvar{e}\exeval\synvar{e'}\\
e'\notin\{\synerror,\synemptydefault\}}
{\synvar{C}[\synvar{e}]\exeval\synvar{C}[\synvar{e'}]}
\inferrule[D-$\beta$]{}{
(\synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot\synvar{e})\;\synvar{v}
\exeval\synvar{e}[\synvar{x}\mapsto\synvar{v}]
}
\end{mathpar}
Now we have to describe how the default terms reduce.
First, we consider the list of exceptions to the default, \synvar{e_1}\synellipsis\synvar{e_n}, that should be all evaluated (left to right), according to the sub-default evaluation context.
Then, we consider all the values yielded by the exception evaluation and define two functions over these values.
Let $\exeemptysubdefaults(\synvar{v_1}\synellipsis\synvar{v_n})$ return the number of empty error terms \synemptydefault{} among the exception values.
We then case analyze on this count:
\begin{itemize}
\item if $\exeemptysubdefaults(\synvar{v_1}\synellipsis\synvar{v_n}) = n$, then none of the exceptions apply and we evaluate the base case;
\item if $\exeemptysubdefaults(\synvar{v_1}\synellipsis\synvar{v_n}) = n - 1$, then only one of the exceptions applies and we return its corresponding value;
\item if $\exeemptysubdefaults(\synvar{v_1}\synellipsis\synvar{v_n}) < n - 1$, then two or more exceptions apply and we raise a conflict error \synerror.
\end{itemize}
\begin{mathpar}
\inferrule[D-DefaultFalseNoExceptions]
{}
{\synlangle \synemptydefault{}\synellipsis\synemptydefault{}\synmid\synfalse\synjust \synvar{e} \synrangle\exeval \synemptydefault{}}
\inferrule[D-DefaultTrueNoExceptions]
{}
{\synlangle \synemptydefault{}\synellipsis\synemptydefault{}\synmid\syntrue\synjust \synvar{v}\synrangle\exeval \synvar{v}}
\inferrule[D-DefaultOneException]
{}
{\synlangle \synemptydefault\synellipsis\synemptydefault\syncomma\synvar{v}\syncomma\synemptydefault\synellipsis\synemptydefault \synmid \synvar{e_1}\synjust \synvar{e_2} \synrangle\exeval \synvar{v}}
\inferrule[D-DefaultExceptionsConflict]
{\exeemptysubdefaults(\synvar{v_1}\synellipsis\synvar{v_n}) < n - 1}
{\synlangle \synvar{v_1}\synellipsis\synvar{v_n}\synmid \synvar{e_1}\synjust \synvar{e_2} \synrangle\exeval \synerror}
\end{mathpar}
% NOTE(review): the source was garbled here — content was lost between the
% premise of D-DefaultExceptionsConflict and the middle of the scope-language
% example figure. The error-propagation rules, the section header (with the
% label sec:scope:example referenced later) and the opening of the figure are
% reconstructed below from the surrounding text, which refers to
% \TirName{D-ContextEmptyError} and to lines 1--8 of
% Fig.~\ref{fig:simplescopeprogram}. Please check against the original.
Finally, we describe how the error terms propagate.
The conflict error \synerror{} propagates through all evaluation contexts, aborting the evaluation.
The empty error \synemptydefault{} propagates only through the regular contexts \synvar{C_\lambda}: inside the exception list of a default, empty errors are not propagated but counted by the rules above.
\begin{mathpar}
\inferrule[D-ContextConflictError]
{}
{\synvar{C}[\synerror]\exeval\synerror}
\inferrule[D-ContextEmptyError]
{}
{\synvar{C_\lambda}[\synemptydefault{}]\exeval\synemptydefault{}}
\end{mathpar}
\section{The scope language}
\subsection{Example}
\label{sec:scope:example}
Catala programs are organized in \emph{scopes}, which group rules defining scope variables and may call other scopes as sub-scopes.
Fig.~\ref{fig:simplescopeprogram} shows a simple illustrative program written in the scope language.
\begin{figure}
\begin{Verbatim}[frame=lines,label=Simple scope program, numbers=left, framesep=10pt, samepage=true]
scope X:
  rule a = < true :- 0 >
  rule b = < true :- a + 1 >

scope Y:
  rule X_1[a] = < true :- 42 >
  call X_1
  rule c = < X_1[b] != 43 :- false | X_1[b] == 43 :- true >
\end{Verbatim}
\caption{Illustrative program written in the scope language\label{fig:simplescopeprogram}}
\end{figure}
Considered alone, the execution of \Verb+X+ is simple: \Verb+a+ and \Verb+b+ are defined by a single default whose justification is \Verb+true+.
Hence, \Verb+a+ should evaluate to \Verb+0+ and \Verb+b+ should evaluate to \Verb+1+.
Now, consider scope \Verb+Y+.
It defines a single variable \Verb+c+ with two defaults at line 8, but the justifications for these two defaults use the result of the evaluation (line 7) of variable \Verb+b+ of the sub-scope \Verb+X_1+.
Line 6 shows an example of providing an \enquote{argument} to the subscope call.
The execution goes like this: at line 7 when calling the sub-scope, \Verb+X_1[a]+ has two defaults, one coming from line 2, the other coming from line 6.
Because the caller has priority over the callee, the default from line 6 wins and \Verb+X_1[a]+ evaluates to \Verb+42+.
Consequently, \Verb+X_1[b]+ evaluates to \Verb+43+.
This triggers the second default in the list of line 8: the exception evaluates first, but does not apply.
Then, the base case applies, and evaluates \Verb+c+ to \Verb+true+.
The goal is to provide an encoding of the scope language into the lambda calculus that is compatible with this intuitive description of how scopes should evaluate.
To get a high-level picture of the translation, we first show what the previous simple program will translate to, using ML-like syntax for the target default calculus in Fig.~\ref{fig:simpledefaultprogram}.
\begin{figure}
\begin{Verbatim}[frame=lines,label=Simple default program, numbers=left, framesep=10pt, samepage=true]
let X (a: unit -> int) (b: unit -> int) : (int * int) =
  let a : int = < a () | < true :- 0 >> in
  let b : int = < b () | < true :- a + 1 >> in
  (a, b)

let Y (c: unit -> bool) : bool =
  let X_1[a] : unit -> int = fun () -> < true :- 42 > in
  let X_1[b] : unit -> int = fun () -> EmptyError in
  let (X_1[a], X_1[b]) : int * int = X(X_1[a], X_1[b]) in
  let c : bool = < c () | < X_1[b] != 43 :- false | X_1[b] == 43 :- true >> in
  c
\end{Verbatim}
\caption{Default calculus program resulting from the compilation of Fig.~\ref{fig:simplescopeprogram} \label{fig:simpledefaultprogram}}
\end{figure}
We start unravelling this translation with the scope \Verb+X+.
\Verb+X+ has been turned into a function whose arguments are all the local variables of the scope.
However, the arguments have the thunked type \Verb+unit -> int+ rather than simply \Verb+int+.
Indeed, we want the arguments of \Verb+X+ (line 1) to be the default expressions supplied by the caller of \Verb+X+, which are considered as exceptions to the base expression defining the local variables of \Verb+X+ (lines 2 and 3).
After the merging of scope-local and scope-arguments defaults, we apply \Verb+()+ to the thunk to force evaluation and get back the value.
Finally, \Verb+X+ returns the tuple of all its local variables (line 4).
The translation of \Verb+Y+ exhibits the pattern for sub-scope calls.
Line 7 translates the assignment of the sub-scope argument \Verb+X_1[a]+.
Before calling \Verb+X_1+ (line 8), the other argument \Verb+X_1[b]+ is initialized to the neutral \synemptydefault{} that will be ignored at execution because \Verb+X+ provides more defaults for \Verb+b+.
The sub-scope call is translated to a regular function call (line 9).
The results of the call are then used in the two defaults for \Verb+c+ (line 10), which have been turned into a default tree taking into account the possible input for \Verb+c+.
\subsection{Formalization of the translation}
\label{sec:scope:formalization}
The main judgment of reduction from scope language to default calculus is \fbox{$\synvar{P}\redvdash\synvar{\sigma}\reduces\synvar{e}\redproduce\redctx{\Delta_\mathrm{own}}$}, which reduces a scope declaration to a function in the default calculus, while providing the list of its own variables.
\begin{center} \begin{tabular}{lrrll} Translation context&\redctx{\Delta}&\syndef&\redempty&empty context\\ (unordered map)&&\synalt&\redctx{\Delta_\mathrm{own}}\redcomma\redctx{\Delta_\mathrm{sub}}&own and sub-scopes contexts\\ &\redctx{\Delta_\mathrm{own}}&\syndef&\redempty\synalt\redctx{\Delta_\mathrm{own}}\redcomma\synvar{a}\redcolon\synvar{\tau}&typed scope variable\\ &\redctx{\Delta_\mathrm{sub}}&\syndef&\redempty\synalt\redctx{\Delta_\mathrm{sub}}\redcomma $\synvar{S}_\synvar{n}$\synlsquare\synvar{a}\synrsquare\redcolon\synvar{\tau}&typed sub-scope variable\\ \end{tabular} \end{center} The translation context \redctx{\Delta} is similar to the typing context \typctx{\Gamma} of the default calculus, but it only takes into account the new scope-related location. At any point, \redctx{\Delta} will contain the scope locations defined (and usable in expressions) so far. \redctx{\Delta} is divided in \redctx{\Delta_\mathrm{own}} and \redctx{\Delta_\mathrm{sub}}, which contain respectively the scope's own variables and the variables of its sub-scopes. We will describe the translation from top to bottom, in order to keep the big picture in mind. We will assume the default calculus has been expanded with the usual ML \synlet \synin construction, as well as tuples. Here is the top-level rule for translating scopes. 
\begin{mathpar}
\inferrule[T-Scope]{
\synvar{P}\redsc\redempty\redturnstile{\synvar{S}} \synvar{r_1}\synellipsis\synvar{r_n} \reduces \synvar{e} \redproduce
\synvar{a_1}\redcolon\synvar{\tau_1}\redellipsis\synvar{a_m}\redcolon\synvar{\tau_m}\redcomma \redctx{\Delta_\mathrm{sub}}
}{
\synvar{P}\redvdash\synscope\synvar{S}\syntyped\synvar{r_1}\synellipsis\synvar{r_n}\reduces\\
\synlet\synvar{S}\;\synlparen\synvar{a_1}\syntyped\synunitt\synarrow\synvar{\tau_1}\synrparen\;\cdots\;
\synlparen\synvar{a_m}\syntyped\synunitt\synarrow\synvar{\tau_m}\synrparen\syntyped
\synlparen\synvar{\tau_1}\synstar\cdots\synstar\synvar{\tau_m}\synrparen\synequal
\synvar{e}[\synhole\mapsto\synlparen\synvar{a_1}\synellipsis\synvar{a_m}\synrparen]\redproduce\\
\synvar{a_1}\redcolon\synvar{\tau_1}\redellipsis\synvar{a_m}\redcolon\synvar{\tau_m}
}
\end{mathpar}
This rule has a lot to unpack, but it is just the formal description of the translation scheme described earlier.
To translate scope declaration \synvar{S} with associated rules \synvar{r_1}\synellipsis\synvar{r_n}, we use a helper judgment \fbox{\synvar{P}\redsc\redctx{\Delta}\redturnstile{\synvar{S}} \synvar{r_1}\synellipsis\synvar{r_n} \reduces \synvar{e} \redproduce \redctx{\Delta'}} which reads as \enquote{%
given a program \synvar{P} and a translation context \redctx{\Delta}, the rules \synvar{r_1}\synellipsis\synvar{r_n} belonging to scope \synvar{S} translate to the expression \synvar{e}, producing a new translation context \redctx{\Delta'}}.
In this \TirName{T-Scope} rule, we isolate in the resulting \redctx{\Delta'} all the scope variables \synvar{a_1},\ldots,\synvar{a_m} from the sub-scope variables.
Indeed, those variables will be the arguments and the return values of the function corresponding to the scope \synvar{S}.
The expression \synvar{e} that stands for rules \synvar{r_1}\synellipsis\synvar{r_n} is a series of \synlet bindings, the last one finishing by a hole (\synhole).
We use this hole as a placeholder to be filled with the return value of the function, which is the tuple \synlparen\synvar{a_1}\synellipsis\synvar{a_n}\synrparen. Note that in accordance to the translation scheme and the need for a delayed evaluation of defaults, the arguments of \synvar{S} have a thunked type. \begin{mathpar} \inferrule[T-Rules]{ \synvar{P}\redsc\redctx{\Delta}\redturnstile{\synvar{S}} \synvar{r_1} \reduces \synvar{e_1} \redproduce\redctx{\Delta'}\\ \synvar{P}\redsc\redctx{\Delta'}\redturnstile{\synvar{S}} \synvar{r_2}\synellipsis\synvar{r_n} \reduces \synvar{e_2} \redproduce\redctx{\Delta''} }{ \synvar{P}\redsc\redctx{\Delta}\redturnstile{\synvar{S}} \synvar{r_1}\synellipsis\synvar{r_n} \reduces \synvar{e_1}[\synhole\mapsto \synvar{e_2}] \redproduce\redctx{\Delta''} } \end{mathpar} The translation of the sequence of rules consists of chaining the different \synlet \synin expressions together with the same hole (\synhole{}) substitution as the previous rule. Now, we can define the translation for individual rules, starting with the definitions of scope variables. 
\begin{mathpar}
\inferrule[T-DefScopeVar]{
\synvar{a}\notin\redctx{\Delta}\\
\redctx{\Delta}\typvdash
\synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid
\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synrangle\typcolon\synvar{\tau}
}{
\synvar{P}\redsc\redctx{\Delta}\redturnstile{\synvar{S}}
\synrule\synvar{a}\syntyped\synvar{\tau}\synequal
\synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid
\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synrangle
\reduces \\
\synlet a\syntyped\synvar{\tau}\synequal
\synlangle a\;\synunit\synmid
\synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid
\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synrangle
\synrangle\synin\synhole
\redproduce\synvar{a}\redcolon\synvar{\tau}\redcomma\redctx{\Delta}
}
\end{mathpar}
The premise of \TirName{T-DefScopeVar}, $\synvar{a}\notin\redctx{\Delta}$, indicates that our scope language allows each scope variable to be defined only once, with one default tree.
This single default tree can incorporate multiple prioritized definitions of the same variable scattered around various legislative articles, but we assume in our scope language that these scattered definitions have been already collected.
Therefore, the ordering of rules is very important in our scope language, because it should be compatible with the dependency graph of the scope locations.
As the underlying default calculus is decidable and does not allow fixpoint definitions, the dependency graph of the scope locations should not be cyclic and therefore the topological ordering of its nodes should correspond to the order of the rules inside the scope declaration.
This dependency ordering is enforced by the premise \redctx{\Delta}\typvdash\synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synrangle\typcolon\synvar{\tau}, which seeds the typing judgment of \sref{defaultcalc:typing} with \redctx{\Delta} (the scope locations defined so far).
Since scope variables are also arguments of the scope, \TirName{T-DefScopeVar} redefines \synvar{a} by merging the new default tree with the default expression \synvar{a} of type \synunitt\synarrow\synvar{\tau} passed as an argument to \synvar{S}.
This merging is done by defining the incoming argument as an exception to the scope-local expression.
This translation scheme ensures that the caller always has priority over the callee.
The evaluation of the incoming arguments is forced by applying \synunit, yielding a value of type \synvar{\tau} for \synvar{a}.
Now that we have presented the translation scheme for rules defining scope variables, we can switch to the translation of sub-scope variables definitions and calls.
We will start with the rules that define sub-scope variables, prior to calling the associated sub-scope.
\begin{mathpar}
\inferrule[T-DefSubScopeVar]{
S\neq S'\\
\synvar{S'}_\synvar{n}\synlsquare\synvar{a}\synrsquare\notin\redctx{\Delta}\\
\redctx{\Delta}\typvdash\synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid
\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synrangle
\typcolon\synvar{\tau}
}{
\synvar{P}\redsc\redctx{\Delta}\redturnstile{\synvar{S}}
\synrule\synvar{S'}_\synvar{n}\synlsquare\synvar{a}\synrsquare\syntyped\synvar{\tau}\synequal
\synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid
\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synrangle
\reduces \\
\synlet \synvar{S'}_\synvar{n}\synlsquare\synvar{a}\synrsquare\syntyped\synunitt\synarrow\synvar{\tau}\synequal
\synlambda \synlparen\synunit\syntyped\synunitt\synrparen\syndot
\synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid
\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synrangle
\synin\synhole\redproduce\\\synvar{S'}_\synvar{n}\synlsquare\synvar{a}\synrsquare\redcolon\synunitt\synarrow\synvar{\tau}\redcomma\redctx{\Delta}
}
\end{mathpar}
This rule is very similar to \TirName{T-DefScopeVar}, and actually simpler.
The premise $S\neq S'$ means that a scope $S$ cannot have a recursive definition; it cannot call into itself and define sub-scope variables of its own scope.
Note that $\synvar{S'}_\synvar{n}$\synlsquare\synvar{a}\synrsquare\redcolon\synunitt\synarrow\synvar{\tau} is added to \redctx{\Delta} in the final part of the judgment; $\synvar{S'}_\synvar{n}$\synlsquare\synvar{a}\synrsquare{} has been defined as a sub-scope argument but not as a value that can be used by the scope yet, its type is \synunitt\synarrow\synvar{\tau} and not \synvar{\tau}.
When all the arguments of sub-scope \synvar{S'} have been defined using \TirName{T-DefSubScopeVar}, the sub-scope itself can be called.
\begin{mathpar}
\inferrule[T-SubScopeCall]{
S\neq S'\\
P(S') = \sigma'\\
P\redvdash\sigma'\reduces e'\redproduce
\synvar{a'_1}\redcolon\synvar{\tau'_1}\redellipsis\synvar{a'_n}\redcolon\synvar{\tau'_n}\redcomma\redctx{\Delta'_\mathrm{sub}}\\
\redinit\redlparen\redctx{\Delta}\redsc
\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_1}\synrsquare
\redellipsis\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_n}\synrsquare\redrparen
\redequal\synvar{e_\mathrm{init}}
}{
\synvar{P}\redsc\redctx{\Delta}\redturnstile{\synvar{S}}\syncall
\synvar{S'}_\synvar{n}\reduces
\synvar{e_\mathrm{init}}[\synhole\mapsto
\synlet\synlparen
\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_1}\synrsquare\synellipsis
\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_n}\synrsquare\synrparen
\syntyped
\synlparen
\synvar{\tau'_1}\synstar\cdots\synstar\synvar{\tau'_n}\synrparen\synequal\\
\synvar{e'}\;\synlparen\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_1}\synrsquare\synrparen
\cdots\synlparen\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_n}\synrsquare\synrparen
\synin\synhole\;]\redproduce
\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_1}\synrsquare\redcolon\synvar{\tau'_1}\redellipsis
\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_n}\synrsquare\redcolon\synvar{\tau'_n}\redcomma
\redctx{\Delta}
}
\end{mathpar}
Again, this rule has a lot to unpack, but
is meant as a generalization of the translation scheme illustrated in \sref{scope:example}.
Let us start with the premises.
As earlier, $S\neq S'$ means that scope declarations cannot be recursive.
Next, we fetch the declaration \synvar{\sigma'} of \synvar{S'} inside the program $P$.
\synvar{\sigma'} is reduced into the function expression \synvar{e'}, whose arguments correspond to the scope variables of \synvar{S'}: \synvar{a'_1}\synellipsis\synvar{a'_n}.
Then, we need to define all the arguments necessary to call \synvar{e'}.
Some of these arguments have been defined earlier in the translation, and they were added to \redctx{\Delta}.
But some arguments may not have been defined yet, and it is precisely the job of the \redinit{} helper to produce the \synvar{e_\mathrm{init}} expression to define those missing arguments with the \synemptydefault{} value.
The conclusion of \TirName{T-SubScopeCall} defines the reduction of \syncall$\synvar{S'}_\synvar{n}$.
After \synvar{e_\mathrm{init}}, we translate the sub-scope call to the default calculus call of the corresponding expression \synvar{e'}, which takes as arguments the defaults and returns the corresponding values after evaluation.
Finally, the new translation context produced is \redctx{\Delta} augmented with all the variables of sub-scope \synvar{S'}, which are available for use in later definitions of the scope.
The last item we need to define in order to complete the translation is \redinit{}.
Its definition is quite simple, since it produces an expression that defines to \synemptydefault{} all the variables of a list that are not present in \redctx{\Delta}.
\begin{mathpar} \inferrule[T-InitSubVarsInDelta]{ \synvar{S'}_\synvar{n}\synlsquare\synvar{a_1}\synrsquare\redcolon\synvar{\tau_1} \in\redctx{\Delta}\\ \redinit\redlparen\redctx{\Delta}\redsc \synvar{S'}_\synvar{n}\synlsquare\synvar{a_2}\synrsquare\redellipsis \synvar{S'}_\synvar{n}\synlsquare\synvar{a_n}\synrsquare\redrparen \redequal\synvar{e} }{ \redinit\redlparen\redctx{\Delta}\redsc \synvar{S'}_\synvar{n}\synlsquare\synvar{a_1}\synrsquare\redellipsis \synvar{S'}_\synvar{n}\synlsquare\synvar{a_n}\synrsquare\redrparen \redequal\synvar{e} } \inferrule[T-InitSubVarsNotInDelta]{ \synvar{S'}_\synvar{n}\synlsquare\synvar{a_1}\synrsquare\redcolon\synvar{\tau_1} \notin\redctx{\Delta}\\ \redinit\redlparen\redctx{\Delta}\redsc \synvar{S'}_\synvar{n}\synlsquare\synvar{a_2}\synrsquare\redellipsis \synvar{S'}_\synvar{n}\synlsquare\synvar{a_n}\synrsquare\redrparen \redequal\synvar{e'} }{ \redinit\redlparen\redctx{\Delta}\redsc \synvar{S'}_\synvar{n}\synlsquare\synvar{a_1}\synrsquare\redellipsis \synvar{S'}_\synvar{n}\synlsquare\synvar{a_n}\synrsquare\redrparen \redequal\\ \synlet\synvar{S'}_\synvar{n}\synlsquare\synvar{a_1}\synrsquare \syntyped\synunitt\synarrow\synvar{\tau_1}\synequal \synlambda\synlparen\synunit\syntyped\synunitt\synrparen\syndot\synemptydefault\synin \synvar{e} } \inferrule[T-InitSubVarsEmpty]{}{ \redinit\redlparen\redctx{\Delta}\redrparen \redequal\synhole } \end{mathpar} \section{From default calculus to lambda calculus} \subsection{Using exceptions} The default calculus is a solid semantic foundation for the Catala language, but it is not a good compilation target since default logic cannot be shallowly embedded easily in mainstream programming languages. Hence, we propose a compilation scheme whose goal is to eliminate default terms and empty error terms (\synemptydefault) from the default calculus, leaving us with the semantics of a regular lambda calculus.
The conflict error term (\synerror) is less problematic since its semantics correspond to an early exit from the program. In order to lower the default term to a lambda calculus term, we need to extend the traditional lambda calculus with several classic extensions: algebraic data types and recursive data types (lists). Indeed, we need an optional accumulator to emulate the exception count that triggers rules like \TirName{D-DefaultExceptionsConflict} and \TirName{D-DefaultOneException}. The empty error term has a complex propagation rule (\TirName{D-ContextEmptyError}) that naturally maps to a catchable exception. Hence, in this translation scheme, we assume that the target lambda calculus has support for exceptions. Combining these features, we propose a translation of the default term. This translation relies on a small runtime function \texttt{process\_exceptions}, whose body implements the semantics of the default calculus. \begin{align*} \texttt{process\_exceptions}\quad&\syntyped&& \synlparen\synunitt\synarrow\synvar{\tau}\synrparen\synlist\synarrow\synvar{\tau}\synoption\\ \texttt{process\_exceptions}\quad&\triangleq&& \texttt{fold\_left}\;\synlparen\synlambda\synlparen\synvar{a}\syntyped \synvar{\tau}\synoption\synrparen\; \synlparen\synvar{e'}\syntyped \synunitt\synarrow\synvar{\tau}\synrparen\syndot\\ &&&\quad\synlet\synvar{e'}\syntyped\synvar{\tau}\synoption\synequal\\ &&&\quad\quad\syntry \synsome\synlparen\synvar{e'}\synunit\synrparen \synwith\synemptyerror\synarrow \synnone\\ &&&\quad\!\!\!\!\synin\\ &&&\quad\synmatch\synlparen\synvar{a}\syncomma\;\synvar{e'}\synrparen\synwith\\ &&&\quad\quad\synmid\synlparen\synnone\syncomma\;\synvar{e'}\synrparen\synarrow\synvar{e'}\\ &&&\quad\quad\synmid\synlparen\synsome\synvar{a}\syncomma\;\synnone\synrparen\synarrow \synsome\synvar{a}\\ &&&\quad\quad\synmid\synlparen\synsome\synvar{a}\syncomma\;\synsome \synvar{e'}\synrparen\synarrow \synraise\synconflicterror \synrparen\;\synnone \end{align*} Note that the
\synemptyerror{} exception is caught within \texttt{process\_exceptions}, making it the only place in the output code where this exception can be caught. This is consistent with the evaluation context of the default exceptions, which is the only evaluation context that belongs to $C$ but not to $C_\lambda$. We can now proceed to the formal translation rules defining the compilation judgment $\fbox{\synvar{e}\compiles\synvar{e'}}$ where \synvar{e} is an expression of the default calculus and $\synvar{e'}$ is an expression of the target lambda calculus enriched with algebraic data types and exceptions. \begin{mathpar} \inferrule[C-Default]{ \synvar{e_1}\compiles\synvar{e_1'}\\ \cdots\\ \synvar{e_n}\compiles\synvar{e_n'}\\ \synvar{e_\mathrm{just}}\compiles\synvar{e_\mathrm{just}'}\\ \synvar{e_\mathrm{cons}}\compiles\synvar{e_\mathrm{cons}'}\\ }{ \synlangle\synvar{e_1}\synellipsis\synvar{e_n}\synmid\synvar{e_\mathrm{just}} \synjust\synvar{e_\mathrm{cons}}\synrangle\compiles\\ \synlet\synvar{r_\mathrm{exceptions}}\synequal \texttt{process\_exceptions}\;\synlsquare\synlambda\synvar{\_}\synarrow\synvar{e_1'} \synlistellipsis\synlambda\synvar{\_}\synarrow\synvar{e_n'}\synrsquare\synin\\\synmatch \synvar{r_\mathrm{exceptions}}\synwith\synsome\synvar{e'}\synarrow \synvar{e'}\synmid\synnone\synarrow \synif\synvar{e_\mathrm{just}'}\synthen\synvar{e_\mathrm{cons}'}\synelse\synraise\synemptyerror } \inferrule[C-EmptyError]{}{ \synemptydefault\compiles\synraise\synemptyerror } \inferrule[C-ConflictError]{}{ \synerror\compiles\synraise\synconflicterror } \inferrule[C-Var]{}{\synvar{x}\compiles\synvar{x}} \inferrule[C-Literal]{\synvar{e}\in\{\synunit,\;\syntrue,\;\synfalse\}}{ \synvar{e}\compiles\synvar{e} } \inferrule[C-Abs]{ \synvar{e}\compiles\synvar{e'} }{ \synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot\synvar{e}\compiles \synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot\synvar{e'} } \inferrule[C-App]{
\synvar{e_1}\compiles\synvar{e_1'}\\ \synvar{e_2}\compiles\synvar{e_2'} }{ \synvar{e_1}\;\synvar{e_2}\compiles\synvar{e_1'}\;\synvar{e_2'} } \end{mathpar} We overload the \exeval{} notation as the stepping judgment both in the default calculus and the target lambda calculus. Similarly, we overload the typing judgment \typvdash. We prove the correctness theorem below by induction on the default calculus expression \synvar{e}, starting by applying an inversion lemma to the judgment \synvar{e}\compiles\synvar{e'}. \paragraph{Theorem (type preservation)} \textit{If \synvar{e}\compiles\synvar{e'} and \typempty\typvdash\synvar{e}\typcolon\synvar{\tau}, then \typempty\typvdash\synvar{e'}\typcolon\synvar{\tau}.} The proof can be carried out by induction on \synvar{e} without any trouble. The most difficult part is to check the correct typing of \texttt{process\_exceptions} in the lambda calculus. $\blacksquare$ \paragraph{Theorem (translation correctness)} \textit{If \synvar{e}\compiles\synvar{e'} and \synvar{e}\exevalstar\synvar{v}, then either $\synvar{v}=\synemptydefault$, $\synvar{v}=\synerror$ or there exists a lambda calculus value \synvar{v'} such that \synvar{v}\compiles\synvar{v'} and \synvar{e'}\exevalstar\synvar{v'}.} \begin{itemize} \item Rules \TirName{C-EmptyError}, \TirName{C-ConflictError}, \TirName{C-Var} and \TirName{C-Literal} yield an immediate conclusion. \item For rule \TirName{C-Abs} with $\synvar{e}=\synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot\synvar{e_1}$, we apply the induction hypothesis on \synvar{e_1} and conclude since functions are values ($\synvar{v'}=\synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot\synvar{e_1'}$). \item For rule \TirName{C-App} with $\synvar{e} = \synvar{e_1}\;\synvar{e_2}$, we apply the induction hypothesis on \synvar{e_1} and \synvar{e_2}. If either \synvar{e_1} or \synvar{e_2} evaluates to \synemptydefault, we can apply \TirName{D-ContextEmptyError} and conclude.
Similarly, if either \synvar{e_1} or \synvar{e_2} evaluates to \synerror, we can apply \TirName{D-ContextConflictError} and conclude. Let us now suppose that we are not in one of those cases, and that \synvar{e_1}\exevalstar\synvar{v_1} and \synvar{e_2}\exevalstar\synvar{v_2}. \synvar{v_1} and \synvar{v_2} are not error terms, so we can apply \TirName{C-Literal} or \TirName{C-Abs} to get \synvar{v_1'} and \synvar{v_2'} such that \synvar{v_1}\compiles\synvar{v_1'} and \synvar{v_2}\compiles\synvar{v_2'}. By the induction hypothesis applied on \synvar{e_1} and \synvar{e_2}, we know that \synvar{e_1'}\exevalstar\synvar{v_1'} and \synvar{e_2'}\exevalstar\synvar{v_2'}. To recap, we have $\synvar{e_1}\;\synvar{e_2}\exevalstar\synvar{v_1}\;\synvar{v_2}$, $\synvar{e_1'}\;\synvar{e_2'}\exevalstar\synvar{v_1'}\;\synvar{v_2'}$ and by \TirName{C-App}, $\synvar{v_1}\;\synvar{v_2}\compiles\synvar{v_1'}\;\synvar{v_2'}$. We also know that $\synvar{v_1}\;\synvar{v_2}\exevalstar\synvar{v}$ beginning with a $\beta$-reduction. We can then apply the induction hypothesis a third time on $\synvar{v_1}\;\synvar{v_2}$ to get the \synvar{v'} on which we conclude. \item Let us consider now \TirName{C-Default} with $e=\synlangle\synvar{e_1}\synellipsis \synvar{e_n}\synmid\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synrangle$. If any of the sub-terms evaluates to \synerror{}, then $\synvar{e}$ evaluates to $\synerror$ (\TirName{D-ContextConflictError}) and we conclude. Now, we perform a case analysis on the number of exceptions that do not evaluate to \synemptydefault{}. \begin{itemize} \item If more than one exception does not evaluate to \synemptydefault{}, then \synvar{e} evaluates to \synerror{} by \TirName{D-DefaultExceptionsConflict} and we conclude.
\item If one and only one exception $\synvar{e_i}$ does not evaluate to \synemptydefault{}, then we apply the induction hypothesis and get a pair $(\synvar{v_i},\synvar{v_i'})$ such that $\synvar{e_i}\exevalstar\synvar{v_i}$, $\synvar{e_i'}\exevalstar\synvar{v_i'}$ and $\synvar{v_i}\compiles\synvar{v_i'}$. Then, we claim that the \synvar{r_\mathrm{exceptions}} result of \texttt{process\_exceptions} will evaluate to \synsome\synvar{v_i'} in the lambda calculus translation of \synvar{e}. Indeed, along the $\texttt{fold\_left}$ the accumulator \synvar{a} will remain \synnone{} as it encounters all the \synvar{e_j'} that raise the \synemptyerror{} caught inside the fold function. The accumulator will then encounter \synvar{e_i'}, which yields \synvar{v_i'}, and pick up its value. By following the rest of the translated code, \synvar{e'} will evaluate to \synvar{v_i'} and we can conclude. \item If all exceptions evaluate to \synemptydefault{}, then we claim that the \synvar{r_\mathrm{exceptions}} result will evaluate to \synnone{}. Indeed, the accumulator will stay at its original \synnone{} value during the whole fold process of the exceptions. We now look at the evaluation of \synvar{e_\mathrm{just}}. If it evaluates to \synemptydefault{} or \synerror{}, then the whole expression evaluates to the error term (\TirName{D-ContextConflictError} and \TirName{D-ContextEmptyError}) and we conclude. By type safety of the default calculus, \synvar{e_\mathrm{just}} evaluates to either $\synvar{v_\mathrm{just}} =\syntrue{}$ or \synfalse{}. By type preservation of the translation, \synvar{e_\mathrm{just}'} evaluates to either $\synvar{v_\mathrm{just}'} =\syntrue{}$ or \synfalse{}. By application of the induction hypothesis on \synvar{e_\mathrm{just}}, we know that \synvar{v_\mathrm{just}}\compiles\synvar{v_\mathrm{just}'} and by inversion on the \compiles{} judgment, we conclude $\synvar{v_\mathrm{just}}=\synvar{v_\mathrm{just}'}$.
From there, if $\synvar{v_\mathrm{just}'} =\syntrue{}$, then we conclude by applying the induction hypothesis on \synvar{e_\mathrm{cons}}. If $\synvar{v_\mathrm{just}'} =\synfalse{}$, then the default evaluates to \synemptydefault{} and we conclude. $\blacksquare$ \end{itemize} \end{itemize} % \subsection{An alternative compilation scheme} % While perfectly correct, this maximalist translation scheme would entail % a lot of redundant branching in the generated code, which would hinder the % resulting program's performance. Because Catala aims at providing % production-ready high-performance code that can scale to computations % concerning millions of households, we need to be more clever to reduce the % number of generated branches. % Let's look back at the code of Fig.~\ref{fig:simpledefaultprogram}. One way % we can avoid having to propagate errors all the time in the program is to % contain empty error propagation inside each scope variable. Concretely, % that means enforcing a crashing error each time a scope variable evaluates % to an empty error term. This gives us a new default calculus program, % Fig.~\ref{fig:noerrordefaultprogram}, which differs % from Fig.~\ref{fig:simpledefaultprogram} by the addition of \syncrashifempty{} % calls to wrap up each scope variable definition.
% \begin{figure} % \begin{Verbatim}[frame=lines,label=Simple default program without error propagation, numbers=left, framesep=10pt, samepage=true] % let X (a: unit -> int) (b: unit -> int) : (int * int) = % let a : int = crash_if_empty < a () | < true :- 0 >> in % let b : int = crash_if_empty < b () | < true :- a + 1 >> in % (a, b) % let Y (c: unit -> bool) : bool = % let X_1[a] : unit -> int = fun () -> < true :- 42 > in % let X_1[b] : unit -> int = fun () -> EmptyError in % let (X_1[a], X_1[b]) : int * int = X(X_1[a], X_1[b]) in % let c : bool = crash_if_empty % < c () | < X_1[b] != 43 :- false | X_1[b] == 43 :- true >> % in % c % \end{Verbatim} % \caption{Alternative to Fig.~\ref{fig:simpledefaultprogram} with error containment % \label{fig:noerrordefaultprogram}} % \end{figure} % We can model \syncrashifempty{} as a special operator of the default % calculus governed by the following rules: % \begin{mathpar} % \inferrule[T-CrashIfEmpty]{ % \typctx{\Gamma}\typvdash\synvar{e}\typcolon\synvar{\tau} % }{ % \typctx{\Gamma}\typvdash\syncrashifempty\;\synvar{e}\typcolon\synvar{\tau} % } % \inferrule[D-CrashIfEmptyError]{}{ % \syncrashifempty\;\synemptydefault\exeval\synerror % } % \inferrule[D-CrashIfEmptyOK]{ % \synvar{e}\neq \synemptydefault % }{ % \syncrashifempty\;\synvar{e}\exeval \synvar{e} % } % \inferrule[C-CrashIfEmpty]{ % \synvar{e}\compiles\synvar{e'} % }{ % \syncrashifempty\;\synvar{e}\compiles\synmatch\synvar{e'}\synwith % \synnone\;\synarrow\synerror\synmid\synsome\synvar{e'}\synarrow % \synvar{e'} % } % \end{mathpar} % The addition of \syncrashifempty{} to the default calculus allow us to prevent % \synemptydefault{} to leak beyond this special operator call. Hence, we can % apply our \synvar{\tau} to \synvar{\tau} \texttt{option} transformation scheme % locally rather than globally on all terms of the program. 
More specifically, % we can assume that after it has been defined, a scope variable has type % \synvar{\tau} rather than \synvar{\tau} \texttt{option} in our translated program. % \begin{figure}[htb] % \begin{Verbatim}[frame=lines,label=Simple lambda calculus program, numbers=left, framesep=10pt, samepage=true] % let crash_if_empty x = match x with Some x -> x | None -> raise Error % let X (a: unit -> int option) (b: unit -> int option) : (int * int) = % let a : int = crash_if_empty (match a () with % | Some x -> Some x % | None -> if true then Some 0 else None)) % in % let b : int = crash_if_empty (match b () with % | Some x -> Some x % | None -> if true then Some (a + 1) else None) % in % (a, b) % let Y (c: unit -> bool option) : bool = % let X_1[a] : unit -> int = fun () -> if true then Some 42 else None in % let X_1[b] : unit -> int = fun () -> None in % let (X_1[a], X_1[b]) : int * int = X(X_1[a], X_1[b]) in % let c : bool = crash_if_empty (match c () with % | Some x -> Some x % | None -> (match (if X_1[b] != 43 then Some false else None) with % | Some x -> Some x % | None -> if X_1[b] == 43 then true else None)) % in % c % \end{Verbatim} % \caption{Translation of Fig.~\ref{fig:noerrordefaultprogram} to lambda calculus % \label{fig:lambdaprogram}} % \end{figure} % Fig.~\ref{fig:lambdaprogram} shows the result of the compilation of the code in % Fig.~\ref{fig:noerrordefaultprogram} according to our locally-restricted % compilation scheme. The \texttt{process\_exceptions} functions has been % partially evaluated and specialized to the example for readability. % This compilation mode helps minimizing the number of branching required % at execution time, but comes a the price of being very specialized to the % exact shape of the programs that we wish to compile. Indeed, we had to % know which function parameter types to change from \synvar{\tau} to % \synvar{\tau} \texttt{option}: here, only the thunked arguments of the % scopes. 
We claim that the correctness of the local application of our compilation % scheme can be validated by a mere typechecking pass over the resulting % lambda-calculus program. % Last, we want to discuss further the insertion of \syncrashifempty{} calls. % Inserting these calls effectively changes the semantics of the default calculus % program. By preventing the \synemptydefault{} error to propagate and be later % caught by an exception of a default, we change the behavior of the program. % Hence, it is the program of Fig.~\ref{fig:noerrordefaultprogram} that % should be taken as a reference, and not the program of % Fig.~\ref{fig:simpledefaultprogram}. % We chose to insert the \syncrashifempty{} calls before each scope variable % definition because it corresponds to the following high-level behavior: % each scope variable should be defined at execution time by one rule coming % from the source Catala program. Indeed, if no rules from the source % Catala program were to apply for a particular scope variable, then this % scope variable would evaluate to \synemptydefault{} in the default calculus. % Contrary to tax rules DSL like \cite{merigoux:hal-02936606} that have a special % \texttt{undefined} value (similar to \synemptydefault{} or the null pointer), % we wanted Catala not to reproduce the billion-dollar mistake and force the % programmer to rely on user-defined option types to deal with missing data % situations. Alternatively, the Catala programmer can also define an additional % base case rule in the source program defining a user-chosen default value % for a particular scope-variable. \printbibliography \end{document}