% catala/doc/formalization/formalization.tex
% 2020-11-20 10:12:09 +01:00
% 822 lines
% 40 KiB
% TeX

\documentclass[11pt,a4paper]{article}
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage[english]{babel}
\usepackage{fullpage}
\usepackage{lmodern}
\usepackage{amsmath,amssymb}
\usepackage{mathpartir}
\usepackage{turnstile}
\usepackage{fancyvrb}
\usepackage{xcolor}
\usepackage{booktabs}
\usepackage{csquotes}
\usepackage{biblatex}
\addbibresource{catala.bib}
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newcommand{\sref}[1]{\S\ref{sec:#1}}
%% Syntax
\newcommand{\synvar}[1]{\ensuremath{#1}}
\newcommand{\synkeyword}[1]{\textcolor{red!60!black}{\texttt{#1}}}
\newcommand{\synpunct}[1]{\textcolor{black!40!white}{\texttt{#1}}}
\newcommand{\synname}[1]{\ensuremath{\mathsf{#1}}}
\newcommand{\synbool}{\synkeyword{bool}}
\newcommand{\synnum}{\synkeyword{num}}
\newcommand{\syndate}{\synkeyword{date}}
\newcommand{\synvec}{\synkeyword{vec~}}
\newcommand{\synopt}{\synkeyword{opt~}}
\newcommand{\synrule}{\synkeyword{rule~}}
\newcommand{\synlet}{\synkeyword{let~}}
\newcommand{\synin}{\synkeyword{~in~}}
\newcommand{\synif}{\synkeyword{if~}}
\newcommand{\synthen}{\synkeyword{~then~}}
\newcommand{\synelse}{\synkeyword{~else~}}
\newcommand{\syncall}{\synkeyword{call~}}
\newcommand{\synscope}{\synkeyword{scope~}}
\newcommand{\synequal}{\synpunct{~=~}}
\newcommand{\synjust}{~\synpunct{:\raisebox{-0.9pt}{-}}~}
\newcommand{\syntyped}{~\synpunct{:}~}
\newcommand{\syncomma}{\synpunct{,}}
\newcommand{\syndot}{\synpunct{.}~}
\newcommand{\synunit}{\synpunct{()}}
\newcommand{\synunitt}{\synkeyword{unit}}
\newcommand{\syntrue}{\synkeyword{true}}
\newcommand{\synfalse}{\synkeyword{false}}
\newcommand{\synop}{\synpunct{\odot}}
\newcommand{\synlambda}{\synpunct{$\lambda$}~}
\newcommand{\synand}{\synpunct{\wedge}}
\newcommand{\synor}{\synpunct{\vee}}
\newcommand{\synlparen}{\synpunct{(}}
\newcommand{\synrparen}{\synpunct{)}}
\newcommand{\synlsquare}{\synpunct{[}}
\newcommand{\synrsquare}{\synpunct{]}}
\newcommand{\synlbracket}{\synpunct{\{}}
\newcommand{\synrbracket}{\synpunct{\}}}
\newcommand{\synlangle}{\synpunct{$\langle$}}
\newcommand{\synrangle}{\synpunct{$\rangle$}}
\newcommand{\synmid}{\synpunct{~$|$~}}
\newcommand{\synemptydefault}{\synvar{\varnothing}}
\newcommand{\synerror}{\synvar{\circledast}}
\newcommand{\synmerge}{\synpunct{~$+\!+$~}}
\newcommand{\synstar}{\synpunct{~$*$~}}
\newcommand{\synvardef}{\synkeyword{definition~}}
\newcommand{\synscopecall}{\synkeyword{scope\_call~}}
\newcommand{\synlarrow}{~\synpunct{$\leftarrow$}~}
\newcommand{\synarrow}{~\synpunct{$\rightarrow$}~}
\newcommand{\synellipsis}{\synpunct{,$\ldots$,}}
\newcommand{\syndef}{$ ::= $}
\newcommand{\synalt}{\;$|$\;}
\newcommand{\synhole}{\synvar{\cdot}}
%% Typing
\newcommand{\typctx}[1]{\textcolor{orange!90!black}{\ensuremath{#1}}}
\newcommand{\typempty}{\typctx{\varnothing}}
\newcommand{\typcomma}{\typctx{,\;}}
\newcommand{\typvdash}{\typctx{\;\vdash\;}}
\newcommand{\typcolon}{\typctx{\;:\;}}
\newcommand{\typlpar}{\typctx{(}}
\newcommand{\typrpar}{\typctx{)}}
%% Evaluation
\newcommand{\exctx}[1]{\textcolor{blue!80!black}{\ensuremath{#1}}}
\newcommand{\exeemptysubdefaults}{\exctx{\mathsf{empty\_count}}}
\newcommand{\execonflictsubdefaults}{\exctx{\mathsf{conflict\_count}}}
\newcommand{\Omegaarg}{\Omega_{arg}}
\newcommand{\excaller}{\exctx{\complement}}
\newcommand{\excomma}{\exctx{,}\;}
\newcommand{\exvdash}{\;\exctx{\vdash}\;}
\newcommand{\exempty}{\exctx{\varnothing}}
\newcommand{\exemptyv}{\exctx{\varnothing_v}}
\newcommand{\exemptyarg}{\exctx{\varnothing_{arg}}}
\newcommand{\exvarmap}{\exctx{~\mapsto~}}
\newcommand{\exscopemap}{\exctx{~\rightarrowtail~}}
\newcommand{\exArrow}{\exctx{~\Rrightarrow~}}
\newcommand{\exeq}{\exctx{\;=\;}}
\newcommand{\exeval}{\exctx{\;\longrightarrow\;}}
\newcommand{\exevalstar}{\exctx{\;\longrightarrow^*\;}}
\newcommand{\exat}{\exctx{\texttt{\;@\;}}}
\newcommand{\exsemicolon}{\exctx{;~}}
\newcommand{\excomp}{\dashrightarrow}
%% Reduction of the scope language
\newcommand{\redctx}[1]{\textcolor{green!50!black}{\ensuremath{#1}}}
\newcommand{\reduces}{\redctx{~\rightsquigarrow~}}
\newcommand{\redvdash}{\redctx{\;\vdash\;}}
\newcommand{\redturnstile}[1]{\;\ensuremath{\redctx{\vdash}_{#1}}\;\;}
\newcommand{\redcomma}{\redctx{,\;}}
\newcommand{\redsc}{\redctx{;\;}}
\newcommand{\redcolon}{\redctx{\;:\;}}
\newcommand{\redempty}{\redctx{\varnothing}}
\newcommand{\redproduce}{\;\redctx{\Rrightarrow}\;}
\newcommand{\redellipsis}{\redctx{,\ldots,~}}
\newcommand{\redlparen}{\redctx{(}}
\newcommand{\redrparen}{\redctx{)}}
\newcommand{\redequal}{\redctx{~=~}}
\newcommand{\redinit}{\redctx{\mathsf{init\_subvars}}}
\title{Formalization of the Catala language}
\date{November 2020}
\author{Denis Merigoux, Nicolas Chataing}
\begin{document}
\maketitle
\tableofcontents
\section{Introduction}
Tax law defines how taxes should be computed, depending on various characteristics
of a fiscal household. Government agencies around the world use computer
programs to compute the law, which are derived from the local tax law. Translating
tax law into an unambiguous computer program is tricky because the law is subject to
interpretations and ambiguities. The goal of the Catala domain-specific language
is to provide a way to clearly express the interpretation chosen for the
computer program, and display it close to the law it is supposed to model.
To complete this goal, our language needs some kind of \emph{locality} property
that enables cutting the computer program in bits that match the way the
legislative text is structured. This subject has been extensively studied by
Lawsky \cite{lawsky2017, lawsky2018, lawsky2020form}, whose work has greatly
inspired our approach.
The structure exhibited by Lawsky follows a kind of non-monotonic logic called
default logic \cite{Reiter1987}. Indeed, unlike traditional programming, when the law defines
a value for a variable, it does so in a \emph{base case} that applies only if
no \emph{exceptions} apply. To determine the value of a variable, one needs to
first consider all the exceptions that could modify the base case.
It is this precise behavior which we intend to capture when defining the semantics
of Catala.
\section{Default calculus}
We choose to present the core of Catala as a lambda-calculus augmented by a special
\enquote{default} expression. This special expression enables dealing with
the logical structure underlying tax law. Our lambda-calculus has only unit and
boolean values, but this base could be enriched with more complex values and traditional
lambda-calculus extensions (such as algebraic data types or $\Lambda$-polymorphism).
\subsection{Syntax}
\label{sec:defaultcalc:syntax}
\begin{center}
\begin{tabular}{lrrll}
Type&\synvar{\tau}&\syndef&\synbool\synalt\synunitt&boolean and unit types\\
&&\synalt&\synvar{\tau}\synarrow\synvar{\tau}&function type \\
&&&&\\
Expression&\synvar{e}&\syndef&\synvar{x}\synalt\syntrue\synalt\synfalse\synalt\synunit&variable, literal\\
&&\synalt&\synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot\synvar{e}\synalt\synvar{e}\;\synvar{e}&$\lambda$-calculus\\
&&\synalt&\synvar{d}&default term\\
&&&&\\
Default&\synvar{d}&\syndef&\synlangle\synvar{e}\synjust\synvar{e}\synmid $[\synvar{e}^*]$\synrangle&default term\\
&&\synalt&\synerror&conflict error term\\
&&\synalt&\synemptydefault&empty error term\\
\end{tabular}
\end{center}
Compared to the regular lambda calculus, we add a construction coming from
default logic. Particularly, we focus on a subset of default logic called
categorical, prioritized default logic \cite{Brewka2000}. In this setting, a default is a logical
rule of the form $A \synjust B$ where $A$ is the justification of the rule and
$B$ is the consequence. The rule can only be applied if $A$ is consistent with
the current knowledge $W$: from $A\wedge W$, one should not derive $\bot$.
If multiple rules $A \synjust B_1$ and $A \synjust B_2$
can be applied at the same time, then only one of them is applied through
an explicit ordering of the rules.
To incorporate this form of logic inside our programming language, we set $A$ to
be an expression that can be evaluated to \syntrue{} or \synfalse{}, and $B$
the expression that the default should reduce to if $A$ is true. If $A$ is false,
then we look up for other rules of lesser priority to apply. This priority
is encoded through a syntactic tree data structure\footnote{Thanks to Pierre-Évariste Dagand for this insight.}.
A node of the tree contains a default to consider first, and then a list of lower-priority
defaults that don't have a particular ordering between them. This structure is
sufficient to model the base case/exceptions structure of the law, and in particular
the fact that exceptions are not always prioritized in the legislative text.
In the term \synlangle\synvar{e_{\text{just}}}\synjust
\synvar{e_{\text{cons}}}\synmid \synvar{e_1}\synellipsis\synvar{e_n}\synrangle, \synvar{e_{\text{just}}}
is the justification $A$, \synvar{e_{\text{cons}}} is the consequence $B$ and
\synvar{e_1}\synellipsis\synvar{e_n} is the list of rules to be considered if \synvar{e_{\text{just}}}
evaluates to \synfalse{}.
Of course, this evaluation scheme can fail if no more
rules can be applied, or if two or more rules of the same priority have their
justification evaluate to \syntrue{}. The error terms \synerror{} and \synemptydefault{}
encode these failure cases. Note that if a Catala program correctly derived from a legislative
source evaluates to \synerror{} or \synemptydefault{}, this could mean a flaw in the
law itself. \synemptydefault{} means that the law did not specify what happens
in a given situation, while \synerror{} means that two or more rules specified in
the law conflict with each other on a given situation.
\subsection{Typing}
\label{sec:defaultcalc:typing}
Our typing strategy is an extension of the simply-typed lambda calculus.
The typing judgment \fbox{$\typctx{\Gamma}\typvdash\synvar{e}\typcolon\synvar{\tau}$} reads as
\enquote{under context $\typctx{\Gamma}$, expression $\synvar{e}$ has type $\synvar{\tau}$}.
\begin{center}
\begin{tabular}{lrrll}
Typing context&\typctx{\Gamma}&\syndef&\typempty&empty context\\
(unordered map)&&\synalt&\typctx{\Gamma}\typcomma\synvar{x}\typcolon\synvar{\tau}&typed variable\\
\end{tabular}
\end{center}
We start by the usual rules of simply-typed lambda calculus.
\begin{mathpar}
\inferrule[T-UnitLit]{}{
\typctx{\Gamma}\typvdash\synunit\syntyped\synunitt
}
\inferrule[T-TrueLit]{}{
\typctx{\Gamma}\typvdash\syntrue\syntyped\synbool
}
\inferrule[T-FalseLit]{}{
\typctx{\Gamma}\typvdash\synfalse\syntyped\synbool
}
\inferrule[T-Var]{}{
\typctx{\Gamma}\typcomma\synvar{x}\typcolon\synvar{\tau}\typvdash\synvar{x}\syntyped\synvar{\tau}
}
\inferrule[T-Abs]
{\typctx{\Gamma}\typcomma\synvar{x}\typcolon\synvar{\tau}\typvdash\synvar{e}\typcolon\synvar{\tau'}}
{\typctx{\Gamma}\typvdash\synlambda\synlparen\synvar{x}\syntyped{\tau}\synrparen\syndot\synvar{e}\typcolon\synvar{\tau}\synarrow\synvar{\tau'}}
\inferrule[T-App]
{
\typctx{\Gamma}\typvdash\synvar{e_1}\typcolon\synvar{\tau_2}\synarrow\synvar{\tau_1}\\
\typctx{\Gamma}\typvdash\synvar{e_2}\typcolon\synvar{\tau_2}
}
{\typctx{\Gamma}\typvdash\synvar{e_1}\;\synvar{e_2}\typcolon\synvar{\tau_1}}
\end{mathpar}
Then we move to the special default terms. First, the error terms that stand for
any type.
\begin{mathpar}
\inferrule[T-ConflictError]{}{\typctx{\Gamma}\typvdash\synerror\typcolon\synvar{\tau}}
\inferrule[T-EmptyError]{}{\typctx{\Gamma}\typvdash\synemptydefault\typcolon\synvar{\tau}}
\end{mathpar}
Now the interesting part for the default terms. As mentioned earlier, the
justification \synvar{e_{\text{just}}} is a boolean, while \synvar{e_{\text{cons}}}
can evaluate to any value. \TirName{T-Default} specifies how the tree structure
of the default should be typed.
\begin{mathpar}
\inferrule[T-Default]
{
\typctx{\Gamma}\typvdash\synvar{e_{\text{just}}}\typcolon\synbool\\
\typctx{\Gamma}\typvdash\synvar{e_{\text{cons}}}\typcolon\synvar{\tau}\\
\typctx{\Gamma}\typvdash\synvar{e_1}\typcolon{\tau}\\
\cdots\\
\typctx{\Gamma}\typvdash\synvar{e_n}\typcolon{\tau}
}
{\typctx{\Gamma}\typvdash\synlangle\synvar{e_{\text{just}}}\synjust\synvar{e_{\text{cons}}}\synmid
\synvar{e_1}\synellipsis\synvar{e_n}\synrangle\typcolon\synvar{\tau}}
\end{mathpar}
The situation becomes more complex in the presence of functions. Indeed, we want
our default expressions to depend on parameters. By only allowing \synvar{e_{\text{just}}}
to be \synbool{}, we force the user to declare the parameters in a \synlambda
that wraps the default from the outside. Using this scheme, all the expressions
inside the tree structure of the default will depend on the same bound variable
\synvar{x}.
\subsection{Evaluation}
We give this default calculus small-step, structured operational semantics. The
one-step reduction judgment is of the form \fbox{\synvar{e}\exeval\synvar{e'}}.
In our simple language, values are just booleans, functions or error terms.
We use evaluation contexts to efficiently describe the evaluation order.
Evaluation contexts are expressions with a hole indicating the sub-term
currently being reduced.
\begin{center}
\begin{tabular}{lrrll}
Values&\synvar{v}&\syndef&\synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot\synvar{e}&functions\\
&&\synalt&\syntrue\synalt\synfalse & booleans\\
&&\synalt&\synerror\synalt\synemptydefault&errors\\
Evaluation &\synvar{C_\lambda}&\syndef&\synhole\;\synvar{e}\synalt\synvar{v}\;\synhole&function application\\
contexts&&\synalt&\synlangle\synhole\synjust\synvar{e}\synmid $[\synvar{e}^*]$\synrangle&default justification evaluation\\
&\synvar{C}&\syndef&\synvar{C_\lambda}&regular contexts\\
&&\synalt&\synlangle\syntrue\synjust\synhole\synmid $[\synvar{e}^*]$\synrangle&default consequence evaluation\\
&&\synalt&\synlangle\synfalse\synjust\synvar{e}\synmid $[\synvar{v}^*]$%
\syncomma\synhole\syncomma$[\synvar{e}^*]$\synrangle&sub-default evaluation
\end{tabular}
\end{center}
We choose a call-by-value reduction strategy.
First, we present the usual reduction rules for beta-reduction
and evaluation inside a context hole. Note that \TirName{D-Context} does not
deal with error terms, which will have a special treatment for error propagation
later.
\begin{mathpar}
\inferrule[D-Context]
{\synvar{e}\exeval\synvar{e'}\\ e'\notin\{\synerror,\synemptydefault\}}
{\synvar{C}[\synvar{e}]\exeval\synvar{C}[\synvar{e'}]}
\inferrule[D-$\beta$]{}{
(\synlambda\synlparen\synvar{x}\syntyped\synvar{\tau}\synrparen\syndot{e})\;\synvar{v}
\exeval\synvar{e}[\synvar{x}\mapsto\synvar{v}]
}
\end{mathpar}
Now we have to describe how the default terms reduce. Thanks to the
\TirName{D-Context} rule, we can suppose that the justification of the default
is already reduced to a value \synvar{v}. By applying the beta-reduction
rule \TirName{D-$\beta$}, we can further reduce to the case where \synvar{v} is a boolean.
This is where we encode our default logic evaluation semantics. If \synvar{v} is
\syntrue{}, then this rule applies and we reduce to the consequence. We don't
even have to consider rules of lower priority lower in the tree. This behavior
is similar to short-circuit reduction rules of boolean operators, that enable
a significant performance gain at execution.
\begin{mathpar}
\inferrule[D-DefaultTrueNoError]
{v\neq\synemptydefault}
{\synlangle \syntrue\synjust \synvar{v}\synmid \synvar{e_1}\synellipsis\synvar{e_n}\synrangle\exeval v}
\end{mathpar}
However, if the consequence of the first default evaluates to \synemptydefault,
then we fall back on the rules of lower priority, as if the justification had
evaluated to \synfalse. This behavior is useful when the consequence of the
first default is itself a \enquote{high-priority} default tree. In this case,
\synvar{e_1}\synellipsis\synvar{e_n} acts as the \enquote{low-priority} default
tree. The chaining
behavior from the high-priority tree to the low-priority tree,
defined by \TirName{D-DefaultTrueError}, will be very useful in
\sref{scope:formalization}.
\begin{mathpar}
\inferrule[D-DefaultTrueError]
{}
{
\synlangle \syntrue\synjust \synemptydefault\synmid \synvar{e_1}\synellipsis\synvar{e_n}\synrangle\exeval
\synlangle \synfalse\synjust \synemptydefault\synmid \synvar{e_1}\synellipsis\synvar{e_n} \synrangle
}
\end{mathpar}
If the justification of the default is \synfalse{}, then we have to consider rules of lower priority
\synvar{e_1}\synellipsis\synvar{e_n} that should all be evaluated (left to right),
according to the sub-default evaluation context. Then, we consider all the
values yielded by the sub-default evaluation and define a counting function over these
values. Let $\exeemptysubdefaults(\synvar{v_1}\synellipsis\synvar{v_n})$ be
the number of empty error terms \synemptydefault{} among the values. We then case analyze on this count:
\begin{itemize}
\item if $\exeemptysubdefaults(\synvar{v_1}\synellipsis\synvar{v_n}) =n$, then
none of the sub-defaults apply and we return \synemptydefault;
\item if $\exeemptysubdefaults(\synvar{v_1}\synellipsis\synvar{v_n}) =n - 1$,
then only one of the sub-defaults applies and we return its corresponding value;
\item if $\exeemptysubdefaults(\synvar{v_1}\synellipsis\synvar{v_n}) < n - 1$,
then two or more sub-defaults apply and we raise a conflict error \synerror.
\end{itemize}
\begin{mathpar}
\inferrule[D-DefaultFalseNoSub]
{}
{\synlangle \synfalse\synjust \synvar{e}\synmid \synemptydefault{}\synellipsis\synemptydefault{}\synrangle\exeval \synemptydefault{}}
\inferrule[D-DefaultFalseOneSub]
{}
{\synlangle \synfalse\synjust \synvar{e}\synmid
\synemptydefault\synellipsis\synemptydefault\syncomma\synvar{v}\syncomma\synemptydefault\synellipsis\synemptydefault\synrangle\exeval \synvar{v}}
\inferrule[D-DefaultFalseSubConflict]
{\exeemptysubdefaults(\synvar{v_1}\synellipsis\synvar{v_n}) <n - 1}
{\synlangle \synfalse\synjust \synvar{e}\synmid \synvar{v_1}\synellipsis\synvar{v_n}\synrangle\exeval \synerror{}}
\end{mathpar}
Last, we need to define how our error terms propagate. Because the rules for
sub-default evaluation have to count the number of error terms in the list
of sub-defaults, we cannot always immediately propagate the error term \synemptydefault{} in
all the evaluation contexts as is usually done. Rather, we rely on the
distinction between the $\lambda$-calculus evaluation contexts $\synvar{C_\lambda}$
and the sub-default evaluation context. Hence the following rules for error
propagation:
\begin{mathpar}
\inferrule[D-ContextEmptyError]
{\synvar{e}\exeval\synemptydefault}
{\synvar{C_\lambda}[\synvar{e}]\exeval\synemptydefault}
\inferrule[D-ContextConflictError]
{\synvar{e}\exeval\synerror}
{\synvar{C}[\synvar{e}]\exeval\synerror}
\end{mathpar}
\section{Scope language}
Our core default calculus provides a value language adapted to the drafting style
of tax law. Each article of the law will provide one or more rules encoded as
defaults. But how to collect those defaults into a single expression that
will compute the result that we want? How to reuse existing rules in different
contexts?
These questions point out the lack of an abstraction structure adapted to
the legislative drafting style. Indeed, our \synlambda functions are not
convenient to compose together the rules scattered around the legislative text.
Moreover, the abstractions defined in the legislative text exhibit a behavior
quite different from \synlambda functions.
First, the blurred limits between abstraction units.
In the legislative text, objects and data are referred to in a free variable style.
It is up to us to put the necessary bindings for these free variables, but
it is not trivial to do so. For that, one needs to define the perimeter of
each abstraction unit, a legislative \emph{scope}, which might encompass multiple
articles.
Second, the confusion between local variables and function parameters. The
base-case vs. exception structure of the law also extends between legislative
scopes. For instance, a scope $A$ can define a variable $x$ to have value $a$, but
another legislative scope $B$ can \emph{call into} $A$ while specifying that
$x$ should be $b$. In this setting, $B$ defines an exception for $x$, which
should be dealt with using our default calculus.
Based on these two characteristics, we propose a high-level \emph{scope language},
semantically defined by its encoding in the default calculus.
\subsection{Syntax}
A scope $S$ is a legislative abstraction unit that can encompass multiple
articles. $S$ is comprised of multiple rules that define a scope variable $a$
to a certain expression under a condition that characterizes the base case or
the exception.
$S$ can also call into another scope $S'$, as a function can call
into another. These calls are scattered in the legislative texts and have
to be identified by the programmer. Since $S$ can call $S'$ multiple times
with different \enquote{parameters}, we have to distinguish between these
sub-calls and give them different names $\synvar{S'}_1$,
$\synvar{S'}_2$, etc. A program $P$ is
a list of scope declarations $\sigma$.
\begin{center}
\begin{tabular}{lrrll}
Scope name&\synvar{S}&&&\\
Scope call identifier&\synvar{n}&&&\\
Location&\synvar{\ell}&\syndef&\synvar{a}&scope variable\\
&&\synalt&$\synvar{S}_\synvar{n}$\synlsquare\synvar{a}\synrsquare&sub-scope call variable\\
Expression&\synvar{e}&\syndef&\synvar{\ell}&location\\
&&\synalt&$\cdots$&default calculus expressions\\
&&&&\\
Rule&\synvar{r}&\syndef&\synrule\synvar{\ell}\syntyped\synvar{\tau}\synequal\synlangle\synvar{e}\synjust
\synvar{e}\synmid$[\synvar{e}^*]$\synrangle
&Location definition\\
&&\synalt&\syncall$\synvar{S}_\synvar{n}$&sub-scope call\\
Scope declaration&\synvar{\sigma}&\syndef&\synscope\synvar{S}\syntyped $[\synvar{r}^*]$&\\
Program&\synvar{P}&\syndef&$[\sigma^*]$&\\
\end{tabular}
\end{center}
\subsection{Running example}
\label{sec:scope:example}
Let's illustrate how the scope language plays out with a simple program
that calls a sub-scope:
\begin{Verbatim}[frame=lines,label=Simple scope program, numbers=left, framesep=10pt, samepage=true]
scope X:
  rule a = < true :- 0 | >
  rule b = < true :- a + 1 | >

scope Y:
  rule X_1[a] = < true :- 42 | >
  call X_1
  rule c = < X_1[b] == 1 :- false | >,
           < X_1[b] == 43 :- true | >
\end{Verbatim}
Considered alone, the execution of \Verb+X+ is simple: \Verb+a+ and \Verb+b+ are defined by
a single default whose justification is \Verb+true+. Hence, \Verb+a+ should evaluate
to \Verb+0+ and \Verb+b+ should evaluate to \Verb+1+.
Now, consider scope \Verb+Y+. It defines a single variable \Verb+c+ with two defaults
(lines 8 and 9), but the justifications for these two defaults use the result of
the evaluation (line 7) of variable \Verb+b+ of the sub-scope \Verb+X_1+.
Line 6 shows an example of providing an \enquote{argument} to the subscope call.
The execution goes like this: at line 7 when calling the sub-scope,
\Verb+X_1[a]+ has two defaults, one coming from line 2, the other coming
from line 6. Because the caller has priority over the callee, the default from line
6 wins and \Verb+X_1[a]+ evaluates to \Verb+42+. Consequently,
\Verb+X_1[b]+ evaluates to \Verb+43+.
This triggers the second default in the list of line 9 which evaluates \Verb+c+ to \Verb+true+.
The goal is to provide an encoding of the scope language
into the lambda calculus that is compatible with this intuitive description
of how scopes should evaluate. To get a high-level
picture of the translation, we first show what the previous simple program will translate
to, using ML-like syntax for the target default calculus:
\begin{Verbatim}[frame=lines,label=Simple default program, numbers=left, framesep=10pt, samepage=true]
let X (a: unit -> int) (b: unit -> int) : (int * int) =
  let a : unit -> int = a ++ (fun () -> < true :- 0 | >) in
  let a : int = a () in
  let b : unit -> int = b ++ (fun () -> < true :- a + 1 | >) in
  let b : int = b () in
  (a, b)

let Y (c: unit -> bool) : bool =
  let X_1[a] : unit -> int = fun () -> < true :- 42 | > in
  let X_1[b] : unit -> int = fun () -> EmptyError in
  let (X_1[a], X_1[b]) : int * int = X(X_1[a], X_1[b]) in
  let c : unit -> bool = c ++ (fun () ->
    < X_1[b] == 1 :- false>, < X_1[b] == 43 :- true >)
  in
  let c : bool = c () in
  c
\end{Verbatim}
We start unravelling this translation with the scope \Verb+X+. \Verb+X+ has
been turned into a function whose arguments are all the local variables of the
scope. However, the arguments have type \Verb+unit -> <type>+. Indeed, we want the
arguments of \Verb+X+ (line 1) to be the default expression supplied by the caller of
\Verb+X+, which will later be merged with a higher priority (operator \Verb|++|) to the default
expression defining the local variables of \Verb+X+ (lines 2 and 4). But since defaults are not
values in our default calculus, we have to thunk them in the arguments of \Verb|X|
so that their evaluation is delayed. After the defaults have been merged, we apply
\Verb+()+ to the thunk (lines 3 and 5) to force evaluation and get back the value.
Finally, \Verb+X+ returns the tuple of all its local variables (line 6).
The translation of \Verb+Y+ exhibits the pattern for sub-scope calls.
Line 9 translates the assignment of the sub-scope argument \Verb+X_1[a]+.
Before calling \Verb+X_1+ (line 11), the other argument \Verb+X_1[b]+ is
initialized to the neutral \synemptydefault{} that will be ignored at execution
because \Verb+X+ provides more defaults for \Verb+b+.
The sub-scope call is translated to a regular
function call (line 11). The results of the call are then used in the two defaults
for \Verb+c+ (line 13), which have been turned into a default tree taking into
account that no priority has been declared between the two defaults.
Finally, \Verb+c+ is evaluated (line 15).
\subsection{Formalization of the translation}
\label{sec:scope:formalization}
The main judgment of reduction from scope language to default calculus is
\fbox{$\synvar{P}\redvdash\synvar{\sigma}\reduces\synvar{e}\redproduce\redctx{\Delta_\mathrm{own}}$}, which reduces
a scope declaration to a function in the default calculus, while providing the
list of its own variables.
\begin{center}
\begin{tabular}{lrrll}
Translation context&\redctx{\Delta}&\syndef&\redempty&empty context\\
(unordered map)&&\synalt&\redctx{\Delta_\mathrm{own}}\redcomma\redctx{\Delta_\mathrm{sub}}&own and sub-scopes contexts\\
&\redctx{\Delta_\mathrm{own}}&\syndef&\redempty\synalt\redctx{\Delta_\mathrm{own}}\redcomma\synvar{a}\redcolon\synvar{\tau}&typed scope variable\\
&\redctx{\Delta_\mathrm{sub}}&\syndef&\redempty\synalt\redctx{\Delta_\mathrm{sub}}\redcomma
$\synvar{S}_\synvar{n}$\synlsquare\synvar{a}\synrsquare\redcolon\synvar{\tau}&typed sub-scope variable\\
\end{tabular}
\end{center}
The translation context \redctx{\Delta} is similar to the typing context \typctx{\Gamma} of
the default calculus, but it only takes into account the new scope-related location. At any
point, \redctx{\Delta} will contain the scope locations defined (and usable in expressions) so far.
\redctx{\Delta} is divided into \redctx{\Delta_\mathrm{own}} and \redctx{\Delta_\mathrm{sub}},
which contain respectively the scope's own variables and the variables of its
sub-scopes.
We will describe
the translation from top to bottom, in order to keep the big picture in mind.
We will assume the default calculus has been expanded with the usual ML
\synlet \synin construction, as well as tuples. Here is the top-level rule for
translating scopes.
\begin{mathpar}
\inferrule[T-Scope]{
\synvar{P}\redsc\redempty\redturnstile{\synvar{S}} \synvar{r_1}\synellipsis\synvar{r_n}
\reduces \synvar{e} \redproduce
\synvar{a_1}\redcolon\synvar{\tau_1}\redellipsis\synvar{a_m}\redcolon\synvar{\tau_m}\redcomma
\redctx{\Delta_\mathrm{sub}}
}{
\synvar{P}\redvdash\synscope\synvar{S}\syntyped\synvar{r_1}\synellipsis\synvar{r_n}\reduces\\
\synlet\synvar{S}\;\synlparen\synvar{a_1}\syntyped\synunitt\synarrow\synvar{\tau_1}\synrparen\;\cdots\;
\synlparen\synvar{a_m}\syntyped\synunitt\synarrow\synvar{\tau_m}\synrparen\syntyped
\synlparen\synvar{\tau_1}\synstar\cdots\synstar\synvar{\tau_m}\synrparen\synequal
\synvar{e}[\synhole\mapsto\synlparen\synvar{a_1}\synellipsis\synvar{a_m}\synrparen]\redproduce\\
\synvar{a_1}\redcolon\synvar{\tau_1}\redellipsis\synvar{a_m}\redcolon\synvar{\tau_m}
}
\end{mathpar}
This rule has a lot to unpack, but it is just the formal description of the
translation scheme described earlier. To translate scope declaration \synvar{S}
with associated rules \synvar{r_1}\synellipsis\synvar{r_n}, we use a helper
judgment \fbox{\synvar{P}\redsc\redctx{\Delta}\redturnstile{\synvar{S}} \synvar{r_1}\synellipsis\synvar{r_n}
\reduces \synvar{e} \redproduce \redctx{\Delta'}} which reads as \enquote{%
given a program \synvar{P} and a translation context \redctx{\Delta},
the rules \synvar{r_1}\synellipsis\synvar{r_n} belonging to scope \synvar{S}
translate to the expression \synvar{e}, producing a new translation context
\redctx{\Delta'}}.
In this \TirName{T-Scope} rule, we isolate in the resulting \redctx{\Delta'} all the
scope variables \synvar{a_1},\ldots,\synvar{a_m} from the sub-scope variables.
Indeed, those variables will be the arguments and the return values of the function
corresponding to the scope \synvar{S}. The expression \synvar{e} that stands
for rules \synvar{r_1}\synellipsis\synvar{r_n} is a series of \synlet bindings,
the last one finishing by a hole (\synhole). We use this hole as a placeholder
to be filled with the return value of the function, which is the tuple
\synlparen\synvar{a_1}\synellipsis\synvar{a_m}\synrparen. Note that, in accordance
with the translation scheme and the need for a delayed evaluation of defaults,
the arguments of \synvar{S} have a thunked type.
\begin{mathpar}
\inferrule[T-Rules]{
\synvar{P}\redsc\redctx{\Delta}\redturnstile{\synvar{S}} \synvar{r_1}
\reduces \synvar{e_1} \redproduce\redctx{\Delta'}\\
\synvar{P}\redsc\redctx{\Delta'}\redturnstile{\synvar{S}} \synvar{r_2}\synellipsis\synvar{r_n}
\reduces \synvar{e_2} \redproduce\redctx{\Delta''}
}{
\synvar{P}\redsc\redctx{\Delta}\redturnstile{\synvar{S}} \synvar{r_1}\synellipsis\synvar{r_n}
\reduces \synvar{e_1}[\synhole\mapsto \synvar{e_2}] \redproduce\redctx{\Delta''}
}
\end{mathpar}
The translation of the sequence of rules consists of chaining the different
\synlet \synin expressions together with the same hole (\synhole{}) substitution as the
previous rule. Now, we can define the translation for individual rules, starting
with the definitions of scope variables.
\begin{mathpar}
\inferrule[T-DefScopeVar]{
\synvar{a}\notin\redctx{\Delta}\\
\redctx{\Delta}\typvdash\synlangle\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synmid\synvar{e_1}\synellipsis\synvar{e_n}\synrangle\typcolon\synvar{\tau}
}{
\synvar{P}\redsc\redctx{\Delta}\redturnstile{\synvar{S}}
\synrule\synvar{a}\syntyped\synvar{\tau}\synequal
\synlangle\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synmid\synvar{e_1}\synellipsis\synvar{e_n}\synrangle
\reduces \\
\synlet a\syntyped\synvar{\tau}\synequal \synlparen\synvar{a} \synmerge
\synlambda \synlparen \synunit\syntyped\synunitt\synrparen\syndot
\synlangle\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synmid\synvar{e_1}\synellipsis\synvar{e_n}\synrangle
\synrparen\;\synunit\synin\synhole
\redproduce\synvar{a}\redcolon\synvar{\tau}\redcomma\redctx{\Delta}
}
\end{mathpar}
The premise of \TirName{T-DefScopeVar}, $\synvar{a}\notin\redctx{\Delta}$, indicates
that our scope language allows each scope variable to be defined only once, with
one default tree. This single default tree can incorporate multiple prioritized definitions
of the same variable scattered around various legislative articles, but we
assume in our scope language that these scattered definitions have been
already collected. Therefore, the ordering of rules is very important in our
scope language, because it should be compatible with the dependency graph
of the scope locations. As the underlying default calculus is decidable and
does not allow fixpoint definitions, the dependency graph of the scope locations
should not be cyclic and therefore the topological ordering of its nodes
should correspond to the order of the rules inside the scope declaration. This
dependency ordering is enforced by the premise
\redctx{\Delta}\typvdash\synlangle\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synmid\synvar{e_1}\synellipsis\synvar{e_n}\synrangle\typcolon\synvar{\tau},
which seeds the typing judgment of \sref{defaultcalc:typing} with \redctx{\Delta}
(the scope locations defined so far).
Since scope variables are also arguments of the scope, \TirName{T-DefScopeVar}
redefines \synvar{a} by merging the new default tree with the default expression
\synvar{a} of type \synunitt\synarrow\synvar{\tau} passed as an argument to \synvar{S}.
The merging operator, \synmerge{}, has the following definition:
\begin{align*}
\synlet \synlparen\synmerge\synrparen\;&
\synlparen\synvar{e_1}\syntyped\synunitt\synarrow\synvar{\tau}\synrparen\;
\synlparen\synvar{e_2}\syntyped\synunitt\synarrow\synvar{\tau}\synrparen\syntyped
\synunitt\synarrow\synvar{\tau}
\synequal\\
&\synlangle\synlambda\synlparen\synunit\syntyped\synunitt\synrparen\syndot\syntrue\synjust
\synvar{e_1}\synmid\synvar{e_2}\synrangle
\end{align*}
\synmerge{} is asymmetric in terms of priority: in \synvar{e_1}\synmerge\synvar{e_2},
\synvar{e_1} will have the highest priority and it is only in the case where
none of the rules in \synvar{e_1} apply that \synvar{e_2} will be considered.
In \TirName{T-DefScopeVar}, the left-hand side of \synmerge{} is coherent with
our objective of giving priority to the caller.
Finally, the evaluation of the merged default tree is forced by applying \synunit,
yielding a value of type \synvar{\tau} for \synvar{a} which will be available
for use in the rest of the translated program. Now that we have presented the
translation scheme for rules defining scope variables, we can switch to the
translation of sub-scope variables definitions and calls. We will start by
the rules that define sub-scope variables, prior to calling the associated
sub-scope.
\begin{mathpar}
\inferrule[T-DefSubScopeVar]{
S\neq S'\\
\synvar{S'}_\synvar{n}\synlsquare\synvar{a}\synrsquare\notin\redctx{\Delta}\\
\redctx{\Delta}\typvdash\synlangle\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synmid\synvar{e_1}\synellipsis\synvar{e_n}\synrangle
\typcolon\synvar{\tau}
}{
\synvar{P}\redsc\redctx{\Delta}\redturnstile{\synvar{S}}
\synrule\synvar{S'}_\synvar{n}\synlsquare\synvar{a}\synrsquare\syntyped\synvar{\tau}\synequal
\synlangle\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synmid\synvar{e_1}\synellipsis\synvar{e_n}\synrangle
\reduces \\
\synlet \synvar{S'}_\synvar{n}\synlsquare\synvar{a}\synrsquare\syntyped\synunitt\synarrow\synvar{\tau}\synequal
\synlambda \synlparen\synunit\syntyped\synunitt\synrparen\syndot
\synlangle\synvar{e_\mathrm{just}}\synjust\synvar{e_\mathrm{cons}}\synmid\synvar{e_1}\synellipsis\synvar{e_n}\synrangle
\synin\synhole\redproduce\\\synvar{S'}_\synvar{n}\synlsquare\synvar{a}\synrsquare\redcolon\synunitt\synarrow\synvar{\tau}\redcomma\redctx{\Delta}
}
\end{mathpar}
This rule is very similar to \TirName{T-DefScopeVar}, and actually simpler. It does not feature the
merge operator $\!\!\!\synmerge\!\!\!$, because sub-scope variables like
$\synvar{S'}_\synvar{n}$\synlsquare\synvar{a}\synrsquare{}
are not part of the scope's arguments. The premise $S\neq S'$ means that a scope
$S$ cannot have a recursive definition; it cannot call into itself and define
sub-scope variables of its own scope. Note that
$\synvar{S'}_\synvar{n}$\synlsquare\synvar{a}\synrsquare\redcolon\synunitt\synarrow\synvar{\tau}
is added to \redctx{\Delta} in the final part of the judgment;
$\synvar{S'}_\synvar{n}$\synlsquare\synvar{a}\synrsquare{}
has been defined as a sub-scope argument but not as a value that can be used by the
scope yet, its type is \synunitt\synarrow\synvar{\tau} and not \synvar{\tau}.
When all the arguments of
sub-scope \synvar{S'} have been defined using \TirName{T-DefSubScopeVar},
the sub-scope itself can be called.
\begin{mathpar}
\inferrule[T-SubScopeCall]{
S\neq S'\\
P(S') = \sigma'\\
P\redvdash\sigma'\reduces e'\redproduce
\synvar{a'_1}\redcolon\synvar{\tau'_1}\redellipsis\synvar{a'_n}\redcolon\synvar{\tau'_n}\redcomma\redctx{\Delta'_\mathrm{sub}}\\
\redinit\redlparen\redctx{\Delta}\redsc
\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_1}\synrsquare
\redellipsis\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_n}\synrsquare\redrparen
\redequal\synvar{e_\mathrm{init}}
}{
\synvar{P}\redsc\redctx{\Delta}\redturnstile{\synvar{S}}\syncall
\synvar{S'}_\synvar{n}\reduces
\synvar{e_\mathrm{init}}[\synhole\mapsto
\synlet\synlparen \synvar{S'}_\synvar{n}\synlsquare\synvar{a'_1}\synrsquare\synellipsis
\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_n}\synrsquare\synrparen
\syntyped \synlparen \synvar{\tau'_1}\synstar\cdots\synstar\synvar{\tau'_n}\synrparen\synequal\\
\synvar{e'}\;\synlparen\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_1}\synrsquare\synrparen
\cdots\synlparen\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_n}\synrsquare\synrparen
\synin\synhole\;]\redproduce
\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_1}\synrsquare\redcolon\synvar{\tau'_1}\redellipsis
\synvar{S'}_\synvar{n}\synlsquare\synvar{a'_n}\synrsquare\redcolon\synvar{\tau'_n}\redcomma
\redctx{\Delta}
}
\end{mathpar}
Again, this rule has a lot to unpack, but is meant as a generalization of the
translation scheme illustrated in \sref{scope:example}. Let us start with
the premises. As earlier, $S\neq S'$ means that scope declarations cannot be
recursive. Next, we fetch the declaration \synvar{\sigma'} of \synvar{S'} inside
the program $P$. \synvar{\sigma'} is reduced into the function expression \synvar{e'},
whose arguments correspond to the scope variables of \synvar{S'}:
\synvar{a'_1}\synellipsis\synvar{a'_n}. Then, we need to define all the arguments
necessary to call \synvar{e'}. Some of these arguments have been defined earlier
in the translation, and they were added to \redctx{\Delta}. But some arguments
may not have been defined yet, and it is precisely the job of the \redinit{}
helper to produce the \synvar{e_\mathrm{init}} expression to define those
missing arguments with the \synemptydefault{} value.
The conclusion of \TirName{T-SubScopeCall} defines the reduction of
\syncall$\synvar{S'}_\synvar{n}$. After \synvar{e_\mathrm{init}},
we translate the sub-scope call to the default calculus call of the corresponding expression \synvar{e'},
which takes as arguments the defaults and returns the corresponding values after
evaluation. Finally, the new translation context produced is \redctx{\Delta}
augmented with all the variables of sub-scope \synvar{S'}, which are available
for use in later definitions of the scope.
The last item we need to define in order to complete the translation is \redinit{}.
Its definition is quite simple: it produces an expression that initializes to \synemptydefault{}
all the variables of the given list that are not already present in \redctx{\Delta}.
\begin{mathpar}
\inferrule[T-InitSubVarsInDelta]{
\synvar{S'}_\synvar{n}\synlsquare\synvar{a_1}\synrsquare\redcolon\synvar{\tau_1}
\in\redctx{\Delta}\\
\redinit\redlparen\redctx{\Delta}\redsc
\synvar{S'}_\synvar{n}\synlsquare\synvar{a_2}\synrsquare\redellipsis
\synvar{S'}_\synvar{n}\synlsquare\synvar{a_n}\synrsquare\redrparen
\redequal\synvar{e}
}{
\redinit\redlparen\redctx{\Delta}\redsc
\synvar{S'}_\synvar{n}\synlsquare\synvar{a_1}\synrsquare\redellipsis
\synvar{S'}_\synvar{n}\synlsquare\synvar{a_n}\synrsquare\redrparen
\redequal\synvar{e}
}
\inferrule[T-InitSubVarsNotInDelta]{
\synvar{S'}_\synvar{n}\synlsquare\synvar{a_1}\synrsquare\redcolon\synvar{\tau_1}
\notin\redctx{\Delta}\\
\redinit\redlparen\redctx{\Delta}\redsc
\synvar{S'}_\synvar{n}\synlsquare\synvar{a_2}\synrsquare\redellipsis
\synvar{S'}_\synvar{n}\synlsquare\synvar{a_n}\synrsquare\redrparen
\redequal\synvar{e'}
}{
\redinit\redlparen\redctx{\Delta}\redsc
\synvar{S'}_\synvar{n}\synlsquare\synvar{a_1}\synrsquare\redellipsis
\synvar{S'}_\synvar{n}\synlsquare\synvar{a_n}\synrsquare\redrparen
\redequal\\
\synlet\synvar{S'}_\synvar{n}\synlsquare\synvar{a_1}\synrsquare
\syntyped\synunitt\synarrow\synvar{\tau_1}\synequal
\synlambda\synlparen\synunit\syntyped\synunitt\synrparen\syndot\synemptydefault\synin \synvar{e}
}
\inferrule[T-InitSubVarsEmpty]{}{
\redinit\redlparen\redctx{\Delta}\redrparen
\redequal\synhole
}
\end{mathpar}
\printbibliography
\end{document}