Moved article papers/stcs-2019

This commit is contained in:
Anabra 2019-02-04 19:19:31 +01:00
parent 56a139c634
commit 44505eacb5
16 changed files with 1 addition and 825 deletions

View File

@@ -1,12 +0,0 @@
\documentclass[main.tex]{subfiles}
\begin{document}
\begin{abstract}
GRIN is short for Graph Reduction Intermediate Notation~\cite{boquist-phd}, a modern back end for lazy functional languages. Most of the currently available compilers for such languages share a common flaw: they can only optimize programs on a per-module basis. The GRIN framework allows for interprocedural whole program analysis, enabling optimizing code transformations across functions and modules as well.
Several implementations of GRIN already exist, but most of them were developed only for experimentation. As a result, they either compromise on low-level efficiency or contain ad hoc modifications compared to the original specification.
Our goal is to provide a full-fledged implementation of GRIN by combining the best currently available technologies, such as LLVM~\cite{llvm-2004}, and to measure the framework's effectiveness compared to some of the most well-known functional language compilers, such as the Glasgow Haskell Compiler~\cite{ghc} and the Idris compiler~\cite{idris}. We also present some improvements to the already existing components of the framework, including a typed representation for the intermediate language and an interprocedural program optimization: dead data elimination.
\end{abstract}
\end{document}

View File

@@ -1,6 +0,0 @@
\begin{document}
The project has been supported by the European Union,
co-financed by the European Social Fund (EFOP-3.6.3-VEKOP-16-2017-00002).
\end{document}

View File

@@ -1,194 +0,0 @@
@phdthesis
{
boquist-phd,
author = {Urban Boquist},
school = {{Chalmers University of Technology and Göteborg University}},
title ={{Code Optimisation Techniques for Lazy Functional Languages}},
year = {1999},
isbn = {91-7197-792-9}
}
@inproceedings
{
boquist-grin,
author = {Boquist, Urban and Johnsson, Thomas},
title = {{The GRIN Project: A Highly Optimising Back End for Lazy Functional Languages}},
booktitle = {{Selected Papers from the 8th International Workshop on Implementation of Functional Languages}},
series = {IFL '96},
year = {1997},
isbn = {3-540-63237-9},
pages = {58--84},
numpages = {27},
url = {http://dl.acm.org/citation.cfm?id=647975.743083},
acmid = {743083},
publisher = {{Springer-Verlag}},
address = {Berlin, Heidelberg}
}
@mastersthesis
{
remi-masters,
author = {Remi Turk},
school = {{Universiteit van Amsterdam}},
title ={{A modern back-end for a dependently typed language}},
year = {2010}
}
@inproceedings
{
uhc,
author = {Dijkstra, Atze and Fokker, Jeroen and Swierstra, S. Doaitse},
title = {{The Architecture of the Utrecht Haskell Compiler}},
booktitle = {{Proceedings of the 2Nd ACM SIGPLAN Symposium on Haskell}},
series = {Haskell '09},
year = {2009},
isbn = {978-1-60558-508-6},
location = {Edinburgh, Scotland},
pages = {93--104},
numpages = {12},
url = {http://doi.acm.org/10.1145/1596638.1596650},
doi = {10.1145/1596638.1596650},
acmid = {1596650},
publisher = {ACM},
address = {New York, NY, USA},
keywords = {aspect orientation, attribute grammar, compiler architecture, haskell},
}
@article
{ hrc,
author = {Liu, Hai and Glew, Neal and Petersen, Leaf and Anderson, Todd A.},
title = {{The Intel Labs Haskell Research Compiler}},
journal = {{SIGPLAN Not.}},
issue_date = {December 2013},
volume = {48},
number = {12},
month = sep,
year = {2013},
issn = {0362-1340},
pages = {105--116},
numpages = {12},
url = {http://doi.acm.org/10.1145/2578854.2503779},
doi = {10.1145/2578854.2503779},
acmid = {2503779},
publisher = {ACM},
address = {New York, NY, USA},
keywords = {compiler optimization, functional language compiler, haskell},
}
@inproceedings
{ haskell-gap,
author = {Petersen, Leaf and Anderson, Todd A. and Liu, Hai and Glew, Neal},
title = {{Measuring the Haskell Gap}},
booktitle = {{Proceedings of the 25th Symposium on Implementation and Application of Functional Languages}},
series = {IFL '13},
year = {2014},
isbn = {978-1-4503-2988-0},
location = {Nijmegen, Netherlands},
pages = {61:61--61:72},
articleno = {61},
numpages = {12},
url = {http://doi.acm.org/10.1145/2620678.2620685},
doi = {10.1145/2620678.2620685},
acmid = {2620685},
publisher = {ACM},
address = {New York, NY, USA},
}
@techreport{
mlton-llvm,
author = {Brian Andrew Leibig},
title = {An LLVM Back-end for MLton},
year = {2013},
url = {https://www.cs.rit.edu/~mtf/student-resources/20124_leibig_msproject.pdf},
note = {A Project Report Submitted in Partial Fulfillment of the Requirements for the Degree of Master of Science in Computer Science},
institution = {Department of Computer Science, B. Thomas Golisano College of Computing and Information Sciences},
}
@article
{
contification,
author = {Fluet, Matthew and Weeks, Stephen},
title = {{Contification Using Dominators}},
journal = {{SIGPLAN Not.}},
issue_date = {October 2001},
volume = {36},
number = {10},
month = oct,
year = {2001},
issn = {0362-1340},
pages = {2--13},
numpages = {12},
url = {http://doi.acm.org/10.1145/507669.507639},
doi = {10.1145/507669.507639},
acmid = {507639},
publisher = {ACM},
address = {New York, NY, USA},
}
@inproceedings
{
mlton,
author = {Weeks, Stephen},
title = {{Whole-program Compilation in MLton}},
booktitle = {{Proceedings of the 2006 Workshop on ML}},
series = {ML '06},
year = {2006},
isbn = {1-59593-483-9},
location = {Portland, Oregon, USA},
pages = {1--1},
numpages = {1},
url = {http://doi.acm.org/10.1145/1159876.1159877},
doi = {10.1145/1159876.1159877},
acmid = {1159877},
publisher = {ACM},
address = {New York, NY, USA},
}
@inproceedings
{
llvm-2004,
author = {Chris Lattner and Vikram Adve},
title = {{LLVM}: A Compilation Framework for Lifelong Program Analysis and Transformation},
booktitle = {CGO},
address = {San Jose, CA, USA},
month = {Mar},
year = {2004},
pages = {75--88},
}
@misc
{
clang,
title = {{Clang: a C language family front end for LLVM}},
url = {https://clang.llvm.org}
}
@inproceedings
{
ghc,
author = {Hall, Cordelia V. and Hammond, Kevin and Partain, Will and Peyton Jones, Simon L. and Wadler, Philip},
title = {{The Glasgow Haskell Compiler: A Retrospective}},
booktitle = {{Proceedings of the 1992 Glasgow Workshop on Functional Programming}},
year = {1993},
isbn = {3-540-19820-2},
pages = {62--71},
numpages = {10},
url = {http://dl.acm.org/citation.cfm?id=647557.729914},
acmid = {729914},
publisher = {{Springer-Verlag}},
address = {London, UK},
}
@article
{
idris,
title={Idris, a general-purpose dependently typed programming language: Design and implementation},
volume={23},
DOI={10.1017/S095679681300018X},
number={5},
journal={{Journal of Functional Programming}},
publisher={Cambridge University Press},
author={{Brady, Edwin}},
year={2013},
pages = {552--593}
}

View File

@@ -1,10 +0,0 @@
\documentclass[main.tex]{subfiles}
\begin{document}
\makeatletter
\preto{\@verbatim}{\topsep=0pt \partopsep=0pt }
\makeatother
\bibliographystyle{IEEEtran}
\bibliography{bib_database}
\end{document}

View File

@@ -1,50 +0,0 @@
\documentclass[main.tex]{subfiles}
\begin{document}
Dead code elimination is one of the most well-known compiler optimization techniques. Its aim is to remove parts of the program that affect neither its final result nor its side effects. This includes code that can never be executed, as well as code that consists only of irrelevant operations on dead variables. Dead code elimination can reduce the size of the input program as well as increase its execution speed. Furthermore, it can facilitate other optimizing transformations by restructuring the code.
\subsection{Dead Code Elimination in GRIN}
The original GRIN framework has three different dead code eliminating transformations: dead function elimination, dead variable elimination and dead function parameter elimination. In general, the effectiveness of an optimization depends largely on the accuracy of the information it has about the program. The more precise the information, the more aggressive the transformation can be. Furthermore, running the same transformation with additional information available can often yield more efficient code.
In the original framework, the dead code eliminating transformations were provided with only a very rough approximation of the liveness of variables and function parameters. In fact, a variable was deemed dead only if it was never used in the program. As a consequence, the required analyses were very fast, but the transformations themselves were rather limited.
\subsection{Interprocedural Liveness Analysis} \label{sub-sec:lva}
In order to improve the effectiveness of dead code elimination, we need more sophisticated data-flow analyses. Liveness analysis is a standard data-flow analysis that determines which variables in the program are live and which are not. It is important to note that even if a variable is used in the program, it is not necessarily live; see Program~code~\ref{code:lva-example}.
\begin{codeFloat}[h]
\begin{center}
\begin{minipage}{0.35\textwidth}
\begin{haskell}
main =
n <- pure 5
y <- pure (CInt n)
pure 0
\end{haskell}
\subcaption{Put into a data constructor}
\end{minipage}
\hspace{1cm}
\begin{minipage}{0.35\textwidth}
\begin{haskell}
main =
n <- pure 5
foo n
foo x = pure 0
\end{haskell}
\subcaption{Argument to a function call}
\end{minipage}
\end{center}
\caption{Examples demonstrating that a used variable can still be dead}
\label{code:lva-example}
\end{codeFloat}
In the first example, the variable \pilcode{n} is used (it is put into a \pilcode{CInt} node), yet it is easy to see that \pilcode{n} is still dead. Moreover, the liveness analysis can determine this fact just by examining the function body locally; it does not need to analyze any function calls. In the second example we see a very similar situation, but here \pilcode{n} is an argument to a function call. To calculate the liveness of \pilcode{n}, the analysis either has to assume that the arguments of \pilcode{foo} are always live, or it has to analyze the body of the function. The former decision yields a faster but less precise \emph{intraprocedural} analysis; the latter results in a somewhat more costly, but also more accurate \emph{interprocedural} analysis.
By extending the analysis with interprocedural elements, we can obtain quite a good estimate of the live variables in the program, while minimizing the cost of the algorithm. Using the information gathered by the liveness analysis, the original optimizations can remove even more dead code segments.
%TODO: example here?
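Program~code~\ref{code:dce-interproc} sketches this effect on a hypothetical program fragment (not taken from the framework's test suite): since \pilcode{foo} never uses its first parameter \pilcode{x}, the interprocedural liveness information allows the dead code eliminating transformations to remove the parameter, the corresponding argument and the binding of \pilcode{n}.
\begin{codeFloat}[h]
\begin{center}
\begin{minipage}{0.35\textwidth}
\begin{haskell}
main =
  n <- pure 5
  k <- pure 1
  foo n k

foo x y = pure y
\end{haskell}
\subcaption{Before the transformation}
\end{minipage}
\hspace{1cm}
\begin{minipage}{0.35\textwidth}
\begin{haskell}
main =
  k <- pure 1
  foo k

foo y = pure y
\end{haskell}
\subcaption{After the transformation}
\end{minipage}
\end{center}
\caption{A sketch of dead parameter elimination guided by interprocedural liveness}
\label{code:dce-interproc}
\end{codeFloat}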
\end{document}

View File

@@ -1,12 +0,0 @@
\documentclass[main.tex]{subfiles}
\begin{document}
% intro
\subsection{Simplifying Data Structures}
\subsection{Simplifying Type Class Dictionaries}
\subsection{Type Erasure}
\end{document}

View File

@@ -1,129 +0,0 @@
\documentclass[main.tex]{subfiles}
\begin{document}
% TODO: reference Remi Turk & HRC
Conventional dead code eliminating optimizations usually only remove statements or expressions from programs; however, \emph{dead data elimination} can transform the underlying data structures themselves. Essentially, it can specialize a certain data structure for a given use-site by removing or transforming unnecessary parts of it. It is a very powerful optimization technique that can significantly decrease memory usage and reduce the number of heap operations.
\subsection{Dead Data Elimination in GRIN}
In the context of GRIN, dead data elimination removes dead fields of data constructors (or nodes) at both definition and use sites. In the following, we will refer to definition sites as \emph{producers} and to use sites as \emph{consumers}. Producers and consumers are in a \emph{many-to-many} relationship with each other: a producer can define a variable used by many consumers, and a consumer can use a variable possibly defined by many producers, depending on the control flow of the program. Program~code~\ref{code:dde-simple} illustrates dead data elimination on a very simple example with a single producer and a single consumer.
\begin{codeFloat}[h]
\begin{center}
\begin{minipage}{0.35\textwidth}
\begin{haskell}
main =
x <- pure (CPair 0 1)
y <- snd x
pure y
snd p =
(CPair a b) <- pure p
pure b
\end{haskell}
\subcaption{Before the transformation}
\end{minipage}
$\xRightarrow{\text{\emph{a} is dead}}$
\begin{minipage}{0.35\textwidth}
\begin{haskell}
main =
x <- pure (CPair' 1)
y <- snd x
pure y
snd p =
(CPair' b) <- pure p
pure b
\end{haskell}
\subcaption{After the transformation}
\end{minipage}
\end{center}
\caption{A simple example for dead data elimination}
\label{code:dde-simple}
\end{codeFloat}
As we can see, the first component of the pair is never used, so the optimization can safely eliminate the first field of the node. It is important to note that the transformation has to remove the dead field at both the producer and the consumer. Furthermore, the name of the node also has to be changed to preserve type correctness, since the transformation is specific to each producer-consumer group. This means that the data constructor \pilcode{CPair} still exists and can be used by other parts of the program, but a new, specialized version is introduced for each optimizable producer-consumer group\footnote{Strictly speaking, a new version is only introduced for each different set of live fields used by producer-consumer groups.}.
Dead data elimination requires a considerable amount of data-flow analysis and possibly multiple transformation passes. First, it has to identify the potentially removable dead fields of a node. This information can be acquired by running liveness analysis on the program (see Section~\ref{sub-sec:lva}). After that, it has to connect producers with consumers by running the \emph{created-by data-flow analysis}. Then it has to group together producers sharing at least one common consumer, and determine whether a given field of a given producer can be removed globally or only dummified locally. Finally, it has to transform both the producers and the consumers.
\subsection{Created-by Analysis}
The created-by analysis, as its name suggests, is responsible for determining the set of producers a given variable was possibly created by. For our purposes, it is sufficient to track only node valued variables, since these are the only potential candidates for dead data elimination. Analysis~example~\ref{analysis:cby} demonstrates how the algorithm works on a simple program.
\begin{analysisFloat}[h]
\begin{center}
\begin{minipage}{0.35\textwidth}
\begin{haskell}
null xs =
y <- case xs of
(CNil) ->
a <- pure (CTrue)
pure a
(CCons z zs) ->
b <- pure (CFalse)
pure b
pure y
\end{haskell}
\subcaption{Input program}
\end{minipage}
\hspace{1cm}
\begin{minipage}{0.40\textwidth}
\begin{tcolorbox}[tab2,tabularx={l|r}]
Variable & Producers \\
\hline\hline
\pilcode{xs} & $\set{CNil[\dots], CCons[\dots]}$\footnotemark[2] \\\hline
\pilcode{a} & $\set{CTrue[\pilcode{a}]}$ \\\hline
\pilcode{b} & $\set{CFalse[\pilcode{b}]}$ \\\hline
\pilcode{y} & $\set{CTrue[\pilcode{a}], CFalse[\pilcode{b}]}$ \\
\end{tcolorbox}
\subcaption{Analysis result}
\end{minipage}
\end{center}
\caption{An example demonstrating the created-by analysis}
\label{analysis:cby}
\end{analysisFloat}
\footnotetext[2]{\label{footnote:cby-example}For the sake of simplicity, we will assume that \pilcode{xs} was constructed with the \pilcode{CNil} and \pilcode{CCons} tags. Also, its producers are irrelevant in this example.}
The result of the analysis is a mapping from variable names to sets of producers grouped by their tags. For example, we could say that ``variable \pilcode{y} was created by the producer \pilcode{a}, given that it was constructed with the \pilcode{CTrue} tag''. Naturally, a variable can be constructed with many different tags, and each tag can have multiple producers. It is also important to note that some variables are their own producers. This is because producers are essentially definition sites or bindings, identified by the name of the variable on their left-hand side. However, not all bindings have a variable on their left-hand side, and some values may not be bound to variables at all. Fortunately, this problem can be easily solved by a simple naming transformation, illustrated below.
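Program~code~\ref{code:naming-transformation} shows a minimal sketch of such a naming transformation; the fresh names \pilcode{t1} and \pilcode{t2} are hypothetical and would be generated automatically. Every value that is returned without first being bound is given an explicit name, so that each producer can be identified by a variable.
\begin{codeFloat}[h]
\begin{center}
\begin{minipage}{0.35\textwidth}
\begin{haskell}
foo x =
  case x of
    (CNil) ->
      pure (CTrue)
    (CCons y ys) ->
      pure (CFalse)
\end{haskell}
\subcaption{Before the transformation}
\end{minipage}
\hspace{1cm}
\begin{minipage}{0.35\textwidth}
\begin{haskell}
foo x =
  case x of
    (CNil) ->
      t1 <- pure (CTrue)
      pure t1
    (CCons y ys) ->
      t2 <- pure (CFalse)
      pure t2
\end{haskell}
\subcaption{After the transformation}
\end{minipage}
\end{center}
\caption{A sketch of the naming transformation introducing explicit producers}
\label{code:naming-transformation}
\end{codeFloat}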
\subsection{Grouping Producers}
On a higher level of abstraction, the result of the created-by analysis can be interpreted as a bipartite graph between producers and consumers. One set of nodes represents the producers, the other the consumers. A producer is connected to a consumer if and only if the value created by the producer can be consumed by the consumer. Furthermore, each component of the graph corresponds to a producer-consumer group. Each producer inside a group can only create values consumed by the consumers inside the same group, and a similar statement holds for the consumers as well.
\subsection{Transforming Producers and Consumers}
As mentioned earlier, the transformation applied by dead data elimination can be specific to each producer-consumer group, and both the producers and the consumers have to be transformed. Also, the transformation cannot always simply remove the dead field of a producer. Consider Figure~\ref{fig:producers-and-consumers}.
\begin{figure}[h]
\centering
\begin{adjustbox}{scale = 1.5}
\begin{tikzpicture}[ node distance = 1cm and 2cm, on grid ]
\node [shape=circle,draw=black] (P1) {$P_1$};
\node [shape=circle,draw=black] (P2) [right =of P1] {$P_2$};
\coordinate (Middle) at ($(P1)!0.5!(P2)$);
\node [shape=circle,draw=black] (C2) [below =of Middle] {$C_2$};
\node [shape=circle,draw=black] (C1) [left =of C2] {$C_1$};
\node [shape=circle,draw=black] (C3) [right =of C2] {$C_3$};
\path[-{Stealth[scale=1.5]}] (P1) edge [] (C1)
(P1) edge [] (C2)
(P2) edge [] (C2)
(P2) edge [] (C3);
\end{tikzpicture}
\end{adjustbox}
\caption{Producer-consumer group}
\label{fig:producers-and-consumers}
\end{figure}
As we can see, producers $P_1$ and $P_2$ share a common consumer, $C_2$. Let us assume that the shared value is a \pilcode{CPair} node with two fields, and that neither $C_1$ nor $C_2$ uses the first field of that node. This means that the first field of the \pilcode{CPair} node is locally dead for producer $P_1$. Also, suppose that $C_3$ does use the first field, meaning it is live for $P_2$ and hence cannot be removed. In this situation, if the transformation were to remove the locally dead field from $P_1$, it would lead to a type mismatch at $C_2$, since $C_2$ would receive \pilcode{CPair} nodes with different numbers of arguments and possibly different types for their first fields. In order to resolve this issue, the transformation has to rename the tag at $P_1$ to \pilcode{CPair'}, and create new patterns for \pilcode{CPair'} at $C_1$ and $C_2$ by duplicating and renaming the existing ones for \pilcode{CPair}. This way, we can avoid potential memory operations at the cost of some code duplication.
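Program~code~\ref{code:dde-group} sketches this transformation for a hypothetical fragment of the group above, showing only $P_1$ and $C_2$; the variable names and the exact shape of the case expression are illustrative.
\begin{codeFloat}[h]
\begin{center}
\begin{minipage}{0.35\textwidth}
\begin{haskell}
-- producer P1
p1 <- pure (CPair 0 1)

-- consumer C2
case v of
  (CPair a b) -> pure b
\end{haskell}
\subcaption{Before the transformation}
\end{minipage}
\hspace{1cm}
\begin{minipage}{0.35\textwidth}
\begin{haskell}
-- producer P1
p1 <- pure (CPair' 1)

-- consumer C2
case v of
  (CPair a b)  -> pure b
  (CPair' b')  -> pure b'
\end{haskell}
\subcaption{After the transformation}
\end{minipage}
\end{center}
\caption{A sketch of transforming producer $P_1$ and consumer $C_2$}
\label{code:dde-group}
\end{codeFloat}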
\subsection{The \pilcode{undefined} value}
Another option would be to only \emph{dummify} the locally dead fields. In other words, instead of removing the field at the producer and restructuring the consumers, the transformation could simply introduce a dummy value for that field. The dummy value could be any placeholder with the same type as the locally dead field; for instance, any literal of that type. A more sophisticated solution is to introduce an \pilcode{undefined} value. The \pilcode{undefined} value is a placeholder as well, but it carries much more information. By marking certain values undefined instead of just introducing placeholder literals, we can facilitate other optimizations down the pipeline. However, each \pilcode{undefined} value has to be explicitly type annotated for the heap points-to analysis to work correctly. Unlike the approach described earlier, this alternative avoids any code duplication.
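Program~code~\ref{code:dde-undefined} sketches how dummifying the locally dead field of $P_1$ from the previous example could look; the concrete syntax of the type-annotated \pilcode{undefined} value is illustrative.
\begin{codeFloat}[h]
\begin{center}
\begin{minipage}{0.5\textwidth}
\begin{haskell}
-- producer P1, with the dead field dummified
p1 <- pure (CPair (#undefined :: T_Int64) 1)
\end{haskell}
\end{minipage}
\end{center}
\caption{A sketch of dummification with a typed \pilcode{undefined} value}
\label{code:dde-undefined}
\end{codeFloat}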
\end{document}

View File

@@ -1,14 +0,0 @@
\documentclass[main.tex]{subfiles}
\begin{document}
GRIN is short for \emph{Graph Reduction Intermediate Notation}. It consists of an intermediate representation language (IR in the following) as well as the entire compiler back end framework built around it. GRIN tries to resolve the issues highlighted in Section~\ref{sec-intro} by using interprocedural whole program optimization.
Interprocedural program analysis is a type of data-flow analysis that propagates information about certain program elements through function calls. Using interprocedural analyses instead of intraprocedural ones allows for optimizations across functions. This means the framework can handle the large sets of small, interconnected functions encouraged by the composable programming style.
Whole program analysis enables optimizations across modules. This type of data-flow analysis has all the available information about the program at once. As a consequence, it is possible to analyze and optimize global functions. With the help of whole program analysis, laziness can be made explicit. In fact, the evaluation of suspended computations in GRIN is done by an ordinary function called \pilcode{eval}. This is a global function uniquely generated for each program, meaning it can be optimized just like any other function by using whole program analysis.
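Program~code~\ref{code:eval-example} shows a minimal sketch of such a generated \pilcode{eval} function for a hypothetical program that only constructs \pilcode{CInt} nodes and suspended applications of an \pilcode{add} function; the set of case alternatives is always specific to the compiled program.
\begin{codeFloat}[h]
\begin{center}
\begin{minipage}{0.5\textwidth}
\begin{haskell}
eval p =
  v <- fetch p
  case v of
    -- already evaluated value
    (CInt n) -> pure v
    -- suspended call: evaluate, then overwrite the thunk
    (Fadd x y) ->
      r <- add x y
      update p r
      pure r
\end{haskell}
\end{minipage}
\end{center}
\caption{A sketch of a program-specific \pilcode{eval} function}
\label{code:eval-example}
\end{codeFloat}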
Finally, since the analyses and optimizations are implemented on a general intermediate representation, any front end language can benefit from the features provided by the GRIN back end. The intermediate layer of GRIN between the front end language and the low level machine code serves the purpose of eliminating functional artifacts from programs. This is achieved by optimizing program transformations specific to the GRIN IR and to functional languages in general. The simplified programs can then be optimized further by conventional techniques already available. For example, it is possible to compile GRIN to LLVM and take advantage of an entire compiler framework providing a wide array of powerful tools and features.
% TODO: refer LLVM section
\end{document}

View File

@@ -1,14 +0,0 @@
\documentclass[main.tex]{subfiles}
\begin{document}
Over the last few years, the functional programming paradigm has become even more popular and prominent than it was before. More and more industrial applications emerge, the paradigm itself keeps evolving, existing functional languages are being refined day by day, and even completely new languages appear. Yet, it seems that the corresponding compiler technology lags behind.
Functional languages come with a multitude of interesting features that allow us to write programs on higher abstraction levels. Some of these features include higher-order functions, laziness and very sophisticated type systems. Although these features make writing code more convenient, they also complicate the compilation process.
Compiler front ends usually handle these problems very well, but the back ends often struggle to produce efficient low level code. The reason for this is that back ends have a hard time optimizing code containing \emph{functional artifacts}, the by-products of the high-level language features mentioned earlier. For example, higher-order functions can introduce unknown function calls, and laziness can result in implicit value evaluation, which can prove very hard to optimize. As a consequence, compilers generally compromise on low level efficiency for the sake of high-level language features.
Moreover, the paradigm itself encourages a programming style that further complicates the situation. Functional code usually consists of many small functions rather than a few big ones. This style of coding results in more composable programs, but it also presents more difficulties for compilation, since optimizing only individual functions is no longer sufficient.
In order to resolve these problems, we need a compiler back end that can optimize across functions as well as allow the optimization of laziness in some way. Also, it would be beneficial if the back end could theoretically handle any front end language.
\end{document}

View File

@@ -1,60 +0,0 @@
\documentclass[main.tex]{subfiles}
\begin{document}
%TODO: GRIN: functional domain - imperative domain, LLVM: architecture independent domain - architecture specific domain
LLVM is a collection of compiler technologies consisting of an intermediate representation called the LLVM IR, a modularly built compiler framework and many other tools built on these technologies. This section discusses the benefits and challenges of compiling GRIN to LLVM.
\subsection{Benefits and Challenges}
The main advantage LLVM has over other, CISC- or RISC-based low level languages lies in its modular design and library based structure. The compiler framework built around LLVM is entirely customizable and can generate highly optimized low level machine code for most architectures. Furthermore, it offers a vast range of tools and features out of the box, such as various debugging tools or compilation to WebAssembly.
However, compiling unrefined functional code to LLVM does not yield the results one would expect. Since LLVM was mainly designed for imperative languages, functional programs may prove to be difficult to optimize. The reason for this is that functional artifacts or even just the general structuring of functional programs can render conventional optimization techniques useless.
While LLVM acts as a transitional layer between the architecture independent and architecture specific domains, GRIN serves the same purpose for the functional and imperative domains. Figure~\ref{fig:grin-back-end} illustrates this domain separation. The purpose of GRIN is to eliminate functional artifacts and restructure functional programs in such a way that they can be efficiently optimized by conventional techniques.
\begin{figure}[h]
\centering
\begin{adjustbox}{scale = 1.4}
\tikzset{every loop/.style={-{Stealth[scale=1.5]}}}
\begin{tikzpicture}[ node distance = 1.2cm and 1.5cm
, on grid
, loop/.append style={-triangle 60}
]
\node [draw=black] (haskell) {Haskell};
\node [draw=black] (idris) [left =of haskell] {Idris};
\node [draw=black] (agda) [right =of haskell] {Agda};
\node [draw=black] (grin) [below =of haskell] {GRIN};
\node [draw=black] (llvm) [below =of grin] {LLVM};
\path[-{Stealth[scale=1.5]}]
(idris) edge [] (grin)
(haskell) edge [] (grin)
(agda) edge [] (grin)
(grin) edge [] (llvm);
\end{tikzpicture}
\end{adjustbox}
\caption{Possible representation of different functional languages}
\label{fig:grin-back-end}
\end{figure}
The main challenge of compiling GRIN to LLVM has to do with the discrepancy between the respective type systems of these languages: GRIN is untyped, while LLVM has static typing. In order to make compilation to LLVM possible, we need a typed representation for GRIN as well. Fortunately, this problem can be circumvented by implementing a type inference algorithm for the language. To achieve this, we can extend an already existing component of the framework, the heap points-to data-flow analysis.
\subsection{Heap points-to Analysis}
Heap points-to analysis (HPT in the following), or pointer analysis, is a commonly used data-flow analysis in the context of imperative languages. Its result contains information about the possible variables or heap locations a given pointer can point to. In the context of GRIN, it is used to determine the set of data constructors (or nodes) a given variable could have been constructed with. The result is a mapping of variables and abstract heap locations to sets of data constructors.
%TODO: example, referece
The original version of the analysis presented in \cite{boquist-phd} and further detailed in \cite{boquist-grin} only supports node level granularity. This means that the types of literals are not differentiated; they are all unified under a common ``basic value'' type. Therefore, the analysis cannot be used for type inference as it is. In order to facilitate type inference, HPT has to be extended so that it propagates type information about literals as well. This can be achieved by slightly adjusting the original algorithm. Using the result of the modified analysis, we can generate LLVM IR code from GRIN.
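Analysis~example~\ref{analysis:hpt} illustrates the result of the extended analysis on a small, hypothetical program; the notation used here for primitive types and abstract heap locations ($Int64$ and $\ell_1$) is only a sketch of the actual abstract domain.
\begin{analysisFloat}[h]
\begin{center}
\begin{minipage}{0.35\textwidth}
\begin{haskell}
main =
  n <- pure 5
  p <- store (CInt n)
  v <- fetch p
  pure v
\end{haskell}
\subcaption{Input program}
\end{minipage}
\hspace{1cm}
\begin{minipage}{0.40\textwidth}
\begin{tcolorbox}[tab2,tabularx={l|r}]
Variable & Abstract value \\
\hline\hline
\pilcode{n} & $\set{Int64}$ \\\hline
\pilcode{p} & $\set{\ell_1}$ \\\hline
\pilcode{v} & $\set{CInt[Int64]}$ \\\hline
$\ell_1$ & $\set{CInt[Int64]}$ \\
\end{tcolorbox}
\subcaption{Analysis result}
\end{minipage}
\end{center}
\caption{A sketch of the extended heap points-to analysis}
\label{analysis:hpt}
\end{analysisFloat}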
%TODO: reference UHC paper
However, in some cases the monomorphic type inference algorithm presented above is not sufficient. For example, the Glasgow Haskell Compiler has polymorphic primitive operations. This means that, although GRIN itself is a monomorphic language, certain compiler front ends can introduce external polymorphic functions into GRIN programs. To resolve this problem, we have to further extend the heap points-to analysis. The algorithm now needs a table of external functions with their respective type information. These functions \emph{can} be polymorphic, hence they need special treatment during the analysis. When encountering an external function application, the algorithm has to determine the concrete type of the return value based on the possible types of the function arguments. Essentially, it has to fill all the type variables present in the type of the return value with concrete types. This can be achieved by unification. Fortunately, the unification algorithm can be expressed in terms of the same data-flow operations HPT already uses.
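As a hypothetical example, consider an external function \pilcode{id} with type $\forall a.\, a \rightarrow a$. If at a call site \pilcode{y <- id x} the analysis has already computed the possible values of \pilcode{x} to be $\set{CInt[Int64]}$, then unifying $a$ with this set yields $\set{CInt[Int64]}$ for \pilcode{y} as well. The same mechanism applies when only parts of the return type contain type variables.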
\end{document}

Binary file not shown.

View File

@@ -1,48 +0,0 @@
\documentclass[10pt,a4paper,oneside]{article}
\usepackage{style}
\usepackage{subfiles}
%TODO: remove vspace from title
\title{\vspace{-2cm}A modern look at GRIN,\\ an optimizing functional language back end}
%\title{Dead Code Elimination for GRIN with Interprocedural Whole-Program Analysis}
\date{\today}
\author{Péter Dávid Podlovics, Csaba Hruska}
\begin{document}
\maketitle
\subfile{abstract}
\section{Introduction} \label{sec-intro}
\subfile{introduction}
\section{Related Work}
\subfile{related-work}
\section{Graph Reduction Intermediate Notation}
\subfile{grin}
\section{Compiling to LLVM}
\subfile{llvm}
\section{Dead Code Elimination}
\subfile{dce}
\section{Dead Data Elimination}
\subfile{dde}
% \section{Applications of Dead Data Elimination}
% \subfile{dde-app}
% \section{Compiled Abstract Interpretation}
% \section{Results}
% \section{Conclusion}
\section*{Acknowledgements}
\subfile{acknowledgements}
\subfile{bibliography}
\end{document}

View File

@@ -1,22 +0,0 @@
\documentclass[main.tex]{subfiles}
\begin{document}
\subsection{GRIN}
%TODO: reference to hbcc
The original GRIN framework was developed by U. Boquist, first described in an article~\cite{boquist-grin} and then in his PhD thesis~\cite{boquist-phd}. This version of GRIN used the Chalmers Haskell-B Compiler as its front end and RISC as its back end. At that time, this implementation of GRIN already compared favorably to the then-current version 4.01 of the Glasgow Haskell Compiler.
\subsection{Adaptations of GRIN}
Other compilers also use GRIN as their back end. Probably the most notable one is the Utrecht Haskell Compiler~\cite{uhc}. UHC is a completely standalone Haskell compiler with its own front end. The main idea behind UHC is to use attribute grammars to handle the ever-growing complexity of compiler construction in an easily manageable way.
\subsection{Other Intermediate Representations}
GRIN is not the only IR available for functional languages. In fact, it is not even the most advanced one. The Haskell Research Compiler~\cite{hrc} and the MLton~\cite{mlton} Standard ML compiler both use IR languages very similar to GRIN. However, these IRs are built from basic blocks instead of monadic bindings. This approach opens up a whole spectrum of new optimization opportunities. For example, the Haskell Research Compiler uses SIMD vectorization passes in its optimization pipeline, and achieves performance metrics comparable to native C~\cite{haskell-gap}.
\subsection{Compilers with LLVM Back Ends}
In the imperative setting, probably the most well-known compiler with an LLVM back end is Clang~\cite{clang}. Clang's main goal is to provide a production quality compiler with a reusable, library-like structure. However, certain functional language compilers also have LLVM back ends. The two most notable ones are the Glasgow Haskell Compiler~\cite{ghc} and MLton~\cite{mlton-llvm}.
\end{document}

View File

@@ -1,244 +0,0 @@
\ProvidesPackage{style}
\textwidth 15.0cm
\textheight 22.0cm
\oddsidemargin 0.4cm
\evensidemargin 0.4cm
\topmargin 0.0cm
\frenchspacing
\pagestyle{myheadings}
\setcounter{tocdepth}{1}
\usepackage[toc,page]{appendix}
\usepackage{hyperref}
\usepackage{float}
\usepackage{newfloat}
\usepackage{footnote}
\usepackage{subcaption}
\usepackage{cite}
\usepackage{url}
\usepackage{caption}
\usepackage{graphicx}
\graphicspath{ {../img/} }
\usepackage[bottom]{footmisc}
\usepackage{enumitem}
\setlist{nosep}
\usepackage[utf8]{inputenc}
\usepackage{etoolbox}
\usepackage{adjustbox}
\usepackage{latexsym,amssymb,amsmath,mathtools}
\usepackage{algorithm}
\usepackage{algorithmicx}
\usepackage{algpseudocode}
\floatstyle{plain}
\DeclareFloatingEnvironment
[ name = {Program code}
, placement = htbp
, fileext = loc
, within = section
]{codeFloat}
\DeclareFloatingEnvironment
[ name = {Table}
, placement = htbp
, fileext = lot
, within = section
]{tableFloat}
\DeclareFloatingEnvironment
[ name = {Analysis example}
, placement = htbp
, fileext = loc
, within = section
]{analysisFloat}
\DeclarePairedDelimiter\set\{\}
\hypersetup{%
colorlinks=true,% hyperlinks will be coloured
allcolors=blue,% hyperlink text will be blue
}
\newcommand*\Let[2]{\State #1 $\gets$ #2}
\algrenewcommand\algorithmicrequire{\textbf{Precondition:}}
\algrenewcommand\algorithmicensure{\textbf{Postcondition:}}
\newcommand*\patBind[3]{\State \textbf{#1} #2 $\rightarrow$ #3}
\algblockdefx[CaseBlock]{case}{endCase} %
[1]{\textbf{case} $#1$ \textbf{of}} %
{}
\algblockdefx[PatMatch]{patMatch}{endPatMatch} %
[3]{ \Call{#1}{\textbf{#2} #3} = } %
{}
\usepackage{minted}
\usepackage[table]{xcolor}
\usepackage{listings}
\usepackage{lstautogobble}
\definecolor{identifierColor}{rgb}{0.65,0.16,0.16}
\definecolor{keywordColor}{rgb}{0.65,0.20,0.90}
\lstnewenvironment{code}
{ \lstset
{ language = Haskell
, basicstyle = \small\ttfamily
, breaklines = true
, backgroundcolor = \color{gray!15}
, frame = single
, autogobble = true
, xleftmargin = 0.1cm
, xrightmargin = 0.2cm
%, identifierstyle = \color{gray}
, keywordstyle = \color{violet}
, morekeywords = {}
, escapechar = \%
}
}
{}
\PassOptionsToPackage{usenames,dvipsnames,svgnames}{xcolor}
\usepackage{tikz}
\usetikzlibrary{arrows,arrows.meta,shapes,positioning,automata,calc}
\usepackage{pgfplots}
\usepackage{tcolorbox}
\usepackage{tabularx}
\usepackage{array}
\usepackage{zref-savepos}
\usepackage{diagbox}
\usepackage{colortbl}
\tcbuselibrary{skins}
\tcbuselibrary{minted}
\newcolumntype{Y}{>{\raggedleft\arraybackslash}X}
\tcbset
{ tab2/.style =
{ enhanced
, fonttitle=\bfseries
, fontupper=\normalsize\sffamily
, colback = gray!5!white
, colframe = gray!75!black
, colbacktitle=yellow!40!white
, coltitle=black,center title
}
, hbox
}
\newtcblisting{haskell}
{ listing engine = minted
, minted style = colorful
, minted language = Haskell
, minted options = { fontsize = \small
, breaklines
, autogobble
, linenos
, numbersep = 3mm
, escapeinside = \%\%
}
, colback = gray!5!white
, colframe = gray!75!black
, listing only
, left = 5mm
, enhanced
, overlay = { \begin{tcbclipinterior}
\fill[gray!80!blue!20!white] (frame.south west) rectangle ([xshift=5mm]frame.north west);
\end{tcbclipinterior}
}
}
\newtcblisting{oneLineHaskell}
{ listing engine = minted
, minted style = colorful
, minted language = Haskell
, minted options = { fontsize = \normalsize
, breaklines
, autogobble
, numbersep = 3mm
, escapeinside = \%\%
}
, colback = gray!5!white
, colframe = gray!75!black
, listing only
, left = 2mm
, top = 1mm
, bottom = 1mm
, enhanced
}
\colorlet{lightgreen}{green!50!white}
\colorlet{lightblue}{blue!40!white}
\colorlet{lightyellow}{yellow!50!white}
\colorlet{lightred}{red!40!white}
\newcommand*{\paper}{thesis}
\newcommand*{\ext}[1]{\texttt{#1}}
\newcommand*{\chk}[1]{\texttt{#1}}
\newcommand*{\lvar}[1]{\textit{#1}}
\newcommand*{\ilcode}[1]{\mintinline{Haskell}{#1}} % inline code
\newcommand*{\pilcode}[1]{\texttt{#1}} % plain inline code
% NOTE: This command need styRectDef to be defined locally
\newcommand*{\tikzcustom}[0]
{
% \tikzset{styRectDef/.style = {rectangle, rounded corners, draw=black, inner xsep=6mm, inner ysep=3mm}}
\tikzset{styRectGn/.style = {styRectDef, draw=green, fill=green!20}}
\tikzset{styRectBl/.style = {styRectDef, draw=cyan, fill=cyan!40}}
\tikzset{styRectGy/.style = {styRectDef, draw=gray, fill=gray!17}}
\tikzset{styConn/.style = {very thick, ->, -latex, shorten <=1mm, shorten >=1mm}}
\tikzset{styAnnotDef/.style = {rectangle, rounded corners, draw=black, inner xsep=2mm, inner ysep=1mm}}
\tikzset{styLabel/.style = {styAnnotDef, draw=black, fill=gray!10}}
}
\newcounter{NoTableEntry}
\renewcommand*{\theNoTableEntry}{NTE-\the\value{NoTableEntry}}
\newcommand*{\notableentry}{%
\multicolumn{1}{@{}c@{}|}{%
\stepcounter{NoTableEntry}%
\vadjust pre{\zsavepos{\theNoTableEntry t}}% top
\vadjust{\zsavepos{\theNoTableEntry b}}% bottom
\zsavepos{\theNoTableEntry l}% left
\hspace{0pt plus 1filll}%
\zsavepos{\theNoTableEntry r}% right
\tikz[overlay]{%
\draw[red]
let
\n{llx}={\zposx{\theNoTableEntry l}sp-\zposx{\theNoTableEntry r}sp},
\n{urx}={0},
\n{lly}={\zposy{\theNoTableEntry b}sp-\zposy{\theNoTableEntry r}sp},
\n{ury}={\zposy{\theNoTableEntry t}sp-\zposy{\theNoTableEntry r}sp}
in
(\n{llx}, \n{lly}) -- (\n{urx}, \n{ury})
(\n{llx}, \n{ury}) -- (\n{urx}, \n{lly})
;
}%
}%
}
\makeatletter
\newcommand{\captionabove}[2][]
{
\vskip-\abovecaptionskip
\vskip+\belowcaptionskip
\ifx\@nnil#1\@nnil
\caption{#2}%
\else
\caption[#1]{#2}%
\fi
\vskip+\abovecaptionskip
\vskip-\belowcaptionskip
}
% automatic period at the end of footnote
\makeatletter%
\long\def\@makefntext#1{%
\parindent 1em\noindent \hb@xt@ 1.8em{\hss\@makefnmark}#1.}
\makeatother

View File

@@ -6,4 +6,4 @@
*.out
*.gz
*.listing
_minted-main/
**/_minted-main/

View File

@@ -1,9 +0,0 @@
*.toc
*.aux
*.bbl
*.blg
*.log
*.out
*.gz
*.listing
_minted-main/