diff --git a/Makefile b/Makefile
index 4c8b186..022945e 100644
--- a/Makefile
+++ b/Makefile
@@ -26,7 +26,7 @@ VALID_IDRIS_VERSION_REGEXP = "1.3.2.*"
 
 -include custom.mk
 
-.PHONY: ttimp idris2 prelude test base clean lib_clean check_version
+.PHONY: ttimp idris2 prelude test base clean lib_clean check_version idris2c dist/idris2.c
 
 all: idris2 libs test
 
@@ -93,3 +93,9 @@ install-libs: libs
 	make -C libs/base install IDRIS2=../../idris2
 	make -C libs/network install IDRIS2=../../idris2 IDRIS2_VERSION=${IDRIS2_VERSION}
 	make -C libs/contrib install IDRIS2=../../idris2
+
+dist/idris2.c:
+	idris --build idris2-mkc.ipkg
+	cat idris2.c dist/rts/idris_main.c > dist/idris2.c
+
+idris2c: dist/idris2.c
diff --git a/dist/build.sh b/dist/build.sh
new file mode 100644
index 0000000..f964b59
--- /dev/null
+++ b/dist/build.sh
@@ -0,0 +1,2 @@
+make -C rts
+clang idris2.c -o idris2 -I rts -L rts -lidris_rts -lpthread -lgmp -lm
diff --git a/dist/config.mk b/dist/config.mk
new file mode 100644
index 0000000..69f3183
--- /dev/null
+++ b/dist/config.mk
@@ -0,0 +1,29 @@
+RANLIB          ?=ranlib
+CFLAGS          :=-O2 -Wall -std=c99 -pipe -fdata-sections -ffunction-sections -D_POSIX_C_SOURCE=200809L -DHAS_PTHREAD $(CFLAGS)
+
+ifneq (, $(findstring bsd, $(MACHINE)))
+	GMP_INCLUDE_DIR      :=
+else
+	GMP_INCLUDE_DIR      :=-I/usr/local/include
+endif
+
+MACHINE         := $(shell $(CC) -dumpmachine)
+ifneq (, $(findstring darwin, $(MACHINE)))
+	OS      :=darwin
+else ifneq (, $(findstring cygwin, $(MACHINE)))
+	OS      :=windows
+else ifneq (, $(findstring mingw, $(MACHINE)))
+	OS      :=windows
+else ifneq (, $(findstring windows, $(MACHINE)))
+	OS      :=windows
+else
+	OS      :=unix
+endif
+
+ifeq ($(OS),darwin)
+	SHLIB_SUFFIX    :=.dylib
+else ifeq ($(OS),windows)
+	SHLIB_SUFFIX    :=.DLL
+else
+	SHLIB_SUFFIX    :=.so
+endif
diff --git a/dist/rts/Makefile b/dist/rts/Makefile
new file mode 100644
index 0000000..122677c
--- /dev/null
+++ b/dist/rts/Makefile
@@ -0,0 +1,41 @@
+include ../config.mk
+
+OBJS = idris_rts.o idris_heap.o idris_gc.o idris_gmp.o idris_bitstring.o \
+       idris_opts.o idris_stats.o idris_utf8.o idris_stdfgn.o \
+       idris_buffer.o getline.o idris_net.o
+HDRS = idris_rts.h idris_heap.h idris_gc.h idris_gmp.h idris_bitstring.h \
+       idris_opts.h idris_stats.h idris_stdfgn.h idris_net.h \
+       idris_buffer.h idris_utf8.h getline.h
+CFLAGS := $(CFLAGS)
+CFLAGS += $(GMP_INCLUDE_DIR) $(GMP) -DIDRIS_TARGET_OS="\"$(OS)\""
+CFLAGS += -DIDRIS_TARGET_TRIPLE="\"$(MACHINE)\""
+
+ifeq ($(OS), windows)
+	OBJS += windows/win_utils.o
+else
+	CFLAGS += -fPIC
+endif
+
+ifndef IDRIS_GMP
+	OBJS += mini-gmp.o
+	HDRS += mini-gmp.h
+endif
+
+LIBTARGET = libidris_rts.a
+
+build: $(LIBTARGET) $(DYLIBTARGET)
+
+$(LIBTARGET) : $(OBJS)
+	$(AR) rc $(LIBTARGET) $(OBJS)
+	$(RANLIB) $(LIBTARGET)
+
+install :
+	mkdir -p $(TARGET)
+	install $(LIBTARGET) $(HDRS) $(TARGET)
+
+clean :
+	rm -f $(OBJS) $(LIBTARGET) $(DYLIBTARGET)
+
+$(OBJS): $(HDRS)
+
+.PHONY: build install clean
diff --git a/dist/rts/getline.c b/dist/rts/getline.c
new file mode 100644
index 0000000..2e83528
--- /dev/null
+++ b/dist/rts/getline.c
@@ -0,0 +1,82 @@
+/*  $NetBSD: fgetln.c,v 1.9 2008/04/29 06:53:03 martin Exp $  */
+
+/*-
+ * Copyright (c) 2011 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Christos Zoulas.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+
+ssize_t
+getdelim(char **buf, size_t *bufsiz, int delimiter, FILE *fp)
+{
+    char *ptr, *eptr;
+
+    if (*buf == NULL || *bufsiz == 0) {
+        *bufsiz = BUFSIZ;
+        if ((*buf = malloc(*bufsiz)) == NULL)
+            return -1;
+    }
+
+    for (ptr = *buf, eptr = *buf + *bufsiz;;) {
+        int c = fgetc(fp);
+        if (c == -1) {
+            if (feof(fp)) {
+                *ptr = '\0';
+                return ptr - *buf;
+            } else {
+                return -1;
+            }
+        }
+        *ptr++ = c;
+        if (c == delimiter) {
+            *ptr = '\0';
+            return ptr - *buf;
+        }
+        if (ptr + 2 >= eptr) {
+            char *nbuf;
+            size_t nbufsiz = *bufsiz * 2;
+            ssize_t d = ptr - *buf;
+            if ((nbuf = realloc(*buf, nbufsiz)) == NULL)
+                return -1;
+            *buf = nbuf;
+            *bufsiz = nbufsiz;
+            eptr = nbuf + nbufsiz;
+            ptr = nbuf + d;
+        }
+    }
+}
+
+ssize_t
+getline(char **buf, size_t *bufsiz, FILE *fp)
+{
+  return getdelim(buf, bufsiz, '\n', fp);
+}
diff --git a/dist/rts/getline.h b/dist/rts/getline.h
new file mode 100644
index 0000000..a138834
--- /dev/null
+++ b/dist/rts/getline.h
@@ -0,0 +1,7 @@
+#ifndef GETLINE_H
+#define GETLINE_H
+#include <stdlib.h>
+#include <stdio.h>
+ssize_t getdelim(char **buf, size_t *bufsiz, int delimiter, FILE *fp);
+ssize_t getline(char **buf, size_t *bufsiz, FILE *fp);
+#endif // GETLINE_H
diff --git a/dist/rts/idris_bitstring.c b/dist/rts/idris_bitstring.c
new file mode 100644
index 0000000..d8f9ac6
--- /dev/null
+++ b/dist/rts/idris_bitstring.c
@@ -0,0 +1,593 @@
+#include <assert.h>
+
+#include "idris_rts.h"
+
+VAL idris_b8const(VM *vm, uint8_t a) {
+    return MKINT(a);
+}
+
+VAL idris_b16const(VM *vm, uint16_t a) {
+    return MKINT(a);
+}
+
+VAL idris_b32const(VM *vm, uint32_t a) {
+    Bits32 * cl = iallocate(vm, sizeof(*cl), 0);
+    SETTY(cl, CT_BITS32);
+    cl->bits32 = a;
+    return (VAL)cl;
+}
+
+VAL idris_b64const(VM *vm, uint64_t a) {
+    Bits64 * cl = iallocate(vm, sizeof(*cl), 0);
+    SETTY(cl, CT_BITS64);
+    cl->bits64 = a;
+    return (VAL)cl;
+}
+
+VAL idris_b8(VM *vm, VAL a) {
+    return idris_b8const(vm, GETINT(a));
+}
+
+VAL idris_b16(VM *vm, VAL a) {
+    return idris_b16const(vm, GETINT(a));
+}
+
+VAL idris_b32(VM *vm, VAL a) {
+    return idris_b32const(vm, GETINT(a));
+}
+
+VAL idris_b64(VM *vm, VAL a) {
+    return idris_b64const(vm, GETINT(a));
+}
+
+VAL idris_castB32Int(VM *vm, VAL a) {
+    return MKINT(GETBITS32(a));
+}
+
+VAL idris_b8Plus(VM *vm, VAL a, VAL b) {
+    uint8_t A = GETBITS8(a);
+    uint8_t B = GETBITS8(b);
+    return idris_b8const(vm, A + B);
+}
+
+VAL idris_b8Minus(VM *vm, VAL a, VAL b) {
+    uint8_t A = GETBITS8(a);
+    uint8_t B = GETBITS8(b);
+    return idris_b8const(vm, A - B);
+}
+
+VAL idris_b8Times(VM *vm, VAL a, VAL b) {
+    uint8_t A = GETBITS8(a);
+    uint8_t B = GETBITS8(b);
+    return idris_b8const(vm, A * B);
+}
+
+VAL idris_b8UDiv(VM *vm, VAL a, VAL b) {
+    uint8_t A = GETBITS8(a);
+    uint8_t B = GETBITS8(b);
+    return idris_b8const(vm, A / B);
+}
+
+VAL idris_b8SDiv(VM *vm, VAL a, VAL b) {
+    int8_t A = GETBITS8(a);
+    int8_t B = GETBITS8(b);
+    return idris_b8const(vm, A / B);
+}
+
+VAL idris_b8URem(VM *vm, VAL a, VAL b) {
+    uint8_t A = GETBITS8(a);
+    uint8_t B = GETBITS8(b);
+    return idris_b8const(vm, A % B);
+}
+
+VAL idris_b8SRem(VM *vm, VAL a, VAL b) {
+    int8_t A = GETBITS8(a);
+    int8_t B = GETBITS8(b);
+    return idris_b8const(vm, A % B);
+}
+
+VAL idris_b8Lt(VM *vm, VAL a, VAL b) {
+    return MKINT((i_int) (GETBITS8(a) < GETBITS8(b)));
+}
+
+VAL idris_b8Gt(VM *vm, VAL a, VAL b) {
+    return MKINT((i_int) (GETBITS8(a) > GETBITS8(b)));
+}
+
+VAL idris_b8Eq(VM *vm, VAL a, VAL b) {
+    return MKINT((i_int) (GETBITS8(a) == GETBITS8(b)));
+}
+
+VAL idris_b8Lte(VM *vm, VAL a, VAL b) {
+    return MKINT((i_int) (GETBITS8(a) <= GETBITS8(b)));
+}
+
+VAL idris_b8Gte(VM *vm, VAL a, VAL b) {
+    return MKINT((i_int) (GETBITS8(a) >= GETBITS8(b)));
+}
+
+VAL idris_b8Compl(VM *vm, VAL a) {
+    uint8_t A = GETBITS8(a);
+    return idris_b8const(vm, ~ A);
+}
+
+VAL idris_b8And(VM *vm, VAL a, VAL b) {
+    uint8_t A = GETBITS8(a);
+    uint8_t B = GETBITS8(b);
+    return idris_b8const(vm, A & B);
+}
+
+VAL idris_b8Or(VM *vm, VAL a, VAL b) {
+    uint8_t A = GETBITS8(a);
+    uint8_t B = GETBITS8(b);
+    return idris_b8const(vm, A | B);
+}
+
+VAL idris_b8Xor(VM *vm, VAL a, VAL b) {
+    uint8_t A = GETBITS8(a);
+    uint8_t B = GETBITS8(b);
+    return idris_b8const(vm, A ^ B);
+}
+
+VAL idris_b8Shl(VM *vm, VAL a, VAL b) {
+    uint8_t A = GETBITS8(a);
+    uint8_t B = GETBITS8(b);
+    return idris_b8const(vm, A << B);
+}
+
+VAL idris_b8LShr(VM *vm, VAL a, VAL b) {
+    uint8_t A = GETBITS8(a);
+    uint8_t B = GETBITS8(b);
+    return idris_b8const(vm, A >> B);
+}
+
+VAL idris_b8AShr(VM *vm, VAL a, VAL b) {
+    int8_t A = GETBITS8(a);
+    int8_t B = GETBITS8(b);
+    return idris_b8const(vm, A >> B);
+}
+
+VAL idris_b16Plus(VM *vm, VAL a, VAL b) {
+    uint16_t A = GETBITS16(a);
+    uint16_t B = GETBITS16(b);
+    return idris_b16const(vm, A + B);
+}
+
+VAL idris_b16Minus(VM *vm, VAL a, VAL b) {
+    uint16_t A = GETBITS16(a);
+    uint16_t B = GETBITS16(b);
+    return idris_b16const(vm, A - B);
+}
+
+VAL idris_b16Times(VM *vm, VAL a, VAL b) {
+    uint16_t A = GETBITS16(a);
+    uint16_t B = GETBITS16(b);
+    return idris_b16const(vm, A * B);
+}
+
+VAL idris_b16UDiv(VM *vm, VAL a, VAL b) {
+    uint16_t A = GETBITS16(a);
+    uint16_t B = GETBITS16(b);
+    return idris_b16const(vm, A / B);
+}
+
+VAL idris_b16SDiv(VM *vm, VAL a, VAL b) {
+    int16_t A = GETBITS16(a);
+    int16_t B = GETBITS16(b);
+    return idris_b16const(vm, A / B);
+}
+
+VAL idris_b16URem(VM *vm, VAL a, VAL b) {
+    uint16_t A = GETBITS16(a);
+    uint16_t B = GETBITS16(b);
+    return idris_b16const(vm, A % B);
+}
+
+VAL idris_b16SRem(VM *vm, VAL a, VAL b) {
+    int16_t A = GETBITS16(a);
+    int16_t B = GETBITS16(b);
+    return idris_b16const(vm, A % B);
+}
+
+VAL idris_b16Lt(VM *vm, VAL a, VAL b) {
+    return MKINT((i_int) (GETBITS16(a) < GETBITS16(b)));
+}
+
+VAL idris_b16Gt(VM *vm, VAL a, VAL b) {
+    return MKINT((i_int) (GETBITS16(a) > GETBITS16(b)));
+}
+
+VAL idris_b16Eq(VM *vm, VAL a, VAL b) {
+    return MKINT((i_int) (GETBITS16(a) == GETBITS16(b)));
+}
+
+VAL idris_b16Lte(VM *vm, VAL a, VAL b) {
+    return MKINT((i_int) (GETBITS16(a) <= GETBITS16(b)));
+}
+
+VAL idris_b16Gte(VM *vm, VAL a, VAL b) {
+    return MKINT((i_int) (GETBITS16(a) >= GETBITS16(b)));
+}
+
+VAL idris_b16Compl(VM *vm, VAL a) {
+    uint16_t A = GETBITS16(a);
+    return idris_b16const(vm, ~ A);
+}
+
+VAL idris_b16And(VM *vm, VAL a, VAL b) {
+    uint16_t A = GETBITS16(a);
+    uint16_t B = GETBITS16(b);
+    return idris_b16const(vm, A & B);
+}
+
+VAL idris_b16Or(VM *vm, VAL a, VAL b) {
+    uint16_t A = GETBITS16(a);
+    uint16_t B = GETBITS16(b);
+    return idris_b16const(vm, A | B);
+}
+
+VAL idris_b16Xor(VM *vm, VAL a, VAL b) {
+    uint16_t A = GETBITS16(a);
+    uint16_t B = GETBITS16(b);
+    return idris_b16const(vm, A ^ B);
+}
+
+VAL idris_b16Shl(VM *vm, VAL a, VAL b) {
+    uint16_t A = GETBITS16(a);
+    uint16_t B = GETBITS16(b);
+    return idris_b16const(vm, A << B);
+}
+
+VAL idris_b16LShr(VM *vm, VAL a, VAL b) {
+    uint16_t A = GETBITS16(a);
+    uint16_t B = GETBITS16(b);
+    return idris_b16const(vm, A >> B);
+}
+
+VAL idris_b16AShr(VM *vm, VAL a, VAL b) {
+    int16_t A = GETBITS16(a);
+    int16_t B = GETBITS16(b);
+    return idris_b16const(vm, A >> B);
+}
+
+VAL idris_b32Plus(VM *vm, VAL a, VAL b) {
+    uint32_t A = GETBITS32(a);
+    uint32_t B = GETBITS32(b);
+    return idris_b32const(vm, A + B);
+}
+
+VAL idris_b32Minus(VM *vm, VAL a, VAL b) {
+    uint32_t A = GETBITS32(a);
+    uint32_t B = GETBITS32(b);
+    return idris_b32const(vm, A - B);
+}
+
+VAL idris_b32Times(VM *vm, VAL a, VAL b) {
+    uint32_t A = GETBITS32(a);
+    uint32_t B = GETBITS32(b);
+    return idris_b32const(vm, A * B);
+}
+
+VAL idris_b32UDiv(VM *vm, VAL a, VAL b) {
+    uint32_t A = GETBITS32(a);
+    uint32_t B = GETBITS32(b);
+    return idris_b32const(vm, A / B);
+}
+
+VAL idris_b32SDiv(VM *vm, VAL a, VAL b) {
+    int32_t A = GETBITS32(a);
+    int32_t B = GETBITS32(b);
+    return idris_b32const(vm, A / B);
+}
+
+VAL idris_b32URem(VM *vm, VAL a, VAL b) {
+    uint32_t A = GETBITS32(a);
+    uint32_t B = GETBITS32(b);
+    return idris_b32const(vm, A % B);
+}
+
+VAL idris_b32SRem(VM *vm, VAL a, VAL b) {
+    int32_t A = GETBITS32(a);
+    int32_t B = GETBITS32(b);
+    return idris_b32const(vm, A % B);
+}
+
+VAL idris_b32Lt(VM *vm, VAL a, VAL b) {
+    return MKINT((i_int) (GETBITS32(a) < GETBITS32(b)));
+}
+
+VAL idris_b32Gt(VM *vm, VAL a, VAL b) {
+    return MKINT((i_int) (GETBITS32(a) > GETBITS32(b)));
+}
+
+VAL idris_b32Eq(VM *vm, VAL a, VAL b) {
+    return MKINT((i_int) (GETBITS32(a) == GETBITS32(b)));
+}
+
+VAL idris_b32Lte(VM *vm, VAL a, VAL b) {
+    return MKINT((i_int) (GETBITS32(a) <= GETBITS32(b)));
+}
+
+VAL idris_b32Gte(VM *vm, VAL a, VAL b) {
+    return MKINT((i_int) (GETBITS32(a) >= GETBITS32(b)));
+}
+
+VAL idris_b32Compl(VM *vm, VAL a) {
+    uint32_t A = GETBITS32(a);
+    return idris_b32const(vm, ~ A);
+}
+
+VAL idris_b32And(VM *vm, VAL a, VAL b) {
+    uint32_t A = GETBITS32(a);
+    uint32_t B = GETBITS32(b);
+    return idris_b32const(vm, A & B);
+}
+
+VAL idris_b32Or(VM *vm, VAL a, VAL b) {
+    uint32_t A = GETBITS32(a);
+    uint32_t B = GETBITS32(b);
+    return idris_b32const(vm, A | B);
+}
+
+VAL idris_b32Xor(VM *vm, VAL a, VAL b) {
+    uint32_t A = GETBITS32(a);
+    uint32_t B = GETBITS32(b);
+    return idris_b32const(vm, A ^ B);
+}
+
+VAL idris_b32Shl(VM *vm, VAL a, VAL b) {
+    uint32_t A = GETBITS32(a);
+    uint32_t B = GETBITS32(b);
+    return idris_b32const(vm, A << B);
+}
+
+VAL idris_b32LShr(VM *vm, VAL a, VAL b) {
+    uint32_t A = GETBITS32(a);
+    uint32_t B = GETBITS32(b);
+    return idris_b32const(vm, A >> B);
+}
+
+VAL idris_b32AShr(VM *vm, VAL a, VAL b) {
+    int32_t A = GETBITS32(a);
+    int32_t B = GETBITS32(b);
+    return idris_b32const(vm, A >> B);
+}
+
+VAL idris_b64Plus(VM *vm, VAL a, VAL b) {
+    uint64_t A = GETBITS64(a);
+    uint64_t B = GETBITS64(b);
+    return idris_b64const(vm, A + B);
+}
+
+VAL idris_b64Minus(VM *vm, VAL a, VAL b) {
+    uint64_t A = GETBITS64(a);
+    uint64_t B = GETBITS64(b);
+    return idris_b64const(vm, A - B);
+}
+
+VAL idris_b64Times(VM *vm, VAL a, VAL b) {
+    uint64_t A = GETBITS64(a);
+    uint64_t B = GETBITS64(b);
+    return idris_b64const(vm, A * B);
+}
+
+VAL idris_b64UDiv(VM *vm, VAL a, VAL b) {
+    uint64_t A = GETBITS64(a);
+    uint64_t B = GETBITS64(b);
+    return idris_b64const(vm, A / B);
+}
+
+VAL idris_b64SDiv(VM *vm, VAL a, VAL b) {
+    int64_t A = GETBITS64(a);
+    int64_t B = GETBITS64(b);
+    return idris_b64const(vm, A / B);
+}
+
+VAL idris_b64URem(VM *vm, VAL a, VAL b) {
+    uint64_t A = GETBITS64(a);
+    uint64_t B = GETBITS64(b);
+    return idris_b64const(vm, A % B);
+}
+
+VAL idris_b64SRem(VM *vm, VAL a, VAL b) {
+    int64_t A = GETBITS64(a);
+    int64_t B = GETBITS64(b);
+    return idris_b64const(vm, A % B);
+}
+
+VAL idris_b64Lt(VM *vm, VAL a, VAL b) {
+    uint64_t A = GETBITS64(a);
+    uint64_t B = GETBITS64(b);
+    return MKINT((i_int) (A < B));
+}
+
+VAL idris_b64Gt(VM *vm, VAL a, VAL b) {
+    uint64_t A = GETBITS64(a);
+    uint64_t B = GETBITS64(b);
+    return MKINT((i_int) (A > B));
+}
+
+VAL idris_b64Eq(VM *vm, VAL a, VAL b) {
+    uint64_t A = GETBITS64(a);
+    uint64_t B = GETBITS64(b);
+    return MKINT((i_int) (A == B));
+}
+
+VAL idris_b64Lte(VM *vm, VAL a, VAL b) {
+    uint64_t A = GETBITS64(a);
+    uint64_t B = GETBITS64(b);
+    return MKINT((i_int) (A <= B));
+}
+
+VAL idris_b64Gte(VM *vm, VAL a, VAL b) {
+    uint64_t A = GETBITS64(a);
+    uint64_t B = GETBITS64(b);
+    return MKINT((i_int) (A >= B));
+}
+
+VAL idris_b64Compl(VM *vm, VAL a) {
+    uint64_t A = GETBITS64(a);
+    return idris_b64const(vm, ~ A);
+}
+
+VAL idris_b64And(VM *vm, VAL a, VAL b) {
+    uint64_t A = GETBITS64(a);
+    uint64_t B = GETBITS64(b);
+    return idris_b64const(vm, A & B);
+}
+
+VAL idris_b64Or(VM *vm, VAL a, VAL b) {
+    uint64_t A = GETBITS64(a);
+    uint64_t B = GETBITS64(b);
+    return idris_b64const(vm, A | B);
+}
+
+VAL idris_b64Xor(VM *vm, VAL a, VAL b) {
+    uint64_t A = GETBITS64(a);
+    uint64_t B = GETBITS64(b);
+    return idris_b64const(vm, A ^ B);
+}
+
+VAL idris_b64Shl(VM *vm, VAL a, VAL b) {
+    uint64_t A = GETBITS64(a);
+    uint64_t B = GETBITS64(b);
+    return idris_b64const(vm, A << B);
+}
+
+VAL idris_b64LShr(VM *vm, VAL a, VAL b) {
+    uint64_t A = GETBITS64(a);
+    uint64_t B = GETBITS64(b);
+    return idris_b64const(vm, A >> B);
+}
+
+VAL idris_b64AShr(VM *vm, VAL a, VAL b) {
+    int64_t A = GETBITS64(a);
+    int64_t B = GETBITS64(b);
+    return idris_b64const(vm, A >> B);
+}
+
+VAL idris_b8Z16(VM *vm, VAL a) {
+    uint8_t A = GETBITS8(a);
+    return idris_b16const(vm, A);
+}
+
+VAL idris_b8Z32(VM *vm, VAL a) {
+    uint8_t A = GETBITS8(a);
+    return idris_b32const(vm, A);
+}
+
+VAL idris_b8Z64(VM *vm, VAL a) {
+    uint8_t A = GETBITS8(a);
+    return idris_b64const(vm, A);
+}
+
+VAL idris_b8S16(VM *vm, VAL a) {
+    int8_t A = GETBITS8(a);
+    return idris_b16const(vm, (int16_t) A);
+}
+
+VAL idris_b8S32(VM *vm, VAL a) {
+    int8_t A = GETBITS8(a);
+    return idris_b32const(vm, (int32_t) A);
+}
+
+VAL idris_b8S64(VM *vm, VAL a) {
+    int8_t A = GETBITS8(a);
+    return idris_b64const(vm, (int64_t) A);
+}
+
+VAL idris_b16Z32(VM *vm, VAL a) {
+    uint16_t A = GETBITS16(a);
+    return idris_b32const(vm, (uint32_t) A);
+}
+
+VAL idris_b16Z64(VM *vm, VAL a) {
+    uint16_t A = GETBITS16(a);
+    return idris_b64const(vm, (uint64_t) A);
+}
+
+VAL idris_b16S32(VM *vm, VAL a) {
+    int16_t A = GETBITS16(a);
+    return idris_b32const(vm, (int32_t) A);
+}
+
+VAL idris_b16S64(VM *vm, VAL a) {
+    int16_t A = GETBITS16(a);
+    return idris_b64const(vm, (int64_t) A);
+}
+
+VAL idris_b16T8(VM *vm, VAL a) {
+    uint16_t A = GETBITS16(a);
+    return idris_b8const(vm, (uint8_t) A);
+}
+
+VAL idris_b32Z64(VM *vm, VAL a) {
+    uint32_t A = GETBITS32(a);
+    return idris_b64const(vm, (uint64_t) A);
+}
+
+VAL idris_b32S64(VM *vm, VAL a) {
+    int32_t A = GETBITS32(a);
+    return idris_b64const(vm, (int64_t) A);
+}
+
+VAL idris_b32T8(VM *vm, VAL a) {
+    uint32_t A = GETBITS32(a);
+    return idris_b8const(vm, (uint8_t) A);
+}
+
+VAL idris_b32T16(VM *vm, VAL a) {
+    uint32_t A = GETBITS32(a);
+    return idris_b16const(vm, (uint16_t) A);
+}
+
+VAL idris_b64T8(VM *vm, VAL a) {
+    uint64_t A = GETBITS64(a);
+    return idris_b8const(vm, (uint8_t) A);
+}
+
+VAL idris_b64T16(VM *vm, VAL a) {
+    uint64_t A = GETBITS64(a);
+    return idris_b16const(vm, (uint16_t) A);
+}
+
+VAL idris_b64T32(VM *vm, VAL a) {
+    uint64_t A = GETBITS64(a);
+    return idris_b32const(vm, (uint32_t) A);
+}
+
+VAL idris_peekB8(VM* vm, VAL ptr, VAL offset) {
+    return MKB8(vm, *(uint8_t*)((char *)GETPTR(ptr) + GETINT(offset)));
+}
+
+VAL idris_pokeB8(VAL ptr, VAL offset, VAL data) {
+    *(uint8_t*)((char *)GETPTR(ptr) + GETINT(offset)) = GETBITS8(data);
+    return MKINT(0);
+}
+
+VAL idris_peekB16(VM* vm, VAL ptr, VAL offset) {
+    return MKB16(vm, *(uint16_t*)((char *)GETPTR(ptr) + GETINT(offset)));
+}
+
+VAL idris_pokeB16(VAL ptr, VAL offset, VAL data) {
+    *(uint16_t*)((char *)GETPTR(ptr) + GETINT(offset)) = GETBITS16(data);
+    return MKINT(0);
+}
+
+VAL idris_peekB32(VM* vm, VAL ptr, VAL offset) {
+    return MKB32(vm, *(uint32_t*)((char *)GETPTR(ptr) + GETINT(offset)));
+}
+
+VAL idris_pokeB32(VAL ptr, VAL offset, VAL data) {
+    *(uint32_t*)((char *)GETPTR(ptr) + GETINT(offset)) = GETBITS32(data);
+    return MKINT(0);
+}
+
+VAL idris_peekB64(VM* vm, VAL ptr, VAL offset) {
+    return MKB64(vm, *(uint64_t*)((char *)GETPTR(ptr) + GETINT(offset)));
+}
+
+VAL idris_pokeB64(VAL ptr, VAL offset, VAL data) {
+    *(uint64_t*)((char *)GETPTR(ptr) + GETINT(offset)) = GETBITS64(data);
+    return MKINT(0);
+}
diff --git a/dist/rts/idris_bitstring.h b/dist/rts/idris_bitstring.h
new file mode 100644
index 0000000..cba88d7
--- /dev/null
+++ b/dist/rts/idris_bitstring.h
@@ -0,0 +1,134 @@
+#ifndef _IDRISBITSTRING_H
+#define _IDRISBITSTRING_H
+
+VAL idris_b8(VM *vm, VAL a);
+VAL idris_b16(VM *vm, VAL a);
+VAL idris_b32(VM *vm, VAL a);
+VAL idris_b64(VM *vm, VAL a);
+VAL idris_castB32Int(VM *vm, VAL a);
+VAL idris_b8const(VM *vm, uint8_t a);
+VAL idris_b16const(VM *vm, uint16_t a);
+VAL idris_b32const(VM *vm, uint32_t a);
+VAL idris_b64const(VM *vm, uint64_t a);
+
+VAL idris_b8Plus(VM *vm, VAL a, VAL b);
+VAL idris_b8Minus(VM *vm, VAL a, VAL b);
+VAL idris_b8Times(VM *vm, VAL a, VAL b);
+VAL idris_b8UDiv(VM *vm, VAL a, VAL b);
+VAL idris_b8SDiv(VM *vm, VAL a, VAL b);
+VAL idris_b8URem(VM *vm, VAL a, VAL b);
+VAL idris_b8SRem(VM *vm, VAL a, VAL b);
+VAL idris_b8Lt(VM *vm, VAL a, VAL b);
+VAL idris_b8Gt(VM *vm, VAL a, VAL b);
+VAL idris_b8Eq(VM *vm, VAL a, VAL b);
+VAL idris_b8Lte(VM *vm, VAL a, VAL b);
+VAL idris_b8Gte(VM *vm, VAL a, VAL b);
+VAL idris_b8Compl(VM *vm, VAL a);
+VAL idris_b8And(VM *vm, VAL a, VAL b);
+VAL idris_b8Or(VM *vm, VAL a, VAL b);
+VAL idris_b8Neg(VM *vm, VAL a);
+VAL idris_b8Xor(VM *vm, VAL a, VAL b);
+VAL idris_b8Shl(VM *vm, VAL a, VAL b);
+VAL idris_b8LShr(VM *vm, VAL a, VAL b);
+VAL idris_b8AShr(VM *vm, VAL a, VAL b);
+
+VAL idris_b16Eq(VM *vm, VAL a, VAL b);
+VAL idris_b32Eq(VM *vm, VAL a, VAL b);
+VAL idris_b64Eq(VM *vm, VAL a, VAL b);
+
+VAL idris_b8Z16(VM *vm, VAL a);
+VAL idris_b8Z32(VM *vm, VAL a);
+VAL idris_b8Z64(VM *vm, VAL a);
+VAL idris_b8S16(VM *vm, VAL a);
+VAL idris_b8S32(VM *vm, VAL a);
+VAL idris_b8S64(VM *vm, VAL a);
+
+VAL idris_b16Plus(VM *vm, VAL a, VAL b);
+VAL idris_b16Minus(VM *vm, VAL a, VAL b);
+VAL idris_b16Times(VM *vm, VAL a, VAL b);
+VAL idris_b16UDiv(VM *vm, VAL a, VAL b);
+VAL idris_b16SDiv(VM *vm, VAL a, VAL b);
+VAL idris_b16URem(VM *vm, VAL a, VAL b);
+VAL idris_b16SRem(VM *vm, VAL a, VAL b);
+VAL idris_b16Lt(VM *vm, VAL a, VAL b);
+VAL idris_b16Gt(VM *vm, VAL a, VAL b);
+VAL idris_b16Eq(VM *vm, VAL a, VAL b);
+VAL idris_b16Lte(VM *vm, VAL a, VAL b);
+VAL idris_b16Gte(VM *vm, VAL a, VAL b);
+VAL idris_b16Compl(VM *vm, VAL a);
+VAL idris_b16And(VM *vm, VAL a, VAL b);
+VAL idris_b16Or(VM *vm, VAL a, VAL b);
+VAL idris_b16Neg(VM *vm, VAL a);
+VAL idris_b16Xor(VM *vm, VAL a, VAL b);
+VAL idris_b16Shl(VM *vm, VAL a, VAL b);
+VAL idris_b16LShr(VM *vm, VAL a, VAL b);
+VAL idris_b16AShr(VM *vm, VAL a, VAL b);
+
+VAL idris_b16Z32(VM *vm, VAL a);
+VAL idris_b16Z64(VM *vm, VAL a);
+VAL idris_b16S32(VM *vm, VAL a);
+VAL idris_b16S64(VM *vm, VAL a);
+VAL idris_b16T8(VM *vm, VAL a);
+
+VAL idris_b32Plus(VM *vm, VAL a, VAL b);
+VAL idris_b32Minus(VM *vm, VAL a, VAL b);
+VAL idris_b32Times(VM *vm, VAL a, VAL b);
+VAL idris_b32UDiv(VM *vm, VAL a, VAL b);
+VAL idris_b32SDiv(VM *vm, VAL a, VAL b);
+VAL idris_b32URem(VM *vm, VAL a, VAL b);
+VAL idris_b32SRem(VM *vm, VAL a, VAL b);
+VAL idris_b32Lt(VM *vm, VAL a, VAL b);
+VAL idris_b32Gt(VM *vm, VAL a, VAL b);
+VAL idris_b32Eq(VM *vm, VAL a, VAL b);
+VAL idris_b32Lte(VM *vm, VAL a, VAL b);
+VAL idris_b32Gte(VM *vm, VAL a, VAL b);
+VAL idris_b32Compl(VM *vm, VAL a);
+VAL idris_b32And(VM *vm, VAL a, VAL b);
+VAL idris_b32Or(VM *vm, VAL a, VAL b);
+VAL idris_b32Neg(VM *vm, VAL a);
+VAL idris_b32Xor(VM *vm, VAL a, VAL b);
+VAL idris_b32Shl(VM *vm, VAL a, VAL b);
+VAL idris_b32LShr(VM *vm, VAL a, VAL b);
+VAL idris_b32AShr(VM *vm, VAL a, VAL b);
+
+VAL idris_b32Z64(VM *vm, VAL a);
+VAL idris_b32S64(VM *vm, VAL a);
+VAL idris_b32T8(VM *vm, VAL a);
+VAL idris_b32T16(VM *vm, VAL a);
+
+VAL idris_b64Plus(VM *vm, VAL a, VAL b);
+VAL idris_b64Minus(VM *vm, VAL a, VAL b);
+VAL idris_b64Times(VM *vm, VAL a, VAL b);
+VAL idris_b64UDiv(VM *vm, VAL a, VAL b);
+VAL idris_b64SDiv(VM *vm, VAL a, VAL b);
+VAL idris_b64URem(VM *vm, VAL a, VAL b);
+VAL idris_b64SRem(VM *vm, VAL a, VAL b);
+VAL idris_b64Lt(VM *vm, VAL a, VAL b);
+VAL idris_b64Gt(VM *vm, VAL a, VAL b);
+VAL idris_b64Eq(VM *vm, VAL a, VAL b);
+VAL idris_b64Lte(VM *vm, VAL a, VAL b);
+VAL idris_b64Gte(VM *vm, VAL a, VAL b);
+VAL idris_b64Compl(VM *vm, VAL a);
+VAL idris_b64And(VM *vm, VAL a, VAL b);
+VAL idris_b64Or(VM *vm, VAL a, VAL b);
+VAL idris_b64Neg(VM *vm, VAL a);
+VAL idris_b64Xor(VM *vm, VAL a, VAL b);
+VAL idris_b64Shl(VM *vm, VAL a, VAL b);
+VAL idris_b64LShr(VM *vm, VAL a, VAL b);
+VAL idris_b64AShr(VM *vm, VAL a, VAL b);
+
+VAL idris_b64T8(VM *vm, VAL a);
+VAL idris_b64T16(VM *vm, VAL a);
+VAL idris_b64T32(VM *vm, VAL a);
+
+// memory access
+VAL idris_peekB8(VM* vm, VAL ptr, VAL offset);
+VAL idris_pokeB8(VAL ptr, VAL offset, VAL data);
+VAL idris_peekB16(VM* vm, VAL ptr, VAL offset);
+VAL idris_pokeB16(VAL ptr, VAL offset, VAL data);
+VAL idris_peekB32(VM* vm, VAL ptr, VAL offset);
+VAL idris_pokeB32(VAL ptr, VAL offset, VAL data);
+VAL idris_peekB64(VM* vm, VAL ptr, VAL offset);
+VAL idris_pokeB64(VAL ptr, VAL offset, VAL data);
+
+#endif
diff --git a/dist/rts/idris_buffer.c b/dist/rts/idris_buffer.c
new file mode 100644
index 0000000..ec95341
--- /dev/null
+++ b/dist/rts/idris_buffer.c
@@ -0,0 +1,146 @@
+#include "idris_rts.h"
+#include "idris_buffer.h"
+
+typedef struct {
+    int size;
+    uint8_t data[0];
+} Buffer;
+
+VAL idris_newBuffer(VM* vm, int bytes) {
+    size_t size = sizeof(Buffer) + bytes*sizeof(uint8_t);
+
+    Buffer* buf = malloc(size);
+    if (buf == NULL) {
+        return NULL;
+    }
+
+    buf->size = bytes;
+    memset(buf->data, 0, bytes);
+
+    void* retbuf = MKMPTR(vm, buf, size);
+    free(buf);
+    return retbuf;
+}
+
+void idris_copyBuffer(void* from, int start, int len,
+                      void* to, int loc) {
+    Buffer* bfrom = from;
+    Buffer* bto = to;
+
+    if (loc >= 0 && loc+len <= bto->size) {
+        memcpy(bto->data + loc, bfrom->data + start, len);
+    }
+}
+
+int idris_getBufferSize(void* buffer) {
+    return ((Buffer*)buffer)->size;
+}
+
+void idris_setBufferByte(void* buffer, int loc, uint8_t byte) {
+    Buffer* b = buffer;
+    if (loc >= 0 && loc < b->size) {
+        b->data[loc] = byte;
+    }
+}
+
+void idris_setBufferInt(void* buffer, int loc, int val) {
+    Buffer* b = buffer;
+    if (loc >= 0 && loc+3 < b->size) {
+        b->data[loc] = val & 0xff;
+        b->data[loc+1] = (val >> 8) & 0xff;
+        b->data[loc+2] = (val >> 16) & 0xff;
+        b->data[loc+3] = (val >> 24) & 0xff;
+    }
+}
+
+void idris_setBufferDouble(void* buffer, int loc, double val) {
+    Buffer* b = buffer;
+    // I am not proud of this
+    if (loc >= 0 && loc + sizeof(double) <= b->size) {
+        unsigned char* c = (unsigned char*)(& val);
+        int i;
+        for (i = 0; i < sizeof(double); ++i) {
+            b->data[loc+i] = c[i];
+        }
+    }
+}
+
+void idris_setBufferString(void* buffer, int loc, char* str) {
+    Buffer* b = buffer;
+    int len = strlen(str);
+
+    if (loc >= 0 && loc+len <= b->size) {
+        memcpy((b->data)+loc, str, len);
+    }
+}
+
+uint8_t idris_getBufferByte(void* buffer, int loc) {
+    Buffer* b = buffer;
+    if (loc >= 0 && loc < b->size) {
+        return b->data[loc];
+    } else {
+        return 0;
+    }
+}
+
+int idris_getBufferInt(void* buffer, int loc) {
+    Buffer* b = buffer;
+    if (loc >= 0 && loc+3 < b->size) {
+        return b->data[loc] +
+               (b->data[loc+1] << 8) +
+               (b->data[loc+2] << 16) +
+               (b->data[loc+3] << 24);
+    } else {
+        return 0;
+    }
+}
+
+double idris_getBufferDouble(void* buffer, int loc) {
+    Buffer* b = buffer;
+    double d;
+    // I am even less proud of this
+    unsigned char *c = (unsigned char*)(& d);
+    if (loc >= 0 && loc + sizeof(double) <= b->size) {
+        int i;
+        for (i = 0; i < sizeof(double); ++i) {
+            c[i] = b->data[loc+i];
+        }
+        return d;
+    }
+    else {
+        return 0;
+    }
+}
+
+VAL idris_getBufferString(void* buffer, int loc, int len) {
+    Buffer* b = buffer;
+    char * s = (char*)(b->data + loc);
+    size_t sz = loc >= 0 && loc+len <= b->size? len : 0;
+    return MKSTRlen(get_vm(), s, sz);
+}
+
+int idris_readBuffer(FILE* h, void* buffer, int loc, int max) {
+    Buffer* b = buffer;
+    size_t len;
+
+    if (loc >= 0 && loc < b->size) {
+        if (loc + max > b->size) {
+            max = b->size - loc;
+        }
+        len = fread((b->data)+loc, sizeof(uint8_t), (size_t)max, h);
+        return len;
+    } else {
+        return 0;
+    }
+}
+
+void idris_writeBuffer(FILE* h, void* buffer, int loc, int len) {
+    Buffer* b = buffer;
+
+    if (loc >= 0 && loc < b->size) {
+        if (loc + len > b->size) {
+            len = b->size - loc;
+        }
+        fwrite((b->data)+loc, sizeof(uint8_t), len, h);
+    }
+}
diff --git a/dist/rts/idris_buffer.h b/dist/rts/idris_buffer.h
new file mode 100644
index 0000000..93dbf77
--- /dev/null
+++ b/dist/rts/idris_buffer.h
@@ -0,0 +1,29 @@
+#ifndef __BUFFER_H
+#define __BUFFER_H
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include "idris_rts.h"
+
+VAL idris_newBuffer(VM* vm, int bytes);
+
+int idris_getBufferSize(void* buffer);
+
+void idris_setBufferByte(void* buffer, int loc, uint8_t byte);
+void idris_setBufferInt(void* buffer, int loc, int val);
+void idris_setBufferDouble(void* buffer, int loc, double val);
+void idris_setBufferString(void* buffer, int loc, char* str);
+
+void idris_copyBuffer(void* from, int start, int len,
+                      void* to, int loc);
+
+int idris_readBuffer(FILE* h, void* buffer, int loc, int max);
+void idris_writeBuffer(FILE* h, void* buffer, int loc, int len);
+
+uint8_t idris_getBufferByte(void* buffer, int loc);
+int idris_getBufferInt(void* buffer, int loc);
+double idris_getBufferDouble(void* buffer, int loc);
+VAL idris_getBufferString(void* buffer, int loc, int len);
+
+#endif
diff --git a/dist/rts/idris_gc.c b/dist/rts/idris_gc.c
new file mode 100644
index 0000000..3f5a5f1
--- /dev/null
+++ b/dist/rts/idris_gc.c
@@ -0,0 +1,156 @@
+#include "idris_heap.h"
+#include "idris_rts.h"
+#include "idris_gc.h"
+#include "idris_bitstring.h"
+#include <assert.h>
+
+static inline VAL copy_plain(VM* vm, VAL x, size_t sz) {
+    VAL cl = iallocate(vm, sz, 1);
+    memcpy(cl, x, sz);
+    return cl;
+}
+
+VAL copy(VM* vm, VAL x) {
+    int ar;
+    VAL cl;
+    if (x==NULL) {
+        return x;
+    }
+    switch(GETTY(x)) {
+    case CT_INT: return x;
+    case CT_BITS32: return copy_plain(vm, x, sizeof(Bits32));
+    case CT_BITS64: return copy_plain(vm, x, sizeof(Bits64));
+    case CT_FLOAT: return copy_plain(vm, x, sizeof(Float));
+    case CT_FWD:
+        return GETPTR(x);
+    case CT_CDATA:
+        cl = copy_plain(vm, x, sizeof(CDataC));
+        c_heap_mark_item(GETCDATA(x));
+        break;
+    case CT_BIGINT:
+        cl = MKBIGMc(vm, GETMPZ(x));
+        break;
+    case CT_CON:
+        ar = CARITY(x);
+        if (ar == 0 && CTAG(x) < 256) {
+            return x;
+        }
+        // FALLTHROUGH
+    case CT_ARRAY:
+    case CT_STRING:
+    case CT_REF:
+    case CT_STROFFSET:
+    case CT_PTR:
+    case CT_MANAGEDPTR:
+    case CT_RAWDATA:
+        cl = copy_plain(vm, x, x->hdr.sz);
+        break;
+    default:
+        cl = NULL;
+        assert(0);
+        break;
+    }
+    assert(x->hdr.sz >= sizeof(Fwd));
+    SETTY(x, CT_FWD);
+    ((Fwd*)x)->fwd = cl;
+    return cl;
+}
+
+void cheney(VM *vm) {
+    char* scan = aligned_heap_pointer(vm->heap.heap);
+
+    while(scan < vm->heap.next) {
+       VAL heap_item = (VAL)scan;
+       // If it's a CT_CON, CT_REF or CT_STROFFSET, copy its arguments
+       switch(GETTY(heap_item)) {
+       case CT_CON:
+           {
+               Con * c = (Con*)heap_item;
+               size_t len = CARITY(c);
+               for(size_t i = 0; i < len; ++i)
+                   c->args[i] = copy(vm, c->args[i]);
+           }
+           break;
+       case CT_ARRAY:
+           {
+               Array * a = (Array*)heap_item;
+               size_t len = CELEM(a);
+               for(size_t i = 0; i < len; ++i)
+                   a->array[i] = copy(vm, a->array[i]);
+           }
+           break;
+       case CT_REF:
+           {
+               Ref * r = (Ref*)heap_item;
+               r->ref = copy(vm, r->ref);
+           }
+           break;
+       case CT_STROFFSET:
+           {
+               StrOffset * s = (StrOffset*)heap_item;
+               s->base = (String*)copy(vm, (VAL)s->base);
+           }
+           break;
+       default: // Nothing to copy
+           break;
+       }
+       scan += aligned(valSize(heap_item));
+    }
+    assert(scan == vm->heap.next);
+}
+
+void idris_gc(VM* vm) {
+    HEAP_CHECK(vm)
+    STATS_ENTER_GC(vm->stats, vm->heap.size)
+
+    if (vm->heap.old != NULL)
+        free(vm->heap.old);
+
+    /* Allocate swap heap. */
+    alloc_heap(&vm->heap, vm->heap.size, vm->heap.growth, vm->heap.heap);
+
+    VAL* root;
+
+    for(root = vm->valstack; root < vm->valstack_top; ++root) {
+        *root = copy(vm, *root);
+    }
+
+#ifdef HAS_PTHREAD
+    Msg* msg;
+
+    for(msg = vm->inbox; msg < vm->inbox_write; ++msg) {
+        msg->msg = copy(vm, msg->msg);
+    }
+#endif
+
+    vm->ret = copy(vm, vm->ret);
+    vm->reg1 = copy(vm, vm->reg1);
+
+    cheney(vm);
+
+    // After reallocation, if we've still more than half filled the new heap, grow the heap
+    // for next time.
+
+    if ((vm->heap.next - vm->heap.heap) > vm->heap.size >> 1) {
+        vm->heap.size += vm->heap.growth;
+    }
+
+    // finally, sweep the C heap
+    c_heap_sweep(&vm->c_heap);
+
+    STATS_LEAVE_GC(vm->stats, vm->heap.size, vm->heap.next - vm->heap.heap)
+    HEAP_CHECK(vm)
+}
+
+void idris_gcInfo(VM* vm, int doGC) {
+    printf("Stack: <BOT %p> <TOP %p>\n", vm->valstack, vm->valstack_top);
+    printf("Final heap size         %zd\n", vm->heap.size);
+    printf("Final heap use          %zd\n", vm->heap.next - vm->heap.heap);
+    if (doGC) { idris_gc(vm); }
+    printf("Final heap use after GC %zd\n", vm->heap.next - vm->heap.heap);
+#ifdef IDRIS_ENABLE_STATS
+    printf("Total allocations       %" PRIu64 "\n", vm->stats.allocations);
+#endif
+    printf("Number of collections   %" PRIu32 "\n", vm->stats.collections);
+
+}
diff --git a/dist/rts/idris_gc.h b/dist/rts/idris_gc.h
new file mode 100644
index 0000000..352f111
--- /dev/null
+++ b/dist/rts/idris_gc.h
@@ -0,0 +1,9 @@
+#ifndef _IDRISGC_H
+#define _IDRISGC_H
+
+#include "idris_rts.h"
+
+void idris_gc(VM* vm);
+void idris_gcInfo(VM* vm, int doGC);
+
+#endif
diff --git a/dist/rts/idris_gmp.c b/dist/rts/idris_gmp.c
new file mode 100644
index 0000000..1b908d7
--- /dev/null
+++ b/dist/rts/idris_gmp.c
@@ -0,0 +1,377 @@
+#include "idris_rts.h"
+#ifdef IDRIS_GMP
+#include <gmp.h>
+#else
+#include "mini-gmp.h"
+#endif
+#include <stdlib.h>
+#include <string.h>
+
+// TMP HACK! Require this much space in the heap before a GMP operation
+// so it doesn't garbage collect in the middle.
+// This is highly dodgy and needs to be done better because who knows if
+// GMP will need to allocate more than 64k... better to work out how
+// much space is needed (or find another way of preventing copying)
+#define IDRIS_MAXGMP 65536
+
+void init_gmpalloc(void) {
+    mp_set_memory_functions(idris_alloc, idris_realloc, idris_free);
+}
+
+VAL MKBIGI(int val) {
+    return MKINT((i_int)val);
+}
+
+static BigInt * allocBig(VM * vm) {
+    idris_requireAlloc(vm, IDRIS_MAXGMP);
+    BigInt * cl = iallocate(vm, sizeof(*cl) + sizeof(mpz_t), 0);
+    idris_doneAlloc(vm);
+    SETTY(cl, CT_BIGINT);
+    mpz_init(*getmpz(cl));
+    return cl;
+}
+
+VAL MKBIGC(VM* vm, char* val) {
+    if (*val == '\0') {
+        return MKBIGI(0);
+    }
+    else {
+        BigInt * cl = allocBig(vm);
+        mpz_set_str(*getmpz(cl), val, 10);
+        return (VAL)cl;
+    }
+}
+
+VAL MKBIGM(VM* vm, void* ibig) {
+    BigInt * cl = allocBig(vm);
+    mpz_set(*getmpz(cl), *((mpz_t*)ibig));
+    return (VAL)cl;
+}
+
+VAL MKBIGMc(VM* vm, void* ibig) {
+    BigInt * cl = allocBig(vm);
+    mpz_init_set(*getmpz(cl), *((mpz_t*)ibig));
+    return (VAL)cl;
+}
+
+VAL MKBIGUI(VM* vm, unsigned long val) {
+    BigInt * cl = allocBig(vm);
+    mpz_init_set_ui(*getmpz(cl), val);
+    return (VAL)cl;
+}
+
+VAL MKBIGSI(VM* vm, signed long val) {
+    BigInt * cl = allocBig(vm);
+    mpz_init_set_si(*getmpz(cl), val);
+    return (VAL)cl;
+}
+
+static BigInt * getbig(VM * vm, VAL x) {
+    switch(GETTY(x)) {
+    case CT_INT:
+        {
+            BigInt * cl = allocBig(vm);
+            mpz_set_si(*getmpz(cl), GETINT(x));
+            return cl;
+        }
+    case CT_FWD:
+        return getbig(vm, ((Fwd*)x)->fwd);
+    default:
+        return (BigInt*)x;
+    }
+}
+
+#define GETBIG (VAL)getbig
+
+VAL bigAdd(VM* vm, VAL x, VAL y) {
+    BigInt * cl = allocBig(vm);
+    mpz_add(*getmpz(cl), *getmpz(getbig(vm,x)), *getmpz(getbig(vm,y)));
+    return (VAL)cl;
+}
+
+VAL bigSub(VM* vm, VAL x, VAL y) {
+    BigInt * cl = allocBig(vm);
+    mpz_sub(*getmpz(cl), *getmpz(getbig(vm,x)), *getmpz(getbig(vm,y)));
+    return (VAL)cl;
+}
+
+VAL bigMul(VM* vm, VAL x, VAL y) {
+    BigInt * cl = allocBig(vm);
+    mpz_mul(*getmpz(cl), *getmpz(getbig(vm,x)), *getmpz(getbig(vm,y)));
+    return (VAL)cl;
+}
+
+VAL bigDiv(VM* vm, VAL x, VAL y) {
+    BigInt * cl = allocBig(vm);
+    mpz_tdiv_q(*getmpz(cl), *getmpz(getbig(vm,x)), *getmpz(getbig(vm,y)));
+    return (VAL)cl;
+}
+
+VAL bigMod(VM* vm, VAL x, VAL y) {
+    BigInt * cl = allocBig(vm);
+    mpz_tdiv_r(*getmpz(cl), *getmpz(getbig(vm,x)), *getmpz(getbig(vm,y)));
+    return (VAL)cl;
+}
+
+VAL bigAnd(VM* vm, VAL x, VAL y) {
+    BigInt * cl = allocBig(vm);
+    mpz_and(*getmpz(cl), *getmpz(getbig(vm,x)), *getmpz(getbig(vm,y)));
+    return (VAL)cl;
+}
+
+VAL bigOr(VM* vm, VAL x, VAL y) {
+    BigInt * cl = allocBig(vm);
+    mpz_ior(*getmpz(cl), *getmpz(getbig(vm,x)), *getmpz(getbig(vm,y)));
+    return (VAL)cl;
+}
+
+VAL bigShiftLeft(VM* vm, VAL x, VAL y) {
+    BigInt * cl = allocBig(vm);
+    mpz_mul_2exp(*getmpz(cl), *getmpz(getbig(vm,x)), GETINT(y));
+    return (VAL)cl;
+}
+
+
+VAL bigLShiftRight(VM* vm, VAL x, VAL y) {
+    BigInt * cl = allocBig(vm);
+    mpz_fdiv_q_2exp(*getmpz(cl), *getmpz(getbig(vm,x)), GETINT(y));
+    return (VAL)cl;
+}
+
+VAL bigAShiftRight(VM* vm, VAL x, VAL y) {
+    BigInt * cl = allocBig(vm);
+    mpz_fdiv_q_2exp(*getmpz(cl), *getmpz(getbig(vm,x)), GETINT(y));
+    return (VAL)cl;
+}
+
+VAL idris_bigAnd(VM* vm, VAL x, VAL y) {
+    if (ISINT(x) && ISINT(y)) {
+        return INTOP(&, x, y);
+    } else {
+        return bigAnd(vm, GETBIG(vm, x), GETBIG(vm, y));
+    }
+}
+
+VAL idris_bigOr(VM* vm, VAL x, VAL y) {
+    if (ISINT(x) && ISINT(y)) {
+        return INTOP(|, x, y);
+    } else {
+        return bigOr(vm, GETBIG(vm, x), GETBIG(vm, y));
+    }
+}
+
+VAL idris_bigPlus(VM* vm, VAL x, VAL y) {
+    if (ISINT(x) && ISINT(y)) {
+        i_int vx = GETINT(x);
+        i_int vy = GETINT(y);
+        if ((vx <= 0 && vy >=0) || (vx >=0 && vy <=0)) {
+            return ADD(x, y);
+        }
+        i_int res = vx + vy;
+        if (res >= 1<<30 || res <= -(1 << 30)) {
+            return bigAdd(vm, GETBIG(vm, x), GETBIG(vm, y));
+        } else {
+            return MKINT(res);
+        }
+    } else {
+        return bigAdd(vm, GETBIG(vm, x), GETBIG(vm, y));
+    }
+}
+
+VAL idris_bigMinus(VM* vm, VAL x, VAL y) {
+    if (ISINT(x) && ISINT(y)) {
+        i_int vx = GETINT(x);
+        i_int vy = GETINT(y);
+        if ((vx <= 0 && vy <=0) || (vx >=0 && vy <=0)) {
+            return INTOP(-, x, y);
+        }
+        i_int res = vx - vy;
+        if (res >= 1<<30 || res <= -(1 << 30)) {
+            return bigSub(vm, GETBIG(vm, x), GETBIG(vm, y));
+        } else {
+            return MKINT(res);
+        }
+    } else {
+        return bigSub(vm, GETBIG(vm, x), GETBIG(vm, y));
+    }
+}
+
+VAL idris_bigTimes(VM* vm, VAL x, VAL y) {
+    if (ISINT(x) && ISINT(y)) {
+        i_int vx = GETINT(x);
+        i_int vy = GETINT(y);
+        // we could work out likelihood of overflow by checking the number
+        // of necessary bits. Here's a quick conservative hack instead.
+        if ((vx < (1<<15) && vy < (1<16)) ||
+            (vx < (1<<16) && vy < (1<15)) ||
+            (vx < (1<<20) && vy < (1<11)) ||
+            (vx < (1<<11) && vy < (1<20)) ||
+            (vx < (1<<23) && vy < (1<<8)) ||
+            (vx < (1<<8) && vy < (1<<23))) { // ultra-conservative!
+            return INTOP(*,x,y);
+        } else {
+            return bigMul(vm, GETBIG(vm, x), GETBIG(vm, y));
+        }
+    } else {
+        return bigMul(vm, GETBIG(vm, x), GETBIG(vm, y));
+    }
+}
+
+VAL idris_bigShiftLeft(VM* vm, VAL x, VAL y) {
+    if (ISINT(x) && ISINT(y)) {
+        return INTOP(<<, x, y);
+    } else {
+        return bigShiftLeft(vm, GETBIG(vm, x), y);
+    }
+}
+
+VAL idris_bigAShiftRight(VM* vm, VAL x, VAL y) {
+    if (ISINT(x) && ISINT(y)) {
+        return INTOP(>>, x, y);
+    } else {
+        return bigAShiftRight(vm, GETBIG(vm, x), y);
+    }
+}
+
+VAL idris_bigLShiftRight(VM* vm, VAL x, VAL y) {
+    if (ISINT(x) && ISINT(y)) {
+        return INTOP(>>, x, y);
+    } else {
+        return bigLShiftRight(vm, GETBIG(vm, x), y);
+    }
+}
+
+VAL idris_bigDivide(VM* vm, VAL x, VAL y) {
+    if (ISINT(x) && ISINT(y)) {
+        return INTOP(/, x, y);
+    } else {
+        return bigDiv(vm, GETBIG(vm, x), GETBIG(vm, y));
+    }
+}
+
+VAL idris_bigMod(VM* vm, VAL x, VAL y) {
+    if (ISINT(x) && ISINT(y)) {
+        return INTOP(%, x, y);
+    } else {
+        return bigMod(vm, GETBIG(vm, x), GETBIG(vm, y));
+    }
+}
+
+int bigEqConst(VAL x, int c) {
+    if (ISINT(x)) { return (GETINT(x) == c); }
+    else {
+        int rv = mpz_cmp_si(GETMPZ(x), c);
+        return (rv == 0);
+    }
+}
+
+VAL bigEq(VM* vm, VAL x, VAL y) {
+    return MKINT((i_int)(mpz_cmp(GETMPZ(x), GETMPZ(y)) == 0));
+}
+
+VAL bigLt(VM* vm, VAL x, VAL y) {
+    return MKINT((i_int)(mpz_cmp(GETMPZ(x), GETMPZ(y)) < 0));
+}
+
+VAL bigGt(VM* vm, VAL x, VAL y) {
+    return MKINT((i_int)(mpz_cmp(GETMPZ(x), GETMPZ(y)) > 0));
+}
+
+VAL bigLe(VM* vm, VAL x, VAL y) {
+    return MKINT((i_int)(mpz_cmp(GETMPZ(x), GETMPZ(y)) <= 0));
+}
+
+VAL bigGe(VM* vm, VAL x, VAL y) {
+    return MKINT((i_int)(mpz_cmp(GETMPZ(x), GETMPZ(y)) >= 0));
+}
+
+VAL idris_bigEq(VM* vm, VAL x, VAL y) {
+    if (ISINT(x) && ISINT(y)) {
+        return MKINT((i_int)(GETINT(x) == GETINT(y)));
+    } else {
+        return bigEq(vm, GETBIG(vm, x), GETBIG(vm, y));
+    }
+}
+
+VAL idris_bigLt(VM* vm, VAL x, VAL y) {
+    if (ISINT(x) && ISINT(y)) {
+        return MKINT((i_int)(GETINT(x) < GETINT(y)));
+    } else {
+        return bigLt(vm, GETBIG(vm, x), GETBIG(vm, y));
+    }
+}
+
+VAL idris_bigLe(VM* vm, VAL x, VAL y) {
+    if (ISINT(x) && ISINT(y)) {
+        return MKINT((i_int)(GETINT(x) <= GETINT(y)));
+    } else {
+        return bigLe(vm, GETBIG(vm, x), GETBIG(vm, y));
+    }
+}
+
+VAL idris_bigGt(VM* vm, VAL x, VAL y) {
+    if (ISINT(x) && ISINT(y)) {
+        return MKINT((i_int)(GETINT(x) > GETINT(y)));
+    } else {
+        return bigGt(vm, GETBIG(vm, x), GETBIG(vm, y));
+    }
+}
+
+VAL idris_bigGe(VM* vm, VAL x, VAL y) {
+    if (ISINT(x) && ISINT(y)) {
+        return MKINT((i_int)(GETINT(x) >= GETINT(y)));
+    } else {
+        return bigGe(vm, GETBIG(vm, x), GETBIG(vm, y));
+    }
+}
+
+
+VAL idris_castIntBig(VM* vm, VAL i) {
+    return i;
+}
+
+VAL idris_castBigInt(VM* vm, VAL i) {
+    if (ISINT(i)) {
+        return i;
+    } else {
+        return MKINT((i_int)(mpz_get_ui(GETMPZ(i))));
+    }
+}
+
+VAL idris_castBigFloat(VM* vm, VAL i) {
+    if (ISINT(i)) {
+        return MKFLOAT(vm, GETINT(i));
+    } else {
+        return MKFLOAT(vm, mpz_get_d(GETMPZ(i)));
+    }
+}
+
+VAL idris_castFloatBig(VM* vm, VAL f) {
+    double val = GETFLOAT(f);
+    BigInt * cl = allocBig(vm);
+    mpz_init_set_d(*getmpz(cl), val);
+    return (VAL)cl;
+}
+
+VAL idris_castStrBig(VM* vm, VAL i) {
+    return MKBIGC(vm, GETSTR(i));
+}
+
+VAL idris_castBigStr(VM* vm, VAL i) {
+    char* str = mpz_get_str(NULL, 10, *getmpz(getbig(vm, i)));
+    return MKSTR(vm, str);
+}
+
+// Get 64 bits out of a big int with special handling
+// for systems that have 32-bit longs which needs two limbs to
+// fill that.
+uint64_t idris_truncBigB64(const mpz_t bi) {
+    if (sizeof(mp_limb_t) == 8) {
+        return mpz_get_ui(bi);
+    }
+    int64_t out = mpz_get_ui(bi);
+    if (mpz_size(bi) > 1) {
+        out |= ((int64_t)mpz_getlimbn(bi, 1)) << 32;
+    }
+    return out;
+}
diff --git a/dist/rts/idris_gmp.h b/dist/rts/idris_gmp.h
new file mode 100644
index 0000000..cdae2a7
--- /dev/null
+++ b/dist/rts/idris_gmp.h
@@ -0,0 +1,57 @@
+#ifndef _IDRISGMP_H
+#define _IDRISGMP_H
+
+#ifdef IDRIS_GMP
+#include <gmp.h>
+#else
+#include "mini-gmp.h"
+#endif
+
+#include "idris_rts.h"
+
+// Set memory allocation functions
+void init_gmpalloc(void);
+
+VAL MKBIGI(int val);
+VAL MKBIGC(VM* vm, char* bigint);
+VAL MKBIGM(VM* vm, void* bigint);
+VAL MKBIGMc(VM* vm, void* bigint);
+VAL MKBIGUI(VM* vm, unsigned long val);
+VAL MKBIGSI(VM* vm, signed long val);
+
+VAL idris_bigPlus(VM*, VAL x, VAL y);
+VAL idris_bigMinus(VM*, VAL x, VAL y);
+VAL idris_bigTimes(VM*, VAL x, VAL y);
+VAL idris_bigDivide(VM*, VAL x, VAL y);
+VAL idris_bigMod(VM*, VAL x, VAL y);
+
+int bigEqConst(VAL x, int c);
+
+VAL idris_bigEq(VM*, VAL x, VAL y);
+VAL idris_bigLt(VM*, VAL x, VAL y);
+VAL idris_bigLe(VM*, VAL x, VAL y);
+VAL idris_bigGt(VM*, VAL x, VAL y);
+VAL idris_bigGe(VM*, VAL x, VAL y);
+
+VAL idris_castIntBig(VM* vm, VAL i);
+VAL idris_castBigInt(VM* vm, VAL i);
+VAL idris_castFloatBig(VM* vm, VAL i);
+VAL idris_castBigFloat(VM* vm, VAL i);
+VAL idris_castStrBig(VM* vm, VAL i);
+VAL idris_castBigStr(VM* vm, VAL i);
+
+VAL idris_bigAnd(VM* vm, VAL x, VAL y);
+VAL idris_bigOr(VM* vm, VAL x, VAL y);
+VAL idris_bigShiftLeft(VM* vm, VAL x, VAL y);
+VAL idris_bigAShiftRight(VM* vm, VAL x, VAL y);
+VAL idris_bigLShiftRight(VM* vm, VAL x, VAL y);
+
+uint64_t idris_truncBigB64(const mpz_t bi);
+
+static inline mpz_t * getmpz(BigInt * v) {
+    return (mpz_t*)(v->big);
+}
+
+#define GETMPZ(x) *getmpz((BigInt*)x)
+
+#endif
diff --git a/dist/rts/idris_heap.c b/dist/rts/idris_heap.c
new file mode 100644
index 0000000..1ab12a5
--- /dev/null
+++ b/dist/rts/idris_heap.c
@@ -0,0 +1,242 @@
+#include "idris_heap.h"
+#include "idris_rts.h"
+#include "idris_gc.h"
+
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <assert.h>
+
+static void c_heap_free_item(CHeap * heap, CHeapItem * item)
+{
+    assert(item->size <= heap->size);
+    heap->size -= item->size;
+
+    // fix links
+    if (item->next != NULL)
+    {
+        item->next->prev_next = item->prev_next;
+    }
+    *(item->prev_next) = item->next;
+
+    // free payload
+    item->finalizer(item->data);
+
+    // free item struct
+    free(item);
+}
+
+CHeapItem * c_heap_create_item(void * data, size_t size, CDataFinalizer * finalizer)
+{
+    CHeapItem * item = (CHeapItem *) malloc(sizeof(CHeapItem));
+
+    item->data = data;
+    item->size = size;
+    item->finalizer = finalizer;
+    item->is_used = false;
+    item->next = NULL;
+    item->prev_next = NULL;
+
+    return item;
+}
+
+void c_heap_insert_if_needed(VM * vm, CHeap * heap, CHeapItem * item)
+{
+    if (item->prev_next != NULL) return;  // already inserted
+
+    if (heap->first != NULL)
+    {
+        heap->first->prev_next = &item->next;
+    }
+
+    item->prev_next = &heap->first;
+    item->next = heap->first;
+
+    heap->first = item;
+
+    // at this point, links are done; let's calculate sizes
+
+    heap->size += item->size;
+    if (heap->size >= heap->gc_trigger_size)
+    {
+        item->is_used = true;  // don't collect what we're inserting
+        idris_gc(vm);
+    }
+}
+
+void c_heap_mark_item(CHeapItem * item)
+{
+    item->is_used = true;
+}
+
+void c_heap_sweep(CHeap * heap)
+{
+    CHeapItem * p = heap->first;
+    while (p != NULL)
+    {
+        if (p->is_used)
+        {
+            p->is_used = false;
+            p = p->next;
+        }
+        else
+        {
+            CHeapItem * unused_item = p;
+            p = p->next;
+
+            c_heap_free_item(heap, unused_item);
+        }
+    }
+
+    heap->gc_trigger_size = C_HEAP_GC_TRIGGER_SIZE(heap->size);
+}
+
+void c_heap_init(CHeap * heap)
+{
+    heap->first = NULL;
+    heap->size = 0;
+    heap->gc_trigger_size = C_HEAP_GC_TRIGGER_SIZE(heap->size);
+}
+
+void c_heap_destroy(CHeap * heap)
+{
+    while (heap->first != NULL)
+    {
+        c_heap_free_item(heap, heap->first);  // will update heap->first via the backward link
+    }
+}
+
+/* Used for initializing the FP heap. */
+void alloc_heap(Heap * h, size_t heap_size, size_t growth, char * old)
+{
+    char * mem = malloc(heap_size);
+
+    if (mem == NULL) {
+        fprintf(stderr,
+                "RTS ERROR: Unable to allocate heap. Requested %zd bytes.\n",
+                heap_size);
+        exit(EXIT_FAILURE);
+    }
+    memset(mem, 0, heap_size);
+
+    h->heap = mem;
+    h->next = aligned_heap_pointer(h->heap);
+    h->end  = h->heap + heap_size;
+
+    h->size   = heap_size;
+    h->growth = growth;
+
+    h->old = old;
+}
+
+void free_heap(Heap * h) {
+    free(h->heap);
+
+    if (h->old != NULL) {
+        free(h->old);
+    }
+}
+
+
+// TODO: more testing
+/******************** Heap testing ********************************************/
+void heap_check_underflow(Heap * heap) {
+    if (!(heap->heap <= heap->next)) {
+       fprintf(stderr, "RTS ERROR: HEAP UNDERFLOW <bot %p> <cur %p>\n",
+               heap->heap, heap->next);
+        exit(EXIT_FAILURE);
+    }
+}
+
+void heap_check_overflow(Heap * heap) {
+    if (!(heap->next <= heap->end)) {
+       fprintf(stderr, "RTS ERROR: HEAP OVERFLOW <cur %p> <end %p>\n",
+               heap->next, heap->end);
+        exit(EXIT_FAILURE);
+    }
+}
+
+int is_valid_ref(VAL v) {
+    return (v != NULL) && !(ISINT(v));
+}
+
+int ref_in_heap(Heap * heap, VAL v) {
+    return ((VAL)heap->heap <= v) && (v < (VAL)heap->next);
+}
+
+char* aligned_heap_pointer(char * heap) {
+#ifdef FORCE_ALIGNMENT
+    if (((i_int)heap&1) == 1) {
+	    return (heap + 1);
+    } else
+#endif
+    {
+	    return heap;
+    }
+}
+
+// Checks three important properties:
+// 1. Closure.
+//      Check if all pointers in the _heap_ points only to heap.
+// 2. Unidirectionality. (if compact gc)
+//      All references in the heap should be are unidirectional. In other words,
+//      more recently allocated closure can point only to earlier allocated one.
+// 3. After gc there should be no forward references.
+//
+void heap_check_pointers(Heap * heap) {
+    char* scan = NULL;
+
+    size_t item_size = 0;
+    for(scan = heap->heap; scan < heap->next; scan += item_size) {
+       VAL heap_item = (VAL)scan;
+       item_size = aligned(valSize(heap_item));
+
+       switch(GETTY(heap_item)) {
+       case CT_CON:
+           {
+	     Con * c = (Con*)heap_item;
+             size_t ar = CARITY(c);
+             size_t i;
+             for(i = 0; i < ar; ++i) {
+                 VAL ptr = c->args[i];
+
+                 if (is_valid_ref(ptr)) {
+                     // Check for closure.
+                     if (!ref_in_heap(heap, ptr)) {
+                         fprintf(stderr,
+                                 "RTS ERROR: heap closure broken. "\
+                                 "<HEAP %p %p %p> <REF %p>\n",
+                                 heap->heap, heap->next, heap->end, ptr);
+                         exit(EXIT_FAILURE);
+                     }
+#if 0 // TODO macro
+                     // Check for unidirectionality.
+                     if (!(ptr < heap_item)) {
+                         fprintf(stderr,
+                                 "RTS ERROR: heap unidirectionality broken:" \
+                                 "<CT_CON %p> <FIELD %p>\n",
+                                 heap_item, ptr);
+                         exit(EXIT_FAILURE);
+                     }
+#endif
+                 }
+             }
+             break;
+           }
+       case CT_FWD:
+           // Check for artifacts after cheney gc.
+           fprintf(stderr, "RTS ERROR: CT_FWD in working heap.\n");
+           exit(EXIT_FAILURE);
+           break;
+       default:
+           break;
+       }
+    }
+}
+
+void heap_check_all(Heap * heap)
+{
+    heap_check_underflow(heap);
+    heap_check_overflow(heap);
+    heap_check_pointers(heap);
+}
diff --git a/dist/rts/idris_heap.h b/dist/rts/idris_heap.h
new file mode 100644
index 0000000..f2596a8
--- /dev/null
+++ b/dist/rts/idris_heap.h
@@ -0,0 +1,111 @@
+#ifndef _IDRIS_HEAP_H
+#define _IDRIS_HEAP_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+/* *** C heap ***
+ * Objects with finalizers. Mark&sweep-collected.
+ *
+ * The C heap is implemented as a doubly linked list
+ * of pointers coupled with their finalizers.
+ */
+
+struct VM;
+
+#define C_HEAP_GC_TRIGGER_SIZE(heap_size) \
+    (heap_size < 2048    \
+        ? 4096           \
+        : 2 * heap_size  \
+    )
+
+typedef void CDataFinalizer(void *);
+
+typedef struct CHeapItem {
+    /// Payload.
+    void * data;
+
+    /// Size of the item, in bytes.
+    /// This does not have to be a precise size. It is used to assess
+    /// whether the heap needs garbage collection.
+    size_t size;
+
+    /// Finalizer that will be called on the payload pointer.
+    /// Its job is to deallocate all associated resources,
+    /// including the memory pointed to by `data` (if any).
+    CDataFinalizer * finalizer;
+
+    /// The mark bit set by the FP heap traversal,
+    /// cleared by C heap sweep.
+    bool is_used;
+
+    /// Next item in the C heap.
+    struct CHeapItem * next;
+
+    /// Pointer to the previous next-pointer.
+    struct CHeapItem ** prev_next;
+} CHeapItem;
+
+typedef struct CHeap {
+    /// The first item in the heap. NULL if the heap is empty.
+    CHeapItem * first;
+
+    /// Total size of the heap. (Sum of sizes of items.)
+    /// This may not be a precise size since individual items'
+    /// sizes may be just estimates.
+    size_t size;
+
+    /// When heap reaches this size, GC will be triggered.
+    size_t gc_trigger_size;
+} CHeap;
+
+/// Create a C heap.
+void c_heap_init(CHeap * c_heap);
+
+/// Destroy the given C heap. Will not deallocate the given pointer.
+/// Will call finalizers & deallocate all blocks in the heap.
+void c_heap_destroy(CHeap * c_heap);
+
+/// Insert the given item into the heap if it's not there yet.
+/// The VM pointer is needed because this operation may trigger GC.
+void c_heap_insert_if_needed(struct VM * vm, CHeap * c_heap, CHeapItem * item);
+
+/// Mark the given item as used.
+void c_heap_mark_item(CHeapItem * item);
+
+/// Sweep the C heap, finalizing and freeing unused blocks.
+void c_heap_sweep(CHeap * c_heap);
+
+/// Create a C heap item from its payload, size estimate, and finalizer.
+/// The size does not have to be precise but it should roughly reflect
+/// how big the item is for GC to work effectively.
+CHeapItem * c_heap_create_item(void * data, size_t size, CDataFinalizer * finalizer);
+
+/* *** Idris heap **
+ * Objects without finalizers. Cheney-collected.
+ */
+
+typedef struct {
+    char*  next;   // Next allocated chunk. Should always (heap <= next < end).
+    char*  heap;   // Point to bottom of heap
+    char*  end;    // Point to top of heap
+    size_t size;   // Size of _next_ heap. Size of current heap is /end - heap/.
+    size_t growth; // Quantity of heap growth in bytes.
+
+    char* old;
+} Heap;
+
+
+void alloc_heap(Heap * heap, size_t heap_size, size_t growth, char * old);
+void free_heap(Heap * heap);
+char* aligned_heap_pointer(char * heap);
+
+#ifdef IDRIS_DEBUG
+void heap_check_all(Heap * heap);
+// Should be used _between_ gc's.
+#define HEAP_CHECK(vm) heap_check_all(&(vm->heap));
+#else
+#define HEAP_CHECK(vm)
+#endif // IDRIS_DEBUG
+
+#endif // _IDRIS_HEAP_H
diff --git a/dist/rts/idris_main.c b/dist/rts/idris_main.c
new file mode 100644
index 0000000..f05662a
--- /dev/null
+++ b/dist/rts/idris_main.c
@@ -0,0 +1,78 @@
+#include "idris_gmp.h"
+#include "idris_opts.h"
+#include "idris_rts.h"
+#include "idris_stats.h"
+
+void* _idris__123_runMain_95_0_125_(VM* vm, VAL* oldbase);
+
+#ifdef _WIN32
+
+#include <Windows.h>
+
+int win32_get_argv_utf8(int *argc_ptr, char ***argv_ptr)
+{
+    int argc;
+    char **argv;
+    wchar_t **argv_utf16 = CommandLineToArgvW(GetCommandLineW(), &argc);
+    int i;
+    int offset = (argc + 1) * sizeof(char *);
+    int size = offset;
+    for (i = 0; i < argc; i++) {
+        size += WideCharToMultiByte(CP_UTF8, 0, argv_utf16[i], -1, 0, 0, 0, 0);
+    }
+    argv = (char **)malloc(size);
+    for (i = 0; i < argc; i++) {
+        argv[i] = (char *)argv + offset;
+        offset += WideCharToMultiByte(CP_UTF8, 0, argv_utf16[i], -1, argv[i], size - offset, 0, 0);
+    }
+    *argc_ptr = argc;
+    *argv_ptr = argv;
+    return 0;
+}
+
+#endif
+
+// The default options should give satisfactory results under many circumstances.
+RTSOpts opts = {
+    .init_heap_size = 16384000,
+    .max_stack_size = 4096000,
+    .show_summary   = 0
+};
+
+#ifdef _WIN32
+int main() {
+    int argc;
+    char **argv;
+    win32_get_argv_utf8(&argc, &argv);
+#else
+int main(int argc, char **argv) {
+#endif
+    parse_shift_args(&opts, &argc, &argv);
+
+    __idris_argc = argc;
+    __idris_argv = argv;
+
+    VM* vm = init_vm(opts.max_stack_size, opts.init_heap_size, 1);
+    init_threadkeys();
+    init_threaddata(vm);
+    init_gmpalloc();
+
+    init_nullaries();
+    init_signals();
+
+    _idris__123_runMain_95_0_125_(vm, NULL);
+
+#ifdef IDRIS_DEBUG
+    if (opts.show_summary) {
+        idris_gcInfo(vm, 1);
+    }
+#endif
+
+    Stats stats = terminate(vm);
+
+    if (opts.show_summary) {
+        print_stats(&stats);
+    }
+
+    return EXIT_SUCCESS;
+}
diff --git a/dist/rts/idris_net.c b/dist/rts/idris_net.c
new file mode 100644
index 0000000..9256cc6
--- /dev/null
+++ b/dist/rts/idris_net.c
@@ -0,0 +1,351 @@
+// C-Side of the Idris network library
+// (C) Simon Fowler, 2014
+// MIT Licensed. Have fun!
+#include "idris_net.h"
+#include <errno.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#ifndef _WIN32
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#else
+static int socket_inited = 0;
+static WSADATA wsa_data;
+
+static void clean_sockets(void) {
+    WSACleanup();
+}
+
+static int check_init(void) {
+    if (!socket_inited) {
+        int result = WSAStartup(MAKEWORD(2, 2), &wsa_data);
+        if (result == NO_ERROR) {
+            socket_inited = 1;
+            atexit(clean_sockets);
+        }
+    }
+    return socket_inited;
+}
+#endif
+
+
+void buf_htonl(void* buf, int len) {
+    int* buf_i = (int*) buf;
+    int i;
+    for (i = 0; i < (len / sizeof(int)) + 1; i++) {
+        buf_i[i] = htonl(buf_i[i]);
+    }
+}
+
+void buf_ntohl(void* buf, int len) {
+    int* buf_i = (int*) buf;
+    int i;
+    for (i = 0; i < (len / sizeof(int)) + 1; i++) {
+        buf_i[i] = ntohl(buf_i[i]);
+    }
+}
+
+void* idrnet_malloc(int size) {
+    return malloc(size);
+}
+
+void idrnet_free(void* ptr) {
+    free(ptr);
+}
+
+
+int idrnet_socket(int domain, int type, int protocol) {
+#ifdef _WIN32
+    if (!check_init()) {
+        return -1;
+    }
+#endif
+    return socket(domain, type, protocol);
+}
+
+// We call this from quite a few functions. Given a textual host and an int port,
+// populates a struct addrinfo.
+int idrnet_getaddrinfo(struct addrinfo** address_res, char* host, int port,
+                       int family, int socket_type) {
+
+    struct addrinfo hints;
+    // Convert port into string
+    char str_port[8];
+    sprintf(str_port, "%d", port);
+
+    // Set up hints structure
+    memset(&hints, 0, sizeof(hints)); // zero out hints
+    hints.ai_family = family;
+    hints.ai_socktype = socket_type;
+
+    // If the length of the hostname is 0 (i.e, it was set to Nothing in Idris)
+    // then we want to instruct the C library to fill in the IP automatically
+    if (strlen(host) == 0) {
+        hints.ai_flags = AI_PASSIVE; // fill in IP automatically
+        return getaddrinfo(NULL, str_port, &hints, address_res);
+    }
+    return getaddrinfo(host, str_port, &hints, address_res);
+
+}
+
+int idrnet_bind(int sockfd, int family, int socket_type, char* host, int port) {
+    struct addrinfo* address_res;
+    int addr_res = idrnet_getaddrinfo(&address_res, host, port, family, socket_type);
+    if (addr_res != 0) {
+        //printf("Lib err: bind getaddrinfo\n");
+        return -1;
+    }
+
+    int bind_res = bind(sockfd, address_res->ai_addr, address_res->ai_addrlen);
+    if (bind_res == -1) {
+        //freeaddrinfo(address_res);
+        //printf("Lib err: bind\n");
+        return -1;
+    }
+    return 0;
+}
+
+int idrnet_connect(int sockfd, int family, int socket_type, char* host, int port) {
+    struct addrinfo* remote_host;
+    int addr_res = idrnet_getaddrinfo(&remote_host, host, port, family, socket_type);
+    if (addr_res != 0) {
+        return -1;
+    }
+
+    int connect_res = connect(sockfd, remote_host->ai_addr, remote_host->ai_addrlen);
+    if (connect_res == -1) {
+        freeaddrinfo(remote_host);
+        return -1;
+    }
+    freeaddrinfo(remote_host);
+    return 0;
+}
+
+
+int idrnet_sockaddr_family(void* sockaddr) {
+    struct sockaddr* addr = (struct sockaddr*) sockaddr;
+    return (int) addr->sa_family;
+}
+
+char* idrnet_sockaddr_ipv4(void* sockaddr) {
+    struct sockaddr_in* addr = (struct sockaddr_in*) sockaddr;
+    char* ip_addr = (char*) malloc(sizeof(char) * INET_ADDRSTRLEN);
+    inet_ntop(AF_INET, &(addr->sin_addr), ip_addr, INET_ADDRSTRLEN);
+    //printf("Lib: ip_addr: %s\n", ip_addr);
+    return ip_addr;
+}
+
+int idrnet_sockaddr_ipv4_port(void* sockaddr) {
+    struct sockaddr_in* addr = (struct sockaddr_in*) sockaddr;
+    return ((int) ntohs(addr->sin_port));
+}
+
+void* idrnet_create_sockaddr() {
+    return malloc(sizeof(struct sockaddr_storage));
+}
+
+
+int idrnet_accept(int sockfd, void* sockaddr) {
+    struct sockaddr* addr = (struct sockaddr*) sockaddr;
+    socklen_t addr_size = sizeof(struct sockaddr_storage);
+    return accept(sockfd, addr, &addr_size);
+}
+
+int idrnet_send(int sockfd, char* data) {
+    int len = strlen(data); // For now.
+    return send(sockfd, (void*) data, len, 0);
+}
+
+int idrnet_send_buf(int sockfd, void* data, int len) {
+    void* buf_cpy = malloc(len);
+    memset(buf_cpy, 0, len);
+    memcpy(buf_cpy, data, len);
+    buf_htonl(buf_cpy, len);
+    int res = send(sockfd, buf_cpy, len, 0);
+    free(buf_cpy);
+    return res;
+}
+
+void* idrnet_recv(int sockfd, int len) {
+    idrnet_recv_result* res_struct =
+        (idrnet_recv_result*) malloc(sizeof(idrnet_recv_result));
+    char* buf = malloc(len + 1);
+    memset(buf, 0, len + 1);
+    int recv_res = recv(sockfd, buf, len, 0);
+    res_struct->result = recv_res;
+
+    if (recv_res > 0) { // Data was received
+        buf[recv_res + 1] = 0x00; // Null-term, so Idris can interpret it
+    }
+    res_struct->payload = buf;
+    return (void*) res_struct;
+}
+
+int idrnet_recv_buf(int sockfd, void* buf, int len) {
+    int recv_res = recv(sockfd, buf, len, 0);
+    if (recv_res != -1) {
+        buf_ntohl(buf, len);
+    }
+    return recv_res;
+}
+
+int idrnet_get_recv_res(void* res_struct) {
+    return (((idrnet_recv_result*) res_struct)->result);
+}
+
+char* idrnet_get_recv_payload(void* res_struct) {
+    return (((idrnet_recv_result*) res_struct)->payload);
+}
+
+void idrnet_free_recv_struct(void* res_struct) {
+    idrnet_recv_result* i_res_struct =
+        (idrnet_recv_result*) res_struct;
+    if (i_res_struct->payload != NULL) {
+        free(i_res_struct->payload);
+    }
+    free(res_struct);
+}
+
+int idrnet_errno() {
+    return errno;
+}
+
+
+int idrnet_sendto(int sockfd, char* data, char* host, int port, int family) {
+
+    struct addrinfo* remote_host;
+    int addr_res = idrnet_getaddrinfo(&remote_host, host, port, family, SOCK_DGRAM);
+    if (addr_res != 0) {
+        return -1;
+    }
+
+    int send_res = sendto(sockfd, data, strlen(data), 0,
+                        remote_host->ai_addr, remote_host->ai_addrlen);
+    freeaddrinfo(remote_host);
+    return send_res;
+}
+
+int idrnet_sendto_buf(int sockfd, void* buf, int buf_len, char* host, int port, int family) {
+
+    struct addrinfo* remote_host;
+    int addr_res = idrnet_getaddrinfo(&remote_host, host, port, family, SOCK_DGRAM);
+    if (addr_res != 0) {
+        //printf("lib err: sendto getaddrinfo \n");
+        return -1;
+    }
+
+    buf_htonl(buf, buf_len);
+
+    int send_res = sendto(sockfd, buf, buf_len, 0,
+                        remote_host->ai_addr, remote_host->ai_addrlen);
+    if (send_res == -1) {
+        perror("lib err: sendto \n");
+    }
+    //freeaddrinfo(remote_host);
+    return send_res;
+}
+
+
+
+void* idrnet_recvfrom(int sockfd, int len) {
+/*
+ * int recvfrom(int sockfd, void *buf, int len, unsigned int flags,
+             struct sockaddr *from, int *fromlen);
+*/
+    // Allocate the required structures, and nuke them
+    struct sockaddr_storage* remote_addr =
+        (struct sockaddr_storage*) malloc(sizeof(struct sockaddr_storage));
+    char* buf = (char*) malloc(len + 1);
+    idrnet_recvfrom_result* ret =
+        (idrnet_recvfrom_result*) malloc(sizeof(idrnet_recvfrom_result));
+    memset(remote_addr, 0, sizeof(struct sockaddr_storage));
+    memset(buf, 0, len + 1);
+    memset(ret, 0, sizeof(idrnet_recvfrom_result));
+    socklen_t fromlen = sizeof(struct sockaddr_storage);
+
+    int recv_res = recvfrom(sockfd, buf, len, 0, (struct sockaddr*) remote_addr, &fromlen);
+    ret->result = recv_res;
+    // Check for failure...
+    if (recv_res == -1) {
+        free(buf);
+        free(remote_addr);
+    } else {
+        // If data was received, process and populate
+        ret->result = recv_res;
+        ret->remote_addr = remote_addr;
+        // Ensure the last byte is NULL, since in this mode we're sending strings
+        buf[len] = 0x00;
+        ret->payload = (void*) buf;
+    }
+
+    return ret;
+}
+
+void* idrnet_recvfrom_buf(int sockfd, void* buf, int len) {
+    // Allocate the required structures, and nuke them
+    struct sockaddr_storage* remote_addr =
+        (struct sockaddr_storage*) malloc(sizeof(struct sockaddr_storage));
+    idrnet_recvfrom_result* ret =
+        (idrnet_recvfrom_result*) malloc(sizeof(idrnet_recvfrom_result));
+    memset(remote_addr, 0, sizeof(struct sockaddr_storage));
+    memset(ret, 0, sizeof(idrnet_recvfrom_result));
+    socklen_t fromlen = 0;
+
+    int recv_res = recvfrom(sockfd, buf, len, 0, (struct sockaddr*) remote_addr, &fromlen);
+    // Check for failure... But don't free the buffer! Not our job.
+    ret->result = recv_res;
+    if (recv_res == -1) {
+        free(remote_addr);
+    }
+    // Payload will be NULL -- since it's been put into the user-specified buffer. We
+    // still need the return struct to get our hands on the remote address, though.
+    if (recv_res > 0) {
+        buf_ntohl(buf, len);
+        ret->payload = NULL;
+        ret->remote_addr = remote_addr;
+    }
+    return ret;
+}
+
+int idrnet_get_recvfrom_res(void* res_struct) {
+    return (((idrnet_recvfrom_result*) res_struct)->result);
+}
+
+char* idrnet_get_recvfrom_payload(void* res_struct) {
+    return (((idrnet_recvfrom_result*) res_struct)->payload);
+}
+
+void* idrnet_get_recvfrom_sockaddr(void* res_struct) {
+    idrnet_recvfrom_result* recv_struct = (idrnet_recvfrom_result*) res_struct;
+    return recv_struct->remote_addr;
+}
+
+int idrnet_get_recvfrom_port(void* res_struct) {
+    idrnet_recvfrom_result* recv_struct = (idrnet_recvfrom_result*) res_struct;
+    if (recv_struct->remote_addr != NULL) {
+        struct sockaddr_in* remote_addr_in =
+            (struct sockaddr_in*) recv_struct->remote_addr;
+        return ((int) ntohs(remote_addr_in->sin_port));
+    }
+    return -1;
+}
+
+void idrnet_free_recvfrom_struct(void* res_struct) {
+    idrnet_recvfrom_result* recv_struct = (idrnet_recvfrom_result*) res_struct;
+    if (recv_struct != NULL) {
+        if (recv_struct->payload != NULL) {
+            free(recv_struct->payload);
+        }
+        if (recv_struct->remote_addr != NULL) {
+            free(recv_struct->remote_addr);
+        }
+    }
+}
+
+int idrnet_geteagain() {
+    return EAGAIN;
+}
+
diff --git a/dist/rts/idris_net.h b/dist/rts/idris_net.h
new file mode 100644
index 0000000..9667660
--- /dev/null
+++ b/dist/rts/idris_net.h
@@ -0,0 +1,91 @@
+#ifndef IDRISNET_H
+#define IDRISNET_H
+
+// Includes used by the idris-file.
+#ifdef _WIN32
+#include <winsock2.h>
+#include <Ws2tcpip.h>
+#else
+#include <netdb.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#endif
+
+struct sockaddr_storage;
+struct addrinfo;
+
+typedef struct idrnet_recv_result {
+    int result;
+    void* payload;
+} idrnet_recv_result;
+
+// Same type of thing as idrnet_recv_result, but for UDP, so stores some 
+// address information
+typedef struct idrnet_recvfrom_result {
+    int result;
+    void* payload;
+    struct sockaddr_storage* remote_addr;
+} idrnet_recvfrom_result;
+
+// Memory management functions
+void* idrnet_malloc(int size);
+void idrnet_free(void* ptr);
+
+// Gets value of errno
+int idrnet_errno(); 
+
+int idrnet_socket(int domain, int type, int protocol);
+
+// Bind
+int idrnet_bind(int sockfd, int family, int socket_type, char* host, int port);
+
+// Connect
+int idrnet_connect(int sockfd, int family, int socket_type, char* host, int port);
+
+// Accessor functions for struct_sockaddr
+int idrnet_sockaddr_family(void* sockaddr);
+char* idrnet_sockaddr_ipv4(void* sockaddr);
+int idrnet_sockaddr_ipv4_port(void* sockaddr);
+void* idrnet_create_sockaddr();
+
+// Accept
+int idrnet_accept(int sockfd, void* sockaddr);
+
+// Send
+int idrnet_send(int sockfd, char* data);
+int idrnet_send_buf(int sockfd, void* data, int len);
+
+// Receive
+// Creates a result structure containing result and payload
+void* idrnet_recv(int sockfd, int len);
+// Receives directly into a buffer
+int idrnet_recv_buf(int sockfd, void* buf, int len);
+
+// UDP Send
+int idrnet_sendto(int sockfd, char* data, char* host, int port, int family);
+int idrnet_sendto_buf(int sockfd, void* buf, int buf_len, char* host, int port, int family);
+
+
+// UDP Receive
+void* idrnet_recvfrom(int sockfd, int len);
+void* idrnet_recvfrom_buf(int sockfd, void* buf, int len);
+
+// Receive structure accessors
+int idrnet_get_recv_res(void* res_struct);
+char* idrnet_get_recv_payload(void* res_struct);
+void idrnet_free_recv_struct(void* res_struct);
+
+// Recvfrom structure accessors
+int idrnet_get_recvfrom_res(void* res_struct);
+char* idrnet_get_recvfrom_payload(void* res_struct);
+void* idrnet_get_recvfrom_sockaddr(void* res_struct);
+void idrnet_free_recvfrom_struct(void* res_struct);
+
+
+int idrnet_getaddrinfo(struct addrinfo** address_res, char* host, 
+    int port, int family, int socket_type);
+
+int idrnet_geteagain();
+
+#endif
diff --git a/dist/rts/idris_opts.c b/dist/rts/idris_opts.c
new file mode 100644
index 0000000..c6cc53c
--- /dev/null
+++ b/dist/rts/idris_opts.c
@@ -0,0 +1,109 @@
+#include "idris_opts.h"
+
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+
+
+#define USAGE "\n"                                              \
+    "Usage: <prog> [+RTS <rtsopts> -RTS] <args>\n\n"            \
+    "Options:\n\n"                                              \
+    "  -?    Print this message and exits.\n"                   \
+    "  -s    Summary GC statistics.\n"                          \
+    "  -H    Initial heap size. Egs: -H4M, -H500K, -H1G\n"      \
+    "  -K    Sets the maximum stack size. Egs: -K8M\n"          \
+    "\n"
+
+void print_usage(FILE * s) {
+    fprintf(s, USAGE);
+}
+
+int read_size(char * str) {
+    int size = 0;
+    char mult = ' ';
+
+    int r = sscanf(str, "%u%c", &size, &mult);
+
+    if (r == 1)
+        return size;
+
+    if (r == 2) {
+        switch (mult) {
+        case 'K': size = size << 10; break;
+        case 'M': size = size << 20; break;
+        case 'G': size = size << 30; break;
+        default:
+            fprintf(stderr,
+                    "RTS Opts: Unable to recognize size suffix `%c'.\n" \
+                    "          Possible suffixes are K or M or G.\n",
+                    mult);
+            print_usage(stderr);
+            exit(EXIT_FAILURE);
+        }
+        return size;
+    }
+
+    fprintf(stderr, "RTS Opts: Unable to parse size. Egs: 1K, 10M, 2G.\n");
+    print_usage(stderr);
+    exit(EXIT_FAILURE);
+}
+
+
+int parse_args(RTSOpts * opts, int argc, char *argv[])
+{
+    if (argc == 0)
+        return 0;
+
+    if (strcmp(argv[0], "+RTS") != 0)
+        return 0;
+
+    int i;
+    for (i = 1; i < argc; i++) {
+        if (strcmp(argv[i], "-RTS") == 0) {
+            return i + 1;
+        }
+
+        if (argv[i][0] != '-') {
+            fprintf(stderr, "RTS options should start with '-'.\n");
+            print_usage(stderr);
+            exit(EXIT_FAILURE);
+        }
+
+        switch (argv[i][1])
+        {
+        case '?':
+            print_usage(stdout);
+            exit(EXIT_SUCCESS);
+            break;
+
+        case 's':
+            opts->show_summary = 1;
+            break;
+
+        case 'H':
+            opts->init_heap_size = read_size(argv[i] + 2);
+            break;
+
+        case 'K':
+            opts->max_stack_size = read_size(argv[i] + 2);
+            break;
+
+        default:
+            printf("RTS opts: Wrong argument: %s\n", argv[i]);
+            print_usage(stderr);
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    return argc;
+}
+
+void parse_shift_args(RTSOpts * opts, int * argc, char *** argv) {
+    size_t shift = parse_args(opts, (*argc) - 1, (*argv) + 1);
+
+    char *prg = (*argv)[0];
+    *argc = *argc - shift;
+    *argv = *argv + shift;
+    (*argv)[0] = prg;
+}
diff --git a/dist/rts/idris_opts.h b/dist/rts/idris_opts.h
new file mode 100644
index 0000000..8fbbaeb
--- /dev/null
+++ b/dist/rts/idris_opts.h
@@ -0,0 +1,19 @@
+#ifndef _IDRIS_OPTS_H
+#define _IDRIS_OPTS_H
+
+#include <stddef.h>
+#include <stdio.h>
+
+typedef struct {
+    size_t init_heap_size;
+    size_t max_stack_size;
+    int    show_summary;
+} RTSOpts;
+
+void print_usage(FILE * s);
+
+// Parse rts options and shift arguments such that rts options becomes invisible
+// for main program.
+void parse_shift_args(RTSOpts * opts, int * argc, char *** argv);
+
+#endif
diff --git a/dist/rts/idris_rts.c b/dist/rts/idris_rts.c
new file mode 100644
index 0000000..a4222c4
--- /dev/null
+++ b/dist/rts/idris_rts.c
@@ -0,0 +1,1174 @@
+#include <assert.h>
+#include <errno.h>
+
+#include "idris_rts.h"
+#include "idris_gc.h"
+#include "idris_utf8.h"
+#include "idris_bitstring.h"
+#include "getline.h"
+
+#define STATIC_ASSERT(COND,MSG) typedef char static_assertion_##MSG[(COND)?1:-1]
+
+STATIC_ASSERT(sizeof(Hdr) == 8, condSizeOfHdr);
+
+#if defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__DragonFly__)
+#include <signal.h>
+#endif
+
+
+#ifdef HAS_PTHREAD
+static pthread_key_t vm_key;
+#else
+static VM* global_vm;
+#endif
+
+void free_key(void *vvm) {
+    // nothing to free, we just used the VM pointer which is freed elsewhere
+}
+
+VM* init_vm(int stack_size, size_t heap_size,
+            int max_threads // not implemented yet
+            ) {
+
+    VM* vm = malloc(sizeof(VM));
+    STATS_INIT_STATS(vm->stats)
+    STATS_ENTER_INIT(vm->stats)
+
+    VAL* valstack = malloc(stack_size * sizeof(VAL));
+
+    vm->active = 1;
+    vm->valstack = valstack;
+    vm->valstack_top = valstack;
+    vm->valstack_base = valstack;
+    vm->stack_max = valstack + stack_size;
+
+    alloc_heap(&(vm->heap), heap_size, heap_size, NULL);
+
+    c_heap_init(&vm->c_heap);
+
+    vm->ret = NULL;
+    vm->reg1 = NULL;
+#ifdef HAS_PTHREAD
+    vm->inbox = malloc(1024*sizeof(vm->inbox[0]));
+    assert(vm->inbox);
+    memset(vm->inbox, 0, 1024*sizeof(vm->inbox[0]));
+    vm->inbox_end = vm->inbox + 1024;
+    vm->inbox_write = vm->inbox;
+    vm->inbox_nextid = 1;
+
+    // The allocation lock must be reentrant. The lock exists to ensure that
+    // no memory is allocated during the message sending process, but we also
+    // check the lock in calls to allocate.
+    // The problem comes when we use requireAlloc to guarantee a chunk of memory
+    // first: this sets the lock, and since it is not reentrant, we get a deadlock.
+    pthread_mutexattr_t rec_attr;
+    pthread_mutexattr_init(&rec_attr);
+    pthread_mutexattr_settype(&rec_attr, PTHREAD_MUTEX_RECURSIVE);
+
+    pthread_mutex_init(&(vm->inbox_lock), NULL);
+    pthread_mutex_init(&(vm->inbox_block), NULL);
+    pthread_mutex_init(&(vm->alloc_lock), &rec_attr);
+    pthread_cond_init(&(vm->inbox_waiting), NULL);
+
+    vm->max_threads = max_threads;
+    vm->processes = 0;
+    vm->creator = NULL;
+
+#else
+    global_vm = vm;
+#endif
+    STATS_LEAVE_INIT(vm->stats)
+    return vm;
+}
+
+VM* idris_vm(void) {
+    VM* vm = init_vm(4096000, 4096000, 1);
+    init_threadkeys();
+    init_threaddata(vm);
+    init_gmpalloc();
+    init_nullaries();
+    init_signals();
+
+    return vm;
+}
+
+VM* get_vm(void) {
+#ifdef HAS_PTHREAD
+    return pthread_getspecific(vm_key);
+#else
+    return global_vm;
+#endif
+}
+
+void close_vm(VM* vm) {
+    terminate(vm);
+}
+
+#ifdef HAS_PTHREAD
+void create_key(void) {
+    pthread_key_create(&vm_key, free_key);
+}
+#endif
+
+void init_threadkeys(void) {
+#ifdef HAS_PTHREAD
+    static pthread_once_t key_once = PTHREAD_ONCE_INIT;
+    pthread_once(&key_once, create_key);
+#endif
+}
+
+void init_threaddata(VM *vm) {
+#ifdef HAS_PTHREAD
+    pthread_setspecific(vm_key, vm);
+#endif
+}
+
+void init_signals(void) {
+#if defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__DragonFly__)
+    signal(SIGPIPE, SIG_IGN);
+#endif
+}
+
+Stats terminate(VM* vm) {
+    Stats stats = vm->stats;
+    STATS_ENTER_EXIT(stats)
+    free(vm->valstack);
+    free_heap(&(vm->heap));
+    c_heap_destroy(&(vm->c_heap));
+#ifdef HAS_PTHREAD
+    pthread_mutex_destroy(&(vm->inbox_lock));
+    pthread_mutex_destroy(&(vm->inbox_block));
+    pthread_mutex_destroy(&(vm->alloc_lock));
+    pthread_cond_destroy(&(vm->inbox_waiting));
+    free(vm->inbox);
+    if (vm->creator != NULL) {
+        vm->creator->processes--;
+    }
+#endif
+    // free(vm);
+    // Set the VM as inactive, so that if any message gets sent to it
+    // it will not get there, rather than crash the entire system.
+    // (TODO: We really need to be cleverer than this if we're going to
+    // write programs than create lots of threads...)
+    vm->active = 0;
+
+    STATS_LEAVE_EXIT(stats)
+    return stats;
+}
+
+CData cdata_allocate(size_t size, CDataFinalizer finalizer)
+{
+    void * data = (void *) malloc(size);
+    return cdata_manage(data, size, finalizer);
+}
+
+CData cdata_manage(void * data, size_t size, CDataFinalizer finalizer)
+{
+    return c_heap_create_item(data, size, finalizer);
+}
+
+void idris_requireAlloc(VM * vm, size_t size) {
+    if (!(vm->heap.next + size < vm->heap.end)) {
+        idris_gc(vm);
+    }
+#ifdef HAS_PTHREAD
+    int lock = vm->processes > 0;
+    if (lock) { // We only need to lock if we're in concurrent mode
+       pthread_mutex_lock(&vm->alloc_lock);
+    }
+#endif
+}
+
+void idris_doneAlloc(VM * vm) {
+#ifdef HAS_PTHREAD
+    int lock = vm->processes > 0;
+    if (lock) { // We only need to lock if we're in concurrent mode
+       pthread_mutex_unlock(&vm->alloc_lock);
+    }
+#endif
+}
+
+int space(VM* vm, size_t size) {
+    return (vm->heap.next + size) < vm->heap.end;
+}
+
+void* idris_alloc(size_t size) {
+    RawData * cl = (RawData*) allocate(sizeof(*cl)+size, 0);
+    SETTY(cl, CT_RAWDATA);
+    return (void*)cl->raw;
+}
+
+void* idris_realloc(void* old, size_t old_size, size_t size) {
+    void* ptr = idris_alloc(size);
+    memcpy(ptr, old, old_size);
+    return ptr;
+}
+
+void idris_free(void* ptr, size_t size) {
+}
+
+void * allocate(size_t sz, int lock) {
+    return iallocate(get_vm(), sz, lock);
+}
+
+void* iallocate(VM * vm, size_t isize, int outerlock) {
+    size_t size = aligned(isize);
+
+#ifdef HAS_PTHREAD
+    int lock = vm->processes > 0 && !outerlock;
+
+    if (lock) { // not message passing
+       pthread_mutex_lock(&vm->alloc_lock);
+    }
+#endif
+
+    if (vm->heap.next + size < vm->heap.end) {
+        STATS_ALLOC(vm->stats, size)
+        char* ptr = vm->heap.next;
+        vm->heap.next += size;
+        assert(vm->heap.next <= vm->heap.end);
+        ((Hdr*)ptr)->sz = isize;
+
+#ifdef HAS_PTHREAD
+        if (lock) { // not message passing
+           pthread_mutex_unlock(&vm->alloc_lock);
+        }
+#endif
+        return (void*)ptr;
+    } else {
+        // If we're trying to allocate something bigger than the heap,
+        // grow the heap here so that the new heap is big enough.
+        if (size > vm->heap.size) {
+            vm->heap.size += size;
+        }
+        idris_gc(vm);
+
+        // If there's still not enough room, grow the heap and try again
+        if (vm->heap.next + size >= vm->heap.end) {
+            vm->heap.size += size+vm->heap.growth;
+            idris_gc(vm);
+        }
+
+#ifdef HAS_PTHREAD
+        if (lock) { // not message passing
+           pthread_mutex_unlock(&vm->alloc_lock);
+        }
+#endif
+        return iallocate(vm, size, outerlock);
+    }
+
+}
+
+static String * allocStr(VM * vm, size_t len, int outer) {
+    String * cl = iallocate(vm, sizeof(*cl) + len + 1, outer);
+    SETTY(cl, CT_STRING);
+    cl->slen = len;
+    return cl;
+}
+
+static VAL mkfloat(VM* vm, double val, int outer) {
+    Float * cl = iallocate(vm, sizeof(*cl), outer);
+    SETTY(cl, CT_FLOAT);
+    cl->f = val;
+    return (VAL)cl;
+}
+
+VAL MKFLOAT(VM* vm, double val) {
+    return mkfloat(vm, val, 0);
+}
+
+VAL MKFLOATc(VM* vm, double val) {
+    return mkfloat(vm, val, 1);
+}
+
+static VAL mkstrlen(VM* vm, const char * str, size_t len, int outer) {
+    String * cl = allocStr(vm, len, outer);
+    // hdr.u8 used to mark a null string
+    cl->hdr.u8 = str == NULL;
+    if (!cl->hdr.u8)
+      memcpy(cl->str, str, len);
+    return (VAL)cl;
+}
+
+VAL MKSTRlen(VM* vm, const char * str, size_t len) {
+    return mkstrlen(vm, str, len, 0);
+}
+
+VAL MKSTRclen(VM* vm, char* str, size_t len) {
+    return mkstrlen(vm, str, len, 1);
+}
+
+VAL MKSTR(VM* vm, const char* str) {
+    return mkstrlen(vm, str, str? strlen(str) : 0, 0);
+}
+
+VAL MKSTRc(VM* vm, char* str) {
+    return mkstrlen(vm, str, strlen(str), 1);
+}
+
+static char * getstroff(StrOffset * stroff) {
+    return stroff->base->str + stroff->offset;
+}
+
+char* GETSTROFF(VAL stroff) {
+    // Assume STROFF
+    return getstroff((StrOffset*)stroff);
+}
+
+static size_t getstrofflen(StrOffset * stroff) {
+    return stroff->base->slen - stroff->offset;
+}
+
+size_t GETSTROFFLEN(VAL stroff) {
+    // Assume STROFF
+    // we're working in char* here so no worries about utf8 char length
+    return getstrofflen((StrOffset*)stroff);
+}
+
+static VAL mkcdata(VM * vm, CHeapItem * item, int outer) {
+    c_heap_insert_if_needed(vm, &vm->c_heap, item);
+    CDataC * cl = iallocate(vm, sizeof(*cl), outer);
+    SETTY(cl, CT_CDATA);
+    cl->item = item;
+    return (VAL)cl;
+}
+
+VAL MKCDATA(VM* vm, CHeapItem * item) {
+    return mkcdata(vm, item, 0);
+}
+
+VAL MKCDATAc(VM* vm, CHeapItem * item) {
+    return mkcdata(vm, item, 1);
+}
+
+static VAL mkptr(VM* vm, void* ptr, int outer) {
+    Ptr * cl = iallocate(vm, sizeof(*cl), outer);
+    SETTY(cl, CT_PTR);
+    cl->ptr = ptr;
+    return (VAL)cl;
+}
+
+VAL MKPTR(VM* vm, void* ptr) {
+    return mkptr(vm, ptr, 0);
+}
+
+VAL MKPTRc(VM* vm, void* ptr) {
+    return mkptr(vm, ptr, 1);
+}
+
+VAL mkmptr(VM* vm, void* ptr, size_t size, int outer) {
+    ManagedPtr * cl = iallocate(vm, sizeof(*cl) + size, outer);
+    SETTY(cl, CT_MANAGEDPTR);
+    memcpy(cl->mptr, ptr, size);
+    return (VAL)cl;
+}
+
+VAL MKMPTR(VM* vm, void* ptr, size_t size) {
+    return mkmptr(vm, ptr, size, 0);
+}
+
+VAL MKMPTRc(VM* vm, void* ptr, size_t size) {
+    return mkmptr(vm, ptr, size, 1);
+}
+
+VAL MKB8(VM* vm, uint8_t bits8) {
+    return MKINT(bits8);
+}
+
+VAL MKB16(VM* vm, uint16_t bits16) {
+    return MKINT(bits16);
+}
+
+VAL MKB32(VM* vm, uint32_t bits32) {
+    Bits32 * cl = iallocate(vm, sizeof(*cl), 1);
+    SETTY(cl, CT_BITS32);
+    cl->bits32 = bits32;
+    return (VAL)cl;
+}
+
+VAL MKB64(VM* vm, uint64_t bits64) {
+    Bits64 * cl = iallocate(vm, sizeof(*cl), 1);
+    SETTY(cl, CT_BITS64);
+    cl->bits64 = bits64;
+    return (VAL)cl;
+}
+
+void idris_trace(VM* vm, const char* func, int line) {
+    printf("At %s:%d\n", func, line);
+    dumpStack(vm);
+    puts("");
+    fflush(stdout);
+}
+
+void dumpStack(VM* vm) {
+    int i = 0;
+    VAL* root;
+
+    for (root = vm->valstack; root < vm->valstack_top; ++root, ++i) {
+        printf("%d: ", i);
+        dumpVal(*root);
+        if (*root >= (VAL)(vm->heap.heap) && *root < (VAL)(vm->heap.end)) { printf(" OK"); }
+        if (root == vm->valstack_base) { printf(" <--- base"); }
+        printf("\n");
+    }
+    printf("RET: ");
+    dumpVal(vm->ret);
+    printf("\n");
+}
+
+void dumpVal(VAL v) {
+    if (v==NULL) return;
+    int i;
+    switch(GETTY(v)) {
+    case CT_INT:
+        printf("%" PRIdPTR " ", GETINT(v));
+        break;
+    case CT_CON:
+        {
+            Con * cl = (Con*)v;
+            printf("%d[", (int)TAG(cl));
+            for(i = 0; i < CARITY(cl); ++i) {
+                dumpVal(cl->args[i]);
+            }
+            printf("] ");
+        }
+        break;
+    case CT_STRING:
+        {
+            String * cl = (String*)v;
+            printf("STR[%s]", cl->str);
+        }
+        break;
+    case CT_STROFFSET:
+        {
+            StrOffset * cl = (StrOffset*)v;
+            printf("OFFSET[");
+            dumpVal((VAL)cl->base);
+            printf("]");
+        }
+        break;
+    case CT_FWD:
+        {
+            Fwd * cl = (Fwd*)v;
+            printf("CT_FWD ");
+            dumpVal((VAL)cl->fwd);
+        }
+        break;
+    default:
+        printf("val");
+    }
+
+}
+
+void idris_memset(void* ptr, i_int offset, uint8_t c, i_int size) {
+    memset(((uint8_t*)ptr) + offset, c, size);
+}
+
+uint8_t idris_peek(void* ptr, i_int offset) {
+    return *(((uint8_t*)ptr) + offset);
+}
+
+void idris_poke(void* ptr, i_int offset, uint8_t data) {
+    *(((uint8_t*)ptr) + offset) = data;
+}
+
+
+VAL idris_peekPtr(VM* vm, VAL ptr, VAL offset) {
+    void** addr = (void **)(((char *)GETPTR(ptr)) + GETINT(offset));
+    return MKPTR(vm, *addr);
+}
+
+VAL idris_pokePtr(VAL ptr, VAL offset, VAL data) {
+    void** addr = (void **)((char *)GETPTR(ptr) + GETINT(offset));
+    *addr = GETPTR(data);
+    return MKINT(0);
+}
+
+VAL idris_peekDouble(VM* vm, VAL ptr, VAL offset) {
+    return MKFLOAT(vm, *(double*)((char *)GETPTR(ptr) + GETINT(offset)));
+}
+
+VAL idris_pokeDouble(VAL ptr, VAL offset, VAL data) {
+    *(double*)((char *)GETPTR(ptr) + GETINT(offset)) = GETFLOAT(data);
+    return MKINT(0);
+}
+
+VAL idris_peekSingle(VM* vm, VAL ptr, VAL offset) {
+    return MKFLOAT(vm, *(float*)((char *)GETPTR(ptr) + GETINT(offset)));
+}
+
+VAL idris_pokeSingle(VAL ptr, VAL offset, VAL data) {
+    *(float*)((char *)GETPTR(ptr) + GETINT(offset)) = GETFLOAT(data);
+    return MKINT(0);
+}
+
+void idris_memmove(void* dest, void* src, i_int dest_offset, i_int src_offset, i_int size) {
+    memmove((char *)dest + dest_offset, (char *)src + src_offset, size);
+}
+
+VAL idris_castIntStr(VM* vm, VAL i) {
+    int x = (int) GETINT(i);
+    String * cl = allocStr(vm, 16, 0);
+    cl->slen = sprintf(cl->str, "%d", x);
+    return (VAL)cl;
+}
+
+VAL idris_castBitsStr(VM* vm, VAL i) {
+    String * cl;
+    ClosureType ty = GETTY(i);
+
+    switch (ty) {
+    case CT_INT: // 8/16 bits
+        // max length 16 bit unsigned int str 5 chars (65,535)
+        cl = allocStr(vm, 6, 0);
+        cl->slen = sprintf(cl->str, "%" PRIu16, (uint16_t)GETBITS16(i));
+        break;
+    case CT_BITS32:
+        // max length 32 bit unsigned int str 10 chars (4,294,967,295)
+        cl = allocStr(vm, 11, 0);
+        cl->slen = sprintf(cl->str, "%" PRIu32, GETBITS32(i));
+        break;
+    case CT_BITS64:
+        // max length 64 bit unsigned int str 20 chars (18,446,744,073,709,551,615)
+        cl = allocStr(vm, 21, 0);
+        cl->slen = sprintf(cl->str, "%" PRIu64, GETBITS64(i));
+        break;
+    default:
+        fprintf(stderr, "Fatal Error: ClosureType %d, not an integer type", ty);
+        exit(EXIT_FAILURE);
+    }
+    return (VAL)cl;
+}
+
+VAL idris_castStrInt(VM* vm, VAL i) {
+    char *end;
+    i_int v = strtol(GETSTR(i), &end, 10);
+    if (*end == '\0' || *end == '\n' || *end == '\r')
+        return MKINT(v);
+    else
+        return MKINT(0);
+}
+
+VAL idris_castFloatStr(VM* vm, VAL i) {
+    String * cl = allocStr(vm, 32, 0);
+    cl->slen = snprintf(cl->str, 32, "%.16g", GETFLOAT(i));
+    return (VAL)cl;
+}
+
+VAL idris_castStrFloat(VM* vm, VAL i) {
+    return MKFLOAT(vm, strtod(GETSTR(i), NULL));
+}
+
+VAL idris_concat(VM* vm, VAL l, VAL r) {
+    char *rs = GETSTR(r);
+    char *ls = GETSTR(l);
+    size_t llen = GETSTRLEN(l);
+    size_t rlen = GETSTRLEN(r);
+
+    String * cl = allocStr(vm, llen + rlen, 0);
+    memcpy(cl->str, ls, llen);
+    memcpy(cl->str + llen, rs, rlen);
+    return (VAL)cl;
+}
+
+VAL idris_strlt(VM* vm, VAL l, VAL r) {
+    char *ls = GETSTR(l);
+    char *rs = GETSTR(r);
+
+    return MKINT((i_int)(strcmp(ls, rs) < 0));
+}
+
+VAL idris_streq(VM* vm, VAL l, VAL r) {
+    char *ls = GETSTR(l);
+    char *rs = GETSTR(r);
+
+    return MKINT((i_int)(strcmp(ls, rs) == 0));
+}
+
+VAL idris_strlen(VM* vm, VAL l) {
+    return MKINT((i_int)(idris_utf8_strlen(GETSTR(l))));
+}
+
+VAL idris_readStr(VM* vm, FILE* h) {
+    VAL ret;
+    char *buffer = NULL;
+    size_t n = 0;
+    ssize_t len;
+    len = getline(&buffer, &n, h);
+    if (len <= 0) {
+        ret = MKSTR(vm, "");
+    } else {
+        ret = MKSTR(vm, buffer);
+    }
+    free(buffer);
+    return ret;
+}
+
+VAL idris_readChars(VM* vm, int num, FILE* h) {
+    VAL ret;
+    char *buffer = malloc((num+1)*sizeof(char));
+    size_t len;
+    len = fread(buffer, sizeof(char), (size_t)num, h);
+    buffer[len] = '\0';
+
+    if (len <= 0) {
+        ret = MKSTR(vm, "");
+    } else {
+        ret = MKSTR(vm, buffer);
+    }
+    free(buffer);
+    return ret;
+}
+
+void idris_crash(char* msg) {
+    fprintf(stderr, "%s\n", msg);
+    exit(1);
+}
+
+VAL idris_strHead(VM* vm, VAL str) {
+    return idris_strIndex(vm, str, 0);
+}
+
+VAL MKSTROFFc(VM* vm, VAL basestr) {
+    StrOffset * cl = iallocate(vm, sizeof(*cl), 1);
+    SETTY(cl, CT_STROFFSET);
+    cl->base = (String*)basestr;
+    return (VAL)cl;
+}
+
+VAL idris_strShift(VM* vm, VAL str, int num) {
+    size_t sz = sizeof(StrOffset);
+    // If there's no room, just copy the string, or we'll have a problem after
+    // gc moves str
+    if (space(vm, sz)) {
+        int offset = 0;
+        StrOffset * root = (StrOffset*)str;
+        StrOffset * cl = iallocate(vm, sz, 0);
+        SETTY(cl, CT_STROFFSET);
+
+        while(root!=NULL && !ISSTR(root)) { // find the root, carry on.
+                              // In theory, at most one step here!
+            offset += root->offset;
+            root = (StrOffset*)root->base;
+        }
+
+        cl->base = (String*)root;
+        cl->offset = offset+idris_utf8_findOffset(GETSTR(str), num);
+        return (VAL)cl;
+    } else {
+        char* nstr = GETSTR(str);
+        return MKSTR(vm, nstr+idris_utf8_charlen(nstr));
+    }
+}
+
+VAL idris_strTail(VM* vm, VAL str) {
+    return idris_strShift(vm, str, 1);
+}
+
+VAL idris_strCons(VM* vm, VAL x, VAL xs) {
+    char *xstr = GETSTR(xs);
+    int xval = GETINT(x);
+    size_t xlen = GETSTRLEN(xs);
+    String * cl;
+
+    if (xval < 0x80) { // ASCII char
+      cl = allocStr(vm, xlen + 1, 0);
+        cl->str[0] = (char)(GETINT(x));
+        memcpy(cl->str+1, xstr, xlen);
+    } else {
+        char *init = idris_utf8_fromChar(xval);
+        size_t ilen = strlen(init);
+        int newlen = ilen + xlen;
+        cl = allocStr(vm, newlen, 0);
+        memcpy(cl->str, init, ilen);
+        memcpy(cl->str + ilen, xstr, xlen);
+        free(init);
+    }
+    return (VAL)cl;
+}
+
+VAL idris_strIndex(VM* vm, VAL str, VAL i) {
+    int idx = idris_utf8_index(GETSTR(str), GETINT(i));
+    return MKINT((i_int)idx);
+}
+
+VAL idris_substr(VM* vm, VAL offset, VAL length, VAL str) {
+    size_t offset_val = GETINT(offset);
+    size_t length_val = GETINT(length);
+    char* str_val = GETSTR(str);
+
+    // If the substring is a suffix, use idris_strShift to avoid reallocating
+    if (offset_val + length_val >= GETSTRLEN(str)) {
+        return idris_strShift(vm, str, offset_val);
+    }
+    else {
+        char *start = idris_utf8_advance(str_val, offset_val);
+        char *end = idris_utf8_advance(start, length_val);
+        size_t sz = end - start;
+
+        if (space(vm, sz)) {
+            String * newstr = allocStr(vm, sz, 0);
+            memcpy(newstr->str, start, sz);
+            newstr->str[sz] = '\0';
+            return (VAL)newstr;
+        } else {
+            // Need to copy into an intermediate string before allocating,
+            // because if there's no enough space then allocating will move the
+            // original string!
+            char* cpystr = malloc(sz);
+            memcpy(cpystr, start, sz);
+
+            String * newstr = allocStr(vm, sz, 0);
+            memcpy(newstr->str, cpystr, sz);
+            newstr->str[sz] = '\0';
+            free(cpystr);
+            return (VAL)newstr;
+        }
+    }
+}
+
+VAL idris_strRev(VM* vm, VAL str) {
+    char *xstr = GETSTR(str);
+    size_t xlen = GETSTRLEN(str);
+
+    String * cl = allocStr(vm, xlen, 0);
+    idris_utf8_rev(xstr, cl->str);
+    return (VAL)cl;
+}
+
+VAL idris_newRefLock(VAL x, int outerlock) {
+    Ref * cl = allocate(sizeof(*cl), outerlock);
+    SETTY(cl, CT_REF);
+    cl->ref = x;
+    return (VAL)cl;
+}
+
+VAL idris_newRef(VAL x) {
+    return idris_newRefLock(x, 0);
+}
+
+void idris_writeRef(VAL ref, VAL x) {
+    Ref * r = (Ref*)ref;
+    r->ref = x;
+    SETTY(ref, CT_REF);
+}
+
+VAL idris_readRef(VAL ref) {
+    Ref * r = (Ref*)ref;
+    return r->ref;
+}
+
+VAL idris_newArray(VM* vm, int size, VAL def) {
+    Array * cl;
+    int i;
+    cl = allocArrayF(vm, size, 0);
+    for(i=0; i<size; ++i) {
+        cl->array[i] = def;
+    }
+    return (VAL)cl;
+}
+
+void idris_arraySet(VAL arr, int index, VAL newval) {
+    Array * cl = (Array*)arr;
+    cl->array[index] = newval;
+}
+
+VAL idris_arrayGet(VAL arr, int index) {
+    Array * cl = (Array*)arr;
+    return cl->array[index];
+}
+
+VAL idris_systemInfo(VM* vm, VAL index) {
+    int i = GETINT(index);
+    switch(i) {
+        case 0: // backend
+            return MKSTR(vm, "c");
+        case 1:
+            return MKSTR(vm, IDRIS_TARGET_OS);
+        case 2:
+            return MKSTR(vm, IDRIS_TARGET_TRIPLE);
+    }
+    return MKSTR(vm, "");
+}
+
+#ifdef HAS_PTHREAD
+typedef struct {
+    VM* vm; // thread's VM
+    func fn;
+    VAL arg;
+} ThreadData;
+
+void* runThread(void* arg) {
+    ThreadData* td = (ThreadData*)arg;
+    VM* vm = td->vm;
+    func fn = td->fn;
+
+    init_threaddata(vm);
+
+    TOP(0) = td->arg;
+    BASETOP(0);
+    ADDTOP(1);
+    free(td);
+    fn(vm, NULL);
+
+    //    Stats stats =
+    terminate(vm);
+    //    aggregate_stats(&(td->vm->stats), &stats);
+    return NULL;
+}
+
+void* vmThread(VM* callvm, func f, VAL arg) {
+    VM* vm = init_vm(callvm->stack_max - callvm->valstack, callvm->heap.size,
+                     callvm->max_threads);
+    vm->processes=1; // since it can send and receive messages
+    vm->creator = callvm;
+    pthread_t t;
+    pthread_attr_t attr;
+//    size_t stacksize;
+
+    pthread_attr_init(&attr);
+//    pthread_attr_getstacksize (&attr, &stacksize);
+//    pthread_attr_setstacksize (&attr, stacksize*64);
+
+    ThreadData *td = malloc(sizeof(ThreadData)); // free'd in runThread
+    td->vm = vm;
+    td->fn = f;
+    td->arg = copyTo(vm, arg);
+
+    callvm->processes++;
+
+    int ok = pthread_create(&t, &attr, runThread, td);
+    pthread_attr_destroy(&attr);
+//    usleep(100);
+    if (ok == 0) {
+        return vm;
+    } else {
+        terminate(vm);
+        return NULL;
+    }
+}
+
+void* idris_stopThread(VM* vm) {
+    terminate(vm);
+    pthread_exit(NULL);
+    return NULL;
+}
+
+static VAL doCopyTo(VM* vm, VAL x);
+
+static void copyArray(VM* vm, VAL * dst, VAL * src, size_t len) {
+    size_t i;
+    for(i = 0; i < len; ++i)
+      dst[i] = doCopyTo(vm, src[i]);
+}
+
+
+// VM is assumed to be a different vm from the one x lives on
+
+static VAL doCopyTo(VM* vm, VAL x) {
+    int ar;
+    VAL cl;
+    if (x==NULL) {
+        return x;
+    }
+    switch(GETTY(x)) {
+    case CT_INT:
+        return x;
+    case CT_CDATA:
+        cl = MKCDATAc(vm, GETCDATA(x));
+        break;
+    case CT_BIGINT:
+        cl = MKBIGMc(vm, GETMPZ(x));
+        break;
+    case CT_CON:
+        ar = CARITY(x);
+        if (ar == 0 && CTAG(x) < 256) { // globally allocated
+            cl = x;
+        } else {
+            Con * c = allocConF(vm, CTAG(x), ar, 1);
+            copyArray(vm, c->args, ((Con*)x)->args, ar);
+            cl = (VAL)c;
+        }
+        break;
+    case CT_ARRAY: {
+        size_t len = CELEM(x);
+        Array * a = allocArrayF(vm, len, 1);
+        copyArray(vm, a->array, ((Array*)x)->array, len);
+        cl = (VAL)a;
+    } break;
+    case CT_STRING:
+    case CT_FLOAT:
+    case CT_PTR:
+    case CT_MANAGEDPTR:
+    case CT_BITS32:
+    case CT_BITS64:
+    case CT_RAWDATA:
+        {
+            cl = iallocate(vm, x->hdr.sz, 0);
+            memcpy(cl, x, x->hdr.sz);
+        }
+        break;
+    default:
+        assert(0); // We're in trouble if this happens...
+	cl = NULL;
+    }
+    return cl;
+}
+
+VAL copyTo(VM* vm, VAL x) {
+    VAL ret = doCopyTo(vm, x);
+    return ret;
+}
+
+// Add a message to another VM's message queue
+int idris_sendMessage(VM* sender, int channel_id, VM* dest, VAL msg) {
+    // FIXME: If GC kicks in in the middle of the copy, we're in trouble.
+    // Probably best check there is enough room in advance. (How?)
+
+    // Also a problem if we're allocating at the same time as the
+    // destination thread (which is very likely)
+    // Should the inbox be a different memory space?
+
+    // So: we try to copy, if a collection happens, we do the copy again
+    // under the assumption there's enough space this time.
+
+    if (dest->active == 0) { return 0; } // No VM to send to
+
+    int gcs = dest->stats.collections;
+    pthread_mutex_lock(&dest->alloc_lock);
+    VAL dmsg = copyTo(dest, msg);
+    pthread_mutex_unlock(&dest->alloc_lock);
+
+    if (dest->stats.collections > gcs) {
+        // a collection will have invalidated the copy
+        pthread_mutex_lock(&dest->alloc_lock);
+        dmsg = copyTo(dest, msg); // try again now there's room...
+        pthread_mutex_unlock(&dest->alloc_lock);
+    }
+
+    pthread_mutex_lock(&(dest->inbox_lock));
+
+    if (dest->inbox_write >= dest->inbox_end) {
+        // FIXME: This is obviously bad in the long run. This should
+        // either block, make the inbox bigger, or return an error code,
+        // or possibly make it user configurable
+        fprintf(stderr, "Inbox full");
+        exit(-1);
+    }
+
+    dest->inbox_write->msg = dmsg;
+    if (channel_id == 0) {
+        // Set lowest bit to indicate this message is initiating a channel
+        channel_id = 1 + ((dest->inbox_nextid++) << 1);
+    } else {
+        channel_id = channel_id << 1;
+    }
+    dest->inbox_write->channel_id = channel_id;
+
+    dest->inbox_write->sender = sender;
+    dest->inbox_write++;
+
+    // Wake up the other thread
+    pthread_mutex_lock(&dest->inbox_block);
+    pthread_cond_signal(&dest->inbox_waiting);
+    pthread_mutex_unlock(&dest->inbox_block);
+
+//    printf("Sending [signalled]...\n");
+
+    pthread_mutex_unlock(&(dest->inbox_lock));
+//    printf("Sending [unlock]...\n");
+    return channel_id >> 1;
+}
+
+VM* idris_checkMessages(VM* vm) {
+    return idris_checkMessagesFrom(vm, 0, NULL);
+}
+
+Msg* idris_checkInitMessages(VM* vm) {
+    Msg* msg;
+
+    for (msg = vm->inbox; msg < vm->inbox_end && msg->msg != NULL; ++msg) {
+	if ((msg->channel_id & 1) == 1) { // init bit set
+            return msg;
+        }
+    }
+    return 0;
+}
+
+VM* idris_checkMessagesFrom(VM* vm, int channel_id, VM* sender) {
+    Msg* msg;
+
+    for (msg = vm->inbox; msg < vm->inbox_end && msg->msg != NULL; ++msg) {
+        if (sender == NULL || msg->sender == sender) {
+            if (channel_id == 0 || channel_id == msg->channel_id >> 1) {
+                return msg->sender;
+            }
+        }
+    }
+    return 0;
+}
+
+VM* idris_checkMessagesTimeout(VM* vm, int delay) {
+    VM* sender = idris_checkMessagesFrom(vm, 0, NULL);
+    if (sender != NULL) {
+        return sender;
+    }
+
+    struct timespec timeout;
+    int status;
+
+    // Wait either for a timeout or until we get a signal that a message
+    // has arrived.
+    pthread_mutex_lock(&vm->inbox_block);
+    timeout.tv_sec = time (NULL) + delay;
+    timeout.tv_nsec = 0;
+    status = pthread_cond_timedwait(&vm->inbox_waiting, &vm->inbox_block,
+                               &timeout);
+    (void)(status); //don't emit 'unused' warning
+    pthread_mutex_unlock(&vm->inbox_block);
+
+    return idris_checkMessagesFrom(vm, 0, NULL);
+}
+
+
+Msg* idris_getMessageFrom(VM* vm, int channel_id, VM* sender) {
+    Msg* msg;
+
+    for (msg = vm->inbox; msg < vm->inbox_write; ++msg) {
+        if (sender == NULL || msg->sender == sender) {
+            if (channel_id == 0 || channel_id == msg->channel_id >> 1) {
+                return msg;
+            }
+        }
+    }
+    return NULL;
+}
+
+// block until there is a message in the queue
+Msg* idris_recvMessage(VM* vm) {
+    return idris_recvMessageFrom(vm, 0, NULL);
+}
+
+Msg* idris_recvMessageFrom(VM* vm, int channel_id, VM* sender) {
+    Msg* msg;
+    Msg* ret;
+
+    struct timespec timeout;
+    int status;
+
+    if (sender && sender->active == 0) { return NULL; } // No VM to receive from
+
+    pthread_mutex_lock(&vm->inbox_block);
+    msg = idris_getMessageFrom(vm, channel_id, sender);
+
+    while (msg == NULL) {
+//        printf("No message yet\n");
+//        printf("Waiting [lock]...\n");
+        timeout.tv_sec = time (NULL) + 3;
+        timeout.tv_nsec = 0;
+        status = pthread_cond_timedwait(&vm->inbox_waiting, &vm->inbox_block,
+                               &timeout);
+        (void)(status); //don't emit 'unused' warning
+//        printf("Waiting [unlock]... %d\n", status);
+        msg = idris_getMessageFrom(vm, channel_id, sender);
+    }
+    pthread_mutex_unlock(&vm->inbox_block);
+
+    if (msg != NULL) {
+        ret = malloc(sizeof(*ret));
+        ret->msg = msg->msg;
+        ret->sender = msg->sender;
+
+        pthread_mutex_lock(&(vm->inbox_lock));
+
+        // Slide everything down after the message in the inbox,
+        // Move the inbox_write pointer down, and clear the value at the
+        // end - O(n) but it's easier since the message from a specific
+        // sender could be anywhere in the inbox
+
+        for(;msg < vm->inbox_write; ++msg) {
+            if (msg+1 != vm->inbox_end) {
+                msg->sender = (msg + 1)->sender;
+                msg->msg = (msg + 1)->msg;
+            }
+        }
+
+        vm->inbox_write->msg = NULL;
+        vm->inbox_write->sender = NULL;
+        vm->inbox_write--;
+
+        pthread_mutex_unlock(&(vm->inbox_lock));
+    } else {
+        fprintf(stderr, "No messages waiting");
+        exit(-1);
+    }
+    return ret;
+}
+#endif
+
+VAL idris_getMsg(Msg* msg) {
+    return msg->msg;
+}
+
+VM* idris_getSender(Msg* msg) {
+    return msg->sender;
+}
+
+int idris_getChannel(Msg* msg) {
+    return msg->channel_id >> 1;
+}
+
+void idris_freeMsg(Msg* msg) {
+    free(msg);
+}
+
+int idris_errno(void) {
+    return errno;
+}
+
+char* idris_showerror(int err) {
+    return strerror(err);
+}
+
+Con nullary_cons[256];
+
+void init_nullaries(void) {
+    int i;
+    for(i = 0; i < 256; ++i) {
+        Con * cl = nullary_cons + i;
+        cl->hdr.sz = sizeof(*cl);
+        SETTY(cl, CT_CON);
+        cl->tag = i;
+    }
+}
+
+int __idris_argc;
+char **__idris_argv;
+
+int idris_numArgs(void) {
+    return __idris_argc;
+}
+
+const char* idris_getArg(int i) {
+    return __idris_argv[i];
+}
+
+void idris_disableBuffering(void) {
+  setvbuf(stdin, NULL, _IONBF, 0);
+  setvbuf(stdout, NULL, _IONBF, 0);
+}
+
+#ifndef SEL4
+int idris_usleep(int usec) {
+    struct timespec t;
+    t.tv_sec = usec / 1000000;
+    t.tv_nsec = (usec % 1000000) * 1000;
+
+    return nanosleep(&t, NULL);
+}
+#endif // SEL4
+
+void stackOverflow(void) {
+  fprintf(stderr, "Stack overflow");
+  exit(-1);
+}
diff --git a/dist/rts/idris_rts.h b/dist/rts/idris_rts.h
new file mode 100644
index 0000000..3fb099d
--- /dev/null
+++ b/dist/rts/idris_rts.h
@@ -0,0 +1,546 @@
+#ifndef _IDRISRTS_H
+#define _IDRISRTS_H
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#ifdef HAS_PTHREAD
+#include <stdarg.h>
+#include <pthread.h>
+#endif
+
+#include "idris_heap.h"
+#include "idris_stats.h"
+
+
+#ifndef EXIT_SUCCESS
+#define EXIT_SUCCESS 0
+#endif
+#ifndef EXIT_FAILURE
+#define EXIT_FAILURE 1
+#endif
+
+// Closures
+typedef enum {
+    CT_CON, CT_ARRAY, CT_INT, CT_BIGINT,
+    CT_FLOAT, CT_STRING, CT_STROFFSET, CT_BITS32,
+    CT_BITS64, CT_PTR, CT_REF, CT_FWD,
+    CT_MANAGEDPTR, CT_RAWDATA, CT_CDATA
+} ClosureType;
+
+typedef struct Hdr {
+    uint8_t ty;
+    uint8_t u8;
+    uint16_t u16;
+    uint32_t sz;
+} Hdr;
+
+typedef struct Val {
+    Hdr hdr;
+} Val;
+
+typedef struct Val * VAL;
+
+typedef struct Con {
+    Hdr hdr;
+    uint32_t tag;
+    VAL args[0];
+} Con;
+
+typedef struct Array {
+    Hdr hdr;
+    VAL array[0];
+} Array;
+
+typedef struct BigInt {
+    Hdr hdr;
+    char big[0];
+} BigInt;
+
+typedef struct Float {
+    Hdr hdr;
+    double f;
+} Float;
+
+typedef struct String {
+    Hdr hdr;
+    size_t slen;
+    char str[0];
+} String;
+
+typedef struct StrOffset {
+    Hdr hdr;
+    String * base;
+    size_t offset;
+} StrOffset;
+
+typedef struct Bits32 {
+    Hdr hdr;
+    uint32_t bits32;
+} Bits32;
+
+typedef struct Bits64 {
+    Hdr hdr;
+    uint64_t bits64;
+} Bits64;
+
+typedef struct Ptr {
+    Hdr hdr;
+    void * ptr;
+} Ptr;
+
+typedef struct Ref {
+    Hdr hdr;
+    VAL ref;
+} Ref;
+
+typedef struct Fwd {
+    Hdr hdr;
+    VAL fwd;
+} Fwd;
+
+typedef struct ManagedPtr {
+    Hdr hdr;
+    char mptr[0];
+} ManagedPtr;
+
+typedef struct RawData {
+    Hdr hdr;
+    char raw[0];
+} RawData;
+
+typedef struct CDataC {
+    Hdr hdr;
+    CHeapItem * item;
+} CDataC;
+
+struct VM;
+
+struct Msg_t {
+    struct VM* sender;
+    // An identifier to say which conversation this message is part of.
+    // Lowest bit is set if the id is the first message in a conversation.
+    int channel_id;
+    VAL msg;
+};
+
+typedef struct Msg_t Msg;
+
+struct VM {
+    int active; // 0 if no longer running; keep for message passing
+                // TODO: If we're going to have lots of concurrent threads,
+                // we really need to be cleverer than this!
+
+    VAL* valstack;
+    VAL* valstack_top;
+    VAL* valstack_base;
+    VAL* stack_max;
+
+    CHeap c_heap;
+    Heap heap;
+#ifdef HAS_PTHREAD
+    pthread_mutex_t inbox_lock;
+    pthread_mutex_t inbox_block;
+    pthread_mutex_t alloc_lock;
+    pthread_cond_t inbox_waiting;
+
+    Msg* inbox; // Block of memory for storing messages
+    Msg* inbox_end; // End of block of memory
+    int inbox_nextid; // Next channel id
+    Msg* inbox_write; // Location of next message to write
+
+    int processes; // Number of child processes
+    int max_threads; // maximum number of threads to run in parallel
+    struct VM* creator; // The VM that created this VM, NULL for root VM
+#endif
+    Stats stats;
+
+    VAL ret;
+    VAL reg1;
+};
+
+typedef struct VM VM;
+
+
+/* C data interface: allocation on the C heap.
+ *
+ * Although not enforced in code, CData is meant to be opaque
+ * and non-RTS code (such as libraries or C bindings) should
+ * access only its (void *) field called "data".
+ *
+ * Feel free to mutate cd->data; the heap does not care
+ * about its particular value. However, keep in mind
+ * that it must not break Idris's referential transparency.
+ *
+ * If you call cdata_allocate or cdata_manage, the resulting
+ * CData object *must* be returned from your FFI function so
+ * that it is inserted in the C heap. Otherwise the memory
+ * will be leaked.
+ */
+
+/// C data block. Contains (void * data).
+typedef CHeapItem * CData;
+
+/// Allocate memory, returning the corresponding C data block.
+CData cdata_allocate(size_t size, CDataFinalizer * finalizer);
+
+/// Wrap a pointer as a C data block.
+/// The size should be an estimate of how much memory, in bytes,
+/// is associated with the pointer. This estimate need not be absolutely precise
+/// but it is necessary for GC to work effectively.
+CData cdata_manage(void * data, size_t size, CDataFinalizer * finalizer);
+
+
+// Create a new VM
+VM* init_vm(int stack_size, size_t heap_size,
+            int max_threads);
+
+// Get the VM for the current thread
+VM* get_vm(void);
+// Initialise thread-local data for this VM
+void init_threaddata(VM *vm);
+// Clean up a VM once it's no longer needed
+Stats terminate(VM* vm);
+
+// Create a new VM, set up everything with sensible defaults (use when
+// calling Idris from C)
+VM* idris_vm(void);
+void close_vm(VM* vm);
+
+// Set up key for thread-local data - called once from idris_main
+void init_threadkeys(void);
+
+// Functions all take a pointer to their VM, and previous stack base,
+// and return nothing.
+typedef void*(*func)(VM*, VAL*);
+
+// Register access
+
+#define RVAL (vm->ret)
+#define LOC(x) (vm->valstack_base[x])
+#define TOP(x) (vm->valstack_top[x])
+#define REG1 (vm->reg1)
+
+// Retrieving values
+static inline char * getstr(String * x) {
+    // hdr.u8 used to mark a null string
+    return x->hdr.u8? NULL : x->str;
+}
+
+static inline size_t getstrlen(String * x) {
+    return x->slen;
+}
+
+#define GETSTR(x) (ISSTR(x) ? getstr((String*)(x)) : GETSTROFF(x))
+#define GETSTRLEN(x) (ISSTR(x) ? getstrlen((String*)(x)) : GETSTROFFLEN(x))
+#define GETPTR(x) (((Ptr*)(x))->ptr)
+#define GETMPTR(x) (((ManagedPtr*)(x))->mptr)
+#define GETFLOAT(x) (((Float*)(x))->f)
+#define GETCDATA(x) (((CDataC*)(x))->item)
+
+#define GETBITS8(x) (GETINT(x))
+#define GETBITS16(x) (GETINT(x))
+#define GETBITS32(x) (((Bits32*)(x))->bits32)
+#define GETBITS64(x) (((Bits64*)(x))->bits64)
+
+// Already checked it's a CT_CON
+#define CTAG(x) (((Con*)(x))->tag)
+#define CARITY(x) (((Con*)(x))->hdr.u16) // hdr.u16 used to store arity
+
+#define TAG(x) (ISINT(x) || x == NULL ? (-1) : ( GETTY(x) == CT_CON ? CTAG((Con*)x) : (-1)) )
+#define ARITY(x) (ISINT(x) || x == NULL ? (-1) : ( GETTY(x) == CT_CON ? CARITY((Con*)x) : (-1)) )
+
+#define CELEM(x) (((x)->hdr.sz - sizeof(Array)) / sizeof(VAL))
+
+#define GETTY(x) (ISINT(x)? CT_INT : (ClosureType)((x)->hdr.ty))
+#define SETTY(x,t) ((x)->hdr.ty = t)
+
+// Integers, floats and operators
+
+typedef intptr_t i_int;
+
+// Shifting a negative number left is undefined and (rightly) gives a warning,
+// but we're only interested in shifting the bit pattern, so cast it
+#define MKINT(x) ((void*)((i_int)((((uintptr_t)x)<<1)+1)))
+#define GETINT(x) ((i_int)(x)>>1)
+#define ISINT(x) ((((i_int)x)&1) == 1)
+#define ISSTR(x) (GETTY(x) == CT_STRING)
+
+#define INTOP(op,x,y) MKINT((i_int)((((i_int)x)>>1) op (((i_int)y)>>1)))
+#define UINTOP(op,x,y) MKINT((i_int)((((uintptr_t)x)>>1) op (((uintptr_t)y)>>1)))
+#define FLOATOP(op,x,y) MKFLOAT(vm, ((GETFLOAT(x)) op (GETFLOAT(y))))
+#define FLOATBOP(op,x,y) MKINT((i_int)(((GETFLOAT(x)) op (GETFLOAT(y)))))
+#define ADD(x,y) (void*)(((i_int)x)+(((i_int)y)-1))
+#define MULT(x,y) (MKINT((((i_int)x)>>1) * (((i_int)y)>>1)))
+
+// Stack management
+
+#ifdef IDRIS_TRACE
+#define TRACE idris_trace(vm, __FUNCTION__, __LINE__);
+#else
+#define TRACE
+#endif
+
+#define INITFRAME TRACE\
+                  __attribute__((unused)) VAL* myoldbase;\
+                  void* callres
+
+#define REBASE vm->valstack_base = oldbase; return NULL
+#define RESERVE(x) do { \
+    if (vm->valstack_top+(x) > vm->stack_max) { stackOverflow(); } \
+    else { memset(vm->valstack_top, 0, (x)*sizeof(VAL)); } \
+  } while(0)
+#define RESERVENOALLOC(x) do { \
+    if (vm->valstack_top+(x) > vm->stack_max) { stackOverflow(); } \
+  } while(0)
+#define ADDTOP(x) vm->valstack_top += (x)
+#define TOPBASE(x) vm->valstack_top = vm->valstack_base + (x)
+#define BASETOP(x) vm->valstack_base = vm->valstack_top + (x)
+#define STOREOLD myoldbase = vm->valstack_base
+
+#define CALL(f) callres = f(vm, myoldbase); \
+  while(callres!=NULL) { \
+      callres = ((func)(callres))(vm, myoldbase); \
+  }
+#define TAILCALL(f) return (void*)(f);
+
+// Creating new values (each value placed at the top of the stack)
+VAL MKFLOAT(VM* vm, double val);
+VAL MKSTR(VM* vm, const char* str);
+VAL MKSTRlen(VM* vm, const char* str, size_t size);
+VAL MKPTR(VM* vm, void* ptr);
+VAL MKMPTR(VM* vm, void* ptr, size_t size);
+VAL MKB8(VM* vm, uint8_t b);
+VAL MKB16(VM* vm, uint16_t b);
+VAL MKB32(VM* vm, uint32_t b);
+VAL MKB64(VM* vm, uint64_t b);
+VAL MKCDATA(VM* vm, CHeapItem * item);
+
+// following versions don't take a lock when allocating
+VAL MKFLOATc(VM* vm, double val);
+VAL MKSTROFFc(VM* vm, VAL basestr);
+VAL MKSTRc(VM* vm, char* str);
+VAL MKSTRclen(VM* vm, char* str, size_t len);
+VAL MKPTRc(VM* vm, void* ptr);
+VAL MKMPTRc(VM* vm, void* ptr, size_t size);
+VAL MKCDATAc(VM* vm, CHeapItem * item);
+
+char* GETSTROFF(VAL stroff);
+size_t GETSTROFFLEN(VAL stroff);
+
+#define SETARG(x, i, a) (((Con*)(x))->args)[i] = ((VAL)(a))
+#define GETARG(x, i) (((Con*)(x))->args[i])
+
+#define PROJECT(vm,r,loc,num) \
+    memcpy(&(LOC(loc)), ((Con*)(r))->args, sizeof(VAL)*num)
+#define SLIDE(vm, args) \
+    memmove(&(LOC(0)), &(TOP(0)), sizeof(VAL)*args)
+
+void* iallocate(VM *, size_t, int);
+
+void* allocate(size_t size, int outerlock);
+// void* allocCon(VM* vm, int arity, int outerlock);
+
+// When allocating from C, call 'idris_requireAlloc' with a size to
+// guarantee that no garbage collection will happen (and hence nothing
+// will move) until at least size bytes have been allocated.
+// idris_doneAlloc *must* be called when allocation from C is done (as it
+// may take a lock if other threads are running).
+
+void idris_requireAlloc(VM *, size_t size);
+void idris_doneAlloc(VM *);
+
+// public interface to allocation (note that this may move other pointers
+// if allocating beyond the limits given by idris_requireAlloc!)
+// 'realloc' just calls alloc and copies; 'free' does nothing
+void* idris_alloc(size_t size);
+void* idris_realloc(void* old, size_t old_size, size_t size);
+void idris_free(void* ptr, size_t size);
+
+static inline void updateConF(Con * cl, unsigned tag, unsigned arity) {
+    SETTY(cl, CT_CON);
+    cl->tag = tag;
+    // hdr.u16 used to store arity
+    cl->hdr.u16 = arity;
+    assert(cl->hdr.sz == sizeof(*cl) + sizeof(VAL) * arity);
+    // cl->hdr.sz = sizeof(*cl) + sizeof(VAL) * arity;
+}
+
+static inline Con * allocConF(VM * vm, unsigned tag, unsigned arity, int outer) {
+    Con * cl = iallocate(vm, sizeof(*cl) + sizeof(VAL) * arity, outer);
+    SETTY(cl, CT_CON);
+    cl->tag = tag;
+    // hdr.u16 used to store arity
+    cl->hdr.u16 = arity;
+    return cl;
+}
+
+static inline Array * allocArrayF(VM * vm, size_t len, int outer) {
+    Array * cl = iallocate(vm, sizeof(*cl) + sizeof(VAL) * len, outer);
+    SETTY(cl, CT_ARRAY);
+    return cl;
+}
+
+
+#define allocCon(cl, vm, t, a, o) (cl) = (VAL)allocConF(vm, t, a, o)
+
+#define updateCon(cl, old, tag, arity) (cl) = (old); updateConF(cl, tag, arity)
+
+#define NULL_CON(x) ((VAL)(nullary_cons + x))
+
+#define allocArray(cl, vm, len, o) (cl) = (VAL)allocArrayF(vm, len, o)
+
+int idris_errno(void);
+char* idris_showerror(int err);
+
+extern Con nullary_cons[];
+void init_nullaries(void);
+
+void init_signals(void);
+
+void* vmThread(VM* callvm, func f, VAL arg);
+void* idris_stopThread(VM* vm);
+
+// Copy a structure to another vm's heap
+VAL copyTo(VM* newVM, VAL x);
+
+// Add a message to another VM's message queue
+int idris_sendMessage(VM* sender, int channel_id, VM* dest, VAL msg);
+// Check whether there are any messages in the queue and return PID of
+// sender if so (null if not)
+VM* idris_checkMessages(VM* vm);
+// Check whether there are any messages which are initiating a conversation
+// in the queue and return the message if so (without removing it)
+Msg* idris_checkInitMessages(VM* vm);
+// Check whether there are any messages in the queue
+VM* idris_checkMessagesFrom(VM* vm, int channel_id, VM* sender);
+// Check whether there are any messages in the queue, and wait if not
+VM* idris_checkMessagesTimeout(VM* vm, int timeout);
+// block until there is a message in the queue
+Msg* idris_recvMessage(VM* vm);
+// block until there is a message in the queue
+Msg* idris_recvMessageFrom(VM* vm, int channel_id, VM* sender);
+
+// Query/free structure used to return message data (recvMessage will malloc,
+// so needs an explicit free)
+VAL idris_getMsg(Msg* msg);
+VM* idris_getSender(Msg* msg);
+int idris_getChannel(Msg* msg);
+void idris_freeMsg(Msg* msg);
+
+void idris_trace(VM* vm, const char* func, int line);
+void dumpVal(VAL r);
+void dumpStack(VM* vm);
+
+// Casts
+#define idris_castIntFloat(x) MKFLOAT(vm, (double)(GETINT(x)))
+#define idris_castFloatInt(x) MKINT((i_int)(GETFLOAT(x)))
+
+VAL idris_castIntStr(VM* vm, VAL i);
+VAL idris_castBitsStr(VM* vm, VAL i);
+VAL idris_castStrInt(VM* vm, VAL i);
+VAL idris_castFloatStr(VM* vm, VAL i);
+VAL idris_castStrFloat(VM* vm, VAL i);
+
+// Raw memory manipulation
+void idris_memset(void* ptr, i_int offset, uint8_t c, i_int size);
+void idris_memmove(void* dest, void* src, i_int dest_offset, i_int src_offset, i_int size);
+uint8_t idris_peek(void* ptr, i_int offset);
+void idris_poke(void* ptr, i_int offset, uint8_t data);
+
+VAL idris_peekPtr(VM* vm, VAL ptr, VAL offset);
+VAL idris_pokePtr(VAL ptr, VAL offset, VAL data);
+VAL idris_peekDouble(VM* vm, VAL ptr, VAL offset);
+VAL idris_pokeDouble(VAL ptr, VAL offset, VAL data);
+VAL idris_peekSingle(VM* vm, VAL ptr, VAL offset);
+VAL idris_pokeSingle(VAL ptr, VAL offset, VAL data);
+
+// Crash with a message (used for partial primitives)
+void idris_crash(char* msg);
+
+// String primitives
+VAL idris_concat(VM* vm, VAL l, VAL r);
+VAL idris_strlt(VM* vm, VAL l, VAL r);
+VAL idris_streq(VM* vm, VAL l, VAL r);
+VAL idris_strlen(VM* vm, VAL l);
+// Read a line from a file
+VAL idris_readStr(VM* vm, FILE* h);
+// Read up to 'num' characters from a file
+VAL idris_readChars(VM* vm, int num, FILE* h);
+
+VAL idris_strHead(VM* vm, VAL str);
+VAL idris_strShift(VM* vm, VAL str, int num);
+VAL idris_strTail(VM* vm, VAL str);
+// This is not expected to be efficient! Mostly we wouldn't expect to call
+// it at all at run time.
+VAL idris_strCons(VM* vm, VAL x, VAL xs);
+VAL idris_strIndex(VM* vm, VAL str, VAL i);
+VAL idris_strRev(VM* vm, VAL str);
+VAL idris_substr(VM* vm, VAL offset, VAL length, VAL str);
+
+// Support for IORefs
+VAL idris_newRefLock(VAL x, int outerlock);
+VAL idris_newRef(VAL x);
+void idris_writeRef(VAL ref, VAL x);
+VAL idris_readRef(VAL ref);
+
+// Support for IOArrays
+VAL idris_newArray(VM* vm, int size, VAL def);
+void idris_arraySet(VAL arr, int index, VAL newval);
+VAL idris_arrayGet(VAL arr, int index);
+
+// system infox
+// used indices:
+//   0 returns backend
+//   1 returns OS
+VAL idris_systemInfo(VM* vm, VAL index);
+
+// Command line args
+
+extern int __idris_argc;
+extern char **__idris_argv;
+
+int idris_numArgs(void);
+const char *idris_getArg(int i);
+
+// disable stdin/stdout buffering
+void idris_disableBuffering(void);
+
+#ifndef SEL4
+int idris_usleep(int usec);
+#endif // SEL4
+
+// Handle stack overflow.
+// Just reports an error and exits.
+
+void stackOverflow(void);
+
+// I think these names are nicer for an API...
+
+#define idris_constructor allocCon
+#define idris_setConArg SETARG
+#define idris_getConArg GETARG
+#define idris_mkInt(x) MKINT((intptr_t)(x))
+
+#include "idris_gmp.h"
+
+static inline size_t valSize(VAL v) {
+    return v->hdr.sz;
+}
+
+static inline size_t aligned(size_t sz) {
+    return (sz + sizeof(void*) - 1) & ~(sizeof(void*)-1);
+}
+
+VM* get_vm(void);
+
+#endif
+
+/*
+  Local variables: **
+  c-file-style: "bsd" **
+  c-basic-offset: 4 **
+  indent-tabs-mode: nil **
+  End: **
+*/
diff --git a/dist/rts/idris_stats.c b/dist/rts/idris_stats.c
new file mode 100644
index 0000000..38b5587
--- /dev/null
+++ b/dist/rts/idris_stats.c
@@ -0,0 +1,68 @@
+#include "idris_stats.h"
+
+#include <stdio.h>
+#include <locale.h>
+
+#ifdef IDRIS_ENABLE_STATS
+
+void print_stats(const Stats * stats) {
+    clock_t total   = clock() - stats->start_time;
+    clock_t mut     = total - stats->init_time - stats->gc_time - stats->exit_time;
+    double  mut_sec = (double)mut   / CLOCKS_PER_SEC;
+
+    uint64_t avg_chunk = 0;
+    if (stats->alloc_count > 0) {
+      avg_chunk = (uint64_t)((double)stats->allocations / (double)stats->alloc_count);
+    }
+
+    uint64_t alloc_rate = 0;
+    if (mut > 0) {
+      alloc_rate  = (uint64_t)((double)(stats->allocations) / mut_sec);
+    }
+
+    double gc_percent = 0.0;
+    double productivity = 0.0;
+    if (total > 0) {
+        gc_percent = 100 * (double)stats->gc_time / (double)total;
+        productivity = 100 * ((double)mut / (double)total);
+    }
+
+    setlocale(LC_NUMERIC, "");
+    printf("\n");
+    printf("%'20" PRIu64 " bytes allocated in the heap\n",  stats->allocations);
+    printf("%'20" PRIu64 " bytes copied during GC\n",       stats->copied);
+    printf("%'20" PRIu32 " maximum heap size\n",            stats->max_heap_size);
+    printf("%'20" PRIu32 " chunks allocated in the heap\n", stats->alloc_count);
+    printf("%'20" PRIu64 " average chunk size\n\n",         avg_chunk);
+
+    printf("GC called %d times\n\n", stats->collections);
+
+    printf("INIT  time: %8.3fs\n",   (double)stats->init_time / CLOCKS_PER_SEC);
+    printf("MUT   time: %8.3fs\n",   mut_sec);
+    printf("GC    time: %8.3fs\n",   (double)stats->gc_time   / CLOCKS_PER_SEC);
+    printf("EXIT  time: %8.3fs\n",   (double)stats->exit_time / CLOCKS_PER_SEC);
+    printf("TOTAL time: %8.3fs\n\n", (double)total            / CLOCKS_PER_SEC);
+
+    printf("%%GC   time: %.2f%%\n\n", gc_percent);
+
+    printf("Alloc rate %'" PRIu64 " bytes per MUT sec\n\n", alloc_rate);
+
+    printf("Productivity %.2f%%\n", productivity);
+}
+
+void aggregate_stats(Stats * stats1, const Stats * stats2) {
+    fprintf(stderr, "RTS error: aggregate_stats not implemented");
+}
+
+#else
+
+void print_stats(const Stats * stats) {
+    fprintf(stderr, "RTS ERROR: Stats are disabled.\n"  \
+                    "By the way GC called %d times.\n", stats->collections);
+}
+
+void aggregate_stats(Stats * stats1, const Stats * stats2) {
+    stats1->collections += stats2->collections;
+}
+
+#endif // IDRIS_ENABLE_STATS
diff --git a/dist/rts/idris_stats.h b/dist/rts/idris_stats.h
new file mode 100644
index 0000000..39ef7d2
--- /dev/null
+++ b/dist/rts/idris_stats.h
@@ -0,0 +1,76 @@
+#ifndef _IDRIS_STATS_H
+#define _IDRIS_STATS_H
+
+#ifdef IDRIS_ENABLE_STATS
+#include <time.h>
+#endif
+
+#include <inttypes.h>
+#include <stdint.h>
+
+
+// TODO: measure user time, exclusive/inclusive stats
+typedef struct {
+#ifdef IDRIS_ENABLE_STATS
+    uint64_t allocations;       // Size of allocated space in bytes for all execution time.
+    uint32_t alloc_count;       // How many times alloc is called.
+    uint64_t copied;            // Size of space copied during GC.
+    uint32_t max_heap_size;     // Maximum heap size achieved.
+
+    clock_t init_time;     // Time spent for vm initialization.
+    clock_t exit_time;     // Time spent for vm termination.
+    clock_t gc_time;       // Time spent for gc for all execution time.
+    clock_t max_gc_pause;  // Time spent for longest gc.
+    clock_t start_time;    // Time of rts entry point.
+#endif // IDRIS_ENABLE_STATS
+    uint32_t collections;       // How many times gc called.
+} Stats; // without start time it's a monoid, can we remove start_time it somehow?
+
+void print_stats(const Stats * stats);
+void aggregate_stats(Stats * stats1, const Stats * stats2);
+
+
+#ifdef IDRIS_ENABLE_STATS
+
+#ifndef MAX
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#endif
+
+#define STATS_INIT_STATS(stats)                 \
+    memset(&stats, 0, sizeof(Stats));           \
+    stats.start_time  = clock();
+
+#define STATS_ALLOC(stats, size)                \
+    stats.allocations += size;                  \
+    stats.alloc_count = stats.alloc_count + 1;
+
+#define STATS_ENTER_INIT(stats) clock_t _start_time = clock();
+#define STATS_LEAVE_INIT(stats) stats.init_time = clock() - _start_time;
+
+#define STATS_ENTER_EXIT(stats) clock_t _start_time = clock();
+#define STATS_LEAVE_EXIT(stats) stats.exit_time = clock() - _start_time;
+
+#define STATS_ENTER_GC(stats, heap_size)                        \
+    clock_t _start_time = clock();                              \
+    stats.max_heap_size = MAX(stats.max_heap_size, heap_size);
+#define STATS_LEAVE_GC(stats, heap_size, heap_occuped)          \
+    clock_t _pause = clock() - _start_time;                     \
+    stats.gc_time += _pause;                                    \
+    stats.max_gc_pause = MAX(_pause, stats.max_gc_pause);       \
+    stats.max_heap_size = MAX(stats.max_heap_size, heap_size);  \
+    stats.copied     += heap_occuped;                           \
+    stats.collections = stats.collections + 1;
+
+#else
+#define STATS_INIT_STATS(stats) memset(&stats, 0, sizeof(Stats));
+#define STATS_ENTER_INIT(stats)
+#define STATS_LEAVE_INIT(stats)
+#define STATS_ENTER_EXIT(stats)
+#define STATS_LEAVE_EXIT(stats)
+#define STATS_ALLOC(stats, size)
+#define STATS_ENTER_GC(stats, heap_size)
+#define STATS_LEAVE_GC(stats, heap_size, heap_occuped)  \
+    stats.collections = stats.collections + 1;
+#endif // IDRIS_ENABLE_STATS
+
+#endif // _IDRIS_STATS_H
diff --git a/dist/rts/idris_stdfgn.c b/dist/rts/idris_stdfgn.c
new file mode 100644
index 0000000..1505535
--- /dev/null
+++ b/dist/rts/idris_stdfgn.c
@@ -0,0 +1,292 @@
+#include "idris_stdfgn.h"
+#include "idris_rts.h"
+#include "idris_gmp.h"
+#include "idris_gc.h"
+
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <dirent.h>
+#include <unistd.h>
+
+#ifdef _WIN32
+#include "windows/win_utils.h"
+#else
+#include <sys/select.h>
+#endif
+
+extern char** environ;
+
+void putStr(char* str) {
+    printf("%s", str);
+}
+
+void *fileOpen(char *name, char *mode) {
+#ifdef _WIN32
+    FILE *f = win32_u8fopen(name, mode);
+#else
+    FILE *f = fopen(name, mode);
+#endif
+    return (void *)f;
+}
+
+void fileClose(void* h) {
+    FILE* f = (FILE*)h;
+    fclose(f);
+}
+
+int fileEOF(void* h) {
+    FILE* f = (FILE*)h;
+    return feof(f);
+}
+
+int fileError(void* h) {
+    FILE* f = (FILE*)h;
+    return ferror(f);
+}
+
+int fileRemove(const char *filename) {
+    return remove(filename);
+}
+
+int fileSize(void* h) {
+    FILE* f = (FILE*)h;
+    int fd = fileno(f);
+
+    struct stat buf;
+    if (fstat(fd, &buf) == 0) {
+        return (int)(buf.st_size);
+    } else {
+        return -1;
+    }
+}
+
+VAL fileAccessTime(void* h) {
+    FILE* f = (FILE*)h;
+    int fd = fileno(f);
+
+    struct stat buf;
+    if (fstat(fd, &buf) == 0) {
+        return MKBIGI(buf.st_atime);
+    } else {
+        return MKBIGI(-1);
+    }
+}
+
+VAL fileModifiedTime(void* h) {
+    FILE* f = (FILE*)h;
+    int fd = fileno(f);
+
+    struct stat buf;
+    if (fstat(fd, &buf) == 0) {
+        return MKBIGI(buf.st_mtime);
+    } else {
+        return MKBIGI(-1);
+    }
+}
+
+VAL fileStatusTime(void* h) {
+    FILE* f = (FILE*)h;
+    int fd = fileno(f);
+
+    struct stat buf;
+    if (fstat(fd, &buf) == 0) {
+        return MKBIGI(buf.st_ctime);
+    } else {
+        return MKBIGI(-1);
+    }
+}
+
+typedef struct {
+    DIR* dirptr;
+    int error;
+} DirInfo;
+
+void* idris_dirOpen(char* dname) {
+    DIR *d = opendir(dname);
+    if (d == NULL) {
+        return NULL;
+    } else {
+        DirInfo* di = malloc(sizeof(DirInfo));
+        di->dirptr = d;
+        di->error = 0;
+
+        return (void*)di;
+    }
+}
+
+void idris_dirClose(void* h) {
+    DirInfo* di = (DirInfo*)h;
+
+    closedir(di->dirptr);
+    free(di);
+}
+
+char* idris_nextDirEntry(void* h) {
+    DirInfo* di = (DirInfo*)h;
+    struct dirent* de = readdir(di->dirptr);
+
+    if (de == NULL) {
+        di->error = -1;
+        return NULL;
+    } else {
+        return de->d_name;
+    }
+}
+
+int idris_mkdir(char* dname) {
+#ifdef _WIN32
+    return mkdir(dname);
+#else
+    return mkdir(dname, S_IRWXU | S_IRGRP | S_IROTH);
+#endif
+}
+
+int idris_chdir(char* dname) {
+    return chdir(dname);
+}
+
+int idris_dirError(void *dptr) {
+    return ((DirInfo*)dptr)->error;
+}
+
+int idris_writeStr(void* h, char* str) {
+    FILE* f = (FILE*)h;
+    if (fputs(str, f) >= 0) {
+        return 0;
+    } else {
+        return -1;
+    }
+}
+
+int fpoll(void* h)
+{
+#ifdef _WIN32
+    return win_fpoll(h);
+#else
+    FILE* f = (FILE*)h;
+    fd_set x;
+    struct timeval timeout;
+    timeout.tv_sec = 1;
+    timeout.tv_usec = 0;
+    int fd = fileno(f);
+
+    FD_ZERO(&x);
+    FD_SET(fd, &x);
+
+    int r = select(fd+1, &x, 0, 0, &timeout);
+    return r;
+#endif
+}
+
+void *do_popen(const char *cmd, const char *mode) {
+#ifdef _WIN32
+    FILE *f = win32_u8popen(cmd, mode);
+#else
+    FILE *f = popen(cmd, mode);
+    //    int d = fileno(f);
+    //    fcntl(d, F_SETFL, O_NONBLOCK);
+#endif
+    return f;
+}
+
+int isNull(void* ptr) {
+    return ptr==NULL;
+}
+
+void* idris_stdin() {
+    return (void*)stdin;
+}
+
+char* getEnvPair(int i) {
+    return *(environ + i);
+}
+
+VAL idris_time() {
+    time_t t = time(NULL);
+    return MKBIGI(t);
+}
+
+VAL idris_clock(VM* vm) {
+    VAL result;
+#ifdef _WIN32
+    int64_t sec, nsec;
+    win32_gettime(&sec, &nsec);
+    idris_constructor(result, vm, 0, 2, 0);
+    idris_setConArg(result, 0, MKBIGI(sec));
+    idris_setConArg(result, 1, MKBIGI(nsec));
+#else
+    struct timespec ts;
+    // We're not checking the result here, which is of course bad, but
+    // CLOCK_REALTIME is required by POSIX at least!
+    clock_gettime(CLOCK_REALTIME, &ts);
+    idris_constructor(result, vm, 0, 2, 0);
+    idris_setConArg(result, 0, MKBIGI(ts.tv_sec));
+    idris_setConArg(result, 1, MKBIGI(ts.tv_nsec));
+#endif
+    return result;
+}
+
+VAL idris_mkFileError(VM* vm) {
+    VAL result;
+    switch(errno) {
+        // Make sure this corresponds to the FileError structure in
+        // Prelude.File
+        case ENOENT:
+            idris_constructor(result, vm, 3, 0, 0);
+            break;
+        case EACCES:
+            idris_constructor(result, vm, 4, 0, 0);
+            break;
+        default:
+            idris_constructor(result, vm, 0, 1, 0);
+            idris_setConArg(result, 0, MKINT((intptr_t)errno));
+            break;
+    }
+    return result;
+}
+
+void idris_forceGC(void* vm) {
+    idris_gc((VM*)vm);
+}
+
+typedef struct {
+    char* string;
+    int len;
+} StrBuffer;
+
+void* idris_makeStringBuffer(int len) {
+    StrBuffer* sb = malloc(sizeof(StrBuffer));
+    if (sb != NULL) {
+        sb->string = malloc(len);
+        sb->string[0] = '\0';
+        sb->len = 0;
+    }
+    return sb;
+}
+
+void idris_addToString(void* buffer, char* str) {
+    StrBuffer* sb = (StrBuffer*)buffer;
+    int len = strlen(str);
+
+    memcpy(sb->string + sb->len, str, len+1);
+    sb->len += len;
+}
+
+VAL idris_getString(VM* vm, void* buffer) {
+    StrBuffer* sb = (StrBuffer*)buffer;
+
+    VAL str = MKSTR(vm, sb->string);
+    free(sb->string);
+    free(sb);
+    return str;
+}
+
+VAL idris_currentDir() {
+   char cwd[1024];
+   if (getcwd(cwd, sizeof(cwd)) != NULL)
+     return MKSTR(get_vm(),cwd);
+   else
+     return MKSTR(get_vm(),"");
+}
diff --git a/dist/rts/idris_stdfgn.h b/dist/rts/idris_stdfgn.h
new file mode 100644
index 0000000..65778ff
--- /dev/null
+++ b/dist/rts/idris_stdfgn.h
@@ -0,0 +1,62 @@
+#ifndef _IDRISSTDFGN_H
+#define _IDRISSTDFGN_H
+
+#include "idris_rts.h"
+
+// A collection of useful standard functions to be used by the prelude.
+
+void putStr(char* str);
+//char* readStr();
+
+void* fileOpen(char* f, char* mode);
+void fileClose(void* h);
+int fileEOF(void* h);
+int fileError(void* h);
+int fileRemove(const char* fname);
+// Returns a negative number if not a file (e.g. directory or device)
+int fileSize(void* h);
+
+// Return a negative number if not a file (e.g. directory or device)
+VAL fileAccessTime(void* h);
+VAL fileModifiedTime(void* h);
+VAL fileStatusTime(void* h);
+
+void* idris_dirOpen(char* dname);
+void idris_dirClose(void* h);
+char* idris_nextDirEntry(void* h);
+
+// Create a directory; return 0 on success or -1 on failure
+int idris_mkdir(char* dname);
+int idris_chdir(char* dname);
+
+// Return 0 if ok, or -1 if there was an error with the given directory
+// (like ferror)
+int idris_dirError(void *dptr);
+
+// return 0 on success
+int idris_writeStr(void*h, char* str);
+// construct a file error structure (see Prelude.File) from errno
+VAL idris_mkFileError(VM* vm);
+
+// Some machinery for building a large string without reallocating
+// Create a string with space for 'len' bytes
+void* idris_makeStringBuffer(int len);
+void idris_addToString(void* buffer, char* str);
+VAL idris_getString(VM* vm, void* buffer);
+
+void* do_popen(const char* cmd, const char* mode);
+int fpoll(void* h);
+
+int isNull(void* ptr);
+void* idris_stdin();
+
+char* getEnvPair(int i);
+
+VAL idris_time();
+VAL idris_clock(VM* vm);
+
+void idris_forceGC();
+
+VAL idris_currentDir();
+
+#endif
diff --git a/dist/rts/idris_utf8.c b/dist/rts/idris_utf8.c
new file mode 100644
index 0000000..96832ac
--- /dev/null
+++ b/dist/rts/idris_utf8.c
@@ -0,0 +1,181 @@
+#include "idris_utf8.h"
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+int idris_utf8_strlen(char *s) {
+   int i = 0, j = 0;
+   while (s[i]) {
+     if ((s[i] & 0xc0) != 0x80) j++;
+     i++;
+   }
+   return j;
+}
+
+int idris_utf8_charlen(char* s) {
+    int init = (int)s[0];
+    if ((init & 0x80) == 0) {
+        return 1; // Top bit unset, so 1 byte
+    }
+    if ((init > 244 && init < 256) ||
+        (init == 192) ||
+        (init == 193)) {
+        return 1; // Invalid characters
+    }
+    int i = 1;
+    while ((s[i] & 0xc0) == 0x80) {
+        i++; // Move on until top 2 bits are not 10
+    }
+    return i;
+}
+
+unsigned idris_utf8_index(char* s, int idx) {
+   int i = 0, j = 0;
+   while (j < idx) {
+     if ((s[i] & 0xc0) != 0x80) j++;
+     i++;
+   }
+   // Find the start of the next character
+   while ((s[i] & 0xc0) == 0x80) { i++; }
+
+   unsigned bytes = 0;
+   unsigned top = 0;
+
+   int init = (int)s[1];
+
+   // s[i] is now the start of the character we want
+   if ((s[i] & 0x80) == 0) {
+       bytes = 1;
+       top = (int)(s[i]);
+   } else if ((init > 244 && init < 256) ||
+              (init == 192) ||
+              (init == 193)) {
+       bytes = 1;
+       top = (int)(s[i]); // Invalid characters
+   } else if ((s[i] & 0xe0) == 0xc0) {
+       bytes = 2;
+       top = (int)(s[i] & 0x1f); // 5 bits
+   } else if ((s[i] & 0xf0) == 0xe0) {
+       bytes = 3;
+       top = (int)(s[i] & 0x0f); // 4 bits
+   } else if ((s[i] & 0xf8) == 0xf0) {
+       bytes = 4;
+       top = (int)(s[i] & 0x07); // 3 bits
+   } else if ((s[i] & 0xfc) == 0xf8) {
+       bytes = 5;
+       top = (int)(s[i] & 0x03); // 2 bits
+   } else if ((s[i] & 0xfe) == 0xfc) {
+       bytes = 6;
+       top = (int)(s[i] & 0x01); // 1 bits
+   }
+
+   while (bytes > 1) {
+       top = top << 6;
+       top += s[++i] & 0x3f; // 6 bits
+       --bytes;
+   }
+
+   return top;
+}
+
+char* idris_utf8_advance(char* str, int i) {
+    while (i > 0 && *str != '\0') {
+        // In a UTF8 single-byte char, the highest bit is 0.  In the
+        // first byte of a multi-byte char, the highest two bits are
+        // 11, but the rest of the bytes start with 10. So we can
+        // decrement our character counter when we see something other
+        // than 10 at the front.
+
+        // This is a bit of an overapproximation, as invalid multibyte
+        // sequences that are too long will be treated as if they are
+        // OK, but it's always paying attention to null-termination.
+        if ((*str & 0xc0) != 0x80) {
+            i--;
+        }
+        str++;
+    }
+    // Now we've found the first byte of the last character. Advance
+    // to the end of it, or the end of the string, whichever is first.
+    // Here, we don't risk overrunning the end of the string because
+    // ('\0' & 0xc0) != 0x80.
+    while ((*str & 0xc0) == 0x80) { str++; }
+
+    return str;
+}
+
+int idris_utf8_findOffset(char* str, int i) {
+    int offset = 0;
+    while(i > 0) {
+        int len = idris_utf8_charlen(str);
+        str+=len;
+        offset+=len;
+        i--;
+    }
+    return offset;
+}
+
+
+char* idris_utf8_fromChar(int x) {
+    char* str;
+    int bytes = 0, top = 0;
+
+    if (x < 0x80) {
+        str = malloc(2);
+        str[0] = (char)x;
+        str[1] = '\0';
+        return str;
+    }
+
+    if (x >= 0x80 && x <= 0x7ff) {
+        bytes = 2;
+        top = 0xc0;
+    } else if (x >= 0x800 && x <= 0xffff) {
+        bytes = 3;
+        top = 0xe0;
+    } else if (x >= 0x10000 && x <= 0x10ffff) {
+        bytes = 4;
+        top = 0xf0;
+    }
+
+    str = malloc(bytes + 1);
+    str[bytes] = '\0';
+    while(bytes > 0) {
+        int xbits = x & 0x3f; // Next 6 bits
+        bytes--;
+        if (bytes > 0) {
+            str[bytes] = (char)xbits + 0x80;
+        } else {
+            str[0] = (char)xbits + top;
+        }
+        x = x >> 6;
+    }
+
+    return str;
+}
+
+void reverse_range(char *start, char *end)
+{
+    while(start < end)
+    {
+        char c = *start;
+        *start++ = *end;
+        *end-- = c;
+    }
+}
+
+char* reverse_char(char *start)
+{
+    char *end = start;
+    while((end[1] & 0xc0) == 0x80) { end++; }
+    reverse_range(start, end);
+    return(end + 1);
+}
+
+char* idris_utf8_rev(char* s, char* result) {
+    strcpy(result, s);
+    char* end = result;
+    while(*end) { end = reverse_char(end); }
+    reverse_range(result, end-1);
+    return result;
+}
+
diff --git a/dist/rts/idris_utf8.h b/dist/rts/idris_utf8.h
new file mode 100644
index 0000000..a3b976c
--- /dev/null
+++ b/dist/rts/idris_utf8.h
@@ -0,0 +1,26 @@
+#ifndef _IDRIS_UTF8
+#define _IDRIS_UTF8
+
+/* Various functions for dealing with UTF8 encoding. These are probably
+   not very efficient (and I'm (EB) making no guarantees about their
+   correctness.) Nevertheless, they mean that we can treat Strings as
+   UFT8. Patches welcome :). */
+
+// Get length of a UTF8 encoded string in characters
+int idris_utf8_strlen(char *s);
+// Get number of bytes the first character takes in a string
+int idris_utf8_charlen(char* s);
+// Return int representation of string at an index.
+// Assumes in bounds.
+unsigned idris_utf8_index(char* s, int j);
+// Convert a char as an integer to a char* as a byte sequence
+// Null terminated; caller responsible for freeing
+char* idris_utf8_fromChar(int x);
+// Reverse a UTF8 encoded string, putting the result in 'result'
+char* idris_utf8_rev(char* s, char* result);
+// Advance a pointer into a string by i UTF8 characters.
+// Return original pointer if i <= 0.
+char* idris_utf8_advance(char* str, int i);
+// Return the offset of the ith UTF8 character in the string
+int idris_utf8_findOffset(char* str, int i);
+#endif
diff --git a/dist/rts/libtest.c b/dist/rts/libtest.c
new file mode 100644
index 0000000..237c8ce
--- /dev/null
+++ b/dist/rts/libtest.c
@@ -0,0 +1 @@
+int main() {}
diff --git a/dist/rts/mini-gmp.c b/dist/rts/mini-gmp.c
new file mode 100644
index 0000000..b5a4fe3
--- /dev/null
+++ b/dist/rts/mini-gmp.c
@@ -0,0 +1,4412 @@
+/* mini-gmp, a minimalistic implementation of a GNU GMP subset.
+
+   Contributed to the GNU project by Niels Möller
+
+Copyright 1991-1997, 1999-2016 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/* NOTE: All functions in this file which are not declared in
+   mini-gmp.h are internal, and are not intended to be compatible
+   neither with GMP nor with future versions of mini-gmp. */
+
+/* Much of the material copied from GMP files, including: gmp-impl.h,
+   longlong.h, mpn/generic/add_n.c, mpn/generic/addmul_1.c,
+   mpn/generic/lshift.c, mpn/generic/mul_1.c,
+   mpn/generic/mul_basecase.c, mpn/generic/rshift.c,
+   mpn/generic/sbpi1_div_qr.c, mpn/generic/sub_n.c,
+   mpn/generic/submul_1.c. */
+
+#include <assert.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mini-gmp.h"
+
+
+/* Macros */
+#define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT)
+
+#define GMP_LIMB_MAX (~ (mp_limb_t) 0)
+#define GMP_LIMB_HIGHBIT ((mp_limb_t) 1 << (GMP_LIMB_BITS - 1))
+
+#define GMP_HLIMB_BIT ((mp_limb_t) 1 << (GMP_LIMB_BITS / 2))
+#define GMP_LLIMB_MASK (GMP_HLIMB_BIT - 1)
+
+#define GMP_ULONG_BITS (sizeof(unsigned long) * CHAR_BIT)
+#define GMP_ULONG_HIGHBIT ((unsigned long) 1 << (GMP_ULONG_BITS - 1))
+
+#define GMP_ABS(x) ((x) >= 0 ? (x) : -(x))
+#define GMP_NEG_CAST(T,x) (-((T)((x) + 1) - 1))
+
+#define GMP_MIN(a, b) ((a) < (b) ? (a) : (b))
+#define GMP_MAX(a, b) ((a) > (b) ? (a) : (b))
+
+#define GMP_CMP(a,b) (((a) > (b)) - ((a) < (b)))
+
+#define gmp_assert_nocarry(x) do { \
+    mp_limb_t __cy = (x);	   \
+    assert (__cy == 0);		   \
+  } while (0)
+
+#define gmp_clz(count, x) do {						\
+    mp_limb_t __clz_x = (x);						\
+    unsigned __clz_c;							\
+    for (__clz_c = 0;							\
+	 (__clz_x & ((mp_limb_t) 0xff << (GMP_LIMB_BITS - 8))) == 0;	\
+	 __clz_c += 8)							\
+      __clz_x <<= 8;							\
+    for (; (__clz_x & GMP_LIMB_HIGHBIT) == 0; __clz_c++)		\
+      __clz_x <<= 1;							\
+    (count) = __clz_c;							\
+  } while (0)
+
+#define gmp_ctz(count, x) do {						\
+    mp_limb_t __ctz_x = (x);						\
+    unsigned __ctz_c = 0;						\
+    gmp_clz (__ctz_c, __ctz_x & - __ctz_x);				\
+    (count) = GMP_LIMB_BITS - 1 - __ctz_c;				\
+  } while (0)
+
+#define gmp_add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  do {									\
+    mp_limb_t __x;							\
+    __x = (al) + (bl);							\
+    (sh) = (ah) + (bh) + (__x < (al));					\
+    (sl) = __x;								\
+  } while (0)
+
+#define gmp_sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  do {									\
+    mp_limb_t __x;							\
+    __x = (al) - (bl);							\
+    (sh) = (ah) - (bh) - ((al) < (bl));					\
+    (sl) = __x;								\
+  } while (0)
+
+#define gmp_umul_ppmm(w1, w0, u, v)					\
+  do {									\
+    mp_limb_t __x0, __x1, __x2, __x3;					\
+    unsigned __ul, __vl, __uh, __vh;					\
+    mp_limb_t __u = (u), __v = (v);					\
+									\
+    __ul = __u & GMP_LLIMB_MASK;					\
+    __uh = __u >> (GMP_LIMB_BITS / 2);					\
+    __vl = __v & GMP_LLIMB_MASK;					\
+    __vh = __v >> (GMP_LIMB_BITS / 2);					\
+									\
+    __x0 = (mp_limb_t) __ul * __vl;					\
+    __x1 = (mp_limb_t) __ul * __vh;					\
+    __x2 = (mp_limb_t) __uh * __vl;					\
+    __x3 = (mp_limb_t) __uh * __vh;					\
+									\
+    __x1 += __x0 >> (GMP_LIMB_BITS / 2);/* this can't give carry */	\
+    __x1 += __x2;		/* but this indeed can */		\
+    if (__x1 < __x2)		/* did we get it? */			\
+      __x3 += GMP_HLIMB_BIT;	/* yes, add it in the proper pos. */	\
+									\
+    (w1) = __x3 + (__x1 >> (GMP_LIMB_BITS / 2));			\
+    (w0) = (__x1 << (GMP_LIMB_BITS / 2)) + (__x0 & GMP_LLIMB_MASK);	\
+  } while (0)
+
+#define gmp_udiv_qrnnd_preinv(q, r, nh, nl, d, di)			\
+  do {									\
+    mp_limb_t _qh, _ql, _r, _mask;					\
+    gmp_umul_ppmm (_qh, _ql, (nh), (di));				\
+    gmp_add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));		\
+    _r = (nl) - _qh * (d);						\
+    _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */		\
+    _qh += _mask;							\
+    _r += _mask & (d);							\
+    if (_r >= (d))							\
+      {									\
+	_r -= (d);							\
+	_qh++;								\
+      }									\
+									\
+    (r) = _r;								\
+    (q) = _qh;								\
+  } while (0)
+
+#define gmp_udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv)		\
+  do {									\
+    mp_limb_t _q0, _t1, _t0, _mask;					\
+    gmp_umul_ppmm ((q), _q0, (n2), (dinv));				\
+    gmp_add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1));			\
+									\
+    /* Compute the two most significant limbs of n - q'd */		\
+    (r1) = (n1) - (d1) * (q);						\
+    gmp_sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0));		\
+    gmp_umul_ppmm (_t1, _t0, (d0), (q));				\
+    gmp_sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0);			\
+    (q)++;								\
+									\
+    /* Conditionally adjust q and the remainders */			\
+    _mask = - (mp_limb_t) ((r1) >= _q0);				\
+    (q) += _mask;							\
+    gmp_add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \
+    if ((r1) >= (d1))							\
+      {									\
+	if ((r1) > (d1) || (r0) >= (d0))				\
+	  {								\
+	    (q)++;							\
+	    gmp_sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0));	\
+	  }								\
+      }									\
+  } while (0)
+
+/* Swap macros. */
+#define MP_LIMB_T_SWAP(x, y)						\
+  do {									\
+    mp_limb_t __mp_limb_t_swap__tmp = (x);				\
+    (x) = (y);								\
+    (y) = __mp_limb_t_swap__tmp;					\
+  } while (0)
+#define MP_SIZE_T_SWAP(x, y)						\
+  do {									\
+    mp_size_t __mp_size_t_swap__tmp = (x);				\
+    (x) = (y);								\
+    (y) = __mp_size_t_swap__tmp;					\
+  } while (0)
+#define MP_BITCNT_T_SWAP(x,y)			\
+  do {						\
+    mp_bitcnt_t __mp_bitcnt_t_swap__tmp = (x);	\
+    (x) = (y);					\
+    (y) = __mp_bitcnt_t_swap__tmp;		\
+  } while (0)
+#define MP_PTR_SWAP(x, y)						\
+  do {									\
+    mp_ptr __mp_ptr_swap__tmp = (x);					\
+    (x) = (y);								\
+    (y) = __mp_ptr_swap__tmp;						\
+  } while (0)
+#define MP_SRCPTR_SWAP(x, y)						\
+  do {									\
+    mp_srcptr __mp_srcptr_swap__tmp = (x);				\
+    (x) = (y);								\
+    (y) = __mp_srcptr_swap__tmp;					\
+  } while (0)
+
+#define MPN_PTR_SWAP(xp,xs, yp,ys)					\
+  do {									\
+    MP_PTR_SWAP (xp, yp);						\
+    MP_SIZE_T_SWAP (xs, ys);						\
+  } while(0)
+#define MPN_SRCPTR_SWAP(xp,xs, yp,ys)					\
+  do {									\
+    MP_SRCPTR_SWAP (xp, yp);						\
+    MP_SIZE_T_SWAP (xs, ys);						\
+  } while(0)
+
+#define MPZ_PTR_SWAP(x, y)						\
+  do {									\
+    mpz_ptr __mpz_ptr_swap__tmp = (x);					\
+    (x) = (y);								\
+    (y) = __mpz_ptr_swap__tmp;						\
+  } while (0)
+#define MPZ_SRCPTR_SWAP(x, y)						\
+  do {									\
+    mpz_srcptr __mpz_srcptr_swap__tmp = (x);				\
+    (x) = (y);								\
+    (y) = __mpz_srcptr_swap__tmp;					\
+  } while (0)
+
+const int mp_bits_per_limb = GMP_LIMB_BITS;
+
+
+/* Memory allocation and other helper functions. */
+static void
+gmp_die (const char *msg)
+{
+  fprintf (stderr, "%s\n", msg);
+  abort();
+}
+
+static void *
+gmp_default_alloc (size_t size)
+{
+  void *p;
+
+  assert (size > 0);
+
+  p = malloc (size);
+  if (!p)
+    gmp_die("gmp_default_alloc: Virtual memory exhausted.");
+
+  return p;
+}
+
+static void *
+gmp_default_realloc (void *old, size_t old_size, size_t new_size)
+{
+  void * p;
+
+  p = realloc (old, new_size);
+
+  if (!p)
+    gmp_die("gmp_default_realloc: Virtual memory exhausted.");
+
+  return p;
+}
+
+static void
+gmp_default_free (void *p, size_t size)
+{
+  free (p);
+}
+
+static void * (*gmp_allocate_func) (size_t) = gmp_default_alloc;
+static void * (*gmp_reallocate_func) (void *, size_t, size_t) = gmp_default_realloc;
+static void (*gmp_free_func) (void *, size_t) = gmp_default_free;
+
+void
+mp_get_memory_functions (void *(**alloc_func) (size_t),
+			 void *(**realloc_func) (void *, size_t, size_t),
+			 void (**free_func) (void *, size_t))
+{
+  if (alloc_func)
+    *alloc_func = gmp_allocate_func;
+
+  if (realloc_func)
+    *realloc_func = gmp_reallocate_func;
+
+  if (free_func)
+    *free_func = gmp_free_func;
+}
+
+void
+mp_set_memory_functions (void *(*alloc_func) (size_t),
+			 void *(*realloc_func) (void *, size_t, size_t),
+			 void (*free_func) (void *, size_t))
+{
+  if (!alloc_func)
+    alloc_func = gmp_default_alloc;
+  if (!realloc_func)
+    realloc_func = gmp_default_realloc;
+  if (!free_func)
+    free_func = gmp_default_free;
+
+  gmp_allocate_func = alloc_func;
+  gmp_reallocate_func = realloc_func;
+  gmp_free_func = free_func;
+}
+
+#define gmp_xalloc(size) ((*gmp_allocate_func)((size)))
+#define gmp_free(p) ((*gmp_free_func) ((p), 0))
+
+static mp_ptr
+gmp_xalloc_limbs (mp_size_t size)
+{
+  return (mp_ptr) gmp_xalloc (size * sizeof (mp_limb_t));
+}
+
+static mp_ptr
+gmp_xrealloc_limbs (mp_ptr old, mp_size_t size)
+{
+  assert (size > 0);
+  return (mp_ptr) (*gmp_reallocate_func) (old, 0, size * sizeof (mp_limb_t));
+}
+
+
+/* MPN interface */
+
+void
+mpn_copyi (mp_ptr d, mp_srcptr s, mp_size_t n)
+{
+  mp_size_t i;
+  for (i = 0; i < n; i++)
+    d[i] = s[i];
+}
+
+void
+mpn_copyd (mp_ptr d, mp_srcptr s, mp_size_t n)
+{
+  while (--n >= 0)
+    d[n] = s[n];
+}
+
+int
+mpn_cmp (mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  while (--n >= 0)
+    {
+      if (ap[n] != bp[n])
+	return ap[n] > bp[n] ? 1 : -1;
+    }
+  return 0;
+}
+
+static int
+mpn_cmp4 (mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
+{
+  if (an != bn)
+    return an < bn ? -1 : 1;
+  else
+    return mpn_cmp (ap, bp, an);
+}
+
+static mp_size_t
+mpn_normalized_size (mp_srcptr xp, mp_size_t n)
+{
+  while (n > 0 && xp[n-1] == 0)
+    --n;
+  return n;
+}
+
+int
+mpn_zero_p(mp_srcptr rp, mp_size_t n)
+{
+  return mpn_normalized_size (rp, n) == 0;
+}
+
+void
+mpn_zero (mp_ptr rp, mp_size_t n)
+{
+  while (--n >= 0)
+    rp[n] = 0;
+}
+
+mp_limb_t
+mpn_add_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b)
+{
+  mp_size_t i;
+
+  assert (n > 0);
+  i = 0;
+  do
+    {
+      mp_limb_t r = ap[i] + b;
+      /* Carry out */
+      b = (r < b);
+      rp[i] = r;
+    }
+  while (++i < n);
+
+  return b;
+}
+
+mp_limb_t
+mpn_add_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  mp_size_t i;
+  mp_limb_t cy;
+
+  for (i = 0, cy = 0; i < n; i++)
+    {
+      mp_limb_t a, b, r;
+      a = ap[i]; b = bp[i];
+      r = a + cy;
+      cy = (r < cy);
+      r += b;
+      cy += (r < b);
+      rp[i] = r;
+    }
+  return cy;
+}
+
+mp_limb_t
+mpn_add (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
+{
+  mp_limb_t cy;
+
+  assert (an >= bn);
+
+  cy = mpn_add_n (rp, ap, bp, bn);
+  if (an > bn)
+    cy = mpn_add_1 (rp + bn, ap + bn, an - bn, cy);
+  return cy;
+}
+
+mp_limb_t
+mpn_sub_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b)
+{
+  mp_size_t i;
+
+  assert (n > 0);
+
+  i = 0;
+  do
+    {
+      mp_limb_t a = ap[i];
+      /* Carry out */
+      mp_limb_t cy = a < b;
+      rp[i] = a - b;
+      b = cy;
+    }
+  while (++i < n);
+
+  return b;
+}
+
+mp_limb_t
+mpn_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  mp_size_t i;
+  mp_limb_t cy;
+
+  for (i = 0, cy = 0; i < n; i++)
+    {
+      mp_limb_t a, b;
+      a = ap[i]; b = bp[i];
+      b += cy;
+      cy = (b < cy);
+      cy += (a < b);
+      rp[i] = a - b;
+    }
+  return cy;
+}
+
+mp_limb_t
+mpn_sub (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
+{
+  mp_limb_t cy;
+
+  assert (an >= bn);
+
+  cy = mpn_sub_n (rp, ap, bp, bn);
+  if (an > bn)
+    cy = mpn_sub_1 (rp + bn, ap + bn, an - bn, cy);
+  return cy;
+}
+
+mp_limb_t
+mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_limb_t ul, cl, hpl, lpl;
+
+  assert (n >= 1);
+
+  cl = 0;
+  do
+    {
+      ul = *up++;
+      gmp_umul_ppmm (hpl, lpl, ul, vl);
+
+      lpl += cl;
+      cl = (lpl < cl) + hpl;
+
+      *rp++ = lpl;
+    }
+  while (--n != 0);
+
+  return cl;
+}
+
+mp_limb_t
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_limb_t ul, cl, hpl, lpl, rl;
+
+  assert (n >= 1);
+
+  cl = 0;
+  do
+    {
+      ul = *up++;
+      gmp_umul_ppmm (hpl, lpl, ul, vl);
+
+      lpl += cl;
+      cl = (lpl < cl) + hpl;
+
+      rl = *rp;
+      lpl = rl + lpl;
+      cl += lpl < rl;
+      *rp++ = lpl;
+    }
+  while (--n != 0);
+
+  return cl;
+}
+
+mp_limb_t
+mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_limb_t ul, cl, hpl, lpl, rl;
+
+  assert (n >= 1);
+
+  cl = 0;
+  do
+    {
+      ul = *up++;
+      gmp_umul_ppmm (hpl, lpl, ul, vl);
+
+      lpl += cl;
+      cl = (lpl < cl) + hpl;
+
+      rl = *rp;
+      lpl = rl - lpl;
+      cl += lpl > rl;
+      *rp++ = lpl;
+    }
+  while (--n != 0);
+
+  return cl;
+}
+
+mp_limb_t
+mpn_mul (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
+{
+  assert (un >= vn);
+  assert (vn >= 1);
+
+  /* We first multiply by the low order limb. This result can be
+     stored, not added, to rp. We also avoid a loop for zeroing this
+     way. */
+
+  rp[un] = mpn_mul_1 (rp, up, un, vp[0]);
+
+  /* Now accumulate the product of up[] and the next higher limb from
+     vp[]. */
+
+  while (--vn >= 1)
+    {
+      rp += 1, vp += 1;
+      rp[un] = mpn_addmul_1 (rp, up, un, vp[0]);
+    }
+  return rp[un];
+}
+
+void
+mpn_mul_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  mpn_mul (rp, ap, n, bp, n);
+}
+
+void
+mpn_sqr (mp_ptr rp, mp_srcptr ap, mp_size_t n)
+{
+  mpn_mul (rp, ap, n, ap, n);
+}
+
+mp_limb_t
+mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+  mp_limb_t high_limb, low_limb;
+  unsigned int tnc;
+  mp_limb_t retval;
+
+  assert (n >= 1);
+  assert (cnt >= 1);
+  assert (cnt < GMP_LIMB_BITS);
+
+  up += n;
+  rp += n;
+
+  tnc = GMP_LIMB_BITS - cnt;
+  low_limb = *--up;
+  retval = low_limb >> tnc;
+  high_limb = (low_limb << cnt);
+
+  while (--n != 0)
+    {
+      low_limb = *--up;
+      *--rp = high_limb | (low_limb >> tnc);
+      high_limb = (low_limb << cnt);
+    }
+  *--rp = high_limb;
+
+  return retval;
+}
+
+mp_limb_t
+mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+  mp_limb_t high_limb, low_limb;
+  unsigned int tnc;
+  mp_limb_t retval;
+
+  assert (n >= 1);
+  assert (cnt >= 1);
+  assert (cnt < GMP_LIMB_BITS);
+
+  tnc = GMP_LIMB_BITS - cnt;
+  high_limb = *up++;
+  retval = (high_limb << tnc);
+  low_limb = high_limb >> cnt;
+
+  while (--n != 0)
+    {
+      high_limb = *up++;
+      *rp++ = low_limb | (high_limb << tnc);
+      low_limb = high_limb >> cnt;
+    }
+  *rp = low_limb;
+
+  return retval;
+}
+
+static mp_bitcnt_t
+mpn_common_scan (mp_limb_t limb, mp_size_t i, mp_srcptr up, mp_size_t un,
+		 mp_limb_t ux)
+{
+  unsigned cnt;
+
+  assert (ux == 0 || ux == GMP_LIMB_MAX);
+  assert (0 <= i && i <= un );
+
+  while (limb == 0)
+    {
+      i++;
+      if (i == un)
+	return (ux == 0 ? ~(mp_bitcnt_t) 0 : un * GMP_LIMB_BITS);
+      limb = ux ^ up[i];
+    }
+  gmp_ctz (cnt, limb);
+  return (mp_bitcnt_t) i * GMP_LIMB_BITS + cnt;
+}
+
+mp_bitcnt_t
+mpn_scan1 (mp_srcptr ptr, mp_bitcnt_t bit)
+{
+  mp_size_t i;
+  i = bit / GMP_LIMB_BITS;
+
+  return mpn_common_scan ( ptr[i] & (GMP_LIMB_MAX << (bit % GMP_LIMB_BITS)),
+			  i, ptr, i, 0);
+}
+
+mp_bitcnt_t
+mpn_scan0 (mp_srcptr ptr, mp_bitcnt_t bit)
+{
+  mp_size_t i;
+  i = bit / GMP_LIMB_BITS;
+
+  return mpn_common_scan (~ptr[i] & (GMP_LIMB_MAX << (bit % GMP_LIMB_BITS)),
+			  i, ptr, i, GMP_LIMB_MAX);
+}
+
+void
+mpn_com (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  while (--n >= 0)
+    *rp++ = ~ *up++;
+}
+
+mp_limb_t
+mpn_neg (mp_ptr rp, mp_srcptr up, mp_size_t n)
+{
+  while (*up == 0)
+    {
+      *rp = 0;
+      if (!--n)
+	return 0;
+      ++up; ++rp;
+    }
+  *rp = - *up;
+  mpn_com (++rp, ++up, --n);
+  return 1;
+}
+
+
+/* MPN division interface. */
+
+/* The 3/2 inverse is defined as
+
+     m = floor( (B^3-1) / (B u1 + u0)) - B
+*/
+mp_limb_t
+mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0)
+{
+  mp_limb_t r, p, m, ql;
+  unsigned ul, uh, qh;
+
+  assert (u1 >= GMP_LIMB_HIGHBIT);
+
+  /* For notation, let b denote the half-limb base, so that B = b^2.
+     Split u1 = b uh + ul. */
+  ul = u1 & GMP_LLIMB_MASK;
+  uh = u1 >> (GMP_LIMB_BITS / 2);
+
+  /* Approximation of the high half of quotient. Differs from the 2/1
+     inverse of the half limb uh, since we have already subtracted
+     u0. */
+  qh = ~u1 / uh;
+
+  /* Adjust to get a half-limb 3/2 inverse, i.e., we want
+
+     qh' = floor( (b^3 - 1) / u) - b = floor ((b^3 - b u - 1) / u
+         = floor( (b (~u) + b-1) / u),
+
+     and the remainder
+
+     r = b (~u) + b-1 - qh (b uh + ul)
+       = b (~u - qh uh) + b-1 - qh ul
+
+     Subtraction of qh ul may underflow, which implies adjustments.
+     But by normalization, 2 u >= B > qh ul, so we need to adjust by
+     at most 2.
+  */
+
+  r = ((~u1 - (mp_limb_t) qh * uh) << (GMP_LIMB_BITS / 2)) | GMP_LLIMB_MASK;
+
+  p = (mp_limb_t) qh * ul;
+  /* Adjustment steps taken from udiv_qrnnd_c */
+  if (r < p)
+    {
+      qh--;
+      r += u1;
+      if (r >= u1) /* i.e. we didn't get carry when adding to r */
+	if (r < p)
+	  {
+	    qh--;
+	    r += u1;
+	  }
+    }
+  r -= p;
+
+  /* Low half of the quotient is
+
+       ql = floor ( (b r + b-1) / u1).
+
+     This is a 3/2 division (on half-limbs), for which qh is a
+     suitable inverse. */
+
+  p = (r >> (GMP_LIMB_BITS / 2)) * qh + r;
+  /* Unlike full-limb 3/2, we can add 1 without overflow. For this to
+     work, it is essential that ql is a full mp_limb_t. */
+  ql = (p >> (GMP_LIMB_BITS / 2)) + 1;
+
+  /* By the 3/2 trick, we don't need the high half limb. */
+  r = (r << (GMP_LIMB_BITS / 2)) + GMP_LLIMB_MASK - ql * u1;
+
+  if (r >= (p << (GMP_LIMB_BITS / 2)))
+    {
+      ql--;
+      r += u1;
+    }
+  m = ((mp_limb_t) qh << (GMP_LIMB_BITS / 2)) + ql;
+  if (r >= u1)
+    {
+      m++;
+      r -= u1;
+    }
+
+  /* Now m is the 2/1 invers of u1. If u0 > 0, adjust it to become a
+     3/2 inverse. */
+  if (u0 > 0)
+    {
+      mp_limb_t th, tl;
+      r = ~r;
+      r += u0;
+      if (r < u0)
+	{
+	  m--;
+	  if (r >= u1)
+	    {
+	      m--;
+	      r -= u1;
+	    }
+	  r -= u1;
+	}
+      gmp_umul_ppmm (th, tl, u0, m);
+      r += th;
+      if (r < th)
+	{
+	  m--;
+	  m -= ((r > u1) | ((r == u1) & (tl > u0)));
+	}
+    }
+
+  return m;
+}
+
+struct gmp_div_inverse
+{
+  /* Normalization shift count. */
+  unsigned shift;
+  /* Normalized divisor (d0 unused for mpn_div_qr_1) */
+  mp_limb_t d1, d0;
+  /* Inverse, for 2/1 or 3/2. */
+  mp_limb_t di;
+};
+
+static void
+mpn_div_qr_1_invert (struct gmp_div_inverse *inv, mp_limb_t d)
+{
+  unsigned shift;
+
+  assert (d > 0);
+  gmp_clz (shift, d);
+  inv->shift = shift;
+  inv->d1 = d << shift;
+  inv->di = mpn_invert_limb (inv->d1);
+}
+
+static void
+mpn_div_qr_2_invert (struct gmp_div_inverse *inv,
+		     mp_limb_t d1, mp_limb_t d0)
+{
+  unsigned shift;
+
+  assert (d1 > 0);
+  gmp_clz (shift, d1);
+  inv->shift = shift;
+  if (shift > 0)
+    {
+      d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift));
+      d0 <<= shift;
+    }
+  inv->d1 = d1;
+  inv->d0 = d0;
+  inv->di = mpn_invert_3by2 (d1, d0);
+}
+
+static void
+mpn_div_qr_invert (struct gmp_div_inverse *inv,
+		   mp_srcptr dp, mp_size_t dn)
+{
+  assert (dn > 0);
+
+  if (dn == 1)
+    mpn_div_qr_1_invert (inv, dp[0]);
+  else if (dn == 2)
+    mpn_div_qr_2_invert (inv, dp[1], dp[0]);
+  else
+    {
+      unsigned shift;
+      mp_limb_t d1, d0;
+
+      d1 = dp[dn-1];
+      d0 = dp[dn-2];
+      assert (d1 > 0);
+      gmp_clz (shift, d1);
+      inv->shift = shift;
+      if (shift > 0)
+	{
+	  d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift));
+	  d0 = (d0 << shift) | (dp[dn-3] >> (GMP_LIMB_BITS - shift));
+	}
+      inv->d1 = d1;
+      inv->d0 = d0;
+      inv->di = mpn_invert_3by2 (d1, d0);
+    }
+}
+
+/* Not matching current public gmp interface, rather corresponding to
+   the sbpi1_div_* functions. */
+static mp_limb_t
+mpn_div_qr_1_preinv (mp_ptr qp, mp_srcptr np, mp_size_t nn,
+		     const struct gmp_div_inverse *inv)
+{
+  mp_limb_t d, di;
+  mp_limb_t r;
+  mp_ptr tp = NULL;
+
+  if (inv->shift > 0)
+    {
+      tp = qp ? qp : gmp_xalloc_limbs (nn);
+      r = mpn_lshift (tp, np, nn, inv->shift);
+      np = tp;
+    }
+  else
+    r = 0;
+
+  d = inv->d1;
+  di = inv->di;
+  while (--nn >= 0)
+    {
+      mp_limb_t q;
+
+      gmp_udiv_qrnnd_preinv (q, r, r, np[nn], d, di);
+      if (qp)
+	qp[nn] = q;
+    }
+  if (inv->shift > 0 && !qp)
+    gmp_free (tp);
+
+  return r >> inv->shift;
+}
+
+static mp_limb_t
+mpn_div_qr_1 (mp_ptr qp, mp_srcptr np, mp_size_t nn, mp_limb_t d)
+{
+  assert (d > 0);
+
+  /* Special case for powers of two. */
+  if ((d & (d-1)) == 0)
+    {
+      mp_limb_t r = np[0] & (d-1);
+      if (qp)
+	{
+	  if (d <= 1)
+	    mpn_copyi (qp, np, nn);
+	  else
+	    {
+	      unsigned shift;
+	      gmp_ctz (shift, d);
+	      mpn_rshift (qp, np, nn, shift);
+	    }
+	}
+      return r;
+    }
+  else
+    {
+      struct gmp_div_inverse inv;
+      mpn_div_qr_1_invert (&inv, d);
+      return mpn_div_qr_1_preinv (qp, np, nn, &inv);
+    }
+}
+
+static void
+mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+		     const struct gmp_div_inverse *inv)
+{
+  unsigned shift;
+  mp_size_t i;
+  mp_limb_t d1, d0, di, r1, r0;
+  mp_ptr tp;
+
+  assert (nn >= 2);
+  shift = inv->shift;
+  d1 = inv->d1;
+  d0 = inv->d0;
+  di = inv->di;
+
+  if (shift > 0)
+    {
+      tp = gmp_xalloc_limbs (nn);
+      r1 = mpn_lshift (tp, np, nn, shift);
+      np = tp;
+    }
+  else
+    r1 = 0;
+
+  r0 = np[nn - 1];
+
+  i = nn - 2;
+  do
+    {
+      mp_limb_t n0, q;
+      n0 = np[i];
+      gmp_udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di);
+
+      if (qp)
+	qp[i] = q;
+    }
+  while (--i >= 0);
+
+  if (shift > 0)
+    {
+      assert ((r0 << (GMP_LIMB_BITS - shift)) == 0);
+      r0 = (r0 >> shift) | (r1 << (GMP_LIMB_BITS - shift));
+      r1 >>= shift;
+
+      gmp_free (tp);
+    }
+
+  rp[1] = r1;
+  rp[0] = r0;
+}
+
+#if 0
+static void
+mpn_div_qr_2 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+	      mp_limb_t d1, mp_limb_t d0)
+{
+  struct gmp_div_inverse inv;
+  assert (nn >= 2);
+
+  mpn_div_qr_2_invert (&inv, d1, d0);
+  mpn_div_qr_2_preinv (qp, rp, np, nn, &inv);
+}
+#endif
+
+static void
+mpn_div_qr_pi1 (mp_ptr qp,
+		mp_ptr np, mp_size_t nn, mp_limb_t n1,
+		mp_srcptr dp, mp_size_t dn,
+		mp_limb_t dinv)
+{
+  mp_size_t i;
+
+  mp_limb_t d1, d0;
+  mp_limb_t cy, cy1;
+  mp_limb_t q;
+
+  assert (dn > 2);
+  assert (nn >= dn);
+
+  d1 = dp[dn - 1];
+  d0 = dp[dn - 2];
+
+  assert ((d1 & GMP_LIMB_HIGHBIT) != 0);
+  /* Iteration variable is the index of the q limb.
+   *
+   * We divide <n1, np[dn-1+i], np[dn-2+i], np[dn-3+i],..., np[i]>
+   * by            <d1,          d0,        dp[dn-3],  ..., dp[0] >
+   */
+
+  i = nn - dn;
+  do
+    {
+      mp_limb_t n0 = np[dn-1+i];
+
+      if (n1 == d1 && n0 == d0)
+	{
+	  q = GMP_LIMB_MAX;
+	  mpn_submul_1 (np+i, dp, dn, q);
+	  n1 = np[dn-1+i];	/* update n1, last loop's value will now be invalid */
+	}
+      else
+	{
+	  gmp_udiv_qr_3by2 (q, n1, n0, n1, n0, np[dn-2+i], d1, d0, dinv);
+
+	  cy = mpn_submul_1 (np + i, dp, dn-2, q);
+
+	  cy1 = n0 < cy;
+	  n0 = n0 - cy;
+	  cy = n1 < cy1;
+	  n1 = n1 - cy1;
+	  np[dn-2+i] = n0;
+
+	  if (cy != 0)
+	    {
+	      n1 += d1 + mpn_add_n (np + i, np + i, dp, dn - 1);
+	      q--;
+	    }
+	}
+
+      if (qp)
+	qp[i] = q;
+    }
+  while (--i >= 0);
+
+  np[dn - 1] = n1;
+}
+
+static void
+mpn_div_qr_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn,
+		   mp_srcptr dp, mp_size_t dn,
+		   const struct gmp_div_inverse *inv)
+{
+  assert (dn > 0);
+  assert (nn >= dn);
+
+  if (dn == 1)
+    np[0] = mpn_div_qr_1_preinv (qp, np, nn, inv);
+  else if (dn == 2)
+    mpn_div_qr_2_preinv (qp, np, np, nn, inv);
+  else
+    {
+      mp_limb_t nh;
+      unsigned shift;
+
+      assert (inv->d1 == dp[dn-1]);
+      assert (inv->d0 == dp[dn-2]);
+      assert ((inv->d1 & GMP_LIMB_HIGHBIT) != 0);
+
+      shift = inv->shift;
+      if (shift > 0)
+	nh = mpn_lshift (np, np, nn, shift);
+      else
+	nh = 0;
+
+      mpn_div_qr_pi1 (qp, np, nn, nh, dp, dn, inv->di);
+
+      if (shift > 0)
+	gmp_assert_nocarry (mpn_rshift (np, np, dn, shift));
+    }
+}
+
+static void
+mpn_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
+{
+  struct gmp_div_inverse inv;
+  mp_ptr tp = NULL;
+
+  assert (dn > 0);
+  assert (nn >= dn);
+
+  mpn_div_qr_invert (&inv, dp, dn);
+  if (dn > 2 && inv.shift > 0)
+    {
+      tp = gmp_xalloc_limbs (dn);
+      gmp_assert_nocarry (mpn_lshift (tp, dp, dn, inv.shift));
+      dp = tp;
+    }
+  mpn_div_qr_preinv (qp, np, nn, dp, dn, &inv);
+  if (tp)
+    gmp_free (tp);
+}
+
+
+/* MPN base conversion. */
+static unsigned
+mpn_base_power_of_two_p (unsigned b)
+{
+  switch (b)
+    {
+    case 2: return 1;
+    case 4: return 2;
+    case 8: return 3;
+    case 16: return 4;
+    case 32: return 5;
+    case 64: return 6;
+    case 128: return 7;
+    case 256: return 8;
+    default: return 0;
+    }
+}
+
+struct mpn_base_info
+{
+  /* bb is the largest power of the base which fits in one limb, and
+     exp is the corresponding exponent. */
+  unsigned exp;
+  mp_limb_t bb;
+};
+
+static void
+mpn_get_base_info (struct mpn_base_info *info, mp_limb_t b)
+{
+  mp_limb_t m;
+  mp_limb_t p;
+  unsigned exp;
+
+  m = GMP_LIMB_MAX / b;
+  for (exp = 1, p = b; p <= m; exp++)
+    p *= b;
+
+  info->exp = exp;
+  info->bb = p;
+}
+
+static mp_bitcnt_t
+mpn_limb_size_in_base_2 (mp_limb_t u)
+{
+  unsigned shift;
+
+  assert (u > 0);
+  gmp_clz (shift, u);
+  return GMP_LIMB_BITS - shift;
+}
+
+static size_t
+mpn_get_str_bits (unsigned char *sp, unsigned bits, mp_srcptr up, mp_size_t un)
+{
+  unsigned char mask;
+  size_t sn, j;
+  mp_size_t i;
+  unsigned shift;
+
+  sn = ((un - 1) * GMP_LIMB_BITS + mpn_limb_size_in_base_2 (up[un-1])
+	+ bits - 1) / bits;
+
+  mask = (1U << bits) - 1;
+
+  for (i = 0, j = sn, shift = 0; j-- > 0;)
+    {
+      unsigned char digit = up[i] >> shift;
+
+      shift += bits;
+
+      if (shift >= GMP_LIMB_BITS && ++i < un)
+	{
+	  shift -= GMP_LIMB_BITS;
+	  digit |= up[i] << (bits - shift);
+	}
+      sp[j] = digit & mask;
+    }
+  return sn;
+}
+
+/* We generate digits from the least significant end, and reverse at
+   the end. */
+static size_t
+mpn_limb_get_str (unsigned char *sp, mp_limb_t w,
+		  const struct gmp_div_inverse *binv)
+{
+  mp_size_t i;
+  for (i = 0; w > 0; i++)
+    {
+      mp_limb_t h, l, r;
+
+      h = w >> (GMP_LIMB_BITS - binv->shift);
+      l = w << binv->shift;
+
+      gmp_udiv_qrnnd_preinv (w, r, h, l, binv->d1, binv->di);
+      assert ( (r << (GMP_LIMB_BITS - binv->shift)) == 0);
+      r >>= binv->shift;
+
+      sp[i] = r;
+    }
+  return i;
+}
+
+static size_t
+mpn_get_str_other (unsigned char *sp,
+		   int base, const struct mpn_base_info *info,
+		   mp_ptr up, mp_size_t un)
+{
+  struct gmp_div_inverse binv;
+  size_t sn;
+  size_t i;
+
+  mpn_div_qr_1_invert (&binv, base);
+
+  sn = 0;
+
+  if (un > 1)
+    {
+      struct gmp_div_inverse bbinv;
+      mpn_div_qr_1_invert (&bbinv, info->bb);
+
+      do
+	{
+	  mp_limb_t w;
+	  size_t done;
+	  w = mpn_div_qr_1_preinv (up, up, un, &bbinv);
+	  un -= (up[un-1] == 0);
+	  done = mpn_limb_get_str (sp + sn, w, &binv);
+
+	  for (sn += done; done < info->exp; done++)
+	    sp[sn++] = 0;
+	}
+      while (un > 1);
+    }
+  sn += mpn_limb_get_str (sp + sn, up[0], &binv);
+
+  /* Reverse order */
+  for (i = 0; 2*i + 1 < sn; i++)
+    {
+      unsigned char t = sp[i];
+      sp[i] = sp[sn - i - 1];
+      sp[sn - i - 1] = t;
+    }
+
+  return sn;
+}
+
+size_t
+mpn_get_str (unsigned char *sp, int base, mp_ptr up, mp_size_t un)
+{
+  unsigned bits;
+
+  assert (un > 0);
+  assert (up[un-1] > 0);
+
+  bits = mpn_base_power_of_two_p (base);
+  if (bits)
+    return mpn_get_str_bits (sp, bits, up, un);
+  else
+    {
+      struct mpn_base_info info;
+
+      mpn_get_base_info (&info, base);
+      return mpn_get_str_other (sp, base, &info, up, un);
+    }
+}
+
+static mp_size_t
+mpn_set_str_bits (mp_ptr rp, const unsigned char *sp, size_t sn,
+		  unsigned bits)
+{
+  mp_size_t rn;
+  size_t j;
+  unsigned shift;
+
+  for (j = sn, rn = 0, shift = 0; j-- > 0; )
+    {
+      if (shift == 0)
+	{
+	  rp[rn++] = sp[j];
+	  shift += bits;
+	}
+      else
+	{
+	  rp[rn-1] |= (mp_limb_t) sp[j] << shift;
+	  shift += bits;
+	  if (shift >= GMP_LIMB_BITS)
+	    {
+	      shift -= GMP_LIMB_BITS;
+	      if (shift > 0)
+		rp[rn++] = (mp_limb_t) sp[j] >> (bits - shift);
+	    }
+	}
+    }
+  rn = mpn_normalized_size (rp, rn);
+  return rn;
+}
+
+/* Result is usually normalized, except for all-zero input, in which
+   case a single zero limb is written at *RP, and 1 is returned. */
+static mp_size_t
+mpn_set_str_other (mp_ptr rp, const unsigned char *sp, size_t sn,
+		   mp_limb_t b, const struct mpn_base_info *info)
+{
+  mp_size_t rn;
+  mp_limb_t w;
+  unsigned k;
+  size_t j;
+
+  assert (sn > 0);
+
+  k = 1 + (sn - 1) % info->exp;
+
+  j = 0;
+  w = sp[j++];
+  while (--k != 0)
+    w = w * b + sp[j++];
+
+  rp[0] = w;
+
+  for (rn = 1; j < sn;)
+    {
+      mp_limb_t cy;
+
+      w = sp[j++];
+      for (k = 1; k < info->exp; k++)
+	w = w * b + sp[j++];
+
+      cy = mpn_mul_1 (rp, rp, rn, info->bb);
+      cy += mpn_add_1 (rp, rp, rn, w);
+      if (cy > 0)
+	rp[rn++] = cy;
+    }
+  assert (j == sn);
+
+  return rn;
+}
+
+mp_size_t
+mpn_set_str (mp_ptr rp, const unsigned char *sp, size_t sn, int base)
+{
+  unsigned bits;
+
+  if (sn == 0)
+    return 0;
+
+  bits = mpn_base_power_of_two_p (base);
+  if (bits)
+    return mpn_set_str_bits (rp, sp, sn, bits);
+  else
+    {
+      struct mpn_base_info info;
+
+      mpn_get_base_info (&info, base);
+      return mpn_set_str_other (rp, sp, sn, base, &info);
+    }
+}
+
+
+/* MPZ interface */
+void
+mpz_init (mpz_t r)
+{
+  static const mp_limb_t dummy_limb = 0xc1a0;
+
+  r->_mp_alloc = 0;
+  r->_mp_size = 0;
+  r->_mp_d = (mp_ptr) &dummy_limb;
+}
+
+/* The utility of this function is a bit limited, since many functions
+   assigns the result variable using mpz_swap. */
+void
+mpz_init2 (mpz_t r, mp_bitcnt_t bits)
+{
+  mp_size_t rn;
+
+  bits -= (bits != 0);		/* Round down, except if 0 */
+  rn = 1 + bits / GMP_LIMB_BITS;
+
+  r->_mp_alloc = rn;
+  r->_mp_size = 0;
+  r->_mp_d = gmp_xalloc_limbs (rn);
+}
+
+void
+mpz_clear (mpz_t r)
+{
+  if (r->_mp_alloc)
+    gmp_free (r->_mp_d);
+}
+
+static mp_ptr
+mpz_realloc (mpz_t r, mp_size_t size)
+{
+  size = GMP_MAX (size, 1);
+
+  if (r->_mp_alloc)
+    r->_mp_d = gmp_xrealloc_limbs (r->_mp_d, size);
+  else
+    r->_mp_d = gmp_xalloc_limbs (size);
+  r->_mp_alloc = size;
+
+  if (GMP_ABS (r->_mp_size) > size)
+    r->_mp_size = 0;
+
+  return r->_mp_d;
+}
+
+/* Realloc for an mpz_t WHAT if it has less than NEEDED limbs.  */
+#define MPZ_REALLOC(z,n) ((n) > (z)->_mp_alloc			\
+			  ? mpz_realloc(z,n)			\
+			  : (z)->_mp_d)
+
+/* MPZ assignment and basic conversions. */
+void
+mpz_set_si (mpz_t r, signed long int x)
+{
+  if (x >= 0)
+    mpz_set_ui (r, x);
+  else /* (x < 0) */
+    {
+      r->_mp_size = -1;
+      MPZ_REALLOC (r, 1)[0] = GMP_NEG_CAST (unsigned long int, x);
+    }
+}
+
+void
+mpz_set_ui (mpz_t r, unsigned long int x)
+{
+  if (x > 0)
+    {
+      r->_mp_size = 1;
+      MPZ_REALLOC (r, 1)[0] = x;
+    }
+  else
+    r->_mp_size = 0;
+}
+
+void
+mpz_set (mpz_t r, const mpz_t x)
+{
+  /* Allow the NOP r == x */
+  if (r != x)
+    {
+      mp_size_t n;
+      mp_ptr rp;
+
+      n = GMP_ABS (x->_mp_size);
+      rp = MPZ_REALLOC (r, n);
+
+      mpn_copyi (rp, x->_mp_d, n);
+      r->_mp_size = x->_mp_size;
+    }
+}
+
+void
+mpz_init_set_si (mpz_t r, signed long int x)
+{
+  mpz_init (r);
+  mpz_set_si (r, x);
+}
+
+void
+mpz_init_set_ui (mpz_t r, unsigned long int x)
+{
+  mpz_init (r);
+  mpz_set_ui (r, x);
+}
+
+void
+mpz_init_set (mpz_t r, const mpz_t x)
+{
+  mpz_init (r);
+  mpz_set (r, x);
+}
+
+int
+mpz_fits_slong_p (const mpz_t u)
+{
+  mp_size_t us = u->_mp_size;
+
+  if (us == 1)
+    return u->_mp_d[0] < GMP_LIMB_HIGHBIT;
+  else if (us == -1)
+    return u->_mp_d[0] <= GMP_LIMB_HIGHBIT;
+  else
+    return (us == 0);
+}
+
+int
+mpz_fits_ulong_p (const mpz_t u)
+{
+  mp_size_t us = u->_mp_size;
+
+  return (us == (us > 0));
+}
+
+long int
+mpz_get_si (const mpz_t u)
+{
+  if (u->_mp_size < 0)
+    /* This expression is necessary to properly handle 0x80000000 */
+    return -1 - (long) ((u->_mp_d[0] - 1) & ~GMP_LIMB_HIGHBIT);
+  else
+    return (long) (mpz_get_ui (u) & ~GMP_LIMB_HIGHBIT);
+}
+
+unsigned long int
+mpz_get_ui (const mpz_t u)
+{
+  return u->_mp_size == 0 ? 0 : u->_mp_d[0];
+}
+
+size_t
+mpz_size (const mpz_t u)
+{
+  return GMP_ABS (u->_mp_size);
+}
+
+mp_limb_t
+mpz_getlimbn (const mpz_t u, mp_size_t n)
+{
+  if (n >= 0 && n < GMP_ABS (u->_mp_size))
+    return u->_mp_d[n];
+  else
+    return 0;
+}
+
+void
+mpz_realloc2 (mpz_t x, mp_bitcnt_t n)
+{
+  mpz_realloc (x, 1 + (n - (n != 0)) / GMP_LIMB_BITS);
+}
+
+mp_srcptr
+mpz_limbs_read (mpz_srcptr x)
+{
+  return x->_mp_d;
+}
+
+mp_ptr
+mpz_limbs_modify (mpz_t x, mp_size_t n)
+{
+  assert (n > 0);
+  return MPZ_REALLOC (x, n);
+}
+
+mp_ptr
+mpz_limbs_write (mpz_t x, mp_size_t n)
+{
+  return mpz_limbs_modify (x, n);
+}
+
+void
+mpz_limbs_finish (mpz_t x, mp_size_t xs)
+{
+  mp_size_t xn;
+  xn = mpn_normalized_size (x->_mp_d, GMP_ABS (xs));
+  x->_mp_size = xs < 0 ? -xn : xn;
+}
+
+mpz_srcptr
+mpz_roinit_n (mpz_t x, mp_srcptr xp, mp_size_t xs)
+{
+  x->_mp_alloc = 0;
+  x->_mp_d = (mp_ptr) xp;
+  mpz_limbs_finish (x, xs);
+  return x;
+}
+
+
+/* Conversions and comparison to double. */
+void
+mpz_set_d (mpz_t r, double x)
+{
+  int sign;
+  mp_ptr rp;
+  mp_size_t rn, i;
+  double B;
+  double Bi;
+  mp_limb_t f;
+
+  /* x != x is true when x is a NaN, and x == x * 0.5 is true when x is
+     zero or infinity. */
+  if (x != x || x == x * 0.5)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+
+  sign = x < 0.0 ;
+  if (sign)
+    x = - x;
+
+  if (x < 1.0)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+  B = 2.0 * (double) GMP_LIMB_HIGHBIT;
+  Bi = 1.0 / B;
+  for (rn = 1; x >= B; rn++)
+    x *= Bi;
+
+  rp = MPZ_REALLOC (r, rn);
+
+  f = (mp_limb_t) x;
+  x -= f;
+  assert (x < 1.0);
+  i = rn-1;
+  rp[i] = f;
+  while (--i >= 0)
+    {
+      x = B * x;
+      f = (mp_limb_t) x;
+      x -= f;
+      assert (x < 1.0);
+      rp[i] = f;
+    }
+
+  r->_mp_size = sign ? - rn : rn;
+}
+
+void
+mpz_init_set_d (mpz_t r, double x)
+{
+  mpz_init (r);
+  mpz_set_d (r, x);
+}
+
+double
+mpz_get_d (const mpz_t u)
+{
+  mp_size_t un;
+  double x;
+  double B = 2.0 * (double) GMP_LIMB_HIGHBIT;
+
+  un = GMP_ABS (u->_mp_size);
+
+  if (un == 0)
+    return 0.0;
+
+  x = u->_mp_d[--un];
+  while (un > 0)
+    x = B*x + u->_mp_d[--un];
+
+  if (u->_mp_size < 0)
+    x = -x;
+
+  return x;
+}
+
+int
+mpz_cmpabs_d (const mpz_t x, double d)
+{
+  mp_size_t xn;
+  double B, Bi;
+  mp_size_t i;
+
+  xn = x->_mp_size;
+  d = GMP_ABS (d);
+
+  if (xn != 0)
+    {
+      xn = GMP_ABS (xn);
+
+      B = 2.0 * (double) GMP_LIMB_HIGHBIT;
+      Bi = 1.0 / B;
+
+      /* Scale d so it can be compared with the top limb. */
+      for (i = 1; i < xn; i++)
+	d *= Bi;
+
+      if (d >= B)
+	return -1;
+
+      /* Compare floor(d) to top limb, subtract and cancel when equal. */
+      for (i = xn; i-- > 0;)
+	{
+	  mp_limb_t f, xl;
+
+	  f = (mp_limb_t) d;
+	  xl = x->_mp_d[i];
+	  if (xl > f)
+	    return 1;
+	  else if (xl < f)
+	    return -1;
+	  d = B * (d - f);
+	}
+    }
+  return - (d > 0.0);
+}
+
+int
+mpz_cmp_d (const mpz_t x, double d)
+{
+  if (x->_mp_size < 0)
+    {
+      if (d >= 0.0)
+	return -1;
+      else
+	return -mpz_cmpabs_d (x, d);
+    }
+  else
+    {
+      if (d < 0.0)
+	return 1;
+      else
+	return mpz_cmpabs_d (x, d);
+    }
+}
+
+
+/* MPZ comparisons and the like. */
+int
+mpz_sgn (const mpz_t u)
+{
+  return GMP_CMP (u->_mp_size, 0);
+}
+
+int
+mpz_cmp_si (const mpz_t u, long v)
+{
+  mp_size_t usize = u->_mp_size;
+
+  if (usize < -1)
+    return -1;
+  else if (v >= 0)
+    return mpz_cmp_ui (u, v);
+  else if (usize >= 0)
+    return 1;
+  else /* usize == -1 */
+    return GMP_CMP (GMP_NEG_CAST (mp_limb_t, v), u->_mp_d[0]);
+}
+
+int
+mpz_cmp_ui (const mpz_t u, unsigned long v)
+{
+  mp_size_t usize = u->_mp_size;
+
+  if (usize > 1)
+    return 1;
+  else if (usize < 0)
+    return -1;
+  else
+    return GMP_CMP (mpz_get_ui (u), v);
+}
+
+int
+mpz_cmp (const mpz_t a, const mpz_t b)
+{
+  mp_size_t asize = a->_mp_size;
+  mp_size_t bsize = b->_mp_size;
+
+  if (asize != bsize)
+    return (asize < bsize) ? -1 : 1;
+  else if (asize >= 0)
+    return mpn_cmp (a->_mp_d, b->_mp_d, asize);
+  else
+    return mpn_cmp (b->_mp_d, a->_mp_d, -asize);
+}
+
+int
+mpz_cmpabs_ui (const mpz_t u, unsigned long v)
+{
+  if (GMP_ABS (u->_mp_size) > 1)
+    return 1;
+  else
+    return GMP_CMP (mpz_get_ui (u), v);
+}
+
+int
+mpz_cmpabs (const mpz_t u, const mpz_t v)
+{
+  return mpn_cmp4 (u->_mp_d, GMP_ABS (u->_mp_size),
+		   v->_mp_d, GMP_ABS (v->_mp_size));
+}
+
+void
+mpz_abs (mpz_t r, const mpz_t u)
+{
+  mpz_set (r, u);
+  r->_mp_size = GMP_ABS (r->_mp_size);
+}
+
+void
+mpz_neg (mpz_t r, const mpz_t u)
+{
+  mpz_set (r, u);
+  r->_mp_size = -r->_mp_size;
+}
+
+void
+mpz_swap (mpz_t u, mpz_t v)
+{
+  MP_SIZE_T_SWAP (u->_mp_size, v->_mp_size);
+  MP_SIZE_T_SWAP (u->_mp_alloc, v->_mp_alloc);
+  MP_PTR_SWAP (u->_mp_d, v->_mp_d);
+}
+
+
+/* MPZ addition and subtraction */
+
+/* Adds to the absolute value. Returns new size, but doesn't store it. */
+static mp_size_t
+mpz_abs_add_ui (mpz_t r, const mpz_t a, unsigned long b)
+{
+  mp_size_t an;
+  mp_ptr rp;
+  mp_limb_t cy;
+
+  an = GMP_ABS (a->_mp_size);
+  if (an == 0)
+    {
+      MPZ_REALLOC (r, 1)[0] = b;
+      return b > 0;
+    }
+
+  rp = MPZ_REALLOC (r, an + 1);
+
+  cy = mpn_add_1 (rp, a->_mp_d, an, b);
+  rp[an] = cy;
+  an += cy;
+
+  return an;
+}
+
+/* Subtract from the absolute value. Returns new size, (or -1 on underflow),
+   but doesn't store it. */
+static mp_size_t
+mpz_abs_sub_ui (mpz_t r, const mpz_t a, unsigned long b)
+{
+  mp_size_t an = GMP_ABS (a->_mp_size);
+  mp_ptr rp;
+
+  if (an == 0)
+    {
+      MPZ_REALLOC (r, 1)[0] = b;
+      return -(b > 0);
+    }
+  rp = MPZ_REALLOC (r, an);
+  if (an == 1 && a->_mp_d[0] < b)
+    {
+      rp[0] = b - a->_mp_d[0];
+      return -1;
+    }
+  else
+    {
+      gmp_assert_nocarry (mpn_sub_1 (rp, a->_mp_d, an, b));
+      return mpn_normalized_size (rp, an);
+    }
+}
+
+void
+mpz_add_ui (mpz_t r, const mpz_t a, unsigned long b)
+{
+  if (a->_mp_size >= 0)
+    r->_mp_size = mpz_abs_add_ui (r, a, b);
+  else
+    r->_mp_size = -mpz_abs_sub_ui (r, a, b);
+}
+
+void
+mpz_sub_ui (mpz_t r, const mpz_t a, unsigned long b)
+{
+  if (a->_mp_size < 0)
+    r->_mp_size = -mpz_abs_add_ui (r, a, b);
+  else
+    r->_mp_size = mpz_abs_sub_ui (r, a, b);
+}
+
+void
+mpz_ui_sub (mpz_t r, unsigned long a, const mpz_t b)
+{
+  if (b->_mp_size < 0)
+    r->_mp_size = mpz_abs_add_ui (r, b, a);
+  else
+    r->_mp_size = -mpz_abs_sub_ui (r, b, a);
+}
+
+static mp_size_t
+mpz_abs_add (mpz_t r, const mpz_t a, const mpz_t b)
+{
+  mp_size_t an = GMP_ABS (a->_mp_size);
+  mp_size_t bn = GMP_ABS (b->_mp_size);
+  mp_ptr rp;
+  mp_limb_t cy;
+
+  if (an < bn)
+    {
+      MPZ_SRCPTR_SWAP (a, b);
+      MP_SIZE_T_SWAP (an, bn);
+    }
+
+  rp = MPZ_REALLOC (r, an + 1);
+  cy = mpn_add (rp, a->_mp_d, an, b->_mp_d, bn);
+
+  rp[an] = cy;
+
+  return an + cy;
+}
+
+static mp_size_t
+mpz_abs_sub (mpz_t r, const mpz_t a, const mpz_t b)
+{
+  mp_size_t an = GMP_ABS (a->_mp_size);
+  mp_size_t bn = GMP_ABS (b->_mp_size);
+  int cmp;
+  mp_ptr rp;
+
+  cmp = mpn_cmp4 (a->_mp_d, an, b->_mp_d, bn);
+  if (cmp > 0)
+    {
+      rp = MPZ_REALLOC (r, an);
+      gmp_assert_nocarry (mpn_sub (rp, a->_mp_d, an, b->_mp_d, bn));
+      return mpn_normalized_size (rp, an);
+    }
+  else if (cmp < 0)
+    {
+      rp = MPZ_REALLOC (r, bn);
+      gmp_assert_nocarry (mpn_sub (rp, b->_mp_d, bn, a->_mp_d, an));
+      return -mpn_normalized_size (rp, bn);
+    }
+  else
+    return 0;
+}
+
+void
+mpz_add (mpz_t r, const mpz_t a, const mpz_t b)
+{
+  mp_size_t rn;
+
+  if ( (a->_mp_size ^ b->_mp_size) >= 0)
+    rn = mpz_abs_add (r, a, b);
+  else
+    rn = mpz_abs_sub (r, a, b);
+
+  r->_mp_size = a->_mp_size >= 0 ? rn : - rn;
+}
+
+void
+mpz_sub (mpz_t r, const mpz_t a, const mpz_t b)
+{
+  mp_size_t rn;
+
+  if ( (a->_mp_size ^ b->_mp_size) >= 0)
+    rn = mpz_abs_sub (r, a, b);
+  else
+    rn = mpz_abs_add (r, a, b);
+
+  r->_mp_size = a->_mp_size >= 0 ? rn : - rn;
+}
+
+
+/* MPZ multiplication */
+void
+mpz_mul_si (mpz_t r, const mpz_t u, long int v)
+{
+  if (v < 0)
+    {
+      mpz_mul_ui (r, u, GMP_NEG_CAST (unsigned long int, v));
+      mpz_neg (r, r);
+    }
+  else
+    mpz_mul_ui (r, u, (unsigned long int) v);
+}
+
+void
+mpz_mul_ui (mpz_t r, const mpz_t u, unsigned long int v)
+{
+  mp_size_t un, us;
+  mp_ptr tp;
+  mp_limb_t cy;
+
+  us = u->_mp_size;
+
+  if (us == 0 || v == 0)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+
+  un = GMP_ABS (us);
+
+  tp = MPZ_REALLOC (r, un + 1);
+  cy = mpn_mul_1 (tp, u->_mp_d, un, v);
+  tp[un] = cy;
+
+  un += (cy > 0);
+  r->_mp_size = (us < 0) ? - un : un;
+}
+
+void
+mpz_mul (mpz_t r, const mpz_t u, const mpz_t v)
+{
+  int sign;
+  mp_size_t un, vn, rn;
+  mpz_t t;
+  mp_ptr tp;
+
+  un = u->_mp_size;
+  vn = v->_mp_size;
+
+  if (un == 0 || vn == 0)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+
+  sign = (un ^ vn) < 0;
+
+  un = GMP_ABS (un);
+  vn = GMP_ABS (vn);
+
+  mpz_init2 (t, (un + vn) * GMP_LIMB_BITS);
+
+  tp = t->_mp_d;
+  if (un >= vn)
+    mpn_mul (tp, u->_mp_d, un, v->_mp_d, vn);
+  else
+    mpn_mul (tp, v->_mp_d, vn, u->_mp_d, un);
+
+  rn = un + vn;
+  rn -= tp[rn-1] == 0;
+
+  t->_mp_size = sign ? - rn : rn;
+  mpz_swap (r, t);
+  mpz_clear (t);
+}
+
+void
+mpz_mul_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bits)
+{
+  mp_size_t un, rn;
+  mp_size_t limbs;
+  unsigned shift;
+  mp_ptr rp;
+
+  un = GMP_ABS (u->_mp_size);
+  if (un == 0)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+
+  limbs = bits / GMP_LIMB_BITS;
+  shift = bits % GMP_LIMB_BITS;
+
+  rn = un + limbs + (shift > 0);
+  rp = MPZ_REALLOC (r, rn);
+  if (shift > 0)
+    {
+      mp_limb_t cy = mpn_lshift (rp + limbs, u->_mp_d, un, shift);
+      rp[rn-1] = cy;
+      rn -= (cy == 0);
+    }
+  else
+    mpn_copyd (rp + limbs, u->_mp_d, un);
+
+  mpn_zero (rp, limbs);
+
+  r->_mp_size = (u->_mp_size < 0) ? - rn : rn;
+}
+
+void
+mpz_addmul_ui (mpz_t r, const mpz_t u, unsigned long int v)
+{
+  mpz_t t;
+  mpz_init (t);
+  mpz_mul_ui (t, u, v);
+  mpz_add (r, r, t);
+  mpz_clear (t);
+}
+
+void
+mpz_submul_ui (mpz_t r, const mpz_t u, unsigned long int v)
+{
+  mpz_t t;
+  mpz_init (t);
+  mpz_mul_ui (t, u, v);
+  mpz_sub (r, r, t);
+  mpz_clear (t);
+}
+
+void
+mpz_addmul (mpz_t r, const mpz_t u, const mpz_t v)
+{
+  mpz_t t;
+  mpz_init (t);
+  mpz_mul (t, u, v);
+  mpz_add (r, r, t);
+  mpz_clear (t);
+}
+
+void
+mpz_submul (mpz_t r, const mpz_t u, const mpz_t v)
+{
+  mpz_t t;
+  mpz_init (t);
+  mpz_mul (t, u, v);
+  mpz_sub (r, r, t);
+  mpz_clear (t);
+}
+
+
+/* MPZ division */
+enum mpz_div_round_mode { GMP_DIV_FLOOR, GMP_DIV_CEIL, GMP_DIV_TRUNC };
+
+/* Allows q or r to be zero. Returns 1 iff remainder is non-zero. */
+static int
+mpz_div_qr (mpz_t q, mpz_t r,
+	    const mpz_t n, const mpz_t d, enum mpz_div_round_mode mode)
+{
+  mp_size_t ns, ds, nn, dn, qs;
+  ns = n->_mp_size;
+  ds = d->_mp_size;
+
+  if (ds == 0)
+    gmp_die("mpz_div_qr: Divide by zero.");
+
+  if (ns == 0)
+    {
+      if (q)
+	q->_mp_size = 0;
+      if (r)
+	r->_mp_size = 0;
+      return 0;
+    }
+
+  nn = GMP_ABS (ns);
+  dn = GMP_ABS (ds);
+
+  qs = ds ^ ns;
+
+  if (nn < dn)
+    {
+      if (mode == GMP_DIV_CEIL && qs >= 0)
+	{
+	  /* q = 1, r = n - d */
+	  if (r)
+	    mpz_sub (r, n, d);
+	  if (q)
+	    mpz_set_ui (q, 1);
+	}
+      else if (mode == GMP_DIV_FLOOR && qs < 0)
+	{
+	  /* q = -1, r = n + d */
+	  if (r)
+	    mpz_add (r, n, d);
+	  if (q)
+	    mpz_set_si (q, -1);
+	}
+      else
+	{
+	  /* q = 0, r = d */
+	  if (r)
+	    mpz_set (r, n);
+	  if (q)
+	    q->_mp_size = 0;
+	}
+      return 1;
+    }
+  else
+    {
+      mp_ptr np, qp;
+      mp_size_t qn, rn;
+      mpz_t tq, tr;
+
+      mpz_init_set (tr, n);
+      np = tr->_mp_d;
+
+      qn = nn - dn + 1;
+
+      if (q)
+	{
+	  mpz_init2 (tq, qn * GMP_LIMB_BITS);
+	  qp = tq->_mp_d;
+	}
+      else
+	qp = NULL;
+
+      mpn_div_qr (qp, np, nn, d->_mp_d, dn);
+
+      if (qp)
+	{
+	  qn -= (qp[qn-1] == 0);
+
+	  tq->_mp_size = qs < 0 ? -qn : qn;
+	}
+      rn = mpn_normalized_size (np, dn);
+      tr->_mp_size = ns < 0 ? - rn : rn;
+
+      if (mode == GMP_DIV_FLOOR && qs < 0 && rn != 0)
+	{
+	  if (q)
+	    mpz_sub_ui (tq, tq, 1);
+	  if (r)
+	    mpz_add (tr, tr, d);
+	}
+      else if (mode == GMP_DIV_CEIL && qs >= 0 && rn != 0)
+	{
+	  if (q)
+	    mpz_add_ui (tq, tq, 1);
+	  if (r)
+	    mpz_sub (tr, tr, d);
+	}
+
+      if (q)
+	{
+	  mpz_swap (tq, q);
+	  mpz_clear (tq);
+	}
+      if (r)
+	mpz_swap (tr, r);
+
+      mpz_clear (tr);
+
+      return rn != 0;
+    }
+}
+
+void
+mpz_cdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (q, r, n, d, GMP_DIV_CEIL);
+}
+
+void
+mpz_fdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (q, r, n, d, GMP_DIV_FLOOR);
+}
+
+void
+mpz_tdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (q, r, n, d, GMP_DIV_TRUNC);
+}
+
+void
+mpz_cdiv_q (mpz_t q, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (q, NULL, n, d, GMP_DIV_CEIL);
+}
+
+void
+mpz_fdiv_q (mpz_t q, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (q, NULL, n, d, GMP_DIV_FLOOR);
+}
+
+void
+mpz_tdiv_q (mpz_t q, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (q, NULL, n, d, GMP_DIV_TRUNC);
+}
+
+void
+mpz_cdiv_r (mpz_t r, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (NULL, r, n, d, GMP_DIV_CEIL);
+}
+
+void
+mpz_fdiv_r (mpz_t r, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (NULL, r, n, d, GMP_DIV_FLOOR);
+}
+
+void
+mpz_tdiv_r (mpz_t r, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (NULL, r, n, d, GMP_DIV_TRUNC);
+}
+
+void
+mpz_mod (mpz_t r, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (NULL, r, n, d, d->_mp_size >= 0 ? GMP_DIV_FLOOR : GMP_DIV_CEIL);
+}
+
+static void
+mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index,
+		enum mpz_div_round_mode mode)
+{
+  mp_size_t un, qn;
+  mp_size_t limb_cnt;
+  mp_ptr qp;
+  int adjust;
+
+  un = u->_mp_size;
+  if (un == 0)
+    {
+      q->_mp_size = 0;
+      return;
+    }
+  limb_cnt = bit_index / GMP_LIMB_BITS;
+  qn = GMP_ABS (un) - limb_cnt;
+  bit_index %= GMP_LIMB_BITS;
+
+  if (mode == ((un > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* un != 0 here. */
+    /* Note: Below, the final indexing at limb_cnt is valid because at
+       that point we have qn > 0. */
+    adjust = (qn <= 0
+	      || !mpn_zero_p (u->_mp_d, limb_cnt)
+	      || (u->_mp_d[limb_cnt]
+		  & (((mp_limb_t) 1 << bit_index) - 1)));
+  else
+    adjust = 0;
+
+  if (qn <= 0)
+    qn = 0;
+  else
+    {
+      qp = MPZ_REALLOC (q, qn);
+
+      if (bit_index != 0)
+	{
+	  mpn_rshift (qp, u->_mp_d + limb_cnt, qn, bit_index);
+	  qn -= qp[qn - 1] == 0;
+	}
+      else
+	{
+	  mpn_copyi (qp, u->_mp_d + limb_cnt, qn);
+	}
+    }
+
+  q->_mp_size = qn;
+
+  if (adjust)
+    mpz_add_ui (q, q, 1);
+  if (un < 0)
+    mpz_neg (q, q);
+}
+
+static void
+mpz_div_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bit_index,
+		enum mpz_div_round_mode mode)
+{
+  mp_size_t us, un, rn;
+  mp_ptr rp;
+  mp_limb_t mask;
+
+  us = u->_mp_size;
+  if (us == 0 || bit_index == 0)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+  rn = (bit_index + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS;
+  assert (rn > 0);
+
+  rp = MPZ_REALLOC (r, rn);
+  un = GMP_ABS (us);
+
+  mask = GMP_LIMB_MAX >> (rn * GMP_LIMB_BITS - bit_index);
+
+  if (rn > un)
+    {
+      /* Quotient (with truncation) is zero, and remainder is
+	 non-zero */
+      if (mode == ((us > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* us != 0 here. */
+	{
+	  /* Have to negate and sign extend. */
+	  mp_size_t i;
+
+	  gmp_assert_nocarry (! mpn_neg (rp, u->_mp_d, un));
+	  for (i = un; i < rn - 1; i++)
+	    rp[i] = GMP_LIMB_MAX;
+
+	  rp[rn-1] = mask;
+	  us = -us;
+	}
+      else
+	{
+	  /* Just copy */
+	  if (r != u)
+	    mpn_copyi (rp, u->_mp_d, un);
+
+	  rn = un;
+	}
+    }
+  else
+    {
+      if (r != u)
+	mpn_copyi (rp, u->_mp_d, rn - 1);
+
+      rp[rn-1] = u->_mp_d[rn-1] & mask;
+
+      if (mode == ((us > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* us != 0 here. */
+	{
+	  /* If r != 0, compute 2^{bit_count} - r. */
+	  mpn_neg (rp, rp, rn);
+
+	  rp[rn-1] &= mask;
+
+	  /* us is not used for anything else, so we can modify it
+	     here to indicate flipped sign. */
+	  us = -us;
+	}
+    }
+  rn = mpn_normalized_size (rp, rn);
+  r->_mp_size = us < 0 ? -rn : rn;
+}
+
+void
+mpz_cdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
+{
+  mpz_div_q_2exp (r, u, cnt, GMP_DIV_CEIL);
+}
+
+void
+mpz_fdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
+{
+  mpz_div_q_2exp (r, u, cnt, GMP_DIV_FLOOR);
+}
+
+void
+mpz_tdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
+{
+  mpz_div_q_2exp (r, u, cnt, GMP_DIV_TRUNC);
+}
+
+void
+mpz_cdiv_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
+{
+  mpz_div_r_2exp (r, u, cnt, GMP_DIV_CEIL);
+}
+
+void
+mpz_fdiv_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
+{
+  mpz_div_r_2exp (r, u, cnt, GMP_DIV_FLOOR);
+}
+
+void
+mpz_tdiv_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
+{
+  mpz_div_r_2exp (r, u, cnt, GMP_DIV_TRUNC);
+}
+
+void
+mpz_divexact (mpz_t q, const mpz_t n, const mpz_t d)
+{
+  gmp_assert_nocarry (mpz_div_qr (q, NULL, n, d, GMP_DIV_TRUNC));
+}
+
+int
+mpz_divisible_p (const mpz_t n, const mpz_t d)
+{
+  return mpz_div_qr (NULL, NULL, n, d, GMP_DIV_TRUNC) == 0;
+}
+
+int
+mpz_congruent_p (const mpz_t a, const mpz_t b, const mpz_t m)
+{
+  mpz_t t;
+  int res;
+
+  /* a == b (mod 0) iff a == b */
+  if (mpz_sgn (m) == 0)
+    return (mpz_cmp (a, b) == 0);
+
+  mpz_init (t);
+  mpz_sub (t, a, b);
+  res = mpz_divisible_p (t, m);
+  mpz_clear (t);
+
+  return res;
+}
+
+static unsigned long
+mpz_div_qr_ui (mpz_t q, mpz_t r,
+	       const mpz_t n, unsigned long d, enum mpz_div_round_mode mode)
+{
+  mp_size_t ns, qn;
+  mp_ptr qp;
+  mp_limb_t rl;
+  mp_size_t rs;
+
+  ns = n->_mp_size;
+  if (ns == 0)
+    {
+      if (q)
+	q->_mp_size = 0;
+      if (r)
+	r->_mp_size = 0;
+      return 0;
+    }
+
+  qn = GMP_ABS (ns);
+  if (q)
+    qp = MPZ_REALLOC (q, qn);
+  else
+    qp = NULL;
+
+  rl = mpn_div_qr_1 (qp, n->_mp_d, qn, d);
+  assert (rl < d);
+
+  rs = rl > 0;
+  rs = (ns < 0) ? -rs : rs;
+
+  if (rl > 0 && ( (mode == GMP_DIV_FLOOR && ns < 0)
+		  || (mode == GMP_DIV_CEIL && ns >= 0)))
+    {
+      if (q)
+	gmp_assert_nocarry (mpn_add_1 (qp, qp, qn, 1));
+      rl = d - rl;
+      rs = -rs;
+    }
+
+  if (r)
+    {
+      MPZ_REALLOC (r, 1)[0] = rl;
+      r->_mp_size = rs;
+    }
+  if (q)
+    {
+      qn -= (qp[qn-1] == 0);
+      assert (qn == 0 || qp[qn-1] > 0);
+
+      q->_mp_size = (ns < 0) ? - qn : qn;
+    }
+
+  return rl;
+}
+
+unsigned long
+mpz_cdiv_qr_ui (mpz_t q, mpz_t r, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (q, r, n, d, GMP_DIV_CEIL);
+}
+
+unsigned long
+mpz_fdiv_qr_ui (mpz_t q, mpz_t r, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (q, r, n, d, GMP_DIV_FLOOR);
+}
+
+unsigned long
+mpz_tdiv_qr_ui (mpz_t q, mpz_t r, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (q, r, n, d, GMP_DIV_TRUNC);
+}
+
+unsigned long
+mpz_cdiv_q_ui (mpz_t q, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_CEIL);
+}
+
+unsigned long
+mpz_fdiv_q_ui (mpz_t q, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_FLOOR);
+}
+
+unsigned long
+mpz_tdiv_q_ui (mpz_t q, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_TRUNC);
+}
+
+unsigned long
+mpz_cdiv_r_ui (mpz_t r, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_CEIL);
+}
+unsigned long
+mpz_fdiv_r_ui (mpz_t r, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_FLOOR);
+}
+unsigned long
+mpz_tdiv_r_ui (mpz_t r, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_TRUNC);
+}
+
+unsigned long
+mpz_cdiv_ui (const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_CEIL);
+}
+
+unsigned long
+mpz_fdiv_ui (const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_FLOOR);
+}
+
+unsigned long
+mpz_tdiv_ui (const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_TRUNC);
+}
+
+unsigned long
+mpz_mod_ui (mpz_t r, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_FLOOR);
+}
+
+void
+mpz_divexact_ui (mpz_t q, const mpz_t n, unsigned long d)
+{
+  gmp_assert_nocarry (mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_TRUNC));
+}
+
+int
+mpz_divisible_ui_p (const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_TRUNC) == 0;
+}
+
+
+/* GCD */
+static mp_limb_t
+mpn_gcd_11 (mp_limb_t u, mp_limb_t v)
+{
+  unsigned shift;
+
+  assert ( (u | v) > 0);
+
+  if (u == 0)
+    return v;
+  else if (v == 0)
+    return u;
+
+  gmp_ctz (shift, u | v);
+
+  u >>= shift;
+  v >>= shift;
+
+  if ( (u & 1) == 0)
+    MP_LIMB_T_SWAP (u, v);
+
+  while ( (v & 1) == 0)
+    v >>= 1;
+
+  while (u != v)
+    {
+      if (u > v)
+	{
+	  u -= v;
+	  do
+	    u >>= 1;
+	  while ( (u & 1) == 0);
+	}
+      else
+	{
+	  v -= u;
+	  do
+	    v >>= 1;
+	  while ( (v & 1) == 0);
+	}
+    }
+  return u << shift;
+}
+
+unsigned long
+mpz_gcd_ui (mpz_t g, const mpz_t u, unsigned long v)
+{
+  mp_size_t un;
+
+  if (v == 0)
+    {
+      if (g)
+	mpz_abs (g, u);
+    }
+  else
+    {
+      un = GMP_ABS (u->_mp_size);
+      if (un != 0)
+	v = mpn_gcd_11 (mpn_div_qr_1 (NULL, u->_mp_d, un, v), v);
+
+      if (g)
+	mpz_set_ui (g, v);
+    }
+
+  return v;
+}
+
+static mp_bitcnt_t
+mpz_make_odd (mpz_t r)
+{
+  mp_bitcnt_t shift;
+
+  assert (r->_mp_size > 0);
+  /* Count trailing zeros, equivalent to mpn_scan1, because we know that there is a 1 */
+  shift = mpn_common_scan (r->_mp_d[0], 0, r->_mp_d, 0, 0);
+  mpz_tdiv_q_2exp (r, r, shift);
+
+  return shift;
+}
+
+void
+mpz_gcd (mpz_t g, const mpz_t u, const mpz_t v)
+{
+  mpz_t tu, tv;
+  mp_bitcnt_t uz, vz, gz;
+
+  if (u->_mp_size == 0)
+    {
+      mpz_abs (g, v);
+      return;
+    }
+  if (v->_mp_size == 0)
+    {
+      mpz_abs (g, u);
+      return;
+    }
+
+  mpz_init (tu);
+  mpz_init (tv);
+
+  mpz_abs (tu, u);
+  uz = mpz_make_odd (tu);
+  mpz_abs (tv, v);
+  vz = mpz_make_odd (tv);
+  gz = GMP_MIN (uz, vz);
+
+  if (tu->_mp_size < tv->_mp_size)
+    mpz_swap (tu, tv);
+
+  mpz_tdiv_r (tu, tu, tv);
+  if (tu->_mp_size == 0)
+    {
+      mpz_swap (g, tv);
+    }
+  else
+    for (;;)
+      {
+	int c;
+
+	mpz_make_odd (tu);
+	c = mpz_cmp (tu, tv);
+	if (c == 0)
+	  {
+	    mpz_swap (g, tu);
+	    break;
+	  }
+	if (c < 0)
+	  mpz_swap (tu, tv);
+
+	if (tv->_mp_size == 1)
+	  {
+	    mp_limb_t vl = tv->_mp_d[0];
+	    mp_limb_t ul = mpz_tdiv_ui (tu, vl);
+	    mpz_set_ui (g, mpn_gcd_11 (ul, vl));
+	    break;
+	  }
+	mpz_sub (tu, tu, tv);
+      }
+  mpz_clear (tu);
+  mpz_clear (tv);
+  mpz_mul_2exp (g, g, gz);
+}
+
+void
+mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v)
+{
+  mpz_t tu, tv, s0, s1, t0, t1;
+  mp_bitcnt_t uz, vz, gz;
+  mp_bitcnt_t power;
+
+  if (u->_mp_size == 0)
+    {
+      /* g = 0 u + sgn(v) v */
+      signed long sign = mpz_sgn (v);
+      mpz_abs (g, v);
+      if (s)
+	mpz_set_ui (s, 0);
+      if (t)
+	mpz_set_si (t, sign);
+      return;
+    }
+
+  if (v->_mp_size == 0)
+    {
+      /* g = sgn(u) u + 0 v */
+      signed long sign = mpz_sgn (u);
+      mpz_abs (g, u);
+      if (s)
+	mpz_set_si (s, sign);
+      if (t)
+	mpz_set_ui (t, 0);
+      return;
+    }
+
+  mpz_init (tu);
+  mpz_init (tv);
+  mpz_init (s0);
+  mpz_init (s1);
+  mpz_init (t0);
+  mpz_init (t1);
+
+  mpz_abs (tu, u);
+  uz = mpz_make_odd (tu);
+  mpz_abs (tv, v);
+  vz = mpz_make_odd (tv);
+  gz = GMP_MIN (uz, vz);
+
+  uz -= gz;
+  vz -= gz;
+
+  /* Cofactors corresponding to odd gcd. gz handled later. */
+  if (tu->_mp_size < tv->_mp_size)
+    {
+      mpz_swap (tu, tv);
+      MPZ_SRCPTR_SWAP (u, v);
+      MPZ_PTR_SWAP (s, t);
+      MP_BITCNT_T_SWAP (uz, vz);
+    }
+
+  /* Maintain
+   *
+   * u = t0 tu + t1 tv
+   * v = s0 tu + s1 tv
+   *
+   * where u and v denote the inputs with common factors of two
+   * eliminated, and det (s0, t0; s1, t1) = 2^p. Then
+   *
+   * 2^p tu =  s1 u - t1 v
+   * 2^p tv = -s0 u + t0 v
+   */
+
+  /* After initial division, tu = q tv + tu', we have
+   *
+   * u = 2^uz (tu' + q tv)
+   * v = 2^vz tv
+   *
+   * or
+   *
+   * t0 = 2^uz, t1 = 2^uz q
+   * s0 = 0,    s1 = 2^vz
+   */
+
+  mpz_setbit (t0, uz);
+  mpz_tdiv_qr (t1, tu, tu, tv);
+  mpz_mul_2exp (t1, t1, uz);
+
+  mpz_setbit (s1, vz);
+  power = uz + vz;
+
+  if (tu->_mp_size > 0)
+    {
+      mp_bitcnt_t shift;
+      shift = mpz_make_odd (tu);
+      mpz_mul_2exp (t0, t0, shift);
+      mpz_mul_2exp (s0, s0, shift);
+      power += shift;
+
+      for (;;)
+	{
+	  int c;
+	  c = mpz_cmp (tu, tv);
+	  if (c == 0)
+	    break;
+
+	  if (c < 0)
+	    {
+	      /* tv = tv' + tu
+	       *
+	       * u = t0 tu + t1 (tv' + tu) = (t0 + t1) tu + t1 tv'
+	       * v = s0 tu + s1 (tv' + tu) = (s0 + s1) tu + s1 tv' */
+
+	      mpz_sub (tv, tv, tu);
+	      mpz_add (t0, t0, t1);
+	      mpz_add (s0, s0, s1);
+
+	      shift = mpz_make_odd (tv);
+	      mpz_mul_2exp (t1, t1, shift);
+	      mpz_mul_2exp (s1, s1, shift);
+	    }
+	  else
+	    {
+	      mpz_sub (tu, tu, tv);
+	      mpz_add (t1, t0, t1);
+	      mpz_add (s1, s0, s1);
+
+	      shift = mpz_make_odd (tu);
+	      mpz_mul_2exp (t0, t0, shift);
+	      mpz_mul_2exp (s0, s0, shift);
+	    }
+	  power += shift;
+	}
+    }
+
+  /* Now tv = odd part of gcd, and -s0 and t0 are corresponding
+     cofactors. */
+
+  mpz_mul_2exp (tv, tv, gz);
+  mpz_neg (s0, s0);
+
+  /* 2^p g = s0 u + t0 v. Eliminate one factor of two at a time. To
+     adjust cofactors, we need u / g and v / g */
+
+  mpz_divexact (s1, v, tv);
+  mpz_abs (s1, s1);
+  mpz_divexact (t1, u, tv);
+  mpz_abs (t1, t1);
+
+  while (power-- > 0)
+    {
+      /* s0 u + t0 v = (s0 - v/g) u - (t0 + u/g) v */
+      if (mpz_odd_p (s0) || mpz_odd_p (t0))
+	{
+	  mpz_sub (s0, s0, s1);
+	  mpz_add (t0, t0, t1);
+	}
+      mpz_divexact_ui (s0, s0, 2);
+      mpz_divexact_ui (t0, t0, 2);
+    }
+
+  /* Arrange so that |s| < |u| / 2g */
+  mpz_add (s1, s0, s1);
+  if (mpz_cmpabs (s0, s1) > 0)
+    {
+      mpz_swap (s0, s1);
+      mpz_sub (t0, t0, t1);
+    }
+  if (u->_mp_size < 0)
+    mpz_neg (s0, s0);
+  if (v->_mp_size < 0)
+    mpz_neg (t0, t0);
+
+  mpz_swap (g, tv);
+  if (s)
+    mpz_swap (s, s0);
+  if (t)
+    mpz_swap (t, t0);
+
+  mpz_clear (tu);
+  mpz_clear (tv);
+  mpz_clear (s0);
+  mpz_clear (s1);
+  mpz_clear (t0);
+  mpz_clear (t1);
+}
+
+void
+mpz_lcm (mpz_t r, const mpz_t u, const mpz_t v)
+{
+  mpz_t g;
+
+  if (u->_mp_size == 0 || v->_mp_size == 0)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+
+  mpz_init (g);
+
+  mpz_gcd (g, u, v);
+  mpz_divexact (g, u, g);
+  mpz_mul (r, g, v);
+
+  mpz_clear (g);
+  mpz_abs (r, r);
+}
+
+void
+mpz_lcm_ui (mpz_t r, const mpz_t u, unsigned long v)
+{
+  if (v == 0 || u->_mp_size == 0)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+
+  v /= mpz_gcd_ui (NULL, u, v);
+  mpz_mul_ui (r, u, v);
+
+  mpz_abs (r, r);
+}
+
+int
+mpz_invert (mpz_t r, const mpz_t u, const mpz_t m)
+{
+  mpz_t g, tr;
+  int invertible;
+
+  if (u->_mp_size == 0 || mpz_cmpabs_ui (m, 1) <= 0)
+    return 0;
+
+  mpz_init (g);
+  mpz_init (tr);
+
+  mpz_gcdext (g, tr, NULL, u, m);
+  invertible = (mpz_cmp_ui (g, 1) == 0);
+
+  if (invertible)
+    {
+      if (tr->_mp_size < 0)
+	{
+	  if (m->_mp_size >= 0)
+	    mpz_add (tr, tr, m);
+	  else
+	    mpz_sub (tr, tr, m);
+	}
+      mpz_swap (r, tr);
+    }
+
+  mpz_clear (g);
+  mpz_clear (tr);
+  return invertible;
+}
+
+
+/* Higher level operations (sqrt, pow and root) */
+
+void
+mpz_pow_ui (mpz_t r, const mpz_t b, unsigned long e)
+{
+  unsigned long bit;
+  mpz_t tr;
+  mpz_init_set_ui (tr, 1);
+
+  bit = GMP_ULONG_HIGHBIT;
+  do
+    {
+      mpz_mul (tr, tr, tr);
+      if (e & bit)
+	mpz_mul (tr, tr, b);
+      bit >>= 1;
+    }
+  while (bit > 0);
+
+  mpz_swap (r, tr);
+  mpz_clear (tr);
+}
+
+void
+mpz_ui_pow_ui (mpz_t r, unsigned long blimb, unsigned long e)
+{
+  mpz_t b;
+  mpz_pow_ui (r, mpz_roinit_n (b, &blimb, 1), e);
+}
+
+void
+mpz_powm (mpz_t r, const mpz_t b, const mpz_t e, const mpz_t m)
+{
+  mpz_t tr;
+  mpz_t base;
+  mp_size_t en, mn;
+  mp_srcptr mp;
+  struct gmp_div_inverse minv;
+  unsigned shift;
+  mp_ptr tp = NULL;
+
+  en = GMP_ABS (e->_mp_size);
+  mn = GMP_ABS (m->_mp_size);
+  if (mn == 0)
+    gmp_die ("mpz_powm: Zero modulo.");
+
+  if (en == 0)
+    {
+      mpz_set_ui (r, 1);
+      return;
+    }
+
+  mp = m->_mp_d;
+  mpn_div_qr_invert (&minv, mp, mn);
+  shift = minv.shift;
+
+  if (shift > 0)
+    {
+      /* To avoid shifts, we do all our reductions, except the final
+	 one, using a *normalized* m. */
+      minv.shift = 0;
+
+      tp = gmp_xalloc_limbs (mn);
+      gmp_assert_nocarry (mpn_lshift (tp, mp, mn, shift));
+      mp = tp;
+    }
+
+  mpz_init (base);
+
+  if (e->_mp_size < 0)
+    {
+      if (!mpz_invert (base, b, m))
+	gmp_die ("mpz_powm: Negative exponent and non-invertible base.");
+    }
+  else
+    {
+      mp_size_t bn;
+      mpz_abs (base, b);
+
+      bn = base->_mp_size;
+      if (bn >= mn)
+	{
+	  mpn_div_qr_preinv (NULL, base->_mp_d, base->_mp_size, mp, mn, &minv);
+	  bn = mn;
+	}
+
+      /* We have reduced the absolute value. Now take care of the
+	 sign. Note that we get zero represented non-canonically as
+	 m. */
+      if (b->_mp_size < 0)
+	{
+	  mp_ptr bp = MPZ_REALLOC (base, mn);
+	  gmp_assert_nocarry (mpn_sub (bp, mp, mn, bp, bn));
+	  bn = mn;
+	}
+      base->_mp_size = mpn_normalized_size (base->_mp_d, bn);
+    }
+  mpz_init_set_ui (tr, 1);
+
+  while (--en >= 0)
+    {
+      mp_limb_t w = e->_mp_d[en];
+      mp_limb_t bit;
+
+      bit = GMP_LIMB_HIGHBIT;
+      do
+	{
+	  mpz_mul (tr, tr, tr);
+	  if (w & bit)
+	    mpz_mul (tr, tr, base);
+	  if (tr->_mp_size > mn)
+	    {
+	      mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv);
+	      tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn);
+	    }
+	  bit >>= 1;
+	}
+      while (bit > 0);
+    }
+
+  /* Final reduction */
+  if (tr->_mp_size >= mn)
+    {
+      minv.shift = shift;
+      mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv);
+      tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn);
+    }
+  if (tp)
+    gmp_free (tp);
+
+  mpz_swap (r, tr);
+  mpz_clear (tr);
+  mpz_clear (base);
+}
+
+void
+mpz_powm_ui (mpz_t r, const mpz_t b, unsigned long elimb, const mpz_t m)
+{
+  mpz_t e;
+  mpz_powm (r, b, mpz_roinit_n (e, &elimb, 1), m);
+}
+
+/* x=trunc(y^(1/z)), r=y-x^z */
+void
+mpz_rootrem (mpz_t x, mpz_t r, const mpz_t y, unsigned long z)
+{
+  int sgn;
+  mpz_t t, u;
+
+  sgn = y->_mp_size < 0;
+  if ((~z & sgn) != 0)
+    gmp_die ("mpz_rootrem: Negative argument, with even root.");
+  if (z == 0)
+    gmp_die ("mpz_rootrem: Zeroth root.");
+
+  if (mpz_cmpabs_ui (y, 1) <= 0) {
+    if (x)
+      mpz_set (x, y);
+    if (r)
+      r->_mp_size = 0;
+    return;
+  }
+
+  mpz_init (u);
+  mpz_init (t);
+  mpz_setbit (t, mpz_sizeinbase (y, 2) / z + 1);
+
+  if (z == 2) /* simplify sqrt loop: z-1 == 1 */
+    do {
+      mpz_swap (u, t);			/* u = x */
+      mpz_tdiv_q (t, y, u);		/* t = y/x */
+      mpz_add (t, t, u);		/* t = y/x + x */
+      mpz_tdiv_q_2exp (t, t, 1);	/* x'= (y/x + x)/2 */
+    } while (mpz_cmpabs (t, u) < 0);	/* |x'| < |x| */
+  else /* z != 2 */ {
+    mpz_t v;
+
+    mpz_init (v);
+    if (sgn)
+      mpz_neg (t, t);
+
+    do {
+      mpz_swap (u, t);			/* u = x */
+      mpz_pow_ui (t, u, z - 1);		/* t = x^(z-1) */
+      mpz_tdiv_q (t, y, t);		/* t = y/x^(z-1) */
+      mpz_mul_ui (v, u, z - 1);		/* v = x*(z-1) */
+      mpz_add (t, t, v);		/* t = y/x^(z-1) + x*(z-1) */
+      mpz_tdiv_q_ui (t, t, z);		/* x'=(y/x^(z-1) + x*(z-1))/z */
+    } while (mpz_cmpabs (t, u) < 0);	/* |x'| < |x| */
+
+    mpz_clear (v);
+  }
+
+  if (r) {
+    mpz_pow_ui (t, u, z);
+    mpz_sub (r, y, t);
+  }
+  if (x)
+    mpz_swap (x, u);
+  mpz_clear (u);
+  mpz_clear (t);
+}
+
+int
+mpz_root (mpz_t x, const mpz_t y, unsigned long z)
+{
+  int res;
+  mpz_t r;
+
+  mpz_init (r);
+  mpz_rootrem (x, r, y, z);
+  res = r->_mp_size == 0;
+  mpz_clear (r);
+
+  return res;
+}
+
+/* Compute s = floor(sqrt(u)) and r = u - s^2. Allows r == NULL */
+void
+mpz_sqrtrem (mpz_t s, mpz_t r, const mpz_t u)
+{
+  mpz_rootrem (s, r, u, 2);
+}
+
+void
+mpz_sqrt (mpz_t s, const mpz_t u)
+{
+  mpz_rootrem (s, NULL, u, 2);
+}
+
+int
+mpz_perfect_square_p (const mpz_t u)
+{
+  if (u->_mp_size <= 0)
+    return (u->_mp_size == 0);
+  else
+    return mpz_root (NULL, u, 2);
+}
+
+int
+mpn_perfect_square_p (mp_srcptr p, mp_size_t n)
+{
+  mpz_t t;
+
+  assert (n > 0);
+  assert (p [n-1] != 0);
+  return mpz_root (NULL, mpz_roinit_n (t, p, n), 2);
+}
+
+mp_size_t
+mpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr p, mp_size_t n)
+{
+  mpz_t s, r, u;
+  mp_size_t res;
+
+  assert (n > 0);
+  assert (p [n-1] != 0);
+
+  mpz_init (r);
+  mpz_init (s);
+  mpz_rootrem (s, r, mpz_roinit_n (u, p, n), 2);
+
+  assert (s->_mp_size == (n+1)/2);
+  mpn_copyd (sp, s->_mp_d, s->_mp_size);
+  mpz_clear (s);
+  res = r->_mp_size;
+  if (rp)
+    mpn_copyd (rp, r->_mp_d, res);
+  mpz_clear (r);
+  return res;
+}
+
+/* Combinatorics */
+
+void
+mpz_fac_ui (mpz_t x, unsigned long n)
+{
+  mpz_set_ui (x, n + (n == 0));
+  while (n > 2)
+    mpz_mul_ui (x, x, --n);
+}
+
+void
+mpz_bin_uiui (mpz_t r, unsigned long n, unsigned long k)
+{
+  mpz_t t;
+
+  mpz_set_ui (r, k <= n);
+
+  if (k > (n >> 1))
+    k = (k <= n) ? n - k : 0;
+
+  mpz_init (t);
+  mpz_fac_ui (t, k);
+
+  for (; k > 0; k--)
+      mpz_mul_ui (r, r, n--);
+
+  mpz_divexact (r, r, t);
+  mpz_clear (t);
+}
+
+
+/* Primality testing */
+static int
+gmp_millerrabin (const mpz_t n, const mpz_t nm1, mpz_t y,
+		 const mpz_t q, mp_bitcnt_t k)
+{
+  assert (k > 0);
+
+  /* Caller must initialize y to the base. */
+  mpz_powm (y, y, q, n);
+
+  if (mpz_cmp_ui (y, 1) == 0 || mpz_cmp (y, nm1) == 0)
+    return 1;
+
+  while (--k > 0)
+    {
+      mpz_powm_ui (y, y, 2, n);
+      if (mpz_cmp (y, nm1) == 0)
+	return 1;
+      /* y == 1 means that the previous y was a non-trivial square root
+	 of 1 (mod n). y == 0 means that n is a power of the base.
+	 In either case, n is not prime. */
+      if (mpz_cmp_ui (y, 1) <= 0)
+	return 0;
+    }
+  return 0;
+}
+
+/* This product is 0xc0cfd797, and fits in 32 bits. */
+#define GMP_PRIME_PRODUCT \
+  (3UL*5UL*7UL*11UL*13UL*17UL*19UL*23UL*29UL)
+
+/* Bit (p+1)/2 is set, for each odd prime <= 61 */
+#define GMP_PRIME_MASK 0xc96996dcUL
+
+int
+mpz_probab_prime_p (const mpz_t n, int reps)
+{
+  mpz_t nm1;
+  mpz_t q;
+  mpz_t y;
+  mp_bitcnt_t k;
+  int is_prime;
+  int j;
+
+  /* Note that we use the absolute value of n only, for compatibility
+     with the real GMP. */
+  if (mpz_even_p (n))
+    return (mpz_cmpabs_ui (n, 2) == 0) ? 2 : 0;
+
+  /* Above test excludes n == 0 */
+  assert (n->_mp_size != 0);
+
+  if (mpz_cmpabs_ui (n, 64) < 0)
+    return (GMP_PRIME_MASK >> (n->_mp_d[0] >> 1)) & 2;
+
+  if (mpz_gcd_ui (NULL, n, GMP_PRIME_PRODUCT) != 1)
+    return 0;
+
+  /* All prime factors are >= 31. */
+  if (mpz_cmpabs_ui (n, 31*31) < 0)
+    return 2;
+
+  /* Use Miller-Rabin, with a deterministic sequence of bases, a[j] =
+     j^2 + j + 41 using Euler's polynomial. We potentially stop early,
+     if a[j] >= n - 1. Since n >= 31*31, this can happen only if reps >
+     30 (a[30] == 971 > 31*31 == 961). */
+
+  mpz_init (nm1);
+  mpz_init (q);
+  mpz_init (y);
+
+  /* Find q and k, where q is odd and n = 1 + 2**k * q.  */
+  nm1->_mp_size = mpz_abs_sub_ui (nm1, n, 1);
+  k = mpz_scan1 (nm1, 0);
+  mpz_tdiv_q_2exp (q, nm1, k);
+
+  for (j = 0, is_prime = 1; is_prime & (j < reps); j++)
+    {
+      mpz_set_ui (y, (unsigned long) j*j+j+41);
+      if (mpz_cmp (y, nm1) >= 0)
+	{
+	  /* Don't try any further bases. This "early" break does not affect
+	     the result for any reasonable reps value (<=5000 was tested) */
+	  assert (j >= 30);
+	  break;
+	}
+      is_prime = gmp_millerrabin (n, nm1, y, q, k);
+    }
+  mpz_clear (nm1);
+  mpz_clear (q);
+  mpz_clear (y);
+
+  return is_prime;
+}
+
+
+/* Logical operations and bit manipulation. */
+
+/* Numbers are treated as if represented in two's complement (and
+   infinitely sign extended). For a negative values we get the two's
+   complement from -x = ~x + 1, where ~ is bitwise complement.
+   Negation transforms
+
+     xxxx10...0
+
+   into
+
+     yyyy10...0
+
+   where yyyy is the bitwise complement of xxxx. So least significant
+   bits, up to and including the first one bit, are unchanged, and
+   the more significant bits are all complemented.
+
+   To change a bit from zero to one in a negative number, subtract the
+   corresponding power of two from the absolute value. This can never
+   underflow. To change a bit from one to zero, add the corresponding
+   power of two, and this might overflow. E.g., if x = -001111, the
+   two's complement is 110001. Clearing the least significant bit, we
+   get two's complement 110000, and -010000. */
+
+int
+mpz_tstbit (const mpz_t d, mp_bitcnt_t bit_index)
+{
+  mp_size_t limb_index;
+  unsigned shift;
+  mp_size_t ds;
+  mp_size_t dn;
+  mp_limb_t w;
+  int bit;
+
+  ds = d->_mp_size;
+  dn = GMP_ABS (ds);
+  limb_index = bit_index / GMP_LIMB_BITS;
+  if (limb_index >= dn)
+    return ds < 0;
+
+  shift = bit_index % GMP_LIMB_BITS;
+  w = d->_mp_d[limb_index];
+  bit = (w >> shift) & 1;
+
+  if (ds < 0)
+    {
+      /* d < 0. Check if any of the bits below is set: If so, our bit
+	 must be complemented. */
+      if (shift > 0 && (w << (GMP_LIMB_BITS - shift)) > 0)
+	return bit ^ 1;
+      while (--limb_index >= 0)
+	if (d->_mp_d[limb_index] > 0)
+	  return bit ^ 1;
+    }
+  return bit;
+}
+
+static void
+mpz_abs_add_bit (mpz_t d, mp_bitcnt_t bit_index)
+{
+  mp_size_t dn, limb_index;
+  mp_limb_t bit;
+  mp_ptr dp;
+
+  dn = GMP_ABS (d->_mp_size);
+
+  limb_index = bit_index / GMP_LIMB_BITS;
+  bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS);
+
+  if (limb_index >= dn)
+    {
+      mp_size_t i;
+      /* The bit should be set outside of the end of the number.
+	 We have to increase the size of the number. */
+      dp = MPZ_REALLOC (d, limb_index + 1);
+
+      dp[limb_index] = bit;
+      for (i = dn; i < limb_index; i++)
+	dp[i] = 0;
+      dn = limb_index + 1;
+    }
+  else
+    {
+      mp_limb_t cy;
+
+      dp = d->_mp_d;
+
+      cy = mpn_add_1 (dp + limb_index, dp + limb_index, dn - limb_index, bit);
+      if (cy > 0)
+	{
+	  dp = MPZ_REALLOC (d, dn + 1);
+	  dp[dn++] = cy;
+	}
+    }
+
+  d->_mp_size = (d->_mp_size < 0) ? - dn : dn;
+}
+
+static void
+mpz_abs_sub_bit (mpz_t d, mp_bitcnt_t bit_index)
+{
+  mp_size_t dn, limb_index;
+  mp_ptr dp;
+  mp_limb_t bit;
+
+  dn = GMP_ABS (d->_mp_size);
+  dp = d->_mp_d;
+
+  limb_index = bit_index / GMP_LIMB_BITS;
+  bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS);
+
+  assert (limb_index < dn);
+
+  gmp_assert_nocarry (mpn_sub_1 (dp + limb_index, dp + limb_index,
+				 dn - limb_index, bit));
+  dn = mpn_normalized_size (dp, dn);
+  d->_mp_size = (d->_mp_size < 0) ? - dn : dn;
+}
+
+void
+mpz_setbit (mpz_t d, mp_bitcnt_t bit_index)
+{
+  if (!mpz_tstbit (d, bit_index))
+    {
+      if (d->_mp_size >= 0)
+	mpz_abs_add_bit (d, bit_index);
+      else
+	mpz_abs_sub_bit (d, bit_index);
+    }
+}
+
+void
+mpz_clrbit (mpz_t d, mp_bitcnt_t bit_index)
+{
+  if (mpz_tstbit (d, bit_index))
+    {
+      if (d->_mp_size >= 0)
+	mpz_abs_sub_bit (d, bit_index);
+      else
+	mpz_abs_add_bit (d, bit_index);
+    }
+}
+
+void
+mpz_combit (mpz_t d, mp_bitcnt_t bit_index)
+{
+  if (mpz_tstbit (d, bit_index) ^ (d->_mp_size < 0))
+    mpz_abs_sub_bit (d, bit_index);
+  else
+    mpz_abs_add_bit (d, bit_index);
+}
+
+void
+mpz_com (mpz_t r, const mpz_t u)
+{
+  mpz_neg (r, u);
+  mpz_sub_ui (r, r, 1);
+}
+
+void
+mpz_and (mpz_t r, const mpz_t u, const mpz_t v)
+{
+  mp_size_t un, vn, rn, i;
+  mp_ptr up, vp, rp;
+
+  mp_limb_t ux, vx, rx;
+  mp_limb_t uc, vc, rc;
+  mp_limb_t ul, vl, rl;
+
+  un = GMP_ABS (u->_mp_size);
+  vn = GMP_ABS (v->_mp_size);
+  if (un < vn)
+    {
+      MPZ_SRCPTR_SWAP (u, v);
+      MP_SIZE_T_SWAP (un, vn);
+    }
+  if (vn == 0)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+
+  uc = u->_mp_size < 0;
+  vc = v->_mp_size < 0;
+  rc = uc & vc;
+
+  ux = -uc;
+  vx = -vc;
+  rx = -rc;
+
+  /* If the smaller input is positive, higher limbs don't matter. */
+  rn = vx ? un : vn;
+
+  rp = MPZ_REALLOC (r, rn + (mp_size_t) rc);
+
+  up = u->_mp_d;
+  vp = v->_mp_d;
+
+  i = 0;
+  do
+    {
+      ul = (up[i] ^ ux) + uc;
+      uc = ul < uc;
+
+      vl = (vp[i] ^ vx) + vc;
+      vc = vl < vc;
+
+      rl = ( (ul & vl) ^ rx) + rc;
+      rc = rl < rc;
+      rp[i] = rl;
+    }
+  while (++i < vn);
+  assert (vc == 0);
+
+  for (; i < rn; i++)
+    {
+      ul = (up[i] ^ ux) + uc;
+      uc = ul < uc;
+
+      rl = ( (ul & vx) ^ rx) + rc;
+      rc = rl < rc;
+      rp[i] = rl;
+    }
+  if (rc)
+    rp[rn++] = rc;
+  else
+    rn = mpn_normalized_size (rp, rn);
+
+  r->_mp_size = rx ? -rn : rn;
+}
+
+void
+mpz_ior (mpz_t r, const mpz_t u, const mpz_t v)
+{
+  mp_size_t un, vn, rn, i;
+  mp_ptr up, vp, rp;
+
+  mp_limb_t ux, vx, rx;
+  mp_limb_t uc, vc, rc;
+  mp_limb_t ul, vl, rl;
+
+  un = GMP_ABS (u->_mp_size);
+  vn = GMP_ABS (v->_mp_size);
+  if (un < vn)
+    {
+      MPZ_SRCPTR_SWAP (u, v);
+      MP_SIZE_T_SWAP (un, vn);
+    }
+  if (vn == 0)
+    {
+      mpz_set (r, u);
+      return;
+    }
+
+  uc = u->_mp_size < 0;
+  vc = v->_mp_size < 0;
+  rc = uc | vc;
+
+  ux = -uc;
+  vx = -vc;
+  rx = -rc;
+
+  /* If the smaller input is negative, by sign extension higher limbs
+     don't matter. */
+  rn = vx ? vn : un;
+
+  rp = MPZ_REALLOC (r, rn + (mp_size_t) rc);
+
+  up = u->_mp_d;
+  vp = v->_mp_d;
+
+  i = 0;
+  do
+    {
+      ul = (up[i] ^ ux) + uc;
+      uc = ul < uc;
+
+      vl = (vp[i] ^ vx) + vc;
+      vc = vl < vc;
+
+      rl = ( (ul | vl) ^ rx) + rc;
+      rc = rl < rc;
+      rp[i] = rl;
+    }
+  while (++i < vn);
+  assert (vc == 0);
+
+  for (; i < rn; i++)
+    {
+      ul = (up[i] ^ ux) + uc;
+      uc = ul < uc;
+
+      rl = ( (ul | vx) ^ rx) + rc;
+      rc = rl < rc;
+      rp[i] = rl;
+    }
+  if (rc)
+    rp[rn++] = rc;
+  else
+    rn = mpn_normalized_size (rp, rn);
+
+  r->_mp_size = rx ? -rn : rn;
+}
+
+void
+mpz_xor (mpz_t r, const mpz_t u, const mpz_t v)
+{
+  mp_size_t un, vn, i;
+  mp_ptr up, vp, rp;
+
+  mp_limb_t ux, vx, rx;
+  mp_limb_t uc, vc, rc;
+  mp_limb_t ul, vl, rl;
+
+  un = GMP_ABS (u->_mp_size);
+  vn = GMP_ABS (v->_mp_size);
+  if (un < vn)
+    {
+      MPZ_SRCPTR_SWAP (u, v);
+      MP_SIZE_T_SWAP (un, vn);
+    }
+  if (vn == 0)
+    {
+      mpz_set (r, u);
+      return;
+    }
+
+  uc = u->_mp_size < 0;
+  vc = v->_mp_size < 0;
+  rc = uc ^ vc;
+
+  ux = -uc;
+  vx = -vc;
+  rx = -rc;
+
+  rp = MPZ_REALLOC (r, un + (mp_size_t) rc);
+
+  up = u->_mp_d;
+  vp = v->_mp_d;
+
+  i = 0;
+  do
+    {
+      ul = (up[i] ^ ux) + uc;
+      uc = ul < uc;
+
+      vl = (vp[i] ^ vx) + vc;
+      vc = vl < vc;
+
+      rl = (ul ^ vl ^ rx) + rc;
+      rc = rl < rc;
+      rp[i] = rl;
+    }
+  while (++i < vn);
+  assert (vc == 0);
+
+  for (; i < un; i++)
+    {
+      ul = (up[i] ^ ux) + uc;
+      uc = ul < uc;
+
+      rl = (ul ^ ux) + rc;
+      rc = rl < rc;
+      rp[i] = rl;
+    }
+  if (rc)
+    rp[un++] = rc;
+  else
+    un = mpn_normalized_size (rp, un);
+
+  r->_mp_size = rx ? -un : un;
+}
+
+static unsigned
+gmp_popcount_limb (mp_limb_t x)
+{
+  unsigned c;
+
+  /* Do 16 bits at a time, to avoid limb-sized constants. */
+  for (c = 0; x > 0; x >>= 16)
+    {
+      unsigned w = ((x >> 1) & 0x5555) + (x & 0x5555);
+      w = ((w >> 2) & 0x3333) + (w & 0x3333);
+      w = ((w >> 4) & 0x0f0f) + (w & 0x0f0f);
+      w = (w >> 8) + (w & 0x00ff);
+      c += w;
+    }
+  return c;
+}
+
+mp_bitcnt_t
+mpn_popcount (mp_srcptr p, mp_size_t n)
+{
+  mp_size_t i;
+  mp_bitcnt_t c;
+
+  for (c = 0, i = 0; i < n; i++)
+    c += gmp_popcount_limb (p[i]);
+
+  return c;
+}
+
+mp_bitcnt_t
+mpz_popcount (const mpz_t u)
+{
+  mp_size_t un;
+
+  un = u->_mp_size;
+
+  if (un < 0)
+    return ~(mp_bitcnt_t) 0;
+
+  return mpn_popcount (u->_mp_d, un);
+}
+
+mp_bitcnt_t
+mpz_hamdist (const mpz_t u, const mpz_t v)
+{
+  mp_size_t un, vn, i;
+  mp_limb_t uc, vc, ul, vl, comp;
+  mp_srcptr up, vp;
+  mp_bitcnt_t c;
+
+  un = u->_mp_size;
+  vn = v->_mp_size;
+
+  if ( (un ^ vn) < 0)
+    return ~(mp_bitcnt_t) 0;
+
+  comp = - (uc = vc = (un < 0));
+  if (uc)
+    {
+      assert (vn < 0);
+      un = -un;
+      vn = -vn;
+    }
+
+  up = u->_mp_d;
+  vp = v->_mp_d;
+
+  if (un < vn)
+    MPN_SRCPTR_SWAP (up, un, vp, vn);
+
+  for (i = 0, c = 0; i < vn; i++)
+    {
+      ul = (up[i] ^ comp) + uc;
+      uc = ul < uc;
+
+      vl = (vp[i] ^ comp) + vc;
+      vc = vl < vc;
+
+      c += gmp_popcount_limb (ul ^ vl);
+    }
+  assert (vc == 0);
+
+  for (; i < un; i++)
+    {
+      ul = (up[i] ^ comp) + uc;
+      uc = ul < uc;
+
+      c += gmp_popcount_limb (ul ^ comp);
+    }
+
+  return c;
+}
+
+mp_bitcnt_t
+mpz_scan1 (const mpz_t u, mp_bitcnt_t starting_bit)
+{
+  mp_ptr up;
+  mp_size_t us, un, i;
+  mp_limb_t limb, ux;
+
+  us = u->_mp_size;
+  un = GMP_ABS (us);
+  i = starting_bit / GMP_LIMB_BITS;
+
+  /* Past the end there's no 1 bits for u>=0, or an immediate 1 bit
+     for u<0. Notice this test picks up any u==0 too. */
+  if (i >= un)
+    return (us >= 0 ? ~(mp_bitcnt_t) 0 : starting_bit);
+
+  up = u->_mp_d;
+  ux = 0;
+  limb = up[i];
+
+  if (starting_bit != 0)
+    {
+      if (us < 0)
+	{
+	  ux = mpn_zero_p (up, i);
+	  limb = ~ limb + ux;
+	  ux = - (mp_limb_t) (limb >= ux);
+	}
+
+      /* Mask to 0 all bits before starting_bit, thus ignoring them. */
+      limb &= (GMP_LIMB_MAX << (starting_bit % GMP_LIMB_BITS));
+    }
+
+  return mpn_common_scan (limb, i, up, un, ux);
+}
+
+mp_bitcnt_t
+mpz_scan0 (const mpz_t u, mp_bitcnt_t starting_bit)
+{
+  mp_ptr up;
+  mp_size_t us, un, i;
+  mp_limb_t limb, ux;
+
+  us = u->_mp_size;
+  ux = - (mp_limb_t) (us >= 0);
+  un = GMP_ABS (us);
+  i = starting_bit / GMP_LIMB_BITS;
+
+  /* When past end, there's an immediate 0 bit for u>=0, or no 0 bits for
+     u<0.  Notice this test picks up all cases of u==0 too. */
+  if (i >= un)
+    return (ux ? starting_bit : ~(mp_bitcnt_t) 0);
+
+  up = u->_mp_d;
+  limb = up[i] ^ ux;
+
+  if (ux == 0)
+    limb -= mpn_zero_p (up, i); /* limb = ~(~limb + zero_p) */
+
+  /* Mask all bits before starting_bit, thus ignoring them. */
+  limb &= (GMP_LIMB_MAX << (starting_bit % GMP_LIMB_BITS));
+
+  return mpn_common_scan (limb, i, up, un, ux);
+}
+
+
+/* MPZ base conversion. */
+
+size_t
+mpz_sizeinbase (const mpz_t u, int base)
+{
+  mp_size_t un;
+  mp_srcptr up;
+  mp_ptr tp;
+  mp_bitcnt_t bits;
+  struct gmp_div_inverse bi;
+  size_t ndigits;
+
+  assert (base >= 2);
+  assert (base <= 36);
+
+  un = GMP_ABS (u->_mp_size);
+  if (un == 0)
+    return 1;
+
+  up = u->_mp_d;
+
+  bits = (un - 1) * GMP_LIMB_BITS + mpn_limb_size_in_base_2 (up[un-1]);
+  switch (base)
+    {
+    case 2:
+      return bits;
+    case 4:
+      return (bits + 1) / 2;
+    case 8:
+      return (bits + 2) / 3;
+    case 16:
+      return (bits + 3) / 4;
+    case 32:
+      return (bits + 4) / 5;
+      /* FIXME: Do something more clever for the common case of base
+	 10. */
+    }
+
+  tp = gmp_xalloc_limbs (un);
+  mpn_copyi (tp, up, un);
+  mpn_div_qr_1_invert (&bi, base);
+
+  ndigits = 0;
+  do
+    {
+      ndigits++;
+      mpn_div_qr_1_preinv (tp, tp, un, &bi);
+      un -= (tp[un-1] == 0);
+    }
+  while (un > 0);
+
+  gmp_free (tp);
+  return ndigits;
+}
+
+char *
+mpz_get_str (char *sp, int base, const mpz_t u)
+{
+  unsigned bits;
+  const char *digits;
+  mp_size_t un;
+  size_t i, sn;
+
+  if (base >= 0)
+    {
+      digits = "0123456789abcdefghijklmnopqrstuvwxyz";
+    }
+  else
+    {
+      base = -base;
+      digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+    }
+  if (base <= 1)
+    base = 10;
+  if (base > 36)
+    return NULL;
+
+  sn = 1 + mpz_sizeinbase (u, base);
+  if (!sp)
+    sp = (char *) gmp_xalloc (1 + sn);
+
+  un = GMP_ABS (u->_mp_size);
+
+  if (un == 0)
+    {
+      sp[0] = '0';
+      sp[1] = '\0';
+      return sp;
+    }
+
+  i = 0;
+
+  if (u->_mp_size < 0)
+    sp[i++] = '-';
+
+  bits = mpn_base_power_of_two_p (base);
+
+  if (bits)
+    /* Not modified in this case. */
+    sn = i + mpn_get_str_bits ((unsigned char *) sp + i, bits, u->_mp_d, un);
+  else
+    {
+      struct mpn_base_info info;
+      mp_ptr tp;
+
+      mpn_get_base_info (&info, base);
+      tp = gmp_xalloc_limbs (un);
+      mpn_copyi (tp, u->_mp_d, un);
+
+      sn = i + mpn_get_str_other ((unsigned char *) sp + i, base, &info, tp, un);
+      gmp_free (tp);
+    }
+
+  for (; i < sn; i++)
+    sp[i] = digits[(unsigned char) sp[i]];
+
+  sp[sn] = '\0';
+  return sp;
+}
+
+int
+mpz_set_str (mpz_t r, const char *sp, int base)
+{
+  unsigned bits;
+  mp_size_t rn, alloc;
+  mp_ptr rp;
+  size_t dn;
+  int sign;
+  unsigned char *dp;
+
+  assert (base == 0 || (base >= 2 && base <= 36));
+
+  while (isspace( (unsigned char) *sp))
+    sp++;
+
+  sign = (*sp == '-');
+  sp += sign;
+
+  if (base == 0)
+    {
+      if (sp[0] == '0')
+	{
+	  if (sp[1] == 'x' || sp[1] == 'X')
+	    {
+	      base = 16;
+	      sp += 2;
+	    }
+	  else if (sp[1] == 'b' || sp[1] == 'B')
+	    {
+	      base = 2;
+	      sp += 2;
+	    }
+	  else
+	    base = 8;
+	}
+      else
+	base = 10;
+    }
+
+  if (!*sp)
+    {
+      r->_mp_size = 0;
+      return -1;
+    }
+  dp = (unsigned char *) gmp_xalloc (strlen (sp));
+
+  for (dn = 0; *sp; sp++)
+    {
+      unsigned digit;
+
+      if (isspace ((unsigned char) *sp))
+	continue;
+      else if (*sp >= '0' && *sp <= '9')
+	digit = *sp - '0';
+      else if (*sp >= 'a' && *sp <= 'z')
+	digit = *sp - 'a' + 10;
+      else if (*sp >= 'A' && *sp <= 'Z')
+	digit = *sp - 'A' + 10;
+      else
+	digit = base; /* fail */
+
+      if (digit >= (unsigned) base)
+	{
+	  gmp_free (dp);
+	  r->_mp_size = 0;
+	  return -1;
+	}
+
+      dp[dn++] = digit;
+    }
+
+  if (!dn)
+    {
+      gmp_free (dp);
+      r->_mp_size = 0;
+      return -1;
+    }
+  bits = mpn_base_power_of_two_p (base);
+
+  if (bits > 0)
+    {
+      alloc = (dn * bits + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS;
+      rp = MPZ_REALLOC (r, alloc);
+      rn = mpn_set_str_bits (rp, dp, dn, bits);
+    }
+  else
+    {
+      struct mpn_base_info info;
+      mpn_get_base_info (&info, base);
+      alloc = (dn + info.exp - 1) / info.exp;
+      rp = MPZ_REALLOC (r, alloc);
+      rn = mpn_set_str_other (rp, dp, dn, base, &info);
+      /* Normalization, needed for all-zero input. */
+      assert (rn > 0);
+      rn -= rp[rn-1] == 0;
+    }
+  assert (rn <= alloc);
+  gmp_free (dp);
+
+  r->_mp_size = sign ? - rn : rn;
+
+  return 0;
+}
+
+int
+mpz_init_set_str (mpz_t r, const char *sp, int base)
+{
+  mpz_init (r);
+  return mpz_set_str (r, sp, base);
+}
+
+size_t
+mpz_out_str (FILE *stream, int base, const mpz_t x)
+{
+  char *str;
+  size_t len;
+
+  str = mpz_get_str (NULL, base, x);
+  len = strlen (str);
+  len = fwrite (str, 1, len, stream);
+  gmp_free (str);
+  return len;
+}
+
+
+static int
+gmp_detect_endian (void)
+{
+  static const int i = 2;
+  const unsigned char *p = (const unsigned char *) &i;
+  return 1 - *p;
+}
+
+/* Import and export. Does not support nails. */
+void
+mpz_import (mpz_t r, size_t count, int order, size_t size, int endian,
+	    size_t nails, const void *src)
+{
+  const unsigned char *p;
+  ptrdiff_t word_step;
+  mp_ptr rp;
+  mp_size_t rn;
+
+  /* The current (partial) limb. */
+  mp_limb_t limb;
+  /* The number of bytes already copied to this limb (starting from
+     the low end). */
+  size_t bytes;
+  /* The index where the limb should be stored, when completed. */
+  mp_size_t i;
+
+  if (nails != 0)
+    gmp_die ("mpz_import: Nails not supported.");
+
+  assert (order == 1 || order == -1);
+  assert (endian >= -1 && endian <= 1);
+
+  if (endian == 0)
+    endian = gmp_detect_endian ();
+
+  p = (unsigned char *) src;
+
+  word_step = (order != endian) ? 2 * size : 0;
+
+  /* Process bytes from the least significant end, so point p at the
+     least significant word. */
+  if (order == 1)
+    {
+      p += size * (count - 1);
+      word_step = - word_step;
+    }
+
+  /* And at least significant byte of that word. */
+  if (endian == 1)
+    p += (size - 1);
+
+  rn = (size * count + sizeof(mp_limb_t) - 1) / sizeof(mp_limb_t);
+  rp = MPZ_REALLOC (r, rn);
+
+  for (limb = 0, bytes = 0, i = 0; count > 0; count--, p += word_step)
+    {
+      size_t j;
+      for (j = 0; j < size; j++, p -= (ptrdiff_t) endian)
+	{
+	  limb |= (mp_limb_t) *p << (bytes++ * CHAR_BIT);
+	  if (bytes == sizeof(mp_limb_t))
+	    {
+	      rp[i++] = limb;
+	      bytes = 0;
+	      limb = 0;
+	    }
+	}
+    }
+  assert (i + (bytes > 0) == rn);
+  if (limb != 0)
+    rp[i++] = limb;
+  else
+    i = mpn_normalized_size (rp, i);
+
+  r->_mp_size = i;
+}
+
+void *
+mpz_export (void *r, size_t *countp, int order, size_t size, int endian,
+	    size_t nails, const mpz_t u)
+{
+  size_t count;
+  mp_size_t un;
+
+  if (nails != 0)
+    gmp_die ("mpz_import: Nails not supported.");
+
+  assert (order == 1 || order == -1);
+  assert (endian >= -1 && endian <= 1);
+  assert (size > 0 || u->_mp_size == 0);
+
+  un = u->_mp_size;
+  count = 0;
+  if (un != 0)
+    {
+      size_t k;
+      unsigned char *p;
+      ptrdiff_t word_step;
+      /* The current (partial) limb. */
+      mp_limb_t limb;
+      /* The number of bytes left to to in this limb. */
+      size_t bytes;
+      /* The index where the limb was read. */
+      mp_size_t i;
+
+      un = GMP_ABS (un);
+
+      /* Count bytes in top limb. */
+      limb = u->_mp_d[un-1];
+      assert (limb != 0);
+
+      k = 0;
+      do {
+	k++; limb >>= CHAR_BIT;
+      } while (limb != 0);
+
+      count = (k + (un-1) * sizeof (mp_limb_t) + size - 1) / size;
+
+      if (!r)
+	r = gmp_xalloc (count * size);
+
+      if (endian == 0)
+	endian = gmp_detect_endian ();
+
+      p = (unsigned char *) r;
+
+      word_step = (order != endian) ? 2 * size : 0;
+
+      /* Process bytes from the least significant end, so point p at the
+	 least significant word. */
+      if (order == 1)
+	{
+	  p += size * (count - 1);
+	  word_step = - word_step;
+	}
+
+      /* And at least significant byte of that word. */
+      if (endian == 1)
+	p += (size - 1);
+
+      for (bytes = 0, i = 0, k = 0; k < count; k++, p += word_step)
+	{
+	  size_t j;
+	  for (j = 0; j < size; j++, p -= (ptrdiff_t) endian)
+	    {
+	      if (bytes == 0)
+		{
+		  if (i < un)
+		    limb = u->_mp_d[i++];
+		  bytes = sizeof (mp_limb_t);
+		}
+	      *p = limb;
+	      limb >>= CHAR_BIT;
+	      bytes--;
+	    }
+	}
+      assert (i == un);
+      assert (k == count);
+    }
+
+  if (countp)
+    *countp = count;
+
+  return r;
+}
diff --git a/dist/rts/mini-gmp.h b/dist/rts/mini-gmp.h
new file mode 100644
index 0000000..bb5c637
--- /dev/null
+++ b/dist/rts/mini-gmp.h
@@ -0,0 +1,298 @@
+/* mini-gmp, a minimalistic implementation of a GNU GMP subset.
+
+Copyright 2011-2015 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+/* About mini-gmp: This is a minimal implementation of a subset of the
+   GMP interface. It is intended for inclusion into applications which
+   have modest bignums needs, as a fallback when the real GMP library
+   is not installed.
+
+   This file defines the public interface. */
+
+#ifndef __MINI_GMP_H__
+#define __MINI_GMP_H__
+
+/* For size_t */
+#include <stddef.h>
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+void mp_set_memory_functions (void *(*) (size_t),
+			      void *(*) (void *, size_t, size_t),
+			      void (*) (void *, size_t));
+
+void mp_get_memory_functions (void *(**) (size_t),
+			      void *(**) (void *, size_t, size_t),
+			      void (**) (void *, size_t));
+
+typedef unsigned long mp_limb_t;
+typedef long mp_size_t;
+typedef unsigned long mp_bitcnt_t;
+
+typedef mp_limb_t *mp_ptr;
+typedef const mp_limb_t *mp_srcptr;
+
+typedef struct
+{
+  int _mp_alloc;		/* Number of *limbs* allocated and pointed
+				   to by the _mp_d field.  */
+  int _mp_size;			/* abs(_mp_size) is the number of limbs the
+				   last field points to.  If _mp_size is
+				   negative this is a negative number.  */
+  mp_limb_t *_mp_d;		/* Pointer to the limbs.  */
+} __mpz_struct;
+
+typedef __mpz_struct mpz_t[1];
+
+typedef __mpz_struct *mpz_ptr;
+typedef const __mpz_struct *mpz_srcptr;
+
+extern const int mp_bits_per_limb;
+
+void mpn_copyi (mp_ptr, mp_srcptr, mp_size_t);
+void mpn_copyd (mp_ptr, mp_srcptr, mp_size_t);
+void mpn_zero (mp_ptr, mp_size_t);
+
+int mpn_cmp (mp_srcptr, mp_srcptr, mp_size_t);
+int mpn_zero_p (mp_srcptr, mp_size_t);
+
+mp_limb_t mpn_add_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t mpn_add (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
+mp_limb_t mpn_sub_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t mpn_sub (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
+mp_limb_t mpn_mul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_addmul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_submul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+mp_limb_t mpn_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+void mpn_mul_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void mpn_sqr (mp_ptr, mp_srcptr, mp_size_t);
+int mpn_perfect_square_p (mp_srcptr, mp_size_t);
+mp_size_t mpn_sqrtrem (mp_ptr, mp_ptr, mp_srcptr, mp_size_t);
+
+mp_limb_t mpn_lshift (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
+mp_limb_t mpn_rshift (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
+
+mp_bitcnt_t mpn_scan0 (mp_srcptr, mp_bitcnt_t);
+mp_bitcnt_t mpn_scan1 (mp_srcptr, mp_bitcnt_t);
+
+void mpn_com (mp_ptr, mp_srcptr, mp_size_t);
+mp_limb_t mpn_neg (mp_ptr, mp_srcptr, mp_size_t);
+
+mp_bitcnt_t mpn_popcount (mp_srcptr, mp_size_t);
+
+mp_limb_t mpn_invert_3by2 (mp_limb_t, mp_limb_t);
+#define mpn_invert_limb(x) mpn_invert_3by2 ((x), 0)
+
+size_t mpn_get_str (unsigned char *, int, mp_ptr, mp_size_t);
+mp_size_t mpn_set_str (mp_ptr, const unsigned char *, size_t, int);
+
+void mpz_init (mpz_t);
+void mpz_init2 (mpz_t, mp_bitcnt_t);
+void mpz_clear (mpz_t);
+
+#define mpz_odd_p(z)   (((z)->_mp_size != 0) & (int) (z)->_mp_d[0])
+#define mpz_even_p(z)  (! mpz_odd_p (z))
+
+int mpz_sgn (const mpz_t);
+int mpz_cmp_si (const mpz_t, long);
+int mpz_cmp_ui (const mpz_t, unsigned long);
+int mpz_cmp (const mpz_t, const mpz_t);
+int mpz_cmpabs_ui (const mpz_t, unsigned long);
+int mpz_cmpabs (const mpz_t, const mpz_t);
+int mpz_cmp_d (const mpz_t, double);
+int mpz_cmpabs_d (const mpz_t, double);
+
+void mpz_abs (mpz_t, const mpz_t);
+void mpz_neg (mpz_t, const mpz_t);
+void mpz_swap (mpz_t, mpz_t);
+
+void mpz_add_ui (mpz_t, const mpz_t, unsigned long);
+void mpz_add (mpz_t, const mpz_t, const mpz_t);
+void mpz_sub_ui (mpz_t, const mpz_t, unsigned long);
+void mpz_ui_sub (mpz_t, unsigned long, const mpz_t);
+void mpz_sub (mpz_t, const mpz_t, const mpz_t);
+
+void mpz_mul_si (mpz_t, const mpz_t, long int);
+void mpz_mul_ui (mpz_t, const mpz_t, unsigned long int);
+void mpz_mul (mpz_t, const mpz_t, const mpz_t);
+void mpz_mul_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+void mpz_addmul_ui (mpz_t, const mpz_t, unsigned long int);
+void mpz_addmul (mpz_t, const mpz_t, const mpz_t);
+void mpz_submul_ui (mpz_t, const mpz_t, unsigned long int);
+void mpz_submul (mpz_t, const mpz_t, const mpz_t);
+
+void mpz_cdiv_qr (mpz_t, mpz_t, const mpz_t, const mpz_t);
+void mpz_fdiv_qr (mpz_t, mpz_t, const mpz_t, const mpz_t);
+void mpz_tdiv_qr (mpz_t, mpz_t, const mpz_t, const mpz_t);
+void mpz_cdiv_q (mpz_t, const mpz_t, const mpz_t);
+void mpz_fdiv_q (mpz_t, const mpz_t, const mpz_t);
+void mpz_tdiv_q (mpz_t, const mpz_t, const mpz_t);
+void mpz_cdiv_r (mpz_t, const mpz_t, const mpz_t);
+void mpz_fdiv_r (mpz_t, const mpz_t, const mpz_t);
+void mpz_tdiv_r (mpz_t, const mpz_t, const mpz_t);
+
+void mpz_cdiv_q_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+void mpz_fdiv_q_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+void mpz_tdiv_q_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+void mpz_cdiv_r_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+void mpz_fdiv_r_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+void mpz_tdiv_r_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+
+void mpz_mod (mpz_t, const mpz_t, const mpz_t);
+
+void mpz_divexact (mpz_t, const mpz_t, const mpz_t);
+
+int mpz_divisible_p (const mpz_t, const mpz_t);
+int mpz_congruent_p (const mpz_t, const mpz_t, const mpz_t);
+
+unsigned long mpz_cdiv_qr_ui (mpz_t, mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_fdiv_qr_ui (mpz_t, mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_tdiv_qr_ui (mpz_t, mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_cdiv_q_ui (mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_fdiv_q_ui (mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_tdiv_q_ui (mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_cdiv_r_ui (mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_fdiv_r_ui (mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_tdiv_r_ui (mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_cdiv_ui (const mpz_t, unsigned long);
+unsigned long mpz_fdiv_ui (const mpz_t, unsigned long);
+unsigned long mpz_tdiv_ui (const mpz_t, unsigned long);
+
+unsigned long mpz_mod_ui (mpz_t, const mpz_t, unsigned long);
+
+void mpz_divexact_ui (mpz_t, const mpz_t, unsigned long);
+
+int mpz_divisible_ui_p (const mpz_t, unsigned long);
+
+unsigned long mpz_gcd_ui (mpz_t, const mpz_t, unsigned long);
+void mpz_gcd (mpz_t, const mpz_t, const mpz_t);
+void mpz_gcdext (mpz_t, mpz_t, mpz_t, const mpz_t, const mpz_t);
+void mpz_lcm_ui (mpz_t, const mpz_t, unsigned long);
+void mpz_lcm (mpz_t, const mpz_t, const mpz_t);
+int mpz_invert (mpz_t, const mpz_t, const mpz_t);
+
+void mpz_sqrtrem (mpz_t, mpz_t, const mpz_t);
+void mpz_sqrt (mpz_t, const mpz_t);
+int mpz_perfect_square_p (const mpz_t);
+
+void mpz_pow_ui (mpz_t, const mpz_t, unsigned long);
+void mpz_ui_pow_ui (mpz_t, unsigned long, unsigned long);
+void mpz_powm (mpz_t, const mpz_t, const mpz_t, const mpz_t);
+void mpz_powm_ui (mpz_t, const mpz_t, unsigned long, const mpz_t);
+
+void mpz_rootrem (mpz_t, mpz_t, const mpz_t, unsigned long);
+int mpz_root (mpz_t, const mpz_t, unsigned long);
+
+void mpz_fac_ui (mpz_t, unsigned long);
+void mpz_bin_uiui (mpz_t, unsigned long, unsigned long);
+
+int mpz_probab_prime_p (const mpz_t, int);
+
+int mpz_tstbit (const mpz_t, mp_bitcnt_t);
+void mpz_setbit (mpz_t, mp_bitcnt_t);
+void mpz_clrbit (mpz_t, mp_bitcnt_t);
+void mpz_combit (mpz_t, mp_bitcnt_t);
+
+void mpz_com (mpz_t, const mpz_t);
+void mpz_and (mpz_t, const mpz_t, const mpz_t);
+void mpz_ior (mpz_t, const mpz_t, const mpz_t);
+void mpz_xor (mpz_t, const mpz_t, const mpz_t);
+
+mp_bitcnt_t mpz_popcount (const mpz_t);
+mp_bitcnt_t mpz_hamdist (const mpz_t, const mpz_t);
+mp_bitcnt_t mpz_scan0 (const mpz_t, mp_bitcnt_t);
+mp_bitcnt_t mpz_scan1 (const mpz_t, mp_bitcnt_t);
+
+int mpz_fits_slong_p (const mpz_t);
+int mpz_fits_ulong_p (const mpz_t);
+long int mpz_get_si (const mpz_t);
+unsigned long int mpz_get_ui (const mpz_t);
+double mpz_get_d (const mpz_t);
+size_t mpz_size (const mpz_t);
+mp_limb_t mpz_getlimbn (const mpz_t, mp_size_t);
+
+void mpz_realloc2 (mpz_t, mp_bitcnt_t);
+mp_srcptr mpz_limbs_read (mpz_srcptr);
+mp_ptr mpz_limbs_modify (mpz_t, mp_size_t);
+mp_ptr mpz_limbs_write (mpz_t, mp_size_t);
+void mpz_limbs_finish (mpz_t, mp_size_t);
+mpz_srcptr mpz_roinit_n (mpz_t, mp_srcptr, mp_size_t);
+
+#define MPZ_ROINIT_N(xp, xs) {{0, (xs),(xp) }}
+
+void mpz_set_si (mpz_t, signed long int);
+void mpz_set_ui (mpz_t, unsigned long int);
+void mpz_set (mpz_t, const mpz_t);
+void mpz_set_d (mpz_t, double);
+
+void mpz_init_set_si (mpz_t, signed long int);
+void mpz_init_set_ui (mpz_t, unsigned long int);
+void mpz_init_set (mpz_t, const mpz_t);
+void mpz_init_set_d (mpz_t, double);
+
+size_t mpz_sizeinbase (const mpz_t, int);
+char *mpz_get_str (char *, int, const mpz_t);
+int mpz_set_str (mpz_t, const char *, int);
+int mpz_init_set_str (mpz_t, const char *, int);
+
+/* This long list taken from gmp.h. */
+/* For reference, "defined(EOF)" cannot be used here.  In g++ 2.95.4,
+   <iostream> defines EOF but not FILE.  */
+#if defined (FILE)                                              \
+  || defined (H_STDIO)                                          \
+  || defined (_H_STDIO)               /* AIX */                 \
+  || defined (_STDIO_H)               /* glibc, Sun, SCO */     \
+  || defined (_STDIO_H_)              /* BSD, OSF */            \
+  || defined (__STDIO_H)              /* Borland */             \
+  || defined (__STDIO_H__)            /* IRIX */                \
+  || defined (_STDIO_INCLUDED)        /* HPUX */                \
+  || defined (__dj_include_stdio_h_)  /* DJGPP */               \
+  || defined (_FILE_DEFINED)          /* Microsoft */           \
+  || defined (__STDIO__)              /* Apple MPW MrC */       \
+  || defined (_MSL_STDIO_H)           /* Metrowerks */          \
+  || defined (_STDIO_H_INCLUDED)      /* QNX4 */		\
+  || defined (_ISO_STDIO_ISO_H)       /* Sun C++ */		\
+  || defined (__STDIO_LOADED)         /* VMS */
+size_t mpz_out_str (FILE *, int, const mpz_t);
+#endif
+
+void mpz_import (mpz_t, size_t, int, size_t, int, size_t, const void *);
+void *mpz_export (void *, size_t *, int, size_t, int, size_t, const mpz_t);
+
+#if defined (__cplusplus)
+}
+#endif
+#endif /* __MINI_GMP_H__ */
diff --git a/dist/rts/windows/win_utils.c b/dist/rts/windows/win_utils.c
new file mode 100644
index 0000000..daf711d
--- /dev/null
+++ b/dist/rts/windows/win_utils.c
@@ -0,0 +1,67 @@
+#include <io.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <windows.h>
+
+
+// THis file exists to avoid clashes between windows.h and idris_rts.h
+//
+
+int win_fpoll(void *h)
+{
+    HANDLE wh =(HANDLE) _get_osfhandle(_fileno((FILE *)h));
+    if (wh == INVALID_HANDLE_VALUE) {
+        return -1;
+    }
+    DWORD ret = WaitForSingleObject(wh, 1000);
+    // Imitate the return values of select()
+    if (ret == WAIT_OBJECT_0)
+        return 1;
+    if (ret == WAIT_TIMEOUT)
+        return 0;
+    return -1;
+}
+
+int widen_utf8(const char *filename_utf8, LPWSTR *filename_w)
+{
+    int num_chars = MultiByteToWideChar(CP_UTF8, 0, filename_utf8, -1, 0, 0);
+    int size = sizeof(WCHAR);
+    *filename_w = (LPWSTR)malloc(size * num_chars);
+    MultiByteToWideChar(CP_UTF8, 0, filename_utf8, -1, *filename_w, num_chars);
+    return num_chars;
+}
+
+FILE *win32_u8fopen(const char *path, const char *mode)
+{
+    LPWSTR wpath, wmode;
+    widen_utf8(path, &wpath);
+    widen_utf8(mode, &wmode);
+    FILE *f = _wfopen(wpath, wmode);
+    free(wpath);
+    free(wmode);
+    return f;
+}
+
+FILE *win32_u8popen(const char *path, const char *mode)
+{
+    LPWSTR wpath, wmode;
+    widen_utf8(path, &wpath);
+    widen_utf8(mode, &wmode);
+    FILE *f = _wpopen(wpath, wmode);
+    free(wpath);
+    free(wmode);
+    return f;
+}
+
+void win32_gettime(int64_t* sec, int64_t* nsec)
+{
+    FILETIME ft;
+    GetSystemTimePreciseAsFileTime(&ft);
+    ULARGE_INTEGER t;
+    t.HighPart = ft.dwHighDateTime;
+    t.LowPart = ft.dwLowDateTime;
+
+    *nsec = (t.QuadPart % 10000000)*100;
+    *sec = t.QuadPart / 10000000;
+    *sec -= 11644473600; // LDAP epoch to Unix epoch 
+}
\ No newline at end of file
diff --git a/dist/rts/windows/win_utils.h b/dist/rts/windows/win_utils.h
new file mode 100644
index 0000000..9e47e93
--- /dev/null
+++ b/dist/rts/windows/win_utils.h
@@ -0,0 +1,9 @@
+#include <stdint.h>
+#include <stdio.h>
+
+#pragma once
+
+int win_fpoll(void *h);
+FILE *win32_u8fopen(const char *path, const char *mode);
+FILE *win32_u8popen(const char *path, const char *mode);
+void win32_gettime(int64_t* sec, int64_t* nsec);
diff --git a/idris2-mkc.ipkg b/idris2-mkc.ipkg
new file mode 100644
index 0000000..98a40ac
--- /dev/null
+++ b/idris2-mkc.ipkg
@@ -0,0 +1,133 @@
+package idris2
+
+modules = 
+    Compiler.Common,
+    Compiler.CompileExpr,
+    Compiler.Inline,
+    Compiler.Scheme.Chez,
+    Compiler.Scheme.Chicken,
+    Compiler.Scheme.Racket,
+    Compiler.Scheme.Common,
+
+    Control.Delayed,
+
+    Core.AutoSearch,
+    Core.Binary,
+    Core.CaseBuilder,
+    Core.CaseTree,
+    Core.Context,
+    Core.CompileExpr,
+    Core.Core,
+    Core.Coverage,
+    Core.Directory,
+    Core.Env,
+    Core.FC,
+    Core.GetType,
+    Core.Hash,
+    Core.LinearCheck,
+    Core.Metadata,
+    Core.Name,
+    Core.Normalise,
+    Core.Options,
+    Core.Reflect,
+    Core.Termination,
+    Core.TT,
+    Core.TTC,
+    Core.Unify,
+    Core.UnifyState,
+    Core.Value,
+
+    Data.ANameMap,
+    Data.Bool.Extra,
+    Data.IntMap,
+    Data.IOArray,
+    Data.NameMap,
+    Data.StringMap,
+    Data.These,
+    Data.StringTrie,
+
+    Idris.CommandLine,
+    Idris.Desugar,
+    Idris.Elab.Implementation,
+    Idris.Elab.Interface,
+    Idris.Error,
+    Idris.IDEMode.CaseSplit,
+    Idris.IDEMode.Commands,
+    Idris.IDEMode.MakeClause,
+    Idris.IDEMode.Parser,
+    Idris.IDEMode.REPL,
+    Idris.IDEMode.TokenLine,
+    Idris.ModTree,
+    Idris.Package,
+    Idris.Parser,
+    Idris.ProcessIdr,
+    Idris.REPL,
+    Idris.REPLCommon,
+    Idris.REPLOpts,
+    Idris.Resugar,
+    Idris.SetOptions,
+    Idris.Socket,
+    Idris.Socket.Data,
+    Idris.Socket.Raw,
+    Idris.Syntax,
+
+    Parser.Lexer,
+    Parser.Support,
+
+    Text.Lexer,
+    Text.Lexer.Core,
+    Text.Parser,
+    Text.Parser.Core,
+    Text.Quantity,
+    Text.Token,
+
+    TTImp.BindImplicits,
+    TTImp.Elab,
+    TTImp.Elab.Ambiguity,
+    TTImp.Elab.App,
+    TTImp.Elab.As,
+    TTImp.Elab.Binders,
+    TTImp.Elab.Case,
+    TTImp.Elab.Check,
+    TTImp.Elab.Dot,
+    TTImp.Elab.Hole,
+    TTImp.Elab.ImplicitBind,
+    TTImp.Elab.Lazy,
+    TTImp.Elab.Local,
+    TTImp.Elab.Prim,
+    TTImp.Elab.Quote,
+    TTImp.Elab.Record,
+    TTImp.Elab.Rewrite,
+    TTImp.Elab.Term,
+    TTImp.Elab.Utils,
+    TTImp.Impossible,
+    TTImp.Interactive.CaseSplit,
+    TTImp.Interactive.ExprSearch,
+    TTImp.Interactive.GenerateDef,
+    TTImp.Interactive.MakeLemma,
+    TTImp.Parser,
+    TTImp.ProcessData,
+    TTImp.ProcessDecls,
+    TTImp.ProcessDef,
+    TTImp.ProcessParams,
+    TTImp.ProcessRecord,
+    TTImp.ProcessType,
+    TTImp.Reflect,
+    TTImp.TTImp,
+    TTImp.Unelab,
+    TTImp.Utils,
+    TTImp.WithClause,
+
+    Utils.Binary,
+    Utils.Hex,
+    Utils.Shunting,
+
+    Yaffle.Main,
+    Yaffle.REPL
+
+sourcedir = src
+executable = idris2.c
+opts = "--partial-eval -S"
+
+main = Idris.Main
+