Add String offsets to VM

Now strTail just stores an offset into a string, meaning that it doesn't
need to reallocate or copy an entire string. Therefore traversing a
large string is now much quicker.

Note - this means the entire string gets GCed at once, so if there is a
large string but only a pointer to the end of it, the whole string still
needs to be stored. For traversal, this isn't really a problem, but we
may want to reallocate if the GC discovers there is only one reference
to an offset string.
This commit is contained in:
Edwin Brady 2013-11-25 15:13:28 +00:00
parent 73e2e7ec51
commit 9418fe0ecb
3 changed files with 65 additions and 4 deletions

View File

@ -24,6 +24,9 @@ VAL copy(VM* vm, VAL x) {
case STRING:
cl = MKSTRc(vm, x->info.str);
break;
case STROFFSET:
cl = MKSTROFFc(vm, x->info.str_offset);
break;
case BIGINT:
cl = MKBIGMc(vm, x->info.ptr);
break;
@ -60,7 +63,7 @@ void cheney(VM *vm) {
while(scan < vm->heap.next) {
size_t inc = *((size_t*)scan);
VAL heap_item = (VAL)(scan+sizeof(size_t));
// If it's a CON, copy its arguments
// If it's a CON or STROFFSET, copy its arguments
switch(GETTY(heap_item)) {
case CON:
ar = ARITY(heap_item);
@ -71,6 +74,10 @@ void cheney(VM *vm) {
heap_item->info.c.args[i] = newptr;
}
break;
case STROFFSET:
heap_item->info.str_offset->str
= copy(vm, heap_item->info.str_offset->str);
break;
default: // Nothing to copy
break;
}

View File

@ -84,6 +84,10 @@ void idris_doneAlloc(VM* vm) {
}
}
int space(VM* vm, size_t size) {
return (vm->heap.next + size + sizeof(size_t) < vm->heap.end);
}
void* allocate(VM* vm, size_t size, int outerlock) {
// return malloc(size);
int lock = vm->processes > 0 && !outerlock;
@ -161,6 +165,12 @@ VAL MKSTR(VM* vm, const char* str) {
return cl;
}
char* GETSTROFF(VAL stroff) {
// Assume STROFF
StrOffset* root = stroff->info.str_offset;
return (root->str->info.str + root->offset);
}
VAL MKPTR(VM* vm, void* ptr) {
Closure* cl = allocate(vm, sizeof(Closure), 0);
SETTY(cl, PTR);
@ -397,8 +407,41 @@ VAL idris_strHead(VM* vm, VAL str) {
return MKINT((i_int)(GETSTR(str)[0]));
}
VAL MKSTROFFc(VM* vm, StrOffset* off) {
Closure* cl = allocate(vm, sizeof(Closure) + sizeof(StrOffset), 1);
SETTY(cl, STROFFSET);
cl->info.str_offset = (StrOffset*)((char*)cl + sizeof(Closure));
cl->info.str_offset->str = off->str;
cl->info.str_offset->offset = off->offset;
return cl;
}
VAL idris_strTail(VM* vm, VAL str) {
return MKSTR(vm, GETSTR(str)+1);
// If there's no room, just copy the string, or we'll have a problem after
// gc moves str
if (space(vm, sizeof(Closure) + sizeof(StrOffset))) {
Closure* cl = allocate(vm, sizeof(Closure) + sizeof(StrOffset), 0);
SETTY(cl, STROFFSET);
cl->info.str_offset = (StrOffset*)((char*)cl + sizeof(Closure));
int offset = 0;
VAL root = str;
while(root!=NULL && !ISSTR(root)) { // find the root, carry on.
// In theory, at most one step here!
offset += root->info.str_offset->offset;
root = root->info.str_offset->str;
}
cl->info.str_offset->str = root;
cl->info.str_offset->offset = offset+1;
return cl;
} else {
return MKSTR(vm, GETSTR(str)+1);
}
}
VAL idris_strCons(VM* vm, VAL x, VAL xs) {

View File

@ -15,7 +15,8 @@
// Closures
typedef enum {
CON, INT, BIGINT, FLOAT, STRING, BITS8, BITS16, BITS32, BITS64, UNIT, PTR, FWD
CON, INT, BIGINT, FLOAT, STRING, STROFFSET,
BITS8, BITS16, BITS32, BITS64, UNIT, PTR, FWD
} ClosureType;
typedef struct Closure *VAL;
@ -25,6 +26,11 @@ typedef struct {
VAL args[];
} con;
typedef struct {
VAL str;
int offset;
} StrOffset;
typedef struct Closure {
// Use top 16 bits of ty for saying which heap value is in
// Bottom 16 bits for closure type
@ -34,6 +40,7 @@ typedef struct Closure {
int i;
double f;
char* str;
StrOffset* str_offset;
void* ptr;
uint8_t bits8;
uint16_t bits16;
@ -92,7 +99,7 @@ typedef void(*func)(VM*, VAL*);
// Retrieving values
#define GETSTR(x) (((VAL)(x))->info.str)
#define GETSTR(x) (ISSTR(x) ? (((VAL)(x))->info.str) : GETSTROFF(x))
#define GETPTR(x) (((VAL)(x))->info.ptr)
#define GETFLOAT(x) (((VAL)(x))->info.f)
@ -119,6 +126,7 @@ typedef intptr_t i_int;
#define MKINT(x) ((void*)((x)<<1)+1)
#define GETINT(x) ((i_int)(x)>>1)
#define ISINT(x) ((((i_int)x)&1) == 1)
#define ISSTR(x) (((VAL)(x))->ty == STRING)
#define INTOP(op,x,y) MKINT((i_int)((((i_int)x)>>1) op (((i_int)y)>>1)))
#define UINTOP(op,x,y) MKINT((i_int)((((uintptr_t)x)>>1) op (((uintptr_t)y)>>1)))
@ -151,9 +159,12 @@ VAL MKB64(VM* vm, uint64_t b);
// following versions don't take a lock when allocating
VAL MKFLOATc(VM* vm, double val);
VAL MKSTROFFc(VM* vm, StrOffset* off);
VAL MKSTRc(VM* vm, char* str);
VAL MKPTRc(VM* vm, void* ptr);
char* GETSTROFF(VAL stroff);
// #define SETTAG(x, a) (x)->info.c.tag = (a)
#define SETARG(x, i, a) ((x)->info.c.args)[i] = ((VAL)(a))
#define GETARG(x, i) ((x)->info.c.args)[i]