Use offset for substr if possible

If the substring is a suffix, use a pointer to the existing string
rather than constructing a new string.
This commit is contained in:
Edwin Brady 2017-03-15 15:59:07 +00:00
parent ce5e330fe7
commit 2debf366a7
6 changed files with 40 additions and 12 deletions

View File

@ -636,7 +636,7 @@ VAL MKSTROFFc(VM* vm, StrOffset* off) {
return cl;
}
VAL idris_strTail(VM* vm, VAL str) {
VAL idris_strShift(VM* vm, VAL str, int num) {
// If there's no room, just copy the string, or we'll have a problem after
// gc moves str
if (space(vm, sizeof(Closure) + sizeof(StrOffset))) {
@ -654,7 +654,7 @@ VAL idris_strTail(VM* vm, VAL str) {
}
cl->info.str_offset->str = root;
cl->info.str_offset->offset = offset+idris_utf8_charlen(GETSTR(str));
cl->info.str_offset->offset = offset+idris_utf8_findOffset(GETSTR(str), num);
return cl;
} else {
@ -663,6 +663,10 @@ VAL idris_strTail(VM* vm, VAL str) {
}
}
VAL idris_strTail(VM* vm, VAL str) {
return idris_strShift(vm, str, 1);
}
VAL idris_strCons(VM* vm, VAL x, VAL xs) {
char *xstr = GETSTR(xs);
int xval = GETINT(x);
@ -692,14 +696,24 @@ VAL idris_strIndex(VM* vm, VAL str, VAL i) {
}
VAL idris_substr(VM* vm, VAL offset, VAL length, VAL str) {
char *start = idris_utf8_advance(GETSTR(str), GETINT(offset));
char *end = idris_utf8_advance(start, GETINT(length));
Closure* newstr = allocate(sizeof(Closure) + (end - start) +1, 0);
SETTY(newstr, CT_STRING);
newstr -> info.str = (char*)newstr + sizeof(Closure);
memcpy(newstr -> info.str, start, end - start);
*(newstr -> info.str + (end - start) + 1) = '\0';
return newstr;
int offset_val = GETINT(offset);
int length_val = GETINT(length);
char* str_val = GETSTR(str);
// If the substring is a suffix, use idris_strShift to avoid reallocating
if (offset_val + length_val >= strlen(str_val)) {
return idris_strShift(vm, str, offset_val);
}
else {
char *start = idris_utf8_advance(str_val, offset_val);
char *end = idris_utf8_advance(start, length_val);
Closure* newstr = allocate(sizeof(Closure) + (end - start) +1, 0);
SETTY(newstr, CT_STRING);
newstr -> info.str = (char*)newstr + sizeof(Closure);
memcpy(newstr -> info.str, start, end - start);
*(newstr -> info.str + (end - start) + 1) = '\0';
return newstr;
}
}
VAL idris_strRev(VM* vm, VAL str) {

View File

@ -378,6 +378,7 @@ VAL idris_readStr(VM* vm, FILE* h);
VAL idris_readChars(VM* vm, int num, FILE* h);
VAL idris_strHead(VM* vm, VAL str);
VAL idris_strShift(VM* vm, VAL str, int num);
VAL idris_strTail(VM* vm, VAL str);
// This is not expected to be efficient! Mostly we wouldn't expect to call
// it at all at run time.

View File

@ -103,6 +103,17 @@ char* idris_utf8_advance(char* str, int i) {
return str;
}
int idris_utf8_findOffset(char* str, int i) {
int offset = 0;
while(i > 0) {
int len = idris_utf8_charlen(str);
str+=len;
offset+=len;
i--;
}
return offset;
}
char* idris_utf8_fromChar(int x) {
char* str;

View File

@ -21,4 +21,6 @@ char* idris_utf8_rev(char* s, char* result);
// Advance a pointer into a string by i UTF8 characters.
// Return original pointer if i <= 0.
char* idris_utf8_advance(char* str, int i);
// Return the offset of the ith UTF8 character in the string
int idris_utf8_findOffset(char* str, int i);
#endif

View File

@ -626,7 +626,7 @@ doOp v (LExternal rf) [_,x]
= v ++ "idris_readStr(vm, GETPTR(" ++ creg x ++ "))"
doOp v (LExternal rf) [_,len,x]
| rf == sUN "prim__readChars"
= v ++ "idris_readChars(vm, GETINT(" ++ creg len ++
= v ++ "idris_readChars(vm, GETINT(" ++ creg len ++
"), GETPTR(" ++ creg x ++ "))"
doOp v (LExternal wf) [_,x,s]
| wf == sUN "prim__writeFile"

View File

@ -425,7 +425,7 @@ execForeign env ctxt arity ty fn xs onfail
". Are all cases covered?"
| Just (FFun "idris_addToString" [(_, strBuf), (_, str)] _) <- foreignFromTT arity ty fn xs
= case (strBuf, str) of
(EStringBuf ref, EConstant (Str add)) ->
(EStringBuf ref, EConstant (Str add)) ->
do execIO $ modifyIORef ref (++add)
execApp env ctxt ioUnit (drop arity xs)
_ -> execFail . Msg $