Complete RefC standard String support

- Fix off-by-one error in String reverse
- Correct order of arguments in strSubstr
- Actually use start index of strSubstr
- Reduce memory usage of strSubstr in case of overrunning string end
- Add fastPack/fastUnpack/fastConcat
- Use unsigned chars for character comparisons
- Fix generated C character encodings
This commit is contained in:
Robert Wright 2021-05-17 11:48:46 +01:00 committed by G. Allais
parent 978d86f28d
commit c34c6e0959
9 changed files with 165 additions and 49 deletions

View File

@ -416,6 +416,7 @@ Traversable List where
-- If you need to concatenate strings at compile time, use Prelude.concat. -- If you need to concatenate strings at compile time, use Prelude.concat.
%foreign %foreign
"scheme:string-concat" "scheme:string-concat"
"C:fastConcat"
"javascript:lambda:(xs)=>''.concat(...__prim_idris2js_array(xs))" "javascript:lambda:(xs)=>''.concat(...__prim_idris2js_array(xs))"
export export
fastConcat : List String -> String fastConcat : List String -> String
@ -543,6 +544,7 @@ pack (x :: xs) = strCons x (pack xs)
%foreign %foreign
"scheme:string-pack" "scheme:string-pack"
"C:fastPack"
"javascript:lambda:(xs)=>''.concat(...__prim_idris2js_array(xs))" "javascript:lambda:(xs)=>''.concat(...__prim_idris2js_array(xs))"
export export
fastPack : List Char -> String fastPack : List Char -> String
@ -569,6 +571,7 @@ unpack str = unpack' (prim__cast_IntegerInt (natToInteger (length str)) - 1) str
-- If you need to unpack strings at compile time, use Prelude.unpack. -- If you need to unpack strings at compile time, use Prelude.unpack.
%foreign %foreign
"scheme:string-unpack" "scheme:string-unpack"
"C:fastUnpack"
"javascript:lambda:(str)=>__prim_js2idris_array(Array.from(str))" "javascript:lambda:(str)=>__prim_js2idris_array(Array.from(str))"
export export
fastUnpack : String -> List Char fastUnpack : String -> List Char

View File

@ -75,46 +75,15 @@ cName n = assert_total $ idris_crash ("INTERNAL ERROR: Unsupported name in C bac
-- not really total but this way this internal error does not contaminate everything else -- not really total but this way this internal error does not contaminate everything else
escapeChar : Char -> String escapeChar : Char -> String
escapeChar '\DEL' = "127" escapeChar c = if isAlphaNum c || isNL c
escapeChar '\NUL' = "0" then show c
escapeChar '\SOH' = "1" else "(char)" ++ show (ord c)
escapeChar '\STX' = "2"
escapeChar '\ETX' = "3"
escapeChar '\EOT' = "4"
escapeChar '\ENQ' = "5"
escapeChar '\ACK' = "6"
escapeChar '\BEL' = "7"
escapeChar '\BS' = "8"
escapeChar '\HT' = "9"
escapeChar '\LF' = "10"
escapeChar '\VT' = "11"
escapeChar '\FF' = "12"
escapeChar '\CR' = "13"
escapeChar '\SO' = "14"
escapeChar '\SI' = "15"
escapeChar '\DLE' = "16"
escapeChar '\DC1' = "17"
escapeChar '\DC2' = "18"
escapeChar '\DC3' = "19"
escapeChar '\DC4' = "20"
escapeChar '\NAK' = "21"
escapeChar '\SYN' = "22"
escapeChar '\ETB' = "23"
escapeChar '\CAN' = "24"
escapeChar '\EM' = "25"
escapeChar '\SUB' = "26"
escapeChar '\ESC' = "27"
escapeChar '\FS' = "28"
escapeChar '\GS' = "29"
escapeChar '\RS' = "30"
escapeChar '\US' = "31"
escapeChar c = show c
cStringQuoted : String -> String cStringQuoted : String -> String
cStringQuoted cs = strCons '"' (showCString (unpack cs) "\"") cStringQuoted cs = strCons '"' (showCString (unpack cs) "\"")
where where
showCChar : Char -> String -> String showCChar : Char -> String -> String
showCChar '\\' = ("bkslash" ++) showCChar '\\' = ("\\\\" ++)
showCChar c showCChar c
= if c < chr 32 = if c < chr 32
then (("\\x" ++ leftPad '0' 2 (asHex (cast c))) ++ "\"\"" ++) then (("\\x" ++ leftPad '0' 2 (asHex (cast c))) ++ "\"\"" ++)

View File

@ -76,7 +76,7 @@ typedef struct
typedef struct typedef struct
{ {
Value_header header; Value_header header;
char c; unsigned char c;
} Value_Char; } Value_Char;
typedef struct typedef struct

View File

@ -45,7 +45,7 @@ Value *reverse(Value *str)
memset(retVal->str, 0, l + 1); memset(retVal->str, 0, l + 1);
char *p = retVal->str; char *p = retVal->str;
char *q = input->str + (l - 1); char *q = input->str + (l - 1);
for (int i = 1; i < l; i++) for (int i = 0; i < l; i++)
{ {
*p++ = *q--; *p++ = *q--;
} }
@ -85,18 +85,103 @@ Value *strAppend(Value *a, Value *b)
return (Value *)retVal; return (Value *)retVal;
} }
Value *strSubstr(Value *s, Value *start, Value *len) Value *strSubstr(Value *start, Value *len, Value *s)
{ {
Value_String *retVal; char *input = ((Value_String *)s)->str;
switch (len->header.tag) int offset = extractInt(start);
int l = extractInt(len);
int tailLen = strlen(input);
if (tailLen < l)
{ {
case INT64_TAG: l = tailLen;
retVal = makeEmptyString(((Value_Int64 *)len)->i64 + 1);
memcpy(retVal->str, ((Value_String *)s)->str, ((Value_Int64 *)len)->i64);
return (Value *)retVal;
default:
retVal = makeEmptyString(((Value_Int32 *)len)->i32 + 1);
memcpy(retVal->str, ((Value_String *)s)->str, ((Value_Int32 *)len)->i32);
return (Value *)retVal;
} }
Value_String *retVal = makeEmptyString(l + 1);
memcpy(retVal->str, input + offset, l);
return (Value *)retVal;
}
char *fastPack(Value *charList)
{
Value_Constructor *current;
int l = 0;
current = (Value_Constructor *)charList;
while (current->total == 2)
{
l ++;
current = (Value_Constructor *)current->args[1];
}
char *retVal = malloc(l + 1);
retVal[l] = 0;
int i = 0;
current = (Value_Constructor *)charList;
while (current->total == 2)
{
retVal[i++] = ((Value_Char *)current->args[0])->c;
current = (Value_Constructor *)current->args[1];
}
return retVal;
}
Value *fastUnpack(char *str)
{
if (str[0] == '\0') {
return (Value *)newConstructor(0, 0, "Prelude_Types_Nil");
}
Value_Constructor *retVal = newConstructor(2, 1, "Prelude_Types__colon_colon");
retVal->args[0] = (Value *)makeChar(str[0]);
int i = 1;
Value_Constructor *current = retVal;
Value_Constructor *next;
while (str[i] != '\0') {
next = newConstructor(2, 1, "Prelude_Types__colon_colon");
next->args[0] = (Value *)makeChar(str[i]);
current->args[1] = (Value *)next;
i ++;
current = next;
}
current->args[1] = (Value *)newConstructor(0, 0, "Prelude_Types_Nil");
return (Value *)retVal;
}
char *fastConcat(Value *strList)
{
Value_Constructor *current;
int totalLength = 0;
current = (Value_Constructor *)strList;
while (current->total == 2)
{
totalLength += strlen(((Value_String *)current->args[0])->str);
current = (Value_Constructor *)current->args[1];
}
char *retVal = malloc(totalLength + 1);
retVal[totalLength + 1] = '\0';
char *currentStr;
int currentStrLen;
int offset = 0;
current = (Value_Constructor *)strList;
while (current->total == 2)
{
currentStr = ((Value_String *)current->args[0])->str;
currentStrLen = strlen(currentStr);
memcpy(retVal + offset, currentStr, currentStrLen);
offset += currentStrLen;
current = (Value_Constructor *)current->args[1];
}
return retVal;
} }

View File

@ -11,5 +11,8 @@ Value *strIndex(Value *str, Value *i);
Value *strCons(Value *c, Value *str); Value *strCons(Value *c, Value *str);
Value *strAppend(Value *a, Value *b); Value *strAppend(Value *a, Value *b);
Value *strSubstr(Value *s, Value *start, Value *len); Value *strSubstr(Value *s, Value *start, Value *len);
char *fastPack(Value *charList);
Value *fastUnpack(char *str);
char *fastConcat(Value *strList);
#endif #endif

View File

@ -222,7 +222,9 @@ chezTests = MkTestPool "Chez backend" [Chez]
refcTests : TestPool refcTests : TestPool
refcTests = MkTestPool "Reference counting C backend" [C] refcTests = MkTestPool "Reference counting C backend" [C]
[ "refc001" , "refc002" ] [ "refc001" , "refc002"
, "strings"
]
racketTests : TestPool racketTests : TestPool
racketTests = MkTestPool "Racket backend" [Racket] racketTests = MkTestPool "Racket backend" [Racket]

View File

@ -0,0 +1,32 @@
module TestStrings
import Data.String
main : IO ()
main = do
let helloWorld = "Hello, " ++ "world"
putStrLn helloWorld
putStrLn $ show $ length helloWorld
putStrLn $ reverse helloWorld
putStrLn $ substr 1 2 helloWorld
putStrLn $ show $ assert_total $ strIndex helloWorld 1
putStrLn $ strCons 'a' "bc"
putStrLn $ show $ strUncons "abc"
putStrLn $ fastPack ['p', 'a', 'c', 'k']
putStrLn $ show $ fastUnpack "unpack"
putStrLn $ fastConcat ["con", "cat", "en", "ate"]
let chars = ['a', 'A', '~', '0', ' ', '\n', '\x9f']
putStrLn $ show $ map isUpper chars
putStrLn $ show $ map isLower chars
putStrLn $ show $ map isDigit chars
putStrLn $ show $ map isSpace chars
putStrLn $ show $ map isNL chars
putStrLn $ show $ map isControl chars
putStrLn $ show $ map chr [97, 65, 126, 48, 32, 10, 159]
putStrLn $ show $ map ord chars

View File

@ -0,0 +1,18 @@
Hello, world
12
dlrow ,olleH
el
'e'
abc
Just ('a', "bc")
pack
['u', 'n', 'p', 'a', 'c', 'k']
concatenate
[False, True, False, False, False, False, False]
[True, False, False, False, False, False, False]
[False, False, False, True, False, False, False]
[False, False, False, False, True, True, False]
[False, False, False, False, False, True, False]
[False, False, False, False, False, True, True]
['a', 'A', '~', '0', ' ', '\LF', '\159']
[97, 65, 126, 48, 32, 10, 159]

4
tests/refc/strings/run Normal file
View File

@ -0,0 +1,4 @@
$1 --no-banner --no-color --console-width 0 --cg refc -o refc_strings TestStrings.idr > /dev/null
./build/exec/refc_strings
rm -rf build