Merge pull request #232 from mkfifo/string-library-improvements

String library improvements
This commit is contained in:
Erik Svedäng 2018-05-22 19:10:22 +02:00 committed by GitHub
commit 707614e999
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 379 additions and 43 deletions

View File

@ -166,13 +166,28 @@
(aset-uninitialized! &na i (f (nth a i))))
na))
(doc sum-count "Returns the sum of counts from an Array of Arrays.")
(defn sum-count [xs]
(let-do [sum 0
lxs (Array.count xs)]
(for [i 0 lxs]
(set! sum (+ sum (Array.count (Array.nth xs i)))))
sum))
(doc concat "Returns a new Array which is the concatenation of the provided `xs`.")
(defn concat [xs]
(let-do [result []
len (Array.count xs)]
(for [i 0 len]
(let [ys (nth xs i)
inner-len (Array.count ys)]
(for [j 0 inner-len]
(set! result (Array.push-back result @(nth ys j))))))
;; This is using a StringBuilder pattern to only perform one allocation and
;; to only copy each of the incoming Array(s) once.
;; This currently performs wasted Array.count calls, as we call it for each
;; Array once here and once in sum-count.
(let-do [j 0
lxs (Array.count xs)
result (Array.allocate (sum-count xs))]
(for [i 0 lxs]
(let-do [arr (Array.nth xs i)
len (Array.count arr)]
(for [k 0 len]
(aset-uninitialized! &result (+ j k) @(Array.nth arr k)))
(set! j (+ j len))))
result))
)

View File

@ -18,10 +18,10 @@
(hidden get-unit)
(defn get-unit [n]
(cond
(< n 1000.0) (String.append (Double.str n) @"ns")
(< n 1000000.0) (String.append (Double.str (/ n 1000.0)) @"µs")
(< n 1000000000.0) (String.append (Double.str (/ n 1000000.0)) @"ms")
(String.append (Double.str (/ n 1000000000.0)) @"s")))
(< n 1000.0) (StringCopy.append (Double.str n) @"ns")
(< n 1000000.0) (StringCopy.append (Double.str (/ n 1000.0)) @"µs")
(< n 1000000000.0) (StringCopy.append (Double.str (/ n 1000000.0)) @"ms")
(StringCopy.append (Double.str (/ n 1000000000.0)) @"s")))
(private print)
(hidden print)

View File

@ -12,6 +12,6 @@
(let [segments (split-by path &[\/])
n (dec (count &segments))
without-last (prefix-array &segments n)]
(join &(copy-map append-slash &without-last))))
(concat &(copy-map append-slash &without-last))))
)

View File

@ -6,7 +6,7 @@
(if (= idx -1)
(list 'copy s) ; no more splits found, just return string
(if (= \% (String.char-at s (inc idx))) ; this is an escaped %
(list 'String.append
(list 'StringCopy.append
(list 'copy "%")
(fmt-internal (String.substring s (+ idx 2) len) args))
(if (= 0 (count args)) ; we need to insert something, but have nothing
@ -19,9 +19,9 @@
(macro-error "error in format string: too many arguments to format string")
(list 'format s (car args)))
(let [slice (String.substring s 0 (+ (inc idx) next))]
(list 'String.append (list 'format slice (car args))
(fmt-internal (String.substring s (+ (inc idx) next) len)
(cdr args)))))))))))
(list 'StringCopy.append (list 'format slice (car args))
(fmt-internal (String.substring s (+ (inc idx) next) len)
(cdr args)))))))))))
(doc fmt "fmt formats a string. It supports all of the string interpolations defined in format of the type that should be interpolated (e.g. %d and %x on integers).")
(defmacro fmt [s :rest args]

View File

@ -260,7 +260,7 @@
(list "")
(if (= (count forms) 1)
(list 'str (car forms))
(list 'String.append (list 'str (car forms)) (build-str* (cdr forms))))))
(list 'StringCopy.append (list 'str (car forms)) (build-str* (cdr forms))))))
(defmacro str* [:rest forms]
(build-str* forms))

View File

@ -3,40 +3,48 @@
(defmodule String
(register = (Fn [&String &String] Bool))
(register append (Fn [String String] String)) ;; TODO: should take &String:s
(register append (Fn [&String &String] String))
(register delete (Fn [String] ()))
(register copy (Fn [&String] String))
(register count (Fn [&String] Int))
(register cstr (Fn [&String] (Ptr Char)))
(register str (Fn [&String] String))
(register prn (Fn [&String] String))
(register index-of (Fn [&String Char] Int))
(register index-of-from (Fn [&String Char Int] Int))
(register char-at (Fn [&String Int] Char))
(register chars (Fn [&String] (Array Char)))
(register from-chars (Fn [&(Array Char)] String))
(register tail (λ [(Ref String)] String))
(register format (Fn [&String &String] String))
(register string-set! (Fn [&String Int Char] ()))
(register string-set-at! (Fn [&String Int &String] ()))
(register allocate (Fn [Int Char] String))
(defn /= [a b]
(not (= (the (Ref String) a) b)))
(doc head "Returns the character at start of string.")
(defn head [s]
(char-at s 0))
(doc repeat "Returns a new string which is `inpt` repeated `n` times.")
(defn repeat [n inpt]
(let [str @""]
(do
(for [i 0 n]
(set! str (append str @inpt)))
(set! str (append &str inpt)))
str)))
(defn pad-left [len pad s]
(let [x (Int.max 0 (- len (count s)))]
(append (from-chars &(Array.replicate x &pad)) @s)))
(append &(from-chars &(Array.replicate x &pad)) s)))
(defn pad-right [len pad s]
(let [x (Int.max 0 (- len (count s)))]
(append @s (from-chars &(Array.replicate x &pad)))))
(append s &(from-chars &(Array.replicate x &pad)))))
(doc count-char "Returns the number of occurrences of `c` in the string `s`.")
(defn count-char [s c]
(let-do [n 0]
(for [i 0 (count s)]
@ -44,39 +52,101 @@
(set! n (Int.inc n))))
n))
(doc reverse "Produce a new string which is `s` reversed.")
(defn reverse [s]
(from-chars &(Array.reverse (chars s))))
(doc empty? "Check if the string is the empty string.")
(defn empty? [s]
(Int.= (count s) 0))
(defn substring [s a b]
(from-chars &(Array.subarray &(chars s) a b)))
(doc prefix-string "Return the first `a` characters of the string `s`.")
(defn prefix-string [s a]
(from-chars &(Array.prefix-array &(chars s) a)))
(doc suffix-string "Return the last `b` characters of the string `s`.")
(defn suffix-string [s b]
(from-chars &(Array.suffix-array &(chars s) b)))
(doc starts-with? "Check if the string `s` begins with the string `sub`.")
(defn starts-with? [s sub]
(= sub &(prefix-string s (count sub))))
(doc starts-with? "Check if the string `s` ends with the string `sub`.")
(defn ends-with? [s sub]
(= sub &(suffix-string s (- (count s) (count sub)))))
(doc zero "The empty string.")
(defn zero [] @"")
;; TODO: Should use some kind of "StringBuilder" instead of generating intermediate strings.
(defn join [strings]
(let-do [result @""
len (Array.count strings)]
(for [i 0 len]
(set! result (String.append result @(Array.nth strings i))))
(doc sum-count "Returns the sum of counts from an array of Strings.")
(defn sum-count [strings]
(let-do [sum 0
lstrings (Array.count strings)]
(for [i 0 lstrings]
(set! sum (+ sum (String.count (Array.nth strings i)))))
sum))
(doc concat "Returns a new string which is the concatenation of the provided `strings`.")
(defn concat [strings]
;; This is using a StringBuilder pattern to only perform one allocation and
;; to only copy each of the incoming strings once.
;; This currently performs wasted String.count calls, as we call it for each
;; string once here and once in sum-count.
(let-do [j 0
lstrings (Array.count strings)
result (String.allocate (sum-count strings) \ )]
(for [i 0 lstrings]
(let-do [str (Array.nth strings i)
len (String.count str)]
(string-set-at! &result j str)
(set! j (+ j len))))
result))
(doc join "Returns a new string which is the concatenation of the provided `strings` separated by string `sep`.")
(defn join [sep strings]
(let-do [j 0
lstrings (Array.count strings)
num-seps (- lstrings 1)
sep-length (String.count &sep)
seps-size (* num-seps sep-length)
result (String.allocate (+ seps-size (sum-count strings)) \ )]
(for [i 0 lstrings]
(let-do [str (Array.nth strings i)
len (String.count str)]
(when (> i 0)
(do
(string-set-at! &result j &sep)
(set! j (+ j sep-length))))
(string-set-at! &result j str)
(set! j (+ j len))))
result))
(doc join-with-char "Returns a new string which is the concatenation of the provided `strings` separated by char `sep`.")
(defn join-with-char [sep strings]
;; (= (join-with-char \ ["Hello" "world"]) (join " " ["Hello" "world"]))
(let-do [j 0
lstrings (Array.count strings)
sep-count (- lstrings 1)
result (String.allocate (+ sep-count (sum-count strings)) \ )]
(for [i 0 lstrings]
(let-do [str (Array.nth strings i)
len (String.count str)]
(when (> i 0)
(do
(string-set! &result j sep)
(set! j (+ j 1))))
(string-set-at! &result j str)
(set! j (+ j len))))
result))
)
(defmodule StringCopy
(register append (Fn [String String] String))
(defn = [a b]
(String.= &a &b))
@ -87,7 +157,7 @@
(defdynamic string-join- [strs]
(if (= (count strs) 0)
'(String.copy "")
(list 'String.append (car strs) (string-join- (cdr strs)))))
(list 'StringCopy.append (car strs) (string-join- (cdr strs)))))
;; TODO: Remove this function and replace uses with 'str*'
(defmacro string-join [:rest strs]

View File

@ -33,7 +33,7 @@
(if (String.= cname (Array.nth (Array.nth &color-table i) 0))
(set! res @(Array.nth (Array.nth &color-table i) 1))
()))
(String.append @"\x1b[" (String.append res @"m")))))
(StringCopy.append @"\x1b[" (StringCopy.append res @"m")))))
(defn color [cname]
@ -85,11 +85,11 @@
(if (Int.> (Int.+ passed failed) 0)
(do
(IO.color "green")
(if (Int.> passed 0) (IO.print &(String.append @"\t|" (String.repeat passed "="))) ())
(if (Int.> passed 0) (IO.print &(StringCopy.append @"\t|" (String.repeat passed "="))) ())
(if (Int.= failed 0) (IO.print "|") ())
(IO.color "red")
(if (Int.= passed 0) (IO.print "\t|") ())
(if (Int.> failed 0) (IO.print &(String.append (String.repeat failed "=") @"|")) ())
(if (Int.> failed 0) (IO.print &(StringCopy.append (String.repeat failed "=") @"|")) ())
(IO.println ""))
())
(IO.color "green")

View File

@ -584,11 +584,11 @@ String Pattern_internal_add_value(PatternMatchState *ms, String res, String src,
}
res = Pattern_internal_add_char(res, tr[i]);
}
else if (tr[i] == '0') res = String_append(res, src);
else if (tr[i] == '0') res = StringCopy_append(res, src);
else {
Array a = {.len = 0, .capacity = 0, .data = NULL};
Pattern_internal_push_onecapture(ms, tr[i] - '1', src, e, a);
res = String_append(res, ((String*)a.data)[0]); /* add capture to accumulated result */
res = StringCopy_append(res, ((String*)a.data)[0]); /* add capture to accumulated result */
}
}
}

View File

@ -4,10 +4,75 @@
#include <carp_memory.h>
#include <core.h>
String String_allocate(int len, char byte) {
/* Allocate a string of length 'len + 1'
* setting the first len bytes to byte
* and adding a null terminator
*
* String_alloc(10, "a") == "aaaaaaaaaa"
*/
String ptr = CARP_MALLOC(len+1);
memset(ptr, byte, len);
ptr[len] = '\0';
return ptr;
}
void String_delete(String s) {
CARP_FREE(s);
}
void String_string_MINUS_set_BANG_(String *s, int i, char ch) {
#ifndef OPTIMIZE
int l = strlen(*s);
assert(i >= 0);
assert(i < l);
#endif
(*s)[i] = ch;
}
void String_string_MINUS_set_MINUS_at_BANG_(String *into, int i, String *src) {
char *dest = (*into) + i;
int lsrc = strlen(*src);
#ifndef OPTIMIZE
int linto = strlen(*into);
assert(i >= 0);
/* given a string and indicies
*
* 0 1 2 3 4 5 6 7 8 9
* "a b c d e f g h i j"
* linto = strlen(...) = 10
*
* if we want to insert at '6' a string of length '4'
*
* 0 1 2 3
* "w x y z"
* ldest = strlen(...) = 4
*
* we need to make sure that the new string will not grow the first
*
* 0 1 2 3 4 5 6 7 8 9
* "a b c d e f g h i j"
* ^
* |
* 0 1 2 3
* "w x y z"
*
* we check this by
* (i + ldest - 1) < linto
* (6 + 4 - 1) < 10
* (10 - 1) < 10
* 9 < 10
* true
*
* so this write is safe
*/
assert((i+lsrc-1) < linto);
#endif
strncpy(dest, *src, lsrc);
}
String String_copy(String *s) {
size_t len = strlen(*s) + 1;
String ptr = CARP_MALLOC(len);
@ -23,12 +88,17 @@ bool String__EQ_(String *a, String *b) {
return strcmp(*a, *b) == 0;
}
String String_append(String a, String b) {
int la = strlen(a);
int lb = strlen(b);
String String_append(String *a, String *b) {
int la = strlen(*a);
int lb = strlen(*b);
int total = la + lb + 1;
String buffer = CARP_MALLOC(total);
snprintf(buffer, total, "%s%s", a, b);
snprintf(buffer, total, "%s%s", *a, *b);
return buffer;
}
String StringCopy_append(String a, String b) {
String buffer = String_append(&a, &b);
CARP_FREE(a);
CARP_FREE(b);
return buffer;
@ -196,3 +266,24 @@ String Long_format(String* str, long x) {
long Long_from_MINUS_string(String *s) {
return atol(*s);
}
int String_index_MINUS_of_MINUS_from(String *s, char c, int i) {
/* Return index of first occurrence of `c` in `s` AFTER index i
* Returns -1 if not found
*/
++i; // skip first character as we want AFTER i
int len = strlen(*s);
for (; i<len; ++i) {
if (c == (*s)[i]) {
return i;
}
}
return -1;
}
int String_index_MINUS_of(String *s, char c) {
/* Return index of first occurrence of `c` in `s`
* Returns -1 if not found
*/
return String_index_MINUS_of_MINUS_from(s, c, -1);
}

View File

@ -89,7 +89,7 @@ be used at a specific point in your program. In such cases the concept of 'holes
add a hole in your source code and reload (":r") to let the Carp compiler figure out what type goes there.
```
(String.append ?w00t @"!") ;; Will generate a type error telling you that the type of '?w00t' is String
(StringCopy.append ?w00t @"!") ;; Will generate a type error telling you that the type of '?w00t' is String
```
### Special forms during evaluation of dynamic code

View File

@ -12,7 +12,7 @@
(defmodule Things
(defn inside [s]
(let [msg (String.append s (String.copy "!"))]
(let [msg (StringCopy.append s (String.copy "!"))]
(println (ref msg))))
(defn call []
(inside (String.copy "Hello"))))
@ -131,7 +131,7 @@
(defn print-last-string []
(println &(get-last-string [(String.copy "NO") (String.copy "NO") (String.copy "YES")])))
(defn exclaim [x] (String.append x @"!"))
(defn exclaim [x] (StringCopy.append x @"!"))
(deftype Simple [])
(deftype Complex [x Int f Float d Double s String c Char])

View File

@ -8,8 +8,8 @@
[4 5 6]
[7 8 9]])
(defn excl [x] (String.append x @"!"))
(defn excl-ref [x] (String.append @x @"!"))
(defn excl [x] (StringCopy.append x @"!"))
(defn excl-ref [x] (StringCopy.append @x @"!"))
(defn inc-ref [x] (+ @x 1))
@ -132,4 +132,9 @@
&(aupdate [1 2] 1 inc-ref)
"aupdate works as expected"
)
(assert-equal test
&[1 2 3 4 5 6 7 8]
&(concat &[[1] [2 3] [4 5 6] [7 8]])
"concat works as expected"
)
(print-test-results test))))

View File

@ -191,7 +191,7 @@
(assert (= &[@"q" @"b" @"c"] &xs)))))
(defn append-ref [a b]
(String.append @a @b))
(StringCopy.append @a @b))
(defn array-reduce []
(let [xs [@"a" @"b" @"c"]
@ -317,6 +317,18 @@
ys (Array.copy-map str-ref &xs)]
(assert (= &[@"1" @"2" @"3" @"4"] &ys))))
(defn string-append-leak-test []
(let [a "abcdef"
b "ghijklmnopqrstuvwxyz"]
(let [result (String.append a b)]
(assert (StringCopy.= result @"abcdefghijklmnopqrstuvwxyz")))))
(defn stringcopy-append-leak-test []
(let [a "abcdef"
b "ghijklmnopqrstuvwxyz"]
(let [result (StringCopy.append @a @b)]
(assert (StringCopy.= result @"abcdefghijklmnopqrstuvwxyz")))))
(defn main []
(let []
(with-test test
@ -370,4 +382,6 @@
(assert-no-leak test array-replicate "array-replicate does not leak")
(assert-no-leak test array-copy-map-1 "array-copy-map-1 does not leak")
(assert-no-leak test array-copy-map-2 "array-copy-map-2 does not leak")
(assert-no-leak test string-append-leak-test "String.append does not leak")
(assert-no-leak test stringcopy-append-leak-test "StringCopy.append does not leak")
(print-test-results test))))

View File

@ -5,6 +5,24 @@
(defn main []
(with-test test
(assert-true test
(= @"hello world" @"hello world")
"string = works as expected"
)
(assert-true test
(/= @"hello world" @"bob")
"string /= works as expected"
)
(assert-equal test
"true"
&(str true)
"str on true works as expected"
)
(assert-equal test
"false"
&(str false)
"str on false works as expected"
)
(assert-equal test
\s
(char-at "lisp" 2)
@ -49,6 +67,10 @@
(empty? "")
"empty? returns true on empty string"
)
(assert-true test
(empty? &(zero))
"empty? returns true on (zero)"
)
(assert-equal test
&[\e \r \i \k]
&(chars "erik")
@ -197,6 +219,125 @@
&(split-by "erikmsvedlhejxfoo" &[\m \l \x])
"split-by works correctly"
)
(assert-equal test
"hello world"
&(append "hello " "world")
"append works correctly"
)
(assert-equal test
"hello world"
&(concat &[@"hello" @" " @"world"])
"concat works correctly"
)
(assert-equal test
"hello world"
&(join @"" &[@"hello" @" " @"world"])
"join works correctly I"
)
(assert-equal test
"hello world"
&(join @" " &[@"hello" @"world"])
"join works correctly II"
)
(assert-equal test
"hello aaaa there aaaa world"
&(join @" aaaa " &[@"hello" @"there" @"world"])
"join works correctly III"
)
(assert-equal test
"hello there world"
&(join-with-char \ &[@"hello" @"there" @"world"])
"join-with-char works correctly I"
)
(assert-equal test
"hello, there, world"
&(join-with-char \, &[@"hello" @" there" @" world"])
"join-with-char works correctly II"
)
(assert-equal test
"hellohellohello"
&(repeat 3 "hello")
"repeat works correctly"
)
(assert-equal test
"bcde"
&(let-do [str @"aaaa"]
(string-set! &str 0 \b)
(string-set! &str 1 \c)
(string-set! &str 2 \d)
(string-set! &str 3 \e)
str)
"string-set! works correctly"
)
(assert-equal test
"aaaabcdeaq"
&(let-do [str @"aaaaaaaaaa"]
(string-set-at! &str 4 "bcde")
(string-set-at! &str 9 "q")
str)
"string-set-at! works correctly"
)
(assert-equal test
"aaaaaaaaaa"
&(allocate 10 \a)
"allocate works correctly I"
)
(assert-equal test
""
&(allocate 0 \a)
"allocate works correctly II"
)
(assert-equal test
5
(count &(allocate 5 \a))
"allocate works correctly III"
)
(assert-equal test
2
(index-of "abcde" \c)
"index-of works correctly I"
)
(assert-equal test
4
(index-of "abcde" \e)
"index-of works correctly II"
)
(assert-equal test
-1
(index-of "abcde" \f)
"index-of works correctly III"
)
(assert-equal test
0
(index-of "abcde" \a)
"index-of works correctly IIII"
)
(assert-equal test
-1
(index-of-from "abcde" \a 0)
"index-of-from works correctly I"
)
(assert-equal test
0
(index-of-from "abcde" \a -1)
"index-of-from works correctly II"
)
(assert-equal test
2
(index-of-from "abcab" \c 1)
"index-of-from works correctly III"
)
(assert-equal test
-1
(index-of-from "abcab" \c 2)
"index-of-from works correctly IV"
)
(assert-equal test
5
(index-of-from "abcabc" \c 2)
"index-of-from works correctly IV"
)
(print-test-results test)
)
)