Carp/core/String.carp
Scott Olsen 3148703a22
feat: add Dynamic.String.to-array (#1382)
* feat: add Dynamic.String.to-array

Adds a new dynamic function that converts a string to an array of
strings, one for each character in the string. In other words, it's the
same as Dynamic.String.to-list but the result is an array structure
instead of a list.

It's also significantly faster than to-list on large inputs. The current
implementation of Dynamic.String.to-list takes long to process large
inputs, presumably because of its recursive behavior and multiple calls
to cons (I'm currently investigating this) (e.g. use read-file then try
calling to-list, you may need to wait a while). Contrarily, to-array is
nearly instantaneous even when the input string is large. This is
because it leverages Haskell's `splitOn` directly, which, when given an
empty delimiter, (which is the case for the empty string), splits the
entire input sequence. Because "" is the empty list for a string in
Haskell's representation, calling split-on with "" results in the
splitting behavior. The output also includes the delimiting case, so we
drop it using cdr.

* test: add test for Dynamic.String.to-array
2022-02-14 09:31:13 +01:00

526 lines
15 KiB
Plaintext

(system-include "carp_string.h")
(system-include "ctype.h")
(defmodule Byte
(hidden tolower-) ; helper func for String.ascii-to-lower
(register tolower- (Fn [Byte] Byte) "tolower")
(hidden toupper-) ; helper func for String.ascii-to-upper
(register toupper- (Fn [Byte] Byte) "toupper")
)
(doc String "is the string data type for representing text.")
(defmodule String
(register = (Fn [&String &String] Bool))
(register > (Fn [&String &String] Bool))
(register < (Fn [&String &String] Bool))
(register append (Fn [&String &String] String))
(register delete (Fn [String] ()))
(register copy (Fn [&String] String))
(register length (Fn [&String] Int))
(register cstr (Fn [&String] (Ptr CChar)))
(register from-cstr (Fn [(Ptr CChar)] String))
(register str (Fn [&String] String))
(register prn (Fn [&String] String))
(register index-of (Fn [&String Char] Int))
(register index-of-from (Fn [&String Char Int] Int))
(register char-at (Fn [&String Int] Char))
(register chars (Fn [&String] (Array Char)))
(register from-chars (Fn [&(Array Char)] String))
(register tail (λ [(Ref String)] String))
(register format (Fn [&String &String] String))
(implements format String.format)
(register string-set! (Fn [&String Int Char] ()))
(register string-set-at! (Fn [&String Int &String] ()))
(register allocate (Fn [Int Char] String))
(doc allocated? "checks if a String returned by String.allocate is not NULL. This is a safeguard against out-of-memory conditions.")
(defn allocated? [s]
(not (null? (cstr s))) )
(register from-bytes (Fn [&(Array Byte)] String))
(register to-bytes (Fn [&String] (Array Byte)))
(implements < String.<)
(implements > String.>)
(implements = String.=)
(implements copy String.copy)
(implements prn String.prn)
(implements str String.str)
(implements delete String.delete)
(doc head "Returns the character at start of string.")
(defn head [s]
(char-at s 0))
(doc repeat "Returns a new string which is `inpt` repeated `n` times.")
(defn repeat [n inpt]
(let [l (length inpt)
str (String.allocate (* n l) \0)]
(do
(for [i 0 n]
(string-set-at! &str (* i l) inpt))
str)))
(doc pad-left "Pads the left of a string with len bytes using the padding pad.")
(defn pad-left [len pad s]
(let [x (max 0 (- len (length s)))]
(append &(from-chars &(Array.replicate x &pad)) s)))
(doc pad-right "Pads the right of a string with len bytes using the padding pad.")
(defn pad-right [len pad s]
(let [x (max 0 (- len (length s)))]
(append s &(from-chars &(Array.replicate x &pad)))))
(doc count-char "Returns the number of occurrences of `c` in the string `s`.")
(defn count-char [s c]
(let-do [n 0]
(for [i 0 (length s)]
(when (= c (char-at s i))
(set! n (Int.inc n))))
n))
(doc reverse "Produce a new string which is `s` reversed.")
(defn reverse [s]
(from-chars &(Array.reverse (chars s))))
(doc empty? "Check if the string is the empty string.")
(defn empty? [s]
(Int.= (length s) 0))
(implements empty? String.empty?)
(doc slice "Return part of a string, beginning at index `a` and ending at index `b`.")
(defn slice [s a b]
(from-chars &(Array.slice &(chars s) a b)))
(implements slice String.slice)
(doc prefix "Return the first `a` characters of the string `s`.")
(defn prefix [s a]
(from-chars &(Array.prefix &(chars s) a)))
(doc suffix "Returns the suffix string starting at the `b`th character.")
(defn suffix [s b]
(from-chars &(Array.suffix &(chars s) b)))
(doc starts-with? "Check if the string `s` begins with the string `sub`.")
(defn starts-with? [s sub]
(let [ls (length sub)]
(and (>= (length s) ls) (= sub &(prefix s ls)))))
(doc ends-with? "Check if the string `s` ends with the string `sub`.")
(defn ends-with? [s sub]
(let [ls (length s)
lsub (length sub)]
(and (>= ls lsub) (= sub &(suffix s (- ls lsub))))))
(doc zero "The empty string.")
(defn zero [] @"")
(implements zero String.zero)
(doc sum-length "Returns the sum of lengths from an array of Strings.")
(defn sum-length [strings]
(let-do [sum 0
lstrings (Array.length strings)]
(for [i 0 lstrings]
(set! sum (+ sum (String.length (Array.unsafe-nth strings i)))))
sum))
(doc concat "Returns a new string which is the concatenation of the provided `strings`.")
(defn concat [strings]
;; This is using a StringBuilder pattern to only perform one allocation and
;; to only copy each of the incoming strings once.
;; This currently performs wasted String.length calls, as we call it for each
;; string once here and once in sum-length.
(let-do [j 0
lstrings (Array.length strings)
result (String.allocate (sum-length strings) \ )]
(for [i 0 lstrings]
(let-do [str (Array.unsafe-nth strings i)
len (String.length str)]
(string-set-at! &result j str)
(set! j (+ j len))))
result))
(doc join "Returns a new string which is the concatenation of the provided `strings` separated by string `sep`.")
(defn join [sep strings]
(let-do [j 0
lstrings (Array.length strings)
num-seps (max 0 (- lstrings 1))
sep-length (String.length sep)
seps-size (* num-seps sep-length)
result (String.allocate (+ seps-size (sum-length strings)) \ )]
(for [i 0 lstrings]
(let-do [str (Array.unsafe-nth strings i)
len (String.length str)]
(when (> i 0)
(do
(string-set-at! &result j sep)
(set! j (+ j sep-length))))
(string-set-at! &result j str)
(set! j (+ j len))))
result))
(doc join-with-char "Returns a new string which is the concatenation of the provided `strings` separated by char `sep`.")
(defn join-with-char [sep strings]
;; (= (join-with-char \ ["Hello" "world"]) (join " " ["Hello" "world"]))
(let-do [j 0
lstrings (Array.length strings)
sep-length (max 0 (- lstrings 1))
result (String.allocate (+ sep-length (sum-length strings)) \ )]
(for [i 0 lstrings]
(let-do [str (Array.unsafe-nth strings i)
len (String.length str)]
(when (> i 0)
(do
(string-set! &result j sep)
(set! j (+ j 1))))
(string-set-at! &result j str)
(set! j (+ j len))))
result))
(doc contains? "Checks whether the string `s` contains the character `c`.")
(defn contains? [s c] (> (index-of s c) -1))
(doc ascii-to-lower "converts each character in this string to lower case using tolower() from standard C library. Note: this will only work for ASCII characters.")
(defn ascii-to-lower [s]
(String.from-bytes &(Array.endo-map &(fn [c] (Byte.tolower- c)) (String.to-bytes s))) )
(doc ascii-to-upper "converts each character in this string to upper case using toupper() from standard C library. Note: this will only work for ASCII characters.")
(defn ascii-to-upper [s]
(String.from-bytes &(Array.endo-map &(fn [c] (Byte.toupper- c)) (String.to-bytes s))) )
)
(defmodule StringCopy
(defn str [s] (the String s))
(defn = [a b]
(String.= &a &b))
(implements = StringCopy.=)
(defn < [a b]
(String.< &a &b))
(implements < StringCopy.<)
(defn > [a b]
(String.> &a &b))
(implements > StringCopy.>)
(defn prn [s]
(prn &(the String s)))
(implements prn StringCopy.prn)
(defn str [s]
(str &(the String s)))
(implements str StringCopy.str)
)
(defmodule Bool
(register str (Fn [Bool] String))
(implements str Bool.str)
(register format (Fn [&String Bool] String))
(implements format Bool.format)
)
(defmodule Int
(register str (Fn [Int] String))
(implements str Int.str)
(register format (Fn [&String Int] String))
(implements format Int.format)
(private from-string-internal)
(hidden from-string-internal)
(register from-string-internal (λ [&String &Int] Bool))
(defn from-string [s]
(let [res 0]
(if (from-string-internal s &res)
(Maybe.Just res)
(Maybe.Nothing))))
(implements from-string Int.from-string)
)
(defmodule Byte
(register str (Fn [Byte] String))
(implements str Byte.str)
(register format (Fn [&String Byte] String))
(implements format Byte.format)
(private from-string-internal)
(hidden from-string-internal)
(register from-string-internal (λ [&String &Byte] Bool))
(defn from-string [s]
(let [res 0b]
(if (from-string-internal s &res)
(Maybe.Just res)
(Maybe.Nothing))))
(implements from-string Byte.from-string)
)
(defmodule Float
(register str (Fn [Float] String))
(implements str Float.str)
(register format (Fn [&String Float] String))
(implements format Float.format)
(private from-string-internal)
(hidden from-string-internal)
(register from-string-internal (λ [&String &Float] Bool))
(defn from-string [s]
(let [res 0.0f]
(if (from-string-internal s &res)
(Maybe.Just res)
(Maybe.Nothing))))
(implements from-string Float.from-string)
)
(defmodule Long
(register str (Fn [Long] String))
(implements str Long.str)
(register format (Fn [&String Long] String))
(implements format Long.format)
(private from-string-internal)
(hidden from-string-internal)
(register from-string-internal (λ [&String &Long] Bool))
(defn from-string [s]
(let [res 0l]
(if (from-string-internal s &res)
(Maybe.Just res)
(Maybe.Nothing))))
(implements from-string Long.from-string)
)
(defmodule Double
(register str (Fn [Double] String))
(implements str Double.str)
(register format (Fn [&String Double] String))
(implements format Double.format)
(private from-string-internal)
(hidden from-string-internal)
(register from-string-internal (λ [&String &Double] Bool))
(defn from-string [s]
(let [res 0.0]
(if (from-string-internal s &res)
(Maybe.Just res)
(Maybe.Nothing))))
(implements from-string Double.from-string)
)
(defmodule Char
(register str (Fn [Char] String))
(implements str Char.str)
(register prn (Fn [Char] String))
(implements prn Char.prn)
(register format (Fn [&String Char] String))
(implements format Char.format)
)
(defmodule Int (defn prn [x] (Int.str x)) (implements prn Int.prn))
(defmodule IntRef
(defn prn [x] (Int.str @x))
(implements prn IntRef.prn)
(defn str [x] (Int.str @x))
(implements str IntRef.str)
(defn format [s x] (Int.format s @x))
(implements format IntRef.format)
)
(defmodule Bool (defn prn [x] (Bool.str x)) (implements prn Bool.prn))
(defmodule BoolRef
(defn prn [x] (Bool.str @x))
(implements prn BoolRef.prn)
(defn str [x] (Bool.str @x))
(implements str BoolRef.str)
(defn format [s x] (Bool.format s @x))
(implements format BoolRef.format)
)
(defmodule Byte (defn prn [x] (Byte.str x)) (implements prn Byte.prn))
(defmodule ByteRef
(defn prn [x] (Byte.str @x))
(implements prn ByteRef.prn)
(defn str [x] (Byte.str @x))
(implements str ByteRef.str)
(defn format [s x] (Byte.format s @x))
(implements format ByteRef.format)
)
(defmodule Long (defn prn [x] (Long.str x)) (implements prn Long.prn))
(defmodule LongRef
(defn prn [x] (Long.str @x))
(implements prn LongRef.prn)
(defn str [x] (Long.str @x))
(implements str LongRef.str)
(defn format [s x] (Long.format s @x))
(implements format LongRef.format)
)
(defmodule Float (defn prn [x] (Float.str x)) (implements prn Float.prn))
(defmodule FloatRef
(defn prn [x] (Float.str @x))
(implements prn FloatRef.prn)
(defn str [x] (Float.str @x))
(implements str FloatRef.str)
(defn format [s x] (Float.format s @x))
(implements format FloatRef.format)
)
(defmodule Double (defn prn [x] (Double.str x)) (implements prn Double.prn))
(defmodule DoubleRef
(defn prn [x] (Double.str @x))
(implements prn DoubleRef.prn)
(defn str [x] (Double.str @x))
(implements str DoubleRef.str)
(defn format [s x] (Double.format s @x))
(implements format DoubleRef.format)
)
(defmodule Array (defn prn [x] (Array.str x)) (implements prn Array.prn))
(defmodule Int8
(register str (λ [Int8] String))
(implements str Int8.str)
(defn prn [a] (Int8.str a))
(implements prn Int8.prn)
)
(defmodule Int8Extra
(defn str [a] (Int8.str @a))
(implements str Int8Extra.str)
(defn prn [a] (Int8.prn @a))
(implements prn Int8Extra.prn)
)
(defmodule Int16
(register str (λ [Int16] String))
(implements str Int16.str)
(defn prn [a] (Int16.str a))
(implements prn Int16.prn)
)
(defmodule Int16Extra
(defn prn [a] (Int16.prn @a))
(implements prn Int16Extra.prn)
(defn str [a] (Int16.str @a))
(implements str Int16Extra.str)
)
(defmodule Int32
(register str (λ [Int32] String))
(implements str Int32.str)
(defn prn [a] (Int32.str a))
(implements prn Int32.prn)
)
(defmodule Int32Extra
(defn str [a] (Int32.str @a))
(implements str Int32Extra.str)
(defn prn [a] (Int32.prn @a))
(implements prn Int32Extra.prn)
)
(defmodule Int64
(register str (λ [Int64] String))
(implements str Int64.str)
(defn prn [a] (Int64.str a))
(implements prn Int64.prn)
)
(defmodule Int64Extra
(defn str [a] (Int64.str @a))
(implements str Int64Extra.str)
(defn prn [a] (Int64.prn @a))
(implements prn Int64Extra.prn)
)
(defmodule Uint8
(register str (λ [Uint8] String))
(implements str Uint8.str)
(defn prn [a] (Uint8.str a))
(implements prn Uint8.prn)
)
(defmodule Uint8Extra
(defn str [a] (Uint8.str @a))
(implements str Uint8Extra.str)
(defn prn [a] (Uint8.prn @a))
(implements prn Uint8Extra.prn)
)
(defmodule Uint16
(register str (λ [Uint16] String))
(implements str Uint16.str)
(defn prn [a] (Uint16.str a))
(implements prn Uint16.prn)
)
(defmodule Uint16Extra
(defn prn [a] (Uint16.prn @a))
(implements prn Uint16Extra.prn)
(defn str [a] (Uint16.str @a))
(implements str Uint16Extra.str)
)
(defmodule Uint32
(register str (λ [Uint32] String))
(implements str Uint32.str)
(defn prn [a] (Uint32.str a))
(implements prn Uint32.prn)
)
(defmodule Uint32Extra
(defn str [a] (Uint32.str @a))
(implements str Uint32Extra.str)
(defn prn [a] (Uint32.prn @a))
(implements prn Uint32Extra.prn)
)
(defmodule Uint64
(register str (λ [Uint64] String))
(implements str Uint64.str)
(defn prn [a] (Uint64.str a))
(implements prn Uint64.prn)
)
(defmodule Uint64Extra
(defn str [a] (Uint64.str @a))
(implements str Uint64Extra.str)
(defn prn [a] (Uint64.prn @a))
(implements prn Uint64Extra.prn)
)
(defmodule Pointer
(register strp (Fn [(Ptr a)] String) "Pointer_strp")
(defn str [a] (Pointer.strp a))
(implements str Pointer.str)
(defn prn [a] (Pointer.strp a))
(implements prn Pointer.prn)
)
(defndynamic build-str* [forms]
(if (= (length forms) 0)
'("")
(if (= (length forms) 1)
`&(str %(car forms))
`&(String.append &(str %(car forms)) %(build-str* (cdr forms))))))
(defmacro str* [:rest forms]
`(copy %(build-str* forms)))
(defmodule Dynamic
(defmodule String
(defndynamic empty? [s]
(= (String.length s) 0))
(defndynamic to-list [s]
(if (String.empty? s)
'()
(cons (String.head s) (String.to-list (String.tail s)))))
(doc to-array
"Converts a string to an array of strings of each of its characters.")
(defndynamic to-array [s]
(cdr (String.split-on "" s)))
)
)