mirror of
https://github.com/carp-lang/Carp.git
synced 2024-10-26 05:45:37 +03:00
e3af878e2a
* core: return Array.unsafe-first as reference (#970) * core: return Array.unsafe-last as reference (#970) * Make examples and tests build
152 lines
5.4 KiB
Plaintext
152 lines
5.4 KiB
Plaintext
(system-include "carp_pattern.h")
|
||
|
||
; Pattern matching on strings.
;
; Most entries in this module are `register` bindings to externally
; implemented functions (this file pulls in `carp_pattern.h`). Only
; `matches?`, `from-chars`, `global-match-str` and `split` are written in Carp.
(defmodule Pattern
  (doc find "finds the index of a pattern in a string.

Returns `-1` if it doesn’t find a matching pattern.")
  (register find (Fn [&Pattern &String] Int))

  (doc find-all "finds all indices of a pattern in a string.

Returns `[]` if it doesn’t find a matching pattern.")
  (register find-all (Fn [&Pattern &String] (Array Int)))

  (doc match-groups "finds the match groups of the first match of a pattern in
a string.

Returns `[]` if it doesn’t find a matching pattern.")
  (register match-groups (Fn [&Pattern &String] (Array String)))

  ; NOTE(review): this doc string used to promise `[]` on a failed match, which
  ; cannot be right for a function whose registered result type is `String`.
  ; "Empty string" is the assumed behavior of the external implementation;
  ; confirm against carp_pattern.h.
  (doc match-str "finds the first match of a pattern in a string.

Returns an empty string if it doesn’t find a matching pattern.")
  (register match-str (Fn [&Pattern &String] String))

  (doc global-match "finds all matches of a pattern in a string as a nested
array.

Returns `[]` if it doesn’t find a matching pattern.")
  (register global-match (Fn [&Pattern &String] (Array (Array String))))

  (doc substitute "finds all matches of a pattern in a string and replaces it
by another pattern `n` times.

The substitute pattern can reference the original pattern by match group
indices, such as `\1`. This means that backslashes need to be double escaped.

If you want to replace all occurrences of the pattern, use `-1`.")
  (register substitute (Fn [&Pattern &String &String Int] String))

  (doc matches? "checks whether a pattern matches a string.")
  ; A match exists exactly when `find` reports an index other than -1.
  (defn matches? [pat s] (/= (find pat s) -1))

  ; String conversions, bound to external implementations and hooked into the
  ; `str` and `prn` interfaces.
  (register str (Fn [&Pattern] String))
  (implements str Pattern.str)
  (register prn (Fn [&Pattern] String))
  (implements prn Pattern.prn)

  ; Construction from a string reference, equality, and memory management
  ; (`delete` takes ownership of the pattern, `copy` works on a reference).
  (register init (Fn [&String] Pattern))
  (register = (Fn [&Pattern &Pattern] Bool))
  (implements = Pattern.=)
  (register delete (Fn [Pattern] ()))
  (register copy (Fn [&Pattern] Pattern))
  (implements copy Pattern.copy)

  (doc from-chars "creates a pattern that matches a group of characters from a
list of those characters.")
  ; The characters are pasted verbatim between `[` and `]`.
  ; NOTE(review): nothing is escaped here, so characters that are special
  ; inside a character class presumably change the meaning of the resulting
  ; pattern. Verify before passing such characters.
  (defn from-chars [chars]
    (Pattern.init &(str* @"[" (String.from-chars chars) @"]")))

  (doc global-match-str "finds all matches of a pattern in a string and returns,
for each match, a copy of the first entry of its group array.")
  ; NOTE(review): `Array.unsafe-first` is applied to every inner array returned
  ; by `global-match`; this assumes none of them is empty. TODO confirm.
  (defn global-match-str [p s]
    (Array.copy-map &(fn [x] @(Array.unsafe-first x)) &(global-match p s)))

  (doc split "splits a string by a pattern.")
  ; `idx` holds the start index of every match, `strs` the matched text used to
  ; measure how many characters to skip after each start index. The result has
  ; one more slot than there are matches.
  ; NOTE(review): `strs` comes from `global-match-str`, i.e. the first entry of
  ; each match's group array. If the pattern contains capture groups that entry
  ; may not be the whole match, and `plen` would be off. Verify.
  (defn split [p s]
    (let-do [idx (find-all p s)
             strs (global-match-str p s)
             lidx (Array.length &idx)
             result (Array.allocate (Int.inc lidx))]
      ; Slot 0: everything before the first match, or the whole string when
      ; there is no match at all.
      (Array.aset-uninitialized! &result 0
        (slice s 0 (if (> lidx 0) @(Array.unsafe-nth &idx 0) (length s))))
      ; Middle slots: the text between the end of match `i` and the start of
      ; match `i + 1`. The bound reuses `lidx` instead of asking the array for
      ; its length a second time.
      (for [i 0 (Int.dec lidx)]
        (let [plen (length (Array.unsafe-nth &strs i))]
          (Array.aset-uninitialized! &result (Int.inc i)
            (slice s (+ @(Array.unsafe-nth &idx i) plen)
                     @(Array.unsafe-nth &idx (Int.inc i))))))
      ; Last slot: whatever follows the final match.
      (when (> lidx 0)
        (let [plen (length (Array.unsafe-nth &strs (Int.dec lidx)))]
          (Array.aset-uninitialized! &result lidx
            (suffix s (+ @(Array.unsafe-nth &idx (Int.dec lidx))
                         plen)))))
      result))
)
|
||
|
||
; Pattern-based helpers for strings: predicates, trimming and splitting.
(defmodule String
  (doc in? "checks whether a string contains another string.")
  ; NOTE(review): `sub` is handed to `Pattern.init` as-is, so it is treated as
  ; a pattern rather than as literal text. Verify callers never pass pattern
  ; metacharacters.
  (defn in? [s sub]
    (let [needle (Pattern.init sub)]
      (Pattern.matches? &needle s)))

  ; The predicates below all anchor a single character class with `^` and `$`
  ; and use `*`, so each of them also accepts the empty string.

  (doc upper? "checks whether a string is all uppercase.")
  (defn upper? [s]
    (Pattern.matches? #"^[\u\s\p]*$" s))

  (doc lower? "checks whether a string is all lowercase.")
  (defn lower? [s]
    (Pattern.matches? #"^[\l\s\p]*$" s))

  (doc num? "checks whether a string is numerical.")
  (defn num? [s]
    (Pattern.matches? #"^[0-9]*$" s))

  (doc alpha? "checks whether a string contains only alphabetical characters (a-Z).")
  (defn alpha? [s]
    (Pattern.matches? #"^[\u\l]*$" s))

  (doc alphanum? "checks whether a string is alphanumerical.")
  (defn alphanum? [s]
    (Pattern.matches? #"^[\w]*$" s))

  (doc hex? "checks whether a string is hexadecimal.")
  (defn hex? [s]
    (Pattern.matches? #"^[\x]*$" s))

  (doc trim-left "trims whitespace from the left of a string.")
  ; One substitution is enough: the pattern is anchored at the start.
  (defn trim-left [s]
    (Pattern.substitute #"^\s+" s "" 1))

  (doc trim-right "trims whitespace from the right of a string.")
  ; One substitution is enough: the pattern is anchored at the end.
  (defn trim-right [s]
    (Pattern.substitute #"\s+$" s "" 1))

  (doc trim "trims whitespace from both sides of a string.")
  ; Right side first, then the left side of that intermediate result.
  (defn trim [s]
    (let [right-trimmed (trim-right s)]
      (trim-left &right-trimmed)))

  (doc chomp "trims a newline from the end of a string.")
  ; Drops one trailing `\n` first and then one trailing `\r`, so a `\r\n`
  ; ending is removed as a whole.
  (defn chomp [s]
    (let [without-lf (Pattern.substitute #"\n$" s "" 1)]
      (Pattern.substitute #"\r$" &without-lf "" 1)))

  (doc collapse-whitespace "collapses groups of whitespace into single spaces.")
  ; `-1` asks `substitute` to replace every occurrence.
  (defn collapse-whitespace [s]
    (Pattern.substitute #"\s+" s " " -1))

  (doc split-by "splits a string by separators.")
  ; `cuts` holds the index of every separator character found in `s`; the
  ; result has one more slot than there are separators. Each separator is
  ; skipped by stepping one position past its index.
  (defn split-by [s separators]
    (let-do [sep-pat (Pattern.from-chars separators)
             cuts (Pattern.find-all &sep-pat s)
             n-cuts (Array.length &cuts)
             parts (Array.allocate (Int.inc n-cuts))]
      ; Slot 0: text before the first separator, or the whole string when no
      ; separator occurs.
      (Array.aset-uninitialized! &parts 0
        (slice s 0 (if (= n-cuts 0) (length s) @(Array.unsafe-nth &cuts 0))))
      ; Middle slots: text between separator `i` and separator `i + 1`.
      (for [i 0 (Int.dec n-cuts)]
        (let [from (Int.inc @(Array.unsafe-nth &cuts i))
              to @(Array.unsafe-nth &cuts (Int.inc i))]
          (Array.aset-uninitialized! &parts (Int.inc i) (slice s from to))))
      ; Last slot: text after the final separator.
      (when (/= n-cuts 0)
        (let [tail-start (Int.inc @(Array.unsafe-nth &cuts (Int.dec n-cuts)))]
          (Array.aset-uninitialized! &parts n-cuts (suffix s tail-start))))
      parts))

  (doc words "splits a string into words.")
  ; Separators are tab, space and newline.
  (defn words [s]
    (split-by s &[\tab \space \newline]))

  (doc lines "splits a string into lines.")
  ; The only separator is the newline character.
  (defn lines [s]
    (split-by s &[\newline]))
)
|