Carp/core/Pattern.carp

141 lines
5.2 KiB
Plaintext
Raw Normal View History

(system-include "carp_pattern.h")
(defmodule Pattern
  ; Bindings to the matching functions declared in `carp_pattern.h`, plus a few
  ; helpers written in Carp on top of them.

  (doc find "finds the index of a pattern in a string.
Returns `-1` if it doesn't find a matching pattern.")
  (register find (Fn [&Pattern &String] Int))

  (doc find-all "finds all indices of a pattern in a string.
Returns `[]` if it doesn't find a matching pattern.")
  (register find-all (Fn [&Pattern &String] (Array Int)))

  (doc match-groups "finds the match groups of the first match of a pattern in a string.
Returns `[]` if it doesn't find a matching pattern.")
  (register match-groups (Fn [&Pattern &String] (Array String)))

  ; `match-str` returns a `String`, not an array, so "no match" cannot be `[]`.
  (doc match-str "finds the first match of a pattern in a string.
Returns an empty string if it doesn't find a matching pattern.")
  (register match-str (Fn [&Pattern &String] String))

  (doc global-match "finds all matches of a pattern in a string as a nested array.
Returns `[]` if it doesn't find a matching pattern.")
  (register global-match (Fn [&Pattern &String] (Array (Array String))))

  (doc substitute "finds all matches of a pattern in a string and replaces it by another pattern `n` times.
If you want to replace all occurrences of the pattern, use `-1`.")
  (register substitute (Fn [&Pattern &String &String Int] String))

  (doc matches? "checks whether a pattern matches a string.")
  (defn matches? [pat s]
    ; `find` reports "no match" as -1; anything else is a match index.
    (/= (find pat s) -1))

  ; String conversion, construction from a string, equality and memory
  ; management for `Pattern` values; all implemented on the C side.
  (register str (Fn [&Pattern] String))
  (register prn (Fn [&Pattern] String))
  (register init (Fn [&String] Pattern))
  (register = (Fn [&Pattern &Pattern] Bool))
  (register delete (Fn [Pattern] ()))
  (register copy (Fn [&Pattern] Pattern))

  (doc from-chars "creates a pattern that matches a group of characters from a list of those characters.")
  (defn from-chars [chars]
    ; The characters are placed verbatim between `[` and `]`; nothing is
    ; escaped here, so characters that are special inside a set keep their
    ; special meaning.
    (Pattern.init &(str* @"[" (String.from-chars chars) @"]")))

  (doc global-match-str "finds all matches of a pattern in a string, keeping only the first element of each entry returned by `global-match`.")
  (defn global-match-str [p s]
    (Array.copy-map &Array.unsafe-first &(global-match p s)))

  (doc split "splits a string by a pattern.")
  (defn split [p s]
    (let-do [idx (find-all p s)            ; start index of every match
             strs (global-match-str p s)   ; text of every match, used for its length
             lidx (Array.length &idx)
             result (Array.allocate (Int.inc lidx))]
      ; Piece before the first match, or the whole string when nothing matched.
      (Array.aset-uninitialized! &result 0
        (slice s 0 (if (> lidx 0) @(Array.unsafe-nth &idx 0) (length s))))
      ; Pieces between match i and match i+1: start right after match i.
      (for [i 0 (Int.dec lidx)]
        (let [plen (length (Array.unsafe-nth &strs i))]
          (Array.aset-uninitialized! &result (Int.inc i)
            (slice s (+ @(Array.unsafe-nth &idx i) plen)
                     @(Array.unsafe-nth &idx (Int.inc i))))))
      ; Piece after the last match.
      (when (> lidx 0)
        (let [plen (length (Array.unsafe-nth &strs (Int.dec lidx)))]
          (Array.aset-uninitialized! &result lidx
            (suffix s (+ @(Array.unsafe-nth &idx (Int.dec lidx)) plen)))))
      result))
)
2018-03-13 13:59:48 +03:00
(defmodule String
  ; Pattern-based helpers added to the `String` module.
  ; The predicates below all use `*`, so they are also true for the empty string.

  (doc in? "checks whether a string contains another string.

Note that `sub` is compiled as a pattern, so pattern metacharacters in it are
interpreted as pattern syntax rather than matched literally.")
  (defn in? [s sub]
    (Pattern.matches? &(Pattern.init sub) s))

  (doc upper? "checks whether a string is all uppercase.")
  (defn upper? [s]
    (Pattern.matches? #"^[\u\s\p]*$" s))

  (doc lower? "checks whether a string is all lowercase.")
  (defn lower? [s]
    (Pattern.matches? #"^[\l\s\p]*$" s))

  (doc num? "checks whether a string is numerical.")
  (defn num? [s]
    (Pattern.matches? #"^[0-9]*$" s))

  (doc alpha? "checks whether a string contains only alphabetical characters (a-z, A-Z).")
  (defn alpha? [s]
    (Pattern.matches? #"^[\u\l]*$" s))

  (doc alphanum? "checks whether a string is alphanumerical.")
  (defn alphanum? [s]
    (Pattern.matches? #"^[\w]*$" s))

  (doc hex? "checks whether a string is hexadecimal.")
  (defn hex? [s]
    (Pattern.matches? #"^[\x]*$" s))

  (doc trim-left "trims whitespace from the left of a string.")
  (defn trim-left [s]
    (Pattern.substitute #"^\s+" s "" 1))

  (doc trim-right "trims whitespace from the right of a string.")
  (defn trim-right [s]
    (Pattern.substitute #"\s+$" s "" 1))

  (doc trim "trims whitespace from both sides of a string.")
  (defn trim [s]
    (trim-left &(trim-right s)))

  (doc chomp "trims a newline from the end of a string.")
  (defn chomp [s]
    ; Drop one trailing `\n` first, then one trailing `\r`, so that both
    ; "\n" and "\r\n" line endings are removed.
    (Pattern.substitute #"\r$" &(Pattern.substitute #"\n$" s "" 1) "" 1))

  (doc collapse-whitespace "collapses groups of whitespace into single spaces.")
  (defn collapse-whitespace [s]
    (Pattern.substitute #"\s+" s " " -1))

  (doc split-by "splits a string by separators.")
  (defn split-by [s separators]
    (let-do [pat (Pattern.from-chars separators)
             idx (Pattern.find-all &pat s)   ; index of every separator character
             lidx (Array.length &idx)
             result (Array.allocate (Int.inc lidx))]
      ; Piece before the first separator, or the whole string if there is none.
      (Array.aset-uninitialized! &result 0
        (slice s 0 (if (> lidx 0) @(Array.unsafe-nth &idx 0) (length s))))
      ; Pieces between separator i and separator i+1; a separator is always a
      ; single character, hence the `Int.inc` to step past it.
      (for [i 0 (Int.dec lidx)]
        (Array.aset-uninitialized! &result (Int.inc i)
          (slice s (Int.inc @(Array.unsafe-nth &idx i))
                   @(Array.unsafe-nth &idx (Int.inc i)))))
      ; Piece after the last separator.
      (when (> lidx 0)
        (Array.aset-uninitialized! &result lidx
          (suffix s (Int.inc @(Array.unsafe-nth &idx (Int.dec lidx))))))
      result))

  (doc words "splits a string into words.

Consecutive, leading or trailing whitespace does not produce empty words.")
  (defn words [s]
    ; `split-by` cuts at every single separator character, so runs of
    ; whitespace yield empty pieces; those are not words and are dropped.
    (Array.endo-filter &(fn [w] (> (length w) 0))
                       (split-by s &[\tab \space \newline])))

  (doc lines "splits a string into lines.")
  (defn lines [s]
    (split-by s &[\newline]))
)