(system-include "carp_pattern.h") (doc Pattern "is a data type for pattern matching, similar to, but not the same as, Regular Expressions. [See the docs for more information](../LanguageGuide.html#patterns).") (defmodule Pattern (register-type MatchResult [start Int, end Int]) (defmodule MatchResult (defn ref-str [ref-matchres] (fmt "(MatchResult start=%d end=%d)" (MatchResult.start ref-matchres) (MatchResult.end ref-matchres) )) (implements str Pattern.MatchResult.ref-str) (implements prn Pattern.MatchResult.ref-str) (defn str [matchres] (Pattern.MatchResult.ref-str &matchres) ) (implements str Pattern.MatchResult.str) (implements prn Pattern.MatchResult.str) ) (defn non-match? [match-res] (or (Int.< @(MatchResult.start match-res) 0) (Int.< @(MatchResult.end match-res) 0) )) (defn extract [match-res data] (if (non-match? match-res) (Maybe.Nothing) (Maybe.Just (String.slice data @(MatchResult.start match-res) @(MatchResult.end match-res) )))) (doc match-from "returns start and end indizes of the first match after start-pos. Note that the end index points to the 1st character _after_ the match (like all slice functions).") (register match-from (Fn [&Pattern &String Int] Pattern.MatchResult)) (doc match "returns start and end indizes of the first match after the start of the string. Note that the end index points to the 1st character _after_ the match (like all slice functions).") (defn match [pattern data] (match-from pattern data 0) ) (doc find "finds the index of a pattern in a string. Returns `-1` if it doesn’t find a matching pattern.") (defn find [pattern data] @(Pattern.MatchResult.start &(Pattern.match pattern data)) ) (doc find-all "finds all indices of a pattern in a string. The patterns may _not_ overlap. Returns `[]` if it doesn’t find a matching pattern.") (defn find-all-matches [pattern data] (let-do [result [] stop (String.length data) found (match-from pattern data 0) start @(MatchResult.end &found) ] (while-do (and (Int.<= start stop) (not (non-match? &found)) ) (set! result (Array.push-back result found)) (set! found (match-from pattern data start)) (set! start @(MatchResult.end &found)) ) result )) (defn find-all [pattern data] (Array.copy-map &(fn [m] @(MatchResult.start m)) &(find-all-matches pattern data) )) (doc match-groups "finds the match groups of the first match of a pattern in a string. Returns `[]` if it doesn’t find a matching pattern.") (register match-groups (Fn [&Pattern &String] (Array String))) (doc match-str "finds the first match of a pattern in a string. Returns an empty string if it doesn’t find a matching pattern.") (defn match-str [pattern data] (Maybe.from (Pattern.extract &(Pattern.match pattern data) data) @"") ) (doc match-all-groups "finds all match groups of a pattern in a string as a nested array. Returns `[]` if it doesn’t find a matching pattern.") (register match-all-groups (Fn [&Pattern &String] (Array (Array String)))) (doc substitute "finds all matches of a pattern in a string and replaces it by another pattern `n` times. The substitute pattern can reference the original pattern by match group indices, such as `\1`. This means that backslashes need to be double escaped. If you want to replace all occurrences of the pattern, use `-1`.") (register substitute (Fn [&Pattern &String &String Int] String)) (doc matches? "checks whether a pattern matches a string.") (defn matches? [pat s] (/= (find pat s) -1)) (register str (Fn [&Pattern] String)) (implements str Pattern.str) (register prn (Fn [&Pattern] String)) (implements prn Pattern.prn) (register init (Fn [&String] Pattern)) (register = (Fn [&Pattern &Pattern] Bool)) (implements = Pattern.=) (register delete (Fn [Pattern] ())) (implements delete Pattern.delete) (register copy (Fn [&Pattern] Pattern)) (implements copy Pattern.copy) (doc from-chars "creates a pattern that matches a group of characters from a list of those characters.") (defn from-chars [chars] (Pattern.init &(str* @"[" (String.from-chars chars) @"]"))) (defn global-match-str [pattern data] (Array.copy-map &(fn [m] (Maybe.unsafe-from (extract m data))) &(find-all-matches pattern data))) (doc split "splits a string by a pattern.") (defn split [p s] (let-do [idx (find-all p s) strs (global-match-str p s) lidx (Array.length &idx) result (Array.allocate (Int.inc lidx))] (Array.aset-uninitialized! &result 0 (slice s 0 (if (> lidx 0) @(Array.unsafe-nth &idx 0) (length s)))) (for [i 0 (Int.dec (Array.length &idx))] (let [plen (length (Array.unsafe-nth &strs i))] (Array.aset-uninitialized! &result (Int.inc i) (slice s (+ @(Array.unsafe-nth &idx i) plen) @(Array.unsafe-nth &idx (Int.inc i)))))) (when (> lidx 0) (let [plen (length (Array.unsafe-nth &strs (Int.dec lidx)))] (Array.aset-uninitialized! &result lidx (suffix s (+ @(Array.unsafe-nth &idx (Int.dec lidx)) plen))))) result)) ) (defmodule String (doc in? "checks whether a string contains another string.") (defn in? [s sub] (Pattern.matches? &(Pattern.init sub) s)) (doc upper? "checks whether a string is all uppercase.") (defn upper? [s] (Pattern.matches? #"^[\u\s\p]*$" s)) (doc lower? "checks whether a string is all lowercase.") (defn lower? [s] (Pattern.matches? #"^[\l\s\p]*$" s)) (doc num? "checks whether a string is numerical.") (defn num? [s] (Pattern.matches? #"^[0-9]*$" s)) (doc alpha? "checks whether a string contains only alphabetical characters (a-Z).") (defn alpha? [s] (Pattern.matches? #"^[\u\l]*$" s)) (doc alphanum? "checks whether a string is alphanumerical.") (defn alphanum? [s] (Pattern.matches? #"^[\w]*$" s)) (doc hex? "checks whether a string is hexadecimal.") (defn hex? [s] (Pattern.matches? #"^[\x]*$" s)) (doc trim-left "trims whitespace from the left of a string.") (defn trim-left [s] (Pattern.substitute #"^\s+" s "" 1)) (doc trim-right "trims whitespace from the right of a string.") (defn trim-right [s] (Pattern.substitute #"\s+$" s "" 1)) (doc trim "trims whitespace from both sides of a string.") (defn trim [s] (trim-left &(trim-right s))) (doc chomp "trims a newline from the end of a string.") (defn chomp [s] (Pattern.substitute #"\r$" &(Pattern.substitute #"\n$" s "" 1) "" 1)) (doc collapse-whitespace "collapses groups of whitespace into single spaces.") (defn collapse-whitespace [s] (Pattern.substitute #"\s+" s " " -1)) (doc split-by "splits a string by separators.") (defn split-by [s separators] (let-do [pat (Pattern.from-chars separators) idx (Pattern.find-all &pat s) lidx (Array.length &idx) result (Array.allocate (Int.inc lidx))] (Array.aset-uninitialized! &result 0 (slice s 0 (if (> lidx 0) @(Array.unsafe-nth &idx 0) (length s)))) (for [i 0 (Int.dec (Array.length &idx))] (Array.aset-uninitialized! &result (Int.inc i) (slice s (Int.inc @(Array.unsafe-nth &idx i)) @(Array.unsafe-nth &idx (Int.inc i))))) (when (> lidx 0) (Array.aset-uninitialized! &result lidx (suffix s (Int.inc @(Array.unsafe-nth &idx (Int.dec lidx)))))) result)) (doc words "splits a string into words.") (defn words [s] (Array.endo-filter &(fn [s] (not (empty? s))) (split-by s &[\tab \space \newline]))) (doc lines "splits a string into lines.") (defn lines [s] (split-by s &[\newline])) )