mirror of
https://github.com/carp-lang/Carp.git
synced 2024-10-26 13:51:50 +03:00
209 lines
7.8 KiB
Plaintext
209 lines
7.8 KiB
Plaintext
(system-include "carp_pattern.h")
|
||
|
||
(doc Pattern "is a data type for pattern matching, similar to, but not the same
|
||
as, Regular Expressions. [See the docs for more
|
||
information](../LanguageGuide.html#patterns).")
|
||
(defmodule Pattern
|
||
|
||
(register-type MatchResult "PatternMatchResult" [start Int, end Int])
|
||
(defmodule MatchResult
|
||
(defn ref-str [ref-matchres]
|
||
(fmt "(MatchResult start=%d end=%d)"
|
||
(MatchResult.start ref-matchres)
|
||
(MatchResult.end ref-matchres) ))
|
||
(implements str Pattern.MatchResult.ref-str)
|
||
(implements prn Pattern.MatchResult.ref-str)
|
||
(defn str [matchres]
|
||
(Pattern.MatchResult.ref-str &matchres) )
|
||
(implements str Pattern.MatchResult.str)
|
||
(implements prn Pattern.MatchResult.str)
|
||
)
|
||
|
||
(defn non-match? [match-res]
|
||
(or (Int.< @(MatchResult.start match-res) 0)
|
||
(Int.< @(MatchResult.end match-res) 0) ))
|
||
(defn extract [match-res data]
|
||
(if (non-match? match-res)
|
||
(Maybe.Nothing)
|
||
(Maybe.Just (String.slice data @(MatchResult.start match-res)
|
||
@(MatchResult.end match-res) ))))
|
||
|
||
(doc match-from "returns start and end indizes of the first match after start-pos. Note that the end index points to the 1st character _after_ the match (like all slice functions).")
|
||
(register match-from (Fn [&Pattern &String Int] Pattern.MatchResult))
|
||
(doc match "returns start and end indizes of the first match after the start of the string. Note that the end index points to the 1st character _after_ the match (like all slice functions).")
|
||
(defn match [pattern data]
|
||
(match-from pattern data 0) )
|
||
|
||
(doc find "finds the index of a pattern in a string.
|
||
|
||
Returns `-1` if it doesn’t find a matching pattern.")
|
||
(defn find [pattern data]
|
||
@(Pattern.MatchResult.start &(Pattern.match pattern data)) )
|
||
|
||
(doc find-all "finds all indices of a pattern in a string. The patterns may _not_ overlap.
|
||
|
||
Returns `[]` if it doesn’t find a matching pattern.")
|
||
(defn find-all-matches [pattern data]
|
||
(let-do [result []
|
||
stop (String.length data)
|
||
found (match-from pattern data 0)
|
||
start @(MatchResult.end &found) ]
|
||
(while-do (and (Int.<= start stop)
|
||
(not (non-match? &found)) )
|
||
(set! result (Array.push-back result found))
|
||
(set! found (match-from pattern data start))
|
||
(set! start @(MatchResult.end &found)) )
|
||
result ))
|
||
(defn find-all [pattern data]
|
||
(Array.copy-map
|
||
&(fn [m] @(MatchResult.start m))
|
||
&(find-all-matches pattern data) ))
|
||
|
||
|
||
(doc match-groups "finds the match groups of the first match of a pattern in
|
||
a string.
|
||
|
||
Returns `[]` if it doesn’t find a matching pattern.")
|
||
(register match-groups (Fn [&Pattern &String] (Array String)))
|
||
(doc match-str "finds the first match of a pattern in a string.
|
||
|
||
Returns an empty string if it doesn’t find a matching pattern.")
|
||
(defn match-str [pattern data]
|
||
(Maybe.from (Pattern.extract &(Pattern.match pattern data) data) @"") )
|
||
|
||
(doc match-all-groups "finds all match groups of a pattern in a string as a nested
|
||
array.
|
||
|
||
Returns `[]` if it doesn’t find a matching pattern.")
|
||
(register match-all-groups (Fn [&Pattern &String] (Array (Array String))))
|
||
(doc substitute "finds all matches of a pattern in a string and replaces it
|
||
by another pattern `n` times.
|
||
|
||
The substitute pattern can reference the original pattern by match group
|
||
indices, such as `\1`. This means that backslashes need to be double escaped.
|
||
|
||
If you want to replace all occurrences of the pattern, use `-1`.")
|
||
(register substitute (Fn [&Pattern &String &String Int] String))
|
||
(doc matches? "checks whether a pattern matches a string.")
|
||
(defn matches? [pat s] (/= (find pat s) -1))
|
||
|
||
(register str (Fn [&Pattern] String))
|
||
(implements str Pattern.str)
|
||
(register prn (Fn [&Pattern] String))
|
||
(implements prn Pattern.prn)
|
||
|
||
(register init (Fn [&String] Pattern))
|
||
(register = (Fn [&Pattern &Pattern] Bool))
|
||
(implements = Pattern.=)
|
||
(register delete (Fn [Pattern] ()))
|
||
(implements delete Pattern.delete)
|
||
(register copy (Fn [&Pattern] Pattern))
|
||
(implements copy Pattern.copy)
|
||
|
||
(doc from-chars "creates a pattern that matches a group of characters from a
|
||
list of those characters.")
|
||
(defn from-chars [chars]
|
||
(Pattern.init &(str* @"[" (String.from-chars chars) @"]")))
|
||
|
||
(defn global-match-str [pattern data]
|
||
(Array.copy-map
|
||
&(fn [m] (Maybe.unsafe-from (extract m data)))
|
||
&(find-all-matches pattern data)))
|
||
|
||
|
||
(doc split "splits a string by a pattern.")
|
||
(defn split [p s]
|
||
(let-do [idx (find-all p s)
|
||
strs (global-match-str p s)
|
||
lidx (Array.length &idx)
|
||
result (Array.allocate (Int.inc lidx))]
|
||
(Array.aset-uninitialized! &result 0
|
||
(slice s 0 (if (> lidx 0) @(Array.unsafe-nth &idx 0) (length s))))
|
||
(for [i 0 (Int.dec (Array.length &idx))]
|
||
(let [plen (length (Array.unsafe-nth &strs i))]
|
||
(Array.aset-uninitialized! &result (Int.inc i)
|
||
(slice s (+ @(Array.unsafe-nth &idx i) plen)
|
||
@(Array.unsafe-nth &idx (Int.inc i))))))
|
||
(when (> lidx 0)
|
||
(let [plen (length (Array.unsafe-nth &strs (Int.dec lidx)))]
|
||
(Array.aset-uninitialized! &result lidx
|
||
(suffix s (+ @(Array.unsafe-nth &idx (Int.dec lidx))
|
||
plen)))))
|
||
result))
|
||
|
||
)
|
||
|
||
(defmodule String
|
||
(doc in? "checks whether a string contains another string.")
|
||
(defn in? [s sub]
|
||
(Pattern.matches? &(Pattern.init sub) s))
|
||
|
||
(doc upper? "checks whether a string is all uppercase.")
|
||
(defn upper? [s]
|
||
(Pattern.matches? #"^[\u\s\p]*$" s))
|
||
|
||
(doc lower? "checks whether a string is all lowercase.")
|
||
(defn lower? [s]
|
||
(Pattern.matches? #"^[\l\s\p]*$" s))
|
||
|
||
(doc num? "checks whether a string is numerical.")
|
||
(defn num? [s]
|
||
(Pattern.matches? #"^[0-9]*$" s))
|
||
|
||
(doc alpha? "checks whether a string contains only alphabetical characters (a-Z).")
|
||
(defn alpha? [s]
|
||
(Pattern.matches? #"^[\u\l]*$" s))
|
||
|
||
(doc alphanum? "checks whether a string is alphanumerical.")
|
||
(defn alphanum? [s]
|
||
(Pattern.matches? #"^[\w]*$" s))
|
||
|
||
(doc hex? "checks whether a string is hexadecimal.")
|
||
(defn hex? [s]
|
||
(Pattern.matches? #"^[\x]*$" s))
|
||
|
||
(doc trim-left "trims whitespace from the left of a string.")
|
||
(defn trim-left [s]
|
||
(Pattern.substitute #"^\s+" s "" 1))
|
||
|
||
(doc trim-right "trims whitespace from the right of a string.")
|
||
(defn trim-right [s]
|
||
(Pattern.substitute #"\s+$" s "" 1))
|
||
|
||
(doc trim "trims whitespace from both sides of a string.")
|
||
(defn trim [s]
|
||
(trim-left &(trim-right s)))
|
||
|
||
(doc chomp "trims a newline from the end of a string.")
|
||
(defn chomp [s]
|
||
(Pattern.substitute #"\r$" &(Pattern.substitute #"\n$" s "" 1) "" 1))
|
||
|
||
(doc collapse-whitespace "collapses groups of whitespace into single spaces.")
|
||
(defn collapse-whitespace [s]
|
||
(Pattern.substitute #"\s+" s " " -1))
|
||
|
||
(doc split-by "splits a string by separators.")
|
||
(defn split-by [s separators]
|
||
(let-do [pat (Pattern.from-chars separators)
|
||
idx (Pattern.find-all &pat s)
|
||
lidx (Array.length &idx)
|
||
result (Array.allocate (Int.inc lidx))]
|
||
(Array.aset-uninitialized! &result 0
|
||
(slice s 0 (if (> lidx 0) @(Array.unsafe-nth &idx 0) (length s))))
|
||
(for [i 0 (Int.dec (Array.length &idx))]
|
||
(Array.aset-uninitialized! &result (Int.inc i)
|
||
(slice s (Int.inc @(Array.unsafe-nth &idx i)) @(Array.unsafe-nth &idx (Int.inc i)))))
|
||
(when (> lidx 0)
|
||
(Array.aset-uninitialized! &result lidx
|
||
(suffix s (Int.inc @(Array.unsafe-nth &idx (Int.dec lidx))))))
|
||
result))
|
||
|
||
(doc words "splits a string into words.")
|
||
(defn words [s]
|
||
(Array.endo-filter &(fn [s] (not (empty? s))) (split-by s &[\tab \space \newline])))
|
||
|
||
(doc lines "splits a string into lines.")
|
||
(defn lines [s]
|
||
(split-by s &[\newline]))
|
||
)
|