Carp/core/Pattern.carp
Scott Olsen b3ae93bfc4
fix: ensure registered types with fields emit path (#1364)
This fixes an issue where by types with fields registered in modules
weren't emitted with their module paths. This makes the behavior between
RegisterTypeWithoutFields and RegisterTypeWithFields the same. They both
account for the current module path in which the type is registered and
output the emitted typedef appropriately.

This fix also eliminates the need for a workaround in core/Pattern.carp.
We previously had to register MatchResult with an override because of
the old behavior, but now the override is no longer needed (since
MatchResult is defined as PatternMatchResult in its source header).  The
call (register MatchResult) within (defmodule Pattern) emits
"PatternMatchResult" by default since we now account for module paths
for registered types.
2021-12-16 21:31:16 +01:00

208 lines
7.8 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

(system-include "carp_pattern.h")
(doc Pattern "is a data type for pattern matching, similar to, but not the same
as, Regular Expressions. [See the docs for more
information](../LanguageGuide.html#patterns).")
(defmodule Pattern
(register-type MatchResult [start Int, end Int])
(defmodule MatchResult
(defn ref-str [ref-matchres]
(fmt "(MatchResult start=%d end=%d)"
(MatchResult.start ref-matchres)
(MatchResult.end ref-matchres) ))
(implements str Pattern.MatchResult.ref-str)
(implements prn Pattern.MatchResult.ref-str)
(defn str [matchres]
(Pattern.MatchResult.ref-str &matchres) )
(implements str Pattern.MatchResult.str)
(implements prn Pattern.MatchResult.str)
)
(defn non-match? [match-res]
(or (Int.< @(MatchResult.start match-res) 0)
(Int.< @(MatchResult.end match-res) 0) ))
(defn extract [match-res data]
(if (non-match? match-res)
(Maybe.Nothing)
(Maybe.Just (String.slice data @(MatchResult.start match-res)
@(MatchResult.end match-res) ))))
(doc match-from "returns start and end indizes of the first match after start-pos. Note that the end index points to the 1st character _after_ the match (like all slice functions).")
(register match-from (Fn [&Pattern &String Int] Pattern.MatchResult))
(doc match "returns start and end indizes of the first match after the start of the string. Note that the end index points to the 1st character _after_ the match (like all slice functions).")
(defn match [pattern data]
(match-from pattern data 0) )
(doc find "finds the index of a pattern in a string.
Returns `-1` if it doesnt find a matching pattern.")
(defn find [pattern data]
@(Pattern.MatchResult.start &(Pattern.match pattern data)) )
(doc find-all "finds all indices of a pattern in a string. The patterns may _not_ overlap.
Returns `[]` if it doesnt find a matching pattern.")
(defn find-all-matches [pattern data]
(let-do [result []
stop (String.length data)
found (match-from pattern data 0)
start @(MatchResult.end &found) ]
(while-do (and (Int.<= start stop)
(not (non-match? &found)) )
(set! result (Array.push-back result found))
(set! found (match-from pattern data start))
(set! start @(MatchResult.end &found)) )
result ))
(defn find-all [pattern data]
(Array.copy-map
&(fn [m] @(MatchResult.start m))
&(find-all-matches pattern data) ))
(doc match-groups "finds the match groups of the first match of a pattern in
a string.
Returns `[]` if it doesnt find a matching pattern.")
(register match-groups (Fn [&Pattern &String] (Array String)))
(doc match-str "finds the first match of a pattern in a string.
Returns an empty string if it doesnt find a matching pattern.")
(defn match-str [pattern data]
(Maybe.from (Pattern.extract &(Pattern.match pattern data) data) @"") )
(doc match-all-groups "finds all match groups of a pattern in a string as a nested
array.
Returns `[]` if it doesnt find a matching pattern.")
(register match-all-groups (Fn [&Pattern &String] (Array (Array String))))
(doc substitute "finds all matches of a pattern in a string and replaces it
by another pattern `n` times.
The substitute pattern can reference the original pattern by match group
indices, such as `\1`. This means that backslashes need to be double escaped.
If you want to replace all occurrences of the pattern, use `-1`.")
(register substitute (Fn [&Pattern &String &String Int] String))
(doc matches? "checks whether a pattern matches a string.")
(defn matches? [pat s] (/= (find pat s) -1))
(register str (Fn [&Pattern] String))
(implements str Pattern.str)
(register prn (Fn [&Pattern] String))
(implements prn Pattern.prn)
(register init (Fn [&String] Pattern))
(register = (Fn [&Pattern &Pattern] Bool))
(implements = Pattern.=)
(register delete (Fn [Pattern] ()))
(implements delete Pattern.delete)
(register copy (Fn [&Pattern] Pattern))
(implements copy Pattern.copy)
(doc from-chars "creates a pattern that matches a group of characters from a
list of those characters.")
(defn from-chars [chars]
(Pattern.init &(str* @"[" (String.from-chars chars) @"]")))
(defn global-match-str [pattern data]
(Array.copy-map
&(fn [m] (Maybe.unsafe-from (extract m data)))
&(find-all-matches pattern data)))
(doc split "splits a string by a pattern.")
(defn split [p s]
(let-do [idx (find-all p s)
strs (global-match-str p s)
lidx (Array.length &idx)
result (Array.allocate (Int.inc lidx))]
(Array.aset-uninitialized! &result 0
(slice s 0 (if (> lidx 0) @(Array.unsafe-nth &idx 0) (length s))))
(for [i 0 (Int.dec (Array.length &idx))]
(let [plen (length (Array.unsafe-nth &strs i))]
(Array.aset-uninitialized! &result (Int.inc i)
(slice s (+ @(Array.unsafe-nth &idx i) plen)
@(Array.unsafe-nth &idx (Int.inc i))))))
(when (> lidx 0)
(let [plen (length (Array.unsafe-nth &strs (Int.dec lidx)))]
(Array.aset-uninitialized! &result lidx
(suffix s (+ @(Array.unsafe-nth &idx (Int.dec lidx))
plen)))))
result))
)
(defmodule String
(doc in? "checks whether a string contains another string.")
(defn in? [s sub]
(Pattern.matches? &(Pattern.init sub) s))
(doc upper? "checks whether a string is all uppercase.")
(defn upper? [s]
(Pattern.matches? #"^[\u\s\p]*$" s))
(doc lower? "checks whether a string is all lowercase.")
(defn lower? [s]
(Pattern.matches? #"^[\l\s\p]*$" s))
(doc num? "checks whether a string is numerical.")
(defn num? [s]
(Pattern.matches? #"^[0-9]*$" s))
(doc alpha? "checks whether a string contains only alphabetical characters (a-Z).")
(defn alpha? [s]
(Pattern.matches? #"^[\u\l]*$" s))
(doc alphanum? "checks whether a string is alphanumerical.")
(defn alphanum? [s]
(Pattern.matches? #"^[\w]*$" s))
(doc hex? "checks whether a string is hexadecimal.")
(defn hex? [s]
(Pattern.matches? #"^[\x]*$" s))
(doc trim-left "trims whitespace from the left of a string.")
(defn trim-left [s]
(Pattern.substitute #"^\s+" s "" 1))
(doc trim-right "trims whitespace from the right of a string.")
(defn trim-right [s]
(Pattern.substitute #"\s+$" s "" 1))
(doc trim "trims whitespace from both sides of a string.")
(defn trim [s]
(trim-left &(trim-right s)))
(doc chomp "trims a newline from the end of a string.")
(defn chomp [s]
(Pattern.substitute #"\r$" &(Pattern.substitute #"\n$" s "" 1) "" 1))
(doc collapse-whitespace "collapses groups of whitespace into single spaces.")
(defn collapse-whitespace [s]
(Pattern.substitute #"\s+" s " " -1))
(doc split-by "splits a string by separators.")
(defn split-by [s separators]
(let-do [pat (Pattern.from-chars separators)
idx (Pattern.find-all &pat s)
lidx (Array.length &idx)
result (Array.allocate (Int.inc lidx))]
(Array.aset-uninitialized! &result 0
(slice s 0 (if (> lidx 0) @(Array.unsafe-nth &idx 0) (length s))))
(for [i 0 (Int.dec (Array.length &idx))]
(Array.aset-uninitialized! &result (Int.inc i)
(slice s (Int.inc @(Array.unsafe-nth &idx i)) @(Array.unsafe-nth &idx (Int.inc i)))))
(when (> lidx 0)
(Array.aset-uninitialized! &result lidx
(suffix s (Int.inc @(Array.unsafe-nth &idx (Int.dec lidx))))))
result))
(doc words "splits a string into words.")
(defn words [s]
(Array.endo-filter &(fn [s] (not (empty? s))) (split-by s &[\tab \space \newline])))
(doc lines "splits a string into lines.")
(defn lines [s]
(split-by s &[\newline]))
)