mirror of
https://github.com/carp-lang/Carp.git
synced 2024-10-26 13:51:50 +03:00
b3ae93bfc4
This fixes an issue where by types with fields registered in modules weren't emitted with their module paths. This makes the behavior between RegisterTypeWithoutFields and RegisterTypeWithFields the same. They both account for the current module path in which the type is registered and output the emitted typedef appropriately. This fix also eliminates the need for a workaround in core/Pattern.carp. We previously had to register MatchResult with an override because of the old behavior, but now the override is no longer needed (since MatchResult is defined as PatternMatchResult in its source header). The call (register MatchResult) within (defmodule Pattern) emits "PatternMatchResult" by default since we now account for module paths for registered types.
208 lines
7.8 KiB
Plaintext
208 lines
7.8 KiB
Plaintext
(system-include "carp_pattern.h")
|
||
|
||
(doc Pattern "is a data type for pattern matching, similar to, but not the same
|
||
as, Regular Expressions. [See the docs for more
|
||
information](../LanguageGuide.html#patterns).")
|
||
(defmodule Pattern
|
||
(register-type MatchResult [start Int, end Int])
|
||
(defmodule MatchResult
|
||
(defn ref-str [ref-matchres]
|
||
(fmt "(MatchResult start=%d end=%d)"
|
||
(MatchResult.start ref-matchres)
|
||
(MatchResult.end ref-matchres) ))
|
||
(implements str Pattern.MatchResult.ref-str)
|
||
(implements prn Pattern.MatchResult.ref-str)
|
||
(defn str [matchres]
|
||
(Pattern.MatchResult.ref-str &matchres) )
|
||
(implements str Pattern.MatchResult.str)
|
||
(implements prn Pattern.MatchResult.str)
|
||
)
|
||
|
||
(defn non-match? [match-res]
|
||
(or (Int.< @(MatchResult.start match-res) 0)
|
||
(Int.< @(MatchResult.end match-res) 0) ))
|
||
(defn extract [match-res data]
|
||
(if (non-match? match-res)
|
||
(Maybe.Nothing)
|
||
(Maybe.Just (String.slice data @(MatchResult.start match-res)
|
||
@(MatchResult.end match-res) ))))
|
||
|
||
(doc match-from "returns start and end indizes of the first match after start-pos. Note that the end index points to the 1st character _after_ the match (like all slice functions).")
|
||
(register match-from (Fn [&Pattern &String Int] Pattern.MatchResult))
|
||
(doc match "returns start and end indizes of the first match after the start of the string. Note that the end index points to the 1st character _after_ the match (like all slice functions).")
|
||
(defn match [pattern data]
|
||
(match-from pattern data 0) )
|
||
|
||
(doc find "finds the index of a pattern in a string.
|
||
|
||
Returns `-1` if it doesn’t find a matching pattern.")
|
||
(defn find [pattern data]
|
||
@(Pattern.MatchResult.start &(Pattern.match pattern data)) )
|
||
|
||
(doc find-all "finds all indices of a pattern in a string. The patterns may _not_ overlap.
|
||
|
||
Returns `[]` if it doesn’t find a matching pattern.")
|
||
(defn find-all-matches [pattern data]
|
||
(let-do [result []
|
||
stop (String.length data)
|
||
found (match-from pattern data 0)
|
||
start @(MatchResult.end &found) ]
|
||
(while-do (and (Int.<= start stop)
|
||
(not (non-match? &found)) )
|
||
(set! result (Array.push-back result found))
|
||
(set! found (match-from pattern data start))
|
||
(set! start @(MatchResult.end &found)) )
|
||
result ))
|
||
(defn find-all [pattern data]
|
||
(Array.copy-map
|
||
&(fn [m] @(MatchResult.start m))
|
||
&(find-all-matches pattern data) ))
|
||
|
||
|
||
(doc match-groups "finds the match groups of the first match of a pattern in
|
||
a string.
|
||
|
||
Returns `[]` if it doesn’t find a matching pattern.")
|
||
(register match-groups (Fn [&Pattern &String] (Array String)))
|
||
(doc match-str "finds the first match of a pattern in a string.
|
||
|
||
Returns an empty string if it doesn’t find a matching pattern.")
|
||
(defn match-str [pattern data]
|
||
(Maybe.from (Pattern.extract &(Pattern.match pattern data) data) @"") )
|
||
|
||
(doc match-all-groups "finds all match groups of a pattern in a string as a nested
|
||
array.
|
||
|
||
Returns `[]` if it doesn’t find a matching pattern.")
|
||
(register match-all-groups (Fn [&Pattern &String] (Array (Array String))))
|
||
(doc substitute "finds all matches of a pattern in a string and replaces it
|
||
by another pattern `n` times.
|
||
|
||
The substitute pattern can reference the original pattern by match group
|
||
indices, such as `\1`. This means that backslashes need to be double escaped.
|
||
|
||
If you want to replace all occurrences of the pattern, use `-1`.")
|
||
(register substitute (Fn [&Pattern &String &String Int] String))
|
||
(doc matches? "checks whether a pattern matches a string.")
|
||
(defn matches? [pat s] (/= (find pat s) -1))
|
||
|
||
(register str (Fn [&Pattern] String))
|
||
(implements str Pattern.str)
|
||
(register prn (Fn [&Pattern] String))
|
||
(implements prn Pattern.prn)
|
||
|
||
(register init (Fn [&String] Pattern))
|
||
(register = (Fn [&Pattern &Pattern] Bool))
|
||
(implements = Pattern.=)
|
||
(register delete (Fn [Pattern] ()))
|
||
(implements delete Pattern.delete)
|
||
(register copy (Fn [&Pattern] Pattern))
|
||
(implements copy Pattern.copy)
|
||
|
||
(doc from-chars "creates a pattern that matches a group of characters from a
|
||
list of those characters.")
|
||
(defn from-chars [chars]
|
||
(Pattern.init &(str* @"[" (String.from-chars chars) @"]")))
|
||
|
||
(defn global-match-str [pattern data]
|
||
(Array.copy-map
|
||
&(fn [m] (Maybe.unsafe-from (extract m data)))
|
||
&(find-all-matches pattern data)))
|
||
|
||
|
||
(doc split "splits a string by a pattern.")
|
||
(defn split [p s]
|
||
(let-do [idx (find-all p s)
|
||
strs (global-match-str p s)
|
||
lidx (Array.length &idx)
|
||
result (Array.allocate (Int.inc lidx))]
|
||
(Array.aset-uninitialized! &result 0
|
||
(slice s 0 (if (> lidx 0) @(Array.unsafe-nth &idx 0) (length s))))
|
||
(for [i 0 (Int.dec (Array.length &idx))]
|
||
(let [plen (length (Array.unsafe-nth &strs i))]
|
||
(Array.aset-uninitialized! &result (Int.inc i)
|
||
(slice s (+ @(Array.unsafe-nth &idx i) plen)
|
||
@(Array.unsafe-nth &idx (Int.inc i))))))
|
||
(when (> lidx 0)
|
||
(let [plen (length (Array.unsafe-nth &strs (Int.dec lidx)))]
|
||
(Array.aset-uninitialized! &result lidx
|
||
(suffix s (+ @(Array.unsafe-nth &idx (Int.dec lidx))
|
||
plen)))))
|
||
result))
|
||
|
||
)
|
||
|
||
(defmodule String
|
||
(doc in? "checks whether a string contains another string.")
|
||
(defn in? [s sub]
|
||
(Pattern.matches? &(Pattern.init sub) s))
|
||
|
||
(doc upper? "checks whether a string is all uppercase.")
|
||
(defn upper? [s]
|
||
(Pattern.matches? #"^[\u\s\p]*$" s))
|
||
|
||
(doc lower? "checks whether a string is all lowercase.")
|
||
(defn lower? [s]
|
||
(Pattern.matches? #"^[\l\s\p]*$" s))
|
||
|
||
(doc num? "checks whether a string is numerical.")
|
||
(defn num? [s]
|
||
(Pattern.matches? #"^[0-9]*$" s))
|
||
|
||
(doc alpha? "checks whether a string contains only alphabetical characters (a-Z).")
|
||
(defn alpha? [s]
|
||
(Pattern.matches? #"^[\u\l]*$" s))
|
||
|
||
(doc alphanum? "checks whether a string is alphanumerical.")
|
||
(defn alphanum? [s]
|
||
(Pattern.matches? #"^[\w]*$" s))
|
||
|
||
(doc hex? "checks whether a string is hexadecimal.")
|
||
(defn hex? [s]
|
||
(Pattern.matches? #"^[\x]*$" s))
|
||
|
||
(doc trim-left "trims whitespace from the left of a string.")
|
||
(defn trim-left [s]
|
||
(Pattern.substitute #"^\s+" s "" 1))
|
||
|
||
(doc trim-right "trims whitespace from the right of a string.")
|
||
(defn trim-right [s]
|
||
(Pattern.substitute #"\s+$" s "" 1))
|
||
|
||
(doc trim "trims whitespace from both sides of a string.")
|
||
(defn trim [s]
|
||
(trim-left &(trim-right s)))
|
||
|
||
(doc chomp "trims a newline from the end of a string.")
|
||
(defn chomp [s]
|
||
(Pattern.substitute #"\r$" &(Pattern.substitute #"\n$" s "" 1) "" 1))
|
||
|
||
(doc collapse-whitespace "collapses groups of whitespace into single spaces.")
|
||
(defn collapse-whitespace [s]
|
||
(Pattern.substitute #"\s+" s " " -1))
|
||
|
||
(doc split-by "splits a string by separators.")
|
||
(defn split-by [s separators]
|
||
(let-do [pat (Pattern.from-chars separators)
|
||
idx (Pattern.find-all &pat s)
|
||
lidx (Array.length &idx)
|
||
result (Array.allocate (Int.inc lidx))]
|
||
(Array.aset-uninitialized! &result 0
|
||
(slice s 0 (if (> lidx 0) @(Array.unsafe-nth &idx 0) (length s))))
|
||
(for [i 0 (Int.dec (Array.length &idx))]
|
||
(Array.aset-uninitialized! &result (Int.inc i)
|
||
(slice s (Int.inc @(Array.unsafe-nth &idx i)) @(Array.unsafe-nth &idx (Int.inc i)))))
|
||
(when (> lidx 0)
|
||
(Array.aset-uninitialized! &result lidx
|
||
(suffix s (Int.inc @(Array.unsafe-nth &idx (Int.dec lidx))))))
|
||
result))
|
||
|
||
(doc words "splits a string into words.")
|
||
(defn words [s]
|
||
(Array.endo-filter &(fn [s] (not (empty? s))) (split-by s &[\tab \space \newline])))
|
||
|
||
(doc lines "splits a string into lines.")
|
||
(defn lines [s]
|
||
(split-by s &[\newline]))
|
||
)
|