mirror of
https://github.com/enso-org/enso.git
synced 2024-11-26 08:52:58 +03:00
Wip/gmt/match find only text (#5721)
Rename is_match + match to match + find (respectively), and remove all non-regexp functionality. Regexp flags and Match_Mode are also no longer supported by these methods.
This commit is contained in:
parent
78aab133c7
commit
3a09ee88f6
@ -318,6 +318,8 @@
|
||||
- [Moved regex functionality out of `Text.locate` and `Text.locate_all` into
|
||||
`Text.match` and `Text.match_all`.][5679]
|
||||
- [`File.parent` may return `Nothing`.][5699]
|
||||
- [Removed non-regex functionality from `is_match`, `match`, and `match_all`,
|
||||
and renamed them to `match`, `find`, `find_all` (respectively).][5721]
|
||||
|
||||
[debug-shortcuts]:
|
||||
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
||||
@ -488,7 +490,9 @@
|
||||
[5645]: https://github.com/enso-org/enso/pull/5645
|
||||
[5646]: https://github.com/enso-org/enso/pull/5646
|
||||
[5656]: https://github.com/enso-org/enso/pull/5656
|
||||
[5679]: https://github.com/enso-org/enso/pull/5679
|
||||
[5699]: https://github.com/enso-org/enso/pull/5699
|
||||
[5721]: https://github.com/enso-org/enso/pull/5721
|
||||
|
||||
#### Enso Compiler
|
||||
|
||||
|
@ -1,5 +1,7 @@
|
||||
import project.Data.Locale.Locale
|
||||
|
||||
from project.Data.Boolean import Boolean, True, False
|
||||
|
||||
polyglot java import org.enso.base.text.TextFoldingStrategy
|
||||
|
||||
type Case_Sensitivity
|
||||
@ -25,3 +27,11 @@ type Case_Sensitivity
|
||||
Case_Sensitivity.Sensitive -> TextFoldingStrategy.unicodeNormalizedFold
|
||||
Case_Sensitivity.Insensitive locale ->
|
||||
TextFoldingStrategy.caseInsensitiveFold locale.java_locale
|
||||
|
||||
## PRIVATE
|
||||
Returns `True` only for `Case_Sensitivity.Insensitive`; `Default` and `Sensitive` yield `False`.
|
||||
is_case_insensitive : Boolean
|
||||
is_case_insensitive self = case self of
|
||||
Case_Sensitivity.Default -> False
|
||||
Case_Sensitivity.Sensitive -> False
|
||||
Case_Sensitivity.Insensitive _ -> True
|
||||
|
@ -13,6 +13,7 @@ import project.Data.Text.Encoding.Encoding
|
||||
import project.Data.Text.Location
|
||||
import project.Data.Text.Matching_Mode.Matching_Mode
|
||||
import project.Data.Text.Regex
|
||||
import project.Data.Text.Regex.Match.Match
|
||||
import project.Data.Text.Regex.Regex_Mode.Regex_Mode
|
||||
import project.Data.Text.Regex_Matcher.Regex_Matcher
|
||||
import project.Data.Text.Span.Span
|
||||
@ -209,116 +210,77 @@ Text.characters self =
|
||||
self.each bldr.append
|
||||
bldr.to_vector
|
||||
|
||||
## ALIAS find
|
||||
|
||||
Matches the text in `self` against the provided `term`, returning the first
|
||||
or last match if present or `Nothing` if there are no matches.
|
||||
## Find the regular expression `pattern` in `self`, returning the first match
|
||||
if present or `Nothing` if not found.
|
||||
|
||||
Arguments:
|
||||
- term: The pattern to match `self` against. We recommend using _raw text_
|
||||
to write your patterns.
|
||||
- mode: This argument specifies whether the first or last match should be
|
||||
returned.
|
||||
- matcher: If a `Text_Matcher`, the text is compared using case-sensitivity
|
||||
rules specified in the matcher. If a `Regex_Matcher`, the term is used as a
|
||||
regular expression and matched using the associated options.
|
||||
- pattern: The pattern to match `self` against.
|
||||
- case_sensitivity: Specifies if the text values should be compared case
|
||||
sensitively.
|
||||
|
||||
> Example
|
||||
Find the first substring matching the regex.
|
||||
|
||||
example_match =
|
||||
regex = "a[ab]c"
|
||||
"aabbbbccccaabcaaaa".match regex == "abc"
|
||||
example_find =
|
||||
## This matches `abc` @ character 11
|
||||
"aabbbbccccaabcaaaa".find "a[ab]c"
|
||||
example_find_insensitive =
|
||||
## This matches `aBc` @ character 11
|
||||
"aabbbbccccaaBcaaaa".find "a[ab]c" Case_Sensitivity.Insensitive
|
||||
Text.find : Text -> Case_Sensitivity -> Match | Nothing ! Compile_Error
|
||||
Text.find self pattern=".*" case_sensitivity=Case_Sensitivity.Sensitive =
|
||||
case_insensitive = case_sensitivity.is_case_insensitive
|
||||
Regex.compile pattern case_insensitive=case_insensitive . match self Matching_Mode.First
|
||||
|
||||
! Last Match in Regex Mode
|
||||
Regex always performs the search from the front and matching the last
|
||||
occurrence means selecting the last of the matches while still generating
|
||||
matches from the beginning. Regex does not return overlapping matches - it
|
||||
will return a match at some position and then continue the search after that
|
||||
match. This will lead to slightly different behavior for overlapping
|
||||
occurrences of a pattern in Regex mode than in exact text matching mode
|
||||
where the matches are searched for from the back.
|
||||
|
||||
> Example
|
||||
Comparing Matching in Last Mode in Regex and Text mode
|
||||
|
||||
"aAa".match "aa" mode=Matching_Mode.Last matcher=Text_Matcher.Case_Insensitive == "Aa"
|
||||
"aAa".match "aa" mode=Matching_Mode.Last matcher=(Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Insensitive) == "aA"
|
||||
|
||||
Text.match : Text -> Matching_Mode -> (Text_Matcher | Regex_Matcher) -> Text | Nothing
|
||||
Text.match self term mode=Matching_Mode.First matcher=Regex_Matcher.Value = case matcher of
|
||||
_ : Text_Matcher ->
|
||||
case_sensitivity = case matcher of
|
||||
Text_Matcher.Case_Sensitive -> Case_Sensitivity.Sensitive
|
||||
Text_Matcher.Case_Insensitive _ -> Case_Sensitivity.Insensitive
|
||||
case self.locate term mode case_sensitivity of
|
||||
Nothing -> Nothing
|
||||
span -> span.text
|
||||
_ : Regex_Matcher -> case mode of
|
||||
Matching_Mode.First ->
|
||||
case matcher.compile term . match self Matching_Mode.First of
|
||||
Nothing -> Nothing
|
||||
match -> match.span 0 . to_grapheme_span . text
|
||||
Matching_Mode.Last ->
|
||||
case matcher.compile term . match self Regex_Mode.All of
|
||||
Nothing -> Nothing
|
||||
matches -> matches.last.span 0 . to_grapheme_span . text
|
||||
|
||||
## ALIAS find_all
|
||||
|
||||
Matches all occurrences of text in `self` against the provided `term`, returning
|
||||
a vector of matches.
|
||||
## Finds all the matches of the regular expression `pattern` in `self`,
|
||||
returning a Vector, which is empty if there are no matches.
|
||||
|
||||
Arguments:
|
||||
- term: The pattern to match `self` against. We recommend using _raw text_
|
||||
to write your patterns.
|
||||
- matcher: If a `Text_Matcher`, the text is compared using case-sensitivity
|
||||
rules specified in the matcher. If a `Regex_Matcher`, the term is used as a
|
||||
regular expression and matched using the associated options.
|
||||
- pattern: The pattern to match `self` against.
|
||||
- case_sensitivity: Specifies if the text values should be compared case
|
||||
sensitively.
|
||||
|
||||
> Example
|
||||
Find all substrings matching the regex.
|
||||
Find all substrings matching the regex.
|
||||
|
||||
example_match =
|
||||
regex = "a[ab]c"
|
||||
"aabcbbccaacaa".match regex == ["abc", "aac"]
|
||||
Text.match_all : Text -> (Text_Matcher | Regex_Matcher) -> Vector Text
|
||||
Text.match_all self term=".*" matcher=Regex_Matcher.Value = case matcher of
|
||||
_ : Text_Matcher ->
|
||||
case_sensitivity = case matcher of
|
||||
Text_Matcher.Case_Sensitive -> Case_Sensitivity.Sensitive
|
||||
Text_Matcher.Case_Insensitive _ -> Case_Sensitivity.Insensitive
|
||||
self.locate_all term case_sensitivity . map .text
|
||||
_ : Regex_Matcher ->
|
||||
case matcher.compile term . match self Regex_Mode.All of
|
||||
Nothing -> []
|
||||
matches -> matches.map m-> m.span 0 . to_grapheme_span . text
|
||||
example_find_all =
|
||||
## This matches `aabbbbc` @ character 0 and `aabc` @ character 10
|
||||
"aabbbbccccaabcaaaa".find_all "a[ab]+c"
|
||||
example_find_all_insensitive =
|
||||
## This matches `aABbbbc` @ character 0 and `aaBC` @ character 10
|
||||
"aABbbbccccaaBCaaaa".find_all "a[ab]+c" Case_Sensitivity.Insensitive
|
||||
Text.find_all : Text -> Case_Sensitivity -> Vector Match ! Compile_Error
|
||||
Text.find_all self pattern=".*" case_sensitivity=Case_Sensitivity.Sensitive =
|
||||
case_insensitive = case_sensitivity.is_case_insensitive
|
||||
case Regex.compile pattern case_insensitive=case_insensitive . match self Regex_Mode.All of
|
||||
Nothing -> []
|
||||
matches -> matches
|
||||
|
||||
## ALIAS Check Matches
|
||||
|
||||
Checks if the whole text in `self` matches a provided `pattern`.
|
||||
|
||||
Arguments:
|
||||
- pattern: The pattern to match `self` against. We recommend using _raw text_
|
||||
to write your patterns.
|
||||
- matcher: If a `Text_Matcher`, the text is compared using case-sensitivity
|
||||
rules specified in the matcher. If a `Regex_Matcher`, the term is used as a
|
||||
regular expression and matched using the associated options.
|
||||
- pattern: The pattern to match `self` against.
|
||||
- case_sensitivity: Specifies if the text values should be compared case
|
||||
sensitively.
|
||||
|
||||
> Example
|
||||
Checks if some text matches a basic email regex. NOTE: This regex is _not_
|
||||
compliant with RFC 5322.
|
||||
Checks if the whole text matches a basic email regex.
|
||||
|
||||
example_match =
|
||||
regex = ".+@.+"
|
||||
"contact@enso.org".is_match regex
|
||||
Text.is_match : Text -> (Text_Matcher | Regex_Matcher) -> Boolean ! Compile_Error
|
||||
Text.is_match self pattern=".*" matcher=Regex_Matcher.Value = case matcher of
|
||||
Text_Matcher.Case_Sensitive -> self == pattern
|
||||
Text_Matcher.Case_Insensitive locale -> self.equals_ignore_case pattern locale
|
||||
_ : Regex_Matcher ->
|
||||
compiled_pattern = matcher.compile pattern
|
||||
compiled_pattern.matches self
|
||||
regex = ".+ct@.+"
|
||||
# Evaluates to true
|
||||
"contact@enso.org".match regex
|
||||
example_match_insensitive =
|
||||
regex = ".+ct@.+"
|
||||
# Evaluates to true
|
||||
"CONTACT@enso.org".match regex Case_Sensitivity.Insensitive
|
||||
Text.match : Text -> Case_Sensitivity -> Boolean ! Compile_Error
|
||||
Text.match self pattern=".*" case_sensitivity=Case_Sensitivity.Sensitive =
|
||||
case_insensitive = case_sensitivity.is_case_insensitive
|
||||
compiled_pattern = Regex.compile pattern case_insensitive=case_insensitive
|
||||
compiled_pattern.matches self
|
||||
|
||||
## ALIAS Split Text
|
||||
|
||||
@ -1380,4 +1342,3 @@ slice_text text char_ranges =
|
||||
char_ranges.map char_range->
|
||||
sb.append text char_range.start char_range.end
|
||||
sb.toString
|
||||
|
||||
|
@ -57,7 +57,7 @@ type Suite_Config
|
||||
should_run_group self name =
|
||||
regexp = self.only_group_regexp
|
||||
case regexp of
|
||||
_ : Text -> name.is_match regexp . catch Any (_->True)
|
||||
_ : Text -> name.match regexp . catch Any (_->True)
|
||||
_ -> True
|
||||
|
||||
should_output_junit self =
|
||||
|
@ -57,6 +57,8 @@ type Manual
|
||||
- Note that currently the regex-based operations may not handle the edge
|
||||
cases described above too well.
|
||||
spec =
|
||||
check_span result span = result.span 0 . to_grapheme_span . should_equal span
|
||||
check_span_all result spans = result . map (m-> (m.span 0).to_grapheme_span) . should_equal spans
|
||||
Test.group "Text" <|
|
||||
kshi = '\u0915\u094D\u0937\u093F'
|
||||
facepalm = '\u{1F926}\u{1F3FC}\u200D\u2642\uFE0F'
|
||||
@ -1175,20 +1177,18 @@ spec =
|
||||
|
||||
Test.specify "should allow regexes in match" <|
|
||||
hello = "Hello World!"
|
||||
regex = Regex_Matcher.Value
|
||||
regex_insensitive = Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Insensitive
|
||||
hello.match ".o" Matching_Mode.First matcher=regex . should_equal "lo"
|
||||
hello.match ".o" Matching_Mode.Last matcher=regex . should_equal "Wo"
|
||||
hello.match_all ".o" matcher=regex . should_equal ["lo", "Wo"]
|
||||
|
||||
"foobar".match "BAR" Matching_Mode.First matcher=regex_insensitive . should_equal "bar"
|
||||
check_span (hello.find ".o" Case_Sensitivity.Insensitive) (Span.Value (3.up_to 5) "Hello World!")
|
||||
check_span_all (hello.find_all ".o") [Span.Value (3.up_to 5) "Hello World!", Span.Value (6.up_to 8) "Hello World!"]
|
||||
|
||||
check_span ("foobar".find "BAR" Case_Sensitivity.Insensitive) (Span.Value (3.up_to 6) "foobar")
|
||||
|
||||
## Regex matching does not do case folding
|
||||
"Strasse".match "ß" Matching_Mode.First matcher=regex_insensitive . should_equal Nothing
|
||||
"Strasse".find "ß" Case_Sensitivity.Insensitive . should_equal Nothing
|
||||
|
||||
## But it should handle the Unicode normalization
|
||||
accents = 'a\u{301}e\u{301}o\u{301}'
|
||||
accents.match accent_1 Matching_Mode.First matcher=regex . should_equal 'e\u{301}'
|
||||
check_span (accents.find accent_1) (Span.Value (1.up_to 2) 'a\u{301}e\u{301}o\u{301}')
|
||||
|
||||
Test.specify "should correctly handle regex edge cases in locate" pending="Figure out how to make Regex correctly handle empty patterns." <|
|
||||
regex = Regex_Matcher.Value
|
||||
@ -1209,29 +1209,10 @@ spec =
|
||||
"aaa aaa".locate "aa" mode=Matching_Mode.Last case_sensitivity=Case_Sensitivity.Sensitive . should_equal (Span.Value (5.up_to 7) "aaa aaa")
|
||||
|
||||
Test.specify "should allow to match one or more occurrences of a pattern in the text" <|
|
||||
"abacadae".match_all "a[bc]" . should_equal ["ab", "ac"]
|
||||
"abacadae".match_all "a." . should_equal ["ab", "ac", "ad", "ae"]
|
||||
"abacadae".match_all "a.*" . should_equal ["abacadae"]
|
||||
"abacadae".match_all "a.+?" . should_equal ["ab", "ac", "ad", "ae"]
|
||||
|
||||
"aAa".match "aa" mode=Matching_Mode.Last matcher=Text_Matcher.Case_Insensitive . should_equal "Aa"
|
||||
"aAa".match "aa" mode=Matching_Mode.Last matcher=(Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Insensitive) . should_equal "aA"
|
||||
|
||||
"abacadae".match "a[bc]" mode=Matching_Mode.Last . should_equal "ac"
|
||||
"abacadae".match "a." mode=Matching_Mode.Last . should_equal "ae"
|
||||
"abacadae".match "a.*" mode=Matching_Mode.Last . should_equal "abacadae"
|
||||
"abacadae".match "a.+?" mode=Matching_Mode.Last . should_equal "ae"
|
||||
|
||||
"abacadae".match "a[bc]" matcher=Text_Matcher.Case_Sensitive . should_equal Nothing
|
||||
"abABacAC".match "ab" matcher=Text_Matcher.Case_Sensitive mode=Matching_Mode.Last . should_equal "ab"
|
||||
"abABacAC".match "ab" matcher=Text_Matcher.Case_Insensitive mode=Matching_Mode.Last . should_equal "AB"
|
||||
|
||||
"abABacAC".match_all "ab" matcher=Text_Matcher.Case_Sensitive . should_equal ["ab"]
|
||||
"abABacAC".match_all "ab" matcher=Text_Matcher.Case_Insensitive . should_equal ["ab", "AB"]
|
||||
"abacadae".match_all "a[bc]" matcher=Text_Matcher.Case_Sensitive . should_equal []
|
||||
|
||||
"Strasse and Straße".match_all "STRASSE" matcher=Text_Matcher.Case_Sensitive . should_equal []
|
||||
"Strasse and Straße".match_all "STRASSE" matcher=Text_Matcher.Case_Insensitive . should_equal ["Strasse", "Straße"]
|
||||
check_span_all ("abacadae".find_all "a[bc]") [Span.Value (0.up_to 2) "abacadae", Span.Value (2.up_to 4) "abacadae"]
|
||||
check_span_all ("abacadae".find_all "a.") [Span.Value (0.up_to 2) "abacadae", Span.Value (2.up_to 4) "abacadae", Span.Value (4.up_to 6) "abacadae", Span.Value (6.up_to 8) "abacadae"]
|
||||
check_span_all ("abacadae".find_all "a.*") [Span.Value (0.up_to 8) "abacadae"]
|
||||
check_span_all ("abacadae".find_all "a.+?") [Span.Value (0.up_to 2) "abacadae", Span.Value (2.up_to 4) "abacadae", Span.Value (4.up_to 6) "abacadae", Span.Value (6.up_to 8) "abacadae"]
|
||||
|
||||
Test.specify "should default to exact matching for locate but regex for match" <|
|
||||
txt = "aba[bc]adacae"
|
||||
@ -1242,131 +1223,57 @@ spec =
|
||||
txt.locate "a[bc]" . should_equal (Span.Value (2.up_to 7) txt)
|
||||
txt.locate_all "a[bc]" . should_equal [Span.Value (2.up_to 7) txt]
|
||||
|
||||
"ab".match "a[bc]" . should_equal "ab"
|
||||
"a[bc]".match "a[bc]" . should_equal Nothing
|
||||
"a[bc]".match_all "a[bc]" . should_equal []
|
||||
check_span ("ab".find "a[bc]") (Span.Value (0.up_to 2) "ab")
|
||||
"a[bc]".find "a[bc]" . should_equal Nothing
|
||||
"a[bc]".find_all "a[bc]" . should_equal []
|
||||
|
||||
txt.match "a[bc]" . should_equal "ab"
|
||||
txt.match_all "a[bc]" . should_equal ["ab", "ac"]
|
||||
check_span (txt.find "a[bc]") (Span.Value (0.up_to 2) "aba[bc]adacae")
|
||||
check_span_all (txt.find_all "a[bc]") [Span.Value (0.up_to 2) "aba[bc]adacae", Span.Value (9.up_to 11) "aba[bc]adacae"]
|
||||
|
||||
Test.group "Regex matching" <|
|
||||
Test.specify "should be possible on text" <|
|
||||
match = "My Text: Goes Here".match "^My Text: (.+)$"
|
||||
match.should_equal "My Text: Goes Here"
|
||||
match = "My Text: Goes Here".find "^My Text: (.+)$"
|
||||
check_span match (Span.Value (0.up_to 18) "My Text: Goes Here")
|
||||
|
||||
Test.specify "should be possible on unicode text" <|
|
||||
txt = "maza건반zaa"
|
||||
txt.match "^a..z$" . should_equal Nothing
|
||||
txt.match "^m..a..z.a$" . should_equal txt
|
||||
txt.match "a..z" . should_equal "a건반z"
|
||||
|
||||
Test.specify "should be possible in ascii mode" <|
|
||||
match = "İ".match "\w" matcher=(Regex_Matcher.Value match_ascii=True)
|
||||
match.should_equal Nothing
|
||||
txt.find "^a..z$" . should_equal Nothing
|
||||
check_span (txt.find "^m..a..z.a$") (Span.Value (0.up_to 9) txt)
|
||||
check_span (txt.find "a..z") (Span.Value (3.up_to 7) txt)
|
||||
|
||||
Test.specify "should be possible in case-insensitive mode" <|
|
||||
match = "MY".match "my" matcher=(Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Insensitive)
|
||||
match.should_equal "MY"
|
||||
match = "MY".find "my" Case_Sensitivity.Insensitive
|
||||
check_span match (Span.Value (0.up_to 2) "MY")
|
||||
|
||||
Test.specify "should be possible in dot_matches_newline mode" <|
|
||||
match = 'Foo\n'.match "(....)" matcher=(Regex_Matcher.Value dot_matches_newline=True)
|
||||
match.should_equal 'Foo\n'
|
||||
|
||||
Test.specify "should be possible in multiline mode" <|
|
||||
text = """
|
||||
Foo
|
||||
bar
|
||||
match = text.match_all "^(...)$" matcher=(Regex_Matcher.Value multiline=True)
|
||||
match.should_equal ["Foo", "bar"]
|
||||
|
||||
Test.specify "should be possible in comments mode" <|
|
||||
match = "abcde".match "(..) # Match two of any character" matcher=(Regex_Matcher.Value comments=True)
|
||||
match.should_equal "ab"
|
||||
|
||||
Test.group "Text.is_match" <|
|
||||
Test.group "Text.match" <|
|
||||
Test.specify "should default to regex" <|
|
||||
"My Text: Goes Here".is_match "^My Text: (.+)$" . should_be_true
|
||||
"555-801-1923".is_match "^\d{3}-\d{3}-\d{4}$" . should_be_true
|
||||
"Hello".is_match "^[a-z]+$" . should_be_false
|
||||
"Hello".is_match "^[a-z]+$" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Insensitive) . should_be_true
|
||||
"My Text: Goes Here".match "^My Text: (.+)$" . should_be_true
|
||||
"555-801-1923".match "^\d{3}-\d{3}-\d{4}$" . should_be_true
|
||||
"Hello".match "^[a-z]+$" . should_be_false
|
||||
"Hello".match "^[a-z]+$" Case_Sensitivity.Insensitive . should_be_true
|
||||
|
||||
Test.specify "should only match whole input" <|
|
||||
"Hello".is_match "[a-z]" . should_be_false
|
||||
"x".is_match "[a-z]" . should_be_true
|
||||
|
||||
Test.specify "should allow Text_Matcher too" <|
|
||||
"foobar".is_match "foobar" matcher=Text_Matcher.Case_Sensitive . should_be_true
|
||||
"foobar".is_match "FOOBAR" matcher=Text_Matcher.Case_Sensitive . should_be_false
|
||||
"foobar".is_match "foo.*" matcher=Text_Matcher.Case_Sensitive . should_be_false
|
||||
"foobar".is_match "foo" matcher=Text_Matcher.Case_Sensitive . should_be_false
|
||||
|
||||
"foobar".is_match "foobar" matcher=Text_Matcher.Case_Insensitive . should_be_true
|
||||
"foobar".is_match "FOOBAR" matcher=Text_Matcher.Case_Insensitive . should_be_true
|
||||
"foobar".is_match "foo.*" matcher=Text_Matcher.Case_Insensitive . should_be_false
|
||||
"foobar".is_match "foo" matcher=Text_Matcher.Case_Insensitive . should_be_false
|
||||
"Hello".match "[a-z]" . should_be_false
|
||||
"x".match "[a-z]" . should_be_true
|
||||
|
||||
Test.specify "should be possible on unicode text" <|
|
||||
"Korean: 건반".is_match "^Korean: (.+)$" . should_be_true
|
||||
|
||||
Test.specify "should be possible in ascii mode" <|
|
||||
"İ".is_match "\w" (Regex_Matcher.Value match_ascii=True) . should_be_false
|
||||
"Korean: 건반".match "^Korean: (.+)$" . should_be_true
|
||||
|
||||
Test.specify "should be possible in case-insensitive mode" <|
|
||||
"MY".is_match "my" (Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Insensitive) . should_be_true
|
||||
|
||||
Test.specify "should be possible in dot_matches_newline mode" <|
|
||||
'Foo\n'.is_match "(....)" (Regex_Matcher.Value dot_matches_newline=True) . should_be_true
|
||||
|
||||
multiline_matches_message = """
|
||||
This test does not make sense once we require matches to match the
|
||||
whole string. The `multiline` parameter may not make sense for the
|
||||
`matches` function. This should be revisited when Text library is
|
||||
being redesigned.
|
||||
Test.specify "should be possible in multiline mode" pending=multiline_matches_message <|
|
||||
text = """
|
||||
Foo
|
||||
bar
|
||||
text.is_match "^(...)$" (Regex_Matcher.Value multiline=True) . should_be_true
|
||||
|
||||
Test.specify "should be possible in comments mode" <|
|
||||
"abcde".is_match "(.....) # Match any five characters" (Regex_Matcher.Value comments=True) . should_be_true
|
||||
"MY".match "my" Case_Sensitivity.Insensitive . should_be_true
|
||||
|
||||
Test.group "Regex finding" <|
|
||||
Test.specify "should be possible on text" <|
|
||||
match = "My Text: Goes Here".match "^My Text: (.+)$" mode=Matching_Mode.First
|
||||
match . should_be_a Text
|
||||
match . should_equal "My Text: Goes Here"
|
||||
match = "My Text: Goes Here".find "^My Text: (.+)$"
|
||||
check_span match (Span.Value (0.up_to 18) "My Text: Goes Here")
|
||||
|
||||
Test.specify "should be possible on unicode text" <|
|
||||
match = "Korean: 건반".match "^Korean: (.+)$" mode=Matching_Mode.First
|
||||
match . should_be_a Text
|
||||
match . should_equal "Korean: 건반"
|
||||
|
||||
Test.specify "should be possible in ascii mode" <|
|
||||
match = "İ".match "\w" matcher=(Regex_Matcher.Value match_ascii=True)
|
||||
match . should_equal Nothing
|
||||
match = "Korean: 건반".find "^Korean: (.+)$"
|
||||
check_span match (Span.Value (0.up_to 10) "Korean: 건반")
|
||||
|
||||
Test.specify "should be possible in case-insensitive mode" <|
|
||||
match = "MY".match "my" matcher=(Regex_Matcher.Value case_sensitivity=Case_Sensitivity.Insensitive)
|
||||
match . should_be_a Text
|
||||
match . should_equal "MY"
|
||||
|
||||
Test.specify "should be possible in dot_matches_newline mode" <|
|
||||
match = 'Foo\n'.match "(....)" matcher=(Regex_Matcher.Value dot_matches_newline=True)
|
||||
match . should_be_a Text
|
||||
match . should_equal 'Foo\n'
|
||||
|
||||
Test.specify "should be possible in multiline mode" <|
|
||||
text = """
|
||||
Foo
|
||||
bar
|
||||
match = text.match_all "^(...)$" matcher=(Regex_Matcher.Value multiline=True)
|
||||
match . should_equal ["Foo", "bar"]
|
||||
|
||||
Test.specify "should be possible in comments mode" <|
|
||||
match = "abcde".match "(..) # Match two of any character" matcher=(Regex_Matcher.Value comments=True)
|
||||
match . should_be_a Text
|
||||
match . should_equal "ab"
|
||||
match = "MY".find "my" Case_Sensitivity.Insensitive
|
||||
check_span match (Span.Value (0.up_to 2) "MY")
|
||||
|
||||
Test.group "Regex splitting" <|
|
||||
Test.specify "should be possible on text" <|
|
||||
|
Loading…
Reference in New Issue
Block a user