mirror of
https://github.com/enso-org/enso.git
synced 2024-12-23 12:21:37 +03:00
Update Column_Selector and Column_Mapping to use Matcher over Matching_Strategy (#3299)
Implements https://www.pivotaltracker.com/story/show/181339748
This commit is contained in:
parent
abbb3a4679
commit
b03416f907
@ -743,7 +743,7 @@ Text.from_codepoints codepoints = Text_Utils.from_codepoints codepoints.to_array
|
||||
"Hello!".starts_with "hello" (Text_Matcher Case_Insensitive.new) == True
|
||||
"Hello!".starts_with "[a-z]" Regex_Matcher.new == False
|
||||
"Hello!".starts_with "[A-Z]" Regex_Matcher.new == True
|
||||
Text.starts_with : Text -> (Text_Matcher | Regex_Matcher) -> Boolean
|
||||
Text.starts_with : Text -> Matcher -> Boolean
|
||||
Text.starts_with prefix matcher=Text_Matcher.new = case matcher of
|
||||
Text_Matcher case_sensitivity -> case case_sensitivity of
|
||||
True ->
|
||||
@ -752,7 +752,7 @@ Text.starts_with prefix matcher=Text_Matcher.new = case matcher of
|
||||
this.take (Text_Sub_Range.First prefix.length) . equals_ignore_case prefix locale=locale
|
||||
Regex_Matcher _ _ _ _ _ ->
|
||||
preprocessed_pattern = "\A(?:" + prefix + ")"
|
||||
compiled_pattern = here.prepare_regex preprocessed_pattern matcher
|
||||
compiled_pattern = matcher.compile preprocessed_pattern
|
||||
match = compiled_pattern.match this Mode.First
|
||||
match.is_nothing.not
|
||||
|
||||
@ -780,7 +780,7 @@ Text.starts_with prefix matcher=Text_Matcher.new = case matcher of
|
||||
"Hello World".ends_with "world" == False
|
||||
"Hello World".ends_with "world" (Text_Matcher Case_Insensitive.new) == True
|
||||
"Hello World".ends_with "[A-Z][a-z]{4}" Regex_Matcher.new == True
|
||||
Text.ends_with : Text -> (Text_Matcher | Regex_Matcher) -> Boolean
|
||||
Text.ends_with : Text -> Matcher -> Boolean
|
||||
Text.ends_with suffix matcher=Text_Matcher.new = case matcher of
|
||||
Text_Matcher case_sensitivity -> case case_sensitivity of
|
||||
True ->
|
||||
@ -789,7 +789,7 @@ Text.ends_with suffix matcher=Text_Matcher.new = case matcher of
|
||||
this.take (Text_Sub_Range.Last suffix.length) . equals_ignore_case suffix locale=locale
|
||||
Regex_Matcher _ _ _ _ _ ->
|
||||
preprocessed_pattern = "(?:" + suffix + ")\z"
|
||||
compiled_pattern = here.prepare_regex preprocessed_pattern matcher
|
||||
compiled_pattern = matcher.compile preprocessed_pattern
|
||||
match = compiled_pattern.match this Mode.First
|
||||
match.is_nothing.not
|
||||
|
||||
@ -844,14 +844,14 @@ Text.ends_with suffix matcher=Text_Matcher.new = case matcher of
|
||||
See if the text "Hello!" contains any lowercase letters, using a regex.
|
||||
|
||||
"Hello!".contains "[a-z]" Regex_Matcher.new
|
||||
Text.contains : Text -> (Text_Matcher | Regex_Matcher) -> Boolean
|
||||
Text.contains : Text -> Matcher -> Boolean
|
||||
Text.contains term="" matcher=Text_Matcher.new = case matcher of
|
||||
Text_Matcher case_sensitivity -> case case_sensitivity of
|
||||
True -> Text_Utils.contains this term
|
||||
Case_Insensitive locale ->
|
||||
Text_Utils.contains (this.to_case_insensitive_key locale) (term.to_case_insensitive_key locale)
|
||||
Regex_Matcher _ _ _ _ _ ->
|
||||
compiled_pattern = here.prepare_regex term matcher
|
||||
compiled_pattern = matcher.compile term
|
||||
match = compiled_pattern.match this Mode.First
|
||||
match.is_nothing.not
|
||||
|
||||
@ -941,7 +941,8 @@ Text.take range =
|
||||
if char_range.is_error then char_range else
|
||||
Text_Utils.substring this char_range.start char_range.end
|
||||
|
||||
## Creates a new Text by removing the specified range of the input.
|
||||
## ALIAS skip, remove
|
||||
Creates a new Text by removing the specified range of the input.
|
||||
|
||||
This can select a section of text from the beginning, end, or middle of the
|
||||
input using various criteria defined by the range parameter.
|
||||
@ -1040,16 +1041,3 @@ Text.to_lower_case locale=Locale.default =
|
||||
Text.to_upper_case : Locale.Locale -> Text
|
||||
Text.to_upper_case locale=Locale.default =
|
||||
UCharacter.toUpperCase locale.java_locale this
|
||||
|
||||
## PRIVATE
|
||||
prepare_regex : Text -> Regex_Matcher -> Pattern
|
||||
prepare_regex pattern regex_matcher = case regex_matcher of
|
||||
Regex_Matcher case_sensitive multiline match_ascii dot_matches_newline comments ->
|
||||
case_insensitive = case case_sensitive of
|
||||
True -> False
|
||||
## TODO [RW] Currently locale is not supported in case-insensitive
|
||||
Regex matching. There are plans to revisit it:
|
||||
https://www.pivotaltracker.com/story/show/181313576
|
||||
Case_Insensitive _ -> True
|
||||
compiled_pattern = Regex.compile pattern case_insensitive=case_insensitive match_ascii=match_ascii dot_matches_newline=dot_matches_newline multiline=multiline comments=comments
|
||||
compiled_pattern
|
||||
|
@ -1,5 +1,19 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import Standard.Base.Data.Locale
|
||||
import Standard.Base.Data.Text.Regex
|
||||
from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior, Report_Warning
|
||||
from Standard.Base.Error.Warnings import Warning_System
|
||||
|
||||
## UNSTABLE
|
||||
An error indicating that some criteria did not match any names in the input.
|
||||
type No_Matches_Found (criteria : Vector Text)
|
||||
|
||||
No_Matches_Found.to_display_text : Text
|
||||
No_Matches_Found.to_display_text =
|
||||
"The criteria "+this.criteria.to_text+" did not match any names in the input."
|
||||
|
||||
|
||||
## Represents case-insensitive comparison mode.
|
||||
|
||||
Arguments:
|
||||
@ -40,6 +54,20 @@ type Text_Matcher (case_sensitive : (True | Case_Insensitive) = True)
|
||||
'comments' in the regex.
|
||||
type Regex_Matcher (case_sensitive : (True | Case_Insensitive) = True) (multiline : Boolean = False) (match_ascii : Boolean = False) (dot_matches_newline : Boolean = False) (comments : Boolean = False)
|
||||
|
||||
## UNSTABLE
|
||||
Compiles a provided pattern according to the rules defined in this
|
||||
`Regex_Matcher`.
|
||||
Regex_Matcher.compile : Text -> Pattern
|
||||
Regex_Matcher.compile pattern =
|
||||
case_insensitive = case this.case_sensitive of
|
||||
True -> False
|
||||
## TODO [RW] Currently locale is not supported in case-insensitive
|
||||
Regex matching. There are plans to revisit it:
|
||||
https://www.pivotaltracker.com/story/show/181313576
|
||||
Case_Insensitive _ -> True
|
||||
compiled_pattern = Regex.compile pattern case_insensitive=case_insensitive match_ascii=this.match_ascii dot_matches_newline=this.dot_matches_newline multiline=this.multiline comments=this.comments
|
||||
compiled_pattern
|
||||
|
||||
## UNSTABLE
|
||||
Represents case-insensitive comparison mode.
|
||||
|
||||
@ -110,3 +138,181 @@ Text_Matcher.new case_sensitive=True = Text_Matcher case_sensitive
|
||||
Regex_Matcher.new : (True | Case_Insensitive) -> Boolean -> Boolean -> Boolean -> Boolean -> Regex_Matcher
|
||||
Regex_Matcher.new case_sensitive=True multiline=False match_ascii=False dot_matches_newline=False comments=False =
|
||||
Regex_Matcher case_sensitive multiline match_ascii dot_matches_newline comments
|
||||
|
||||
## UNSTABLE
|
||||
Checks if a name matches the provided criterion according to the specified
|
||||
matching strategy.
|
||||
|
||||
Arguments:
|
||||
- name: A `Text` representing the name being matched.
|
||||
- criterion: A `Text` representing the name to be matched.
|
||||
|
||||
> Example
|
||||
Check if the provided name exactly matches the criterion (case-sensitively by default).
|
||||
|
||||
Text_Matcher.new.match_single_criterion "Foobar" "foo" == False
|
||||
Text_Matcher.match_single_criterion : Text -> Text -> Boolean
|
||||
Text_Matcher.match_single_criterion name criterion =
|
||||
case this.case_sensitive of
|
||||
True -> name == criterion
|
||||
Case_Insensitive locale -> name.equals_ignore_case criterion locale=locale
|
||||
|
||||
## UNSTABLE
|
||||
Checks if a name matches the provided criterion according to the specified
|
||||
matching strategy.
|
||||
|
||||
Arguments:
|
||||
- name: A `Text` representing the name being matched.
|
||||
- criterion: A `Text` representing the regular expression specifying the
|
||||
matching criterion.
|
||||
|
||||
> Example
|
||||
Check if the provided name matches a regular expression.
|
||||
|
||||
Regex_Matcher.new case_sensitive=Case_Insensitive.new . match_single_criterion "Foobar" "f.*" == True
|
||||
Regex_Matcher.match_single_criterion : Text -> Text -> Boolean
|
||||
Regex_Matcher.match_single_criterion name criterion =
|
||||
this.compile criterion . matches name
|
||||
|
||||
## UNSTABLE
|
||||
Selects objects from an input list that match any of the provided criteria.
|
||||
|
||||
Arguments:
|
||||
- objects: A list of objects to be matched.
|
||||
- criteria: A list of texts representing the matching criteria. Their meaning
|
||||
depends on the matching strategy.
|
||||
- reorder: Specifies whether to reorder the matched objects according to the
|
||||
order of the matching criteria.
|
||||
If `False`, the matched entries are returned in the same order as in the
|
||||
input.
|
||||
If `True`, the matched entries are returned in the order of the criteria
|
||||
matching them. If a single object has been matched by multiple criteria, it
|
||||
is placed in the group belonging to the first matching criterion on the
|
||||
list.
|
||||
If a single criterion's group has more than one element, their relative
|
||||
order is the same as in the input.
|
||||
- name_mapper: A function mapping a provided object to its name, which will
|
||||
then be matched with the criteria. It is set to the identity function by
|
||||
default, thus allowing the input to be a list of names to match. But it can
|
||||
be overridden to enable matching more complex objects.
|
||||
- matcher: A `Matcher` instance specifying how to interpret the criterion.
|
||||
- on_problems: Specifies the behavior when a problem occurs during the
|
||||
function.
|
||||
By default, a warning is issued, but the operation proceeds.
|
||||
If set to `Report_Error`, the operation fails with a dataflow error.
|
||||
If set to `Ignore`, the operation proceeds without errors or warnings.
|
||||
- warnings: A Warning_System instance specifying how to handle warnings. This
|
||||
is a temporary workaround to allow for testing the warning mechanism. Once
|
||||
the proper warning system is implemented, this argument will become
|
||||
obsolete and will be removed. No user code should use this argument, as it
|
||||
will be removed in the future.
|
||||
|
||||
> Example
|
||||
Selects objects matching one of the provided patterns, preserving the input order.
|
||||
|
||||
Regex_Matcher.new case_sensitive=True . match_criteria ["foo", "foobar", "quux", "baz", "Foo"] [".*ba.*", "f.*"] == ["foo", "foobar", "baz"]
|
||||
|
||||
> Example
|
||||
Selects pairs matching their first element with the provided criteria and
|
||||
ordering the result according to the order of criteria that matched them.
|
||||
|
||||
Text_Matcher.new.match_criteria [Pair "foo" 42, Pair "bar" 33, Pair "baz" 10, Pair "foo" 0, Pair 10 10] ["bar", "foo"] reorder=True name_mapper=_.first == [Pair "bar" 33, Pair "foo" 42, Pair "foo" 0]
|
||||
Text_Matcher.match_criteria : Vector Any -> Vector Text -> Boolean -> (Any -> Text) -> Problem_Behavior -> Warning_System -> Vector Any ! No_Matches_Found
|
||||
Text_Matcher.match_criteria = here.match_criteria_implementation this
|
||||
|
||||
## UNSTABLE
|
||||
Selects objects from an input list that match any of the provided criteria.
|
||||
|
||||
Arguments:
|
||||
- objects: A list of objects to be matched.
|
||||
- criteria: A list of texts representing the matching criteria. Their meaning
|
||||
depends on the matching strategy.
|
||||
- reorder: Specifies whether to reorder the matched objects according to the
|
||||
order of the matching criteria.
|
||||
If `False`, the matched entries are returned in the same order as in the
|
||||
input.
|
||||
If `True`, the matched entries are returned in the order of the criteria
|
||||
matching them. If a single object has been matched by multiple criteria, it
|
||||
is placed in the group belonging to the first matching criterion on the
|
||||
list.
|
||||
If a single criterion's group has more than one element, their relative
|
||||
order is the same as in the input.
|
||||
- name_mapper: A function mapping a provided object to its name, which will
|
||||
then be matched with the criteria. It is set to the identity function by
|
||||
default, thus allowing the input to be a list of names to match. But it can
|
||||
be overridden to enable matching more complex objects.
|
||||
- matcher: A `Matcher` instance specifying how to interpret the criterion.
|
||||
- on_problems: Specifies the behavior when a problem occurs during the
|
||||
function.
|
||||
By default, a warning is issued, but the operation proceeds.
|
||||
If set to `Report_Error`, the operation fails with a dataflow error.
|
||||
If set to `Ignore`, the operation proceeds without errors or warnings.
|
||||
- warnings: A Warning_System instance specifying how to handle warnings. This
|
||||
is a temporary workaround to allow for testing the warning mechanism. Once
|
||||
the proper warning system is implemented, this argument will become
|
||||
obsolete and will be removed. No user code should use this argument, as it
|
||||
will be removed in the future.
|
||||
|
||||
> Example
|
||||
Selects objects matching one of the provided patterns, preserving the input order.
|
||||
|
||||
Regex_Matcher.new case_sensitive=True . match_criteria ["foo", "foobar", "quux", "baz", "Foo"] [".*ba.*", "f.*"] == ["foo", "foobar", "baz"]
|
||||
|
||||
> Example
|
||||
Selects pairs matching their first element with the provided criteria and
|
||||
ordering the result according to the order of criteria that matched them.
|
||||
|
||||
Text_Matcher.new.match_criteria [Pair "foo" 42, Pair "bar" 33, Pair "baz" 10, Pair "foo" 0, Pair 10 10] ["bar", "foo"] reorder=True name_mapper=_.first == [Pair "bar" 33, Pair "foo" 42, Pair "foo" 0]
|
||||
Regex_Matcher.match_criteria : Vector Any -> Vector Text -> Boolean -> (Any -> Text) -> Problem_Behavior -> Warning_System -> Vector Any ! No_Matches_Found
|
||||
Regex_Matcher.match_criteria = here.match_criteria_implementation this
|
||||
|
||||
## A common supertype representing a matching strategy.
|
||||
type Matcher
|
||||
Text_Matcher
|
||||
Regex_Matcher
|
||||
|
||||
## PRIVATE
|
||||
match_criteria_implementation matcher objects criteria reorder=False name_mapper=(x->x) on_problems=Report_Warning warnings=Warnings.default = Panic.recover <|
|
||||
[matcher, objects, criteria, reorder, name_mapper, on_problems, warnings] . each Panic.rethrow
|
||||
|
||||
# match_matrix . at i . at j specifies whether objects.at i matches criteria.at j
|
||||
match_matrix = objects.map obj->
|
||||
criteria.map criterion->
|
||||
name = name_mapper obj
|
||||
matcher.match_single_criterion name criterion
|
||||
|
||||
# Checks if the ith object is matched by any criterion.
|
||||
is_object_matched_by_anything : Integer -> Boolean
|
||||
is_object_matched_by_anything i =
|
||||
match_matrix.at i . any x->x
|
||||
|
||||
# Checks if the ith criterion matches any objects.
|
||||
does_criterion_match_anything : Integer -> Boolean
|
||||
does_criterion_match_anything i =
|
||||
match_matrix.map (col -> col.at i) . any x->x
|
||||
|
||||
# Selects object indices which satisfy the provided predicate.
|
||||
select_matching_indices : (Integer -> Boolean) -> Vector Text
|
||||
select_matching_indices matcher =
|
||||
0.up_to objects.length . to_vector . filter matcher
|
||||
|
||||
# Check consistency
|
||||
checked_criteria = criteria.map_with_index j-> criterion->
|
||||
has_matches = does_criterion_match_anything j
|
||||
Pair has_matches criterion
|
||||
unmatched_criteria = checked_criteria.filter (p -> p.first.not) . map .second
|
||||
|
||||
selected_indices = case reorder of
|
||||
True ->
|
||||
nested_indices = 0.up_to criteria.length . map j->
|
||||
is_object_matched_by_this_criterion i =
|
||||
match_matrix.at i . at j
|
||||
select_matching_indices is_object_matched_by_this_criterion
|
||||
nested_indices.flat_map x->x . distinct
|
||||
False ->
|
||||
select_matching_indices is_object_matched_by_anything
|
||||
|
||||
result = selected_indices.map objects.at
|
||||
problems = if unmatched_criteria.is_empty then [] else
|
||||
[No_Matches_Found unmatched_criteria]
|
||||
on_problems.attach_problems_after result problems warnings
|
||||
|
@ -125,7 +125,7 @@ type Table
|
||||
> Example
|
||||
Select columns matching a regular expression.
|
||||
|
||||
table.select_columns (By_Name ["foo.+", "b.*"] (Matching.Regex case_senitivity=Case_Insensitive.new))
|
||||
table.select_columns (By_Name ["foo.+", "b.*"] (Regex_Matcher.new case_sensitive=Case_Insensitive.new))
|
||||
|
||||
> Example
|
||||
Select the first two columns and the last column, moving the last one to front.
|
||||
@ -179,7 +179,7 @@ type Table
|
||||
> Example
|
||||
Remove columns matching a regular expression.
|
||||
|
||||
table.remove_columns (By_Name ["foo.+", "b.*"] (Matching.Regex case_senitivity=Case_Insensitive.new))
|
||||
table.remove_columns (By_Name ["foo.+", "b.*"] (Regex_Matcher.new case_sensitive=Case_Insensitive.new))
|
||||
|
||||
> Example
|
||||
Remove the first two columns and the last column.
|
||||
@ -233,7 +233,7 @@ type Table
|
||||
> Example
|
||||
Move columns matching a regular expression to front, keeping columns matching "foo.+" before columns matching "b.*".
|
||||
|
||||
table.reorder_columns (By_Name ["foo.+", "b.*"] (Matching.Regex case_senitivity=Case_Insensitive.new))
|
||||
table.reorder_columns (By_Name ["foo.+", "b.*"] (Regex_Matcher.new case_sensitive=Case_Insensitive.new))
|
||||
|
||||
> Example
|
||||
Swap the first two columns.
|
||||
|
@ -1,17 +1,15 @@
|
||||
from Standard.Base import all
|
||||
|
||||
from Standard.Table.Data.Matching import Matching_Strategy, Exact
|
||||
|
||||
## Specifies a selection of columns from the table and the new name for them to
|
||||
become.
|
||||
type Column_Mapping
|
||||
|
||||
## Selects columns based on their names.
|
||||
|
||||
The `matching_strategy` can be used to specify if the names should be
|
||||
matched exactly or should be treated as regular expressions. It also
|
||||
allows to specify if the matching should be case-sensitive.
|
||||
type By_Name (names : Map Text Text) (matching_strategy : Matching_Strategy = Exact True)
|
||||
The `matcher` can be used to specify if the names should be matched
|
||||
exactly or should be treated as regular expressions. It also allows you to
|
||||
specify if the matching should be case-sensitive.
|
||||
type By_Name (names : Map Text Text) (matcher : Matcher = Text_Matcher.new)
|
||||
|
||||
## Selects columns by their index.
|
||||
|
||||
@ -36,5 +34,5 @@ type Column_Mapping
|
||||
|
||||
## UNSTABLE
|
||||
A temporary workaround to allow the By_Name constructor to work with default arguments.
|
||||
By_Name.new : Map Text Text -> Matching_Strategy -> By_Name
|
||||
By_Name.new names (matching_strategy = Exact.new) = By_Name names matching_strategy
|
||||
By_Name.new : Map Text Text -> Matcher -> By_Name
|
||||
By_Name.new names (matcher = Text_Matcher.new) = By_Name names matcher
|
||||
|
@ -1,17 +1,15 @@
|
||||
from Standard.Base import all
|
||||
|
||||
from Standard.Table.Data.Matching import Matching_Strategy, Exact
|
||||
|
||||
## Specifies a selection of columns from the table on which an operation is
|
||||
going to be performed.
|
||||
type Column_Selector
|
||||
|
||||
## Selects columns based on their names.
|
||||
|
||||
The `matching_strategy` can be used to specify if the names should be
|
||||
matched exactly or should be treated as regular expressions. It also
|
||||
allows to specify if the matching should be case-sensitive.
|
||||
type By_Name (names : Vector Text) (matching_strategy : Matching_Strategy = Exact.new)
|
||||
The `matcher` can be used to specify if the names should be matched
|
||||
exactly or should be treated as regular expressions. It also allows you to
|
||||
specify if the matching should be case-sensitive.
|
||||
type By_Name (names : Vector Text) (matcher : Matcher = Text_Matcher.new)
|
||||
|
||||
## Selects columns by their index.
|
||||
|
||||
@ -36,5 +34,5 @@ type Column_Selector
|
||||
Atom constructors, as described in the following issue:
|
||||
https://github.com/enso-org/enso/issues/1600
|
||||
Once that issue is fixed, it can be removed.
|
||||
By_Name.new : Vector Text -> Matching_Strategy -> By_Name
|
||||
By_Name.new names (matching_strategy = Exact.new) = By_Name names matching_strategy
|
||||
By_Name.new : Vector Text -> Matcher -> By_Name
|
||||
By_Name.new names (matcher = Text_Matcher.new) = By_Name names matcher
|
||||
|
@ -1,187 +0,0 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Data.Locale
|
||||
import Standard.Base.Data.Text.Regex as Regex_Module
|
||||
|
||||
from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior, Report_Warning
|
||||
from Standard.Base.Error.Warnings import Warning_System
|
||||
|
||||
## Strategy for matching names.
|
||||
type Matching_Strategy
|
||||
## UNSTABLE
|
||||
Exact name matching.
|
||||
|
||||
A name is matched if its exact name is provided.
|
||||
type Exact (case_sensitivity : (True | Case_Insensitive) = True)
|
||||
|
||||
## UNSTABLE
|
||||
Regex-based name matching.
|
||||
|
||||
A name is matched if its name matches the provided regular expression.
|
||||
type Regex (case_sensitivity : (True | Case_Insensitive) = True)
|
||||
|
||||
## ADVANCED
|
||||
Compiles the regular expression following the Matching_Strategy rules.
|
||||
compile : Text -> Regex_Module.Pattern
|
||||
compile criterion =
|
||||
case this of
|
||||
Regex _ ->
|
||||
insensitive = case this.case_sensitivity of
|
||||
True -> False
|
||||
## TODO [RW] Currently locale is not supported in
|
||||
case-insensitive Regex matching. There are plans to
|
||||
revisit it:
|
||||
https://www.pivotaltracker.com/story/show/181313576
|
||||
Case_Insensitive _ -> True
|
||||
re = Regex_Module.compile criterion case_insensitive=insensitive
|
||||
re
|
||||
Exact _ -> Error.throw "Invalid Matching_Strategy to compile"
|
||||
|
||||
|
||||
## UNSTABLE
|
||||
A temporary workaround to allow the `Exact` constructor to work with default
|
||||
arguments.
|
||||
|
||||
It is needed, because there are issues with relying on default arguments of
|
||||
Atom constructors, as described in the following issue:
|
||||
https://github.com/enso-org/enso/issues/1600
|
||||
Once that issue is fixed, it can be removed.
|
||||
Exact.new : (True | Case_Insensitive) -> Exact
|
||||
Exact.new (case_sensitivity = True) = Exact case_sensitivity
|
||||
|
||||
|
||||
## UNSTABLE
|
||||
A temporary workaround to allow the `Regex` constructor to work with default
|
||||
arguments.
|
||||
|
||||
It is needed, because there are issues with relying on default arguments of
|
||||
Atom constructors, as described in the following issue:
|
||||
https://github.com/enso-org/enso/issues/1600
|
||||
Once that issue is fixed, it can be removed.
|
||||
Regex.new : (True | Case_Insensitive) -> Regex
|
||||
Regex.new (case_sensitivity = True) = Regex case_sensitivity
|
||||
|
||||
|
||||
## UNSTABLE
|
||||
An error indicating that some criteria did not match any names in the input.
|
||||
type No_Matches_Found (criteria : Vector Text)
|
||||
|
||||
No_Matches_Found.to_display_text : Text
|
||||
No_Matches_Found.to_display_text =
|
||||
"The criteria "+this.criteria.to_text+" did not match any names in the input."
|
||||
|
||||
|
||||
## UNSTABLE
|
||||
Selects objects from an input list that match any of the provided criteria.
|
||||
|
||||
Arguments:
|
||||
- objects: A list of objects to be matched.
|
||||
- criteria: A list of texts representing the matching criteria. Their meaning
|
||||
depends on the matching strategy.
|
||||
- reorder: Specifies whether to reorder the matched objects according to the
|
||||
order of the matching criteria.
|
||||
If `False`, the matched entries are returned in the same order as in the
|
||||
input.
|
||||
If `True`, the matched entries are returned in the order of the criteria
|
||||
matching them. If a single object has been matched by multiple criteria, it
|
||||
is placed in the group belonging to the first matching criterion on the
|
||||
list.
|
||||
If a single criterion's group has more than one element, their relative
|
||||
order is the same as in the input.
|
||||
- name_mapper: A function mapping a provided object to its name, which will
|
||||
then be matched with the criteria. It is set to the identity function by
|
||||
default, thus allowing the input to be a list of names to match. But it can
|
||||
be overridden to enable matching more complex objects.
|
||||
- matching_strategy: A `Matching_Strategy` instance specifying how to
|
||||
interpret the criterion.
|
||||
- on_problems: Specifies the behavior when a problem occurs during the
|
||||
function.
|
||||
By default, a warning is issued, but the operation proceeds.
|
||||
If set to `Report_Error`, the operation fails with a dataflow error.
|
||||
If set to `Ignore`, the operation proceeds without errors or warnings.
|
||||
- warnings: A Warning_System instance specifying how to handle warnings. This
|
||||
is a temporary workaround to allow for testing the warning mechanism. Once
|
||||
the proper warning system is implemented, this argument will become
|
||||
obsolete and will be removed. No user code should use this argument, as it
|
||||
will be removed in the future.
|
||||
|
||||
> Example
|
||||
Selects objects matching one of the provided patterns, preserving the input order.
|
||||
|
||||
Matching.match_criteria ["foo", "foobar", "quux", "baz", "Foo"] [".*ba.*", "f.*"] matching_strategy=(Regex case_sensitivity=True) == ["foo", "foobar", "baz"]
|
||||
|
||||
> Example
|
||||
Selects pairs matching their first element with the provided criteria and
|
||||
ordering the result according to the order of criteria that matched them.
|
||||
|
||||
Matching.match_criteria [Pair "foo" 42, Pair "bar" 33, Pair "baz" 10, Pair "foo" 0, Pair 10 10] ["bar", "foo"] reorder=True name_mapper=_.name == [Pair "bar" 33, Pair "foo" 42, Pair "foo" 0]
|
||||
match_criteria : Vector Any -> Vector Text -> Boolean -> (Any -> Text) -> Matching_Strategy -> Problem_Behavior -> Warning_System -> Vector Any ! No_Matches_Found
|
||||
match_criteria objects criteria reorder=False name_mapper=(x->x) matching_strategy=(Exact case_sensitivity=True) on_problems=Report_Warning warnings=Warnings.default = Panic.recover <|
|
||||
[objects, criteria, reorder, name_mapper, matching_strategy, on_problems, warnings] . each Panic.rethrow
|
||||
|
||||
# match_matrix . at i . at j specifies whether objects.at i matches criteria.at j
|
||||
match_matrix = objects.map obj->
|
||||
criteria.map criterion->
|
||||
name = name_mapper obj
|
||||
here.match_single_criterion name criterion matching_strategy
|
||||
|
||||
# Checks if the ith object is matched by any criterion.
|
||||
is_object_matched_by_anything : Integer -> Boolean
|
||||
is_object_matched_by_anything i =
|
||||
match_matrix.at i . any x->x
|
||||
|
||||
# Checks if the ith criterion matches any columns.
|
||||
does_criterion_match_anything : Integer -> Boolean
|
||||
does_criterion_match_anything i =
|
||||
match_matrix.map (col -> col.at i) . any x->x
|
||||
|
||||
# Selects object indices which satisfy the provided predicate.
|
||||
select_matching_indices : (Integer -> Boolean) -> Vector Text
|
||||
select_matching_indices matcher =
|
||||
0.up_to objects.length . to_vector . filter matcher
|
||||
|
||||
# Check consistency
|
||||
checked_criteria = criteria.map_with_index j-> criterion->
|
||||
has_matches = does_criterion_match_anything j
|
||||
Pair has_matches criterion
|
||||
unmatched_criteria = checked_criteria.filter (p -> p.first.not) . map .second
|
||||
|
||||
selected_indices = case reorder of
|
||||
True ->
|
||||
nested_indices = 0.up_to criteria.length . map j->
|
||||
is_object_matched_by_this_criterion i =
|
||||
match_matrix.at i . at j
|
||||
select_matching_indices is_object_matched_by_this_criterion
|
||||
nested_indices.flat_map x->x . distinct
|
||||
False ->
|
||||
select_matching_indices is_object_matched_by_anything
|
||||
|
||||
result = selected_indices.map objects.at
|
||||
problems = if unmatched_criteria.is_empty then [] else
|
||||
[No_Matches_Found unmatched_criteria]
|
||||
on_problems.attach_problems_after result problems warnings
|
||||
|
||||
|
||||
## UNSTABLE
|
||||
Checks if a name matches the provided criterion according to the specified
|
||||
matching strategy.
|
||||
|
||||
Arguments:
|
||||
- name: A `Text` representing the name being matched.
|
||||
- criterion: A `Text` representing the matching criterion. It can be a simple
|
||||
name or a regular expression; its meaning depends on the value of
|
||||
`matching_strategy`.
|
||||
- matching_strategy: A `Matching_Strategy` instance specifying how the
|
||||
criterion should be interpreted.
|
||||
|
||||
> Example
|
||||
Check if the provided name matches a regular expression.
|
||||
|
||||
Matching.match_single_criterion "Foobar" "f.*" (Regex case_sensitivity=Case_Insensitive.new) == True
|
||||
match_single_criterion : Text -> Text -> Matching_Strategy -> Boolean
|
||||
match_single_criterion name criterion matching_strategy = case matching_strategy of
|
||||
Exact case_sensitivity ->
|
||||
case case_sensitivity of
|
||||
True -> name == criterion
|
||||
Case_Insensitive locale -> name.equals_ignore_case criterion locale=locale
|
||||
Regex _ ->
|
||||
matching_strategy.compile criterion . matches name
|
@ -279,7 +279,7 @@ type Table
|
||||
> Example
|
||||
Select columns matching a regular expression.
|
||||
|
||||
table.select_columns (By_Name ["foo.+", "b.*"] (Matching.Regex case_senitivity=Case_Insensitive.new))
|
||||
table.select_columns (By_Name ["foo.+", "b.*"] (Regex_Matcher.new case_sensitive=Case_Insensitive.new))
|
||||
|
||||
> Example
|
||||
Select the first two columns and the last column, moving the last one to front.
|
||||
@ -333,7 +333,7 @@ type Table
|
||||
> Example
|
||||
Remove columns matching a regular expression.
|
||||
|
||||
table.remove_columns (By_Name ["foo.+", "b.*"] (Matching.Regex case_senitivity=Case_Insensitive.new))
|
||||
table.remove_columns (By_Name ["foo.+", "b.*"] (Regex_Matcher.new case_sensitive=Case_Insensitive.new))
|
||||
|
||||
> Example
|
||||
Remove the first two columns and the last column.
|
||||
@ -387,7 +387,7 @@ type Table
|
||||
> Example
|
||||
Move columns matching a regular expression to front, keeping columns matching "foo.+" before columns matching "b.*".
|
||||
|
||||
table.reorder_columns (By_Name ["foo.+", "b.*"] (Matching.Regex case_senitivity=Case_Insensitive.new))
|
||||
table.reorder_columns (By_Name ["foo.+", "b.*"] (Regex_Matcher.new case_sensitive=Case_Insensitive.new))
|
||||
|
||||
> Example
|
||||
Swap the first two columns.
|
||||
|
@ -1,7 +1,7 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import Standard.Base.Error.Warnings
|
||||
import Standard.Table.Data.Matching
|
||||
import Standard.Base.Data.Text.Matching
|
||||
from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Name, By_Index, By_Column
|
||||
from Standard.Table.Data.Sort_Method as Sort_Method_Module import Sort_Method
|
||||
from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Problem_Behavior, Report_Warning
|
||||
@ -150,16 +150,16 @@ rename_columns internal_columns mapping on_problems warnings =
|
||||
matched = HashSet.new
|
||||
|
||||
mapper = name->
|
||||
index = 0.up_to good_names.length . find i->(Matching.match_single_criterion name ((good_names.at i).at 0) ms)
|
||||
index = 0.up_to good_names.length . find i->(ms.match_single_criterion name ((good_names.at i).at 0))
|
||||
case index of
|
||||
Nothing -> Nothing
|
||||
_ ->
|
||||
matched.add index
|
||||
new_name = case ms of
|
||||
Matching.Regex _ ->
|
||||
Regex_Matcher _ _ _ _ _ ->
|
||||
pattern = ms.compile ((good_names.at index).at 0)
|
||||
pattern.replace name ((good_names.at index).at 1)
|
||||
_ -> (good_names.at index).at 1
|
||||
Text_Matcher _ -> (good_names.at index).at 1
|
||||
unique.make_unique new_name
|
||||
|
||||
new_names = 0.up_to col_count . map i->(mapper (internal_columns.at i).name)
|
||||
@ -168,7 +168,7 @@ rename_columns internal_columns mapping on_problems warnings =
|
||||
Validation_Result new_names (validation.problems + if unused.is_empty then [] else [Missing_Input_Columns unused])
|
||||
|
||||
mapped = case mapping of
|
||||
Column_Mapping.By_Column vec -> name_mapper (vec.map r-> [r.at 0 . name, r.at 1]) (Matching.Exact case_sensitivity=True)
|
||||
Column_Mapping.By_Column vec -> name_mapper (vec.map r-> [r.at 0 . name, r.at 1]) (Text_Matcher.new case_sensitive=True)
|
||||
Column_Mapping.By_Name map ms -> name_mapper map.to_vector ms
|
||||
Column_Mapping.By_Index map ->
|
||||
validation = here.validate_indices col_count map.keys
|
||||
@ -251,11 +251,11 @@ sort_columns internal_columns sort_method =
|
||||
will be removed in the future.
|
||||
select_columns_helper : Vector -> Column_Selector -> Boolean -> Problem_Behavior -> Warnings.Warning_System -> Vector
|
||||
select_columns_helper internal_columns selector reorder on_problems warnings = case selector of
|
||||
By_Name names matching_strategy ->
|
||||
By_Name names matcher ->
|
||||
validation = here.validate_unique names v->[Duplicate_Column_Selectors v]
|
||||
on_problems.attach_problems_before validation.problems warnings <|
|
||||
Warnings.map_warnings_and_errors here.promote_no_matches_to_missing_columns warnings warnings->
|
||||
Matching.match_criteria internal_columns validation.valid reorder=reorder name_mapper=(_.name) matching_strategy=matching_strategy on_problems=on_problems warnings=warnings
|
||||
matcher.match_criteria internal_columns validation.valid reorder=reorder name_mapper=(_.name) on_problems=on_problems warnings=warnings
|
||||
By_Index indices ->
|
||||
validation = here.validate_indices internal_columns.length indices
|
||||
good_indices = validation.valid.map p->(p.at 0)
|
||||
@ -267,7 +267,7 @@ select_columns_helper internal_columns selector reorder on_problems warnings = c
|
||||
here.select_indices_preserving_order internal_columns good_indices
|
||||
By_Column columns ->
|
||||
column_names = columns.map .name
|
||||
new_selector = By_Name column_names (Matching.Exact case_sensitivity=True)
|
||||
new_selector = By_Name column_names (Text_Matcher.new case_sensitive=True)
|
||||
here.select_columns internal_columns new_selector reorder=reorder on_problems=on_problems warnings=warnings
|
||||
|
||||
## PRIVATE
|
||||
|
@ -548,6 +548,6 @@ run_spec ~behavior =
|
||||
case ex of
|
||||
Failure _ -> ex
|
||||
Finished_With_Error err stack_trace_text ->
|
||||
Failure ("An unexpected error was returned: " + err.to_display_text + '\n' + stack_trace_text)
|
||||
_ -> Failure ("An unexpected panic was thrown: " + ex.to_display_text + '\n' + maybeExc.get_stack_trace_text)
|
||||
Failure ("An unexpected error was returned: " + err.to_text + '\n' + stack_trace_text)
|
||||
_ -> Failure ("An unexpected panic was thrown: " + ex.to_text + '\n' + maybeExc.get_stack_trace_text)
|
||||
result
|
||||
|
@ -4,7 +4,6 @@ import Standard.Test.Problems
|
||||
|
||||
import Standard.Base.Error.Problem_Behavior
|
||||
import Standard.Base.Error.Warnings
|
||||
import Standard.Table.Data.Matching
|
||||
import Standard.Table.Data.Column_Mapping
|
||||
from Standard.Table.Error as Table_Errors import all
|
||||
from Standard.Table.Data.Column_Selector as Column_Selector_Module import all
|
||||
@ -43,7 +42,7 @@ spec prefix table_builder supports_case_sensitive_columns =
|
||||
Test.group prefix+"Table.select_columns" <|
|
||||
Test.specify "should work as shown in the doc examples" <|
|
||||
expect_column_names ["foo", "bar"] <| table.select_columns (By_Name.new ["bar", "foo"])
|
||||
expect_column_names ["bar", "Baz", "foo_1", "foo_2"] <| table.select_columns (By_Name ["foo.+", "b.*"] (Matching.Regex Case_Insensitive.new))
|
||||
expect_column_names ["bar", "Baz", "foo_1", "foo_2"] <| table.select_columns (By_Name ["foo.+", "b.*"] (Regex_Matcher.new case_sensitive=Case_Insensitive.new))
|
||||
expect_column_names ["abcd123", "foo", "bar"] <| table.select_columns (By_Index [-1, 0, 1]) reorder=True
|
||||
|
||||
column1 = table.at "foo_1"
|
||||
@ -57,11 +56,11 @@ spec prefix table_builder supports_case_sensitive_columns =
|
||||
table_2 . at "foo" . to_vector . should_equal [1,2,3]
|
||||
|
||||
Test.specify "should correctly handle regex matching" <|
|
||||
expect_column_names ["foo"] <| table.select_columns (By_Name ["foo"] Matching.Regex.new)
|
||||
expect_column_names ["ab.+123", "abcd123"] <| table.select_columns (By_Name ["a.*"] Matching.Regex.new)
|
||||
expect_column_names ["ab.+123", "abcd123"] <| table.select_columns (By_Name ["ab.+123"] Matching.Regex.new)
|
||||
expect_column_names ["foo"] <| table.select_columns (By_Name ["foo"] Regex_Matcher.new)
|
||||
expect_column_names ["ab.+123", "abcd123"] <| table.select_columns (By_Name ["a.*"] Regex_Matcher.new)
|
||||
expect_column_names ["ab.+123", "abcd123"] <| table.select_columns (By_Name ["ab.+123"] Regex_Matcher.new)
|
||||
expect_column_names ["ab.+123"] <| table.select_columns (By_Name.new ["ab.+123"])
|
||||
expect_column_names ["abcd123"] <| table.select_columns (By_Name ["abcd123"] Matching.Regex.new)
|
||||
expect_column_names ["abcd123"] <| table.select_columns (By_Name ["abcd123"] Regex_Matcher.new)
|
||||
|
||||
Test.specify "should allow negative indices" <|
|
||||
expect_column_names ["foo", "bar", "foo_2"] <| table.select_columns (By_Index [-3, 0, 1])
|
||||
@ -73,11 +72,11 @@ spec prefix table_builder supports_case_sensitive_columns =
|
||||
col2 = ["bar", Integer, [4,5,6]]
|
||||
col3 = ["Bar", Integer, [7,8,9]]
|
||||
table_builder [col1, col2, col3]
|
||||
expect_column_names ["bar", "Bar"] <| table.select_columns (By_Name ["bar"] (Matching.Exact Case_Insensitive.new))
|
||||
expect_column_names ["bar", "Bar"] <| table.select_columns (By_Name ["bar"] (Text_Matcher Case_Insensitive.new))
|
||||
|
||||
Test.specify "should correctly handle regexes matching multiple names" <|
|
||||
expect_column_names ["foo", "bar", "foo_1", "foo_2"] <| table.select_columns (By_Name ["b.*", "f.+"] Matching.Regex.new)
|
||||
expect_column_names ["bar", "foo", "foo_1", "foo_2"] <| table.select_columns (By_Name ["b.*", "f.+"] Matching.Regex.new) reorder=True
|
||||
expect_column_names ["foo", "bar", "foo_1", "foo_2"] <| table.select_columns (By_Name ["b.*", "f.+"] Regex_Matcher.new)
|
||||
expect_column_names ["bar", "foo", "foo_1", "foo_2"] <| table.select_columns (By_Name ["b.*", "f.+"] Regex_Matcher.new) reorder=True
|
||||
|
||||
Test.specify "should correctly handle problems: out of bounds indices" <|
|
||||
selector = By_Index [1, 0, 100, -200, 300]
|
||||
@ -157,7 +156,7 @@ spec prefix table_builder supports_case_sensitive_columns =
|
||||
Test.group prefix+"Table.remove_columns" <|
|
||||
Test.specify "should work as shown in the doc examples" <|
|
||||
expect_column_names ["Baz", "foo_1", "foo_2", "ab.+123", "abcd123"] <| table.remove_columns (By_Name.new ["bar", "foo"])
|
||||
expect_column_names ["foo", "ab.+123", "abcd123"] <| table.remove_columns (By_Name ["foo.+", "b.*"] (Matching.Regex Case_Insensitive.new))
|
||||
expect_column_names ["foo", "ab.+123", "abcd123"] <| table.remove_columns (By_Name ["foo.+", "b.*"] (Regex_Matcher.new case_sensitive=Case_Insensitive.new))
|
||||
expect_column_names ["Baz", "foo_1", "foo_2", "ab.+123"] <| table.remove_columns (By_Index [-1, 0, 1])
|
||||
|
||||
column1 = table.at "foo_1"
|
||||
@ -166,12 +165,12 @@ spec prefix table_builder supports_case_sensitive_columns =
|
||||
|
||||
Test.specify "should correctly handle regex matching" <|
|
||||
last_ones = table.columns.tail.map .name
|
||||
expect_column_names last_ones <| table.remove_columns (By_Name ["foo"] Matching.Regex.new)
|
||||
expect_column_names last_ones <| table.remove_columns (By_Name ["foo"] Regex_Matcher.new)
|
||||
first_ones = ["foo", "bar", "Baz", "foo_1", "foo_2"]
|
||||
expect_column_names first_ones <| table.remove_columns (By_Name ["a.*"] Matching.Regex.new)
|
||||
expect_column_names first_ones <| table.remove_columns (By_Name ["ab.+123"] Matching.Regex.new)
|
||||
expect_column_names first_ones <| table.remove_columns (By_Name ["a.*"] Regex_Matcher.new)
|
||||
expect_column_names first_ones <| table.remove_columns (By_Name ["ab.+123"] Regex_Matcher.new)
|
||||
expect_column_names first_ones+["abcd123"] <| table.remove_columns (By_Name.new ["ab.+123"])
|
||||
expect_column_names first_ones+["ab.+123"] <| table.remove_columns (By_Name ["abcd123"] Matching.Regex.new)
|
||||
expect_column_names first_ones+["ab.+123"] <| table.remove_columns (By_Name ["abcd123"] Regex_Matcher.new)
|
||||
|
||||
Test.specify "should allow negative indices" <|
|
||||
expect_column_names ["Baz", "foo_1", "ab.+123"] <| table.remove_columns (By_Index [-1, -3, 0, 1])
|
||||
@ -183,10 +182,10 @@ spec prefix table_builder supports_case_sensitive_columns =
|
||||
col2 = ["bar", Integer, [4,5,6]]
|
||||
col3 = ["Bar", Integer, [7,8,9]]
|
||||
table_builder [col1, col2, col3]
|
||||
expect_column_names ["foo"] <| table.remove_columns (By_Name ["bar"] (Matching.Exact Case_Insensitive.new))
|
||||
expect_column_names ["foo"] <| table.remove_columns (By_Name ["bar"] (Text_Matcher Case_Insensitive.new))
|
||||
|
||||
Test.specify "should correctly handle regexes matching multiple names" <|
|
||||
expect_column_names ["Baz", "ab.+123", "abcd123"] <| table.remove_columns (By_Name ["b.*", "f.+"] Matching.Regex.new)
|
||||
expect_column_names ["Baz", "ab.+123", "abcd123"] <| table.remove_columns (By_Name ["b.*", "f.+"] Regex_Matcher.new)
|
||||
|
||||
Test.specify "should correctly handle problems: out of bounds indices" <|
|
||||
selector = By_Index [1, 0, 100, -200, 300]
|
||||
@ -245,14 +244,14 @@ spec prefix table_builder supports_case_sensitive_columns =
|
||||
Problems.test_problem_handling action problems tester
|
||||
|
||||
Test.specify "should correctly handle problems: no columns in the output" <|
|
||||
selector = By_Name [".*"] Matching.Regex.new
|
||||
selector = By_Name [".*"] Regex_Matcher.new
|
||||
action = table.remove_columns selector warnings=_ on_problems=_
|
||||
tester = expect_column_names []
|
||||
problems = [No_Output_Columns]
|
||||
Problems.test_problem_handling action problems tester
|
||||
|
||||
Test.specify "should correctly handle multiple problems" <|
|
||||
selector = By_Name [".*", "hmmm"] Matching.Regex.new
|
||||
selector = By_Name [".*", "hmmm"] Regex_Matcher.new
|
||||
action = table.remove_columns selector warnings=_ on_problems=_
|
||||
tester = expect_column_names []
|
||||
problems = [Missing_Input_Columns ["hmmm"], No_Output_Columns]
|
||||
@ -266,7 +265,7 @@ spec prefix table_builder supports_case_sensitive_columns =
|
||||
Test.group prefix+"Table.reorder_columns" <|
|
||||
Test.specify "should work as shown in the doc examples" <|
|
||||
expect_column_names ["bar", "Baz", "foo_1", "foo_2", "ab.+123", "abcd123", "foo"] <| table.reorder_columns (By_Name.new ["foo"]) position=After_Other_Columns
|
||||
expect_column_names ["foo_1", "foo_2", "bar", "Baz", "foo", "ab.+123", "abcd123"] <| table.reorder_columns (By_Name ["foo.+", "b.*"] (Matching.Regex Case_Insensitive.new))
|
||||
expect_column_names ["foo_1", "foo_2", "bar", "Baz", "foo", "ab.+123", "abcd123"] <| table.reorder_columns (By_Name ["foo.+", "b.*"] (Regex_Matcher.new case_sensitive=Case_Insensitive.new))
|
||||
expect_column_names ["bar", "foo", "Baz", "foo_1", "foo_2", "ab.+123", "abcd123"] <| table.reorder_columns (By_Index [1, 0]) position=Before_Other_Columns
|
||||
expect_column_names ["bar", "Baz", "foo_1", "foo_2", "ab.+123", "abcd123", "foo"] <| table.reorder_columns (By_Index [0]) position=After_Other_Columns
|
||||
|
||||
@ -275,12 +274,12 @@ spec prefix table_builder supports_case_sensitive_columns =
|
||||
expect_column_names ["foo_1", "Baz", "foo", "bar", "foo_2", "ab.+123", "abcd123"] <| table.reorder_columns (By_Column [column1, column2])
|
||||
|
||||
Test.specify "should correctly handle regex matching" <|
|
||||
expect_column_names ["bar", "Baz", "foo_1", "foo_2", "ab.+123", "abcd123", "foo"] <| table.reorder_columns (By_Name ["foo"] Matching.Regex.new) position=After_Other_Columns
|
||||
expect_column_names ["bar", "Baz", "foo_1", "foo_2", "ab.+123", "abcd123", "foo"] <| table.reorder_columns (By_Name ["foo"] Regex_Matcher.new) position=After_Other_Columns
|
||||
rest = ["foo", "bar", "Baz", "foo_1", "foo_2"]
|
||||
expect_column_names ["ab.+123", "abcd123"]+rest <| table.reorder_columns (By_Name ["a.*"] Matching.Regex.new)
|
||||
expect_column_names ["ab.+123", "abcd123"]+rest <| table.reorder_columns (By_Name ["ab.+123"] Matching.Regex.new)
|
||||
expect_column_names ["ab.+123", "abcd123"]+rest <| table.reorder_columns (By_Name ["a.*"] Regex_Matcher.new)
|
||||
expect_column_names ["ab.+123", "abcd123"]+rest <| table.reorder_columns (By_Name ["ab.+123"] Regex_Matcher.new)
|
||||
expect_column_names ["ab.+123"]+rest+["abcd123"] <| table.reorder_columns (By_Name.new ["ab.+123"])
|
||||
expect_column_names ["abcd123"]+rest+["ab.+123"] <| table.reorder_columns (By_Name ["abcd123"] Matching.Regex.new)
|
||||
expect_column_names ["abcd123"]+rest+["ab.+123"] <| table.reorder_columns (By_Name ["abcd123"] Regex_Matcher.new)
|
||||
|
||||
Test.specify "should allow negative indices" <|
|
||||
expect_column_names ["abcd123", "foo_2", "foo", "bar", "Baz", "foo_1", "ab.+123"] <| table.reorder_columns (By_Index [-1, -3, 0, 1])
|
||||
@ -292,10 +291,10 @@ spec prefix table_builder supports_case_sensitive_columns =
|
||||
col2 = ["bar", Integer, [4,5,6]]
|
||||
col3 = ["Bar", Integer, [7,8,9]]
|
||||
table_builder [col1, col2, col3]
|
||||
expect_column_names ["bar", "Bar", "foo"] <| table.reorder_columns (By_Name ["bar"] (Matching.Exact Case_Insensitive.new))
|
||||
expect_column_names ["bar", "Bar", "foo"] <| table.reorder_columns (By_Name ["bar"] (Text_Matcher Case_Insensitive.new))
|
||||
|
||||
Test.specify "should correctly handle regexes matching multiple names" <|
|
||||
expect_column_names ["bar", "foo", "foo_1", "foo_2", "Baz", "ab.+123", "abcd123"] <| table.reorder_columns (By_Name ["b.*", "f.+"] Matching.Regex.new)
|
||||
expect_column_names ["bar", "foo", "foo_1", "foo_2", "Baz", "ab.+123", "abcd123"] <| table.reorder_columns (By_Name ["b.*", "f.+"] Regex_Matcher.new)
|
||||
|
||||
Test.specify "should correctly handle problems: out of bounds indices" <|
|
||||
selector = By_Index [1, 0, 100, -200, 300]
|
||||
@ -412,22 +411,22 @@ spec prefix table_builder supports_case_sensitive_columns =
|
||||
Test.specify "should work by name" <|
|
||||
map = Map.from_vector [["alpha", "FirstColumn"], ["delta", "Another"]]
|
||||
expect_column_names ["FirstColumn", "beta", "gamma", "Another"] <|
|
||||
table.rename_columns (Column_Mapping.By_Name map (Matching.Exact True))
|
||||
table.rename_columns (Column_Mapping.By_Name map (Text_Matcher True))
|
||||
|
||||
Test.specify "should work by name case insensitively" <|
|
||||
map = Map.from_vector [["ALPHA", "FirstColumn"], ["DELTA", "Another"]]
|
||||
expect_column_names ["FirstColumn", "beta", "gamma", "Another"] <|
|
||||
table.rename_columns (Column_Mapping.By_Name map (Matching.Exact Case_Insensitive.new))
|
||||
table.rename_columns (Column_Mapping.By_Name map (Text_Matcher Case_Insensitive.new))
|
||||
|
||||
Test.specify "should work by name using regex" <|
|
||||
map = Map.from_vector [["a.*", "FirstColumn"]]
|
||||
expect_column_names ["FirstColumn", "beta", "gamma", "delta"] <|
|
||||
table.rename_columns (Column_Mapping.By_Name map (Matching.Regex.new))
|
||||
table.rename_columns (Column_Mapping.By_Name map (Regex_Matcher.new))
|
||||
|
||||
Test.specify "should work by name using regex substitution" <|
|
||||
map = Map.from_vector [["a(.*)", "$1"]]
|
||||
expect_column_names ["lpha", "beta", "gamma", "delta"] <|
|
||||
table.rename_columns (Column_Mapping.By_Name map (Matching.Regex.new))
|
||||
table.rename_columns (Column_Mapping.By_Name map (Regex_Matcher.new))
|
||||
|
||||
Test.specify "should work by column" <|
|
||||
vec = [[table.at "alpha", "FirstColumn"], [table.at "delta", "Another"]]
|
||||
|
@ -3,7 +3,6 @@ from Standard.Base import all
|
||||
import Standard.Test
|
||||
|
||||
import project.Database_Spec
|
||||
import project.Matching_Spec
|
||||
import project.Model_Spec
|
||||
import project.Column_Spec
|
||||
import project.Csv_Spec
|
||||
@ -16,7 +15,6 @@ main = Test.Suite.run_main <|
|
||||
Csv_Spec.spec
|
||||
Json_Spec.spec
|
||||
Spreadsheet_Spec.spec
|
||||
Matching_Spec.spec
|
||||
Table_Spec.spec
|
||||
Database_Spec.sqlite_spec
|
||||
Model_Spec.spec
|
||||
|
@ -1,85 +0,0 @@
|
||||
from Standard.Base import all
|
||||
|
||||
from Standard.Table.Data.Matching import all
|
||||
from Standard.Table.Error as Error_Module import all
|
||||
import Standard.Base.Error.Problem_Behavior
|
||||
import Standard.Base.Error.Warnings
|
||||
import Standard.Test
|
||||
import Standard.Test.Problems
|
||||
|
||||
type Foo_Error
|
||||
|
||||
spec = Test.group 'Matching Helper' <|
|
||||
## These are workarounds to #1600 - default arguments do not work properly
|
||||
on Atom constructors.
|
||||
|
||||
Once this is fixed, the tests should be updated accordingly.
|
||||
exact = Exact case_sensitivity=True
|
||||
regex = Regex case_sensitivity=True
|
||||
Test.specify 'should match a single name with a single exact criterion' <|
|
||||
Matching.match_single_criterion "foo" "foo" exact . should_be_true
|
||||
Matching.match_single_criterion "foobar" "foo" exact . should_be_false
|
||||
Matching.match_single_criterion "foo" "f.*" exact . should_be_false
|
||||
Matching.match_single_criterion "foo" "Foo" exact . should_be_false
|
||||
|
||||
Test.specify 'should correctly handle Unicode folding with exact matching' <|
|
||||
Matching.match_single_criterion '\u00E9' '\u0065\u{301}' exact . should_be_true
|
||||
Matching.match_single_criterion 'é' '\u00E9' exact . should_be_true
|
||||
Matching.match_single_criterion 'é' 'ę' exact . should_be_false
|
||||
|
||||
Test.specify 'should match a single name with a single regex criterion' <|
|
||||
Matching.match_single_criterion "foo" "foo" regex . should_be_true
|
||||
Matching.match_single_criterion "foobar" "foo" regex . should_be_false
|
||||
Matching.match_single_criterion "foo" "f.*" regex . should_be_true
|
||||
Matching.match_single_criterion "foo" "foo.*" regex . should_be_true
|
||||
Matching.match_single_criterion "foo" "F.*" regex . should_be_false
|
||||
|
||||
Test.specify 'should support case-insensitive matching' <|
|
||||
Matching.match_single_criterion "foo" "F.*" (Regex case_sensitivity=Case_Insensitive.new) . should_be_true
|
||||
Matching.match_single_criterion "foO" "FOo" (Exact case_sensitivity=Case_Insensitive.new) . should_be_true
|
||||
|
||||
Matching.match_single_criterion "foo" "fF.*" (Regex case_sensitivity=Case_Insensitive.new) . should_be_false
|
||||
Matching.match_single_criterion "foo" "Foos" (Exact case_sensitivity=Case_Insensitive.new) . should_be_false
|
||||
|
||||
## TODO this may not be how we want this to work, but this test is
|
||||
included to explicitly illustrate how the current implementation
|
||||
behaves in such corner cases
|
||||
Matching.match_single_criterion "β" "B" (Exact case_sensitivity=Case_Insensitive.new) . should_be_false
|
||||
|
||||
Test.specify 'should match a list of names with a list of criteria, correctly handling reordering' <|
|
||||
Matching.match_criteria ["foo", "bar", "baz"] ["baz", "foo"] reorder=True . should_equal ["baz", "foo"]
|
||||
Matching.match_criteria ["foo", "bar", "baz"] ["baz", "foo"] reorder=False . should_equal ["foo", "baz"]
|
||||
|
||||
Test.specify 'should allow multiple matches to a single criterion (Regex)' <|
|
||||
Matching.match_criteria ["foo", "bar", "baz", "quux"] ["b.*"] reorder=True matching_strategy=regex . should_equal ["bar", "baz"]
|
||||
Matching.match_criteria ["foo", "bar", "baz", "quux"] ["b.*", "foo"] reorder=False matching_strategy=regex . should_equal ["foo", "bar", "baz"]
|
||||
|
||||
Test.specify 'should include the object only with the first criterion that matched it, avoiding duplication' <|
|
||||
Matching.match_criteria ["foo", "bar", "baz", "zap"] [".*z.*", "b.*"] reorder=True matching_strategy=regex . should_equal ["baz", "zap", "bar"]
|
||||
Matching.match_criteria ["foo", "bar", "baz", "zap"] [".*z.*", "b.*"] reorder=False matching_strategy=regex . should_equal ["bar", "baz", "zap"]
|
||||
|
||||
Test.specify 'should correctly handle criteria which did not match anything' <|
|
||||
action = Matching.match_criteria ["foo", "bar", "baz"] ["baz", "unknown_column"] reorder=True warnings=_ on_problems=_
|
||||
tester = _.should_equal ["baz"]
|
||||
problems = [No_Matches_Found ["unknown_column"]]
|
||||
Problems.test_problem_handling action problems tester
|
||||
|
||||
action_2 = Matching.match_criteria ["foo", "bar", "baz"] ["baz", "unknown_column_1", "unknown_column_2"] reorder=False warnings=_ on_problems=_
|
||||
problems_2 = [No_Matches_Found ["unknown_column_1", "unknown_column_2"]]
|
||||
Problems.test_problem_handling action_2 problems_2 tester
|
||||
|
||||
Test.specify 'should correctly work with complex object using a function extracting their names' <|
|
||||
pairs = [Pair "foo" 42, Pair "bar" 33, Pair "baz" 10, Pair "foo" 0, Pair 10 10]
|
||||
selected = [Pair "bar" 33, Pair "foo" 42, Pair "foo" 0]
|
||||
Matching.match_criteria pairs ["bar", "foo"] reorder=True name_mapper=_.first . should_equal selected
|
||||
|
||||
Matching.match_criteria [1, 2, 3] ["2"] name_mapper=_.to_text . should_equal [2]
|
||||
|
||||
Test.specify 'should correctly forward errors' <|
|
||||
Matching.match_criteria (Error.throw Foo_Error) [] . should_fail_with Foo_Error
|
||||
Matching.match_criteria [] (Error.throw Foo_Error) . should_fail_with Foo_Error
|
||||
Matching.match_criteria [] [] (Error.throw Foo_Error) . should_fail_with Foo_Error
|
||||
Matching.match_criteria [1, 2, 3] ["2"] name_mapper=(x-> if x == 3 then Error.throw Foo_Error else x.to_text) . should_fail_with Foo_Error
|
||||
Matching.match_criteria ["a"] ["a"] name_mapper=_.nonexistent_function . should_fail_with No_Such_Method_Error
|
||||
|
||||
main = Test.Suite.run_main here.spec
|
78
test/Tests/src/Data/Text/Matching_Spec.enso
Normal file
78
test/Tests/src/Data/Text/Matching_Spec.enso
Normal file
@ -0,0 +1,78 @@
|
||||
from Standard.Base import all
|
||||
|
||||
from Standard.Table.Error as Error_Module import all
|
||||
import Standard.Base.Error.Problem_Behavior
|
||||
import Standard.Base.Error.Warnings
|
||||
import Standard.Base.Data.Text.Matching
|
||||
import Standard.Test
|
||||
import Standard.Test.Problems
|
||||
|
||||
type Foo_Error
|
||||
|
||||
spec = Test.group 'Matching Helper' <|
|
||||
Test.specify 'should match a single name with a single Text_Matcher criterion' <|
|
||||
Text_Matcher.new.match_single_criterion "foo" "foo" . should_be_true
|
||||
Text_Matcher.new.match_single_criterion "foobar" "foo" . should_be_false
|
||||
Text_Matcher.new.match_single_criterion "foo" "f.*" . should_be_false
|
||||
Text_Matcher.new.match_single_criterion "foo" "Foo" . should_be_false
|
||||
|
||||
Test.specify 'should correctly handle Unicode folding with Text_Matcher matching' <|
|
||||
Text_Matcher.new.match_single_criterion '\u00E9' '\u0065\u{301}' . should_be_true
|
||||
Text_Matcher.new.match_single_criterion 'é' '\u00E9' . should_be_true
|
||||
Text_Matcher.new.match_single_criterion 'é' 'ę' . should_be_false
|
||||
|
||||
Test.specify 'should match a single name with a single regex criterion' <|
|
||||
Regex_Matcher.new.match_single_criterion "foo" "foo" . should_be_true
|
||||
Regex_Matcher.new.match_single_criterion "foobar" "foo" . should_be_false
|
||||
Regex_Matcher.new.match_single_criterion "foo" "f.*" . should_be_true
|
||||
Regex_Matcher.new.match_single_criterion "foo" "foo.*" . should_be_true
|
||||
Regex_Matcher.new.match_single_criterion "foo" "F.*" . should_be_false
|
||||
|
||||
Test.specify 'should support case-insensitive matching' <|
|
||||
(Regex_Matcher.new case_sensitive=Case_Insensitive.new).match_single_criterion "foo" "F.*" . should_be_true
|
||||
(Text_Matcher.new case_sensitive=Case_Insensitive.new).match_single_criterion "foO" "FOo" . should_be_true
|
||||
|
||||
(Regex_Matcher.new case_sensitive=Case_Insensitive.new).match_single_criterion "foo" "fF.*" . should_be_false
|
||||
(Text_Matcher.new case_sensitive=Case_Insensitive.new).match_single_criterion "foo" "Foos" . should_be_false
|
||||
|
||||
# Small beta is equal to capital 'beta' which looks the same as capital 'b' but is a different symbol.
|
||||
(Text_Matcher.new case_sensitive=Case_Insensitive.new).match_single_criterion "β" "Β" . should_be_true
|
||||
(Text_Matcher.new case_sensitive=Case_Insensitive.new).match_single_criterion "β" "B" . should_be_false
|
||||
|
||||
Test.specify 'should match a list of names with a list of criteria, correctly handling reordering' <|
|
||||
Text_Matcher.new.match_criteria ["foo", "bar", "baz"] ["baz", "foo"] reorder=True . should_equal ["baz", "foo"]
|
||||
Text_Matcher.new.match_criteria ["foo", "bar", "baz"] ["baz", "foo"] reorder=False . should_equal ["foo", "baz"]
|
||||
|
||||
Test.specify 'should allow multiple matches to a single criterion (Regex)' <|
|
||||
Regex_Matcher.new.match_criteria ["foo", "bar", "baz", "quux"] ["b.*"] reorder=True . should_equal ["bar", "baz"]
|
||||
Regex_Matcher.new.match_criteria ["foo", "bar", "baz", "quux"] ["b.*", "foo"] reorder=False . should_equal ["foo", "bar", "baz"]
|
||||
|
||||
Test.specify 'should include the object only with the first criterion that matched it, avoiding duplication' <|
|
||||
Regex_Matcher.new.match_criteria ["foo", "bar", "baz", "zap"] [".*z.*", "b.*"] reorder=True . should_equal ["baz", "zap", "bar"]
|
||||
Regex_Matcher.new.match_criteria ["foo", "bar", "baz", "zap"] [".*z.*", "b.*"] reorder=False . should_equal ["bar", "baz", "zap"]
|
||||
|
||||
Test.specify 'should correctly handle criteria which did not match anything' <|
|
||||
action = Text_Matcher.new.match_criteria ["foo", "bar", "baz"] ["baz", "unknown_column"] reorder=True warnings=_ on_problems=_
|
||||
tester = _.should_equal ["baz"]
|
||||
problems = [Matching.No_Matches_Found ["unknown_column"]]
|
||||
Problems.test_problem_handling action problems tester
|
||||
|
||||
action_2 = Text_Matcher.new.match_criteria ["foo", "bar", "baz"] ["baz", "unknown_column_1", "unknown_column_2"] reorder=False warnings=_ on_problems=_
|
||||
problems_2 = [Matching.No_Matches_Found ["unknown_column_1", "unknown_column_2"]]
|
||||
Problems.test_problem_handling action_2 problems_2 tester
|
||||
|
||||
Test.specify 'should correctly work with complex object using a function extracting their names' <|
|
||||
pairs = [Pair "foo" 42, Pair "bar" 33, Pair "baz" 10, Pair "foo" 0, Pair 10 10]
|
||||
selected = [Pair "bar" 33, Pair "foo" 42, Pair "foo" 0]
|
||||
Text_Matcher.new.match_criteria pairs ["bar", "foo"] reorder=True name_mapper=_.first . should_equal selected
|
||||
|
||||
Text_Matcher.new.match_criteria [1, 2, 3] ["2"] name_mapper=_.to_text . should_equal [2]
|
||||
|
||||
Test.specify 'should correctly forward errors' <|
|
||||
Text_Matcher.new.match_criteria (Error.throw Foo_Error) [] . should_fail_with Foo_Error
|
||||
Text_Matcher.new.match_criteria [] (Error.throw Foo_Error) . should_fail_with Foo_Error
|
||||
(Error.throw Foo_Error).match_criteria [] [] . should_fail_with Foo_Error
|
||||
Text_Matcher.new.match_criteria [1, 2, 3] ["2"] name_mapper=(x-> if x == 3 then Error.throw Foo_Error else x.to_text) . should_fail_with Foo_Error
|
||||
Text_Matcher.new.match_criteria ["a"] ["a"] name_mapper=_.nonexistent_function . should_fail_with No_Such_Method_Error
|
||||
|
||||
main = Test.Suite.run_main here.spec
|
@ -34,6 +34,7 @@ import project.Data.Time.Spec as Time_Spec
|
||||
import project.Data.Vector_Spec
|
||||
import project.Data.Text.Regex_Spec
|
||||
import project.Data.Text.Default_Regex_Engine_Spec
|
||||
import project.Data.Text.Matching_Spec
|
||||
import project.Data.Text.Span_Spec
|
||||
|
||||
import project.Network.Http.Header_Spec as Http_Header_Spec
|
||||
@ -81,6 +82,7 @@ main = Test.Suite.run_main <|
|
||||
Range_Spec.spec
|
||||
Default_Regex_Engine_Spec.spec
|
||||
Regex_Spec.spec
|
||||
Matching_Spec.spec
|
||||
Runtime_Spec.spec
|
||||
Span_Spec.spec
|
||||
Stack_Traces_Spec.spec
|
||||
|
Loading…
Reference in New Issue
Block a user