From aaa235fbad8b7b2db3d4bc8d446ab9bd9c42be63 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Fri, 14 Jul 2023 17:30:52 +0000 Subject: [PATCH] Add drop down for replace, remove Column_Selector (#7295) - Add dropdowns for `replace` functions. - Retire `Column_Selector` type. - Add `select_blank_columns` and `remove_blank_columns` functions to table types. - Allow Regex to be used to pick columns. --- CHANGELOG.md | 2 + .../src/Data/Text/Case_Sensitivity.enso | 10 - .../0.0.0-dev/src/Data/Text/Extensions.enso | 13 +- .../Base/0.0.0-dev/src/Data/Text/Regex.enso | 14 +- .../0.0.0-dev/src/Data/Time/Date_Period.enso | 4 +- .../0.0.0-dev/src/Data/Time/Time_Period.enso | 2 +- .../Base/0.0.0-dev/src/Widget_Helpers.enso | 6 + .../Database/0.0.0-dev/src/Data/Column.enso | 6 +- .../Database/0.0.0-dev/src/Data/Dialect.enso | 4 +- .../Database/0.0.0-dev/src/Data/Table.enso | 162 +++++++++---- .../src/Extensions/Upload_Database_Table.enso | 1 - .../Extensions/Upload_In_Memory_Table.enso | 1 - .../src/Internal/Base_Generator.enso | 4 +- .../0.0.0-dev/src/Internal/Upload_Table.enso | 2 +- .../src/Data_Science/Preparation.enso | 2 +- .../0.0.0-dev/src/Data/Aggregate_Column.enso | 3 +- .../Table/0.0.0-dev/src/Data/Column.enso | 4 +- .../0.0.0-dev/src/Data/Column_Selector.enso | 32 --- .../Table/0.0.0-dev/src/Data/Table.enso | 158 +++++++++---- .../src/Internal/Add_Row_Number.enso | 5 +- .../src/Internal/Aggregate_Column_Helper.enso | 5 +- .../0.0.0-dev/src/Internal/Table_Helpers.enso | 221 +++++++++--------- .../Standard/Table/0.0.0-dev/src/Main.enso | 2 - .../Missing_Values_Spec.enso | 60 ++--- .../Select_Columns_Spec.enso | 81 ++++--- .../src/Formatting/Parse_Values_Spec.enso | 13 +- .../Table_Tests/src/In_Memory/Table_Spec.enso | 10 +- 27 files changed, 458 insertions(+), 369 deletions(-) delete mode 100644 distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column_Selector.enso diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c09e0e1656..50f80354f77 100644 --- a/CHANGELOG.md 
+++ b/CHANGELOG.md @@ -520,6 +520,7 @@ - [Improving date/time support in Table - added `date_diff`, `date_add`, `date_part` and some shorthands. Extended `Time_Period` with milli-, micro- and nanosecond periods.][7221] +- [Retire `Column_Selector` and allow regex based selection of columns.][7295] [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug @@ -749,6 +750,7 @@ [7223]: https://github.com/enso-org/enso/pull/7223 [7234]: https://github.com/enso-org/enso/pull/7234 [7221]: https://github.com/enso-org/enso/pull/7221 +[7295]: https://github.com/enso-org/enso/pull/7295 #### Enso Compiler diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Case_Sensitivity.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Case_Sensitivity.enso index 0650dd5b61d..5460af17668 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Case_Sensitivity.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Case_Sensitivity.enso @@ -60,13 +60,3 @@ type Case_Sensitivity to_explicit_sensitivity_in_memory self = case self of Case_Sensitivity.Default -> Case_Sensitivity.Sensitive _ -> self - - ## PRIVATE - Create matcher function - create_match_function : Boolean -> (Text -> Text -> Boolean) - create_match_function self use_regex=False = case use_regex of - True -> (name-> pattern-> Regex.compile pattern case_insensitive=self.is_case_insensitive_in_memory . 
matches name) - False -> case self of - Case_Sensitivity.Default -> (==) - Case_Sensitivity.Sensitive -> (==) - Case_Sensitivity.Insensitive locale -> (name-> criterion-> name.equals_ignore_case criterion locale) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso index 8c3ad2a584a..b3f689878ef 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Extensions.enso @@ -35,7 +35,7 @@ from project.Data.Boolean import Boolean, False, True from project.Data.Json import Invalid_JSON, JS_Object, Json from project.Data.Numbers import Decimal, Integer, Number, Number_Parse_Error from project.Data.Range.Extensions import all -from project.Widget_Helpers import make_date_format_selector, make_date_time_format_selector, make_delimiter_selector, make_time_format_selector +from project.Widget_Helpers import make_date_format_selector, make_date_time_format_selector, make_delimiter_selector, make_regex_text_widget, make_time_format_selector polyglot java import com.ibm.icu.lang.UCharacter polyglot java import com.ibm.icu.text.BreakIterator @@ -477,8 +477,9 @@ Text.tokenize self pattern="." case_sensitivity=Case_Sensitivity.Sensitive = Regexp replace. '<a href="url">content</a>'.replace '<a href="(.*?)">(.*?)</a>'.to_regex '$2 is at $1' == 'content is at url' -Text.replace : Text | Regex -> Text-> Case_Sensitivity -> Boolean -> Text ! Illegal_Argument -Text.replace self term replacement case_sensitivity=Case_Sensitivity.Default only_first=False = +@term make_regex_text_widget +Text.replace : Text | Regex -> Text -> Case_Sensitivity -> Boolean -> Text ! 
Illegal_Argument +Text.replace self term:(Text | Regex) replacement:Text (case_sensitivity:Case_Sensitivity=Case_Sensitivity.Default) only_first:Boolean=False = case term of _ : Text -> if term.is_empty then self else array_from_single_result result = case result of @@ -496,11 +497,7 @@ Text.replace self term replacement case_sensitivity=Case_Sensitivity.Default onl Text_Utils.span_of_case_insensitive self term locale.java_locale False Text_Utils.replace_spans self spans_array replacement _ : Regex -> - updated_regex = case case_sensitivity of - Case_Sensitivity.Default -> term - _ -> - case_insensitive = case_sensitivity.is_case_insensitive_in_memory - term.recompile case_insensitive + updated_regex = term.recompile case_sensitivity updated_regex.replace self replacement only_first ## ALIAS Get Words diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex.enso index 96573321aa7..51cbe252a0c 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Text/Regex.enso @@ -3,6 +3,7 @@ import project.Data.Filter_Condition.Filter_Condition import project.Data.Map.Map import project.Data.Numbers.Integer import project.Data.Range.Range +import project.Data.Text.Case_Sensitivity.Case_Sensitivity import project.Data.Text.Helpers import project.Data.Text.Prim_Text_Helper import project.Data.Text.Regex.Internal.Match_Iterator.Match_Iterator @@ -370,11 +371,14 @@ type Regex Recompile the underlying regex string; used to change the case-sensitivity of a compiled Regex. - recompile : Boolean | Nothing -> Regex ! Regex_Syntax_Error | Illegal_Argument - recompile self case_insensitive=False = - should_recompile = self.case_insensitive != case_insensitive - if should_recompile.not then self else - Regex.compile self.internal_regex_object.pattern case_insensitive + recompile : Case_Sensitivity -> Regex ! 
Regex_Syntax_Error | Illegal_Argument + recompile self case_sensitivity:Case_Sensitivity = case case_sensitivity of + Case_Sensitivity.Default -> self + _ -> + case_insensitive = case_sensitivity.is_case_insensitive_in_memory + should_recompile = self.case_insensitive != case_insensitive + if should_recompile.not then self else + Regex.compile self.internal_regex_object.pattern case_insensitive ## PRIVATE Convert the polyglot map to a Map. diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Date_Period.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Date_Period.enso index 91b9eafddb9..27416f46dba 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Date_Period.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Date_Period.enso @@ -10,9 +10,9 @@ polyglot java import java.time.temporal.ChronoUnit polyglot java import java.time.temporal.TemporalAdjuster polyglot java import java.time.temporal.TemporalAdjusters polyglot java import java.time.temporal.TemporalUnit -polyglot java import org.enso.base.Time_Utils -polyglot java import org.enso.base.time.Date_Period_Utils polyglot java import org.enso.base.time.CustomTemporalUnits +polyglot java import org.enso.base.time.Date_Period_Utils +polyglot java import org.enso.base.Time_Utils ## Represents a unit of time longer on the scale of days (longer than a day). 
type Date_Period diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Time_Period.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Time_Period.enso index 86ffe565393..3f62163339b 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Time_Period.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Time_Period.enso @@ -5,8 +5,8 @@ from project.Data.Boolean import Boolean, False, True polyglot java import java.time.temporal.ChronoUnit polyglot java import java.time.temporal.TemporalUnit -polyglot java import org.enso.base.Time_Utils polyglot java import org.enso.base.time.CustomTemporalUnits +polyglot java import org.enso.base.Time_Utils ## Represents a unit of time of a day or shorter. type Time_Period diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Widget_Helpers.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Widget_Helpers.enso index eb0c911f482..8c4cfb28487 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Widget_Helpers.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Widget_Helpers.enso @@ -5,6 +5,12 @@ import project.Data.Time.Time_Of_Day.Time_Of_Day import project.Metadata.Widget from project.Metadata import make_single_choice +## PRIVATE + Creates a Regex / Text Widget for search and replace. +make_regex_text_widget : Widget +make_regex_text_widget = + make_single_choice [["Text", '""'], ["Regular Expression", '(Regex.compile "^$")']] + ## PRIVATE Creates a Single_Choice Widget for delimiters. 
make_delimiter_selector : Widget diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso index 89b2cdb659d..77be994d5de 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso @@ -1,8 +1,8 @@ from Standard.Base import all - import Standard.Base.Errors.Illegal_Argument.Illegal_Argument import Standard.Base.Errors.Illegal_State.Illegal_State import Standard.Base.Internal.Rounding_Helpers +from Standard.Base.Widget_Helpers import make_regex_text_widget import Standard.Table.Data.Column.Column as Materialized_Column import Standard.Table.Data.Type.Enso_Types @@ -1124,7 +1124,6 @@ type Column - case_sensitivity: Specifies if the text values should be compared case sensitively. - only_first: If True, only replace the first match. - - use_regex: If true, the term is used as a regular expression. > Example Replace dashes with underscores. @@ -1140,7 +1139,8 @@ type Column Replace texts in quotes with parentheses. column.replace '"(.*?)"'.to_regex '($1)' - replace : Text | Column | Regex -> Text | Column -> Case_Sensitivity -> Boolean -> Column + @term make_regex_text_widget + replace : Text | Regex | Column -> Text | Column -> Case_Sensitivity -> Boolean -> Column replace self term="" new_text="" case_sensitivity=Case_Sensitivity.Sensitive only_first=False = _ = [term, new_text, case_sensitivity, only_first] msg = "`Column.replace` is not yet implemented." 
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso index c6f292c8474..b2683b79f34 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso @@ -3,7 +3,7 @@ import Standard.Base.Errors.Unimplemented.Unimplemented import Standard.Table.Internal.Naming_Helpers.Naming_Helpers import Standard.Table.Internal.Problem_Builder.Problem_Builder -from Standard.Table import Aggregate_Column, Column_Selector, Join_Kind, Value_Type +from Standard.Table import Aggregate_Column, Join_Kind, Value_Type import project.Connection.Connection.Connection import project.Data.SQL.Builder @@ -255,6 +255,6 @@ default_fetch_primary_key connection table_name = rs = metadata.getPrimaryKeys Nothing Nothing table_name keys_table = result_set_to_table rs connection.dialect.make_column_fetcher_for_type # The names of the columns are sometimes lowercase and sometimes uppercase, so we do a case insensitive select first. - selected = keys_table.select_columns [Column_Selector.By_Name "COLUMN_NAME", Column_Selector.By_Name "KEY_SEQ"] reorder=True + selected = keys_table.select_columns ["COLUMN_NAME", "KEY_SEQ"] case_sensitivity=Case_Sensitivity.Insensitive reorder=True key_column_names = selected.order_by 1 . at 0 . 
to_vector if key_column_names.is_empty then Nothing else key_column_names diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index 13367718159..488a4333bca 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -28,7 +28,7 @@ import Standard.Table.Internal.Table_Helpers import Standard.Table.Internal.Table_Helpers.Table_Column_Helper import Standard.Table.Internal.Unique_Name_Strategy.Unique_Name_Strategy import Standard.Table.Internal.Widget_Helpers -from Standard.Table import Aggregate_Column, Auto, Column_Selector, Data_Formatter, Match_Columns, Position, Set_Mode, Sort_Column, Value_Type +from Standard.Table import Aggregate_Column, Auto, Data_Formatter, Match_Columns, Position, Set_Mode, Sort_Column, Value_Type from Standard.Table.Data.Column import get_item_string, normalize_string_for_display from Standard.Table.Data.Table import print_table from Standard.Table.Errors import all @@ -147,8 +147,10 @@ type Table dropped from the output. Arguments: - - columns: Column selection criteria - a single instance or Vector of - names, indexes or `Column_Selector`. + - columns: Specifies columns by a single instance or Vector of names, + indexes or regular expressions to match names. + - case_sensitivity: Controls whether to be case sensitive when matching + column names. - reorder: By default, or if set to `False`, columns in the output will be in the same order as in the input table. If `True`, the order in the output table will match the order in the columns list. If a column is @@ -182,16 +184,16 @@ type Table > Example Select columns matching a regular expression. 
- table.select_columns [Column_Selector.By_Name "foo.+" Case_Sensitivity.Insensitive use_regex=True] + table.select_columns "foo.+".to_regex case_sensitivity=Case_Sensitivity.Insensitive > Example Select the first two columns and the last column, moving the last one to front. table.select_columns [-1, 0, 1] reorder=True @columns Widget_Helpers.make_column_name_vector_selector - select_columns : Vector (Integer | Text | Column_Selector) | Text | Integer -> Boolean -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns - select_columns self (columns = [self.columns.first.name]) (reorder = False) (error_on_missing_columns = True) (on_problems = Report_Warning) = - new_columns = self.columns_helper.select_columns selectors=columns reorder=reorder error_on_missing_columns=error_on_missing_columns on_problems=on_problems + select_columns : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Case_Sensitivity -> Boolean -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns + select_columns self (columns : (Vector | Text | Integer | Regex) = [self.columns.first.name]) (case_sensitivity=Case_Sensitivity.Default) (reorder:Boolean=False) (error_on_missing_columns:Boolean=True) (on_problems:Problem_Behavior=Report_Warning) = + new_columns = self.columns_helper.select_columns columns case_sensitivity reorder error_on_missing_columns on_problems self.updated_columns new_columns ## ALIAS drop_columns @@ -201,8 +203,10 @@ type Table input. Arguments: - - columns: Column selection criteria - a single instance or Vector of - names, indexes or `Column_Selector`, which are to be removed. + - columns: Specifies columns by a name, index or regular expression to + match names, or a Vector of these. + - case_sensitivity: Controls whether to be case sensitive when matching + column names. - error_on_missing_columns: Specifies if a missing input column should result in an error regardless of the `on_problems` settings. 
Defaults to `False`. @@ -232,27 +236,80 @@ type Table > Example Remove columns matching a regular expression. - table.remove_columns [Column_Selector.By_Name "foo.+" Case_Sensitivity.Insensitive use_regex=True] + table.remove_columns "foo.+".to_regex Case_Sensitivity.Insensitive > Example Remove the first two columns and the last column. table.remove_columns [-1, 0, 1] @columns Widget_Helpers.make_column_name_vector_selector - remove_columns : Vector (Integer | Text | Column_Selector) | Text | Integer -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns - remove_columns self (columns = [self.columns.first.name]) (error_on_missing_columns = False) (on_problems = Report_Warning) = - new_columns = self.columns_helper.remove_columns selectors=columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems + remove_columns : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns + remove_columns self (columns : (Vector | Text | Integer | Regex) = [self.columns.first.name]) (case_sensitivity=Case_Sensitivity.Default) (error_on_missing_columns:Boolean=False) (on_problems:Problem_Behavior=Report_Warning) = + new_columns = self.columns_helper.remove_columns columns case_sensitivity error_on_missing_columns=error_on_missing_columns on_problems=on_problems self.updated_columns new_columns + ## ALIAS select_na + ALIAS select_missing_columns + + Select columns which are either all blank or contain blank values. If no + rows are present, all columns are considered blank. + + Arguments: + - when_any: By default, only columns consisting of all blank cells are + selected. If set to `True`, columns with one or more blank values are + selected. + - treat_nans_as_blank: specifies whether `Number.nan` is considered as + blank. By default, it is not. + + ? 
Blank values + Blank values are `Nothing`, `""` and, depending on the setting, `Number.nan`. + + > Example + Select completely blank columns from a table. + + table.select_blank_columns + select_blank_columns : Boolean -> Boolean -> Table + select_blank_columns self (when_any : Boolean = False) (treat_nans_as_blank : Boolean = False) = + new_columns = self.columns_helper.select_blank_columns_helper when_any treat_nans_as_blank + if new_columns.length == 0 then Error.throw (No_Output_Columns) else + self.updated_columns new_columns + + ## ALIAS drop_na + ALIAS drop_missing_columns + + Remove columns which are either all blank or contain blank values. If no + rows are present, all columns are considered blank. + + Arguments: + - when_any: By default, only columns consisting of all blank cells are + removed. If set to `True`, columns with one or more blank values are + removed. + - treat_nans_as_blank: specifies whether `Number.nan` is considered as + blank. By default, it is not. + + ? Blank values + Blank values are `Nothing`, `""` and, depending on the setting, `Number.nan`. + + > Example + Remove completely blank columns from a table. + + table.remove_blank_columns + remove_blank_columns : Boolean -> Boolean -> Table + remove_blank_columns self (when_any : Boolean = False) (treat_nans_as_blank : Boolean = False) = + new_columns = self.columns_helper.select_blank_columns_helper when_any treat_nans_as_blank invert_selection=True + if new_columns.length == 0 then Error.throw (No_Output_Columns) else + self.updated_columns new_columns + ## Returns a new table with the specified selection of columns moved to either the start or the end in the specified order. Arguments: - - columns: Column selection criteria - a single instance or Vector of - names, indexes or `Column_Selector`, which should be reordered and - specifying their order. + - columns: Specifies columns by a name, index or regular expression to + match names, or a Vector of these. 
- position: Specifies how to place the selected columns in relation to the remaining columns which were not matched by `columns` (if any). + - case_sensitivity: Controls whether to be case sensitive when matching + column names. - error_on_missing_columns: Specifies if a missing input column should result in an error regardless of the `on_problems` settings. Defaults to `False`. @@ -279,7 +336,7 @@ type Table > Example Move columns matching a regular expression to front, keeping columns matching "foo.+" before columns matching "b.*". - table.reorder_columns [Column_Selector.By_Name "foo.+" Case_Sensitivity.Insensitive use_regex=True] + table.reorder_columns ["foo.+".to_regex, "b.*".to_regex] case_sensitivity=Case_Sensitivity.Insensitive > Example Swap the first two columns. @@ -291,9 +348,9 @@ type Table table.reorder_columns [0] position=Position.After_Other_Columns @columns Widget_Helpers.make_column_name_vector_selector - reorder_columns : Vector (Integer | Text | Column_Selector) | Text | Integer -> Position -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns - reorder_columns self (columns = [self.columns.first.name]) (position = Position.Before_Other_Columns) (error_on_missing_columns = False) (on_problems = Report_Warning) = - new_columns = self.columns_helper.reorder_columns selectors=columns position=position error_on_missing_columns on_problems=on_problems + reorder_columns : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Position -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! 
Missing_Input_Columns + reorder_columns self (columns : (Vector | Text | Integer | Regex) = [self.columns.first.name]) (position:Position=Position.Before_Other_Columns) (case_sensitivity=Case_Sensitivity.Default) (error_on_missing_columns:Boolean=False) (on_problems:Problem_Behavior=Report_Warning) = + new_columns = self.columns_helper.reorder_columns columns position case_sensitivity error_on_missing_columns on_problems self.updated_columns new_columns ## Returns a new table with the columns sorted by name according to the @@ -328,7 +385,10 @@ type Table Arguments: - column_map: Mapping from old column names to new or a vector of new - column names to apply by position. + column names to apply by position. `Regex` objects can be used + within the mapping to do pattern based renaming. + - case_sensitivity: Controls whether to be case sensitive when matching + column names. - error_on_missing_columns: Specifies if a missing input column should result in an error regardless of the `on_problems` settings. Defaults to `True`. @@ -379,12 +439,11 @@ type Table > Example For all columns starting with the prefix `name=`, replace it with `key:`. - by_name = Column_Selector.By_Name "name=(.*)" Case_Sensitivity.Sensitive use_regex=True - table.rename_columns (Map.from_vector [[by_name, "key:$1"]]) + table.rename_columns (Map.from_vector [["name=(.*)".to_regex, "key:$1"]]) @column_map Widget_Helpers.make_rename_name_vector_selector - rename_columns : Map (Text | Integer | Column_Selector) Text | Vector Text | Vector Vector -> Boolean -> Problem_Behavior -> Table ! 
Missing_Input_Columns | Ambiguous_Column_Rename | Too_Many_Column_Names_Provided | Invalid_Output_Column_Names | Duplicate_Output_Column_Names - rename_columns self column_map=["Column"] (error_on_missing_columns=True) (on_problems=Report_Warning) = - new_names = Table_Helpers.rename_columns internal_columns=self.internal_columns mapping=column_map error_on_missing_columns=error_on_missing_columns on_problems=on_problems + rename_columns : Map (Text | Integer | Regex) Text | Vector Text | Vector Vector -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns | Ambiguous_Column_Rename | Too_Many_Column_Names_Provided | Invalid_Output_Column_Names | Duplicate_Output_Column_Names + rename_columns self (column_map:(Map | Vector)=["Column"]) (case_sensitivity:Case_Sensitivity=Case_Sensitivity.Default) (error_on_missing_columns:Boolean=True) (on_problems:Problem_Behavior=Report_Warning) = + new_names = Table_Helpers.rename_columns self.internal_columns column_map case_sensitivity error_on_missing_columns on_problems Warning.with_suspended new_names names-> self.updated_columns (self.internal_columns.map c-> c.rename (names.at c.name)) @@ -566,10 +625,10 @@ type Table problem is reported. 
@group_by Widget_Helpers.make_column_name_vector_selector @order_by Widget_Helpers.make_order_by_selector - add_row_number : Text -> Integer -> Integer -> Vector (Text | Integer | Column_Selector) | Text | Integer -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table - add_row_number self (name:Text = "Row") (from:Integer = 1) (step:Integer = 1) group_by=[] order_by=[] on_problems=Problem_Behavior.Report_Warning = + add_row_number : Text -> Integer -> Integer -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table + add_row_number self (name:Text="Row") (from:Integer=1) (step:Integer=1) (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=Problem_Behavior.Report_Warning) = problem_builder = Problem_Builder.new error_on_missing_columns=True - grouping_columns = self.columns_helper.select_columns_helper group_by True problem_builder + grouping_columns = self.columns_helper.select_columns_helper group_by Case_Sensitivity.Default True problem_builder grouping_columns.each internal_column-> column = self.make_column internal_column if column.value_type.is_floating_point then @@ -944,9 +1003,9 @@ type Table `Floating_Point_Equality` is reported according to the `on_problems` setting. @columns Widget_Helpers.make_column_name_vector_selector - distinct : Vector (Integer | Text | Column_Selector) | Text | Integer -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | No_Input_Columns_Selected | Floating_Point_Equality + distinct : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! 
No_Output_Columns | Missing_Input_Columns | No_Input_Columns_Selected | Floating_Point_Equality distinct self columns=self.column_names case_sensitivity=Case_Sensitivity.Default error_on_missing_columns=True on_problems=Report_Warning = - key_columns = self.columns_helper.select_columns selectors=columns reorder=True error_on_missing_columns=error_on_missing_columns on_problems=on_problems . catch No_Output_Columns _-> + key_columns = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=True error_on_missing_columns=error_on_missing_columns on_problems=on_problems . catch No_Output_Columns _-> Error.throw No_Input_Columns_Selected problem_builder = Problem_Builder.new new_table = self.connection.dialect.prepare_distinct self key_columns case_sensitivity problem_builder @@ -1494,7 +1553,7 @@ type Table B | Name | Another B | Country | Germany @key_columns Widget_Helpers.make_column_name_vector_selector - transpose : Vector (Integer | Text | Column_Selector) | Text | Integer -> Text -> Text -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | Duplicate_Output_Column_Names + transpose : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Text -> Text -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | Duplicate_Output_Column_Names transpose self key_columns=[] (attribute_column_name="Name") (value_column_name="Value") (error_on_missing_columns=True) (on_problems = Report_Warning) = ## Avoid unused arguments warning. We cannot rename arguments to `_`, because we need to keep the API consistent with the in-memory table. 
@@ -1552,7 +1611,7 @@ type Table @group_by Widget_Helpers.make_column_name_vector_selector @name_column Widget_Helpers.make_column_name_selector @values (Widget_Helpers.make_aggregate_column_selector include_group_by=False) - cross_tab : Vector (Integer | Text | Column_Selector | Aggregate_Column) | Text | Integer -> (Text | Integer) -> Aggregate_Column | Vector Aggregate_Column -> Problem_Behavior -> Table ! Missing_Input_Columns | Invalid_Aggregate_Column | Floating_Point_Equality | Invalid_Aggregation | Unquoted_Delimiter | Additional_Warnings + cross_tab : Vector (Integer | Text | Regex | Aggregate_Column) | Text | Integer | Regex -> (Text | Integer) -> Aggregate_Column | Vector Aggregate_Column -> Problem_Behavior -> Table ! Missing_Input_Columns | Invalid_Aggregate_Column | Floating_Point_Equality | Invalid_Aggregation | Unquoted_Delimiter | Additional_Warnings cross_tab self group_by name_column values=Aggregate_Column.Count (on_problems=Report_Warning) = ## Avoid unused arguments warning. We cannot rename arguments to `_`, because we need to keep the API consistent with the in-memory table. @@ -1610,9 +1669,9 @@ type Table table.parse "birthday" Value_Type.Date @type (Widget_Helpers.parse_type_selector include_auto=False) @columns Widget_Helpers.make_column_name_vector_selector - parse : Vector (Text | Integer | Column_Selector) | Text | Integer -> Value_Type | Auto -> Text | Data_Formatter | Nothing -> Boolean -> Problem_Behavior -> Table + parse : Vector (Text | Integer | Regex) | Text | Integer | Regex -> Value_Type | Auto -> Text | Data_Formatter | Nothing -> Boolean -> Problem_Behavior -> Table parse self columns=(self.columns . filter (c-> c.value_type.is_text) . 
map .name) type format=Nothing error_on_missing_columns=True on_problems=Report_Warning = - selected = self.columns_helper.resolve_columns columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems + selected = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=False error_on_missing_columns=error_on_missing_columns on_problems=on_problems error_on_empty=False . map self.make_column selected.fold self table-> column_to_parse-> new_column = column_to_parse.parse type format on_problems table.set new_column new_name=column_to_parse.name set_mode=Set_Mode.Update @@ -1779,9 +1838,9 @@ type Table types. Due to this, a Mixed column containing values `[2, "3"]` will actually be converted into `[2, Nothing]` when casting to Integer type. @columns Widget_Helpers.make_column_name_vector_selector - cast : Vector (Text | Integer | Column_Selector) | Text | Integer -> Value_Type -> Boolean -> Problem_Behavior -> Table ! Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure + cast : Vector (Text | Integer | Regex) | Text | Integer | Regex -> Value_Type -> Boolean -> Problem_Behavior -> Table ! Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure cast self columns=[0] value_type error_on_missing_columns=True on_problems=Problem_Behavior.Report_Warning = - selected = self.columns_helper.resolve_columns columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems + selected = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=False error_on_missing_columns=error_on_missing_columns on_problems=on_problems error_on_empty=False . map self.make_column selected.fold self table-> column_to_cast-> new_column = column_to_cast.cast value_type on_problems table.set new_column new_name=column_to_cast.name set_mode=Set_Mode.Update @@ -2022,7 +2081,8 @@ type Table been replaced with the provided default(s). Arguments: - - columns: The column(s) to fill missing values of. 
+ - columns: Specifies columns by a name, index or regular expression to + match names, or a Vector of these. - default: The value to replace missing values with. If this argument is a column, the value from `default` at the corresponding position will be used. @@ -2032,7 +2092,7 @@ type Table fill_nothing = table.fill_nothing ["col0", "col1"] 20.5 @columns Widget_Helpers.make_column_name_vector_selector - fill_nothing : Vector (Integer | Text | Column_Selector) | Text | Integer -> Column | Any -> Table + fill_nothing : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Any -> Table fill_nothing self columns default = transformer col = col.fill_nothing default Table_Helpers.replace_columns_with_transformed_columns self columns transformer @@ -2040,10 +2100,11 @@ type Table ## ALIAS Fill Empty, if_empty Returns a new column where empty Text values have been replaced with the - provided default(s). + provided default. Arguments: - - columns: The column(s) to fill empty values. + - columns: Specifies columns by a name, index or regular expression to + match names, or a Vector of these. - default: The value to replace empty values with. If this argument is a column, the value from `default` at the corresponding position will be used. @@ -2053,7 +2114,7 @@ type Table fill_empty = table.fill_empty ["col0", "col1"] "hello" @columns Widget_Helpers.make_column_name_vector_selector - fill_empty : Vector (Integer | Text | Column_Selector) | Text | Integer -> Column | Any -> Table + fill_empty : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Any -> Table fill_empty self columns default = transformer col = col.fill_empty default Table_Helpers.replace_columns_with_transformed_columns self columns transformer @@ -2062,11 +2123,11 @@ type Table row of the specified column. If `term` is empty, the function returns the table unchanged. - This method follows the exact replacement semantics of the - `Text.replace` method. 
+ This method follows the exact replacement semantics of `Text.replace`. Arguments: - - columns: The column(s) to replace values on. + - columns: Specifies columns by a name, index or regular expression to + match names, or a Vector of these. - term: The term to find. Can be `Text`, `Regex`, or a `Column` of strings. - replacement: The text to replace matches with. @@ -2077,19 +2138,19 @@ type Table > Example Replace dashes with underscores. - table.replace "-" "_" + table.replace ["col0", "col1"] "-" "_" > Example Remove leading and trailing spaces from cells. - column.replace "^\s*(.*?)\s*$".to_regex "$1" + table.replace ["col.*".to_regex] "^\s*(.*?)\s*$".to_regex "$1" > Example Replace texts in quotes with parentheses. - column.replace '"(.*?)"'.to_regex '($1)' + table.replace ["col0"] '"(.*?)"'.to_regex '($1)' @columns Widget_Helpers.make_column_name_vector_selector - replace : Vector (Integer | Text | Column_Selector) | Text | Integer -> Text | Column | Regex -> Text | Column -> Case_Sensitivity -> Boolean -> Column + replace : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Text | Column | Regex -> Text | Column -> Case_Sensitivity -> Boolean -> Column replace self columns term="" new_text="" case_sensitivity=Case_Sensitivity.Sensitive only_first=False = _ = [columns, term, new_text, case_sensitivity, only_first] Error.throw (Unsupported_Database_Operation.Error "Text replace is currently not supported in the database backend.") @@ -2101,7 +2162,8 @@ type Table Arguments: - connection: The connection to a database. - table_name: The name of the table to get. - - columns: List of columns to fetch. Each column is represented by a pair of column name and its expected SQL Type. + - columns: List of columns to fetch. Each column is represented by a pair of + column name and its expected SQL Type. - ctx: The context to use for the table.
make_table : Connection -> Text -> Vector -> Context -> Table make_table connection table_name columns ctx = diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Extensions/Upload_Database_Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Extensions/Upload_Database_Table.enso index 9d1dbe0f64b..60c4b63aac2 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Extensions/Upload_Database_Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Extensions/Upload_Database_Table.enso @@ -2,7 +2,6 @@ from Standard.Base import all import Standard.Base.Errors.Illegal_Argument.Illegal_Argument import Standard.Table.Internal.Widget_Helpers -from Standard.Table import Column_Selector from Standard.Table.Errors import all import project.Connection.Connection.Connection diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Extensions/Upload_In_Memory_Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Extensions/Upload_In_Memory_Table.enso index 8ec49503e63..55a9b1529ae 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Extensions/Upload_In_Memory_Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Extensions/Upload_In_Memory_Table.enso @@ -3,7 +3,6 @@ import Standard.Base.Errors.Illegal_Argument.Illegal_Argument import Standard.Table.Data.Table.Table import Standard.Table.Internal.Widget_Helpers -from Standard.Table import Column_Selector from Standard.Table.Errors import all import project.Connection.Connection.Connection diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso index bc42f1f3d9c..03e297414f7 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso @@ -268,8 +268,8 @@ make_is_in_column arguments = case arguments.length of _ -> Error.throw <| 
Illegal_State.Error ("The operation IS_IN_COLUMN requires at exactly 3 arguments: the expression, the IN subquery, the subquery checking for nulls.") ## PRIVATE -make_row_number : Vector Builder -> Builder -make_row_number arguments (metadata : Row_Number_Metadata) = if arguments.length < 3 then Error.throw (Illegal_State.Error "Wrong amount of parameters in ROW_NUMBER IR. This is a bug in the Database library.") else +make_row_number : Vector Builder -> Row_Number_Metadata -> Builder +make_row_number (arguments : Vector) (metadata : Row_Number_Metadata) = if arguments.length < 3 then Error.throw (Illegal_State.Error "Wrong amount of parameters in ROW_NUMBER IR. This is a bug in the Database library.") else offset = arguments.at 0 step = arguments.at 1 diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload_Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload_Table.enso index 49d9a950a6e..46df2313f92 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload_Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload_Table.enso @@ -6,7 +6,7 @@ import Standard.Base.Errors.Illegal_State.Illegal_State import Standard.Base.Runtime.Context import Standard.Table.Data.Table.Table as In_Memory_Table -from Standard.Table import Aggregate_Column, Column_Selector, Join_Kind, Value_Type +from Standard.Table import Aggregate_Column, Join_Kind, Value_Type from Standard.Table.Errors import all import project.Connection.Connection.Connection diff --git a/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science/Preparation.enso b/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science/Preparation.enso index 6317a650daf..86704f2b338 100644 --- a/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science/Preparation.enso +++ b/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science/Preparation.enso @@ -29,7 +29,7 @@ import Standard.Examples example_drop_missing_cols = - 
Examples.inventory_table.remove (Column_Selector.Blank_Columns when_any=True) + Examples.inventory_table.remove_blank_columns when_any=True > Example Fill missing values in a column with the value 20.5. diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Aggregate_Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Aggregate_Column.enso index 83cf98b2263..6e2feb251f5 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Aggregate_Column.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Aggregate_Column.enso @@ -1,7 +1,6 @@ from Standard.Base import all import project.Data.Column.Column -import project.Data.Column_Selector.Column_Selector import project.Data.Sort_Column.Sort_Column ## Defines an Aggregate Column @@ -32,7 +31,7 @@ type Aggregate_Column multiple selection. - new_name: name of new column. - ignore_nothing: if all values are Nothing won't be included. - Count_Distinct (columns:(Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector | Column))=0) (new_name:Text="") (ignore_nothing:Boolean=False) # Column needed because of 6866 + Count_Distinct (columns:(Text | Integer | Regex | Vector (Integer | Text | Regex | Column))=0) (new_name:Text="") (ignore_nothing:Boolean=False) # Column needed because of 6866 ## ALIAS Count_Not_Null diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso index 4af6bfb2600..5253c1ad204 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso @@ -5,6 +5,7 @@ import Standard.Base.Errors.Common.Index_Out_Of_Bounds import Standard.Base.Errors.Illegal_Argument.Illegal_Argument import Standard.Base.Errors.Illegal_State.Illegal_State import Standard.Base.Internal.Rounding_Helpers +from Standard.Base.Widget_Helpers import make_regex_text_widget import project.Data.Data_Formatter.Data_Formatter import 
project.Data.Table.Table @@ -1212,7 +1213,8 @@ type Column Replace texts in quotes with parentheses. column.replace '"(.*?)"'.to_regex '($1)' - replace : Text | Column | Regex -> Text | Column -> Case_Sensitivity -> Boolean -> Column + @term make_regex_text_widget + replace : Text | Regex | Column -> Text | Column -> Case_Sensitivity -> Boolean -> Column replace self term="" new_text="" case_sensitivity=Case_Sensitivity.Sensitive only_first=False = Value_Type.expect_text self <| term_fn = wrap_text_or_regex_argument_as_value_provider term diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column_Selector.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column_Selector.enso deleted file mode 100644 index d92fc6e82dd..00000000000 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column_Selector.enso +++ /dev/null @@ -1,32 +0,0 @@ -from Standard.Base import all - -## Specifies a selection of columns from the table on which an operation is - going to be performed. -type Column_Selector - ## Selects columns based on their names. - - It can do regex-based and case insensitive matching if requested. - It is possible for it to match multiple columns, in which case all the - matched ones will be included in the same relative order as in the table. - By_Name name:Text case_sensitivity:Case_Sensitivity=Case_Sensitivity.Insensitive use_regex:Boolean=False - - ## ALIAS dropna - ALIAS drop_missing_columns - Select columns which are either all blank or contain blank values. - - Arguments: - - when_any: By default, only columns consisting of all blank cells are - selected. If set to `True`, columns containing at least one blank value - will be selected too. If there are no rows, the column is treated as - blank regardless of this argument. - - treat_nans_as_blank: If `True`, then `Number.nan` is considered as - blank. - - ? Blank values - Blank values are `Nothing`, `""` and depending on setting `Number.nan`. 
- - > Example - Remove completely blank columns from a table. - - table.remove_columns Column_Selector.Blank_Columns - Blank_Columns when_any:Boolean=False treat_nans_as_blank:Boolean=False diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index d71fe01349e..a5c311d8b0f 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -15,7 +15,6 @@ from Standard.Base.Widget_Helpers import make_delimiter_selector import project.Data.Aggregate_Column.Aggregate_Column import project.Data.Column as Column_Module import project.Data.Column.Column -import project.Data.Column_Selector.Column_Selector import project.Data.Data_Formatter.Data_Formatter import project.Data.Expression.Expression import project.Data.Expression.Expression_Error @@ -267,8 +266,10 @@ type Table dropped from the output. Arguments: - - columns: Column selection criteria - a single instance or Vector of - names, indexes or `Column_Selector`. + - columns: Specifies columns by a name, index or regular expression to + match names, or a Vector of these. + - case_sensitivity: Controls whether to be case sensitive when matching + column names. - reorder: By default, or if set to `False`, columns in the output will be in the same order as in the input table. If `True`, the order in the output table will match the order in the columns list. If a column is @@ -303,27 +304,29 @@ type Table > Example Select columns matching a regular expression. - table.select_columns [Column_Selector.By_Name "foo.+" Case_Sensitivity.Insensitive use_regex=True] + table.select_columns "foo.+".to_regex case_sensitivity=Case_Sensitivity.Insensitive > Example Select the first two columns and the last column, moving the last one to front. 
table.select_columns [-1, 0, 1] reorder=True @columns Widget_Helpers.make_column_name_vector_selector - select_columns : Vector (Integer | Text | Column_Selector) | Text | Integer -> Boolean -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns - select_columns self columns=[self.columns.first.name] (reorder = False) (error_on_missing_columns = True) (on_problems = Report_Warning) = - new_columns = self.columns_helper.select_columns selectors=columns reorder=reorder error_on_missing_columns=error_on_missing_columns on_problems=on_problems + select_columns : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Case_Sensitivity -> Boolean -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns + select_columns self (columns : (Vector | Text | Integer | Regex) = [self.columns.first.name]) (case_sensitivity=Case_Sensitivity.Default) (reorder:Boolean=False) (error_on_missing_columns:Boolean=True) (on_problems:Problem_Behavior=Report_Warning) = + new_columns = self.columns_helper.select_columns columns case_sensitivity reorder error_on_missing_columns on_problems Table.new new_columns ## ALIAS drop_columns - Returns a new table with the chosen set of columns, as specified by thez + Returns a new table with the chosen set of columns, as specified by the `columns`, removed from the input table. Any unmatched input columns will be kept in the output. Columns are returned in the same order as in the input. Arguments: - - columns: Column selection criteria - a single instance or Vector of - names, indexes or `Column_Selector`, which are to be removed. + - columns: Specifies columns by a name, index or regular expression to + match names, or a Vector of these. + - case_sensitivity: Controls whether to be case sensitive when matching + column names. - error_on_missing_columns: Specifies if a missing input column should result in an error regardless of the `on_problems` settings. Defaults to `False`. 
@@ -353,27 +356,80 @@ type Table > Example Remove columns matching a regular expression. - table.remove_columns [Column_Selector.By_Name "foo.+" Case_Sensitivity.Insensitive use_regex=True] + table.remove_columns "foo.+".to_regex case_sensitivity=Case_Sensitivity.Insensitive > Example Remove the first two columns and the last column. table.remove_columns [-1, 0, 1] @columns Widget_Helpers.make_column_name_vector_selector - remove_columns : Vector (Integer | Text | Column_Selector) | Text | Integer -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns - remove_columns self (columns=[self.columns.first.name]) (error_on_missing_columns = False) (on_problems = Report_Warning) = - new_columns = self.columns_helper.remove_columns selectors=columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems + remove_columns : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns + remove_columns self (columns : (Vector | Text | Integer | Regex) = [self.columns.first.name]) (case_sensitivity=Case_Sensitivity.Default) (error_on_missing_columns:Boolean=False) (on_problems:Problem_Behavior=Report_Warning) = + new_columns = self.columns_helper.remove_columns columns case_sensitivity error_on_missing_columns=error_on_missing_columns on_problems=on_problems Table.new new_columns + ## ALIAS select_na + ALIAS select_missing_columns + + Select columns which are either all blank or contain blank values. If no + rows are present, all columns are considered blank. + + Arguments: + - when_any: By default, only columns consisting of all blank cells are + selected. If set to `True`, columns with one or more blank values are + selected. + - treat_nans_as_blank: specifies whether `Number.nan` is considered as + blank. By default, it is not. + + ? Blank values + Blank values are `Nothing`, `""` and depending on setting `Number.nan`. 
+ + > Example + Select completely blank columns from a table. + + table.select_blank_columns + select_blank_columns : Boolean -> Boolean -> Table ! No_Output_Columns + select_blank_columns self (when_any : Boolean = False) (treat_nans_as_blank : Boolean = False) = + new_columns = self.columns_helper.select_blank_columns_helper when_any treat_nans_as_blank + if new_columns.length == 0 then Error.throw (No_Output_Columns) else + Table.new new_columns + + ## ALIAS drop_na + ALIAS drop_missing_columns + + Remove columns which are either all blank or contain blank values. If no + rows are present, all columns are considered blank. + + Arguments: + - when_any: By default, only columns consisting of all blank cells are + selected. If set to `True`, columns with one or more blank values are + selected. + - treat_nans_as_blank: specifies whether `Number.nan` is considered as + blank. By default, it is not. + + ? Blank values + Blank values are `Nothing`, `""` and depending on setting `Number.nan`. + + > Example + Remove completely blank columns from a table. + + table.remove_blank_columns + remove_blank_columns : Boolean -> Boolean -> Table ! No_Output_Columns + remove_blank_columns self (when_any : Boolean = False) (treat_nans_as_blank : Boolean = False) = + new_columns = self.columns_helper.select_blank_columns_helper when_any treat_nans_as_blank invert_selection=True + if new_columns.length == 0 then Error.throw (No_Output_Columns) else + Table.new new_columns + ## Returns a new table with the specified selection of columns moved to either the start or the end in the specified order. Arguments: - - columns: Column selection criteria - a single instance or Vector of - names, indexes or `Column_Selector`, which should be reordered and - specifying their order. + - columns: Specifies columns by a name, index or regular expression to + match names, or a Vector of these.
- position: Specifies how to place the selected columns in relation to the remaining columns which were not matched by `columns` (if any). + - case_sensitivity: Controls whether to be case sensitive when matching + column names. - error_on_missing_columns: Specifies if a missing input column should result in an error regardless of the `on_problems` settings. Defaults to `False`. @@ -400,7 +456,7 @@ type Table > Example Move columns matching a regular expression to front, keeping columns matching "foo.+" before columns matching "b.*". - table.reorder_columns [Column_Selector.By_Name "foo.+" Case_Sensitivity.Insensitive use_regex=True] + table.reorder_columns "foo.+".to_regex case_sensitivity=Case_Sensitivity.Insensitive > Example Swap the first two columns. @@ -412,9 +468,9 @@ type Table table.reorder_columns [0] position=Position.After_Other_Columns @columns Widget_Helpers.make_column_name_vector_selector - reorder_columns : Vector (Integer | Text | Column_Selector) | Text | Integer -> Position -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns - reorder_columns self (columns = [self.columns.first.name]) (position = Position.Before_Other_Columns) (error_on_missing_columns = False) (on_problems = Report_Warning) = - new_columns = self.columns_helper.reorder_columns selectors=columns position=position error_on_missing_columns=error_on_missing_columns on_problems=on_problems + reorder_columns : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Position -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! 
Missing_Input_Columns + reorder_columns self (columns : (Vector | Text | Integer | Regex) = [self.columns.first.name]) (position:Position=Position.Before_Other_Columns) (case_sensitivity=Case_Sensitivity.Default) (error_on_missing_columns:Boolean=False) (on_problems:Problem_Behavior=Report_Warning) = + new_columns = self.columns_helper.reorder_columns columns position case_sensitivity error_on_missing_columns on_problems Table.new new_columns ## Returns a new table with the columns sorted by name according to the @@ -441,7 +497,7 @@ type Table table.reorder_columns Sort_Direction.Descending sort_columns : Sort_Direction -> Text_Ordering -> Table sort_columns self order=Sort_Direction.Ascending text_ordering=Text_Ordering.Default = - new_columns = Table_Helpers.sort_columns internal_columns=self.columns order text_ordering + new_columns = Table_Helpers.sort_columns self.columns order text_ordering Table.new new_columns ## Returns a new table with the columns renamed based on either a mapping @@ -449,7 +505,10 @@ type Table Arguments: - column_map: Mapping from old column names to new or a vector of new - column names to apply by position. + column names to apply by position. `Regex` objects can be used + within the mapping to do pattern based renaming. + - case_sensitivity: Controls whether to be case sensitive when matching + column names. - error_on_missing_columns: Specifies if a missing input column should result in an error regardless of the `on_problems` settings. Defaults to `True`. @@ -500,12 +559,11 @@ type Table > Example For all columns starting with the prefix `name=`, replace it with `key:`. 
- by_name = Column_Selector.By_Name "name=(.*)" Case_Sensitivity.Sensitive use_regex=True - table.rename_columns (Map.from_vector [[by_name, "key:$1"]]) + table.rename_columns (Map.from_vector [["name=(.*)".to_regex, "key:$1"]]) @column_map Widget_Helpers.make_rename_name_vector_selector - rename_columns : Map (Text | Integer | Column_Selector) Text | Vector Text | Vector Vector -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns | Ambiguous_Column_Rename | Too_Many_Column_Names_Provided | Invalid_Output_Column_Names | Duplicate_Output_Column_Names - rename_columns self column_map=["Column"] (error_on_missing_columns=True) (on_problems=Report_Warning) = - new_names = Table_Helpers.rename_columns internal_columns=self.columns mapping=column_map error_on_missing_columns=error_on_missing_columns on_problems=on_problems + rename_columns : Map (Text | Integer | Regex) Text | Vector Text | Vector Vector -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns | Ambiguous_Column_Rename | Too_Many_Column_Names_Provided | Invalid_Output_Column_Names | Duplicate_Output_Column_Names + rename_columns self (column_map:(Map | Vector)=["Column"]) (case_sensitivity:Case_Sensitivity=Case_Sensitivity.Default) (error_on_missing_columns:Boolean=True) (on_problems:Problem_Behavior=Report_Warning) = + new_names = Table_Helpers.rename_columns self.columns column_map case_sensitivity error_on_missing_columns on_problems Warning.with_suspended new_names names-> Table.new (self.columns.map c-> c.rename (names.at c.name)) @@ -673,10 +731,8 @@ type Table problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns types_to_always_throw=[No_Input_Columns_Selected] columns_for_ordering = Table_Helpers.prepare_order_by self.columns columns problem_builder problem_builder.attach_problems_before on_problems <| - java_columns = columns_for_ordering.map c-> - c.column.java_column - directions = columns_for_ordering.map c-> - 
c.associated_selector.direction.to_sign + java_columns = columns_for_ordering.map c->c.column.java_column + directions = columns_for_ordering.map c->c.associated_selector.direction.to_sign comparator = case text_ordering.sort_digits_as_numbers of True -> @@ -727,9 +783,9 @@ type Table `Floating_Point_Equality` is reported according to the `on_problems` setting. @columns Widget_Helpers.make_column_name_vector_selector - distinct : Vector (Integer | Text | Column_Selector) | Text | Integer -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | No_Input_Columns_Selected | Floating_Point_Equality + distinct : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | No_Input_Columns_Selected | Floating_Point_Equality distinct self (columns = self.column_names) case_sensitivity=Case_Sensitivity.Default error_on_missing_columns=True on_problems=Report_Warning = - key_columns = self.columns_helper.select_columns selectors=columns reorder=True error_on_missing_columns=error_on_missing_columns on_problems=on_problems . catch No_Output_Columns _-> + key_columns = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=True error_on_missing_columns=error_on_missing_columns on_problems=on_problems . 
catch No_Output_Columns _-> Error.throw No_Input_Columns_Selected java_columns = key_columns.map .java_column text_folding_strategy = Case_Sensitivity.folding_strategy case_sensitivity @@ -827,7 +883,7 @@ type Table table.parse format=(Data_Formatter.Value.with_number_formatting decimal_point=',') @columns Widget_Helpers.make_column_name_vector_selector @type Widget_Helpers.parse_type_selector - parse : Vector (Text | Integer | Column_Selector) | Text | Integer -> Value_Type | Auto -> Text | Data_Formatter | Nothing -> Boolean -> Problem_Behavior -> Table + parse : Vector (Text | Integer | Regex) | Text | Integer | Regex -> Value_Type | Auto -> Text | Data_Formatter | Nothing -> Boolean -> Problem_Behavior -> Table parse self columns=(self.columns . filter (c-> c.value_type.is_text) . map .name) type=Auto format=Data_Formatter.Value error_on_missing_columns=True on_problems=Report_Warning = formatter = case format of _ : Text -> @@ -839,7 +895,7 @@ type Table parser = formatter.make_value_type_parser type select_problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns - selected_columns = self.columns_helper.select_columns_helper columns reorder=True select_problem_builder + selected_columns = self.columns_helper.select_columns_helper columns Case_Sensitivity.Default True select_problem_builder select_problem_builder.attach_problems_before on_problems <| selected_column_names = case selected_columns.is_empty of True -> @@ -914,9 +970,9 @@ type Table types. Due to this, a Mixed column containing values `[2, "3"]` will actually be converted into `[2, Nothing]` when casting to Integer type. @columns Widget_Helpers.make_column_name_vector_selector - cast : Vector (Text | Integer | Column_Selector) | Text | Integer -> Value_Type -> Boolean -> Problem_Behavior -> Table ! 
Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure + cast : Vector (Text | Integer | Regex) | Text | Integer | Regex -> Value_Type -> Boolean -> Problem_Behavior -> Table ! Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure cast self columns=[0] value_type error_on_missing_columns=True on_problems=Problem_Behavior.Report_Warning = - selected = self.columns_helper.resolve_columns columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems + selected = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=False error_on_missing_columns=error_on_missing_columns on_problems=on_problems error_on_empty=False selected.fold self table-> column_to_cast-> new_column = column_to_cast.cast value_type on_problems table.set new_column new_name=column_to_cast.name set_mode=Set_Mode.Update @@ -1201,8 +1257,8 @@ type Table problem is reported. @group_by Widget_Helpers.make_column_name_vector_selector @order_by Widget_Helpers.make_order_by_selector - add_row_number : Text -> Integer -> Integer -> Vector (Text | Integer | Column_Selector) | Text | Integer -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table - add_row_number self (name:Text = "Row") (from:Integer = 1) (step:Integer = 1) group_by=[] order_by=[] on_problems=Problem_Behavior.Report_Warning = + add_row_number : Text -> Integer -> Integer -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table + add_row_number self (name:Text="Row") (from:Integer=1) (step:Integer=1) (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=Problem_Behavior.Report_Warning) = Add_Row_Number.add_row_number self name from step group_by order_by on_problems ## ALIAS Add Column, Update Column, New Column @@ -1784,13 +1840,13 @@ type Table B | Name | Another B | Country | Germany @key_columns Widget_Helpers.make_column_name_vector_selector - 
transpose : Vector (Integer | Text | Column_Selector) | Text | Integer -> Text -> Text -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | Duplicate_Output_Column_Names + transpose : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Text -> Text -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Missing_Input_Columns | Duplicate_Output_Column_Names transpose self (key_columns = []) (attribute_column_name="Name") (value_column_name="Value") (error_on_missing_columns=True) (on_problems = Report_Warning) = columns_helper = self.columns_helper unique = Unique_Name_Strategy.new problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns - id_columns = columns_helper.select_columns_helper key_columns False problem_builder + id_columns = columns_helper.select_columns_helper key_columns Case_Sensitivity.Default False problem_builder selected_names = Map.from_vector (id_columns.map column-> [column.name, True]) @@ -1855,7 +1911,7 @@ type Table @group_by Widget_Helpers.make_column_name_vector_selector @name_column Widget_Helpers.make_column_name_selector @values (Widget_Helpers.make_aggregate_column_selector include_group_by=False) - cross_tab : Vector (Integer | Text | Column_Selector | Aggregate_Column) | Text | Integer -> (Text | Integer) -> Aggregate_Column | Vector Aggregate_Column -> Problem_Behavior -> Table ! Missing_Input_Columns | Invalid_Aggregate_Column | Floating_Point_Equality | Invalid_Aggregation | Unquoted_Delimiter | Additional_Warnings + cross_tab : Vector (Integer | Text | Regex | Aggregate_Column) | Text | Integer | Regex -> (Text | Integer) -> Aggregate_Column | Vector Aggregate_Column -> Problem_Behavior -> Table ! 
Missing_Input_Columns | Invalid_Aggregate_Column | Floating_Point_Equality | Invalid_Aggregation | Unquoted_Delimiter | Additional_Warnings cross_tab self group_by name_column values=Aggregate_Column.Count (on_problems=Report_Warning) = columns_helper = self.columns_helper problem_builder = Problem_Builder.new error_on_missing_columns=True @@ -1872,8 +1928,8 @@ type Table ix : Integer -> [ix] name : Text -> [name] _ -> Error.throw (Illegal_Argument.Error "name_column must be a column index or name.") - matched_name = columns_helper.select_columns_helper name_column_selector True problem_builder - grouping = columns_helper.select_columns_helper (normalize_group_by group_by) True problem_builder + matched_name = columns_helper.select_columns_helper name_column_selector Case_Sensitivity.Default True problem_builder + grouping = columns_helper.select_columns_helper (normalize_group_by group_by) Case_Sensitivity.Default True problem_builder ## Validate the values values_vector = case values of @@ -2037,7 +2093,8 @@ type Table been replaced with the provided default(s). Arguments: - - columns: The column(s) to fill Nothing values. + - columns: Specifies columns by a name, index or regular expression to + match names, or a Vector of these. - default: The value to replace missing values with. If this argument is a column, the value from `default` at the corresponding position will be used. @@ -2047,7 +2104,7 @@ type Table fill_nothing = table.fill_nothing ["col0", "col1"] 20.5 @columns Widget_Helpers.make_column_name_vector_selector - fill_nothing : Vector (Integer | Text | Column_Selector) | Text | Integer -> Column | Any -> Table + fill_nothing : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Any -> Table fill_nothing self columns default = transformer col = col.fill_nothing default Table_Helpers.replace_columns_with_transformed_columns self columns transformer @@ -2058,7 +2115,8 @@ type Table provided default(s). 
Arguments: - - columns: The column(s) to fill empty values. + - columns: Specifies columns by a name, index or regular expression to + match names, or a Vector of these. - default: The value to replace empty values with. If this argument is a column, the value from `default` at the corresponding position will be used. @@ -2068,7 +2126,7 @@ type Table fill_empty = table.fill_empty ["col0", "col1"] "hello" @columns Widget_Helpers.make_column_name_vector_selector - fill_empty : Vector (Integer | Text | Column_Selector) | Text | Integer -> Column | Any -> Table + fill_empty : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Any -> Table fill_empty self columns default = transformer col = col.fill_empty default Table_Helpers.replace_columns_with_transformed_columns self columns transformer @@ -2104,7 +2162,7 @@ type Table column.replace '"(.*?)"'.to_regex '($1)' @columns Widget_Helpers.make_column_name_vector_selector - replace : Vector (Integer | Text | Column_Selector) | Text | Integer -> Text | Column | Regex -> Text | Column -> Case_Sensitivity -> Boolean -> Column + replace : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Text | Column | Regex -> Text | Column -> Case_Sensitivity -> Boolean -> Column replace self columns term="" new_text="" case_sensitivity=Case_Sensitivity.Sensitive only_first=False = transformer col = col.replace term new_text case_sensitivity only_first Table_Helpers.replace_columns_with_transformed_columns self columns transformer diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Add_Row_Number.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Add_Row_Number.enso index f74ba2079a9..15a37b15cdb 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Add_Row_Number.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Add_Row_Number.enso @@ -1,7 +1,6 @@ from Standard.Base import all import project.Data.Column.Column -import 
project.Data.Column_Selector.Column_Selector import project.Data.Set_Mode.Set_Mode import project.Data.Sort_Column.Sort_Column import project.Data.Table.Table @@ -18,10 +17,10 @@ polyglot java import org.enso.table.data.column.storage.numeric.LongRangeStorage polyglot java import org.enso.table.data.column.storage.numeric.LongStorage ## PRIVATE -add_row_number : Table -> Text -> Integer -> Integer -> (Column_Selector | Vector Text) -> Vector (Text | Sort_Column) | Text | Sort_Column -> Problem_Behavior -> Table +add_row_number : Table -> Text -> Integer -> Integer -> Text | Integer | Regex | Vector (Integer | Text | Regex) -> Vector (Text | Sort_Column) | Text | Sort_Column -> Problem_Behavior -> Table add_row_number table name from step group_by order_by on_problems = problem_builder = Problem_Builder.new error_on_missing_columns=True - grouping_columns = table.columns_helper.select_columns_helper group_by True problem_builder + grouping_columns = table.columns_helper.select_columns_helper group_by Case_Sensitivity.Default True problem_builder Unordered_Multi_Value_Key.validate_grouping_columns grouping_columns problem_builder ordering = Table_Helpers.resolve_order_by table.columns order_by problem_builder problem_builder.attach_problems_before on_problems <| diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso index e0b5f879413..76960016ebb 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso @@ -2,7 +2,6 @@ from Standard.Base import all hiding First, Last import project.Data.Aggregate_Column.Aggregate_Column import project.Data.Column.Column -import project.Data.Column_Selector.Column_Selector import project.Data.Sort_Column.Sort_Column import project.Data.Table.Table import 
project.Data.Type.Value_Type.Value_Type @@ -149,9 +148,9 @@ resolve_aggregate table problem_builder aggregate_column = res = columns_helper.resolve_column_or_expression c problem_builder res.if_nothing (Error.throw Internal_Missing_Column_Error) - resolve_selector_to_vector : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Vector Column ! Internal_Missing_Column_Error + resolve_selector_to_vector : Text | Integer | Vector (Integer | Text) -> Vector Column ! Internal_Missing_Column_Error resolve_selector_to_vector selector = - resolved = columns_helper.select_columns_helper selector reorder=True problem_builder + resolved = columns_helper.select_columns_helper selector Case_Sensitivity.Default True problem_builder if resolved.is_empty then Error.throw Internal_Missing_Column_Error else resolved resolve_order_by selector = case selector of diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso index 6eaef3a6378..6d4522e1739 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso @@ -2,8 +2,8 @@ from Standard.Base import all import Standard.Base.Errors.Illegal_Argument.Illegal_Argument import Standard.Base.Errors.Illegal_State.Illegal_State +import project.Data.Aggregate_Column.Aggregate_Column import project.Data.Column.Column -import project.Data.Column_Selector.Column_Selector import project.Data.Position.Position import project.Data.Set_Mode.Set_Mode import project.Data.Sort_Column.Sort_Column @@ -12,7 +12,6 @@ import project.Data.Type.Value_Type.Value_Type import project.Data.Type.Value_Type_Helpers import project.Internal.Problem_Builder.Problem_Builder import project.Internal.Unique_Name_Strategy.Unique_Name_Strategy -from project.Data.Aggregate_Column.Aggregate_Column import Maximum, Minimum from project.Errors 
import Ambiguous_Column_Rename, Column_Type_Mismatch, Invalid_Aggregate_Column, Missing_Input_Columns, No_Common_Type, No_Input_Columns_Selected, No_Output_Columns, Too_Many_Column_Names_Provided polyglot java import java.util.HashSet @@ -41,7 +40,9 @@ type Table_Column_Helper Arguments: - selectors: Single instance or a Vector of names, indexes or - `Column_Selector`s. + regular expressions. + - case_sensitivity: Specifies whether the column name matching should be + case sensitive. - reorder: Specifies whether to reorder the matched columns according to the order of the selection criteria. If `False`, the matched entries are returned in the same order as in @@ -57,23 +58,14 @@ type Table_Column_Helper operation. By default, a warning is issued, but the operation proceeds. If set to `Report_Error`, the operation fails with a dataflow error. If set to `Ignore`, the operation proceeds without errors or warnings. - select_columns : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Boolean -> Boolean -> Problem_Behavior -> Vector - select_columns self selectors reorder error_on_missing_columns on_problems = + - error_on_empty: Specifies if the operation should fail if no columns + are selected. 
+ select_columns : Text | Integer | Regex | Vector (Integer | Text | Regex) -> Case_Sensitivity -> Boolean -> Boolean -> Problem_Behavior -> Boolean -> Vector + select_columns self (selectors:(Text | Integer | Regex | Vector)) (case_sensitivity:Case_Sensitivity) (reorder:Boolean) (error_on_missing_columns:Boolean) (on_problems:Problem_Behavior) (error_on_empty:Boolean=True) = problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns - result = self.select_columns_helper selectors reorder problem_builder + result = self.select_columns_helper selectors case_sensitivity reorder problem_builder problem_builder.attach_problems_before on_problems <| - if result.is_empty then Error.throw No_Output_Columns else result - - ## PRIVATE - Works like `select_columns` but will not throw `No_Output_Columns` error - and will return proper columns instead of internal columns. - Useful, when selecting a subset of columns to transform. - resolve_columns : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Boolean -> Problem_Behavior -> Boolean -> Vector - resolve_columns self selectors error_on_missing_columns on_problems reorder=False = - problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns - result = self.select_columns_helper selectors reorder problem_builder - problem_builder.attach_problems_before on_problems <| - result.map self.make_column + if error_on_empty && result.is_empty then Error.throw No_Output_Columns else result ## PRIVATE A helper function encapsulating shared code for `remove_columns` @@ -86,17 +78,19 @@ type Table_Column_Helper Arguments: - selectors: Single instance or a Vector of names, indexes or - `Column_Selector`s. + regular expressions. + - case_sensitivity: Specifies whether the column name matching should be + case sensitive. - error_on_missing_columns: Specifies if missing columns should be raised as error regardless of `on_problems`. 
- on_problems: Specifies the behavior when a problem occurs during the operation. By default, a warning is issued, but the operation proceeds. If set to `Report_Error`, the operation fails with a dataflow error. If set to `Ignore`, the operation proceeds without errors or warnings. - remove_columns : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Boolean -> Problem_Behavior -> Vector - remove_columns self selectors error_on_missing_columns on_problems = + remove_columns : Text | Integer | Regex | Vector (Integer | Text | Regex) -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Vector + remove_columns self (selectors:(Text | Integer | Regex | Vector)) (case_sensitivity:Case_Sensitivity) (error_on_missing_columns:Boolean) (on_problems:Problem_Behavior) = problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns - selection = self.select_columns_helper selectors reorder=False problem_builder + selection = self.select_columns_helper selectors case_sensitivity False problem_builder selected_names = Map.from_vector (selection.map column-> [column.name, True]) result = self.internal_columns.filter column-> should_be_removed = selected_names.get column.name False @@ -115,19 +109,21 @@ type Table_Column_Helper Arguments: - selectors: Single instance or a Vector of names, indexes or - `Column_Selector`s. + regular expressions. - position: Specifies how to place the selected columns in relation to the columns which were not matched by the `selector` (if any). + - case_sensitivity: Specifies whether the column name matching should be + case sensitive. - error_on_missing_columns: Specifies if missing columns should be raised as error regardless of `on_problems`. - on_problems: Specifies the behavior when a problem occurs during the operation. By default, a warning is issued, but the operation proceeds. If set to `Report_Error`, the operation fails with a dataflow error. 
If set to `Ignore`, the operation proceeds without errors or warnings. - reorder_columns : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Position -> Boolean -> Problem_Behavior -> Vector - reorder_columns self selectors position error_on_missing_columns on_problems = + reorder_columns : Text | Integer | Regex | Vector (Integer | Text | Regex) -> Position -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Vector + reorder_columns self (selectors:(Text | Integer | Regex | Vector)) (position:Position) (case_sensitivity:Case_Sensitivity) (error_on_missing_columns:Boolean) (on_problems:Problem_Behavior) = problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns - selection = self.select_columns_helper selectors reorder=True problem_builder + selection = self.select_columns_helper selectors case_sensitivity True problem_builder problem_builder.attach_problems_before on_problems <| selected_names = Map.from_vector (selection.map column-> [column.name, True]) other_columns = self.internal_columns.filter column-> @@ -143,31 +139,28 @@ type Table_Column_Helper provided selection criteria. Arguments: - - selectors: Single instance or a Vector of names, indexes or - `Column_Selector`s. + - selectors: Single instance or a Vector of names, indexes or regular + expressions. + - case_sensitivity: Specifies whether to match the column names ignoring + case. - reorder: Specifies whether to reorder the matched columns according to - the order of the selection criteria. - If `False`, the matched entries are returned in the same order as in - the input. - If `True`, the matched entries are returned in the order of the - criteria matching them. If a single object has been matched by multiple - criteria, it is placed in the group belonging to the first matching - criterion on the list. If a single criterion's group has more than one - element, their relative order is the same as in the input. 
+ the order of the selection criteria. If `False`, the matched entries + are returned in the same order as in the input. If `True`, the matched + entries are returned in the order of the first criteria matching them. + If a single criterion matches more than one column, their order is the + same as in the input table. - problem_builder: Encapsulates the aggregation of encountered problems. - select_columns_helper : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Boolean -> Problem_Builder -> Vector - select_columns_helper self selectors reorder problem_builder = - resolve_selector selector = case selector of - name : Text -> resolve_selector (Column_Selector.By_Name name Case_Sensitivity.Sensitive False) - ix : Integer -> if is_index_valid self.internal_columns.length ix then [self.internal_columns.at ix] else - problem_builder.report_oob_indices [ix] - [] - Column_Selector.By_Name name case_sensitivity use_regex -> - matches = match_columns name case_sensitivity use_regex self.internal_columns - if matches.is_empty then problem_builder.report_missing_input_columns [name] - matches - Column_Selector.Blank_Columns when_any treat_nans_as_blank -> - get_blank_columns when_any treat_nans_as_blank self.internal_columns self.make_column self.table self.materialize + select_columns_helper : Text | Integer | Regex | Vector (Integer | Text | Regex) -> Case_Sensitivity -> Boolean -> Problem_Builder -> Vector + select_columns_helper self (selectors:(Text | Integer | Regex | Vector)) (case_sensitivity:Case_Sensitivity) (reorder:Boolean) (problem_builder:Problem_Builder) = + resolve_selector selector = + case selector of + ix : Integer -> if is_index_valid self.internal_columns.length ix then [self.internal_columns.at ix] else + problem_builder.report_oob_indices [ix] + [] + _ -> + matches = match_columns selector case_sensitivity self.internal_columns + if matches.is_empty then problem_builder.report_missing_input_columns [selector] + matches 
vector = case selectors of _ : Vector -> selectors @@ -198,6 +191,56 @@ type Table_Column_Helper problem_builder.report_oob_indices [selector] Nothing + ## PRIVATE + A helper method that gets the columns from the provided table that are + completely blank or have some blanks. + + Arguments: + - when_any: By default, only columns consisting of all blank cells are + selected. If set to `True`, columns with one or more blank values are + selected. + - treat_nans_as_blank: If `True`, then `Number.nan` is considered as + blank. + - invert_selection: If `True`, then the selection is inverted. + select_blank_columns_helper : Boolean -> Boolean -> Boolean -> Vector + select_blank_columns_helper self when_any:Boolean treat_nans_as_blank:Boolean invert_selection:Boolean=False = + blanks = self.internal_columns.map_with_index ix-> internal_column-> + column = self.make_column internal_column + blank_indicator = column.is_blank treat_nans_as_blank + blank_indicator.iif 1 0 . rename "blanks_"+ix.to_text + + ## We cannot just use a custom_column in the aggregate because of + how the column selector works. We may need to revisit this. For + now we need to use tricks like that: + + To be backend agnostic, we cannot create a new table with the + columns above. Instead, we add our blank columns to the table + and then remove all the other columns. We do not have to deal + with name conflicts, as adding a new column with a clashing + name does not affect the old column or derived columns. + table_with_blank_indicators = blanks.fold self.table tbl-> blanks_col-> tbl.set blanks_col + just_indicators = table_with_blank_indicators.select_columns (blanks.map .name) on_problems=Problem_Behavior.Report_Error + + # Maximum is equivalent to Exists and Minimum is equivalent to Forall.
+ col_aggregate = if when_any then Aggregate_Column.Maximum _ else Aggregate_Column.Minimum _ + aggregates = blanks.map blanks_col-> col_aggregate blanks_col.name + + aggregate_result = just_indicators.aggregate aggregates on_problems=Problem_Behavior.Report_Error + materialized_result = self.materialize <| aggregate_result.catch Any error-> + msg = "Unexpected dataflow error has been thrown in an `select_blank_columns_helper`. This is a bug in the Table library. The unexpected error was: "+error.to_display_text + Panic.throw (Illegal_State.Error message=msg cause=error) + + counts = materialized_result.rows.first + self.internal_columns.filter_with_index i-> _-> + include = case counts.at i of + ## No rows in input, so treating as blank by convention. + Nothing -> True + 1 -> True + 0 -> False + unexpected -> + Panic.throw (Illegal_State.Error "Unexpected result: "+unexpected.to_display_text+". Perhaps an implementation bug of `select_blank_columns_helper`.") + if invert_selection then include.not else include + ## PRIVATE A helper function encapsulating shared code for `rename_columns` implementations of various Table variants. See the documentation for the @@ -212,14 +255,16 @@ type Table_Column_Helper - internal_columns: A list of all columns in a table. - mapping: A selector specifying which columns should be moved and the order in which they should appear in the result. + - case_sensitivity: Specifies whether to match the column names ignoring + case. - error_on_missing_columns: If set to `True`, missing columns are treated as error regardless of `on_problems`. - on_problems: Specifies the behavior when a problem occurs during the operation. By default, a warning is issued, but the operation proceeds. If set to `Report_Error`, the operation fails with a dataflow error. If set to `Ignore`, the operation proceeds without errors or warnings. 
-rename_columns : Vector -> Map (Text | Integer | Column_Selector) Text | Vector Text -> Boolean -> Problem_Behavior -> Map Text Text -rename_columns internal_columns mapping error_on_missing_columns on_problems = +rename_columns : Vector -> Map (Text | Integer | Regex) Text | Vector Text -> Case_Sensitivity -> Boolean -> Problem_Behavior -> Map Text Text +rename_columns (internal_columns:Vector) (mapping:(Map | Vector)) (case_sensitivity:Case_Sensitivity) (error_on_missing_columns:Boolean) (on_problems:Problem_Behavior) = ## Convert Vector of Pairs to Map is_vec_pairs = mapping.is_a Vector && mapping.length > 0 && (mapping.first.is_a Text . not) case is_vec_pairs of @@ -246,22 +291,18 @@ rename_columns internal_columns mapping error_on_missing_columns on_problems = internal_columns.take good_names.length . zip good_names _ : Map -> resolve_rename selector replacement = case selector of - name : Text -> resolve_rename (Column_Selector.By_Name name Case_Sensitivity.Sensitive False) replacement ix : Integer -> if is_index_valid internal_columns.length ix then [Pair.new (internal_columns.at ix) replacement] else problem_builder.report_oob_indices [ix] [] - Column_Selector.By_Name name case_sensitivity use_regex -> - matches = match_columns name case_sensitivity use_regex internal_columns + _ -> + matches = match_columns selector case_sensitivity internal_columns case matches.is_empty of True -> - problem_builder.report_missing_input_columns [name] + problem_builder.report_missing_input_columns [selector] [] False -> - if use_regex.not then matches.map c-> Pair.new c replacement else - pattern = Regex.compile name case_insensitive=case_sensitivity.is_case_insensitive_in_memory - matches.map c-> Pair.new c (pattern.replace c.name replacement) - Column_Selector.Blank_Columns _ _ -> - Error.throw (Illegal_Argument.Error "Cannot use `Blank_Columns` to rename.") + if selector.is_a Regex . 
not then matches.map c-> Pair.new c replacement else + matches.map c-> Pair.new c (selector.replace c.name replacement) builder = mapping.to_vector.fold Vector.new_builder builder-> pair-> builder.append_vector_range (resolve_rename pair.first pair.second) builder.to_vector @@ -344,10 +385,15 @@ is_index_valid length ix = ## PRIVATE A helper method to match columns by name -match_columns : Text -> Case_Sensitivity -> Boolean -> Vector -> Vector -match_columns name case_sensitivity use_regex columns = - match = case_sensitivity.create_match_function use_regex - columns.filter c-> match c.name name +match_columns : Text | Regex -> Case_Sensitivity -> Vector -> Vector +match_columns (name : Text | Regex) (case_sensitivity : Case_Sensitivity) (columns : Vector) = + match = case name of + _ : Text -> case case_sensitivity of + Case_Sensitivity.Default -> (== name) + Case_Sensitivity.Sensitive -> (== name) + Case_Sensitivity.Insensitive locale -> name.equals_ignore_case locale=locale + _ : Regex -> name.recompile case_sensitivity . matches + columns.filter c-> match c.name ## PRIVATE A helper type used by transform helpers. 
@@ -379,7 +425,8 @@ resolve_order_by internal_columns column_selectors problem_builder = problem_builder.report_oob_indices [ix] [] Sort_Column.Select_By_Name name _ case_sensitivity use_regex -> - matches = match_columns name case_sensitivity use_regex internal_columns + regex_or_text = if use_regex then name.to_regex else name + matches = match_columns regex_or_text case_sensitivity internal_columns if matches.is_empty then problem_builder.report_missing_input_columns [name] matches.map c-> Column_Transform_Element.Value c selector selectors_vec = case column_selectors of @@ -447,46 +494,6 @@ unify_result_type_for_union column_set all_tables allow_type_widening problem_bu problem_builder.report_other_warning (Column_Type_Mismatch.Error column_set.name first_type got_type) Nothing -## PRIVATE - A helper method that gets the columns from the provided table that are - complete blank or have some blanks. -get_blank_columns when_any treat_nans_as_blank internal_columns make_column table materialize = - blanks = internal_columns.map_with_index ix-> internal_column-> - column = make_column internal_column - blank_indicator = column.is_blank treat_nans_as_blank - blank_indicator.iif 1 0 . rename "blanks_"+ix.to_text - - ## We cannot just use a custom_column in the aggregate because of - how the column selector works. We may need to revisit this. For - now we need to use tricks like that: - - To be backend agnostic, we cannot create a new table with the - columns above. Instead, we add our blank columns to the table - and then remove any other columns we. We do not have to deal - with name conflicts, as adding a new column with a clashing - name does not affect the old column or derived columns. 
- table_with_blank_indicators = blanks.fold table table-> blanks_col-> table.set blanks_col - just_indicators = table_with_blank_indicators.select_columns (blanks.map .name) on_problems=Problem_Behavior.Report_Error - - # Maximum is equivalent to Exists and Minimum is equivalent to Forall. - col_aggregate = if when_any then Maximum _ else Minimum _ - aggregates = blanks.map blanks_col-> col_aggregate blanks_col.name - - aggregate_result = just_indicators.aggregate aggregates on_problems=Problem_Behavior.Report_Error - materialized_result = materialize <| aggregate_result.catch Any error-> - msg = "Unexpected dataflow error has been thrown in an `select_columns_helper`. This is a bug in the Table library. The unexpected error was: "+error.to_display_text - Panic.throw (Illegal_State.Error message=msg cause=error) - - counts = materialized_result.rows.first - internal_columns.filter_with_index i-> _-> - case counts.at i of - ## No rows in input, so treating as blank by convention. - Nothing -> True - 1 -> True - 0 -> False - unexpected -> - Panic.throw (Illegal_State.Error "Unexpected result: "+unexpected.to_display_text+". Perhaps an implementation bug of Column_Selector.Blank_Columns.") - ## PRIVATE Replace a set of columns in the table with a new set of columns. The old columns must all exist in the table, and the list of new columns must be the @@ -503,9 +510,9 @@ replace_columns_with_columns table old_columns new_columns = Replace a set of columns in the table with a new set of columns produced by transforming the old columns. The old columns must all exist in the table, and the new columns must all have the same name. 
-replace_columns_with_transformed_columns : Table -> Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> (Column -> Column) -> Boolean -> Problem_Behavior -> Table +replace_columns_with_transformed_columns : Table -> Text | Integer | Regex | Vector (Integer | Text | Regex) -> (Column -> Column) -> Boolean -> Problem_Behavior -> Table replace_columns_with_transformed_columns table selectors transformer error_on_missing_columns=True on_problems=Report_Warning = - internal_columns = table.columns_helper.select_columns selectors reorder=False error_on_missing_columns=error_on_missing_columns on_problems=on_problems + internal_columns = table.columns_helper.select_columns selectors Case_Sensitivity.Default reorder=False error_on_missing_columns=error_on_missing_columns on_problems=on_problems columns = internal_columns.map table.columns_helper.make_column new_columns = columns.map transformer replace_columns_with_columns table columns new_columns diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso index 9f38e7e6ab2..8c4b3cfa73a 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso @@ -2,7 +2,6 @@ from Standard.Base import all import project.Data.Aggregate_Column.Aggregate_Column import project.Data.Column.Column -import project.Data.Column_Selector.Column_Selector import project.Data.Column_Vector_Extensions import project.Data.Data_Formatter.Data_Formatter import project.Data.Join_Condition.Join_Condition @@ -28,7 +27,6 @@ from project.Excel.Excel_Section.Excel_Section import Cell_Range, Range_Names, S export project.Data.Aggregate_Column.Aggregate_Column export project.Data.Column.Column -export project.Data.Column_Selector.Column_Selector export project.Data.Column_Vector_Extensions export project.Data.Data_Formatter.Data_Formatter export project.Data.Join_Condition.Join_Condition diff 
--git a/test/Table_Tests/src/Common_Table_Operations/Missing_Values_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Missing_Values_Spec.enso index bd00c3e2532..57afb9bd97c 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Missing_Values_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Missing_Values_Spec.enso @@ -1,6 +1,6 @@ from Standard.Base import all -from Standard.Table import Column_Selector, Value_Type +from Standard.Table import Value_Type from Standard.Table.Data.Aggregate_Column.Aggregate_Column import Count_Distinct from Standard.Table.Errors import all @@ -58,12 +58,21 @@ spec setup = t4.row_count . should_equal 0 t4.at "X" . to_vector . should_equal [] + Test.specify "should allow to select blank columns" <| + r1 = t1.select_blank_columns + r1.columns.map .name . should_equal ["f"] + r1.at "f" . to_vector . should_equal [Nothing, "", Nothing, ""] + + r2 = t1.select_blank_columns when_any=True + r2.columns.map .name . should_equal ["a", "b", "d", "e", "f"] + r2.at "d" . to_vector . should_equal [Nothing, True, False, True] + Test.specify "should allow to remove blank columns" <| - r1 = t1.remove_columns (Column_Selector.Blank_Columns when_any=False) + r1 = t1.remove_blank_columns r1.columns.map .name . should_equal ["a", "b", "c", "d", "e"] r1.at "a" . to_vector . should_equal [1, Nothing, 3, 4] - r2 = t1.remove_columns (Column_Selector.Blank_Columns when_any=True) + r2 = t1.remove_blank_columns when_any=True r2.columns.map .name . should_equal ["c"] r2.at "c" . to_vector . should_equal [10, 20, 30, 40] @@ -84,14 +93,18 @@ spec setup = r2.at "X" . to_vector . to_text . should_equal "[2.0, 1.5, NaN, NaN]" r2.at "Y" . to_vector . should_equal [Nothing, 2.0, Nothing, 5.0] - r3 = t4.remove_columns (Column_Selector.Blank_Columns when_any=False) + r3 = t4.remove_blank_columns r3.columns.map .name . should_equal ["c", "g", "h"] r3.at "g" . to_vector . to_text . 
should_equal "[NaN, 1.0, 2.0, 3.4]" - r4 = t4.remove_columns (Column_Selector.Blank_Columns when_any=True) + r4 = t4.remove_blank_columns when_any=True r4.columns.map .name . should_equal ["c", "g"] r4.at "g" . to_vector . to_text . should_equal "[NaN, 1.0, 2.0, 3.4]" + r5 = t4.select_blank_columns when_any=True + r5.columns.map .name . should_equal ["h"] + r5.at "h" . to_vector . to_text . should_equal "[NaN, Nothing, NaN, Nothing]" + Test.specify "should allow to treat NaNs as blank if asked" <| r1 = t3.filter_blank_rows when_any=True treat_nans_as_blank=True # We cannot use `Vector.==` because `NaN != NaN`. @@ -102,42 +115,35 @@ spec setup = r2.at "X" . to_vector . to_text . should_equal "[2.0, 1.5, NaN]" r2.at "Y" . to_vector . should_equal [Nothing, 2.0, 5.0] - r3 = t4.remove_columns (Column_Selector.Blank_Columns when_any=False treat_nans_as_blank=True) + r3 = t4.remove_blank_columns when_any=False treat_nans_as_blank=True r3.columns.map .name . should_equal ["c", "g"] r3.at "g" . to_vector . to_text . should_equal "[NaN, 1.0, 2.0, 3.4]" - r4 = t4.remove_columns (Column_Selector.Blank_Columns when_any=True treat_nans_as_blank=True) - r4.columns.map .name . should_equal ["c"] - r4.at "c" . to_vector . should_equal [10, 20, 40, 30] + r4 = t4.select_blank_columns when_any=False treat_nans_as_blank=True + r4.columns.map .name . should_equal ["h"] + r4.at "h" . to_vector . to_text . should_equal "[NaN, Nothing, NaN, Nothing]" + + r5 = t4.remove_blank_columns when_any=True treat_nans_as_blank=True + r5.columns.map .name . should_equal ["c"] + r5.at "c" . to_vector . should_equal [10, 20, 40, 30] + + r6 = t4.select_blank_columns when_any=True treat_nans_as_blank=True + r6.columns.map .name . should_equal ["g", "h"] + r6.at "h" . to_vector . to_text . should_equal "[NaN, Nothing, NaN, Nothing]" if test_selection.is_nan_and_nothing_distinct.not then Test.specify "this backend treats NaN as Nothing" <| t3.at "X" . to_vector . 
should_equal [2.0, 1.5, Nothing, Nothing] t3.at "X" . is_nan . to_vector . should_fail_with Unsupported_Database_Operation - Test.specify "Blank_Columns selector should work for all kinds of methods accepting Column_Selector" <| - t = table_builder [["X", [1, 2, 3, 4]], ["Y", [Nothing, "", Nothing, Nothing]], ["Z", [Nothing, True, False, Nothing]]] - - t.select_columns Column_Selector.Blank_Columns . columns . map .name . should_equal ["Y"] - t.select_columns (Column_Selector.Blank_Columns when_any=True) . columns . map .name . should_equal ["Y", "Z"] - - t.reorder_columns Column_Selector.Blank_Columns . columns . map .name . should_equal ["Y", "X", "Z"] - - r1 = t.aggregate [Count_Distinct Column_Selector.Blank_Columns] - r1.columns . map .name . should_equal ["Count Distinct Y"] - r1.at "Count Distinct Y" . to_vector . should_equal [2] - r2 = t.aggregate [(Count_Distinct Column_Selector.Blank_Columns ignore_nothing=True)] - r2.columns . map .name . should_equal ["Count Distinct Y"] - r2.at "Count Distinct Y" . to_vector . should_equal [1] - - Test.specify "Blank_Columns selector should deal with edge cases" <| + Test.specify "select_blank_columns and remove_blank_columns should deal with edge cases" <| t = table_builder [["X", [1, 2, 3, 4]]] no_rows = t.filter "X" (Filter_Condition.Equal to=0) no_rows.row_count . should_equal 0 no_rows.at "X" . to_vector . should_equal [] - no_rows.select_columns Column_Selector.Blank_Columns . columns . map .name . should_equal ["X"] - no_rows.remove_columns Column_Selector.Blank_Columns . columns . map .name . should_fail_with No_Output_Columns + no_rows.select_blank_columns . columns . map .name . should_equal ["X"] + no_rows.remove_blank_columns . columns . map .name . 
should_fail_with No_Output_Columns Test.group prefix+"Filling Missing Values" <| Test.specify "should coerce long and double types to double" <| diff --git a/test/Table_Tests/src/Common_Table_Operations/Select_Columns_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Select_Columns_Spec.enso index 9c630a0ffd4..1116acd3ba4 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Select_Columns_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Select_Columns_Spec.enso @@ -1,7 +1,6 @@ from Standard.Base import all from Standard.Table import Position -from Standard.Table.Data.Column_Selector.Column_Selector import By_Name from Standard.Table.Errors import all from Standard.Test import Test, Problems @@ -28,7 +27,7 @@ spec setup = Test.group prefix+"Table.select_columns" <| Test.specify "should work as shown in the doc examples" <| expect_column_names ["foo", "bar"] <| table.select_columns ["bar", "foo"] - expect_column_names ["bar", "Baz", "foo 1", "foo 2"] <| table.select_columns [By_Name "foo.+" use_regex=True, By_Name "b.*" use_regex=True] + expect_column_names ["bar", "Baz", "foo 1", "foo 2"] <| table.select_columns ["foo.+".to_regex, "b.*".to_regex True] expect_column_names ["abcd123", "foo", "bar"] <| table.select_columns [-1, 0, 1] reorder=True Test.specify "should allow to reorder columns if asked to" <| @@ -38,11 +37,11 @@ spec setup = table_2 . at "foo" . to_vector . 
should_equal [1,2,3] Test.specify "should correctly handle regex matching" <| - expect_column_names ["foo"] <| table.select_columns [By_Name "foo" Case_Sensitivity.Sensitive use_regex=True] - expect_column_names ["ab.+123", "abcd123"] <| table.select_columns [By_Name "a.*" Case_Sensitivity.Sensitive use_regex=True] - expect_column_names ["ab.+123", "abcd123"] <| table.select_columns [By_Name "ab.+123" Case_Sensitivity.Sensitive use_regex=True] + expect_column_names ["foo"] <| table.select_columns ["foo".to_regex] + expect_column_names ["ab.+123", "abcd123"] <| table.select_columns ["a.*".to_regex] + expect_column_names ["ab.+123", "abcd123"] <| table.select_columns ["ab.+123".to_regex] expect_column_names ["ab.+123"] <| table.select_columns ["ab.+123"] - expect_column_names ["abcd123"] <| table.select_columns [By_Name "abcd123" Case_Sensitivity.Sensitive use_regex=True] + expect_column_names ["abcd123"] <| table.select_columns ["abcd123".to_regex] Test.specify "should allow negative indices" <| expect_column_names ["foo", "bar", "foo 2"] <| table.select_columns [-3, 0, 1] @@ -50,8 +49,8 @@ spec setup = Test.specify "should allow mixed names and indexes" <| expect_column_names ["foo", "bar", "foo 2"] <| table.select_columns [-3, "bar", 0] expect_column_names ["foo 2", "bar", "foo"] <| table.select_columns [-3, "bar", 0] reorder=True - expect_column_names ["foo", "bar", "foo 1", "foo 2", "abcd123"] <| table.select_columns [-1, "bar", By_Name "foo.*" Case_Sensitivity.Sensitive use_regex=True] - expect_column_names ["foo", "foo 1", "foo 2", "bar", "abcd123"] <| table.select_columns [By_Name "foo.*" Case_Sensitivity.Sensitive use_regex=True, "bar", "foo", -1] reorder=True + expect_column_names ["foo", "bar", "foo 1", "foo 2", "abcd123"] <| table.select_columns [-1, "bar", "foo.*".to_regex] + expect_column_names ["foo", "foo 1", "foo 2", "bar", "abcd123"] <| table.select_columns ["foo.*".to_regex, "bar", "foo", -1] reorder=True if 
test_selection.supports_case_sensitive_columns then Test.specify "should correctly handle exact matches matching multiple names due to case insensitivity" <| @@ -60,11 +59,11 @@ spec setup = col2 = ["bar", [4,5,6]] col3 = ["Bar", [7,8,9]] table_builder [col1, col2, col3] - expect_column_names ["bar", "Bar"] <| table.select_columns [By_Name "bar"] + expect_column_names ["bar", "Bar"] <| table.select_columns ["bar"] Case_Sensitivity.Insensitive Test.specify "should correctly handle regexes matching multiple names" <| - expect_column_names ["foo", "bar", "foo 1", "foo 2"] <| table.select_columns [By_Name "b.*" Case_Sensitivity.Sensitive use_regex=True, By_Name "f.+" Case_Sensitivity.Sensitive use_regex=True] - expect_column_names ["bar", "foo", "foo 1", "foo 2"] <| table.select_columns [By_Name "b.*" Case_Sensitivity.Sensitive use_regex=True, By_Name "f.+" Case_Sensitivity.Sensitive use_regex=True] reorder=True + expect_column_names ["foo", "bar", "foo 1", "foo 2"] <| table.select_columns ["b.*".to_regex, "f.+".to_regex] + expect_column_names ["bar", "foo", "foo 1", "foo 2"] <| table.select_columns ["b.*".to_regex, "f.+".to_regex] reorder=True Test.specify "should correctly handle problems: out of bounds indices" <| selector = [1, 0, 100, -200, 300] @@ -104,12 +103,12 @@ spec setup = table.select_columns ["bar", "foo", "foo", "bar"] reorder=False Test.specify "should correctly handle edge-cases: duplicate matches due to case insensitivity" <| - selector = [By_Name "FOO", By_Name "foo"] - t = table.select_columns selector on_problems=Problem_Behavior.Report_Error + selector = ["FOO", "foo"] + t = table.select_columns selector Case_Sensitivity.Insensitive on_problems=Problem_Behavior.Report_Error expect_column_names ["foo"] t expect_column_names ["bar", "foo"] <| - table.select_columns [By_Name "BAR", By_Name "foo", By_Name "bar"] reorder=True + table.select_columns ["BAR", "foo", "bar"] Case_Sensitivity.Insensitive reorder=True Test.specify "should correctly handle 
problems: unmatched names" <| weird_name = '.*?-!@#!"' @@ -139,17 +138,17 @@ spec setup = Test.group prefix+"Table.remove_columns" <| Test.specify "should work as shown in the doc examples" <| expect_column_names ["Baz", "foo 1", "foo 2", "ab.+123", "abcd123"] <| table.remove_columns ["bar", "foo"] - expect_column_names ["foo", "ab.+123", "abcd123"] <| table.remove_columns [By_Name "foo.+" Case_Sensitivity.Insensitive use_regex=True, By_Name "b.*" Case_Sensitivity.Insensitive use_regex=True] + expect_column_names ["foo", "ab.+123", "abcd123"] <| table.remove_columns ["foo.+".to_regex, "b.*".to_regex] Case_Sensitivity.Insensitive expect_column_names ["Baz", "foo 1", "foo 2", "ab.+123"] <| table.remove_columns [-1, 0, 1] Test.specify "should correctly handle regex matching" <| last_ones = table.columns.drop 1 . map .name - expect_column_names last_ones <| table.remove_columns [By_Name "foo" Case_Sensitivity.Sensitive use_regex=True] + expect_column_names last_ones <| table.remove_columns ["foo".to_regex] first_ones = ["foo", "bar", "Baz", "foo 1", "foo 2"] - expect_column_names first_ones <| table.remove_columns [By_Name "a.*" Case_Sensitivity.Sensitive use_regex=True] - expect_column_names first_ones <| table.remove_columns [By_Name "ab.+123" Case_Sensitivity.Sensitive use_regex=True] - expect_column_names first_ones+["abcd123"] <| table.remove_columns [By_Name "ab.+123"] - expect_column_names first_ones+["ab.+123"] <| table.remove_columns [By_Name "abcd123" Case_Sensitivity.Sensitive use_regex=True] + expect_column_names first_ones <| table.remove_columns ["a.*".to_regex] + expect_column_names first_ones <| table.remove_columns ["ab.+123".to_regex] + expect_column_names first_ones+["abcd123"] <| table.remove_columns ["ab.+123"] Case_Sensitivity.Insensitive + expect_column_names first_ones+["ab.+123"] <| table.remove_columns ["abcd123".to_regex] Test.specify "should allow negative indices" <| expect_column_names ["Baz", "foo 1", "ab.+123"] <| table.remove_columns 
[-1, -3, 0, 1] @@ -161,10 +160,10 @@ spec setup = col2 = ["bar", [4,5,6]] col3 = ["Bar", [7,8,9]] table_builder [col1, col2, col3] - expect_column_names ["foo"] <| table.remove_columns (By_Name "bar") + expect_column_names ["foo"] <| table.remove_columns "bar" Case_Sensitivity.Insensitive Test.specify "should correctly handle regexes matching multiple names" <| - expect_column_names ["Baz", "ab.+123", "abcd123"] <| table.remove_columns [By_Name "f.+" Case_Sensitivity.Sensitive use_regex=True, By_Name "b.*" Case_Sensitivity.Sensitive use_regex=True] + expect_column_names ["Baz", "ab.+123", "abcd123"] <| table.remove_columns ["f.+".to_regex, "b.*".to_regex] Test.specify "should correctly handle problems: out of bounds indices" <| selector = [1, 0, 100, -200, 300] @@ -192,8 +191,8 @@ spec setup = expect_column_names ["bar", "Baz", "foo 1", "foo 2", "ab.+123", "abcd123"] t Test.specify "should correctly handle edge-cases: duplicate matches due to case insensitivity" <| - selector = [By_Name "FOO", By_Name "foo"] - t = table.remove_columns selector on_problems=Problem_Behavior.Report_Error + selector = ["FOO", "foo"] + t = table.remove_columns selector Case_Sensitivity.Insensitive on_problems=Problem_Behavior.Report_Error expect_column_names ["bar", "Baz", "foo 1", "foo 2", "ab.+123", "abcd123"] t Test.specify "should correctly handle problems: unmatched names" <| @@ -209,11 +208,11 @@ spec setup = Test.specify "should correctly handle problems: no columns in the output" <| [Problem_Behavior.Ignore, Problem_Behavior.Report_Warning, Problem_Behavior.Report_Error].each pb-> - selector = [By_Name ".*" Case_Sensitivity.Sensitive use_regex=True] + selector = [".*".to_regex] t = table.remove_columns selector on_problems=pb t.should_fail_with No_Output_Columns - selector_2 = [By_Name ".*" Case_Sensitivity.Sensitive use_regex=True, By_Name "hmmm" Case_Sensitivity.Sensitive use_regex=True] + selector_2 = [".*".to_regex, "hmmm".to_regex] t1 = table.remove_columns selector_2 
t1.should_fail_with No_Output_Columns @@ -221,17 +220,17 @@ spec setup = Test.specify "should work as shown in the doc examples" <| expect_column_names ["bar", "Baz", "foo 1", "foo 2", "ab.+123", "abcd123", "foo"] <| table.reorder_columns "foo" Position.After_Other_Columns expect_column_names ["Baz", "foo 1", "foo 2", "ab.+123", "abcd123", "foo", "bar"] <| table.reorder_columns ["foo", "bar"] Position.After_Other_Columns - expect_column_names ["foo 1", "foo 2", "bar", "Baz", "foo", "ab.+123", "abcd123"] <| table.reorder_columns [By_Name "foo.+" Case_Sensitivity.Insensitive use_regex=True, By_Name "b.*" Case_Sensitivity.Insensitive use_regex=True] + expect_column_names ["foo 1", "foo 2", "bar", "Baz", "foo", "ab.+123", "abcd123"] <| table.reorder_columns ["foo.+".to_regex, "b.*".to_regex] case_sensitivity=Case_Sensitivity.Insensitive expect_column_names ["bar", "foo", "Baz", "foo 1", "foo 2", "ab.+123", "abcd123"] <| table.reorder_columns [1, 0] Position.Before_Other_Columns expect_column_names ["bar", "Baz", "foo 1", "foo 2", "ab.+123", "abcd123", "foo"] <| table.reorder_columns [0] Position.After_Other_Columns Test.specify "should correctly handle regex matching" <| - expect_column_names ["bar", "Baz", "foo 1", "foo 2", "ab.+123", "abcd123", "foo"] <| table.reorder_columns [By_Name "foo" Case_Sensitivity.Sensitive use_regex=True] Position.After_Other_Columns + expect_column_names ["bar", "Baz", "foo 1", "foo 2", "ab.+123", "abcd123", "foo"] <| table.reorder_columns ["foo".to_regex] Position.After_Other_Columns rest = ["foo", "bar", "Baz", "foo 1", "foo 2"] - expect_column_names ["ab.+123", "abcd123"]+rest <| table.reorder_columns [By_Name "a.*" Case_Sensitivity.Sensitive use_regex=True] - expect_column_names ["ab.+123", "abcd123"]+rest <| table.reorder_columns [By_Name "ab.+123" Case_Sensitivity.Sensitive use_regex=True] + expect_column_names ["ab.+123", "abcd123"]+rest <| table.reorder_columns ["a.*".to_regex] + expect_column_names ["ab.+123", "abcd123"]+rest <| 
table.reorder_columns ["ab.+123".to_regex] expect_column_names ["ab.+123"]+rest+["abcd123"] <| table.reorder_columns ["ab.+123"] - expect_column_names ["abcd123"]+rest+["ab.+123"] <| table.reorder_columns [By_Name "abcd123" Case_Sensitivity.Sensitive use_regex=True] + expect_column_names ["abcd123"]+rest+["ab.+123"] <| table.reorder_columns ["abcd123".to_regex] Test.specify "should allow negative indices" <| expect_column_names ["abcd123", "foo 2", "foo", "bar", "Baz", "foo 1", "ab.+123"] <| table.reorder_columns [-1, -3, 0, 1] @@ -243,10 +242,10 @@ spec setup = col2 = ["bar", [4,5,6]] col3 = ["Bar", [7,8,9]] table_builder [col1, col2, col3] - expect_column_names ["bar", "Bar", "foo"] <| table.reorder_columns [By_Name "bar"] + expect_column_names ["bar", "Bar", "foo"] <| table.reorder_columns ["bar"] case_sensitivity=Case_Sensitivity.Insensitive Test.specify "should correctly handle regexes matching multiple names" <| - expect_column_names ["bar", "foo", "foo 1", "foo 2", "Baz", "ab.+123", "abcd123"] <| table.reorder_columns [By_Name "b.*" Case_Sensitivity.Sensitive use_regex=True, By_Name "f.+" Case_Sensitivity.Sensitive use_regex=True] + expect_column_names ["bar", "foo", "foo 1", "foo 2", "Baz", "ab.+123", "abcd123"] <| table.reorder_columns ["b.*".to_regex, "f.+".to_regex] Test.specify "should correctly handle problems: out of bounds indices" <| selector = [1, 0, 100, -200, 300] @@ -329,7 +328,7 @@ spec setup = t1 = table_builder [["alpha", [1]], ["name=123", [2]], ["name= foo bar", [3]]] expect_column_names ["alpha", "key:123", "key: foo bar"] <| - t1.rename_columns (Map.from_vector [[By_Name "name=(.*)" Case_Sensitivity.Sensitive use_regex=True, "key:$1"]]) + t1.rename_columns (Map.from_vector [["name=(.*)".to_regex, "key:$1"]]) Test.specify "should work by index" <| map = Map.from_vector [[0, "FirstColumn"], [-2, "Another"]] @@ -357,17 +356,17 @@ spec setup = table.rename_columns map Test.specify "should work by name case-insensitively" <| - map = 
Map.from_vector [[By_Name "ALPHA", "FirstColumn"], [By_Name "DELTA", "Another"]] + map = Map.from_vector [["ALPHA", "FirstColumn"], ["DELTA", "Another"]] expect_column_names ["FirstColumn", "beta", "gamma", "Another"] <| - table.rename_columns map + table.rename_columns map Case_Sensitivity.Insensitive Test.specify "should work by name using regex" <| - map = Map.from_vector [[By_Name "a.*" Case_Sensitivity.Sensitive use_regex=True, "FirstColumn"]] + map = Map.from_vector [["a.*".to_regex, "FirstColumn"]] expect_column_names ["FirstColumn", "beta", "gamma", "delta"] <| table.rename_columns map Test.specify "should work by name using regex substitution" <| - map = Map.from_vector [[By_Name "a(.*)" Case_Sensitivity.Sensitive use_regex=True, "$1"]] + map = Map.from_vector [["a(.*)".to_regex, "$1"]] expect_column_names ["lpha", "beta", "gamma", "delta"] <| table.rename_columns map @@ -411,12 +410,12 @@ spec setup = Test.specify "should correctly handle edge-cases: aliased selectors" <| t = table_builder [["alpha", [1,2,3]], ["bet", [4,5,6]]] - map1 = Map.from_vector [[By_Name "a.*" Case_Sensitivity.Sensitive use_regex=True, "AA"], [By_Name ".*a" Case_Sensitivity.Sensitive use_regex=True, "AA"]] + map1 = Map.from_vector [["a.*".to_regex, "AA"], [".*a".to_regex, "AA"]] t1 = t.rename_columns map1 on_problems=Problem_Behavior.Report_Error Problems.assume_no_problems t1 expect_column_names ["AA", "bet"] t1 - map2 = Map.from_vector [[By_Name "a.*" Case_Sensitivity.Sensitive use_regex=True, "StartsWithA"], [By_Name ".*a" Case_Sensitivity.Sensitive use_regex=True, "EndsWithA"]] + map2 = Map.from_vector [["a.*".to_regex, "StartsWithA"], [".*a".to_regex, "EndsWithA"]] t2 = t.rename_columns map2 on_problems=Problem_Behavior.Report_Error t2.should_fail_with Ambiguous_Column_Rename err = t2.catch @@ -429,7 +428,7 @@ spec setup = This is to show that even if distinct rename patterns match the same column, if the resulting rename is unambiguous, no error is raised. 
- map3 = Map.from_vector [[By_Name "a(.*)" Case_Sensitivity.Sensitive use_regex=True, "$1A"], [By_Name "(.*)aa" Case_Sensitivity.Sensitive use_regex=True, "$1aA"]] + map3 = Map.from_vector [["a(.*)".to_regex, "$1A"], ["(.*)aa".to_regex, "$1aA"]] t4 = t3.rename_columns map3 on_problems=Problem_Behavior.Report_Error Problems.assume_no_problems t4 expect_column_names ["aaA", "bbb"] t4 diff --git a/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso b/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso index 95565e66e6d..a0eefcc439d 100644 --- a/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso +++ b/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso @@ -1,7 +1,7 @@ from Standard.Base import all import Standard.Base.Errors.Illegal_Argument.Illegal_Argument -from Standard.Table import Table, Data_Formatter, Column, Column_Selector +from Standard.Table import Table, Data_Formatter, Column from Standard.Table.Data.Type.Value_Type import Value_Type, Auto from Standard.Table.Errors import all @@ -309,7 +309,7 @@ spec = Test.specify "should allow selecting columns by regex" <| t1 = Table.new [["An", ["1", "2", "3"]], ["Am", ["4", "5", "6"]], ["C", ["7", "8", "9"]], ["D", ["10", "11", "12"]]] - r1 = t1.parse columns=[Column_Selector.By_Name "A.*" use_regex=True] + r1 = t1.parse columns="A.*".to_regex r1.at "An" . to_vector . should_equal [1, 2, 3] r1.at "Am" . to_vector . should_equal [4, 5, 6] r1.at "C" . to_vector . should_equal ["7", "8", "9"] @@ -321,9 +321,10 @@ spec = r1.should_fail_with Missing_Input_Columns r1.catch.criteria . should_equal ["B", "C", "E"] - r2 = t1.parse columns=[Column_Selector.By_Name "A.+" use_regex=True] + regex = "A.+".to_regex case_insensitive=True + r2 = t1.parse columns=regex r2.should_fail_with Missing_Input_Columns - r2.catch.criteria . should_equal ["A.+"] + r2.catch.criteria . 
should_equal [regex] action = t1.parse columns=["A", "B", "C", "E"] error_on_missing_columns=False on_problems=_ tester table = @@ -345,7 +346,7 @@ spec = Test.specify "should allow mixed column selectors" <| t1 = Table.new [["Am", ["1", "2", "3"]], ["B", ["4", "5", "6"]], ["C", ["7", "8", "9"]], ["D", ["10", "11", "12"]]] - r1 = t1.parse columns=[(Column_Selector.By_Name "A.*" use_regex=True), -2, "D"] + r1 = t1.parse columns=["A.*".to_regex, -2, "D"] r1.at "Am" . to_vector . should_equal [1, 2, 3] r1.at "B" . to_vector . should_equal ["4", "5", "6"] r1.at "C" . to_vector . should_equal [7, 8, 9] @@ -353,7 +354,7 @@ spec = Test.specify "should handle edge-cases: overlapping selectors" <| t1 = Table.new [["Am", ["1", "2", "3"]], ["B", ["4", "5", "6"]], ["C", ["7", "8", "9"]], ["D", ["10", "11", "12"]]] - r1 = t1.parse columns=[(Column_Selector.By_Name "A.*" use_regex=True), 0, "D", -1, -1, 0, 3] + r1 = t1.parse columns=["A.*".to_regex, 0, "D", -1, -1, 0, 3] r1.at "Am" . to_vector . should_equal [1, 2, 3] r1.at "B" . to_vector . should_equal ["4", "5", "6"] r1.at "C" . to_vector . 
should_equal ["7", "8", "9"] diff --git a/test/Table_Tests/src/In_Memory/Table_Spec.enso b/test/Table_Tests/src/In_Memory/Table_Spec.enso index ba856452f2c..c78a882cfc5 100644 --- a/test/Table_Tests/src/In_Memory/Table_Spec.enso +++ b/test/Table_Tests/src/In_Memory/Table_Spec.enso @@ -4,7 +4,7 @@ import Standard.Base.Errors.Common.Index_Out_Of_Bounds import Standard.Base.Errors.Common.Type_Error import Standard.Base.Errors.Illegal_Argument.Illegal_Argument -from Standard.Table import Table, Column, Sort_Column, Column_Selector, Aggregate_Column +from Standard.Table import Table, Column, Sort_Column, Aggregate_Column from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all hiding First, Last import Standard.Table.Data.Type.Value_Type.Value_Type from Standard.Table.Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names, No_Input_Columns_Selected, Missing_Input_Columns, No_Such_Column, Floating_Point_Equality, Invalid_Value_Type, Row_Count_Mismatch @@ -315,15 +315,9 @@ spec = t2.at "Y" . to_vector . should_equal ['A', 0] t3 = Table.new [["X", [1, 2, 3]], ["Y", ["", Nothing, Number.nan]]] - t4 = t3.remove_columns (Column_Selector.Blank_Columns treat_nans_as_blank=True) + t4 = t3.remove_blank_columns treat_nans_as_blank=True t4.columns . map .name . should_equal ["X"] - Test.specify "Blank_Columns selector should work for all kinds of methods accepting Column_Selector" <| - t = Table.new [["X", [1, 2, 3, 4, 5]], ["Y", ["", Nothing, Nothing, Number.nan, ""]]] - r1 = t.distinct (Column_Selector.Blank_Columns treat_nans_as_blank=True) - r1.at "Y" . to_vector . to_text . should_equal "[, Nothing, NaN]" - r1.at "X" . to_vector . should_equal [1, 2, 4] - Test.group "Info" <| Test.specify "should return Table information" <| a = ["strs", ["a", "b", Nothing, "a"]]