diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f42293a41e..21cd7cd4593 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -635,6 +635,7 @@ step][9343] - [Implemented constructors, comparisons, and arithmetic for a `Decimal` type.][9272] +- [Allow `Table.replace` to take multiple target columns.][9406] [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug @@ -923,6 +924,7 @@ [9346]: https://github.com/enso-org/enso/pull/9346 [9382]: https://github.com/enso-org/enso/pull/9382 [9343]: https://github.com/enso-org/enso/pull/9343 +[9406]: https://github.com/enso-org/enso/pull/9406 #### Enso Compiler diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Table.enso index 923755641e8..dc37fa2ea7f 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Table.enso @@ -1554,16 +1554,16 @@ type DB_Table ## ALIAS find replace GROUP Standard.Base.Text ICON dataframe_map_column - Replaces values in `column` using `lookup_table` to specify a - mapping from old to new values. + Replaces values in the columns using `lookup_table` to specify a mapping + from old to new values. Arguments: - lookup_table: the table to use as a mapping from old to new values. A `Map` can also be used here (in which case passing `from_column` or `to_column` is disallowed and will throw an `Illegal_Argument` error. - - column: the column within `self` to perform the replace on. - - from_column: the column within `lookup_table` to match against `column` - in `self`. + - columns: the column or columns within `self` to perform the replace on. + - from_column: the column within `lookup_table` to match against + `columns` in `self`. - to_column: the column within `lookup_table` to get new values from. - allow_unmatched_rows: Specifies how to handle missing rows in the lookup. 
If `False` (the default), an `Unmatched_Rows_In_Lookup` error is raised. @@ -1576,13 +1576,14 @@ type DB_Table When operating in-memory, this operation preserves the order of rows from this table (unlike `join`). + In the Database backend, there are no guarantees related to ordering of results. ! Error Conditions - If this table or the lookup table is lacking any of the columns - specified by `from_column`, `to_column`, or `column`, a + specified by `from_column`, `to_column`, or `columns`, a `Missing_Input_Columns` error is raised. - If a single row is matched by multiple entries in the lookup table, a `Non_Unique_Key` error is raised. @@ -1597,7 +1598,7 @@ type DB_Table - The following problems may be reported according to the `on_problems` setting: - If any of the `columns` is a floating-point type, - a `Floating_Point_Equality`. + a `Floating_Point_Equality` problem is reported. > Example Replace values in column 'x' using a lookup table. @@ -1626,9 +1627,9 @@ type DB_Table # 2 | 30 | c | g # 3 | 40 | d | h - @column Widget_Helpers.make_column_name_selector - replace : DB_Table | Map -> (Text | Integer) -> (Text | Integer | Nothing) -> (Text | Integer | Nothing) -> Boolean -> Problem_Behavior -> DB_Table ! Missing_Input_Columns | Non_Unique_Key | Unmatched_Rows_In_Lookup - replace self lookup_table:(DB_Table | Map) column:(Text | Integer) from_column:(Text | Integer | Nothing)=Nothing to_column:(Text | Integer | Nothing)=Nothing allow_unmatched_rows:Boolean=True on_problems:Problem_Behavior=Problem_Behavior.Report_Warning = - Replace_Helpers.replace self lookup_table column from_column to_column allow_unmatched_rows on_problems + @columns Widget_Helpers.make_column_name_selector + replace : (DB_Table | Map) -> (Text | Integer | Vector (Text | Integer)) -> (Text | Integer | Nothing) -> (Text | Integer | Nothing) -> Boolean -> Problem_Behavior -> DB_Table ! 
Missing_Input_Columns | Non_Unique_Key | Unmatched_Rows_In_Lookup + replace self lookup_table:(DB_Table | Map) columns:(Text | Integer | Vector (Text | Integer)) from_column:(Text | Integer | Nothing)=Nothing to_column:(Text | Integer | Nothing)=Nothing allow_unmatched_rows:Boolean=True on_problems:Problem_Behavior=Problem_Behavior.Report_Warning = + Replace_Helpers.replace self lookup_table columns from_column to_column allow_unmatched_rows on_problems ## ALIAS join by row position GROUP Standard.Base.Calculations diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index a42bc1dd8c8..76278e826b3 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -2077,16 +2077,16 @@ type Table ## ALIAS find replace GROUP Standard.Base.Text ICON dataframe_map_column - Replaces values in `column` using `lookup_table` to specify a - mapping from old to new values. + Replaces values in the columns using `lookup_table` to specify a mapping + from old to new values. Arguments: - lookup_table: the table to use as a mapping from old to new values. A `Map` can also be used here (in which case passing `from_column` or `to_column` is disallowed and will throw an `Illegal_Argument` error. - - column: the column within `self` to perform the replace on. - - from_column: the column within `lookup_table` to match against `column` - in `self`. + - columns: the column or columns within `self` to perform the replace on. + - from_column: the column within `lookup_table` to match against + `columns` in `self`. - to_column: the column within `lookup_table` to get new values from. - allow_unmatched_rows: Specifies how to handle missing rows in the lookup. If `False` (the default), an `Unmatched_Rows_In_Lookup` error is raised. 
@@ -2099,13 +2099,14 @@ type Table When operating in-memory, this operation preserves the order of rows from this table (unlike `join`). + In the Database backend, there are no guarantees related to ordering of results. ! Error Conditions - If this table or the lookup table is lacking any of the columns - specified by `from_column`, `to_column`, or `column`, a + specified by `from_column`, `to_column`, or `columns`, a `Missing_Input_Columns` error is raised. - If a single row is matched by multiple entries in the lookup table, a `Non_Unique_Key` error is raised. @@ -2120,7 +2121,7 @@ type Table - The following problems may be reported according to the `on_problems` setting: - If any of the `columns` is a floating-point type, - a `Floating_Point_Equality`. + a `Floating_Point_Equality` problem is reported. > Example Replace values in column 'x' using a lookup table. @@ -2149,9 +2150,9 @@ type Table # 2 | 30 | c | g # 3 | 40 | d | h - @column Widget_Helpers.make_column_name_selector - replace : Table | Map -> (Text | Integer) -> (Text | Integer | Nothing) -> (Text | Integer | Nothing) -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns | Non_Unique_Key | Unmatched_Rows_In_Lookup - replace self lookup_table:(Table | Map) column:(Text | Integer) from_column:(Text | Integer | Nothing)=Nothing to_column:(Text | Integer | Nothing)=Nothing allow_unmatched_rows:Boolean=True on_problems:Problem_Behavior=Problem_Behavior.Report_Warning = - Replace_Helpers.replace self lookup_table column from_column to_column allow_unmatched_rows on_problems + @columns Widget_Helpers.make_column_name_selector + replace : (Table | Map) -> (Text | Integer | Vector (Text | Integer)) -> (Text | Integer | Nothing) -> (Text | Integer | Nothing) -> Boolean -> Problem_Behavior -> Table ! 
Missing_Input_Columns | Non_Unique_Key | Unmatched_Rows_In_Lookup + replace self lookup_table:(Table | Map) columns:(Text | Integer | Vector (Text | Integer)) from_column:(Text | Integer | Nothing)=Nothing to_column:(Text | Integer | Nothing)=Nothing allow_unmatched_rows:Boolean=True on_problems:Problem_Behavior=Problem_Behavior.Report_Warning = + Replace_Helpers.replace self lookup_table columns from_column to_column allow_unmatched_rows on_problems ## ALIAS join by row position GROUP Standard.Base.Calculations diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Replace_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Replace_Helpers.enso index 4f6013dae29..41aa0725c3f 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Replace_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Replace_Helpers.enso @@ -7,45 +7,49 @@ import project.Data.Table.Table from project.Errors import Missing_Input_Columns, No_Such_Column, Non_Unique_Key, Unmatched_Rows_In_Lookup ## PRIVATE -replace : Table -> Table | Map -> (Text | Integer) -> (Text | Integer | Nothing) -> (Text | Integer | Nothing) -> Boolean -> Problem_Behavior -> Table ! 
Missing_Input_Columns | Non_Unique_Key | Unmatched_Rows_In_Lookup -replace base_table lookup_table column:(Text | Integer) from_column:(Text | Integer | Nothing)=Nothing to_column:(Text | Integer | Nothing)=Nothing allow_unmatched_rows:Boolean=True on_problems:Problem_Behavior=Problem_Behavior.Report_Warning = - case lookup_table of - _ : Map -> - if from_column.is_nothing.not || to_column.is_nothing.not then Error.throw (Illegal_Argument.Error "If a Map is provided as the lookup_table, then from_column and to_column should not also be specified.") else - handle_empty_lookup_table ~action = - if lookup_table.is_empty.not then action else - ## If the lookup table is empty but the base table is - not, and allow_unmatched_rows=False, throw - `Unmatched_Rows_In_Lookup` with the first row of the - table. Otherwise, attach a warning. - if base_table.row_count == 0 || allow_unmatched_rows then Warning.attach (Empty_Error.Error "lookup_table") base_table else - base_table.select_columns column . if_not_error <| - a_key_value = base_table.at column . at 0 - Error.throw (Unmatched_Rows_In_Lookup.Error [a_key_value]) - handle_empty_lookup_table <| - base_table.replace (base_table.make_table_from_map lookup_table 'from' 'to') column 'from' 'to' allow_unmatched_rows=allow_unmatched_rows on_problems=on_problems +replace : Table -> (Table | Map) -> (Text | Integer | Vector (Text | Integer)) -> (Text | Integer | Nothing) -> (Text | Integer | Nothing) -> Boolean -> Problem_Behavior -> Table ! 
Missing_Input_Columns | Non_Unique_Key | Unmatched_Rows_In_Lookup +replace base_table lookup_table columns:(Text | Integer | Vector (Text | Integer)) from_column:(Text | Integer | Nothing)=Nothing to_column:(Text | Integer | Nothing)=Nothing allow_unmatched_rows:Boolean=True on_problems:Problem_Behavior=Problem_Behavior.Report_Warning = + case columns of + _ : Vector -> columns.fold base_table (base_table -> column-> replace base_table lookup_table column from_column to_column allow_unmatched_rows on_problems) _ -> - from_column_resolved = from_column.if_nothing 0 - to_column_resolved = to_column.if_nothing 1 - selected_lookup_columns = lookup_table.select_columns [from_column_resolved, to_column_resolved] - base_table.select_columns column . if_not_error <| selected_lookup_columns . if_not_error <| - unique = base_table.column_naming_helper.create_unique_name_strategy - unique.mark_used (base_table.column_names) + column = columns + case lookup_table of + _ : Map -> + if from_column.is_nothing.not || to_column.is_nothing.not then Error.throw (Illegal_Argument.Error "If a Map is provided as the lookup_table, then from_column and to_column should not also be specified.") else + handle_empty_lookup_table ~action = + if lookup_table.is_empty.not then action else + ## If the lookup table is empty but the base table is + not, and allow_unmatched_rows=False, throw + `Unmatched_Rows_In_Lookup` with the first row of the + table. Otherwise, attach a warning. + if base_table.row_count == 0 || allow_unmatched_rows then Warning.attach (Empty_Error.Error "lookup_table") base_table else + base_table.select_columns column . if_not_error <| + a_key_value = base_table.at column . 
at 0 + Error.throw (Unmatched_Rows_In_Lookup.Error [a_key_value]) + handle_empty_lookup_table <| + base_table.replace (base_table.make_table_from_map lookup_table 'from' 'to') column 'from' 'to' allow_unmatched_rows=allow_unmatched_rows on_problems=on_problems + _ -> + from_column_resolved = from_column.if_nothing 0 + to_column_resolved = to_column.if_nothing 1 + selected_lookup_columns = lookup_table.select_columns [from_column_resolved, to_column_resolved] + base_table.select_columns column . if_not_error <| selected_lookup_columns . if_not_error <| + unique = base_table.column_naming_helper.create_unique_name_strategy + unique.mark_used (base_table.column_names) - ## We perform a `merge` into `column`, using a duplicate of `column` - as the key column to join with `from_column`. + ## We perform a `merge` into `column`, using a duplicate of `column` + as the key column to join with `from_column`. - duplicate_key_column_name = unique.make_unique "duplicate_key" - duplicate_key_column = base_table.at column . rename duplicate_key_column_name - base_table_with_duplicate = base_table.set duplicate_key_column set_mode=Set_Mode.Add + duplicate_key_column_name = unique.make_unique "duplicate_key" + duplicate_key_column = base_table.at column . rename duplicate_key_column_name + base_table_with_duplicate = base_table.set duplicate_key_column set_mode=Set_Mode.Add - ## Create a lookup table with just `to_column` and `from_column`, - renamed to match the base table's `column` and its duplicate, - respectively. - lookup_table_renamed = selected_lookup_columns . rename_columns (Map.from_vector [[from_column_resolved, duplicate_key_column_name], [to_column_resolved, column]]) + ## Create a lookup table with just `to_column` and `from_column`, + renamed to match the base table's `column` and its duplicate, + respectively. + lookup_table_renamed = selected_lookup_columns . 
rename_columns (Map.from_vector [[from_column_resolved, duplicate_key_column_name], [to_column_resolved, column]]) - warn_if_empty result_table = if lookup_table_renamed.row_count != 0 then result_table else Warning.attach (Empty_Error.Error "lookup_table") result_table + warn_if_empty result_table = if lookup_table_renamed.row_count != 0 then result_table else Warning.attach (Empty_Error.Error "lookup_table") result_table - merged = base_table_with_duplicate.merge lookup_table_renamed duplicate_key_column_name add_new_columns=False allow_unmatched_rows=allow_unmatched_rows on_problems=on_problems - warn_if_empty <| - merged.remove_columns duplicate_key_column_name + merged = base_table_with_duplicate.merge lookup_table_renamed duplicate_key_column_name add_new_columns=False allow_unmatched_rows=allow_unmatched_rows on_problems=on_problems + warn_if_empty <| + merged.remove_columns duplicate_key_column_name diff --git a/test/Table_Tests/src/Common_Table_Operations/Join/Replace_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Join/Replace_Spec.enso index 105440f5b8f..ffec2726704 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Join/Replace_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Join/Replace_Spec.enso @@ -61,6 +61,13 @@ add_specs suite_builder setup = result = table.replace lookup_table 'x' . order_by ["x", "y"] result . should_equal expected + group_builder.specify "should be able to replace multiple columns" <| + table = table_builder [['x', [1, 2, 3, 4, 2]], ['x2', [2, 1, 2, 1, 4]], ['x3', [3, 4, 1, 3, 4]], ['y', ['a', 'b', 'c', 'd', 'e']]] + lookup_table = table_builder [['d', [4, 5, 6, 7]], ['x', [2, 1, 4, 3]], ['d2', [5, 6, 7, 8]], ['z', [20, 10, 40, 30]]] + expected = table_builder [['x', [10, 20, 20, 30, 40]], ['x2', [20, 10, 40, 20, 10]], ['x3', [30, 40, 40, 10, 30]], ['y', ['a', 'b', 'e', 'c', 'd']]] + result = table.replace lookup_table ['x', 'x2', 'x3'] 'x' 'z' . order_by ["x", "y"] + result . 
should_equal expected + group_builder.specify "should fail with Missing_Input_Columns if the specified columns do not exist" <| table = table_builder [['x', [1, 2, 3, 4]], ['y', ['a', 'b', 'c', 'd']]] lookup_table = table_builder [['x', [2, 1, 4, 3]], ['z', [20, 10, 40, 30]]]