mirror of
https://github.com/enso-org/enso.git
synced 2024-12-23 10:21:41 +03:00
Allow Table.replace
to take multiple target columns (#9406)
This commit is contained in:
parent
a5b66aaf94
commit
53e2636b8c
@ -635,6 +635,7 @@
|
||||
step][9343]
|
||||
- [Implemented constructors, comparisons, and arithmetic for a `Decimal`
|
||||
type.][9272]
|
||||
- [Allow `Table.replace` to take multiple target columns.][9406]
|
||||
|
||||
[debug-shortcuts]:
|
||||
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
||||
@ -923,6 +924,7 @@
|
||||
[9346]: https://github.com/enso-org/enso/pull/9346
|
||||
[9382]: https://github.com/enso-org/enso/pull/9382
|
||||
[9343]: https://github.com/enso-org/enso/pull/9343
|
||||
[9406]: https://github.com/enso-org/enso/pull/9406
|
||||
|
||||
#### Enso Compiler
|
||||
|
||||
|
@ -1554,16 +1554,16 @@ type DB_Table
|
||||
## ALIAS find replace
|
||||
GROUP Standard.Base.Text
|
||||
ICON dataframe_map_column
|
||||
Replaces values in `column` using `lookup_table` to specify a
|
||||
mapping from old to new values.
|
||||
Replaces values in the columns using `lookup_table` to specify a mapping
|
||||
from old to new values.
|
||||
|
||||
Arguments:
|
||||
- lookup_table: the table to use as a mapping from old to new values. A
|
||||
`Map` can also be used here (in which case passing `from_column` or
|
||||
`to_column` is disallowed and will throw an `Illegal_Argument` error).
|
||||
- column: the column within `self` to perform the replace on.
|
||||
- from_column: the column within `lookup_table` to match against `column`
|
||||
in `self`.
|
||||
- columns: the column or columns within `self` to perform the replace on.
|
||||
- from_column: the column within `lookup_table` to match against
|
||||
`columns` in `self`.
|
||||
- to_column: the column within `lookup_table` to get new values from.
|
||||
- allow_unmatched_rows: Specifies how to handle missing rows in the lookup.
|
||||
If `False`, an `Unmatched_Rows_In_Lookup` error is raised. The default is `True`.
|
||||
@ -1576,13 +1576,14 @@ type DB_Table
|
||||
|
||||
When operating in-memory, this operation preserves the order of rows
|
||||
from this table (unlike `join`).
|
||||
|
||||
In the Database backend, there are no guarantees related to ordering of
|
||||
results.
|
||||
|
||||
! Error Conditions
|
||||
|
||||
- If this table or the lookup table is lacking any of the columns
|
||||
specified by `from_column`, `to_column`, or `column`, a
|
||||
specified by `from_column`, `to_column`, or `columns`, a
|
||||
`Missing_Input_Columns` error is raised.
|
||||
- If a single row is matched by multiple entries in the lookup table,
|
||||
a `Non_Unique_Key` error is raised.
|
||||
@ -1597,7 +1598,7 @@ type DB_Table
|
||||
- The following problems may be reported according to the `on_problems`
|
||||
setting:
|
||||
- If any of the `columns` is a floating-point type,
|
||||
a `Floating_Point_Equality`.
|
||||
a `Floating_Point_Equality` problem is reported.
|
||||
|
||||
> Example
|
||||
Replace values in column 'x' using a lookup table.
|
||||
@ -1626,9 +1627,9 @@ type DB_Table
|
||||
# 2 | 30 | c | g
|
||||
# 3 | 40 | d | h
|
||||
@column Widget_Helpers.make_column_name_selector
|
||||
replace : DB_Table | Map -> (Text | Integer) -> (Text | Integer | Nothing) -> (Text | Integer | Nothing) -> Boolean -> Problem_Behavior -> DB_Table ! Missing_Input_Columns | Non_Unique_Key | Unmatched_Rows_In_Lookup
|
||||
replace self lookup_table:(DB_Table | Map) column:(Text | Integer) from_column:(Text | Integer | Nothing)=Nothing to_column:(Text | Integer | Nothing)=Nothing allow_unmatched_rows:Boolean=True on_problems:Problem_Behavior=Problem_Behavior.Report_Warning =
|
||||
Replace_Helpers.replace self lookup_table column from_column to_column allow_unmatched_rows on_problems
|
||||
replace : (DB_Table | Map) -> (Text | Integer | Vector (Text | Integer)) -> (Text | Integer | Nothing) -> (Text | Integer | Nothing) -> Boolean -> Problem_Behavior -> DB_Table ! Missing_Input_Columns | Non_Unique_Key | Unmatched_Rows_In_Lookup
|
||||
replace self lookup_table:(DB_Table | Map) columns:(Text | Integer | Vector (Text | Integer)) from_column:(Text | Integer | Nothing)=Nothing to_column:(Text | Integer | Nothing)=Nothing allow_unmatched_rows:Boolean=True on_problems:Problem_Behavior=Problem_Behavior.Report_Warning =
|
||||
Replace_Helpers.replace self lookup_table columns from_column to_column allow_unmatched_rows on_problems
|
||||
|
||||
## ALIAS join by row position
|
||||
GROUP Standard.Base.Calculations
|
||||
|
@ -2077,16 +2077,16 @@ type Table
|
||||
## ALIAS find replace
|
||||
GROUP Standard.Base.Text
|
||||
ICON dataframe_map_column
|
||||
Replaces values in `column` using `lookup_table` to specify a
|
||||
mapping from old to new values.
|
||||
Replaces values in the columns using `lookup_table` to specify a mapping
|
||||
from old to new values.
|
||||
|
||||
Arguments:
|
||||
- lookup_table: the table to use as a mapping from old to new values. A
|
||||
`Map` can also be used here (in which case passing `from_column` or
|
||||
`to_column` is disallowed and will throw an `Illegal_Argument` error).
|
||||
- column: the column within `self` to perform the replace on.
|
||||
- from_column: the column within `lookup_table` to match against `column`
|
||||
in `self`.
|
||||
- columns: the column or columns within `self` to perform the replace on.
|
||||
- from_column: the column within `lookup_table` to match against
|
||||
`columns` in `self`.
|
||||
- to_column: the column within `lookup_table` to get new values from.
|
||||
- allow_unmatched_rows: Specifies how to handle missing rows in the lookup.
|
||||
If `False`, an `Unmatched_Rows_In_Lookup` error is raised. The default is `True`.
|
||||
@ -2099,13 +2099,14 @@ type Table
|
||||
|
||||
When operating in-memory, this operation preserves the order of rows
|
||||
from this table (unlike `join`).
|
||||
|
||||
In the Database backend, there are no guarantees related to ordering of
|
||||
results.
|
||||
|
||||
! Error Conditions
|
||||
|
||||
- If this table or the lookup table is lacking any of the columns
|
||||
specified by `from_column`, `to_column`, or `column`, a
|
||||
specified by `from_column`, `to_column`, or `columns`, a
|
||||
`Missing_Input_Columns` error is raised.
|
||||
- If a single row is matched by multiple entries in the lookup table,
|
||||
a `Non_Unique_Key` error is raised.
|
||||
@ -2120,7 +2121,7 @@ type Table
|
||||
- The following problems may be reported according to the `on_problems`
|
||||
setting:
|
||||
- If any of the `columns` is a floating-point type,
|
||||
a `Floating_Point_Equality`.
|
||||
a `Floating_Point_Equality` problem is reported.
|
||||
|
||||
> Example
|
||||
Replace values in column 'x' using a lookup table.
|
||||
@ -2149,9 +2150,9 @@ type Table
|
||||
# 2 | 30 | c | g
|
||||
# 3 | 40 | d | h
|
||||
@column Widget_Helpers.make_column_name_selector
|
||||
replace : Table | Map -> (Text | Integer) -> (Text | Integer | Nothing) -> (Text | Integer | Nothing) -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns | Non_Unique_Key | Unmatched_Rows_In_Lookup
|
||||
replace self lookup_table:(Table | Map) column:(Text | Integer) from_column:(Text | Integer | Nothing)=Nothing to_column:(Text | Integer | Nothing)=Nothing allow_unmatched_rows:Boolean=True on_problems:Problem_Behavior=Problem_Behavior.Report_Warning =
|
||||
Replace_Helpers.replace self lookup_table column from_column to_column allow_unmatched_rows on_problems
|
||||
replace : (Table | Map) -> (Text | Integer | Vector (Text | Integer)) -> (Text | Integer | Nothing) -> (Text | Integer | Nothing) -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns | Non_Unique_Key | Unmatched_Rows_In_Lookup
|
||||
replace self lookup_table:(Table | Map) columns:(Text | Integer | Vector (Text | Integer)) from_column:(Text | Integer | Nothing)=Nothing to_column:(Text | Integer | Nothing)=Nothing allow_unmatched_rows:Boolean=True on_problems:Problem_Behavior=Problem_Behavior.Report_Warning =
|
||||
Replace_Helpers.replace self lookup_table columns from_column to_column allow_unmatched_rows on_problems
|
||||
|
||||
## ALIAS join by row position
|
||||
GROUP Standard.Base.Calculations
|
||||
|
@ -7,45 +7,49 @@ import project.Data.Table.Table
|
||||
from project.Errors import Missing_Input_Columns, No_Such_Column, Non_Unique_Key, Unmatched_Rows_In_Lookup
|
||||
|
||||
## PRIVATE
|
||||
replace : Table -> Table | Map -> (Text | Integer) -> (Text | Integer | Nothing) -> (Text | Integer | Nothing) -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns | Non_Unique_Key | Unmatched_Rows_In_Lookup
|
||||
replace base_table lookup_table column:(Text | Integer) from_column:(Text | Integer | Nothing)=Nothing to_column:(Text | Integer | Nothing)=Nothing allow_unmatched_rows:Boolean=True on_problems:Problem_Behavior=Problem_Behavior.Report_Warning =
|
||||
case lookup_table of
|
||||
_ : Map ->
|
||||
if from_column.is_nothing.not || to_column.is_nothing.not then Error.throw (Illegal_Argument.Error "If a Map is provided as the lookup_table, then from_column and to_column should not also be specified.") else
|
||||
handle_empty_lookup_table ~action =
|
||||
if lookup_table.is_empty.not then action else
|
||||
## If the lookup table is empty but the base table is
|
||||
not, and allow_unmatched_rows=False, throw
|
||||
`Unmatched_Rows_In_Lookup` with the first row of the
|
||||
table. Otherwise, attach a warning.
|
||||
if base_table.row_count == 0 || allow_unmatched_rows then Warning.attach (Empty_Error.Error "lookup_table") base_table else
|
||||
base_table.select_columns column . if_not_error <|
|
||||
a_key_value = base_table.at column . at 0
|
||||
Error.throw (Unmatched_Rows_In_Lookup.Error [a_key_value])
|
||||
handle_empty_lookup_table <|
|
||||
base_table.replace (base_table.make_table_from_map lookup_table 'from' 'to') column 'from' 'to' allow_unmatched_rows=allow_unmatched_rows on_problems=on_problems
|
||||
replace : Table -> (Table | Map) -> (Text | Integer | Vector (Text | Integer)) -> (Text | Integer | Nothing) -> (Text | Integer | Nothing) -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns | Non_Unique_Key | Unmatched_Rows_In_Lookup
|
||||
replace base_table lookup_table columns:(Text | Integer | Vector (Text | Integer)) from_column:(Text | Integer | Nothing)=Nothing to_column:(Text | Integer | Nothing)=Nothing allow_unmatched_rows:Boolean=True on_problems:Problem_Behavior=Problem_Behavior.Report_Warning =
|
||||
case columns of
|
||||
_ : Vector -> columns.fold base_table (base_table -> column-> replace base_table lookup_table column from_column to_column allow_unmatched_rows on_problems)
|
||||
_ ->
|
||||
from_column_resolved = from_column.if_nothing 0
|
||||
to_column_resolved = to_column.if_nothing 1
|
||||
selected_lookup_columns = lookup_table.select_columns [from_column_resolved, to_column_resolved]
|
||||
base_table.select_columns column . if_not_error <| selected_lookup_columns . if_not_error <|
|
||||
unique = base_table.column_naming_helper.create_unique_name_strategy
|
||||
unique.mark_used (base_table.column_names)
|
||||
column = columns
|
||||
case lookup_table of
|
||||
_ : Map ->
|
||||
if from_column.is_nothing.not || to_column.is_nothing.not then Error.throw (Illegal_Argument.Error "If a Map is provided as the lookup_table, then from_column and to_column should not also be specified.") else
|
||||
handle_empty_lookup_table ~action =
|
||||
if lookup_table.is_empty.not then action else
|
||||
## If the lookup table is empty but the base table is
|
||||
not, and allow_unmatched_rows=False, throw
|
||||
`Unmatched_Rows_In_Lookup` with the first row of the
|
||||
table. Otherwise, attach a warning.
|
||||
if base_table.row_count == 0 || allow_unmatched_rows then Warning.attach (Empty_Error.Error "lookup_table") base_table else
|
||||
base_table.select_columns column . if_not_error <|
|
||||
a_key_value = base_table.at column . at 0
|
||||
Error.throw (Unmatched_Rows_In_Lookup.Error [a_key_value])
|
||||
handle_empty_lookup_table <|
|
||||
base_table.replace (base_table.make_table_from_map lookup_table 'from' 'to') column 'from' 'to' allow_unmatched_rows=allow_unmatched_rows on_problems=on_problems
|
||||
_ ->
|
||||
from_column_resolved = from_column.if_nothing 0
|
||||
to_column_resolved = to_column.if_nothing 1
|
||||
selected_lookup_columns = lookup_table.select_columns [from_column_resolved, to_column_resolved]
|
||||
base_table.select_columns column . if_not_error <| selected_lookup_columns . if_not_error <|
|
||||
unique = base_table.column_naming_helper.create_unique_name_strategy
|
||||
unique.mark_used (base_table.column_names)
|
||||
|
||||
## We perform a `merge` into `column`, using a duplicate of `column`
|
||||
as the key column to join with `from_column`.
|
||||
## We perform a `merge` into `column`, using a duplicate of `column`
|
||||
as the key column to join with `from_column`.
|
||||
|
||||
duplicate_key_column_name = unique.make_unique "duplicate_key"
|
||||
duplicate_key_column = base_table.at column . rename duplicate_key_column_name
|
||||
base_table_with_duplicate = base_table.set duplicate_key_column set_mode=Set_Mode.Add
|
||||
duplicate_key_column_name = unique.make_unique "duplicate_key"
|
||||
duplicate_key_column = base_table.at column . rename duplicate_key_column_name
|
||||
base_table_with_duplicate = base_table.set duplicate_key_column set_mode=Set_Mode.Add
|
||||
|
||||
## Create a lookup table with just `to_column` and `from_column`,
|
||||
renamed to match the base table's `column` and its duplicate,
|
||||
respectively.
|
||||
lookup_table_renamed = selected_lookup_columns . rename_columns (Map.from_vector [[from_column_resolved, duplicate_key_column_name], [to_column_resolved, column]])
|
||||
## Create a lookup table with just `to_column` and `from_column`,
|
||||
renamed to match the base table's `column` and its duplicate,
|
||||
respectively.
|
||||
lookup_table_renamed = selected_lookup_columns . rename_columns (Map.from_vector [[from_column_resolved, duplicate_key_column_name], [to_column_resolved, column]])
|
||||
|
||||
warn_if_empty result_table = if lookup_table_renamed.row_count != 0 then result_table else Warning.attach (Empty_Error.Error "lookup_table") result_table
|
||||
warn_if_empty result_table = if lookup_table_renamed.row_count != 0 then result_table else Warning.attach (Empty_Error.Error "lookup_table") result_table
|
||||
|
||||
merged = base_table_with_duplicate.merge lookup_table_renamed duplicate_key_column_name add_new_columns=False allow_unmatched_rows=allow_unmatched_rows on_problems=on_problems
|
||||
warn_if_empty <|
|
||||
merged.remove_columns duplicate_key_column_name
|
||||
merged = base_table_with_duplicate.merge lookup_table_renamed duplicate_key_column_name add_new_columns=False allow_unmatched_rows=allow_unmatched_rows on_problems=on_problems
|
||||
warn_if_empty <|
|
||||
merged.remove_columns duplicate_key_column_name
|
||||
|
@ -61,6 +61,13 @@ add_specs suite_builder setup =
|
||||
result = table.replace lookup_table 'x' . order_by ["x", "y"]
|
||||
result . should_equal expected
|
||||
|
||||
group_builder.specify "should be able to replace multiple columns" <|
|
||||
table = table_builder [['x', [1, 2, 3, 4, 2]], ['x2', [2, 1, 2, 1, 4]], ['x3', [3, 4, 1, 3, 4]], ['y', ['a', 'b', 'c', 'd', 'e']]]
|
||||
lookup_table = table_builder [['d', [4, 5, 6, 7]], ['x', [2, 1, 4, 3]], ['d2', [5, 6, 7, 8]], ['z', [20, 10, 40, 30]]]
|
||||
expected = table_builder [['x', [10, 20, 20, 30, 40]], ['x2', [20, 10, 40, 20, 10]], ['x3', [30, 40, 40, 10, 30]], ['y', ['a', 'b', 'e', 'c', 'd']]]
|
||||
result = table.replace lookup_table ['x', 'x2', 'x3'] 'x' 'z' . order_by ["x", "y"]
|
||||
result . should_equal expected
|
||||
|
||||
group_builder.specify "should fail with Missing_Input_Columns if the specified columns do not exist" <|
|
||||
table = table_builder [['x', [1, 2, 3, 4]], ['y', ['a', 'b', 'c', 'd']]]
|
||||
lookup_table = table_builder [['x', [2, 1, 4, 3]], ['z', [20, 10, 40, 30]]]
|
||||
|
Loading…
Reference in New Issue
Block a user