mirror of
https://github.com/enso-org/enso.git
synced 2024-11-25 21:25:20 +03:00
Add Table.expand_to_rows to allow flattening vector and array values in table (#8042)
# Important Notes Also includes a fix for a reallocation bug in `InferredBuilder`.
This commit is contained in:
parent
b7d7910a88
commit
f18d1323e1
@ -585,6 +585,7 @@
|
|||||||
- [Implemented `Table.lookup_and_replace` for the in-memory backend.][7979]
|
- [Implemented `Table.lookup_and_replace` for the in-memory backend.][7979]
|
||||||
- [Added `Column_Operation` to `Table.set` allowing for more streamlined flow of
|
- [Added `Column_Operation` to `Table.set` allowing for more streamlined flow of
|
||||||
deriving column values in the GUI.][8005]
|
deriving column values in the GUI.][8005]
|
||||||
|
- [Implemented `Table.expand_to_rows` for the in-memory backend.][8029]
|
||||||
|
|
||||||
[debug-shortcuts]:
|
[debug-shortcuts]:
|
||||||
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
||||||
@ -833,6 +834,7 @@
|
|||||||
[7947]: https://github.com/enso-org/enso/pull/7947
|
[7947]: https://github.com/enso-org/enso/pull/7947
|
||||||
[7979]: https://github.com/enso-org/enso/pull/7979
|
[7979]: https://github.com/enso-org/enso/pull/7979
|
||||||
[8005]: https://github.com/enso-org/enso/pull/8005
|
[8005]: https://github.com/enso-org/enso/pull/8005
|
||||||
|
[8029]: https://github.com/enso-org/enso/pull/8029
|
||||||
|
|
||||||
#### Enso Compiler
|
#### Enso Compiler
|
||||||
|
|
||||||
|
@ -1999,6 +1999,43 @@ type Table
|
|||||||
_ = [column, fields, prefix]
|
_ = [column, fields, prefix]
|
||||||
Error.throw (Unsupported_Database_Operation.Error "Table.expand_column is currently not implemented for the Database backend. You may download the table to memory using `.read` to use this feature.")
|
Error.throw (Unsupported_Database_Operation.Error "Table.expand_column is currently not implemented for the Database backend. You may download the table to memory using `.read` to use this feature.")
|
||||||
|
|
||||||
|
## GROUP Standard.Base.Conversions
|
||||||
|
Expand aggregate values in a column to separate rows.
|
||||||
|
|
||||||
|
For each value in the specified column, if it is an aggregate (`Vector`,
|
||||||
|
`Range`, etc.), expand it to multiple rows, duplicating the values in the
|
||||||
|
other columns.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- column: The column to expand.
|
||||||
|
- at_least_one_row: for an empty aggregate value, if `at_least_one_row` is
|
||||||
|
true, a single row is output with `Nothing` for the aggregates column; if
|
||||||
|
false, no row is output at all.
|
||||||
|
|
||||||
|
The following aggregate values are supported:
|
||||||
|
- `Array`
|
||||||
|
- `Vector`
|
||||||
|
- `List`
|
||||||
|
- `Range`
|
||||||
|
- `Date_Range`
|
||||||
|
- `Pair`
|
||||||
|
|
||||||
|
Any other values are treated as non-aggregate values, and their rows are kept
|
||||||
|
unchanged.
|
||||||
|
|
||||||
|
In in-memory tables, it is permitted to mix values of different types.
|
||||||
|
|
||||||
|
> Example
|
||||||
|
Expand a column of integer `Vectors` to a column of `Integer`
|
||||||
|
|
||||||
|
table = Table.new [["aaa", [1, 2]], ["bbb", [[30, 31], [40, 41]]]]
|
||||||
|
# => Table.new [["aaa", [1, 1, 2, 2]], ["bbb", [30, 31, 40, 41]]]
|
||||||
|
@column Widget_Helpers.make_column_name_selector
|
||||||
|
expand_to_rows : Text | Integer -> Boolean -> Table ! Type_Error | No_Such_Column | Index_Out_Of_Bounds
expand_to_rows self column at_least_one_row=False =
    # The arguments are only bound here and never used: this backend always
    # fails with `Unsupported_Database_Operation`.
    _ = [column, at_least_one_row]
    Error.throw (Unsupported_Database_Operation.Error "Table.expand_to_rows is currently not implemented for the Database backend. You may download the table to memory using `.read` to use this feature.")
|
||||||
|
|
||||||
## GROUP Standard.Base.Conversions
|
## GROUP Standard.Base.Conversions
|
||||||
Cast the selected columns to a specific type.
|
Cast the selected columns to a specific type.
|
||||||
|
|
||||||
|
@ -16,12 +16,20 @@ type Convertible_To_Rows
|
|||||||
- getter: Get the value for a specified row.
|
- getter: Get the value for a specified row.
|
||||||
Value length:Integer (getter : Integer->Any)
|
Value length:Integer (getter : Integer->Any)
|
||||||
|
|
||||||
|
## PRIVATE
|
||||||
|
Return the iterator values as a `Vector`.
|
||||||
|
to_vector : Vector Any
to_vector self =
    # Visit every row index from 0 (inclusive) to `length` (exclusive) and
    # fetch the corresponding value through `getter`.
    row_indices = 0.up_to self.length
    row_indices.map self.getter
|
||||||
|
|
||||||
## PRIVATE
|
## PRIVATE
|
||||||
Convertible_To_Rows.from that:Vector = Convertible_To_Rows.Value that.length that.get
|
Convertible_To_Rows.from that:Vector = Convertible_To_Rows.Value that.length that.get
|
||||||
|
|
||||||
## PRIVATE
|
## PRIVATE
|
||||||
Convertible_To_Rows.from that:Array = Convertible_To_Rows.Value that.length that.get
|
Convertible_To_Rows.from that:Array = Convertible_To_Rows.Value that.length that.get
|
||||||
|
|
||||||
|
## PRIVATE
|
||||||
|
Convertible_To_Rows.from that:List = Convertible_To_Rows.from that.to_vector
|
||||||
|
|
||||||
## PRIVATE
|
## PRIVATE
|
||||||
Convertible_To_Rows.from that:Range = Convertible_To_Rows.Value that.length that.get
|
Convertible_To_Rows.from that:Range = Convertible_To_Rows.Value that.length that.get
|
||||||
|
|
||||||
|
@ -1189,6 +1189,42 @@ type Table
|
|||||||
expand_column self (column : Text | Integer) (fields : Vector | Nothing = Nothing) (prefix : Text | Nothing = Nothing) =
|
expand_column self (column : Text | Integer) (fields : Vector | Nothing = Nothing) (prefix : Text | Nothing = Nothing) =
|
||||||
Expand_Objects_Helpers.expand_column self column fields prefix
|
Expand_Objects_Helpers.expand_column self column fields prefix
|
||||||
|
|
||||||
|
## GROUP Standard.Base.Conversions
|
||||||
|
Expand aggregate values in a column to separate rows.
|
||||||
|
|
||||||
|
For each value in the specified column, if it is an aggregate (`Vector`,
|
||||||
|
`Range`, etc.), expand it to multiple rows, duplicating the values in the
|
||||||
|
other columns.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- column: The column to expand.
|
||||||
|
- at_least_one_row: for an empty aggregate value, if `at_least_one_row` is
|
||||||
|
true, a single row is output with `Nothing` for the aggregates column; if
|
||||||
|
false, no row is output at all.
|
||||||
|
|
||||||
|
The following aggregate values are supported:
|
||||||
|
- `Array`
|
||||||
|
- `Vector`
|
||||||
|
- `List`
|
||||||
|
- `Range`
|
||||||
|
- `Date_Range`
|
||||||
|
- `Pair`
|
||||||
|
|
||||||
|
Any other values are treated as non-aggregate values, and their rows are kept
|
||||||
|
unchanged.
|
||||||
|
|
||||||
|
In in-memory tables, it is permitted to mix values of different types.
|
||||||
|
|
||||||
|
> Example
|
||||||
|
Expand a column of integer `Vectors` to a column of `Integer`
|
||||||
|
|
||||||
|
table = Table.new [["aaa", [1, 2]], ["bbb", [[30, 31], [40, 41]]]]
|
||||||
|
# => Table.new [["aaa", [1, 1, 2, 2]], ["bbb", [30, 31, 40, 41]]]
|
||||||
|
@column Widget_Helpers.make_column_name_selector
|
||||||
|
expand_to_rows : Text | Integer -> Boolean -> Table ! Type_Error | No_Such_Column | Index_Out_Of_Bounds
expand_to_rows self column at_least_one_row=False =
    # Delegates to the shared helper, passing this table as the table to expand.
    Expand_Objects_Helpers.expand_to_rows self column at_least_one_row
|
||||||
|
|
||||||
## ALIAS filter rows
|
## ALIAS filter rows
|
||||||
GROUP Standard.Base.Selections
|
GROUP Standard.Base.Selections
|
||||||
|
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
from Standard.Base import all
|
from Standard.Base import all
|
||||||
import Standard.Base.Data.Text.Regex.Regex_Syntax_Error
|
import Standard.Base.Data.Text.Regex.Regex_Syntax_Error
|
||||||
import Standard.Base.Errors.Common.Index_Out_Of_Bounds
|
|
||||||
import Standard.Base.Errors.Common.Type_Error
|
import Standard.Base.Errors.Common.Type_Error
|
||||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||||
import Standard.Base.Errors.Unimplemented.Unimplemented
|
import Standard.Base.Errors.Unimplemented.Unimplemented
|
||||||
@ -9,7 +8,6 @@ from Standard.Base.Metadata import make_single_choice
|
|||||||
import project.Data.Match_Columns.Match_Columns
|
import project.Data.Match_Columns.Match_Columns
|
||||||
import project.Data.Table.Table
|
import project.Data.Table.Table
|
||||||
import project.Errors.Invalid_JSON_Format
|
import project.Errors.Invalid_JSON_Format
|
||||||
import project.Errors.No_Such_Column
|
|
||||||
import project.Internal.Expand_Objects_Helpers
|
import project.Internal.Expand_Objects_Helpers
|
||||||
import project.Internal.Parse_To_Table
|
import project.Internal.Parse_To_Table
|
||||||
import project.Internal.Widget_Helpers
|
import project.Internal.Widget_Helpers
|
||||||
|
@ -1,11 +1,17 @@
|
|||||||
from Standard.Base import all
|
from Standard.Base import all
|
||||||
|
|
||||||
|
import Standard.Base.Errors.Common.Index_Out_Of_Bounds
|
||||||
|
import Standard.Base.Errors.Common.Type_Error
|
||||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||||
|
|
||||||
import project.Data.Table.Table
|
import project.Data.Table.Table
|
||||||
import project.Data.Column.Column
|
import project.Data.Column.Column
|
||||||
import project.Data.Conversions.Convertible_To_Columns.Convertible_To_Columns
|
import project.Data.Conversions.Convertible_To_Columns.Convertible_To_Columns
|
||||||
import project.Data.Conversions.Convertible_To_Rows.Convertible_To_Rows
|
import project.Data.Conversions.Convertible_To_Rows.Convertible_To_Rows
|
||||||
|
import project.Errors.No_Such_Column
|
||||||
|
import project.Internal.Fan_Out
|
||||||
import project.Internal.Java_Exports
|
import project.Internal.Java_Exports
|
||||||
|
from project.Internal.Java_Exports import make_inferred_builder
|
||||||
|
|
||||||
## PRIVATE
|
## PRIVATE
|
||||||
expand_column : Table -> (Text | Integer) -> ((Vector Text) | Nothing) -> (Text | Nothing) -> Table
|
expand_column : Table -> (Text | Integer) -> ((Vector Text) | Nothing) -> (Text | Nothing) -> Table
|
||||||
@ -29,6 +35,45 @@ expand_column table column fields prefix =
|
|||||||
|
|
||||||
Table.new output_builder.to_vector
|
Table.new output_builder.to_vector
|
||||||
|
|
||||||
|
## GROUP Standard.Base.Conversions
|
||||||
|
Expand aggregate values in a column to separate rows.
|
||||||
|
|
||||||
|
For each value in the specified column, if it is an aggregate (`Vector`,
|
||||||
|
`Range`, etc.), expand it to multiple rows, duplicating the values in the
|
||||||
|
other columns.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- column: The column to expand.
|
||||||
|
- at_least_one_row: for an empty aggregate value, if `at_least_one_row` is
|
||||||
|
true, a single row is output with `Nothing` for the aggregates column; if
|
||||||
|
false, no row is output at all.
|
||||||
|
|
||||||
|
The following aggregate values are supported:
|
||||||
|
- `Array`
|
||||||
|
- `Vector`
|
||||||
|
- `List`
|
||||||
|
- `Range`
|
||||||
|
- `Date_Range`
|
||||||
|
- `Pair`
|
||||||
|
|
||||||
|
Any other values are treated as non-aggregate values, and their rows are kept
|
||||||
|
unchanged.
|
||||||
|
|
||||||
|
In in-memory tables, it is permitted to mix values of different types.
|
||||||
|
|
||||||
|
> Example
|
||||||
|
Expand a column of integer `Vectors` to a column of `Integer`
|
||||||
|
|
||||||
|
table = Table.new [["aaa", [1, 2]], ["bbb", [[30, 31], [40, 41]]]]
|
||||||
|
# => Table.new [["aaa", [1, 1, 2, 2]], ["bbb", [30, 31, 40, 41]]]
|
||||||
|
@column Widget_Helpers.make_column_name_selector
|
||||||
|
expand_to_rows : Table -> Text | Integer -> Boolean -> Table ! Type_Error | No_Such_Column | Index_Out_Of_Bounds
expand_to_rows table column at_least_one_row=False =
    # Each cell is converted to `Convertible_To_Rows`, whose `to_vector`
    # yields the values that become the individual output rows.
    to_row_values : Any -> Vector
    to_row_values value:Convertible_To_Rows = value.to_vector

    # Use an inferred builder so the expanded column is not forced to text.
    Fan_Out.fan_out_to_rows table column to_row_values at_least_one_row column_builder=make_inferred_builder
|
||||||
|
|
||||||
## PRIVATE
|
## PRIVATE
|
||||||
create_table_from_objects : Any -> ((Vector Text) | Nothing) -> Table
|
create_table_from_objects : Any -> ((Vector Text) | Nothing) -> Table
|
||||||
create_table_from_objects (value : Convertible_To_Rows) (fields : Vector | Nothing) = if fields.is_nothing.not && fields.is_empty then Error.throw (Illegal_Argument.Error "The fields parameter cannot be empty.") else
|
create_table_from_objects (value : Convertible_To_Rows) (fields : Vector | Nothing) = if fields.is_nothing.not && fields.is_empty then Error.throw (Illegal_Argument.Error "The fields parameter cannot be empty.") else
|
||||||
|
@ -0,0 +1,281 @@
|
|||||||
|
from Standard.Base import all
|
||||||
|
|
||||||
|
import project.Data.Column.Column
|
||||||
|
import project.Data.Table.Table
|
||||||
|
import project.Data.Type.Value_Type.Value_Type
|
||||||
|
import project.Internal.Problem_Builder.Problem_Builder
|
||||||
|
|
||||||
|
from project.Errors import Column_Count_Exceeded, Column_Count_Mismatch
|
||||||
|
from project.Internal.Java_Exports import make_string_builder
|
||||||
|
|
||||||
|
polyglot java import org.enso.table.data.mask.OrderMask
|
||||||
|
|
||||||
|
## PRIVATE
|
||||||
|
Transform a table by transforming a column into a set of columns. Takes a
|
||||||
|
function that maps a single element of the input column to a vector of output
|
||||||
|
values. The original column is replaced by the new columns.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- table: The table to transform.
|
||||||
|
- input_column: The column to transform.
|
||||||
|
- function: A function that transforms a single element of `input_column`
|
||||||
|
to multiple values.
|
||||||
|
fan_out_to_columns : Table -> Text | Integer -> (Any -> Vector Any) -> Integer | Nothing -> (Integer -> Any) -> Problem_Behavior -> Table | Nothing
fan_out_to_columns table input_column_id function column_count=Nothing column_builder=make_string_builder on_problems=Report_Error =
    problems = Problem_Builder.new
    source_column = table.get input_column_id
    # Expand every value of the source column into one value per output column.
    expanded_columns = map_columns_to_multiple source_column function column_count column_builder=column_builder problem_builder=problems
    # Make the generated names unique with respect to the columns that remain
    # in the table.
    renamed_columns = rename_new_columns table source_column.name expanded_columns problems
    # Swap the source column for the generated columns, keeping its position.
    result = replace_column_with_columns table source_column renamed_columns
    problems.attach_problems_after on_problems result
|
||||||
|
|
||||||
|
## PRIVATE
|
||||||
|
Transform a column by applying the given function to the values in the
|
||||||
|
column. The function produces multiple outputs, so each row is duplicated,
|
||||||
|
with each row getting a distinct output value in place of the original
|
||||||
|
input value. The other column values are just duplicated.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- table: The table to transform.
|
||||||
|
- input_column: The column to transform.
|
||||||
|
- function: A function that transforms a single element of `input_column`
|
||||||
|
to multiple values.
|
||||||
|
- at_least_one_row: When true, if the function returns an empty list, a
|
||||||
|
single row is output with `Nothing` for the transformed column. If false,
|
||||||
|
the row is not output at all.
|
||||||
|
fan_out_to_rows : Table -> Text | Integer -> (Any -> Vector Any) -> Boolean -> (Integer -> Any) -> Problem_Behavior -> Table
fan_out_to_rows table input_column_id function at_least_one_row=False column_builder=make_string_builder on_problems=Report_Error =
    ## Treat this as a special case of fan_out_to_rows_and_columns, with one
       column. Wrap the provided function to convert each value to a singleton
       `Vector`.
    wrapped_function x = function x . map y-> [y]
    ## `input_column_id` may be an `Integer` index rather than a name, so
       resolve the column first and use its actual name for the single output
       column.
    input_column_name = (table.at input_column_id).name
    column_names = [input_column_name]
    fan_out_to_rows_and_columns table input_column_id wrapped_function column_names at_least_one_row=at_least_one_row column_builder=column_builder on_problems=on_problems
|
||||||
|
|
||||||
|
## PRIVATE
|
||||||
|
Transform a column by applying the given function to the values in the
|
||||||
|
column. The function returns a `Vector` of `Vectors`. Each inner vector turns
|
||||||
|
into multiple new columns in a single row. Each inner vector within the outer
|
||||||
|
vector produces an output row, so each row is duplicated, with each row
|
||||||
|
getting a distinct set of output values in place of the original input value.
|
||||||
|
The other column values are just duplicated.
|
||||||
|
|
||||||
|
! Error Conditions
|
||||||
|
|
||||||
|
The inner vectors should all have the same number of values, which should
|
||||||
|
match the provided `column_names`. If a value is too short, it will be
|
||||||
|
padded with Nothing, and if it is too long, it will be truncated. In either
|
||||||
|
case, Column_Count_Mismatch will be added as a warning. (It is expected
|
||||||
|
that the caller of this private method will ensure that the provided
|
||||||
|
function will produce inner vectors of the correct length, but we check for
|
||||||
|
it anyway.)
|
||||||
|
|
||||||
|
> Example
|
||||||
|
f("12 34 56") -> [[1, 2], [3, 4], [5, 6]]
|
||||||
|
|
||||||
|
foo | bar | baz
|
||||||
|
----+-----------+----
|
||||||
|
x | 12 34 56 | y ===>
|
||||||
|
... | ... | ...
|
||||||
|
|
||||||
|
foo | bar 1 | bar 2 | baz
|
||||||
|
----+-------+-------+----
|
||||||
|
x | 1 | 2 | y
|
||||||
|
x | 3 | 4 | y
|
||||||
|
x | 5 | 6 | y
|
||||||
|
... | ... | ... | ...
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- table: The table to transform.
|
||||||
|
- input_column: The column to transform.
|
||||||
|
- function: A function that transforms a single element of `input_column`
|
||||||
|
to a `Vector` of `Vector` of values.
|
||||||
|
- column_names: The names for the generated columns.
|
||||||
|
- on_problems: Specifies the behavior when a problem occurs.
|
||||||
|
fan_out_to_rows_and_columns : Table -> Text | Integer -> (Any -> Vector (Vector Any)) -> Vector Text -> Boolean -> (Integer -> Any) -> Problem_Behavior -> Table
fan_out_to_rows_and_columns table input_column_id function column_names at_least_one_row=False column_builder=make_string_builder on_problems=Report_Error =
    problem_builder = Problem_Builder.new
    unique = table.column_naming_helper.create_unique_name_strategy

    input_column = table.at input_column_id
    ## `input_column_id` may be an `Integer` index, so every name comparison
       below goes through the resolved column's name.
    input_column_name = input_column.name
    input_storage = input_column.java_column.getStorage
    num_input_rows = input_storage.size

    num_output_columns = column_names.length

    # Guess that most of the time, we'll get at least one value for each input.
    initial_size = input_column.length
    # Accumulates the outputs of the function, one builder per output column.
    output_column_builders = Vector.new num_output_columns _-> column_builder initial_size
    # Accumulates repeated position indices for the order mask.
    order_mask_positions = Vector.new_builder initial_size

    ## When `at_least_one_row` is set, an empty result is replaced by a single
       row consisting only of `Nothing`, so the input row is not dropped.
    maybe_add_empty_row vecs =
        should_add_empty_row = vecs.is_empty && at_least_one_row
        if should_add_empty_row.not then vecs else
            empty_row = Vector.fill num_output_columns Nothing
            [empty_row]

    0.up_to num_input_rows . each i->
        input_value = input_storage.getItemBoxed i
        output_values = function input_value |> maybe_add_empty_row
        # Append each group of values to the builders.
        output_values.each row_unchecked->
            # Pad or truncate the row so it has exactly one value per builder.
            row = uniform_length num_output_columns row_unchecked problem_builder
            row.each_with_index column_index-> v-> output_column_builders.at column_index . append v
        # Append n copies of the input row position, n = # of output values.
        repeat_each output_values.length <| order_mask_positions.append i

    # Reserve the non-input column names that will not be changing.
    non_input_columns = table.columns.filter c-> c.name != input_column_name
    unique.mark_used <| non_input_columns.map .name

    # Build the output columns.
    output_storages = output_column_builders.map .seal
    output_columns = output_storages.map_with_index i-> output_storage->
        column_name = unique.make_unique <| column_names.at i
        Column.from_storage column_name output_storage

    # Build the order mask.
    order_mask = OrderMask.new (order_mask_positions.to_vector)

    ## Build the new table, replacing the input column with the new output
       columns.
    new_columns_unflattened = table.columns.map column->
        case column.name == input_column_name of
            True ->
                # Replace the input column with the output columns.
                output_columns
            False ->
                # Build a new column from the old one with the mask
                old_storage = column.java_column.getStorage
                new_storage = old_storage.applyMask order_mask
                [Column.from_storage column.name new_storage]
    new_columns = new_columns_unflattened.flatten

    new_table = Table.new new_columns
    problem_builder.attach_problems_after on_problems new_table
|
||||||
|
|
||||||
|
## PRIVATE
|
||||||
|
|
||||||
|
Map a multi-valued function over a column and return the results as set of
|
||||||
|
output columns.
|
||||||
|
|
||||||
|
Returns a Vector of Columns; any problems are reported to the supplied `problem_builder`.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- input_column: The column to transform.
|
||||||
|
- function: A function that transforms a single element of `input_column`
|
||||||
|
to multiple values.
|
||||||
|
- column_count: The number of columns to split to.
|
||||||
|
If `Nothing` then columns will be added to fit all data.
|
||||||
|
If the data exceeds the `column_count`, a `Column_Count_Exceeded` error
|
||||||
|
will follow the `on_problems` behavior.
|
||||||
|
- on_problems: Specifies the behavior when a problem occurs.
|
||||||
|
map_columns_to_multiple : Column -> (Any -> Vector Any) -> Integer | Nothing -> (Integer -> Any) -> Problem_Builder -> Vector Column
map_columns_to_multiple input_column function column_count column_builder=make_string_builder problem_builder =
    num_rows = input_column.length
    input_storage = input_column.java_column.getStorage

    # One storage builder per output column; how many are created depends on
    # whether a fixed `column_count` was requested.
    builders = case column_count of
        # No fixed count: grow the set of builders as wider outputs are seen.
        Nothing ->
            builders = Vector.new_builder

            0.up_to num_rows . each i->
                input_value = input_storage.getItemBoxed i
                output_values = function input_value

                # Add more builders if necessary to accommodate `output_values`.
                if output_values.length > builders.length then
                    num_builders_needed = output_values.length - builders.length
                    repeat_each num_builders_needed <|
                        builder = column_builder num_rows

                        # Pad the new builder with nulls: one for each of the
                        # `i` rows that were processed before it existed.
                        num_nulls_needed = i
                        builder.appendNulls num_nulls_needed

                        builders.append builder

                ## Add `output_values` to builders; if there are more builders
                   than `output_values`, pad with null.
                0.up_to builders.length . each i->
                    builders.at i . appendNoGrow (output_values.get i Nothing)

            builders.to_vector

        # Fixed count: exactly `column_count` builders are created up front.
        _ : Integer ->
            builders = Vector.new column_count (_-> column_builder num_rows)

            # Fill the builders and remember how many values each row produced.
            output_lengths = 0.up_to num_rows . map i->
                input_value = input_storage.getItemBoxed i
                output_values = function input_value

                ## Add `output_values` to builders; if there are more builders
                   than `output_values`, pad with null. Values beyond the last
                   builder are not read, so they are dropped.
                0.up_to builders.length . each i->
                    builders.at i . appendNoGrow (output_values.get i Nothing)

                output_values.length

            # NOTE(review): `maximum` yields `Nothing` when `output_lengths` is
            # empty (a column with no rows) - confirm the comparison below
            # behaves as intended in that case.
            max_output_length = maximum output_lengths

            # Some row produced more values than there are columns: report it.
            if max_output_length > column_count then
                problem = Column_Count_Exceeded.Error column_count max_output_length
                problem_builder.report_other_warning problem

            builders

    # Name columns. If there's only one, use the original column name.
    new_column_names = case builders.length of
        1 -> [input_column.name]
        _ -> 0.up_to builders.length . map i-> input_column.name + " " + (i+1).to_text

    # Build Columns.
    storages = builders.map .seal
    new_column_names.zip storages Column.from_storage
|
||||||
|
|
||||||
|
## PRIVATE
|
||||||
|
Rename a vector of columns to be unique when added to a table.
|
||||||
|
rename_new_columns : Table -> Text -> Vector Column -> Problem_Builder -> Vector Column
rename_new_columns table removed_column_name columns problem_builder =
    name_strategy = table.column_naming_helper.create_unique_name_strategy
    # Reserve the names of every column that stays in the table, i.e. all
    # columns except the one being removed.
    kept_columns = table.columns.filter (c-> c.name != removed_column_name)
    name_strategy.mark_used (kept_columns.map .name)
    # Give each new column a name that does not clash with a reserved one.
    renamed_columns = columns.map column->
        column.rename (name_strategy.make_unique column.name)
    # Hand the naming strategy to the problem builder for reporting.
    problem_builder.report_unique_name_strategy name_strategy
    renamed_columns
|
||||||
|
|
||||||
|
## PRIVATE
|
||||||
|
Replace a single column in a table with new columns.
|
||||||
|
Does not ensure names are unique; that must be done before calling this.
|
||||||
|
replace_column_with_columns : Table -> Column -> Vector Column -> Table
replace_column_with_columns table old_column new_columns =
    replaced_name = old_column.name
    # Map every column to a vector of columns: the replacement columns for the
    # column being replaced, a singleton for every other column.
    column_groups = table.columns.map c->
        if c.name == replaced_name then new_columns else [c]
    # Flatten the groups back into a single column list, preserving order.
    Table.new column_groups.flatten
|
||||||
|
|
||||||
|
## PRIVATE
   Return the maximum value of the vector, or `Nothing` if the vector is
   empty.

   Elements are compared pairwise using their `max` method.
maximum : Vector Any -> Any | Nothing
maximum vec = if vec.is_empty then Nothing else
    vec.reduce (a-> b-> a.max b)
|
||||||
|
|
||||||
|
## PRIVATE
|
||||||
|
Repeat a computation n times.
|
||||||
|
repeat_each : Integer -> Any -> Any
repeat_each n ~action =
    # `action` is a suspended argument, so it is evaluated afresh on each of
    # the `n` iterations.
    iterations = 0.up_to n
    iterations.each (_-> action)
|
||||||
|
|
||||||
|
## PRIVATE
|
||||||
|
Pad or truncate a vector to be a specified length; if altered, report
|
||||||
|
it as a Column_Count_Mismatch warning.
|
||||||
|
uniform_length : Integer -> Vector Any -> Problem_Builder -> Vector Any
uniform_length target_length v problem_builder =
    actual_length = v.length
    # A vector that already has the right length is returned untouched and
    # nothing is reported.
    if actual_length == target_length then v else
        problem_builder.report_other_warning (Column_Count_Mismatch.Error target_length actual_length)
        # Too short: pad with `Nothing`. Too long: keep the first
        # `target_length` values.
        if actual_length < target_length then v.pad target_length Nothing else v.take target_length
|
@ -1,12 +1,8 @@
|
|||||||
from Standard.Base import all
|
from Standard.Base import all
|
||||||
|
|
||||||
import project.Data.Column.Column
|
|
||||||
import project.Data.Table.Table
|
import project.Data.Table.Table
|
||||||
import project.Data.Type.Value_Type.Value_Type
|
import project.Data.Type.Value_Type.Value_Type
|
||||||
import project.Internal.Problem_Builder.Problem_Builder
|
from project.Internal.Fan_Out import all
|
||||||
|
|
||||||
from project.Errors import Column_Count_Exceeded, Column_Count_Mismatch, Duplicate_Output_Column_Names, Invalid_Value_Type, Missing_Input_Columns
|
|
||||||
from project.Internal.Java_Exports import make_string_builder
|
|
||||||
|
|
||||||
polyglot java import org.enso.table.data.mask.OrderMask
|
polyglot java import org.enso.table.data.mask.OrderMask
|
||||||
|
|
||||||
@ -17,7 +13,7 @@ split_to_columns : Table -> Text | Integer -> Text -> Integer | Nothing -> Probl
|
|||||||
split_to_columns table input_column_id delimiter="," column_count=Nothing on_problems=Report_Error =
|
split_to_columns table input_column_id delimiter="," column_count=Nothing on_problems=Report_Error =
|
||||||
column = table.at input_column_id
|
column = table.at input_column_id
|
||||||
Value_Type.expect_text column <|
|
Value_Type.expect_text column <|
|
||||||
fan_out_to_columns table input_column_id (handle_nothing (_.split delimiter)) column_count on_problems
|
fan_out_to_columns table input_column_id (handle_nothing (_.split delimiter)) column_count on_problems=on_problems
|
||||||
|
|
||||||
## PRIVATE
|
## PRIVATE
|
||||||
Splits a column of text into a set of new rows.
|
Splits a column of text into a set of new rows.
|
||||||
@ -36,7 +32,7 @@ tokenize_to_columns : Table -> Text | Integer -> Text -> Case_Sensitivity -> Int
|
|||||||
tokenize_to_columns table input_column_id pattern case_sensitivity column_count on_problems =
|
tokenize_to_columns table input_column_id pattern case_sensitivity column_count on_problems =
|
||||||
column = table.at input_column_id
|
column = table.at input_column_id
|
||||||
Value_Type.expect_text column
|
Value_Type.expect_text column
|
||||||
fan_out_to_columns table input_column_id (handle_nothing (_.tokenize pattern case_sensitivity)) column_count on_problems
|
fan_out_to_columns table input_column_id (handle_nothing (_.tokenize pattern case_sensitivity)) column_count on_problems=on_problems
|
||||||
|
|
||||||
## PRIVATE
|
## PRIVATE
|
||||||
Tokenizes a column of text into a set of new rows using a regular
|
Tokenizes a column of text into a set of new rows using a regular
|
||||||
@ -102,285 +98,13 @@ regex_to_column_names pattern original_column_name =
|
|||||||
case group_nums_to_names.get (i+1) of
|
case group_nums_to_names.get (i+1) of
|
||||||
Nothing ->
|
Nothing ->
|
||||||
suffix = group_number_to_column_name_suffix.at (i+1)
|
suffix = group_number_to_column_name_suffix.at (i+1)
|
||||||
default_column_namer original_column_name suffix
|
original_column_name + " " + (suffix+1).to_text
|
||||||
name : Text ->
|
name : Text ->
|
||||||
name
|
name
|
||||||
|
|
||||||
## PRIVATE
|
|
||||||
Transform a table by transforming a column into a set of columns. Takes a
|
|
||||||
function that maps a single element of the input column to a vector of output
|
|
||||||
values. The original column is replaced by the new columns.
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
- table: The table to transform.
|
|
||||||
- input_column_id: The name or index of the column to transform.
|
|
||||||
- function: A function that transforms a single element of `input_column`
|
|
||||||
to multiple values.
|
|
||||||
fan_out_to_columns : Table -> Text | Integer -> (Any -> Vector Any) -> Integer | Nothing -> Problem_Behavior -> Table | Nothing
|
|
||||||
fan_out_to_columns table input_column_id function column_count=Nothing on_problems=Report_Error =
|
|
||||||
input_column = table.get input_column_id
|
|
||||||
problem_builder = Problem_Builder.new
|
|
||||||
new_columns_unrenamed = map_columns_to_multiple input_column function column_count problem_builder
|
|
||||||
new_columns = rename_new_columns table input_column.name new_columns_unrenamed problem_builder
|
|
||||||
new_table = replace_column_with_columns table input_column new_columns
|
|
||||||
problem_builder.attach_problems_after on_problems new_table
|
|
||||||
|
|
||||||
## PRIVATE
|
|
||||||
Transform a column by applying the given function to the values in the
|
|
||||||
column. The function produces multiple outputs, so each row is duplicated,
|
|
||||||
with each row getting a distinct output value in place of the original
|
|
||||||
input value. The other column values are just duplicated.
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
- table: The table to transform.
|
|
||||||
- input_column_id: The name or index of the column to transform.
|
|
||||||
- function: A function that transforms a single element of `input_column`
|
|
||||||
to multiple values.
|
|
||||||
fan_out_to_rows : Table -> Text | Integer -> (Any -> Vector Any) -> Boolean -> Problem_Behavior -> Table
|
|
||||||
fan_out_to_rows table input_column_id function at_least_one_row=False on_problems=Report_Error =
|
|
||||||
## Treat this as a special case of fan_out_to_rows_and_columns, with one
|
|
||||||
column. Wrap the provided function to convert each value to a singleton
|
|
||||||
`Vector`.
|
|
||||||
wrapped_function x = function x . map y-> [y]
|
|
||||||
column_names = [input_column_id]
|
|
||||||
fan_out_to_rows_and_columns table input_column_id wrapped_function column_names at_least_one_row=at_least_one_row on_problems=on_problems
|
|
||||||
|
|
||||||
## PRIVATE
|
|
||||||
Transform a column by applying the given function to the values in the
|
|
||||||
column. The function returns a `Vector` of `Vectors`. Each inner vector turns
|
|
||||||
into multiple new columns in a single row. Each inner vector within the outer
|
|
||||||
vector produces an output row, so each row is duplicated, with each row
|
|
||||||
getting a distinct set of output values in place of the original input value.
|
|
||||||
The other column values are just duplicated.
|
|
||||||
|
|
||||||
! Error Conditions
|
|
||||||
|
|
||||||
The inner vectors should all have the same number of values, which should
|
|
||||||
match the provided `column_names`. If a value is too short, it will be
|
|
||||||
padded with Nothing, and if it is too long, it will be truncated. In either
|
|
||||||
case, Column_Count_Mismatch will be added as a warning. (It is expected
|
|
||||||
that the caller of this private method will ensure that the provided
|
|
||||||
function will produce inner vectors of the correct length, but we check for
|
|
||||||
it anyway.)
|
|
||||||
|
|
||||||
> Example
|
|
||||||
f("12 34 56") -> [[1, 2], [3, 4], [5, 6]]
|
|
||||||
|
|
||||||
foo | bar | baz
|
|
||||||
----+-----------+----
|
|
||||||
x | 12 34 56 | y ===>
|
|
||||||
... | ... | ...
|
|
||||||
|
|
||||||
foo | bar 1 | bar 2 | baz
|
|
||||||
----+-------+-------+----
|
|
||||||
x | 1 | 2 | y
|
|
||||||
x | 3 | 4 | y
|
|
||||||
x | 5 | 6 | y
|
|
||||||
... | ... | ... | ...
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
- table: The table to transform.
|
|
||||||
- input_column_id: The name or index of the column to transform.
|
|
||||||
- function: A function that transforms a single element of `input_column`
|
|
||||||
to a `Vector` of `Vector` of values.
|
|
||||||
- column_names: The names for the generated columns.
|
|
||||||
- on_problems: Specifies the behavior when a problem occurs.
|
|
||||||
fan_out_to_rows_and_columns : Table -> Text | Integer -> (Any -> Vector (Vector Any)) -> Vector Text -> Boolean -> Problem_Behavior -> Table
|
|
||||||
fan_out_to_rows_and_columns table input_column_id function column_names at_least_one_row=False on_problems=Report_Error =
|
|
||||||
problem_builder = Problem_Builder.new
|
|
||||||
unique = table.column_naming_helper.create_unique_name_strategy
|
|
||||||
|
|
||||||
input_column = table.at input_column_id
|
|
||||||
input_storage = input_column.java_column.getStorage
|
|
||||||
num_input_rows = input_storage.size
|
|
||||||
|
|
||||||
num_output_columns = column_names.length
|
|
||||||
|
|
||||||
# Guess that most of the time, we'll get at least one value for each input.
|
|
||||||
initial_size = input_column.length
|
|
||||||
# Accumulates the outputs of the function.
|
|
||||||
output_column_builders = Vector.new num_output_columns _-> make_string_builder initial_size
|
|
||||||
# Accumulates repeated position indices for the order mask.
|
|
||||||
order_mask_positions = Vector.new_builder initial_size
|
|
||||||
|
|
||||||
maybe_add_empty_row vecs =
|
|
||||||
should_add_empty_row = vecs.is_empty && at_least_one_row
|
|
||||||
if should_add_empty_row.not then vecs else
|
|
||||||
empty_row = Vector.fill num_output_columns Nothing
|
|
||||||
[empty_row]
|
|
||||||
|
|
||||||
0.up_to num_input_rows . each i->
|
|
||||||
input_value = input_storage.getItemBoxed i
|
|
||||||
output_values = function input_value |> maybe_add_empty_row
|
|
||||||
# Append each group of values to the builder.
|
|
||||||
output_values.each row_unchecked->
|
|
||||||
row = uniform_length num_output_columns row_unchecked problem_builder
|
|
||||||
row.each_with_index i-> v-> output_column_builders.at i . append v
|
|
||||||
# Append n copies of the input row position, n = # of output values.
|
|
||||||
repeat_each output_values.length <| order_mask_positions.append i
|
|
||||||
|
|
||||||
# Reserve the non-input column names that will not be changing.
|
|
||||||
non_input_columns = table.columns.filter c-> c.name != input_column.name
|
|
||||||
unique.mark_used <| non_input_columns.map .name
|
|
||||||
|
|
||||||
# Build the output column
|
|
||||||
output_storages = output_column_builders.map .seal
|
|
||||||
output_columns = output_storages.map_with_index i-> output_storage->
|
|
||||||
column_name = unique.make_unique <| column_names.at i
|
|
||||||
Column.from_storage column_name output_storage
|
|
||||||
|
|
||||||
# Build the order mask.
|
|
||||||
order_mask = OrderMask.new (order_mask_positions.to_vector)
|
|
||||||
|
|
||||||
## Build the new table, replacing the input column with the new output
|
|
||||||
columns.
|
|
||||||
new_columns_unflattened = table.columns.map column->
|
|
||||||
case column.name == input_column_id of
|
|
||||||
True ->
|
|
||||||
# Replace the input column with the output columns.
|
|
||||||
output_columns
|
|
||||||
False ->
|
|
||||||
# Build a new column from the old one with the mask
|
|
||||||
old_storage = column.java_column.getStorage
|
|
||||||
new_storage = old_storage.applyMask order_mask
|
|
||||||
[Column.from_storage column.name new_storage]
|
|
||||||
new_columns = new_columns_unflattened.flatten
|
|
||||||
|
|
||||||
new_table = Table.new new_columns
|
|
||||||
problem_builder.attach_problems_after on_problems new_table
|
|
||||||
|
|
||||||
## PRIVATE
|
|
||||||
|
|
||||||
Map a multi-valued function over a column and return the results as set of
|
|
||||||
output columns.
|
|
||||||
|
|
||||||
Returns a Vector of Columns; any problems are reported to `problem_builder`.
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
- input_column: The column to transform.
|
|
||||||
- function: A function that transforms a single element of `input_column`
|
|
||||||
to multiple values.
|
|
||||||
- column_count: The number of columns to split to.
|
|
||||||
If `Nothing` then columns will be added to fit all data.
|
|
||||||
If the data exceeds the `column_count`, a `Column_Count_Exceeded` error
|
|
||||||
will follow the `on_problems` behavior.
|
|
||||||
- problem_builder: The `Problem_Builder` to which any problems are reported.
|
|
||||||
map_columns_to_multiple : Column -> (Any -> Vector Any) -> Integer | Nothing -> Problem_Builder -> Vector Column
|
|
||||||
map_columns_to_multiple input_column function column_count problem_builder =
|
|
||||||
num_rows = input_column.length
|
|
||||||
input_storage = input_column.java_column.getStorage
|
|
||||||
|
|
||||||
builders = case column_count of
|
|
||||||
Nothing ->
|
|
||||||
builders = Vector.new_builder
|
|
||||||
|
|
||||||
0.up_to num_rows . each i->
|
|
||||||
input_value = input_storage.getItemBoxed i
|
|
||||||
output_values = function input_value
|
|
||||||
|
|
||||||
# Add more builders if necessary to accommodate `output_values`.
|
|
||||||
if output_values.length > builders.length then
|
|
||||||
num_builders_needed = output_values.length - builders.length
|
|
||||||
repeat_each num_builders_needed <|
|
|
||||||
builder = make_string_builder num_rows
|
|
||||||
|
|
||||||
# Pad the new builder with nulls
|
|
||||||
num_nulls_needed = i
|
|
||||||
builder.appendNulls num_nulls_needed
|
|
||||||
|
|
||||||
builders.append builder
|
|
||||||
|
|
||||||
## Add `output_values` to builders; if there are more builders
|
|
||||||
than `output_values`, pad with null.
|
|
||||||
0.up_to builders.length . each i->
|
|
||||||
builders.at i . appendNoGrow (output_values.get i Nothing)
|
|
||||||
|
|
||||||
builders.to_vector
|
|
||||||
|
|
||||||
_ : Integer ->
|
|
||||||
builders = Vector.new column_count (_-> make_string_builder num_rows)
|
|
||||||
|
|
||||||
output_lengths = 0.up_to num_rows . map i->
|
|
||||||
input_value = input_storage.getItemBoxed i
|
|
||||||
output_values = function input_value
|
|
||||||
|
|
||||||
## Add `output_values` to builders; if there are more builders
|
|
||||||
than `output_values`, pad with null.
|
|
||||||
0.up_to builders.length . each i->
|
|
||||||
builders.at i . appendNoGrow (output_values.get i Nothing)
|
|
||||||
|
|
||||||
output_values.length
|
|
||||||
|
|
||||||
max_output_length = maximum output_lengths
|
|
||||||
|
|
||||||
if max_output_length > column_count then
|
|
||||||
problem = Column_Count_Exceeded.Error column_count max_output_length
|
|
||||||
problem_builder.report_other_warning problem
|
|
||||||
|
|
||||||
builders
|
|
||||||
|
|
||||||
# Name columns. If there's only one, use the original column name.
|
|
||||||
new_column_names = case builders.length of
|
|
||||||
1 -> [input_column.name]
|
|
||||||
_ -> 0.up_to builders.length . map i-> default_column_namer input_column.name i
|
|
||||||
|
|
||||||
# Build Columns.
|
|
||||||
storages = builders.map .seal
|
|
||||||
new_column_names.zip storages Column.from_storage
|
|
||||||
|
|
||||||
## PRIVATE
|
|
||||||
Rename a vector of columns to be unique when added to a table.
|
|
||||||
rename_new_columns : Table -> Text -> Vector Column -> Problem_Builder -> Vector Column
|
|
||||||
rename_new_columns table removed_column_name columns problem_builder =
|
|
||||||
unique = table.column_naming_helper.create_unique_name_strategy
|
|
||||||
remaining_columns = table.columns . filter (c-> c.name != removed_column_name) . map .name
|
|
||||||
unique.mark_used remaining_columns
|
|
||||||
new_columns = columns.map column->
|
|
||||||
new_name = unique.make_unique column.name
|
|
||||||
column.rename new_name
|
|
||||||
problem_builder.report_unique_name_strategy unique
|
|
||||||
new_columns
|
|
||||||
|
|
||||||
## PRIVATE
|
|
||||||
Replace a single column in a table with new columns.
|
|
||||||
Does not ensure names are unique; that must be done before calling this.
|
|
||||||
replace_column_with_columns : Table -> Column -> Vector Column -> Table
|
|
||||||
replace_column_with_columns table old_column new_columns =
|
|
||||||
Table.new ((table.columns.map (c-> if c.name == old_column.name then new_columns else [c])).flatten)
|
|
||||||
|
|
||||||
## PRIVATE
|
|
||||||
Return the maximum value of the vector.
|
|
||||||
Returns `Nothing` if the vector is empty.
|
|
||||||
maximum : Vector Any -> Vector Any
|
|
||||||
maximum vec = if vec.is_empty then Nothing else
|
|
||||||
vec.reduce (a-> b-> a.max b)
|
|
||||||
|
|
||||||
## PRIVATE
|
## PRIVATE
|
||||||
Wrap a function so that it returns [] if passed Nothing.
|
Wrap a function so that it returns [] if passed Nothing.
|
||||||
handle_nothing : (Any -> Any) -> (Any -> Any)
|
handle_nothing : (Any -> Any) -> (Any -> Any)
|
||||||
handle_nothing function = x-> case x of
|
handle_nothing function = x-> case x of
|
||||||
_ : Nothing -> []
|
_ : Nothing -> []
|
||||||
_ -> function x
|
_ -> function x
|
||||||
|
|
||||||
## PRIVATE
|
|
||||||
Repeat a computation n times.
|
|
||||||
repeat_each : Integer -> Any -> Any
|
|
||||||
repeat_each n ~action = 0.up_to n . each _-> action
|
|
||||||
|
|
||||||
## PRIVATE
|
|
||||||
Name a column by appending an integer to a base column name.
|
|
||||||
default_column_namer : Text -> Integer -> Text
|
|
||||||
default_column_namer base_name i = base_name + " " + (i+1).to_text
|
|
||||||
|
|
||||||
## PRIVATE
|
|
||||||
Pad or truncate a vector to be a specified length; if altered, report
|
|
||||||
it as a Column_Count_Mismatch warning.
|
|
||||||
uniform_length : Integer -> Vector Any -> Problem_Builder -> Vector Any
|
|
||||||
uniform_length target_length v problem_builder = if v.length == target_length then v else
|
|
||||||
problem = Column_Count_Mismatch.Error target_length v.length
|
|
||||||
problem_builder.report_other_warning problem
|
|
||||||
case v.length < target_length of
|
|
||||||
# Pad.
|
|
||||||
True -> v.pad target_length Nothing
|
|
||||||
# Truncate.
|
|
||||||
False -> v.take target_length
|
|
||||||
|
@ -155,7 +155,13 @@ public class InferredBuilder extends Builder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private void retypeToMixed() {
|
private void retypeToMixed() {
|
||||||
ObjectBuilder objectBuilder = new MixedBuilder(initialSize);
|
// The new internal builder must be at least `currentSize` so it can store
|
||||||
|
// all the current values. It must also be at least 'initialSize' since the
|
||||||
|
// caller might be using appendNoGrow and is expecting to write at least
|
||||||
|
// that many values.
|
||||||
|
int capacity = Math.max(initialSize, currentSize);
|
||||||
|
|
||||||
|
ObjectBuilder objectBuilder = new MixedBuilder(capacity);
|
||||||
currentBuilder.retypeToMixed(objectBuilder.getData());
|
currentBuilder.retypeToMixed(objectBuilder.getData());
|
||||||
objectBuilder.setCurrentSize(currentBuilder.getCurrentSize());
|
objectBuilder.setCurrentSize(currentBuilder.getCurrentSize());
|
||||||
objectBuilder.setPreExistingProblems(currentBuilder.getProblems());
|
objectBuilder.setPreExistingProblems(currentBuilder.getProblems());
|
||||||
|
@ -553,6 +553,12 @@ spec setup =
|
|||||||
table = table_builder [["aaa", [1, 2]], ["bbb", [3, 4]], ["ccc", [5, 6]]]
|
table = table_builder [["aaa", [1, 2]], ["bbb", [3, 4]], ["ccc", [5, 6]]]
|
||||||
table.expand_column "bbb" . should_fail_with Unsupported_Database_Operation
|
table.expand_column "bbb" . should_fail_with Unsupported_Database_Operation
|
||||||
|
|
||||||
|
# The in-memory functionality of `expand_to_rows` is tested in test/Table_Tests/src/In_Memory/Table_Conversion_Spec.enso
|
||||||
|
if setup.is_database then Test.group prefix+"Table.expand_to_rows" <|
|
||||||
|
Test.specify "should report unsupported" <|
|
||||||
|
table = table_builder [["aaa", [1, 2]], ["bbb", [3, 4]], ["ccc", [5, 6]]]
|
||||||
|
table.expand_to_rows "bbb" . should_fail_with Unsupported_Database_Operation
|
||||||
|
|
||||||
if setup.is_database.not then Test.group prefix+"Table/Column auto value type" <|
|
if setup.is_database.not then Test.group prefix+"Table/Column auto value type" <|
|
||||||
Test.specify "should allow to narrow down types of a Mixed column" <|
|
Test.specify "should allow to narrow down types of a Mixed column" <|
|
||||||
[True, False].each shrink_types->
|
[True, False].each shrink_types->
|
||||||
|
@ -30,3 +30,11 @@ spec = Test.group "[In-Memory] Storage Builders" <|
|
|||||||
storage = builder.seal
|
storage = builder.seal
|
||||||
column = Column.from_storage "X" storage
|
column = Column.from_storage "X" storage
|
||||||
column.to_vector . should_equal vector
|
column.to_vector . should_equal vector
|
||||||
|
|
||||||
|
Test.specify "Inferred Builder should correctly resize when retyping to a mixed column, with an underestimated initial size" <|
|
||||||
|
mixed_values = [10, 11, 22, 23, 24, 25, '2020-02-28']
|
||||||
|
builder = make_inferred_builder 3
|
||||||
|
mixed_values.map v-> builder.append v
|
||||||
|
storage = builder.seal
|
||||||
|
column = Column.from_storage "X" storage
|
||||||
|
column.to_vector . should_equal mixed_values
|
||||||
|
29
test/Table_Tests/src/In_Memory/Fan_Out_Spec.enso
Normal file
29
test/Table_Tests/src/In_Memory/Fan_Out_Spec.enso
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
from Standard.Base import all
|
||||||
|
|
||||||
|
from project.Util import all
|
||||||
|
|
||||||
|
import Standard.Table.Internal.Fan_Out
|
||||||
|
from Standard.Table import Table
|
||||||
|
|
||||||
|
import Standard.Test.Extensions
|
||||||
|
from Standard.Test import Test, Test_Suite, Problems
|
||||||
|
|
||||||
|
spec =
|
||||||
|
Test.group "Fan_Out" <|
|
||||||
|
Test.specify "can do fan_out_to_columns " <|
|
||||||
|
cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]]]
|
||||||
|
t = Table.new cols
|
||||||
|
expected_rows = [[0, "a", "c", Nothing], [1, "c", "d", "ef"], [2, "gh", "ij", "u"]]
|
||||||
|
expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows
|
||||||
|
t2 = Fan_Out.fan_out_to_columns t "bar" (_.split "b")
|
||||||
|
t2.should_equal expected
|
||||||
|
|
||||||
|
Test.specify "can do fan_out_to_rows" <|
|
||||||
|
cols = [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]]]
|
||||||
|
t = Table.new cols
|
||||||
|
expected_rows = [[0, "a"], [0, "c"], [1, "c"], [1, "d"], [1, "ef"], [2, "gh"], [2, "ij"], [2, "u"]]
|
||||||
|
expected = Table.from_rows ["foo", "bar"] expected_rows
|
||||||
|
t2 = Fan_Out.fan_out_to_rows t "bar" (_.split "b")
|
||||||
|
t2.should_equal expected
|
||||||
|
|
||||||
|
main = Test_Suite.run_main spec
|
@ -2,8 +2,10 @@ from Standard.Base import all
|
|||||||
|
|
||||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||||
|
|
||||||
from Standard.Table.Extensions.Table_Conversions import all
|
import Standard.Table.Data.Type.Value_Type.Value_Type
|
||||||
from Standard.Table import Table, Column
|
from Standard.Table import Table, Column
|
||||||
|
from Standard.Table.Errors import No_Such_Column
|
||||||
|
from Standard.Table.Extensions.Table_Conversions import all
|
||||||
|
|
||||||
from Standard.Test import Test, Test_Suite, Problems
|
from Standard.Test import Test, Test_Suite, Problems
|
||||||
import Standard.Test.Extensions
|
import Standard.Test.Extensions
|
||||||
@ -202,4 +204,76 @@ spec =
|
|||||||
expected = Table.new [["aaa", [1, 2]], ["expanded last", ["Smith", Nothing]], ["expanded height", [Nothing, 1.9]], ["expanded foo", [Nothing, Nothing]], ["ccc", [5, 6]]]
|
expected = Table.new [["aaa", [1, 2]], ["expanded last", ["Smith", Nothing]], ["expanded height", [Nothing, 1.9]], ["expanded foo", [Nothing, Nothing]], ["ccc", [5, 6]]]
|
||||||
table.expand_column "bbb" ["last", "height", "foo"] "expanded " . should_equal expected
|
table.expand_column "bbb" ["last", "height", "foo"] "expanded " . should_equal expected
|
||||||
|
|
||||||
|
Test.group "expand_to_rows" <|
|
||||||
|
Test.specify "Can expand single values" <|
|
||||||
|
values_to_expand = [3, 4]
|
||||||
|
table = Table.new [["aaa", [1, 2]], ["bbb", values_to_expand], ["ccc", [5, 6]]]
|
||||||
|
expected = Table.new [["aaa", [1, 2]], ["bbb", [3, 4]], ["ccc", [5, 6]]]
|
||||||
|
table.expand_to_rows "bbb" . should_equal expected
|
||||||
|
|
||||||
|
Test.specify "Can expand Vectors" <|
|
||||||
|
values_to_expand = [[10, 11], [20, 21, 22], [30]]
|
||||||
|
table = Table.new [["aaa", [1, 2, 3]], ["bbb", values_to_expand], ["ccc", [5, 6, 7]]]
|
||||||
|
expected = Table.new [["aaa", [1, 1, 2, 2, 2, 3]], ["bbb", [10, 11, 20, 21, 22, 30]], ["ccc", [5, 5, 6, 6, 6, 7]]]
|
||||||
|
r = table.expand_to_rows "bbb"
|
||||||
|
r . should_equal expected
|
||||||
|
r.at "bbb" . value_type . should_equal Value_Type.Integer
|
||||||
|
|
||||||
|
Test.specify "Can expand Arrays" <|
|
||||||
|
values_to_expand = [[10, 11].to_array, [20, 21, 22].to_array, [30].to_array]
|
||||||
|
table = Table.new [["aaa", [1, 2, 3]], ["bbb", values_to_expand], ["ccc", [5, 6, 7]]]
|
||||||
|
expected = Table.new [["aaa", [1, 1, 2, 2, 2, 3]], ["bbb", [10, 11, 20, 21, 22, 30]], ["ccc", [5, 5, 6, 6, 6, 7]]]
|
||||||
|
table.expand_to_rows "bbb" . should_equal expected
|
||||||
|
|
||||||
|
Test.specify "Can expand Lists" <|
|
||||||
|
values_to_expand = [[10, 11].to_list, [20, 21, 22].to_list, [30].to_list]
|
||||||
|
table = Table.new [["aaa", [1, 2, 3]], ["bbb", values_to_expand], ["ccc", [5, 6, 7]]]
|
||||||
|
expected = Table.new [["aaa", [1, 1, 2, 2, 2, 3]], ["bbb", [10, 11, 20, 21, 22, 30]], ["ccc", [5, 5, 6, 6, 6, 7]]]
|
||||||
|
table.expand_to_rows "bbb" . should_equal expected
|
||||||
|
|
||||||
|
Test.specify "Can expand Pairs" <|
|
||||||
|
values_to_expand = [Pair.new 10 20, Pair.new "a" [30], Pair.new 40 50]
|
||||||
|
table = Table.new [["aaa", [1, 2, 3]], ["bbb", values_to_expand], ["ccc", [5, 6, 7]]]
|
||||||
|
expected = Table.new [["aaa", [1, 1, 2, 2, 3, 3]], ["bbb", [10, 20, "a", [30], 40, 50]], ["ccc", [5, 5, 6, 6, 7, 7]]]
|
||||||
|
table.expand_to_rows "bbb" . should_equal expected
|
||||||
|
|
||||||
|
Test.specify "Can expand Ranges" <|
|
||||||
|
values_to_expand = [Range.new 10 12, Range.new 20 27 step=3, Range.new 30 31]
|
||||||
|
table = Table.new [["aaa", [1, 2, 3]], ["bbb", values_to_expand], ["ccc", [5, 6, 7]]]
|
||||||
|
expected = Table.new [["aaa", [1, 1, 2, 2, 2, 3]], ["bbb", [10, 11, 20, 23, 26, 30]], ["ccc", [5, 5, 6, 6, 6, 7]]]
|
||||||
|
table.expand_to_rows "bbb" . should_equal expected
|
||||||
|
|
||||||
|
Test.specify "Can expand Date_Ranges" <|
|
||||||
|
range0 = (Date.new 2020 02 28).up_to (Date.new 2020 03 01)
|
||||||
|
range1 = (Date.new 2020 10 28).up_to (Date.new 2020 11 16) . with_step Date_Period.Week
|
||||||
|
range2 = (Date.new 2023 07 03).up_to (Date.new 2023 10 03) . with_step Date_Period.Month
|
||||||
|
values_to_expand = [range0, range1, range2]
|
||||||
|
values_expanded = [Date.new 2020 02 28, Date.new 2020 02 29] + [Date.new 2020 10 28, Date.new 2020 11 4, Date.new 2020 11 11, Date.new 2023 07 03, Date.new 2023 08 03] + [Date.new 2023 09 03]
|
||||||
|
table = Table.new [["aaa", [1, 2, 3]], ["bbb", values_to_expand], ["ccc", [5, 6, 7]]]
|
||||||
|
expected = Table.new [["aaa", [1, 1, 2, 2, 2, 3, 3, 3]], ["bbb", values_expanded], ["ccc", [5, 5, 6, 6, 6, 7, 7, 7]]]
|
||||||
|
table.expand_to_rows "bbb" . should_equal expected
|
||||||
|
|
||||||
|
Test.specify "Can expand mixed columns" <|
|
||||||
|
values_to_expand = [[10, 11], 22.up_to 26, (Date.new 2020 02 28).up_to (Date.new 2020 03 01)]
|
||||||
|
values_expanded = [10, 11, 22, 23, 24, 25, Date.new 2020 02 28, Date.new 2020 02 29]
|
||||||
|
table = Table.new [["aaa", [1, 2, 3]], ["bbb", values_to_expand], ["ccc", [5, 6, 7]]]
|
||||||
|
expected = Table.new [["aaa", [1, 1, 2, 2, 2, 2, 3, 3]], ["bbb", values_expanded], ["ccc", [5, 5, 6, 6, 6, 6, 7, 7]]]
|
||||||
|
table.expand_to_rows "bbb" . should_equal expected
|
||||||
|
|
||||||
|
Test.specify "Respects `at_least_one_row=True`" <|
|
||||||
|
values_to_expand = [[10, 11], [], [30]]
|
||||||
|
table = Table.new [["aaa", [1, 2, 3]], ["bbb", values_to_expand], ["ccc", [5, 6, 7]]]
|
||||||
|
expected = Table.new [["aaa", [1, 1, 2, 3]], ["bbb", [10, 11, Nothing, 30]], ["ccc", [5, 5, 6, 7]]]
|
||||||
|
table.expand_to_rows "bbb" at_least_one_row=True . should_equal expected
|
||||||
|
|
||||||
|
Test.specify "Respects `at_least_one_row=False`" <|
|
||||||
|
values_to_expand = [[10, 11], [], [30]]
|
||||||
|
table = Table.new [["aaa", [1, 2, 3]], ["bbb", values_to_expand], ["ccc", [5, 6, 7]]]
|
||||||
|
expected = Table.new [["aaa", [1, 1, 3]], ["bbb", [10, 11, 30]], ["ccc", [5, 5, 7]]]
|
||||||
|
table.expand_to_rows "bbb" . should_equal expected
|
||||||
|
|
||||||
|
Test.specify "Missing column" <|
|
||||||
|
table = Table.new [["aaa", [1, 2, 3]], ["notbbbb", [8, 8, 8]], ["ccc", [5, 6, 7]]]
|
||||||
|
table.expand_to_rows "bbb" . should_fail_with No_Such_Column
|
||||||
|
|
||||||
main = Test_Suite.run_main spec
|
main = Test_Suite.run_main spec
|
||||||
|
Loading…
Reference in New Issue
Block a user