mirror of
https://github.com/enso-org/enso.git
synced 2024-11-25 21:25:20 +03:00
Add Table.expand_to_rows to allow flattening vector and array values in table (#8042)
# Important Notes Also includes a fix for a reallocation bug in `InferredBuilder`.
This commit is contained in:
parent
b7d7910a88
commit
f18d1323e1
@ -585,6 +585,7 @@
|
||||
- [Implemented `Table.lookup_and_replace` for the in-memory backend.][7979]
|
||||
- [Added `Column_Operation` to `Table.set` allowing for more streamlined flow of
|
||||
deriving column values in the GUI.][8005]
|
||||
- [Implemented `Table.expand_to_rows` for the in-memory backend.][8029]
|
||||
|
||||
[debug-shortcuts]:
|
||||
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
||||
@ -833,6 +834,7 @@
|
||||
[7947]: https://github.com/enso-org/enso/pull/7947
|
||||
[7979]: https://github.com/enso-org/enso/pull/7979
|
||||
[8005]: https://github.com/enso-org/enso/pull/8005
|
||||
[8029]: https://github.com/enso-org/enso/pull/8029
|
||||
|
||||
#### Enso Compiler
|
||||
|
||||
|
@ -1999,6 +1999,43 @@ type Table
|
||||
_ = [column, fields, prefix]
|
||||
Error.throw (Unsupported_Database_Operation.Error "Table.expand_column is currently not implemented for the Database backend. You may download the table to memory using `.read` to use this feature.")
|
||||
|
||||
## GROUP Standard.Base.Conversions
   Expand aggregate values in a column to separate rows.

   For each value in the specified column, if it is an aggregate (`Vector`,
   `Range`, etc.), expand it to multiple rows, duplicating the values in the
   other columns.

   Arguments:
   - column: The column to expand.
   - at_least_one_row: for an empty aggregate value, if `at_least_one_row` is
     true, a single row is output with `Nothing` for the aggregates column; if
     false, no row is output at all.

   The following aggregate values are supported:
   - `Array`
   - `Vector`
   - `List`
   - `Range`
   - `Date_Range`
   - `Pair`

   Any other values are treated as non-aggregate values, and their rows are kept
   unchanged.

   In in-memory tables, it is permitted to mix values of different types.

   ! Database Backend
     This operation is currently not implemented for the Database backend: the
     call always fails with `Unsupported_Database_Operation`. Download the
     table to memory using `.read` to use this feature.

   > Example
     Expand a column of integer `Vectors` to a column of `Integer`

         table = Table.new [["aaa", [1, 2]], ["bbb", [[30, 31], [40, 41]]]]
         table.expand_to_rows "bbb"
         # => Table.new [["aaa", [1, 1, 2, 2]], ["bbb", [30, 31, 40, 41]]]
@column Widget_Helpers.make_column_name_selector
expand_to_rows : Text | Integer -> Boolean -> Table ! Type_Error | No_Such_Column | Index_Out_Of_Bounds
expand_to_rows self column at_least_one_row=False =
    # The arguments are intentionally unused; binding them silences warnings.
    _ = [column, at_least_one_row]
    Error.throw (Unsupported_Database_Operation.Error "Table.expand_to_rows is currently not implemented for the Database backend. You may download the table to memory using `.read` to use this feature.")
|
||||
|
||||
## GROUP Standard.Base.Conversions
|
||||
Cast the selected columns to a specific type.
|
||||
|
||||
|
@ -16,12 +16,20 @@ type Convertible_To_Rows
|
||||
- getter: Get the value for a specified row.
|
||||
Value length:Integer (getter : Integer->Any)
|
||||
|
||||
## PRIVATE
   Collect the value of every row, in index order from `0` up to (but not
   including) `length`, into a `Vector` by calling `getter` for each index.
to_vector : Vector Any
to_vector self =
    row_indices = 0.up_to self.length
    row_indices.map index-> self.getter index
|
||||
|
||||
## PRIVATE
   A `Vector` is viewed as rows directly: its length and its `get` accessor.
Convertible_To_Rows.from (that : Vector) = Convertible_To_Rows.Value that.length that.get

## PRIVATE
   An `Array` is viewed as rows directly: its length and its `get` accessor.
Convertible_To_Rows.from (that : Array) = Convertible_To_Rows.Value that.length that.get

## PRIVATE
   A `List` is first converted to a `Vector`, which is then converted to rows.
Convertible_To_Rows.from (that : List) = Convertible_To_Rows.from that.to_vector

## PRIVATE
   A `Range` is viewed as rows directly: its length and its `get` accessor.
Convertible_To_Rows.from (that : Range) = Convertible_To_Rows.Value that.length that.get
|
||||
|
||||
|
@ -1189,6 +1189,42 @@ type Table
|
||||
expand_column self (column : Text | Integer) (fields : Vector | Nothing = Nothing) (prefix : Text | Nothing = Nothing) =
|
||||
Expand_Objects_Helpers.expand_column self column fields prefix
|
||||
|
||||
## GROUP Standard.Base.Conversions
   Expand aggregate values in a column to separate rows.

   For each value in the specified column, if it is an aggregate (`Vector`,
   `Range`, etc.), expand it to multiple rows, duplicating the values in the
   other columns.

   Arguments:
   - column: The column to expand.
   - at_least_one_row: for an empty aggregate value, if `at_least_one_row` is
     true, a single row is output with `Nothing` for the aggregates column; if
     false, no row is output at all.

   The following aggregate values are supported:
   - `Array`
   - `Vector`
   - `List`
   - `Range`
   - `Date_Range`
   - `Pair`

   Any other values are treated as non-aggregate values, and their rows are kept
   unchanged.

   In in-memory tables, it is permitted to mix values of different types.

   > Example
     Expand a column of integer `Vectors` to a column of `Integer`

         table = Table.new [["aaa", [1, 2]], ["bbb", [[30, 31], [40, 41]]]]
         table.expand_to_rows "bbb"
         # => Table.new [["aaa", [1, 1, 2, 2]], ["bbb", [30, 31, 40, 41]]]
@column Widget_Helpers.make_column_name_selector
expand_to_rows : Text | Integer -> Boolean -> Table ! Type_Error | No_Such_Column | Index_Out_Of_Bounds
expand_to_rows self column at_least_one_row=False =
    # The actual work lives in the internal helper module.
    Expand_Objects_Helpers.expand_to_rows self column at_least_one_row
|
||||
|
||||
## ALIAS filter rows
|
||||
GROUP Standard.Base.Selections
|
||||
|
||||
|
@ -1,6 +1,5 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Data.Text.Regex.Regex_Syntax_Error
|
||||
import Standard.Base.Errors.Common.Index_Out_Of_Bounds
|
||||
import Standard.Base.Errors.Common.Type_Error
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
import Standard.Base.Errors.Unimplemented.Unimplemented
|
||||
@ -9,7 +8,6 @@ from Standard.Base.Metadata import make_single_choice
|
||||
import project.Data.Match_Columns.Match_Columns
|
||||
import project.Data.Table.Table
|
||||
import project.Errors.Invalid_JSON_Format
|
||||
import project.Errors.No_Such_Column
|
||||
import project.Internal.Expand_Objects_Helpers
|
||||
import project.Internal.Parse_To_Table
|
||||
import project.Internal.Widget_Helpers
|
||||
|
@ -1,11 +1,17 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import Standard.Base.Errors.Common.Index_Out_Of_Bounds
|
||||
import Standard.Base.Errors.Common.Type_Error
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
|
||||
import project.Data.Table.Table
|
||||
import project.Data.Column.Column
|
||||
import project.Data.Conversions.Convertible_To_Columns.Convertible_To_Columns
|
||||
import project.Data.Conversions.Convertible_To_Rows.Convertible_To_Rows
|
||||
import project.Errors.No_Such_Column
|
||||
import project.Internal.Fan_Out
|
||||
import project.Internal.Java_Exports
|
||||
from project.Internal.Java_Exports import make_inferred_builder
|
||||
|
||||
## PRIVATE
|
||||
expand_column : Table -> (Text | Integer) -> ((Vector Text) | Nothing) -> (Text | Nothing) -> Table
|
||||
@ -29,6 +35,45 @@ expand_column table column fields prefix =
|
||||
|
||||
Table.new output_builder.to_vector
|
||||
|
||||
## PRIVATE
   Expand aggregate values in a column to separate rows.

   This is the in-memory implementation behind `Table.expand_to_rows`.

   For each value in the specified column, if it is an aggregate (`Vector`,
   `Range`, etc.), expand it to multiple rows, duplicating the values in the
   other columns.

   Arguments:
   - table: The table to transform.
   - column: The column to expand.
   - at_least_one_row: for an empty aggregate value, if `at_least_one_row` is
     true, a single row is output with `Nothing` for the aggregates column; if
     false, no row is output at all.

   The following aggregate values are supported:
   - `Array`
   - `Vector`
   - `List`
   - `Range`
   - `Date_Range`
   - `Pair`

   Any other values are treated as non-aggregate values, and their rows are kept
   unchanged.

   In in-memory tables, it is permitted to mix values of different types.

   > Example
     Expand a column of integer `Vectors` to a column of `Integer`

         table = Table.new [["aaa", [1, 2]], ["bbb", [[30, 31], [40, 41]]]]
         table.expand_to_rows "bbb"
         # => Table.new [["aaa", [1, 1, 2, 2]], ["bbb", [30, 31, 40, 41]]]
@column Widget_Helpers.make_column_name_selector
expand_to_rows : Table -> Text | Integer -> Boolean -> Table ! Type_Error | No_Such_Column | Index_Out_Of_Bounds
expand_to_rows table column at_least_one_row=False =
    # The type ascription converts each cell to `Convertible_To_Rows` before
    # its elements are listed.
    row_expander : Any -> Vector
    row_expander value:Convertible_To_Rows = value.to_vector

    # An inferred builder is used so the expanded column may hold any values,
    # not just text (the default builder of `fan_out_to_rows`).
    Fan_Out.fan_out_to_rows table column row_expander at_least_one_row column_builder=make_inferred_builder
|
||||
|
||||
## PRIVATE
|
||||
create_table_from_objects : Any -> ((Vector Text) | Nothing) -> Table
|
||||
create_table_from_objects (value : Convertible_To_Rows) (fields : Vector | Nothing) = if fields.is_nothing.not && fields.is_empty then Error.throw (Illegal_Argument.Error "The fields parameter cannot be empty.") else
|
||||
|
@ -0,0 +1,281 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import project.Data.Column.Column
|
||||
import project.Data.Table.Table
|
||||
import project.Data.Type.Value_Type.Value_Type
|
||||
import project.Internal.Problem_Builder.Problem_Builder
|
||||
|
||||
from project.Errors import Column_Count_Exceeded, Column_Count_Mismatch
|
||||
from project.Internal.Java_Exports import make_string_builder
|
||||
|
||||
polyglot java import org.enso.table.data.mask.OrderMask
|
||||
|
||||
## PRIVATE
   Replace one column of a table with a set of columns derived from it.

   `function` is applied to every element of the input column and yields a
   vector of output values; these are spread over new columns which take the
   place of the original column.

   Arguments:
   - table: The table to transform.
   - input_column_id: The name or index of the column to transform.
   - function: A function that transforms a single element of the input
     column to multiple values.
   - column_count: Forwarded to `map_columns_to_multiple`; `Nothing` lets the
     number of columns grow to fit the data.
   - column_builder: Called with a capacity to create the storage builder of
     each new column.
   - on_problems: Specifies the behavior when a problem occurs.
fan_out_to_columns : Table -> Text | Integer -> (Any -> Vector Any) -> Integer | Nothing -> (Integer -> Any) -> Problem_Behavior -> Table | Nothing
fan_out_to_columns table input_column_id function column_count=Nothing column_builder=make_string_builder on_problems=Report_Error =
    problems = Problem_Builder.new
    # NOTE(review): uses `table.get` while `fan_out_to_rows_and_columns` uses
    # `table.at`; the visible callers validate the column with `table.at` first.
    source_column = table.get input_column_id
    raw_columns = map_columns_to_multiple source_column function column_count column_builder=column_builder problem_builder=problems
    unique_columns = rename_new_columns table source_column.name raw_columns problems
    result = replace_column_with_columns table source_column unique_columns
    problems.attach_problems_after on_problems result
|
||||
|
||||
## PRIVATE
   Transform a column by applying the given function to the values in the
   column. The function produces multiple outputs, so each row is duplicated,
   with each row getting a distinct output value in place of the original
   input value. The other column values are just duplicated.

   Arguments:
   - table: The table to transform.
   - input_column_id: The name or index of the column to transform.
   - function: A function that transforms a single element of the input
     column to multiple values.
   - at_least_one_row: When true, if the function returns an empty list, a
     single row is output with `Nothing` for the transformed column. If false,
     the row is not output at all.
   - column_builder: Called with a capacity to create the storage builder of
     the output column.
   - on_problems: Specifies the behavior when a problem occurs.
fan_out_to_rows : Table -> Text | Integer -> (Any -> Vector Any) -> Boolean -> (Integer -> Any) -> Problem_Behavior -> Table
fan_out_to_rows table input_column_id function at_least_one_row=False column_builder=make_string_builder on_problems=Report_Error =
    # Resolve the selector to the real column name, so that an `Integer` index
    # is never used as the name of the output column.
    input_column_name = table.at input_column_id . name
    # Treat this as a special case of fan_out_to_rows_and_columns, with one
    # column. Wrap the provided function to convert each value to a singleton
    # `Vector`.
    wrapped_function x = function x . map y-> [y]
    column_names = [input_column_name]
    fan_out_to_rows_and_columns table input_column_name wrapped_function column_names at_least_one_row=at_least_one_row column_builder=column_builder on_problems=on_problems
|
||||
|
||||
## PRIVATE
   Transform a column by applying the given function to the values in the
   column. The function returns a `Vector` of `Vectors`. Each inner vector turns
   into multiple new columns in a single row. Each inner vector within the outer
   vector produces an output row, so each row is duplicated, with each row
   getting a distinct set of output values in place of the original input value.
   The other column values are just duplicated.

   ! Error Conditions

     The inner vectors should all have the same number of values, which should
     match the provided `column_names`. If a value is too short, it will be
     padded with Nothing, and if it is too long, it will be truncated. In either
     case, Column_Count_Mismatch will be added as a warning. (It is expected
     that the caller of this private method will ensure that the provided
     function will produce inner vectors of the correct length, but we check for
     it anyway.)

   > Example
     f("12 34 56") -> [[1, 2], [3, 4], [5, 6]]

     foo | bar      | baz
     ----+----------+----
     x   | 12 34 56 | y    ===>
     ... | ...      | ...

     foo | bar 1 | bar 2 | baz
     ----+-------+-------+----
     x   | 1     | 2     | y
     x   | 3     | 4     | y
     x   | 5     | 6     | y
     ... | ...   | ...   | ...

   Arguments:
   - table: The table to transform.
   - input_column_id: The name or index of the column to transform.
   - function: A function that transforms a single element of the input
     column to a `Vector` of `Vector` of values.
   - column_names: The names for the generated columns.
   - at_least_one_row: When true, if the function returns an empty vector, a
     single row is output with `Nothing` in every generated column. If false,
     the row is not output at all.
   - column_builder: Called with a capacity to create the storage builder of
     each generated column.
   - on_problems: Specifies the behavior when a problem occurs.
fan_out_to_rows_and_columns : Table -> Text | Integer -> (Any -> Vector (Vector Any)) -> Vector Text -> Boolean -> (Integer -> Any) -> Problem_Behavior -> Table
fan_out_to_rows_and_columns table input_column_id function column_names at_least_one_row=False column_builder=make_string_builder on_problems=Report_Error =
    problem_builder = Problem_Builder.new
    unique = table.column_naming_helper.create_unique_name_strategy

    input_column = table.at input_column_id
    input_storage = input_column.java_column.getStorage
    num_input_rows = input_storage.size

    num_output_columns = column_names.length

    # Guess that most of the time, we'll get at least one value for each input.
    initial_size = input_column.length
    # Accumulates the outputs of the function.
    output_column_builders = Vector.new num_output_columns _-> column_builder initial_size
    # Accumulates repeated position indices for the order mask.
    order_mask_positions = Vector.new_builder initial_size

    # Substitute a single all-`Nothing` row for an empty result when requested.
    maybe_add_empty_row vecs =
        should_add_empty_row = vecs.is_empty && at_least_one_row
        if should_add_empty_row.not then vecs else
            empty_row = Vector.fill num_output_columns Nothing
            [empty_row]

    0.up_to num_input_rows . each i->
        input_value = input_storage.getItemBoxed i
        output_values = function input_value |> maybe_add_empty_row
        # Append each group of values to the builder.
        output_values.each row_unchecked->
            row = uniform_length num_output_columns row_unchecked problem_builder
            row.each_with_index column_index-> value-> output_column_builders.at column_index . append value
        # Append n copies of the input row position, n = # of output values.
        repeat_each output_values.length <| order_mask_positions.append i

    # Reserve the non-input column names that will not be changing.
    non_input_columns = table.columns.filter c-> c.name != input_column.name
    unique.mark_used <| non_input_columns.map .name

    # Build the output column
    output_storages = output_column_builders.map .seal
    output_columns = output_storages.map_with_index i-> output_storage->
        column_name = unique.make_unique <| column_names.at i
        Column.from_storage column_name output_storage

    # Build the order mask.
    order_mask = OrderMask.new (order_mask_positions.to_vector)

    # Build the new table, replacing the input column with the new output
    # columns. The match is done on the resolved column name, so that it also
    # works when `input_column_id` is an `Integer` index.
    new_columns_unflattened = table.columns.map column->
        case column.name == input_column.name of
            True ->
                # Replace the input column with the output columns.
                output_columns
            False ->
                # Build a new column from the old one with the mask
                old_storage = column.java_column.getStorage
                new_storage = old_storage.applyMask order_mask
                [Column.from_storage column.name new_storage]
    new_columns = new_columns_unflattened.flatten

    new_table = Table.new new_columns
    problem_builder.attach_problems_after on_problems new_table
|
||||
|
||||
## PRIVATE

   Map a multi-valued function over a column and return the results as set of
   output columns.

   Returns a `Vector` of `Column`s. Problems are not returned; they are
   reported to the provided `problem_builder`.

   Arguments:
   - input_column: The column to transform.
   - function: A function that transforms a single element of `input_column`
     to multiple values.
   - column_count: The number of columns to split to.
     If `Nothing` then columns will be added to fit all data.
     If the data exceeds the `column_count`, the excess values are dropped and
     a `Column_Count_Exceeded` problem is reported to `problem_builder`.
   - column_builder: Called with a capacity to create the storage builder of
     each output column.
   - problem_builder: Collects the problems encountered.
map_columns_to_multiple : Column -> (Any -> Vector Any) -> Integer | Nothing -> (Integer -> Any) -> Problem_Builder -> Vector Column
map_columns_to_multiple input_column function column_count column_builder=make_string_builder problem_builder =
    num_rows = input_column.length
    input_storage = input_column.java_column.getStorage

    builders = case column_count of
        Nothing ->
            builders = Vector.new_builder

            0.up_to num_rows . each i->
                input_value = input_storage.getItemBoxed i
                output_values = function input_value

                # Add more builders if necessary to accommodate `output_values`.
                if output_values.length > builders.length then
                    num_builders_needed = output_values.length - builders.length
                    repeat_each num_builders_needed <|
                        builder = column_builder num_rows

                        # Pad the new builder with nulls for the `i` rows that
                        # were already processed.
                        num_nulls_needed = i
                        builder.appendNulls num_nulls_needed

                        builders.append builder

                # Add `output_values` to builders; if there are more builders
                # than `output_values`, pad with null.
                0.up_to builders.length . each j->
                    builders.at j . appendNoGrow (output_values.get j Nothing)

            builders.to_vector

        _ : Integer ->
            builders = Vector.new column_count (_-> column_builder num_rows)

            output_lengths = 0.up_to num_rows . map i->
                input_value = input_storage.getItemBoxed i
                output_values = function input_value

                # Add `output_values` to builders; if there are more builders
                # than `output_values`, pad with null. Values beyond the last
                # builder are dropped.
                0.up_to builders.length . each j->
                    builders.at j . appendNoGrow (output_values.get j Nothing)

                output_values.length

            max_output_length = maximum output_lengths

            # `maximum` yields `Nothing` for an empty column; in that case no
            # row can have exceeded `column_count`, so skip the comparison.
            if max_output_length.is_nothing.not then
                if max_output_length > column_count then
                    problem = Column_Count_Exceeded.Error column_count max_output_length
                    problem_builder.report_other_warning problem

            builders

    # Name columns. If there's only one, use the original column name.
    new_column_names = case builders.length of
        1 -> [input_column.name]
        _ -> 0.up_to builders.length . map i-> input_column.name + " " + (i+1).to_text

    # Build Columns.
    storages = builders.map .seal
    new_column_names.zip storages Column.from_storage
|
||||
|
||||
## PRIVATE
   Give each of the new columns a name that does not clash with the columns
   that stay in the table.

   The names of all table columns except `removed_column_name` are reserved
   first; the new columns are then renamed in order. The naming strategy is
   reported to `problem_builder` afterwards.
rename_new_columns : Table -> Text -> Vector Column -> Problem_Builder -> Vector Column
rename_new_columns table removed_column_name columns problem_builder =
    name_strategy = table.column_naming_helper.create_unique_name_strategy
    kept_columns = table.columns.filter column-> column.name != removed_column_name
    name_strategy.mark_used (kept_columns.map .name)
    renamed = columns.map column-> column.rename (name_strategy.make_unique column.name)
    problem_builder.report_unique_name_strategy name_strategy
    renamed
|
||||
|
||||
## PRIVATE
   Build a new table in which the column named like `old_column` is swapped
   for `new_columns`, keeping every other column in place.

   Name uniqueness is not checked here; the caller must ensure it beforehand.
replace_column_with_columns : Table -> Column -> Vector Column -> Table
replace_column_with_columns table old_column new_columns =
    substituted = table.columns.map column->
        if column.name == old_column.name then new_columns else [column]
    Table.new substituted.flatten
|
||||
|
||||
## PRIVATE
   Return the maximum value of the vector, as determined by the `max` method
   of its elements.

   Returns `Nothing` if the vector is empty; no error is raised in that case.
maximum : Vector Any -> Any | Nothing
maximum vec = if vec.is_empty then Nothing else
    vec.reduce (a-> b-> a.max b)
|
||||
|
||||
## PRIVATE
   Run the suspended `action` once for every index from `0` up to (but not
   including) `n`.
repeat_each : Integer -> Any -> Any
repeat_each n ~action =
    (0.up_to n).each _->
        action
|
||||
|
||||
## PRIVATE
   Bring a vector to exactly `target_length` elements.

   A vector of the right length is returned as is. Otherwise a
   `Column_Count_Mismatch` is reported to `problem_builder`, and the vector is
   padded with `Nothing` when too short or truncated when too long.
uniform_length : Integer -> Vector Any -> Problem_Builder -> Vector Any
uniform_length target_length v problem_builder =
    actual_length = v.length
    if actual_length == target_length then v else
        problem_builder.report_other_warning (Column_Count_Mismatch.Error target_length actual_length)
        if actual_length < target_length then v.pad target_length Nothing else
            v.take target_length
|
@ -1,12 +1,8 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import project.Data.Column.Column
|
||||
import project.Data.Table.Table
|
||||
import project.Data.Type.Value_Type.Value_Type
|
||||
import project.Internal.Problem_Builder.Problem_Builder
|
||||
|
||||
from project.Errors import Column_Count_Exceeded, Column_Count_Mismatch, Duplicate_Output_Column_Names, Invalid_Value_Type, Missing_Input_Columns
|
||||
from project.Internal.Java_Exports import make_string_builder
|
||||
from project.Internal.Fan_Out import all
|
||||
|
||||
polyglot java import org.enso.table.data.mask.OrderMask
|
||||
|
||||
@ -17,7 +13,7 @@ split_to_columns : Table -> Text | Integer -> Text -> Integer | Nothing -> Probl
|
||||
split_to_columns table input_column_id delimiter="," column_count=Nothing on_problems=Report_Error =
|
||||
column = table.at input_column_id
|
||||
Value_Type.expect_text column <|
|
||||
fan_out_to_columns table input_column_id (handle_nothing (_.split delimiter)) column_count on_problems
|
||||
fan_out_to_columns table input_column_id (handle_nothing (_.split delimiter)) column_count on_problems=on_problems
|
||||
|
||||
## PRIVATE
|
||||
Splits a column of text into a set of new rows.
|
||||
@ -36,7 +32,7 @@ tokenize_to_columns : Table -> Text | Integer -> Text -> Case_Sensitivity -> Int
|
||||
tokenize_to_columns table input_column_id pattern case_sensitivity column_count on_problems =
|
||||
column = table.at input_column_id
|
||||
Value_Type.expect_text column
|
||||
fan_out_to_columns table input_column_id (handle_nothing (_.tokenize pattern case_sensitivity)) column_count on_problems
|
||||
fan_out_to_columns table input_column_id (handle_nothing (_.tokenize pattern case_sensitivity)) column_count on_problems=on_problems
|
||||
|
||||
## PRIVATE
|
||||
Tokenizes a column of text into a set of new rows using a regular
|
||||
@ -102,285 +98,13 @@ regex_to_column_names pattern original_column_name =
|
||||
case group_nums_to_names.get (i+1) of
|
||||
Nothing ->
|
||||
suffix = group_number_to_column_name_suffix.at (i+1)
|
||||
default_column_namer original_column_name suffix
|
||||
original_column_name + " " + (suffix+1).to_text
|
||||
name : Text ->
|
||||
name
|
||||
|
||||
## PRIVATE
|
||||
Transform a table by transforming a column into a set of columns. Takes a
|
||||
function that maps a single element of the input column to a vector of output
|
||||
values. The original column is replaced by the new columns.
|
||||
|
||||
Arguments:
|
||||
- table: The table to transform.
|
||||
- input_column: The column to transform.
|
||||
- function: A function that transforms a single element of `input_column`
|
||||
to multiple values.
|
||||
fan_out_to_columns : Table -> Text | Integer -> (Any -> Vector Any) -> Integer | Nothing -> Problem_Behavior -> Table | Nothing
|
||||
fan_out_to_columns table input_column_id function column_count=Nothing on_problems=Report_Error =
|
||||
input_column = table.get input_column_id
|
||||
problem_builder = Problem_Builder.new
|
||||
new_columns_unrenamed = map_columns_to_multiple input_column function column_count problem_builder
|
||||
new_columns = rename_new_columns table input_column.name new_columns_unrenamed problem_builder
|
||||
new_table = replace_column_with_columns table input_column new_columns
|
||||
problem_builder.attach_problems_after on_problems new_table
|
||||
|
||||
## PRIVATE
|
||||
Transform a column by applying the given function to the values in the
|
||||
column. The function produces multiple outputs, so each row is duplicated,
|
||||
with each row getting a distinct output value in place of the original
|
||||
input value. The other column values are just duplicated.
|
||||
|
||||
Arguments:
|
||||
- table: The table to transform.
|
||||
- input_column: The column to transform.
|
||||
- function: A function that transforms a single element of `input_column`
|
||||
to multiple values.
|
||||
fan_out_to_rows : Table -> Text | Integer -> (Any -> Vector Any) -> Boolean -> Problem_Behavior -> Table
|
||||
fan_out_to_rows table input_column_id function at_least_one_row=False on_problems=Report_Error =
|
||||
## Treat this as a special case of fan_out_to_rows_and_columns, with one
|
||||
column. Wrap the provided function to convert each value to a singleton
|
||||
`Vector`.
|
||||
wrapped_function x = function x . map y-> [y]
|
||||
column_names = [input_column_id]
|
||||
fan_out_to_rows_and_columns table input_column_id wrapped_function column_names at_least_one_row=at_least_one_row on_problems=on_problems
|
||||
|
||||
## PRIVATE
   Transform a column by applying the given function to the values in the
   column. The function returns a `Vector` of `Vectors`. Each inner vector turns
   into multiple new columns in a single row. Each inner vector within the outer
   vector produces an output row, so each row is duplicated, with each row
   getting a distinct set of output values in place of the original input value.
   The other column values are just duplicated.

   ! Error Conditions

     The inner vectors should all have the same number of values, which should
     match the provided `column_names`. If a value is too short, it will be
     padded with Nothing, and if it is too long, it will be truncated. In either
     case, Column_Count_Mismatch will be added as a warning. (It is expected
     that the caller of this private method will ensure that the provided
     function will produce inner vectors of the correct length, but we check for
     it anyway.)

   > Example
     f("12 34 56") -> [[1, 2], [3, 4], [5, 6]]

     foo | bar      | baz
     ----+----------+----
     x   | 12 34 56 | y     ===>
     ... | ...      | ...

     foo | bar 1 | bar 2 | baz
     ----+-------+-------+----
     x   | 1     | 2     | y
     x   | 3     | 4     | y
     x   | 5     | 6     | y
     ... | ...   | ...   | ...

   Arguments:
   - table: The table to transform.
   - input_column_id: The name or index of the column to transform.
   - function: A function that transforms a single element of the input column
     to a `Vector` of `Vector` of values.
   - column_names: The names for the generated columns.
   - at_least_one_row: If the function returns no rows for an input value and
     this is `True`, a single row filled with `Nothing` is emitted for that
     input row; if `False`, the input row produces no output rows.
   - on_problems: Specifies the behavior when a problem occurs.
fan_out_to_rows_and_columns : Table -> Text | Integer -> (Any -> Vector (Vector Any)) -> Vector Text -> Boolean -> Problem_Behavior -> Table
fan_out_to_rows_and_columns table input_column_id function column_names at_least_one_row=False on_problems=Report_Error =
    problem_builder = Problem_Builder.new
    unique = table.column_naming_helper.create_unique_name_strategy

    input_column = table.at input_column_id
    input_storage = input_column.java_column.getStorage
    num_input_rows = input_storage.size

    num_output_columns = column_names.length

    # Guess that most of the time, we'll get at least one value for each input.
    initial_size = input_column.length
    # Accumulates the outputs of the function.
    output_column_builders = Vector.new num_output_columns _-> make_string_builder initial_size
    # Accumulates repeated position indices for the order mask.
    order_mask_positions = Vector.new_builder initial_size

    # Substitute a single all-`Nothing` row for an empty result when the caller
    # asked for at least one output row per input row.
    maybe_add_empty_row vecs =
        should_add_empty_row = vecs.is_empty && at_least_one_row
        if should_add_empty_row.not then vecs else
            empty_row = Vector.fill num_output_columns Nothing
            [empty_row]

    0.up_to num_input_rows . each i->
        input_value = input_storage.getItemBoxed i
        output_values = function input_value |> maybe_add_empty_row
        # Append each group of values to the builder.
        output_values.each row_unchecked->
            row = uniform_length num_output_columns row_unchecked problem_builder
            row.each_with_index column_ix-> v-> output_column_builders.at column_ix . append v
        # Append n copies of the input row position, n = # of output values.
        repeat_each output_values.length <| order_mask_positions.append i

    # Reserve the non-input column names that will not be changing.
    non_input_columns = table.columns.filter c-> c.name != input_column.name
    unique.mark_used <| non_input_columns.map .name

    # Build the output columns.
    output_storages = output_column_builders.map .seal
    output_columns = output_storages.map_with_index i-> output_storage->
        column_name = unique.make_unique <| column_names.at i
        Column.from_storage column_name output_storage

    # Build the order mask.
    order_mask = OrderMask.new (order_mask_positions.to_vector)

    # Build the new table, replacing the input column with the new output
    # columns. The comparison uses the resolved column name rather than
    # `input_column_id`, because the id may be an integer index that would
    # never equal any column name.
    new_columns_unflattened = table.columns.map column->
        case column.name == input_column.name of
            True ->
                # Replace the input column with the output columns.
                output_columns
            False ->
                # Build a new column from the old one with the mask
                old_storage = column.java_column.getStorage
                new_storage = old_storage.applyMask order_mask
                [Column.from_storage column.name new_storage]
    new_columns = new_columns_unflattened.flatten

    new_table = Table.new new_columns
    problem_builder.attach_problems_after on_problems new_table
|
||||
|
||||
## PRIVATE

   Map a multi-valued function over a column and return the results as a set
   of output columns.

   Returns a Vector of Columns. Problems are not returned; they are reported
   to the supplied `problem_builder`.

   Arguments:
   - input_column: The column to transform.
   - function: A function that transforms a single element of `input_column`
     to multiple values.
   - column_count: The number of columns to split to.
     If `Nothing` then columns will be added to fit all data.
     If the data exceeds the `column_count`, a `Column_Count_Exceeded` problem
     is reported to `problem_builder` as a warning.
   - problem_builder: The builder that collects any problems encountered.
map_columns_to_multiple : Column -> (Any -> Vector Any) -> Integer | Nothing -> Problem_Builder -> Vector Column
map_columns_to_multiple input_column function column_count problem_builder =
    row_count = input_column.length
    storage = input_column.java_column.getStorage

    # Write one row of outputs across all the given builders; builders beyond
    # the end of `values` receive `Nothing`.
    append_outputs target_builders values =
        0.up_to target_builders.length . each j->
            target_builders.at j . appendNoGrow (values.get j Nothing)

    builders = case column_count of
        Nothing ->
            growing_builders = Vector.new_builder

            0.up_to row_count . each row_ix->
                values = function (storage.getItemBoxed row_ix)

                # Add more builders if necessary to accommodate `values`.
                if values.length > growing_builders.length then
                    missing_count = values.length - growing_builders.length
                    repeat_each missing_count <|
                        fresh_builder = make_string_builder row_count

                        # Pad the new builder with one null per row already
                        # processed.
                        fresh_builder.appendNulls row_ix

                        growing_builders.append fresh_builder

                append_outputs growing_builders values

            growing_builders.to_vector

        _ : Integer ->
            fixed_builders = Vector.new column_count (_-> make_string_builder row_count)

            output_lengths = 0.up_to row_count . map row_ix->
                values = function (storage.getItemBoxed row_ix)
                append_outputs fixed_builders values
                values.length

            longest_output = maximum output_lengths

            if longest_output > column_count then
                problem_builder.report_other_warning (Column_Count_Exceeded.Error column_count longest_output)

            fixed_builders

    # Name columns. If there's only one, use the original column name.
    names = case builders.length of
        1 -> [input_column.name]
        _ -> 0.up_to builders.length . map ix-> default_column_namer input_column.name ix

    # Seal the builders and pair each storage with its name.
    sealed_storages = builders.map .seal
    names.zip sealed_storages Column.from_storage
|
||||
|
||||
## PRIVATE
   Rename a vector of new columns so that their names do not clash with the
   columns remaining in the table (every column except `removed_column_name`).
   The unique-name strategy is reported to `problem_builder` afterwards.
rename_new_columns : Table -> Text -> Vector Column -> Problem_Builder -> Vector Column
rename_new_columns table removed_column_name columns problem_builder =
    name_strategy = table.column_naming_helper.create_unique_name_strategy

    # Reserve the names of the columns that stay in the table.
    kept_columns = table.columns.filter (c-> c.name != removed_column_name)
    name_strategy.mark_used (kept_columns.map .name)

    renamed_columns = columns.map column->
        column.rename (name_strategy.make_unique column.name)

    problem_builder.report_unique_name_strategy name_strategy
    renamed_columns
|
||||
|
||||
## PRIVATE
   Build a new table in which the column named like `old_column` is replaced,
   in place, by `new_columns`.
   Does not ensure names are unique; that must be done before calling this.
replace_column_with_columns : Table -> Column -> Vector Column -> Table
replace_column_with_columns table old_column new_columns =
    # Map every column to a vector of columns so the result can be flattened.
    column_groups = table.columns.map c->
        if c.name == old_column.name then new_columns else [c]
    Table.new column_groups.flatten
|
||||
|
||||
## PRIVATE
   Return the maximum value of the vector.

   Returns `Nothing` if the vector is empty (no error is thrown).
maximum : Vector Any -> Any | Nothing
maximum vec = if vec.is_empty then Nothing else
    vec.reduce (a-> b-> a.max b)
|
||||
|
||||
## PRIVATE
   Wrap a function so that the wrapper returns `[]` when called with `Nothing`
   and otherwise delegates to the original function.
handle_nothing : (Any -> Any) -> (Any -> Any)
handle_nothing function = x->
    if x.is_nothing then [] else function x
|
||||
|
||||
## PRIVATE
   Evaluate the suspended `action` once for each of `n` iterations.
repeat_each : Integer -> Any -> Any
repeat_each n ~action =
    (0.up_to n).each (_-> action)
|
||||
|
||||
## PRIVATE
   Build a column name from a base name and a zero-based index: the base name,
   a space, then the one-based position.
default_column_namer : Text -> Integer -> Text
default_column_namer base_name i =
    position = (i + 1).to_text
    base_name + " " + position
|
||||
|
||||
## PRIVATE
   Return `v` adjusted to exactly `target_length` elements: shorter vectors are
   padded with `Nothing`, longer ones are truncated. Whenever an adjustment is
   made, a `Column_Count_Mismatch` is reported to `problem_builder` as a
   warning.
uniform_length : Integer -> Vector Any -> Problem_Builder -> Vector Any
uniform_length target_length v problem_builder =
    actual_length = v.length
    if actual_length == target_length then v else
        problem_builder.report_other_warning (Column_Count_Mismatch.Error target_length actual_length)
        # Too short: pad. Too long: truncate.
        if actual_length < target_length then v.pad target_length Nothing else v.take target_length
|
||||
|
@ -155,7 +155,13 @@ public class InferredBuilder extends Builder {
|
||||
}
|
||||
|
||||
private void retypeToMixed() {
|
||||
ObjectBuilder objectBuilder = new MixedBuilder(initialSize);
|
||||
// The new internal builder must be at least `currentSize` so it can store
|
||||
// all the current values. It must also be at least 'initialSize' since the
|
||||
// caller might be using appendNoGrow and is expecting to write at least
|
||||
// that many values.
|
||||
int capacity = Math.max(initialSize, currentSize);
|
||||
|
||||
ObjectBuilder objectBuilder = new MixedBuilder(capacity);
|
||||
currentBuilder.retypeToMixed(objectBuilder.getData());
|
||||
objectBuilder.setCurrentSize(currentBuilder.getCurrentSize());
|
||||
objectBuilder.setPreExistingProblems(currentBuilder.getProblems());
|
||||
|
@ -553,6 +553,12 @@ spec setup =
|
||||
table = table_builder [["aaa", [1, 2]], ["bbb", [3, 4]], ["ccc", [5, 6]]]
|
||||
table.expand_column "bbb" . should_fail_with Unsupported_Database_Operation
|
||||
|
||||
# The in-memory functionality of `expand_to_rows` is tested in test/Table_Tests/src/In_Memory/Table_Conversion_Spec.enso
|
||||
if setup.is_database then Test.group prefix+"Table.expand_to_rows" <|
|
||||
Test.specify "should report unsupported" <|
|
||||
table = table_builder [["aaa", [1, 2]], ["bbb", [3, 4]], ["ccc", [5, 6]]]
|
||||
table.expand_to_rows "bbb" . should_fail_with Unsupported_Database_Operation
|
||||
|
||||
if setup.is_database.not then Test.group prefix+"Table/Column auto value type" <|
|
||||
Test.specify "should allow to narrow down types of a Mixed column" <|
|
||||
[True, False].each shrink_types->
|
||||
|
@ -30,3 +30,11 @@ spec = Test.group "[In-Memory] Storage Builders" <|
|
||||
storage = builder.seal
|
||||
column = Column.from_storage "X" storage
|
||||
column.to_vector . should_equal vector
|
||||
|
||||
Test.specify "Inferred Builder should correctly resize when retyping to a mixed column, with an underestimated initial size" <|
|
||||
mixed_values = [10, 11, 22, 23, 24, 25, '2020-02-28']
|
||||
builder = make_inferred_builder 3
|
||||
mixed_values.map v-> builder.append v
|
||||
storage = builder.seal
|
||||
column = Column.from_storage "X" storage
|
||||
column.to_vector . should_equal mixed_values
|
||||
|
29
test/Table_Tests/src/In_Memory/Fan_Out_Spec.enso
Normal file
29
test/Table_Tests/src/In_Memory/Fan_Out_Spec.enso
Normal file
@ -0,0 +1,29 @@
|
||||
from Standard.Base import all
|
||||
|
||||
from project.Util import all
|
||||
|
||||
import Standard.Table.Internal.Fan_Out
|
||||
from Standard.Table import Table
|
||||
|
||||
import Standard.Test.Extensions
|
||||
from Standard.Test import Test, Test_Suite, Problems
|
||||
|
||||
spec =
    Test.group "Fan_Out" <|
        # Splitting "bar" on "b" yields up to three pieces per row, so the
        # column fans out into three columns, padded with Nothing.
        Test.specify "can do fan_out_to_columns " <|
            input = Table.new [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]]]
            expected_rows = [[0, "a", "c", Nothing], [1, "c", "d", "ef"], [2, "gh", "ij", "u"]]
            expected = Table.from_rows ["foo", "bar 1", "bar 2", "bar 3"] expected_rows
            actual = Fan_Out.fan_out_to_columns input "bar" (_.split "b")
            actual.should_equal expected

        # The same split fanned out to rows: one output row per piece, with
        # the "foo" value repeated.
        Test.specify "can do fan_out_to_rows" <|
            input = Table.new [["foo", [0, 1, 2]], ["bar", ["abc", "cbdbef", "ghbijbu"]]]
            expected_rows = [[0, "a"], [0, "c"], [1, "c"], [1, "d"], [1, "ef"], [2, "gh"], [2, "ij"], [2, "u"]]
            expected = Table.from_rows ["foo", "bar"] expected_rows
            actual = Fan_Out.fan_out_to_rows input "bar" (_.split "b")
            actual.should_equal expected
|
||||
|
||||
# Entry point: passes `spec` to `Test_Suite.run_main`.
main = Test_Suite.run_main spec
|
@ -2,8 +2,10 @@ from Standard.Base import all
|
||||
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
|
||||
from Standard.Table.Extensions.Table_Conversions import all
|
||||
import Standard.Table.Data.Type.Value_Type.Value_Type
|
||||
from Standard.Table import Table, Column
|
||||
from Standard.Table.Errors import No_Such_Column
|
||||
from Standard.Table.Extensions.Table_Conversions import all
|
||||
|
||||
from Standard.Test import Test, Test_Suite, Problems
|
||||
import Standard.Test.Extensions
|
||||
@ -202,4 +204,76 @@ spec =
|
||||
expected = Table.new [["aaa", [1, 2]], ["expanded last", ["Smith", Nothing]], ["expanded height", [Nothing, 1.9]], ["expanded foo", [Nothing, Nothing]], ["ccc", [5, 6]]]
|
||||
table.expand_column "bbb" ["last", "height", "foo"] "expanded " . should_equal expected
|
||||
|
||||
Test.group "expand_to_rows" <|
|
||||
Test.specify "Can expand single values" <|
|
||||
values_to_expand = [3, 4]
|
||||
table = Table.new [["aaa", [1, 2]], ["bbb", values_to_expand], ["ccc", [5, 6]]]
|
||||
expected = Table.new [["aaa", [1, 2]], ["bbb", [3, 4]], ["ccc", [5, 6]]]
|
||||
table.expand_to_rows "bbb" . should_equal expected
|
||||
|
||||
Test.specify "Can expand Vectors" <|
|
||||
values_to_expand = [[10, 11], [20, 21, 22], [30]]
|
||||
table = Table.new [["aaa", [1, 2, 3]], ["bbb", values_to_expand], ["ccc", [5, 6, 7]]]
|
||||
expected = Table.new [["aaa", [1, 1, 2, 2, 2, 3]], ["bbb", [10, 11, 20, 21, 22, 30]], ["ccc", [5, 5, 6, 6, 6, 7]]]
|
||||
r = table.expand_to_rows "bbb"
|
||||
r . should_equal expected
|
||||
r.at "bbb" . value_type . should_equal Value_Type.Integer
|
||||
|
||||
Test.specify "Can expand Arrays" <|
|
||||
values_to_expand = [[10, 11].to_array, [20, 21, 22].to_array, [30].to_array]
|
||||
table = Table.new [["aaa", [1, 2, 3]], ["bbb", values_to_expand], ["ccc", [5, 6, 7]]]
|
||||
expected = Table.new [["aaa", [1, 1, 2, 2, 2, 3]], ["bbb", [10, 11, 20, 21, 22, 30]], ["ccc", [5, 5, 6, 6, 6, 7]]]
|
||||
table.expand_to_rows "bbb" . should_equal expected
|
||||
|
||||
Test.specify "Can expand Lists" <|
|
||||
values_to_expand = [[10, 11].to_list, [20, 21, 22].to_list, [30].to_list]
|
||||
table = Table.new [["aaa", [1, 2, 3]], ["bbb", values_to_expand], ["ccc", [5, 6, 7]]]
|
||||
expected = Table.new [["aaa", [1, 1, 2, 2, 2, 3]], ["bbb", [10, 11, 20, 21, 22, 30]], ["ccc", [5, 5, 6, 6, 6, 7]]]
|
||||
table.expand_to_rows "bbb" . should_equal expected
|
||||
|
||||
Test.specify "Can expand Pairs" <|
|
||||
values_to_expand = [Pair.new 10 20, Pair.new "a" [30], Pair.new 40 50]
|
||||
table = Table.new [["aaa", [1, 2, 3]], ["bbb", values_to_expand], ["ccc", [5, 6, 7]]]
|
||||
expected = Table.new [["aaa", [1, 1, 2, 2, 3, 3]], ["bbb", [10, 20, "a", [30], 40, 50]], ["ccc", [5, 5, 6, 6, 7, 7]]]
|
||||
table.expand_to_rows "bbb" . should_equal expected
|
||||
|
||||
Test.specify "Can expand Ranges" <|
|
||||
values_to_expand = [Range.new 10 12, Range.new 20 27 step=3, Range.new 30 31]
|
||||
table = Table.new [["aaa", [1, 2, 3]], ["bbb", values_to_expand], ["ccc", [5, 6, 7]]]
|
||||
expected = Table.new [["aaa", [1, 1, 2, 2, 2, 3]], ["bbb", [10, 11, 20, 23, 26, 30]], ["ccc", [5, 5, 6, 6, 6, 7]]]
|
||||
table.expand_to_rows "bbb" . should_equal expected
|
||||
|
||||
Test.specify "Can expand Date_Ranges" <|
|
||||
range0 = (Date.new 2020 02 28).up_to (Date.new 2020 03 01)
|
||||
range1 = (Date.new 2020 10 28).up_to (Date.new 2020 11 16) . with_step Date_Period.Week
|
||||
range2 = (Date.new 2023 07 03).up_to (Date.new 2023 10 03) . with_step Date_Period.Month
|
||||
values_to_expand = [range0, range1, range2]
|
||||
values_expanded = [Date.new 2020 02 28, Date.new 2020 02 29] + [Date.new 2020 10 28, Date.new 2020 11 4, Date.new 2020 11 11, Date.new 2023 07 03, Date.new 2023 08 03] + [Date.new 2023 09 03]
|
||||
table = Table.new [["aaa", [1, 2, 3]], ["bbb", values_to_expand], ["ccc", [5, 6, 7]]]
|
||||
expected = Table.new [["aaa", [1, 1, 2, 2, 2, 3, 3, 3]], ["bbb", values_expanded], ["ccc", [5, 5, 6, 6, 6, 7, 7, 7]]]
|
||||
table.expand_to_rows "bbb" . should_equal expected
|
||||
|
||||
Test.specify "Can expand mixed columns" <|
|
||||
values_to_expand = [[10, 11], 22.up_to 26, (Date.new 2020 02 28).up_to (Date.new 2020 03 01)]
|
||||
values_expanded = [10, 11, 22, 23, 24, 25, Date.new 2020 02 28, Date.new 2020 02 29]
|
||||
table = Table.new [["aaa", [1, 2, 3]], ["bbb", values_to_expand], ["ccc", [5, 6, 7]]]
|
||||
expected = Table.new [["aaa", [1, 1, 2, 2, 2, 2, 3, 3]], ["bbb", values_expanded], ["ccc", [5, 5, 6, 6, 6, 6, 7, 7]]]
|
||||
table.expand_to_rows "bbb" . should_equal expected
|
||||
|
||||
Test.specify "Respects `at_least_one_row=True`" <|
|
||||
values_to_expand = [[10, 11], [], [30]]
|
||||
table = Table.new [["aaa", [1, 2, 3]], ["bbb", values_to_expand], ["ccc", [5, 6, 7]]]
|
||||
expected = Table.new [["aaa", [1, 1, 2, 3]], ["bbb", [10, 11, Nothing, 30]], ["ccc", [5, 5, 6, 7]]]
|
||||
table.expand_to_rows "bbb" at_least_one_row=True . should_equal expected
|
||||
|
||||
Test.specify "Respects `at_least_one_row=False`" <|
|
||||
values_to_expand = [[10, 11], [], [30]]
|
||||
table = Table.new [["aaa", [1, 2, 3]], ["bbb", values_to_expand], ["ccc", [5, 6, 7]]]
|
||||
expected = Table.new [["aaa", [1, 1, 3]], ["bbb", [10, 11, 30]], ["ccc", [5, 5, 7]]]
|
||||
table.expand_to_rows "bbb" . should_equal expected
|
||||
|
||||
Test.specify "Missing column" <|
|
||||
table = Table.new [["aaa", [1, 2, 3]], ["notbbbb", [8, 8, 8]], ["ccc", [5, 6, 7]]]
|
||||
table.expand_to_rows "bbb" . should_fail_with No_Such_Column
|
||||
|
||||
# Entry point: passes `spec` to `Test_Suite.run_main`.
main = Test_Suite.run_main spec
|
||||
|
Loading…
Reference in New Issue
Block a user