mirror of
https://github.com/enso-org/enso.git
synced 2024-11-22 22:10:15 +03:00
Added table.from_union and respective tests (#9343)
Table.from_union creates a single new table by appending the rows of every table in the given vector. This is especially helpful when a grouped method is run multiple times, as the per-group results can be combined into one unified result set.
This commit is contained in:
parent
bd92567a93
commit
f7295f3060
@ -631,6 +631,8 @@
|
||||
- [Implemented Excel Data Link][9346]
|
||||
- [Added Table.running][9346]
|
||||
- [Added Google_Analytics.Read][9239]
|
||||
- [Added `Table.from_union` to allow expanding a vector of tables in one
|
||||
step][9343]
|
||||
|
||||
[debug-shortcuts]:
|
||||
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
||||
@ -917,6 +919,7 @@
|
||||
[9346]: https://github.com/enso-org/enso/pull/9346
|
||||
[9382]: https://github.com/enso-org/enso/pull/9382
|
||||
[9239]: https://github.com/enso-org/enso/pull/9239
|
||||
[9343]: https://github.com/enso-org/enso/pull/9343
|
||||
|
||||
#### Enso Compiler
|
||||
|
||||
|
@ -2313,26 +2313,7 @@ type Table
|
||||
`Mixed` type may not be supported by most Database backends.
|
||||
union : (Table | Vector Table) -> Match_Columns -> Boolean | Report_Unmatched -> Boolean -> Problem_Behavior -> Table
|
||||
union self tables:(Table | Vector) match_columns=Match_Columns.By_Name keep_unmatched_columns=Report_Unmatched allow_type_widening=True on_problems=Report_Warning =
|
||||
all_tables = case tables of
|
||||
v : Vector -> [self] + (v.map t-> Table.from t)
|
||||
single_table -> [self, single_table]
|
||||
all_tables.if_not_error <|
|
||||
## We keep separate problem builders, because if we are reporting `No_Output_Columns`,
|
||||
we only want to add a cause coming from unification; matching reports problems that would not fit this error.
|
||||
problem_builder_for_matching = Problem_Builder.new
|
||||
problem_builder_for_unification = Problem_Builder.new
|
||||
matched_column_sets = Match_Columns_Helpers.match_columns all_tables match_columns keep_unmatched_columns problem_builder_for_matching
|
||||
result_row_count = all_tables.fold 0 c-> t-> c + t.row_count
|
||||
merged_columns = matched_column_sets.map column_set->
|
||||
case Table_Helpers.unify_result_type_for_union column_set all_tables allow_type_widening problem_builder_for_unification of
|
||||
Nothing -> Nothing
|
||||
result_type : Value_Type ->
|
||||
concat_columns column_set all_tables result_type result_row_count on_problems
|
||||
good_columns = merged_columns.filter Filter_Condition.Not_Nothing
|
||||
problem_builder_for_matching.attach_problems_before on_problems <|
|
||||
problem_builder_for_unification.attach_problems_before on_problems <|
|
||||
if good_columns.is_empty then problem_builder_for_unification.raise_no_output_columns_with_cause else
|
||||
Table.new good_columns
|
||||
Table.from_union ([self] + Vector.unify_vector_or_element tables) match_columns keep_unmatched_columns allow_type_widening on_problems
|
||||
|
||||
## ALIAS drop_missing_rows, dropna
|
||||
GROUP Standard.Base.Selections
|
||||
@ -2917,6 +2898,116 @@ type Table
|
||||
column_naming_helper : Column_Naming_Helper
|
||||
column_naming_helper self = Column_Naming_Helper.in_memory
|
||||
|
||||
## ALIAS from
|
||||
GROUP Standard.Base.Calculations
|
||||
ICON dataframes_union
|
||||
Appends records from the vector of tables into a single table.
|
||||
|
||||
Arguments:
|
||||
- tables: A vector of tables to union together. The
|
||||
tables are concatenated in the order they are specified.
|
||||
- match_columns: Specifies how to match the columns.
|
||||
- If `Match_Columns.By_Name` - the columns are matched by name across
|
||||
all provided tables.
|
||||
If unmatched columns are to be dropped, the resulting table will keep
|
||||
only the set of columns that appear in all provided tables, in the
|
||||
relative order that they appeared in the first table.
|
||||
If unmatched columns are kept, they are added in the order of
|
||||
appearance - i.e. first all columns from the first table will be added in the
|
||||
original order, then any columns from the second table that were not
|
||||
matched will be added at the end (preserving their relative order),
|
||||
and so on for all the remaining tables.
|
||||
- If `Match_Columns.By_Position` - the columns are mapped by position.
|
||||
If unmatched columns are to be dropped, the resulting table will have
|
||||
as many columns as the table that had the fewest columns and the
|
||||
column names of the first table will be used.
|
||||
If unmatched columns are kept, the resulting table will have as many
|
||||
columns as the table with the most columns. Since the first table may
|
||||
not have all the necessary columns to provide column names for the
|
||||
result, the result will have column names taken from the first table
|
||||
that has the biggest number of columns.
|
||||
- keep_unmatched_columns: If set to `True`, unmatched columns are kept
|
||||
and are padded with `Nothing` for tables that did not have them.
|
||||
If set to `False`, only the common subset of columns is kept - any
|
||||
column that is not present in all tables is dropped. Defaults to
|
||||
`Report_Unmatched`, which behaves like `True` - unmatched columns are
|
||||
kept and padded with `Nothing`, but a problem is reported.
|
||||
- allow_type_widening: Specifies if the resulting column type should be
|
||||
adjusted to fit columns from all arguments. If `True`, a common type
|
||||
will be chosen for each column (see "Unifying Column Types" below).
|
||||
If `False`, the resulting column type will be the same as in the first
|
||||
table containing the column. In this case, all columns that are
|
||||
concatenated must have the same type as the first one (unless this
|
||||
had a `Mixed` type - in which case it will accept any other types).
|
||||
- on_problems: Specifies how to handle problems if they occur, reporting
|
||||
them as warnings by default.
|
||||
|
||||
- If `keep_unmatched_columns` is set to `Report_Unmatched` (the
|
||||
default):
|
||||
- If matching by name and there are columns that are not present in
|
||||
all tables, `Unmatched_Columns` is reported.
|
||||
- If matching by position and column counts of the merged tables
|
||||
differ, then a `Column_Count_Mismatch` is reported. The error will
|
||||
contain the greatest column count as its `expected` value and the
|
||||
smallest one as its `actual` value.
|
||||
- If `keep_unmatched_columns` is set to `False` and matching by name,
|
||||
it is possible that there are no columns that are common to all
|
||||
provided tables; in that case `No_Output_Columns` is thrown as a
|
||||
dataflow error regardless of the `on_problems` setting, because there
|
||||
are no columns to include in the resulting table.
|
||||
- If type widening is disabled and one of corresponding columns has a
|
||||
type that is incompatible with the type coming from the first table,
|
||||
a `Column_Type_Mismatch` is reported. The problematic column will be
|
||||
dropped from the resulting table. With type widening disabled, the
|
||||
subsequent tables must have the same types as the first one, unless
|
||||
the type of the first one was `Mixed` which will accept any other
|
||||
type.
|
||||
- If a common type coercion for a set of matched columns from
|
||||
concatenated tables cannot be found, a `No_Common_Type` is reported.
|
||||
In warning or ignore mode, the problematic column will be dropped
|
||||
from the resulting table.
|
||||
|
||||
? Unifying Column Types
|
||||
|
||||
If `allow_type_widening` is set to `True`, then the following rules are
|
||||
used to find a common type that will fit values from all merged tables.
|
||||
|
||||
Numeric columns are unified by finding the most general type that can
|
||||
fit all of the columns. The biggest integer type will be chosen and if
|
||||
integers and decimals are mixed, the decimal type will be chosen.
|
||||
If boolean columns are mixed with numeric columns, they will be coerced
|
||||
to the numeric type (and converted to 0 and 1).
|
||||
|
||||
Text types will also be coerced according to the common rules - if
|
||||
constant-length texts of different lengths are mixed, they will be
|
||||
coerced to a varying-length type.
|
||||
|
||||
If one of the matched columns has `Mixed` type, that type will be used
|
||||
regardless of types of other columns. Mixing any other types will
|
||||
result in a `No_Common_Type` problem. If columns of incompatible types
|
||||
are meant to be mixed, at least one of them should be explicitly
|
||||
retyped to the `Mixed` type to indicate that intention. Note that the
|
||||
`Mixed` type may not be supported by most Database backends.
|
||||
from_union : (Vector Table) -> Match_Columns -> Boolean | Report_Unmatched -> Boolean -> Problem_Behavior -> Table
|
||||
from_union tables:(Vector) match_columns=Match_Columns.By_Name keep_unmatched_columns=Report_Unmatched allow_type_widening=True on_problems=Report_Warning =
|
||||
all_tables = (tables.map t-> Table.from t)
|
||||
all_tables.if_not_error <|
|
||||
## We keep separate problem builders, because if we are reporting `No_Output_Columns`,
|
||||
we only want to add a cause coming from unification; matching reports problems that would not fit this error.
|
||||
problem_builder_for_matching = Problem_Builder.new
|
||||
problem_builder_for_unification = Problem_Builder.new
|
||||
matched_column_sets = Match_Columns_Helpers.match_columns all_tables match_columns keep_unmatched_columns problem_builder_for_matching
|
||||
result_row_count = all_tables.fold 0 c-> t-> c + t.row_count
|
||||
merged_columns = matched_column_sets.map column_set->
|
||||
case Table_Helpers.unify_result_type_for_union column_set all_tables allow_type_widening problem_builder_for_unification of
|
||||
Nothing -> Nothing
|
||||
result_type : Value_Type ->
|
||||
concat_columns column_set all_tables result_type result_row_count on_problems
|
||||
good_columns = merged_columns.filter Filter_Condition.Not_Nothing
|
||||
problem_builder_for_matching.attach_problems_before on_problems <|
|
||||
problem_builder_for_unification.attach_problems_before on_problems <|
|
||||
if good_columns.is_empty then problem_builder_for_unification.raise_no_output_columns_with_cause else
|
||||
Table.new good_columns
|
||||
## PRIVATE
|
||||
|
||||
Ensures that the `txt` has at least `len` characters by appending spaces at
|
||||
|
@ -24,408 +24,419 @@ type Data
|
||||
Data.Value (create_connection_fn Nothing)
|
||||
|
||||
teardown self = self.connection.close
|
||||
|
||||
# The `...` operator used in the calls to `Table.from_union` and `first.union` "freezes" the default arguments so that they can be specified later, allowing us to run the full suite of tests through either entry point.
|
||||
call_static_union tables =
|
||||
Table.from_union tables ...
|
||||
call_member_union tables =
|
||||
first = tables.first
|
||||
rest = tables.drop 1
|
||||
first.union rest ...
|
||||
|
||||
add_specs suite_builder setup =
|
||||
prefix = setup.prefix
|
||||
create_connection_fn = setup.create_connection_func
|
||||
|
||||
suite_builder.group prefix+"Table.union" group_builder->
|
||||
data = Data.setup create_connection_fn
|
||||
|
||||
group_builder.teardown <|
|
||||
data.teardown
|
||||
run_union_tests group_builder setup call_member_union
|
||||
db_pending = if setup.is_database then "Table.from_union is currently not implemented for the Database backend."
|
||||
suite_builder.group prefix+"Table.from_union" pending=db_pending group_builder->
|
||||
run_union_tests group_builder setup call_static_union
|
||||
|
||||
table_builder cols =
|
||||
setup.table_builder cols connection=data.connection
|
||||
run_union_tests group_builder setup call_union =
|
||||
create_connection_fn = setup.create_connection_func
|
||||
data = Data.setup create_connection_fn
|
||||
group_builder.teardown <|
|
||||
data.teardown
|
||||
table_builder cols =
|
||||
setup.table_builder cols connection=data.connection
|
||||
group_builder.specify "should merge columns from multiple tables" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]], ["C", [True, False, True]]]
|
||||
t2 = table_builder [["A", [4, 5, 6]], ["B", ["d", "e", "f"]], ["C", [False, True, False]]]
|
||||
t3 = table_builder [["A", [7, 8, 9]], ["B", ["g", "h", "i"]], ["C", [True, False, False]]]
|
||||
|
||||
group_builder.specify "should merge columns from multiple tables" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]], ["C", [True, False, True]]]
|
||||
t2 = table_builder [["A", [4, 5, 6]], ["B", ["d", "e", "f"]], ["C", [False, True, False]]]
|
||||
t3 = table_builder [["A", [7, 8, 9]], ["B", ["g", "h", "i"]], ["C", [True, False, False]]]
|
||||
t4 = call_union [t1, t2]
|
||||
expect_column_names ["A", "B", "C"] t4
|
||||
t4.at "A" . to_vector . should_equal [1, 2, 3, 4, 5, 6]
|
||||
t4.at "B" . to_vector . should_equal ["a", "b", "c", "d", "e", "f"]
|
||||
t4.at "C" . to_vector . should_equal [True, False, True, False, True, False]
|
||||
|
||||
t4 = t1.union t2
|
||||
expect_column_names ["A", "B", "C"] t4
|
||||
t4.at "A" . to_vector . should_equal [1, 2, 3, 4, 5, 6]
|
||||
t4.at "B" . to_vector . should_equal ["a", "b", "c", "d", "e", "f"]
|
||||
t4.at "C" . to_vector . should_equal [True, False, True, False, True, False]
|
||||
t5 = call_union [t3, t1, t2]
|
||||
expect_column_names ["A", "B", "C"] t5
|
||||
t5.at "A" . to_vector . should_equal [7, 8, 9, 1, 2, 3, 4, 5, 6]
|
||||
t5.at "B" . to_vector . should_equal ["g", "h", "i", "a", "b", "c", "d", "e", "f"]
|
||||
t5.at "C" . to_vector . should_equal [True, False, False, True, False, True, False, True, False]
|
||||
|
||||
t5 = t3.union [t1, t2]
|
||||
expect_column_names ["A", "B", "C"] t5
|
||||
t5.at "A" . to_vector . should_equal [7, 8, 9, 1, 2, 3, 4, 5, 6]
|
||||
t5.at "B" . to_vector . should_equal ["g", "h", "i", "a", "b", "c", "d", "e", "f"]
|
||||
t5.at "C" . to_vector . should_equal [True, False, False, True, False, True, False, True, False]
|
||||
group_builder.specify "should fill unmatched columns (by name matching) with nulls and report a warning by default" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
|
||||
t2 = table_builder [["C", ["d", "e", "f"]], ["A", [4, 5, 6]]]
|
||||
t3 = table_builder [["D", [Nothing, Nothing, 0]], ["C", ["g", "h", "i"]]]
|
||||
|
||||
group_builder.specify "should fill unmatched columns (by name matching) with nulls and report a warning by default" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
|
||||
t2 = table_builder [["C", ["d", "e", "f"]], ["A", [4, 5, 6]]]
|
||||
t3 = table_builder [["D", [Nothing, Nothing, 0]], ["C", ["g", "h", "i"]]]
|
||||
action = call_union [t1, t2, t3] on_problems=_
|
||||
tester table =
|
||||
expect_column_names ["A", "B", "C", "D"] table
|
||||
table.at "A" . to_vector . should_equal [1, 2, 3, 4, 5, 6, Nothing, Nothing, Nothing]
|
||||
table.at "B" . to_vector . should_equal ["a", "b", "c", Nothing, Nothing, Nothing, Nothing, Nothing, Nothing]
|
||||
table.at "C" . to_vector . should_equal [Nothing, Nothing, Nothing, "d", "e", "f", "g", "h", "i"]
|
||||
table.at "D" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, 0]
|
||||
problems = [Unmatched_Columns.Error ["A", "B", "C", "D"]]
|
||||
Problems.test_problem_handling action problems tester
|
||||
|
||||
action = t1.union [t2, t3] on_problems=_
|
||||
tester table =
|
||||
expect_column_names ["A", "B", "C", "D"] table
|
||||
table.at "A" . to_vector . should_equal [1, 2, 3, 4, 5, 6, Nothing, Nothing, Nothing]
|
||||
table.at "B" . to_vector . should_equal ["a", "b", "c", Nothing, Nothing, Nothing, Nothing, Nothing, Nothing]
|
||||
table.at "C" . to_vector . should_equal [Nothing, Nothing, Nothing, "d", "e", "f", "g", "h", "i"]
|
||||
table.at "D" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, 0]
|
||||
problems = [Unmatched_Columns.Error ["A", "B", "C", "D"]]
|
||||
Problems.test_problem_handling action problems tester
|
||||
action2 = call_union [t2, t3] on_problems=_
|
||||
tester2 table =
|
||||
expect_column_names ["C", "A", "D"] table
|
||||
table.at "C" . to_vector . should_equal ["d", "e", "f", "g", "h", "i"]
|
||||
table.at "A" . to_vector . should_equal [4, 5, 6, Nothing, Nothing, Nothing]
|
||||
table.at "D" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, Nothing, 0]
|
||||
problems2 = [Unmatched_Columns.Error ["A", "D"]]
|
||||
Problems.test_problem_handling action2 problems2 tester2
|
||||
|
||||
action2 = t2.union t3 on_problems=_
|
||||
tester2 table =
|
||||
expect_column_names ["C", "A", "D"] table
|
||||
table.at "C" . to_vector . should_equal ["d", "e", "f", "g", "h", "i"]
|
||||
table.at "A" . to_vector . should_equal [4, 5, 6, Nothing, Nothing, Nothing]
|
||||
table.at "D" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, Nothing, 0]
|
||||
problems2 = [Unmatched_Columns.Error ["A", "D"]]
|
||||
Problems.test_problem_handling action2 problems2 tester2
|
||||
group_builder.specify "should drop unmatched columns if asked to" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
|
||||
t2 = table_builder [["C", ["d", "e", "f"]], ["A", [4, 5, 6]]]
|
||||
t3 = table_builder [["A", [Nothing, Nothing, 0]], ["C", ["g", "h", "i"]]]
|
||||
|
||||
group_builder.specify "should drop unmatched columns if asked to" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
|
||||
t2 = table_builder [["C", ["d", "e", "f"]], ["A", [4, 5, 6]]]
|
||||
t3 = table_builder [["A", [Nothing, Nothing, 0]], ["C", ["g", "h", "i"]]]
|
||||
t4 = call_union[t1, t2, t3] keep_unmatched_columns=False on_problems=Problem_Behavior.Report_Error
|
||||
Problems.assume_no_problems t4
|
||||
expect_column_names ["A"] t4
|
||||
t4.at "A" . to_vector . should_equal [1, 2, 3, 4, 5, 6, Nothing, Nothing, 0]
|
||||
|
||||
t4 = t1.union [t2, t3] keep_unmatched_columns=False on_problems=Problem_Behavior.Report_Error
|
||||
Problems.assume_no_problems t4
|
||||
expect_column_names ["A"] t4
|
||||
t4.at "A" . to_vector . should_equal [1, 2, 3, 4, 5, 6, Nothing, Nothing, 0]
|
||||
group_builder.specify "should keep unmatched columns without errors if asked to" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
|
||||
t2 = table_builder [["C", ["d", "e", "f"]], ["A", [4, 5, 6]]]
|
||||
t3 = table_builder [["A", [Nothing, Nothing, 0]], ["C", ["g", "h", "i"]]]
|
||||
|
||||
group_builder.specify "should keep unmatched columns without errors if asked to" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
|
||||
t2 = table_builder [["C", ["d", "e", "f"]], ["A", [4, 5, 6]]]
|
||||
t3 = table_builder [["A", [Nothing, Nothing, 0]], ["C", ["g", "h", "i"]]]
|
||||
t4 = call_union [t1, t2, t3] keep_unmatched_columns=True on_problems=Problem_Behavior.Report_Error
|
||||
Problems.assume_no_problems t4
|
||||
expect_column_names ["A", "B", "C"] t4
|
||||
t4.at "A" . to_vector . should_equal [1, 2, 3, 4, 5, 6, Nothing, Nothing, 0]
|
||||
t4.at "B" . to_vector . should_equal ["a", "b", "c", Nothing, Nothing, Nothing, Nothing, Nothing, Nothing]
|
||||
t4.at "C" . to_vector . should_equal [Nothing, Nothing, Nothing, "d", "e", "f", "g", "h", "i"]
|
||||
|
||||
t4 = t1.union [t2, t3] keep_unmatched_columns=True on_problems=Problem_Behavior.Report_Error
|
||||
Problems.assume_no_problems t4
|
||||
expect_column_names ["A", "B", "C"] t4
|
||||
t4.at "A" . to_vector . should_equal [1, 2, 3, 4, 5, 6, Nothing, Nothing, 0]
|
||||
t4.at "B" . to_vector . should_equal ["a", "b", "c", Nothing, Nothing, Nothing, Nothing, Nothing, Nothing]
|
||||
t4.at "C" . to_vector . should_equal [Nothing, Nothing, Nothing, "d", "e", "f", "g", "h", "i"]
|
||||
group_builder.specify "should fail if asked to drop unmatched columns but the set of common columns is empty" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
|
||||
t2 = table_builder [["C", ["d", "e", "f"]], ["A", [4, 5, 6]]]
|
||||
t3 = table_builder [["D", [Nothing, Nothing, 0]], ["C", ["g", "h", "i"]]]
|
||||
|
||||
group_builder.specify "should fail if asked to drop unmatched columns but the set of common columns is empty" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
|
||||
t2 = table_builder [["C", ["d", "e", "f"]], ["A", [4, 5, 6]]]
|
||||
t3 = table_builder [["D", [Nothing, Nothing, 0]], ["C", ["g", "h", "i"]]]
|
||||
t4 = call_union [t1, t2, t3] keep_unmatched_columns=False on_problems=Problem_Behavior.Ignore
|
||||
t4.should_fail_with No_Output_Columns
|
||||
t4.catch.to_display_text . should_equal "No columns in the result, because of another problem: Unmatched columns are set to be dropped, but no common column names were found."
|
||||
|
||||
t4 = t1.union [t2, t3] keep_unmatched_columns=False on_problems=Problem_Behavior.Ignore
|
||||
t4.should_fail_with No_Output_Columns
|
||||
t4.catch.to_display_text . should_equal "No columns in the result, because of another problem: Unmatched columns are set to be dropped, but no common column names were found."
|
||||
group_builder.specify "should ignore column names when matching by position" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["Y", ["a", "b", "c"]]]
|
||||
t2 = table_builder [["X", [4, 5, 6]], ["A", ["d", "e", "f"]]]
|
||||
|
||||
group_builder.specify "should ignore column names when matching by position" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["Y", ["a", "b", "c"]]]
|
||||
t2 = table_builder [["X", [4, 5, 6]], ["A", ["d", "e", "f"]]]
|
||||
t3 = call_union [t1, t2] match_columns=Match_Columns.By_Position
|
||||
expect_column_names ["A", "Y"] t3
|
||||
t3.at "A" . to_vector . should_equal [1, 2, 3, 4, 5, 6]
|
||||
t3.at "Y" . to_vector . should_equal ["a", "b", "c", "d", "e", "f"]
|
||||
|
||||
t3 = t1.union t2 match_columns=Match_Columns.By_Position
|
||||
expect_column_names ["A", "Y"] t3
|
||||
group_builder.specify "should fill extra columns (positional matching) with nulls and report a warning by default" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
|
||||
t2 = table_builder [["A1", [4, 5, 6]], ["B1", ["d", "e", "f"]], ["C", [7, 8, 9]]]
|
||||
t3 = table_builder [["A2", [10, 20, 30]]]
|
||||
|
||||
action = call_union [t1, t2, t3] match_columns=Match_Columns.By_Position on_problems=_
|
||||
tester table =
|
||||
expect_column_names ["A1", "B1", "C"] table
|
||||
table.at "A1" . to_vector . should_equal [1, 2, 3, 4, 5, 6, 10, 20, 30]
|
||||
table.at "B1" . to_vector . should_equal ["a", "b", "c", "d", "e", "f", Nothing, Nothing, Nothing]
|
||||
table.at "C" . to_vector . should_equal [Nothing, Nothing, Nothing, 7, 8, 9, Nothing, Nothing, Nothing]
|
||||
problems = [Column_Count_Mismatch.Error 3 1]
|
||||
Problems.test_problem_handling action problems tester
|
||||
|
||||
group_builder.specify "should keep the least number of columns with positional matching if asked to drop unmatched ones" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
|
||||
t2 = table_builder [["A1", [4, 5, 6]], ["B1", ["d", "e", "f"]], ["C", [7, 8, 9]]]
|
||||
t3 = table_builder [["A2", [10, 20, 30]]]
|
||||
|
||||
t4 = call_union [t1, t2, t3] keep_unmatched_columns=False match_columns=Match_Columns.By_Position on_problems=Problem_Behavior.Report_Error
|
||||
Problems.assume_no_problems t4
|
||||
expect_column_names ["A"] t4
|
||||
t4.at "A" . to_vector . should_equal [1, 2, 3, 4, 5, 6, 10, 20, 30]
|
||||
|
||||
group_builder.specify "should keep the greatest number of columns with positional matching if asked to keep unmatched ones, filling missing values with null and reporting no problems" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
|
||||
t2 = table_builder [["A1", [4, 5, 6]], ["B1", ["d", "e", "f"]], ["C", [7, 8, 9]]]
|
||||
t3 = table_builder [["A2", [10, 20, 30]]]
|
||||
|
||||
t4 = call_union [t1, t2, t3] match_columns=Match_Columns.By_Position keep_unmatched_columns=True on_problems=Problem_Behavior.Ignore
|
||||
Problems.assume_no_problems t4
|
||||
expect_column_names ["A1", "B1", "C"] t4
|
||||
t4.at "A1" . to_vector . should_equal [1, 2, 3, 4, 5, 6, 10, 20, 30]
|
||||
t4.at "B1" . to_vector . should_equal ["a", "b", "c", "d", "e", "f", Nothing, Nothing, Nothing]
|
||||
t4.at "C" . to_vector . should_equal [Nothing, Nothing, Nothing, 7, 8, 9, Nothing, Nothing, Nothing]
|
||||
|
||||
group_builder.specify "should use column names from the first table that has enough columns in positional matching mode" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]]]
|
||||
t2 = table_builder [["X", [4, 5, 6]], ["A", ["a", "b", "c"]]]
|
||||
|
||||
check table =
|
||||
expect_column_names ["X", "A"] table
|
||||
table.at "X" . to_vector . should_equal [1, 2, 3, 4, 5, 6]
|
||||
table.at "A" . to_vector . should_equal [Nothing, Nothing, Nothing, "a", "b", "c"]
|
||||
|
||||
t3 = call_union [t1, t2] match_columns=Match_Columns.By_Position
|
||||
within_table t3 <|
|
||||
check t3
|
||||
Problems.get_attached_warnings t3 . should_equal [Column_Count_Mismatch.Error 2 1]
|
||||
|
||||
t4 = call_union [t1, t2] match_columns=Match_Columns.By_Position keep_unmatched_columns=True
|
||||
within_table t4 <|
|
||||
check t4
|
||||
|
||||
t5 = table_builder [["Y", [7, 8, 9]], ["A", ["d", "e", "f"]], ["Z", [10, 11, 12]]]
|
||||
t6 = table_builder [["W", [0]]]
|
||||
t7 = table_builder [["X", [7, 8, 9]], ["Y", ["d", "e", "f"]], ["Z", [10, 11, 12]]]
|
||||
t8 = call_union [t1, t2, t5, t6, t7] match_columns=Match_Columns.By_Position
|
||||
expect_column_names ["Y", "A", "Z"] t8
|
||||
|
||||
group_builder.specify "should allow to merge a table with itself" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
|
||||
t2 = call_union [t1, t1, t1]
|
||||
expect_column_names ["A", "B"] t2
|
||||
t2.at "A" . to_vector . should_equal [1, 2, 3, 1, 2, 3, 1, 2, 3]
|
||||
t2.at "B" . to_vector . should_equal ["a", "b", "c", "a", "b", "c", "a", "b", "c"]
|
||||
|
||||
group_builder.specify "should not de-duplicate rows" <|
|
||||
t1 = table_builder [["A", [1, 1, 3]], ["B", ["a", "a", "c"]]]
|
||||
t2 = table_builder [["A", [1, 2, 2]], ["B", ["a", "b", "b"]]]
|
||||
t3 = call_union [t1, t2]
|
||||
expect_column_names ["A", "B"] t3
|
||||
t3.at "A" . to_vector . should_equal [1, 1, 3, 1, 2, 2]
|
||||
t3.at "B" . to_vector . should_equal ["a", "a", "c", "a", "b", "b"]
|
||||
|
||||
group_builder.specify "should gracefully handle the case where no tables to union were provided" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
|
||||
|
||||
check_same table =
|
||||
expect_column_names ["A", "B"] table
|
||||
table.at "A" . to_vector . should_equal [1, 2, 3]
|
||||
table.at "B" . to_vector . should_equal ["a", "b", "c"]
|
||||
|
||||
check_same <| call_union [t1]
|
||||
check_same <| call_union [t1] match_columns=Match_Columns.By_Position
|
||||
|
||||
check_same <| call_union [t1] keep_unmatched_columns=False
|
||||
check_same <| call_union [t1] match_columns=Match_Columns.By_Position keep_unmatched_columns=False
|
||||
|
||||
check_same <| call_union [t1] keep_unmatched_columns=True
|
||||
check_same <| call_union [t1] match_columns=Match_Columns.By_Position keep_unmatched_columns=True
|
||||
|
||||
group_builder.specify "should correctly unify text columns of various lengths" pending=(if setup.test_selection.fixed_length_text_columns.not then "Fixed-length Char columns are not supported by this backend.") <|
|
||||
t1 = (table_builder [["A", ["a", "b", "c"]]]) . cast "A" (Value_Type.Char size=1 variable_length=False)
|
||||
t2 = (table_builder [["A", ["xyz", "abc", "def"]]]) . cast "A" (Value_Type.Char size=3 variable_length=False)
|
||||
|
||||
t1.at "A" . value_type . should_equal (Value_Type.Char size=1 variable_length=False)
|
||||
t2.at "A" . value_type . should_equal (Value_Type.Char size=3 variable_length=False)
|
||||
|
||||
t3 = call_union [t1, t2]
|
||||
expect_column_names ["A"] t3
|
||||
t3.at "A" . to_vector . should_equal ["a", "b", "c", "xyz", "abc", "def"]
|
||||
t3.at "A" . value_type . is_text . should_be_true
|
||||
Test.with_clue "t3[A].value_type="+(t3.at "A").value_type.to_display_text+": " <|
|
||||
t3.at "A" . value_type . variable_length . should_be_true
|
||||
|
||||
group_builder.specify "should find a common type that will fit the merged columns" <|
|
||||
t1 = table_builder [["A", [0, 1, 2]]]
|
||||
t2 = table_builder [["A", [1.0, 2.0, 2.5]]]
|
||||
|
||||
t1.at "A" . value_type . is_integer . should_be_true
|
||||
t2.at "A" . value_type . is_floating_point . should_be_true
|
||||
|
||||
t3 = call_union [t1, t2]
|
||||
expect_column_names ["A"] t3
|
||||
t3.at "A" . value_type . is_floating_point . should_be_true
|
||||
t3.at "A" . to_vector . should_equal [0, 1, 2, 1.0, 2.0, 2.5]
|
||||
|
||||
# Specific type tests that apply to in-memory. Database behaviour is up to implementation.
|
||||
if setup.is_database.not then
|
||||
t4 = table_builder [["A", [2^100, 2^10, 2]]]
|
||||
t4.at "A" . value_type . should_be_a (Value_Type.Decimal ...)
|
||||
|
||||
t5 = call_union [t2, t4]
|
||||
expect_column_names ["A"] t5
|
||||
t5.at "A" . value_type . is_floating_point . should_be_true
|
||||
t5.at "A" . to_vector . should_equal [1.0, 2.0, 2.5, 2^100, 2^10, 2]
|
||||
|
||||
t6 = call_union [t1, t4]
|
||||
expect_column_names ["A"] t6
|
||||
t6.at "A" . value_type . should_be_a (Value_Type.Decimal ...)
|
||||
t6.at "A" . to_vector . should_equal [0, 1, 2, 2^100, 2^10, 2]
|
||||
|
||||
# Database backends are not required to support Mixed types.
|
||||
if setup.is_database.not then
|
||||
group_builder.specify "should resort to Mixed value type only if at least one column is already Mixed" <|
|
||||
## TODO currently no way to retype a column to Mixed, so we are
|
||||
using a custom object
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["mixed", ["a", My_Type.Value 1 2, Nothing]]]
|
||||
t2 = table_builder [["A", [4, 5, 6]], ["mixed", [1, 2, 3]]]
|
||||
t1.at "mixed" . value_type . should_equal Value_Type.Mixed
|
||||
t2.at "mixed" . value_type . should_equal Value_Type.Integer
|
||||
|
||||
t3 = call_union [t1, t2]
|
||||
Problems.assume_no_problems t3
|
||||
expect_column_names ["A", "mixed"] t3
|
||||
t3.at "A" . to_vector . should_equal [1, 2, 3, 4, 5, 6]
|
||||
t3.at "Y" . to_vector . should_equal ["a", "b", "c", "d", "e", "f"]
|
||||
t3.at "mixed" . to_vector . should_equal ["a", My_Type.Value 1 2, Nothing, 1, 2, 3]
|
||||
|
||||
group_builder.specify "should fill extra columns (positional matching) with nulls and report a warning by default" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
|
||||
t2 = table_builder [["A1", [4, 5, 6]], ["B1", ["d", "e", "f"]], ["C", [7, 8, 9]]]
|
||||
t3 = table_builder [["A2", [10, 20, 30]]]
|
||||
t4 = table_builder [["A", [1, 3]], ["mixed", [True, False]]]
|
||||
t5 = table_builder [["A", [4, 5]], ["mixed", ["X", "y"]]]
|
||||
t4.at "mixed" . value_type . should_equal Value_Type.Boolean
|
||||
t5.at "mixed" . value_type . should_equal Value_Type.Char
|
||||
|
||||
action = t1.union [t2, t3] match_columns=Match_Columns.By_Position on_problems=_
|
||||
tester table =
|
||||
expect_column_names ["A1", "B1", "C"] table
|
||||
table.at "A1" . to_vector . should_equal [1, 2, 3, 4, 5, 6, 10, 20, 30]
|
||||
table.at "B1" . to_vector . should_equal ["a", "b", "c", "d", "e", "f", Nothing, Nothing, Nothing]
|
||||
table.at "C" . to_vector . should_equal [Nothing, Nothing, Nothing, 7, 8, 9, Nothing, Nothing, Nothing]
|
||||
problems = [Column_Count_Mismatch.Error 3 1]
|
||||
Problems.test_problem_handling action problems tester
|
||||
t6 = call_union [t5, t1, t2, t4]
|
||||
Problems.assume_no_problems t6
|
||||
expect_column_names ["A", "mixed"] t6
|
||||
t6.at "A" . to_vector . should_equal [4, 5, 1, 2, 3, 4, 5, 6, 1, 3]
|
||||
t6.at "mixed" . to_vector . should_equal ["X", "y", "a", My_Type.Value 1 2, Nothing, 1, 2, 3, True, False]
|
||||
t6.at "mixed" . value_type . should_equal Value_Type.Mixed
|
||||
|
||||
group_builder.specify "should keep the least number of columns with positional matching if asked to drop unmatched ones" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
|
||||
t2 = table_builder [["A1", [4, 5, 6]], ["B1", ["d", "e", "f"]], ["C", [7, 8, 9]]]
|
||||
t3 = table_builder [["A2", [10, 20, 30]]]
|
||||
group_builder.specify "if no common type can be found, should report error and drop the problematic column" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]], ["C", [True, False, Nothing]]]
|
||||
t2 = table_builder [["C", ["x", "Y", "Z"]], ["A", [4, 5, 6]], ["B", [1, 2, 3]]]
|
||||
|
||||
t4 = t1.union [t2, t3] keep_unmatched_columns=False match_columns=Match_Columns.By_Position on_problems=Problem_Behavior.Report_Error
|
||||
Problems.assume_no_problems t4
|
||||
expect_column_names ["A"] t4
|
||||
t4.at "A" . to_vector . should_equal [1, 2, 3, 4, 5, 6, 10, 20, 30]
|
||||
r1 = call_union [t1, t2] on_problems=Problem_Behavior.Report_Error
|
||||
r1.should_fail_with No_Common_Type
|
||||
|
||||
group_builder.specify "should keep the greatest number of columns with positional matching if asked to keep unmatched ones, filling missing values with null and reporting no problems" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
|
||||
t2 = table_builder [["A1", [4, 5, 6]], ["B1", ["d", "e", "f"]], ["C", [7, 8, 9]]]
|
||||
t3 = table_builder [["A2", [10, 20, 30]]]
|
||||
r2 = call_union [t1, t2] on_problems=Problem_Behavior.Ignore
|
||||
Problems.assume_no_problems r2
|
||||
|
||||
t4 = t1.union [t2, t3] match_columns=Match_Columns.By_Position keep_unmatched_columns=True on_problems=Problem_Behavior.Ignore
|
||||
Problems.assume_no_problems t4
|
||||
expect_column_names ["A1", "B1", "C"] t4
|
||||
t4.at "A1" . to_vector . should_equal [1, 2, 3, 4, 5, 6, 10, 20, 30]
|
||||
t4.at "B1" . to_vector . should_equal ["a", "b", "c", "d", "e", "f", Nothing, Nothing, Nothing]
|
||||
t4.at "C" . to_vector . should_equal [Nothing, Nothing, Nothing, 7, 8, 9, Nothing, Nothing, Nothing]
|
||||
r3 = call_union [t1, t2] on_problems=Problem_Behavior.Report_Warning
|
||||
w3 = Problems.get_attached_warnings r3
|
||||
w3.each w-> w.should_be_a No_Common_Type
|
||||
w3.map w->
|
||||
## We look just at names of the Value_Type constructors, as
|
||||
different database backends may choose integers of different
|
||||
sizes and have differing settings for text types.
|
||||
types = w.types.map value_type->
|
||||
Meta.meta value_type . constructor . name
|
||||
(types == ["Char", "Integer"]) || (types == ["Boolean", "Char"]) . should_be_true
|
||||
|
||||
group_builder.specify "should use column names from the first table that has enough columns in positional matching mode" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]]]
|
||||
t2 = table_builder [["X", [4, 5, 6]], ["A", ["a", "b", "c"]]]
|
||||
# A boolean column cannot be merged with integers.
|
||||
t3 = t1.select_columns ["C", "A"] reorder=True
|
||||
t4 = t2.select_columns ["B", "A"] reorder=True
|
||||
r4 = call_union [t3, t4] match_columns=Match_Columns.By_Position on_problems=Problem_Behavior.Report_Error
|
||||
r4.should_fail_with No_Common_Type
|
||||
|
||||
check table =
|
||||
expect_column_names ["X", "A"] table
|
||||
table.at "X" . to_vector . should_equal [1, 2, 3, 4, 5, 6]
|
||||
table.at "A" . to_vector . should_equal [Nothing, Nothing, Nothing, "a", "b", "c"]
|
||||
group_builder.specify "if type widening is not allowed, should use the type from first table that contained the given column" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]]]
|
||||
t2 = table_builder [["A", [4, 5, 6]], ["B", [1.2, 2.2, 3.1]]]
|
||||
|
||||
t3 = t1.union t2 match_columns=Match_Columns.By_Position
|
||||
within_table t3 <|
|
||||
check t3
|
||||
Problems.get_attached_warnings t3 . should_equal [Column_Count_Mismatch.Error 2 1]
|
||||
|
||||
t4 = t1.union t2 match_columns=Match_Columns.By_Position keep_unmatched_columns=True
|
||||
within_table t4 <|
|
||||
check t4
|
||||
|
||||
t5 = table_builder [["Y", [7, 8, 9]], ["A", ["d", "e", "f"]], ["Z", [10, 11, 12]]]
|
||||
t6 = table_builder [["W", [0]]]
|
||||
t7 = table_builder [["X", [7, 8, 9]], ["Y", ["d", "e", "f"]], ["Z", [10, 11, 12]]]
|
||||
t8 = t1.union [t2, t5, t6, t7] match_columns=Match_Columns.By_Position
|
||||
expect_column_names ["Y", "A", "Z"] t8
|
||||
|
||||
group_builder.specify "should allow to merge a table with itself" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
|
||||
t2 = t1.union [t1, t1]
|
||||
expect_column_names ["A", "B"] t2
|
||||
t2.at "A" . to_vector . should_equal [1, 2, 3, 1, 2, 3, 1, 2, 3]
|
||||
t2.at "B" . to_vector . should_equal ["a", "b", "c", "a", "b", "c", "a", "b", "c"]
|
||||
|
||||
group_builder.specify "should not de-duplicate rows" <|
|
||||
t1 = table_builder [["A", [1, 1, 3]], ["B", ["a", "a", "c"]]]
|
||||
t2 = table_builder [["A", [1, 2, 2]], ["B", ["a", "b", "b"]]]
|
||||
t3 = t1.union t2
|
||||
t3 = call_union [t1, t2] allow_type_widening=False keep_unmatched_columns=True
|
||||
within_table t3 <|
|
||||
Problems.assume_no_problems t3
|
||||
expect_column_names ["A", "B"] t3
|
||||
t3.at "A" . to_vector . should_equal [1, 1, 3, 1, 2, 2]
|
||||
t3.at "B" . to_vector . should_equal ["a", "a", "c", "a", "b", "b"]
|
||||
|
||||
group_builder.specify "should gracefully handle the case where no tables to union were provided" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
|
||||
|
||||
check_same table =
|
||||
expect_column_names ["A", "B"] table
|
||||
table.at "A" . to_vector . should_equal [1, 2, 3]
|
||||
table.at "B" . to_vector . should_equal ["a", "b", "c"]
|
||||
|
||||
check_same <| t1.union []
|
||||
check_same <| t1.union [] match_columns=Match_Columns.By_Position
|
||||
|
||||
check_same <| t1.union [] keep_unmatched_columns=False
|
||||
check_same <| t1.union [] match_columns=Match_Columns.By_Position keep_unmatched_columns=False
|
||||
|
||||
check_same <| t1.union [] keep_unmatched_columns=True
|
||||
check_same <| t1.union [] match_columns=Match_Columns.By_Position keep_unmatched_columns=True
|
||||
|
||||
group_builder.specify "should correctly unify text columns of various lengths" pending=(if setup.test_selection.fixed_length_text_columns.not then "Fixed-length Char columns are not supported by this backend.") <|
|
||||
t1 = (table_builder [["A", ["a", "b", "c"]]]) . cast "A" (Value_Type.Char size=1 variable_length=False)
|
||||
t2 = (table_builder [["A", ["xyz", "abc", "def"]]]) . cast "A" (Value_Type.Char size=3 variable_length=False)
|
||||
|
||||
t1.at "A" . value_type . should_equal (Value_Type.Char size=1 variable_length=False)
|
||||
t2.at "A" . value_type . should_equal (Value_Type.Char size=3 variable_length=False)
|
||||
|
||||
t3 = t1.union t2
|
||||
expect_column_names ["A"] t3
|
||||
t3.at "A" . to_vector . should_equal ["a", "b", "c", "xyz", "abc", "def"]
|
||||
t3.at "A" . value_type . is_text . should_be_true
|
||||
Test.with_clue "t3[A].value_type="+(t3.at "A").value_type.to_display_text+": " <|
|
||||
t3.at "A" . value_type . variable_length . should_be_true
|
||||
|
||||
group_builder.specify "should find a common type that will fit the merged columns" <|
|
||||
t1 = table_builder [["A", [0, 1, 2]]]
|
||||
t2 = table_builder [["A", [1.0, 2.0, 2.5]]]
|
||||
|
||||
t1.at "A" . value_type . is_integer . should_be_true
|
||||
t2.at "A" . value_type . is_floating_point . should_be_true
|
||||
|
||||
t3 = t1.union t2
|
||||
expect_column_names ["A"] t3
|
||||
t3.at "A" . value_type . is_floating_point . should_be_true
|
||||
t3.at "A" . to_vector . should_equal [0, 1, 2, 1.0, 2.0, 2.5]
|
||||
|
||||
# Specific type tests that apply to in-memory. Database behaviour is up to implementation.
|
||||
if setup.is_database.not then
|
||||
t4 = table_builder [["A", [2^100, 2^10, 2]]]
|
||||
t4.at "A" . value_type . should_be_a (Value_Type.Decimal ...)
|
||||
|
||||
t5 = t2.union t4
|
||||
expect_column_names ["A"] t5
|
||||
t5.at "A" . value_type . is_floating_point . should_be_true
|
||||
t5.at "A" . to_vector . should_equal [1.0, 2.0, 2.5, 2^100, 2^10, 2]
|
||||
|
||||
t6 = t1.union t4
|
||||
expect_column_names ["A"] t6
|
||||
t6.at "A" . value_type . should_be_a (Value_Type.Decimal ...)
|
||||
t6.at "A" . to_vector . should_equal [0, 1, 2, 2^100, 2^10, 2]
|
||||
|
||||
# Database backends are not required to support Mixed types.
|
||||
if setup.is_database.not then
|
||||
group_builder.specify "should resort to Mixed value type only if at least one column is already Mixed" <|
|
||||
## TODO currently no way to retype a column to Mixed, so we are
|
||||
using a custom object
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["mixed", ["a", My_Type.Value 1 2, Nothing]]]
|
||||
t2 = table_builder [["A", [4, 5, 6]], ["mixed", [1, 2, 3]]]
|
||||
t1.at "mixed" . value_type . should_equal Value_Type.Mixed
|
||||
t2.at "mixed" . value_type . should_equal Value_Type.Integer
|
||||
|
||||
t3 = t1.union t2
|
||||
Problems.assume_no_problems t3
|
||||
expect_column_names ["A", "mixed"] t3
|
||||
t3.at "A" . to_vector . should_equal [1, 2, 3, 4, 5, 6]
|
||||
t3.at "mixed" . to_vector . should_equal ["a", My_Type.Value 1 2, Nothing, 1, 2, 3]
|
||||
|
||||
t4 = table_builder [["A", [1, 3]], ["mixed", [True, False]]]
|
||||
t5 = table_builder [["A", [4, 5]], ["mixed", ["X", "y"]]]
|
||||
t4.at "mixed" . value_type . should_equal Value_Type.Boolean
|
||||
t5.at "mixed" . value_type . should_equal Value_Type.Char
|
||||
|
||||
t6 = t5.union [t1, t2, t4]
|
||||
Problems.assume_no_problems t6
|
||||
expect_column_names ["A", "mixed"] t6
|
||||
t6.at "A" . to_vector . should_equal [4, 5, 1, 2, 3, 4, 5, 6, 1, 3]
|
||||
t6.at "mixed" . to_vector . should_equal ["X", "y", "a", My_Type.Value 1 2, Nothing, 1, 2, 3, True, False]
|
||||
t6.at "mixed" . value_type . should_equal Value_Type.Mixed
|
||||
|
||||
group_builder.specify "if no common type can be found, should report error and drop the problematic column" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]], ["C", [True, False, Nothing]]]
|
||||
t2 = table_builder [["C", ["x", "Y", "Z"]], ["A", [4, 5, 6]], ["B", [1, 2, 3]]]
|
||||
|
||||
r1 = t1.union t2 on_problems=Problem_Behavior.Report_Error
|
||||
r1.should_fail_with No_Common_Type
|
||||
|
||||
r2 = t1.union t2 on_problems=Problem_Behavior.Ignore
|
||||
Problems.assume_no_problems r2
|
||||
|
||||
r3 = t1.union t2 on_problems=Problem_Behavior.Report_Warning
|
||||
w3 = Problems.get_attached_warnings r3
|
||||
w3.each w-> w.should_be_a No_Common_Type
|
||||
w3.map w->
|
||||
## We look just at names of the Value_Type constructors, as
|
||||
different database backends may choose integers of different
|
||||
sizes and have differing settings for text types.
|
||||
types = w.types.map value_type->
|
||||
Meta.meta value_type . constructor . name
|
||||
(types == ["Char", "Integer"]) || (types == ["Boolean", "Char"]) . should_be_true
|
||||
|
||||
# A boolean column cannot be merged with integers.
|
||||
t3 = t1.select_columns ["C", "A"] reorder=True
|
||||
t4 = t2.select_columns ["B", "A"] reorder=True
|
||||
r4 = t3.union t4 match_columns=Match_Columns.By_Position on_problems=Problem_Behavior.Report_Error
|
||||
r4.should_fail_with No_Common_Type
|
||||
|
||||
group_builder.specify "if type widening is not allowed, should use the type from first table that contained the given column" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]]]
|
||||
t2 = table_builder [["A", [4, 5, 6]], ["B", [1.2, 2.2, 3.1]]]
|
||||
|
||||
t3 = t1.union t2 allow_type_widening=False keep_unmatched_columns=True
|
||||
within_table t3 <|
|
||||
Problems.assume_no_problems t3
|
||||
expect_column_names ["A", "B"] t3
|
||||
t3.at "A" . to_vector . should_equal [1, 2, 3, 4, 5, 6]
|
||||
t3.at "B" . to_vector . should_equal [Nothing, Nothing, Nothing, 1.2, 2.2, 3.1]
|
||||
t3.at "A" . value_type . is_integer . should_be_true
|
||||
t2.at "B" . value_type . is_floating_point . should_be_true
|
||||
t3.at "B" . value_type . is_floating_point . should_be_true
|
||||
|
||||
group_builder.specify "if type widening is not allowed and types do not match, should report error and drop the problematic column" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", [1, 2, 3]], ["E", [1.1, 2.5, 3.2]]]
|
||||
t2 = table_builder [["A", [4, 5, 6]], ["B", [1.5, 2.5, 3.5]], ["E", [1, 2, 3]]]
|
||||
|
||||
t1.at "B" . value_type . is_integer . should_be_true
|
||||
t1.at "E" . value_type . is_floating_point . should_be_true
|
||||
|
||||
t3.at "A" . to_vector . should_equal [1, 2, 3, 4, 5, 6]
|
||||
t3.at "B" . to_vector . should_equal [Nothing, Nothing, Nothing, 1.2, 2.2, 3.1]
|
||||
t3.at "A" . value_type . is_integer . should_be_true
|
||||
t2.at "B" . value_type . is_floating_point . should_be_true
|
||||
t2.at "E" . value_type . is_integer . should_be_true
|
||||
t3.at "B" . value_type . is_floating_point . should_be_true
|
||||
|
||||
action = t1.union t2 allow_type_widening=False on_problems=_
|
||||
tester table =
|
||||
expect_column_names ["A"] table
|
||||
table.at "A" . to_vector . should_equal [1, 2, 3, 4, 5, 6]
|
||||
group_builder.specify "if type widening is not allowed and types do not match, should report error and drop the problematic column" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", [1, 2, 3]], ["E", [1.1, 2.5, 3.2]]]
|
||||
t2 = table_builder [["A", [4, 5, 6]], ["B", [1.5, 2.5, 3.5]], ["E", [1, 2, 3]]]
|
||||
|
||||
problem_checker problem =
|
||||
problem.should_be_a Column_Type_Mismatch
|
||||
True
|
||||
err_checker err =
|
||||
problem_checker err.catch
|
||||
warn_checker warnings =
|
||||
warnings.all problem_checker
|
||||
Problems.test_advanced_problem_handling action err_checker warn_checker tester
|
||||
t1.at "B" . value_type . is_integer . should_be_true
|
||||
t1.at "E" . value_type . is_floating_point . should_be_true
|
||||
|
||||
# Database backends are not required to support Mixed types.
|
||||
if setup.is_database.not then
|
||||
group_builder.specify "even if type widening is not allowed, if the first column is mixed, it should accept any column to be concatenated to it" <|
|
||||
t1 = table_builder [["X", ["a", 1, Nothing]]]
|
||||
t2 = table_builder [["X", [1]]]
|
||||
t3 = table_builder [["X", [1.2, 2.3, 3.4]]]
|
||||
t4 = table_builder [["X", ["a", "b"]]]
|
||||
t5 = table_builder [["X", [True, False]]]
|
||||
t2.at "B" . value_type . is_floating_point . should_be_true
|
||||
t2.at "E" . value_type . is_integer . should_be_true
|
||||
|
||||
t1.at "X" . value_type . should_equal Value_Type.Mixed
|
||||
t2.at "X" . value_type . should_equal Value_Type.Integer
|
||||
action = call_union [t1, t2] allow_type_widening=False on_problems=_
|
||||
tester table =
|
||||
expect_column_names ["A"] table
|
||||
table.at "A" . to_vector . should_equal [1, 2, 3, 4, 5, 6]
|
||||
|
||||
t6 = t1.union [t2, t3, t4, t5] allow_type_widening=False
|
||||
Problems.assume_no_problems t6
|
||||
t6.at "X" . value_type . should_equal Value_Type.Mixed
|
||||
t6.at "X" . to_vector . should_equal ["a", 1, Nothing, 1, 1.2, 2.3, 3.4, "a", "b", True, False]
|
||||
problem_checker problem =
|
||||
problem.should_be_a Column_Type_Mismatch
|
||||
True
|
||||
err_checker err =
|
||||
problem_checker err.catch
|
||||
warn_checker warnings =
|
||||
warnings.all problem_checker
|
||||
Problems.test_advanced_problem_handling action err_checker warn_checker tester
|
||||
|
||||
group_builder.specify "when finding a common type for numeric columns to be Float, any precision loss should be reported" <|
|
||||
t1 = table_builder [["X", [1, (2^62)-1, 3]]]
|
||||
t2 = table_builder [["X", [1.5, 2.5, 3.5]]]
|
||||
t3 = table_builder [["X", [(2^100)+1, 2^10, 2]]]
|
||||
# Database backends are not required to support Mixed types.
|
||||
if setup.is_database.not then
|
||||
group_builder.specify "even if type widening is not allowed, if the first column is mixed, it should accept any column to be concatenated to it" <|
|
||||
t1 = table_builder [["X", ["a", 1, Nothing]]]
|
||||
t2 = table_builder [["X", [1]]]
|
||||
t3 = table_builder [["X", [1.2, 2.3, 3.4]]]
|
||||
t4 = table_builder [["X", ["a", "b"]]]
|
||||
t5 = table_builder [["X", [True, False]]]
|
||||
|
||||
t1.at "X" . value_type . should_equal Value_Type.Integer
|
||||
t2.at "X" . value_type . should_equal Value_Type.Float
|
||||
t3.at "X" . value_type . should_be_a (Value_Type.Decimal ...)
|
||||
t1.at "X" . value_type . should_equal Value_Type.Mixed
|
||||
t2.at "X" . value_type . should_equal Value_Type.Integer
|
||||
|
||||
t4 = t2.union [t1, t3] allow_type_widening=True
|
||||
t4.at "X" . value_type . should_equal Value_Type.Float
|
||||
# Inexact float equality will make this pass:
|
||||
t4.at "X" . to_vector . should_equal [1.5, 2.5, 3.5, 1, (2^62)-1, 3, (2^100)+1, 2^10, 2]
|
||||
t6 = call_union [t1, t2, t3, t4, t5] allow_type_widening=False
|
||||
Problems.assume_no_problems t6
|
||||
t6.at "X" . value_type . should_equal Value_Type.Mixed
|
||||
t6.at "X" . to_vector . should_equal ["a", 1, Nothing, 1, 1.2, 2.3, 3.4, "a", "b", True, False]
|
||||
|
||||
w = Problems.expect_only_warning Loss_Of_Integer_Precision t4
|
||||
# Losing precision on (2^62)-1 and 2^100+1.
|
||||
w.affected_rows_count . should_equal 2
|
||||
group_builder.specify "when finding a common type for numeric columns to be Float, any precision loss should be reported" <|
|
||||
t1 = table_builder [["X", [1, (2^62)-1, 3]]]
|
||||
t2 = table_builder [["X", [1.5, 2.5, 3.5]]]
|
||||
t3 = table_builder [["X", [(2^100)+1, 2^10, 2]]]
|
||||
|
||||
group_builder.specify "if type mismatches cause all columns to be dropped, fail with No_Output_Columns" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]]]
|
||||
t2 = table_builder [["A", ['x']]]
|
||||
t1.at "X" . value_type . should_equal Value_Type.Integer
|
||||
t2.at "X" . value_type . should_equal Value_Type.Float
|
||||
t3.at "X" . value_type . should_be_a (Value_Type.Decimal ...)
|
||||
|
||||
e3 = t1.union t2 allow_type_widening=True on_problems=Problem_Behavior.Ignore
|
||||
e3.should_fail_with No_Output_Columns
|
||||
t4 = call_union [t2, t1, t3] allow_type_widening=True
|
||||
t4.at "X" . value_type . should_equal Value_Type.Float
|
||||
# Inexact float equality will make this pass:
|
||||
t4.at "X" . to_vector . should_equal [1.5, 2.5, 3.5, 1, (2^62)-1, 3, (2^100)+1, 2^10, 2]
|
||||
|
||||
t4 = table_builder [["A", [1.5]]]
|
||||
e5 = t1.union t4 allow_type_widening=False on_problems=Problem_Behavior.Ignore
|
||||
e5.should_fail_with No_Output_Columns
|
||||
w = Problems.expect_only_warning Loss_Of_Integer_Precision t4
|
||||
# Losing precision on (2^62)-1 and 2^100+1.
|
||||
w.affected_rows_count . should_equal 2
|
||||
|
||||
group_builder.specify "should find a common type (2)" <|
|
||||
t1 = (table_builder [["X", [0, 1, 2]], ["Y", ['aa', 'bb', 'cc']]]) . cast "X" (Value_Type.Integer Bits.Bits_16) . cast "Y" (Value_Type.Char size=2 variable_length=False)
|
||||
t2 = (table_builder [["X", [3, 4, 5]], ["Y", ['x', 'y', 'z']]]) . cast "X" (Value_Type.Integer Bits.Bits_32) . cast "Y" (Value_Type.Char size=1 variable_length=False)
|
||||
supports_complex_types = (t1.is_error || t2.is_error || Problems.get_attached_warnings t1 . not_empty).not
|
||||
case supports_complex_types of
|
||||
False -> Nothing
|
||||
True ->
|
||||
t12 = t1.union t2
|
||||
Problems.assume_no_problems t12
|
||||
t12.at "X" . value_type . should_equal (Value_Type.Integer Bits.Bits_32)
|
||||
t12.at "Y" . value_type . should_equal (Value_Type.Char size=2 variable_length=True)
|
||||
group_builder.specify "if type mismatches cause all columns to be dropped, fail with No_Output_Columns" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]]]
|
||||
t2 = table_builder [["A", ['x']]]
|
||||
|
||||
t12.at "X" . to_vector . should_equal [0, 1, 2, 3, 4, 5]
|
||||
t12.at "Y" . to_vector . should_equal ['aa', 'bb', 'cc', 'x', 'y', 'z']
|
||||
e3 = call_union [t1, t2] allow_type_widening=True on_problems=Problem_Behavior.Ignore
|
||||
e3.should_fail_with No_Output_Columns
|
||||
|
||||
group_builder.specify "should fail to find a common type if widening is not allowed (2)" <|
|
||||
t1 = (table_builder [["X", [0, 1, 2]], ["Y", ['aa', 'bb', 'cc']]]) . cast "X" (Value_Type.Integer Bits.Bits_16) . cast "Y" (Value_Type.Char size=2 variable_length=False)
|
||||
t2 = (table_builder [["X", [3, 4, 5]], ["Y", ['x', 'y', 'z']]]) . cast "X" (Value_Type.Integer Bits.Bits_32) . cast "Y" (Value_Type.Char size=1 variable_length=False)
|
||||
supports_complex_types = (t1.is_error || t2.is_error || Problems.get_attached_warnings t1 . not_empty).not
|
||||
case supports_complex_types of
|
||||
False -> Nothing
|
||||
True ->
|
||||
r1 = t1.union t2 allow_type_widening=False
|
||||
r1.should_fail_with No_Output_Columns
|
||||
r1.catch.cause . should_be_a Column_Type_Mismatch
|
||||
r1.catch.to_display_text . should_equal "No columns in the result, because of another problem: The column [X] expects type Integer (16 bits) but one of the provided tables had type Integer (32 bits) which is not compatible with it."
|
||||
t4 = table_builder [["A", [1.5]]]
|
||||
e5 = call_union [t1, t4] allow_type_widening=False on_problems=Problem_Behavior.Ignore
|
||||
e5.should_fail_with No_Output_Columns
|
||||
|
||||
# And this should report Column_Type_Mismatch as the more important error too:
|
||||
t1.union t2 allow_type_widening=False on_problems=Problem_Behavior.Report_Error . should_fail_with Column_Type_Mismatch
|
||||
group_builder.specify "should find a common type (2)" <|
|
||||
t1 = (table_builder [["X", [0, 1, 2]], ["Y", ['aa', 'bb', 'cc']]]) . cast "X" (Value_Type.Integer Bits.Bits_16) . cast "Y" (Value_Type.Char size=2 variable_length=False)
|
||||
t2 = (table_builder [["X", [3, 4, 5]], ["Y", ['x', 'y', 'z']]]) . cast "X" (Value_Type.Integer Bits.Bits_32) . cast "Y" (Value_Type.Char size=1 variable_length=False)
|
||||
supports_complex_types = (t1.is_error || t2.is_error || Problems.get_attached_warnings t1 . not_empty).not
|
||||
case supports_complex_types of
|
||||
False -> Nothing
|
||||
True ->
|
||||
t12 = call_union [t1, t2]
|
||||
Problems.assume_no_problems t12
|
||||
t12.at "X" . value_type . should_equal (Value_Type.Integer Bits.Bits_32)
|
||||
t12.at "Y" . value_type . should_equal (Value_Type.Char size=2 variable_length=True)
|
||||
|
||||
group_builder.specify "should gracefully handle tables from different backends" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
|
||||
t12.at "X" . to_vector . should_equal [0, 1, 2, 3, 4, 5]
|
||||
t12.at "Y" . to_vector . should_equal ['aa', 'bb', 'cc', 'x', 'y', 'z']
|
||||
|
||||
alternative_connection = Database.connect (SQLite In_Memory)
|
||||
t0 = (Table.new [["A", [1, 2, 4]], ["B", ["10", "20", "30"]]]).select_into_database_table alternative_connection "T0" temporary=True
|
||||
group_builder.specify "should fail to find a common type if widening is not allowed (2)" <|
|
||||
t1 = (table_builder [["X", [0, 1, 2]], ["Y", ['aa', 'bb', 'cc']]]) . cast "X" (Value_Type.Integer Bits.Bits_16) . cast "Y" (Value_Type.Char size=2 variable_length=False)
|
||||
t2 = (table_builder [["X", [3, 4, 5]], ["Y", ['x', 'y', 'z']]]) . cast "X" (Value_Type.Integer Bits.Bits_32) . cast "Y" (Value_Type.Char size=1 variable_length=False)
|
||||
supports_complex_types = (t1.is_error || t2.is_error || Problems.get_attached_warnings t1 . not_empty).not
|
||||
case supports_complex_types of
|
||||
False -> Nothing
|
||||
True ->
|
||||
r1 = call_union [t1, t2] allow_type_widening=False
|
||||
r1.should_fail_with No_Output_Columns
|
||||
r1.catch.cause . should_be_a Column_Type_Mismatch
|
||||
r1.catch.to_display_text . should_equal "No columns in the result, because of another problem: The column [X] expects type Integer (16 bits) but one of the provided tables had type Integer (32 bits) which is not compatible with it."
|
||||
|
||||
r1 = t1.union t0
|
||||
r1.should_fail_with Illegal_Argument
|
||||
# And this should report Column_Type_Mismatch as the more important error too:
|
||||
call_union [t1, t2] allow_type_widening=False on_problems=Problem_Behavior.Report_Error . should_fail_with Column_Type_Mismatch
|
||||
|
||||
group_builder.specify "should gracefully handle tables from different backends" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
|
||||
|
||||
alternative_connection = Database.connect (SQLite In_Memory)
|
||||
t0 = (Table.new [["A", [1, 2, 4]], ["B", ["10", "20", "30"]]]).select_into_database_table alternative_connection "T0" temporary=True
|
||||
|
||||
r1 = call_union [t1, t0]
|
||||
r1.should_fail_with Illegal_Argument
|
||||
|
Loading…
Reference in New Issue
Block a user