mirror of
https://github.com/enso-org/enso.git
synced 2024-12-22 22:51:38 +03:00
Improve basic join implementation (#3958)
Implements https://www.pivotaltracker.com/story/show/183913232 # Important Notes Added counts of succeeded/failed tests within each group, plus a global summary, to make it easier to see how many tests failed.
This commit is contained in:
parent
6eb30c37a5
commit
8e880e430b
@ -1,8 +1,12 @@
|
|||||||
|
from Standard.Base import all
|
||||||
|
|
||||||
type Join_Condition
|
type Join_Condition
|
||||||
## Specifies a join condition that correlates rows from the two tables if
|
## Specifies a join condition that correlates rows from the two tables if
|
||||||
the element from the `left_column` of the left table is equal to the
|
the element from the `left_column` of the left table is equal to the
|
||||||
element from the `right_column` of the right table.
|
element from the `right_column` of the right table.
|
||||||
|
|
||||||
|
Missing values are treated as equal to each other.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
- left_column: A name or index of a column in the left table.
|
- left_column: A name or index of a column in the left table.
|
||||||
- right_column: A name or index of a column in the right table.
|
- right_column: A name or index of a column in the right table.
|
||||||
@ -12,6 +16,8 @@ type Join_Condition
|
|||||||
the element from the `left` column of the left table is equal to the
|
the element from the `left` column of the left table is equal to the
|
||||||
element from the `right` column of the right table, ignoring case.
|
element from the `right` column of the right table, ignoring case.
|
||||||
|
|
||||||
|
Missing values are treated as equal to each other.
|
||||||
|
|
||||||
This is only supported for text columns.
|
This is only supported for text columns.
|
||||||
|
|
||||||
Case insensitive comparisons may make it impossible for a database
|
Case insensitive comparisons may make it impossible for a database
|
||||||
@ -20,14 +26,20 @@ type Join_Condition
|
|||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
- left: A name or index of a column in the left table.
|
- left: A name or index of a column in the left table.
|
||||||
- right: A name or index of a column in the right table.
|
- right: A name or index of a column in the right table. Defaults to the
|
||||||
Equals_Ignore_Case (left : Text | Integer) (right : Text | Integer)
|
same column selector as provided for `left`.
|
||||||
|
- locale: The locale to use for case insensitive comparisons.
|
||||||
|
Equals_Ignore_Case (left : Text | Integer) (right : Text | Integer = left) (locale : Locale = Locale.default)
|
||||||
|
|
||||||
## Specifies a join condition that correlates rows from the two tables if
|
## Specifies a join condition that correlates rows from the two tables if
|
||||||
the element from the `left` column of the left table fits between the
|
the element from the `left` column of the left table fits between the
|
||||||
corresponding elements from `right_lower` and `right_upper` columns of
|
corresponding elements from `right_lower` and `right_upper` columns of
|
||||||
the right table. The comparison is inclusive.
|
the right table. The comparison is inclusive.
|
||||||
|
|
||||||
|
Pairs of rows in which any of `left`, `right_lower`, or `right_upper` is
|
||||||
|
missing are ignored, as the comparison is assumed to be not well-defined
|
||||||
|
for missing values.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
- left: A name or index of a column in the left table.
|
- left: A name or index of a column in the left table.
|
||||||
- right_lower: A name or index of a column in the right table, used as
|
- right_lower: A name or index of a column in the right table, used as
|
||||||
|
@ -4,6 +4,7 @@ import Standard.Base.Data.Array_Proxy.Array_Proxy
|
|||||||
import Standard.Base.Data.Ordering.Comparator
|
import Standard.Base.Data.Ordering.Comparator
|
||||||
import Standard.Base.Error.Common.Index_Out_Of_Bounds
|
import Standard.Base.Error.Common.Index_Out_Of_Bounds
|
||||||
import Standard.Base.Error.Common.No_Such_Method
|
import Standard.Base.Error.Common.No_Such_Method
|
||||||
|
import Standard.Base.Error.Common.Type_Error
|
||||||
import Standard.Base.Error.File_Error.File_Error
|
import Standard.Base.Error.File_Error.File_Error
|
||||||
import Standard.Base.Error.Illegal_Argument.Illegal_Argument
|
import Standard.Base.Error.Illegal_Argument.Illegal_Argument
|
||||||
import Standard.Base.Error.Incomparable_Values.Incomparable_Values
|
import Standard.Base.Error.Incomparable_Values.Incomparable_Values
|
||||||
@ -44,6 +45,8 @@ from project.Internal.Filter_Condition_Helpers import make_filter_column
|
|||||||
polyglot java import org.enso.table.data.table.Table as Java_Table
|
polyglot java import org.enso.table.data.table.Table as Java_Table
|
||||||
polyglot java import org.enso.table.data.table.Column as Java_Column
|
polyglot java import org.enso.table.data.table.Column as Java_Column
|
||||||
polyglot java import org.enso.table.data.table.join.Equals as Java_Join_Equals
|
polyglot java import org.enso.table.data.table.join.Equals as Java_Join_Equals
|
||||||
|
polyglot java import org.enso.table.data.table.join.EqualsIgnoreCase as Java_Join_Equals_Ignore_Case
|
||||||
|
polyglot java import org.enso.table.data.table.join.Between as Java_Join_Between
|
||||||
polyglot java import org.enso.table.operations.OrderBuilder
|
polyglot java import org.enso.table.operations.OrderBuilder
|
||||||
polyglot java import org.enso.table.data.mask.OrderMask
|
polyglot java import org.enso.table.data.mask.OrderMask
|
||||||
polyglot java import java.util.UUID
|
polyglot java import java.util.UUID
|
||||||
@ -1052,7 +1055,7 @@ type Table
|
|||||||
- If a column index is invalid, an `Index_Out_Of_Bounds` is
|
- If a column index is invalid, an `Index_Out_Of_Bounds` is
|
||||||
reported and an empty result is reported.
|
reported and an empty result is reported.
|
||||||
- If a join condition correlates columns whose types are not compatible
|
- If a join condition correlates columns whose types are not compatible
|
||||||
(for example comparing numeric types with text), a
|
(for example comparing numeric types with text), an
|
||||||
`Invalid_Value_Type` is reported.
|
`Invalid_Value_Type` is reported.
|
||||||
- If decimal columns are joined on equality, a
|
- If decimal columns are joined on equality, a
|
||||||
`Floating_Point_Grouping` is reported.
|
`Floating_Point_Grouping` is reported.
|
||||||
@ -1085,35 +1088,43 @@ type Table
|
|||||||
allows to join the two tables on equality of corresponding columns with
|
allows to join the two tables on equality of corresponding columns with
|
||||||
the same name. So `table.join other on=["A", "B"]` is a shorthand for:
|
the same name. So `table.join other on=["A", "B"]` is a shorthand for:
|
||||||
table.join other on=[Join_Condition.Equals "A" "A", Join_Condition.Equals "B" "B"]
|
table.join other on=[Join_Condition.Equals "A" "A", Join_Condition.Equals "B" "B"]
|
||||||
join : Table -> Join_Kind -> Join_Condition | Vector Join_Condition | Vector Text -> Text -> Problem_Behavior -> Table
|
join : Table -> Join_Kind -> Join_Condition | Text | Vector (Join_Condition | Text) -> Text -> Problem_Behavior -> Table
|
||||||
join self right join_kind=Join_Kind.Inner on=[Join_Condition.Equals 0 0] right_prefix="Right_" on_problems=Report_Warning =
|
join self right join_kind=Join_Kind.Inner on=[Join_Condition.Equals 0 0] right_prefix="Right_" on_problems=Report_Warning =
|
||||||
# TODO check for cross-backend joins
|
if Table_Helpers.is_table right . not then Error.throw (Type_Error.Error Table right "right") else
|
||||||
|
same_backend = case right of
|
||||||
|
_ : Table -> True
|
||||||
|
_ -> False
|
||||||
|
if same_backend . not then Error.throw (Illegal_Argument.Error "Currently cross-backend joins are not supported. Materialize the table using `.read` before joining it with an in-memory Table.") else
|
||||||
|
# [left_unmatched, matched, right_unmatched]
|
||||||
|
rows_to_keep = case join_kind of
|
||||||
|
Join_Kind.Inner -> [False, True, False]
|
||||||
|
Join_Kind.Left_Outer -> [True, True, False]
|
||||||
|
Join_Kind.Right_Outer -> [False, True, True]
|
||||||
|
Join_Kind.Full -> [True, True, True]
|
||||||
|
Join_Kind.Left_Exclusive -> [True, False, False]
|
||||||
|
Join_Kind.Right_Exclusive -> [False, False, True]
|
||||||
|
|
||||||
# [left_unmatched, matched, right_unmatched]
|
columns_to_keep = case join_kind of
|
||||||
rows_to_keep = case join_kind of
|
Join_Kind.Left_Exclusive -> [True, False]
|
||||||
Join_Kind.Inner -> [False, True, False]
|
Join_Kind.Right_Exclusive -> [False, True]
|
||||||
Join_Kind.Left_Outer -> [True, True, False]
|
_ -> [True, True]
|
||||||
Join_Kind.Right_Outer -> [False, True, True]
|
|
||||||
Join_Kind.Full -> [True, True, True]
|
|
||||||
Join_Kind.Left_Exclusive -> [True, False, False]
|
|
||||||
Join_Kind.Right_Exclusive -> [False, False, True]
|
|
||||||
|
|
||||||
columns_to_keep = case join_kind of
|
join_resolution = make_join_helpers self right . resolve on on_problems
|
||||||
Join_Kind.Left_Exclusive -> [True, False]
|
right_columns_to_drop = join_resolution.redundant_column_names
|
||||||
Join_Kind.Right_Exclusive -> [False, True]
|
|
||||||
_ -> [True, True]
|
|
||||||
|
|
||||||
join_resolution = make_join_helpers self right . resolve on on_problems
|
object_comparator = Comparator.new
|
||||||
right_columns_to_drop = join_resolution.redundant_column_names
|
equality_fallback = .==
|
||||||
|
|
||||||
case join_resolution.conditions of
|
new_java_table = case join_resolution.conditions of
|
||||||
# Nothing is returned if some conditions failed to resolve, we will return an empty result in such case.
|
# Nothing is returned if some conditions failed to resolve, we will return an empty result in such case.
|
||||||
Nothing ->
|
Nothing ->
|
||||||
new_table = self.java_table.join right.java_table Nothing False False False (columns_to_keep.at 0) (columns_to_keep.at 1) right_columns_to_drop right_prefix Comparator.new
|
self.java_table.join right.java_table Nothing False False False (columns_to_keep.at 0) (columns_to_keep.at 1) right_columns_to_drop right_prefix object_comparator equality_fallback
|
||||||
Table.Value new_table
|
java_conditions ->
|
||||||
java_conditions ->
|
self.java_table.join right.java_table java_conditions (rows_to_keep.at 0) (rows_to_keep.at 1) (rows_to_keep.at 2) (columns_to_keep.at 0) (columns_to_keep.at 1) right_columns_to_drop right_prefix object_comparator equality_fallback
|
||||||
new_table = self.java_table.join right.java_table java_conditions (rows_to_keep.at 0) (rows_to_keep.at 1) (rows_to_keep.at 2) (columns_to_keep.at 0) (columns_to_keep.at 1) right_columns_to_drop right_prefix Comparator.new
|
|
||||||
Table.Value new_table
|
on_problems.attach_problems_after (Table.Value new_java_table) <|
|
||||||
|
problems = new_java_table.getProblems
|
||||||
|
Aggregate_Column_Helper.parse_aggregated_problems problems
|
||||||
|
|
||||||
## ALIAS dropna
|
## ALIAS dropna
|
||||||
ALIAS drop_missing_rows
|
ALIAS drop_missing_rows
|
||||||
@ -1504,8 +1515,8 @@ slice_ranges table ranges =
|
|||||||
## PRIVATE
|
## PRIVATE
|
||||||
make_join_helpers left_table right_table =
|
make_join_helpers left_table right_table =
|
||||||
make_equals left right = Java_Join_Equals.new left.java_column right.java_column
|
make_equals left right = Java_Join_Equals.new left.java_column right.java_column
|
||||||
make_equals_ignore_case _ _ =
|
make_equals_ignore_case left right locale =
|
||||||
Unimplemented.throw "Conditions other than Equals are not implemented yet."
|
Java_Join_Equals_Ignore_Case.new left.java_column right.java_column locale.java_locale
|
||||||
make_between _ _ _ =
|
make_between left right_lower right_upper =
|
||||||
Unimplemented.throw "Conditions other than Equals are not implemented yet."
|
Java_Join_Between.new left.java_column right_lower.java_column right_upper.java_column
|
||||||
Join_Helpers.Join_Condition_Resolver.Value (left_table.at _) (right_table.at _) make_equals make_equals_ignore_case make_between
|
Join_Helpers.Join_Condition_Resolver.Value (left_table.at _) (right_table.at _) make_equals make_equals_ignore_case make_between
|
||||||
|
@ -125,11 +125,11 @@ type Invalid_Aggregation
|
|||||||
|
|
||||||
## Indicates that a floating point number was used in a grouping.
|
## Indicates that a floating point number was used in a grouping.
|
||||||
type Floating_Point_Grouping
|
type Floating_Point_Grouping
|
||||||
Error (column:Text) (rows:[Integer])
|
Error (column:Text)
|
||||||
|
|
||||||
to_display_text : Text
|
to_display_text : Text
|
||||||
to_display_text self =
|
to_display_text self =
|
||||||
"Grouping on floating points is not recommended within "+self.column+" at row "+self.row.to_text+"."
|
"Grouping on floating points is not recommended (within "+self.column+")."
|
||||||
|
|
||||||
## Indicates that a text value with a delimiter was included in a concatenation without any quote character
|
## Indicates that a text value with a delimiter was included in a concatenation without any quote character
|
||||||
type Unquoted_Delimiter
|
type Unquoted_Delimiter
|
||||||
|
@ -232,7 +232,7 @@ parse_aggregated_problems problems =
|
|||||||
parsed = Vector.new problems_array.length i->
|
parsed = Vector.new problems_array.length i->
|
||||||
p = problems_array.at i
|
p = problems_array.at i
|
||||||
if Java.is_instance p InvalidAggregation then Invalid_Aggregation.Error p.getColumnName (Vector.from_polyglot_array p.getRows) p.getMessage else
|
if Java.is_instance p InvalidAggregation then Invalid_Aggregation.Error p.getColumnName (Vector.from_polyglot_array p.getRows) p.getMessage else
|
||||||
if Java.is_instance p FloatingPointGrouping then Floating_Point_Grouping.Error p.getColumnName (Vector.from_polyglot_array p.getRows) else
|
if Java.is_instance p FloatingPointGrouping then Floating_Point_Grouping.Error p.getColumnName else
|
||||||
if Java.is_instance p UnquotedDelimiter then Unquoted_Delimiter.Error p.getColumnName (Vector.from_polyglot_array p.getRows) else
|
if Java.is_instance p UnquotedDelimiter then Unquoted_Delimiter.Error p.getColumnName (Vector.from_polyglot_array p.getRows) else
|
||||||
Invalid_Aggregation.Error Nothing -1 "Unknown Error"
|
Invalid_Aggregation.Error Nothing -1 "Unknown Error"
|
||||||
|
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
from Standard.Base import all
|
from Standard.Base import all
|
||||||
import Standard.Base.Error.Common.Index_Out_Of_Bounds
|
import Standard.Base.Error.Common.Index_Out_Of_Bounds
|
||||||
|
|
||||||
|
from project.Errors import Invalid_Value_Type
|
||||||
import project.Data.Join_Condition.Join_Condition
|
import project.Data.Join_Condition.Join_Condition
|
||||||
|
import project.Data.Value_Type.Value_Type
|
||||||
import project.Internal.Problem_Builder.Problem_Builder
|
import project.Internal.Problem_Builder.Problem_Builder
|
||||||
|
|
||||||
import project.Errors.No_Such_Column
|
import project.Errors.No_Such_Column
|
||||||
@ -19,7 +21,7 @@ type Join_Condition_Resolver
|
|||||||
`Nothing` will be returned as the conditions indicating that state.
|
`Nothing` will be returned as the conditions indicating that state.
|
||||||
Besides, a list of redundant columns from equality joins is aggregated
|
Besides, a list of redundant columns from equality joins is aggregated
|
||||||
which can be used to deduplicate them.
|
which can be used to deduplicate them.
|
||||||
resolve : Join_Condition | Vector Join_Condition -> Problem_Behavior -> Join_Condition_Resolution
|
resolve : Join_Condition | Text | Vector (Join_Condition | Text) -> Problem_Behavior -> Join_Condition_Resolution
|
||||||
resolve self conditions on_problems =
|
resolve self conditions on_problems =
|
||||||
redundant_names = Vector.new_builder
|
redundant_names = Vector.new_builder
|
||||||
problem_builder = Problem_Builder.new
|
problem_builder = Problem_Builder.new
|
||||||
@ -38,19 +40,27 @@ type Join_Condition_Resolver
|
|||||||
conditions_vector = case conditions of
|
conditions_vector = case conditions of
|
||||||
_ : Vector -> conditions
|
_ : Vector -> conditions
|
||||||
single_condition : Join_Condition -> [single_condition]
|
single_condition : Join_Condition -> [single_condition]
|
||||||
|
handle_equals left_selector right_selector =
|
||||||
|
left = resolve_left left_selector
|
||||||
|
right = resolve_right right_selector
|
||||||
|
if left.is_nothing || right.is_nothing then Nothing else
|
||||||
|
if left.name == right.name then
|
||||||
|
redundant_names.append right.name
|
||||||
|
self.make_equals left right
|
||||||
converted = conditions_vector.map condition-> case condition of
|
converted = conditions_vector.map condition-> case condition of
|
||||||
Join_Condition.Equals left_selector right_selector ->
|
Join_Condition.Equals left_selector right_selector ->
|
||||||
|
handle_equals left_selector right_selector
|
||||||
|
column_name : Text -> handle_equals column_name column_name
|
||||||
|
Join_Condition.Equals_Ignore_Case left_selector right_selector locale ->
|
||||||
left = resolve_left left_selector
|
left = resolve_left left_selector
|
||||||
right = resolve_right right_selector
|
right = resolve_right right_selector
|
||||||
if left.is_nothing || right.is_nothing then Nothing else
|
if left.is_nothing || right.is_nothing then Nothing else
|
||||||
if left.name == right.name then
|
result = Value_Type.expect_text left.value_type <|
|
||||||
redundant_names.append right.name
|
Value_Type.expect_text right.value_type <|
|
||||||
self.make_equals left right
|
self.make_equals_ignore_case left right locale
|
||||||
Join_Condition.Equals_Ignore_Case left_selector right_selector ->
|
result.catch Invalid_Value_Type.Error error->
|
||||||
left = resolve_left left_selector
|
problem_builder.report_other_warning error
|
||||||
right = resolve_right right_selector
|
Nothing
|
||||||
if left.is_nothing || right.is_nothing then Nothing else
|
|
||||||
self.make_equals_ignore_case left right
|
|
||||||
Join_Condition.Between left_selector right_lower_selector right_upper_selector ->
|
Join_Condition.Between left_selector right_lower_selector right_upper_selector ->
|
||||||
left = resolve_left left_selector
|
left = resolve_left left_selector
|
||||||
right_lower = resolve_right right_lower_selector
|
right_lower = resolve_right right_lower_selector
|
||||||
|
@ -614,3 +614,12 @@ filter_blank_rows table when_any treat_nans_as_blank =
|
|||||||
non_missing_mask = missing_mask.not
|
non_missing_mask = missing_mask.not
|
||||||
table.filter non_missing_mask
|
table.filter non_missing_mask
|
||||||
False -> table
|
False -> table
|
||||||
|
|
||||||
|
## PRIVATE
|
||||||
|
Checks if the given object implements a Table interface.
|
||||||
|
|
||||||
|
Currently, it relies on a hack - it checks if the qualified type name of
|
||||||
|
the object is one of the known `Table` types (Standard.Table or Standard.Database).
|
||||||
|
is_table obj =
|
||||||
|
known_types = ["Standard.Table.Data.Table.Table", "Standard.Database.Data.Table.Table"]
|
||||||
|
known_types.contains (Meta.get_qualified_type_name obj)
|
||||||
|
@ -190,6 +190,19 @@ type Spec
|
|||||||
is_fail : Boolean
|
is_fail : Boolean
|
||||||
is_fail self = self.behaviors.any .is_fail
|
is_fail self = self.behaviors.any .is_fail
|
||||||
|
|
||||||
|
tests_succeeded : Integer
|
||||||
|
tests_succeeded self = self.behaviors.filter (x-> x.is_success) . length
|
||||||
|
|
||||||
|
tests_failed : Integer
|
||||||
|
tests_failed self = self.behaviors.filter (x-> x.is_fail) . length
|
||||||
|
|
||||||
|
tests_pending : Integer
|
||||||
|
tests_pending self = self.behaviors.filter (x-> x.is_pending) . length
|
||||||
|
|
||||||
|
## Counts how many tests have been executed, not including pending tests.
|
||||||
|
tests_executed : Integer
|
||||||
|
tests_executed self = self.behaviors.filter (b-> b.is_pending.not) . length
|
||||||
|
|
||||||
## PRIVATE
|
## PRIVATE
|
||||||
|
|
||||||
A description of a behaviors in a test.
|
A description of a behaviors in a test.
|
||||||
@ -213,3 +226,9 @@ type Behavior
|
|||||||
Checks if the behavior is a failure.
|
Checks if the behavior is a failure.
|
||||||
is_fail : Boolean
|
is_fail : Boolean
|
||||||
is_fail self = self.result.is_fail
|
is_fail self = self.result.is_fail
|
||||||
|
|
||||||
|
## PRIVATE
|
||||||
|
|
||||||
|
Checks if the behavior is a success.
|
||||||
|
is_success : Boolean
|
||||||
|
is_success self = self.result.is_success
|
||||||
|
@ -57,14 +57,21 @@ print_report spec config builder =
|
|||||||
|
|
||||||
should_print_behavior = config.print_only_failures.not || spec.behaviors.any (b -> b.result.is_fail)
|
should_print_behavior = config.print_only_failures.not || spec.behaviors.any (b -> b.result.is_fail)
|
||||||
if should_print_behavior then
|
if should_print_behavior then
|
||||||
IO.println (spec.name + ": [" + total_time.total_milliseconds.to_text + "ms]")
|
spec_description =
|
||||||
|
counts = spec.tests_succeeded.to_text + "/" + spec.tests_executed.to_text
|
||||||
|
times = total_time.total_milliseconds.to_text + "ms"
|
||||||
|
"[" + counts + ", " + times + "]"
|
||||||
|
IO.println (spec.name + ": " + spec_description)
|
||||||
spec.behaviors.reverse.each behavior->
|
spec.behaviors.reverse.each behavior->
|
||||||
|
make_behavior_description behavior =
|
||||||
|
times = behavior.time_taken.total_milliseconds.to_text + "ms"
|
||||||
|
"[" + times + "]"
|
||||||
case behavior.result of
|
case behavior.result of
|
||||||
Test_Result.Success ->
|
Test_Result.Success ->
|
||||||
if config.print_only_failures.not then
|
if config.print_only_failures.not then
|
||||||
IO.println (" - " + behavior.name + " [" + behavior.time_taken.total_milliseconds.to_text + "ms]")
|
IO.println (" - " + behavior.name + " " + make_behavior_description behavior)
|
||||||
Test_Result.Failure msg details ->
|
Test_Result.Failure msg details ->
|
||||||
IO.println (" - [FAILED] " + behavior.name + " [" + behavior.time_taken.total_milliseconds.to_text + "ms]")
|
IO.println (" - [FAILED] " + behavior.name + " " + make_behavior_description behavior)
|
||||||
IO.println (" Reason: " + msg)
|
IO.println (" Reason: " + msg)
|
||||||
if details.is_nothing.not then
|
if details.is_nothing.not then
|
||||||
IO.println details
|
IO.println details
|
||||||
|
@ -28,3 +28,9 @@ type Test_Result
|
|||||||
is_fail self = case self of
|
is_fail self = case self of
|
||||||
Test_Result.Failure _ _ -> True
|
Test_Result.Failure _ _ -> True
|
||||||
_ -> False
|
_ -> False
|
||||||
|
|
||||||
|
## Checks if the Test_Result is a success.
|
||||||
|
is_success : Boolean
|
||||||
|
is_success self = case self of
|
||||||
|
Test_Result.Success -> True
|
||||||
|
_ -> False
|
||||||
|
@ -38,6 +38,9 @@ type Test_Suite
|
|||||||
run_main ~specs =
|
run_main ~specs =
|
||||||
config = Suite_Config.from_environment
|
config = Suite_Config.from_environment
|
||||||
r = Test_Suite.run specs config
|
r = Test_Suite.run specs config
|
||||||
|
IO.println r.tests_succeeded.to_text+" tests succeeded."
|
||||||
|
IO.println r.tests_failed.to_text+" tests failed."
|
||||||
|
IO.println r.tests_pending.to_text+" tests skipped."
|
||||||
code = if r.is_fail then 1 else 0
|
code = if r.is_fail then 1 else 0
|
||||||
System.exit code
|
System.exit code
|
||||||
|
|
||||||
@ -71,3 +74,12 @@ type Test_Suite
|
|||||||
Checks if the suite contains any failures, and hence fails itself.
|
Checks if the suite contains any failures, and hence fails itself.
|
||||||
is_fail : Boolean
|
is_fail : Boolean
|
||||||
is_fail self = self.specs.any .is_fail
|
is_fail self = self.specs.any .is_fail
|
||||||
|
|
||||||
|
tests_succeeded : Integer
|
||||||
|
tests_succeeded self = self.specs.map .tests_succeeded . to_vector . compute Statistic.Sum . floor
|
||||||
|
|
||||||
|
tests_failed : Integer
|
||||||
|
tests_failed self = self.specs.map .tests_failed . to_vector . compute Statistic.Sum . floor
|
||||||
|
|
||||||
|
tests_pending : Integer
|
||||||
|
tests_pending self = self.specs.map .tests_pending . to_vector . compute Statistic.Sum . floor
|
||||||
|
@ -46,7 +46,12 @@ public class MultiValueIndex {
|
|||||||
MultiValueKeyBase key = keyFactory.apply(i);
|
MultiValueKeyBase key = keyFactory.apply(i);
|
||||||
|
|
||||||
if (key.hasFloatValues()) {
|
if (key.hasFloatValues()) {
|
||||||
problems.add(new FloatingPointGrouping("GroupBy", i));
|
final int row = i;
|
||||||
|
key.floatColumnPositions()
|
||||||
|
.forEach(
|
||||||
|
columnIx -> {
|
||||||
|
problems.add(new FloatingPointGrouping(keyColumns[columnIx].getName(), row));
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
List<Integer> ids = this.locs.computeIfAbsent(key, x -> new ArrayList<>());
|
List<Integer> ids = this.locs.computeIfAbsent(key, x -> new ArrayList<>());
|
||||||
@ -170,6 +175,10 @@ public class MultiValueIndex {
|
|||||||
return new Table(output, merged);
|
return new Table(output, merged);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public AggregatedProblems getProblems() {
|
||||||
|
return problems;
|
||||||
|
}
|
||||||
|
|
||||||
public int[] makeOrderMap(int rowCount) {
|
public int[] makeOrderMap(int rowCount) {
|
||||||
if (this.locs.size() == 0) {
|
if (this.locs.size() == 0) {
|
||||||
return new int[0];
|
return new int[0];
|
||||||
|
@ -2,6 +2,9 @@ package org.enso.table.data.index;
|
|||||||
|
|
||||||
import org.enso.table.data.column.storage.Storage;
|
import org.enso.table.data.column.storage.Storage;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/** The base class for keys used for sorting/grouping rows by a set of columns. */
|
/** The base class for keys used for sorting/grouping rows by a set of columns. */
|
||||||
public abstract class MultiValueKeyBase {
|
public abstract class MultiValueKeyBase {
|
||||||
protected final Storage<?>[] storages;
|
protected final Storage<?>[] storages;
|
||||||
@ -62,4 +65,19 @@ public abstract class MultiValueKeyBase {
|
|||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Finds which columns contain a float value at this index position and returns their positions in
|
||||||
|
* this index.
|
||||||
|
*/
|
||||||
|
public List<Integer> floatColumnPositions() {
|
||||||
|
List<Integer> result = new ArrayList<>();
|
||||||
|
for (int i = 0; i < storages.length; i++) {
|
||||||
|
Object value = this.get(i);
|
||||||
|
if (isFloatingPoint(value)) {
|
||||||
|
result.add(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -14,6 +14,7 @@ import org.enso.table.data.index.MultiValueIndex;
|
|||||||
import org.enso.table.data.mask.OrderMask;
|
import org.enso.table.data.mask.OrderMask;
|
||||||
import org.enso.table.data.mask.SliceRange;
|
import org.enso.table.data.mask.SliceRange;
|
||||||
import org.enso.table.data.table.join.*;
|
import org.enso.table.data.table.join.*;
|
||||||
|
import org.enso.table.data.table.join.scan.ScanJoin;
|
||||||
import org.enso.table.data.table.problems.AggregatedProblems;
|
import org.enso.table.data.table.problems.AggregatedProblems;
|
||||||
import org.enso.table.error.NoSuchColumnException;
|
import org.enso.table.error.NoSuchColumnException;
|
||||||
import org.enso.table.error.UnexpectedColumnTypeException;
|
import org.enso.table.error.UnexpectedColumnTypeException;
|
||||||
@ -21,6 +22,7 @@ import org.enso.table.operations.Distinct;
|
|||||||
import org.enso.table.util.NameDeduplicator;
|
import org.enso.table.util.NameDeduplicator;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
import java.util.function.BiFunction;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.IntStream;
|
import java.util.stream.IntStream;
|
||||||
|
|
||||||
@ -285,14 +287,18 @@ public class Table {
|
|||||||
*
|
*
|
||||||
* {@code rightColumnsToDrop} allows to drop columns from the right table that are redundant when joining on equality of equally named columns.
|
* {@code rightColumnsToDrop} allows to drop columns from the right table that are redundant when joining on equality of equally named columns.
|
||||||
*/
|
*/
|
||||||
public Table join(Table right, List<JoinCondition> conditions, boolean keepLeftUnmatched, boolean keepMatched, boolean keepRightUnmatched, boolean includeLeftColumns, boolean includeRightColumns, List<String> rightColumnsToDrop, String right_prefix, Comparator<Object> comparator) {
|
public Table join(Table right, List<JoinCondition> conditions, boolean keepLeftUnmatched, boolean keepMatched, boolean keepRightUnmatched, boolean includeLeftColumns, boolean includeRightColumns, List<String> rightColumnsToDrop, String right_prefix, Comparator<Object> objectComparator, BiFunction<Object, Object, Boolean> equalityFallback) {
|
||||||
// TODO adding prefix for right columns
|
// TODO adding prefix for right columns
|
||||||
NameDeduplicator deduplicator = new NameDeduplicator();
|
NameDeduplicator deduplicator = new NameDeduplicator();
|
||||||
|
|
||||||
JoinStrategy strategy = new IndexJoin(comparator);
|
JoinResult joinResult = null;
|
||||||
|
|
||||||
// Only compute the join if there are any results to be returned.
|
// Only compute the join if there are any results to be returned.
|
||||||
JoinResult joinResult = (keepLeftUnmatched || keepMatched || keepRightUnmatched) ? strategy.join(this, right, conditions) : null;
|
if (keepLeftUnmatched || keepMatched || keepRightUnmatched) {
|
||||||
|
// TODO We'll want a mixed strategy doing Index for supported conditions and then scanning on subgroups. For now Index works only for the simple happy path.
|
||||||
|
boolean allCanUseIndex = conditions.stream().allMatch(IndexJoin::isSupported);
|
||||||
|
JoinStrategy strategy = allCanUseIndex ? new IndexJoin(objectComparator) : new ScanJoin(objectComparator, equalityFallback);
|
||||||
|
joinResult = strategy.join(this, right, conditions);
|
||||||
|
}
|
||||||
|
|
||||||
List<Integer> leftRows = new ArrayList<>();
|
List<Integer> leftRows = new ArrayList<>();
|
||||||
List<Integer> rightRows = new ArrayList<>();
|
List<Integer> rightRows = new ArrayList<>();
|
||||||
@ -360,7 +366,8 @@ public class Table {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return new Table(newColumns.toArray(new Column[0]));
|
AggregatedProblems problems = joinResult != null ? joinResult.problems() : new AggregatedProblems();
|
||||||
|
return new Table(newColumns.toArray(new Column[0]), problems);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -0,0 +1,5 @@
|
|||||||
|
package org.enso.table.data.table.join;
|
||||||
|
|
||||||
|
import org.enso.table.data.table.Column;
|
||||||
|
|
||||||
|
public record Between(Column left, Column rightLower, Column rightUpper) implements JoinCondition {}
|
@ -0,0 +1,7 @@
|
|||||||
|
package org.enso.table.data.table.join;
|
||||||
|
|
||||||
|
import org.enso.table.data.table.Column;
|
||||||
|
|
||||||
|
import java.util.Locale;
|
||||||
|
|
||||||
|
public record EqualsIgnoreCase(Column left, Column right, Locale locale) implements JoinCondition {}
|
@ -1,8 +1,10 @@
|
|||||||
package org.enso.table.data.table.join;
|
package org.enso.table.data.table.join;
|
||||||
|
|
||||||
|
import org.enso.table.data.column.storage.Storage;
|
||||||
import org.enso.table.data.index.MultiValueIndex;
|
import org.enso.table.data.index.MultiValueIndex;
|
||||||
import org.enso.table.data.table.Column;
|
import org.enso.table.data.table.Column;
|
||||||
import org.enso.table.data.table.Table;
|
import org.enso.table.data.table.Table;
|
||||||
|
import org.enso.table.data.table.problems.AggregatedProblems;
|
||||||
import org.graalvm.collections.Pair;
|
import org.graalvm.collections.Pair;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
@ -24,30 +26,41 @@ public class IndexJoin implements JoinStrategy {
|
|||||||
.filter(c -> c != null)
|
.filter(c -> c != null)
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
if (equalConditions.size() != conditions.size()) {
|
if (equalConditions.size() != conditions.size()) {
|
||||||
return new ScanJoin().join(left, right, conditions);
|
throw new IllegalArgumentException("Currently conditions other than Equals are not supported in index-joins.");
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
var leftEquals = equalConditions.stream().map(Equals::left).toArray(Column[]::new);
|
||||||
var leftEquals = equalConditions.stream().map(Equals::left).toArray(Column[]::new);
|
var leftIndex = new MultiValueIndex(leftEquals, left.rowCount(), comparator);
|
||||||
var leftIndex = new MultiValueIndex(leftEquals, left.rowCount(), comparator);
|
|
||||||
|
|
||||||
var rightEquals = equalConditions.stream().map(Equals::right).toArray(Column[]::new);
|
var rightEquals = equalConditions.stream().map(Equals::right).toArray(Column[]::new);
|
||||||
var rightIndex = new MultiValueIndex(rightEquals, right.rowCount(), comparator);
|
var rightIndex = new MultiValueIndex(rightEquals, right.rowCount(), comparator);
|
||||||
|
|
||||||
List<Pair<Integer, Integer>> matches = new ArrayList<>();
|
List<Pair<Integer, Integer>> matches = new ArrayList<>();
|
||||||
for (var leftKey : leftIndex.keys()) {
|
for (var leftKey : leftIndex.keys()) {
|
||||||
if (rightIndex.contains(leftKey)) {
|
if (rightIndex.contains(leftKey)) {
|
||||||
for (var leftRow : leftIndex.get(leftKey)) {
|
for (var leftRow : leftIndex.get(leftKey)) {
|
||||||
for (var rightRow : rightIndex.get(leftKey)) {
|
for (var rightRow : rightIndex.get(leftKey)) {
|
||||||
matches.add(Pair.create(leftRow, rightRow));
|
matches.add(Pair.create(leftRow, rightRow));
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return new JoinResult(matches);
|
|
||||||
} catch (IllegalStateException e) {
|
|
||||||
// Fallback for custom objects
|
|
||||||
return new ScanJoin().join(left, right, conditions);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
AggregatedProblems problems = AggregatedProblems.merge(new AggregatedProblems[]{
|
||||||
|
leftIndex.getProblems(),
|
||||||
|
rightIndex.getProblems()
|
||||||
|
});
|
||||||
|
return new JoinResult(matches, problems);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean isSupported(JoinCondition condition) {
|
||||||
|
if (condition instanceof Equals eq) {
|
||||||
|
// Currently hashing works only for builtin types.
|
||||||
|
return isBuiltinType(eq.left().getStorage()) && isBuiltinType(eq.right().getStorage());
|
||||||
|
} else return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Treats every storage whose type is anything other than {@code Storage.Type.OBJECT} as builtin. */
private static boolean isBuiltinType(Storage<?> storage) {
|
||||||
|
return storage.getType() != Storage.Type.OBJECT;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
package org.enso.table.data.table.join;
|
package org.enso.table.data.table.join;
|
||||||
|
|
||||||
|
import org.enso.table.data.table.problems.AggregatedProblems;
|
||||||
import org.graalvm.collections.Pair;
|
import org.graalvm.collections.Pair;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
public record JoinResult(List<Pair<Integer, Integer>> matchedRows) {}
|
/**
 * Outcome of a join: the matched pairs of (left row index, right row index) together with the
 * problems gathered while computing them.
 */
public record JoinResult(List<Pair<Integer, Integer>> matchedRows, AggregatedProblems problems) {}
|
||||||
|
@ -1,41 +0,0 @@
|
|||||||
package org.enso.table.data.table.join;
|
|
||||||
|
|
||||||
import org.enso.table.data.table.Table;
|
|
||||||
import org.graalvm.collections.Pair;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public class ScanJoin implements JoinStrategy {
|
|
||||||
@Override
|
|
||||||
public JoinResult join(Table left, Table right, List<JoinCondition> conditions) {
|
|
||||||
List<Pair<Integer, Integer>> matches = new ArrayList<>();
|
|
||||||
int ls = left.rowCount();
|
|
||||||
int rs = right.rowCount();
|
|
||||||
for (int l = 0; l < ls; ++l) {
|
|
||||||
for (int r = 0; r < rs; ++r) {
|
|
||||||
boolean match = true;
|
|
||||||
conditions: for (JoinCondition condition : conditions) {
|
|
||||||
switch (condition) {
|
|
||||||
case Equals eq -> {
|
|
||||||
Object leftValue = eq.left().getStorage().getItemBoxed(l);
|
|
||||||
Object rightValue = eq.right().getStorage().getItemBoxed(r);
|
|
||||||
// TODO normalize equality of strings and decimals with ints
|
|
||||||
if (!leftValue.equals(rightValue)) {
|
|
||||||
match = false;
|
|
||||||
break conditions;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
default -> throw new UnsupportedOperationException("Unsupported join condition: " + condition);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (match) {
|
|
||||||
matches.add(Pair.create(l, r));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return new JoinResult(matches);
|
|
||||||
}
|
|
||||||
}
|
|
@ -0,0 +1,11 @@
|
|||||||
|
package org.enso.table.data.table.join.scan;
|
||||||
|
|
||||||
|
import org.enso.table.data.table.problems.AggregatedProblems;
|
||||||
|
|
||||||
|
/** Evaluates a single join condition for a pair of row indices. */
public interface Matcher {
|
||||||
|
/** Returns whether the row at index `left` and the row at index `right` satisfy the condition. */
boolean matches(int left, int right);
|
||||||
|
|
||||||
|
/** Problems gathered while matching; the default implementation reports none. */
default AggregatedProblems getProblems() {
|
||||||
|
return AggregatedProblems.of();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,144 @@
|
|||||||
|
package org.enso.table.data.table.join.scan;
|
||||||
|
|
||||||
|
import org.enso.base.Text_Utils;
|
||||||
|
import org.enso.base.polyglot.NumericConverter;
|
||||||
|
import org.enso.table.data.column.storage.Storage;
|
||||||
|
import org.enso.table.data.column.storage.StringStorage;
|
||||||
|
import org.enso.table.data.table.join.Between;
|
||||||
|
import org.enso.table.data.table.join.Equals;
|
||||||
|
import org.enso.table.data.table.join.EqualsIgnoreCase;
|
||||||
|
import org.enso.table.data.table.join.JoinCondition;
|
||||||
|
import org.enso.table.data.table.problems.AggregatedProblems;
|
||||||
|
import org.enso.table.data.table.problems.FloatingPointGrouping;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.Locale;
|
||||||
|
import java.util.function.BiFunction;
|
||||||
|
|
||||||
|
public class MatcherFactory {
|
||||||
|
private final Comparator<Object> objectComparator;
|
||||||
|
private final BiFunction<Object, Object, Boolean> equalityFallback;
|
||||||
|
|
||||||
|
public MatcherFactory(Comparator<Object> objectComparator, BiFunction<Object, Object, Boolean> equalityFallback) {
|
||||||
|
this.objectComparator = objectComparator;
|
||||||
|
this.equalityFallback = equalityFallback;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Matcher create(JoinCondition condition) {
|
||||||
|
return switch (condition) {
|
||||||
|
case Equals eq -> new EqualsMatcher(eq, equalityFallback);
|
||||||
|
case EqualsIgnoreCase eq -> new EqualsIgnoreCaseMatcher(eq);
|
||||||
|
case Between between -> new BetweenMatcher(between, objectComparator);
|
||||||
|
default -> throw new UnsupportedOperationException("Unsupported join condition: " + condition);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
static final class EqualsMatcher implements Matcher {
|
||||||
|
|
||||||
|
private final BiFunction<Object, Object, Boolean> equalityFallback;
|
||||||
|
private final Storage<?> leftStorage;
|
||||||
|
private final Storage<?> rightStorage;
|
||||||
|
private final String leftColumnName;
|
||||||
|
private final String rightColumnName;
|
||||||
|
|
||||||
|
private final AggregatedProblems problems;
|
||||||
|
|
||||||
|
public EqualsMatcher(Equals eq, BiFunction<Object, Object, Boolean> equalityFallback) {
|
||||||
|
leftStorage = eq.left().getStorage();
|
||||||
|
rightStorage = eq.right().getStorage();
|
||||||
|
leftColumnName = eq.left().getName();
|
||||||
|
rightColumnName = eq.right().getName();
|
||||||
|
this.equalityFallback = equalityFallback;
|
||||||
|
problems = new AggregatedProblems();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean matches(int left, int right) {
|
||||||
|
Object leftValue = leftStorage.getItemBoxed(left);
|
||||||
|
Object rightValue = rightStorage.getItemBoxed(right);
|
||||||
|
|
||||||
|
if (NumericConverter.isCoercibleToDouble(leftValue)) {
|
||||||
|
problems.add(new FloatingPointGrouping(leftColumnName, left));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (NumericConverter.isCoercibleToDouble(rightValue)) {
|
||||||
|
problems.add(new FloatingPointGrouping(rightColumnName, right));
|
||||||
|
}
|
||||||
|
|
||||||
|
// We could do a fast-path for some known primitive types, but it doesn't matter as it will be replaced with hashing soon anyway.
|
||||||
|
return equalityFallback.apply(leftValue, rightValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public AggregatedProblems getProblems() {
|
||||||
|
return problems;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static final class EqualsIgnoreCaseMatcher implements Matcher {
|
||||||
|
private final StringStorage leftStorage;
|
||||||
|
private final StringStorage rightStorage;
|
||||||
|
|
||||||
|
private final Locale locale;
|
||||||
|
public EqualsIgnoreCaseMatcher(EqualsIgnoreCase eq) {
|
||||||
|
if (eq.left().getStorage() instanceof StringStorage leftStrings) {
|
||||||
|
leftStorage = leftStrings;
|
||||||
|
} else {
|
||||||
|
throw new IllegalArgumentException("Expected left column to have type Text.");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (eq.right().getStorage() instanceof StringStorage rightStrings) {
|
||||||
|
rightStorage = rightStrings;
|
||||||
|
} else {
|
||||||
|
throw new IllegalArgumentException("Expected right column to have type Text.");
|
||||||
|
}
|
||||||
|
|
||||||
|
locale = eq.locale();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean matches(int left, int right) {
|
||||||
|
String leftValue = leftStorage.getItem(left);
|
||||||
|
String rightValue = rightStorage.getItem(right);
|
||||||
|
|
||||||
|
if (leftValue == null && rightValue == null) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (leftValue == null || rightValue == null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return Text_Utils.equals_ignore_case(leftValue, rightValue, locale);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static final class BetweenMatcher implements Matcher {
|
||||||
|
|
||||||
|
private final Comparator<Object> objectComparator;
|
||||||
|
private final Storage<?> leftStorage;
|
||||||
|
private final Storage<?> rightLowerStorage;
|
||||||
|
private final Storage<?> rightUpperStorage;
|
||||||
|
public BetweenMatcher(Between between, Comparator<Object> objectComparator) {
|
||||||
|
this.objectComparator = objectComparator;
|
||||||
|
leftStorage = between.left().getStorage();
|
||||||
|
rightLowerStorage = between.rightLower().getStorage();
|
||||||
|
rightUpperStorage = between.rightUpper().getStorage();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean matches(int left, int right) {
|
||||||
|
Object leftValue = leftStorage.getItemBoxed(left);
|
||||||
|
Object rightLowerValue = rightLowerStorage.getItemBoxed(right);
|
||||||
|
Object rightUpperValue = rightUpperStorage.getItemBoxed(right);
|
||||||
|
|
||||||
|
// If any value is missing, such a pair of rows is never correlated with Between as we assume the ordering is not well-defined for missing values.
|
||||||
|
if (leftValue == null || rightLowerValue == null || rightUpperValue == null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We could do a fast-path for some known primitive types, but it doesn't matter as it should be replaced with sorting optimization soon(ish).
|
||||||
|
return objectComparator.compare(leftValue, rightLowerValue) >= 0 && objectComparator.compare(leftValue, rightUpperValue) <= 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,55 @@
|
|||||||
|
package org.enso.table.data.table.join.scan;
|
||||||
|
|
||||||
|
import org.enso.table.data.table.Table;
|
||||||
|
import org.enso.table.data.table.join.*;
|
||||||
|
import org.enso.table.data.table.problems.AggregatedProblems;
|
||||||
|
import org.graalvm.collections.Pair;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.function.BiFunction;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
public class ScanJoin implements JoinStrategy {
|
||||||
|
|
||||||
|
private final Comparator<Object> objectComparator;
|
||||||
|
private final BiFunction<Object, Object, Boolean> equalityFallback;
|
||||||
|
|
||||||
|
public ScanJoin(
|
||||||
|
Comparator<Object> objectComparator, BiFunction<Object, Object, Boolean> equalityFallback) {
|
||||||
|
this.objectComparator = objectComparator;
|
||||||
|
this.equalityFallback = equalityFallback;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public JoinResult join(Table left, Table right, List<JoinCondition> conditions) {
|
||||||
|
List<Pair<Integer, Integer>> matches = new ArrayList<>();
|
||||||
|
int ls = left.rowCount();
|
||||||
|
int rs = right.rowCount();
|
||||||
|
|
||||||
|
MatcherFactory factory = new MatcherFactory(objectComparator, equalityFallback);
|
||||||
|
List<Matcher> matchers = conditions.stream().map(factory::create).collect(Collectors.toList());
|
||||||
|
|
||||||
|
for (int l = 0; l < ls; ++l) {
|
||||||
|
for (int r = 0; r < rs; ++r) {
|
||||||
|
boolean match = true;
|
||||||
|
for (Matcher matcher : matchers) {
|
||||||
|
if (!matcher.matches(l, r)) {
|
||||||
|
match = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (match) {
|
||||||
|
matches.add(Pair.create(l, r));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
AggregatedProblems problems =
|
||||||
|
AggregatedProblems.merge(
|
||||||
|
matchers.stream().map(Matcher::getProblems).toArray(AggregatedProblems[]::new));
|
||||||
|
return new JoinResult(matches, problems);
|
||||||
|
}
|
||||||
|
}
|
@ -65,4 +65,12 @@ public class AggregatedProblems {
|
|||||||
|
|
||||||
return new AggregatedProblems(merged, count);
|
return new AggregatedProblems(merged, count);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static AggregatedProblems of(Problem... problems) {
|
||||||
|
AggregatedProblems result = new AggregatedProblems();
|
||||||
|
for (Problem p : problems) {
|
||||||
|
result.add(p);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1244,7 +1244,7 @@ spec setup =
|
|||||||
Test.specify "should warn if grouping on a floating point" <|
|
Test.specify "should warn if grouping on a floating point" <|
|
||||||
action = table.aggregate [Group_By 1] on_problems=_
|
action = table.aggregate [Group_By 1] on_problems=_
|
||||||
# All rows are marked as floating point, because the integers get coerced to double when stored in DoubleStorage
|
# All rows are marked as floating point, because the integers get coerced to double when stored in DoubleStorage
|
||||||
problems = [Floating_Point_Grouping.Error "GroupBy" [0, 1, 2]]
|
problems = [Floating_Point_Grouping.Error "Value"]
|
||||||
tester = expect_column_names ["Value"]
|
tester = expect_column_names ["Value"]
|
||||||
Problems.test_problem_handling action problems tester
|
Problems.test_problem_handling action problems tester
|
||||||
|
|
||||||
@ -1315,6 +1315,7 @@ spec setup =
|
|||||||
problems = Warning.get_all new_table . map .value
|
problems = Warning.get_all new_table . map .value
|
||||||
problems.length . should_equal 1
|
problems.length . should_equal 1
|
||||||
problems.at 0 . is_a Invalid_Aggregation.Error . should_be_true
|
problems.at 0 . is_a Invalid_Aggregation.Error . should_be_true
|
||||||
|
problems.at 0 . column . should_equal "Concatenate Value"
|
||||||
problems.at 0 . rows . length . should_equal 15
|
problems.at 0 . rows . length . should_equal 15
|
||||||
|
|
||||||
Test.specify "should merge Floating Point Grouping warnings" <|
|
Test.specify "should merge Floating Point Grouping warnings" <|
|
||||||
@ -1322,7 +1323,7 @@ spec setup =
|
|||||||
problems = Warning.get_all new_table . map .value
|
problems = Warning.get_all new_table . map .value
|
||||||
problems.length . should_equal 1
|
problems.length . should_equal 1
|
||||||
problems.at 0 . is_a Floating_Point_Grouping.Error . should_be_true
|
problems.at 0 . is_a Floating_Point_Grouping.Error . should_be_true
|
||||||
problems.at 0 . rows . length . should_equal 15
|
problems.at 0 . column . should_equal "Float"
|
||||||
|
|
||||||
if is_database then
|
if is_database then
|
||||||
Test.group prefix+"Table.aggregate should report unsupported operations but not block other aggregations in warning mode" <|
|
Test.group prefix+"Table.aggregate should report unsupported operations but not block other aggregations in warning mode" <|
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
from Standard.Base import all
|
from Standard.Base import all
|
||||||
|
import Standard.Base.Error.Illegal_State.Illegal_State
|
||||||
|
|
||||||
import Standard.Table.Data.Join_Condition.Join_Condition
|
import Standard.Table.Data.Join_Condition.Join_Condition
|
||||||
import Standard.Table.Data.Join_Kind.Join_Kind
|
import Standard.Table.Data.Join_Kind.Join_Kind
|
||||||
|
import Standard.Table.Data.Value_Type.Value_Type
|
||||||
from Standard.Table.Errors import all
|
from Standard.Table.Errors import all
|
||||||
|
|
||||||
from Standard.Test import Test, Problems
|
from Standard.Test import Test, Problems
|
||||||
@ -9,6 +11,16 @@ import Standard.Test.Extensions
|
|||||||
|
|
||||||
from project.Common_Table_Operations.Util import expect_column_names, run_default_backend
|
from project.Common_Table_Operations.Util import expect_column_names, run_default_backend
|
||||||
|
|
||||||
|
## A custom object used by the join tests; its ordering and equality are
   based on the sum of its two fields.
type My_Type
|
||||||
|
Value x y
|
||||||
|
|
||||||
|
# Compares the sums `x+y`; any value that is not a `My_Type.Value` yields `Ordering.Less`.
compare_to self other = case other of
|
||||||
|
My_Type.Value ox oy ->
|
||||||
|
self.x+self.y . compare_to ox+oy
|
||||||
|
_ -> Ordering.Less
|
||||||
|
|
||||||
|
# Equality is derived from `compare_to`, so values with the same sum are equal.
== self other = self.compare_to other == Ordering.Equal
|
||||||
|
|
||||||
main = run_default_backend spec
|
main = run_default_backend spec
|
||||||
|
|
||||||
spec setup =
|
spec setup =
|
||||||
@ -61,18 +73,173 @@ spec setup =
|
|||||||
t7.at "Z" . to_vector . should_equal [4]
|
t7.at "Z" . to_vector . should_equal [4]
|
||||||
t7.at "W" . to_vector . should_equal [7]
|
t7.at "W" . to_vector . should_equal [7]
|
||||||
|
|
||||||
Test.specify "should allow to join on equality of multiple columns and drop redundant columns" <|
|
t3 = table_builder [["X", [1, 1, 1, 2, 2, 2]], ["Y", ["A", "B", "B", "C", "C", "A"]], ["Z", [1, 2, 3, 4, 5, 6]]]
|
||||||
t1 = table_builder [["X", [1, 1, 1, 2, 2, 2]], ["Y", ["A", "B", "B", "C", "C", "A"]], ["Z", [1, 2, 3, 4, 5, 6]]]
|
t4 = table_builder [["X", [1, 1, 3, 2, 2, 4]], ["Y", ["B", "B", "C", "C", "D", "A"]], ["Z", [1, 2, 3, 4, 5, 6]]]
|
||||||
t2 = table_builder [["X", [1, 1, 3, 2, 2, 4]], ["Y", ["B", "B", "C", "C", "D", "A"]], ["Z", [1, 2, 3, 4, 5, 6]]]
|
check_xy_joined r =
|
||||||
|
|
||||||
conditions = [Join_Condition.Equals "Y" "Y", Join_Condition.Equals "X" "X"]
|
|
||||||
# TODO later we'll want `Right_Z` instead of `Z_1`
|
# TODO later we'll want `Right_Z` instead of `Z_1`
|
||||||
t3 = t1.join t2 on=conditions . order_by ["X", "Y", "Z", "Z_1"]
|
expect_column_names ["X", "Y", "Z", "Z_1"] r
|
||||||
expect_column_names ["X", "Y", "Z", "Z_1"] t3
|
r.at "X" . to_vector . should_equal [1, 1, 1, 1, 2, 2]
|
||||||
t3.at "X" . to_vector . should_equal [1, 1, 1, 1, 2, 2]
|
r.at "Y" . to_vector . should_equal ["B", "B", "B", "B", "C", "C"]
|
||||||
t3.at "Y" . to_vector . should_equal ["B", "B", "B", "B", "C", "C"]
|
r.at "Z" . to_vector . should_equal [2, 2, 3, 3, 4, 5]
|
||||||
t3.at "Z" . to_vector . should_equal [2, 2, 3, 3, 4, 5]
|
r.at "Z_1" . to_vector . should_equal [1, 2, 1, 2, 4, 4]
|
||||||
t3.at "Z_1" . to_vector . should_equal [1, 2, 1, 2, 4, 4]
|
|
||||||
|
Test.specify "should allow to join on equality of multiple columns and drop redundant columns" <|
|
||||||
|
conditions = [Join_Condition.Equals "Y" "Y", Join_Condition.Equals "X" "X"]
|
||||||
|
r = t3.join t4 on=conditions . order_by ["X", "Y", "Z", "Z_1"]
|
||||||
|
check_xy_joined r
|
||||||
|
|
||||||
|
Test.specify "should support same-name column join shorthand" <|
|
||||||
|
r = t3.join t4 on=["X", "Y"] . order_by ["X", "Y", "Z", "Z_1"]
|
||||||
|
check_xy_joined r
|
||||||
|
|
||||||
|
Test.specify "should allow to join on text equality ignoring case" <|
|
||||||
|
t1 = table_builder [["X", ["a", "B"]], ["Y", [1, 2]]]
|
||||||
|
t2 = table_builder [["X", ["A", "a", "b"]], ["Z", [1, 2, 3]]]
|
||||||
|
|
||||||
|
r1 = t1.join t2
|
||||||
|
expect_column_names ["X", "Y", "Z"] r1
|
||||||
|
r1 . at "X" . to_vector . should_equal ["a"]
|
||||||
|
r1 . at "Y" . to_vector . should_equal [1]
|
||||||
|
r1 . at "Z" . to_vector . should_equal [2]
|
||||||
|
|
||||||
|
r2 = t1.join t2 on=(Join_Condition.Equals_Ignore_Case "X") . order_by ["Z"]
|
||||||
|
# TODO rename to Right_X
|
||||||
|
expect_column_names ["X", "Y", "X_1", "Z"] r2
|
||||||
|
r2 . at "X" . to_vector . should_equal ["a", "a", "B"]
|
||||||
|
r2 . at "X_1" . to_vector . should_equal ["A", "a", "b"]
|
||||||
|
r2 . at "Y" . to_vector . should_equal [1, 1, 2]
|
||||||
|
r2 . at "Z" . to_vector . should_equal [1, 2, 3]
|
||||||
|
|
||||||
|
if setup.test_selection.supports_unicode_normalization then
|
||||||
|
Test.specify "should correctly handle Unicode equality" <|
|
||||||
|
t1 = table_builder [["X", ['s\u0301', 'S\u0301']], ["Y", [1, 2]]]
|
||||||
|
t2 = table_builder [["X", ['s', 'S', 'ś']], ["Z", [1, 2, 3]]]
|
||||||
|
|
||||||
|
r1 = t1.join t2
|
||||||
|
expect_column_names ["X", "Y", "Z"] r1
|
||||||
|
r1 . at "X" . to_vector . should_equal ['ś']
|
||||||
|
r1 . at "Y" . to_vector . should_equal [1]
|
||||||
|
r1 . at "Z" . to_vector . should_equal [3]
|
||||||
|
|
||||||
|
r2 = t1.join t2 on=(Join_Condition.Equals_Ignore_Case "X") . order_by ["Y"]
|
||||||
|
# TODO rename to Right_X
|
||||||
|
expect_column_names ["X", "Y", "X_1", "Z"] r2
|
||||||
|
r2 . at "X" . to_vector . should_equal ['s\u0301', 'S\u0301']
|
||||||
|
r2 . at "X_1" . to_vector . should_equal ['ś', 'ś']
|
||||||
|
r2 . at "Y" . to_vector . should_equal [1, 2]
|
||||||
|
r2 . at "Z" . to_vector . should_equal [3, 3]
|
||||||
|
|
||||||
|
# This may need a test_selection toggle in the future, depending on how well databases like coercing decimals and integers.
|
||||||
|
Test.specify "should correctly handle Enso Decimal-Integer equality" <|
|
||||||
|
t1 = table_builder [["X", [1, 2]], ["Y", [10, 20]]]
|
||||||
|
t2 = table_builder [["X", [2.0, 2.1, 0.0]], ["Z", [1, 2, 3]]]
|
||||||
|
|
||||||
|
r1 = t1.join t2
|
||||||
|
expect_column_names ["X", "Y", "Z"] r1
|
||||||
|
r1 . at "X" . to_vector . should_equal [2]
|
||||||
|
r1 . at "Y" . to_vector . should_equal [20]
|
||||||
|
r1 . at "Z" . to_vector . should_equal [1]
|
||||||
|
|
||||||
|
if setup.supports_custom_objects then
|
||||||
|
Test.specify "should allow equality joins for custom objects" <|
|
||||||
|
t1 = table_builder [["X", [My_Type.Value 1 2, My_Type.Value 2 3]], ["Y", [1, 2]]]
|
||||||
|
t2 = table_builder [["X", [My_Type.Value 5 0, My_Type.Value 2 1]], ["Z", [10, 20]]]
|
||||||
|
|
||||||
|
r1 = t1.join t2 . order_by ["Y"]
|
||||||
|
expect_column_names ["X", "Y", "Z"] r1
|
||||||
|
r1 . at "X" . to_vector . should_equal [My_Type.Value 1 2, My_Type.Value 2 3]
|
||||||
|
## We don't keep the other column, because the values in both
|
||||||
|
are equal. However, with custom comparators, they may not be
|
||||||
|
the same values, so we may consider keeping it. For now it is
|
||||||
|
dropped though for consistency.
|
||||||
|
# r1 . at "Right_X" . to_vector . should_equal [My_Type.Value 1 2, My_Type.Value 2 3]
|
||||||
|
r1 . at "Y" . to_vector . should_equal [1, 2]
|
||||||
|
r1 . at "Z" . to_vector . should_equal [20, 10]
|
||||||
|
|
||||||
|
Test.specify "should allow range-based joins (using Between) for numbers" <|
|
||||||
|
t1 = table_builder [["X", [1, 10, 12]], ["Y", [1, 2, 3]]]
|
||||||
|
t2 = table_builder [["lower", [1, 10, 8, 12]], ["upper", [1, 12, 30, 0]], ["Z", [1, 2, 3, 4]]]
|
||||||
|
|
||||||
|
r1 = t1.join t2 on=(Join_Condition.Between "X" "lower" "upper") . order_by ["X", "Z"]
|
||||||
|
expect_column_names ["X", "Y", "lower", "upper", "Z"] r1
|
||||||
|
r1 . at "X" . to_vector . should_equal [1, 10, 10, 12, 12]
|
||||||
|
r1 . at "Y" . to_vector . should_equal [1, 2, 2, 3, 3]
|
||||||
|
r1 . at "lower" . to_vector . should_equal [1, 10, 8, 10, 8]
|
||||||
|
r1 . at "upper" . to_vector . should_equal [1, 12, 30, 12, 30]
|
||||||
|
r1 . at "Z" . to_vector . should_equal [1, 2, 3, 2, 3]
|
||||||
|
|
||||||
|
Test.specify "should allow range-based joins (using Between) for text" <|
|
||||||
|
t1 = table_builder [["X", ["a", "b", "c"]], ["Y", [1, 2, 3]]]
|
||||||
|
t2 = table_builder [["lower", ["a", "b"]], ["upper", ["a", "ccc"]], ["Z", [10, 20]]]
|
||||||
|
|
||||||
|
r1 = t1.join t2 on=(Join_Condition.Between "X" "lower" "upper") . order_by ["X", "Z"]
|
||||||
|
expect_column_names ["X", "Y", "lower", "upper", "Z"] r1
|
||||||
|
r1 . at "X" . to_vector . should_equal ["a", "b", "c"]
|
||||||
|
r1 . at "Y" . to_vector . should_equal [1, 2, 3]
|
||||||
|
r1 . at "lower" . to_vector . should_equal ["a", "b", "b"]
|
||||||
|
r1 . at "upper" . to_vector . should_equal ["a", "ccc", "ccc"]
|
||||||
|
r1 . at "Z" . to_vector . should_equal [10, 20, 20]
|
||||||
|
|
||||||
|
if setup.test_selection.supports_unicode_normalization then
|
||||||
|
Test.specify "should allow range-based joins (using Between) for text with Unicode normalization" <|
|
||||||
|
t1 = table_builder [["X", ['s\u0301', 's']], ["Y", [1, 2]]]
|
||||||
|
t2 = table_builder [["lower", ['s', 'ś']], ["upper", ['sa', 'ś']], ["Z", [10, 20]]]
|
||||||
|
|
||||||
|
r1 = t1.join t2 on=(Join_Condition.Between "X" "lower" "upper") . order_by ["Y"]
|
||||||
|
expect_column_names ["X", "Y", "lower", "upper", "Z"] r1
|
||||||
|
r1 . at "X" . to_vector . should_equal ['s\u0301', 's']
|
||||||
|
r1 . at "Y" . to_vector . should_equal [1, 2]
|
||||||
|
r1 . at "lower" . to_vector . should_equal ['ś', 's']
|
||||||
|
r1 . at "upper" . to_vector . should_equal ['ś', 'sa']
|
||||||
|
r1 . at "Z" . to_vector . should_equal [20, 10]
|
||||||
|
|
||||||
|
if setup.supports_custom_objects then
|
||||||
|
Test.specify "should allow range-based joins (using Between) for custom objects" <|
|
||||||
|
t1 = table_builder [["X", [My_Type.Value 20 30, My_Type.Value 1 2]], ["Y", [1, 2]]]
|
||||||
|
t2 = table_builder [["lower", [My_Type.Value 3 0, My_Type.Value 10 10]], ["upper", [My_Type.Value 2 1, My_Type.Value 100 0]], ["Z", [10, 20]]]
|
||||||
|
|
||||||
|
r1 = t1.join t2 on=(Join_Condition.Between "X" "lower" "upper") . order_by ["Z"]
|
||||||
|
expect_column_names ["X", "Y", "lower", "upper", "Z"] r1
|
||||||
|
r1 . at "X" . to_vector . to_text . should_equal "[(My_Type.Value 1 2), (My_Type.Value 20 30)]"
|
||||||
|
r1 . at "Y" . to_vector . should_equal [2, 1]
|
||||||
|
r1 . at "lower" . to_vector . to_text . should_equal "[(My_Type.Value 3 0), (My_Type.Value 10 10)]"
|
||||||
|
r1 . at "upper" . to_vector . to_text . should_equal "[(My_Type.Value 2 1), (My_Type.Value 100 0)]"
|
||||||
|
r1 . at "Z" . to_vector . should_equal [10, 20]
|
||||||
|
|
||||||
|
Test.specify "should allow to mix join conditions of various kinds" <|
|
||||||
|
t1 = table_builder [["X", [1, 12, 12, 0]], ["Y", [1, 2, 3, 4]], ["Z", ["a", "A", "a", "ą"]], ["W", [1, 2, 3, 4]]]
|
||||||
|
t2 = table_builder [["X", [12, 12, 1]], ["l", [0, 100, 100]], ["u", [10, 100, 100]], ["Z", ["A", "A", "A"]], ["W'", [10, 20, 30]]]
|
||||||
|
|
||||||
|
r1 = t1.join t2 on=[Join_Condition.Between "Y" "l" "u", Join_Condition.Equals_Ignore_Case "Z" "Z", Join_Condition.Equals "X" "X"] . order_by ["Y"]
|
||||||
|
expect_column_names ["X", "Y", "Z", "W", "l", "u", "Z_1", "W'"] r1
|
||||||
|
r1.at "X" . to_vector . should_equal [12, 12]
|
||||||
|
r1.at "Y" . to_vector . should_equal [2, 3]
|
||||||
|
r1.at "Z" . to_vector . should_equal ["A", "a"]
|
||||||
|
r1.at "W" . to_vector . should_equal [2, 3]
|
||||||
|
r1.at "l" . to_vector . should_equal [0, 0]
|
||||||
|
r1.at "u" . to_vector . should_equal [10, 10]
|
||||||
|
r1.at "Z_1" . to_vector . should_equal ["A", "A"]
|
||||||
|
r1.at "W'" . to_vector . should_equal [10, 10]
|
||||||
|
|
||||||
|
Test.specify "should work fine if the same condition is specified multiple times" <|
|
||||||
|
r = t3.join t4 on=["X", "X", "Y", "X", "Y"] . order_by ["X", "Y", "Z", "Z_1"]
|
||||||
|
check_xy_joined r
|
||||||
|
|
||||||
|
t5 = table_builder [["X", [1, 10, 12]], ["Y", [1, 2, 3]]]
|
||||||
|
t6 = table_builder [["lower", [1, 10, 8, 12]], ["upper", [1, 12, 30, 0]], ["Z", [1, 2, 3, 4]]]
|
||||||
|
|
||||||
|
r1 = t5.join t6 on=[Join_Condition.Between "X" "lower" "upper", Join_Condition.Between "X" "lower" "upper", Join_Condition.Between "X" "lower" "upper"] . order_by ["X", "Z"]
|
||||||
|
r1 . at "X" . to_vector . should_equal [1, 10, 10, 12, 12]
|
||||||
|
r1 . at "Y" . to_vector . should_equal [1, 2, 2, 3, 3]
|
||||||
|
r1 . at "Z" . to_vector . should_equal [1, 2, 3, 2, 3]
|
||||||
|
|
||||||
|
t7 = table_builder [["X", ["a", "B"]], ["Y", [1, 2]]]
|
||||||
|
t8 = table_builder [["X", ["A", "a", "b"]], ["Z", [1, 2, 3]]]
|
||||||
|
|
||||||
|
r2 = t7.join t8 on=[Join_Condition.Equals_Ignore_Case "X", Join_Condition.Equals_Ignore_Case "X", Join_Condition.Equals_Ignore_Case "X" "X"] . order_by ["Z"]
|
||||||
|
r2 . at "X" . to_vector . should_equal ["a", "a", "B"]
|
||||||
|
r2 . at "X_1" . to_vector . should_equal ["A", "a", "b"]
|
||||||
|
r2 . at "Z" . to_vector . should_equal [1, 2, 3]
|
||||||
|
|
||||||
Test.specify "should gracefully handle unmatched columns in Join_Conditions" <|
|
Test.specify "should gracefully handle unmatched columns in Join_Conditions" <|
|
||||||
t1 = table_builder [["X", [1, 2]], ["Y", [3, 4]]]
|
t1 = table_builder [["X", [1, 2]], ["Y", [3, 4]]]
|
||||||
@ -88,6 +255,121 @@ spec setup =
|
|||||||
problems = [Column_Indexes_Out_Of_Range.Error [42, -3], Missing_Input_Columns.Error ["foo", "baz"]]
|
problems = [Column_Indexes_Out_Of_Range.Error [42, -3], Missing_Input_Columns.Error ["foo", "baz"]]
|
||||||
Problems.test_problem_handling action problems tester
|
Problems.test_problem_handling action problems tester
|
||||||
|
|
||||||
|
Test.specify "should report Invalid_Value_Type if non-text columns are provided to Equals_Ignore_Case" <|
|
||||||
|
t1 = table_builder [["X", ["1", "2", "c"]], ["Y", [1, 2, 3]]]
|
||||||
|
t2 = table_builder [["Z", ["1", "2", "c"]], ["W", [1, 2, 3]]]
|
||||||
|
|
||||||
|
tester table =
|
||||||
|
expect_column_names ["X", "Y", "Z", "W"] table
|
||||||
|
table.row_count . should_equal 0
|
||||||
|
problems = [Invalid_Value_Type.Error Value_Type.Char Value_Type.Integer]
|
||||||
|
|
||||||
|
Problems.test_problem_handling (t1.join t2 on=(Join_Condition.Equals_Ignore_Case "X" "W") on_problems=_) problems tester
|
||||||
|
Problems.test_problem_handling (t1.join t2 on=(Join_Condition.Equals_Ignore_Case "Y" "Z") on_problems=_) problems tester
|
||||||
|
|
||||||
|
Test.specify "should report Invalid_Value_Type if incompatible types are correlated" pending="We need more advanced value type support to implement this in full-generality." <|
|
||||||
|
t1 = table_builder [["X", ["1", "2", "c"]]]
|
||||||
|
t2 = table_builder [["Y", [1, 2, 3]]]
|
||||||
|
|
||||||
|
r1 = t1.join t2 on_problems=Problem_Behavior.Ignore
|
||||||
|
expect_column_names ["X", "Y"] r1
|
||||||
|
r1.row_count . should_equal 0
|
||||||
|
|
||||||
|
r2 = t1.join t2 on_problems=Problem_Behavior.Report_Error
|
||||||
|
r2.should_fail_with Invalid_Value_Type.Error
|
||||||
|
|
||||||
|
Test.specify "should report Invalid_Value_Type if incompatible columns types are correlated in Between" pending="We need more advanced value type support to implement this in full-generality." <|
|
||||||
|
t1 = table_builder [["X", ["1", "2", "c"]], ["Y", [1, 2, 3]]]
|
||||||
|
t2 = table_builder [["Z", ["1", "2", "c"]], ["W", [1, 2, 3]]]
|
||||||
|
|
||||||
|
test expected actual err =
|
||||||
|
err.should_fail_with Invalid_Value_Type.Error
|
||||||
|
err.catch . should_equal (Invalid_Value_Type.Error expected actual)
|
||||||
|
|
||||||
|
test Value_Type.Char Value_Type.Integer <|
|
||||||
|
t1.join t2 on=(Join_Condition.Between "X" "W" "W")
|
||||||
|
test Value_Type.Integer Value_Type.Char <|
|
||||||
|
t1.join t2 on=(Join_Condition.Between "Y" "W" "Z")
|
||||||
|
test Value_Type.Integer Value_Type.Char <|
|
||||||
|
t1.join t2 on=(Join_Condition.Between "Y" "Z" "W")
|
||||||
|
|
||||||
|
Test.specify "should warn when joining on equality of Decimal columns" <|
|
||||||
|
t1 = table_builder [["X", [1.5, 2.0, 2.00000000001]], ["Y", [10, 20, 30]]]
|
||||||
|
t2 = table_builder [["Z", [2.0, 1.5, 2.0]], ["W", [1, 2, 3]]]
|
||||||
|
|
||||||
|
action1 = t1.join t2 on=(Join_Condition.Equals "X" "Z") on_problems=_
|
||||||
|
tester1 table =
|
||||||
|
expect_column_names ["X", "Y", "Z", "W"] table
|
||||||
|
t1 = table.order_by ["Y", "W"]
|
||||||
|
t1.at "X" . to_vector . should_equal [1.5, 2.0, 2.0]
|
||||||
|
t1.at "Y" . to_vector . should_equal [10, 20, 20]
|
||||||
|
t1.at "Z" . to_vector . should_equal [1.5, 2.0, 2.0]
|
||||||
|
t1.at "W" . to_vector . should_equal [2, 1, 3]
|
||||||
|
problems1 = [Floating_Point_Grouping.Error "X", Floating_Point_Grouping.Error "Z"]
|
||||||
|
Problems.test_problem_handling action1 problems1 tester1
|
||||||
|
|
||||||
|
action2 = t1.join t2 on=(Join_Condition.Equals "X" "W") on_problems=_
|
||||||
|
tester2 table =
|
||||||
|
expect_column_names ["X", "Y", "Z", "W"] table
|
||||||
|
t1 = table.order_by ["Y", "W"]
|
||||||
|
t1.at "X" . to_vector . should_equal [2.0]
|
||||||
|
t1.at "Y" . to_vector . should_equal [20]
|
||||||
|
t1.at "Z" . to_vector . should_equal [1.5]
|
||||||
|
t1.at "W" . to_vector . should_equal [2]
|
||||||
|
problems2 = [Floating_Point_Grouping.Error "X"]
|
||||||
|
Problems.test_problem_handling action2 problems2 tester2
|
||||||
|
|
||||||
|
# But joining on the Between condition should not give such warnings
|
||||||
|
r2 = t1.join t2 on=(Join_Condition.Between "X" "Z" "Z")
|
||||||
|
Problems.assume_no_problems r2
|
||||||
|
|
||||||
|
if setup.supports_custom_objects then
|
||||||
|
t1 = table_builder [["X", [My_Type.Value 1 2, 2.0, 2]], ["Y", [10, 20, 30]]]
|
||||||
|
t2 = table_builder [["Z", [2.0, 1.5, 2.0]], ["W", [1, 2, 3]]]
|
||||||
|
action3 = t1.join t2 on=(Join_Condition.Equals "X" "Z") on_problems=_
|
||||||
|
tester3 table =
|
||||||
|
expect_column_names ["X", "Y", "Z", "W"] table
|
||||||
|
t1 = table.order_by ["Y", "W"]
|
||||||
|
t1.at "X" . to_vector . should_equal [2.0, 2.0, 2, 2]
|
||||||
|
t1.at "Y" . to_vector . should_equal [20, 20, 30, 30]
|
||||||
|
t1.at "Z" . to_vector . should_equal [2.0, 2.0, 2.0, 2.0]
|
||||||
|
t1.at "W" . to_vector . should_equal [1, 3, 1, 3]
|
||||||
|
problems3 = [Floating_Point_Grouping.Error "Z", Floating_Point_Grouping.Error "X"]
|
||||||
|
Problems.test_problem_handling action3 problems3 tester3
|
||||||
|
|
||||||
|
Test.specify "should correctly handle nulls in equality conditions" <|
|
||||||
|
t1 = table_builder [["X", ["A", Nothing, "a", Nothing, "ą"]], ["Y", [0, 1, 2, 3, 4]]]
|
||||||
|
t2 = table_builder [["X", ["a", Nothing, Nothing]], ["Z", [10, 20, 30]]]
|
||||||
|
|
||||||
|
r1 = t1.join t2 . order_by ["Y"]
|
||||||
|
expect_column_names ["X", "Y", "Z"] r1
|
||||||
|
r1.at "X" . to_vector . should_equal [Nothing, Nothing, "a", Nothing, Nothing]
|
||||||
|
r1.at "Y" . to_vector . should_equal [1, 1, 2, 3, 3]
|
||||||
|
r1.at "Z" . to_vector . should_equal [20, 30, 10, 20, 30]
|
||||||
|
|
||||||
|
Test.specify "should correctly handle nulls in case-insensitive equality conditions" <|
|
||||||
|
t1 = table_builder [["X", ["A", Nothing, "a", Nothing, "ą"]], ["Y", [0, 1, 2, 3, 4]]]
|
||||||
|
t2 = table_builder [["X", ["a", Nothing, Nothing]], ["Z", [10, 20, 30]]]
|
||||||
|
|
||||||
|
r1 = t1.join t2 on=(Join_Condition.Equals_Ignore_Case "X") . order_by ["Y"]
|
||||||
|
expect_column_names ["X", "Y", "X_1", "Z"] r1
|
||||||
|
r1.at "X" . to_vector . should_equal ["A", Nothing, Nothing, "a", Nothing, Nothing]
|
||||||
|
r1.at "X_1" . to_vector . should_equal ["a", Nothing, Nothing, "a", Nothing, Nothing]
|
||||||
|
r1.at "Y" . to_vector . should_equal [0, 1, 1, 2, 3, 3]
|
||||||
|
r1.at "Z" . to_vector . should_equal [10, 20, 30, 10, 20, 30]
|
||||||
|
|
||||||
|
Test.specify "should correctly handle nulls in between conditions" <|
|
||||||
|
t1 = table_builder [["X", [1, Nothing, 2, Nothing]], ["Y", [0, 1, 2, 3]]]
|
||||||
|
t2 = table_builder [["l", [Nothing, 0, 1]], ["u", [100, 10, Nothing]], ["Z", [10, 20, 30]]]
|
||||||
|
|
||||||
|
r1 = t1.join t2 on=(Join_Condition.Between "X" "l" "u") . order_by ["Y"]
|
||||||
|
expect_column_names ["X", "Y", "l", "u", "Z"] r1
|
||||||
|
r1.at "X" . to_vector . should_equal [1, 2]
|
||||||
|
r1.at "Y" . to_vector . should_equal [0, 2]
|
||||||
|
r1.at "l" . to_vector . should_equal [0, 0]
|
||||||
|
r1.at "u" . to_vector . should_equal [10, 10]
|
||||||
|
r1.at "Z" . to_vector . should_equal [20, 20]
|
||||||
|
|
||||||
Test.specify "should rename columns of the right table to avoid duplicates" <|
|
Test.specify "should rename columns of the right table to avoid duplicates" <|
|
||||||
t1 = table_builder [["X", [1, 2]], ["Y", [3, 4]]]
|
t1 = table_builder [["X", [1, 2]], ["Y", [3, 4]]]
|
||||||
t2 = table_builder [["X", [2, 1]], ["Y", [2, 2]]]
|
t2 = table_builder [["X", [2, 1]], ["Y", [2, 2]]]
|
||||||
@ -111,3 +393,8 @@ spec setup =
|
|||||||
# expect_column_names ["Right_X", "X", "Y", "Right_Y_2"]+["Right_Right_X", "Right_X_1", "Right_Y", "Right_Y_1", "Right_Y_3", "Right_Y_4"] t6
|
# expect_column_names ["Right_X", "X", "Y", "Right_Y_2"]+["Right_Right_X", "Right_X_1", "Right_Y", "Right_Y_1", "Right_Y_3", "Right_Y_4"] t6
|
||||||
# Renames happen in the order of appearance of columns
|
# Renames happen in the order of appearance of columns
|
||||||
# expect_column_names ["Right_X", "X", "Y", "Right_Y_2"]+["Right_Right_X", "Right_X_1", "Right_Y", "Right_Y_1", "Right_Y_1_1", "Right_Y_4"] t6
|
# expect_column_names ["Right_X", "X", "Y", "Right_Y_2"]+["Right_Right_X", "Right_X_1", "Right_Y", "Right_Y_1", "Right_Y_1_1", "Right_Y_4"] t6
|
||||||
|
|
||||||
|
Test.specify "should pass dataflow errors through" <|
|
||||||
|
error = Error.throw (Illegal_State.Error "FOO")
|
||||||
|
t1.join error . should_fail_with Illegal_State.Error
|
||||||
|
t1.join t2 on=["X", error] . should_fail_with Illegal_State.Error
|
||||||
|
@ -39,7 +39,40 @@ type Test_Setup
|
|||||||
support particular features.
|
support particular features.
|
||||||
Config prefix table empty_table table_builder materialize is_database test_selection aggregate_test_selection
|
Config prefix table empty_table table_builder materialize is_database test_selection aggregate_test_selection
|
||||||
|
|
||||||
|
## Specifies if the given Table backend supports custom Enso types.
|
||||||
|
|
||||||
|
Currently, only the in-memory backend does.
|
||||||
|
supports_custom_objects : Boolean
|
||||||
|
supports_custom_objects self = self.is_database.not
|
||||||
|
|
||||||
type Test_Selection
|
type Test_Selection
|
||||||
|
## The configuration specifying what features are supported by a given
|
||||||
|
backend, driving what kind of test suites should be enabled.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- supports_case_sensitive_columns: Specifies if the backend supports
|
||||||
|
case-sensitive column names. If `False`, the backend will match column
|
||||||
|
names in a case insensitive way, so that "Foo" and "foo" will refer to
|
||||||
|
the same column.
|
||||||
|
- order_by: Specifies if the backend supports ordering operations.
|
||||||
|
- natural_ordering: Specifies if the backend supports natural ordering
|
||||||
|
operations.
|
||||||
|
- case_insensitive_ordering: Specifies if the backend supports case
|
||||||
|
insensitive ordering.
|
||||||
|
- order_by_unicode_normalization_by_default: Specifies if the backend
|
||||||
|
supports unicode normalization in its default ordering.
|
||||||
|
- case_insensitive_ascii_only:
|
||||||
|
- take_drop: Specifies if the backend supports take/drop operations.
|
||||||
|
- allows_mixed_type_comparisons: Specifies if operations comparing
|
||||||
|
mixed types are allowed by a given backend. Some backends will allow
|
||||||
|
such comparisons, when mixed type storage is allowed or by coercing to
|
||||||
|
the target type; others will fail with a type error.
|
||||||
|
- supports_unicode_normalization: Specifies if the backend compares
|
||||||
|
strings taking Unicode Normalization into account, i.e. whether
|
||||||
|
's\u0301' is considered equal to 'ś'.
|
||||||
|
- is_nan_and_nothing_distinct: Specifies if the backend is able to
|
||||||
|
distinguish between a decimal NaN value and a missing value (Enso's
|
||||||
|
Nothing, or SQL's NULL). If `False`, NaN is treated as a NULL.
|
||||||
Config supports_case_sensitive_columns=True order_by=True natural_ordering=False case_insensitive_ordering=True order_by_unicode_normalization_by_default=False case_insensitive_ascii_only=False take_drop=True allows_mixed_type_comparisons=True supports_unicode_normalization=False is_nan_and_nothing_distinct=True
|
Config supports_case_sensitive_columns=True order_by=True natural_ordering=False case_insensitive_ordering=True order_by_unicode_normalization_by_default=False case_insensitive_ascii_only=False take_drop=True allows_mixed_type_comparisons=True supports_unicode_normalization=False is_nan_and_nothing_distinct=True
|
||||||
|
|
||||||
spec setup =
|
spec setup =
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
from Standard.Base import all
|
from Standard.Base import all
|
||||||
|
import Standard.Base.Error.Common.Type_Error
|
||||||
import Standard.Base.Error.Illegal_Argument.Illegal_Argument
|
import Standard.Base.Error.Illegal_Argument.Illegal_Argument
|
||||||
import Standard.Base.Error.Incomparable_Values.Incomparable_Values
|
import Standard.Base.Error.Incomparable_Values.Incomparable_Values
|
||||||
|
|
||||||
@ -12,6 +13,8 @@ from Standard.Table.Errors import Invalid_Output_Column_Names, Duplicate_Output_
|
|||||||
|
|
||||||
import Standard.Visualization
|
import Standard.Visualization
|
||||||
|
|
||||||
|
from Standard.Database import Database, SQLite, In_Memory
|
||||||
|
|
||||||
from Standard.Test import Test, Test_Suite, Problems
|
from Standard.Test import Test, Test_Suite, Problems
|
||||||
import Standard.Test.Extensions
|
import Standard.Test.Extensions
|
||||||
|
|
||||||
@ -796,14 +799,14 @@ spec =
|
|||||||
action1 = t1.distinct on_problems=_
|
action1 = t1.distinct on_problems=_
|
||||||
tester1 table =
|
tester1 table =
|
||||||
table.at "X" . to_vector . should_equal [3.0, 1.0, 2.0]
|
table.at "X" . to_vector . should_equal [3.0, 1.0, 2.0]
|
||||||
problems1 = [Floating_Point_Grouping.Error "Distinct" [0, 1, 2, 3, 4]]
|
problems1 = [Floating_Point_Grouping.Error "Distinct"]
|
||||||
Problems.test_problem_handling action1 problems1 tester1
|
Problems.test_problem_handling action1 problems1 tester1
|
||||||
|
|
||||||
t2 = Table.new [["X", [1.00000000000001, -0.3, 1.00000000000002, 1.5, 1.00000000000002, 1.00000000000002]]]
|
t2 = Table.new [["X", [1.00000000000001, -0.3, 1.00000000000002, 1.5, 1.00000000000002, 1.00000000000002]]]
|
||||||
action2 = t2.distinct on_problems=_
|
action2 = t2.distinct on_problems=_
|
||||||
tester2 table =
|
tester2 table =
|
||||||
table.at "X" . to_vector . should_equal [1.00000000000001, -0.3, 1.00000000000002, 1.5]
|
table.at "X" . to_vector . should_equal [1.00000000000001, -0.3, 1.00000000000002, 1.5]
|
||||||
problems2 = [Floating_Point_Grouping.Error "Distinct" [0, 1, 2, 3, 4, 5]]
|
problems2 = [Floating_Point_Grouping.Error "Distinct"]
|
||||||
Problems.test_problem_handling action2 problems2 tester2
|
Problems.test_problem_handling action2 problems2 tester2
|
||||||
|
|
||||||
Test.specify "should report a warning and report the whole table if no columns were selected" <|
|
Test.specify "should report a warning and report the whole table if no columns were selected" <|
|
||||||
@ -1078,6 +1081,18 @@ spec =
|
|||||||
|
|
||||||
# ToDo: Verify the warnings and error handling within cross_tab.
|
# ToDo: Verify the warnings and error handling within cross_tab.
|
||||||
|
|
||||||
|
Test.group "[In-Memory] Table.join" <|
|
||||||
|
Test.specify "should correctly report unsupported cross-backend joins" <|
|
||||||
|
t = Table.new [["X", [1, 2, 3]]]
|
||||||
|
t.join 42 . should_fail_with Type_Error.Error
|
||||||
|
|
||||||
|
db = Database.connect (SQLite In_Memory)
|
||||||
|
db_table = db.upload_table "test" (Table.new [["Y", [4, 5, 6]]])
|
||||||
|
|
||||||
|
r = t.join db_table
|
||||||
|
r.should_fail_with Illegal_Argument.Error
|
||||||
|
r.catch.message . contains "cross-backend" . should_be_true
|
||||||
|
|
||||||
run_common_spec spec =
|
run_common_spec spec =
|
||||||
selection = Common_Table_Operations.Main.Test_Selection.Config supports_case_sensitive_columns=True order_by=True natural_ordering=True case_insensitive_ordering=True order_by_unicode_normalization_by_default=True supports_unicode_normalization=True
|
selection = Common_Table_Operations.Main.Test_Selection.Config supports_case_sensitive_columns=True order_by=True natural_ordering=True case_insensitive_ordering=True order_by_unicode_normalization_by_default=True supports_unicode_normalization=True
|
||||||
aggregate_selection = Common_Table_Operations.Aggregate_Spec.Test_Selection.Config
|
aggregate_selection = Common_Table_Operations.Aggregate_Spec.Test_Selection.Config
|
||||||
|
Loading…
Reference in New Issue
Block a user