Improve performance of Join_Condition.Between by sorting on one dimension (#8212)

- Closes #5303
- Refactors `JoinStrategy` allowing us to 'stack' join strategies on top of each other (to some extent) - currently a `HashJoin` can be followed by another join strategy (currently `SortJoin`)
- Adds benchmarks for join
- Due to limitations of the sorting approach this will still not be as fast as possible for cases where there is more than 1 `Between` condition in a single query - trying to demonstrate that in benchmarks.
- We can replace sorting by d-dimensional [RangeTrees](https://en.wikipedia.org/wiki/Range_tree) to get `O((n + m) log^d n + k)` performance (where `n` and `m` are the sizes of the joined tables, `d` is the number of `Between` conditions used in the query and `k` is the result set size).
- Follow up ticket for consideration later:
#8216
- Closes #8215
- After all, it turned out that `TreeSet` was problematic (because of not enough flexibility with duplicate key handling), so the simplest solution was to immediately implement this sub-task.
- Closes #8204
- Unrelated, but I ran into this here: adds type checks to other arguments of `set`.
- Before, passing a Column as `new_name` (i.e. mistakenly mixing up the order of arguments) led to a hard-to-understand error: ``Method `if_then_else` of type Column could not be found.``. Now it fails with a type error instead: `expected Text got Column`.
This commit is contained in:
Radosław Waśko 2023-11-08 13:59:55 +01:00 committed by GitHub
parent 1388fe1cf9
commit 1b8b30a68d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
34 changed files with 979 additions and 501 deletions

View File

@ -837,7 +837,7 @@ type Table
table.set "2 * [total_stock]" new_name="total_stock_expr"
@new_name Widget_Helpers.make_column_name_selector
set : Column | Text | Array | Vector | Range | Date_Range | Constant_Column | Column_Operation -> Text -> Set_Mode -> Problem_Behavior -> Table ! Existing_Column | Missing_Column | No_Such_Column | Expression_Error
set self column new_name="" set_mode=Set_Mode.Add_Or_Update on_problems=Report_Warning =
set self column (new_name : Text = "") (set_mode : Set_Mode = Set_Mode.Add_Or_Update) (on_problems : Problem_Behavior = Report_Warning) =
problem_builder = Problem_Builder.new
unique = self.column_naming_helper.create_unique_name_strategy
unique.mark_used self.column_names

View File

@ -68,10 +68,10 @@ polyglot java import org.enso.base.ObjectComparator
polyglot java import org.enso.table.data.index.MultiValueIndex
polyglot java import org.enso.table.data.mask.OrderMask
polyglot java import org.enso.table.data.table.Column as Java_Column
polyglot java import org.enso.table.data.table.join.Between as Java_Join_Between
polyglot java import org.enso.table.data.table.join.Equals as Java_Join_Equals
polyglot java import org.enso.table.data.table.join.EqualsIgnoreCase as Java_Join_Equals_Ignore_Case
polyglot java import org.enso.table.data.table.join.LookupJoin
polyglot java import org.enso.table.data.table.join.conditions.Between as Java_Join_Between
polyglot java import org.enso.table.data.table.join.conditions.Equals as Java_Join_Equals
polyglot java import org.enso.table.data.table.join.conditions.EqualsIgnoreCase as Java_Join_Equals_Ignore_Case
polyglot java import org.enso.table.data.table.join.lookup.LookupJoin
polyglot java import org.enso.table.data.table.Table as Java_Table
polyglot java import org.enso.table.error.TooManyColumnsException
polyglot java import org.enso.table.error.NullValuesInKeyColumns
@ -1570,7 +1570,7 @@ type Table
table.set "2 * [total_stock]" new_name="total_stock_expr"
@column Column_Operation.default_widget
set : Text | Column -> Text -> Set_Mode -> Problem_Behavior -> Table ! Existing_Column | Missing_Column | No_Such_Column | Expression_Error
set self column:(Text | Column | Constant_Column | Column_Operation) new_name="" set_mode=Set_Mode.Add_Or_Update on_problems=Report_Warning =
set self column:(Text | Column | Constant_Column | Column_Operation) (new_name : Text = "") (set_mode : Set_Mode = Set_Mode.Add_Or_Update) (on_problems : Problem_Behavior = Report_Warning) =
problem_builder = Problem_Builder.new
unique = self.column_naming_helper.create_unique_name_strategy
unique.mark_used self.column_names

View File

@ -6,7 +6,7 @@ import project.Data.Type.Value_Type.Value_Type
import project.Data.Type.Value_Type_Helpers
from project.Errors import Missing_Input_Columns, Unexpected_Extra_Columns, Floating_Point_Equality, No_Common_Type, No_Output_Columns
polyglot java import org.enso.table.data.table.join.LookupColumnDescription
polyglot java import org.enso.table.data.table.join.lookup.LookupColumnDescription
## PRIVATE
type Lookup_Column

View File

@ -40,6 +40,10 @@ public abstract class MultiValueKeyBase {
return rowIndex;
}
/**
 * Returns the number of columns that make up this key, i.e. the length of the {@code storages}
 * array backing it.
 */
public int getNumberOfColumns() {
return storages.length;
}
@Override
public abstract boolean equals(Object o);

View File

@ -78,4 +78,24 @@ public class OrderedMultiValueKey extends MultiValueKeyBase
public String toString() {
return "OrderedMultiValueKey{row="+rowIndex+"}";
}
/**
 * Compares two keys by looking at a single, fixed column of each key and ignoring all the others.
 *
 * <p>Both keys must consist of the same number of columns; otherwise they are considered
 * incomparable and a {@link ClassCastException} is thrown. The values are compared using the
 * object comparator of the first key.
 */
public static class ProjectionComparator implements Comparator<OrderedMultiValueKey> {
  /** Index of the column (dimension) that takes part in the comparison. */
  private final int ix;

  /**
   * @param ix index of the key column that the comparison is projected onto
   */
  public ProjectionComparator(int ix) {
    this.ix = ix;
  }

  @Override
  public int compare(OrderedMultiValueKey o1, OrderedMultiValueKey o2) {
    boolean sameWidth = o1.storages.length == o2.storages.length;
    if (!sameWidth) {
      throw new ClassCastException("Incomparable keys.");
    }

    var firstValue = o1.get(ix);
    var secondValue = o2.get(ix);
    return o1.objectComparator.compare(firstValue, secondValue);
  }
}
}

View File

@ -18,9 +18,9 @@ import org.enso.table.data.index.OrderedMultiValueKey;
import org.enso.table.data.mask.OrderMask;
import org.enso.table.data.mask.SliceRange;
import org.enso.table.data.table.join.CrossJoin;
import org.enso.table.data.table.join.IndexJoin;
import org.enso.table.data.table.join.JoinCondition;
import org.enso.table.data.table.join.conditions.JoinCondition;
import org.enso.table.data.table.join.JoinResult;
import org.enso.table.data.table.join.JoinStrategy;
import org.enso.table.error.UnexpectedColumnTypeException;
import org.enso.table.operations.Distinct;
import org.enso.table.problems.ProblemAggregator;
@ -279,8 +279,8 @@ public class Table {
"be true.");
}
var strategy = new IndexJoin();
JoinResult joinResult = strategy.join(this, right, conditions, problemAggregator);
JoinStrategy strategy = JoinStrategy.createStrategy(conditions);
JoinResult joinResult = strategy.join(problemAggregator);
List<JoinResult> resultsToKeep = new ArrayList<>();

View File

@ -1,5 +0,0 @@
package org.enso.table.data.table.join;
import org.enso.table.data.table.Column;
/** A join condition pairing a column of the left table with a column of the right table. */
public record Equals(Column left, Column right) implements JoinCondition {}

View File

@ -1,112 +0,0 @@
package org.enso.table.data.table.join;
import org.enso.base.text.TextFoldingStrategy;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.type.AnyObjectType;
import org.enso.table.data.index.MultiValueIndex;
import org.enso.table.data.table.Column;
import org.enso.table.data.table.Table;
import org.enso.table.data.table.join.scan.Matcher;
import org.enso.table.data.table.join.scan.MatcherFactory;
import org.enso.table.problems.ColumnAggregatedProblemAggregator;
import org.enso.table.problems.ProblemAggregator;
import org.graalvm.polyglot.Context;
import java.util.List;
import java.util.stream.Collectors;
/**
 * A join strategy that builds unordered (hash) indices over the columns of the supported equality
 * conditions and then checks every remaining condition row-pair by row-pair using a {@link Matcher}.
 */
public class IndexJoin implements JoinStrategy {
/** An equality condition reduced to the two compared columns and the text folding used for them. */
private record HashEqualityCondition(
Column left, Column right, TextFoldingStrategy textFoldingStrategy) {
}
/**
 * Joins the two tables on the given conditions.
 *
 * @param left the left table (only its row count is read here)
 * @param right the right table (only its row count is read here)
 * @param conditions all join conditions; supported ones go to the index, the rest to the matcher
 * @param problemAggregator receives problems reported while indexing and matching
 * @return the pairs of (left row, right row) that satisfy all conditions
 */
@Override
public JoinResult join(Table left, Table right, List<JoinCondition> conditions, ProblemAggregator problemAggregator) {
Context context = Context.getCurrent();
// Conditions that can be resolved through the index.
List<HashEqualityCondition> equalConditions =
conditions.stream()
.filter(IndexJoin::isSupported)
.map(IndexJoin::makeHashEqualityCondition)
.collect(Collectors.toList());
// Everything else is verified pair-by-pair within each matching index bucket.
var remainingConditions =
conditions.stream().filter(c -> !isSupported(c)).collect(Collectors.toList());
var leftEquals =
equalConditions.stream().map(HashEqualityCondition::left).toArray(Column[]::new);
var rightEquals =
equalConditions.stream().map(HashEqualityCondition::right).toArray(Column[]::new);
var textFoldingStrategies =
equalConditions.stream()
.map(HashEqualityCondition::textFoldingStrategy)
.collect(Collectors.toList());
var leftIndex =
MultiValueIndex.makeUnorderedIndex(leftEquals, left.rowCount(), textFoldingStrategies, problemAggregator);
var rightIndex =
MultiValueIndex.makeUnorderedIndex(rightEquals, right.rowCount(), textFoldingStrategies, problemAggregator);
MatcherFactory factory = new MatcherFactory();
Matcher remainingMatcher = factory.create(
remainingConditions, new ColumnAggregatedProblemAggregator(problemAggregator)
);
JoinResult.Builder resultBuilder = new JoinResult.Builder();
// For every key present in both indices, test each pair of rows sharing that key.
for (var leftKey : leftIndex.keys()) {
if (rightIndex.contains(leftKey)) {
for (var leftRow : leftIndex.get(leftKey)) {
for (var rightRow : rightIndex.get(leftKey)) {
if (remainingMatcher.matches(leftRow, rightRow)) {
resultBuilder.addRow(leftRow, rightRow);
}
context.safepoint();
}
context.safepoint();
}
}
context.safepoint();
}
return resultBuilder.build();
}
/**
 * Tells whether the condition can be handled by the index: {@code Equals} only when both columns
 * have a builtin storage type, {@code EqualsIgnoreCase} always, anything else never.
 */
private static boolean isSupported(JoinCondition condition) {
switch (condition) {
case Equals eq -> {
return isBuiltinType(eq.left().getStorage()) && isBuiltinType(eq.right().getStorage());
}
case EqualsIgnoreCase ignored -> {
return true;
}
default -> {
return false;
}
}
}
/**
 * Converts a supported condition into a {@link HashEqualityCondition}, choosing the text folding
 * strategy that corresponds to the kind of equality.
 *
 * @throws IllegalStateException if the condition is not one of the supported kinds
 */
private static HashEqualityCondition makeHashEqualityCondition(JoinCondition eq) {
switch (eq) {
case Equals e -> {
return new HashEqualityCondition(
e.left(), e.right(), TextFoldingStrategy.unicodeNormalizedFold);
}
case EqualsIgnoreCase e -> {
return new HashEqualityCondition(
e.left(), e.right(), TextFoldingStrategy.caseInsensitiveFold(e.locale()));
}
default -> throw new IllegalStateException(
"Impossible: trying to convert condition "
+ eq
+ " to a HashEqualityCondition, but it should not be marked as supported. This is a"
+ " bug in the Table library.");
}
}
/** Returns true unless the storage is of the {@code AnyObjectType} type. */
private static boolean isBuiltinType(Storage<?> storage) {
// TODO: this should be removed when #5626 and #5259 are implemented
return !storage.getType().equals(AnyObjectType.INSTANCE);
}
}

View File

@ -1,3 +0,0 @@
package org.enso.table.data.table.join;
/** Marker interface implemented by every kind of condition that a join can be performed on. */
public interface JoinCondition {}

View File

@ -1,10 +1,75 @@
package org.enso.table.data.table.join;
import java.util.List;
import org.enso.table.data.table.Table;
import org.enso.table.data.table.join.between.SortJoin;
import org.enso.table.data.table.join.conditions.Between;
import org.enso.table.data.table.join.conditions.Equals;
import org.enso.table.data.table.join.conditions.EqualsIgnoreCase;
import org.enso.table.data.table.join.conditions.HashableCondition;
import org.enso.table.data.table.join.conditions.JoinCondition;
import org.enso.table.data.table.join.hashing.HashJoin;
import org.enso.table.problems.ProblemAggregator;
import java.util.List;
/**
* A strategy used for performing a join of two tables.
*/
public interface JoinStrategy {
JoinResult join(
Table left, Table right, List<JoinCondition> conditions, ProblemAggregator problemAggregator);
JoinResult join(ProblemAggregator problemAggregator);
/**
 * Selects the join strategy matching the given conditions.
 *
 * <p>Hashable conditions are handled by a {@link HashJoin}; {@link Between} conditions by a
 * {@link SortJoin}. When both kinds are present, the {@link SortJoin} is plugged into the
 * {@link HashJoin} as its inner strategy.
 *
 * @param conditions the conditions of the join, must not be empty
 * @return a strategy ready to be executed
 * @throws IllegalArgumentException if there are no conditions or a condition of an unsupported
 *     kind is present
 */
static JoinStrategy createStrategy(List<JoinCondition> conditions) {
  if (conditions.isEmpty()) {
    throw new IllegalArgumentException("At least one join condition must be provided.");
  }

  List<HashableCondition> hashable =
      conditions.stream()
          .filter(HashableCondition.class::isInstance)
          .map(HashableCondition.class::cast)
          .toList();
  List<Between> ranges =
      conditions.stream().filter(Between.class::isInstance).map(Between.class::cast).toList();

  // Every condition has to fall into exactly one of the two supported categories.
  int recognised = hashable.size() + ranges.size();
  if (recognised != conditions.size()) {
    throw new IllegalArgumentException("Unsupported join condition.");
  }

  if (!hashable.isEmpty()) {
    PluggableJoinStrategy inner =
        ranges.isEmpty() ? new MatchAllStrategy() : new SortJoin(ranges);
    return new HashJoin(hashable, inner);
  }

  assert !ranges.isEmpty();
  return new SortJoin(ranges);
}
/**
 * Derives the row counts of the joined tables from the columns referenced by the first join
 * condition.
 */
class ConditionsHelper {
  private final List<? extends JoinCondition> conditions;

  /**
   * @param conditions the join conditions, must not be empty
   * @throws IllegalArgumentException if no conditions were given
   */
  public ConditionsHelper(List<? extends JoinCondition> conditions) {
    if (conditions.isEmpty()) {
      throw new IllegalArgumentException("At least one join condition must be provided.");
    }
    this.conditions = conditions;
  }

  /** Returns the size of the storage of the left column of the first condition. */
  public int getLeftTableRowCount() {
    JoinCondition first = conditions.get(0);
    var leftColumn =
        switch (first) {
          case Equals eq -> eq.left();
          case EqualsIgnoreCase eqIgnoreCase -> eqIgnoreCase.left();
          case Between range -> range.left();
        };
    return leftColumn.getStorage().size();
  }

  /**
   * Returns the size of the storage of the right column of the first condition (the lower-bound
   * column in case of {@link Between}).
   */
  public int getRightTableRowCount() {
    JoinCondition first = conditions.get(0);
    var rightColumn =
        switch (first) {
          case Equals eq -> eq.right();
          case EqualsIgnoreCase eqIgnoreCase -> eqIgnoreCase.right();
          case Between range -> range.rightLower();
        };
    return rightColumn.getStorage().size();
  }
}
}

View File

@ -0,0 +1,28 @@
package org.enso.table.data.table.join;
import java.util.List;
import org.enso.table.problems.ProblemAggregator;
import org.graalvm.polyglot.Context;
/**
 * The terminal inner strategy of a join: used when no join conditions are left to check, so each
 * row of the left group is paired with each row of the right group.
 */
public class MatchAllStrategy implements PluggableJoinStrategy {
  /**
   * Adds the full cross product of the two groups to the result.
   *
   * @param leftGroup row indices coming from the left table
   * @param rightGroup row indices coming from the right table
   * @param resultBuilder receives one row per (left, right) pair
   * @param problemAggregator not used by this strategy
   */
  @Override
  public void joinSubsets(
      List<Integer> leftGroup,
      List<Integer> rightGroup,
      JoinResult.Builder resultBuilder,
      ProblemAggregator problemAggregator) {
    final Context ctx = Context.getCurrent();
    for (Integer leftIx : leftGroup) {
      for (Integer rightIx : rightGroup) {
        resultBuilder.addRow(leftIx, rightIx);
        ctx.safepoint();
      }

      ctx.safepoint();
    }
  }
}

View File

@ -0,0 +1,18 @@
package org.enso.table.data.table.join;
import java.util.List;
import org.enso.table.problems.ProblemAggregator;
/**
 * A helper join strategy that can be used within another join strategy to perform a join of
 * sub-sets of indices, stemming from already joining on other conditions.
 */
public interface PluggableJoinStrategy {
/**
 * Performs a join of two sub-sets of indices.
 *
 * @param leftGroup indices of the rows of the left table that take part in this sub-join
 * @param rightGroup indices of the rows of the right table that take part in this sub-join
 * @param resultBuilder the builder to which the matching (left row, right row) pairs are added
 * @param problemAggregator the aggregator available for reporting problems
 */
void joinSubsets(
List<Integer> leftGroup,
List<Integer> rightGroup,
JoinResult.Builder resultBuilder,
ProblemAggregator problemAggregator);
}

View File

@ -0,0 +1,162 @@
package org.enso.table.data.table.join.between;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import org.enso.base.ObjectComparator;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.index.OrderedMultiValueKey;
import org.enso.table.data.table.join.JoinResult;
import org.enso.table.data.table.join.JoinStrategy;
import org.enso.table.data.table.join.PluggableJoinStrategy;
import org.enso.table.data.table.join.conditions.Between;
import org.enso.table.problems.ProblemAggregator;
import org.graalvm.polyglot.Context;
public class SortJoin implements JoinStrategy, PluggableJoinStrategy {
public SortJoin(List<Between> conditions) {
conditionsHelper = new JoinStrategy.ConditionsHelper(conditions);
Context context = Context.getCurrent();
int nConditions = conditions.size();
directions = new int[nConditions];
leftStorages = new Storage<?>[nConditions];
lowerStorages = new Storage<?>[nConditions];
upperStorages = new Storage<?>[nConditions];
for (int i = 0; i < nConditions; i++) {
directions[i] = 1;
leftStorages[i] = conditions.get(i).left().getStorage();
lowerStorages[i] = conditions.get(i).rightLower().getStorage();
upperStorages[i] = conditions.get(i).rightUpper().getStorage();
context.safepoint();
}
}
private final JoinStrategy.ConditionsHelper conditionsHelper;
private final int[] directions;
private final Storage<?>[] leftStorages;
private final Storage<?>[] lowerStorages;
private final Storage<?>[] upperStorages;
@Override
public JoinResult join(ProblemAggregator problemAggregator) {
Context context = Context.getCurrent();
JoinResult.Builder resultBuilder = new JoinResult.Builder();
int leftRowCount = conditionsHelper.getLeftTableRowCount();
int rightRowCount = conditionsHelper.getRightTableRowCount();
if (leftRowCount == 0 || rightRowCount == 0) {
// if one group is completely empty, there will be no matches to report
return resultBuilder.build();
}
List<OrderedMultiValueKey> leftKeys = new ArrayList<>(leftRowCount);
for (int i = 0; i < leftRowCount; i++) {
leftKeys.add(new OrderedMultiValueKey(leftStorages, i, directions));
context.safepoint();
}
SortedListIndex<OrderedMultiValueKey> leftIndex = buildSortedLeftIndex(leftKeys);
for (int rightRowIx = 0; rightRowIx < rightRowCount; rightRowIx++) {
addMatchingLeftRows(leftIndex, rightRowIx, resultBuilder);
context.safepoint();
}
return resultBuilder.build();
}
@Override
public void joinSubsets(
List<Integer> leftGroup,
List<Integer> rightGroup,
JoinResult.Builder resultBuilder,
ProblemAggregator problemAggregator) {
Context context = Context.getCurrent();
List<OrderedMultiValueKey> leftKeys =
leftGroup.stream()
.map(i -> new OrderedMultiValueKey(leftStorages, i, directions, objectComparator))
.toList();
if (leftKeys.isEmpty()) {
// left group is completely empty - there will be no matches at all
return;
}
SortedListIndex<OrderedMultiValueKey> leftIndex = buildSortedLeftIndex(leftKeys);
for (int rightRowIx : rightGroup) {
addMatchingLeftRows(leftIndex, rightRowIx, resultBuilder);
context.safepoint();
}
}
private SortedListIndex<OrderedMultiValueKey> buildSortedLeftIndex(
List<OrderedMultiValueKey> keys) {
return SortedListIndex.build(keys, firstCoordinateComparator);
}
private OrderedMultiValueKey buildLowerBound(int rightRowIx) {
return new OrderedMultiValueKey(lowerStorages, rightRowIx, directions, objectComparator);
}
private OrderedMultiValueKey buildUpperBound(int rightRowIx) {
return new OrderedMultiValueKey(upperStorages, rightRowIx, directions, objectComparator);
}
private void addMatchingLeftRows(
SortedListIndex<OrderedMultiValueKey> sortedLeftIndex,
int rightRowIx,
JoinResult.Builder resultBuilder) {
OrderedMultiValueKey lowerBound = buildLowerBound(rightRowIx);
OrderedMultiValueKey upperBound = buildUpperBound(rightRowIx);
// If the match interval is invalid or empty, there is nothing to do.
if (lowerBound.hasAnyNulls()
|| upperBound.hasAnyNulls()
|| lowerBound.compareTo(upperBound) > 0) {
return;
}
List<OrderedMultiValueKey> firstCoordinateMatches =
sortedLeftIndex.findSubRange(lowerBound, upperBound);
Context context = Context.getCurrent();
for (OrderedMultiValueKey key : firstCoordinateMatches) {
if (isInRange(key, lowerBound, upperBound)) {
resultBuilder.addRow(key.getRowIndex(), rightRowIx);
}
context.safepoint();
}
}
private boolean isInRange(
OrderedMultiValueKey key, OrderedMultiValueKey lowerBound, OrderedMultiValueKey upperBound) {
assert key.getNumberOfColumns() == lowerBound.getNumberOfColumns();
assert key.getNumberOfColumns() == upperBound.getNumberOfColumns();
// Note: we cannot just use `compareTo`, because we are now not checking that the key is between
// the bounds in lexicographic order.
// Instead, we are checking if the key is between the bounds for all dimensions.
int n = key.getNumberOfColumns();
for (int i = 0; i < n; i++) {
var keyValue = key.get(i);
var lowerBoundValue = lowerBound.get(i);
var upperBoundValue = upperBound.get(i);
boolean fitsInThisDimension =
objectComparator.compare(keyValue, lowerBoundValue) >= 0
&& objectComparator.compare(keyValue, upperBoundValue) <= 0;
if (!fitsInThisDimension) {
return false;
}
}
return true;
}
private final ObjectComparator objectComparator = ObjectComparator.DEFAULT;
private final Comparator<OrderedMultiValueKey> firstCoordinateComparator =
new OrderedMultiValueKey.ProjectionComparator(0);
}

View File

@ -0,0 +1,129 @@
package org.enso.table.data.table.join.between;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
/**
 * An immutable index over a list of elements kept sorted according to a comparator, allowing to
 * find all elements lying between two bounds using binary search.
 *
 * <p>Elements that compare as equal are all kept, so the index can hold duplicate keys.
 *
 * @param <T> the type of the indexed elements
 */
public class SortedListIndex<T> {
  /** Defines the <= ordering for the index. */
  private final Comparator<T> comparator;

  /* forall 0 <= i <= j < n, sortedList[i] <= sortedList[j] */
  private final List<T> sortedList;

  /**
   * @param sortedList the elements, already sorted according to {@code comparator}
   * @param comparator the ordering of the index
   */
  protected SortedListIndex(ArrayList<T> sortedList, Comparator<T> comparator) {
    this.comparator = comparator;
    this.sortedList = sortedList;
  }

  /**
   * Builds an index out of the given elements. The input list is copied and not modified.
   *
   * @param list the elements to index, in any order
   * @param comparator the ordering used for sorting and for the range queries
   */
  public static <T> SortedListIndex<T> build(List<T> list, Comparator<T> comparator) {
    ArrayList<T> copy = new ArrayList<>(list);
    copy.sort(comparator);
    return new SortedListIndex<>(copy, comparator);
  }

  /**
   * Finds a sub-range of the index containing all elements between the lower and upper bounds
   * (both-ends inclusive).
   *
   * <p>The result is a read-only view, so that the caller cannot break the ordering of the index
   * by modifying it.
   *
   * @return the matching elements in sorted order; empty if there are none
   */
  public List<T> findSubRange(T lowerBound, T upperBound) {
    int start = findLowerIndex(lowerBound);
    int end = findUpperIndex(upperBound) + 1;
    if (start >= end) {
      return Collections.emptyList();
    }

    return Collections.unmodifiableList(sortedList.subList(start, end));
  }

  /**
   * Finds the index of the first element that is greater than or equal to the argument.
   *
   * <p>If all elements are greater than the argument, returns 0. If all elements are less than the
   * argument, returns N.
   */
  private int findLowerIndex(T element) {
    int start = 0;
    int end = sortedList.size();

    /*
     * Loop invariants:
     * 1) start <= end
     * 2) forall 0 <= i < start: sortedList[i] < element
     * 3) forall end <= i < N: sortedList[i] >= element
     *
     * end - start is strictly decreasing, so the loop will always terminate.
     */
    while (start < end) {
      // start <= mid < end; computed without adding start and end, so it cannot overflow
      int mid = start + (end - start) / 2;
      T midElement = sortedList.get(mid);
      int cmp = comparator.compare(midElement, element);
      if (cmp < 0) {
        start = mid + 1;
      } else {
        end = mid;
      }
    }

    /*
     * After the loop, start >= end, but also start <= end, so start == end.
     *
     * Thus, from invariants:
     * forall 0 <= i < start: sortedList[i] < element
     * forall start <= i < N: sortedList[i] >= element
     *
     * start is the first element that is >= element;
     * if there is no such element, it will be N.
     */
    return start;
  }

  /**
   * Finds the index of the last element that is less than or equal to the argument.
   *
   * <p>If all elements are greater than the argument, returns -1. If all elements are less than the
   * argument, returns N-1 (index of the last element).
   */
  private int findUpperIndex(T element) {
    int start = 0;
    int end = sortedList.size();

    /*
     * Loop invariants:
     * 1) start <= end
     * 2) forall 0 <= i < start: sortedList[i] <= element
     * 3) forall end <= i < N: sortedList[i] > element
     *
     * end - start is strictly decreasing.
     */
    while (start < end) {
      // start <= mid < end; computed without adding start and end, so it cannot overflow
      int mid = start + (end - start) / 2;
      T midElement = sortedList.get(mid);
      int cmp = comparator.compare(midElement, element);
      if (cmp <= 0) {
        start = mid + 1;
      } else {
        end = mid;
      }
    }

    /*
     * After the loop, start >= end, but also start <= end, so start == end.
     *
     * Thus, from invariants:
     * forall 0 <= i < start: sortedList[i] <= element
     * forall start <= i < N: sortedList[i] > element
     *
     * So start-1 is the last element that is <= element (if it exists);
     * if there is no such element, it will be -1.
     */
    return start - 1;
  }
}

View File

@ -1,4 +1,4 @@
package org.enso.table.data.table.join;
package org.enso.table.data.table.join.conditions;
import org.enso.table.data.table.Column;

View File

@ -0,0 +1,5 @@
package org.enso.table.data.table.join.conditions;
import org.enso.table.data.table.Column;
/**
 * A join condition pairing a column of the left table with a column of the right table; it is one
 * of the conditions that can be handled by hashing.
 */
public record Equals(Column left, Column right) implements HashableCondition {}

View File

@ -1,7 +1,7 @@
package org.enso.table.data.table.join;
package org.enso.table.data.table.join.conditions;
import org.enso.table.data.table.Column;
import java.util.Locale;
public record EqualsIgnoreCase(Column left, Column right, Locale locale) implements JoinCondition {}
public record EqualsIgnoreCase(Column left, Column right, Locale locale) implements HashableCondition {}

View File

@ -0,0 +1,4 @@
package org.enso.table.data.table.join.conditions;
/**
 * A join condition that can be handled by a hash-based join. The only permitted implementations
 * are {@link Equals} and {@link EqualsIgnoreCase}.
 */
public sealed interface HashableCondition extends JoinCondition permits Equals, EqualsIgnoreCase {
}

View File

@ -0,0 +1,3 @@
package org.enso.table.data.table.join.conditions;
/**
 * The root of the closed hierarchy of join conditions: a condition is either a {@link
 * HashableCondition} or a {@link Between}.
 */
public sealed interface JoinCondition permits HashableCondition, Between {}

View File

@ -0,0 +1,84 @@
package org.enso.table.data.table.join.hashing;
import org.enso.base.text.TextFoldingStrategy;
import org.enso.table.data.index.MultiValueIndex;
import org.enso.table.data.index.UnorderedMultiValueKey;
import org.enso.table.data.table.Column;
import org.enso.table.data.table.join.JoinResult;
import org.enso.table.data.table.join.JoinStrategy;
import org.enso.table.data.table.join.PluggableJoinStrategy;
import org.enso.table.data.table.join.conditions.Equals;
import org.enso.table.data.table.join.conditions.EqualsIgnoreCase;
import org.enso.table.data.table.join.conditions.HashableCondition;
import org.enso.table.problems.ProblemAggregator;
import org.graalvm.polyglot.Context;
import java.util.List;
/**
 * A join strategy that groups rows of both tables by the values of the equality conditions, using
 * hash-based indices.
 *
 * <p>For each key present on both sides, the matching groups of rows are handed over to the
 * {@code remainingMatcher}, which applies whatever conditions are left.
 */
public class HashJoin implements JoinStrategy {

  /** A hashable condition unpacked into its columns and the text folding strategy it implies. */
  private record HashEqualityCondition(
      Column left, Column right, TextFoldingStrategy textFoldingStrategy) {}

  private final JoinStrategy.ConditionsHelper conditionsHelper;
  private final Column[] leftEquals, rightEquals;
  private final List<TextFoldingStrategy> textFoldingStrategies;
  private final PluggableJoinStrategy remainingMatcher;

  /**
   * @param conditions the equality conditions used as the hash key, must not be empty
   * @param remainingMatcher the strategy joining the groups of rows that share the same key
   */
  public HashJoin(List<HashableCondition> conditions, PluggableJoinStrategy remainingMatcher) {
    this.conditionsHelper = new JoinStrategy.ConditionsHelper(conditions);
    this.remainingMatcher = remainingMatcher;

    List<HashEqualityCondition> unpacked =
        conditions.stream().map(HashJoin::makeHashEqualityCondition).toList();
    if (unpacked.isEmpty()) {
      throw new IllegalArgumentException("EqualityHashJoin is applicable if there is at least one equality condition.");
    }

    int count = unpacked.size();
    Column[] lefts = new Column[count];
    Column[] rights = new Column[count];
    for (int i = 0; i < count; i++) {
      HashEqualityCondition condition = unpacked.get(i);
      lefts[i] = condition.left();
      rights[i] = condition.right();
    }

    this.leftEquals = lefts;
    this.rightEquals = rights;
    this.textFoldingStrategies =
        unpacked.stream().map(HashEqualityCondition::textFoldingStrategy).toList();
  }

  /**
   * Indexes both tables by the key columns and joins the groups of rows sharing a key.
   *
   * @param problemAggregator passed on to the index construction and to the inner strategy
   */
  @Override
  public JoinResult join(ProblemAggregator problemAggregator) {
    Context context = Context.getCurrent();

    int leftRowCount = conditionsHelper.getLeftTableRowCount();
    var leftIndex =
        MultiValueIndex.makeUnorderedIndex(
            leftEquals, leftRowCount, textFoldingStrategies, problemAggregator);
    int rightRowCount = conditionsHelper.getRightTableRowCount();
    var rightIndex =
        MultiValueIndex.makeUnorderedIndex(
            rightEquals, rightRowCount, textFoldingStrategies, problemAggregator);

    JoinResult.Builder resultBuilder = new JoinResult.Builder();
    for (var entry : leftIndex.mapping().entrySet()) {
      UnorderedMultiValueKey key = entry.getKey();
      List<Integer> leftRows = entry.getValue();
      List<Integer> rightRows = rightIndex.get(key);

      // Keys that do not occur on the right side produce no rows.
      boolean presentOnBothSides = rightRows != null;
      if (presentOnBothSides) {
        remainingMatcher.joinSubsets(leftRows, rightRows, resultBuilder, problemAggregator);
      }

      context.safepoint();
    }

    return resultBuilder.build();
  }

  /** Unpacks the condition, picking the text folding strategy appropriate for its kind. */
  private static HashEqualityCondition makeHashEqualityCondition(HashableCondition eq) {
    return switch (eq) {
      case Equals e -> new HashEqualityCondition(
          e.left(), e.right(), TextFoldingStrategy.unicodeNormalizedFold);
      case EqualsIgnoreCase e -> new HashEqualityCondition(
          e.left(), e.right(), TextFoldingStrategy.caseInsensitiveFold(e.locale()));
    };
  }
}

View File

@ -1,4 +1,4 @@
package org.enso.table.data.table.join;
package org.enso.table.data.table.join.lookup;
import org.enso.table.data.column.storage.type.StorageType;
import org.enso.table.data.table.Column;

View File

@ -1,4 +1,4 @@
package org.enso.table.data.table.join;
package org.enso.table.data.table.join.lookup;
import org.enso.base.text.TextFoldingStrategy;
import org.enso.table.data.column.builder.Builder;
@ -9,6 +9,7 @@ import org.enso.table.data.index.UnorderedMultiValueKey;
import org.enso.table.data.mask.OrderMask;
import org.enso.table.data.table.Column;
import org.enso.table.data.table.Table;
import org.enso.table.data.table.join.conditions.Equals;
import org.enso.table.error.NonUniqueLookupKey;
import org.enso.table.error.NullValuesInKeyColumns;
import org.enso.table.error.UnmatchedRow;
@ -17,7 +18,6 @@ import org.enso.table.util.ConstantList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.IntStream;
public class LookupJoin {

View File

@ -1,5 +0,0 @@
package org.enso.table.data.table.join.scan;
/** A predicate deciding whether a row of the left table matches a row of the right table. */
public interface Matcher {
/**
 * @param left index of the row in the left table
 * @param right index of the row in the right table
 * @return true if the two rows match
 */
boolean matches(int left, int right);
}

View File

@ -1,152 +0,0 @@
package org.enso.table.data.table.join.scan;
import org.enso.base.ObjectComparator;
import org.enso.base.Text_Utils;
import org.enso.base.polyglot.NumericConverter;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.StringStorage;
import org.enso.table.data.table.join.Between;
import org.enso.table.data.table.join.Equals;
import org.enso.table.data.table.join.EqualsIgnoreCase;
import org.enso.table.data.table.join.JoinCondition;
import org.enso.table.data.table.problems.FloatingPointGrouping;
import org.enso.table.problems.ColumnAggregatedProblemAggregator;
import java.util.List;
import java.util.Locale;
import java.util.stream.Collectors;
public class MatcherFactory {
public Matcher create(JoinCondition condition, ColumnAggregatedProblemAggregator problemAggregator) {
return switch (condition) {
case Equals eq -> new EqualsMatcher(eq, problemAggregator);
case EqualsIgnoreCase eq -> new EqualsIgnoreCaseMatcher(eq);
case Between between -> new BetweenMatcher(between);
default -> throw new UnsupportedOperationException(
"Unsupported join condition: " + condition);
};
}
public Matcher create(List<JoinCondition> condition, ColumnAggregatedProblemAggregator problemAggregator) {
List<Matcher> matchers = condition.stream().map(m-> create(m, problemAggregator)).collect(Collectors.toList());
return new CompoundMatcher(matchers);
}
static final class CompoundMatcher implements Matcher {
private final List<Matcher> matchers;
CompoundMatcher(List<Matcher> matchers) {
this.matchers = matchers;
}
@Override
public boolean matches(int left, int right) {
for (Matcher matcher : matchers) {
if (!matcher.matches(left, right)) {
return false;
}
}
return true;
}
}
static final class EqualsMatcher implements Matcher {
private final Storage<?> leftStorage;
private final Storage<?> rightStorage;
private final String leftColumnName;
private final String rightColumnName;
private final ColumnAggregatedProblemAggregator problemAggregator;
public EqualsMatcher(Equals eq, ColumnAggregatedProblemAggregator problemAggregator) {
leftStorage = eq.left().getStorage();
rightStorage = eq.right().getStorage();
leftColumnName = eq.left().getName();
rightColumnName = eq.right().getName();
this.problemAggregator = problemAggregator;
}
@Override
public boolean matches(int left, int right) {
Object leftValue = leftStorage.getItemBoxed(left);
Object rightValue = rightStorage.getItemBoxed(right);
if (NumericConverter.isFloatLike(leftValue)) {
problemAggregator.reportColumnAggregatedProblem(new FloatingPointGrouping(leftColumnName, left));
}
if (NumericConverter.isFloatLike(rightValue)) {
problemAggregator.reportColumnAggregatedProblem(new FloatingPointGrouping(rightColumnName, right));
}
return ObjectComparator.areEqual(leftValue, rightValue);
}
}
/**
 * Matches a pair of rows when the text values of the two compared columns are equal ignoring
 * case, using the locale taken from the join condition.
 */
static final class EqualsIgnoreCaseMatcher implements Matcher {
  private final StringStorage leftStorage;
  private final StringStorage rightStorage;
  private final Locale locale;

  /**
   * @param eq the condition describing the compared columns and the locale
   * @throws IllegalArgumentException if either column is not backed by a {@code StringStorage}
   */
  public EqualsIgnoreCaseMatcher(EqualsIgnoreCase eq) {
    // The left column is validated before the right one is even looked at.
    if (!(eq.left().getStorage() instanceof StringStorage leftStrings)) {
      throw new IllegalArgumentException("Expected left column to have type Text.");
    }
    if (!(eq.right().getStorage() instanceof StringStorage rightStrings)) {
      throw new IllegalArgumentException("Expected right column to have type Text.");
    }
    this.leftStorage = leftStrings;
    this.rightStorage = rightStrings;
    this.locale = eq.locale();
  }

  /**
   * Compares the two texts ignoring case. Two missing values match each other; a missing value
   * never matches a present one.
   *
   * @param left row index in the left table
   * @param right row index in the right table
   * @return whether the pair of rows matches
   */
  @Override
  public boolean matches(int left, int right) {
    String leftText = leftStorage.getItem(left);
    String rightText = rightStorage.getItem(right);
    boolean leftMissing = leftText == null;
    boolean rightMissing = rightText == null;
    if (leftMissing || rightMissing) {
      return leftMissing && rightMissing;
    }
    return Text_Utils.equals_ignore_case(leftText, rightText, locale);
  }
}
/**
 * Matches a pair of rows when the left value lies within the inclusive range given by the lower
 * and upper bound columns of the right row, as ordered by {@code ObjectComparator.DEFAULT}.
 */
static final class BetweenMatcher implements Matcher {
  private final Storage<?> leftStorage;
  private final Storage<?> rightLowerStorage;
  private final Storage<?> rightUpperStorage;

  public BetweenMatcher(Between between) {
    this.leftStorage = between.left().getStorage();
    this.rightLowerStorage = between.rightLower().getStorage();
    this.rightUpperStorage = between.rightUpper().getStorage();
  }

  /**
   * Checks {@code lower <= value <= upper} for the given pair of rows.
   *
   * @param left row index in the left table
   * @param right row index in the right table
   * @return whether the left value falls within the bounds of the right row; always {@code
   *     false} if any of the three values is missing
   */
  @Override
  public boolean matches(int left, int right) {
    Object value = leftStorage.getItemBoxed(left);
    Object lowerBound = rightLowerStorage.getItemBoxed(right);
    Object upperBound = rightUpperStorage.getItemBoxed(right);

    // Ordering is not considered well-defined for missing values, so rows with any missing value
    // are never correlated by Between.
    boolean anyMissing = value == null || lowerBound == null || upperBound == null;
    if (anyMissing) {
      return false;
    }

    // Below the lower bound: no need to look at the upper bound at all.
    if (ObjectComparator.DEFAULT.compare(value, lowerBound) < 0) {
      return false;
    }
    return ObjectComparator.DEFAULT.compare(value, upperBound) <= 0;
  }
}
}

View File

@ -2,7 +2,6 @@ package org.enso.table.operations;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -18,7 +17,6 @@ import org.enso.table.data.table.Column;
import org.enso.table.problems.ColumnAggregatedProblemAggregator;
import org.enso.table.problems.ProblemAggregator;
import org.enso.table.util.ConstantList;
import org.graalvm.collections.Pair;
public class AddRowNumber {
@ -62,18 +60,17 @@ public class AddRowNumber {
Storage<?>[] orderingStorages =
Arrays.stream(orderingColumns).map(Column::getStorage).toArray(Storage[]::new);
long[] numbers = new long[n];
List<Pair<OrderedMultiValueKey, Integer>> keys =
List<OrderedMultiValueKey> keys =
new ArrayList<>(
IntStream.range(0, n)
.mapToObj(
i -> Pair.create(new OrderedMultiValueKey(orderingStorages, i, directions), i))
.mapToObj(i -> new OrderedMultiValueKey(orderingStorages, i, directions))
.toList());
keys.sort(OrderedPairComparator.INSTANCE);
keys.sort(null);
RangeIterator it = new RangeIterator(start, step);
for (var key : keys) {
numbers[key.getRight()] = it.next();
numbers[key.getRowIndex()] = it.next();
}
return new LongStorage(numbers, IntegerType.INT_64);
}
@ -103,44 +100,21 @@ public class AddRowNumber {
for (var entry : groupIndex.mapping().entrySet()) {
List<Integer> indices = entry.getValue();
List<Pair<OrderedMultiValueKey, Integer>> orderingKeys =
List<OrderedMultiValueKey> orderingKeys =
new ArrayList<>(
indices.stream()
.map(
i ->
Pair.create(new OrderedMultiValueKey(orderingStorages, i, directions), i))
.map(i -> new OrderedMultiValueKey(orderingStorages, i, directions))
.toList());
orderingKeys.sort(OrderedPairComparator.INSTANCE);
orderingKeys.sort(null);
RangeIterator it = new RangeIterator(start, step);
for (var key : orderingKeys) {
numbers[key.getRight()] = it.next();
for (OrderedMultiValueKey key : orderingKeys) {
numbers[key.getRowIndex()] = it.next();
}
}
return new LongStorage(numbers, IntegerType.INT_64);
}
/**
 * Orders (key, row index) pairs by the key first and, for equal keys, by the row index.
 *
 * <p>The comparator is stateless, so all instances are interchangeable; {@link #equals(Object)}
 * and {@link #hashCode()} reflect that.
 */
private static class OrderedPairComparator
    implements Comparator<Pair<OrderedMultiValueKey, Integer>> {
  /** The shared instance; declared {@code final} so it cannot be reassigned. */
  static final OrderedPairComparator INSTANCE = new OrderedPairComparator();

  /**
   * @param o1 the first pair
   * @param o2 the second pair
   * @return the result of comparing the keys, or of comparing the row indices if the keys are
   *     equal
   */
  @Override
  public int compare(
      Pair<OrderedMultiValueKey, Integer> o1, Pair<OrderedMultiValueKey, Integer> o2) {
    int p1 = o1.getLeft().compareTo(o2.getLeft());
    if (p1 != 0) {
      return p1;
    }
    return o1.getRight().compareTo(o2.getRight());
  }

  @Override
  public boolean equals(Object obj) {
    return obj instanceof OrderedPairComparator;
  }

  /**
   * All instances are equal to each other, so they must share one hash code to honour the
   * equals/hashCode contract.
   */
  @Override
  public int hashCode() {
    return OrderedPairComparator.class.hashCode();
  }
}
/**
* A helper for computing consecutive numbers based on a start and step. It will throw an {@link
* java.lang.ArithmeticException} if the next number overflows.

View File

@ -10,6 +10,7 @@ import project.Table.Aggregate
import project.Table.Arithmetic
import project.Table.Column_From_Vector
import project.Table.Cross_Tab
import project.Table.Join
import project.Table.Sorting
import project.Table.Internal.Multi_Value_Key
import project.Text.Build
@ -54,6 +55,7 @@ all_benchmarks =
builder.append Add_Row_Number.collect_benches
builder.append Column_From_Vector.collect_benches
builder.append Cross_Tab.collect_benches
builder.append Join.collect_benches
builder.append Sorting.collect_benches
builder.append Multi_Value_Key.collect_benches

View File

@ -0,0 +1,48 @@
from Standard.Base import all
from Standard.Base.Runtime import assert
from Standard.Table import all
from Standard.Test import Bench
from project.Config import extended_tests
options = Bench.options . set_warmup (Bench.phase_conf 1 2) . set_measure (Bench.phase_conf 2 3)
type Scenario
Value table ints_vec dates_vec bool_vec
# Builds the data used by the `Filter_Is_In` benchmarks: a table with an
# integer column "X" and two columns derived from it ("dates" and "bools"),
# together with the vectors that are later passed to `Filter_Condition.Is_In`.
create_scenario =
t = Table.new [["X", (200.up_to 10000 . to_vector)]]
# NOTE(review): "X" is built from `200.up_to 10000` while `ints_vec` starts at 40000, so the two share no values (the same holds for the dates derived from them) - confirm that benchmarking a filter with no hits is intended.
ints_vec = 40000.up_to 130000 . to_vector
first_day = Date_Time.new 2000 1 1
# Maps an integer `x` to the date-time `x` seconds after `first_day`.
make_date x = first_day + (Duration.new seconds=x)
dates_vec = ints_vec.map make_date
bool_vec = Vector.fill 7000 True
# "dates" holds the date-time corresponding to each value of "X".
t2 = t.set (t.at "X" . map make_date) "dates"
# "bools" tells whether the value of "X" is even.
t3 = t2.set (t.at "X" % 2 == 0) "bools"
Scenario.Value t3 ints_vec dates_vec bool_vec
type Data
Value ~scenario
create = Data.Value create_scenario
# Registers the `Filter_Is_In` benchmark group: one benchmark per column type,
# each filtering the scenario table with `Filter_Condition.Is_In` and the
# matching vector from the scenario.
collect_benches = Bench.build builder->
data = Data.create
builder.group ("Filter_Is_In") options group_builder->
group_builder.specify "integers" <|
scenario = data.scenario
scenario.table.filter "X" (Filter_Condition.Is_In scenario.ints_vec)
group_builder.specify "dates" <|
scenario = data.scenario
scenario.table.filter "dates" (Filter_Condition.Is_In scenario.dates_vec)
group_builder.specify "bools" <|
scenario = data.scenario
scenario.table.filter "bools" (Filter_Condition.Is_In scenario.bool_vec)
main = collect_benches . run_main

View File

@ -0,0 +1,222 @@
from Standard.Base import all
from Standard.Base.Runtime import assert
from Standard.Table import all
from Standard.Test import Bench
from project.Config import extended_tests
options = Bench.options . set_warmup (Bench.phase_conf 2 5) . set_measure (Bench.phase_conf 2 5)
type Scenario
Value table1 table2
# Takes a random sample of `vec.length` elements from `vec` using a fixed seed,
# so that the resulting order is the same on every run.
shuffle vec =
vec.take (Index_Sub_Range.Sample vec.length seed=42)
# Scenario for a plain equality join: both tables have a single "key" column
# built from `0.up_to num_rows`; the first table keeps the original order and
# the second one is shuffled.
create_scenario_equals num_rows =
xs = (0.up_to num_rows).to_vector
table1 = Table.new [["key", xs]]
table2 = Table.new [["key", shuffle xs]]
Scenario.Value table1 table2
# Scenario for an equality join with repeated keys: the keys of the first table
# are `(x/30).floor`, so runs of consecutive rows share the same key. The keys
# of the second table are the same values in reverse order, each increased by 2.
create_scenario_equals_medium_groups num_rows =
xs = (0.up_to num_rows).map x-> (x/30).floor
ys = xs.reverse.map (+2)
table1 = Table.new [["key", xs]]
table2 = Table.new [["key", ys]]
Scenario.Value table1 table2
# Scenario for a case-insensitive equality join: the first table has keys of
# the form "a<i>", the second one has keys "A<i>" listed in reverse order, so
# the keys correspond to each other only when letter case is ignored.
create_scenario_equals_ignore_case num_rows =
table1 = Table.new [["key", (0.up_to num_rows).map i-> "a"+i.to_text]]
table2 = Table.new [["case_insensitive_key", (0.up_to num_rows).reverse.map i-> "A"+i.to_text]]
Scenario.Value table1 table2
# Scenario for a `Between` join: the first table holds shuffled multiples of
# 100 in column "x"; the second table holds, for each such value, the bounds
# `x-10` ("lows") and `x+50` ("highs"). Neighbouring values are 100 apart, so
# the ranges of different rows do not overlap.
create_scenario_between num_rows =
xs = (0.up_to num_rows).map x-> x*100
lows = xs.map x-> x-10
highs = xs.map x-> x+50
table1 = Table.new [["x", shuffle xs]]
table2 = Table.new [["lows", lows], ["highs", highs]]
Scenario.Value table1 table2
## The mixed scenario creates a pair of tables where all rows are mapped 1-1,
but they are split into 3 groups. Each group differs by only one 'key' while
having equal keys of the other two types.
This ensures that a combined scenario must be efficient for all conditions,
regardless of the distribution of keys - it cannot naively group by only a
subset of keys and brute force the remaining keys - because in this example,
splitting by any subset of keys will still yield a big group - only splitting
by all 3 keys gives us small groups (1-1).
create_scenario_mixed num_rows =
n = (num_rows/3).round
xs = (0.up_to n).to_vector
ys_1 = (0.up_to n).map i-> "a"+i.to_text
ys_2 = (0.up_to n).map i-> "A"+i.to_text
zs = (0.up_to n).map x-> 1000 + x*100
constant_x = Vector.new n _-> 1
constant_y = Vector.new n _-> "_"
constant_z = Vector.new n _-> 0
table1 =
group1 = Table.new [["EQ", shuffle xs], ["case_insensitive", constant_y], ["x", constant_z]]
group2 = Table.new [["EQ", constant_x], ["case_insensitive", shuffle ys_1], ["x", constant_z]]
group3 = Table.new [["EQ", constant_x], ["case_insensitive", constant_y], ["x", shuffle zs]]
group1.union [group2, group3]
table2 =
group1 = Table.new [["EQ", shuffle xs], ["case_insensitive", constant_y], ["lows", constant_z], ["highs", constant_z]]
group2 = Table.new [["EQ", constant_x], ["case_insensitive", shuffle ys_2], ["lows", constant_z], ["highs", constant_z]]
lows = zs.map x-> x-10
highs = zs.map x-> x+30
group3 = Table.new [["EQ", constant_x], ["case_insensitive", constant_y], ["lows", lows], ["highs", highs]]
group1.union [group2, group3]
Scenario.Value table1 table2
## The 2d equality scenario matches rows based on 2 keys -
it matches corresponding points on a 2d grid.
This is used to verify that multi-key joins are efficient too.
create_scenario_equals_2d num_rows =
n = num_rows.sqrt.ceil
pts = (0.up_to n).to_vector.flat_map x->
(0.up_to n).map y-> [x, y]
shuffled_pts = shuffle pts
table1 = Table.new [["x", shuffled_pts.map .first], ["y", shuffled_pts.map .second]]
table2 = Table.new [["x", pts.map .first], ["y", pts.map .second]]
Scenario.Value table1 table2
## Similarly to the example with equality, this creates a 2d grid of points, but
they are matched using the Between condition.
create_scenario_between_2d num_rows =
n = num_rows.sqrt.ceil
pts = (0.up_to n).to_vector.flat_map x->
(0.up_to n).map y-> [x, y]
shuffled_pts = shuffle pts
table1 = Table.new [["x", shuffled_pts.map .first], ["y", shuffled_pts.map .second]]
lows = pts.map p-> [p.first - 0.1, p.second - 0.1]
highs = pts.map p-> [p.first + 0.1, p.second + 0.1]
table2 = Table.new [["x_lows", lows.map .first], ["y_lows", lows.map .second], ["x_highs", highs.map .first], ["y_highs", highs.map .second]]
Scenario.Value table1 table2
## This one creates a scenario with a 2d grid of points for the left table, but
the right table contains pairs of coordinates that denote belts of size 2 x n
on that grid.
Some of them will be horizontal and some vertical, to see how the order of
Between arguments affects performance.
create_scenario_between_2d_belts num_rows =
n = num_rows.sqrt.ceil
pts = (0.up_to n).to_vector.flat_map x->
(0.up_to n).map y-> [x, y]
shuffled_pts = shuffle pts
table1 = Table.new [["x", shuffled_pts.map .first], ["y", shuffled_pts.map .second]]
horizontal_belts = Vector.new n x->
[x, x+1, 0, n, False]
vertical_belts = Vector.new n y->
[0, n, y, y+1, True]
table2 = Table.from_rows ["x_lows", "x_highs", "y_lows", "y_highs", "is_vertical"] (horizontal_belts + vertical_belts)
Scenario.Value table1 table2
## This is a scenario where we want to find rows unmatched in another table.
The scenario is set-up on purpose in such a way that the intersection of the
two tables is very large. This will only be fast if the anti-join does not
compute the intersection which is not needed in this scenario.
create_scenario_antijoin num_rows =
xs = Vector.new num_rows _-> 1
## The first 1000 rows will be unmatched (and should be returned in the anti-join).
All other rows will match with _all_ rows from `xs`, creating a huge intersection.
ys = Vector.new num_rows ix->
if ix < 1000 then -ix else 1
table1 = Table.new [["key", xs]]
table2 = Table.new [["key", ys]]
Scenario.Value table1 table2
type Data
Value ~equals ~equals_medium_groups ~equals_ignore_case ~between ~mixed ~equals2d ~between2d ~between2d_belts ~antijoin
create num_rows =
Data.Value (create_scenario_equals num_rows) (create_scenario_equals_medium_groups num_rows) (create_scenario_equals_ignore_case num_rows) (create_scenario_between num_rows) (create_scenario_mixed num_rows) (create_scenario_equals_2d num_rows) (create_scenario_between_2d num_rows) (create_scenario_between_2d_belts num_rows) (create_scenario_antijoin num_rows)
# Registers the join benchmark group. Every benchmark runs a single join on one
# of the prepared scenarios; most of them also assert the expected row count of
# the result. Some benchmarks are only registered when `extended_tests` is set.
collect_benches = Bench.build builder->
num_rows = 50000
data = Data.create num_rows
builder.group ("Join_" + num_rows.to_text) options group_builder->
group_builder.specify "Equals" <|
scenario = data.equals
r = scenario.table1.join scenario.table2 on="key"
assert (r.row_count == num_rows)
# No row count is asserted for this scenario.
group_builder.specify "Equals_Medium_Groups" <|
scenario = data.equals_medium_groups
scenario.table1.join scenario.table2 on="key"
group_builder.specify "Equals_Ignore_Case" <|
scenario = data.equals_ignore_case
r = scenario.table1.join scenario.table2 on=(Join_Condition.Equals_Ignore_Case "key" "case_insensitive_key")
assert (r.row_count == num_rows)
group_builder.specify "Between" <|
scenario = data.between
r = scenario.table1.join scenario.table2 on=(Join_Condition.Between "x" "lows" "highs")
assert (r.row_count == num_rows)
# Combines all three kinds of conditions in a single join.
group_builder.specify "Mixed" <|
scenario = data.mixed
r = scenario.table1.join scenario.table2 on=[Join_Condition.Equals "EQ", Join_Condition.Equals_Ignore_Case "case_insensitive", Join_Condition.Between "x" "lows" "highs"]
expected_rows = data.mixed.table1.row_count
assert (r.row_count == expected_rows)
group_builder.specify "Equals_2D" <|
scenario = data.equals2d
r = scenario.table1.join scenario.table2 on=["x", "y"]
assert (r.row_count == scenario.table1.row_count)
group_builder.specify "Between_2D" <|
scenario = data.between2d
r = scenario.table1.join scenario.table2 on=[Join_Condition.Between "x" "x_lows" "x_highs", Join_Condition.Between "y" "y_lows" "y_highs"]
assert (r.row_count == scenario.table1.row_count)
# NOTE(review): in the belts scenario each belt covers two adjacent grid coordinates (e.g. `x` to `x+1`) and the Between bounds appear to be inclusive, so most grid points seem to fall into more than one belt. The `row_count == table1.row_count` assertions in the three belt benchmarks below may therefore not hold when assertions are enabled - verify.
if extended_tests then group_builder.specify "Between_2D_Belts_All" <|
scenario = data.between2d_belts
r = scenario.table1.join scenario.table2 on=[Join_Condition.Between "x" "x_lows" "x_highs", Join_Condition.Between "y" "y_lows" "y_highs"]
assert (r.row_count == scenario.table1.row_count)
# Uses only the belts marked with `is_vertical` set to True.
if extended_tests then group_builder.specify "Between_2D_Belts_V" <|
scenario = data.between2d_belts
t2 = scenario.table2.filter "is_vertical" Filter_Condition.Is_True
r = scenario.table1.join t2 on=[Join_Condition.Between "x" "x_lows" "x_highs", Join_Condition.Between "y" "y_lows" "y_highs"]
assert (r.row_count == scenario.table1.row_count)
# Uses only the belts marked with `is_vertical` set to False.
if extended_tests then group_builder.specify "Between_2D_Belts_H" <|
scenario = data.between2d_belts
t2 = scenario.table2.filter "is_vertical" Filter_Condition.Is_False
r = scenario.table1.join t2 on=[Join_Condition.Between "x" "x_lows" "x_highs", Join_Condition.Between "y" "y_lows" "y_highs"]
assert (r.row_count == scenario.table1.row_count)
# TODO this should be part of the main tests, but it was causing issues on CI; re-enable this with #8217
# The tables are swapped here: `table2` is the left side, so the result consists of the rows of `table2` that have no match in `table1`.
if extended_tests then group_builder.specify "AntiJoin" <|
scenario = data.antijoin
r = scenario.table2.join scenario.table1 on="key" join_kind=Join_Kind.Left_Exclusive
assert (r.row_count == 1000)
main = collect_benches . run_main

View File

@ -103,6 +103,15 @@ spec setup =
r = t3.join t4 join_kind=Join_Kind.Inner on=["X", "Y"] |> materialize |> _.order_by ["X", "Y", "Z", "Right Z"]
check_xy_joined r
Test.specify "should correctly handle duplicated rows in Equals" <|
t1 = table_builder [["X", [1, 2, 2, 3]]]
t2 = table_builder [["X", [1, 2, 2, 4]]]
r1 = t1.join t2 join_kind=Join_Kind.Full on="X" . order_by "X"
within_table r1 <|
# Both 2's from t1 match with _both_ ones from t2 _each_, so in total we get 4 `2` pairs:
r1.at "X" . to_vector . should_equal [Nothing, 1, 2, 2, 2, 2, 3]
r1.at "Right X" . to_vector . should_equal [4, 1, 2, 2, 2, 2, Nothing]
Test.specify "should allow to join on text equality ignoring case" <|
t1 = table_builder [["X", ["a", "B"]], ["Y", [1, 2]]]
t2 = table_builder [["X", ["A", "a", "b"]], ["Z", [1, 2, 3]]]
@ -170,7 +179,7 @@ spec setup =
t2 = table_builder [["lower", [1, 10, 8, 12]], ["upper", [1, 12, 30, 0]], ["Z", [1, 2, 3, 4]]]
r1 = t1.join join_kind=Join_Kind.Inner t2 on=(Join_Condition.Between "X" "lower" "upper") |> materialize |> _.order_by ["X", "Z"]
expect_column_names ["X", "Y", "lower", "upper", "Z"] r1
r1.column_names . should_equal ["X", "Y", "lower", "upper", "Z"]
r1 . at "X" . to_vector . should_equal [1, 10, 10, 12, 12]
r1 . at "Y" . to_vector . should_equal [1, 2, 2, 3, 3]
r1 . at "lower" . to_vector . should_equal [1, 10, 8, 10, 8]
@ -182,13 +191,71 @@ spec setup =
t2 = table_builder [["lower", ["a", "b"]], ["upper", ["a", "ccc"]], ["Z", [10, 20]]]
r1 = t1.join t2 join_kind=Join_Kind.Inner on=(Join_Condition.Between "X" "lower" "upper") |> materialize |> _.order_by ["X", "Z"]
expect_column_names ["X", "Y", "lower", "upper", "Z"] r1
r1.column_names . should_equal ["X", "Y", "lower", "upper", "Z"]
r1 . at "X" . to_vector . should_equal ["a", "b", "c"]
r1 . at "Y" . to_vector . should_equal [1, 2, 3]
r1 . at "lower" . to_vector . should_equal ["a", "b", "b"]
r1 . at "upper" . to_vector . should_equal ["a", "ccc", "ccc"]
r1 . at "Z" . to_vector . should_equal [10, 20, 20]
Test.specify "should correctly handle Between edge cases (1)" pending=(if prefix.contains "PostgreSQL" then "TODO: fix issue #8243") <|
# 1. multiple rows with the same key value on the left side
# 2. fully duplicated rows (1, 7) on the left side
# 3. empty bounds (lower > upper: 10 > 0)
# 4. equal bounds (10 = 10)
# 5. unmatched rows on both sides - Full join
t1 = table_builder [["X", [1, 10, 20, 1, 2, 1, 1]], ["id", [1, 2, 3, 4, 5, 7, 7]]]
t2 = table_builder [["lower", [0, 10, 10]], ["upper", [3, 10, 0]], ["Z", ['a', 'b', 'c']]]
r1 = t1.join t2 join_kind=Join_Kind.Full on=(Join_Condition.Between "X" "lower" "upper") |> materialize |> _.order_by ["Z", "id"]
within_table r1 <|
r1.column_names . should_equal ["X", "id", "lower", "upper", "Z"]
rows = r1.rows.map .to_vector
rows.length . should_equal 8
rows.at 0 . should_equal [20, 3, Nothing, Nothing, Nothing]
rows.at 1 . should_equal [ 1, 1, 0, 3, 'a']
rows.at 2 . should_equal [ 1, 4, 0, 3, 'a']
rows.at 3 . should_equal [ 2, 5, 0, 3, 'a']
rows.at 4 . should_equal [ 1, 7, 0, 3, 'a']
rows.at 5 . should_equal [ 1, 7, 0, 3, 'a']
rows.at 6 . should_equal [10, 2, 10, 10, 'b']
rows.at 7 . should_equal [Nothing, Nothing, 10, 0, 'c']
Test.specify "should correctly handle Between edge cases (2)" <|
# 6. multiple Between conditions
xs = [0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4]
ys = [1, 2, 3, 1, 9, 2, 3, 2, 4, 2, 1, 1, 1, 2]
pts = xs.zip ys . take (Index_Sub_Range.Sample xs.length seed=42)
t1 = table_builder [["X", pts.map .first], ["Y", pts.map .second]]
t2 = table_builder [["lx", [1]], ["ux", [3]], ["ly", [1]], ["uy", [2]]]
r2 = t1.join t2 join_kind=Join_Kind.Inner on=[Join_Condition.Between "X" "lx" "ux", Join_Condition.Between "Y" "ly" "uy"] |> materialize |> _.order_by ["X", "Y"]
within_table r2 <|
r2.at "X" . to_vector . should_equal [1, 1, 2, 3, 3]
r2.at "Y" . to_vector . should_equal [1, 2, 2, 1, 2]
t3 = table_builder [["lx", [1.9]], ["ux", [3]], ["ly", [1]], ["uy", [2]]]
r3 = t1.join t3 join_kind=Join_Kind.Inner on=[Join_Condition.Between "X" "lx" "ux", Join_Condition.Between "Y" "ly" "uy"] |> materialize |> _.order_by ["X", "Y"]
within_table r3 <|
r3.at "X" . to_vector . should_equal [2, 3, 3]
r3.at "Y" . to_vector . should_equal [2, 1, 2]
Test.specify "should correctly handle Between edge cases (3)" <|
# 7. duplicated rows on both sides
t1 = table_builder [["X", [10, 20, 20]]]
t2 = table_builder [["low", [15, 15]], ["high", [30, 30]]]
r1 = t1.join t2 join_kind=Join_Kind.Right_Outer on=(Join_Condition.Between "X" "low" "high")
within_table r1 <|
r1.at "X" . to_vector . should_equal [20, 20, 20, 20]
r1.at "low" . to_vector . should_equal [15, 15, 15, 15]
r1.at "high" . to_vector . should_equal [30, 30, 30, 30]
# 8. keep only unmatched rows
r2 = t1.join t2 join_kind=Join_Kind.Left_Exclusive on=(Join_Condition.Between "X" "low" "high")
within_table r2 <|
r2.column_names . should_equal ["X"]
r2.at "X" . to_vector . should_equal [10]
if setup.test_selection.supports_unicode_normalization then
Test.specify "should allow range-based joins (using Between) for text with Unicode normalization" <|
t1 = table_builder [["X", ['s\u0301', 's']], ["Y", [1, 2]]]
@ -368,16 +435,15 @@ spec setup =
if setup.supports_custom_objects then
t1 = table_builder [["X", [My_Type.Value 1 2, 2.0, 2]], ["Y", [10, 20, 30]]]
t2 = table_builder [["Z", [2.0, 1.5, 2.0]], ["W", [1, 2, 3]]]
action3 = t1.join t2 join_kind=Join_Kind.Inner on=(Join_Condition.Equals "X" "Z") on_problems=_
tester3 table =
expect_column_names ["X", "Y", "Z", "W"] table
t1 = table.order_by ["Y", "W"]
t1.at "X" . to_vector . should_equal [2.0, 2.0, 2, 2]
t1.at "Y" . to_vector . should_equal [20, 20, 30, 30]
t1.at "Z" . to_vector . should_equal [2.0, 2.0, 2.0, 2.0]
t1.at "W" . to_vector . should_equal [1, 3, 1, 3]
problems3 = [Floating_Point_Equality.Error "Z", Floating_Point_Equality.Error "X"]
Problems.test_problem_handling action3 problems3 tester3
r3 = t1.join t2 join_kind=Join_Kind.Inner on=(Join_Condition.Equals "X" "Z") on_problems=Problem_Behavior.Report_Warning
r3.column_names.should_equal ["X", "Y", "Z", "W"]
r4 = r3.order_by ["Y", "W"]
r4.at "X" . to_vector . should_equal [2.0, 2.0, 2, 2]
r4.at "Y" . to_vector . should_equal [20, 20, 30, 30]
r4.at "Z" . to_vector . should_equal [2.0, 2.0, 2.0, 2.0]
r4.at "W" . to_vector . should_equal [1, 3, 1, 3]
expected_problems = [Floating_Point_Equality.Error "Z", Floating_Point_Equality.Error "X"]
Problems.get_attached_warnings r3 . should_contain_the_same_elements_as expected_problems
Test.specify "should correctly handle nulls in equality conditions" pending=db_todo <|
t1 = table_builder [["X", ["A", Nothing, "a", Nothing, "ą"]], ["Y", [0, 1, 2, 3, 4]]]
@ -650,6 +716,17 @@ spec setup =
r3.at 3 . should_equal [2, 20, 2, Nothing, Nothing]
r3.at 4 . should_equal [3, 30, 3, Nothing, Nothing]
t8 = table_builder [["X", [2, 99]], ["Y", [20, 99]], ["C", [5, 99]]]
t9 = t4_2.join t8 join_kind=Join_Kind.Full on=["X", "Y", "C"]
within_table t9 <|
t9.column_names . should_equal ["X", "Y", "C", "Right X", "Right Y", "Right C"]
r3 = materialize t9 . order_by ["X", "Right X"] . rows . map .to_vector
r3.length . should_equal 4
r3.at 0 . should_equal [Nothing, Nothing, Nothing, 99, 99, 99]
r3.at 1 . should_equal [1, 10, 3, Nothing, Nothing, Nothing]
r3.at 2 . should_equal [2, 20, 5, 2, 20, 5]
r3.at 3 . should_equal [3, 30, 7, Nothing, Nothing, Nothing]
Test.specify "should gracefully handle tables from different backends" <|
alternative_connection = Database.connect (SQLite In_Memory)
t0 = (Table.new [["X", [1, 2, 4]], ["Z", [10, 20, 30]]]).select_into_database_table alternative_connection "T0" temporary=True

View File

@ -2,11 +2,13 @@ from Standard.Base import all
from Standard.Test import Test_Suite
import project.Helpers.Sorted_List_Index_Spec
import project.Helpers.Unique_Naming_Strategy_Spec
import project.Helpers.Value_Type_Spec
spec =
Unique_Naming_Strategy_Spec.spec
Sorted_List_Index_Spec.spec
Value_Type_Spec.spec
main = Test_Suite.run_main spec

View File

@ -0,0 +1,64 @@
from Standard.Base import all
# We need this import, to ensure that we depend on `Standard.Table`, so that the Java import of `org.enso.table` is valid.
from Standard.Table import all
from Standard.Test import Test, Test_Suite
import Standard.Test.Extensions
polyglot java import java.util.Comparator
polyglot java import org.enso.table.data.table.join.between.SortedListIndex
main = Test_Suite.run_main spec
## White-box tests for the SortedListIndex, ensuring correctness of the
implementation - these are additional tests apart from
the `Join_Condition.Between` test cases, to ensure no off-by-one errors
or other bugs are present in the implementation.
spec = Test.group "SortedListIndex (used for SortJoin)" <|
make_index vec = SortedListIndex.build vec Comparator.naturalOrder
v1 = [0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 10, 10, 10, 10, 11, 14, 17, 19]
v1_shuffled = v1.take (Index_Sub_Range.Sample v1.length)
index1 = make_index v1_shuffled
Test.specify "should correctly handle empty matches" <|
Vector.from_polyglot_array (index1.findSubRange 9 9) . should_equal []
Vector.from_polyglot_array (index1.findSubRange -10 -2) . should_equal []
Vector.from_polyglot_array (index1.findSubRange 200 300) . should_equal []
Vector.from_polyglot_array (index1.findSubRange 20 0) . should_equal []
Test.specify "should correctly handle single-element matches" <|
Vector.from_polyglot_array (index1.findSubRange 8 8) . should_equal [8]
Vector.from_polyglot_array (index1.findSubRange 12 16) . should_equal [14]
Vector.from_polyglot_array (index1.findSubRange 18 100) . should_equal [19]
Vector.from_polyglot_array (index1.findSubRange 19 100) . should_equal [19]
Vector.from_polyglot_array (index1.findSubRange 19 19) . should_equal [19]
Test.specify "should correctly handle matches" <|
Vector.from_polyglot_array (index1.findSubRange 4 6) . should_equal [4, 5, 6]
Vector.from_polyglot_array (index1.findSubRange 3 5) . should_equal [3, 3, 4, 5]
Vector.from_polyglot_array (index1.findSubRange 0 3) . should_equal [0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 3]
Vector.from_polyglot_array (index1.findSubRange 2 4) . should_equal [2, 2, 2, 3, 3, 4]
Vector.from_polyglot_array (index1.findSubRange 8 10) . should_equal [8, 10, 10, 10, 10]
Vector.from_polyglot_array (index1.findSubRange 8 11) . should_equal [8, 10, 10, 10, 10, 11]
Vector.from_polyglot_array (index1.findSubRange 8 12) . should_equal [8, 10, 10, 10, 10, 11]
Vector.from_polyglot_array (index1.findSubRange 9 12) . should_equal [10, 10, 10, 10, 11]
Test.specify "should correctly handle big all-equal ranges" <|
Vector.from_polyglot_array (index1.findSubRange 1 1) . should_equal [1, 1, 1, 1]
Vector.from_polyglot_array (index1.findSubRange 7 7) . should_equal [7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7]
Test.specify "other cases: empty index" <|
index2 = make_index []
Vector.from_polyglot_array (index2.findSubRange 1 5) . should_equal []
Test.specify "other cases: single element index" <|
index2 = make_index [5]
Vector.from_polyglot_array (index2.findSubRange 1 5) . should_equal [5]
Vector.from_polyglot_array (index2.findSubRange 5 5) . should_equal [5]
Vector.from_polyglot_array (index2.findSubRange 1 2) . should_equal []
Vector.from_polyglot_array (index2.findSubRange 2 1) . should_equal []
Vector.from_polyglot_array (index2.findSubRange 10 10) . should_equal []

View File

@ -1,118 +0,0 @@
from Standard.Base import all
from Standard.Table import all
from Standard.Test import Test, Test_Suite
import Standard.Test.Extensions
from project.Util import all
spec =
Test.group "[In-Memory] Table.join performance" <|
n = 10000
Test.specify "should efficiently compute equality joins" <|
vec = 0.up_to n . to_vector
vec2 = 1.up_to n+1 . to_vector
t1 = Table.new [["X", vec], ["Y", 0.up_to n . map (_ % 2)]]
t2 = Table.new [["B", [0, 1]]]
t3 = Table.new [["X", vec.reverse], ["Z", vec2]]
r1 = Duration.time_execution <|
t1.join t2 on=(Join_Condition.Equals "Y" "B")
r2 = Duration.time_execution <|
t1.join t3 on="X"
t4 = r2.second . order_by ["X"]
t4.at "X" . to_vector . should_equal <| vec
t4.at "Z" . to_vector . should_equal <| vec2.reverse
base_ms = r1.first.total_milliseconds
expected_max_time_ms = base_ms * 5 + 100
runtime_ms = r2.first.total_milliseconds
if runtime_ms > expected_max_time_ms then
Test.fail "Expected a join of "+n.to_text+"x"+n.to_text+" with linear result size to be efficient, but it took "+runtime_ms.to_text+"ms while a join of 2x"+n.to_text+" with the same result size took "+base_ms.to_text+"ms. The maximum time threshold for this operation to be deemed efficient has been estimated at "+expected_max_time_ms.to_text+"ms."
Test.specify "should efficiently compute equality joins mixed with other secondary conditions" <|
vec = 0.up_to n . to_vector
vec2 = 1.up_to n+1 . to_vector
t1 = Table.new [["X", vec], ["Y", 0.up_to n . map (_ % 2)], ["A", Vector.fill n "a"], ["B", Vector.fill n 9]]
t2 = Table.new [["B", [0, 1]], ["A", ["A", "A"]], ["l", [0, 0]], ["u", [20, 20]]]
t3 = Table.new [["X", vec.reverse], ["Z", vec2], ["A", Vector.fill n "a"], ["l", Vector.fill n 0], ["u", Vector.fill n 20]]
secondary_conditions = [Join_Condition.Equals_Ignore_Case "A", Join_Condition.Between "B" "l" "u"]
r1 = Duration.time_execution <|
t1.join t2 on=secondary_conditions+[Join_Condition.Equals "Y" "B"]
r2 = Duration.time_execution <|
t1.join t3 on=secondary_conditions+[Join_Condition.Equals "X" "X"]
t4 = r2.second . order_by ["X"]
t4.at "X" . to_vector . should_equal <| vec
t4.at "Z" . to_vector . should_equal <| vec2.reverse
base_ms = r1.first.total_milliseconds
expected_max_time_ms = base_ms * 5 + 100
runtime_ms = r2.first.total_milliseconds
if runtime_ms > expected_max_time_ms then
Test.fail "Expected a join of "+n.to_text+"x"+n.to_text+" with linear result size to be efficient, but it took "+runtime_ms.to_text+"ms while a join of 2x"+n.to_text+" with the same result size took "+base_ms.to_text+"ms. The maximum time threshold for this operation to be deemed efficient has been estimated at "+expected_max_time_ms.to_text+"ms."
Test.specify "should efficiently compute case-insensitive equality joins" <|
unique_text_for_number prefix i =
suffix = Text.from_utf_8 [97 + i%20]
prefix + i.to_text + "-" + suffix
lowers = 0.up_to n . map (unique_text_for_number "a")
uppers = 0.up_to n . map (unique_text_for_number "A")
t1 = Table.new [["X", lowers], ["Y", 0.up_to n . map i-> if i%2 == 0 then "a" else "b"], ["A", Vector.fill n 44], ["B", Vector.fill n 9], ["N", 0.up_to n . to_vector]]
t2 = Table.new [["B", ["A", "B", "a"]], ["A", [44, 44, 44]], ["l", [0, 0, 0]], ["u", [20, 20, 20]]]
t3 = Table.new [["X", uppers.reverse], ["Z", 1.up_to n+1 . to_vector], ["A", Vector.fill n 44], ["l", Vector.fill n 0], ["u", Vector.fill n 20]]
secondary_conditions = [Join_Condition.Equals "A", Join_Condition.Between "B" "l" "u"]
r1 = Duration.time_execution <|
t1.join t2 on=[Join_Condition.Equals_Ignore_Case "Y" "B"]+secondary_conditions
r1.second.row_count . should_equal (n + n/2)
r2 = Duration.time_execution <|
t1.join t3 on=[Join_Condition.Equals_Ignore_Case "X" "X"]+secondary_conditions
t4 = r2.second . order_by "N"
t4.row_count . should_equal n
t4.at "X" . to_vector . should_equal lowers
t4.at "Right X" . to_vector . should_equal uppers
t4.at "Z" . to_vector . should_equal <| 1.up_to n+1 . to_vector . reverse
base_ms = r1.first.total_milliseconds
expected_max_time_ms = base_ms * 5 + 100
runtime_ms = r2.first.total_milliseconds
if runtime_ms > expected_max_time_ms then
Test.fail "Expected a join of "+n.to_text+"x"+n.to_text+" with linear result size to be efficient, but it took "+runtime_ms.to_text+"ms while a join of 3x"+n.to_text+" with the same result size took "+base_ms.to_text+"ms. The maximum time threshold for this operation to be deemed efficient has been estimated at "+expected_max_time_ms.to_text+"ms."
Test.specify "should efficiently compute Between joins" pending="TODO in task https://www.pivotaltracker.com/story/show/183913337" <|
xs = 0.up_to n . map x-> x * 20
ls = 0.up_to n . map x-> x * 20 - 20
us = 0.up_to n . map x-> x * 20 + 5
t1 = Table.new [["X", xs], ["A", Vector.fill n "a"], ["B", Vector.fill n 44]]
# We set up the ranges so that each entry of `t1` will match 2, apart from the first entry matched only once.
t2 = Table.new [["l", [0, 10]], ["u", [20 * n, 20 * n + 100]], ["A", ["a", "A"]], ["B", [44, 44]]]
# Here also, each range from `t3` will match 2 entries of `t1`, apart from the first one.
t3 = Table.new [["l", ls], ["u", us], ["A", Vector.fill n "A"], ["B", Vector.fill n 44]]
conditions = [Join_Condition.Equals_Ignore_Case "A", Join_Condition.Between "X" "l" "u", Join_Condition.Equals "B"]
r1 = Duration.time_execution <|
t1.join t2 on=conditions
r1.second.row_count . should_equal (2*n - 1)
r2 = Duration.time_execution <|
t1.join t3 on=conditions
t4 = r2.second . order_by ["X", "l"]
t4.row_count . should_equal (2*n - 1)
t4.at "X" . to_vector . should_equal ((xs.flat_map x-> [x, x]) . drop (Last 1))
t4.at "l" . to_vector . should_equal (ls.zip (ls.drop 1) . flatten)+[ls.last]
base_ms = r1.first.total_milliseconds
expected_max_time_ms = base_ms * 5 + 100
runtime_ms = r2.first.total_milliseconds
if runtime_ms > expected_max_time_ms then
Test.fail "Expected a join of "+n.to_text+"x"+n.to_text+" with linear result size to be efficient, but it took "+runtime_ms.to_text+"ms while a join of 2x"+n.to_text+" with the same result size took "+base_ms.to_text+"ms. The maximum time threshold for this operation to be deemed efficient has been estimated at "+expected_max_time_ms.to_text+"ms."
main = Test_Suite.run_main spec

View File

@ -8,7 +8,6 @@ import project.In_Memory.Column_Spec
import project.In_Memory.Column_Format_Spec
import project.In_Memory.Common_Spec
import project.In_Memory.Integer_Overflow_Spec
import project.In_Memory.Join_Performance_Spec
import project.In_Memory.Lossy_Conversions_Spec
import project.In_Memory.Parse_To_Table_Spec
import project.In_Memory.Split_Tokenize_Spec
@ -29,7 +28,6 @@ spec =
Table_Time_Of_Day_Spec.spec
Aggregate_Column_Spec.spec
Builders_Spec.spec
Join_Performance_Spec.spec
Split_Tokenize_Spec.spec
Parse_To_Table_Spec.spec

View File

@ -871,42 +871,6 @@ spec =
t2.filter "Y" (Filter_Condition.Is_In in_vector) . at "Y" . to_vector . should_equal expected_neg_vector
t2.filter "Y" (Filter_Condition.Is_In in_column) . at "Y" . to_vector . should_equal expected_neg_vector
# Timing-based check that filtering with `Filter_Condition.Is_In` stays within a time budget for integers, booleans and dates.
# The budget is twice the time spent building the fixtures below.
Test.specify "should perform `Is_In` efficiently for builtin types" <|
first_day = Date_Time.new 2000 1 1
# Maps an integer to a date-time that many seconds after `first_day`.
make_date x = first_day + (Duration.new seconds=x)
# All fixtures are built inside `time_execution` so that the setup duration (`init.first`) can serve as the timing baseline; the fixtures themselves are returned as a vector in `init.second`.
init = Duration.time_execution <|
t = Table.new [["X", (200.up_to 10000 . to_vector)]]
vec = 4000.up_to 13000 . to_vector
expected_vector = 4000.up_to 10000 . to_vector
expected_vector_2 = 200.up_to 10000 . with_step 2 . to_vector
dates_vec = vec.map make_date
bool_vec = Vector.fill 7000 True
date_col = t.at "X" . map make_date
[t, vec, expected_vector, expected_vector_2, dates_vec, bool_vec, date_col]
# Unpack the fixtures by position; the indices follow the order of the vector returned above.
t = init.second . at 0
vec = init.second . at 1
expected_vector = init.second . at 2
expected_vector_2 = init.second . at 3
dates_vec = init.second . at 4
bool_vec = init.second . at 5
date_col = init.second . at 6
expected_max_time_ms = init.first.total_milliseconds * 2
# Times `action` and fails the test if it exceeded the budget. `name` only labels the failure message.
# NOTE(review): the failure message presents `expected_max_time_ms` as the initialization time, but that value is twice the measured initialization time - confirm whether the wording or the value is intended.
check_timing name ~action =
res = Duration.time_execution action
runtime_ms = res.first.total_milliseconds
if runtime_ms > expected_max_time_ms then
Test.fail "Expected `Is_In` on "+name+" to be efficient, but it took "+runtime_ms.to_text+"ms while initialization itself took just "+expected_max_time_ms.to_text+"ms."
# Each case both verifies the filtered result and is subject to the timing budget.
check_timing "integers" <|
t.filter "X" (Filter_Condition.Is_In vec) . at "X" . to_vector . should_equal expected_vector
# Filters on the boolean expression `X % 2 == 0` against a vector of `True` values; the expected result is `expected_vector_2`, i.e. every second value starting from 200.
check_timing "booleans" <|
t.filter (t.at "X" % 2 == 0) (Filter_Condition.Is_In bool_vec) . at "X" . to_vector . should_equal expected_vector_2
check_timing "dates" <|
t.filter date_col (Filter_Condition.Is_In dates_vec) . at "X" . to_vector . should_equal expected_vector
Test.group "[In-Memory-specific] Table.join" <|
Test.specify "should correctly report unsupported cross-backend joins" <|
t = Table.new [["X", [1, 2, 3]]]