From c6b6384fe6197e9198005d45362d0693afe3bb8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 24 Nov 2023 03:44:57 +0100 Subject: [PATCH] Improve performance of anti-join (#8338) - Closes #8217 --- .../Table/0.0.0-dev/src/Data/Join_Kind.enso | 12 ++++ .../Table/0.0.0-dev/src/Data/Table.enso | 11 +--- .../java/org/enso/table/data/table/Table.java | 57 +++--------------- .../enso/table/data/table/join/CrossJoin.java | 6 +- .../enso/table/data/table/join/JoinKind.java | 21 +++++++ .../table/data/table/join/JoinResult.java | 34 ++++++----- .../table/data/table/join/JoinStrategy.java | 10 ++-- .../data/table/join/MatchAllStrategy.java | 6 +- .../data/table/join/between/SortJoin.java | 59 ++++++++++++++++--- .../data/table/join/hashing/HashJoin.java | 26 +++++++- test/Benchmarks/src/Table/Join.enso | 3 +- .../Join/Join_Spec.enso | 41 +++++++++---- 12 files changed, 182 insertions(+), 104 deletions(-) create mode 100644 std-bits/table/src/main/java/org/enso/table/data/table/join/JoinKind.java diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Join_Kind.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Join_Kind.enso index 0f16e765b5..f3235e6ada 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Join_Kind.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Join_Kind.enso @@ -1,3 +1,5 @@ +polyglot java import org.enso.table.data.table.join.JoinKind as Java_Join_Kind + type Join_Kind ## Returns only rows where a match between the left and right table is found. If one row from the left table matches multiple rows in the right @@ -36,3 +38,13 @@ type Join_Kind In this mode, unlike in others, only columns of the right table are returned, since all columns of the left table would be all null anyway. 
Right_Exclusive + + ## PRIVATE + to_java : Java_Join_Kind + to_java self = case self of + Join_Kind.Inner -> Java_Join_Kind.INNER + Join_Kind.Left_Outer -> Java_Join_Kind.LEFT_OUTER + Join_Kind.Right_Outer -> Java_Join_Kind.RIGHT_OUTER + Join_Kind.Full -> Java_Join_Kind.FULL + Join_Kind.Left_Exclusive -> Java_Join_Kind.LEFT_ANTI + Join_Kind.Right_Exclusive -> Java_Join_Kind.RIGHT_ANTI diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index 3d49212147..5afaaa0588 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -1808,15 +1808,6 @@ type Table @on Widget_Helpers.make_join_condition_selector join : Table -> Join_Kind -> Vector (Join_Condition | Text) | Text -> Text -> Problem_Behavior -> Table join self right:Table (join_kind : Join_Kind = Join_Kind.Left_Outer) on=[Join_Condition.Equals self.column_names.first] right_prefix="Right " on_problems=Report_Warning = Out_Of_Memory.handle_java_exception "join" <| - # [left_unmatched, matched, right_unmatched] - rows_to_keep = case join_kind of - Join_Kind.Inner -> [False, True, False] - Join_Kind.Left_Outer -> [True, True, False] - Join_Kind.Right_Outer -> [False, True, True] - Join_Kind.Full -> [True, True, True] - Join_Kind.Left_Exclusive -> [True, False, False] - Join_Kind.Right_Exclusive -> [False, False, True] - columns_to_keep = case join_kind of Join_Kind.Left_Exclusive -> [True, False] Join_Kind.Right_Exclusive -> [False, True] @@ -1827,7 +1818,7 @@ type Table java_conditions = join_resolution.conditions new_java_table = Java_Problems.with_problem_aggregator on_problems java_aggregator-> - self.java_table.join right.java_table java_conditions (rows_to_keep.at 0) (rows_to_keep.at 1) (rows_to_keep.at 2) (columns_to_keep.at 0) (columns_to_keep.at 1) right_columns_to_drop right_prefix java_aggregator + self.java_table.join right.java_table 
java_conditions join_kind.to_java (columns_to_keep.at 0) (columns_to_keep.at 1) right_columns_to_drop right_prefix java_aggregator Table.Value new_java_table ## ALIAS cartesian join diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/Table.java b/std-bits/table/src/main/java/org/enso/table/data/table/Table.java index 9ba670e52d..00ba62389d 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/table/Table.java +++ b/std-bits/table/src/main/java/org/enso/table/data/table/Table.java @@ -18,6 +18,7 @@ import org.enso.table.data.index.OrderedMultiValueKey; import org.enso.table.data.mask.OrderMask; import org.enso.table.data.mask.SliceRange; import org.enso.table.data.table.join.CrossJoin; +import org.enso.table.data.table.join.JoinKind; import org.enso.table.data.table.join.conditions.JoinCondition; import org.enso.table.data.table.join.JoinResult; import org.enso.table.data.table.join.JoinStrategy; @@ -269,58 +270,17 @@ public class Table { * form one table. {@code rightColumnsToDrop} allows to drop columns from the right table that are redundant when * joining on equality of equally named columns. 
*/ - public Table join(Table right, List conditions, boolean keepLeftUnmatched, boolean keepMatched, - boolean keepRightUnmatched, boolean includeLeftColumns, boolean includeRightColumns, + public Table join(Table right, List conditions, JoinKind joinKind, boolean includeLeftColumns, boolean includeRightColumns, List rightColumnsToDrop, String right_prefix, ProblemAggregator problemAggregator) { - Context context = Context.getCurrent(); NameDeduplicator nameDeduplicator = NameDeduplicator.createDefault(problemAggregator); - if (!keepLeftUnmatched && !keepMatched && !keepRightUnmatched) { - throw new IllegalArgumentException("At least one of keepLeftUnmatched, keepMatched or keepRightUnmatched must " + - "be true."); - } - JoinStrategy strategy = JoinStrategy.createStrategy(conditions); + JoinStrategy strategy = JoinStrategy.createStrategy(conditions, joinKind); JoinResult joinResult = strategy.join(problemAggregator); - List resultsToKeep = new ArrayList<>(); - - if (keepMatched) { - resultsToKeep.add(joinResult); - } - - if (keepLeftUnmatched) { - Set matchedLeftRows = joinResult.leftMatchedRows(); - JoinResult.Builder leftUnmatchedBuilder = new JoinResult.Builder(); - for (int i = 0; i < this.rowCount(); i++) { - if (!matchedLeftRows.contains(i)) { - leftUnmatchedBuilder.addRow(i, Index.NOT_FOUND); - } - - context.safepoint(); - } - - resultsToKeep.add(leftUnmatchedBuilder.build()); - } - - if (keepRightUnmatched) { - Set matchedRightRows = joinResult.rightMatchedRows(); - JoinResult.Builder rightUnmatchedBuilder = new JoinResult.Builder(); - for (int i = 0; i < right.rowCount(); i++) { - if (!matchedRightRows.contains(i)) { - rightUnmatchedBuilder.addRow(Index.NOT_FOUND, i); - } - - context.safepoint(); - } - - resultsToKeep.add(rightUnmatchedBuilder.build()); - } - List newColumns = new ArrayList<>(); if (includeLeftColumns) { - OrderMask leftMask = - OrderMask.concat(resultsToKeep.stream().map(JoinResult::getLeftOrderMask).collect(Collectors.toList())); + 
OrderMask leftMask = joinResult.getLeftOrderMask(); for (Column column : this.columns) { Column newColumn = column.applyMask(leftMask); newColumns.add(newColumn); @@ -328,14 +288,13 @@ public class Table { } if (includeRightColumns) { - OrderMask rightMask = - OrderMask.concat(resultsToKeep.stream().map(JoinResult::getRightOrderMask).collect(Collectors.toList())); - List leftColumnNames = newColumns.stream().map(Column::getName).collect(Collectors.toList()); + OrderMask rightMask = joinResult.getRightOrderMask(); + List leftColumnNames = newColumns.stream().map(Column::getName).toList(); HashSet toDrop = new HashSet<>(rightColumnsToDrop); List rightColumnsToKeep = - Arrays.stream(right.getColumns()).filter(col -> !toDrop.contains(col.getName())).collect(Collectors.toList()); - List rightColumNames = rightColumnsToKeep.stream().map(Column::getName).collect(Collectors.toList()); + Arrays.stream(right.getColumns()).filter(col -> !toDrop.contains(col.getName())).toList(); + List rightColumNames = rightColumnsToKeep.stream().map(Column::getName).toList(); List newRightColumnNames = nameDeduplicator.combineWithPrefix(leftColumnNames, rightColumNames, right_prefix); diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/join/CrossJoin.java b/std-bits/table/src/main/java/org/enso/table/data/table/join/CrossJoin.java index 94fb6df841..dc33aa9d4b 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/table/join/CrossJoin.java +++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/CrossJoin.java @@ -5,10 +5,12 @@ import org.graalvm.polyglot.Context; public class CrossJoin { public static JoinResult perform(int leftRowCount, int rightRowCount) { Context context = Context.getCurrent(); - JoinResult.Builder resultBuilder = new JoinResult.Builder(leftRowCount * rightRowCount); + JoinResult.BuilderSettings settings = new JoinResult.BuilderSettings(true, true, true); + JoinResult.Builder resultBuilder = + new JoinResult.Builder(leftRowCount * 
rightRowCount, settings); for (int l = 0; l < leftRowCount; ++l) { for (int r = 0; r < rightRowCount; ++r) { - resultBuilder.addRow(l, r); + resultBuilder.addMatchedRowsPair(l, r); context.safepoint(); } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/join/JoinKind.java b/std-bits/table/src/main/java/org/enso/table/data/table/join/JoinKind.java new file mode 100644 index 0000000000..9750cad938 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/JoinKind.java @@ -0,0 +1,21 @@ +package org.enso.table.data.table.join; + +public enum JoinKind { + INNER, + FULL, + LEFT_OUTER, + RIGHT_OUTER, + LEFT_ANTI, + RIGHT_ANTI; + + public static JoinResult.BuilderSettings makeSettings(JoinKind joinKind) { + return switch (joinKind) { + case INNER -> new JoinResult.BuilderSettings(true, false, false); + case FULL -> new JoinResult.BuilderSettings(true, true, true); + case LEFT_OUTER -> new JoinResult.BuilderSettings(true, true, false); + case RIGHT_OUTER -> new JoinResult.BuilderSettings(true, false, true); + case LEFT_ANTI -> new JoinResult.BuilderSettings(false, true, false); + case RIGHT_ANTI -> new JoinResult.BuilderSettings(false, false, true); + }; + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/join/JoinResult.java b/std-bits/table/src/main/java/org/enso/table/data/table/join/JoinResult.java index 9e67307a73..e108a29cbf 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/table/join/JoinResult.java +++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/JoinResult.java @@ -3,11 +3,6 @@ package org.enso.table.data.table.join; import org.enso.base.arrays.IntArrayBuilder; import org.enso.table.data.mask.OrderMask; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; -import java.util.stream.Collectors; - public record JoinResult(int[] matchedRowsLeftIndices, int[] matchedRowsRightIndices) { public OrderMask getLeftOrderMask() { @@ -18,32 +13,39 @@ public record 
JoinResult(int[] matchedRowsLeftIndices, int[] matchedRowsRightInd return new OrderMask(matchedRowsRightIndices); } - public Set leftMatchedRows() { - return new HashSet<>(Arrays.stream(matchedRowsLeftIndices).boxed().collect(Collectors.toList())); - } - - public Set rightMatchedRows() { - return new HashSet<>(Arrays.stream(matchedRowsRightIndices).boxed().collect(Collectors.toList())); - } + public record BuilderSettings(boolean wantsCommon, boolean wantsLeftUnmatched, boolean wantsRightUnmatched) {} public static class Builder { IntArrayBuilder leftIndices; IntArrayBuilder rightIndices; - public Builder(int initialCapacity) { + final BuilderSettings settings; + + public Builder(int initialCapacity, BuilderSettings settings) { leftIndices = new IntArrayBuilder(initialCapacity); rightIndices = new IntArrayBuilder(initialCapacity); + this.settings = settings; } - public Builder() { - this(128); + public Builder(BuilderSettings settings) { + this(128, settings); } - public void addRow(int leftIndex, int rightIndex) { + public void addMatchedRowsPair(int leftIndex, int rightIndex) { leftIndices.add(leftIndex); rightIndices.add(rightIndex); } + public void addUnmatchedLeftRow(int leftIndex) { + leftIndices.add(leftIndex); + rightIndices.add(-1); + } + + public void addUnmatchedRightRow(int rightIndex) { + leftIndices.add(-1); + rightIndices.add(rightIndex); + } + public JoinResult build() { return new JoinResult(leftIndices.build(), rightIndices.build()); } diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/join/JoinStrategy.java b/std-bits/table/src/main/java/org/enso/table/data/table/join/JoinStrategy.java index 4371c238e3..a02e2c7283 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/table/join/JoinStrategy.java +++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/JoinStrategy.java @@ -17,11 +17,13 @@ import java.util.List; public interface JoinStrategy { JoinResult join(ProblemAggregator problemAggregator); - static 
JoinStrategy createStrategy(List conditions) { + static JoinStrategy createStrategy(List conditions, JoinKind joinKind) { if (conditions.isEmpty()) { throw new IllegalArgumentException("At least one join condition must be provided."); } + JoinResult.BuilderSettings builderSettings = JoinKind.makeSettings(joinKind); + List hashableConditions = conditions.stream() .filter(c -> c instanceof HashableCondition) .map(c -> (HashableCondition) c) @@ -37,11 +39,11 @@ public interface JoinStrategy { if (hashableConditions.isEmpty()) { assert !betweenConditions.isEmpty(); - return new SortJoin(betweenConditions); + return new SortJoin(betweenConditions, builderSettings); } else if (betweenConditions.isEmpty()) { - return new HashJoin(hashableConditions, new MatchAllStrategy()); + return new HashJoin(hashableConditions, new MatchAllStrategy(), builderSettings); } else { - return new HashJoin(hashableConditions, new SortJoin(betweenConditions)); + return new HashJoin(hashableConditions, new SortJoin(betweenConditions, builderSettings), builderSettings); } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/join/MatchAllStrategy.java b/std-bits/table/src/main/java/org/enso/table/data/table/join/MatchAllStrategy.java index a25882e65d..f06a813891 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/table/join/MatchAllStrategy.java +++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/MatchAllStrategy.java @@ -15,10 +15,14 @@ public class MatchAllStrategy implements PluggableJoinStrategy { List rightGroup, JoinResult.Builder resultBuilder, ProblemAggregator problemAggregator) { + if (!resultBuilder.settings.wantsCommon()) { + return; + } + Context context = Context.getCurrent(); for (var leftRow : leftGroup) { for (var rightRow : rightGroup) { - resultBuilder.addRow(leftRow, rightRow); + resultBuilder.addMatchedRowsPair(leftRow, rightRow); context.safepoint(); } diff --git 
a/std-bits/table/src/main/java/org/enso/table/data/table/join/between/SortJoin.java b/std-bits/table/src/main/java/org/enso/table/data/table/join/between/SortJoin.java index 50dedbbbac..548060fe35 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/table/join/between/SortJoin.java +++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/between/SortJoin.java @@ -1,6 +1,7 @@ package org.enso.table.data.table.join.between; import java.util.ArrayList; +import java.util.BitSet; import java.util.Comparator; import java.util.List; import org.enso.base.ObjectComparator; @@ -15,8 +16,9 @@ import org.graalvm.polyglot.Context; public class SortJoin implements JoinStrategy, PluggableJoinStrategy { - public SortJoin(List conditions) { + public SortJoin(List conditions, JoinResult.BuilderSettings resultBuilderSettings) { conditionsHelper = new JoinStrategy.ConditionsHelper(conditions); + this.resultBuilderSettings = resultBuilderSettings; Context context = Context.getCurrent(); int nConditions = conditions.size(); @@ -34,16 +36,18 @@ public class SortJoin implements JoinStrategy, PluggableJoinStrategy { } private final JoinStrategy.ConditionsHelper conditionsHelper; + private final JoinResult.BuilderSettings resultBuilderSettings; private final int[] directions; private final Storage[] leftStorages; private final Storage[] lowerStorages; private final Storage[] upperStorages; + private final BitSet matchedLeftRows = new BitSet(); @Override public JoinResult join(ProblemAggregator problemAggregator) { Context context = Context.getCurrent(); - JoinResult.Builder resultBuilder = new JoinResult.Builder(); + JoinResult.Builder resultBuilder = new JoinResult.Builder(resultBuilderSettings); int leftRowCount = conditionsHelper.getLeftTableRowCount(); int rightRowCount = conditionsHelper.getRightTableRowCount(); @@ -60,10 +64,22 @@ public class SortJoin implements JoinStrategy, PluggableJoinStrategy { SortedListIndex leftIndex = buildSortedLeftIndex(leftKeys); for 
(int rightRowIx = 0; rightRowIx < rightRowCount; rightRowIx++) { - addMatchingLeftRows(leftIndex, rightRowIx, resultBuilder); + int matches = addMatchingLeftRows(leftIndex, rightRowIx, resultBuilder); + if (resultBuilderSettings.wantsRightUnmatched() && matches == 0) { + resultBuilder.addUnmatchedRightRow(rightRowIx); + } context.safepoint(); } + if (resultBuilderSettings.wantsLeftUnmatched()) { + for (int leftRowIx = 0; leftRowIx < leftRowCount; leftRowIx++) { + if (!matchedLeftRows.get(leftRowIx)) { + resultBuilder.addUnmatchedLeftRow(leftRowIx); + } + context.safepoint(); + } + } + return resultBuilder.build(); } @@ -87,9 +103,21 @@ public class SortJoin implements JoinStrategy, PluggableJoinStrategy { SortedListIndex leftIndex = buildSortedLeftIndex(leftKeys); for (int rightRowIx : rightGroup) { - addMatchingLeftRows(leftIndex, rightRowIx, resultBuilder); + int matches = addMatchingLeftRows(leftIndex, rightRowIx, resultBuilder); + if (resultBuilderSettings.wantsRightUnmatched() && matches == 0) { + resultBuilder.addUnmatchedRightRow(rightRowIx); + } context.safepoint(); } + + if (resultBuilderSettings.wantsLeftUnmatched()) { + for (int leftRowIx : leftGroup) { + if (!matchedLeftRows.get(leftRowIx)) { + resultBuilder.addUnmatchedLeftRow(leftRowIx); + } + context.safepoint(); + } + } } private SortedListIndex buildSortedLeftIndex( @@ -105,7 +133,13 @@ public class SortJoin implements JoinStrategy, PluggableJoinStrategy { return new OrderedMultiValueKey(upperStorages, rightRowIx, directions, objectComparator); } - private void addMatchingLeftRows( + /** + * Adds all pairs of rows from the left index matching the right index to the builder, and reports + * the match count. + * + *
<p>
It also marks any of the left rows that were matched, in the {@code matchedLeftRows}. + */ + private int addMatchingLeftRows( SortedListIndex sortedLeftIndex, int rightRowIx, JoinResult.Builder resultBuilder) { @@ -116,19 +150,30 @@ public class SortJoin implements JoinStrategy, PluggableJoinStrategy { if (lowerBound.hasAnyNulls() || upperBound.hasAnyNulls() || lowerBound.compareTo(upperBound) > 0) { - return; + return 0; } + int matchCount = 0; + List firstCoordinateMatches = sortedLeftIndex.findSubRange(lowerBound, upperBound); Context context = Context.getCurrent(); for (OrderedMultiValueKey key : firstCoordinateMatches) { if (isInRange(key, lowerBound, upperBound)) { - resultBuilder.addRow(key.getRowIndex(), rightRowIx); + int leftRowIx = key.getRowIndex(); + matchCount++; + if (resultBuilderSettings.wantsCommon()) { + resultBuilder.addMatchedRowsPair(leftRowIx, rightRowIx); + } + if (resultBuilderSettings.wantsLeftUnmatched()) { + matchedLeftRows.set(leftRowIx); + } } context.safepoint(); } + + return matchCount; } private boolean isInRange( diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/join/hashing/HashJoin.java b/std-bits/table/src/main/java/org/enso/table/data/table/join/hashing/HashJoin.java index 66b4f0c87b..77abe3a6f1 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/table/join/hashing/HashJoin.java +++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/hashing/HashJoin.java @@ -13,6 +13,7 @@ import org.enso.table.data.table.join.conditions.HashableCondition; import org.enso.table.problems.ProblemAggregator; import org.graalvm.polyglot.Context; +import java.util.HashMap; import java.util.List; /** @@ -22,9 +23,10 @@ import java.util.List; * subsets. 
*/ public class HashJoin implements JoinStrategy { - public HashJoin(List conditions, PluggableJoinStrategy remainingMatcher) { + public HashJoin(List conditions, PluggableJoinStrategy remainingMatcher, JoinResult.BuilderSettings resultBuilderSettings) { conditionsHelper = new JoinStrategy.ConditionsHelper(conditions); this.remainingMatcher = remainingMatcher; + this.resultBuilderSettings = resultBuilderSettings; List equalConditions = conditions.stream().map(HashJoin::makeHashEqualityCondition).toList(); @@ -42,6 +44,7 @@ public class HashJoin implements JoinStrategy { private final Column[] leftEquals, rightEquals; private final List textFoldingStrategies; private final PluggableJoinStrategy remainingMatcher; + private final JoinResult.BuilderSettings resultBuilderSettings; @Override public JoinResult join(ProblemAggregator problemAggregator) { @@ -52,7 +55,7 @@ public class HashJoin implements JoinStrategy { var rightIndex = MultiValueIndex.makeUnorderedIndex(rightEquals, conditionsHelper.getRightTableRowCount(), textFoldingStrategies, problemAggregator); - JoinResult.Builder resultBuilder = new JoinResult.Builder(); + JoinResult.Builder resultBuilder = new JoinResult.Builder(resultBuilderSettings); for (var leftEntry : leftIndex.mapping().entrySet()) { UnorderedMultiValueKey leftKey = leftEntry.getKey(); List leftRows = leftEntry.getValue(); @@ -60,11 +63,30 @@ public class HashJoin implements JoinStrategy { if (rightRows != null) { remainingMatcher.joinSubsets(leftRows, rightRows, resultBuilder, problemAggregator); + } else { + if (resultBuilderSettings.wantsLeftUnmatched()) { + for (int leftRow : leftRows) { + resultBuilder.addUnmatchedLeftRow(leftRow); + context.safepoint(); + } + } } context.safepoint(); } + if (resultBuilderSettings.wantsRightUnmatched()) { + for (var rightEntry : rightIndex.mapping().entrySet()) { + UnorderedMultiValueKey rightKey = rightEntry.getKey(); + boolean wasCompletelyUnmatched = !leftIndex.contains(rightKey); + if 
(wasCompletelyUnmatched) { + for (int rightRow : rightEntry.getValue()) { + resultBuilder.addUnmatchedRightRow(rightRow); + } + } + } + } + return resultBuilder.build(); } diff --git a/test/Benchmarks/src/Table/Join.enso b/test/Benchmarks/src/Table/Join.enso index b1c2e1eced..062dfc5cbf 100644 --- a/test/Benchmarks/src/Table/Join.enso +++ b/test/Benchmarks/src/Table/Join.enso @@ -213,8 +213,7 @@ collect_benches = Bench.build builder-> r = scenario.table1.join t2 on=[Join_Condition.Between "x" "x_lows" "x_highs", Join_Condition.Between "y" "y_lows" "y_highs"] assert (r.row_count == scenario.table1.row_count) - # TODO this should be part of the main tests, but it was causing issues on CI; re-enable this with #8217 - if extended_tests then group_builder.specify "AntiJoin" <| + group_builder.specify "AntiJoin" <| scenario = data.antijoin r = scenario.table2.join scenario.table1 on="key" join_kind=Join_Kind.Left_Exclusive assert (r.row_count == 1000) diff --git a/test/Table_Tests/src/Common_Table_Operations/Join/Join_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Join/Join_Spec.enso index d24981ef2d..006ec3d395 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Join/Join_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Join/Join_Spec.enso @@ -284,18 +284,37 @@ spec setup = Test.specify "should allow to mix join conditions of various kinds" <| t1 = table_builder [["X", [1, 12, 12, 0]], ["Y", [1, 2, 3, 4]], ["Z", ["a", "A", "a", "ą"]], ["W", [1, 2, 3, 4]]] - t2 = table_builder [["X", [12, 12, 1]], ["l", [0, 100, 100]], ["u", [10, 100, 100]], ["Z", ["A", "A", "A"]], ["W'", [10, 20, 30]]] + t2 = table_builder [["X", [12, 12, 1]], ["l", [0, 100, 100]], ["u", [10, 100, 200]], ["Z", ["A", "A", "A"]], ["W'", [10, 20, 30]]] - r1 = t1.join t2 join_kind=Join_Kind.Inner on=[Join_Condition.Between "Y" "l" "u", Join_Condition.Equals_Ignore_Case "Z" "Z", Join_Condition.Equals "X" "X"] |> materialize |> _.order_by ["Y"] - expect_column_names ["X", "Y", "Z", 
"W", "l", "u", "Right Z", "W'"] r1 - r1.at "X" . to_vector . should_equal [12, 12] - r1.at "Y" . to_vector . should_equal [2, 3] - r1.at "Z" . to_vector . should_equal ["A", "a"] - r1.at "W" . to_vector . should_equal [2, 3] - r1.at "l" . to_vector . should_equal [0, 0] - r1.at "u" . to_vector . should_equal [10, 10] - r1.at "Right Z" . to_vector . should_equal ["A", "A"] - r1.at "W'" . to_vector . should_equal [10, 10] + conditions = [Join_Condition.Between "Y" "l" "u", Join_Condition.Equals_Ignore_Case "Z" "Z", Join_Condition.Equals "X" "X"] + r1 = t1.join t2 join_kind=Join_Kind.Inner on=conditions |> materialize |> _.order_by ["Y"] + within_table r1 <| + r1.column_names.should_equal ["X", "Y", "Z", "W", "l", "u", "Right Z", "W'"] + r1.at "X" . to_vector . should_equal [12, 12] + r1.at "Y" . to_vector . should_equal [2, 3] + r1.at "Z" . to_vector . should_equal ["A", "a"] + r1.at "W" . to_vector . should_equal [2, 3] + r1.at "l" . to_vector . should_equal [0, 0] + r1.at "u" . to_vector . should_equal [10, 10] + r1.at "Right Z" . to_vector . should_equal ["A", "A"] + r1.at "W'" . to_vector . should_equal [10, 10] + + r2 = t1.join t2 join_kind=Join_Kind.Left_Exclusive on=conditions |> materialize |> _.order_by ["Y"] + within_table r2 <| + r2.column_names.should_equal ["X", "Y", "Z", "W"] + r2.at "X" . to_vector . should_equal [1, 0] + r2.at "Y" . to_vector . should_equal [1, 4] + r2.at "Z" . to_vector . should_equal ["a", "ą"] + r2.at "W" . to_vector . should_equal [1, 4] + + r3 = t1.join t2 join_kind=Join_Kind.Right_Exclusive on=conditions |> materialize |> _.order_by ["W'"] + within_table r3 <| + r3.column_names.should_equal ["X", "l", "u", "Z", "W'"] + r3.at "X" . to_vector . should_equal [12, 1] + r3.at "l" . to_vector . should_equal [100, 100] + r3.at "u" . to_vector . should_equal [100, 200] + r3.at "Z" . to_vector . should_equal ["A", "A"] + r3.at "W'" . to_vector . 
should_equal [20, 30] Test.specify "should work fine if the same condition is specified multiple times" <| r = t3.join t4 join_kind=Join_Kind.Inner on=["X", "X", "Y", "X", "Y"] |> materialize |> _.order_by ["X", "Y", "Z", "Right Z"]