mirror of
https://github.com/enso-org/enso.git
synced 2024-11-22 11:52:59 +03:00
Split HashJoin to SimpleHashJoin and CompoundHashJoin (#8850)
Completes #8342. Creates a SimpleHashJoin and CompoundHashJoin. # Important Notes Creates SimpleHashJoin and CompoundHashJoin. CompoundHashJoin is what was HashJoin. SimpleHashJoin is a new implementation that only indexes the smaller of the two tables being joined together. The rest is refactoring and clean-up of the shared join code.
This commit is contained in:
parent
ed65af7005
commit
340a3eec4e
@ -5,7 +5,8 @@ import org.enso.table.data.table.join.between.SortJoin;
|
||||
import org.enso.table.data.table.join.conditions.Between;
|
||||
import org.enso.table.data.table.join.conditions.HashableCondition;
|
||||
import org.enso.table.data.table.join.conditions.JoinCondition;
|
||||
import org.enso.table.data.table.join.hashing.HashJoin;
|
||||
import org.enso.table.data.table.join.hashing.CompoundHashJoin;
|
||||
import org.enso.table.data.table.join.hashing.SimpleHashJoin;
|
||||
import org.enso.table.problems.ProblemAggregator;
|
||||
|
||||
/** A strategy used for performing a join of two tables. */
|
||||
@ -31,12 +32,9 @@ public interface JoinStrategy {
|
||||
assert !betweenConditions.isEmpty();
|
||||
return new SortJoin(betweenConditions, joinKind);
|
||||
} else if (betweenConditions.isEmpty()) {
|
||||
return new HashJoin(
|
||||
hashableConditions,
|
||||
joinKind.wantsCommon ? new MatchAllStrategy() : new NoOpStrategy(),
|
||||
joinKind);
|
||||
return new SimpleHashJoin(hashableConditions, joinKind);
|
||||
} else {
|
||||
return new HashJoin(hashableConditions, new SortJoin(betweenConditions, joinKind), joinKind);
|
||||
return new CompoundHashJoin(hashableConditions, betweenConditions, joinKind);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,28 +0,0 @@
|
||||
package org.enso.table.data.table.join;
|
||||
|
||||
import java.util.List;
|
||||
import org.enso.table.problems.ProblemAggregator;
|
||||
import org.graalvm.polyglot.Context;
|
||||
|
||||
/**
|
||||
* A pluggable strategy that can be used as the inner strategy for a join if there are no more join
|
||||
* conditions to process - so all rows are matched with each other within a given group.
|
||||
*/
|
||||
public class MatchAllStrategy implements PluggableJoinStrategy {
|
||||
@Override
|
||||
public void joinSubsets(
|
||||
List<Integer> leftGroup,
|
||||
List<Integer> rightGroup,
|
||||
JoinResult.Builder resultBuilder,
|
||||
ProblemAggregator problemAggregator) {
|
||||
Context context = Context.getCurrent();
|
||||
for (var leftRow : leftGroup) {
|
||||
for (var rightRow : rightGroup) {
|
||||
resultBuilder.addMatchedRowsPair(leftRow, rightRow);
|
||||
context.safepoint();
|
||||
}
|
||||
|
||||
context.safepoint();
|
||||
}
|
||||
}
|
||||
}
|
@ -1,15 +0,0 @@
|
||||
package org.enso.table.data.table.join;
|
||||
|
||||
import java.util.List;
|
||||
import org.enso.table.problems.ProblemAggregator;
|
||||
|
||||
public class NoOpStrategy implements PluggableJoinStrategy {
|
||||
@Override
|
||||
public void joinSubsets(
|
||||
List<Integer> leftGroup,
|
||||
List<Integer> rightGroup,
|
||||
JoinResult.Builder resultBuilder,
|
||||
ProblemAggregator problemAggregator) {
|
||||
return;
|
||||
}
|
||||
}
|
@ -1,18 +0,0 @@
|
||||
package org.enso.table.data.table.join;
|
||||
|
||||
import java.util.List;
|
||||
import org.enso.table.problems.ProblemAggregator;
|
||||
|
||||
/**
|
||||
* A helper join strategy that can be used within another join strategy to perform a join of
|
||||
* sub-sets of indices, stemming from already joining on other conditions.
|
||||
*/
|
||||
public interface PluggableJoinStrategy {
|
||||
|
||||
/** Performs a join of two sub-sets of indices. */
|
||||
void joinSubsets(
|
||||
List<Integer> leftGroup,
|
||||
List<Integer> rightGroup,
|
||||
JoinResult.Builder resultBuilder,
|
||||
ProblemAggregator problemAggregator);
|
||||
}
|
@ -10,12 +10,11 @@ import org.enso.table.data.index.OrderedMultiValueKey;
|
||||
import org.enso.table.data.table.join.JoinKind;
|
||||
import org.enso.table.data.table.join.JoinResult;
|
||||
import org.enso.table.data.table.join.JoinStrategy;
|
||||
import org.enso.table.data.table.join.PluggableJoinStrategy;
|
||||
import org.enso.table.data.table.join.conditions.Between;
|
||||
import org.enso.table.problems.ProblemAggregator;
|
||||
import org.graalvm.polyglot.Context;
|
||||
|
||||
public class SortJoin implements JoinStrategy, PluggableJoinStrategy {
|
||||
public class SortJoin implements JoinStrategy {
|
||||
|
||||
public SortJoin(List<Between> conditions, JoinKind joinKind) {
|
||||
JoinStrategy.ensureConditionsNotEmpty(conditions);
|
||||
@ -83,7 +82,6 @@ public class SortJoin implements JoinStrategy, PluggableJoinStrategy {
|
||||
return resultBuilder.buildAndInvalidate();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void joinSubsets(
|
||||
List<Integer> leftGroup,
|
||||
List<Integer> rightGroup,
|
||||
@ -184,7 +182,6 @@ public class SortJoin implements JoinStrategy, PluggableJoinStrategy {
|
||||
// Note: we cannot just use `compareTo`, because we are now not checking that the key is between
|
||||
// the bounds in lexicographic order.
|
||||
// Instead, we are checking if the key is between the bounds for all dimensions.
|
||||
|
||||
int n = key.getNumberOfColumns();
|
||||
for (int i = 0; i < n; i++) {
|
||||
var keyValue = key.get(i);
|
||||
|
@ -0,0 +1,90 @@
|
||||
package org.enso.table.data.table.join.hashing;
|
||||
|
||||
import java.util.List;
|
||||
import org.enso.table.data.index.MultiValueIndex;
|
||||
import org.enso.table.data.index.UnorderedMultiValueKey;
|
||||
import org.enso.table.data.table.join.JoinKind;
|
||||
import org.enso.table.data.table.join.JoinResult;
|
||||
import org.enso.table.data.table.join.JoinStrategy;
|
||||
import org.enso.table.data.table.join.between.SortJoin;
|
||||
import org.enso.table.data.table.join.conditions.Between;
|
||||
import org.enso.table.data.table.join.conditions.HashableCondition;
|
||||
import org.enso.table.problems.ProblemAggregator;
|
||||
import org.graalvm.polyglot.Context;
|
||||
|
||||
/**
|
||||
* A strategy that uses a hash-map to perform join on the equality conditions.
|
||||
*
|
||||
* <p>It then delegates to {@code SortJoin} to perform the remaining conditions on the matching
|
||||
* pairs of row subsets.
|
||||
*/
|
||||
public class CompoundHashJoin implements JoinStrategy {
|
||||
|
||||
public CompoundHashJoin(
|
||||
List<HashableCondition> hashableConditions,
|
||||
List<Between> betweenConditions,
|
||||
JoinKind joinKind) {
|
||||
this.hashJoinConfig = new HashJoinConfig(hashableConditions);
|
||||
this.sortJoin = new SortJoin(betweenConditions, joinKind);
|
||||
this.joinKind = joinKind;
|
||||
}
|
||||
|
||||
private final HashJoinConfig hashJoinConfig;
|
||||
private final SortJoin sortJoin;
|
||||
private final JoinKind joinKind;
|
||||
|
||||
@Override
|
||||
public JoinResult join(ProblemAggregator problemAggregator) {
|
||||
Context context = Context.getCurrent();
|
||||
|
||||
var leftIndex =
|
||||
MultiValueIndex.makeUnorderedIndex(
|
||||
hashJoinConfig.getLeftEquals(),
|
||||
hashJoinConfig.getLeftNumRows(),
|
||||
hashJoinConfig.getTextFoldingStrategies(),
|
||||
problemAggregator);
|
||||
var rightIndex =
|
||||
MultiValueIndex.makeUnorderedIndex(
|
||||
hashJoinConfig.getRightEquals(),
|
||||
hashJoinConfig.getRightNumRows(),
|
||||
hashJoinConfig.getTextFoldingStrategies(),
|
||||
problemAggregator);
|
||||
|
||||
JoinResult.Builder resultBuilder = new JoinResult.Builder();
|
||||
for (var leftEntry : leftIndex.mapping().entrySet()) {
|
||||
UnorderedMultiValueKey leftKey = leftEntry.getKey();
|
||||
List<Integer> leftRows = leftEntry.getValue();
|
||||
// If any field of the key is null, it cannot match anything.
|
||||
List<Integer> rightRows = leftKey.hasAnyNulls() ? null : rightIndex.get(leftKey);
|
||||
|
||||
if (rightRows != null) {
|
||||
sortJoin.joinSubsets(leftRows, rightRows, resultBuilder, problemAggregator);
|
||||
} else {
|
||||
if (joinKind.wantsLeftUnmatched) {
|
||||
for (int leftRow : leftRows) {
|
||||
resultBuilder.addUnmatchedLeftRow(leftRow);
|
||||
context.safepoint();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
context.safepoint();
|
||||
}
|
||||
|
||||
if (joinKind.wantsRightUnmatched) {
|
||||
for (var rightEntry : rightIndex.mapping().entrySet()) {
|
||||
UnorderedMultiValueKey rightKey = rightEntry.getKey();
|
||||
// If any field of the key is null, it cannot match anything.
|
||||
boolean wasCompletelyUnmatched =
|
||||
rightKey.hasAnyNulls() ? true : !leftIndex.contains(rightKey);
|
||||
if (wasCompletelyUnmatched) {
|
||||
for (int rightRow : rightEntry.getValue()) {
|
||||
resultBuilder.addUnmatchedRightRow(rightRow);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return resultBuilder.buildAndInvalidate();
|
||||
}
|
||||
}
|
@ -1,116 +0,0 @@
|
||||
package org.enso.table.data.table.join.hashing;
|
||||
|
||||
import java.util.List;
|
||||
import org.enso.base.text.TextFoldingStrategy;
|
||||
import org.enso.table.data.index.MultiValueIndex;
|
||||
import org.enso.table.data.index.UnorderedMultiValueKey;
|
||||
import org.enso.table.data.table.Column;
|
||||
import org.enso.table.data.table.join.JoinKind;
|
||||
import org.enso.table.data.table.join.JoinResult;
|
||||
import org.enso.table.data.table.join.JoinStrategy;
|
||||
import org.enso.table.data.table.join.PluggableJoinStrategy;
|
||||
import org.enso.table.data.table.join.conditions.Equals;
|
||||
import org.enso.table.data.table.join.conditions.EqualsIgnoreCase;
|
||||
import org.enso.table.data.table.join.conditions.HashableCondition;
|
||||
import org.enso.table.problems.ProblemAggregator;
|
||||
import org.graalvm.polyglot.Context;
|
||||
|
||||
/**
|
||||
* A strategy that uses a hash-map to perform join on the equality conditions.
|
||||
*
|
||||
* <p>It then delegates to {@code remainingMatcher} to perform the remaining conditions on the
|
||||
* matching pairs of row subsets.
|
||||
*/
|
||||
public class HashJoin implements JoinStrategy {
|
||||
public HashJoin(
|
||||
List<HashableCondition> conditions,
|
||||
PluggableJoinStrategy remainingMatcher,
|
||||
JoinKind joinKind) {
|
||||
JoinStrategy.ensureConditionsNotEmpty(conditions);
|
||||
this.remainingMatcher = remainingMatcher;
|
||||
this.joinKind = joinKind;
|
||||
|
||||
List<HashEqualityCondition> equalConditions =
|
||||
conditions.stream().map(HashJoin::makeHashEqualityCondition).toList();
|
||||
|
||||
if (equalConditions.isEmpty()) {
|
||||
throw new IllegalArgumentException(
|
||||
"EqualityHashJoin is applicable if there is at least one equality condition.");
|
||||
}
|
||||
|
||||
leftEquals = equalConditions.stream().map(HashEqualityCondition::left).toArray(Column[]::new);
|
||||
rightEquals = equalConditions.stream().map(HashEqualityCondition::right).toArray(Column[]::new);
|
||||
textFoldingStrategies =
|
||||
equalConditions.stream().map(HashEqualityCondition::textFoldingStrategy).toList();
|
||||
}
|
||||
|
||||
private final Column[] leftEquals, rightEquals;
|
||||
private final List<TextFoldingStrategy> textFoldingStrategies;
|
||||
private final PluggableJoinStrategy remainingMatcher;
|
||||
private final JoinKind joinKind;
|
||||
|
||||
@Override
|
||||
public JoinResult join(ProblemAggregator problemAggregator) {
|
||||
Context context = Context.getCurrent();
|
||||
|
||||
var leftIndex =
|
||||
MultiValueIndex.makeUnorderedIndex(
|
||||
leftEquals, leftEquals[0].getSize(), textFoldingStrategies, problemAggregator);
|
||||
var rightIndex =
|
||||
MultiValueIndex.makeUnorderedIndex(
|
||||
rightEquals, rightEquals[0].getSize(), textFoldingStrategies, problemAggregator);
|
||||
|
||||
JoinResult.Builder resultBuilder = new JoinResult.Builder();
|
||||
for (var leftEntry : leftIndex.mapping().entrySet()) {
|
||||
UnorderedMultiValueKey leftKey = leftEntry.getKey();
|
||||
List<Integer> leftRows = leftEntry.getValue();
|
||||
// If any field of the key is null, it cannot match anything.
|
||||
List<Integer> rightRows = leftKey.hasAnyNulls() ? null : rightIndex.get(leftKey);
|
||||
|
||||
if (rightRows != null) {
|
||||
remainingMatcher.joinSubsets(leftRows, rightRows, resultBuilder, problemAggregator);
|
||||
} else {
|
||||
if (joinKind.wantsLeftUnmatched) {
|
||||
for (int leftRow : leftRows) {
|
||||
resultBuilder.addUnmatchedLeftRow(leftRow);
|
||||
context.safepoint();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
context.safepoint();
|
||||
}
|
||||
|
||||
if (joinKind.wantsRightUnmatched) {
|
||||
for (var rightEntry : rightIndex.mapping().entrySet()) {
|
||||
UnorderedMultiValueKey rightKey = rightEntry.getKey();
|
||||
// If any field of the key is null, it cannot match anything.
|
||||
boolean wasCompletelyUnmatched =
|
||||
rightKey.hasAnyNulls() ? true : !leftIndex.contains(rightKey);
|
||||
if (wasCompletelyUnmatched) {
|
||||
for (int rightRow : rightEntry.getValue()) {
|
||||
resultBuilder.addUnmatchedRightRow(rightRow);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return resultBuilder.buildAndInvalidate();
|
||||
}
|
||||
|
||||
private static HashEqualityCondition makeHashEqualityCondition(HashableCondition eq) {
|
||||
switch (eq) {
|
||||
case Equals e -> {
|
||||
return new HashEqualityCondition(
|
||||
e.left(), e.right(), TextFoldingStrategy.unicodeNormalizedFold);
|
||||
}
|
||||
case EqualsIgnoreCase e -> {
|
||||
return new HashEqualityCondition(
|
||||
e.left(), e.right(), TextFoldingStrategy.caseInsensitiveFold(e.locale()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private record HashEqualityCondition(
|
||||
Column left, Column right, TextFoldingStrategy textFoldingStrategy) {}
|
||||
}
|
@ -0,0 +1,72 @@
|
||||
package org.enso.table.data.table.join.hashing;
|
||||
|
||||
import java.util.List;
|
||||
import org.enso.base.text.TextFoldingStrategy;
|
||||
import org.enso.table.data.table.Column;
|
||||
import org.enso.table.data.table.join.JoinStrategy;
|
||||
import org.enso.table.data.table.join.conditions.Equals;
|
||||
import org.enso.table.data.table.join.conditions.EqualsIgnoreCase;
|
||||
import org.enso.table.data.table.join.conditions.HashableCondition;
|
||||
|
||||
public class HashJoinConfig {
|
||||
|
||||
private final Column[] leftEquals;
|
||||
private final Column[] rightEquals;
|
||||
private final List<TextFoldingStrategy> textFoldingStrategies;
|
||||
|
||||
public HashJoinConfig(List<HashableCondition> conditions) {
|
||||
JoinStrategy.ensureConditionsNotEmpty(conditions);
|
||||
List<HashEqualityCondition> equalConditions =
|
||||
conditions.stream().map(HashJoinConfig::makeHashEqualityCondition).toList();
|
||||
|
||||
this.leftEquals =
|
||||
equalConditions.stream().map(HashEqualityCondition::left).toArray(Column[]::new);
|
||||
this.rightEquals =
|
||||
equalConditions.stream().map(HashEqualityCondition::right).toArray(Column[]::new);
|
||||
this.textFoldingStrategies =
|
||||
equalConditions.stream().map(HashEqualityCondition::textFoldingStrategy).toList();
|
||||
}
|
||||
|
||||
public HashJoinConfig(
|
||||
Column[] leftEquals, Column[] rightEquals, List<TextFoldingStrategy> textFoldingStrategies) {
|
||||
this.leftEquals = leftEquals;
|
||||
this.rightEquals = rightEquals;
|
||||
this.textFoldingStrategies = textFoldingStrategies;
|
||||
}
|
||||
|
||||
public Column[] getLeftEquals() {
|
||||
return leftEquals;
|
||||
}
|
||||
|
||||
public Column[] getRightEquals() {
|
||||
return rightEquals;
|
||||
}
|
||||
|
||||
public int getLeftNumRows() {
|
||||
return leftEquals[0].getSize();
|
||||
}
|
||||
|
||||
public int getRightNumRows() {
|
||||
return rightEquals[0].getSize();
|
||||
}
|
||||
|
||||
public List<TextFoldingStrategy> getTextFoldingStrategies() {
|
||||
return textFoldingStrategies;
|
||||
}
|
||||
|
||||
private static HashEqualityCondition makeHashEqualityCondition(HashableCondition eq) {
|
||||
switch (eq) {
|
||||
case Equals e -> {
|
||||
return new HashEqualityCondition(
|
||||
e.left(), e.right(), TextFoldingStrategy.unicodeNormalizedFold);
|
||||
}
|
||||
case EqualsIgnoreCase e -> {
|
||||
return new HashEqualityCondition(
|
||||
e.left(), e.right(), TextFoldingStrategy.caseInsensitiveFold(e.locale()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private record HashEqualityCondition(
|
||||
Column left, Column right, TextFoldingStrategy textFoldingStrategy) {}
|
||||
}
|
@ -0,0 +1,177 @@
|
||||
package org.enso.table.data.table.join.hashing;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
import org.enso.table.data.index.MultiValueIndex;
|
||||
import org.enso.table.data.index.UnorderedMultiValueKey;
|
||||
import org.enso.table.data.table.Column;
|
||||
import org.enso.table.data.table.join.JoinKind;
|
||||
import org.enso.table.data.table.join.JoinResult;
|
||||
import org.enso.table.data.table.join.JoinStrategy;
|
||||
import org.enso.table.data.table.join.conditions.HashableCondition;
|
||||
import org.enso.table.problems.ColumnAggregatedProblemAggregator;
|
||||
import org.enso.table.problems.ProblemAggregator;
|
||||
import org.graalvm.polyglot.Context;
|
||||
|
||||
/** A strategy that uses a hash-map to perform join on the equality conditions. */
|
||||
public class SimpleHashJoin implements JoinStrategy {
|
||||
|
||||
public SimpleHashJoin(List<HashableCondition> conditions, JoinKind joinKind) {
|
||||
var tempHashJoinConfig = new HashJoinConfig(conditions);
|
||||
|
||||
// algorithm assumes that left table is the big table.
|
||||
// If not we will flip the left and right tables over to do the join
|
||||
if (tempHashJoinConfig.getLeftNumRows() >= tempHashJoinConfig.getRightNumRows()) {
|
||||
this.hashJoinConfig = tempHashJoinConfig;
|
||||
this.joinKind = joinKind;
|
||||
this.resultBuilder = new SimpleHashJoinResultBuilder(false);
|
||||
} else {
|
||||
// flip left and right inside of HashJoinConfig
|
||||
this.hashJoinConfig =
|
||||
new HashJoinConfig(
|
||||
tempHashJoinConfig.getRightEquals(),
|
||||
tempHashJoinConfig.getLeftEquals(),
|
||||
tempHashJoinConfig.getTextFoldingStrategies());
|
||||
this.joinKind = flipJoinKind(joinKind);
|
||||
this.resultBuilder = new SimpleHashJoinResultBuilder(true);
|
||||
}
|
||||
}
|
||||
|
||||
private final HashJoinConfig hashJoinConfig;
|
||||
private final JoinKind joinKind;
|
||||
private final SimpleHashJoinResultBuilder resultBuilder;
|
||||
|
||||
@Override
|
||||
public JoinResult join(ProblemAggregator problemAggregator) {
|
||||
// algorithm assumes that left table is the big table.
|
||||
// If not we have flipped the tables round to do the join.
|
||||
// If you are debugging your left table might not be your left table here.
|
||||
// The result builder flips the indexes back as you add them
|
||||
assert (hashJoinConfig.getLeftNumRows() >= hashJoinConfig.getRightNumRows());
|
||||
|
||||
var groupingProblemAggregator = new ColumnAggregatedProblemAggregator(problemAggregator);
|
||||
var rightIndex =
|
||||
MultiValueIndex.makeUnorderedIndex(
|
||||
hashJoinConfig.getRightEquals(),
|
||||
hashJoinConfig.getRightNumRows(),
|
||||
hashJoinConfig.getTextFoldingStrategies(),
|
||||
problemAggregator);
|
||||
var storage =
|
||||
Arrays.stream(hashJoinConfig.getLeftEquals())
|
||||
.map(Column::getStorage)
|
||||
.toArray(Storage[]::new);
|
||||
Set<UnorderedMultiValueKey> matchedRightKeys = new HashSet<>();
|
||||
|
||||
Context context = Context.getCurrent();
|
||||
for (int leftRow = 0; leftRow < hashJoinConfig.getLeftNumRows(); leftRow++) {
|
||||
var leftKey = makeLeftKey(storage, leftRow, groupingProblemAggregator);
|
||||
// If any field of the key is null, it cannot match anything.
|
||||
List<Integer> rightRows = leftKey.hasAnyNulls() ? null : rightIndex.get(leftKey);
|
||||
if (rightRows != null) {
|
||||
if (joinKind.wantsCommon) {
|
||||
addAll(leftRow, rightRows, resultBuilder);
|
||||
}
|
||||
if (joinKind.wantsRightUnmatched) {
|
||||
matchedRightKeys.add(leftKey);
|
||||
}
|
||||
} else if (joinKind.wantsLeftUnmatched) {
|
||||
resultBuilder.addUnmatchedLeftRow(leftRow);
|
||||
context.safepoint();
|
||||
}
|
||||
context.safepoint();
|
||||
}
|
||||
|
||||
if (joinKind.wantsRightUnmatched) {
|
||||
addUnmatchedRightRows(rightIndex, matchedRightKeys);
|
||||
}
|
||||
|
||||
return resultBuilder.buildAndInvalidate();
|
||||
}
|
||||
|
||||
private void addUnmatchedRightRows(
|
||||
MultiValueIndex<UnorderedMultiValueKey> rightIndex,
|
||||
Set<UnorderedMultiValueKey> matchedRightKeys) {
|
||||
Context context = Context.getCurrent();
|
||||
for (var rightEntry : rightIndex.mapping().entrySet()) {
|
||||
UnorderedMultiValueKey rightKey = rightEntry.getKey();
|
||||
boolean wasCompletelyUnmatched = !matchedRightKeys.contains(rightKey);
|
||||
if (wasCompletelyUnmatched) {
|
||||
for (int rightRow : rightEntry.getValue()) {
|
||||
resultBuilder.addUnmatchedRightRow(rightRow);
|
||||
context.safepoint();
|
||||
}
|
||||
}
|
||||
context.safepoint();
|
||||
}
|
||||
}
|
||||
|
||||
public UnorderedMultiValueKey makeLeftKey(
|
||||
Storage[] storage,
|
||||
int rowNumber,
|
||||
ColumnAggregatedProblemAggregator groupingProblemAggregator) {
|
||||
var leftEquals = hashJoinConfig.getLeftEquals();
|
||||
var leftKey =
|
||||
new UnorderedMultiValueKey(storage, rowNumber, hashJoinConfig.getTextFoldingStrategies());
|
||||
leftKey.checkAndReportFloatingEquality(
|
||||
groupingProblemAggregator, columnIx -> leftEquals[columnIx].getName());
|
||||
return leftKey;
|
||||
}
|
||||
|
||||
private static void addAll(
|
||||
int leftRow, List<Integer> rightGroup, SimpleHashJoinResultBuilder resultBuilder) {
|
||||
Context context = Context.getCurrent();
|
||||
for (var rightRow : rightGroup) {
|
||||
resultBuilder.addMatchedRowsPair(leftRow, rightRow);
|
||||
context.safepoint();
|
||||
}
|
||||
context.safepoint();
|
||||
}
|
||||
|
||||
private static JoinKind flipJoinKind(JoinKind joinKind) {
|
||||
return switch (joinKind) {
|
||||
case LEFT_OUTER -> JoinKind.RIGHT_OUTER;
|
||||
case RIGHT_OUTER -> JoinKind.LEFT_OUTER;
|
||||
case LEFT_ANTI -> JoinKind.RIGHT_ANTI;
|
||||
case RIGHT_ANTI -> JoinKind.LEFT_ANTI;
|
||||
default -> joinKind;
|
||||
};
|
||||
}
|
||||
|
||||
private class SimpleHashJoinResultBuilder {
|
||||
|
||||
public SimpleHashJoinResultBuilder(boolean flipLeftAndRight) {
|
||||
this.flipLeftAndRight = flipLeftAndRight;
|
||||
this.resultBuilder = new JoinResult.Builder();
|
||||
}
|
||||
|
||||
JoinResult.Builder resultBuilder;
|
||||
private final boolean flipLeftAndRight;
|
||||
|
||||
public void addMatchedRowsPair(int leftIndex, int rightIndex) {
|
||||
addPair(leftIndex, rightIndex);
|
||||
}
|
||||
|
||||
public void addUnmatchedLeftRow(int leftIndex) {
|
||||
addPair(leftIndex, -1);
|
||||
}
|
||||
|
||||
public void addUnmatchedRightRow(int rightIndex) {
|
||||
addPair(-1, rightIndex);
|
||||
}
|
||||
|
||||
public JoinResult buildAndInvalidate() {
|
||||
return resultBuilder.buildAndInvalidate();
|
||||
}
|
||||
|
||||
private void addPair(int leftIndex, int rightIndex) {
|
||||
if (flipLeftAndRight) {
|
||||
resultBuilder.addMatchedRowsPair(rightIndex, leftIndex);
|
||||
} else {
|
||||
resultBuilder.addMatchedRowsPair(leftIndex, rightIndex);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user