Use the MultiValueIndex for the JoinStrategy. (#3959)

Use the MultiValueIndex-based IndexJoin strategy for joins made up purely of equality conditions.
This commit is contained in:
James Dunkerley 2022-12-08 12:24:53 +00:00 committed by GitHub
parent 43167c1617
commit 11e07f8676
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 72 additions and 12 deletions

View File

@ -1109,10 +1109,10 @@ type Table
case join_resolution.conditions of
# Nothing is returned if some conditions failed to resolve, we will return an empty result in such case.
Nothing ->
new_table = self.java_table.join right.java_table Nothing False False False (columns_to_keep.at 0) (columns_to_keep.at 1) right_columns_to_drop right_prefix
new_table = self.java_table.join right.java_table Nothing False False False (columns_to_keep.at 0) (columns_to_keep.at 1) right_columns_to_drop right_prefix Comparator.new
Table.Value new_table
java_conditions ->
new_table = self.java_table.join right.java_table java_conditions (rows_to_keep.at 0) (rows_to_keep.at 1) (rows_to_keep.at 2) (columns_to_keep.at 0) (columns_to_keep.at 1) right_columns_to_drop right_prefix
new_table = self.java_table.join right.java_table java_conditions (rows_to_keep.at 0) (rows_to_keep.at 1) (rows_to_keep.at 2) (columns_to_keep.at 0) (columns_to_keep.at 1) right_columns_to_drop right_prefix Comparator.new
Table.Value new_table
## ALIAS dropna

View File

@ -186,4 +186,16 @@ public class MultiValueIndex {
return output;
}
/**
 * Exposes the distinct keys held by this index.
 *
 * @return whatever {@code locs.keySet()} returns; no copy is made here
 */
public Set<MultiValueKeyBase> keys() {
  final Set<MultiValueKeyBase> keySet = this.locs.keySet();
  return keySet;
}
/**
 * Tests whether the given key is present in this index.
 *
 * @param key the key to look up in {@code locs}
 * @return {@code true} when {@code locs} has an entry for {@code key}
 */
public boolean contains(MultiValueKeyBase key) {
  final boolean present = locs.containsKey(key);
  return present;
}
/**
 * Looks up the entry stored for the given key.
 *
 * @param key the key to look up in {@code locs}
 * @return the list stored under {@code key}, exactly as {@code locs.get(key)} returns it
 *     (presumably the row positions sharing that key - confirm against the index builder)
 */
public List<Integer> get(MultiValueKeyBase key) {
  final List<Integer> rows = locs.get(key);
  return rows;
}
}

View File

@ -13,10 +13,7 @@ import org.enso.table.data.index.Index;
import org.enso.table.data.index.MultiValueIndex;
import org.enso.table.data.mask.OrderMask;
import org.enso.table.data.mask.SliceRange;
import org.enso.table.data.table.join.JoinCondition;
import org.enso.table.data.table.join.JoinResult;
import org.enso.table.data.table.join.JoinStrategy;
import org.enso.table.data.table.join.ScanJoin;
import org.enso.table.data.table.join.*;
import org.enso.table.data.table.problems.AggregatedProblems;
import org.enso.table.error.NoSuchColumnException;
import org.enso.table.error.UnexpectedColumnTypeException;
@ -288,16 +285,14 @@ public class Table {
*
* {@code rightColumnsToDrop} lists columns of the right table to drop because they are redundant when joining on equality of equally named columns.
*/
public Table join(Table right, List<JoinCondition> conditions, boolean keepLeftUnmatched, boolean keepMatched, boolean keepRightUnmatched, boolean includeLeftColumns, boolean includeRightColumns, List<String> rightColumnsToDrop, String right_prefix) {
public Table join(Table right, List<JoinCondition> conditions, boolean keepLeftUnmatched, boolean keepMatched, boolean keepRightUnmatched, boolean includeLeftColumns, boolean includeRightColumns, List<String> rightColumnsToDrop, String right_prefix, Comparator<Object> comparator) {
// TODO adding prefix for right columns
NameDeduplicator deduplicator = new NameDeduplicator();
JoinStrategy strategy = new ScanJoin();
JoinResult joinResult = null;
JoinStrategy strategy = new IndexJoin(comparator);
// Only compute the join if there are any results to be returned.
if (keepLeftUnmatched || keepMatched || keepRightUnmatched) {
joinResult = strategy.join(this, right, conditions);
}
JoinResult joinResult = (keepLeftUnmatched || keepMatched || keepRightUnmatched) ? strategy.join(this, right, conditions) : null;
List<Integer> leftRows = new ArrayList<>();
List<Integer> rightRows = new ArrayList<>();

View File

@ -0,0 +1,53 @@
package org.enso.table.data.table.join;
import org.enso.table.data.index.MultiValueIndex;
import org.enso.table.data.table.Column;
import org.enso.table.data.table.Table;
import org.graalvm.collections.Pair;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
/**
 * A {@link JoinStrategy} that pairs up rows by building a {@link MultiValueIndex} over the key
 * columns of each table and matching the keys the two indexes have in common.
 *
 * <p>The index-based path is only taken when every condition is an {@link Equals}. Any other
 * condition, or an {@link IllegalStateException} raised while indexing, delegates the whole join
 * to {@link ScanJoin}.
 */
public class IndexJoin implements JoinStrategy {
  /** Comparator handed to every {@link MultiValueIndex} built by this strategy. */
  private final Comparator<Object> comparator;

  /**
   * Creates the strategy.
   *
   * @param comparator comparator passed through to the indexes built over the join key columns
   */
  public IndexJoin(Comparator<Object> comparator) {
    this.comparator = comparator;
  }

  /**
   * Computes the matching row pairs of {@code left} and {@code right}.
   *
   * @param left the left table
   * @param right the right table
   * @param conditions the join conditions; all must be {@link Equals} for the index to be used
   * @return a {@link JoinResult} built from the (left row, right row) pairs whose keys match, or
   *     the result of {@link ScanJoin} when the index-based path is not applicable
   */
  @Override
  public JoinResult join(Table left, Table right, List<JoinCondition> conditions) {
    // Collect the equality conditions, bailing out to the scan as soon as anything else is seen.
    List<Equals> equalConditions = new ArrayList<>(conditions.size());
    for (JoinCondition condition : conditions) {
      if (condition instanceof Equals equals) {
        equalConditions.add(equals);
      } else {
        return new ScanJoin().join(left, right, conditions);
      }
    }

    try {
      var leftEquals = equalConditions.stream().map(Equals::left).toArray(Column[]::new);
      var leftIndex = new MultiValueIndex(leftEquals, left.rowCount(), comparator);

      var rightEquals = equalConditions.stream().map(Equals::right).toArray(Column[]::new);
      var rightIndex = new MultiValueIndex(rightEquals, right.rowCount(), comparator);

      List<Pair<Integer, Integer>> matches = new ArrayList<>();
      for (var leftKey : leftIndex.keys()) {
        // Single lookup per key: a missing key comes back as null from the index's map.
        var rightRows = rightIndex.get(leftKey);
        if (rightRows == null) {
          continue;
        }

        // Both row lists are fetched once per key rather than once per row pair.
        var leftRows = leftIndex.get(leftKey);
        for (var leftRow : leftRows) {
          for (var rightRow : rightRows) {
            matches.add(Pair.create(leftRow, rightRow));
          }
        }
      }

      return new JoinResult(matches);
    } catch (IllegalStateException e) {
      // Fallback for custom objects
      // NOTE(review): presumably raised by MultiValueIndex when a key cannot be indexed - confirm.
      return new ScanJoin().join(left, right, conditions);
    }
  }
}