mirror of
https://github.com/enso-org/enso.git
synced 2024-11-09 17:51:29 +03:00
Use the MultiValueIndex for the JoinStrategy. (#3959)
Use the MultiValueStrategy for pure equals Joins.
This commit is contained in:
parent
43167c1617
commit
11e07f8676
@ -1109,10 +1109,10 @@ type Table
|
||||
case join_resolution.conditions of
|
||||
# Nothing is returned if some conditions failed to resolve; in that case we return an empty result.
|
||||
Nothing ->
|
||||
new_table = self.java_table.join right.java_table Nothing False False False (columns_to_keep.at 0) (columns_to_keep.at 1) right_columns_to_drop right_prefix
|
||||
new_table = self.java_table.join right.java_table Nothing False False False (columns_to_keep.at 0) (columns_to_keep.at 1) right_columns_to_drop right_prefix Comparator.new
|
||||
Table.Value new_table
|
||||
java_conditions ->
|
||||
new_table = self.java_table.join right.java_table java_conditions (rows_to_keep.at 0) (rows_to_keep.at 1) (rows_to_keep.at 2) (columns_to_keep.at 0) (columns_to_keep.at 1) right_columns_to_drop right_prefix
|
||||
new_table = self.java_table.join right.java_table java_conditions (rows_to_keep.at 0) (rows_to_keep.at 1) (rows_to_keep.at 2) (columns_to_keep.at 0) (columns_to_keep.at 1) right_columns_to_drop right_prefix Comparator.new
|
||||
Table.Value new_table
|
||||
|
||||
## ALIAS dropna
|
||||
|
@ -186,4 +186,16 @@ public class MultiValueIndex {
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
public Set<MultiValueKeyBase> keys() {
|
||||
return locs.keySet();
|
||||
}
|
||||
|
||||
public boolean contains(MultiValueKeyBase key) {
|
||||
return this.locs.containsKey(key);
|
||||
}
|
||||
|
||||
public List<Integer> get(MultiValueKeyBase key) {
|
||||
return this.locs.get(key);
|
||||
}
|
||||
}
|
||||
|
@ -13,10 +13,7 @@ import org.enso.table.data.index.Index;
|
||||
import org.enso.table.data.index.MultiValueIndex;
|
||||
import org.enso.table.data.mask.OrderMask;
|
||||
import org.enso.table.data.mask.SliceRange;
|
||||
import org.enso.table.data.table.join.JoinCondition;
|
||||
import org.enso.table.data.table.join.JoinResult;
|
||||
import org.enso.table.data.table.join.JoinStrategy;
|
||||
import org.enso.table.data.table.join.ScanJoin;
|
||||
import org.enso.table.data.table.join.*;
|
||||
import org.enso.table.data.table.problems.AggregatedProblems;
|
||||
import org.enso.table.error.NoSuchColumnException;
|
||||
import org.enso.table.error.UnexpectedColumnTypeException;
|
||||
@ -288,16 +285,14 @@ public class Table {
|
||||
*
|
||||
* {@code rightColumnsToDrop} allows dropping columns from the right table that are redundant when joining on equality of equally named columns.
|
||||
*/
|
||||
public Table join(Table right, List<JoinCondition> conditions, boolean keepLeftUnmatched, boolean keepMatched, boolean keepRightUnmatched, boolean includeLeftColumns, boolean includeRightColumns, List<String> rightColumnsToDrop, String right_prefix) {
|
||||
public Table join(Table right, List<JoinCondition> conditions, boolean keepLeftUnmatched, boolean keepMatched, boolean keepRightUnmatched, boolean includeLeftColumns, boolean includeRightColumns, List<String> rightColumnsToDrop, String right_prefix, Comparator<Object> comparator) {
|
||||
// TODO adding prefix for right columns
|
||||
NameDeduplicator deduplicator = new NameDeduplicator();
|
||||
|
||||
JoinStrategy strategy = new ScanJoin();
|
||||
JoinResult joinResult = null;
|
||||
JoinStrategy strategy = new IndexJoin(comparator);
|
||||
|
||||
// Only compute the join if there are any results to be returned.
|
||||
if (keepLeftUnmatched || keepMatched || keepRightUnmatched) {
|
||||
joinResult = strategy.join(this, right, conditions);
|
||||
}
|
||||
JoinResult joinResult = (keepLeftUnmatched || keepMatched || keepRightUnmatched) ? strategy.join(this, right, conditions) : null;
|
||||
|
||||
List<Integer> leftRows = new ArrayList<>();
|
||||
List<Integer> rightRows = new ArrayList<>();
|
||||
|
@ -0,0 +1,53 @@
|
||||
package org.enso.table.data.table.join;
|
||||
|
||||
import org.enso.table.data.index.MultiValueIndex;
|
||||
import org.enso.table.data.table.Column;
|
||||
import org.enso.table.data.table.Table;
|
||||
import org.graalvm.collections.Pair;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class IndexJoin implements JoinStrategy {
|
||||
private final Comparator<Object> comparator;
|
||||
|
||||
public IndexJoin(Comparator<Object> comparator) {
|
||||
this.comparator = comparator;
|
||||
}
|
||||
|
||||
@Override
|
||||
public JoinResult join(Table left, Table right, List<JoinCondition> conditions) {
|
||||
var equalConditions = conditions.stream()
|
||||
.map(c -> c instanceof Equals e ? e : null)
|
||||
.filter(c -> c != null)
|
||||
.collect(Collectors.toList());
|
||||
if (equalConditions.size() != conditions.size()) {
|
||||
return new ScanJoin().join(left, right, conditions);
|
||||
}
|
||||
|
||||
try {
|
||||
var leftEquals = equalConditions.stream().map(Equals::left).toArray(Column[]::new);
|
||||
var leftIndex = new MultiValueIndex(leftEquals, left.rowCount(), comparator);
|
||||
|
||||
var rightEquals = equalConditions.stream().map(Equals::right).toArray(Column[]::new);
|
||||
var rightIndex = new MultiValueIndex(rightEquals, right.rowCount(), comparator);
|
||||
|
||||
List<Pair<Integer, Integer>> matches = new ArrayList<>();
|
||||
for (var leftKey : leftIndex.keys()) {
|
||||
if (rightIndex.contains(leftKey)) {
|
||||
for (var leftRow : leftIndex.get(leftKey)) {
|
||||
for (var rightRow : rightIndex.get(leftKey)) {
|
||||
matches.add(Pair.create(leftRow, rightRow));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return new JoinResult(matches);
|
||||
} catch (IllegalStateException e) {
|
||||
// Fallback for custom objects
|
||||
return new ScanJoin().join(left, right, conditions);
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user