Improve performance of Join_Condition.Between by sorting on one dimension (#8212)

- Closes #5303 - Refactors `JoinStrategy` allowing us to 'stack' join strategies on top of each other (to some extent) - currently a `HashJoin` can be followed by another join strategy (currently `SortJoin`) - Adds benchmarks for join - Due to limitations of the sorting approach this will still not be as fast as possible for cases where there is more than 1 `Between` condition in a single query - trying to demonstrate that in benchmarks. - We can replace sorting by d-dimensional [RangeTrees](https://en.wikipedia.org/wiki/Range_tree) to get `O((n + m) log^d n + k)` performance (where `n` and `m` are sizes of joined tables, `d` is the number of `Between` conditions used in the query and `k` is the result set size). - Follow up ticket for consideration later: #8216 - Closes #8215 - After all, it turned out that `TreeSet` was problematic (because of not enough flexibility with duplicate key handling), so the simplest solution was to immediately implement this sub-task. - Closes #8204 - Unrelated, but I ran into this here: adds type checks to other arguments of `set`. - Before, putting in a Column as `new_name` (i.e. mistakenly messing up the order of arguments) led to a hard-to-understand error: "Method `if_then_else` of type Column could not be found."; now it fails with a type error instead: "expected Text got Column".
2024-12-23 07:12:20 +03:00 · 2023-11-08 13:59:55 +01:00 · 2023-11-08 13:59:55 +01:00 · 1b8b30a68d
commit 1b8b30a68d
parent 1388fe1cf9
34 changed files with 979 additions and 501 deletions
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso
@ -837,7 +837,7 @@ type Table
                 table.set "2 * [total_stock]" new_name="total_stock_expr"
    @new_name Widget_Helpers.make_column_name_selector
    set : Column | Text | Array | Vector | Range | Date_Range | Constant_Column | Column_Operation -> Text -> Set_Mode -> Problem_Behavior -> Table ! Existing_Column | Missing_Column | No_Such_Column | Expression_Error
-    set self column new_name="" set_mode=Set_Mode.Add_Or_Update on_problems=Report_Warning =
+    set self column (new_name : Text = "") (set_mode : Set_Mode = Set_Mode.Add_Or_Update) (on_problems : Problem_Behavior = Report_Warning) =
        problem_builder = Problem_Builder.new
        unique = self.column_naming_helper.create_unique_name_strategy
        unique.mark_used self.column_names
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso
@ -68,10 +68,10 @@ polyglot java import org.enso.base.ObjectComparator
 polyglot java import org.enso.table.data.index.MultiValueIndex
 polyglot java import org.enso.table.data.mask.OrderMask
 polyglot java import org.enso.table.data.table.Column as Java_Column
-polyglot java import org.enso.table.data.table.join.Between as Java_Join_Between
-polyglot java import org.enso.table.data.table.join.Equals as Java_Join_Equals
-polyglot java import org.enso.table.data.table.join.EqualsIgnoreCase as Java_Join_Equals_Ignore_Case
-polyglot java import org.enso.table.data.table.join.LookupJoin
+polyglot java import org.enso.table.data.table.join.conditions.Between as Java_Join_Between
+polyglot java import org.enso.table.data.table.join.conditions.Equals as Java_Join_Equals
+polyglot java import org.enso.table.data.table.join.conditions.EqualsIgnoreCase as Java_Join_Equals_Ignore_Case
+polyglot java import org.enso.table.data.table.join.lookup.LookupJoin
 polyglot java import org.enso.table.data.table.Table as Java_Table
 polyglot java import org.enso.table.error.TooManyColumnsException
 polyglot java import org.enso.table.error.NullValuesInKeyColumns
@ -1570,7 +1570,7 @@ type Table
                 table.set "2 * [total_stock]" new_name="total_stock_expr"
    @column Column_Operation.default_widget
    set : Text | Column -> Text -> Set_Mode -> Problem_Behavior -> Table ! Existing_Column | Missing_Column | No_Such_Column | Expression_Error
-    set self column:(Text | Column | Constant_Column | Column_Operation) new_name="" set_mode=Set_Mode.Add_Or_Update on_problems=Report_Warning =
+    set self column:(Text | Column | Constant_Column | Column_Operation) (new_name : Text = "") (set_mode : Set_Mode = Set_Mode.Add_Or_Update) (on_problems : Problem_Behavior = Report_Warning) =
        problem_builder = Problem_Builder.new
        unique = self.column_naming_helper.create_unique_name_strategy
        unique.mark_used self.column_names
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Lookup_Helpers.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Lookup_Helpers.enso
@ -6,7 +6,7 @@ import project.Data.Type.Value_Type.Value_Type
 import project.Data.Type.Value_Type_Helpers
 from project.Errors import Missing_Input_Columns, Unexpected_Extra_Columns, Floating_Point_Equality, No_Common_Type, No_Output_Columns

-polyglot java import org.enso.table.data.table.join.LookupColumnDescription
+polyglot java import org.enso.table.data.table.join.lookup.LookupColumnDescription

 ## PRIVATE
 type Lookup_Column
--- a/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueKeyBase.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueKeyBase.java
@ -40,6 +40,10 @@ public abstract class MultiValueKeyBase {
    return rowIndex;
  }

+  /** Returns the length of the {@code storages} array, i.e. how many values make up this key. */
+  public int getNumberOfColumns() {
+    return storages.length;
+  }
+
  @Override
  public abstract boolean equals(Object o);

--- a/std-bits/table/src/main/java/org/enso/table/data/index/OrderedMultiValueKey.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/index/OrderedMultiValueKey.java
@ -78,4 +78,24 @@ public class OrderedMultiValueKey extends MultiValueKeyBase
  public String toString() {
    return "OrderedMultiValueKey{row="+rowIndex+"}";
  }
+
+  /**
+   * A comparator that uses only one dimension of the key.
+   *
+   * <p>Only the values at index {@code ix} of the two keys are compared, using the {@code
+   * objectComparator} of the first key; every other dimension is ignored.
+   *
+   * <p>{@code compare} throws {@link ClassCastException} when the two keys are backed by a
+   * different number of storages.
+   */
+  public static class ProjectionComparator implements Comparator<OrderedMultiValueKey> {
+    private final int ix;
+
+    public ProjectionComparator(int ix) {
+      this.ix = ix;
+    }
+
+    @Override
+    public int compare(OrderedMultiValueKey o1, OrderedMultiValueKey o2) {
+      if (o1.storages.length != o2.storages.length) {
+        throw new ClassCastException("Incomparable keys.");
+      }
+
+      return o1.objectComparator.compare(o1.get(ix), o2.get(ix));
+    }
+  }
 }
--- a/std-bits/table/src/main/java/org/enso/table/data/table/Table.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/Table.java
@ -18,9 +18,9 @@ import org.enso.table.data.index.OrderedMultiValueKey;
 import org.enso.table.data.mask.OrderMask;
 import org.enso.table.data.mask.SliceRange;
 import org.enso.table.data.table.join.CrossJoin;
-import org.enso.table.data.table.join.IndexJoin;
-import org.enso.table.data.table.join.JoinCondition;
+import org.enso.table.data.table.join.conditions.JoinCondition;
 import org.enso.table.data.table.join.JoinResult;
+import org.enso.table.data.table.join.JoinStrategy;
 import org.enso.table.error.UnexpectedColumnTypeException;
 import org.enso.table.operations.Distinct;
 import org.enso.table.problems.ProblemAggregator;
@ -279,8 +279,8 @@ public class Table {
          "be true.");
    }

-    var strategy = new IndexJoin();
-    JoinResult joinResult = strategy.join(this, right, conditions, problemAggregator);
+    JoinStrategy strategy = JoinStrategy.createStrategy(conditions);
+    JoinResult joinResult = strategy.join(problemAggregator);

    List<JoinResult> resultsToKeep = new ArrayList<>();

--- a/std-bits/table/src/main/java/org/enso/table/data/table/join/Equals.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/Equals.java
@ -1,5 +0,0 @@
-package org.enso.table.data.table.join;
-
-import org.enso.table.data.table.Column;
-
-public record Equals(Column left, Column right) implements JoinCondition {}
--- a/std-bits/table/src/main/java/org/enso/table/data/table/join/IndexJoin.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/IndexJoin.java
@ -1,112 +0,0 @@
-package org.enso.table.data.table.join;
-
-import org.enso.base.text.TextFoldingStrategy;
-import org.enso.table.data.column.storage.Storage;
-import org.enso.table.data.column.storage.type.AnyObjectType;
-import org.enso.table.data.index.MultiValueIndex;
-import org.enso.table.data.table.Column;
-import org.enso.table.data.table.Table;
-import org.enso.table.data.table.join.scan.Matcher;
-import org.enso.table.data.table.join.scan.MatcherFactory;
-import org.enso.table.problems.ColumnAggregatedProblemAggregator;
-import org.enso.table.problems.ProblemAggregator;
-import org.graalvm.polyglot.Context;
-
-import java.util.List;
-import java.util.stream.Collectors;
-
-public class IndexJoin implements JoinStrategy {
-  private record HashEqualityCondition(
-      Column left, Column right, TextFoldingStrategy textFoldingStrategy) {
-  }
-
-  @Override
-  public JoinResult join(Table left, Table right, List<JoinCondition> conditions, ProblemAggregator problemAggregator) {
-    Context context = Context.getCurrent();
-    List<HashEqualityCondition> equalConditions =
-        conditions.stream()
-            .filter(IndexJoin::isSupported)
-            .map(IndexJoin::makeHashEqualityCondition)
-            .collect(Collectors.toList());
-
-    var remainingConditions =
-        conditions.stream().filter(c -> !isSupported(c)).collect(Collectors.toList());
-
-    var leftEquals =
-        equalConditions.stream().map(HashEqualityCondition::left).toArray(Column[]::new);
-    var rightEquals =
-        equalConditions.stream().map(HashEqualityCondition::right).toArray(Column[]::new);
-    var textFoldingStrategies =
-        equalConditions.stream()
-            .map(HashEqualityCondition::textFoldingStrategy)
-            .collect(Collectors.toList());
-
-    var leftIndex =
-        MultiValueIndex.makeUnorderedIndex(leftEquals, left.rowCount(), textFoldingStrategies, problemAggregator);
-    var rightIndex =
-        MultiValueIndex.makeUnorderedIndex(rightEquals, right.rowCount(), textFoldingStrategies, problemAggregator);
-
-    MatcherFactory factory = new MatcherFactory();
-    Matcher remainingMatcher = factory.create(
-        remainingConditions, new ColumnAggregatedProblemAggregator(problemAggregator)
-    );
-
-    JoinResult.Builder resultBuilder = new JoinResult.Builder();
-    for (var leftKey : leftIndex.keys()) {
-      if (rightIndex.contains(leftKey)) {
-        for (var leftRow : leftIndex.get(leftKey)) {
-          for (var rightRow : rightIndex.get(leftKey)) {
-            if (remainingMatcher.matches(leftRow, rightRow)) {
-              resultBuilder.addRow(leftRow, rightRow);
-            }
-
-            context.safepoint();
-          }
-
-          context.safepoint();
-        }
-      }
-
-      context.safepoint();
-    }
-
-    return resultBuilder.build();
-  }
-
-  private static boolean isSupported(JoinCondition condition) {
-    switch (condition) {
-      case Equals eq -> {
-        return isBuiltinType(eq.left().getStorage()) && isBuiltinType(eq.right().getStorage());
-      }
-      case EqualsIgnoreCase ignored -> {
-        return true;
-      }
-      default -> {
-        return false;
-      }
-    }
-  }
-
-  private static HashEqualityCondition makeHashEqualityCondition(JoinCondition eq) {
-    switch (eq) {
-      case Equals e -> {
-        return new HashEqualityCondition(
-            e.left(), e.right(), TextFoldingStrategy.unicodeNormalizedFold);
-      }
-      case EqualsIgnoreCase e -> {
-        return new HashEqualityCondition(
-            e.left(), e.right(), TextFoldingStrategy.caseInsensitiveFold(e.locale()));
-      }
-      default -> throw new IllegalStateException(
-          "Impossible: trying to convert condition "
-              + eq
-              + " to a HashEqualityCondition, but it should not be marked as supported. This is a"
-              + " bug in the Table library.");
-    }
-  }
-
-  private static boolean isBuiltinType(Storage<?> storage) {
-    // TODO: this should be removed when #5626 and #5259 are implemented
-    return !storage.getType().equals(AnyObjectType.INSTANCE);
-  }
-}
--- a/std-bits/table/src/main/java/org/enso/table/data/table/join/JoinCondition.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/JoinCondition.java
@ -1,3 +0,0 @@
-package org.enso.table.data.table.join;
-
-public interface JoinCondition {}
--- a/std-bits/table/src/main/java/org/enso/table/data/table/join/JoinStrategy.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/JoinStrategy.java
@ -1,10 +1,75 @@
 package org.enso.table.data.table.join;

-import java.util.List;
-import org.enso.table.data.table.Table;
+import org.enso.table.data.table.join.between.SortJoin;
+import org.enso.table.data.table.join.conditions.Between;
+import org.enso.table.data.table.join.conditions.Equals;
+import org.enso.table.data.table.join.conditions.EqualsIgnoreCase;
+import org.enso.table.data.table.join.conditions.HashableCondition;
+import org.enso.table.data.table.join.conditions.JoinCondition;
+import org.enso.table.data.table.join.hashing.HashJoin;
 import org.enso.table.problems.ProblemAggregator;

+import java.util.List;
+
+/**
+ * A strategy used for performing a join of two tables.
+ */
 public interface JoinStrategy {
-  JoinResult join(
-      Table left, Table right, List<JoinCondition> conditions, ProblemAggregator problemAggregator);
+  JoinResult join(ProblemAggregator problemAggregator);
+
+  static JoinStrategy createStrategy(List<JoinCondition> conditions) {
+    if (conditions.isEmpty()) {
+      throw new IllegalArgumentException("At least one join condition must be provided.");
+    }
+
+    List<HashableCondition> hashableConditions = conditions.stream()
+        .filter(c -> c instanceof HashableCondition)
+        .map(c -> (HashableCondition) c)
+        .toList();
+    List<Between> betweenConditions = conditions.stream()
+        .filter(c -> c instanceof Between)
+        .map(c -> (Between) c)
+        .toList();
+
+    if (hashableConditions.size() + betweenConditions.size() != conditions.size()) {
+      throw new IllegalArgumentException("Unsupported join condition.");
+    }
+
+    if (hashableConditions.isEmpty()) {
+      assert !betweenConditions.isEmpty();
+      return new SortJoin(betweenConditions);
+    } else if (betweenConditions.isEmpty()) {
+      return new HashJoin(hashableConditions, new MatchAllStrategy());
+    } else {
+      return new HashJoin(hashableConditions, new SortJoin(betweenConditions));
+    }
+  }
+
+  class ConditionsHelper {
+    private final List<? extends JoinCondition> conditions;
+
+    public ConditionsHelper(List<? extends JoinCondition> conditions) {
+      if (conditions.isEmpty()) {
+        throw new IllegalArgumentException("At least one join condition must be provided.");
+      }
+
+      this.conditions = conditions;
+    }
+
+    public int getLeftTableRowCount() {
+      return switch (conditions.get(0)) {
+        case Equals equals -> equals.left().getStorage().size();
+        case EqualsIgnoreCase equalsIgnoreCase -> equalsIgnoreCase.left().getStorage().size();
+        case Between between -> between.left().getStorage().size();
+      };
+    }
+
+    public int getRightTableRowCount() {
+      return switch (conditions.get(0)) {
+        case Equals equals -> equals.right().getStorage().size();
+        case EqualsIgnoreCase equalsIgnoreCase -> equalsIgnoreCase.right().getStorage().size();
+        case Between between -> between.rightLower().getStorage().size();
+      };
+    }
+  }
 }
--- a/std-bits/table/src/main/java/org/enso/table/data/table/join/MatchAllStrategy.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/MatchAllStrategy.java
@ -0,0 +1,28 @@
+package org.enso.table.data.table.join;
+
+import java.util.List;
+import org.enso.table.problems.ProblemAggregator;
+import org.graalvm.polyglot.Context;
+
+/**
+ * The trivial inner join strategy: every row of the left group is paired with every row of the
+ * right group.
+ *
+ * <p>It is meant to be plugged into another join strategy once there are no further join
+ * conditions left to check within a group.
+ */
+public class MatchAllStrategy implements PluggableJoinStrategy {
+  @Override
+  public void joinSubsets(
+      List<Integer> leftGroup,
+      List<Integer> rightGroup,
+      JoinResult.Builder resultBuilder,
+      ProblemAggregator problemAggregator) {
+    Context ctx = Context.getCurrent();
+    for (Integer leftIx : leftGroup) {
+      for (Integer rightIx : rightGroup) {
+        resultBuilder.addRow(leftIx, rightIx);
+        ctx.safepoint();
+      }
+
+      ctx.safepoint();
+    }
+  }
+}
--- a/std-bits/table/src/main/java/org/enso/table/data/table/join/PluggableJoinStrategy.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/PluggableJoinStrategy.java
@ -0,0 +1,18 @@
+package org.enso.table.data.table.join;
+
+import java.util.List;
+import org.enso.table.problems.ProblemAggregator;
+
+/**
+ * A helper join strategy that can be used within another join strategy to perform a join of
+ * sub-sets of indices, stemming from already joining on other conditions.
+ */
+public interface PluggableJoinStrategy {
+
+  /**
+   * Performs a join of two sub-sets of indices.
+   *
+   * @param leftGroup row indices of the left table that belong to the group being joined
+   * @param rightGroup row indices of the right table that belong to the group being joined
+   * @param resultBuilder builder to which every matching (left row, right row) pair is added
+   * @param problemAggregator aggregator available for reporting problems found while joining
+   */
+  void joinSubsets(
+      List<Integer> leftGroup,
+      List<Integer> rightGroup,
+      JoinResult.Builder resultBuilder,
+      ProblemAggregator problemAggregator);
+}
--- a/std-bits/table/src/main/java/org/enso/table/data/table/join/between/SortJoin.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/between/SortJoin.java
@ -0,0 +1,162 @@
+package org.enso.table.data.table.join.between;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import org.enso.base.ObjectComparator;
+import org.enso.table.data.column.storage.Storage;
+import org.enso.table.data.index.OrderedMultiValueKey;
+import org.enso.table.data.table.join.JoinResult;
+import org.enso.table.data.table.join.JoinStrategy;
+import org.enso.table.data.table.join.PluggableJoinStrategy;
+import org.enso.table.data.table.join.conditions.Between;
+import org.enso.table.problems.ProblemAggregator;
+import org.graalvm.polyglot.Context;
+
+/**
+ * A join strategy for {@link Between} conditions.
+ *
+ * <p>The left rows are sorted by the column of the first condition, and for every right row the
+ * candidates whose first coordinate lies within that row's bounds are located in the sorted order.
+ * Each candidate is then checked against the bounds of all conditions.
+ *
+ * <p>It can run standalone over whole tables ({@link #join}) or as the inner strategy of another
+ * join, processing one pair of row groups at a time ({@link #joinSubsets}).
+ */
+public class SortJoin implements JoinStrategy, PluggableJoinStrategy {
+
+  /**
+   * Captures the storages of the left, lower-bound and upper-bound columns of every condition.
+   *
+   * @param conditions the {@link Between} conditions to evaluate; must not be empty
+   * @throws IllegalArgumentException if {@code conditions} is empty
+   */
+  public SortJoin(List<Between> conditions) {
+    conditionsHelper = new JoinStrategy.ConditionsHelper(conditions);
+
+    Context context = Context.getCurrent();
+    int nConditions = conditions.size();
+    directions = new int[nConditions];
+    leftStorages = new Storage<?>[nConditions];
+    lowerStorages = new Storage<?>[nConditions];
+    upperStorages = new Storage<?>[nConditions];
+    for (int i = 0; i < nConditions; i++) {
+      directions[i] = 1;
+      leftStorages[i] = conditions.get(i).left().getStorage();
+      lowerStorages[i] = conditions.get(i).rightLower().getStorage();
+      upperStorages[i] = conditions.get(i).rightUpper().getStorage();
+      context.safepoint();
+    }
+  }
+
+  private final JoinStrategy.ConditionsHelper conditionsHelper;
+
+  private final int[] directions;
+  private final Storage<?>[] leftStorages;
+  private final Storage<?>[] lowerStorages;
+  private final Storage<?>[] upperStorages;
+
+  /**
+   * Joins all rows of the left table with all rows of the right table.
+   *
+   * @param problemAggregator aggregator for problems; not used by this implementation
+   * @return the pairs of (left row, right row) indices that satisfy every condition
+   */
+  @Override
+  public JoinResult join(ProblemAggregator problemAggregator) {
+    Context context = Context.getCurrent();
+    JoinResult.Builder resultBuilder = new JoinResult.Builder();
+
+    int leftRowCount = conditionsHelper.getLeftTableRowCount();
+    int rightRowCount = conditionsHelper.getRightTableRowCount();
+    if (leftRowCount == 0 || rightRowCount == 0) {
+      // if one group is completely empty, there will be no matches to report
+      return resultBuilder.build();
+    }
+    List<OrderedMultiValueKey> leftKeys = new ArrayList<>(leftRowCount);
+    for (int i = 0; i < leftRowCount; i++) {
+      // Pass the comparator explicitly, exactly like joinSubsets and the bound builders do, so
+      // that the keys and the range checks are guaranteed to use the same ordering.
+      leftKeys.add(new OrderedMultiValueKey(leftStorages, i, directions, objectComparator));
+      context.safepoint();
+    }
+
+    SortedListIndex<OrderedMultiValueKey> leftIndex = buildSortedLeftIndex(leftKeys);
+
+    for (int rightRowIx = 0; rightRowIx < rightRowCount; rightRowIx++) {
+      addMatchingLeftRows(leftIndex, rightRowIx, resultBuilder);
+      context.safepoint();
+    }
+
+    return resultBuilder.build();
+  }
+
+  /**
+   * Joins the given group of left rows with the given group of right rows, adding every matching
+   * pair to {@code resultBuilder}.
+   *
+   * @param leftGroup indices of the left rows to consider
+   * @param rightGroup indices of the right rows to consider
+   * @param resultBuilder builder that receives the matching pairs
+   * @param problemAggregator aggregator for problems; not used by this implementation
+   */
+  @Override
+  public void joinSubsets(
+      List<Integer> leftGroup,
+      List<Integer> rightGroup,
+      JoinResult.Builder resultBuilder,
+      ProblemAggregator problemAggregator) {
+    Context context = Context.getCurrent();
+
+    List<OrderedMultiValueKey> leftKeys =
+        leftGroup.stream()
+            .map(i -> new OrderedMultiValueKey(leftStorages, i, directions, objectComparator))
+            .toList();
+    if (leftKeys.isEmpty()) {
+      // left group is completely empty - there will be no matches at all
+      return;
+    }
+
+    SortedListIndex<OrderedMultiValueKey> leftIndex = buildSortedLeftIndex(leftKeys);
+
+    for (int rightRowIx : rightGroup) {
+      addMatchingLeftRows(leftIndex, rightRowIx, resultBuilder);
+      context.safepoint();
+    }
+  }
+
+  /** Builds an index of the given keys sorted by their first coordinate only. */
+  private SortedListIndex<OrderedMultiValueKey> buildSortedLeftIndex(
+      List<OrderedMultiValueKey> keys) {
+    return SortedListIndex.build(keys, firstCoordinateComparator);
+  }
+
+  /** Builds the key holding the lower bounds of all conditions for the given right row. */
+  private OrderedMultiValueKey buildLowerBound(int rightRowIx) {
+    return new OrderedMultiValueKey(lowerStorages, rightRowIx, directions, objectComparator);
+  }
+
+  /** Builds the key holding the upper bounds of all conditions for the given right row. */
+  private OrderedMultiValueKey buildUpperBound(int rightRowIx) {
+    return new OrderedMultiValueKey(upperStorages, rightRowIx, directions, objectComparator);
+  }
+
+  /**
+   * Adds to {@code resultBuilder} every left row of the index that lies within the bounds of the
+   * given right row in all dimensions.
+   */
+  private void addMatchingLeftRows(
+      SortedListIndex<OrderedMultiValueKey> sortedLeftIndex,
+      int rightRowIx,
+      JoinResult.Builder resultBuilder) {
+    OrderedMultiValueKey lowerBound = buildLowerBound(rightRowIx);
+    OrderedMultiValueKey upperBound = buildUpperBound(rightRowIx);
+
+    // If the match interval is invalid or empty, there is nothing to do.
+    if (lowerBound.hasAnyNulls()
+        || upperBound.hasAnyNulls()
+        || lowerBound.compareTo(upperBound) > 0) {
+      return;
+    }
+
+    // The index is ordered by the first coordinate only, so this range is a superset of the
+    // matches; the remaining coordinates are verified below.
+    List<OrderedMultiValueKey> firstCoordinateMatches =
+        sortedLeftIndex.findSubRange(lowerBound, upperBound);
+    Context context = Context.getCurrent();
+    for (OrderedMultiValueKey key : firstCoordinateMatches) {
+      if (isInRange(key, lowerBound, upperBound)) {
+        resultBuilder.addRow(key.getRowIndex(), rightRowIx);
+      }
+
+      context.safepoint();
+    }
+  }
+
+  /**
+   * Checks whether {@code key} lies between the bounds (both ends inclusive) in every dimension.
+   */
+  private boolean isInRange(
+      OrderedMultiValueKey key, OrderedMultiValueKey lowerBound, OrderedMultiValueKey upperBound) {
+    assert key.getNumberOfColumns() == lowerBound.getNumberOfColumns();
+    assert key.getNumberOfColumns() == upperBound.getNumberOfColumns();
+
+    // Note: we cannot just use `compareTo`, because we are now not checking that the key is between
+    // the bounds in lexicographic order.
+    // Instead, we are checking if the key is between the bounds for all dimensions.
+
+    int n = key.getNumberOfColumns();
+    for (int i = 0; i < n; i++) {
+      var keyValue = key.get(i);
+      var lowerBoundValue = lowerBound.get(i);
+      var upperBoundValue = upperBound.get(i);
+      boolean fitsInThisDimension =
+          objectComparator.compare(keyValue, lowerBoundValue) >= 0
+              && objectComparator.compare(keyValue, upperBoundValue) <= 0;
+      if (!fitsInThisDimension) {
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+  private final ObjectComparator objectComparator = ObjectComparator.DEFAULT;
+  private final Comparator<OrderedMultiValueKey> firstCoordinateComparator =
+      new OrderedMultiValueKey.ProjectionComparator(0);
+}
--- a/std-bits/table/src/main/java/org/enso/table/data/table/join/between/SortedListIndex.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/between/SortedListIndex.java
@ -0,0 +1,129 @@
+package org.enso.table.data.table.join.between;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * An immutable index over a list of elements kept in sorted order, supporting lookup of all
+ * elements that fall between two bounds by binary search.
+ *
+ * <p>Elements that compare as equal are all retained.
+ */
+public class SortedListIndex<T> {
+  /** Defines the <= ordering for the index. */
+  private final Comparator<T> comparator;
+
+  /* forall 0 <= i <= j < n, sortedList[i] <= sortedList[j] */
+  private final ArrayList<T> sortedList;
+
+  /** Wraps a list that the caller guarantees is already sorted according to {@code comparator}. */
+  protected SortedListIndex(ArrayList<T> sortedList, Comparator<T> comparator) {
+    this.comparator = comparator;
+    this.sortedList = sortedList;
+  }
+
+  /**
+   * Builds an index from the given elements.
+   *
+   * <p>The input list is copied before sorting, so it is left unmodified.
+   *
+   * @param list the elements to index, in any order
+   * @param comparator the ordering used both for sorting and for range lookups
+   */
+  public static <T> SortedListIndex<T> build(List<T> list, Comparator<T> comparator) {
+    ArrayList<T> copy = new ArrayList<>(list);
+    copy.sort(comparator);
+    return new SortedListIndex<>(copy, comparator);
+  }
+
+  /**
+   * Finds a sub-range of the index containing all elements between the lower and upper bounds
+   * (both-ends inclusive).
+   *
+   * <p>The result is empty when no element lies within the bounds.
+   */
+  public List<T> findSubRange(T lowerBound, T upperBound) {
+    int start = findLowerIndex(lowerBound);
+    int end = findUpperIndex(upperBound) + 1;
+    if (start >= end) {
+      return Collections.emptyList();
+    }
+
+    return sortedList.subList(start, end);
+  }
+
+  /**
+   * Finds the index of the first element that is greater than or equal to the argument.
+   *
+   * <p>If all elements are greater than or equal to the argument, returns 0. If all elements are
+   * less than the argument, returns N.
+   */
+  private int findLowerIndex(T element) {
+    int start = 0;
+    int end = sortedList.size();
+
+    /*
+     * Loop invariants:
+     * 1) start <= end
+     * 2) forall 0 <= i < start: sortedList[i] < element
+     * 3) forall end <= i < N: sortedList[i] >= element
+     *
+     * end - start is strictly decreasing, so the loop will always terminate.
+     */
+    while (start < end) {
+      // start <= mid < mid + 1 <= end
+      // Computed as an offset from start so that it cannot overflow, however large the list is.
+      int mid = start + (end - start) / 2;
+      T midElement = sortedList.get(mid);
+      int cmp = comparator.compare(midElement, element);
+      if (cmp < 0) {
+        start = mid + 1;
+      } else {
+        end = mid;
+      }
+    }
+
+    /*
+     * After the loop, start >= end, but also start <= end, so start == end.
+     *
+     * Thus, from invariants:
+     * forall 0 <= i < start: sortedList[i] < element
+     * forall start <= i < N: sortedList[i] >= element
+     *
+     * start is the first element that is >= element;
+     * if there is no such element, it will be N.
+     */
+    return start;
+  }
+
+  /**
+   * Finds the index of the last element that is less than or equal to the argument.
+   *
+   * <p>If all elements are greater than the argument, returns -1. If all elements are less than or
+   * equal to the argument, returns N-1 (index of the last element).
+   */
+  private int findUpperIndex(T element) {
+    int start = 0;
+    int end = sortedList.size();
+
+    /*
+     * Loop invariants:
+     * 1) start <= end
+     * 2) forall 0 <= i < start: sortedList[i] <= element
+     * 3) forall end <= i < N: sortedList[i] > element
+     *
+     * end - start is strictly decreasing.
+     */
+    while (start < end) {
+      // start <= mid < end
+      // Computed as an offset from start so that it cannot overflow, however large the list is.
+      int mid = start + (end - start) / 2;
+      T midElement = sortedList.get(mid);
+      int cmp = comparator.compare(midElement, element);
+      if (cmp <= 0) {
+        start = mid + 1;
+      } else {
+        end = mid;
+      }
+    }
+
+    /*
+     * After the loop, start >= end, but also start <= end, so start == end.
+     *
+     * Thus, from invariants:
+     * forall 0 <= i < start: sortedList[i] <= element
+     * forall start <= i < N: sortedList[i] > element
+     *
+     * So start-1 is the last element that is <= element (if it exists);
+     * if there is no such element, it will be -1.
+     */
+    return start - 1;
+  }
+}
--- a/std-bits/table/src/main/java/org/enso/table/data/table/join/conditions/Between.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/conditions/Between.java
@ -1,4 +1,4 @@
-package org.enso.table.data.table.join;
+package org.enso.table.data.table.join.conditions;

 import org.enso.table.data.table.Column;

--- a/std-bits/table/src/main/java/org/enso/table/data/table/join/conditions/Equals.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/conditions/Equals.java
@ -0,0 +1,5 @@
+package org.enso.table.data.table.join.conditions;
+
+import org.enso.table.data.table.Column;
+
+public record Equals(Column left, Column right) implements HashableCondition {}
--- a/std-bits/table/src/main/java/org/enso/table/data/table/join/conditions/EqualsIgnoreCase.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/conditions/EqualsIgnoreCase.java
@ -1,7 +1,7 @@
-package org.enso.table.data.table.join;
+package org.enso.table.data.table.join.conditions;

 import org.enso.table.data.table.Column;

 import java.util.Locale;

-public record EqualsIgnoreCase(Column left, Column right, Locale locale) implements JoinCondition {}
+public record EqualsIgnoreCase(Column left, Column right, Locale locale) implements HashableCondition {}
--- a/std-bits/table/src/main/java/org/enso/table/data/table/join/conditions/HashableCondition.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/conditions/HashableCondition.java
@ -0,0 +1,4 @@
+package org.enso.table.data.table.join.conditions;
+
+public sealed interface HashableCondition extends JoinCondition permits Equals, EqualsIgnoreCase {
+}
--- a/std-bits/table/src/main/java/org/enso/table/data/table/join/conditions/JoinCondition.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/conditions/JoinCondition.java
@ -0,0 +1,3 @@
+package org.enso.table.data.table.join.conditions;
+
+public sealed interface JoinCondition permits HashableCondition, Between {}
--- a/std-bits/table/src/main/java/org/enso/table/data/table/join/hashing/HashJoin.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/hashing/HashJoin.java
@ -0,0 +1,84 @@
+package org.enso.table.data.table.join.hashing;
+
+import org.enso.base.text.TextFoldingStrategy;
+import org.enso.table.data.index.MultiValueIndex;
+import org.enso.table.data.index.UnorderedMultiValueKey;
+import org.enso.table.data.table.Column;
+import org.enso.table.data.table.join.JoinResult;
+import org.enso.table.data.table.join.JoinStrategy;
+import org.enso.table.data.table.join.PluggableJoinStrategy;
+import org.enso.table.data.table.join.conditions.Equals;
+import org.enso.table.data.table.join.conditions.EqualsIgnoreCase;
+import org.enso.table.data.table.join.conditions.HashableCondition;
+import org.enso.table.problems.ProblemAggregator;
+import org.graalvm.polyglot.Context;
+
+import java.util.List;
+
+/**
+ * A join strategy that matches rows using hash-based indexes built over the equality conditions.
+ * <p>
+ * Both tables are indexed by the columns of the {@link HashableCondition}s. For every key that is present in both
+ * indexes, the corresponding groups of left and right rows are handed to {@code remainingMatcher}, which performs
+ * the remaining conditions on these row subsets and appends the resulting pairs to the shared result builder.
+ */
+public class HashJoin implements JoinStrategy {
+  private final JoinStrategy.ConditionsHelper conditionsHelper;
+  private final Column[] leftEquals, rightEquals;
+  private final List<TextFoldingStrategy> textFoldingStrategies;
+  private final PluggableJoinStrategy remainingMatcher;
+
+  /**
+   * Creates the strategy.
+   *
+   * @param conditions the equality-like conditions used as the hash key; must not be empty
+   * @param remainingMatcher the strategy that joins each pair of row subsets sharing the same key
+   * @throws IllegalArgumentException if {@code conditions} is empty
+   */
+  public HashJoin(List<HashableCondition> conditions, PluggableJoinStrategy remainingMatcher) {
+    conditionsHelper = new JoinStrategy.ConditionsHelper(conditions);
+    this.remainingMatcher = remainingMatcher;
+
+    List<HashEqualityCondition> equalConditions =
+        conditions.stream().map(HashJoin::makeHashEqualityCondition).toList();
+
+    if (equalConditions.isEmpty()) {
+      throw new IllegalArgumentException("HashJoin is applicable only if there is at least one equality condition.");
+    }
+
+    // The i-th left column, right column and folding strategy all describe the i-th condition.
+    leftEquals = equalConditions.stream().map(HashEqualityCondition::left).toArray(Column[]::new);
+    rightEquals = equalConditions.stream().map(HashEqualityCondition::right).toArray(Column[]::new);
+    textFoldingStrategies = equalConditions.stream().map(HashEqualityCondition::textFoldingStrategy).toList();
+  }
+
+  /**
+   * Builds an index over each table and joins the row groups that share a key.
+   * <p>
+   * Only keys present in both indexes are passed to {@code remainingMatcher}; keys found in just one of the tables
+   * are not reported to it.
+   */
+  @Override
+  public JoinResult join(ProblemAggregator problemAggregator) {
+    Context context = Context.getCurrent();
+
+    // Both indexes use the same folding strategies, so that keys of the two tables are comparable.
+    var leftIndex = MultiValueIndex.makeUnorderedIndex(leftEquals, conditionsHelper.getLeftTableRowCount(),
+        textFoldingStrategies, problemAggregator);
+    var rightIndex = MultiValueIndex.makeUnorderedIndex(rightEquals, conditionsHelper.getRightTableRowCount(),
+        textFoldingStrategies, problemAggregator);
+
+    JoinResult.Builder resultBuilder = new JoinResult.Builder();
+    for (var leftEntry : leftIndex.mapping().entrySet()) {
+      UnorderedMultiValueKey leftKey = leftEntry.getKey();
+      List<Integer> leftRows = leftEntry.getValue();
+      List<Integer> rightRows = rightIndex.get(leftKey);
+
+      // A null result means that no right row has this key, so there is nothing to match.
+      if (rightRows != null) {
+        remainingMatcher.joinSubsets(leftRows, rightRows, resultBuilder, problemAggregator);
+      }
+
+      // Presumably lets the runtime interrupt a long-running join between groups - TODO confirm.
+      context.safepoint();
+    }
+
+    return resultBuilder.build();
+  }
+
+  /** Translates a condition into its pair of columns and the text folding strategy used when hashing them. */
+  private static HashEqualityCondition makeHashEqualityCondition(HashableCondition eq) {
+    // The switch is exhaustive because HashableCondition is sealed.
+    return switch (eq) {
+      case Equals e ->
+          new HashEqualityCondition(e.left(), e.right(), TextFoldingStrategy.unicodeNormalizedFold);
+      case EqualsIgnoreCase e ->
+          new HashEqualityCondition(e.left(), e.right(), TextFoldingStrategy.caseInsensitiveFold(e.locale()));
+    };
+  }
+
+  /** A single key component: the two columns to compare and how their text values are folded. */
+  private record HashEqualityCondition(Column left, Column right, TextFoldingStrategy textFoldingStrategy) {
+  }
+}
--- a/std-bits/table/src/main/java/org/enso/table/data/table/join/lookup/LookupColumnDescription.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/lookup/LookupColumnDescription.java
@ -1,4 +1,4 @@
-package org.enso.table.data.table.join;
+package org.enso.table.data.table.join.lookup;

 import org.enso.table.data.column.storage.type.StorageType;
 import org.enso.table.data.table.Column;
--- a/std-bits/table/src/main/java/org/enso/table/data/table/join/lookup/LookupJoin.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/lookup/LookupJoin.java
@ -1,4 +1,4 @@
-package org.enso.table.data.table.join;
+package org.enso.table.data.table.join.lookup;

 import org.enso.base.text.TextFoldingStrategy;
 import org.enso.table.data.column.builder.Builder;
@ -9,6 +9,7 @@ import org.enso.table.data.index.UnorderedMultiValueKey;
 import org.enso.table.data.mask.OrderMask;
 import org.enso.table.data.table.Column;
 import org.enso.table.data.table.Table;
+import org.enso.table.data.table.join.conditions.Equals;
 import org.enso.table.error.NonUniqueLookupKey;
 import org.enso.table.error.NullValuesInKeyColumns;
 import org.enso.table.error.UnmatchedRow;
@ -17,7 +18,6 @@ import org.enso.table.util.ConstantList;

 import java.util.Arrays;
 import java.util.List;
-import java.util.Map;
 import java.util.stream.IntStream;

 public class LookupJoin {
--- a/std-bits/table/src/main/java/org/enso/table/data/table/join/scan/Matcher.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/scan/Matcher.java
@ -1,5 +0,0 @@
-package org.enso.table.data.table.join.scan;
-
-public interface Matcher {
-  boolean matches(int left, int right);
-}
--- a/std-bits/table/src/main/java/org/enso/table/data/table/join/scan/MatcherFactory.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/scan/MatcherFactory.java
@ -1,152 +0,0 @@
-package org.enso.table.data.table.join.scan;
-
-import org.enso.base.ObjectComparator;
-import org.enso.base.Text_Utils;
-import org.enso.base.polyglot.NumericConverter;
-import org.enso.table.data.column.storage.Storage;
-import org.enso.table.data.column.storage.StringStorage;
-import org.enso.table.data.table.join.Between;
-import org.enso.table.data.table.join.Equals;
-import org.enso.table.data.table.join.EqualsIgnoreCase;
-import org.enso.table.data.table.join.JoinCondition;
-import org.enso.table.data.table.problems.FloatingPointGrouping;
-import org.enso.table.problems.ColumnAggregatedProblemAggregator;
-
-import java.util.List;
-import java.util.Locale;
-import java.util.stream.Collectors;
-
-public class MatcherFactory {
-  public Matcher create(JoinCondition condition, ColumnAggregatedProblemAggregator problemAggregator) {
-    return switch (condition) {
-      case Equals eq -> new EqualsMatcher(eq, problemAggregator);
-      case EqualsIgnoreCase eq -> new EqualsIgnoreCaseMatcher(eq);
-      case Between between -> new BetweenMatcher(between);
-      default -> throw new UnsupportedOperationException(
-          "Unsupported join condition: " + condition);
-    };
-  }
-
-  public Matcher create(List<JoinCondition> condition, ColumnAggregatedProblemAggregator problemAggregator) {
-    List<Matcher> matchers = condition.stream().map(m-> create(m, problemAggregator)).collect(Collectors.toList());
-    return new CompoundMatcher(matchers);
-  }
-
-  static final class CompoundMatcher implements Matcher {
-    private final List<Matcher> matchers;
-
-    CompoundMatcher(List<Matcher> matchers) {
-      this.matchers = matchers;
-    }
-
-    @Override
-    public boolean matches(int left, int right) {
-      for (Matcher matcher : matchers) {
-        if (!matcher.matches(left, right)) {
-          return false;
-        }
-      }
-
-      return true;
-    }
-  }
-
-  static final class EqualsMatcher implements Matcher {
-    private final Storage<?> leftStorage;
-    private final Storage<?> rightStorage;
-    private final String leftColumnName;
-    private final String rightColumnName;
-    private final ColumnAggregatedProblemAggregator problemAggregator;
-
-    public EqualsMatcher(Equals eq, ColumnAggregatedProblemAggregator problemAggregator) {
-      leftStorage = eq.left().getStorage();
-      rightStorage = eq.right().getStorage();
-      leftColumnName = eq.left().getName();
-      rightColumnName = eq.right().getName();
-      this.problemAggregator = problemAggregator;
-    }
-
-    @Override
-    public boolean matches(int left, int right) {
-      Object leftValue = leftStorage.getItemBoxed(left);
-      Object rightValue = rightStorage.getItemBoxed(right);
-
-      if (NumericConverter.isFloatLike(leftValue)) {
-        problemAggregator.reportColumnAggregatedProblem(new FloatingPointGrouping(leftColumnName, left));
-      }
-
-      if (NumericConverter.isFloatLike(rightValue)) {
-        problemAggregator.reportColumnAggregatedProblem(new FloatingPointGrouping(rightColumnName, right));
-      }
-
-      return ObjectComparator.areEqual(leftValue, rightValue);
-    }
-  }
-
-  static final class EqualsIgnoreCaseMatcher implements Matcher {
-    private final StringStorage leftStorage;
-    private final StringStorage rightStorage;
-
-    private final Locale locale;
-
-    public EqualsIgnoreCaseMatcher(EqualsIgnoreCase eq) {
-      if (eq.left().getStorage() instanceof StringStorage leftStrings) {
-        leftStorage = leftStrings;
-      } else {
-        throw new IllegalArgumentException("Expected left column to have type Text.");
-      }
-
-      if (eq.right().getStorage() instanceof StringStorage rightStrings) {
-        rightStorage = rightStrings;
-      } else {
-        throw new IllegalArgumentException("Expected right column to have type Text.");
-      }
-
-      locale = eq.locale();
-    }
-
-    @Override
-    public boolean matches(int left, int right) {
-      String leftValue = leftStorage.getItem(left);
-      String rightValue = rightStorage.getItem(right);
-
-      if (leftValue == null && rightValue == null) {
-        return true;
-      }
-
-      if (leftValue == null || rightValue == null) {
-        return false;
-      }
-
-      return Text_Utils.equals_ignore_case(leftValue, rightValue, locale);
-    }
-  }
-
-  static final class BetweenMatcher implements Matcher {
-    private final Storage<?> leftStorage;
-    private final Storage<?> rightLowerStorage;
-    private final Storage<?> rightUpperStorage;
-
-    public BetweenMatcher(Between between) {
-      leftStorage = between.left().getStorage();
-      rightLowerStorage = between.rightLower().getStorage();
-      rightUpperStorage = between.rightUpper().getStorage();
-    }
-
-    @Override
-    public boolean matches(int left, int right) {
-      Object leftValue = leftStorage.getItemBoxed(left);
-      Object rightLowerValue = rightLowerStorage.getItemBoxed(right);
-      Object rightUpperValue = rightUpperStorage.getItemBoxed(right);
-
-      // If any value is missing, such a pair of rows is never correlated with Between as we assume
-      // the ordering is not well-defined for missing values.
-      if (leftValue == null || rightLowerValue == null || rightUpperValue == null) {
-        return false;
-      }
-
-      return ObjectComparator.DEFAULT.compare(leftValue, rightLowerValue) >= 0
-          && ObjectComparator.DEFAULT.compare(leftValue, rightUpperValue) <= 0;
-    }
-  }
-}
--- a/std-bits/table/src/main/java/org/enso/table/operations/AddRowNumber.java
+++ b/std-bits/table/src/main/java/org/enso/table/operations/AddRowNumber.java
@ -2,7 +2,6 @@ package org.enso.table.operations;

 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@ -18,7 +17,6 @@ import org.enso.table.data.table.Column;
 import org.enso.table.problems.ColumnAggregatedProblemAggregator;
 import org.enso.table.problems.ProblemAggregator;
 import org.enso.table.util.ConstantList;
-import org.graalvm.collections.Pair;

 public class AddRowNumber {

@ -62,18 +60,17 @@ public class AddRowNumber {
    Storage<?>[] orderingStorages =
        Arrays.stream(orderingColumns).map(Column::getStorage).toArray(Storage[]::new);
    long[] numbers = new long[n];
-    List<Pair<OrderedMultiValueKey, Integer>> keys =
+    List<OrderedMultiValueKey> keys =
        new ArrayList<>(
            IntStream.range(0, n)
-                .mapToObj(
-                    i -> Pair.create(new OrderedMultiValueKey(orderingStorages, i, directions), i))
+                .mapToObj(i -> new OrderedMultiValueKey(orderingStorages, i, directions))
                .toList());

-    keys.sort(OrderedPairComparator.INSTANCE);
+    keys.sort(null);

    RangeIterator it = new RangeIterator(start, step);
    for (var key : keys) {
-      numbers[key.getRight()] = it.next();
+      numbers[key.getRowIndex()] = it.next();
    }
    return new LongStorage(numbers, IntegerType.INT_64);
  }
@ -103,44 +100,21 @@ public class AddRowNumber {

    for (var entry : groupIndex.mapping().entrySet()) {
      List<Integer> indices = entry.getValue();
-      List<Pair<OrderedMultiValueKey, Integer>> orderingKeys =
+      List<OrderedMultiValueKey> orderingKeys =
          new ArrayList<>(
              indices.stream()
-                  .map(
-                      i ->
-                          Pair.create(new OrderedMultiValueKey(orderingStorages, i, directions), i))
+                  .map(i -> new OrderedMultiValueKey(orderingStorages, i, directions))
                  .toList());
-      orderingKeys.sort(OrderedPairComparator.INSTANCE);
+      orderingKeys.sort(null);
      RangeIterator it = new RangeIterator(start, step);
-      for (var key : orderingKeys) {
-        numbers[key.getRight()] = it.next();
+      for (OrderedMultiValueKey key : orderingKeys) {
+        numbers[key.getRowIndex()] = it.next();
      }
    }

    return new LongStorage(numbers, IntegerType.INT_64);
  }

-  private static class OrderedPairComparator
-      implements Comparator<Pair<OrderedMultiValueKey, Integer>> {
-    @Override
-    public int compare(
-        Pair<OrderedMultiValueKey, Integer> o1, Pair<OrderedMultiValueKey, Integer> o2) {
-      int p1 = o1.getLeft().compareTo(o2.getLeft());
-      if (p1 != 0) {
-        return p1;
-      }
-
-      return o1.getRight().compareTo(o2.getRight());
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      return obj instanceof OrderedPairComparator;
-    }
-
-    static OrderedPairComparator INSTANCE = new OrderedPairComparator();
-  }
-
  /**
   * A helper for computing consecutive numbers based on a start and step. It will throw an {@link
   * java.lang.ArithmeticException} if the next number overflows.
--- a/test/Benchmarks/src/Main.enso
+++ b/test/Benchmarks/src/Main.enso
@ -10,6 +10,7 @@ import project.Table.Aggregate
 import project.Table.Arithmetic
 import project.Table.Column_From_Vector
 import project.Table.Cross_Tab
+import project.Table.Join
 import project.Table.Sorting
 import project.Table.Internal.Multi_Value_Key
 import project.Text.Build
@ -54,6 +55,7 @@ all_benchmarks =
    builder.append Add_Row_Number.collect_benches
    builder.append Column_From_Vector.collect_benches
    builder.append Cross_Tab.collect_benches
+    builder.append Join.collect_benches
    builder.append Sorting.collect_benches
    builder.append Multi_Value_Key.collect_benches

--- a/test/Benchmarks/src/Table/Is_In.enso
+++ b/test/Benchmarks/src/Table/Is_In.enso
@ -0,0 +1,48 @@
+from Standard.Base import all
+from Standard.Base.Runtime import assert
+
+from Standard.Table import all
+
+from Standard.Test import Bench
+
+from project.Config import extended_tests
+
+options = Bench.options . set_warmup (Bench.phase_conf 1 2) . set_measure (Bench.phase_conf 2 3)
+
+## The inputs of the `Is_In` filter benchmarks: the table to be filtered and
+   the vectors of integers, dates and booleans that are used as the `Is_In`
+   arguments.
+type Scenario
+    Value table ints_vec dates_vec bool_vec
+
+## Builds the table with the "X", "dates" and "bools" columns, together with the
+   vectors that the benchmarks look the column values up in.
+create_scenario =
+    first_day = Date_Time.new 2000 1 1
+    make_date x = first_day + (Duration.new seconds=x)
+
+    ints_vec = 40000.up_to 130000 . to_vector
+    dates_vec = ints_vec.map make_date
+    bool_vec = Vector.fill 7000 True
+
+    base = Table.new [["X", (200.up_to 10000 . to_vector)]]
+    x_column = base.at "X"
+    with_dates = base.set (x_column.map make_date) "dates"
+    with_bools = with_dates.set (x_column % 2 == 0) "bools"
+    Scenario.Value with_bools ints_vec dates_vec bool_vec
+
+## Holder of the benchmark scenario.
+   The `~` marks the field as suspended, so presumably the scenario is only
+   built when a benchmark first accesses it - TODO confirm.
+type Data
+    Value ~scenario
+
+    create = Data.Value create_scenario
+
+## Registers the `Filter_Is_In` benchmark group: one benchmark per column type,
+   each filtering the scenario table with `Filter_Condition.Is_In`.
+collect_benches = Bench.build builder->
+    data = Data.create
+
+    builder.group ("Filter_Is_In") options group_builder->
+        # Filters the integer column "X" against the vector of integers.
+        group_builder.specify "integers" <|
+            scenario = data.scenario
+            scenario.table.filter "X" (Filter_Condition.Is_In scenario.ints_vec)
+
+        # Filters the "dates" column against the vector of date-times.
+        group_builder.specify "dates" <|
+            scenario = data.scenario
+            scenario.table.filter "dates" (Filter_Condition.Is_In scenario.dates_vec)
+
+        # Filters the "bools" column against the vector of booleans.
+        group_builder.specify "bools" <|
+            scenario = data.scenario
+            scenario.table.filter "bools" (Filter_Condition.Is_In scenario.bool_vec)
+
+main = collect_benches . run_main
--- a/test/Benchmarks/src/Table/Join.enso
+++ b/test/Benchmarks/src/Table/Join.enso
@ -0,0 +1,222 @@
+from Standard.Base import all
+from Standard.Base.Runtime import assert
+
+from Standard.Table import all
+
+from Standard.Test import Bench
+
+from project.Config import extended_tests
+
+options = Bench.options . set_warmup (Bench.phase_conf 2 5) . set_measure (Bench.phase_conf 2 5)
+
+## A pair of tables that a join benchmark joins with each other.
+type Scenario
+    Value table1 table2
+
+## Returns the elements of `vec` in a shuffled order, by sampling as many
+   elements as the vector has. The seed is fixed.
+shuffle vec =
+    whole_vector_sample = Index_Sub_Range.Sample vec.length seed=42
+    vec.take whole_vector_sample
+
+## Two single-column tables holding the same unique keys; the keys of the
+   second table are shuffled, so every row has exactly one match.
+create_scenario_equals num_rows =
+    keys = 0.up_to num_rows . to_vector
+    left = Table.new [["key", keys]]
+    right = Table.new [["key", shuffle keys]]
+    Scenario.Value left right
+
+## Two single-column tables with repeated keys: each key of the first table is
+   shared by a run of up to 30 rows. The second table holds the same keys in
+   reverse order, shifted by 2.
+create_scenario_equals_medium_groups num_rows =
+    group_of x = (x/30).floor
+    left_keys = (0.up_to num_rows).map group_of
+    right_keys = left_keys.reverse.map k-> k+2
+    left = Table.new [["key", left_keys]]
+    right = Table.new [["key", right_keys]]
+    Scenario.Value left right
+
+## Two single-column tables of text keys that differ only by letter case: "a<i>"
+   in the first table and "A<i>" (in reverse order) in the second one.
+create_scenario_equals_ignore_case num_rows =
+    indices = 0.up_to num_rows
+    lower_case_keys = indices.map i-> "a"+i.to_text
+    upper_case_keys = indices.reverse.map i-> "A"+i.to_text
+    left = Table.new [["key", lower_case_keys]]
+    right = Table.new [["case_insensitive_key", upper_case_keys]]
+    Scenario.Value left right
+
+## A table of shuffled points (multiples of 100) and a table of intervals
+   `[point-10, point+50]`. The intervals do not overlap, so every point falls
+   into exactly one of them.
+create_scenario_between num_rows =
+    points = (0.up_to num_rows).map x-> x*100
+    lower_bounds = points.map x-> x-10
+    upper_bounds = points.map x-> x+50
+
+    left = Table.new [["x", shuffle points]]
+    right = Table.new [["lows", lower_bounds], ["highs", upper_bounds]]
+    Scenario.Value left right
+
+## Builds the mixed scenario: a pair of tables whose rows are mapped 1-1 and
+   consist of 3 groups. Within each group only one of the three keys varies,
+   while the other two keys are constant.
+
+   Because of that, splitting the rows by any proper subset of the keys still
+   leaves a big group that would have to be brute-forced - only splitting by
+   all 3 keys yields small (1-1) groups. A combined join must therefore be
+   efficient for all conditions, regardless of the distribution of keys.
+create_scenario_mixed num_rows =
+    group_size = (num_rows/3).round
+    indices = 0.up_to group_size
+    eq_keys = indices.to_vector
+    lower_case_keys = indices.map i-> "a"+i.to_text
+    upper_case_keys = indices.map i-> "A"+i.to_text
+    between_keys = indices.map x-> 1000 + x*100
+
+    fixed_eq = Vector.new group_size _-> 1
+    fixed_text = Vector.new group_size _-> "_"
+    fixed_number = Vector.new group_size _-> 0
+
+    # Left table: in each group a different key column varies.
+    left_varying_eq = Table.new [["EQ", shuffle eq_keys], ["case_insensitive", fixed_text], ["x", fixed_number]]
+    left_varying_text = Table.new [["EQ", fixed_eq], ["case_insensitive", shuffle lower_case_keys], ["x", fixed_number]]
+    left_varying_x = Table.new [["EQ", fixed_eq], ["case_insensitive", fixed_text], ["x", shuffle between_keys]]
+    table1 = left_varying_eq.union [left_varying_text, left_varying_x]
+
+    # Right table: the counterparts of the groups above.
+    lows = between_keys.map x-> x-10
+    highs = between_keys.map x-> x+30
+    right_varying_eq = Table.new [["EQ", shuffle eq_keys], ["case_insensitive", fixed_text], ["lows", fixed_number], ["highs", fixed_number]]
+    right_varying_text = Table.new [["EQ", fixed_eq], ["case_insensitive", shuffle upper_case_keys], ["lows", fixed_number], ["highs", fixed_number]]
+    right_varying_bounds = Table.new [["EQ", fixed_eq], ["case_insensitive", fixed_text], ["lows", lows], ["highs", highs]]
+    table2 = right_varying_eq.union [right_varying_text, right_varying_bounds]
+
+    Scenario.Value table1 table2
+
+## Builds the 2d equality scenario: both tables hold the points of the same
+   2d grid (the first one in a shuffled order), to be matched on 2 keys.
+
+   It serves to check that joins on multiple keys are efficient as well.
+create_scenario_equals_2d num_rows =
+    side = num_rows.sqrt.ceil
+    axis = 0.up_to side
+    grid = axis.to_vector.flat_map x->
+        axis.map y-> [x, y]
+
+    shuffled_grid = shuffle grid
+    xs_of points = points.map .first
+    ys_of points = points.map .second
+
+    table1 = Table.new [["x", xs_of shuffled_grid], ["y", ys_of shuffled_grid]]
+    table2 = Table.new [["x", xs_of grid], ["y", ys_of grid]]
+    Scenario.Value table1 table2
+
+## Builds a 2d grid of points like the 2d equality scenario does, but the
+   second table describes a small box (+/- 0.1 in both dimensions) around
+   every point, to be matched using two Between conditions.
+create_scenario_between_2d num_rows =
+    side = num_rows.sqrt.ceil
+    axis = 0.up_to side
+    grid = axis.to_vector.flat_map x->
+        axis.map y-> [x, y]
+
+    shuffled_grid = shuffle grid
+    table1 = Table.new [["x", shuffled_grid.map .first], ["y", shuffled_grid.map .second]]
+
+    x_lows = grid.map p-> p.first - 0.1
+    y_lows = grid.map p-> p.second - 0.1
+    x_highs = grid.map p-> p.first + 0.1
+    y_highs = grid.map p-> p.second + 0.1
+
+    table2 = Table.new [["x_lows", x_lows], ["y_lows", y_lows], ["x_highs", x_highs], ["y_highs", y_highs]]
+    Scenario.Value table1 table2
+
+## Builds a scenario where the left table is a 2d grid of points and the right
+   table contains pairs of coordinate ranges that denote belts of size 2 x n on
+   that grid.
+
+   Half of the belts are narrow in the `x` dimension and half of them in the
+   `y` dimension (told apart by the `is_vertical` flag), to see how the order
+   of Between arguments affects performance.
+create_scenario_between_2d_belts num_rows =
+    side = num_rows.sqrt.ceil
+    axis = 0.up_to side
+    grid = axis.to_vector.flat_map x->
+        axis.map y-> [x, y]
+
+    shuffled_grid = shuffle grid
+    table1 = Table.new [["x", shuffled_grid.map .first], ["y", shuffled_grid.map .second]]
+
+    # Each row is: x_lows, x_highs, y_lows, y_highs, is_vertical.
+    belts_narrow_in_x = Vector.new side x-> [x, x+1, 0, side, False]
+    belts_narrow_in_y = Vector.new side y-> [0, side, y, y+1, True]
+    all_belts = belts_narrow_in_x + belts_narrow_in_y
+
+    table2 = Table.from_rows ["x_lows", "x_highs", "y_lows", "y_highs", "is_vertical"] all_belts
+    Scenario.Value table1 table2
+
+## Builds a scenario for finding the rows that are unmatched in another table.
+
+   It is set up on purpose so that the intersection of the two tables is very
+   large. The anti-join will only be fast if it does not compute that
+   intersection, as it is not needed for the result.
+create_scenario_antijoin num_rows =
+    constant_keys = Vector.new num_rows _-> 1
+
+    # The first 1000 rows get keys that are absent from `constant_keys`, so they
+    # are unmatched (and should be returned by the anti-join). Every other row
+    # matches _all_ rows of `constant_keys`, creating a huge intersection.
+    mostly_constant_keys = Vector.new num_rows ix->
+        if ix < 1000 then -ix else 1
+
+    table1 = Table.new [["key", constant_keys]]
+    table2 = Table.new [["key", mostly_constant_keys]]
+    Scenario.Value table1 table2
+
+## Holder of all join benchmark scenarios, one field per scenario.
+   The `~` marks the fields as suspended, so presumably each scenario is only
+   built when a benchmark first accesses it - TODO confirm.
+type Data
+    Value ~equals ~equals_medium_groups ~equals_ignore_case ~between ~mixed ~equals2d ~between2d ~between2d_belts ~antijoin
+
+    ## Creates the holder with every scenario sized according to `num_rows`.
+    create num_rows =
+        Data.Value (create_scenario_equals num_rows) (create_scenario_equals_medium_groups num_rows) (create_scenario_equals_ignore_case num_rows) (create_scenario_between num_rows) (create_scenario_mixed num_rows) (create_scenario_equals_2d num_rows) (create_scenario_between_2d num_rows) (create_scenario_between_2d_belts num_rows) (create_scenario_antijoin num_rows)
+
+## Registers the join benchmarks. Each benchmark joins the two tables of its
+   scenario and, where the size of the result is known, asserts the row count
+   to make sure that the join has actually produced the expected matches.
+collect_benches = Bench.build builder->
+    num_rows = 50000
+    data = Data.create num_rows
+
+    builder.group ("Join_" + num_rows.to_text) options group_builder->
+        group_builder.specify "Equals" <|
+            scenario = data.equals
+            r = scenario.table1.join scenario.table2 on="key"
+            assert (r.row_count == num_rows)
+
+        group_builder.specify "Equals_Medium_Groups" <|
+            scenario = data.equals_medium_groups
+            scenario.table1.join scenario.table2 on="key"
+
+        group_builder.specify "Equals_Ignore_Case" <|
+            scenario = data.equals_ignore_case
+            r = scenario.table1.join scenario.table2 on=(Join_Condition.Equals_Ignore_Case "key" "case_insensitive_key")
+            assert (r.row_count == num_rows)
+
+        group_builder.specify "Between" <|
+            scenario = data.between
+            r = scenario.table1.join scenario.table2 on=(Join_Condition.Between "x" "lows" "highs")
+            assert (r.row_count == num_rows)
+
+        group_builder.specify "Mixed" <|
+            scenario = data.mixed
+            r = scenario.table1.join scenario.table2 on=[Join_Condition.Equals "EQ", Join_Condition.Equals_Ignore_Case "case_insensitive", Join_Condition.Between "x" "lows" "highs"]
+            expected_rows = scenario.table1.row_count
+            assert (r.row_count == expected_rows)
+
+        group_builder.specify "Equals_2D" <|
+            scenario = data.equals2d
+            r = scenario.table1.join scenario.table2 on=["x", "y"]
+            assert (r.row_count == scenario.table1.row_count)
+
+        group_builder.specify "Between_2D" <|
+            scenario = data.between2d
+            r = scenario.table1.join scenario.table2 on=[Join_Condition.Between "x" "x_lows" "x_highs", Join_Condition.Between "y" "y_lows" "y_highs"]
+            assert (r.row_count == scenario.table1.row_count)
+
+        # The belts are 2 cells wide and their bounds are inclusive, so the
+        # consecutive belts overlap: every point is covered by 2 belts of each
+        # orientation, except for the points at coordinate 0 which are covered
+        # by only 1. Thus `n` belts of one orientation over `n*n` points yield
+        # `2*n*n - n` matches, i.e. `2 * points - belts`.
+        if extended_tests then group_builder.specify "Between_2D_Belts_All" <|
+            scenario = data.between2d_belts
+            r = scenario.table1.join scenario.table2 on=[Join_Condition.Between "x" "x_lows" "x_highs", Join_Condition.Between "y" "y_lows" "y_highs"]
+            # Both orientations are present here, so the count of matches doubles.
+            expected_rows = (4 * scenario.table1.row_count) - scenario.table2.row_count
+            assert (r.row_count == expected_rows)
+
+        if extended_tests then group_builder.specify "Between_2D_Belts_V" <|
+            scenario = data.between2d_belts
+            t2 = scenario.table2.filter "is_vertical" Filter_Condition.Is_True
+            r = scenario.table1.join t2 on=[Join_Condition.Between "x" "x_lows" "x_highs", Join_Condition.Between "y" "y_lows" "y_highs"]
+            expected_rows = (2 * scenario.table1.row_count) - t2.row_count
+            assert (r.row_count == expected_rows)
+
+        if extended_tests then group_builder.specify "Between_2D_Belts_H" <|
+            scenario = data.between2d_belts
+            t2 = scenario.table2.filter "is_vertical" Filter_Condition.Is_False
+            r = scenario.table1.join t2 on=[Join_Condition.Between "x" "x_lows" "x_highs", Join_Condition.Between "y" "y_lows" "y_highs"]
+            expected_rows = (2 * scenario.table1.row_count) - t2.row_count
+            assert (r.row_count == expected_rows)
+
+        # TODO this should be part of the main tests, but it was causing issues on CI; re-enable this with #8217
+        if extended_tests then group_builder.specify "AntiJoin" <|
+            scenario = data.antijoin
+            r = scenario.table2.join scenario.table1 on="key" join_kind=Join_Kind.Left_Exclusive
+            assert (r.row_count == 1000)
+
+main = collect_benches . run_main
--- a/test/Table_Tests/src/Common_Table_Operations/Join/Join_Spec.enso
+++ b/test/Table_Tests/src/Common_Table_Operations/Join/Join_Spec.enso
@ -103,6 +103,15 @@ spec setup =
            r = t3.join t4 join_kind=Join_Kind.Inner on=["X", "Y"] |> materialize |> _.order_by ["X", "Y", "Z", "Right Z"]
            check_xy_joined r

+        Test.specify "should correctly handle duplicated rows in Equals" <|
+            t1 = table_builder [["X", [1, 2, 2, 3]]]
+            t2 = table_builder [["X", [1, 2, 2, 4]]]
+            r1 = t1.join t2 join_kind=Join_Kind.Full on="X" . order_by "X"
+            within_table r1 <|
+                # Both 2's from t1 match with _both_ ones from t2 _each_, so in total we get 4 `2` pairs:
+                r1.at "X" . to_vector . should_equal [Nothing, 1, 2, 2, 2, 2, 3]
+                r1.at "Right X" . to_vector . should_equal [4, 1, 2, 2, 2, 2, Nothing]
+
        Test.specify "should allow to join on text equality ignoring case" <|
            t1 = table_builder [["X", ["a", "B"]], ["Y", [1, 2]]]
            t2 = table_builder [["X", ["A", "a", "b"]], ["Z", [1, 2, 3]]]
@ -170,7 +179,7 @@ spec setup =
            t2 = table_builder [["lower", [1, 10, 8, 12]], ["upper", [1, 12, 30, 0]], ["Z", [1, 2, 3, 4]]]

            r1 = t1.join join_kind=Join_Kind.Inner t2 on=(Join_Condition.Between "X" "lower" "upper") |> materialize |> _.order_by ["X", "Z"]
-            expect_column_names ["X", "Y", "lower", "upper", "Z"] r1
+            r1.column_names . should_equal ["X", "Y", "lower", "upper", "Z"]
            r1 . at "X" . to_vector . should_equal     [1, 10, 10, 12, 12]
            r1 . at "Y" . to_vector . should_equal     [1, 2,  2,  3,  3]
            r1 . at "lower" . to_vector . should_equal [1, 10, 8,  10, 8]
@ -182,13 +191,71 @@ spec setup =
            t2 = table_builder [["lower", ["a", "b"]], ["upper", ["a", "ccc"]], ["Z", [10, 20]]]

            r1 = t1.join t2 join_kind=Join_Kind.Inner on=(Join_Condition.Between "X" "lower" "upper") |> materialize |> _.order_by ["X", "Z"]
-            expect_column_names ["X", "Y", "lower", "upper", "Z"] r1
+            r1.column_names . should_equal ["X", "Y", "lower", "upper", "Z"]
            r1 . at "X" . to_vector . should_equal     ["a", "b",   "c"]
            r1 . at "Y" . to_vector . should_equal     [1,    2,     3]
            r1 . at "lower" . to_vector . should_equal ["a", "b",   "b"]
            r1 . at "upper" . to_vector . should_equal ["a", "ccc", "ccc"]
            r1 . at "Z" . to_vector . should_equal     [10,   20,    20]

+        Test.specify "should correctly handle Between edge cases (1)" pending=(if prefix.contains "PostgreSQL" then "TODO: fix issue #8243") <|
+            # 1. multiple rows with the same key value on the left side
+            # 2. fully duplicated rows (1, 7) on the left side
+            # 3. empty bounds (lower > upper: 10 > 0)
+            # 4. equal bounds (10 = 10)
+            # 5. unmatched rows on both sides - Full join
+            t1 = table_builder [["X", [1, 10, 20, 1, 2, 1, 1]], ["id", [1, 2, 3, 4, 5, 7, 7]]]
+            t2 = table_builder [["lower", [0, 10, 10]], ["upper", [3, 10, 0]], ["Z", ['a', 'b', 'c']]]
+            r1 = t1.join t2 join_kind=Join_Kind.Full on=(Join_Condition.Between "X" "lower" "upper") |> materialize |> _.order_by ["Z", "id"]
+            within_table r1 <|
+                r1.column_names . should_equal ["X", "id", "lower", "upper", "Z"]
+                rows = r1.rows.map .to_vector
+                rows.length . should_equal 8
+
+                rows.at 0 . should_equal [20, 3, Nothing, Nothing, Nothing]
+                rows.at 1 . should_equal [ 1, 1,  0,  3, 'a']
+                rows.at 2 . should_equal [ 1, 4,  0,  3, 'a']
+                rows.at 3 . should_equal [ 2, 5,  0,  3, 'a']
+                rows.at 4 . should_equal [ 1, 7,  0,  3, 'a']
+                rows.at 5 . should_equal [ 1, 7,  0,  3, 'a']
+                rows.at 6 . should_equal [10, 2, 10, 10, 'b']
+                rows.at 7 . should_equal [Nothing, Nothing, 10, 0, 'c']
+
+        Test.specify "should correctly handle Between edge cases (2)" <|
+            # 6. multiple Between conditions
+            xs = [0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4]
+            ys = [1, 2, 3, 1, 9, 2, 3, 2, 4, 2, 1, 1, 1, 2]
+            pts = xs.zip ys . take (Index_Sub_Range.Sample xs.length seed=42)
+            t1 = table_builder [["X", pts.map .first], ["Y", pts.map .second]]
+
+            t2 = table_builder [["lx", [1]], ["ux", [3]], ["ly", [1]], ["uy", [2]]]
+            r2 = t1.join t2 join_kind=Join_Kind.Inner on=[Join_Condition.Between "X" "lx" "ux", Join_Condition.Between "Y" "ly" "uy"] |> materialize |> _.order_by ["X", "Y"]
+            within_table r2 <|
+                r2.at "X" . to_vector . should_equal [1, 1, 2, 3, 3]
+                r2.at "Y" . to_vector . should_equal [1, 2, 2, 1, 2]
+
+            t3 = table_builder [["lx", [1.9]], ["ux", [3]], ["ly", [1]], ["uy", [2]]]
+            r3 = t1.join t3 join_kind=Join_Kind.Inner on=[Join_Condition.Between "X" "lx" "ux", Join_Condition.Between "Y" "ly" "uy"] |> materialize |> _.order_by ["X", "Y"]
+            within_table r3 <|
+                r3.at "X" . to_vector . should_equal [2, 3, 3]
+                r3.at "Y" . to_vector . should_equal [2, 1, 2]
+
+        Test.specify "should correctly handle Between edge cases (3)" <|
+            # 7. duplicated rows on both sides
+            t1 = table_builder [["X", [10, 20, 20]]]
+            t2 = table_builder [["low", [15, 15]], ["high", [30, 30]]]
+            r1 = t1.join t2 join_kind=Join_Kind.Right_Outer on=(Join_Condition.Between "X" "low" "high")
+            within_table r1 <|
+                r1.at "X" . to_vector . should_equal [20, 20, 20, 20]
+                r1.at "low" . to_vector . should_equal [15, 15, 15, 15]
+                r1.at "high" . to_vector . should_equal [30, 30, 30, 30]
+
+            # 8. keep only unmatched rows
+            r2 = t1.join t2 join_kind=Join_Kind.Left_Exclusive on=(Join_Condition.Between "X" "low" "high")
+            within_table r2 <|
+                r2.column_names . should_equal ["X"]
+                r2.at "X" . to_vector . should_equal [10]
+
        if setup.test_selection.supports_unicode_normalization then
            Test.specify "should allow range-based joins (using Between) for text with Unicode normalization" <|
                t1 = table_builder [["X", ['s\u0301', 's']], ["Y", [1, 2]]]
@ -368,16 +435,15 @@ spec setup =
            if setup.supports_custom_objects then
                t1 = table_builder [["X", [My_Type.Value 1 2, 2.0, 2]], ["Y", [10, 20, 30]]]
                t2 = table_builder [["Z", [2.0, 1.5, 2.0]], ["W", [1, 2, 3]]]
-                action3 = t1.join t2 join_kind=Join_Kind.Inner on=(Join_Condition.Equals "X" "Z") on_problems=_
-                tester3 table =
-                    expect_column_names ["X", "Y", "Z", "W"] table
-                    t1 = table.order_by ["Y", "W"]
-                    t1.at "X" . to_vector . should_equal [2.0, 2.0, 2, 2]
-                    t1.at "Y" . to_vector . should_equal [20, 20, 30, 30]
-                    t1.at "Z" . to_vector . should_equal [2.0, 2.0, 2.0, 2.0]
-                    t1.at "W" . to_vector . should_equal [1, 3, 1, 3]
-                problems3 = [Floating_Point_Equality.Error "Z", Floating_Point_Equality.Error "X"]
-                Problems.test_problem_handling action3 problems3 tester3
+                r3 = t1.join t2 join_kind=Join_Kind.Inner on=(Join_Condition.Equals "X" "Z") on_problems=Problem_Behavior.Report_Warning
+                r3.column_names.should_equal ["X", "Y", "Z", "W"]
+                r4 = r3.order_by ["Y", "W"]
+                r4.at "X" . to_vector . should_equal [2.0, 2.0, 2, 2]
+                r4.at "Y" . to_vector . should_equal [20, 20, 30, 30]
+                r4.at "Z" . to_vector . should_equal [2.0, 2.0, 2.0, 2.0]
+                r4.at "W" . to_vector . should_equal [1, 3, 1, 3]
+                expected_problems = [Floating_Point_Equality.Error "Z", Floating_Point_Equality.Error "X"]
+                Problems.get_attached_warnings r3 . should_contain_the_same_elements_as expected_problems

        Test.specify "should correctly handle nulls in equality conditions" pending=db_todo <|
            t1 = table_builder [["X", ["A", Nothing, "a", Nothing, "ą"]], ["Y", [0, 1, 2, 3, 4]]]
@ -650,6 +716,17 @@ spec setup =
                r3.at 3 . should_equal [2, 20, 2, Nothing, Nothing]
                r3.at 4 . should_equal [3, 30, 3, Nothing, Nothing]

+            t8 = table_builder [["X", [2, 99]], ["Y", [20, 99]], ["C", [5, 99]]]
+            t9 = t4_2.join t8 join_kind=Join_Kind.Full on=["X", "Y", "C"]
+            within_table t9 <|
+                t9.column_names . should_equal ["X", "Y", "C", "Right X", "Right Y", "Right C"]
+                r3 = materialize t9 . order_by ["X", "Right X"] . rows . map .to_vector
+                r3.length . should_equal 4
+                r3.at 0 . should_equal [Nothing, Nothing, Nothing, 99, 99, 99]
+                r3.at 1 . should_equal [1, 10, 3, Nothing, Nothing, Nothing]
+                r3.at 2 . should_equal [2, 20, 5, 2, 20, 5]
+                r3.at 3 . should_equal [3, 30, 7, Nothing, Nothing, Nothing]
+
        Test.specify "should gracefully handle tables from different backends" <|
            alternative_connection = Database.connect (SQLite In_Memory)
            t0 = (Table.new [["X", [1, 2, 4]], ["Z", [10, 20, 30]]]).select_into_database_table alternative_connection "T0" temporary=True
--- a/test/Table_Tests/src/Helpers/Main.enso
+++ b/test/Table_Tests/src/Helpers/Main.enso
@ -2,11 +2,13 @@ from Standard.Base import all

 from Standard.Test import Test_Suite

+import project.Helpers.Sorted_List_Index_Spec
 import project.Helpers.Unique_Naming_Strategy_Spec
 import project.Helpers.Value_Type_Spec

 spec =
    Unique_Naming_Strategy_Spec.spec
+    Sorted_List_Index_Spec.spec
    Value_Type_Spec.spec

 main = Test_Suite.run_main spec
--- a/test/Table_Tests/src/Helpers/Sorted_List_Index_Spec.enso
+++ b/test/Table_Tests/src/Helpers/Sorted_List_Index_Spec.enso
@ -0,0 +1,64 @@
+from Standard.Base import all
+
+# We need this import to ensure that we depend on `Standard.Table`, so that the Java import of `org.enso.table` is valid.
+from Standard.Table import all
+
+from Standard.Test import Test, Test_Suite
+import Standard.Test.Extensions
+
+polyglot java import java.util.Comparator
+polyglot java import org.enso.table.data.table.join.between.SortedListIndex
+
+
+main = Test_Suite.run_main spec
+
+## White-box tests for `SortedListIndex.findSubRange`, complementing the
+   `Join_Condition.Between` test cases. They probe empty, single-element,
+   duplicate-heavy and boundary ranges directly, to catch off-by-one errors;
+   the expectations treat both bounds as inclusive and results as sorted.
+spec = Test.group "SortedListIndex (used for SortJoin)" <|
+    make_index vec = SortedListIndex.build vec Comparator.naturalOrder
+
+    v1 = [0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 10, 10, 10, 10, 11, 14, 17, 19]
+    v1_shuffled = v1.take (Index_Sub_Range.Sample v1.length) # presumably a random permutation of v1 (full-length sample), so the index cannot rely on pre-sorted input - TODO confirm
+    index1 = make_index v1_shuffled
+
+    Test.specify "should correctly handle empty matches" <|
+        Vector.from_polyglot_array (index1.findSubRange 9 9) . should_equal [] # 9 falls in the gap between 8 and 10
+        Vector.from_polyglot_array (index1.findSubRange -10 -2) . should_equal [] # entirely below the smallest element (0)
+        Vector.from_polyglot_array (index1.findSubRange 200 300) . should_equal [] # entirely above the largest element (19)
+        Vector.from_polyglot_array (index1.findSubRange 20 0) . should_equal [] # inverted bounds: lower > upper
+
+    Test.specify "should correctly handle single-element matches" <|
+        Vector.from_polyglot_array (index1.findSubRange 8 8) . should_equal [8]
+        Vector.from_polyglot_array (index1.findSubRange 12 16) . should_equal [14] # neither bound is present; only 14 lies between them
+        Vector.from_polyglot_array (index1.findSubRange 18 100) . should_equal [19] # upper bound past the largest element
+        Vector.from_polyglot_array (index1.findSubRange 19 100) . should_equal [19]
+        Vector.from_polyglot_array (index1.findSubRange 19 19) . should_equal [19]
+
+    Test.specify "should correctly handle matches" <|
+        Vector.from_polyglot_array (index1.findSubRange 4 6) . should_equal [4, 5, 6]
+        Vector.from_polyglot_array (index1.findSubRange 3 5) . should_equal [3, 3, 4, 5]
+
+        Vector.from_polyglot_array (index1.findSubRange 0 3) . should_equal [0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 3]
+        Vector.from_polyglot_array (index1.findSubRange 2 4) . should_equal [2, 2, 2, 3, 3, 4]
+        Vector.from_polyglot_array (index1.findSubRange 8 10) . should_equal [8, 10, 10, 10, 10]
+        Vector.from_polyglot_array (index1.findSubRange 8 11) . should_equal [8, 10, 10, 10, 10, 11]
+        Vector.from_polyglot_array (index1.findSubRange 8 12) . should_equal [8, 10, 10, 10, 10, 11] # 12 is absent, so the result equals the one for upper bound 11
+        Vector.from_polyglot_array (index1.findSubRange 9 12) . should_equal [10, 10, 10, 10, 11] # 9 is absent, so the range starts at the next element, 10
+
+    Test.specify "should correctly handle big all-equal ranges" <|
+        Vector.from_polyglot_array (index1.findSubRange 1 1) . should_equal [1, 1, 1, 1]
+        Vector.from_polyglot_array (index1.findSubRange 7 7) . should_equal [7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7]
+
+    Test.specify "other cases: empty index" <|
+        index2 = make_index []
+        Vector.from_polyglot_array (index2.findSubRange 1 5) . should_equal []
+
+    Test.specify "other cases: single element index" <|
+        index2 = make_index [5]
+        Vector.from_polyglot_array (index2.findSubRange 1 5) . should_equal [5]
+        Vector.from_polyglot_array (index2.findSubRange 5 5) . should_equal [5]
+        Vector.from_polyglot_array (index2.findSubRange 1 2) . should_equal []
+        Vector.from_polyglot_array (index2.findSubRange 2 1) . should_equal []
+        Vector.from_polyglot_array (index2.findSubRange 10 10) . should_equal []
--- a/test/Table_Tests/src/In_Memory/Join_Performance_Spec.enso
+++ b/test/Table_Tests/src/In_Memory/Join_Performance_Spec.enso
@ -1,118 +0,0 @@
-from Standard.Base import all
-
-from Standard.Table import all
-
-from Standard.Test import Test, Test_Suite
-import Standard.Test.Extensions
-from project.Util import all
-
-spec =
-    Test.group "[In-Memory] Table.join performance" <|
-        n = 10000
-        Test.specify "should efficiently compute equality joins" <|
-            vec = 0.up_to n . to_vector
-            vec2 = 1.up_to n+1 . to_vector
-            t1 = Table.new [["X", vec], ["Y", 0.up_to n . map (_ % 2)]]
-            t2 = Table.new [["B", [0, 1]]]
-            t3 = Table.new [["X", vec.reverse], ["Z", vec2]]
-
-            r1 = Duration.time_execution <|
-                t1.join t2 on=(Join_Condition.Equals "Y" "B")
-
-            r2 = Duration.time_execution <|
-                t1.join t3 on="X"
-            t4 = r2.second . order_by ["X"]
-            t4.at "X" . to_vector . should_equal <| vec
-            t4.at "Z" . to_vector . should_equal <| vec2.reverse
-
-            base_ms = r1.first.total_milliseconds
-            expected_max_time_ms = base_ms * 5 + 100
-            runtime_ms = r2.first.total_milliseconds
-            if runtime_ms > expected_max_time_ms then
-                Test.fail "Expected a join of "+n.to_text+"x"+n.to_text+" with linear result size to be efficient, but it took "+runtime_ms.to_text+"ms while a join of 2x"+n.to_text+" with the same result size took "+base_ms.to_text+"ms. The maximum time threshold for this operation to be deemed efficient has been estimated at "+expected_max_time_ms.to_text+"ms."
-
-        Test.specify "should efficiently compute equality joins mixed with other secondary conditions" <|
-            vec = 0.up_to n . to_vector
-            vec2 = 1.up_to n+1 . to_vector
-            t1 = Table.new [["X", vec], ["Y", 0.up_to n . map (_ % 2)], ["A", Vector.fill n "a"], ["B", Vector.fill n 9]]
-            t2 = Table.new [["B", [0, 1]], ["A", ["A", "A"]], ["l", [0, 0]], ["u", [20, 20]]]
-            t3 = Table.new [["X", vec.reverse], ["Z", vec2], ["A", Vector.fill n "a"], ["l", Vector.fill n 0], ["u", Vector.fill n 20]]
-
-            secondary_conditions = [Join_Condition.Equals_Ignore_Case "A", Join_Condition.Between "B" "l" "u"]
-
-            r1 = Duration.time_execution <|
-                t1.join t2 on=secondary_conditions+[Join_Condition.Equals "Y" "B"]
-
-            r2 = Duration.time_execution <|
-                t1.join t3 on=secondary_conditions+[Join_Condition.Equals "X" "X"]
-            t4 = r2.second . order_by ["X"]
-            t4.at "X" . to_vector . should_equal <| vec
-            t4.at "Z" . to_vector . should_equal <| vec2.reverse
-
-            base_ms = r1.first.total_milliseconds
-            expected_max_time_ms = base_ms * 5 + 100
-            runtime_ms = r2.first.total_milliseconds
-            if runtime_ms > expected_max_time_ms then
-                Test.fail "Expected a join of "+n.to_text+"x"+n.to_text+" with linear result size to be efficient, but it took "+runtime_ms.to_text+"ms while a join of 2x"+n.to_text+" with the same result size took "+base_ms.to_text+"ms. The maximum time threshold for this operation to be deemed efficient has been estimated at "+expected_max_time_ms.to_text+"ms."
-
-        Test.specify "should efficiently compute case-insensitive equality joins" <|
-            unique_text_for_number prefix i =
-                suffix = Text.from_utf_8 [97 + i%20]
-                prefix + i.to_text + "-" + suffix
-            lowers = 0.up_to n . map (unique_text_for_number "a")
-            uppers = 0.up_to n . map (unique_text_for_number "A")
-            t1 = Table.new [["X", lowers], ["Y", 0.up_to n . map i-> if i%2 == 0 then "a" else "b"], ["A", Vector.fill n 44], ["B", Vector.fill n 9], ["N", 0.up_to n . to_vector]]
-            t2 = Table.new [["B", ["A", "B", "a"]], ["A", [44, 44, 44]], ["l", [0, 0, 0]], ["u", [20, 20, 20]]]
-            t3 = Table.new [["X", uppers.reverse], ["Z", 1.up_to n+1 . to_vector], ["A", Vector.fill n 44], ["l", Vector.fill n 0], ["u", Vector.fill n 20]]
-
-            secondary_conditions = [Join_Condition.Equals "A", Join_Condition.Between "B" "l" "u"]
-
-            r1 = Duration.time_execution <|
-                t1.join t2 on=[Join_Condition.Equals_Ignore_Case "Y" "B"]+secondary_conditions
-            r1.second.row_count . should_equal (n + n/2)
-
-            r2 = Duration.time_execution <|
-                t1.join t3 on=[Join_Condition.Equals_Ignore_Case "X" "X"]+secondary_conditions
-            t4 = r2.second . order_by "N"
-            t4.row_count . should_equal n
-            t4.at "X" . to_vector . should_equal lowers
-            t4.at "Right X" . to_vector . should_equal uppers
-            t4.at "Z" . to_vector . should_equal <| 1.up_to n+1 . to_vector . reverse
-
-            base_ms = r1.first.total_milliseconds
-            expected_max_time_ms = base_ms * 5 + 100
-            runtime_ms = r2.first.total_milliseconds
-            if runtime_ms > expected_max_time_ms then
-                Test.fail "Expected a join of "+n.to_text+"x"+n.to_text+" with linear result size to be efficient, but it took "+runtime_ms.to_text+"ms while a join of 3x"+n.to_text+" with the same result size took "+base_ms.to_text+"ms. The maximum time threshold for this operation to be deemed efficient has been estimated at "+expected_max_time_ms.to_text+"ms."
-
-        Test.specify "should efficiently compute Between joins" pending="TODO in task https://www.pivotaltracker.com/story/show/183913337" <|
-            xs = 0.up_to n . map x-> x * 20
-            ls = 0.up_to n . map x-> x * 20 - 20
-            us = 0.up_to n . map x-> x * 20 + 5
-            t1 = Table.new [["X", xs], ["A", Vector.fill n "a"], ["B", Vector.fill n 44]]
-            # We set up the ranges so that each entry of `t1` will match 2, apart from the first entry matched only once.
-            t2 = Table.new [["l", [0, 10]], ["u", [20 * n, 20 * n + 100]], ["A", ["a", "A"]], ["B", [44, 44]]]
-            # Here also, each range from `t3` will match 2 entries of `t1`, apart from the first one.
-            t3 = Table.new [["l", ls], ["u", us], ["A", Vector.fill n "A"], ["B", Vector.fill n 44]]
-
-            conditions = [Join_Condition.Equals_Ignore_Case "A", Join_Condition.Between "X" "l" "u", Join_Condition.Equals "B"]
-
-            r1 = Duration.time_execution <|
-                t1.join t2 on=conditions
-            r1.second.row_count . should_equal (2*n - 1)
-
-            r2 = Duration.time_execution <|
-                t1.join t3 on=conditions
-            t4 = r2.second . order_by ["X", "l"]
-            t4.row_count . should_equal (2*n - 1)
-
-            t4.at "X" . to_vector . should_equal ((xs.flat_map x-> [x, x]) . drop (Last 1))
-            t4.at "l" . to_vector . should_equal (ls.zip (ls.drop 1) . flatten)+[ls.last]
-
-            base_ms = r1.first.total_milliseconds
-            expected_max_time_ms = base_ms * 5 + 100
-            runtime_ms = r2.first.total_milliseconds
-            if runtime_ms > expected_max_time_ms then
-                Test.fail "Expected a join of "+n.to_text+"x"+n.to_text+" with linear result size to be efficient, but it took "+runtime_ms.to_text+"ms while a join of 2x"+n.to_text+" with the same result size took "+base_ms.to_text+"ms. The maximum time threshold for this operation to be deemed efficient has been estimated at "+expected_max_time_ms.to_text+"ms."
-
-main = Test_Suite.run_main spec
--- a/test/Table_Tests/src/In_Memory/Main.enso
+++ b/test/Table_Tests/src/In_Memory/Main.enso
@ -8,7 +8,6 @@ import project.In_Memory.Column_Spec
 import project.In_Memory.Column_Format_Spec
 import project.In_Memory.Common_Spec
 import project.In_Memory.Integer_Overflow_Spec
-import project.In_Memory.Join_Performance_Spec
 import project.In_Memory.Lossy_Conversions_Spec
 import project.In_Memory.Parse_To_Table_Spec
 import project.In_Memory.Split_Tokenize_Spec
@ -29,7 +28,6 @@ spec =
    Table_Time_Of_Day_Spec.spec
    Aggregate_Column_Spec.spec
    Builders_Spec.spec
-    Join_Performance_Spec.spec
    Split_Tokenize_Spec.spec
    Parse_To_Table_Spec.spec

--- a/test/Table_Tests/src/In_Memory/Table_Spec.enso
+++ b/test/Table_Tests/src/In_Memory/Table_Spec.enso
@ -871,42 +871,6 @@ spec =
                            t2.filter "Y" (Filter_Condition.Is_In in_vector) . at "Y" . to_vector . should_equal expected_neg_vector
                            t2.filter "Y" (Filter_Condition.Is_In in_column) . at "Y" . to_vector . should_equal expected_neg_vector

-        Test.specify "should perform `Is_In` efficiently for builtin types" <|
-            first_day = Date_Time.new 2000 1 1
-            make_date x = first_day + (Duration.new seconds=x)
-            init = Duration.time_execution <|
-                t = Table.new [["X", (200.up_to 10000 . to_vector)]]
-                vec = 4000.up_to 13000 . to_vector
-                expected_vector = 4000.up_to 10000 . to_vector
-                expected_vector_2 = 200.up_to 10000 . with_step 2 . to_vector
-                dates_vec = vec.map make_date
-                bool_vec = Vector.fill 7000 True
-                date_col = t.at "X" . map make_date
-                [t, vec, expected_vector, expected_vector_2, dates_vec, bool_vec, date_col]
-            t = init.second . at 0
-            vec = init.second . at 1
-            expected_vector = init.second . at 2
-            expected_vector_2 = init.second . at 3
-            dates_vec = init.second . at 4
-            bool_vec = init.second . at 5
-            date_col = init.second . at 6
-
-            expected_max_time_ms = init.first.total_milliseconds * 2
-            check_timing name ~action =
-                res = Duration.time_execution action
-                runtime_ms = res.first.total_milliseconds
-                if runtime_ms > expected_max_time_ms then
-                    Test.fail "Expected `Is_In` on "+name+" to be efficient, but it took "+runtime_ms.to_text+"ms while initialization itself took just "+expected_max_time_ms.to_text+"ms."
-
-            check_timing "integers" <|
-                t.filter "X" (Filter_Condition.Is_In vec) . at "X" . to_vector . should_equal expected_vector
-
-            check_timing "booleans" <|
-                t.filter (t.at "X" % 2 == 0) (Filter_Condition.Is_In bool_vec) . at "X" . to_vector . should_equal expected_vector_2
-
-            check_timing "dates" <|
-                t.filter date_col (Filter_Condition.Is_In dates_vec) . at "X" . to_vector . should_equal expected_vector
-
    Test.group "[In-Memory-specific] Table.join" <|
        Test.specify "should correctly report unsupported cross-backend joins" <|
            t = Table.new [["X", [1, 2, 3]]]