Implement Distinct for the Database backends (#4027)

Implements https://www.pivotaltracker.com/story/show/182307281
This commit is contained in:
Radosław Waśko 2023-01-11 23:46:54 +01:00 committed by GitHub
parent fe1cf9a9ce
commit 0088096a58
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
29 changed files with 414 additions and 98 deletions

View File

@ -270,6 +270,7 @@
- [Overhauled the JSON support (now based on JavaScript), `Data.fetch` and other
minor tweaks][3987]
- [Enable Date, Time and DateTime to be read and written to Excel.][3997]
- [Implemented `Table.distinct` for Database backends.][4027]
[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -423,6 +424,7 @@
[3987]: https://github.com/enso-org/enso/pull/3987
[3997]: https://github.com/enso-org/enso/pull/3997
[4013]: https://github.com/enso-org/enso/pull/4013
[4027]: https://github.com/enso-org/enso/pull/4027
#### Enso Compiler

View File

@ -126,7 +126,7 @@ type Connection
Database_Table_Module.make_table self name columns ctx
SQL_Query.Table_Name name ->
ctx = Context.for_table name (if alias == "" then name else alias)
columns = self.jdbc_connection.fetch_columns (self.dialect.generate_sql (Query.Select_All ctx))
columns = self.jdbc_connection.fetch_columns (self.dialect.generate_sql (Query.Select Nothing ctx))
Database_Table_Module.make_table self name columns ctx
## Execute the query and load the results into memory as a Table.

View File

@ -2,6 +2,7 @@ from Standard.Base import all
import Standard.Base.Error.Unimplemented.Unimplemented
from Standard.Table import Aggregate_Column, Join_Kind
import Standard.Table.Internal.Problem_Builder.Problem_Builder
import project.Connection.Connection.Connection
import project.Data.SQL_Statement.SQL_Statement
@ -56,6 +57,12 @@ type Dialect
prepare_join self =
Unimplemented.throw "This is an interface only."
## PRIVATE
Prepares a distinct operation.
prepare_distinct : Table -> Vector -> Boolean -> Problem_Builder -> Table
prepare_distinct self =
Unimplemented.throw "This is an interface only."
## PRIVATE
The dialect of SQLite databases.

View File

@ -23,7 +23,7 @@ import Standard.Table.Internal.Aggregate_Column_Helper
from Standard.Table.Data.Column import get_item_string
from Standard.Table.Data.Table import print_table
from Standard.Table.Internal.Filter_Condition_Helpers import make_filter_column
from Standard.Table.Errors import Column_Count_Mismatch, No_Index_Set_Error, No_Such_Column
from Standard.Table.Errors import Column_Count_Mismatch, No_Index_Set_Error, No_Such_Column, No_Input_Columns_Selected, No_Output_Columns
import project.Data.Column.Column
import project.Data.SQL_Statement.SQL_Statement
@ -624,7 +624,9 @@ type Table
input table.
When multiple rows have the same values within the specified columns, the
first row of each such set is returned.
first row of each such set is returned if possible, but in database
backends any row from each set may be returned (for example if the row
ordering is unspecified).
For the in-memory table, the unique rows will be in the order they
occurred in the input (this is not guaranteed for database operations).
@ -649,8 +651,19 @@ type Table
`Floating_Point_Grouping` warning.
distinct : Vector Text | Column_Selector -> Case_Sensitivity -> Problem_Behavior -> Table
distinct self (columns = Column_Selector.By_Name (self.columns.map .name)) case_sensitivity=Case_Sensitivity.Sensitive on_problems=Report_Warning =
_ = [columns, case_sensitivity, on_problems]
Error.throw (Unsupported_Database_Operation.Error "`Table.distinct` is not yet implemented for the database backend.")
problem_builder = Problem_Builder.new
warning_mapper error = case error of
No_Output_Columns -> Maybe.Some No_Input_Columns_Selected
_ -> Nothing
key_columns = Warning.map_warnings_and_errors warning_mapper <|
self.columns_helper.select_columns selector=columns reorder=True on_problems=on_problems
text_case_insensitive = case case_sensitivity of
Case_Sensitivity.Sensitive -> False
Case_Sensitivity.Insensitive locale ->
Helpers.assume_default_locale locale <|
True
new_table = self.connection.dialect.prepare_distinct self key_columns text_case_insensitive problem_builder
problem_builder.attach_problems_before on_problems new_table
## Joins two tables according to the specified join conditions.
@ -800,7 +813,7 @@ type Table
new_columns = partitioned.first
problems = partitioned.second
on_problems.attach_problems_before problems <|
self.updated_context_and_columns new_ctx new_columns
self.updated_context_and_columns new_ctx new_columns subquery=True
## Returns a new table with a chosen subset of columns left unchanged and
the other columns pivoted to rows with a single name field and a single
@ -908,7 +921,7 @@ type Table
computing too much we do not pass all the columns but only the first
one.
setup = self.context.as_subquery self.name [[self.internal_columns.first]]
new_ctx = Context.for_subquery setup.first
new_ctx = Context.for_subquery setup.subquery
query = Query.Select [[column_name, expr]] new_ctx
sql = self.connection.dialect.generate_sql query
table = self.connection.read_statement sql
@ -957,8 +970,8 @@ type Table
Naively wrapping each column in a `COUNT(...)` will not
always work as aggregates cannot be nested.
setup = self.context.as_subquery self.name [self.internal_columns]
new_ctx = Context.for_subquery setup.first
new_columns = setup.second.first.map column->
new_ctx = Context.for_subquery setup.subquery
new_columns = setup.new_columns.first.map column->
[column.name, SQL_Expression.Operation "COUNT" [column.expression]]
query = Query.Select new_columns new_ctx
self.connection.dialect.generate_sql query
@ -1007,8 +1020,24 @@ type Table
Arguments:
- ctx: The new context for this table.
- internal_columns: The new columns to include in the table.
- subquery: A boolean indicating whether the operation should be wrapped
in a subquery. This is a simple workaround for operations which may be
affected by further operations if not wrapped. For example, a group-by
may need to be wrapped in this way if a filter is to be performed on it
later on. Ideally, this should be done only on demand, if the
subsequent operation needs it and operations like join should try to
avoid nesting subqueries without necessity. However, for now, for
simplicity, we are always wrapping brittle operations. This may be
revised in the future, to generate better and more concise SQL code.
updated_context_and_columns : Context -> Vector Internal_Column -> Table
updated_context_and_columns self ctx internal_columns = Table.Value self.name self.connection internal_columns ctx
updated_context_and_columns self ctx internal_columns subquery=False = case subquery of
True ->
setup = ctx.as_subquery self.name [internal_columns]
new_ctx = Context.for_subquery setup.subquery
new_columns = setup.new_columns.first
Table.Value self.name self.connection new_columns new_ctx
False ->
Table.Value self.name self.connection internal_columns ctx
## PRIVATE

View File

@ -178,7 +178,7 @@ base_dialect =
functions = [["COALESCE", make_function "COALESCE"], ["ROW_MIN", make_function "MIN"], ["ROW_MAX", make_function "MAX"]]
agg = [fun "MAX", fun "MIN", fun "AVG", fun "SUM"]
counts = [fun "COUNT", ["COUNT_ROWS", make_constant "COUNT(*)"]]
text = [is_empty, bin "LIKE", simple_equals_ignore_case]
text = [is_empty, bin "LIKE", simple_equals_ignore_case, fold_case]
nulls = [["IS_NULL", make_right_unary_op "IS NULL"], ["FILL_NULL", make_function "COALESCE"]]
contains = [["IS_IN", make_is_in], ["IS_IN_COLUMN", make_is_in_column]]
base_map = Map.from_vector (arith + logic + compare + functions + agg + counts + text + nulls + contains)
@ -293,6 +293,11 @@ generate_from_part dialect from_spec = case from_spec of
sub = generate_query dialect (Query.Select columns context)
sub.paren ++ alias dialect as_name
## PRIVATE
fold_case = lift_unary_op "FOLD_CASE" arg->
code "LOWER(UPPER(" ++ arg ++ "))"
## PRIVATE
simple_equals_ignore_case = Base_Generator.lift_binary_op "equals_ignore_case" a-> b->
code "LOWER(UPPER(" ++ a ++ ")) = LOWER(UPPER(" ++ b ++ "))"
@ -377,10 +382,16 @@ generate_query : Internal_Dialect -> Query -> Builder
generate_query dialect query = case query of
Query.Select columns ctx ->
gen_column pair = (generate_expression dialect pair.second) ++ alias dialect pair.first
cols = SQL.join ", " (columns.map gen_column)
code "SELECT " ++ cols ++ generate_select_context dialect ctx
Query.Select_All ctx ->
code "SELECT * " ++ generate_select_context dialect ctx
cols = case columns of
Nothing -> code "*"
_ -> SQL.join ", " (columns.map gen_column)
prefix = case ctx.distinct_on of
Nothing -> code ""
expressions : Vector ->
# TODO I just realised this does not make sense in other backends than Postgres, so we should probably fail in such cases; probably rewrite into a generic modifier? or a transform?
generated = SQL.join ", " (expressions.map (generate_expression dialect))
code "DISTINCT ON (" ++ generated ++ ") "
code "SELECT " ++ prefix ++ cols ++ generate_select_context dialect ctx
Query.Insert table_name pairs ->
generate_insert_query dialect table_name pairs
_ -> Error.throw <| Unsupported_Database_Operation.Error "Unsupported query type."

View File

@ -0,0 +1,17 @@
from Standard.Base import all
from Standard.Table.Errors import Floating_Point_Grouping
import project.Internal.IR.SQL_Expression.SQL_Expression
## PRIVATE
   Builds the SQL expression to use as a distinct/grouping key for a single
   key column, reporting a warning for floating-point keys.

   Arguments:
   - text_case_insensitive: whether textual keys are compared ignoring case;
     when true, text columns are wrapped in the `FOLD_CASE` operation.
   - problem_builder: collects a `Floating_Point_Grouping` warning if the key
     column is a floating-point column.
   - key_column: the internal column whose expression becomes the key.
make_distinct_expression text_case_insensitive problem_builder key_column =
# Floating-point keys are permitted, but a warning is attached since float
# equality is unreliable for grouping.
if key_column.sql_type.is_definitely_double then
problem_builder.report_other_warning (Floating_Point_Grouping.Error key_column.name)
expr = key_column.expression
# Case folding is only applied to columns that are definitely text; all
# other types are compared as-is.
needs_case_fold = text_case_insensitive && key_column.sql_type.is_definitely_text
case needs_case_fold of
True -> SQL_Expression.Operation "FOLD_CASE" [expr]
False -> expr

View File

@ -105,16 +105,14 @@ prepare_subqueries left right needs_left_indicator needs_right_indicator =
# TODO [RW] Not all of these included columns are actually usable from the external context, so
# in the future we may consider pruning some of them as additional optimization and simplification of the query
# (the only columns that are needed are ones that the later performed join resolution needs).
left_config = left.context.as_subquery left_alias [left.internal_columns, left_indicators]
right_config = right.context.as_subquery right_alias [right.internal_columns, right_indicators]
left_sub = left.context.as_subquery left_alias [left.internal_columns, left_indicators]
right_sub = right.context.as_subquery right_alias [right.internal_columns, right_indicators]
left_subquery = left_config.first
new_left_columns = left_config.second.at 0
new_left_indicators = left_config.second.at 1
right_subquery = right_config.first
new_right_columns = right_config.second.at 0
new_right_indicators = right_config.second.at 1
new_left_columns = left_sub.new_columns.first
new_left_indicators = left_sub.new_columns.second
new_right_columns = right_sub.new_columns.first
new_right_indicators = right_sub.new_columns.second
left_setup = Join_Subquery_Setup.Value left_subquery new_left_columns left.internal_columns (new_left_indicators.get 0)
right_setup = Join_Subquery_Setup.Value right_subquery new_right_columns right.internal_columns (new_right_indicators.get 0)
left_setup = Join_Subquery_Setup.Value left_sub.subquery new_left_columns left.internal_columns (new_left_indicators.get 0)
right_setup = Join_Subquery_Setup.Value right_sub.subquery new_right_columns right.internal_columns (new_right_indicators.get 0)
Pair.new left_setup right_setup

View File

@ -19,7 +19,7 @@ type Context
- alias: An alias name to use for table within the query.
for_table : Text -> Text -> Context
for_table table_name alias=table_name =
Context.Value (From_Spec.Table table_name alias) [] [] [] Nothing
Context.Value (From_Spec.Table table_name alias) [] [] [] Nothing Nothing
## PRIVATE
@ -30,7 +30,7 @@ type Context
- alias: An alias name to use for table within the query.
for_query : Text -> Text -> Context
for_query raw_sql alias =
Context.Value (From_Spec.Query raw_sql alias) [] [] [] Nothing
Context.Value (From_Spec.Query raw_sql alias) [] [] [] Nothing Nothing
## PRIVATE
@ -40,7 +40,7 @@ type Context
- subquery: The subquery to lift into a context.
for_subquery : From_Spec -> Context
for_subquery subquery =
Context.Value subquery [] [] [] Nothing
Context.Value subquery [] [] [] Nothing Nothing
## PRIVATE
@ -63,7 +63,7 @@ type Context
grouped-by columns or aggregate expressions.
- limit: an optional maximum number of elements that the query should
return.
Value (from_spec : From_Spec) (where_filters : Vector SQL_Expression) (orders : Vector Order_Descriptor) (groups : Vector SQL_Expression) (limit : Nothing | Integer)
Value (from_spec : From_Spec) (where_filters : Vector SQL_Expression) (orders : Vector Order_Descriptor) (groups : Vector SQL_Expression) (limit : Nothing | Integer) (distinct_on : Nothing | Vector SQL_Expression)
## PRIVATE
@ -73,7 +73,7 @@ type Context
- new_filters: The new filters to set in the query.
set_where_filters : Vector SQL_Expression -> Context
set_where_filters self new_filters =
Context.Value self.from_spec new_filters self.orders self.groups self.limit
Context.Value self.from_spec new_filters self.orders self.groups self.limit self.distinct_on
## PRIVATE
@ -83,7 +83,7 @@ type Context
- new_orders: The new ordering clauses to set in the query.
set_orders : Vector Order_Descriptor -> Context
set_orders self new_orders =
Context.Value self.from_spec self.where_filters new_orders self.groups self.limit
Context.Value self.from_spec self.where_filters new_orders self.groups self.limit self.distinct_on
## PRIVATE
@ -100,7 +100,7 @@ type Context
- new_orders: The new ordering clauses to add to the query.
add_orders : Vector Order_Descriptor -> Context
add_orders self new_orders =
Context.Value self.from_spec self.where_filters new_orders+self.orders self.groups self.limit
Context.Value self.from_spec self.where_filters new_orders+self.orders self.groups self.limit self.distinct_on
## PRIVATE
@ -110,7 +110,7 @@ type Context
- new_groups: The new grouping clauses to set in the query.
set_groups : Vector SQL_Expression -> Context
set_groups self new_groups =
Context.Value self.from_spec self.where_filters self.orders new_groups self.limit
Context.Value self.from_spec self.where_filters self.orders new_groups self.limit self.distinct_on
## PRIVATE
@ -120,7 +120,14 @@ type Context
- new_limit: The new limit clauses to set in the query.
set_limit : (Nothing | Integer) -> Context
set_limit self new_limit =
Context.Value self.from_spec self.where_filters self.orders self.groups new_limit
Context.Value self.from_spec self.where_filters self.orders self.groups new_limit self.distinct_on
## PRIVATE
Returns a copy of the context with changed `distinct_on` expressions.
set_distinct_on : (Nothing | Vector SQL_Expression) -> Context
set_distinct_on self new_distinct_on =
Context.Value self.from_spec self.where_filters self.orders self.groups self.limit new_distinct_on
## PRIVATE
@ -136,8 +143,7 @@ type Context
to one from the original list but it is valid in the new context.
This is useful as a preprocessing step before combining queries, for example in a join.
# as_subquery : Text -> Vector (Vector Internal_Column) -> [From_Spec.Sub_Query, Vector (Vector Internal_Column)]
as_subquery : Text -> Vector Any -> Vector
as_subquery : Text -> Vector (Vector Internal_Column) -> Subquery_Setup
as_subquery self alias column_lists =
rewrite_internal_column : Internal_Column -> Internal_Column
rewrite_internal_column column =
@ -150,4 +156,7 @@ type Context
columns.map column-> [column.name, column.expression]
new_from = From_Spec.Sub_Query encapsulated_columns self alias
[new_from, new_columns]
Subquery_Setup.Value new_from new_columns
type Subquery_Setup
Value (subquery : From_Spec) (new_columns : Vector (Vector Internal_Column))

View File

@ -15,17 +15,10 @@ type Query
Arguments:
- expressions: List of pairs specifying the columns to materialize; each
is a pair whose first element is the name of the materialized column
and the second element is the expression to compute.
and the second element is the expression to compute. If `Nothing` is
provided, all available columns will be selected.
- context: The query context, see `Context` for more detail.
Select (expressions : Vector (Pair Text SQL_Expression)) (context : Context)
## PRIVATE
A Select SQL query that gets all columns in a table.
Arguments:
- context: The query context, see `Context` for more detail.
Select_All context
Select (expressions : Nothing | Vector (Pair Text SQL_Expression)) (context : Context)
## PRIVATE

View File

@ -1,8 +1,10 @@
from Standard.Base import all hiding First, Last
import Standard.Base.Error.Illegal_Argument.Illegal_Argument
import Standard.Base.Error.Illegal_State.Illegal_State
import Standard.Base.Error.Unimplemented.Unimplemented
import Standard.Table.Data.Aggregate_Column.Aggregate_Column
import Standard.Table.Internal.Problem_Builder.Problem_Builder
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all
import project.Connection.Connection.Connection
@ -11,7 +13,9 @@ import project.Data.SQL_Statement.SQL_Statement
import project.Data.SQL_Type.SQL_Type
import project.Data.Table.Table
import project.Internal.Base_Generator
import project.Internal.Common.Database_Distinct_Helper
import project.Internal.Common.Database_Join_Helper
import project.Internal.IR.Context.Context
import project.Internal.IR.From_Spec.From_Spec
import project.Internal.IR.SQL_Expression.SQL_Expression
import project.Internal.IR.Internal_Column.Internal_Column
@ -75,6 +79,18 @@ type Postgres_Dialect
prepare_join self connection join_kind new_table_name left_subquery right_subquery on_expressions where_expressions columns_to_select =
Database_Join_Helper.default_prepare_join connection join_kind new_table_name left_subquery right_subquery on_expressions where_expressions columns_to_select
## PRIVATE
Prepares a distinct operation.
prepare_distinct : Table -> Vector -> Boolean -> Problem_Builder -> Table
prepare_distinct self table key_columns text_case_insensitive problem_builder =
setup = table.context.as_subquery table.name+"_inner" [table.internal_columns]
new_columns = setup.new_columns.first
column_mapping = Map.from_vector <| new_columns.map c-> [c.name, c]
new_key_columns = key_columns.map c-> column_mapping.at c.name
distinct_expressions = new_key_columns.map (Database_Distinct_Helper.make_distinct_expression text_case_insensitive problem_builder)
new_context = Context.for_subquery setup.subquery . set_distinct_on distinct_expressions
table.updated_context_and_columns new_context new_columns subquery=True
## PRIVATE
make_internal_generator_dialect =
cases = [["LOWER", Base_Generator.make_function "LOWER"], ["UPPER", Base_Generator.make_function "UPPER"]]

View File

@ -3,6 +3,7 @@ import Standard.Base.Error.Illegal_Argument.Illegal_Argument
import Standard.Base.Error.Illegal_State.Illegal_State
import Standard.Table.Data.Aggregate_Column.Aggregate_Column
import Standard.Table.Internal.Problem_Builder.Problem_Builder
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all
import project.Connection.Connection.Connection
@ -11,11 +12,13 @@ import project.Data.SQL_Statement.SQL_Statement
import project.Data.SQL_Type.SQL_Type
import project.Data.Table.Table
import project.Internal.Base_Generator
import project.Internal.IR.Context.Context
import project.Internal.IR.From_Spec.From_Spec
import project.Internal.IR.Internal_Column.Internal_Column
import project.Internal.IR.SQL_Join_Kind.SQL_Join_Kind
import project.Internal.IR.Order_Descriptor.Order_Descriptor
import project.Internal.IR.Query.Query
import project.Internal.Common.Database_Distinct_Helper
import project.Internal.Common.Database_Join_Helper
from project.Data.SQL import code
@ -95,6 +98,18 @@ type SQLite_Dialect
# Other kinds of joins just fall back to the default logic.
Database_Join_Helper.default_prepare_join connection join_kind new_table_name left_subquery right_subquery on_expressions where_expressions columns_to_select
## PRIVATE
Prepares a distinct operation.
prepare_distinct : Table -> Vector -> Boolean -> Problem_Builder -> Table
prepare_distinct self table key_columns text_case_insensitive problem_builder =
setup = table.context.as_subquery table.name+"_inner" [table.internal_columns]
new_columns = setup.new_columns.first
column_mapping = Map.from_vector <| new_columns.map c-> [c.name, c]
new_key_columns = key_columns.map c-> column_mapping.at c.name
distinct_expressions = new_key_columns.map (Database_Distinct_Helper.make_distinct_expression text_case_insensitive problem_builder)
new_context = Context.for_subquery setup.subquery . set_groups distinct_expressions
table.updated_context_and_columns new_context new_columns subquery=True
## PRIVATE
make_internal_generator_dialect =
text = [starts_with, contains, ends_with]+concat_ops

View File

@ -620,7 +620,9 @@ type Table
input table.
When multiple rows have the same values within the specified columns, the
first row of each such set is returned.
first row of each such set is returned if possible, but in database
backends any row from each set may be returned (for example if the row
ordering is unspecified).
For the in-memory table, the unique rows will be in the order they
occurred in the input (this is not guaranteed for database operations).

View File

@ -3,7 +3,7 @@ import Standard.Base.Error.Illegal_Argument.Illegal_Argument
from project.Errors import Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Row, Mismatched_Quote, Additional_Invalid_Rows, Invalid_Aggregation, Floating_Point_Grouping, Unquoted_Delimiter, Additional_Warnings
polyglot java import org.enso.table.data.table.problems.AggregatedProblems
polyglot java import org.enso.table.problems.AggregatedProblems
polyglot java import org.enso.table.data.table.problems.FloatingPointGrouping
polyglot java import org.enso.table.data.table.problems.InvalidAggregation
polyglot java import org.enso.table.data.table.problems.UnquotedDelimiter

View File

@ -1,6 +1,6 @@
package org.enso.table.aggregations;
import org.enso.table.data.table.problems.AggregatedProblems;
import org.enso.table.problems.AggregatedProblems;
import org.enso.table.problems.Problem;
import java.util.Arrays;

View File

@ -6,7 +6,7 @@ import org.enso.table.data.column.builder.object.*;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.table.Column;
import org.enso.table.data.table.Table;
import org.enso.table.data.table.problems.AggregatedProblems;
import org.enso.table.problems.AggregatedProblems;
import org.enso.table.data.table.problems.FloatingPointGrouping;
import org.enso.table.util.ConstantList;

View File

@ -16,7 +16,7 @@ import org.enso.table.data.table.join.IndexJoin;
import org.enso.table.data.table.join.JoinCondition;
import org.enso.table.data.table.join.JoinResult;
import org.enso.table.data.table.join.JoinStrategy;
import org.enso.table.data.table.problems.AggregatedProblems;
import org.enso.table.problems.AggregatedProblems;
import org.enso.table.error.UnexpectedColumnTypeException;
import org.enso.table.operations.Distinct;
import org.enso.table.util.NameDeduplicator;

View File

@ -12,7 +12,7 @@ import org.enso.table.data.table.Column;
import org.enso.table.data.table.Table;
import org.enso.table.data.table.join.scan.Matcher;
import org.enso.table.data.table.join.scan.MatcherFactory;
import org.enso.table.data.table.problems.AggregatedProblems;
import org.enso.table.problems.AggregatedProblems;
import org.graalvm.collections.Pair;
public class IndexJoin implements JoinStrategy {

View File

@ -1,6 +1,6 @@
package org.enso.table.data.table.join;
import org.enso.table.data.table.problems.AggregatedProblems;
import org.enso.table.problems.AggregatedProblems;
import org.graalvm.collections.Pair;
import java.util.List;

View File

@ -1,6 +1,6 @@
package org.enso.table.data.table.join.scan;
import org.enso.table.data.table.problems.AggregatedProblems;
import org.enso.table.problems.AggregatedProblems;
public interface Matcher {
boolean matches(int left, int right);

View File

@ -13,7 +13,7 @@ import org.enso.table.data.table.join.Between;
import org.enso.table.data.table.join.Equals;
import org.enso.table.data.table.join.EqualsIgnoreCase;
import org.enso.table.data.table.join.JoinCondition;
import org.enso.table.data.table.problems.AggregatedProblems;
import org.enso.table.problems.AggregatedProblems;
import org.enso.table.data.table.problems.FloatingPointGrouping;
public class MatcherFactory {

View File

@ -1,16 +1,19 @@
package org.enso.table.operations;
import java.util.*;
import org.enso.base.text.TextFoldingStrategy;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.index.MultiValueKeyBase;
import org.enso.table.data.index.UnorderedMultiValueKey;
import org.enso.table.data.table.Column;
import org.enso.table.data.table.problems.AggregatedProblems;
import org.enso.table.data.table.problems.FloatingPointGrouping;
import org.enso.table.problems.AggregatedProblems;
import org.enso.table.util.ConstantList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashSet;
import java.util.List;
public class Distinct {
/** Creates a row mask containing only the first row from sets of rows grouped by key columns. */
public static BitSet buildDistinctRowsMask(
@ -29,7 +32,11 @@ public class Distinct {
UnorderedMultiValueKey key = new UnorderedMultiValueKey(storage, i, strategies);
if (key.hasFloatValues()) {
problems.add(new FloatingPointGrouping("Distinct", i));
final int row = i;
key.floatColumnPositions()
.forEach(
columnIx ->
problems.add(new FloatingPointGrouping(keyColumns[columnIx].getName(), row)));
}
if (!visitedRows.contains(key)) {

View File

@ -1,6 +1,6 @@
package org.enso.table.data.table.problems;
package org.enso.table.problems;
import org.enso.table.problems.Problem;
import org.enso.table.data.table.problems.ColumnAggregatedProblems;
import java.util.ArrayList;
import java.util.List;

View File

@ -0,0 +1,93 @@
from Standard.Base import all
from Standard.Table import Column_Selector, Sort_Column, Sort_Column_Selector
from Standard.Table.Errors import Floating_Point_Grouping
from Standard.Test import Test, Problems
import Standard.Test.Extensions
from project.Common_Table_Operations.Util import run_default_backend
# Entry point so this spec can be run standalone against the default backend.
main = run_default_backend spec

# Shared tests for `Table.distinct`, run against both the in-memory and
# Database backends. `setup` supplies the backend-specific `table_builder`,
# `materialize` and `test_selection` feature flags.
spec setup =
table_builder = setup.table_builder
materialize = setup.materialize
Test.group setup.prefix+"Table.distinct" <|
Test.specify "should group by all columns by default" <|
a = ["A", ["a", "b", "a", "b", "a", "b"]]
b = ["B", [2, 1, 2, 2, 2, 1]]
t = table_builder [a, b]
r = t.distinct on_problems=Report_Error |> materialize |> _.order_by ["A", "B"]
r.at "A" . to_vector . should_equal ["a", "b", "b"]
r.at "B" . to_vector . should_equal [2, 1, 2]
Test.specify "should allow to select distinct rows based on a subset of columns, returning any row from each group" <|
a = ["A", ["a", "a", "a", "a", "a", "a"]]
b = ["B", [1, 1, 2, 2, 1, 2]]
c = ["C", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]]
t = table_builder [a, b, c]
r1 = t.distinct (Column_Selector.By_Name ["A"]) on_problems=Report_Error |> materialize
r1.at "A" . to_vector . should_equal ["a"]
r1.at "B" . to_vector . should_equal [1]
r1.at "C" . to_vector . should_equal [0.1]
r2 = t.distinct ["A", "B"] on_problems=Report_Error |> materialize |> _.order_by "B"
r2.at "A" . to_vector . should_equal ["a", "a"]
r2.at "B" . to_vector . should_equal [1, 2]
# Backends are free to return any row from each group, so the "C" values
# are only checked for membership in the respective group.
cv = r2.at "C" . to_vector
[0.1, 0.2, 0.5].contains (cv.at 0) . should_be_true
[0.3, 0.4, 0.6].contains (cv.at 1) . should_be_true
# Only run on backends that guarantee the first row of each group is kept
# when the input has an explicit ordering (e.g. in-memory).
if setup.test_selection.distinct_returns_first_row_from_group_if_ordered then
Test.specify "should allow to select distinct rows based on a subset of columns, returning any first from each group if the table was ordered" <|
a = ["A", ["a", "a", "a", "a", "a", "a"]]
b = ["B", [1, 1, 2, 2, 1, 2]]
c = ["C", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]]
t = table_builder [a, b, c] . order_by (Sort_Column_Selector.By_Name [(Sort_Column.Name "C" Sort_Direction.Descending)])
r2 = t.distinct ["A", "B"] on_problems=Report_Error |> materialize |> _.order_by "B"
r2.at "A" . to_vector . should_equal ["a", "a"]
r2.at "B" . to_vector . should_equal [1, 2]
r2.at "C" . to_vector . should_equal [0.5, 0.6]
Test.specify "should allow to control case-sensitivity of keys" <|
x = ["X", ['A', 'a', 'enso', 'Enso', 'A']]
t1 = table_builder [x]
d1 = t1.distinct (Column_Selector.By_Name ["X"]) on_problems=Report_Error |> materialize |> _.order_by ["X"]
d1.at "X" . to_vector . should_equal ['A', 'Enso', 'a', 'enso']
d2 = t1.distinct (Column_Selector.By_Name ["X"]) case_sensitivity=Case_Sensitivity.Insensitive on_problems=Report_Error |> materialize |> _.order_by ["X"]
# With case-insensitive keys either variant of each word may survive, so
# only the count per case-insensitive group is asserted.
v = d2.at "X" . to_vector
v.length . should_equal 2
v.filter (_.equals_ignore_case "enso") . length . should_equal 1
v.filter (_.equals_ignore_case "a") . length . should_equal 1
Test.specify "should report a warning if the key contains floating point values" <|
t1 = table_builder [["X", [3.0, 1.0, 2.0, 2.0, 1.0]]]
action1 = t1.distinct on_problems=_
# The sum is used instead of exact ordering since the surviving rows may
# come back in any order depending on the backend.
tester1 table =
v = table.at "X" . to_vector
v.length . should_equal 3
v.fold 0 (+) . should_equal 6.0
problems1 = [Floating_Point_Grouping.Error "X"]
Problems.test_problem_handling action1 problems1 tester1
Test.specify "should handle nulls correctly" <|
a = ["A", ["a", Nothing, "b", "a", "b", Nothing, "a", "b"]]
b = ["B", [1, 2, 3, 4, 5, 6, 7, 8]]
t = table_builder [a, b]
r = t.distinct ["A"] on_problems=Report_Error |> materialize |> _.order_by "A"
va = r.at "A" . to_vector
vb = r.at "B" . to_vector
# All `Nothing` keys must collapse into a single group, sorted first.
va . should_equal [Nothing, "a", "b"]
va.at 0 . should_equal Nothing
[2, 6].contains (vb.at 0) . should_be_true
va.at 1 . should_equal "a"
[1, 4, 7].contains (vb.at 1) . should_be_true
va.at 2 . should_equal "b"
[3, 5, 8].contains (vb.at 2) . should_be_true

View File

@ -0,0 +1,118 @@
from Standard.Base import all
# The `Table` constructor is hidden because these tests must construct tables
# through the backend-agnostic `table_builder` provided by the test setup.
from Standard.Table import all hiding Table
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import Group_By, Count, Sum
from Standard.Test import Test, Problems
import Standard.Test.Extensions
from project.Common_Table_Operations.Util import run_default_backend
main = run_default_backend spec
## Tests interactions between pairs of table operations (aggregate, join,
distinct, filter, order_by), to check that composing them keeps working
on every backend — in particular on databases where each operation is
compiled into nested SQL.
spec setup =
table_builder = setup.table_builder
materialize = setup.materialize
Test.group setup.prefix+" Interactions Between various operations" <|
Test.specify "aggregates and joins" <|
t1 = table_builder [["Count", [1, 2, 3]], ["Class", ["X", "Y", "Z"]]]
t2 = table_builder [["Letter", ["A", "B", "A", "A", "C", "A", "C", "D", "D", "B", "B"]]]
# Letter frequencies: A -> 4, B -> 3, C -> 2, D -> 2.
t3 = t2.aggregate [Group_By "Letter", Count]
# Left outer join on the count; A (count 4) has no match in t1.
t4 = t3.join t1 on="Count" join_kind=Join_Kind.Left_Outer |> materialize |> _.order_by "Letter"
t4.columns.map .name . should_equal ["Letter", "Count", "Class"]
rows = t4.rows . map .to_vector
rows.at 0 . should_equal ["A", 4, Nothing]
rows.at 1 . should_equal ["B", 3, "Z"]
rows.at 2 . should_equal ["C", 2, "Y"]
rows.at 3 . should_equal ["D", 2, "Y"]
Test.specify "aggregates and distinct" <|
t2 = table_builder [["Letter", ["A", "B", "A", "A", "C", "C"]], ["Points", [2, 5, 2, 1, 10, 3]]]
# Sums per letter: A -> 5, B -> 5, C -> 13.
t3 = t2.aggregate [Group_By "Letter", Sum "Points"]
# Distinct on the sum collapses A and B (both 5) into a single row.
t4 = t3.distinct "Sum Points" |> materialize |> _.order_by "Sum Points"
t4.columns.map .name . should_equal ["Letter", "Sum Points"]
t4.row_count . should_equal 2
rows = t4.rows . map .to_vector
r1 = rows.at 0
r1.second . should_equal 5
# Whether the surviving 5-row is A or B is backend-dependent.
["A", "B"].contains r1.first . should_be_true
rows.at 1 . should_equal ["C", 13]
Test.specify "aggregates and filtering" <|
t2 = table_builder [["Letter", ["A", "B", "A", "A", "C", "C", "B"]], ["Points", [2, 5, 2, 1, 10, 3, 0]]]
# Sums per letter: A -> 5, B -> 5, C -> 13; the filter keeps A and B.
t3 = t2.aggregate [Group_By "Letter", Sum "Points"]
t4 = t3.filter "Sum Points" (Filter_Condition.Equal 5) |> materialize |> _.order_by "Letter"
t4.columns.map .name . should_equal ["Letter", "Sum Points"]
rows = t4.rows . map .to_vector
rows.at 0 . should_equal ["A", 5]
rows.at 1 . should_equal ["B", 5]
Test.specify "aggregates and ordering" <|
t1 = table_builder [["Letter", ["C", "A", "B", "A", "A", "C", "C", "B"]], ["Points", [0, -100, 5, 2, 1, 10, 3, 0]]]
# Sums per letter: A -> -97, B -> 5, C -> 13; ordering is by the aggregate.
t2 = t1.aggregate [Group_By "Letter", Sum "Points"]
t3 = t2.order_by "Sum Points" |> materialize
t3.columns.map .name . should_equal ["Letter", "Sum Points"]
t3.at "Letter" . to_vector . should_equal ["A", "B", "C"]
t3.at "Sum Points" . to_vector . should_equal [-97, 5, 13]
Test.specify "distinct and ordering" <|
t1 = table_builder [["X", [1, 2, 2, 1]], ["Y", ["a", "b", "b", "a"]], ["Z", [1, 2, 3, 4]]]
# These are 'adversarial' white-box examples constructed knowing that Postgres' DISTINCT ON does not play too well with ORDER BY and it needs to be handled carefully.
t2 = t1.order_by "X" . distinct "X" |> materialize
t2.row_count . should_equal 2
# Ordering by a column other than the distinct key must not break DISTINCT ON.
t3 = t1.order_by "Y" . distinct "X" |> materialize
t3.row_count . should_equal 2
# Re-ordering after the distinct must also compose correctly.
t4 = t1.order_by "Y" . distinct "X" . order_by "Y" |> materialize
t4.row_count . should_equal 2
if setup.test_selection.distinct_returns_first_row_from_group_if_ordered then
Test.specify "distinct and ordering if first row is returned after ordering" <|
a = ["A", ["a", "a", "a", "a", "a", "a"]]
b = ["B", [1, 1, 2, 2, 1, 2]]
c = ["C", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]]
t = table_builder [a, b, c] . order_by (Sort_Column_Selector.By_Name [(Sort_Column.Name "C" Sort_Direction.Descending)])
t2 = t.distinct ["A", "B"] on_problems=Report_Error
# Now, reverse the order!
## The distinct was taken under descending order on C, so that choice
must be preserved — we still have the _last_ row of each group in
ascending terms (i.e. the first row of each group in reversed order).
t3 = t2.order_by "C"
r = t3 |> materialize
r.at "A" . to_vector . should_equal ["a", "a"]
r.at "B" . to_vector . should_equal [1, 2]
r.at "C" . to_vector . should_equal [0.5, 0.6]
## It should matter whether we apply the filter _before_ or _after_ the
distinct operation.
This is easier to test when distinct is guaranteed to return the
first row of each group; if it may return an arbitrary row, it is
harder to write tests that distinguish the two cases (filter before
vs. filter after).
Test.specify "distinct and filtering" <|
a = ["A", ["a", "a", "b", "a", "b"]]
b = ["B", [1, 2, 5, 5, 2]]
c = ["C", [0.1, 0.2, 0.3, 0.4, 0.5]]
t = table_builder [a, b, c] . order_by "C"
# Distinct first: keeps the first row per A group -> (a,1) and (b,5).
t2 = t.distinct ["A"] on_problems=Report_Error
r2 = t2 |> materialize
r2.at "A" . to_vector . should_equal ["a", "b"]
r2.at "B" . to_vector . should_equal [1, 5]
# Filtering after distinct only sees the surviving rows: just (b,5).
t3 = t2.filter "B" (Filter_Condition.Equal 5)
r3 = t3 |> materialize
r3.at "A" . to_vector . should_equal ["b"]
r3.at "B" . to_vector . should_equal [5]
# Filtering before distinct keeps (b,5) and (a,5) — a different result.
t4 = t.filter "B" (Filter_Condition.Equal 5)
t5 = t4.distinct ["A"] on_problems=Report_Error
r5 = t5 |> materialize
r5.at "A" . to_vector . should_contain_the_same_elements_as ["b", "a"]
r5.at "B" . to_vector . should_equal [5, 5]

View File

@ -2,8 +2,10 @@ from Standard.Base import all
import project.Common_Table_Operations.Column_Operations_Spec
import project.Common_Table_Operations.Core_Spec
import project.Common_Table_Operations.Distinct_Spec
import project.Common_Table_Operations.Expression_Spec
import project.Common_Table_Operations.Filter_Spec
import project.Common_Table_Operations.Integration_Tests
import project.Common_Table_Operations.Join_Spec
import project.Common_Table_Operations.Missing_Values_Spec
import project.Common_Table_Operations.Order_By_Spec
@ -76,7 +78,11 @@ type Test_Selection
- supports_full_join: Specifies if the backend supports full joins.
SQLite doesn't so we need to disable them until we implement a proper
workaround.
Config supports_case_sensitive_columns=True order_by=True natural_ordering=False case_insensitive_ordering=True order_by_unicode_normalization_by_default=False case_insensitive_ascii_only=False take_drop=True allows_mixed_type_comparisons=True supports_unicode_normalization=False is_nan_and_nothing_distinct=True supports_full_join=True
- distinct_returns_first_row_from_group_if_ordered: If `order_by` was
applied before, the distinct operation will return the first row from
each group. Guaranteed in the in-memory backend, but may not be
supported by all databases.
Config supports_case_sensitive_columns=True order_by=True natural_ordering=False case_insensitive_ordering=True order_by_unicode_normalization_by_default=False case_insensitive_ascii_only=False take_drop=True allows_mixed_type_comparisons=True supports_unicode_normalization=False is_nan_and_nothing_distinct=True supports_full_join=True distinct_returns_first_row_from_group_if_ordered=True
spec setup =
Core_Spec.spec setup
@ -89,5 +95,7 @@ spec setup =
Take_Drop_Spec.spec setup
Expression_Spec.spec detailed=False setup
Join_Spec.spec setup
Distinct_Spec.spec setup
Integration_Tests.spec setup
main = run_default_backend spec

View File

@ -160,10 +160,10 @@ spec =
Test.group "[Codegen] Aggregation" <|
Test.specify "should allow to count rows" <|
code = t1.aggregate [Group_By "A" "A grp", Count "counter"] . to_sql . prepare
code . should_equal ['SELECT "T1"."A" AS "A grp", COUNT(*) AS "counter" FROM "T1" AS "T1" GROUP BY "T1"."A"', []]
code . should_equal ['SELECT "T1"."A grp" AS "A grp", "T1"."counter" AS "counter" FROM (SELECT "T1"."A" AS "A grp", COUNT(*) AS "counter" FROM "T1" AS "T1" GROUP BY "T1"."A") AS "T1"', []]
Test.specify "should allow to group by multiple fields" <|
code = t1.aggregate [Sum "A" "sum_a", Group_By "C" Nothing, Group_By "B" "B grp"] . to_sql . prepare
code . should_equal ['SELECT SUM("T1"."A") AS "sum_a", "T1"."C" AS "C", "T1"."B" AS "B grp" FROM "T1" AS "T1" GROUP BY "T1"."C", "T1"."B"', []]
code . should_equal ['SELECT "T1"."sum_a" AS "sum_a", "T1"."C" AS "C", "T1"."B grp" AS "B grp" FROM (SELECT SUM("T1"."A") AS "sum_a", "T1"."C" AS "C", "T1"."B" AS "B grp" FROM "T1" AS "T1" GROUP BY "T1"."C", "T1"."B") AS "T1"', []]
main = Test_Suite.run_main spec

View File

@ -85,9 +85,9 @@ postgres_specific_spec connection db_name =
connection.execute_update 'DROP VIEW "'+vinfo+'";'
connection.execute_update 'DROP TABLE "'+tinfo+'";'
tinfo = Name_Generator.random_name "Tinfo"
connection.execute_update 'CREATE TEMPORARY TABLE "'+tinfo+'" ("strs" VARCHAR, "ints" INTEGER, "bools" BOOLEAN, "reals" REAL, "doubles" DOUBLE PRECISION)'
Test.group "[PostgreSQL] Info" <|
tinfo = Name_Generator.random_name "Tinfo"
connection.execute_update 'CREATE TEMPORARY TABLE "'+tinfo+'" ("strs" VARCHAR, "ints" INTEGER, "bools" BOOLEAN, "reals" REAL, "doubles" DOUBLE PRECISION)'
t = connection.query (SQL_Query.Table_Name tinfo)
t.insert ["a", Nothing, False, 1.2, 0.000000000001]
t.insert ["abc", Nothing, Nothing, 1.3, Nothing]
@ -110,7 +110,14 @@ postgres_specific_spec connection db_name =
t.at "ints" . sql_type . is_definitely_integer . should_be_true
t.at "bools" . sql_type . is_definitely_boolean . should_be_true
t.at "reals" . sql_type . is_definitely_double . should_be_true
connection.execute_update 'DROP TABLE "'+tinfo+'"'
Test.group "[PostgreSQL] Dialect-specific codegen" <|
Test.specify "should generate queries for the Distinct operation" <|
t = connection.query (SQL_Query.Table_Name tinfo)
code_template = 'SELECT "{Tinfo}"."strs" AS "strs", "{Tinfo}"."ints" AS "ints", "{Tinfo}"."bools" AS "bools", "{Tinfo}"."reals" AS "reals", "{Tinfo}"."doubles" AS "doubles" FROM (SELECT DISTINCT ON ("{Tinfo}_inner"."strs") "{Tinfo}_inner"."strs" AS "strs", "{Tinfo}_inner"."ints" AS "ints", "{Tinfo}_inner"."bools" AS "bools", "{Tinfo}_inner"."reals" AS "reals", "{Tinfo}_inner"."doubles" AS "doubles" FROM (SELECT "{Tinfo}"."strs" AS "strs", "{Tinfo}"."ints" AS "ints", "{Tinfo}"."bools" AS "bools", "{Tinfo}"."reals" AS "reals", "{Tinfo}"."doubles" AS "doubles" FROM "{Tinfo}" AS "{Tinfo}") AS "{Tinfo}_inner") AS "{Tinfo}"'
expected_code = code_template.replace "{Tinfo}" tinfo
t.distinct ["strs"] . to_sql . prepare . should_equal [expected_code, []]
connection.execute_update 'DROP TABLE "'+tinfo+'"'
Test.group "[PostgreSQL] Table.aggregate should correctly infer result types" <|
name = Name_Generator.random_name "Ttypes"

View File

@ -73,9 +73,9 @@ sqlite_specific_spec connection =
action . should_fail_with SQL_Error.Error
action.catch.to_text . should_equal "There was an SQL error: [SQLITE_ERROR] SQL error or missing database (no such table: undefined_table). [Query was: SELECT A FROM undefined_table]"
tinfo = Name_Generator.random_name "Tinfo"
connection.execute_update 'CREATE TABLE "'+tinfo+'" ("strs" VARCHAR, "ints" INTEGER, "bools" BOOLEAN, "reals" REAL)'
Test.group "[SQLite] Metadata" <|
tinfo = Name_Generator.random_name "Tinfo"
connection.execute_update 'CREATE TABLE "'+tinfo+'" ("strs" VARCHAR, "ints" INTEGER, "bools" BOOLEAN, "reals" REAL)'
t = connection.query (SQL_Query.Table_Name tinfo)
t.insert ["a", Nothing, False, 1.2]
t.insert ["abc", Nothing, Nothing, 1.3]
@ -96,6 +96,13 @@ sqlite_specific_spec connection =
t.at "reals" . sql_type . is_definitely_boolean . should_be_false
t.at "bools" . sql_type . is_definitely_double . should_be_false
Test.group "[SQLite] Dialect-specific codegen" <|
Test.specify "should generate queries for the Distinct operation" <|
t = connection.query (SQL_Query.Table_Name tinfo)
code_template = 'SELECT "{Tinfo}"."strs" AS "strs", "{Tinfo}"."ints" AS "ints", "{Tinfo}"."bools" AS "bools", "{Tinfo}"."reals" AS "reals" FROM (SELECT "{Tinfo}_inner"."strs" AS "strs", "{Tinfo}_inner"."ints" AS "ints", "{Tinfo}_inner"."bools" AS "bools", "{Tinfo}_inner"."reals" AS "reals" FROM (SELECT "{Tinfo}"."strs" AS "strs", "{Tinfo}"."ints" AS "ints", "{Tinfo}"."bools" AS "bools", "{Tinfo}"."reals" AS "reals" FROM "{Tinfo}" AS "{Tinfo}") AS "{Tinfo}_inner" GROUP BY "{Tinfo}_inner"."strs") AS "{Tinfo}"'
expected_code = code_template.replace "{Tinfo}" tinfo
t.distinct ["strs"] . to_sql . prepare . should_equal [expected_code, []]
sqlite_spec connection prefix =
name_counter = Ref.new 0
table_builder columns =

View File

@ -709,33 +709,18 @@ spec =
row of ones sharing the same distinctness key. For database tests (to be
added later) we can not rely on ordering.
Test.group "[In-Memory] Table.distinct" <|
Test.specify "should allow to select distinct rows based on a subset of columns" <|
Test.specify "should allow to select distinct rows based on a subset of columns, returning the first row from each group" <|
a = ["A", ["a", "a", "a", "a", "a", "a"]]
b = ["B", [1, 1, 2, 2, 1, 2]]
c = ["C", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]]
t = Table.new [a, b, c]
r1 = t.distinct (Column_Selector.By_Name ["A"]) on_problems=Report_Error
r1.at "A" . to_vector . should_equal ["a"]
r1.at "B" . to_vector . should_equal [1]
r1.at "C" . to_vector . should_equal [0.1]
r2 = t.distinct (Column_Selector.By_Name ["A", "B"]) on_problems=Report_Error
r2.at "A" . to_vector . should_equal ["a", "a"]
r2.at "B" . to_vector . should_equal [1, 2]
r2.at "C" . to_vector . should_equal [0.1, 0.3]
r3 = t.distinct ["A"] on_problems=Report_Error
r3.at "A" . to_vector . should_equal ["a"]
r3.at "B" . to_vector . should_equal [1]
r3.at "C" . to_vector . should_equal [0.1]
r4 = t.distinct ["A", "B"] on_problems=Report_Error
r4.at "A" . to_vector . should_equal ["a", "a"]
r4.at "B" . to_vector . should_equal [1, 2]
r4.at "C" . to_vector . should_equal [0.1, 0.3]
Test.specify "should handle nulls correctly" <|
Test.specify "should handle nulls correctly and preserve original ordering" <|
a = ["A", ["a", Nothing, "b", "a", "b", Nothing, "a", "b"]]
b = ["B", [1, 2, 3, 4, 5, 6, 7, 8]]
t = Table.new [a, b]
@ -747,7 +732,7 @@ spec =
t1 = Table.new [["X", ['ś', 's\u0301', 's', 'ś']]]
t1.distinct . at "X" . to_vector . should_equal ['ś', 's']
Test.specify "should allow to control case-sensitivity of keys" <|
Test.specify "should allow to control case-sensitivity of keys, correctly handling Unicode folding" <|
x = ["X", ['A', 'a', 'enso', 'śledź', 'Enso', 'A', 's\u0301ledz\u0301']]
y = ["Y", [1, 2, 3, 4, 5, 6, 7]]
t1 = Table.new [x, y]
@ -767,14 +752,14 @@ spec =
action1 = t1.distinct on_problems=_
tester1 table =
table.at "X" . to_vector . should_equal [3.0, 1.0, 2.0]
problems1 = [Floating_Point_Grouping.Error "Distinct"]
problems1 = [Floating_Point_Grouping.Error "X"]
Problems.test_problem_handling action1 problems1 tester1
t2 = Table.new [["X", [1.00000000000001, -0.3, 1.00000000000002, 1.5, 1.00000000000002, 1.00000000000002]]]
action2 = t2.distinct on_problems=_
tester2 table =
table.at "X" . to_vector . should_equal [1.00000000000001, -0.3, 1.00000000000002, 1.5]
problems2 = [Floating_Point_Grouping.Error "Distinct"]
problems2 = [Floating_Point_Grouping.Error "X"]
Problems.test_problem_handling action2 problems2 tester2
Test.specify "should report a warning and report the whole table if no columns were selected" <|
@ -793,14 +778,6 @@ spec =
t = Table.new [["X", [My.Data 1 2, My.Data 3 4, My.Data 1 2]]]
t.distinct . should_fail_with Illegal_Argument.Error
Test.specify "should group by all columns by default" <|
a = ["A", ["a", "b", "a", "b", "a", "b"]]
b = ["B", [2, 1, 2, 2, 2, 1]]
t = Table.new [a, b]
r = t.distinct on_problems=Report_Error
r.at "A" . to_vector . should_equal ["a", "b", "b"]
r.at "B" . to_vector . should_equal [2, 1, 2]
Test.group "[In-Memory] Table.filter" <|
Test.specify "by a custom predicate" <|
t = Table.new [["ix", [1, 2, 3, 4, 5]], ["X", [5, 0, 4, 5, 1]]]