Implement Distinct for the Database backends (#4027)

Implements https://www.pivotaltracker.com/story/show/182307281
2024-12-23 13:02:07 +03:00 · 2023-01-11 23:46:54 +01:00 · 2023-01-11 23:46:54 +01:00 · 0088096a58
commit 0088096a58
parent fe1cf9a9ce
29 changed files with 414 additions and 98 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -270,6 +270,7 @@
 - [Overhauled the JSON support (now based of JavaScript), `Data.fetch` and other
  minor tweaks][3987]
 - [Enable Date, Time and DateTime to be read and written to Excel.][3997]
+- [Implemented `Table.distinct` for Database backends.][4027]

 [debug-shortcuts]:
  https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -423,6 +424,7 @@
 [3987]: https://github.com/enso-org/enso/pull/3987
 [3997]: https://github.com/enso-org/enso/pull/3997
 [4013]: https://github.com/enso-org/enso/pull/4013
+[4027]: https://github.com/enso-org/enso/pull/4027

 #### Enso Compiler

--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Connection/Connection.enso
@ -126,7 +126,7 @@ type Connection
            Database_Table_Module.make_table self name columns ctx
        SQL_Query.Table_Name name ->
            ctx = Context.for_table name (if alias == "" then name else alias)
-            columns = self.jdbc_connection.fetch_columns (self.dialect.generate_sql (Query.Select_All ctx))
+            columns = self.jdbc_connection.fetch_columns (self.dialect.generate_sql (Query.Select Nothing ctx))
            Database_Table_Module.make_table self name columns ctx

    ## Execute the query and load the results into memory as a Table.
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Dialect.enso
@ -2,6 +2,7 @@ from Standard.Base import all
 import Standard.Base.Error.Unimplemented.Unimplemented

 from Standard.Table import Aggregate_Column, Join_Kind
+import Standard.Table.Internal.Problem_Builder.Problem_Builder

 import project.Connection.Connection.Connection
 import project.Data.SQL_Statement.SQL_Statement
@ -56,6 +57,12 @@ type Dialect
    prepare_join self =
        Unimplemented.throw "This is an interface only."

+    ## PRIVATE
+       Prepares a distinct operation.
+    prepare_distinct : Table -> Vector -> Boolean -> Problem_Builder -> Table
+    prepare_distinct self =
+        Unimplemented.throw "This is an interface only."
+
 ## PRIVATE

   The dialect of SQLite databases.
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso
@ -23,7 +23,7 @@ import Standard.Table.Internal.Aggregate_Column_Helper
 from Standard.Table.Data.Column import get_item_string
 from Standard.Table.Data.Table import print_table
 from Standard.Table.Internal.Filter_Condition_Helpers import make_filter_column
-from Standard.Table.Errors import Column_Count_Mismatch, No_Index_Set_Error, No_Such_Column
+from Standard.Table.Errors import Column_Count_Mismatch, No_Index_Set_Error, No_Such_Column, No_Input_Columns_Selected, No_Output_Columns

 import project.Data.Column.Column
 import project.Data.SQL_Statement.SQL_Statement
@ -624,7 +624,9 @@ type Table
       input table.

       When multiple rows have the same values within the specified columns, the
-       first row of each such set is returned.
+       first row of each such set is returned if possible, but in database
+       backends any row from each set may be returned (for example if the row
+       ordering is unspecified).

       For the in-memory table, the unique rows will be in the order they
       occurred in the input (this is not guaranteed for database operations).
@ -649,8 +651,19 @@ type Table
           `Floating_Point_Grouping` warning.
    distinct : Vector Text | Column_Selector -> Case_Sensitivity -> Problem_Behavior -> Table
    distinct self (columns = Column_Selector.By_Name (self.columns.map .name)) case_sensitivity=Case_Sensitivity.Sensitive on_problems=Report_Warning =
-        _ = [columns, case_sensitivity, on_problems]
-        Error.throw (Unsupported_Database_Operation.Error "`Table.distinct` is not yet implemented for the database backend.")
+        problem_builder = Problem_Builder.new
+        warning_mapper error = case error of
+            No_Output_Columns -> Maybe.Some No_Input_Columns_Selected
+            _ -> Nothing
+        key_columns = Warning.map_warnings_and_errors warning_mapper <|
+            self.columns_helper.select_columns selector=columns reorder=True on_problems=on_problems
+        text_case_insensitive = case case_sensitivity of
+            Case_Sensitivity.Sensitive -> False
+            Case_Sensitivity.Insensitive locale ->
+                Helpers.assume_default_locale locale <|
+                    True
+        new_table = self.connection.dialect.prepare_distinct self key_columns text_case_insensitive problem_builder
+        problem_builder.attach_problems_before on_problems new_table

    ## Joins two tables according to the specified join conditions.

@ -800,7 +813,7 @@ type Table
            new_columns = partitioned.first
            problems = partitioned.second
            on_problems.attach_problems_before problems <|
-                self.updated_context_and_columns new_ctx new_columns
+                self.updated_context_and_columns new_ctx new_columns subquery=True

    ## Returns a new table with a chosen subset of columns left unchanged and
       the other columns pivoted to rows with a single name field and a single
@ -908,7 +921,7 @@ type Table
           computing too much we do not pass all the columns but only the first
           one.
        setup = self.context.as_subquery self.name [[self.internal_columns.first]]
-        new_ctx = Context.for_subquery setup.first
+        new_ctx = Context.for_subquery setup.subquery
        query = Query.Select [[column_name, expr]] new_ctx
        sql = self.connection.dialect.generate_sql query
        table = self.connection.read_statement sql
@ -957,8 +970,8 @@ type Table
               Naively wrapping each column in a `COUNT(...)` will not
               always work as aggregates cannot be nested.
            setup = self.context.as_subquery self.name [self.internal_columns]
-            new_ctx = Context.for_subquery setup.first
-            new_columns = setup.second.first.map column->
+            new_ctx = Context.for_subquery setup.subquery
+            new_columns = setup.new_columns.first.map column->
                [column.name, SQL_Expression.Operation "COUNT" [column.expression]]
            query = Query.Select new_columns new_ctx
            self.connection.dialect.generate_sql query
@ -1007,8 +1020,24 @@ type Table
       Arguments:
       - ctx: The new context for this table.
       - internal_columns: The new columns to include in the table.
+       - subquery: A boolean indicating whether the operation should be wrapped
+         in a subquery. This is a simple workaround for operations which may be
+         affected by further operations if not wrapped. For example, a group-by
+         may need to be wrapped in this way if a filter is to be performed on it
+         later on. Ideally, this should be done only on demand, if the
+         subsequent operation needs it and operations like join should try to
+         avoid nesting subqueries without necessity. However, for now, for
+         simplicity, we are always wrapping brittle operations. This may be
+         revised in the future, to generate better and more concise SQL code.
    updated_context_and_columns : Context -> Vector Internal_Column -> Table
-    updated_context_and_columns self ctx internal_columns = Table.Value self.name self.connection internal_columns ctx
+    updated_context_and_columns self ctx internal_columns subquery=False = case subquery of
+        True ->
+            setup = ctx.as_subquery self.name [internal_columns]
+            new_ctx = Context.for_subquery setup.subquery
+            new_columns = setup.new_columns.first
+            Table.Value self.name self.connection new_columns new_ctx
+        False ->
+            Table.Value self.name self.connection internal_columns ctx

    ## PRIVATE

--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso
@ -178,7 +178,7 @@ base_dialect =
    functions = [["COALESCE", make_function "COALESCE"], ["ROW_MIN", make_function "MIN"], ["ROW_MAX", make_function "MAX"]]
    agg = [fun "MAX", fun "MIN", fun "AVG", fun "SUM"]
    counts = [fun "COUNT", ["COUNT_ROWS", make_constant "COUNT(*)"]]
-    text = [is_empty, bin "LIKE", simple_equals_ignore_case]
+    text = [is_empty, bin "LIKE", simple_equals_ignore_case, fold_case]
    nulls = [["IS_NULL", make_right_unary_op "IS NULL"], ["FILL_NULL", make_function "COALESCE"]]
    contains = [["IS_IN", make_is_in], ["IS_IN_COLUMN", make_is_in_column]]
    base_map = Map.from_vector (arith + logic + compare + functions + agg + counts + text + nulls + contains)
@ -293,6 +293,11 @@ generate_from_part dialect from_spec = case from_spec of
        sub = generate_query dialect (Query.Select columns context)
        sub.paren ++ alias dialect as_name

+
+## PRIVATE
+fold_case = lift_unary_op "FOLD_CASE" arg->
+    code "LOWER(UPPER(" ++ arg ++ "))"
+
 ## PRIVATE
 simple_equals_ignore_case = Base_Generator.lift_binary_op "equals_ignore_case" a-> b->
    code "LOWER(UPPER(" ++ a ++ ")) = LOWER(UPPER(" ++ b ++ "))"
@ -377,10 +382,16 @@ generate_query : Internal_Dialect -> Query -> Builder
 generate_query dialect query = case query of
    Query.Select columns ctx ->
        gen_column pair = (generate_expression dialect pair.second) ++ alias dialect pair.first
-        cols = SQL.join ", " (columns.map gen_column)
-        code "SELECT " ++ cols ++ generate_select_context dialect ctx
-    Query.Select_All ctx ->
-        code "SELECT * " ++ generate_select_context dialect ctx
+        cols = case columns of
+            Nothing -> code "*"
+            _ -> SQL.join ", " (columns.map gen_column)
+        prefix = case ctx.distinct_on of
+            Nothing -> code ""
+            expressions : Vector ->
+                # TODO: `DISTINCT ON` is Postgres-specific syntax and does not make sense in other backends, so we should probably fail in such cases, or rewrite this into a generic modifier or transform.
+                generated = SQL.join ", " (expressions.map (generate_expression dialect))
+                code "DISTINCT ON (" ++ generated ++ ") "
+        code "SELECT " ++ prefix ++ cols ++ generate_select_context dialect ctx
    Query.Insert table_name pairs ->
        generate_insert_query dialect table_name pairs
    _ -> Error.throw <| Unsupported_Database_Operation.Error "Unsupported query type."
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Database_Distinct_Helper.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Database_Distinct_Helper.enso
@ -0,0 +1,17 @@
+from Standard.Base import all
+
+from Standard.Table.Errors import Floating_Point_Grouping
+
+import project.Internal.IR.SQL_Expression.SQL_Expression
+
+## PRIVATE
+make_distinct_expression text_case_insensitive problem_builder key_column =
+    if key_column.sql_type.is_definitely_double then
+        problem_builder.report_other_warning (Floating_Point_Grouping.Error key_column.name)
+
+    expr = key_column.expression
+
+    needs_case_fold = text_case_insensitive && key_column.sql_type.is_definitely_text
+    case needs_case_fold of
+        True -> SQL_Expression.Operation "FOLD_CASE" [expr]
+        False -> expr
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Database_Join_Helper.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Common/Database_Join_Helper.enso
@ -105,16 +105,14 @@ prepare_subqueries left right needs_left_indicator needs_right_indicator =
    # TODO [RW] Not all of these included columns are actually usable from the external context, so
    # in the future we may consider pruning some of them as additional optimization and simplification of the query
    # (the only columns that are needed are ones that the later performed join resolution needs).
-    left_config = left.context.as_subquery left_alias [left.internal_columns, left_indicators]
-    right_config = right.context.as_subquery right_alias [right.internal_columns, right_indicators]
+    left_sub = left.context.as_subquery left_alias [left.internal_columns, left_indicators]
+    right_sub = right.context.as_subquery right_alias [right.internal_columns, right_indicators]

-    left_subquery = left_config.first
-    new_left_columns = left_config.second.at 0
-    new_left_indicators = left_config.second.at 1
-    right_subquery = right_config.first
-    new_right_columns = right_config.second.at 0
-    new_right_indicators = right_config.second.at 1
+    new_left_columns = left_sub.new_columns.first
+    new_left_indicators = left_sub.new_columns.second
+    new_right_columns = right_sub.new_columns.first
+    new_right_indicators = right_sub.new_columns.second

-    left_setup = Join_Subquery_Setup.Value left_subquery new_left_columns left.internal_columns (new_left_indicators.get 0)
-    right_setup = Join_Subquery_Setup.Value right_subquery new_right_columns right.internal_columns (new_right_indicators.get 0)
+    left_setup = Join_Subquery_Setup.Value left_sub.subquery new_left_columns left.internal_columns (new_left_indicators.get 0)
+    right_setup = Join_Subquery_Setup.Value right_sub.subquery new_right_columns right.internal_columns (new_right_indicators.get 0)
    Pair.new left_setup right_setup
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/Context.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/Context.enso
@ -19,7 +19,7 @@ type Context
       - alias: An alias name to use for table within the query.
    for_table : Text -> Text -> Context
    for_table table_name alias=table_name =
-        Context.Value (From_Spec.Table table_name alias) [] [] [] Nothing
+        Context.Value (From_Spec.Table table_name alias) [] [] [] Nothing Nothing

    ## PRIVATE

@ -30,7 +30,7 @@ type Context
       - alias: An alias name to use for table within the query.
    for_query : Text -> Text -> Context
    for_query raw_sql alias =
-        Context.Value (From_Spec.Query raw_sql alias) [] [] [] Nothing
+        Context.Value (From_Spec.Query raw_sql alias) [] [] [] Nothing Nothing

    ## PRIVATE

@ -40,7 +40,7 @@ type Context
       - subquery: The subquery to lift into a context.
    for_subquery : From_Spec -> Context
    for_subquery subquery =
-        Context.Value subquery [] [] [] Nothing
+        Context.Value subquery [] [] [] Nothing Nothing

    ## PRIVATE

@ -63,7 +63,7 @@ type Context
         grouped-by columns or aggregate expressions.
       - limit: an optional maximum number of elements that the equery should
         return.
-    Value (from_spec : From_Spec) (where_filters : Vector SQL_Expression) (orders : Vector Order_Descriptor) (groups : Vector SQL_Expression) (limit : Nothing | Integer)
+    Value (from_spec : From_Spec) (where_filters : Vector SQL_Expression) (orders : Vector Order_Descriptor) (groups : Vector SQL_Expression) (limit : Nothing | Integer) (distinct_on : Nothing | Vector SQL_Expression)

    ## PRIVATE

@ -73,7 +73,7 @@ type Context
       - new_filters: The new filters to set in the query.
    set_where_filters : Vector SQL_Expression -> Context
    set_where_filters self new_filters =
-        Context.Value self.from_spec new_filters self.orders self.groups self.limit
+        Context.Value self.from_spec new_filters self.orders self.groups self.limit self.distinct_on

    ## PRIVATE

@ -83,7 +83,7 @@ type Context
       - new_orders: The new ordering clauses to set in the query.
    set_orders : Vector Order_Descriptor -> Context
    set_orders self new_orders =
-        Context.Value self.from_spec self.where_filters new_orders self.groups self.limit
+        Context.Value self.from_spec self.where_filters new_orders self.groups self.limit self.distinct_on

    ## PRIVATE

@ -100,7 +100,7 @@ type Context
       - new_orders: The new ordering clauses to add to the query.
    add_orders : Vector Order_Descriptor -> Context
    add_orders self new_orders =
-        Context.Value self.from_spec self.where_filters new_orders+self.orders self.groups self.limit
+        Context.Value self.from_spec self.where_filters new_orders+self.orders self.groups self.limit self.distinct_on

    ## PRIVATE

@ -110,7 +110,7 @@ type Context
       - new_groups: The new grouping clauses to set in the query.
    set_groups : Vector SQL_Expression -> Context
    set_groups self new_groups =
-        Context.Value self.from_spec self.where_filters self.orders new_groups self.limit
+        Context.Value self.from_spec self.where_filters self.orders new_groups self.limit self.distinct_on

    ## PRIVATE

@ -120,7 +120,14 @@ type Context
       - new_limit: The new limit clauses to set in the query.
    set_limit : (Nothing | Integer) -> Context
    set_limit self new_limit =
-       Context.Value self.from_spec self.where_filters self.orders self.groups new_limit
+       Context.Value self.from_spec self.where_filters self.orders self.groups new_limit self.distinct_on
+
+    ## PRIVATE
+
+       Returns a copy of the context with the `distinct_on` expressions replaced.
+    set_distinct_on : (Nothing | Vector SQL_Expression) -> Context
+    set_distinct_on self new_distinct_on =
+       Context.Value self.from_spec self.where_filters self.orders self.groups self.limit new_distinct_on

    ## PRIVATE

@ -136,8 +143,7 @@ type Context
       to one from the original list but it is valid in the new context.

       This is useful as a preprocessing step between combining queries, for example in a join.
-    # as_subquery : Text -> Vector (Vector Internal_Column) -> [From_Spec.Sub_Query, Vector (Vector Internal_Column)]
-    as_subquery : Text -> Vector Any -> Vector
+    as_subquery : Text -> Vector (Vector Internal_Column) -> Subquery_Setup
    as_subquery self alias column_lists =
        rewrite_internal_column : Internal_Column -> Internal_Column
        rewrite_internal_column column =
@ -150,4 +156,7 @@ type Context
            columns.map column-> [column.name, column.expression]
        new_from = From_Spec.Sub_Query encapsulated_columns self alias

-        [new_from, new_columns]
+        Subquery_Setup.Value new_from new_columns
+
+type Subquery_Setup
+    Value (subquery : From_Spec) (new_columns : Vector (Vector Internal_Column))
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/Query.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/IR/Query.enso
@ -15,17 +15,10 @@ type Query
       Arguments:
       - expressions: List of pairs specifying the columns to materialize; each
         is a pair whose first element is the name of the materialized column
-         and the second element is the expression to compute.
+         and the second element is the expression to compute. If `Nothing` is
+         provided, all available columns will be selected.
       - context: The query context, see `Context` for more detail.
-    Select (expressions : Vector (Pair Text SQL_Expression)) (context : Context)
-
-    ## PRIVATE
-
-       A Select SQL query that gets all columns in a table.
-
-       Arguments:
-       - context: The query context, see `Context` for more detail.
-    Select_All context
+    Select (expressions : Nothing | Vector (Pair Text SQL_Expression)) (context : Context)

    ## PRIVATE

--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso
@ -1,8 +1,10 @@
 from Standard.Base import all hiding First, Last
 import Standard.Base.Error.Illegal_Argument.Illegal_Argument
 import Standard.Base.Error.Illegal_State.Illegal_State
+import Standard.Base.Error.Unimplemented.Unimplemented

 import Standard.Table.Data.Aggregate_Column.Aggregate_Column
+import Standard.Table.Internal.Problem_Builder.Problem_Builder
 from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all

 import project.Connection.Connection.Connection
@ -11,7 +13,9 @@ import project.Data.SQL_Statement.SQL_Statement
 import project.Data.SQL_Type.SQL_Type
 import project.Data.Table.Table
 import project.Internal.Base_Generator
+import project.Internal.Common.Database_Distinct_Helper
 import project.Internal.Common.Database_Join_Helper
+import project.Internal.IR.Context.Context
 import project.Internal.IR.From_Spec.From_Spec
 import project.Internal.IR.SQL_Expression.SQL_Expression
 import project.Internal.IR.Internal_Column.Internal_Column
@ -75,6 +79,18 @@ type Postgres_Dialect
    prepare_join self connection join_kind new_table_name left_subquery right_subquery on_expressions where_expressions columns_to_select =
        Database_Join_Helper.default_prepare_join connection join_kind new_table_name left_subquery right_subquery on_expressions where_expressions columns_to_select

+    ## PRIVATE
+       Prepares a distinct operation.
+    prepare_distinct : Table -> Vector -> Boolean -> Problem_Builder -> Table
+    prepare_distinct self table key_columns text_case_insensitive problem_builder =
+        setup = table.context.as_subquery table.name+"_inner" [table.internal_columns]
+        new_columns = setup.new_columns.first
+        column_mapping = Map.from_vector <| new_columns.map c-> [c.name, c]
+        new_key_columns = key_columns.map c-> column_mapping.at c.name
+        distinct_expressions = new_key_columns.map (Database_Distinct_Helper.make_distinct_expression text_case_insensitive problem_builder)
+        new_context = Context.for_subquery setup.subquery . set_distinct_on distinct_expressions
+        table.updated_context_and_columns new_context new_columns subquery=True
+
 ## PRIVATE
 make_internal_generator_dialect =
    cases = [["LOWER", Base_Generator.make_function "LOWER"], ["UPPER", Base_Generator.make_function "UPPER"]]
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso
@ -3,6 +3,7 @@ import Standard.Base.Error.Illegal_Argument.Illegal_Argument
 import Standard.Base.Error.Illegal_State.Illegal_State

 import Standard.Table.Data.Aggregate_Column.Aggregate_Column
+import Standard.Table.Internal.Problem_Builder.Problem_Builder
 from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all

 import project.Connection.Connection.Connection
@ -11,11 +12,13 @@ import project.Data.SQL_Statement.SQL_Statement
 import project.Data.SQL_Type.SQL_Type
 import project.Data.Table.Table
 import project.Internal.Base_Generator
+import project.Internal.IR.Context.Context
 import project.Internal.IR.From_Spec.From_Spec
 import project.Internal.IR.Internal_Column.Internal_Column
 import project.Internal.IR.SQL_Join_Kind.SQL_Join_Kind
 import project.Internal.IR.Order_Descriptor.Order_Descriptor
 import project.Internal.IR.Query.Query
+import project.Internal.Common.Database_Distinct_Helper
 import project.Internal.Common.Database_Join_Helper

 from project.Data.SQL import code
@ -95,6 +98,18 @@ type SQLite_Dialect
            # Other kinds of joins just fall back to the default logic.
            Database_Join_Helper.default_prepare_join connection join_kind new_table_name left_subquery right_subquery on_expressions where_expressions columns_to_select

+    ## PRIVATE
+       Prepares a distinct operation.
+    prepare_distinct : Table -> Vector -> Boolean -> Problem_Builder -> Table
+    prepare_distinct self table key_columns text_case_insensitive problem_builder =
+        setup = table.context.as_subquery table.name+"_inner" [table.internal_columns]
+        new_columns = setup.new_columns.first
+        column_mapping = Map.from_vector <| new_columns.map c-> [c.name, c]
+        new_key_columns = key_columns.map c-> column_mapping.at c.name
+        distinct_expressions = new_key_columns.map (Database_Distinct_Helper.make_distinct_expression text_case_insensitive problem_builder)
+        new_context = Context.for_subquery setup.subquery . set_groups distinct_expressions
+        table.updated_context_and_columns new_context new_columns subquery=True
+
 ## PRIVATE
 make_internal_generator_dialect =
    text = [starts_with, contains, ends_with]+concat_ops
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso
@ -620,7 +620,9 @@ type Table
       input table.

       When multiple rows have the same values within the specified columns, the
-       first row of each such set is returned.
+       first row of each such set is returned if possible, but in database
+       backends any row from each set may be returned (for example if the row
+       ordering is unspecified).

       For the in-memory table, the unique rows will be in the order they
       occurred in the input (this is not guaranteed for database operations).
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Java_Problems.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Java_Problems.enso
@ -3,7 +3,7 @@ import Standard.Base.Error.Illegal_Argument.Illegal_Argument

 from project.Errors import Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Row, Mismatched_Quote, Additional_Invalid_Rows, Invalid_Aggregation, Floating_Point_Grouping, Unquoted_Delimiter, Additional_Warnings

-polyglot java import org.enso.table.data.table.problems.AggregatedProblems
+polyglot java import org.enso.table.problems.AggregatedProblems
 polyglot java import org.enso.table.data.table.problems.FloatingPointGrouping
 polyglot java import org.enso.table.data.table.problems.InvalidAggregation
 polyglot java import org.enso.table.data.table.problems.UnquotedDelimiter
--- a/std-bits/table/src/main/java/org/enso/table/aggregations/Aggregator.java
+++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Aggregator.java
@ -1,6 +1,6 @@
 package org.enso.table.aggregations;

-import org.enso.table.data.table.problems.AggregatedProblems;
+import org.enso.table.problems.AggregatedProblems;
 import org.enso.table.problems.Problem;

 import java.util.Arrays;
--- a/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueIndex.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueIndex.java
@ -6,7 +6,7 @@ import org.enso.table.data.column.builder.object.*;
 import org.enso.table.data.column.storage.Storage;
 import org.enso.table.data.table.Column;
 import org.enso.table.data.table.Table;
-import org.enso.table.data.table.problems.AggregatedProblems;
+import org.enso.table.problems.AggregatedProblems;
 import org.enso.table.data.table.problems.FloatingPointGrouping;
 import org.enso.table.util.ConstantList;

--- a/std-bits/table/src/main/java/org/enso/table/data/table/Table.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/Table.java
@ -16,7 +16,7 @@ import org.enso.table.data.table.join.IndexJoin;
 import org.enso.table.data.table.join.JoinCondition;
 import org.enso.table.data.table.join.JoinResult;
 import org.enso.table.data.table.join.JoinStrategy;
-import org.enso.table.data.table.problems.AggregatedProblems;
+import org.enso.table.problems.AggregatedProblems;
 import org.enso.table.error.UnexpectedColumnTypeException;
 import org.enso.table.operations.Distinct;
 import org.enso.table.util.NameDeduplicator;
--- a/std-bits/table/src/main/java/org/enso/table/data/table/join/IndexJoin.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/IndexJoin.java
@ -12,7 +12,7 @@ import org.enso.table.data.table.Column;
 import org.enso.table.data.table.Table;
 import org.enso.table.data.table.join.scan.Matcher;
 import org.enso.table.data.table.join.scan.MatcherFactory;
-import org.enso.table.data.table.problems.AggregatedProblems;
+import org.enso.table.problems.AggregatedProblems;
 import org.graalvm.collections.Pair;

 public class IndexJoin implements JoinStrategy {
--- a/std-bits/table/src/main/java/org/enso/table/data/table/join/JoinResult.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/JoinResult.java
@ -1,6 +1,6 @@
 package org.enso.table.data.table.join;

-import org.enso.table.data.table.problems.AggregatedProblems;
+import org.enso.table.problems.AggregatedProblems;
 import org.graalvm.collections.Pair;

 import java.util.List;
--- a/std-bits/table/src/main/java/org/enso/table/data/table/join/scan/Matcher.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/scan/Matcher.java
@ -1,6 +1,6 @@
 package org.enso.table.data.table.join.scan;

-import org.enso.table.data.table.problems.AggregatedProblems;
+import org.enso.table.problems.AggregatedProblems;

 public interface Matcher {
  boolean matches(int left, int right);
--- a/std-bits/table/src/main/java/org/enso/table/data/table/join/scan/MatcherFactory.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/join/scan/MatcherFactory.java
@ -13,7 +13,7 @@ import org.enso.table.data.table.join.Between;
 import org.enso.table.data.table.join.Equals;
 import org.enso.table.data.table.join.EqualsIgnoreCase;
 import org.enso.table.data.table.join.JoinCondition;
-import org.enso.table.data.table.problems.AggregatedProblems;
+import org.enso.table.problems.AggregatedProblems;
 import org.enso.table.data.table.problems.FloatingPointGrouping;

 public class MatcherFactory {
--- a/std-bits/table/src/main/java/org/enso/table/operations/Distinct.java
+++ b/std-bits/table/src/main/java/org/enso/table/operations/Distinct.java
@ -1,16 +1,19 @@
 package org.enso.table.operations;

-import java.util.*;
-
 import org.enso.base.text.TextFoldingStrategy;
 import org.enso.table.data.column.storage.Storage;
 import org.enso.table.data.index.MultiValueKeyBase;
 import org.enso.table.data.index.UnorderedMultiValueKey;
 import org.enso.table.data.table.Column;
-import org.enso.table.data.table.problems.AggregatedProblems;
 import org.enso.table.data.table.problems.FloatingPointGrouping;
+import org.enso.table.problems.AggregatedProblems;
 import org.enso.table.util.ConstantList;

+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.HashSet;
+import java.util.List;
+
 public class Distinct {
  /** Creates a row mask containing only the first row from sets of rows grouped by key columns. */
  public static BitSet buildDistinctRowsMask(
@ -29,7 +32,11 @@ public class Distinct {
        UnorderedMultiValueKey key = new UnorderedMultiValueKey(storage, i, strategies);

        if (key.hasFloatValues()) {
-          problems.add(new FloatingPointGrouping("Distinct", i));
+          final int row = i;
+          key.floatColumnPositions()
+              .forEach(
+                  columnIx ->
+                      problems.add(new FloatingPointGrouping(keyColumns[columnIx].getName(), row)));
        }

        if (!visitedRows.contains(key)) {
--- a/std-bits/table/src/main/java/org/enso/table/data/table/problems/AggregatedProblems.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/problems/AggregatedProblems.java
@ -1,6 +1,6 @@
-package org.enso.table.data.table.problems;
+package org.enso.table.problems;

-import org.enso.table.problems.Problem;
+import org.enso.table.data.table.problems.ColumnAggregatedProblems;

 import java.util.ArrayList;
 import java.util.List;
--- a/test/Table_Tests/src/Common_Table_Operations/Distinct_Spec.enso
+++ b/test/Table_Tests/src/Common_Table_Operations/Distinct_Spec.enso
@ -0,0 +1,93 @@
+from Standard.Base import all
+
+from Standard.Table import Column_Selector, Sort_Column, Sort_Column_Selector
+from Standard.Table.Errors import Floating_Point_Grouping
+
+from Standard.Test import Test, Problems
+import Standard.Test.Extensions
+
+from project.Common_Table_Operations.Util import run_default_backend
+
+main = run_default_backend spec
+
+spec setup =
+    table_builder = setup.table_builder
+    materialize = setup.materialize
+    Test.group setup.prefix+"Table.distinct" <|
+        Test.specify "should group by all columns by default" <|
+            a = ["A", ["a", "b", "a", "b", "a", "b"]]
+            b = ["B", [2, 1, 2, 2, 2, 1]]
+            t = table_builder [a, b]
+            r = t.distinct on_problems=Report_Error |> materialize |> _.order_by ["A", "B"]
+            r.at "A" . to_vector . should_equal ["a", "b", "b"]
+            r.at "B" . to_vector . should_equal [2, 1, 2]
+
+        Test.specify "should allow to select distinct rows based on a subset of columns, returning any row from each group" <|
+            a = ["A", ["a", "a", "a", "a", "a", "a"]]
+            b = ["B", [1, 1, 2, 2, 1, 2]]
+            c = ["C", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]]
+            t = table_builder [a, b, c]
+
+            r1 = t.distinct (Column_Selector.By_Name ["A"]) on_problems=Report_Error |> materialize
+            r1.at "A" . to_vector . should_equal ["a"]
+            r1.at "B" . to_vector . should_equal [1]
+            r1.at "C" . to_vector . should_equal [0.1]
+
+            r2 = t.distinct ["A", "B"] on_problems=Report_Error |> materialize |> _.order_by "B"
+            r2.at "A" . to_vector . should_equal ["a", "a"]
+            r2.at "B" . to_vector . should_equal [1, 2]
+            cv = r2.at "C" . to_vector
+            [0.1, 0.2, 0.5].contains (cv.at 0) . should_be_true
+            [0.3, 0.4, 0.6].contains (cv.at 1) . should_be_true
+
+        if setup.test_selection.distinct_returns_first_row_from_group_if_ordered then
+            Test.specify "should allow to select distinct rows based on a subset of columns, returning the first row from each group if the table was ordered" <|
+                a = ["A", ["a", "a", "a", "a", "a", "a"]]
+                b = ["B", [1, 1, 2, 2, 1, 2]]
+                c = ["C", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]]
+                t = table_builder [a, b, c] . order_by (Sort_Column_Selector.By_Name [(Sort_Column.Name "C" Sort_Direction.Descending)])
+
+                r2 = t.distinct ["A", "B"] on_problems=Report_Error |> materialize |> _.order_by "B"
+                r2.at "A" . to_vector . should_equal ["a", "a"]
+                r2.at "B" . to_vector . should_equal [1, 2]
+                r2.at "C" . to_vector . should_equal [0.5, 0.6]
+
+        Test.specify "should allow to control case-sensitivity of keys" <|
+            x = ["X", ['A', 'a', 'enso', 'Enso', 'A']]
+            t1 = table_builder [x]
+            d1 = t1.distinct (Column_Selector.By_Name ["X"]) on_problems=Report_Error |> materialize |> _.order_by ["X"]
+            d1.at "X" . to_vector . should_equal ['A', 'Enso', 'a', 'enso']
+
+            d2 = t1.distinct (Column_Selector.By_Name ["X"]) case_sensitivity=Case_Sensitivity.Insensitive on_problems=Report_Error |> materialize |> _.order_by ["X"]
+            v = d2.at "X" . to_vector
+            v.length . should_equal 2
+            v.filter (_.equals_ignore_case "enso") . length . should_equal 1
+            v.filter (_.equals_ignore_case "a") . length . should_equal 1
+
+        Test.specify "should report a warning if the key contains floating point values" <|
+            t1 = table_builder [["X", [3.0, 1.0, 2.0, 2.0, 1.0]]]
+            action1 = t1.distinct on_problems=_
+            tester1 table =
+                v = table.at "X" . to_vector
+                v.length . should_equal 3
+                v.fold 0 (+) . should_equal 6.0
+            problems1 = [Floating_Point_Grouping.Error "X"]
+            Problems.test_problem_handling action1 problems1 tester1
+
+        Test.specify "should handle nulls correctly" <|
+            a = ["A", ["a", Nothing, "b", "a", "b", Nothing, "a", "b"]]
+            b = ["B", [1, 2, 3, 4, 5, 6, 7, 8]]
+            t = table_builder [a, b]
+            r = t.distinct ["A"] on_problems=Report_Error |> materialize |> _.order_by "A"
+            va = r.at "A" . to_vector
+            vb = r.at "B" . to_vector
+            va . should_equal [Nothing, "a", "b"]
+
+            va.at 0 . should_equal Nothing
+            [2, 6].contains (vb.at 0) . should_be_true
+
+            va.at 1 . should_equal "a"
+            [1, 4, 7].contains (vb.at 1) . should_be_true
+
+            va.at 2 . should_equal "b"
+            [3, 5, 8].contains (vb.at 2) . should_be_true
--- a/test/Table_Tests/src/Common_Table_Operations/Integration_Tests.enso
+++ b/test/Table_Tests/src/Common_Table_Operations/Integration_Tests.enso
@ -0,0 +1,118 @@
+from Standard.Base import all
+
+# We hide the `Table` constructor because tests are supposed to use the backend-agnostic `table_builder` instead.
+from Standard.Table import all hiding Table
+from Standard.Table.Data.Aggregate_Column.Aggregate_Column import Group_By, Count, Sum
+
+from Standard.Test import Test, Problems
+import Standard.Test.Extensions
+
+from project.Common_Table_Operations.Util import run_default_backend
+
+main = run_default_backend spec
+
+spec setup =
+    table_builder = setup.table_builder
+    materialize = setup.materialize
+    Test.group setup.prefix+" Interactions Between various operations" <|
+        Test.specify "aggregates and joins" <|
+            t1 = table_builder [["Count", [1, 2, 3]], ["Class", ["X", "Y", "Z"]]]
+            t2 = table_builder [["Letter", ["A", "B", "A", "A", "C", "A", "C", "D", "D", "B", "B"]]]
+
+            t3 = t2.aggregate [Group_By "Letter", Count]
+            t4 = t3.join t1 on="Count" join_kind=Join_Kind.Left_Outer |> materialize |> _.order_by "Letter"
+            t4.columns.map .name . should_equal ["Letter", "Count", "Class"]
+            rows = t4.rows . map .to_vector
+            rows.at 0 . should_equal ["A", 4, Nothing]
+            rows.at 1 . should_equal ["B", 3, "Z"]
+            rows.at 2 . should_equal ["C", 2, "Y"]
+            rows.at 3 . should_equal ["D", 2, "Y"]
+
+        Test.specify "aggregates and distinct" <|
+            t2 = table_builder [["Letter", ["A", "B", "A", "A", "C", "C"]], ["Points", [2, 5, 2, 1, 10, 3]]]
+
+            t3 = t2.aggregate [Group_By "Letter", Sum "Points"]
+            t4 = t3.distinct "Sum Points" |> materialize |> _.order_by "Sum Points"
+            t4.columns.map .name . should_equal ["Letter", "Sum Points"]
+            t4.row_count . should_equal 2
+
+            rows = t4.rows . map .to_vector
+            r1 = rows.at 0
+            r1.second . should_equal 5
+            ["A", "B"].contains r1.first . should_be_true
+            rows.at 1 . should_equal ["C", 13]
+
+        Test.specify "aggregates and filtering" <|
+            t2 = table_builder [["Letter", ["A", "B", "A", "A", "C", "C", "B"]], ["Points", [2, 5, 2, 1, 10, 3, 0]]]
+
+            t3 = t2.aggregate [Group_By "Letter", Sum "Points"]
+            t4 = t3.filter "Sum Points" (Filter_Condition.Equal 5)  |> materialize |> _.order_by "Letter"
+            t4.columns.map .name . should_equal ["Letter", "Sum Points"]
+            rows = t4.rows . map .to_vector
+            rows.at 0 . should_equal ["A", 5]
+            rows.at 1 . should_equal ["B", 5]
+
+        Test.specify "aggregates and ordering" <|
+            t1 = table_builder [["Letter", ["C", "A", "B", "A", "A", "C", "C", "B"]], ["Points", [0, -100, 5, 2, 1, 10, 3, 0]]]
+            t2 = t1.aggregate [Group_By "Letter", Sum "Points"]
+            t3 = t2.order_by "Sum Points" |> materialize
+            t3.columns.map .name . should_equal ["Letter", "Sum Points"]
+            t3.at "Letter" . to_vector . should_equal ["A", "B", "C"]
+            t3.at "Sum Points" . to_vector . should_equal [-97, 5, 13]
+
+        Test.specify "distinct and ordering" <|
+            t1 = table_builder [["X", [1, 2, 2, 1]], ["Y", ["a", "b", "b", "a"]], ["Z", [1, 2, 3, 4]]]
+
+            # These are 'adversarial' white-box examples constructed knowing that Postgres' DISTINCT ON does not play too well with ORDER BY and it needs to be handled carefully.
+            t2 = t1.order_by "X" . distinct "X" |> materialize
+            t2.row_count . should_equal 2
+            t3 = t1.order_by "Y" . distinct "X" |> materialize
+            t3.row_count . should_equal 2
+            t4 = t1.order_by "Y" . distinct "X" . order_by "Y" |> materialize
+            t4.row_count . should_equal 2
+
+        if setup.test_selection.distinct_returns_first_row_from_group_if_ordered then
+            Test.specify "distinct and ordering if first row is returned after ordering" <|
+                a = ["A", ["a", "a", "a", "a", "a", "a"]]
+                b = ["B", [1, 1, 2, 2, 1, 2]]
+                c = ["C", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]]
+                t = table_builder [a, b, c] . order_by (Sort_Column_Selector.By_Name [(Sort_Column.Name "C" Sort_Direction.Descending)])
+
+                t2 = t.distinct ["A", "B"] on_problems=Report_Error
+                # Now, reverse the order!
+                ## But the distinct was taken under descending order, so that
+                   should be preserved - we will still have _last_ rows from
+                   each group (first in reversed order).
+                t3 = t2.order_by "C"
+                r = t3 |> materialize
+                r.at "A" . to_vector . should_equal ["a", "a"]
+                r.at "B" . to_vector . should_equal [1, 2]
+                r.at "C" . to_vector . should_equal [0.5, 0.6]
+
+            ## It should matter whether we do the filter _before_ or _after_ the
+               distinct operation.
+
+               It is easier to test this if we can rely on distinct returning
+               the first row; if it may return any row, it is harder to write
+               tests that distinguish the two cases (filter before and after).
+            Test.specify "distinct and filtering" <|
+                a = ["A", ["a", "a", "b", "a", "b"]]
+                b = ["B", [1, 2, 5, 5, 2]]
+                c = ["C", [0.1, 0.2, 0.3, 0.4, 0.5]]
+                t = table_builder [a, b, c] . order_by "C"
+
+                t2 = t.distinct ["A"] on_problems=Report_Error
+                r2 = t2 |> materialize
+                r2.at "A" . to_vector . should_equal ["a", "b"]
+                r2.at "B" . to_vector . should_equal [1, 5]
+
+                t3 = t2.filter "B" (Filter_Condition.Equal 5)
+                r3 = t3 |> materialize
+                r3.at "A" . to_vector . should_equal ["b"]
+                r3.at "B" . to_vector . should_equal [5]
+
+                t4 = t.filter "B" (Filter_Condition.Equal 5)
+                t5 = t4.distinct ["A"] on_problems=Report_Error
+                r5 = t5 |> materialize
+                r5.at "A" . to_vector . should_contain_the_same_elements_as ["b", "a"]
+                r5.at "B" . to_vector . should_equal [5, 5]
--- a/test/Table_Tests/src/Common_Table_Operations/Main.enso
+++ b/test/Table_Tests/src/Common_Table_Operations/Main.enso
@ -2,8 +2,10 @@ from Standard.Base import all

 import project.Common_Table_Operations.Column_Operations_Spec
 import project.Common_Table_Operations.Core_Spec
+import project.Common_Table_Operations.Distinct_Spec
 import project.Common_Table_Operations.Expression_Spec
 import project.Common_Table_Operations.Filter_Spec
+import project.Common_Table_Operations.Integration_Tests
 import project.Common_Table_Operations.Join_Spec
 import project.Common_Table_Operations.Missing_Values_Spec
 import project.Common_Table_Operations.Order_By_Spec
@ -76,7 +78,11 @@ type Test_Selection
       - supports_full_join: Specifies if the backend supports full joins.
         SQLite doesn't so we need to disable them until we implement a proper
         workaround.
-    Config supports_case_sensitive_columns=True order_by=True natural_ordering=False case_insensitive_ordering=True order_by_unicode_normalization_by_default=False case_insensitive_ascii_only=False take_drop=True allows_mixed_type_comparisons=True supports_unicode_normalization=False is_nan_and_nothing_distinct=True supports_full_join=True
+       - distinct_returns_first_row_from_group_if_ordered: If `order_by` was
+         applied before, the distinct operation will return the first row from
+         each group. Guaranteed in the in-memory backend, but may not be
+         supported by all databases.
+    Config supports_case_sensitive_columns=True order_by=True natural_ordering=False case_insensitive_ordering=True order_by_unicode_normalization_by_default=False case_insensitive_ascii_only=False take_drop=True allows_mixed_type_comparisons=True supports_unicode_normalization=False is_nan_and_nothing_distinct=True supports_full_join=True distinct_returns_first_row_from_group_if_ordered=True

 spec setup =
    Core_Spec.spec setup
@ -89,5 +95,7 @@ spec setup =
    Take_Drop_Spec.spec setup
    Expression_Spec.spec detailed=False setup
    Join_Spec.spec setup
+    Distinct_Spec.spec setup
+    Integration_Tests.spec setup

 main = run_default_backend spec
--- a/test/Table_Tests/src/Database/Codegen_Spec.enso
+++ b/test/Table_Tests/src/Database/Codegen_Spec.enso
@ -160,10 +160,10 @@ spec =
    Test.group "[Codegen] Aggregation" <|
        Test.specify "should allow to count rows" <|
            code = t1.aggregate [Group_By "A" "A grp", Count "counter"] . to_sql . prepare
-            code . should_equal ['SELECT "T1"."A" AS "A grp", COUNT(*) AS "counter" FROM "T1" AS "T1" GROUP BY "T1"."A"', []]
+            code . should_equal ['SELECT "T1"."A grp" AS "A grp", "T1"."counter" AS "counter" FROM (SELECT "T1"."A" AS "A grp", COUNT(*) AS "counter" FROM "T1" AS "T1" GROUP BY "T1"."A") AS "T1"', []]

        Test.specify "should allow to group by multiple fields" <|
            code = t1.aggregate [Sum "A" "sum_a", Group_By "C" Nothing, Group_By "B" "B grp"] . to_sql . prepare
-            code . should_equal ['SELECT SUM("T1"."A") AS "sum_a", "T1"."C" AS "C", "T1"."B" AS "B grp" FROM "T1" AS "T1" GROUP BY "T1"."C", "T1"."B"', []]
+            code . should_equal ['SELECT "T1"."sum_a" AS "sum_a", "T1"."C" AS "C", "T1"."B grp" AS "B grp" FROM (SELECT SUM("T1"."A") AS "sum_a", "T1"."C" AS "C", "T1"."B" AS "B grp" FROM "T1" AS "T1" GROUP BY "T1"."C", "T1"."B") AS "T1"', []]

 main = Test_Suite.run_main spec
--- a/test/Table_Tests/src/Database/Postgres_Spec.enso
+++ b/test/Table_Tests/src/Database/Postgres_Spec.enso
@ -85,9 +85,9 @@ postgres_specific_spec connection db_name =
        connection.execute_update 'DROP VIEW "'+vinfo+'";'
        connection.execute_update 'DROP TABLE "'+tinfo+'";'

+    tinfo = Name_Generator.random_name "Tinfo"
+    connection.execute_update 'CREATE TEMPORARY TABLE "'+tinfo+'" ("strs" VARCHAR, "ints" INTEGER, "bools" BOOLEAN, "reals" REAL, "doubles" DOUBLE PRECISION)'
    Test.group "[PostgreSQL] Info" <|
-        tinfo = Name_Generator.random_name "Tinfo"
-        connection.execute_update 'CREATE TEMPORARY TABLE "'+tinfo+'" ("strs" VARCHAR, "ints" INTEGER, "bools" BOOLEAN, "reals" REAL, "doubles" DOUBLE PRECISION)'
        t = connection.query (SQL_Query.Table_Name tinfo)
        t.insert ["a", Nothing, False, 1.2, 0.000000000001]
        t.insert ["abc", Nothing, Nothing, 1.3, Nothing]
@ -110,7 +110,14 @@ postgres_specific_spec connection db_name =
            t.at "ints" . sql_type . is_definitely_integer . should_be_true
            t.at "bools" . sql_type . is_definitely_boolean . should_be_true
            t.at "reals" . sql_type . is_definitely_double . should_be_true
-        connection.execute_update 'DROP TABLE "'+tinfo+'"'
+
+    Test.group "[PostgreSQL] Dialect-specific codegen" <|
+        Test.specify "should generate queries for the Distinct operation" <|
+            t = connection.query (SQL_Query.Table_Name tinfo)
+            code_template = 'SELECT "{Tinfo}"."strs" AS "strs", "{Tinfo}"."ints" AS "ints", "{Tinfo}"."bools" AS "bools", "{Tinfo}"."reals" AS "reals", "{Tinfo}"."doubles" AS "doubles" FROM (SELECT DISTINCT ON ("{Tinfo}_inner"."strs") "{Tinfo}_inner"."strs" AS "strs", "{Tinfo}_inner"."ints" AS "ints", "{Tinfo}_inner"."bools" AS "bools", "{Tinfo}_inner"."reals" AS "reals", "{Tinfo}_inner"."doubles" AS "doubles" FROM (SELECT "{Tinfo}"."strs" AS "strs", "{Tinfo}"."ints" AS "ints", "{Tinfo}"."bools" AS "bools", "{Tinfo}"."reals" AS "reals", "{Tinfo}"."doubles" AS "doubles" FROM "{Tinfo}" AS "{Tinfo}") AS "{Tinfo}_inner") AS "{Tinfo}"'
+            expected_code = code_template.replace "{Tinfo}" tinfo
+            t.distinct ["strs"] . to_sql . prepare . should_equal [expected_code, []]
+    connection.execute_update 'DROP TABLE "'+tinfo+'"'

    Test.group "[PostgreSQL] Table.aggregate should correctly infer result types" <|
        name = Name_Generator.random_name "Ttypes"
--- a/test/Table_Tests/src/Database/SQLite_Spec.enso
+++ b/test/Table_Tests/src/Database/SQLite_Spec.enso
@ -73,9 +73,9 @@ sqlite_specific_spec connection =
            action . should_fail_with SQL_Error.Error
            action.catch.to_text . should_equal "There was an SQL error: [SQLITE_ERROR] SQL error or missing database (no such table: undefined_table). [Query was: SELECT A FROM undefined_table]"

+    tinfo = Name_Generator.random_name "Tinfo"
+    connection.execute_update 'CREATE TABLE "'+tinfo+'" ("strs" VARCHAR, "ints" INTEGER, "bools" BOOLEAN, "reals" REAL)'
    Test.group "[SQLite] Metadata" <|
-        tinfo = Name_Generator.random_name "Tinfo"
-        connection.execute_update 'CREATE TABLE "'+tinfo+'" ("strs" VARCHAR, "ints" INTEGER, "bools" BOOLEAN, "reals" REAL)'
        t = connection.query (SQL_Query.Table_Name tinfo)
        t.insert ["a", Nothing, False, 1.2]
        t.insert ["abc", Nothing, Nothing, 1.3]
@ -96,6 +96,13 @@ sqlite_specific_spec connection =
            t.at "reals" . sql_type . is_definitely_boolean . should_be_false
            t.at "bools" . sql_type . is_definitely_double . should_be_false

+    Test.group "[SQLite] Dialect-specific codegen" <|
+        Test.specify "should generate queries for the Distinct operation" <|
+            t = connection.query (SQL_Query.Table_Name tinfo)
+            code_template = 'SELECT "{Tinfo}"."strs" AS "strs", "{Tinfo}"."ints" AS "ints", "{Tinfo}"."bools" AS "bools", "{Tinfo}"."reals" AS "reals" FROM (SELECT "{Tinfo}_inner"."strs" AS "strs", "{Tinfo}_inner"."ints" AS "ints", "{Tinfo}_inner"."bools" AS "bools", "{Tinfo}_inner"."reals" AS "reals" FROM (SELECT "{Tinfo}"."strs" AS "strs", "{Tinfo}"."ints" AS "ints", "{Tinfo}"."bools" AS "bools", "{Tinfo}"."reals" AS "reals" FROM "{Tinfo}" AS "{Tinfo}") AS "{Tinfo}_inner" GROUP BY "{Tinfo}_inner"."strs") AS "{Tinfo}"'
+            expected_code = code_template.replace "{Tinfo}" tinfo
+            t.distinct ["strs"] . to_sql . prepare . should_equal [expected_code, []]
+
 sqlite_spec connection prefix =
    name_counter = Ref.new 0
    table_builder columns =
--- a/test/Table_Tests/src/In_Memory/Table_Spec.enso
+++ b/test/Table_Tests/src/In_Memory/Table_Spec.enso
@ -709,33 +709,18 @@ spec =
       row of ones sharing the same distinctness key. For database tests (to be
       added later) we can not rely on ordering.
    Test.group "[In-Memory] Table.distinct" <|
-        Test.specify "should allow to select distinct rows based on a subset of columns" <|
+        Test.specify "should allow to select distinct rows based on a subset of columns, returning the first row from each group" <|
            a = ["A", ["a", "a", "a", "a", "a", "a"]]
            b = ["B", [1, 1, 2, 2, 1, 2]]
            c = ["C", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]]
            t = Table.new [a, b, c]

-            r1 = t.distinct (Column_Selector.By_Name ["A"]) on_problems=Report_Error
-            r1.at "A" . to_vector . should_equal ["a"]
-            r1.at "B" . to_vector . should_equal [1]
-            r1.at "C" . to_vector . should_equal [0.1]
-
            r2 = t.distinct (Column_Selector.By_Name ["A", "B"]) on_problems=Report_Error
            r2.at "A" . to_vector . should_equal ["a", "a"]
            r2.at "B" . to_vector . should_equal [1, 2]
            r2.at "C" . to_vector . should_equal [0.1, 0.3]

-            r3 = t.distinct ["A"] on_problems=Report_Error
-            r3.at "A" . to_vector . should_equal ["a"]
-            r3.at "B" . to_vector . should_equal [1]
-            r3.at "C" . to_vector . should_equal [0.1]
-
-            r4 = t.distinct ["A", "B"] on_problems=Report_Error
-            r4.at "A" . to_vector . should_equal ["a", "a"]
-            r4.at "B" . to_vector . should_equal [1, 2]
-            r4.at "C" . to_vector . should_equal [0.1, 0.3]
-
-        Test.specify "should handle nulls correctly" <|
+        Test.specify "should handle nulls correctly and preserve original ordering" <|
            a = ["A", ["a", Nothing, "b", "a", "b", Nothing, "a", "b"]]
            b = ["B", [1, 2, 3, 4, 5, 6, 7, 8]]
            t = Table.new [a, b]
@ -747,7 +732,7 @@ spec =
            t1 = Table.new [["X", ['ś', 's\u0301', 's', 'ś']]]
            t1.distinct . at "X" . to_vector . should_equal ['ś', 's']

-        Test.specify "should allow to control case-sensitivity of keys" <|
+        Test.specify "should allow to control case-sensitivity of keys, correctly handling Unicode folding" <|
            x = ["X", ['A', 'a', 'enso', 'śledź', 'Enso', 'A', 's\u0301ledz\u0301']]
            y = ["Y", [1, 2, 3, 4, 5, 6, 7]]
            t1 = Table.new [x, y]
@ -767,14 +752,14 @@ spec =
            action1 = t1.distinct on_problems=_
            tester1 table =
                table.at "X" . to_vector . should_equal [3.0, 1.0, 2.0]
-            problems1 = [Floating_Point_Grouping.Error "Distinct"]
+            problems1 = [Floating_Point_Grouping.Error "X"]
            Problems.test_problem_handling action1 problems1 tester1

            t2 = Table.new [["X", [1.00000000000001, -0.3, 1.00000000000002, 1.5, 1.00000000000002, 1.00000000000002]]]
            action2 = t2.distinct on_problems=_
            tester2 table =
                table.at "X" . to_vector . should_equal [1.00000000000001, -0.3, 1.00000000000002, 1.5]
-            problems2 = [Floating_Point_Grouping.Error "Distinct"]
+            problems2 = [Floating_Point_Grouping.Error "X"]
            Problems.test_problem_handling action2 problems2 tester2

        Test.specify "should report a warning and report the whole table if no columns were selected" <|
@ -793,14 +778,6 @@ spec =
            t = Table.new [["X", [My.Data 1 2, My.Data 3 4, My.Data 1 2]]]
            t.distinct . should_fail_with Illegal_Argument.Error

-        Test.specify "should group by all columns by default" <|
-            a = ["A", ["a", "b", "a", "b", "a", "b"]]
-            b = ["B", [2, 1, 2, 2, 2, 1]]
-            t = Table.new [a, b]
-            r = t.distinct on_problems=Report_Error
-            r.at "A" . to_vector . should_equal ["a", "b", "b"]
-            r.at "B" . to_vector . should_equal [2, 1, 2]
-
    Test.group "[In-Memory] Table.filter" <|
        Test.specify "by a custom predicate" <|
            t = Table.new [["ix", [1, 2, 3, 4, 5]], ["X", [5, 0, 4, 5, 1]]]