mirror of
https://github.com/enso-org/enso.git
synced 2024-12-23 13:02:07 +03:00
Implement Distinct for the Database backends (#4027)
Implements https://www.pivotaltracker.com/story/show/182307281
This commit is contained in:
parent
fe1cf9a9ce
commit
0088096a58
@ -270,6 +270,7 @@
|
||||
- [Overhauled the JSON support (now based on JavaScript), `Data.fetch` and other
|
||||
minor tweaks][3987]
|
||||
- [Enable Date, Time and DateTime to be read and written to Excel.][3997]
|
||||
- [Implemented `Table.distinct` for Database backends.][4027]
|
||||
|
||||
[debug-shortcuts]:
|
||||
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
||||
@ -423,6 +424,7 @@
|
||||
[3987]: https://github.com/enso-org/enso/pull/3987
|
||||
[3997]: https://github.com/enso-org/enso/pull/3997
|
||||
[4013]: https://github.com/enso-org/enso/pull/4013
|
||||
[4027]: https://github.com/enso-org/enso/pull/4027
|
||||
|
||||
#### Enso Compiler
|
||||
|
||||
|
@ -126,7 +126,7 @@ type Connection
|
||||
Database_Table_Module.make_table self name columns ctx
|
||||
SQL_Query.Table_Name name ->
|
||||
ctx = Context.for_table name (if alias == "" then name else alias)
|
||||
columns = self.jdbc_connection.fetch_columns (self.dialect.generate_sql (Query.Select_All ctx))
|
||||
columns = self.jdbc_connection.fetch_columns (self.dialect.generate_sql (Query.Select Nothing ctx))
|
||||
Database_Table_Module.make_table self name columns ctx
|
||||
|
||||
## Execute the query and load the results into memory as a Table.
|
||||
|
@ -2,6 +2,7 @@ from Standard.Base import all
|
||||
import Standard.Base.Error.Unimplemented.Unimplemented
|
||||
|
||||
from Standard.Table import Aggregate_Column, Join_Kind
|
||||
import Standard.Table.Internal.Problem_Builder.Problem_Builder
|
||||
|
||||
import project.Connection.Connection.Connection
|
||||
import project.Data.SQL_Statement.SQL_Statement
|
||||
@ -56,6 +57,12 @@ type Dialect
|
||||
prepare_join self =
|
||||
Unimplemented.throw "This is an interface only."
|
||||
|
||||
## PRIVATE
|
||||
Prepares a distinct operation.
|
||||
prepare_distinct : Table -> Vector -> Boolean -> Problem_Builder -> Table
|
||||
prepare_distinct self =
|
||||
Unimplemented.throw "This is an interface only."
|
||||
|
||||
## PRIVATE
|
||||
|
||||
The dialect of SQLite databases.
|
||||
|
@ -23,7 +23,7 @@ import Standard.Table.Internal.Aggregate_Column_Helper
|
||||
from Standard.Table.Data.Column import get_item_string
|
||||
from Standard.Table.Data.Table import print_table
|
||||
from Standard.Table.Internal.Filter_Condition_Helpers import make_filter_column
|
||||
from Standard.Table.Errors import Column_Count_Mismatch, No_Index_Set_Error, No_Such_Column
|
||||
from Standard.Table.Errors import Column_Count_Mismatch, No_Index_Set_Error, No_Such_Column, No_Input_Columns_Selected, No_Output_Columns
|
||||
|
||||
import project.Data.Column.Column
|
||||
import project.Data.SQL_Statement.SQL_Statement
|
||||
@ -624,7 +624,9 @@ type Table
|
||||
input table.
|
||||
|
||||
When multiple rows have the same values within the specified columns, the
|
||||
first row of each such set is returned.
|
||||
first row of each such set is returned if possible, but in database
|
||||
backends any row from each set may be returned (for example if the row
|
||||
ordering is unspecified).
|
||||
|
||||
For the in-memory table, the unique rows will be in the order they
|
||||
occurred in the input (this is not guaranteed for database operations).
|
||||
@ -649,8 +651,19 @@ type Table
|
||||
`Floating_Point_Grouping` warning.
|
||||
distinct : Vector Text | Column_Selector -> Case_Sensitivity -> Problem_Behavior -> Table
|
||||
distinct self (columns = Column_Selector.By_Name (self.columns.map .name)) case_sensitivity=Case_Sensitivity.Sensitive on_problems=Report_Warning =
|
||||
_ = [columns, case_sensitivity, on_problems]
|
||||
Error.throw (Unsupported_Database_Operation.Error "`Table.distinct` is not yet implemented for the database backend.")
|
||||
problem_builder = Problem_Builder.new
|
||||
warning_mapper error = case error of
|
||||
No_Output_Columns -> Maybe.Some No_Input_Columns_Selected
|
||||
_ -> Nothing
|
||||
key_columns = Warning.map_warnings_and_errors warning_mapper <|
|
||||
self.columns_helper.select_columns selector=columns reorder=True on_problems=on_problems
|
||||
text_case_insensitive = case case_sensitivity of
|
||||
Case_Sensitivity.Sensitive -> False
|
||||
Case_Sensitivity.Insensitive locale ->
|
||||
Helpers.assume_default_locale locale <|
|
||||
True
|
||||
new_table = self.connection.dialect.prepare_distinct self key_columns text_case_insensitive problem_builder
|
||||
problem_builder.attach_problems_before on_problems new_table
|
||||
|
||||
## Joins two tables according to the specified join conditions.
|
||||
|
||||
@ -800,7 +813,7 @@ type Table
|
||||
new_columns = partitioned.first
|
||||
problems = partitioned.second
|
||||
on_problems.attach_problems_before problems <|
|
||||
self.updated_context_and_columns new_ctx new_columns
|
||||
self.updated_context_and_columns new_ctx new_columns subquery=True
|
||||
|
||||
## Returns a new table with a chosen subset of columns left unchanged and
|
||||
the other columns pivoted to rows with a single name field and a single
|
||||
@ -908,7 +921,7 @@ type Table
|
||||
computing too much we do not pass all the columns but only the first
|
||||
one.
|
||||
setup = self.context.as_subquery self.name [[self.internal_columns.first]]
|
||||
new_ctx = Context.for_subquery setup.first
|
||||
new_ctx = Context.for_subquery setup.subquery
|
||||
query = Query.Select [[column_name, expr]] new_ctx
|
||||
sql = self.connection.dialect.generate_sql query
|
||||
table = self.connection.read_statement sql
|
||||
@ -957,8 +970,8 @@ type Table
|
||||
Naively wrapping each column in a `COUNT(...)` will not
|
||||
always work as aggregates cannot be nested.
|
||||
setup = self.context.as_subquery self.name [self.internal_columns]
|
||||
new_ctx = Context.for_subquery setup.first
|
||||
new_columns = setup.second.first.map column->
|
||||
new_ctx = Context.for_subquery setup.subquery
|
||||
new_columns = setup.new_columns.first.map column->
|
||||
[column.name, SQL_Expression.Operation "COUNT" [column.expression]]
|
||||
query = Query.Select new_columns new_ctx
|
||||
self.connection.dialect.generate_sql query
|
||||
@ -1007,8 +1020,24 @@ type Table
|
||||
Arguments:
|
||||
- ctx: The new context for this table.
|
||||
- internal_columns: The new columns to include in the table.
|
||||
- subquery: A boolean indicating whether the operation should be wrapped
|
||||
in a subquery. This is a simple workaround for operations which may be
|
||||
affected by further operations if not wrapped. For example, a group-by
|
||||
may need to be wrapped in this way if a filter is to be performed on it
|
||||
later on. Ideally, this should be done only on demand, if the
|
||||
subsequent operation needs it and operations like join should try to
|
||||
avoid nesting subqueries without necessity. However, for now, for
|
||||
simplicity, we are always wrapping brittle operations. This may be
|
||||
revised in the future, to generate better and more concise SQL code.
|
||||
updated_context_and_columns : Context -> Vector Internal_Column -> Table
|
||||
updated_context_and_columns self ctx internal_columns = Table.Value self.name self.connection internal_columns ctx
|
||||
updated_context_and_columns self ctx internal_columns subquery=False = case subquery of
|
||||
True ->
|
||||
setup = ctx.as_subquery self.name [internal_columns]
|
||||
new_ctx = Context.for_subquery setup.subquery
|
||||
new_columns = setup.new_columns.first
|
||||
Table.Value self.name self.connection new_columns new_ctx
|
||||
False ->
|
||||
Table.Value self.name self.connection internal_columns ctx
|
||||
|
||||
## PRIVATE
|
||||
|
||||
|
@ -178,7 +178,7 @@ base_dialect =
|
||||
functions = [["COALESCE", make_function "COALESCE"], ["ROW_MIN", make_function "MIN"], ["ROW_MAX", make_function "MAX"]]
|
||||
agg = [fun "MAX", fun "MIN", fun "AVG", fun "SUM"]
|
||||
counts = [fun "COUNT", ["COUNT_ROWS", make_constant "COUNT(*)"]]
|
||||
text = [is_empty, bin "LIKE", simple_equals_ignore_case]
|
||||
text = [is_empty, bin "LIKE", simple_equals_ignore_case, fold_case]
|
||||
nulls = [["IS_NULL", make_right_unary_op "IS NULL"], ["FILL_NULL", make_function "COALESCE"]]
|
||||
contains = [["IS_IN", make_is_in], ["IS_IN_COLUMN", make_is_in_column]]
|
||||
base_map = Map.from_vector (arith + logic + compare + functions + agg + counts + text + nulls + contains)
|
||||
@ -293,6 +293,11 @@ generate_from_part dialect from_spec = case from_spec of
|
||||
sub = generate_query dialect (Query.Select columns context)
|
||||
sub.paren ++ alias dialect as_name
|
||||
|
||||
|
||||
# Defines the unary `FOLD_CASE` operation: its single argument is rendered
# wrapped as `LOWER(UPPER(<arg>))`.
## PRIVATE
|
||||
fold_case = lift_unary_op "FOLD_CASE" arg->
|
||||
code "LOWER(UPPER(" ++ arg ++ "))"
|
||||
|
||||
## PRIVATE
|
||||
simple_equals_ignore_case = Base_Generator.lift_binary_op "equals_ignore_case" a-> b->
|
||||
code "LOWER(UPPER(" ++ a ++ ")) = LOWER(UPPER(" ++ b ++ "))"
|
||||
@ -377,10 +382,16 @@ generate_query : Internal_Dialect -> Query -> Builder
|
||||
generate_query dialect query = case query of
|
||||
Query.Select columns ctx ->
|
||||
gen_column pair = (generate_expression dialect pair.second) ++ alias dialect pair.first
|
||||
cols = SQL.join ", " (columns.map gen_column)
|
||||
code "SELECT " ++ cols ++ generate_select_context dialect ctx
|
||||
Query.Select_All ctx ->
|
||||
code "SELECT * " ++ generate_select_context dialect ctx
|
||||
cols = case columns of
|
||||
Nothing -> code "*"
|
||||
_ -> SQL.join ", " (columns.map gen_column)
|
||||
prefix = case ctx.distinct_on of
|
||||
Nothing -> code ""
|
||||
expressions : Vector ->
|
||||
# TODO: `DISTINCT ON` does not make sense in backends other than Postgres, so we should probably fail in such cases; consider rewriting this into a generic modifier or a transform.
|
||||
generated = SQL.join ", " (expressions.map (generate_expression dialect))
|
||||
code "DISTINCT ON (" ++ generated ++ ") "
|
||||
code "SELECT " ++ prefix ++ cols ++ generate_select_context dialect ctx
|
||||
Query.Insert table_name pairs ->
|
||||
generate_insert_query dialect table_name pairs
|
||||
_ -> Error.throw <| Unsupported_Database_Operation.Error "Unsupported query type."
|
||||
|
@ -0,0 +1,17 @@
|
||||
from Standard.Base import all
|
||||
|
||||
from Standard.Table.Errors import Floating_Point_Grouping
|
||||
|
||||
import project.Internal.IR.SQL_Expression.SQL_Expression
|
||||
|
||||
# Builds the SQL expression that represents one key column of a `distinct`
# operation.
#
# Arguments:
# - text_case_insensitive: whether text keys should be compared ignoring case.
# - problem_builder: receives a `Floating_Point_Grouping` problem (via
#   `report_other_warning`) when the key column is definitely a double.
# - key_column: the key column; its `sql_type`, `name` and `expression` are read.
#
# Returns the column's expression, wrapped in a `FOLD_CASE` operation when
# case folding applies to it, and unchanged otherwise.
## PRIVATE
|
||||
make_distinct_expression text_case_insensitive problem_builder key_column =
|
||||
# Report the column by name when its SQL type is definitely a double.
if key_column.sql_type.is_definitely_double then
|
||||
problem_builder.report_other_warning (Floating_Point_Grouping.Error key_column.name)
|
||||
|
||||
expr = key_column.expression
|
||||
|
||||
# Case folding is applied only when case-insensitivity was requested AND the
# column is definitely text; all other columns keep their expression as-is.
needs_case_fold = text_case_insensitive && key_column.sql_type.is_definitely_text
|
||||
case needs_case_fold of
|
||||
True -> SQL_Expression.Operation "FOLD_CASE" [expr]
|
||||
False -> expr
|
@ -105,16 +105,14 @@ prepare_subqueries left right needs_left_indicator needs_right_indicator =
|
||||
# TODO [RW] Not all of these included columns are actually usable from the external context, so
|
||||
# in the future we may consider pruning some of them as additional optimization and simplification of the query
|
||||
# (the only columns that are needed are ones that the later performed join resolution needs).
|
||||
left_config = left.context.as_subquery left_alias [left.internal_columns, left_indicators]
|
||||
right_config = right.context.as_subquery right_alias [right.internal_columns, right_indicators]
|
||||
left_sub = left.context.as_subquery left_alias [left.internal_columns, left_indicators]
|
||||
right_sub = right.context.as_subquery right_alias [right.internal_columns, right_indicators]
|
||||
|
||||
left_subquery = left_config.first
|
||||
new_left_columns = left_config.second.at 0
|
||||
new_left_indicators = left_config.second.at 1
|
||||
right_subquery = right_config.first
|
||||
new_right_columns = right_config.second.at 0
|
||||
new_right_indicators = right_config.second.at 1
|
||||
new_left_columns = left_sub.new_columns.first
|
||||
new_left_indicators = left_sub.new_columns.second
|
||||
new_right_columns = right_sub.new_columns.first
|
||||
new_right_indicators = right_sub.new_columns.second
|
||||
|
||||
left_setup = Join_Subquery_Setup.Value left_subquery new_left_columns left.internal_columns (new_left_indicators.get 0)
|
||||
right_setup = Join_Subquery_Setup.Value right_subquery new_right_columns right.internal_columns (new_right_indicators.get 0)
|
||||
left_setup = Join_Subquery_Setup.Value left_sub.subquery new_left_columns left.internal_columns (new_left_indicators.get 0)
|
||||
right_setup = Join_Subquery_Setup.Value right_sub.subquery new_right_columns right.internal_columns (new_right_indicators.get 0)
|
||||
Pair.new left_setup right_setup
|
||||
|
@ -19,7 +19,7 @@ type Context
|
||||
- alias: An alias name to use for table within the query.
|
||||
for_table : Text -> Text -> Context
|
||||
for_table table_name alias=table_name =
|
||||
Context.Value (From_Spec.Table table_name alias) [] [] [] Nothing
|
||||
Context.Value (From_Spec.Table table_name alias) [] [] [] Nothing Nothing
|
||||
|
||||
## PRIVATE
|
||||
|
||||
@ -30,7 +30,7 @@ type Context
|
||||
- alias: An alias name to use for table within the query.
|
||||
for_query : Text -> Text -> Context
|
||||
for_query raw_sql alias =
|
||||
Context.Value (From_Spec.Query raw_sql alias) [] [] [] Nothing
|
||||
Context.Value (From_Spec.Query raw_sql alias) [] [] [] Nothing Nothing
|
||||
|
||||
## PRIVATE
|
||||
|
||||
@ -40,7 +40,7 @@ type Context
|
||||
- subquery: The subquery to lift into a context.
|
||||
for_subquery : From_Spec -> Context
|
||||
for_subquery subquery =
|
||||
Context.Value subquery [] [] [] Nothing
|
||||
Context.Value subquery [] [] [] Nothing Nothing
|
||||
|
||||
## PRIVATE
|
||||
|
||||
@ -63,7 +63,7 @@ type Context
|
||||
grouped-by columns or aggregate expressions.
|
||||
- limit: an optional maximum number of elements that the query should
|
||||
return.
|
||||
Value (from_spec : From_Spec) (where_filters : Vector SQL_Expression) (orders : Vector Order_Descriptor) (groups : Vector SQL_Expression) (limit : Nothing | Integer)
|
||||
Value (from_spec : From_Spec) (where_filters : Vector SQL_Expression) (orders : Vector Order_Descriptor) (groups : Vector SQL_Expression) (limit : Nothing | Integer) (distinct_on : Nothing | Vector SQL_Expression)
|
||||
|
||||
## PRIVATE
|
||||
|
||||
@ -73,7 +73,7 @@ type Context
|
||||
- new_filters: The new filters to set in the query.
|
||||
set_where_filters : Vector SQL_Expression -> Context
|
||||
set_where_filters self new_filters =
|
||||
Context.Value self.from_spec new_filters self.orders self.groups self.limit
|
||||
Context.Value self.from_spec new_filters self.orders self.groups self.limit self.distinct_on
|
||||
|
||||
## PRIVATE
|
||||
|
||||
@ -83,7 +83,7 @@ type Context
|
||||
- new_orders: The new ordering clauses to set in the query.
|
||||
set_orders : Vector Order_Descriptor -> Context
|
||||
set_orders self new_orders =
|
||||
Context.Value self.from_spec self.where_filters new_orders self.groups self.limit
|
||||
Context.Value self.from_spec self.where_filters new_orders self.groups self.limit self.distinct_on
|
||||
|
||||
## PRIVATE
|
||||
|
||||
@ -100,7 +100,7 @@ type Context
|
||||
- new_orders: The new ordering clauses to add to the query.
|
||||
add_orders : Vector Order_Descriptor -> Context
|
||||
add_orders self new_orders =
|
||||
Context.Value self.from_spec self.where_filters new_orders+self.orders self.groups self.limit
|
||||
Context.Value self.from_spec self.where_filters new_orders+self.orders self.groups self.limit self.distinct_on
|
||||
|
||||
## PRIVATE
|
||||
|
||||
@ -110,7 +110,7 @@ type Context
|
||||
- new_groups: The new grouping clauses to set in the query.
|
||||
set_groups : Vector SQL_Expression -> Context
|
||||
set_groups self new_groups =
|
||||
Context.Value self.from_spec self.where_filters self.orders new_groups self.limit
|
||||
Context.Value self.from_spec self.where_filters self.orders new_groups self.limit self.distinct_on
|
||||
|
||||
## PRIVATE
|
||||
|
||||
@ -120,7 +120,14 @@ type Context
|
||||
- new_limit: The new limit clauses to set in the query.
|
||||
set_limit : (Nothing | Integer) -> Context
|
||||
set_limit self new_limit =
|
||||
Context.Value self.from_spec self.where_filters self.orders self.groups new_limit
|
||||
Context.Value self.from_spec self.where_filters self.orders self.groups new_limit self.distinct_on
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Returns a copy of the context with changed `distinct_on` expressions.
|
||||
set_distinct_on : (Nothing | Vector SQL_Expression) -> Context
|
||||
set_distinct_on self new_distinct_on =
|
||||
Context.Value self.from_spec self.where_filters self.orders self.groups self.limit new_distinct_on
|
||||
|
||||
## PRIVATE
|
||||
|
||||
@ -136,8 +143,7 @@ type Context
|
||||
to one from the original list but it is valid in the new context.
|
||||
|
||||
This is useful as a preprocessing step between combining queries, for example in a join.
|
||||
# as_subquery : Text -> Vector (Vector Internal_Column) -> [From_Spec.Sub_Query, Vector (Vector Internal_Column)]
|
||||
as_subquery : Text -> Vector Any -> Vector
|
||||
as_subquery : Text -> Vector (Vector Internal_Column) -> Subquery_Setup
|
||||
as_subquery self alias column_lists =
|
||||
rewrite_internal_column : Internal_Column -> Internal_Column
|
||||
rewrite_internal_column column =
|
||||
@ -150,4 +156,7 @@ type Context
|
||||
columns.map column-> [column.name, column.expression]
|
||||
new_from = From_Spec.Sub_Query encapsulated_columns self alias
|
||||
|
||||
[new_from, new_columns]
|
||||
Subquery_Setup.Value new_from new_columns
|
||||
|
||||
type Subquery_Setup
|
||||
Value (subquery : From_Spec) (new_columns : Vector (Vector Internal_Column))
|
||||
|
@ -15,17 +15,10 @@ type Query
|
||||
Arguments:
|
||||
- expressions: List of pairs specifying the columns to materialize; each
|
||||
is a pair whose first element is the name of the materialized column
|
||||
and the second element is the expression to compute.
|
||||
and the second element is the expression to compute. If `Nothing` is
|
||||
provided, all available columns will be selected.
|
||||
- context: The query context, see `Context` for more detail.
|
||||
Select (expressions : Vector (Pair Text SQL_Expression)) (context : Context)
|
||||
|
||||
## PRIVATE
|
||||
|
||||
A Select SQL query that gets all columns in a table.
|
||||
|
||||
Arguments:
|
||||
- context: The query context, see `Context` for more detail.
|
||||
Select_All context
|
||||
Select (expressions : Nothing | Vector (Pair Text SQL_Expression)) (context : Context)
|
||||
|
||||
## PRIVATE
|
||||
|
||||
|
@ -1,8 +1,10 @@
|
||||
from Standard.Base import all hiding First, Last
|
||||
import Standard.Base.Error.Illegal_Argument.Illegal_Argument
|
||||
import Standard.Base.Error.Illegal_State.Illegal_State
|
||||
import Standard.Base.Error.Unimplemented.Unimplemented
|
||||
|
||||
import Standard.Table.Data.Aggregate_Column.Aggregate_Column
|
||||
import Standard.Table.Internal.Problem_Builder.Problem_Builder
|
||||
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all
|
||||
|
||||
import project.Connection.Connection.Connection
|
||||
@ -11,7 +13,9 @@ import project.Data.SQL_Statement.SQL_Statement
|
||||
import project.Data.SQL_Type.SQL_Type
|
||||
import project.Data.Table.Table
|
||||
import project.Internal.Base_Generator
|
||||
import project.Internal.Common.Database_Distinct_Helper
|
||||
import project.Internal.Common.Database_Join_Helper
|
||||
import project.Internal.IR.Context.Context
|
||||
import project.Internal.IR.From_Spec.From_Spec
|
||||
import project.Internal.IR.SQL_Expression.SQL_Expression
|
||||
import project.Internal.IR.Internal_Column.Internal_Column
|
||||
@ -75,6 +79,18 @@ type Postgres_Dialect
|
||||
prepare_join self connection join_kind new_table_name left_subquery right_subquery on_expressions where_expressions columns_to_select =
|
||||
Database_Join_Helper.default_prepare_join connection join_kind new_table_name left_subquery right_subquery on_expressions where_expressions columns_to_select
|
||||
|
||||
# Arguments, as used by the body below:
# - table: the table to de-duplicate; its `context`, `name` and
#   `internal_columns` are read.
# - key_columns: the columns whose values decide which rows count as equal;
#   they are matched to the subquery's columns by `name`.
# - text_case_insensitive, problem_builder: forwarded unchanged to
#   `Database_Distinct_Helper.make_distinct_expression`.
## PRIVATE
|
||||
Prepares a distinct operation.
|
||||
prepare_distinct : Table -> Vector -> Boolean -> Problem_Builder -> Table
|
||||
prepare_distinct self table key_columns text_case_insensitive problem_builder =
|
||||
# Wrap the table's current query as a subquery aliased `<table name>_inner`,
# asking for all of its internal columns to be exposed from that subquery.
setup = table.context.as_subquery table.name+"_inner" [table.internal_columns]
|
||||
# Only one list of columns was passed in, so only the first list is read back.
new_columns = setup.new_columns.first
|
||||
# Look the key columns up by name among the subquery's columns.
# NOTE(review): presumably every key column is one of the table's internal
# columns, so `column_mapping.at` always finds it - confirm against callers.
column_mapping = Map.from_vector <| new_columns.map c-> [c.name, c]
|
||||
new_key_columns = key_columns.map c-> column_mapping.at c.name
|
||||
distinct_expressions = new_key_columns.map (Database_Distinct_Helper.make_distinct_expression text_case_insensitive problem_builder)
|
||||
# The key expressions are stored as the `distinct_on` of a fresh context built
# over the subquery.
new_context = Context.for_subquery setup.subquery . set_distinct_on distinct_expressions
|
||||
# `subquery=True` is passed so that the resulting query is itself wrapped in a
# subquery by `updated_context_and_columns`.
table.updated_context_and_columns new_context new_columns subquery=True
|
||||
|
||||
## PRIVATE
|
||||
make_internal_generator_dialect =
|
||||
cases = [["LOWER", Base_Generator.make_function "LOWER"], ["UPPER", Base_Generator.make_function "UPPER"]]
|
||||
|
@ -3,6 +3,7 @@ import Standard.Base.Error.Illegal_Argument.Illegal_Argument
|
||||
import Standard.Base.Error.Illegal_State.Illegal_State
|
||||
|
||||
import Standard.Table.Data.Aggregate_Column.Aggregate_Column
|
||||
import Standard.Table.Internal.Problem_Builder.Problem_Builder
|
||||
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all
|
||||
|
||||
import project.Connection.Connection.Connection
|
||||
@ -11,11 +12,13 @@ import project.Data.SQL_Statement.SQL_Statement
|
||||
import project.Data.SQL_Type.SQL_Type
|
||||
import project.Data.Table.Table
|
||||
import project.Internal.Base_Generator
|
||||
import project.Internal.IR.Context.Context
|
||||
import project.Internal.IR.From_Spec.From_Spec
|
||||
import project.Internal.IR.Internal_Column.Internal_Column
|
||||
import project.Internal.IR.SQL_Join_Kind.SQL_Join_Kind
|
||||
import project.Internal.IR.Order_Descriptor.Order_Descriptor
|
||||
import project.Internal.IR.Query.Query
|
||||
import project.Internal.Common.Database_Distinct_Helper
|
||||
import project.Internal.Common.Database_Join_Helper
|
||||
|
||||
from project.Data.SQL import code
|
||||
@ -95,6 +98,18 @@ type SQLite_Dialect
|
||||
# Other kinds of joins just fall back to the default logic.
|
||||
Database_Join_Helper.default_prepare_join connection join_kind new_table_name left_subquery right_subquery on_expressions where_expressions columns_to_select
|
||||
|
||||
# Arguments, as used by the body below:
# - table: the table to de-duplicate; its `context`, `name` and
#   `internal_columns` are read.
# - key_columns: the columns whose values decide which rows count as equal;
#   they are matched to the subquery's columns by `name`.
# - text_case_insensitive, problem_builder: forwarded unchanged to
#   `Database_Distinct_Helper.make_distinct_expression`.
## PRIVATE
|
||||
Prepares a distinct operation.
|
||||
prepare_distinct : Table -> Vector -> Boolean -> Problem_Builder -> Table
|
||||
prepare_distinct self table key_columns text_case_insensitive problem_builder =
|
||||
# Wrap the table's current query as a subquery aliased `<table name>_inner`,
# asking for all of its internal columns to be exposed from that subquery.
setup = table.context.as_subquery table.name+"_inner" [table.internal_columns]
|
||||
# Only one list of columns was passed in, so only the first list is read back.
new_columns = setup.new_columns.first
|
||||
# Look the key columns up by name among the subquery's columns.
# NOTE(review): presumably every key column is one of the table's internal
# columns, so `column_mapping.at` always finds it - confirm against callers.
column_mapping = Map.from_vector <| new_columns.map c-> [c.name, c]
|
||||
new_key_columns = key_columns.map c-> column_mapping.at c.name
|
||||
distinct_expressions = new_key_columns.map (Database_Distinct_Helper.make_distinct_expression text_case_insensitive problem_builder)
|
||||
# The key expressions are stored as the grouping expressions (`set_groups`) of a
# fresh context built over the subquery.
# NOTE(review): all columns, not just the keys, are still selected from this
# grouped context without aggregation; this presumably relies on SQLite
# accepting bare columns in a grouped query - confirm.
new_context = Context.for_subquery setup.subquery . set_groups distinct_expressions
|
||||
# `subquery=True` is passed so that the resulting query is itself wrapped in a
# subquery by `updated_context_and_columns`.
table.updated_context_and_columns new_context new_columns subquery=True
|
||||
|
||||
## PRIVATE
|
||||
make_internal_generator_dialect =
|
||||
text = [starts_with, contains, ends_with]+concat_ops
|
||||
|
@ -620,7 +620,9 @@ type Table
|
||||
input table.
|
||||
|
||||
When multiple rows have the same values within the specified columns, the
|
||||
first row of each such set is returned.
|
||||
first row of each such set is returned if possible, but in database
|
||||
backends any row from each set may be returned (for example if the row
|
||||
ordering is unspecified).
|
||||
|
||||
For the in-memory table, the unique rows will be in the order they
|
||||
occurred in the input (this is not guaranteed for database operations).
|
||||
|
@ -3,7 +3,7 @@ import Standard.Base.Error.Illegal_Argument.Illegal_Argument
|
||||
|
||||
from project.Errors import Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Row, Mismatched_Quote, Additional_Invalid_Rows, Invalid_Aggregation, Floating_Point_Grouping, Unquoted_Delimiter, Additional_Warnings
|
||||
|
||||
polyglot java import org.enso.table.data.table.problems.AggregatedProblems
|
||||
polyglot java import org.enso.table.problems.AggregatedProblems
|
||||
polyglot java import org.enso.table.data.table.problems.FloatingPointGrouping
|
||||
polyglot java import org.enso.table.data.table.problems.InvalidAggregation
|
||||
polyglot java import org.enso.table.data.table.problems.UnquotedDelimiter
|
||||
|
@ -1,6 +1,6 @@
|
||||
package org.enso.table.aggregations;
|
||||
|
||||
import org.enso.table.data.table.problems.AggregatedProblems;
|
||||
import org.enso.table.problems.AggregatedProblems;
|
||||
import org.enso.table.problems.Problem;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
@ -6,7 +6,7 @@ import org.enso.table.data.column.builder.object.*;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
import org.enso.table.data.table.Column;
|
||||
import org.enso.table.data.table.Table;
|
||||
import org.enso.table.data.table.problems.AggregatedProblems;
|
||||
import org.enso.table.problems.AggregatedProblems;
|
||||
import org.enso.table.data.table.problems.FloatingPointGrouping;
|
||||
import org.enso.table.util.ConstantList;
|
||||
|
||||
|
@ -16,7 +16,7 @@ import org.enso.table.data.table.join.IndexJoin;
|
||||
import org.enso.table.data.table.join.JoinCondition;
|
||||
import org.enso.table.data.table.join.JoinResult;
|
||||
import org.enso.table.data.table.join.JoinStrategy;
|
||||
import org.enso.table.data.table.problems.AggregatedProblems;
|
||||
import org.enso.table.problems.AggregatedProblems;
|
||||
import org.enso.table.error.UnexpectedColumnTypeException;
|
||||
import org.enso.table.operations.Distinct;
|
||||
import org.enso.table.util.NameDeduplicator;
|
||||
|
@ -12,7 +12,7 @@ import org.enso.table.data.table.Column;
|
||||
import org.enso.table.data.table.Table;
|
||||
import org.enso.table.data.table.join.scan.Matcher;
|
||||
import org.enso.table.data.table.join.scan.MatcherFactory;
|
||||
import org.enso.table.data.table.problems.AggregatedProblems;
|
||||
import org.enso.table.problems.AggregatedProblems;
|
||||
import org.graalvm.collections.Pair;
|
||||
|
||||
public class IndexJoin implements JoinStrategy {
|
||||
|
@ -1,6 +1,6 @@
|
||||
package org.enso.table.data.table.join;
|
||||
|
||||
import org.enso.table.data.table.problems.AggregatedProblems;
|
||||
import org.enso.table.problems.AggregatedProblems;
|
||||
import org.graalvm.collections.Pair;
|
||||
|
||||
import java.util.List;
|
||||
|
@ -1,6 +1,6 @@
|
||||
package org.enso.table.data.table.join.scan;
|
||||
|
||||
import org.enso.table.data.table.problems.AggregatedProblems;
|
||||
import org.enso.table.problems.AggregatedProblems;
|
||||
|
||||
public interface Matcher {
|
||||
boolean matches(int left, int right);
|
||||
|
@ -13,7 +13,7 @@ import org.enso.table.data.table.join.Between;
|
||||
import org.enso.table.data.table.join.Equals;
|
||||
import org.enso.table.data.table.join.EqualsIgnoreCase;
|
||||
import org.enso.table.data.table.join.JoinCondition;
|
||||
import org.enso.table.data.table.problems.AggregatedProblems;
|
||||
import org.enso.table.problems.AggregatedProblems;
|
||||
import org.enso.table.data.table.problems.FloatingPointGrouping;
|
||||
|
||||
public class MatcherFactory {
|
||||
|
@ -1,16 +1,19 @@
|
||||
package org.enso.table.operations;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import org.enso.base.text.TextFoldingStrategy;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
import org.enso.table.data.index.MultiValueKeyBase;
|
||||
import org.enso.table.data.index.UnorderedMultiValueKey;
|
||||
import org.enso.table.data.table.Column;
|
||||
import org.enso.table.data.table.problems.AggregatedProblems;
|
||||
import org.enso.table.data.table.problems.FloatingPointGrouping;
|
||||
import org.enso.table.problems.AggregatedProblems;
|
||||
import org.enso.table.util.ConstantList;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.BitSet;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
|
||||
public class Distinct {
|
||||
/** Creates a row mask containing only the first row from sets of rows grouped by key columns. */
|
||||
public static BitSet buildDistinctRowsMask(
|
||||
@ -29,7 +32,11 @@ public class Distinct {
|
||||
UnorderedMultiValueKey key = new UnorderedMultiValueKey(storage, i, strategies);
|
||||
|
||||
if (key.hasFloatValues()) {
|
||||
problems.add(new FloatingPointGrouping("Distinct", i));
|
||||
final int row = i;
|
||||
key.floatColumnPositions()
|
||||
.forEach(
|
||||
columnIx ->
|
||||
problems.add(new FloatingPointGrouping(keyColumns[columnIx].getName(), row)));
|
||||
}
|
||||
|
||||
if (!visitedRows.contains(key)) {
|
||||
|
@ -1,6 +1,6 @@
|
||||
package org.enso.table.data.table.problems;
|
||||
package org.enso.table.problems;
|
||||
|
||||
import org.enso.table.problems.Problem;
|
||||
import org.enso.table.data.table.problems.ColumnAggregatedProblems;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
@ -0,0 +1,93 @@
|
||||
from Standard.Base import all
|
||||
|
||||
from Standard.Table import Column_Selector, Sort_Column, Sort_Column_Selector
|
||||
from Standard.Table.Errors import Floating_Point_Grouping
|
||||
|
||||
from Standard.Test import Test, Problems
|
||||
import Standard.Test.Extensions
|
||||
|
||||
from project.Common_Table_Operations.Util import run_default_backend
|
||||
|
||||
main = run_default_backend spec
|
||||
|
||||
spec setup =
|
||||
table_builder = setup.table_builder
|
||||
materialize = setup.materialize
|
||||
Test.group setup.prefix+"Table.distinct" <|
|
||||
Test.specify "should group by all columns by default" <|
|
||||
a = ["A", ["a", "b", "a", "b", "a", "b"]]
|
||||
b = ["B", [2, 1, 2, 2, 2, 1]]
|
||||
t = table_builder [a, b]
|
||||
r = t.distinct on_problems=Report_Error |> materialize |> _.order_by ["A", "B"]
|
||||
r.at "A" . to_vector . should_equal ["a", "b", "b"]
|
||||
r.at "B" . to_vector . should_equal [2, 1, 2]
|
||||
|
||||
Test.specify "should allow to select distinct rows based on a subset of columns, returning any row from each group" <|
|
||||
a = ["A", ["a", "a", "a", "a", "a", "a"]]
|
||||
b = ["B", [1, 1, 2, 2, 1, 2]]
|
||||
c = ["C", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]]
|
||||
t = table_builder [a, b, c]
|
||||
|
||||
r1 = t.distinct (Column_Selector.By_Name ["A"]) on_problems=Report_Error |> materialize
|
||||
r1.at "A" . to_vector . should_equal ["a"]
|
||||
r1.at "B" . to_vector . should_equal [1]
|
||||
r1.at "C" . to_vector . should_equal [0.1]
|
||||
|
||||
r2 = t.distinct ["A", "B"] on_problems=Report_Error |> materialize |> _.order_by "B"
|
||||
r2.at "A" . to_vector . should_equal ["a", "a"]
|
||||
r2.at "B" . to_vector . should_equal [1, 2]
|
||||
cv = r2.at "C" . to_vector
|
||||
[0.1, 0.2, 0.5].contains (cv.at 0) . should_be_true
|
||||
[0.3, 0.4, 0.6].contains (cv.at 1) . should_be_true
|
||||
|
||||
if setup.test_selection.distinct_returns_first_row_from_group_if_ordered then
|
||||
Test.specify "should allow to select distinct rows based on a subset of columns, returning any first from each group if the table was ordered" <|
|
||||
a = ["A", ["a", "a", "a", "a", "a", "a"]]
|
||||
b = ["B", [1, 1, 2, 2, 1, 2]]
|
||||
c = ["C", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]]
|
||||
t = table_builder [a, b, c] . order_by (Sort_Column_Selector.By_Name [(Sort_Column.Name "C" Sort_Direction.Descending)])
|
||||
|
||||
r2 = t.distinct ["A", "B"] on_problems=Report_Error |> materialize |> _.order_by "B"
|
||||
r2.at "A" . to_vector . should_equal ["a", "a"]
|
||||
r2.at "B" . to_vector . should_equal [1, 2]
|
||||
r2.at "C" . to_vector . should_equal [0.5, 0.6]
|
||||
|
||||
Test.specify "should allow to control case-sensitivity of keys" <|
|
||||
x = ["X", ['A', 'a', 'enso', 'Enso', 'A']]
|
||||
t1 = table_builder [x]
|
||||
d1 = t1.distinct (Column_Selector.By_Name ["X"]) on_problems=Report_Error |> materialize |> _.order_by ["X"]
|
||||
d1.at "X" . to_vector . should_equal ['A', 'Enso', 'a', 'enso']
|
||||
|
||||
d2 = t1.distinct (Column_Selector.By_Name ["X"]) case_sensitivity=Case_Sensitivity.Insensitive on_problems=Report_Error |> materialize |> _.order_by ["X"]
|
||||
v = d2.at "X" . to_vector
|
||||
v.length . should_equal 2
|
||||
v.filter (_.equals_ignore_case "enso") . length . should_equal 1
|
||||
v.filter (_.equals_ignore_case "a") . length . should_equal 1
|
||||
|
||||
Test.specify "should report a warning if the key contains floating point values" <|
|
||||
t1 = table_builder [["X", [3.0, 1.0, 2.0, 2.0, 1.0]]]
|
||||
action1 = t1.distinct on_problems=_
|
||||
tester1 table =
|
||||
v = table.at "X" . to_vector
|
||||
v.length . should_equal 3
|
||||
v.fold 0 (+) . should_equal 6.0
|
||||
problems1 = [Floating_Point_Grouping.Error "X"]
|
||||
Problems.test_problem_handling action1 problems1 tester1
|
||||
|
||||
Test.specify "should handle nulls correctly" <|
|
||||
a = ["A", ["a", Nothing, "b", "a", "b", Nothing, "a", "b"]]
|
||||
b = ["B", [1, 2, 3, 4, 5, 6, 7, 8]]
|
||||
t = table_builder [a, b]
|
||||
r = t.distinct ["A"] on_problems=Report_Error |> materialize |> _.order_by "A"
|
||||
va = r.at "A" . to_vector
|
||||
vb = r.at "B" . to_vector
|
||||
va . should_equal [Nothing, "a", "b"]
|
||||
|
||||
va.at 0 . should_equal Nothing
|
||||
[2, 6].contains (vb.at 0) . should_be_true
|
||||
|
||||
va.at 1 . should_equal "a"
|
||||
[1, 4, 7].contains (vb.at 1) . should_be_true
|
||||
|
||||
va.at 2 . should_equal "b"
|
||||
[3, 5, 8].contains (vb.at 2) . should_be_true
|
@ -0,0 +1,118 @@
|
||||
from Standard.Base import all
|
||||
|
||||
# We hide the table constructor because we are supposed to use `table_builder` instead, which is backend-agnostic.
|
||||
from Standard.Table import all hiding Table
|
||||
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import Group_By, Count, Sum
|
||||
|
||||
from Standard.Test import Test, Problems
|
||||
import Standard.Test.Extensions
|
||||
|
||||
from project.Common_Table_Operations.Util import run_default_backend
|
||||
|
||||
main = run_default_backend spec
|
||||
|
||||
spec setup =
|
||||
table_builder = setup.table_builder
|
||||
materialize = setup.materialize
|
||||
Test.group setup.prefix+" Interactions Between various operations" <|
|
||||
Test.specify "aggregates and joins" <|
|
||||
t1 = table_builder [["Count", [1, 2, 3]], ["Class", ["X", "Y", "Z"]]]
|
||||
t2 = table_builder [["Letter", ["A", "B", "A", "A", "C", "A", "C", "D", "D", "B", "B"]]]
|
||||
|
||||
t3 = t2.aggregate [Group_By "Letter", Count]
|
||||
t4 = t3.join t1 on="Count" join_kind=Join_Kind.Left_Outer |> materialize |> _.order_by "Letter"
|
||||
t4.columns.map .name . should_equal ["Letter", "Count", "Class"]
|
||||
rows = t4.rows . map .to_vector
|
||||
rows.at 0 . should_equal ["A", 4, Nothing]
|
||||
rows.at 1 . should_equal ["B", 3, "Z"]
|
||||
rows.at 2 . should_equal ["C", 2, "Y"]
|
||||
rows.at 3 . should_equal ["D", 2, "Y"]
|
||||
|
||||
Test.specify "aggregates and distinct" <|
|
||||
t2 = table_builder [["Letter", ["A", "B", "A", "A", "C", "C"]], ["Points", [2, 5, 2, 1, 10, 3]]]
|
||||
|
||||
t3 = t2.aggregate [Group_By "Letter", Sum "Points"]
|
||||
t4 = t3.distinct "Sum Points" |> materialize |> _.order_by "Sum Points"
|
||||
t4.columns.map .name . should_equal ["Letter", "Sum Points"]
|
||||
t4.row_count . should_equal 2
|
||||
|
||||
rows = t4.rows . map .to_vector
|
||||
r1 = rows.at 0
|
||||
r1.second . should_equal 5
|
||||
["A", "B"].contains r1.first . should_be_true
|
||||
rows.at 1 . should_equal ["C", 13]
|
||||
|
||||
Test.specify "aggregates and filtering" <|
|
||||
t2 = table_builder [["Letter", ["A", "B", "A", "A", "C", "C", "B"]], ["Points", [2, 5, 2, 1, 10, 3, 0]]]
|
||||
|
||||
t3 = t2.aggregate [Group_By "Letter", Sum "Points"]
|
||||
t4 = t3.filter "Sum Points" (Filter_Condition.Equal 5) |> materialize |> _.order_by "Letter"
|
||||
t4.columns.map .name . should_equal ["Letter", "Sum Points"]
|
||||
rows = t4.rows . map .to_vector
|
||||
rows.at 0 . should_equal ["A", 5]
|
||||
rows.at 1 . should_equal ["B", 5]
|
||||
|
||||
Test.specify "aggregates and ordering" <|
|
||||
t1 = table_builder [["Letter", ["C", "A", "B", "A", "A", "C", "C", "B"]], ["Points", [0, -100, 5, 2, 1, 10, 3, 0]]]
|
||||
t2 = t1.aggregate [Group_By "Letter", Sum "Points"]
|
||||
t3 = t2.order_by "Sum Points" |> materialize
|
||||
t3.columns.map .name . should_equal ["Letter", "Sum Points"]
|
||||
t3.at "Letter" . to_vector . should_equal ["A", "B", "C"]
|
||||
t3.at "Sum Points" . to_vector . should_equal [-97, 5, 13]
|
||||
|
||||
Test.specify "distinct and ordering" <|
|
||||
t1 = table_builder [["X", [1, 2, 2, 1]], ["Y", ["a", "b", "b", "a"]], ["Z", [1, 2, 3, 4]]]
|
||||
|
||||
# These are 'adversarial' white-box examples constructed knowing that Postgres' DISTINCT ON does not play too well with ORDER BY and it needs to be handled carefully.
|
||||
t2 = t1.order_by "X" . distinct "X" |> materialize
|
||||
t2.row_count . should_equal 2
|
||||
t3 = t1.order_by "Y" . distinct "X" |> materialize
|
||||
t3.row_count . should_equal 2
|
||||
t4 = t1.order_by "Y" . distinct "X" . order_by "Y" |> materialize
|
||||
t4.row_count . should_equal 2
|
||||
|
||||
if setup.test_selection.distinct_returns_first_row_from_group_if_ordered then
|
||||
Test.specify "distinct and ordering if first row is returned after ordering" <|
|
||||
a = ["A", ["a", "a", "a", "a", "a", "a"]]
|
||||
b = ["B", [1, 1, 2, 2, 1, 2]]
|
||||
c = ["C", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]]
|
||||
t = table_builder [a, b, c] . order_by (Sort_Column_Selector.By_Name [(Sort_Column.Name "C" Sort_Direction.Descending)])
|
||||
|
||||
t2 = t.distinct ["A", "B"] on_problems=Report_Error
|
||||
# Now, reverse the order!
|
||||
## But the distinct was taken under descending order, so that
|
||||
should be preserved - we will still have _last_ rows from
|
||||
each group (first in reversed order).
|
||||
t3 = t2.order_by "C"
|
||||
r = t3 |> materialize
|
||||
r.at "A" . to_vector . should_equal ["a", "a"]
|
||||
r.at "B" . to_vector . should_equal [1, 2]
|
||||
r.at "C" . to_vector . should_equal [0.5, 0.6]
|
||||
|
||||
## It should matter whether we do the filter _before_ or _after_ the
|
||||
distinct operation.
|
||||
|
||||
It is easier to test this if we can rely on distinct returning
|
||||
the first row; if it may return any row from the group, it is harder to write
|
||||
tests that distinguish the two cases (filter before and after).
|
||||
Test.specify "distinct and filtering" <|
|
||||
a = ["A", ["a", "a", "b", "a", "b"]]
|
||||
b = ["B", [1, 2, 5, 5, 2]]
|
||||
c = ["C", [0.1, 0.2, 0.3, 0.4, 0.5]]
|
||||
t = table_builder [a, b, c] . order_by "C"
|
||||
|
||||
t2 = t.distinct ["A"] on_problems=Report_Error
|
||||
r2 = t2 |> materialize
|
||||
r2.at "A" . to_vector . should_equal ["a", "b"]
|
||||
r2.at "B" . to_vector . should_equal [1, 5]
|
||||
|
||||
t3 = t2.filter "B" (Filter_Condition.Equal 5)
|
||||
r3 = t3 |> materialize
|
||||
r3.at "A" . to_vector . should_equal ["b"]
|
||||
r3.at "B" . to_vector . should_equal [5]
|
||||
|
||||
t4 = t.filter "B" (Filter_Condition.Equal 5)
|
||||
t5 = t4.distinct ["A"] on_problems=Report_Error
|
||||
r5 = t5 |> materialize
|
||||
r5.at "A" . to_vector . should_contain_the_same_elements_as ["b", "a"]
|
||||
r5.at "B" . to_vector . should_equal [5, 5]
|
@ -2,8 +2,10 @@ from Standard.Base import all
|
||||
|
||||
import project.Common_Table_Operations.Column_Operations_Spec
|
||||
import project.Common_Table_Operations.Core_Spec
|
||||
import project.Common_Table_Operations.Distinct_Spec
|
||||
import project.Common_Table_Operations.Expression_Spec
|
||||
import project.Common_Table_Operations.Filter_Spec
|
||||
import project.Common_Table_Operations.Integration_Tests
|
||||
import project.Common_Table_Operations.Join_Spec
|
||||
import project.Common_Table_Operations.Missing_Values_Spec
|
||||
import project.Common_Table_Operations.Order_By_Spec
|
||||
@ -76,7 +78,11 @@ type Test_Selection
|
||||
- supports_full_join: Specifies if the backend supports full joins.
|
||||
SQLite doesn't, so we need to disable them until we implement a proper
|
||||
workaround.
|
||||
Config supports_case_sensitive_columns=True order_by=True natural_ordering=False case_insensitive_ordering=True order_by_unicode_normalization_by_default=False case_insensitive_ascii_only=False take_drop=True allows_mixed_type_comparisons=True supports_unicode_normalization=False is_nan_and_nothing_distinct=True supports_full_join=True
|
||||
- distinct_returns_first_row_from_group_if_ordered: If `order_by` was
|
||||
applied before, the distinct operation will return the first row from
|
||||
each group. Guaranteed in the in-memory backend, but may not be
|
||||
supported by all databases.
|
||||
Config supports_case_sensitive_columns=True order_by=True natural_ordering=False case_insensitive_ordering=True order_by_unicode_normalization_by_default=False case_insensitive_ascii_only=False take_drop=True allows_mixed_type_comparisons=True supports_unicode_normalization=False is_nan_and_nothing_distinct=True supports_full_join=True distinct_returns_first_row_from_group_if_ordered=True
|
||||
|
||||
spec setup =
|
||||
Core_Spec.spec setup
|
||||
@ -89,5 +95,7 @@ spec setup =
|
||||
Take_Drop_Spec.spec setup
|
||||
Expression_Spec.spec detailed=False setup
|
||||
Join_Spec.spec setup
|
||||
Distinct_Spec.spec setup
|
||||
Integration_Tests.spec setup
|
||||
|
||||
main = run_default_backend spec
|
||||
|
@ -160,10 +160,10 @@ spec =
|
||||
Test.group "[Codegen] Aggregation" <|
|
||||
Test.specify "should allow to count rows" <|
|
||||
code = t1.aggregate [Group_By "A" "A grp", Count "counter"] . to_sql . prepare
|
||||
code . should_equal ['SELECT "T1"."A" AS "A grp", COUNT(*) AS "counter" FROM "T1" AS "T1" GROUP BY "T1"."A"', []]
|
||||
code . should_equal ['SELECT "T1"."A grp" AS "A grp", "T1"."counter" AS "counter" FROM (SELECT "T1"."A" AS "A grp", COUNT(*) AS "counter" FROM "T1" AS "T1" GROUP BY "T1"."A") AS "T1"', []]
|
||||
|
||||
Test.specify "should allow to group by multiple fields" <|
|
||||
code = t1.aggregate [Sum "A" "sum_a", Group_By "C" Nothing, Group_By "B" "B grp"] . to_sql . prepare
|
||||
code . should_equal ['SELECT SUM("T1"."A") AS "sum_a", "T1"."C" AS "C", "T1"."B" AS "B grp" FROM "T1" AS "T1" GROUP BY "T1"."C", "T1"."B"', []]
|
||||
code . should_equal ['SELECT "T1"."sum_a" AS "sum_a", "T1"."C" AS "C", "T1"."B grp" AS "B grp" FROM (SELECT SUM("T1"."A") AS "sum_a", "T1"."C" AS "C", "T1"."B" AS "B grp" FROM "T1" AS "T1" GROUP BY "T1"."C", "T1"."B") AS "T1"', []]
|
||||
|
||||
main = Test_Suite.run_main spec
|
||||
|
@ -85,9 +85,9 @@ postgres_specific_spec connection db_name =
|
||||
connection.execute_update 'DROP VIEW "'+vinfo+'";'
|
||||
connection.execute_update 'DROP TABLE "'+tinfo+'";'
|
||||
|
||||
tinfo = Name_Generator.random_name "Tinfo"
|
||||
connection.execute_update 'CREATE TEMPORARY TABLE "'+tinfo+'" ("strs" VARCHAR, "ints" INTEGER, "bools" BOOLEAN, "reals" REAL, "doubles" DOUBLE PRECISION)'
|
||||
Test.group "[PostgreSQL] Info" <|
|
||||
tinfo = Name_Generator.random_name "Tinfo"
|
||||
connection.execute_update 'CREATE TEMPORARY TABLE "'+tinfo+'" ("strs" VARCHAR, "ints" INTEGER, "bools" BOOLEAN, "reals" REAL, "doubles" DOUBLE PRECISION)'
|
||||
t = connection.query (SQL_Query.Table_Name tinfo)
|
||||
t.insert ["a", Nothing, False, 1.2, 0.000000000001]
|
||||
t.insert ["abc", Nothing, Nothing, 1.3, Nothing]
|
||||
@ -110,7 +110,14 @@ postgres_specific_spec connection db_name =
|
||||
t.at "ints" . sql_type . is_definitely_integer . should_be_true
|
||||
t.at "bools" . sql_type . is_definitely_boolean . should_be_true
|
||||
t.at "reals" . sql_type . is_definitely_double . should_be_true
|
||||
connection.execute_update 'DROP TABLE "'+tinfo+'"'
|
||||
|
||||
Test.group "[PostgreSQL] Dialect-specific codegen" <|
|
||||
Test.specify "should generate queries for the Distinct operation" <|
|
||||
t = connection.query (SQL_Query.Table_Name tinfo)
|
||||
code_template = 'SELECT "{Tinfo}"."strs" AS "strs", "{Tinfo}"."ints" AS "ints", "{Tinfo}"."bools" AS "bools", "{Tinfo}"."reals" AS "reals", "{Tinfo}"."doubles" AS "doubles" FROM (SELECT DISTINCT ON ("{Tinfo}_inner"."strs") "{Tinfo}_inner"."strs" AS "strs", "{Tinfo}_inner"."ints" AS "ints", "{Tinfo}_inner"."bools" AS "bools", "{Tinfo}_inner"."reals" AS "reals", "{Tinfo}_inner"."doubles" AS "doubles" FROM (SELECT "{Tinfo}"."strs" AS "strs", "{Tinfo}"."ints" AS "ints", "{Tinfo}"."bools" AS "bools", "{Tinfo}"."reals" AS "reals", "{Tinfo}"."doubles" AS "doubles" FROM "{Tinfo}" AS "{Tinfo}") AS "{Tinfo}_inner") AS "{Tinfo}"'
|
||||
expected_code = code_template.replace "{Tinfo}" tinfo
|
||||
t.distinct ["strs"] . to_sql . prepare . should_equal [expected_code, []]
|
||||
connection.execute_update 'DROP TABLE "'+tinfo+'"'
|
||||
|
||||
Test.group "[PostgreSQL] Table.aggregate should correctly infer result types" <|
|
||||
name = Name_Generator.random_name "Ttypes"
|
||||
|
@ -73,9 +73,9 @@ sqlite_specific_spec connection =
|
||||
action . should_fail_with SQL_Error.Error
|
||||
action.catch.to_text . should_equal "There was an SQL error: [SQLITE_ERROR] SQL error or missing database (no such table: undefined_table). [Query was: SELECT A FROM undefined_table]"
|
||||
|
||||
tinfo = Name_Generator.random_name "Tinfo"
|
||||
connection.execute_update 'CREATE TABLE "'+tinfo+'" ("strs" VARCHAR, "ints" INTEGER, "bools" BOOLEAN, "reals" REAL)'
|
||||
Test.group "[SQLite] Metadata" <|
|
||||
tinfo = Name_Generator.random_name "Tinfo"
|
||||
connection.execute_update 'CREATE TABLE "'+tinfo+'" ("strs" VARCHAR, "ints" INTEGER, "bools" BOOLEAN, "reals" REAL)'
|
||||
t = connection.query (SQL_Query.Table_Name tinfo)
|
||||
t.insert ["a", Nothing, False, 1.2]
|
||||
t.insert ["abc", Nothing, Nothing, 1.3]
|
||||
@ -96,6 +96,13 @@ sqlite_specific_spec connection =
|
||||
t.at "reals" . sql_type . is_definitely_boolean . should_be_false
|
||||
t.at "bools" . sql_type . is_definitely_double . should_be_false
|
||||
|
||||
Test.group "[SQLite] Dialect-specific codegen" <|
|
||||
Test.specify "should generate queries for the Distinct operation" <|
|
||||
t = connection.query (SQL_Query.Table_Name tinfo)
|
||||
code_template = 'SELECT "{Tinfo}"."strs" AS "strs", "{Tinfo}"."ints" AS "ints", "{Tinfo}"."bools" AS "bools", "{Tinfo}"."reals" AS "reals" FROM (SELECT "{Tinfo}_inner"."strs" AS "strs", "{Tinfo}_inner"."ints" AS "ints", "{Tinfo}_inner"."bools" AS "bools", "{Tinfo}_inner"."reals" AS "reals" FROM (SELECT "{Tinfo}"."strs" AS "strs", "{Tinfo}"."ints" AS "ints", "{Tinfo}"."bools" AS "bools", "{Tinfo}"."reals" AS "reals" FROM "{Tinfo}" AS "{Tinfo}") AS "{Tinfo}_inner" GROUP BY "{Tinfo}_inner"."strs") AS "{Tinfo}"'
|
||||
expected_code = code_template.replace "{Tinfo}" tinfo
|
||||
t.distinct ["strs"] . to_sql . prepare . should_equal [expected_code, []]
|
||||
|
||||
sqlite_spec connection prefix =
|
||||
name_counter = Ref.new 0
|
||||
table_builder columns =
|
||||
|
@ -709,33 +709,18 @@ spec =
|
||||
row of ones sharing the same distinctness key. For database tests (to be
|
||||
added later) we cannot rely on ordering.
|
||||
Test.group "[In-Memory] Table.distinct" <|
|
||||
Test.specify "should allow to select distinct rows based on a subset of columns" <|
|
||||
Test.specify "should allow to select distinct rows based on a subset of columns, returning the first row from each group" <|
|
||||
a = ["A", ["a", "a", "a", "a", "a", "a"]]
|
||||
b = ["B", [1, 1, 2, 2, 1, 2]]
|
||||
c = ["C", [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]]
|
||||
t = Table.new [a, b, c]
|
||||
|
||||
r1 = t.distinct (Column_Selector.By_Name ["A"]) on_problems=Report_Error
|
||||
r1.at "A" . to_vector . should_equal ["a"]
|
||||
r1.at "B" . to_vector . should_equal [1]
|
||||
r1.at "C" . to_vector . should_equal [0.1]
|
||||
|
||||
r2 = t.distinct (Column_Selector.By_Name ["A", "B"]) on_problems=Report_Error
|
||||
r2.at "A" . to_vector . should_equal ["a", "a"]
|
||||
r2.at "B" . to_vector . should_equal [1, 2]
|
||||
r2.at "C" . to_vector . should_equal [0.1, 0.3]
|
||||
|
||||
r3 = t.distinct ["A"] on_problems=Report_Error
|
||||
r3.at "A" . to_vector . should_equal ["a"]
|
||||
r3.at "B" . to_vector . should_equal [1]
|
||||
r3.at "C" . to_vector . should_equal [0.1]
|
||||
|
||||
r4 = t.distinct ["A", "B"] on_problems=Report_Error
|
||||
r4.at "A" . to_vector . should_equal ["a", "a"]
|
||||
r4.at "B" . to_vector . should_equal [1, 2]
|
||||
r4.at "C" . to_vector . should_equal [0.1, 0.3]
|
||||
|
||||
Test.specify "should handle nulls correctly" <|
|
||||
Test.specify "should handle nulls correctly and preserve original ordering" <|
|
||||
a = ["A", ["a", Nothing, "b", "a", "b", Nothing, "a", "b"]]
|
||||
b = ["B", [1, 2, 3, 4, 5, 6, 7, 8]]
|
||||
t = Table.new [a, b]
|
||||
@ -747,7 +732,7 @@ spec =
|
||||
t1 = Table.new [["X", ['ś', 's\u0301', 's', 'ś']]]
|
||||
t1.distinct . at "X" . to_vector . should_equal ['ś', 's']
|
||||
|
||||
Test.specify "should allow to control case-sensitivity of keys" <|
|
||||
Test.specify "should allow to control case-sensitivity of keys, correctly handling Unicode folding" <|
|
||||
x = ["X", ['A', 'a', 'enso', 'śledź', 'Enso', 'A', 's\u0301ledz\u0301']]
|
||||
y = ["Y", [1, 2, 3, 4, 5, 6, 7]]
|
||||
t1 = Table.new [x, y]
|
||||
@ -767,14 +752,14 @@ spec =
|
||||
action1 = t1.distinct on_problems=_
|
||||
tester1 table =
|
||||
table.at "X" . to_vector . should_equal [3.0, 1.0, 2.0]
|
||||
problems1 = [Floating_Point_Grouping.Error "Distinct"]
|
||||
problems1 = [Floating_Point_Grouping.Error "X"]
|
||||
Problems.test_problem_handling action1 problems1 tester1
|
||||
|
||||
t2 = Table.new [["X", [1.00000000000001, -0.3, 1.00000000000002, 1.5, 1.00000000000002, 1.00000000000002]]]
|
||||
action2 = t2.distinct on_problems=_
|
||||
tester2 table =
|
||||
table.at "X" . to_vector . should_equal [1.00000000000001, -0.3, 1.00000000000002, 1.5]
|
||||
problems2 = [Floating_Point_Grouping.Error "Distinct"]
|
||||
problems2 = [Floating_Point_Grouping.Error "X"]
|
||||
Problems.test_problem_handling action2 problems2 tester2
|
||||
|
||||
Test.specify "should report a warning and report the whole table if no columns were selected" <|
|
||||
@ -793,14 +778,6 @@ spec =
|
||||
t = Table.new [["X", [My.Data 1 2, My.Data 3 4, My.Data 1 2]]]
|
||||
t.distinct . should_fail_with Illegal_Argument.Error
|
||||
|
||||
Test.specify "should group by all columns by default" <|
|
||||
a = ["A", ["a", "b", "a", "b", "a", "b"]]
|
||||
b = ["B", [2, 1, 2, 2, 2, 1]]
|
||||
t = Table.new [a, b]
|
||||
r = t.distinct on_problems=Report_Error
|
||||
r.at "A" . to_vector . should_equal ["a", "b", "b"]
|
||||
r.at "B" . to_vector . should_equal [2, 1, 2]
|
||||
|
||||
Test.group "[In-Memory] Table.filter" <|
|
||||
Test.specify "by a custom predicate" <|
|
||||
t = Table.new [["ix", [1, 2, 3, 4, 5]], ["X", [5, 0, 4, 5, 1]]]
|
||||
|
Loading…
Reference in New Issue
Block a user