Improve database Table.order_by (#3514)

Implements https://www.pivotaltracker.com/story/show/182195405

Adds support for the Postgres dialect and simple case insensitive collation for SQLite.
This commit is contained in:
Radosław Waśko 2022-06-07 14:31:55 +02:00 committed by GitHub
parent 7d94efa6f2
commit a382e0c15e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 84 additions and 35 deletions

View File

@ -133,6 +133,7 @@
representing `Delimited` files without storing them on the filesystem.][3478]
- [Added rank data, correlation and covariance statistics for `Vector`][3484]
- [Implemented `Table.order_by` for the SQLite backend.][3502]
- [Implemented `Table.order_by` for the PostgreSQL backend.][3514]
[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -208,6 +209,7 @@
[3478]: https://github.com/enso-org/enso/pull/3478
[3484]: https://github.com/enso-org/enso/pull/3484
[3502]: https://github.com/enso-org/enso/pull/3502
[3514]: https://github.com/enso-org/enso/pull/3514
#### Enso Compiler

View File

@ -5,5 +5,8 @@ from Standard.Base import all
Arguments:
- sort_digits_as_numbers: Sort digits in the text as numbers. Setting this to
`True` results in a "Natural" ordering.
- case_sensitive: Specifies if the ordering should be case sensitive.
type Text_Ordering (sort_digits_as_numbers:Boolean=False) (case_sensitive:(True|Case_Insensitive)=True)
- case_sensitive: Specifies if the ordering should be case sensitive. If
set to `Nothing` (the default), the default ordering of the given backend
is used. For the In-memory backend, the default ordering is case sensitive.
In databases, the default ordering depends on the database configuration.
type Text_Ordering (sort_digits_as_numbers:Boolean=False) (case_sensitive:(Nothing|True|Case_Insensitive)=Nothing)

View File

@ -45,7 +45,7 @@ type Dialect
One of the purposes of this method is to verify if the expected ordering
settings are supported by the given database backend.
prepare_order_descriptor : Connection -> IR.Internal_Column -> Sort_Direction -> Text_Ordering -> IR.Order_Descriptor
prepare_order_descriptor : IR.Internal_Column -> Sort_Direction -> Text_Ordering -> IR.Order_Descriptor
prepare_order_descriptor = Errors.unimplemented "This is an interface only."
## PRIVATE

View File

@ -6,6 +6,8 @@ from Standard.Database.Data.Sql import Sql_Type
import Standard.Database.Data.Dialect
import Standard.Database.Data.Dialect.Helpers
import Standard.Database.Data.Internal.Base_Generator
import Standard.Database.Data.Internal.IR
from Standard.Database.Error as Database_Errors import Unsupported_Database_Operation_Error
## PRIVATE
@ -49,14 +51,14 @@ type Postgresql_Dialect
One of the purposes of this method is to verify if the expected ordering
settings are supported by the given database backend.
prepare_order_descriptor : Connection -> IR.Internal_Column -> Sort_Direction -> Text_Ordering -> IR.Order_Descriptor
prepare_order_descriptor connection internal_column sort_direction text_ordering =
_ = [connection, internal_column, sort_direction, text_ordering]
Errors.unimplemented "TODO"
prepare_order_descriptor : IR.Internal_Column -> Sort_Direction -> Text_Ordering -> IR.Order_Descriptor
prepare_order_descriptor internal_column sort_direction text_ordering =
here.make_order_descriptor internal_column sort_direction text_ordering
## PRIVATE
make_internal_generator_dialect =
text = [here.starts_with, here.contains, here.ends_with, here.agg_shortest, here.agg_longest]+here.concat_ops
cases = [["LOWER", Base_Generator.make_function "LOWER"], ["UPPER", Base_Generator.make_function "UPPER"]]
text = [here.starts_with, here.contains, here.ends_with, here.agg_shortest, here.agg_longest]+here.concat_ops+cases
counts = [here.agg_count_is_null, here.agg_count_empty, here.agg_count_not_empty, ["COUNT_DISTINCT", here.agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", here.agg_count_distinct_include_null]]
stddev_pop = ["STDDEV_POP", Base_Generator.make_function "stddev_pop"]
@ -218,3 +220,27 @@ make_contains_expr expr substring =
## PRIVATE
contains = Base_Generator.lift_binary_op "contains" here.make_contains_expr
## PRIVATE
make_order_descriptor internal_column sort_direction text_ordering =
    # Missing values are placed first when ascending and last when descending.
    null_placement = case sort_direction of
        Sort_Direction.Ascending -> IR.Nulls_First
        Sort_Direction.Descending -> IR.Nulls_Last
    # Builds the descriptor for `expression`, reusing the direction and nulls
    # placement computed above; `chosen_collation` may be `Nothing`.
    describe expression chosen_collation =
        IR.Order_Descriptor expression sort_direction nulls_order=null_placement collation=chosen_collation
    case internal_column.sql_type.is_likely_text of
        # Text ordering settings do not apply to non-text columns.
        False ->
            describe internal_column.expression Nothing
        True ->
            # In the future this error could suggest using a custom defined
            # collation instead.
            if text_ordering.sort_digits_as_numbers then Error.throw (Unsupported_Database_Operation_Error "Natural ordering is currently not supported. You may need to materialize the Table to perform this operation.") else
                case text_ordering.case_sensitive of
                    # No preference: no explicit collation is requested.
                    Nothing ->
                        describe internal_column.expression Nothing
                    True ->
                        describe internal_column.expression "ucs_basic"
                    Case_Insensitive locale ->
                        uses_default_locale = Locale.default.java_locale.equals locale.java_locale
                        case uses_default_locale of
                            True ->
                                # Case is folded by applying UPPER and then
                                # LOWER to the column expression.
                                uppercased = IR.Operation "UPPER" [internal_column.expression]
                                case_folded = IR.Operation "LOWER" [uppercased]
                                describe case_folded Nothing
                            False ->
                                Error.throw (Unsupported_Database_Operation_Error "Case insensitive ordering with custom locale is currently not supported. You may need to materialize the Table to perform this operation.")

View File

@ -49,7 +49,6 @@ type Redshift_Dialect
One of the purposes of this method is to verify if the expected ordering
settings are supported by the given database backend.
prepare_order_descriptor : Connection -> IR.Internal_Column -> Sort_Direction -> Text_Ordering -> IR.Order_Descriptor
prepare_order_descriptor connection internal_column sort_direction text_ordering =
_ = [connection, internal_column, sort_direction, text_ordering]
Errors.unimplemented "TODO"
prepare_order_descriptor : IR.Internal_Column -> Sort_Direction -> Text_Ordering -> IR.Order_Descriptor
prepare_order_descriptor internal_column sort_direction text_ordering =
Postgres.make_order_descriptor internal_column sort_direction text_ordering

View File

@ -49,12 +49,20 @@ type Sqlite_Dialect
One of the purposes of this method is to verify if the expected ordering
settings are supported by the given database backend.
prepare_order_descriptor : Connection -> IR.Internal_Column -> Sort_Direction -> Text_Ordering -> IR.Order_Descriptor
prepare_order_descriptor _ internal_column sort_direction text_ordering = case internal_column.sql_type.is_likely_text of
prepare_order_descriptor : IR.Internal_Column -> Sort_Direction -> Text_Ordering -> IR.Order_Descriptor
prepare_order_descriptor internal_column sort_direction text_ordering = case internal_column.sql_type.is_likely_text of
True ->
if text_ordering.sort_digits_as_numbers then Error.throw (Unsupported_Database_Operation_Error "Natural ordering is not supported by the SQLite backend. You may need to materialize the Table to perform this operation.") else
if text_ordering.case_sensitive != True then Error.throw (Unsupported_Database_Operation_Error "Case insensitive ordering is not supported by the SQLite backend. You may need to materialize the Table to perform this operation.") else
IR.Order_Descriptor internal_column.expression sort_direction collation=Nothing
case text_ordering.case_sensitive of
Nothing ->
IR.Order_Descriptor internal_column.expression sort_direction collation=Nothing
True ->
IR.Order_Descriptor internal_column.expression sort_direction collation="BINARY"
Case_Insensitive locale -> case Locale.default.java_locale.equals locale.java_locale of
False ->
Error.throw (Unsupported_Database_Operation_Error "Case insensitive ordering with custom locale is not supported by the SQLite backend. You may need to materialize the Table to perform this operation.")
True ->
IR.Order_Descriptor internal_column.expression sort_direction collation="NOCASE"
False ->
IR.Order_Descriptor internal_column.expression sort_direction collation=Nothing

View File

@ -465,6 +465,8 @@ type Table
- If values do not implement an ordering, an
`Incomparable_Values_Error`.
Missing (`Nothing`) values are sorted as less than any other object.
> Example
Order the table by the column "alpha" in ascending order.
@ -486,7 +488,7 @@ type Table
propagate dataflow errors correctly. See:
https://www.pivotaltracker.com/story/show/181057718
Panic.throw_wrapped_if_error <|
this.connection.dialect.prepare_order_descriptor this.connection internal_column associated_selector.direction text_ordering
this.connection.dialect.prepare_order_descriptor internal_column associated_selector.direction text_ordering
new_ctx = this.context.add_orders new_order_descriptors
this.updated_context new_ctx

View File

@ -206,7 +206,8 @@ rename_columns internal_columns mapping on_problems =
- text_ordering: The sort methodology to use.
sort_columns : Vector -> Sort_Direction -> Text_Ordering -> Vector
sort_columns internal_columns direction text_ordering =
mapper = case text_ordering.case_sensitive of
case_sensitive = text_ordering.case_sensitive.if_nothing True
mapper = case case_sensitive of
True -> _.name
Case_Insensitive locale ->
col -> col.name.to_case_insensitive_key locale=locale

View File

@ -11,7 +11,7 @@ from Standard.Table.Data.Position as Position_Module import all
import Standard.Table.Data.Sort_Column_Selector
import Standard.Table.Data.Sort_Column
type Test_Selection supports_case_sensitive_columns=True order_by=True natural_ordering=False case_insensitive_ordering=False order_by_unicode_normalization_by_default=False
type Test_Selection supports_case_sensitive_columns=True order_by=True natural_ordering=False case_insensitive_ordering=True order_by_unicode_normalization_by_default=False case_insensitive_ascii_only=False
## A common test suite for shared operations on the Table API.
@ -527,14 +527,15 @@ spec prefix table_builder test_selection pending=Nothing =
col7 = ["psi", [Nothing, "c01", "c10", "C2"]]
col8 = ["phi", ["śc", Nothing, 's\u0301b', "śa"]]
col9 = ["tau", [32.0, 0.5, -0.1, 1.6]]
table_builder [col1, col2, col3, col4, col5, col6, col7, col8, col9]
col10 = ["rho", ["BB", Nothing, Nothing, "B"]]
table_builder [col1, col2, col3, col4, col5, col6, col7, col8, col9, col10]
Test.specify "should work as shown in the doc examples" <|
t1 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "alpha"])
t1.at "alpha" . to_vector . should_equal [0, 1, 2, 3]
t1.at "gamma" . to_vector . should_equal [4, 3, 2, 1]
t2 = table.order_by (Sort_Column_Selector.By_Index [Sort_Column.Index 1, Sort_Column.Index -7 Sort_Direction.Descending])
t2 = table.order_by (Sort_Column_Selector.By_Index [Sort_Column.Index 1, Sort_Column.Index -8 Sort_Direction.Descending])
t2.at "beta" . to_vector . should_equal ["a", "a", "b", "b"]
t2.at "gamma" . to_vector . should_equal [3, 1, 4, 2]
t2.at "alpha" . to_vector . should_equal [1, 3, 0, 2]
@ -564,13 +565,13 @@ spec prefix table_builder test_selection pending=Nothing =
Problems.test_problem_handling action problems tester
Test.specify "should correctly handle problems: aliased indices" <|
selector = Sort_Column_Selector.By_Index [Sort_Column.Index 1, Sort_Column.Index -8 Sort_Direction.Descending, Sort_Column.Index -7 Sort_Direction.Descending, Sort_Column.Index 2 Sort_Direction.Ascending]
selector = Sort_Column_Selector.By_Index [Sort_Column.Index 1, Sort_Column.Index -9 Sort_Direction.Descending, Sort_Column.Index -8 Sort_Direction.Descending, Sort_Column.Index 2 Sort_Direction.Ascending]
action = table.order_by selector on_problems=_
tester table =
table.at "beta" . to_vector . should_equal ["a", "a", "b", "b"]
table.at "gamma" . to_vector . should_equal [3, 1, 4, 2]
table.at "alpha" . to_vector . should_equal [1, 3, 0, 2]
problems = [Input_Indices_Already_Matched [Sort_Column.Index -8 Sort_Direction.Descending, Sort_Column.Index 2]]
problems = [Input_Indices_Already_Matched [Sort_Column.Index -9 Sort_Direction.Descending, Sort_Column.Index 2]]
Problems.test_problem_handling action problems tester
Test.specify "should correctly handle problems: duplicate names" <|
@ -679,9 +680,11 @@ spec prefix table_builder test_selection pending=Nothing =
t1.at "xi" . to_vector . should_equal [Nothing, 0.5, 1.0, 1.5]
t1.at "alpha" . to_vector . should_equal [1, 0, 3, 2]
t2 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "psi"])
t2.at "psi" . to_vector . should_equal [Nothing, "C2", "c01", "c10"]
t2.at "alpha" . to_vector . should_equal [3, 0, 2, 1]
t2 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "rho"])
t2.at "rho" . to_vector . should_equal [Nothing, Nothing, "B", "BB"]
t3 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "rho" Sort_Direction.Descending])
t3.at "rho" . to_vector . should_equal ["BB", "B", Nothing, Nothing]
Test.specify "should behave as expected with Unicode normalization, depending on the defaults settings" <|
t1 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "phi"])
@ -702,16 +705,21 @@ spec prefix table_builder test_selection pending=Nothing =
t2.at "delta" . to_vector . should_equal ["a03", "a1", "a10", "a2"]
t2.at "alpha" . to_vector . should_equal [0, 2, 3, 1]
# TODO [RW] This test must actually be verified in practice.
Test.specify "should support case insensitive ordering" pending=(if test_selection.case_insensitive_ordering.not then "Case insensitive ordering is not supported.") <|
t1 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "eta"]) text_ordering=(Text_Ordering case_sensitive=Case_Insensitive)
t1.at "eta" . to_vector . should_equal ["Aleph", "alpha", "Beta", "bądź"]
expected = case test_selection.case_insensitive_ascii_only of
True -> ["Aleph", "alpha", "Beta", "bądź"]
False -> ["Aleph", "alpha", "bądź", "Beta"]
t1.at "eta" . to_vector . should_equal expected
t2 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "eta"]) text_ordering=(Text_Ordering case_sensitive=Case_Insensitive (Locale.new "pl" "PL"))
t2.at "eta" . to_vector . should_equal ["Aleph", "alpha", "bądź", "Beta"]
t2 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "eta"]) text_ordering=(Text_Ordering case_sensitive=True)
t2.at "eta" . to_vector . should_equal ["Aleph", "Beta", "alpha", "bądź"]
t3 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "eta"])
t3.at "eta" . to_vector . should_equal ["Aleph", "Beta", "alpha", "bądź"]
t3 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "psi"]) text_ordering=(Text_Ordering case_sensitive=Case_Insensitive)
t3.at "psi" . to_vector . should_equal [Nothing, "c01", "c10", "C2"]
t4 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "psi" Sort_Direction.Descending]) text_ordering=(Text_Ordering case_sensitive=True)
t4.at "psi" . to_vector . should_equal ["c10", "c01", "C2", Nothing]
Test.specify "should support natural and case insensitive ordering at the same time" pending=(if (test_selection.natural_ordering.not || test_selection.case_insensitive_ordering.not) then "Natural ordering or case sensitive ordering is not supported.") <|
t1 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "psi"]) text_ordering=(Text_Ordering sort_digits_as_numbers=True case_sensitive=Case_Insensitive)

View File

@ -99,7 +99,7 @@ run_tests connection pending=Nothing =
Common_Spec.spec prefix connection pending=pending
here.postgres_specific_spec connection pending=pending
common_selection = Common_Table_Spec.Test_Selection supports_case_sensitive_columns=True order_by=False
common_selection = Common_Table_Spec.Test_Selection supports_case_sensitive_columns=True order_by_unicode_normalization_by_default=True
Common_Table_Spec.spec prefix table_builder test_selection=common_selection pending=pending
selection = Aggregate_Spec.Test_Selection first_last_row_order=False aggregation_problems=False

View File

@ -62,7 +62,7 @@ spec =
Common_Spec.spec prefix connection
here.sqlite_specific_spec connection
common_selection = Common_Table_Spec.Test_Selection supports_case_sensitive_columns=False order_by=True natural_ordering=False case_insensitive_ordering=False
common_selection = Common_Table_Spec.Test_Selection supports_case_sensitive_columns=False order_by=True natural_ordering=False case_insensitive_ordering=True case_insensitive_ascii_only=True
Common_Table_Spec.spec prefix table_builder test_selection=common_selection
## For now `advanced_stats`, `first_last`, `text_shortest_longest` and

View File

@ -635,7 +635,7 @@ spec =
t_3 = Table.new [c_3_1, c_3_2, c_3_3]
t_3.default_visualization.should_equal Visualization.Id.table
selection = Common_Table_Spec.Test_Selection supports_case_sensitive_columns=True order_by=False
selection = Common_Table_Spec.Test_Selection supports_case_sensitive_columns=True order_by=False natural_ordering=True case_insensitive_ordering=True order_by_unicode_normalization_by_default=True
Common_Table_Spec.spec "[In-Memory] " table_builder=Table.new test_selection=selection
Test.group "Use First Row As Names" <|