From ba56f8e89b84ee815ad0a63a6f981ac17f88d3ee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?=
Date: Tue, 23 Jul 2024 08:58:11 +0200
Subject: [PATCH] Snowflake Dialect - pt. 7 (#10612)

- Closes #9486
- All tests are succeeding or marked pending
- Created follow-up tickets for things that still need to be addressed, including:
  - Fixing upload / table update #10609
  - Fixing `Count_Distinct` on Boolean columns #10611
- Running the tests on CI is not part of this PR - to be addressed separately
---
 .../src/Internal/Snowflake_Dialect.enso        | 38 ++++++--
 .../src/main/resources/default/src/Main.enso   |  1 +
 test/Snowflake_Tests/src/Snowflake_Spec.enso   | 92 +++++++-----------
 .../Aggregate_Spec.enso                        | 20 ++++
 .../Derived_Columns_Spec.enso                  |  2 +-
 .../Join/Replace_Spec.enso                     | 95 ++++++++++---------
 .../Text_Cleanse_Spec.enso                     |  5 +-
 .../src/Common_Table_Operations/Util.enso      | 11 ++-
 test/Table_Tests/src/Database/Upload_Spec.enso | 19 ++--
 test/Table_Tests/src/Util.enso                 |  4 +-
 10 files changed, 155 insertions(+), 132 deletions(-)

diff --git a/distribution/lib/Standard/Snowflake/0.0.0-dev/src/Internal/Snowflake_Dialect.enso b/distribution/lib/Standard/Snowflake/0.0.0-dev/src/Internal/Snowflake_Dialect.enso
index a1d7439d3f8..64b4129ae52 100644
--- a/distribution/lib/Standard/Snowflake/0.0.0-dev/src/Internal/Snowflake_Dialect.enso
+++ b/distribution/lib/Standard/Snowflake/0.0.0-dev/src/Internal/Snowflake_Dialect.enso
@@ -4,6 +4,7 @@ from Standard.Base import all
 import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
 import Standard.Base.Errors.Illegal_State.Illegal_State
 import Standard.Base.Errors.Unimplemented.Unimplemented
+import Standard.Base.Runtime.Ref.Ref
 import Standard.Table.Internal.Problem_Builder.Problem_Builder
 import Standard.Table.Internal.Vector_Builder.Vector_Builder
@@ -167,13 +168,12 @@ type Snowflake_Dialect
         mapping = self.get_type_mapping
         source_type = mapping.sql_type_to_value_type column.sql_type_reference.get
         target_value_type = mapping.sql_type_to_value_type target_type
-        # Boolean to Numeric casts need special handling:
-        transformed_expression = case source_type.is_boolean && target_value_type.is_numeric of
-            True ->
-                SQL_Expression.Operation "IIF" [Internals_Access.column_expression column, SQL_Expression.Literal "1", SQL_Expression.Literal "0"]
-            False -> Internals_Access.column_expression column
-        target_type_sql_text = mapping.sql_type_to_text target_type
-        new_expression = SQL_Expression.Operation "CAST" [transformed_expression, SQL_Expression.Literal target_type_sql_text]
+
+        new_expression = make_custom_cast column source_type target_value_type . if_nothing <|
+            source_expression = Internals_Access.column_expression column
+            target_type_sql_text = mapping.sql_type_to_text target_type
+            SQL_Expression.Operation "CAST" [source_expression, SQL_Expression.Literal target_type_sql_text]
+
         new_sql_type_reference = infer_result_type_from_database_callback new_expression
         Internal_Column.Value column.name new_sql_type_reference new_expression
@@ -699,5 +699,29 @@ make_distinct_extension distinct_expressions =
         SQL_Builder.code " QUALIFY ROW_NUMBER() OVER (PARTITION BY " ++ joined ++ " ORDER BY 1) = 1 "
     Context_Extension.Value position=550 expressions=distinct_expressions run_generator=run_generator
+## PRIVATE
+   Returns a custom cast expression if it is needed for a specific pair of types,
+   or Nothing if the default cast is sufficient.
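+
+   A sketch of the intended output, for illustration only (the column names
+   below are made up and the exact SQL rendering may differ): casting a
+   Boolean column "b" to a float type is expected to generate
+   `CAST(IIF("b", 1, 0) AS FLOAT)` rather than a plain cast, and casting a
+   text column "t" to a bounded `VARCHAR(5)` is expected to generate
+   `CAST(LEFT("t", 5) AS VARCHAR(5))`, truncating over-long values instead
+   of failing.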
+make_custom_cast (column : Internal_Column) (source_value_type : Value_Type) (target_value_type : Value_Type) -> SQL_Expression | Nothing =
+    result = Ref.new Nothing
+
+    # A custom expression is needed for the Boolean to Float cast, as a regular CAST does not support it.
+    if source_value_type.is_boolean && target_value_type.is_floating_point then
+        result.put <|
+            SQL_Expression.Operation "IIF" [Internals_Access.column_expression column, SQL_Expression.Literal "1", SQL_Expression.Literal "0"]
+
+    # If the text length is bounded, we need to add a `LEFT` call to truncate the value to the desired length, avoiding errors.
+    if target_value_type.is_text && target_value_type.size.is_nothing.not then
+        # But we only do so if the source type was also text.
+        # For any other source type, we keep the original behaviour - failing to convert if the text representation would not fit.
+        if source_value_type.is_text then result.put <|
+            max_size = (target_value_type.size : Integer)
+            truncated = SQL_Expression.Operation "LEFT" [Internals_Access.column_expression column, SQL_Expression.Literal max_size.to_text]
+            # We still need a cast to ensure the Value_Type gets the max size in it - unfortunately, LEFT returns a type with no size limit.
+            target_type_name = "VARCHAR(" + max_size.to_text + ")"
+            SQL_Expression.Operation "CAST" [truncated, SQL_Expression.Literal target_type_name]
+
+    result.get
+
 ## PRIVATE
 snowflake_dialect_name = "Snowflake"
diff --git a/lib/scala/pkg/src/main/resources/default/src/Main.enso b/lib/scala/pkg/src/main/resources/default/src/Main.enso
index 68123049992..4881697300e 100644
--- a/lib/scala/pkg/src/main/resources/default/src/Main.enso
+++ b/lib/scala/pkg/src/main/resources/default/src/Main.enso
@@ -3,6 +3,7 @@ from Standard.Table import all
 from Standard.Database import all
 from Standard.AWS import all
 from Standard.Google_Api import all
+from Standard.Snowflake import all
 import Standard.Visualization
 
 main =
diff --git a/test/Snowflake_Tests/src/Snowflake_Spec.enso b/test/Snowflake_Tests/src/Snowflake_Spec.enso
index b4aa1438ea0..836fb73124b 100644
--- a/test/Snowflake_Tests/src/Snowflake_Spec.enso
+++ b/test/Snowflake_Tests/src/Snowflake_Spec.enso
@@ -184,7 +184,7 @@ snowflake_specific_spec suite_builder default_connection db_name setup =
         # The integer column is treated as NUMBER(38, 0) in Snowflake so the value type reflects that:
         i.at "Value Type" . to_vector . should_equal [Value_Type.Char, Value_Type.Decimal 38 0, Value_Type.Boolean, Value_Type.Float]
 
-    group_builder.specify "should return Table information, also for aggregated results" <|
+    group_builder.specify "should return Table information, also for aggregated results" pending="TODO: fix https://github.com/enso-org/enso/issues/10611" <|
         i = data.t.aggregate columns=[Aggregate_Column.Concatenate "strs", Aggregate_Column.Sum "ints", Aggregate_Column.Count_Distinct "bools"] . column_info
         i.at "Column" . to_vector . should_equal ["Concatenate strs", "Sum ints", "Count Distinct bools"]
         i.at "Items Count" . to_vector . should_equal [1, 1, 1]
@@ -240,7 +240,7 @@ snowflake_specific_spec suite_builder default_connection db_name setup =
         # We expect warnings about coercing Decimal types
         w1 = Problems.expect_warning Inexact_Type_Coercion t1
         w1.requested_type . should_equal (Value_Type.Decimal 24 -3)
-        w1.actual_type . should_equal (Value_Type.Decimal 38 0)
+        w1.actual_type . should_equal (Value_Type.Decimal Nothing Nothing)
 
         t1.update_rows (Table.new [["d1", [1.2345678910]], ["d2", [12.3456]], ["d3", [1234567.8910]], ["f", [1.5]]]) update_action=Update_Action.Insert . 
should_succeed @@ -257,18 +257,6 @@ snowflake_specific_spec suite_builder default_connection db_name setup = m1.at "d3" . to_vector . should_equal [1234568] m1.at "f" . to_vector . should_equal [1.5] - suite_builder.group "[Snowflake] Dialect-specific codegen" group_builder-> - data = Snowflake_Info_Data.setup default_connection - - group_builder.teardown <| - data.teardown - - group_builder.specify "should generate queries for the Distinct operation" <| - t = data.connection.query (SQL_Query.Table_Name data.tinfo) - code_template = 'SELECT "{Tinfo}"."strs" AS "strs", "{Tinfo}"."ints" AS "ints", "{Tinfo}"."bools" AS "bools", "{Tinfo}"."doubles" AS "doubles" FROM (SELECT DISTINCT ON ("{Tinfo}_inner"."strs") "{Tinfo}_inner"."strs" AS "strs", "{Tinfo}_inner"."ints" AS "ints", "{Tinfo}_inner"."bools" AS "bools", "{Tinfo}_inner"."doubles" AS "doubles" FROM (SELECT "{Tinfo}"."strs" AS "strs", "{Tinfo}"."ints" AS "ints", "{Tinfo}"."bools" AS "bools", "{Tinfo}"."doubles" AS "doubles" FROM "{Tinfo}" AS "{Tinfo}") AS "{Tinfo}_inner") AS "{Tinfo}"' - expected_code = code_template.replace "{Tinfo}" data.tinfo - t.distinct ["strs"] . to_sql . prepare . should_equal [expected_code, []] - suite_builder.group "[Snowflake] Table.aggregate should correctly infer result types" group_builder-> data = Snowflake_Aggregate_Data.setup default_connection @@ -284,8 +272,13 @@ snowflake_specific_spec suite_builder default_connection db_name setup = group_builder.specify "Counts" <| r = data.t.aggregate columns=[Aggregate_Column.Count, Aggregate_Column.Count_Empty "txt", Aggregate_Column.Count_Not_Empty "txt", Aggregate_Column.Count_Distinct "i1", Aggregate_Column.Count_Not_Nothing "i2", Aggregate_Column.Count_Nothing "i3"] r.column_count . should_equal 6 - r.columns.each column-> - column.value_type . should_equal (Value_Type.Decimal 18 0) + + r.at "Count" . value_type . should_equal (Value_Type.Decimal 18 0) + r.at "Count Empty txt" . value_type . should_equal (Value_Type.Decimal 13 0) + r.at "Count Not Empty txt" . value_type . should_equal (Value_Type.Decimal 13 0) + r.at "Count Distinct i1" . value_type . should_equal (Value_Type.Decimal 18 0) + r.at "Count Not Nothing i2" . value_type . should_equal (Value_Type.Decimal 18 0) + r.at "Count Nothing i3" . value_type . should_equal (Value_Type.Decimal 13 0) group_builder.specify "Sum" <| r = data.t.aggregate columns=[Aggregate_Column.Sum "i1", Aggregate_Column.Sum "i2", Aggregate_Column.Sum "i3", Aggregate_Column.Sum "i4", Aggregate_Column.Sum "r1", Aggregate_Column.Sum "r2"] @@ -308,24 +301,14 @@ snowflake_specific_spec suite_builder default_connection db_name setup = suite_builder.group "[Snowflake] Warning/Error handling" group_builder-> - group_builder.specify "query warnings should be propagated" <| - long_name = (Name_Generator.random_name "T") + ("a" * 100) - r = default_connection.get.execute_update 'CREATE TEMPORARY TABLE "'+long_name+'" ("A" VARCHAR)' - w1 = Problems.expect_only_warning SQL_Warning r - # The display text may itself be truncated, so we just check the first words. - w1.to_display_text . should_contain "identifier" - # And check the full message for words that could be truncated in short message. - w1.message . should_contain "truncated to" - - table = default_connection.get.query (SQL_Query.Raw_SQL 'SELECT 1 AS "'+long_name+'"') - w2 = Problems.expect_only_warning SQL_Warning table - w2.message . should_contain "truncated" - effective_name = table.column_names . at 0 - effective_name . 
should_not_equal long_name - long_name.should_contain effective_name - group_builder.specify "is capable of handling weird tables" <| - default_connection.get.execute_update 'CREATE TEMPORARY TABLE "empty-column-name" ("" VARCHAR)' . should_fail_with SQL_Error + default_connection.get.execute_update 'CREATE TEMPORARY TABLE "empty-column-name" ("" VARCHAR)' . should_succeed + t = default_connection.get.query "empty-column-name" + t.columns.length . should_equal 1 + # The column is renamed to something valid upon read: + t.column_names . should_equal ["Column 1"] + # Should be readable: + t.read . at 0 . to_vector . should_equal [] Problems.assume_no_problems <| default_connection.get.execute_update 'CREATE TEMPORARY TABLE "clashing-unicode-names" ("ś" VARCHAR, "s\u0301" INTEGER)' @@ -343,7 +326,9 @@ snowflake_specific_spec suite_builder default_connection db_name setup = r3.catch.cause . should_be_a Duplicate_Output_Column_Names r4 = default_connection.get.query 'SELECT 1 AS ""' - r4.should_fail_with SQL_Error + r4.should_fail_with Illegal_Argument + r4.catch.to_display_text . should_contain "The provided custom SQL query is invalid and may suffer data corruption" + r4.catch.to_display_text . should_contain "The name '' is invalid" suite_builder.group "[Snowflake] Edge Cases" group_builder-> group_builder.specify "materialize should respect the overridden type" pending="TODO" <| @@ -525,31 +510,31 @@ snowflake_specific_spec suite_builder default_connection db_name setup = suite_builder.group "[Snowflake] math functions" group_builder-> group_builder.specify "round, trunc, ceil, floor" <| col = table_builder [["x", [0.1, 0.9, 3.1, 3.9, -0.1, -0.9, -3.1, -3.9]]] . at "x" - col . cast Value_Type.Integer . ceil . value_type . should_equal Value_Type.Float + col . cast Value_Type.Integer . ceil . value_type . should_equal (Value_Type.Decimal 38 0) col . cast Value_Type.Float . round . value_type . should_equal Value_Type.Float - col . cast Value_Type.Integer . round . value_type . should_equal Value_Type.Float - col . cast Value_Type.Decimal . round . value_type . should_equal Value_Type.Decimal + col . cast Value_Type.Integer . round . value_type . should_equal (Value_Type.Decimal 38 0) + col . cast Value_Type.Decimal . round . value_type . should_equal (Value_Type.Decimal 38 0) col . cast Value_Type.Float . round 1 . value_type . should_equal Value_Type.Float - col . cast Value_Type.Integer . round 1 . value_type . should_equal Value_Type.Decimal - col . cast Value_Type.Decimal . round 1 . value_type . should_equal Value_Type.Decimal + col . cast Value_Type.Integer . round 1 . value_type . should_equal (Value_Type.Decimal 38 0) + col . cast Value_Type.Decimal . round 1 . value_type . should_equal (Value_Type.Decimal 38 0) col . cast Value_Type.Float . round use_bankers=True . value_type . should_equal Value_Type.Float col . cast Value_Type.Integer . round use_bankers=True . value_type . should_equal Value_Type.Float - col . cast Value_Type.Decimal . round use_bankers=True . value_type . should_equal Value_Type.Decimal + col . cast Value_Type.Decimal . round use_bankers=True . value_type . should_equal Value_Type.Float col . cast Value_Type.Float . ceil . value_type . should_equal Value_Type.Float - col . cast Value_Type.Integer . ceil . value_type . should_equal Value_Type.Float - col . cast Value_Type.Decimal . ceil . value_type . should_equal Value_Type.Decimal + col . cast Value_Type.Integer . ceil . value_type . should_equal (Value_Type.Decimal 38 0) + col . cast Value_Type.Decimal . 
ceil . value_type . should_equal (Value_Type.Decimal 38 0) col . cast Value_Type.Float . floor . value_type . should_equal Value_Type.Float - col . cast Value_Type.Integer . floor . value_type . should_equal Value_Type.Float - col . cast Value_Type.Decimal . floor . value_type . should_equal Value_Type.Decimal + col . cast Value_Type.Integer . floor . value_type . should_equal (Value_Type.Decimal 38 0) + col . cast Value_Type.Decimal . floor . value_type . should_equal (Value_Type.Decimal 38 0) col . cast Value_Type.Float . truncate . value_type . should_equal Value_Type.Float - col . cast Value_Type.Integer . truncate . value_type . should_equal Value_Type.Float - col . cast Value_Type.Decimal . truncate . value_type . should_equal Value_Type.Decimal + col . cast Value_Type.Integer . truncate . value_type . should_equal (Value_Type.Decimal 38 0) + col . cast Value_Type.Decimal . truncate . value_type . should_equal (Value_Type.Decimal 38 0) do_op n op = table = light_table_builder [["x", [n]]] @@ -578,18 +563,6 @@ snowflake_specific_spec suite_builder default_connection db_name setup = do_op Number.positive_infinity op . should_equal Number.positive_infinity do_op Number.negative_infinity op . should_equal Number.negative_infinity - group_builder.specify "round returns the correct type" <| - do_round 231.2 1 . should_be_a Float - do_round 231.2 0 . should_be_a Float - do_round 231.2 . should_be_a Float - do_round 231.2 -1 . should_be_a Float - - group_builder.specify "round returns the correct type" <| - do_round 231 1 . should_be_a Float - do_round 231 0 . should_be_a Float - do_round 231 . should_be_a Float - do_round 231 -1 . should_be_a Float - type Lazy_Ref Value ~get @@ -604,7 +577,6 @@ add_snowflake_specs suite_builder create_connection_fn db_name = ix = name_counter.get name_counter . put ix+1 name = Name_Generator.random_name "table_"+ix.to_text - in_mem_table = Table.new columns in_mem_table.select_into_database_table (connection.if_nothing default_connection.get) name primary_key=Nothing temporary=True light_table_builder columns = @@ -662,7 +634,7 @@ add_table_specs suite_builder = cloud_setup.with_prepared_environment <| with_secret "my_snowflake_username" base_details.credentials.username username_secret-> with_secret "my_snowflake_password" base_details.credentials.password password_secret-> secret_credentials = Credentials.Username_And_Password username_secret password_secret - details = Snowflake_Details.Snowflake base_details.account_name secret_credentials base_details.database base_details.schema base_details.warehouse + details = Snowflake_Details.Snowflake base_details.account secret_credentials base_details.database base_details.schema base_details.warehouse connection = Database.connect details connection.should_succeed Panic.with_finalizer connection.close <| diff --git a/test/Table_Tests/src/Common_Table_Operations/Aggregate_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Aggregate_Spec.enso index 7c01951849c..c107f102e87 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Aggregate_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Aggregate_Spec.enso @@ -1061,6 +1061,26 @@ add_specs suite_builder setup = m1.columns.first.name . should_equal "Count Distinct A B" m1.columns.first.to_vector . 
should_equal [3] + group_builder.specify "should work correctly with Boolean columns" pending=(if prefix.contains "Snowflake" then "TODO: fix https://github.com/enso-org/enso/issues/10611") <| + table = table_builder [["A", [True, True, True]], ["B", [False, False, False]], ["C", [True, False, True]], ["D", [Nothing, False, True]]] + + t_with_nulls = table.aggregate columns=[..Count_Distinct "A", ..Count_Distinct "B", ..Count_Distinct "C", ..Count_Distinct "D"] + m1 = materialize t_with_nulls + m1.column_count . should_equal 4 + m1.at "Count Distinct A" . to_vector . should_equal [1] + m1.at "Count Distinct B" . to_vector . should_equal [1] + m1.at "Count Distinct C" . to_vector . should_equal [2] + m1.at "Count Distinct D" . to_vector . should_equal [3] + + t_without_nulls = table.aggregate columns=[..Count_Distinct "A" ignore_nothing=True, ..Count_Distinct "B" ignore_nothing=True, ..Count_Distinct "C" ignore_nothing=True, ..Count_Distinct "D" ignore_nothing=True] + m2 = materialize t_without_nulls + m2.column_count . should_equal 4 + m2.at "Count Distinct A" . to_vector . should_equal [1] + m2.at "Count Distinct B" . to_vector . should_equal [1] + m2.at "Count Distinct C" . to_vector . should_equal [2] + # The NULL is ignored, and not counted towards the total + m2.at "Count Distinct D" . to_vector . should_equal [2] + suite_builder.group prefix+"Table.aggregate Standard_Deviation" pending=(resolve_pending test_selection.std_dev) group_builder-> group_builder.specify "should correctly handle single elements" <| r1 = table_builder [["X", [1]]] . aggregate columns=[Standard_Deviation "X" (population=False), Standard_Deviation "X" (population=True)] diff --git a/test/Table_Tests/src/Common_Table_Operations/Derived_Columns_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Derived_Columns_Spec.enso index 5ad5b633646..ca82ef82bc5 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Derived_Columns_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Derived_Columns_Spec.enso @@ -18,7 +18,7 @@ add_specs suite_builder setup = table_builder = setup.light_table_builder pending_datetime = if setup.test_selection.date_time.not then "Date/Time operations are not supported by this backend." suite_builder.group prefix+"(Derived_Columns_Spec) Table.set with Simple_Expression" group_builder-> - group_builder.specify "arithmetics" <| + group_builder.specify "arithmetics" pending=(if prefix.contains "Snowflake" then "TODO: re-enable these once https://github.com/enso-org/enso/pull/10583 is merged") <| t = table_builder [["A", [1, 2]], ["B", [10, 40]]] t.set (Simple_Expression.Simple_Expr (Column_Ref.Name "A") Simple_Calculation.Copy) "C" . at "C" . to_vector . should_equal [1, 2] t.set (..Simple_Expr (..Name "A") ..Copy) "C" . at "C" . to_vector . 
should_equal [1, 2] diff --git a/test/Table_Tests/src/Common_Table_Operations/Join/Replace_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Join/Replace_Spec.enso index f53b1c48599..a36a2178252 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Join/Replace_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Join/Replace_Spec.enso @@ -14,167 +14,170 @@ import project.Util main filter=Nothing = run_default_backend add_specs filter +type Lazy_Ref + Value ~get add_specs suite_builder setup = prefix = setup.prefix suite_builder.group prefix+"Table.replace" group_builder-> table_builder = build_sorted_table setup - table_builder_typed columns value_type = - cast_columns = columns.map c-> - Column.from_vector (c.at 0) (c.at 1) . cast value_type - build_sorted_table setup cast_columns + table1 = Lazy_Ref.Value <| + table_builder [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']]] + table2 = Lazy_Ref.Value <| + table_builder [['x', [2, 1, 4, 3]], ['z', [20, 10, 40, 30]]] + table3 = Lazy_Ref.Value <| + table_builder [['x', [1, 2, 3, 4]], ['y', ['a', 'b', 'c', 'd']]] + empty_table = Lazy_Ref.Value <| + in_memory = Table.new [['x', [1]], ['z', [2]]] + empty = in_memory.take 0 + table_builder empty.columns group_builder.specify "should be able to replace values via a lookup table, using from/to column defaults" <| - table = table_builder [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']]] - lookup_table = table_builder [['x', [2, 1, 4, 3]], ['z', [20, 10, 40, 30]]] - expected = table_builder [['x', [10, 20, 20, 30, 40]], ['y', ['a', 'b', 'e', 'c', 'd']]] + table = table1.get + lookup_table = table2.get + expected = Table.new [['x', [10, 20, 20, 30, 40]], ['y', ['a', 'b', 'e', 'c', 'd']]] result = table.replace lookup_table 'x' . sort ["x", "y"] result . should_equal expected group_builder.specify "should be able to replace values via a lookup table, specifying from/to columns" <| - table = table_builder [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']]] + table = table1.get lookup_table = table_builder [['d', [4, 5, 6, 7]], ['x', [2, 1, 4, 3]], ['d2', [5, 6, 7, 8]], ['z', [20, 10, 40, 30]]] - expected = table_builder [['x', [10, 20, 20, 30, 40]], ['y', ['a', 'b', 'e', 'c', 'd']]] + expected = Table.new [['x', [10, 20, 20, 30, 40]], ['y', ['a', 'b', 'e', 'c', 'd']]] result = table.replace lookup_table 'x' 'x' 'z' . sort ["x", "y"] result . should_equal expected group_builder.specify "should be able to replace values via a lookup table provided as a Map" <| - table = table_builder [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']]] + table = table1.get lookup_table = Dictionary.from_vector [[2, 20], [1, 10], [4, 40], [3, 30]] - expected = table_builder [['x', [10, 20, 20, 30, 40]], ['y', ['a', 'b', 'e', 'c', 'd']]] + expected = Table.new [['x', [10, 20, 20, 30, 40]], ['y', ['a', 'b', 'e', 'c', 'd']]] result = table.replace lookup_table 'x' . sort ["x", "y"] result . 
should_equal expected group_builder.specify "should be able to replace multiple columns" <| table = table_builder [['x', [1, 2, 3, 4, 2]], ['x2', [2, 1, 2, 1, 4]], ['x3', [3, 4, 1, 3, 4]], ['y', ['a', 'b', 'c', 'd', 'e']]] lookup_table = table_builder [['d', [4, 5, 6, 7]], ['x', [2, 1, 4, 3]], ['d2', [5, 6, 7, 8]], ['z', [20, 10, 40, 30]]] - expected = table_builder [['x', [10, 20, 20, 30, 40]], ['x2', [20, 10, 40, 20, 10]], ['x3', [30, 40, 40, 10, 30]], ['y', ['a', 'b', 'e', 'c', 'd']]] + expected = Table.new [['x', [10, 20, 20, 30, 40]], ['x2', [20, 10, 40, 20, 10]], ['x3', [30, 40, 40, 10, 30]], ['y', ['a', 'b', 'e', 'c', 'd']]] result = table.replace lookup_table ['x', 'x2', 'x3'] 'x' 'z' . sort ["x", "y"] result . should_equal expected group_builder.specify "should be able to replace multiple columns selected by regex" <| table = table_builder [['x', [1, 2, 3, 4, 2]], ['x2', [2, 1, 2, 1, 4]], ['x3', [3, 4, 1, 3, 4]], ['y', ['a', 'b', 'c', 'd', 'e']]] lookup_table = table_builder [['d', [4, 5, 6, 7]], ['x', [2, 1, 4, 3]], ['d2', [5, 6, 7, 8]], ['z', [20, 10, 40, 30]]] - expected = table_builder [['x', [10, 20, 20, 30, 40]], ['x2', [20, 10, 40, 20, 10]], ['x3', [30, 40, 40, 10, 30]], ['y', ['a', 'b', 'e', 'c', 'd']]] + expected = Table.new [['x', [10, 20, 20, 30, 40]], ['x2', [20, 10, 40, 20, 10]], ['x3', [30, 40, 40, 10, 30]], ['y', ['a', 'b', 'e', 'c', 'd']]] result = table.replace lookup_table [(regex 'x.*')] 'x' 'z' . sort ["x", "y"] result . should_equal expected group_builder.specify "should be able to replace multiple columns selected by type" <| table = table_builder [['x', [1, 2, 3, 4, 2]], ['x2', [2, 1, 2, 1, 4]], ['x3', [3, 4, 1, 3, 4]], ['y', ['a', 'b', 'c', 'd', 'e']]] lookup_table = table_builder [['d', [4, 5, 6, 7]], ['x', [2, 1, 4, 3]], ['d2', [5, 6, 7, 8]], ['z', [20, 10, 40, 30]]] - expected = table_builder [['x', [10, 20, 20, 30, 40]], ['x2', [20, 10, 40, 20, 10]], ['x3', [30, 40, 40, 10, 30]], ['y', ['a', 'b', 'e', 'c', 'd']]] + expected = Table.new [['x', [10, 20, 20, 30, 40]], ['x2', [20, 10, 40, 20, 10]], ['x3', [30, 40, 40, 10, 30]], ['y', ['a', 'b', 'e', 'c', 'd']]] result = table.replace lookup_table [..By_Type ..Integer] 'x' 'z' . sort ["x", "y"] result . should_equal expected group_builder.specify "should fail with Missing_Input_Columns if the specified columns do not exist" <| - table = table_builder [['x', [1, 2, 3, 4]], ['y', ['a', 'b', 'c', 'd']]] - lookup_table = table_builder [['x', [2, 1, 4, 3]], ['z', [20, 10, 40, 30]]] + table = table3.get + lookup_table = table2.get table.replace lookup_table 'q' 'x' 'z' . should_fail_with Missing_Input_Columns table.replace lookup_table 'x' 'q' 'z' . should_fail_with Missing_Input_Columns table.replace lookup_table 'x' 'x' 'q' . should_fail_with Missing_Input_Columns group_builder.specify "can allow unmatched rows" <| - table = table_builder [['x', [1, 2, 3, 4]], ['y', ['a', 'b', 'c', 'd']]] + table = table3.get lookup_table = table_builder [['x', [4, 3, 1]], ['z', [40, 30, 10]]] - expected = table_builder [['x', [2, 10, 30, 40]], ['y', ['b', 'a', 'c', 'd']]] + expected = Table.new [['x', [2, 10, 30, 40]], ['y', ['b', 'a', 'c', 'd']]] result = table.replace lookup_table 'x' . sort "x" result . should_equal expected group_builder.specify "fails on unmatched rows" <| - table = table_builder [['x', [1, 2, 3, 4]], ['y', ['a', 'b', 'c', 'd']]] + table = table3.get lookup_table = table_builder [['x', [4, 3, 1]], ['z', [40, 30, 10]]] table.replace lookup_table 'x' allow_unmatched_rows=False . 
should_fail_with Unmatched_Rows_In_Lookup group_builder.specify "fails on non-unique keys" <| - table = table_builder [['x', [1, 2, 3, 4]], ['y', ['a', 'b', 'c', 'd']]] + table = table3.get lookup_table = table_builder [['x', [2, 1, 4, 1, 3]], ['z', [20, 10, 40, 11, 30]]] table.replace lookup_table 'x' . should_fail_with Non_Unique_Key group_builder.specify "should avoid name clashes in the (internally) generated column name" <| - table = table_builder [['duplicate_key', [1, 2, 3, 4]], ['y', ['a', 'b', 'c', 'd']]] - lookup_table = table_builder [['x', [2, 1, 4, 3]], ['z', [20, 10, 40, 30]]] - expected = table_builder [['duplicate_key', [10, 20, 30, 40]], ['y', ['a', 'b', 'c', 'd']]] - result = table.replace lookup_table 'duplicate_key' + table = table3.get.rename_columns [['x', 'duplicate_key']] + lookup_table = table2.get + expected = Table.new [['duplicate_key', [10, 20, 30, 40]], ['y', ['a', 'b', 'c', 'd']]] + result = table.replace lookup_table 'duplicate_key' . sort 'y' result . should_equal expected group_builder.specify "(edge-case) should allow lookup with itself" <| table = table_builder [['x', [2, 1, 4, 3]], ['y', [20, 10, 40, 30]]] - expected = table_builder [['x', [10, 20, 30, 40]], ['y', [10, 20, 30, 40]]] + expected = Table.new [['x', [10, 20, 30, 40]], ['y', [10, 20, 30, 40]]] result = table.replace table 'x' . sort "x" result . should_equal expected group_builder.specify "should not merge columns other than the one specified in the `column` param" <| table = table_builder [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']], ['q', [4, 5, 6, 7, 8]]] lookup_table = table_builder [['x', [2, 1, 4, 3]], ['z', [20, 10, 40, 30]], ['q', [40, 50, 60, 70]]] - expected = table_builder [['x', [10, 20, 20, 30, 40]], ['y', ['a', 'b', 'e', 'c', 'd']], ['q', [4, 5, 8, 6, 7]]] + expected = Table.new [['x', [10, 20, 20, 30, 40]], ['y', ['a', 'b', 'e', 'c', 'd']], ['q', [4, 5, 8, 6, 7]]] result = table.replace lookup_table 'x' . sort ["x", "y"] result . should_equal expected group_builder.specify "should fail on null key values in lookup table" <| - table = table_builder [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']]] + table = table1.get lookup_table = table_builder [['x', [2, 1, Nothing, 3]], ['z', [20, 10, 40, 30]]] table.replace lookup_table 'x' . should_fail_with Null_Values_In_Key_Columns - group_builder.specify "should fail on an empty lookup table with no columns" <| - table = table_builder [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']]] - lookup_table = table_builder [] - table.replace lookup_table 'x' . should_fail_with Illegal_Argument - group_builder.specify "should accept an empty (but well-formed) lookup table if allow_unmatched_rows=True, but expect a warning" <| - table = table_builder [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']]] - lookup_table = table_builder_typed [['x', []], ['z', []]] Value_Type.Integer + table = table1.get + lookup_table = empty_table.get t = table.replace lookup_table 'x' . sort ['y'] t . should_equal table Problems.expect_warning (Empty_Error.Error "lookup_table") t group_builder.specify "should throw an error on an empty (but well-formed) lookup table and non-empty base table if allow_unmatched_rows=False" <| - table = table_builder [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']]] . sort ['x'] - lookup_table = table_builder_typed [['x', []], ['z', []]] Value_Type.Integer + table = table1.get . sort ['x'] + lookup_table = empty_table.get t = table.replace lookup_table 'x' allow_unmatched_rows=False . 
sort ['y']
            t . should_fail_with Unmatched_Rows_In_Lookup
 
        group_builder.specify "should accept an empty (but well-formed) lookup table if the base table is also empty, but expect a warning" <|
-            table = table_builder_typed [['x', []], ['z', []]] Value_Type.Integer
-            lookup_table = table_builder_typed [['x', []], ['z', []]] Value_Type.Integer
+            table = empty_table.get
+            lookup_table = empty_table.get
             t = table.replace lookup_table 'x'
             t . should_equal table
             Problems.expect_warning (Empty_Error.Error "lookup_table") t
 
        group_builder.specify "should accept an empty lookup map, if allow_unmatched_rows=True, but expect a warning" <|
-            table = table_builder [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']]]
+            table = table1.get
             t = table.replace Dictionary.empty 'x'
             t . should_equal table
             Problems.expect_warning (Empty_Error.Error "lookup_table") t
 
        group_builder.specify "should throw an error on an empty lookup map and non-empty base table if allow_unmatched_rows=False" <|
-            table = table_builder [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']]] . sort ['x']
+            table = table1.get . sort ['x']
             t = table.replace Dictionary.empty 'x' allow_unmatched_rows=False
             t . should_fail_with Unmatched_Rows_In_Lookup
             t.catch.example_key_values . should_equal [1]
 
        group_builder.specify "should accept an empty lookup map if the base table is also empty, but expect a warning" <|
-            table = table_builder_typed [['x', []], ['z', []]] Value_Type.Integer
+            table = empty_table.get
             t = table.replace Dictionary.empty 'x'
             t . should_equal table
             Problems.expect_warning (Empty_Error.Error "lookup_table") t
 
        group_builder.specify "should not allow from/to_column to be specified if the argument is a Map" <|
-            table = table_builder [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']]]
+            table = table1.get
             lookup_table = Dictionary.from_vector [[2, 20], [1, 10], [4, 40], [3, 30]]
             table.replace lookup_table 'x' from_column=8 . should_fail_with Illegal_Argument
             table.replace lookup_table 'x' to_column=9 . should_fail_with Illegal_Argument
             table.replace lookup_table 'x' from_column=8 to_column=9 . should_fail_with Illegal_Argument
 
        if setup.test_selection.date_time then group_builder.specify "should correctly infer types of columns (Date/Time)" <|
-            my_table = table_builder [["X", [1, 2, 3, 2]]]
+            my_table = setup.table_builder [["X", [1, 2, 3, 2]], ["rowid", [1, 2, 3, 4]]]
 
-            t1 = my_table.replace (Dictionary.from_vector [[1, Date.new 2021], [2, Date.new 2022], [3, Date.new 2023]]) "X" allow_unmatched_rows=False
+            t1 = my_table.replace (Dictionary.from_vector [[1, Date.new 2021], [2, Date.new 2022], [3, Date.new 2023]]) "X" allow_unmatched_rows=False . sort "rowid"
             t1.at "X" . value_type . should_equal Value_Type.Date
             t1.at "X" . to_vector . should_equal [Date.new 2021, Date.new 2022, Date.new 2023, Date.new 2022]
 
-            t2 = my_table.replace (Dictionary.from_vector [[1, Time_Of_Day.new 1 2 3], [2, Time_Of_Day.new 4 5 6], [3, Time_Of_Day.new 7 8 9]]) "X" allow_unmatched_rows=False
+            t2 = my_table.replace (Dictionary.from_vector [[1, Time_Of_Day.new 1 2 3], [2, Time_Of_Day.new 4 5 6], [3, Time_Of_Day.new 7 8 9]]) "X" allow_unmatched_rows=False . sort "rowid"
             t2.at "X" . value_type . should_equal Value_Type.Time
             t2.at "X" . to_vector . 
should_equal [Time_Of_Day.new 1 2 3, Time_Of_Day.new 4 5 6, Time_Of_Day.new 7 8 9, Time_Of_Day.new 4 5 6] - t3 = my_table.replace (Dictionary.from_vector [[1, Date_Time.new 2021 1 1 1 1 1], [2, Date_Time.new 2022 2 2 2 2 2], [3, Date_Time.new 2023 3 3 3 3 3]]) "X" allow_unmatched_rows=False + t3 = my_table.replace (Dictionary.from_vector [[1, Date_Time.new 2021 1 1 1 1 1], [2, Date_Time.new 2022 2 2 2 2 2], [3, Date_Time.new 2023 3 3 3 3 3]]) "X" allow_unmatched_rows=False . sort "rowid" t3.at "X" . value_type . should_be_a (Value_Type.Date_Time ...) diff --git a/test/Table_Tests/src/Common_Table_Operations/Text_Cleanse_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Text_Cleanse_Spec.enso index 922a2f6d16f..e4b2a231a8d 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Text_Cleanse_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Text_Cleanse_Spec.enso @@ -12,7 +12,7 @@ import Standard.Test.Extensions from Standard.Database.Errors import Unsupported_Database_Operation from Standard.Table import Column, Table, Value_Type, Auto, Bits from Standard.Table.Errors import Invalid_Value_Type, Invalid_Column_Names -from project.Common_Table_Operations.Util import run_default_backend +from project.Common_Table_Operations.Util import run_default_backend, build_sorted_table from Standard.Test import all @@ -24,13 +24,12 @@ main filter=Nothing = run_default_backend add_specs filter add_specs suite_builder setup = prefix = setup.prefix materialize = setup.materialize - table_builder = setup.table_builder suite_builder.group prefix+"Table Text Cleanse" group_builder-> flight = ["Flight", [" BA0123", "BA0123 ", " SG0456 ", "BA 0123", " S G 0 4 5 6 "]] passenger = ["Passenger", [" Albert Einstein", "Marie Curie ", " Isaac Newton ", "Stephen Hawking", " A d a Lovelace "]] ticket_price = ["Ticket Price", [101, 576, 73, 112, 74]] table = Lazy_Ref.Value <| - table_builder [flight, passenger, ticket_price] + build_sorted_table setup [flight, passenger, ticket_price] group_builder.specify "should remove leading whitespace" <| clean_flight = ["Flight", ["BA0123", "BA0123 ", "SG0456 ", "BA 0123", "S G 0 4 5 6 "]] clean_passenger = ["Passenger", ["Albert Einstein", "Marie Curie ", "Isaac Newton ", "Stephen Hawking", "A d a Lovelace "]] diff --git a/test/Table_Tests/src/Common_Table_Operations/Util.enso b/test/Table_Tests/src/Common_Table_Operations/Util.enso index c19ffea17ac..c4317ad89c0 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Util.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Util.enso @@ -1,5 +1,5 @@ from Standard.Base import all -from Standard.Table import Table, Value_Type +from Standard.Table import Column, Table, Value_Type from Standard.Test import all @@ -72,9 +72,12 @@ Error.should_equal_tz_agnostic self other = build_sorted_table setup table_structure = # Workaround for https://github.com/enso-org/enso/issues/10321 if setup.prefix.contains "Snowflake" . not then setup.table_builder table_structure else - row_count = table_structure.first.second.length - new_structure = table_structure+[["row_id", (0.up_to row_count) . to_vector]] - setup.table_builder new_structure . sort "row_id" . remove_columns ["row_id"] + row_count = case table_structure.first of + def : Vector -> def.second.length + col : Column -> col.length + if row_count == 0 then setup.table_builder table_structure else + new_structure = table_structure+[["row_id", (0.up_to row_count) . to_vector]] + setup.table_builder new_structure . sort "row_id" . 
remove_columns ["row_id"] ## PRIVATE is_float_or_decimal ~column = case column.value_type of diff --git a/test/Table_Tests/src/Database/Upload_Spec.enso b/test/Table_Tests/src/Database/Upload_Spec.enso index 86581e3fbd9..ea95f00eeb4 100644 --- a/test/Table_Tests/src/Database/Upload_Spec.enso +++ b/test/Table_Tests/src/Database/Upload_Spec.enso @@ -65,7 +65,7 @@ database_table_builder name_prefix args primary_key=[] connection = database, so features relying on persistence cannot really be tested. add_specs suite_builder setup make_new_connection persistent_connector=True = prefix = setup.prefix - + snowflake_pending = if prefix.contains "Snowflake" then "Table Upload / Update is not yet fully implemented for Snowflake backend. TODO: https://github.com/enso-org/enso/issues/10609" data = Data.setup make_new_connection suite_builder.group prefix+"(Upload_Spec) Creating an empty table" group_builder-> group_builder.specify "should allow to specify the column names and types" <| @@ -336,7 +336,7 @@ add_specs suite_builder setup make_new_connection persistent_connector=True = tmp_connection.close data.connection.query (SQL_Query.Table_Name name) . at "X" . to_vector . should_equal_ignoring_order [1, 2, 3] - group_builder.specify "should not create any table if upload fails" <| + group_builder.specify "should not create any table if upload fails" pending=snowflake_pending <| normal_column = Column.from_vector "Y" ((100+0).up_to (100+1000)).to_vector exploding_column = make_mock_column "X" (0.up_to 1000).to_vector 512 exploding_table = Table.new [normal_column, exploding_column] @@ -371,7 +371,7 @@ add_specs suite_builder setup make_new_connection persistent_connector=True = r1 = data.in_memory_table.select_into_database_table data.connection (Name_Generator.random_name "primary-key-4") primary_key=["X", "nonexistent"] r1.should_fail_with Missing_Input_Columns - group_builder.specify "should fail if the primary key is not unique" <| + group_builder.specify "should fail if the primary key is not unique" pending=snowflake_pending <| t1 = Table.new [["X", [1, 2, 1]], ["Y", ['b', 'b', 'a']]] run_with_and_without_output <| @@ -525,7 +525,7 @@ add_specs suite_builder setup make_new_connection persistent_connector=True = r1 = db_table.select_into_database_table data.connection (Name_Generator.random_name "copied-table") temporary=True primary_key=["nonexistent"] r1.should_fail_with Missing_Input_Columns - group_builder.specify "should fail when the primary key is not unique" <| + group_builder.specify "should fail when the primary key is not unique" pending=snowflake_pending <| t = Table.new [["X", [1, 2, 1]], ["Y", ['b', 'b', 'a']]] db_table = t.select_into_database_table data.connection (Name_Generator.random_name "source-table") temporary=True primary_key=Nothing Problems.assume_no_problems db_table @@ -748,6 +748,7 @@ add_specs suite_builder setup make_new_connection persistent_connector=True = test_table_append group_builder (data : Data) source_table_builder target_table_builder = + snowflake_pending = if group_builder.name.contains "Snowflake" then "Table Upload / Update is not yet fully implemented for Snowflake backend. 
TODO: https://github.com/enso-org/enso/issues/10609" group_builder.specify "should be able to append new rows to a table" <| dest = target_table_builder [["X", [1, 2, 3]], ["Y", ['a', 'b', 'c']]] primary_key=["X"] connection=data.connection src = source_table_builder [["X", [4, 5, 6]], ["Y", ['d', 'e', 'f']]] connection=data.connection @@ -929,7 +930,7 @@ test_table_append group_builder (data : Data) source_table_builder target_table_ r5 = dest.update_rows src update_action=Update_Action.Align_Records key_columns=[] r5.should_fail_with Illegal_Argument - group_builder.specify "should fail if the key is not unique in the input table" <| + group_builder.specify "should fail if the key is not unique in the input table" pending=snowflake_pending <| d1 = target_table_builder [["X", [0, 10, 100]]] primary_key=["X"] connection=data.connection d2 = target_table_builder [["X", [0, 10, 100]]] connection=data.connection src = source_table_builder [["X", [1, 1, 3]]] connection=data.connection @@ -1095,7 +1096,7 @@ test_table_append group_builder (data : Data) source_table_builder target_table_ r2 = in_memory_table.update_rows t r2.should_fail_with Illegal_Argument - group_builder.specify "should warn if type widening occurs" <| + group_builder.specify "should warn if type widening occurs" pending=snowflake_pending <| dest = target_table_builder [["X", [3.25, 4.25, 10.0]]] connection=data.connection src = source_table_builder [["X", [1, 2, 0]]] connection=data.connection @@ -1145,7 +1146,7 @@ test_table_append group_builder (data : Data) source_table_builder target_table_ result = dest.update_rows src update_action=Update_Action.Insert key_columns=[] result.should_fail_with Column_Type_Mismatch - group_builder.specify "should not leave behind any garbage temporary tables if the upload fails" <| + group_builder.specify "should not leave behind any garbage temporary tables if the upload fails" pending=snowflake_pending <| dest_name = Name_Generator.random_name "dest-table" # We will make the upload fail by violating the NOT NULL constraint. dest = data.connection.create_table dest_name [Column_Description.Value "X" Value_Type.Integer [Column_Constraint.Not_Null]] temporary=True primary_key=[] . should_succeed @@ -1354,14 +1355,14 @@ tests group_builder (data : Data) make_new_connection source_table_builder (suff Problems.expect_only_warning Dry_Run_Operation r1 # The target table is unaffected. - dest.at "X" . to_vector . should_equal v + dest.at "X" . to_vector . should_equal_ignoring_order v src2 = source_table_builder [["X", [4]]] connection=data.connection r2 = dest.delete_rows src2 key_columns=["X"] allow_duplicate_matches=True # 3 rows would be deleted r2.should_equal 3 Problems.expect_only_warning Dry_Run_Operation r2 - dest.at "X" . to_vector . should_equal v + dest.at "X" . to_vector . should_equal_ignoring_order v if persistent_connector then group_builder.specify "will not overwrite an existing table with a dry-run table if the name is clashing (select_into_database_table)"+suffix <| diff --git a/test/Table_Tests/src/Util.enso b/test/Table_Tests/src/Util.enso index 723b29c7d43..b956fe84dd6 100644 --- a/test/Table_Tests/src/Util.enso +++ b/test/Table_Tests/src/Util.enso @@ -24,13 +24,13 @@ DB_Table.should_equal : DB_Table -> Integer -> Any DB_Table.should_equal self expected frames_to_skip=0 = t0 = self.read t1 = expected.read - t0 . should_equal t1 frames_to_skip + t0 . 
should_equal t1 frames_to_skip+1 DB_Column.should_equal : DB_Column -> Integer -> Any DB_Column.should_equal self expected frames_to_skip=0 = t0 = self.read t1 = expected.read - t0 . should_equal t1 frames_to_skip + t0 . should_equal t1 frames_to_skip+1 type Test_Failure_Error ## PRIVATE
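
Note on the `Lazy_Ref` helper that this patch adds to `Replace_Spec.enso` (and
already uses in `Snowflake_Spec.enso`): it wraps a value in a suspended (`~`)
constructor field, so the shared test tables are only built when a test first
calls `.get`. A minimal sketch of the pattern, using made-up in-memory data
rather than the spec's tables:

    from Standard.Base import all
    from Standard.Table import all

    type Lazy_Ref
        Value ~get

    main =
        shared = Lazy_Ref.Value <|
            IO.println "building the shared table..."
            Table.new [['x', [1, 2, 3]]]
        # Nothing has been printed yet - construction is deferred until the
        # first `.get`, so skipped or pending tests never pay the setup cost.
        IO.println shared.get.row_count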
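
Similarly, a runnable sketch of the `Table.replace` lookup behaviour exercised
throughout `Replace_Spec.enso`, using the same data as the specs but only the
in-memory backend (no database connection is assumed):

    from Standard.Base import all
    from Standard.Table import all

    main =
        table = Table.new [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']]]
        lookup = Dictionary.from_vector [[2, 20], [1, 10], [4, 40], [3, 30]]
        # Every value in 'x' is substituted through the lookup; 'y' passes through.
        replaced = table.replace lookup 'x' . sort ['x', 'y']
        # As asserted in the spec, this prints [10, 20, 20, 30, 40]:
        IO.println (replaced.at 'x' . to_vector)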