Snowflake Dialect - pt. 7 (#10612)

- Closes #9486
- All tests are succeeding or marked pending
- Created follow-up tickets for things that still need to be addressed, including:
  - Fixing upload / table update #10609
  - Fixing `Count_Distinct` on Boolean columns #10611
- Running the tests on CI is not part of this PR - to be addressed separately
Radosław Waśko 2024-07-23 08:58:11 +02:00 committed by GitHub
parent 71bae7e4b0
commit ba56f8e89b
10 changed files with 155 additions and 132 deletions

View File

@@ -4,6 +4,7 @@ from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Errors.Illegal_State.Illegal_State
import Standard.Base.Errors.Unimplemented.Unimplemented
import Standard.Base.Runtime.Ref.Ref
import Standard.Table.Internal.Problem_Builder.Problem_Builder
import Standard.Table.Internal.Vector_Builder.Vector_Builder
@@ -167,13 +168,12 @@ type Snowflake_Dialect
mapping = self.get_type_mapping
source_type = mapping.sql_type_to_value_type column.sql_type_reference.get
target_value_type = mapping.sql_type_to_value_type target_type
# Boolean to Numeric casts need special handling:
transformed_expression = case source_type.is_boolean && target_value_type.is_numeric of
True ->
SQL_Expression.Operation "IIF" [Internals_Access.column_expression column, SQL_Expression.Literal "1", SQL_Expression.Literal "0"]
False -> Internals_Access.column_expression column
new_expression = make_custom_cast column source_type target_value_type . if_nothing <|
source_expression = Internals_Access.column_expression column
target_type_sql_text = mapping.sql_type_to_text target_type
new_expression = SQL_Expression.Operation "CAST" [transformed_expression, SQL_Expression.Literal target_type_sql_text]
SQL_Expression.Operation "CAST" [source_expression, SQL_Expression.Literal target_type_sql_text]
new_sql_type_reference = infer_result_type_from_database_callback new_expression
Internal_Column.Value column.name new_sql_type_reference new_expression
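Note on the generated SQL: the default path emits a plain `CAST` with the target type rendered as literal text, while the Boolean-to-numeric special case (factored out into `make_custom_cast`, see the next hunk) swaps the cast for a conditional. A rough sketch of the two shapes, assuming a hypothetical table `"t"` with columns `"x"` and `"flag"`, and assuming the internal `IIF` operation renders as Snowflake's `IFF`:

```sql
-- Default path: the target type is rendered as text inside the CAST.
SELECT CAST("x" AS VARCHAR) AS "x" FROM "t";

-- Boolean source, numeric target: a plain CAST is not supported,
-- so True/False is mapped to 1/0 first.
SELECT IFF("flag", 1, 0) AS "flag" FROM "t";
```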
@@ -699,5 +699,29 @@ make_distinct_extension distinct_expressions =
SQL_Builder.code " QUALIFY ROW_NUMBER() OVER (PARTITION BY " ++ joined ++ " ORDER BY 1) = 1 "
Context_Extension.Value position=550 expressions=distinct_expressions run_generator=run_generator
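For context, `QUALIFY` filters on window-function results without a wrapping subquery, which is why this extension can deduplicate in place. For a hypothetical table `"t"` made distinct on `"strs"`, the generated query ends up shaped roughly like:

```sql
-- ROW_NUMBER keeps one row per distinct "strs" value;
-- ORDER BY 1 orders by a constant, so which row survives is arbitrary.
SELECT "strs", "ints", "bools" FROM "t"
QUALIFY ROW_NUMBER() OVER (PARTITION BY "strs" ORDER BY 1) = 1;
```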
## PRIVATE
Returns a custom cast expression if it is needed for a specific pair of types,
or Nothing if the default cast is sufficient.
make_custom_cast (column : Internal_Column) (source_value_type : Value_Type) (target_value_type : Value_Type) -> SQL_Expression | Nothing =
result = Ref.new Nothing
# Custom expression for boolean to float cast, as regular cast does not support it.
if source_value_type.is_boolean && target_value_type.is_floating_point then
result.put <|
SQL_Expression.Operation "IIF" [Internals_Access.column_expression column, SQL_Expression.Literal "1", SQL_Expression.Literal "0"]
# If the text length is bounded, we need to add a `LEFT` call to truncate to the desired length, avoiding errors.
if target_value_type.is_text && target_value_type.size.is_nothing.not then
# But we only do so if the source type was also text.
# For any other source type, we keep the original behaviour - failing to convert if the text representation would not fit.
if source_value_type.is_text then result.put <|
max_size = (target_value_type.size : Integer)
truncated = SQL_Expression.Operation "LEFT" [Internals_Access.column_expression column, SQL_Expression.Literal max_size.to_text]
# We still need a cast to ensure the Value_Type gets the max size in it - unfortunately, LEFT returns a type with no size limit.
target_type_name = "VARCHAR(" + max_size.to_text + ")"
SQL_Expression.Operation "CAST" [truncated, SQL_Expression.Literal target_type_name]
result.get
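The bounded-text branch thus emits a truncation wrapped in a re-cast. A sketch for a hypothetical text column `"txt"` cast to a 10-character bound:

```sql
-- LEFT guarantees the value fits, but its result type carries no length bound,
-- so an explicit CAST to VARCHAR(10) restores the bounded type.
SELECT CAST(LEFT("txt", 10) AS VARCHAR(10)) AS "txt" FROM "t";
```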
## PRIVATE
snowflake_dialect_name = "Snowflake"

View File

@@ -3,6 +3,7 @@ from Standard.Table import all
from Standard.Database import all
from Standard.AWS import all
from Standard.Google_Api import all
from Standard.Snowflake import all
import Standard.Visualization
main =

View File

@@ -184,7 +184,7 @@ snowflake_specific_spec suite_builder default_connection db_name setup =
# The integer column is treated as NUMBER(38, 0) in Snowflake so the value type reflects that:
i.at "Value Type" . to_vector . should_equal [Value_Type.Char, Value_Type.Decimal 38 0, Value_Type.Boolean, Value_Type.Float]
group_builder.specify "should return Table information, also for aggregated results" <|
group_builder.specify "should return Table information, also for aggregated results" pending="TODO: fix https://github.com/enso-org/enso/issues/10611" <|
i = data.t.aggregate columns=[Aggregate_Column.Concatenate "strs", Aggregate_Column.Sum "ints", Aggregate_Column.Count_Distinct "bools"] . column_info
i.at "Column" . to_vector . should_equal ["Concatenate strs", "Sum ints", "Count Distinct bools"]
i.at "Items Count" . to_vector . should_equal [1, 1, 1]
@@ -240,7 +240,7 @@ snowflake_specific_spec suite_builder default_connection db_name setup =
# We expect warnings about coercing Decimal types
w1 = Problems.expect_warning Inexact_Type_Coercion t1
w1.requested_type . should_equal (Value_Type.Decimal 24 -3)
w1.actual_type . should_equal (Value_Type.Decimal 38 0)
w1.actual_type . should_equal (Value_Type.Decimal Nothing Nothing)
t1.update_rows (Table.new [["d1", [1.2345678910]], ["d2", [12.3456]], ["d3", [1234567.8910]], ["f", [1.5]]]) update_action=Update_Action.Insert . should_succeed
@@ -257,18 +257,6 @@ snowflake_specific_spec suite_builder default_connection db_name setup =
m1.at "d3" . to_vector . should_equal [1234568]
m1.at "f" . to_vector . should_equal [1.5]
suite_builder.group "[Snowflake] Dialect-specific codegen" group_builder->
data = Snowflake_Info_Data.setup default_connection
group_builder.teardown <|
data.teardown
group_builder.specify "should generate queries for the Distinct operation" <|
t = data.connection.query (SQL_Query.Table_Name data.tinfo)
code_template = 'SELECT "{Tinfo}"."strs" AS "strs", "{Tinfo}"."ints" AS "ints", "{Tinfo}"."bools" AS "bools", "{Tinfo}"."doubles" AS "doubles" FROM (SELECT DISTINCT ON ("{Tinfo}_inner"."strs") "{Tinfo}_inner"."strs" AS "strs", "{Tinfo}_inner"."ints" AS "ints", "{Tinfo}_inner"."bools" AS "bools", "{Tinfo}_inner"."doubles" AS "doubles" FROM (SELECT "{Tinfo}"."strs" AS "strs", "{Tinfo}"."ints" AS "ints", "{Tinfo}"."bools" AS "bools", "{Tinfo}"."doubles" AS "doubles" FROM "{Tinfo}" AS "{Tinfo}") AS "{Tinfo}_inner") AS "{Tinfo}"'
expected_code = code_template.replace "{Tinfo}" data.tinfo
t.distinct ["strs"] . to_sql . prepare . should_equal [expected_code, []]
suite_builder.group "[Snowflake] Table.aggregate should correctly infer result types" group_builder->
data = Snowflake_Aggregate_Data.setup default_connection
@@ -284,8 +272,13 @@ snowflake_specific_spec suite_builder default_connection db_name setup =
group_builder.specify "Counts" <|
r = data.t.aggregate columns=[Aggregate_Column.Count, Aggregate_Column.Count_Empty "txt", Aggregate_Column.Count_Not_Empty "txt", Aggregate_Column.Count_Distinct "i1", Aggregate_Column.Count_Not_Nothing "i2", Aggregate_Column.Count_Nothing "i3"]
r.column_count . should_equal 6
r.columns.each column->
column.value_type . should_equal (Value_Type.Decimal 18 0)
r.at "Count" . value_type . should_equal (Value_Type.Decimal 18 0)
r.at "Count Empty txt" . value_type . should_equal (Value_Type.Decimal 13 0)
r.at "Count Not Empty txt" . value_type . should_equal (Value_Type.Decimal 13 0)
r.at "Count Distinct i1" . value_type . should_equal (Value_Type.Decimal 18 0)
r.at "Count Not Nothing i2" . value_type . should_equal (Value_Type.Decimal 18 0)
r.at "Count Nothing i3" . value_type . should_equal (Value_Type.Decimal 13 0)
group_builder.specify "Sum" <|
r = data.t.aggregate columns=[Aggregate_Column.Sum "i1", Aggregate_Column.Sum "i2", Aggregate_Column.Sum "i3", Aggregate_Column.Sum "i4", Aggregate_Column.Sum "r1", Aggregate_Column.Sum "r2"]
@@ -308,24 +301,14 @@ snowflake_specific_spec suite_builder default_connection db_name setup =
suite_builder.group "[Snowflake] Warning/Error handling" group_builder->
group_builder.specify "query warnings should be propagated" <|
long_name = (Name_Generator.random_name "T") + ("a" * 100)
r = default_connection.get.execute_update 'CREATE TEMPORARY TABLE "'+long_name+'" ("A" VARCHAR)'
w1 = Problems.expect_only_warning SQL_Warning r
# The display text may itself be truncated, so we just check the first words.
w1.to_display_text . should_contain "identifier"
# And check the full message for words that could be truncated in the short message.
w1.message . should_contain "truncated to"
table = default_connection.get.query (SQL_Query.Raw_SQL 'SELECT 1 AS "'+long_name+'"')
w2 = Problems.expect_only_warning SQL_Warning table
w2.message . should_contain "truncated"
effective_name = table.column_names . at 0
effective_name . should_not_equal long_name
long_name.should_contain effective_name
group_builder.specify "is capable of handling weird tables" <|
default_connection.get.execute_update 'CREATE TEMPORARY TABLE "empty-column-name" ("" VARCHAR)' . should_fail_with SQL_Error
default_connection.get.execute_update 'CREATE TEMPORARY TABLE "empty-column-name" ("" VARCHAR)' . should_succeed
t = default_connection.get.query "empty-column-name"
t.columns.length . should_equal 1
# The column is renamed to something valid upon read:
t.column_names . should_equal ["Column 1"]
# Should be readable:
t.read . at 0 . to_vector . should_equal []
Problems.assume_no_problems <|
default_connection.get.execute_update 'CREATE TEMPORARY TABLE "clashing-unicode-names" ("ś" VARCHAR, "s\u0301" INTEGER)'
@@ -343,7 +326,9 @@ snowflake_specific_spec suite_builder default_connection db_name setup =
r3.catch.cause . should_be_a Duplicate_Output_Column_Names
r4 = default_connection.get.query 'SELECT 1 AS ""'
r4.should_fail_with SQL_Error
r4.should_fail_with Illegal_Argument
r4.catch.to_display_text . should_contain "The provided custom SQL query is invalid and may suffer data corruption"
r4.catch.to_display_text . should_contain "The name '' is invalid"
suite_builder.group "[Snowflake] Edge Cases" group_builder->
group_builder.specify "materialize should respect the overridden type" pending="TODO" <|
@@ -525,31 +510,31 @@ snowflake_specific_spec suite_builder default_connection db_name setup =
suite_builder.group "[Snowflake] math functions" group_builder->
group_builder.specify "round, trunc, ceil, floor" <|
col = table_builder [["x", [0.1, 0.9, 3.1, 3.9, -0.1, -0.9, -3.1, -3.9]]] . at "x"
col . cast Value_Type.Integer . ceil . value_type . should_equal Value_Type.Float
col . cast Value_Type.Integer . ceil . value_type . should_equal (Value_Type.Decimal 38 0)
col . cast Value_Type.Float . round . value_type . should_equal Value_Type.Float
col . cast Value_Type.Integer . round . value_type . should_equal Value_Type.Float
col . cast Value_Type.Decimal . round . value_type . should_equal Value_Type.Decimal
col . cast Value_Type.Integer . round . value_type . should_equal (Value_Type.Decimal 38 0)
col . cast Value_Type.Decimal . round . value_type . should_equal (Value_Type.Decimal 38 0)
col . cast Value_Type.Float . round 1 . value_type . should_equal Value_Type.Float
col . cast Value_Type.Integer . round 1 . value_type . should_equal Value_Type.Decimal
col . cast Value_Type.Decimal . round 1 . value_type . should_equal Value_Type.Decimal
col . cast Value_Type.Integer . round 1 . value_type . should_equal (Value_Type.Decimal 38 0)
col . cast Value_Type.Decimal . round 1 . value_type . should_equal (Value_Type.Decimal 38 0)
col . cast Value_Type.Float . round use_bankers=True . value_type . should_equal Value_Type.Float
col . cast Value_Type.Integer . round use_bankers=True . value_type . should_equal Value_Type.Float
col . cast Value_Type.Decimal . round use_bankers=True . value_type . should_equal Value_Type.Decimal
col . cast Value_Type.Decimal . round use_bankers=True . value_type . should_equal Value_Type.Float
col . cast Value_Type.Float . ceil . value_type . should_equal Value_Type.Float
col . cast Value_Type.Integer . ceil . value_type . should_equal Value_Type.Float
col . cast Value_Type.Decimal . ceil . value_type . should_equal Value_Type.Decimal
col . cast Value_Type.Integer . ceil . value_type . should_equal (Value_Type.Decimal 38 0)
col . cast Value_Type.Decimal . ceil . value_type . should_equal (Value_Type.Decimal 38 0)
col . cast Value_Type.Float . floor . value_type . should_equal Value_Type.Float
col . cast Value_Type.Integer . floor . value_type . should_equal Value_Type.Float
col . cast Value_Type.Decimal . floor . value_type . should_equal Value_Type.Decimal
col . cast Value_Type.Integer . floor . value_type . should_equal (Value_Type.Decimal 38 0)
col . cast Value_Type.Decimal . floor . value_type . should_equal (Value_Type.Decimal 38 0)
col . cast Value_Type.Float . truncate . value_type . should_equal Value_Type.Float
col . cast Value_Type.Integer . truncate . value_type . should_equal Value_Type.Float
col . cast Value_Type.Decimal . truncate . value_type . should_equal Value_Type.Decimal
col . cast Value_Type.Integer . truncate . value_type . should_equal (Value_Type.Decimal 38 0)
col . cast Value_Type.Decimal . truncate . value_type . should_equal (Value_Type.Decimal 38 0)
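The updated expectations reflect that Snowflake's `ROUND`, `CEIL`, `FLOOR` and `TRUNC` preserve the numeric family of their input, so fixed-point `NUMBER(38,0)` comes back as `NUMBER(38,0)` rather than `FLOAT`; bankers' rounding is the exception, presumably because it goes through float arithmetic. For instance:

```sql
SELECT SYSTEM$TYPEOF(ROUND(CAST(3.9 AS NUMBER(38,0))));  -- NUMBER(38,0), not FLOAT
SELECT SYSTEM$TYPEOF(ROUND(CAST(3.9 AS FLOAT)));         -- FLOAT stays FLOAT
```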
do_op n op =
table = light_table_builder [["x", [n]]]
@@ -578,18 +563,6 @@ snowflake_specific_spec suite_builder default_connection db_name setup =
do_op Number.positive_infinity op . should_equal Number.positive_infinity
do_op Number.negative_infinity op . should_equal Number.negative_infinity
group_builder.specify "round returns the correct type" <|
do_round 231.2 1 . should_be_a Float
do_round 231.2 0 . should_be_a Float
do_round 231.2 . should_be_a Float
do_round 231.2 -1 . should_be_a Float
group_builder.specify "round returns the correct type" <|
do_round 231 1 . should_be_a Float
do_round 231 0 . should_be_a Float
do_round 231 . should_be_a Float
do_round 231 -1 . should_be_a Float
type Lazy_Ref
Value ~get
@@ -604,7 +577,6 @@ add_snowflake_specs suite_builder create_connection_fn db_name =
ix = name_counter.get
name_counter . put ix+1
name = Name_Generator.random_name "table_"+ix.to_text
in_mem_table = Table.new columns
in_mem_table.select_into_database_table (connection.if_nothing default_connection.get) name primary_key=Nothing temporary=True
light_table_builder columns =
@@ -662,7 +634,7 @@ add_table_specs suite_builder =
cloud_setup.with_prepared_environment <|
with_secret "my_snowflake_username" base_details.credentials.username username_secret-> with_secret "my_snowflake_password" base_details.credentials.password password_secret->
secret_credentials = Credentials.Username_And_Password username_secret password_secret
details = Snowflake_Details.Snowflake base_details.account_name secret_credentials base_details.database base_details.schema base_details.warehouse
details = Snowflake_Details.Snowflake base_details.account secret_credentials base_details.database base_details.schema base_details.warehouse
connection = Database.connect details
connection.should_succeed
Panic.with_finalizer connection.close <|

View File

@@ -1061,6 +1061,26 @@ add_specs suite_builder setup =
m1.columns.first.name . should_equal "Count Distinct A B"
m1.columns.first.to_vector . should_equal [3]
group_builder.specify "should work correctly with Boolean columns" pending=(if prefix.contains "Snowflake" then "TODO: fix https://github.com/enso-org/enso/issues/10611") <|
table = table_builder [["A", [True, True, True]], ["B", [False, False, False]], ["C", [True, False, True]], ["D", [Nothing, False, True]]]
t_with_nulls = table.aggregate columns=[..Count_Distinct "A", ..Count_Distinct "B", ..Count_Distinct "C", ..Count_Distinct "D"]
m1 = materialize t_with_nulls
m1.column_count . should_equal 4
m1.at "Count Distinct A" . to_vector . should_equal [1]
m1.at "Count Distinct B" . to_vector . should_equal [1]
m1.at "Count Distinct C" . to_vector . should_equal [2]
m1.at "Count Distinct D" . to_vector . should_equal [3]
t_without_nulls = table.aggregate columns=[..Count_Distinct "A" ignore_nothing=True, ..Count_Distinct "B" ignore_nothing=True, ..Count_Distinct "C" ignore_nothing=True, ..Count_Distinct "D" ignore_nothing=True]
m2 = materialize t_without_nulls
m2.column_count . should_equal 4
m2.at "Count Distinct A" . to_vector . should_equal [1]
m2.at "Count Distinct B" . to_vector . should_equal [1]
m2.at "Count Distinct C" . to_vector . should_equal [2]
# The NULL is ignored, and not counted towards the total
m2.at "Count Distinct D" . to_vector . should_equal [2]
suite_builder.group prefix+"Table.aggregate Standard_Deviation" pending=(resolve_pending test_selection.std_dev) group_builder->
group_builder.specify "should correctly handle single elements" <|
r1 = table_builder [["X", [1]]] . aggregate columns=[Standard_Deviation "X" (population=False), Standard_Deviation "X" (population=True)]

View File

@@ -18,7 +18,7 @@ add_specs suite_builder setup =
table_builder = setup.light_table_builder
pending_datetime = if setup.test_selection.date_time.not then "Date/Time operations are not supported by this backend."
suite_builder.group prefix+"(Derived_Columns_Spec) Table.set with Simple_Expression" group_builder->
group_builder.specify "arithmetics" <|
group_builder.specify "arithmetics" pending=(if prefix.contains "Snowflake" then "TODO: re-enable these once https://github.com/enso-org/enso/pull/10583 is merged") <|
t = table_builder [["A", [1, 2]], ["B", [10, 40]]]
t.set (Simple_Expression.Simple_Expr (Column_Ref.Name "A") Simple_Calculation.Copy) "C" . at "C" . to_vector . should_equal [1, 2]
t.set (..Simple_Expr (..Name "A") ..Copy) "C" . at "C" . to_vector . should_equal [1, 2]

View File

@@ -14,167 +14,170 @@ import project.Util
main filter=Nothing = run_default_backend add_specs filter
type Lazy_Ref
Value ~get
add_specs suite_builder setup =
prefix = setup.prefix
suite_builder.group prefix+"Table.replace" group_builder->
table_builder = build_sorted_table setup
table_builder_typed columns value_type =
cast_columns = columns.map c->
Column.from_vector (c.at 0) (c.at 1) . cast value_type
build_sorted_table setup cast_columns
table1 = Lazy_Ref.Value <|
table_builder [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']]]
table2 = Lazy_Ref.Value <|
table_builder [['x', [2, 1, 4, 3]], ['z', [20, 10, 40, 30]]]
table3 = Lazy_Ref.Value <|
table_builder [['x', [1, 2, 3, 4]], ['y', ['a', 'b', 'c', 'd']]]
empty_table = Lazy_Ref.Value <|
in_memory = Table.new [['x', [1]], ['z', [2]]]
empty = in_memory.take 0
table_builder empty.columns
group_builder.specify "should be able to replace values via a lookup table, using from/to column defaults" <|
table = table_builder [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']]]
lookup_table = table_builder [['x', [2, 1, 4, 3]], ['z', [20, 10, 40, 30]]]
expected = table_builder [['x', [10, 20, 20, 30, 40]], ['y', ['a', 'b', 'e', 'c', 'd']]]
table = table1.get
lookup_table = table2.get
expected = Table.new [['x', [10, 20, 20, 30, 40]], ['y', ['a', 'b', 'e', 'c', 'd']]]
result = table.replace lookup_table 'x' . sort ["x", "y"]
result . should_equal expected
group_builder.specify "should be able to replace values via a lookup table, specifying from/to columns" <|
table = table_builder [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']]]
table = table1.get
lookup_table = table_builder [['d', [4, 5, 6, 7]], ['x', [2, 1, 4, 3]], ['d2', [5, 6, 7, 8]], ['z', [20, 10, 40, 30]]]
expected = table_builder [['x', [10, 20, 20, 30, 40]], ['y', ['a', 'b', 'e', 'c', 'd']]]
expected = Table.new [['x', [10, 20, 20, 30, 40]], ['y', ['a', 'b', 'e', 'c', 'd']]]
result = table.replace lookup_table 'x' 'x' 'z' . sort ["x", "y"]
result . should_equal expected
group_builder.specify "should be able to replace values via a lookup table provided as a Map" <|
table = table_builder [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']]]
table = table1.get
lookup_table = Dictionary.from_vector [[2, 20], [1, 10], [4, 40], [3, 30]]
expected = table_builder [['x', [10, 20, 20, 30, 40]], ['y', ['a', 'b', 'e', 'c', 'd']]]
expected = Table.new [['x', [10, 20, 20, 30, 40]], ['y', ['a', 'b', 'e', 'c', 'd']]]
result = table.replace lookup_table 'x' . sort ["x", "y"]
result . should_equal expected
group_builder.specify "should be able to replace multiple columns" <|
table = table_builder [['x', [1, 2, 3, 4, 2]], ['x2', [2, 1, 2, 1, 4]], ['x3', [3, 4, 1, 3, 4]], ['y', ['a', 'b', 'c', 'd', 'e']]]
lookup_table = table_builder [['d', [4, 5, 6, 7]], ['x', [2, 1, 4, 3]], ['d2', [5, 6, 7, 8]], ['z', [20, 10, 40, 30]]]
expected = table_builder [['x', [10, 20, 20, 30, 40]], ['x2', [20, 10, 40, 20, 10]], ['x3', [30, 40, 40, 10, 30]], ['y', ['a', 'b', 'e', 'c', 'd']]]
expected = Table.new [['x', [10, 20, 20, 30, 40]], ['x2', [20, 10, 40, 20, 10]], ['x3', [30, 40, 40, 10, 30]], ['y', ['a', 'b', 'e', 'c', 'd']]]
result = table.replace lookup_table ['x', 'x2', 'x3'] 'x' 'z' . sort ["x", "y"]
result . should_equal expected
group_builder.specify "should be able to replace multiple columns selected by regex" <|
table = table_builder [['x', [1, 2, 3, 4, 2]], ['x2', [2, 1, 2, 1, 4]], ['x3', [3, 4, 1, 3, 4]], ['y', ['a', 'b', 'c', 'd', 'e']]]
lookup_table = table_builder [['d', [4, 5, 6, 7]], ['x', [2, 1, 4, 3]], ['d2', [5, 6, 7, 8]], ['z', [20, 10, 40, 30]]]
expected = table_builder [['x', [10, 20, 20, 30, 40]], ['x2', [20, 10, 40, 20, 10]], ['x3', [30, 40, 40, 10, 30]], ['y', ['a', 'b', 'e', 'c', 'd']]]
expected = Table.new [['x', [10, 20, 20, 30, 40]], ['x2', [20, 10, 40, 20, 10]], ['x3', [30, 40, 40, 10, 30]], ['y', ['a', 'b', 'e', 'c', 'd']]]
result = table.replace lookup_table [(regex 'x.*')] 'x' 'z' . sort ["x", "y"]
result . should_equal expected
group_builder.specify "should be able to replace multiple columns selected by type" <|
table = table_builder [['x', [1, 2, 3, 4, 2]], ['x2', [2, 1, 2, 1, 4]], ['x3', [3, 4, 1, 3, 4]], ['y', ['a', 'b', 'c', 'd', 'e']]]
lookup_table = table_builder [['d', [4, 5, 6, 7]], ['x', [2, 1, 4, 3]], ['d2', [5, 6, 7, 8]], ['z', [20, 10, 40, 30]]]
expected = table_builder [['x', [10, 20, 20, 30, 40]], ['x2', [20, 10, 40, 20, 10]], ['x3', [30, 40, 40, 10, 30]], ['y', ['a', 'b', 'e', 'c', 'd']]]
expected = Table.new [['x', [10, 20, 20, 30, 40]], ['x2', [20, 10, 40, 20, 10]], ['x3', [30, 40, 40, 10, 30]], ['y', ['a', 'b', 'e', 'c', 'd']]]
result = table.replace lookup_table [..By_Type ..Integer] 'x' 'z' . sort ["x", "y"]
result . should_equal expected
group_builder.specify "should fail with Missing_Input_Columns if the specified columns do not exist" <|
table = table_builder [['x', [1, 2, 3, 4]], ['y', ['a', 'b', 'c', 'd']]]
lookup_table = table_builder [['x', [2, 1, 4, 3]], ['z', [20, 10, 40, 30]]]
table = table3.get
lookup_table = table2.get
table.replace lookup_table 'q' 'x' 'z' . should_fail_with Missing_Input_Columns
table.replace lookup_table 'x' 'q' 'z' . should_fail_with Missing_Input_Columns
table.replace lookup_table 'x' 'x' 'q' . should_fail_with Missing_Input_Columns
group_builder.specify "can allow unmatched rows" <|
table = table_builder [['x', [1, 2, 3, 4]], ['y', ['a', 'b', 'c', 'd']]]
table = table3.get
lookup_table = table_builder [['x', [4, 3, 1]], ['z', [40, 30, 10]]]
expected = table_builder [['x', [2, 10, 30, 40]], ['y', ['b', 'a', 'c', 'd']]]
expected = Table.new [['x', [2, 10, 30, 40]], ['y', ['b', 'a', 'c', 'd']]]
result = table.replace lookup_table 'x' . sort "x"
result . should_equal expected
group_builder.specify "fails on unmatched rows" <|
table = table_builder [['x', [1, 2, 3, 4]], ['y', ['a', 'b', 'c', 'd']]]
table = table3.get
lookup_table = table_builder [['x', [4, 3, 1]], ['z', [40, 30, 10]]]
table.replace lookup_table 'x' allow_unmatched_rows=False . should_fail_with Unmatched_Rows_In_Lookup
group_builder.specify "fails on non-unique keys" <|
table = table_builder [['x', [1, 2, 3, 4]], ['y', ['a', 'b', 'c', 'd']]]
table = table3.get
lookup_table = table_builder [['x', [2, 1, 4, 1, 3]], ['z', [20, 10, 40, 11, 30]]]
table.replace lookup_table 'x' . should_fail_with Non_Unique_Key
group_builder.specify "should avoid name clashes in the (internally) generated column name" <|
table = table_builder [['duplicate_key', [1, 2, 3, 4]], ['y', ['a', 'b', 'c', 'd']]]
lookup_table = table_builder [['x', [2, 1, 4, 3]], ['z', [20, 10, 40, 30]]]
expected = table_builder [['duplicate_key', [10, 20, 30, 40]], ['y', ['a', 'b', 'c', 'd']]]
result = table.replace lookup_table 'duplicate_key'
table = table3.get.rename_columns [['x', 'duplicate_key']]
lookup_table = table2.get
expected = Table.new [['duplicate_key', [10, 20, 30, 40]], ['y', ['a', 'b', 'c', 'd']]]
result = table.replace lookup_table 'duplicate_key' . sort 'y'
result . should_equal expected
group_builder.specify "(edge-case) should allow lookup with itself" <|
table = table_builder [['x', [2, 1, 4, 3]], ['y', [20, 10, 40, 30]]]
expected = table_builder [['x', [10, 20, 30, 40]], ['y', [10, 20, 30, 40]]]
expected = Table.new [['x', [10, 20, 30, 40]], ['y', [10, 20, 30, 40]]]
result = table.replace table 'x' . sort "x"
result . should_equal expected
group_builder.specify "should not merge columns other than the one specified in the `column` param" <|
table = table_builder [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']], ['q', [4, 5, 6, 7, 8]]]
lookup_table = table_builder [['x', [2, 1, 4, 3]], ['z', [20, 10, 40, 30]], ['q', [40, 50, 60, 70]]]
expected = table_builder [['x', [10, 20, 20, 30, 40]], ['y', ['a', 'b', 'e', 'c', 'd']], ['q', [4, 5, 8, 6, 7]]]
expected = Table.new [['x', [10, 20, 20, 30, 40]], ['y', ['a', 'b', 'e', 'c', 'd']], ['q', [4, 5, 8, 6, 7]]]
result = table.replace lookup_table 'x' . sort ["x", "y"]
result . should_equal expected
group_builder.specify "should fail on null key values in lookup table" <|
table = table_builder [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']]]
table = table1.get
lookup_table = table_builder [['x', [2, 1, Nothing, 3]], ['z', [20, 10, 40, 30]]]
table.replace lookup_table 'x' . should_fail_with Null_Values_In_Key_Columns
group_builder.specify "should fail on an empty lookup table with no columns" <|
table = table_builder [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']]]
lookup_table = table_builder []
table.replace lookup_table 'x' . should_fail_with Illegal_Argument
group_builder.specify "should accept an empty (but well-formed) lookup table if allow_unmatched_rows=True, but expect a warning" <|
table = table_builder [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']]]
lookup_table = table_builder_typed [['x', []], ['z', []]] Value_Type.Integer
table = table1.get
lookup_table = empty_table.get
t = table.replace lookup_table 'x' . sort ['y']
t . should_equal table
Problems.expect_warning (Empty_Error.Error "lookup_table") t
group_builder.specify "should throw an error on an empty (but well-formed) lookup table and non-empty base table if allow_unmatched_rows=False" <|
table = table_builder [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']]] . sort ['x']
lookup_table = table_builder_typed [['x', []], ['z', []]] Value_Type.Integer
table = table1.get . sort ['x']
lookup_table = empty_table.get
t = table.replace lookup_table 'x' allow_unmatched_rows=False . sort ['y']
t . should_fail_with Unmatched_Rows_In_Lookup
group_builder.specify "should accept an empty (but well-formed) lookup table if the base table is also empty, but expect a warning" <|
table = table_builder_typed [['x', []], ['z', []]] Value_Type.Integer
lookup_table = table_builder_typed [['x', []], ['z', []]] Value_Type.Integer
table = empty_table.get
lookup_table = empty_table.get
t = table.replace lookup_table 'x'
t . should_equal table
Problems.expect_warning (Empty_Error.Error "lookup_table") t
group_builder.specify "should accept an empty lookup map, if allow_unmatched_rows=True, but expect a warning" <|
table = table_builder [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']]]
table = table1.get
t = table.replace Dictionary.empty 'x'
t . should_equal table
Problems.expect_warning (Empty_Error.Error "lookup_table") t
group_builder.specify "should throw an error on an empty lookup map and non-empty base table if allow_unmatched_rows=False" <|
table = table_builder [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']]] . sort ['x']
table = table1.get . sort ['x']
t = table.replace Dictionary.empty 'x' allow_unmatched_rows=False
t . should_fail_with Unmatched_Rows_In_Lookup
t.catch.example_key_values . should_equal [1]
group_builder.specify "should accept an empty lookup map if the base table is also empty, but expect a warning" <|
table = table_builder_typed [['x', []], ['z', []]] Value_Type.Integer
table = empty_table.get
t = table.replace Dictionary.empty 'x'
t . should_equal table
Problems.expect_warning (Empty_Error.Error "lookup_table") t
group_builder.specify "should not allow from/to_coumn to specified if the argument is a Map" <|
table = table_builder [['x', [1, 2, 3, 4, 2]], ['y', ['a', 'b', 'c', 'd', 'e']]]
table = table1.get
lookup_table = Dictionary.from_vector [[2, 20], [1, 10], [4, 40], [3, 30]]
table.replace lookup_table 'x' from_column=8 . should_fail_with Illegal_Argument
table.replace lookup_table 'x' to_column=9 . should_fail_with Illegal_Argument
table.replace lookup_table 'x' from_column=8 to_column=9 . should_fail_with Illegal_Argument
if setup.test_selection.date_time then group_builder.specify "should correctly infer types of columns (Date/Time)" <|
my_table = table_builder [["X", [1, 2, 3, 2]]]
my_table = setup.table_builder [["X", [1, 2, 3, 2]], ["rowid", [1, 2, 3, 4]]]
t1 = my_table.replace (Dictionary.from_vector [[1, Date.new 2021], [2, Date.new 2022], [3, Date.new 2023]]) "X" allow_unmatched_rows=False
t1 = my_table.replace (Dictionary.from_vector [[1, Date.new 2021], [2, Date.new 2022], [3, Date.new 2023]]) "X" allow_unmatched_rows=False . sort "rowid"
t1.at "X" . value_type . should_equal Value_Type.Date
t1.at "X" . to_vector . should_equal [Date.new 2021, Date.new 2022, Date.new 2023, Date.new 2022]
t2 = my_table.replace (Dictionary.from_vector [[1, Time_Of_Day.new 1 2 3], [2, Time_Of_Day.new 4 5 6], [3, Time_Of_Day.new 7 8 9]]) "X" allow_unmatched_rows=False
t2 = my_table.replace (Dictionary.from_vector [[1, Time_Of_Day.new 1 2 3], [2, Time_Of_Day.new 4 5 6], [3, Time_Of_Day.new 7 8 9]]) "X" allow_unmatched_rows=False . sort "rowid"
t2.at "X" . value_type . should_equal Value_Type.Time
t2.at "X" . to_vector . should_equal [Time_Of_Day.new 1 2 3, Time_Of_Day.new 4 5 6, Time_Of_Day.new 7 8 9, Time_Of_Day.new 4 5 6]
t3 = my_table.replace (Dictionary.from_vector [[1, Date_Time.new 2021 1 1 1 1 1], [2, Date_Time.new 2022 2 2 2 2 2], [3, Date_Time.new 2023 3 3 3 3 3]]) "X" allow_unmatched_rows=False
t3 = my_table.replace (Dictionary.from_vector [[1, Date_Time.new 2021 1 1 1 1 1], [2, Date_Time.new 2022 2 2 2 2 2], [3, Date_Time.new 2023 3 3 3 3 3]]) "X" allow_unmatched_rows=False . sort "rowid"
t3.at "X" . value_type . should_be_a (Value_Type.Date_Time ...)

View File

@@ -12,7 +12,7 @@ import Standard.Test.Extensions
from Standard.Database.Errors import Unsupported_Database_Operation
from Standard.Table import Column, Table, Value_Type, Auto, Bits
from Standard.Table.Errors import Invalid_Value_Type, Invalid_Column_Names
from project.Common_Table_Operations.Util import run_default_backend
from project.Common_Table_Operations.Util import run_default_backend, build_sorted_table
from Standard.Test import all
@@ -24,13 +24,12 @@ main filter=Nothing = run_default_backend add_specs filter
add_specs suite_builder setup =
prefix = setup.prefix
materialize = setup.materialize
table_builder = setup.table_builder
suite_builder.group prefix+"Table Text Cleanse" group_builder->
flight = ["Flight", [" BA0123", "BA0123 ", " SG0456 ", "BA 0123", " S G 0 4 5 6 "]]
passenger = ["Passenger", [" Albert Einstein", "Marie Curie ", " Isaac Newton ", "Stephen Hawking", " A d a Lovelace "]]
ticket_price = ["Ticket Price", [101, 576, 73, 112, 74]]
table = Lazy_Ref.Value <|
table_builder [flight, passenger, ticket_price]
build_sorted_table setup [flight, passenger, ticket_price]
group_builder.specify "should remove leading whitespace" <|
clean_flight = ["Flight", ["BA0123", "BA0123 ", "SG0456 ", "BA 0123", "S G 0 4 5 6 "]]
clean_passenger = ["Passenger", ["Albert Einstein", "Marie Curie ", "Isaac Newton ", "Stephen Hawking", "A d a Lovelace "]]

View File

@@ -1,5 +1,5 @@
from Standard.Base import all
from Standard.Table import Table, Value_Type
from Standard.Table import Column, Table, Value_Type
from Standard.Test import all
@@ -72,7 +72,10 @@ Error.should_equal_tz_agnostic self other =
build_sorted_table setup table_structure =
# Workaround for https://github.com/enso-org/enso/issues/10321
if setup.prefix.contains "Snowflake" . not then setup.table_builder table_structure else
row_count = table_structure.first.second.length
row_count = case table_structure.first of
def : Vector -> def.second.length
col : Column -> col.length
if row_count == 0 then setup.table_builder table_structure else
new_structure = table_structure+[["row_id", (0.up_to row_count) . to_vector]]
setup.table_builder new_structure . sort "row_id" . remove_columns ["row_id"]
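The workaround pins down row order by appending a synthetic `row_id` column, sorting on it, and dropping it again, since Snowflake gives no ordering guarantees without an explicit `ORDER BY`. Reading the built table back is then effectively:

```sql
-- Hypothetical names; the synthetic key makes the read deterministic before it is dropped.
SELECT "x", "y" FROM "t" ORDER BY "row_id";
```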

View File

@@ -65,7 +65,7 @@ database_table_builder name_prefix args primary_key=[] connection =
database, so features relying on persistence cannot really be tested.
add_specs suite_builder setup make_new_connection persistent_connector=True =
prefix = setup.prefix
snowflake_pending = if prefix.contains "Snowflake" then "Table Upload / Update is not yet fully implemented for Snowflake backend. TODO: https://github.com/enso-org/enso/issues/10609"
data = Data.setup make_new_connection
suite_builder.group prefix+"(Upload_Spec) Creating an empty table" group_builder->
group_builder.specify "should allow to specify the column names and types" <|
@@ -336,7 +336,7 @@ add_specs suite_builder setup make_new_connection persistent_connector=True =
tmp_connection.close
data.connection.query (SQL_Query.Table_Name name) . at "X" . to_vector . should_equal_ignoring_order [1, 2, 3]
group_builder.specify "should not create any table if upload fails" <|
group_builder.specify "should not create any table if upload fails" pending=snowflake_pending <|
normal_column = Column.from_vector "Y" ((100+0).up_to (100+1000)).to_vector
exploding_column = make_mock_column "X" (0.up_to 1000).to_vector 512
exploding_table = Table.new [normal_column, exploding_column]
@@ -371,7 +371,7 @@ add_specs suite_builder setup make_new_connection persistent_connector=True =
r1 = data.in_memory_table.select_into_database_table data.connection (Name_Generator.random_name "primary-key-4") primary_key=["X", "nonexistent"]
r1.should_fail_with Missing_Input_Columns
group_builder.specify "should fail if the primary key is not unique" <|
group_builder.specify "should fail if the primary key is not unique" pending=snowflake_pending <|
t1 = Table.new [["X", [1, 2, 1]], ["Y", ['b', 'b', 'a']]]
run_with_and_without_output <|
@@ -525,7 +525,7 @@ add_specs suite_builder setup make_new_connection persistent_connector=True =
r1 = db_table.select_into_database_table data.connection (Name_Generator.random_name "copied-table") temporary=True primary_key=["nonexistent"]
r1.should_fail_with Missing_Input_Columns
group_builder.specify "should fail when the primary key is not unique" <|
group_builder.specify "should fail when the primary key is not unique" pending=snowflake_pending <|
t = Table.new [["X", [1, 2, 1]], ["Y", ['b', 'b', 'a']]]
db_table = t.select_into_database_table data.connection (Name_Generator.random_name "source-table") temporary=True primary_key=Nothing
Problems.assume_no_problems db_table
@@ -748,6 +748,7 @@ add_specs suite_builder setup make_new_connection persistent_connector=True =
test_table_append group_builder (data : Data) source_table_builder target_table_builder =
snowflake_pending = if group_builder.name.contains "Snowflake" then "Table Upload / Update is not yet fully implemented for Snowflake backend. TODO: https://github.com/enso-org/enso/issues/10609"
group_builder.specify "should be able to append new rows to a table" <|
dest = target_table_builder [["X", [1, 2, 3]], ["Y", ['a', 'b', 'c']]] primary_key=["X"] connection=data.connection
src = source_table_builder [["X", [4, 5, 6]], ["Y", ['d', 'e', 'f']]] connection=data.connection
@@ -929,7 +930,7 @@ test_table_append group_builder (data : Data) source_table_builder target_table_builder =
r5 = dest.update_rows src update_action=Update_Action.Align_Records key_columns=[]
r5.should_fail_with Illegal_Argument
group_builder.specify "should fail if the key is not unique in the input table" <|
group_builder.specify "should fail if the key is not unique in the input table" pending=snowflake_pending <|
d1 = target_table_builder [["X", [0, 10, 100]]] primary_key=["X"] connection=data.connection
d2 = target_table_builder [["X", [0, 10, 100]]] connection=data.connection
src = source_table_builder [["X", [1, 1, 3]]] connection=data.connection
@@ -1095,7 +1096,7 @@ test_table_append group_builder (data : Data) source_table_builder target_table_builder =
r2 = in_memory_table.update_rows t
r2.should_fail_with Illegal_Argument
group_builder.specify "should warn if type widening occurs" <|
group_builder.specify "should warn if type widening occurs" pending=snowflake_pending <|
dest = target_table_builder [["X", [3.25, 4.25, 10.0]]] connection=data.connection
src = source_table_builder [["X", [1, 2, 0]]] connection=data.connection
@@ -1145,7 +1146,7 @@ test_table_append group_builder (data : Data) source_table_builder target_table_builder =
result = dest.update_rows src update_action=Update_Action.Insert key_columns=[]
result.should_fail_with Column_Type_Mismatch
group_builder.specify "should not leave behind any garbage temporary tables if the upload fails" <|
group_builder.specify "should not leave behind any garbage temporary tables if the upload fails" pending=snowflake_pending <|
dest_name = Name_Generator.random_name "dest-table"
# We will make the upload fail by violating the NOT NULL constraint.
dest = data.connection.create_table dest_name [Column_Description.Value "X" Value_Type.Integer [Column_Constraint.Not_Null]] temporary=True primary_key=[] . should_succeed
@@ -1354,14 +1355,14 @@ tests group_builder (data : Data) make_new_connection source_table_builder (suff
Problems.expect_only_warning Dry_Run_Operation r1
# The target table is unaffected.
dest.at "X" . to_vector . should_equal v
dest.at "X" . to_vector . should_equal_ignoring_order v
src2 = source_table_builder [["X", [4]]] connection=data.connection
r2 = dest.delete_rows src2 key_columns=["X"] allow_duplicate_matches=True
# 3 rows would be deleted
r2.should_equal 3
Problems.expect_only_warning Dry_Run_Operation r2
dest.at "X" . to_vector . should_equal v
dest.at "X" . to_vector . should_equal_ignoring_order v
if persistent_connector then
group_builder.specify "will not overwrite an existing table with a dry-run table if the name is clashing (select_into_database_table)"+suffix <|

View File

@@ -24,13 +24,13 @@ DB_Table.should_equal : DB_Table -> Integer -> Any
DB_Table.should_equal self expected frames_to_skip=0 =
t0 = self.read
t1 = expected.read
t0 . should_equal t1 frames_to_skip
t0 . should_equal t1 frames_to_skip+1
DB_Column.should_equal : DB_Column -> Integer -> Any
DB_Column.should_equal self expected frames_to_skip=0 =
t0 = self.read
t1 = expected.read
t0 . should_equal t1 frames_to_skip
t0 . should_equal t1 frames_to_skip+1
type Test_Failure_Error
## PRIVATE