Implement Table.lookup_and_replace in Database (#8146)

- Closes #7981
- Adds a `RUNTIME_ERROR` operation to the DB dialect that can be used to 'crash' a query if a condition is met - it is used to validate that `lookup_and_replace` invariants still hold when the query is materialized.
- Removes the old `Table_Helpers.is_table` and `same_backend` checks in favour of a new approach that relies on `Table.from` conversions and is both simpler to use and more robust.
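
For illustration, a minimal sketch of the new behaviour (connection and table names are hypothetical): `lookup_and_replace` now works directly on Database tables, and mixing backends fails early through the `Table.from` conversions:

    data = connection.query "SELECT * FROM data"
    lookup = connection.query "SELECT key, value FROM lookup"
    updated = data.lookup_and_replace lookup key_columns=["key"]

    # Passing an in-memory table to a Database operation now fails with an
    # `Illegal_Argument` explaining that cross-backend operations are not
    # supported, instead of relying on the old `is_table` hack:
    in_memory = Table.new [["key", [1, 2]], ["value", ["A", "B"]]]
    data.lookup_and_replace in_memory key_columns=["key"]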
Radosław Waśko 2023-10-31 16:19:55 +01:00 committed by GitHub
parent 660f4b35ce
commit 79011bd550
24 changed files with 789 additions and 158 deletions

View File

@ -11,3 +11,7 @@ type Redshift_Error_Mapper
# Currently not implemented, skipping the error recognition.
_ = error
False
## PRIVATE
transform_custom_errors : SQL_Error -> Any
transform_custom_errors error = error

View File

@ -29,7 +29,6 @@ import Standard.Table.Internal.Add_Row_Number
import Standard.Table.Internal.Aggregate_Column_Helper
import Standard.Table.Internal.Column_Naming_Helper.Column_Naming_Helper
import Standard.Table.Internal.Constant_Column.Constant_Column
import Standard.Table.Internal.Lookup_Helpers
import Standard.Table.Internal.Problem_Builder.Problem_Builder
import Standard.Table.Internal.Table_Helpers
import Standard.Table.Internal.Table_Helpers.Table_Column_Helper
@ -50,6 +49,7 @@ import project.Data.Take_Drop_Helpers
import project.Internal.Aggregate_Helper
import project.Internal.Base_Generator
import project.Internal.Common.Database_Join_Helper
import project.Internal.Common.Lookup_Query_Helper
import project.Internal.Helpers
import project.Internal.IR.Context.Context
import project.Internal.IR.From_Spec.From_Spec
@ -61,7 +61,7 @@ import project.Internal.IR.SQL_Expression.SQL_Expression
import project.Internal.IR.SQL_Join_Kind.SQL_Join_Kind
import project.Internal.SQL_Type_Reference.SQL_Type_Reference
from project.Data.Take_Drop_Helpers import Take_Drop
from project.Errors import Integrity_Error, Table_Not_Found, Unsupported_Database_Operation
from project.Errors import Integrity_Error, Table_Not_Found, Unsupported_Database_Operation, SQL_Error
polyglot java import java.sql.JDBCType
polyglot java import java.util.UUID
@ -1212,16 +1212,12 @@ type Table
## PRIVATE
Implementation of both `join` and `cross_join`.
join_or_cross_join : Table -> Join_Kind | Join_Kind_Cross -> Vector (Join_Condition | Text) | Text -> Text -> Problem_Behavior -> Table
join_or_cross_join self right join_kind on right_prefix on_problems =
can_proceed = if Table_Helpers.is_table right . not then Error.throw (Type_Error.Error Table right "right") else
same_backend = case right of
_ : Table -> True
_ -> False
join_or_cross_join self right:Table join_kind on right_prefix on_problems =
can_proceed = Helpers.ensure_same_connection "table" [self, right] <|
join_conditions_ok = join_kind != Join_Kind_Cross.Cross || on == []
if same_backend . not then Error.throw (Illegal_Argument.Error "Currently cross-backend joins are not supported. You need to upload the in-memory table before joining it with a database one, or materialize this table.") else
if join_conditions_ok . not then Error.throw (Illegal_Argument.Error "Cross join does not allow join conditions") else
True
if can_proceed then
can_proceed.if_not_error <|
left = self
table_name_deduplicator = self.connection.base_connection.table_naming_helper.create_unique_name_strategy
table_name_deduplicator.mark_used [left.name, right.name]
@ -1314,14 +1310,13 @@ type Table
The ordering of rows in the resulting table is not specified.
cross_join : Table -> Integer | Nothing -> Text -> Problem_Behavior -> Table
cross_join self right right_row_limit=100 right_prefix="Right " on_problems=Report_Warning =
if check_db_table "right" right then
cross_join self right:Table right_row_limit=100 right_prefix="Right " on_problems=Report_Warning =
limit_problems = case right_row_limit.is_nothing.not && (right.row_count > right_row_limit) of
True ->
[Cross_Join_Row_Limit_Exceeded.Error right_row_limit right.row_count]
False -> []
on_problems.attach_problems_before limit_problems <|
self.join_or_cross_join right join_kind=Join_Kind_Cross.Cross on=[] right_prefix on_problems
## Replaces values in this table by values from a lookup table.
New values are looked up in the lookup table based on the `key_columns`.
@ -1358,8 +1353,8 @@ type Table
specified in `key_columns`, a `Missing_Input_Columns` error is raised.
- If an empty vector is provided for `key_columns`, a
`No_Input_Columns_Selected` error is raised.
- If the lookup table contains multiple rows with the same values in
the `key_columns`, an `Non_Unique_Key` error is raised.
- If a single row is matched by multiple entries in the lookup table,
a `Non_Unique_Key` error is raised.
- If a column that is being updated from the lookup table has a type
that is not compatible with the type of the corresponding column in
this table, a `No_Common_Type` error is raised.
@ -1377,8 +1372,8 @@ type Table
@key_columns Widget_Helpers.make_column_name_vector_selector
lookup_and_replace : Table -> (Vector (Integer | Text | Regex) | Text | Integer | Regex) -> Boolean -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns | Non_Unique_Key | Unmatched_Rows_In_Lookup
lookup_and_replace self lookup_table:Table key_columns:(Vector (Integer | Text | Regex) | Text | Integer | Regex) add_new_columns:Boolean=True allow_unmatched_rows:Boolean=True on_problems:Problem_Behavior=Problem_Behavior.Report_Warning =
_ = [lookup_table, key_columns, add_new_columns, allow_unmatched_rows, on_problems]
Error.throw (Unsupported_Database_Operation.Error "Table.lookup_and_replace is not implemented yet for the Database backends.")
Helpers.ensure_same_connection "table" [self, lookup_table] <|
Lookup_Query_Helper.build_lookup_query self lookup_table key_columns add_new_columns allow_unmatched_rows on_problems
## ALIAS join by row position
GROUP Standard.Base.Calculations
@ -1522,11 +1517,11 @@ type Table
retyped to the `Mixed` type to indicate that intention. Note that the
`Mixed` type may not be supported by most Database backends.
union : (Table | Vector Table) -> Match_Columns -> Boolean | Report_Unmatched -> Boolean -> Problem_Behavior -> Table
union self tables match_columns=Match_Columns.By_Name keep_unmatched_columns=Report_Unmatched allow_type_widening=True on_problems=Report_Warning =
union self tables:Vector|Table match_columns=Match_Columns.By_Name keep_unmatched_columns=Report_Unmatched allow_type_widening=True on_problems=Report_Warning =
all_tables = case tables of
v : Vector -> [self] + v
v : Vector -> [self] + (v.map t-> Table.from t)
single_table -> [self, single_table]
all_tables.all (check_db_table "tables") . if_not_error <|
Helpers.ensure_same_connection "table" all_tables <|
problem_builder = Problem_Builder.new
matched_column_sets = Match_Columns_Helpers.match_columns all_tables match_columns keep_unmatched_columns problem_builder
dialect = self.connection.dialect
@ -2154,7 +2149,8 @@ type Table
False ->
sql = preprocessed.to_sql
column_type_suggestions = preprocessed.internal_columns.map .sql_type_reference
materialized_table = self.connection.read_statement sql column_type_suggestions
materialized_table = self.connection.read_statement sql column_type_suggestions . catch SQL_Error sql_error->
Error.throw (self.connection.dialect.get_error_mapper.transform_custom_errors sql_error)
expected_types = self.columns.map .value_type
actual_types = materialized_table.columns.map .value_type
@ -2527,19 +2523,6 @@ type Wrapped_Error
## PRIVATE
Value value
## PRIVATE
Checks if the argument is a proper table and comes from the current backend.
It returns True or throws a dataflow error explaining the issue.
TODO [RW]: this should check that the tables are on the same connection
check_db_table arg_name table =
if Table_Helpers.is_table table . not then Error.throw (Type_Error.Error Table table arg_name) else
same_backend = table.is_a Table
case same_backend of
False ->
Error.throw (Illegal_Argument.Error "Currently cross-backend operations are not supported. Materialize the table using `.read` before mixing it with an in-memory Table.")
True -> True
## PRIVATE
By default, join on the first column, unless it's a cross join, in which
case there are no join conditions.
@ -2553,5 +2536,10 @@ Materialized_Table.from (that:Table) =
_ = [that]
Error.throw (Illegal_Argument.Error "Currently cross-backend operations are not supported. Materialize the table using `.read` before mixing it with an in-memory Table.")
## PRIVATE
Table.from (that:Materialized_Table) =
_ = [that]
Error.throw (Illegal_Argument.Error "Currently cross-backend operations are not supported. Either materialize the other table using `.read` or upload the table into the database using `.select_into_database_table`.")
## PRIVATE
Table_Ref.from (that:Table) = Table_Ref.Value that

View File

@ -203,3 +203,27 @@ type Unsupported_Database_Encoding
Pretty print the unsupported database encoding warning.
to_display_text : Text
to_display_text self = self.message
## Indicates that the database has been modified between the time the query was
prepared and the time it was executed, breaking an expected invariant and
potentially causing data corruption.
The query needs to be rebuilt.
type Invariant_Violation
## PRIVATE
Indicates that the database has been modified between the time the query
was prepared and the time it was executed, breaking an expected invariant
and potentially causing data corruption.
Arguments:
- message: A message describing the broken invariant, if available.
It may be set to `Nothing` if the backend cannot decode the invariant
message from the SQL error.
- original_cause: The original SQL error that this error has been
translated from.
Error (message:Text|Nothing) (original_cause:SQL_Error)
## PRIVATE
to_display_text : Text
to_display_text self =
"The database has been modified between the time the query was prepared and executed, leading to possibly incorrect results. Please re-run the workflow to retry."

View File

@ -193,7 +193,7 @@ base_dialect =
nulls = [["IS_NULL", make_right_unary_op "IS NULL"], ["FILL_NULL", make_function "COALESCE"]]
contains = [["IS_IN", make_is_in], ["IS_IN_COLUMN", make_is_in_column]]
types = [simple_cast]
windows = [["ROW_NUMBER", make_row_number]]
windows = [["ROW_NUMBER", make_row_number], ["ROW_NUMBER_IN_GROUP", make_row_number_in_group]]
base_map = Map.from_vector (arith + logic + compare + functions + agg + counts + text + nulls + contains + types + windows)
Internal_Dialect.Value base_map wrap_in_quotes
@ -219,7 +219,7 @@ make_iif arguments = case arguments.length of
An IR expression for constructing SQL `CASE` expressions.
case_when : Vector Builder -> Builder
case_when arguments =
if arguments.length < 4 then Error.throw (Illegal_State.Error "CASE_WHEN needs at least 3 arguments.") else
if arguments.length < 3 then Error.throw (Illegal_State.Error "CASE_WHEN needs at least 3 arguments.") else
fallback = arguments.last
cases = arguments.drop (Last 1)
if cases.length % 2 != 0 then Error.throw (Illegal_State.Error "CASE_WHEN expects an odd number of arguments (two arguments for each case and a fallback).") else
@ -286,6 +286,14 @@ make_row_number (arguments : Vector) (metadata : Row_Number_Metadata) = if argum
Builder.code "PARTITION BY " ++ Builder.join ", " grouping
Builder.code "(row_number() OVER (" ++ group_part ++ " ORDER BY " ++ Builder.join ", " ordering ++ ") * " ++ step.paren ++ " + " ++ offset.paren ++ ")"
## PRIVATE
A helper for `lookup_and_replace`, and potentially other operations.
It creates an expression that returns a row number within a group.
make_row_number_in_group arguments =
if arguments.length == 0 then
Panic.throw <| Illegal_State.Error "The operation ROW_NUMBER_IN_GROUP requires at least one argument."
Builder.code "row_number() OVER (PARTITION BY " ++ (Builder.join ", " arguments) ++ ")"
## PRIVATE
Builds code for an expression.
@ -299,6 +307,9 @@ generate_expression dialect expr = case expr of
dialect.wrap_identifier origin ++ '.' ++ dialect.wrap_identifier name
SQL_Expression.Constant value -> Builder.interpolation value
SQL_Expression.Literal value -> Builder.code value
SQL_Expression.Text_Literal value ->
escaped = value.replace "'" "''"
Builder.code ("'" + escaped + "'")
SQL_Expression.Operation kind arguments metadata ->
op = dialect.operation_map.get kind (Error.throw <| Unsupported_Database_Operation.Error kind)
parsed_args = arguments.map (generate_expression dialect)

View File

@ -0,0 +1,239 @@
from Standard.Base import all
import Standard.Base.Errors.Illegal_State.Illegal_State
from Standard.Base.Runtime import assert
import Standard.Table.Internal.Lookup_Helpers
import Standard.Table.Internal.Lookup_Helpers.Lookup_Column
from Standard.Table import Join_Kind, Value_Type
from Standard.Table.Errors import all
import project.Data.Table.Table
import project.Internal.IR.Context.Context
import project.Internal.IR.From_Spec.From_Spec
import project.Internal.IR.Internal_Column.Internal_Column
import project.Internal.IR.SQL_Expression.SQL_Expression
import project.Internal.IR.SQL_Join_Kind.SQL_Join_Kind
import project.Internal.SQL_Type_Reference.SQL_Type_Reference
from project.Internal.Upload_Table import check_for_null_keys
## PRIVATE
Implementation of `lookup_and_replace` for the Database backend.
See `Table.lookup_and_replace` for more details.
build_lookup_query : Table -> Table -> (Vector (Integer | Text | Regex) | Text | Integer | Regex) -> Boolean -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns | Non_Unique_Key | Unmatched_Rows_In_Lookup
build_lookup_query base_table lookup_table key_columns add_new_columns allow_unmatched_rows on_problems =
lookup_columns = Lookup_Helpers.prepare_columns_for_lookup base_table lookup_table key_columns add_new_columns allow_unmatched_rows on_problems
lookup_columns.if_not_error <| check_initial_invariants base_table lookup_table lookup_columns allow_unmatched_rows <|
column_naming_helper = base_table.connection.base_connection.column_naming_helper
unique_name_strategy = column_naming_helper.create_unique_name_strategy
unique_name_strategy.mark_used base_table.column_names
unique_name_strategy.mark_used lookup_table.column_names
subquery_setup = prepare_subqueries base_table lookup_table lookup_columns unique_name_strategy
# We need to construct the context _before_ constructing the columns, to be able to use it in column construction (for inferring types).
new_ctx = make_context_for_lookup_join lookup_columns subquery_setup
infer_type_in_result expr =
SQL_Type_Reference.new base_table.connection new_ctx expr
## TODO [RW] here we will perform as many fetches as there are
`Replace_Column` instances, but technically we could fetch all column
types in a single query - we should do that. See #6118.
new_columns = lookup_columns.map_with_index ix-> c-> case c of
Lookup_Column.Key_Column _ _ -> subquery_setup.get_self_column ix
Lookup_Column.Keep_Column _ -> subquery_setup.get_self_column ix
Lookup_Column.Replace_Column _ _ expected_type ->
dialect = base_table.connection.dialect
subquery_setup.create_merged_column ix expected_type dialect infer_type_in_result allow_unmatched_rows
Lookup_Column.Add_Column _ -> subquery_setup.get_lookup_column ix
## Originally, I wanted to add invariant checks to all columns (or some of them),
but storing the check in WHERE has 2 benefits:
1. The query is actually _simpler_ - instead of having the check in the expression for _every_ column
or multiple columns, we have it only once.
2. We have a guarantee that the query optimizer will not remove it (which could happen if it was a separate
column, or was added only to some column and these columns got removed in subsequent steps) - which is
paramount to ensuring that our rows will not get duplicated in case the key uniqueness gets violated.
new_ctx_with_invariant_check = new_ctx.add_where_filters [make_invariant_check subquery_setup.lookup_counter allow_unmatched_rows]
precheck_for_duplicate_matches lookup_columns subquery_setup base_table.connection new_ctx <|
Table.Value subquery_setup.new_table_name base_table.connection new_columns new_ctx_with_invariant_check
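## Roughly (a sketch for a single key column `k`, with `ctr` standing for the
   `lookup_counter` column), the generated query has this shape:
       SELECT <key, kept, merged and added columns>
       FROM (SELECT ... FROM base) AS self
       LEFT JOIN (SELECT ..., row_number() OVER (PARTITION BY k) AS ctr FROM lookup) AS lookup
       ON self.k = lookup.k
       WHERE CASE WHEN ctr IS NULL THEN TRUE WHEN ctr = 1 THEN TRUE ELSE RUNTIME_ERROR(...) END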
## PRIVATE
Checks if the key contains NULL values or if there would be unmatched rows
(if `allow_unmatched_rows` is `False`), and reports any errors.
check_initial_invariants : Table -> Table -> Vector Lookup_Column -> Boolean -> Any -> Any
check_initial_invariants base_table lookup_table lookup_columns allow_unmatched_rows ~continuation =
key_column_names = lookup_columns.flat_map c-> case c of
Lookup_Column.Key_Column base_column lookup_column ->
assert (base_column.name == lookup_column.name) "Columns should have equal names."
[lookup_column.name]
_ -> []
check_for_null_keys lookup_table key_column_names <|
if allow_unmatched_rows then continuation else
unmatched_rows = base_table.join lookup_table on=key_column_names join_kind=Join_Kind.Left_Exclusive . select_columns key_column_names
unmatched_example = unmatched_rows.read max_rows=1
if unmatched_example.row_count == 0 then continuation else
first_row = unmatched_example.rows.first
Error.throw (Unmatched_Rows_In_Lookup.Error first_row.to_vector)
## PRIVATE
Prepares the lookup table counter that is used for two things:
1. It allows us to check if a given row has found a match in the lookup
table and decide which value to use in the `Replace_Column` case (the counter will
be `1` if there is a match and `NULL` if there is not).
2. It allows us to find duplicate matches - if a row with counter >1 is found,
that means that a single row has matched multiple rows in the lookup table
and we should report an error.
make_lookup_counter_column dialect lookup_columns unique_name_strategy =
grouping_expressions = lookup_columns.flat_map c-> case c of
Lookup_Column.Key_Column _ lookup_column -> [lookup_column.expression]
_ -> []
row_number_expression = SQL_Expression.Operation "ROW_NUMBER_IN_GROUP" grouping_expressions
sql_type = dialect.get_type_mapping.value_type_to_sql Value_Type.Integer Problem_Behavior.Ignore
Internal_Column.Value (unique_name_strategy.make_unique "lookup_counter") (SQL_Type_Reference.from_constant sql_type) row_number_expression
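## For example (a sketch): if the lookup table's key column contains the
   values [1, 2, 1], the two rows with key 1 receive counters 1 and 2. A base
   row joining on key 1 then also matches the counter-2 row, so the duplicate
   is caught either by `precheck_for_duplicate_matches` or, at materialization
   time, by the `RUNTIME_ERROR` invariant check.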
## PRIVATE
Gathers information about prepared subqueries in one place - to make it
easier to pass this context between various helper functions.
The setup is prepared in such a way that for each `Lookup_Column` we have a
vector of base table columns and lookup table columns corresponding to it.
Depending on the type of the `Lookup_Column`, one of these vectors may be
empty. The helper functions `get_self_column` and `get_lookup_column` allow
easy access to the corresponding columns for a given index - this should be
used with `lookup_columns.map_with_index`.
type Lookup_Subquery_Setup
## PRIVATE
Value self_sub lookup_sub lookup_counter new_table_name
## PRIVATE
get_self_column self ix =
self.self_sub.new_columns.at ix . first
## PRIVATE
get_lookup_column self ix =
self.lookup_sub.new_columns.at ix . first
## PRIVATE
Creates an `Internal_Column` representing the `Replace_Column` scenario:
taking data from lookup table if it was matched, and from base table
otherwise.
This method also ensures that the column has the expected type, unifying
types of the two sources.
create_merged_column self ix expected_type dialect infer_type_in_result allow_unmatched_rows =
self_col = self.get_self_column ix
lookup_col = self.get_lookup_column ix
is_lookup_found = SQL_Expression.Operation "==" [self.lookup_counter.expression, SQL_Expression.Literal "1"]
expression = case allow_unmatched_rows of
True -> SQL_Expression.Operation "CASE" [is_lookup_found, lookup_col.expression, self_col.expression]
False -> Error.throw (Illegal_State.Error "Assumed that prepare_columns_for_lookup never returns Replace_Column if allow_unmatched_rows=False. This is a bug in the Database library.")
input_column = Internal_Column.Value self_col.name (infer_type_in_result expression) expression
adapted = dialect.adapt_unified_column input_column expected_type infer_type_in_result
Internal_Column.Value self_col.name adapted.sql_type_reference adapted.expression
## PRIVATE
Wraps the two source tables into subqueries and creates a set of new columns
(encapsulated as `Lookup_Subquery_Setup`), together with the `lookup_counter`
column (see `make_lookup_counter_column`), that are valid in the context of a
lookup join query (it translates the source columns from the input
contexts to the external join context).
prepare_subqueries base_table lookup_table lookup_columns unique_name_strategy =
table_name_deduplicator = base_table.connection.base_connection.table_naming_helper.create_unique_name_strategy
self_alias = table_name_deduplicator.make_unique base_table.name
lookup_alias = table_name_deduplicator.make_unique lookup_table.name
new_table_name = table_name_deduplicator.make_unique <|
base_table.name + "_" + lookup_table.name
lookup_counter_base = make_lookup_counter_column lookup_table.connection.dialect lookup_columns unique_name_strategy
self_requested_columns = Vector.new_builder
lookup_requested_columns = Vector.new_builder
lookup_columns.each c-> case c of
Lookup_Column.Key_Column base_column key_column ->
self_requested_columns.append [base_column]
lookup_requested_columns.append [key_column]
Lookup_Column.Keep_Column base_column ->
self_requested_columns.append [base_column]
lookup_requested_columns.append []
Lookup_Column.Replace_Column base_column lookup_column _ ->
self_requested_columns.append [base_column]
lookup_requested_columns.append [lookup_column]
Lookup_Column.Add_Column lookup_column ->
self_requested_columns.append []
lookup_requested_columns.append [lookup_column]
self_sub = base_table.context.as_subquery self_alias self_requested_columns.to_vector
lookup_sub = lookup_table.context.as_subquery lookup_alias lookup_requested_columns.to_vector+[[lookup_counter_base]]
lookup_counter = lookup_sub.new_columns.at -1 . first
Lookup_Subquery_Setup.Value self_sub lookup_sub lookup_counter new_table_name
## PRIVATE
Creates a context for the lookup join query.
make_context_for_lookup_join lookup_columns subquery_setup =
on_expressions = (_.flatten) <| lookup_columns.map_with_index ix-> c-> case c of
Lookup_Column.Key_Column _ _ ->
self_col = subquery_setup.get_self_column ix
lookup_col = subquery_setup.get_lookup_column ix
[SQL_Expression.Operation "==" [self_col.expression, lookup_col.expression]]
_ -> []
new_from = From_Spec.Join SQL_Join_Kind.Left subquery_setup.self_sub.subquery subquery_setup.lookup_sub.subquery on_expressions
Context.for_subquery new_from
## PRIVATE
Runs a query that checks if there are any duplicate matches in the lookup
result. The query tries to minimize the amount of work and data that is
transferred, but it may not be cheap. It is, however, needed to report errors early.
precheck_for_duplicate_matches lookup_columns subquery_setup connection new_ctx ~continuation =
key_columns_for_duplicate_check = (_.flatten) <| lookup_columns.map_with_index ix-> c-> case c of
Lookup_Column.Key_Column _ _ -> [subquery_setup.get_self_column ix]
_ -> []
table_for_duplicate_check = Table.Value subquery_setup.new_table_name connection [subquery_setup.lookup_counter]+key_columns_for_duplicate_check new_ctx
duplicate_lookup_matches = table_for_duplicate_check.filter 0 (Filter_Condition.Greater than=1) . read max_rows=1
case duplicate_lookup_matches.row_count > 0 of
True ->
first_example_row = duplicate_lookup_matches.read.rows.first.to_vector
duplicate_example_count = first_example_row.first
duplicate_example_key = first_example_row.drop 1
Error.throw (Non_Unique_Key.Error (key_columns_for_duplicate_check.map .name) duplicate_example_key duplicate_example_count)
False -> continuation
## PRIVATE
Creates an expression that can be baked into the query (preferably in such
a way that it will not get removed by query transformations), and that will
fail if the 'key uniqueness' invariant or the 'all rows matching' invariant
(when `allow_unmatched_rows=False`) is violated when the query is being
materialized.
This is needed because the initial checks are run when the query is built,
but there is no guarantee that the table will not be changed before the query
is actually materialized. If, in the meantime, the data is modified and
invariants are broken, the query may return completely unexpected results
(e.g. rows of the source table could get duplicated, which should never
happen with `lookup_and_replace`).
This additional check ensures that if the invariants are broken, the query
will fail instead of returning corrupted data. Its error reporting may be
limited in comparison to the initial checks, which may compute more
information.
make_invariant_check lookup_counter allow_unmatched_rows =
make_error message =
SQL_Expression.Operation "RUNTIME_ERROR" [SQL_Expression.Text_Literal "lookup_and_replace: "+message, lookup_counter.expression]
exact_match =
is_matched = SQL_Expression.Operation "==" [lookup_counter.expression, SQL_Expression.Literal "1"]
[is_matched, SQL_Expression.Literal "TRUE"]
unmatched =
is_unmatched = SQL_Expression.Operation "IS_NULL" [lookup_counter.expression]
case allow_unmatched_rows of
True -> [is_unmatched, SQL_Expression.Literal "TRUE"]
False -> [is_unmatched, make_error "Some rows have no matches in the lookup table."]
# This will be matched if the first branches do not match - this should happen if there is more than 1 match for a row.
fallback = [make_error "Duplicate matches found in the lookup table."]
SQL_Expression.Operation "CASE" unmatched+exact_match+fallback

View File

@ -15,3 +15,11 @@ type Error_Mapper
is_primary_key_violation error =
_ = error
Unimplemented.throw "This is an interface only."
## PRIVATE
Called by `Table.read`, allowing the dialect to transform a generic
`SQL_Error` into a more specific error type, if applicable.
transform_custom_errors : SQL_Error -> Any
transform_custom_errors error =
_ = error
Unimplemented.throw "This is an interface only."

View File

@ -1,4 +1,6 @@
from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
from Standard.Base.Runtime import assert
import project.Data.Column.Column
import project.Data.Table.Table
@ -32,7 +34,17 @@ check_integrity entity1 entity2 =
- entity2: The entity to check against the first.
check_connection : (Table | Column) -> (Table | Column) -> Boolean
check_connection entity1 entity2 =
Meta.is_same_object entity1.connection entity2.connection
# The `if_not_error` is needed because `Meta.is_same_object` does not forward dataflow errors.
entity1.if_not_error <| entity2.if_not_error <|
Meta.is_same_object entity1.connection entity2.connection
## PRIVATE
ensure_same_connection : Text -> Vector -> Any -> Any ! Illegal_Argument
ensure_same_connection name entities ~continuation =
assert entities.not_empty
all_same = entities.all entity-> check_connection entity entities.first
if all_same then continuation else
Error.throw (Illegal_Argument.Error "The input "+name+"s come from different connections. Cross-connection operations are not supported.")
## PRIVATE

View File

@ -36,6 +36,18 @@ type SQL_Expression
as-is into a query.
Literal (value : Text)
## PRIVATE
A text literal that should be inserted into a query, wrapped in quotes
with any quotes it may contain escaped.
This is useful to avoid requiring unnecessary interpolations for text
constants in queries.
User-provided values should always be passed as interpolated `Constant`s.
This should only be used for 'statically' known constants - ones baked
into the library source code.
Text_Literal (value : Text)
## PRIVATE
The internal representation of an SQL expression built from an operation

View File

@ -297,7 +297,8 @@ make_internal_generator_dialect =
stats = [agg_median, agg_mode, agg_percentile, stddev_pop, stddev_samp]
date_ops = [make_extract_as_int "year", make_extract_as_int "quarter", make_extract_as_int "month", make_extract_as_int "week", make_extract_as_int "day", make_extract_as_int "hour", make_extract_as_int "minute", make_extract_fractional_as_int "second", make_extract_fractional_as_int "millisecond" modulus=1000, make_extract_fractional_as_int "microsecond" modulus=1000, ["date_add", make_date_add], ["date_diff", make_date_diff]]
special_overrides = [is_null, is_empty]
my_mappings = text + counts + stats + first_last_aggregators + arith_extensions + bool + date_ops + special_overrides
other = [["RUNTIME_ERROR", make_runtime_error_op]]
my_mappings = text + counts + stats + first_last_aggregators + arith_extensions + bool + date_ops + special_overrides + other
Base_Generator.base_dialect . extend_with my_mappings
## PRIVATE
@ -727,3 +728,26 @@ as_int32 expr =
## PRIVATE
postgres_statement_setter = Statement_Setter.default
## PRIVATE
The RUNTIME_ERROR operation lets the query compile fine and it will not
prevent it from running if the branch including this operation is not taken.
But if the branch is evaluated, it ensures the query fails.
This operation never returns a value, so its type should be polymorphic.
However, that is not possible - so currently it just 'pretends' that it
returns a Boolean, because that is the type we expect in the use-case. This
can be altered if needed.
It takes a variable as the second argument. It can be any value that is not
statically known - this ensures that the optimizer will not be able to
pre-compute the expression too early (which could make the query fail
spuriously). See `make_invariant_check` in `Lookup_Query_Helper` for an
example.
make_runtime_error_op arguments =
if arguments.length != 2 then
Panic.throw (Illegal_Argument.Error "RUNTIME_ERROR takes exactly 2 arguments (error message and a variable to ensure deferred execution).")
error_message = arguments.at 0
variable_to_defer = arguments.at 1
Builder.code "CAST('[ENSO INVARIANT VIOLATED: '||" ++ error_message ++ "||'] '||COALESCE(" ++ variable_to_defer ++ "::TEXT,'NULL') AS BOOLEAN)"

View File

@ -1,6 +1,6 @@
from Standard.Base import all
from project.Errors import SQL_Error
from project.Errors import SQL_Error, Invariant_Violation
## PRIVATE
type Postgres_Error_Mapper
@ -9,3 +9,12 @@ type Postgres_Error_Mapper
is_primary_key_violation : SQL_Error -> Boolean
is_primary_key_violation error =
error.java_exception.getMessage.contains "duplicate key value violates unique constraint"
## PRIVATE
transform_custom_errors : SQL_Error -> Any
transform_custom_errors error =
message = error.java_exception.getMessage
if message.contains "ENSO INVARIANT VIOLATED" . not then error else
payloads = message.tokenize "\[ENSO INVARIANT VIOLATED: (.*)\]"
if payloads.length != 1 then error else
Invariant_Violation.Error payloads.first error
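## A sketch of the expected flow (the exact Postgres wording is an
   assumption): the failing boolean cast produces a message such as
   `invalid input syntax for type boolean: "[ENSO INVARIANT VIOLATED: lookup_and_replace: ...] 2"`
   and `tokenize` extracts the bracketed payload, which becomes the `message`
   of the resulting `Invariant_Violation.Error`.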

View File

@ -286,9 +286,10 @@ make_internal_generator_dialect =
counts = [agg_count_is_null, agg_count_empty, agg_count_not_empty, ["COUNT_DISTINCT", agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", agg_count_distinct_include_null]]
stats = [agg_stddev_pop, agg_stddev_samp]
arith_extensions = [is_inf, floating_point_div, mod_op]
other = [["RUNTIME_ERROR", make_runtime_error_op]]
bool = [bool_or]
my_mappings = text + counts + stats + arith_extensions + bool
my_mappings = text + counts + stats + arith_extensions + bool + other
Base_Generator.base_dialect . extend_with my_mappings
## PRIVATE
@ -481,3 +482,21 @@ make_custom_cast column target_value_type type_mapping =
result.put expr
result.get
## PRIVATE
The RUNTIME_ERROR operation lets the query compile fine and it will not
prevent it from running if the branch including this operation is not taken.
But if the branch is evaluated, it ensures the query fails.
This operation never returns a value, so its type should be polymorphic.
However, that is not possible - so currently the SQLite dialect just does not
handle inferring a type for it. Thus, it should only be used in places that
will not need client-side type inference (e.g. a WHERE clause is ok).
This can be changed in the future, if needed.
make_runtime_error_op arguments =
if arguments.length != 2 then
Panic.throw (Illegal_Argument.Error "RUNTIME_ERROR takes exactly 2 arguments (error message and a variable to ensure deferred execution).")
error_message = arguments.at 0
variable_to_defer = arguments.at 1
# We have to ensure that the implementation of SQLite that we use does not have a MATCH function defined which would make the code below succeed.
Builder.code "match('[ENSO INVARIANT VIOLATED: '||" ++ error_message ++ "||'] ', " ++ variable_to_defer ++ ")"

View File

@ -1,6 +1,6 @@
from Standard.Base import all
from project.Errors import SQL_Error
from project.Errors import SQL_Error, Invariant_Violation
polyglot java import org.sqlite.SQLiteErrorCode
polyglot java import org.sqlite.SQLiteException
@ -14,3 +14,13 @@ type SQLite_Error_Mapper
case error.java_exception of
sqlite_exception : SQLiteException ->
sqlite_exception.getResultCode == SQLiteErrorCode.SQLITE_CONSTRAINT_PRIMARYKEY
## PRIVATE
transform_custom_errors : SQL_Error -> Any
transform_custom_errors error =
invariant_failure_message = "unable to use function MATCH in the requested context"
message = error.java_exception.getMessage
if message.contains invariant_failure_message . not then error else
query_contains_invariant_check = error.related_query.if_nothing "" . contains "ENSO INVARIANT VIOLATED"
if query_contains_invariant_check . not then error else
Invariant_Violation.Error Nothing error

View File

@ -179,14 +179,17 @@ operations_map =
handle_cast _ =
Panic.throw (Illegal_State.Error "Cast relies on its own type inference logic, so this code should never be reached. This is a bug in the Database library.")
handle_runtime_error _ =
Panic.throw (Illegal_State.Error "RUNTIME_ERROR should not be part of direct type inference, so this code should never be reached. This is a bug in the Database library.")
always_boolean_ops = ["==", "!=", "equals_ignore_case", ">=", "<=", "<", ">", "BETWEEN", "AND", "OR", "NOT", "IS_NULL", "IS_EMPTY", "LIKE", "IS_IN", "IS_IN_COLUMN", "starts_with", "ends_with", "contains", "BOOL_OR", "IS_INF"]
always_floating_ops = ["/", "mod", "AVG", "STDDEV_POP", "STDDEV_SAMP", "ROUND"]
always_text_ops = ["ADD_TEXT", "CONCAT", "CONCAT_QUOTE_IF_NEEDED", "MAKE_CASE_SENSITIVE", "FOLD_CASE", "TRIM", "LTRIM", "RTRIM", "REPLACE"]
always_integer_ops = ["COUNT", "COUNT_IS_NULL", "COUNT_DISTINCT", "COUNT_DISTINCT_INCLUDE_NULL", "COUNT_EMPTY", "COUNT_NOT_EMPTY", "COUNT_ROWS", "ROW_NUMBER"]
always_integer_ops = ["COUNT", "COUNT_IS_NULL", "COUNT_DISTINCT", "COUNT_DISTINCT_INCLUDE_NULL", "COUNT_EMPTY", "COUNT_NOT_EMPTY", "COUNT_ROWS", "ROW_NUMBER", "ROW_NUMBER_IN_GROUP"]
same_as_first = ["TRUNCATE", "CEIL", "FLOOR"]
arithmetic_ops = ["ADD_NUMBER", "-", "*", "^", "%", "SUM"]
merge_input_types_ops = ["ROW_MAX", "ROW_MIN", "MAX", "MIN", "FILL_NULL", "COALESCE"]
others = [["IIF", handle_iif], ["CAST", handle_cast], ["CASE", handle_case]]
others = [["IIF", handle_iif], ["CAST", handle_cast], ["CASE", handle_case], ["RUNTIME_ERROR", handle_runtime_error]]
Map.from_vector <|
v1 = always_boolean_ops.map [_, const SQLite_Types.boolean]
v2 = always_floating_ops.map [_, const SQLite_Types.real]

View File

@ -1666,28 +1666,27 @@ type Table
@on Widget_Helpers.make_join_condition_selector
join : Table -> Join_Kind -> Vector (Join_Condition | Text) | Text -> Text -> Problem_Behavior -> Table
join self right:Table (join_kind : Join_Kind = Join_Kind.Left_Outer) on=[Join_Condition.Equals self.column_names.first] right_prefix="Right " on_problems=Report_Warning = Out_Of_Memory.handle_java_exception "join" <|
if check_table "right" right then
# [left_unmatched, matched, right_unmatched]
rows_to_keep = case join_kind of
Join_Kind.Inner -> [False, True, False]
Join_Kind.Left_Outer -> [True, True, False]
Join_Kind.Right_Outer -> [False, True, True]
Join_Kind.Full -> [True, True, True]
Join_Kind.Left_Exclusive -> [True, False, False]
Join_Kind.Right_Exclusive -> [False, False, True]
columns_to_keep = case join_kind of
Join_Kind.Left_Exclusive -> [True, False]
Join_Kind.Right_Exclusive -> [False, True]
_ -> [True, True]
join_resolution = make_join_helpers self right . resolve on on_problems
right_columns_to_drop = if join_kind == Join_Kind.Inner then join_resolution.redundant_column_names else []
java_conditions = join_resolution.conditions
new_java_table = Java_Problems.with_problem_aggregator on_problems java_aggregator->
self.java_table.join right.java_table java_conditions (rows_to_keep.at 0) (rows_to_keep.at 1) (rows_to_keep.at 2) (columns_to_keep.at 0) (columns_to_keep.at 1) right_columns_to_drop right_prefix java_aggregator
Table.Value new_java_table
## ALIAS cartesian join
GROUP Standard.Base.Calculations
@ -1726,15 +1725,14 @@ type Table
layout order while for database tables the order may be unspecified).
cross_join : Table -> Integer | Nothing -> Text -> Problem_Behavior -> Table
cross_join self right:Table right_row_limit=100 right_prefix="Right " on_problems=Report_Warning = Out_Of_Memory.handle_java_exception "cross_join" <|
if check_table "right" right then
limit_problems = case right_row_limit.is_nothing.not && (right.row_count > right_row_limit) of
True ->
[Cross_Join_Row_Limit_Exceeded.Error right_row_limit right.row_count]
False -> []
on_problems.attach_problems_before limit_problems <|
new_java_table = Java_Problems.with_problem_aggregator on_problems java_aggregator->
self.java_table.crossJoin right.java_table right_prefix java_aggregator
Table.Value new_java_table
## Replaces values in this table by values from a lookup table.
New values are looked up in the lookup table based on the `key_columns`.
@ -1771,8 +1769,8 @@ type Table
specified in `key_columns`, a `Missing_Input_Columns` error is raised.
- If an empty vector is provided for `key_columns`, a
`No_Input_Columns_Selected` error is raised.
- If the lookup table contains multiple rows with the same values in
the `key_columns`, an `Non_Unique_Key` error is raised.
- If a single row is matched by multiple entries in the lookup table,
a `Non_Unique_Key` error is raised.
- If a column that is being updated from the lookup table has a type
that is not compatible with the type of the corresponding column in
this table, a `No_Common_Type` error is raised.
@ -1874,20 +1872,19 @@ type Table
@keep_unmatched (make_single_choice [["True", "Boolean.True"], ["False", "Boolean.False"], ["Report", Meta.get_qualified_type_name Report_Unmatched]])
zip : Table -> Boolean | Report_Unmatched -> Text -> Problem_Behavior -> Table
zip self right:Table keep_unmatched=Report_Unmatched right_prefix="Right " on_problems=Report_Warning =
if check_table "right" right then
keep_unmatched_bool = case keep_unmatched of
Report_Unmatched -> True
b : Boolean -> b
report_mismatch = keep_unmatched == Report_Unmatched
left_row_count = self.row_count
right_row_count = right.row_count
problems = if (left_row_count == right_row_count) || report_mismatch.not then [] else
[Row_Count_Mismatch.Error left_row_count right_row_count]
on_problems.attach_problems_before problems <|
new_java_table = Java_Problems.with_problem_aggregator on_problems java_aggregator->
self.java_table.zip right.java_table keep_unmatched_bool right_prefix java_aggregator
Table.Value new_java_table
## ALIAS append, concat
GROUP Standard.Base.Calculations
@ -1980,12 +1977,11 @@ type Table
retyped to the `Mixed` type to indicate that intention. Note that the
`Mixed` type may not be supported by most Database backends.
union : (Table | Vector Table) -> Match_Columns -> Boolean | Report_Unmatched -> Boolean -> Problem_Behavior -> Table
union self tables match_columns=Match_Columns.By_Name keep_unmatched_columns=Report_Unmatched allow_type_widening=True on_problems=Report_Warning =
union self tables:Vector|Table match_columns=Match_Columns.By_Name keep_unmatched_columns=Report_Unmatched allow_type_widening=True on_problems=Report_Warning =
all_tables = case tables of
v : Vector -> [self] + v
single_column : Column -> [self, single_column.to_table]
v : Vector -> [self] + (v.map t-> Table.from t)
single_table -> [self, single_table]
all_tables.all (check_table "tables") . if_not_error <|
all_tables.if_not_error <|
problem_builder = Problem_Builder.new
matched_column_sets = Match_Columns_Helpers.match_columns all_tables match_columns keep_unmatched_columns problem_builder
result_row_count = all_tables.fold 0 c-> t-> c + t.row_count
@ -2531,17 +2527,6 @@ make_join_helpers left_table right_table =
Java_Join_Between.new left.java_column right_lower.java_column right_upper.java_column
Join_Helpers.Join_Condition_Resolver.Value (left_table.at _) (right_table.at _) make_equals make_equals_ignore_case make_between
## PRIVATE
Checks if the argument is a proper table and comes from the current backend.
It returns True or throws a dataflow error explaining the issue.
check_table arg_name table =
if Table_Helpers.is_table table . not then Error.throw (Type_Error.Error Table table arg_name) else
same_backend = table.is_a Table
case same_backend of
False ->
Error.throw (Illegal_Argument.Error "Currently cross-backend operations are not supported. Materialize the table using `.read` before mixing it with an in-memory Table.")
True -> True
## PRIVATE
A helper that efficiently concatenates storages of in-memory columns.
concat_columns column_set all_tables result_type result_row_count on_problems =

View File

@ -1,29 +1,32 @@
from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import project.Data.Type.Storage
import project.Data.Type.Value_Type.Value_Type
import project.Data.Type.Value_Type_Helpers
from project.Errors import Missing_Input_Columns, Unexpected_Extra_Columns, Floating_Point_Equality, No_Common_Type
from project.Errors import Missing_Input_Columns, Unexpected_Extra_Columns, Floating_Point_Equality, No_Common_Type, No_Output_Columns
polyglot java import org.enso.table.data.table.join.LookupColumnDescription
## PRIVATE
type Lookup_Settings
## PRIVATE
Value (key_columns : Vector Text) (columns_to_update : Set Text) (columns_to_add : Vector Text)
## PRIVATE
type Lookup_Column
## PRIVATE
A key column used to correlate rows between the two tables.
Key_Column base_column lookup_column
## PRIVATE
A column from the base table that is kept as-is.
Keep_Column base_column
## PRIVATE
A column that is present in both tables.
If the row is matched with a row from the lookup table, the value from
the lookup table is used; otherwise the value from the base table is kept.
Replace_Column base_column lookup_column (common_type : Value_Type)
## PRIVATE
A new column added from the lookup table.
If the row has no match, it will be `Nothing`.
Add_Column lookup_column
## PRIVATE
@ -38,7 +41,8 @@ type Lookup_Column
It also reports any errors or warnings related to selecting these columns.
prepare_columns_for_lookup base_table lookup_table key_columns_selector add_new_columns allow_unmatched_rows on_problems =
key_columns = base_table.select_columns key_columns_selector . column_names
key_columns = base_table.select_columns key_columns_selector . column_names . catch No_Output_Columns _->
Error.throw (Illegal_Argument.Error "At least one key column must be provided for `lookup_and_replace`.")
lookup_table_key_columns = lookup_table.select_columns key_columns . catch Missing_Input_Columns error->
Error.throw (Missing_Input_Columns.Error error.criteria "the lookup table")

View File

@ -452,15 +452,6 @@ filter_blank_rows table when_any treat_nans_as_blank =
table.filter non_missing_mask Filter_Condition.Is_True
False -> table
## PRIVATE
Checks if the given object implements a Table interface.
Currently, it relies on a hack - it checks if the object has a method
`is_table` and if it returns `True`.
is_table obj =
known_types = ["Standard.Table.Data.Table.Table", "Standard.Database.Data.Table.Table"]
known_types.contains (Meta.get_qualified_type_name obj)
## PRIVATE
Checks if the given object implements a Table interface.

View File

@ -35,7 +35,6 @@ public class LookupJoin {
LookupJoin joiner = new LookupJoin(keys, columnDescriptions, allowUnmatchedRows, problemAggregator);
joiner.checkNullsInKey();
joiner.verifyLookupUniqueness();
return joiner.join();
}
@ -78,22 +77,6 @@ public class LookupJoin {
}
}
private void verifyLookupUniqueness() {
if (!lookupIndex.isUnique()) {
// Find the duplicated key
for (Map.Entry<UnorderedMultiValueKey, List<Integer>> group : lookupIndex.mapping().entrySet()) {
int groupSize = group.getValue().size();
if (groupSize > 1) {
UnorderedMultiValueKey key = group.getKey();
List<Object> exampleValues = IntStream.range(0, keyColumnNames.size()).mapToObj(key::get).toList();
throw new NonUniqueLookupKey(keyColumnNames, exampleValues, groupSize);
}
}
assert false : "isUnique returned false, but no duplicated key was found.";
}
}
private Table join() {
List<LookupOutputColumn> outputColumns = columnDescriptions.stream().map(this::prepareOutputColumn).toList();
List<LookupOutputColumn.MergeColumns> columnsToMerge =
@ -145,8 +128,12 @@ public class LookupJoin {
}
}
if (lookupRowIndices.size() > 1) {
List<Object> exampleValues = IntStream.range(0, keyColumnNames.size()).mapToObj(key::get).toList();
throw new NonUniqueLookupKey(keyColumnNames, exampleValues, lookupRowIndices.size());
}
assert !lookupRowIndices.isEmpty() : "No Index group should be empty.";
assert lookupRowIndices.size() == 1 : "This should have been checked in verifyLookupUniqueness()";
return lookupRowIndices.get(0);
}

View File

@ -3,10 +3,13 @@ import Standard.Base.Errors.Common.Arithmetic_Error
import Standard.Base.Errors.Common.Index_Out_Of_Bounds
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
from Standard.Table import Set_Mode
from Standard.Table import all
from Standard.Table.Errors import all
import Standard.Table.Data.Expression.Expression_Error
from Standard.Database import all
from Standard.Database.Errors import Integrity_Error
from Standard.Test import Test, Problems
import Standard.Test.Extensions
@ -163,6 +166,17 @@ spec setup =
problems2 = [Arithmetic_Error.Error "Division by zero (at rows [2])."]
Problems.test_problem_handling action2 problems2 tester2
Test.specify "should gracefully handle columns from different backends" <|
t1 = table_builder [["A", [1, 2, 3]]]
alternative_connection = Database.connect (SQLite In_Memory)
t0 = (Table.new [["X", [3, 2, 1]]]).select_into_database_table alternative_connection "T0" temporary=True
c0 = t0.at "X"
r1 = t1.set c0
case setup.is_database of
True -> r1.should_fail_with Integrity_Error
False -> r1.should_fail_with Illegal_Argument
Test.group prefix+"Table.column_names" <|
Test.specify "should return the names of all columns" <|
table.column_names . should_equal ["foo", "bar", "Baz", "foo 1", "foo 2", "ab.+123", "abcd123"]

View File

@ -1,15 +1,17 @@
from Standard.Base import all
import Standard.Base.Errors.Common.Type_Error
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Errors.Illegal_State.Illegal_State
import Standard.Table.Data.Join_Kind_Cross.Join_Kind_Cross
import Standard.Test.Extensions
from Standard.Database.Errors import Unsupported_Database_Operation
from Standard.Table import all hiding Table
from Standard.Table import all
from Standard.Table.Errors import all
import Standard.Table.Data.Join_Kind_Cross.Join_Kind_Cross
from Standard.Database import all
from Standard.Database.Errors import Unsupported_Database_Operation, Integrity_Error
from Standard.Test import Test, Problems
import Standard.Test.Extensions
from project.Common_Table_Operations.Util import expect_column_names, run_default_backend
@ -159,3 +161,11 @@ spec setup =
t1 = table_builder [["X", [1, 2]], ["Y", [4, 5]]]
t2 = table_builder [["Z", ['a', 'b']], ["W", ['c', 'd']]]
Test.expect_panic_with (t1.join t2 join_kind=Join_Kind_Cross.Cross on=[]) Type_Error
Test.specify "should gracefully handle tables from different backends" <|
t1 = table_builder [["A", ["a", "b"]]]
alternative_connection = Database.connect (SQLite In_Memory)
t0 = (Table.new [["X", [1, 2, 4]], ["Z", [10, 20, 30]]]).select_into_database_table alternative_connection "T0" temporary=True
r1 = t1.cross_join t0
r1.should_fail_with Illegal_Argument

View File

@ -1,10 +1,12 @@
from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Errors.Illegal_State.Illegal_State
from Standard.Table import all hiding Table
from Standard.Table import all
from Standard.Table.Errors import all
from Standard.Database.Errors import Unsupported_Database_Operation
from Standard.Database import all
from Standard.Database.Errors import Unsupported_Database_Operation, Integrity_Error
from Standard.Test import Test, Problems
import Standard.Test.Extensions
@ -647,3 +649,10 @@ spec setup =
r3.at 2 . should_equal [Nothing, 10, 7, 7, 200]
r3.at 3 . should_equal [2, 20, 2, Nothing, Nothing]
r3.at 4 . should_equal [3, 30, 3, Nothing, Nothing]
Test.specify "should gracefully handle tables from different backends" <|
alternative_connection = Database.connect (SQLite In_Memory)
t0 = (Table.new [["X", [1, 2, 4]], ["Z", [10, 20, 30]]]).select_into_database_table alternative_connection "T0" temporary=True
r1 = t1.join t0
r1.should_fail_with Illegal_Argument

View File

@ -1,9 +1,11 @@
from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
from Standard.Table import all
from Standard.Table.Errors import all
from Standard.Database.Errors import Unsupported_Database_Operation
from Standard.Database import all
from Standard.Database.Errors import Unsupported_Database_Operation, Invariant_Violation, Integrity_Error
from Standard.Test import Test, Problems
import Standard.Test.Extensions
@ -17,8 +19,7 @@ spec setup =
prefix = setup.prefix
table_builder = setup.table_builder
materialize = setup.materialize
db_pending = if setup.is_database then "Not implemented yet. TODO: https://github.com/enso-org/enso/issues/7981"
Test.group prefix+"Table.lookup_and_replace" pending=db_pending <|
Test.group prefix+"Table.lookup_and_replace" <|
Test.specify "should allow to simply update columns based on a lookup table" <|
lookup = table_builder [["Y", ["A", "B", "A"]], ["X", [1, 2, 3]]]
my_table = table_builder [["X", [1, 2, 3, 2]], ["Y", ["Z", "ZZ", "ZZZ", "ZZZZ"]], ["Z", [10, 20, 30, 40]]]
@ -149,17 +150,42 @@ spec setup =
m2.at "Y" . to_vector . should_equal ["A", "B", "A", "B"]
m2.at "Z" . to_vector . should_equal [100, 200, 100, 100]
Test.specify "will fail on duplicate keys in the lookup table" <|
Test.specify "will fail on duplicate matches in the lookup table" <|
lookup = table_builder [["X", [1, 2, 1]], ["Y", ["A", "B", "C"]]]
my_table = table_builder [["X", [4, 2, 3, 2]], ["Y", ["Z", "ZZ", "ZZZ", "ZZZZ"]], ["Z", [10, 20, 30, 40]]]
r2 = my_table.lookup_and_replace lookup key_columns=["X"]
# If the duplicates do not show up in the result, the operation is accepted.
t2 = my_table.lookup_and_replace lookup key_columns=["X"]
t2.column_names . should_equal ["X", "Y", "Z"]
m2 = t2 |> materialize |> _.order_by "Z"
m2.at "Z" . to_vector . should_equal [10, 20, 30, 40]
m2.at "X" . to_vector . should_equal [4, 2, 3, 2]
m2.at "Y" . to_vector . should_equal ["Z", "B", "ZZZ", "B"]
my_table2 = table_builder [["X", [1, 2]], ["Y", ["Z", "ZZ"]], ["Z", [10, 20]]]
r2 = my_table2.lookup_and_replace lookup key_columns=["X"]
r2.should_fail_with Non_Unique_Key
r2.catch.key_column_names . should_equal ["X"]
r2.catch.clashing_example_key_values . should_equal [1]
r2.catch.clashing_example_row_count . should_equal 2
lookup2 = table_builder [["X", [1, 1]], ["Y", ["A", "A"]], ["Z", [100, 100]]]
r3 = my_table.lookup_and_replace lookup2 key_columns=["X", "Y"]
Problems.assume_no_problems <| my_table.lookup_and_replace lookup2 key_columns=["X", "Y"]
r3 = my_table2.lookup_and_replace lookup2 key_columns=["X"]
r3.should_fail_with Non_Unique_Key
m3 = my_table2.lookup_and_replace lookup2 key_columns=["X", "Y"] |> materialize |> _.order_by "Z"
m3.at "Z" . to_vector . should_equal [10, 20]
m3.at "X" . to_vector . should_equal [1, 2]
m3.at "Y" . to_vector . should_equal ["Z", "ZZ"]
my_table3 = table_builder [["X", [1, 1, 2]], ["Y", ["A", "Z", "ZZ"]], ["Z", [10, 20, 30]]]
r4 = my_table3.lookup_and_replace lookup2 key_columns=["X", "Y"]
r4.should_fail_with Non_Unique_Key
r4.catch.key_column_names . should_equal ["X", "Y"]
r4.catch.clashing_example_key_values . should_equal [1, "A"]
r4.catch.clashing_example_row_count . should_equal 2
Test.specify "will preserve count of rows, even if there are duplicates" <|
lookup = table_builder [["X", [1, 2]], ["Y", ["A", "B"]]]
my_table = table_builder [["X", [1, 2, 2, 2, 1]], ["Z", [10, 20, 20, 20, 50]]]
@ -171,6 +197,64 @@ spec setup =
m2.at "Y" . to_vector . should_equal ["A", "B", "B", "B", "A"]
m2.at "Z" . to_vector . should_equal [10, 20, 20, 20, 50]
Test.specify "should correctly preserve types of original, merged and added columns" <|
table = table_builder [["key1", [0, 1]], ["key2", ["o", "?"]], ["X", [1, 10]], ["Y", ["A", "E"]], ["Z", [1.5, 2.0]], ["W", [True, False]], ["A", [2, 22]], ["B", ["1", "2"]], ["C", [2.0, 2.5]], ["D", [False, False]]]
lookup = table_builder [["key1", [0, 2]], ["key2", ["o", "?"]], ["X2", [100, 1000]], ["Y2", ["foo", "bar"]], ["Z2", [0.5, 4.0]], ["W2", [False, True]], ["A", [3, 55]], ["B", ["F", "F"]], ["C", [3.0, 10.5]], ["D", [True, False]]]
[True, False].each allow_unmatched_rows->
table_prepared = if allow_unmatched_rows then table else
# If we don't allow unmatched rows, we filter the input to only contain the matching row - this test is not about matching, but about the types.
table.filter "key1" (Filter_Condition.Equal to=0)
t2 = table_prepared.lookup_and_replace lookup key_columns=["key1", "key2"] allow_unmatched_rows=allow_unmatched_rows
t2.column_names . should_equal ["key1", "key2", "X", "Y", "Z", "W", "A", "B", "C", "D", "X2", "Y2", "Z2", "W2"]
t2.at "key1" . value_type . should_be_a (Value_Type.Integer ...)
t2.at "key2" . value_type . should_equal Value_Type.Char
t2.at "X" . value_type . should_be_a (Value_Type.Integer ...)
t2.at "Y" . value_type . should_equal Value_Type.Char
t2.at "Z" . value_type . should_equal Value_Type.Float
t2.at "W" . value_type . should_equal Value_Type.Boolean
t2.at "A" . value_type . should_be_a (Value_Type.Integer ...)
t2.at "B" . value_type . should_equal Value_Type.Char
t2.at "C" . value_type . should_equal Value_Type.Float
t2.at "D" . value_type . should_equal Value_Type.Boolean
t2.at "X2" . value_type . should_be_a (Value_Type.Integer ...)
t2.at "Y2" . value_type . should_equal Value_Type.Char
t2.at "Z2" . value_type . should_equal Value_Type.Float
t2.at "W2" . value_type . should_equal Value_Type.Boolean
if setup.test_selection.fixed_length_text_columns then Test.specify "should correctly preserve types of original, merged and added columns (various Char types test case)" <|
table2 = table_builder [["key", ["0"]], ["X", ["a"]], ["A", ["bbbbb"]]] . cast "key" (Value_Type.Char size=50) . cast "X" (Value_Type.Char size=1) . cast "A" (Value_Type.Char size=5 variable_length=False)
lookup2 = table_builder [["key", ["0"]], ["X2", ["ccc"]], ["A", ["dddd"]]] . cast "key" (Value_Type.Char size=100) . cast "X2" (Value_Type.Char size=3 variable_length=False) . cast "A" (Value_Type.Char size=4 variable_length=False)
table2.at "key" . value_type . should_equal (Value_Type.Char size=50 variable_length=True)
table2.at "X" . value_type . should_equal (Value_Type.Char size=1 variable_length=True)
table2.at "A" . value_type . should_equal (Value_Type.Char size=5 variable_length=False)
lookup2.at "key" . value_type . should_equal (Value_Type.Char size=100 variable_length=True)
lookup2.at "X2" . value_type . should_equal (Value_Type.Char size=3 variable_length=False)
lookup2.at "A" . value_type . should_equal (Value_Type.Char size=4 variable_length=False)
[True, False].each allow_unmatched_rows->
t3 = table2.lookup_and_replace lookup2 key_columns=["key"] allow_unmatched_rows=allow_unmatched_rows
t3.column_names . should_equal ["key", "X", "A", "X2"]
# The key column keeps its original type
t3.at "key" . value_type . should_equal (Value_Type.Char size=50 variable_length=True)
t3.at "X" . value_type . should_equal (Value_Type.Char size=1 variable_length=True)
t3.at "X2" . value_type . should_equal (Value_Type.Char size=3 variable_length=False)
case allow_unmatched_rows of
True ->
# The merged column gets a merged type (the maximum of the two sizes here, but variable length due to the differences), because it may contain both old and new values.
t3.at "A" . value_type . should_equal (Value_Type.Char size=5 variable_length=True)
False ->
# If unmatched rows are not allowed, we can guarantee that only _new_ values (from the lookup table) will be in the result, so instead of merging the types we inherit the type from the lookup table.
t3.at "A" . value_type . should_equal (Value_Type.Char size=4 variable_length=False)
Test.specify "will report Floating_Point_Equality if floating-point columns are used as key" <|
lookup = table_builder [["X", [1.0, 2.0, 3.0]], ["Y", ["A", "B", "C"]]]
my_table = table_builder [["X", [2.0, 3.0, 2.0, 3.0]], ["Y", ["Z", "ZZ", "ZZZ", "ZZZZ"]], ["Z", [10, 20, 30, 40]]]
@ -224,6 +308,17 @@ spec setup =
m2.at "Y" . to_vector . should_equal ["B", Nothing, Nothing, Nothing]
m2.at "Z" . to_vector . should_equal [10, 20, 30, 40]
# NULLs in the source are OK, but they mean such rows will never match:
r3 = my_table2.lookup_and_replace lookup2 key_columns="X" allow_unmatched_rows=False
r3.should_fail_with Unmatched_Rows_In_Lookup
Test.specify "will not allow providing no key_columns" <|
lookup = table_builder [["X", [1, 2]], ["Y", ["A", "B"]]]
my_table = table_builder [["X", [2, 1]], ["Z", [10, 20]]]
r2 = my_table.lookup_and_replace lookup key_columns=[]
r2.should_fail_with Illegal_Argument
if setup.is_database.not then Test.specify "(in-memory only) will preserve the order of rows from the original table" <|
lookup = table_builder [["Y", [1, 0]], ["V", ["TRUE", "FALSE"]]]
xs = 0.up_to 50 . to_vector
@ -237,3 +332,106 @@ spec setup =
t2.at "Y" . to_vector . should_equal ys
vs = xs.map x-> if (x%2) == 1 then "TRUE" else "FALSE"
t2.at "V" . to_vector . should_equal vs
if setup.is_database then Test.specify "(database-only) will fail if pre-checked invariants get invalidated between the query is constructed and then materialized" <|
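# Scenario: build the query, mutate the underlying data, then materialize - the materialization should detect that the pre-checked invariants no longer hold.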
Test.with_clue "(lookup is unique check) " <|
lookup = table_builder [["X", [1, 2]], ["Y", ["A", "B"]]]
table = table_builder [["X", [1, 2, 2]], ["Y", ["Z", "ZZ", "ZZZ"]], ["Z", [10, 20, 30]]]
r1 = table.lookup_and_replace lookup key_columns="X"
# Immediately, the query is all good.
Problems.assume_no_problems r1
m1 = r1 |> materialize |> _.order_by "Z"
m1.at "X" . to_vector . should_equal [1, 2, 2]
m1.at "Y" . to_vector . should_equal ["A", "B", "B"]
m1.at "Z" . to_vector . should_equal [10, 20, 30]
# Now we change the lookup table so that the key is no longer unique - even though the values are the same, we care about row uniqueness.
Problems.assume_no_problems <|
lookup.update_rows (Table.new [["X", [2]], ["Y", ["B"]]]) key_columns=[] update_action=Update_Action.Insert
lookup.filter "X" (Filter_Condition.Equal to=2) . row_count . should_equal 2
# Now, the query is no longer valid:
m2 = r1 |> materialize
m2.should_fail_with Invariant_Violation
m2.catch.to_display_text . should_contain "database has been modified"
m2.catch.to_display_text . should_contain "re-run the workflow"
## Even if we drop the lookup-related columns, we should still get an error
(otherwise we would risk duplicating records due to the non-uniqueness!)
r3 = r1.select_columns ["Z"]
Problems.assume_no_problems r3
m3 = r3 |> materialize
## Alternatively, this query could theoretically succeed
(since it no longer depends on the lookup at all).
But if it does, we need to guarantee that the result contains no duplicated rows:
r3.row_count . should_equal 3
r3.at "Z" . to_vector . length . should_equal 3
r3.at "Z" . to_vector . should_contain_the_same_elements_as [10, 20, 30]
m3.should_fail_with Invariant_Violation
r3.at "Z" . to_vector . should_fail_with Invariant_Violation
Test.with_clue "(no unmatched rows check - added a row in source) " <|
lookup = table_builder [["X", [1, 2]], ["Y", ["A", "B"]]]
table = table_builder [["X", [1, 2, 2]], ["Y", ["Z", "ZZ", "ZZZ"]], ["Z", [10, 20, 30]]]
r1 = table.lookup_and_replace lookup key_columns="X" allow_unmatched_rows=False
# Immediately, the query is all good.
Problems.assume_no_problems r1
m1 = r1 |> materialize |> _.order_by "Z"
m1.at "X" . to_vector . should_equal [1, 2, 2]
m1.at "Y" . to_vector . should_equal ["A", "B", "B"]
m1.at "Z" . to_vector . should_equal [10, 20, 30]
# Now, we change the source table, so that there are unmatched rows:
Problems.assume_no_problems <|
table.update_rows (Table.new [["X", [33]], ["Y", ["F"]], ["Z", [40]]]) key_columns=[] update_action=Update_Action.Insert
table.row_count . should_equal 4
# Now, the query is no longer valid:
m2 = r1 |> materialize
m2.should_fail_with Invariant_Violation
Test.with_clue "(no unmatched rows check - removed a row in lookup) " <|
lookup = table_builder [["X", [1, 2]], ["Y", ["A", "B"]]]
table = table_builder [["X", [1, 2, 2]], ["Y", ["Z", "ZZ", "ZZZ"]], ["Z", [10, 20, 30]]]
r1 = table.lookup_and_replace lookup key_columns="X" allow_unmatched_rows=False
# Immediately, the query is all good.
Problems.assume_no_problems r1
m1 = r1 |> materialize |> _.order_by "Z"
m1.at "X" . to_vector . should_equal [1, 2, 2]
m1.at "Y" . to_vector . should_equal ["A", "B", "B"]
m1.at "Z" . to_vector . should_equal [10, 20, 30]
# Now, we change the lookup table, so that there are unmatched rows:
Problems.assume_no_problems <|
lookup.delete_rows (Table.new [["X", [1]]]) key_columns=["X"]
lookup.row_count . should_equal 1
# Now, the query is no longer valid:
m2 = r1 |> materialize
m2.should_fail_with Invariant_Violation
# This does not seem particularly useful, but there is no reason to disallow it, so we ensure it does not crash.
Test.specify "(edge-case) should allow lookup with itself" <|
table = table_builder [["X", [1, 2, 3]], ["Y", ["A", "B", "C"]]]
t2 = table.lookup_and_replace table key_columns="X"
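# Every key matches exactly one row (itself), so the result should be identical to the input.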
t2.column_names . should_equal ["X", "Y"]
m2 = t2 |> materialize |> _.order_by "X"
m2.at "X" . to_vector . should_equal [1, 2, 3]
m2.at "Y" . to_vector . should_equal ["A", "B", "C"]
Test.specify "should gracefully handle tables from different backends" <|
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
alternative_connection = Database.connect (SQLite In_Memory)
t0 = (Table.new [["A", [3, 2, 1]], ["B", ["x", "y", "z"]]]).select_into_database_table alternative_connection "T0" temporary=True
r1 = t1.lookup_and_replace t0 key_columns="A"
r1.should_fail_with Illegal_Argument
View File
@ -1,10 +1,12 @@
from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Table.Data.Type.Value_Type.Bits
from Standard.Table import all
from Standard.Table.Errors import all
from Standard.Database.Errors import Unsupported_Database_Operation
from Standard.Database import all
from Standard.Database.Errors import Unsupported_Database_Operation, Integrity_Error
from Standard.Test import Test, Problems
import Standard.Test.Extensions
@ -393,3 +395,12 @@ spec setup =
# And this should report Column_Type_Mismatch as the more important error too.
#t1.union t2 allow_type_widening=False on_problems=Problem_Behavior.Report_Error . should_fail_with Column_Type_Mismatch
t1.union t2 allow_type_widening=False on_problems=Problem_Behavior.Report_Error . should_fail_with No_Output_Columns
Test.specify "should gracefully handle tables from different backends" <|
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
alternative_connection = Database.connect (SQLite In_Memory)
t0 = (Table.new [["A", [1, 2, 4]], ["B", ["10", "20", "30"]]]).select_into_database_table alternative_connection "T0" temporary=True
r1 = t1.union t0
r1.should_fail_with Illegal_Argument
View File
@ -1,10 +1,12 @@
from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Errors.Illegal_State.Illegal_State
from Standard.Table import all hiding Table
from Standard.Table import all
from Standard.Table.Errors import all
from Standard.Database.Errors import Unsupported_Database_Operation
from Standard.Database import all
from Standard.Database.Errors import Unsupported_Database_Operation, Integrity_Error
from Standard.Test import Test, Problems
import Standard.Test.Extensions
@ -236,3 +238,12 @@ spec setup =
padded.at "times" . value_type . should_equal Value_Type.Time
padded.at "datetimes" . value_type . should_equal Value_Type.Date_Time
padded.at "mixed" . value_type . should_equal Value_Type.Mixed
Test.specify "should gracefully handle tables from different backends" <|
t1 = table_builder [["X", [1, 2, 3]], ["Y", [4, 5, 6]]]
alternative_connection = Database.connect (SQLite In_Memory)
t0 = (Table.new [["W", [1, 2, 4]]]).select_into_database_table alternative_connection "T0" temporary=True
r1 = t1.zip t0
r1.should_fail_with Illegal_Argument
View File
@ -135,6 +135,54 @@ run_tests prefix connection upload =
r3 = connection.query "SELECT * FROM ........"
r3.should_fail_with SQL_Error
Test.specify "will fail if the table is modified and a column gets removed" <|
name = Name_Generator.random_name "removing-column"
Problems.assume_no_problems <|
(Table.new [["a", [1, 2, 3]], ["b", [4, 5, 6]]]).select_into_database_table connection name temporary=True
t1 = connection.query name
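# t1 is backed by a query against the live table, so later reads reflect its current state.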
m1 = t1.read
Problems.assume_no_problems m1
m1.at "a" . to_vector . should_equal [1, 2, 3]
m1.at "b" . to_vector . should_equal [4, 5, 6]
Problems.assume_no_problems <| connection.drop_table name
Problems.assume_no_problems <|
(Table.new [["a", [100, 200]]]).select_into_database_table connection name temporary=True
# Reading a column that was kept will work OK
t1.at "a" . to_vector . should_equal [100, 200]
# But reading the whole table will fail on the missing column:
m2 = t1.read
m2.should_fail_with SQL_Error
Test.specify "will not fail if the table is modified and a column gets added" <|
name = Name_Generator.random_name "adding-column"
Problems.assume_no_problems <|
(Table.new [["a", [1, 2, 3]], ["b", [4, 5, 6]]]).select_into_database_table connection name temporary=True
t1 = connection.query name
m1 = t1.read
Problems.assume_no_problems m1
m1.at "a" . to_vector . should_equal [1, 2, 3]
m1.at "b" . to_vector . should_equal [4, 5, 6]
Problems.assume_no_problems <| connection.drop_table name
Problems.assume_no_problems <|
(Table.new [["a", [100, 200]], ["b", [300, 400]], ["c", [500, 600]]]).select_into_database_table connection name temporary=True
m2 = t1.read
Problems.assume_no_problems m2
m2.column_names . should_equal ["a", "b"]
m2.at "a" . to_vector . should_equal [100, 200]
m2.at "b" . to_vector . should_equal [300, 400]
t1.at "c" . should_fail_with No_Such_Column
t2 = connection.query name
t2.column_names . should_equal ["a", "b", "c"]
Test.group prefix+"Masking Tables" <|
Test.specify "should allow to select rows from a table or column based on an expression" <|
t2 = t1.filter (t1.at "a" == 1)