mirror of
https://github.com/enso-org/enso.git
synced 2024-12-27 18:54:03 +03:00
Implement Table.lookup_and_replace
in Database (#8146)
- Closes #7981 - Adds a `RUNTIME_ERROR` operation into the DB dialect, that may be used to 'crash' a query if a condition is met - used to validate if `lookup_and_replace` invariants are still satisfied when the query is materialized. - Removes old `Table_Helpers.is_table` and `same_backend` checks, in favour of the new way of checking this that relies on `Table.from` conversions, and is much simpler to use and also more robust.
This commit is contained in:
parent
660f4b35ce
commit
79011bd550
@ -11,3 +11,7 @@ type Redshift_Error_Mapper
|
||||
# Currently not implemented, skipping the error recognition.
|
||||
_ = error
|
||||
False
|
||||
|
||||
## PRIVATE
|
||||
transform_custom_errors : SQL_Error -> Any
|
||||
transform_custom_errors error = error
|
||||
|
@ -29,7 +29,6 @@ import Standard.Table.Internal.Add_Row_Number
|
||||
import Standard.Table.Internal.Aggregate_Column_Helper
|
||||
import Standard.Table.Internal.Column_Naming_Helper.Column_Naming_Helper
|
||||
import Standard.Table.Internal.Constant_Column.Constant_Column
|
||||
import Standard.Table.Internal.Lookup_Helpers
|
||||
import Standard.Table.Internal.Problem_Builder.Problem_Builder
|
||||
import Standard.Table.Internal.Table_Helpers
|
||||
import Standard.Table.Internal.Table_Helpers.Table_Column_Helper
|
||||
@ -50,6 +49,7 @@ import project.Data.Take_Drop_Helpers
|
||||
import project.Internal.Aggregate_Helper
|
||||
import project.Internal.Base_Generator
|
||||
import project.Internal.Common.Database_Join_Helper
|
||||
import project.Internal.Common.Lookup_Query_Helper
|
||||
import project.Internal.Helpers
|
||||
import project.Internal.IR.Context.Context
|
||||
import project.Internal.IR.From_Spec.From_Spec
|
||||
@ -61,7 +61,7 @@ import project.Internal.IR.SQL_Expression.SQL_Expression
|
||||
import project.Internal.IR.SQL_Join_Kind.SQL_Join_Kind
|
||||
import project.Internal.SQL_Type_Reference.SQL_Type_Reference
|
||||
from project.Data.Take_Drop_Helpers import Take_Drop
|
||||
from project.Errors import Integrity_Error, Table_Not_Found, Unsupported_Database_Operation
|
||||
from project.Errors import Integrity_Error, Table_Not_Found, Unsupported_Database_Operation, SQL_Error
|
||||
|
||||
polyglot java import java.sql.JDBCType
|
||||
polyglot java import java.util.UUID
|
||||
@ -1212,16 +1212,12 @@ type Table
|
||||
## PRIVATE
|
||||
Implementation of both `join` and `cross_join`.
|
||||
join_or_cross_join : Table -> Join_Kind | Join_Kind_Cross -> Vector (Join_Condition | Text) | Text -> Text -> Problem_Behavior -> Table
|
||||
join_or_cross_join self right join_kind on right_prefix on_problems =
|
||||
can_proceed = if Table_Helpers.is_table right . not then Error.throw (Type_Error.Error Table right "right") else
|
||||
same_backend = case right of
|
||||
_ : Table -> True
|
||||
_ -> False
|
||||
join_or_cross_join self right:Table join_kind on right_prefix on_problems =
|
||||
can_proceed = Helpers.ensure_same_connection "table" [self, right] <|
|
||||
join_conditions_ok = join_kind != Join_Kind_Cross.Cross || on == []
|
||||
if same_backend . not then Error.throw (Illegal_Argument.Error "Currently cross-backend joins are not supported. You need to upload the in-memory table before joining it with a database one, or materialize this table.") else
|
||||
if join_conditions_ok . not then Error.throw (Illegal_Argument.Error "Cross join does not allow join conditions") else
|
||||
True
|
||||
if can_proceed then
|
||||
if join_conditions_ok . not then Error.throw (Illegal_Argument.Error "Cross join does not allow join conditions") else
|
||||
True
|
||||
can_proceed.if_not_error <|
|
||||
left = self
|
||||
table_name_deduplicator = self.connection.base_connection.table_naming_helper.create_unique_name_strategy
|
||||
table_name_deduplicator.mark_used [left.name, right.name]
|
||||
@ -1314,14 +1310,13 @@ type Table
|
||||
|
||||
The ordering of rows in the resulting table is not specified.
|
||||
cross_join : Table -> Integer | Nothing -> Text -> Problem_Behavior -> Table
|
||||
cross_join self right right_row_limit=100 right_prefix="Right " on_problems=Report_Warning =
|
||||
if check_db_table "right" right then
|
||||
limit_problems = case right_row_limit.is_nothing.not && (right.row_count > right_row_limit) of
|
||||
True ->
|
||||
[Cross_Join_Row_Limit_Exceeded.Error right_row_limit right.row_count]
|
||||
False -> []
|
||||
on_problems.attach_problems_before limit_problems <|
|
||||
self.join_or_cross_join right join_kind=Join_Kind_Cross.Cross on=[] right_prefix on_problems
|
||||
cross_join self right:Table right_row_limit=100 right_prefix="Right " on_problems=Report_Warning =
|
||||
limit_problems = case right_row_limit.is_nothing.not && (right.row_count > right_row_limit) of
|
||||
True ->
|
||||
[Cross_Join_Row_Limit_Exceeded.Error right_row_limit right.row_count]
|
||||
False -> []
|
||||
on_problems.attach_problems_before limit_problems <|
|
||||
self.join_or_cross_join right join_kind=Join_Kind_Cross.Cross on=[] right_prefix on_problems
|
||||
|
||||
## Replaces values in this table by values from a lookup table.
|
||||
New values are looked up in the lookup table based on the `key_columns`.
|
||||
@ -1358,8 +1353,8 @@ type Table
|
||||
specified in `key_columns`, a `Missing_Input_Columns` error is raised.
|
||||
- If an empty vector is provided for `key_columns`, a
|
||||
`No_Input_Columns_Selected` error is raised.
|
||||
- If the lookup table contains multiple rows with the same values in
|
||||
the `key_columns`, an `Non_Unique_Key` error is raised.
|
||||
- If a single row is matched by multiple entries in the lookup table,
|
||||
a `Non_Unique_Key` error is raised.
|
||||
- If a column that is being updated from the lookup table has a type
|
||||
that is not compatible with the type of the corresponding column in
|
||||
this table, a `No_Common_Type` error is raised.
|
||||
@ -1377,8 +1372,8 @@ type Table
|
||||
@key_columns Widget_Helpers.make_column_name_vector_selector
|
||||
lookup_and_replace : Table -> (Vector (Integer | Text | Regex) | Text | Integer | Regex) -> Boolean -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns | Non_Unique_Key | Unmatched_Rows_In_Lookup
|
||||
lookup_and_replace self lookup_table:Table key_columns:(Vector (Integer | Text | Regex) | Text | Integer | Regex) add_new_columns:Boolean=True allow_unmatched_rows:Boolean=True on_problems:Problem_Behavior=Problem_Behavior.Report_Warning =
|
||||
_ = [lookup_table, key_columns, add_new_columns, allow_unmatched_rows, on_problems]
|
||||
Error.throw (Unsupported_Database_Operation.Error "Table.lookup_and_replace is not implemented yet for the Database backends.")
|
||||
Helpers.ensure_same_connection "table" [self, lookup_table] <|
|
||||
Lookup_Query_Helper.build_lookup_query self lookup_table key_columns add_new_columns allow_unmatched_rows on_problems
|
||||
|
||||
## ALIAS join by row position
|
||||
GROUP Standard.Base.Calculations
|
||||
@ -1522,11 +1517,11 @@ type Table
|
||||
retyped to the `Mixed` type to indicate that intention. Note that the
|
||||
`Mixed` type may not be supported by most Database backends.
|
||||
union : (Table | Vector Table) -> Match_Columns -> Boolean | Report_Unmatched -> Boolean -> Problem_Behavior -> Table
|
||||
union self tables match_columns=Match_Columns.By_Name keep_unmatched_columns=Report_Unmatched allow_type_widening=True on_problems=Report_Warning =
|
||||
union self tables:Vector|Table match_columns=Match_Columns.By_Name keep_unmatched_columns=Report_Unmatched allow_type_widening=True on_problems=Report_Warning =
|
||||
all_tables = case tables of
|
||||
v : Vector -> [self] + v
|
||||
v : Vector -> [self] + (v.map t-> Table.from t)
|
||||
single_table -> [self, single_table]
|
||||
all_tables.all (check_db_table "tables") . if_not_error <|
|
||||
Helpers.ensure_same_connection "table" all_tables <|
|
||||
problem_builder = Problem_Builder.new
|
||||
matched_column_sets = Match_Columns_Helpers.match_columns all_tables match_columns keep_unmatched_columns problem_builder
|
||||
dialect = self.connection.dialect
|
||||
@ -2154,7 +2149,8 @@ type Table
|
||||
False ->
|
||||
sql = preprocessed.to_sql
|
||||
column_type_suggestions = preprocessed.internal_columns.map .sql_type_reference
|
||||
materialized_table = self.connection.read_statement sql column_type_suggestions
|
||||
materialized_table = self.connection.read_statement sql column_type_suggestions . catch SQL_Error sql_error->
|
||||
Error.throw (self.connection.dialect.get_error_mapper.transform_custom_errors sql_error)
|
||||
|
||||
expected_types = self.columns.map .value_type
|
||||
actual_types = materialized_table.columns.map .value_type
|
||||
@ -2527,19 +2523,6 @@ type Wrapped_Error
|
||||
## PRIVATE
|
||||
Value value
|
||||
|
||||
## PRIVATE
|
||||
Checks if the argument is a proper table and comes from the current backend.
|
||||
It returns True or throws a dataflow error explaining the issue.
|
||||
|
||||
TODO [RW]: this should check that the tables are on the same connection
|
||||
check_db_table arg_name table =
|
||||
if Table_Helpers.is_table table . not then Error.throw (Type_Error.Error Table table arg_name) else
|
||||
same_backend = table.is_a Table
|
||||
case same_backend of
|
||||
False ->
|
||||
Error.throw (Illegal_Argument.Error "Currently cross-backend operations are not supported. Materialize the table using `.read` before mixing it with an in-memory Table.")
|
||||
True -> True
|
||||
|
||||
## PRIVATE
|
||||
By default, join on the first column, unless it's a cross join, in which
|
||||
case there are no join conditions.
|
||||
@ -2553,5 +2536,10 @@ Materialized_Table.from (that:Table) =
|
||||
_ = [that]
|
||||
Error.throw (Illegal_Argument.Error "Currently cross-backend operations are not supported. Materialize the table using `.read` before mixing it with an in-memory Table.")
|
||||
|
||||
## PRIVATE
|
||||
Table.from (that:Materialized_Table) =
|
||||
_ = [that]
|
||||
Error.throw (Illegal_Argument.Error "Currently cross-backend operations are not supported. Either materialize the other table using `.read` or upload the table into the database using `.select_into_database_table`.")
|
||||
|
||||
## PRIVATE
|
||||
Table_Ref.from (that:Table) = Table_Ref.Value that
|
||||
|
@ -203,3 +203,27 @@ type Unsupported_Database_Encoding
|
||||
Pretty print the unsupported database encoding warning.
|
||||
to_display_text : Text
|
||||
to_display_text self = self.message
|
||||
|
||||
## Indicates that the database has been modified between the time the query was
|
||||
prepared and the time it was executed, breaking an expected invariant and
|
||||
potentially causing data corruption.
|
||||
|
||||
The query needs to be rebuilt again.
|
||||
type Invariant_Violation
|
||||
## PRIVATE
|
||||
Indicates that the database has been modified between the time the query
|
||||
was prepared and the time it was executed, breaking an expected invariant
|
||||
and potentially causing data corruption.
|
||||
|
||||
Arguments:
|
||||
- message: A message describing the broken invariant, if available.
|
||||
It may be set to `Nothing` if the backend cannot decode the invariant
|
||||
message from the SQL error.
|
||||
- original_cause: The original SQL error that this error has been
|
||||
translated from.
|
||||
Error (message:Text|Nothing) (original_cause:SQL_Error)
|
||||
|
||||
## PRIVATE
|
||||
to_display_text : Text
|
||||
to_display_text self =
|
||||
"The database has been modified between the time the query was prepared and executed, leading to possibly incorrect results. Please re-run the workflow to retry."
|
||||
|
@ -193,7 +193,7 @@ base_dialect =
|
||||
nulls = [["IS_NULL", make_right_unary_op "IS NULL"], ["FILL_NULL", make_function "COALESCE"]]
|
||||
contains = [["IS_IN", make_is_in], ["IS_IN_COLUMN", make_is_in_column]]
|
||||
types = [simple_cast]
|
||||
windows = [["ROW_NUMBER", make_row_number]]
|
||||
windows = [["ROW_NUMBER", make_row_number], ["ROW_NUMBER_IN_GROUP", make_row_number_in_group]]
|
||||
base_map = Map.from_vector (arith + logic + compare + functions + agg + counts + text + nulls + contains + types + windows)
|
||||
Internal_Dialect.Value base_map wrap_in_quotes
|
||||
|
||||
@ -219,7 +219,7 @@ make_iif arguments = case arguments.length of
|
||||
An IR expression for constructing SQL `CASE` expressions.
|
||||
case_when : Vector Builder -> Builder
|
||||
case_when arguments =
|
||||
if arguments.length < 4 then Error.throw (Illegal_State.Error "CASE_WHEN needs at least 3 arguments.") else
|
||||
if arguments.length < 3 then Error.throw (Illegal_State.Error "CASE_WHEN needs at least 3 arguments.") else
|
||||
fallback = arguments.last
|
||||
cases = arguments.drop (Last 1)
|
||||
if cases.length % 2 != 0 then Error.throw (Illegal_State.Error "CASE_WHEN expects an odd number of arguments (two arguments for each case and a fallback).") else
|
||||
@ -286,6 +286,14 @@ make_row_number (arguments : Vector) (metadata : Row_Number_Metadata) = if argum
|
||||
Builder.code "PARTITION BY " ++ Builder.join ", " grouping
|
||||
Builder.code "(row_number() OVER (" ++ group_part ++ " ORDER BY " ++ Builder.join ", " ordering ++ ") * " ++ step.paren ++ " + " ++ offset.paren ++ ")"
|
||||
|
||||
## PRIVATE
|
||||
A helper for `lookup_and_replace`, and perhaps other operation.
|
||||
It creates an expression that returns a row number within a group.
|
||||
make_row_number_in_group arguments =
|
||||
if arguments.length == 0 then
|
||||
Panic.throw <| Illegal_State.Error "The operation ROW_NUMBER_IN_GROUP requires at least one argument."
|
||||
Builder.code "row_number() OVER (PARTITION BY " ++ (Builder.join ", " arguments) ++ ")"
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Builds code for an expression.
|
||||
@ -299,6 +307,9 @@ generate_expression dialect expr = case expr of
|
||||
dialect.wrap_identifier origin ++ '.' ++ dialect.wrap_identifier name
|
||||
SQL_Expression.Constant value -> Builder.interpolation value
|
||||
SQL_Expression.Literal value -> Builder.code value
|
||||
SQL_Expression.Text_Literal value ->
|
||||
escaped = value.replace "'" "''"
|
||||
Builder.code ("'" + escaped + "'")
|
||||
SQL_Expression.Operation kind arguments metadata ->
|
||||
op = dialect.operation_map.get kind (Error.throw <| Unsupported_Database_Operation.Error kind)
|
||||
parsed_args = arguments.map (generate_expression dialect)
|
||||
|
@ -0,0 +1,239 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Errors.Illegal_State.Illegal_State
|
||||
from Standard.Base.Runtime import assert
|
||||
|
||||
import Standard.Table.Internal.Lookup_Helpers
|
||||
import Standard.Table.Internal.Lookup_Helpers.Lookup_Column
|
||||
from Standard.Table import Join_Kind, Value_Type
|
||||
from Standard.Table.Errors import all
|
||||
|
||||
import project.Data.Table.Table
|
||||
import project.Internal.IR.Context.Context
|
||||
import project.Internal.IR.From_Spec.From_Spec
|
||||
import project.Internal.IR.Internal_Column.Internal_Column
|
||||
import project.Internal.IR.SQL_Expression.SQL_Expression
|
||||
import project.Internal.IR.SQL_Join_Kind.SQL_Join_Kind
|
||||
import project.Internal.SQL_Type_Reference.SQL_Type_Reference
|
||||
from project.Internal.Upload_Table import check_for_null_keys
|
||||
|
||||
## PRIVATE
|
||||
Implementation of `lookup_and_replace` for Database backend.
|
||||
See `Table.lookup_and_replace` for more details.
|
||||
build_lookup_query : Table -> Table -> (Vector (Integer | Text | Regex) | Text | Integer | Regex) -> Boolean -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns | Non_Unique_Key | Unmatched_Rows_In_Lookup
|
||||
build_lookup_query base_table lookup_table key_columns add_new_columns allow_unmatched_rows on_problems =
|
||||
lookup_columns = Lookup_Helpers.prepare_columns_for_lookup base_table lookup_table key_columns add_new_columns allow_unmatched_rows on_problems
|
||||
lookup_columns.if_not_error <| check_initial_invariants base_table lookup_table lookup_columns allow_unmatched_rows <|
|
||||
column_naming_helper = base_table.connection.base_connection.column_naming_helper
|
||||
unique_name_strategy = column_naming_helper.create_unique_name_strategy
|
||||
unique_name_strategy.mark_used base_table.column_names
|
||||
unique_name_strategy.mark_used lookup_table.column_names
|
||||
|
||||
subquery_setup = prepare_subqueries base_table lookup_table lookup_columns unique_name_strategy
|
||||
|
||||
# We need to construct the context _before_ constructing the columns, to be able to use it in column construction (for inferring types).
|
||||
new_ctx = make_context_for_lookup_join lookup_columns subquery_setup
|
||||
infer_type_in_result expr =
|
||||
SQL_Type_Reference.new base_table.connection new_ctx expr
|
||||
|
||||
## TODO [RW] here we will perform as many fetches as there are
|
||||
Replace_Column instances, but technically we could perform just
|
||||
one fetch fetching all column types - TODO we should do that.
|
||||
See #6118.
|
||||
new_columns = lookup_columns.map_with_index ix-> c-> case c of
|
||||
Lookup_Column.Key_Column _ _ -> subquery_setup.get_self_column ix
|
||||
Lookup_Column.Keep_Column _ -> subquery_setup.get_self_column ix
|
||||
Lookup_Column.Replace_Column _ _ expected_type ->
|
||||
dialect = base_table.connection.dialect
|
||||
subquery_setup.create_merged_column ix expected_type dialect infer_type_in_result allow_unmatched_rows
|
||||
Lookup_Column.Add_Column _ -> subquery_setup.get_lookup_column ix
|
||||
|
||||
## Originally, I wanted to add invariant checks to all columns (or some of them),
|
||||
but storing it in WHERE has 2 benefits:
|
||||
1. The query is actually _simpler_ - instead of having the check in the expression for _every_ column
|
||||
or multiple columns, we have it only once.
|
||||
2. We have a guarantee that the query optimizer will not remove it (which could happen if it was a separate
|
||||
column, or was added only to some column and these columns got removed in subsequent steps) - which is
|
||||
paramount to ensuring that our rows will not get duplicated in case the key uniqueness gets violated.
|
||||
new_ctx_with_invariant_check = new_ctx.add_where_filters [make_invariant_check subquery_setup.lookup_counter allow_unmatched_rows]
|
||||
|
||||
precheck_for_duplicate_matches lookup_columns subquery_setup base_table.connection new_ctx <|
|
||||
Table.Value subquery_setup.new_table_name base_table.connection new_columns new_ctx_with_invariant_check
|
||||
|
||||
## PRIVATE
|
||||
Checks if they key contains NULL values or if there would be unmatched rows
|
||||
(if `allow_unmatched_rows` is `False`), and reports any errors.
|
||||
check_initial_invariants : Table -> Table -> Vector Lookup_Column -> Boolean -> Any -> Any
|
||||
check_initial_invariants base_table lookup_table lookup_columns allow_unmatched_rows ~continuation =
|
||||
key_column_names = lookup_columns.flat_map c-> case c of
|
||||
Lookup_Column.Key_Column base_column lookup_column ->
|
||||
assert (base_column.name == lookup_column.name) "Columns should have equal names."
|
||||
[lookup_column.name]
|
||||
_ -> []
|
||||
check_for_null_keys lookup_table key_column_names <|
|
||||
if allow_unmatched_rows then continuation else
|
||||
unmatched_rows = base_table.join lookup_table on=key_column_names join_kind=Join_Kind.Left_Exclusive . select_columns key_column_names
|
||||
unmatched_example = unmatched_rows.read max_rows=1
|
||||
if unmatched_example.row_count == 0 then continuation else
|
||||
first_row = unmatched_example.rows.first
|
||||
Error.throw (Unmatched_Rows_In_Lookup.Error first_row.to_vector)
|
||||
|
||||
## PRIVATE
|
||||
Prepares the lookup table counter that is used for two things:
|
||||
1. It allows us to check if a given row had found a match in the lookup row
|
||||
and decide which value to use in `Replace_Column` case (the counter will
|
||||
be `1` if there is a match and `NULL` if there is not).
|
||||
2. It allows us to find duplicate matches - if a row with counter >1 is found,
|
||||
that means that a single row has matched multiple rows in the lookup table
|
||||
and we should report an error.
|
||||
make_lookup_counter_column dialect lookup_columns unique_name_strategy =
|
||||
grouping_expressions = lookup_columns.flat_map c-> case c of
|
||||
Lookup_Column.Key_Column _ lookup_column -> [lookup_column.expression]
|
||||
_ -> []
|
||||
row_number_expression = SQL_Expression.Operation "ROW_NUMBER_IN_GROUP" grouping_expressions
|
||||
sql_type = dialect.get_type_mapping.value_type_to_sql Value_Type.Integer Problem_Behavior.Ignore
|
||||
Internal_Column.Value (unique_name_strategy.make_unique "lookup_counter") (SQL_Type_Reference.from_constant sql_type) row_number_expression
|
||||
|
||||
## PRIVATE
|
||||
Gathers information about prepared subqueries in one place - to make it
|
||||
easier to pass this context between various helper functions.
|
||||
|
||||
The setup is prepared in such a way, that for each `Lookup_Column` we have a
|
||||
vector of base table columns and lookup table columns corresponding to it.
|
||||
Depending on the type of the `Lookup_Column`, one of these vectors may be
|
||||
empty. The helper functions `get_self_column` and `get_lookup_column` allow
|
||||
easy access of the corresponding columns for the given index - this should be
|
||||
used with `lookup_columns.map_with_index`.
|
||||
type Lookup_Subquery_Setup
|
||||
## PRIVATE
|
||||
Value self_sub lookup_sub lookup_counter new_table_name
|
||||
|
||||
## PRIVATE
|
||||
get_self_column self ix =
|
||||
self.self_sub.new_columns.at ix . first
|
||||
|
||||
## PRIVATE
|
||||
get_lookup_column self ix =
|
||||
self.lookup_sub.new_columns.at ix . first
|
||||
|
||||
## PRIVATE
|
||||
Creates an `Internal_Column` representing the `Replace_Column` scenario:
|
||||
taking data from lookup table if it was matched, and from base table
|
||||
otherwise.
|
||||
This method also ensure that the column has the expected type, unifying
|
||||
types of the two sources.
|
||||
create_merged_column self ix expected_type dialect infer_type_in_result allow_unmatched_rows =
|
||||
self_col = self.get_self_column ix
|
||||
lookup_col = self.get_lookup_column ix
|
||||
is_lookup_found = SQL_Expression.Operation "==" [self.lookup_counter.expression, SQL_Expression.Literal "1"]
|
||||
expression = case allow_unmatched_rows of
|
||||
True -> SQL_Expression.Operation "CASE" [is_lookup_found, lookup_col.expression, self_col.expression]
|
||||
False -> Error.throw (Illegal_State.Error "Assumed that prepare_columns_for_lookup never returns Replace_Column if allow_unmatched_rows=False. This is a bug in the Database library.")
|
||||
input_column = Internal_Column.Value self_col.name (infer_type_in_result expression) expression
|
||||
adapted = dialect.adapt_unified_column input_column expected_type infer_type_in_result
|
||||
Internal_Column.Value self_col.name adapted.sql_type_reference adapted.expression
|
||||
|
||||
## PRIVATE
|
||||
Wraps the two source tables into subqueries and creates a set of new columns
|
||||
(encapsulated as `Lookup_Subquery_Setup`), together with the `lookup_counter`
|
||||
column (see `make_lookup_counter_column`), that are valid in the context of a
|
||||
lookup join query (it translates the source columns valid in the input
|
||||
contexts, to the external join context).
|
||||
prepare_subqueries base_table lookup_table lookup_columns unique_name_strategy =
|
||||
table_name_deduplicator = base_table.connection.base_connection.table_naming_helper.create_unique_name_strategy
|
||||
self_alias = table_name_deduplicator.make_unique base_table.name
|
||||
lookup_alias = table_name_deduplicator.make_unique lookup_table.name
|
||||
new_table_name = table_name_deduplicator.make_unique <|
|
||||
base_table.name + "_" + lookup_table.name
|
||||
|
||||
lookup_counter_base = make_lookup_counter_column lookup_table.connection.dialect lookup_columns unique_name_strategy
|
||||
|
||||
self_requested_columns = Vector.new_builder
|
||||
lookup_requested_columns = Vector.new_builder
|
||||
|
||||
lookup_columns.each c-> case c of
|
||||
Lookup_Column.Key_Column base_column key_column ->
|
||||
self_requested_columns.append [base_column]
|
||||
lookup_requested_columns.append [key_column]
|
||||
Lookup_Column.Keep_Column base_column ->
|
||||
self_requested_columns.append [base_column]
|
||||
lookup_requested_columns.append []
|
||||
Lookup_Column.Replace_Column base_column lookup_column _ ->
|
||||
self_requested_columns.append [base_column]
|
||||
lookup_requested_columns.append [lookup_column]
|
||||
Lookup_Column.Add_Column lookup_column ->
|
||||
self_requested_columns.append []
|
||||
lookup_requested_columns.append [lookup_column]
|
||||
|
||||
self_sub = base_table.context.as_subquery self_alias self_requested_columns.to_vector
|
||||
lookup_sub = lookup_table.context.as_subquery lookup_alias lookup_requested_columns.to_vector+[[lookup_counter_base]]
|
||||
lookup_counter = lookup_sub.new_columns.at -1 . first
|
||||
|
||||
Lookup_Subquery_Setup.Value self_sub lookup_sub lookup_counter new_table_name
|
||||
|
||||
## PRIVATE
|
||||
Creates a context for the lookup join query.
|
||||
make_context_for_lookup_join lookup_columns subquery_setup =
|
||||
on_expressions = (_.flatten) <| lookup_columns.map_with_index ix-> c-> case c of
|
||||
Lookup_Column.Key_Column _ _ ->
|
||||
self_col = subquery_setup.get_self_column ix
|
||||
lookup_col = subquery_setup.get_lookup_column ix
|
||||
[SQL_Expression.Operation "==" [self_col.expression, lookup_col.expression]]
|
||||
_ -> []
|
||||
|
||||
new_from = From_Spec.Join SQL_Join_Kind.Left subquery_setup.self_sub.subquery subquery_setup.lookup_sub.subquery on_expressions
|
||||
Context.for_subquery new_from
|
||||
|
||||
## PRIVATE
|
||||
Runs a query that checks if there are any duplicate matches in the lookup
|
||||
result. The query tries to minimize the amount of work and data that is
|
||||
transferred, but it may not be cheap. It is however needed to be able to
|
||||
report errors early.
|
||||
precheck_for_duplicate_matches lookup_columns subquery_setup connection new_ctx ~continuation =
|
||||
key_columns_for_duplicate_check = (_.flatten) <| lookup_columns.map_with_index ix-> c-> case c of
|
||||
Lookup_Column.Key_Column _ _ -> [subquery_setup.get_self_column ix]
|
||||
_ -> []
|
||||
table_for_duplicate_check = Table.Value subquery_setup.new_table_name connection [subquery_setup.lookup_counter]+key_columns_for_duplicate_check new_ctx
|
||||
duplicate_lookup_matches = table_for_duplicate_check.filter 0 (Filter_Condition.Greater than=1) . read max_rows=1
|
||||
case duplicate_lookup_matches.row_count > 0 of
|
||||
True ->
|
||||
first_example_row = duplicate_lookup_matches.read.rows.first.to_vector
|
||||
duplicate_example_count = first_example_row.first
|
||||
duplicate_example_key = first_example_row.drop 1
|
||||
Error.throw (Non_Unique_Key.Error (key_columns_for_duplicate_check.map .name) duplicate_example_key duplicate_example_count)
|
||||
False -> continuation
|
||||
|
||||
## PRIVATE
|
||||
Creates an expression that can be baked into the query (preferably in such
|
||||
a way that it will not get removed after query transformations), that will
|
||||
fail if the 'key uniqueness' invariant and the 'all rows matching'
|
||||
(if `allow_unmatched_rows=False`) are violated when the query is being
|
||||
materialized.
|
||||
|
||||
This is needed, because the initial checks are run when the query is built,
|
||||
but there is no guarantee that the table will not be changed before the query
|
||||
is actually materialized. If in the meantime, the data is modified and
|
||||
invariants are broken, the query may return completely unexpected results
|
||||
(e.g. rows of the source table could get duplicated, which should never
|
||||
happen with `lookup_and_replace`).
|
||||
|
||||
This additional checks ensures that if the invariants are broken, the query
|
||||
will fail instead of returning corrupted data. Its error reporting may be
|
||||
limited in comparison to the initial checks which may compute more
|
||||
information.
|
||||
make_invariant_check lookup_counter allow_unmatched_rows =
|
||||
make_error message =
|
||||
SQL_Expression.Operation "RUNTIME_ERROR" [SQL_Expression.Text_Literal "lookup_and_replace: "+message, lookup_counter.expression]
|
||||
|
||||
exact_match =
|
||||
is_matched = SQL_Expression.Operation "==" [lookup_counter.expression, SQL_Expression.Literal "1"]
|
||||
[is_matched, SQL_Expression.Literal "TRUE"]
|
||||
unmatched =
|
||||
is_unmatched = SQL_Expression.Operation "IS_NULL" [lookup_counter.expression]
|
||||
case allow_unmatched_rows of
|
||||
True -> [is_unmatched, SQL_Expression.Literal "TRUE"]
|
||||
False -> [is_unmatched, make_error "Some rows have no matches in the lookup table."]
|
||||
|
||||
# This will be matched if the first branches do not match - this should happen if there is more than 1 match for a row.
|
||||
fallback = [make_error "Duplicate matches found in the lookup table."]
|
||||
|
||||
SQL_Expression.Operation "CASE" unmatched+exact_match+fallback
|
@ -15,3 +15,11 @@ type Error_Mapper
|
||||
is_primary_key_violation error =
|
||||
_ = error
|
||||
Unimplemented.throw "This is an interface only."
|
||||
|
||||
## PRIVATE
|
||||
Called by `Table.read`, allowing the dialect to transform a generic
|
||||
`SQL_Error` into a more specific error type, if applicable.
|
||||
transform_custom_errors : SQL_Error -> Any
|
||||
transform_custom_errors error =
|
||||
_ = error
|
||||
Unimplemented.throw "This is an interface only."
|
||||
|
@ -1,4 +1,6 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
from Standard.Base.Runtime import assert
|
||||
|
||||
import project.Data.Column.Column
|
||||
import project.Data.Table.Table
|
||||
@ -32,7 +34,17 @@ check_integrity entity1 entity2 =
|
||||
- entity2: The entity to check against the first.
|
||||
check_connection : (Table | Column) -> (Table | Column) -> Boolean
|
||||
check_connection entity1 entity2 =
|
||||
Meta.is_same_object entity1.connection entity2.connection
|
||||
# The `if_not_error` is needed `Meta.is_same_object` does not forward dataflow errors.
|
||||
entity1.if_not_error <| entity2.if_not_error <|
|
||||
Meta.is_same_object entity1.connection entity2.connection
|
||||
|
||||
## PRIVATE
|
||||
ensure_same_connection : Text -> Vector -> Any -> Any ! Illegal_Argument
|
||||
ensure_same_connection name entities ~continuation =
|
||||
assert entities.not_empty
|
||||
all_same = entities.all entity-> check_connection entity entities.first
|
||||
if all_same then continuation else
|
||||
Error.throw (Illegal_Argument.Error "The input "+name+"s come from different connections. Cross-connection operations are not supported.")
|
||||
|
||||
## PRIVATE
|
||||
|
||||
|
@ -36,6 +36,18 @@ type SQL_Expression
|
||||
as-is into a query.
|
||||
Literal (value : Text)
|
||||
|
||||
## PRIVATE
|
||||
A text literal that should be inserted into a query, wrapped in quotes
|
||||
with any quotes it may contain escaped.
|
||||
|
||||
This is useful to avoid requiring unnecessary interpolations for text
|
||||
constants in queries.
|
||||
|
||||
User-provided values should always be passed as interpolated `Constant`s.
|
||||
This should only be used for 'statically' known constants - ones baked
|
||||
into the library source code.
|
||||
Text_Literal (value : Text)
|
||||
|
||||
## PRIVATE
|
||||
|
||||
The internal representation of an SQL expression built from an operation
|
||||
|
@ -297,7 +297,8 @@ make_internal_generator_dialect =
|
||||
stats = [agg_median, agg_mode, agg_percentile, stddev_pop, stddev_samp]
|
||||
date_ops = [make_extract_as_int "year", make_extract_as_int "quarter", make_extract_as_int "month", make_extract_as_int "week", make_extract_as_int "day", make_extract_as_int "hour", make_extract_as_int "minute", make_extract_fractional_as_int "second", make_extract_fractional_as_int "millisecond" modulus=1000, make_extract_fractional_as_int "microsecond" modulus=1000, ["date_add", make_date_add], ["date_diff", make_date_diff]]
|
||||
special_overrides = [is_null, is_empty]
|
||||
my_mappings = text + counts + stats + first_last_aggregators + arith_extensions + bool + date_ops + special_overrides
|
||||
other = [["RUNTIME_ERROR", make_runtime_error_op]]
|
||||
my_mappings = text + counts + stats + first_last_aggregators + arith_extensions + bool + date_ops + special_overrides + other
|
||||
Base_Generator.base_dialect . extend_with my_mappings
|
||||
|
||||
## PRIVATE
|
||||
@ -727,3 +728,26 @@ as_int32 expr =
|
||||
|
||||
## PRIVATE
|
||||
postgres_statement_setter = Statement_Setter.default
|
||||
|
||||
## PRIVATE
|
||||
The RUNTIME_ERROR operation should allow the query to compile fine and it
|
||||
will not prevent it from running if the branch including this operation is
|
||||
not taken. But if the branch is computed, it should ensure the query fails.
|
||||
|
||||
This query never returns a value, so its type should be polymorphic. However,
|
||||
that is not possible - so currently it just 'pretends' that it would return a
|
||||
Boolean - because that is the type we expect in the use-case. This can be
|
||||
altered if needed.
|
||||
|
||||
It takes a variable as the second argument. It can be any value that is not
|
||||
statically known - this ensure that the optimizer will not be able to
|
||||
pre-compute the expression too early (which could make the query fail
|
||||
spuriously). See `make_invariant_check` in `Lookup_Query_Helper` for an
|
||||
example.
|
||||
make_runtime_error_op arguments =
|
||||
if arguments.length != 2 then
|
||||
Panic.throw (Illegal_Argument.Error "RUNTIME_ERROR takes exactly 2 arguments (error message and a variable to ensure deferred execution).")
|
||||
error_message = arguments.at 0
|
||||
variable_to_defer = arguments.at 1
|
||||
|
||||
Builder.code "CAST('[ENSO INVARIANT VIOLATED: '||" ++ error_message ++ "||'] '||COALESCE(" ++ variable_to_defer ++ "::TEXT,'NULL') AS BOOLEAN)"
|
||||
|
@ -1,6 +1,6 @@
|
||||
from Standard.Base import all
|
||||
|
||||
from project.Errors import SQL_Error
|
||||
from project.Errors import SQL_Error, Invariant_Violation
|
||||
|
||||
## PRIVATE
|
||||
type Postgres_Error_Mapper
|
||||
@ -9,3 +9,12 @@ type Postgres_Error_Mapper
|
||||
is_primary_key_violation : SQL_Error -> Boolean
|
||||
is_primary_key_violation error =
|
||||
error.java_exception.getMessage.contains "duplicate key value violates unique constraint"
|
||||
|
||||
## PRIVATE
|
||||
transform_custom_errors : SQL_Error -> Any
|
||||
transform_custom_errors error =
|
||||
message = error.java_exception.getMessage
|
||||
if message.contains "ENSO INVARIANT VIOLATED" . not then error else
|
||||
payloads = message.tokenize "\[ENSO INVARIANT VIOLATED: (.*)\]"
|
||||
if payloads.length != 1 then error else
|
||||
Invariant_Violation.Error payloads.first error
|
||||
|
@ -286,9 +286,10 @@ make_internal_generator_dialect =
|
||||
counts = [agg_count_is_null, agg_count_empty, agg_count_not_empty, ["COUNT_DISTINCT", agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", agg_count_distinct_include_null]]
|
||||
stats = [agg_stddev_pop, agg_stddev_samp]
|
||||
arith_extensions = [is_inf, floating_point_div, mod_op]
|
||||
other = [["RUNTIME_ERROR", make_runtime_error_op]]
|
||||
|
||||
bool = [bool_or]
|
||||
my_mappings = text + counts + stats + arith_extensions + bool
|
||||
my_mappings = text + counts + stats + arith_extensions + bool + other
|
||||
Base_Generator.base_dialect . extend_with my_mappings
|
||||
|
||||
## PRIVATE
|
||||
@ -481,3 +482,21 @@ make_custom_cast column target_value_type type_mapping =
|
||||
result.put expr
|
||||
|
||||
result.get
|
||||
|
||||
## PRIVATE
|
||||
The RUNTIME_ERROR operation should allow the query to compile fine and it
|
||||
will not prevent it from running if the branch including this operation is
|
||||
not taken. But if the branch is computed, it should ensure the query fails.
|
||||
|
||||
This query never returns a value, so its type should be polymorphic. However,
|
||||
that is not possible - so currently the SQLite dialect just does not handle
|
||||
inferring a type for it. Thus, it should only be used in places that will not
|
||||
need client-side type inference (e.g. WHERE clause is ok).
|
||||
This can be changed in the future, if needed.
|
||||
make_runtime_error_op arguments =
|
||||
if arguments.length != 2 then
|
||||
Panic.throw (Illegal_Argument.Error "RUNTIME_ERROR takes exactly 2 arguments (error message and a variable to ensure deferred execution).")
|
||||
error_message = arguments.at 0
|
||||
variable_to_defer = arguments.at 1
|
||||
# We have to ensure that the implementation of SQLite that we use does not have a MATCH function defined which would make the code below succeed.
|
||||
Builder.code "match('[ENSO INVARIANT VIOLATED: '||" ++ error_message ++ "||'] ', " ++ variable_to_defer ++ ")"
|
||||
|
@ -1,6 +1,6 @@
|
||||
from Standard.Base import all
|
||||
|
||||
from project.Errors import SQL_Error
|
||||
from project.Errors import SQL_Error, Invariant_Violation
|
||||
|
||||
polyglot java import org.sqlite.SQLiteErrorCode
|
||||
polyglot java import org.sqlite.SQLiteException
|
||||
@ -14,3 +14,13 @@ type SQLite_Error_Mapper
|
||||
case error.java_exception of
|
||||
sqlite_exception : SQLiteException ->
|
||||
sqlite_exception.getResultCode == SQLiteErrorCode.SQLITE_CONSTRAINT_PRIMARYKEY
|
||||
|
||||
## PRIVATE
|
||||
transform_custom_errors : SQL_Error -> Any
|
||||
transform_custom_errors error =
|
||||
invariant_failure_message = "unable to use function MATCH in the requested context"
|
||||
message = error.java_exception.getMessage
|
||||
if message.contains invariant_failure_message . not then error else
|
||||
query_contains_invariant_check = error.related_query.if_nothing "" . contains "ENSO INVARIANT VIOLATED"
|
||||
if query_contains_invariant_check . not then error else
|
||||
Invariant_Violation.Error Nothing error
|
||||
|
@ -179,14 +179,17 @@ operations_map =
|
||||
handle_cast _ =
|
||||
Panic.throw (Illegal_State.Error "Cast relies on its own type inference logic, so this code should never be reached. This is a bug in the Database library.")
|
||||
|
||||
handle_runtime_error _ =
|
||||
Panic.throw (Illegal_State.Error "RUNTIME_ERROR should not be part of direct type inference, so this code should never be reached. This is a bug in the Database library.")
|
||||
|
||||
always_boolean_ops = ["==", "!=", "equals_ignore_case", ">=", "<=", "<", ">", "BETWEEN", "AND", "OR", "NOT", "IS_NULL", "IS_EMPTY", "LIKE", "IS_IN", "IS_IN_COLUMN", "starts_with", "ends_with", "contains", "BOOL_OR", "IS_INF"]
|
||||
always_floating_ops = ["/", "mod", "AVG", "STDDEV_POP", "STDDEV_SAMP", "ROUND"]
|
||||
always_text_ops = ["ADD_TEXT", "CONCAT", "CONCAT_QUOTE_IF_NEEDED", "MAKE_CASE_SENSITIVE", "FOLD_CASE", "TRIM", "LTRIM", "RTRIM", "REPLACE"]
|
||||
always_integer_ops = ["COUNT", "COUNT_IS_NULL", "COUNT_DISTINCT", "COUNT_DISTINCT_INCLUDE_NULL", "COUNT_EMPTY", "COUNT_NOT_EMPTY", "COUNT_ROWS", "ROW_NUMBER"]
|
||||
always_integer_ops = ["COUNT", "COUNT_IS_NULL", "COUNT_DISTINCT", "COUNT_DISTINCT_INCLUDE_NULL", "COUNT_EMPTY", "COUNT_NOT_EMPTY", "COUNT_ROWS", "ROW_NUMBER", "ROW_NUMBER_IN_GROUP"]
|
||||
same_as_first = ["TRUNCATE", "CEIL", "FLOOR"]
|
||||
arithmetic_ops = ["ADD_NUMBER", "-", "*", "^", "%", "SUM"]
|
||||
merge_input_types_ops = ["ROW_MAX", "ROW_MIN", "MAX", "MIN", "FILL_NULL", "COALESCE"]
|
||||
others = [["IIF", handle_iif], ["CAST", handle_cast], ["CASE", handle_case]]
|
||||
others = [["IIF", handle_iif], ["CAST", handle_cast], ["CASE", handle_case], ["RUNTIME_ERROR", handle_runtime_error]]
|
||||
Map.from_vector <|
|
||||
v1 = always_boolean_ops.map [_, const SQLite_Types.boolean]
|
||||
v2 = always_floating_ops.map [_, const SQLite_Types.real]
|
||||
|
@ -1666,28 +1666,27 @@ type Table
|
||||
@on Widget_Helpers.make_join_condition_selector
|
||||
join : Table -> Join_Kind -> Vector (Join_Condition | Text) | Text -> Text -> Problem_Behavior -> Table
|
||||
join self right:Table (join_kind : Join_Kind = Join_Kind.Left_Outer) on=[Join_Condition.Equals self.column_names.first] right_prefix="Right " on_problems=Report_Warning = Out_Of_Memory.handle_java_exception "join" <|
|
||||
if check_table "right" right then
|
||||
# [left_unmatched, matched, right_unmatched]
|
||||
rows_to_keep = case join_kind of
|
||||
Join_Kind.Inner -> [False, True, False]
|
||||
Join_Kind.Left_Outer -> [True, True, False]
|
||||
Join_Kind.Right_Outer -> [False, True, True]
|
||||
Join_Kind.Full -> [True, True, True]
|
||||
Join_Kind.Left_Exclusive -> [True, False, False]
|
||||
Join_Kind.Right_Exclusive -> [False, False, True]
|
||||
# [left_unmatched, matched, right_unmatched]
|
||||
rows_to_keep = case join_kind of
|
||||
Join_Kind.Inner -> [False, True, False]
|
||||
Join_Kind.Left_Outer -> [True, True, False]
|
||||
Join_Kind.Right_Outer -> [False, True, True]
|
||||
Join_Kind.Full -> [True, True, True]
|
||||
Join_Kind.Left_Exclusive -> [True, False, False]
|
||||
Join_Kind.Right_Exclusive -> [False, False, True]
|
||||
|
||||
columns_to_keep = case join_kind of
|
||||
Join_Kind.Left_Exclusive -> [True, False]
|
||||
Join_Kind.Right_Exclusive -> [False, True]
|
||||
_ -> [True, True]
|
||||
columns_to_keep = case join_kind of
|
||||
Join_Kind.Left_Exclusive -> [True, False]
|
||||
Join_Kind.Right_Exclusive -> [False, True]
|
||||
_ -> [True, True]
|
||||
|
||||
join_resolution = make_join_helpers self right . resolve on on_problems
|
||||
right_columns_to_drop = if join_kind == Join_Kind.Inner then join_resolution.redundant_column_names else []
|
||||
join_resolution = make_join_helpers self right . resolve on on_problems
|
||||
right_columns_to_drop = if join_kind == Join_Kind.Inner then join_resolution.redundant_column_names else []
|
||||
|
||||
java_conditions = join_resolution.conditions
|
||||
new_java_table = Java_Problems.with_problem_aggregator on_problems java_aggregator->
|
||||
self.java_table.join right.java_table java_conditions (rows_to_keep.at 0) (rows_to_keep.at 1) (rows_to_keep.at 2) (columns_to_keep.at 0) (columns_to_keep.at 1) right_columns_to_drop right_prefix java_aggregator
|
||||
Table.Value new_java_table
|
||||
java_conditions = join_resolution.conditions
|
||||
new_java_table = Java_Problems.with_problem_aggregator on_problems java_aggregator->
|
||||
self.java_table.join right.java_table java_conditions (rows_to_keep.at 0) (rows_to_keep.at 1) (rows_to_keep.at 2) (columns_to_keep.at 0) (columns_to_keep.at 1) right_columns_to_drop right_prefix java_aggregator
|
||||
Table.Value new_java_table
|
||||
|
||||
## ALIAS cartesian join
|
||||
GROUP Standard.Base.Calculations
|
||||
@ -1726,15 +1725,14 @@ type Table
|
||||
layout order while for database tables the order may be unspecified).
|
||||
cross_join : Table -> Integer | Nothing -> Text -> Problem_Behavior -> Table
|
||||
cross_join self right:Table right_row_limit=100 right_prefix="Right " on_problems=Report_Warning = Out_Of_Memory.handle_java_exception "cross_join" <|
|
||||
if check_table "right" right then
|
||||
limit_problems = case right_row_limit.is_nothing.not && (right.row_count > right_row_limit) of
|
||||
True ->
|
||||
[Cross_Join_Row_Limit_Exceeded.Error right_row_limit right.row_count]
|
||||
False -> []
|
||||
on_problems.attach_problems_before limit_problems <|
|
||||
new_java_table = Java_Problems.with_problem_aggregator on_problems java_aggregator->
|
||||
self.java_table.crossJoin right.java_table right_prefix java_aggregator
|
||||
Table.Value new_java_table
|
||||
limit_problems = case right_row_limit.is_nothing.not && (right.row_count > right_row_limit) of
|
||||
True ->
|
||||
[Cross_Join_Row_Limit_Exceeded.Error right_row_limit right.row_count]
|
||||
False -> []
|
||||
on_problems.attach_problems_before limit_problems <|
|
||||
new_java_table = Java_Problems.with_problem_aggregator on_problems java_aggregator->
|
||||
self.java_table.crossJoin right.java_table right_prefix java_aggregator
|
||||
Table.Value new_java_table
|
||||
|
||||
## Replaces values in this table by values from a lookup table.
|
||||
New values are looked up in the lookup table based on the `key_columns`.
|
||||
@ -1771,8 +1769,8 @@ type Table
|
||||
specified in `key_columns`, a `Missing_Input_Columns` error is raised.
|
||||
- If an empty vector is provided for `key_columns`, a
|
||||
`No_Input_Columns_Selected` error is raised.
|
||||
- If the lookup table contains multiple rows with the same values in
|
||||
the `key_columns`, an `Non_Unique_Key` error is raised.
|
||||
- If a single row is matched by multiple entries in the lookup table,
|
||||
a `Non_Unique_Key` error is raised.
|
||||
- If a column that is being updated from the lookup table has a type
|
||||
that is not compatible with the type of the corresponding column in
|
||||
this table, a `No_Common_Type` error is raised.
|
||||
@ -1874,20 +1872,19 @@ type Table
|
||||
@keep_unmatched (make_single_choice [["True", "Boolean.True"], ["False", "Boolean.False"], ["Report", Meta.get_qualified_type_name Report_Unmatched]])
|
||||
zip : Table -> Boolean | Report_Unmatched -> Text -> Problem_Behavior -> Table
|
||||
zip self right:Table keep_unmatched=Report_Unmatched right_prefix="Right " on_problems=Report_Warning =
|
||||
if check_table "right" right then
|
||||
keep_unmatched_bool = case keep_unmatched of
|
||||
Report_Unmatched -> True
|
||||
b : Boolean -> b
|
||||
report_mismatch = keep_unmatched == Report_Unmatched
|
||||
keep_unmatched_bool = case keep_unmatched of
|
||||
Report_Unmatched -> True
|
||||
b : Boolean -> b
|
||||
report_mismatch = keep_unmatched == Report_Unmatched
|
||||
|
||||
left_row_count = self.row_count
|
||||
right_row_count = right.row_count
|
||||
problems = if (left_row_count == right_row_count) || report_mismatch.not then [] else
|
||||
[Row_Count_Mismatch.Error left_row_count right_row_count]
|
||||
on_problems.attach_problems_before problems <|
|
||||
new_java_table = Java_Problems.with_problem_aggregator on_problems java_aggregator->
|
||||
self.java_table.zip right.java_table keep_unmatched_bool right_prefix java_aggregator
|
||||
Table.Value new_java_table
|
||||
left_row_count = self.row_count
|
||||
right_row_count = right.row_count
|
||||
problems = if (left_row_count == right_row_count) || report_mismatch.not then [] else
|
||||
[Row_Count_Mismatch.Error left_row_count right_row_count]
|
||||
on_problems.attach_problems_before problems <|
|
||||
new_java_table = Java_Problems.with_problem_aggregator on_problems java_aggregator->
|
||||
self.java_table.zip right.java_table keep_unmatched_bool right_prefix java_aggregator
|
||||
Table.Value new_java_table
|
||||
|
||||
## ALIAS append, concat
|
||||
GROUP Standard.Base.Calculations
|
||||
@ -1980,12 +1977,11 @@ type Table
|
||||
retyped to the `Mixed` type to indicate that intention. Note that the
|
||||
`Mixed` type may not be supported by most Database backends.
|
||||
union : (Table | Vector Table) -> Match_Columns -> Boolean | Report_Unmatched -> Boolean -> Problem_Behavior -> Table
|
||||
union self tables match_columns=Match_Columns.By_Name keep_unmatched_columns=Report_Unmatched allow_type_widening=True on_problems=Report_Warning =
|
||||
union self tables:Vector|Table match_columns=Match_Columns.By_Name keep_unmatched_columns=Report_Unmatched allow_type_widening=True on_problems=Report_Warning =
|
||||
all_tables = case tables of
|
||||
v : Vector -> [self] + v
|
||||
single_column : Column -> [self, single_column.to_table]
|
||||
v : Vector -> [self] + (v.map t-> Table.from t)
|
||||
single_table -> [self, single_table]
|
||||
all_tables.all (check_table "tables") . if_not_error <|
|
||||
all_tables.if_not_error <|
|
||||
problem_builder = Problem_Builder.new
|
||||
matched_column_sets = Match_Columns_Helpers.match_columns all_tables match_columns keep_unmatched_columns problem_builder
|
||||
result_row_count = all_tables.fold 0 c-> t-> c + t.row_count
|
||||
@ -2531,17 +2527,6 @@ make_join_helpers left_table right_table =
|
||||
Java_Join_Between.new left.java_column right_lower.java_column right_upper.java_column
|
||||
Join_Helpers.Join_Condition_Resolver.Value (left_table.at _) (right_table.at _) make_equals make_equals_ignore_case make_between
|
||||
|
||||
## PRIVATE
|
||||
Checks if the argument is a proper table and comes from the current backend.
|
||||
It returns True or throws a dataflow error explaining the issue.
|
||||
check_table arg_name table =
|
||||
if Table_Helpers.is_table table . not then Error.throw (Type_Error.Error Table table arg_name) else
|
||||
same_backend = table.is_a Table
|
||||
case same_backend of
|
||||
False ->
|
||||
Error.throw (Illegal_Argument.Error "Currently cross-backend operations are not supported. Materialize the table using `.read` before mixing it with an in-memory Table.")
|
||||
True -> True
|
||||
|
||||
## PRIVATE
|
||||
A helper that efficiently concatenates storages of in-memory columns.
|
||||
concat_columns column_set all_tables result_type result_row_count on_problems =
|
||||
|
@ -1,29 +1,32 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
|
||||
import project.Data.Type.Storage
|
||||
import project.Data.Type.Value_Type.Value_Type
|
||||
import project.Data.Type.Value_Type_Helpers
|
||||
from project.Errors import Missing_Input_Columns, Unexpected_Extra_Columns, Floating_Point_Equality, No_Common_Type
|
||||
from project.Errors import Missing_Input_Columns, Unexpected_Extra_Columns, Floating_Point_Equality, No_Common_Type, No_Output_Columns
|
||||
|
||||
polyglot java import org.enso.table.data.table.join.LookupColumnDescription
|
||||
|
||||
## PRIVATE
|
||||
type Lookup_Settings
|
||||
## PRIVATE
|
||||
Value (key_columns : Vector Text) (columns_to_update : Set Text) (columns_to_add : Vector Text)
|
||||
|
||||
## PRIVATE
|
||||
type Lookup_Column
|
||||
## PRIVATE
|
||||
A key column used to correlate rows between the two tables.
|
||||
Key_Column base_column lookup_column
|
||||
|
||||
## PRIVATE
|
||||
A column from the base table that is kept as-is.
|
||||
Keep_Column base_column
|
||||
|
||||
## PRIVATE
|
||||
A column that is present in both tables.
|
||||
If the row is matched with a row from the lookup table, the value from
|
||||
the lookup table is used; otherwise the value from the base table is kept.
|
||||
Replace_Column base_column lookup_column (common_type : Value_Type)
|
||||
|
||||
## PRIVATE
|
||||
A new column added from the lookup table.
|
||||
If the row has no match, it will be `Nothing`.
|
||||
Add_Column lookup_column
|
||||
|
||||
## PRIVATE
|
||||
@ -38,7 +41,8 @@ type Lookup_Column
|
||||
|
||||
It also reports any errors or warnings related to selecting these columns.
|
||||
prepare_columns_for_lookup base_table lookup_table key_columns_selector add_new_columns allow_unmatched_rows on_problems =
|
||||
key_columns = base_table.select_columns key_columns_selector . column_names
|
||||
key_columns = base_table.select_columns key_columns_selector . column_names . catch No_Output_Columns _->
|
||||
Error.throw (Illegal_Argument.Error "At least one key column must be provided for `lookup_and_replace`.")
|
||||
lookup_table_key_columns = lookup_table.select_columns key_columns . catch Missing_Input_Columns error->
|
||||
Error.throw (Missing_Input_Columns.Error error.criteria "the lookup table")
|
||||
|
||||
|
@ -452,15 +452,6 @@ filter_blank_rows table when_any treat_nans_as_blank =
|
||||
table.filter non_missing_mask Filter_Condition.Is_True
|
||||
False -> table
|
||||
|
||||
## PRIVATE
|
||||
Checks if the given object implements a Table interface.
|
||||
|
||||
Currently, it relies on a hack - it checks if the object has a method
|
||||
`is_table` and if it returns `True`.
|
||||
is_table obj =
|
||||
known_types = ["Standard.Table.Data.Table.Table", "Standard.Database.Data.Table.Table"]
|
||||
known_types.contains (Meta.get_qualified_type_name obj)
|
||||
|
||||
## PRIVATE
|
||||
Checks if the given object implements a Table interface.
|
||||
|
||||
|
@ -35,7 +35,6 @@ public class LookupJoin {
|
||||
|
||||
LookupJoin joiner = new LookupJoin(keys, columnDescriptions, allowUnmatchedRows, problemAggregator);
|
||||
joiner.checkNullsInKey();
|
||||
joiner.verifyLookupUniqueness();
|
||||
return joiner.join();
|
||||
}
|
||||
|
||||
@ -78,22 +77,6 @@ public class LookupJoin {
|
||||
}
|
||||
}
|
||||
|
||||
private void verifyLookupUniqueness() {
|
||||
if (!lookupIndex.isUnique()) {
|
||||
// Find the duplicated key
|
||||
for (Map.Entry<UnorderedMultiValueKey, List<Integer>> group : lookupIndex.mapping().entrySet()) {
|
||||
int groupSize = group.getValue().size();
|
||||
if (groupSize > 1) {
|
||||
UnorderedMultiValueKey key = group.getKey();
|
||||
List<Object> exampleValues = IntStream.range(0, keyColumnNames.size()).mapToObj(key::get).toList();
|
||||
throw new NonUniqueLookupKey(keyColumnNames, exampleValues, groupSize);
|
||||
}
|
||||
}
|
||||
|
||||
assert false : "isUnique returned false, but no duplicated key was found.";
|
||||
}
|
||||
}
|
||||
|
||||
private Table join() {
|
||||
List<LookupOutputColumn> outputColumns = columnDescriptions.stream().map(this::prepareOutputColumn).toList();
|
||||
List<LookupOutputColumn.MergeColumns> columnsToMerge =
|
||||
@ -145,8 +128,12 @@ public class LookupJoin {
|
||||
}
|
||||
}
|
||||
|
||||
if (lookupRowIndices.size() > 1) {
|
||||
List<Object> exampleValues = IntStream.range(0, keyColumnNames.size()).mapToObj(key::get).toList();
|
||||
throw new NonUniqueLookupKey(keyColumnNames, exampleValues, lookupRowIndices.size());
|
||||
}
|
||||
|
||||
assert !lookupRowIndices.isEmpty() : "No Index group should be empty.";
|
||||
assert lookupRowIndices.size() == 1 : "This should have been checked in verifyLookupUniqueness()";
|
||||
return lookupRowIndices.get(0);
|
||||
}
|
||||
|
||||
|
@ -3,10 +3,13 @@ import Standard.Base.Errors.Common.Arithmetic_Error
|
||||
import Standard.Base.Errors.Common.Index_Out_Of_Bounds
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
|
||||
from Standard.Table import Set_Mode
|
||||
from Standard.Table import all
|
||||
from Standard.Table.Errors import all
|
||||
import Standard.Table.Data.Expression.Expression_Error
|
||||
|
||||
from Standard.Database import all
|
||||
from Standard.Database.Errors import Integrity_Error
|
||||
|
||||
from Standard.Test import Test, Problems
|
||||
import Standard.Test.Extensions
|
||||
|
||||
@ -163,6 +166,17 @@ spec setup =
|
||||
problems2 = [Arithmetic_Error.Error "Division by zero (at rows [2])."]
|
||||
Problems.test_problem_handling action2 problems2 tester2
|
||||
|
||||
Test.specify "should gracefully handle columns from different backends" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]]]
|
||||
alternative_connection = Database.connect (SQLite In_Memory)
|
||||
t0 = (Table.new [["X", [3, 2, 1]]]).select_into_database_table alternative_connection "T0" temporary=True
|
||||
c0 = t0.at "X"
|
||||
|
||||
r1 = t1.set c0
|
||||
case setup.is_database of
|
||||
True -> r1.should_fail_with Integrity_Error
|
||||
False -> r1.should_fail_with Illegal_Argument
|
||||
|
||||
Test.group prefix+"Table.column_names" <|
|
||||
Test.specify "should return the names of all columns" <|
|
||||
table.column_names . should_equal ["foo", "bar", "Baz", "foo 1", "foo 2", "ab.+123", "abcd123"]
|
||||
|
@ -1,15 +1,17 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import Standard.Base.Errors.Common.Type_Error
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
import Standard.Base.Errors.Illegal_State.Illegal_State
|
||||
import Standard.Table.Data.Join_Kind_Cross.Join_Kind_Cross
|
||||
import Standard.Test.Extensions
|
||||
|
||||
from Standard.Database.Errors import Unsupported_Database_Operation
|
||||
from Standard.Table import all hiding Table
|
||||
from Standard.Table import all
|
||||
from Standard.Table.Errors import all
|
||||
import Standard.Table.Data.Join_Kind_Cross.Join_Kind_Cross
|
||||
|
||||
from Standard.Database import all
|
||||
from Standard.Database.Errors import Unsupported_Database_Operation, Integrity_Error
|
||||
|
||||
from Standard.Test import Test, Problems
|
||||
import Standard.Test.Extensions
|
||||
|
||||
from project.Common_Table_Operations.Util import expect_column_names, run_default_backend
|
||||
|
||||
@ -159,3 +161,11 @@ spec setup =
|
||||
t1 = table_builder [["X", [1, 2]], ["Y", [4, 5]]]
|
||||
t2 = table_builder [["Z", ['a', 'b']], ["W", ['c', 'd']]]
|
||||
Test.expect_panic_with (t1.join t2 join_kind=Join_Kind_Cross.Cross on=[]) Type_Error
|
||||
|
||||
Test.specify "should gracefully handle tables from different backends" <|
|
||||
t1 = table_builder [["A", ["a", "b"]]]
|
||||
alternative_connection = Database.connect (SQLite In_Memory)
|
||||
t0 = (Table.new [["X", [1, 2, 4]], ["Z", [10, 20, 30]]]).select_into_database_table alternative_connection "T0" temporary=True
|
||||
|
||||
r1 = t1.cross_join t0
|
||||
r1.should_fail_with Illegal_Argument
|
||||
|
@ -1,10 +1,12 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
import Standard.Base.Errors.Illegal_State.Illegal_State
|
||||
|
||||
from Standard.Table import all hiding Table
|
||||
from Standard.Table import all
|
||||
from Standard.Table.Errors import all
|
||||
|
||||
from Standard.Database.Errors import Unsupported_Database_Operation
|
||||
from Standard.Database import all
|
||||
from Standard.Database.Errors import Unsupported_Database_Operation, Integrity_Error
|
||||
|
||||
from Standard.Test import Test, Problems
|
||||
import Standard.Test.Extensions
|
||||
@ -647,3 +649,10 @@ spec setup =
|
||||
r3.at 2 . should_equal [Nothing, 10, 7, 7, 200]
|
||||
r3.at 3 . should_equal [2, 20, 2, Nothing, Nothing]
|
||||
r3.at 4 . should_equal [3, 30, 3, Nothing, Nothing]
|
||||
|
||||
Test.specify "should gracefully handle tables from different backends" <|
|
||||
alternative_connection = Database.connect (SQLite In_Memory)
|
||||
t0 = (Table.new [["X", [1, 2, 4]], ["Z", [10, 20, 30]]]).select_into_database_table alternative_connection "T0" temporary=True
|
||||
|
||||
r1 = t1.join t0
|
||||
r1.should_fail_with Illegal_Argument
|
||||
|
@ -1,9 +1,11 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
|
||||
from Standard.Table import all
|
||||
from Standard.Table.Errors import all
|
||||
|
||||
from Standard.Database.Errors import Unsupported_Database_Operation
|
||||
from Standard.Database import all
|
||||
from Standard.Database.Errors import Unsupported_Database_Operation, Invariant_Violation, Integrity_Error
|
||||
|
||||
from Standard.Test import Test, Problems
|
||||
import Standard.Test.Extensions
|
||||
@ -17,8 +19,7 @@ spec setup =
|
||||
prefix = setup.prefix
|
||||
table_builder = setup.table_builder
|
||||
materialize = setup.materialize
|
||||
db_pending = if setup.is_database then "Not implemented yet. TODO: https://github.com/enso-org/enso/issues/7981"
|
||||
Test.group prefix+"Table.lookup_and_replace" pending=db_pending <|
|
||||
Test.group prefix+"Table.lookup_and_replace" <|
|
||||
Test.specify "should allow to simply update columns based on a lookup table" <|
|
||||
lookup = table_builder [["Y", ["A", "B", "A"]], ["X", [1, 2, 3]]]
|
||||
my_table = table_builder [["X", [1, 2, 3, 2]], ["Y", ["Z", "ZZ", "ZZZ", "ZZZZ"]], ["Z", [10, 20, 30, 40]]]
|
||||
@ -149,17 +150,42 @@ spec setup =
|
||||
m2.at "Y" . to_vector . should_equal ["A", "B", "A", "B"]
|
||||
m2.at "Z" . to_vector . should_equal [100, 200, 100, 100]
|
||||
|
||||
Test.specify "will fail on duplicate keys in the lookup table" <|
|
||||
Test.specify "will fail on duplicate matches in the lookup table" <|
|
||||
lookup = table_builder [["X", [1, 2, 1]], ["Y", ["A", "B", "C"]]]
|
||||
my_table = table_builder [["X", [4, 2, 3, 2]], ["Y", ["Z", "ZZ", "ZZZ", "ZZZZ"]], ["Z", [10, 20, 30, 40]]]
|
||||
|
||||
r2 = my_table.lookup_and_replace lookup key_columns=["X"]
|
||||
# If the duplicates do not show up in result - it is accepted.
|
||||
t2 = my_table.lookup_and_replace lookup key_columns=["X"]
|
||||
t2.column_names . should_equal ["X", "Y", "Z"]
|
||||
m2 = t2 |> materialize |> _.order_by "Z"
|
||||
m2.at "Z" . to_vector . should_equal [10, 20, 30, 40]
|
||||
m2.at "X" . to_vector . should_equal [4, 2, 3, 2]
|
||||
m2.at "Y" . to_vector . should_equal ["Z", "B", "ZZZ", "B"]
|
||||
|
||||
my_table2 = table_builder [["X", [1, 2]], ["Y", ["Z", "ZZ"]], ["Z", [10, 20]]]
|
||||
r2 = my_table2.lookup_and_replace lookup key_columns=["X"]
|
||||
r2.should_fail_with Non_Unique_Key
|
||||
r2.catch.key_column_names . should_equal ["X"]
|
||||
r2.catch.clashing_example_key_values . should_equal [1]
|
||||
r2.catch.clashing_example_row_count . should_equal 2
|
||||
|
||||
lookup2 = table_builder [["X", [1, 1]], ["Y", ["A", "A"]], ["Z", [100, 100]]]
|
||||
r3 = my_table.lookup_and_replace lookup2 key_columns=["X", "Y"]
|
||||
Problems.assume_no_problems <| my_table.lookup_and_replace lookup2 key_columns=["X", "Y"]
|
||||
r3 = my_table2.lookup_and_replace lookup2 key_columns=["X"]
|
||||
r3.should_fail_with Non_Unique_Key
|
||||
|
||||
m3 = my_table2.lookup_and_replace lookup2 key_columns=["X", "Y"] |> materialize |> _.order_by "Z"
|
||||
m3.at "Z" . to_vector . should_equal [10, 20]
|
||||
m3.at "X" . to_vector . should_equal [1, 2]
|
||||
m3.at "Y" . to_vector . should_equal ["Z", "ZZ"]
|
||||
|
||||
my_table3 = table_builder [["X", [1, 1, 2]], ["Y", ["A", "Z", "ZZ"]], ["Z", [10, 20, 30]]]
|
||||
r4 = my_table3.lookup_and_replace lookup2 key_columns=["X", "Y"]
|
||||
r4.should_fail_with Non_Unique_Key
|
||||
r4.catch.key_column_names . should_equal ["X", "Y"]
|
||||
r4.catch.clashing_example_key_values . should_equal [1, "A"]
|
||||
r4.catch.clashing_example_row_count . should_equal 2
|
||||
|
||||
Test.specify "will preserve count of rows, even if there are duplicates" <|
|
||||
lookup = table_builder [["X", [1, 2]], ["Y", ["A", "B"]]]
|
||||
my_table = table_builder [["X", [1, 2, 2, 2, 1]], ["Z", [10, 20, 20, 20, 50]]]
|
||||
@ -171,6 +197,64 @@ spec setup =
|
||||
m2.at "Y" . to_vector . should_equal ["A", "B", "B", "B", "A"]
|
||||
m2.at "Z" . to_vector . should_equal [10, 20, 20, 20, 50]
|
||||
|
||||
Test.specify "should correctly preserve types of original, merged and added columns" <|
|
||||
table = table_builder [["key1", [0, 1]], ["key2", ["o", "?"]], ["X", [1, 10]], ["Y", ["A", "E"]], ["Z", [1.5, 2.0]], ["W", [True, False]], ["A", [2, 22]], ["B", ["1", "2"]], ["C", [2.0, 2.5]], ["D", [False, False]]]
|
||||
lookup = table_builder [["key1", [0, 2]], ["key2", ["o", "?"]], ["X2", [100, 1000]], ["Y2", ["foo", "bar"]], ["Z2", [0.5, 4.0]], ["W2", [False, True]], ["A", [3, 55]], ["B", ["F", "F"]], ["C", [3.0, 10.5]], ["D", [True, False]]]
|
||||
|
||||
[True, False].each allow_unmatched_rows->
|
||||
table_prepared = if allow_unmatched_rows then table else
|
||||
# If we don't allow unmatched rows, we filter the input to only contain the matching row - this test is not about matching, but about the types.
|
||||
table.filter "key1" (Filter_Condition.Equal to=0)
|
||||
t2 = table_prepared.lookup_and_replace lookup key_columns=["key1", "key2"] allow_unmatched_rows=allow_unmatched_rows
|
||||
t2.column_names . should_equal ["key1", "key2", "X", "Y", "Z", "W", "A", "B", "C", "D", "X2", "Y2", "Z2", "W2"]
|
||||
|
||||
t2.at "key1" . value_type . should_be_a (Value_Type.Integer ...)
|
||||
t2.at "key2" . value_type . should_equal Value_Type.Char
|
||||
|
||||
t2.at "X" . value_type . should_be_a (Value_Type.Integer ...)
|
||||
t2.at "Y" . value_type . should_equal Value_Type.Char
|
||||
t2.at "Z" . value_type . should_equal Value_Type.Float
|
||||
t2.at "W" . value_type . should_equal Value_Type.Boolean
|
||||
|
||||
t2.at "A" . value_type . should_be_a (Value_Type.Integer ...)
|
||||
t2.at "B" . value_type . should_equal Value_Type.Char
|
||||
t2.at "C" . value_type . should_equal Value_Type.Float
|
||||
t2.at "D" . value_type . should_equal Value_Type.Boolean
|
||||
|
||||
t2.at "X2" . value_type . should_be_a (Value_Type.Integer ...)
|
||||
t2.at "Y2" . value_type . should_equal Value_Type.Char
|
||||
t2.at "Z2" . value_type . should_equal Value_Type.Float
|
||||
t2.at "W2" . value_type . should_equal Value_Type.Boolean
|
||||
|
||||
if setup.test_selection.fixed_length_text_columns then Test.specify "should correctly preserve types of original, merged and added columns (various Char types test case)" <|
|
||||
table2 = table_builder [["key", ["0"]], ["X", ["a"]], ["A", ["bbbbb"]]] . cast "key" (Value_Type.Char size=50) . cast "X" (Value_Type.Char size=1) . cast "A" (Value_Type.Char size=5 variable_length=False)
|
||||
lookup2 = table_builder [["key", ["0"]], ["X2", ["ccc"]], ["A", ["dddd"]]] . cast "key" (Value_Type.Char size=100) . cast "X2" (Value_Type.Char size=3 variable_length=False) . cast "A" (Value_Type.Char size=4 variable_length=False)
|
||||
|
||||
table2.at "key" . value_type . should_equal (Value_Type.Char size=50 variable_length=True)
|
||||
table2.at "X" . value_type . should_equal (Value_Type.Char size=1 variable_length=True)
|
||||
table2.at "A" . value_type . should_equal (Value_Type.Char size=5 variable_length=False)
|
||||
|
||||
lookup2.at "key" . value_type . should_equal (Value_Type.Char size=100 variable_length=True)
|
||||
lookup2.at "X2" . value_type . should_equal (Value_Type.Char size=3 variable_length=False)
|
||||
lookup2.at "A" . value_type . should_equal (Value_Type.Char size=4 variable_length=False)
|
||||
|
||||
[True, False].each allow_unmatched_rows->
|
||||
t3 = table2.lookup_and_replace lookup2 key_columns=["key"] allow_unmatched_rows=allow_unmatched_rows
|
||||
t3.column_names . should_equal ["key", "X", "A", "X2"]
|
||||
|
||||
# The key column keeps its original type
|
||||
t3.at "key" . value_type . should_equal (Value_Type.Char size=50 variable_length=True)
|
||||
t3.at "X" . value_type . should_equal (Value_Type.Char size=1 variable_length=True)
|
||||
t3.at "X2" . value_type . should_equal (Value_Type.Char size=3 variable_length=False)
|
||||
|
||||
case allow_unmatched_rows of
|
||||
True ->
|
||||
# The merged column gets a merged type (max size in that case, but gets variable length due to differences), because it may contain both old and new values.
|
||||
t3.at "A" . value_type . should_equal (Value_Type.Char size=5 variable_length=True)
|
||||
False ->
|
||||
# If unmatched rows are not allowed, we can guarantee only _new_ values (from the lookup table) will be in the result, so instead of merging the type we inherit the type from the lookup table.
|
||||
t3.at "A" . value_type . should_equal (Value_Type.Char size=4 variable_length=False)
|
||||
|
||||
Test.specify "will report Floating_Point_Equality if floating-point columns are used as key" <|
|
||||
lookup = table_builder [["X", [1.0, 2.0, 3.0]], ["Y", ["A", "B", "C"]]]
|
||||
my_table = table_builder [["X", [2.0, 3.0, 2.0, 3.0]], ["Y", ["Z", "ZZ", "ZZZ", "ZZZZ"]], ["Z", [10, 20, 30, 40]]]
|
||||
@ -224,6 +308,17 @@ spec setup =
|
||||
m2.at "Y" . to_vector . should_equal ["B", Nothing, Nothing, Nothing]
|
||||
m2.at "Z" . to_vector . should_equal [10, 20, 30, 40]
|
||||
|
||||
# But NULLs in source are ok, but mean such rows will never match:
|
||||
r3 = my_table2.lookup_and_replace lookup2 key_columns="X" allow_unmatched_rows=False
|
||||
r3.should_fail_with Unmatched_Rows_In_Lookup
|
||||
|
||||
Test.specify "will not allow providing no key_columns" <|
|
||||
lookup = table_builder [["X", [1, 2]], ["Y", ["A", "B"]]]
|
||||
my_table = table_builder [["X", [2, 1]], ["Z", [10, 20]]]
|
||||
|
||||
r2 = my_table.lookup_and_replace lookup key_columns=[]
|
||||
r2.should_fail_with Illegal_Argument
|
||||
|
||||
if setup.is_database.not then Test.specify "(in-memory only) will preserve the order of rows from the original table" <|
|
||||
lookup = table_builder [["Y", [1, 0]], ["V", ["TRUE", "FALSE"]]]
|
||||
xs = 0.up_to 50 . to_vector
|
||||
@ -237,3 +332,106 @@ spec setup =
|
||||
t2.at "Y" . to_vector . should_equal ys
|
||||
vs = xs.map x-> if (x%2) == 1 then "TRUE" else "FALSE"
|
||||
t2.at "V" . to_vector . should_equal vs
|
||||
|
||||
if setup.is_database then Test.specify "(database-only) will fail if pre-checked invariants get invalidated between the query is constructed and then materialized" <|
|
||||
Test.with_clue "(lookup is unique check) " <|
|
||||
lookup = table_builder [["X", [1, 2]], ["Y", ["A", "B"]]]
|
||||
table = table_builder [["X", [1, 2, 2]], ["Y", ["Z", "ZZ", "ZZZ"]], ["Z", [10, 20, 30]]]
|
||||
|
||||
r1 = table.lookup_and_replace lookup key_columns="X"
|
||||
# Immediately, the query is all good.
|
||||
Problems.assume_no_problems r1
|
||||
|
||||
m1 = r1 |> materialize |> _.order_by "Z"
|
||||
m1.at "X" . to_vector . should_equal [1, 2, 2]
|
||||
m1.at "Y" . to_vector . should_equal ["A", "B", "B"]
|
||||
m1.at "Z" . to_vector . should_equal [10, 20, 30]
|
||||
|
||||
# Now, we change the table, so that the lookup is no longer unique - even if the values are the same - we care about row uniqueness.
|
||||
Problems.assume_no_problems <|
|
||||
lookup.update_rows (Table.new [["X", [2]], ["Y", ["B"]]]) key_columns=[] update_action=Update_Action.Insert
|
||||
lookup.filter "X" (Filter_Condition.Equal to=2) . row_count . should_equal 2
|
||||
|
||||
# Now, the query is no longer valid:
|
||||
m2 = r1 |> materialize
|
||||
m2.should_fail_with Invariant_Violation
|
||||
m2.catch.to_display_text . should_contain "database has been modified"
|
||||
m2.catch.to_display_text . should_contain "re-run the workflow"
|
||||
|
||||
## Now, even if we drop the lookup-related columns, we still should get an error
|
||||
(otherwise we would risk duplicating records due to the non-uniqueness!)
|
||||
r3 = r1.select_columns ["Z"]
|
||||
Problems.assume_no_problems r3
|
||||
|
||||
m3 = r3 |> materialize
|
||||
## Alternatively, theoretically, this query could succeed
|
||||
(knowing that it no longer depends on the lookup at all).
|
||||
But if it does so, we need to guarantee that we do not get duplicated rows in the result:
|
||||
|
||||
r3.row_count . should_equal 3
|
||||
r3.at "Z" . to_vector . length . should_equal 3
|
||||
r3.at "Z" . to_vector . should_contain_the_same_elements_as [10, 20, 30]
|
||||
m3.should_fail_with Invariant_Violation
|
||||
r3.at "Z" . to_vector . should_fail_with Invariant_Violation
|
||||
|
||||
Test.with_clue "(no unmatched rows check - added a row in source) " <|
|
||||
lookup = table_builder [["X", [1, 2]], ["Y", ["A", "B"]]]
|
||||
table = table_builder [["X", [1, 2, 2]], ["Y", ["Z", "ZZ", "ZZZ"]], ["Z", [10, 20, 30]]]
|
||||
|
||||
r1 = table.lookup_and_replace lookup key_columns="X" allow_unmatched_rows=False
|
||||
# Immediately, the query is all good.
|
||||
Problems.assume_no_problems r1
|
||||
|
||||
m1 = r1 |> materialize |> _.order_by "Z"
|
||||
m1.at "X" . to_vector . should_equal [1, 2, 2]
|
||||
m1.at "Y" . to_vector . should_equal ["A", "B", "B"]
|
||||
m1.at "Z" . to_vector . should_equal [10, 20, 30]
|
||||
|
||||
# Now, we change the source table, so that there are unmatched rows:
|
||||
Problems.assume_no_problems <|
|
||||
table.update_rows (Table.new [["X", [33]], ["Y", ["F"]], ["Z", [40]]]) key_columns=[] update_action=Update_Action.Insert
|
||||
table.row_count . should_equal 4
|
||||
|
||||
# Now, the query is no longer valid:
|
||||
m2 = r1 |> materialize
|
||||
m2.should_fail_with Invariant_Violation
|
||||
|
||||
Test.with_clue "(no unmatched rows check - removed a row in lookup) " <|
|
||||
lookup = table_builder [["X", [1, 2]], ["Y", ["A", "B"]]]
|
||||
table = table_builder [["X", [1, 2, 2]], ["Y", ["Z", "ZZ", "ZZZ"]], ["Z", [10, 20, 30]]]
|
||||
|
||||
r1 = table.lookup_and_replace lookup key_columns="X" allow_unmatched_rows=False
|
||||
# Immediately, the query is all good.
|
||||
Problems.assume_no_problems r1
|
||||
|
||||
m1 = r1 |> materialize |> _.order_by "Z"
|
||||
m1.at "X" . to_vector . should_equal [1, 2, 2]
|
||||
m1.at "Y" . to_vector . should_equal ["A", "B", "B"]
|
||||
m1.at "Z" . to_vector . should_equal [10, 20, 30]
|
||||
|
||||
# Now, we change the lookup table, so that there are unmatched rows:
|
||||
Problems.assume_no_problems <|
|
||||
lookup.delete_rows (Table.new [["X", [1]]]) key_columns=["X"]
|
||||
lookup.row_count . should_equal 1
|
||||
|
||||
# Now, the query is no longer valid:
|
||||
m2 = r1 |> materialize
|
||||
m2.should_fail_with Invariant_Violation
|
||||
|
||||
# This does not seem useful really, but there is no reason to disallow it, so we should ensure it does not crash.
|
||||
Test.specify "(edge-case) should allow lookup with itself" <|
|
||||
table = table_builder [["X", [1, 2, 3]], ["Y", ["A", "B", "C"]]]
|
||||
t2 = table.lookup_and_replace table key_columns="X"
|
||||
t2.column_names . should_equal ["X", "Y"]
|
||||
|
||||
m2 = t2 |> materialize |> _.order_by "X"
|
||||
m2.at "X" . to_vector . should_equal [1, 2, 3]
|
||||
m2.at "Y" . to_vector . should_equal ["A", "B", "C"]
|
||||
|
||||
Test.specify "should gracefully handle tables from different backends" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
|
||||
alternative_connection = Database.connect (SQLite In_Memory)
|
||||
t0 = (Table.new [["A", [3, 2, 1]], ["B", ["x", "y", "z"]]]).select_into_database_table alternative_connection "T0" temporary=True
|
||||
|
||||
r1 = t1.lookup_and_replace t0 key_columns="A"
|
||||
r1.should_fail_with Illegal_Argument
|
||||
|
@ -1,10 +1,12 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
|
||||
import Standard.Table.Data.Type.Value_Type.Bits
|
||||
from Standard.Table import all
|
||||
from Standard.Table.Errors import all
|
||||
|
||||
from Standard.Database.Errors import Unsupported_Database_Operation
|
||||
from Standard.Database import all
|
||||
from Standard.Database.Errors import Unsupported_Database_Operation, Integrity_Error
|
||||
|
||||
from Standard.Test import Test, Problems
|
||||
import Standard.Test.Extensions
|
||||
@ -393,3 +395,12 @@ spec setup =
|
||||
# And this should report Column_Type_Mismatch as the more important error too.
|
||||
#t1.union t2 allow_type_widening=False on_problems=Problem_Behavior.Report_Error . should_fail_with Column_Type_Mismatch
|
||||
t1.union t2 allow_type_widening=False on_problems=Problem_Behavior.Report_Error . should_fail_with No_Output_Columns
|
||||
|
||||
Test.specify "should gracefully handle tables from different backends" <|
|
||||
t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
|
||||
|
||||
alternative_connection = Database.connect (SQLite In_Memory)
|
||||
t0 = (Table.new [["A", [1, 2, 4]], ["B", ["10", "20", "30"]]]).select_into_database_table alternative_connection "T0" temporary=True
|
||||
|
||||
r1 = t1.union t0
|
||||
r1.should_fail_with Illegal_Argument
|
||||
|
@ -1,10 +1,12 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
import Standard.Base.Errors.Illegal_State.Illegal_State
|
||||
|
||||
from Standard.Table import all hiding Table
|
||||
from Standard.Table import all
|
||||
from Standard.Table.Errors import all
|
||||
|
||||
from Standard.Database.Errors import Unsupported_Database_Operation
|
||||
from Standard.Database import all
|
||||
from Standard.Database.Errors import Unsupported_Database_Operation, Integrity_Error
|
||||
|
||||
from Standard.Test import Test, Problems
|
||||
import Standard.Test.Extensions
|
||||
@ -236,3 +238,12 @@ spec setup =
|
||||
padded.at "times" . value_type . should_equal Value_Type.Time
|
||||
padded.at "datetimes" . value_type . should_equal Value_Type.Date_Time
|
||||
padded.at "mixed" . value_type . should_equal Value_Type.Mixed
|
||||
|
||||
Test.specify "should gracefully handle tables from different backends" <|
|
||||
t1 = table_builder [["X", [1, 2, 3]], ["Y", [4, 5, 6]]]
|
||||
|
||||
alternative_connection = Database.connect (SQLite In_Memory)
|
||||
t0 = (Table.new [["W", [1, 2, 4]]]).select_into_database_table alternative_connection "T0" temporary=True
|
||||
|
||||
r1 = t1.zip t0
|
||||
r1.should_fail_with Illegal_Argument
|
||||
|
@ -135,6 +135,54 @@ run_tests prefix connection upload =
|
||||
r3 = connection.query "SELECT * FROM ........"
|
||||
r3.should_fail_with SQL_Error
|
||||
|
||||
Test.specify "will fail if the table is modified and a column gets removed" <|
|
||||
name = Name_Generator.random_name "removing-column"
|
||||
Problems.assume_no_problems <|
|
||||
(Table.new [["a", [1, 2, 3]], ["b", [4, 5, 6]]]).select_into_database_table connection name temporary=True
|
||||
|
||||
t1 = connection.query name
|
||||
m1 = t1.read
|
||||
Problems.assume_no_problems m1
|
||||
m1.at "a" . to_vector . should_equal [1, 2, 3]
|
||||
m1.at "b" . to_vector . should_equal [4, 5, 6]
|
||||
|
||||
Problems.assume_no_problems <| connection.drop_table name
|
||||
Problems.assume_no_problems <|
|
||||
(Table.new [["a", [100, 200]]]).select_into_database_table connection name temporary=True
|
||||
|
||||
# Reading a column that was kept will work OK
|
||||
t1.at "a" . to_vector . should_equal [100, 200]
|
||||
|
||||
# But reading the whole table will fail on the missing column:
|
||||
m2 = t1.read
|
||||
m2.should_fail_with SQL_Error
|
||||
|
||||
Test.specify "will not fail if the table is modified and a column gets added" <|
|
||||
name = Name_Generator.random_name "adding-column"
|
||||
Problems.assume_no_problems <|
|
||||
(Table.new [["a", [1, 2, 3]], ["b", [4, 5, 6]]]).select_into_database_table connection name temporary=True
|
||||
|
||||
t1 = connection.query name
|
||||
m1 = t1.read
|
||||
Problems.assume_no_problems m1
|
||||
m1.at "a" . to_vector . should_equal [1, 2, 3]
|
||||
m1.at "b" . to_vector . should_equal [4, 5, 6]
|
||||
|
||||
Problems.assume_no_problems <| connection.drop_table name
|
||||
Problems.assume_no_problems <|
|
||||
(Table.new [["a", [100, 200]], ["b", [300, 400]], ["c", [500, 600]]]).select_into_database_table connection name temporary=True
|
||||
|
||||
m2 = t1.read
|
||||
Problems.assume_no_problems m2
|
||||
m2.column_names . should_equal ["a", "b"]
|
||||
m2.at "a" . to_vector . should_equal [100, 200]
|
||||
m2.at "b" . to_vector . should_equal [300, 400]
|
||||
|
||||
t1.at "c" . should_fail_with No_Such_Column
|
||||
|
||||
t2 = connection.query name
|
||||
t2.column_names . should_equal ["a", "b", "c"]
|
||||
|
||||
Test.group prefix+"Masking Tables" <|
|
||||
Test.specify "should allow to select rows from a table or column based on an expression" <|
|
||||
t2 = t1.filter (t1.at "a" == 1)
|
||||
|
Loading…
Reference in New Issue
Block a user