Implement Table.lookup_and_replace in Database (#8146)
- Closes #7981
- Adds a `RUNTIME_ERROR` operation to the DB dialect, which may be used to 'crash' a query if a condition is met. It is used to validate that the `lookup_and_replace` invariants still hold when the query is materialized.
- Removes the old `Table_Helpers.is_table` and `same_backend` checks in favour of the new approach relying on `Table.from` conversions, which is much simpler to use and more robust.
This commit is contained in:
parent 660f4b35ce
commit 79011bd550
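
For context, a minimal usage sketch of the new capability (the connection and table names are illustrative, not part of this commit):

    from Standard.Base import all
    from Standard.Database import all

    example connection =
        orders = connection.query "orders"
        statuses = connection.query "status_codes"
        # Looks up each order's `status` in `status_codes` and replaces it with
        # the matching values, building a single query on the database side:
        orders.lookup_and_replace statuses key_columns="status"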
@@ -11,3 +11,7 @@ type Redshift_Error_Mapper
         # Currently not implemented, skipping the error recognition.
         _ = error
         False
+
+    ## PRIVATE
+    transform_custom_errors : SQL_Error -> Any
+    transform_custom_errors error = error
@@ -29,7 +29,6 @@ import Standard.Table.Internal.Add_Row_Number
 import Standard.Table.Internal.Aggregate_Column_Helper
 import Standard.Table.Internal.Column_Naming_Helper.Column_Naming_Helper
 import Standard.Table.Internal.Constant_Column.Constant_Column
-import Standard.Table.Internal.Lookup_Helpers
 import Standard.Table.Internal.Problem_Builder.Problem_Builder
 import Standard.Table.Internal.Table_Helpers
 import Standard.Table.Internal.Table_Helpers.Table_Column_Helper
@@ -50,6 +49,7 @@ import project.Data.Take_Drop_Helpers
 import project.Internal.Aggregate_Helper
 import project.Internal.Base_Generator
 import project.Internal.Common.Database_Join_Helper
+import project.Internal.Common.Lookup_Query_Helper
 import project.Internal.Helpers
 import project.Internal.IR.Context.Context
 import project.Internal.IR.From_Spec.From_Spec
@@ -61,7 +61,7 @@ import project.Internal.IR.SQL_Expression.SQL_Expression
 import project.Internal.IR.SQL_Join_Kind.SQL_Join_Kind
 import project.Internal.SQL_Type_Reference.SQL_Type_Reference
 from project.Data.Take_Drop_Helpers import Take_Drop
-from project.Errors import Integrity_Error, Table_Not_Found, Unsupported_Database_Operation
+from project.Errors import Integrity_Error, Table_Not_Found, Unsupported_Database_Operation, SQL_Error
 
 polyglot java import java.sql.JDBCType
 polyglot java import java.util.UUID
@@ -1212,16 +1212,12 @@ type Table
     ## PRIVATE
        Implementation of both `join` and `cross_join`.
     join_or_cross_join : Table -> Join_Kind | Join_Kind_Cross -> Vector (Join_Condition | Text) | Text -> Text -> Problem_Behavior -> Table
-    join_or_cross_join self right join_kind on right_prefix on_problems =
-        can_proceed = if Table_Helpers.is_table right . not then Error.throw (Type_Error.Error Table right "right") else
-            same_backend = case right of
-                _ : Table -> True
-                _ -> False
-            join_conditions_ok = join_kind != Join_Kind_Cross.Cross || on == []
-            if same_backend . not then Error.throw (Illegal_Argument.Error "Currently cross-backend joins are not supported. You need to upload the in-memory table before joining it with a database one, or materialize this table.") else
-                if join_conditions_ok . not then Error.throw (Illegal_Argument.Error "Cross join does not allow join conditions") else
-                    True
-        if can_proceed then
+    join_or_cross_join self right:Table join_kind on right_prefix on_problems =
+        can_proceed = Helpers.ensure_same_connection "table" [self, right] <|
+            join_conditions_ok = join_kind != Join_Kind_Cross.Cross || on == []
+            if join_conditions_ok . not then Error.throw (Illegal_Argument.Error "Cross join does not allow join conditions") else
+                True
+        can_proceed.if_not_error <|
             left = self
             table_name_deduplicator = self.connection.base_connection.table_naming_helper.create_unique_name_strategy
             table_name_deduplicator.mark_used [left.name, right.name]
@@ -1314,14 +1310,13 @@ type Table
 
        The ordering of rows in the resulting table is not specified.
     cross_join : Table -> Integer | Nothing -> Text -> Problem_Behavior -> Table
-    cross_join self right right_row_limit=100 right_prefix="Right " on_problems=Report_Warning =
-        if check_db_table "right" right then
-            limit_problems = case right_row_limit.is_nothing.not && (right.row_count > right_row_limit) of
-                True ->
-                    [Cross_Join_Row_Limit_Exceeded.Error right_row_limit right.row_count]
-                False -> []
-            on_problems.attach_problems_before limit_problems <|
-                self.join_or_cross_join right join_kind=Join_Kind_Cross.Cross on=[] right_prefix on_problems
+    cross_join self right:Table right_row_limit=100 right_prefix="Right " on_problems=Report_Warning =
+        limit_problems = case right_row_limit.is_nothing.not && (right.row_count > right_row_limit) of
+            True ->
+                [Cross_Join_Row_Limit_Exceeded.Error right_row_limit right.row_count]
+            False -> []
+        on_problems.attach_problems_before limit_problems <|
+            self.join_or_cross_join right join_kind=Join_Kind_Cross.Cross on=[] right_prefix on_problems
 
     ## Replaces values in this table by values from a lookup table.
        New values are looked up in the lookup table based on the `key_columns`.
@@ -1358,8 +1353,8 @@ type Table
          specified in `key_columns`, a `Missing_Input_Columns` error is raised.
        - If an empty vector is provided for `key_columns`, a
          `No_Input_Columns_Selected` error is raised.
-       - If the lookup table contains multiple rows with the same values in
-         the `key_columns`, an `Non_Unique_Key` error is raised.
+       - If a single row is matched by multiple entries in the lookup table,
+         a `Non_Unique_Key` error is raised.
        - If a column that is being updated from the lookup table has a type
          that is not compatible with the type of the corresponding column in
          this table, a `No_Common_Type` error is raised.
@@ -1377,8 +1372,8 @@ type Table
     @key_columns Widget_Helpers.make_column_name_vector_selector
     lookup_and_replace : Table -> (Vector (Integer | Text | Regex) | Text | Integer | Regex) -> Boolean -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns | Non_Unique_Key | Unmatched_Rows_In_Lookup
     lookup_and_replace self lookup_table:Table key_columns:(Vector (Integer | Text | Regex) | Text | Integer | Regex) add_new_columns:Boolean=True allow_unmatched_rows:Boolean=True on_problems:Problem_Behavior=Problem_Behavior.Report_Warning =
-        _ = [lookup_table, key_columns, add_new_columns, allow_unmatched_rows, on_problems]
-        Error.throw (Unsupported_Database_Operation.Error "Table.lookup_and_replace is not implemented yet for the Database backends.")
+        Helpers.ensure_same_connection "table" [self, lookup_table] <|
+            Lookup_Query_Helper.build_lookup_query self lookup_table key_columns add_new_columns allow_unmatched_rows on_problems
 
     ## ALIAS join by row position
        GROUP Standard.Base.Calculations
@@ -1522,11 +1517,11 @@ type Table
        retyped to the `Mixed` type to indicate that intention. Note that the
        `Mixed` type may not be supported by most Database backends.
     union : (Table | Vector Table) -> Match_Columns -> Boolean | Report_Unmatched -> Boolean -> Problem_Behavior -> Table
-    union self tables match_columns=Match_Columns.By_Name keep_unmatched_columns=Report_Unmatched allow_type_widening=True on_problems=Report_Warning =
+    union self tables:Vector|Table match_columns=Match_Columns.By_Name keep_unmatched_columns=Report_Unmatched allow_type_widening=True on_problems=Report_Warning =
         all_tables = case tables of
-            v : Vector -> [self] + v
+            v : Vector -> [self] + (v.map t-> Table.from t)
             single_table -> [self, single_table]
-        all_tables.all (check_db_table "tables") . if_not_error <|
+        Helpers.ensure_same_connection "table" all_tables <|
             problem_builder = Problem_Builder.new
             matched_column_sets = Match_Columns_Helpers.match_columns all_tables match_columns keep_unmatched_columns problem_builder
             dialect = self.connection.dialect
@@ -2154,7 +2149,8 @@ type Table
             False ->
                 sql = preprocessed.to_sql
                 column_type_suggestions = preprocessed.internal_columns.map .sql_type_reference
-                materialized_table = self.connection.read_statement sql column_type_suggestions
+                materialized_table = self.connection.read_statement sql column_type_suggestions . catch SQL_Error sql_error->
+                    Error.throw (self.connection.dialect.get_error_mapper.transform_custom_errors sql_error)
 
                 expected_types = self.columns.map .value_type
                 actual_types = materialized_table.columns.map .value_type
@@ -2527,19 +2523,6 @@ type Wrapped_Error
     ## PRIVATE
     Value value
 
-## PRIVATE
-   Checks if the argument is a proper table and comes from the current backend.
-   It returns True or throws a dataflow error explaining the issue.
-
-   TODO [RW]: this should check that the tables are on the same connection
-check_db_table arg_name table =
-    if Table_Helpers.is_table table . not then Error.throw (Type_Error.Error Table table arg_name) else
-        same_backend = table.is_a Table
-        case same_backend of
-            False ->
-                Error.throw (Illegal_Argument.Error "Currently cross-backend operations are not supported. Materialize the table using `.read` before mixing it with an in-memory Table.")
-            True -> True
-
 ## PRIVATE
    By default, join on the first column, unless it's a cross join, in which
    case there are no join conditions.
@@ -2553,5 +2536,10 @@ Materialized_Table.from (that:Table) =
     _ = [that]
     Error.throw (Illegal_Argument.Error "Currently cross-backend operations are not supported. Materialize the table using `.read` before mixing it with an in-memory Table.")
 
+## PRIVATE
+Table.from (that:Materialized_Table) =
+    _ = [that]
+    Error.throw (Illegal_Argument.Error "Currently cross-backend operations are not supported. Either materialize the other table using `.read` or upload the table into the database using `.select_into_database_table`.")
+
 ## PRIVATE
 Table_Ref.from (that:Table) = Table_Ref.Value that
@@ -203,3 +203,27 @@ type Unsupported_Database_Encoding
        Pretty print the unsupported database encoding warning.
     to_display_text : Text
     to_display_text self = self.message
+
+## Indicates that the database has been modified between the time the query was
+   prepared and the time it was executed, breaking an expected invariant and
+   potentially causing data corruption.
+
+   The query needs to be rebuilt.
+type Invariant_Violation
+    ## PRIVATE
+       Indicates that the database has been modified between the time the query
+       was prepared and the time it was executed, breaking an expected invariant
+       and potentially causing data corruption.
+
+       Arguments:
+       - message: A message describing the broken invariant, if available.
+         It may be set to `Nothing` if the backend cannot decode the invariant
+         message from the SQL error.
+       - original_cause: The original SQL error that this error has been
+         translated from.
+    Error (message:Text|Nothing) (original_cause:SQL_Error)
+
+    ## PRIVATE
+    to_display_text : Text
+    to_display_text self =
+        "The database has been modified between the time the query was prepared and executed, leading to possibly incorrect results. Please re-run the workflow to retry."
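
Downstream code can handle the new error like any other dataflow error; a sketch (the table is illustrative):

    materialized = db_table.read . catch Invariant_Violation error->
        # The prepared query is stale - it needs to be rebuilt and re-run.
        IO.println "Query invalidated: "+error.to_display_text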
@@ -193,7 +193,7 @@ base_dialect =
     nulls = [["IS_NULL", make_right_unary_op "IS NULL"], ["FILL_NULL", make_function "COALESCE"]]
     contains = [["IS_IN", make_is_in], ["IS_IN_COLUMN", make_is_in_column]]
     types = [simple_cast]
-    windows = [["ROW_NUMBER", make_row_number]]
+    windows = [["ROW_NUMBER", make_row_number], ["ROW_NUMBER_IN_GROUP", make_row_number_in_group]]
     base_map = Map.from_vector (arith + logic + compare + functions + agg + counts + text + nulls + contains + types + windows)
     Internal_Dialect.Value base_map wrap_in_quotes
 
@@ -219,7 +219,7 @@ make_iif arguments = case arguments.length of
    An IR expression for constructing SQL `CASE` expressions.
 case_when : Vector Builder -> Builder
 case_when arguments =
-    if arguments.length < 4 then Error.throw (Illegal_State.Error "CASE_WHEN needs at least 3 arguments.") else
+    if arguments.length < 3 then Error.throw (Illegal_State.Error "CASE_WHEN needs at least 3 arguments.") else
         fallback = arguments.last
         cases = arguments.drop (Last 1)
         if cases.length % 2 != 0 then Error.throw (Illegal_State.Error "CASE_WHEN expects an odd number of arguments (two arguments for each case and a fallback).") else
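
The corrected bound matches the error message: the smallest valid `CASE` takes one condition/result pair plus a fallback, i.e. three arguments. A sketch of the minimal call (operand names illustrative):

    # condition, value-when-true, fallback - exactly 3 arguments
    SQL_Expression.Operation "CASE" [is_matched, matched_value, fallback_value]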
@@ -286,6 +286,14 @@ make_row_number (arguments : Vector) (metadata : Row_Number_Metadata) = if argum
             Builder.code "PARTITION BY " ++ Builder.join ", " grouping
     Builder.code "(row_number() OVER (" ++ group_part ++ " ORDER BY " ++ Builder.join ", " ordering ++ ") * " ++ step.paren ++ " + " ++ offset.paren ++ ")"
 
+## PRIVATE
+   A helper for `lookup_and_replace`, and perhaps other operations.
+   It creates an expression that returns a row number within a group.
+make_row_number_in_group arguments =
+    if arguments.length == 0 then
+        Panic.throw <| Illegal_State.Error "The operation ROW_NUMBER_IN_GROUP requires at least one argument."
+    Builder.code "row_number() OVER (PARTITION BY " ++ (Builder.join ", " arguments) ++ ")"
+
 ## PRIVATE
 
    Builds code for an expression.
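
For a single grouping expression the emitted fragment is roughly `row_number() OVER (PARTITION BY "key")`; a sketch of a call (the identifier is illustrative):

    # Numbers rows within each group of equal "key" values:
    make_row_number_in_group [Builder.code '"key"']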
@@ -299,6 +307,9 @@ generate_expression dialect expr = case expr of
         dialect.wrap_identifier origin ++ '.' ++ dialect.wrap_identifier name
     SQL_Expression.Constant value -> Builder.interpolation value
     SQL_Expression.Literal value -> Builder.code value
+    SQL_Expression.Text_Literal value ->
+        escaped = value.replace "'" "''"
+        Builder.code ("'" + escaped + "'")
     SQL_Expression.Operation kind arguments metadata ->
         op = dialect.operation_map.get kind (Error.throw <| Unsupported_Database_Operation.Error kind)
         parsed_args = arguments.map (generate_expression dialect)
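
The escaping simply doubles embedded single quotes, following standard SQL; a sketch (the value is illustrative):

    # Emitted into the query as: 'it''s fine'
    generate_expression dialect (SQL_Expression.Text_Literal "it's fine")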
@@ -0,0 +1,239 @@
+from Standard.Base import all
+import Standard.Base.Errors.Illegal_State.Illegal_State
+from Standard.Base.Runtime import assert
+
+import Standard.Table.Internal.Lookup_Helpers
+import Standard.Table.Internal.Lookup_Helpers.Lookup_Column
+from Standard.Table import Join_Kind, Value_Type
+from Standard.Table.Errors import all
+
+import project.Data.Table.Table
+import project.Internal.IR.Context.Context
+import project.Internal.IR.From_Spec.From_Spec
+import project.Internal.IR.Internal_Column.Internal_Column
+import project.Internal.IR.SQL_Expression.SQL_Expression
+import project.Internal.IR.SQL_Join_Kind.SQL_Join_Kind
+import project.Internal.SQL_Type_Reference.SQL_Type_Reference
+from project.Internal.Upload_Table import check_for_null_keys
+
+## PRIVATE
+   Implementation of `lookup_and_replace` for the Database backend.
+   See `Table.lookup_and_replace` for more details.
+build_lookup_query : Table -> Table -> (Vector (Integer | Text | Regex) | Text | Integer | Regex) -> Boolean -> Boolean -> Problem_Behavior -> Table ! Missing_Input_Columns | Non_Unique_Key | Unmatched_Rows_In_Lookup
+build_lookup_query base_table lookup_table key_columns add_new_columns allow_unmatched_rows on_problems =
+    lookup_columns = Lookup_Helpers.prepare_columns_for_lookup base_table lookup_table key_columns add_new_columns allow_unmatched_rows on_problems
+    lookup_columns.if_not_error <| check_initial_invariants base_table lookup_table lookup_columns allow_unmatched_rows <|
+        column_naming_helper = base_table.connection.base_connection.column_naming_helper
+        unique_name_strategy = column_naming_helper.create_unique_name_strategy
+        unique_name_strategy.mark_used base_table.column_names
+        unique_name_strategy.mark_used lookup_table.column_names
+
+        subquery_setup = prepare_subqueries base_table lookup_table lookup_columns unique_name_strategy
+
+        # We need to construct the context _before_ constructing the columns, to be able to use it in column construction (for inferring types).
+        new_ctx = make_context_for_lookup_join lookup_columns subquery_setup
+        infer_type_in_result expr =
+            SQL_Type_Reference.new base_table.connection new_ctx expr
+
+        ## TODO [RW] here we will perform as many fetches as there are
+           Replace_Column instances, but technically we could perform just
+           one fetch fetching all column types - TODO we should do that.
+           See #6118.
+        new_columns = lookup_columns.map_with_index ix-> c-> case c of
+            Lookup_Column.Key_Column _ _ -> subquery_setup.get_self_column ix
+            Lookup_Column.Keep_Column _ -> subquery_setup.get_self_column ix
+            Lookup_Column.Replace_Column _ _ expected_type ->
+                dialect = base_table.connection.dialect
+                subquery_setup.create_merged_column ix expected_type dialect infer_type_in_result allow_unmatched_rows
+            Lookup_Column.Add_Column _ -> subquery_setup.get_lookup_column ix
+
+        ## Originally, I wanted to add invariant checks to all columns (or some of them),
+           but storing it in WHERE has 2 benefits:
+           1. The query is actually _simpler_ - instead of having the check in the expression for _every_ column
+              or multiple columns, we have it only once.
+           2. We have a guarantee that the query optimizer will not remove it (which could happen if it was a separate
+              column, or was added only to some column and these columns got removed in subsequent steps) - which is
+              paramount to ensuring that our rows will not get duplicated in case the key uniqueness gets violated.
+        new_ctx_with_invariant_check = new_ctx.add_where_filters [make_invariant_check subquery_setup.lookup_counter allow_unmatched_rows]
+
+        precheck_for_duplicate_matches lookup_columns subquery_setup base_table.connection new_ctx <|
+            Table.Value subquery_setup.new_table_name base_table.connection new_columns new_ctx_with_invariant_check
+
+## PRIVATE
+   Checks if the key contains NULL values or if there would be unmatched rows
+   (if `allow_unmatched_rows` is `False`), and reports any errors.
+check_initial_invariants : Table -> Table -> Vector Lookup_Column -> Boolean -> Any -> Any
+check_initial_invariants base_table lookup_table lookup_columns allow_unmatched_rows ~continuation =
+    key_column_names = lookup_columns.flat_map c-> case c of
+        Lookup_Column.Key_Column base_column lookup_column ->
+            assert (base_column.name == lookup_column.name) "Columns should have equal names."
+            [lookup_column.name]
+        _ -> []
+    check_for_null_keys lookup_table key_column_names <|
+        if allow_unmatched_rows then continuation else
+            unmatched_rows = base_table.join lookup_table on=key_column_names join_kind=Join_Kind.Left_Exclusive . select_columns key_column_names
+            unmatched_example = unmatched_rows.read max_rows=1
+            if unmatched_example.row_count == 0 then continuation else
+                first_row = unmatched_example.rows.first
+                Error.throw (Unmatched_Rows_In_Lookup.Error first_row.to_vector)
+
+## PRIVATE
+   Prepares the lookup table counter that is used for two things:
+   1. It allows us to check if a given row had found a match in the lookup row
+      and decide which value to use in `Replace_Column` case (the counter will
+      be `1` if there is a match and `NULL` if there is not).
+   2. It allows us to find duplicate matches - if a row with counter >1 is found,
+      that means that a single row has matched multiple rows in the lookup table
+      and we should report an error.
+make_lookup_counter_column dialect lookup_columns unique_name_strategy =
+    grouping_expressions = lookup_columns.flat_map c-> case c of
+        Lookup_Column.Key_Column _ lookup_column -> [lookup_column.expression]
+        _ -> []
+    row_number_expression = SQL_Expression.Operation "ROW_NUMBER_IN_GROUP" grouping_expressions
+    sql_type = dialect.get_type_mapping.value_type_to_sql Value_Type.Integer Problem_Behavior.Ignore
+    Internal_Column.Value (unique_name_strategy.make_unique "lookup_counter") (SQL_Type_Reference.from_constant sql_type) row_number_expression
+
+## PRIVATE
+   Gathers information about prepared subqueries in one place - to make it
+   easier to pass this context between various helper functions.
+
+   The setup is prepared in such a way, that for each `Lookup_Column` we have a
+   vector of base table columns and lookup table columns corresponding to it.
+   Depending on the type of the `Lookup_Column`, one of these vectors may be
+   empty. The helper functions `get_self_column` and `get_lookup_column` allow
+   easy access to the corresponding columns for the given index - this should be
+   used with `lookup_columns.map_with_index`.
+type Lookup_Subquery_Setup
+    ## PRIVATE
+    Value self_sub lookup_sub lookup_counter new_table_name
+
+    ## PRIVATE
+    get_self_column self ix =
+        self.self_sub.new_columns.at ix . first
+
+    ## PRIVATE
+    get_lookup_column self ix =
+        self.lookup_sub.new_columns.at ix . first
+
+    ## PRIVATE
+       Creates an `Internal_Column` representing the `Replace_Column` scenario:
+       taking data from the lookup table if it was matched, and from the base
+       table otherwise.
+       This method also ensures that the column has the expected type, unifying
+       types of the two sources.
+    create_merged_column self ix expected_type dialect infer_type_in_result allow_unmatched_rows =
+        self_col = self.get_self_column ix
+        lookup_col = self.get_lookup_column ix
+        is_lookup_found = SQL_Expression.Operation "==" [self.lookup_counter.expression, SQL_Expression.Literal "1"]
+        expression = case allow_unmatched_rows of
+            True -> SQL_Expression.Operation "CASE" [is_lookup_found, lookup_col.expression, self_col.expression]
+            False -> Error.throw (Illegal_State.Error "Assumed that prepare_columns_for_lookup never returns Replace_Column if allow_unmatched_rows=False. This is a bug in the Database library.")
+        input_column = Internal_Column.Value self_col.name (infer_type_in_result expression) expression
+        adapted = dialect.adapt_unified_column input_column expected_type infer_type_in_result
+        Internal_Column.Value self_col.name adapted.sql_type_reference adapted.expression
+
+## PRIVATE
+   Wraps the two source tables into subqueries and creates a set of new columns
+   (encapsulated as `Lookup_Subquery_Setup`), together with the `lookup_counter`
+   column (see `make_lookup_counter_column`), that are valid in the context of a
+   lookup join query (it translates the source columns valid in the input
+   contexts, to the external join context).
+prepare_subqueries base_table lookup_table lookup_columns unique_name_strategy =
+    table_name_deduplicator = base_table.connection.base_connection.table_naming_helper.create_unique_name_strategy
+    self_alias = table_name_deduplicator.make_unique base_table.name
+    lookup_alias = table_name_deduplicator.make_unique lookup_table.name
+    new_table_name = table_name_deduplicator.make_unique <|
+        base_table.name + "_" + lookup_table.name
+
+    lookup_counter_base = make_lookup_counter_column lookup_table.connection.dialect lookup_columns unique_name_strategy
+
+    self_requested_columns = Vector.new_builder
+    lookup_requested_columns = Vector.new_builder
+
+    lookup_columns.each c-> case c of
+        Lookup_Column.Key_Column base_column key_column ->
+            self_requested_columns.append [base_column]
+            lookup_requested_columns.append [key_column]
+        Lookup_Column.Keep_Column base_column ->
+            self_requested_columns.append [base_column]
+            lookup_requested_columns.append []
+        Lookup_Column.Replace_Column base_column lookup_column _ ->
+            self_requested_columns.append [base_column]
+            lookup_requested_columns.append [lookup_column]
+        Lookup_Column.Add_Column lookup_column ->
+            self_requested_columns.append []
+            lookup_requested_columns.append [lookup_column]
+
+    self_sub = base_table.context.as_subquery self_alias self_requested_columns.to_vector
+    lookup_sub = lookup_table.context.as_subquery lookup_alias lookup_requested_columns.to_vector+[[lookup_counter_base]]
+    lookup_counter = lookup_sub.new_columns.at -1 . first
+
+    Lookup_Subquery_Setup.Value self_sub lookup_sub lookup_counter new_table_name
+
+## PRIVATE
+   Creates a context for the lookup join query.
+make_context_for_lookup_join lookup_columns subquery_setup =
+    on_expressions = (_.flatten) <| lookup_columns.map_with_index ix-> c-> case c of
+        Lookup_Column.Key_Column _ _ ->
+            self_col = subquery_setup.get_self_column ix
+            lookup_col = subquery_setup.get_lookup_column ix
+            [SQL_Expression.Operation "==" [self_col.expression, lookup_col.expression]]
+        _ -> []
+
+    new_from = From_Spec.Join SQL_Join_Kind.Left subquery_setup.self_sub.subquery subquery_setup.lookup_sub.subquery on_expressions
+    Context.for_subquery new_from
+
+## PRIVATE
+   Runs a query that checks if there are any duplicate matches in the lookup
+   result. The query tries to minimize the amount of work and data that is
+   transferred, but it may not be cheap. It is however needed to be able to
+   report errors early.
+precheck_for_duplicate_matches lookup_columns subquery_setup connection new_ctx ~continuation =
+    key_columns_for_duplicate_check = (_.flatten) <| lookup_columns.map_with_index ix-> c-> case c of
+        Lookup_Column.Key_Column _ _ -> [subquery_setup.get_self_column ix]
+        _ -> []
+    table_for_duplicate_check = Table.Value subquery_setup.new_table_name connection [subquery_setup.lookup_counter]+key_columns_for_duplicate_check new_ctx
+    duplicate_lookup_matches = table_for_duplicate_check.filter 0 (Filter_Condition.Greater than=1) . read max_rows=1
+    case duplicate_lookup_matches.row_count > 0 of
+        True ->
+            first_example_row = duplicate_lookup_matches.read.rows.first.to_vector
+            duplicate_example_count = first_example_row.first
+            duplicate_example_key = first_example_row.drop 1
+            Error.throw (Non_Unique_Key.Error (key_columns_for_duplicate_check.map .name) duplicate_example_key duplicate_example_count)
+        False -> continuation
+
+## PRIVATE
+   Creates an expression that can be baked into the query (preferably in such
+   a way that it will not get removed after query transformations), that will
+   fail if the 'key uniqueness' invariant and the 'all rows matching'
+   (if `allow_unmatched_rows=False`) are violated when the query is being
+   materialized.
+
+   This is needed, because the initial checks are run when the query is built,
+   but there is no guarantee that the table will not be changed before the query
+   is actually materialized. If in the meantime, the data is modified and
+   invariants are broken, the query may return completely unexpected results
+   (e.g. rows of the source table could get duplicated, which should never
+   happen with `lookup_and_replace`).
+
+   This additional check ensures that if the invariants are broken, the query
+   will fail instead of returning corrupted data. Its error reporting may be
+   limited in comparison to the initial checks which may compute more
+   information.
+make_invariant_check lookup_counter allow_unmatched_rows =
+    make_error message =
+        SQL_Expression.Operation "RUNTIME_ERROR" [SQL_Expression.Text_Literal "lookup_and_replace: "+message, lookup_counter.expression]
+
+    exact_match =
+        is_matched = SQL_Expression.Operation "==" [lookup_counter.expression, SQL_Expression.Literal "1"]
+        [is_matched, SQL_Expression.Literal "TRUE"]
+    unmatched =
+        is_unmatched = SQL_Expression.Operation "IS_NULL" [lookup_counter.expression]
+        case allow_unmatched_rows of
+            True -> [is_unmatched, SQL_Expression.Literal "TRUE"]
+            False -> [is_unmatched, make_error "Some rows have no matches in the lookup table."]
+
+    # This will be matched if the first branches do not match - this should happen if there is more than 1 match for a row.
+    fallback = [make_error "Duplicate matches found in the lookup table."]
+
+    SQL_Expression.Operation "CASE" unmatched+exact_match+fallback
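
Taken together, the WHERE filter produced by `make_invariant_check` behaves roughly like the following CASE (sketched in the comment; the exact rendering depends on the dialect):

    # CASE
    #     WHEN "lookup_counter" IS NULL THEN TRUE   -- no match; fails instead when allow_unmatched_rows=False
    #     WHEN "lookup_counter" = 1 THEN TRUE       -- exactly one match
    #     ELSE <RUNTIME_ERROR expression>           -- duplicate matches: fail at materialization time
    # END
    invariant_filter = make_invariant_check subquery_setup.lookup_counter allow_unmatched_rows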
@@ -15,3 +15,11 @@ type Error_Mapper
     is_primary_key_violation error =
         _ = error
         Unimplemented.throw "This is an interface only."
+
+    ## PRIVATE
+       Called by `Table.read`, allowing the dialect to transform a generic
+       `SQL_Error` into a more specific error type, if applicable.
+    transform_custom_errors : SQL_Error -> Any
+    transform_custom_errors error =
+        _ = error
+        Unimplemented.throw "This is an interface only."
@@ -1,4 +1,6 @@
 from Standard.Base import all
+import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
+from Standard.Base.Runtime import assert
 
 import project.Data.Column.Column
 import project.Data.Table.Table
@@ -32,7 +34,17 @@ check_integrity entity1 entity2 =
    - entity2: The entity to check against the first.
 check_connection : (Table | Column) -> (Table | Column) -> Boolean
 check_connection entity1 entity2 =
-    Meta.is_same_object entity1.connection entity2.connection
+    # The `if_not_error` is needed because `Meta.is_same_object` does not forward dataflow errors.
+    entity1.if_not_error <| entity2.if_not_error <|
+        Meta.is_same_object entity1.connection entity2.connection
+
+## PRIVATE
+ensure_same_connection : Text -> Vector -> Any -> Any ! Illegal_Argument
+ensure_same_connection name entities ~continuation =
+    assert entities.not_empty
+    all_same = entities.all entity-> check_connection entity entities.first
+    if all_same then continuation else
+        Error.throw (Illegal_Argument.Error "The input "+name+"s come from different connections. Cross-connection operations are not supported.")
 
 ## PRIVATE
 
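
Note that `continuation` is declared lazy (`~`), so the guarded computation only runs once the connection check has passed; usage sketch (the tables are illustrative):

    Helpers.ensure_same_connection "table" [table_a, table_b] <|
        table_a.join table_b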
@@ -36,6 +36,18 @@ type SQL_Expression
        as-is into a query.
     Literal (value : Text)
 
+    ## PRIVATE
+       A text literal that should be inserted into a query, wrapped in quotes
+       with any quotes it may contain escaped.
+
+       This is useful to avoid requiring unnecessary interpolations for text
+       constants in queries.
+
+       User-provided values should always be passed as interpolated `Constant`s.
+       This should only be used for 'statically' known constants - ones baked
+       into the library source code.
+    Text_Literal (value : Text)
+
 ## PRIVATE
 
    The internal representation of an SQL expression built from an operation
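
The distinction in practice, as a sketch (`user_input` is illustrative):

    # User data must go through interpolation, becoming a bound parameter:
    SQL_Expression.Constant user_input
    # Static fragments baked into the library may skip interpolation:
    SQL_Expression.Text_Literal "lookup_and_replace: "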
@@ -297,7 +297,8 @@ make_internal_generator_dialect =
     stats = [agg_median, agg_mode, agg_percentile, stddev_pop, stddev_samp]
     date_ops = [make_extract_as_int "year", make_extract_as_int "quarter", make_extract_as_int "month", make_extract_as_int "week", make_extract_as_int "day", make_extract_as_int "hour", make_extract_as_int "minute", make_extract_fractional_as_int "second", make_extract_fractional_as_int "millisecond" modulus=1000, make_extract_fractional_as_int "microsecond" modulus=1000, ["date_add", make_date_add], ["date_diff", make_date_diff]]
     special_overrides = [is_null, is_empty]
-    my_mappings = text + counts + stats + first_last_aggregators + arith_extensions + bool + date_ops + special_overrides
+    other = [["RUNTIME_ERROR", make_runtime_error_op]]
+    my_mappings = text + counts + stats + first_last_aggregators + arith_extensions + bool + date_ops + special_overrides + other
     Base_Generator.base_dialect . extend_with my_mappings
 
 ## PRIVATE
@@ -727,3 +728,26 @@ as_int32 expr =
 
 ## PRIVATE
 postgres_statement_setter = Statement_Setter.default
+
+## PRIVATE
+   The RUNTIME_ERROR operation should allow the query to compile fine and it
+   will not prevent it from running if the branch including this operation is
+   not taken. But if the branch is computed, it should ensure the query fails.
+
+   This query never returns a value, so its type should be polymorphic. However,
+   that is not possible - so currently it just 'pretends' that it would return a
+   Boolean - because that is the type we expect in the use-case. This can be
+   altered if needed.
+
+   It takes a variable as the second argument. It can be any value that is not
+   statically known - this ensures that the optimizer will not be able to
+   pre-compute the expression too early (which could make the query fail
+   spuriously). See `make_invariant_check` in `Lookup_Query_Helper` for an
+   example.
+make_runtime_error_op arguments =
+    if arguments.length != 2 then
+        Panic.throw (Illegal_Argument.Error "RUNTIME_ERROR takes exactly 2 arguments (error message and a variable to ensure deferred execution).")
+    error_message = arguments.at 0
+    variable_to_defer = arguments.at 1
+
+    Builder.code "CAST('[ENSO INVARIANT VIOLATED: '||" ++ error_message ++ "||'] '||COALESCE(" ++ variable_to_defer ++ "::TEXT,'NULL') AS BOOLEAN)"
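
The failure is deliberately a runtime one: the concatenated text, which embeds the runtime value of the deferred variable, is never a valid boolean literal, so Postgres raises a cast error exactly when the branch is evaluated. A sketch of the rendered fragment (the column name is illustrative):

    # CAST('[ENSO INVARIANT VIOLATED: lookup_and_replace: ...] '||COALESCE("lookup_counter"::TEXT,'NULL') AS BOOLEAN)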
@@ -1,6 +1,6 @@
 from Standard.Base import all
 
-from project.Errors import SQL_Error
+from project.Errors import SQL_Error, Invariant_Violation
 
 ## PRIVATE
 type Postgres_Error_Mapper
@@ -9,3 +9,12 @@ type Postgres_Error_Mapper
     is_primary_key_violation : SQL_Error -> Boolean
     is_primary_key_violation error =
         error.java_exception.getMessage.contains "duplicate key value violates unique constraint"
+
+    ## PRIVATE
+    transform_custom_errors : SQL_Error -> Any
+    transform_custom_errors error =
+        message = error.java_exception.getMessage
+        if message.contains "ENSO INVARIANT VIOLATED" . not then error else
+            payloads = message.tokenize "\[ENSO INVARIANT VIOLATED: (.*)\]"
+            if payloads.length != 1 then error else
+                Invariant_Violation.Error payloads.first error
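
A sketch of the recovery (the exact Postgres message shape is an assumption here):

    # message  = '... invalid input syntax for type boolean: "[ENSO INVARIANT VIOLATED: lookup_and_replace: ...] 2"'
    # the regex capture group yields exactly one payload:
    # payloads = ["lookup_and_replace: ..."]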
@@ -286,9 +286,10 @@ make_internal_generator_dialect =
     counts = [agg_count_is_null, agg_count_empty, agg_count_not_empty, ["COUNT_DISTINCT", agg_count_distinct], ["COUNT_DISTINCT_INCLUDE_NULL", agg_count_distinct_include_null]]
     stats = [agg_stddev_pop, agg_stddev_samp]
     arith_extensions = [is_inf, floating_point_div, mod_op]
+    other = [["RUNTIME_ERROR", make_runtime_error_op]]
 
     bool = [bool_or]
-    my_mappings = text + counts + stats + arith_extensions + bool
+    my_mappings = text + counts + stats + arith_extensions + bool + other
     Base_Generator.base_dialect . extend_with my_mappings
 
 ## PRIVATE
@@ -481,3 +482,21 @@ make_custom_cast column target_value_type type_mapping =
         result.put expr
 
     result.get
+
+## PRIVATE
+   The RUNTIME_ERROR operation should allow the query to compile fine and it
+   will not prevent it from running if the branch including this operation is
+   not taken. But if the branch is computed, it should ensure the query fails.
+
+   This query never returns a value, so its type should be polymorphic. However,
+   that is not possible - so currently the SQLite dialect just does not handle
+   inferring a type for it. Thus, it should only be used in places that will not
+   need client-side type inference (e.g. WHERE clause is ok).
+   This can be changed in the future, if needed.
+make_runtime_error_op arguments =
+    if arguments.length != 2 then
+        Panic.throw (Illegal_Argument.Error "RUNTIME_ERROR takes exactly 2 arguments (error message and a variable to ensure deferred execution).")
+    error_message = arguments.at 0
+    variable_to_defer = arguments.at 1
+    # We have to ensure that the implementation of SQLite that we use does not have a MATCH function defined which would make the code below succeed.
+    Builder.code "match('[ENSO INVARIANT VIOLATED: '||" ++ error_message ++ "||'] ', " ++ variable_to_defer ++ ")"
@@ -1,6 +1,6 @@
 from Standard.Base import all
 
-from project.Errors import SQL_Error
+from project.Errors import SQL_Error, Invariant_Violation
 
 polyglot java import org.sqlite.SQLiteErrorCode
 polyglot java import org.sqlite.SQLiteException
@@ -14,3 +14,13 @@ type SQLite_Error_Mapper
         case error.java_exception of
             sqlite_exception : SQLiteException ->
                 sqlite_exception.getResultCode == SQLiteErrorCode.SQLITE_CONSTRAINT_PRIMARYKEY
+
+    ## PRIVATE
+    transform_custom_errors : SQL_Error -> Any
+    transform_custom_errors error =
+        invariant_failure_message = "unable to use function MATCH in the requested context"
+        message = error.java_exception.getMessage
+        if message.contains invariant_failure_message . not then error else
+            query_contains_invariant_check = error.related_query.if_nothing "" . contains "ENSO INVARIANT VIOLATED"
+            if query_contains_invariant_check . not then error else
+                Invariant_Violation.Error Nothing error
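
Since SQLite reports only a generic MATCH failure with no payload, the mapper requires both signals before translating (a sketch of the logic):

    # 1. the error message mentions the MATCH failure, and
    # 2. the failing query itself contains the "ENSO INVARIANT VIOLATED" marker;
    # only then is the error wrapped as: Invariant_Violation.Error Nothing error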
@@ -179,14 +179,17 @@ operations_map =
     handle_cast _ =
         Panic.throw (Illegal_State.Error "Cast relies on its own type inference logic, so this code should never be reached. This is a bug in the Database library.")
 
+    handle_runtime_error _ =
+        Panic.throw (Illegal_State.Error "RUNTIME_ERROR should not be part of direct type inference, so this code should never be reached. This is a bug in the Database library.")
+
     always_boolean_ops = ["==", "!=", "equals_ignore_case", ">=", "<=", "<", ">", "BETWEEN", "AND", "OR", "NOT", "IS_NULL", "IS_EMPTY", "LIKE", "IS_IN", "IS_IN_COLUMN", "starts_with", "ends_with", "contains", "BOOL_OR", "IS_INF"]
     always_floating_ops = ["/", "mod", "AVG", "STDDEV_POP", "STDDEV_SAMP", "ROUND"]
     always_text_ops = ["ADD_TEXT", "CONCAT", "CONCAT_QUOTE_IF_NEEDED", "MAKE_CASE_SENSITIVE", "FOLD_CASE", "TRIM", "LTRIM", "RTRIM", "REPLACE"]
-    always_integer_ops = ["COUNT", "COUNT_IS_NULL", "COUNT_DISTINCT", "COUNT_DISTINCT_INCLUDE_NULL", "COUNT_EMPTY", "COUNT_NOT_EMPTY", "COUNT_ROWS", "ROW_NUMBER"]
+    always_integer_ops = ["COUNT", "COUNT_IS_NULL", "COUNT_DISTINCT", "COUNT_DISTINCT_INCLUDE_NULL", "COUNT_EMPTY", "COUNT_NOT_EMPTY", "COUNT_ROWS", "ROW_NUMBER", "ROW_NUMBER_IN_GROUP"]
     same_as_first = ["TRUNCATE", "CEIL", "FLOOR"]
     arithmetic_ops = ["ADD_NUMBER", "-", "*", "^", "%", "SUM"]
     merge_input_types_ops = ["ROW_MAX", "ROW_MIN", "MAX", "MIN", "FILL_NULL", "COALESCE"]
-    others = [["IIF", handle_iif], ["CAST", handle_cast], ["CASE", handle_case]]
+    others = [["IIF", handle_iif], ["CAST", handle_cast], ["CASE", handle_case], ["RUNTIME_ERROR", handle_runtime_error]]
     Map.from_vector <|
         v1 = always_boolean_ops.map [_, const SQLite_Types.boolean]
         v2 = always_floating_ops.map [_, const SQLite_Types.real]
@@ -1666,28 +1666,27 @@ type Table
     @on Widget_Helpers.make_join_condition_selector
     join : Table -> Join_Kind -> Vector (Join_Condition | Text) | Text -> Text -> Problem_Behavior -> Table
     join self right:Table (join_kind : Join_Kind = Join_Kind.Left_Outer) on=[Join_Condition.Equals self.column_names.first] right_prefix="Right " on_problems=Report_Warning = Out_Of_Memory.handle_java_exception "join" <|
-        if check_table "right" right then
-            # [left_unmatched, matched, right_unmatched]
-            rows_to_keep = case join_kind of
-                Join_Kind.Inner -> [False, True, False]
-                Join_Kind.Left_Outer -> [True, True, False]
-                Join_Kind.Right_Outer -> [False, True, True]
-                Join_Kind.Full -> [True, True, True]
-                Join_Kind.Left_Exclusive -> [True, False, False]
-                Join_Kind.Right_Exclusive -> [False, False, True]
+        # [left_unmatched, matched, right_unmatched]
+        rows_to_keep = case join_kind of
+            Join_Kind.Inner -> [False, True, False]
+            Join_Kind.Left_Outer -> [True, True, False]
+            Join_Kind.Right_Outer -> [False, True, True]
+            Join_Kind.Full -> [True, True, True]
+            Join_Kind.Left_Exclusive -> [True, False, False]
+            Join_Kind.Right_Exclusive -> [False, False, True]
 
         columns_to_keep = case join_kind of
             Join_Kind.Left_Exclusive -> [True, False]
            Join_Kind.Right_Exclusive -> [False, True]
             _ -> [True, True]
 
         join_resolution = make_join_helpers self right . resolve on on_problems
         right_columns_to_drop = if join_kind == Join_Kind.Inner then join_resolution.redundant_column_names else []
 
         java_conditions = join_resolution.conditions
         new_java_table = Java_Problems.with_problem_aggregator on_problems java_aggregator->
             self.java_table.join right.java_table java_conditions (rows_to_keep.at 0) (rows_to_keep.at 1) (rows_to_keep.at 2) (columns_to_keep.at 0) (columns_to_keep.at 1) right_columns_to_drop right_prefix java_aggregator
         Table.Value new_java_table
 
     ## ALIAS cartesian join
        GROUP Standard.Base.Calculations
@@ -1726,15 +1725,14 @@ type Table
        layout order while for database tables the order may be unspecified).
     cross_join : Table -> Integer | Nothing -> Text -> Problem_Behavior -> Table
     cross_join self right:Table right_row_limit=100 right_prefix="Right " on_problems=Report_Warning = Out_Of_Memory.handle_java_exception "cross_join" <|
-        if check_table "right" right then
-            limit_problems = case right_row_limit.is_nothing.not && (right.row_count > right_row_limit) of
-                True ->
-                    [Cross_Join_Row_Limit_Exceeded.Error right_row_limit right.row_count]
-                False -> []
-            on_problems.attach_problems_before limit_problems <|
-                new_java_table = Java_Problems.with_problem_aggregator on_problems java_aggregator->
-                    self.java_table.crossJoin right.java_table right_prefix java_aggregator
-                Table.Value new_java_table
+        limit_problems = case right_row_limit.is_nothing.not && (right.row_count > right_row_limit) of
+            True ->
+                [Cross_Join_Row_Limit_Exceeded.Error right_row_limit right.row_count]
+            False -> []
+        on_problems.attach_problems_before limit_problems <|
+            new_java_table = Java_Problems.with_problem_aggregator on_problems java_aggregator->
+                self.java_table.crossJoin right.java_table right_prefix java_aggregator
+            Table.Value new_java_table
 
     ## Replaces values in this table by values from a lookup table.
        New values are looked up in the lookup table based on the `key_columns`.
@@ -1771,8 +1769,8 @@ type Table
          specified in `key_columns`, a `Missing_Input_Columns` error is raised.
        - If an empty vector is provided for `key_columns`, a
          `No_Input_Columns_Selected` error is raised.
-       - If the lookup table contains multiple rows with the same values in
-         the `key_columns`, an `Non_Unique_Key` error is raised.
+       - If a single row is matched by multiple entries in the lookup table,
+         a `Non_Unique_Key` error is raised.
        - If a column that is being updated from the lookup table has a type
          that is not compatible with the type of the corresponding column in
          this table, a `No_Common_Type` error is raised.
@@ -1874,20 +1872,19 @@ type Table
     @keep_unmatched (make_single_choice [["True", "Boolean.True"], ["False", "Boolean.False"], ["Report", Meta.get_qualified_type_name Report_Unmatched]])
     zip : Table -> Boolean | Report_Unmatched -> Text -> Problem_Behavior -> Table
     zip self right:Table keep_unmatched=Report_Unmatched right_prefix="Right " on_problems=Report_Warning =
-        if check_table "right" right then
-            keep_unmatched_bool = case keep_unmatched of
-                Report_Unmatched -> True
-                b : Boolean -> b
-            report_mismatch = keep_unmatched == Report_Unmatched
+        keep_unmatched_bool = case keep_unmatched of
+            Report_Unmatched -> True
+            b : Boolean -> b
+        report_mismatch = keep_unmatched == Report_Unmatched
 
         left_row_count = self.row_count
         right_row_count = right.row_count
         problems = if (left_row_count == right_row_count) || report_mismatch.not then [] else
             [Row_Count_Mismatch.Error left_row_count right_row_count]
         on_problems.attach_problems_before problems <|
             new_java_table = Java_Problems.with_problem_aggregator on_problems java_aggregator->
                 self.java_table.zip right.java_table keep_unmatched_bool right_prefix java_aggregator
             Table.Value new_java_table
 
     ## ALIAS append, concat
        GROUP Standard.Base.Calculations
@ -1980,12 +1977,11 @@ type Table
|
|||||||
retyped to the `Mixed` type to indicate that intention. Note that the
|
retyped to the `Mixed` type to indicate that intention. Note that the
|
||||||
`Mixed` type may not be supported by most Database backends.
|
`Mixed` type may not be supported by most Database backends.
|
||||||
union : (Table | Vector Table) -> Match_Columns -> Boolean | Report_Unmatched -> Boolean -> Problem_Behavior -> Table
|
union : (Table | Vector Table) -> Match_Columns -> Boolean | Report_Unmatched -> Boolean -> Problem_Behavior -> Table
|
||||||
union self tables match_columns=Match_Columns.By_Name keep_unmatched_columns=Report_Unmatched allow_type_widening=True on_problems=Report_Warning =
|
union self tables:Vector|Table match_columns=Match_Columns.By_Name keep_unmatched_columns=Report_Unmatched allow_type_widening=True on_problems=Report_Warning =
|
||||||
all_tables = case tables of
|
all_tables = case tables of
|
||||||
v : Vector -> [self] + v
|
v : Vector -> [self] + (v.map t-> Table.from t)
|
||||||
single_column : Column -> [self, single_column.to_table]
|
|
||||||
single_table -> [self, single_table]
|
single_table -> [self, single_table]
|
||||||
all_tables.all (check_table "tables") . if_not_error <|
|
all_tables.if_not_error <|
|
||||||
problem_builder = Problem_Builder.new
|
problem_builder = Problem_Builder.new
|
||||||
matched_column_sets = Match_Columns_Helpers.match_columns all_tables match_columns keep_unmatched_columns problem_builder
|
matched_column_sets = Match_Columns_Helpers.match_columns all_tables match_columns keep_unmatched_columns problem_builder
|
||||||
result_row_count = all_tables.fold 0 c-> t-> c + t.row_count
|
result_row_count = all_tables.fold 0 c-> t-> c + t.row_count
|
||||||
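With the new `tables:Vector|Table` signature, every element of a vector argument is normalized through a `Table.from` conversion, so a single table, a vector of tables, or anything convertible to this backend's `Table` is accepted; a value from another backend simply fails the conversion, replacing the old `check_table` guard. An illustrative sketch (`t1`, `t2`, `t3` are placeholder tables of this backend, `other` a table from a different backend):

    u1 = t1.union t2
    u2 = t1.union [t2, t3]
    # `other` cannot be converted to this backend's Table, so this fails
    # (with Illegal_Argument in the in-memory backend, as the tests below show):
    t1.union [other] . should_fail_with Illegal_Argument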
@@ -2531,17 +2527,6 @@ make_join_helpers left_table right_table =
             Java_Join_Between.new left.java_column right_lower.java_column right_upper.java_column
     Join_Helpers.Join_Condition_Resolver.Value (left_table.at _) (right_table.at _) make_equals make_equals_ignore_case make_between

-## PRIVATE
-   Checks if the argument is a proper table and comes from the current backend.
-   It returns True or throws a dataflow error explaining the issue.
-check_table arg_name table =
-    if Table_Helpers.is_table table . not then Error.throw (Type_Error.Error Table table arg_name) else
-        same_backend = table.is_a Table
-        case same_backend of
-            False ->
-                Error.throw (Illegal_Argument.Error "Currently cross-backend operations are not supported. Materialize the table using `.read` before mixing it with an in-memory Table.")
-            True -> True
-
 ## PRIVATE
    A helper that efficiently concatenates storages of in-memory columns.
 concat_columns column_set all_tables result_type result_row_count on_problems =
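The removed helper is superseded by type ascriptions on the arguments: per the new signatures above (`right:Table`, `tables:Vector|Table`), an incompatible value triggers a `Table.from` conversion and fails before the method body runs. A rough, hypothetical sketch of the pattern (illustration only, not the literal implementation):

    ## PRIVATE
       Hypothetical example: the `right:Table` ascription makes the runtime
       attempt a `Table.from` conversion, so a value from another backend
       produces a dataflow error instead of reaching the body.
    example_operation self right:Table =
        self.row_count + right.row_count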
@@ -1,29 +1,32 @@
 from Standard.Base import all
+import Standard.Base.Errors.Illegal_Argument.Illegal_Argument

 import project.Data.Type.Storage
 import project.Data.Type.Value_Type.Value_Type
 import project.Data.Type.Value_Type_Helpers
-from project.Errors import Missing_Input_Columns, Unexpected_Extra_Columns, Floating_Point_Equality, No_Common_Type
+from project.Errors import Missing_Input_Columns, Unexpected_Extra_Columns, Floating_Point_Equality, No_Common_Type, No_Output_Columns

 polyglot java import org.enso.table.data.table.join.LookupColumnDescription

-## PRIVATE
-type Lookup_Settings
-    ## PRIVATE
-    Value (key_columns : Vector Text) (columns_to_update : Set Text) (columns_to_add : Vector Text)
-
 ## PRIVATE
 type Lookup_Column
     ## PRIVATE
+       A key column used to correlate rows between the two tables.
     Key_Column base_column lookup_column

     ## PRIVATE
+       A column from the base table that is kept as-is.
     Keep_Column base_column

     ## PRIVATE
+       A column that is present in both tables.
+       If the row is matched with a row from the lookup table, the value from
+       the lookup table is used; otherwise the value from the base table is kept.
     Replace_Column base_column lookup_column (common_type : Value_Type)

     ## PRIVATE
+       A new column added from the lookup table.
+       If the row has no match, it will be `Nothing`.
     Add_Column lookup_column

 ## PRIVATE
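For orientation, here is how a concrete call would be classified into these variants (an illustrative sketch with placeholder values, not code from this file): given a base table with columns `X`, `Y`, `Z`, a lookup table with columns `X`, `Y`, `W`, and `key_columns=["X"]`, the prepared plan is roughly:

    # X correlates rows between the two tables:
    Lookup_Column.Key_Column base_x lookup_x
    # Y is present in both tables; matched rows take the lookup value:
    Lookup_Column.Replace_Column base_y lookup_y common_type
    # Z exists only in the base table and is kept as-is:
    Lookup_Column.Keep_Column base_z
    # W exists only in the lookup table; added when `add_new_columns` is set:
    Lookup_Column.Add_Column lookup_w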
@@ -38,7 +41,8 @@ type Lookup_Column

    It also reports any errors or warnings related to selecting these columns.
 prepare_columns_for_lookup base_table lookup_table key_columns_selector add_new_columns allow_unmatched_rows on_problems =
-    key_columns = base_table.select_columns key_columns_selector . column_names
+    key_columns = base_table.select_columns key_columns_selector . column_names . catch No_Output_Columns _->
+        Error.throw (Illegal_Argument.Error "At least one key column must be provided for `lookup_and_replace`.")
     lookup_table_key_columns = lookup_table.select_columns key_columns . catch Missing_Input_Columns error->
         Error.throw (Missing_Input_Columns.Error error.criteria "the lookup table")

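The effect of the new `catch`: an empty `key_columns` selection now fails early with an actionable message, as exercised by a test added later in this diff:

    lookup = table_builder [["X", [1, 2]], ["Y", ["A", "B"]]]
    my_table = table_builder [["X", [2, 1]], ["Z", [10, 20]]]

    r = my_table.lookup_and_replace lookup key_columns=[]
    r.should_fail_with Illegal_Argument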
@@ -452,15 +452,6 @@ filter_blank_rows table when_any treat_nans_as_blank =
             table.filter non_missing_mask Filter_Condition.Is_True
         False -> table

-## PRIVATE
-   Checks if the given object implements a Table interface.
-
-   Currently, it relies on a hack - it checks if the object has a method
-   `is_table` and if it returns `True`.
-is_table obj =
-    known_types = ["Standard.Table.Data.Table.Table", "Standard.Database.Data.Table.Table"]
-    known_types.contains (Meta.get_qualified_type_name obj)
-
 ## PRIVATE
    Checks if the given object implements a Table interface.

@@ -35,7 +35,6 @@ public class LookupJoin {

     LookupJoin joiner = new LookupJoin(keys, columnDescriptions, allowUnmatchedRows, problemAggregator);
     joiner.checkNullsInKey();
-    joiner.verifyLookupUniqueness();
     return joiner.join();
   }

|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void verifyLookupUniqueness() {
|
|
||||||
if (!lookupIndex.isUnique()) {
|
|
||||||
// Find the duplicated key
|
|
||||||
for (Map.Entry<UnorderedMultiValueKey, List<Integer>> group : lookupIndex.mapping().entrySet()) {
|
|
||||||
int groupSize = group.getValue().size();
|
|
||||||
if (groupSize > 1) {
|
|
||||||
UnorderedMultiValueKey key = group.getKey();
|
|
||||||
List<Object> exampleValues = IntStream.range(0, keyColumnNames.size()).mapToObj(key::get).toList();
|
|
||||||
throw new NonUniqueLookupKey(keyColumnNames, exampleValues, groupSize);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
assert false : "isUnique returned false, but no duplicated key was found.";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private Table join() {
|
private Table join() {
|
||||||
List<LookupOutputColumn> outputColumns = columnDescriptions.stream().map(this::prepareOutputColumn).toList();
|
List<LookupOutputColumn> outputColumns = columnDescriptions.stream().map(this::prepareOutputColumn).toList();
|
||||||
List<LookupOutputColumn.MergeColumns> columnsToMerge =
|
List<LookupOutputColumn.MergeColumns> columnsToMerge =
|
||||||
@ -145,8 +128,12 @@ public class LookupJoin {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (lookupRowIndices.size() > 1) {
|
||||||
|
List<Object> exampleValues = IntStream.range(0, keyColumnNames.size()).mapToObj(key::get).toList();
|
||||||
|
throw new NonUniqueLookupKey(keyColumnNames, exampleValues, lookupRowIndices.size());
|
||||||
|
}
|
||||||
|
|
||||||
assert !lookupRowIndices.isEmpty() : "No Index group should be empty.";
|
assert !lookupRowIndices.isEmpty() : "No Index group should be empty.";
|
||||||
assert lookupRowIndices.size() == 1 : "This should have been checked in verifyLookupUniqueness()";
|
|
||||||
return lookupRowIndices.get(0);
|
return lookupRowIndices.get(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
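With the eager `verifyLookupUniqueness()` pass gone, uniqueness is enforced per looked-up key inside the join itself, so duplicates in the lookup table are tolerated as long as no input row actually hits them. From the updated tests below:

    lookup = table_builder [["X", [1, 2, 1]], ["Y", ["A", "B", "C"]]]
    my_table = table_builder [["X", [4, 2, 3, 2]], ["Y", ["Z", "ZZ", "ZZZ", "ZZZZ"]], ["Z", [10, 20, 30, 40]]]

    # No row has X == 1, so the duplicated lookup key is never consulted:
    t2 = my_table.lookup_and_replace lookup key_columns=["X"]
    t2.column_names . should_equal ["X", "Y", "Z"]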
@@ -3,10 +3,13 @@ import Standard.Base.Errors.Common.Arithmetic_Error
 import Standard.Base.Errors.Common.Index_Out_Of_Bounds
 import Standard.Base.Errors.Illegal_Argument.Illegal_Argument

-from Standard.Table import Set_Mode
+from Standard.Table import all
 from Standard.Table.Errors import all
 import Standard.Table.Data.Expression.Expression_Error

+from Standard.Database import all
+from Standard.Database.Errors import Integrity_Error
+
 from Standard.Test import Test, Problems
 import Standard.Test.Extensions

@@ -163,6 +166,17 @@ spec setup =
             problems2 = [Arithmetic_Error.Error "Division by zero (at rows [2])."]
             Problems.test_problem_handling action2 problems2 tester2

+        Test.specify "should gracefully handle columns from different backends" <|
+            t1 = table_builder [["A", [1, 2, 3]]]
+            alternative_connection = Database.connect (SQLite In_Memory)
+            t0 = (Table.new [["X", [3, 2, 1]]]).select_into_database_table alternative_connection "T0" temporary=True
+            c0 = t0.at "X"
+
+            r1 = t1.set c0
+            case setup.is_database of
+                True -> r1.should_fail_with Integrity_Error
+                False -> r1.should_fail_with Illegal_Argument
+
     Test.group prefix+"Table.column_names" <|
         Test.specify "should return the names of all columns" <|
             table.column_names . should_equal ["foo", "bar", "Baz", "foo 1", "foo 2", "ab.+123", "abcd123"]
@@ -1,15 +1,17 @@
 from Standard.Base import all

 import Standard.Base.Errors.Common.Type_Error
 import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
 import Standard.Base.Errors.Illegal_State.Illegal_State
-import Standard.Table.Data.Join_Kind_Cross.Join_Kind_Cross
-import Standard.Test.Extensions

-from Standard.Database.Errors import Unsupported_Database_Operation
-from Standard.Table import all hiding Table
+from Standard.Table import all
 from Standard.Table.Errors import all
+import Standard.Table.Data.Join_Kind_Cross.Join_Kind_Cross
+
+from Standard.Database import all
+from Standard.Database.Errors import Unsupported_Database_Operation, Integrity_Error

 from Standard.Test import Test, Problems
+import Standard.Test.Extensions

 from project.Common_Table_Operations.Util import expect_column_names, run_default_backend
|
|||||||
t1 = table_builder [["X", [1, 2]], ["Y", [4, 5]]]
|
t1 = table_builder [["X", [1, 2]], ["Y", [4, 5]]]
|
||||||
t2 = table_builder [["Z", ['a', 'b']], ["W", ['c', 'd']]]
|
t2 = table_builder [["Z", ['a', 'b']], ["W", ['c', 'd']]]
|
||||||
Test.expect_panic_with (t1.join t2 join_kind=Join_Kind_Cross.Cross on=[]) Type_Error
|
Test.expect_panic_with (t1.join t2 join_kind=Join_Kind_Cross.Cross on=[]) Type_Error
|
||||||
|
|
||||||
|
Test.specify "should gracefully handle tables from different backends" <|
|
||||||
|
t1 = table_builder [["A", ["a", "b"]]]
|
||||||
|
alternative_connection = Database.connect (SQLite In_Memory)
|
||||||
|
t0 = (Table.new [["X", [1, 2, 4]], ["Z", [10, 20, 30]]]).select_into_database_table alternative_connection "T0" temporary=True
|
||||||
|
|
||||||
|
r1 = t1.cross_join t0
|
||||||
|
r1.should_fail_with Illegal_Argument
|
||||||
|
@@ -1,10 +1,12 @@
 from Standard.Base import all
+import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
 import Standard.Base.Errors.Illegal_State.Illegal_State

-from Standard.Table import all hiding Table
+from Standard.Table import all
 from Standard.Table.Errors import all

-from Standard.Database.Errors import Unsupported_Database_Operation
+from Standard.Database import all
+from Standard.Database.Errors import Unsupported_Database_Operation, Integrity_Error

 from Standard.Test import Test, Problems
 import Standard.Test.Extensions
|
|||||||
r3.at 2 . should_equal [Nothing, 10, 7, 7, 200]
|
r3.at 2 . should_equal [Nothing, 10, 7, 7, 200]
|
||||||
r3.at 3 . should_equal [2, 20, 2, Nothing, Nothing]
|
r3.at 3 . should_equal [2, 20, 2, Nothing, Nothing]
|
||||||
r3.at 4 . should_equal [3, 30, 3, Nothing, Nothing]
|
r3.at 4 . should_equal [3, 30, 3, Nothing, Nothing]
|
||||||
|
|
||||||
|
Test.specify "should gracefully handle tables from different backends" <|
|
||||||
|
alternative_connection = Database.connect (SQLite In_Memory)
|
||||||
|
t0 = (Table.new [["X", [1, 2, 4]], ["Z", [10, 20, 30]]]).select_into_database_table alternative_connection "T0" temporary=True
|
||||||
|
|
||||||
|
r1 = t1.join t0
|
||||||
|
r1.should_fail_with Illegal_Argument
|
||||||
|
@@ -1,9 +1,11 @@
 from Standard.Base import all
+import Standard.Base.Errors.Illegal_Argument.Illegal_Argument

 from Standard.Table import all
 from Standard.Table.Errors import all

-from Standard.Database.Errors import Unsupported_Database_Operation
+from Standard.Database import all
+from Standard.Database.Errors import Unsupported_Database_Operation, Invariant_Violation, Integrity_Error

 from Standard.Test import Test, Problems
 import Standard.Test.Extensions
|
|||||||
prefix = setup.prefix
|
prefix = setup.prefix
|
||||||
table_builder = setup.table_builder
|
table_builder = setup.table_builder
|
||||||
materialize = setup.materialize
|
materialize = setup.materialize
|
||||||
db_pending = if setup.is_database then "Not implemented yet. TODO: https://github.com/enso-org/enso/issues/7981"
|
Test.group prefix+"Table.lookup_and_replace" <|
|
||||||
Test.group prefix+"Table.lookup_and_replace" pending=db_pending <|
|
|
||||||
Test.specify "should allow to simply update columns based on a lookup table" <|
|
Test.specify "should allow to simply update columns based on a lookup table" <|
|
||||||
lookup = table_builder [["Y", ["A", "B", "A"]], ["X", [1, 2, 3]]]
|
lookup = table_builder [["Y", ["A", "B", "A"]], ["X", [1, 2, 3]]]
|
||||||
my_table = table_builder [["X", [1, 2, 3, 2]], ["Y", ["Z", "ZZ", "ZZZ", "ZZZZ"]], ["Z", [10, 20, 30, 40]]]
|
my_table = table_builder [["X", [1, 2, 3, 2]], ["Y", ["Z", "ZZ", "ZZZ", "ZZZZ"]], ["Z", [10, 20, 30, 40]]]
|
||||||
@@ -149,17 +150,42 @@ spec setup =
             m2.at "Y" . to_vector . should_equal ["A", "B", "A", "B"]
             m2.at "Z" . to_vector . should_equal [100, 200, 100, 100]

-        Test.specify "will fail on duplicate keys in the lookup table" <|
+        Test.specify "will fail on duplicate matches in the lookup table" <|
             lookup = table_builder [["X", [1, 2, 1]], ["Y", ["A", "B", "C"]]]
             my_table = table_builder [["X", [4, 2, 3, 2]], ["Y", ["Z", "ZZ", "ZZZ", "ZZZZ"]], ["Z", [10, 20, 30, 40]]]

-            r2 = my_table.lookup_and_replace lookup key_columns=["X"]
+            # If the duplicates do not show up in result - it is accepted.
+            t2 = my_table.lookup_and_replace lookup key_columns=["X"]
+            t2.column_names . should_equal ["X", "Y", "Z"]
+            m2 = t2 |> materialize |> _.order_by "Z"
+            m2.at "Z" . to_vector . should_equal [10, 20, 30, 40]
+            m2.at "X" . to_vector . should_equal [4, 2, 3, 2]
+            m2.at "Y" . to_vector . should_equal ["Z", "B", "ZZZ", "B"]
+
+            my_table2 = table_builder [["X", [1, 2]], ["Y", ["Z", "ZZ"]], ["Z", [10, 20]]]
+            r2 = my_table2.lookup_and_replace lookup key_columns=["X"]
             r2.should_fail_with Non_Unique_Key
+            r2.catch.key_column_names . should_equal ["X"]
+            r2.catch.clashing_example_key_values . should_equal [1]
+            r2.catch.clashing_example_row_count . should_equal 2

             lookup2 = table_builder [["X", [1, 1]], ["Y", ["A", "A"]], ["Z", [100, 100]]]
-            r3 = my_table.lookup_and_replace lookup2 key_columns=["X", "Y"]
+            Problems.assume_no_problems <| my_table.lookup_and_replace lookup2 key_columns=["X", "Y"]
+            r3 = my_table2.lookup_and_replace lookup2 key_columns=["X"]
             r3.should_fail_with Non_Unique_Key
+
+            m3 = my_table2.lookup_and_replace lookup2 key_columns=["X", "Y"] |> materialize |> _.order_by "Z"
+            m3.at "Z" . to_vector . should_equal [10, 20]
+            m3.at "X" . to_vector . should_equal [1, 2]
+            m3.at "Y" . to_vector . should_equal ["Z", "ZZ"]
+
+            my_table3 = table_builder [["X", [1, 1, 2]], ["Y", ["A", "Z", "ZZ"]], ["Z", [10, 20, 30]]]
+            r4 = my_table3.lookup_and_replace lookup2 key_columns=["X", "Y"]
+            r4.should_fail_with Non_Unique_Key
+            r4.catch.key_column_names . should_equal ["X", "Y"]
+            r4.catch.clashing_example_key_values . should_equal [1, "A"]
+            r4.catch.clashing_example_row_count . should_equal 2

         Test.specify "will preserve count of rows, even if there are duplicates" <|
             lookup = table_builder [["X", [1, 2]], ["Y", ["A", "B"]]]
             my_table = table_builder [["X", [1, 2, 2, 2, 1]], ["Z", [10, 20, 20, 20, 50]]]
@@ -171,6 +197,64 @@ spec setup =
             m2.at "Y" . to_vector . should_equal ["A", "B", "B", "B", "A"]
             m2.at "Z" . to_vector . should_equal [10, 20, 20, 20, 50]

+        Test.specify "should correctly preserve types of original, merged and added columns" <|
+            table = table_builder [["key1", [0, 1]], ["key2", ["o", "?"]], ["X", [1, 10]], ["Y", ["A", "E"]], ["Z", [1.5, 2.0]], ["W", [True, False]], ["A", [2, 22]], ["B", ["1", "2"]], ["C", [2.0, 2.5]], ["D", [False, False]]]
+            lookup = table_builder [["key1", [0, 2]], ["key2", ["o", "?"]], ["X2", [100, 1000]], ["Y2", ["foo", "bar"]], ["Z2", [0.5, 4.0]], ["W2", [False, True]], ["A", [3, 55]], ["B", ["F", "F"]], ["C", [3.0, 10.5]], ["D", [True, False]]]
+
+            [True, False].each allow_unmatched_rows->
+                table_prepared = if allow_unmatched_rows then table else
+                    # If we don't allow unmatched rows, we filter the input to only contain the matching row - this test is not about matching, but about the types.
+                    table.filter "key1" (Filter_Condition.Equal to=0)
+                t2 = table_prepared.lookup_and_replace lookup key_columns=["key1", "key2"] allow_unmatched_rows=allow_unmatched_rows
+                t2.column_names . should_equal ["key1", "key2", "X", "Y", "Z", "W", "A", "B", "C", "D", "X2", "Y2", "Z2", "W2"]
+
+                t2.at "key1" . value_type . should_be_a (Value_Type.Integer ...)
+                t2.at "key2" . value_type . should_equal Value_Type.Char
+
+                t2.at "X" . value_type . should_be_a (Value_Type.Integer ...)
+                t2.at "Y" . value_type . should_equal Value_Type.Char
+                t2.at "Z" . value_type . should_equal Value_Type.Float
+                t2.at "W" . value_type . should_equal Value_Type.Boolean
+
+                t2.at "A" . value_type . should_be_a (Value_Type.Integer ...)
+                t2.at "B" . value_type . should_equal Value_Type.Char
+                t2.at "C" . value_type . should_equal Value_Type.Float
+                t2.at "D" . value_type . should_equal Value_Type.Boolean
+
+                t2.at "X2" . value_type . should_be_a (Value_Type.Integer ...)
+                t2.at "Y2" . value_type . should_equal Value_Type.Char
+                t2.at "Z2" . value_type . should_equal Value_Type.Float
+                t2.at "W2" . value_type . should_equal Value_Type.Boolean
+
+        if setup.test_selection.fixed_length_text_columns then Test.specify "should correctly preserve types of original, merged and added columns (various Char types test case)" <|
+            table2 = table_builder [["key", ["0"]], ["X", ["a"]], ["A", ["bbbbb"]]] . cast "key" (Value_Type.Char size=50) . cast "X" (Value_Type.Char size=1) . cast "A" (Value_Type.Char size=5 variable_length=False)
+            lookup2 = table_builder [["key", ["0"]], ["X2", ["ccc"]], ["A", ["dddd"]]] . cast "key" (Value_Type.Char size=100) . cast "X2" (Value_Type.Char size=3 variable_length=False) . cast "A" (Value_Type.Char size=4 variable_length=False)
+
+            table2.at "key" . value_type . should_equal (Value_Type.Char size=50 variable_length=True)
+            table2.at "X" . value_type . should_equal (Value_Type.Char size=1 variable_length=True)
+            table2.at "A" . value_type . should_equal (Value_Type.Char size=5 variable_length=False)
+
+            lookup2.at "key" . value_type . should_equal (Value_Type.Char size=100 variable_length=True)
+            lookup2.at "X2" . value_type . should_equal (Value_Type.Char size=3 variable_length=False)
+            lookup2.at "A" . value_type . should_equal (Value_Type.Char size=4 variable_length=False)
+
+            [True, False].each allow_unmatched_rows->
+                t3 = table2.lookup_and_replace lookup2 key_columns=["key"] allow_unmatched_rows=allow_unmatched_rows
+                t3.column_names . should_equal ["key", "X", "A", "X2"]
+
+                # The key column keeps its original type
+                t3.at "key" . value_type . should_equal (Value_Type.Char size=50 variable_length=True)
+                t3.at "X" . value_type . should_equal (Value_Type.Char size=1 variable_length=True)
+                t3.at "X2" . value_type . should_equal (Value_Type.Char size=3 variable_length=False)
+
+                case allow_unmatched_rows of
+                    True ->
+                        # The merged column gets a merged type (max size in that case, but gets variable length due to differences), because it may contain both old and new values.
+                        t3.at "A" . value_type . should_equal (Value_Type.Char size=5 variable_length=True)
+                    False ->
+                        # If unmatched rows are not allowed, we can guarantee only _new_ values (from the lookup table) will be in the result, so instead of merging the type we inherit the type from the lookup table.
+                        t3.at "A" . value_type . should_equal (Value_Type.Char size=4 variable_length=False)
+
         Test.specify "will report Floating_Point_Equality if floating-point columns are used as key" <|
             lookup = table_builder [["X", [1.0, 2.0, 3.0]], ["Y", ["A", "B", "C"]]]
             my_table = table_builder [["X", [2.0, 3.0, 2.0, 3.0]], ["Y", ["Z", "ZZ", "ZZZ", "ZZZZ"]], ["Z", [10, 20, 30, 40]]]
@@ -224,6 +308,17 @@ spec setup =
             m2.at "Y" . to_vector . should_equal ["B", Nothing, Nothing, Nothing]
             m2.at "Z" . to_vector . should_equal [10, 20, 30, 40]

+            # But NULLs in source are ok, but mean such rows will never match:
+            r3 = my_table2.lookup_and_replace lookup2 key_columns="X" allow_unmatched_rows=False
+            r3.should_fail_with Unmatched_Rows_In_Lookup
+
+        Test.specify "will not allow providing no key_columns" <|
+            lookup = table_builder [["X", [1, 2]], ["Y", ["A", "B"]]]
+            my_table = table_builder [["X", [2, 1]], ["Z", [10, 20]]]
+
+            r2 = my_table.lookup_and_replace lookup key_columns=[]
+            r2.should_fail_with Illegal_Argument
+
         if setup.is_database.not then Test.specify "(in-memory only) will preserve the order of rows from the original table" <|
             lookup = table_builder [["Y", [1, 0]], ["V", ["TRUE", "FALSE"]]]
             xs = 0.up_to 50 . to_vector
@@ -237,3 +332,106 @@ spec setup =
             t2.at "Y" . to_vector . should_equal ys
             vs = xs.map x-> if (x%2) == 1 then "TRUE" else "FALSE"
             t2.at "V" . to_vector . should_equal vs
+
+        if setup.is_database then Test.specify "(database-only) will fail if pre-checked invariants get invalidated between the query is constructed and then materialized" <|
+            Test.with_clue "(lookup is unique check) " <|
+                lookup = table_builder [["X", [1, 2]], ["Y", ["A", "B"]]]
+                table = table_builder [["X", [1, 2, 2]], ["Y", ["Z", "ZZ", "ZZZ"]], ["Z", [10, 20, 30]]]
+
+                r1 = table.lookup_and_replace lookup key_columns="X"
+                # Immediately, the query is all good.
+                Problems.assume_no_problems r1
+
+                m1 = r1 |> materialize |> _.order_by "Z"
+                m1.at "X" . to_vector . should_equal [1, 2, 2]
+                m1.at "Y" . to_vector . should_equal ["A", "B", "B"]
+                m1.at "Z" . to_vector . should_equal [10, 20, 30]
+
+                # Now, we change the table, so that the lookup is no longer unique - even if the values are the same - we care about row uniqueness.
+                Problems.assume_no_problems <|
+                    lookup.update_rows (Table.new [["X", [2]], ["Y", ["B"]]]) key_columns=[] update_action=Update_Action.Insert
+                lookup.filter "X" (Filter_Condition.Equal to=2) . row_count . should_equal 2
+
+                # Now, the query is no longer valid:
+                m2 = r1 |> materialize
+                m2.should_fail_with Invariant_Violation
+                m2.catch.to_display_text . should_contain "database has been modified"
+                m2.catch.to_display_text . should_contain "re-run the workflow"
+
+                ## Now, even if we drop the lookup-related columns, we still should get an error
+                   (otherwise we would risk duplicating records due to the non-uniqueness!)
+                r3 = r1.select_columns ["Z"]
+                Problems.assume_no_problems r3
+
+                m3 = r3 |> materialize
+                ## Alternatively, theoretically, this query could succeed
+                   (knowing that it no longer depends on the lookup at all).
+                   But if it does so, we need to guarantee that we do not get duplicated rows in the result:
+
+                       r3.row_count . should_equal 3
+                       r3.at "Z" . to_vector . length . should_equal 3
+                       r3.at "Z" . to_vector . should_contain_the_same_elements_as [10, 20, 30]
+                m3.should_fail_with Invariant_Violation
+                r3.at "Z" . to_vector . should_fail_with Invariant_Violation
+
+            Test.with_clue "(no unmatched rows check - added a row in source) " <|
+                lookup = table_builder [["X", [1, 2]], ["Y", ["A", "B"]]]
+                table = table_builder [["X", [1, 2, 2]], ["Y", ["Z", "ZZ", "ZZZ"]], ["Z", [10, 20, 30]]]
+
+                r1 = table.lookup_and_replace lookup key_columns="X" allow_unmatched_rows=False
+                # Immediately, the query is all good.
+                Problems.assume_no_problems r1
+
+                m1 = r1 |> materialize |> _.order_by "Z"
+                m1.at "X" . to_vector . should_equal [1, 2, 2]
+                m1.at "Y" . to_vector . should_equal ["A", "B", "B"]
+                m1.at "Z" . to_vector . should_equal [10, 20, 30]
+
+                # Now, we change the source table, so that there are unmatched rows:
+                Problems.assume_no_problems <|
+                    table.update_rows (Table.new [["X", [33]], ["Y", ["F"]], ["Z", [40]]]) key_columns=[] update_action=Update_Action.Insert
+                table.row_count . should_equal 4
+
+                # Now, the query is no longer valid:
+                m2 = r1 |> materialize
+                m2.should_fail_with Invariant_Violation
+
+            Test.with_clue "(no unmatched rows check - removed a row in lookup) " <|
+                lookup = table_builder [["X", [1, 2]], ["Y", ["A", "B"]]]
+                table = table_builder [["X", [1, 2, 2]], ["Y", ["Z", "ZZ", "ZZZ"]], ["Z", [10, 20, 30]]]
+
+                r1 = table.lookup_and_replace lookup key_columns="X" allow_unmatched_rows=False
+                # Immediately, the query is all good.
+                Problems.assume_no_problems r1
+
+                m1 = r1 |> materialize |> _.order_by "Z"
+                m1.at "X" . to_vector . should_equal [1, 2, 2]
+                m1.at "Y" . to_vector . should_equal ["A", "B", "B"]
+                m1.at "Z" . to_vector . should_equal [10, 20, 30]
+
+                # Now, we change the lookup table, so that there are unmatched rows:
+                Problems.assume_no_problems <|
+                    lookup.delete_rows (Table.new [["X", [1]]]) key_columns=["X"]
+                lookup.row_count . should_equal 1
+
+                # Now, the query is no longer valid:
+                m2 = r1 |> materialize
+                m2.should_fail_with Invariant_Violation
+
+        # This does not seem useful really, but there is no reason to disallow it, so we should ensure it does not crash.
+        Test.specify "(edge-case) should allow lookup with itself" <|
+            table = table_builder [["X", [1, 2, 3]], ["Y", ["A", "B", "C"]]]
+            t2 = table.lookup_and_replace table key_columns="X"
+            t2.column_names . should_equal ["X", "Y"]
+
+            m2 = t2 |> materialize |> _.order_by "X"
+            m2.at "X" . to_vector . should_equal [1, 2, 3]
+            m2.at "Y" . to_vector . should_equal ["A", "B", "C"]
+
+        Test.specify "should gracefully handle tables from different backends" <|
+            t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
+            alternative_connection = Database.connect (SQLite In_Memory)
+            t0 = (Table.new [["A", [3, 2, 1]], ["B", ["x", "y", "z"]]]).select_into_database_table alternative_connection "T0" temporary=True
+
+            r1 = t1.lookup_and_replace t0 key_columns="A"
+            r1.should_fail_with Illegal_Argument
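The contract these clues verify, in condensed form (names as in the tests above): the invariants checked when the query is constructed are re-checked when it is materialized, so concurrent modifications of the underlying data surface as `Invariant_Violation` rather than as silently wrong results:

    r1 = table.lookup_and_replace lookup key_columns="X" allow_unmatched_rows=False
    Problems.assume_no_problems r1    # invariants hold at construction time

    # ...the lookup or source table is modified by another client...

    m2 = r1 |> materialize            # the re-check fires on execution
    m2.should_fail_with Invariant_Violation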
@@ -1,10 +1,12 @@
 from Standard.Base import all
+import Standard.Base.Errors.Illegal_Argument.Illegal_Argument

 import Standard.Table.Data.Type.Value_Type.Bits
 from Standard.Table import all
 from Standard.Table.Errors import all

-from Standard.Database.Errors import Unsupported_Database_Operation
+from Standard.Database import all
+from Standard.Database.Errors import Unsupported_Database_Operation, Integrity_Error

 from Standard.Test import Test, Problems
 import Standard.Test.Extensions
@@ -393,3 +395,12 @@ spec setup =
         # And this should report Column_Type_Mismatch as the more important error too.
         #t1.union t2 allow_type_widening=False on_problems=Problem_Behavior.Report_Error . should_fail_with Column_Type_Mismatch
         t1.union t2 allow_type_widening=False on_problems=Problem_Behavior.Report_Error . should_fail_with No_Output_Columns
+
+        Test.specify "should gracefully handle tables from different backends" <|
+            t1 = table_builder [["A", [1, 2, 3]], ["B", ["a", "b", "c"]]]
+
+            alternative_connection = Database.connect (SQLite In_Memory)
+            t0 = (Table.new [["A", [1, 2, 4]], ["B", ["10", "20", "30"]]]).select_into_database_table alternative_connection "T0" temporary=True
+
+            r1 = t1.union t0
+            r1.should_fail_with Illegal_Argument
@@ -1,10 +1,12 @@
 from Standard.Base import all
+import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
 import Standard.Base.Errors.Illegal_State.Illegal_State

-from Standard.Table import all hiding Table
+from Standard.Table import all
 from Standard.Table.Errors import all

-from Standard.Database.Errors import Unsupported_Database_Operation
+from Standard.Database import all
+from Standard.Database.Errors import Unsupported_Database_Operation, Integrity_Error

 from Standard.Test import Test, Problems
 import Standard.Test.Extensions
@@ -236,3 +238,12 @@ spec setup =
         padded.at "times" . value_type . should_equal Value_Type.Time
         padded.at "datetimes" . value_type . should_equal Value_Type.Date_Time
         padded.at "mixed" . value_type . should_equal Value_Type.Mixed
+
+        Test.specify "should gracefully handle tables from different backends" <|
+            t1 = table_builder [["X", [1, 2, 3]], ["Y", [4, 5, 6]]]
+
+            alternative_connection = Database.connect (SQLite In_Memory)
+            t0 = (Table.new [["W", [1, 2, 4]]]).select_into_database_table alternative_connection "T0" temporary=True
+
+            r1 = t1.zip t0
+            r1.should_fail_with Illegal_Argument
@@ -135,6 +135,54 @@ run_tests prefix connection upload =
         r3 = connection.query "SELECT * FROM ........"
         r3.should_fail_with SQL_Error

+    Test.specify "will fail if the table is modified and a column gets removed" <|
+        name = Name_Generator.random_name "removing-column"
+        Problems.assume_no_problems <|
+            (Table.new [["a", [1, 2, 3]], ["b", [4, 5, 6]]]).select_into_database_table connection name temporary=True
+
+        t1 = connection.query name
+        m1 = t1.read
+        Problems.assume_no_problems m1
+        m1.at "a" . to_vector . should_equal [1, 2, 3]
+        m1.at "b" . to_vector . should_equal [4, 5, 6]
+
+        Problems.assume_no_problems <| connection.drop_table name
+        Problems.assume_no_problems <|
+            (Table.new [["a", [100, 200]]]).select_into_database_table connection name temporary=True
+
+        # Reading a column that was kept will work OK
+        t1.at "a" . to_vector . should_equal [100, 200]
+
+        # But reading the whole table will fail on the missing column:
+        m2 = t1.read
+        m2.should_fail_with SQL_Error
+
+    Test.specify "will not fail if the table is modified and a column gets added" <|
+        name = Name_Generator.random_name "adding-column"
+        Problems.assume_no_problems <|
+            (Table.new [["a", [1, 2, 3]], ["b", [4, 5, 6]]]).select_into_database_table connection name temporary=True
+
+        t1 = connection.query name
+        m1 = t1.read
+        Problems.assume_no_problems m1
+        m1.at "a" . to_vector . should_equal [1, 2, 3]
+        m1.at "b" . to_vector . should_equal [4, 5, 6]
+
+        Problems.assume_no_problems <| connection.drop_table name
+        Problems.assume_no_problems <|
+            (Table.new [["a", [100, 200]], ["b", [300, 400]], ["c", [500, 600]]]).select_into_database_table connection name temporary=True
+
+        m2 = t1.read
+        Problems.assume_no_problems m2
+        m2.column_names . should_equal ["a", "b"]
+        m2.at "a" . to_vector . should_equal [100, 200]
+        m2.at "b" . to_vector . should_equal [300, 400]
+
+        t1.at "c" . should_fail_with No_Such_Column
+
+        t2 = connection.query name
+        t2.column_names . should_equal ["a", "b", "c"]
+
 Test.group prefix+"Masking Tables" <|
     Test.specify "should allow to select rows from a table or column based on an expression" <|
         t2 = t1.filter (t1.at "a" == 1)