Infer SQLite types locally (#6381)

Closes #6208
Radosław Waśko 2023-04-24 12:55:12 +02:00 committed by GitHub
parent 8b62deba7d
commit f3873f9768
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 111 additions and 86 deletions

View File

@@ -121,21 +121,24 @@ type Column
         is always passed as the first argument).
       - new_name: The name of the resulting column.
     make_op self op_kind operands new_name =
-        type_mapping = self.connection.dialect.get_type_mapping
-        prepare_operand operand = case operand of
-            other_column : Column ->
-                if Helpers.check_integrity self other_column then other_column.expression else
-                    Error.throw <| Unsupported_Database_Operation.Error "Cannot use columns coming from different contexts in one expression without a join."
-            constant ->
-                SQL_Expression.Constant constant
+        checked_support = if self.connection.dialect.is_supported op_kind then True else
+            Error.throw (Unsupported_Database_Operation.Error "The operation "+op_kind+" is not supported by this backend.")
+        checked_support.if_not_error <|
+            type_mapping = self.connection.dialect.get_type_mapping
+            prepare_operand operand = case operand of
+                other_column : Column ->
+                    if Helpers.check_integrity self other_column then other_column.expression else
+                        Error.throw <| Unsupported_Database_Operation.Error "Cannot use columns coming from different contexts in one expression without a join."
+                constant ->
+                    SQL_Expression.Constant constant
-        expressions = operands.map prepare_operand
-        new_expr = SQL_Expression.Operation op_kind ([self.expression] + expressions)
+            expressions = operands.map prepare_operand
+            new_expr = SQL_Expression.Operation op_kind ([self.expression] + expressions)
-        infer_from_database_callback expression =
-            SQL_Type_Reference.new self.connection self.context expression
-        new_type_ref = type_mapping.infer_return_type infer_from_database_callback op_kind [self]+operands new_expr
-        Column.Value new_name self.connection new_type_ref new_expr self.context
+            infer_from_database_callback expression =
+                SQL_Type_Reference.new self.connection self.context expression
+            new_type_ref = type_mapping.infer_return_type infer_from_database_callback op_kind [self]+operands new_expr
+            Column.Value new_name self.connection new_type_ref new_expr self.context
     ## PRIVATE
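
Note: the `infer_from_database_callback` defined above is now only a fallback hook — the SQLite mapping later in this commit ignores it entirely. For contrast, a dialect that still wants to ask the database could implement `infer_return_type` as a plain delegation; a minimal, hypothetical sketch (not the implementation of any bundled backend):

    ## PRIVATE
       A mapping that defers entirely to the database-reported metadata,
       mirroring the callback shape used by `make_op` above.
    infer_return_type : (SQL_Expression -> SQL_Type_Reference) -> Text -> Vector -> SQL_Expression -> SQL_Type_Reference
    infer_return_type infer_from_database_callback op_name arguments expression =
        _ = [op_name, arguments]
        infer_from_database_callback expression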

View File

@@ -179,6 +179,13 @@ type Dialect
         _ = aggregate
         Unimplemented.throw "This is an interface only."
+    ## PRIVATE
+       Checks if an operation is supported by the dialect.
+    is_supported : Text -> Boolean
+    is_supported self operation =
+        _ = operation
+        Unimplemented.throw "This is an interface only."
 ## PRIVATE
    The dialect of SQLite databases.
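
Note: a concrete dialect can satisfy this new interface by consulting the operation dictionary of its SQL generator. A sketch, assuming the generator is exposed as `internal_generator_dialect` with an `operation_map` (the names used by the new test at the end of this commit); the actual implementation may differ:

    ## PRIVATE
    is_supported : Text -> Boolean
    is_supported self operation =
        self.internal_generator_dialect.operation_map.contains_key operation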

View File

@@ -140,8 +140,7 @@ type SQLite_Dialect
     ## PRIVATE
     make_cast : Internal_Column -> SQL_Type -> (SQL_Expression -> SQL_Type_Reference) -> Internal_Column
-    make_cast self column target_type infer_result_type_from_database_callback =
-        _ = infer_result_type_from_database_callback
+    make_cast self column target_type _ =
         mapping = self.get_type_mapping
         sql_type_text = mapping.sql_type_to_text target_type
         new_expression = SQL_Expression.Operation "CAST" [column.expression, SQL_Expression.Literal sql_type_text]
@@ -172,8 +171,8 @@ type SQLite_Dialect
     ## PRIVATE
     prepare_fetch_types_query : SQL_Expression -> Context -> SQL_Statement
     prepare_fetch_types_query self expression context =
-        minimized_context = context.set_limit 1
-        self.generate_sql (Query.Select [["typed_column", expression]] minimized_context)
+        _ = [expression, context]
+        Panic.throw (Illegal_State.Error "Type inference by asking the Database for the expected types is not supported in SQLite since it tended to give wrong results. This should have never been called - if it was - that is a bug in the Database library.")
     ## PRIVATE
     check_aggregate_support : Aggregate_Column -> Boolean ! Unsupported_Database_Operation

View File

@@ -85,59 +85,15 @@ type SQLite_Type_Mapping
     sql_type_to_text sql_type = SQL_Type_Mapping.default_sql_type_to_text sql_type
     ## PRIVATE
-       The SQLite type mapping takes special measures to keep boolean columns
-       boolean even if the Database will say that they are numeric.
-       To do so, any operation that returns booleans will override its return
-       type to boolean, and operations that return the same type as inputs will
-       also ensure to override to the boolean type if the input was boolean. In
-       particular, if the operations accept multiple arguments, they will
-       override the return type to boolean if all the input arguments had
-       boolean type.
+       The SQL type mapping uses the same logic as the in-memory backend, just
+       simplified to only the types that it supports. It does not rely on the
+       Database to tell the expected types, because it has been found to be
+       unreliable in more complex expressions.
     infer_return_type : (SQL_Expression -> SQL_Type_Reference) -> Text -> Vector -> SQL_Expression -> SQL_Type_Reference
-    infer_return_type infer_from_database_callback op_name arguments expression =
-        return value_type =
-            sql_type = SQLite_Type_Mapping.value_type_to_sql value_type Problem_Behavior.Ignore
-            SQL_Type_Reference.from_constant sql_type
-        infer_default_type =
-            infer_from_database_callback expression
-        find_type arg = case arg of
-            column : Column -> column.value_type
-            internal_column : Internal_Column ->
-                SQLite_Type_Mapping.sql_type_to_value_type internal_column.sql_type_reference.get
-            enso_value -> Enso_Types.most_specific_value_type enso_value use_smallest=True
-        reconcile_types types =
-            result = Value_Type_Helpers.find_common_type types strict=False
-            # We remap Mixed to Char, to be consistent with our main mapping.
-            if result == Value_Type.Mixed then default_text else result
-        ## We actually re-use the logic from the in-memory backend, since the
-           SQLite types are a small subset of that and the logic for SQLite
-           essentially forms a proper sub-algebra (in the universal algebra
-           terms).
-        find_a_common_type _ =
-            inputs_types = arguments.map find_type
-            return (reconcile_types inputs_types)
-        handle_iif _ =
-            if arguments.length != 3 then
-                Panic.throw (Illegal_State.Error "Impossible: IIF must have 3 arguments. This is a bug in the Database library.")
-            inputs_types = arguments.drop 1 . map find_type
-            return (reconcile_types inputs_types)
-        always_boolean_ops = ["==", "!=", "equals_ignore_case", ">=", "<=", "<", ">", "BETWEEN", "AND", "OR", "NOT", "IS_NULL", "IS_NAN", "IS_EMPTY", "LIKE", "IS_IN", "starts_with", "ends_with", "contains"]
-        always_text_ops = ["ADD_TEXT", "CONCAT", "CONCAT_QUOTE_IF_NEEDED"]
-        merge_input_types_ops = ["ROW_MAX", "ROW_MIN", "MAX", "MIN", "FIRST", "LAST", "FIRST_NOT_NULL", "LAST_NOT_NULL", "FILL_NULL"]
-        others = [["IIF", handle_iif]]
-        mapping = Map.from_vector <|
-            v1 = always_boolean_ops.map [_, const (return Value_Type.Boolean)]
-            v2 = merge_input_types_ops.map [_, find_a_common_type]
-            v3 = always_text_ops.map [_, const (return default_text)]
-            v1 + v2 + v3 + others
-        handler = mapping.get op_name (_ -> infer_default_type)
-        handler Nothing
+    infer_return_type _ op_name arguments _ =
+        handler = operations_map.get op_name (_ -> Error.throw (Illegal_State.Error "Impossible: Unknown operation "+op_name+". This is a bug in the Database library."))
+        sql_type = handler arguments
+        SQL_Type_Reference.from_constant sql_type
     ## PRIVATE
        SQLite `ResultSet` metadata may differ row-by-row, so we cannot rely on
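
Note: after this rewrite the result type is decided purely by a lookup into `operations_map` (added in the next hunk); both the callback and the expression arguments are ignored. A hypothetical illustration of the new behaviour (expected results given as comments, assuming the relevant modules are imported; this snippet is not part of the commit):

    mapping = Dialect.sqlite.get_type_mapping
    unused_callback = _ -> Panic.throw (Illegal_State.Error "Not used by the SQLite mapping.")
    ref = mapping.infer_return_type unused_callback "==" [] Nothing
    ref.get
    # expected: SQLite_Types.boolean - comparisons are boolean regardless of their inputs
    mapping.infer_return_type unused_callback "NO_SUCH_OP" [] Nothing
    # expected: an Illegal_State error reporting the unknown operation, instead of
    # silently asking the database as before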
@@ -159,15 +115,58 @@ type SQLite_Type_Mapping
    For types like dates - we map them to unsupported type, because date
    operations in SQLite are currently not supported due to their weird storage.
 simple_types_map = Map.from_vector <|
-    ints = [Types.TINYINT, Types.SMALLINT, Types.BIGINT, Types.INTEGER] . map x-> [x, Value_Type.Integer Bits.Bits_64]
-    floats = [Types.DOUBLE, Types.REAL, Types.FLOAT] . map x-> [x, Value_Type.Float Bits.Bits_64]
+    ints = [Types.TINYINT, Types.SMALLINT, Types.BIGINT, Types.INTEGER] . map x-> [x, default_integer]
+    floats = [Types.DOUBLE, Types.REAL, Types.FLOAT] . map x-> [x, default_float]
     # We treat numeric as a float, since that is what really sits in SQLite under the hood.
-    numerics = [Types.DECIMAL, Types.NUMERIC] . map x-> [x, Value_Type.Float Bits.Bits_64]
+    numerics = [Types.DECIMAL, Types.NUMERIC] . map x-> [x, default_float]
     strings = [Types.CHAR, Types.VARCHAR] . map x-> [x, default_text]
     blobs = [Types.BINARY, Types.BLOB, Types.CLOB] . map x-> [x, Value_Type.Binary]
     special_types = [[Types.BOOLEAN, Value_Type.Boolean]]
     ints + floats + numerics + strings + blobs + special_types
+## PRIVATE
+   Maps operation names to functions that infer its result type.
+operations_map : Map Text (Vector -> SQL_Type)
+operations_map =
+    find_type arg = case arg of
+        column : Column -> column.value_type
+        internal_column : Internal_Column ->
+            SQLite_Type_Mapping.sql_type_to_value_type internal_column.sql_type_reference.get
+        enso_value -> Enso_Types.most_specific_value_type enso_value use_smallest=True
+    ## We actually re-use the logic from the in-memory backend, since the
+       SQLite types essentially implement a very simple subset of our types.
+    find_a_common_type arguments =
+        types = arguments.map find_type
+        unified = Value_Type_Helpers.find_common_type types strict=False
+        # We remap Mixed to Char, to be consistent with our main mapping.
+        result = if unified == Value_Type.Mixed then default_text else unified
+        SQLite_Type_Mapping.value_type_to_sql result Problem_Behavior.Ignore
+    handle_iif arguments =
+        if arguments.length != 3 then
+            Panic.throw (Illegal_State.Error "Impossible: IIF must have 3 arguments. This is a bug in the Database library.")
+        find_a_common_type (arguments.drop 1)
+    handle_cast _ =
+        Panic.throw (Illegal_State.Error "Cast relies on its own type inference logic, so this code should never be reached. This is a bug in the Database library.")
+    always_boolean_ops = ["==", "!=", "equals_ignore_case", ">=", "<=", "<", ">", "BETWEEN", "AND", "OR", "NOT", "IS_NULL", "IS_EMPTY", "LIKE", "IS_IN", "IS_IN_COLUMN", "starts_with", "ends_with", "contains", "BOOL_OR"]
+    always_floating_ops = ["/", "mod", "AVG", "STDDEV_POP", "STDDEV_SAMP"]
+    always_text_ops = ["ADD_TEXT", "CONCAT", "CONCAT_QUOTE_IF_NEEDED", "MAKE_CASE_SENSITIVE", "FOLD_CASE", "TRIM", "LTRIM", "RTRIM"]
+    always_integer_ops = ["COUNT", "COUNT_IS_NULL", "COUNT_DISTINCT", "COUNT_DISTINCT_INCLUDE_NULL", "COUNT_EMPTY", "COUNT_NOT_EMPTY", "COUNT_ROWS"]
+    arithmetic_ops = ["ADD_NUMBER", "-", "*", "^", "%", "SUM"]
+    merge_input_types_ops = ["ROW_MAX", "ROW_MIN", "MAX", "MIN", "FILL_NULL", "COALESCE"]
+    others = [["IIF", handle_iif], ["CAST", handle_cast]]
+    Map.from_vector <|
+        v1 = always_boolean_ops.map [_, const SQLite_Types.boolean]
+        v2 = always_floating_ops.map [_, const SQLite_Types.real]
+        v3 = always_integer_ops.map [_, const SQLite_Types.integer]
+        v4 = always_text_ops.map [_, const SQLite_Types.text]
+        v5 = arithmetic_ops.map [_, find_a_common_type]
+        v6 = merge_input_types_ops.map [_, find_a_common_type]
+        v1 + v2 + v3 + v4 + v5 + v6 + others
 ## PRIVATE
 type SQLite_Types
     ## PRIVATE
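
Note: each entry of `operations_map` maps an operation name to a function from the operation's argument list to an SQLite SQL type, so local inference is just a `get`-and-apply. A hypothetical illustration (expected results stated as comments; plain Enso values work as arguments because `find_type` handles them via `most_specific_value_type`):

    multiply_handler = operations_map.get "*"
    multiply_handler [1, 2]
    # expected: SQLite_Types.integer - both inputs unify to an integer type
    multiply_handler [1, 2.5]
    # expected: SQLite_Types.real - Integer and Float unify to Float
    (operations_map.get "==") [1, 2.5]
    # expected: SQLite_Types.boolean - comparisons ignore their input types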
@@ -191,3 +190,9 @@ type SQLite_Types
 ## PRIVATE
 default_text = Value_Type.Char size=Nothing variable_length=True
+## PRIVATE
+default_float = Value_Type.Float Bits.Bits_64
+## PRIVATE
+default_integer = Value_Type.Integer Bits.Bits_64
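
Note: `default_float` and `default_integer` are factored out next to the existing `default_text` so that the JDBC metadata mapping above consistently reports 64-bit types. For instance (hypothetical REPL-style checks, following directly from the `simple_types_map` definition):

    simple_types_map.get Types.INTEGER
    # expected: default_integer, i.e. Value_Type.Integer Bits.Bits_64
    simple_types_map.get Types.NUMERIC
    # expected: default_float, i.e. Value_Type.Float Bits.Bits_64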

View File

@@ -64,8 +64,7 @@ spec setup =
             c4.value_type.is_integer . should_be_true
             c4.to_vector . should_equal [1001, 1000, 1001]
-        pending_sqlite_types = if prefix.contains "SQLite" then "TODO: perform SQLite type inference locally - #6208"
-        Test.specify "should not lose the type after further operations were performed on the result, even if the first row is NULL" pending=pending_sqlite_types <|
+        Test.specify "should not lose the type after further operations were performed on the result, even if the first row is NULL" <|
             t = table_builder [["X", [Nothing, 1, 2, 3000]], ["Y", [Nothing, True, False, True]]]
             c1 = t.at "X" . cast Value_Type.Char
             c2 = t.at "Y" . cast Value_Type.Integer

View File

@@ -5,7 +5,7 @@ import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
 from Standard.Table import Value_Type
 from Standard.Table.Errors import all
-from Standard.Database.Errors import SQL_Error
+from Standard.Database.Errors import all
 from Standard.Test import Test, Problems
 import Standard.Test.Extensions
@@ -328,11 +328,15 @@ spec setup =
         (y ^ "a").should_fail_with Invalid_Value_Type
         (y ^ 42).should_fail_with Invalid_Value_Type
-    if setup.test_selection.is_nan_and_nothing_distinct then
-        Test.specify "should support is_nan" <|
+    case setup.test_selection.is_nan_and_nothing_distinct of
+        True -> Test.specify "should support is_nan" <|
             t = table_builder [["X", [1.5, 2, Number.nan]], ["Y", [1, 2, 3]]]
             t.at "X" . is_nan . to_vector . should_equal [False, False, True]
             t.at "Y" . is_nan . should_fail_with Invalid_Value_Type
+        False -> Test.specify "should report that is_nan is not supported" <|
+            t = table_builder [["X", [1.5]]]
+            t.at "X" . is_nan . should_fail_with Unsupported_Database_Operation
     Test.specify "should support is_blank" <|
         t = table_builder [["X", [1.5, 2, Number.nan, Nothing]], ["Y", [1, Nothing, 3, 4]]]
         t.at "X" . is_blank treat_nans_as_blank=True . to_vector . should_equal [False, False, True, True]

View File

@@ -4,6 +4,8 @@ import Standard.Table.Data.Type.Value_Type.Bits
 from Standard.Table import Aggregate_Column, Value_Type
 from Standard.Table.Errors import Invalid_Value_Type, Inexact_Type_Coercion
+import Standard.Database.Data.Dialect
+import Standard.Database.Internal.SQLite.SQLite_Type_Mapping
 from Standard.Database import Database, SQLite, In_Memory, SQL_Query
 from Standard.Test import Problems, Test, Test_Suite
@@ -57,26 +59,25 @@ spec =
         t = make_table "foo" [["a", "int"], ["b", "text"], ["c", "boolean"], ["d", "double precision"]]
         t.compute 'starts_with([b], "1")' . value_type . should_equal Value_Type.Boolean
-        t.compute '[a] * [d]' . value_type . should_equal (Value_Type.Float Bits.Bits_64)
-        t.compute '[a] + 100' . value_type . should_equal (Value_Type.Float Bits.Bits_64)
-        t.compute '[a] + 100.0' . value_type . should_equal (Value_Type.Float Bits.Bits_64)
+        t.compute '[a] * [d]' . value_type . should_equal Value_Type.Float
+        t.compute '[a] + 100' . value_type . should_equal Value_Type.Integer
+        t.compute '[a] + 100.0' . value_type . should_equal Value_Type.Float
         t.compute '[c] || not [c]' . value_type . should_equal Value_Type.Boolean
         t.compute '[b] + "_suf"' . value_type . should_equal Value_Type.Char
         t.compute 'fill_nothing([c], false)' . value_type . should_equal Value_Type.Boolean
         t.compute 'fill_empty([b], "<NA>")' . value_type . should_equal Value_Type.Char
         t.compute 'is_blank([b])' . value_type . should_equal Value_Type.Boolean
         t.compute 'is_empty([b])' . value_type . should_equal Value_Type.Boolean
         t.compute 'is_nan([d])' . value_type . should_equal Value_Type.Boolean
         t.compute 'is_nothing([a])' . value_type . should_equal Value_Type.Boolean
         t2 = t.aggregate [Aggregate_Column.Group_By "b", Aggregate_Column.Sum "a", Aggregate_Column.Maximum "a", Aggregate_Column.Minimum "d", Aggregate_Column.Count_Not_Nothing "c", Aggregate_Column.Concatenate "b", Aggregate_Column.Count]
         t2.at "b" . value_type . should_equal Value_Type.Char
-        t2.at "Sum a" . value_type . should_equal (Value_Type.Float Bits.Bits_64)
-        t2.at "Maximum a" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
-        t2.at "Minimum d" . value_type . should_equal (Value_Type.Float Bits.Bits_64)
-        t2.at "Count Not Nothing c" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
+        t2.at "Sum a" . value_type . should_equal Value_Type.Integer
+        t2.at "Maximum a" . value_type . should_equal Value_Type.Integer
+        t2.at "Minimum d" . value_type . should_equal Value_Type.Float
+        t2.at "Count Not Nothing c" . value_type . should_equal Value_Type.Integer
         t2.at "Concatenate b" . value_type . should_equal Value_Type.Char
-        t2.at "Count" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
+        t2.at "Count" . value_type . should_equal Value_Type.Integer
         # First is not currently implemented in SQLite
         # t2.at "First c" . value_type . should_equal Value_Type.Boolean
@@ -91,4 +92,11 @@ spec =
         t2.at "b" . value_type . should_equal Value_Type.Integer
         Problems.expect_warning Inexact_Type_Coercion t2
+    Test.specify "should be able to infer types for all supported operations" <|
+        dialect = Dialect.sqlite
+        internal_mapping = dialect.internal_generator_dialect.operation_map
+        operation_type_mapping = SQLite_Type_Mapping.operations_map
+        operation_type_mapping.keys.sort . should_equal internal_mapping.keys.sort
 main = Test_Suite.run_main spec