From a9a464af37427796821c24b6af46670cdddea98a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Sat, 20 May 2023 00:11:23 +0200 Subject: [PATCH] Implement simple variants of `parse` for the Database backend (#6731) Implements the simplest `parse` scenarios for the Database backend. Before #6711 these could have been done by `cast`, but in #6711 the APIs were unified to only allow casting to the same set of types in both in-memory and Database. Converting Text to other types is supposed to be done by `parse` and not `cast`, so the ability to use `cast` for rudimentary parsing is removed in the Database backend to make it consistent with in-memory. But now it is lacking any, even simplest, Text->Int/Text->Date support. To alleviate that, the simple scenarios for `parse` are implemented (no support for format customization yet, will boil down to a cast under the hood). --- .../Database/0.0.0-dev/src/Data/Column.enso | 77 +++++++++++++++---- .../Database/0.0.0-dev/src/Data/Table.enso | 76 +++++++++++++++--- .../src/Internal/Base_Generator.enso | 18 ++++- .../src/Internal/SQLite/SQLite_Dialect.enso | 20 ++++- .../Internal/SQLite/SQLite_Type_Mapping.enso | 24 +++++- .../Table/0.0.0-dev/src/Data/Column.enso | 23 ++++-- .../Table/0.0.0-dev/src/Data/Table.enso | 35 ++++++--- .../src/Internal/Widget_Helpers.enso | 8 +- .../Standard/Test/0.0.0-dev/src/Problems.enso | 2 +- .../Conversion_Spec.enso | 37 ++++++++- .../Date_Time_Spec.enso | 4 +- .../Types/SQLite_Type_Mapping_Spec.enso | 12 +-- 12 files changed, 272 insertions(+), 64 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso index 4f354a115e..38bf8b7337 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso @@ -990,12 +990,55 @@ type Column Error.throw (Illegal_State.Error "The dialect 
"+self.connection.dialect.name+" does not support a boolean type. The implementation of `is_in` should be revised to account for this. This is an internal issue with the Database library.") Column.Value new_name self.connection new_type_ref new_expr self.context - ## Parsing values is not supported in database columns. - @type Widget_Helpers.parse_type_selector - parse : Value_Type | Auto -> Text | Data_Formatter -> Problem_Behavior -> Column - parse self type=Auto format=Data_Formatter.Value on_problems=Report_Warning = - _ = [type, format, on_problems] - Error.throw <| Unsupported_Database_Operation.Error "`Column.parse` is not implemented yet for the Database backends." + ## Parses a text column into values. + + In the Database backends, the default formatting settings of the + particular database are used. + + In the in-memory backend, the default parser options only parse values + where the process is reversible (e.g., 0123 would not be converted to an + integer as there is a leading 0). However, settings in the + `Data_Formatter` can control this. + + Arguments: + - type: The type to parse the column to. Unlike the in-memory backend, + which defaults to `Auto` (inferring the type from the data), the Database + backends have no default and reject `Auto`; a specific type is required. + - format: The formatting settings to use when parsing the column. + For `Date`, `Time_Of_Day` and `Date_Time`, a Java date time style + can be used. For `Boolean`, it should be two values that represent true + and false, separated by a `|`. Alternatively, a `Data_Formatter` can be + passed to provide complete customisation of the formatting. If + `Nothing` is provided, the default formatting settings of the backend + will be used. `Nothing` is currently the only setting accepted by the + Database backends. + - on_problems: Specifies how to handle if a problem occurs, raising as a + warning by default. + + ! 
Error Conditions + + - If the column is not a text column, an `Invalid_Value_Type` error is + raised. + - If some values in the column did not match the expected datatype + format, an `Invalid_Format` problem is reported. The problematic + cells are replaced with `Nothing`. + - If custom formatting settings were provided, but the database backend + does not support customization, an `Unsupported_Database_Operation` + error is reported. + @type (Widget_Helpers.parse_type_selector include_auto=False) + parse : Value_Type | Auto -> Text | Data_Formatter | Nothing -> Problem_Behavior -> Column + parse self type format=Nothing on_problems=Report_Warning = + check_parameters = + if type == Auto then Error.throw (Unsupported_Database_Operation.Error "The `Auto` parse type is not supported by the Database backend. Either pick a specific type or materialize the table to memory using `.read`.") else + case format of + Nothing -> Nothing + _ -> Error.throw (Unsupported_Database_Operation.Error "Custom formatting is not supported by the Database backend. Please set the format to `Nothing` to indicate that the default Database settings can be used, or if custom formatting is needed, materialize the table to memory using `.read` first.") + check_parameters.if_not_error <| + Value_Type.expect_text self <| + ## In the future we may have some specific logic, for example + allowing to support formatting settings. For now, the + Database parse just boils down to a simple CAST. + self.internal_do_cast type on_problems ## Formatting values is not supported in database columns. format : Text | Column -> Locale -> Column ! Illegal_Argument @@ -1044,14 +1087,20 @@ type Column cast : Value_Type -> Problem_Behavior -> Column ! 
Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure cast self value_type on_problems=Problem_Behavior.Report_Warning = check_cast_compatibility self.value_type value_type <| - dialect = self.connection.dialect - type_mapping = dialect.get_type_mapping - target_sql_type = type_mapping.value_type_to_sql value_type on_problems - target_sql_type.if_not_error <| - infer_from_database new_expression = - SQL_Type_Reference.new self.connection self.context new_expression - new_column = dialect.make_cast self.as_internal target_sql_type infer_from_database - Column.Value new_column.name self.connection new_column.sql_type_reference new_column.expression self.context + self.internal_do_cast value_type on_problems + + ## PRIVATE + Shares the core CAST logic between `cast` and `parse`. + internal_do_cast : Value_Type -> Problem_Behavior -> Column + internal_do_cast self value_type on_problems = + dialect = self.connection.dialect + type_mapping = dialect.get_type_mapping + target_sql_type = type_mapping.value_type_to_sql value_type on_problems + target_sql_type.if_not_error <| + infer_from_database new_expression = + SQL_Type_Reference.new self.connection self.context new_expression + new_column = dialect.make_cast self.as_internal target_sql_type infer_from_database + Column.Value new_column.name self.connection new_column.sql_type_reference new_column.expression self.context ## ALIAS Transform Column diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index bc1cd97025..38ca34c51c 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -1405,17 +1405,68 @@ type Table msg = "Cross tab of database tables is not supported, the table has to be materialized first with `read`." 
Error.throw (Unsupported_Database_Operation.Error msg) - ## Parsing values is not supported in database tables, the table has to be - loaded into memory first with `read`. - @type Widget_Helpers.parse_type_selector + ## Parses columns within a Table to a specific value type. + By default, all `Text` columns are parsed to the requested type (columns + with other types are not affected). + + In the Database backends, the default formatting settings of the + particular database are used. + + In the in-memory backend, the default parser options only parse values + where the process is reversible (e.g., 0123 would not be converted to an + integer as there is a leading 0). However, settings in the + `Data_Formatter` can control this. + + Arguments: + - columns: The columns to parse. If not specified, all text columns + will be parsed. + - type: The type to parse the columns to. Unlike the in-memory backend, + which defaults to `Auto` (inferring the type from the data), the Database + backends have no default and reject `Auto`; a specific type is required. + - format: The formatting settings to use when parsing the columns. + For `Date`, `Time_Of_Day` and `Date_Time`, a Java date time style + can be used. For `Boolean`, it should be two values that represent true + and false, separated by a `|`. Alternatively, a `Data_Formatter` can be + passed to provide complete customisation of the formatting. If + `Nothing` is provided, the default formatting settings of the backend + will be used. `Nothing` is currently the only setting accepted by the + Database backends. + - error_on_missing_columns: if `True` (the default) raises an error if + any column is missing. Otherwise, reported as a problem. + - on_problems: Specifies how to handle if a problem occurs, raising as a + warning by default. + + ! Error Conditions + + - If a column in `columns` is not in the input table, a + `Missing_Input_Columns` is raised as an error or problem + following the `error_on_missing_columns` rules. 
+ - If a column index is out of range, a `Column_Indexes_Out_Of_Range` is + raised as an error or problem following the + `error_on_missing_columns` rules. + - If a column selected for parsing is not a text column, an + `Invalid_Value_Type` error is raised. + - If no columns have been selected for parsing, + a `No_Input_Columns_Selected` error is raised. + - If custom formatting settings were provided, but the database backend + does not support customization, an `Unsupported_Database_Operation` + error is reported. + + > Example + Parse dates in a column. + + table.parse "birthday" Value_Type.Date + @type (Widget_Helpers.parse_type_selector include_auto=False) @columns Widget_Helpers.make_column_name_vector_selector - parse : Text | Integer | Column_Selector | Vector (Text | Integer | Column_Selector) -> Value_Type | Auto -> Text | Data_Formatter -> Boolean -> Problem_Behavior -> Table - parse columns=(self.columns . filter (c-> c.value_type.is_text) . map .name) type=Auto format=Data_Formatter.Value error_on_missing_columns=True on_problems=Report_Warning = - ## Avoid unused arguments warning. We cannot rename arguments to `_`, - because we need to keep the API consistent with the in-memory table. - _ = [columns, type, format, error_on_missing_columns, on_problems] - msg = "Parsing values is not supported in database tables, the table has to be materialized first with `read`." - Error.throw (Unsupported_Database_Operation.Error msg) + parse : Text | Integer | Column_Selector | Vector (Text | Integer | Column_Selector) -> Value_Type | Auto -> Text | Data_Formatter | Nothing -> Boolean -> Problem_Behavior -> Table + parse self columns=(self.columns . filter (c-> c.value_type.is_text) . 
map .name) type format=Nothing error_on_missing_columns=True on_problems=Report_Warning = + selected = self.columns_helper.resolve_columns columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems + result = selected.fold self table-> column_to_parse-> + new_column = column_to_parse.parse type format on_problems + table.set new_column new_name=column_to_parse.name set_mode=Set_Mode.Update + ## The temporary variable for result is added due to the #6765 bug. + It should be removed once it is fixed. + result ## Splits a column of text into a set of new columns. The original column will be removed from the table. @@ -1572,9 +1623,12 @@ type Table cast : (Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector)) -> Value_Type -> Boolean -> Problem_Behavior -> Table ! Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure cast self columns=[0] value_type error_on_missing_columns=True on_problems=Problem_Behavior.Report_Warning = selected = self.columns_helper.resolve_columns columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems - selected.fold self table-> column_to_cast-> + result = selected.fold self table-> column_to_cast-> new_column = column_to_cast.cast value_type on_problems table.set new_column new_name=column_to_cast.name set_mode=Set_Mode.Update + ## The temporary variable for result is added due to the #6765 bug. + It should be removed once it is fixed. 
+ result ## ALIAS dropna ALIAS drop_missing_rows diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso index e51d1aec70..f020784232 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso @@ -175,7 +175,7 @@ base_dialect = fun = name -> [name, make_function name] arith = [["ADD_NUMBER", make_binary_op "+"], ["ADD_TEXT", make_binary_op "||"], bin "-", bin "*", bin "/", bin "%", ["mod", make_function "MOD"], ["^", make_function "POWER"]] - logic = [bin "AND", bin "OR", unary "NOT", ["IIF", make_iif]] + logic = [bin "AND", bin "OR", unary "NOT", ["IIF", make_iif], ["CASE", case_when]] eq = lift_binary_op "==" make_equals neq = lift_binary_op "!=" make_not_equals compare = [eq, neq, bin "<", bin ">", bin "<=", bin ">=", ["BETWEEN", make_between]] @@ -206,6 +206,22 @@ make_iif arguments = case arguments.length of _ -> Error.throw <| Illegal_State.Error ("Invalid amount of arguments for operation IIF") +## PRIVATE + For Internal use. + An IR expression for constructing SQL `CASE` expressions. The arguments are `condition, result` pairs followed by a single fallback (`ELSE`) value, so the shortest valid call has 3 arguments. +case_when : Vector Builder -> Builder +case_when arguments = + if arguments.length < 3 then Error.throw (Illegal_State.Error "CASE_WHEN needs at least 3 arguments.") else + fallback = arguments.last + cases = arguments.drop (Last 1) + if cases.length % 2 != 0 then Error.throw (Illegal_State.Error "CASE_WHEN expects an odd number of arguments (two arguments for each case and a fallback).") else + n = cases.length . div 2 + cases_exprs = 0.up_to n . 
map i-> + condition = cases.at 2*i + result = cases.at (2*i)+1 + Builder.code "WHEN " ++ condition ++ " THEN " ++ result + Builder.code "CASE " ++ Builder.join " " cases_exprs ++ " ELSE " ++ fallback ++ " END" + ## PRIVATE simple_cast = Base_Generator.lift_binary_op "CAST" a-> b-> Builder.code "CAST(" ++ a ++ " AS " ++ b ++ ")" diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso index 703e58b21e..6ac22bbd2b 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso @@ -1,6 +1,7 @@ from Standard.Base import all hiding First, Last import Standard.Base.Errors.Illegal_Argument.Illegal_Argument import Standard.Base.Errors.Illegal_State.Illegal_State +import Standard.Base.Runtime.Ref.Ref import Standard.Table.Data.Aggregate_Column.Aggregate_Column import Standard.Table.Internal.Naming_Helpers.Naming_Helpers @@ -367,7 +368,18 @@ mod_op = Base_Generator.lift_binary_op "mod" x-> y-> ## PRIVATE It will return `Nothing` if the type does not require custom logic. 
make_custom_cast column target_value_type type_mapping = - if target_value_type.is_text then - column_type = type_mapping.sql_type_to_value_type column.sql_type_reference.get - if column_type == Value_Type.Boolean then - SQL_Expression.Operation "IIF" [column.expression, SQL_Expression.Literal "'true'", SQL_Expression.Literal "'false'"] + result = Ref.new Nothing + column_type = + type_mapping.sql_type_to_value_type column.sql_type_reference.get + if target_value_type.is_text && (column_type == Value_Type.Boolean) then + expr = SQL_Expression.Operation "IIF" [column.expression, SQL_Expression.Literal "'true'", SQL_Expression.Literal "'false'"] + result.put expr + + if (target_value_type == Value_Type.Boolean) && column_type.is_text then + lower = SQL_Expression.Operation "FOLD_CASE" [column.expression] + is_true = SQL_Expression.Operation "==" [lower, SQL_Expression.Literal "'true'"] + is_false = SQL_Expression.Operation "==" [lower, SQL_Expression.Literal "'false'"] + expr = SQL_Expression.Operation "CASE" [is_true, SQL_Expression.Literal "TRUE", is_false, SQL_Expression.Literal "FALSE", SQL_Expression.Literal "NULL"] + result.put expr + + result.get diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso index 81a1990e23..c56fc50f28 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso @@ -14,6 +14,7 @@ import project.Internal.IR.Internal_Column.Internal_Column import project.Internal.IR.SQL_Expression.SQL_Expression import project.Internal.SQL_Type_Mapping import project.Internal.SQL_Type_Reference.SQL_Type_Reference +from project.Errors import Unsupported_Database_Operation polyglot java import java.sql.Types @@ -59,9 +60,9 @@ type SQLite_Type_Mapping Value_Type.Float _ -> 
SQLite_Types.real Value_Type.Decimal _ _ -> SQLite_Types.numeric Value_Type.Char _ _ -> SQLite_Types.text - Value_Type.Time -> SQLite_Types.text - Value_Type.Date -> SQLite_Types.text - Value_Type.Date_Time _ -> SQLite_Types.text + Value_Type.Time -> unsupported_date_time + Value_Type.Date -> unsupported_date_time + Value_Type.Date_Time _ -> unsupported_date_time Value_Type.Binary _ _ -> SQLite_Types.blob Value_Type.Mixed -> SQLite_Types.text Value_Type.Unsupported_Data_Type type_name underlying_type -> @@ -158,6 +159,17 @@ operations_map = Panic.throw (Illegal_State.Error "Impossible: IIF must have 3 arguments. This is a bug in the Database library.") find_a_common_type (arguments.drop 1) + handle_case arguments = + fallback = arguments.last + cases = arguments.drop (Last 1) + if cases.length % 2 != 0 then + Panic.throw (Illegal_State.Error "Impossible: constructed a CASE with an odd number of case arguments.") + if cases.is_empty then + Panic.throw (Illegal_State.Error "Impossible: too few cases provided for a CASE statement.") + case_results = cases.take (Index_Sub_Range.Every 2 first=1) + possible_results = case_results + [fallback] + find_a_common_type possible_results + handle_cast _ = Panic.throw (Illegal_State.Error "Cast relies on its own type inference logic, so this code should never be reached. 
This is a bug in the Database library.") @@ -167,7 +179,7 @@ operations_map = always_integer_ops = ["COUNT", "COUNT_IS_NULL", "COUNT_DISTINCT", "COUNT_DISTINCT_INCLUDE_NULL", "COUNT_EMPTY", "COUNT_NOT_EMPTY", "COUNT_ROWS"] arithmetic_ops = ["ADD_NUMBER", "-", "*", "^", "%", "SUM"] merge_input_types_ops = ["ROW_MAX", "ROW_MIN", "MAX", "MIN", "FILL_NULL", "COALESCE"] - others = [["IIF", handle_iif], ["CAST", handle_cast]] + others = [["IIF", handle_iif], ["CAST", handle_cast], ["CASE", handle_case]] Map.from_vector <| v1 = always_boolean_ops.map [_, const SQLite_Types.boolean] v2 = always_floating_ops.map [_, const SQLite_Types.real] @@ -206,3 +218,7 @@ default_float = Value_Type.Float Bits.Bits_64 ## PRIVATE default_integer = Value_Type.Integer Bits.Bits_64 + +## PRIVATE +unsupported_date_time = + Error.throw (Unsupported_Database_Operation.Error "Date/time types are not supported by the SQLite backend.") diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso index 2f949c918d..167fed8054 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso @@ -1094,16 +1094,28 @@ type Column new_vector = self.to_vector.map (Filter_Condition.Is_In true_vector).to_predicate Column.from_vector result_name new_vector - ## Parses a text column into values + ## Parses a text column into values. + + In the Database backends, the default formatting settings of the + particular database are used. + + In the in-memory backend, the default parser options only parse values + where the process is reversible (e.g., 0123 would not be converted to an + integer as there is a leading 0). However, settings in the + `Data_Formatter` can control this. Arguments: - type: The type to parse the column to. Defaults to `Auto` meaning that - the type will be inferred from the data. + the type will be inferred from the data. 
In the Database backends, + `Auto` is not supported, so a specific type must be selected. - format: The formatting settings to use when parsing the column. For `Date`, `Time_Of_Day` and `Date_Time`, a Java date time style can be used. For `Boolean`, it should be two values that represent true and false, separated by a `|`. Alternatively, a `Data_Formatter` can be - passed to provide complete customisation of the formatting. + passed to provide complete customisation of the formatting. If + `Nothing` is provided, the default formatting settings of the backend + will be used. `Nothing` is currently the only setting accepted by the + Database backends. - on_problems: Specifies how to handle if a problem occurs, raising as a warning by default. @@ -1159,14 +1171,15 @@ type Column example_contains = Examples.text_column_1.parse Boolean 'Yes|No' @type Widget_Helpers.parse_type_selector - parse : Value_Type | Auto -> Text | Data_Formatter -> Problem_Behavior -> Column + parse : Value_Type | Auto -> Text | Data_Formatter | Nothing -> Problem_Behavior -> Column parse self type=Auto format=Data_Formatter.Value on_problems=Report_Warning = Value_Type.expect_text self <| formatter = case format of _ : Text -> Data_Formatter.Value.with_format type format _ : Data_Formatter -> format - _ -> Error.throw (Illegal_Argument.Error "Invalid format type. Expected Text or Data_Formatter.") + Nothing -> Data_Formatter.Value + _ -> Error.throw (Illegal_Argument.Error "Invalid format type. 
Expected Text or Data_Formatter or Nothing.") parser = formatter.make_value_type_parser type storage = self.java_column.getStorage diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index 02a074800c..3ef838b49a 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -758,21 +758,28 @@ type Table By default, it looks at all `Text` columns and attempts to deduce the type (columns with other types are not affected). - The default parser options only parse values where the process is - reversible (e.g., 0123 would not be converted to an integer as there is - a leading 0). However, settings in the `Data_Formatter` can - control this. + In the Database backends, the default formatting settings of the + particular database are used. + + In the in-memory backend, the default parser options only parse values + where the process is reversible (e.g., 0123 would not be converted to an + integer as there is a leading 0). However, settings in the + `Data_Formatter` can control this. Arguments: - columns: The columns to parse. If not specified, all text columns will be parsed. - type: The type to parse the columns to. Defaults to `Auto` meaning that - the type will be inferred from the data. + the type will be inferred from the data. In the Database backends, + `Auto` is not supported, so a specific type must be selected. - format: The formatting settings to use when parsing the columns. For `Date`, `Time_Of_Day` and `Date_Time`, a Java date time style can be used. For `Boolean`, it should be two values that represent true and false, separated by a `|`. Alternatively, a `Data_Formatter` can be - passed to provide complete customisation of the formatting. + passed to provide complete customisation of the formatting. If + `Nothing` is provided, the default formatting settings of the backend + will be used. 
`Nothing` is currently the only setting accepted by the + Database backends. - error_on_missing_columns: if `True` (the default) raises an error if any column is missing. Otherwise, reported as a problem. - on_problems: Specifies how to handle if a problem occurs, raising as a @@ -820,17 +827,17 @@ type Table > Example Parse the first and last columns containing Yes/No values as booleans. - table.parse columns=[0, -1] type=Boolean format="Yes|No" + table.parse columns=[0, -1] type=Value_Type.Boolean format="Yes|No" > Example Parse dates in a column in the format `yyyy-MM-dd` (the default format). - table.parse "birthday" Date + table.parse "birthday" Value_Type.Date > Example Parse dates in a column in the format `dd/MM/yyyy`. - table.parse "birthday" Date 'dd/MM/yyyy' + table.parse "birthday" Value_Type.Date 'dd/MM/yyyy' > Example Parse all columns inferring their types, using `,` as the decimal point for numbers. @@ -838,13 +845,14 @@ type Table table.parse format=(Data_Formatter.Value.with_number_formatting decimal_point=',') @type Widget_Helpers.parse_type_selector @columns Widget_Helpers.make_column_name_vector_selector - parse : Text | Integer | Column_Selector | Vector (Text | Integer | Column_Selector) -> Value_Type | Auto -> Text | Data_Formatter -> Boolean -> Problem_Behavior -> Table + parse : Text | Integer | Column_Selector | Vector (Text | Integer | Column_Selector) -> Value_Type | Auto -> Text | Data_Formatter | Nothing -> Boolean -> Problem_Behavior -> Table parse self columns=(self.columns . filter (c-> c.value_type.is_text) . map .name) type=Auto format=Data_Formatter.Value error_on_missing_columns=True on_problems=Report_Warning = formatter = case format of _ : Text -> Data_Formatter.Value.with_format type format _ : Data_Formatter -> format - _ -> Error.throw (Illegal_Argument.Error "Invalid format type. Expected Text or Data_Formatter.") + Nothing -> Data_Formatter.Value + _ -> Error.throw (Illegal_Argument.Error "Invalid format type. 
Expected Text or Data_Formatter or Nothing.") parser = formatter.make_value_type_parser type @@ -919,9 +927,12 @@ type Table cast : (Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector)) -> Value_Type -> Boolean -> Problem_Behavior -> Table ! Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure cast self columns=[0] value_type error_on_missing_columns=True on_problems=Problem_Behavior.Report_Warning = selected = self.columns_helper.resolve_columns columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems - selected.fold self table-> column_to_cast-> + result = selected.fold self table-> column_to_cast-> new_column = column_to_cast.cast value_type on_problems table.set new_column new_name=column_to_cast.name set_mode=Set_Mode.Update + ## The temporary variable for result is added due to the #6765 bug. + It should be removed once it is fixed. + result ## Splits a column of text into a set of new columns. The original column will be removed from the table. diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Widget_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Widget_Helpers.enso index cd53922435..15a38421fc 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Widget_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Widget_Helpers.enso @@ -72,9 +72,11 @@ make_order_by_selector table display=Display.Always = ## PRIVATE Selector for type argument on `Column.parse`. parse_type_selector : Single_Choice -parse_type_selector = +parse_type_selector include_auto=True = valid_parse_targets = Parse_Values_Helper.valid_parse_targets - choice = ['Auto'] + (valid_parse_targets.map t-> 'Value_Type.'+t) - names = ['Auto'] + valid_parse_targets + + prefix = if include_auto then ['Auto'] else [] + choice = prefix + (valid_parse_targets.map t-> 'Value_Type.'+t) + names = prefix + valid_parse_targets options = names.zip choice . 
map pair-> Option pair.first pair.second Single_Choice display=Display.Always values=options diff --git a/distribution/lib/Standard/Test/0.0.0-dev/src/Problems.enso b/distribution/lib/Standard/Test/0.0.0-dev/src/Problems.enso index c9cf3ab805..6d45e96f4b 100644 --- a/distribution/lib/Standard/Test/0.0.0-dev/src/Problems.enso +++ b/distribution/lib/Standard/Test/0.0.0-dev/src/Problems.enso @@ -87,11 +87,11 @@ assume_no_problems result = - result: The value to check. expect_warning : Any -> Any -> Nothing expect_warning expected_warning result = + loc = Meta.get_source_location 1 warnings = get_attached_warnings result found = warnings.find if_missing=Nothing x-> (x == expected_warning) || (x.is_a expected_warning) found.if_nothing <| - loc = Meta.get_source_location 2 Test.fail "Expected the result to contain a warning: "+expected_warning.to_text+", but it did not. The warnings were "+warnings.short_display_text+' (at '+loc+').' ## UNSTABLE diff --git a/test/Table_Tests/src/Common_Table_Operations/Conversion_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Conversion_Spec.enso index 431017fdca..a3421d2725 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Conversion_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Conversion_Spec.enso @@ -4,7 +4,7 @@ import Standard.Base.Errors.Illegal_Argument.Illegal_Argument from Standard.Table import Value_Type import Standard.Table.Data.Type.Value_Type.Bits -from Standard.Table.Errors import Conversion_Failure +from Standard.Table.Errors import Missing_Input_Columns, Conversion_Failure from Standard.Database.Errors import Unsupported_Database_Operation from Standard.Test import Test, Problems @@ -201,6 +201,23 @@ spec setup = r1.should_fail_with Illegal_Argument r1.to_display_text . 
should_contain "`parse` should be used instead" + Test.specify "should report missing columns" <| + t = table_builder [["X", [1, 2, 3]], ["Y", [4, 5, 6]]] + + r1 = t.cast ["X", "Z"] Value_Type.Char + r1.should_fail_with Missing_Input_Columns + r1.catch.criteria . should_equal ["Z"] + + t2 = t.cast ["X", "Z"] Value_Type.Char error_on_missing_columns=False + warn = Problems.expect_warning Missing_Input_Columns t2 + warn.criteria . should_equal ["Z"] + t2.at "X" . to_vector . should_equal ["1", "2", "3"] + t2.at "Y" . to_vector . should_equal [4, 5, 6] + + t3 = t.cast ["Z"] Value_Type.Char error_on_missing_columns=False + t3.at "X" . to_vector . should_equal [1, 2, 3] + t3.at "Y" . to_vector . should_equal [4, 5, 6] + Test.specify "should work if the first row is NULL" <| t = table_builder [["X", [Nothing, 1, 2, 3000]], ["Y", [Nothing, True, False, True]]] @@ -267,7 +284,7 @@ spec setup = t3.at "Y" . value_type . should_equal (Value_Type.Char size=3 variable_length=False) t3.at "Y" . to_vector . should_equal ["a ", "abc", "abc"] - Test.group prefix+"Simple variant of Table/Column.parse in all backends" pending=(if setup.is_database then "parse is not yet implemented in DB") <| + Test.group prefix+"Simple variant of Table/Column.parse in all backends" <| Test.specify "should be able to parse simple integers" <| t = table_builder [["X", ["42", "0", "-1"]]] @@ -321,3 +338,19 @@ spec setup = c2 = t.parse ["X"] Value_Type.Boolean . at "X" c2.value_type.should_equal Value_Type.Boolean c2.to_vector . should_equal [True, False, True] + + Test.specify "should report missing columns" <| + t = table_builder [["X", ["42", "0", "-1"]]] + + t1 = t.parse ["X", "Y"] Value_Type.Integer error_on_missing_columns=False + t1.at "X" . to_vector . should_equal [42, 0, -1] + t1.at "X" . value_type . is_integer . 
should_be_true + Problems.expect_warning Missing_Input_Columns t1 + + t2 = t.parse ["Y"] Value_Type.Integer error_on_missing_columns=False + Problems.expect_warning Missing_Input_Columns t2 + t2.at "X" . to_vector . should_equal ["42", "0", "-1"] + + r3 = t.parse ["X", "Y"] Value_Type.Integer + r3.should_fail_with Missing_Input_Columns + r3.catch.criteria . should_equal ["Y"] diff --git a/test/Table_Tests/src/Common_Table_Operations/Date_Time_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Date_Time_Spec.enso index cae4242ae4..a3adb785b1 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Date_Time_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Date_Time_Spec.enso @@ -4,6 +4,8 @@ import Standard.Base.Errors.Illegal_Argument.Illegal_Argument from Standard.Table import Value_Type from Standard.Table.Errors import Inexact_Type_Coercion, Invalid_Value_Type +from Standard.Database.Errors import Unsupported_Database_Operation + from Standard.Test import Test, Problems import Standard.Test.Extensions @@ -136,4 +138,4 @@ spec setup = Test.specify "will warn when uploading a Table containing Dates" <| d = Date.new 2020 10 24 table = table_builder [["A", [d]], ["X", [123]]] - Problems.expect_warning Inexact_Type_Coercion table + table.should_fail_with Unsupported_Database_Operation diff --git a/test/Table_Tests/src/Database/Types/SQLite_Type_Mapping_Spec.enso b/test/Table_Tests/src/Database/Types/SQLite_Type_Mapping_Spec.enso index daaf503fc4..c727adc513 100644 --- a/test/Table_Tests/src/Database/Types/SQLite_Type_Mapping_Spec.enso +++ b/test/Table_Tests/src/Database/Types/SQLite_Type_Mapping_Spec.enso @@ -8,6 +8,7 @@ import Standard.Database.Data.Dialect import Standard.Database.Extensions.Upload_Table import Standard.Database.Internal.SQLite.SQLite_Type_Mapping from Standard.Database import Database, SQLite, In_Memory, SQL_Query +from Standard.Database.Errors import Unsupported_Database_Operation from Standard.Test import Problems, Test, 
Test_Suite import Standard.Test.Extensions @@ -93,18 +94,17 @@ spec = t2.at "b" . value_type . should_equal Value_Type.Integer Problems.expect_warning Inexact_Type_Coercion t2 - Test.specify "will coerce date/time columns to Text" <| + Test.specify "will coerce date/time columns to Text in existing tables" <| t = make_table "datetime-table" [["a", "DATE"], ["b", "TIME"], ["c", "DATETIME"], ["d", "TIMESTAMP"]] t.at "a" . value_type . should_equal Value_Type.Char t.at "b" . value_type . should_equal Value_Type.Char t.at "c" . value_type . should_equal Value_Type.Char t.at "d" . value_type . should_equal Value_Type.Char - t1 = Table.new [["a", [Date.now]], ["b", [Time_Of_Day.now]], ["c", [Date_Time.now]]] - t2 = t1.create_database_table connection temporary=True - t2.at "a" . value_type . should_equal Value_Type.Char - t2.at "b" . value_type . should_equal Value_Type.Char - t2.at "c" . value_type . should_equal Value_Type.Char + Test.specify "does not support creating tables with date/time values" <| + t = Table.new [["a", [Date.now]], ["b", [Time_Of_Day.now]], ["c", [Date_Time.now]]] + r1 = t.create_database_table connection temporary=True + r1.should_fail_with Unsupported_Database_Operation Test.specify "should be able to infer types for all supported operations" <| dialect = Dialect.sqlite