mirror of
https://github.com/enso-org/enso.git
synced 2024-11-22 22:10:15 +03:00
Implement simple variants of parse
for the Database backend (#6731)
Implements the simplest `parse` scenarios for the Database backend. Before #6711 these could be done with `cast`, but #6711 unified the APIs so that casting is only allowed between the same set of types in both the in-memory and the Database backends. Converting Text to other types is supposed to be done by `parse` and not `cast`, so the ability to use `cast` for rudimentary parsing was removed from the Database backend to make it consistent with the in-memory one. As a result, the Database backend now lacks even the simplest Text->Int/Text->Date support. To alleviate that, the simple scenarios for `parse` are implemented (there is no support for format customization yet; the operation boils down to a cast under the hood).
This commit is contained in:
parent
658395e011
commit
a9a464af37
@ -990,12 +990,55 @@ type Column
|
||||
Error.throw (Illegal_State.Error "The dialect "+self.connection.dialect.name+" does not support a boolean type. The implementation of `is_in` should be revised to account for this. This is an internal issue with the Database library.")
|
||||
Column.Value new_name self.connection new_type_ref new_expr self.context
|
||||
|
||||
## Parsing values is not supported in database columns.
|
||||
@type Widget_Helpers.parse_type_selector
|
||||
parse : Value_Type | Auto -> Text | Data_Formatter -> Problem_Behavior -> Column
|
||||
parse self type=Auto format=Data_Formatter.Value on_problems=Report_Warning =
|
||||
_ = [type, format, on_problems]
|
||||
Error.throw <| Unsupported_Database_Operation.Error "`Column.parse` is not implemented yet for the Database backends."
|
||||
## Parses a text column into values.
|
||||
|
||||
In the Database backends, the default formatting settings of the
|
||||
particular database are used.
|
||||
|
||||
In the in-memory backend, the default parser options only parse values
|
||||
where the process is reversible (e.g., 0123 would not be converted to an
|
||||
integer as there is a leading 0). However, settings in the
|
||||
`Data_Formatter` can control this.
|
||||
|
||||
Arguments:
|
||||
- type: The type to parse the column to. Defaults to `Auto` meaning that
|
||||
the type will be inferred from the data. In the Database backends,
|
||||
`Auto` is not supported, so a specific type must be selected.
|
||||
- format: The formatting settings to use when parsing the column.
|
||||
For `Date`, `Time_Of_Day` and `Date_Time`, a Java date time style
|
||||
can be used. For `Boolean`, it should be two values that represent true
|
||||
and false, separated by a `|`. Alternatively, a `Data_Formatter` can be
|
||||
passed to provide complete customisation of the formatting. If
|
||||
`Nothing` is provided, the default formatting settings of the backend
|
||||
will be used. `Nothing` is currently the only setting accepted by the
|
||||
Database backends.
|
||||
- on_problems: Specifies how to handle if a problem occurs, raising as a
|
||||
warning by default.
|
||||
|
||||
! Error Conditions
|
||||
|
||||
- If the column is not a text column, an `Invalid_Value_Type` error is
|
||||
raised.
|
||||
- If some values in the column did not match the expected datatype
|
||||
format, an `Invalid_Format` problem is reported. The problematic
|
||||
cells are replaced with `Nothing`.
|
||||
- If custom formatting settings were provided, but the database backend
|
||||
does not support customization, an `Unsupported_Database_Operation`
|
||||
error is reported.
|
||||
@type (Widget_Helpers.parse_type_selector include_auto=False)
parse : Value_Type | Auto -> Text | Data_Formatter | Nothing -> Problem_Behavior -> Column
parse self type format=Nothing on_problems=Report_Warning =
    # Both rejected configurations are reported with the same error type.
    reject message = Error.throw (Unsupported_Database_Operation.Error message)

    # Only an explicit target type combined with the `Nothing` format is
    # accepted; `Auto` is checked first, then the format.
    validation = if type == Auto then reject "The `Auto` parse type is not supported by the Database backend. Either pick a specific type or materialize the table to memory using `.read`." else
        case format of
            Nothing -> Nothing
            _ -> reject "Custom formatting is not supported by the Database backend. Please set the format to `Nothing` to indicate that the default Database settings can be used, or if custom formatting is needed, materialize the table to memory using `.read` first."

    validation.if_not_error <|
        Value_Type.expect_text self <|
            ## There is no dedicated parsing logic yet (e.g. for handling
               formatting settings), so the operation is delegated to the
               CAST logic shared with `cast`.
            self.internal_do_cast type on_problems
|
||||
|
||||
## Formatting values is not supported in database columns.
|
||||
format : Text | Column -> Locale -> Column ! Illegal_Argument
|
||||
@ -1044,14 +1087,20 @@ type Column
|
||||
cast : Value_Type -> Problem_Behavior -> Column ! Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure
|
||||
cast self value_type on_problems=Problem_Behavior.Report_Warning =
|
||||
check_cast_compatibility self.value_type value_type <|
|
||||
dialect = self.connection.dialect
|
||||
type_mapping = dialect.get_type_mapping
|
||||
target_sql_type = type_mapping.value_type_to_sql value_type on_problems
|
||||
target_sql_type.if_not_error <|
|
||||
infer_from_database new_expression =
|
||||
SQL_Type_Reference.new self.connection self.context new_expression
|
||||
new_column = dialect.make_cast self.as_internal target_sql_type infer_from_database
|
||||
Column.Value new_column.name self.connection new_column.sql_type_reference new_column.expression self.context
|
||||
self.internal_do_cast value_type on_problems
|
||||
|
||||
## PRIVATE
   The common CAST implementation that both `cast` and `parse` delegate to.

   It maps `value_type` to a SQL type using the dialect's type mapping
   (passing `on_problems` to that mapping) and, unless the mapping resulted
   in an error, asks the dialect to build the cast column.
internal_do_cast : Value_Type -> Problem_Behavior -> Column
internal_do_cast self value_type on_problems =
    db_dialect = self.connection.dialect
    sql_type = db_dialect.get_type_mapping.value_type_to_sql value_type on_problems
    sql_type.if_not_error <|
        # Passed to `make_cast`; builds a type reference for a given expression.
        make_type_reference expression = SQL_Type_Reference.new self.connection self.context expression
        casted = db_dialect.make_cast self.as_internal sql_type make_type_reference
        Column.Value casted.name self.connection casted.sql_type_reference casted.expression self.context
|
||||
|
||||
## ALIAS Transform Column
|
||||
|
||||
|
@ -1405,17 +1405,68 @@ type Table
|
||||
msg = "Cross tab of database tables is not supported, the table has to be materialized first with `read`."
|
||||
Error.throw (Unsupported_Database_Operation.Error msg)
|
||||
|
||||
## Parsing values is not supported in database tables, the table has to be
|
||||
loaded into memory first with `read`.
|
||||
@type Widget_Helpers.parse_type_selector
|
||||
## Parses columns within a Table to a specific value type.
|
||||
By default, it looks at all `Text` columns and attempts to deduce the
|
||||
type (columns with other types are not affected).
|
||||
|
||||
In the Database backends, the default formatting settings of the
|
||||
particular database are used.
|
||||
|
||||
In the in-memory backend, the default parser options only parse values
|
||||
where the process is reversible (e.g., 0123 would not be converted to an
|
||||
integer as there is a leading 0). However, settings in the
|
||||
`Data_Formatter` can control this.
|
||||
|
||||
Arguments:
|
||||
- columns: The columns to parse. If not specified, all text columns
|
||||
will be parsed.
|
||||
- type: The type to parse the columns to. Defaults to `Auto` meaning that
|
||||
the type will be inferred from the data. In the Database backends,
|
||||
`Auto` is not supported, so a specific type must be selected.
|
||||
- format: The formatting settings to use when parsing the columns.
|
||||
For `Date`, `Time_Of_Day` and `Date_Time`, a Java date time style
|
||||
can be used. For `Boolean`, it should be two values that represent true
|
||||
and false, separated by a `|`. Alternatively, a `Data_Formatter` can be
|
||||
passed to provide complete customisation of the formatting. If
|
||||
`Nothing` is provided, the default formatting settings of the backend
|
||||
will be used. `Nothing` is currently the only setting accepted by the
|
||||
Database backends.
|
||||
- error_on_missing_columns: if `True` (the default) raises an error if
|
||||
any column is missing. Otherwise, reported as a problem.
|
||||
- on_problems: Specifies how to handle if a problem occurs, raising as a
|
||||
warning by default.
|
||||
|
||||
! Error Conditions
|
||||
|
||||
- If a column in `columns` is not in the input table, a
|
||||
`Missing_Input_Columns` is raised as an error or problem
|
||||
following the `error_on_missing_columns` rules.
|
||||
- If a column index is out of range, a `Column_Indexes_Out_Of_Range` is
|
||||
raised as an error or problem following the
|
||||
`error_on_missing_columns` rules.
|
||||
- If a column selected for parsing is not a text column, an
|
||||
`Invalid_Value_Type` error is raised.
|
||||
- If no columns have been selected for parsing,
|
||||
a `No_Input_Columns_Selected` error is raised.
|
||||
- If custom formatting settings were provided, but the database backend
|
||||
does not support customization, an `Unsupported_Database_Operation`
|
||||
error is reported.
|
||||
|
||||
> Example
|
||||
Parse dates in a column.
|
||||
|
||||
table.parse "birthday" Value_Type.Date
|
||||
@type (Widget_Helpers.parse_type_selector include_auto=False)
|
||||
@columns Widget_Helpers.make_column_name_vector_selector
|
||||
parse : Text | Integer | Column_Selector | Vector (Text | Integer | Column_Selector) -> Value_Type | Auto -> Text | Data_Formatter -> Boolean -> Problem_Behavior -> Table
|
||||
parse columns=(self.columns . filter (c-> c.value_type.is_text) . map .name) type=Auto format=Data_Formatter.Value error_on_missing_columns=True on_problems=Report_Warning =
|
||||
## Avoid unused arguments warning. We cannot rename arguments to `_`,
|
||||
because we need to keep the API consistent with the in-memory table.
|
||||
_ = [columns, type, format, error_on_missing_columns, on_problems]
|
||||
msg = "Parsing values is not supported in database tables, the table has to be materialized first with `read`."
|
||||
Error.throw (Unsupported_Database_Operation.Error msg)
|
||||
parse : Text | Integer | Column_Selector | Vector (Text | Integer | Column_Selector) -> Value_Type | Auto -> Text | Data_Formatter | Nothing -> Boolean -> Problem_Behavior -> Table
parse self columns=(self.columns . filter (c-> c.value_type.is_text) . map .name) type format=Nothing error_on_missing_columns=True on_problems=Report_Warning =
    columns_to_parse = self.columns_helper.resolve_columns columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems
    # Replace the selected columns one by one, threading the updated table
    # through the fold; each column is parsed by `Column.parse`.
    result = columns_to_parse.fold self current_table-> column->
        parsed = column.parse type format on_problems
        current_table.set parsed new_name=column.name set_mode=Set_Mode.Update
    ## The fold result is deliberately bound to a temporary variable as a
       workaround for the #6765 bug. The variable should be removed once
       that bug is fixed.
    result
|
||||
|
||||
## Splits a column of text into a set of new columns.
|
||||
The original column will be removed from the table.
|
||||
@ -1572,9 +1623,12 @@ type Table
|
||||
cast : (Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector)) -> Value_Type -> Boolean -> Problem_Behavior -> Table ! Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure
|
||||
cast self columns=[0] value_type error_on_missing_columns=True on_problems=Problem_Behavior.Report_Warning =
|
||||
selected = self.columns_helper.resolve_columns columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems
|
||||
selected.fold self table-> column_to_cast->
|
||||
result = selected.fold self table-> column_to_cast->
|
||||
new_column = column_to_cast.cast value_type on_problems
|
||||
table.set new_column new_name=column_to_cast.name set_mode=Set_Mode.Update
|
||||
## The temporary variable for result is added due to the #6765 bug.
|
||||
It should be removed once it is fixed.
|
||||
result
|
||||
|
||||
## ALIAS dropna
|
||||
ALIAS drop_missing_rows
|
||||
|
@ -175,7 +175,7 @@ base_dialect =
|
||||
fun = name -> [name, make_function name]
|
||||
|
||||
arith = [["ADD_NUMBER", make_binary_op "+"], ["ADD_TEXT", make_binary_op "||"], bin "-", bin "*", bin "/", bin "%", ["mod", make_function "MOD"], ["^", make_function "POWER"]]
|
||||
logic = [bin "AND", bin "OR", unary "NOT", ["IIF", make_iif]]
|
||||
logic = [bin "AND", bin "OR", unary "NOT", ["IIF", make_iif], ["CASE", case_when]]
|
||||
eq = lift_binary_op "==" make_equals
|
||||
neq = lift_binary_op "!=" make_not_equals
|
||||
compare = [eq, neq, bin "<", bin ">", bin "<=", bin ">=", ["BETWEEN", make_between]]
|
||||
@ -206,6 +206,22 @@ make_iif arguments = case arguments.length of
|
||||
_ ->
|
||||
Error.throw <| Illegal_State.Error ("Invalid amount of arguments for operation IIF")
|
||||
|
||||
## PRIVATE
   For Internal use.
   An IR expression for constructing SQL `CASE` expressions.

   Arguments:
   - arguments: A flat vector of already-built SQL fragments laid out as
     `[condition_1, result_1, ..., condition_n, result_n, fallback]`.
     The last element becomes the `ELSE` branch.

   The shortest valid input has 3 elements: a single condition/result pair
   followed by the fallback. An `Illegal_State` error is raised if fewer
   elements are provided or if the elements preceding the fallback do not
   form complete pairs.
case_when : Vector Builder -> Builder
case_when arguments =
    # One WHEN/THEN pair plus the ELSE fallback is the smallest valid CASE,
    # so exactly 3 arguments must be accepted.
    if arguments.length < 3 then Error.throw (Illegal_State.Error "CASE_WHEN needs at least 3 arguments.") else
        fallback = arguments.last
        cases = arguments.drop (Last 1)
        if cases.length % 2 != 0 then Error.throw (Illegal_State.Error "CASE_WHEN expects an odd number of arguments (two arguments for each case and a fallback).") else
            n = cases.length . div 2
            cases_exprs = 0.up_to n . map i->
                # The i-th pair occupies positions 2*i (condition) and 2*i+1 (result).
                condition = cases.at 2*i
                result = cases.at (2*i)+1
                Builder.code "WHEN " ++ condition ++ " THEN " ++ result
            Builder.code "CASE " ++ Builder.join " " cases_exprs ++ " ELSE " ++ fallback ++ " END"
|
||||
|
||||
## PRIVATE
|
||||
simple_cast = Base_Generator.lift_binary_op "CAST" a-> b->
|
||||
Builder.code "CAST(" ++ a ++ " AS " ++ b ++ ")"
|
||||
|
@ -1,6 +1,7 @@
|
||||
from Standard.Base import all hiding First, Last
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
import Standard.Base.Errors.Illegal_State.Illegal_State
|
||||
import Standard.Base.Runtime.Ref.Ref
|
||||
|
||||
import Standard.Table.Data.Aggregate_Column.Aggregate_Column
|
||||
import Standard.Table.Internal.Naming_Helpers.Naming_Helpers
|
||||
@ -367,7 +368,18 @@ mod_op = Base_Generator.lift_binary_op "mod" x-> y->
|
||||
## PRIVATE
|
||||
It will return `Nothing` if the type does not require custom logic.
|
||||
make_custom_cast column target_value_type type_mapping =
|
||||
if target_value_type.is_text then
|
||||
column_type = type_mapping.sql_type_to_value_type column.sql_type_reference.get
|
||||
if column_type == Value_Type.Boolean then
|
||||
SQL_Expression.Operation "IIF" [column.expression, SQL_Expression.Literal "'true'", SQL_Expression.Literal "'false'"]
|
||||
result = Ref.new Nothing
|
||||
column_type =
|
||||
type_mapping.sql_type_to_value_type column.sql_type_reference.get
|
||||
if target_value_type.is_text && (column_type == Value_Type.Boolean) then
|
||||
expr = SQL_Expression.Operation "IIF" [column.expression, SQL_Expression.Literal "'true'", SQL_Expression.Literal "'false'"]
|
||||
result.put expr
|
||||
|
||||
if (target_value_type == Value_Type.Boolean) && column_type.is_text then
|
||||
lower = SQL_Expression.Operation "FOLD_CASE" [column.expression]
|
||||
is_true = SQL_Expression.Operation "==" [lower, SQL_Expression.Literal "'true'"]
|
||||
is_false = SQL_Expression.Operation "==" [lower, SQL_Expression.Literal "'false'"]
|
||||
expr = SQL_Expression.Operation "CASE" [is_true, SQL_Expression.Literal "TRUE", is_false, SQL_Expression.Literal "FALSE", SQL_Expression.Literal "NULL"]
|
||||
result.put expr
|
||||
|
||||
result.get
|
||||
|
@ -14,6 +14,7 @@ import project.Internal.IR.Internal_Column.Internal_Column
|
||||
import project.Internal.IR.SQL_Expression.SQL_Expression
|
||||
import project.Internal.SQL_Type_Mapping
|
||||
import project.Internal.SQL_Type_Reference.SQL_Type_Reference
|
||||
from project.Errors import Unsupported_Database_Operation
|
||||
|
||||
polyglot java import java.sql.Types
|
||||
|
||||
@ -59,9 +60,9 @@ type SQLite_Type_Mapping
|
||||
Value_Type.Float _ -> SQLite_Types.real
|
||||
Value_Type.Decimal _ _ -> SQLite_Types.numeric
|
||||
Value_Type.Char _ _ -> SQLite_Types.text
|
||||
Value_Type.Time -> SQLite_Types.text
|
||||
Value_Type.Date -> SQLite_Types.text
|
||||
Value_Type.Date_Time _ -> SQLite_Types.text
|
||||
Value_Type.Time -> unsupported_date_time
|
||||
Value_Type.Date -> unsupported_date_time
|
||||
Value_Type.Date_Time _ -> unsupported_date_time
|
||||
Value_Type.Binary _ _ -> SQLite_Types.blob
|
||||
Value_Type.Mixed -> SQLite_Types.text
|
||||
Value_Type.Unsupported_Data_Type type_name underlying_type ->
|
||||
@ -158,6 +159,17 @@ operations_map =
|
||||
Panic.throw (Illegal_State.Error "Impossible: IIF must have 3 arguments. This is a bug in the Database library.")
|
||||
find_a_common_type (arguments.drop 1)
|
||||
|
||||
handle_case arguments =
    # The last entry belongs to the fallback; everything before it is
    # expected to come in (condition, result) pairs.
    else_branch = arguments.last
    when_then_pairs = arguments.drop (Last 1)
    has_dangling_case = when_then_pairs.length % 2 != 0
    if has_dangling_case then
        Panic.throw (Illegal_State.Error "Impossible: constructed a CASE with an odd number of case arguments.")
    if when_then_pairs.is_empty then
        Panic.throw (Illegal_State.Error "Impossible: too few cases provided for a CASE statement.")
    # Only the result entries (every second one, starting at index 1) and
    # the fallback take part in finding the common type.
    branch_results = when_then_pairs.take (Index_Sub_Range.Every 2 first=1)
    find_a_common_type (branch_results + [else_branch])
|
||||
|
||||
handle_cast _ =
|
||||
Panic.throw (Illegal_State.Error "Cast relies on its own type inference logic, so this code should never be reached. This is a bug in the Database library.")
|
||||
|
||||
@ -167,7 +179,7 @@ operations_map =
|
||||
always_integer_ops = ["COUNT", "COUNT_IS_NULL", "COUNT_DISTINCT", "COUNT_DISTINCT_INCLUDE_NULL", "COUNT_EMPTY", "COUNT_NOT_EMPTY", "COUNT_ROWS"]
|
||||
arithmetic_ops = ["ADD_NUMBER", "-", "*", "^", "%", "SUM"]
|
||||
merge_input_types_ops = ["ROW_MAX", "ROW_MIN", "MAX", "MIN", "FILL_NULL", "COALESCE"]
|
||||
others = [["IIF", handle_iif], ["CAST", handle_cast]]
|
||||
others = [["IIF", handle_iif], ["CAST", handle_cast], ["CASE", handle_case]]
|
||||
Map.from_vector <|
|
||||
v1 = always_boolean_ops.map [_, const SQLite_Types.boolean]
|
||||
v2 = always_floating_ops.map [_, const SQLite_Types.real]
|
||||
@ -206,3 +218,7 @@ default_float = Value_Type.Float Bits.Bits_64
|
||||
|
||||
## PRIVATE
|
||||
default_integer = Value_Type.Integer Bits.Bits_64
|
||||
|
||||
## PRIVATE
   An `Unsupported_Database_Operation` error stating that date/time types
   are not supported by the SQLite backend.
unsupported_date_time =
    message = "Date/time types are not supported by the SQLite backend."
    Error.throw (Unsupported_Database_Operation.Error message)
|
||||
|
@ -1094,16 +1094,28 @@ type Column
|
||||
new_vector = self.to_vector.map (Filter_Condition.Is_In true_vector).to_predicate
|
||||
Column.from_vector result_name new_vector
|
||||
|
||||
## Parses a text column into values
|
||||
## Parses a text column into values.
|
||||
|
||||
In the Database backends, the default formatting settings of the
|
||||
particular database are used.
|
||||
|
||||
In the in-memory backend, the default parser options only parse values
|
||||
where the process is reversible (e.g., 0123 would not be converted to an
|
||||
integer as there is a leading 0). However, settings in the
|
||||
`Data_Formatter` can control this.
|
||||
|
||||
Arguments:
|
||||
- type: The type to parse the column to. Defaults to `Auto` meaning that
|
||||
the type will be inferred from the data.
|
||||
the type will be inferred from the data. In the Database backends,
|
||||
`Auto` is not supported, so a specific type must be selected.
|
||||
- format: The formatting settings to use when parsing the column.
|
||||
For `Date`, `Time_Of_Day` and `Date_Time`, a Java date time style
|
||||
can be used. For `Boolean`, it should be two values that represent true
|
||||
and false, separated by a `|`. Alternatively, a `Data_Formatter` can be
|
||||
passed to provide complete customisation of the formatting.
|
||||
passed to provide complete customisation of the formatting. If
|
||||
`Nothing` is provided, the default formatting settings of the backend
|
||||
will be used. `Nothing` is currently the only setting accepted by the
|
||||
Database backends.
|
||||
- on_problems: Specifies how to handle if a problem occurs, raising as a
|
||||
warning by default.
|
||||
|
||||
@ -1159,14 +1171,15 @@ type Column
|
||||
|
||||
example_contains = Examples.text_column_1.parse Boolean 'Yes|No'
|
||||
@type Widget_Helpers.parse_type_selector
|
||||
parse : Value_Type | Auto -> Text | Data_Formatter -> Problem_Behavior -> Column
|
||||
parse : Value_Type | Auto -> Text | Data_Formatter | Nothing -> Problem_Behavior -> Column
|
||||
parse self type=Auto format=Data_Formatter.Value on_problems=Report_Warning =
|
||||
Value_Type.expect_text self <|
|
||||
formatter = case format of
|
||||
_ : Text ->
|
||||
Data_Formatter.Value.with_format type format
|
||||
_ : Data_Formatter -> format
|
||||
_ -> Error.throw (Illegal_Argument.Error "Invalid format type. Expected Text or Data_Formatter.")
|
||||
Nothing -> Data_Formatter.Value
|
||||
_ -> Error.throw (Illegal_Argument.Error "Invalid format type. Expected Text or Data_Formatter or Nothing.")
|
||||
|
||||
parser = formatter.make_value_type_parser type
|
||||
storage = self.java_column.getStorage
|
||||
|
@ -758,21 +758,28 @@ type Table
|
||||
By default, it looks at all `Text` columns and attempts to deduce the
|
||||
type (columns with other types are not affected).
|
||||
|
||||
The default parser options only parse values where the process is
|
||||
reversible (e.g., 0123 would not be converted to an integer as there is
|
||||
a leading 0). However, settings in the `Data_Formatter` can
|
||||
control this.
|
||||
In the Database backends, the default formatting settings of the
|
||||
particular database are used.
|
||||
|
||||
In the in-memory backend, the default parser options only parse values
|
||||
where the process is reversible (e.g., 0123 would not be converted to an
|
||||
integer as there is a leading 0). However, settings in the
|
||||
`Data_Formatter` can control this.
|
||||
|
||||
Arguments:
|
||||
- columns: The columns to parse. If not specified, all text columns
|
||||
will be parsed.
|
||||
- type: The type to parse the columns to. Defaults to `Auto` meaning that
|
||||
the type will be inferred from the data.
|
||||
the type will be inferred from the data. In the Database backends,
|
||||
`Auto` is not supported, so a specific type must be selected.
|
||||
- format: The formatting settings to use when parsing the columns.
|
||||
For `Date`, `Time_Of_Day` and `Date_Time`, a Java date time style
|
||||
can be used. For `Boolean`, it should be two values that represent true
|
||||
and false, separated by a `|`. Alternatively, a `Data_Formatter` can be
|
||||
passed to provide complete customisation of the formatting.
|
||||
passed to provide complete customisation of the formatting. If
|
||||
`Nothing` is provided, the default formatting settings of the backend
|
||||
will be used. `Nothing` is currently the only setting accepted by the
|
||||
Database backends.
|
||||
- error_on_missing_columns: if `True` (the default) raises an error if
|
||||
any column is missing. Otherwise, reported as a problem.
|
||||
- on_problems: Specifies how to handle if a problem occurs, raising as a
|
||||
@ -820,17 +827,17 @@ type Table
|
||||
> Example
|
||||
Parse the first and last columns containing Yes/No values as booleans.
|
||||
|
||||
table.parse columns=[0, -1] type=Boolean format="Yes|No"
|
||||
table.parse columns=[0, -1] type=Value_Type.Boolean format="Yes|No"
|
||||
|
||||
> Example
|
||||
Parse dates in a column in the format `yyyy-MM-dd` (the default format).
|
||||
|
||||
table.parse "birthday" Date
|
||||
table.parse "birthday" Value_Type.Date
|
||||
|
||||
> Example
|
||||
Parse dates in a column in the format `dd/MM/yyyy`.
|
||||
|
||||
table.parse "birthday" Date 'dd/MM/yyyy'
|
||||
table.parse "birthday" Value_Type.Date 'dd/MM/yyyy'
|
||||
|
||||
> Example
|
||||
Parse all columns inferring their types, using `,` as the decimal point for numbers.
|
||||
@ -838,13 +845,14 @@ type Table
|
||||
table.parse format=(Data_Formatter.Value.with_number_formatting decimal_point=',')
|
||||
@type Widget_Helpers.parse_type_selector
|
||||
@columns Widget_Helpers.make_column_name_vector_selector
|
||||
parse : Text | Integer | Column_Selector | Vector (Text | Integer | Column_Selector) -> Value_Type | Auto -> Text | Data_Formatter -> Boolean -> Problem_Behavior -> Table
|
||||
parse : Text | Integer | Column_Selector | Vector (Text | Integer | Column_Selector) -> Value_Type | Auto -> Text | Data_Formatter | Nothing -> Boolean -> Problem_Behavior -> Table
|
||||
parse self columns=(self.columns . filter (c-> c.value_type.is_text) . map .name) type=Auto format=Data_Formatter.Value error_on_missing_columns=True on_problems=Report_Warning =
|
||||
formatter = case format of
|
||||
_ : Text ->
|
||||
Data_Formatter.Value.with_format type format
|
||||
_ : Data_Formatter -> format
|
||||
_ -> Error.throw (Illegal_Argument.Error "Invalid format type. Expected Text or Data_Formatter.")
|
||||
Nothing -> Data_Formatter.Value
|
||||
_ -> Error.throw (Illegal_Argument.Error "Invalid format type. Expected Text or Data_Formatter or Nothing.")
|
||||
|
||||
parser = formatter.make_value_type_parser type
|
||||
|
||||
@ -919,9 +927,12 @@ type Table
|
||||
cast : (Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector)) -> Value_Type -> Boolean -> Problem_Behavior -> Table ! Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure
|
||||
cast self columns=[0] value_type error_on_missing_columns=True on_problems=Problem_Behavior.Report_Warning =
|
||||
selected = self.columns_helper.resolve_columns columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems
|
||||
selected.fold self table-> column_to_cast->
|
||||
result = selected.fold self table-> column_to_cast->
|
||||
new_column = column_to_cast.cast value_type on_problems
|
||||
table.set new_column new_name=column_to_cast.name set_mode=Set_Mode.Update
|
||||
## The temporary variable for result is added due to the #6765 bug.
|
||||
It should be removed once it is fixed.
|
||||
result
|
||||
|
||||
## Splits a column of text into a set of new columns.
|
||||
The original column will be removed from the table.
|
||||
|
@ -72,9 +72,11 @@ make_order_by_selector table display=Display.Always =
|
||||
## PRIVATE
|
||||
Selector for type argument on `Column.parse`.
|
||||
parse_type_selector : Single_Choice
|
||||
parse_type_selector =
|
||||
parse_type_selector include_auto=True =
|
||||
valid_parse_targets = Parse_Values_Helper.valid_parse_targets
|
||||
choice = ['Auto'] + (valid_parse_targets.map t-> 'Value_Type.'+t)
|
||||
names = ['Auto'] + valid_parse_targets
|
||||
|
||||
prefix = if include_auto then ['Auto'] else []
|
||||
choice = prefix + (valid_parse_targets.map t-> 'Value_Type.'+t)
|
||||
names = prefix + valid_parse_targets
|
||||
options = names.zip choice . map pair-> Option pair.first pair.second
|
||||
Single_Choice display=Display.Always values=options
|
||||
|
@ -87,11 +87,11 @@ assume_no_problems result =
|
||||
- result: The value to check.
|
||||
expect_warning : Any -> Any -> Nothing
|
||||
expect_warning expected_warning result =
|
||||
loc = Meta.get_source_location 1
|
||||
warnings = get_attached_warnings result
|
||||
found = warnings.find if_missing=Nothing x->
|
||||
(x == expected_warning) || (x.is_a expected_warning)
|
||||
found.if_nothing <|
|
||||
loc = Meta.get_source_location 2
|
||||
Test.fail "Expected the result to contain a warning: "+expected_warning.to_text+", but it did not. The warnings were "+warnings.short_display_text+' (at '+loc+').'
|
||||
|
||||
## UNSTABLE
|
||||
|
@ -4,7 +4,7 @@ import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
from Standard.Table import Value_Type
|
||||
import Standard.Table.Data.Type.Value_Type.Bits
|
||||
|
||||
from Standard.Table.Errors import Conversion_Failure
|
||||
from Standard.Table.Errors import Missing_Input_Columns, Conversion_Failure
|
||||
from Standard.Database.Errors import Unsupported_Database_Operation
|
||||
|
||||
from Standard.Test import Test, Problems
|
||||
@ -201,6 +201,23 @@ spec setup =
|
||||
r1.should_fail_with Illegal_Argument
|
||||
r1.to_display_text . should_contain "`parse` should be used instead"
|
||||
|
||||
Test.specify "should report missing columns" <|
|
||||
t = table_builder [["X", [1, 2, 3]], ["Y", [4, 5, 6]]]
|
||||
|
||||
r1 = t.cast ["X", "Z"] Value_Type.Char
|
||||
r1.should_fail_with Missing_Input_Columns
|
||||
r1.catch.criteria . should_equal ["Z"]
|
||||
|
||||
t2 = t.cast ["X", "Z"] Value_Type.Char error_on_missing_columns=False
|
||||
warn = Problems.expect_warning Missing_Input_Columns t2
|
||||
warn.criteria . should_equal ["Z"]
|
||||
t2.at "X" . to_vector . should_equal ["1", "2", "3"]
|
||||
t2.at "Y" . to_vector . should_equal [4, 5, 6]
|
||||
|
||||
t3 = t.cast ["Z"] Value_Type.Char error_on_missing_columns=False
|
||||
t3.at "X" . to_vector . should_equal [1, 2, 3]
|
||||
t3.at "Y" . to_vector . should_equal [4, 5, 6]
|
||||
|
||||
Test.specify "should work if the first row is NULL" <|
|
||||
t = table_builder [["X", [Nothing, 1, 2, 3000]], ["Y", [Nothing, True, False, True]]]
|
||||
|
||||
@ -267,7 +284,7 @@ spec setup =
|
||||
t3.at "Y" . value_type . should_equal (Value_Type.Char size=3 variable_length=False)
|
||||
t3.at "Y" . to_vector . should_equal ["a ", "abc", "abc"]
|
||||
|
||||
Test.group prefix+"Simple variant of Table/Column.parse in all backends" pending=(if setup.is_database then "parse is not yet implemented in DB") <|
|
||||
Test.group prefix+"Simple variant of Table/Column.parse in all backends" <|
|
||||
Test.specify "should be able to parse simple integers" <|
|
||||
t = table_builder [["X", ["42", "0", "-1"]]]
|
||||
|
||||
@ -321,3 +338,19 @@ spec setup =
|
||||
c2 = t.parse ["X"] Value_Type.Boolean . at "X"
|
||||
c2.value_type.should_equal Value_Type.Boolean
|
||||
c2.to_vector . should_equal [True, False, True]
|
||||
|
||||
Test.specify "should report missing columns" <|
|
||||
t = table_builder [["X", ["42", "0", "-1"]]]
|
||||
|
||||
t1 = t.parse ["X", "Y"] Value_Type.Integer error_on_missing_columns=False
|
||||
t1.at "X" . to_vector . should_equal [42, 0, -1]
|
||||
t1.at "X" . value_type . is_integer . should_be_true
|
||||
Problems.expect_warning Missing_Input_Columns t1
|
||||
|
||||
t2 = t.parse ["Y"] Value_Type.Integer error_on_missing_columns=False
|
||||
Problems.expect_warning Missing_Input_Columns t2
|
||||
t2.at "X" . to_vector . should_equal ["42", "0", "-1"]
|
||||
|
||||
r3 = t.parse ["X", "Y"] Value_Type.Integer
|
||||
r3.should_fail_with Missing_Input_Columns
|
||||
r3.catch.criteria . should_equal ["Y"]
|
||||
|
@ -4,6 +4,8 @@ import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
from Standard.Table import Value_Type
|
||||
from Standard.Table.Errors import Inexact_Type_Coercion, Invalid_Value_Type
|
||||
|
||||
from Standard.Database.Errors import Unsupported_Database_Operation
|
||||
|
||||
from Standard.Test import Test, Problems
|
||||
import Standard.Test.Extensions
|
||||
|
||||
@ -136,4 +138,4 @@ spec setup =
|
||||
Test.specify "will warn when uploading a Table containing Dates" <|
|
||||
d = Date.new 2020 10 24
|
||||
table = table_builder [["A", [d]], ["X", [123]]]
|
||||
Problems.expect_warning Inexact_Type_Coercion table
|
||||
table.should_fail_with Unsupported_Database_Operation
|
||||
|
@ -8,6 +8,7 @@ import Standard.Database.Data.Dialect
|
||||
import Standard.Database.Extensions.Upload_Table
|
||||
import Standard.Database.Internal.SQLite.SQLite_Type_Mapping
|
||||
from Standard.Database import Database, SQLite, In_Memory, SQL_Query
|
||||
from Standard.Database.Errors import Unsupported_Database_Operation
|
||||
|
||||
from Standard.Test import Problems, Test, Test_Suite
|
||||
import Standard.Test.Extensions
|
||||
@ -93,18 +94,17 @@ spec =
|
||||
t2.at "b" . value_type . should_equal Value_Type.Integer
|
||||
Problems.expect_warning Inexact_Type_Coercion t2
|
||||
|
||||
Test.specify "will coerce date/time columns to Text" <|
|
||||
Test.specify "will coerce date/time columns to Text in existing tables" <|
|
||||
t = make_table "datetime-table" [["a", "DATE"], ["b", "TIME"], ["c", "DATETIME"], ["d", "TIMESTAMP"]]
|
||||
t.at "a" . value_type . should_equal Value_Type.Char
|
||||
t.at "b" . value_type . should_equal Value_Type.Char
|
||||
t.at "c" . value_type . should_equal Value_Type.Char
|
||||
t.at "d" . value_type . should_equal Value_Type.Char
|
||||
|
||||
t1 = Table.new [["a", [Date.now]], ["b", [Time_Of_Day.now]], ["c", [Date_Time.now]]]
|
||||
t2 = t1.create_database_table connection temporary=True
|
||||
t2.at "a" . value_type . should_equal Value_Type.Char
|
||||
t2.at "b" . value_type . should_equal Value_Type.Char
|
||||
t2.at "c" . value_type . should_equal Value_Type.Char
|
||||
Test.specify "does not support creating tables with date/time values" <|
|
||||
t = Table.new [["a", [Date.now]], ["b", [Time_Of_Day.now]], ["c", [Date_Time.now]]]
|
||||
r1 = t.create_database_table connection temporary=True
|
||||
r1.should_fail_with Unsupported_Database_Operation
|
||||
|
||||
Test.specify "should be able to infer types for all supported operations" <|
|
||||
dialect = Dialect.sqlite
|
||||
|
Loading…
Reference in New Issue
Block a user