Implement cast for Table and Column (#6711)

Closes #6112
This commit is contained in:
Radosław Waśko 2023-05-19 12:00:20 +02:00 committed by GitHub
parent 08e6d21885
commit 447786a304
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
35 changed files with 1013 additions and 256 deletions

View File

@ -445,6 +445,7 @@
- [Added `at_least_one` flag to `Table.tokenize_to_rows`.][6539]
- [Moved `Redshift` connector into a separate `AWS` library.][6550]
- [Added `Date_Range`.][6621]
- [Implemented the `cast` operation for `Table` and `Column`.][6711]
[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -655,6 +656,7 @@
[6539]: https://github.com/enso-org/enso/pull/6539
[6550]: https://github.com/enso-org/enso/pull/6550
[6621]: https://github.com/enso-org/enso/pull/6621
[6711]: https://github.com/enso-org/enso/pull/6711
#### Enso Compiler

View File

@ -9,7 +9,8 @@ import Standard.Table.Internal.Java_Problems
import Standard.Table.Internal.Problem_Builder.Problem_Builder
import Standard.Table.Internal.Widget_Helpers
from Standard.Table import Sort_Column, Data_Formatter, Value_Type, Auto
from Standard.Table.Errors import Floating_Point_Equality, Inexact_Type_Coercion, Invalid_Value_Type, Lossy_Conversion
from Standard.Table.Errors import Floating_Point_Equality, Inexact_Type_Coercion, Invalid_Value_Type, Conversion_Failure
from Standard.Table.Internal.Cast_Helpers import check_cast_compatibility
import project.Connection.Connection.Connection
import project.Data.SQL_Statement.SQL_Statement
@ -1002,18 +1003,13 @@ type Column
_ = [format, locale]
Error.throw <| Unsupported_Database_Operation.Error "`Column.format` is not implemented yet for the Database backends."
## PRIVATE
UNSTABLE
Cast the column to a specific type.
## Cast the column to a specific type.
Arguments:
- value_type: The `Value_Type` to cast the column to.
- on_problems: Specifies how to handle problems if they occur, reporting
them as warnings by default.
TODO [RW] this is a prototype needed for debugging, proper implementation
and testing will come with #6112.
In the Database backend, this will boil down to a CAST operation.
In the in-memory backend, a conversion will be performed according to
the following rules:
@ -1024,6 +1020,9 @@ type Column
length.
- Conversion between numeric types will replace values exceeding the
range of the target type with `Nothing`.
- Converting decimal numbers into integers will truncate or round them,
depending on the backend. If more control is needed, use the various
rounding functions (such as `round` or `floor`).
- Booleans may also be converted to numbers, with `True` being converted
to `1` and `False` to `0`. The reverse is not supported - use `iif`
instead.
@ -1032,24 +1031,19 @@ type Column
- If a `Date` is to be converted to `Date_Time`, it will be set at
midnight of the default system timezone.
? Conversion Precision
In the in-memory backend, if the conversion is lossy, a
`Lossy_Conversion` warning will be reported. The only exception is when
truncating a column which is already a text column - as then the
truncation seems like an intended behaviour, so it is not reported. If
truncating needs to occur when converting a non-text column, a warning
will still be reported.
Currently, the warning is not reported for Database backends.
If the target type cannot fit some of the values (for example due to too
small range), a `Conversion_Failure` may be reported according to the
`on_problems` rules. The Database backends may fail with `SQL_Error`
instead.
? Inexact Target Type
If the backend does not support the requested target type, the closest
supported type is chosen and a `Inexact_Type_Coercion` problem is
reported.
cast : Value_Type -> Problem_Behavior -> Column ! Illegal_Argument | Inexact_Type_Coercion | Lossy_Conversion
cast self value_type=self.value_type on_problems=Problem_Behavior.Report_Warning =
cast : Value_Type -> Problem_Behavior -> Column ! Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure
cast self value_type on_problems=Problem_Behavior.Report_Warning =
check_cast_compatibility self.value_type value_type <|
dialect = self.connection.dialect
type_mapping = dialect.get_type_mapping
target_sql_type = type_mapping.value_type_to_sql value_type on_problems

View File

@ -1526,9 +1526,7 @@ type Table
_ = [column, pattern, case_sensitivity, parse_values, on_problems]
Error.throw (Unsupported_Database_Operation.Error "Table.parse_to_columns is not implemented yet for the Database backends.")
## PRIVATE
UNSTABLE
Cast the selected columns to a specific type.
## Cast the selected columns to a specific type.
Returns a new table in which the selected columns are replaced with
columns having the new types.
@ -1539,9 +1537,6 @@ type Table
- on_problems: Specifies how to handle problems if they occur, reporting
them as warnings by default.
TODO [RW] this is a prototype needed for debugging, proper implementation
and testing will come with #6112.
In the Database backend, this will boil down to a CAST operation.
In the in-memory backend, a conversion will be performed according to
the following rules:
@ -1552,6 +1547,9 @@ type Table
length.
- Conversion between numeric types will replace values exceeding the
range of the target type with `Nothing`.
- Converting decimal numbers into integers will truncate or round them,
depending on the backend. If more control is needed, use the various
rounding functions (such as `round` or `floor`).
- Booleans may also be converted to numbers, with `True` being converted
to `1` and `False` to `0`. The reverse is not supported - use `iif`
instead.
@ -1560,16 +1558,10 @@ type Table
- If a `Date` is to be converted to `Date_Time`, it will be set at
midnight of the default system timezone.
? Conversion Precision
In the in-memory backend, if the conversion is lossy, a
`Lossy_Conversion` warning will be reported. The only exception is when
truncating a column which is already a text column - as then the
truncation seems like an intended behaviour, so it is not reported. If
truncating needs to occur when converting a non-text column, a warning
will still be reported.
Currently, the warning is not reported for Database backends.
If the target type cannot fit some of the values (for example due to too
small range), a `Conversion_Failure` may be reported according to the
`on_problems` rules. The Database backends may fail with `SQL_Error`
instead.
? Inexact Target Type
@ -1577,10 +1569,10 @@ type Table
supported type is chosen and a `Inexact_Type_Coercion` problem is
reported.
@columns Widget_Helpers.make_column_name_vector_selector
cast : (Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector)) -> Value_Type -> Problem_Behavior -> Table ! Illegal_Argument | Inexact_Type_Coercion | Lossy_Conversion
cast self columns=[0] value_type=Value_Type.Char on_problems=Problem_Behavior.Report_Warning =
selected = self.select_columns columns
selected.columns.fold self table-> column_to_cast->
cast : (Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector)) -> Value_Type -> Boolean -> Problem_Behavior -> Table ! Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure
cast self columns=[0] value_type error_on_missing_columns=True on_problems=Problem_Behavior.Report_Warning =
selected = self.columns_helper.resolve_columns columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems
selected.fold self table-> column_to_cast->
new_column = column_to_cast.cast value_type on_problems
table.set new_column new_name=column_to_cast.name set_mode=Set_Mode.Update

View File

@ -145,12 +145,19 @@ type SQLite_Dialect
make_cast : Internal_Column -> SQL_Type -> (SQL_Expression -> SQL_Type_Reference) -> Internal_Column
make_cast self column target_type _ =
mapping = self.get_type_mapping
sql_type_text = mapping.sql_type_to_text target_type
new_expression = SQL_Expression.Operation "CAST" [column.expression, SQL_Expression.Literal sql_type_text]
# We override the type here, because SQLite gets it wrong if the column starts with NULL values.
target_value_type = mapping.sql_type_to_value_type target_type
custom_cast = make_custom_cast column target_value_type mapping
new_expression = custom_cast.if_nothing <|
self.make_cast_expression column target_type
new_sql_type_reference = SQL_Type_Reference.from_constant target_type
Internal_Column.Value column.name new_sql_type_reference new_expression
## PRIVATE
make_cast_expression self column target_type =
mapping = self.get_type_mapping
sql_type_text = mapping.sql_type_to_text target_type
SQL_Expression.Operation "CAST" [column.expression, SQL_Expression.Literal sql_type_text]
## PRIVATE
needs_execute_query_for_type_inference : Boolean
needs_execute_query_for_type_inference self = True
@ -164,12 +171,15 @@ type SQLite_Dialect
So after unifying columns with mixed types, we add a cast to ensure that.
adapt_unified_column : Internal_Column -> Value_Type -> (SQL_Expression -> SQL_Type_Reference) -> Internal_Column
adapt_unified_column self column approximate_result_type infer_result_type_from_database_callback =
_ = infer_result_type_from_database_callback
# TODO [RW] This may be revisited with #6281.
case approximate_result_type of
Nothing -> column
_ ->
sql_type = self.get_type_mapping.value_type_to_sql approximate_result_type Problem_Behavior.Ignore
self.make_cast column sql_type infer_result_type_from_database_callback
new_expression = self.make_cast_expression column sql_type
new_sql_type_reference = SQL_Type_Reference.from_constant sql_type
Internal_Column.Value column.name new_sql_type_reference new_expression
## PRIVATE
prepare_fetch_types_query : SQL_Expression -> Context -> SQL_Statement
@ -353,3 +363,11 @@ decimal_div = Base_Generator.lift_binary_op "/" x-> y->
## PRIVATE
mod_op = Base_Generator.lift_binary_op "mod" x-> y->
x ++ " - FLOOR(CAST(" ++ x ++ " AS REAL) / CAST(" ++ y ++ " AS REAL)) * " ++ y
## PRIVATE
   Prepares a custom cast expression for types that need special handling in
   SQLite.

   It will return `Nothing` if the type does not require custom logic.
make_custom_cast column target_value_type type_mapping =
    if target_value_type.is_text then
        column_type = type_mapping.sql_type_to_value_type column.sql_type_reference.get
        if column_type == Value_Type.Boolean then
            # Booleans are rendered as the texts 'true' / 'false' using IIF,
            # because a plain CAST would not produce these texts.
            SQL_Expression.Operation "IIF" [column.expression, SQL_Expression.Literal "'true'", SQL_Expression.Literal "'false'"]

View File

@ -13,6 +13,7 @@ import project.Data.Type.Enso_Types
import project.Data.Type.Storage
import project.Data.Type.Value_Type_Helpers
import project.Data.Table.Table
import project.Internal.Cast_Helpers
import project.Internal.Java_Problems
import project.Internal.Naming_Helpers.Naming_Helpers
import project.Internal.Parse_Values_Helper
@ -21,7 +22,7 @@ import project.Data.Type.Value_Type_Helpers
from project.Data.Table import print_table
from project.Data.Type.Value_Type import Value_Type, Auto
from project.Errors import No_Index_Set_Error, Floating_Point_Equality, Invalid_Value_Type, Inexact_Type_Coercion
from project.Errors import No_Index_Set_Error, Floating_Point_Equality, Invalid_Value_Type, Inexact_Type_Coercion, Conversion_Failure
from project.Internal.Java_Exports import make_string_builder
polyglot java import org.enso.table.data.column.operation.map.MapOperationProblemBuilder
@ -1279,6 +1280,54 @@ type Column
_ -> Error.throw <| Illegal_Argument.Error <| "Unsupported format type: " + format.to_text
new_column
## Cast the column to a specific type.

   Arguments:
   - value_type: The `Value_Type` to cast the column to.
   - on_problems: Specifies how to handle problems if they occur, reporting
     them as warnings by default.

   In the Database backend, this will boil down to a CAST operation.
   In the in-memory backend, a conversion will be performed according to
   the following rules:
   - Anything can be cast into the `Mixed` type.
   - Converting to a `Char` type, the elements of the column will be
     converted to text. If it is fixed length, the texts will be trimmed or
     padded on the right with the space character to match the desired
     length.
   - Conversion between numeric types will replace values exceeding the
     range of the target type with `Nothing`.
   - Converting decimal numbers into integers will truncate or round them,
     depending on the backend. If more control is needed, use the various
     rounding functions (such as `round` or `floor`).
   - Booleans may also be converted to numbers, with `True` being converted
     to `1` and `False` to `0`. The reverse is not supported - use `iif`
     instead.
   - A `Date_Time` may be converted into a `Date` or `Time` type - the
     resulting value will be truncated to the desired type.
   - If a `Date` is to be converted to `Date_Time`, it will be set at
     midnight of the default system timezone.

   If the target type cannot fit some of the values (for example due to too
   small range), a `Conversion_Failure` may be reported according to the
   `on_problems` rules. The Database backends may fail with `SQL_Error`
   instead.

   ? Inexact Target Type
     If the backend does not support the requested target type, the closest
     supported type is chosen and an `Inexact_Type_Coercion` problem is
     reported.
cast : Value_Type -> Problem_Behavior -> Column ! Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure
cast self value_type on_problems=Problem_Behavior.Report_Warning =
    # First verify that the source and target types are compatible at all,
    # raising `Illegal_Argument` as a dataflow error if they are not.
    Cast_Helpers.check_cast_compatibility self.value_type value_type <|
        # `on_problems` is passed along so that any type-coercion problems
        # can be reported according to the chosen behavior.
        target_storage_type = Storage.from_value_type value_type on_problems
        cast_problem_builder = Cast_Helpers.new_java_problem_builder self.name value_type
        new_storage = self.java_column.getStorage.cast target_storage_type cast_problem_builder.to_java
        # Problems gathered on the Java side are translated to Enso values
        # and attached before the resulting column is returned.
        problems = cast_problem_builder.get_problems
        on_problems.attach_problems_before problems <|
            Column.from_storage self.name new_storage
## ALIAS Transform Column
Applies `function` to each item in this column and returns the column

View File

@ -873,9 +873,7 @@ type Table
parse_problem_builder.attach_problems_before on_problems <|
Table.new new_columns
## PRIVATE
UNSTABLE
Cast the selected columns to a specific type.
## Cast the selected columns to a specific type.
Returns a new table in which the selected columns are replaced with
columns having the new types.
@ -886,9 +884,6 @@ type Table
- on_problems: Specifies how to handle problems if they occur, reporting
them as warnings by default.
TODO [RW] this is a prototype needed for debugging, proper implementation
and testing will come with #6112.
In the Database backend, this will boil down to a CAST operation.
In the in-memory backend, a conversion will be performed according to
the following rules:
@ -899,6 +894,9 @@ type Table
length.
- Conversion between numeric types will replace values exceeding the
range of the target type with `Nothing`.
- Converting decimal numbers into integers will truncate or round them,
depending on the backend. If more control is needed, use the various
rounding functions (such as `round` or `floor`).
- Booleans may also be converted to numbers, with `True` being converted
to `1` and `False` to `0`. The reverse is not supported - use `iif`
instead.
@ -907,16 +905,10 @@ type Table
- If a `Date` is to be converted to `Date_Time`, it will be set at
midnight of the default system timezone.
? Conversion Precision
In the in-memory backend, if the conversion is lossy, a
`Lossy_Conversion` warning will be reported. The only exception is when
truncating a column which is already a text column - as then the
truncation seems like an intended behaviour, so it is not reported. If
truncating needs to occur when converting a non-text column, a warning
will still be reported.
Currently, the warning is not reported for Database backends.
If the target type cannot fit some of the values (for example due to too
small range), a `Conversion_Failure` may be reported according to the
`on_problems` rules. The Database backends may fail with `SQL_Error`
instead.
? Inexact Target Type
@ -924,11 +916,12 @@ type Table
supported type is chosen and a `Inexact_Type_Coercion` problem is
reported.
@columns Widget_Helpers.make_column_name_vector_selector
cast : (Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector)) -> Value_Type -> Problem_Behavior -> Table ! Illegal_Argument | Inexact_Type_Coercion | Lossy_Conversion
cast self columns=[0] value_type=Value_Type.Char on_problems=Problem_Behavior.Report_Warning =
_ = [columns, value_type, on_problems]
## TODO [RW] actual implementation in #6112
self
cast : (Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector)) -> Value_Type -> Boolean -> Problem_Behavior -> Table ! Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure
cast self columns=[0] value_type error_on_missing_columns=True on_problems=Problem_Behavior.Report_Warning =
selected = self.columns_helper.resolve_columns columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems
selected.fold self table-> column_to_cast->
new_column = column_to_cast.cast value_type on_problems
table.set new_column new_name=column_to_cast.name set_mode=Set_Mode.Update
## Splits a column of text into a set of new columns.
The original column will be removed from the table.

View File

@ -161,12 +161,13 @@ type Value_Type
_ -> False
## UNSTABLE
Checks if the `Value_Type` represents any numeric type - integer,
Checks if the `Value_Type` represents any numeric type - integer, byte,
floating point or decimal.
is_numeric : Boolean
is_numeric self = case self of
Value_Type.Integer _ -> True
Value_Type.Float _ -> True
Value_Type.Byte -> True
Value_Type.Decimal _ _ -> True
_ -> False

View File

@ -552,11 +552,23 @@ type Inexact_Type_Coercion
to_text self =
"Inexact_Type_Coercion.Warning (requested_type = " + self.requested_type.to_text + ") (actual_type = " + self.actual_type.to_text + ")"
## TODO figure out this error in #6112
type Lossy_Conversion
## Indicates that some likely not-insignificant information was lost during
a conversion.
Error
type Conversion_Failure
    ## Indicates that some values from the column could not be converted to the
       desired type.

       This may occur for example when a number does not fit the range of the
       target type.

       Arguments:
       - target_type: the `Value_Type` that was requested for the cast.
       - related_column: the name of the column in which the failure occurred.
       - affected_rows_count: how many rows failed to convert, or `Nothing` if
         the exact count is not known.
    Error (target_type : Value_Type) (related_column : Text) (affected_rows_count : Nothing|Integer)

    ## PRIVATE
       Create a human-readable version of the error.
    to_display_text : Text
    to_display_text self =
        rows_info = case self.affected_rows_count of
            Nothing -> "Some values"
            count -> count.to_text+" rows"
        rows_info + " could not be converted into the target type "+self.target_type.to_display_text+" when converting the column ["+self.related_column+"]."
type Invalid_Value_For_Type
## PRIVATE

View File

@ -0,0 +1,58 @@
from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import project.Data.Type.Value_Type.Value_Type
import project.Internal.Parse_Values_Helper
from project.Errors import Conversion_Failure
polyglot java import org.enso.table.data.column.operation.CastProblemBuilder
## PRIVATE
   Checks if one type can be cast into another and returns a dataflow error
   explaining the situation if not.

   Arguments:
   - source_type: the `Value_Type` of the column being cast.
   - target_type: the requested `Value_Type`.
   - action: a lazy computation that is run only if the types are compatible.
check_cast_compatibility source_type target_type ~action =
    # Casting to `Mixed`, casting to any text type, and a no-op cast to the
    # same type are always allowed.
    are_compatible = if (target_type == Value_Type.Mixed) || target_type.is_text || (source_type == target_type) then True else
        # A text column targeted at a parseable type should use `parse` instead.
        if source_type.is_text && is_a_valid_parse_target target_type then Error.throw (Illegal_Argument.Error "To parse a text column into "+target_type.to_display_text+" type, `parse` should be used instead of `cast`.") else
            if source_type == Value_Type.Boolean then target_type.is_numeric else
                if source_type.is_numeric then target_type.is_numeric else
                    case source_type of
                        Value_Type.Date_Time _ ->
                            (target_type == Value_Type.Date) || (target_type == Value_Type.Time)
                        Value_Type.Date -> target_type.has_date
                        Value_Type.Binary _ _ -> case target_type of
                            Value_Type.Binary _ _ -> True
                            _ -> False
                        _ -> False
    if are_compatible then action else
        Error.throw (Illegal_Argument.Error "Cannot cast "+source_type.to_display_text+" type into "+target_type.to_display_text+" type.")
## PRIVATE
   Checks if the type is a valid argument for `parse`.

   Uses `Meta` reflection: the constructor name of the given `Value_Type` atom
   is looked up in `Parse_Values_Helper.valid_parse_targets`.
is_a_valid_parse_target target_type =
    case Meta.meta target_type of
        atom : Meta.Atom ->
            Parse_Values_Helper.valid_parse_targets.contains atom.constructor.name
        _ -> False
## PRIVATE
   A wrapper around the Java `CastProblemBuilder` which translates problems
   gathered on the Java side into Enso error values.
type Cast_Problem_Builder
    ## PRIVATE
       Arguments:
       - column_name: name of the column being cast, used in error reports.
       - target_type: the requested `Value_Type`, used in error reports.
       - to_java: the underlying Java `CastProblemBuilder` instance.
    Value column_name target_type to_java

    ## PRIVATE
       Returns a vector of all reported problems.
    get_problems : Vector
    get_problems self =
        builder = Vector.new_builder
        java_instance = self.to_java
        # A positive lossy-conversion count is reported as a single
        # `Conversion_Failure` carrying the affected row count.
        lossy_conversion_rows = java_instance.getLossyConversionRowCount
        if lossy_conversion_rows > 0 then
            builder.append (Conversion_Failure.Error self.target_type self.column_name lossy_conversion_rows)
        builder.to_vector
## PRIVATE
   Creates a new `Cast_Problem_Builder` backed by a fresh Java
   `CastProblemBuilder` instance.
new_java_problem_builder : Text -> Value_Type -> Cast_Problem_Builder
new_java_problem_builder column_name target_type =
    Cast_Problem_Builder.Value column_name target_type CastProblemBuilder.new

View File

@ -13,3 +13,7 @@ translate_parsing_problem expected_value_type problem = case problem of
Invalid_Format.Error java_problem.column expected_value_type (Vector.from_polyglot_array java_problem.cells)
_ ->
Panic.throw (Illegal_State.Error "Reported an unknown problem type: "+problem.to_text)
## PRIVATE
   The names of `Value_Type` constructors that are valid targets for `parse`.
valid_parse_targets =
    ['Integer', 'Float', 'Date', 'Date_Time', 'Time', 'Boolean']

View File

@ -64,6 +64,17 @@ type Table_Column_Helper
problem_builder.attach_problems_before on_problems <|
if result.is_empty then Error.throw No_Output_Columns else result
## PRIVATE
   Works like `select_columns` but will not throw `No_Output_Columns` error
   and will return proper columns instead of internal columns.

   Useful, when selecting a subset of columns to transform.

   Arguments:
   - selectors: the column selector(s) to resolve.
   - error_on_missing_columns: if `True`, missing columns become errors;
     otherwise they are reported through `on_problems`.
   - on_problems: how to handle the problems gathered during selection.
   - reorder: whether the result should follow the selector order instead of
     the table's column order.
resolve_columns : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Boolean -> Problem_Behavior -> Boolean -> Vector
resolve_columns self selectors error_on_missing_columns on_problems reorder=False =
    problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns
    result = self.select_columns_helper selectors reorder problem_builder
    # Any selection problems are attached before the columns are returned.
    problem_builder.attach_problems_before on_problems <|
        result.map self.make_column
## PRIVATE
A helper function encapsulating shared code for `remove_columns`
implementations of various Table variants. See the documentation for the

View File

@ -5,6 +5,7 @@ import Standard.Base.Metadata.Display
import project.Data.Table.Table
import project.Data.Aggregate_Column.Aggregate_Column
import project.Internal.Parse_Values_Helper
## PRIVATE
Make an aggregate column selector.
@ -72,8 +73,8 @@ make_order_by_selector table display=Display.Always =
Selector for type argument on `Column.parse`.
parse_type_selector : Single_Choice
parse_type_selector =
choice = ['Auto', 'Value_Type.Integer', 'Value_Type.Float', 'Value_Type.Date', 'Value_Type.Date_Time', 'Value_Type.Time', 'Value_Type.Boolean']
names = ['Auto', 'Integer', 'Float', 'Date', 'Date_Time', 'Time', 'Boolean']
valid_parse_targets = Parse_Values_Helper.valid_parse_targets
choice = ['Auto'] + (valid_parse_targets.map t-> 'Value_Type.'+t)
names = ['Auto'] + valid_parse_targets
options = names.zip choice . map pair-> Option pair.first pair.second
Single_Choice display=Display.Always values=options

View File

@ -90,7 +90,7 @@ expect_warning expected_warning result =
warnings = get_attached_warnings result
found = warnings.find if_missing=Nothing x->
(x == expected_warning) || (x.is_a expected_warning)
if found.is_nothing then
found.if_nothing <|
loc = Meta.get_source_location 2
Test.fail "Expected the result to contain a warning: "+expected_warning.to_text+", but it did not. The warnings were "+warnings.short_display_text+' (at '+loc+').'

View File

@ -14,6 +14,7 @@ import org.enso.interpreter.dsl.Builtin;
import org.enso.interpreter.runtime.EnsoContext;
import org.enso.interpreter.runtime.data.text.Text;
import org.enso.interpreter.runtime.library.dispatch.TypesLibrary;
import org.enso.polyglot.common_utils.Core_Date_Utils;
import java.time.DateTimeException;
import java.time.LocalDate;
@ -121,6 +122,9 @@ public final class EnsoDate implements TruffleObject {
@CompilerDirectives.TruffleBoundary
@ExportMessage
public Object toDisplayString(boolean allowSideEffects) {
return DateTimeFormatter.ISO_LOCAL_DATE.format(date);
return DATE_FORMATTER.format(date);
}
private static final DateTimeFormatter DATE_FORMATTER =
Core_Date_Utils.defaultLocalDateFormatter();
}

View File

@ -17,6 +17,7 @@ import org.enso.interpreter.dsl.Builtin;
import org.enso.interpreter.runtime.EnsoContext;
import org.enso.interpreter.runtime.data.text.Text;
import org.enso.interpreter.runtime.library.dispatch.TypesLibrary;
import org.enso.polyglot.common_utils.Core_Date_Utils;
@ExportLibrary(InteropLibrary.class)
@ExportLibrary(TypesLibrary.class)
@ -122,7 +123,7 @@ public final class EnsoTimeOfDay implements TruffleObject {
@Builtin.Method(description = "Return this datetime to the datetime in the provided time zone.")
@CompilerDirectives.TruffleBoundary
public Text toText() {
return Text.create(DateTimeFormatter.ISO_LOCAL_TIME.format(localTime));
return Text.create(TIME_FORMATTER.format(localTime));
}
@ExportMessage
@ -168,6 +169,9 @@ public final class EnsoTimeOfDay implements TruffleObject {
@CompilerDirectives.TruffleBoundary
@ExportMessage
public Object toDisplayString(boolean allowSideEffects) {
return DateTimeFormatter.ISO_LOCAL_TIME.format(localTime);
return TIME_FORMATTER.format(localTime);
}
private static final DateTimeFormatter TIME_FORMATTER =
Core_Date_Utils.defaultLocalTimeFormatter();
}

View File

@ -33,6 +33,16 @@ public class Core_Date_Utils {
.toFormatter();
}
/**
 * @return the default {@code Date} formatter: the ISO-8601 extended local date format
 *     ({@link DateTimeFormatter#ISO_LOCAL_DATE}, e.g. {@code 2011-12-03}).
 */
public static DateTimeFormatter defaultLocalDateFormatter() {
  return DateTimeFormatter.ISO_LOCAL_DATE;
}
/**
 * @return the default {@code Time_Of_Day} formatter: the ISO-8601 extended local time format
 *     ({@link DateTimeFormatter#ISO_LOCAL_TIME}, e.g. {@code 10:15:30}).
 */
public static DateTimeFormatter defaultLocalTimeFormatter() {
  return DateTimeFormatter.ISO_LOCAL_TIME;
}
/**
* Parse a date time string into a ZonedDateTime.
*

View File

@ -10,26 +10,26 @@ import org.enso.table.data.column.storage.type.IntegerType;
public abstract class Builder {
public static Builder getForType(StorageType type, int size) {
Builder builder = switch (type) {
case AnyObjectType() -> new ObjectBuilder(size);
case BooleanType() -> new BoolBuilder(size);
case DateType() -> new DateBuilder(size);
case DateTimeType() -> new DateTimeBuilder(size);
case TimeOfDayType() -> new TimeOfDayBuilder(size);
case FloatType(Bits bits) ->
switch (bits) {
case AnyObjectType x -> new ObjectBuilder(size);
case BooleanType x -> new BoolBuilder(size);
case DateType x -> new DateBuilder(size);
case DateTimeType x -> new DateTimeBuilder(size);
case TimeOfDayType x -> new TimeOfDayBuilder(size);
case FloatType floatType ->
switch (floatType.bits()) {
case BITS_64 -> NumericBuilder.createDoubleBuilder(size);
default -> throw new IllegalArgumentException("Only 64-bit floats are currently supported.");
};
case IntegerType(Bits bits) ->
switch (bits) {
case IntegerType integerType ->
switch (integerType.bits()) {
case BITS_64 -> NumericBuilder.createLongBuilder(size);
default -> throw new IllegalArgumentException("TODO: Builders other than 64-bit int are not yet supported.");
};
case TextType(long maxLength, boolean isFixed) -> {
if (isFixed) {
case TextType textType -> {
if (textType.fixedLength()) {
throw new IllegalArgumentException("Fixed-length text builders are not yet supported yet.");
}
if (maxLength >= 0) {
if (textType.maxLength() >= 0) {
throw new IllegalArgumentException("Text builders with a maximum length are not yet supported yet.");
}

View File

@ -0,0 +1,13 @@
package org.enso.table.data.column.operation;
public class CastProblemBuilder {
private int lossyConversionRowCount = 0;
public void reportLossyConversion() {
lossyConversionRowCount++;
}
public int getLossyConversionRowCount() {
return lossyConversionRowCount;
}
}

View File

@ -3,18 +3,18 @@ package org.enso.table.data.column.storage;
import java.util.BitSet;
import java.util.List;
import java.util.function.IntFunction;
import org.enso.base.polyglot.Polyglot_Utils;
import org.enso.table.data.column.builder.object.BoolBuilder;
import org.enso.table.data.column.builder.object.Builder;
import org.enso.table.data.column.builder.object.InferredBuilder;
import org.enso.table.data.column.builder.object.NumericBuilder;
import org.enso.table.data.column.builder.object.StringBuilder;
import org.enso.table.data.column.operation.CastProblemBuilder;
import org.enso.table.data.column.operation.map.MapOpStorage;
import org.enso.table.data.column.operation.map.MapOperation;
import org.enso.table.data.column.operation.map.MapOperationProblemBuilder;
import org.enso.table.data.column.operation.map.UnaryMapOperation;
import org.enso.table.data.column.operation.map.bool.BooleanIsInOp;
import org.enso.table.data.column.storage.type.BooleanType;
import org.enso.table.data.column.storage.type.StorageType;
import org.enso.table.data.column.storage.type.*;
import org.enso.table.data.index.Index;
import org.enso.table.data.mask.OrderMask;
import org.enso.table.data.mask.SliceRange;
@ -81,12 +81,14 @@ public final class BoolStorage extends Storage<Boolean> {
}
@Override
protected Storage<?> runVectorizedMap(String name, Object argument, MapOperationProblemBuilder problemBuilder) {
protected Storage<?> runVectorizedMap(
String name, Object argument, MapOperationProblemBuilder problemBuilder) {
return ops.runMap(name, this, argument, problemBuilder);
}
@Override
protected Storage<?> runVectorizedZip(String name, Storage<?> argument, MapOperationProblemBuilder problemBuilder) {
protected Storage<?> runVectorizedZip(
String name, Storage<?> argument, MapOperationProblemBuilder problemBuilder) {
return ops.runZip(name, this, argument, problemBuilder);
}
@ -198,10 +200,10 @@ public final class BoolStorage extends Storage<Boolean> {
private static IntFunction<Object> makeRowProvider(Value value) {
if (value.isHostObject() && value.asHostObject() instanceof Storage<?> s) {
return i->(Object)s.getItemBoxed(i);
return i -> (Object) s.getItemBoxed(i);
}
var converted = Polyglot_Utils.convertPolyglotValue(value);
return i->converted;
return i -> converted;
}
private static MapOpStorage<Boolean, BoolStorage> buildOps() {
@ -217,7 +219,8 @@ public final class BoolStorage extends Storage<Boolean> {
.add(
new MapOperation<>(Maps.EQ) {
@Override
public BoolStorage runMap(BoolStorage storage, Object arg, MapOperationProblemBuilder problemBuilder) {
public BoolStorage runMap(
BoolStorage storage, Object arg, MapOperationProblemBuilder problemBuilder) {
if (arg == null) {
return BoolStorage.makeEmpty(storage.size);
} else if (arg instanceof Boolean v) {
@ -233,7 +236,8 @@ public final class BoolStorage extends Storage<Boolean> {
}
@Override
public BoolStorage runZip(BoolStorage storage, Storage<?> arg, MapOperationProblemBuilder problemBuilder) {
public BoolStorage runZip(
BoolStorage storage, Storage<?> arg, MapOperationProblemBuilder problemBuilder) {
BitSet out = new BitSet();
BitSet missing = new BitSet();
for (int i = 0; i < storage.size; i++) {
@ -251,7 +255,8 @@ public final class BoolStorage extends Storage<Boolean> {
.add(
new MapOperation<>(Maps.AND) {
@Override
public BoolStorage runMap(BoolStorage storage, Object arg, MapOperationProblemBuilder problemBuilder) {
public BoolStorage runMap(
BoolStorage storage, Object arg, MapOperationProblemBuilder problemBuilder) {
if (arg == null) {
return BoolStorage.makeEmpty(storage.size);
} else if (arg instanceof Boolean v) {
@ -266,7 +271,8 @@ public final class BoolStorage extends Storage<Boolean> {
}
@Override
public BoolStorage runZip(BoolStorage storage, Storage<?> arg, MapOperationProblemBuilder problemBuilder) {
public BoolStorage runZip(
BoolStorage storage, Storage<?> arg, MapOperationProblemBuilder problemBuilder) {
if (arg instanceof BoolStorage v) {
BitSet missing = v.isMissing.get(0, storage.size);
missing.or(storage.isMissing);
@ -295,7 +301,8 @@ public final class BoolStorage extends Storage<Boolean> {
.add(
new MapOperation<>(Maps.OR) {
@Override
public BoolStorage runMap(BoolStorage storage, Object arg, MapOperationProblemBuilder problemBuilder) {
public BoolStorage runMap(
BoolStorage storage, Object arg, MapOperationProblemBuilder problemBuilder) {
if (arg == null) {
return BoolStorage.makeEmpty(storage.size);
} else if (arg instanceof Boolean v) {
@ -310,7 +317,8 @@ public final class BoolStorage extends Storage<Boolean> {
}
@Override
public BoolStorage runZip(BoolStorage storage, Storage<?> arg, MapOperationProblemBuilder problemBuilder) {
public BoolStorage runZip(
BoolStorage storage, Storage<?> arg, MapOperationProblemBuilder problemBuilder) {
if (arg instanceof BoolStorage v) {
BitSet missing = v.isMissing.get(0, storage.size);
missing.or(storage.isMissing);
@ -391,4 +399,51 @@ public final class BoolStorage extends Storage<Boolean> {
return new BoolStorage(newValues, newMissing, newSize, negated);
}
@Override
public Storage<?> cast(StorageType targetType, CastProblemBuilder castProblemBuilder) {
  // Identity conversion: a boolean column cast to boolean is unchanged.
  if (targetType instanceof BooleanType) {
    return this;
  }
  // Casting to the mixed (Any) type only needs a wrapping facade - no copying.
  if (targetType instanceof AnyObjectType) {
    return new MixedStorageFacade(this);
  }
  int rowCount = size();
  if (targetType instanceof FloatType) {
    // True -> 1.0, False -> 0.0; nulls stay null.
    NumericBuilder floatBuilder = NumericBuilder.createDoubleBuilder(rowCount);
    for (int row = 0; row < rowCount; row++) {
      if (isNa(row)) {
        floatBuilder.appendNulls(1);
      } else {
        floatBuilder.appendDouble(values.get(row) ? 1.0 : 0.0);
      }
    }
    return floatBuilder.seal();
  }
  if (targetType instanceof IntegerType) {
    // True -> 1, False -> 0; nulls stay null.
    NumericBuilder integerBuilder = NumericBuilder.createLongBuilder(rowCount);
    for (int row = 0; row < rowCount; row++) {
      if (isNa(row)) {
        integerBuilder.appendNulls(1);
      } else {
        integerBuilder.appendLong(values.get(row) ? 1 : 0);
      }
    }
    return integerBuilder.seal();
  }
  if (targetType instanceof TextType textType) {
    // Renders the canonical Enso literals "True"/"False".
    StringBuilder textBuilder = new StringBuilder(rowCount);
    for (int row = 0; row < rowCount; row++) {
      if (isMissing.get(row)) {
        textBuilder.appendNulls(1);
      } else {
        textBuilder.append(values.get(row) ? "True" : "False");
      }
    }
    return StringStorage.adapt(textBuilder.seal(), textType);
  }
  throw new IllegalStateException("Conversion of BoolStorage to " + targetType + " is not supported");
}
}

View File

@ -1,14 +1,23 @@
package org.enso.table.data.column.storage;
import java.time.LocalDate;
import java.time.LocalTime;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import org.enso.polyglot.common_utils.Core_Date_Utils;
import org.enso.table.data.column.builder.object.Builder;
import org.enso.table.data.column.builder.object.DateBuilder;
import org.enso.table.data.column.builder.object.DateTimeBuilder;
import org.enso.table.data.column.builder.object.StringBuilder;
import org.enso.table.data.column.operation.CastProblemBuilder;
import org.enso.table.data.column.operation.map.MapOpStorage;
import org.enso.table.data.column.operation.map.UnaryIntegerOp;
import org.enso.table.data.column.operation.map.datetime.DateTimeIsInOp;
import org.enso.table.data.column.storage.type.DateTimeType;
import org.enso.table.data.column.storage.type.DateType;
import org.enso.table.data.column.storage.type.StorageType;
import org.enso.table.data.column.storage.type.TextType;
public final class DateStorage extends SpecializedStorage<LocalDate> {
/**
@ -67,4 +76,37 @@ public final class DateStorage extends SpecializedStorage<LocalDate> {
public Builder createDefaultBuilderOfSameType(int capacity) {
return new DateBuilder(capacity);
}
@Override
public Storage<?> cast(StorageType targetType, CastProblemBuilder castProblemBuilder) {
  if (targetType instanceof DateTimeType) {
    return castToDateTime();
  } else if (targetType instanceof TextType textType) {
    return castToText(textType);
  } else {
    return super.cast(targetType, castProblemBuilder);
  }
}

/** Converts each date to a date-time at start of day in the system default time zone. */
private Storage<?> castToDateTime() {
  int rowCount = size();
  DateTimeBuilder dateTimeBuilder = new DateTimeBuilder(rowCount);
  for (int row = 0; row < rowCount; row++) {
    LocalDate value = data[row];
    if (value == null) {
      dateTimeBuilder.appendNulls(1);
    } else {
      ZonedDateTime atMidnight = value.atStartOfDay().atZone(ZoneId.systemDefault());
      dateTimeBuilder.append(atMidnight);
    }
  }
  return dateTimeBuilder.seal();
}

/** Formats each date using the default local-date formatter; nulls stay null. */
private Storage<?> castToText(TextType textType) {
  int rowCount = size();
  StringBuilder textBuilder = new StringBuilder(rowCount);
  var dateFormatter = Core_Date_Utils.defaultLocalDateFormatter();
  for (int row = 0; row < rowCount; row++) {
    LocalDate value = data[row];
    if (value == null) {
      textBuilder.appendNulls(1);
    } else {
      textBuilder.append(value.format(dateFormatter));
    }
  }
  return StringStorage.adapt(textBuilder.seal(), textType);
}
}

View File

@ -1,13 +1,20 @@
package org.enso.table.data.column.storage;
import org.enso.polyglot.common_utils.Core_Date_Utils;
import org.enso.table.data.column.builder.object.Builder;
import org.enso.table.data.column.builder.object.DateBuilder;
import org.enso.table.data.column.builder.object.DateTimeBuilder;
import org.enso.table.data.column.builder.object.StringBuilder;
import org.enso.table.data.column.builder.object.TimeOfDayBuilder;
import org.enso.table.data.column.operation.CastProblemBuilder;
import org.enso.table.data.column.operation.map.MapOpStorage;
import org.enso.table.data.column.operation.map.UnaryIntegerOp;
import org.enso.table.data.column.operation.map.datetime.DateTimeIsInOp;
import org.enso.table.data.column.storage.type.DateTimeType;
import org.enso.table.data.column.storage.type.StorageType;
import org.enso.table.data.column.storage.type.*;
import org.enso.table.formatting.DateTimeFormatter;
import java.time.LocalDate;
import java.time.LocalTime;
import java.time.ZonedDateTime;
public final class DateTimeStorage extends SpecializedStorage<ZonedDateTime> {
@ -69,4 +76,50 @@ public final class DateTimeStorage extends SpecializedStorage<ZonedDateTime> {
public Builder createDefaultBuilderOfSameType(int capacity) {
return new DateTimeBuilder(capacity);
}
@Override
public Storage<?> cast(StorageType targetType, CastProblemBuilder castProblemBuilder) {
  if (targetType instanceof DateType) {
    return castToDate();
  } else if (targetType instanceof TimeOfDayType) {
    return castToTimeOfDay();
  } else if (targetType instanceof TextType textType) {
    return castToText(textType);
  } else {
    return super.cast(targetType, castProblemBuilder);
  }
}

/** Drops the time and zone components, keeping only the local date. */
private Storage<?> castToDate() {
  int rowCount = size();
  DateBuilder dateBuilder = new DateBuilder(rowCount);
  for (int row = 0; row < rowCount; row++) {
    ZonedDateTime value = data[row];
    if (value == null) {
      dateBuilder.appendNulls(1);
    } else {
      dateBuilder.append(value.toLocalDate());
    }
  }
  return dateBuilder.seal();
}

/** Drops the date and zone components, keeping only the local time of day. */
private Storage<?> castToTimeOfDay() {
  int rowCount = size();
  TimeOfDayBuilder timeBuilder = new TimeOfDayBuilder(rowCount);
  for (int row = 0; row < rowCount; row++) {
    ZonedDateTime value = data[row];
    if (value == null) {
      timeBuilder.appendNulls(1);
    } else {
      timeBuilder.append(value.toLocalTime());
    }
  }
  return timeBuilder.seal();
}

/** Formats each date-time using the default zoned date-time formatter; nulls stay null. */
private Storage<?> castToText(TextType textType) {
  int rowCount = size();
  StringBuilder textBuilder = new StringBuilder(rowCount);
  var dateTimeFormatter = Core_Date_Utils.defaultZonedDateTimeFormatter();
  for (int row = 0; row < rowCount; row++) {
    ZonedDateTime value = data[row];
    if (value == null) {
      textBuilder.appendNulls(1);
    } else {
      textBuilder.append(value.format(dateTimeFormatter));
    }
  }
  return StringStorage.adapt(textBuilder.seal(), textType);
}
}

View File

@ -1,24 +1,27 @@
package org.enso.table.data.column.storage;
import java.util.BitSet;
import java.util.List;
import org.enso.table.data.column.builder.object.Builder;
import org.enso.table.data.column.builder.object.NumericBuilder;
import org.enso.table.data.column.builder.object.StringBuilder;
import org.enso.table.data.column.operation.CastProblemBuilder;
import org.enso.table.data.column.operation.map.MapOpStorage;
import org.enso.table.data.column.operation.map.MapOperationProblemBuilder;
import org.enso.table.data.column.operation.map.UnaryMapOperation;
import org.enso.table.data.column.operation.map.numeric.DoubleBooleanOp;
import org.enso.table.data.column.operation.map.numeric.DoubleIsInOp;
import org.enso.table.data.column.operation.map.numeric.DoubleNumericOp;
import org.enso.table.data.column.storage.type.FloatType;
import org.enso.table.data.column.storage.type.StorageType;
import org.enso.table.data.column.storage.type.*;
import org.enso.table.data.index.Index;
import org.enso.table.data.mask.OrderMask;
import org.enso.table.data.mask.SliceRange;
import org.graalvm.polyglot.Value;
/** A column containing floating point numbers. */
import java.util.BitSet;
import java.util.List;
/**
* A column containing floating point numbers.
*/
public final class DoubleStorage extends NumericStorage<Double> {
private final long[] data;
private final BitSet isMissing;
@ -28,8 +31,7 @@ public final class DoubleStorage extends NumericStorage<Double> {
/**
* @param data the underlying data
* @param size the number of items stored
* @param isMissing a bit set denoting at index {@code i} whether or not the value at index {@code
* i} is missing.
* @param isMissing a bit set denoting at index {@code i} whether the value at index {@code i} is missing.
*/
public DoubleStorage(long[] data, int size, BitSet isMissing) {
this.data = data;
@ -43,13 +45,17 @@ public final class DoubleStorage extends NumericStorage<Double> {
return new DoubleStorage(new long[0], size, isMissing);
}
/** @inheritDoc */
/**
* @inheritDoc
*/
@Override
public int size() {
return size;
}
/** @inheritDoc */
/**
* @inheritDoc
*/
@Override
public int countMissing() {
return isMissing.cardinality();
@ -73,13 +79,17 @@ public final class DoubleStorage extends NumericStorage<Double> {
return isMissing.get(idx) ? null : Double.longBitsToDouble(data[idx]);
}
/** @inheritDoc */
/**
* @inheritDoc
*/
@Override
public StorageType getType() {
return FloatType.FLOAT_64;
}
/** @inheritDoc */
/**
* @inheritDoc
*/
@Override
public boolean isNa(long idx) {
return isMissing.get((int) idx);
@ -357,4 +367,46 @@ public final class DoubleStorage extends NumericStorage<Double> {
return new DoubleStorage(newData, newSize, newMissing);
}
@Override
public Storage<?> cast(StorageType targetType, CastProblemBuilder castProblemBuilder) {
  if (targetType instanceof FloatType) {
    // Identity conversion.
    return this;
  } else if (targetType instanceof AnyObjectType) {
    return new MixedStorageFacade(this);
  } else if (targetType instanceof IntegerType integerType) {
    return castToInteger(integerType, castProblemBuilder);
  } else if (targetType instanceof TextType textType) {
    return castToText(textType);
  } else {
    throw new IllegalStateException("Conversion of DoubleStorage to " + targetType + " is not supported");
  }
}

/**
 * Converts to integers; values outside the target type's range become null and are reported as a
 * lossy conversion.
 */
private Storage<?> castToInteger(IntegerType integerType, CastProblemBuilder castProblemBuilder) {
  int rowCount = size();
  NumericBuilder longBuilder = NumericBuilder.createLongBuilder(rowCount);
  double lowerBound = (double) integerType.getMinValue();
  double upperBound = (double) integerType.getMaxValue();
  for (int row = 0; row < rowCount; row++) {
    if (isMissing.get(row)) {
      longBuilder.appendNulls(1);
    } else {
      double value = getItem(row);
      if (value < lowerBound || value > upperBound) {
        longBuilder.appendNulls(1);
        castProblemBuilder.reportLossyConversion();
      } else {
        // NOTE(review): fractional values are silently truncated towards zero here
        // without reporting a lossy conversion - confirm this is intended.
        longBuilder.appendLong((long) value);
      }
    }
  }
  return longBuilder.seal();
}

/** Renders each value with {@link Double#toString(double)}; nulls stay null. */
private Storage<?> castToText(TextType textType) {
  int rowCount = size();
  StringBuilder textBuilder = new StringBuilder(rowCount);
  for (int row = 0; row < rowCount; row++) {
    if (isMissing.get(row)) {
      textBuilder.appendNulls(1);
    } else {
      textBuilder.append(Double.toString(getItem(row)));
    }
  }
  return StringStorage.adapt(textBuilder.seal(), textType);
}
}

View File

@ -3,14 +3,15 @@ package org.enso.table.data.column.storage;
import org.enso.base.polyglot.NumericConverter;
import org.enso.table.data.column.builder.object.Builder;
import org.enso.table.data.column.builder.object.NumericBuilder;
import org.enso.table.data.column.builder.object.StringBuilder;
import org.enso.table.data.column.operation.CastProblemBuilder;
import org.enso.table.data.column.operation.map.MapOpStorage;
import org.enso.table.data.column.operation.map.MapOperationProblemBuilder;
import org.enso.table.data.column.operation.map.UnaryMapOperation;
import org.enso.table.data.column.operation.map.numeric.LongBooleanOp;
import org.enso.table.data.column.operation.map.numeric.LongIsInOp;
import org.enso.table.data.column.operation.map.numeric.LongNumericOp;
import org.enso.table.data.column.storage.type.IntegerType;
import org.enso.table.data.column.storage.type.StorageType;
import org.enso.table.data.column.storage.type.*;
import org.enso.table.data.index.Index;
import org.enso.table.data.mask.OrderMask;
import org.enso.table.data.mask.SliceRange;
@ -19,7 +20,9 @@ import org.graalvm.polyglot.Value;
import java.util.BitSet;
import java.util.List;
/** A column storing 64-bit integers. */
/**
* A column storing 64-bit integers.
*/
public final class LongStorage extends NumericStorage<Long> {
// TODO [RW] at some point we will want to add separate storage classes for byte, short and int,
// for more compact storage and more efficient handling of smaller integers; for now we will be
@ -51,13 +54,17 @@ public final class LongStorage extends NumericStorage<Long> {
this(data, data.length, new BitSet());
}
/** @inheritDoc */
/**
* @inheritDoc
*/
@Override
public int size() {
return size;
}
/** @inheritDoc */
/**
* @inheritDoc
*/
@Override
public int countMissing() {
return isMissing.cardinality();
@ -81,14 +88,18 @@ public final class LongStorage extends NumericStorage<Long> {
return isMissing.get(idx) ? null : data[idx];
}
/** @inheritDoc */
/**
* @inheritDoc
*/
@Override
public StorageType getType() {
// TODO add possibility to set integer bit limit
// TODO add possibility to set integer bit limit (#5159)
return IntegerType.INT_64;
}
/** @inheritDoc */
/**
* @inheritDoc
*/
@Override
public boolean isNa(long idx) {
return isMissing.get((int) idx);
@ -446,4 +457,38 @@ public final class LongStorage extends NumericStorage<Long> {
return new LongStorage(newData, newSize, newMissing);
}
@Override
public Storage<?> cast(StorageType targetType, CastProblemBuilder castProblemBuilder) {
  if (targetType instanceof IntegerType) {
    // Identity conversion (bit-width variants are not distinguished yet, see #5159).
    return this;
  }
  if (targetType instanceof AnyObjectType) {
    return new MixedStorageFacade(this);
  }
  int rowCount = size();
  if (targetType instanceof FloatType) {
    // Widening to double; nulls stay null. NOTE(review): magnitudes above 2^53 lose
    // precision and this is not reported as a problem - confirm intended.
    NumericBuilder floatBuilder = NumericBuilder.createDoubleBuilder(rowCount);
    for (int row = 0; row < rowCount; row++) {
      if (isNa(row)) {
        floatBuilder.appendNulls(1);
      } else {
        floatBuilder.appendDouble((double) getItem(row));
      }
    }
    return floatBuilder.seal();
  }
  if (targetType instanceof TextType textType) {
    StringBuilder textBuilder = new StringBuilder(rowCount);
    for (int row = 0; row < rowCount; row++) {
      if (isMissing.get(row)) {
        textBuilder.appendNulls(1);
      } else {
        textBuilder.append(Long.toString(getItem(row)));
      }
    }
    return StringStorage.adapt(textBuilder.seal(), textType);
  }
  throw new IllegalStateException("Conversion of LongStorage to " + targetType + " is not supported");
}
}

View File

@ -1,6 +1,7 @@
package org.enso.table.data.column.storage;
import org.enso.table.data.column.builder.object.Builder;
import org.enso.table.data.column.operation.CastProblemBuilder;
import org.enso.table.data.column.operation.map.MapOperationProblemBuilder;
import org.enso.table.data.column.storage.type.AnyObjectType;
import org.enso.table.data.column.storage.type.StorageType;
@ -99,4 +100,9 @@ public class MixedStorageFacade extends Storage<Object> {
Storage<?> newStorage = underlyingStorage.slice(ranges);
return new MixedStorageFacade(newStorage);
}
@Override
public Storage<?> cast(StorageType targetType, CastProblemBuilder castProblemBuilder) {
  // Fix: this stub used to return null, so any cast of a mixed column crashed with a
  // NullPointerException at the call site. A cast to the mixed (Any) type is the
  // identity for this facade; any other target is delegated to the wrapped storage,
  // which knows how to convert its actual values and report problems.
  if (targetType instanceof AnyObjectType) {
    return this;
  }
  return underlyingStorage.cast(targetType, castProblemBuilder);
}
}

View File

@ -4,10 +4,12 @@ import java.util.BitSet;
import org.enso.table.data.column.builder.object.Builder;
import org.enso.table.data.column.builder.object.ObjectBuilder;
import org.enso.table.data.column.builder.object.StringBuilder;
import org.enso.table.data.column.operation.map.MapOpStorage;
import org.enso.table.data.column.operation.map.UnaryMapOperation;
import org.enso.table.data.column.storage.type.AnyObjectType;
import org.enso.table.data.column.storage.type.StorageType;
import org.enso.table.data.column.storage.type.TextType;
/** A column storing arbitrary objects. */
public final class ObjectStorage extends SpecializedStorage<Object> {

View File

@ -2,9 +2,14 @@ package org.enso.table.data.column.storage;
import java.util.BitSet;
import java.util.List;
import org.enso.table.data.column.builder.object.StringBuilder;
import org.enso.table.data.column.operation.CastProblemBuilder;
import org.enso.table.data.column.operation.map.MapOpStorage;
import org.enso.table.data.column.operation.map.MapOperationProblemBuilder;
import org.enso.table.data.column.storage.type.AnyObjectType;
import org.enso.table.data.column.storage.type.StorageType;
import org.enso.table.data.column.storage.type.TextType;
import org.enso.table.data.index.Index;
import org.enso.table.data.mask.OrderMask;
import org.enso.table.data.mask.SliceRange;
@ -149,4 +154,27 @@ public abstract class SpecializedStorage<T> extends Storage<T> {
return newInstance(newData, newSize);
}
@Override
public Storage<?> cast(StorageType targetType, CastProblemBuilder castProblemBuilder) {
  // Identity conversion.
  if (targetType == getType()) {
    return this;
  }
  // Any specialized storage can be viewed as a mixed column via a facade.
  if (targetType instanceof AnyObjectType) {
    return new MixedStorageFacade(this);
  }
  // Generic text fallback: render each element with toString; nulls stay null.
  if (targetType instanceof TextType textType) {
    int rowCount = size();
    StringBuilder textBuilder = new StringBuilder(rowCount);
    for (int row = 0; row < rowCount; row++) {
      Object value = data[row];
      if (value == null) {
        textBuilder.appendNulls(1);
      } else {
        textBuilder.append(value.toString());
      }
    }
    return StringStorage.adapt(textBuilder.seal(), textType);
  }
  throw new IllegalStateException("Conversion of " + this.getClass().getSimpleName() + " to " + targetType + " is not supported");
}
}

View File

@ -4,6 +4,7 @@ import org.enso.base.polyglot.Polyglot_Utils;
import org.enso.table.data.column.builder.object.Builder;
import org.enso.table.data.column.builder.object.InferredBuilder;
import org.enso.table.data.column.builder.object.ObjectBuilder;
import org.enso.table.data.column.operation.CastProblemBuilder;
import org.enso.table.data.column.operation.map.MapOperationProblemBuilder;
import org.enso.table.data.column.storage.type.StorageType;
import org.enso.table.data.mask.OrderMask;
@ -345,4 +346,6 @@ public abstract class Storage<T> {
}
return new LongStorage(data);
}
public abstract Storage<?> cast(StorageType targetType, CastProblemBuilder castProblemBuilder);
}

View File

@ -1,10 +1,10 @@
package org.enso.table.data.column.storage;
import java.util.BitSet;
import org.enso.base.Text_Utils;
import org.enso.table.data.column.builder.object.Builder;
import org.enso.table.data.column.builder.object.StringBuilder;
import org.enso.table.data.column.operation.CastProblemBuilder;
import org.enso.table.data.column.operation.map.MapOpStorage;
import org.enso.table.data.column.operation.map.MapOperation;
import org.enso.table.data.column.operation.map.MapOperationProblemBuilder;
@ -13,6 +13,7 @@ import org.enso.table.data.column.operation.map.text.LikeOp;
import org.enso.table.data.column.operation.map.text.StringBooleanOp;
import org.enso.table.data.column.operation.map.text.StringIsInOp;
import org.enso.table.data.column.operation.map.text.StringStringOp;
import org.enso.table.data.column.storage.type.AnyObjectType;
import org.enso.table.data.column.storage.type.StorageType;
import org.enso.table.data.column.storage.type.TextType;
import org.graalvm.polyglot.Value;
@ -47,12 +48,14 @@ public final class StringStorage extends SpecializedStorage<String> {
private static final MapOpStorage<String, SpecializedStorage<String>> ops = buildOps();
@Override
protected Storage<?> runVectorizedMap(String name, Object argument, MapOperationProblemBuilder problemBuilder) {
protected Storage<?> runVectorizedMap(
String name, Object argument, MapOperationProblemBuilder problemBuilder) {
return ops.runMap(name, this, argument, problemBuilder);
}
@Override
protected Storage<?> runVectorizedZip(String name, Storage<?> argument, MapOperationProblemBuilder problemBuilder) {
protected Storage<?> runVectorizedZip(
String name, Storage<?> argument, MapOperationProblemBuilder problemBuilder) {
return ops.runZip(name, this, argument, problemBuilder);
}
@ -70,12 +73,24 @@ public final class StringStorage extends SpecializedStorage<String> {
return new StringBuilder(capacity);
}
@Override
public Storage<?> cast(StorageType targetType, CastProblemBuilder castProblemBuilder) {
  // Text-to-text casts only need to adapt the length constraints; Any wraps a facade.
  if (targetType instanceof TextType textType) {
    return adapt(this, textType);
  }
  if (targetType instanceof AnyObjectType) {
    return new MixedStorageFacade(this);
  }
  throw new IllegalStateException("Conversion of StringStorage to " + targetType + " is not supported");
}
private static MapOpStorage<String, SpecializedStorage<String>> buildOps() {
MapOpStorage<String, SpecializedStorage<String>> t = ObjectStorage.buildObjectOps();
t.add(
new MapOperation<>(Maps.EQ) {
@Override
public BoolStorage runMap(SpecializedStorage<String> storage, Object arg, MapOperationProblemBuilder problemBuilder) {
public BoolStorage runMap(
SpecializedStorage<String> storage,
Object arg,
MapOperationProblemBuilder problemBuilder) {
BitSet r = new BitSet();
BitSet missing = new BitSet();
for (int i = 0; i < storage.size(); i++) {
@ -89,7 +104,10 @@ public final class StringStorage extends SpecializedStorage<String> {
}
@Override
public BoolStorage runZip(SpecializedStorage<String> storage, Storage<?> arg, MapOperationProblemBuilder problemBuilder) {
public BoolStorage runZip(
SpecializedStorage<String> storage,
Storage<?> arg,
MapOperationProblemBuilder problemBuilder) {
BitSet r = new BitSet();
BitSet missing = new BitSet();
for (int i = 0; i < storage.size(); i++) {
@ -140,7 +158,8 @@ public final class StringStorage extends SpecializedStorage<String> {
});
t.add(new LikeOp());
t.add(new StringIsInOp<>());
t.add(new StringStringOp(Maps.ADD) {
t.add(
new StringStringOp(Maps.ADD) {
@Override
protected String doString(String a, String b) {
return a + b;
@ -148,4 +167,19 @@ public final class StringStorage extends SpecializedStorage<String> {
});
return t;
}
/**
 * Adapts a variable-length string storage to a target text type that may be fixed-length,
 * ensuring values are trimmed or padded wherever necessary.
 */
public static Storage<String> adapt(Storage<String> storage, TextType type) {
  // Variable-length targets need no adjustment.
  if (!type.fixedLength()) {
    return storage;
  }
  // TODO [RW] #5159
  throw new IllegalStateException("Fixed length conversion is currently not supported.");
}
}

View File

@ -1,12 +1,18 @@
package org.enso.table.data.column.storage;
import java.time.LocalTime;
import java.time.ZonedDateTime;
import org.enso.polyglot.common_utils.Core_Date_Utils;
import org.enso.table.data.column.builder.object.Builder;
import org.enso.table.data.column.builder.object.StringBuilder;
import org.enso.table.data.column.builder.object.TimeOfDayBuilder;
import org.enso.table.data.column.operation.CastProblemBuilder;
import org.enso.table.data.column.operation.map.MapOpStorage;
import org.enso.table.data.column.operation.map.datetime.DateTimeIsInOp;
import org.enso.table.data.column.storage.type.AnyObjectType;
import org.enso.table.data.column.storage.type.StorageType;
import org.enso.table.data.column.storage.type.TextType;
import org.enso.table.data.column.storage.type.TimeOfDayType;
public final class TimeOfDayStorage extends SpecializedStorage<LocalTime> {
@ -45,4 +51,24 @@ public final class TimeOfDayStorage extends SpecializedStorage<LocalTime> {
public Builder createDefaultBuilderOfSameType(int capacity) {
return new TimeOfDayBuilder(capacity);
}
@Override
public Storage<?> cast(StorageType targetType, CastProblemBuilder castProblemBuilder) {
  // Only the text conversion is specific to time-of-day; everything else is generic.
  if (!(targetType instanceof TextType textType)) {
    return super.cast(targetType, castProblemBuilder);
  }
  int rowCount = size();
  StringBuilder textBuilder = new StringBuilder(rowCount);
  var timeFormatter = Core_Date_Utils.defaultLocalTimeFormatter();
  for (int row = 0; row < rowCount; row++) {
    LocalTime value = data[row];
    if (value == null) {
      textBuilder.appendNulls(1);
    } else {
      textBuilder.append(value.format(timeFormatter));
    }
  }
  return StringStorage.adapt(textBuilder.seal(), textType);
}
}

View File

@ -2,4 +2,22 @@ package org.enso.table.data.column.storage.type;
/**
 * A storage type representing signed integers.
 *
 * @param bits the bit-width of the integers held by a storage of this type
 */
public record IntegerType(Bits bits) implements StorageType {
// The only width in active use so far; narrower widths are tracked in TODOs referencing #5159.
public static final IntegerType INT_64 = new IntegerType(Bits.BITS_64);
/** Returns the largest value representable in this type's bit-width. */
public long getMaxValue() {
return switch (bits) {
case BITS_8 -> Byte.MAX_VALUE;
case BITS_16 -> Short.MAX_VALUE;
case BITS_32 -> Integer.MAX_VALUE;
case BITS_64 -> Long.MAX_VALUE;
};
}
/** Returns the smallest (most negative) value representable in this type's bit-width. */
public long getMinValue() {
return switch (bits) {
case BITS_8 -> Byte.MIN_VALUE;
case BITS_16 -> Short.MIN_VALUE;
case BITS_32 -> Integer.MIN_VALUE;
case BITS_64 -> Long.MIN_VALUE;
};
}
}

View File

@ -1,2 +1,4 @@
*.bak
*.db
spreadsheet.xls
spreadsheet.xlsx

View File

@ -1,104 +0,0 @@
from Standard.Base import all
from Standard.Table import Value_Type
import Standard.Table.Data.Type.Value_Type.Bits
from Standard.Test import Test, Problems
import Standard.Test.Extensions
from project.Common_Table_Operations.Util import run_default_backend
main = run_default_backend spec
spec setup =
prefix = setup.prefix
table_builder = setup.table_builder
materialize = setup.materialize
# TODO this spec will be expanded in #6112
Test.group prefix+"Column.cast" pending=(if setup.is_database.not then "Cast is not implemented in the in-memory backend yet.") <|
Test.specify "should allow to cast an integer column to text" <|
t = table_builder [["X", [1, 2, 3000]]]
c = t.at "X" . cast Value_Type.Char
c.value_type.is_text . should_be_true
c.to_vector . should_equal ["1", "2", "3000"]
Test.specify "should allow to cast a boolean column to integer" <|
t = table_builder [["X", [True, False, True]]]
c = t.at "X" . cast Value_Type.Integer
c.value_type.is_integer . should_be_true
c.to_vector . should_equal [1, 0, 1]
Test.specify "should allow to cast a boolean column to text" pending="TODO: sqlite has issue with this, figure out in #6112" <|
t = table_builder [["X", [True, False, True]]]
c = t.at "X" . cast Value_Type.Char
c.value_type.is_text . should_be_true
c.to_vector . should_equal ["true", "false", "true"]
Test.specify "should allow to cast a text column to fixed-length" pending=(if setup.test_selection.fixed_length_text_columns.not then "Fixed-length Char columns are not supported by this backend.") <|
t = table_builder [["X", ["a", "DEF", "a slightly longer text"]]]
c = t.at "X" . cast (Value_Type.Char size=3 variable_length=False)
c.value_type . should_equal (Value_Type.Char size=3 variable_length=False)
c.to_vector . should_equal ["a ", "DEF", "a s"]
Test.specify "should work if the first row is NULL" <|
t = table_builder [["X", [Nothing, 1, 2, 3000]], ["Y", [Nothing, True, False, True]]]
c1 = t.at "X" . cast Value_Type.Char
c1.value_type.is_text . should_be_true
c1.to_vector . should_equal [Nothing, "1", "2", "3000"]
c2 = t.at "Y" . cast Value_Type.Integer
c2.value_type.is_integer . should_be_true
c2.to_vector . should_equal [Nothing, 1, 0, 1]
Test.specify "should not lose the type after further operations were performed on the result" <|
t = table_builder [["X", [1, 2, 3000]], ["Y", [True, False, True]]]
c1 = t.at "X" . cast Value_Type.Char
c2 = t.at "Y" . cast Value_Type.Integer
c3 = c1 + '_suffix'
c3.value_type.is_text . should_be_true
c3.to_vector . should_equal ["1_suffix", "2_suffix", "3000_suffix"]
c4 = c2 + 1000
c4.value_type.is_integer . should_be_true
c4.to_vector . should_equal [1001, 1000, 1001]
Test.specify "should not lose the type after further operations were performed on the result, even if the first row is NULL" <|
t = table_builder [["X", [Nothing, 1, 2, 3000]], ["Y", [Nothing, True, False, True]]]
c1 = t.at "X" . cast Value_Type.Char
c2 = t.at "Y" . cast Value_Type.Integer
c3 = c1 + '_suffix'
c3.value_type.is_text . should_be_true
c3.to_vector . should_equal [Nothing, "1_suffix", "2_suffix", "3000_suffix"]
c4 = c2 + 1000
c4.value_type.is_integer . should_be_true
c4.to_vector . should_equal [Nothing, 1001, 1000, 1001]
Test.group prefix+"Table.cast" pending=(if setup.is_database.not then "Cast is not implemented in the in-memory backend yet.") <|
Test.specify 'should cast the columns "in-place" and not reorder them' <|
t = table_builder [["X", [1, 2, 3000]], ["Y", [4, 5, 6]], ["Z", [7, 8, 9]], ["A", [True, False, True]]]
t2 = t.cast ["Z", "Y"] Value_Type.Char
t2.column_names . should_equal ["X", "Y", "Z", "A"]
t2.at "X" . value_type . is_integer . should_be_true
t2.at "Y" . value_type . is_text . should_be_true
t2.at "Z" . value_type . is_text . should_be_true
t2.at "A" . value_type . is_boolean . should_be_true
t2.at "X" . to_vector . should_equal [1, 2, 3000]
t2.at "Y" . to_vector . should_equal ["4", "5", "6"]
t2.at "Z" . to_vector . should_equal ["7", "8", "9"]
t2.at "A" . to_vector . should_equal [True, False, True]
if setup.test_selection.fixed_length_text_columns then
Test.specify "should preserve the overridden types when materialized" pending="TODO: #5159 needed" <|
t = table_builder [["X", [1, 2, 100]], ["Y", ["a", "abcdef", "abc"]]]
t2 = t . cast "X" (Value_Type.Integer Bits.Bits_16) . cast "Y" (Value_Type.Char size=3 variable_length=False)
t3 = materialize t2
t3.at "X" . value_type . should_equal (t2.at "X" . value_type)
t3.at "Y" . value_type . should_equal (Value_Type.Char size=3 variable_length=False)
t3.at "Y" . to_vector . should_equal ["a ", "abc", "abc"]

View File

@ -0,0 +1,323 @@
from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
from Standard.Table import Value_Type
import Standard.Table.Data.Type.Value_Type.Bits
from Standard.Table.Errors import Conversion_Failure
from Standard.Database.Errors import Unsupported_Database_Operation
from Standard.Test import Test, Problems
import Standard.Test.Extensions
from project.Common_Table_Operations.Util import run_default_backend
polyglot java import java.lang.Long as Java_Long
main = run_default_backend spec
## A small custom type used by the tests below to exercise casting of
   arbitrary (non-primitive) objects to text.
type My_Type
    Value x

    ## Renders the wrapped value in a distinctive bracketed format, so the
       tests can recognize that `to_text` was actually invoked.
    to_text : Text
    to_text self =
        "{{{MY Type [x=" + self.x.to_text + "] }}}"
## Common specification of the `cast` operation (and a simple variant of
   `parse`) shared between the in-memory and Database backends.

   Arguments:
   - setup: backend-specific test harness providing a name `prefix`, a
     `table_builder` constructor, a `materialize` function and feature flags
     under `test_selection`.

   NOTE(review): leading indentation appears to have been stripped in this
   extract — Enso is whitespace-sensitive, so the nesting below must be
   restored from the original file before this can run.
spec setup =
# Backend-specific helpers and feature flags.
prefix = setup.prefix
table_builder = setup.table_builder
materialize = setup.materialize
supports_dates = setup.test_selection.date_time
# --- Casting various column types to text -------------------------------
Test.group prefix+"Table/Column.cast - to text" <|
Test.specify "should allow to cast columns of various basic types to text" <|
t = table_builder [["X", [1, 2, 3000]], ["Y", [True, False, True]], ["Z", [1.5, 0.125, -2.5]], ["W", ["a", "DEF", "a slightly longer text"]]]
t2 = t.cast t.column_names Value_Type.Char
t2.at "X" . value_type . is_text . should_be_true
t2.at "Y" . value_type . is_text . should_be_true
t2.at "Z" . value_type . is_text . should_be_true
t2.at "W" . value_type . is_text . should_be_true
t2.at "X" . to_vector . should_equal ["1", "2", "3000"]
# Depending on the backend, the case of True/False literals may differ.
t2.at "Y" . to_vector . map (_.to_case Case.Lower) . should_equal ["true", "false", "true"]
t2.at "Z" . to_vector . should_equal ["1.5", "0.125", "-2.5"]
t2.at "W" . to_vector . should_equal ["a", "DEF", "a slightly longer text"]
# Date/time tests only run on backends declaring date_time support.
if supports_dates then
Test.specify "should allow to cast date/time columns to text" <|
t = table_builder [["X", [Date.new 2015 1 1, Date.new 2023 12 31]], ["Y", [Time_Of_Day.new 1 2 3, Time_Of_Day.new 23 57 59]], ["Z", [Date_Time.new 2015 1 1 1 2 3, Date_Time.new 2023 11 30 22 45 44]]]
t2 = t.cast t.column_names Value_Type.Char
t2.at "X" . value_type . is_text . should_be_true
t2.at "Y" . value_type . is_text . should_be_true
t2.at "Z" . value_type . is_text . should_be_true
t2.at "X" . to_vector . should_equal ["2015-01-01", "2023-12-31"]
t2.at "Y" . to_vector . should_equal ["01:02:03", "23:57:59"]
# The particular format depends on the backend.
vz = t2.at "Z" . to_vector
vz.first . should_contain "2015-01-01"
vz.first . should_contain "01:02:03"
vz.second . should_contain "2023-11-30"
vz.second . should_contain "22:45:44"
# Casting arbitrary objects to text is only supported in-memory.
if setup.is_database.not then
Test.specify "should allow to cast a column of objects to text" <|
t = table_builder [["X", [My_Type.Value 42, My_Type.Value "X"]]]
c = t.at "X" . cast Value_Type.Char
c.value_type.is_text . should_be_true
c.to_vector . should_equal ["{{{MY Type [x=42] }}}", "{{{MY Type [x=X] }}}"]
if setup.test_selection.fixed_length_text_columns then
Test.specify "should allow to cast a text column to fixed-length" <|
t = table_builder [["X", ["a", "DEF", "a slightly longer text"]]]
c = t.at "X" . cast (Value_Type.Char size=3 variable_length=False)
c.value_type . should_equal (Value_Type.Char size=3 variable_length=False)
c.to_vector . should_equal ["a ", "DEF", "a s"]
# No Conversion_Failure warning here, because we started with text, so it was expected we will trim it if needed.
Problems.assume_no_problems c
Test.specify "should allow casting a non-text column to fixed-length text" <|
t = table_builder [["X", [1, 22, 333]]]
c = t.at "X" . cast (Value_Type.Char size=3 variable_length=False)
c.value_type . should_equal (Value_Type.Char size=3 variable_length=False)
c.to_vector . should_equal ["1 ", "22 ", "333"]
Problems.assume_no_problems c
Test.specify "should warn when losing data if the fixed-length text length is too short to fit the data" pending=(if setup.is_database then "Conversion_Failure is not supported in Database yet.") <|
t = table_builder [["X", [15, 1000000, 123456]]]
c = t.at "X" . cast (Value_Type.Char size=3 variable_length=False)
c.value_type . should_equal (Value_Type.Char size=3 variable_length=False)
c.to_vector . should_equal ["15 ", "100", "123"]
Problems.expect_warning Conversion_Failure c
# --- Numeric conversions ------------------------------------------------
Test.group prefix+"Table/Column.cast - numeric" <|
Test.specify "should allow to cast a boolean column to integer" <|
t = table_builder [["X", [True, False, True]]]
c = t.at "X" . cast Value_Type.Integer
vt = c.value_type
Test.with_clue "Expecting "+vt.to_display_text+" to be Integer. " <|
vt.is_integer . should_be_true
c.to_vector . should_equal [1, 0, 1]
Test.specify "should allow to cast an integer column to floating point" <|
t = table_builder [["X", [1, 2, 3]]]
c = t.at "X" . cast Value_Type.Float
c.value_type.is_floating_point . should_be_true
c.to_vector . should_equal [1.0, 2.0, 3.0]
Test.specify "should allow to cast an integer column to a smaller bit-width and larger bit-width" pending="TODO: #5159" <|
t = table_builder [["X", [1, 2, 3]]]
c = t.at "X" . cast (Value_Type.Integer Bits.Bits_16)
c.value_type . should_equal (Value_Type.Integer Bits.Bits_16)
c.to_vector . should_equal [1, 2, 3]
# A value that does not fit in 16 bits becomes Nothing with a warning.
t2 = table_builder [["X", [1, 2, 12000000]]]
c2 = t2.at "X" . cast (Value_Type.Integer Bits.Bits_16)
c2.value_type . should_equal (Value_Type.Integer Bits.Bits_16)
c2.to_vector . should_equal [1, 2, Nothing]
# This can likely only be checked on in-memory.
Problems.expect_warning Conversion_Failure c2
r3 = t2.at "X" . cast (Value_Type.Integer Bits.Bits_16) on_problems=Problem_Behavior.Report_Error
r3.should_fail_with Conversion_Failure
# Now converting the 16-bit column `c` into 32 bits.
c3 = c.cast (Value_Type.Integer Bits.Bits_32)
c3.value_type . should_equal (Value_Type.Integer Bits.Bits_32)
c3.to_vector . should_equal [1, 2, 3]
Test.specify "should allow to cast a floating point column to integer" <|
t = table_builder [["X", [1.0001, 2.25, 4.0]]]
c = t.at "X" . cast Value_Type.Integer
vt = c.value_type
Test.with_clue "Expecting "+vt.to_display_text+" to be Integer. " <|
vt.is_integer . should_be_true
c.to_vector . should_equal [1, 2, 4]
# Not reporting Lossy Conversion as converting floats to integers obviously truncates the value.
Problems.assume_no_problems c
# The backend may either truncate or round.
t2 = table_builder [["X", [1.1, 4.9]]]
c2 = t2.at "X" . cast Value_Type.Integer
v2 = c2.to_vector
[[1, 4], [1, 5]] . should_contain v2
if setup.is_database.not then
Test.specify "should report Conversion_Failure if converting a huge float to an integer overflows it" <|
# Doubles strictly larger than Long.MAX_VALUE (or smaller than
# Long.MIN_VALUE) cannot be represented and must convert to Nothing.
max_long = Java_Long.MAX_VALUE
too_big_double = (max_long + 1.0) * 1000.0
(too_big_double > max_long) . should_be_true
min_long = Java_Long.MIN_VALUE
too_small_double = (min_long - 1.0) * 1000.0
(too_small_double < min_long) . should_be_true
v = [1.0, 2.1, max_long, too_big_double, min_long, too_small_double, 4.0]
t = table_builder [["X", v]]
t.at "X" . to_vector . should_equal v
t.at "X" . value_type . should_equal Value_Type.Float
c = t.at "X" . cast Value_Type.Integer
c.value_type . should_equal Value_Type.Integer
c.to_vector . should_equal [1, 2, max_long, Nothing, min_long, Nothing, 4]
warning = Problems.expect_warning Conversion_Failure c
warning.to_display_text . should_contain "2 rows could not be converted"
# --- Date/time conversions ----------------------------------------------
if supports_dates then
Test.group prefix+"Table/Column.cast - date/time" <|
Test.specify "should allow to get the Date part from a Date_Time" <|
t = table_builder [["X", [Date_Time.new 2015 1 2 3 4 5, Date_Time.new 2023 12 31 23 56 59]]]
c = t.at "X" . cast Value_Type.Date
c.value_type . should_equal Value_Type.Date
c.to_vector . should_equal [Date.new 2015 1 2, Date.new 2023 12 31]
Test.specify "should allow to get the Time_Of_Day part from a Date_Time" <|
t = table_builder [["X", [Date_Time.new 2015 1 2 3 4 5, Date_Time.new 2023 12 31 23 56 59]]]
c = t.at "X" . cast Value_Type.Time
c.value_type . should_equal Value_Type.Time
c.to_vector . should_equal [Time_Of_Day.new 3 4 5, Time_Of_Day.new 23 56 59]
Test.specify "should allow to convert a Date into Date_Time" <|
day1 = Date.new 2015 1 2
day2 = Date.new 2023 12 31
t = table_builder [["X", [day1, day2]]]
c = t.at "X" . cast Value_Type.Date_Time
c.value_type . should_equal Value_Type.Date_Time
vz = c.to_vector
# We cannot rely on what timezone the backend uses, so we just ensure that the time difference between the two results is consistent.
diff = Duration.between vz.first vz.second
expected_diff = Duration.between day1.to_date_time day2.to_date_time
diff . should_equal expected_diff
# --- Error reporting and type-propagation invariants --------------------
Test.group prefix+"Table/Column.cast - checking invariants" <|
Test.specify "should report an error for unsupported conversions" <|
t = table_builder [["X", [1, 2, 3]]]
r1 = t.at "X" . cast Value_Type.Boolean
r1.should_fail_with Illegal_Argument
Test.specify "should report an error pointing to the Table.parse method where applicable" <|
t = table_builder [["X", ["1", "2", "3"]]]
r1 = t.at "X" . cast Value_Type.Integer
r1.should_fail_with Illegal_Argument
r1.to_display_text . should_contain "`parse` should be used instead"
Test.specify "should work if the first row is NULL" <|
t = table_builder [["X", [Nothing, 1, 2, 3000]], ["Y", [Nothing, True, False, True]]]
c1 = t.at "X" . cast Value_Type.Char
c1.value_type.is_text . should_be_true
c1.to_vector . should_equal [Nothing, "1", "2", "3000"]
c2 = t.at "Y" . cast Value_Type.Integer
c2.value_type . should_equal Value_Type.Integer
c2.to_vector . should_equal [Nothing, 1, 0, 1]
Test.specify "should not lose the type after further operations were performed on the result" <|
t = table_builder [["X", [1, 2, 3000]], ["Y", [True, False, True]]]
c1 = t.at "X" . cast Value_Type.Char
c2 = t.at "Y" . cast Value_Type.Integer
c3 = c1 + '_suffix'
c3.value_type.is_text . should_be_true
c3.to_vector . should_equal ["1_suffix", "2_suffix", "3000_suffix"]
c4 = c2 + 1000
vt4 = c4.value_type
Test.with_clue "Expecting "+vt4.to_display_text+" to be Integer. " <|
vt4.is_integer . should_be_true
c4.to_vector . should_equal [1001, 1000, 1001]
Test.specify "should not lose the type after further operations were performed on the result, even if the first row is NULL" <|
t = table_builder [["X", [Nothing, 1, 2, 3000]], ["Y", [Nothing, True, False, True]]]
c1 = t.at "X" . cast Value_Type.Char
c2 = t.at "Y" . cast Value_Type.Integer
c3 = c1 + '_suffix'
c3.value_type.is_text . should_be_true
c3.to_vector . should_equal [Nothing, "1_suffix", "2_suffix", "3000_suffix"]
c4 = c2 + 1000
vt4 = c4.value_type
Test.with_clue "Expecting "+vt4.to_display_text+" to be Integer. " <|
vt4.is_integer . should_be_true
c4.to_vector . should_equal [Nothing, 1001, 1000, 1001]
# Table-level cast must keep the original column ordering.
Test.specify 'Table.cast should cast the columns "in-place" and not reorder them' <|
t = table_builder [["X", [1, 2, 3000]], ["Y", [4, 5, 6]], ["Z", [7, 8, 9]], ["A", [True, False, True]]]
t2 = t.cast ["Z", "Y"] Value_Type.Char
t2.column_names . should_equal ["X", "Y", "Z", "A"]
t2.at "X" . value_type . is_integer . should_be_true
t2.at "Y" . value_type . is_text . should_be_true
t2.at "Z" . value_type . is_text . should_be_true
t2.at "A" . value_type . is_boolean . should_be_true
t2.at "X" . to_vector . should_equal [1, 2, 3000]
t2.at "Y" . to_vector . should_equal ["4", "5", "6"]
t2.at "Z" . to_vector . should_equal ["7", "8", "9"]
t2.at "A" . to_vector . should_equal [True, False, True]
if setup.test_selection.fixed_length_text_columns then
Test.specify "should preserve the overridden types when materialized" pending="TODO: #5159 needed" <|
t = table_builder [["X", [1, 2, 100]], ["Y", ["a", "abcdef", "abc"]]]
t2 = t . cast "X" (Value_Type.Integer Bits.Bits_16) . cast "Y" (Value_Type.Char size=3 variable_length=False)
t3 = materialize t2
t3.at "X" . value_type . should_equal (t2.at "X" . value_type)
t3.at "Y" . value_type . should_equal (Value_Type.Char size=3 variable_length=False)
t3.at "Y" . to_vector . should_equal ["a ", "abc", "abc"]
# --- Minimal parse coverage (in-memory only, per the pending message) ---
Test.group prefix+"Simple variant of Table/Column.parse in all backends" pending=(if setup.is_database then "parse is not yet implemented in DB") <|
Test.specify "should be able to parse simple integers" <|
t = table_builder [["X", ["42", "0", "-1"]]]
c1 = t.at "X" . parse Value_Type.Integer
c1.value_type.is_integer . should_be_true
c1.to_vector . should_equal [42, 0, -1]
c2 = t.parse ["X"] Value_Type.Integer . at "X"
c2.value_type.is_integer . should_be_true
c2.to_vector . should_equal [42, 0, -1]
Test.specify "should be able to parse simple floats" <|
t = table_builder [["X", ["42.5", "0.25", "-1.0"]]]
c1 = t.at "X" . parse Value_Type.Float
c1.value_type.is_floating_point . should_be_true
c1.to_vector . should_equal [42.5, 0.25, -1.0]
c2 = t.parse ["X"] Value_Type.Float . at "X"
c2.value_type.is_floating_point . should_be_true
c2.to_vector . should_equal [42.5, 0.25, -1.0]
if supports_dates then
Test.specify "should be able to parse dates using a default format" <|
t = table_builder [["X", ["2018-01-01", "2023-12-31"]]]
c1 = t.at "X" . parse Value_Type.Date
c1.value_type.should_equal Value_Type.Date
c1.to_vector . should_equal [Date.new 2018 1 1, Date.new 2023 12 31]
c2 = t.parse ["X"] Value_Type.Date . at "X"
c2.value_type.should_equal Value_Type.Date
c2.to_vector . should_equal [Date.new 2018 1 1, Date.new 2023 12 31]
if supports_dates.not then
Test.specify "should report that date parsing is unsupported" <|
t = table_builder [["X", ["2018-01-01", "2023-12-31"]]]
r1 = t.at "X" . parse Value_Type.Date
r1.should_fail_with Unsupported_Database_Operation
r2 = t.parse ["X"] Value_Type.Date
r2.should_fail_with Unsupported_Database_Operation
Test.specify "should be able to parse booleans with default format" <|
t = table_builder [["X", ["true", "false", "true"]]]
c1 = t.at "X" . parse Value_Type.Boolean
c1.value_type.should_equal Value_Type.Boolean
c1.to_vector . should_equal [True, False, True]
c2 = t.parse ["X"] Value_Type.Boolean . at "X"
c2.value_type.should_equal Value_Type.Boolean
c2.to_vector . should_equal [True, False, True]

View File

@ -4,7 +4,7 @@ import project.Common_Table_Operations.Aggregate_Spec
import project.Common_Table_Operations.Column_Operations_Spec
import project.Common_Table_Operations.Core_Spec
import project.Common_Table_Operations.Cross_Tab_Spec
import project.Common_Table_Operations.Cast_Spec
import project.Common_Table_Operations.Conversion_Spec
import project.Common_Table_Operations.Date_Time_Spec
import project.Common_Table_Operations.Distinct_Spec
import project.Common_Table_Operations.Expression_Spec
@ -95,7 +95,7 @@ spec setup =
Select_Columns_Spec.spec setup
Column_Operations_Spec.spec setup
Date_Time_Spec.spec setup
Cast_Spec.spec setup
Conversion_Spec.spec setup
Aggregate_Spec.spec setup
Filter_Spec.spec setup
Missing_Values_Spec.spec setup

View File

@ -1,6 +1,7 @@
package org.enso.table_test_helpers;
import org.enso.table.data.column.builder.object.Builder;
import org.enso.table.data.column.operation.CastProblemBuilder;
import org.enso.table.data.column.operation.map.MapOperationProblemBuilder;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.type.IntegerType;
@ -107,4 +108,9 @@ public class ExplodingStorage extends Storage<Long> {
// NOTE(review): presumably a deliberate stub — this looks like a test helper
// storage whose slicing behavior is irrelevant to the tests; confirm callers
// never dereference the result.
public Storage<Long> slice(List<SliceRange> ranges) {
return null;
}
// Satisfies the new abstract `cast` method added to Storage in this change.
// NOTE(review): returns null — presumably acceptable for this test helper
// because the tests never cast it; verify against its usages.
@Override
public Storage<?> cast(StorageType targetType, CastProblemBuilder castProblemBuilder) {
return null;
}
}