mirror of
https://github.com/enso-org/enso.git
synced 2024-12-23 07:12:20 +03:00
parent
cf16d32894
commit
c690559ec4
@ -578,6 +578,7 @@
|
|||||||
- [Renamed `Decimal` to `Float`.][7807]
|
- [Renamed `Decimal` to `Float`.][7807]
|
||||||
- [Implemented `Date_Time_Formatter` for more user-friendly date/time format
|
- [Implemented `Date_Time_Formatter` for more user-friendly date/time format
|
||||||
parsing.][7826]
|
parsing.][7826]
|
||||||
|
- [Implemented `Table.auto_value_types` for in-memory tables.][7908]
|
||||||
|
|
||||||
[debug-shortcuts]:
|
[debug-shortcuts]:
|
||||||
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
||||||
@ -820,6 +821,7 @@
|
|||||||
[7776]: https://github.com/enso-org/enso/pull/7776
|
[7776]: https://github.com/enso-org/enso/pull/7776
|
||||||
[7807]: https://github.com/enso-org/enso/pull/7807
|
[7807]: https://github.com/enso-org/enso/pull/7807
|
||||||
[7826]: https://github.com/enso-org/enso/pull/7826
|
[7826]: https://github.com/enso-org/enso/pull/7826
|
||||||
|
[7908]: https://github.com/enso-org/enso/pull/7908
|
||||||
|
|
||||||
#### Enso Compiler
|
#### Enso Compiler
|
||||||
|
|
||||||
|
@ -1,6 +1,4 @@
|
|||||||
import project.Any.Any
|
import project.Any.Any
|
||||||
import project.Data.Ordering.Comparable
|
|
||||||
import project.Data.Ordering.Ordering
|
|
||||||
import project.Nothing.Nothing
|
import project.Nothing.Nothing
|
||||||
from project.Data.Boolean.Boolean import False, True
|
from project.Data.Boolean.Boolean import False, True
|
||||||
|
|
||||||
@ -98,4 +96,3 @@ type Boolean
|
|||||||
if (27 % 3) == 0 then IO.println "Fizz"
|
if (27 % 3) == 0 then IO.println "Fizz"
|
||||||
if_then : Any -> Any | Nothing
|
if_then : Any -> Any | Nothing
|
||||||
if_then self ~on_true = @Builtin_Method "Boolean.if_then"
|
if_then self ~on_true = @Builtin_Method "Boolean.if_then"
|
||||||
|
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import project.Any.Any
|
import project.Any.Any
|
||||||
|
import project.Data.Ordering.Comparable
|
||||||
import project.Data.Locale.Locale
|
import project.Data.Locale.Locale
|
||||||
import project.Data.Text.Text
|
import project.Data.Text.Text
|
||||||
import project.Error.Error
|
import project.Error.Error
|
||||||
@ -1169,3 +1170,39 @@ type Number_Parse_Error
|
|||||||
to_display_text : Text
|
to_display_text : Text
|
||||||
to_display_text self =
|
to_display_text self =
|
||||||
"Could not parse " + self.text.to_text + " as a double."
|
"Could not parse " + self.text.to_text + " as a double."
|
||||||
|
|
||||||
|
## A wrapper type that ensures that a function may only take positive integers.
|
||||||
|
type Positive_Integer
|
||||||
|
## PRIVATE
|
||||||
|
This constructor should not be used by user code as it can be used to
|
||||||
|
break the invariants. Instead, this type should only be created by `new`
|
||||||
|
or conversions.
|
||||||
|
Value (integer : Integer)
|
||||||
|
|
||||||
|
## PRIVATE
|
||||||
|
ADVANCED
|
||||||
|
Constructor to create a `Positive_Integer` from an `Integer` - checking
|
||||||
|
if it satisfies the condition. User code should prefer the
|
||||||
|
`Positive_Integer.from` conversion.
|
||||||
|
new (integer : Integer) =
|
||||||
|
if integer > 0 then Positive_Integer.Value integer else
|
||||||
|
Error.throw (Illegal_Argument.Error "Expected a positive integer, but got "+integer.to_display_text)
|
||||||
|
|
||||||
|
## Allows creating a `Positive_Integer` from an `Integer`.
|
||||||
|
It will throw `Illegal_Argument` if the provided integer is not positive.
|
||||||
|
Positive_Integer.from (that : Integer) = Positive_Integer.new that
|
||||||
|
|
||||||
|
## PRIVATE
|
||||||
|
Integer.from (that : Positive_Integer) = that.integer
|
||||||
|
|
||||||
|
## PRIVATE
|
||||||
|
type Positive_Integer_Comparator
|
||||||
|
## PRIVATE
|
||||||
|
compare x y =
|
||||||
|
Comparable.from x.integer . compare x.integer y.integer
|
||||||
|
|
||||||
|
## PRIVATE
|
||||||
|
hash x = Comparable.from x.integer . hash x.integer
|
||||||
|
|
||||||
|
## PRIVATE
|
||||||
|
Comparable.from (_:Positive_Integer) = Positive_Integer_Comparator
|
||||||
|
@ -1579,6 +1579,15 @@ type Column
|
|||||||
check_cast_compatibility self.value_type value_type <|
|
check_cast_compatibility self.value_type value_type <|
|
||||||
self.internal_do_cast value_type on_problems
|
self.internal_do_cast value_type on_problems
|
||||||
|
|
||||||
|
## Change the value type of the column to a more specific one, based on its
|
||||||
|
contents.
|
||||||
|
|
||||||
|
This operation is currently not available in the Database backend.
|
||||||
|
auto_value_type : Boolean -> Column
|
||||||
|
auto_value_type self shrink_types=False =
|
||||||
|
_ = shrink_types
|
||||||
|
Error.throw <| Unsupported_Database_Operation.Error "`Column.auto_value_type` is not supported in the Database backends."
|
||||||
|
|
||||||
## PRIVATE
|
## PRIVATE
|
||||||
Shares the core CAST logic between `cast` and `parse`.
|
Shares the core CAST logic between `cast` and `parse`.
|
||||||
internal_do_cast : Value_Type -> Problem_Behavior -> Column
|
internal_do_cast : Value_Type -> Problem_Behavior -> Column
|
||||||
|
@ -1979,6 +1979,15 @@ type Table
|
|||||||
new_column = column_to_cast.cast value_type on_problems
|
new_column = column_to_cast.cast value_type on_problems
|
||||||
table.set new_column new_name=column_to_cast.name set_mode=Set_Mode.Update
|
table.set new_column new_name=column_to_cast.name set_mode=Set_Mode.Update
|
||||||
|
|
||||||
|
## Change the value type of table columns to a more specific one, based on
|
||||||
|
their contents.
|
||||||
|
|
||||||
|
This operation is currently not available in the Database backend.
|
||||||
|
auto_value_types : Vector (Text | Integer | Regex) | Text | Integer | Regex -> Boolean -> Boolean -> Problem_Behavior -> Table
|
||||||
|
auto_value_types self columns=self.column_names shrink_types=False error_on_missing_columns=True on_problems=Problem_Behavior.Report_Warning =
|
||||||
|
_ = [columns, shrink_types, error_on_missing_columns, on_problems]
|
||||||
|
Error.throw (Unsupported_Database_Operation.Error "Table.auto_value_types is not supported in the Database backends.")
|
||||||
|
|
||||||
## ALIAS drop_missing_rows, dropna
|
## ALIAS drop_missing_rows, dropna
|
||||||
GROUP Standard.Base.Selections
|
GROUP Standard.Base.Selections
|
||||||
Remove rows which are all blank or containing blank values.
|
Remove rows which are all blank or containing blank values.
|
||||||
|
@ -1762,6 +1762,43 @@ type Column
|
|||||||
on_problems.attach_problems_before problems <|
|
on_problems.attach_problems_before problems <|
|
||||||
Column.from_storage self.name new_storage
|
Column.from_storage self.name new_storage
|
||||||
|
|
||||||
|
## Change the value type of the column to a more specific one, based on its
|
||||||
|
contents.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- shrink_types: If set to `True`, smaller types will be chosen if possible,
|
||||||
|
according to the rules below. Defaults to `False`.
|
||||||
|
|
||||||
|
? Auto Type Selection Rules
|
||||||
|
|
||||||
|
- If a `Mixed` column can be assigned a single type, like `Char` or
|
||||||
|
`Integer`, that will be used.
|
||||||
|
- Text columns are not parsed. To do that, use the `parse` method.
|
||||||
|
- If a `Float` column contains only integers, it will be converted to
|
||||||
|
an Integer column.
|
||||||
|
- If a `Decimal` column contains only integers that could fit in a
|
||||||
|
64-bit integer storage, it will be converted to an Integer column.
|
||||||
|
- If `shrink_types` is `False` (default), no other transformations are
|
||||||
|
applied.
|
||||||
|
- However, if `shrink_types` is set to `True`, then:
|
||||||
|
- Integer columns will be assigned the smallest size that can fit all
|
||||||
|
values (down to 16-bit integers; converting to the `Byte` type has
|
||||||
|
to be done manually through `cast`).
|
||||||
|
- If all elements in a text column have the same length, the type
|
||||||
|
will become fixed length.
|
||||||
|
- Otherwise, if a text column is variable length, but all text
|
||||||
|
elements are no longer than 255 characters, the column will get a
|
||||||
|
max length of 255. Otherwise, the column size limit will stay
|
||||||
|
unchanged.
|
||||||
|
auto_value_type : Boolean -> Column
|
||||||
|
auto_value_type self shrink_types=False =
|
||||||
|
new_value_type = case shrink_types of
|
||||||
|
False -> self.inferred_precise_value_type
|
||||||
|
True ->
|
||||||
|
Storage.to_value_type self.java_column.getStorage.inferPreciseTypeShrunk
|
||||||
|
# We run with Report_Error because we do not expect any problems.
|
||||||
|
self.cast new_value_type on_problems=Problem_Behavior.Report_Error
|
||||||
|
|
||||||
## ALIAS transform column
|
## ALIAS transform column
|
||||||
|
|
||||||
Applies `function` to each item in this column and returns the column
|
Applies `function` to each item in this column and returns the column
|
||||||
|
@ -88,10 +88,20 @@ type Table
|
|||||||
Column.from_vector (v.at 0) (v.at 1) . java_column
|
Column.from_vector (v.at 0) (v.at 1) . java_column
|
||||||
Column.Value java_col -> java_col
|
Column.Value java_col -> java_col
|
||||||
_ -> invalid_input_shape
|
_ -> invalid_input_shape
|
||||||
if cols.is_empty then Error.throw (Illegal_Argument.Error "Cannot create a table with no columns.") else
|
Panic.recover Illegal_Argument <|
|
||||||
if (cols.all c-> c.getSize == cols.first.getSize).not then Error.throw (Illegal_Argument.Error "All columns must have the same row count.") else
|
if cols.is_empty then
|
||||||
if cols.distinct .getName . length != cols.length then Error.throw (Illegal_Argument.Error "Column names must be distinct.") else
|
Panic.throw (Illegal_Argument.Error "Cannot create a table with no columns.")
|
||||||
Table.Value (Java_Table.new cols)
|
|
||||||
|
if cols.distinct .getName . length != cols.length then
|
||||||
|
Panic.throw (Illegal_Argument.Error "Column names must be distinct.")
|
||||||
|
|
||||||
|
mismatched_size_column = cols.find if_missing=Nothing c->
|
||||||
|
c.getSize != cols.first.getSize
|
||||||
|
if mismatched_size_column.is_nothing.not then
|
||||||
|
msg = "All columns must have the same row count, but the column [" + mismatched_size_column.getName + "] has " + mismatched_size_column.getSize.to_text + " rows, while the column [" + cols.first.getName + "] has " + cols.first.getSize.to_text + " rows."
|
||||||
|
Panic.throw (Illegal_Argument.Error msg)
|
||||||
|
|
||||||
|
Table.Value (Java_Table.new cols)
|
||||||
|
|
||||||
## GROUP Standard.Base.Constants
|
## GROUP Standard.Base.Constants
|
||||||
Creates a new table from a vector of column names and a vector of vectors
|
Creates a new table from a vector of column names and a vector of vectors
|
||||||
@ -946,6 +956,9 @@ type Table
|
|||||||
Arguments:
|
Arguments:
|
||||||
- columns: The selection of columns to cast.
|
- columns: The selection of columns to cast.
|
||||||
- value_type: The `Value_Type` to cast the column to.
|
- value_type: The `Value_Type` to cast the column to.
|
||||||
|
- error_on_missing_columns: Specifies if a missing input column should
|
||||||
|
result in an error regardless of the `on_problems` settings. Defaults
|
||||||
|
to `True`.
|
||||||
- on_problems: Specifies how to handle problems if they occur, reporting
|
- on_problems: Specifies how to handle problems if they occur, reporting
|
||||||
them as warnings by default.
|
them as warnings by default.
|
||||||
|
|
||||||
@ -996,6 +1009,50 @@ type Table
|
|||||||
new_column = column_to_cast.cast value_type on_problems
|
new_column = column_to_cast.cast value_type on_problems
|
||||||
table.set new_column new_name=column_to_cast.name set_mode=Set_Mode.Update
|
table.set new_column new_name=column_to_cast.name set_mode=Set_Mode.Update
|
||||||
|
|
||||||
|
## Change the value type of table columns to a more specific one, based on
|
||||||
|
their contents.
|
||||||
|
|
||||||
|
This is most useful for `Mixed` type columns and allows narrowing
|
||||||
|
down the type if all values in the column fit a more specific type.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- columns: The selection of columns to convert.
|
||||||
|
- shrink_types: If set to `True`, smaller types will be chosen if possible,
|
||||||
|
according to the rules below. Defaults to `False`.
|
||||||
|
- error_on_missing_columns: Specifies if a missing input column should
|
||||||
|
result in an error regardless of the `on_problems` settings. Defaults
|
||||||
|
to `True`.
|
||||||
|
- on_problems: Specifies how to handle problems if they occur, reporting
|
||||||
|
them as warnings by default.
|
||||||
|
|
||||||
|
? Auto Type Selection Rules
|
||||||
|
|
||||||
|
- If a `Mixed` column can be assigned a single type, like `Char` or
|
||||||
|
`Integer`, that will be used.
|
||||||
|
- Text columns are not parsed. To do that, use the `parse` method.
|
||||||
|
- If a `Float` column contains only integers, it will be converted to
|
||||||
|
an Integer column.
|
||||||
|
- If a `Decimal` column contains only integers that could fit in a
|
||||||
|
64-bit integer storage, it will be converted to an Integer column.
|
||||||
|
- If `shrink_types` is `False` (default), no other transformations are
|
||||||
|
applied.
|
||||||
|
- However, if `shrink_types` is set to `True`, then:
|
||||||
|
- Integer columns will be assigned the smallest size that can fit all
|
||||||
|
values (down to 16-bit integers; converting to the `Byte` type has
|
||||||
|
to be done manually through `cast`).
|
||||||
|
- If all elements in a text column have the same length, the type
|
||||||
|
will become fixed length.
|
||||||
|
- Otherwise, if a text column is variable length, but all text
|
||||||
|
elements are no longer than 255 characters, the column will get a
|
||||||
|
max length of 255. Otherwise, the column size limit will stay
|
||||||
|
unchanged.
|
||||||
|
auto_value_types : Vector (Text | Integer | Regex) | Text | Integer | Regex -> Boolean -> Boolean -> Problem_Behavior -> Table
|
||||||
|
auto_value_types self columns=self.column_names shrink_types=False error_on_missing_columns=True on_problems=Problem_Behavior.Report_Warning =
|
||||||
|
selected = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=False error_on_missing_columns=error_on_missing_columns on_problems=on_problems error_on_empty=False
|
||||||
|
selected.fold self table-> column_to_cast->
|
||||||
|
new_column = column_to_cast.auto_value_type shrink_types
|
||||||
|
table.set new_column new_name=column_to_cast.name set_mode=Set_Mode.Update
|
||||||
|
|
||||||
## GROUP Standard.Base.Conversions
|
## GROUP Standard.Base.Conversions
|
||||||
Splits a column of text into a set of new columns.
|
Splits a column of text into a set of new columns.
|
||||||
The original column will be removed from the table.
|
The original column will be removed from the table.
|
||||||
|
@ -18,7 +18,7 @@ polyglot java import org.enso.table.data.column.storage.type.IntegerType
|
|||||||
most_specific_value_type : Any -> Boolean -> Value_Type
|
most_specific_value_type : Any -> Boolean -> Value_Type
|
||||||
most_specific_value_type value use_smallest=False =
|
most_specific_value_type value use_smallest=False =
|
||||||
case value of
|
case value of
|
||||||
_ : Float -> Value_Type.Float Bits.Bits_64
|
_ : Float -> Value_Type.Float Bits.Bits_64
|
||||||
_ : Boolean -> Value_Type.Boolean
|
_ : Boolean -> Value_Type.Boolean
|
||||||
_ : Date -> Value_Type.Date
|
_ : Date -> Value_Type.Date
|
||||||
_ : Time_Of_Day -> Value_Type.Time
|
_ : Time_Of_Day -> Value_Type.Time
|
||||||
@ -33,9 +33,12 @@ most_specific_value_type value use_smallest=False =
|
|||||||
# We do a small rewrite here - for integers we always return the Integer type, even if the value is small enough to fit in a Byte.
|
# We do a small rewrite here - for integers we always return the Integer type, even if the value is small enough to fit in a Byte.
|
||||||
if value_type == Value_Type.Byte then Value_Type.Integer Bits.Bits_16 else value_type
|
if value_type == Value_Type.Byte then Value_Type.Integer Bits.Bits_16 else value_type
|
||||||
True -> Value_Type.Decimal precision=Nothing scale=0
|
True -> Value_Type.Decimal precision=Nothing scale=0
|
||||||
text : Text -> case use_smallest of
|
text : Text ->
|
||||||
False -> Value_Type.Char size=Nothing variable_length=True
|
length = text.length
|
||||||
True -> Value_Type.Char size=text.length variable_length=False
|
# Not using Char size=0 for empty strings, because that would be an invalid value.
|
||||||
|
case use_smallest && length > 0 of
|
||||||
|
True -> Value_Type.Char size=text.length variable_length=False
|
||||||
|
False -> Value_Type.Char size=Nothing variable_length=True
|
||||||
## TODO [RW] once we add Enso Native Object Type Value Type, we probably
|
## TODO [RW] once we add Enso Native Object Type Value Type, we probably
|
||||||
want to prefer it over Mixed
|
want to prefer it over Mixed
|
||||||
_ -> Value_Type.Mixed
|
_ -> Value_Type.Mixed
|
||||||
|
@ -54,7 +54,7 @@ closest_storage_type value_type = case value_type of
|
|||||||
Error.throw (Illegal_Argument.Error "Value_Type.Char with fixed length must have a non-nothing size")
|
Error.throw (Illegal_Argument.Error "Value_Type.Char with fixed length must have a non-nothing size")
|
||||||
Value_Type.Char max_length variable_length ->
|
Value_Type.Char max_length variable_length ->
|
||||||
fixed_length = variable_length.not
|
fixed_length = variable_length.not
|
||||||
TextType.new max_length fixed_length
|
TextType.new (max_length : Integer) fixed_length
|
||||||
Value_Type.Date -> DateType.INSTANCE
|
Value_Type.Date -> DateType.INSTANCE
|
||||||
# We currently will not support storing dates without timezones in in-memory mode.
|
# We currently will not support storing dates without timezones in in-memory mode.
|
||||||
Value_Type.Date_Time _ -> DateTimeType.INSTANCE
|
Value_Type.Date_Time _ -> DateTimeType.INSTANCE
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
from Standard.Base import all
|
from Standard.Base import all
|
||||||
|
import Standard.Base.Data.Numbers.Positive_Integer
|
||||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||||
|
|
||||||
import project.Data.Type.Value_Type_Helpers
|
import project.Data.Type.Value_Type_Helpers
|
||||||
@ -95,12 +96,22 @@ type Value_Type
|
|||||||
|
|
||||||
ANSI SQL: CHAR, VARCHAR, TEXT, LONGVARCHAR, NCHAR, NVARCHAR, TEXT, CLOB, NCLOB
|
ANSI SQL: CHAR, VARCHAR, TEXT, LONGVARCHAR, NCHAR, NVARCHAR, TEXT, CLOB, NCLOB
|
||||||
|
|
||||||
|
! Counting Characters
|
||||||
|
|
||||||
|
Note that different backends may count the text in different ways.
|
||||||
|
The in-memory backend treats a single grapheme cluster (e.g. 💡) as a
|
||||||
|
single character unit. In most database systems more complex grapheme
|
||||||
|
clusters may be counted as multiple characters. So there isn't a 1-1
|
||||||
|
correspondence between these limits across backends, which may cause
|
||||||
|
strings to be truncated if they contain such characters and are close
|
||||||
|
to the limit.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
- size: the maximum number of characters that can be stored in the
|
- size: the maximum number of characters that can be stored in the
|
||||||
column. It can be nothing to indicate no limit.
|
column. It can be nothing to indicate no limit. It cannot be 0.
|
||||||
- variable_length: whether the size is a maximum or a fixed length.
|
- variable_length: whether the size is a maximum or a fixed length.
|
||||||
A fixed length string must have a non-nothing size.
|
A fixed length string must have a non-nothing size.
|
||||||
Char size:(Integer|Nothing)=Nothing variable_length:Boolean=True
|
Char (size : (Positive_Integer | Nothing) = Nothing) variable_length:Boolean=True
|
||||||
|
|
||||||
## Date
|
## Date
|
||||||
|
|
||||||
@ -383,15 +394,23 @@ type Value_Type
|
|||||||
Value_Type.Integer size -> "Integer (" + size.to_text + ")"
|
Value_Type.Integer size -> "Integer (" + size.to_text + ")"
|
||||||
Value_Type.Float size -> "Float (" + size.to_text + ")"
|
Value_Type.Float size -> "Float (" + size.to_text + ")"
|
||||||
Value_Type.Decimal precision scale -> "Decimal (precision=" + precision.to_text + ", scale=" + scale.to_text + ")"
|
Value_Type.Decimal precision scale -> "Decimal (precision=" + precision.to_text + ", scale=" + scale.to_text + ")"
|
||||||
Value_Type.Char size variable_length -> case variable_length of
|
Value_Type.Char size variable_length ->
|
||||||
True -> "Char (variable length, max_size=" + size.to_text + ")"
|
size_text = case size of
|
||||||
False -> "Char (fixed length, size=" + size.to_text + ")"
|
Nothing -> "unlimited"
|
||||||
|
_ -> size.to Integer . to_text
|
||||||
|
case variable_length of
|
||||||
|
True -> "Char (variable length, max_size=" + size_text + ")"
|
||||||
|
False -> "Char (fixed length, size=" + size_text + ")"
|
||||||
Value_Type.Date -> "Date"
|
Value_Type.Date -> "Date"
|
||||||
Value_Type.Date_Time with_timezone -> "Date_Time (with_timezone=" + with_timezone.to_text + ")"
|
Value_Type.Date_Time with_timezone -> "Date_Time (with_timezone=" + with_timezone.to_text + ")"
|
||||||
Value_Type.Time -> "Time"
|
Value_Type.Time -> "Time"
|
||||||
Value_Type.Binary size variable_length -> case variable_length of
|
Value_Type.Binary size variable_length ->
|
||||||
True -> "Binary (variable length, max_size=" + size.to_text + " bytes)"
|
size_text = case size of
|
||||||
False -> "Binary (fixed length, size=" + size.to_text + " bytes)"
|
Nothing -> "unlimited"
|
||||||
|
_ -> size.to Integer . to_text + " bytes"
|
||||||
|
case variable_length of
|
||||||
|
True -> "Binary (variable length, max_size=" + size_text + ")"
|
||||||
|
False -> "Binary (fixed length, size=" + size_text + ")"
|
||||||
Value_Type.Unsupported_Data_Type type_name _ -> case type_name of
|
Value_Type.Unsupported_Data_Type type_name _ -> case type_name of
|
||||||
Nothing -> "Unsupported_Data_Type"
|
Nothing -> "Unsupported_Data_Type"
|
||||||
_ : Text -> "Unsupported_Data_Type (" + type_name + ")"
|
_ : Text -> "Unsupported_Data_Type (" + type_name + ")"
|
||||||
|
@ -10,6 +10,7 @@ import org.enso.table.data.column.storage.type.BigIntegerType;
|
|||||||
import org.enso.table.data.column.storage.type.FloatType;
|
import org.enso.table.data.column.storage.type.FloatType;
|
||||||
import org.enso.table.data.column.storage.type.StorageType;
|
import org.enso.table.data.column.storage.type.StorageType;
|
||||||
import org.enso.table.error.ValueTypeMismatchException;
|
import org.enso.table.error.ValueTypeMismatchException;
|
||||||
|
import org.graalvm.polyglot.Context;
|
||||||
|
|
||||||
// For now the BigInteger builder is just a stub, reusing the ObjectBuilder and adding a warning.
|
// For now the BigInteger builder is just a stub, reusing the ObjectBuilder and adding a warning.
|
||||||
public class BigIntegerBuilder extends TypedBuilderImpl<BigInteger> {
|
public class BigIntegerBuilder extends TypedBuilderImpl<BigInteger> {
|
||||||
@ -88,10 +89,12 @@ public class BigIntegerBuilder extends TypedBuilderImpl<BigInteger> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static BigIntegerBuilder retypeFromLongBuilder(LongBuilder longBuilder) {
|
public static BigIntegerBuilder retypeFromLongBuilder(LongBuilder longBuilder) {
|
||||||
|
BigIntegerBuilder res = new BigIntegerBuilder(longBuilder.data.length);
|
||||||
int n = longBuilder.currentSize;
|
int n = longBuilder.currentSize;
|
||||||
BigIntegerBuilder res = new BigIntegerBuilder(n);
|
Context context = Context.getCurrent();
|
||||||
for (int i = 0; i < n; i++) {
|
for (int i = 0; i < n; i++) {
|
||||||
res.appendNoGrow(BigInteger.valueOf(longBuilder.data[i]));
|
res.appendNoGrow(BigInteger.valueOf(longBuilder.data[i]));
|
||||||
|
context.safepoint();
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
@ -26,10 +26,10 @@ public class LongBuilderChecked extends LongBuilder {
|
|||||||
if (o == null) {
|
if (o == null) {
|
||||||
isMissing.set(currentSize++);
|
isMissing.set(currentSize++);
|
||||||
} else {
|
} else {
|
||||||
try {
|
Long x = NumericConverter.tryConvertingToLong(o);
|
||||||
long x = NumericConverter.coerceToLong(o);
|
if (x != null) {
|
||||||
appendLongNoGrow(x);
|
appendLongNoGrow(x);
|
||||||
} catch (UnsupportedOperationException e) {
|
} else {
|
||||||
throw new ValueTypeMismatchException(type, o);
|
throw new ValueTypeMismatchException(type, o);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -19,10 +19,10 @@ public class LongBuilderUnchecked extends LongBuilder {
|
|||||||
if (o == null) {
|
if (o == null) {
|
||||||
isMissing.set(currentSize++);
|
isMissing.set(currentSize++);
|
||||||
} else {
|
} else {
|
||||||
try {
|
Long x = NumericConverter.tryConvertingToLong(o);
|
||||||
long x = NumericConverter.coerceToLong(o);
|
if (x != null) {
|
||||||
data[currentSize++] = x;
|
appendLongNoGrow(x);
|
||||||
} catch (UnsupportedOperationException e) {
|
} else {
|
||||||
throw new ValueTypeMismatchException(getType(), o);
|
throw new ValueTypeMismatchException(getType(), o);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -29,8 +29,8 @@ public class ToTextStorageConverter implements StorageConverter<String> {
|
|||||||
|
|
||||||
public Storage<String> cast(Storage<?> storage, CastProblemBuilder problemBuilder) {
|
public Storage<String> cast(Storage<?> storage, CastProblemBuilder problemBuilder) {
|
||||||
if (storage instanceof StringStorage stringStorage) {
|
if (storage instanceof StringStorage stringStorage) {
|
||||||
if (stringStorage.getType().equals(targetType)) {
|
if (canAvoidCopying(stringStorage)) {
|
||||||
return stringStorage;
|
return retypeStringStorage(stringStorage);
|
||||||
} else {
|
} else {
|
||||||
return adaptStringStorage(stringStorage, problemBuilder);
|
return adaptStringStorage(stringStorage, problemBuilder);
|
||||||
}
|
}
|
||||||
@ -150,7 +150,8 @@ public class ToTextStorageConverter implements StorageConverter<String> {
|
|||||||
return builder.seal();
|
return builder.seal();
|
||||||
}
|
}
|
||||||
|
|
||||||
private <T> Storage<String> castDateTimeStorage(Storage<T> storage, Function<T, String> converter, CastProblemBuilder problemBuilder) {
|
private <T> Storage<String> castDateTimeStorage(Storage<T> storage, Function<T, String> converter,
|
||||||
|
CastProblemBuilder problemBuilder) {
|
||||||
Context context = Context.getCurrent();
|
Context context = Context.getCurrent();
|
||||||
StringBuilder builder = new StringBuilder(storage.size(), targetType);
|
StringBuilder builder = new StringBuilder(storage.size(), targetType);
|
||||||
for (int i = 0; i < storage.size(); i++) {
|
for (int i = 0; i < storage.size(); i++) {
|
||||||
@ -204,4 +205,43 @@ public class ToTextStorageConverter implements StorageConverter<String> {
|
|||||||
problemBuilder.aggregateOtherProblems(builder.getProblems());
|
problemBuilder.aggregateOtherProblems(builder.getProblems());
|
||||||
return builder.seal();
|
return builder.seal();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean canAvoidCopying(StringStorage stringStorage) {
|
||||||
|
if (targetType.fitsExactly(stringStorage.getType())) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
long maxLength = Long.MIN_VALUE;
|
||||||
|
long minLength = Long.MAX_VALUE;
|
||||||
|
for (int i = 0; i < stringStorage.size(); i++) {
|
||||||
|
String value = stringStorage.getItem(i);
|
||||||
|
if (value == null) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
long length = value.length();
|
||||||
|
if (length > maxLength) {
|
||||||
|
maxLength = length;
|
||||||
|
}
|
||||||
|
if (length < minLength) {
|
||||||
|
minLength = length;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (targetType.fixedLength()) {
|
||||||
|
boolean effectivelyFixedLength = minLength == maxLength;
|
||||||
|
return effectivelyFixedLength && targetType.maxLength() == maxLength;
|
||||||
|
} else {
|
||||||
|
return targetType.maxLength() == -1 || maxLength <= targetType.maxLength();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new storage re-using the existing array.
|
||||||
|
* <p>
|
||||||
|
* This can only be done if the values do not need any adaptations, checked by {@code canAvoidCopying}.
|
||||||
|
*/
|
||||||
|
private Storage<String> retypeStringStorage(StringStorage stringStorage) {
|
||||||
|
return new StringStorage(stringStorage.getData(), stringStorage.size(), targetType);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -23,7 +23,7 @@ public abstract class StringStringOp extends BinaryMapOperation<String, Speciali
|
|||||||
public Storage<?> runBinaryMap(SpecializedStorage<String> storage, Object arg, MapOperationProblemBuilder problemBuilder) {
|
public Storage<?> runBinaryMap(SpecializedStorage<String> storage, Object arg, MapOperationProblemBuilder problemBuilder) {
|
||||||
int size = storage.size();
|
int size = storage.size();
|
||||||
if (arg == null) {
|
if (arg == null) {
|
||||||
StringBuilder builder = new StringBuilder(size, TextType.variableLengthWithLimit(0));
|
StringBuilder builder = new StringBuilder(size, TextType.VARIABLE_LENGTH);
|
||||||
builder.appendNulls(size);
|
builder.appendNulls(size);
|
||||||
return builder.seal();
|
return builder.seal();
|
||||||
} else if (arg instanceof String argString) {
|
} else if (arg instanceof String argString) {
|
||||||
|
@ -105,6 +105,20 @@ public final class MixedStorage extends ObjectStorage {
|
|||||||
return inferredType;
|
return inferredType;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public StorageType inferPreciseTypeShrunk() {
|
||||||
|
Storage<?> specialized = getInferredStorage();
|
||||||
|
if (specialized == null) {
|
||||||
|
// If no specialized storage is available, the inferred type must be the generic AnyObjectType:
|
||||||
|
assert inferredType instanceof AnyObjectType;
|
||||||
|
return AnyObjectType.INSTANCE;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we are able to get a more specialized storage for more specific type - we delegate to its
|
||||||
|
// own shrinking logic.
|
||||||
|
return specialized.inferPreciseTypeShrunk();
|
||||||
|
}
|
||||||
|
|
||||||
private Storage<?> getInferredStorage() {
|
private Storage<?> getInferredStorage() {
|
||||||
if (!hasSpecializedStorageBeenInferred) {
|
if (!hasSpecializedStorageBeenInferred) {
|
||||||
StorageType inferredType = inferPreciseType();
|
StorageType inferredType = inferPreciseType();
|
||||||
|
@ -43,6 +43,11 @@ public class MixedStorageFacade extends Storage<Object> {
|
|||||||
return underlyingStorage.inferPreciseType();
|
return underlyingStorage.inferPreciseType();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public StorageType inferPreciseTypeShrunk() {
|
||||||
|
return underlyingStorage.inferPreciseTypeShrunk();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isNa(long idx) {
|
public boolean isNa(long idx) {
|
||||||
return underlyingStorage.isNa(idx);
|
return underlyingStorage.isNa(idx);
|
||||||
|
@ -39,6 +39,19 @@ public abstract class Storage<T> {
|
|||||||
return getType();
|
return getType();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the smallest type (according to Column.auto_value_type rules) that may still fit all
|
||||||
|
* values in this column.
|
||||||
|
*
|
||||||
|
* <p>It is a sibling of `inferPreciseType` that allows some further shrinking. It is kept
|
||||||
|
* separate, because `inferPreciseType` should be quick to compute (cached if needed) as it is
|
||||||
|
* used in typechecking of lots of operations. This one however, is only used in a specific
|
||||||
|
* `auto_value_type` use-case and rarely will need to be computed more than once.
|
||||||
|
*/
|
||||||
|
public StorageType inferPreciseTypeShrunk() {
|
||||||
|
return getType();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a more specialized storage, if available.
|
* Returns a more specialized storage, if available.
|
||||||
*
|
*
|
||||||
|
@ -3,9 +3,9 @@ package org.enso.table.data.column.storage;
|
|||||||
import org.enso.base.Text_Utils;
|
import org.enso.base.Text_Utils;
|
||||||
import org.enso.table.data.column.builder.Builder;
|
import org.enso.table.data.column.builder.Builder;
|
||||||
import org.enso.table.data.column.builder.StringBuilder;
|
import org.enso.table.data.column.builder.StringBuilder;
|
||||||
import org.enso.table.data.column.operation.map.MapOperationStorage;
|
|
||||||
import org.enso.table.data.column.operation.map.BinaryMapOperation;
|
import org.enso.table.data.column.operation.map.BinaryMapOperation;
|
||||||
import org.enso.table.data.column.operation.map.MapOperationProblemBuilder;
|
import org.enso.table.data.column.operation.map.MapOperationProblemBuilder;
|
||||||
|
import org.enso.table.data.column.operation.map.MapOperationStorage;
|
||||||
import org.enso.table.data.column.operation.map.UnaryMapOperation;
|
import org.enso.table.data.column.operation.map.UnaryMapOperation;
|
||||||
import org.enso.table.data.column.operation.map.text.LikeOp;
|
import org.enso.table.data.column.operation.map.text.LikeOp;
|
||||||
import org.enso.table.data.column.operation.map.text.StringBooleanOp;
|
import org.enso.table.data.column.operation.map.text.StringBooleanOp;
|
||||||
@ -19,10 +19,13 @@ import org.graalvm.polyglot.Value;
|
|||||||
|
|
||||||
import java.util.BitSet;
|
import java.util.BitSet;
|
||||||
|
|
||||||
/** A column storing strings. */
|
/**
|
||||||
|
* A column storing strings.
|
||||||
|
*/
|
||||||
public final class StringStorage extends SpecializedStorage<String> {
|
public final class StringStorage extends SpecializedStorage<String> {
|
||||||
|
|
||||||
private final TextType type;
|
private final TextType type;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param data the underlying data
|
* @param data the underlying data
|
||||||
* @param size the number of items stored
|
* @param size the number of items stored
|
||||||
@ -111,7 +114,8 @@ public final class StringStorage extends SpecializedStorage<String> {
|
|||||||
t.add(
|
t.add(
|
||||||
new UnaryMapOperation<>(Maps.IS_EMPTY) {
|
new UnaryMapOperation<>(Maps.IS_EMPTY) {
|
||||||
@Override
|
@Override
|
||||||
protected BoolStorage runUnaryMap(SpecializedStorage<String> storage, MapOperationProblemBuilder problemBuilder) {
|
protected BoolStorage runUnaryMap(SpecializedStorage<String> storage,
|
||||||
|
MapOperationProblemBuilder problemBuilder) {
|
||||||
BitSet r = new BitSet();
|
BitSet r = new BitSet();
|
||||||
Context context = Context.getCurrent();
|
Context context = Context.getCurrent();
|
||||||
for (int i = 0; i < storage.size; i++) {
|
for (int i = 0; i < storage.size; i++) {
|
||||||
@ -162,4 +166,40 @@ public final class StringStorage extends SpecializedStorage<String> {
|
|||||||
});
|
});
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public StorageType inferPreciseTypeShrunk() {
|
||||||
|
if (type.fixedLength()) {
|
||||||
|
return type;
|
||||||
|
}
|
||||||
|
|
||||||
|
long minLength = Long.MAX_VALUE;
|
||||||
|
long maxLength = Long.MIN_VALUE;
|
||||||
|
for (int i = 0; i < size(); i++) {
|
||||||
|
String s = getItem(i);
|
||||||
|
if (s != null) {
|
||||||
|
long length = Text_Utils.grapheme_length(s);
|
||||||
|
minLength = Math.min(minLength, length);
|
||||||
|
maxLength = Math.max(maxLength, length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// maxLength will be <0 if all values were null and will be ==0 if all values were empty strings.
|
||||||
|
// In both of these cases, we avoid shrinking the type and return the original type instead.
|
||||||
|
if (maxLength <= 0) {
|
||||||
|
return getType();
|
||||||
|
}
|
||||||
|
|
||||||
|
final long SHORT_LENGTH_THRESHOLD = 255;
|
||||||
|
if (minLength == maxLength) {
|
||||||
|
return TextType.fixedLength(minLength);
|
||||||
|
} else if (maxLength <= SHORT_LENGTH_THRESHOLD && (type.maxLength() < 0 || SHORT_LENGTH_THRESHOLD < type.maxLength())) {
|
||||||
|
// If the string was unbounded or the bound was larger than 255, we shrink it to 255.
|
||||||
|
return TextType.variableLengthWithLimit(SHORT_LENGTH_THRESHOLD);
|
||||||
|
} else {
|
||||||
|
// Otherwise, we return the original type (because it was either smaller than the proposed 255 bound, or the
|
||||||
|
// existing elements to do not fit into the 255 bound).
|
||||||
|
return getType();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -23,6 +23,8 @@ import org.enso.table.data.column.operation.map.numeric.isin.LongIsInOp;
|
|||||||
import org.enso.table.data.column.storage.BoolStorage;
|
import org.enso.table.data.column.storage.BoolStorage;
|
||||||
import org.enso.table.data.column.storage.Storage;
|
import org.enso.table.data.column.storage.Storage;
|
||||||
import org.enso.table.data.column.storage.type.IntegerType;
|
import org.enso.table.data.column.storage.type.IntegerType;
|
||||||
|
import org.enso.table.data.column.storage.type.StorageType;
|
||||||
|
import org.graalvm.polyglot.Context;
|
||||||
|
|
||||||
public abstract class AbstractLongStorage extends NumericStorage<Long> {
|
public abstract class AbstractLongStorage extends NumericStorage<Long> {
|
||||||
public abstract long getItem(int idx);
|
public abstract long getItem(int idx);
|
||||||
@ -77,6 +79,46 @@ public abstract class AbstractLongStorage extends NumericStorage<Long> {
|
|||||||
@Override
|
@Override
|
||||||
public abstract IntegerType getType();
|
public abstract IntegerType getType();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public StorageType inferPreciseType() {
|
||||||
|
return getType();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public StorageType inferPreciseTypeShrunk() {
|
||||||
|
// If the type is already smallest possible, we return it unchanged (we will return 8-bit
|
||||||
|
// columns as-is, although
|
||||||
|
// we will not shrink 16-bit columns to 8-bits even if it were possible).
|
||||||
|
if (getType().bits().toInteger() <= 16) {
|
||||||
|
return getType();
|
||||||
|
}
|
||||||
|
|
||||||
|
IntegerType[] possibleTypes =
|
||||||
|
new IntegerType[] {IntegerType.INT_16, IntegerType.INT_32, IntegerType.INT_64};
|
||||||
|
|
||||||
|
int currentTypeIdx = 0;
|
||||||
|
int n = size();
|
||||||
|
Context context = Context.getCurrent();
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
if (isNa(i)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
long item = getItem(i);
|
||||||
|
while (!possibleTypes[currentTypeIdx].fits(item)) {
|
||||||
|
currentTypeIdx++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (currentTypeIdx >= possibleTypes.length - 1) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
context.safepoint();
|
||||||
|
}
|
||||||
|
|
||||||
|
return possibleTypes[currentTypeIdx];
|
||||||
|
}
|
||||||
|
|
||||||
private static MapOperationStorage<Long, AbstractLongStorage> buildOps() {
|
private static MapOperationStorage<Long, AbstractLongStorage> buildOps() {
|
||||||
MapOperationStorage<Long, AbstractLongStorage> ops = new MapOperationStorage<>();
|
MapOperationStorage<Long, AbstractLongStorage> ops = new MapOperationStorage<>();
|
||||||
ops.add(new AddOp<>())
|
ops.add(new AddOp<>())
|
||||||
|
@ -20,6 +20,7 @@ import org.enso.table.data.column.operation.map.numeric.isin.BigIntegerIsInOp;
|
|||||||
import org.enso.table.data.column.storage.ObjectStorage;
|
import org.enso.table.data.column.storage.ObjectStorage;
|
||||||
import org.enso.table.data.column.storage.SpecializedStorage;
|
import org.enso.table.data.column.storage.SpecializedStorage;
|
||||||
import org.enso.table.data.column.storage.type.BigIntegerType;
|
import org.enso.table.data.column.storage.type.BigIntegerType;
|
||||||
|
import org.enso.table.data.column.storage.type.IntegerType;
|
||||||
import org.enso.table.data.column.storage.type.StorageType;
|
import org.enso.table.data.column.storage.type.StorageType;
|
||||||
|
|
||||||
public class BigIntegerStorage extends SpecializedStorage<BigInteger> {
|
public class BigIntegerStorage extends SpecializedStorage<BigInteger> {
|
||||||
@ -59,7 +60,7 @@ public class BigIntegerStorage extends SpecializedStorage<BigInteger> {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected BigInteger[] newUnderlyingArray(int size) {
|
protected BigInteger[] newUnderlyingArray(int size) {
|
||||||
return new BigInteger[0];
|
return new BigInteger[size];
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -96,4 +97,67 @@ public class BigIntegerStorage extends SpecializedStorage<BigInteger> {
|
|||||||
|
|
||||||
return cachedMaxPrecisionStored;
|
return cachedMaxPrecisionStored;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private StorageType inferredType = null;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public StorageType inferPreciseType() {
|
||||||
|
if (inferredType == null) {
|
||||||
|
boolean allFitInLong = true;
|
||||||
|
int visitedCount = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
BigInteger value = data[i];
|
||||||
|
if (value == null) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
visitedCount++;
|
||||||
|
boolean fitsInLong = IntegerType.INT_64.fits(value);
|
||||||
|
if (!fitsInLong) {
|
||||||
|
allFitInLong = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inferredType =
|
||||||
|
(allFitInLong && visitedCount > 0) ? IntegerType.INT_64 : BigIntegerType.INSTANCE;
|
||||||
|
}
|
||||||
|
|
||||||
|
return inferredType;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public StorageType inferPreciseTypeShrunk() {
|
||||||
|
StorageType preciseType = inferPreciseType();
|
||||||
|
if (preciseType instanceof IntegerType) {
|
||||||
|
return findSmallestIntegerTypeThatFits();
|
||||||
|
}
|
||||||
|
|
||||||
|
return preciseType;
|
||||||
|
}
|
||||||
|
|
||||||
|
private StorageType findSmallestIntegerTypeThatFits() {
|
||||||
|
// This method assumes that all values _do_ fit in some integer type.
|
||||||
|
assert inferredType instanceof IntegerType;
|
||||||
|
|
||||||
|
final BigIntegerStorage parent = this;
|
||||||
|
|
||||||
|
// We create a Long storage that gets values by converting our storage.
|
||||||
|
ComputedNullableLongStorage longAdapter =
|
||||||
|
new ComputedNullableLongStorage(size) {
|
||||||
|
@Override
|
||||||
|
protected Long computeItem(int idx) {
|
||||||
|
BigInteger bigInteger = parent.getItem(idx);
|
||||||
|
if (bigInteger == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return bigInteger.longValueExact();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// And rely on its shrinking logic.
|
||||||
|
return longAdapter.inferPreciseTypeShrunk();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,197 @@
|
|||||||
|
package org.enso.table.data.column.storage.numeric;
|
||||||
|
|
||||||
|
import java.util.BitSet;
|
||||||
|
import java.util.List;
|
||||||
|
import org.enso.table.data.column.storage.Storage;
|
||||||
|
import org.enso.table.data.column.storage.type.IntegerType;
|
||||||
|
import org.enso.table.data.index.Index;
|
||||||
|
import org.enso.table.data.mask.OrderMask;
|
||||||
|
import org.enso.table.data.mask.SliceRange;
|
||||||
|
import org.graalvm.polyglot.Context;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implements a storage that computes the ith stored value using some function.
|
||||||
|
*
|
||||||
|
* <p>This storage allows for missing values. Prefer {@link ComputedLongStorage} for non-nullable
|
||||||
|
* case.
|
||||||
|
*/
|
||||||
|
public abstract class ComputedNullableLongStorage extends AbstractLongStorage {
|
||||||
|
protected final int size;
|
||||||
|
|
||||||
|
protected abstract Long computeItem(int idx);
|
||||||
|
|
||||||
|
protected ComputedNullableLongStorage(int size) {
|
||||||
|
this.size = size;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int size() {
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int countMissing() {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IntegerType getType() {
|
||||||
|
return IntegerType.INT_64;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isNa(long idx) {
|
||||||
|
if (idx < 0 || idx >= size) {
|
||||||
|
throw new IndexOutOfBoundsException(
|
||||||
|
"Index " + idx + " is out of bounds for range of length " + size + ".");
|
||||||
|
}
|
||||||
|
|
||||||
|
return computeItem((int) idx) == null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Long getItemBoxed(int idx) {
|
||||||
|
if (idx < 0 || idx >= size) {
|
||||||
|
throw new IndexOutOfBoundsException(
|
||||||
|
"Index " + idx + " is out of bounds for range of length " + size + ".");
|
||||||
|
}
|
||||||
|
|
||||||
|
return computeItem(idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getItem(int idx) {
|
||||||
|
return getItemBoxed(idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BitSet getIsMissing() {
|
||||||
|
BitSet missing = new BitSet();
|
||||||
|
Context context = Context.getCurrent();
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
if (computeItem(i) == null) {
|
||||||
|
missing.set(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
context.safepoint();
|
||||||
|
}
|
||||||
|
return missing;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Storage<Long> mask(BitSet mask, int cardinality) {
|
||||||
|
BitSet newMissing = new BitSet();
|
||||||
|
long[] newData = new long[cardinality];
|
||||||
|
int resIx = 0;
|
||||||
|
Context context = Context.getCurrent();
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
if (mask.get(i)) {
|
||||||
|
Long item = computeItem(i);
|
||||||
|
if (item == null) {
|
||||||
|
newMissing.set(resIx++);
|
||||||
|
} else {
|
||||||
|
newData[resIx++] = item;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
context.safepoint();
|
||||||
|
}
|
||||||
|
return new LongStorage(newData, cardinality, newMissing, getType());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Storage<Long> applyMask(OrderMask mask) {
|
||||||
|
int[] positions = mask.getPositions();
|
||||||
|
long[] newData = new long[positions.length];
|
||||||
|
BitSet newMissing = new BitSet();
|
||||||
|
Context context = Context.getCurrent();
|
||||||
|
for (int i = 0; i < positions.length; i++) {
|
||||||
|
if (positions[i] == Index.NOT_FOUND) {
|
||||||
|
newMissing.set(i);
|
||||||
|
} else {
|
||||||
|
Long item = computeItem(positions[i]);
|
||||||
|
if (item == null) {
|
||||||
|
newMissing.set(i);
|
||||||
|
} else {
|
||||||
|
newData[i] = item;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
context.safepoint();
|
||||||
|
}
|
||||||
|
return new LongStorage(newData, positions.length, newMissing, getType());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Storage<Long> countMask(int[] counts, int total) {
|
||||||
|
long[] newData = new long[total];
|
||||||
|
BitSet newMissing = new BitSet();
|
||||||
|
int pos = 0;
|
||||||
|
Context context = Context.getCurrent();
|
||||||
|
for (int i = 0; i < counts.length; i++) {
|
||||||
|
Long item = computeItem(i);
|
||||||
|
if (item == null) {
|
||||||
|
newMissing.set(pos, pos + counts[i]);
|
||||||
|
pos += counts[i];
|
||||||
|
} else {
|
||||||
|
long nonNullItem = item;
|
||||||
|
for (int j = 0; j < counts[i]; j++) {
|
||||||
|
newData[pos++] = nonNullItem;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
context.safepoint();
|
||||||
|
}
|
||||||
|
return new LongStorage(newData, total, newMissing, getType());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Storage<Long> slice(int offset, int limit) {
|
||||||
|
int newSize = Math.min(size - offset, limit);
|
||||||
|
long[] newData = new long[newSize];
|
||||||
|
BitSet newMissing = new BitSet();
|
||||||
|
Context context = Context.getCurrent();
|
||||||
|
for (int i = 0; i < newSize; i++) {
|
||||||
|
Long item = computeItem(offset + i);
|
||||||
|
if (item == null) {
|
||||||
|
newMissing.set(i);
|
||||||
|
} else {
|
||||||
|
newData[i] = item;
|
||||||
|
}
|
||||||
|
context.safepoint();
|
||||||
|
}
|
||||||
|
return new LongStorage(newData, newSize, newMissing, getType());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Storage<Long> slice(List<SliceRange> ranges) {
|
||||||
|
int newSize = SliceRange.totalLength(ranges);
|
||||||
|
long[] newData = new long[newSize];
|
||||||
|
BitSet newMissing = new BitSet(newSize);
|
||||||
|
int offset = 0;
|
||||||
|
Context context = Context.getCurrent();
|
||||||
|
for (SliceRange range : ranges) {
|
||||||
|
int rangeStart = range.start();
|
||||||
|
int length = range.end() - rangeStart;
|
||||||
|
for (int i = 0; i < length; i++) {
|
||||||
|
Long item = computeItem(rangeStart + i);
|
||||||
|
if (item == null) {
|
||||||
|
newMissing.set(offset + i);
|
||||||
|
} else {
|
||||||
|
newData[offset + i] = item;
|
||||||
|
}
|
||||||
|
context.safepoint();
|
||||||
|
}
|
||||||
|
offset += length;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new LongStorage(newData, newSize, newMissing, getType());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public AbstractLongStorage widen(IntegerType widerType) {
|
||||||
|
// Currently the implementation only reports 64-bit type so there is no widening to do - we can
|
||||||
|
// just return self.
|
||||||
|
assert getType().equals(IntegerType.INT_64);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
}
|
@ -26,6 +26,7 @@ import org.enso.table.data.column.operation.map.numeric.isin.DoubleIsInOp;
|
|||||||
import org.enso.table.data.column.storage.BoolStorage;
|
import org.enso.table.data.column.storage.BoolStorage;
|
||||||
import org.enso.table.data.column.storage.Storage;
|
import org.enso.table.data.column.storage.Storage;
|
||||||
import org.enso.table.data.column.storage.type.FloatType;
|
import org.enso.table.data.column.storage.type.FloatType;
|
||||||
|
import org.enso.table.data.column.storage.type.IntegerType;
|
||||||
import org.enso.table.data.column.storage.type.StorageType;
|
import org.enso.table.data.column.storage.type.StorageType;
|
||||||
import org.enso.table.data.index.Index;
|
import org.enso.table.data.index.Index;
|
||||||
import org.enso.table.data.mask.OrderMask;
|
import org.enso.table.data.mask.OrderMask;
|
||||||
@ -388,4 +389,68 @@ public final class DoubleStorage extends NumericStorage<Double> implements Doubl
|
|||||||
|
|
||||||
return new DoubleStorage(newData, newSize, newMissing);
|
return new DoubleStorage(newData, newSize, newMissing);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private StorageType inferredType = null;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public StorageType inferPreciseType() {
|
||||||
|
if (inferredType == null) {
|
||||||
|
boolean areAllIntegers = true;
|
||||||
|
int visitedNumbers = 0;
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
if (isMissing.get(i)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
double value = Double.longBitsToDouble(data[i]);
|
||||||
|
visitedNumbers++;
|
||||||
|
boolean isWholeNumber = value % 1.0 == 0.0;
|
||||||
|
boolean canBeInteger = isWholeNumber && IntegerType.INT_64.fits(value);
|
||||||
|
if (!canBeInteger) {
|
||||||
|
areAllIntegers = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We only switch to integers if there was at least one number.
|
||||||
|
inferredType = (areAllIntegers && visitedNumbers > 0) ? IntegerType.INT_64 : getType();
|
||||||
|
}
|
||||||
|
|
||||||
|
return inferredType;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public StorageType inferPreciseTypeShrunk() {
|
||||||
|
StorageType inferred = inferPreciseType();
|
||||||
|
if (inferred instanceof IntegerType) {
|
||||||
|
return findSmallestIntegerTypeThatFits();
|
||||||
|
} else {
|
||||||
|
return inferred;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private StorageType findSmallestIntegerTypeThatFits() {
|
||||||
|
assert inferredType instanceof IntegerType;
|
||||||
|
|
||||||
|
final DoubleStorage parent = this;
|
||||||
|
|
||||||
|
// We create a Long storage that gets values by converting our storage.
|
||||||
|
ComputedNullableLongStorage longAdapter =
|
||||||
|
new ComputedNullableLongStorage(size) {
|
||||||
|
@Override
|
||||||
|
protected Long computeItem(int idx) {
|
||||||
|
if (parent.isNa(idx)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
double value = parent.getItem(idx);
|
||||||
|
assert value % 1.0 == 0.0
|
||||||
|
: "The value " + value + " should be a whole number (guaranteed by checks).";
|
||||||
|
return (long) value;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// And rely on its shrinking logic.
|
||||||
|
return longAdapter.inferPreciseTypeShrunk();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -11,9 +11,11 @@ import java.time.ZonedDateTime;
|
|||||||
/**
|
/**
|
||||||
* Represents an underlying internal storage type that can be mapped to the Value Type that is exposed to users.
|
* Represents an underlying internal storage type that can be mapped to the Value Type that is exposed to users.
|
||||||
*/
|
*/
|
||||||
public sealed interface StorageType permits AnyObjectType, BigIntegerType, BooleanType, DateTimeType, DateType, FloatType, IntegerType, TextType, TimeOfDayType {
|
public sealed interface StorageType permits AnyObjectType, BigIntegerType, BooleanType, DateTimeType, DateType,
|
||||||
|
FloatType, IntegerType, TextType, TimeOfDayType {
|
||||||
/**
|
/**
|
||||||
* @return the StorageType that represents a given boxed item.
|
* @return the StorageType that represents a given boxed item. This has special handling for floating-point values -
|
||||||
|
* if they represent a whole number, they will be treated as integers.
|
||||||
*/
|
*/
|
||||||
static StorageType forBoxedItem(Object item) {
|
static StorageType forBoxedItem(Object item) {
|
||||||
if (NumericConverter.isCoercibleToLong(item)) {
|
if (NumericConverter.isCoercibleToLong(item)) {
|
||||||
@ -21,6 +23,11 @@ public sealed interface StorageType permits AnyObjectType, BigIntegerType, Boole
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (NumericConverter.isFloatLike(item)) {
|
if (NumericConverter.isFloatLike(item)) {
|
||||||
|
double value = NumericConverter.coerceToDouble(item);
|
||||||
|
if (value % 1.0 == 0.0 && IntegerType.INT_64.fits(value)) {
|
||||||
|
return IntegerType.INT_64;
|
||||||
|
}
|
||||||
|
|
||||||
return FloatType.FLOAT_64;
|
return FloatType.FLOAT_64;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -32,7 +39,7 @@ public sealed interface StorageType permits AnyObjectType, BigIntegerType, Boole
|
|||||||
case LocalTime t -> TimeOfDayType.INSTANCE;
|
case LocalTime t -> TimeOfDayType.INSTANCE;
|
||||||
case LocalDateTime d -> DateTimeType.INSTANCE;
|
case LocalDateTime d -> DateTimeType.INSTANCE;
|
||||||
case ZonedDateTime d -> DateTimeType.INSTANCE;
|
case ZonedDateTime d -> DateTimeType.INSTANCE;
|
||||||
default -> null;
|
default -> AnyObjectType.INSTANCE;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,12 @@ package org.enso.table.data.column.storage.type;
|
|||||||
import org.enso.base.Text_Utils;
|
import org.enso.base.Text_Utils;
|
||||||
|
|
||||||
public record TextType(long maxLength, boolean fixedLength) implements StorageType {
|
public record TextType(long maxLength, boolean fixedLength) implements StorageType {
|
||||||
|
public TextType {
|
||||||
|
if (maxLength == 0) {
|
||||||
|
throw new IllegalArgumentException("The maxLength of a text type must be positive or -1 to indicate unlimited length.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static final TextType VARIABLE_LENGTH = new TextType(-1, false);
|
public static final TextType VARIABLE_LENGTH = new TextType(-1, false);
|
||||||
|
|
||||||
public static TextType fixedLength(long length) {
|
public static TextType fixedLength(long length) {
|
||||||
@ -10,7 +16,7 @@ public record TextType(long maxLength, boolean fixedLength) implements StorageTy
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static TextType variableLengthWithLimit(long maxLength) {
|
public static TextType variableLengthWithLimit(long maxLength) {
|
||||||
assert maxLength >= 0;
|
assert maxLength > 0;
|
||||||
return new TextType(maxLength, false);
|
return new TextType(maxLength, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -90,6 +96,10 @@ public record TextType(long maxLength, boolean fixedLength) implements StorageTy
|
|||||||
|
|
||||||
boolean bothFixed = type1.fixedLength && type2.fixedLength;
|
boolean bothFixed = type1.fixedLength && type2.fixedLength;
|
||||||
long lengthSum = type1.maxLength + type2.maxLength;
|
long lengthSum = type1.maxLength + type2.maxLength;
|
||||||
|
if (lengthSum == 0) {
|
||||||
|
return VARIABLE_LENGTH;
|
||||||
|
}
|
||||||
|
|
||||||
return new TextType(lengthSum, bothFixed);
|
return new TextType(lengthSum, bothFixed);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -67,7 +67,6 @@ spec setup =
|
|||||||
c.value_type.is_text . should_be_true
|
c.value_type.is_text . should_be_true
|
||||||
c.to_vector . should_equal ["{{{MY Type [x=42] }}}", "{{{MY Type [x=X] }}}"]
|
c.to_vector . should_equal ["{{{MY Type [x=42] }}}", "{{{MY Type [x=X] }}}"]
|
||||||
|
|
||||||
# TODO what to test here?
|
|
||||||
Test.specify "should allow to cast an integer column to a decimal type" <|
|
Test.specify "should allow to cast an integer column to a decimal type" <|
|
||||||
t = table_builder [["X", [1, 2, 3]]]
|
t = table_builder [["X", [1, 2, 3]]]
|
||||||
c = t.at "X" . cast Value_Type.Decimal
|
c = t.at "X" . cast Value_Type.Decimal
|
||||||
@ -116,6 +115,15 @@ spec setup =
|
|||||||
w2 = Problems.expect_warning Conversion_Failure c2
|
w2 = Problems.expect_warning Conversion_Failure c2
|
||||||
w2.affected_rows_count . should_equal 4
|
w2.affected_rows_count . should_equal 4
|
||||||
|
|
||||||
|
Test.specify "should not allow 0-length Char type" <|
    # A zero-size Char type must be rejected for both fixed and variable length variants.
    column = table_builder [["X", ["a", "", "bcd"]]] . at "X"

    fixed_result = column.cast (Value_Type.Char size=0 variable_length=False)
    fixed_result.should_fail_with Illegal_Argument
    fixed_result.catch.to_display_text . should_contain "positive"

    variable_result = column.cast (Value_Type.Char size=0 variable_length=True)
    variable_result.should_fail_with Illegal_Argument
||||||
|
|
||||||
Test.group prefix+"Table/Column.cast - numeric" <|
|
Test.group prefix+"Table/Column.cast - numeric" <|
|
||||||
Test.specify "should allow to cast a boolean column to integer" <|
|
Test.specify "should allow to cast a boolean column to integer" <|
|
||||||
t = table_builder [["X", [True, False, True]]]
|
t = table_builder [["X", [True, False, True]]]
|
||||||
@ -531,3 +539,254 @@ spec setup =
|
|||||||
r3 = t.parse ["X", "Y"] Value_Type.Integer
|
r3 = t.parse ["X", "Y"] Value_Type.Integer
|
||||||
r3.should_fail_with Missing_Input_Columns
|
r3.should_fail_with Missing_Input_Columns
|
||||||
r3.catch.criteria . should_equal ["Y"]
|
r3.catch.criteria . should_equal ["Y"]
|
||||||
|
|
||||||
|
if setup.is_database then Test.group prefix+"Table/Column auto value type" <|
|
||||||
|
Test.specify "should report unsupported" <|
|
||||||
|
t = table_builder [["X", [1, 2, 3]]]
|
||||||
|
t.auto_value_types . should_fail_with Unsupported_Database_Operation
|
||||||
|
t.at "X" . auto_value_type . should_fail_with Unsupported_Database_Operation
|
||||||
|
|
||||||
|
if setup.is_database.not then Test.group prefix+"Table/Column auto value type" <|
|
||||||
|
Test.specify "should allow to narrow down types of a Mixed column" <|
|
||||||
|
[True, False].each shrink_types->
|
||||||
|
mixer = My_Type.Value 1
|
||||||
|
t0 = table_builder [["strs", [mixer, "a", "b"]], ["ints", [mixer, 2, 3]], ["floats", [mixer, 1.5, 2.5]], ["mix", [1, mixer, "a"]], ["dates", [mixer, Date.new 2022, Date.new 2020]], ["datetimes", [mixer, Date_Time.new 2022 12 30 13 45, Date_Time.new 2020]], ["times", [mixer, Time_Of_Day.new 12 30, Time_Of_Day.new 13 45]], ["mixed_time", [Date.new 2022, Time_Of_Day.new 12 30, Date_Time.new 2019]], ["bools", [mixer, True, False]]]
|
||||||
|
t1 = t0.drop 1
|
||||||
|
|
||||||
|
t1.at "strs" . value_type . should_equal Value_Type.Mixed
|
||||||
|
t1.at "ints" . value_type . should_equal Value_Type.Mixed
|
||||||
|
t1.at "floats" . value_type . should_equal Value_Type.Mixed
|
||||||
|
t1.at "mix" . value_type . should_equal Value_Type.Mixed
|
||||||
|
t1.at "dates" . value_type . should_equal Value_Type.Mixed
|
||||||
|
t1.at "datetimes" . value_type . should_equal Value_Type.Mixed
|
||||||
|
t1.at "times" . value_type . should_equal Value_Type.Mixed
|
||||||
|
t1.at "mixed_time" . value_type . should_equal Value_Type.Mixed
|
||||||
|
t1.at "bools" . value_type . should_equal Value_Type.Mixed
|
||||||
|
|
||||||
|
t2 = t1.auto_value_types shrink_types=shrink_types
|
||||||
|
# Depending on shrink_types value the size of the Char/Integer types may vary - exact details tested elsewhere.
|
||||||
|
t2.at "strs" . value_type . should_be_a (Value_Type.Char ...)
|
||||||
|
t2.at "ints" . value_type . should_be_a (Value_Type.Integer ...)
|
||||||
|
t2.at "floats" . value_type . should_equal Value_Type.Float
|
||||||
|
t2.at "mix" . value_type . should_equal Value_Type.Mixed
|
||||||
|
t2.at "dates" . value_type . should_equal Value_Type.Date
|
||||||
|
t2.at "datetimes" . value_type . should_equal Value_Type.Date_Time
|
||||||
|
t2.at "times" . value_type . should_equal Value_Type.Time
|
||||||
|
t2.at "mixed_time" . value_type . should_equal Value_Type.Mixed
|
||||||
|
t2.at "bools" . value_type . should_equal Value_Type.Boolean
|
||||||
|
|
||||||
|
Test.specify "will only modify selected columns" <|
|
||||||
|
mixer = My_Type.Value 1
|
||||||
|
t0 = table_builder [["strs", [mixer, "a", "b"]], ["ints", [mixer, 2, 3]], ["floats", [mixer, 1.5, 2.5]]]
|
||||||
|
t1 = t0.drop 1
|
||||||
|
|
||||||
|
t2 = t1.auto_value_types []
|
||||||
|
t2.at "strs" . value_type . should_equal Value_Type.Mixed
|
||||||
|
t2.at "ints" . value_type . should_equal Value_Type.Mixed
|
||||||
|
t2.at "floats" . value_type . should_equal Value_Type.Mixed
|
||||||
|
|
||||||
|
t3 = t1.auto_value_types ["strs"]
|
||||||
|
t3.at "strs" . value_type . should_equal Value_Type.Char
|
||||||
|
t3.at "ints" . value_type . should_equal Value_Type.Mixed
|
||||||
|
t3.at "floats" . value_type . should_equal Value_Type.Mixed
|
||||||
|
|
||||||
|
# should match ints and floats but not strs
|
||||||
|
t4 = t1.auto_value_types "[if].*".to_regex
|
||||||
|
t4.at "strs" . value_type . should_equal Value_Type.Mixed
|
||||||
|
t4.at "ints" . value_type . should_equal Value_Type.Integer
|
||||||
|
t4.at "floats" . value_type . should_equal Value_Type.Float
|
||||||
|
|
||||||
|
Test.specify "will convert a Float column to Integer if all values can be represented as long" <|
|
||||||
|
t1 = table_builder [["X", [1.0, 2.0, 3.0]], ["Y", [1.0, 2.5, 3.0]], ["Z", [1.0, 2.0, (2.0^100)]]]
|
||||||
|
t1.at "X" . value_type . should_equal Value_Type.Float
|
||||||
|
t1.at "Y" . value_type . should_equal Value_Type.Float
|
||||||
|
t1.at "Z" . value_type . should_equal Value_Type.Float
|
||||||
|
|
||||||
|
t2 = t1.auto_value_types shrink_types=False
|
||||||
|
t2.at "X" . to_vector . should_equal [1, 2, 3]
|
||||||
|
t2.at "X" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
|
||||||
|
t2.at "Y" . value_type . should_equal Value_Type.Float
|
||||||
|
## Technically, Z could get converted to Decimal type. But IMO that
|
||||||
|
is not desirable - at this scale the Float is no longer a
|
||||||
|
precise type (as not even consecutive integers are exactly
|
||||||
|
representable). And Decimal is expected to be precise. So such a
|
||||||
|
conversion should only happen by explicit request, not
|
||||||
|
automatically.
|
||||||
|
t2.at "Z" . value_type . should_equal Value_Type.Float
|
||||||
|
|
||||||
|
Test.specify "will not parse text columns" <|
|
||||||
|
t1 = table_builder [["X", ["1", "2", "3"]]]
|
||||||
|
c2 = t1.at "X" . auto_value_type
|
||||||
|
c2.value_type . should_equal Value_Type.Char
|
||||||
|
|
||||||
|
Test.specify "will 'undo' a cast to Mixed" <|
|
||||||
|
t1 = table_builder [["X", [1, 2, 3]], ["Y", ["a", "b", "c"]]]
|
||||||
|
t2 = t1.cast ["X", "Y"] Value_Type.Mixed
|
||||||
|
t2.at "X" . value_type . should_equal Value_Type.Mixed
|
||||||
|
t2.at "Y" . value_type . should_equal Value_Type.Mixed
|
||||||
|
|
||||||
|
t3 = t2.auto_value_types
|
||||||
|
t3.at "X" . value_type . should_equal Value_Type.Integer
|
||||||
|
t3.at "Y" . value_type . should_equal Value_Type.Char
|
||||||
|
|
||||||
|
Test.specify "will choose Decimal type if all values are integers but cannot fit long" <|
|
||||||
|
c0 = table_builder [["X", [My_Type.Value 42, 1, 2, 2^100]]] . at "X"
|
||||||
|
c1 = c0.drop 1
|
||||||
|
|
||||||
|
c1.value_type . should_equal Value_Type.Mixed
|
||||||
|
c2 = c1.auto_value_type
|
||||||
|
c2.value_type . should_be_a (Value_Type.Decimal ...)
|
||||||
|
c2.to_vector . should_equal [1, 2, 2^100]
|
||||||
|
|
||||||
|
Test.specify "will try to find the smallest integer type to fit the value (if shrink_types=True)" <|
|
||||||
|
[False, True].each is_mixed->
|
||||||
|
prefix = if is_mixed then "mixed" else 0
|
||||||
|
t0 = table_builder [["X", [prefix, 1, 2, 3]], ["Y", [prefix, 2^20, 2, 3]], ["Z", [prefix, 2^50, 2, 3]], ["F", [prefix, 1.0, 2.0, 3.0]]]
|
||||||
|
t1 = t0.drop 1
|
||||||
|
|
||||||
|
case is_mixed of
|
||||||
|
True -> t1.at "Z" . value_type . should_equal Value_Type.Mixed
|
||||||
|
False -> t1.at "Z" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
|
||||||
|
|
||||||
|
case is_mixed of
|
||||||
|
True -> t1.at "F" . value_type . should_equal Value_Type.Mixed
|
||||||
|
False -> t1.at "F" . value_type . should_equal Value_Type.Float
|
||||||
|
|
||||||
|
t2 = t1.auto_value_types shrink_types=False
|
||||||
|
t2.at "X" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
|
||||||
|
t2.at "Y" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
|
||||||
|
t2.at "Z" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
|
||||||
|
t2.at "F" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
|
||||||
|
|
||||||
|
t3 = t1.auto_value_types shrink_types=True
|
||||||
|
# Even though X's values are small enough to fit in a Byte, we stick to 16-bit Integers.
|
||||||
|
t3.at "X" . value_type . should_equal (Value_Type.Integer Bits.Bits_16)
|
||||||
|
t3.at "Y" . value_type . should_equal (Value_Type.Integer Bits.Bits_32)
|
||||||
|
t3.at "Z" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
|
||||||
|
# Shrinking Floats also finds the smallest type that fits.
|
||||||
|
t3.at "F" . value_type . should_equal (Value_Type.Integer Bits.Bits_16)
|
||||||
|
|
||||||
|
Test.specify "will not return Byte columns by default, but should leave existing Byte columns intact" <|
|
||||||
|
c1 = table_builder [["X", [1, 2, 3]]] . at "X" . cast Value_Type.Byte
|
||||||
|
c1.value_type . should_equal Value_Type.Byte
|
||||||
|
|
||||||
|
[True, False].each shrink_types->
|
||||||
|
c2 = c1.auto_value_type shrink_types=shrink_types
|
||||||
|
c2.value_type . should_equal Value_Type.Byte
|
||||||
|
|
||||||
|
Test.specify "Decimal (scale=0, i.e. integer) columns should also be shrinked if possible and shrink_types=True" <|
|
||||||
|
t0 = table_builder [["X", [2^100, 1, 2, 3]], ["Y", [10, 20, 2^100, 30]], ["Z", [1, 2, 3, 4]]] . cast "Z" (Value_Type.Decimal scale=0)
|
||||||
|
t1 = t0.drop 1
|
||||||
|
|
||||||
|
t1.at "X" . value_type . should_equal (Value_Type.Decimal scale=0)
|
||||||
|
t1.at "Y" . value_type . should_equal (Value_Type.Decimal scale=0)
|
||||||
|
t1.at "Z" . value_type . should_equal (Value_Type.Decimal scale=0)
|
||||||
|
|
||||||
|
t2 = t1.auto_value_types shrink_types=False
|
||||||
|
|
||||||
|
# Without shrinking we get an integer type, but not the smallest one - just the default 64-bit.
|
||||||
|
t2.at "X" . to_vector . should_equal [1, 2, 3]
|
||||||
|
t2.at "X" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
|
||||||
|
t2.at "Y" . value_type . should_equal (Value_Type.Decimal scale=0)
|
||||||
|
t2.at "Z" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
|
||||||
|
|
||||||
|
t3 = t1.auto_value_types shrink_types=True
|
||||||
|
t3.at "X" . value_type . should_equal (Value_Type.Integer Bits.Bits_16)
|
||||||
|
t3.at "Y" . value_type . should_equal (Value_Type.Decimal scale=0)
|
||||||
|
t3.at "Z" . value_type . should_equal (Value_Type.Integer Bits.Bits_16)
|
||||||
|
|
||||||
|
Test.specify "if all text values have the same length, will change the type to fixed-length string (if shrink_types=True)" <|
|
||||||
|
[False, True].each is_mixed->
|
||||||
|
prefix = if is_mixed then 42 else "FOOBARBAZ"
|
||||||
|
c0 = table_builder [["X", [prefix, "aa", "bb", "cc"]]] . at "X"
|
||||||
|
c1 = c0.drop 1
|
||||||
|
c1.to_vector . should_equal ["aa", "bb", "cc"]
|
||||||
|
|
||||||
|
case is_mixed of
|
||||||
|
True -> c1.value_type . should_equal Value_Type.Mixed
|
||||||
|
False -> c1.value_type . should_equal (Value_Type.Char size=Nothing variable_length=True)
|
||||||
|
|
||||||
|
c2 = c1.auto_value_type shrink_types=False
|
||||||
|
c2.value_type . should_equal (Value_Type.Char size=Nothing variable_length=True)
|
||||||
|
|
||||||
|
c3 = c1.auto_value_type shrink_types=True
|
||||||
|
c3.value_type . should_equal (Value_Type.Char size=2 variable_length=False)
|
||||||
|
|
||||||
|
c4 = table_builder [["X", ["a", "x", "y"]]] . at "X" . cast (Value_Type.Char size=100 variable_length=True)
|
||||||
|
c4.to_vector . should_equal ["a", "x", "y"]
|
||||||
|
c4.value_type . should_equal (Value_Type.Char size=100 variable_length=True)
|
||||||
|
|
||||||
|
c5 = c4.auto_value_type shrink_types=False
|
||||||
|
c5.value_type . should_equal (Value_Type.Char size=100 variable_length=True)
|
||||||
|
|
||||||
|
c6 = c4.auto_value_type shrink_types=True
|
||||||
|
c6.value_type . should_equal (Value_Type.Char size=1 variable_length=False)
|
||||||
|
|
||||||
|
Test.specify "if all text values are empty string, the type will remain unchanged" <|
|
||||||
|
c1 = table_builder [["X", ["", ""]]] . at "X"
|
||||||
|
c2 = c1.cast (Value_Type.Char size=100 variable_length=True)
|
||||||
|
|
||||||
|
c1.value_type . should_equal (Value_Type.Char size=Nothing variable_length=True)
|
||||||
|
c2.value_type . should_equal (Value_Type.Char size=100 variable_length=True)
|
||||||
|
|
||||||
|
[True, False].each shrink_types->
|
||||||
|
c1_b = c1.auto_value_type shrink_types=shrink_types
|
||||||
|
c1_b.value_type . should_equal (Value_Type.Char size=Nothing variable_length=True)
|
||||||
|
|
||||||
|
c2_b = c2.auto_value_type shrink_types=shrink_types
|
||||||
|
c2_b.value_type . should_equal (Value_Type.Char size=100 variable_length=True)
|
||||||
|
|
||||||
|
Test.specify "if all text values fit under 255 characters, will add a 255 length limit (if shrink_types=True)" <|
|
||||||
|
t1 = table_builder [["short_unbounded", ["a", "bb", "ccc"]], ["long_unbounded", ["a"*100, "b"*200, "c"*300]]]
|
||||||
|
|
||||||
|
t2 = t1 . set (t1.at "short_unbounded" . cast (Value_Type.Char size=1000)) "short_1000" . set (t1.at "short_unbounded" . cast (Value_Type.Char size=10)) "short_10" . set (t1.at "long_unbounded" . cast (Value_Type.Char size=400)) "long_400" . set (t1.at "short_unbounded" . cast Value_Type.Mixed) "short_mixed"
|
||||||
|
t2.at "short_mixed" . value_type . should_equal Value_Type.Mixed
|
||||||
|
|
||||||
|
t3 = t2.auto_value_types shrink_types=False
|
||||||
|
t3.at "short_unbounded" . value_type . should_equal (Value_Type.Char size=Nothing variable_length=True)
|
||||||
|
t3.at "short_1000" . value_type . should_equal (Value_Type.Char size=1000 variable_length=True)
|
||||||
|
t3.at "short_10" . value_type . should_equal (Value_Type.Char size=10 variable_length=True)
|
||||||
|
# Mixed column gets to be text again.
|
||||||
|
t3.at "short_mixed" . value_type . should_equal (Value_Type.Char size=Nothing variable_length=True)
|
||||||
|
t3.at "long_unbounded" . value_type . should_equal (Value_Type.Char size=Nothing variable_length=True)
|
||||||
|
t3.at "long_400" . value_type . should_equal (Value_Type.Char size=400 variable_length=True)
|
||||||
|
|
||||||
|
t4 = t2.auto_value_types shrink_types=True
|
||||||
|
# Short ones get shortened to 255 unless they were shorter already.
|
||||||
|
t4.at "short_unbounded" . value_type . should_equal (Value_Type.Char size=255 variable_length=True)
|
||||||
|
t4.at "short_1000" . value_type . should_equal (Value_Type.Char size=255 variable_length=True)
|
||||||
|
t4.at "short_10" . value_type . should_equal (Value_Type.Char size=10 variable_length=True)
|
||||||
|
t4.at "short_mixed" . value_type . should_equal (Value_Type.Char size=255 variable_length=True)
|
||||||
|
# Long ones cannot fit in 255 so they are kept as-is.
|
||||||
|
t4.at "long_unbounded" . value_type . should_equal (Value_Type.Char size=Nothing variable_length=True)
|
||||||
|
t4.at "long_400" . value_type . should_equal (Value_Type.Char size=400 variable_length=True)
|
||||||
|
|
||||||
|
Test.specify "can deal with all-null columns" <|
|
||||||
|
t0 = table_builder [["mix", [My_Type.Value 1, Nothing, Nothing]], ["int", [42, Nothing, Nothing]], ["str", ["a", Nothing, Nothing]], ["float", [1.5, Nothing, Nothing]], ["decimal", [2^100, 2^10, 2]]]
|
||||||
|
t1 = t0.drop 1
|
||||||
|
|
||||||
|
t1.at "mix" . value_type . should_equal Value_Type.Mixed
|
||||||
|
t1.at "int" . value_type . should_equal Value_Type.Integer
|
||||||
|
t1.at "float" . value_type . should_equal Value_Type.Float
|
||||||
|
t1.at "str" . value_type . should_equal Value_Type.Char
|
||||||
|
t1.at "decimal" . value_type . should_equal (Value_Type.Decimal scale=0)
|
||||||
|
|
||||||
|
t2 = t1.auto_value_types shrink_types=False
|
||||||
|
t2.at "mix" . value_type . should_equal Value_Type.Mixed
|
||||||
|
t2.at "int" . value_type . should_equal Value_Type.Integer
|
||||||
|
## Technically, if there are no elements, "all of elements" are
|
||||||
|
whole integers (quantification over empty domain is trivially true).
|
||||||
|
However, that would not be very useful, so instead we keep the
|
||||||
|
original type.
|
||||||
|
t2.at "float" . value_type . should_equal Value_Type.Float
|
||||||
|
t1.at "decimal" . value_type . should_equal (Value_Type.Decimal scale=0)
|
||||||
|
t2.at "str" . value_type . should_equal Value_Type.Char
|
||||||
|
|
||||||
|
t3 = t1.auto_value_types shrink_types=True
|
||||||
|
t3.at "mix" . value_type . should_equal Value_Type.Mixed
|
||||||
|
# Technically, if there are no elements, then they fit inside the smallest types available:
|
||||||
|
t3.at "int" . value_type . should_equal (Value_Type.Integer Bits.Bits_16)
|
||||||
|
t3.at "float" . value_type . should_equal Value_Type.Float
|
||||||
|
t1.at "decimal" . value_type . should_equal (Value_Type.Decimal scale=0)
|
||||||
|
# But for Text we make an exception and keep the type unbounded: 0-length fixed length string simply would not make any sense.
|
||||||
|
t3.at "str" . value_type . should_equal (Value_Type.Char size=Nothing variable_length=True)
|
||||||
|
@ -125,7 +125,7 @@ spec setup =
|
|||||||
k x = if x == 2 then Time_Of_Day.new 13 05 else (x+1).to_text
|
k x = if x == 2 then Time_Of_Day.new 13 05 else (x+1).to_text
|
||||||
r7 = c1.map k expected_value_type=Value_Type.Char
|
r7 = c1.map k expected_value_type=Value_Type.Char
|
||||||
r7.should_fail_with Invalid_Value_Type
|
r7.should_fail_with Invalid_Value_Type
|
||||||
r7.catch.to_display_text . should_contain "Expected type Char (variable length, max_size=Nothing), but got a value 13:05:00 of type Time"
|
r7.catch.to_display_text . should_contain "Expected type Char (variable length, max_size=unlimited), but got a value 13:05:00 of type Time"
|
||||||
|
|
||||||
l x = if x == 2 then 42 else Date.new 2022 05 x
|
l x = if x == 2 then 42 else Date.new 2022 05 x
|
||||||
r8 = c1.map l expected_value_type=Value_Type.Date
|
r8 = c1.map l expected_value_type=Value_Type.Date
|
||||||
|
@ -17,7 +17,7 @@ spec =
|
|||||||
Value_Type.Float.to_display_text . should_equal "Float (64 bits)"
|
Value_Type.Float.to_display_text . should_equal "Float (64 bits)"
|
||||||
Value_Type.Decimal.to_display_text . should_equal "Decimal (precision=Nothing, scale=Nothing)"
|
Value_Type.Decimal.to_display_text . should_equal "Decimal (precision=Nothing, scale=Nothing)"
|
||||||
|
|
||||||
Value_Type.Char.to_display_text . should_equal "Char (variable length, max_size=Nothing)"
|
Value_Type.Char.to_display_text . should_equal "Char (variable length, max_size=unlimited)"
|
||||||
(Value_Type.Binary 8 False).to_display_text . should_equal "Binary (fixed length, size=8 bytes)"
|
(Value_Type.Binary 8 False).to_display_text . should_equal "Binary (fixed length, size=8 bytes)"
|
||||||
|
|
||||||
Value_Type.Date.to_display_text . should_equal "Date"
|
Value_Type.Date.to_display_text . should_equal "Date"
|
||||||
|
@ -166,10 +166,14 @@ spec =
|
|||||||
c8.value_type . should_equal Value_Type.Mixed
|
c8.value_type . should_equal Value_Type.Mixed
|
||||||
c8.to_vector . should_equal ["aaa", 42, Date.new 2022 08 22]
|
c8.to_vector . should_equal ["aaa", 42, Date.new 2022 08 22]
|
||||||
|
|
||||||
|
c9 = Column.from_vector "X" [Time_Of_Day.new 10 11 12, Time_Of_Day.new 11 30] Value_Type.Time
|
||||||
|
c9.value_type . should_equal Value_Type.Time
|
||||||
|
c9.to_vector . should_equal [Time_Of_Day.new 10 11 12, Time_Of_Day.new 11 30]
|
||||||
|
|
||||||
Test.specify "will fail if unexpected values are encountered for the requested type" <|
|
Test.specify "will fail if unexpected values are encountered for the requested type" <|
|
||||||
r1 = Column.from_vector "X" ["a", 2] Value_Type.Char
|
r1 = Column.from_vector "X" ["a", 2] Value_Type.Char
|
||||||
r1.should_fail_with Invalid_Value_Type
|
r1.should_fail_with Invalid_Value_Type
|
||||||
r1.catch.to_display_text.should_contain "Expected type Char (variable length, max_size=Nothing), but got a value 2 of type Integer (16 bits)"
|
r1.catch.to_display_text.should_contain "Expected type Char (variable length, max_size=unlimited), but got a value 2 of type Integer (16 bits)"
|
||||||
|
|
||||||
r2 = Column.from_vector "X" ["aaa", "b"] (Value_Type.Char size=3 variable_length=False)
|
r2 = Column.from_vector "X" ["aaa", "b"] (Value_Type.Char size=3 variable_length=False)
|
||||||
r2.should_fail_with Invalid_Value_Type
|
r2.should_fail_with Invalid_Value_Type
|
||||||
@ -177,7 +181,7 @@ spec =
|
|||||||
|
|
||||||
r3 = Column.from_vector "X" ["aaa", 42] Value_Type.Char
|
r3 = Column.from_vector "X" ["aaa", 42] Value_Type.Char
|
||||||
r3.should_fail_with Invalid_Value_Type
|
r3.should_fail_with Invalid_Value_Type
|
||||||
r3.catch.to_display_text.should_contain "Expected type Char (variable length, max_size=Nothing), but got a value 42 of type Integer (16 bits)"
|
r3.catch.to_display_text.should_contain "Expected type Char (variable length, max_size=unlimited), but got a value 42 of type Integer (16 bits)"
|
||||||
|
|
||||||
r4 = Column.from_vector "X" [12, Time_Of_Day.new 10 11 12] Value_Type.Integer
|
r4 = Column.from_vector "X" [12, Time_Of_Day.new 10 11 12] Value_Type.Integer
|
||||||
r4.should_fail_with Invalid_Value_Type
|
r4.should_fail_with Invalid_Value_Type
|
||||||
@ -199,6 +203,13 @@ spec =
|
|||||||
r8.should_fail_with Invalid_Value_Type
|
r8.should_fail_with Invalid_Value_Type
|
||||||
r8.catch.to_display_text.should_contain "Expected type Byte, but got a value 1000000000 of type Integer (32 bits)"
|
r8.catch.to_display_text.should_contain "Expected type Byte, but got a value 1000000000 of type Integer (32 bits)"
|
||||||
|
|
||||||
|
Test.specify "will not allow to construct a column with Char size=0" <|
|
||||||
|
r1 = Column.from_vector "X" [] (Value_Type.Char size=0 variable_length=False)
|
||||||
|
r1.should_fail_with Illegal_Argument
|
||||||
|
|
||||||
|
r2 = Column.from_vector "X" [] (Value_Type.Char size=0 variable_length=True)
|
||||||
|
r2.should_fail_with Illegal_Argument
|
||||||
|
|
||||||
Test.group "Rounding" <|
|
Test.group "Rounding" <|
|
||||||
Test.specify "should be able to round a column of decimals" <|
|
Test.specify "should be able to round a column of decimals" <|
|
||||||
Column.from_vector "foo" [1.2, 2.3, 2.5, 3.6] . round . should_equal (Column.from_vector "round([foo])" [1, 2, 3, 4])
|
Column.from_vector "foo" [1.2, 2.3, 2.5, 3.6] . round . should_equal (Column.from_vector "round([foo])" [1, 2, 3, 4])
|
||||||
|
Loading…
Reference in New Issue
Block a user