mirror of
https://github.com/enso-org/enso.git
synced 2024-11-22 22:10:15 +03:00
Add support for Is_In
and Not_In
to Filter_Condition
(#3790)
Implements https://www.pivotaltracker.com/story/show/183389945
This commit is contained in:
parent
811d82c787
commit
82de8f88bd
@ -211,6 +211,7 @@
|
||||
`Not_Like`.][3775]
|
||||
- [Reimplemented `Duration` as a built-in type.][3759]
|
||||
- [Implemented `Table.replace_text` for in-memory table.][3793]
|
||||
- [Extended `Filter_Condition` with `Is_In` and `Not_In`.][3790]
|
||||
|
||||
[debug-shortcuts]:
|
||||
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
||||
@ -339,6 +340,7 @@
|
||||
[3775]: https://github.com/enso-org/enso/pull/3775
|
||||
[3759]: https://github.com/enso-org/enso/pull/3759
|
||||
[3793]: https://github.com/enso-org/enso/pull/3793
|
||||
[3790]: https://github.com/enso-org/enso/pull/3790
|
||||
|
||||
#### Enso Compiler
|
||||
|
||||
|
@ -1192,7 +1192,6 @@ lazy val parser = (project in file("lib/scala/parser"))
|
||||
s"-Djava.library.path=$root/target/rust/debug"
|
||||
},
|
||||
libraryDependencies ++= Seq(
|
||||
"com.storm-enroute" %% "scalameter" % scalameterVersion % "bench",
|
||||
"org.scalatest" %%% "scalatest" % scalatestVersion % Test
|
||||
),
|
||||
testFrameworks := List(
|
||||
|
@ -105,6 +105,12 @@ type Filter_Condition
|
||||
See https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8032926
|
||||
Not_Like pattern:Text
|
||||
|
||||
## Is the value contained in `values`?
|
||||
Is_In values:Vector
|
||||
|
||||
## Is the value not contained in `values`?
|
||||
Not_In values:Vector
|
||||
|
||||
## Converts a `Filter_Condition` condition into a predicate taking an
|
||||
element and returning a value indicating whether the element should be
|
||||
accepted by the filter.
|
||||
@ -142,6 +148,10 @@ type Filter_Condition
|
||||
Not_Like sql_pattern ->
|
||||
regex = sql_like_to_regex sql_pattern
|
||||
elem -> regex.matches elem . not
|
||||
## TODO once we have proper hashing we could create a hashmap and
|
||||
answer quicker, currently we need to do a full scan for each element.
|
||||
Is_In values -> values.contains
|
||||
Not_In values -> elem -> values.contains elem . not
|
||||
|
||||
## PRIVATE
|
||||
sql_like_to_regex sql_pattern =
|
||||
|
@ -273,6 +273,6 @@ type Duration
|
||||
|
||||
import Standard.Base.Data.Time.Duration
|
||||
|
||||
example_is_empty = 10.seconds.is_empty
|
||||
example_is_empty = Duration.zero.is_empty
|
||||
is_empty : Boolean
|
||||
is_empty self = self.to_vector . all (==0)
|
||||
|
@ -46,7 +46,7 @@ polyglot java import org.enso.base.Http_Utils
|
||||
import Standard.Base.Network.Proxy
|
||||
|
||||
example_new =
|
||||
Http.new (timeout = 30.seconds) (proxy = Proxy.new "example.com" 8080)
|
||||
Http.new (timeout = (Duration.new seconds=30)) (proxy = Proxy.new "example.com" 8080)
|
||||
new : Duration -> Boolean -> Proxy -> Http
|
||||
new (timeout = (Duration.new seconds=10)) (follow_redirects = True) (proxy = Proxy.System) (version = Version.Http_1_1) =
|
||||
Http_Data timeout follow_redirects proxy version
|
||||
@ -595,7 +595,7 @@ type Http
|
||||
example_request =
|
||||
form = [Form.text_field "name" "John Doe"]
|
||||
req = Request.new Method.Post "http://httpbin.org/post" . with_form form
|
||||
http = Http.new (timeout = 30.seconds)
|
||||
http = Http.new (timeout = (Duration.new seconds=30))
|
||||
http.request req
|
||||
request : Request -> Response ! Request_Error
|
||||
request self req =
|
||||
|
@ -13,7 +13,7 @@ import project.Internal.IR.Internal_Column.Internal_Column
|
||||
|
||||
from project.Data.Table import Table, freshen_columns
|
||||
|
||||
from project.Errors import Unsupported_Database_Operation_Error
|
||||
from project.Errors import Unsupported_Database_Operation_Error, Unsupported_Database_Operation_Error_Data
|
||||
|
||||
type Column
|
||||
|
||||
@ -130,7 +130,7 @@ type Column
|
||||
prepare_operand operand operand_type = case operand of
|
||||
other_column : Column ->
|
||||
if Helpers.check_integrity self other_column then other_column.expression else
|
||||
Error.throw <| Unsupported_Database_Operation_Error "Cannot use columns coming from different contexts in one expression without a join."
|
||||
Error.throw <| Unsupported_Database_Operation_Error_Data "Cannot use columns coming from different contexts in one expression without a join."
|
||||
constant ->
|
||||
actual_operand_type = operand_type.if_nothing self.sql_type
|
||||
Expression.Constant actual_operand_type constant
|
||||
@ -394,6 +394,32 @@ type Column
|
||||
/ : Column | Any -> Column
|
||||
/ self other = self.make_binary_op "/" other
|
||||
|
||||
## Element-wise modulus.
|
||||
|
||||
Arguments:
|
||||
- other: The value to modulo `self` against. If `other` is a column, the
|
||||
modulus is performed pairwise between corresponding elements of `self`
|
||||
and `other`.
|
||||
|
||||
Returns a column with the results of taking the modulus of this column's elements against
|
||||
`other`.
|
||||
|
||||
> Example
|
||||
Modulus of two columns against each other.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_mod = Examples.integer_column % Examples.decimal_column
|
||||
|
||||
> Example
|
||||
Modulus of a column with a number.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_mod = Examples.integer_column % 3
|
||||
% : Column | Any -> Column
|
||||
% self other = self.make_binary_op "%" other
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Element-wise boolean conjunction.
|
||||
@ -433,20 +459,20 @@ type Column
|
||||
Returns a column of booleans, with `True` items at the positions where
|
||||
this column contains a `Nothing`.
|
||||
is_missing : Column
|
||||
is_missing self = self.make_unary_op "ISNULL" new_type=SQL_Type.boolean
|
||||
is_missing self = self.make_unary_op "IS_NULL" new_type=SQL_Type.boolean
|
||||
|
||||
## PRIVATE
|
||||
Returns a column of booleans, with `True` items at the positions where
|
||||
this column contains an empty string or `Nothing`.
|
||||
is_empty : Column
|
||||
is_empty self = self.make_unary_op "ISEMPTY" new_type=SQL_Type.boolean
|
||||
is_empty self = self.make_unary_op "IS_EMPTY" new_type=SQL_Type.boolean
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Returns a new column where missing values have been replaced with the
|
||||
provided default.
|
||||
fill_missing : Any -> Column
|
||||
fill_missing self default = self.make_binary_op "FILLNULL" default
|
||||
fill_missing self default = self.make_binary_op "FILL_NULL" default
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
@ -495,7 +521,7 @@ type Column
|
||||
take self range=(First 1) =
|
||||
_ = range
|
||||
msg = "`Column.take` is not yet implemented."
|
||||
Error.throw (Unsupported_Database_Operation_Error msg)
|
||||
Error.throw (Unsupported_Database_Operation_Error_Data msg)
|
||||
|
||||
## UNSTABLE
|
||||
Creates a new Column from the input with the specified range of rows
|
||||
@ -507,7 +533,7 @@ type Column
|
||||
drop self range=(First 1) =
|
||||
_ = range
|
||||
msg = "`Column.drop` is not yet implemented."
|
||||
Error.throw (Unsupported_Database_Operation_Error msg)
|
||||
Error.throw (Unsupported_Database_Operation_Error_Data msg)
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
@ -551,10 +577,63 @@ type Column
|
||||
contains : Column | Text -> Column
|
||||
contains self other = self.make_binary_op "contains" other new_type=SQL_Type.boolean
|
||||
|
||||
## PRIVATE
|
||||
Checks for each element of the column if it matches an SQL-like pattern.
|
||||
## Checks for each element of the column if it matches an SQL-like pattern.
|
||||
|
||||
Arguments:
|
||||
- pattern: The pattern to match `self` against. If it is a column, the
|
||||
operation is performed pairwise between corresponding elements of
|
||||
`self` and that column. The pattern is an SQL-like pattern, where
|
||||
`%` matches any sequence of characters and `_` matches any single
|
||||
character.
|
||||
|
||||
> Example
|
||||
Check if elements of a column start with 'F' and end with a dot.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_like = Examples.text_column_1.like "F%."
|
||||
like : Column | Text -> Column
|
||||
like self other = self.make_binary_op "LIKE" other new_type=SQL_Type.boolean
|
||||
like self pattern = self.make_binary_op "LIKE" pattern new_type=SQL_Type.boolean
|
||||
|
||||
## Checks for each element of the column if it is contained within the
|
||||
provided vector.
|
||||
|
||||
Arguments:
|
||||
- vector: A vector of elements. The resulting column will contain true at
|
||||
the positions where the corresponding element of `self` is contained
|
||||
in `vector`.
|
||||
|
||||
> Example
|
||||
Check if elements of a column are contained in a provided vector.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_is_in = Examples.text_column_1.is_in [1, 2, 5]
|
||||
is_in self vector =
|
||||
## This is slightly hacky - we don't provide operand types as we want to
|
||||
allow any type to get through and currently we do not have a mapping
|
||||
from Enso types to SQL types (it may be available in the future). So
|
||||
we just rely on Nothing resolving to the current column type. That
|
||||
type may not always match the operands, but the current
|
||||
implementation uses this type only for two purposes: generated SQL
|
||||
visualization (so the color will be consistent with the column type
|
||||
and not the value type - that can be confusing, we probably want to
|
||||
fix it later) and setting up the query - but at the set up this only
|
||||
applies to adding nulls - setting any other object does not check the
|
||||
type at this level anyway.
|
||||
partitioned = vector.partition .is_nothing
|
||||
nulls = partitioned.first
|
||||
non_nulls = partitioned.second
|
||||
## Since SQL `NULL IN (NULL)` yields `NULL`, we need to handle this case
|
||||
separately. So we handle all non-null values using `IS_IN` and then
|
||||
`OR` that with a null check (if the vector contained any nulls to
|
||||
begin with). The implementation also ensures that even
|
||||
`NULL IN (...)` is coalesced to False, so that negation works as
|
||||
expected.
|
||||
is_in_not_null = self.make_op "IS_IN" operands=non_nulls new_type=SQL_Type.boolean
|
||||
case nulls.not_empty of
|
||||
True -> is_in_not_null || self.is_missing
|
||||
False -> is_in_not_null
|
||||
|
||||
## PRIVATE
|
||||
as_internal : Internal_Column
|
||||
|
@ -11,7 +11,7 @@ import project.Internal.IR.Nulls_Order.Nulls_Order
|
||||
import project.Internal.IR.Query.Query
|
||||
from project.Data.SQL import code
|
||||
|
||||
from project.Errors import Unsupported_Database_Operation_Error
|
||||
from project.Errors import Unsupported_Database_Operation_Error_Data
|
||||
|
||||
type Internal_Dialect
|
||||
|
||||
@ -169,14 +169,15 @@ base_dialect =
|
||||
unary = name -> [name, make_unary_op name]
|
||||
fun = name -> [name, make_function name]
|
||||
|
||||
arith = [bin "+", bin "-", bin "*", bin "/"]
|
||||
arith = [bin "+", bin "-", bin "*", bin "/", bin "%"]
|
||||
logic = [bin "AND", bin "OR", unary "NOT"]
|
||||
compare = [bin "=", bin "!=", bin "<", bin ">", bin "<=", bin ">=", ["BETWEEN", make_between]]
|
||||
agg = [fun "MAX", fun "MIN", fun "AVG", fun "SUM"]
|
||||
counts = [fun "COUNT", ["COUNT_ROWS", make_constant "COUNT(*)"]]
|
||||
text = [["ISEMPTY", make_is_empty], bin "LIKE"]
|
||||
nulls = [["ISNULL", make_right_unary_op "IS NULL"], ["FILLNULL", make_function "COALESCE"]]
|
||||
base_map = Map.from_vector (arith + logic + compare + agg + counts + text + nulls)
|
||||
text = [["IS_EMPTY", make_is_empty], bin "LIKE"]
|
||||
nulls = [["IS_NULL", make_right_unary_op "IS NULL"], ["FILL_NULL", make_function "COALESCE"]]
|
||||
contains = [["IS_IN", make_is_in]]
|
||||
base_map = Map.from_vector (arith + logic + compare + agg + counts + text + nulls + contains)
|
||||
Internal_Dialect.Value base_map wrap_in_quotes
|
||||
|
||||
## PRIVATE
|
||||
@ -188,7 +189,7 @@ make_is_empty arguments = case arguments.length of
|
||||
is_empty = (arg ++ " = ''").paren
|
||||
(is_null ++ " OR " ++ is_empty).paren
|
||||
_ ->
|
||||
Error.throw <| Illegal_State_Error_Data ("Invalid amount of arguments for operation ISEMPTY")
|
||||
Error.throw <| Illegal_State_Error_Data ("Invalid amount of arguments for operation IS_EMPTY")
|
||||
|
||||
## PRIVATE
|
||||
make_between : Vector Builder -> Builder
|
||||
@ -201,6 +202,21 @@ make_between arguments = case arguments.length of
|
||||
_ ->
|
||||
Error.throw <| Illegal_State_Error_Data ("Invalid amount of arguments for operation BETWEEN")
|
||||
|
||||
## PRIVATE
|
||||
make_is_in : Vector Builder -> Builder
|
||||
make_is_in arguments = case arguments.length of
|
||||
0 -> Error.throw <| Illegal_State_Error_Data ("The operation IS_IN requires at least one argument.")
|
||||
## If only the self argument is provided, no value will ever be in the empty list, so we just short circuit to false.
|
||||
`IN ()` would be more meaningful, but it is a syntax error.
|
||||
1 -> code '2=1' . paren
|
||||
_ ->
|
||||
expr = arguments.first
|
||||
list = arguments.tail
|
||||
is_in = expr ++ " IN (" ++ (SQL.join ", " list) ++ ")"
|
||||
## We ensure that even `NULL IN (...)` is coalesced to False, so that
|
||||
negation will work as expected.
|
||||
code "COALESCE(" ++ is_in ++ ", 2=1)"
|
||||
|
||||
## PRIVATE
|
||||
|
||||
Builds code for an expression.
|
||||
@ -214,7 +230,7 @@ generate_expression dialect expr = case expr of
|
||||
dialect.wrap_identifier origin ++ '.' ++ dialect.wrap_identifier name
|
||||
Expression.Constant sql_type value -> SQL.interpolation sql_type value
|
||||
Expression.Operation kind arguments ->
|
||||
op = dialect.operation_map.get_or_else kind (Error.throw <| Unsupported_Database_Operation_Error kind)
|
||||
op = dialect.operation_map.get_or_else kind (Error.throw <| Unsupported_Database_Operation_Error_Data kind)
|
||||
parsed_args = arguments.map (generate_expression dialect)
|
||||
op parsed_args
|
||||
_ : Order_Descriptor -> generate_order dialect expr
|
||||
@ -337,7 +353,7 @@ generate_query dialect query = case query of
|
||||
code "SELECT * " ++ generate_select_context dialect ctx
|
||||
Query.Insert table_name pairs ->
|
||||
generate_insert_query dialect table_name pairs
|
||||
_ -> Error.throw <| Unsupported_Database_Operation_Error "Unsupported query type."
|
||||
_ -> Error.throw <| Unsupported_Database_Operation_Error_Data "Unsupported query type."
|
||||
|
||||
## PRIVATE
|
||||
Arguments:
|
||||
|
@ -13,7 +13,7 @@ import project.Internal.IR.Order_Descriptor.Order_Descriptor
|
||||
import project.Internal.IR.Nulls_Order.Nulls_Order
|
||||
|
||||
from project.Data.SQL import code
|
||||
from project.Errors import Unsupported_Database_Operation_Error
|
||||
from project.Errors import Unsupported_Database_Operation_Error_Data
|
||||
|
||||
## PRIVATE
|
||||
|
||||
@ -232,7 +232,7 @@ make_order_descriptor internal_column sort_direction text_ordering =
|
||||
case internal_column.sql_type.is_likely_text of
|
||||
True ->
|
||||
## In the future we can modify this error to suggest using a custom defined collation.
|
||||
if text_ordering.sort_digits_as_numbers then Error.throw (Unsupported_Database_Operation_Error "Natural ordering is currently not supported. You may need to materialize the Table to perform this operation.") else
|
||||
if text_ordering.sort_digits_as_numbers then Error.throw (Unsupported_Database_Operation_Error_Data "Natural ordering is currently not supported. You may need to materialize the Table to perform this operation.") else
|
||||
case text_ordering.case_sensitivity of
|
||||
Nothing ->
|
||||
Order_Descriptor.Value internal_column.expression sort_direction nulls_order=nulls collation=Nothing
|
||||
@ -240,7 +240,7 @@ make_order_descriptor internal_column sort_direction text_ordering =
|
||||
Order_Descriptor.Value internal_column.expression sort_direction nulls_order=nulls collation="ucs_basic"
|
||||
Case_Sensitivity.Insensitive locale -> case locale == Locale.default of
|
||||
False ->
|
||||
Error.throw (Unsupported_Database_Operation_Error "Case insensitive ordering with custom locale is currently not supported. You may need to materialize the Table to perform this operation.")
|
||||
Error.throw (Unsupported_Database_Operation_Error_Data "Case insensitive ordering with custom locale is currently not supported. You may need to materialize the Table to perform this operation.")
|
||||
True ->
|
||||
upper = Expression.Operation "UPPER" [internal_column.expression]
|
||||
folded_expression = Expression.Operation "LOWER" [upper]
|
||||
|
@ -374,6 +374,32 @@ type Column
|
||||
/ : Column | Any -> Column
|
||||
/ self other = run_vectorized_binary_op self '/' (/) other
|
||||
|
||||
## Element-wise modulus.
|
||||
|
||||
Arguments:
|
||||
- other: The value to modulo `self` against. If `other` is a column, the
|
||||
modulus is performed pairwise between corresponding elements of `self`
|
||||
and `other`.
|
||||
|
||||
Returns a column with the results of taking the modulus of this column's elements against
|
||||
`other`.
|
||||
|
||||
> Example
|
||||
Modulus of two columns against each other.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_mod = Examples.integer_column % Examples.decimal_column
|
||||
|
||||
> Example
|
||||
Modulus of a column with a number.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_mod = Examples.integer_column % 3
|
||||
% : Column | Any -> Column
|
||||
% self other = run_vectorized_binary_op self '%' (%) other
|
||||
|
||||
## ALIAS AND Columns
|
||||
|
||||
Element-wise boolean conjunction.
|
||||
@ -585,11 +611,41 @@ type Column
|
||||
contains self other =
|
||||
run_vectorized_binary_op self "contains" (a -> b -> a.contains b) other
|
||||
|
||||
## PRIVATE
|
||||
Checks for each element of the column if it matches an SQL-like pattern.
|
||||
## Checks for each element of the column if it matches an SQL-like pattern.
|
||||
|
||||
Arguments:
|
||||
- pattern: The pattern to match `self` against. If it is a column, the
|
||||
operation is performed pairwise between corresponding elements of
|
||||
`self` and that column. The pattern is an SQL-like pattern, where
|
||||
`%` matches any sequence of characters and `_` matches any single
|
||||
character.
|
||||
|
||||
> Example
|
||||
Check if elements of a column start with 'F' and end with a dot.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_like = Examples.text_column_1.like "F%."
|
||||
like : Column | Text -> Column
|
||||
like self other =
|
||||
run_vectorized_binary_op self "like" (_ -> _ -> Error.throw (Illegal_State_Error "The `Like` operation should only be used on Text columns.")) other
|
||||
like self pattern =
|
||||
run_vectorized_binary_op self "like" (_ -> _ -> Error.throw (Illegal_State_Error "The `Like` operation should only be used on Text columns.")) pattern
|
||||
|
||||
## Checks for each element of the column if it is contained within the
|
||||
provided vector.
|
||||
|
||||
Arguments:
|
||||
- vector: A vector of elements. The resulting column will contain true at
|
||||
the positions where the corresponding element of `self` is contained
|
||||
in `vector`.
|
||||
|
||||
> Example
|
||||
Check if elements of a column are contained in a provided vector.
|
||||
|
||||
import Standard.Examples
|
||||
|
||||
example_is_in = Examples.text_column_1.is_in [1, 2, 5]
|
||||
is_in self vector =
|
||||
run_vectorized_binary_op self "is_in" (elem -> vector -> vector.contains elem) vector skip_nulls=False
|
||||
|
||||
## ALIAS Transform Column
|
||||
|
||||
@ -1137,18 +1193,22 @@ type Empty_Error
|
||||
- name: The name of the vectorized operation.
|
||||
- fallback_fn: A function used if the vectorized operation isn't available.
|
||||
- operand: The operand to apply to the function after `column`.
|
||||
- skip_nulls: Specifies if nulls should be skipped. If set to `True`, a null
|
||||
value results in null without passing it to the function. If set to
|
||||
`False`, the null values are passed as any other value and can have custom
|
||||
handling logic.
|
||||
run_vectorized_binary_op : Column -> Text -> (Any -> Any) -> Any -> Column
|
||||
run_vectorized_binary_op column name fallback_fn operand = case operand of
|
||||
run_vectorized_binary_op column name fallback_fn operand skip_nulls=True = case operand of
|
||||
Column.Column_Data col2 ->
|
||||
s1 = column.java_column.getStorage
|
||||
ix = column.java_column.getIndex
|
||||
s2 = col2.getStorage
|
||||
rs = s1.zip name fallback_fn s2 True
|
||||
rs = s1.zip name fallback_fn s2 skip_nulls
|
||||
Column.Column_Data (Java_Column.new "Result" ix rs)
|
||||
_ ->
|
||||
s1 = column.java_column.getStorage
|
||||
ix = column.java_column.getIndex
|
||||
rs = s1.bimap name fallback_fn operand
|
||||
rs = s1.bimap name fallback_fn operand skip_nulls
|
||||
Column.Column_Data (Java_Column.new "Result" ix rs)
|
||||
|
||||
## PRIVATE
|
||||
|
@ -7,6 +7,7 @@ import Standard.Base.Data.Text.Case
|
||||
import Standard.Base.System.Platform
|
||||
|
||||
import project.Data.Column.Column
|
||||
from project.Data.Column import get_item_string
|
||||
import project.Data.Column_Name_Mapping.Column_Name_Mapping
|
||||
import project.Data.Column_Selector.Column_Selector
|
||||
import project.Data.Data_Formatter.Data_Formatter
|
||||
|
@ -56,6 +56,9 @@ make_filter_column source_column filter_condition = case filter_condition of
|
||||
Value_Type.expect_text source_column.value_type <|
|
||||
expect_column_or_value_as_text "pattern" pattern <|
|
||||
source_column.like pattern . not
|
||||
# Vector
|
||||
Is_In values -> source_column.is_in values
|
||||
Not_In values -> source_column.is_in values . not
|
||||
|
||||
## PRIVATE
|
||||
expect_column_or_value_as_text field_name column_or_value ~action = case column_or_value of
|
||||
|
@ -0,0 +1,108 @@
|
||||
package org.enso.base.polyglot;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
|
||||
/**
|
||||
* The numeric converter deals with conversions of Java numeric types to the two main types
|
||||
* supported by Enso - Long for integers and Double for decimals. Any other types are coerced to one
|
||||
* of these types.
|
||||
*
|
||||
* <p>It provides two concepts - coercion - which allows to coerce an integer type to a decimal, but
|
||||
* will not convert a decimal to an integer even if it has 0 fractional part. Then there is
|
||||
* conversion which allows to convert a decimal with 0 fractional part to an integer. Coercion
|
||||
* should be used when we care about the original type of the object (i.e. we want any decimals to
|
||||
* require decimal storage even if they have 0 fractional part). Conversion is to be used when we
|
||||
* want to be consistent with Enso's equality semantics where 2 == 2.0.
|
||||
*/
|
||||
public class NumericConverter {
|
||||
/**
|
||||
* Coerces a number (possibly an integer) to a Double.
|
||||
*
|
||||
* <p>Will throw an exception if the object is not a number.
|
||||
*/
|
||||
public static double coerceToDouble(Object o) {
|
||||
return switch (o) {
|
||||
case Double x -> x;
|
||||
case BigDecimal x -> x.doubleValue();
|
||||
case Float x -> x.doubleValue();
|
||||
default -> (double) coerceToLong(o);
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Coerces a number to an Integer.
|
||||
*
|
||||
* <p>Will throw an exception if the object is not an integer.
|
||||
*
|
||||
* <p>Decimal values are not accepted.
|
||||
*/
|
||||
public static long coerceToLong(Object o) {
|
||||
return switch (o) {
|
||||
case Long x -> x;
|
||||
case Integer x -> x.longValue();
|
||||
case Short x -> x.longValue();
|
||||
case Byte x -> x.longValue();
|
||||
default -> throw new UnsupportedOperationException();
|
||||
};
|
||||
}
|
||||
|
||||
/** Returns true if the object is any supported number. */
|
||||
public static boolean isCoercibleToDouble(Object o) {
|
||||
return o instanceof Double
|
||||
|| o instanceof BigDecimal
|
||||
|| o instanceof Float
|
||||
|| isCoercibleToLong(o);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the object is any supported integer.
|
||||
*
|
||||
* <p>Returns false for decimals with 0 fractional part - the type itself must be an integer type.
|
||||
*/
|
||||
public static boolean isCoercibleToLong(Object o) {
|
||||
return o instanceof Long || o instanceof Integer || o instanceof Short || o instanceof Byte;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tries converting the value to a Double.
|
||||
*
|
||||
* <p>It will return null if the object represented a non-numeric value.
|
||||
*/
|
||||
public static Double tryConvertingToDouble(Object o) {
|
||||
return switch (o) {
|
||||
case Double x -> x;
|
||||
case BigDecimal x -> x.doubleValue();
|
||||
case Float x -> x.doubleValue();
|
||||
case Long x -> x.doubleValue();
|
||||
case Integer x -> x.doubleValue();
|
||||
case Short x -> x.doubleValue();
|
||||
case Byte x -> x.doubleValue();
|
||||
case null, default -> null;
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Tries converting the value to a Long.
|
||||
*
|
||||
* <p>Decimal number types are accepted, only if their fractional part is 0. It will return null
|
||||
* if the object represented a non-integer value.
|
||||
*/
|
||||
public static Long tryConvertingToLong(Object o) {
|
||||
return switch (o) {
|
||||
case Long x -> x;
|
||||
case Integer x -> x.longValue();
|
||||
case Short x -> x.longValue();
|
||||
case Byte x -> x.longValue();
|
||||
case Double x -> x % 1.0 == 0.0 ? x.longValue() : null;
|
||||
case Float x -> x % 1.0f == 0.0f ? x.longValue() : null;
|
||||
case BigDecimal x -> {
|
||||
try {
|
||||
yield x.longValueExact();
|
||||
} catch (ArithmeticException e) {
|
||||
yield null;
|
||||
}
|
||||
}
|
||||
case null, default -> null;
|
||||
};
|
||||
}
|
||||
}
|
@ -1,11 +1,14 @@
|
||||
package org.enso.base;
|
||||
|
||||
import org.graalvm.polyglot.Value;
|
||||
package org.enso.base.polyglot;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalDateTime;
|
||||
import org.graalvm.polyglot.Value;
|
||||
|
||||
public class Polyglot_Utils {
|
||||
/**
|
||||
* Converts a polyglot Value ensuring that various date/time types are converted to the correct
|
||||
* type.
|
||||
*/
|
||||
public static Object convertPolyglotValue(Value item) {
|
||||
if (item.isDate()) {
|
||||
LocalDate d = item.asDate();
|
||||
@ -26,12 +29,14 @@ public class Polyglot_Utils {
|
||||
return item.as(Object.class);
|
||||
}
|
||||
|
||||
/** A helper functions for situations where we cannot use the Value conversion directly.
|
||||
* <p>
|
||||
* Mostly happens due to the issue: https://github.com/oracle/graal/issues/4967
|
||||
* Once that issue is resolved, we should probably remove this helper.
|
||||
* <p>
|
||||
* In that case we take a generic Object, knowing that the values of interest to us will be passed as Value anyway - so we can check that and fire the conversion if needed.
|
||||
/**
|
||||
* A helper function for situations where we cannot use the Value conversion directly.
|
||||
*
|
||||
* <p>Mostly happens due to the issue: https://github.com/oracle/graal/issues/4967 Once that issue
|
||||
* is resolved, we should probably remove this helper.
|
||||
*
|
||||
* <p>In that case we take a generic Object, knowing that the values of interest to us will be
|
||||
* passed as Value anyway - so we can check that and fire the conversion if needed.
|
||||
*/
|
||||
public static Object convertPolyglotValue(Object item) {
|
||||
if (item instanceof Value v) {
|
@ -8,7 +8,7 @@ import org.enso.table.data.table.problems.UnquotedDelimiter;
|
||||
import java.util.List;
|
||||
|
||||
public class Concatenate extends Aggregator {
|
||||
private final Storage storage;
|
||||
private final Storage<?> storage;
|
||||
private final String separator;
|
||||
private final String prefix;
|
||||
private final String suffix;
|
||||
|
@ -15,7 +15,7 @@ import java.util.List;
|
||||
* does count when all items are null.
|
||||
*/
|
||||
public class CountDistinct extends Aggregator {
|
||||
private final Storage[] storage;
|
||||
private final Storage<?>[] storage;
|
||||
private final Comparator<Object> objectComparator;
|
||||
private final boolean ignoreAllNull;
|
||||
|
||||
|
@ -11,7 +11,7 @@ import java.util.List;
|
||||
* counts null or empty entries. If `isEmpty` is false, counts non-empty entries.
|
||||
*/
|
||||
public class CountEmpty extends Aggregator {
|
||||
private final Storage storage;
|
||||
private final Storage<?> storage;
|
||||
private final boolean isEmpty;
|
||||
|
||||
/**
|
||||
|
@ -10,7 +10,7 @@ import java.util.List;
|
||||
* counts null entries. If `isNothing` is false, counts non-null entries.
|
||||
*/
|
||||
public class CountNothing extends Aggregator {
|
||||
private final Storage storage;
|
||||
private final Storage<?> storage;
|
||||
private final boolean isNothing;
|
||||
|
||||
/**
|
||||
|
@ -10,8 +10,8 @@ import java.util.List;
|
||||
|
||||
/** Aggregate Column finding the first value in a group. */
|
||||
public class First extends Aggregator {
|
||||
private final Storage storage;
|
||||
private final Storage[] orderByColumns;
|
||||
private final Storage<?> storage;
|
||||
private final Storage<?>[] orderByColumns;
|
||||
private final int[] orderByDirections;
|
||||
private final Comparator<Object> objectComparator;
|
||||
private final boolean ignoreNothing;
|
||||
|
@ -7,7 +7,7 @@ import java.util.List;
|
||||
|
||||
/** Aggregate Column getting the grouping key. */
|
||||
public class GroupBy extends Aggregator {
|
||||
private final Storage storage;
|
||||
private final Storage<?> storage;
|
||||
|
||||
public GroupBy(String name, Column column) {
|
||||
super(name, column.getStorage().getType());
|
||||
|
@ -9,8 +9,8 @@ import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
public class Last extends Aggregator {
|
||||
private final Storage storage;
|
||||
private final Storage[] orderByColumns;
|
||||
private final Storage<?> storage;
|
||||
private final Storage<?>[] orderByColumns;
|
||||
private final int[] orderByDirections;
|
||||
private final Comparator<Object> objectComparator;
|
||||
private final boolean ignoreNothing;
|
||||
|
@ -18,7 +18,7 @@ public class Mean extends Aggregator {
|
||||
}
|
||||
}
|
||||
|
||||
private final Storage storage;
|
||||
private final Storage<?> storage;
|
||||
|
||||
public Mean(String name, Column column) {
|
||||
super(name, Storage.Type.DOUBLE);
|
||||
|
@ -11,7 +11,7 @@ import java.util.List;
|
||||
* Aggregate Column finding the minimum (minOrMax = -1) or maximum (minOrMax = 1) entry in a group.
|
||||
*/
|
||||
public class MinOrMax extends Aggregator {
|
||||
private final Storage storage;
|
||||
private final Storage<?> storage;
|
||||
private final int minOrMax;
|
||||
private final Comparator<Object> objectComparator;
|
||||
|
||||
|
@ -10,7 +10,7 @@ import java.util.Map;
|
||||
|
||||
/** Aggregate Column computing the most common value in a group (ignoring Nothing). */
|
||||
public class Mode extends Aggregator {
|
||||
private final Storage storage;
|
||||
private final Storage<?> storage;
|
||||
|
||||
public Mode(String name, Column column) {
|
||||
super(name, column.getStorage().getType());
|
||||
|
@ -11,7 +11,7 @@ import java.util.TreeMap;
|
||||
|
||||
/** Aggregate Column computing a percentile value in a group. */
|
||||
public class Percentile extends Aggregator {
|
||||
private final Storage storage;
|
||||
private final Storage<?> storage;
|
||||
private final double percentile;
|
||||
|
||||
public Percentile(String name, Column column, double percentile) {
|
||||
|
@ -9,7 +9,7 @@ import java.util.List;
|
||||
|
||||
/** Aggregate Column finding the longest or shortest string in a group. */
|
||||
public class ShortestOrLongest extends Aggregator {
|
||||
private final Storage storage;
|
||||
private final Storage<?> storage;
|
||||
private final int minOrMax;
|
||||
|
||||
public ShortestOrLongest(String name, Column column, int minOrMax) {
|
||||
|
@ -20,7 +20,7 @@ public class StandardDeviation extends Aggregator {
|
||||
}
|
||||
}
|
||||
|
||||
private final Storage storage;
|
||||
private final Storage<?> storage;
|
||||
private final boolean population;
|
||||
|
||||
public StandardDeviation(String name, Column column, boolean population) {
|
||||
|
@ -8,7 +8,7 @@ import java.util.List;
|
||||
|
||||
/** Aggregate Column computing the total value in a group. */
|
||||
public class Sum extends Aggregator {
|
||||
private final Storage storage;
|
||||
private final Storage<?> storage;
|
||||
|
||||
public Sum(String name, Column column) {
|
||||
super(name, Storage.Type.DOUBLE);
|
||||
|
@ -62,7 +62,7 @@ public class BoolBuilder extends TypedBuilder {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage seal() {
|
||||
public Storage<Boolean> seal() {
|
||||
return new BoolStorage(vals, isNa, size, false);
|
||||
}
|
||||
|
||||
|
@ -36,5 +36,5 @@ public abstract class Builder {
|
||||
public abstract int getCurrentSize();
|
||||
|
||||
/** @return a storage containing all the items appended so far */
|
||||
public abstract Storage seal();
|
||||
public abstract Storage<?> seal();
|
||||
}
|
||||
|
@ -32,7 +32,7 @@ public class DateBuilder extends TypedBuilderImpl<LocalDate> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage seal() {
|
||||
public Storage<LocalDate> seal() {
|
||||
return new DateStorage(data, currentSize);
|
||||
}
|
||||
}
|
||||
|
@ -32,7 +32,7 @@ public class DateTimeBuilder extends TypedBuilderImpl<ZonedDateTime> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage seal() {
|
||||
public Storage<ZonedDateTime> seal() {
|
||||
return new DateTimeStorage(data, currentSize);
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
package org.enso.table.data.column.builder.object;
|
||||
|
||||
import org.enso.base.polyglot.NumericConverter;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
@ -81,10 +82,10 @@ public class InferredBuilder extends Builder {
|
||||
int initialCapacity = Math.max(initialSize, currentSize);
|
||||
if (o instanceof Boolean) {
|
||||
currentBuilder = new BoolBuilder();
|
||||
} else if (o instanceof Double || o instanceof BigDecimal) {
|
||||
currentBuilder = NumericBuilder.createDoubleBuilder(initialCapacity);
|
||||
} else if (o instanceof Long) {
|
||||
} else if (NumericConverter.isCoercibleToLong(o)) {
|
||||
currentBuilder = NumericBuilder.createLongBuilder(initialCapacity);
|
||||
} else if (NumericConverter.isCoercibleToDouble(o)) {
|
||||
currentBuilder = NumericBuilder.createDoubleBuilder(initialCapacity);
|
||||
} else if (o instanceof LocalDate) {
|
||||
currentBuilder = new DateBuilder(initialCapacity);
|
||||
} else if (o instanceof LocalTime) {
|
||||
@ -106,11 +107,15 @@ public class InferredBuilder extends Builder {
|
||||
new RetypeInfo(Boolean.class, Storage.Type.BOOL),
|
||||
new RetypeInfo(Long.class, Storage.Type.LONG),
|
||||
new RetypeInfo(Double.class, Storage.Type.DOUBLE),
|
||||
new RetypeInfo(String.class, Storage.Type.STRING),
|
||||
new RetypeInfo(BigDecimal.class, Storage.Type.DOUBLE),
|
||||
new RetypeInfo(LocalDate.class, Storage.Type.DATE),
|
||||
new RetypeInfo(LocalTime.class, Storage.Type.TIME_OF_DAY),
|
||||
new RetypeInfo(ZonedDateTime.class, Storage.Type.DATE_TIME),
|
||||
new RetypeInfo(String.class, Storage.Type.STRING));
|
||||
new RetypeInfo(Float.class, Storage.Type.DOUBLE),
|
||||
new RetypeInfo(Integer.class, Storage.Type.LONG),
|
||||
new RetypeInfo(Short.class, Storage.Type.LONG),
|
||||
new RetypeInfo(Byte.class, Storage.Type.LONG));
|
||||
|
||||
private void retypeAndAppend(Object o) {
|
||||
for (RetypeInfo info : retypePairs) {
|
||||
@ -138,7 +143,7 @@ public class InferredBuilder extends Builder {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage seal() {
|
||||
public Storage<?> seal() {
|
||||
if (currentBuilder == null) {
|
||||
initBuilderFor(null);
|
||||
}
|
||||
|
@ -1,10 +1,10 @@
|
||||
package org.enso.table.data.column.builder.object;
|
||||
|
||||
import org.enso.base.polyglot.NumericConverter;
|
||||
import org.enso.table.data.column.storage.DoubleStorage;
|
||||
import org.enso.table.data.column.storage.LongStorage;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.Arrays;
|
||||
import java.util.BitSet;
|
||||
|
||||
@ -69,37 +69,20 @@ public class NumericBuilder extends TypedBuilder {
|
||||
if (o == null) {
|
||||
isMissing.set(currentSize++);
|
||||
} else if (isDouble) {
|
||||
double value = toDouble(o);
|
||||
double value = NumericConverter.coerceToDouble(o);
|
||||
data[currentSize++] = Double.doubleToRawLongBits(value);
|
||||
} else {
|
||||
data[currentSize++] = toLong(o);
|
||||
data[currentSize++] = NumericConverter.coerceToLong(o);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean accepts(Object o) {
|
||||
if (isDouble && (o instanceof Double || o instanceof BigDecimal)) {
|
||||
return true;
|
||||
if (isDouble) {
|
||||
return NumericConverter.isCoercibleToDouble(o);
|
||||
} else {
|
||||
return NumericConverter.isCoercibleToLong(o);
|
||||
}
|
||||
|
||||
return o instanceof Long || o instanceof Integer || o instanceof Byte;
|
||||
}
|
||||
|
||||
private static double toDouble(Object o) {
|
||||
return switch (o) {
|
||||
case Double x -> x;
|
||||
case BigDecimal x -> x.doubleValue();
|
||||
default -> (double) toLong(o);
|
||||
};
|
||||
}
|
||||
|
||||
private static long toLong(Object o) {
|
||||
return switch (o) {
|
||||
case Long x -> x;
|
||||
case Integer x -> x.longValue();
|
||||
case Byte x -> x.longValue();
|
||||
default -> throw new UnsupportedOperationException();
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -159,7 +142,7 @@ public class NumericBuilder extends TypedBuilder {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage seal() {
|
||||
public Storage<?> seal() {
|
||||
if (isDouble) {
|
||||
return new DoubleStorage(data, currentSize, isMissing);
|
||||
} else {
|
||||
|
@ -67,7 +67,7 @@ public class ObjectBuilder extends TypedBuilder {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage seal() {
|
||||
public Storage<Object> seal() {
|
||||
return new ObjectStorage(data, currentSize);
|
||||
}
|
||||
|
||||
|
@ -30,7 +30,7 @@ public class StringBuilder extends TypedBuilderImpl<String> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage seal() {
|
||||
public Storage<String> seal() {
|
||||
return new StringStorage(data, currentSize);
|
||||
}
|
||||
}
|
||||
|
@ -32,7 +32,7 @@ public class TimeOfDayBuilder extends TypedBuilderImpl<LocalTime> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage seal() {
|
||||
public Storage<LocalTime> seal() {
|
||||
return new TimeOfDayStorage(data, currentSize);
|
||||
}
|
||||
}
|
||||
|
@ -109,7 +109,7 @@ public class PrimInferredStorageBuilder extends StorageBuilder {
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public Storage seal() {
|
||||
public Storage<?> seal() {
|
||||
if (type == Type.LONG) {
|
||||
return new LongStorage(data, size, isMissing);
|
||||
} else {
|
||||
|
@ -19,5 +19,5 @@ public abstract class StorageBuilder {
|
||||
*
|
||||
* @return the storage resulting from this builder's operation.
|
||||
*/
|
||||
public abstract Storage seal();
|
||||
public abstract Storage<?> seal();
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
package org.enso.table.data.column.builder.string;
|
||||
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
import org.enso.table.data.column.storage.StringStorage;
|
||||
|
||||
/** A column builder appending all the values passed to it in an unchanged form. */
|
||||
@ -44,7 +45,7 @@ public class StringStorageBuilder extends StorageBuilder {
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
public StringStorage seal() {
|
||||
public Storage<String> seal() {
|
||||
return new StringStorage(data, size);
|
||||
}
|
||||
}
|
||||
|
@ -24,5 +24,5 @@ public abstract class Aggregator {
|
||||
*
|
||||
* @return the storage containing all aggregation results.
|
||||
*/
|
||||
public abstract Storage seal();
|
||||
public abstract Storage<?> seal();
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ import java.util.stream.IntStream;
|
||||
|
||||
/** Aggregates a storage by counting the non-missing values in each group. */
|
||||
public class CountAggregator extends Aggregator {
|
||||
private final Storage storage;
|
||||
private final Storage<?> storage;
|
||||
private final long[] counts;
|
||||
private int position = 0;
|
||||
|
||||
@ -16,7 +16,7 @@ public class CountAggregator extends Aggregator {
|
||||
* @param resultSize the exact number of times {@link Aggregator#nextGroup(IntStream)} will be
|
||||
* called.
|
||||
*/
|
||||
public CountAggregator(Storage storage, int resultSize) {
|
||||
public CountAggregator(Storage<?> storage, int resultSize) {
|
||||
this.storage = storage;
|
||||
this.counts = new long[resultSize];
|
||||
}
|
||||
@ -27,7 +27,7 @@ public class CountAggregator extends Aggregator {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage seal() {
|
||||
public Storage<Long> seal() {
|
||||
return new LongStorage(counts);
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
package org.enso.table.data.column.operation.aggregate;
|
||||
|
||||
import org.enso.base.Polyglot_Utils;
|
||||
import org.enso.base.polyglot.Polyglot_Utils;
|
||||
import org.enso.table.data.column.builder.object.InferredBuilder;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
import org.graalvm.polyglot.Value;
|
||||
@ -16,7 +16,7 @@ import java.util.stream.Stream;
|
||||
public class FunctionAggregator extends Aggregator {
|
||||
private final Function<List<Object>, Value> aggregateFunction;
|
||||
private final boolean skipNa;
|
||||
private final Storage storage;
|
||||
private final Storage<?> storage;
|
||||
private final InferredBuilder builder;
|
||||
|
||||
/**
|
||||
@ -27,7 +27,7 @@ public class FunctionAggregator extends Aggregator {
|
||||
*/
|
||||
public FunctionAggregator(
|
||||
Function<List<Object>, Value> aggregateFunction,
|
||||
Storage storage,
|
||||
Storage<?> storage,
|
||||
boolean skipNa,
|
||||
int resultSize) {
|
||||
this.aggregateFunction = aggregateFunction;
|
||||
@ -53,7 +53,7 @@ public class FunctionAggregator extends Aggregator {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage seal() {
|
||||
public Storage<?> seal() {
|
||||
return builder.seal();
|
||||
}
|
||||
}
|
||||
|
@ -53,7 +53,7 @@ public abstract class LongToLongAggregator extends Aggregator {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage seal() {
|
||||
public Storage<Long> seal() {
|
||||
return new LongStorage(items, items.length, missing);
|
||||
}
|
||||
}
|
||||
|
@ -15,7 +15,7 @@ import java.util.stream.IntStream;
|
||||
* DoubleStorage}.
|
||||
*/
|
||||
public abstract class NumericAggregator extends Aggregator {
|
||||
private final NumericStorage storage;
|
||||
private final NumericStorage<?> storage;
|
||||
private final long[] data;
|
||||
private final BitSet missing;
|
||||
private int position = 0;
|
||||
@ -24,7 +24,7 @@ public abstract class NumericAggregator extends Aggregator {
|
||||
* @param storage the data source
|
||||
* @param resultSize the number of times {@link Aggregator#nextGroup(IntStream)} will be called
|
||||
*/
|
||||
public NumericAggregator(NumericStorage storage, int resultSize) {
|
||||
public NumericAggregator(NumericStorage<?> storage, int resultSize) {
|
||||
this.storage = storage;
|
||||
this.data = new long[resultSize];
|
||||
this.missing = new BitSet();
|
||||
@ -72,7 +72,7 @@ public abstract class NumericAggregator extends Aggregator {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage seal() {
|
||||
public Storage<Double> seal() {
|
||||
return new DoubleStorage(data, data.length, missing);
|
||||
}
|
||||
}
|
||||
|
@ -8,12 +8,13 @@ import java.util.Map;
|
||||
/**
|
||||
* Stores map-like operations that can be performed on a given type.
|
||||
*
|
||||
* @param <T> the storage type handled by these operations.
|
||||
* @param <T> the type of elements stored in the storage
|
||||
* @param <S> the storage type handled by these operations.
|
||||
*/
|
||||
public class MapOpStorage<T extends Storage> {
|
||||
private final Map<String, MapOperation<T>> ops = new HashMap<>();
|
||||
public class MapOpStorage<T, S extends Storage<? super T>> {
|
||||
private final Map<String, MapOperation<T, S>> ops = new HashMap<>();
|
||||
|
||||
protected MapOperation<? super T> getOp(String name) {
|
||||
protected MapOperation<? super T, ? super S> getOp(String name) {
|
||||
return ops.get(name);
|
||||
}
|
||||
|
||||
@ -36,7 +37,7 @@ public class MapOpStorage<T extends Storage> {
|
||||
* @param arg the argument to pass to the operation
|
||||
* @return the result of running the operation
|
||||
*/
|
||||
public Storage runMap(String n, T storage, Object arg) {
|
||||
public Storage<?> runMap(String n, S storage, Object arg) {
|
||||
return ops.get(n).runMap(storage, arg);
|
||||
}
|
||||
|
||||
@ -49,7 +50,7 @@ public class MapOpStorage<T extends Storage> {
|
||||
* @param arg the storage containing operation arguments
|
||||
* @return the result of running the operation
|
||||
*/
|
||||
public Storage runZip(String n, T storage, Storage arg) {
|
||||
public Storage<?> runZip(String n, S storage, Storage<?> arg) {
|
||||
return ops.get(n).runZip(storage, arg);
|
||||
}
|
||||
|
||||
@ -59,7 +60,7 @@ public class MapOpStorage<T extends Storage> {
|
||||
* @param op the operation to add
|
||||
* @return this operation set
|
||||
*/
|
||||
public MapOpStorage<T> add(MapOperation<T> op) {
|
||||
public MapOpStorage<T, S> add(MapOperation<T, S> op) {
|
||||
ops.put(op.getName(), op);
|
||||
return this;
|
||||
}
|
||||
@ -68,23 +69,23 @@ public class MapOpStorage<T extends Storage> {
|
||||
* Creates a child set, containing all the operations defined in this, that can be extended
|
||||
* independently.
|
||||
*
|
||||
* @param <S> the desired result type
|
||||
* @param <U> the desired result type
|
||||
* @return a child of this storage
|
||||
*/
|
||||
public <S extends T> MapOpStorage<S> makeChild() {
|
||||
public <U extends T> MapOpStorage<U, S> makeChild() {
|
||||
return new ChildStorage<>(this);
|
||||
}
|
||||
|
||||
private static class ChildStorage<T extends Storage> extends MapOpStorage<T> {
|
||||
private final MapOpStorage<? super T> parent;
|
||||
private static class ChildStorage<T, S extends Storage<? super T>> extends MapOpStorage<T, S> {
|
||||
private final MapOpStorage<? super T, ? super S> parent;
|
||||
|
||||
private ChildStorage(MapOpStorage<? super T> parent) {
|
||||
private ChildStorage(MapOpStorage<? super T, ? super S> parent) {
|
||||
this.parent = parent;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected MapOperation<? super T> getOp(String name) {
|
||||
MapOperation<? super T> local = super.getOp(name);
|
||||
protected MapOperation<? super T, ? super S> getOp(String name) {
|
||||
MapOperation<? super T, ? super S> local = super.getOp(name);
|
||||
if (local == null) return parent.getOp(name);
|
||||
return local;
|
||||
}
|
||||
@ -95,12 +96,12 @@ public class MapOpStorage<T extends Storage> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage runMap(String n, T storage, Object arg) {
|
||||
public Storage<?> runMap(String n, S storage, Object arg) {
|
||||
return getOp(n).runMap(storage, arg);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage runZip(String n, T storage, Storage arg) {
|
||||
public Storage<?> runZip(String n, S storage, Storage<?> arg) {
|
||||
return getOp(n).runZip(storage, arg);
|
||||
}
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ import org.enso.table.data.column.storage.Storage;
|
||||
*
|
||||
* @param <T> the type of elements stored in the storage
* @param <I> the supported storage type.
|
||||
*/
|
||||
public abstract class MapOperation<I extends Storage> {
|
||||
public abstract class MapOperation<T, I extends Storage<? super T>> {
|
||||
private final String name;
|
||||
|
||||
/**
|
||||
@ -26,7 +26,7 @@ public abstract class MapOperation<I extends Storage> {
|
||||
* @param arg the argument passed to the operation
|
||||
* @return the result of running the operation
|
||||
*/
|
||||
public abstract Storage runMap(I storage, Object arg);
|
||||
public abstract Storage<?> runMap(I storage, Object arg);
|
||||
|
||||
/**
|
||||
* Run the operation in zip mode
|
||||
@ -35,7 +35,7 @@ public abstract class MapOperation<I extends Storage> {
|
||||
* @param arg the storage providing second arguments to the operation
|
||||
* @return the result of running the operation
|
||||
*/
|
||||
public abstract Storage runZip(I storage, Storage arg);
|
||||
public abstract Storage<?> runZip(I storage, Storage<?> arg);
|
||||
|
||||
/** @return the name of this operation */
|
||||
public String getName() {
|
||||
|
@ -0,0 +1,99 @@
|
||||
package org.enso.table.data.column.operation.map;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.function.Function;
|
||||
import org.enso.base.polyglot.Polyglot_Utils;
|
||||
import org.enso.table.data.column.storage.BoolStorage;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
|
||||
/**
|
||||
* A specialized implementation for the IS_IN operation for builtin types, relying on hashing. Since
|
||||
* for some columns we know what types of objects can be stored, we can filter out any objects that
|
||||
* do not match that type and then rely on a consistent definition of hashcode for these builtin
|
||||
* types (which is not available in general for custom objects).
|
||||
*/
|
||||
public class SpecializedIsInOp<T, S extends Storage<T>> extends MapOperation<T, S> {
|
||||
/**
|
||||
* An optimized representation of the vector of values to match.
|
||||
*
|
||||
* <p>It indicates whether the vector contained a null value and contains a hashmap of the vector
|
||||
* elements for faster contains checks.
|
||||
*/
|
||||
public record CompactRepresentation<T>(HashSet<T> coercedValues, boolean hasNulls) {}
|
||||
|
||||
private final Function<List<?>, CompactRepresentation<T>> prepareList;
|
||||
|
||||
/**
|
||||
* Creates a new operation with a given preprocessing function.
|
||||
*
|
||||
* <p>The responsibility of the function is to analyse the list and create a hashmap of relevant
|
||||
* elements, coerced to a type that is consistent with the storage type of the given column. Any
|
||||
* elements not fitting the expected type can (and should) be discarded.
|
||||
*
|
||||
* <p>It is important to correctly coerce the types, for example in Enso 2 == 2.0, so if we are
|
||||
* getting a Long for a DoubleColumn, it should be converted to a Double before adding it to the
|
||||
* hashmap. Similarly, for LongStorage, non-integer Doubles can be ignored, but Doubles with 0
|
||||
* fractional part need to be converted into a Long. These conversions can be achieved with the
|
||||
* {@code NumericConverter} class.
|
||||
*/
|
||||
public static <T, S extends Storage<T>> SpecializedIsInOp<T, S> make(
|
||||
Function<List<?>, CompactRepresentation<T>> prepareList) {
|
||||
return new SpecializedIsInOp<>(prepareList);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new operation which ensures the Enso Date/Time types are correctly coerced.
|
||||
*
|
||||
* <p>It uses the provided {@code storageClass} to only keep the elements that are of the same
|
||||
* type as expected in the storage.
|
||||
*/
|
||||
public static <T, S extends Storage<T>> SpecializedIsInOp<T, S> makeForTimeColumns(Class<T> storageClass) {
|
||||
return SpecializedIsInOp.make(
|
||||
list -> {
|
||||
HashSet<T> set = new HashSet<>();
|
||||
boolean hasNulls = false;
|
||||
for (Object o : list) {
|
||||
hasNulls |= o == null;
|
||||
Object coerced = Polyglot_Utils.convertPolyglotValue(o);
|
||||
if (storageClass.isInstance(coerced)) {
|
||||
set.add(storageClass.cast(coerced));
|
||||
}
|
||||
}
|
||||
return new SpecializedIsInOp.CompactRepresentation<>(set, hasNulls);
|
||||
});
|
||||
}
|
||||
|
||||
SpecializedIsInOp(Function<List<?>, CompactRepresentation<T>> prepareList) {
|
||||
super(Storage.Maps.IS_IN);
|
||||
this.prepareList = prepareList;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage<?> runMap(S storage, Object arg) {
|
||||
if (arg instanceof List) {
|
||||
return runMap(storage, (List<?>) arg);
|
||||
} else {
|
||||
throw new IllegalArgumentException("Argument to `is_in` must be a vector.");
|
||||
}
|
||||
}
|
||||
|
||||
public Storage<?> runMap(S storage, List<?> arg) {
|
||||
CompactRepresentation<T> compactRepresentation = prepareList.apply(arg);
|
||||
BitSet newVals = new BitSet();
|
||||
for (int i = 0; i < storage.size(); i++) {
|
||||
if (storage.isNa(i) && compactRepresentation.hasNulls) {
|
||||
newVals.set(i);
|
||||
} else if (compactRepresentation.coercedValues.contains(storage.getItemBoxed(i))) {
|
||||
newVals.set(i);
|
||||
}
|
||||
}
|
||||
return new BoolStorage(newVals, new BitSet(), storage.size(), false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage<?> runZip(S storage, Storage<?> arg) {
|
||||
throw new IllegalStateException("Zip mode is not supported for this operation.");
|
||||
}
|
||||
}
|
@ -7,20 +7,20 @@ import org.enso.table.data.column.storage.Storage;
|
||||
*
|
||||
* @param <T> the type of elements stored in the storage
* @param <I> the supported storage type
|
||||
*/
|
||||
public abstract class UnaryMapOperation<I extends Storage> extends MapOperation<I> {
|
||||
public abstract class UnaryMapOperation<T, I extends Storage<T>> extends MapOperation<T, I> {
|
||||
public UnaryMapOperation(String name) {
|
||||
super(name);
|
||||
}
|
||||
|
||||
protected abstract Storage run(I storage);
|
||||
protected abstract Storage<?> run(I storage);
|
||||
|
||||
@Override
|
||||
public Storage runMap(I storage, Object arg) {
|
||||
public Storage<?> runMap(I storage, Object arg) {
|
||||
return run(storage);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage runZip(I storage, Storage arg) {
|
||||
public Storage<?> runZip(I storage, Storage<?> arg) {
|
||||
return run(storage);
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,89 @@
|
||||
package org.enso.table.data.column.operation.map.bool;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.List;
|
||||
|
||||
import org.enso.table.data.column.operation.map.MapOperation;
|
||||
import org.enso.table.data.column.storage.BoolStorage;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
|
||||
/**
|
||||
* A specialized implementation for the IS_IN operation on booleans - since booleans have just three
|
||||
* possible values we can have a highly efficient implementation that does not even rely on hashmap
|
||||
* and after processing the input vector, performs the checks in constant time.
|
||||
*/
|
||||
public class BooleanIsInOp extends MapOperation<Boolean, BoolStorage> {
|
||||
public BooleanIsInOp() {
|
||||
super(Storage.Maps.IS_IN);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BoolStorage runMap(BoolStorage storage, Object arg) {
|
||||
if (arg instanceof List) {
|
||||
return runMap(storage, (List<?>) arg);
|
||||
} else {
|
||||
throw new IllegalArgumentException("Argument to `is_in` must be a vector.");
|
||||
}
|
||||
}
|
||||
|
||||
public BoolStorage runMap(BoolStorage storage, List<?> arg) {
|
||||
boolean hadTrue = false;
|
||||
boolean hadFalse = false;
|
||||
boolean hadNull = false;
|
||||
|
||||
for (Object o : arg) {
|
||||
switch (o) {
|
||||
case Boolean b -> {
|
||||
hadTrue |= b;
|
||||
hadFalse |= !b;
|
||||
}
|
||||
case null -> hadNull = true;
|
||||
default -> {}
|
||||
}
|
||||
}
|
||||
|
||||
BitSet newVals;
|
||||
boolean negated = false;
|
||||
|
||||
if (hadNull && hadTrue && hadFalse) {
|
||||
// We use empty newVals which has everything set to false and negate it to make all of that set to true with zero cost.
|
||||
newVals = new BitSet();
|
||||
negated = true;
|
||||
} else if (!hadNull && !hadTrue && !hadFalse) {
|
||||
// No values are present, so the result is to be false everywhere.
|
||||
newVals = new BitSet();
|
||||
}
|
||||
else if (hadNull && !hadTrue && !hadFalse) {
|
||||
// Only missing values are in the set, so we just return the missing indicator.
|
||||
newVals = storage.getIsMissing();
|
||||
} else if (hadTrue && hadFalse) { // && !hadNull
|
||||
// All non-missing values are in the set - so we just return the negated missing indicator.
|
||||
newVals = storage.getIsMissing();
|
||||
negated = true;
|
||||
} else {
|
||||
// hadTrue != hadFalse
|
||||
newVals = storage.getValues().get(0, storage.size());
|
||||
if (hadTrue) {
|
||||
if (storage.isNegated()) {
|
||||
newVals.flip(0, storage.size());
|
||||
}
|
||||
} else { // hadFalse
|
||||
if (!storage.isNegated()) {
|
||||
newVals.flip(0, storage.size());
|
||||
}
|
||||
}
|
||||
newVals.andNot(storage.getIsMissing());
|
||||
|
||||
if (hadNull) {
|
||||
newVals.or(storage.getIsMissing());
|
||||
}
|
||||
}
|
||||
|
||||
return new BoolStorage(newVals, new BitSet(), storage.size(), negated);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage<?> runZip(BoolStorage storage, Storage<?> arg) {
|
||||
throw new IllegalStateException("Zip mode is not supported for this operation.");
|
||||
}
|
||||
}
|
@ -10,7 +10,7 @@ import org.enso.table.error.UnexpectedTypeException;
|
||||
import java.util.BitSet;
|
||||
|
||||
/** An operation expecting a numeric argument and returning a boolean. */
|
||||
public abstract class DoubleBooleanOp extends MapOperation<DoubleStorage> {
|
||||
public abstract class DoubleBooleanOp extends MapOperation<Double, DoubleStorage> {
|
||||
public DoubleBooleanOp(String name) {
|
||||
super(name);
|
||||
}
|
||||
@ -59,7 +59,7 @@ public abstract class DoubleBooleanOp extends MapOperation<DoubleStorage> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage runZip(DoubleStorage storage, Storage arg) {
|
||||
public BoolStorage runZip(DoubleStorage storage, Storage<?> arg) {
|
||||
if (arg instanceof DoubleStorage v) {
|
||||
BitSet newVals = new BitSet();
|
||||
BitSet newMissing = new BitSet();
|
||||
|
@ -9,7 +9,7 @@ import org.enso.table.error.UnexpectedTypeException;
|
||||
import java.util.BitSet;
|
||||
|
||||
/** An operation expecting a numeric argument and returning a number. */
|
||||
public abstract class DoubleNumericOp extends MapOperation<DoubleStorage> {
|
||||
public abstract class DoubleNumericOp extends MapOperation<Double, DoubleStorage> {
|
||||
|
||||
public DoubleNumericOp(String name) {
|
||||
super(name);
|
||||
@ -18,7 +18,7 @@ public abstract class DoubleNumericOp extends MapOperation<DoubleStorage> {
|
||||
protected abstract double doDouble(double a, double b);
|
||||
|
||||
@Override
|
||||
public Storage runMap(DoubleStorage storage, Object arg) {
|
||||
public Storage<Double> runMap(DoubleStorage storage, Object arg) {
|
||||
double x;
|
||||
if (arg instanceof Double) {
|
||||
x = (Double) arg;
|
||||
@ -37,7 +37,7 @@ public abstract class DoubleNumericOp extends MapOperation<DoubleStorage> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage runZip(DoubleStorage storage, Storage arg) {
|
||||
public Storage<Double> runZip(DoubleStorage storage, Storage<?> arg) {
|
||||
if (arg instanceof LongStorage v) {
|
||||
long[] out = new long[storage.size()];
|
||||
BitSet newMissing = new BitSet();
|
||||
|
@ -10,7 +10,7 @@ import org.enso.table.error.UnexpectedTypeException;
|
||||
import java.util.BitSet;
|
||||
|
||||
/** An operation expecting a numeric argument and returning a boolean. */
|
||||
public abstract class LongBooleanOp extends MapOperation<LongStorage> {
|
||||
public abstract class LongBooleanOp extends MapOperation<Long, LongStorage> {
|
||||
public LongBooleanOp(String name) {
|
||||
super(name);
|
||||
}
|
||||
@ -61,7 +61,7 @@ public abstract class LongBooleanOp extends MapOperation<LongStorage> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage runZip(LongStorage storage, Storage arg) {
|
||||
public BoolStorage runZip(LongStorage storage, Storage<?> arg) {
|
||||
if (arg instanceof DoubleStorage v) {
|
||||
BitSet newVals = new BitSet();
|
||||
BitSet newMissing = new BitSet();
|
||||
|
@ -3,13 +3,14 @@ package org.enso.table.data.column.operation.map.numeric;
|
||||
import org.enso.table.data.column.operation.map.MapOperation;
|
||||
import org.enso.table.data.column.storage.DoubleStorage;
|
||||
import org.enso.table.data.column.storage.LongStorage;
|
||||
import org.enso.table.data.column.storage.NumericStorage;
|
||||
import org.enso.table.data.column.storage.Storage;
|
||||
import org.enso.table.error.UnexpectedTypeException;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/** An operation expecting a numeric argument and returning a number. */
|
||||
public abstract class LongNumericOp extends MapOperation<LongStorage> {
|
||||
public abstract class LongNumericOp extends MapOperation<Long, LongStorage> {
|
||||
private final boolean alwaysCast;
|
||||
|
||||
public LongNumericOp(String name, boolean alwaysCast) {
|
||||
@ -26,7 +27,7 @@ public abstract class LongNumericOp extends MapOperation<LongStorage> {
|
||||
public abstract long doLong(long in, long arg);
|
||||
|
||||
@Override
|
||||
public Storage runMap(LongStorage storage, Object arg) {
|
||||
public NumericStorage<?> runMap(LongStorage storage, Object arg) {
|
||||
if (arg instanceof Long && !alwaysCast) {
|
||||
long x = (Long) arg;
|
||||
long[] newVals = new long[storage.size()];
|
||||
@ -50,7 +51,7 @@ public abstract class LongNumericOp extends MapOperation<LongStorage> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage runZip(LongStorage storage, Storage arg) {
|
||||
public NumericStorage<?> runZip(LongStorage storage, Storage<?> arg) {
|
||||
if (arg instanceof LongStorage v) {
|
||||
long[] out = new long[storage.size()];
|
||||
BitSet newMissing = new BitSet();
|
||||
|
@ -35,7 +35,7 @@ public class LikeOp extends StringBooleanOp {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage runMap(SpecializedStorage<String> storage, Object arg) {
|
||||
public BoolStorage runMap(SpecializedStorage<String> storage, Object arg) {
|
||||
if (arg == null) {
|
||||
BitSet newVals = new BitSet();
|
||||
BitSet newMissing = new BitSet();
|
||||
|
@ -9,7 +9,7 @@ import org.enso.table.error.UnexpectedTypeException;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
public abstract class StringBooleanOp extends MapOperation<SpecializedStorage<String>> {
|
||||
public abstract class StringBooleanOp extends MapOperation<String, SpecializedStorage<String>> {
|
||||
public StringBooleanOp(String name) {
|
||||
super(name);
|
||||
}
|
||||
@ -21,7 +21,7 @@ public abstract class StringBooleanOp extends MapOperation<SpecializedStorage<St
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage runMap(SpecializedStorage<String> storage, Object arg) {
|
||||
public BoolStorage runMap(SpecializedStorage<String> storage, Object arg) {
|
||||
if (arg == null) {
|
||||
BitSet newVals = new BitSet();
|
||||
BitSet newMissing = new BitSet();
|
||||
@ -53,7 +53,7 @@ public abstract class StringBooleanOp extends MapOperation<SpecializedStorage<St
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage runZip(SpecializedStorage<String> storage, Storage arg) {
|
||||
public BoolStorage runZip(SpecializedStorage<String> storage, Storage<?> arg) {
|
||||
if (arg instanceof StringStorage v) {
|
||||
BitSet newVals = new BitSet();
|
||||
BitSet newMissing = new BitSet();
|
||||
|
@ -1,8 +1,11 @@
|
||||
package org.enso.table.data.column.storage;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.List;
|
||||
import org.enso.table.data.column.operation.map.MapOpStorage;
|
||||
import org.enso.table.data.column.operation.map.MapOperation;
|
||||
import org.enso.table.data.column.operation.map.UnaryMapOperation;
|
||||
import org.enso.table.data.column.operation.map.bool.BooleanIsInOp;
|
||||
import org.enso.table.data.index.Index;
|
||||
import org.enso.table.data.mask.OrderMask;
|
||||
import org.enso.table.data.mask.SliceRange;
|
||||
@ -10,12 +13,9 @@ import org.enso.table.error.UnexpectedColumnTypeException;
|
||||
import org.enso.table.error.UnexpectedTypeException;
|
||||
import org.graalvm.polyglot.Value;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.List;
|
||||
|
||||
/** A boolean column storage. */
|
||||
public class BoolStorage extends Storage {
|
||||
private static final MapOpStorage<BoolStorage> ops = buildOps();
|
||||
public final class BoolStorage extends Storage<Boolean> {
|
||||
private static final MapOpStorage<Boolean, BoolStorage> ops = buildOps();
|
||||
private final BitSet values;
|
||||
private final BitSet isMissing;
|
||||
private final int size;
|
||||
@ -33,7 +33,9 @@ public class BoolStorage extends Storage {
|
||||
return size;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
/**
|
||||
* @inheritDoc
|
||||
*/
|
||||
@Override
|
||||
public int countMissing() {
|
||||
return isMissing.cardinality();
|
||||
@ -45,10 +47,9 @@ public class BoolStorage extends Storage {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getItemBoxed(int idx) {
|
||||
public Boolean getItemBoxed(int idx) {
|
||||
return isMissing.get(idx) ? null : getItem(idx);
|
||||
}
|
||||
|
||||
public boolean getItem(long idx) {
|
||||
return negated != values.get((int) idx);
|
||||
}
|
||||
@ -64,12 +65,12 @@ public class BoolStorage extends Storage {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Storage runVectorizedMap(String name, Object argument) {
|
||||
protected Storage<?> runVectorizedMap(String name, Object argument) {
|
||||
return ops.runMap(name, this, argument);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Storage runVectorizedZip(String name, Storage argument) {
|
||||
protected Storage<?> runVectorizedZip(String name, Storage<?> argument) {
|
||||
return ops.runZip(name, this, argument);
|
||||
}
|
||||
|
||||
@ -99,7 +100,7 @@ public class BoolStorage extends Storage {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage fillMissing(Value arg) {
|
||||
public Storage<?> fillMissing(Value arg) {
|
||||
if (arg.isBoolean()) {
|
||||
return fillMissingBoolean(arg.asBoolean());
|
||||
} else {
|
||||
@ -108,7 +109,7 @@ public class BoolStorage extends Storage {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage mask(BitSet mask, int cardinality) {
|
||||
public BoolStorage mask(BitSet mask, int cardinality) {
|
||||
BitSet newMissing = new BitSet();
|
||||
BitSet newValues = new BitSet();
|
||||
int resultIx = 0;
|
||||
@ -118,6 +119,10 @@ public class BoolStorage extends Storage {
|
||||
newMissing.set(resultIx++);
|
||||
} else if (values.get(i)) {
|
||||
newValues.set(resultIx++);
|
||||
} else {
|
||||
// We don't set any bits, but still increment the counter to indicate that we have just
|
||||
// 'inserted' a false value.
|
||||
resultIx++;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -125,7 +130,7 @@ public class BoolStorage extends Storage {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage applyMask(OrderMask mask) {
|
||||
public BoolStorage applyMask(OrderMask mask) {
|
||||
int[] positions = mask.getPositions();
|
||||
BitSet newNa = new BitSet();
|
||||
BitSet newVals = new BitSet();
|
||||
@ -140,7 +145,7 @@ public class BoolStorage extends Storage {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage countMask(int[] counts, int total) {
|
||||
public BoolStorage countMask(int[] counts, int total) {
|
||||
BitSet newNa = new BitSet();
|
||||
BitSet newVals = new BitSet();
|
||||
int pos = 0;
|
||||
@ -159,12 +164,12 @@ public class BoolStorage extends Storage {
|
||||
return negated;
|
||||
}
|
||||
|
||||
private static MapOpStorage<BoolStorage> buildOps() {
|
||||
MapOpStorage<BoolStorage> ops = new MapOpStorage<>();
|
||||
private static MapOpStorage<Boolean, BoolStorage> buildOps() {
|
||||
MapOpStorage<Boolean, BoolStorage> ops = new MapOpStorage<>();
|
||||
ops.add(
|
||||
new UnaryMapOperation<>(Maps.NOT) {
|
||||
@Override
|
||||
protected Storage run(BoolStorage storage) {
|
||||
protected BoolStorage run(BoolStorage storage) {
|
||||
return new BoolStorage(
|
||||
storage.values, storage.isMissing, storage.size, !storage.negated);
|
||||
}
|
||||
@ -172,9 +177,9 @@ public class BoolStorage extends Storage {
|
||||
.add(
|
||||
new MapOperation<>(Maps.EQ) {
|
||||
@Override
|
||||
public Storage runMap(BoolStorage storage, Object arg) {
|
||||
if (arg instanceof Boolean) {
|
||||
if ((Boolean) arg) {
|
||||
public BoolStorage runMap(BoolStorage storage, Object arg) {
|
||||
if (arg instanceof Boolean v) {
|
||||
if (v) {
|
||||
return storage;
|
||||
} else {
|
||||
return new BoolStorage(
|
||||
@ -186,7 +191,7 @@ public class BoolStorage extends Storage {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage runZip(BoolStorage storage, Storage arg) {
|
||||
public BoolStorage runZip(BoolStorage storage, Storage<?> arg) {
|
||||
BitSet out = new BitSet();
|
||||
BitSet missing = new BitSet();
|
||||
for (int i = 0; i < storage.size; i++) {
|
||||
@ -204,9 +209,8 @@ public class BoolStorage extends Storage {
|
||||
.add(
|
||||
new MapOperation<>(Maps.AND) {
|
||||
@Override
|
||||
public Storage runMap(BoolStorage storage, Object arg) {
|
||||
if (arg instanceof Boolean) {
|
||||
boolean v = (Boolean) arg;
|
||||
public BoolStorage runMap(BoolStorage storage, Object arg) {
|
||||
if (arg instanceof Boolean v) {
|
||||
if (v) {
|
||||
return storage;
|
||||
} else {
|
||||
@ -218,7 +222,7 @@ public class BoolStorage extends Storage {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage runZip(BoolStorage storage, Storage arg) {
|
||||
public BoolStorage runZip(BoolStorage storage, Storage<?> arg) {
|
||||
if (arg instanceof BoolStorage v) {
|
||||
BitSet missing = v.isMissing.get(0, storage.size);
|
||||
missing.or(storage.isMissing);
|
||||
@ -247,9 +251,8 @@ public class BoolStorage extends Storage {
|
||||
.add(
|
||||
new MapOperation<>(Maps.OR) {
|
||||
@Override
|
||||
public Storage runMap(BoolStorage storage, Object arg) {
|
||||
if (arg instanceof Boolean) {
|
||||
boolean v = (Boolean) arg;
|
||||
public BoolStorage runMap(BoolStorage storage, Object arg) {
|
||||
if (arg instanceof Boolean v) {
|
||||
if (v) {
|
||||
return new BoolStorage(new BitSet(), storage.isMissing, storage.size, true);
|
||||
} else {
|
||||
@ -261,7 +264,7 @@ public class BoolStorage extends Storage {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage runZip(BoolStorage storage, Storage arg) {
|
||||
public BoolStorage runZip(BoolStorage storage, Storage<?> arg) {
|
||||
if (arg instanceof BoolStorage v) {
|
||||
BitSet missing = v.isMissing.get(0, storage.size);
|
||||
missing.or(storage.isMissing);
|
||||
@ -287,7 +290,8 @@ public class BoolStorage extends Storage {
|
||||
throw new UnexpectedColumnTypeException("Boolean");
|
||||
}
|
||||
}
|
||||
});
|
||||
})
|
||||
.add(new BooleanIsInOp());
|
||||
return ops;
|
||||
}
|
||||
|
||||
|
@ -1,10 +1,10 @@
|
||||
package org.enso.table.data.column.storage;
|
||||
|
||||
import org.enso.table.data.column.operation.map.MapOpStorage;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import org.enso.table.data.column.operation.map.MapOpStorage;
|
||||
import org.enso.table.data.column.operation.map.SpecializedIsInOp;
|
||||
|
||||
public class DateStorage extends SpecializedStorage<LocalDate> {
|
||||
public final class DateStorage extends SpecializedStorage<LocalDate> {
|
||||
/**
|
||||
* @param data the underlying data
|
||||
* @param size the number of items stored
|
||||
@ -13,10 +13,12 @@ public class DateStorage extends SpecializedStorage<LocalDate> {
|
||||
super(data, size, ops);
|
||||
}
|
||||
|
||||
private static final MapOpStorage<SpecializedStorage<LocalDate>> ops = buildOps();
|
||||
private static final MapOpStorage<LocalDate, SpecializedStorage<LocalDate>> ops = buildOps();
|
||||
|
||||
private static MapOpStorage<SpecializedStorage<LocalDate>> buildOps() {
|
||||
return ObjectStorage.buildObjectOps();
|
||||
private static MapOpStorage<LocalDate, SpecializedStorage<LocalDate>> buildOps() {
|
||||
MapOpStorage<LocalDate, SpecializedStorage<LocalDate>> t = ObjectStorage.buildObjectOps();
|
||||
t.add(SpecializedIsInOp.makeForTimeColumns(LocalDate.class));
|
||||
return t;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -1,10 +1,11 @@
|
||||
package org.enso.table.data.column.storage;
|
||||
|
||||
import org.enso.table.data.column.operation.map.MapOpStorage;
|
||||
import org.enso.table.data.column.operation.map.SpecializedIsInOp;
|
||||
|
||||
import java.time.ZonedDateTime;
|
||||
|
||||
public class DateTimeStorage extends SpecializedStorage<ZonedDateTime> {
|
||||
public final class DateTimeStorage extends SpecializedStorage<ZonedDateTime> {
|
||||
/**
|
||||
* @param data the underlying data
|
||||
* @param size the number of items stored
|
||||
@ -13,10 +14,14 @@ public class DateTimeStorage extends SpecializedStorage<ZonedDateTime> {
|
||||
super(data, size, ops);
|
||||
}
|
||||
|
||||
private static final MapOpStorage<SpecializedStorage<ZonedDateTime>> ops = buildOps();
|
||||
private static final MapOpStorage<ZonedDateTime, SpecializedStorage<ZonedDateTime>> ops =
|
||||
buildOps();
|
||||
|
||||
private static MapOpStorage<SpecializedStorage<ZonedDateTime>> buildOps() {
|
||||
return ObjectStorage.buildObjectOps();
|
||||
private static MapOpStorage<ZonedDateTime, SpecializedStorage<ZonedDateTime>> buildOps() {
|
||||
MapOpStorage<ZonedDateTime, SpecializedStorage<ZonedDateTime>> t =
|
||||
ObjectStorage.buildObjectOps();
|
||||
t.add(SpecializedIsInOp.makeForTimeColumns(ZonedDateTime.class));
|
||||
return t;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -1,7 +1,12 @@
|
||||
package org.enso.table.data.column.storage;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import org.enso.base.polyglot.NumericConverter;
|
||||
import org.enso.table.data.column.builder.object.NumericBuilder;
|
||||
import org.enso.table.data.column.operation.map.MapOpStorage;
|
||||
import org.enso.table.data.column.operation.map.SpecializedIsInOp;
|
||||
import org.enso.table.data.column.operation.map.UnaryMapOperation;
|
||||
import org.enso.table.data.column.operation.map.numeric.DoubleBooleanOp;
|
||||
import org.enso.table.data.column.operation.map.numeric.DoubleNumericOp;
|
||||
@ -10,15 +15,12 @@ import org.enso.table.data.mask.OrderMask;
|
||||
import org.enso.table.data.mask.SliceRange;
|
||||
import org.graalvm.polyglot.Value;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.List;
|
||||
|
||||
/** A column containing floating point numbers. */
|
||||
public class DoubleStorage extends NumericStorage {
|
||||
public final class DoubleStorage extends NumericStorage<Double> {
|
||||
private final long[] data;
|
||||
private final BitSet isMissing;
|
||||
private final int size;
|
||||
private static final MapOpStorage<DoubleStorage> ops = buildOps();
|
||||
private static final MapOpStorage<Double, DoubleStorage> ops = buildOps();
|
||||
|
||||
/**
|
||||
* @param data the underlying data
|
||||
@ -58,7 +60,7 @@ public class DoubleStorage extends NumericStorage {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getItemBoxed(int idx) {
|
||||
public Double getItemBoxed(int idx) {
|
||||
return isMissing.get(idx) ? null : Double.longBitsToDouble(data[idx]);
|
||||
}
|
||||
|
||||
@ -80,16 +82,16 @@ public class DoubleStorage extends NumericStorage {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Storage runVectorizedMap(String name, Object argument) {
|
||||
protected Storage<?> runVectorizedMap(String name, Object argument) {
|
||||
return ops.runMap(name, this, argument);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Storage runVectorizedZip(String name, Storage argument) {
|
||||
protected Storage<?> runVectorizedZip(String name, Storage<?> argument) {
|
||||
return ops.runZip(name, this, argument);
|
||||
}
|
||||
|
||||
private Storage fillMissingDouble(double arg) {
|
||||
private Storage<?> fillMissingDouble(double arg) {
|
||||
final var builder = NumericBuilder.createDoubleBuilder(size());
|
||||
long rawArg = Double.doubleToRawLongBits(arg);
|
||||
for (int i = 0; i < size(); i++) {
|
||||
@ -103,7 +105,7 @@ public class DoubleStorage extends NumericStorage {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage fillMissing(Value arg) {
|
||||
public Storage<?> fillMissing(Value arg) {
|
||||
if (arg.isNumber()) {
|
||||
if (arg.fitsInLong()) {
|
||||
return fillMissingDouble(arg.asLong());
|
||||
@ -116,7 +118,7 @@ public class DoubleStorage extends NumericStorage {
|
||||
}
|
||||
|
||||
@Override
|
||||
public DoubleStorage mask(BitSet mask, int cardinality) {
|
||||
public Storage<Double> mask(BitSet mask, int cardinality) {
|
||||
BitSet newMissing = new BitSet();
|
||||
long[] newData = new long[cardinality];
|
||||
int resIx = 0;
|
||||
@ -133,7 +135,7 @@ public class DoubleStorage extends NumericStorage {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage applyMask(OrderMask mask) {
|
||||
public Storage<Double> applyMask(OrderMask mask) {
|
||||
int[] positions = mask.getPositions();
|
||||
long[] newData = new long[positions.length];
|
||||
BitSet newMissing = new BitSet();
|
||||
@ -148,7 +150,7 @@ public class DoubleStorage extends NumericStorage {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage countMask(int[] counts, int total) {
|
||||
public Storage<Double> countMask(int[] counts, int total) {
|
||||
long[] newData = new long[total];
|
||||
BitSet newMissing = new BitSet();
|
||||
int pos = 0;
|
||||
@ -169,8 +171,8 @@ public class DoubleStorage extends NumericStorage {
|
||||
return isMissing;
|
||||
}
|
||||
|
||||
private static MapOpStorage<DoubleStorage> buildOps() {
|
||||
MapOpStorage<DoubleStorage> ops = new MapOpStorage<>();
|
||||
private static MapOpStorage<Double, DoubleStorage> buildOps() {
|
||||
MapOpStorage<Double, DoubleStorage> ops = new MapOpStorage<>();
|
||||
ops.add(
|
||||
new DoubleNumericOp(Maps.ADD) {
|
||||
@Override
|
||||
@ -249,15 +251,29 @@ public class DoubleStorage extends NumericStorage {
|
||||
.add(
|
||||
new UnaryMapOperation<>(Maps.IS_MISSING) {
|
||||
@Override
|
||||
public Storage run(DoubleStorage storage) {
|
||||
public BoolStorage run(DoubleStorage storage) {
|
||||
return new BoolStorage(storage.isMissing, new BitSet(), storage.size, false);
|
||||
}
|
||||
});
|
||||
})
|
||||
.add(
|
||||
SpecializedIsInOp.make(
|
||||
list -> {
|
||||
HashSet<Double> set = new HashSet<>();
|
||||
boolean hasNulls = false;
|
||||
for (Object o : list) {
|
||||
hasNulls |= o == null;
|
||||
Double x = NumericConverter.tryConvertingToDouble(o);
|
||||
if (x != null) {
|
||||
set.add(x);
|
||||
}
|
||||
}
|
||||
return new SpecializedIsInOp.CompactRepresentation<>(set, hasNulls);
|
||||
}));
|
||||
return ops;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DoubleStorage slice(int offset, int limit) {
|
||||
public Storage<Double> slice(int offset, int limit) {
|
||||
int newSize = Math.min(size - offset, limit);
|
||||
long[] newData = new long[newSize];
|
||||
System.arraycopy(data, offset, newData, 0, newSize);
|
||||
@ -266,7 +282,7 @@ public class DoubleStorage extends NumericStorage {
|
||||
}
|
||||
|
||||
@Override
|
||||
public DoubleStorage slice(List<SliceRange> ranges) {
|
||||
public Storage<Double> slice(List<SliceRange> ranges) {
|
||||
int newSize = SliceRange.totalLength(ranges);
|
||||
long[] newData = new long[newSize];
|
||||
BitSet newMissing = new BitSet(newSize);
|
||||
|
@ -1,9 +1,16 @@
|
||||
package org.enso.table.data.column.storage;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.OptionalLong;
|
||||
import java.util.stream.LongStream;
|
||||
import org.enso.base.polyglot.NumericConverter;
|
||||
import org.enso.table.data.column.builder.object.NumericBuilder;
|
||||
import org.enso.table.data.column.operation.aggregate.Aggregator;
|
||||
import org.enso.table.data.column.operation.aggregate.numeric.LongToLongAggregator;
|
||||
import org.enso.table.data.column.operation.map.MapOpStorage;
|
||||
import org.enso.table.data.column.operation.map.SpecializedIsInOp;
|
||||
import org.enso.table.data.column.operation.map.UnaryMapOperation;
|
||||
import org.enso.table.data.column.operation.map.numeric.LongBooleanOp;
|
||||
import org.enso.table.data.column.operation.map.numeric.LongNumericOp;
|
||||
@ -12,17 +19,12 @@ import org.enso.table.data.mask.OrderMask;
|
||||
import org.enso.table.data.mask.SliceRange;
|
||||
import org.graalvm.polyglot.Value;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.List;
|
||||
import java.util.OptionalLong;
|
||||
import java.util.stream.LongStream;
|
||||
|
||||
/** A column storing 64-bit integers. */
|
||||
public class LongStorage extends NumericStorage {
|
||||
public final class LongStorage extends NumericStorage<Long> {
|
||||
private final long[] data;
|
||||
private final BitSet isMissing;
|
||||
private final int size;
|
||||
private static final MapOpStorage<LongStorage> ops = buildOps();
|
||||
private static final MapOpStorage<Long, LongStorage> ops = buildOps();
|
||||
|
||||
/**
|
||||
* @param data the underlying data
|
||||
@ -40,13 +42,17 @@ public class LongStorage extends NumericStorage {
|
||||
this(data, data.length, new BitSet());
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
/**
|
||||
* @inheritDoc
|
||||
*/
|
||||
@Override
|
||||
public int size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
/**
|
||||
* @inheritDoc
|
||||
*/
|
||||
@Override
|
||||
public int countMissing() {
|
||||
return isMissing.cardinality();
|
||||
@ -66,17 +72,21 @@ public class LongStorage extends NumericStorage {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getItemBoxed(int idx) {
|
||||
public Long getItemBoxed(int idx) {
|
||||
return isMissing.get(idx) ? null : data[idx];
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
/**
|
||||
* @inheritDoc
|
||||
*/
|
||||
@Override
|
||||
public int getType() {
|
||||
return Type.LONG;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
/**
|
||||
* @inheritDoc
|
||||
*/
|
||||
@Override
|
||||
public boolean isNa(long idx) {
|
||||
return isMissing.get((int) idx);
|
||||
@ -88,12 +98,12 @@ public class LongStorage extends NumericStorage {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Storage runVectorizedMap(String name, Object argument) {
|
||||
protected Storage<?> runVectorizedMap(String name, Object argument) {
|
||||
return ops.runMap(name, this, argument);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Storage runVectorizedZip(String name, Storage argument) {
|
||||
protected Storage<?> runVectorizedZip(String name, Storage<?> argument) {
|
||||
return ops.runZip(name, this, argument);
|
||||
}
|
||||
|
||||
@ -137,7 +147,7 @@ public class LongStorage extends NumericStorage {
|
||||
};
|
||||
}
|
||||
|
||||
private Storage fillMissingDouble(double arg) {
|
||||
private Storage<?> fillMissingDouble(double arg) {
|
||||
final var builder = NumericBuilder.createDoubleBuilder(size());
|
||||
long rawArg = Double.doubleToRawLongBits(arg);
|
||||
for (int i = 0; i < size(); i++) {
|
||||
@ -151,7 +161,7 @@ public class LongStorage extends NumericStorage {
|
||||
return builder.seal();
|
||||
}
|
||||
|
||||
private Storage fillMissingLong(long arg) {
|
||||
private Storage<?> fillMissingLong(long arg) {
|
||||
final var builder = NumericBuilder.createLongBuilder(size());
|
||||
for (int i = 0; i < size(); i++) {
|
||||
if (isMissing.get(i)) {
|
||||
@ -164,7 +174,7 @@ public class LongStorage extends NumericStorage {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage fillMissing(Value arg) {
|
||||
public Storage<?> fillMissing(Value arg) {
|
||||
if (arg.isNumber()) {
|
||||
if (arg.fitsInLong()) {
|
||||
return fillMissingLong(arg.asLong());
|
||||
@ -177,7 +187,7 @@ public class LongStorage extends NumericStorage {
|
||||
}
|
||||
|
||||
@Override
|
||||
public LongStorage mask(BitSet mask, int cardinality) {
|
||||
public Storage<Long> mask(BitSet mask, int cardinality) {
|
||||
BitSet newMissing = new BitSet();
|
||||
long[] newData = new long[cardinality];
|
||||
int resIx = 0;
|
||||
@ -194,7 +204,7 @@ public class LongStorage extends NumericStorage {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage applyMask(OrderMask mask) {
|
||||
public Storage<Long> applyMask(OrderMask mask) {
|
||||
int[] positions = mask.getPositions();
|
||||
long[] newData = new long[positions.length];
|
||||
BitSet newMissing = new BitSet();
|
||||
@ -209,7 +219,7 @@ public class LongStorage extends NumericStorage {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage countMask(int[] counts, int total) {
|
||||
public Storage<Long> countMask(int[] counts, int total) {
|
||||
long[] newData = new long[total];
|
||||
BitSet newMissing = new BitSet();
|
||||
int pos = 0;
|
||||
@ -230,8 +240,8 @@ public class LongStorage extends NumericStorage {
|
||||
return isMissing;
|
||||
}
|
||||
|
||||
private static MapOpStorage<LongStorage> buildOps() {
|
||||
MapOpStorage<LongStorage> ops = new MapOpStorage<>();
|
||||
private static MapOpStorage<Long, LongStorage> buildOps() {
|
||||
MapOpStorage<Long, LongStorage> ops = new MapOpStorage<>();
|
||||
ops.add(
|
||||
new LongNumericOp(Maps.ADD) {
|
||||
@Override
|
||||
@ -360,10 +370,24 @@ public class LongStorage extends NumericStorage {
|
||||
.add(
|
||||
new UnaryMapOperation<>(Maps.IS_MISSING) {
|
||||
@Override
|
||||
public Storage run(LongStorage storage) {
|
||||
public BoolStorage run(LongStorage storage) {
|
||||
return new BoolStorage(storage.isMissing, new BitSet(), storage.size, false);
|
||||
}
|
||||
});
|
||||
})
|
||||
.add(
|
||||
SpecializedIsInOp.make(
|
||||
list -> {
|
||||
HashSet<Long> set = new HashSet<>();
|
||||
boolean hasNulls = false;
|
||||
for (Object o : list) {
|
||||
hasNulls |= o == null;
|
||||
Long x = NumericConverter.tryConvertingToLong(o);
|
||||
if (x != null) {
|
||||
set.add(x);
|
||||
}
|
||||
}
|
||||
return new SpecializedIsInOp.CompactRepresentation<>(set, hasNulls);
|
||||
}));
|
||||
return ops;
|
||||
}
|
||||
|
||||
|
@ -1,12 +1,11 @@
|
||||
package org.enso.table.data.column.storage;
|
||||
|
||||
import java.util.stream.DoubleStream;
|
||||
import org.enso.table.data.column.operation.aggregate.Aggregator;
|
||||
import org.enso.table.data.column.operation.aggregate.numeric.NumericAggregator;
|
||||
|
||||
import java.util.stream.DoubleStream;
|
||||
|
||||
/** A storage containing items representable as a {@code double}. */
|
||||
public abstract class NumericStorage extends Storage {
|
||||
public abstract class NumericStorage<T> extends Storage<T> {
|
||||
/**
|
||||
* Returns the value stored at the given index. The return value if the given index is missing
|
||||
* ({@link #isNa(long)}) is undefined.
|
||||
|
@ -1,12 +1,11 @@
|
||||
package org.enso.table.data.column.storage;
|
||||
|
||||
import java.util.BitSet;
|
||||
import org.enso.table.data.column.operation.map.MapOpStorage;
|
||||
import org.enso.table.data.column.operation.map.UnaryMapOperation;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/** A column storing arbitrary objects. */
|
||||
public class ObjectStorage extends SpecializedStorage<Object> {
|
||||
public final class ObjectStorage extends SpecializedStorage<Object> {
|
||||
/**
|
||||
* @param data the underlying data
|
||||
* @param size the number of items stored
|
||||
@ -30,14 +29,14 @@ public class ObjectStorage extends SpecializedStorage<Object> {
|
||||
return Type.OBJECT;
|
||||
}
|
||||
|
||||
private static final MapOpStorage<SpecializedStorage<Object>> ops = buildObjectOps();
|
||||
private static final MapOpStorage<Object, SpecializedStorage<Object>> ops = buildObjectOps();
|
||||
|
||||
static <S extends SpecializedStorage<?>> MapOpStorage<S> buildObjectOps() {
|
||||
MapOpStorage<S> ops = new MapOpStorage<>();
|
||||
static <T, S extends SpecializedStorage<T>> MapOpStorage<T, S> buildObjectOps() {
|
||||
MapOpStorage<T, S> ops = new MapOpStorage<>();
|
||||
ops.add(
|
||||
new UnaryMapOperation<>(Maps.IS_MISSING) {
|
||||
@Override
|
||||
protected Storage run(S storage) {
|
||||
protected BoolStorage run(S storage) {
|
||||
BitSet r = new BitSet();
|
||||
for (int i = 0; i < storage.size; i++) {
|
||||
if (storage.data[i] == null) {
|
||||
|
@ -1,14 +1,13 @@
|
||||
package org.enso.table.data.column.storage;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.List;
|
||||
import org.enso.table.data.column.operation.map.MapOpStorage;
|
||||
import org.enso.table.data.index.Index;
|
||||
import org.enso.table.data.mask.OrderMask;
|
||||
import org.enso.table.data.mask.SliceRange;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.List;
|
||||
|
||||
public abstract class SpecializedStorage<T> extends Storage {
|
||||
public abstract class SpecializedStorage<T> extends Storage<T> {
|
||||
|
||||
protected abstract SpecializedStorage<T> newInstance(T[] data, int size);
|
||||
|
||||
@ -21,7 +20,7 @@ public abstract class SpecializedStorage<T> extends Storage {
|
||||
* @param data the underlying data
|
||||
* @param size the number of items stored
|
||||
*/
|
||||
protected SpecializedStorage(T[] data, int size, MapOpStorage<SpecializedStorage<T>> ops) {
|
||||
protected SpecializedStorage(T[] data, int size, MapOpStorage<T, SpecializedStorage<T>> ops) {
|
||||
this.data = data;
|
||||
this.size = size;
|
||||
this.ops = ops;
|
||||
@ -29,7 +28,7 @@ public abstract class SpecializedStorage<T> extends Storage {
|
||||
|
||||
protected final T[] data;
|
||||
protected final int size;
|
||||
private final MapOpStorage<SpecializedStorage<T>> ops;
|
||||
private final MapOpStorage<T, SpecializedStorage<T>> ops;
|
||||
|
||||
/** @inheritDoc */
|
||||
@Override
|
||||
@ -74,12 +73,12 @@ public abstract class SpecializedStorage<T> extends Storage {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Storage runVectorizedMap(String name, Object argument) {
|
||||
protected Storage<?> runVectorizedMap(String name, Object argument) {
|
||||
return ops.runMap(name, this, argument);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Storage runVectorizedZip(String name, Storage argument) {
|
||||
protected Storage<?> runVectorizedZip(String name, Storage<?> argument) {
|
||||
return ops.runZip(name, this, argument);
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,11 @@
|
||||
package org.enso.table.data.column.storage;
|
||||
|
||||
import org.enso.base.Polyglot_Utils;
|
||||
import java.util.BitSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.function.BiFunction;
|
||||
import java.util.function.Function;
|
||||
import org.enso.base.polyglot.Polyglot_Utils;
|
||||
import org.enso.table.data.column.builder.object.Builder;
|
||||
import org.enso.table.data.column.builder.object.InferredBuilder;
|
||||
import org.enso.table.data.column.builder.object.ObjectBuilder;
|
||||
@ -11,14 +16,8 @@ import org.enso.table.data.mask.OrderMask;
|
||||
import org.enso.table.data.mask.SliceRange;
|
||||
import org.graalvm.polyglot.Value;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.function.BiFunction;
|
||||
import java.util.function.Function;
|
||||
|
||||
/** An abstract representation of a data column. */
|
||||
public abstract class Storage {
|
||||
public abstract class Storage<T> {
|
||||
/** @return the number of elements in this column (including NAs) */
|
||||
public abstract int size();
|
||||
|
||||
@ -42,7 +41,7 @@ public abstract class Storage {
|
||||
* @param idx the index to look up
|
||||
* @return the item at position {@code idx}
|
||||
*/
|
||||
public abstract Object getItemBoxed(int idx);
|
||||
public abstract T getItemBoxed(int idx);
|
||||
|
||||
/**
|
||||
* Enumerating possible storage types.
|
||||
@ -83,6 +82,7 @@ public abstract class Storage {
|
||||
public static final String ENDS_WITH = "ends_with";
|
||||
public static final String CONTAINS = "contains";
|
||||
public static final String LIKE = "like";
|
||||
public static final String IS_IN = "is_in";
|
||||
}
|
||||
|
||||
public static final class Aggregators {
|
||||
@ -95,9 +95,9 @@ public abstract class Storage {
|
||||
|
||||
protected abstract boolean isOpVectorized(String name);
|
||||
|
||||
protected abstract Storage runVectorizedMap(String name, Object argument);
|
||||
protected abstract Storage<?> runVectorizedMap(String name, Object argument);
|
||||
|
||||
protected abstract Storage runVectorizedZip(String name, Storage argument);
|
||||
protected abstract Storage<?> runVectorizedZip(String name, Storage<?> argument);
|
||||
|
||||
/**
|
||||
* Runs a function on each non-missing element in this storage and gathers the results.
|
||||
@ -106,17 +106,23 @@ public abstract class Storage {
|
||||
* supported. If this argument is null, the vectorized operation will never be used.
|
||||
* @param function the function to run.
|
||||
* @param argument the argument to pass to each run of the function
|
||||
* @param skipNulls specifies whether null values on the input should result in a null result
|
||||
* without passing them through the function, this is useful if the function does not support
|
||||
* the null-values, but it needs to be set to false if the function should handle them.
|
||||
* @return the result of running the function on all non-missing elements.
|
||||
*/
|
||||
public final Storage bimap(
|
||||
String name, BiFunction<Object, Object, Object> function, Object argument) {
|
||||
public final Storage<?> bimap(
|
||||
String name,
|
||||
BiFunction<Object, Object, Object> function,
|
||||
Object argument,
|
||||
boolean skipNulls) {
|
||||
if (name != null && isOpVectorized(name)) {
|
||||
return runVectorizedMap(name, argument);
|
||||
}
|
||||
Builder builder = new InferredBuilder(size());
|
||||
for (int i = 0; i < size(); i++) {
|
||||
Object it = getItemBoxed(i);
|
||||
if (it == null) {
|
||||
if (skipNulls && it == null) {
|
||||
builder.appendNoGrow(null);
|
||||
} else {
|
||||
Object result = function.apply(it, argument);
|
||||
@ -165,7 +171,7 @@ public abstract class Storage {
|
||||
* @param function the function to run.
|
||||
* @return the result of running the function on all non-missing elements.
|
||||
*/
|
||||
public final Storage map(String name, Function<Object, Value> function) {
|
||||
public final Storage<?> map(String name, Function<Object, Value> function) {
|
||||
if (name != null && isOpVectorized(name)) {
|
||||
return runVectorizedMap(name, null);
|
||||
}
|
||||
@ -192,8 +198,8 @@ public abstract class Storage {
|
||||
* @param skipNa whether rows containing missing values should be passed to the function.
|
||||
* @return the result of running the function on all non-missing elements.
|
||||
*/
|
||||
public final Storage zip(
|
||||
String name, BiFunction<Object, Object, Object> function, Storage arg, boolean skipNa) {
|
||||
public final Storage<?> zip(
|
||||
String name, BiFunction<Object, Object, Object> function, Storage<?> arg, boolean skipNa) {
|
||||
if (name != null && isOpVectorized(name)) {
|
||||
return runVectorizedZip(name, arg);
|
||||
}
|
||||
@ -218,7 +224,7 @@ public abstract class Storage {
|
||||
* @param arg the value to use for missing elements
|
||||
* @return a new storage, with all missing elements replaced by arg
|
||||
*/
|
||||
public Storage fillMissing(Value arg) {
|
||||
public Storage<?> fillMissing(Value arg) {
|
||||
return fillMissingHelper(arg, new ObjectBuilder(size()));
|
||||
}
|
||||
|
||||
@ -228,7 +234,7 @@ public abstract class Storage {
|
||||
* @param other the source of default values
|
||||
* @return a new storage with missing values filled
|
||||
*/
|
||||
public Storage fillMissingFrom(Storage other) {
|
||||
public Storage<?> fillMissingFrom(Storage<?> other) {
|
||||
var builder = new InferredBuilder(size());
|
||||
for (int i = 0; i < size(); i++) {
|
||||
if (isNa(i)) {
|
||||
@ -240,7 +246,7 @@ public abstract class Storage {
|
||||
return builder.seal();
|
||||
}
|
||||
|
||||
protected final Storage fillMissingHelper(Value arg, Builder builder) {
|
||||
protected final Storage<?> fillMissingHelper(Value arg, Builder builder) {
|
||||
Object convertedFallback = Polyglot_Utils.convertPolyglotValue(arg);
|
||||
for (int i = 0; i < size(); i++) {
|
||||
Object it = getItemBoxed(i);
|
||||
@ -260,14 +266,14 @@ public abstract class Storage {
|
||||
* @param cardinality the number of true values in mask
|
||||
* @return a new storage, masked with the given mask
|
||||
*/
|
||||
public abstract Storage mask(BitSet mask, int cardinality);
|
||||
public abstract Storage<T> mask(BitSet mask, int cardinality);
|
||||
|
||||
/**
|
||||
* Returns a new storage, ordered according to the rules specified in a mask.
|
||||
*
|
||||
* @param mask the mask specifying the ordering rules
* @return a storage resulting from applying the reordering rules
|
||||
*/
|
||||
public abstract Storage applyMask(OrderMask mask);
|
||||
public abstract Storage<T> applyMask(OrderMask mask);
|
||||
|
||||
/**
|
||||
* Returns a new storage, resulting from applying the rules specified in a mask. The resulting
|
||||
@ -280,13 +286,13 @@ public abstract class Storage {
|
||||
* storage
|
||||
* @return the storage masked according to the specified rules
|
||||
*/
|
||||
public abstract Storage countMask(int[] counts, int total);
|
||||
public abstract Storage<T> countMask(int[] counts, int total);
|
||||
|
||||
/** @return a copy of the storage containing a slice of the original data */
|
||||
public abstract Storage slice(int offset, int limit);
|
||||
public abstract Storage<T> slice(int offset, int limit);
|
||||
|
||||
/** @return a copy of the storage consisting of slices of the original data */
|
||||
public abstract Storage slice(List<SliceRange> ranges);
|
||||
public abstract Storage<T> slice(List<SliceRange> ranges);
|
||||
|
||||
public List<Object> toList() {
|
||||
return new StorageListView(this);
|
||||
@ -297,7 +303,7 @@ public abstract class Storage {
|
||||
*
|
||||
* @return a storage counting the number of times each value in this one has been seen before.
|
||||
*/
|
||||
public Storage duplicateCount() {
|
||||
public Storage<?> duplicateCount() {
|
||||
long[] data = new long[size()];
|
||||
HashMap<Object, Integer> occurenceCount = new HashMap<>();
|
||||
for (int i = 0; i < size(); i++) {
|
||||
|
@ -11,7 +11,7 @@ import java.util.Objects;
|
||||
* is not modifiable.
|
||||
*/
|
||||
public class StorageListView implements List<Object> {
|
||||
private final Storage storage;
|
||||
private final Storage<?> storage;
|
||||
private final int from;
|
||||
private final int to;
|
||||
|
||||
@ -20,11 +20,11 @@ public class StorageListView implements List<Object> {
|
||||
*
|
||||
* @param storage the storage to wrap.
|
||||
*/
|
||||
public StorageListView(Storage storage) {
|
||||
public StorageListView(Storage<?> storage) {
|
||||
this(storage, 0, storage.size());
|
||||
}
|
||||
|
||||
private StorageListView(Storage storage, int from, int to) {
|
||||
private StorageListView(Storage<?> storage, int from, int to) {
|
||||
this.storage = storage;
|
||||
this.from = from;
|
||||
this.to = to;
|
||||
|
@ -1,17 +1,19 @@
|
||||
package org.enso.table.data.column.storage;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.HashSet;
|
||||
import org.enso.base.Text_Utils;
|
||||
import org.enso.table.data.column.builder.object.StringBuilder;
|
||||
import org.enso.table.data.column.operation.map.MapOpStorage;
|
||||
import org.enso.table.data.column.operation.map.MapOperation;
|
||||
import org.enso.table.data.column.operation.map.SpecializedIsInOp;
|
||||
import org.enso.table.data.column.operation.map.UnaryMapOperation;
|
||||
import org.enso.table.data.column.operation.map.text.LikeOp;
|
||||
import org.enso.table.data.column.operation.map.text.StringBooleanOp;
|
||||
import org.graalvm.polyglot.Value;
|
||||
|
||||
/** A column storing strings. */
|
||||
public class StringStorage extends SpecializedStorage<String> {
|
||||
public final class StringStorage extends SpecializedStorage<String> {
|
||||
|
||||
/**
|
||||
* @param data the underlying data
|
||||
@ -36,7 +38,7 @@ public class StringStorage extends SpecializedStorage<String> {
|
||||
return Type.STRING;
|
||||
}
|
||||
|
||||
private static final MapOpStorage<SpecializedStorage<String>> ops = buildOps();
|
||||
private static final MapOpStorage<String, SpecializedStorage<String>> ops = buildOps();
|
||||
|
||||
@Override
|
||||
protected boolean isOpVectorized(String name) {
|
||||
@ -44,17 +46,17 @@ public class StringStorage extends SpecializedStorage<String> {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Storage runVectorizedMap(String name, Object argument) {
|
||||
protected Storage<?> runVectorizedMap(String name, Object argument) {
|
||||
return ops.runMap(name, this, argument);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Storage runVectorizedZip(String name, Storage argument) {
|
||||
protected Storage<?> runVectorizedZip(String name, Storage<?> argument) {
|
||||
return ops.runZip(name, this, argument);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage fillMissing(Value arg) {
|
||||
public Storage<?> fillMissing(Value arg) {
|
||||
if (arg.isString()) {
|
||||
return fillMissingHelper(arg, new StringBuilder(size()));
|
||||
} else {
|
||||
@ -62,12 +64,12 @@ public class StringStorage extends SpecializedStorage<String> {
|
||||
}
|
||||
}
|
||||
|
||||
private static MapOpStorage<SpecializedStorage<String>> buildOps() {
|
||||
MapOpStorage<SpecializedStorage<String>> t = ObjectStorage.buildObjectOps();
|
||||
private static MapOpStorage<String, SpecializedStorage<String>> buildOps() {
|
||||
MapOpStorage<String, SpecializedStorage<String>> t = ObjectStorage.buildObjectOps();
|
||||
t.add(
|
||||
new MapOperation<>(Maps.EQ) {
|
||||
@Override
|
||||
public Storage runMap(SpecializedStorage<String> storage, Object arg) {
|
||||
public BoolStorage runMap(SpecializedStorage<String> storage, Object arg) {
|
||||
BitSet r = new BitSet();
|
||||
BitSet missing = new BitSet();
|
||||
for (int i = 0; i < storage.size(); i++) {
|
||||
@ -81,7 +83,7 @@ public class StringStorage extends SpecializedStorage<String> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Storage runZip(SpecializedStorage<String> storage, Storage arg) {
|
||||
public BoolStorage runZip(SpecializedStorage<String> storage, Storage<?> arg) {
|
||||
BitSet r = new BitSet();
|
||||
BitSet missing = new BitSet();
|
||||
for (int i = 0; i < storage.size(); i++) {
|
||||
@ -98,7 +100,7 @@ public class StringStorage extends SpecializedStorage<String> {
|
||||
t.add(
|
||||
new UnaryMapOperation<>(Maps.IS_EMPTY) {
|
||||
@Override
|
||||
protected Storage run(SpecializedStorage<String> storage) {
|
||||
protected BoolStorage run(SpecializedStorage<String> storage) {
|
||||
BitSet r = new BitSet();
|
||||
for (int i = 0; i < storage.size; i++) {
|
||||
String s = storage.data[i];
|
||||
@ -131,6 +133,19 @@ public class StringStorage extends SpecializedStorage<String> {
|
||||
}
|
||||
});
|
||||
t.add(new LikeOp());
|
||||
t.add(
|
||||
SpecializedIsInOp.make(
|
||||
list -> {
|
||||
HashSet<String> set = new HashSet<>();
|
||||
boolean hasNulls = false;
|
||||
for (Object o : list) {
|
||||
hasNulls |= o == null;
|
||||
if (o instanceof String s) {
|
||||
set.add(s);
|
||||
}
|
||||
}
|
||||
return new SpecializedIsInOp.CompactRepresentation<>(set, hasNulls);
|
||||
}));
|
||||
return t;
|
||||
}
|
||||
}
|
||||
|
@ -1,10 +1,10 @@
|
||||
package org.enso.table.data.column.storage;
|
||||
|
||||
import org.enso.table.data.column.operation.map.MapOpStorage;
|
||||
|
||||
import java.time.LocalTime;
|
||||
import org.enso.table.data.column.operation.map.MapOpStorage;
|
||||
import org.enso.table.data.column.operation.map.SpecializedIsInOp;
|
||||
|
||||
public class TimeOfDayStorage extends SpecializedStorage<LocalTime> {
|
||||
public final class TimeOfDayStorage extends SpecializedStorage<LocalTime> {
|
||||
/**
|
||||
* @param data the underlying data
|
||||
* @param size the number of items stored
|
||||
@ -13,10 +13,12 @@ public class TimeOfDayStorage extends SpecializedStorage<LocalTime> {
|
||||
super(data, size, ops);
|
||||
}
|
||||
|
||||
private static final MapOpStorage<SpecializedStorage<LocalTime>> ops = buildOps();
|
||||
private static final MapOpStorage<LocalTime, SpecializedStorage<LocalTime>> ops = buildOps();
|
||||
|
||||
private static MapOpStorage<SpecializedStorage<LocalTime>> buildOps() {
|
||||
return ObjectStorage.buildObjectOps();
|
||||
private static MapOpStorage<LocalTime, SpecializedStorage<LocalTime>> buildOps() {
|
||||
MapOpStorage<LocalTime, SpecializedStorage<LocalTime>> t = ObjectStorage.buildObjectOps();
|
||||
t.add(SpecializedIsInOp.makeForTimeColumns(LocalTime.class));
|
||||
return t;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -7,17 +7,17 @@ import org.enso.table.data.mask.SliceRange;
|
||||
import org.enso.table.data.table.Column;
|
||||
|
||||
public class HashIndex extends Index {
|
||||
private final Storage items;
|
||||
private final Storage<?> items;
|
||||
private final Map<Object, List<Integer>> locs;
|
||||
private final String name;
|
||||
|
||||
private HashIndex(Storage items, Map<Object, List<Integer>> locs, String name) {
|
||||
private HashIndex(Storage<?> items, Map<Object, List<Integer>> locs, String name) {
|
||||
this.items = items;
|
||||
this.locs = locs;
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
private HashIndex(String name, Storage items, int size) {
|
||||
private HashIndex(String name, Storage<?> items, int size) {
|
||||
Map<Object, List<Integer>> locations = new HashMap<>();
|
||||
for (int i = 0; i < size; i++) {
|
||||
List<Integer> its = locations.computeIfAbsent(items.getItemBoxed(i), x -> new ArrayList<>());
|
||||
@ -28,7 +28,7 @@ public class HashIndex extends Index {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public static HashIndex fromStorage(String name, Storage storage) {
|
||||
public static HashIndex fromStorage(String name, Storage<?> storage) {
|
||||
return new HashIndex(name, storage, storage.size());
|
||||
}
|
||||
|
||||
@ -59,19 +59,19 @@ public class HashIndex extends Index {
|
||||
|
||||
@Override
|
||||
public Index mask(BitSet mask, int cardinality) {
|
||||
Storage newSt = items.mask(mask, cardinality);
|
||||
Storage<?> newSt = items.mask(mask, cardinality);
|
||||
return HashIndex.fromStorage(name, newSt);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Index countMask(int[] counts, int total) {
|
||||
Storage newSt = items.countMask(counts, total);
|
||||
Storage<?> newSt = items.countMask(counts, total);
|
||||
return HashIndex.fromStorage(name, newSt);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Index applyMask(OrderMask mask) {
|
||||
Storage newSt = items.applyMask(mask);
|
||||
Storage<?> newSt = items.applyMask(mask);
|
||||
return HashIndex.fromStorage(name, newSt);
|
||||
}
|
||||
|
||||
@ -86,7 +86,7 @@ public class HashIndex extends Index {
|
||||
mask.set(i);
|
||||
}
|
||||
}
|
||||
Storage newItems = items.mask(mask, locs.size());
|
||||
Storage<?> newItems = items.mask(mask, locs.size());
|
||||
return new HashIndex(newItems, newLocs, name);
|
||||
}
|
||||
|
||||
|
@ -30,7 +30,7 @@ public class MultiValueIndex {
|
||||
boolean isOrdered = ordering != null;
|
||||
this.locs = isOrdered ? new TreeMap<>() : new HashMap<>();
|
||||
|
||||
Storage[] storage = Arrays.stream(keyColumns).map(Column::getStorage).toArray(Storage[]::new);
|
||||
Storage<?>[] storage = Arrays.stream(keyColumns).map(Column::getStorage).toArray(Storage[]::new);
|
||||
IntFunction<MultiValueKeyBase> keyFactory =
|
||||
isOrdered
|
||||
? i -> new OrderedMultiValueKey(storage, i, ordering, objectComparator)
|
||||
|
@ -4,7 +4,7 @@ import org.enso.table.data.column.storage.Storage;
|
||||
|
||||
/** The base class for keys used for sorting/grouping rows by a set of columns. */
|
||||
public abstract class MultiValueKeyBase {
|
||||
protected final Storage[] storages;
|
||||
protected final Storage<?>[] storages;
|
||||
protected final int rowIndex;
|
||||
protected boolean hasFloatValues = false;
|
||||
protected boolean floatsComputed = false;
|
||||
@ -13,7 +13,7 @@ public abstract class MultiValueKeyBase {
|
||||
* Constructs a key based on an array of column storages and the index of the row the key is
|
||||
* associated with.
|
||||
*/
|
||||
public MultiValueKeyBase(Storage[] storage, int rowIndex) {
|
||||
public MultiValueKeyBase(Storage<?>[] storage, int rowIndex) {
|
||||
this.storages = storage;
|
||||
this.rowIndex = rowIndex;
|
||||
}
|
||||
@ -28,7 +28,7 @@ public abstract class MultiValueKeyBase {
|
||||
|
||||
/** Checks if all cells in the current row are missing. */
|
||||
public boolean areAllNull() {
|
||||
for (Storage value : storages) {
|
||||
for (Storage<?> value : storages) {
|
||||
if (!value.isNa(rowIndex)) {
|
||||
return false;
|
||||
}
|
||||
|
@ -21,7 +21,7 @@ public class OrderedMultiValueKey extends MultiValueKeyBase
|
||||
private final int[] directions;
|
||||
|
||||
public OrderedMultiValueKey(
|
||||
Storage[] storages, int rowIndex, int[] directions, Comparator<Object> objectComparator) {
|
||||
Storage<?>[] storages, int rowIndex, int[] directions, Comparator<Object> objectComparator) {
|
||||
super(storages, rowIndex);
|
||||
this.objectComparator = objectComparator;
|
||||
this.directions = directions;
|
||||
|
@ -21,12 +21,12 @@ public class UnorderedMultiValueKey extends MultiValueKeyBase {
|
||||
private final int hashCodeValue;
|
||||
private final TextFoldingStrategy textFoldingStrategy;
|
||||
|
||||
public UnorderedMultiValueKey(Storage[] storages, int rowIndex) {
|
||||
public UnorderedMultiValueKey(Storage<?>[] storages, int rowIndex) {
|
||||
this(storages, rowIndex, TextFoldingStrategy.unicodeNormalizedFold);
|
||||
}
|
||||
|
||||
public UnorderedMultiValueKey(
|
||||
Storage[] storages, int rowIndex, TextFoldingStrategy textFoldingStrategy) {
|
||||
Storage<?>[] storages, int rowIndex, TextFoldingStrategy textFoldingStrategy) {
|
||||
super(storages, rowIndex);
|
||||
this.textFoldingStrategy = textFoldingStrategy;
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
package org.enso.table.data.table;
|
||||
|
||||
import org.enso.base.Polyglot_Utils;
|
||||
import org.enso.base.polyglot.Polyglot_Utils;
|
||||
import org.enso.table.data.column.builder.object.InferredBuilder;
|
||||
import org.enso.table.data.column.operation.aggregate.Aggregator;
|
||||
import org.enso.table.data.column.storage.BoolStorage;
|
||||
@ -13,8 +13,6 @@ import org.enso.table.data.mask.SliceRange;
|
||||
import org.enso.table.error.UnexpectedColumnTypeException;
|
||||
import org.graalvm.polyglot.Value;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.BitSet;
|
||||
import java.util.List;
|
||||
import java.util.function.Function;
|
||||
@ -23,7 +21,7 @@ import java.util.stream.IntStream;
|
||||
/** A representation of a column. Consists of a column name and the underlying storage. */
|
||||
public class Column {
|
||||
private final String name;
|
||||
private final Storage storage;
|
||||
private final Storage<?> storage;
|
||||
private final Index index;
|
||||
|
||||
/**
|
||||
@ -32,7 +30,7 @@ public class Column {
|
||||
* @param name the column name
|
||||
* @param storage the underlying storage
|
||||
*/
|
||||
public Column(String name, Index index, Storage storage) {
|
||||
public Column(String name, Index index, Storage<?> storage) {
|
||||
this.name = name;
|
||||
this.storage = storage;
|
||||
this.index = index;
|
||||
@ -44,7 +42,7 @@ public class Column {
|
||||
* @param name the column name
|
||||
* @param storage the underlying storage
|
||||
*/
|
||||
public Column(String name, Storage storage) {
|
||||
public Column(String name, Storage<?> storage) {
|
||||
this(name, new DefaultIndex(storage.size()), storage);
|
||||
}
|
||||
|
||||
@ -63,7 +61,7 @@ public class Column {
|
||||
}
|
||||
|
||||
/** @return the underlying storage */
|
||||
public Storage getStorage() {
|
||||
public Storage<?> getStorage() {
|
||||
return storage;
|
||||
}
|
||||
|
||||
@ -148,7 +146,7 @@ public class Column {
|
||||
* @return a column indexed by {@code col}
|
||||
*/
|
||||
public Column setIndex(Column col) {
|
||||
Storage storage = col.getStorage();
|
||||
Storage<?> storage = col.getStorage();
|
||||
Index ix = HashIndex.fromStorage(col.getName(), storage);
|
||||
return this.withIndex(ix);
|
||||
}
|
||||
@ -183,7 +181,7 @@ public class Column {
|
||||
*/
|
||||
public Column applyMask(OrderMask mask) {
|
||||
Index newIndex = index.applyMask(mask);
|
||||
Storage newStorage = storage.applyMask(mask);
|
||||
Storage<?> newStorage = storage.applyMask(mask);
|
||||
return new Column(name, newIndex, newStorage);
|
||||
}
|
||||
|
||||
|
@ -182,7 +182,7 @@ public class Table {
|
||||
* @return a table indexed by the proper column
|
||||
*/
|
||||
public Table indexFromColumn(Column col) {
|
||||
Storage storage = col.getStorage();
|
||||
Storage<?> storage = col.getStorage();
|
||||
Index ix = HashIndex.fromStorage(col.getName(), storage);
|
||||
List<Column> newColumns = new ArrayList<>();
|
||||
Column indexCol = index.toColumn();
|
||||
@ -294,7 +294,7 @@ public class Table {
|
||||
matches[i] = other.index.loc(index.iloc(i));
|
||||
}
|
||||
} else {
|
||||
Storage onS = getColumnByName(on).getStorage();
|
||||
Storage<?> onS = getColumnByName(on).getStorage();
|
||||
for (int i = 0; i < s; i++) {
|
||||
matches[i] = other.index.loc(onS.getItemBoxed(i));
|
||||
}
|
||||
@ -360,7 +360,7 @@ public class Table {
|
||||
Arrays.stream(columns)
|
||||
.map(
|
||||
column -> {
|
||||
Storage newStorage = column.getStorage().applyMask(orderMask);
|
||||
Storage<?> newStorage = column.getStorage().applyMask(orderMask);
|
||||
return new Column(column.getName(), newIndex, newStorage);
|
||||
})
|
||||
.toArray(Column[]::new);
|
||||
@ -431,7 +431,7 @@ public class Table {
|
||||
return new Table(newColumns, newIndex);
|
||||
}
|
||||
|
||||
private Storage concatStorages(Storage left, Storage right) {
|
||||
private Storage<?> concatStorages(Storage<?> left, Storage<?> right) {
|
||||
InferredBuilder builder = new InferredBuilder(left.size() + right.size());
|
||||
for (int i = 0; i < left.size(); i++) {
|
||||
builder.appendNoGrow(left.getItemBoxed(i));
|
||||
@ -442,7 +442,7 @@ public class Table {
|
||||
return builder.seal();
|
||||
}
|
||||
|
||||
private Storage nullPad(int nullCount, Storage storage, boolean start) {
|
||||
private Storage<?> nullPad(int nullCount, Storage<?> storage, boolean start) {
|
||||
InferredBuilder builder = new InferredBuilder(nullCount + storage.size());
|
||||
if (start) {
|
||||
builder.appendNulls(nullCount);
|
||||
|
@ -21,7 +21,8 @@ public class Distinct {
|
||||
if (keyColumns.length != 0) {
|
||||
HashSet<MultiValueKeyBase> visitedRows = new HashSet<>();
|
||||
int size = keyColumns[0].getSize();
|
||||
Storage[] storage = Arrays.stream(keyColumns).map(Column::getStorage).toArray(Storage[]::new);
|
||||
Storage<?>[] storage =
|
||||
Arrays.stream(keyColumns).map(Column::getStorage).toArray(Storage[]::new);
|
||||
for (int i = 0; i < size; i++) {
|
||||
UnorderedMultiValueKey key = new UnorderedMultiValueKey(storage, i, textFoldingStrategy);
|
||||
|
||||
|
@ -43,7 +43,7 @@ public class OrderBuilder {
|
||||
* @return a comparator with properties described above
|
||||
*/
|
||||
public Comparator<Integer> toComparator() {
|
||||
final Storage storage = column.getStorage();
|
||||
final Storage<?> storage = column.getStorage();
|
||||
Comparator<Object> itemCmp = customComparator;
|
||||
if (!ascending) {
|
||||
itemCmp = itemCmp.reversed();
|
||||
|
@ -35,5 +35,6 @@ public abstract class DatatypeParser {
|
||||
* Parses a column of texts (represented as a {@code Storage<String>}) and returns a new storage,
|
||||
* containing the parsed elements.
|
||||
*/
|
||||
public abstract WithProblems<Storage> parseColumn(String columnName, StringStorage sourceStorage);
|
||||
public abstract WithProblems<Storage<?>> parseColumn(
|
||||
String columnName, Storage<String> sourceStorage);
|
||||
}
|
||||
|
@ -22,7 +22,7 @@ public class IdentityParser extends IncrementalDatatypeParser {
|
||||
}
|
||||
|
||||
@Override
|
||||
public WithProblems<Storage> parseColumn(String columnName, StringStorage sourceStorage) {
|
||||
public WithProblems<Storage<?>> parseColumn(String columnName, Storage<String> sourceStorage) {
|
||||
return new WithProblems<>(sourceStorage, List.of());
|
||||
}
|
||||
}
|
||||
|
@ -29,12 +29,12 @@ public abstract class IncrementalDatatypeParser extends DatatypeParser {
|
||||
* Parses a column of texts (represented as a {@code Storage<String>}) and returns a new storage,
|
||||
* containing the parsed elements.
|
||||
*/
|
||||
public WithProblems<Storage> parseColumn(String columnName, StringStorage sourceStorage) {
|
||||
public WithProblems<Storage<?>> parseColumn(String columnName, Storage<String> sourceStorage) {
|
||||
Builder builder = makeBuilderWithCapacity(sourceStorage.size());
|
||||
var aggregator = new ProblemAggregatorImpl(columnName);
|
||||
|
||||
for (int i = 0; i < sourceStorage.size(); ++i) {
|
||||
String cell = sourceStorage.getItem(i);
|
||||
String cell = sourceStorage.getItemBoxed(i);
|
||||
if (cell != null) {
|
||||
Object parsed = parseSingleValue(cell, aggregator);
|
||||
builder.appendNoGrow(parsed);
|
||||
|
@ -40,14 +40,14 @@ public class TypeInferringParser extends DatatypeParser {
|
||||
}
|
||||
|
||||
@Override
|
||||
public WithProblems<Storage> parseColumn(String columnName, StringStorage sourceStorage) {
|
||||
public WithProblems<Storage<?>> parseColumn(String columnName, Storage<String> sourceStorage) {
|
||||
parsers:
|
||||
for (IncrementalDatatypeParser parser : baseParsers) {
|
||||
Builder builder = parser.makeBuilderWithCapacity(sourceStorage.size());
|
||||
var aggregator = new ProblemAggregatorImpl(columnName);
|
||||
|
||||
for (int i = 0; i < sourceStorage.size(); ++i) {
|
||||
String cell = sourceStorage.getItem(i);
|
||||
String cell = sourceStorage.getItemBoxed(i);
|
||||
if (cell != null) {
|
||||
Object parsed = parser.parseSingleValue(cell, aggregator);
|
||||
if (aggregator.hasProblems()) {
|
||||
|
@ -452,13 +452,13 @@ public class DelimitedReader {
|
||||
Column[] columns = new Column[builders.length];
|
||||
for (int i = 0; i < builders.length; i++) {
|
||||
String columnName = effectiveColumnNames[i];
|
||||
StringStorage col = builders[i].seal();
|
||||
Storage<String> col = builders[i].seal();
|
||||
|
||||
WithProblems<Storage> parseResult = valueParser.parseColumn(columnName, col);
|
||||
WithProblems<Storage<?>> parseResult = valueParser.parseColumn(columnName, col);
|
||||
for (var problem : parseResult.problems()) {
|
||||
reportProblem(problem);
|
||||
}
|
||||
Storage storage = parseResult.value();
|
||||
Storage<?> storage = parseResult.value();
|
||||
|
||||
columns[i] = new Column(columnName, new DefaultIndex(storage.size()), storage);
|
||||
}
|
||||
|
@ -268,7 +268,7 @@ public class ExcelWriter {
|
||||
return;
|
||||
}
|
||||
|
||||
Storage[] storages = Arrays.stream(columns).map(Column::getStorage).toArray(Storage[]::new);
|
||||
Storage<?>[] storages = Arrays.stream(columns).map(Column::getStorage).toArray(Storage[]::new);
|
||||
for (int i = 0; i < rowCount; i++) {
|
||||
Row row = sheet.getRow(currentRow);
|
||||
if (row == null) {
|
||||
@ -276,7 +276,7 @@ public class ExcelWriter {
|
||||
}
|
||||
|
||||
for (int j = 0; j < columns.length; j++) {
|
||||
Storage storage = storages[j];
|
||||
Storage<?> storage = storages[j];
|
||||
int idx = j + firstColumn - 1;
|
||||
|
||||
Cell cell = row.getCell(idx);
|
||||
@ -305,7 +305,7 @@ public class ExcelWriter {
|
||||
return newStyle;
|
||||
}
|
||||
|
||||
private static void writeValueToCell(Cell cell, int j, Storage storage, Workbook workbook)
|
||||
private static void writeValueToCell(Cell cell, int j, Storage<?> storage, Workbook workbook)
|
||||
throws IllegalStateException {
|
||||
if (storage.isNa(j)) {
|
||||
cell.setBlank();
|
||||
|
@ -22,7 +22,7 @@ main =
|
||||
ints = (0.up_to vector_size).to_vector.take (Sample vector_size 42)
|
||||
start = Date_Time.new 1990 1 1
|
||||
dates = ints.map x->
|
||||
start + x.seconds
|
||||
start + (Duration.new seconds=x)
|
||||
objects = ints.map My.Data
|
||||
|
||||
ints_table = Table.new [['ints', ints]]
|
||||
|
@ -1217,11 +1217,51 @@ spec prefix table_builder test_selection pending=Nothing =
|
||||
t2.at "ix" . to_vector . should_equal [2, 4]
|
||||
t2.at "X" . to_vector . should_equal [1, 4]
|
||||
|
||||
Test.specify "by an Is_In check in a Vector" <|
|
||||
t = table_builder [["ix", [1, 2, 3, Nothing, 5, 6]], ["X", ["a", "b", "ccc", "X", "f", "2"]]]
|
||||
t.filter "X" (Filter_Condition.Is_In ["X", "a", "c"]) on_problems=Report_Error . at "X" . to_vector . should_equal ["a", "X"]
|
||||
t.filter "X" (Filter_Condition.Not_In ["X", "a", "c"]) on_problems=Report_Error . at "X" . to_vector . should_equal ["b", "ccc", "f", "2"]
|
||||
t.filter "X" (Filter_Condition.Is_In ["ccc"]) on_problems=Report_Error . at "X" . to_vector . should_equal ["ccc"]
|
||||
t.filter "X" (Filter_Condition.Is_In []) on_problems=Report_Error . at "X" . to_vector . should_equal []
|
||||
t.filter "X" (Filter_Condition.Not_In []) on_problems=Report_Error . at "X" . to_vector . should_equal ["a", "b", "ccc", "X", "f", "2"]
|
||||
|
||||
t.filter "ix" (Filter_Condition.Is_In [Nothing, 2, 5, 4]) on_problems=Report_Error . at "ix" . to_vector . should_equal [2, Nothing, 5]
|
||||
t.filter "ix" (Filter_Condition.Is_In [2, 5, 4]) on_problems=Report_Error . at "ix" . to_vector . should_equal [2, 5]
|
||||
t.filter "ix" (Filter_Condition.Is_In [Nothing]) on_problems=Report_Error . at "ix" . to_vector . should_equal [Nothing]
|
||||
t.filter "ix" (Filter_Condition.Not_In [Nothing]) on_problems=Report_Error . at "ix" . to_vector . should_equal [1, 2, 3, 5, 6]
|
||||
t.filter "ix" (Filter_Condition.Not_In [1, 3]) on_problems=Report_Error . at "ix" . to_vector . should_equal [2, Nothing, 5, 6]
|
||||
|
||||
v1 = t.filter "X" (Filter_Condition.Is_In ["c", "f", "b", "b", "b", 15, Nothing]) on_problems=Report_Error . at "X" . to_vector
|
||||
case test_selection.allows_mixed_type_comparisons of
|
||||
True -> v1.should_equal ["b", "f"]
|
||||
False -> v1.should_fail_with SQL_Error_Data
|
||||
v2 = t.filter "ix" (Filter_Condition.Is_In ["c", 3, 2, "a"]) on_problems=Report_Error . at "ix" . to_vector
|
||||
case test_selection.allows_mixed_type_comparisons of
|
||||
True -> v2.should_equal [2, 3]
|
||||
False -> v2.should_fail_with SQL_Error_Data
|
||||
|
||||
t2 = table_builder [["A", [True, False, True]], ["B", [False, False, False]], ["C", [True, False, Nothing]]]
|
||||
t2.filter "A" (Filter_Condition.Is_In [True, Nothing]) . at "A" . to_vector . should_equal [True, True]
|
||||
t2.filter "B" (Filter_Condition.Is_In [True, Nothing]) . at "B" . to_vector . should_equal []
|
||||
t2.filter "C" (Filter_Condition.Is_In [True, Nothing]) . at "C" . to_vector . should_equal [True, Nothing]
|
||||
t2.filter "A" (Filter_Condition.Is_In [False]) . at "A" . to_vector . should_equal [False]
|
||||
t2.filter "B" (Filter_Condition.Is_In [False]) . at "B" . to_vector . should_equal [False, False, False]
|
||||
t2.filter "C" (Filter_Condition.Is_In [False, False]) . at "C" . to_vector . should_equal [False]
|
||||
|
||||
Test.specify "by a boolean mask" <|
|
||||
t = table_builder [["ix", [1, 2, 3, 4, 5]], ["b", [True, False, Nothing, True, True]]]
|
||||
t.filter "b" on_problems=Report_Error . at "ix" . to_vector . should_equal [1, 4, 5]
|
||||
t.filter "b" Filter_Condition.Is_False on_problems=Report_Error . at "ix" . to_vector . should_equal [2]
|
||||
|
||||
Test.specify "should correctly reorder all kinds of columns" <|
|
||||
t = table_builder [["ints", [1, 2, 3, Nothing, 4]], ["floats", [4.0, Nothing, 3.0, 2.0, 1.0]], ["bools", [False, False, True, Nothing, False]], ["strings", ["a", Nothing, "b", "c", "d"]], ["mask", [False, True, True, True, Nothing]]]
|
||||
t2 = t.filter "mask" on_problems=Report_Error
|
||||
t2.at "ints" . to_vector . should_equal [2, 3, Nothing]
|
||||
t2.at "floats" . to_vector . should_equal [Nothing, 3.0, 2.0]
|
||||
t2.at "bools" . to_vector . should_equal [False, True, Nothing]
|
||||
t2.at "strings" . to_vector . should_equal [Nothing, "b", "c"]
|
||||
t2.at "mask" . to_vector . should_equal [True, True, True]
|
||||
|
||||
Test.specify "should check types of boolean operations" <|
|
||||
t = table_builder [["ix", [1, 2, 3, 4, 5]], ["b", [True, False, Nothing, True, True]]]
|
||||
tester = check_empty ["ix", "b"]
|
||||
|
@ -108,6 +108,19 @@ spec =
|
||||
t4 = t1.filter "A" (Filter_Condition.Between (t1.at "B") 33)
|
||||
t4.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE ("T1"."A" BETWEEN "T1"."B" AND ?)', [[33, int]]]
|
||||
|
||||
Test.specify "should generate an IN expression" <|
|
||||
t2 = t1.filter "A" (Filter_Condition.Is_In [1, 2, 'foo'])
|
||||
t2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE COALESCE("T1"."A" IN (?, ?, ?), 2=1)', [[1, int], [2, int], ["foo", int]]]
|
||||
|
||||
t3 = t1.filter "A" (Filter_Condition.Is_In [1])
|
||||
t3.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE COALESCE("T1"."A" IN (?), 2=1)', [[1, int]]]
|
||||
|
||||
t4 = t1.filter "A" (Filter_Condition.Is_In [])
|
||||
t4.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE (2=1)', []]
|
||||
|
||||
t5 = t1.filter "A" (Filter_Condition.Is_In [Nothing])
|
||||
t5.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE ((2=1) OR ("T1"."A" IS NULL))', []]
|
||||
|
||||
Test.group "[Codegen] Joining Tables" <|
|
||||
t2 = test_connection.query (SQL_Query.Table_Name "T2")
|
||||
t3 = test_connection.query (SQL_Query.Table_Name "T3")
|
||||
|
@ -1,5 +1,6 @@
|
||||
from Standard.Base import all
|
||||
from Standard.Base.Error.Problem_Behavior import Report_Error
|
||||
import Standard.Base.Data.Time.Duration
|
||||
|
||||
from Standard.Table import Table, Column, Sort_Column, Column_Selector, Sort_Column_Selector, Aggregate_Column
|
||||
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all hiding First, Last
|
||||
@ -77,6 +78,22 @@ spec =
|
||||
t.at 'latitude' . to_vector . should_equal [34.19, 4.88]
|
||||
t.at 'elevation' . to_vector . should_equal [Nothing, 19]
|
||||
|
||||
make_varied_type_table =
|
||||
strs = ["strs", ["a", "b", "c", Nothing]]
|
||||
ints = ["ints", [Nothing, 1, 2, 4]]
|
||||
doubles = ["doubles", [0.0, 1.5, Nothing, 2.0]]
|
||||
doubles_and_ints = ["doubles_and_ints", [0, 1.5, Nothing, 2]]
|
||||
custom_objects = ["custom_objects", [My.Data 1 2, My.Data 3 4, Nothing, Nothing]]
|
||||
dates = ["dates", [Nothing, Date.new 2000, Date.new 2022 8 20, Date.new 1999 1 1]]
|
||||
times = ["times", [Time_Of_Day.new 18 00, Time_Of_Day.new 1 2 34, Nothing, Time_Of_Day.new]]
|
||||
datetimes = ["datetimes", [Date_Time.new 2000, Date_Time.new 1999 1 2 3 4 5, Nothing, Date_Time.new 2022 8 27 11 22 25]]
|
||||
mixed = ["mixed", [1, "a", Nothing, Date.new 2022 8 27]]
|
||||
mixed_dates = ["mixed_dates", [Date.new 1999 1 2, Date_Time.new 1999 1 2 3 40, Date.new 1999 1 2, Date_Time.new 1999 1 2 3 40]]
|
||||
just_nulls = ["just_nulls", [Nothing, Nothing, Nothing, Nothing]]
|
||||
|
||||
Table.new [strs, ints, doubles, doubles_and_ints, custom_objects, dates, times, datetimes, mixed, mixed_dates, just_nulls]
|
||||
varied_type_table = make_varied_type_table
|
||||
|
||||
Test.group 'Construction' <|
|
||||
Test.specify 'should allow creating a table from rows' <|
|
||||
header = ['foo', 'bar']
|
||||
@ -87,30 +104,17 @@ spec =
|
||||
r.at 'bar' . to_vector . should_equal [False, True, False]
|
||||
|
||||
Test.specify "should correctly infer storage types" <|
|
||||
strs = ["strs", ["a", "b", "c", Nothing]]
|
||||
ints = ["ints", [Nothing, 1, 2, 4]]
|
||||
doubles = ["doubles", [0.0, 1.5, Nothing, 2.0]]
|
||||
doubles_and_ints = ["doubles_and_ints", [0, 1.5, Nothing, 2]]
|
||||
custom_objects = ["custom_objects", [My.Data 1 2, My.Data 3 4, Nothing, Nothing]]
|
||||
dates = ["dates", [Nothing, Date.new 2000, Date.new 2022 8 20, Date.new 1999 1 1]]
|
||||
times = ["times", [Time_Of_Day.new 18 00, Time_Of_Day.new 1 2 34, Nothing, Time_Of_Day.new]]
|
||||
datetimes = ["datetimes", [Date_Time.new 2000, Date_Time.new 1999 1 2 3 4 5, Nothing, Date_Time.new 2022 8 27 11 22 25]]
|
||||
mixed = ["mixed", [1, "a", Nothing, Date.new 2022 8 27]]
|
||||
mixed_dates = ["mixed_dates", [Date.new 1999 1 2, Date_Time.new 1999 1 2 3 40, Date.new 1999 1 2, Date_Time.new 1999 1 2 3 40]]
|
||||
just_nulls = ["just_nulls", [Nothing, Nothing, Nothing, Nothing]]
|
||||
|
||||
table = Table.new [strs, ints, doubles, doubles_and_ints, custom_objects, dates, times, datetimes, mixed, mixed_dates, just_nulls]
|
||||
table.at "strs" . storage_type . should_equal Storage.Text
|
||||
table.at "ints" . storage_type . should_equal Storage.Integer
|
||||
table.at "doubles" . storage_type . should_equal Storage.Decimal
|
||||
table.at "doubles_and_ints" . storage_type . should_equal Storage.Decimal
|
||||
table.at "custom_objects" . storage_type . should_equal Storage.Any
|
||||
table.at "dates" . storage_type . should_equal Storage.Date
|
||||
table.at "times" . storage_type . should_equal Storage.Time_Of_Day
|
||||
table.at "datetimes" . storage_type . should_equal Storage.Date_Time
|
||||
table.at "mixed" . storage_type . should_equal Storage.Any
|
||||
table.at "mixed_dates" . storage_type . should_equal Storage.Any
|
||||
table.at "just_nulls" . storage_type . should_equal Storage.Any
|
||||
varied_type_table.at "strs" . storage_type . should_equal Storage.Text
|
||||
varied_type_table.at "ints" . storage_type . should_equal Storage.Integer
|
||||
varied_type_table.at "doubles" . storage_type . should_equal Storage.Decimal
|
||||
varied_type_table.at "doubles_and_ints" . storage_type . should_equal Storage.Decimal
|
||||
varied_type_table.at "custom_objects" . storage_type . should_equal Storage.Any
|
||||
varied_type_table.at "dates" . storage_type . should_equal Storage.Date
|
||||
varied_type_table.at "times" . storage_type . should_equal Storage.Time_Of_Day
|
||||
varied_type_table.at "datetimes" . storage_type . should_equal Storage.Date_Time
|
||||
varied_type_table.at "mixed" . storage_type . should_equal Storage.Any
|
||||
varied_type_table.at "mixed_dates" . storage_type . should_equal Storage.Any
|
||||
varied_type_table.at "just_nulls" . storage_type . should_equal Storage.Any
|
||||
|
||||
pending_python_missing = if Polyglot.is_language_installed "python" . not then
|
||||
"Can't run Python tests, Python is not installed."
|
||||
@ -943,6 +947,87 @@ spec =
|
||||
t2.at "A" . to_vector . should_equal [2, 3]
|
||||
t2.at "B" . to_vector . should_equal [5, 6]
|
||||
|
||||
Test.specify "by an Is_In check in a Vector, on various types of columns" <|
|
||||
varied_type_table.filter "strs" (Filter_Condition.Is_In ["c", "b", Nothing]) . at "strs" . to_vector . should_equal ["b", "c", Nothing]
|
||||
varied_type_table.filter "ints" (Filter_Condition.Is_In [1, 2, 3]) . at "ints" . to_vector . should_equal [1, 2]
|
||||
varied_type_table.filter "ints" (Filter_Condition.Is_In [1, Nothing]) . at "ints" . to_vector . should_equal [Nothing, 1]
|
||||
varied_type_table.filter "doubles" (Filter_Condition.Is_In [0.0, Nothing]) . at "doubles" . to_vector . should_equal [0.0, Nothing]
|
||||
varied_type_table.filter "dates" (Filter_Condition.Is_In [Date.new 2000, Date.new 1999 1 1, Date_Time.new 2022 8 20]) . at "dates" . to_vector . should_equal [Date.new 2000, Date.new 1999 1 1]
|
||||
varied_type_table.filter "datetimes" (Filter_Condition.Is_In [Date_Time.new 2022 8 27 11 22 25, Nothing, Date_Time.new 2030, Date.new 2000]) . at "datetimes" . to_vector . should_equal [Nothing, Date_Time.new 2022 8 27 11 22 25]
|
||||
varied_type_table.filter "times" (Filter_Condition.Is_In [Time_Of_Day.new 18 00, Time_Of_Day.new 18 19, Date_Time.new 2000 1 1]) . at "times" . to_vector . should_equal [Time_Of_Day.new 18 00]
|
||||
varied_type_table.filter "mixed" (Filter_Condition.Is_In [42, "a", 1, Nothing, Date.new 2022 8 27, Date_Time.new 2022 8 27]) . at "mixed" . to_vector . should_equal [1, "a", Nothing, Date.new 2022 8 27]
|
||||
varied_type_table.filter "mixed" (Filter_Condition.Is_In [42, Date_Time.new 2022 8 27, 1]) . at "mixed" . to_vector . should_equal [1]
|
||||
varied_type_table.filter "just_nulls" (Filter_Condition.Is_In []) . at "just_nulls" . to_vector . should_equal []
|
||||
varied_type_table.filter "just_nulls" (Filter_Condition.Is_In [Nothing, Nothing, 0]) . at "just_nulls" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing]
|
||||
varied_type_table.filter "just_nulls" (Filter_Condition.Is_In [0]) . at "just_nulls" . to_vector . should_equal []
|
||||
varied_type_table.filter "custom_objects" (Filter_Condition.Is_In [2, My.Data 2 1, Nothing]) . at "custom_objects" . to_vector . should_equal [My.Data 1 2, Nothing, Nothing]
|
||||
|
||||
t2 = Table.new [["ints", [1, 2, 3]], ["doubles", [1.2, 0.0, 1.0]]]
|
||||
t2.filter "ints" (Filter_Condition.Is_In [2.0, 1.5, 3, 4]) . at "ints" . to_vector . should_equal [2, 3]
|
||||
t2.filter "doubles" (Filter_Condition.Is_In [0.1, 1, 3, 1.2]) . at "doubles" . to_vector . should_equal [1.2, 1.0]
|
||||
|
||||
# We test these very carefully as our implementation relies on some short-circuit logic that is not as trivial as the hashmap checks which are done for other builtin types.
|
||||
[True, False].each has_nulls->
|
||||
[True, False].each has_true->
|
||||
[True, False].each has_false->
|
||||
vec_builder = Vector.new_builder
|
||||
if has_nulls then vec_builder.append Nothing
|
||||
if has_true then vec_builder.append True
|
||||
if has_false then vec_builder.append False
|
||||
in_vector = vec_builder.to_vector
|
||||
|
||||
vectors = [[True, False, Nothing], [Nothing, Nothing, Nothing], [False, False, True], [True, True, True], [False, False, False], [Nothing, True, True], [False, Nothing, False]]
|
||||
vectors.each column_vector->
|
||||
not x = case x of
|
||||
True -> False
|
||||
False -> True
|
||||
Nothing -> Nothing
|
||||
negated_column_vector = column_vector.map not
|
||||
t = Table.new [["X", column_vector]]
|
||||
|
||||
expected_vector = column_vector.filter (Filter_Condition.Is_In in_vector)
|
||||
expected_neg_vector = negated_column_vector.filter (Filter_Condition.Is_In in_vector)
|
||||
|
||||
t.filter "X" (Filter_Condition.Is_In in_vector) on_problems=Report_Error . at "X" . to_vector . should_equal expected_vector
|
||||
t2 = t.set "Y" (t.at "X" . not)
|
||||
t2.filter "Y" (Filter_Condition.Is_In in_vector) on_problems=Report_Error . at "Y" . to_vector . should_equal expected_neg_vector
|
||||
|
||||
Test.specify "should perform `Is_In` efficiently for builtin types" <|
|
||||
first_day = Date_Time.new 2000 1 1
|
||||
make_date x = first_day + (Duration.new seconds=x)
|
||||
init = Duration.time_execution <|
|
||||
t = Table.new [["X", (200.up_to 10000 . to_vector)]]
|
||||
vec = 4000.up_to 13000 . to_vector
|
||||
expected_vector = 4000.up_to 10000 . to_vector
|
||||
expected_vector_2 = 200.up_to 10000 . with_step 2 . to_vector
|
||||
dates_vec = vec.map make_date
|
||||
bool_vec = Vector.fill 7000 True
|
||||
date_col = t.at "X" . map make_date
|
||||
[t, vec, expected_vector, expected_vector_2, dates_vec, bool_vec, date_col]
|
||||
t = init.second . at 0
|
||||
vec = init.second . at 1
|
||||
expected_vector = init.second . at 2
|
||||
expected_vector_2 = init.second . at 3
|
||||
dates_vec = init.second . at 4
|
||||
bool_vec = init.second . at 5
|
||||
date_col = init.second . at 6
|
||||
|
||||
expected_max_time_ms = init.first.total_milliseconds * 2
|
||||
check_timing name ~action =
    # Runs the suspended `action` once under `Duration.time_execution` and
    # fails the test if it took longer than `expected_max_time_ms`.
    #
    # Arguments:
    # - name: label of the value kind being checked, used in the message.
    # - action: the suspended computation to time.
    #
    # `expected_max_time_ms` is defined by the enclosing test as twice the
    # measured initialization time, so the message reports it as the allowed
    # limit rather than as the initialization time itself.
    res = Duration.time_execution action
    runtime_ms = res.first.total_milliseconds
    if runtime_ms > expected_max_time_ms then
        Test.fail "Expected `Is_In` on "+name+" to be efficient, but it took "+runtime_ms.to_text+"ms while the allowed limit (twice the initialization time) was "+expected_max_time_ms.to_text+"ms."
|
||||
|
||||
check_timing "integers" <|
|
||||
t.filter "X" (Filter_Condition.Is_In vec) . at "X" . to_vector . should_equal expected_vector
|
||||
|
||||
check_timing "booleans" <|
|
||||
t.filter (t.at "X" % 2 == 0) (Filter_Condition.Is_In bool_vec) . at "X" . to_vector . should_equal expected_vector_2
|
||||
|
||||
check_timing "dates" <|
|
||||
t.filter date_col (Filter_Condition.Is_In dates_vec) . at "X" . to_vector . should_equal expected_vector
|
||||
|
||||
main = Test.Suite.run_main spec
|
||||
|
||||
## JS indexes months from 0, so we need to subtract 1.
|
||||
|
@ -53,6 +53,8 @@ spec = Test.group "List" <|
|
||||
list.filter (Filter_Condition.Equal_Or_Greater than=3) . should_equal [3, 4, 5].to_list
|
||||
list.filter (Filter_Condition.Equal_Or_Less than=(-1)) . should_equal Nil
|
||||
list.filter (Filter_Condition.Between 2 4) . should_equal [2, 3, 4].to_list
|
||||
list.filter (Filter_Condition.Is_In [7, 3, 2]) . should_equal [2, 3].to_list
|
||||
list.filter (Filter_Condition.Not_In [7, 3, 2]) . should_equal [1, 4, 5].to_list
|
||||
|
||||
Test.expect_panic_with (list.filter (Filter_Condition.Starts_With "a")) No_Such_Method_Error_Data
|
||||
list.filter Filter_Condition.Is_True . should_equal Nil
|
||||
|
@ -77,6 +77,8 @@ spec = Test.group "Range" <|
|
||||
range.filter (Filter_Condition.Equal_Or_Less than=(-1)) . should_equal []
|
||||
range.filter (Filter_Condition.Between 2 4) . should_equal [2, 3, 4]
|
||||
range.filter (Filter_Condition.Between 2.1 4.5) . should_equal [3, 4]
|
||||
range.filter (Filter_Condition.Is_In [7, 3, 2]) . should_equal [2, 3]
|
||||
range.filter (Filter_Condition.Not_In [7, 3, 2]) . should_equal [1, 4, 5]
|
||||
|
||||
Test.expect_panic_with (range.filter (Filter_Condition.Starts_With "a")) No_Such_Method_Error_Data
|
||||
Test.expect_panic_with (range.filter (Filter_Condition.Like "a%")) Unsupported_Argument_Types_Data
|
||||
|
@ -151,6 +151,9 @@ spec = Test.group "Vectors" <|
|
||||
vec.filter (Filter_Condition.Equal_Or_Less than=(-1)) . should_equal []
|
||||
vec.filter (Filter_Condition.Between 2 4) . should_equal [2, 3, 4]
|
||||
vec.filter (Filter_Condition.Between 2.1 4.5) . should_equal [3, 4]
|
||||
vec.filter (Filter_Condition.Is_In [7, 3, 2, 2, 2]) . should_equal [2, 3]
|
||||
vec.filter (Filter_Condition.Is_In []) . should_equal []
|
||||
vec.filter (Filter_Condition.Not_In [7, 3, 2, 2]) . should_equal [1, 4, 5]
|
||||
|
||||
Test.expect_panic_with (vec.filter (Filter_Condition.Starts_With "a")) No_Such_Method_Error_Data
|
||||
vec.filter Filter_Condition.Is_True . should_equal []
|
||||
@ -167,6 +170,9 @@ spec = Test.group "Vectors" <|
|
||||
txtvec.filter (Filter_Condition.Greater than="b") . should_equal ["bbb", "cccc", "baaa", "ś"]
|
||||
txtvec.filter (Filter_Condition.Between "b" "c") . should_equal ["bbb", "baaa"]
|
||||
Test.expect_panic_with (txtvec.filter (Filter_Condition.Starts_With 42)) Unsupported_Argument_Types_Data
|
||||
txtvec.filter Filter_Condition.Is_True . should_equal []
|
||||
txtvec.filter (Filter_Condition.Is_In [1, 2]) . should_equal []
|
||||
txtvec.filter (Filter_Condition.Is_In ["bbb", 's\u0301', "bbb", "FOOBAR"]) . should_equal ["bbb", "ś"]
|
||||
|
||||
["", Nothing, " ", "a"].filter (Filter_Condition.Is_Empty) . should_equal ["", Nothing]
|
||||
["", Nothing, " ", "a"].filter (Filter_Condition.Not_Empty) . should_equal [" ", "a"]
|
||||
|
Loading…
Reference in New Issue
Block a user