diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7ae8aee0aa..1e10d7a658 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -211,6 +211,7 @@
`Not_Like`.][3775]
- [Reimplemented `Duration` as a built-in type.][3759]
- [Implemented `Table.replace_text` for in-memory table.][3793]
+- [Extended `Filter_Condition` with `Is_In` and `Not_In`.][3790]
[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@@ -339,6 +340,7 @@
[3775]: https://github.com/enso-org/enso/pull/3775
[3759]: https://github.com/enso-org/enso/pull/3759
[3793]: https://github.com/enso-org/enso/pull/3793
+[3790]: https://github.com/enso-org/enso/pull/3790
#### Enso Compiler
diff --git a/build.sbt b/build.sbt
index 8edb282da6..d619883e51 100644
--- a/build.sbt
+++ b/build.sbt
@@ -1192,7 +1192,6 @@ lazy val parser = (project in file("lib/scala/parser"))
s"-Djava.library.path=$root/target/rust/debug"
},
libraryDependencies ++= Seq(
- "com.storm-enroute" %% "scalameter" % scalameterVersion % "bench",
"org.scalatest" %%% "scalatest" % scalatestVersion % Test
),
testFrameworks := List(
diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Filter_Condition.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Filter_Condition.enso
index f22c29621d..9358408e9e 100644
--- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Filter_Condition.enso
+++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Filter_Condition.enso
@@ -105,6 +105,12 @@ type Filter_Condition
See https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8032926
Not_Like pattern:Text
+ ## Is the value contained in `values`?
+ Is_In values:Vector
+
+ ## Is the value not contained in `values`?
+ Not_In values:Vector
+
## Converts a `Filter_Condition` condition into a predicate taking an
element and returning a value indicating whether the element should be
accepted by the filter.
@@ -142,6 +148,10 @@ type Filter_Condition
Not_Like sql_pattern ->
regex = sql_like_to_regex sql_pattern
elem -> regex.matches elem . not
+ ## TODO once we have proper hashing we could create a hashmap and
+ answer quicker, currently we need to do a full scan for each element.
+ Is_In values -> values.contains
+ Not_In values -> elem -> values.contains elem . not
## PRIVATE
sql_like_to_regex sql_pattern =
diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Duration.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Duration.enso
index d6fc64af3a..d309496003 100644
--- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Duration.enso
+++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Duration.enso
@@ -273,6 +273,6 @@ type Duration
import Standard.Base.Data.Time.Duration
- example_is_empty = 10.seconds.is_empty
+ example_is_empty = Duration.zero.is_empty
is_empty : Boolean
is_empty self = self.to_vector . all (==0)
diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Network/Http.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Network/Http.enso
index d927161908..6bf58f5983 100644
--- a/distribution/lib/Standard/Base/0.0.0-dev/src/Network/Http.enso
+++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Network/Http.enso
@@ -46,7 +46,7 @@ polyglot java import org.enso.base.Http_Utils
import Standard.Base.Network.Proxy
example_new =
- Http.new (timeout = 30.seconds) (proxy = Proxy.new "example.com" 8080)
+ Http.new (timeout = (Duration.new seconds=30)) (proxy = Proxy.new "example.com" 8080)
new : Duration -> Boolean -> Proxy -> Http
new (timeout = (Duration.new seconds=10)) (follow_redirects = True) (proxy = Proxy.System) (version = Version.Http_1_1) =
Http_Data timeout follow_redirects proxy version
@@ -595,7 +595,7 @@ type Http
example_request =
form = [Form.text_field "name" "John Doe"]
req = Request.new Method.Post "http://httpbin.org/post" . with_form form
- http = Http.new (timeout = 30.seconds)
+ http = Http.new (timeout = (Duration.new seconds=30))
http.request req
request : Request -> Response ! Request_Error
request self req =
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso
index 6d7a467a30..4826673c6d 100644
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso
@@ -13,7 +13,7 @@ import project.Internal.IR.Internal_Column.Internal_Column
from project.Data.Table import Table, freshen_columns
-from project.Errors import Unsupported_Database_Operation_Error
+from project.Errors import Unsupported_Database_Operation_Error, Unsupported_Database_Operation_Error_Data
type Column
@@ -130,7 +130,7 @@ type Column
prepare_operand operand operand_type = case operand of
other_column : Column ->
if Helpers.check_integrity self other_column then other_column.expression else
- Error.throw <| Unsupported_Database_Operation_Error "Cannot use columns coming from different contexts in one expression without a join."
+ Error.throw <| Unsupported_Database_Operation_Error_Data "Cannot use columns coming from different contexts in one expression without a join."
constant ->
actual_operand_type = operand_type.if_nothing self.sql_type
Expression.Constant actual_operand_type constant
@@ -394,6 +394,32 @@ type Column
/ : Column | Any -> Column
/ self other = self.make_binary_op "/" other
+ ## Element-wise modulus.
+
+ Arguments:
+ - other: The value to modulo `self` against. If `other` is a column, the
+ modulus is performed pairwise between corresponding elements of `self`
+ and `other`.
+
+ Returns a column with the results of taking this column's elements modulo
+ `other`.
+
+ > Example
+ Modulus of two columns against each other.
+
+ import Standard.Examples
+
+ example_mod = Examples.integer_column % Examples.decimal_column
+
+ > Example
+ Modulus of a column with a number.
+
+ import Standard.Examples
+
+ example_mod = Examples.integer_column % 3
+ % : Column | Any -> Column
+ % self other = self.make_binary_op "%" other
+
## UNSTABLE
Element-wise boolean conjunction.
@@ -433,20 +459,20 @@ type Column
Returns a column of booleans, with `True` items at the positions where
this column contains a `Nothing`.
is_missing : Column
- is_missing self = self.make_unary_op "ISNULL" new_type=SQL_Type.boolean
+ is_missing self = self.make_unary_op "IS_NULL" new_type=SQL_Type.boolean
## PRIVATE
Returns a column of booleans, with `True` items at the positions where
this column contains an empty string or `Nothing`.
is_empty : Column
- is_empty self = self.make_unary_op "ISEMPTY" new_type=SQL_Type.boolean
+ is_empty self = self.make_unary_op "IS_EMPTY" new_type=SQL_Type.boolean
## UNSTABLE
Returns a new column where missing values have been replaced with the
provided default.
fill_missing : Any -> Column
- fill_missing self default = self.make_binary_op "FILLNULL" default
+ fill_missing self default = self.make_binary_op "FILL_NULL" default
## UNSTABLE
@@ -495,7 +521,7 @@ type Column
take self range=(First 1) =
_ = range
msg = "`Column.take` is not yet implemented."
- Error.throw (Unsupported_Database_Operation_Error msg)
+ Error.throw (Unsupported_Database_Operation_Error_Data msg)
## UNSTABLE
Creates a new Column from the input with the specified range of rows
@@ -507,7 +533,7 @@ type Column
drop self range=(First 1) =
_ = range
msg = "`Column.drop` is not yet implemented."
- Error.throw (Unsupported_Database_Operation_Error msg)
+ Error.throw (Unsupported_Database_Operation_Error_Data msg)
## UNSTABLE
@@ -551,10 +577,63 @@ type Column
contains : Column | Text -> Column
contains self other = self.make_binary_op "contains" other new_type=SQL_Type.boolean
- ## PRIVATE
- Checks for each element of the column if it matches an SQL-like pattern.
+ ## Checks for each element of the column if it matches an SQL-like pattern.
+
+ Arguments:
+ - pattern: The pattern to match `self` against. If it is a column, the
+ operation is performed pairwise between corresponding elements of
+ `self` and that column. The pattern is an SQL-like pattern, where
+ `%` matches any sequence of characters and `_` matches any single
+ character.
+
+ > Example
+ Check if elements of a column start with 'F' and end with a dot.
+
+ import Standard.Examples
+
+ example_like = Examples.text_column_1.like "F%."
like : Column | Text -> Column
- like self other = self.make_binary_op "LIKE" other new_type=SQL_Type.boolean
+ like self pattern = self.make_binary_op "LIKE" pattern new_type=SQL_Type.boolean
+
+ ## Checks for each element of the column if it is contained within the
+ provided vector.
+
+ Arguments:
+ - vector: A vector of elements. The resulting column will contain true at
+ the positions where the corresponding element of `self` is contained
+ in `vector`.
+
+ > Example
+ Check if elements of a column are contained in a provided vector.
+
+ import Standard.Examples
+
+ example_is_in = Examples.text_column_1.is_in [1, 2, 5]
+ is_in self vector =
+ ## This is slightly hacky - we don't provide operand types as we want to
+ allow any type to get through and currently we do not have a mapping
+ from Enso types to SQL types (it may be available in the future). So
+ we just rely on Nothing resolving to the current column type. That
+ type may not always match the operands, but the current
+ implementation uses this type only for two purposes: generated SQL
+ visualization (so the color will be consistent with the column type
+ and not the value type - that can be confusing, we probably want to
+ fix it later) and setting up the query - but at the set up this only
+ applies to adding nulls - setting any other object does not check the
+ type at this level anyway.
+ partitioned = vector.partition .is_nothing
+ nulls = partitioned.first
+ non_nulls = partitioned.second
+ ## Since SQL `NULL IN (NULL)` yields `NULL`, we need to handle this case
+ separately. So we handle all non-null values using `IS_IN` and then
+ `OR` that with a null check (if the vector contained any nulls to
+ begin with). The implementation also ensures that even
+ `NULL IN (...)` is coalesced to False, so that negation works as
+ expected.
+ is_in_not_null = self.make_op "IS_IN" operands=non_nulls new_type=SQL_Type.boolean
+ case nulls.not_empty of
+ True -> is_in_not_null || self.is_missing
+ False -> is_in_not_null
## PRIVATE
as_internal : Internal_Column
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso
index 8405041367..04d2876f37 100644
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso
@@ -11,7 +11,7 @@ import project.Internal.IR.Nulls_Order.Nulls_Order
import project.Internal.IR.Query.Query
from project.Data.SQL import code
-from project.Errors import Unsupported_Database_Operation_Error
+from project.Errors import Unsupported_Database_Operation_Error_Data
type Internal_Dialect
@@ -169,14 +169,15 @@ base_dialect =
unary = name -> [name, make_unary_op name]
fun = name -> [name, make_function name]
- arith = [bin "+", bin "-", bin "*", bin "/"]
+ arith = [bin "+", bin "-", bin "*", bin "/", bin "%"]
logic = [bin "AND", bin "OR", unary "NOT"]
compare = [bin "=", bin "!=", bin "<", bin ">", bin "<=", bin ">=", ["BETWEEN", make_between]]
agg = [fun "MAX", fun "MIN", fun "AVG", fun "SUM"]
counts = [fun "COUNT", ["COUNT_ROWS", make_constant "COUNT(*)"]]
- text = [["ISEMPTY", make_is_empty], bin "LIKE"]
- nulls = [["ISNULL", make_right_unary_op "IS NULL"], ["FILLNULL", make_function "COALESCE"]]
- base_map = Map.from_vector (arith + logic + compare + agg + counts + text + nulls)
+ text = [["IS_EMPTY", make_is_empty], bin "LIKE"]
+ nulls = [["IS_NULL", make_right_unary_op "IS NULL"], ["FILL_NULL", make_function "COALESCE"]]
+ contains = [["IS_IN", make_is_in]]
+ base_map = Map.from_vector (arith + logic + compare + agg + counts + text + nulls + contains)
Internal_Dialect.Value base_map wrap_in_quotes
## PRIVATE
@@ -188,7 +189,7 @@ make_is_empty arguments = case arguments.length of
is_empty = (arg ++ " = ''").paren
(is_null ++ " OR " ++ is_empty).paren
_ ->
- Error.throw <| Illegal_State_Error_Data ("Invalid amount of arguments for operation ISEMPTY")
+ Error.throw <| Illegal_State_Error_Data ("Invalid amount of arguments for operation IS_EMPTY")
## PRIVATE
make_between : Vector Builder -> Builder
@@ -201,6 +202,21 @@ make_between arguments = case arguments.length of
_ ->
Error.throw <| Illegal_State_Error_Data ("Invalid amount of arguments for operation BETWEEN")
+## PRIVATE
+make_is_in : Vector Builder -> Builder
+make_is_in arguments = case arguments.length of
+ 0 -> Error.throw <| Illegal_State_Error_Data ("The operation IS_IN requires at least one argument.")
+ ## If only the self argument is provided, no value will ever be in the empty list, so we just short circuit to false.
+ `IN ()` would be more meaningful, but it is a syntax error.
+ 1 -> code '2=1' . paren
+ _ ->
+ expr = arguments.first
+ list = arguments.tail
+ is_in = expr ++ " IN (" ++ (SQL.join ", " list) ++ ")"
+ ## We ensure that even `NULL IN (...)` is coalesced to False, so that
+ negation will work as expected.
+ code "COALESCE(" ++ is_in ++ ", 2=1)"
+
## PRIVATE
Builds code for an expression.
@@ -214,7 +230,7 @@ generate_expression dialect expr = case expr of
dialect.wrap_identifier origin ++ '.' ++ dialect.wrap_identifier name
Expression.Constant sql_type value -> SQL.interpolation sql_type value
Expression.Operation kind arguments ->
- op = dialect.operation_map.get_or_else kind (Error.throw <| Unsupported_Database_Operation_Error kind)
+ op = dialect.operation_map.get_or_else kind (Error.throw <| Unsupported_Database_Operation_Error_Data kind)
parsed_args = arguments.map (generate_expression dialect)
op parsed_args
_ : Order_Descriptor -> generate_order dialect expr
@@ -337,7 +353,7 @@ generate_query dialect query = case query of
code "SELECT * " ++ generate_select_context dialect ctx
Query.Insert table_name pairs ->
generate_insert_query dialect table_name pairs
- _ -> Error.throw <| Unsupported_Database_Operation_Error "Unsupported query type."
+ _ -> Error.throw <| Unsupported_Database_Operation_Error_Data "Unsupported query type."
## PRIVATE
Arguments:
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso
index 5b95026064..719b15d4c8 100644
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso
@@ -13,7 +13,7 @@ import project.Internal.IR.Order_Descriptor.Order_Descriptor
import project.Internal.IR.Nulls_Order.Nulls_Order
from project.Data.SQL import code
-from project.Errors import Unsupported_Database_Operation_Error
+from project.Errors import Unsupported_Database_Operation_Error_Data
## PRIVATE
@@ -232,7 +232,7 @@ make_order_descriptor internal_column sort_direction text_ordering =
case internal_column.sql_type.is_likely_text of
True ->
## In the future we can modify this error to suggest using a custom defined collation.
- if text_ordering.sort_digits_as_numbers then Error.throw (Unsupported_Database_Operation_Error "Natural ordering is currently not supported. You may need to materialize the Table to perform this operation.") else
+ if text_ordering.sort_digits_as_numbers then Error.throw (Unsupported_Database_Operation_Error_Data "Natural ordering is currently not supported. You may need to materialize the Table to perform this operation.") else
case text_ordering.case_sensitivity of
Nothing ->
Order_Descriptor.Value internal_column.expression sort_direction nulls_order=nulls collation=Nothing
@@ -240,7 +240,7 @@ make_order_descriptor internal_column sort_direction text_ordering =
Order_Descriptor.Value internal_column.expression sort_direction nulls_order=nulls collation="ucs_basic"
Case_Sensitivity.Insensitive locale -> case locale == Locale.default of
False ->
- Error.throw (Unsupported_Database_Operation_Error "Case insensitive ordering with custom locale is currently not supported. You may need to materialize the Table to perform this operation.")
+ Error.throw (Unsupported_Database_Operation_Error_Data "Case insensitive ordering with custom locale is currently not supported. You may need to materialize the Table to perform this operation.")
True ->
upper = Expression.Operation "UPPER" [internal_column.expression]
folded_expression = Expression.Operation "LOWER" [upper]
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso
index 22a30d1a35..ed46acc275 100644
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso
@@ -374,6 +374,32 @@ type Column
/ : Column | Any -> Column
/ self other = run_vectorized_binary_op self '/' (/) other
+ ## Element-wise modulus.
+
+ Arguments:
+ - other: The value to modulo `self` against. If `other` is a column, the
+ modulus is performed pairwise between corresponding elements of `self`
+ and `other`.
+
+ Returns a column with the results of taking this column's elements modulo
+ `other`.
+
+ > Example
+ Modulus of two columns against each other.
+
+ import Standard.Examples
+
+ example_mod = Examples.integer_column % Examples.decimal_column
+
+ > Example
+ Modulus of a column with a number.
+
+ import Standard.Examples
+
+ example_mod = Examples.integer_column % 3
+ % : Column | Any -> Column
+ % self other = run_vectorized_binary_op self '%' (%) other
+
## ALIAS AND Columns
Element-wise boolean conjunction.
@@ -585,11 +611,41 @@ type Column
contains self other =
run_vectorized_binary_op self "contains" (a -> b -> a.contains b) other
- ## PRIVATE
- Checks for each element of the column if it matches an SQL-like pattern.
+ ## Checks for each element of the column if it matches an SQL-like pattern.
+
+ Arguments:
+ - pattern: The pattern to match `self` against. If it is a column, the
+ operation is performed pairwise between corresponding elements of
+ `self` and that column. The pattern is an SQL-like pattern, where
+ `%` matches any sequence of characters and `_` matches any single
+ character.
+
+ > Example
+ Check if elements of a column start with 'F' and end with a dot.
+
+ import Standard.Examples
+
+ example_like = Examples.text_column_1.like "F%."
like : Column | Text -> Column
- like self other =
- run_vectorized_binary_op self "like" (_ -> _ -> Error.throw (Illegal_State_Error "The `Like` operation should only be used on Text columns.")) other
+ like self pattern =
+ run_vectorized_binary_op self "like" (_ -> _ -> Error.throw (Illegal_State_Error "The `Like` operation should only be used on Text columns.")) pattern
+
+ ## Checks for each element of the column if it is contained within the
+ provided vector.
+
+ Arguments:
+ - vector: A vector of elements. The resulting column will contain true at
+ the positions where the corresponding element of `self` is contained
+ in `vector`.
+
+ > Example
+ Check if elements of a column are contained in a provided vector.
+
+ import Standard.Examples
+
+ example_is_in = Examples.text_column_1.is_in [1, 2, 5]
+ is_in self vector =
+ run_vectorized_binary_op self "is_in" (elem -> vector -> vector.contains elem) vector skip_nulls=False
## ALIAS Transform Column
@@ -1137,18 +1193,22 @@ type Empty_Error
- name: The name of the vectorized operation.
- fallback_fn: A function used if the vectorized operation isn't available.
- operand: The operand to apply to the function after `column`.
+ - skip_nulls: Specifies if nulls should be skipped. If set to `True`, a null
+ value results in null without passing it to the function. If set to
+ `False`, the null values are passed as any other value and can have custom
+ handling logic.
run_vectorized_binary_op : Column -> Text -> (Any -> Any) -> Any -> Column
-run_vectorized_binary_op column name fallback_fn operand = case operand of
+run_vectorized_binary_op column name fallback_fn operand skip_nulls=True = case operand of
Column.Column_Data col2 ->
s1 = column.java_column.getStorage
ix = column.java_column.getIndex
s2 = col2.getStorage
- rs = s1.zip name fallback_fn s2 True
+ rs = s1.zip name fallback_fn s2 skip_nulls
Column.Column_Data (Java_Column.new "Result" ix rs)
_ ->
s1 = column.java_column.getStorage
ix = column.java_column.getIndex
- rs = s1.bimap name fallback_fn operand
+ rs = s1.bimap name fallback_fn operand skip_nulls
Column.Column_Data (Java_Column.new "Result" ix rs)
## PRIVATE
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso
index 4abffe477a..6ca21a5698 100644
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso
@@ -7,6 +7,7 @@ import Standard.Base.Data.Text.Case
import Standard.Base.System.Platform
import project.Data.Column.Column
+from project.Data.Column import get_item_string
import project.Data.Column_Name_Mapping.Column_Name_Mapping
import project.Data.Column_Selector.Column_Selector
import project.Data.Data_Formatter.Data_Formatter
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Filter_Condition_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Filter_Condition_Helpers.enso
index 24deeea8c7..21c376b550 100644
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Filter_Condition_Helpers.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Filter_Condition_Helpers.enso
@@ -56,6 +56,9 @@ make_filter_column source_column filter_condition = case filter_condition of
Value_Type.expect_text source_column.value_type <|
expect_column_or_value_as_text "pattern" pattern <|
source_column.like pattern . not
+ # Vector
+ Is_In values -> source_column.is_in values
+ Not_In values -> source_column.is_in values . not
## PRIVATE
expect_column_or_value_as_text field_name column_or_value ~action = case column_or_value of
diff --git a/std-bits/base/src/main/java/org/enso/base/polyglot/NumericConverter.java b/std-bits/base/src/main/java/org/enso/base/polyglot/NumericConverter.java
new file mode 100644
index 0000000000..d1af2066aa
--- /dev/null
+++ b/std-bits/base/src/main/java/org/enso/base/polyglot/NumericConverter.java
@@ -0,0 +1,108 @@
+package org.enso.base.polyglot;
+
+import java.math.BigDecimal;
+
+/**
+ * The numeric converter deals with conversions of Java numeric types to the two main types
+ * supported by Enso - Long for integers and Double for decimals. Any other types are coerced to one
+ * of these types.
+ *
+ * <p>It provides two concepts - coercion - which allows to coerce an integer type to a decimal, but
+ * will not convert a decimal to an integer even if it has 0 fractional part. Then there is
+ * conversion which allows to convert a decimal with 0 fractional part to an integer. Coercion
+ * should be used when we care about the original type of the object (i.e. we want any decimals to
+ * require decimal storage even if they have 0 fractional part). Conversion is to be used when we
+ * want to be consistent with Enso's equality semantics where 2 == 2.0.
+ */
+public class NumericConverter {
+ /**
+ * Coerces a number (possibly an integer) to a Double.
+ *
+ * <p>Will throw an exception if the object is not a number.
+ */
+ public static double coerceToDouble(Object o) {
+ return switch (o) {
+ case Double x -> x;
+ case BigDecimal x -> x.doubleValue();
+ case Float x -> x.doubleValue();
+ default -> (double) coerceToLong(o);
+ };
+ }
+
+ /**
+ * Coerces a number to an Integer.
+ *
+ * <p>Will throw an exception if the object is not an integer.
+ *
+ * <p>Decimal values are not accepted.
+ */
+ public static long coerceToLong(Object o) {
+ return switch (o) {
+ case Long x -> x;
+ case Integer x -> x.longValue();
+ case Short x -> x.longValue();
+ case Byte x -> x.longValue();
+ default -> throw new UnsupportedOperationException();
+ };
+ }
+
+ /** Returns true if the object is any supported number. */
+ public static boolean isCoercibleToDouble(Object o) {
+ return o instanceof Double
+ || o instanceof BigDecimal
+ || o instanceof Float
+ || isCoercibleToLong(o);
+ }
+
+ /**
+ * Returns true if the object is any supported integer.
+ *
+ * <p>Returns false for decimals with 0 fractional part - the type itself must be an integer type.
+ */
+ public static boolean isCoercibleToLong(Object o) {
+ return o instanceof Long || o instanceof Integer || o instanceof Short || o instanceof Byte;
+ }
+
+ /**
+ * Tries converting the value to a Double.
+ *
+ * <p>It will return null if the object represented a non-numeric value.
+ */
+ public static Double tryConvertingToDouble(Object o) {
+ return switch (o) {
+ case Double x -> x;
+ case BigDecimal x -> x.doubleValue();
+ case Float x -> x.doubleValue();
+ case Long x -> x.doubleValue();
+ case Integer x -> x.doubleValue();
+ case Short x -> x.doubleValue();
+ case Byte x -> x.doubleValue();
+ case null, default -> null;
+ };
+ }
+
+ /**
+ * Tries converting the value to a Long.
+ *
+ * <p>Decimal number types are accepted only if their fractional part is 0. It will return null
+ * if the object represented a non-integer value.
+ */
+ public static Long tryConvertingToLong(Object o) {
+ return switch (o) {
+ case Long x -> x;
+ case Integer x -> x.longValue();
+ case Short x -> x.longValue();
+ case Byte x -> x.longValue();
+ case Double x -> x % 1.0 == 0.0 ? x.longValue() : null;
+ case Float x -> x % 1.0f == 0.0f ? x.longValue() : null;
+ case BigDecimal x -> {
+ try {
+ yield x.longValueExact();
+ } catch (ArithmeticException e) {
+ yield null;
+ }
+ }
+ case null, default -> null;
+ };
+ }
+}
diff --git a/std-bits/base/src/main/java/org/enso/base/Polyglot_Utils.java b/std-bits/base/src/main/java/org/enso/base/polyglot/Polyglot_Utils.java
similarity index 56%
rename from std-bits/base/src/main/java/org/enso/base/Polyglot_Utils.java
rename to std-bits/base/src/main/java/org/enso/base/polyglot/Polyglot_Utils.java
index 23abd642bd..14d0592ce7 100644
--- a/std-bits/base/src/main/java/org/enso/base/Polyglot_Utils.java
+++ b/std-bits/base/src/main/java/org/enso/base/polyglot/Polyglot_Utils.java
@@ -1,11 +1,14 @@
-package org.enso.base;
-
-import org.graalvm.polyglot.Value;
+package org.enso.base.polyglot;
import java.time.LocalDate;
import java.time.LocalDateTime;
+import org.graalvm.polyglot.Value;
public class Polyglot_Utils {
+ /**
+ * Converts a polyglot Value ensuring that various date/time types are converted to the correct
+ * type.
+ */
public static Object convertPolyglotValue(Value item) {
if (item.isDate()) {
LocalDate d = item.asDate();
@@ -26,12 +29,14 @@ public class Polyglot_Utils {
return item.as(Object.class);
}
- /** A helper functions for situations where we cannot use the Value conversion directly.
- *
- * Mostly happens due to the issue: https://github.com/oracle/graal/issues/4967
- * Once that issue is resolved, we should probably remove this helper.
- *
- * In that case we take a generic Object, knowing that the values of interest to us will be passed as Value anyway - so we can check that and fire the conversion if needed.
+ /**
+ * A helper function for situations where we cannot use the Value conversion directly.
+ *
+ * <p>Mostly happens due to the issue: https://github.com/oracle/graal/issues/4967 - once that
+ * issue is resolved, we should probably remove this helper.
+ *
+ * <p>In that case we take a generic Object, knowing that the values of interest to us will be
+ * passed as Value anyway - so we can check that and fire the conversion if needed.
*/
public static Object convertPolyglotValue(Object item) {
if (item instanceof Value v) {
diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java
index 82ca979e37..6b6ddf25a9 100644
--- a/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java
+++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java
@@ -8,7 +8,7 @@ import org.enso.table.data.table.problems.UnquotedDelimiter;
import java.util.List;
public class Concatenate extends Aggregator {
- private final Storage storage;
+ private final Storage<?> storage;
private final String separator;
private final String prefix;
private final String suffix;
diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/CountDistinct.java b/std-bits/table/src/main/java/org/enso/table/aggregations/CountDistinct.java
index 43bd6d9855..6a6b7351c2 100644
--- a/std-bits/table/src/main/java/org/enso/table/aggregations/CountDistinct.java
+++ b/std-bits/table/src/main/java/org/enso/table/aggregations/CountDistinct.java
@@ -15,7 +15,7 @@ import java.util.List;
* does count when all items are null.
*/
public class CountDistinct extends Aggregator {
- private final Storage[] storage;
+ private final Storage<?>[] storage;
private final Comparator objectComparator;
private final boolean ignoreAllNull;
diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/CountEmpty.java b/std-bits/table/src/main/java/org/enso/table/aggregations/CountEmpty.java
index 9af37f14a9..79059bc594 100644
--- a/std-bits/table/src/main/java/org/enso/table/aggregations/CountEmpty.java
+++ b/std-bits/table/src/main/java/org/enso/table/aggregations/CountEmpty.java
@@ -11,7 +11,7 @@ import java.util.List;
* counts null or empty entries. If `isEmpty` is false, counts non-empty entries.
*/
public class CountEmpty extends Aggregator {
- private final Storage storage;
+ private final Storage<?> storage;
private final boolean isEmpty;
/**
diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/CountNothing.java b/std-bits/table/src/main/java/org/enso/table/aggregations/CountNothing.java
index 3995f837e3..7fff60818b 100644
--- a/std-bits/table/src/main/java/org/enso/table/aggregations/CountNothing.java
+++ b/std-bits/table/src/main/java/org/enso/table/aggregations/CountNothing.java
@@ -10,7 +10,7 @@ import java.util.List;
* counts null entries. If `isNothing` is false, counts non-null entries.
*/
public class CountNothing extends Aggregator {
- private final Storage storage;
+ private final Storage<?> storage;
private final boolean isNothing;
/**
diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/First.java b/std-bits/table/src/main/java/org/enso/table/aggregations/First.java
index dcb6db4660..7e564545b0 100644
--- a/std-bits/table/src/main/java/org/enso/table/aggregations/First.java
+++ b/std-bits/table/src/main/java/org/enso/table/aggregations/First.java
@@ -10,8 +10,8 @@ import java.util.List;
/** Aggregate Column finding the first value in a group. */
public class First extends Aggregator {
- private final Storage storage;
- private final Storage[] orderByColumns;
+ private final Storage<?> storage;
+ private final Storage<?>[] orderByColumns;
private final int[] orderByDirections;
private final Comparator objectComparator;
private final boolean ignoreNothing;
diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/GroupBy.java b/std-bits/table/src/main/java/org/enso/table/aggregations/GroupBy.java
index a9342d7766..1e1e05ff37 100644
--- a/std-bits/table/src/main/java/org/enso/table/aggregations/GroupBy.java
+++ b/std-bits/table/src/main/java/org/enso/table/aggregations/GroupBy.java
@@ -7,7 +7,7 @@ import java.util.List;
/** Aggregate Column getting the grouping key. */
public class GroupBy extends Aggregator {
- private final Storage storage;
+ private final Storage<?> storage;
public GroupBy(String name, Column column) {
super(name, column.getStorage().getType());
diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Last.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Last.java
index 02843e4a39..d259c3fa5d 100644
--- a/std-bits/table/src/main/java/org/enso/table/aggregations/Last.java
+++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Last.java
@@ -9,8 +9,8 @@ import java.util.Comparator;
import java.util.List;
public class Last extends Aggregator {
- private final Storage storage;
- private final Storage[] orderByColumns;
+ private final Storage> storage;
+ private final Storage>[] orderByColumns;
private final int[] orderByDirections;
private final Comparator objectComparator;
private final boolean ignoreNothing;
diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Mean.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Mean.java
index 8f5052a5e2..bf101a0dd7 100644
--- a/std-bits/table/src/main/java/org/enso/table/aggregations/Mean.java
+++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Mean.java
@@ -18,7 +18,7 @@ public class Mean extends Aggregator {
}
}
- private final Storage storage;
+ private final Storage> storage;
public Mean(String name, Column column) {
super(name, Storage.Type.DOUBLE);
diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/MinOrMax.java b/std-bits/table/src/main/java/org/enso/table/aggregations/MinOrMax.java
index 761c7b8219..2b99a7803f 100644
--- a/std-bits/table/src/main/java/org/enso/table/aggregations/MinOrMax.java
+++ b/std-bits/table/src/main/java/org/enso/table/aggregations/MinOrMax.java
@@ -11,7 +11,7 @@ import java.util.List;
* Aggregate Column finding the minimum (minOrMax = -1) or maximum (minOrMax = 1) entry in a group.
*/
public class MinOrMax extends Aggregator {
- private final Storage storage;
+ private final Storage> storage;
private final int minOrMax;
private final Comparator objectComparator;
diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Mode.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Mode.java
index 6414b567db..78725e689d 100644
--- a/std-bits/table/src/main/java/org/enso/table/aggregations/Mode.java
+++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Mode.java
@@ -10,7 +10,7 @@ import java.util.Map;
/** Aggregate Column computing the most common value in a group (ignoring Nothing). */
public class Mode extends Aggregator {
- private final Storage storage;
+ private final Storage> storage;
public Mode(String name, Column column) {
super(name, column.getStorage().getType());
diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Percentile.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Percentile.java
index c8fb79341a..2e6a75ac80 100644
--- a/std-bits/table/src/main/java/org/enso/table/aggregations/Percentile.java
+++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Percentile.java
@@ -11,7 +11,7 @@ import java.util.TreeMap;
/** Aggregate Column computing a percentile value in a group. */
public class Percentile extends Aggregator {
- private final Storage storage;
+ private final Storage> storage;
private final double percentile;
public Percentile(String name, Column column, double percentile) {
diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/ShortestOrLongest.java b/std-bits/table/src/main/java/org/enso/table/aggregations/ShortestOrLongest.java
index 8830d5a3f5..324400334e 100644
--- a/std-bits/table/src/main/java/org/enso/table/aggregations/ShortestOrLongest.java
+++ b/std-bits/table/src/main/java/org/enso/table/aggregations/ShortestOrLongest.java
@@ -9,7 +9,7 @@ import java.util.List;
/** Aggregate Column finding the longest or shortest string in a group. */
public class ShortestOrLongest extends Aggregator {
- private final Storage storage;
+ private final Storage> storage;
private final int minOrMax;
public ShortestOrLongest(String name, Column column, int minOrMax) {
diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/StandardDeviation.java b/std-bits/table/src/main/java/org/enso/table/aggregations/StandardDeviation.java
index 041815aab5..07543ded5f 100644
--- a/std-bits/table/src/main/java/org/enso/table/aggregations/StandardDeviation.java
+++ b/std-bits/table/src/main/java/org/enso/table/aggregations/StandardDeviation.java
@@ -20,7 +20,7 @@ public class StandardDeviation extends Aggregator {
}
}
- private final Storage storage;
+ private final Storage> storage;
private final boolean population;
public StandardDeviation(String name, Column column, boolean population) {
diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Sum.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Sum.java
index 1a9f5941a5..0a068944a8 100644
--- a/std-bits/table/src/main/java/org/enso/table/aggregations/Sum.java
+++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Sum.java
@@ -8,7 +8,7 @@ import java.util.List;
/** Aggregate Column computing the total value in a group. */
public class Sum extends Aggregator {
- private final Storage storage;
+ private final Storage> storage;
public Sum(String name, Column column) {
super(name, Storage.Type.DOUBLE);
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/BoolBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/BoolBuilder.java
index 01ab8825c5..bf4fd8c37f 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/BoolBuilder.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/BoolBuilder.java
@@ -62,7 +62,7 @@ public class BoolBuilder extends TypedBuilder {
}
@Override
- public Storage seal() {
+ public Storage seal() {
return new BoolStorage(vals, isNa, size, false);
}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/Builder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/Builder.java
index 847a63da59..b22af959ac 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/Builder.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/Builder.java
@@ -36,5 +36,5 @@ public abstract class Builder {
public abstract int getCurrentSize();
/** @return a storage containing all the items appended so far */
- public abstract Storage seal();
+ public abstract Storage> seal();
}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/DateBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/DateBuilder.java
index a7bc719be8..ba07de9e2b 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/DateBuilder.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/DateBuilder.java
@@ -32,7 +32,7 @@ public class DateBuilder extends TypedBuilderImpl {
}
@Override
- public Storage seal() {
+ public Storage seal() {
return new DateStorage(data, currentSize);
}
}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/DateTimeBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/DateTimeBuilder.java
index 55a2d1198c..5eb6ec5193 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/DateTimeBuilder.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/DateTimeBuilder.java
@@ -32,7 +32,7 @@ public class DateTimeBuilder extends TypedBuilderImpl {
}
@Override
- public Storage seal() {
+ public Storage seal() {
return new DateTimeStorage(data, currentSize);
}
}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/InferredBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/InferredBuilder.java
index 68b3bce030..1d4f1c3376 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/InferredBuilder.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/InferredBuilder.java
@@ -1,5 +1,6 @@
package org.enso.table.data.column.builder.object;
+import org.enso.base.polyglot.NumericConverter;
import org.enso.table.data.column.storage.Storage;
import java.math.BigDecimal;
@@ -81,10 +82,10 @@ public class InferredBuilder extends Builder {
int initialCapacity = Math.max(initialSize, currentSize);
if (o instanceof Boolean) {
currentBuilder = new BoolBuilder();
- } else if (o instanceof Double || o instanceof BigDecimal) {
- currentBuilder = NumericBuilder.createDoubleBuilder(initialCapacity);
- } else if (o instanceof Long) {
+ } else if (NumericConverter.isCoercibleToLong(o)) {
currentBuilder = NumericBuilder.createLongBuilder(initialCapacity);
+ } else if (NumericConverter.isCoercibleToDouble(o)) {
+ currentBuilder = NumericBuilder.createDoubleBuilder(initialCapacity);
} else if (o instanceof LocalDate) {
currentBuilder = new DateBuilder(initialCapacity);
} else if (o instanceof LocalTime) {
@@ -106,11 +107,15 @@ public class InferredBuilder extends Builder {
new RetypeInfo(Boolean.class, Storage.Type.BOOL),
new RetypeInfo(Long.class, Storage.Type.LONG),
new RetypeInfo(Double.class, Storage.Type.DOUBLE),
+ new RetypeInfo(String.class, Storage.Type.STRING),
new RetypeInfo(BigDecimal.class, Storage.Type.DOUBLE),
new RetypeInfo(LocalDate.class, Storage.Type.DATE),
new RetypeInfo(LocalTime.class, Storage.Type.TIME_OF_DAY),
new RetypeInfo(ZonedDateTime.class, Storage.Type.DATE_TIME),
- new RetypeInfo(String.class, Storage.Type.STRING));
+ new RetypeInfo(Float.class, Storage.Type.DOUBLE),
+ new RetypeInfo(Integer.class, Storage.Type.LONG),
+ new RetypeInfo(Short.class, Storage.Type.LONG),
+ new RetypeInfo(Byte.class, Storage.Type.LONG));
private void retypeAndAppend(Object o) {
for (RetypeInfo info : retypePairs) {
@@ -138,7 +143,7 @@ public class InferredBuilder extends Builder {
}
@Override
- public Storage seal() {
+ public Storage> seal() {
if (currentBuilder == null) {
initBuilderFor(null);
}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/NumericBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/NumericBuilder.java
index ea9d01ac1c..065a067a65 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/NumericBuilder.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/NumericBuilder.java
@@ -1,10 +1,10 @@
package org.enso.table.data.column.builder.object;
+import org.enso.base.polyglot.NumericConverter;
import org.enso.table.data.column.storage.DoubleStorage;
import org.enso.table.data.column.storage.LongStorage;
import org.enso.table.data.column.storage.Storage;
-import java.math.BigDecimal;
import java.util.Arrays;
import java.util.BitSet;
@@ -69,37 +69,20 @@ public class NumericBuilder extends TypedBuilder {
if (o == null) {
isMissing.set(currentSize++);
} else if (isDouble) {
- double value = toDouble(o);
+ double value = NumericConverter.coerceToDouble(o);
data[currentSize++] = Double.doubleToRawLongBits(value);
} else {
- data[currentSize++] = toLong(o);
+ data[currentSize++] = NumericConverter.coerceToLong(o);
}
}
@Override
public boolean accepts(Object o) {
- if (isDouble && (o instanceof Double || o instanceof BigDecimal)) {
- return true;
+ if (isDouble) {
+ return NumericConverter.isCoercibleToDouble(o);
+ } else {
+ return NumericConverter.isCoercibleToLong(o);
}
-
- return o instanceof Long || o instanceof Integer || o instanceof Byte;
- }
-
- private static double toDouble(Object o) {
- return switch (o) {
- case Double x -> x;
- case BigDecimal x -> x.doubleValue();
- default -> (double) toLong(o);
- };
- }
-
- private static long toLong(Object o) {
- return switch (o) {
- case Long x -> x;
- case Integer x -> x.longValue();
- case Byte x -> x.longValue();
- default -> throw new UnsupportedOperationException();
- };
}
@Override
@@ -159,7 +142,7 @@ public class NumericBuilder extends TypedBuilder {
}
@Override
- public Storage seal() {
+ public Storage> seal() {
if (isDouble) {
return new DoubleStorage(data, currentSize, isMissing);
} else {
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/ObjectBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/ObjectBuilder.java
index 5a90814168..32eaa4b438 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/ObjectBuilder.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/ObjectBuilder.java
@@ -67,7 +67,7 @@ public class ObjectBuilder extends TypedBuilder {
}
@Override
- public Storage seal() {
+ public Storage seal() {
return new ObjectStorage(data, currentSize);
}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/StringBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/StringBuilder.java
index fa2b4752c6..e359f922ef 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/StringBuilder.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/StringBuilder.java
@@ -30,7 +30,7 @@ public class StringBuilder extends TypedBuilderImpl {
}
@Override
- public Storage seal() {
+ public Storage seal() {
return new StringStorage(data, currentSize);
}
}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/TimeOfDayBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/TimeOfDayBuilder.java
index e0c6731511..f3e847eacb 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/TimeOfDayBuilder.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/TimeOfDayBuilder.java
@@ -32,7 +32,7 @@ public class TimeOfDayBuilder extends TypedBuilderImpl {
}
@Override
- public Storage seal() {
+ public Storage seal() {
return new TimeOfDayStorage(data, currentSize);
}
}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/string/PrimInferredStorageBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/string/PrimInferredStorageBuilder.java
index ab0c2714a0..51088c1073 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/string/PrimInferredStorageBuilder.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/string/PrimInferredStorageBuilder.java
@@ -109,7 +109,7 @@ public class PrimInferredStorageBuilder extends StorageBuilder {
/** @inheritDoc */
@Override
- public Storage seal() {
+ public Storage> seal() {
if (type == Type.LONG) {
return new LongStorage(data, size, isMissing);
} else {
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/string/StorageBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/string/StorageBuilder.java
index 09fa48ba86..54175a9c1c 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/string/StorageBuilder.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/string/StorageBuilder.java
@@ -19,5 +19,5 @@ public abstract class StorageBuilder {
*
* @return the storage resulting from this builder's operation.
*/
- public abstract Storage seal();
+ public abstract Storage> seal();
}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/string/StringStorageBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/string/StringStorageBuilder.java
index 6ca0eca04e..2f072e68f5 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/string/StringStorageBuilder.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/string/StringStorageBuilder.java
@@ -1,5 +1,6 @@
package org.enso.table.data.column.builder.string;
+import org.enso.table.data.column.storage.Storage;
import org.enso.table.data.column.storage.StringStorage;
/** A column builder appending all the values passed to it in an unchanged form. */
@@ -44,7 +45,7 @@ public class StringStorageBuilder extends StorageBuilder {
/** @inheritDoc */
@Override
- public StringStorage seal() {
+ public Storage seal() {
return new StringStorage(data, size);
}
}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/Aggregator.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/Aggregator.java
index 748d15724b..86d817f27c 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/Aggregator.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/Aggregator.java
@@ -24,5 +24,5 @@ public abstract class Aggregator {
*
* @return the storage containing all aggregation results.
*/
- public abstract Storage seal();
+ public abstract Storage> seal();
}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/CountAggregator.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/CountAggregator.java
index 044f6e0ef6..391dbac5a2 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/CountAggregator.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/CountAggregator.java
@@ -7,7 +7,7 @@ import java.util.stream.IntStream;
/** Aggregates a storage by counting the non-missing values in each group. */
public class CountAggregator extends Aggregator {
- private final Storage storage;
+ private final Storage> storage;
private final long[] counts;
private int position = 0;
@@ -16,7 +16,7 @@ public class CountAggregator extends Aggregator {
* @param resultSize the exact number of times {@link Aggregator#nextGroup(IntStream)} will be
* called.
*/
- public CountAggregator(Storage storage, int resultSize) {
+ public CountAggregator(Storage> storage, int resultSize) {
this.storage = storage;
this.counts = new long[resultSize];
}
@@ -27,7 +27,7 @@ public class CountAggregator extends Aggregator {
}
@Override
- public Storage seal() {
+ public Storage seal() {
return new LongStorage(counts);
}
}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/FunctionAggregator.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/FunctionAggregator.java
index 048bdeba73..411f0edaa6 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/FunctionAggregator.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/FunctionAggregator.java
@@ -1,6 +1,6 @@
package org.enso.table.data.column.operation.aggregate;
-import org.enso.base.Polyglot_Utils;
+import org.enso.base.polyglot.Polyglot_Utils;
import org.enso.table.data.column.builder.object.InferredBuilder;
import org.enso.table.data.column.storage.Storage;
import org.graalvm.polyglot.Value;
@@ -16,7 +16,7 @@ import java.util.stream.Stream;
public class FunctionAggregator extends Aggregator {
private final Function, Value> aggregateFunction;
private final boolean skipNa;
- private final Storage storage;
+ private final Storage> storage;
private final InferredBuilder builder;
/**
@@ -27,7 +27,7 @@ public class FunctionAggregator extends Aggregator {
*/
public FunctionAggregator(
Function, Value> aggregateFunction,
- Storage storage,
+ Storage> storage,
boolean skipNa,
int resultSize) {
this.aggregateFunction = aggregateFunction;
@@ -53,7 +53,7 @@ public class FunctionAggregator extends Aggregator {
}
@Override
- public Storage seal() {
+ public Storage> seal() {
return builder.seal();
}
}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/numeric/LongToLongAggregator.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/numeric/LongToLongAggregator.java
index 9cbbef8491..ea89191747 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/numeric/LongToLongAggregator.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/numeric/LongToLongAggregator.java
@@ -53,7 +53,7 @@ public abstract class LongToLongAggregator extends Aggregator {
}
@Override
- public Storage seal() {
+ public Storage seal() {
return new LongStorage(items, items.length, missing);
}
}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/numeric/NumericAggregator.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/numeric/NumericAggregator.java
index e7831a3b3b..ed8bec7c01 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/numeric/NumericAggregator.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/numeric/NumericAggregator.java
@@ -15,7 +15,7 @@ import java.util.stream.IntStream;
* DoubleStorage}.
*/
public abstract class NumericAggregator extends Aggregator {
- private final NumericStorage storage;
+ private final NumericStorage> storage;
private final long[] data;
private final BitSet missing;
private int position = 0;
@@ -24,7 +24,7 @@ public abstract class NumericAggregator extends Aggregator {
* @param storage the data source
* @param resultSize the number of times {@link Aggregator#nextGroup(IntStream)} will be called
*/
- public NumericAggregator(NumericStorage storage, int resultSize) {
+ public NumericAggregator(NumericStorage> storage, int resultSize) {
this.storage = storage;
this.data = new long[resultSize];
this.missing = new BitSet();
@@ -72,7 +72,7 @@ public abstract class NumericAggregator extends Aggregator {
}
@Override
- public Storage seal() {
+ public Storage seal() {
return new DoubleStorage(data, data.length, missing);
}
}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/MapOpStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/MapOpStorage.java
index 7e78cbc10e..25448ce312 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/MapOpStorage.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/MapOpStorage.java
@@ -8,12 +8,13 @@ import java.util.Map;
/**
* Stores map-like operations that can be performed on a given type.
*
- * @param <T> the storage type handled by these operations.
+ * @param <T> the type of elements stored in the storage
+ * @param <S> the storage type handled by these operations.
*/
-public class MapOpStorage {
- private final Map> ops = new HashMap<>();
+public class MapOpStorage> {
+ private final Map> ops = new HashMap<>();
- protected MapOperation super T> getOp(String name) {
+ protected MapOperation super T, ? super S> getOp(String name) {
return ops.get(name);
}
@@ -36,7 +37,7 @@ public class MapOpStorage {
* @param arg the argument to pass to the operation
* @return the result of running the operation
*/
- public Storage runMap(String n, T storage, Object arg) {
+ public Storage> runMap(String n, S storage, Object arg) {
return ops.get(n).runMap(storage, arg);
}
@@ -49,7 +50,7 @@ public class MapOpStorage {
* @param arg the storage containing operation arguments
* @return the result of running the operation
*/
- public Storage runZip(String n, T storage, Storage arg) {
+ public Storage> runZip(String n, S storage, Storage> arg) {
return ops.get(n).runZip(storage, arg);
}
@@ -59,7 +60,7 @@ public class MapOpStorage {
* @param op the operation to add
* @return this operation set
*/
- public MapOpStorage add(MapOperation op) {
+ public MapOpStorage add(MapOperation op) {
ops.put(op.getName(), op);
return this;
}
@@ -68,23 +69,23 @@ public class MapOpStorage {
* Creates a child set, containing all the operations defined in this, that can be extended
* independently.
*
- * @param the desired result type
+ * @param the desired result type
* @return a child of this storage
*/
- public MapOpStorage makeChild() {
+ public MapOpStorage makeChild() {
return new ChildStorage<>(this);
}
- private static class ChildStorage extends MapOpStorage {
- private final MapOpStorage super T> parent;
+ private static class ChildStorage> extends MapOpStorage {
+ private final MapOpStorage super T, ? super S> parent;
- private ChildStorage(MapOpStorage super T> parent) {
+ private ChildStorage(MapOpStorage super T, ? super S> parent) {
this.parent = parent;
}
@Override
- protected MapOperation super T> getOp(String name) {
- MapOperation super T> local = super.getOp(name);
+ protected MapOperation super T, ? super S> getOp(String name) {
+ MapOperation super T, ? super S> local = super.getOp(name);
if (local == null) return parent.getOp(name);
return local;
}
@@ -95,12 +96,12 @@ public class MapOpStorage {
}
@Override
- public Storage runMap(String n, T storage, Object arg) {
+ public Storage> runMap(String n, S storage, Object arg) {
return getOp(n).runMap(storage, arg);
}
@Override
- public Storage runZip(String n, T storage, Storage arg) {
+ public Storage> runZip(String n, S storage, Storage> arg) {
return getOp(n).runZip(storage, arg);
}
}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/MapOperation.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/MapOperation.java
index 6a42c0005a..67398d1858 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/MapOperation.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/MapOperation.java
@@ -7,7 +7,7 @@ import org.enso.table.data.column.storage.Storage;
*
 * @param <I> the supported storage type.
*/
-public abstract class MapOperation {
+public abstract class MapOperation> {
private final String name;
/**
@@ -26,7 +26,7 @@ public abstract class MapOperation {
* @param arg the argument passed to the operation
* @return the result of running the operation
*/
- public abstract Storage runMap(I storage, Object arg);
+ public abstract Storage> runMap(I storage, Object arg);
/**
* Run the operation in zip mode
@@ -35,7 +35,7 @@ public abstract class MapOperation {
* @param arg the storage providing second arguments to the operation
* @return the result of running the operation
*/
- public abstract Storage runZip(I storage, Storage arg);
+ public abstract Storage> runZip(I storage, Storage> arg);
/** @return the name of this operation */
public String getName() {
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/SpecializedIsInOp.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/SpecializedIsInOp.java
new file mode 100644
index 0000000000..21b6b7494a
--- /dev/null
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/SpecializedIsInOp.java
@@ -0,0 +1,99 @@
+package org.enso.table.data.column.operation.map;
+
+import java.util.BitSet;
+import java.util.HashSet;
+import java.util.List;
+import java.util.function.Function;
+import org.enso.base.polyglot.Polyglot_Utils;
+import org.enso.table.data.column.storage.BoolStorage;
+import org.enso.table.data.column.storage.Storage;
+
+/**
+ * A specialized implementation for the IS_IN operation for builtin types, relying on hashing. Since
+ * for some columns we know what types of objects can be stored, we can filter out any objects that
+ * do not match that type and then rely on a consistent definition of hashcode for these builtin
+ * types (which is not available in general for custom objects).
+ */
+public class SpecializedIsInOp> extends MapOperation {
+ /**
+ * An optimized representation of the vector of values to match.
+ *
+ * <p>It records whether the vector contained a null value and holds a hash set of the vector
+ * elements for faster contains checks.
+ */
+ public record CompactRepresentation(HashSet coercedValues, boolean hasNulls) {}
+
+ private final Function, CompactRepresentation> prepareList;
+
+ /**
+ * Creates a new operation with a given preprocessing function.
+ *
+ * <p>The responsibility of the function is to analyse the list and create a hash set of relevant
+ * elements, coerced to a type that is consistent with the storage type of the given column. Any
+ * elements not fitting the expected type can (and should) be discarded.
+ *
+ * <p>It is important to correctly coerce the types, for example in Enso 2 == 2.0, so if we are
+ * getting a Long for a DoubleColumn, it should be converted to a Double before adding it to the
+ * hash set. Similarly, for LongStorage, non-integer Doubles can be ignored, but Doubles with 0
+ * fractional part need to be converted into a Long. These conversions can be achieved with the
+ * {@code NumericConverter} class.
+ */
+ public static > SpecializedIsInOp make(
+ Function, CompactRepresentation> prepareList) {
+ return new SpecializedIsInOp<>(prepareList);
+ }
+
+ /**
+ * Creates a new operation which ensures the Enso Date/Time types are correctly coerced.
+ *
+ * <p>It uses the provided {@code storageClass} to only keep the elements that are of the same
+ * type as expected in the storage.
+ */
+ public static > SpecializedIsInOp makeForTimeColumns(Class storageClass) {
+ return SpecializedIsInOp.make(
+ list -> {
+ HashSet set = new HashSet<>();
+ boolean hasNulls = false;
+ for (Object o : list) {
+ hasNulls |= o == null; // remember whether the list contained any null
+ Object coerced = Polyglot_Utils.convertPolyglotValue(o);
+ if (storageClass.isInstance(coerced)) { // values of any other type are dropped
+ set.add(storageClass.cast(coerced));
+ }
+ }
+ return new SpecializedIsInOp.CompactRepresentation<>(set, hasNulls);
+ });
+ }
+
+ SpecializedIsInOp(Function, CompactRepresentation> prepareList) {
+ super(Storage.Maps.IS_IN);
+ this.prepareList = prepareList;
+ }
+
+ @Override
+ public Storage> runMap(S storage, Object arg) {
+ if (arg instanceof List) { // only list arguments are accepted; anything else is rejected below
+ return runMap(storage, (List>) arg);
+ } else {
+ throw new IllegalArgumentException("Argument to `is_in` must be a vector.");
+ }
+ }
+
+ public Storage> runMap(S storage, List> arg) {
+ CompactRepresentation compactRepresentation = prepareList.apply(arg); // preprocess the list once
+ BitSet newVals = new BitSet();
+ for (int i = 0; i < storage.size(); i++) {
+ if (storage.isNa(i) && compactRepresentation.hasNulls) { // missing entry matches a null in the list
+ newVals.set(i);
+ } else if (compactRepresentation.coercedValues.contains(storage.getItemBoxed(i))) {
+ newVals.set(i);
+ }
+ }
+ return new BoolStorage(newVals, new BitSet(), storage.size(), false); // second BitSet is left empty
+ }
+
+ @Override
+ public Storage> runZip(S storage, Storage> arg) {
+ throw new IllegalStateException("Zip mode is not supported for this operation.");
+ }
+}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/UnaryMapOperation.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/UnaryMapOperation.java
index 38e00c2e50..38ac2cbed2 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/UnaryMapOperation.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/UnaryMapOperation.java
@@ -7,20 +7,20 @@ import org.enso.table.data.column.storage.Storage;
*
 * @param <I> the supported storage type
*/
-public abstract class UnaryMapOperation extends MapOperation {
+public abstract class UnaryMapOperation> extends MapOperation {
public UnaryMapOperation(String name) {
super(name);
}
- protected abstract Storage run(I storage);
+ protected abstract Storage> run(I storage);
@Override
- public Storage runMap(I storage, Object arg) {
+ public Storage> runMap(I storage, Object arg) {
return run(storage);
}
@Override
- public Storage runZip(I storage, Storage arg) {
+ public Storage> runZip(I storage, Storage> arg) {
return run(storage);
}
}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/bool/BooleanIsInOp.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/bool/BooleanIsInOp.java
new file mode 100644
index 0000000000..016cf9ec30
--- /dev/null
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/bool/BooleanIsInOp.java
@@ -0,0 +1,89 @@
+package org.enso.table.data.column.operation.map.bool;
+
+import java.util.BitSet;
+import java.util.List;
+
+import org.enso.table.data.column.operation.map.MapOperation;
+import org.enso.table.data.column.storage.BoolStorage;
+import org.enso.table.data.column.storage.Storage;
+
+/**
+ * A specialized implementation of the IS_IN operation for boolean columns. A cell can only be
+ * true, false or missing, so the argument list is reduced to three flags in a single pass and the
+ * result is then built with bulk {@link BitSet} operations instead of per-row hash lookups.
+ */
+public class BooleanIsInOp extends MapOperation<Boolean, BoolStorage> {
+  public BooleanIsInOp() {
+    super(Storage.Maps.IS_IN);
+  }
+
+  /** Dispatches to the list overload; any other argument is rejected with an exception. */
+  @Override
+  public BoolStorage runMap(BoolStorage storage, Object arg) {
+    if (arg instanceof List) {
+      return runMap(storage, (List<?>) arg);
+    } else {
+      throw new IllegalArgumentException("Argument to `is_in` must be a vector.");
+    }
+  }
+
+  /**
+   * Marks every row of {@code storage} whose value occurs in {@code arg}. A missing cell matches
+   * only if {@code arg} contains a null; non-boolean entries of {@code arg} are ignored.
+   */
+  public BoolStorage runMap(BoolStorage storage, List<?> arg) {
+    boolean hadTrue = false;
+    boolean hadFalse = false;
+    boolean hadNull = false;
+
+    for (Object o : arg) {
+      switch (o) {
+        case Boolean b -> {
+          hadTrue |= b;
+          hadFalse |= !b;
+        }
+        case null -> hadNull = true;
+        default -> {}
+      }
+    }
+
+    BitSet newVals;
+    boolean negated = false;
+
+    if (hadNull && hadTrue && hadFalse) {
+      // Every possible cell matches: an empty bit set combined with `negated` reads as all-true.
+      newVals = new BitSet();
+      negated = true;
+    } else if (!hadNull && !hadTrue && !hadFalse) {
+      // No values are present, so the result is to be false everywhere.
+      newVals = new BitSet();
+    } else if (hadNull && !hadTrue && !hadFalse) {
+      // Only missing values are in the set, so we just return the missing indicator.
+      newVals = storage.getIsMissing();
+    } else if (hadTrue && hadFalse) { // && !hadNull
+      // All non-missing values are in the set - so we just return the negated missing indicator.
+      newVals = storage.getIsMissing();
+      negated = true;
+    } else {
+      // Exactly one of hadTrue / hadFalse holds; BitSet.get returns a copy we may safely mutate.
+      newVals = storage.getValues().get(0, storage.size());
+      // Flip when needed so a set bit means "cell equals the searched value" (true or false).
+      if (hadTrue == storage.isNegated()) {
+        newVals.flip(0, storage.size());
+      }
+      // A missing cell never equals true/false; it matches only through the null check below.
+      newVals.andNot(storage.getIsMissing());
+
+      if (hadNull) {
+        newVals.or(storage.getIsMissing());
+      }
+    }
+
+    return new BoolStorage(newVals, new BitSet(), storage.size(), negated);
+  }
+
+  @Override
+  public Storage<?> runZip(BoolStorage storage, Storage<?> arg) {
+    throw new IllegalStateException("Zip mode is not supported for this operation.");
+  }
+}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/DoubleBooleanOp.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/DoubleBooleanOp.java
index f1b370a10d..64d5b95f0b 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/DoubleBooleanOp.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/DoubleBooleanOp.java
@@ -10,7 +10,7 @@ import org.enso.table.error.UnexpectedTypeException;
import java.util.BitSet;
/** An operation expecting a numeric argument and returning a boolean. */
-public abstract class DoubleBooleanOp extends MapOperation {
+public abstract class DoubleBooleanOp extends MapOperation {
public DoubleBooleanOp(String name) {
super(name);
}
@@ -59,7 +59,7 @@ public abstract class DoubleBooleanOp extends MapOperation {
}
@Override
- public Storage runZip(DoubleStorage storage, Storage arg) {
+ public BoolStorage runZip(DoubleStorage storage, Storage> arg) {
if (arg instanceof DoubleStorage v) {
BitSet newVals = new BitSet();
BitSet newMissing = new BitSet();
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/DoubleNumericOp.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/DoubleNumericOp.java
index b35f959bb7..0968ea143a 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/DoubleNumericOp.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/DoubleNumericOp.java
@@ -9,7 +9,7 @@ import org.enso.table.error.UnexpectedTypeException;
import java.util.BitSet;
/** An operation expecting a numeric argument and returning a number. */
-public abstract class DoubleNumericOp extends MapOperation {
+public abstract class DoubleNumericOp extends MapOperation {
public DoubleNumericOp(String name) {
super(name);
@@ -18,7 +18,7 @@ public abstract class DoubleNumericOp extends MapOperation {
protected abstract double doDouble(double a, double b);
@Override
- public Storage runMap(DoubleStorage storage, Object arg) {
+ public Storage runMap(DoubleStorage storage, Object arg) {
double x;
if (arg instanceof Double) {
x = (Double) arg;
@@ -37,7 +37,7 @@ public abstract class DoubleNumericOp extends MapOperation {
}
@Override
- public Storage runZip(DoubleStorage storage, Storage arg) {
+ public Storage runZip(DoubleStorage storage, Storage> arg) {
if (arg instanceof LongStorage v) {
long[] out = new long[storage.size()];
BitSet newMissing = new BitSet();
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/LongBooleanOp.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/LongBooleanOp.java
index aa5aca0abd..d96a70ae1a 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/LongBooleanOp.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/LongBooleanOp.java
@@ -10,7 +10,7 @@ import org.enso.table.error.UnexpectedTypeException;
import java.util.BitSet;
/** An operation expecting a numeric argument and returning a boolean. */
-public abstract class LongBooleanOp extends MapOperation {
+public abstract class LongBooleanOp extends MapOperation {
public LongBooleanOp(String name) {
super(name);
}
@@ -61,7 +61,7 @@ public abstract class LongBooleanOp extends MapOperation {
}
@Override
- public Storage runZip(LongStorage storage, Storage arg) {
+ public BoolStorage runZip(LongStorage storage, Storage> arg) {
if (arg instanceof DoubleStorage v) {
BitSet newVals = new BitSet();
BitSet newMissing = new BitSet();
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/LongNumericOp.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/LongNumericOp.java
index 9c45ed6a3a..b0657f4d03 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/LongNumericOp.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/LongNumericOp.java
@@ -3,13 +3,14 @@ package org.enso.table.data.column.operation.map.numeric;
import org.enso.table.data.column.operation.map.MapOperation;
import org.enso.table.data.column.storage.DoubleStorage;
import org.enso.table.data.column.storage.LongStorage;
+import org.enso.table.data.column.storage.NumericStorage;
import org.enso.table.data.column.storage.Storage;
import org.enso.table.error.UnexpectedTypeException;
import java.util.BitSet;
/** An operation expecting a numeric argument and returning a boolean. */
-public abstract class LongNumericOp extends MapOperation {
+public abstract class LongNumericOp extends MapOperation {
private final boolean alwaysCast;
public LongNumericOp(String name, boolean alwaysCast) {
@@ -26,7 +27,7 @@ public abstract class LongNumericOp extends MapOperation {
public abstract long doLong(long in, long arg);
@Override
- public Storage runMap(LongStorage storage, Object arg) {
+ public NumericStorage> runMap(LongStorage storage, Object arg) {
if (arg instanceof Long && !alwaysCast) {
long x = (Long) arg;
long[] newVals = new long[storage.size()];
@@ -50,7 +51,7 @@ public abstract class LongNumericOp extends MapOperation {
}
@Override
- public Storage runZip(LongStorage storage, Storage arg) {
+ public NumericStorage> runZip(LongStorage storage, Storage> arg) {
if (arg instanceof LongStorage v) {
long[] out = new long[storage.size()];
BitSet newMissing = new BitSet();
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/text/LikeOp.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/text/LikeOp.java
index 0963d6ab99..3bc0ce1841 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/text/LikeOp.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/text/LikeOp.java
@@ -35,7 +35,7 @@ public class LikeOp extends StringBooleanOp {
}
@Override
- public Storage runMap(SpecializedStorage storage, Object arg) {
+ public BoolStorage runMap(SpecializedStorage storage, Object arg) {
if (arg == null) {
BitSet newVals = new BitSet();
BitSet newMissing = new BitSet();
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/text/StringBooleanOp.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/text/StringBooleanOp.java
index 156f2dec73..d2274bf277 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/text/StringBooleanOp.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/text/StringBooleanOp.java
@@ -9,7 +9,7 @@ import org.enso.table.error.UnexpectedTypeException;
import java.util.BitSet;
-public abstract class StringBooleanOp extends MapOperation> {
+public abstract class StringBooleanOp extends MapOperation> {
public StringBooleanOp(String name) {
super(name);
}
@@ -21,7 +21,7 @@ public abstract class StringBooleanOp extends MapOperation storage, Object arg) {
+ public BoolStorage runMap(SpecializedStorage storage, Object arg) {
if (arg == null) {
BitSet newVals = new BitSet();
BitSet newMissing = new BitSet();
@@ -53,7 +53,7 @@ public abstract class StringBooleanOp extends MapOperation storage, Storage arg) {
+ public BoolStorage runZip(SpecializedStorage storage, Storage> arg) {
if (arg instanceof StringStorage v) {
BitSet newVals = new BitSet();
BitSet newMissing = new BitSet();
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java
index 5ebcac2ce3..9fd5807b02 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java
@@ -1,8 +1,11 @@
package org.enso.table.data.column.storage;
+import java.util.BitSet;
+import java.util.List;
import org.enso.table.data.column.operation.map.MapOpStorage;
import org.enso.table.data.column.operation.map.MapOperation;
import org.enso.table.data.column.operation.map.UnaryMapOperation;
+import org.enso.table.data.column.operation.map.bool.BooleanIsInOp;
import org.enso.table.data.index.Index;
import org.enso.table.data.mask.OrderMask;
import org.enso.table.data.mask.SliceRange;
@@ -10,12 +13,9 @@ import org.enso.table.error.UnexpectedColumnTypeException;
import org.enso.table.error.UnexpectedTypeException;
import org.graalvm.polyglot.Value;
-import java.util.BitSet;
-import java.util.List;
-
/** A boolean column storage. */
-public class BoolStorage extends Storage {
- private static final MapOpStorage ops = buildOps();
+public final class BoolStorage extends Storage {
+ private static final MapOpStorage ops = buildOps();
private final BitSet values;
private final BitSet isMissing;
private final int size;
@@ -33,7 +33,9 @@ public class BoolStorage extends Storage {
return size;
}
- /** @inheritDoc */
+ /**
+ * @inheritDoc
+ */
@Override
public int countMissing() {
return isMissing.cardinality();
@@ -45,10 +47,9 @@ public class BoolStorage extends Storage {
}
@Override
- public Object getItemBoxed(int idx) {
+ public Boolean getItemBoxed(int idx) {
return isMissing.get(idx) ? null : getItem(idx);
}
-
public boolean getItem(long idx) {
return negated != values.get((int) idx);
}
@@ -64,12 +65,12 @@ public class BoolStorage extends Storage {
}
@Override
- protected Storage runVectorizedMap(String name, Object argument) {
+ protected Storage> runVectorizedMap(String name, Object argument) {
return ops.runMap(name, this, argument);
}
@Override
- protected Storage runVectorizedZip(String name, Storage argument) {
+ protected Storage> runVectorizedZip(String name, Storage> argument) {
return ops.runZip(name, this, argument);
}
@@ -99,7 +100,7 @@ public class BoolStorage extends Storage {
}
@Override
- public Storage fillMissing(Value arg) {
+ public Storage> fillMissing(Value arg) {
if (arg.isBoolean()) {
return fillMissingBoolean(arg.asBoolean());
} else {
@@ -108,7 +109,7 @@ public class BoolStorage extends Storage {
}
@Override
- public Storage mask(BitSet mask, int cardinality) {
+ public BoolStorage mask(BitSet mask, int cardinality) {
BitSet newMissing = new BitSet();
BitSet newValues = new BitSet();
int resultIx = 0;
@@ -118,6 +119,10 @@ public class BoolStorage extends Storage {
newMissing.set(resultIx++);
} else if (values.get(i)) {
newValues.set(resultIx++);
+ } else {
+ // We don't set any bits, but still increment the counter to indicate that we have just
+ // 'inserted' a false value.
+ resultIx++;
}
}
}
@@ -125,7 +130,7 @@ public class BoolStorage extends Storage {
}
@Override
- public Storage applyMask(OrderMask mask) {
+ public BoolStorage applyMask(OrderMask mask) {
int[] positions = mask.getPositions();
BitSet newNa = new BitSet();
BitSet newVals = new BitSet();
@@ -140,7 +145,7 @@ public class BoolStorage extends Storage {
}
@Override
- public Storage countMask(int[] counts, int total) {
+ public BoolStorage countMask(int[] counts, int total) {
BitSet newNa = new BitSet();
BitSet newVals = new BitSet();
int pos = 0;
@@ -159,12 +164,12 @@ public class BoolStorage extends Storage {
return negated;
}
- private static MapOpStorage buildOps() {
- MapOpStorage ops = new MapOpStorage<>();
+ private static MapOpStorage buildOps() {
+ MapOpStorage ops = new MapOpStorage<>();
ops.add(
new UnaryMapOperation<>(Maps.NOT) {
@Override
- protected Storage run(BoolStorage storage) {
+ protected BoolStorage run(BoolStorage storage) {
return new BoolStorage(
storage.values, storage.isMissing, storage.size, !storage.negated);
}
@@ -172,9 +177,9 @@ public class BoolStorage extends Storage {
.add(
new MapOperation<>(Maps.EQ) {
@Override
- public Storage runMap(BoolStorage storage, Object arg) {
- if (arg instanceof Boolean) {
- if ((Boolean) arg) {
+ public BoolStorage runMap(BoolStorage storage, Object arg) {
+ if (arg instanceof Boolean v) {
+ if (v) {
return storage;
} else {
return new BoolStorage(
@@ -186,7 +191,7 @@ public class BoolStorage extends Storage {
}
@Override
- public Storage runZip(BoolStorage storage, Storage arg) {
+ public BoolStorage runZip(BoolStorage storage, Storage> arg) {
BitSet out = new BitSet();
BitSet missing = new BitSet();
for (int i = 0; i < storage.size; i++) {
@@ -204,9 +209,8 @@ public class BoolStorage extends Storage {
.add(
new MapOperation<>(Maps.AND) {
@Override
- public Storage runMap(BoolStorage storage, Object arg) {
- if (arg instanceof Boolean) {
- boolean v = (Boolean) arg;
+ public BoolStorage runMap(BoolStorage storage, Object arg) {
+ if (arg instanceof Boolean v) {
if (v) {
return storage;
} else {
@@ -218,7 +222,7 @@ public class BoolStorage extends Storage {
}
@Override
- public Storage runZip(BoolStorage storage, Storage arg) {
+ public BoolStorage runZip(BoolStorage storage, Storage> arg) {
if (arg instanceof BoolStorage v) {
BitSet missing = v.isMissing.get(0, storage.size);
missing.or(storage.isMissing);
@@ -247,9 +251,8 @@ public class BoolStorage extends Storage {
.add(
new MapOperation<>(Maps.OR) {
@Override
- public Storage runMap(BoolStorage storage, Object arg) {
- if (arg instanceof Boolean) {
- boolean v = (Boolean) arg;
+ public BoolStorage runMap(BoolStorage storage, Object arg) {
+ if (arg instanceof Boolean v) {
if (v) {
return new BoolStorage(new BitSet(), storage.isMissing, storage.size, true);
} else {
@@ -261,7 +264,7 @@ public class BoolStorage extends Storage {
}
@Override
- public Storage runZip(BoolStorage storage, Storage arg) {
+ public BoolStorage runZip(BoolStorage storage, Storage> arg) {
if (arg instanceof BoolStorage v) {
BitSet missing = v.isMissing.get(0, storage.size);
missing.or(storage.isMissing);
@@ -287,7 +290,8 @@ public class BoolStorage extends Storage {
throw new UnexpectedColumnTypeException("Boolean");
}
}
- });
+ })
+ .add(new BooleanIsInOp());
return ops;
}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/DateStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/DateStorage.java
index 2cd593be8a..b41854f958 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/DateStorage.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/DateStorage.java
@@ -1,10 +1,10 @@
package org.enso.table.data.column.storage;
-import org.enso.table.data.column.operation.map.MapOpStorage;
-
import java.time.LocalDate;
+import org.enso.table.data.column.operation.map.MapOpStorage;
+import org.enso.table.data.column.operation.map.SpecializedIsInOp;
-public class DateStorage extends SpecializedStorage {
+public final class DateStorage extends SpecializedStorage {
/**
* @param data the underlying data
* @param size the number of items stored
@@ -13,10 +13,12 @@ public class DateStorage extends SpecializedStorage {
super(data, size, ops);
}
- private static final MapOpStorage> ops = buildOps();
+ private static final MapOpStorage> ops = buildOps();
- private static MapOpStorage> buildOps() {
- return ObjectStorage.buildObjectOps();
+ private static MapOpStorage> buildOps() {
+ MapOpStorage> t = ObjectStorage.buildObjectOps();
+ t.add(SpecializedIsInOp.makeForTimeColumns(LocalDate.class));
+ return t;
}
@Override
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/DateTimeStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/DateTimeStorage.java
index 4dbd5e922a..620e66b20b 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/DateTimeStorage.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/DateTimeStorage.java
@@ -1,10 +1,11 @@
package org.enso.table.data.column.storage;
import org.enso.table.data.column.operation.map.MapOpStorage;
+import org.enso.table.data.column.operation.map.SpecializedIsInOp;
import java.time.ZonedDateTime;
-public class DateTimeStorage extends SpecializedStorage {
+public final class DateTimeStorage extends SpecializedStorage {
/**
* @param data the underlying data
* @param size the number of items stored
@@ -13,10 +14,14 @@ public class DateTimeStorage extends SpecializedStorage {
super(data, size, ops);
}
- private static final MapOpStorage> ops = buildOps();
+ private static final MapOpStorage> ops =
+ buildOps();
- private static MapOpStorage> buildOps() {
- return ObjectStorage.buildObjectOps();
+ private static MapOpStorage> buildOps() {
+ MapOpStorage> t =
+ ObjectStorage.buildObjectOps();
+ t.add(SpecializedIsInOp.makeForTimeColumns(ZonedDateTime.class));
+ return t;
}
@Override
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java
index c6ee450328..9b5ace27b1 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java
@@ -1,7 +1,12 @@
package org.enso.table.data.column.storage;
+import java.util.BitSet;
+import java.util.HashSet;
+import java.util.List;
+import org.enso.base.polyglot.NumericConverter;
import org.enso.table.data.column.builder.object.NumericBuilder;
import org.enso.table.data.column.operation.map.MapOpStorage;
+import org.enso.table.data.column.operation.map.SpecializedIsInOp;
import org.enso.table.data.column.operation.map.UnaryMapOperation;
import org.enso.table.data.column.operation.map.numeric.DoubleBooleanOp;
import org.enso.table.data.column.operation.map.numeric.DoubleNumericOp;
@@ -10,15 +15,12 @@ import org.enso.table.data.mask.OrderMask;
import org.enso.table.data.mask.SliceRange;
import org.graalvm.polyglot.Value;
-import java.util.BitSet;
-import java.util.List;
-
/** A column containing floating point numbers. */
-public class DoubleStorage extends NumericStorage {
+public final class DoubleStorage extends NumericStorage {
private final long[] data;
private final BitSet isMissing;
private final int size;
- private static final MapOpStorage ops = buildOps();
+ private static final MapOpStorage ops = buildOps();
/**
* @param data the underlying data
@@ -58,7 +60,7 @@ public class DoubleStorage extends NumericStorage {
}
@Override
- public Object getItemBoxed(int idx) {
+ public Double getItemBoxed(int idx) {
return isMissing.get(idx) ? null : Double.longBitsToDouble(data[idx]);
}
@@ -80,16 +82,16 @@ public class DoubleStorage extends NumericStorage {
}
@Override
- protected Storage runVectorizedMap(String name, Object argument) {
+ protected Storage> runVectorizedMap(String name, Object argument) {
return ops.runMap(name, this, argument);
}
@Override
- protected Storage runVectorizedZip(String name, Storage argument) {
+ protected Storage> runVectorizedZip(String name, Storage> argument) {
return ops.runZip(name, this, argument);
}
- private Storage fillMissingDouble(double arg) {
+ private Storage> fillMissingDouble(double arg) {
final var builder = NumericBuilder.createDoubleBuilder(size());
long rawArg = Double.doubleToRawLongBits(arg);
for (int i = 0; i < size(); i++) {
@@ -103,7 +105,7 @@ public class DoubleStorage extends NumericStorage {
}
@Override
- public Storage fillMissing(Value arg) {
+ public Storage> fillMissing(Value arg) {
if (arg.isNumber()) {
if (arg.fitsInLong()) {
return fillMissingDouble(arg.asLong());
@@ -116,7 +118,7 @@ public class DoubleStorage extends NumericStorage {
}
@Override
- public DoubleStorage mask(BitSet mask, int cardinality) {
+ public Storage mask(BitSet mask, int cardinality) {
BitSet newMissing = new BitSet();
long[] newData = new long[cardinality];
int resIx = 0;
@@ -133,7 +135,7 @@ public class DoubleStorage extends NumericStorage {
}
@Override
- public Storage applyMask(OrderMask mask) {
+ public Storage applyMask(OrderMask mask) {
int[] positions = mask.getPositions();
long[] newData = new long[positions.length];
BitSet newMissing = new BitSet();
@@ -148,7 +150,7 @@ public class DoubleStorage extends NumericStorage {
}
@Override
- public Storage countMask(int[] counts, int total) {
+ public Storage countMask(int[] counts, int total) {
long[] newData = new long[total];
BitSet newMissing = new BitSet();
int pos = 0;
@@ -169,8 +171,8 @@ public class DoubleStorage extends NumericStorage {
return isMissing;
}
- private static MapOpStorage buildOps() {
- MapOpStorage ops = new MapOpStorage<>();
+ private static MapOpStorage buildOps() {
+ MapOpStorage ops = new MapOpStorage<>();
ops.add(
new DoubleNumericOp(Maps.ADD) {
@Override
@@ -249,15 +251,29 @@ public class DoubleStorage extends NumericStorage {
.add(
new UnaryMapOperation<>(Maps.IS_MISSING) {
@Override
- public Storage run(DoubleStorage storage) {
+ public BoolStorage run(DoubleStorage storage) {
return new BoolStorage(storage.isMissing, new BitSet(), storage.size, false);
}
- });
+ })
+ .add(
+ SpecializedIsInOp.make(
+ list -> {
+ HashSet set = new HashSet<>();
+ boolean hasNulls = false;
+ for (Object o : list) {
+ hasNulls |= o == null;
+ Double x = NumericConverter.tryConvertingToDouble(o);
+ if (x != null) {
+ set.add(x);
+ }
+ }
+ return new SpecializedIsInOp.CompactRepresentation<>(set, hasNulls);
+ }));
return ops;
}
@Override
- public DoubleStorage slice(int offset, int limit) {
+ public Storage slice(int offset, int limit) {
int newSize = Math.min(size - offset, limit);
long[] newData = new long[newSize];
System.arraycopy(data, offset, newData, 0, newSize);
@@ -266,7 +282,7 @@ public class DoubleStorage extends NumericStorage {
}
@Override
- public DoubleStorage slice(List ranges) {
+ public Storage slice(List ranges) {
int newSize = SliceRange.totalLength(ranges);
long[] newData = new long[newSize];
BitSet newMissing = new BitSet(newSize);
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java
index 802341342c..7bd9b22c7d 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java
@@ -1,9 +1,16 @@
package org.enso.table.data.column.storage;
+import java.util.BitSet;
+import java.util.HashSet;
+import java.util.List;
+import java.util.OptionalLong;
+import java.util.stream.LongStream;
+import org.enso.base.polyglot.NumericConverter;
import org.enso.table.data.column.builder.object.NumericBuilder;
import org.enso.table.data.column.operation.aggregate.Aggregator;
import org.enso.table.data.column.operation.aggregate.numeric.LongToLongAggregator;
import org.enso.table.data.column.operation.map.MapOpStorage;
+import org.enso.table.data.column.operation.map.SpecializedIsInOp;
import org.enso.table.data.column.operation.map.UnaryMapOperation;
import org.enso.table.data.column.operation.map.numeric.LongBooleanOp;
import org.enso.table.data.column.operation.map.numeric.LongNumericOp;
@@ -12,17 +19,12 @@ import org.enso.table.data.mask.OrderMask;
import org.enso.table.data.mask.SliceRange;
import org.graalvm.polyglot.Value;
-import java.util.BitSet;
-import java.util.List;
-import java.util.OptionalLong;
-import java.util.stream.LongStream;
-
/** A column storing 64-bit integers. */
-public class LongStorage extends NumericStorage {
+public final class LongStorage extends NumericStorage {
private final long[] data;
private final BitSet isMissing;
private final int size;
- private static final MapOpStorage ops = buildOps();
+ private static final MapOpStorage ops = buildOps();
/**
* @param data the underlying data
@@ -40,13 +42,17 @@ public class LongStorage extends NumericStorage {
this(data, data.length, new BitSet());
}
- /** @inheritDoc */
+ /**
+ * @inheritDoc
+ */
@Override
public int size() {
return size;
}
- /** @inheritDoc */
+ /**
+ * @inheritDoc
+ */
@Override
public int countMissing() {
return isMissing.cardinality();
@@ -66,17 +72,21 @@ public class LongStorage extends NumericStorage {
}
@Override
- public Object getItemBoxed(int idx) {
+ public Long getItemBoxed(int idx) {
return isMissing.get(idx) ? null : data[idx];
}
- /** @inheritDoc */
+ /**
+ * @inheritDoc
+ */
@Override
public int getType() {
return Type.LONG;
}
- /** @inheritDoc */
+ /**
+ * @inheritDoc
+ */
@Override
public boolean isNa(long idx) {
return isMissing.get((int) idx);
@@ -88,12 +98,12 @@ public class LongStorage extends NumericStorage {
}
@Override
- protected Storage runVectorizedMap(String name, Object argument) {
+ protected Storage> runVectorizedMap(String name, Object argument) {
return ops.runMap(name, this, argument);
}
@Override
- protected Storage runVectorizedZip(String name, Storage argument) {
+ protected Storage> runVectorizedZip(String name, Storage> argument) {
return ops.runZip(name, this, argument);
}
@@ -137,7 +147,7 @@ public class LongStorage extends NumericStorage {
};
}
- private Storage fillMissingDouble(double arg) {
+ private Storage> fillMissingDouble(double arg) {
final var builder = NumericBuilder.createDoubleBuilder(size());
long rawArg = Double.doubleToRawLongBits(arg);
for (int i = 0; i < size(); i++) {
@@ -151,7 +161,7 @@ public class LongStorage extends NumericStorage {
return builder.seal();
}
- private Storage fillMissingLong(long arg) {
+ private Storage> fillMissingLong(long arg) {
final var builder = NumericBuilder.createLongBuilder(size());
for (int i = 0; i < size(); i++) {
if (isMissing.get(i)) {
@@ -164,7 +174,7 @@ public class LongStorage extends NumericStorage {
}
@Override
- public Storage fillMissing(Value arg) {
+ public Storage> fillMissing(Value arg) {
if (arg.isNumber()) {
if (arg.fitsInLong()) {
return fillMissingLong(arg.asLong());
@@ -177,7 +187,7 @@ public class LongStorage extends NumericStorage {
}
@Override
- public LongStorage mask(BitSet mask, int cardinality) {
+ public Storage mask(BitSet mask, int cardinality) {
BitSet newMissing = new BitSet();
long[] newData = new long[cardinality];
int resIx = 0;
@@ -194,7 +204,7 @@ public class LongStorage extends NumericStorage {
}
@Override
- public Storage applyMask(OrderMask mask) {
+ public Storage applyMask(OrderMask mask) {
int[] positions = mask.getPositions();
long[] newData = new long[positions.length];
BitSet newMissing = new BitSet();
@@ -209,7 +219,7 @@ public class LongStorage extends NumericStorage {
}
@Override
- public Storage countMask(int[] counts, int total) {
+ public Storage countMask(int[] counts, int total) {
long[] newData = new long[total];
BitSet newMissing = new BitSet();
int pos = 0;
@@ -230,8 +240,8 @@ public class LongStorage extends NumericStorage {
return isMissing;
}
- private static MapOpStorage buildOps() {
- MapOpStorage ops = new MapOpStorage<>();
+ private static MapOpStorage buildOps() {
+ MapOpStorage ops = new MapOpStorage<>();
ops.add(
new LongNumericOp(Maps.ADD) {
@Override
@@ -360,10 +370,24 @@ public class LongStorage extends NumericStorage {
.add(
new UnaryMapOperation<>(Maps.IS_MISSING) {
@Override
- public Storage run(LongStorage storage) {
+ public BoolStorage run(LongStorage storage) {
return new BoolStorage(storage.isMissing, new BitSet(), storage.size, false);
}
- });
+ })
+ .add(
+ SpecializedIsInOp.make(
+ list -> {
+ HashSet set = new HashSet<>();
+ boolean hasNulls = false;
+ for (Object o : list) {
+ hasNulls |= o == null;
+ Long x = NumericConverter.tryConvertingToLong(o);
+ if (x != null) {
+ set.add(x);
+ }
+ }
+ return new SpecializedIsInOp.CompactRepresentation<>(set, hasNulls);
+ }));
return ops;
}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/NumericStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/NumericStorage.java
index 57bef25a4e..d577fc41f0 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/NumericStorage.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/NumericStorage.java
@@ -1,12 +1,11 @@
package org.enso.table.data.column.storage;
+import java.util.stream.DoubleStream;
import org.enso.table.data.column.operation.aggregate.Aggregator;
import org.enso.table.data.column.operation.aggregate.numeric.NumericAggregator;
-import java.util.stream.DoubleStream;
-
/** A storage containing items representable as a {@code double}. */
-public abstract class NumericStorage extends Storage {
+public abstract class NumericStorage extends Storage {
/**
* Returns the value stored at the given index. The return value if the given index is missing
* ({@link #isNa(long)}) is undefined.
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/ObjectStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/ObjectStorage.java
index e2102c7985..72549b00df 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/ObjectStorage.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/ObjectStorage.java
@@ -1,12 +1,11 @@
package org.enso.table.data.column.storage;
+import java.util.BitSet;
import org.enso.table.data.column.operation.map.MapOpStorage;
import org.enso.table.data.column.operation.map.UnaryMapOperation;
-import java.util.BitSet;
-
/** A column storing arbitrary objects. */
-public class ObjectStorage extends SpecializedStorage {
+public final class ObjectStorage extends SpecializedStorage {
/**
* @param data the underlying data
* @param size the number of items stored
@@ -30,14 +29,14 @@ public class ObjectStorage extends SpecializedStorage {
return Type.OBJECT;
}
- private static final MapOpStorage> ops = buildObjectOps();
+ private static final MapOpStorage> ops = buildObjectOps();
- static > MapOpStorage buildObjectOps() {
- MapOpStorage ops = new MapOpStorage<>();
+ static > MapOpStorage buildObjectOps() {
+ MapOpStorage ops = new MapOpStorage<>();
ops.add(
new UnaryMapOperation<>(Maps.IS_MISSING) {
@Override
- protected Storage run(S storage) {
+ protected BoolStorage run(S storage) {
BitSet r = new BitSet();
for (int i = 0; i < storage.size; i++) {
if (storage.data[i] == null) {
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/SpecializedStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/SpecializedStorage.java
index b2a5f78c0d..96acb58c4b 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/SpecializedStorage.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/SpecializedStorage.java
@@ -1,14 +1,13 @@
package org.enso.table.data.column.storage;
+import java.util.BitSet;
+import java.util.List;
import org.enso.table.data.column.operation.map.MapOpStorage;
import org.enso.table.data.index.Index;
import org.enso.table.data.mask.OrderMask;
import org.enso.table.data.mask.SliceRange;
-import java.util.BitSet;
-import java.util.List;
-
-public abstract class SpecializedStorage extends Storage {
+public abstract class SpecializedStorage extends Storage {
protected abstract SpecializedStorage newInstance(T[] data, int size);
@@ -21,7 +20,7 @@ public abstract class SpecializedStorage extends Storage {
* @param data the underlying data
* @param size the number of items stored
*/
- protected SpecializedStorage(T[] data, int size, MapOpStorage> ops) {
+ protected SpecializedStorage(T[] data, int size, MapOpStorage> ops) {
this.data = data;
this.size = size;
this.ops = ops;
@@ -29,7 +28,7 @@ public abstract class SpecializedStorage extends Storage {
protected final T[] data;
protected final int size;
- private final MapOpStorage> ops;
+ private final MapOpStorage> ops;
/** @inheritDoc */
@Override
@@ -74,12 +73,12 @@ public abstract class SpecializedStorage extends Storage {
}
@Override
- protected Storage runVectorizedMap(String name, Object argument) {
+ protected Storage> runVectorizedMap(String name, Object argument) {
return ops.runMap(name, this, argument);
}
@Override
- protected Storage runVectorizedZip(String name, Storage argument) {
+ protected Storage> runVectorizedZip(String name, Storage> argument) {
return ops.runZip(name, this, argument);
}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java
index b1560d3d74..82e1976e32 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java
@@ -1,6 +1,11 @@
package org.enso.table.data.column.storage;
-import org.enso.base.Polyglot_Utils;
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.List;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+import org.enso.base.polyglot.Polyglot_Utils;
import org.enso.table.data.column.builder.object.Builder;
import org.enso.table.data.column.builder.object.InferredBuilder;
import org.enso.table.data.column.builder.object.ObjectBuilder;
@@ -11,14 +16,8 @@ import org.enso.table.data.mask.OrderMask;
import org.enso.table.data.mask.SliceRange;
import org.graalvm.polyglot.Value;
-import java.util.BitSet;
-import java.util.HashMap;
-import java.util.List;
-import java.util.function.BiFunction;
-import java.util.function.Function;
-
/** An abstract representation of a data column. */
-public abstract class Storage {
+public abstract class Storage {
/** @return the number of elements in this column (including NAs) */
public abstract int size();
@@ -42,7 +41,7 @@ public abstract class Storage {
* @param idx the index to look up
* @return the item at position {@code idx}
*/
- public abstract Object getItemBoxed(int idx);
+ public abstract T getItemBoxed(int idx);
/**
* Enumerating possible storage types.
@@ -83,6 +82,7 @@ public abstract class Storage {
public static final String ENDS_WITH = "ends_with";
public static final String CONTAINS = "contains";
public static final String LIKE = "like";
+ public static final String IS_IN = "is_in";
}
public static final class Aggregators {
@@ -95,9 +95,9 @@ public abstract class Storage {
protected abstract boolean isOpVectorized(String name);
- protected abstract Storage runVectorizedMap(String name, Object argument);
+ protected abstract Storage> runVectorizedMap(String name, Object argument);
- protected abstract Storage runVectorizedZip(String name, Storage argument);
+ protected abstract Storage> runVectorizedZip(String name, Storage> argument);
/**
* Runs a function on each non-missing element in this storage and gathers the results.
@@ -106,17 +106,23 @@ public abstract class Storage {
* supported. If this argument is null, the vectorized operation will never be used.
* @param function the function to run.
* @param argument the argument to pass to each run of the function
+ * @param skipNulls specifies whether null values in the input should yield a null result
+ * without being passed to the function. This is useful if the function does not support
+ * null values; set it to false if the function should handle nulls itself.
* @return the result of running the function on all non-missing elements.
*/
- public final Storage bimap(
- String name, BiFunction function, Object argument) {
+ public final Storage> bimap(
+ String name,
+ BiFunction function,
+ Object argument,
+ boolean skipNulls) {
if (name != null && isOpVectorized(name)) {
return runVectorizedMap(name, argument);
}
Builder builder = new InferredBuilder(size());
for (int i = 0; i < size(); i++) {
Object it = getItemBoxed(i);
- if (it == null) {
+ if (skipNulls && it == null) {
builder.appendNoGrow(null);
} else {
Object result = function.apply(it, argument);
@@ -165,7 +171,7 @@ public abstract class Storage {
* @param function the function to run.
* @return the result of running the function on all non-missing elements.
*/
- public final Storage map(String name, Function function) {
+ public final Storage> map(String name, Function function) {
if (name != null && isOpVectorized(name)) {
return runVectorizedMap(name, null);
}
@@ -192,8 +198,8 @@ public abstract class Storage {
* @param skipNa whether rows containing missing values should be passed to the function.
* @return the result of running the function on all non-missing elements.
*/
- public final Storage zip(
- String name, BiFunction function, Storage arg, boolean skipNa) {
+ public final Storage> zip(
+ String name, BiFunction function, Storage> arg, boolean skipNa) {
if (name != null && isOpVectorized(name)) {
return runVectorizedZip(name, arg);
}
@@ -218,7 +224,7 @@ public abstract class Storage {
* @param arg the value to use for missing elements
* @return a new storage, with all missing elements replaced by arg
*/
- public Storage fillMissing(Value arg) {
+ public Storage> fillMissing(Value arg) {
return fillMissingHelper(arg, new ObjectBuilder(size()));
}
@@ -228,7 +234,7 @@ public abstract class Storage {
* @param other the source of default values
* @return a new storage with missing values filled
*/
- public Storage fillMissingFrom(Storage other) {
+ public Storage> fillMissingFrom(Storage> other) {
var builder = new InferredBuilder(size());
for (int i = 0; i < size(); i++) {
if (isNa(i)) {
@@ -240,7 +246,7 @@ public abstract class Storage {
return builder.seal();
}
- protected final Storage fillMissingHelper(Value arg, Builder builder) {
+ protected final Storage> fillMissingHelper(Value arg, Builder builder) {
Object convertedFallback = Polyglot_Utils.convertPolyglotValue(arg);
for (int i = 0; i < size(); i++) {
Object it = getItemBoxed(i);
@@ -260,14 +266,14 @@ public abstract class Storage {
* @param cardinality the number of true values in mask
* @return a new storage, masked with the given mask
*/
- public abstract Storage mask(BitSet mask, int cardinality);
+ public abstract Storage mask(BitSet mask, int cardinality);
/**
* Returns a new storage, ordered according to the rules specified in a mask.
*
* @param mask@return a storage resulting from applying the reordering rules
*/
- public abstract Storage applyMask(OrderMask mask);
+ public abstract Storage applyMask(OrderMask mask);
/**
* Returns a new storage, resulting from applying the rules specified in a mask. The resulting
@@ -280,13 +286,13 @@ public abstract class Storage {
* storage
* @return the storage masked according to the specified rules
*/
- public abstract Storage countMask(int[] counts, int total);
+ public abstract Storage countMask(int[] counts, int total);
/** @return a copy of the storage containing a slice of the original data */
- public abstract Storage slice(int offset, int limit);
+ public abstract Storage slice(int offset, int limit);
/** @return a copy of the storage consisting of slices of the original data */
- public abstract Storage slice(List ranges);
+ public abstract Storage slice(List ranges);
public List toList() {
return new StorageListView(this);
@@ -297,7 +303,7 @@ public abstract class Storage {
*
* @return a storage counting the number of times each value in this one has been seen before.
*/
- public Storage duplicateCount() {
+ public Storage> duplicateCount() {
long[] data = new long[size()];
HashMap occurenceCount = new HashMap<>();
for (int i = 0; i < size(); i++) {
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/StorageListView.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/StorageListView.java
index 91dab75244..edf46d679d 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/StorageListView.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/StorageListView.java
@@ -11,7 +11,7 @@ import java.util.Objects;
* is not modifiable.
*/
public class StorageListView implements List {
- private final Storage storage;
+ private final Storage> storage;
private final int from;
private final int to;
@@ -20,11 +20,11 @@ public class StorageListView implements List {
*
* @param storage the storage to wrap.
*/
- public StorageListView(Storage storage) {
+ public StorageListView(Storage> storage) {
this(storage, 0, storage.size());
}
- private StorageListView(Storage storage, int from, int to) {
+ private StorageListView(Storage> storage, int from, int to) {
this.storage = storage;
this.from = from;
this.to = to;
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java
index f77b54b0bb..b7ec1e6521 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java
@@ -1,17 +1,19 @@
package org.enso.table.data.column.storage;
import java.util.BitSet;
+import java.util.HashSet;
import org.enso.base.Text_Utils;
import org.enso.table.data.column.builder.object.StringBuilder;
import org.enso.table.data.column.operation.map.MapOpStorage;
import org.enso.table.data.column.operation.map.MapOperation;
+import org.enso.table.data.column.operation.map.SpecializedIsInOp;
import org.enso.table.data.column.operation.map.UnaryMapOperation;
import org.enso.table.data.column.operation.map.text.LikeOp;
import org.enso.table.data.column.operation.map.text.StringBooleanOp;
import org.graalvm.polyglot.Value;
/** A column storing strings. */
-public class StringStorage extends SpecializedStorage {
+public final class StringStorage extends SpecializedStorage {
/**
* @param data the underlying data
@@ -36,7 +38,7 @@ public class StringStorage extends SpecializedStorage {
return Type.STRING;
}
- private static final MapOpStorage> ops = buildOps();
+ private static final MapOpStorage> ops = buildOps();
@Override
protected boolean isOpVectorized(String name) {
@@ -44,17 +46,17 @@ public class StringStorage extends SpecializedStorage {
}
@Override
- protected Storage runVectorizedMap(String name, Object argument) {
+ protected Storage> runVectorizedMap(String name, Object argument) {
return ops.runMap(name, this, argument);
}
@Override
- protected Storage runVectorizedZip(String name, Storage argument) {
+ protected Storage> runVectorizedZip(String name, Storage> argument) {
return ops.runZip(name, this, argument);
}
@Override
- public Storage fillMissing(Value arg) {
+ public Storage> fillMissing(Value arg) {
if (arg.isString()) {
return fillMissingHelper(arg, new StringBuilder(size()));
} else {
@@ -62,12 +64,12 @@ public class StringStorage extends SpecializedStorage {
}
}
- private static MapOpStorage> buildOps() {
- MapOpStorage> t = ObjectStorage.buildObjectOps();
+ private static MapOpStorage> buildOps() {
+ MapOpStorage> t = ObjectStorage.buildObjectOps();
t.add(
new MapOperation<>(Maps.EQ) {
@Override
- public Storage runMap(SpecializedStorage storage, Object arg) {
+ public BoolStorage runMap(SpecializedStorage storage, Object arg) {
BitSet r = new BitSet();
BitSet missing = new BitSet();
for (int i = 0; i < storage.size(); i++) {
@@ -81,7 +83,7 @@ public class StringStorage extends SpecializedStorage {
}
@Override
- public Storage runZip(SpecializedStorage storage, Storage arg) {
+ public BoolStorage runZip(SpecializedStorage storage, Storage> arg) {
BitSet r = new BitSet();
BitSet missing = new BitSet();
for (int i = 0; i < storage.size(); i++) {
@@ -98,7 +100,7 @@ public class StringStorage extends SpecializedStorage {
t.add(
new UnaryMapOperation<>(Maps.IS_EMPTY) {
@Override
- protected Storage run(SpecializedStorage storage) {
+ protected BoolStorage run(SpecializedStorage storage) {
BitSet r = new BitSet();
for (int i = 0; i < storage.size; i++) {
String s = storage.data[i];
@@ -131,6 +133,19 @@ public class StringStorage extends SpecializedStorage {
}
});
t.add(new LikeOp());
+ t.add(
+ SpecializedIsInOp.make(
+ list -> {
+ HashSet set = new HashSet<>();
+ boolean hasNulls = false;
+ for (Object o : list) {
+ hasNulls |= o == null;
+ if (o instanceof String s) {
+ set.add(s);
+ }
+ }
+ return new SpecializedIsInOp.CompactRepresentation<>(set, hasNulls);
+ }));
return t;
}
}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/TimeOfDayStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/TimeOfDayStorage.java
index 36d4510645..ab8a81a177 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/TimeOfDayStorage.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/TimeOfDayStorage.java
@@ -1,10 +1,10 @@
package org.enso.table.data.column.storage;
-import org.enso.table.data.column.operation.map.MapOpStorage;
-
import java.time.LocalTime;
+import org.enso.table.data.column.operation.map.MapOpStorage;
+import org.enso.table.data.column.operation.map.SpecializedIsInOp;
-public class TimeOfDayStorage extends SpecializedStorage {
+public final class TimeOfDayStorage extends SpecializedStorage {
/**
* @param data the underlying data
* @param size the number of items stored
@@ -13,10 +13,12 @@ public class TimeOfDayStorage extends SpecializedStorage {
super(data, size, ops);
}
- private static final MapOpStorage> ops = buildOps();
+ private static final MapOpStorage> ops = buildOps();
- private static MapOpStorage> buildOps() {
- return ObjectStorage.buildObjectOps();
+ private static MapOpStorage> buildOps() {
+ MapOpStorage> t = ObjectStorage.buildObjectOps();
+ t.add(SpecializedIsInOp.makeForTimeColumns(LocalTime.class));
+ return t;
}
@Override
diff --git a/std-bits/table/src/main/java/org/enso/table/data/index/HashIndex.java b/std-bits/table/src/main/java/org/enso/table/data/index/HashIndex.java
index 4aa13768bd..5955d1d61f 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/index/HashIndex.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/index/HashIndex.java
@@ -7,17 +7,17 @@ import org.enso.table.data.mask.SliceRange;
import org.enso.table.data.table.Column;
public class HashIndex extends Index {
- private final Storage items;
+ private final Storage> items;
private final Map> locs;
private final String name;
- private HashIndex(Storage items, Map> locs, String name) {
+ private HashIndex(Storage> items, Map> locs, String name) {
this.items = items;
this.locs = locs;
this.name = name;
}
- private HashIndex(String name, Storage items, int size) {
+ private HashIndex(String name, Storage> items, int size) {
Map> locations = new HashMap<>();
for (int i = 0; i < size; i++) {
List its = locations.computeIfAbsent(items.getItemBoxed(i), x -> new ArrayList<>());
@@ -28,7 +28,7 @@ public class HashIndex extends Index {
this.name = name;
}
- public static HashIndex fromStorage(String name, Storage storage) {
+ public static HashIndex fromStorage(String name, Storage> storage) {
return new HashIndex(name, storage, storage.size());
}
@@ -59,19 +59,19 @@ public class HashIndex extends Index {
@Override
public Index mask(BitSet mask, int cardinality) {
- Storage newSt = items.mask(mask, cardinality);
+ Storage> newSt = items.mask(mask, cardinality);
return HashIndex.fromStorage(name, newSt);
}
@Override
public Index countMask(int[] counts, int total) {
- Storage newSt = items.countMask(counts, total);
+ Storage> newSt = items.countMask(counts, total);
return HashIndex.fromStorage(name, newSt);
}
@Override
public Index applyMask(OrderMask mask) {
- Storage newSt = items.applyMask(mask);
+ Storage> newSt = items.applyMask(mask);
return HashIndex.fromStorage(name, newSt);
}
@@ -86,7 +86,7 @@ public class HashIndex extends Index {
mask.set(i);
}
}
- Storage newItems = items.mask(mask, locs.size());
+ Storage> newItems = items.mask(mask, locs.size());
return new HashIndex(newItems, newLocs, name);
}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueIndex.java b/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueIndex.java
index 1c8478e591..eab87645ec 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueIndex.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueIndex.java
@@ -30,7 +30,7 @@ public class MultiValueIndex {
boolean isOrdered = ordering != null;
this.locs = isOrdered ? new TreeMap<>() : new HashMap<>();
- Storage[] storage = Arrays.stream(keyColumns).map(Column::getStorage).toArray(Storage[]::new);
+ Storage>[] storage = Arrays.stream(keyColumns).map(Column::getStorage).toArray(Storage[]::new);
IntFunction keyFactory =
isOrdered
? i -> new OrderedMultiValueKey(storage, i, ordering, objectComparator)
diff --git a/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueKeyBase.java b/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueKeyBase.java
index 998dde2178..58e6be2577 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueKeyBase.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueKeyBase.java
@@ -4,7 +4,7 @@ import org.enso.table.data.column.storage.Storage;
/** The base class for keys used for sorting/grouping rows by a set of columns. */
public abstract class MultiValueKeyBase {
- protected final Storage[] storages;
+ protected final Storage>[] storages;
protected final int rowIndex;
protected boolean hasFloatValues = false;
protected boolean floatsComputed = false;
@@ -13,7 +13,7 @@ public abstract class MultiValueKeyBase {
* Constructs a key based on an array of column storages and the index of the row the key is
* associated with.
*/
- public MultiValueKeyBase(Storage[] storage, int rowIndex) {
+ public MultiValueKeyBase(Storage>[] storage, int rowIndex) {
this.storages = storage;
this.rowIndex = rowIndex;
}
@@ -28,7 +28,7 @@ public abstract class MultiValueKeyBase {
/** Checks if all cells in the current row are missing. */
public boolean areAllNull() {
- for (Storage value : storages) {
+ for (Storage> value : storages) {
if (!value.isNa(rowIndex)) {
return false;
}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/index/OrderedMultiValueKey.java b/std-bits/table/src/main/java/org/enso/table/data/index/OrderedMultiValueKey.java
index 9a43aaf8f7..bf78e6774f 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/index/OrderedMultiValueKey.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/index/OrderedMultiValueKey.java
@@ -21,7 +21,7 @@ public class OrderedMultiValueKey extends MultiValueKeyBase
private final int[] directions;
public OrderedMultiValueKey(
- Storage[] storages, int rowIndex, int[] directions, Comparator objectComparator) {
+ Storage>[] storages, int rowIndex, int[] directions, Comparator objectComparator) {
super(storages, rowIndex);
this.objectComparator = objectComparator;
this.directions = directions;
diff --git a/std-bits/table/src/main/java/org/enso/table/data/index/UnorderedMultiValueKey.java b/std-bits/table/src/main/java/org/enso/table/data/index/UnorderedMultiValueKey.java
index 4b4d53feb4..5405d9303f 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/index/UnorderedMultiValueKey.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/index/UnorderedMultiValueKey.java
@@ -21,12 +21,12 @@ public class UnorderedMultiValueKey extends MultiValueKeyBase {
private final int hashCodeValue;
private final TextFoldingStrategy textFoldingStrategy;
- public UnorderedMultiValueKey(Storage[] storages, int rowIndex) {
+ public UnorderedMultiValueKey(Storage>[] storages, int rowIndex) {
this(storages, rowIndex, TextFoldingStrategy.unicodeNormalizedFold);
}
public UnorderedMultiValueKey(
- Storage[] storages, int rowIndex, TextFoldingStrategy textFoldingStrategy) {
+ Storage>[] storages, int rowIndex, TextFoldingStrategy textFoldingStrategy) {
super(storages, rowIndex);
this.textFoldingStrategy = textFoldingStrategy;
diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/Column.java b/std-bits/table/src/main/java/org/enso/table/data/table/Column.java
index 068ad175eb..b3254ae030 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/table/Column.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/Column.java
@@ -1,6 +1,6 @@
package org.enso.table.data.table;
-import org.enso.base.Polyglot_Utils;
+import org.enso.base.polyglot.Polyglot_Utils;
import org.enso.table.data.column.builder.object.InferredBuilder;
import org.enso.table.data.column.operation.aggregate.Aggregator;
import org.enso.table.data.column.storage.BoolStorage;
@@ -13,8 +13,6 @@ import org.enso.table.data.mask.SliceRange;
import org.enso.table.error.UnexpectedColumnTypeException;
import org.graalvm.polyglot.Value;
-import java.time.LocalDate;
-import java.time.LocalDateTime;
import java.util.BitSet;
import java.util.List;
import java.util.function.Function;
@@ -23,7 +21,7 @@ import java.util.stream.IntStream;
/** A representation of a column. Consists of a column name and the underlying storage. */
public class Column {
private final String name;
- private final Storage storage;
+ private final Storage> storage;
private final Index index;
/**
@@ -32,7 +30,7 @@ public class Column {
* @param name the column name
* @param storage the underlying storage
*/
- public Column(String name, Index index, Storage storage) {
+ public Column(String name, Index index, Storage> storage) {
this.name = name;
this.storage = storage;
this.index = index;
@@ -44,7 +42,7 @@ public class Column {
* @param name the column name
* @param storage the underlying storage
*/
- public Column(String name, Storage storage) {
+ public Column(String name, Storage> storage) {
this(name, new DefaultIndex(storage.size()), storage);
}
@@ -63,7 +61,7 @@ public class Column {
}
/** @return the underlying storage */
- public Storage getStorage() {
+ public Storage> getStorage() {
return storage;
}
@@ -148,7 +146,7 @@ public class Column {
* @return a column indexed by {@code col}
*/
public Column setIndex(Column col) {
- Storage storage = col.getStorage();
+ Storage> storage = col.getStorage();
Index ix = HashIndex.fromStorage(col.getName(), storage);
return this.withIndex(ix);
}
@@ -183,7 +181,7 @@ public class Column {
*/
public Column applyMask(OrderMask mask) {
Index newIndex = index.applyMask(mask);
- Storage newStorage = storage.applyMask(mask);
+ Storage> newStorage = storage.applyMask(mask);
return new Column(name, newIndex, newStorage);
}
diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/Table.java b/std-bits/table/src/main/java/org/enso/table/data/table/Table.java
index c46a6e1b6c..0351ba069c 100644
--- a/std-bits/table/src/main/java/org/enso/table/data/table/Table.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/table/Table.java
@@ -182,7 +182,7 @@ public class Table {
* @return a table indexed by the proper column
*/
public Table indexFromColumn(Column col) {
- Storage storage = col.getStorage();
+ Storage> storage = col.getStorage();
Index ix = HashIndex.fromStorage(col.getName(), storage);
List newColumns = new ArrayList<>();
Column indexCol = index.toColumn();
@@ -294,7 +294,7 @@ public class Table {
matches[i] = other.index.loc(index.iloc(i));
}
} else {
- Storage onS = getColumnByName(on).getStorage();
+ Storage> onS = getColumnByName(on).getStorage();
for (int i = 0; i < s; i++) {
matches[i] = other.index.loc(onS.getItemBoxed(i));
}
@@ -360,7 +360,7 @@ public class Table {
Arrays.stream(columns)
.map(
column -> {
- Storage newStorage = column.getStorage().applyMask(orderMask);
+ Storage> newStorage = column.getStorage().applyMask(orderMask);
return new Column(column.getName(), newIndex, newStorage);
})
.toArray(Column[]::new);
@@ -431,7 +431,7 @@ public class Table {
return new Table(newColumns, newIndex);
}
- private Storage concatStorages(Storage left, Storage right) {
+ private Storage> concatStorages(Storage> left, Storage> right) {
InferredBuilder builder = new InferredBuilder(left.size() + right.size());
for (int i = 0; i < left.size(); i++) {
builder.appendNoGrow(left.getItemBoxed(i));
@@ -442,7 +442,7 @@ public class Table {
return builder.seal();
}
- private Storage nullPad(int nullCount, Storage storage, boolean start) {
+ private Storage> nullPad(int nullCount, Storage> storage, boolean start) {
InferredBuilder builder = new InferredBuilder(nullCount + storage.size());
if (start) {
builder.appendNulls(nullCount);
diff --git a/std-bits/table/src/main/java/org/enso/table/operations/Distinct.java b/std-bits/table/src/main/java/org/enso/table/operations/Distinct.java
index 7ff4c71144..c989ce7436 100644
--- a/std-bits/table/src/main/java/org/enso/table/operations/Distinct.java
+++ b/std-bits/table/src/main/java/org/enso/table/operations/Distinct.java
@@ -21,7 +21,8 @@ public class Distinct {
if (keyColumns.length != 0) {
HashSet visitedRows = new HashSet<>();
int size = keyColumns[0].getSize();
- Storage[] storage = Arrays.stream(keyColumns).map(Column::getStorage).toArray(Storage[]::new);
+ Storage>[] storage =
+ Arrays.stream(keyColumns).map(Column::getStorage).toArray(Storage[]::new);
for (int i = 0; i < size; i++) {
UnorderedMultiValueKey key = new UnorderedMultiValueKey(storage, i, textFoldingStrategy);
diff --git a/std-bits/table/src/main/java/org/enso/table/operations/OrderBuilder.java b/std-bits/table/src/main/java/org/enso/table/operations/OrderBuilder.java
index 9c49789ce3..1ca3cdbe1d 100644
--- a/std-bits/table/src/main/java/org/enso/table/operations/OrderBuilder.java
+++ b/std-bits/table/src/main/java/org/enso/table/operations/OrderBuilder.java
@@ -43,7 +43,7 @@ public class OrderBuilder {
* @return a comparator with properties described above
*/
public Comparator toComparator() {
- final Storage storage = column.getStorage();
+ final Storage> storage = column.getStorage();
Comparator itemCmp = customComparator;
if (!ascending) {
itemCmp = itemCmp.reversed();
diff --git a/std-bits/table/src/main/java/org/enso/table/parsing/DatatypeParser.java b/std-bits/table/src/main/java/org/enso/table/parsing/DatatypeParser.java
index 07bd0a9aa2..4ad5e1f1b2 100644
--- a/std-bits/table/src/main/java/org/enso/table/parsing/DatatypeParser.java
+++ b/std-bits/table/src/main/java/org/enso/table/parsing/DatatypeParser.java
@@ -35,5 +35,6 @@ public abstract class DatatypeParser {
* Parses a column of texts (represented as a {@code StringStorage}) and returns a new storage,
* containing the parsed elements.
*/
- public abstract WithProblems parseColumn(String columnName, StringStorage sourceStorage);
+ public abstract WithProblems> parseColumn(
+ String columnName, Storage sourceStorage);
}
diff --git a/std-bits/table/src/main/java/org/enso/table/parsing/IdentityParser.java b/std-bits/table/src/main/java/org/enso/table/parsing/IdentityParser.java
index ccf8eba7f7..7039c1370d 100644
--- a/std-bits/table/src/main/java/org/enso/table/parsing/IdentityParser.java
+++ b/std-bits/table/src/main/java/org/enso/table/parsing/IdentityParser.java
@@ -22,7 +22,7 @@ public class IdentityParser extends IncrementalDatatypeParser {
}
@Override
- public WithProblems parseColumn(String columnName, StringStorage sourceStorage) {
+ public WithProblems> parseColumn(String columnName, Storage sourceStorage) {
return new WithProblems<>(sourceStorage, List.of());
}
}
diff --git a/std-bits/table/src/main/java/org/enso/table/parsing/IncrementalDatatypeParser.java b/std-bits/table/src/main/java/org/enso/table/parsing/IncrementalDatatypeParser.java
index f3fbe23ea8..08e0cbca02 100644
--- a/std-bits/table/src/main/java/org/enso/table/parsing/IncrementalDatatypeParser.java
+++ b/std-bits/table/src/main/java/org/enso/table/parsing/IncrementalDatatypeParser.java
@@ -29,12 +29,12 @@ public abstract class IncrementalDatatypeParser extends DatatypeParser {
* Parses a column of texts (represented as a {@code StringStorage}) and returns a new storage,
* containing the parsed elements.
*/
- public WithProblems parseColumn(String columnName, StringStorage sourceStorage) {
+ public WithProblems> parseColumn(String columnName, Storage sourceStorage) {
Builder builder = makeBuilderWithCapacity(sourceStorage.size());
var aggregator = new ProblemAggregatorImpl(columnName);
for (int i = 0; i < sourceStorage.size(); ++i) {
- String cell = sourceStorage.getItem(i);
+ String cell = sourceStorage.getItemBoxed(i);
if (cell != null) {
Object parsed = parseSingleValue(cell, aggregator);
builder.appendNoGrow(parsed);
diff --git a/std-bits/table/src/main/java/org/enso/table/parsing/TypeInferringParser.java b/std-bits/table/src/main/java/org/enso/table/parsing/TypeInferringParser.java
index 9c9c4952a2..909c23c494 100644
--- a/std-bits/table/src/main/java/org/enso/table/parsing/TypeInferringParser.java
+++ b/std-bits/table/src/main/java/org/enso/table/parsing/TypeInferringParser.java
@@ -40,14 +40,14 @@ public class TypeInferringParser extends DatatypeParser {
}
@Override
- public WithProblems parseColumn(String columnName, StringStorage sourceStorage) {
+ public WithProblems> parseColumn(String columnName, Storage sourceStorage) {
parsers:
for (IncrementalDatatypeParser parser : baseParsers) {
Builder builder = parser.makeBuilderWithCapacity(sourceStorage.size());
var aggregator = new ProblemAggregatorImpl(columnName);
for (int i = 0; i < sourceStorage.size(); ++i) {
- String cell = sourceStorage.getItem(i);
+ String cell = sourceStorage.getItemBoxed(i);
if (cell != null) {
Object parsed = parser.parseSingleValue(cell, aggregator);
if (aggregator.hasProblems()) {
diff --git a/std-bits/table/src/main/java/org/enso/table/read/DelimitedReader.java b/std-bits/table/src/main/java/org/enso/table/read/DelimitedReader.java
index 4868fff965..da72a0fe8e 100644
--- a/std-bits/table/src/main/java/org/enso/table/read/DelimitedReader.java
+++ b/std-bits/table/src/main/java/org/enso/table/read/DelimitedReader.java
@@ -452,13 +452,13 @@ public class DelimitedReader {
Column[] columns = new Column[builders.length];
for (int i = 0; i < builders.length; i++) {
String columnName = effectiveColumnNames[i];
- StringStorage col = builders[i].seal();
+ Storage col = builders[i].seal();
- WithProblems parseResult = valueParser.parseColumn(columnName, col);
+ WithProblems<Storage<?>> parseResult = valueParser.parseColumn(columnName, col);
for (var problem : parseResult.problems()) {
reportProblem(problem);
}
- Storage storage = parseResult.value();
+ Storage<?> storage = parseResult.value();
columns[i] = new Column(columnName, new DefaultIndex(storage.size()), storage);
}
diff --git a/std-bits/table/src/main/java/org/enso/table/write/ExcelWriter.java b/std-bits/table/src/main/java/org/enso/table/write/ExcelWriter.java
index bc7e8d7950..91cb5ec202 100644
--- a/std-bits/table/src/main/java/org/enso/table/write/ExcelWriter.java
+++ b/std-bits/table/src/main/java/org/enso/table/write/ExcelWriter.java
@@ -268,7 +268,7 @@ public class ExcelWriter {
return;
}
- Storage[] storages = Arrays.stream(columns).map(Column::getStorage).toArray(Storage[]::new);
+ Storage<?>[] storages = Arrays.stream(columns).map(Column::getStorage).toArray(Storage[]::new);
for (int i = 0; i < rowCount; i++) {
Row row = sheet.getRow(currentRow);
if (row == null) {
@@ -276,7 +276,7 @@ public class ExcelWriter {
}
for (int j = 0; j < columns.length; j++) {
- Storage storage = storages[j];
+ Storage<?> storage = storages[j];
int idx = j + firstColumn - 1;
Cell cell = row.getCell(idx);
@@ -305,7 +305,7 @@ public class ExcelWriter {
return newStyle;
}
- private static void writeValueToCell(Cell cell, int j, Storage storage, Workbook workbook)
+ private static void writeValueToCell(Cell cell, int j, Storage<?> storage, Workbook workbook)
throws IllegalStateException {
if (storage.isNa(j)) {
cell.setBlank();
diff --git a/test/Benchmarks/src/Table/Sorting.enso b/test/Benchmarks/src/Table/Sorting.enso
index 4cd0eb1acf..591040a7a2 100644
--- a/test/Benchmarks/src/Table/Sorting.enso
+++ b/test/Benchmarks/src/Table/Sorting.enso
@@ -22,7 +22,7 @@ main =
ints = (0.up_to vector_size).to_vector.take (Sample vector_size 42)
start = Date_Time.new 1990 1 1
dates = ints.map x->
- start + x.seconds
+ start + (Duration.new seconds=x)
objects = ints.map My.Data
ints_table = Table.new [['ints', ints]]
diff --git a/test/Table_Tests/src/Common_Table_Spec.enso b/test/Table_Tests/src/Common_Table_Spec.enso
index 1603e5cabc..182ce60866 100644
--- a/test/Table_Tests/src/Common_Table_Spec.enso
+++ b/test/Table_Tests/src/Common_Table_Spec.enso
@@ -1217,11 +1217,51 @@ spec prefix table_builder test_selection pending=Nothing =
t2.at "ix" . to_vector . should_equal [2, 4]
t2.at "X" . to_vector . should_equal [1, 4]
+ Test.specify "by an Is_In check in a Vector" <|
+ t = table_builder [["ix", [1, 2, 3, Nothing, 5, 6]], ["X", ["a", "b", "ccc", "X", "f", "2"]]]
+ t.filter "X" (Filter_Condition.Is_In ["X", "a", "c"]) on_problems=Report_Error . at "X" . to_vector . should_equal ["a", "X"]
+ t.filter "X" (Filter_Condition.Not_In ["X", "a", "c"]) on_problems=Report_Error . at "X" . to_vector . should_equal ["b", "ccc", "f", "2"]
+ t.filter "X" (Filter_Condition.Is_In ["ccc"]) on_problems=Report_Error . at "X" . to_vector . should_equal ["ccc"]
+ t.filter "X" (Filter_Condition.Is_In []) on_problems=Report_Error . at "X" . to_vector . should_equal []
+ t.filter "X" (Filter_Condition.Not_In []) on_problems=Report_Error . at "X" . to_vector . should_equal ["a", "b", "ccc", "X", "f", "2"]
+
+ t.filter "ix" (Filter_Condition.Is_In [Nothing, 2, 5, 4]) on_problems=Report_Error . at "ix" . to_vector . should_equal [2, Nothing, 5]
+ t.filter "ix" (Filter_Condition.Is_In [2, 5, 4]) on_problems=Report_Error . at "ix" . to_vector . should_equal [2, 5]
+ t.filter "ix" (Filter_Condition.Is_In [Nothing]) on_problems=Report_Error . at "ix" . to_vector . should_equal [Nothing]
+ t.filter "ix" (Filter_Condition.Not_In [Nothing]) on_problems=Report_Error . at "ix" . to_vector . should_equal [1, 2, 3, 5, 6]
+ t.filter "ix" (Filter_Condition.Not_In [1, 3]) on_problems=Report_Error . at "ix" . to_vector . should_equal [2, Nothing, 5, 6]
+
+ v1 = t.filter "X" (Filter_Condition.Is_In ["c", "f", "b", "b", "b", 15, Nothing]) on_problems=Report_Error . at "X" . to_vector
+ case test_selection.allows_mixed_type_comparisons of
+ True -> v1.should_equal ["b", "f"]
+ False -> v1.should_fail_with SQL_Error_Data
+ v2 = t.filter "ix" (Filter_Condition.Is_In ["c", 3, 2, "a"]) on_problems=Report_Error . at "ix" . to_vector
+ case test_selection.allows_mixed_type_comparisons of
+ True -> v2.should_equal [2, 3]
+ False -> v2.should_fail_with SQL_Error_Data
+
+ t2 = table_builder [["A", [True, False, True]], ["B", [False, False, False]], ["C", [True, False, Nothing]]]
+ t2.filter "A" (Filter_Condition.Is_In [True, Nothing]) . at "A" . to_vector . should_equal [True, True]
+ t2.filter "B" (Filter_Condition.Is_In [True, Nothing]) . at "B" . to_vector . should_equal []
+ t2.filter "C" (Filter_Condition.Is_In [True, Nothing]) . at "C" . to_vector . should_equal [True, Nothing]
+ t2.filter "A" (Filter_Condition.Is_In [False]) . at "A" . to_vector . should_equal [False]
+ t2.filter "B" (Filter_Condition.Is_In [False]) . at "B" . to_vector . should_equal [False, False, False]
+ t2.filter "C" (Filter_Condition.Is_In [False, False]) . at "C" . to_vector . should_equal [False]
+
Test.specify "by a boolean mask" <|
t = table_builder [["ix", [1, 2, 3, 4, 5]], ["b", [True, False, Nothing, True, True]]]
t.filter "b" on_problems=Report_Error . at "ix" . to_vector . should_equal [1, 4, 5]
t.filter "b" Filter_Condition.Is_False on_problems=Report_Error . at "ix" . to_vector . should_equal [2]
+ Test.specify "should correctly reorder all kinds of columns" <|
+ t = table_builder [["ints", [1, 2, 3, Nothing, 4]], ["floats", [4.0, Nothing, 3.0, 2.0, 1.0]], ["bools", [False, False, True, Nothing, False]], ["strings", ["a", Nothing, "b", "c", "d"]], ["mask", [False, True, True, True, Nothing]]]
+ t2 = t.filter "mask" on_problems=Report_Error
+ t2.at "ints" . to_vector . should_equal [2, 3, Nothing]
+ t2.at "floats" . to_vector . should_equal [Nothing, 3.0, 2.0]
+ t2.at "bools" . to_vector . should_equal [False, True, Nothing]
+ t2.at "strings" . to_vector . should_equal [Nothing, "b", "c"]
+ t2.at "mask" . to_vector . should_equal [True, True, True]
+
Test.specify "should check types of boolean operations" <|
t = table_builder [["ix", [1, 2, 3, 4, 5]], ["b", [True, False, Nothing, True, True]]]
tester = check_empty ["ix", "b"]
diff --git a/test/Table_Tests/src/Database/Codegen_Spec.enso b/test/Table_Tests/src/Database/Codegen_Spec.enso
index f80810de9f..2d9ed2553d 100644
--- a/test/Table_Tests/src/Database/Codegen_Spec.enso
+++ b/test/Table_Tests/src/Database/Codegen_Spec.enso
@@ -108,6 +108,19 @@ spec =
t4 = t1.filter "A" (Filter_Condition.Between (t1.at "B") 33)
t4.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE ("T1"."A" BETWEEN "T1"."B" AND ?)', [[33, int]]]
+ Test.specify "should generate an IN expression" <|
+ t2 = t1.filter "A" (Filter_Condition.Is_In [1, 2, 'foo'])
+ t2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE COALESCE("T1"."A" IN (?, ?, ?), 2=1)', [[1, int], [2, int], ["foo", int]]]
+
+ t3 = t1.filter "A" (Filter_Condition.Is_In [1])
+ t3.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE COALESCE("T1"."A" IN (?), 2=1)', [[1, int]]]
+
+ t4 = t1.filter "A" (Filter_Condition.Is_In [])
+ t4.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE (2=1)', []]
+
+ t5 = t1.filter "A" (Filter_Condition.Is_In [Nothing])
+ t5.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE ((2=1) OR ("T1"."A" IS NULL))', []]
+
Test.group "[Codegen] Joining Tables" <|
t2 = test_connection.query (SQL_Query.Table_Name "T2")
t3 = test_connection.query (SQL_Query.Table_Name "T3")
diff --git a/test/Table_Tests/src/Table_Spec.enso b/test/Table_Tests/src/Table_Spec.enso
index 40b33a0f16..8d590cf7de 100644
--- a/test/Table_Tests/src/Table_Spec.enso
+++ b/test/Table_Tests/src/Table_Spec.enso
@@ -1,5 +1,6 @@
from Standard.Base import all
from Standard.Base.Error.Problem_Behavior import Report_Error
+import Standard.Base.Data.Time.Duration
from Standard.Table import Table, Column, Sort_Column, Column_Selector, Sort_Column_Selector, Aggregate_Column
from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all hiding First, Last
@@ -77,6 +78,22 @@ spec =
t.at 'latitude' . to_vector . should_equal [34.19, 4.88]
t.at 'elevation' . to_vector . should_equal [Nothing, 19]
+ make_varied_type_table =
+ strs = ["strs", ["a", "b", "c", Nothing]]
+ ints = ["ints", [Nothing, 1, 2, 4]]
+ doubles = ["doubles", [0.0, 1.5, Nothing, 2.0]]
+ doubles_and_ints = ["doubles_and_ints", [0, 1.5, Nothing, 2]]
+ custom_objects = ["custom_objects", [My.Data 1 2, My.Data 3 4, Nothing, Nothing]]
+ dates = ["dates", [Nothing, Date.new 2000, Date.new 2022 8 20, Date.new 1999 1 1]]
+ times = ["times", [Time_Of_Day.new 18 00, Time_Of_Day.new 1 2 34, Nothing, Time_Of_Day.new]]
+ datetimes = ["datetimes", [Date_Time.new 2000, Date_Time.new 1999 1 2 3 4 5, Nothing, Date_Time.new 2022 8 27 11 22 25]]
+ mixed = ["mixed", [1, "a", Nothing, Date.new 2022 8 27]]
+ mixed_dates = ["mixed_dates", [Date.new 1999 1 2, Date_Time.new 1999 1 2 3 40, Date.new 1999 1 2, Date_Time.new 1999 1 2 3 40]]
+ just_nulls = ["just_nulls", [Nothing, Nothing, Nothing, Nothing]]
+
+ Table.new [strs, ints, doubles, doubles_and_ints, custom_objects, dates, times, datetimes, mixed, mixed_dates, just_nulls]
+ varied_type_table = make_varied_type_table
+
Test.group 'Construction' <|
Test.specify 'should allow creating a table from rows' <|
header = ['foo', 'bar']
@@ -87,30 +104,17 @@ spec =
r.at 'bar' . to_vector . should_equal [False, True, False]
Test.specify "should correctly infer storage types" <|
- strs = ["strs", ["a", "b", "c", Nothing]]
- ints = ["ints", [Nothing, 1, 2, 4]]
- doubles = ["doubles", [0.0, 1.5, Nothing, 2.0]]
- doubles_and_ints = ["doubles_and_ints", [0, 1.5, Nothing, 2]]
- custom_objects = ["custom_objects", [My.Data 1 2, My.Data 3 4, Nothing, Nothing]]
- dates = ["dates", [Nothing, Date.new 2000, Date.new 2022 8 20, Date.new 1999 1 1]]
- times = ["times", [Time_Of_Day.new 18 00, Time_Of_Day.new 1 2 34, Nothing, Time_Of_Day.new]]
- datetimes = ["datetimes", [Date_Time.new 2000, Date_Time.new 1999 1 2 3 4 5, Nothing, Date_Time.new 2022 8 27 11 22 25]]
- mixed = ["mixed", [1, "a", Nothing, Date.new 2022 8 27]]
- mixed_dates = ["mixed_dates", [Date.new 1999 1 2, Date_Time.new 1999 1 2 3 40, Date.new 1999 1 2, Date_Time.new 1999 1 2 3 40]]
- just_nulls = ["just_nulls", [Nothing, Nothing, Nothing, Nothing]]
-
- table = Table.new [strs, ints, doubles, doubles_and_ints, custom_objects, dates, times, datetimes, mixed, mixed_dates, just_nulls]
- table.at "strs" . storage_type . should_equal Storage.Text
- table.at "ints" . storage_type . should_equal Storage.Integer
- table.at "doubles" . storage_type . should_equal Storage.Decimal
- table.at "doubles_and_ints" . storage_type . should_equal Storage.Decimal
- table.at "custom_objects" . storage_type . should_equal Storage.Any
- table.at "dates" . storage_type . should_equal Storage.Date
- table.at "times" . storage_type . should_equal Storage.Time_Of_Day
- table.at "datetimes" . storage_type . should_equal Storage.Date_Time
- table.at "mixed" . storage_type . should_equal Storage.Any
- table.at "mixed_dates" . storage_type . should_equal Storage.Any
- table.at "just_nulls" . storage_type . should_equal Storage.Any
+ varied_type_table.at "strs" . storage_type . should_equal Storage.Text
+ varied_type_table.at "ints" . storage_type . should_equal Storage.Integer
+ varied_type_table.at "doubles" . storage_type . should_equal Storage.Decimal
+ varied_type_table.at "doubles_and_ints" . storage_type . should_equal Storage.Decimal
+ varied_type_table.at "custom_objects" . storage_type . should_equal Storage.Any
+ varied_type_table.at "dates" . storage_type . should_equal Storage.Date
+ varied_type_table.at "times" . storage_type . should_equal Storage.Time_Of_Day
+ varied_type_table.at "datetimes" . storage_type . should_equal Storage.Date_Time
+ varied_type_table.at "mixed" . storage_type . should_equal Storage.Any
+ varied_type_table.at "mixed_dates" . storage_type . should_equal Storage.Any
+ varied_type_table.at "just_nulls" . storage_type . should_equal Storage.Any
pending_python_missing = if Polyglot.is_language_installed "python" . not then
"Can't run Python tests, Python is not installed."
@@ -943,6 +947,87 @@ spec =
t2.at "A" . to_vector . should_equal [2, 3]
t2.at "B" . to_vector . should_equal [5, 6]
+ Test.specify "by an Is_In check in a Vector, on various types of columns" <|
+ varied_type_table.filter "strs" (Filter_Condition.Is_In ["c", "b", Nothing]) . at "strs" . to_vector . should_equal ["b", "c", Nothing]
+ varied_type_table.filter "ints" (Filter_Condition.Is_In [1, 2, 3]) . at "ints" . to_vector . should_equal [1, 2]
+ varied_type_table.filter "ints" (Filter_Condition.Is_In [1, Nothing]) . at "ints" . to_vector . should_equal [Nothing, 1]
+ varied_type_table.filter "doubles" (Filter_Condition.Is_In [0.0, Nothing]) . at "doubles" . to_vector . should_equal [0.0, Nothing]
+ varied_type_table.filter "dates" (Filter_Condition.Is_In [Date.new 2000, Date.new 1999 1 1, Date_Time.new 2022 8 20]) . at "dates" . to_vector . should_equal [Date.new 2000, Date.new 1999 1 1]
+ varied_type_table.filter "datetimes" (Filter_Condition.Is_In [Date_Time.new 2022 8 27 11 22 25, Nothing, Date_Time.new 2030, Date.new 2000]) . at "datetimes" . to_vector . should_equal [Nothing, Date_Time.new 2022 8 27 11 22 25]
+ varied_type_table.filter "times" (Filter_Condition.Is_In [Time_Of_Day.new 18 00, Time_Of_Day.new 18 19, Date_Time.new 2000 1 1]) . at "times" . to_vector . should_equal [Time_Of_Day.new 18 00]
+ varied_type_table.filter "mixed" (Filter_Condition.Is_In [42, "a", 1, Nothing, Date.new 2022 8 27, Date_Time.new 2022 8 27]) . at "mixed" . to_vector . should_equal [1, "a", Nothing, Date.new 2022 8 27]
+ varied_type_table.filter "mixed" (Filter_Condition.Is_In [42, Date_Time.new 2022 8 27, 1]) . at "mixed" . to_vector . should_equal [1]
+ varied_type_table.filter "just_nulls" (Filter_Condition.Is_In []) . at "just_nulls" . to_vector . should_equal []
+ varied_type_table.filter "just_nulls" (Filter_Condition.Is_In [Nothing, Nothing, 0]) . at "just_nulls" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing]
+ varied_type_table.filter "just_nulls" (Filter_Condition.Is_In [0]) . at "just_nulls" . to_vector . should_equal []
+ varied_type_table.filter "custom_objects" (Filter_Condition.Is_In [2, My.Data 2 1, Nothing]) . at "custom_objects" . to_vector . should_equal [My.Data 1 2, Nothing, Nothing]
+
+ t2 = Table.new [["ints", [1, 2, 3]], ["doubles", [1.2, 0.0, 1.0]]]
+ t2.filter "ints" (Filter_Condition.Is_In [2.0, 1.5, 3, 4]) . at "ints" . to_vector . should_equal [2, 3]
+ t2.filter "doubles" (Filter_Condition.Is_In [0.1, 1, 3, 1.2]) . at "doubles" . to_vector . should_equal [1.2, 1.0]
+
+ # We test these very carefully as our implementation relies on some short-circuit logic that is not as trivial as the hashmap checks which are done for other builtin types.
+ [True, False].each has_nulls->
+ [True, False].each has_true->
+ [True, False].each has_false->
+ vec_builder = Vector.new_builder
+ if has_nulls then vec_builder.append Nothing
+ if has_true then vec_builder.append True
+ if has_false then vec_builder.append False
+ in_vector = vec_builder.to_vector
+
+ vectors = [[True, False, Nothing], [Nothing, Nothing, Nothing], [False, False, True], [True, True, True], [False, False, False], [Nothing, True, True], [False, Nothing, False]]
+ vectors.each column_vector->
+ not x = case x of
+ True -> False
+ False -> True
+ Nothing -> Nothing
+ negated_column_vector = column_vector.map not
+ t = Table.new [["X", column_vector]]
+
+ expected_vector = column_vector.filter (Filter_Condition.Is_In in_vector)
+ expected_neg_vector = negated_column_vector.filter (Filter_Condition.Is_In in_vector)
+
+ t.filter "X" (Filter_Condition.Is_In in_vector) on_problems=Report_Error . at "X" . to_vector . should_equal expected_vector
+ t2 = t.set "Y" (t.at "X" . not)
+ t2.filter "Y" (Filter_Condition.Is_In in_vector) on_problems=Report_Error . at "Y" . to_vector . should_equal expected_neg_vector
+
+ Test.specify "should perform `Is_In` efficiently for builtin types" <|
+ first_day = Date_Time.new 2000 1 1
+ make_date x = first_day + (Duration.new seconds=x)
+ init = Duration.time_execution <|
+ t = Table.new [["X", (200.up_to 10000 . to_vector)]]
+ vec = 4000.up_to 13000 . to_vector
+ expected_vector = 4000.up_to 10000 . to_vector
+ expected_vector_2 = 200.up_to 10000 . with_step 2 . to_vector
+ dates_vec = vec.map make_date
+ bool_vec = Vector.fill 7000 True
+ date_col = t.at "X" . map make_date
+ [t, vec, expected_vector, expected_vector_2, dates_vec, bool_vec, date_col]
+ t = init.second . at 0
+ vec = init.second . at 1
+ expected_vector = init.second . at 2
+ expected_vector_2 = init.second . at 3
+ dates_vec = init.second . at 4
+ bool_vec = init.second . at 5
+ date_col = init.second . at 6
+
+ expected_max_time_ms = init.first.total_milliseconds * 2
+ check_timing name ~action =
+ res = Duration.time_execution action
+ runtime_ms = res.first.total_milliseconds
+ if runtime_ms > expected_max_time_ms then
+ Test.fail "Expected `Is_In` on "+name+" to be efficient, but it took "+runtime_ms.to_text+"ms while initialization itself took just "+expected_max_time_ms.to_text+"ms."
+
+ check_timing "integers" <|
+ t.filter "X" (Filter_Condition.Is_In vec) . at "X" . to_vector . should_equal expected_vector
+
+ check_timing "booleans" <|
+ t.filter (t.at "X" % 2 == 0) (Filter_Condition.Is_In bool_vec) . at "X" . to_vector . should_equal expected_vector_2
+
+ check_timing "dates" <|
+ t.filter date_col (Filter_Condition.Is_In dates_vec) . at "X" . to_vector . should_equal expected_vector
+
main = Test.Suite.run_main spec
## JS indexes months form 0, so we need to subtract 1.
diff --git a/test/Tests/src/Data/List_Spec.enso b/test/Tests/src/Data/List_Spec.enso
index 987b117ebf..30dbb674eb 100644
--- a/test/Tests/src/Data/List_Spec.enso
+++ b/test/Tests/src/Data/List_Spec.enso
@@ -53,6 +53,8 @@ spec = Test.group "List" <|
list.filter (Filter_Condition.Equal_Or_Greater than=3) . should_equal [3, 4, 5].to_list
list.filter (Filter_Condition.Equal_Or_Less than=(-1)) . should_equal Nil
list.filter (Filter_Condition.Between 2 4) . should_equal [2, 3, 4].to_list
+ list.filter (Filter_Condition.Is_In [7, 3, 2]) . should_equal [2, 3].to_list
+ list.filter (Filter_Condition.Not_In [7, 3, 2]) . should_equal [1, 4, 5].to_list
Test.expect_panic_with (list.filter (Filter_Condition.Starts_With "a")) No_Such_Method_Error_Data
list.filter Filter_Condition.Is_True . should_equal Nil
diff --git a/test/Tests/src/Data/Range_Spec.enso b/test/Tests/src/Data/Range_Spec.enso
index df48b7b950..8fd28044b7 100644
--- a/test/Tests/src/Data/Range_Spec.enso
+++ b/test/Tests/src/Data/Range_Spec.enso
@@ -77,6 +77,8 @@ spec = Test.group "Range" <|
range.filter (Filter_Condition.Equal_Or_Less than=(-1)) . should_equal []
range.filter (Filter_Condition.Between 2 4) . should_equal [2, 3, 4]
range.filter (Filter_Condition.Between 2.1 4.5) . should_equal [3, 4]
+ range.filter (Filter_Condition.Is_In [7, 3, 2]) . should_equal [2, 3]
+ range.filter (Filter_Condition.Not_In [7, 3, 2]) . should_equal [1, 4, 5]
Test.expect_panic_with (range.filter (Filter_Condition.Starts_With "a")) No_Such_Method_Error_Data
Test.expect_panic_with (range.filter (Filter_Condition.Like "a%")) Unsupported_Argument_Types_Data
diff --git a/test/Tests/src/Data/Vector_Spec.enso b/test/Tests/src/Data/Vector_Spec.enso
index 8c770d72c4..c576e87e4c 100644
--- a/test/Tests/src/Data/Vector_Spec.enso
+++ b/test/Tests/src/Data/Vector_Spec.enso
@@ -151,6 +151,9 @@ spec = Test.group "Vectors" <|
vec.filter (Filter_Condition.Equal_Or_Less than=(-1)) . should_equal []
vec.filter (Filter_Condition.Between 2 4) . should_equal [2, 3, 4]
vec.filter (Filter_Condition.Between 2.1 4.5) . should_equal [3, 4]
+ vec.filter (Filter_Condition.Is_In [7, 3, 2, 2, 2]) . should_equal [2, 3]
+ vec.filter (Filter_Condition.Is_In []) . should_equal []
+ vec.filter (Filter_Condition.Not_In [7, 3, 2, 2]) . should_equal [1, 4, 5]
Test.expect_panic_with (vec.filter (Filter_Condition.Starts_With "a")) No_Such_Method_Error_Data
vec.filter Filter_Condition.Is_True . should_equal []
@@ -167,6 +170,9 @@ spec = Test.group "Vectors" <|
txtvec.filter (Filter_Condition.Greater than="b") . should_equal ["bbb", "cccc", "baaa", "ś"]
txtvec.filter (Filter_Condition.Between "b" "c") . should_equal ["bbb", "baaa"]
Test.expect_panic_with (txtvec.filter (Filter_Condition.Starts_With 42)) Unsupported_Argument_Types_Data
+ txtvec.filter Filter_Condition.Is_True . should_equal []
+ txtvec.filter (Filter_Condition.Is_In [1, 2]) . should_equal []
+ txtvec.filter (Filter_Condition.Is_In ["bbb", 's\u0301', "bbb", "FOOBAR"]) . should_equal ["bbb", "ś"]
["", Nothing, " ", "a"].filter (Filter_Condition.Is_Empty) . should_equal ["", Nothing]
["", Nothing, " ", "a"].filter (Filter_Condition.Not_Empty) . should_equal [" ", "a"]