From 82de8f88bdd92108f16ef23a86f31ee867d23566 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Sat, 15 Oct 2022 13:29:59 +0200 Subject: [PATCH] Add support for `Is_In` and `Not_In` to `Filter_Condition` (#3790) Implements https://www.pivotaltracker.com/story/show/183389945 --- CHANGELOG.md | 2 + build.sbt | 1 - .../0.0.0-dev/src/Data/Filter_Condition.enso | 10 ++ .../0.0.0-dev/src/Data/Time/Duration.enso | 2 +- .../Base/0.0.0-dev/src/Network/Http.enso | 4 +- .../Database/0.0.0-dev/src/Data/Column.enso | 99 +++++++++++-- .../src/Internal/Base_Generator.enso | 32 +++-- .../Internal/Postgres/Postgres_Dialect.enso | 6 +- .../Table/0.0.0-dev/src/Data/Column.enso | 74 +++++++++- .../Table/0.0.0-dev/src/Data/Table.enso | 1 + .../Internal/Filter_Condition_Helpers.enso | 3 + .../enso/base/polyglot/NumericConverter.java | 108 ++++++++++++++ .../base/{ => polyglot}/Polyglot_Utils.java | 23 +-- .../enso/table/aggregations/Concatenate.java | 2 +- .../table/aggregations/CountDistinct.java | 2 +- .../enso/table/aggregations/CountEmpty.java | 2 +- .../enso/table/aggregations/CountNothing.java | 2 +- .../org/enso/table/aggregations/First.java | 4 +- .../org/enso/table/aggregations/GroupBy.java | 2 +- .../org/enso/table/aggregations/Last.java | 4 +- .../org/enso/table/aggregations/Mean.java | 2 +- .../org/enso/table/aggregations/MinOrMax.java | 2 +- .../org/enso/table/aggregations/Mode.java | 2 +- .../enso/table/aggregations/Percentile.java | 2 +- .../table/aggregations/ShortestOrLongest.java | 2 +- .../table/aggregations/StandardDeviation.java | 2 +- .../java/org/enso/table/aggregations/Sum.java | 2 +- .../column/builder/object/BoolBuilder.java | 2 +- .../data/column/builder/object/Builder.java | 2 +- .../column/builder/object/DateBuilder.java | 2 +- .../builder/object/DateTimeBuilder.java | 2 +- .../builder/object/InferredBuilder.java | 15 +- .../column/builder/object/NumericBuilder.java | 33 ++--- .../column/builder/object/ObjectBuilder.java | 2 +- 
.../column/builder/object/StringBuilder.java | 2 +- .../builder/object/TimeOfDayBuilder.java | 2 +- .../string/PrimInferredStorageBuilder.java | 2 +- .../column/builder/string/StorageBuilder.java | 2 +- .../builder/string/StringStorageBuilder.java | 3 +- .../operation/aggregate/Aggregator.java | 2 +- .../operation/aggregate/CountAggregator.java | 6 +- .../aggregate/FunctionAggregator.java | 8 +- .../numeric/LongToLongAggregator.java | 2 +- .../aggregate/numeric/NumericAggregator.java | 6 +- .../column/operation/map/MapOpStorage.java | 33 ++--- .../column/operation/map/MapOperation.java | 6 +- .../operation/map/SpecializedIsInOp.java | 99 +++++++++++++ .../operation/map/UnaryMapOperation.java | 8 +- .../operation/map/bool/BooleanIsInOp.java | 89 ++++++++++++ .../map/numeric/DoubleBooleanOp.java | 4 +- .../map/numeric/DoubleNumericOp.java | 6 +- .../operation/map/numeric/LongBooleanOp.java | 4 +- .../operation/map/numeric/LongNumericOp.java | 7 +- .../column/operation/map/text/LikeOp.java | 2 +- .../operation/map/text/StringBooleanOp.java | 6 +- .../data/column/storage/BoolStorage.java | 64 +++++---- .../data/column/storage/DateStorage.java | 14 +- .../data/column/storage/DateTimeStorage.java | 13 +- .../data/column/storage/DoubleStorage.java | 54 ++++--- .../data/column/storage/LongStorage.java | 72 ++++++---- .../data/column/storage/NumericStorage.java | 5 +- .../data/column/storage/ObjectStorage.java | 13 +- .../column/storage/SpecializedStorage.java | 15 +- .../table/data/column/storage/Storage.java | 58 ++++---- .../data/column/storage/StorageListView.java | 6 +- .../data/column/storage/StringStorage.java | 35 +++-- .../data/column/storage/TimeOfDayStorage.java | 14 +- .../org/enso/table/data/index/HashIndex.java | 16 +-- .../table/data/index/MultiValueIndex.java | 2 +- .../table/data/index/MultiValueKeyBase.java | 6 +- .../data/index/OrderedMultiValueKey.java | 2 +- .../data/index/UnorderedMultiValueKey.java | 4 +- .../org/enso/table/data/table/Column.java | 16 
+-- .../java/org/enso/table/data/table/Table.java | 10 +- .../org/enso/table/operations/Distinct.java | 3 +- .../enso/table/operations/OrderBuilder.java | 2 +- .../enso/table/parsing/DatatypeParser.java | 3 +- .../enso/table/parsing/IdentityParser.java | 2 +- .../parsing/IncrementalDatatypeParser.java | 4 +- .../table/parsing/TypeInferringParser.java | 4 +- .../org/enso/table/read/DelimitedReader.java | 6 +- .../org/enso/table/write/ExcelWriter.java | 6 +- test/Benchmarks/src/Table/Sorting.enso | 2 +- test/Table_Tests/src/Common_Table_Spec.enso | 40 ++++++ .../src/Database/Codegen_Spec.enso | 13 ++ test/Table_Tests/src/Table_Spec.enso | 133 ++++++++++++++---- test/Tests/src/Data/List_Spec.enso | 2 + test/Tests/src/Data/Range_Spec.enso | 2 + test/Tests/src/Data/Vector_Spec.enso | 6 + 89 files changed, 1041 insertions(+), 360 deletions(-) create mode 100644 std-bits/base/src/main/java/org/enso/base/polyglot/NumericConverter.java rename std-bits/base/src/main/java/org/enso/base/{ => polyglot}/Polyglot_Utils.java (56%) create mode 100644 std-bits/table/src/main/java/org/enso/table/data/column/operation/map/SpecializedIsInOp.java create mode 100644 std-bits/table/src/main/java/org/enso/table/data/column/operation/map/bool/BooleanIsInOp.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ae8aee0aa..1e10d7a658 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -211,6 +211,7 @@ `Not_Like`.][3775] - [Reimplemented `Duration` as a built-in type.][3759] - [Implemented `Table.replace_text` for in-memory table.][3793] +- [Extended `Filter_Condition` with `Is_In` and `Not_In`.][3790] [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug @@ -339,6 +340,7 @@ [3775]: https://github.com/enso-org/enso/pull/3775 [3759]: https://github.com/enso-org/enso/pull/3759 [3793]: https://github.com/enso-org/enso/pull/3793 +[3790]: https://github.com/enso-org/enso/pull/3790 #### Enso Compiler diff --git a/build.sbt b/build.sbt index 
8edb282da6..d619883e51 100644 --- a/build.sbt +++ b/build.sbt @@ -1192,7 +1192,6 @@ lazy val parser = (project in file("lib/scala/parser")) s"-Djava.library.path=$root/target/rust/debug" }, libraryDependencies ++= Seq( - "com.storm-enroute" %% "scalameter" % scalameterVersion % "bench", "org.scalatest" %%% "scalatest" % scalatestVersion % Test ), testFrameworks := List( diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Filter_Condition.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Filter_Condition.enso index f22c29621d..9358408e9e 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Filter_Condition.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Filter_Condition.enso @@ -105,6 +105,12 @@ type Filter_Condition See https://bugs.java.com/bugdatabase/view_bug.do?bug_id=8032926 Not_Like pattern:Text + ## Is the value contained in `values`? + Is_In values:Vector + + ## Is the value not contained in `values`? + Not_In values:Vector + ## Converts a `Filter_Condition` condition into a predicate taking an element and returning a value indicating whether the element should be accepted by the filter. @@ -142,6 +148,10 @@ type Filter_Condition Not_Like sql_pattern -> regex = sql_like_to_regex sql_pattern elem -> regex.matches elem . not + ## TODO once we have proper hashing we could create a hashmap and + answer quicker, currently we need to do a full scan for each element. + Is_In values -> values.contains + Not_In values -> elem -> values.contains elem . 
not ## PRIVATE sql_like_to_regex sql_pattern = diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Duration.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Duration.enso index d6fc64af3a..d309496003 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Duration.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Time/Duration.enso @@ -273,6 +273,6 @@ type Duration import Standard.Base.Data.Time.Duration - example_is_empty = 10.seconds.is_empty + example_is_empty = Duration.zero.is_empty is_empty : Boolean is_empty self = self.to_vector . all (==0) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Network/Http.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Network/Http.enso index d927161908..6bf58f5983 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Network/Http.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Network/Http.enso @@ -46,7 +46,7 @@ polyglot java import org.enso.base.Http_Utils import Standard.Base.Network.Proxy example_new = - Http.new (timeout = 30.seconds) (proxy = Proxy.new "example.com" 8080) + Http.new (timeout = (Duration.new seconds=30)) (proxy = Proxy.new "example.com" 8080) new : Duration -> Boolean -> Proxy -> Http new (timeout = (Duration.new seconds=10)) (follow_redirects = True) (proxy = Proxy.System) (version = Version.Http_1_1) = Http_Data timeout follow_redirects proxy version @@ -595,7 +595,7 @@ type Http example_request = form = [Form.text_field "name" "John Doe"] req = Request.new Method.Post "http://httpbin.org/post" . with_form form - http = Http.new (timeout = 30.seconds) + http = Http.new (timeout = (Duration.new seconds=30)) http.request req request : Request -> Response ! 
Request_Error request self req = diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso index 6d7a467a30..4826673c6d 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso @@ -13,7 +13,7 @@ import project.Internal.IR.Internal_Column.Internal_Column from project.Data.Table import Table, freshen_columns -from project.Errors import Unsupported_Database_Operation_Error +from project.Errors import Unsupported_Database_Operation_Error, Unsupported_Database_Operation_Error_Data type Column @@ -130,7 +130,7 @@ type Column prepare_operand operand operand_type = case operand of other_column : Column -> if Helpers.check_integrity self other_column then other_column.expression else - Error.throw <| Unsupported_Database_Operation_Error "Cannot use columns coming from different contexts in one expression without a join." + Error.throw <| Unsupported_Database_Operation_Error_Data "Cannot use columns coming from different contexts in one expression without a join." constant -> actual_operand_type = operand_type.if_nothing self.sql_type Expression.Constant actual_operand_type constant @@ -394,6 +394,32 @@ type Column / : Column | Any -> Column / self other = self.make_binary_op "/" other + ## Element-wise modulus. + + Arguments: + - other: The value to modulo `self` against. If `other` is a column, the + modulus is performed pairwise between corresponding elements of `self` + and `other`. + + Returns a column with results of modulus this column's elements against + `other`. + + > Example + Modulus of two columns against each other. + + import Standard.Examples + + example_mod = Examples.integer_column % Examples.decimal_column + + > Example + Modulus of a column with a number. 
+ + import Standard.Examples + + example_mod = Examples.integer_column % 3 + % : Column | Any -> Column + % self other = self.make_binary_op "%" other + ## UNSTABLE Element-wise boolean conjunction. @@ -433,20 +459,20 @@ type Column Returns a column of booleans, with `True` items at the positions where this column contains a `Nothing`. is_missing : Column - is_missing self = self.make_unary_op "ISNULL" new_type=SQL_Type.boolean + is_missing self = self.make_unary_op "IS_NULL" new_type=SQL_Type.boolean ## PRIVATE Returns a column of booleans, with `True` items at the positions where this column contains an empty string or `Nothing`. is_empty : Column - is_empty self = self.make_unary_op "ISEMPTY" new_type=SQL_Type.boolean + is_empty self = self.make_unary_op "IS_EMPTY" new_type=SQL_Type.boolean ## UNSTABLE Returns a new column where missing values have been replaced with the provided default. fill_missing : Any -> Column - fill_missing self default = self.make_binary_op "FILLNULL" default + fill_missing self default = self.make_binary_op "FILL_NULL" default ## UNSTABLE @@ -495,7 +521,7 @@ type Column take self range=(First 1) = _ = range msg = "`Column.take` is not yet implemented." - Error.throw (Unsupported_Database_Operation_Error msg) + Error.throw (Unsupported_Database_Operation_Error_Data msg) ## UNSTABLE Creates a new Column from the input with the specified range of rows @@ -507,7 +533,7 @@ type Column drop self range=(First 1) = _ = range msg = "`Column.drop` is not yet implemented." - Error.throw (Unsupported_Database_Operation_Error msg) + Error.throw (Unsupported_Database_Operation_Error_Data msg) ## UNSTABLE @@ -551,10 +577,63 @@ type Column contains : Column | Text -> Column contains self other = self.make_binary_op "contains" other new_type=SQL_Type.boolean - ## PRIVATE - Checks for each element of the column if it matches an SQL-like pattern. + ## Checks for each element of the column if it matches an SQL-like pattern. 
+ + Arguments: + - pattern: The pattern to match `self` against. If it is a column, the + operation is performed pairwise between corresponding elements of + `self` and that column. The pattern is an SQL-like pattern, where + `%` matches any sequence of characters and `_` matches any single + character. + + > Example + Check if elements of a column start with 'F' and end with a dot. + + import Standard.Examples + + example_contains = Examples.text_column_1.like "F%." like : Column | Text -> Column - like self other = self.make_binary_op "LIKE" other new_type=SQL_Type.boolean + like self pattern = self.make_binary_op "LIKE" pattern new_type=SQL_Type.boolean + + ## Checks for each element of the column if it is contained within the + provided vector. + + Arguments: + - vector: A vector of elements. The resulting column will contain true at + the positions where the corresponding element of `self` is contained + in `vector`. + + > Example + Check if elements of a column are contained in a provided vector. + + import Standard.Examples + + example_contains = Examples.text_column_1.is_in [1, 2, 5] + is_in self vector = + ## This is slightly hacky - we don't provide operand types as we want to + allow any type to get through and currently we do not have a mapping + from Enso types to SQL types (it may be available in the future). So + we just rely on Nothing resolving to the current column type. That + type may not always match the operands, but the current + implementation uses this type only for two purposes: generated SQL + visualization (so the color will be consistent with the column type + and not the value type - that can be confusing, we probably want to + fix it later) and setting up the query - but at the set up this only + applies to adding nulls - setting any other object does not check the + type at this level anyway. 
+ partitioned = vector.partition .is_nothing + nulls = partitioned.first + non_nulls = partitioned.second + ## Since SQL `NULL IN (NULL)` yields `NULL`, we need to handle this case + separately. So we handle all non-null values using `IS_IN` and then + `OR` that with a null check (if the vector contained any nulls to + begin with). The implementation also ensures that even + `NULL IN (...)` is coalesced to False, so that negation works as + expected. + is_in_not_null = self.make_op "IS_IN" operands=non_nulls new_type=SQL_Type.boolean + case nulls.not_empty of + True -> is_in_not_null || self.is_missing + False -> is_in_not_null ## PRIVATE as_internal : Internal_Column diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso index 8405041367..04d2876f37 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Base_Generator.enso @@ -11,7 +11,7 @@ import project.Internal.IR.Nulls_Order.Nulls_Order import project.Internal.IR.Query.Query from project.Data.SQL import code -from project.Errors import Unsupported_Database_Operation_Error +from project.Errors import Unsupported_Database_Operation_Error_Data type Internal_Dialect @@ -169,14 +169,15 @@ base_dialect = unary = name -> [name, make_unary_op name] fun = name -> [name, make_function name] - arith = [bin "+", bin "-", bin "*", bin "/"] + arith = [bin "+", bin "-", bin "*", bin "/", bin "%"] logic = [bin "AND", bin "OR", unary "NOT"] compare = [bin "=", bin "!=", bin "<", bin ">", bin "<=", bin ">=", ["BETWEEN", make_between]] agg = [fun "MAX", fun "MIN", fun "AVG", fun "SUM"] counts = [fun "COUNT", ["COUNT_ROWS", make_constant "COUNT(*)"]] - text = [["ISEMPTY", make_is_empty], bin "LIKE"] - nulls = [["ISNULL", make_right_unary_op "IS NULL"], ["FILLNULL", make_function "COALESCE"]] - base_map = 
Map.from_vector (arith + logic + compare + agg + counts + text + nulls) + text = [["IS_EMPTY", make_is_empty], bin "LIKE"] + nulls = [["IS_NULL", make_right_unary_op "IS NULL"], ["FILL_NULL", make_function "COALESCE"]] + contains = [["IS_IN", make_is_in]] + base_map = Map.from_vector (arith + logic + compare + agg + counts + text + nulls + contains) Internal_Dialect.Value base_map wrap_in_quotes ## PRIVATE @@ -188,7 +189,7 @@ make_is_empty arguments = case arguments.length of is_empty = (arg ++ " = ''").paren (is_null ++ " OR " ++ is_empty).paren _ -> - Error.throw <| Illegal_State_Error_Data ("Invalid amount of arguments for operation ISEMPTY") + Error.throw <| Illegal_State_Error_Data ("Invalid amount of arguments for operation IS_EMPTY") ## PRIVATE make_between : Vector Builder -> Builder @@ -201,6 +202,21 @@ make_between arguments = case arguments.length of _ -> Error.throw <| Illegal_State_Error_Data ("Invalid amount of arguments for operation BETWEEN") +## PRIVATE +make_is_in : Vector Builder -> Builder +make_is_in arguments = case arguments.length of + 0 -> Error.throw <| Illegal_State_Error_Data ("The operation IS_IN requires at least one argument.") + ## If only the self argument is provided, no value will ever be in the empty list, so we just short circuit to false. + `IN ()` would be more meaningful, but it is a syntax error. + 1 -> code '2=1' . paren + _ -> + expr = arguments.first + list = arguments.tail + is_in = expr ++ " IN (" ++ (SQL.join ", " list) ++ ")" + ## We ensure that even `NULL IN (...)` is coalesced to False, so that + negation will work as expected. + code "COALESCE(" ++ is_in ++ ", 2=1)" + ## PRIVATE Builds code for an expression. @@ -214,7 +230,7 @@ generate_expression dialect expr = case expr of dialect.wrap_identifier origin ++ '.' 
++ dialect.wrap_identifier name Expression.Constant sql_type value -> SQL.interpolation sql_type value Expression.Operation kind arguments -> - op = dialect.operation_map.get_or_else kind (Error.throw <| Unsupported_Database_Operation_Error kind) + op = dialect.operation_map.get_or_else kind (Error.throw <| Unsupported_Database_Operation_Error_Data kind) parsed_args = arguments.map (generate_expression dialect) op parsed_args _ : Order_Descriptor -> generate_order dialect expr @@ -337,7 +353,7 @@ generate_query dialect query = case query of code "SELECT * " ++ generate_select_context dialect ctx Query.Insert table_name pairs -> generate_insert_query dialect table_name pairs - _ -> Error.throw <| Unsupported_Database_Operation_Error "Unsupported query type." + _ -> Error.throw <| Unsupported_Database_Operation_Error_Data "Unsupported query type." ## PRIVATE Arguments: diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso index 5b95026064..719b15d4c8 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Dialect.enso @@ -13,7 +13,7 @@ import project.Internal.IR.Order_Descriptor.Order_Descriptor import project.Internal.IR.Nulls_Order.Nulls_Order from project.Data.SQL import code -from project.Errors import Unsupported_Database_Operation_Error +from project.Errors import Unsupported_Database_Operation_Error_Data ## PRIVATE @@ -232,7 +232,7 @@ make_order_descriptor internal_column sort_direction text_ordering = case internal_column.sql_type.is_likely_text of True -> ## In the future we can modify this error to suggest using a custom defined collation. - if text_ordering.sort_digits_as_numbers then Error.throw (Unsupported_Database_Operation_Error "Natural ordering is currently not supported. 
You may need to materialize the Table to perform this operation.") else + if text_ordering.sort_digits_as_numbers then Error.throw (Unsupported_Database_Operation_Error_Data "Natural ordering is currently not supported. You may need to materialize the Table to perform this operation.") else case text_ordering.case_sensitivity of Nothing -> Order_Descriptor.Value internal_column.expression sort_direction nulls_order=nulls collation=Nothing @@ -240,7 +240,7 @@ make_order_descriptor internal_column sort_direction text_ordering = Order_Descriptor.Value internal_column.expression sort_direction nulls_order=nulls collation="ucs_basic" Case_Sensitivity.Insensitive locale -> case locale == Locale.default of False -> - Error.throw (Unsupported_Database_Operation_Error "Case insensitive ordering with custom locale is currently not supported. You may need to materialize the Table to perform this operation.") + Error.throw (Unsupported_Database_Operation_Error_Data "Case insensitive ordering with custom locale is currently not supported. You may need to materialize the Table to perform this operation.") True -> upper = Expression.Operation "UPPER" [internal_column.expression] folded_expression = Expression.Operation "LOWER" [upper] diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso index 22a30d1a35..ed46acc275 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso @@ -374,6 +374,32 @@ type Column / : Column | Any -> Column / self other = run_vectorized_binary_op self '/' (/) other + ## Element-wise modulus. + + Arguments: + - other: The value to modulo `self` against. If `other` is a column, the + modulus is performed pairwise between corresponding elements of `self` + and `other`. + + Returns a column with results of modulus this column's elements against + `other`. 
+ + > Example + Modulus of two columns against each other. + + import Standard.Examples + + example_mod = Examples.integer_column % Examples.decimal_column + + > Example + Modulus of a column with a number. + + import Standard.Examples + + example_mod = Examples.integer_column % 3 + % : Column | Any -> Column + % self other = run_vectorized_binary_op self '%' (%) other + ## ALIAS AND Columns Element-wise boolean conjunction. @@ -585,11 +611,41 @@ type Column contains self other = run_vectorized_binary_op self "contains" (a -> b -> a.contains b) other - ## PRIVATE - Checks for each element of the column if it matches an SQL-like pattern. + ## Checks for each element of the column if it matches an SQL-like pattern. + + Arguments: + - pattern: The pattern to match `self` against. If it is a column, the + operation is performed pairwise between corresponding elements of + `self` and that column. The pattern is an SQL-like pattern, where + `%` matches any sequence of characters and `_` matches any single + character. + + > Example + Check if elements of a column start with 'F' and end with a dot. + + import Standard.Examples + + example_contains = Examples.text_column_1.like "F%." like : Column | Text -> Column - like self other = - run_vectorized_binary_op self "like" (_ -> _ -> Error.throw (Illegal_State_Error "The `Like` operation should only be used on Text columns.")) other + like self pattern = + run_vectorized_binary_op self "like" (_ -> _ -> Error.throw (Illegal_State_Error "The `Like` operation should only be used on Text columns.")) pattern + + ## Checks for each element of the column if it is contained within the + provided vector. + + Arguments: + - vector: A vector of elements. The resulting column will contain true at + the positions where the corresponding element of `self` is contained + in `vector`. + + > Example + Check if elements of a column are contained in a provided vector. 
+ + import Standard.Examples + + example_contains = Examples.text_column_1.is_in [1, 2, 5] + is_in self vector = + run_vectorized_binary_op self "is_in" (elem -> vector -> vector.contains elem) vector skip_nulls=False ## ALIAS Transform Column @@ -1137,18 +1193,22 @@ type Empty_Error - name: The name of the vectorized operation. - fallback_fn: A function used if the vectorized operation isn't available. - operand: The operand to apply to the function after `column`. + - skip_nulls: Specifies if nulls should be skipped. If set to `True`, a null + value results in null without passing it to the function. If set to + `False`, the null values are passed as any other value and can have custom + handling logic. run_vectorized_binary_op : Column -> Text -> (Any -> Any) -> Any -> Column -run_vectorized_binary_op column name fallback_fn operand = case operand of +run_vectorized_binary_op column name fallback_fn operand skip_nulls=True = case operand of Column.Column_Data col2 -> s1 = column.java_column.getStorage ix = column.java_column.getIndex s2 = col2.getStorage - rs = s1.zip name fallback_fn s2 True + rs = s1.zip name fallback_fn s2 skip_nulls Column.Column_Data (Java_Column.new "Result" ix rs) _ -> s1 = column.java_column.getStorage ix = column.java_column.getIndex - rs = s1.bimap name fallback_fn operand + rs = s1.bimap name fallback_fn operand skip_nulls Column.Column_Data (Java_Column.new "Result" ix rs) ## PRIVATE diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index 4abffe477a..6ca21a5698 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -7,6 +7,7 @@ import Standard.Base.Data.Text.Case import Standard.Base.System.Platform import project.Data.Column.Column +from project.Data.Column import get_item_string import project.Data.Column_Name_Mapping.Column_Name_Mapping import 
project.Data.Column_Selector.Column_Selector import project.Data.Data_Formatter.Data_Formatter diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Filter_Condition_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Filter_Condition_Helpers.enso index 24deeea8c7..21c376b550 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Filter_Condition_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Filter_Condition_Helpers.enso @@ -56,6 +56,9 @@ make_filter_column source_column filter_condition = case filter_condition of Value_Type.expect_text source_column.value_type <| expect_column_or_value_as_text "pattern" pattern <| source_column.like pattern . not + # Vector + Is_In values -> source_column.is_in values + Not_In values -> source_column.is_in values . not ## PRIVATE expect_column_or_value_as_text field_name column_or_value ~action = case column_or_value of diff --git a/std-bits/base/src/main/java/org/enso/base/polyglot/NumericConverter.java b/std-bits/base/src/main/java/org/enso/base/polyglot/NumericConverter.java new file mode 100644 index 0000000000..d1af2066aa --- /dev/null +++ b/std-bits/base/src/main/java/org/enso/base/polyglot/NumericConverter.java @@ -0,0 +1,108 @@ +package org.enso.base.polyglot; + +import java.math.BigDecimal; + +/** + * The numeric converter deals with conversions of Java numeric types to the two main types + * supported by Enso - Long for integers and Double for decimals. Any other types are coerced to one + * of these types. + * + *

<p>It provides two concepts - coercion - which allows to coerce an integer type to a decimal, but + * will not convert a decimal to an integer even if it has 0 fractional part. Then there is + * conversion which allows to convert a decimal with 0 fractional part to an integer. Coercion + * should be used when we care about the original type of the object (i.e. we want any decimals to + * require decimal storage even if they have 0 fractional part). Conversion is to be used when we + * want to be consistent with Enso's equality semantics where 2 == 2.0. + */ +public class NumericConverter { + /** + * Coerces a number (possibly an integer) to a Double. + * + *

<p>Will throw an exception if the object is not a number. + */ + public static double coerceToDouble(Object o) { + return switch (o) { + case Double x -> x; + case BigDecimal x -> x.doubleValue(); + case Float x -> x.doubleValue(); + default -> (double) coerceToLong(o); + }; + } + + /** + * Coerces a number to an Integer. + * + *

<p>Will throw an exception if the object is not an integer. + * + *

<p>Decimal values are not accepted. + */ + public static long coerceToLong(Object o) { + return switch (o) { + case Long x -> x; + case Integer x -> x.longValue(); + case Short x -> x.longValue(); + case Byte x -> x.longValue(); + default -> throw new UnsupportedOperationException(); + }; + } + + /** Returns true if the object is any supported number. */ + public static boolean isCoercibleToDouble(Object o) { + return o instanceof Double + || o instanceof BigDecimal + || o instanceof Float + || isCoercibleToLong(o); + } + + /** + * Returns true if the object is any supported integer. + * + *

<p>Returns false for decimals with 0 fractional part - the type itself must be an integer type. + */ + public static boolean isCoercibleToLong(Object o) { + return o instanceof Long || o instanceof Integer || o instanceof Short || o instanceof Byte; + } + + /** + * Tries converting the value to a Double. + * + *

<p>It will return null if the object represented a non-numeric value. + */ + public static Double tryConvertingToDouble(Object o) { + return switch (o) { + case Double x -> x; + case BigDecimal x -> x.doubleValue(); + case Float x -> x.doubleValue(); + case Long x -> x.doubleValue(); + case Integer x -> x.doubleValue(); + case Short x -> x.doubleValue(); + case Byte x -> x.doubleValue(); + case null, default -> null; + }; + } + + /** + * Tries converting the value to a Long. + * + *

Decimal number types are accepted, only if their fractional part is 0. It will return null + * if the object represented a non-integer value. + */ + public static Long tryConvertingToLong(Object o) { + return switch (o) { + case Long x -> x; + case Integer x -> x.longValue(); + case Short x -> x.longValue(); + case Byte x -> x.longValue(); + case Double x -> x % 1.0 == 0.0 ? x.longValue() : null; + case Float x -> x % 1.0f == 0.0f ? x.longValue() : null; + case BigDecimal x -> { + try { + yield x.longValueExact(); + } catch (ArithmeticException e) { + yield null; + } + } + case null, default -> null; + }; + } +} diff --git a/std-bits/base/src/main/java/org/enso/base/Polyglot_Utils.java b/std-bits/base/src/main/java/org/enso/base/polyglot/Polyglot_Utils.java similarity index 56% rename from std-bits/base/src/main/java/org/enso/base/Polyglot_Utils.java rename to std-bits/base/src/main/java/org/enso/base/polyglot/Polyglot_Utils.java index 23abd642bd..14d0592ce7 100644 --- a/std-bits/base/src/main/java/org/enso/base/Polyglot_Utils.java +++ b/std-bits/base/src/main/java/org/enso/base/polyglot/Polyglot_Utils.java @@ -1,11 +1,14 @@ -package org.enso.base; - -import org.graalvm.polyglot.Value; +package org.enso.base.polyglot; import java.time.LocalDate; import java.time.LocalDateTime; +import org.graalvm.polyglot.Value; public class Polyglot_Utils { + /** + * Converts a polyglot Value ensuring that various date/time types are converted to the correct + * type. + */ public static Object convertPolyglotValue(Value item) { if (item.isDate()) { LocalDate d = item.asDate(); @@ -26,12 +29,14 @@ public class Polyglot_Utils { return item.as(Object.class); } - /** A helper functions for situations where we cannot use the Value conversion directly. - *

- * Mostly happens due to the issue: https://github.com/oracle/graal/issues/4967 - * Once that issue is resolved, we should probably remove this helper. - *

- * In that case we take a generic Object, knowing that the values of interest to us will be passed as Value anyway - so we can check that and fire the conversion if needed. + /** + * A helper function for situations where we cannot use the Value conversion directly. + * + *

Mostly happens due to the issue: https://github.com/oracle/graal/issues/4967. Once that issue + * is resolved, we should probably remove this helper. + * + *

In that case we take a generic Object, knowing that the values of interest to us will be + * passed as Value anyway - so we can check that and fire the conversion if needed. */ public static Object convertPolyglotValue(Object item) { if (item instanceof Value v) { diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java index 82ca979e37..6b6ddf25a9 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Concatenate.java @@ -8,7 +8,7 @@ import org.enso.table.data.table.problems.UnquotedDelimiter; import java.util.List; public class Concatenate extends Aggregator { - private final Storage storage; + private final Storage storage; private final String separator; private final String prefix; private final String suffix; diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/CountDistinct.java b/std-bits/table/src/main/java/org/enso/table/aggregations/CountDistinct.java index 43bd6d9855..6a6b7351c2 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/CountDistinct.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/CountDistinct.java @@ -15,7 +15,7 @@ import java.util.List; * does count when all items are null. 
*/ public class CountDistinct extends Aggregator { - private final Storage[] storage; + private final Storage[] storage; private final Comparator objectComparator; private final boolean ignoreAllNull; diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/CountEmpty.java b/std-bits/table/src/main/java/org/enso/table/aggregations/CountEmpty.java index 9af37f14a9..79059bc594 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/CountEmpty.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/CountEmpty.java @@ -11,7 +11,7 @@ import java.util.List; * counts null or empty entries. If `isEmpty` is false, counts non-empty entries. */ public class CountEmpty extends Aggregator { - private final Storage storage; + private final Storage storage; private final boolean isEmpty; /** diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/CountNothing.java b/std-bits/table/src/main/java/org/enso/table/aggregations/CountNothing.java index 3995f837e3..7fff60818b 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/CountNothing.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/CountNothing.java @@ -10,7 +10,7 @@ import java.util.List; * counts null entries. If `isNothing` is false, counts non-null entries. */ public class CountNothing extends Aggregator { - private final Storage storage; + private final Storage storage; private final boolean isNothing; /** diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/First.java b/std-bits/table/src/main/java/org/enso/table/aggregations/First.java index dcb6db4660..7e564545b0 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/First.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/First.java @@ -10,8 +10,8 @@ import java.util.List; /** Aggregate Column finding the first value in a group. 
*/ public class First extends Aggregator { - private final Storage storage; - private final Storage[] orderByColumns; + private final Storage storage; + private final Storage[] orderByColumns; private final int[] orderByDirections; private final Comparator objectComparator; private final boolean ignoreNothing; diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/GroupBy.java b/std-bits/table/src/main/java/org/enso/table/aggregations/GroupBy.java index a9342d7766..1e1e05ff37 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/GroupBy.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/GroupBy.java @@ -7,7 +7,7 @@ import java.util.List; /** Aggregate Column getting the grouping key. */ public class GroupBy extends Aggregator { - private final Storage storage; + private final Storage storage; public GroupBy(String name, Column column) { super(name, column.getStorage().getType()); diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Last.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Last.java index 02843e4a39..d259c3fa5d 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/Last.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Last.java @@ -9,8 +9,8 @@ import java.util.Comparator; import java.util.List; public class Last extends Aggregator { - private final Storage storage; - private final Storage[] orderByColumns; + private final Storage storage; + private final Storage[] orderByColumns; private final int[] orderByDirections; private final Comparator objectComparator; private final boolean ignoreNothing; diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Mean.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Mean.java index 8f5052a5e2..bf101a0dd7 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/Mean.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Mean.java @@ -18,7 +18,7 @@ 
public class Mean extends Aggregator { } } - private final Storage storage; + private final Storage storage; public Mean(String name, Column column) { super(name, Storage.Type.DOUBLE); diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/MinOrMax.java b/std-bits/table/src/main/java/org/enso/table/aggregations/MinOrMax.java index 761c7b8219..2b99a7803f 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/MinOrMax.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/MinOrMax.java @@ -11,7 +11,7 @@ import java.util.List; * Aggregate Column finding the minimum (minOrMax = -1) or maximum (minOrMax = 1) entry in a group. */ public class MinOrMax extends Aggregator { - private final Storage storage; + private final Storage storage; private final int minOrMax; private final Comparator objectComparator; diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Mode.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Mode.java index 6414b567db..78725e689d 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/Mode.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Mode.java @@ -10,7 +10,7 @@ import java.util.Map; /** Aggregate Column computing the most common value in a group (ignoring Nothing). */ public class Mode extends Aggregator { - private final Storage storage; + private final Storage storage; public Mode(String name, Column column) { super(name, column.getStorage().getType()); diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Percentile.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Percentile.java index c8fb79341a..2e6a75ac80 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/Percentile.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Percentile.java @@ -11,7 +11,7 @@ import java.util.TreeMap; /** Aggregate Column computing a percentile value in a group. 
*/ public class Percentile extends Aggregator { - private final Storage storage; + private final Storage storage; private final double percentile; public Percentile(String name, Column column, double percentile) { diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/ShortestOrLongest.java b/std-bits/table/src/main/java/org/enso/table/aggregations/ShortestOrLongest.java index 8830d5a3f5..324400334e 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/ShortestOrLongest.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/ShortestOrLongest.java @@ -9,7 +9,7 @@ import java.util.List; /** Aggregate Column finding the longest or shortest string in a group. */ public class ShortestOrLongest extends Aggregator { - private final Storage storage; + private final Storage storage; private final int minOrMax; public ShortestOrLongest(String name, Column column, int minOrMax) { diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/StandardDeviation.java b/std-bits/table/src/main/java/org/enso/table/aggregations/StandardDeviation.java index 041815aab5..07543ded5f 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/StandardDeviation.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/StandardDeviation.java @@ -20,7 +20,7 @@ public class StandardDeviation extends Aggregator { } } - private final Storage storage; + private final Storage storage; private final boolean population; public StandardDeviation(String name, Column column, boolean population) { diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Sum.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Sum.java index 1a9f5941a5..0a068944a8 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/Sum.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Sum.java @@ -8,7 +8,7 @@ import java.util.List; /** Aggregate Column computing the total value in a group. 
*/ public class Sum extends Aggregator { - private final Storage storage; + private final Storage storage; public Sum(String name, Column column) { super(name, Storage.Type.DOUBLE); diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/BoolBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/BoolBuilder.java index 01ab8825c5..bf4fd8c37f 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/BoolBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/BoolBuilder.java @@ -62,7 +62,7 @@ public class BoolBuilder extends TypedBuilder { } @Override - public Storage seal() { + public Storage seal() { return new BoolStorage(vals, isNa, size, false); } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/Builder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/Builder.java index 847a63da59..b22af959ac 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/Builder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/Builder.java @@ -36,5 +36,5 @@ public abstract class Builder { public abstract int getCurrentSize(); /** @return a storage containing all the items appended so far */ - public abstract Storage seal(); + public abstract Storage seal(); } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/DateBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/DateBuilder.java index a7bc719be8..ba07de9e2b 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/DateBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/DateBuilder.java @@ -32,7 +32,7 @@ public class DateBuilder extends TypedBuilderImpl { } @Override - public Storage seal() { + public Storage seal() { return new DateStorage(data, currentSize); } } 
diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/DateTimeBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/DateTimeBuilder.java index 55a2d1198c..5eb6ec5193 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/DateTimeBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/DateTimeBuilder.java @@ -32,7 +32,7 @@ public class DateTimeBuilder extends TypedBuilderImpl { } @Override - public Storage seal() { + public Storage seal() { return new DateTimeStorage(data, currentSize); } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/InferredBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/InferredBuilder.java index 68b3bce030..1d4f1c3376 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/InferredBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/InferredBuilder.java @@ -1,5 +1,6 @@ package org.enso.table.data.column.builder.object; +import org.enso.base.polyglot.NumericConverter; import org.enso.table.data.column.storage.Storage; import java.math.BigDecimal; @@ -81,10 +82,10 @@ public class InferredBuilder extends Builder { int initialCapacity = Math.max(initialSize, currentSize); if (o instanceof Boolean) { currentBuilder = new BoolBuilder(); - } else if (o instanceof Double || o instanceof BigDecimal) { - currentBuilder = NumericBuilder.createDoubleBuilder(initialCapacity); - } else if (o instanceof Long) { + } else if (NumericConverter.isCoercibleToLong(o)) { currentBuilder = NumericBuilder.createLongBuilder(initialCapacity); + } else if (NumericConverter.isCoercibleToDouble(o)) { + currentBuilder = NumericBuilder.createDoubleBuilder(initialCapacity); } else if (o instanceof LocalDate) { currentBuilder = new DateBuilder(initialCapacity); } else if (o instanceof LocalTime) { @@ -106,11 +107,15 
@@ public class InferredBuilder extends Builder { new RetypeInfo(Boolean.class, Storage.Type.BOOL), new RetypeInfo(Long.class, Storage.Type.LONG), new RetypeInfo(Double.class, Storage.Type.DOUBLE), + new RetypeInfo(String.class, Storage.Type.STRING), new RetypeInfo(BigDecimal.class, Storage.Type.DOUBLE), new RetypeInfo(LocalDate.class, Storage.Type.DATE), new RetypeInfo(LocalTime.class, Storage.Type.TIME_OF_DAY), new RetypeInfo(ZonedDateTime.class, Storage.Type.DATE_TIME), - new RetypeInfo(String.class, Storage.Type.STRING)); + new RetypeInfo(Float.class, Storage.Type.DOUBLE), + new RetypeInfo(Integer.class, Storage.Type.LONG), + new RetypeInfo(Short.class, Storage.Type.LONG), + new RetypeInfo(Byte.class, Storage.Type.LONG)); private void retypeAndAppend(Object o) { for (RetypeInfo info : retypePairs) { @@ -138,7 +143,7 @@ public class InferredBuilder extends Builder { } @Override - public Storage seal() { + public Storage seal() { if (currentBuilder == null) { initBuilderFor(null); } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/NumericBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/NumericBuilder.java index ea9d01ac1c..065a067a65 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/NumericBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/NumericBuilder.java @@ -1,10 +1,10 @@ package org.enso.table.data.column.builder.object; +import org.enso.base.polyglot.NumericConverter; import org.enso.table.data.column.storage.DoubleStorage; import org.enso.table.data.column.storage.LongStorage; import org.enso.table.data.column.storage.Storage; -import java.math.BigDecimal; import java.util.Arrays; import java.util.BitSet; @@ -69,37 +69,20 @@ public class NumericBuilder extends TypedBuilder { if (o == null) { isMissing.set(currentSize++); } else if (isDouble) { - double value = toDouble(o); + double value = 
NumericConverter.coerceToDouble(o); data[currentSize++] = Double.doubleToRawLongBits(value); } else { - data[currentSize++] = toLong(o); + data[currentSize++] = NumericConverter.coerceToLong(o); } } @Override public boolean accepts(Object o) { - if (isDouble && (o instanceof Double || o instanceof BigDecimal)) { - return true; + if (isDouble) { + return NumericConverter.isCoercibleToDouble(o); + } else { + return NumericConverter.isCoercibleToLong(o); } - - return o instanceof Long || o instanceof Integer || o instanceof Byte; - } - - private static double toDouble(Object o) { - return switch (o) { - case Double x -> x; - case BigDecimal x -> x.doubleValue(); - default -> (double) toLong(o); - }; - } - - private static long toLong(Object o) { - return switch (o) { - case Long x -> x; - case Integer x -> x.longValue(); - case Byte x -> x.longValue(); - default -> throw new UnsupportedOperationException(); - }; } @Override @@ -159,7 +142,7 @@ public class NumericBuilder extends TypedBuilder { } @Override - public Storage seal() { + public Storage seal() { if (isDouble) { return new DoubleStorage(data, currentSize, isMissing); } else { diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/ObjectBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/ObjectBuilder.java index 5a90814168..32eaa4b438 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/ObjectBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/ObjectBuilder.java @@ -67,7 +67,7 @@ public class ObjectBuilder extends TypedBuilder { } @Override - public Storage seal() { + public Storage seal() { return new ObjectStorage(data, currentSize); } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/StringBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/StringBuilder.java index fa2b4752c6..e359f922ef 100644 --- 
a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/StringBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/StringBuilder.java @@ -30,7 +30,7 @@ public class StringBuilder extends TypedBuilderImpl { } @Override - public Storage seal() { + public Storage seal() { return new StringStorage(data, currentSize); } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/TimeOfDayBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/TimeOfDayBuilder.java index e0c6731511..f3e847eacb 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/TimeOfDayBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/TimeOfDayBuilder.java @@ -32,7 +32,7 @@ public class TimeOfDayBuilder extends TypedBuilderImpl { } @Override - public Storage seal() { + public Storage seal() { return new TimeOfDayStorage(data, currentSize); } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/string/PrimInferredStorageBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/string/PrimInferredStorageBuilder.java index ab0c2714a0..51088c1073 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/string/PrimInferredStorageBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/string/PrimInferredStorageBuilder.java @@ -109,7 +109,7 @@ public class PrimInferredStorageBuilder extends StorageBuilder { /** @inheritDoc */ @Override - public Storage seal() { + public Storage seal() { if (type == Type.LONG) { return new LongStorage(data, size, isMissing); } else { diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/string/StorageBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/string/StorageBuilder.java index 09fa48ba86..54175a9c1c 100644 --- 
a/std-bits/table/src/main/java/org/enso/table/data/column/builder/string/StorageBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/string/StorageBuilder.java @@ -19,5 +19,5 @@ public abstract class StorageBuilder { * * @return the storage resulting from this builder's operation. */ - public abstract Storage seal(); + public abstract Storage seal(); } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/string/StringStorageBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/string/StringStorageBuilder.java index 6ca0eca04e..2f072e68f5 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/string/StringStorageBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/string/StringStorageBuilder.java @@ -1,5 +1,6 @@ package org.enso.table.data.column.builder.string; +import org.enso.table.data.column.storage.Storage; import org.enso.table.data.column.storage.StringStorage; /** A column builder appending all the values passed to it in an unchanged form. */ @@ -44,7 +45,7 @@ public class StringStorageBuilder extends StorageBuilder { /** @inheritDoc */ @Override - public StringStorage seal() { + public Storage seal() { return new StringStorage(data, size); } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/Aggregator.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/Aggregator.java index 748d15724b..86d817f27c 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/Aggregator.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/Aggregator.java @@ -24,5 +24,5 @@ public abstract class Aggregator { * * @return the storage containing all aggregation results. 
*/ - public abstract Storage seal(); + public abstract Storage seal(); } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/CountAggregator.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/CountAggregator.java index 044f6e0ef6..391dbac5a2 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/CountAggregator.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/CountAggregator.java @@ -7,7 +7,7 @@ import java.util.stream.IntStream; /** Aggregates a storage by counting the non-missing values in each group. */ public class CountAggregator extends Aggregator { - private final Storage storage; + private final Storage storage; private final long[] counts; private int position = 0; @@ -16,7 +16,7 @@ public class CountAggregator extends Aggregator { * @param resultSize the exact number of times {@link Aggregator#nextGroup(IntStream)} will be * called. */ - public CountAggregator(Storage storage, int resultSize) { + public CountAggregator(Storage storage, int resultSize) { this.storage = storage; this.counts = new long[resultSize]; } @@ -27,7 +27,7 @@ public class CountAggregator extends Aggregator { } @Override - public Storage seal() { + public Storage seal() { return new LongStorage(counts); } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/FunctionAggregator.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/FunctionAggregator.java index 048bdeba73..411f0edaa6 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/FunctionAggregator.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/FunctionAggregator.java @@ -1,6 +1,6 @@ package org.enso.table.data.column.operation.aggregate; -import org.enso.base.Polyglot_Utils; +import org.enso.base.polyglot.Polyglot_Utils; import 
org.enso.table.data.column.builder.object.InferredBuilder; import org.enso.table.data.column.storage.Storage; import org.graalvm.polyglot.Value; @@ -16,7 +16,7 @@ import java.util.stream.Stream; public class FunctionAggregator extends Aggregator { private final Function, Value> aggregateFunction; private final boolean skipNa; - private final Storage storage; + private final Storage storage; private final InferredBuilder builder; /** @@ -27,7 +27,7 @@ public class FunctionAggregator extends Aggregator { */ public FunctionAggregator( Function, Value> aggregateFunction, - Storage storage, + Storage storage, boolean skipNa, int resultSize) { this.aggregateFunction = aggregateFunction; @@ -53,7 +53,7 @@ public class FunctionAggregator extends Aggregator { } @Override - public Storage seal() { + public Storage seal() { return builder.seal(); } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/numeric/LongToLongAggregator.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/numeric/LongToLongAggregator.java index 9cbbef8491..ea89191747 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/numeric/LongToLongAggregator.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/numeric/LongToLongAggregator.java @@ -53,7 +53,7 @@ public abstract class LongToLongAggregator extends Aggregator { } @Override - public Storage seal() { + public Storage seal() { return new LongStorage(items, items.length, missing); } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/numeric/NumericAggregator.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/numeric/NumericAggregator.java index e7831a3b3b..ed8bec7c01 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/numeric/NumericAggregator.java +++ 
b/std-bits/table/src/main/java/org/enso/table/data/column/operation/aggregate/numeric/NumericAggregator.java @@ -15,7 +15,7 @@ import java.util.stream.IntStream; * DoubleStorage}. */ public abstract class NumericAggregator extends Aggregator { - private final NumericStorage storage; + private final NumericStorage storage; private final long[] data; private final BitSet missing; private int position = 0; @@ -24,7 +24,7 @@ public abstract class NumericAggregator extends Aggregator { * @param storage the data source * @param resultSize the number of times {@link Aggregator#nextGroup(IntStream)} will be called */ - public NumericAggregator(NumericStorage storage, int resultSize) { + public NumericAggregator(NumericStorage storage, int resultSize) { this.storage = storage; this.data = new long[resultSize]; this.missing = new BitSet(); @@ -72,7 +72,7 @@ public abstract class NumericAggregator extends Aggregator { } @Override - public Storage seal() { + public Storage seal() { return new DoubleStorage(data, data.length, missing); } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/MapOpStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/MapOpStorage.java index 7e78cbc10e..25448ce312 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/MapOpStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/MapOpStorage.java @@ -8,12 +8,13 @@ import java.util.Map; /** * Stores map-like operations that can be performed on a given type. * - * @param the storage type handled by these operations. + * @param the type of elements stored in the storage + * @param the storage type handled by these operations. 
*/ -public class MapOpStorage { - private final Map> ops = new HashMap<>(); +public class MapOpStorage> { + private final Map> ops = new HashMap<>(); - protected MapOperation getOp(String name) { + protected MapOperation getOp(String name) { return ops.get(name); } @@ -36,7 +37,7 @@ public class MapOpStorage { * @param arg the argument to pass to the operation * @return the result of running the operation */ - public Storage runMap(String n, T storage, Object arg) { + public Storage runMap(String n, S storage, Object arg) { return ops.get(n).runMap(storage, arg); } @@ -49,7 +50,7 @@ public class MapOpStorage { * @param arg the storage containing operation arguments * @return the result of running the operation */ - public Storage runZip(String n, T storage, Storage arg) { + public Storage runZip(String n, S storage, Storage arg) { return ops.get(n).runZip(storage, arg); } @@ -59,7 +60,7 @@ public class MapOpStorage { * @param op the operation to add * @return this operation set */ - public MapOpStorage add(MapOperation op) { + public MapOpStorage add(MapOperation op) { ops.put(op.getName(), op); return this; } @@ -68,23 +69,23 @@ public class MapOpStorage { * Creates a child set, containing all the operations defined in this, that can be extended * independently. 
* - * @param the desired result type + * @param the desired result type * @return a child of this storage */ - public MapOpStorage makeChild() { + public MapOpStorage makeChild() { return new ChildStorage<>(this); } - private static class ChildStorage extends MapOpStorage { - private final MapOpStorage parent; + private static class ChildStorage> extends MapOpStorage { + private final MapOpStorage parent; - private ChildStorage(MapOpStorage parent) { + private ChildStorage(MapOpStorage parent) { this.parent = parent; } @Override - protected MapOperation getOp(String name) { - MapOperation local = super.getOp(name); + protected MapOperation getOp(String name) { + MapOperation local = super.getOp(name); if (local == null) return parent.getOp(name); return local; } @@ -95,12 +96,12 @@ public class MapOpStorage { } @Override - public Storage runMap(String n, T storage, Object arg) { + public Storage runMap(String n, S storage, Object arg) { return getOp(n).runMap(storage, arg); } @Override - public Storage runZip(String n, T storage, Storage arg) { + public Storage runZip(String n, S storage, Storage arg) { return getOp(n).runZip(storage, arg); } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/MapOperation.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/MapOperation.java index 6a42c0005a..67398d1858 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/MapOperation.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/MapOperation.java @@ -7,7 +7,7 @@ import org.enso.table.data.column.storage.Storage; * * @param the supported storage type. 
*/ -public abstract class MapOperation { +public abstract class MapOperation> { private final String name; /** @@ -26,7 +26,7 @@ public abstract class MapOperation { * @param arg the argument passed to the operation * @return the result of running the operation */ - public abstract Storage runMap(I storage, Object arg); + public abstract Storage runMap(I storage, Object arg); /** * Run the operation in zip mode @@ -35,7 +35,7 @@ public abstract class MapOperation { * @param arg the storage providing second arguments to the operation * @return the result of running the operation */ - public abstract Storage runZip(I storage, Storage arg); + public abstract Storage runZip(I storage, Storage arg); /** @return the name of this operation */ public String getName() { diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/SpecializedIsInOp.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/SpecializedIsInOp.java new file mode 100644 index 0000000000..21b6b7494a --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/SpecializedIsInOp.java @@ -0,0 +1,99 @@ +package org.enso.table.data.column.operation.map; + +import java.util.BitSet; +import java.util.HashSet; +import java.util.List; +import java.util.function.Function; +import org.enso.base.polyglot.Polyglot_Utils; +import org.enso.table.data.column.storage.BoolStorage; +import org.enso.table.data.column.storage.Storage; + +/** + * A specialized implementation for the IS_IN operation for builtin types, relying on hashing. Since + * for some columns we know what types of objects can be stored, we can filter out any objects that + * do not match that type and then rely on a consistent definition of hashcode for these builtin + * types (which is not available in general for custom objects). + */ +public class SpecializedIsInOp> extends MapOperation { + /** + * An optimized representation of the vector of values to match. + * + *

It indicates whether the vector contained a null value and contains a hashmap of the vector + * elements for faster contains checks. + */ + public record CompactRepresentation(HashSet coercedValues, boolean hasNulls) {} + + private final Function, CompactRepresentation> prepareList; + + /** + * Creates a new operation with a given preprocessing function. + * + *

The responsibility of the function is to analyse the list and create a hashmap of relevant + * elements, coerced to a type that is consistent with the storage type of the given column. Any + * elements not fitting the expected type can (and should) be discarded. + * + *

It is important to correctly coerce the types, for example in Enso 2 == 2.0, so if we are + * getting a Long for a DoubleColumn, it should be converted to a Double before adding it to the + * hashmap. Similarly, for LongStorage, non-integer Doubles can be ignored, but Doubles with 0 + * fractional part need to be converted into a Long. These conversions can be achieved with the + * {@code NumericConverter} class. + */ + public static > SpecializedIsInOp make( + Function, CompactRepresentation> prepareList) { + return new SpecializedIsInOp<>(prepareList); + } + + /** + * Creates a new operation which ensures the Enso Date/Time types are correctly coerced. + * + *

It uses the provided {@code storageClass} to only keep the elements that are of the same + * type as expected in the storage. + */ + public static > SpecializedIsInOp makeForTimeColumns(Class storageClass) { + return SpecializedIsInOp.make( + list -> { + HashSet set = new HashSet<>(); + boolean hasNulls = false; + for (Object o : list) { + hasNulls |= o == null; + Object coerced = Polyglot_Utils.convertPolyglotValue(o); + if (storageClass.isInstance(coerced)) { + set.add(storageClass.cast(coerced)); + } + } + return new SpecializedIsInOp.CompactRepresentation<>(set, hasNulls); + }); + } + + SpecializedIsInOp(Function, CompactRepresentation> prepareList) { + super(Storage.Maps.IS_IN); + this.prepareList = prepareList; + } + + @Override + public Storage runMap(S storage, Object arg) { + if (arg instanceof List) { + return runMap(storage, (List) arg); + } else { + throw new IllegalArgumentException("Argument to `is_in` must be a vector."); + } + } + + public Storage runMap(S storage, List arg) { + CompactRepresentation compactRepresentation = prepareList.apply(arg); + BitSet newVals = new BitSet(); + for (int i = 0; i < storage.size(); i++) { + if (storage.isNa(i) && compactRepresentation.hasNulls) { + newVals.set(i); + } else if (compactRepresentation.coercedValues.contains(storage.getItemBoxed(i))) { + newVals.set(i); + } + } + return new BoolStorage(newVals, new BitSet(), storage.size(), false); + } + + @Override + public Storage runZip(S storage, Storage arg) { + throw new IllegalStateException("Zip mode is not supported for this operation."); + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/UnaryMapOperation.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/UnaryMapOperation.java index 38e00c2e50..38ac2cbed2 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/UnaryMapOperation.java +++ 
b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/UnaryMapOperation.java @@ -7,20 +7,20 @@ import org.enso.table.data.column.storage.Storage; * * @param the supported storage type */ -public abstract class UnaryMapOperation extends MapOperation { +public abstract class UnaryMapOperation> extends MapOperation { public UnaryMapOperation(String name) { super(name); } - protected abstract Storage run(I storage); + protected abstract Storage run(I storage); @Override - public Storage runMap(I storage, Object arg) { + public Storage runMap(I storage, Object arg) { return run(storage); } @Override - public Storage runZip(I storage, Storage arg) { + public Storage runZip(I storage, Storage arg) { return run(storage); } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/bool/BooleanIsInOp.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/bool/BooleanIsInOp.java new file mode 100644 index 0000000000..016cf9ec30 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/bool/BooleanIsInOp.java @@ -0,0 +1,89 @@ +package org.enso.table.data.column.operation.map.bool; + +import java.util.BitSet; +import java.util.List; + +import org.enso.table.data.column.operation.map.MapOperation; +import org.enso.table.data.column.storage.BoolStorage; +import org.enso.table.data.column.storage.Storage; + +/** + * A specialized implementation for the IS_IN operation on booleans - since booleans have just three + * possible values we can have a highly efficient implementation that does not even rely on hashmap + * and after processing the input vector, performs the checks in constant time. 
+ */ +public class BooleanIsInOp extends MapOperation { + public BooleanIsInOp() { + super(Storage.Maps.IS_IN); + } + + @Override + public BoolStorage runMap(BoolStorage storage, Object arg) { + if (arg instanceof List) { + return runMap(storage, (List) arg); + } else { + throw new IllegalArgumentException("Argument to `is_in` must be a vector."); + } + } + + public BoolStorage runMap(BoolStorage storage, List arg) { + boolean hadTrue = false; + boolean hadFalse = false; + boolean hadNull = false; + + for (Object o : arg) { + switch (o) { + case Boolean b -> { + hadTrue |= b; + hadFalse |= !b; + } + case null -> hadNull = true; + default -> {} + } + } + + BitSet newVals; + boolean negated = false; + + if (hadNull && hadTrue && hadFalse) { + // We use empty newVals which has everything set to false and negate it to make all of that set to true with zero cost. + newVals = new BitSet(); + negated = true; + } else if (!hadNull && !hadTrue && !hadFalse) { + // No values are present, so the result is to be false everywhere. + newVals = new BitSet(); + } + else if (hadNull && !hadTrue && !hadFalse) { + // Only missing values are in the set, so we just return the missing indicator. + newVals = storage.getIsMissing(); + } else if (hadTrue && hadFalse) { // && !hadNull + // All non-missing values are in the set - so we just return the negated missing indicator. 
+ newVals = storage.getIsMissing(); + negated = true; + } else { + // hadTrue != hadFalse + newVals = storage.getValues().get(0, storage.size()); + if (hadTrue) { + if (storage.isNegated()) { + newVals.flip(0, storage.size()); + } + } else { // hadFalse + if (!storage.isNegated()) { + newVals.flip(0, storage.size()); + } + } + newVals.andNot(storage.getIsMissing()); + + if (hadNull) { + newVals.or(storage.getIsMissing()); + } + } + + return new BoolStorage(newVals, new BitSet(), storage.size(), negated); + } + + @Override + public Storage runZip(BoolStorage storage, Storage arg) { + throw new IllegalStateException("Zip mode is not supported for this operation."); + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/DoubleBooleanOp.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/DoubleBooleanOp.java index f1b370a10d..64d5b95f0b 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/DoubleBooleanOp.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/DoubleBooleanOp.java @@ -10,7 +10,7 @@ import org.enso.table.error.UnexpectedTypeException; import java.util.BitSet; /** An operation expecting a numeric argument and returning a boolean. 
*/ -public abstract class DoubleBooleanOp extends MapOperation { +public abstract class DoubleBooleanOp extends MapOperation { public DoubleBooleanOp(String name) { super(name); } @@ -59,7 +59,7 @@ public abstract class DoubleBooleanOp extends MapOperation { } @Override - public Storage runZip(DoubleStorage storage, Storage arg) { + public BoolStorage runZip(DoubleStorage storage, Storage arg) { if (arg instanceof DoubleStorage v) { BitSet newVals = new BitSet(); BitSet newMissing = new BitSet(); diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/DoubleNumericOp.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/DoubleNumericOp.java index b35f959bb7..0968ea143a 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/DoubleNumericOp.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/DoubleNumericOp.java @@ -9,7 +9,7 @@ import org.enso.table.error.UnexpectedTypeException; import java.util.BitSet; /** An operation expecting a numeric argument and returning a number. 
*/ -public abstract class DoubleNumericOp extends MapOperation { +public abstract class DoubleNumericOp extends MapOperation { public DoubleNumericOp(String name) { super(name); @@ -18,7 +18,7 @@ public abstract class DoubleNumericOp extends MapOperation { protected abstract double doDouble(double a, double b); @Override - public Storage runMap(DoubleStorage storage, Object arg) { + public Storage runMap(DoubleStorage storage, Object arg) { double x; if (arg instanceof Double) { x = (Double) arg; @@ -37,7 +37,7 @@ public abstract class DoubleNumericOp extends MapOperation { } @Override - public Storage runZip(DoubleStorage storage, Storage arg) { + public Storage runZip(DoubleStorage storage, Storage arg) { if (arg instanceof LongStorage v) { long[] out = new long[storage.size()]; BitSet newMissing = new BitSet(); diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/LongBooleanOp.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/LongBooleanOp.java index aa5aca0abd..d96a70ae1a 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/LongBooleanOp.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/LongBooleanOp.java @@ -10,7 +10,7 @@ import org.enso.table.error.UnexpectedTypeException; import java.util.BitSet; /** An operation expecting a numeric argument and returning a boolean. 
*/ -public abstract class LongBooleanOp extends MapOperation { +public abstract class LongBooleanOp extends MapOperation { public LongBooleanOp(String name) { super(name); } @@ -61,7 +61,7 @@ public abstract class LongBooleanOp extends MapOperation { } @Override - public Storage runZip(LongStorage storage, Storage arg) { + public BoolStorage runZip(LongStorage storage, Storage arg) { if (arg instanceof DoubleStorage v) { BitSet newVals = new BitSet(); BitSet newMissing = new BitSet(); diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/LongNumericOp.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/LongNumericOp.java index 9c45ed6a3a..b0657f4d03 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/LongNumericOp.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/numeric/LongNumericOp.java @@ -3,13 +3,14 @@ package org.enso.table.data.column.operation.map.numeric; import org.enso.table.data.column.operation.map.MapOperation; import org.enso.table.data.column.storage.DoubleStorage; import org.enso.table.data.column.storage.LongStorage; +import org.enso.table.data.column.storage.NumericStorage; import org.enso.table.data.column.storage.Storage; import org.enso.table.error.UnexpectedTypeException; import java.util.BitSet; /** An operation expecting a numeric argument and returning a boolean. 
*/ -public abstract class LongNumericOp extends MapOperation { +public abstract class LongNumericOp extends MapOperation { private final boolean alwaysCast; public LongNumericOp(String name, boolean alwaysCast) { @@ -26,7 +27,7 @@ public abstract class LongNumericOp extends MapOperation { public abstract long doLong(long in, long arg); @Override - public Storage runMap(LongStorage storage, Object arg) { + public NumericStorage runMap(LongStorage storage, Object arg) { if (arg instanceof Long && !alwaysCast) { long x = (Long) arg; long[] newVals = new long[storage.size()]; @@ -50,7 +51,7 @@ public abstract class LongNumericOp extends MapOperation { } @Override - public Storage runZip(LongStorage storage, Storage arg) { + public NumericStorage runZip(LongStorage storage, Storage arg) { if (arg instanceof LongStorage v) { long[] out = new long[storage.size()]; BitSet newMissing = new BitSet(); diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/text/LikeOp.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/text/LikeOp.java index 0963d6ab99..3bc0ce1841 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/text/LikeOp.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/text/LikeOp.java @@ -35,7 +35,7 @@ public class LikeOp extends StringBooleanOp { } @Override - public Storage runMap(SpecializedStorage storage, Object arg) { + public BoolStorage runMap(SpecializedStorage storage, Object arg) { if (arg == null) { BitSet newVals = new BitSet(); BitSet newMissing = new BitSet(); diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/text/StringBooleanOp.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/text/StringBooleanOp.java index 156f2dec73..d2274bf277 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/text/StringBooleanOp.java +++ 
b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/text/StringBooleanOp.java @@ -9,7 +9,7 @@ import org.enso.table.error.UnexpectedTypeException; import java.util.BitSet; -public abstract class StringBooleanOp extends MapOperation> { +public abstract class StringBooleanOp extends MapOperation> { public StringBooleanOp(String name) { super(name); } @@ -21,7 +21,7 @@ public abstract class StringBooleanOp extends MapOperation storage, Object arg) { + public BoolStorage runMap(SpecializedStorage storage, Object arg) { if (arg == null) { BitSet newVals = new BitSet(); BitSet newMissing = new BitSet(); @@ -53,7 +53,7 @@ public abstract class StringBooleanOp extends MapOperation storage, Storage arg) { + public BoolStorage runZip(SpecializedStorage storage, Storage arg) { if (arg instanceof StringStorage v) { BitSet newVals = new BitSet(); BitSet newMissing = new BitSet(); diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java index 5ebcac2ce3..9fd5807b02 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java @@ -1,8 +1,11 @@ package org.enso.table.data.column.storage; +import java.util.BitSet; +import java.util.List; import org.enso.table.data.column.operation.map.MapOpStorage; import org.enso.table.data.column.operation.map.MapOperation; import org.enso.table.data.column.operation.map.UnaryMapOperation; +import org.enso.table.data.column.operation.map.bool.BooleanIsInOp; import org.enso.table.data.index.Index; import org.enso.table.data.mask.OrderMask; import org.enso.table.data.mask.SliceRange; @@ -10,12 +13,9 @@ import org.enso.table.error.UnexpectedColumnTypeException; import org.enso.table.error.UnexpectedTypeException; import org.graalvm.polyglot.Value; -import java.util.BitSet; -import 
java.util.List; - /** A boolean column storage. */ -public class BoolStorage extends Storage { - private static final MapOpStorage ops = buildOps(); +public final class BoolStorage extends Storage { + private static final MapOpStorage ops = buildOps(); private final BitSet values; private final BitSet isMissing; private final int size; @@ -33,7 +33,9 @@ public class BoolStorage extends Storage { return size; } - /** @inheritDoc */ + /** + * @inheritDoc + */ @Override public int countMissing() { return isMissing.cardinality(); @@ -45,10 +47,9 @@ public class BoolStorage extends Storage { } @Override - public Object getItemBoxed(int idx) { + public Boolean getItemBoxed(int idx) { return isMissing.get(idx) ? null : getItem(idx); } - public boolean getItem(long idx) { return negated != values.get((int) idx); } @@ -64,12 +65,12 @@ public class BoolStorage extends Storage { } @Override - protected Storage runVectorizedMap(String name, Object argument) { + protected Storage runVectorizedMap(String name, Object argument) { return ops.runMap(name, this, argument); } @Override - protected Storage runVectorizedZip(String name, Storage argument) { + protected Storage runVectorizedZip(String name, Storage argument) { return ops.runZip(name, this, argument); } @@ -99,7 +100,7 @@ public class BoolStorage extends Storage { } @Override - public Storage fillMissing(Value arg) { + public Storage fillMissing(Value arg) { if (arg.isBoolean()) { return fillMissingBoolean(arg.asBoolean()); } else { @@ -108,7 +109,7 @@ public class BoolStorage extends Storage { } @Override - public Storage mask(BitSet mask, int cardinality) { + public BoolStorage mask(BitSet mask, int cardinality) { BitSet newMissing = new BitSet(); BitSet newValues = new BitSet(); int resultIx = 0; @@ -118,6 +119,10 @@ public class BoolStorage extends Storage { newMissing.set(resultIx++); } else if (values.get(i)) { newValues.set(resultIx++); + } else { + // We don't set any bits, but still increment the counter to 
indicate that we have just + // 'inserted' a false value. + resultIx++; } } } @@ -125,7 +130,7 @@ public class BoolStorage extends Storage { } @Override - public Storage applyMask(OrderMask mask) { + public BoolStorage applyMask(OrderMask mask) { int[] positions = mask.getPositions(); BitSet newNa = new BitSet(); BitSet newVals = new BitSet(); @@ -140,7 +145,7 @@ public class BoolStorage extends Storage { } @Override - public Storage countMask(int[] counts, int total) { + public BoolStorage countMask(int[] counts, int total) { BitSet newNa = new BitSet(); BitSet newVals = new BitSet(); int pos = 0; @@ -159,12 +164,12 @@ public class BoolStorage extends Storage { return negated; } - private static MapOpStorage buildOps() { - MapOpStorage ops = new MapOpStorage<>(); + private static MapOpStorage buildOps() { + MapOpStorage ops = new MapOpStorage<>(); ops.add( new UnaryMapOperation<>(Maps.NOT) { @Override - protected Storage run(BoolStorage storage) { + protected BoolStorage run(BoolStorage storage) { return new BoolStorage( storage.values, storage.isMissing, storage.size, !storage.negated); } @@ -172,9 +177,9 @@ public class BoolStorage extends Storage { .add( new MapOperation<>(Maps.EQ) { @Override - public Storage runMap(BoolStorage storage, Object arg) { - if (arg instanceof Boolean) { - if ((Boolean) arg) { + public BoolStorage runMap(BoolStorage storage, Object arg) { + if (arg instanceof Boolean v) { + if (v) { return storage; } else { return new BoolStorage( @@ -186,7 +191,7 @@ public class BoolStorage extends Storage { } @Override - public Storage runZip(BoolStorage storage, Storage arg) { + public BoolStorage runZip(BoolStorage storage, Storage arg) { BitSet out = new BitSet(); BitSet missing = new BitSet(); for (int i = 0; i < storage.size; i++) { @@ -204,9 +209,8 @@ public class BoolStorage extends Storage { .add( new MapOperation<>(Maps.AND) { @Override - public Storage runMap(BoolStorage storage, Object arg) { - if (arg instanceof Boolean) { - boolean v 
= (Boolean) arg; + public BoolStorage runMap(BoolStorage storage, Object arg) { + if (arg instanceof Boolean v) { if (v) { return storage; } else { @@ -218,7 +222,7 @@ public class BoolStorage extends Storage { } @Override - public Storage runZip(BoolStorage storage, Storage arg) { + public BoolStorage runZip(BoolStorage storage, Storage arg) { if (arg instanceof BoolStorage v) { BitSet missing = v.isMissing.get(0, storage.size); missing.or(storage.isMissing); @@ -247,9 +251,8 @@ public class BoolStorage extends Storage { .add( new MapOperation<>(Maps.OR) { @Override - public Storage runMap(BoolStorage storage, Object arg) { - if (arg instanceof Boolean) { - boolean v = (Boolean) arg; + public BoolStorage runMap(BoolStorage storage, Object arg) { + if (arg instanceof Boolean v) { if (v) { return new BoolStorage(new BitSet(), storage.isMissing, storage.size, true); } else { @@ -261,7 +264,7 @@ public class BoolStorage extends Storage { } @Override - public Storage runZip(BoolStorage storage, Storage arg) { + public BoolStorage runZip(BoolStorage storage, Storage arg) { if (arg instanceof BoolStorage v) { BitSet missing = v.isMissing.get(0, storage.size); missing.or(storage.isMissing); @@ -287,7 +290,8 @@ public class BoolStorage extends Storage { throw new UnexpectedColumnTypeException("Boolean"); } } - }); + }) + .add(new BooleanIsInOp()); return ops; } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/DateStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/DateStorage.java index 2cd593be8a..b41854f958 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/DateStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/DateStorage.java @@ -1,10 +1,10 @@ package org.enso.table.data.column.storage; -import org.enso.table.data.column.operation.map.MapOpStorage; - import java.time.LocalDate; +import org.enso.table.data.column.operation.map.MapOpStorage; +import 
org.enso.table.data.column.operation.map.SpecializedIsInOp; -public class DateStorage extends SpecializedStorage { +public final class DateStorage extends SpecializedStorage { /** * @param data the underlying data * @param size the number of items stored @@ -13,10 +13,12 @@ public class DateStorage extends SpecializedStorage { super(data, size, ops); } - private static final MapOpStorage> ops = buildOps(); + private static final MapOpStorage> ops = buildOps(); - private static MapOpStorage> buildOps() { - return ObjectStorage.buildObjectOps(); + private static MapOpStorage> buildOps() { + MapOpStorage> t = ObjectStorage.buildObjectOps(); + t.add(SpecializedIsInOp.makeForTimeColumns(LocalDate.class)); + return t; } @Override diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/DateTimeStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/DateTimeStorage.java index 4dbd5e922a..620e66b20b 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/DateTimeStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/DateTimeStorage.java @@ -1,10 +1,11 @@ package org.enso.table.data.column.storage; import org.enso.table.data.column.operation.map.MapOpStorage; +import org.enso.table.data.column.operation.map.SpecializedIsInOp; import java.time.ZonedDateTime; -public class DateTimeStorage extends SpecializedStorage { +public final class DateTimeStorage extends SpecializedStorage { /** * @param data the underlying data * @param size the number of items stored @@ -13,10 +14,14 @@ public class DateTimeStorage extends SpecializedStorage { super(data, size, ops); } - private static final MapOpStorage> ops = buildOps(); + private static final MapOpStorage> ops = + buildOps(); - private static MapOpStorage> buildOps() { - return ObjectStorage.buildObjectOps(); + private static MapOpStorage> buildOps() { + MapOpStorage> t = + ObjectStorage.buildObjectOps(); + 
t.add(SpecializedIsInOp.makeForTimeColumns(ZonedDateTime.class)); + return t; } @Override diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java index c6ee450328..9b5ace27b1 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java @@ -1,7 +1,12 @@ package org.enso.table.data.column.storage; +import java.util.BitSet; +import java.util.HashSet; +import java.util.List; +import org.enso.base.polyglot.NumericConverter; import org.enso.table.data.column.builder.object.NumericBuilder; import org.enso.table.data.column.operation.map.MapOpStorage; +import org.enso.table.data.column.operation.map.SpecializedIsInOp; import org.enso.table.data.column.operation.map.UnaryMapOperation; import org.enso.table.data.column.operation.map.numeric.DoubleBooleanOp; import org.enso.table.data.column.operation.map.numeric.DoubleNumericOp; @@ -10,15 +15,12 @@ import org.enso.table.data.mask.OrderMask; import org.enso.table.data.mask.SliceRange; import org.graalvm.polyglot.Value; -import java.util.BitSet; -import java.util.List; - /** A column containing floating point numbers. */ -public class DoubleStorage extends NumericStorage { +public final class DoubleStorage extends NumericStorage { private final long[] data; private final BitSet isMissing; private final int size; - private static final MapOpStorage ops = buildOps(); + private static final MapOpStorage ops = buildOps(); /** * @param data the underlying data @@ -58,7 +60,7 @@ public class DoubleStorage extends NumericStorage { } @Override - public Object getItemBoxed(int idx) { + public Double getItemBoxed(int idx) { return isMissing.get(idx) ? 
null : Double.longBitsToDouble(data[idx]); } @@ -80,16 +82,16 @@ public class DoubleStorage extends NumericStorage { } @Override - protected Storage runVectorizedMap(String name, Object argument) { + protected Storage runVectorizedMap(String name, Object argument) { return ops.runMap(name, this, argument); } @Override - protected Storage runVectorizedZip(String name, Storage argument) { + protected Storage runVectorizedZip(String name, Storage argument) { return ops.runZip(name, this, argument); } - private Storage fillMissingDouble(double arg) { + private Storage fillMissingDouble(double arg) { final var builder = NumericBuilder.createDoubleBuilder(size()); long rawArg = Double.doubleToRawLongBits(arg); for (int i = 0; i < size(); i++) { @@ -103,7 +105,7 @@ public class DoubleStorage extends NumericStorage { } @Override - public Storage fillMissing(Value arg) { + public Storage fillMissing(Value arg) { if (arg.isNumber()) { if (arg.fitsInLong()) { return fillMissingDouble(arg.asLong()); @@ -116,7 +118,7 @@ public class DoubleStorage extends NumericStorage { } @Override - public DoubleStorage mask(BitSet mask, int cardinality) { + public Storage mask(BitSet mask, int cardinality) { BitSet newMissing = new BitSet(); long[] newData = new long[cardinality]; int resIx = 0; @@ -133,7 +135,7 @@ public class DoubleStorage extends NumericStorage { } @Override - public Storage applyMask(OrderMask mask) { + public Storage applyMask(OrderMask mask) { int[] positions = mask.getPositions(); long[] newData = new long[positions.length]; BitSet newMissing = new BitSet(); @@ -148,7 +150,7 @@ public class DoubleStorage extends NumericStorage { } @Override - public Storage countMask(int[] counts, int total) { + public Storage countMask(int[] counts, int total) { long[] newData = new long[total]; BitSet newMissing = new BitSet(); int pos = 0; @@ -169,8 +171,8 @@ public class DoubleStorage extends NumericStorage { return isMissing; } - private static MapOpStorage buildOps() { - 
MapOpStorage ops = new MapOpStorage<>(); + private static MapOpStorage buildOps() { + MapOpStorage ops = new MapOpStorage<>(); ops.add( new DoubleNumericOp(Maps.ADD) { @Override @@ -249,15 +251,29 @@ public class DoubleStorage extends NumericStorage { .add( new UnaryMapOperation<>(Maps.IS_MISSING) { @Override - public Storage run(DoubleStorage storage) { + public BoolStorage run(DoubleStorage storage) { return new BoolStorage(storage.isMissing, new BitSet(), storage.size, false); } - }); + }) + .add( + SpecializedIsInOp.make( + list -> { + HashSet set = new HashSet<>(); + boolean hasNulls = false; + for (Object o : list) { + hasNulls |= o == null; + Double x = NumericConverter.tryConvertingToDouble(o); + if (x != null) { + set.add(x); + } + } + return new SpecializedIsInOp.CompactRepresentation<>(set, hasNulls); + })); return ops; } @Override - public DoubleStorage slice(int offset, int limit) { + public Storage slice(int offset, int limit) { int newSize = Math.min(size - offset, limit); long[] newData = new long[newSize]; System.arraycopy(data, offset, newData, 0, newSize); @@ -266,7 +282,7 @@ public class DoubleStorage extends NumericStorage { } @Override - public DoubleStorage slice(List ranges) { + public Storage slice(List ranges) { int newSize = SliceRange.totalLength(ranges); long[] newData = new long[newSize]; BitSet newMissing = new BitSet(newSize); diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java index 802341342c..7bd9b22c7d 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java @@ -1,9 +1,16 @@ package org.enso.table.data.column.storage; +import java.util.BitSet; +import java.util.HashSet; +import java.util.List; +import java.util.OptionalLong; +import java.util.stream.LongStream; +import 
org.enso.base.polyglot.NumericConverter; import org.enso.table.data.column.builder.object.NumericBuilder; import org.enso.table.data.column.operation.aggregate.Aggregator; import org.enso.table.data.column.operation.aggregate.numeric.LongToLongAggregator; import org.enso.table.data.column.operation.map.MapOpStorage; +import org.enso.table.data.column.operation.map.SpecializedIsInOp; import org.enso.table.data.column.operation.map.UnaryMapOperation; import org.enso.table.data.column.operation.map.numeric.LongBooleanOp; import org.enso.table.data.column.operation.map.numeric.LongNumericOp; @@ -12,17 +19,12 @@ import org.enso.table.data.mask.OrderMask; import org.enso.table.data.mask.SliceRange; import org.graalvm.polyglot.Value; -import java.util.BitSet; -import java.util.List; -import java.util.OptionalLong; -import java.util.stream.LongStream; - /** A column storing 64-bit integers. */ -public class LongStorage extends NumericStorage { +public final class LongStorage extends NumericStorage { private final long[] data; private final BitSet isMissing; private final int size; - private static final MapOpStorage ops = buildOps(); + private static final MapOpStorage ops = buildOps(); /** * @param data the underlying data @@ -40,13 +42,17 @@ public class LongStorage extends NumericStorage { this(data, data.length, new BitSet()); } - /** @inheritDoc */ + /** + * @inheritDoc + */ @Override public int size() { return size; } - /** @inheritDoc */ + /** + * @inheritDoc + */ @Override public int countMissing() { return isMissing.cardinality(); @@ -66,17 +72,21 @@ public class LongStorage extends NumericStorage { } @Override - public Object getItemBoxed(int idx) { + public Long getItemBoxed(int idx) { return isMissing.get(idx) ? 
null : data[idx]; } - /** @inheritDoc */ + /** + * @inheritDoc + */ @Override public int getType() { return Type.LONG; } - /** @inheritDoc */ + /** + * @inheritDoc + */ @Override public boolean isNa(long idx) { return isMissing.get((int) idx); @@ -88,12 +98,12 @@ public class LongStorage extends NumericStorage { } @Override - protected Storage runVectorizedMap(String name, Object argument) { + protected Storage runVectorizedMap(String name, Object argument) { return ops.runMap(name, this, argument); } @Override - protected Storage runVectorizedZip(String name, Storage argument) { + protected Storage runVectorizedZip(String name, Storage argument) { return ops.runZip(name, this, argument); } @@ -137,7 +147,7 @@ public class LongStorage extends NumericStorage { }; } - private Storage fillMissingDouble(double arg) { + private Storage fillMissingDouble(double arg) { final var builder = NumericBuilder.createDoubleBuilder(size()); long rawArg = Double.doubleToRawLongBits(arg); for (int i = 0; i < size(); i++) { @@ -151,7 +161,7 @@ public class LongStorage extends NumericStorage { return builder.seal(); } - private Storage fillMissingLong(long arg) { + private Storage fillMissingLong(long arg) { final var builder = NumericBuilder.createLongBuilder(size()); for (int i = 0; i < size(); i++) { if (isMissing.get(i)) { @@ -164,7 +174,7 @@ public class LongStorage extends NumericStorage { } @Override - public Storage fillMissing(Value arg) { + public Storage fillMissing(Value arg) { if (arg.isNumber()) { if (arg.fitsInLong()) { return fillMissingLong(arg.asLong()); @@ -177,7 +187,7 @@ public class LongStorage extends NumericStorage { } @Override - public LongStorage mask(BitSet mask, int cardinality) { + public Storage mask(BitSet mask, int cardinality) { BitSet newMissing = new BitSet(); long[] newData = new long[cardinality]; int resIx = 0; @@ -194,7 +204,7 @@ public class LongStorage extends NumericStorage { } @Override - public Storage applyMask(OrderMask mask) { + public 
Storage applyMask(OrderMask mask) { int[] positions = mask.getPositions(); long[] newData = new long[positions.length]; BitSet newMissing = new BitSet(); @@ -209,7 +219,7 @@ public class LongStorage extends NumericStorage { } @Override - public Storage countMask(int[] counts, int total) { + public Storage countMask(int[] counts, int total) { long[] newData = new long[total]; BitSet newMissing = new BitSet(); int pos = 0; @@ -230,8 +240,8 @@ public class LongStorage extends NumericStorage { return isMissing; } - private static MapOpStorage buildOps() { - MapOpStorage ops = new MapOpStorage<>(); + private static MapOpStorage buildOps() { + MapOpStorage ops = new MapOpStorage<>(); ops.add( new LongNumericOp(Maps.ADD) { @Override @@ -360,10 +370,24 @@ public class LongStorage extends NumericStorage { .add( new UnaryMapOperation<>(Maps.IS_MISSING) { @Override - public Storage run(LongStorage storage) { + public BoolStorage run(LongStorage storage) { return new BoolStorage(storage.isMissing, new BitSet(), storage.size, false); } - }); + }) + .add( + SpecializedIsInOp.make( + list -> { + HashSet set = new HashSet<>(); + boolean hasNulls = false; + for (Object o : list) { + hasNulls |= o == null; + Long x = NumericConverter.tryConvertingToLong(o); + if (x != null) { + set.add(x); + } + } + return new SpecializedIsInOp.CompactRepresentation<>(set, hasNulls); + })); return ops; } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/NumericStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/NumericStorage.java index 57bef25a4e..d577fc41f0 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/NumericStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/NumericStorage.java @@ -1,12 +1,11 @@ package org.enso.table.data.column.storage; +import java.util.stream.DoubleStream; import org.enso.table.data.column.operation.aggregate.Aggregator; import 
org.enso.table.data.column.operation.aggregate.numeric.NumericAggregator; -import java.util.stream.DoubleStream; - /** A storage containing items representable as a {@code double}. */ -public abstract class NumericStorage extends Storage { +public abstract class NumericStorage extends Storage { /** * Returns the value stored at the given index. The return value if the given index is missing * ({@link #isNa(long)}) is undefined. diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/ObjectStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/ObjectStorage.java index e2102c7985..72549b00df 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/ObjectStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/ObjectStorage.java @@ -1,12 +1,11 @@ package org.enso.table.data.column.storage; +import java.util.BitSet; import org.enso.table.data.column.operation.map.MapOpStorage; import org.enso.table.data.column.operation.map.UnaryMapOperation; -import java.util.BitSet; - /** A column storing arbitrary objects. 
*/ -public class ObjectStorage extends SpecializedStorage { +public final class ObjectStorage extends SpecializedStorage { /** * @param data the underlying data * @param size the number of items stored @@ -30,14 +29,14 @@ public class ObjectStorage extends SpecializedStorage { return Type.OBJECT; } - private static final MapOpStorage> ops = buildObjectOps(); + private static final MapOpStorage> ops = buildObjectOps(); - static > MapOpStorage buildObjectOps() { - MapOpStorage ops = new MapOpStorage<>(); + static > MapOpStorage buildObjectOps() { + MapOpStorage ops = new MapOpStorage<>(); ops.add( new UnaryMapOperation<>(Maps.IS_MISSING) { @Override - protected Storage run(S storage) { + protected BoolStorage run(S storage) { BitSet r = new BitSet(); for (int i = 0; i < storage.size; i++) { if (storage.data[i] == null) { diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/SpecializedStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/SpecializedStorage.java index b2a5f78c0d..96acb58c4b 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/SpecializedStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/SpecializedStorage.java @@ -1,14 +1,13 @@ package org.enso.table.data.column.storage; +import java.util.BitSet; +import java.util.List; import org.enso.table.data.column.operation.map.MapOpStorage; import org.enso.table.data.index.Index; import org.enso.table.data.mask.OrderMask; import org.enso.table.data.mask.SliceRange; -import java.util.BitSet; -import java.util.List; - -public abstract class SpecializedStorage extends Storage { +public abstract class SpecializedStorage extends Storage { protected abstract SpecializedStorage newInstance(T[] data, int size); @@ -21,7 +20,7 @@ public abstract class SpecializedStorage extends Storage { * @param data the underlying data * @param size the number of items stored */ - protected SpecializedStorage(T[] data, int size, 
MapOpStorage> ops) { + protected SpecializedStorage(T[] data, int size, MapOpStorage> ops) { this.data = data; this.size = size; this.ops = ops; @@ -29,7 +28,7 @@ public abstract class SpecializedStorage extends Storage { protected final T[] data; protected final int size; - private final MapOpStorage> ops; + private final MapOpStorage> ops; /** @inheritDoc */ @Override @@ -74,12 +73,12 @@ public abstract class SpecializedStorage extends Storage { } @Override - protected Storage runVectorizedMap(String name, Object argument) { + protected Storage runVectorizedMap(String name, Object argument) { return ops.runMap(name, this, argument); } @Override - protected Storage runVectorizedZip(String name, Storage argument) { + protected Storage runVectorizedZip(String name, Storage argument) { return ops.runZip(name, this, argument); } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java index b1560d3d74..82e1976e32 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java @@ -1,6 +1,11 @@ package org.enso.table.data.column.storage; -import org.enso.base.Polyglot_Utils; +import java.util.BitSet; +import java.util.HashMap; +import java.util.List; +import java.util.function.BiFunction; +import java.util.function.Function; +import org.enso.base.polyglot.Polyglot_Utils; import org.enso.table.data.column.builder.object.Builder; import org.enso.table.data.column.builder.object.InferredBuilder; import org.enso.table.data.column.builder.object.ObjectBuilder; @@ -11,14 +16,8 @@ import org.enso.table.data.mask.OrderMask; import org.enso.table.data.mask.SliceRange; import org.graalvm.polyglot.Value; -import java.util.BitSet; -import java.util.HashMap; -import java.util.List; -import java.util.function.BiFunction; -import java.util.function.Function; - /** An 
abstract representation of a data column. */ -public abstract class Storage { +public abstract class Storage { /** @return the number of elements in this column (including NAs) */ public abstract int size(); @@ -42,7 +41,7 @@ public abstract class Storage { * @param idx the index to look up * @return the item at position {@code idx} */ - public abstract Object getItemBoxed(int idx); + public abstract T getItemBoxed(int idx); /** * Enumerating possible storage types. @@ -83,6 +82,7 @@ public abstract class Storage { public static final String ENDS_WITH = "ends_with"; public static final String CONTAINS = "contains"; public static final String LIKE = "like"; + public static final String IS_IN = "is_in"; } public static final class Aggregators { @@ -95,9 +95,9 @@ public abstract class Storage { protected abstract boolean isOpVectorized(String name); - protected abstract Storage runVectorizedMap(String name, Object argument); + protected abstract Storage runVectorizedMap(String name, Object argument); - protected abstract Storage runVectorizedZip(String name, Storage argument); + protected abstract Storage runVectorizedZip(String name, Storage argument); /** * Runs a function on each non-missing element in this storage and gathers the results. @@ -106,17 +106,23 @@ public abstract class Storage { * supported. If this argument is null, the vectorized operation will never be used. * @param function the function to run. * @param argument the argument to pass to each run of the function + * @param skipNulls specifies whether null values on the input should result in a null result + * without passing them through the function, this is useful if the function does not support + * the null-values, but it needs to be set to false if the function should handle them. * @return the result of running the function on all non-missing elements. 
*/ - public final Storage bimap( - String name, BiFunction function, Object argument) { + public final Storage bimap( + String name, + BiFunction function, + Object argument, + boolean skipNulls) { if (name != null && isOpVectorized(name)) { return runVectorizedMap(name, argument); } Builder builder = new InferredBuilder(size()); for (int i = 0; i < size(); i++) { Object it = getItemBoxed(i); - if (it == null) { + if (skipNulls && it == null) { builder.appendNoGrow(null); } else { Object result = function.apply(it, argument); @@ -165,7 +171,7 @@ public abstract class Storage { * @param function the function to run. * @return the result of running the function on all non-missing elements. */ - public final Storage map(String name, Function function) { + public final Storage map(String name, Function function) { if (name != null && isOpVectorized(name)) { return runVectorizedMap(name, null); } @@ -192,8 +198,8 @@ public abstract class Storage { * @param skipNa whether rows containing missing values should be passed to the function. * @return the result of running the function on all non-missing elements. 
*/ - public final Storage zip( - String name, BiFunction function, Storage arg, boolean skipNa) { + public final Storage zip( + String name, BiFunction function, Storage arg, boolean skipNa) { if (name != null && isOpVectorized(name)) { return runVectorizedZip(name, arg); } @@ -218,7 +224,7 @@ public abstract class Storage { * @param arg the value to use for missing elements * @return a new storage, with all missing elements replaced by arg */ - public Storage fillMissing(Value arg) { + public Storage fillMissing(Value arg) { return fillMissingHelper(arg, new ObjectBuilder(size())); } @@ -228,7 +234,7 @@ public abstract class Storage { * @param other the source of default values * @return a new storage with missing values filled */ - public Storage fillMissingFrom(Storage other) { + public Storage fillMissingFrom(Storage other) { var builder = new InferredBuilder(size()); for (int i = 0; i < size(); i++) { if (isNa(i)) { @@ -240,7 +246,7 @@ public abstract class Storage { return builder.seal(); } - protected final Storage fillMissingHelper(Value arg, Builder builder) { + protected final Storage fillMissingHelper(Value arg, Builder builder) { Object convertedFallback = Polyglot_Utils.convertPolyglotValue(arg); for (int i = 0; i < size(); i++) { Object it = getItemBoxed(i); @@ -260,14 +266,14 @@ public abstract class Storage { * @param cardinality the number of true values in mask * @return a new storage, masked with the given mask */ - public abstract Storage mask(BitSet mask, int cardinality); + public abstract Storage mask(BitSet mask, int cardinality); /** * Returns a new storage, ordered according to the rules specified in a mask. * * @param mask@return a storage resulting from applying the reordering rules */ - public abstract Storage applyMask(OrderMask mask); + public abstract Storage applyMask(OrderMask mask); /** * Returns a new storage, resulting from applying the rules specified in a mask. 
The resulting @@ -280,13 +286,13 @@ public abstract class Storage { * storage * @return the storage masked according to the specified rules */ - public abstract Storage countMask(int[] counts, int total); + public abstract Storage countMask(int[] counts, int total); /** @return a copy of the storage containing a slice of the original data */ - public abstract Storage slice(int offset, int limit); + public abstract Storage slice(int offset, int limit); /** @return a copy of the storage consisting of slices of the original data */ - public abstract Storage slice(List ranges); + public abstract Storage slice(List ranges); public List toList() { return new StorageListView(this); @@ -297,7 +303,7 @@ public abstract class Storage { * * @return a storage counting the number of times each value in this one has been seen before. */ - public Storage duplicateCount() { + public Storage duplicateCount() { long[] data = new long[size()]; HashMap occurenceCount = new HashMap<>(); for (int i = 0; i < size(); i++) { diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/StorageListView.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/StorageListView.java index 91dab75244..edf46d679d 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/StorageListView.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/StorageListView.java @@ -11,7 +11,7 @@ import java.util.Objects; * is not modifiable. */ public class StorageListView implements List { - private final Storage storage; + private final Storage storage; private final int from; private final int to; @@ -20,11 +20,11 @@ public class StorageListView implements List { * * @param storage the storage to wrap. 
*/ - public StorageListView(Storage storage) { + public StorageListView(Storage storage) { this(storage, 0, storage.size()); } - private StorageListView(Storage storage, int from, int to) { + private StorageListView(Storage storage, int from, int to) { this.storage = storage; this.from = from; this.to = to; diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java index f77b54b0bb..b7ec1e6521 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java @@ -1,17 +1,19 @@ package org.enso.table.data.column.storage; import java.util.BitSet; +import java.util.HashSet; import org.enso.base.Text_Utils; import org.enso.table.data.column.builder.object.StringBuilder; import org.enso.table.data.column.operation.map.MapOpStorage; import org.enso.table.data.column.operation.map.MapOperation; +import org.enso.table.data.column.operation.map.SpecializedIsInOp; import org.enso.table.data.column.operation.map.UnaryMapOperation; import org.enso.table.data.column.operation.map.text.LikeOp; import org.enso.table.data.column.operation.map.text.StringBooleanOp; import org.graalvm.polyglot.Value; /** A column storing strings. 
*/ -public class StringStorage extends SpecializedStorage { +public final class StringStorage extends SpecializedStorage { /** * @param data the underlying data @@ -36,7 +38,7 @@ public class StringStorage extends SpecializedStorage { return Type.STRING; } - private static final MapOpStorage> ops = buildOps(); + private static final MapOpStorage> ops = buildOps(); @Override protected boolean isOpVectorized(String name) { @@ -44,17 +46,17 @@ public class StringStorage extends SpecializedStorage { } @Override - protected Storage runVectorizedMap(String name, Object argument) { + protected Storage runVectorizedMap(String name, Object argument) { return ops.runMap(name, this, argument); } @Override - protected Storage runVectorizedZip(String name, Storage argument) { + protected Storage runVectorizedZip(String name, Storage argument) { return ops.runZip(name, this, argument); } @Override - public Storage fillMissing(Value arg) { + public Storage fillMissing(Value arg) { if (arg.isString()) { return fillMissingHelper(arg, new StringBuilder(size())); } else { @@ -62,12 +64,12 @@ public class StringStorage extends SpecializedStorage { } } - private static MapOpStorage> buildOps() { - MapOpStorage> t = ObjectStorage.buildObjectOps(); + private static MapOpStorage> buildOps() { + MapOpStorage> t = ObjectStorage.buildObjectOps(); t.add( new MapOperation<>(Maps.EQ) { @Override - public Storage runMap(SpecializedStorage storage, Object arg) { + public BoolStorage runMap(SpecializedStorage storage, Object arg) { BitSet r = new BitSet(); BitSet missing = new BitSet(); for (int i = 0; i < storage.size(); i++) { @@ -81,7 +83,7 @@ public class StringStorage extends SpecializedStorage { } @Override - public Storage runZip(SpecializedStorage storage, Storage arg) { + public BoolStorage runZip(SpecializedStorage storage, Storage arg) { BitSet r = new BitSet(); BitSet missing = new BitSet(); for (int i = 0; i < storage.size(); i++) { @@ -98,7 +100,7 @@ public class StringStorage 
extends SpecializedStorage { t.add( new UnaryMapOperation<>(Maps.IS_EMPTY) { @Override - protected Storage run(SpecializedStorage storage) { + protected BoolStorage run(SpecializedStorage storage) { BitSet r = new BitSet(); for (int i = 0; i < storage.size; i++) { String s = storage.data[i]; @@ -131,6 +133,19 @@ public class StringStorage extends SpecializedStorage { } }); t.add(new LikeOp()); + t.add( + SpecializedIsInOp.make( + list -> { + HashSet set = new HashSet<>(); + boolean hasNulls = false; + for (Object o : list) { + hasNulls |= o == null; + if (o instanceof String s) { + set.add(s); + } + } + return new SpecializedIsInOp.CompactRepresentation<>(set, hasNulls); + })); return t; } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/TimeOfDayStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/TimeOfDayStorage.java index 36d4510645..ab8a81a177 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/TimeOfDayStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/TimeOfDayStorage.java @@ -1,10 +1,10 @@ package org.enso.table.data.column.storage; -import org.enso.table.data.column.operation.map.MapOpStorage; - import java.time.LocalTime; +import org.enso.table.data.column.operation.map.MapOpStorage; +import org.enso.table.data.column.operation.map.SpecializedIsInOp; -public class TimeOfDayStorage extends SpecializedStorage { +public final class TimeOfDayStorage extends SpecializedStorage { /** * @param data the underlying data * @param size the number of items stored @@ -13,10 +13,12 @@ public class TimeOfDayStorage extends SpecializedStorage { super(data, size, ops); } - private static final MapOpStorage> ops = buildOps(); + private static final MapOpStorage> ops = buildOps(); - private static MapOpStorage> buildOps() { - return ObjectStorage.buildObjectOps(); + private static MapOpStorage> buildOps() { + MapOpStorage> t = ObjectStorage.buildObjectOps(); + 
t.add(SpecializedIsInOp.makeForTimeColumns(LocalTime.class)); + return t; } @Override diff --git a/std-bits/table/src/main/java/org/enso/table/data/index/HashIndex.java b/std-bits/table/src/main/java/org/enso/table/data/index/HashIndex.java index 4aa13768bd..5955d1d61f 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/index/HashIndex.java +++ b/std-bits/table/src/main/java/org/enso/table/data/index/HashIndex.java @@ -7,17 +7,17 @@ import org.enso.table.data.mask.SliceRange; import org.enso.table.data.table.Column; public class HashIndex extends Index { - private final Storage items; + private final Storage items; private final Map> locs; private final String name; - private HashIndex(Storage items, Map> locs, String name) { + private HashIndex(Storage items, Map> locs, String name) { this.items = items; this.locs = locs; this.name = name; } - private HashIndex(String name, Storage items, int size) { + private HashIndex(String name, Storage items, int size) { Map> locations = new HashMap<>(); for (int i = 0; i < size; i++) { List its = locations.computeIfAbsent(items.getItemBoxed(i), x -> new ArrayList<>()); @@ -28,7 +28,7 @@ public class HashIndex extends Index { this.name = name; } - public static HashIndex fromStorage(String name, Storage storage) { + public static HashIndex fromStorage(String name, Storage storage) { return new HashIndex(name, storage, storage.size()); } @@ -59,19 +59,19 @@ public class HashIndex extends Index { @Override public Index mask(BitSet mask, int cardinality) { - Storage newSt = items.mask(mask, cardinality); + Storage newSt = items.mask(mask, cardinality); return HashIndex.fromStorage(name, newSt); } @Override public Index countMask(int[] counts, int total) { - Storage newSt = items.countMask(counts, total); + Storage newSt = items.countMask(counts, total); return HashIndex.fromStorage(name, newSt); } @Override public Index applyMask(OrderMask mask) { - Storage newSt = items.applyMask(mask); + Storage newSt = 
items.applyMask(mask); return HashIndex.fromStorage(name, newSt); } @@ -86,7 +86,7 @@ public class HashIndex extends Index { mask.set(i); } } - Storage newItems = items.mask(mask, locs.size()); + Storage newItems = items.mask(mask, locs.size()); return new HashIndex(newItems, newLocs, name); } diff --git a/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueIndex.java b/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueIndex.java index 1c8478e591..eab87645ec 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueIndex.java +++ b/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueIndex.java @@ -30,7 +30,7 @@ public class MultiValueIndex { boolean isOrdered = ordering != null; this.locs = isOrdered ? new TreeMap<>() : new HashMap<>(); - Storage[] storage = Arrays.stream(keyColumns).map(Column::getStorage).toArray(Storage[]::new); + Storage[] storage = Arrays.stream(keyColumns).map(Column::getStorage).toArray(Storage[]::new); IntFunction keyFactory = isOrdered ? i -> new OrderedMultiValueKey(storage, i, ordering, objectComparator) diff --git a/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueKeyBase.java b/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueKeyBase.java index 998dde2178..58e6be2577 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueKeyBase.java +++ b/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueKeyBase.java @@ -4,7 +4,7 @@ import org.enso.table.data.column.storage.Storage; /** The base class for keys used for sorting/grouping rows by a set of columns. 
*/ public abstract class MultiValueKeyBase { - protected final Storage[] storages; + protected final Storage[] storages; protected final int rowIndex; protected boolean hasFloatValues = false; protected boolean floatsComputed = false; @@ -13,7 +13,7 @@ public abstract class MultiValueKeyBase { * Constructs a key based on an array of column storages and the index of the row the key is * associated with. */ - public MultiValueKeyBase(Storage[] storage, int rowIndex) { + public MultiValueKeyBase(Storage[] storage, int rowIndex) { this.storages = storage; this.rowIndex = rowIndex; } @@ -28,7 +28,7 @@ public abstract class MultiValueKeyBase { /** Checks if all cells in the current row are missing. */ public boolean areAllNull() { - for (Storage value : storages) { + for (Storage value : storages) { if (!value.isNa(rowIndex)) { return false; } diff --git a/std-bits/table/src/main/java/org/enso/table/data/index/OrderedMultiValueKey.java b/std-bits/table/src/main/java/org/enso/table/data/index/OrderedMultiValueKey.java index 9a43aaf8f7..bf78e6774f 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/index/OrderedMultiValueKey.java +++ b/std-bits/table/src/main/java/org/enso/table/data/index/OrderedMultiValueKey.java @@ -21,7 +21,7 @@ public class OrderedMultiValueKey extends MultiValueKeyBase private final int[] directions; public OrderedMultiValueKey( - Storage[] storages, int rowIndex, int[] directions, Comparator objectComparator) { + Storage[] storages, int rowIndex, int[] directions, Comparator objectComparator) { super(storages, rowIndex); this.objectComparator = objectComparator; this.directions = directions; diff --git a/std-bits/table/src/main/java/org/enso/table/data/index/UnorderedMultiValueKey.java b/std-bits/table/src/main/java/org/enso/table/data/index/UnorderedMultiValueKey.java index 4b4d53feb4..5405d9303f 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/index/UnorderedMultiValueKey.java +++ 
b/std-bits/table/src/main/java/org/enso/table/data/index/UnorderedMultiValueKey.java @@ -21,12 +21,12 @@ public class UnorderedMultiValueKey extends MultiValueKeyBase { private final int hashCodeValue; private final TextFoldingStrategy textFoldingStrategy; - public UnorderedMultiValueKey(Storage[] storages, int rowIndex) { + public UnorderedMultiValueKey(Storage[] storages, int rowIndex) { this(storages, rowIndex, TextFoldingStrategy.unicodeNormalizedFold); } public UnorderedMultiValueKey( - Storage[] storages, int rowIndex, TextFoldingStrategy textFoldingStrategy) { + Storage[] storages, int rowIndex, TextFoldingStrategy textFoldingStrategy) { super(storages, rowIndex); this.textFoldingStrategy = textFoldingStrategy; diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/Column.java b/std-bits/table/src/main/java/org/enso/table/data/table/Column.java index 068ad175eb..b3254ae030 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/table/Column.java +++ b/std-bits/table/src/main/java/org/enso/table/data/table/Column.java @@ -1,6 +1,6 @@ package org.enso.table.data.table; -import org.enso.base.Polyglot_Utils; +import org.enso.base.polyglot.Polyglot_Utils; import org.enso.table.data.column.builder.object.InferredBuilder; import org.enso.table.data.column.operation.aggregate.Aggregator; import org.enso.table.data.column.storage.BoolStorage; @@ -13,8 +13,6 @@ import org.enso.table.data.mask.SliceRange; import org.enso.table.error.UnexpectedColumnTypeException; import org.graalvm.polyglot.Value; -import java.time.LocalDate; -import java.time.LocalDateTime; import java.util.BitSet; import java.util.List; import java.util.function.Function; @@ -23,7 +21,7 @@ import java.util.stream.IntStream; /** A representation of a column. Consists of a column name and the underlying storage. 
*/ public class Column { private final String name; - private final Storage storage; + private final Storage storage; private final Index index; /** @@ -32,7 +30,7 @@ public class Column { * @param name the column name * @param storage the underlying storage */ - public Column(String name, Index index, Storage storage) { + public Column(String name, Index index, Storage storage) { this.name = name; this.storage = storage; this.index = index; @@ -44,7 +42,7 @@ public class Column { * @param name the column name * @param storage the underlying storage */ - public Column(String name, Storage storage) { + public Column(String name, Storage storage) { this(name, new DefaultIndex(storage.size()), storage); } @@ -63,7 +61,7 @@ public class Column { } /** @return the underlying storage */ - public Storage getStorage() { + public Storage getStorage() { return storage; } @@ -148,7 +146,7 @@ public class Column { * @return a column indexed by {@code col} */ public Column setIndex(Column col) { - Storage storage = col.getStorage(); + Storage storage = col.getStorage(); Index ix = HashIndex.fromStorage(col.getName(), storage); return this.withIndex(ix); } @@ -183,7 +181,7 @@ public class Column { */ public Column applyMask(OrderMask mask) { Index newIndex = index.applyMask(mask); - Storage newStorage = storage.applyMask(mask); + Storage newStorage = storage.applyMask(mask); return new Column(name, newIndex, newStorage); } diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/Table.java b/std-bits/table/src/main/java/org/enso/table/data/table/Table.java index c46a6e1b6c..0351ba069c 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/table/Table.java +++ b/std-bits/table/src/main/java/org/enso/table/data/table/Table.java @@ -182,7 +182,7 @@ public class Table { * @return a table indexed by the proper column */ public Table indexFromColumn(Column col) { - Storage storage = col.getStorage(); + Storage storage = col.getStorage(); Index ix = 
HashIndex.fromStorage(col.getName(), storage); List newColumns = new ArrayList<>(); Column indexCol = index.toColumn(); @@ -294,7 +294,7 @@ public class Table { matches[i] = other.index.loc(index.iloc(i)); } } else { - Storage onS = getColumnByName(on).getStorage(); + Storage onS = getColumnByName(on).getStorage(); for (int i = 0; i < s; i++) { matches[i] = other.index.loc(onS.getItemBoxed(i)); } @@ -360,7 +360,7 @@ public class Table { Arrays.stream(columns) .map( column -> { - Storage newStorage = column.getStorage().applyMask(orderMask); + Storage newStorage = column.getStorage().applyMask(orderMask); return new Column(column.getName(), newIndex, newStorage); }) .toArray(Column[]::new); @@ -431,7 +431,7 @@ public class Table { return new Table(newColumns, newIndex); } - private Storage concatStorages(Storage left, Storage right) { + private Storage concatStorages(Storage left, Storage right) { InferredBuilder builder = new InferredBuilder(left.size() + right.size()); for (int i = 0; i < left.size(); i++) { builder.appendNoGrow(left.getItemBoxed(i)); @@ -442,7 +442,7 @@ public class Table { return builder.seal(); } - private Storage nullPad(int nullCount, Storage storage, boolean start) { + private Storage nullPad(int nullCount, Storage storage, boolean start) { InferredBuilder builder = new InferredBuilder(nullCount + storage.size()); if (start) { builder.appendNulls(nullCount); diff --git a/std-bits/table/src/main/java/org/enso/table/operations/Distinct.java b/std-bits/table/src/main/java/org/enso/table/operations/Distinct.java index 7ff4c71144..c989ce7436 100644 --- a/std-bits/table/src/main/java/org/enso/table/operations/Distinct.java +++ b/std-bits/table/src/main/java/org/enso/table/operations/Distinct.java @@ -21,7 +21,8 @@ public class Distinct { if (keyColumns.length != 0) { HashSet visitedRows = new HashSet<>(); int size = keyColumns[0].getSize(); - Storage[] storage = Arrays.stream(keyColumns).map(Column::getStorage).toArray(Storage[]::new); + Storage[] 
storage = + Arrays.stream(keyColumns).map(Column::getStorage).toArray(Storage[]::new); for (int i = 0; i < size; i++) { UnorderedMultiValueKey key = new UnorderedMultiValueKey(storage, i, textFoldingStrategy); diff --git a/std-bits/table/src/main/java/org/enso/table/operations/OrderBuilder.java b/std-bits/table/src/main/java/org/enso/table/operations/OrderBuilder.java index 9c49789ce3..1ca3cdbe1d 100644 --- a/std-bits/table/src/main/java/org/enso/table/operations/OrderBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/operations/OrderBuilder.java @@ -43,7 +43,7 @@ public class OrderBuilder { * @return a comparator with properties described above */ public Comparator toComparator() { - final Storage storage = column.getStorage(); + final Storage storage = column.getStorage(); Comparator itemCmp = customComparator; if (!ascending) { itemCmp = itemCmp.reversed(); diff --git a/std-bits/table/src/main/java/org/enso/table/parsing/DatatypeParser.java b/std-bits/table/src/main/java/org/enso/table/parsing/DatatypeParser.java index 07bd0a9aa2..4ad5e1f1b2 100644 --- a/std-bits/table/src/main/java/org/enso/table/parsing/DatatypeParser.java +++ b/std-bits/table/src/main/java/org/enso/table/parsing/DatatypeParser.java @@ -35,5 +35,6 @@ public abstract class DatatypeParser { * Parses a column of texts (represented as a {@code StringStorage}) and returns a new storage, * containing the parsed elements. 
*/ - public abstract WithProblems parseColumn(String columnName, StringStorage sourceStorage); + public abstract WithProblems> parseColumn( + String columnName, Storage sourceStorage); } diff --git a/std-bits/table/src/main/java/org/enso/table/parsing/IdentityParser.java b/std-bits/table/src/main/java/org/enso/table/parsing/IdentityParser.java index ccf8eba7f7..7039c1370d 100644 --- a/std-bits/table/src/main/java/org/enso/table/parsing/IdentityParser.java +++ b/std-bits/table/src/main/java/org/enso/table/parsing/IdentityParser.java @@ -22,7 +22,7 @@ public class IdentityParser extends IncrementalDatatypeParser { } @Override - public WithProblems parseColumn(String columnName, StringStorage sourceStorage) { + public WithProblems> parseColumn(String columnName, Storage sourceStorage) { return new WithProblems<>(sourceStorage, List.of()); } } diff --git a/std-bits/table/src/main/java/org/enso/table/parsing/IncrementalDatatypeParser.java b/std-bits/table/src/main/java/org/enso/table/parsing/IncrementalDatatypeParser.java index f3fbe23ea8..08e0cbca02 100644 --- a/std-bits/table/src/main/java/org/enso/table/parsing/IncrementalDatatypeParser.java +++ b/std-bits/table/src/main/java/org/enso/table/parsing/IncrementalDatatypeParser.java @@ -29,12 +29,12 @@ public abstract class IncrementalDatatypeParser extends DatatypeParser { * Parses a column of texts (represented as a {@code StringStorage}) and returns a new storage, * containing the parsed elements. 
*/ - public WithProblems parseColumn(String columnName, StringStorage sourceStorage) { + public WithProblems> parseColumn(String columnName, Storage sourceStorage) { Builder builder = makeBuilderWithCapacity(sourceStorage.size()); var aggregator = new ProblemAggregatorImpl(columnName); for (int i = 0; i < sourceStorage.size(); ++i) { - String cell = sourceStorage.getItem(i); + String cell = sourceStorage.getItemBoxed(i); if (cell != null) { Object parsed = parseSingleValue(cell, aggregator); builder.appendNoGrow(parsed); diff --git a/std-bits/table/src/main/java/org/enso/table/parsing/TypeInferringParser.java b/std-bits/table/src/main/java/org/enso/table/parsing/TypeInferringParser.java index 9c9c4952a2..909c23c494 100644 --- a/std-bits/table/src/main/java/org/enso/table/parsing/TypeInferringParser.java +++ b/std-bits/table/src/main/java/org/enso/table/parsing/TypeInferringParser.java @@ -40,14 +40,14 @@ public class TypeInferringParser extends DatatypeParser { } @Override - public WithProblems parseColumn(String columnName, StringStorage sourceStorage) { + public WithProblems> parseColumn(String columnName, Storage sourceStorage) { parsers: for (IncrementalDatatypeParser parser : baseParsers) { Builder builder = parser.makeBuilderWithCapacity(sourceStorage.size()); var aggregator = new ProblemAggregatorImpl(columnName); for (int i = 0; i < sourceStorage.size(); ++i) { - String cell = sourceStorage.getItem(i); + String cell = sourceStorage.getItemBoxed(i); if (cell != null) { Object parsed = parser.parseSingleValue(cell, aggregator); if (aggregator.hasProblems()) { diff --git a/std-bits/table/src/main/java/org/enso/table/read/DelimitedReader.java b/std-bits/table/src/main/java/org/enso/table/read/DelimitedReader.java index 4868fff965..da72a0fe8e 100644 --- a/std-bits/table/src/main/java/org/enso/table/read/DelimitedReader.java +++ b/std-bits/table/src/main/java/org/enso/table/read/DelimitedReader.java @@ -452,13 +452,13 @@ public class DelimitedReader { Column[] 
columns = new Column[builders.length]; for (int i = 0; i < builders.length; i++) { String columnName = effectiveColumnNames[i]; - StringStorage col = builders[i].seal(); + Storage col = builders[i].seal(); - WithProblems parseResult = valueParser.parseColumn(columnName, col); + WithProblems> parseResult = valueParser.parseColumn(columnName, col); for (var problem : parseResult.problems()) { reportProblem(problem); } - Storage storage = parseResult.value(); + Storage storage = parseResult.value(); columns[i] = new Column(columnName, new DefaultIndex(storage.size()), storage); } diff --git a/std-bits/table/src/main/java/org/enso/table/write/ExcelWriter.java b/std-bits/table/src/main/java/org/enso/table/write/ExcelWriter.java index bc7e8d7950..91cb5ec202 100644 --- a/std-bits/table/src/main/java/org/enso/table/write/ExcelWriter.java +++ b/std-bits/table/src/main/java/org/enso/table/write/ExcelWriter.java @@ -268,7 +268,7 @@ public class ExcelWriter { return; } - Storage[] storages = Arrays.stream(columns).map(Column::getStorage).toArray(Storage[]::new); + Storage[] storages = Arrays.stream(columns).map(Column::getStorage).toArray(Storage[]::new); for (int i = 0; i < rowCount; i++) { Row row = sheet.getRow(currentRow); if (row == null) { @@ -276,7 +276,7 @@ public class ExcelWriter { } for (int j = 0; j < columns.length; j++) { - Storage storage = storages[j]; + Storage storage = storages[j]; int idx = j + firstColumn - 1; Cell cell = row.getCell(idx); @@ -305,7 +305,7 @@ public class ExcelWriter { return newStyle; } - private static void writeValueToCell(Cell cell, int j, Storage storage, Workbook workbook) + private static void writeValueToCell(Cell cell, int j, Storage storage, Workbook workbook) throws IllegalStateException { if (storage.isNa(j)) { cell.setBlank(); diff --git a/test/Benchmarks/src/Table/Sorting.enso b/test/Benchmarks/src/Table/Sorting.enso index 4cd0eb1acf..591040a7a2 100644 --- a/test/Benchmarks/src/Table/Sorting.enso +++ 
b/test/Benchmarks/src/Table/Sorting.enso @@ -22,7 +22,7 @@ main = ints = (0.up_to vector_size).to_vector.take (Sample vector_size 42) start = Date_Time.new 1990 1 1 dates = ints.map x-> - start + x.seconds + start + (Duration.new seconds=x) objects = ints.map My.Data ints_table = Table.new [['ints', ints]] diff --git a/test/Table_Tests/src/Common_Table_Spec.enso b/test/Table_Tests/src/Common_Table_Spec.enso index 1603e5cabc..182ce60866 100644 --- a/test/Table_Tests/src/Common_Table_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Spec.enso @@ -1217,11 +1217,51 @@ spec prefix table_builder test_selection pending=Nothing = t2.at "ix" . to_vector . should_equal [2, 4] t2.at "X" . to_vector . should_equal [1, 4] + Test.specify "by an Is_In check in a Vector" <| + t = table_builder [["ix", [1, 2, 3, Nothing, 5, 6]], ["X", ["a", "b", "ccc", "X", "f", "2"]]] + t.filter "X" (Filter_Condition.Is_In ["X", "a", "c"]) on_problems=Report_Error . at "X" . to_vector . should_equal ["a", "X"] + t.filter "X" (Filter_Condition.Not_In ["X", "a", "c"]) on_problems=Report_Error . at "X" . to_vector . should_equal ["b", "ccc", "f", "2"] + t.filter "X" (Filter_Condition.Is_In ["ccc"]) on_problems=Report_Error . at "X" . to_vector . should_equal ["ccc"] + t.filter "X" (Filter_Condition.Is_In []) on_problems=Report_Error . at "X" . to_vector . should_equal [] + t.filter "X" (Filter_Condition.Not_In []) on_problems=Report_Error . at "X" . to_vector . should_equal ["a", "b", "ccc", "X", "f", "2"] + + t.filter "ix" (Filter_Condition.Is_In [Nothing, 2, 5, 4]) on_problems=Report_Error . at "ix" . to_vector . should_equal [2, Nothing, 5] + t.filter "ix" (Filter_Condition.Is_In [2, 5, 4]) on_problems=Report_Error . at "ix" . to_vector . should_equal [2, 5] + t.filter "ix" (Filter_Condition.Is_In [Nothing]) on_problems=Report_Error . at "ix" . to_vector . should_equal [Nothing] + t.filter "ix" (Filter_Condition.Not_In [Nothing]) on_problems=Report_Error . at "ix" . to_vector . 
should_equal [1, 2, 3, 5, 6] + t.filter "ix" (Filter_Condition.Not_In [1, 3]) on_problems=Report_Error . at "ix" . to_vector . should_equal [2, Nothing, 5, 6] + + v1 = t.filter "X" (Filter_Condition.Is_In ["c", "f", "b", "b", "b", 15, Nothing]) on_problems=Report_Error . at "X" . to_vector + case test_selection.allows_mixed_type_comparisons of + True -> v1.should_equal ["b", "f"] + False -> v1.should_fail_with SQL_Error_Data + v2 = t.filter "ix" (Filter_Condition.Is_In ["c", 3, 2, "a"]) on_problems=Report_Error . at "ix" . to_vector + case test_selection.allows_mixed_type_comparisons of + True -> v2.should_equal [2, 3] + False -> v2.should_fail_with SQL_Error_Data + + t2 = table_builder [["A", [True, False, True]], ["B", [False, False, False]], ["C", [True, False, Nothing]]] + t2.filter "A" (Filter_Condition.Is_In [True, Nothing]) . at "A" . to_vector . should_equal [True, True] + t2.filter "B" (Filter_Condition.Is_In [True, Nothing]) . at "B" . to_vector . should_equal [] + t2.filter "C" (Filter_Condition.Is_In [True, Nothing]) . at "C" . to_vector . should_equal [True, Nothing] + t2.filter "A" (Filter_Condition.Is_In [False]) . at "A" . to_vector . should_equal [False] + t2.filter "B" (Filter_Condition.Is_In [False]) . at "B" . to_vector . should_equal [False, False, False] + t2.filter "C" (Filter_Condition.Is_In [False, False]) . at "C" . to_vector . should_equal [False] + Test.specify "by a boolean mask" <| t = table_builder [["ix", [1, 2, 3, 4, 5]], ["b", [True, False, Nothing, True, True]]] t.filter "b" on_problems=Report_Error . at "ix" . to_vector . should_equal [1, 4, 5] t.filter "b" Filter_Condition.Is_False on_problems=Report_Error . at "ix" . to_vector . 
should_equal [2] + Test.specify "should correctly reorder all kinds of columns" <| + t = table_builder [["ints", [1, 2, 3, Nothing, 4]], ["floats", [4.0, Nothing, 3.0, 2.0, 1.0]], ["bools", [False, False, True, Nothing, False]], ["strings", ["a", Nothing, "b", "c", "d"]], ["mask", [False, True, True, True, Nothing]]] + t2 = t.filter "mask" on_problems=Report_Error + t2.at "ints" . to_vector . should_equal [2, 3, Nothing] + t2.at "floats" . to_vector . should_equal [Nothing, 3.0, 2.0] + t2.at "bools" . to_vector . should_equal [False, True, Nothing] + t2.at "strings" . to_vector . should_equal [Nothing, "b", "c"] + t2.at "mask" . to_vector . should_equal [True, True, True] + Test.specify "should check types of boolean operations" <| t = table_builder [["ix", [1, 2, 3, 4, 5]], ["b", [True, False, Nothing, True, True]]] tester = check_empty ["ix", "b"] diff --git a/test/Table_Tests/src/Database/Codegen_Spec.enso b/test/Table_Tests/src/Database/Codegen_Spec.enso index f80810de9f..2d9ed2553d 100644 --- a/test/Table_Tests/src/Database/Codegen_Spec.enso +++ b/test/Table_Tests/src/Database/Codegen_Spec.enso @@ -108,6 +108,19 @@ spec = t4 = t1.filter "A" (Filter_Condition.Between (t1.at "B") 33) t4.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE ("T1"."A" BETWEEN "T1"."B" AND ?)', [[33, int]]] + Test.specify "should generate an IN expression" <| + t2 = t1.filter "A" (Filter_Condition.Is_In [1, 2, 'foo']) + t2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE COALESCE("T1"."A" IN (?, ?, ?), 2=1)', [[1, int], [2, int], ["foo", int]]] + + t3 = t1.filter "A" (Filter_Condition.Is_In [1]) + t3.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE COALESCE("T1"."A" IN (?), 2=1)', [[1, int]]] + + t4 = t1.filter "A" (Filter_Condition.Is_In []) + t4.to_sql.prepare . 
should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE (2=1)', []] + + t5 = t1.filter "A" (Filter_Condition.Is_In [Nothing]) + t5.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" WHERE ((2=1) OR ("T1"."A" IS NULL))', []] + Test.group "[Codegen] Joining Tables" <| t2 = test_connection.query (SQL_Query.Table_Name "T2") t3 = test_connection.query (SQL_Query.Table_Name "T3") diff --git a/test/Table_Tests/src/Table_Spec.enso b/test/Table_Tests/src/Table_Spec.enso index 40b33a0f16..8d590cf7de 100644 --- a/test/Table_Tests/src/Table_Spec.enso +++ b/test/Table_Tests/src/Table_Spec.enso @@ -1,5 +1,6 @@ from Standard.Base import all from Standard.Base.Error.Problem_Behavior import Report_Error +import Standard.Base.Data.Time.Duration from Standard.Table import Table, Column, Sort_Column, Column_Selector, Sort_Column_Selector, Aggregate_Column from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all hiding First, Last @@ -77,6 +78,22 @@ spec = t.at 'latitude' . to_vector . should_equal [34.19, 4.88] t.at 'elevation' . to_vector . 
should_equal [Nothing, 19] + make_varied_type_table = + strs = ["strs", ["a", "b", "c", Nothing]] + ints = ["ints", [Nothing, 1, 2, 4]] + doubles = ["doubles", [0.0, 1.5, Nothing, 2.0]] + doubles_and_ints = ["doubles_and_ints", [0, 1.5, Nothing, 2]] + custom_objects = ["custom_objects", [My.Data 1 2, My.Data 3 4, Nothing, Nothing]] + dates = ["dates", [Nothing, Date.new 2000, Date.new 2022 8 20, Date.new 1999 1 1]] + times = ["times", [Time_Of_Day.new 18 00, Time_Of_Day.new 1 2 34, Nothing, Time_Of_Day.new]] + datetimes = ["datetimes", [Date_Time.new 2000, Date_Time.new 1999 1 2 3 4 5, Nothing, Date_Time.new 2022 8 27 11 22 25]] + mixed = ["mixed", [1, "a", Nothing, Date.new 2022 8 27]] + mixed_dates = ["mixed_dates", [Date.new 1999 1 2, Date_Time.new 1999 1 2 3 40, Date.new 1999 1 2, Date_Time.new 1999 1 2 3 40]] + just_nulls = ["just_nulls", [Nothing, Nothing, Nothing, Nothing]] + + Table.new [strs, ints, doubles, doubles_and_ints, custom_objects, dates, times, datetimes, mixed, mixed_dates, just_nulls] + varied_type_table = make_varied_type_table + Test.group 'Construction' <| Test.specify 'should allow creating a table from rows' <| header = ['foo', 'bar'] @@ -87,30 +104,17 @@ spec = r.at 'bar' . to_vector . 
should_equal [False, True, False] Test.specify "should correctly infer storage types" <| - strs = ["strs", ["a", "b", "c", Nothing]] - ints = ["ints", [Nothing, 1, 2, 4]] - doubles = ["doubles", [0.0, 1.5, Nothing, 2.0]] - doubles_and_ints = ["doubles_and_ints", [0, 1.5, Nothing, 2]] - custom_objects = ["custom_objects", [My.Data 1 2, My.Data 3 4, Nothing, Nothing]] - dates = ["dates", [Nothing, Date.new 2000, Date.new 2022 8 20, Date.new 1999 1 1]] - times = ["times", [Time_Of_Day.new 18 00, Time_Of_Day.new 1 2 34, Nothing, Time_Of_Day.new]] - datetimes = ["datetimes", [Date_Time.new 2000, Date_Time.new 1999 1 2 3 4 5, Nothing, Date_Time.new 2022 8 27 11 22 25]] - mixed = ["mixed", [1, "a", Nothing, Date.new 2022 8 27]] - mixed_dates = ["mixed_dates", [Date.new 1999 1 2, Date_Time.new 1999 1 2 3 40, Date.new 1999 1 2, Date_Time.new 1999 1 2 3 40]] - just_nulls = ["just_nulls", [Nothing, Nothing, Nothing, Nothing]] - - table = Table.new [strs, ints, doubles, doubles_and_ints, custom_objects, dates, times, datetimes, mixed, mixed_dates, just_nulls] - table.at "strs" . storage_type . should_equal Storage.Text - table.at "ints" . storage_type . should_equal Storage.Integer - table.at "doubles" . storage_type . should_equal Storage.Decimal - table.at "doubles_and_ints" . storage_type . should_equal Storage.Decimal - table.at "custom_objects" . storage_type . should_equal Storage.Any - table.at "dates" . storage_type . should_equal Storage.Date - table.at "times" . storage_type . should_equal Storage.Time_Of_Day - table.at "datetimes" . storage_type . should_equal Storage.Date_Time - table.at "mixed" . storage_type . should_equal Storage.Any - table.at "mixed_dates" . storage_type . should_equal Storage.Any - table.at "just_nulls" . storage_type . should_equal Storage.Any + varied_type_table.at "strs" . storage_type . should_equal Storage.Text + varied_type_table.at "ints" . storage_type . should_equal Storage.Integer + varied_type_table.at "doubles" . storage_type . 
should_equal Storage.Decimal + varied_type_table.at "doubles_and_ints" . storage_type . should_equal Storage.Decimal + varied_type_table.at "custom_objects" . storage_type . should_equal Storage.Any + varied_type_table.at "dates" . storage_type . should_equal Storage.Date + varied_type_table.at "times" . storage_type . should_equal Storage.Time_Of_Day + varied_type_table.at "datetimes" . storage_type . should_equal Storage.Date_Time + varied_type_table.at "mixed" . storage_type . should_equal Storage.Any + varied_type_table.at "mixed_dates" . storage_type . should_equal Storage.Any + varied_type_table.at "just_nulls" . storage_type . should_equal Storage.Any pending_python_missing = if Polyglot.is_language_installed "python" . not then "Can't run Python tests, Python is not installed." @@ -943,6 +947,87 @@ spec = t2.at "A" . to_vector . should_equal [2, 3] t2.at "B" . to_vector . should_equal [5, 6] + Test.specify "by an Is_In check in a Vector, on various types of columns" <| + varied_type_table.filter "strs" (Filter_Condition.Is_In ["c", "b", Nothing]) . at "strs" . to_vector . should_equal ["b", "c", Nothing] + varied_type_table.filter "ints" (Filter_Condition.Is_In [1, 2, 3]) . at "ints" . to_vector . should_equal [1, 2] + varied_type_table.filter "ints" (Filter_Condition.Is_In [1, Nothing]) . at "ints" . to_vector . should_equal [Nothing, 1] + varied_type_table.filter "doubles" (Filter_Condition.Is_In [0.0, Nothing]) . at "doubles" . to_vector . should_equal [0.0, Nothing] + varied_type_table.filter "dates" (Filter_Condition.Is_In [Date.new 2000, Date.new 1999 1 1, Date_Time.new 2022 8 20]) . at "dates" . to_vector . should_equal [Date.new 2000, Date.new 1999 1 1] + varied_type_table.filter "datetimes" (Filter_Condition.Is_In [Date_Time.new 2022 8 27 11 22 25, Nothing, Date_Time.new 2030, Date.new 2000]) . at "datetimes" . to_vector . 
should_equal [Nothing, Date_Time.new 2022 8 27 11 22 25] + varied_type_table.filter "times" (Filter_Condition.Is_In [Time_Of_Day.new 18 00, Time_Of_Day.new 18 19, Date_Time.new 2000 1 1]) . at "times" . to_vector . should_equal [Time_Of_Day.new 18 00] + varied_type_table.filter "mixed" (Filter_Condition.Is_In [42, "a", 1, Nothing, Date.new 2022 8 27, Date_Time.new 2022 8 27]) . at "mixed" . to_vector . should_equal [1, "a", Nothing, Date.new 2022 8 27] + varied_type_table.filter "mixed" (Filter_Condition.Is_In [42, Date_Time.new 2022 8 27, 1]) . at "mixed" . to_vector . should_equal [1] + varied_type_table.filter "just_nulls" (Filter_Condition.Is_In []) . at "just_nulls" . to_vector . should_equal [] + varied_type_table.filter "just_nulls" (Filter_Condition.Is_In [Nothing, Nothing, 0]) . at "just_nulls" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing] + varied_type_table.filter "just_nulls" (Filter_Condition.Is_In [0]) . at "just_nulls" . to_vector . should_equal [] + varied_type_table.filter "custom_objects" (Filter_Condition.Is_In [2, My.Data 2 1, Nothing]) . at "custom_objects" . to_vector . should_equal [My.Data 1 2, Nothing, Nothing] + + t2 = Table.new [["ints", [1, 2, 3]], ["doubles", [1.2, 0.0, 1.0]]] + t2.filter "ints" (Filter_Condition.Is_In [2.0, 1.5, 3, 4]) . at "ints" . to_vector . should_equal [2, 3] + t2.filter "doubles" (Filter_Condition.Is_In [0.1, 1, 3, 1.2]) . at "doubles" . to_vector . should_equal [1.2, 1.0] + + # We test these very carefully as our implementation relies on some short-circuit logic that is not as trivial as the hashmap checks which are done for other builtin types. 
+ [True, False].each has_nulls-> + [True, False].each has_true-> + [True, False].each has_false-> + vec_builder = Vector.new_builder + if has_nulls then vec_builder.append Nothing + if has_true then vec_builder.append True + if has_false then vec_builder.append False + in_vector = vec_builder.to_vector + + vectors = [[True, False, Nothing], [Nothing, Nothing, Nothing], [False, False, True], [True, True, True], [False, False, False], [Nothing, True, True], [False, Nothing, False]] + vectors.each column_vector-> + not x = case x of + True -> False + False -> True + Nothing -> Nothing + negated_column_vector = column_vector.map not + t = Table.new [["X", column_vector]] + + expected_vector = column_vector.filter (Filter_Condition.Is_In in_vector) + expected_neg_vector = negated_column_vector.filter (Filter_Condition.Is_In in_vector) + + t.filter "X" (Filter_Condition.Is_In in_vector) on_problems=Report_Error . at "X" . to_vector . should_equal expected_vector + t2 = t.set "Y" (t.at "X" . not) + t2.filter "Y" (Filter_Condition.Is_In in_vector) on_problems=Report_Error . at "Y" . to_vector . should_equal expected_neg_vector + + Test.specify "should perform `Is_In` efficiently for builtin types" <| + first_day = Date_Time.new 2000 1 1 + make_date x = first_day + (Duration.new seconds=x) + init = Duration.time_execution <| + t = Table.new [["X", (200.up_to 10000 . to_vector)]] + vec = 4000.up_to 13000 . to_vector + expected_vector = 4000.up_to 10000 . to_vector + expected_vector_2 = 200.up_to 10000 . with_step 2 . to_vector + dates_vec = vec.map make_date + bool_vec = Vector.fill 7000 True + date_col = t.at "X" . map make_date + [t, vec, expected_vector, expected_vector_2, dates_vec, bool_vec, date_col] + t = init.second . at 0 + vec = init.second . at 1 + expected_vector = init.second . at 2 + expected_vector_2 = init.second . at 3 + dates_vec = init.second . at 4 + bool_vec = init.second . at 5 + date_col = init.second . 
at 6 + + expected_max_time_ms = init.first.total_milliseconds * 2 + check_timing name ~action = + res = Duration.time_execution action + runtime_ms = res.first.total_milliseconds + if runtime_ms > expected_max_time_ms then + Test.fail "Expected `Is_In` on "+name+" to be efficient, but it took "+runtime_ms.to_text+"ms while initialization itself took just "+expected_max_time_ms.to_text+"ms." + + check_timing "integers" <| + t.filter "X" (Filter_Condition.Is_In vec) . at "X" . to_vector . should_equal expected_vector + + check_timing "booleans" <| + t.filter (t.at "X" % 2 == 0) (Filter_Condition.Is_In bool_vec) . at "X" . to_vector . should_equal expected_vector_2 + + check_timing "dates" <| + t.filter date_col (Filter_Condition.Is_In dates_vec) . at "X" . to_vector . should_equal expected_vector + main = Test.Suite.run_main spec ## JS indexes months form 0, so we need to subtract 1. diff --git a/test/Tests/src/Data/List_Spec.enso b/test/Tests/src/Data/List_Spec.enso index 987b117ebf..30dbb674eb 100644 --- a/test/Tests/src/Data/List_Spec.enso +++ b/test/Tests/src/Data/List_Spec.enso @@ -53,6 +53,8 @@ spec = Test.group "List" <| list.filter (Filter_Condition.Equal_Or_Greater than=3) . should_equal [3, 4, 5].to_list list.filter (Filter_Condition.Equal_Or_Less than=(-1)) . should_equal Nil list.filter (Filter_Condition.Between 2 4) . should_equal [2, 3, 4].to_list + list.filter (Filter_Condition.Is_In [7, 3, 2]) . should_equal [2, 3].to_list + list.filter (Filter_Condition.Not_In [7, 3, 2]) . should_equal [1, 4, 5].to_list Test.expect_panic_with (list.filter (Filter_Condition.Starts_With "a")) No_Such_Method_Error_Data list.filter Filter_Condition.Is_True . 
should_equal Nil diff --git a/test/Tests/src/Data/Range_Spec.enso b/test/Tests/src/Data/Range_Spec.enso index df48b7b950..8fd28044b7 100644 --- a/test/Tests/src/Data/Range_Spec.enso +++ b/test/Tests/src/Data/Range_Spec.enso @@ -77,6 +77,8 @@ spec = Test.group "Range" <| range.filter (Filter_Condition.Equal_Or_Less than=(-1)) . should_equal [] range.filter (Filter_Condition.Between 2 4) . should_equal [2, 3, 4] range.filter (Filter_Condition.Between 2.1 4.5) . should_equal [3, 4] + range.filter (Filter_Condition.Is_In [7, 3, 2]) . should_equal [2, 3] + range.filter (Filter_Condition.Not_In [7, 3, 2]) . should_equal [1, 4, 5] Test.expect_panic_with (range.filter (Filter_Condition.Starts_With "a")) No_Such_Method_Error_Data Test.expect_panic_with (range.filter (Filter_Condition.Like "a%")) Unsupported_Argument_Types_Data diff --git a/test/Tests/src/Data/Vector_Spec.enso b/test/Tests/src/Data/Vector_Spec.enso index 8c770d72c4..c576e87e4c 100644 --- a/test/Tests/src/Data/Vector_Spec.enso +++ b/test/Tests/src/Data/Vector_Spec.enso @@ -151,6 +151,9 @@ spec = Test.group "Vectors" <| vec.filter (Filter_Condition.Equal_Or_Less than=(-1)) . should_equal [] vec.filter (Filter_Condition.Between 2 4) . should_equal [2, 3, 4] vec.filter (Filter_Condition.Between 2.1 4.5) . should_equal [3, 4] + vec.filter (Filter_Condition.Is_In [7, 3, 2, 2, 2]) . should_equal [2, 3] + vec.filter (Filter_Condition.Is_In []) . should_equal [] + vec.filter (Filter_Condition.Not_In [7, 3, 2, 2]) . should_equal [1, 4, 5] Test.expect_panic_with (vec.filter (Filter_Condition.Starts_With "a")) No_Such_Method_Error_Data vec.filter Filter_Condition.Is_True . should_equal [] @@ -167,6 +170,9 @@ spec = Test.group "Vectors" <| txtvec.filter (Filter_Condition.Greater than="b") . should_equal ["bbb", "cccc", "baaa", "ś"] txtvec.filter (Filter_Condition.Between "b" "c") . 
should_equal ["bbb", "baaa"] Test.expect_panic_with (txtvec.filter (Filter_Condition.Starts_With 42)) Unsupported_Argument_Types_Data + txtvec.filter Filter_Condition.Is_True . should_equal [] + txtvec.filter (Filter_Condition.Is_In [1, 2]) . should_equal [] + txtvec.filter (Filter_Condition.Is_In ["bbb", 's\u0301', "bbb", "FOOBAR"]) . should_equal ["bbb", "ś"] ["", Nothing, " ", "a"].filter (Filter_Condition.Is_Empty) . should_equal ["", Nothing] ["", Nothing, " ", "a"].filter (Filter_Condition.Not_Empty) . should_equal [" ", "a"]