From 817289606525cd2c57e1496cb11aa8a1d9b67ea5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 20 Oct 2023 15:18:53 +0200 Subject: [PATCH] Support `Previous_Value` in `fill_nothing` and `fill_empty` (#8105) - Adds `Previous_Value` to `fill_nothing` and `fill_empty`, as requested by #7192. --- CHANGELOG.md | 2 + .../Database/0.0.0-dev/src/Data/Column.enso | 29 ++++++--- .../Database/0.0.0-dev/src/Data/Table.enso | 22 ++++--- .../Table/0.0.0-dev/src/Data/Column.enso | 61 +++++++++++++------ .../Table/0.0.0-dev/src/Data/Constants.enso | 4 ++ .../Table/0.0.0-dev/src/Data/Table.enso | 22 ++++--- .../src/Internal/Widget_Helpers.enso | 10 +++ .../Standard/Table/0.0.0-dev/src/Main.enso | 2 + .../data/column/storage/BoolStorage.java | 33 ++++++++++ .../column/storage/MixedStorageFacade.java | 6 ++ .../column/storage/SpecializedStorage.java | 27 ++++++++ .../table/data/column/storage/Storage.java | 12 ++++ .../storage/numeric/AbstractLongStorage.java | 35 +++++++++++ .../column/storage/numeric/DoubleStorage.java | 35 +++++++++++ .../Missing_Values_Spec.enso | 41 ++++++++++++- .../table_test_helpers/ExplodingStorage.java | 6 ++ 16 files changed, 300 insertions(+), 47 deletions(-) create mode 100644 distribution/lib/Standard/Table/0.0.0-dev/src/Data/Constants.enso diff --git a/CHANGELOG.md b/CHANGELOG.md index 62d3a032c67..6e912dfd9e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -587,6 +587,7 @@ deriving column values in the GUI.][8005] - [Implemented `Table.expand_to_rows` for the in-memory backend.][8029] - [Added XML support for `.to Table` and `.expand_column`.][8083] +- [Added `Previous_Value` option to `fill_nothing` and `fill_empty`.][8105] [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug @@ -837,6 +838,7 @@ [8005]: https://github.com/enso-org/enso/pull/8005 [8029]: https://github.com/enso-org/enso/pull/8029 [8083]: https://github.com/enso-org/enso/pull/8083 +[8105]: 
https://github.com/enso-org/enso/pull/8105 #### Enso Compiler diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso index 61bddc9c43b..6a6aa5d4ad5 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso @@ -5,6 +5,7 @@ import Standard.Base.Internal.Rounding_Helpers from Standard.Base.Widget_Helpers import make_regex_text_widget import Standard.Table.Data.Column.Column as Materialized_Column +import Standard.Table.Data.Constants.Previous_Value import Standard.Table.Data.Type.Enso_Types import Standard.Table.Data.Type.Value_Type_Helpers import Standard.Table.Internal.Column_Naming_Helper.Column_Naming_Helper @@ -1008,7 +1009,10 @@ type Column Arguments: - default: The value to replace missing values with. If this argument is a column, the value from `default` at the corresponding position - will be used. + will be used. If this argument is `Previous_Value`, the missing values + will be replaced with the previous value in the column. Note that the + first rows may stay `Nothing` if they do not have a previous value to + use. > Example Fill missing values in a column with the value 20.5. 
@@ -1016,12 +1020,14 @@ type Column import Standard.Examples example_fill_nothing = Examples.decimal_column.fill_nothing 20.5 - fill_nothing : Column | Any -> Column + @default (Widget_Helpers.make_fill_default_value_selector include_custom_text=False) + fill_nothing : Column | Previous_Value | Any -> Column fill_nothing self default = - common_type = Value_Type_Helpers.find_common_type_for_arguments [self, default] - common_type.if_not_error <| - op_result = self.make_binary_op "FILL_NULL" default self.name - adapt_unified_column op_result common_type + if Previous_Value == default then Error.throw (Unsupported_Database_Operation.Error "The Previous_Value argument is currently not supported in the database backend.") else + common_type = Value_Type_Helpers.find_common_type_for_arguments [self, default] + common_type.if_not_error <| + op_result = self.make_binary_op "FILL_NULL" default self.name + adapt_unified_column op_result common_type ## ALIAS fill empty, if_empty GROUP Standard.Base.Values @@ -1032,7 +1038,9 @@ type Column Arguments: - default: The value to replace empty values with. If this argument is a column, the value from `default` at the corresponding position - will be used. + will be used. If this argument is `Previous_Value`, the empty values + will be replaced with the previous value in the column. Note that the + first rows may stay empty if they do not have a previous value to use. > Example Fill empty values in a column with the value "hello". 
@@ -1040,10 +1048,11 @@ type Column import Standard.Examples example_fill_empty = Examples.text_column_1.fill_empty "hello" - fill_empty : Column | Any -> Column + @default (Widget_Helpers.make_fill_default_value_selector include_custom_text=True) + fill_empty : Column | Previous_Value | Any -> Column fill_empty self default = - Value_Type.expect_text self <| - Value_Type.expect_text default <| + if Previous_Value == default then Error.throw (Unsupported_Database_Operation.Error "The Previous_Value argument is currently not supported in the database backend.") else + Value_Type.expect_text self <| Value_Type.expect_text default <| result = self.is_empty.iif default self result.rename self.name diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index 5720fd258ef..5139c3d69df 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -13,6 +13,7 @@ from Standard.Base.Widget_Helpers import make_delimiter_selector import Standard.Table.Data.Calculations.Column_Operation.Column_Operation import Standard.Table.Data.Column_Ref.Column_Ref +import Standard.Table.Data.Constants.Previous_Value import Standard.Table.Data.Expression.Expression import Standard.Table.Data.Expression.Expression_Error import Standard.Table.Data.Join_Condition.Join_Condition @@ -2368,16 +2369,19 @@ type Table match names, or a Vector of these. - default: The value to replace missing values with. If this argument is a column, the value from `default` at the corresponding position - will be used. + will be used. If this argument is `Previous_Value`, the missing values + will be replaced with the previous value in the column. Note that the + first rows may stay `Nothing` if they do not have a previous value to + use. > Example Fill missing values in two columns with the value 20.5. 
fill_nothing = table.fill_nothing ["col0", "col1"] 20.5 @columns Widget_Helpers.make_column_name_vector_selector - @default Widget_Helpers.make_column_ref_by_name_selector - fill_nothing : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Column_Ref | Any -> Table - fill_nothing self (columns : Vector | Text | Integer | Regex) (default : Column | Column_Ref | Any) = + @default (self -> Widget_Helpers.make_fill_default_value_selector column_source=self include_custom_text=False) + fill_nothing : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Column_Ref | Previous_Value | Any -> Table + fill_nothing self (columns : Vector | Text | Integer | Regex) default = resolved_default = (self:Table_Ref).resolve default transformer col = col.fill_nothing resolved_default Table_Helpers.replace_columns_with_transformed_columns self columns transformer @@ -2393,16 +2397,18 @@ type Table match names, or a Vector of these. - default: The value to replace empty values with. If this argument is a column, the value from `default` at the corresponding position - will be used. + will be used. If this argument is `Previous_Value`, the empty values + will be replaced with the previous value in the column. Note that the + first rows may stay empty if they do not have a previous value to use. > Example Fill empty values in two columns with the value "hello". 
fill_empty = table.fill_empty ["col0", "col1"] "hello" @columns Widget_Helpers.make_column_name_vector_selector - @default Widget_Helpers.make_column_ref_or_text_value_selector - fill_empty : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Column_Ref | Any -> Table - fill_empty self (columns : Vector | Text | Integer | Regex) (default : Column | Column_Ref | Any) = + @default (self -> Widget_Helpers.make_fill_default_value_selector column_source=self include_custom_text=True) + fill_empty : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Column_Ref | Previous_Value | Any -> Table + fill_empty self (columns : Vector | Text | Integer | Regex) default = resolved_default = (self:Table_Ref).resolve default transformer col = col.fill_empty resolved_default Table_Helpers.replace_columns_with_transformed_columns self columns transformer diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso index 2260cbd6128..ce37abd9724 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso @@ -11,6 +11,7 @@ import Standard.Base.Internal.Polyglot_Helpers import Standard.Base.Internal.Rounding_Helpers from Standard.Base.Widget_Helpers import make_regex_text_widget +import project.Data.Constants.Previous_Value import project.Data.Data_Formatter.Data_Formatter import project.Data.Table.Table import project.Data.Type.Enso_Types @@ -1105,7 +1106,10 @@ type Column Arguments: - default: The value to replace missing values with. If this argument is a column, the value from `default` at the corresponding position - will be used. + will be used. If this argument is `Previous_Value`, the missing values + will be replaced with the previous value in the column. Note that the + first rows may stay `Nothing` if they do not have a previous value to + use. 
> Example Fill missing values in a column with the value 20.5. @@ -1113,20 +1117,22 @@ type Column import Standard.Examples example_fill_nothing = Examples.decimal_column.fill_nothing 20.5 - fill_nothing : Column | Any -> Column + @default (Widget_Helpers.make_fill_default_value_selector include_custom_text=False) + fill_nothing : Column | Previous_Value | Any -> Column fill_nothing self default = - common_type = Value_Type_Helpers.find_common_type_for_arguments [self, default] - common_type.if_not_error <| - storage = self.java_column.getStorage - storage_type = Storage.from_value_type_strict common_type - new_storage = Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator-> - case default of - Column.Value java_col -> - other_storage = java_col.getStorage - storage.fillMissingFrom other_storage storage_type java_problem_aggregator - _ -> - storage.fillMissing default storage_type java_problem_aggregator - Column.Value (Java_Column.new self.name new_storage) + if Previous_Value == default then fill_previous self Nothing else + common_type = Value_Type_Helpers.find_common_type_for_arguments [self, default] + common_type.if_not_error <| + storage = self.java_column.getStorage + storage_type = Storage.from_value_type_strict common_type + new_storage = Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator-> + case default of + Column.Value java_col -> + other_storage = java_col.getStorage + storage.fillMissingFrom other_storage storage_type java_problem_aggregator + _ -> + storage.fillMissing default storage_type java_problem_aggregator + Column.Value (Java_Column.new self.name new_storage) ## ALIAS fill empty, if_empty GROUP Standard.Base.Values @@ -1137,7 +1143,9 @@ type Column Arguments: - default: The value to replace empty values with. If this argument is a column, the value from `default` at the corresponding position - will be used. + will be used. 
If this argument is `Previous_Value`, the empty values + will be replaced with the previous value in the column. Note that the + first rows may stay empty if they do not have a previous value to use. > Example Fill empty values in a column with the value "hello". @@ -1145,12 +1153,14 @@ type Column import Standard.Examples example_fill_empty = Examples.text_column_1.fill_empty "hello" - fill_empty : Column | Any -> Column + @default (Widget_Helpers.make_fill_default_value_selector include_custom_text=True) + fill_empty : Column | Previous_Value | Any -> Column fill_empty self default = Value_Type.expect_text self <| - Value_Type.expect_text default <| - result = self.is_empty.iif default self - result.rename self.name + if Previous_Value == default then fill_previous self self.is_empty else + Value_Type.expect_text default <| + result = self.is_empty.iif default self + result.rename self.name ## GROUP Standard.Base.Text Checks for each element of the column if it starts with `other`. @@ -2527,6 +2537,19 @@ naming_helper = Column_Naming_Helper.in_memory Resolves the default date period for `date_add` depending on the source column value type. default_date_period column = if column.value_type.has_date then Date_Period.Day else Time_Period.Hour +## PRIVATE + Fills the missing values in a provided column with the previous non-missing value. + + Arguments: + - column: The column to fill. + - is_missing: A boolean column specifying which elements are deemed missing. + If set to `Nothing`, this will rely on the default missing value semantics + (`is_nothing`). +fill_previous column is_missing = + missing_storage = if Nothing == is_missing then Nothing else is_missing.java_column.getStorage + new_storage = column.java_column.getStorage.fillMissingFromPrevious missing_storage + Column.from_storage column.name new_storage + ## PRIVATE Conversion method to a Column from a Vector. 
Column.from (that:Vector) (name:Text="Vector") = Column.from_vector name that diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Constants.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Constants.enso new file mode 100644 index 00000000000..e2b990ee3b8 --- /dev/null +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Constants.enso @@ -0,0 +1,4 @@ +## Indicates that the operation should use the previous non-missing value + when filling in missing values, for example in `fill_nothing` and + `fill_empty`. +type Previous_Value diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index 11f17279f4a..229495bf2da 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -18,6 +18,7 @@ import project.Data.Calculations.Column_Operation.Column_Operation import project.Data.Column as Column_Module import project.Data.Column.Column import project.Data.Column_Ref.Column_Ref +import project.Data.Constants.Previous_Value import project.Data.Data_Formatter.Data_Formatter import project.Data.Expression.Expression import project.Data.Expression.Expression_Error @@ -2371,16 +2372,19 @@ type Table match names, or a Vector of these. - default: The value to replace missing values with. If this argument is a column, the value from `default` at the corresponding position - will be used. + will be used. If this argument is `Previous_Value`, the missing values + will be replaced with the previous value in the column. Note that the + first rows may stay `Nothing` if they do not have a previous value to + use. > Example Fill missing values in two columns with the value 20.5. 
fill_nothing = table.fill_nothing ["col0", "col1"] 20.5 @columns Widget_Helpers.make_column_name_vector_selector - @default Widget_Helpers.make_column_ref_by_name_selector - fill_nothing : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Column_Ref | Any -> Table - fill_nothing self (columns : Vector | Text | Integer | Regex) (default : Column | Column_Ref | Any) = + @default (self -> Widget_Helpers.make_fill_default_value_selector column_source=self include_custom_text=False) + fill_nothing : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Column_Ref | Previous_Value | Any -> Table + fill_nothing self (columns : Vector | Text | Integer | Regex) default = resolved_default = (self:Table_Ref).resolve default transformer col = col.fill_nothing resolved_default Table_Helpers.replace_columns_with_transformed_columns self columns transformer @@ -2396,16 +2400,18 @@ type Table match names, or a Vector of these. - default: The value to replace empty values with. If this argument is a column, the value from `default` at the corresponding position - will be used. + will be used. If this argument is `Previous_Value`, the empty values + will be replaced with the previous value in the column. Note that the + first rows may stay empty if they do not have a previous value to use. > Example Fill empty values in two columns with the value "hello". 
fill_empty = table.fill_empty ["col0", "col1"] "hello" @columns Widget_Helpers.make_column_name_vector_selector - @default Widget_Helpers.make_column_ref_or_text_value_selector - fill_empty : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Column_Ref | Any -> Table - fill_empty self (columns : Vector | Text | Integer | Regex) (default : Column | Column_Ref | Any) = + @default (self -> Widget_Helpers.make_fill_default_value_selector column_source=self include_custom_text=True) + fill_empty : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Column_Ref | Previous_Value | Any -> Table + fill_empty self (columns : Vector | Text | Integer | Regex) default = resolved_default = (self:Table_Ref).resolve default transformer col = col.fill_empty resolved_default Table_Helpers.replace_columns_with_transformed_columns self columns transformer diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Widget_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Widget_Helpers.enso index bed1a1da00b..ab0249e1f69 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Widget_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Widget_Helpers.enso @@ -96,6 +96,16 @@ make_column_ref_or_text_value_selector table display=Display.Always = custom_text_option = Option "'custom text'" "''" Single_Choice values=(col_names_options+[custom_text_option]) display=display +## PRIVATE + If `column_source` is Nothing, `Column_Ref` options will not be added. 
+make_fill_default_value_selector : Table | Nothing -> Boolean -> Display -> Widget +make_fill_default_value_selector column_source=Nothing include_custom_text=False display=Display.Always = + col_names_options = if column_source.is_nothing then [] else + column_source.column_names.map (name -> Option name "(Column_Ref.Name "+name.pretty+")") + custom_text_option = if include_custom_text then [Option "'custom text'" "''"] else [] + previous_value_option = [Option 'Previous Value' 'Previous_Value'] + Single_Choice values=(previous_value_option+col_names_options+custom_text_option) display=display + ## PRIVATE Make a filter condition selector. make_filter_condition_selector : Table -> Display -> Widget diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso index bf27cb3fbc4..16711b8c2e4 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso @@ -23,6 +23,7 @@ import project.Excel.Excel_Range.Excel_Range import project.Excel.Excel_Section.Excel_Section import project.Excel.Excel_Workbook.Excel_Workbook import project.Extensions.Prefix_Name.Prefix_Name +from project.Data.Constants import all from project.Delimited.Delimited_Format.Delimited_Format import Delimited from project.Excel.Excel_Format.Excel_Format import Excel from project.Excel.Excel_Section.Excel_Section import Cell_Range, Range_Names, Sheet_Names, Worksheet @@ -51,6 +52,7 @@ export project.Excel.Excel_Range.Excel_Range export project.Excel.Excel_Section.Excel_Section export project.Excel.Excel_Workbook.Excel_Workbook export project.Extensions.Prefix_Name.Prefix_Name +from project.Data.Constants export all from project.Delimited.Delimited_Format.Delimited_Format export Delimited from project.Excel.Excel_Format.Excel_Format export Excel from project.Excel.Excel_Section.Excel_Section export Cell_Range, Range_Names, Sheet_Names, Worksheet diff --git 
a/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java index 6368f5bcef8..dd9cbbfa679 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java @@ -141,6 +141,39 @@ public final class BoolStorage extends Storage { } } + @Override + public Storage fillMissingFromPrevious(BoolStorage missingIndicator) { + if (missingIndicator != null) { + throw new IllegalStateException("Custom missing value semantics are not supported by BoolStorage."); + } + + boolean previousValue = false; + boolean hasPrevious = false; + BitSet newMissing = new BitSet(); + BitSet newValues = new BitSet(); + + Context context = Context.getCurrent(); + for (int i = 0; i < size; i++) { + boolean isCurrentValueMissing = isMissing.get(i); + if (isCurrentValueMissing) { + if (hasPrevious) { + newValues.set(i, previousValue); + } else { + newMissing.set(i); + } + } else { + boolean currentValue = getItem(i); + newValues.set(i, currentValue); + previousValue = currentValue; + hasPrevious = true; + } + + context.safepoint(); + } + + return new BoolStorage(newValues, newMissing, size, false); + } + @Override public BoolStorage mask(BitSet mask, int cardinality) { Context context = Context.getCurrent(); diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/MixedStorageFacade.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/MixedStorageFacade.java index acfca79d368..054125816b8 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/MixedStorageFacade.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/MixedStorageFacade.java @@ -84,6 +84,12 @@ public class MixedStorageFacade extends Storage { return underlyingStorage.runVectorizedZip(name, argument, problemAggregator); } + @Override + 
public Storage fillMissingFromPrevious(BoolStorage missingIndicator) { + Storage newStorage = underlyingStorage.fillMissingFromPrevious(missingIndicator); + return new MixedStorageFacade(newStorage); + } + @Override public Storage mask(BitSet mask, int cardinality) { Storage newStorage = underlyingStorage.mask(mask, cardinality); diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/SpecializedStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/SpecializedStorage.java index 49bb12e3893..203ad4ee398 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/SpecializedStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/SpecializedStorage.java @@ -184,6 +184,33 @@ public abstract class SpecializedStorage extends Storage { return newInstance(newData, size + count); } + @Override + public Storage fillMissingFromPrevious(BoolStorage missingIndicator) { + if (missingIndicator != null && missingIndicator.countMissing() > 0) { + throw new IllegalArgumentException( + "Missing indicator must not contain missing values itself."); + } + + T[] newData = newUnderlyingArray(size); + T previous = null; + boolean hasPrevious = false; + + Context context = Context.getCurrent(); + for (int i = 0; i < size; i++) { + boolean isCurrentValueMissing = + missingIndicator == null ? isNa(i) : missingIndicator.getItem(i); + if (!isCurrentValueMissing) { + previous = data[i]; + hasPrevious = true; + } + + newData[i] = hasPrevious ? 
previous : data[i]; + context.safepoint(); + } + + return newInstance(newData, size); + } + @Override public List toList() { return new ReadOnlyList<>(this); diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java index dcb1d58ddc0..29c045cf1fa 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java @@ -451,6 +451,18 @@ public abstract class Storage { return builder.seal(); } + /** + * Fills missing values with a previous non-missing value. + * + * <p> + * + * @param missingIndicator Specifies which values should be considered missing. It can be used to + * implement custom missing value semantics, like `fill_empty`. It can be set to {@code null} + * to just rely on the default semantics of missing values. Some storages may not allow + * customizing the semantics. + */ + public abstract Storage fillMissingFromPrevious(BoolStorage missingIndicator); + /** * Return a new storage, containing only the items marked true in the mask. * diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/AbstractLongStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/AbstractLongStorage.java index c398a8d3cd9..9012612ede2 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/AbstractLongStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/AbstractLongStorage.java @@ -181,6 +181,41 @@ public abstract class AbstractLongStorage extends NumericStorage { return ops; } + @Override + public AbstractLongStorage fillMissingFromPrevious(BoolStorage missingIndicator) { + if (missingIndicator != null) { + throw new IllegalStateException( + "Custom missing value semantics are not supported by AbstractLongStorage."); + } + + int n = size(); + long[] newData = new long[n]; + BitSet newMissing = new BitSet(); + long previousValue = 0; + boolean hasPrevious = false; + + Context context = Context.getCurrent(); + for (int i = 0; i < n; i++) { + boolean isCurrentMissing = isNa(i); + if (isCurrentMissing) { + if (hasPrevious) { + newData[i] = previousValue; + } else { + newMissing.set(i); + } + } else { + long currentValue = getItem(i); + newData[i] = currentValue; + previousValue = currentValue; + hasPrevious = true; + } + + context.safepoint(); + } + + return new LongStorage(newData, n, newMissing, getType()); + } + /** * Return an instance of storage containing the same data but with a wider type. 
* diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/DoubleStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/DoubleStorage.java index e3c9c8ea8cb..bd2e9383ccb 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/DoubleStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/DoubleStorage.java @@ -213,6 +213,41 @@ public final class DoubleStorage extends NumericStorage implements Doubl return super.fillMissing(arg, commonType, problemAggregator); } + @Override + public DoubleStorage fillMissingFromPrevious(BoolStorage missingIndicator) { + if (missingIndicator != null) { + throw new IllegalStateException( + "Custom missing value semantics are not supported by DoubleStorage."); + } + + int n = size(); + long[] newData = new long[n]; + BitSet newMissing = new BitSet(); + long previousValueRaw = 0; + boolean hasPrevious = false; + + Context context = Context.getCurrent(); + for (int i = 0; i < n; i++) { + boolean isCurrentMissing = isNa(i); + if (isCurrentMissing) { + if (hasPrevious) { + newData[i] = previousValueRaw; + } else { + newMissing.set(i); + } + } else { + long currentValueRaw = data[i]; + newData[i] = currentValueRaw; + previousValueRaw = currentValueRaw; + hasPrevious = true; + } + + context.safepoint(); + } + + return new DoubleStorage(newData, n, newMissing); + } + @Override public Storage mask(BitSet mask, int cardinality) { BitSet newMissing = new BitSet(); diff --git a/test/Table_Tests/src/Common_Table_Operations/Missing_Values_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Missing_Values_Spec.enso index 6c2a8d66d8f..51bd2fd7cf9 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Missing_Values_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Missing_Values_Spec.enso @@ -1,6 +1,6 @@ from Standard.Base import all -from Standard.Table import Value_Type, Column_Ref +from Standard.Table import 
Value_Type, Column_Ref, Previous_Value from Standard.Table.Data.Aggregate_Column.Aggregate_Column import Count_Distinct from Standard.Table.Errors import all @@ -271,10 +271,47 @@ spec setup = e.value_type.variable_length.should_be_true Test.specify "should allow setting a default column by reference" <| - t = table_builder [["A", ["x", "", Nothing]], ["B", ["a", "b", "c"]]] + t = table_builder [["A", ["x", "", Nothing]], ["B", ["a", "b", "c"]], ["C", [Nothing, Nothing, "ZZZ"]], ["D", [Nothing, "2", "3"]]] t1 = t.fill_nothing "A" (Column_Ref.Name "B") t1.at "A" . to_vector . should_equal ["x", "", "c"] t2 = t.fill_empty "A" (Column_Ref.Name "B") t2.at "A" . to_vector . should_equal ["x", "b", "c"] + + t3 = t.fill_nothing ["A", "C"] (Column_Ref.Name "D") + t3.at "A" . to_vector . should_equal ["x", "", "3"] + t3.at "B" . to_vector . should_equal ["a", "b", "c"] + t3.at "C" . to_vector . should_equal [Nothing, "2", "ZZZ"] + t3.at "D" . to_vector . should_equal [Nothing, "2", "3"] + + if setup.is_database.not then Test.specify "should allow filling rows with previous value" <| + t = table_builder [["A", ["a", "", Nothing, Nothing, "", "b", "c", Nothing]]] + + t1 = t.fill_nothing "A" Previous_Value + t1.at "A" . to_vector . should_equal ["a", "", "", "", "", "b", "c", "c"] + + t2 = t.fill_empty "A" Previous_Value + t2.at "A" . to_vector . should_equal ["a", "a", "a", "a", "a", "b", "c", "c"] + + # May still keep Nothing/empty if there is no previous value + t3 = table_builder [["B", [Nothing, Nothing, "", Nothing, "a", Nothing]]] + t3.fill_nothing "B" Previous_Value . at "B" . to_vector . should_equal [Nothing, Nothing, "", "", "a", "a"] + t3.fill_empty "B" Previous_Value . at "B" . to_vector . 
should_equal [Nothing, Nothing, "", Nothing, "a", "a"] + + # Will work on multiple columns + t4 = table_builder [["A", ["a", "b", Nothing, Nothing]], ["B", [Nothing, 42, Nothing, 123]], ["C", ["", "foo", Nothing, "bar"]], ["D", [True, Nothing, False, Nothing]], ["E", [Nothing, Date.new 2001, Nothing, Nothing]], ["F", [2, Nothing, "a", Nothing]]] + t5 = t4.fill_nothing ["A", "B", "D", "E", "F"] Previous_Value + t5.at "A" . to_vector . should_equal ["a", "b", "b", "b"] + t5.at "B" . to_vector . should_equal [Nothing, 42, 42, 123] + t5.at "D" . to_vector . should_equal [True, True, False, False] + t5.at "E" . to_vector . should_equal [Nothing, Date.new 2001, Date.new 2001, Date.new 2001] + t5.at "F" . to_vector . should_equal [2, 2, "a", "a"] + + # C is unchanged + t5.at "C" . to_vector . should_equal ["", "foo", Nothing, "bar"] + + if setup.is_database then Test.specify "will for now report that Previous_Value is not supported" <| + t = table_builder [["A", ["a", "", Nothing, Nothing, "", "b", "c", Nothing]]] + t.fill_nothing "A" Previous_Value . should_fail_with Unsupported_Database_Operation + t.fill_empty "A" Previous_Value . 
should_fail_with Unsupported_Database_Operation diff --git a/test/Tests/polyglot-sources/enso-test-java-helpers/src/main/java/org/enso/table_test_helpers/ExplodingStorage.java b/test/Tests/polyglot-sources/enso-test-java-helpers/src/main/java/org/enso/table_test_helpers/ExplodingStorage.java index c7e325cf11f..56ca0db879a 100644 --- a/test/Tests/polyglot-sources/enso-test-java-helpers/src/main/java/org/enso/table_test_helpers/ExplodingStorage.java +++ b/test/Tests/polyglot-sources/enso-test-java-helpers/src/main/java/org/enso/table_test_helpers/ExplodingStorage.java @@ -3,6 +3,7 @@ package org.enso.table_test_helpers; import java.util.BitSet; import java.util.List; import org.enso.table.data.column.operation.map.MapOperationProblemAggregator; +import org.enso.table.data.column.storage.BoolStorage; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.column.storage.type.IntegerType; import org.enso.table.data.column.storage.type.StorageType; @@ -87,6 +88,11 @@ public class ExplodingStorage extends Storage { return null; } + @Override + public Storage fillMissingFromPrevious(BoolStorage missingIndicator) { + return null; + } + @Override public Storage mask(BitSet mask, int cardinality) { return null;