Add update mode to Running (#11045)

2024-11-22 22:10:15 +03:00 · 2024-09-11 15:41:33 +01:00 · 2024-09-11 15:41:33 +01:00 · a666585afe
commit a666585afe
parent 97a1628017
5 changed files with 116 additions and 49 deletions
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Table.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Table.enso
@ -2933,6 +2933,8 @@ type DB_Table
       - statistic: The running statistic to calculate.
       - of: The existing column to run the statistic over.
       - as: The name of the new column.
+       - set_mode: Specifies the expected behaviour with regard to an existing
+         column with the same name.
       - group_by: Specifies the columns to group by. The running statistic is
         calculated separately for each group. By default, all rows are treated as
         a single group.
@ -2959,9 +2961,9 @@ type DB_Table
    @group_by Widget_Helpers.make_column_name_multi_selector
    @order_by Widget_Helpers.make_order_by_selector
    @of Widget_Helpers.make_column_name_selector
-    running : Statistic -> (Text | Integer) -> Text -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
-    running self (statistic:Statistic=..Count) (of:(Text | Integer)=0) (as:Text='') (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=..Report_Warning) =
-        _ = [statistic, of, as, group_by, order_by, on_problems]
+    running : Statistic -> (Text | Integer) -> Text -> Set_Mode -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
+    running self (statistic:Statistic=..Count) (of:(Text | Integer)=0) (as:Text='') (set_mode:Set_Mode=..Add) (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=..Report_Warning) =
+        _ = [statistic, of, as, set_mode, group_by, order_by, on_problems]
        Error.throw (Unsupported_Database_Operation.Error "DB_Table.running is currently not implemented for the Database backend. You may download the table to memory using `.read` to use this feature.")


--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Add_Row_Number.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Add_Row_Number.enso
@ -26,20 +26,23 @@ add_row_number table name from step group_by order_by on_problems:Problem_Behavi
        Error.throw (Illegal_Argument.Error "The row number has exceeded the 64-bit integer range. BigInteger numbering is currently not supported. Please use a smaller start/step.")

    problem_builder.attach_problems_before on_problems <| Panic.catch ArithmeticException handler=handle_arithmetic_exception <| Panic.catch Unsupported_Argument_Types handler=handle_arithmetic_exception <|
-        no_order_no_group = grouping_columns.is_empty && ordering.is_empty
-        new_column = case no_order_no_group of
-            True -> make_range_column name from step table.row_count
-            False ->
-                ordering_columns = ordering.map c->c.column.java_column
-                directions = ordering.map c->c.associated_selector.direction.to_sign
-                grouping_java_columns = grouping_columns.map c->c.java_column
-                new_storage = Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
-                    AddRowNumber.create_numbering from step grouping_java_columns ordering_columns directions java_problem_aggregator
-                Column.from_storage name new_storage
-
+        new_column = create_column table name from step grouping_columns ordering on_problems
        renamed_table = rename_columns_if_needed table name on_problems Table.new
        renamed_table.set new_column name set_mode=Set_Mode.Add

+## PRIVATE
+create_column table name from step grouping_columns ordering on_problems =
+    no_order_no_group = grouping_columns.is_empty && ordering.is_empty
+    case no_order_no_group of
+        True -> make_range_column name from step table.row_count
+        False ->
+            ordering_columns = ordering.map c->c.column.java_column
+            directions = ordering.map c->c.associated_selector.direction.to_sign
+            grouping_java_columns = grouping_columns.map c->c.java_column
+            new_storage = Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
+                AddRowNumber.create_numbering from step grouping_java_columns ordering_columns directions java_problem_aggregator
+            Column.from_storage name new_storage
+
 ## PRIVATE
   If the table already contains a column called `name` it will be renamed to a
   unique name, so that a new column with this name can be added.
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Add_Running.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Add_Running.enso
@ -1,3 +1,5 @@
+private
+
 from Standard.Base import all
 import Standard.Base.Errors.Common.Unsupported_Argument_Types
 import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
@ -19,27 +21,31 @@ polyglot java import org.enso.table.data.column.storage.numeric.LongRangeStorage
 polyglot java import org.enso.table.operations.AddRunning

 ## PRIVATE
-add_running : Table -> Statistic -> (Text|Integer) -> Text -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
-add_running table (statistic:Statistic=Statistic.Count) (of:Text|Integer=0) (as:Text='') (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=..Report_Warning) =
+add_running : Table -> Statistic -> (Text|Integer) -> Text -> Set_Mode -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
+add_running table (statistic:Statistic=Statistic.Count) (of:Text|Integer=0) (as:Text='') (set_mode:Set_Mode=..Add) (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=..Report_Warning) =
    check_running_support [statistic] <|
        of_col = table.at of
-        new_name = if as.is_empty then 'Running ' + statistic.to_text + ' of ' + of_col.name else as   
-        case statistic of
-            Statistic.Count ->
-                Add_Row_Number.add_row_number table new_name 1 1 group_by order_by on_problems
+        new_name = case as.is_empty of
+            False -> as
+            True -> case set_mode of
+                Set_Mode.Update -> of_col.name
+                _ ->  'Running ' + statistic.to_text + ' of ' + of_col.name
+
+        problem_builder = Problem_Builder.new error_on_missing_columns=True
+        grouping_columns = table.columns_helper.select_columns_helper group_by Case_Sensitivity.Default True problem_builder 
+        ordering = Table_Helpers.resolve_order_by table.columns order_by problem_builder
+        source_java_column = of_col.java_column
+        grouping_java_columns = grouping_columns.map c->c.java_column
+        ordering_java_columns = ordering.map c->
+            c.column.java_column
+        directions = ordering.map c->
+            c.associated_selector.direction.to_sign
+        new_column = case statistic of
+            Statistic.Count -> 
+                Add_Row_Number.create_column table new_name from=1 step=1 grouping_columns ordering on_problems
            _ ->
                Value_Type.expect_numeric of_col <|
-                    problem_builder = Problem_Builder.new error_on_missing_columns=True
-                    grouping_columns = table.columns_helper.select_columns_helper group_by Case_Sensitivity.Default True problem_builder 
-                    ordering = Table_Helpers.resolve_order_by table.columns order_by problem_builder
-                    source_java_column = of_col.java_column
-                    grouping_java_columns = grouping_columns.map c->c.java_column
-                    ordering_java_columns = ordering.map c->
-                        c.column.java_column
-                    directions = ordering.map c->
-                        c.associated_selector.direction.to_sign
-
-                    Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
-                        new_storage = AddRunning.create_running statistic.to_java source_java_column grouping_java_columns ordering_java_columns directions java_problem_aggregator
-                        new_column = Column.from_storage new_name new_storage
-                        table.set new_column new_name set_mode=Set_Mode.Add
+                    new_storage = Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
+                        AddRunning.create_running statistic.to_java source_java_column grouping_java_columns ordering_java_columns directions java_problem_aggregator
+                    Column.from_storage new_name new_storage
+        table.set new_column new_name set_mode
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Table.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Table.enso
@ -3603,6 +3603,8 @@ type Table
       - statistic: The running statistic to calculate.
       - of: The existing column to run the statistic over.
       - as: The name of the new column.
+       - set_mode: Specifies the expected behaviour with regard to an existing
+         column with the same name.
       - group_by: Specifies the columns to group by. The running statistic is
         calculated separately for each group. By default, all rows are treated as
         a single group.
@ -3629,9 +3631,9 @@ type Table
    @group_by Widget_Helpers.make_column_name_multi_selector
    @order_by Widget_Helpers.make_order_by_selector
    @of Widget_Helpers.make_column_name_selector
-    running : Statistic -> (Text | Integer) -> Text -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
-    running self (statistic:Statistic=..Count) (of:(Text | Integer)=0) (as:Text='') (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=..Report_Warning) =
-        Add_Running.add_running self statistic of as group_by order_by on_problems
+    running : Statistic -> (Text | Integer) -> Text -> Set_Mode -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
+    running self (statistic:Statistic=..Count) (of:(Text | Integer)=0) (as:Text='') (set_mode:Set_Mode=..Add) (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=..Report_Warning) =
+        Add_Running.add_running self statistic of as set_mode group_by order_by on_problems

    ## PRIVATE
    column_naming_helper : Column_Naming_Helper
--- a/test/Table_Tests/src/In_Memory/Table_Running_Spec.enso
+++ b/test/Table_Tests/src/In_Memory/Table_Running_Spec.enso
@ -67,8 +67,35 @@ add_specs suite_builder =
            # 4 | SG0456 | E         | 73.77        | 5
            expected_table = data.table.zip expected_column
            result.should_equal expected_table
+        group_builder.specify "Setting the name the same as an existing column errors if update_mode not changed" <|
+            result = data.table.running Statistic.Count "Passenger" "Passenger"
+            result.should_fail_with (Existing_Column.Error 'Passenger')
+        group_builder.specify "Setting the name the same as an existing column works if update_mode update" <|
+            result = data.table.running Statistic.Count "Passenger" "Passenger" set_mode=..Update
+            expected_column = Column.from_vector "My Custom Name" [1, 2, 3, 4, 5]
+            #   | Flight | Passenger | Ticket Price 
+            #---+--------+-----------+--------------
+            # 0 | BA0123 | 1         | 100.5        
+            # 1 | BA0123 | 2         | 575.99       
+            # 2 | SG0456 | 3         | 73.23        
+            # 3 | BA0123 | 4         | 112.34       
+            # 4 | SG0456 | 5         | 73.77        
+            expected_table = data.table.set expected_column "Passenger"
+            result.should_equal expected_table
+        group_builder.specify "Not setting the name updates the first column if update_mode update" <|
+            result = data.table.running set_mode=..Update
+            expected_column = Column.from_vector "Flight" [1, 2, 3, 4, 5]
+            #   | Flight | Passenger | Ticket Price 
+            #---+--------+-----------+--------------
+            # 0 | 1      | A         | 100.5        
+            # 1 | 2      | B         | 575.99       
+            # 2 | 3      | A         | 73.23        
+            # 3 | 4      | C         | 112.34       
+            # 4 | 5      | E         | 73.77        
+            expected_table = data.table.set expected_column
+            result.should_equal expected_table
        group_builder.specify "Can group by and provide running count per group" <|
-            result = data.table.running Statistic.Count "Passenger" "Passenger num per flight" ["Flight"]
+            result = data.table.running Statistic.Count "Passenger" "Passenger num per flight" group_by=["Flight"]
            expected_column = Column.from_vector "Passenger num per flight" [1, 2, 1, 3, 2]
            #   | Flight | Passenger | Ticket Price | Passenger num per flight
            #---+--------+-----------+--------------+-------------------------
@ -80,7 +107,7 @@ add_specs suite_builder =
            expected_table = data.table.zip expected_column
            result.should_equal expected_table
        group_builder.specify "Can group by and provide running count per group based on order by" <|
-            result = data.table.running Statistic.Count "Passenger" "Ranked ticket cost per pass" ["Passenger"] ["Ticket Price"]
+            result = data.table.running Statistic.Count "Passenger" "Ranked ticket cost per pass" group_by=["Passenger"] order_by=["Ticket Price"]
            expected_column = Column.from_vector "Ranked ticket cost per pass" [2, 1, 1, 1, 1]
            #   | Flight | Passenger | Ticket Price | Ranked ticket cost per pass
            #---+--------+-----------+--------------+-------------------------
@ -92,7 +119,7 @@ add_specs suite_builder =
            expected_table = data.table.zip expected_column
            result.should_equal expected_table
        group_builder.specify "Can provide running count based on order by without grouping" <|
-            result = data.table.running Statistic.Count "Passenger" "Ranked ticket cost" [] ["Ticket Price"]
+            result = data.table.running Statistic.Count "Passenger" "Ranked ticket cost" order_by=["Ticket Price"]
            expected_column = Column.from_vector "Ranked ticket cost" [3, 5, 1, 4, 2]
            #   | Flight | Passenger | Ticket Price | Ranked ticket cost
            #---+--------+-----------+--------------+-------------------------
@ -117,8 +144,35 @@ add_specs suite_builder =
            # 4 | SG0456 | E         | 73.77        | 935.83
            expected_table = data.table.zip expected_column
            result.should_equal expected_table
+        group_builder.specify "Setting the name the same as an existing column errors if update_mode not changed" <|
+            result = data.table.running Statistic.Sum "Ticket Price" "Ticket Price"
+            result.should_fail_with (Existing_Column.Error 'Ticket Price')
+        group_builder.specify "Setting the name the same as an existing column works if update_mode update" <|
+            result = data.table.running Statistic.Sum "Ticket Price" "Ticket Price" set_mode=..Update
+            expected_column = Column.from_vector "Ticket Price" [100.5, 676.49, 749.72, 862.0600000000001, 935.83]
+            #   | Flight | Passenger | Ticket Price
+            #---+--------+-----------+-------------------------
+            # 0 | BA0123 | A         | 100.5
+            # 1 | BA0123 | B         | 676.49
+            # 2 | SG0456 | A         | 749.72
+            # 3 | BA0123 | C         | 862.06
+            # 4 | SG0456 | E         | 935.83
+            expected_table = data.table.set expected_column "Ticket Price"
+            result.should_equal expected_table
+        group_builder.specify "Not setting the name overrides existing column if update_mode update" <|
+            result = data.table.running Statistic.Sum "Ticket Price" set_mode=..Update
+            expected_column = Column.from_vector "Ticket Price" [100.5, 676.49, 749.72, 862.0600000000001, 935.83]
+            #   | Flight | Passenger | Ticket Price
+            #---+--------+-----------+-------------------------
+            # 0 | BA0123 | A         | 100.5
+            # 1 | BA0123 | B         | 676.49
+            # 2 | SG0456 | A         | 749.72
+            # 3 | BA0123 | C         | 862.06
+            # 4 | SG0456 | E         | 935.83
+            expected_table = data.table.set expected_column "Ticket Price"
+            result.should_equal expected_table
        group_builder.specify "Can group by and provide running sum per group" <|
-            result = data.table.running Statistic.Sum "Ticket Price" "Running" ["Flight"]
+            result = data.table.running Statistic.Sum "Ticket Price" "Running" group_by=["Flight"]
            expected_column = Column.from_vector "Running" [100.5, 676.49, 73.23, 788.83, 147]
            #   | Flight | Passenger | Ticket Price | Running
            #---+--------+-----------+--------------+-------------------------
@ -130,7 +184,7 @@ add_specs suite_builder =
            expected_table = data.table.zip expected_column
            result.should_equal expected_table
        group_builder.specify "Can group by and provide running sum per group based on order by" <|
-            result = data.table.running Statistic.Sum "Ticket Price" "Sum ticket cost per pass" ["Passenger"] ["Ticket Price"]
+            result = data.table.running Statistic.Sum "Ticket Price" "Sum ticket cost per pass" group_by=["Passenger"] order_by=["Ticket Price"]
            expected_column = Column.from_vector "Sum ticket cost per pass" [173.73000000000002, 575.99, 73.23, 112.34, 73.77]
            #   | Flight | Passenger | Ticket Price | Sum ticket cost per pass
            #---+--------+-----------+--------------+-------------------------
@ -142,7 +196,7 @@ add_specs suite_builder =
            expected_table = data.table.zip expected_column
            result.should_equal expected_table
        group_builder.specify "Can provide running sum based on order by without grouping" <|
-            result = data.table.running Statistic.Sum "Ticket Price" "Sum ticket cost" [] ["Ticket Price"]
+            result = data.table.running Statistic.Sum "Ticket Price" "Sum ticket cost" order_by=["Ticket Price"]
            expected_column = Column.from_vector "Sum ticket cost" [247.5, 935.83, 73.23, 359.84000000000003, 147]
            #   | Flight | Passenger | Ticket Price | Sum ticket cost
            #---+--------+-----------+--------------+-------------------------
@ -441,7 +495,7 @@ add_specs suite_builder =
            warnings = Problems.get_attached_warnings result
            warnings.not_empty . should_be_false
        group_builder.specify "Running min ignores nothing values and works with grouping and warns" <|
-            result = table.running Statistic.Minimum "Ticket Price" "Running" ["Flight"]
+            result = table.running Statistic.Minimum "Ticket Price" "Running" group_by=["Flight"]
            expected_column = Column.from_vector "Running" [100.5, 100.5, Nothing, 100.5, 73.77]
            #   | Flight | Passenger | Ticket Price | Running
            #---+--------+-----------+--------------+-------------------------
@ -456,7 +510,7 @@ add_specs suite_builder =
            w.column.should_equal "Ticket Price"
            w.rows.should_equal [2, 3]
        group_builder.specify "Running max ignores nothing values and works with grouping and warns" <|
-            result = table.running Statistic.Maximum "Ticket Price" "Running" ["Flight"]
+            result = table.running Statistic.Maximum "Ticket Price" "Running" group_by=["Flight"]
            expected_column = Column.from_vector "Running" [100.5, 575.99, Nothing, 575.99, 73.77]
            #   | Flight | Passenger | Ticket Price | Running
            #---+--------+-----------+--------------+-------------------------
@ -486,7 +540,7 @@ add_specs suite_builder =
            w.column.should_equal "Ticket Price"
            w.rows.should_equal [2, 3]
        group_builder.specify "Running mean ignores nothing values and works when first value is Nothing and warns" <|
-            result = table.running Statistic.Mean "Ticket Price" "Running" ["Flight"]
+            result = table.running Statistic.Mean "Ticket Price" "Running" group_by=["Flight"]
            expected_column = Column.from_vector "Running" [100.5, 338.245, Nothing, 338.245, 73.77]
            #   | Flight | Passenger | Ticket Price | Running
            #---+--------+-----------+--------------+-------------------------
@ -556,7 +610,7 @@ add_specs suite_builder =
            warnings = Problems.get_attached_warnings result
            warnings.not_empty . should_be_false
        group_builder.specify "Running min ignores NaN values and works with grouping and warns" <|
-            result = table.running Statistic.Minimum "Ticket Price" "Running" ["Flight"]
+            result = table.running Statistic.Minimum "Ticket Price" "Running" group_by=["Flight"]
            expected_column = Column.from_vector "Running" [100.5, 100.5, Number.nan, 100.5, 73.77]
            #   | Flight | Passenger | Ticket Price | Running
            #---+--------+-----------+--------------+-------------------------
@ -571,7 +625,7 @@ add_specs suite_builder =
            w.column.should_equal "Ticket Price"
            w.rows.should_equal [2, 3]
        group_builder.specify "Running max ignores NaN values and works with grouping and warns" <|
-            result = table.running Statistic.Maximum "Ticket Price" "Running" ["Flight"]
+            result = table.running Statistic.Maximum "Ticket Price" "Running" group_by=["Flight"]
            expected_column = Column.from_vector "Running" [100.5, 575.99, Number.nan, 575.99, 73.77]
            #   | Flight | Passenger | Ticket Price | Running
            #---+--------+-----------+--------------+-------------------------
@ -601,7 +655,7 @@ add_specs suite_builder =
            w.column.should_equal "Ticket Price"
            w.rows.should_equal [2, 3]
        group_builder.specify "Running mean ignores NaN values and works when first value is NaN and warns" <|
-            result = table.running Statistic.Mean "Ticket Price" "Running" ["Flight"]
+            result = table.running Statistic.Mean "Ticket Price" "Running" group_by=["Flight"]
            expected_column = Column.from_vector "Running" [100.5, 338.245, Number.nan, 338.245, 73.77]
            #   | Flight | Passenger | Ticket Price | Running
            #---+--------+-----------+--------------+-------------------------