Add update mode to Running (#11045)

This commit is contained in:
AdRiley 2024-09-11 15:41:33 +01:00 committed by GitHub
parent 97a1628017
commit a666585afe
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 116 additions and 49 deletions

View File

@ -2933,6 +2933,8 @@ type DB_Table
- statistic: The running statistic to calculate.
- of: The existing column to run the statistic over.
- as: The name of the new column.
- set_mode: Specifies the expected behaviour with regard to an existing
column with the same name.
- group_by: Specifies the columns to group by. The running statistic is
calculated separately for each group. By default, all rows are treated as
a single group.
@ -2959,9 +2961,9 @@ type DB_Table
@group_by Widget_Helpers.make_column_name_multi_selector
@order_by Widget_Helpers.make_order_by_selector
@of Widget_Helpers.make_column_name_selector
running : Statistic -> (Text | Integer) -> Text -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
running self (statistic:Statistic=..Count) (of:(Text | Integer)=0) (as:Text='') (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=..Report_Warning) =
_ = [statistic, of, as, group_by, order_by, on_problems]
running : Statistic -> (Text | Integer) -> Text -> Set_Mode -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
running self (statistic:Statistic=..Count) (of:(Text | Integer)=0) (as:Text='') (set_mode:Set_Mode=..Add) (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=..Report_Warning) =
_ = [statistic, of, as, set_mode, group_by, order_by, on_problems]
Error.throw (Unsupported_Database_Operation.Error "DB_Table.running is currently not implemented for the Database backend. You may download the table to memory using `.read` to use this feature.")

View File

@ -26,20 +26,23 @@ add_row_number table name from step group_by order_by on_problems:Problem_Behavi
Error.throw (Illegal_Argument.Error "The row number has exceeded the 64-bit integer range. BigInteger numbering is currently not supported. Please use a smaller start/step.")
problem_builder.attach_problems_before on_problems <| Panic.catch ArithmeticException handler=handle_arithmetic_exception <| Panic.catch Unsupported_Argument_Types handler=handle_arithmetic_exception <|
no_order_no_group = grouping_columns.is_empty && ordering.is_empty
new_column = case no_order_no_group of
True -> make_range_column name from step table.row_count
False ->
ordering_columns = ordering.map c->c.column.java_column
directions = ordering.map c->c.associated_selector.direction.to_sign
grouping_java_columns = grouping_columns.map c->c.java_column
new_storage = Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
AddRowNumber.create_numbering from step grouping_java_columns ordering_columns directions java_problem_aggregator
Column.from_storage name new_storage
new_column = create_column table name from step grouping_columns ordering on_problems
renamed_table = rename_columns_if_needed table name on_problems Table.new
renamed_table.set new_column name set_mode=Set_Mode.Add
## PRIVATE
create_column table name from step grouping_columns ordering on_problems =
# Builds the numbering column called `name` and returns it. This helper never
# calls `set` on `table`; `table` is only read for its row count.
# With neither grouping nor ordering, the numbering is built by
# `make_range_column` from `from`, `step` and the table's row count.
no_order_no_group = grouping_columns.is_empty && ordering.is_empty
case no_order_no_group of
True -> make_range_column name from step table.row_count
False ->
# Otherwise hand the underlying Java columns and the sign of each sort
# direction to `AddRowNumber.create_numbering`.
ordering_columns = ordering.map c->c.column.java_column
directions = ordering.map c->c.associated_selector.direction.to_sign
grouping_java_columns = grouping_columns.map c->c.java_column
# The Java call runs under a problem aggregator governed by `on_problems`.
new_storage = Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
AddRowNumber.create_numbering from step grouping_java_columns ordering_columns directions java_problem_aggregator
Column.from_storage name new_storage
## PRIVATE
If the table already contains a column called `name`, that existing column is
renamed to a unique name so that a new column called `name` can be added.

View File

@ -1,3 +1,5 @@
private
from Standard.Base import all
import Standard.Base.Errors.Common.Unsupported_Argument_Types
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
@ -19,27 +21,31 @@ polyglot java import org.enso.table.data.column.storage.numeric.LongRangeStorage
polyglot java import org.enso.table.operations.AddRunning
## PRIVATE
add_running : Table -> Statistic -> (Text|Integer) -> Text -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
add_running table (statistic:Statistic=Statistic.Count) (of:Text|Integer=0) (as:Text='') (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=..Report_Warning) =
add_running : Table -> Statistic -> (Text|Integer) -> Text -> Set_Mode -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
add_running table (statistic:Statistic=Statistic.Count) (of:Text|Integer=0) (as:Text='') (set_mode:Set_Mode=..Add) (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=..Report_Warning) =
check_running_support [statistic] <|
of_col = table.at of
new_name = if as.is_empty then 'Running ' + statistic.to_text + ' of ' + of_col.name else as
case statistic of
Statistic.Count ->
Add_Row_Number.add_row_number table new_name 1 1 group_by order_by on_problems
# Naming: a non-empty `as` always wins. With an empty `as`, `Set_Mode.Update`
# reuses the source column's own name, so the source column is the one
# targeted by the final `table.set`; every other mode generates
# 'Running <statistic> of <source column name>'.
new_name = case as.is_empty of
False -> as
True -> case set_mode of
Set_Mode.Update -> of_col.name
_ -> 'Running ' + statistic.to_text + ' of ' + of_col.name
# Grouping and ordering are resolved once, before branching on the statistic.
# NOTE(review): no line visible here attaches the problems gathered in
# `problem_builder` to the result - confirm whether that is intended.
problem_builder = Problem_Builder.new error_on_missing_columns=True
grouping_columns = table.columns_helper.select_columns_helper group_by Case_Sensitivity.Default True problem_builder
ordering = Table_Helpers.resolve_order_by table.columns order_by problem_builder
source_java_column = of_col.java_column
grouping_java_columns = grouping_columns.map c->c.java_column
ordering_java_columns = ordering.map c->
c.column.java_column
directions = ordering.map c->
c.associated_selector.direction.to_sign
# Count is delegated to `Add_Row_Number.create_column` with from=1 and step=1;
# any other statistic first passes `Value_Type.expect_numeric` on the source
# column and is then computed by `AddRunning.create_running`.
new_column = case statistic of
Statistic.Count ->
Add_Row_Number.create_column table new_name from=1 step=1 grouping_columns ordering on_problems
_ ->
Value_Type.expect_numeric of_col <|
problem_builder = Problem_Builder.new error_on_missing_columns=True
grouping_columns = table.columns_helper.select_columns_helper group_by Case_Sensitivity.Default True problem_builder
ordering = Table_Helpers.resolve_order_by table.columns order_by problem_builder
source_java_column = of_col.java_column
grouping_java_columns = grouping_columns.map c->c.java_column
ordering_java_columns = ordering.map c->
c.column.java_column
directions = ordering.map c->
c.associated_selector.direction.to_sign
Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
new_storage = AddRunning.create_running statistic.to_java source_java_column grouping_java_columns ordering_java_columns directions java_problem_aggregator
new_column = Column.from_storage new_name new_storage
table.set new_column new_name set_mode=Set_Mode.Add
new_storage = Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
AddRunning.create_running statistic.to_java source_java_column grouping_java_columns ordering_java_columns directions java_problem_aggregator
Column.from_storage new_name new_storage
# The caller's `set_mode` is forwarded unchanged to `table.set`.
table.set new_column new_name set_mode

View File

@ -3603,6 +3603,8 @@ type Table
- statistic: The running statistic to calculate.
- of: The existing column to run the statistic over.
- as: The name of the new column.
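- set_mode: Specifies the expected behaviour with regard to an existing
column with the same name. With the default `..Add`, a name clash is
reported as an `Existing_Column` error. With `..Update` and an empty `as`,
the `of` column itself is replaced by the running statistic.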
- group_by: Specifies the columns to group by. The running statistic is
calculated separately for each group. By default, all rows are treated as
a single group.
@ -3629,9 +3631,9 @@ type Table
@group_by Widget_Helpers.make_column_name_multi_selector
@order_by Widget_Helpers.make_order_by_selector
@of Widget_Helpers.make_column_name_selector
running : Statistic -> (Text | Integer) -> Text -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
running self (statistic:Statistic=..Count) (of:(Text | Integer)=0) (as:Text='') (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=..Report_Warning) =
Add_Running.add_running self statistic of as group_by order_by on_problems
running : Statistic -> (Text | Integer) -> Text -> Set_Mode -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
running self (statistic:Statistic=..Count) (of:(Text | Integer)=0) (as:Text='') (set_mode:Set_Mode=..Add) (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=..Report_Warning) =
Add_Running.add_running self statistic of as set_mode group_by order_by on_problems
## PRIVATE
column_naming_helper : Column_Naming_Helper

View File

@ -67,8 +67,35 @@ add_specs suite_builder =
# 4 | SG0456 | E | 73.77 | 5
expected_table = data.table.zip expected_column
result.should_equal expected_table
group_builder.specify "Setting the name the same as an existing column errors if update_mode not changed" <|
result = data.table.running Statistic.Count "Passenger" "Passenger"
result.should_fail_with (Existing_Column.Error 'Passenger')
group_builder.specify "Setting the name the same as an existing column works if update_mode update" <|
result = data.table.running Statistic.Count "Passenger" "Passenger" set_mode=..Update
expected_column = Column.from_vector "My Custom Name" [1, 2, 3, 4, 5]
# | Flight | Passenger | Ticket Price
#---+--------+-----------+--------------
# 0 | BA0123 | 1 | 100.5
# 1 | BA0123 | 2 | 575.99
# 2 | SG0456 | 3 | 73.23
# 3 | BA0123 | 4 | 112.34
# 4 | SG0456 | 5 | 73.77
expected_table = data.table.set expected_column "Passenger"
result.should_equal expected_table
group_builder.specify "Not setting the name updates the first column if update_mode update" <|
result = data.table.running set_mode=..Update
expected_column = Column.from_vector "Flight" [1, 2, 3, 4, 5]
# | Flight | Passenger | Ticket Price
#---+--------+-----------+--------------
# 0 | 1 | A | 100.5
# 1 | 2 | B | 575.99
# 2 | 3 | A | 73.23
# 3 | 4 | C | 112.34
# 4 | 5 | E | 73.77
expected_table = data.table.set expected_column
result.should_equal expected_table
group_builder.specify "Can group by and provide running count per group" <|
result = data.table.running Statistic.Count "Passenger" "Passenger num per flight" ["Flight"]
result = data.table.running Statistic.Count "Passenger" "Passenger num per flight" group_by=["Flight"]
expected_column = Column.from_vector "Passenger num per flight" [1, 2, 1, 3, 2]
# | Flight | Passenger | Ticket Price | Passenger num per flight
#---+--------+-----------+--------------+-------------------------
@ -80,7 +107,7 @@ add_specs suite_builder =
expected_table = data.table.zip expected_column
result.should_equal expected_table
group_builder.specify "Can group by and provide running count per group based on order by" <|
result = data.table.running Statistic.Count "Passenger" "Ranked ticket cost per pass" ["Passenger"] ["Ticket Price"]
result = data.table.running Statistic.Count "Passenger" "Ranked ticket cost per pass" group_by=["Passenger"] order_by=["Ticket Price"]
expected_column = Column.from_vector "Ranked ticket cost per pass" [2, 1, 1, 1, 1]
# | Flight | Passenger | Ticket Price | Ranked ticket cost per pass
#---+--------+-----------+--------------+-------------------------
@ -92,7 +119,7 @@ add_specs suite_builder =
expected_table = data.table.zip expected_column
result.should_equal expected_table
group_builder.specify "Can provide running count based on order by without grouping" <|
result = data.table.running Statistic.Count "Passenger" "Ranked ticket cost" [] ["Ticket Price"]
result = data.table.running Statistic.Count "Passenger" "Ranked ticket cost" order_by=["Ticket Price"]
expected_column = Column.from_vector "Ranked ticket cost" [3, 5, 1, 4, 2]
# | Flight | Passenger | Ticket Price | Ranked ticket cost
#---+--------+-----------+--------------+-------------------------
@ -117,8 +144,35 @@ add_specs suite_builder =
# 4 | SG0456 | E | 73.77 | 935.83
expected_table = data.table.zip expected_column
result.should_equal expected_table
group_builder.specify "Setting the name the same as an existing column errors if update_mode not changed" <|
result = data.table.running Statistic.Sum "Ticket Price" "Ticket Price"
result.should_fail_with (Existing_Column.Error 'Ticket Price')
group_builder.specify "Setting the name the same as an existing column works if update_mode update" <|
result = data.table.running Statistic.Sum "Ticket Price" "Ticket Price" set_mode=..Update
expected_column = Column.from_vector "Ticket Price" [100.5, 676.49, 749.72, 862.0600000000001, 935.83]
# | Flight | Passenger | Ticket Price
#---+--------+-----------+-------------------------
# 0 | BA0123 | A | 100.5
# 1 | BA0123 | B | 676.49
# 2 | SG0456 | A | 749.72
# 3 | BA0123 | C | 862.06
# 4 | SG0456 | E | 935.83
expected_table = data.table.set expected_column "Ticket Price"
result.should_equal expected_table
group_builder.specify "Not setting the name overrides existing column if update_mode update" <|
result = data.table.running Statistic.Sum "Ticket Price" set_mode=..Update
expected_column = Column.from_vector "Ticket Price" [100.5, 676.49, 749.72, 862.0600000000001, 935.83]
# | Flight | Passenger | Ticket Price
#---+--------+-----------+-------------------------
# 0 | BA0123 | A | 100.5
# 1 | BA0123 | B | 676.49
# 2 | SG0456 | A | 749.72
# 3 | BA0123 | C | 862.06
# 4 | SG0456 | E | 935.83
expected_table = data.table.set expected_column "Ticket Price"
result.should_equal expected_table
group_builder.specify "Can group by and provide running sum per group" <|
result = data.table.running Statistic.Sum "Ticket Price" "Running" ["Flight"]
result = data.table.running Statistic.Sum "Ticket Price" "Running" group_by=["Flight"]
expected_column = Column.from_vector "Running" [100.5, 676.49, 73.23, 788.83, 147]
# | Flight | Passenger | Ticket Price | Running
#---+--------+-----------+--------------+-------------------------
@ -130,7 +184,7 @@ add_specs suite_builder =
expected_table = data.table.zip expected_column
result.should_equal expected_table
group_builder.specify "Can group by and provide running sum per group based on order by" <|
result = data.table.running Statistic.Sum "Ticket Price" "Sum ticket cost per pass" ["Passenger"] ["Ticket Price"]
result = data.table.running Statistic.Sum "Ticket Price" "Sum ticket cost per pass" group_by=["Passenger"] order_by=["Ticket Price"]
expected_column = Column.from_vector "Sum ticket cost per pass" [173.73000000000002, 575.99, 73.23, 112.34, 73.77]
# | Flight | Passenger | Ticket Price | Sum ticket cost per pass
#---+--------+-----------+--------------+-------------------------
@ -142,7 +196,7 @@ add_specs suite_builder =
expected_table = data.table.zip expected_column
result.should_equal expected_table
group_builder.specify "Can provide running sum based on order by without grouping" <|
result = data.table.running Statistic.Sum "Ticket Price" "Sum ticket cost" [] ["Ticket Price"]
result = data.table.running Statistic.Sum "Ticket Price" "Sum ticket cost" order_by=["Ticket Price"]
expected_column = Column.from_vector "Sum ticket cost" [247.5, 935.83, 73.23, 359.84000000000003, 147]
# | Flight | Passenger | Ticket Price | Sum ticket cost
#---+--------+-----------+--------------+-------------------------
@ -441,7 +495,7 @@ add_specs suite_builder =
warnings = Problems.get_attached_warnings result
warnings.not_empty . should_be_false
group_builder.specify "Running min ignores nothing values and works with grouping and warns" <|
result = table.running Statistic.Minimum "Ticket Price" "Running" ["Flight"]
result = table.running Statistic.Minimum "Ticket Price" "Running" group_by=["Flight"]
expected_column = Column.from_vector "Running" [100.5, 100.5, Nothing, 100.5, 73.77]
# | Flight | Passenger | Ticket Price | Running
#---+--------+-----------+--------------+-------------------------
@ -456,7 +510,7 @@ add_specs suite_builder =
w.column.should_equal "Ticket Price"
w.rows.should_equal [2, 3]
group_builder.specify "Running max ignores nothing values and works with grouping and warns" <|
result = table.running Statistic.Maximum "Ticket Price" "Running" ["Flight"]
result = table.running Statistic.Maximum "Ticket Price" "Running" group_by=["Flight"]
expected_column = Column.from_vector "Running" [100.5, 575.99, Nothing, 575.99, 73.77]
# | Flight | Passenger | Ticket Price | Running
#---+--------+-----------+--------------+-------------------------
@ -486,7 +540,7 @@ add_specs suite_builder =
w.column.should_equal "Ticket Price"
w.rows.should_equal [2, 3]
group_builder.specify "Running mean ignores nothing values and works when first value is Nothing and warns" <|
result = table.running Statistic.Mean "Ticket Price" "Running" ["Flight"]
result = table.running Statistic.Mean "Ticket Price" "Running" group_by=["Flight"]
expected_column = Column.from_vector "Running" [100.5, 338.245, Nothing, 338.245, 73.77]
# | Flight | Passenger | Ticket Price | Running
#---+--------+-----------+--------------+-------------------------
@ -556,7 +610,7 @@ add_specs suite_builder =
warnings = Problems.get_attached_warnings result
warnings.not_empty . should_be_false
group_builder.specify "Running min ignores NaN values and works with grouping and warns" <|
result = table.running Statistic.Minimum "Ticket Price" "Running" ["Flight"]
result = table.running Statistic.Minimum "Ticket Price" "Running" group_by=["Flight"]
expected_column = Column.from_vector "Running" [100.5, 100.5, Number.nan, 100.5, 73.77]
# | Flight | Passenger | Ticket Price | Running
#---+--------+-----------+--------------+-------------------------
@ -571,7 +625,7 @@ add_specs suite_builder =
w.column.should_equal "Ticket Price"
w.rows.should_equal [2, 3]
group_builder.specify "Running max ignores NaN values and works with grouping and warns" <|
result = table.running Statistic.Maximum "Ticket Price" "Running" ["Flight"]
result = table.running Statistic.Maximum "Ticket Price" "Running" group_by=["Flight"]
expected_column = Column.from_vector "Running" [100.5, 575.99, Number.nan, 575.99, 73.77]
# | Flight | Passenger | Ticket Price | Running
#---+--------+-----------+--------------+-------------------------
@ -601,7 +655,7 @@ add_specs suite_builder =
w.column.should_equal "Ticket Price"
w.rows.should_equal [2, 3]
group_builder.specify "Running mean ignores NaN values and works when first value is NaN and warns" <|
result = table.running Statistic.Mean "Ticket Price" "Running" ["Flight"]
result = table.running Statistic.Mean "Ticket Price" "Running" group_by=["Flight"]
expected_column = Column.from_vector "Running" [100.5, 338.245, Number.nan, 338.245, 73.77]
# | Flight | Passenger | Ticket Price | Running
#---+--------+-----------+--------------+-------------------------