mirror of
https://github.com/enso-org/enso.git
synced 2024-11-22 22:10:15 +03:00
Add update mode to Running (#11045)
This commit is contained in:
parent
97a1628017
commit
a666585afe
@ -2933,6 +2933,8 @@ type DB_Table
|
||||
- statistic: The running statistic to calculate.
|
||||
- of: The existing column to run the statistic over.
|
||||
- as: The name of the new column.
|
||||
- set_mode: Specifies the expected behaviour with regard to an existing
|
||||
column with the same name.
|
||||
- group_by: Specifies the columns to group by. The running statistic is
|
||||
calculated separately for each group. By default, all rows are treated as
|
||||
a single group.
|
||||
@ -2959,9 +2961,9 @@ type DB_Table
|
||||
@group_by Widget_Helpers.make_column_name_multi_selector
|
||||
@order_by Widget_Helpers.make_order_by_selector
|
||||
@of Widget_Helpers.make_column_name_selector
|
||||
running : Statistic -> (Text | Integer) -> Text -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
|
||||
running self (statistic:Statistic=..Count) (of:(Text | Integer)=0) (as:Text='') (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=..Report_Warning) =
|
||||
_ = [statistic, of, as, group_by, order_by, on_problems]
|
||||
running : Statistic -> (Text | Integer) -> Text -> Set_Mode -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
|
||||
running self (statistic:Statistic=..Count) (of:(Text | Integer)=0) (as:Text='') (set_mode:Set_Mode=..Add) (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=..Report_Warning) =
|
||||
_ = [statistic, of, as, set_mode, group_by, order_by, on_problems]
|
||||
Error.throw (Unsupported_Database_Operation.Error "DB_Table.running is currently not implemented for the Database backend. You may download the table to memory using `.read` to use this feature.")
|
||||
|
||||
|
||||
|
@ -26,20 +26,23 @@ add_row_number table name from step group_by order_by on_problems:Problem_Behavi
|
||||
Error.throw (Illegal_Argument.Error "The row number has exceeded the 64-bit integer range. BigInteger numbering is currently not supported. Please use a smaller start/step.")
|
||||
|
||||
problem_builder.attach_problems_before on_problems <| Panic.catch ArithmeticException handler=handle_arithmetic_exception <| Panic.catch Unsupported_Argument_Types handler=handle_arithmetic_exception <|
|
||||
no_order_no_group = grouping_columns.is_empty && ordering.is_empty
|
||||
new_column = case no_order_no_group of
|
||||
True -> make_range_column name from step table.row_count
|
||||
False ->
|
||||
ordering_columns = ordering.map c->c.column.java_column
|
||||
directions = ordering.map c->c.associated_selector.direction.to_sign
|
||||
grouping_java_columns = grouping_columns.map c->c.java_column
|
||||
new_storage = Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
|
||||
AddRowNumber.create_numbering from step grouping_java_columns ordering_columns directions java_problem_aggregator
|
||||
Column.from_storage name new_storage
|
||||
|
||||
new_column = create_column table name from step grouping_columns ordering on_problems
|
||||
renamed_table = rename_columns_if_needed table name on_problems Table.new
|
||||
renamed_table.set new_column name set_mode=Set_Mode.Add
|
||||
|
||||
## PRIVATE
|
||||
create_column table name from step grouping_columns ordering on_problems =
|
||||
no_order_no_group = grouping_columns.is_empty && ordering.is_empty
|
||||
case no_order_no_group of
|
||||
True -> make_range_column name from step table.row_count
|
||||
False ->
|
||||
ordering_columns = ordering.map c->c.column.java_column
|
||||
directions = ordering.map c->c.associated_selector.direction.to_sign
|
||||
grouping_java_columns = grouping_columns.map c->c.java_column
|
||||
new_storage = Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
|
||||
AddRowNumber.create_numbering from step grouping_java_columns ordering_columns directions java_problem_aggregator
|
||||
Column.from_storage name new_storage
|
||||
|
||||
## PRIVATE
|
||||
If the table already contains a column called `name`, that column will be renamed to a
|
||||
unique name, so that a new column with this name can be added.
|
||||
|
@ -1,3 +1,5 @@
|
||||
private
|
||||
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Errors.Common.Unsupported_Argument_Types
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
@ -19,27 +21,31 @@ polyglot java import org.enso.table.data.column.storage.numeric.LongRangeStorage
|
||||
polyglot java import org.enso.table.operations.AddRunning
|
||||
|
||||
## PRIVATE
|
||||
add_running : Table -> Statistic -> (Text|Integer) -> Text -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
|
||||
add_running table (statistic:Statistic=Statistic.Count) (of:Text|Integer=0) (as:Text='') (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=..Report_Warning) =
|
||||
add_running : Table -> Statistic -> (Text|Integer) -> Text -> Set_Mode -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
|
||||
add_running table (statistic:Statistic=Statistic.Count) (of:Text|Integer=0) (as:Text='') (set_mode:Set_Mode=..Add) (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=..Report_Warning) =
|
||||
check_running_support [statistic] <|
|
||||
of_col = table.at of
|
||||
new_name = if as.is_empty then 'Running ' + statistic.to_text + ' of ' + of_col.name else as
|
||||
case statistic of
|
||||
Statistic.Count ->
|
||||
Add_Row_Number.add_row_number table new_name 1 1 group_by order_by on_problems
|
||||
new_name = case as.is_empty of
|
||||
False -> as
|
||||
True -> case set_mode of
|
||||
Set_Mode.Update -> of_col.name
|
||||
_ -> 'Running ' + statistic.to_text + ' of ' + of_col.name
|
||||
|
||||
problem_builder = Problem_Builder.new error_on_missing_columns=True
|
||||
grouping_columns = table.columns_helper.select_columns_helper group_by Case_Sensitivity.Default True problem_builder
|
||||
ordering = Table_Helpers.resolve_order_by table.columns order_by problem_builder
|
||||
source_java_column = of_col.java_column
|
||||
grouping_java_columns = grouping_columns.map c->c.java_column
|
||||
ordering_java_columns = ordering.map c->
|
||||
c.column.java_column
|
||||
directions = ordering.map c->
|
||||
c.associated_selector.direction.to_sign
|
||||
new_column = case statistic of
|
||||
Statistic.Count ->
|
||||
Add_Row_Number.create_column table new_name from=1 step=1 grouping_columns ordering on_problems
|
||||
_ ->
|
||||
Value_Type.expect_numeric of_col <|
|
||||
problem_builder = Problem_Builder.new error_on_missing_columns=True
|
||||
grouping_columns = table.columns_helper.select_columns_helper group_by Case_Sensitivity.Default True problem_builder
|
||||
ordering = Table_Helpers.resolve_order_by table.columns order_by problem_builder
|
||||
source_java_column = of_col.java_column
|
||||
grouping_java_columns = grouping_columns.map c->c.java_column
|
||||
ordering_java_columns = ordering.map c->
|
||||
c.column.java_column
|
||||
directions = ordering.map c->
|
||||
c.associated_selector.direction.to_sign
|
||||
|
||||
Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
|
||||
new_storage = AddRunning.create_running statistic.to_java source_java_column grouping_java_columns ordering_java_columns directions java_problem_aggregator
|
||||
new_column = Column.from_storage new_name new_storage
|
||||
table.set new_column new_name set_mode=Set_Mode.Add
|
||||
new_storage = Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
|
||||
AddRunning.create_running statistic.to_java source_java_column grouping_java_columns ordering_java_columns directions java_problem_aggregator
|
||||
Column.from_storage new_name new_storage
|
||||
table.set new_column new_name set_mode
|
||||
|
@ -3603,6 +3603,8 @@ type Table
|
||||
- statistic: The running statistic to calculate.
|
||||
- of: The existing column to run the statistic over.
|
||||
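- as: The name of the new column. If empty, the name defaults to `Running <statistic> of <column>`, or to the name of the `of` column when `set_mode` is `..Update`.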
|
||||
- set_mode: Specifies the expected behaviour with regard to an existing
|
||||
column with the same name.
|
||||
- group_by: Specifies the columns to group by. The running statistic is
|
||||
calculated separately for each group. By default, all rows are treated as
|
||||
a single group.
|
||||
@ -3629,9 +3631,9 @@ type Table
|
||||
@group_by Widget_Helpers.make_column_name_multi_selector
|
||||
@order_by Widget_Helpers.make_order_by_selector
|
||||
@of Widget_Helpers.make_column_name_selector
|
||||
running : Statistic -> (Text | Integer) -> Text -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
|
||||
running self (statistic:Statistic=..Count) (of:(Text | Integer)=0) (as:Text='') (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=..Report_Warning) =
|
||||
Add_Running.add_running self statistic of as group_by order_by on_problems
|
||||
running : Statistic -> (Text | Integer) -> Text -> Set_Mode -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
|
||||
running self (statistic:Statistic=..Count) (of:(Text | Integer)=0) (as:Text='') (set_mode:Set_Mode=..Add) (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=..Report_Warning) =
|
||||
Add_Running.add_running self statistic of as set_mode group_by order_by on_problems
|
||||
|
||||
## PRIVATE
|
||||
column_naming_helper : Column_Naming_Helper
|
||||
|
@ -67,8 +67,35 @@ add_specs suite_builder =
|
||||
# 4 | SG0456 | E | 73.77 | 5
|
||||
expected_table = data.table.zip expected_column
|
||||
result.should_equal expected_table
|
||||
group_builder.specify "Setting the name the same as an existing column errors if update_mode not changed" <|
|
||||
result = data.table.running Statistic.Count "Passenger" "Passenger"
|
||||
result.should_fail_with (Existing_Column.Error 'Passenger')
|
||||
group_builder.specify "Setting the name the same as an existing column works if update_mode update" <|
|
||||
result = data.table.running Statistic.Count "Passenger" "Passenger" set_mode=..Update
|
||||
expected_column = Column.from_vector "My Custom Name" [1, 2, 3, 4, 5]
|
||||
# | Flight | Passenger | Ticket Price
|
||||
#---+--------+-----------+--------------
|
||||
# 0 | BA0123 | 1 | 100.5
|
||||
# 1 | BA0123 | 2 | 575.99
|
||||
# 2 | SG0456 | 3 | 73.23
|
||||
# 3 | BA0123 | 4 | 112.34
|
||||
# 4 | SG0456 | 5 | 73.77
|
||||
expected_table = data.table.set expected_column "Passenger"
|
||||
result.should_equal expected_table
|
||||
group_builder.specify "Not setting the name updates the first column if update_mode update" <|
|
||||
result = data.table.running set_mode=..Update
|
||||
expected_column = Column.from_vector "Flight" [1, 2, 3, 4, 5]
|
||||
# | Flight | Passenger | Ticket Price
|
||||
#---+--------+-----------+--------------
|
||||
# 0 | 1 | A | 100.5
|
||||
# 1 | 2 | B | 575.99
|
||||
# 2 | 3 | A | 73.23
|
||||
# 3 | 4 | C | 112.34
|
||||
# 4 | 5 | E | 73.77
|
||||
expected_table = data.table.set expected_column
|
||||
result.should_equal expected_table
|
||||
group_builder.specify "Can group by and provide running count per group" <|
|
||||
result = data.table.running Statistic.Count "Passenger" "Passenger num per flight" ["Flight"]
|
||||
result = data.table.running Statistic.Count "Passenger" "Passenger num per flight" group_by=["Flight"]
|
||||
expected_column = Column.from_vector "Passenger num per flight" [1, 2, 1, 3, 2]
|
||||
# | Flight | Passenger | Ticket Price | Passenger num per flight
|
||||
#---+--------+-----------+--------------+-------------------------
|
||||
@ -80,7 +107,7 @@ add_specs suite_builder =
|
||||
expected_table = data.table.zip expected_column
|
||||
result.should_equal expected_table
|
||||
group_builder.specify "Can group by and provide running count per group based on order by" <|
|
||||
result = data.table.running Statistic.Count "Passenger" "Ranked ticket cost per pass" ["Passenger"] ["Ticket Price"]
|
||||
result = data.table.running Statistic.Count "Passenger" "Ranked ticket cost per pass" group_by=["Passenger"] order_by=["Ticket Price"]
|
||||
expected_column = Column.from_vector "Ranked ticket cost per pass" [2, 1, 1, 1, 1]
|
||||
# | Flight | Passenger | Ticket Price | Ranked ticket cost per pass
|
||||
#---+--------+-----------+--------------+-------------------------
|
||||
@ -92,7 +119,7 @@ add_specs suite_builder =
|
||||
expected_table = data.table.zip expected_column
|
||||
result.should_equal expected_table
|
||||
group_builder.specify "Can provide running count based on order by without grouping" <|
|
||||
result = data.table.running Statistic.Count "Passenger" "Ranked ticket cost" [] ["Ticket Price"]
|
||||
result = data.table.running Statistic.Count "Passenger" "Ranked ticket cost" order_by=["Ticket Price"]
|
||||
expected_column = Column.from_vector "Ranked ticket cost" [3, 5, 1, 4, 2]
|
||||
# | Flight | Passenger | Ticket Price | Ranked ticket cost
|
||||
#---+--------+-----------+--------------+-------------------------
|
||||
@ -117,8 +144,35 @@ add_specs suite_builder =
|
||||
# 4 | SG0456 | E | 73.77 | 935.83
|
||||
expected_table = data.table.zip expected_column
|
||||
result.should_equal expected_table
|
||||
group_builder.specify "Setting the name the same as an existing column errors if update_mode not changed" <|
|
||||
result = data.table.running Statistic.Sum "Ticket Price" "Ticket Price"
|
||||
result.should_fail_with (Existing_Column.Error 'Ticket Price')
|
||||
group_builder.specify "Setting the name the same as an existing column works if update_mode update" <|
|
||||
result = data.table.running Statistic.Sum "Ticket Price" "Ticket Price" set_mode=..Update
|
||||
expected_column = Column.from_vector "Ticket Price" [100.5, 676.49, 749.72, 862.0600000000001, 935.83]
|
||||
# | Flight | Passenger | Ticket Price
|
||||
#---+--------+-----------+-------------------------
|
||||
# 0 | BA0123 | A | 100.5
|
||||
# 1 | BA0123 | B | 676.49
|
||||
# 2 | SG0456 | A | 749.72
|
||||
# 3 | BA0123 | C | 862.06
|
||||
# 4 | SG0456 | E | 935.83
|
||||
expected_table = data.table.set expected_column "Ticket Price"
|
||||
result.should_equal expected_table
|
||||
group_builder.specify "Not setting the name overrides existing column if update_mode update" <|
|
||||
result = data.table.running Statistic.Sum "Ticket Price" set_mode=..Update
|
||||
expected_column = Column.from_vector "Ticket Price" [100.5, 676.49, 749.72, 862.0600000000001, 935.83]
|
||||
# | Flight | Passenger | Ticket Price
|
||||
#---+--------+-----------+-------------------------
|
||||
# 0 | BA0123 | A | 100.5
|
||||
# 1 | BA0123 | B | 676.49
|
||||
# 2 | SG0456 | A | 749.72
|
||||
# 3 | BA0123 | C | 862.06
|
||||
# 4 | SG0456 | E | 935.83
|
||||
expected_table = data.table.set expected_column "Ticket Price"
|
||||
result.should_equal expected_table
|
||||
group_builder.specify "Can group by and provide running sum per group" <|
|
||||
result = data.table.running Statistic.Sum "Ticket Price" "Running" ["Flight"]
|
||||
result = data.table.running Statistic.Sum "Ticket Price" "Running" group_by=["Flight"]
|
||||
expected_column = Column.from_vector "Running" [100.5, 676.49, 73.23, 788.83, 147]
|
||||
# | Flight | Passenger | Ticket Price | Running
|
||||
#---+--------+-----------+--------------+-------------------------
|
||||
@ -130,7 +184,7 @@ add_specs suite_builder =
|
||||
expected_table = data.table.zip expected_column
|
||||
result.should_equal expected_table
|
||||
group_builder.specify "Can group by and provide running sum per group based on order by" <|
|
||||
result = data.table.running Statistic.Sum "Ticket Price" "Sum ticket cost per pass" ["Passenger"] ["Ticket Price"]
|
||||
result = data.table.running Statistic.Sum "Ticket Price" "Sum ticket cost per pass" group_by=["Passenger"] order_by=["Ticket Price"]
|
||||
expected_column = Column.from_vector "Sum ticket cost per pass" [173.73000000000002, 575.99, 73.23, 112.34, 73.77]
|
||||
# | Flight | Passenger | Ticket Price | Sum ticket cost per pass
|
||||
#---+--------+-----------+--------------+-------------------------
|
||||
@ -142,7 +196,7 @@ add_specs suite_builder =
|
||||
expected_table = data.table.zip expected_column
|
||||
result.should_equal expected_table
|
||||
group_builder.specify "Can provide running sum based on order by without grouping" <|
|
||||
result = data.table.running Statistic.Sum "Ticket Price" "Sum ticket cost" [] ["Ticket Price"]
|
||||
result = data.table.running Statistic.Sum "Ticket Price" "Sum ticket cost" order_by=["Ticket Price"]
|
||||
expected_column = Column.from_vector "Sum ticket cost" [247.5, 935.83, 73.23, 359.84000000000003, 147]
|
||||
# | Flight | Passenger | Ticket Price | Sum ticket cost
|
||||
#---+--------+-----------+--------------+-------------------------
|
||||
@ -441,7 +495,7 @@ add_specs suite_builder =
|
||||
warnings = Problems.get_attached_warnings result
|
||||
warnings.not_empty . should_be_false
|
||||
group_builder.specify "Running min ignores nothing values and works with grouping and warns" <|
|
||||
result = table.running Statistic.Minimum "Ticket Price" "Running" ["Flight"]
|
||||
result = table.running Statistic.Minimum "Ticket Price" "Running" group_by=["Flight"]
|
||||
expected_column = Column.from_vector "Running" [100.5, 100.5, Nothing, 100.5, 73.77]
|
||||
# | Flight | Passenger | Ticket Price | Running
|
||||
#---+--------+-----------+--------------+-------------------------
|
||||
@ -456,7 +510,7 @@ add_specs suite_builder =
|
||||
w.column.should_equal "Ticket Price"
|
||||
w.rows.should_equal [2, 3]
|
||||
group_builder.specify "Running max ignores nothing values and works with grouping and warns" <|
|
||||
result = table.running Statistic.Maximum "Ticket Price" "Running" ["Flight"]
|
||||
result = table.running Statistic.Maximum "Ticket Price" "Running" group_by=["Flight"]
|
||||
expected_column = Column.from_vector "Running" [100.5, 575.99, Nothing, 575.99, 73.77]
|
||||
# | Flight | Passenger | Ticket Price | Running
|
||||
#---+--------+-----------+--------------+-------------------------
|
||||
@ -486,7 +540,7 @@ add_specs suite_builder =
|
||||
w.column.should_equal "Ticket Price"
|
||||
w.rows.should_equal [2, 3]
|
||||
group_builder.specify "Running mean ignores nothing values and works when first value is Nothing and warns" <|
|
||||
result = table.running Statistic.Mean "Ticket Price" "Running" ["Flight"]
|
||||
result = table.running Statistic.Mean "Ticket Price" "Running" group_by=["Flight"]
|
||||
expected_column = Column.from_vector "Running" [100.5, 338.245, Nothing, 338.245, 73.77]
|
||||
# | Flight | Passenger | Ticket Price | Running
|
||||
#---+--------+-----------+--------------+-------------------------
|
||||
@ -556,7 +610,7 @@ add_specs suite_builder =
|
||||
warnings = Problems.get_attached_warnings result
|
||||
warnings.not_empty . should_be_false
|
||||
group_builder.specify "Running min ignores NaN values and works with grouping and warns" <|
|
||||
result = table.running Statistic.Minimum "Ticket Price" "Running" ["Flight"]
|
||||
result = table.running Statistic.Minimum "Ticket Price" "Running" group_by=["Flight"]
|
||||
expected_column = Column.from_vector "Running" [100.5, 100.5, Number.nan, 100.5, 73.77]
|
||||
# | Flight | Passenger | Ticket Price | Running
|
||||
#---+--------+-----------+--------------+-------------------------
|
||||
@ -571,7 +625,7 @@ add_specs suite_builder =
|
||||
w.column.should_equal "Ticket Price"
|
||||
w.rows.should_equal [2, 3]
|
||||
group_builder.specify "Running max ignores NaN values and works with grouping and warns" <|
|
||||
result = table.running Statistic.Maximum "Ticket Price" "Running" ["Flight"]
|
||||
result = table.running Statistic.Maximum "Ticket Price" "Running" group_by=["Flight"]
|
||||
expected_column = Column.from_vector "Running" [100.5, 575.99, Number.nan, 575.99, 73.77]
|
||||
# | Flight | Passenger | Ticket Price | Running
|
||||
#---+--------+-----------+--------------+-------------------------
|
||||
@ -601,7 +655,7 @@ add_specs suite_builder =
|
||||
w.column.should_equal "Ticket Price"
|
||||
w.rows.should_equal [2, 3]
|
||||
group_builder.specify "Running mean ignores NaN values and works when first value is NaN and warns" <|
|
||||
result = table.running Statistic.Mean "Ticket Price" "Running" ["Flight"]
|
||||
result = table.running Statistic.Mean "Ticket Price" "Running" group_by=["Flight"]
|
||||
expected_column = Column.from_vector "Running" [100.5, 338.245, Number.nan, 338.245, 73.77]
|
||||
# | Flight | Passenger | Ticket Price | Running
|
||||
#---+--------+-----------+--------------+-------------------------
|
||||
|
Loading…
Reference in New Issue
Block a user