Refactor table.group_by to table.aggregate (#3339)

Following the UX work, `Table.group_by` is replaced by the `Table.aggregate` function.
This commit is contained in:
James Dunkerley 2022-03-15 14:23:36 +00:00 committed by GitHub
parent dedd1eac96
commit 6c1c4554f5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 275 additions and 132 deletions

View File

@ -64,6 +64,7 @@
- [Implemented `Map.first`, `Map.last` functions. Expanded `Table.group_by` to - [Implemented `Map.first`, `Map.last` functions. Expanded `Table.group_by` to
also compute mode, percentile, minimum, maximum.][3318] also compute mode, percentile, minimum, maximum.][3318]
- [Implemented `Text.location_of` and `Text.location_of_all` methods.][3324] - [Implemented `Text.location_of` and `Text.location_of_all` methods.][3324]
- [Replaced `Table.group_by` with `Table.aggregate`][3339]
[debug-shortcuts]: [debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -103,6 +104,7 @@
[3317]: https://github.com/enso-org/enso/pull/3317 [3317]: https://github.com/enso-org/enso/pull/3317
[3318]: https://github.com/enso-org/enso/pull/3318 [3318]: https://github.com/enso-org/enso/pull/3318
[3324]: https://github.com/enso-org/enso/pull/3324 [3324]: https://github.com/enso-org/enso/pull/3324
[3339]: https://github.com/enso-org/enso/pull/3339
#### Enso Compiler #### Enso Compiler

View File

@ -1,15 +1,18 @@
from Standard.Base import all from Standard.Base import all
from Standard.Table.Data.Column as Column_Module import Column from Standard.Table.Data.Column as Column_Module import Column
import Standard.Table.Data.Group_By import Standard.Table.Data.Group_By_Key
## Defines an Aggregate Column ## Defines an Aggregate Column
type Aggregate_Column type Aggregate_Column
## Group By
type Group_By (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
## Creates a new column with the row count of each group ## Creates a new column with the row count of each group
Arguments: Arguments:
- name: name of new column. - name: name of new column.
type Count (name:Text|Nothing=Nothing) type Count (new_name:Text|Nothing=Nothing)
## Creates a new column with the count of unique items in the selected ## Creates a new column with the count of unique items in the selected
column(s) within each group. column(s) within each group.
@ -18,7 +21,7 @@ type Aggregate_Column
- columns: either a single or set of columns (specified by name, index or Column object) to count across. - columns: either a single or set of columns (specified by name, index or Column object) to count across.
- name: name of new column. - name: name of new column.
- ignore_nothing: if all values are Nothing won't be included. - ignore_nothing: if all values are Nothing won't be included.
type Count_Distinct (columns:Column|Text|Integer|[(Column|Text|Integer)]) (name:Text|Nothing=Nothing) (ignore_nothing:Boolean=False) type Count_Distinct (columns:Column|Text|Integer|[(Column|Text|Integer)]) (new_name:Text|Nothing=Nothing) (ignore_nothing:Boolean=False)
## ALIAS Count_Not_Null ## ALIAS Count_Not_Null
@ -28,7 +31,7 @@ type Aggregate_Column
Arguments: Arguments:
- column: column (specified by name, index or Column object) to count. - column: column (specified by name, index or Column object) to count.
- name: name of new column. - name: name of new column.
type Count_Not_Nothing (column:Column|Text|Integer) (name:Text|Nothing=Nothing) type Count_Not_Nothing (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
## ALIAS Count_Null, Count_Missing ## ALIAS Count_Null, Count_Missing
@ -38,7 +41,7 @@ type Aggregate_Column
Arguments: Arguments:
- column: column (specified by name, index or Column object) to count. - column: column (specified by name, index or Column object) to count.
- name: name of new column. - name: name of new column.
type Count_Nothing (column:Column|Text|Integer) (name:Text|Nothing=Nothing) type Count_Nothing (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
## Creates a new column with the count of not `Nothing` (null) and non-empty ## Creates a new column with the count of not `Nothing` (null) and non-empty
("") values of the column within each group. ("") values of the column within each group.
@ -46,7 +49,7 @@ type Aggregate_Column
Arguments: Arguments:
- column: column (specified by name, index or Column object) to count. - column: column (specified by name, index or Column object) to count.
- name: name of new column. - name: name of new column.
type Count_Not_Empty (column:Column|Text|Integer) (name:Text|Nothing=Nothing) type Count_Not_Empty (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
## Creates a new column with the count of `Nothing` (null) or empty ("") ## Creates a new column with the count of `Nothing` (null) or empty ("")
text values of the column within each group. text values of the column within each group.
@ -54,7 +57,7 @@ type Aggregate_Column
Arguments: Arguments:
- column: column (specified by name, index or Column object) to count. - column: column (specified by name, index or Column object) to count.
- name: name of new column. - name: name of new column.
type Count_Empty (column:Column|Text|Integer) (name:Text|Nothing=Nothing) type Count_Empty (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
## Creates a new column with the sum of values (ignoring missing values) of ## Creates a new column with the sum of values (ignoring missing values) of
the specified column within each group. the specified column within each group.
@ -62,7 +65,7 @@ type Aggregate_Column
Arguments: Arguments:
- column: column (specified by name, index or Column object) to total. - column: column (specified by name, index or Column object) to total.
- name: name of new column. - name: name of new column.
type Sum (column:Column|Text|Integer) (name:Text|Nothing=Nothing) type Sum (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
## Creates a new column with the mean of values (ignoring missing values) of ## Creates a new column with the mean of values (ignoring missing values) of
the specified column within each group. the specified column within each group.
@ -70,7 +73,7 @@ type Aggregate_Column
Arguments: Arguments:
- column: column (specified by name, index or Column object) to average. - column: column (specified by name, index or Column object) to average.
- name: name of new column. - name: name of new column.
type Average (column:Column|Text|Integer) (name:Text|Nothing=Nothing) type Average (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
## Creates a new column with the median of values (ignoring missing values) ## Creates a new column with the median of values (ignoring missing values)
of the specified column within each group. of the specified column within each group.
@ -78,7 +81,7 @@ type Aggregate_Column
Arguments: Arguments:
- column: column (specified by name, index or Column object) to calculate median on. - column: column (specified by name, index or Column object) to calculate median on.
- name: name of new column. - name: name of new column.
type Median (column:Column|Text|Integer) (name:Text|Nothing=Nothing) type Median (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
## Creates a new column with the percentile of values (ignoring missing values) ## Creates a new column with the percentile of values (ignoring missing values)
of the specified column within each group. of the specified column within each group.
@ -87,7 +90,7 @@ type Aggregate_Column
- percentile: Percentage to compute from 0-1 inclusive. - percentile: Percentage to compute from 0-1 inclusive.
- column: column (specified by name, index or Column object) to compute percentile. - column: column (specified by name, index or Column object) to compute percentile.
- name: name of new column. - name: name of new column.
type Percentile (percentile:Decimal) (column:Column|Text|Integer) (name:Text|Nothing=Nothing) type Percentile (percentile:Decimal) (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
## Creates a new column with the mode of values (ignoring missing values) ## Creates a new column with the mode of values (ignoring missing values)
of the specified column within each group. of the specified column within each group.
@ -95,7 +98,7 @@ type Aggregate_Column
Arguments: Arguments:
- column: column (specified by name, index or Column object) to find the most common value. - column: column (specified by name, index or Column object) to find the most common value.
- name: name of new column. - name: name of new column.
type Mode (column:Column|Text|Integer) (name:Text|Nothing=Nothing) type Mode (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
## Creates a new column with the standard deviation of values (ignoring ## Creates a new column with the standard deviation of values (ignoring
missing values) of the column within each group. missing values) of the column within each group.
@ -104,7 +107,7 @@ type Aggregate_Column
- column: column (specified by name, index or Column object) to compute standard deviation. - column: column (specified by name, index or Column object) to compute standard deviation.
- name: name of new column. - name: name of new column.
- population: specifies if group is a sample or the population. - population: specifies if group is a sample or the population.
type Standard_Deviation (column:Column|Text|Integer) (name:Text|Nothing=Nothing) (population:Boolean=False) type Standard_Deviation (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) (population:Boolean=False)
## Creates a new column with the values concatenated together. `Nothing` values will become an empty string. ## Creates a new column with the values concatenated together. `Nothing` values will become an empty string.
@ -116,7 +119,7 @@ type Aggregate_Column
- suffix: added at the end of the result. - suffix: added at the end of the result.
- quote_char: character used to quote the values if the value is `Empty` - quote_char: character used to quote the values if the value is `Empty`
or contains the separator. or contains the separator.
type Concatenate (column:Column|Text|Integer) (name:Text|Nothing=Nothing) (separator:Text="") (prefix:Text="") (suffix:Text="") (quote_char:Text="") type Concatenate (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) (separator:Text="") (prefix:Text="") (suffix:Text="") (quote_char:Text="")
## Creates a new column with the first value in each group. ## Creates a new column with the first value in each group.
@ -127,7 +130,7 @@ type Aggregate_Column
not missing value returned. not missing value returned.
- order_by: required for database tables. Specifies how to order the - order_by: required for database tables. Specifies how to order the
results within the group. results within the group.
type First (column:Column|Text|Integer) (name:Text|Nothing=Nothing) (ignore_nothing:Boolean=True) (order_by:Column_Selector|Nothing=Nothing) type First (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) (ignore_nothing:Boolean=True) (order_by:Column_Selector|Nothing=Nothing)
## Creates a new column with the last value in each group. ## Creates a new column with the last value in each group.
@ -138,42 +141,43 @@ type Aggregate_Column
not missing value returned. not missing value returned.
- order_by: required for database tables. Specifies how to order the - order_by: required for database tables. Specifies how to order the
results within the group. results within the group.
type Last (column:Column|Text|Integer) (name:Text|Nothing=Nothing) (ignore_nothing:Boolean=True) (order_by:Column_Selector|Nothing=Nothing) type Last (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) (ignore_nothing:Boolean=True) (order_by:Column_Selector|Nothing=Nothing)
## Creates a new column with the maximum value in each group. ## Creates a new column with the maximum value in each group.
Arguments: Arguments:
- column: column (specified by name, index or Column object) to find maximum. - column: column (specified by name, index or Column object) to find maximum.
- name: name of new column. - name: name of new column.
type Maximum (column:Column|Text|Integer) (name:Text|Nothing=Nothing) type Maximum (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
## Creates a new column with the minimum value in each group. ## Creates a new column with the minimum value in each group.
Arguments: Arguments:
- column: column (specified by name, index or Column object) to find minimum. - column: column (specified by name, index or Column object) to find minimum.
- name: name of new column. - name: name of new column.
type Minimum (column:Column|Text|Integer) (name:Text|Nothing=Nothing) type Minimum (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
## Creates a new column with the shortest text in each group. ## Creates a new column with the shortest text in each group.
Arguments: Arguments:
- column: column (specified by name, index or Column object) to find shortest value. - column: column (specified by name, index or Column object) to find shortest value.
- name: name of new column. - name: name of new column.
type Shortest (column:Column|Text|Integer) (name:Text|Nothing=Nothing) type Shortest (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
## Creates a new column with the longest text in each group. ## Creates a new column with the longest text in each group.
Arguments: Arguments:
- column: column (specified by name, index or Column object) to find longest value. - column: column (specified by name, index or Column object) to find longest value.
- name: name of new column. - name: name of new column.
type Longest (column:Column|Text|Integer) (name:Text|Nothing=Nothing) type Longest (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
## Gets a column name to use for the aggregate column ## Gets a column name to use for the aggregate column
column_name : Table->Text column_name : Table->Text
column_name table = column_name table =
if this.name.is_nothing.not then this.name else if this.new_name.is_nothing.not then this.new_name else
get_name c = (this.resolve_column table c).name get_name c = (this.resolve_column table c).name
case this of case this of
Group_By c _ -> (get_name c)
Count _ -> "Count" Count _ -> "Count"
Count_Distinct c _ _ -> Count_Distinct c _ _ ->
case c of case c of
@ -208,6 +212,7 @@ type Aggregate_Column
initial_value : Any initial_value : Any
initial_value = case this of initial_value = case this of
Group_By _ _ -> Nothing
Count_Distinct _ _ _ -> Map.empty Count_Distinct _ _ _ -> Map.empty
Median _ _ -> Map.empty Median _ _ -> Map.empty
Percentile _ _ _ -> Map.empty Percentile _ _ _ -> Map.empty
@ -235,6 +240,7 @@ type Aggregate_Column
_ -> Error.throw (Invalid_Aggregation_Method this.col "Empty is only valid for Text") _ -> Error.throw (Invalid_Aggregation_Method this.col "Empty is only valid for Text")
case this of case this of
Group_By c _ -> create_closure c col->_->i->(col.at i)
Count _ -> count->_->(count+1) Count _ -> count->_->(count+1)
Count_Not_Nothing c _ -> create_closure c col->count->i->(count + if (col.at i).is_nothing then 0 else 1) Count_Not_Nothing c _ -> create_closure c col->count->i->(count + if (col.at i).is_nothing then 0 else 1)
Count_Nothing c _ -> create_closure c col->count->i->(count + if (col.at i).is_nothing then 1 else 0) Count_Nothing c _ -> create_closure c col->count->i->(count + if (col.at i).is_nothing then 1 else 0)
@ -294,7 +300,7 @@ type Aggregate_Column
resolved = case columns of resolved = case columns of
Vector.Vector _ -> columns.map c->(this.resolve_column table c) Vector.Vector _ -> columns.map c->(this.resolve_column table c)
_ -> [this.resolve_column table columns] _ -> [this.resolve_column table columns]
key_maker i = Group_By.key (resolved.map c->(c.at i)) key_maker i = Group_By_Key.key (resolved.map c->(c.at i))
case ignore_nothing of case ignore_nothing of
False-> map->i->(map.insert (key_maker i) 1) False-> map->i->(map.insert (key_maker i) 1)
True-> map->i-> True-> map->i->

View File

@ -17,7 +17,7 @@ from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Prob
import Standard.Table.Data.Column_Mapping import Standard.Table.Data.Column_Mapping
import Standard.Table.Data.Position import Standard.Table.Data.Position
import Standard.Table.Data.Group_By import Standard.Table.Data.Group_By_Key
import Standard.Table.Data.Aggregate_Column import Standard.Table.Data.Aggregate_Column
polyglot java import org.enso.table.data.table.Table as Java_Table polyglot java import org.enso.table.data.table.Table as Java_Table
@ -487,49 +487,46 @@ type Table
## Prototype Group By function ## Prototype Group By function
group_by : Column_Selector -> [Aggregate_Column] -> Problem_Behavior -> Table aggregate : [Aggregate_Column] -> Problem_Behavior -> Table
group_by selector columns (on_problems=Report_Warning) = aggregate columns (on_problems=Report_Warning) =
# Grouping Key # Grouping Key
key_columns = if selector.is_nothing then [] else is_a_key c = case c of
Table_Helpers.select_columns_helper internal_columns=this.columns selector=selector reorder=True on_problems=on_problems Aggregate_Column.Group_By _ _ -> True
key_length = key_columns.length _ -> False
make_key = if (key_length == 0) then _->(Group_By.key [1]) else i->(Group_By.key (key_columns.map v->(v.at i))) key_columns = columns.filter is_a_key . map c->(c.resolve_column this c.column)
make_key = if (key_columns.length == 0) then _->(Group_By_Key.key [1]) else i->(Group_By_Key.key (key_columns.map v->(v.at i)))
# New Table Accumulator # New Table Accumulator
name_strategy = Unique_Name_Strategy.new name_strategy = Unique_Name_Strategy.new
new_table = (key_columns.map c->c.name)+(columns.map c->(c.column_name this)) . map n->[name_strategy.make_unique n, Vector.new_builder] new_table = columns.map c->(c.column_name this) . map n->[name_strategy.make_unique n, Vector.new_builder]
add_row key = add_row _ =
idx = new_table.at 0 . at 1 . length idx = new_table.at 0 . at 1 . length
0.up_to key_length . each i->
new_table.at i . at 1 . append (key.values.at i).value
0.up_to (columns.length) . each i-> 0.up_to (columns.length) . each i->
column = columns.at i new_table.at i . at 1 . append ((columns.at i).initial_value)
new_table.at (i + key_length) . at 1 . append (column.initial_value)
idx idx
# Fold # Fold
aggregators = columns.map c->(c.make_aggregator this) aggregators = columns.map c->(c.make_aggregator this)
aggregate map i = aggregate map i =
key = make_key i key = make_key i
row_index = map.get_or_else key (add_row key) row_index = map.get_or_else key (add_row Nothing)
# Accumulate # Accumulate
0.up_to (columns.length) . each j-> 0.up_to (columns.length) . each j->
aggregator = aggregators.at j aggregator = aggregators.at j
array = new_table.at (j + key_length) . at 1 . to_array array = new_table.at j . at 1 . to_array
current = array . at row_index current = array . at row_index
new = aggregator current i new = aggregator current i
array . set_at row_index new array . set_at row_index new
map.insert key row_index map.insert key row_index
if ((key_length == 0) && (this.row_count == 0)) then (add_row []) else if ((key_columns.length == 0) && (this.row_count == 0)) then (add_row Nothing) else
0.up_to this.row_count . fold Map.empty aggregate 0.up_to this.row_count . fold Map.empty aggregate
# Now Finalise and make a table # Now Finalise and make a table
finalise builder index = finalise builder index =
if index < key_length then builder.to_vector else column = columns.at index
column = columns.at (index - key_length) Vector.new builder.length i->(column.evaluate (builder.to_array.at i))
Vector.new builder.length i->(column.evaluate (builder.to_array.at i))
here.new (new_table.map_with_index i->c->[c.at 0,finalise (c.at 1) i]) here.new (new_table.map_with_index i->c->[c.at 0,finalise (c.at 1) i])

View File

@ -0,0 +1,49 @@
from Standard.Base import all
import Standard.Test.Bench
import Standard.Test.Faker
import Standard.Table.Data.Table
import Standard.Table.Data.Column_Selector
from Standard.Table.Data.Aggregate_Column import all
## Bench Utilities ============================================================
# Number of rows in the generated benchmark table (passed to `create_table` in `main`).
vector_size = 2500
# Passed as the third argument of every `Bench.measure` call in `main`.
iter_size = 100
# Passed as the fourth argument of every `Bench.measure` call in `main`.
num_iterations = 10
# Builds the benchmark input table with `rows` rows of generated data.
#
# Arguments:
# - rows: number of values generated for each column.
# - seed: value handed to `Faker.new`; defaults to 1646322139.
#
# Resulting columns, in order: "Code", "Index", "Flag", "Value",
# "ValueWithNothing", "TextWithNothing", "Hexadecimal".
create_table : Integer->Integer->Table
create_table rows (seed=1646322139) =
# A single Faker instance feeds every column below.
# NOTE(review): presumably the generator is stateful, so the order of the
# following definitions affects the generated data - confirm before reordering.
faker = Faker.new seed
# Columns used as grouping keys by the benchmarks in `main`.
key1 = ["Code", 0.up_to rows . map _-> faker.alpha 3]
key2 = ["Index", 0.up_to rows . map _-> faker.integer 0 10]
key3 = ["Flag", 0.up_to rows . map _-> faker.boolean]
# Decimal drawn with bounds -100 and 100, then scaled, floored and divided
# back so that at most 5 decimal places remain.
value1 = ["Value", 0.up_to rows . map _-> ((faker.decimal -100 100)*100000).floor/100000]
# Same idea with at most 2 decimal places.
# NOTE(review): `make_some_nothing` presumably replaces some of the values
# with `Nothing` (as the column names suggest) - verify against Faker.
value2 = ["ValueWithNothing", 0.up_to rows . map _-> faker.make_some_nothing ((faker.decimal -100 100)*100).floor/100]
text1 = ["TextWithNothing", 0.up_to rows . map _-> faker.make_some_nothing (faker.alpha_numeric 10)]
text2 = ["Hexadecimal", 0.up_to rows . map _-> faker.make_some_nothing (faker.hexadecimal 8)]
Table.new [key1, key2, key3, value1, value2, text1, text2]
# The Benchmarks ==============================================================
# Entry point of the benchmark: builds one table of `vector_size` rows and
# passes a series of `table.aggregate` calls to `Bench.measure`, each with a
# label plus `iter_size` and `num_iterations`.
main =
IO.println <| "Making table data..."
table = here.create_table here.vector_size
# No `Group_By` entry in the list: aggregates over the whole table.
Bench.measure (table.aggregate [Count Nothing]) "Count table" here.iter_size here.num_iterations
Bench.measure (table.aggregate [Count_Distinct "Index"]) "Count Distinct table" here.iter_size here.num_iterations
Bench.measure (table.aggregate [Standard_Deviation "Value"]) "StDev table" here.iter_size here.num_iterations
Bench.measure (table.aggregate [Median "Value"]) "Median table" here.iter_size here.num_iterations
Bench.measure (table.aggregate [Mode "Index"]) "Mode table" here.iter_size here.num_iterations
# Same aggregates with a single `Group_By "Index"` key.
Bench.measure (table.aggregate [Group_By "Index", Count Nothing]) "Count grouped" here.iter_size here.num_iterations
Bench.measure (table.aggregate [Group_By "Index", Count_Distinct "Code"]) "Count Distinct grouped" here.iter_size here.num_iterations
Bench.measure (table.aggregate [Group_By "Index", Standard_Deviation "Value"]) "StDev grouped" here.iter_size here.num_iterations
Bench.measure (table.aggregate [Group_By "Index", Median "Value"]) "Median grouped" here.iter_size here.num_iterations
Bench.measure (table.aggregate [Group_By "Index", Mode "Index"]) "Mode grouped" here.iter_size here.num_iterations
# Same aggregates with two keys: `Group_By "Index"` and `Group_By "Flag"`.
Bench.measure (table.aggregate [Group_By "Index", Group_By "Flag", Count Nothing]) "Count 2 level groups" here.iter_size here.num_iterations
Bench.measure (table.aggregate [Group_By "Index", Group_By "Flag", Count_Distinct "Code"]) "Count Distinct 2 level groups" here.iter_size here.num_iterations
Bench.measure (table.aggregate [Group_By "Index", Group_By "Flag", Standard_Deviation "Value"]) "StDev 2 level groups" here.iter_size here.num_iterations
Bench.measure (table.aggregate [Group_By "Index", Group_By "Flag", Median "Value"]) "Median 2 level groups" here.iter_size here.num_iterations
Bench.measure (table.aggregate [Group_By "Index", Group_By "Flag", Mode "Index"]) "Mode 2 level groups" here.iter_size here.num_iterations

View File

@ -1,49 +0,0 @@
from Standard.Base import all
import Standard.Test.Bench
import Standard.Test.Faker
import Standard.Table.Data.Table
import Standard.Table.Data.Column_Selector
from Standard.Table.Data.Aggregate_Column import all
## Bench Utilities ============================================================
# Number of rows in the generated benchmark table (passed to `create_table` in `main`).
vector_size = 2500
# Passed as the fourth argument of every `Bench.measure` call in `main`.
iter_size = 100
# Passed as the fifth argument position's predecessor is the label; this value is the last argument of every `Bench.measure` call in `main`.
num_iterations = 10
# Builds the benchmark input table with `rows` rows of generated data.
#
# Arguments:
# - rows: number of values generated for each column.
# - seed: value handed to `Faker.new`; defaults to 1646322139.
#
# Resulting columns, in order: "Code", "Index", "Flag", "Value",
# "ValueWithNothing", "TextWithNothing", "Hexadecimal".
create_table : Integer->Integer->Table
create_table rows (seed=1646322139) =
# A single Faker instance feeds every column below.
# NOTE(review): presumably the generator is stateful, so the order of the
# following definitions affects the generated data - confirm before reordering.
faker = Faker.new seed
key1 = ["Code", 0.up_to rows . map _-> faker.alpha 3]
key2 = ["Index", 0.up_to rows . map _-> faker.integer 0 10]
key3 = ["Flag", 0.up_to rows . map _-> faker.boolean]
# Scaled, floored and divided back so that at most 5 decimal places remain.
value1 = ["Value", 0.up_to rows . map _-> ((faker.decimal -100 100)*100000).floor/100000]
# Same idea with at most 2 decimal places.
# NOTE(review): `make_some_nothing` presumably replaces some of the values
# with `Nothing` (as the column names suggest) - verify against Faker.
value2 = ["ValueWithNothing", 0.up_to rows . map _-> faker.make_some_nothing ((faker.decimal -100 100)*100).floor/100]
text1 = ["TextWithNothing", 0.up_to rows . map _-> faker.make_some_nothing (faker.alpha_numeric 10)]
text2 = ["Hexadecimal", 0.up_to rows . map _-> faker.make_some_nothing (faker.hexadecimal 8)]
Table.new [key1, key2, key3, value1, value2, text1, text2]
# The Benchmarks ==============================================================
# Entry point of the benchmark: builds one table of `vector_size` rows and
# passes a series of `table.group_by` calls to `Bench.measure`, each with a
# label plus `iter_size` and `num_iterations`.
main =
IO.println <| "Making table data..."
table = here.create_table here.vector_size
# Empty index selector: no grouping columns are selected.
Bench.measure (table.group_by (Column_Selector.By_Index []) [Count Nothing]) "Count table" here.iter_size here.num_iterations
Bench.measure (table.group_by (Column_Selector.By_Index []) [Count_Distinct "Index"]) "Count Distinct table" here.iter_size here.num_iterations
Bench.measure (table.group_by (Column_Selector.By_Index []) [Standard_Deviation "Value"]) "StDev table" here.iter_size here.num_iterations
Bench.measure (table.group_by (Column_Selector.By_Index []) [Median "Value"]) "Median table" here.iter_size here.num_iterations
Bench.measure (table.group_by (Column_Selector.By_Index []) [Mode "Index"]) "Mode table" here.iter_size here.num_iterations
# Same aggregates with the single column "Index" selected by name.
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Count Nothing]) "Count grouped" here.iter_size here.num_iterations
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Count_Distinct "Code"]) "Count Distinct grouped" here.iter_size here.num_iterations
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Standard_Deviation "Value"]) "StDev grouped" here.iter_size here.num_iterations
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Median "Value"]) "Median grouped" here.iter_size here.num_iterations
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Mode "Index"]) "Mode grouped" here.iter_size here.num_iterations
# Same aggregates with the two columns "Index" and "Flag" selected by name.
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Count Nothing]) "Count 2 level groups" here.iter_size here.num_iterations
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Count_Distinct "Code"]) "Count Distinct 2 level groups" here.iter_size here.num_iterations
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Standard_Deviation "Value"]) "StDev 2 level groups" here.iter_size here.num_iterations
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Median "Value"]) "Median 2 level groups" here.iter_size here.num_iterations
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Mode "Index"]) "Mode 2 level groups" here.iter_size here.num_iterations

View File

@ -11,21 +11,21 @@ spec =
table = Table.from_csv file_contents table = Table.from_csv file_contents
empty_table = Table.new <| table.columns.map c->[c.name, []] empty_table = Table.new <| table.columns.map c->[c.name, []]
find_row key table = find_row key table (columns=Nothing) =
table_columns = if columns.is_nothing then table.columns else columns.map x->(table.columns.at x)
0.up_to table.row_count . find i-> 0.up_to table.row_count . find i->
0.up_to key.length . all j-> (table.columns.at j . at i)==(key.at j) 0.up_to key.length . all j-> (table_columns.at j . at i)==(key.at j)
Test.group "Table.group_by should summarize whole table " <| Test.group "Table.aggregate should summarize whole table " <|
grouping = Column_Selector.By_Index []
Test.specify "should be able to count" <| Test.specify "should be able to count" <|
grouped = table.group_by grouping [Count Nothing] grouped = table.aggregate [Count Nothing]
grouped.row_count . should_equal 1 grouped.row_count . should_equal 1
grouped.columns.length . should_equal 1 grouped.columns.length . should_equal 1
grouped.columns.at 0 . name . should_equal "Count" grouped.columns.at 0 . name . should_equal "Count"
grouped.columns.at 0 . at 0 . should_equal 2500 grouped.columns.at 0 . at 0 . should_equal 2500
Test.specify "should be able to count missing values" <| Test.specify "should be able to count missing values" <|
grouped = table.group_by grouping [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"] grouped = table.aggregate [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
grouped.row_count . should_equal 1 grouped.row_count . should_equal 1
grouped.columns.length . should_equal 4 grouped.columns.length . should_equal 4
grouped.columns.at 0 . name . should_equal "Count Nothing Hexadecimal" grouped.columns.at 0 . name . should_equal "Count Nothing Hexadecimal"
@ -38,7 +38,7 @@ spec =
grouped.columns.at 3 . at 0 . should_equal 2251 grouped.columns.at 3 . at 0 . should_equal 2251
Test.specify "should be able to count distinct values" <| Test.specify "should be able to count distinct values" <|
grouped = table.group_by grouping [Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag", Count_Distinct ["Index", "Flag"]] grouped = table.aggregate [Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag", Count_Distinct ["Index", "Flag"]]
grouped.row_count . should_equal 1 grouped.row_count . should_equal 1
grouped.columns.length . should_equal 4 grouped.columns.length . should_equal 4
grouped.columns.at 0 . name . should_equal "Count Distinct Code" grouped.columns.at 0 . name . should_equal "Count Distinct Code"
@ -51,7 +51,7 @@ spec =
grouped.columns.at 3 . at 0 . should_equal 20 grouped.columns.at 3 . at 0 . should_equal 20
Test.specify "should be able to sum, average and standard deviation of values" <| Test.specify "should be able to sum, average and standard deviation of values" <|
grouped = table.group_by grouping [Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)] grouped = table.aggregate [Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)]
grouped.row_count . should_equal 1 grouped.row_count . should_equal 1
grouped.columns.length . should_equal 8 grouped.columns.length . should_equal 8
grouped.columns.at 0 . name . should_equal "Sum Value" grouped.columns.at 0 . name . should_equal "Sum Value"
@ -72,7 +72,7 @@ spec =
grouped.columns.at 7 . at 0 . should_equal 58.575554 epsilon=0.000001 grouped.columns.at 7 . at 0 . should_equal 58.575554 epsilon=0.000001
Test.specify "should be able to create median, mode and percentile values" <| Test.specify "should be able to create median, mode and percentile values" <|
grouped = table.group_by grouping [Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"] grouped = table.aggregate [Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"]
grouped.row_count . should_equal 1 grouped.row_count . should_equal 1
grouped.columns.length . should_equal 6 grouped.columns.length . should_equal 6
grouped.columns.at 0 . name . should_equal "Median Index" grouped.columns.at 0 . name . should_equal "Median Index"
@ -89,7 +89,7 @@ spec =
grouped.columns.at 5 . at 0 . should_equal -17.960000 epsilon=0.000001 grouped.columns.at 5 . at 0 . should_equal -17.960000 epsilon=0.000001
Test.specify "should be able to get first and last values" <| Test.specify "should be able to get first and last values" <|
grouped = table.group_by grouping [First "Index", Last "Value"] grouped = table.aggregate [First "Index", Last "Value"]
grouped.row_count . should_equal 1 grouped.row_count . should_equal 1
grouped.columns.length . should_equal 2 grouped.columns.length . should_equal 2
grouped.columns.at 0 . name . should_equal "First Index" grouped.columns.at 0 . name . should_equal "First Index"
@ -98,7 +98,7 @@ spec =
grouped.columns.at 1 . at 0 . should_equal 70.99931 epsilon=0.000001 grouped.columns.at 1 . at 0 . should_equal 70.99931 epsilon=0.000001
Test.specify "should be able to get minimum and maximum values" <| Test.specify "should be able to get minimum and maximum values" <|
grouped = table.group_by grouping [Minimum "Value", Maximum "Value", Minimum "ValueWithNothing", Maximum "ValueWithNothing"] grouped = table.aggregate [Minimum "Value", Maximum "Value", Minimum "ValueWithNothing", Maximum "ValueWithNothing"]
grouped.row_count . should_equal 1 grouped.row_count . should_equal 1
grouped.columns.length . should_equal 4 grouped.columns.length . should_equal 4
grouped.columns.at 0 . name . should_equal "Minimum Value" grouped.columns.at 0 . name . should_equal "Minimum Value"
@ -111,7 +111,7 @@ spec =
grouped.columns.at 3 . at 0 . should_equal 99.95 epsilon=0.000001 grouped.columns.at 3 . at 0 . should_equal 99.95 epsilon=0.000001
Test.specify "should be able to get shortest, longest and concatenated values" <| Test.specify "should be able to get shortest, longest and concatenated values" <|
grouped = table.group_by grouping [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"] grouped = table.aggregate [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
grouped.row_count . should_equal 1 grouped.row_count . should_equal 1
grouped.columns.length . should_equal 3 grouped.columns.length . should_equal 3
grouped.columns.at 0 . name . should_equal "Shortest TextWithNothing" grouped.columns.at 0 . name . should_equal "Shortest TextWithNothing"
@ -121,17 +121,16 @@ spec =
grouped.columns.at 2 . name . should_equal "Concatenate Code" grouped.columns.at 2 . name . should_equal "Concatenate Code"
grouped.columns.at 2 . at 0 . length . should_equal 7500 grouped.columns.at 2 . at 0 . length . should_equal 7500
Test.group "Table.group_by should summarize empty table " <| Test.group "Table.aggregate should summarize empty table " <|
grouping = Column_Selector.By_Index []
Test.specify "should be able to count" <| Test.specify "should be able to count" <|
grouped = empty_table.group_by grouping [Count Nothing] grouped = empty_table.aggregate [Count Nothing]
grouped.row_count . should_equal 1 grouped.row_count . should_equal 1
grouped.columns.length . should_equal 1 grouped.columns.length . should_equal 1
grouped.columns.at 0 . name . should_equal "Count" grouped.columns.at 0 . name . should_equal "Count"
grouped.columns.at 0 . at 0 . should_equal 0 grouped.columns.at 0 . at 0 . should_equal 0
Test.specify "should be able to count missing values" <| Test.specify "should be able to count missing values" <|
grouped = empty_table.group_by grouping [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"] grouped = empty_table.aggregate [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
grouped.row_count . should_equal 1 grouped.row_count . should_equal 1
grouped.columns.length . should_equal 4 grouped.columns.length . should_equal 4
grouped.columns.at 0 . name . should_equal "Count Nothing Hexadecimal" grouped.columns.at 0 . name . should_equal "Count Nothing Hexadecimal"
@ -144,14 +143,14 @@ spec =
grouped.columns.at 3 . at 0 . should_equal 0 grouped.columns.at 3 . at 0 . should_equal 0
Test.specify "should be able to count distinct values" <| Test.specify "should be able to count distinct values" <|
grouped = empty_table.group_by grouping [Count_Distinct "Code"] grouped = empty_table.aggregate [Count_Distinct "Code"]
grouped.row_count . should_equal 1 grouped.row_count . should_equal 1
grouped.columns.length . should_equal 1 grouped.columns.length . should_equal 1
grouped.columns.at 0 . name . should_equal "Count Distinct Code" grouped.columns.at 0 . name . should_equal "Count Distinct Code"
grouped.columns.at 0 . at 0 . should_equal 0 grouped.columns.at 0 . at 0 . should_equal 0
Test.specify "should be able to sum, average and standard deviation of values" <| Test.specify "should be able to sum, average and standard deviation of values" <|
grouped = empty_table.group_by grouping [Sum "Value", Average "ValueWithNothing", Standard_Deviation "Value", (Standard_Deviation "ValueWithNothing" population=True)] grouped = empty_table.aggregate [Sum "Value", Average "ValueWithNothing", Standard_Deviation "Value", (Standard_Deviation "ValueWithNothing" population=True)]
grouped.row_count . should_equal 1 grouped.row_count . should_equal 1
grouped.columns.length . should_equal 4 grouped.columns.length . should_equal 4
grouped.columns.at 0 . name . should_equal "Sum Value" grouped.columns.at 0 . name . should_equal "Sum Value"
@ -164,7 +163,7 @@ spec =
grouped.columns.at 3 . at 0 . should_equal Nothing grouped.columns.at 3 . at 0 . should_equal Nothing
Test.specify "should be able to create median, mode and percentile values" <| Test.specify "should be able to create median, mode and percentile values" <|
grouped = empty_table.group_by grouping [Median "Index", Mode "Index", Percentile 0.25 "Value"] grouped = empty_table.aggregate [Median "Index", Mode "Index", Percentile 0.25 "Value"]
grouped.row_count . should_equal 1 grouped.row_count . should_equal 1
grouped.columns.length . should_equal 3 grouped.columns.length . should_equal 3
grouped.columns.at 0 . name . should_equal "Median Index" grouped.columns.at 0 . name . should_equal "Median Index"
@ -175,7 +174,7 @@ spec =
grouped.columns.at 2 . at 0 . should_equal Nothing grouped.columns.at 2 . at 0 . should_equal Nothing
Test.specify "should be able to get first and last values" <| Test.specify "should be able to get first and last values" <|
grouped = empty_table.group_by grouping [First "Index", Last "Value"] grouped = empty_table.aggregate [First "Index", Last "Value"]
grouped.row_count . should_equal 1 grouped.row_count . should_equal 1
grouped.columns.length . should_equal 2 grouped.columns.length . should_equal 2
grouped.columns.at 0 . name . should_equal "First Index" grouped.columns.at 0 . name . should_equal "First Index"
@ -184,7 +183,7 @@ spec =
grouped.columns.at 1 . at 0 . should_equal Nothing grouped.columns.at 1 . at 0 . should_equal Nothing
Test.specify "should be able to get minimum and maximum values" <| Test.specify "should be able to get minimum and maximum values" <|
grouped = empty_table.group_by grouping [Minimum "Value", Maximum "ValueWithNothing"] grouped = empty_table.aggregate [Minimum "Value", Maximum "ValueWithNothing"]
grouped.row_count . should_equal 1 grouped.row_count . should_equal 1
grouped.columns.length . should_equal 2 grouped.columns.length . should_equal 2
grouped.columns.at 0 . name . should_equal "Minimum Value" grouped.columns.at 0 . name . should_equal "Minimum Value"
@ -193,7 +192,7 @@ spec =
grouped.columns.at 1 . at 0 . should_equal Nothing grouped.columns.at 1 . at 0 . should_equal Nothing
Test.specify "should be able to get shortest, longest and concatenated values" <| Test.specify "should be able to get shortest, longest and concatenated values" <|
grouped = empty_table.group_by grouping [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"] grouped = empty_table.aggregate [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
grouped.row_count . should_equal 1 grouped.row_count . should_equal 1
grouped.columns.length . should_equal 3 grouped.columns.length . should_equal 3
grouped.columns.at 0 . name . should_equal "Shortest TextWithNothing" grouped.columns.at 0 . name . should_equal "Shortest TextWithNothing"
@ -203,17 +202,16 @@ spec =
grouped.columns.at 2 . name . should_equal "Concatenate Code" grouped.columns.at 2 . name . should_equal "Concatenate Code"
grouped.columns.at 2 . at 0 . should_equal Nothing grouped.columns.at 2 . at 0 . should_equal Nothing
Test.group "Table.group_by should not summarize empty table when grouped " <| Test.group "Table.aggregate should not summarize empty table when grouped " <|
grouping = Column_Selector.By_Index [0]
Test.specify "should be able to count" <| Test.specify "should be able to count" <|
grouped = empty_table.group_by grouping [Count Nothing] grouped = empty_table.aggregate [Group_By 0, Count Nothing]
grouped.row_count . should_equal 0 grouped.row_count . should_equal 0
grouped.columns.length . should_equal 2 grouped.columns.length . should_equal 2
grouped.columns.at 0 . name . should_equal "Code" grouped.columns.at 0 . name . should_equal "Code"
grouped.columns.at 1 . name . should_equal "Count" grouped.columns.at 1 . name . should_equal "Count"
Test.specify "should be able to count missing values" <| Test.specify "should be able to count missing values" <|
grouped = empty_table.group_by grouping [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"] grouped = empty_table.aggregate [Group_By 0, Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
grouped.row_count . should_equal 0 grouped.row_count . should_equal 0
grouped.columns.length . should_equal 5 grouped.columns.length . should_equal 5
grouped.columns.at 0 . name . should_equal "Code" grouped.columns.at 0 . name . should_equal "Code"
@ -223,14 +221,14 @@ spec =
grouped.columns.at 4 . name . should_equal "Count Not Empty TextWithNothing" grouped.columns.at 4 . name . should_equal "Count Not Empty TextWithNothing"
Test.specify "should be able to count distinct values" <| Test.specify "should be able to count distinct values" <|
grouped = empty_table.group_by grouping [Count_Distinct "Code"] grouped = empty_table.aggregate [Group_By 0, Count_Distinct "Code"]
grouped.row_count . should_equal 0 grouped.row_count . should_equal 0
grouped.columns.length . should_equal 2 grouped.columns.length . should_equal 2
grouped.columns.at 0 . name . should_equal "Code" grouped.columns.at 0 . name . should_equal "Code"
grouped.columns.at 1 . name . should_equal "Count Distinct Code" grouped.columns.at 1 . name . should_equal "Count Distinct Code"
Test.specify "should be able to sum, average and standard deviation of values" <| Test.specify "should be able to sum, average and standard deviation of values" <|
grouped = empty_table.group_by grouping [Sum "Value", Average "ValueWithNothing", Standard_Deviation "Value", (Standard_Deviation "ValueWithNothing" population=True)] grouped = empty_table.aggregate [Group_By 0, Sum "Value", Average "ValueWithNothing", Standard_Deviation "Value", (Standard_Deviation "ValueWithNothing" population=True)]
grouped.row_count . should_equal 0 grouped.row_count . should_equal 0
grouped.columns.length . should_equal 5 grouped.columns.length . should_equal 5
grouped.columns.at 0 . name . should_equal "Code" grouped.columns.at 0 . name . should_equal "Code"
@ -240,7 +238,7 @@ spec =
grouped.columns.at 4 . name . should_equal "Standard Deviation ValueWithNothing" grouped.columns.at 4 . name . should_equal "Standard Deviation ValueWithNothing"
Test.specify "should be able to create median values" <| Test.specify "should be able to create median values" <|
grouped = empty_table.group_by grouping [Median "Index", Mode "Index", Percentile 0.25 "Value"] grouped = empty_table.aggregate [Group_By 0, Median "Index", Mode "Index", Percentile 0.25 "Value"]
grouped.row_count . should_equal 0 grouped.row_count . should_equal 0
grouped.columns.length . should_equal 4 grouped.columns.length . should_equal 4
grouped.columns.at 0 . name . should_equal "Code" grouped.columns.at 0 . name . should_equal "Code"
@ -249,7 +247,7 @@ spec =
grouped.columns.at 3 . name . should_equal "25%-ile Value" grouped.columns.at 3 . name . should_equal "25%-ile Value"
Test.specify "should be able to get first and last values" <| Test.specify "should be able to get first and last values" <|
grouped = empty_table.group_by grouping [First "Index", Last "Value"] grouped = empty_table.aggregate [Group_By 0, First "Index", Last "Value"]
grouped.row_count . should_equal 0 grouped.row_count . should_equal 0
grouped.columns.length . should_equal 3 grouped.columns.length . should_equal 3
grouped.columns.at 0 . name . should_equal "Code" grouped.columns.at 0 . name . should_equal "Code"
@ -257,7 +255,7 @@ spec =
grouped.columns.at 2 . name . should_equal "Last Value" grouped.columns.at 2 . name . should_equal "Last Value"
Test.specify "should be able to get minimum and maximum values" <| Test.specify "should be able to get minimum and maximum values" <|
grouped = empty_table.group_by grouping [Minimum "Value", Maximum "ValueWithNothing"] grouped = empty_table.aggregate [Group_By 0, Minimum "Value", Maximum "ValueWithNothing"]
grouped.row_count . should_equal 0 grouped.row_count . should_equal 0
grouped.columns.length . should_equal 3 grouped.columns.length . should_equal 3
grouped.columns.at 0 . name . should_equal "Code" grouped.columns.at 0 . name . should_equal "Code"
@ -265,7 +263,7 @@ spec =
grouped.columns.at 2 . name . should_equal "Maximum ValueWithNothing" grouped.columns.at 2 . name . should_equal "Maximum ValueWithNothing"
Test.specify "should be able to get shortest, longest and concatenated values" <| Test.specify "should be able to get shortest, longest and concatenated values" <|
grouped = empty_table.group_by grouping [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"] grouped = empty_table.aggregate [Group_By 0, Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
grouped.row_count . should_equal 0 grouped.row_count . should_equal 0
grouped.columns.length . should_equal 4 grouped.columns.length . should_equal 4
grouped.columns.at 0 . name . should_equal "Code" grouped.columns.at 0 . name . should_equal "Code"
@ -273,10 +271,9 @@ spec =
grouped.columns.at 2 . name . should_equal "Longest TextWithNothing" grouped.columns.at 2 . name . should_equal "Longest TextWithNothing"
grouped.columns.at 3 . name . should_equal "Concatenate Code" grouped.columns.at 3 . name . should_equal "Concatenate Code"
Test.group "Table.group_by should be able to group on single field " <| Test.group "Table.aggregate should be able to group on single field " <|
grouping = Column_Selector.By_name.new ["Index"]
Test.specify "should be able to count" <| Test.specify "should be able to count" <|
grouped = table.group_by grouping [Count Nothing] grouped = table.aggregate [Group_By "Index", Count Nothing]
grouped.row_count . should_equal 10 grouped.row_count . should_equal 10
grouped.columns.length . should_equal 2 grouped.columns.length . should_equal 2
grouped.columns.at 0 . name . should_equal "Index" grouped.columns.at 0 . name . should_equal "Index"
@ -286,7 +283,7 @@ spec =
grouped.columns.at 1 . at idx . should_equal 261 grouped.columns.at 1 . at idx . should_equal 261
Test.specify "should be able to count missing values" <| Test.specify "should be able to count missing values" <|
grouped = table.group_by grouping [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"] grouped = table.aggregate [Group_By "Index", Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
grouped.row_count . should_equal 10 grouped.row_count . should_equal 10
grouped.columns.length . should_equal 5 grouped.columns.length . should_equal 5
grouped.columns.at 0 . name . should_equal "Index" grouped.columns.at 0 . name . should_equal "Index"
@ -302,7 +299,7 @@ spec =
grouped.columns.at 4 . at idx . should_equal 230 grouped.columns.at 4 . at idx . should_equal 230
Test.specify "should be able to count distinct values" <| Test.specify "should be able to count distinct values" <|
grouped = table.group_by grouping [Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag", Count_Distinct ["Index", "Flag"]] grouped = table.aggregate [Group_By "Index", Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag", Count_Distinct ["Index", "Flag"]]
grouped.row_count . should_equal 10 grouped.row_count . should_equal 10
grouped.columns.length . should_equal 5 grouped.columns.length . should_equal 5
grouped.columns.at 0 . name . should_equal "Index" grouped.columns.at 0 . name . should_equal "Index"
@ -318,7 +315,7 @@ spec =
grouped.columns.at 4 . at idx . should_equal 2 grouped.columns.at 4 . at idx . should_equal 2
Test.specify "should be able to sum, average and standard deviation of values" <| Test.specify "should be able to sum, average and standard deviation of values" <|
grouped = table.group_by grouping [Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)] grouped = table.aggregate [Group_By "Index", Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)]
grouped.row_count . should_equal 10 grouped.row_count . should_equal 10
grouped.columns.length . should_equal 9 grouped.columns.length . should_equal 9
grouped.columns.at 0 . name . should_equal "Index" grouped.columns.at 0 . name . should_equal "Index"
@ -342,7 +339,7 @@ spec =
grouped.columns.at 8 . at idx . should_equal 56.677714 epsilon=0.000001 grouped.columns.at 8 . at idx . should_equal 56.677714 epsilon=0.000001
Test.specify "should be able to create median values" <| Test.specify "should be able to create median values" <|
grouped = table.group_by grouping [Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"] grouped = table.aggregate [Group_By "Index", Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"]
grouped.row_count . should_equal 10 grouped.row_count . should_equal 10
grouped.columns.length . should_equal 7 grouped.columns.length . should_equal 7
grouped.columns.at 0 . name . should_equal "Index" grouped.columns.at 0 . name . should_equal "Index"
@ -362,7 +359,7 @@ spec =
grouped.columns.at 6 . at idx . should_equal -18.802000 epsilon=0.000001 grouped.columns.at 6 . at idx . should_equal -18.802000 epsilon=0.000001
Test.specify "should be able to get first and last values" <| Test.specify "should be able to get first and last values" <|
grouped = table.group_by grouping [First "TextWithNothing", Last "Value"] grouped = table.aggregate [Group_By "Index", First "TextWithNothing", Last "Value"]
grouped.row_count . should_equal 10 grouped.row_count . should_equal 10
grouped.columns.length . should_equal 3 grouped.columns.length . should_equal 3
grouped.columns.at 0 . name . should_equal "Index" grouped.columns.at 0 . name . should_equal "Index"
@ -374,7 +371,7 @@ spec =
grouped.columns.at 2 . at idx . should_equal 56.15916 epsilon=0.000001 grouped.columns.at 2 . at idx . should_equal 56.15916 epsilon=0.000001
Test.specify "should be able to get minimum and maximum values" <| Test.specify "should be able to get minimum and maximum values" <|
grouped = table.group_by grouping [Minimum "Value", Maximum "Value", Minimum "ValueWithNothing", Maximum "ValueWithNothing"] grouped = table.aggregate [Group_By "Index", Minimum "Value", Maximum "Value", Minimum "ValueWithNothing", Maximum "ValueWithNothing"]
grouped.row_count . should_equal 10 grouped.row_count . should_equal 10
grouped.columns.length . should_equal 5 grouped.columns.length . should_equal 5
grouped.columns.at 0 . name . should_equal "Index" grouped.columns.at 0 . name . should_equal "Index"
@ -390,7 +387,7 @@ spec =
grouped.columns.at 4 . at idx . should_equal 99.79 epsilon=0.000001 grouped.columns.at 4 . at idx . should_equal 99.79 epsilon=0.000001
Test.specify "should be able to get shortest, longest and concatenated values" <| Test.specify "should be able to get shortest, longest and concatenated values" <|
grouped = table.group_by grouping [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"] grouped = table.aggregate [Group_By "Index", Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
grouped.row_count . should_equal 10 grouped.row_count . should_equal 10
grouped.columns.length . should_equal 4 grouped.columns.length . should_equal 4
grouped.columns.at 0 . name . should_equal "Index" grouped.columns.at 0 . name . should_equal "Index"
@ -403,4 +400,141 @@ spec =
grouped.columns.at 3 . name . should_equal "Concatenate Code" grouped.columns.at 3 . name . should_equal "Concatenate Code"
grouped.columns.at 3 . at idx . length . should_equal 783 grouped.columns.at 3 . at idx . length . should_equal 783
Test.group "Table.aggregate should be able to group on multiple fields not in left columns" <|
Test.specify "should be able to count" <|
    # The two Group_By keys are deliberately separated by the aggregate, so
    # the expected output layout is: Flag (0), Count (1), Index (2).
    result = table.aggregate [Group_By "Flag", Count Nothing, Group_By "Index"]
    result.row_count . should_equal 20
    cols = result.columns
    cols.length . should_equal 3
    cols.at 0 . name . should_equal "Flag"
    cols.at 2 . name . should_equal "Index"
    # find_row is given the key values plus the positions of the key columns.
    row = find_row ["False", 6] result [0, 2]
    row.is_nothing . should_be_false
    cols.at 1 . name . should_equal "Count"
    cols.at 1 . at row . should_equal 127
Test.specify "should be able to count missing values" <|
    # Expected layout: Count Nothing (0), Count Not Nothing (1), Index (2),
    # Count Empty (3), Flag (4), Count Not Empty (5).
    result = table.aggregate [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Group_By "Index", Count_Empty "TextWithNothing", Group_By "Flag", Count_Not_Empty "TextWithNothing"]
    result.row_count . should_equal 20
    cols = result.columns
    cols.length . should_equal 6
    cols.at 4 . name . should_equal "Flag"
    cols.at 2 . name . should_equal "Index"
    # Key values are passed in the same order as the key column positions.
    row = find_row ["False", 6] result [4, 2]
    row.is_nothing . should_be_false
    cols.at 0 . name . should_equal "Count Nothing Hexadecimal"
    cols.at 0 . at row . should_equal 8
    cols.at 1 . name . should_equal "Count Not Nothing Hexadecimal"
    cols.at 1 . at row . should_equal 119
    cols.at 3 . name . should_equal "Count Empty TextWithNothing"
    cols.at 3 . at row . should_equal 12
    cols.at 5 . name . should_equal "Count Not Empty TextWithNothing"
    cols.at 5 . at row . should_equal 115
Test.specify "should be able to count distinct values" <|
    # The grouping keys sit at both ends of the output: Index (0) ... Flag (5).
    result = table.aggregate [Group_By "Index", Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag", Count_Distinct ["Index", "Flag"], Group_By "Flag"]
    result.row_count . should_equal 20
    cols = result.columns
    cols.length . should_equal 6
    cols.at 0 . name . should_equal "Index"
    cols.at 5 . name . should_equal "Flag"
    row = find_row ["False", 6] result [5, 0]
    row.is_nothing . should_be_false
    cols.at 1 . name . should_equal "Count Distinct Code"
    cols.at 1 . at row . should_equal 127
    # The group-by columns are checked to have exactly one distinct value
    # inside the selected group, both individually and combined.
    cols.at 2 . name . should_equal "Count Distinct Index"
    cols.at 2 . at row . should_equal 1
    cols.at 3 . name . should_equal "Count Distinct Flag"
    cols.at 3 . at row . should_equal 1
    cols.at 4 . name . should_equal "Count Distinct Index Flag"
    cols.at 4 . at row . should_equal 1
Test.specify "should be able to sum, average and standard deviation of values" <|
    # Both keys come first here: Index (0), Flag (1), then eight aggregates.
    result = table.aggregate [Group_By "Index", Group_By "Flag", Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)]
    result.row_count . should_equal 20
    cols = result.columns
    cols.length . should_equal 10
    cols.at 0 . name . should_equal "Index"
    cols.at 1 . name . should_equal "Flag"
    row = find_row ["False", 6] result [1, 0]
    row.is_nothing . should_be_false
    cols.at 2 . name . should_equal "Sum Value"
    cols.at 2 . at row . should_equal -103.050170 epsilon=0.000001
    cols.at 3 . name . should_equal "Sum ValueWithNothing"
    cols.at 3 . at row . should_equal 533.57 epsilon=0.000001
    cols.at 4 . name . should_equal "Average Value"
    cols.at 4 . at row . should_equal -0.811419 epsilon=0.000001
    cols.at 5 . name . should_equal "Average ValueWithNothing"
    cols.at 5 . at row . should_equal 4.721858 epsilon=0.000001
    cols.at 6 . name . should_equal "Standard Deviation Value"
    cols.at 6 . at row . should_equal 58.979275 epsilon=0.000001
    cols.at 7 . name . should_equal "Standard Deviation ValueWithNothing"
    cols.at 7 . at row . should_equal 57.561756 epsilon=0.000001
    # The population=True variants are expected under the same base name
    # with a "_1" suffix.
    cols.at 8 . name . should_equal "Standard Deviation Value_1"
    cols.at 8 . at row . should_equal 58.746614 epsilon=0.000001
    cols.at 9 . name . should_equal "Standard Deviation ValueWithNothing_1"
    cols.at 9 . at row . should_equal 57.306492 epsilon=0.000001
Test.specify "should be able to create median values" <|
    # Besides Median this also covers Mode and Percentile. The keys are in
    # the middle of the output: Index (4), Flag (5).
    result = table.aggregate [Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Group_By "Index", Group_By "Flag", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"]
    result.row_count . should_equal 20
    cols = result.columns
    cols.length . should_equal 8
    cols.at 5 . name . should_equal "Flag"
    cols.at 4 . name . should_equal "Index"
    row = find_row ["False", 6] result [5, 4]
    row.is_nothing . should_be_false
    cols.at 0 . name . should_equal "Median Index"
    cols.at 0 . at row . should_equal 6 epsilon=0.000001
    cols.at 1 . name . should_equal "Median Value"
    cols.at 1 . at row . should_equal 2.041150 epsilon=0.000001
    cols.at 2 . name . should_equal "Median ValueWithNothing"
    cols.at 2 . at row . should_equal 3.55 epsilon=0.000001
    cols.at 3 . name . should_equal "Mode Index"
    cols.at 3 . at row . should_equal 6
    cols.at 6 . name . should_equal "25%-ile Value"
    cols.at 6 . at row . should_equal -52.628925 epsilon=0.000001
    cols.at 7 . name . should_equal "40%-ile ValueWithNothing"
    cols.at 7 . at row . should_equal -17.174000 epsilon=0.000001
Test.specify "should be able to get first and last values" <|
    # Expected layout: Flag (0), First (1), Last (2), Index (3).
    result = table.aggregate [Group_By "Flag", First "TextWithNothing", Last "Value", Group_By "Index"]
    result.row_count . should_equal 20
    cols = result.columns
    cols.length . should_equal 4
    cols.at 0 . name . should_equal "Flag"
    cols.at 3 . name . should_equal "Index"
    row = find_row ["False", 6] result [0, 3]
    row.is_nothing . should_be_false
    cols.at 1 . name . should_equal "First TextWithNothing"
    cols.at 1 . at row . should_equal "kmqxqkl6qx"
    cols.at 2 . name . should_equal "Last Value"
    cols.at 2 . at row . should_equal 56.15916 epsilon=0.000001
Test.specify "should be able to get minimum and maximum values" <|
    # Expected layout: Index (0), Min/Max Value (1, 2), Flag (3),
    # Min/Max ValueWithNothing (4, 5).
    result = table.aggregate [Group_By "Index", Minimum "Value", Maximum "Value", Group_By "Flag", Minimum "ValueWithNothing", Maximum "ValueWithNothing"]
    result.row_count . should_equal 20
    cols = result.columns
    cols.length . should_equal 6
    cols.at 3 . name . should_equal "Flag"
    cols.at 0 . name . should_equal "Index"
    row = find_row ["False", 6] result [3, 0]
    row.is_nothing . should_be_false
    cols.at 1 . name . should_equal "Minimum Value"
    cols.at 1 . at row . should_equal -99.605880 epsilon=0.000001
    cols.at 2 . name . should_equal "Maximum Value"
    cols.at 2 . at row . should_equal 96.488390 epsilon=0.000001
    cols.at 4 . name . should_equal "Minimum ValueWithNothing"
    cols.at 4 . at row . should_equal -99.99 epsilon=0.000001
    cols.at 5 . name . should_equal "Maximum ValueWithNothing"
    cols.at 5 . at row . should_equal 97.17 epsilon=0.000001
Test.specify "should be able to get shortest, longest and concatenated values" <|
    # Keys lead the output here: Index (0), Flag (1).
    result = table.aggregate [Group_By "Index", Group_By "Flag", Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
    result.row_count . should_equal 20
    cols = result.columns
    cols.length . should_equal 5
    cols.at 0 . name . should_equal "Index"
    cols.at 1 . name . should_equal "Flag"
    # No explicit key-column positions are passed, so find_row's default
    # applies (defined elsewhere).
    row = find_row [6, "False"] result
    row.is_nothing . should_be_false
    cols.at 2 . name . should_equal "Shortest TextWithNothing"
    cols.at 2 . at row . should_equal "kmqxqkl6qx"
    cols.at 3 . name . should_equal "Longest TextWithNothing"
    cols.at 3 . at row . should_equal "kmqxqkl6qx"
    cols.at 4 . name . should_equal "Concatenate Code"
    cols.at 4 . at row . length . should_equal 381
main = Test.Suite.run_main here.spec main = Test.Suite.run_main here.spec

View File

@ -9,6 +9,8 @@ import project.Csv_Spec
import project.Json_Spec import project.Json_Spec
import project.Table_Spec import project.Table_Spec
import project.Spreadsheet_Spec import project.Spreadsheet_Spec
import project.Aggregate_Column_Spec
import project.Aggregate_Spec
main = Test.Suite.run_main <| main = Test.Suite.run_main <|
Column_Spec.spec Column_Spec.spec
@ -18,3 +20,5 @@ main = Test.Suite.run_main <|
Table_Spec.spec Table_Spec.spec
Database_Spec.sqlite_spec Database_Spec.sqlite_spec
Model_Spec.spec Model_Spec.spec
Aggregate_Column_Spec.spec
Aggregate_Spec.spec