mirror of
https://github.com/enso-org/enso.git
synced 2024-11-23 08:08:34 +03:00
Refactor table.group_by to table.aggregate (#3339)
Following the UX work, move to the `table.aggregate` function.
This commit is contained in:
parent
dedd1eac96
commit
6c1c4554f5
@ -64,6 +64,7 @@
|
||||
- [Implemented `Map.first`, `Map.last` functions. Expanded `Table.group_by` to
|
||||
also compute mode, percentile, minimum, maximum.][3318]
|
||||
- [Implemented `Text.location_of` and `Text.location_of_all` methods.][3324]
|
||||
- [Replaced `Table.group_by` with `Table.aggregate`][3339]
|
||||
|
||||
[debug-shortcuts]:
|
||||
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
||||
@ -103,6 +104,7 @@
|
||||
[3317]: https://github.com/enso-org/enso/pull/3317
|
||||
[3318]: https://github.com/enso-org/enso/pull/3318
|
||||
[3324]: https://github.com/enso-org/enso/pull/3324
|
||||
[3339]: https://github.com/enso-org/enso/pull/3339
|
||||
|
||||
#### Enso Compiler
|
||||
|
||||
|
@ -1,15 +1,18 @@
|
||||
from Standard.Base import all
|
||||
|
||||
from Standard.Table.Data.Column as Column_Module import Column
|
||||
import Standard.Table.Data.Group_By
|
||||
import Standard.Table.Data.Group_By_Key
|
||||
|
||||
## Defines an Aggregate Column
|
||||
type Aggregate_Column
|
||||
## Group By
|
||||
type Group_By (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
|
||||
|
||||
## Creates a new column with the row count of each group
|
||||
|
||||
Arguments:
|
||||
- name: name of new column.
|
||||
type Count (name:Text|Nothing=Nothing)
|
||||
type Count (new_name:Text|Nothing=Nothing)
|
||||
|
||||
## Creates a new column with the count of unique items in the selected
|
||||
column(s) within each group.
|
||||
@ -18,7 +21,7 @@ type Aggregate_Column
|
||||
- columns: either a single or set of columns (specified by name, index or Column object) to count across.
|
||||
- name: name of new column.
|
||||
- ignore_nothing: if all values are Nothing won't be included.
|
||||
type Count_Distinct (columns:Column|Text|Integer|[(Column|Text|Integer)]) (name:Text|Nothing=Nothing) (ignore_nothing:Boolean=False)
|
||||
type Count_Distinct (columns:Column|Text|Integer|[(Column|Text|Integer)]) (new_name:Text|Nothing=Nothing) (ignore_nothing:Boolean=False)
|
||||
|
||||
## ALIAS Count_Not_Null
|
||||
|
||||
@ -28,7 +31,7 @@ type Aggregate_Column
|
||||
Arguments:
|
||||
- column: column (specified by name, index or Column object) to count.
|
||||
- name: name of new column.
|
||||
type Count_Not_Nothing (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||
type Count_Not_Nothing (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
|
||||
|
||||
## ALIAS Count_Null, Count_Missing
|
||||
|
||||
@ -38,7 +41,7 @@ type Aggregate_Column
|
||||
Arguments:
|
||||
- column: column (specified by name, index or Column object) to count.
|
||||
- name: name of new column.
|
||||
type Count_Nothing (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||
type Count_Nothing (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
|
||||
|
||||
## Creates a new column with the count of not `Nothing` (null) and non-empty
|
||||
("") values of the column within each group.
|
||||
@ -46,7 +49,7 @@ type Aggregate_Column
|
||||
Arguments:
|
||||
- column: column (specified by name, index or Column object) to count.
|
||||
- name: name of new column.
|
||||
type Count_Not_Empty (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||
type Count_Not_Empty (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
|
||||
|
||||
## Creates a new column with the count of `Nothing` (null) or empty ("")
|
||||
text values of the column within each group.
|
||||
@ -54,7 +57,7 @@ type Aggregate_Column
|
||||
Arguments:
|
||||
- column: column (specified by name, index or Column object) to count.
|
||||
- name: name of new column.
|
||||
type Count_Empty (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||
type Count_Empty (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
|
||||
|
||||
## Creates a new column with the sum of values (ignoring missing values) of
|
||||
the specified column within each group.
|
||||
@ -62,7 +65,7 @@ type Aggregate_Column
|
||||
Arguments:
|
||||
- column: column (specified by name, index or Column object) to total.
|
||||
- name: name of new column.
|
||||
type Sum (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||
type Sum (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
|
||||
|
||||
## Creates a new column with the mean of values (ignoring missing values) of
|
||||
the specified column within each group.
|
||||
@ -70,7 +73,7 @@ type Aggregate_Column
|
||||
Arguments:
|
||||
- column: column (specified by name, index or Column object) to average.
|
||||
- name: name of new column.
|
||||
type Average (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||
type Average (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
|
||||
|
||||
## Creates a new column with the median of values (ignoring missing values)
|
||||
of the specified column within each group.
|
||||
@ -78,7 +81,7 @@ type Aggregate_Column
|
||||
Arguments:
|
||||
- column: column (specified by name, index or Column object) to calculate median on.
|
||||
- name: name of new column.
|
||||
type Median (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||
type Median (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
|
||||
|
||||
## Creates a new column with the percentile of values (ignoring missing values)
|
||||
of the specified column within each group.
|
||||
@ -87,7 +90,7 @@ type Aggregate_Column
|
||||
- percentile: Percentage to compute from 0-1 inclusive.
|
||||
- column: column (specified by name, index or Column object) to compute percentile.
|
||||
- name: name of new column.
|
||||
type Percentile (percentile:Decimal) (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||
type Percentile (percentile:Decimal) (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
|
||||
|
||||
## Creates a new column with the mode of values (ignoring missing values)
|
||||
of the specified column within each group.
|
||||
@ -95,7 +98,7 @@ type Aggregate_Column
|
||||
Arguments:
|
||||
- column: column (specified by name, index or Column object) to find the most common value.
|
||||
- name: name of new column.
|
||||
type Mode (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||
type Mode (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
|
||||
|
||||
## Creates a new column with the standard deviation of values (ignoring
|
||||
missing values) of the column within each group.
|
||||
@ -104,7 +107,7 @@ type Aggregate_Column
|
||||
- column: column (specified by name, index or Column object) to compute standard deviation.
|
||||
- name: name of new column.
|
||||
- population: specifies if group is a sample or the population.
|
||||
type Standard_Deviation (column:Column|Text|Integer) (name:Text|Nothing=Nothing) (population:Boolean=False)
|
||||
type Standard_Deviation (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) (population:Boolean=False)
|
||||
|
||||
## Creates a new column with the values concatenated together. `Nothing` values will become an empty string.
|
||||
|
||||
@ -116,7 +119,7 @@ type Aggregate_Column
|
||||
- suffix: added at the end of the result.
|
||||
- quote_char: character used to quote the values if the value is `Empty`
|
||||
or contains the separator.
|
||||
type Concatenate (column:Column|Text|Integer) (name:Text|Nothing=Nothing) (separator:Text="") (prefix:Text="") (suffix:Text="") (quote_char:Text="")
|
||||
type Concatenate (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) (separator:Text="") (prefix:Text="") (suffix:Text="") (quote_char:Text="")
|
||||
|
||||
## Creates a new column with the first value in each group.
|
||||
|
||||
@ -127,7 +130,7 @@ type Aggregate_Column
|
||||
not missing value returned.
|
||||
- order_by: required for database tables. Specifies how to order the
|
||||
results within the group.
|
||||
type First (column:Column|Text|Integer) (name:Text|Nothing=Nothing) (ignore_nothing:Boolean=True) (order_by:Column_Selector|Nothing=Nothing)
|
||||
type First (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) (ignore_nothing:Boolean=True) (order_by:Column_Selector|Nothing=Nothing)
|
||||
|
||||
## Creates a new column with the last value in each group.
|
||||
|
||||
@ -138,42 +141,43 @@ type Aggregate_Column
|
||||
not missing value returned.
|
||||
- order_by: required for database tables. Specifies how to order the
|
||||
results within the group.
|
||||
type Last (column:Column|Text|Integer) (name:Text|Nothing=Nothing) (ignore_nothing:Boolean=True) (order_by:Column_Selector|Nothing=Nothing)
|
||||
type Last (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing) (ignore_nothing:Boolean=True) (order_by:Column_Selector|Nothing=Nothing)
|
||||
|
||||
## Creates a new column with the maximum value in each group.
|
||||
|
||||
Arguments:
|
||||
- column: column (specified by name, index or Column object) to find maximum.
|
||||
- name: name of new column.
|
||||
type Maximum (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||
type Maximum (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
|
||||
|
||||
## Creates a new column with the minimum value in each group.
|
||||
|
||||
Arguments:
|
||||
- column: column (specified by name, index or Column object) to find minimum.
|
||||
- name: name of new column.
|
||||
type Minimum (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||
type Minimum (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
|
||||
|
||||
## Creates a new column with the shortest text in each group.
|
||||
|
||||
Arguments:
|
||||
- column: column (specified by name, index or Column object) to find shortest value.
|
||||
- name: name of new column.
|
||||
type Shortest (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||
type Shortest (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
|
||||
|
||||
## Creates a new column with the longest text in each group.
|
||||
|
||||
Arguments:
|
||||
- column: column (specified by name, index or Column object) to find longest value.
|
||||
- name: name of new column.
|
||||
type Longest (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||
type Longest (column:Column|Text|Integer) (new_name:Text|Nothing=Nothing)
|
||||
|
||||
## Gets a column name to use for the aggregate column
|
||||
column_name : Table->Text
|
||||
column_name table =
|
||||
if this.name.is_nothing.not then this.name else
|
||||
if this.new_name.is_nothing.not then this.new_name else
|
||||
get_name c = (this.resolve_column table c).name
|
||||
case this of
|
||||
Group_By c _ -> (get_name c)
|
||||
Count _ -> "Count"
|
||||
Count_Distinct c _ _ ->
|
||||
case c of
|
||||
@ -208,6 +212,7 @@ type Aggregate_Column
|
||||
|
||||
initial_value : Any
|
||||
initial_value = case this of
|
||||
Group_By _ _ -> Nothing
|
||||
Count_Distinct _ _ _ -> Map.empty
|
||||
Median _ _ -> Map.empty
|
||||
Percentile _ _ _ -> Map.empty
|
||||
@ -235,6 +240,7 @@ type Aggregate_Column
|
||||
_ -> Error.throw (Invalid_Aggregation_Method this.col "Empty is only valid for Text")
|
||||
|
||||
case this of
|
||||
Group_By c _ -> create_closure c col->_->i->(col.at i)
|
||||
Count _ -> count->_->(count+1)
|
||||
Count_Not_Nothing c _ -> create_closure c col->count->i->(count + if (col.at i).is_nothing then 0 else 1)
|
||||
Count_Nothing c _ -> create_closure c col->count->i->(count + if (col.at i).is_nothing then 1 else 0)
|
||||
@ -294,7 +300,7 @@ type Aggregate_Column
|
||||
resolved = case columns of
|
||||
Vector.Vector _ -> columns.map c->(this.resolve_column table c)
|
||||
_ -> [this.resolve_column table columns]
|
||||
key_maker i = Group_By.key (resolved.map c->(c.at i))
|
||||
key_maker i = Group_By_Key.key (resolved.map c->(c.at i))
|
||||
case ignore_nothing of
|
||||
False-> map->i->(map.insert (key_maker i) 1)
|
||||
True-> map->i->
|
||||
|
@ -17,7 +17,7 @@ from Standard.Base.Error.Problem_Behavior as Problem_Behavior_Module import Prob
|
||||
import Standard.Table.Data.Column_Mapping
|
||||
import Standard.Table.Data.Position
|
||||
|
||||
import Standard.Table.Data.Group_By
|
||||
import Standard.Table.Data.Group_By_Key
|
||||
import Standard.Table.Data.Aggregate_Column
|
||||
|
||||
polyglot java import org.enso.table.data.table.Table as Java_Table
|
||||
@ -487,49 +487,46 @@ type Table
|
||||
|
||||
|
||||
## Prototype Group By function
|
||||
group_by : Column_Selector -> [Aggregate_Column] -> Problem_Behavior -> Table
|
||||
group_by selector columns (on_problems=Report_Warning) =
|
||||
aggregate : [Aggregate_Column] -> Problem_Behavior -> Table
|
||||
aggregate columns (on_problems=Report_Warning) =
|
||||
# Grouping Key
|
||||
key_columns = if selector.is_nothing then [] else
|
||||
Table_Helpers.select_columns_helper internal_columns=this.columns selector=selector reorder=True on_problems=on_problems
|
||||
key_length = key_columns.length
|
||||
make_key = if (key_length == 0) then _->(Group_By.key [1]) else i->(Group_By.key (key_columns.map v->(v.at i)))
|
||||
is_a_key c = case c of
|
||||
Aggregate_Column.Group_By _ _ -> True
|
||||
_ -> False
|
||||
key_columns = columns.filter is_a_key . map c->(c.resolve_column this c.column)
|
||||
make_key = if (key_columns.length == 0) then _->(Group_By_Key.key [1]) else i->(Group_By_Key.key (key_columns.map v->(v.at i)))
|
||||
|
||||
# New Table Accumulator
|
||||
name_strategy = Unique_Name_Strategy.new
|
||||
new_table = (key_columns.map c->c.name)+(columns.map c->(c.column_name this)) . map n->[name_strategy.make_unique n, Vector.new_builder]
|
||||
add_row key =
|
||||
new_table = columns.map c->(c.column_name this) . map n->[name_strategy.make_unique n, Vector.new_builder]
|
||||
add_row _ =
|
||||
idx = new_table.at 0 . at 1 . length
|
||||
0.up_to key_length . each i->
|
||||
new_table.at i . at 1 . append (key.values.at i).value
|
||||
0.up_to (columns.length) . each i->
|
||||
column = columns.at i
|
||||
new_table.at (i + key_length) . at 1 . append (column.initial_value)
|
||||
new_table.at i . at 1 . append ((columns.at i).initial_value)
|
||||
idx
|
||||
|
||||
# Fold
|
||||
aggregators = columns.map c->(c.make_aggregator this)
|
||||
aggregate map i =
|
||||
key = make_key i
|
||||
row_index = map.get_or_else key (add_row key)
|
||||
row_index = map.get_or_else key (add_row Nothing)
|
||||
|
||||
# Accumulate
|
||||
0.up_to (columns.length) . each j->
|
||||
aggregator = aggregators.at j
|
||||
array = new_table.at (j + key_length) . at 1 . to_array
|
||||
array = new_table.at j . at 1 . to_array
|
||||
current = array . at row_index
|
||||
new = aggregator current i
|
||||
array . set_at row_index new
|
||||
|
||||
map.insert key row_index
|
||||
if ((key_length == 0) && (this.row_count == 0)) then (add_row []) else
|
||||
if ((key_columns.length == 0) && (this.row_count == 0)) then (add_row Nothing) else
|
||||
0.up_to this.row_count . fold Map.empty aggregate
|
||||
|
||||
# Now Finalise and make a table
|
||||
finalise builder index =
|
||||
if index < key_length then builder.to_vector else
|
||||
column = columns.at (index - key_length)
|
||||
Vector.new builder.length i->(column.evaluate (builder.to_array.at i))
|
||||
column = columns.at index
|
||||
Vector.new builder.length i->(column.evaluate (builder.to_array.at i))
|
||||
here.new (new_table.map_with_index i->c->[c.at 0,finalise (c.at 1) i])
|
||||
|
||||
|
||||
|
49
test/Benchmarks/src/Table/Aggregate.enso
Normal file
49
test/Benchmarks/src/Table/Aggregate.enso
Normal file
@ -0,0 +1,49 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import Standard.Test.Bench
|
||||
import Standard.Test.Faker
|
||||
|
||||
import Standard.Table.Data.Table
|
||||
import Standard.Table.Data.Column_Selector
|
||||
from Standard.Table.Data.Aggregate_Column import all
|
||||
|
||||
## Bench Utilities ============================================================
|
||||
|
||||
vector_size = 2500
|
||||
iter_size = 100
|
||||
num_iterations = 10
|
||||
|
||||
create_table : Integer->Integer->Table
|
||||
create_table rows (seed=1646322139) =
|
||||
faker = Faker.new seed
|
||||
key1 = ["Code", 0.up_to rows . map _-> faker.alpha 3]
|
||||
key2 = ["Index", 0.up_to rows . map _-> faker.integer 0 10]
|
||||
key3 = ["Flag", 0.up_to rows . map _-> faker.boolean]
|
||||
value1 = ["Value", 0.up_to rows . map _-> ((faker.decimal -100 100)*100000).floor/100000]
|
||||
value2 = ["ValueWithNothing", 0.up_to rows . map _-> faker.make_some_nothing ((faker.decimal -100 100)*100).floor/100]
|
||||
text1 = ["TextWithNothing", 0.up_to rows . map _-> faker.make_some_nothing (faker.alpha_numeric 10)]
|
||||
text2 = ["Hexadecimal", 0.up_to rows . map _-> faker.make_some_nothing (faker.hexadecimal 8)]
|
||||
Table.new [key1, key2, key3, value1, value2, text1, text2]
|
||||
|
||||
# The Benchmarks ==============================================================
|
||||
main =
|
||||
IO.println <| "Making table data..."
|
||||
table = here.create_table here.vector_size
|
||||
|
||||
Bench.measure (table.aggregate [Count Nothing]) "Count table" here.iter_size here.num_iterations
|
||||
Bench.measure (table.aggregate [Count_Distinct "Index"]) "Count Distinct table" here.iter_size here.num_iterations
|
||||
Bench.measure (table.aggregate [Standard_Deviation "Value"]) "StDev table" here.iter_size here.num_iterations
|
||||
Bench.measure (table.aggregate [Median "Value"]) "Median table" here.iter_size here.num_iterations
|
||||
Bench.measure (table.aggregate [Mode "Index"]) "Mode table" here.iter_size here.num_iterations
|
||||
|
||||
Bench.measure (table.aggregate [Group_By "Index", Count Nothing]) "Count grouped" here.iter_size here.num_iterations
|
||||
Bench.measure (table.aggregate [Group_By "Index", Count_Distinct "Code"]) "Count Distinct grouped" here.iter_size here.num_iterations
|
||||
Bench.measure (table.aggregate [Group_By "Index", Standard_Deviation "Value"]) "StDev grouped" here.iter_size here.num_iterations
|
||||
Bench.measure (table.aggregate [Group_By "Index", Median "Value"]) "Median grouped" here.iter_size here.num_iterations
|
||||
Bench.measure (table.aggregate [Group_By "Index", Mode "Index"]) "Mode grouped" here.iter_size here.num_iterations
|
||||
|
||||
Bench.measure (table.aggregate [Group_By "Index", Group_By "Flag", Count Nothing]) "Count 2 level groups" here.iter_size here.num_iterations
|
||||
Bench.measure (table.aggregate [Group_By "Index", Group_By "Flag", Count_Distinct "Code"]) "Count Distinct 2 level groups" here.iter_size here.num_iterations
|
||||
Bench.measure (table.aggregate [Group_By "Index", Group_By "Flag", Standard_Deviation "Value"]) "StDev 2 level groups" here.iter_size here.num_iterations
|
||||
Bench.measure (table.aggregate [Group_By "Index", Group_By "Flag", Median "Value"]) "Median 2 level groups" here.iter_size here.num_iterations
|
||||
Bench.measure (table.aggregate [Group_By "Index", Group_By "Flag", Mode "Index"]) "Mode 2 level groups" here.iter_size here.num_iterations
|
@ -1,49 +0,0 @@
|
||||
from Standard.Base import all
|
||||
|
||||
import Standard.Test.Bench
|
||||
import Standard.Test.Faker
|
||||
|
||||
import Standard.Table.Data.Table
|
||||
import Standard.Table.Data.Column_Selector
|
||||
from Standard.Table.Data.Aggregate_Column import all
|
||||
|
||||
## Bench Utilities ============================================================
|
||||
|
||||
vector_size = 2500
|
||||
iter_size = 100
|
||||
num_iterations = 10
|
||||
|
||||
create_table : Integer->Integer->Table
|
||||
create_table rows (seed=1646322139) =
|
||||
faker = Faker.new seed
|
||||
key1 = ["Code", 0.up_to rows . map _-> faker.alpha 3]
|
||||
key2 = ["Index", 0.up_to rows . map _-> faker.integer 0 10]
|
||||
key3 = ["Flag", 0.up_to rows . map _-> faker.boolean]
|
||||
value1 = ["Value", 0.up_to rows . map _-> ((faker.decimal -100 100)*100000).floor/100000]
|
||||
value2 = ["ValueWithNothing", 0.up_to rows . map _-> faker.make_some_nothing ((faker.decimal -100 100)*100).floor/100]
|
||||
text1 = ["TextWithNothing", 0.up_to rows . map _-> faker.make_some_nothing (faker.alpha_numeric 10)]
|
||||
text2 = ["Hexadecimal", 0.up_to rows . map _-> faker.make_some_nothing (faker.hexadecimal 8)]
|
||||
Table.new [key1, key2, key3, value1, value2, text1, text2]
|
||||
|
||||
# The Benchmarks ==============================================================
|
||||
main =
|
||||
IO.println <| "Making table data..."
|
||||
table = here.create_table here.vector_size
|
||||
|
||||
Bench.measure (table.group_by (Column_Selector.By_Index []) [Count Nothing]) "Count table" here.iter_size here.num_iterations
|
||||
Bench.measure (table.group_by (Column_Selector.By_Index []) [Count_Distinct "Index"]) "Count Distinct table" here.iter_size here.num_iterations
|
||||
Bench.measure (table.group_by (Column_Selector.By_Index []) [Standard_Deviation "Value"]) "StDev table" here.iter_size here.num_iterations
|
||||
Bench.measure (table.group_by (Column_Selector.By_Index []) [Median "Value"]) "Median table" here.iter_size here.num_iterations
|
||||
Bench.measure (table.group_by (Column_Selector.By_Index []) [Mode "Index"]) "Mode table" here.iter_size here.num_iterations
|
||||
|
||||
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Count Nothing]) "Count grouped" here.iter_size here.num_iterations
|
||||
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Count_Distinct "Code"]) "Count Distinct grouped" here.iter_size here.num_iterations
|
||||
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Standard_Deviation "Value"]) "StDev grouped" here.iter_size here.num_iterations
|
||||
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Median "Value"]) "Median grouped" here.iter_size here.num_iterations
|
||||
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Mode "Index"]) "Mode grouped" here.iter_size here.num_iterations
|
||||
|
||||
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Count Nothing]) "Count 2 level groups" here.iter_size here.num_iterations
|
||||
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Count_Distinct "Code"]) "Count Distinct 2 level groups" here.iter_size here.num_iterations
|
||||
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Standard_Deviation "Value"]) "StDev 2 level groups" here.iter_size here.num_iterations
|
||||
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Median "Value"]) "Median 2 level groups" here.iter_size here.num_iterations
|
||||
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Mode "Index"]) "Mode 2 level groups" here.iter_size here.num_iterations
|
@ -11,21 +11,21 @@ spec =
|
||||
table = Table.from_csv file_contents
|
||||
empty_table = Table.new <| table.columns.map c->[c.name, []]
|
||||
|
||||
find_row key table =
|
||||
find_row key table (columns=Nothing) =
|
||||
table_columns = if columns.is_nothing then table.columns else columns.map x->(table.columns.at x)
|
||||
0.up_to table.row_count . find i->
|
||||
0.up_to key.length . all j-> (table.columns.at j . at i)==(key.at j)
|
||||
0.up_to key.length . all j-> (table_columns.at j . at i)==(key.at j)
|
||||
|
||||
Test.group "Table.group_by should summarize whole table " <|
|
||||
grouping = Column_Selector.By_Index []
|
||||
Test.group "Table.aggregate should summarize whole table " <|
|
||||
Test.specify "should be able to count" <|
|
||||
grouped = table.group_by grouping [Count Nothing]
|
||||
grouped = table.aggregate [Count Nothing]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 1
|
||||
grouped.columns.at 0 . name . should_equal "Count"
|
||||
grouped.columns.at 0 . at 0 . should_equal 2500
|
||||
|
||||
Test.specify "should be able to count missing values" <|
|
||||
grouped = table.group_by grouping [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
|
||||
grouped = table.aggregate [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 4
|
||||
grouped.columns.at 0 . name . should_equal "Count Nothing Hexadecimal"
|
||||
@ -38,7 +38,7 @@ spec =
|
||||
grouped.columns.at 3 . at 0 . should_equal 2251
|
||||
|
||||
Test.specify "should be able to count distinct values" <|
|
||||
grouped = table.group_by grouping [Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag", Count_Distinct ["Index", "Flag"]]
|
||||
grouped = table.aggregate [Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag", Count_Distinct ["Index", "Flag"]]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 4
|
||||
grouped.columns.at 0 . name . should_equal "Count Distinct Code"
|
||||
@ -51,7 +51,7 @@ spec =
|
||||
grouped.columns.at 3 . at 0 . should_equal 20
|
||||
|
||||
Test.specify "should be able to sum, average and standard deviation of values" <|
|
||||
grouped = table.group_by grouping [Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)]
|
||||
grouped = table.aggregate [Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 8
|
||||
grouped.columns.at 0 . name . should_equal "Sum Value"
|
||||
@ -72,7 +72,7 @@ spec =
|
||||
grouped.columns.at 7 . at 0 . should_equal 58.575554 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to create median, mode and percentile values" <|
|
||||
grouped = table.group_by grouping [Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"]
|
||||
grouped = table.aggregate [Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 6
|
||||
grouped.columns.at 0 . name . should_equal "Median Index"
|
||||
@ -89,7 +89,7 @@ spec =
|
||||
grouped.columns.at 5 . at 0 . should_equal -17.960000 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to get first and last values" <|
|
||||
grouped = table.group_by grouping [First "Index", Last "Value"]
|
||||
grouped = table.aggregate [First "Index", Last "Value"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 2
|
||||
grouped.columns.at 0 . name . should_equal "First Index"
|
||||
@ -98,7 +98,7 @@ spec =
|
||||
grouped.columns.at 1 . at 0 . should_equal 70.99931 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to get minimum and maximum values" <|
|
||||
grouped = table.group_by grouping [Minimum "Value", Maximum "Value", Minimum "ValueWithNothing", Maximum "ValueWithNothing"]
|
||||
grouped = table.aggregate [Minimum "Value", Maximum "Value", Minimum "ValueWithNothing", Maximum "ValueWithNothing"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 4
|
||||
grouped.columns.at 0 . name . should_equal "Minimum Value"
|
||||
@ -111,7 +111,7 @@ spec =
|
||||
grouped.columns.at 3 . at 0 . should_equal 99.95 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to get shortest, longest and concatenated values" <|
|
||||
grouped = table.group_by grouping [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
|
||||
grouped = table.aggregate [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 3
|
||||
grouped.columns.at 0 . name . should_equal "Shortest TextWithNothing"
|
||||
@ -121,17 +121,16 @@ spec =
|
||||
grouped.columns.at 2 . name . should_equal "Concatenate Code"
|
||||
grouped.columns.at 2 . at 0 . length . should_equal 7500
|
||||
|
||||
Test.group "Table.group_by should summarize empty table " <|
|
||||
grouping = Column_Selector.By_Index []
|
||||
Test.group "Table.aggregate should summarize empty table " <|
|
||||
Test.specify "should be able to count" <|
|
||||
grouped = empty_table.group_by grouping [Count Nothing]
|
||||
grouped = empty_table.aggregate [Count Nothing]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 1
|
||||
grouped.columns.at 0 . name . should_equal "Count"
|
||||
grouped.columns.at 0 . at 0 . should_equal 0
|
||||
|
||||
Test.specify "should be able to count missing values" <|
|
||||
grouped = empty_table.group_by grouping [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
|
||||
grouped = empty_table.aggregate [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 4
|
||||
grouped.columns.at 0 . name . should_equal "Count Nothing Hexadecimal"
|
||||
@ -144,14 +143,14 @@ spec =
|
||||
grouped.columns.at 3 . at 0 . should_equal 0
|
||||
|
||||
Test.specify "should be able to count distinct values" <|
|
||||
grouped = empty_table.group_by grouping [Count_Distinct "Code"]
|
||||
grouped = empty_table.aggregate [Count_Distinct "Code"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 1
|
||||
grouped.columns.at 0 . name . should_equal "Count Distinct Code"
|
||||
grouped.columns.at 0 . at 0 . should_equal 0
|
||||
|
||||
Test.specify "should be able to sum, average and standard deviation of values" <|
|
||||
grouped = empty_table.group_by grouping [Sum "Value", Average "ValueWithNothing", Standard_Deviation "Value", (Standard_Deviation "ValueWithNothing" population=True)]
|
||||
grouped = empty_table.aggregate [Sum "Value", Average "ValueWithNothing", Standard_Deviation "Value", (Standard_Deviation "ValueWithNothing" population=True)]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 4
|
||||
grouped.columns.at 0 . name . should_equal "Sum Value"
|
||||
@ -164,7 +163,7 @@ spec =
|
||||
grouped.columns.at 3 . at 0 . should_equal Nothing
|
||||
|
||||
Test.specify "should be able to create median, mode and percentile values" <|
|
||||
grouped = empty_table.group_by grouping [Median "Index", Mode "Index", Percentile 0.25 "Value"]
|
||||
grouped = empty_table.aggregate [Median "Index", Mode "Index", Percentile 0.25 "Value"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 3
|
||||
grouped.columns.at 0 . name . should_equal "Median Index"
|
||||
@ -175,7 +174,7 @@ spec =
|
||||
grouped.columns.at 2 . at 0 . should_equal Nothing
|
||||
|
||||
Test.specify "should be able to get first and last values" <|
|
||||
grouped = empty_table.group_by grouping [First "Index", Last "Value"]
|
||||
grouped = empty_table.aggregate [First "Index", Last "Value"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 2
|
||||
grouped.columns.at 0 . name . should_equal "First Index"
|
||||
@ -184,7 +183,7 @@ spec =
|
||||
grouped.columns.at 1 . at 0 . should_equal Nothing
|
||||
|
||||
Test.specify "should be able to get minimum and maximum values" <|
|
||||
grouped = empty_table.group_by grouping [Minimum "Value", Maximum "ValueWithNothing"]
|
||||
grouped = empty_table.aggregate [Minimum "Value", Maximum "ValueWithNothing"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 2
|
||||
grouped.columns.at 0 . name . should_equal "Minimum Value"
|
||||
@ -193,7 +192,7 @@ spec =
|
||||
grouped.columns.at 1 . at 0 . should_equal Nothing
|
||||
|
||||
Test.specify "should be able to get shortest, longest and concatenated values" <|
|
||||
grouped = empty_table.group_by grouping [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
|
||||
grouped = empty_table.aggregate [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
|
||||
grouped.row_count . should_equal 1
|
||||
grouped.columns.length . should_equal 3
|
||||
grouped.columns.at 0 . name . should_equal "Shortest TextWithNothing"
|
||||
@ -203,17 +202,16 @@ spec =
|
||||
grouped.columns.at 2 . name . should_equal "Concatenate Code"
|
||||
grouped.columns.at 2 . at 0 . should_equal Nothing
|
||||
|
||||
Test.group "Table.group_by should not summarize empty table when grouped " <|
|
||||
grouping = Column_Selector.By_Index [0]
|
||||
Test.group "Table.aggregate should not summarize empty table when grouped " <|
|
||||
Test.specify "should be able to count" <|
|
||||
grouped = empty_table.group_by grouping [Count Nothing]
|
||||
grouped = empty_table.aggregate [Group_By 0, Count Nothing]
|
||||
grouped.row_count . should_equal 0
|
||||
grouped.columns.length . should_equal 2
|
||||
grouped.columns.at 0 . name . should_equal "Code"
|
||||
grouped.columns.at 1 . name . should_equal "Count"
|
||||
|
||||
Test.specify "should be able to count missing values" <|
|
||||
grouped = empty_table.group_by grouping [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
|
||||
grouped = empty_table.aggregate [Group_By 0, Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
|
||||
grouped.row_count . should_equal 0
|
||||
grouped.columns.length . should_equal 5
|
||||
grouped.columns.at 0 . name . should_equal "Code"
|
||||
@ -223,14 +221,14 @@ spec =
|
||||
grouped.columns.at 4 . name . should_equal "Count Not Empty TextWithNothing"
|
||||
|
||||
Test.specify "should be able to count distinct values" <|
|
||||
grouped = empty_table.group_by grouping [Count_Distinct "Code"]
|
||||
grouped = empty_table.aggregate [Group_By 0, Count_Distinct "Code"]
|
||||
grouped.row_count . should_equal 0
|
||||
grouped.columns.length . should_equal 2
|
||||
grouped.columns.at 0 . name . should_equal "Code"
|
||||
grouped.columns.at 1 . name . should_equal "Count Distinct Code"
|
||||
|
||||
Test.specify "should be able to sum, average and standard deviation of values" <|
|
||||
grouped = empty_table.group_by grouping [Sum "Value", Average "ValueWithNothing", Standard_Deviation "Value", (Standard_Deviation "ValueWithNothing" population=True)]
|
||||
grouped = empty_table.aggregate [Group_By 0, Sum "Value", Average "ValueWithNothing", Standard_Deviation "Value", (Standard_Deviation "ValueWithNothing" population=True)]
|
||||
grouped.row_count . should_equal 0
|
||||
grouped.columns.length . should_equal 5
|
||||
grouped.columns.at 0 . name . should_equal "Code"
|
||||
@ -240,7 +238,7 @@ spec =
|
||||
grouped.columns.at 4 . name . should_equal "Standard Deviation ValueWithNothing"
|
||||
|
||||
Test.specify "should be able to create median values" <|
|
||||
grouped = empty_table.group_by grouping [Median "Index", Mode "Index", Percentile 0.25 "Value"]
|
||||
grouped = empty_table.aggregate [Group_By 0, Median "Index", Mode "Index", Percentile 0.25 "Value"]
|
||||
grouped.row_count . should_equal 0
|
||||
grouped.columns.length . should_equal 4
|
||||
grouped.columns.at 0 . name . should_equal "Code"
|
||||
@ -249,7 +247,7 @@ spec =
|
||||
grouped.columns.at 3 . name . should_equal "25%-ile Value"
|
||||
|
||||
Test.specify "should be able to get first and last values" <|
|
||||
grouped = empty_table.group_by grouping [First "Index", Last "Value"]
|
||||
grouped = empty_table.aggregate [Group_By 0, First "Index", Last "Value"]
|
||||
grouped.row_count . should_equal 0
|
||||
grouped.columns.length . should_equal 3
|
||||
grouped.columns.at 0 . name . should_equal "Code"
|
||||
@ -257,7 +255,7 @@ spec =
|
||||
grouped.columns.at 2 . name . should_equal "Last Value"
|
||||
|
||||
Test.specify "should be able to get minimum and maximum values" <|
|
||||
grouped = empty_table.group_by grouping [Minimum "Value", Maximum "ValueWithNothing"]
|
||||
grouped = empty_table.aggregate [Group_By 0, Minimum "Value", Maximum "ValueWithNothing"]
|
||||
grouped.row_count . should_equal 0
|
||||
grouped.columns.length . should_equal 3
|
||||
grouped.columns.at 0 . name . should_equal "Code"
|
||||
@ -265,7 +263,7 @@ spec =
|
||||
grouped.columns.at 2 . name . should_equal "Maximum ValueWithNothing"
|
||||
|
||||
Test.specify "should be able to get shortest, longest and concatenated values" <|
|
||||
grouped = empty_table.group_by grouping [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
|
||||
grouped = empty_table.aggregate [Group_By 0, Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
|
||||
grouped.row_count . should_equal 0
|
||||
grouped.columns.length . should_equal 4
|
||||
grouped.columns.at 0 . name . should_equal "Code"
|
||||
@ -273,10 +271,9 @@ spec =
|
||||
grouped.columns.at 2 . name . should_equal "Longest TextWithNothing"
|
||||
grouped.columns.at 3 . name . should_equal "Concatenate Code"
|
||||
|
||||
Test.group "Table.group_by should be able to group on single field " <|
|
||||
grouping = Column_Selector.By_name.new ["Index"]
|
||||
Test.group "Table.aggregate should be able to group on single field " <|
|
||||
Test.specify "should be able to count" <|
|
||||
grouped = table.group_by grouping [Count Nothing]
|
||||
grouped = table.aggregate [Group_By "Index", Count Nothing]
|
||||
grouped.row_count . should_equal 10
|
||||
grouped.columns.length . should_equal 2
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
@ -286,7 +283,7 @@ spec =
|
||||
grouped.columns.at 1 . at idx . should_equal 261
|
||||
|
||||
Test.specify "should be able to count missing values" <|
|
||||
grouped = table.group_by grouping [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
|
||||
grouped = table.aggregate [Group_By "Index", Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
|
||||
grouped.row_count . should_equal 10
|
||||
grouped.columns.length . should_equal 5
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
@ -302,7 +299,7 @@ spec =
|
||||
grouped.columns.at 4 . at idx . should_equal 230
|
||||
|
||||
Test.specify "should be able to count distinct values" <|
|
||||
grouped = table.group_by grouping [Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag", Count_Distinct ["Index", "Flag"]]
|
||||
grouped = table.aggregate [Group_By "Index", Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag", Count_Distinct ["Index", "Flag"]]
|
||||
grouped.row_count . should_equal 10
|
||||
grouped.columns.length . should_equal 5
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
@ -318,7 +315,7 @@ spec =
|
||||
grouped.columns.at 4 . at idx . should_equal 2
|
||||
|
||||
Test.specify "should be able to sum, average and standard deviation of values" <|
|
||||
grouped = table.group_by grouping [Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)]
|
||||
grouped = table.aggregate [Group_By "Index", Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)]
|
||||
grouped.row_count . should_equal 10
|
||||
grouped.columns.length . should_equal 9
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
@ -342,7 +339,7 @@ spec =
|
||||
grouped.columns.at 8 . at idx . should_equal 56.677714 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to create median values" <|
|
||||
grouped = table.group_by grouping [Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"]
|
||||
grouped = table.aggregate [Group_By "Index", Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"]
|
||||
grouped.row_count . should_equal 10
|
||||
grouped.columns.length . should_equal 7
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
@ -362,7 +359,7 @@ spec =
|
||||
grouped.columns.at 6 . at idx . should_equal -18.802000 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to get first and last values" <|
|
||||
grouped = table.group_by grouping [First "TextWithNothing", Last "Value"]
|
||||
grouped = table.aggregate [Group_By "Index", First "TextWithNothing", Last "Value"]
|
||||
grouped.row_count . should_equal 10
|
||||
grouped.columns.length . should_equal 3
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
@ -374,7 +371,7 @@ spec =
|
||||
grouped.columns.at 2 . at idx . should_equal 56.15916 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to get minimum and maximum values" <|
|
||||
grouped = table.group_by grouping [Minimum "Value", Maximum "Value", Minimum "ValueWithNothing", Maximum "ValueWithNothing"]
|
||||
grouped = table.aggregate [Group_By "Index", Minimum "Value", Maximum "Value", Minimum "ValueWithNothing", Maximum "ValueWithNothing"]
|
||||
grouped.row_count . should_equal 10
|
||||
grouped.columns.length . should_equal 5
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
@ -390,7 +387,7 @@ spec =
|
||||
grouped.columns.at 4 . at idx . should_equal 99.79 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to get shortest, longest and concatenated values" <|
|
||||
grouped = table.group_by grouping [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
|
||||
grouped = table.aggregate [Group_By "Index", Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
|
||||
grouped.row_count . should_equal 10
|
||||
grouped.columns.length . should_equal 4
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
@ -403,4 +400,141 @@ spec =
|
||||
grouped.columns.at 3 . name . should_equal "Concatenate Code"
|
||||
grouped.columns.at 3 . at idx . length . should_equal 783
|
||||
|
||||
Test.group "Table.aggregate should be able to group on multiple fields not in left columns" <|
|
||||
Test.specify "should be able to count" <|
|
||||
grouped = table.aggregate [Group_By "Flag", Count Nothing, Group_By "Index"]
|
||||
grouped.row_count . should_equal 20
|
||||
grouped.columns.length . should_equal 3
|
||||
grouped.columns.at 0 . name . should_equal "Flag"
|
||||
grouped.columns.at 2 . name . should_equal "Index"
|
||||
idx = find_row ["False", 6] grouped [0, 2]
|
||||
idx.is_nothing . should_be_false
|
||||
grouped.columns.at 1 . name . should_equal "Count"
|
||||
grouped.columns.at 1 . at idx . should_equal 127
|
||||
|
||||
Test.specify "should be able to count missing values" <|
|
||||
grouped = table.aggregate [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Group_By "Index", Count_Empty "TextWithNothing", Group_By "Flag", Count_Not_Empty "TextWithNothing"]
|
||||
grouped.row_count . should_equal 20
|
||||
grouped.columns.length . should_equal 6
|
||||
grouped.columns.at 4 . name . should_equal "Flag"
|
||||
grouped.columns.at 2 . name . should_equal "Index"
|
||||
idx = find_row ["False", 6] grouped [4, 2]
|
||||
idx.is_nothing . should_be_false
|
||||
grouped.columns.at 0 . name . should_equal "Count Nothing Hexadecimal"
|
||||
grouped.columns.at 0 . at idx . should_equal 8
|
||||
grouped.columns.at 1 . name . should_equal "Count Not Nothing Hexadecimal"
|
||||
grouped.columns.at 1 . at idx . should_equal 119
|
||||
grouped.columns.at 3 . name . should_equal "Count Empty TextWithNothing"
|
||||
grouped.columns.at 3 . at idx . should_equal 12
|
||||
grouped.columns.at 5 . name . should_equal "Count Not Empty TextWithNothing"
|
||||
grouped.columns.at 5 . at idx . should_equal 115
|
||||
|
||||
Test.specify "should be able to count distinct values" <|
|
||||
grouped = table.aggregate [Group_By "Index", Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag", Count_Distinct ["Index", "Flag"], Group_By "Flag"]
|
||||
grouped.row_count . should_equal 20
|
||||
grouped.columns.length . should_equal 6
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
grouped.columns.at 5 . name . should_equal "Flag"
|
||||
idx = find_row ["False", 6] grouped [5, 0]
|
||||
idx.is_nothing . should_be_false
|
||||
grouped.columns.at 1 . name . should_equal "Count Distinct Code"
|
||||
grouped.columns.at 1 . at idx . should_equal 127
|
||||
grouped.columns.at 2 . name . should_equal "Count Distinct Index"
|
||||
grouped.columns.at 2 . at idx . should_equal 1
|
||||
grouped.columns.at 3 . name . should_equal "Count Distinct Flag"
|
||||
grouped.columns.at 3 . at idx . should_equal 1
|
||||
grouped.columns.at 4 . name . should_equal "Count Distinct Index Flag"
|
||||
grouped.columns.at 4 . at idx . should_equal 1
|
||||
|
||||
Test.specify "should be able to sum, average and standard deviation of values" <|
|
||||
grouped = table.aggregate [Group_By "Index", Group_By "Flag", Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)]
|
||||
grouped.row_count . should_equal 20
|
||||
grouped.columns.length . should_equal 10
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
grouped.columns.at 1 . name . should_equal "Flag"
|
||||
idx = find_row ["False", 6] grouped [1, 0]
|
||||
idx.is_nothing . should_be_false
|
||||
grouped.columns.at 2 . name . should_equal "Sum Value"
|
||||
grouped.columns.at 2 . at idx . should_equal -103.050170 epsilon=0.000001
|
||||
grouped.columns.at 3 . name . should_equal "Sum ValueWithNothing"
|
||||
grouped.columns.at 3 . at idx . should_equal 533.57 epsilon=0.000001
|
||||
grouped.columns.at 4 . name . should_equal "Average Value"
|
||||
grouped.columns.at 4 . at idx . should_equal -0.811419 epsilon=0.000001
|
||||
grouped.columns.at 5 . name . should_equal "Average ValueWithNothing"
|
||||
grouped.columns.at 5 . at idx . should_equal 4.721858 epsilon=0.000001
|
||||
grouped.columns.at 6 . name . should_equal "Standard Deviation Value"
|
||||
grouped.columns.at 6 . at idx . should_equal 58.979275 epsilon=0.000001
|
||||
grouped.columns.at 7 . name . should_equal "Standard Deviation ValueWithNothing"
|
||||
grouped.columns.at 7 . at idx . should_equal 57.561756 epsilon=0.000001
|
||||
grouped.columns.at 8 . name . should_equal "Standard Deviation Value_1"
|
||||
grouped.columns.at 8 . at idx . should_equal 58.746614 epsilon=0.000001
|
||||
grouped.columns.at 9 . name . should_equal "Standard Deviation ValueWithNothing_1"
|
||||
grouped.columns.at 9 . at idx . should_equal 57.306492 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to create median values" <|
|
||||
grouped = table.aggregate [Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Group_By "Index", Group_By "Flag", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"]
|
||||
grouped.row_count . should_equal 20
|
||||
grouped.columns.length . should_equal 8
|
||||
grouped.columns.at 5 . name . should_equal "Flag"
|
||||
grouped.columns.at 4 . name . should_equal "Index"
|
||||
idx = find_row ["False", 6] grouped [5, 4]
|
||||
idx.is_nothing . should_be_false
|
||||
grouped.columns.at 0 . name . should_equal "Median Index"
|
||||
grouped.columns.at 0 . at idx . should_equal 6 epsilon=0.000001
|
||||
grouped.columns.at 1 . name . should_equal "Median Value"
|
||||
grouped.columns.at 1 . at idx . should_equal 2.041150 epsilon=0.000001
|
||||
grouped.columns.at 2 . name . should_equal "Median ValueWithNothing"
|
||||
grouped.columns.at 2 . at idx . should_equal 3.55 epsilon=0.000001
|
||||
grouped.columns.at 3 . name . should_equal "Mode Index"
|
||||
grouped.columns.at 3 . at idx . should_equal 6
|
||||
grouped.columns.at 6 . name . should_equal "25%-ile Value"
|
||||
grouped.columns.at 6 . at idx . should_equal -52.628925 epsilon=0.000001
|
||||
grouped.columns.at 7 . name . should_equal "40%-ile ValueWithNothing"
|
||||
grouped.columns.at 7 . at idx . should_equal -17.174000 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to get first and last values" <|
|
||||
grouped = table.aggregate [Group_By "Flag", First "TextWithNothing", Last "Value", Group_By "Index"]
|
||||
grouped.row_count . should_equal 20
|
||||
grouped.columns.length . should_equal 4
|
||||
grouped.columns.at 0 . name . should_equal "Flag"
|
||||
grouped.columns.at 3 . name . should_equal "Index"
|
||||
idx = find_row ["False", 6] grouped [0, 3]
|
||||
idx.is_nothing . should_be_false
|
||||
grouped.columns.at 1 . name . should_equal "First TextWithNothing"
|
||||
grouped.columns.at 1 . at idx . should_equal "kmqxqkl6qx"
|
||||
grouped.columns.at 2 . name . should_equal "Last Value"
|
||||
grouped.columns.at 2 . at idx . should_equal 56.15916 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to get minimum and maximum values" <|
|
||||
grouped = table.aggregate [Group_By "Index", Minimum "Value", Maximum "Value", Group_By "Flag", Minimum "ValueWithNothing", Maximum "ValueWithNothing"]
|
||||
grouped.row_count . should_equal 20
|
||||
grouped.columns.length . should_equal 6
|
||||
grouped.columns.at 3 . name . should_equal "Flag"
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
idx = find_row ["False", 6] grouped [3, 0]
|
||||
idx.is_nothing . should_be_false
|
||||
grouped.columns.at 1 . name . should_equal "Minimum Value"
|
||||
grouped.columns.at 1 . at idx . should_equal -99.605880 epsilon=0.000001
|
||||
grouped.columns.at 2 . name . should_equal "Maximum Value"
|
||||
grouped.columns.at 2 . at idx . should_equal 96.488390 epsilon=0.000001
|
||||
grouped.columns.at 4 . name . should_equal "Minimum ValueWithNothing"
|
||||
grouped.columns.at 4 . at idx . should_equal -99.99 epsilon=0.000001
|
||||
grouped.columns.at 5 . name . should_equal "Maximum ValueWithNothing"
|
||||
grouped.columns.at 5 . at idx . should_equal 97.17 epsilon=0.000001
|
||||
|
||||
Test.specify "should be able to get shortest, longest and concatenated values" <|
|
||||
grouped = table.aggregate [Group_By "Index", Group_By "Flag", Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
|
||||
grouped.row_count . should_equal 20
|
||||
grouped.columns.length . should_equal 5
|
||||
grouped.columns.at 0 . name . should_equal "Index"
|
||||
grouped.columns.at 1 . name . should_equal "Flag"
|
||||
idx = find_row [6, "False"] grouped
|
||||
idx.is_nothing . should_be_false
|
||||
grouped.columns.at 2 . name . should_equal "Shortest TextWithNothing"
|
||||
grouped.columns.at 2 . at idx . should_equal "kmqxqkl6qx"
|
||||
grouped.columns.at 3 . name . should_equal "Longest TextWithNothing"
|
||||
grouped.columns.at 3 . at idx . should_equal "kmqxqkl6qx"
|
||||
grouped.columns.at 4 . name . should_equal "Concatenate Code"
|
||||
grouped.columns.at 4 . at idx . length . should_equal 381
|
||||
|
||||
main = Test.Suite.run_main here.spec
|
@ -9,6 +9,8 @@ import project.Csv_Spec
|
||||
import project.Json_Spec
|
||||
import project.Table_Spec
|
||||
import project.Spreadsheet_Spec
|
||||
import project.Aggregate_Column_Spec
|
||||
import project.Aggregate_Spec
|
||||
|
||||
main = Test.Suite.run_main <|
|
||||
Column_Spec.spec
|
||||
@ -18,3 +20,5 @@ main = Test.Suite.run_main <|
|
||||
Table_Spec.spec
|
||||
Database_Spec.sqlite_spec
|
||||
Model_Spec.spec
|
||||
Aggregate_Column_Spec.spec
|
||||
Aggregate_Spec.spec
|
||||
|
Loading…
Reference in New Issue
Block a user