mirror of
https://github.com/enso-org/enso.git
synced 2024-12-23 22:01:42 +03:00
Restructuring the Faker type and creating tests for Group_By (#3318)
- Added Minimum, Maximum, Longest, Shortest, Mode, Percentile - Added first and last to Map - Restructured Faker type more in line with FakerJS - Created 2,500 row data set - Tests for group_by - Performance tests for group_by
This commit is contained in:
parent
f92108158c
commit
65465fb8ef
@ -64,6 +64,8 @@
|
|||||||
`Vector.fold_with_index` and `Vector.take` methods.][3236]
|
`Vector.fold_with_index` and `Vector.take` methods.][3236]
|
||||||
- [Implemented new `Text.insert` method][3311]
|
- [Implemented new `Text.insert` method][3311]
|
||||||
- [Implemented `Bool.compare_to` method][3317]
|
- [Implemented `Bool.compare_to` method][3317]
|
||||||
|
- [Implemented `Map.first`, `Map.last` functions. Expanded `Table.group_by` to
|
||||||
|
also compute mode, percentile, minimum, maximum.][3318]
|
||||||
|
|
||||||
[debug-shortcuts]:
|
[debug-shortcuts]:
|
||||||
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
||||||
@ -101,6 +103,7 @@
|
|||||||
[3236]: https://github.com/enso-org/enso/pull/3236
|
[3236]: https://github.com/enso-org/enso/pull/3236
|
||||||
[3311]: https://github.com/enso-org/enso/pull/3311
|
[3311]: https://github.com/enso-org/enso/pull/3311
|
||||||
[3317]: https://github.com/enso-org/enso/pull/3317
|
[3317]: https://github.com/enso-org/enso/pull/3317
|
||||||
|
[3318]: https://github.com/enso-org/enso/pull/3318
|
||||||
|
|
||||||
#### Enso Compiler
|
#### Enso Compiler
|
||||||
|
|
||||||
|
@ -440,6 +440,24 @@ type Map
|
|||||||
to_vector_with_builder this
|
to_vector_with_builder this
|
||||||
builder.to_vector
|
builder.to_vector
|
||||||
|
|
||||||
|
## Get a key value pair of the lowest key in the map.
|
||||||
|
If the map is empty, returns Nothing.
|
||||||
|
first : Pair
|
||||||
|
first =
|
||||||
|
first p m = case m of
|
||||||
|
Bin _ k v l _ -> @Tail_Call first (Pair k v) l
|
||||||
|
Tip -> p
|
||||||
|
first Nothing this
|
||||||
|
|
||||||
|
## Get a key value pair of the highest key in the map.
|
||||||
|
If the map is empty, returns Nothing.
|
||||||
|
last : Pair
|
||||||
|
last =
|
||||||
|
last p m = case m of
|
||||||
|
Bin _ k v _ r -> @Tail_Call last (Pair k v) r
|
||||||
|
Tip -> p
|
||||||
|
last Nothing this
|
||||||
|
|
||||||
## UNSTABLE
|
## UNSTABLE
|
||||||
|
|
||||||
An error for getting a missing value from a map.
|
An error for getting a missing value from a map.
|
||||||
|
@ -6,54 +6,111 @@ import Standard.Table.Data.Group_By
|
|||||||
## Defines an Aggregate Column
|
## Defines an Aggregate Column
|
||||||
type Aggregate_Column
|
type Aggregate_Column
|
||||||
## Creates a new column with the row count of each group
|
## Creates a new column with the row count of each group
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- name: name of new column.
|
||||||
type Count (name:Text|Nothing=Nothing)
|
type Count (name:Text|Nothing=Nothing)
|
||||||
|
|
||||||
## Creates a new column with the count of unique items in the selected
|
## Creates a new column with the count of unique items in the selected
|
||||||
column(s) within each group.
|
column(s) within each group.
|
||||||
type Count_Distinct (columns:Column|Text|Integer|[(Column|Text|Integer)]) (name:Text|Nothing=Nothing)
|
|
||||||
|
Arguments:
|
||||||
|
- columns: either a single or set of columns (specified by name, index or Column object) to count across.
|
||||||
|
- name: name of new column.
|
||||||
|
- ignore_nothing: if `True`, entries where all selected values are `Nothing` are not counted.
|
||||||
|
type Count_Distinct (columns:Column|Text|Integer|[(Column|Text|Integer)]) (name:Text|Nothing=Nothing) (ignore_nothing:Boolean=False)
|
||||||
|
|
||||||
## ALIAS Count_Not_Null
|
## ALIAS Count_Not_Null
|
||||||
|
|
||||||
Creates a new column with the count of not `Nothing` (null) values of the
|
Creates a new column with the count of not `Nothing` (null) values of the
|
||||||
specified column within each group.
|
specified column within each group.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- column: column (specified by name, index or Column object) to count.
|
||||||
|
- name: name of new column.
|
||||||
type Count_Not_Nothing (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
type Count_Not_Nothing (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||||
|
|
||||||
## ALIAS Count_Null, Count_Missing
|
## ALIAS Count_Null, Count_Missing
|
||||||
|
|
||||||
Creates a new column with the count of `Nothing` (null) values of the
|
Creates a new column with the count of `Nothing` (null) values of the
|
||||||
specified column within each group.
|
specified column within each group.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- column: column (specified by name, index or Column object) to count.
|
||||||
|
- name: name of new column.
|
||||||
type Count_Nothing (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
type Count_Nothing (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||||
|
|
||||||
## Creates a new column with the count of not `Nothing` (null) and non-empty
|
## Creates a new column with the count of not `Nothing` (null) and non-empty
|
||||||
("") values of the column within each group.
|
("") values of the column within each group.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- column: column (specified by name, index or Column object) to count.
|
||||||
|
- name: name of new column.
|
||||||
type Count_Not_Empty (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
type Count_Not_Empty (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||||
|
|
||||||
## Creates a new column with the count of `Nothing` (null) or empty ("")
|
## Creates a new column with the count of `Nothing` (null) or empty ("")
|
||||||
text values of the column within each group.
|
text values of the column within each group.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- column: column (specified by name, index or Column object) to count.
|
||||||
|
- name: name of new column.
|
||||||
type Count_Empty (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
type Count_Empty (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||||
|
|
||||||
## Creates a new column with the sum of values (ignoring missing values) of
|
## Creates a new column with the sum of values (ignoring missing values) of
|
||||||
the specified column within each group.
|
the specified column within each group.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- column: column (specified by name, index or Column object) to total.
|
||||||
|
- name: name of new column.
|
||||||
type Sum (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
type Sum (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||||
|
|
||||||
## Creates a new column with the mean of values (ignoring missing values) of
|
## Creates a new column with the mean of values (ignoring missing values) of
|
||||||
the specified column within each group.
|
the specified column within each group.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- column: column (specified by name, index or Column object) to average.
|
||||||
|
- name: name of new column.
|
||||||
type Average (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
type Average (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||||
|
|
||||||
## Creates a new column with the median of values (ignoring missing values)
|
## Creates a new column with the median of values (ignoring missing values)
|
||||||
of the specified column within each group.
|
of the specified column within each group.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- column: column (specified by name, index or Column object) to calculate median on.
|
||||||
|
- name: name of new column.
|
||||||
type Median (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
type Median (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||||
|
|
||||||
|
## Creates a new column with the percentile of values (ignoring missing values)
|
||||||
|
of the specified column within each group.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- percentile: the percentile to compute, given as a fraction from 0 to 1 inclusive.
|
||||||
|
- column: column (specified by name, index or Column object) to compute percentile.
|
||||||
|
- name: name of new column.
|
||||||
|
type Percentile (percentile:Decimal) (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||||
|
|
||||||
|
## Creates a new column with the mode of values (ignoring missing values)
|
||||||
|
of the specified column within each group.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- column: column (specified by name, index or Column object) to find the most common value.
|
||||||
|
- name: name of new column.
|
||||||
|
type Mode (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||||
|
|
||||||
## Creates a new column with the standard deviation of values (ignoring
|
## Creates a new column with the standard deviation of values (ignoring
|
||||||
missing values) of the column within each group.
|
missing values) of the column within each group.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
|
- column: column (specified by name, index or Column object) to compute standard deviation.
|
||||||
|
- name: name of new column.
|
||||||
- population argument specifies if group is a sample or the population
|
- population argument specifies if group is a sample or the population
|
||||||
type Standard_Deviation (column:Column|Text|Integer) (name:Text|Nothing=Nothing) (population:Boolean=False)
|
type Standard_Deviation (column:Column|Text|Integer) (name:Text|Nothing=Nothing) (population:Boolean=False)
|
||||||
|
|
||||||
## Creates a new column with the values concatenated together. `Nothing` values will become an empty string.
|
## Creates a new column with the values concatenated together. `Nothing` values will become an empty string.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
|
- column: column (specified by name, index or Column object) to concatenate values.
|
||||||
|
- name: name of new column.
|
||||||
- separator: added between each value.
|
- separator: added between each value.
|
||||||
- prefix: added at the start of the result.
|
- prefix: added at the start of the result.
|
||||||
- suffix: added at the end of the result.
|
- suffix: added at the end of the result.
|
||||||
@ -64,6 +121,8 @@ type Aggregate_Column
|
|||||||
## Creates a new column with the first value in each group.
|
## Creates a new column with the first value in each group.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
|
- column: column (specified by name, index or Column object) to find first group entry.
|
||||||
|
- name: name of new column.
|
||||||
- ignore_nothing: if `True`, then missing values are ignored and first
|
- ignore_nothing: if `True`, then missing values are ignored and first
|
||||||
not missing value returned.
|
not missing value returned.
|
||||||
- order_by: required for database tables. Specifies how to order the
|
- order_by: required for database tables. Specifies how to order the
|
||||||
@ -73,12 +132,42 @@ type Aggregate_Column
|
|||||||
## Creates a new column with the last value in each group.
|
## Creates a new column with the last value in each group.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
|
- column: column (specified by name, index or Column object) to find last group entry.
|
||||||
|
- name: name of new column.
|
||||||
- ignore_nothing: if `True`, then missing values are ignored and last
|
- ignore_nothing: if `True`, then missing values are ignored and last
|
||||||
not missing value returned.
|
not missing value returned.
|
||||||
- order_by: required for database tables. Specifies how to order the
|
- order_by: required for database tables. Specifies how to order the
|
||||||
results within the group.
|
results within the group.
|
||||||
type Last (column:Column|Text|Integer) (name:Text|Nothing=Nothing) (ignore_nothing:Boolean=True) (order_by:Column_Selector|Nothing=Nothing)
|
type Last (column:Column|Text|Integer) (name:Text|Nothing=Nothing) (ignore_nothing:Boolean=True) (order_by:Column_Selector|Nothing=Nothing)
|
||||||
|
|
||||||
|
## Creates a new column with the maximum value in each group.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- column: column (specified by name, index or Column object) to find maximum.
|
||||||
|
- name: name of new column.
|
||||||
|
type Maximum (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||||
|
|
||||||
|
## Creates a new column with the minimum value in each group.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- column: column (specified by name, index or Column object) to find minimum.
|
||||||
|
- name: name of new column.
|
||||||
|
type Minimum (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||||
|
|
||||||
|
## Creates a new column with the shortest text in each group.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- column: column (specified by name, index or Column object) to find shortest value.
|
||||||
|
- name: name of new column.
|
||||||
|
type Shortest (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||||
|
|
||||||
|
## Creates a new column with the longest text in each group.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- column: column (specified by name, index or Column object) to find longest value.
|
||||||
|
- name: name of new column.
|
||||||
|
type Longest (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
|
||||||
|
|
||||||
## Gets a column name to use for the aggregate column
|
## Gets a column name to use for the aggregate column
|
||||||
column_name : Table->Text
|
column_name : Table->Text
|
||||||
column_name table =
|
column_name table =
|
||||||
@ -86,7 +175,7 @@ type Aggregate_Column
|
|||||||
get_name c = (this.resolve_column table c).name
|
get_name c = (this.resolve_column table c).name
|
||||||
case this of
|
case this of
|
||||||
Count _ -> "Count"
|
Count _ -> "Count"
|
||||||
Count_Distinct c _ ->
|
Count_Distinct c _ _ ->
|
||||||
case c of
|
case c of
|
||||||
Vector.Vector _ -> "Count Distinct " + ((c.map get_name).join " ")
|
Vector.Vector _ -> "Count Distinct " + ((c.map get_name).join " ")
|
||||||
_ -> "Count Distinct " + (get_name c)
|
_ -> "Count Distinct " + (get_name c)
|
||||||
@ -97,10 +186,16 @@ type Aggregate_Column
|
|||||||
Sum c _ -> "Sum " + (get_name c)
|
Sum c _ -> "Sum " + (get_name c)
|
||||||
Average c _ -> "Average " + (get_name c)
|
Average c _ -> "Average " + (get_name c)
|
||||||
Median c _ -> "Median " + (get_name c)
|
Median c _ -> "Median " + (get_name c)
|
||||||
|
Percentile p c _ -> (p*100).floor.to_text + "%-ile " + (get_name c)
|
||||||
|
Mode c _ -> "Mode " + (get_name c)
|
||||||
Standard_Deviation c _ _ -> "Standard Deviation " + (get_name c)
|
Standard_Deviation c _ _ -> "Standard Deviation " + (get_name c)
|
||||||
Concatenate c _ _ _ _ _ -> "Concatenate " + (get_name c)
|
Concatenate c _ _ _ _ _ -> "Concatenate " + (get_name c)
|
||||||
First c _ _ _ -> "First " + (get_name c)
|
First c _ _ _ -> "First " + (get_name c)
|
||||||
Last c _ _ _ -> "Last " + (get_name c)
|
Last c _ _ _ -> "Last " + (get_name c)
|
||||||
|
Maximum c _ -> "Maximum " + (get_name c)
|
||||||
|
Minimum c _ -> "Minimum " + (get_name c)
|
||||||
|
Shortest c _ -> "Shortest " + (get_name c)
|
||||||
|
Longest c _ -> "Longest " + (get_name c)
|
||||||
|
|
||||||
## PRIVATE
|
## PRIVATE
|
||||||
Given a column reference resolve to the underlying column
|
Given a column reference resolve to the underlying column
|
||||||
@ -113,14 +208,20 @@ type Aggregate_Column
|
|||||||
|
|
||||||
initial_value : Any
|
initial_value : Any
|
||||||
initial_value = case this of
|
initial_value = case this of
|
||||||
Count_Distinct _ _ -> Map.empty
|
Count_Distinct _ _ _ -> Map.empty
|
||||||
Median _ _ -> Map.empty
|
Median _ _ -> Map.empty
|
||||||
|
Percentile _ _ _ -> Map.empty
|
||||||
|
Mode _ _ -> Map.empty
|
||||||
Average _ _ -> [0, 0]
|
Average _ _ -> [0, 0]
|
||||||
Standard_Deviation _ _ _ -> [0, 0, 0]
|
Standard_Deviation _ _ _ -> [0, 0, 0]
|
||||||
Concatenate _ _ _ _ _ _ -> Nothing
|
Concatenate _ _ _ _ _ _ -> Nothing
|
||||||
First _ _ _ _ -> Nothing
|
First _ _ _ _ -> Nothing
|
||||||
Last _ _ _ _ -> Nothing
|
Last _ _ _ _ -> Nothing
|
||||||
Sum _ _ -> Nothing
|
Sum _ _ -> Nothing
|
||||||
|
Maximum _ _ -> Nothing
|
||||||
|
Minimum _ _ -> Nothing
|
||||||
|
Shortest _ _ -> Nothing
|
||||||
|
Longest _ _ -> Nothing
|
||||||
_ -> 0
|
_ -> 0
|
||||||
|
|
||||||
make_aggregator : Table->(Any->Integer->Any)
|
make_aggregator : Table->(Any->Integer->Any)
|
||||||
@ -139,16 +240,28 @@ type Aggregate_Column
|
|||||||
Count_Nothing c _ -> create_closure c col->count->i->(count + if (col.at i).is_nothing then 1 else 0)
|
Count_Nothing c _ -> create_closure c col->count->i->(count + if (col.at i).is_nothing then 1 else 0)
|
||||||
Count_Not_Empty c _ -> create_closure c col->count->i->(count + if is_empty (col.at i) then 0 else 1)
|
Count_Not_Empty c _ -> create_closure c col->count->i->(count + if is_empty (col.at i) then 0 else 1)
|
||||||
Count_Empty c _ -> create_closure c col->count->i->(count + if is_empty (col.at i) then 1 else 0)
|
Count_Empty c _ -> create_closure c col->count->i->(count + if is_empty (col.at i) then 1 else 0)
|
||||||
First c _ i _ ->
|
First c _ ignore_nothing _ ->
|
||||||
case i of
|
case ignore_nothing of
|
||||||
False -> create_closure c col->current->i->(if i==0 then (col.at i) else current)
|
False -> create_closure c col->current->i->(if i==0 then (col.at i) else current)
|
||||||
True -> create_closure c col->current->i->if current.is_nothing then (col.at i) else current
|
True -> create_closure c col->current->i->if current.is_nothing then (col.at i) else current
|
||||||
Last c _ i _ ->
|
Last c _ ignore_nothing _ ->
|
||||||
case i of
|
case ignore_nothing of
|
||||||
False -> create_closure c col->_->i->(col.at i)
|
False -> create_closure c col->_->i->(col.at i)
|
||||||
True -> create_closure c col->current->i->
|
True -> create_closure c col->current->i->
|
||||||
v = (col.at i)
|
v = (col.at i)
|
||||||
if v.is_nothing then current else v
|
if v.is_nothing then current else v
|
||||||
|
Maximum c _ -> create_closure c col->m->i->
|
||||||
|
v = col.at i
|
||||||
|
if v.is_nothing then m else if m.is_nothing then v else m.max v
|
||||||
|
Minimum c _ -> create_closure c col->m->i->
|
||||||
|
v = col.at i
|
||||||
|
if v.is_nothing then m else if m.is_nothing then v else m.min v
|
||||||
|
Shortest c _ -> create_closure c col->m->i->
|
||||||
|
v = col.at i
|
||||||
|
if v.is_nothing then m else if m.is_nothing then v else if m.length <= v.length then m else v
|
||||||
|
Longest c _ -> create_closure c col->m->i->
|
||||||
|
v = col.at i
|
||||||
|
if v.is_nothing then m else if m.is_nothing then v else if m.length >= v.length then m else v
|
||||||
Sum c _ -> create_closure c col->total->i->
|
Sum c _ -> create_closure c col->total->i->
|
||||||
v = col.at i
|
v = col.at i
|
||||||
if v.is_nothing then total else
|
if v.is_nothing then total else
|
||||||
@ -159,46 +272,57 @@ type Aggregate_Column
|
|||||||
Standard_Deviation c _ _ -> create_closure c col->a->i->
|
Standard_Deviation c _ _ -> create_closure c col->a->i->
|
||||||
v = col.at i
|
v = col.at i
|
||||||
if v.is_nothing then a else [a.first + 1, a.second + v, (a.at 2) + v*v]
|
if v.is_nothing then a else [a.first + 1, a.second + v, (a.at 2) + v*v]
|
||||||
Concatenate c _ j _ _ q -> create_closure c col->text->i->
|
Concatenate c _ join _ _ quote -> create_closure c col->text->i->
|
||||||
v = col.at i
|
v = col.at i
|
||||||
val=if v.is_nothing then "" else
|
val=if v.is_nothing then "" else
|
||||||
text = case v of
|
text = case v of
|
||||||
Text -> v
|
Text -> v
|
||||||
_ -> v.to_text
|
_ -> v.to_text
|
||||||
if text == "" then (q+q) else
|
if text == "" then (quote+quote) else
|
||||||
if text.contains j then (q+text+q) else text
|
if text.contains join then (quote+text+quote) else text
|
||||||
if i==0 then val else (text + j + val)
|
if text.is_nothing then val else (text + join + val)
|
||||||
Median c _ -> create_closure c col->map->i->
|
Median c _ -> create_closure c col->map->i->
|
||||||
val = col.at i
|
val = col.at i
|
||||||
if val.is_nothing then map else (map.insert val (1 + (map.get_or_else val 0)))
|
if val.is_nothing then map else (map.insert val (1 + (map.get_or_else val 0)))
|
||||||
Count_Distinct columns _ ->
|
Percentile _ c _ -> create_closure c col->map->i->
|
||||||
|
val = col.at i
|
||||||
|
if val.is_nothing then map else (map.insert val (1 + (map.get_or_else val 0)))
|
||||||
|
Mode c _ -> create_closure c col->map->i->
|
||||||
|
val = col.at i
|
||||||
|
if val.is_nothing then map else (map.insert val (1 + (map.get_or_else val 0)))
|
||||||
|
Count_Distinct columns _ ignore_nothing ->
|
||||||
resolved = case columns of
|
resolved = case columns of
|
||||||
Vector.Vector _ -> columns.map c->(this.resolve_column table c)
|
Vector.Vector _ -> columns.map c->(this.resolve_column table c)
|
||||||
_ -> [this.resolve_column table columns]
|
_ -> [this.resolve_column table columns]
|
||||||
key_maker i = Group_By.key (resolved.map c->(c.at i))
|
key_maker i = Group_By.key (resolved.map c->(c.at i))
|
||||||
map->i->(map.insert (key_maker i) 1)
|
case ignore_nothing of
|
||||||
|
False-> map->i->(map.insert (key_maker i) 1)
|
||||||
|
True-> map->i->
|
||||||
|
key = key_maker i
|
||||||
|
if key.values.all .is_nothing then map else (map.insert key 1)
|
||||||
|
|
||||||
evaluate : Any->Any
|
evaluate : Any->Any
|
||||||
evaluate value = case this of
|
evaluate value =
|
||||||
Count_Distinct _ _ -> value.size
|
## Given a map of values and counts, find the value at a specified percentile
|
||||||
Median _ _ ->
|
percentile p:Decimal value:Map =
|
||||||
count = value.fold 0 (+)
|
count = value.fold 0 (+)
|
||||||
if count == 0 then Nothing else
|
if count == 0 then Nothing else
|
||||||
case count%2 == 1 of
|
mid_value = (count - 1)*p + 1
|
||||||
True ->
|
if mid_value <= 1 then value.first.first else
|
||||||
mid = (count-1) / 2
|
if mid_value >= count then value.last.first else
|
||||||
output = value.fold_with_key [0, Nothing] c->k->v->
|
mid = mid_value.floor
|
||||||
new_v = c.first + v
|
|
||||||
[new_v, if c.first.up_to new_v . contains mid then k else c.second]
|
|
||||||
output.second
|
|
||||||
False ->
|
|
||||||
mid = count / 2
|
|
||||||
output = value.fold_with_key [0, Nothing, Nothing] c->k->v->
|
output = value.fold_with_key [0, Nothing, Nothing] c->k->v->
|
||||||
new_v = c.first + v
|
new_v = c.first + v
|
||||||
new_s = if c.first.up_to new_v . contains mid then k else c.second
|
new_s = if c.first.up_to new_v . contains (mid-1) then k else c.second
|
||||||
new_t = if c.first.up_to new_v . contains (mid-1) then k else (c.at 2)
|
new_t = if c.first.up_to new_v . contains mid then k else (c.at 2)
|
||||||
[new_v, new_s, new_t]
|
[new_v, new_s, new_t]
|
||||||
(output.second + (output.at 2)) / 2
|
(output.second + (output.at 2 - output.second) * (mid_value - mid))
|
||||||
|
|
||||||
|
case this of
|
||||||
|
Count_Distinct _ _ _ -> value.size
|
||||||
|
Median _ _ -> percentile 0.5 value
|
||||||
|
Percentile p _ _ -> percentile p value
|
||||||
|
Mode _ _ -> (value.fold_with_key (Pair 0 Nothing) p->k->v-> if v>(p.first) then (Pair v k) else p) . second
|
||||||
Average _ _ -> if value.first == 0 then Nothing else (value.second / value.first)
|
Average _ _ -> if value.first == 0 then Nothing else (value.second / value.first)
|
||||||
Standard_Deviation _ _ p -> if value.first == 0 then Nothing else
|
Standard_Deviation _ _ p -> if value.first == 0 then Nothing else
|
||||||
f = if p then 1 else (value.first / (value.first - 1)).sqrt
|
f = if p then 1 else (value.first / (value.first - 1)).sqrt
|
||||||
@ -206,7 +330,6 @@ type Aggregate_Column
|
|||||||
Concatenate _ _ _ s p _ -> if value.is_nothing then value else (s + value + p)
|
Concatenate _ _ _ s p _ -> if value.is_nothing then value else (s + value + p)
|
||||||
_ -> value
|
_ -> value
|
||||||
|
|
||||||
|
|
||||||
## Occurs when cannot aggregate a column
|
## Occurs when cannot aggregate a column
|
||||||
type Invalid_Aggregation_Method (column : Text) (message : Text)
|
type Invalid_Aggregation_Method (column : Text) (message : Text)
|
||||||
|
|
||||||
|
@ -4,11 +4,7 @@ import Standard.Base.Data.Ordering.Vector_Lexicographic_Order
|
|||||||
## Create a key structure for grouping operations
|
## Create a key structure for grouping operations
|
||||||
key : Vector -> Group_By_Key
|
key : Vector -> Group_By_Key
|
||||||
key values =
|
key values =
|
||||||
mapper c = case c of
|
mapper c = Comparable_Value c
|
||||||
Boolean -> Comparable_Boolean c
|
|
||||||
Nothing -> Comparable_Nothing
|
|
||||||
_ -> c
|
|
||||||
|
|
||||||
Group_By_Key <| values.map mapper
|
Group_By_Key <| values.map mapper
|
||||||
|
|
||||||
## PRIVATE
|
## PRIVATE
|
||||||
@ -25,30 +21,23 @@ type Group_By_Key
|
|||||||
compare_to that =
|
compare_to that =
|
||||||
Vector_Lexicographic_Order.compare this.values that.values
|
Vector_Lexicographic_Order.compare this.values that.values
|
||||||
|
|
||||||
## PRIVATE
|
|
||||||
Temporary workaround until Boolean compare_to completed
|
|
||||||
type Comparable_Boolean
|
|
||||||
type Comparable_Boolean value
|
|
||||||
|
|
||||||
== : Comparable_Boolean->Boolean
|
|
||||||
== that = (this.compare_to that) == Ordering.Equal
|
|
||||||
|
|
||||||
compare_to : Any->Ordering
|
|
||||||
compare_to that =
|
|
||||||
if this.value == that.value then Ordering.Equal else
|
|
||||||
if this.value then Ordering.Greater else Ordering.Less
|
|
||||||
|
|
||||||
## PRIVATE
|
## PRIVATE
|
||||||
Temporary workaround allowing Nothing to be in a Group_By
|
Temporary workaround allowing Nothing to be in a Group_By
|
||||||
type Comparable_Nothing
|
type Comparable_Value
|
||||||
type Comparable_Nothing
|
type Comparable_Value value
|
||||||
|
|
||||||
== : Comparable_Nothing->Boolean
|
== : Comparable_Nothing->Boolean
|
||||||
== that = (this.compare_to that) == Ordering.Equal
|
== that = (this.compare_to that) == Ordering.Equal
|
||||||
|
|
||||||
compare_to : Any->Ordering
|
compare_to : Any->Ordering
|
||||||
compare_to that =
|
compare_to that =
|
||||||
case that of
|
value = case that of
|
||||||
Comparable_Nothing -> Ordering.Equal
|
Comparable_Value v -> v
|
||||||
Nothing -> Ordering.Equal
|
_ -> that
|
||||||
_ -> Ordering.Less
|
|
||||||
|
case this.value of
|
||||||
|
Nothing -> if value.is_nothing then Ordering.Equal else Ordering.Less
|
||||||
|
_ -> if value.is_nothing then Ordering.Greater else this.value.compare_to value
|
||||||
|
|
||||||
|
is_nothing : Boolean
|
||||||
|
is_nothing = this.value.is_nothing
|
||||||
|
@ -8,6 +8,7 @@ import Standard.Base.Data.Time.Date
|
|||||||
import Standard.Table.Io.Spreadsheet_Write_Mode
|
import Standard.Table.Io.Spreadsheet_Write_Mode
|
||||||
import Standard.Table.Io.Format
|
import Standard.Table.Io.Format
|
||||||
import Standard.Table.Internal.Table_Helpers
|
import Standard.Table.Internal.Table_Helpers
|
||||||
|
import Standard.Table.Internal.Unique_Name_Strategy
|
||||||
|
|
||||||
from Standard.Table.Data.Order_Rule as Order_Rule_Module import Order_Rule
|
from Standard.Table.Data.Order_Rule as Order_Rule_Module import Order_Rule
|
||||||
from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Index
|
from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Index
|
||||||
@ -516,16 +517,17 @@ type Table
|
|||||||
group_by selector columns (on_problems=Report_Warning) (warnings=Warnings.default) =
|
group_by selector columns (on_problems=Report_Warning) (warnings=Warnings.default) =
|
||||||
# Grouping Key
|
# Grouping Key
|
||||||
key_columns = if selector.is_nothing then [] else
|
key_columns = if selector.is_nothing then [] else
|
||||||
Table_Helpers.select_columns internal_columns=this.columns selector=selector reorder=True on_problems=on_problems warnings=warnings
|
Table_Helpers.select_columns_helper internal_columns=this.columns selector=selector reorder=True on_problems=on_problems warnings=warnings
|
||||||
key_length = key_columns.length
|
key_length = key_columns.length
|
||||||
make_key = if (key_length == 0) then _->(Group_By.key [1]) else i->(Group_By.key (key_columns.map v->(v.at i)))
|
make_key = if (key_length == 0) then _->(Group_By.key [1]) else i->(Group_By.key (key_columns.map v->(v.at i)))
|
||||||
|
|
||||||
# New Table Accumulator
|
# New Table Accumulator
|
||||||
new_table = (key_columns.map c->c.name)+(columns.map c->(c.column_name this)) . map n->[n, Vector.new_builder]
|
name_strategy = Unique_Name_Strategy.new
|
||||||
|
new_table = (key_columns.map c->c.name)+(columns.map c->(c.column_name this)) . map n->[name_strategy.make_unique n, Vector.new_builder]
|
||||||
add_row key =
|
add_row key =
|
||||||
idx = new_table.at 0 . at 1 . length
|
idx = new_table.at 0 . at 1 . length
|
||||||
0.up_to key_length . each i->
|
0.up_to key_length . each i->
|
||||||
new_table.at i . at 1 . append (key.values.at i)
|
new_table.at i . at 1 . append (key.values.at i).value
|
||||||
0.up_to (columns.length) . each i->
|
0.up_to (columns.length) . each i->
|
||||||
column = columns.at i
|
column = columns.at i
|
||||||
new_table.at (i + key_length) . at 1 . append (column.initial_value)
|
new_table.at (i + key_length) . at 1 . append (column.initial_value)
|
||||||
|
@ -7,21 +7,24 @@ upper_case_letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".utf_16
|
|||||||
lower_case_letters = "abcdefghijklmnopqrstuvwxyz".utf_16
|
lower_case_letters = "abcdefghijklmnopqrstuvwxyz".utf_16
|
||||||
numbers = "0123456789".utf_16
|
numbers = "0123456789".utf_16
|
||||||
|
|
||||||
## Creates a random number generator which can be used for creating test values.
|
## Creates a new Faker which can be used for creating test values.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
- seed: Optional seed value to make the sequence deterministic
|
- seed: Optional seed value to make the sequence deterministic
|
||||||
make_generator : Integer -> Random
|
new : Integer -> Faker
|
||||||
make_generator (seed = 0) =
|
new (seed = 0) =
|
||||||
if seed == 0 then Random.new else Random.new seed
|
generator = if seed == 0 then Random.new else Random.new seed
|
||||||
|
Faker generator
|
||||||
|
|
||||||
|
## Object to generate (deterministic) random values for testing
|
||||||
|
type Faker
|
||||||
|
type Faker generator
|
||||||
|
|
||||||
## Creates a random string based on a template and random number generator.
|
## Creates a random Text based on a template of character sets.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
- template: Vector of vectors that represent the possible characters for each
|
- template: Vector of vectors that represent the possible characters for each
|
||||||
letter, as UTF-16 code units.
|
letter, as UTF-16 code units.
|
||||||
- generator: Random number generator
|
|
||||||
|
|
||||||
> Examples:
|
> Examples:
|
||||||
Creates a fake UK National Insurance number:
|
Creates a fake UK National Insurance number:
|
||||||
@ -30,10 +33,68 @@ make_generator (seed = 0) =
|
|||||||
n = "0123456789".utf_16
|
n = "0123456789".utf_16
|
||||||
s = "ABCDFMP ".utf_16
|
s = "ABCDFMP ".utf_16
|
||||||
template = [l, l, n, n, n, n, n, s]
|
template = [l, l, n, n, n, n, n, s]
|
||||||
ni_number = make_string template make_generator
|
ni_number = Faker.new . string_value template
|
||||||
make_string : Vector -> Any -> Text
|
string_value : Vector -> Text
|
||||||
make_string template generator =
|
string_value template =
|
||||||
characters = template.map possible_chars->
|
characters = template.map possible_chars->
|
||||||
selected_char_ix = generator.nextInt possible_chars.length
|
selected_char_ix = this.generator.nextInt possible_chars.length
|
||||||
possible_chars.at selected_char_ix
|
possible_chars.at selected_char_ix
|
||||||
Text.from_utf_16 characters
|
Text.from_utf_16 characters
|
||||||
|
|
||||||
|
## Generates a Text consisting of lower/upper case characters.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- length: length of text to generate
|
||||||
|
- upper_case: if True, use upper case letters; otherwise use lower case letters
|
||||||
|
alpha : Integer->Boolean->Text
|
||||||
|
alpha length=1 upper_case=False =
|
||||||
|
alphabet = if upper_case then here.upper_case_letters else here.lower_case_letters
|
||||||
|
this.string_value <| 0.up_to length . map _->alphabet
|
||||||
|
|
||||||
|
## Generates a Text consisting of lower/upper case characters and digits.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- length: length of text to generate
|
||||||
|
- upper_case: if True, use upper case letters; otherwise use lower case letters
|
||||||
|
alpha_numeric : Integer->Boolean->Text
|
||||||
|
alpha_numeric length=1 upper_case=False =
|
||||||
|
alphabet = (if upper_case then here.upper_case_letters else here.lower_case_letters) + here.numbers
|
||||||
|
this.string_value <| 0.up_to length . map _->alphabet
|
||||||
|
|
||||||
|
## Generates a Text for a hexadecimal number
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- length: length of text to generate
|
||||||
|
hexadecimal : Integer->Text
|
||||||
|
hexadecimal length=1 =
|
||||||
|
alphabet = "0123456789ABCDEF".utf_16
|
||||||
|
this.string_value <| 0.up_to length . map _->alphabet
|
||||||
|
|
||||||
|
## Create a random Boolean value
|
||||||
|
boolean : Boolean
|
||||||
|
boolean =
|
||||||
|
if this.generator.nextDouble < 0.5 then True else False
|
||||||
|
|
||||||
|
## Create a random Integer value
|
||||||
|
integer : Integer->Integer->Integer
|
||||||
|
integer minimum=0 maximum=100 =
|
||||||
|
minimum + (this.generator.nextInt (maximum - minimum))
|
||||||
|
|
||||||
|
## Create a random Decimal value
|
||||||
|
decimal : Decimal->Decimal->Decimal
|
||||||
|
decimal minimum=0.0 maximum=1.0 =
|
||||||
|
minimum + this.generator.nextDouble * (maximum - minimum)
|
||||||
|
|
||||||
|
## Picks an item at random from a Vector
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
- items: Vector of items to pick from
|
||||||
|
The index of the picked item is drawn from this Faker's own generator.
|
||||||
|
vector_item : Vector->Any
|
||||||
|
vector_item items =
|
||||||
|
items.at (this.generator.nextInt items.length)
|
||||||
|
|
||||||
|
## Randomly converts some values to Nothing
|
||||||
|
make_some_nothing : Any->Decimal->Any
|
||||||
|
make_some_nothing value (chance=0.1) =
|
||||||
|
if this.generator.nextDouble <= chance then Nothing else value
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
package org.enso.interpreter.node.expression.builtin.mutable;
|
package org.enso.interpreter.node.expression.builtin.mutable;
|
||||||
|
|
||||||
import com.oracle.truffle.api.dsl.Cached;
|
import com.oracle.truffle.api.dsl.Cached;
|
||||||
import com.oracle.truffle.api.dsl.CachedContext;
|
|
||||||
import com.oracle.truffle.api.dsl.Fallback;
|
import com.oracle.truffle.api.dsl.Fallback;
|
||||||
import com.oracle.truffle.api.dsl.Specialization;
|
import com.oracle.truffle.api.dsl.Specialization;
|
||||||
import com.oracle.truffle.api.interop.InteropLibrary;
|
import com.oracle.truffle.api.interop.InteropLibrary;
|
||||||
@ -9,7 +8,6 @@ import com.oracle.truffle.api.interop.InvalidArrayIndexException;
|
|||||||
import com.oracle.truffle.api.interop.UnsupportedMessageException;
|
import com.oracle.truffle.api.interop.UnsupportedMessageException;
|
||||||
import com.oracle.truffle.api.library.CachedLibrary;
|
import com.oracle.truffle.api.library.CachedLibrary;
|
||||||
import com.oracle.truffle.api.nodes.Node;
|
import com.oracle.truffle.api.nodes.Node;
|
||||||
import org.enso.interpreter.Language;
|
|
||||||
import org.enso.interpreter.dsl.BuiltinMethod;
|
import org.enso.interpreter.dsl.BuiltinMethod;
|
||||||
import org.enso.interpreter.node.expression.builtin.interop.syntax.HostValueToEnsoNode;
|
import org.enso.interpreter.node.expression.builtin.interop.syntax.HostValueToEnsoNode;
|
||||||
import org.enso.interpreter.runtime.Context;
|
import org.enso.interpreter.runtime.Context;
|
||||||
@ -34,11 +32,10 @@ public abstract class CopyNode extends Node {
|
|||||||
long source_index,
|
long source_index,
|
||||||
Array dest,
|
Array dest,
|
||||||
long dest_index,
|
long dest_index,
|
||||||
long count,
|
long count) {
|
||||||
@CachedContext(Language.class) Context ctx) {
|
|
||||||
System.arraycopy(
|
System.arraycopy(
|
||||||
src.getItems(), (int) source_index, dest.getItems(), (int) dest_index, (int) count);
|
src.getItems(), (int) source_index, dest.getItems(), (int) dest_index, (int) count);
|
||||||
return ctx.getBuiltins().nothing().newInstance();
|
return Context.get(this).getBuiltins().nothing().newInstance();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Specialization(guards = "arrays.hasArrayElements(src)")
|
@Specialization(guards = "arrays.hasArrayElements(src)")
|
||||||
|
@ -21,8 +21,10 @@ main =
|
|||||||
|
|
||||||
## No specific significance to this constant, just fixed to make generated set deterministic
|
## No specific significance to this constant, just fixed to make generated set deterministic
|
||||||
fixed_random_seed = 1644575867
|
fixed_random_seed = 1644575867
|
||||||
random_generator = Faker.make_generator fixed_random_seed
|
faker = Faker.new fixed_random_seed
|
||||||
|
|
||||||
unsorted = 0.up_to here.vector_size . map _->(Faker.make_string template random_generator)
|
IO.println <| "Creating unsorted vector"
|
||||||
|
unsorted = 0.up_to here.vector_size . map _->(faker.string_value template)
|
||||||
|
|
||||||
|
IO.println <| "Benchmarking..."
|
||||||
Bench.measure (unsorted.sort by=Natural_Order.compare) "Natural Order" here.iter_size here.num_iterations
|
Bench.measure (unsorted.sort by=Natural_Order.compare) "Natural Order" here.iter_size here.num_iterations
|
||||||
|
@ -5,24 +5,6 @@ import Standard.Test.Faker
|
|||||||
|
|
||||||
## Bench Utilities ============================================================
|
## Bench Utilities ============================================================
|
||||||
|
|
||||||
make_double_strings : Integer -> Any -> Integer -> Integer -> Vector
|
|
||||||
make_double_strings count generator (min = -1000000000) (max = 1000000000) =
|
|
||||||
range = max - min
|
|
||||||
output = Array.new count
|
|
||||||
0.up_to count . each i->
|
|
||||||
v = generator.nextDouble * range - min
|
|
||||||
output.set_at i v.to_text
|
|
||||||
Vector.Vector output
|
|
||||||
|
|
||||||
make_integer_strings : Integer -> Any -> Integer -> Integer -> Vector
|
|
||||||
make_integer_strings count generator (min = -1000000000) (max = 1000000000) =
|
|
||||||
range = max - min
|
|
||||||
output = Array.new count
|
|
||||||
0.up_to count . each i->
|
|
||||||
v = (generator.nextInt range - min)
|
|
||||||
output.set_at i v.to_text
|
|
||||||
Vector.Vector output
|
|
||||||
|
|
||||||
vector_size = 1000000
|
vector_size = 1000000
|
||||||
iter_size = 100
|
iter_size = 100
|
||||||
num_iterations = 10
|
num_iterations = 10
|
||||||
@ -32,10 +14,14 @@ num_iterations = 10
|
|||||||
main =
|
main =
|
||||||
## No specific significance to this constant, just fixed to make generated set deterministic
|
## No specific significance to this constant, just fixed to make generated set deterministic
|
||||||
fixed_random_seed = 1644575867
|
fixed_random_seed = 1644575867
|
||||||
random_generator = Faker.make_generator fixed_random_seed
|
faker = Faker.new fixed_random_seed
|
||||||
|
|
||||||
double_string = here.make_double_strings here.vector_size random_generator
|
IO.println <| "Creating decimal strings"
|
||||||
Bench.measure (double_string.map Decimal.parse) "Decimal.parse" here.iter_size here.num_iterations
|
decimal_strings = Vector.new here.vector_size _->(faker.decimal -1000000000 1000000000).to_text
|
||||||
|
IO.println <| "Benchmarking Decimal.parse"
|
||||||
|
Bench.measure (decimal_strings.map Decimal.parse) "Decimal.parse" here.iter_size here.num_iterations
|
||||||
|
|
||||||
int_strings = here.make_integer_strings here.vector_size random_generator
|
IO.println <| "Creating integer strings"
|
||||||
|
int_strings = Vector.new here.vector_size _->(faker.integer -1000000000 1000000000).to_text
|
||||||
|
IO.println <| "Benchmarking Integer.parse"
|
||||||
Bench.measure (int_strings.map Integer.parse) "Integer.parse" here.iter_size here.num_iterations
|
Bench.measure (int_strings.map Integer.parse) "Integer.parse" here.iter_size here.num_iterations
|
||||||
|
49
test/Benchmarks/src/Table/Group_By.enso
Normal file
49
test/Benchmarks/src/Table/Group_By.enso
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
from Standard.Base import all
|
||||||
|
|
||||||
|
import Standard.Test.Bench
|
||||||
|
import Standard.Test.Faker
|
||||||
|
|
||||||
|
import Standard.Table.Data.Table
|
||||||
|
import Standard.Table.Data.Column_Selector
|
||||||
|
from Standard.Table.Data.Aggregate_Column import all
|
||||||
|
|
||||||
|
## Bench Utilities ============================================================
|
||||||
|
|
||||||
|
vector_size = 2500
|
||||||
|
iter_size = 100
|
||||||
|
num_iterations = 10
|
||||||
|
|
||||||
|
create_table : Integer->Integer->Table
|
||||||
|
create_table rows (seed=1646322139) =
|
||||||
|
faker = Faker.new seed
|
||||||
|
key1 = ["Code", 0.up_to rows . map _-> faker.alpha 3]
|
||||||
|
key2 = ["Index", 0.up_to rows . map _-> faker.integer 0 10]
|
||||||
|
key3 = ["Flag", 0.up_to rows . map _-> faker.boolean]
|
||||||
|
value1 = ["Value", 0.up_to rows . map _-> ((faker.decimal -100 100)*100000).floor/100000]
|
||||||
|
value2 = ["ValueWithNothing", 0.up_to rows . map _-> faker.make_some_nothing ((faker.decimal -100 100)*100).floor/100]
|
||||||
|
text1 = ["TextWithNothing", 0.up_to rows . map _-> faker.make_some_nothing (faker.alpha_numeric 10)]
|
||||||
|
text2 = ["Hexadecimal", 0.up_to rows . map _-> faker.make_some_nothing (faker.hexadecimal 8)]
|
||||||
|
Table.new [key1, key2, key3, value1, value2, text1, text2]
|
||||||
|
|
||||||
|
# The Benchmarks ==============================================================
|
||||||
|
main =
|
||||||
|
IO.println <| "Making table data..."
|
||||||
|
table = here.create_table here.vector_size
|
||||||
|
|
||||||
|
Bench.measure (table.group_by (Column_Selector.By_Index []) [Count Nothing]) "Count table" here.iter_size here.num_iterations
|
||||||
|
Bench.measure (table.group_by (Column_Selector.By_Index []) [Count_Distinct "Index"]) "Count Distinct table" here.iter_size here.num_iterations
|
||||||
|
Bench.measure (table.group_by (Column_Selector.By_Index []) [Standard_Deviation "Value"]) "StDev table" here.iter_size here.num_iterations
|
||||||
|
Bench.measure (table.group_by (Column_Selector.By_Index []) [Median "Value"]) "Median table" here.iter_size here.num_iterations
|
||||||
|
Bench.measure (table.group_by (Column_Selector.By_Index []) [Mode "Index"]) "Mode table" here.iter_size here.num_iterations
|
||||||
|
|
||||||
|
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Count Nothing]) "Count grouped" here.iter_size here.num_iterations
|
||||||
|
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Count_Distinct "Code"]) "Count Distinct grouped" here.iter_size here.num_iterations
|
||||||
|
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Standard_Deviation "Value"]) "StDev grouped" here.iter_size here.num_iterations
|
||||||
|
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Median "Value"]) "Median grouped" here.iter_size here.num_iterations
|
||||||
|
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Mode "Index"]) "Mode grouped" here.iter_size here.num_iterations
|
||||||
|
|
||||||
|
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Count Nothing]) "Count 2 level groups" here.iter_size here.num_iterations
|
||||||
|
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Count_Distinct "Code"]) "Count Distinct 2 level groups" here.iter_size here.num_iterations
|
||||||
|
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Standard_Deviation "Value"]) "StDev 2 level groups" here.iter_size here.num_iterations
|
||||||
|
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Median "Value"]) "Median 2 level groups" here.iter_size here.num_iterations
|
||||||
|
Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Mode "Index"]) "Mode 2 level groups" here.iter_size here.num_iterations
|
@ -22,22 +22,22 @@ main =
|
|||||||
This is to compare the performance of handling simple ASCII versus
|
This is to compare the performance of handling simple ASCII versus
|
||||||
performance of handling more complex Unicode characters.
|
performance of handling more complex Unicode characters.
|
||||||
bench_strcmp suite_prefix character_template common_prefix =
|
bench_strcmp suite_prefix character_template common_prefix =
|
||||||
gen = Faker.make_generator
|
faker = Faker.new
|
||||||
## Warning: this relies on the fact that Faker will treat the accent
|
## Warning: this relies on the fact that Faker will treat the accent
|
||||||
codepoint `\u{301}` as a separate code unit. We rely on this to add
|
codepoint `\u{301}` as a separate code unit. We rely on this to add
|
||||||
accents randomly to neighboring characters. If the implementation of
|
accents randomly to neighboring characters. If the implementation of
|
||||||
Faker is changed, this must be modified accordingly.
|
Faker is changed, this must be modified accordingly.
|
||||||
make_alpha_template length = Vector.new length _-> character_template
|
make_alpha_template length = Vector.new length _-> character_template
|
||||||
very_short_template = make_alpha_template 4
|
very_short_template = make_alpha_template 4
|
||||||
very_short = Vector.new 100000 _-> '🤩' + Faker.make_string very_short_template gen
|
very_short = Vector.new 100000 _-> '🤩' + faker.string_value very_short_template
|
||||||
medium_template = make_alpha_template 64
|
medium_template = make_alpha_template 64
|
||||||
medium = Vector.new 10000 _-> Faker.make_string medium_template gen
|
medium = Vector.new 10000 _-> faker.string_value medium_template
|
||||||
|
|
||||||
big_a_codepoint = 65
|
big_a_codepoint = 65
|
||||||
big_template = make_alpha_template 100000
|
big_template = make_alpha_template 100000
|
||||||
big_random = Vector.new 100 _-> Faker.make_string big_template gen
|
big_random = Vector.new 100 _-> faker.string_value big_template
|
||||||
big_early_difference = Vector.new 100 ix->
|
big_early_difference = Vector.new 100 ix->
|
||||||
"bb" + (Text.from_codepoints [big_a_codepoint + ix%5]) + "aaa" + (Faker.make_string big_template gen)
|
"bb" + (Text.from_codepoints [big_a_codepoint + ix%5]) + "aaa" + (faker.string_value big_template)
|
||||||
prefix = common_prefix.repeat 100000
|
prefix = common_prefix.repeat 100000
|
||||||
big_late_difference = Vector.new 100 ix-> prefix + (Text.from_codepoints [big_a_codepoint + ix%5])
|
big_late_difference = Vector.new 100 ix-> prefix + (Text.from_codepoints [big_a_codepoint + ix%5])
|
||||||
|
|
||||||
|
@ -11,7 +11,7 @@ check_all text_vector pattern_vector mode =
|
|||||||
|
|
||||||
main =
|
main =
|
||||||
bench_contains suite_prefix character_template =
|
bench_contains suite_prefix character_template =
|
||||||
gen = Faker.make_generator
|
faker = Faker.new
|
||||||
## Warning: this relies on the fact that Faker will treat the accent
|
## Warning: this relies on the fact that Faker will treat the accent
|
||||||
codepoint `\u{301}` as a separate code unit. We rely on this to add
|
codepoint `\u{301}` as a separate code unit. We rely on this to add
|
||||||
accents randomly to neighboring characters. If the implementation of
|
accents randomly to neighboring characters. If the implementation of
|
||||||
@ -19,7 +19,7 @@ main =
|
|||||||
make_alpha_template length = Vector.new length _-> character_template
|
make_alpha_template length = Vector.new length _-> character_template
|
||||||
|
|
||||||
big_template = make_alpha_template 10000
|
big_template = make_alpha_template 10000
|
||||||
big_random = Vector.new 200 _-> Faker.make_string big_template gen
|
big_random = Vector.new 200 _-> faker.string_value big_template
|
||||||
|
|
||||||
Bench.measure (here.check_all big_random ["AAAAAA"] Text_Matcher.new) suite_prefix+" exact" 10 10
|
Bench.measure (here.check_all big_random ["AAAAAA"] Text_Matcher.new) suite_prefix+" exact" 10 10
|
||||||
Bench.measure (here.check_all big_random ["AAAAAA"] (Text_Matcher.new Case_Insensitive.new)) suite_prefix+" case-insensitive" 10 10
|
Bench.measure (here.check_all big_random ["AAAAAA"] (Text_Matcher.new Case_Insensitive.new)) suite_prefix+" case-insensitive" 10 10
|
||||||
|
2501
test/Table_Tests/data/data.csv
Normal file
2501
test/Table_Tests/data/data.csv
Normal file
File diff suppressed because it is too large
Load Diff
@ -6,7 +6,7 @@ from Standard.Table.Data.Aggregate_Column import all
|
|||||||
import Standard.Test
|
import Standard.Test
|
||||||
|
|
||||||
spec = Test.group "Aggregate Columns" <|
|
spec = Test.group "Aggregate Columns" <|
|
||||||
simple_table = Table.new [["count", [1, 2, Nothing, 3, Nothing]], ["is_valid", [Nothing, False, True, False, Nothing]], ["float", [1, 2.1, 3.4, 5.6, Nothing]], ["text", ["A", "", Nothing, "B,C", Nothing]]]
|
simple_table = Table.new [["count", [1, 2, Nothing, 3, Nothing]], ["is_valid", [Nothing, False, True, False, Nothing]], ["float", [3.4, 1, 5.6, 2.1, Nothing]], ["text", ["A", "", Nothing, "B,C", Nothing]]]
|
||||||
text_col = simple_table.at "text"
|
text_col = simple_table.at "text"
|
||||||
bool_col = simple_table.at "is_valid"
|
bool_col = simple_table.at "is_valid"
|
||||||
float_col = simple_table.at "float"
|
float_col = simple_table.at "float"
|
||||||
@ -134,8 +134,55 @@ spec = Test.group "Aggregate Columns" <|
|
|||||||
test_aggregator simple_table (Count_Distinct "count" test_name) test_name 4
|
test_aggregator simple_table (Count_Distinct "count" test_name) test_name 4
|
||||||
test_aggregator simple_table (Count_Distinct int_col test_name) test_name 4
|
test_aggregator simple_table (Count_Distinct int_col test_name) test_name 4
|
||||||
test_aggregator empty_table (Count_Distinct 0 test_name) test_name 0
|
test_aggregator empty_table (Count_Distinct 0 test_name) test_name 0
|
||||||
|
test_aggregator simple_table (Count_Distinct float_col test_name ignore_nothing=False) test_name 5
|
||||||
|
test_aggregator simple_table (Count_Distinct float_col test_name ignore_nothing=True) test_name 4
|
||||||
|
|
||||||
Test.specify "should be able to count distinct items on a multiple sets of values" <|
|
Test.specify "should be able to count distinct items on a multiple sets of values" <|
|
||||||
test_aggregator simple_table (Count_Distinct [0, 1]) "Count Distinct count is_valid" 5
|
test_aggregator simple_table (Count_Distinct [0, 1]) "Count Distinct count is_valid" 5
|
||||||
|
test_aggregator simple_table (Count_Distinct ["is_valid", "float"]) "Count Distinct is_valid float" 5
|
||||||
|
test_aggregator simple_table (Count_Distinct ["is_valid", "float"] ignore_nothing=True) "Count Distinct is_valid float" 4
|
||||||
|
|
||||||
|
Test.specify "should be able to get the minimum of a set of values" <|
|
||||||
|
test_aggregator simple_table (Minimum -2) "Minimum float" 1
|
||||||
|
test_aggregator simple_table (Minimum -2 test_name) test_name 1
|
||||||
|
test_aggregator simple_table (Minimum "float" test_name) test_name 1
|
||||||
|
test_aggregator simple_table (Minimum float_col test_name) test_name 1
|
||||||
|
test_aggregator empty_table (Minimum 0 test_name) test_name Nothing
|
||||||
|
|
||||||
|
Test.specify "should be able to get the maximum of a set of values" <|
|
||||||
|
test_aggregator simple_table (Maximum -2) "Maximum float" 5.6
|
||||||
|
test_aggregator simple_table (Maximum -2 test_name) test_name 5.6
|
||||||
|
test_aggregator simple_table (Maximum "float" test_name) test_name 5.6
|
||||||
|
test_aggregator simple_table (Maximum float_col test_name) test_name 5.6
|
||||||
|
test_aggregator empty_table (Maximum 0 test_name) test_name Nothing
|
||||||
|
|
||||||
|
Test.specify "should be able to get the shortest of a set of texts" <|
|
||||||
|
test_aggregator simple_table (Shortest -1) "Shortest text" ""
|
||||||
|
test_aggregator simple_table (Shortest -1 test_name) test_name ""
|
||||||
|
test_aggregator simple_table (Shortest "text" test_name) test_name ""
|
||||||
|
test_aggregator simple_table (Shortest text_col test_name) test_name ""
|
||||||
|
test_aggregator empty_table (Shortest 0 test_name) test_name Nothing
|
||||||
|
|
||||||
|
Test.specify "should be able to get the longest of a set of texts" <|
|
||||||
|
test_aggregator simple_table (Longest -1) "Longest text" "B,C"
|
||||||
|
test_aggregator simple_table (Longest -1 test_name) test_name "B,C"
|
||||||
|
test_aggregator simple_table (Longest "text" test_name) test_name "B,C"
|
||||||
|
test_aggregator simple_table (Longest text_col test_name) test_name "B,C"
|
||||||
|
test_aggregator empty_table (Longest 0 test_name) test_name Nothing
|
||||||
|
|
||||||
|
Test.specify "should be able to get the mode of a set of numbers" <|
|
||||||
|
mode_table = Table.new [["tests", [1,2,3,4,2,4,1,2,3,4,2,1,3,5,2,1,2,4,5,2,1,2,3,5,6,1,2,2]]]
|
||||||
|
test_aggregator mode_table (Mode -1) "Mode tests" 2
|
||||||
|
test_aggregator mode_table (Mode -1 test_name) test_name 2
|
||||||
|
test_aggregator empty_table (Mode 0 test_name) test_name Nothing
|
||||||
|
|
||||||
|
Test.specify "should be able to get the percentile of a set of numbers" <|
|
||||||
|
percentile_table = Table.new [["tests", [67,23,56,93,36,47,45,1,88,44,49,13,74,76,4,97,49,81,81,37]]]
|
||||||
|
test_aggregator percentile_table (Percentile 0 0) "0%-ile tests" 1
|
||||||
|
test_aggregator percentile_table (Percentile 0 -1 test_name) test_name 1
|
||||||
|
test_aggregator percentile_table (Percentile 0.15 0) "15%-ile tests" 21.5
|
||||||
|
test_aggregator percentile_table (Percentile 0.25 0) "25%-ile tests" 36.75
|
||||||
|
test_aggregator percentile_table (Percentile 0.66 0) "66%-ile tests" 70.78
|
||||||
|
test_aggregator empty_table (Mode 0 test_name) test_name Nothing
|
||||||
|
|
||||||
main = Test.Suite.run_main here.spec
|
main = Test.Suite.run_main here.spec
|
||||||
|
406
test/Table_Tests/src/Group_By_Spec.enso
Normal file
406
test/Table_Tests/src/Group_By_Spec.enso
Normal file
@ -0,0 +1,406 @@
|
|||||||
|
from Standard.Base import all
|
||||||
|
|
||||||
|
import Standard.Table
|
||||||
|
import Standard.Table.Data.Column_Selector
|
||||||
|
from Standard.Table.Data.Aggregate_Column import all
|
||||||
|
|
||||||
|
import Standard.Test
|
||||||
|
|
||||||
|
spec =
|
||||||
|
file_contents = (Enso_Project.data / "data.csv") . read
|
||||||
|
table = Table.from_csv file_contents
|
||||||
|
empty_table = Table.new <| table.columns.map c->[c.name, []]
|
||||||
|
|
||||||
|
find_row key table =
|
||||||
|
0.up_to table.row_count . find i->
|
||||||
|
0.up_to key.length . all j-> (table.columns.at j . at i)==(key.at j)
|
||||||
|
|
||||||
|
Test.group "Table.group_by should summarize whole table " <|
|
||||||
|
grouping = Column_Selector.By_Index []
|
||||||
|
Test.specify "should be able to count" <|
|
||||||
|
grouped = table.group_by grouping [Count Nothing]
|
||||||
|
grouped.row_count . should_equal 1
|
||||||
|
grouped.columns.length . should_equal 1
|
||||||
|
grouped.columns.at 0 . name . should_equal "Count"
|
||||||
|
grouped.columns.at 0 . at 0 . should_equal 2500
|
||||||
|
|
||||||
|
Test.specify "should be able to count missing values" <|
|
||||||
|
grouped = table.group_by grouping [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
|
||||||
|
grouped.row_count . should_equal 1
|
||||||
|
grouped.columns.length . should_equal 4
|
||||||
|
grouped.columns.at 0 . name . should_equal "Count Nothing Hexadecimal"
|
||||||
|
grouped.columns.at 0 . at 0 . should_equal 236
|
||||||
|
grouped.columns.at 1 . name . should_equal "Count Not Nothing Hexadecimal"
|
||||||
|
grouped.columns.at 1 . at 0 . should_equal 2264
|
||||||
|
grouped.columns.at 2 . name . should_equal "Count Empty TextWithNothing"
|
||||||
|
grouped.columns.at 2 . at 0 . should_equal 249
|
||||||
|
grouped.columns.at 3 . name . should_equal "Count Not Empty TextWithNothing"
|
||||||
|
grouped.columns.at 3 . at 0 . should_equal 2251
|
||||||
|
|
||||||
|
Test.specify "should be able to count distinct values" <|
|
||||||
|
grouped = table.group_by grouping [Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag", Count_Distinct ["Index", "Flag"]]
|
||||||
|
grouped.row_count . should_equal 1
|
||||||
|
grouped.columns.length . should_equal 4
|
||||||
|
grouped.columns.at 0 . name . should_equal "Count Distinct Code"
|
||||||
|
grouped.columns.at 0 . at 0 . should_equal 2333
|
||||||
|
grouped.columns.at 1 . name . should_equal "Count Distinct Index"
|
||||||
|
grouped.columns.at 1 . at 0 . should_equal 10
|
||||||
|
grouped.columns.at 2 . name . should_equal "Count Distinct Flag"
|
||||||
|
grouped.columns.at 2 . at 0 . should_equal 2
|
||||||
|
grouped.columns.at 3 . name . should_equal "Count Distinct Index Flag"
|
||||||
|
grouped.columns.at 3 . at 0 . should_equal 20
|
||||||
|
|
||||||
|
Test.specify "should be able to sum, average and standard deviation of values" <|
|
||||||
|
grouped = table.group_by grouping [Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)]
|
||||||
|
grouped.row_count . should_equal 1
|
||||||
|
grouped.columns.length . should_equal 8
|
||||||
|
grouped.columns.at 0 . name . should_equal "Sum Value"
|
||||||
|
grouped.columns.at 0 . at 0 . should_equal -932.411550 epsilon=0.000001
|
||||||
|
grouped.columns.at 1 . name . should_equal "Sum ValueWithNothing"
|
||||||
|
grouped.columns.at 1 . at 0 . should_equal 2757.09 epsilon=0.000001
|
||||||
|
grouped.columns.at 2 . name . should_equal "Average Value"
|
||||||
|
grouped.columns.at 2 . at 0 . should_equal -0.372965 epsilon=0.000001
|
||||||
|
grouped.columns.at 3 . name . should_equal "Average ValueWithNothing"
|
||||||
|
grouped.columns.at 3 . at 0 . should_equal 1.228650 epsilon=0.000001
|
||||||
|
grouped.columns.at 4 . name . should_equal "Standard Deviation Value"
|
||||||
|
grouped.columns.at 4 . at 0 . should_equal 56.708660 epsilon=0.000001
|
||||||
|
grouped.columns.at 5 . name . should_equal "Standard Deviation ValueWithNothing"
|
||||||
|
grouped.columns.at 5 . at 0 . should_equal 58.588610 epsilon=0.000001
|
||||||
|
grouped.columns.at 6 . name . should_equal "Standard Deviation Value_1"
|
||||||
|
grouped.columns.at 6 . at 0 . should_equal 56.697317 epsilon=0.000001
|
||||||
|
grouped.columns.at 7 . name . should_equal "Standard Deviation ValueWithNothing_1"
|
||||||
|
grouped.columns.at 7 . at 0 . should_equal 58.575554 epsilon=0.000001
|
||||||
|
|
||||||
|
Test.specify "should be able to create median, mode and percentile values" <|
|
||||||
|
grouped = table.group_by grouping [Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"]
|
||||||
|
grouped.row_count . should_equal 1
|
||||||
|
grouped.columns.length . should_equal 6
|
||||||
|
grouped.columns.at 0 . name . should_equal "Median Index"
|
||||||
|
grouped.columns.at 0 . at 0 . should_equal 5 epsilon=0.000001
|
||||||
|
grouped.columns.at 1 . name . should_equal "Median Value"
|
||||||
|
grouped.columns.at 1 . at 0 . should_equal 1.298375 epsilon=0.000001
|
||||||
|
grouped.columns.at 2 . name . should_equal "Median ValueWithNothing"
|
||||||
|
grouped.columns.at 2 . at 0 . should_equal 2.235 epsilon=0.000001
|
||||||
|
grouped.columns.at 3 . name . should_equal "Mode Index"
|
||||||
|
grouped.columns.at 3 . at 0 . should_equal 7
|
||||||
|
grouped.columns.at 4 . name . should_equal "25%-ile Value"
|
||||||
|
grouped.columns.at 4 . at 0 . should_equal -49.962710 epsilon=0.000001
|
||||||
|
grouped.columns.at 5 . name . should_equal "40%-ile ValueWithNothing"
|
||||||
|
grouped.columns.at 5 . at 0 . should_equal -17.960000 epsilon=0.000001
|
||||||
|
|
||||||
|
Test.specify "should be able to get first and last values" <|
|
||||||
|
grouped = table.group_by grouping [First "Index", Last "Value"]
|
||||||
|
grouped.row_count . should_equal 1
|
||||||
|
grouped.columns.length . should_equal 2
|
||||||
|
grouped.columns.at 0 . name . should_equal "First Index"
|
||||||
|
grouped.columns.at 0 . at 0 . should_equal 7
|
||||||
|
grouped.columns.at 1 . name . should_equal "Last Value"
|
||||||
|
grouped.columns.at 1 . at 0 . should_equal 70.99931 epsilon=0.000001
|
||||||
|
|
||||||
|
Test.specify "should be able to get minimum and maximum values" <|
|
||||||
|
grouped = table.group_by grouping [Minimum "Value", Maximum "Value", Minimum "ValueWithNothing", Maximum "ValueWithNothing"]
|
||||||
|
grouped.row_count . should_equal 1
|
||||||
|
grouped.columns.length . should_equal 4
|
||||||
|
grouped.columns.at 0 . name . should_equal "Minimum Value"
|
||||||
|
grouped.columns.at 0 . at 0 . should_equal -99.964200 epsilon=0.000001
|
||||||
|
grouped.columns.at 1 . name . should_equal "Maximum Value"
|
||||||
|
grouped.columns.at 1 . at 0 . should_equal 99.977480 epsilon=0.000001
|
||||||
|
grouped.columns.at 2 . name . should_equal "Minimum ValueWithNothing"
|
||||||
|
grouped.columns.at 2 . at 0 . should_equal -99.99 epsilon=0.000001
|
||||||
|
grouped.columns.at 3 . name . should_equal "Maximum ValueWithNothing"
|
||||||
|
grouped.columns.at 3 . at 0 . should_equal 99.95 epsilon=0.000001
|
||||||
|
|
||||||
|
Test.specify "should be able to get shortest, longest and concatenated values" <|
|
||||||
|
grouped = table.group_by grouping [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
|
||||||
|
grouped.row_count . should_equal 1
|
||||||
|
grouped.columns.length . should_equal 3
|
||||||
|
grouped.columns.at 0 . name . should_equal "Shortest TextWithNothing"
|
||||||
|
grouped.columns.at 0 . at 0 . should_equal "f5"
|
||||||
|
grouped.columns.at 1 . name . should_equal "Longest TextWithNothing"
|
||||||
|
grouped.columns.at 1 . at 0 . should_equal "setp295gjvbanana"
|
||||||
|
grouped.columns.at 2 . name . should_equal "Concatenate Code"
|
||||||
|
grouped.columns.at 2 . at 0 . length . should_equal 7500
|
||||||
|
|
||||||
|
Test.group "Table.group_by should summarize empty table " <|
    # Empty index selector: no key columns are handed to `group_by`.
    grouping = Column_Selector.By_Index []

    # Every case below runs against `empty_table` and still expects exactly one
    # output row: counts are expected to be 0, all other aggregates Nothing.
    Test.specify "should be able to count" <|
        result = empty_table.group_by grouping [Count Nothing]
        result.row_count . should_equal 1
        cols = result.columns
        cols.length . should_equal 1
        cols.at 0 . name . should_equal "Count"
        cols.at 0 . at 0 . should_equal 0

    Test.specify "should be able to count missing values" <|
        aggregates = [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
        result = empty_table.group_by grouping aggregates
        result.row_count . should_equal 1
        cols = result.columns
        cols.length . should_equal 4
        cols.at 0 . name . should_equal "Count Nothing Hexadecimal"
        cols.at 0 . at 0 . should_equal 0
        cols.at 1 . name . should_equal "Count Not Nothing Hexadecimal"
        cols.at 1 . at 0 . should_equal 0
        cols.at 2 . name . should_equal "Count Empty TextWithNothing"
        cols.at 2 . at 0 . should_equal 0
        cols.at 3 . name . should_equal "Count Not Empty TextWithNothing"
        cols.at 3 . at 0 . should_equal 0

    Test.specify "should be able to count distinct values" <|
        result = empty_table.group_by grouping [Count_Distinct "Code"]
        result.row_count . should_equal 1
        cols = result.columns
        cols.length . should_equal 1
        cols.at 0 . name . should_equal "Count Distinct Code"
        cols.at 0 . at 0 . should_equal 0

    Test.specify "should be able to sum, average and standard deviation of values" <|
        aggregates = [Sum "Value", Average "ValueWithNothing", Standard_Deviation "Value", (Standard_Deviation "ValueWithNothing" population=True)]
        result = empty_table.group_by grouping aggregates
        result.row_count . should_equal 1
        cols = result.columns
        cols.length . should_equal 4
        cols.at 0 . name . should_equal "Sum Value"
        cols.at 0 . at 0 . should_equal Nothing
        cols.at 1 . name . should_equal "Average ValueWithNothing"
        cols.at 1 . at 0 . should_equal Nothing
        cols.at 2 . name . should_equal "Standard Deviation Value"
        cols.at 2 . at 0 . should_equal Nothing
        cols.at 3 . name . should_equal "Standard Deviation ValueWithNothing"
        cols.at 3 . at 0 . should_equal Nothing

    Test.specify "should be able to create median, mode and percentile values" <|
        aggregates = [Median "Index", Mode "Index", Percentile 0.25 "Value"]
        result = empty_table.group_by grouping aggregates
        result.row_count . should_equal 1
        cols = result.columns
        cols.length . should_equal 3
        cols.at 0 . name . should_equal "Median Index"
        cols.at 0 . at 0 . should_equal Nothing
        cols.at 1 . name . should_equal "Mode Index"
        cols.at 1 . at 0 . should_equal Nothing
        cols.at 2 . name . should_equal "25%-ile Value"
        cols.at 2 . at 0 . should_equal Nothing

    Test.specify "should be able to get first and last values" <|
        result = empty_table.group_by grouping [First "Index", Last "Value"]
        result.row_count . should_equal 1
        cols = result.columns
        cols.length . should_equal 2
        cols.at 0 . name . should_equal "First Index"
        cols.at 0 . at 0 . should_equal Nothing
        cols.at 1 . name . should_equal "Last Value"
        cols.at 1 . at 0 . should_equal Nothing

    Test.specify "should be able to get minimum and maximum values" <|
        result = empty_table.group_by grouping [Minimum "Value", Maximum "ValueWithNothing"]
        result.row_count . should_equal 1
        cols = result.columns
        cols.length . should_equal 2
        cols.at 0 . name . should_equal "Minimum Value"
        cols.at 0 . at 0 . should_equal Nothing
        cols.at 1 . name . should_equal "Maximum ValueWithNothing"
        cols.at 1 . at 0 . should_equal Nothing

    Test.specify "should be able to get shortest, longest and concatenated values" <|
        aggregates = [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
        result = empty_table.group_by grouping aggregates
        result.row_count . should_equal 1
        cols = result.columns
        cols.length . should_equal 3
        cols.at 0 . name . should_equal "Shortest TextWithNothing"
        cols.at 0 . at 0 . should_equal Nothing
        cols.at 1 . name . should_equal "Longest TextWithNothing"
        cols.at 1 . at 0 . should_equal Nothing
        cols.at 2 . name . should_equal "Concatenate Code"
        cols.at 2 . at 0 . should_equal Nothing
|
||||||
|
|
||||||
|
Test.group "Table.group_by should not summarize empty table when grouped " <|
    # Group on the column at index 0; the cases below expect that key column
    # to be named "Code" and to appear first in the result.
    grouping = Column_Selector.By_Index [0]

    # With a key column and `empty_table` as input, every case expects zero
    # result rows, so only the row count, column count and column names are
    # checked - there are no cell values to inspect.
    Test.specify "should be able to count" <|
        grouped = empty_table.group_by grouping [Count Nothing]
        grouped.row_count . should_equal 0
        grouped.columns.length . should_equal 2
        grouped.columns.at 0 . name . should_equal "Code"
        grouped.columns.at 1 . name . should_equal "Count"

    Test.specify "should be able to count missing values" <|
        grouped = empty_table.group_by grouping [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
        grouped.row_count . should_equal 0
        grouped.columns.length . should_equal 5
        grouped.columns.at 0 . name . should_equal "Code"
        grouped.columns.at 1 . name . should_equal "Count Nothing Hexadecimal"
        grouped.columns.at 2 . name . should_equal "Count Not Nothing Hexadecimal"
        grouped.columns.at 3 . name . should_equal "Count Empty TextWithNothing"
        grouped.columns.at 4 . name . should_equal "Count Not Empty TextWithNothing"

    Test.specify "should be able to count distinct values" <|
        grouped = empty_table.group_by grouping [Count_Distinct "Code"]
        grouped.row_count . should_equal 0
        grouped.columns.length . should_equal 2
        grouped.columns.at 0 . name . should_equal "Code"
        grouped.columns.at 1 . name . should_equal "Count Distinct Code"

    Test.specify "should be able to sum, average and standard deviation of values" <|
        grouped = empty_table.group_by grouping [Sum "Value", Average "ValueWithNothing", Standard_Deviation "Value", (Standard_Deviation "ValueWithNothing" population=True)]
        grouped.row_count . should_equal 0
        grouped.columns.length . should_equal 5
        grouped.columns.at 0 . name . should_equal "Code"
        grouped.columns.at 1 . name . should_equal "Sum Value"
        grouped.columns.at 2 . name . should_equal "Average ValueWithNothing"
        grouped.columns.at 3 . name . should_equal "Standard Deviation Value"
        grouped.columns.at 4 . name . should_equal "Standard Deviation ValueWithNothing"

    # Label names all three aggregates exercised here (Median, Mode and
    # Percentile), matching the equivalent ungrouped empty-table case.
    Test.specify "should be able to create median, mode and percentile values" <|
        grouped = empty_table.group_by grouping [Median "Index", Mode "Index", Percentile 0.25 "Value"]
        grouped.row_count . should_equal 0
        grouped.columns.length . should_equal 4
        grouped.columns.at 0 . name . should_equal "Code"
        grouped.columns.at 1 . name . should_equal "Median Index"
        grouped.columns.at 2 . name . should_equal "Mode Index"
        grouped.columns.at 3 . name . should_equal "25%-ile Value"

    Test.specify "should be able to get first and last values" <|
        grouped = empty_table.group_by grouping [First "Index", Last "Value"]
        grouped.row_count . should_equal 0
        grouped.columns.length . should_equal 3
        grouped.columns.at 0 . name . should_equal "Code"
        grouped.columns.at 1 . name . should_equal "First Index"
        grouped.columns.at 2 . name . should_equal "Last Value"

    Test.specify "should be able to get minimum and maximum values" <|
        grouped = empty_table.group_by grouping [Minimum "Value", Maximum "ValueWithNothing"]
        grouped.row_count . should_equal 0
        grouped.columns.length . should_equal 3
        grouped.columns.at 0 . name . should_equal "Code"
        grouped.columns.at 1 . name . should_equal "Minimum Value"
        grouped.columns.at 2 . name . should_equal "Maximum ValueWithNothing"

    Test.specify "should be able to get shortest, longest and concatenated values" <|
        grouped = empty_table.group_by grouping [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
        grouped.row_count . should_equal 0
        grouped.columns.length . should_equal 4
        grouped.columns.at 0 . name . should_equal "Code"
        grouped.columns.at 1 . name . should_equal "Shortest TextWithNothing"
        grouped.columns.at 2 . name . should_equal "Longest TextWithNothing"
        grouped.columns.at 3 . name . should_equal "Concatenate Code"
|
||||||
|
|
||||||
|
Test.group "Table.group_by should be able to group on single field " <|
    # Group on the "Index" column by name.
    # NOTE(review): the casing `By_name` differs from `By_Index` used by the
    # other groups in this file - confirm it resolves as intended.
    grouping = Column_Selector.By_name.new ["Index"]

    # Each case expects 10 result rows with "Index" as the first column, then
    # uses `find_row [6] grouped` to obtain a row index (asserted not to be
    # Nothing) at which the aggregate values are checked.
    # NOTE(review): `find_row` is defined elsewhere; presumably it locates the
    # row whose key column(s) equal the given values - verify.
    Test.specify "should be able to count" <|
        grouped = table.group_by grouping [Count Nothing]
        grouped.row_count . should_equal 10
        grouped.columns.length . should_equal 2
        grouped.columns.at 0 . name . should_equal "Index"
        idx = find_row [6] grouped
        idx.is_nothing . should_be_false
        grouped.columns.at 1 . name . should_equal "Count"
        grouped.columns.at 1 . at idx . should_equal 261

    Test.specify "should be able to count missing values" <|
        grouped = table.group_by grouping [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
        grouped.row_count . should_equal 10
        grouped.columns.length . should_equal 5
        grouped.columns.at 0 . name . should_equal "Index"
        idx = find_row [6] grouped
        idx.is_nothing . should_be_false
        grouped.columns.at 1 . name . should_equal "Count Nothing Hexadecimal"
        grouped.columns.at 1 . at idx . should_equal 24
        grouped.columns.at 2 . name . should_equal "Count Not Nothing Hexadecimal"
        grouped.columns.at 2 . at idx . should_equal 237
        grouped.columns.at 3 . name . should_equal "Count Empty TextWithNothing"
        grouped.columns.at 3 . at idx . should_equal 31
        grouped.columns.at 4 . name . should_equal "Count Not Empty TextWithNothing"
        grouped.columns.at 4 . at idx . should_equal 230

    Test.specify "should be able to count distinct values" <|
        grouped = table.group_by grouping [Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag", Count_Distinct ["Index", "Flag"]]
        grouped.row_count . should_equal 10
        grouped.columns.length . should_equal 5
        grouped.columns.at 0 . name . should_equal "Index"
        idx = find_row [6] grouped
        idx.is_nothing . should_be_false
        grouped.columns.at 1 . name . should_equal "Count Distinct Code"
        grouped.columns.at 1 . at idx . should_equal 260
        grouped.columns.at 2 . name . should_equal "Count Distinct Index"
        grouped.columns.at 2 . at idx . should_equal 1
        grouped.columns.at 3 . name . should_equal "Count Distinct Flag"
        grouped.columns.at 3 . at idx . should_equal 2
        grouped.columns.at 4 . name . should_equal "Count Distinct Index Flag"
        grouped.columns.at 4 . at idx . should_equal 2

    Test.specify "should be able to sum, average and standard deviation of values" <|
        grouped = table.group_by grouping [Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)]
        grouped.row_count . should_equal 10
        grouped.columns.length . should_equal 9
        grouped.columns.at 0 . name . should_equal "Index"
        idx = find_row [6] grouped
        idx.is_nothing . should_be_false
        grouped.columns.at 1 . name . should_equal "Sum Value"
        grouped.columns.at 1 . at idx . should_equal -447.847390 epsilon=0.000001
        grouped.columns.at 2 . name . should_equal "Sum ValueWithNothing"
        grouped.columns.at 2 . at idx . should_equal 151.86 epsilon=0.000001
        grouped.columns.at 3 . name . should_equal "Average Value"
        grouped.columns.at 3 . at idx . should_equal -1.715890 epsilon=0.000001
        grouped.columns.at 4 . name . should_equal "Average ValueWithNothing"
        grouped.columns.at 4 . at idx . should_equal 0.646213 epsilon=0.000001
        grouped.columns.at 5 . name . should_equal "Standard Deviation Value"
        grouped.columns.at 5 . at idx . should_equal 60.272158 epsilon=0.000001
        grouped.columns.at 6 . name . should_equal "Standard Deviation ValueWithNothing"
        grouped.columns.at 6 . at idx . should_equal 56.798691 epsilon=0.000001
        # Columns 7 and 8 come from the `population=True` variants; their
        # expected names carry a `_1` suffix (presumably added to keep column
        # names unique - confirm against the naming logic).
        grouped.columns.at 7 . name . should_equal "Standard Deviation Value_1"
        grouped.columns.at 7 . at idx . should_equal 60.156583 epsilon=0.000001
        grouped.columns.at 8 . name . should_equal "Standard Deviation ValueWithNothing_1"
        grouped.columns.at 8 . at idx . should_equal 56.677714 epsilon=0.000001

    # Label names all three aggregate kinds exercised here (Median, Mode and
    # Percentile), matching the equivalent ungrouped empty-table case.
    Test.specify "should be able to create median, mode and percentile values" <|
        grouped = table.group_by grouping [Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"]
        grouped.row_count . should_equal 10
        grouped.columns.length . should_equal 7
        grouped.columns.at 0 . name . should_equal "Index"
        idx = find_row [6] grouped
        idx.is_nothing . should_be_false
        grouped.columns.at 1 . name . should_equal "Median Index"
        grouped.columns.at 1 . at idx . should_equal 6 epsilon=0.000001
        grouped.columns.at 2 . name . should_equal "Median Value"
        grouped.columns.at 2 . at idx . should_equal 2.041150 epsilon=0.000001
        grouped.columns.at 3 . name . should_equal "Median ValueWithNothing"
        grouped.columns.at 3 . at idx . should_equal 1.38 epsilon=0.000001
        grouped.columns.at 4 . name . should_equal "Mode Index"
        grouped.columns.at 4 . at idx . should_equal 6
        grouped.columns.at 5 . name . should_equal "25%-ile Value"
        grouped.columns.at 5 . at idx . should_equal -56.019100 epsilon=0.000001
        grouped.columns.at 6 . name . should_equal "40%-ile ValueWithNothing"
        grouped.columns.at 6 . at idx . should_equal -18.802000 epsilon=0.000001

    Test.specify "should be able to get first and last values" <|
        grouped = table.group_by grouping [First "TextWithNothing", Last "Value"]
        grouped.row_count . should_equal 10
        grouped.columns.length . should_equal 3
        grouped.columns.at 0 . name . should_equal "Index"
        idx = find_row [6] grouped
        idx.is_nothing . should_be_false
        grouped.columns.at 1 . name . should_equal "First TextWithNothing"
        grouped.columns.at 1 . at idx . should_equal "kmqxqkl6qx"
        grouped.columns.at 2 . name . should_equal "Last Value"
        grouped.columns.at 2 . at idx . should_equal 56.15916 epsilon=0.000001

    Test.specify "should be able to get minimum and maximum values" <|
        grouped = table.group_by grouping [Minimum "Value", Maximum "Value", Minimum "ValueWithNothing", Maximum "ValueWithNothing"]
        grouped.row_count . should_equal 10
        grouped.columns.length . should_equal 5
        grouped.columns.at 0 . name . should_equal "Index"
        idx = find_row [6] grouped
        idx.is_nothing . should_be_false
        grouped.columns.at 1 . name . should_equal "Minimum Value"
        grouped.columns.at 1 . at idx . should_equal -99.605880 epsilon=0.000001
        grouped.columns.at 2 . name . should_equal "Maximum Value"
        grouped.columns.at 2 . at idx . should_equal 99.12739 epsilon=0.000001
        grouped.columns.at 3 . name . should_equal "Minimum ValueWithNothing"
        grouped.columns.at 3 . at idx . should_equal -99.99 epsilon=0.000001
        grouped.columns.at 4 . name . should_equal "Maximum ValueWithNothing"
        grouped.columns.at 4 . at idx . should_equal 99.79 epsilon=0.000001

    Test.specify "should be able to get shortest, longest and concatenated values" <|
        grouped = table.group_by grouping [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
        grouped.row_count . should_equal 10
        grouped.columns.length . should_equal 4
        grouped.columns.at 0 . name . should_equal "Index"
        idx = find_row [6] grouped
        idx.is_nothing . should_be_false
        # NOTE(review): Shortest and Longest are expected to be the same value,
        # which is also the expected First "TextWithNothing" for this row in
        # the first/last case, although the count case expects 261 rows in the
        # group - verify these expectations against the data set.
        grouped.columns.at 1 . name . should_equal "Shortest TextWithNothing"
        grouped.columns.at 1 . at idx . should_equal "kmqxqkl6qx"
        grouped.columns.at 2 . name . should_equal "Longest TextWithNothing"
        grouped.columns.at 2 . at idx . should_equal "kmqxqkl6qx"
        grouped.columns.at 3 . name . should_equal "Concatenate Code"
        # Only the length of the concatenated value is checked.
        grouped.columns.at 3 . at idx . length . should_equal 783
|
||||||
|
|
||||||
|
# Entry point: passes this module's `spec` to `Test.Suite.run_main`.
main = Test.Suite.run_main here.spec
|
@ -91,4 +91,21 @@ spec = Test.group "Maps" <|
|
|||||||
Test.specify "should allow getting a vector of the values" <|
|
Test.specify "should allow getting a vector of the values" <|
|
||||||
m = Map.empty . insert 1 2 . insert 2 4
|
m = Map.empty . insert 1 2 . insert 2 4
|
||||||
m.values . should_equal [2, 4]
|
m.values . should_equal [2, 4]
|
||||||
|
Test.specify "should be able to get the first key value pair" <|
    # Build a map with entries 1 -> 2 and 2 -> 4, one insert at a time.
    base = Map.empty . insert 1 2
    filled = base.insert 2 4
    entry = filled.first
    # The pair is expected to hold key 1 and its value 2.
    entry.first . should_equal 1
    entry.second . should_equal 2
|
||||||
|
Test.specify "should be able to get the first key value pair of an empty map" <|
    # An empty map has no first entry, so Nothing is expected.
    Map.empty.first . should_equal Nothing
|
||||||
|
Test.specify "should be able to get the last key value pair" <|
    # Build a map with entries 1 -> 2 and 2 -> 4, one insert at a time.
    base = Map.empty . insert 1 2
    filled = base.insert 2 4
    entry = filled.last
    # The pair is expected to hold key 2 and its value 4.
    entry.first . should_equal 2
    entry.second . should_equal 4
|
||||||
|
Test.specify "should be able to get the last key value pair of an empty map" <|
    # An empty map has no last entry, so Nothing is expected.
    Map.empty.last . should_equal Nothing
|
||||||
|
|
||||||
|
# Entry point: passes this module's `spec` to `Test.Suite.run_main`.
main = Test.Suite.run_main here.spec
|
||||||
|
Loading…
Reference in New Issue
Block a user