Restructuring the Faker type and creating tests for Group_By (#3318)

- Added Minimum, Maximum, Longest, Shortest, Mode, Percentile - Added first and last to Map - Restructured Faker type more in line with FakerJS - Created 2,500 row data set - Tests for group_by - Performance tests for group_by
2024-12-23 18:15:21 +03:00 · 2022-03-09 10:31:02 +00:00 · 2022-03-09 10:31:02 +00:00 · 65465fb8ef
commit 65465fb8ef
parent f92108158c
16 changed files with 3331 additions and 130 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -64,6 +64,8 @@
  `Vector.fold_with_index` and `Vector.take` methods.][3236]
 - [Implemented new `Text.insert` method][3311]
 - [Implemented `Bool.compare_to` method][3317]
+- [Implemented `Map.first`, `Map.last` functions. Expanded `Table.group_by` to
+  also compute mode, percentile, minimum, maximum.][3318]

 [debug-shortcuts]:
  https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -101,6 +103,7 @@
 [3236]: https://github.com/enso-org/enso/pull/3236
 [3311]: https://github.com/enso-org/enso/pull/3311
 [3317]: https://github.com/enso-org/enso/pull/3317
+[3318]: https://github.com/enso-org/enso/pull/3318

 #### Enso Compiler

--- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Map.enso
+++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Map.enso
@ -440,6 +440,24 @@ type Map
        to_vector_with_builder this
        builder.to_vector

+    ## Get the key-value pair with the lowest key in the map.
+       The search repeatedly descends into the `l` sub-tree of each `Bin`
+       node, remembering the pair of the last node visited.
+       If the map is empty, returns Nothing.
+    first : Pair | Nothing
+    first =
+        first p m = case m of
+            Bin _ k v l _ -> @Tail_Call first (Pair k v) l
+            Tip -> p
+        first Nothing this
+
+    ## Get the key-value pair with the highest key in the map.
+       The search repeatedly descends into the `r` sub-tree of each `Bin`
+       node, remembering the pair of the last node visited.
+       If the map is empty, returns Nothing.
+    last : Pair | Nothing
+    last =
+        last p m = case m of
+            Bin _ k v _ r -> @Tail_Call last (Pair k v) r
+            Tip -> p
+        last Nothing this
+
 ## UNSTABLE

   An error for getting a missing value from a map.
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Aggregate_Column.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Aggregate_Column.enso
@ -6,54 +6,111 @@ import Standard.Table.Data.Group_By
 ## Defines an Aggregate Column
 type Aggregate_Column
    ## Creates a new column with the row count of each group
+
+       Arguments:
+       - name: name of new column.
    type Count (name:Text|Nothing=Nothing)

    ## Creates a new column with the count of unique items in the selected
       column(s) within each group.
-    type Count_Distinct (columns:Column|Text|Integer|[(Column|Text|Integer)]) (name:Text|Nothing=Nothing)
+
+       Arguments:
+       - columns: either a single or set of columns (specified by name, index or Column object) to count across.
+       - name: name of new column.
+       - ignore_nothing: if `True`, rows where all of the selected columns are `Nothing` are not counted.
+    type Count_Distinct (columns:Column|Text|Integer|[(Column|Text|Integer)]) (name:Text|Nothing=Nothing) (ignore_nothing:Boolean=False)

    ## ALIAS Count_Not_Null

       Creates a new column with the count of not `Nothing` (null) values of the
       specified column within each group.
+
+       Arguments:
+       - column: column (specified by name, index or Column object) to count.
+       - name: name of new column.
    type Count_Not_Nothing (column:Column|Text|Integer) (name:Text|Nothing=Nothing)

    ## ALIAS Count_Null, Count_Missing

       Creates a new column with the count of `Nothing` (null) values of the
       specified column within each group.
+
+       Arguments:
+       - column: column (specified by name, index or Column object) to count.
+       - name: name of new column.
    type Count_Nothing (column:Column|Text|Integer) (name:Text|Nothing=Nothing)

    ## Creates a new column with the count of not `Nothing` (null) and non-empty
       ("") values of the column within each group.
+
+       Arguments:
+       - column: column (specified by name, index or Column object) to count.
+       - name: name of new column.
    type Count_Not_Empty (column:Column|Text|Integer) (name:Text|Nothing=Nothing)

    ## Creates a new column with the count of `Nothing` (null) or empty ("")
       text values of the column within each group.
+
+       Arguments:
+       - column: column (specified by name, index or Column object) to count.
+       - name: name of new column.
    type Count_Empty (column:Column|Text|Integer) (name:Text|Nothing=Nothing)

    ## Creates a new column with the sum of values (ignoring missing values) of
       the specified column within each group.
+
+       Arguments:
+       - column: column (specified by name, index or Column object) to total.
+       - name: name of new column.
    type Sum (column:Column|Text|Integer) (name:Text|Nothing=Nothing)

    ## Creates a new column with the mean of values (ignoring missing values) of
       the specified column within each group.
+
+       Arguments:
+       - column: column (specified by name, index or Column object) to average.
+       - name: name of new column.
    type Average (column:Column|Text|Integer) (name:Text|Nothing=Nothing)

    ## Creates a new column with the median of values (ignoring missing values)
       of the specified column within each group.
+
+       Arguments:
+       - column: column (specified by name, index or Column object) to calculate median on.
+       - name: name of new column.
    type Median (column:Column|Text|Integer) (name:Text|Nothing=Nothing)

+    ## Creates a new column with the percentile of values (ignoring missing
+       values) of the specified column within each group.
+
+       Arguments:
+       - percentile: Percentage to compute from 0-1 inclusive.
+       - column: column (specified by name, index or Column object) to compute percentile.
+       - name: name of new column.
+    type Percentile (percentile:Decimal) (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
+
+    ## Creates a new column with the mode of values (ignoring missing values)
+       of the specified column within each group.
+
+       Arguments:
+       - column: column (specified by name, index or Column object) to find the most common value.
+       - name: name of new column.
+    type Mode (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
+
    ## Creates a new column with the standard deviation of values (ignoring
       missing values) of the column within each group.

       Arguments:
+       - column: column (specified by name, index or Column object) to compute standard deviation.
+       - name: name of new column.
       - population argument specifies if group is a sample or the population
    type Standard_Deviation (column:Column|Text|Integer) (name:Text|Nothing=Nothing) (population:Boolean=False)

    ## Creates a new column with the values concatenated together. `Nothing` values will become an empty string.

       Arguments:
+       - column: column (specified by name, index or Column object) to concatenate values.
+       - name: name of new column.
       - separator: added between each value.
       - prefix: added at the start of the result.
       - suffix: added at the end of the result.
@ -64,6 +121,8 @@ type Aggregate_Column
    ## Creates a new column with the first value in each group.

       Arguments:
+       - column: column (specified by name, index or Column object) to find first group entry.
+       - name: name of new column.
       - ignore_nothing: if `True`, then missing values are ignored and first
         not missing value returned.
       - order_by: required for database tables. Specifies how to order the
@ -73,12 +132,42 @@ type Aggregate_Column
    ## Creates a new column with the last value in each group.

       Arguments:
+       - column: column (specified by name, index or Column object) to find last group entry.
+       - name: name of new column.
       - ignore_nothing: if `True`, then missing values are ignored and last
         not missing value returned.
       - order_by: required for database tables. Specifies how to order the
         results within the group.
    type Last (column:Column|Text|Integer) (name:Text|Nothing=Nothing) (ignore_nothing:Boolean=True) (order_by:Column_Selector|Nothing=Nothing)

+    ## Creates a new column with the maximum value in each group.
+
+       Arguments:
+       - column: column (specified by name, index or Column object) to find maximum.
+       - name: name of new column.
+    type Maximum (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
+
+    ## Creates a new column with the minimum value in each group.
+
+       Arguments:
+       - column: column (specified by name, index or Column object) to find minimum.
+       - name: name of new column.
+    type Minimum (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
+
+    ## Creates a new column with the shortest text in each group.
+
+       Arguments:
+       - column: column (specified by name, index or Column object) to find shortest value.
+       - name: name of new column.
+    type Shortest (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
+
+    ## Creates a new column with the longest text in each group.
+
+       Arguments:
+       - column: column (specified by name, index or Column object) to find longest value.
+       - name: name of new column.
+    type Longest (column:Column|Text|Integer) (name:Text|Nothing=Nothing)
+
    ## Gets a column name to use for the aggregate column
    column_name : Table->Text
    column_name table =
@ -86,7 +175,7 @@ type Aggregate_Column
            get_name c = (this.resolve_column table c).name
            case this of
                Count _ -> "Count"
-                Count_Distinct c _ ->
+                Count_Distinct c _ _ ->
                    case c of
                        Vector.Vector _ -> "Count Distinct " + ((c.map get_name).join " ")
                        _ -> "Count Distinct " + (get_name c)
@ -97,10 +186,16 @@ type Aggregate_Column
                Sum c _ -> "Sum " + (get_name c)
                Average c _ -> "Average " + (get_name c)
                Median c _ -> "Median " + (get_name c)
+                Percentile p c _ -> (p*100).floor.to_text + "%-ile " + (get_name c)
+                Mode c _ -> "Mode " + (get_name c)
                Standard_Deviation c _ _ -> "Standard Deviation " + (get_name c)
                Concatenate c _ _ _ _ _ -> "Concatenate " + (get_name c)
                First c _ _ _ -> "First " + (get_name c)
                Last c _ _ _ -> "Last " + (get_name c)
+                Maximum c _ -> "Maximum " + (get_name c)
+                Minimum c _ -> "Minimum " + (get_name c)
+                Shortest c _ -> "Shortest " + (get_name c)
+                Longest c _ -> "Longest " + (get_name c)

    ## PRIVATE
       Given a column reference resolve to the underlying column
@ -113,14 +208,20 @@ type Aggregate_Column

    initial_value : Any
    initial_value = case this of
-        Count_Distinct _ _ -> Map.empty
+        Count_Distinct _ _ _ -> Map.empty
        Median _ _ -> Map.empty
+        Percentile _ _ _ -> Map.empty
+        Mode _ _ -> Map.empty
        Average _ _ -> [0, 0]
        Standard_Deviation _ _ _ -> [0, 0, 0]
        Concatenate _ _ _ _ _ _ -> Nothing
        First _ _ _ _ -> Nothing
        Last _ _ _ _ -> Nothing
        Sum _ _ -> Nothing
+        Maximum _ _ -> Nothing
+        Minimum _ _ -> Nothing
+        Shortest _ _ -> Nothing
+        Longest _ _ -> Nothing
        _ -> 0

    make_aggregator : Table->(Any->Integer->Any)
@ -139,16 +240,28 @@ type Aggregate_Column
            Count_Nothing c _ -> create_closure c col->count->i->(count + if (col.at i).is_nothing then 1 else 0)
            Count_Not_Empty c _ -> create_closure c col->count->i->(count + if is_empty (col.at i) then 0 else 1)
            Count_Empty c _ -> create_closure c col->count->i->(count + if is_empty (col.at i) then 1 else 0)
-            First c _ i _ ->
-                case i of
+            First c _ ignore_nothing _ ->
+                case ignore_nothing of
                    False -> create_closure c col->current->i->(if i==0 then (col.at i) else current)
                    True -> create_closure c col->current->i->if current.is_nothing then (col.at i) else current
-            Last c _ i _ ->
-                case i of
+            Last c _ ignore_nothing _ ->
+                case ignore_nothing of
                    False -> create_closure c col->_->i->(col.at i)
                    True -> create_closure c col->current->i->
                        v = (col.at i)
                        if v.is_nothing then current else v
+            Maximum c _ -> create_closure c col->m->i->
+                v = col.at i
+                if v.is_nothing then m else if m.is_nothing then v else m.max v
+            Minimum c _ -> create_closure c col->m->i->
+                v = col.at i
+                if v.is_nothing then m else if m.is_nothing then v else m.min v
+            Shortest c _ -> create_closure c col->m->i->
+                v = col.at i
+                if v.is_nothing then m else if m.is_nothing then v else if m.length <= v.length then m else v
+            Longest c _ -> create_closure c col->m->i->
+                v = col.at i
+                if v.is_nothing then m else if m.is_nothing then v else if m.length >= v.length then m else v
            Sum c _ -> create_closure c col->total->i->
                v = col.at i
                if v.is_nothing then total else
@ -159,46 +272,57 @@ type Aggregate_Column
            Standard_Deviation c _ _ -> create_closure c col->a->i->
                v = col.at i
                if v.is_nothing then a else [a.first + 1, a.second + v, (a.at 2) + v*v]
-            Concatenate c _ j _ _ q -> create_closure c col->text->i->
+            Concatenate c _ join _ _ quote -> create_closure c col->text->i->
                v = col.at i
                val=if v.is_nothing then "" else
                    text = case v of
                        Text -> v
                        _ -> v.to_text
-                    if text == "" then (q+q) else
-                        if text.contains j then (q+text+q) else text
-                if i==0 then val else (text + j + val)
+                    if text == "" then (quote+quote) else
+                        if text.contains join then (quote+text+quote) else text
+                if text.is_nothing then val else (text + join + val)
            Median c _ -> create_closure c col->map->i->
                val = col.at i
                if val.is_nothing then map else (map.insert val (1 + (map.get_or_else val 0)))
-            Count_Distinct columns _ ->
+            Percentile _ c _ -> create_closure c col->map->i->
+                val = col.at i
+                if val.is_nothing then map else (map.insert val (1 + (map.get_or_else val 0)))
+            Mode c _ -> create_closure c col->map->i->
+                val = col.at i
+                if val.is_nothing then map else (map.insert val (1 + (map.get_or_else val 0)))
+            Count_Distinct columns _ ignore_nothing ->
                resolved = case columns of
                    Vector.Vector _ -> columns.map c->(this.resolve_column table c)
                    _ -> [this.resolve_column table columns]
                key_maker i = Group_By.key (resolved.map c->(c.at i))
-                map->i->(map.insert (key_maker i) 1)
+                case ignore_nothing of
+                    False-> map->i->(map.insert (key_maker i) 1)
+                    True-> map->i->
+                        key = key_maker i
+                        if key.values.all .is_nothing then map else (map.insert key 1)

    evaluate : Any->Any
-    evaluate value = case this of
-        Count_Distinct _ _ -> value.size
-        Median _ _ ->
+    evaluate value =
+        ## Given a map of values and counts, find the value at a specified percentile
+        percentile p:Decimal value:Map =
            count = value.fold 0 (+)
            if count == 0 then Nothing else
-                case count%2 == 1 of
-                    True ->
-                        mid = (count-1) / 2
-                        output = value.fold_with_key [0, Nothing] c->k->v->
-                            new_v = c.first + v
-                            [new_v, if c.first.up_to new_v . contains mid then k else c.second]
-                        output.second
-                    False ->
-                        mid = count / 2
+                mid_value = (count - 1)*p + 1
+                if mid_value <= 1 then value.first.first else
+                    if mid_value >= count then value.last.first else
+                        mid = mid_value.floor
                        output = value.fold_with_key [0, Nothing, Nothing] c->k->v->
                            new_v = c.first + v
-                            new_s = if c.first.up_to new_v . contains mid then k else c.second
-                            new_t = if c.first.up_to new_v . contains (mid-1) then k else (c.at 2)
+                            new_s = if c.first.up_to new_v . contains (mid-1) then k else c.second
+                            new_t = if c.first.up_to new_v . contains mid then k else (c.at 2)
                            [new_v, new_s, new_t]
-                        (output.second + (output.at 2)) / 2
+                        (output.second + (output.at 2 - output.second) * (mid_value - mid))
+
+        case this of
+            Count_Distinct _ _ _ -> value.size
+            Median _ _ -> percentile 0.5 value
+            Percentile p _ _ -> percentile p value
+            Mode _ _ -> (value.fold_with_key (Pair 0 Nothing) p->k->v-> if v>(p.first) then (Pair v k) else p) . second
            Average _ _ -> if value.first == 0 then Nothing else (value.second / value.first)
            Standard_Deviation _ _ p -> if value.first == 0 then Nothing else
                f = if p then 1 else (value.first / (value.first - 1)).sqrt
@ -206,7 +330,6 @@ type Aggregate_Column
            Concatenate _ _ _ s p _ -> if value.is_nothing then value else (s + value + p)
            _ -> value

-
 ## Occurs when cannot aggregate a column
 type Invalid_Aggregation_Method (column : Text) (message : Text)

--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Group_By.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Group_By.enso
@ -4,11 +4,7 @@ import Standard.Base.Data.Ordering.Vector_Lexicographic_Order
 ## Create a key structure for grouping operations
 key : Vector -> Group_By_Key
 key values =
-    mapper c = case c of
-        Boolean -> Comparable_Boolean c
-        Nothing -> Comparable_Nothing
-        _ -> c
-
+    mapper c = Comparable_Value c
    Group_By_Key <| values.map mapper

 ## PRIVATE
@ -25,30 +21,23 @@ type Group_By_Key
    compare_to that =
        Vector_Lexicographic_Order.compare this.values that.values

-## PRIVATE
-   Temporary workaround until Boolean compare_to completed
-type Comparable_Boolean
-    type Comparable_Boolean value
-
-    == : Comparable_Boolean->Boolean
-    == that = (this.compare_to that) == Ordering.Equal
-
-    compare_to : Any->Ordering
-    compare_to that =
-        if this.value == that.value then Ordering.Equal else
-            if this.value then Ordering.Greater else Ordering.Less
-
 ## PRIVATE
   Temporary workaround allowing Nothing to be in a Group_By
-type Comparable_Nothing
-    type Comparable_Nothing
+type Comparable_Value
+    type Comparable_Value value

    ## Two values are equal when `compare_to` reports `Ordering.Equal`.
    == : Comparable_Value->Boolean
    == that = (this.compare_to that) == Ordering.Equal

    compare_to : Any->Ordering
    compare_to that =
-        case that of
-            Comparable_Nothing -> Ordering.Equal
-            Nothing -> Ordering.Equal
-            _ -> Ordering.Less
+        value = case that of
+            Comparable_Value v -> v
+            _ -> that
+
+        case this.value of
+            Nothing -> if value.is_nothing then Ordering.Equal else Ordering.Less
+            _ -> if value.is_nothing then Ordering.Greater else this.value.compare_to value
+
+    ## Checks if the wrapped value is Nothing.
+    is_nothing : Boolean
+    is_nothing = this.value.is_nothing
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso
@ -8,6 +8,7 @@ import Standard.Base.Data.Time.Date
 import Standard.Table.Io.Spreadsheet_Write_Mode
 import Standard.Table.Io.Format
 import Standard.Table.Internal.Table_Helpers
+import Standard.Table.Internal.Unique_Name_Strategy

 from Standard.Table.Data.Order_Rule as Order_Rule_Module import Order_Rule
 from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Index
@ -516,16 +517,17 @@ type Table
    group_by selector columns (on_problems=Report_Warning) (warnings=Warnings.default) =
        # Grouping Key
        key_columns = if selector.is_nothing then [] else
-            Table_Helpers.select_columns internal_columns=this.columns selector=selector reorder=True on_problems=on_problems warnings=warnings
+            Table_Helpers.select_columns_helper internal_columns=this.columns selector=selector reorder=True on_problems=on_problems warnings=warnings
        key_length = key_columns.length
        make_key = if (key_length == 0) then _->(Group_By.key [1]) else i->(Group_By.key (key_columns.map v->(v.at i)))

        # New Table Accumulator
-        new_table = (key_columns.map c->c.name)+(columns.map c->(c.column_name this)) . map n->[n, Vector.new_builder]
+        name_strategy = Unique_Name_Strategy.new
+        new_table = (key_columns.map c->c.name)+(columns.map c->(c.column_name this)) . map n->[name_strategy.make_unique n, Vector.new_builder]
        add_row key =
            idx = new_table.at 0 . at 1 . length
            0.up_to key_length . each i->
-                new_table.at i . at 1 . append (key.values.at i)
+                new_table.at i . at 1 . append (key.values.at i).value
            0.up_to (columns.length) . each i->
                column = columns.at i
                new_table.at (i + key_length) . at 1 . append (column.initial_value)
--- a/distribution/lib/Standard/Test/0.0.0-dev/src/Faker.enso
+++ b/distribution/lib/Standard/Test/0.0.0-dev/src/Faker.enso
@ -7,21 +7,24 @@ upper_case_letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".utf_16
 lower_case_letters = "abcdefghijklmnopqrstuvwxyz".utf_16
 numbers = "0123456789".utf_16

-## Creates a random number generator which can be used for creating test values.
+## Creates a new Faker which can be used for creating test values.

   Arguments:
   - seed: Optional seed value to make the sequence deterministic
-make_generator : Integer -> Random
-make_generator (seed = 0) =
-    if seed == 0 then Random.new else Random.new seed
+new : Integer -> Faker
+new (seed = 0) =
+    generator = if seed == 0 then Random.new else Random.new seed
+    Faker generator

+## Object to generate (deterministic) random values for testing
+type Faker
+    type Faker generator

-## Creates a random string based on a template and random number generator.
+    ## Creates a random Text based on a template of character sets.

       Arguments:
       - template: Vector of vectors that represent the possible characters for each
         letter, as UTF-16 code units.
-   - generator: Random number generator

       > Examples:
         Creates a fake UK National Insurance number:
@ -30,10 +33,68 @@ make_generator (seed = 0) =
                n = "0123456789".utf_16
                s = "ABCDFMP ".utf_16
                template = [l, l, n, n, n, n, n, s]
-            ni_number = make_string template make_generator
-make_string : Vector -> Any -> Text
-make_string template generator =
+                ni_number = Faker.new . string_value template
+    string_value : Vector -> Text
+    string_value template =
        characters = template.map possible_chars->
-        selected_char_ix = generator.nextInt possible_chars.length
+            selected_char_ix = this.generator.nextInt possible_chars.length
            possible_chars.at selected_char_ix
        Text.from_utf_16 characters
+
+    ## Generates a Text consisting of either lower case or upper case letters.
+
+       Arguments:
+       - length: length of text to generate (defaults to 1)
+       - upper_case: if `True` upper case letters are used, otherwise lower
+         case letters (the default)
+    alpha : Integer->Boolean->Text
+    alpha length=1 upper_case=False =
+        alphabet = if upper_case then here.upper_case_letters else here.lower_case_letters
+        this.string_value <| 0.up_to length . map _->alphabet
+
+    ## Generates a Text consisting of letters (either lower case or upper
+       case) and digits.
+
+       Arguments:
+       - length: length of text to generate (defaults to 1)
+       - upper_case: if `True` upper case letters are used, otherwise lower
+         case letters (the default); digits are always included
+    alpha_numeric : Integer->Boolean->Text
+    alpha_numeric length=1 upper_case=False =
+        alphabet = (if upper_case then here.upper_case_letters else here.lower_case_letters) + here.numbers
+        this.string_value <| 0.up_to length . map _->alphabet
+
+    ## Generates a Text for a hexadecimal number, using the digits 0-9 and
+       the upper case letters A-F.
+
+       Arguments:
+       - length: length of text to generate (defaults to 1)
+    hexadecimal : Integer->Text
+    hexadecimal length=1 =
+        alphabet = "0123456789ABCDEF".utf_16
+        this.string_value <| 0.up_to length . map _->alphabet
+
+    ## Create a random Boolean value
+       Returns `True` when the next double drawn from the generator is below
+       0.5 and `False` otherwise.
+    boolean : Boolean
+    boolean =
+        if this.generator.nextDouble < 0.5 then True else False
+
+    ## Create a random Integer value
+
+       Arguments:
+       - minimum: lower bound of the generated value (defaults to 0)
+       - maximum: upper bound of the generated value (defaults to 100);
+         presumably exclusive, as the offset added to `minimum` comes from
+         `generator.nextInt (maximum - minimum)` - TODO confirm.
+    integer : Integer->Integer->Integer
+    integer minimum=0 maximum=100 =
+        minimum + (this.generator.nextInt (maximum - minimum))
+
+    ## Create a random Decimal value
+       The result is `minimum` plus the generator's next double scaled by
+       the width of the range.
+
+       Arguments:
+       - minimum: lower bound of the generated value (defaults to 0.0)
+       - maximum: upper bound of the generated value (defaults to 1.0)
+    decimal : Decimal->Decimal->Decimal
+    decimal minimum=0.0 maximum=1.0 =
+        minimum + this.generator.nextDouble * (maximum - minimum)
+
+    ## Picks an item at random from a Vector, using this Faker's generator to
+       choose the index.
+
+       Arguments:
+       - items: Vector of items to pick from
+    vector_item : Vector->Any
+    vector_item items =
+        items.at (this.generator.nextInt items.length)
+
+    ## Randomly converts some values to Nothing
+
+       Arguments:
+       - value: the value to return when it is not replaced
+       - chance: threshold compared against the generator's next double;
+         `Nothing` is returned when the drawn number is less than or equal
+         to it (defaults to 0.1)
+    make_some_nothing : Any->Decimal->Any
+    make_some_nothing value (chance=0.1) =
+        if this.generator.nextDouble <= chance then Nothing else value
--- a/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/mutable/CopyNode.java
+++ b/engine/runtime/src/main/java/org/enso/interpreter/node/expression/builtin/mutable/CopyNode.java
@ -1,7 +1,6 @@
 package org.enso.interpreter.node.expression.builtin.mutable;

 import com.oracle.truffle.api.dsl.Cached;
-import com.oracle.truffle.api.dsl.CachedContext;
 import com.oracle.truffle.api.dsl.Fallback;
 import com.oracle.truffle.api.dsl.Specialization;
 import com.oracle.truffle.api.interop.InteropLibrary;
@ -9,7 +8,6 @@ import com.oracle.truffle.api.interop.InvalidArrayIndexException;
 import com.oracle.truffle.api.interop.UnsupportedMessageException;
 import com.oracle.truffle.api.library.CachedLibrary;
 import com.oracle.truffle.api.nodes.Node;
-import org.enso.interpreter.Language;
 import org.enso.interpreter.dsl.BuiltinMethod;
 import org.enso.interpreter.node.expression.builtin.interop.syntax.HostValueToEnsoNode;
 import org.enso.interpreter.runtime.Context;
@ -34,11 +32,10 @@ public abstract class CopyNode extends Node {
      long source_index,
      Array dest,
      long dest_index,
-      long count,
-      @CachedContext(Language.class) Context ctx) {
+      long count) {
    System.arraycopy(
        src.getItems(), (int) source_index, dest.getItems(), (int) dest_index, (int) count);
-    return ctx.getBuiltins().nothing().newInstance();
+    return Context.get(this).getBuiltins().nothing().newInstance();
  }

  @Specialization(guards = "arrays.hasArrayElements(src)")
--- a/test/Benchmarks/src/Natural_Order_Sort.enso
+++ b/test/Benchmarks/src/Natural_Order_Sort.enso
@ -21,8 +21,10 @@ main =

    ## No specific significance to this constant, just fixed to make generated set deterministic
    fixed_random_seed = 1644575867
-    random_generator = Faker.make_generator fixed_random_seed
+    faker = Faker.new fixed_random_seed

-    unsorted = 0.up_to here.vector_size . map _->(Faker.make_string template random_generator)
+    IO.println <| "Creating unsorted vector"
+    unsorted = 0.up_to here.vector_size . map _->(faker.string_value template)

+    IO.println <| "Benchmarking..."
    Bench.measure (unsorted.sort by=Natural_Order.compare) "Natural Order" here.iter_size here.num_iterations
--- a/test/Benchmarks/src/Number_Parse.enso
+++ b/test/Benchmarks/src/Number_Parse.enso
@ -5,24 +5,6 @@ import Standard.Test.Faker

 ## Bench Utilities ============================================================

-make_double_strings : Integer -> Any -> Integer -> Integer -> Vector
-make_double_strings count generator (min = -1000000000) (max = 1000000000) =
-    range = max - min
-    output = Array.new count
-    0.up_to count . each i->
-        v = generator.nextDouble * range - min
-        output.set_at i v.to_text
-    Vector.Vector output
-
-make_integer_strings : Integer -> Any -> Integer -> Integer -> Vector
-make_integer_strings count generator (min = -1000000000) (max = 1000000000) =
-    range = max - min
-    output = Array.new count
-    0.up_to count . each i->
-        v = (generator.nextInt range - min)
-        output.set_at i v.to_text
-    Vector.Vector output
-
 vector_size = 1000000
 iter_size = 100
 num_iterations = 10
@ -32,10 +14,14 @@ num_iterations = 10
 main =
    ## No specific significance to this constant, just fixed to make generated set deterministic
    fixed_random_seed = 1644575867
-    random_generator = Faker.make_generator fixed_random_seed
+    faker = Faker.new fixed_random_seed

-    double_string = here.make_double_strings here.vector_size random_generator
-    Bench.measure (double_string.map Decimal.parse) "Decimal.parse" here.iter_size here.num_iterations
+    IO.println <| "Creating decimal strings"
+    decimal_strings = Vector.new here.vector_size _->(faker.decimal -1000000000 1000000000).to_text
+    IO.println <| "Benchmarking Decimal.parse"
+    Bench.measure (decimal_strings.map Decimal.parse) "Decimal.parse" here.iter_size here.num_iterations

-    int_strings = here.make_integer_strings here.vector_size random_generator
+    IO.println <| "Creating integer strings"
+    int_strings = Vector.new here.vector_size _->(faker.integer -1000000000 1000000000).to_text
+    IO.println <| "Benchmarking Integer.parse"
    Bench.measure (int_strings.map Integer.parse) "Integer.parse" here.iter_size here.num_iterations
--- a/test/Benchmarks/src/Table/Group_By.enso
+++ b/test/Benchmarks/src/Table/Group_By.enso
@ -0,0 +1,49 @@
+from Standard.Base import all
+
+import Standard.Test.Bench
+import Standard.Test.Faker
+
+import Standard.Table.Data.Table
+import Standard.Table.Data.Column_Selector
+from Standard.Table.Data.Aggregate_Column import all
+
+## Bench Utilities ============================================================
+
+vector_size = 2500
+iter_size = 100
+num_iterations = 10
+
+## Creates a table of randomly generated data for the benchmarks.
+
+   Arguments:
+   - rows: number of rows to generate.
+   - seed: seed passed to `Faker.new`; a fixed default is used so that the
+     generated sequence is deterministic.
+
+   The table has three columns used as grouping keys ("Code", "Index",
+   "Flag"), two numeric columns ("Value", "ValueWithNothing") and two text
+   columns ("TextWithNothing", "Hexadecimal"). The columns built through
+   `make_some_nothing` may contain `Nothing` entries.
+create_table : Integer->Integer->Table
+create_table rows (seed=1646322139) =
+    faker = Faker.new seed
+    key1 = ["Code", 0.up_to rows . map _-> faker.alpha 3]
+    key2 = ["Index", 0.up_to rows . map _-> faker.integer 0 10]
+    key3 = ["Flag", 0.up_to rows . map _-> faker.boolean]
+    value1 = ["Value", 0.up_to rows . map _-> ((faker.decimal -100 100)*100000).floor/100000]
+    value2 = ["ValueWithNothing", 0.up_to rows . map _-> faker.make_some_nothing ((faker.decimal -100 100)*100).floor/100]
+    text1 = ["TextWithNothing", 0.up_to rows . map _-> faker.make_some_nothing (faker.alpha_numeric 10)]
+    text2 = ["Hexadecimal", 0.up_to rows . map _-> faker.make_some_nothing (faker.hexadecimal 8)]
+    Table.new [key1, key2, key3, value1, value2, text1, text2]
+
+# The Benchmarks ==============================================================
+## Builds the test table once and then measures the same five aggregates
+   (Count, Count_Distinct, Standard_Deviation, Median, Mode) with three
+   different grouping selectors.
+main =
+    IO.println <| "Making table data..."
+    table = here.create_table here.vector_size
+
+    # Aggregates with an empty `By_Index` selector (no key columns selected).
+    Bench.measure (table.group_by (Column_Selector.By_Index []) [Count Nothing]) "Count table" here.iter_size here.num_iterations
+    Bench.measure (table.group_by (Column_Selector.By_Index []) [Count_Distinct "Index"]) "Count Distinct table" here.iter_size here.num_iterations
+    Bench.measure (table.group_by (Column_Selector.By_Index []) [Standard_Deviation "Value"]) "StDev table" here.iter_size here.num_iterations
+    Bench.measure (table.group_by (Column_Selector.By_Index []) [Median "Value"]) "Median table" here.iter_size here.num_iterations
+    Bench.measure (table.group_by (Column_Selector.By_Index []) [Mode "Index"]) "Mode table" here.iter_size here.num_iterations
+
+    # Aggregates grouped by the single "Index" column.
+    Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Count Nothing]) "Count grouped" here.iter_size here.num_iterations
+    Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Count_Distinct "Code"]) "Count Distinct grouped" here.iter_size here.num_iterations
+    Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Standard_Deviation "Value"]) "StDev grouped" here.iter_size here.num_iterations
+    Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Median "Value"]) "Median grouped" here.iter_size here.num_iterations
+    Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index"]) [Mode "Index"]) "Mode grouped" here.iter_size here.num_iterations
+
+    # Aggregates grouped by the "Index" and "Flag" columns together.
+    Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Count Nothing]) "Count 2 level groups" here.iter_size here.num_iterations
+    Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Count_Distinct "Code"]) "Count Distinct 2 level groups" here.iter_size here.num_iterations
+    Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Standard_Deviation "Value"]) "StDev 2 level groups" here.iter_size here.num_iterations
+    Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Median "Value"]) "Median 2 level groups" here.iter_size here.num_iterations
+    Bench.measure (table.group_by (Column_Selector.By_Name.new ["Index", "Flag"]) [Mode "Index"]) "Mode 2 level groups" here.iter_size here.num_iterations
--- a/test/Benchmarks/src/Text/Compare.enso
+++ b/test/Benchmarks/src/Text/Compare.enso
@ -22,22 +22,22 @@ main =
       This is to compare the performance of handling simple ASCII versus
       performance of handling more complex Unicode characters.
    bench_strcmp suite_prefix character_template common_prefix =
-        gen = Faker.make_generator
+        faker = Faker.new
        ## Warning: this relies on the fact that Faker will treat the accent
           codepoint `\u{301}` as a separate code unit. We rely on this to add
           accents randomly to neighboring characters. If the implementation of
           Faker is changed, this must be modified accordingly.
        make_alpha_template length = Vector.new length _-> character_template
        very_short_template = make_alpha_template 4
-        very_short = Vector.new 100000 _-> '🤩' + Faker.make_string very_short_template gen
+        very_short = Vector.new 100000 _-> '🤩' + faker.string_value very_short_template
        medium_template = make_alpha_template 64
-        medium = Vector.new 10000 _-> Faker.make_string medium_template gen
+        medium = Vector.new 10000 _-> faker.string_value medium_template

        big_a_codepoint = 65
        big_template = make_alpha_template 100000
-        big_random = Vector.new 100 _-> Faker.make_string big_template gen
+        big_random = Vector.new 100 _-> faker.string_value big_template
        big_early_difference = Vector.new 100 ix->
-            "bb" + (Text.from_codepoints [big_a_codepoint + ix%5]) + "aaa" + (Faker.make_string big_template gen)
+            "bb" + (Text.from_codepoints [big_a_codepoint + ix%5]) + "aaa" + (faker.string_value big_template)
        prefix = common_prefix.repeat 100000
        big_late_difference = Vector.new 100 ix-> prefix + (Text.from_codepoints [big_a_codepoint + ix%5])

--- a/test/Benchmarks/src/Text/Contains.enso
+++ b/test/Benchmarks/src/Text/Contains.enso
@ -11,7 +11,7 @@ check_all text_vector pattern_vector mode =

 main =
    bench_contains suite_prefix character_template =
-        gen = Faker.make_generator
+        faker = Faker.new
        ## Warning: this relies on the fact that Faker will treat the accent
           codepoint `\u{301}` as a separate code unit. We rely on this to add
           accents randomly to neighboring characters. If the implementation of
@ -19,7 +19,7 @@ main =
        make_alpha_template length = Vector.new length _-> character_template

        big_template = make_alpha_template 10000
-        big_random = Vector.new 200 _-> Faker.make_string big_template gen
+        big_random = Vector.new 200 _-> faker.string_value big_template

        Bench.measure (here.check_all big_random ["AAAAAA"] Text_Matcher.new) suite_prefix+" exact" 10 10
        Bench.measure (here.check_all big_random ["AAAAAA"] (Text_Matcher.new Case_Insensitive.new)) suite_prefix+" case-insensitive" 10 10
--- a/test/Table_Tests/data/data.csv
+++ b/test/Table_Tests/data/data.csv
--- a/test/Table_Tests/src/Aggregate_Column_Spec.enso
+++ b/test/Table_Tests/src/Aggregate_Column_Spec.enso
@ -6,7 +6,7 @@ from Standard.Table.Data.Aggregate_Column import all
 import Standard.Test

 spec = Test.group "Aggregate Columns" <|
-    simple_table = Table.new [["count", [1, 2, Nothing, 3, Nothing]], ["is_valid", [Nothing, False, True, False, Nothing]], ["float", [1, 2.1, 3.4, 5.6, Nothing]], ["text", ["A", "", Nothing, "B,C", Nothing]]]
+    simple_table = Table.new [["count", [1, 2, Nothing, 3, Nothing]], ["is_valid", [Nothing, False, True, False, Nothing]], ["float", [3.4, 1, 5.6, 2.1, Nothing]], ["text", ["A", "", Nothing, "B,C", Nothing]]]
    text_col = simple_table.at "text"
    bool_col = simple_table.at "is_valid"
    float_col = simple_table.at "float"
@ -134,8 +134,55 @@ spec = Test.group "Aggregate Columns" <|
        test_aggregator simple_table (Count_Distinct "count" test_name) test_name 4
        test_aggregator simple_table (Count_Distinct int_col test_name) test_name 4
        test_aggregator empty_table (Count_Distinct 0 test_name) test_name 0
+        test_aggregator simple_table (Count_Distinct float_col test_name ignore_nothing=False) test_name 5
+        test_aggregator simple_table (Count_Distinct float_col test_name ignore_nothing=True) test_name 4

    Test.specify "should be able to count distinct items on a multiple sets of values" <|
        test_aggregator simple_table (Count_Distinct [0, 1]) "Count Distinct count is_valid" 5
+        test_aggregator simple_table (Count_Distinct ["is_valid", "float"]) "Count Distinct is_valid float" 5
+        test_aggregator simple_table (Count_Distinct ["is_valid", "float"] ignore_nothing=True) "Count Distinct is_valid float" 4
+
+    # Minimum is requested by negative column index, by column name and by
+    # column object. Index -2 resolves to the "float" column of simple_table,
+    # as the default output name "Minimum float" shows; its smallest value is 1.
+    Test.specify "should be able to get the minimum of a set of values" <|
+        test_aggregator simple_table (Minimum -2) "Minimum float" 1
+        test_aggregator simple_table (Minimum -2 test_name) test_name 1
+        test_aggregator simple_table (Minimum "float" test_name) test_name 1
+        test_aggregator simple_table (Minimum float_col test_name) test_name 1
+        # With no rows to aggregate, the expected result is Nothing.
+        test_aggregator empty_table (Minimum 0 test_name) test_name Nothing
+
+    # Maximum is requested by negative column index, by column name and by
+    # column object. Index -2 resolves to the "float" column of simple_table,
+    # as the default output name "Maximum float" shows; its largest value is 5.6.
+    Test.specify "should be able to get the maximum of a set of values" <|
+        test_aggregator simple_table (Maximum -2) "Maximum float" 5.6
+        test_aggregator simple_table (Maximum -2 test_name) test_name 5.6
+        test_aggregator simple_table (Maximum "float" test_name) test_name 5.6
+        test_aggregator simple_table (Maximum float_col test_name) test_name 5.6
+        # With no rows to aggregate, the expected result is Nothing.
+        test_aggregator empty_table (Maximum 0 test_name) test_name Nothing
+
+    # Shortest is requested by negative column index, by column name and by
+    # column object. Index -1 resolves to the "text" column of simple_table,
+    # which contains an empty string; that empty string is the expected result.
+    Test.specify "should be able to get the shortest of a set of texts" <|
+        test_aggregator simple_table (Shortest -1) "Shortest text" ""
+        test_aggregator simple_table (Shortest -1 test_name) test_name ""
+        test_aggregator simple_table (Shortest "text" test_name) test_name ""
+        test_aggregator simple_table (Shortest text_col test_name) test_name ""
+        # With no rows to aggregate, the expected result is Nothing.
+        test_aggregator empty_table (Shortest 0 test_name) test_name Nothing
+
+    # Longest is requested by negative column index, by column name and by
+    # column object. Index -1 resolves to the "text" column of simple_table,
+    # whose longest entry is "B,C".
+    Test.specify "should be able to get the longest of a set of texts" <|
+        test_aggregator simple_table (Longest -1) "Longest text" "B,C"
+        test_aggregator simple_table (Longest -1 test_name) test_name "B,C"
+        test_aggregator simple_table (Longest "text" test_name) test_name "B,C"
+        test_aggregator simple_table (Longest text_col test_name) test_name "B,C"
+        # With no rows to aggregate, the expected result is Nothing.
+        test_aggregator empty_table (Longest 0 test_name) test_name Nothing
+
+    # Mode is checked on a dedicated single-column table in which the value 2
+    # occurs more often than any other value.
+    Test.specify "should be able to get the mode of a set of numbers" <|
+        mode_table  = Table.new [["tests", [1,2,3,4,2,4,1,2,3,4,2,1,3,5,2,1,2,4,5,2,1,2,3,5,6,1,2,2]]]
+        test_aggregator mode_table (Mode -1) "Mode tests" 2
+        test_aggregator mode_table (Mode -1 test_name) test_name 2
+        # With no rows to aggregate, the expected result is Nothing.
+        test_aggregator empty_table (Mode 0 test_name) test_name Nothing
+
+    # Percentile is checked on a dedicated single-column table of 20 values.
+    # The expected results correspond to linear interpolation between the
+    # neighbouring values of the sorted data (e.g. 0.25 -> 36 + 0.75 * (37 - 36)).
+    Test.specify "should be able to get the percentile of a set of numbers" <|
+        percentile_table = Table.new [["tests", [67,23,56,93,36,47,45,1,88,44,49,13,74,76,4,97,49,81,81,37]]]
+        test_aggregator percentile_table (Percentile 0 0) "0%-ile tests" 1
+        test_aggregator percentile_table (Percentile 0 -1 test_name) test_name 1
+        test_aggregator percentile_table (Percentile 0.15 0) "15%-ile tests" 21.5
+        test_aggregator percentile_table (Percentile 0.25 0) "25%-ile tests" 36.75
+        test_aggregator percentile_table (Percentile 0.66 0) "66%-ile tests" 70.78
+        # The empty-table case must exercise Percentile itself; Mode already has
+        # its own empty-table assertion in the mode spec.
+        test_aggregator empty_table (Percentile 0.25 0 test_name) test_name Nothing

 main = Test.Suite.run_main here.spec
--- a/test/Table_Tests/src/Group_By_Spec.enso
+++ b/test/Table_Tests/src/Group_By_Spec.enso
@ -0,0 +1,406 @@
+from Standard.Base import all
+
+import Standard.Table
+import Standard.Table.Data.Column_Selector
+from Standard.Table.Data.Aggregate_Column import all
+
+import Standard.Test
+
+spec =
+    file_contents = (Enso_Project.data / "data.csv") . read
+    table = Table.from_csv file_contents
+    empty_table = Table.new <| table.columns.map c->[c.name, []]
+
+    # Searches the rows of `table` for one whose leading columns equal the
+    # values of `key`, compared position by position, and returns whatever
+    # `find` yields for the matching row index (callers check it for Nothing).
+    find_row key table =
+        row_indices = 0.up_to table.row_count
+        row_indices.find row->
+            0.up_to key.length . all col-> (table.columns.at col . at row)==(key.at col)
+
+    Test.group "Table.group_by should summarize whole table " <|
+        grouping = Column_Selector.By_Index []
+        Test.specify "should be able to count" <|
+            grouped = table.group_by grouping [Count Nothing]
+            grouped.row_count . should_equal 1
+            grouped.columns.length . should_equal 1
+            grouped.columns.at 0 . name . should_equal "Count"
+            grouped.columns.at 0 . at 0 . should_equal 2500
+
+        Test.specify "should be able to count missing values" <|
+            grouped = table.group_by grouping [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
+            grouped.row_count . should_equal 1
+            grouped.columns.length . should_equal 4
+            grouped.columns.at 0 . name . should_equal "Count Nothing Hexadecimal"
+            grouped.columns.at 0 . at 0 . should_equal 236
+            grouped.columns.at 1 . name . should_equal "Count Not Nothing Hexadecimal"
+            grouped.columns.at 1 . at 0 . should_equal 2264
+            grouped.columns.at 2 . name . should_equal "Count Empty TextWithNothing"
+            grouped.columns.at 2 . at 0 . should_equal 249
+            grouped.columns.at 3 . name . should_equal "Count Not Empty TextWithNothing"
+            grouped.columns.at 3 . at 0 . should_equal 2251
+
+        Test.specify "should be able to count distinct values" <|
+            grouped = table.group_by grouping [Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag", Count_Distinct ["Index", "Flag"]]
+            grouped.row_count . should_equal 1
+            grouped.columns.length . should_equal 4
+            grouped.columns.at 0 . name . should_equal "Count Distinct Code"
+            grouped.columns.at 0 . at 0 . should_equal 2333
+            grouped.columns.at 1 . name . should_equal "Count Distinct Index"
+            grouped.columns.at 1 . at 0 . should_equal 10
+            grouped.columns.at 2 . name . should_equal "Count Distinct Flag"
+            grouped.columns.at 2 . at 0 . should_equal 2
+            grouped.columns.at 3 . name . should_equal "Count Distinct Index Flag"
+            grouped.columns.at 3 . at 0 . should_equal 20
+
+        Test.specify "should be able to sum, average and standard deviation of values" <|
+            grouped = table.group_by grouping [Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)]
+            grouped.row_count . should_equal 1
+            grouped.columns.length . should_equal 8
+            grouped.columns.at 0 . name . should_equal "Sum Value"
+            grouped.columns.at 0 . at 0 . should_equal -932.411550 epsilon=0.000001
+            grouped.columns.at 1 . name . should_equal "Sum ValueWithNothing"
+            grouped.columns.at 1 . at 0 . should_equal 2757.09 epsilon=0.000001
+            grouped.columns.at 2 . name . should_equal "Average Value"
+            grouped.columns.at 2 . at 0 . should_equal -0.372965 epsilon=0.000001
+            grouped.columns.at 3 . name . should_equal "Average ValueWithNothing"
+            grouped.columns.at 3 . at 0 . should_equal 1.228650 epsilon=0.000001
+            grouped.columns.at 4 . name . should_equal "Standard Deviation Value"
+            grouped.columns.at 4 . at 0 . should_equal 56.708660 epsilon=0.000001
+            grouped.columns.at 5 . name . should_equal "Standard Deviation ValueWithNothing"
+            grouped.columns.at 5 . at 0 . should_equal 58.588610 epsilon=0.000001
+            grouped.columns.at 6 . name . should_equal "Standard Deviation Value_1"
+            grouped.columns.at 6 . at 0 . should_equal 56.697317 epsilon=0.000001
+            grouped.columns.at 7 . name . should_equal "Standard Deviation ValueWithNothing_1"
+            grouped.columns.at 7 . at 0 . should_equal 58.575554 epsilon=0.000001
+
+        Test.specify "should be able to create median, mode and percentile values" <|
+            grouped = table.group_by grouping [Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"]
+            grouped.row_count . should_equal 1
+            grouped.columns.length . should_equal 6
+            grouped.columns.at 0 . name . should_equal "Median Index"
+            grouped.columns.at 0 . at 0 . should_equal 5 epsilon=0.000001
+            grouped.columns.at 1 . name . should_equal "Median Value"
+            grouped.columns.at 1 . at 0 . should_equal 1.298375 epsilon=0.000001
+            grouped.columns.at 2 . name . should_equal "Median ValueWithNothing"
+            grouped.columns.at 2 . at 0 . should_equal 2.235 epsilon=0.000001
+            grouped.columns.at 3 . name . should_equal "Mode Index"
+            grouped.columns.at 3 . at 0 . should_equal 7
+            grouped.columns.at 4 . name . should_equal "25%-ile Value"
+            grouped.columns.at 4 . at 0 . should_equal -49.962710 epsilon=0.000001
+            grouped.columns.at 5 . name . should_equal "40%-ile ValueWithNothing"
+            grouped.columns.at 5 . at 0 . should_equal -17.960000 epsilon=0.000001
+
+        Test.specify "should be able to get first and last values" <|
+            grouped = table.group_by grouping [First "Index", Last "Value"]
+            grouped.row_count . should_equal 1
+            grouped.columns.length . should_equal 2
+            grouped.columns.at 0 . name . should_equal "First Index"
+            grouped.columns.at 0 . at 0 . should_equal 7
+            grouped.columns.at 1 . name . should_equal "Last Value"
+            grouped.columns.at 1 . at 0 . should_equal 70.99931 epsilon=0.000001
+
+        Test.specify "should be able to get minimum and maximum values" <|
+            grouped = table.group_by grouping [Minimum "Value", Maximum "Value", Minimum "ValueWithNothing", Maximum "ValueWithNothing"]
+            grouped.row_count . should_equal 1
+            grouped.columns.length . should_equal 4
+            grouped.columns.at 0 . name . should_equal "Minimum Value"
+            grouped.columns.at 0 . at 0 . should_equal -99.964200 epsilon=0.000001
+            grouped.columns.at 1 . name . should_equal "Maximum Value"
+            grouped.columns.at 1 . at 0 . should_equal 99.977480 epsilon=0.000001
+            grouped.columns.at 2 . name . should_equal "Minimum ValueWithNothing"
+            grouped.columns.at 2 . at 0 . should_equal -99.99 epsilon=0.000001
+            grouped.columns.at 3 . name . should_equal "Maximum ValueWithNothing"
+            grouped.columns.at 3 . at 0 . should_equal 99.95 epsilon=0.000001
+
+        Test.specify "should be able to get shortest, longest and concatenated values" <|
+            grouped = table.group_by grouping [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
+            grouped.row_count . should_equal 1
+            grouped.columns.length . should_equal 3
+            grouped.columns.at 0 . name . should_equal "Shortest TextWithNothing"
+            grouped.columns.at 0 . at 0 . should_equal "f5"
+            grouped.columns.at 1 . name . should_equal "Longest TextWithNothing"
+            grouped.columns.at 1 . at 0 . should_equal "setp295gjvbanana"
+            grouped.columns.at 2 . name . should_equal "Concatenate Code"
+            grouped.columns.at 2 . at 0 . length . should_equal 7500
+
+    Test.group "Table.group_by should summarize empty table " <|
+        grouping = Column_Selector.By_Index []
+        Test.specify "should be able to count" <|
+            grouped = empty_table.group_by grouping [Count Nothing]
+            grouped.row_count . should_equal 1
+            grouped.columns.length . should_equal 1
+            grouped.columns.at 0 . name . should_equal "Count"
+            grouped.columns.at 0 . at 0 . should_equal 0
+
+        Test.specify "should be able to count missing values" <|
+            grouped = empty_table.group_by grouping [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
+            grouped.row_count . should_equal 1
+            grouped.columns.length . should_equal 4
+            grouped.columns.at 0 . name . should_equal "Count Nothing Hexadecimal"
+            grouped.columns.at 0 . at 0 . should_equal 0
+            grouped.columns.at 1 . name . should_equal "Count Not Nothing Hexadecimal"
+            grouped.columns.at 1 . at 0 . should_equal 0
+            grouped.columns.at 2 . name . should_equal "Count Empty TextWithNothing"
+            grouped.columns.at 2 . at 0 . should_equal 0
+            grouped.columns.at 3 . name . should_equal "Count Not Empty TextWithNothing"
+            grouped.columns.at 3 . at 0 . should_equal 0
+
+        Test.specify "should be able to count distinct values" <|
+            grouped = empty_table.group_by grouping [Count_Distinct "Code"]
+            grouped.row_count . should_equal 1
+            grouped.columns.length . should_equal 1
+            grouped.columns.at 0 . name . should_equal "Count Distinct Code"
+            grouped.columns.at 0 . at 0 . should_equal 0
+
+        Test.specify "should be able to sum, average and standard deviation of values" <|
+            grouped = empty_table.group_by grouping [Sum "Value", Average "ValueWithNothing", Standard_Deviation "Value", (Standard_Deviation "ValueWithNothing" population=True)]
+            grouped.row_count . should_equal 1
+            grouped.columns.length . should_equal 4
+            grouped.columns.at 0 . name . should_equal "Sum Value"
+            grouped.columns.at 0 . at 0 . should_equal Nothing
+            grouped.columns.at 1 . name . should_equal "Average ValueWithNothing"
+            grouped.columns.at 1 . at 0 . should_equal Nothing
+            grouped.columns.at 2 . name . should_equal "Standard Deviation Value"
+            grouped.columns.at 2 . at 0 . should_equal Nothing
+            grouped.columns.at 3 . name . should_equal "Standard Deviation ValueWithNothing"
+            grouped.columns.at 3 . at 0 . should_equal Nothing
+
+        Test.specify "should be able to create median, mode and percentile values" <|
+            grouped = empty_table.group_by grouping [Median "Index", Mode "Index", Percentile 0.25 "Value"]
+            grouped.row_count . should_equal 1
+            grouped.columns.length . should_equal 3
+            grouped.columns.at 0 . name . should_equal "Median Index"
+            grouped.columns.at 0 . at 0 . should_equal Nothing
+            grouped.columns.at 1 . name . should_equal "Mode Index"
+            grouped.columns.at 1 . at 0 . should_equal Nothing
+            grouped.columns.at 2 . name . should_equal "25%-ile Value"
+            grouped.columns.at 2 . at 0 . should_equal Nothing
+
+        Test.specify "should be able to get first and last values" <|
+            grouped = empty_table.group_by grouping [First "Index", Last "Value"]
+            grouped.row_count . should_equal 1
+            grouped.columns.length . should_equal 2
+            grouped.columns.at 0 . name . should_equal "First Index"
+            grouped.columns.at 0 . at 0 . should_equal Nothing
+            grouped.columns.at 1 . name . should_equal "Last Value"
+            grouped.columns.at 1 . at 0 . should_equal Nothing
+
+        Test.specify "should be able to get minimum and maximum values" <|
+            grouped = empty_table.group_by grouping [Minimum "Value", Maximum "ValueWithNothing"]
+            grouped.row_count . should_equal 1
+            grouped.columns.length . should_equal 2
+            grouped.columns.at 0 . name . should_equal "Minimum Value"
+            grouped.columns.at 0 . at 0 . should_equal Nothing
+            grouped.columns.at 1 . name . should_equal "Maximum ValueWithNothing"
+            grouped.columns.at 1 . at 0 . should_equal Nothing
+
+        Test.specify "should be able to get shortest, longest and concatenated values" <|
+            grouped = empty_table.group_by grouping [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
+            grouped.row_count . should_equal 1
+            grouped.columns.length . should_equal 3
+            grouped.columns.at 0 . name . should_equal "Shortest TextWithNothing"
+            grouped.columns.at 0 . at 0 . should_equal Nothing
+            grouped.columns.at 1 . name . should_equal "Longest TextWithNothing"
+            grouped.columns.at 1 . at 0 . should_equal Nothing
+            grouped.columns.at 2 . name . should_equal "Concatenate Code"
+            grouped.columns.at 2 . at 0 . should_equal Nothing
+
+    Test.group "Table.group_by should not summarize empty table when grouped " <|
+        grouping = Column_Selector.By_Index [0]
+        Test.specify "should be able to count" <|
+            grouped = empty_table.group_by grouping [Count Nothing]
+            grouped.row_count . should_equal 0
+            grouped.columns.length . should_equal 2
+            grouped.columns.at 0 . name . should_equal "Code"
+            grouped.columns.at 1 . name . should_equal "Count"
+
+        Test.specify "should be able to count missing values" <|
+            grouped = empty_table.group_by grouping [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
+            grouped.row_count . should_equal 0
+            grouped.columns.length . should_equal 5
+            grouped.columns.at 0 . name . should_equal "Code"
+            grouped.columns.at 1 . name . should_equal "Count Nothing Hexadecimal"
+            grouped.columns.at 2 . name . should_equal "Count Not Nothing Hexadecimal"
+            grouped.columns.at 3 . name . should_equal "Count Empty TextWithNothing"
+            grouped.columns.at 4 . name . should_equal "Count Not Empty TextWithNothing"
+
+        Test.specify "should be able to count distinct values" <|
+            grouped = empty_table.group_by grouping [Count_Distinct "Code"]
+            grouped.row_count . should_equal 0
+            grouped.columns.length . should_equal 2
+            grouped.columns.at 0 . name . should_equal "Code"
+            grouped.columns.at 1 . name . should_equal "Count Distinct Code"
+
+        Test.specify "should be able to sum, average and standard deviation of values" <|
+            grouped = empty_table.group_by grouping [Sum "Value", Average "ValueWithNothing", Standard_Deviation "Value", (Standard_Deviation "ValueWithNothing" population=True)]
+            grouped.row_count . should_equal 0
+            grouped.columns.length . should_equal 5
+            grouped.columns.at 0 . name . should_equal "Code"
+            grouped.columns.at 1 . name . should_equal "Sum Value"
+            grouped.columns.at 2 . name . should_equal "Average ValueWithNothing"
+            grouped.columns.at 3 . name . should_equal "Standard Deviation Value"
+            grouped.columns.at 4 . name . should_equal "Standard Deviation ValueWithNothing"
+
+        # Covers Median, Mode and Percentile, so the title names all three, in
+        # line with the equivalent specs of the other groups. Grouping the empty
+        # table yields no rows, hence only the output columns are checked.
+        Test.specify "should be able to create median, mode and percentile values" <|
+            grouped = empty_table.group_by grouping [Median "Index", Mode "Index", Percentile 0.25 "Value"]
+            grouped.row_count . should_equal 0
+            grouped.columns.length . should_equal 4
+            grouped.columns.at 0 . name . should_equal "Code"
+            grouped.columns.at 1 . name . should_equal "Median Index"
+            grouped.columns.at 2 . name . should_equal "Mode Index"
+            grouped.columns.at 3 . name . should_equal "25%-ile Value"
+
+        Test.specify "should be able to get first and last values" <|
+            grouped = empty_table.group_by grouping [First "Index", Last "Value"]
+            grouped.row_count . should_equal 0
+            grouped.columns.length . should_equal 3
+            grouped.columns.at 0 . name . should_equal "Code"
+            grouped.columns.at 1 . name . should_equal "First Index"
+            grouped.columns.at 2 . name . should_equal "Last Value"
+
+        Test.specify "should be able to get minimum and maximum values" <|
+            grouped = empty_table.group_by grouping [Minimum "Value", Maximum "ValueWithNothing"]
+            grouped.row_count . should_equal 0
+            grouped.columns.length . should_equal 3
+            grouped.columns.at 0 . name . should_equal "Code"
+            grouped.columns.at 1 . name . should_equal "Minimum Value"
+            grouped.columns.at 2 . name . should_equal "Maximum ValueWithNothing"
+
+        Test.specify "should be able to get shortest, longest and concatenated values" <|
+            grouped = empty_table.group_by grouping [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
+            grouped.row_count . should_equal 0
+            grouped.columns.length . should_equal 4
+            grouped.columns.at 0 . name . should_equal "Code"
+            grouped.columns.at 1 . name . should_equal "Shortest TextWithNothing"
+            grouped.columns.at 2 . name . should_equal "Longest TextWithNothing"
+            grouped.columns.at 3 . name . should_equal "Concatenate Code"
+
+    Test.group "Table.group_by should be able to group on single field " <|
+        # Group by the "Index" column, selected by name (constructor is `By_Name`).
+        grouping = Column_Selector.By_Name.new ["Index"]
+        Test.specify "should be able to count" <|
+            grouped = table.group_by grouping [Count Nothing]
+            grouped.row_count . should_equal 10
+            grouped.columns.length . should_equal 2
+            grouped.columns.at 0 . name . should_equal "Index"
+            idx = find_row [6] grouped
+            idx.is_nothing . should_be_false
+            grouped.columns.at 1 . name . should_equal "Count"
+            grouped.columns.at 1 . at idx . should_equal 261
+
+        Test.specify "should be able to count missing values" <|
+            grouped = table.group_by grouping [Count_Nothing "Hexadecimal", Count_Not_Nothing "Hexadecimal", Count_Empty "TextWithNothing", Count_Not_Empty "TextWithNothing"]
+            grouped.row_count . should_equal 10
+            grouped.columns.length . should_equal 5
+            grouped.columns.at 0 . name . should_equal "Index"
+            idx = find_row [6] grouped
+            idx.is_nothing . should_be_false
+            grouped.columns.at 1 . name . should_equal "Count Nothing Hexadecimal"
+            grouped.columns.at 1 . at idx . should_equal 24
+            grouped.columns.at 2 . name . should_equal "Count Not Nothing Hexadecimal"
+            grouped.columns.at 2 . at idx . should_equal 237
+            grouped.columns.at 3 . name . should_equal "Count Empty TextWithNothing"
+            grouped.columns.at 3 . at idx . should_equal 31
+            grouped.columns.at 4 . name . should_equal "Count Not Empty TextWithNothing"
+            grouped.columns.at 4 . at idx . should_equal 230
+
+        Test.specify "should be able to count distinct values" <|
+            grouped = table.group_by grouping [Count_Distinct "Code", Count_Distinct "Index", Count_Distinct "Flag", Count_Distinct ["Index", "Flag"]]
+            grouped.row_count . should_equal 10
+            grouped.columns.length . should_equal 5
+            grouped.columns.at 0 . name . should_equal "Index"
+            idx = find_row [6] grouped
+            idx.is_nothing . should_be_false
+            grouped.columns.at 1 . name . should_equal "Count Distinct Code"
+            grouped.columns.at 1 . at idx . should_equal 260
+            grouped.columns.at 2 . name . should_equal "Count Distinct Index"
+            grouped.columns.at 2 . at idx . should_equal 1
+            grouped.columns.at 3 . name . should_equal "Count Distinct Flag"
+            grouped.columns.at 3 . at idx . should_equal 2
+            grouped.columns.at 4 . name . should_equal "Count Distinct Index Flag"
+            grouped.columns.at 4 . at idx . should_equal 2
+
+        Test.specify "should be able to sum, average and standard deviation of values" <|
+            grouped = table.group_by grouping [Sum "Value", Sum "ValueWithNothing", Average "Value", Average "ValueWithNothing", Standard_Deviation "Value", Standard_Deviation "ValueWithNothing", (Standard_Deviation "Value" population=True), (Standard_Deviation "ValueWithNothing" population=True)]
+            grouped.row_count . should_equal 10
+            grouped.columns.length . should_equal 9
+            grouped.columns.at 0 . name . should_equal "Index"
+            idx = find_row [6] grouped
+            idx.is_nothing . should_be_false
+            grouped.columns.at 1 . name . should_equal "Sum Value"
+            grouped.columns.at 1 . at idx . should_equal -447.847390 epsilon=0.000001
+            grouped.columns.at 2 . name . should_equal "Sum ValueWithNothing"
+            grouped.columns.at 2 . at idx . should_equal 151.86 epsilon=0.000001
+            grouped.columns.at 3 . name . should_equal "Average Value"
+            grouped.columns.at 3 . at idx . should_equal -1.715890 epsilon=0.000001
+            grouped.columns.at 4 . name . should_equal "Average ValueWithNothing"
+            grouped.columns.at 4 . at idx . should_equal 0.646213 epsilon=0.000001
+            grouped.columns.at 5 . name . should_equal "Standard Deviation Value"
+            grouped.columns.at 5 . at idx . should_equal 60.272158 epsilon=0.000001
+            grouped.columns.at 6 . name . should_equal "Standard Deviation ValueWithNothing"
+            grouped.columns.at 6 . at idx . should_equal 56.798691 epsilon=0.000001
+            grouped.columns.at 7 . name . should_equal "Standard Deviation Value_1"
+            grouped.columns.at 7 . at idx . should_equal 60.156583 epsilon=0.000001
+            grouped.columns.at 8 . name . should_equal "Standard Deviation ValueWithNothing_1"
+            grouped.columns.at 8 . at idx . should_equal 56.677714 epsilon=0.000001
+
+        # Covers Median, Mode and Percentile, so the title names all three, in
+        # line with the equivalent whole-table and empty-table specs.
+        Test.specify "should be able to create median, mode and percentile values" <|
+            grouped = table.group_by grouping [Median "Index", Median "Value", Median "ValueWithNothing", Mode "Index", Percentile 0.25 "Value", Percentile 0.40 "ValueWithNothing"]
+            grouped.row_count . should_equal 10
+            grouped.columns.length . should_equal 7
+            grouped.columns.at 0 . name . should_equal "Index"
+            # Row order of the grouped table is not assumed: locate the Index == 6 group.
+            idx = find_row [6] grouped
+            idx.is_nothing . should_be_false
+            grouped.columns.at 1 . name . should_equal "Median Index"
+            grouped.columns.at 1 . at idx . should_equal 6 epsilon=0.000001
+            grouped.columns.at 2 . name . should_equal "Median Value"
+            grouped.columns.at 2 . at idx . should_equal 2.041150 epsilon=0.000001
+            grouped.columns.at 3 . name . should_equal "Median ValueWithNothing"
+            grouped.columns.at 3 . at idx . should_equal 1.38 epsilon=0.000001
+            grouped.columns.at 4 . name . should_equal "Mode Index"
+            grouped.columns.at 4 . at idx . should_equal 6
+            grouped.columns.at 5 . name . should_equal "25%-ile Value"
+            grouped.columns.at 5 . at idx . should_equal -56.019100 epsilon=0.000001
+            grouped.columns.at 6 . name . should_equal "40%-ile ValueWithNothing"
+            grouped.columns.at 6 . at idx . should_equal -18.802000 epsilon=0.000001
+
+        Test.specify "should be able to get first and last values" <|
+            grouped = table.group_by grouping [First "TextWithNothing", Last "Value"]
+            grouped.row_count . should_equal 10
+            grouped.columns.length . should_equal 3
+            grouped.columns.at 0 . name . should_equal "Index"
+            idx = find_row [6] grouped
+            idx.is_nothing . should_be_false
+            grouped.columns.at 1 . name . should_equal "First TextWithNothing"
+            grouped.columns.at 1 . at idx . should_equal "kmqxqkl6qx"
+            grouped.columns.at 2 . name . should_equal "Last Value"
+            grouped.columns.at 2 . at idx . should_equal 56.15916 epsilon=0.000001
+
+        Test.specify "should be able to get minimum and maximum values" <|
+            grouped = table.group_by grouping [Minimum "Value", Maximum "Value", Minimum "ValueWithNothing", Maximum "ValueWithNothing"]
+            grouped.row_count . should_equal 10
+            grouped.columns.length . should_equal 5
+            grouped.columns.at 0 . name . should_equal "Index"
+            idx = find_row [6] grouped
+            idx.is_nothing . should_be_false
+            grouped.columns.at 1 . name . should_equal "Minimum Value"
+            grouped.columns.at 1 . at idx . should_equal -99.605880 epsilon=0.000001
+            grouped.columns.at 2 . name . should_equal "Maximum Value"
+            grouped.columns.at 2 . at idx . should_equal 99.12739 epsilon=0.000001
+            grouped.columns.at 3 . name . should_equal "Minimum ValueWithNothing"
+            grouped.columns.at 3 . at idx . should_equal -99.99 epsilon=0.000001
+            grouped.columns.at 4 . name . should_equal "Maximum ValueWithNothing"
+            grouped.columns.at 4 . at idx . should_equal 99.79 epsilon=0.000001
+
+        Test.specify "should be able to get shortest, longest and concatenated values" <|
+            grouped = table.group_by grouping [Shortest "TextWithNothing", Longest "TextWithNothing", Concatenate "Code"]
+            grouped.row_count . should_equal 10
+            grouped.columns.length . should_equal 4
+            grouped.columns.at 0 . name . should_equal "Index"
+            idx = find_row [6] grouped
+            idx.is_nothing . should_be_false
+            grouped.columns.at 1 . name . should_equal "Shortest TextWithNothing"
+            grouped.columns.at 1 . at idx . should_equal "kmqxqkl6qx"
+            grouped.columns.at 2 . name . should_equal "Longest TextWithNothing"
+            grouped.columns.at 2 . at idx . should_equal "kmqxqkl6qx"
+            grouped.columns.at 3 . name . should_equal "Concatenate Code"
+            grouped.columns.at 3 . at idx . length . should_equal 783
+
+main = Test.Suite.run_main here.spec
--- a/test/Tests/src/Data/Map_Spec.enso
+++ b/test/Tests/src/Data/Map_Spec.enso
@ -91,4 +91,21 @@ spec = Test.group "Maps" <|
    Test.specify "should allow getting a vector of the values" <|
        m = Map.empty . insert 1 2 . insert 2 4
        m.values . should_equal [2, 4]
+    Test.specify "should be able to get the first key value pair" <|
+        # Keys 1 and 2 are present, so the entry with the lowest key is (1, 2).
+        two_entries = Map.empty . insert 1 2 . insert 2 4
+        lowest = two_entries.first
+        lowest.first . should_equal 1
+        lowest.second . should_equal 2
+    Test.specify "should be able to get the first key value pair of an empty map" <|
+        # An empty map has no lowest key, so `first` yields Nothing.
+        empty_map = Map.empty
+        empty_map.first . should_equal Nothing
+    Test.specify "should be able to get the last key value pair" <|
+        # Keys 1 and 2 are present, so the entry with the highest key is (2, 4).
+        two_entries = Map.empty . insert 1 2 . insert 2 4
+        highest = two_entries.last
+        highest.first . should_equal 2
+        highest.second . should_equal 4
+    Test.specify "should be able to get the last key value pair of an empty map" <|
+        # An empty map has no highest key, so `last` yields Nothing.
+        empty_map = Map.empty
+        empty_map.last . should_equal Nothing

+main = Test.Suite.run_main here.spec