Add table running functionality for Sum, Mean, Min, Max. (#9577)

* Add Table.Running * Code Review fixes * Code Review changes * Change null handling
2024-11-22 22:10:15 +03:00 · 2024-04-23 09:45:43 +01:00 · 2024-04-23 09:45:43 +01:00 · 4a97bfa31f
commit 4a97bfa31f
parent d665f4d9c2
10 changed files with 665 additions and 15 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -654,6 +654,7 @@
 - [Added `Decimal.floor`, `.ceil`, and `.trunc`.][9694]
 - [Added `recursive` option to `File.delete`.][9719]
 - [Added `Vector.build`.][9725]
+- [Added `Table.running` method.][9577]

 [debug-shortcuts]:
  https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -958,6 +959,7 @@
 [9716]: https://github.com/enso-org/enso/pull/9716
 [9719]: https://github.com/enso-org/enso/pull/9719
 [9725]: https://github.com/enso-org/enso/pull/9725
+[9577]: https://github.com/enso-org/enso/pull/9577

 #### Enso Compiler

--- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Statistics.enso
+++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Statistics.enso
@ -23,6 +23,7 @@ polyglot java import java.lang.NullPointerException
 polyglot java import org.enso.base.CompareException
 polyglot java import org.enso.base.statistics.CorrelationStatistics
 polyglot java import org.enso.base.statistics.Rank
+polyglot java import org.enso.base.statistics.Statistic as Java_Statistic

 ## Specifies how to handle ranking of equal values.
 type Rank_Method
@ -137,6 +138,22 @@ type Statistic
        Statistic.Kurtosis -> 4
        _ -> Nothing

+    ## PRIVATE
+       Maps this statistic onto the matching constant of the Java
+       `org.enso.base.statistics.Statistic` enum. Constructor arguments are
+       dropped, as the Java enum only identifies the kind of statistic.
+    to_java self = case self of
+        Statistic.Count -> Java_Statistic.Count
+        Statistic.Minimum -> Java_Statistic.Minimum
+        Statistic.Maximum -> Java_Statistic.Maximum
+        Statistic.Sum -> Java_Statistic.Sum
+        Statistic.Mean -> Java_Statistic.Mean
+        Statistic.Variance _ -> Java_Statistic.Variance
+        Statistic.Standard_Deviation _ -> Java_Statistic.Standard_Deviation
+        Statistic.Skew _ -> Java_Statistic.Skew
+        Statistic.Kurtosis -> Java_Statistic.Kurtosis
+        Statistic.Covariance _ -> Java_Statistic.Covariance
+        Statistic.Pearson _ -> Java_Statistic.Pearson
+        Statistic.Spearman _ -> Java_Statistic.Spearman
+        Statistic.R_Squared _ -> Java_Statistic.R_Squared
+
    ## PRIVATE
       Compute a single statistic on a vector like object.

@ -191,14 +208,7 @@ type Statistic
       - statistics: Set of statistics to calculate.
    running_bulk : Vector -> Vector Statistic -> Vector Any
    running_bulk data statistics=[Statistic.Count, Statistic.Sum] =
-        is_unsupported s = case s of
-            Statistic.Covariance _ -> True
-            Statistic.Pearson _ -> True
-            Statistic.Spearman _ -> True
-            Statistic.R_Squared _ -> True
-            _ -> False
-
-        if statistics.any is_unsupported then Error.throw (Illegal_Argument.Error ("Unsupported Statistics ( " + (statistics.filter is_unsupported . to_text) ") for running calculations.")) else
+        check_running_support statistics <|
            moment_order = statistics.map on_problems=No_Wrap .order
            has_min_max = statistics.any (s-> s == Statistic.Minimum || s == Statistic.Maximum)
            max_moment_order = moment_order.filter (v-> v != Nothing) . fold 0 .max
@ -270,6 +280,21 @@ type Statistic
    rank_data input method=Rank_Method.Average =
        method.compute input

+## PRIVATE
+   Check if the statistics are supported for running calculations.
+
+   Arguments:
+   - statistics: The statistics to validate.
+   - action: Suspended computation that is evaluated and returned only when
+     every statistic passes the check.
+
+   If any statistic is `Covariance`, `Pearson`, `Spearman` or `R_Squared`, an
+   `Illegal_Argument` error listing the offending statistics is thrown and
+   `action` is not run.
+check_running_support : Vector Statistic -> Any -> Any
+check_running_support statistics ~action =
+    is_unsupported s = case s of
+        Statistic.Covariance _ -> True
+        Statistic.Pearson _ -> True
+        Statistic.Spearman _ -> True
+        Statistic.R_Squared _ -> True
+        _ -> False
+
+    if statistics.any is_unsupported then Error.throw (Illegal_Argument.Error ("Unsupported Statistics ( " + (statistics.filter is_unsupported . to_text) + ") for running calculations.")) else
+        action
+
+
 ## PRIVATE
 wrap_java_call : Any -> Any
 wrap_java_call ~function =
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Add_Running.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Add_Running.enso
@ -0,0 +1,44 @@
+from Standard.Base import all
+import Standard.Base.Errors.Common.Unsupported_Argument_Types
+import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
+
+import project.Column.Column
+import project.Set_Mode.Set_Mode
+import project.Sort_Column.Sort_Column
+import project.Table.Table
+import project.Internal.Add_Row_Number
+import project.Internal.Java_Problems
+import project.Internal.Problem_Builder.Problem_Builder
+import project.Internal.Table_Helpers
+from project.Errors import Duplicate_Output_Column_Names
+import project.Value_Type.Value_Type
+
+polyglot java import java.lang.ArithmeticException
+polyglot java import org.enso.table.data.column.storage.numeric.LongRangeStorage
+polyglot java import org.enso.table.operations.AddRunning
+
+## PRIVATE
+   Adds a running `statistic` of the column `of` to `table` as a new column.
+
+   `Statistic.Count` is delegated to `Add_Row_Number.add_row_number`. Any other
+   statistic requires a numeric `of` column and is computed by the Java
+   `AddRunning.create_running` helper, using the resolved `group_by` and
+   `order_by` columns. If `as` is empty, the new column is named
+   `Running <statistic> of <column name>`.
+add_running : Table -> Statistic -> (Text | Integer) -> Text -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
+add_running table (statistic:Statistic=Statistic.Count) (of:Text|Integer=0) (as:Text='') (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=Problem_Behavior.Report_Warning) =
+    check_running_support [statistic] <|
+        of_col = table.at of
+        new_name = if as.is_empty then 'Running ' + statistic.to_text + ' of ' + of_col.name else as
+        case statistic of
+            Statistic.Count ->
+                Add_Row_Number.add_row_number table new_name 1 1 group_by order_by on_problems
+            _ ->
+                Value_Type.expect_numeric of_col <|
+                    # NOTE(review): problems gathered in `problem_builder` are never attached to
+                    # the result in this function - confirm whether they should be reported
+                    # according to `on_problems`.
+                    problem_builder = Problem_Builder.new error_on_missing_columns=True
+                    grouping_columns = table.columns_helper.select_columns_helper group_by Case_Sensitivity.Default True problem_builder
+                    ordering = Table_Helpers.resolve_order_by table.columns order_by problem_builder
+                    source_java_column = of_col.java_column
+                    grouping_java_columns = grouping_columns.map .java_column
+                    ordering_java_columns = ordering.map c->
+                        c.column.java_column
+                    directions = ordering.map c->
+                        c.associated_selector.direction.to_sign
+
+                    Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
+                        new_storage = AddRunning.create_running statistic.to_java source_java_column grouping_java_columns ordering_java_columns directions java_problem_aggregator
+                        new_column = Column.from_storage new_name new_storage
+                        table.set new_column new_name set_mode=Set_Mode.Add
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Table.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Table.enso
@ -31,6 +31,7 @@ import project.Expression.Expression
 import project.Expression.Expression_Error
 import project.Extensions.Table_Conversions
 import project.Internal.Add_Row_Number
+import project.Internal.Add_Running
 import project.Internal.Aggregate_Column_Helper
 import project.Internal.Column_Naming_Helper.Column_Naming_Helper
 import project.Internal.Constant_Column.Constant_Column
@ -2867,8 +2868,7 @@ type Table
        transformer col = col.text_replace resolved_term resolved_new_text case_sensitivity only_first
        Table_Helpers.replace_columns_with_transformed_columns self columns transformer

-    ## PRIVATE
-       ALIAS cumulative
+    ## ALIAS cumulative
       GROUP Standard.Base.Values
       ICON dataframe_map_column
       Adds a new column to the table with a running calculation.
@ -2905,10 +2905,7 @@ type Table
    @of Widget_Helpers.make_column_name_selector
    running : Statistic -> (Text | Integer) -> Text -> Vector (Text | Integer | Regex) | Text | Integer | Regex -> Vector (Text | Sort_Column) | Text -> Problem_Behavior -> Table
    running self (statistic:Statistic=Statistic.Count) (of:(Text | Integer)=0) (as:Text='') (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=Problem_Behavior.Report_Warning) =
-        if statistic != Statistic.Count then Error.throw (Illegal_Argument.Error ("Currently only Statistic.Count is supported in Table.running.")) else
-          of_col = self.at of
-          new_name = if as == '' then 'Running ' + statistic.to_text + ' of ' + of_col.name else as
-          Add_Row_Number.add_row_number self new_name 1 1 group_by order_by on_problems
+        Add_Running.add_running self statistic of as group_by order_by on_problems

    ## PRIVATE
    column_naming_helper : Column_Naming_Helper
--- a/std-bits/base/src/main/java/org/enso/base/statistics/Statistic.java
+++ b/std-bits/base/src/main/java/org/enso/base/statistics/Statistic.java
@ -0,0 +1,17 @@
+package org.enso.base.statistics;
+
+/**
+ * Identifies a kind of statistic on the Java side. The Enso {@code Statistic.to_java} helper maps
+ * Enso statistics onto these constants.
+ */
+public enum Statistic {
+  Count,
+  Minimum,
+  Maximum,
+  Sum,
+  Mean,
+  Variance,
+  Standard_Deviation,
+  Skew,
+  Kurtosis,
+  Covariance,
+  Pearson,
+  Spearman,
+  R_Squared
+}
--- a/std-bits/table/src/main/java/org/enso/table/operations/AddRunning.java
+++ b/std-bits/table/src/main/java/org/enso/table/operations/AddRunning.java
@ -0,0 +1,134 @@
+package org.enso.table.operations;
+
+import org.enso.base.statistics.Statistic;
+import org.enso.table.data.column.storage.Storage;
+import org.enso.table.data.column.storage.numeric.DoubleStorage;
+import org.enso.table.data.table.Column;
+import org.enso.table.problems.ProblemAggregator;
+
+public class AddRunning {
+
+  /**
+   * Computes the running value of {@code statistic} over {@code sourceColumn}.
+   *
+   * <p>The generator is chosen by {@code RunningGenerator.createGenerator} from the grouping and
+   * ordering columns, and is then driven with iterators for the requested statistic.
+   *
+   * @param statistic the statistic to accumulate
+   * @param sourceColumn the column whose values are accumulated
+   * @param groupingColumns columns splitting the rows into independent groups; may be empty
+   * @param orderingColumns columns defining the accumulation order; may be empty
+   * @param directions one direction per ordering column
+   * @param problemAggregator receiver for problems reported while processing
+   * @return a double storage sized to the source column
+   * @throws IllegalArgumentException if the ordering columns and directions differ in length, or
+   *     when an iterator is requested for a statistic other than Sum, Mean, Minimum or Maximum
+   */
+  public static Storage<Double> create_running(
+      Statistic statistic,
+      Column sourceColumn,
+      Column[] groupingColumns,
+      Column[] orderingColumns,
+      int[] directions,
+      ProblemAggregator problemAggregator) {
+    if (orderingColumns.length != directions.length) {
+      throw new IllegalArgumentException(
+          "The number of ordering columns and directions must be the same.");
+    }
+    RunningGenerator generator =
+        RunningGenerator.createGenerator(
+            sourceColumn, groupingColumns, orderingColumns, directions, problemAggregator);
+    generator.generate(new RunningIteratorFactoryImpl(statistic));
+    return new DoubleStorage(generator.result, sourceColumn.getSize(), generator.isNothing);
+  }
+
+  /** Hands out a fresh iterator for the configured statistic on every request. */
+  private static class RunningIteratorFactoryImpl implements RunningIteratorFactory {
+
+    Statistic statistic;
+
+    RunningIteratorFactoryImpl(Statistic statistic) {
+      this.statistic = statistic;
+    }
+
+    @Override
+    public RunningIterator getIterator() {
+      return switch (statistic) {
+        case Sum -> new RunningSumIterator();
+        case Mean -> new RunningMeanIterator();
+        case Minimum -> new RunningMinIterator();
+        case Maximum -> new RunningMaxIterator();
+        default -> throw new IllegalArgumentException("Unsupported statistic: " + statistic);
+      };
+    }
+  }
+
+  /**
+   * Shared skeleton of the running iterators: {@code null} inputs leave the state untouched, the
+   * first non-null input seeds the state and later ones are folded into it.
+   */
+  private abstract static class RunningIteratorBase implements RunningIterator {
+
+    protected double current;
+    private boolean isInitialized = false;
+
+    @Override
+    public Double next(Double value) {
+      if (value == null) {
+        // Nothing to fold in: repeat the last result, or report none if nothing was seen yet.
+        return isInitialized ? getCurrent() : null;
+      }
+      if (isInitialized) {
+        increment(value);
+      } else {
+        isInitialized = true;
+        initialize(value);
+      }
+      return getCurrent();
+    }
+
+    /** Seeds the state with the first non-null value. */
+    public void initialize(double value) {
+      current = value;
+    }
+
+    /** Folds a subsequent non-null value into the state. */
+    public abstract void increment(double value);
+
+    /** Result for the values consumed so far. */
+    public double getCurrent() {
+      return current;
+    }
+  }
+
+  /** Running total of the non-null values. */
+  private static class RunningSumIterator extends RunningIteratorBase {
+
+    @Override
+    public void increment(double value) {
+      current = current + value;
+    }
+  }
+
+  /** Running mean: keeps the total and the count of non-null values, dividing on read. */
+  private static class RunningMeanIterator extends RunningIteratorBase {
+
+    private int currentCount;
+
+    @Override
+    public void initialize(double value) {
+      currentCount = 1;
+      current = value;
+    }
+
+    @Override
+    public void increment(double value) {
+      currentCount++;
+      current += value;
+    }
+
+    @Override
+    public double getCurrent() {
+      return current / currentCount;
+    }
+  }
+
+  /** Smallest non-null value seen so far. */
+  private static class RunningMinIterator extends RunningIteratorBase {
+
+    @Override
+    public void increment(double value) {
+      current = Math.min(value, current);
+    }
+  }
+
+  /** Largest non-null value seen so far. */
+  private static class RunningMaxIterator extends RunningIteratorBase {
+
+    @Override
+    public void increment(double value) {
+      current = Math.max(value, current);
+    }
+  }
+}
--- a/std-bits/table/src/main/java/org/enso/table/operations/RunningGenerator.java
+++ b/std-bits/table/src/main/java/org/enso/table/operations/RunningGenerator.java
@ -0,0 +1,194 @@
+package org.enso.table.operations;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.IntStream;
+import org.enso.base.polyglot.NumericConverter;
+import org.enso.base.text.TextFoldingStrategy;
+import org.enso.table.data.column.storage.Storage;
+import org.enso.table.data.index.MultiValueIndex;
+import org.enso.table.data.index.OrderedMultiValueKey;
+import org.enso.table.data.index.UnorderedMultiValueKey;
+import org.enso.table.data.table.Column;
+import org.enso.table.problems.ColumnAggregatedProblemAggregator;
+import org.enso.table.problems.ProblemAggregator;
+import org.enso.table.util.ConstantList;
+
+abstract class RunningGenerator {
+
+  Storage<?> sourceStorage;
+  long[] result;
+  BitSet isNothing;
+
+  RunningGenerator(Column sourceColumn) {
+    this.sourceStorage = sourceColumn.getStorage();
+    result = new long[sourceColumn.getSize()];
+    isNothing = new BitSet();
+  }
+
+  void calculateNextValue(int i, RunningIterator it) {
+    Object value = sourceStorage.getItemBoxed(i);
+    Double dValue = NumericConverter.tryConvertingToDouble(value);
+    Double dNextValue = it.next(dValue);
+    if (dNextValue == null) {
+      isNothing.set(i);
+    } else {
+      result[i] = Double.doubleToRawLongBits(dNextValue);
+    }
+  }
+
+  // implement this method in subclasses to control the order you want to iterate over the data
+  public abstract void generate(RunningIteratorFactory factory);
+
+  public static RunningGenerator createGenerator(
+      Column sourceColumn,
+      Column[] groupingColumns,
+      Column[] orderingColumns,
+      int[] directions,
+      ProblemAggregator problemAggregator) {
+    RunningGenerator runningGenerator;
+    if (groupingColumns.length > 0 && orderingColumns.length > 0) {
+      runningGenerator =
+          new GroupingOrderingRunning(
+              sourceColumn, groupingColumns, orderingColumns, directions, problemAggregator);
+    } else if (groupingColumns.length > 0) {
+      runningGenerator =
+          new GroupingNoOrderingRunning(sourceColumn, groupingColumns, problemAggregator);
+    } else if (orderingColumns.length > 0) {
+      runningGenerator = new NoGroupingOrderingRunning(sourceColumn, orderingColumns, directions);
+    } else {
+      runningGenerator = new NoGroupingNoOrderingRunning(sourceColumn);
+    }
+    return runningGenerator;
+  }
+}
+
+class NoGroupingNoOrderingRunning extends RunningGenerator {
+
+  NoGroupingNoOrderingRunning(Column sourceColumn) {
+    super(sourceColumn);
+  }
+
+  @Override
+  public void generate(RunningIteratorFactory factory) {
+    var it = factory.getIterator();
+    for (int i = 0; i < result.length; i++) {
+      calculateNextValue(i, it);
+    }
+  }
+}
+
+/**
+ * Running calculation per group, visiting rows in their stored order. Each distinct grouping key
+ * gets its own iterator, created the first time the key is seen.
+ */
+class GroupingNoOrderingRunning extends RunningGenerator {
+
+  private final Column[] groupingColumns;
+  private final Storage<?>[] groupingStorages;
+  private final ColumnAggregatedProblemAggregator groupingProblemAggregator;
+  private final List<TextFoldingStrategy> textFoldingStrategy;
+  private final Map<UnorderedMultiValueKey, RunningIterator> groups;
+
+  public GroupingNoOrderingRunning(
+      Column sourceColumn, Column[] groupingColumns, ProblemAggregator problemAggregator) {
+    super(sourceColumn);
+    this.groupingColumns = groupingColumns;
+    Storage<?>[] storages = new Storage<?>[groupingColumns.length];
+    for (int column = 0; column < groupingColumns.length; column++) {
+      storages[column] = groupingColumns[column].getStorage();
+    }
+    this.groupingStorages = storages;
+    this.groupingProblemAggregator = new ColumnAggregatedProblemAggregator(problemAggregator);
+    this.textFoldingStrategy =
+        ConstantList.make(TextFoldingStrategy.unicodeNormalizedFold, storages.length);
+    this.groups = new HashMap<>();
+  }
+
+  @Override
+  public void generate(RunningIteratorFactory factory) {
+    for (int row = 0; row < result.length; row++) {
+      UnorderedMultiValueKey groupKey =
+          new UnorderedMultiValueKey(groupingStorages, row, textFoldingStrategy);
+      groupKey.checkAndReportFloatingEquality(
+          groupingProblemAggregator, columnIx -> groupingColumns[columnIx].getName());
+      RunningIterator groupIterator =
+          groups.computeIfAbsent(groupKey, unused -> factory.getIterator());
+      calculateNextValue(row, groupIterator);
+    }
+  }
+}
+
+/**
+ * Running calculation over all rows with a single iterator, visiting the rows in the order of
+ * their sorted ordering keys.
+ */
+class NoGroupingOrderingRunning extends RunningGenerator {
+
+  private final Storage<?>[] orderingStorages;
+  private final List<OrderedMultiValueKey> keys;
+
+  public NoGroupingOrderingRunning(
+      Column sourceColumn, Column[] orderingColumns, int[] directions) {
+    super(sourceColumn);
+    int rowCount = orderingColumns[0].getSize();
+    orderingStorages =
+        Arrays.stream(orderingColumns).map(Column::getStorage).toArray(Storage[]::new);
+    keys = new ArrayList<>(rowCount);
+    for (int row = 0; row < rowCount; row++) {
+      keys.add(new OrderedMultiValueKey(orderingStorages, row, directions));
+    }
+    // A null comparator sorts by the keys' natural ordering.
+    keys.sort(null);
+  }
+
+  @Override
+  public void generate(RunningIteratorFactory factory) {
+    RunningIterator iterator = factory.getIterator();
+    keys.forEach(key -> calculateNextValue(key.getRowIndex(), iterator));
+  }
+}
+
+/**
+ * Running calculation restarted for every group: the rows of each group are sorted by their
+ * ordering keys and fed, in that order, to an iterator created for the group.
+ */
+class GroupingOrderingRunning extends RunningGenerator {
+
+  private final Column[] groupingColumns;
+  private final Column[] orderingColumns;
+  private final int[] directions;
+  private final Storage<?>[] groupingStorages;
+  private final Storage<?>[] orderingStorages;
+  private final ProblemAggregator problemAggregator;
+
+  public GroupingOrderingRunning(
+      Column sourceColumn,
+      Column[] groupingColumns,
+      Column[] orderingColumns,
+      int[] directions,
+      ProblemAggregator problemAggregator) {
+    super(sourceColumn);
+    this.groupingColumns = groupingColumns;
+    this.orderingColumns = orderingColumns;
+    this.directions = directions;
+    groupingStorages =
+        Arrays.stream(groupingColumns).map(Column::getStorage).toArray(Storage[]::new);
+    orderingStorages =
+        Arrays.stream(orderingColumns).map(Column::getStorage).toArray(Storage[]::new);
+    this.problemAggregator = problemAggregator;
+  }
+
+  @Override
+  public void generate(RunningIteratorFactory factory) {
+    int n = orderingColumns[0].getSize();
+    var groupIndex =
+        MultiValueIndex.makeUnorderedIndex(
+            groupingColumns, n, TextFoldingStrategy.unicodeNormalizedFold, problemAggregator);
+    for (var entry : groupIndex.mapping().entrySet()) {
+      List<Integer> indices = entry.getValue();
+      // Copy into a mutable list so the group's keys can be sorted in place.
+      List<OrderedMultiValueKey> orderingKeys =
+          new ArrayList<>(
+              indices.stream()
+                  .map(i -> new OrderedMultiValueKey(orderingStorages, i, directions))
+                  .toList());
+      // A null comparator sorts by the keys' natural ordering.
+      orderingKeys.sort(null);
+      // Each group accumulates independently of the others.
+      RunningIterator it = factory.getIterator();
+      for (OrderedMultiValueKey key : orderingKeys) {
+        var i = key.getRowIndex();
+        calculateNextValue(i, it);
+      }
+    }
+  }
+}
--- a/std-bits/table/src/main/java/org/enso/table/operations/RunningIterator.java
+++ b/std-bits/table/src/main/java/org/enso/table/operations/RunningIterator.java
@ -0,0 +1,6 @@
+package org.enso.table.operations;
+
+/** Accumulates a running result one source value at a time. */
+public interface RunningIterator {
+
+  /**
+   * Consumes the next source value and returns the running result after it.
+   *
+   * @param value the next value; may be {@code null}
+   * @return the running result, or {@code null} when there is none ({@code RunningGenerator}
+   *     marks such rows as missing)
+   */
+  Double next(Double value);
+}
--- a/std-bits/table/src/main/java/org/enso/table/operations/RunningIteratorFactory.java
+++ b/std-bits/table/src/main/java/org/enso/table/operations/RunningIteratorFactory.java
@ -0,0 +1,6 @@
+package org.enso.table.operations;
+
+/**
+ * Supplies {@link RunningIterator} instances. The generators request one iterator per independent
+ * run, e.g. one for each group.
+ */
+public interface RunningIteratorFactory {
+
+  /** Returns an iterator to accumulate one run of values. */
+  RunningIterator getIterator();
+}
--- a/test/Table_Tests/src/In_Memory/Table_Running_Spec.enso
+++ b/test/Table_Tests/src/In_Memory/Table_Running_Spec.enso
@ -3,6 +3,7 @@ from Standard.Table import Column, Table
 from Standard.Test import all
 from Standard.Table.Errors import all
 import Standard.Base.Errors.Common.Type_Error
+import Standard.Base.Errors.Illegal_Argument.Illegal_Argument

 from project.Util import all

@ -26,7 +27,7 @@ type Data
        Data.Value make_table

 add_specs suite_builder =
-    suite_builder.group "running" group_builder->
+    suite_builder.group "running count" group_builder->
        data = Data.setup
        group_builder.specify "Defaults add running count of first column" <|
            result = data.table.running
@ -88,6 +89,230 @@ add_specs suite_builder =
            # 4 | SG0456 | E         | 73.77        | 1
            expected_table = data.table.zip expected_column
            result.should_equal expected_table
+        group_builder.specify "Can provide running count based on order by without grouping" <|
+            result = data.table.running Statistic.Count "Passenger" "Ranked ticket cost" [] ["Ticket Price"]
+            expected_column = Column.from_vector "Ranked ticket cost" [3, 5, 1, 4, 2]
+            #   | Flight | Passenger | Ticket Price | Ranked ticket cost
+            #---+--------+-----------+--------------+-------------------------
+            # 0 | BA0123 | A         | 100.5        | 3
+            # 1 | BA0123 | B         | 575.99       | 5
+            # 2 | SG0456 | A         | 73.23        | 1
+            # 3 | BA0123 | C         | 112.34       | 4
+            # 4 | SG0456 | E         | 73.77        | 2
+            expected_table = data.table.zip expected_column
+            result.should_equal expected_table
+    suite_builder.group "running sum" group_builder->
+        data = Data.setup
+        group_builder.specify "Not setting the as name gives default name based on of column" <|
+            result = data.table.running Statistic.Sum "Ticket Price"
+            expected_column = Column.from_vector "Running Sum of Ticket Price" [100.5, 676.49, 749.72, 862.0600000000001, 935.83]
+            #   | Flight | Passenger | Ticket Price | Running Sum of Ticket Price
+            #---+--------+-----------+--------------+-------------------------
+            # 0 | BA0123 | A         | 100.5        | 100.5
+            # 1 | BA0123 | B         | 575.99       | 676.49
+            # 2 | SG0456 | A         | 73.23        | 749.72
+            # 3 | BA0123 | C         | 112.34       | 862.06
+            # 4 | SG0456 | E         | 73.77        | 935.83
+            expected_table = data.table.zip expected_column
+            result.should_equal expected_table
+        group_builder.specify "Can group by and provide running sum per group" <|
+            result = data.table.running Statistic.Sum "Ticket Price" "Running" ["Flight"]
+            expected_column = Column.from_vector "Running" [100.5, 676.49, 73.23, 788.83, 147]
+            #   | Flight | Passenger | Ticket Price | Running
+            #---+--------+-----------+--------------+-------------------------
+            # 0 | BA0123 | A         | 100.5        | 100.5
+            # 1 | BA0123 | B         | 575.99       | 676.49
+            # 2 | SG0456 | A         | 73.23        | 73.23
+            # 3 | BA0123 | C         | 112.34       | 788.83
+            # 4 | SG0456 | E         | 73.77        | 147
+            expected_table = data.table.zip expected_column
+            result.should_equal expected_table
+        group_builder.specify "Can group by and provide running sum per group based on order by" <|
+            result = data.table.running Statistic.Sum "Ticket Price" "Sum ticket cost per pass" ["Passenger"] ["Ticket Price"]
+            expected_column = Column.from_vector "Sum ticket cost per pass" [173.73000000000002, 575.99, 73.23, 112.34, 73.77]
+            #   | Flight | Passenger | Ticket Price | Sum ticket cost per pass
+            #---+--------+-----------+--------------+-------------------------
+            # 0 | BA0123 | A         | 100.5        | 173.73
+            # 1 | BA0123 | B         | 575.99       | 575.99
+            # 2 | SG0456 | A         | 73.23        | 73.23
+            # 3 | BA0123 | C         | 112.34       | 112.34
+            # 4 | SG0456 | E         | 73.77        | 73.77
+            expected_table = data.table.zip expected_column
+            result.should_equal expected_table
+        group_builder.specify "Can provide running sum based on order by without grouping" <|
+            result = data.table.running Statistic.Sum "Ticket Price" "Sum ticket cost" [] ["Ticket Price"]
+            expected_column = Column.from_vector "Sum ticket cost" [247.5, 935.83, 73.23, 359.84000000000003, 147]
+            #   | Flight | Passenger | Ticket Price | Sum ticket cost
+            #---+--------+-----------+--------------+-------------------------
+            # 0 | BA0123 | A         | 100.5        | 247.5
+            # 1 | BA0123 | B         | 575.99       | 935.83
+            # 2 | SG0456 | A         | 73.23        | 73.23
+            # 3 | BA0123 | C         | 112.34       | 359.84
+            # 4 | SG0456 | E         | 73.77        | 147
+            expected_table = data.table.zip expected_column
+            result.should_equal expected_table
+    suite_builder.group "running mean" group_builder->
+        data = Data.setup
+        group_builder.specify "Not setting the as name gives default name based on of column" <|
+            result = data.table.running Statistic.Mean "Ticket Price"
+            expected_column = Column.from_vector "Running Mean of Ticket Price" [100.5, 338.245, 249.90666666666667, 215.51500000000001, 187.166]
+            #   | Flight | Passenger | Ticket Price | Running Mean of Ticket Price
+            #---+--------+-----------+--------------+-------------------------
+            # 0 | BA0123 | A         | 100.5        | 100.5
+            # 1 | BA0123 | B         | 575.99       | 338.245
+            # 2 | SG0456 | A         | 73.23        | 249.90666666666667
+            # 3 | BA0123 | C         | 112.34       | 215.51500000000001
+            # 4 | SG0456 | E         | 73.77        | 187.166
+            expected_table = data.table.zip expected_column
+            result.should_equal expected_table
+    suite_builder.group "running max" group_builder->
+        data = Data.setup
+        group_builder.specify "Not setting the as name gives default name based on of column" <|
+            result = data.table.running Statistic.Maximum "Ticket Price"
+            expected_column = Column.from_vector "Running Maximum of Ticket Price" [100.5, 575.99, 575.99, 575.99, 575.99]
+            #   | Flight | Passenger | Ticket Price | Running Maximum of Ticket Price
+            #---+--------+-----------+--------------+-------------------------
+            # 0 | BA0123 | A         | 100.5        | 100.5
+            # 1 | BA0123 | B         | 575.99       | 575.99
+            # 2 | SG0456 | A         | 73.23        | 575.99
+            # 3 | BA0123 | C         | 112.34       | 575.99
+            # 4 | SG0456 | E         | 73.77        | 575.99
+            expected_table = data.table.zip expected_column
+            result.should_equal expected_table
+    suite_builder.group "running min" group_builder->
+        data = Data.setup
+        group_builder.specify "Not setting the as name gives default name based on of column" <|
+            result = data.table.running Statistic.Minimum "Ticket Price"
+            expected_column = Column.from_vector "Running Minimum of Ticket Price" [100.5, 100.5, 73.23, 73.23, 73.23]
+            #   | Flight | Passenger | Ticket Price | Running Minimum of Ticket Price
+            #---+--------+-----------+--------------+-------------------------
+            # 0 | BA0123 | A         | 100.5        | 100.5
+            # 1 | BA0123 | B         | 575.99       | 100.5
+            # 2 | SG0456 | A         | 73.23        | 73.23
+            # 3 | BA0123 | C         | 112.34       | 73.23
+            # 4 | SG0456 | E         | 73.77        | 73.23
+            expected_table = data.table.zip expected_column
+            result.should_equal expected_table
+    suite_builder.group "nothing handling" group_builder->
+        #   | Flight | Passenger | Ticket Price
+        #---+--------+-----------+--------------
+        # 0 | BA0123 | A         | 100.5
+        # 1 | BA0123 | B         | 575.99
+        # 2 | SG0456 | A         | nothing
+        # 3 | BA0123 | C         | nothing
+        # 4 | SG0456 | E         | 73.77
+        flight = ["Flight", ["BA0123", "BA0123", "SG0456", "BA0123", "SG0456"]]
+        passenger = ["Passenger", ["A", "B", "A", "C", "E"]]
+        ticket_price = ["Ticket Price", [100.50, 575.99, Nothing, Nothing, 73.77]]
+        table = Table.new [flight, passenger, ticket_price]
+        group_builder.specify "Running count doesn't care about nothing values" <|
+            result = table.running Statistic.Count "Passenger"
+            expected_column = Column.from_vector "Running Count of Passenger" [1, 2, 3, 4, 5]
+            #   | Flight | Passenger | Ticket Price | Running Count of Passenger
+            #---+--------+-----------+--------------+-------------------------
+            # 0 | BA0123 | A         | 100.5        | 1
+            # 1 | BA0123 | B         | 575.99       | 2
+            # 2 | SG0456 | A         | nothing      | 3
+            # 3 | BA0123 | C         | nothing      | 4
+            # 4 | SG0456 | E         | 73.77        | 5
+            expected_table = table.zip expected_column
+            result.should_equal expected_table
+        group_builder.specify "Running sum works ignores nothing values" <|
+            result = table.running Statistic.Sum "Ticket Price"
+            expected_column = Column.from_vector "Running Sum of Ticket Price" [100.5, 676.49, 676.49, 676.49, 750.26]
+            #   | Flight | Passenger | Ticket Price | Running Sum of Ticket Price
+            #---+--------+-----------+--------------+-------------------------
+            # 0 | BA0123 | A         | 100.5        | 100.5
+            # 1 | BA0123 | B         | 575.99       | 676.49
+            # 2 | SG0456 | A         | Nothing      | 676.49
+            # 3 | BA0123 | C         | Nothing      | 676.49
+            # 4 | SG0456 | E         | 73.77        | 750.26
+            expected_table = table.zip expected_column
+            result.should_equal expected_table
+        group_builder.specify "Running min ignores nothing values and works with grouping" <|
+            # The input table extended with the "Running" column asserted below:
+            #   | Flight | Passenger | Ticket Price | Running
+            #---+--------+-----------+--------------+-------------------------
+            # 0 | BA0123 | A         | 100.5        | 100.5
+            # 1 | BA0123 | B         | 575.99       | 100.5
+            # 2 | SG0456 | A         | Nothing      | Nothing
+            # 3 | BA0123 | C         | Nothing      | 100.5
+            # 4 | SG0456 | E         | 73.77        | 73.77
+            running_min = Column.from_vector "Running" [100.5, 100.5, Nothing, 100.5, 73.77]
+            actual = table.running Statistic.Minimum "Ticket Price" "Running" ["Flight"]
+            actual.should_equal (table.zip running_min)
+        group_builder.specify "Running max ignores nothing values and works with grouping" <|
+            # The input table extended with the "Running" column asserted below:
+            #   | Flight | Passenger | Ticket Price | Running
+            #---+--------+-----------+--------------+-------------------------
+            # 0 | BA0123 | A         | 100.5        | 100.5
+            # 1 | BA0123 | B         | 575.99       | 575.99
+            # 2 | SG0456 | A         | Nothing      | Nothing
+            # 3 | BA0123 | C         | Nothing      | 575.99
+            # 4 | SG0456 | E         | 73.77        | 73.77
+            running_max = Column.from_vector "Running" [100.5, 575.99, Nothing, 575.99, 73.77]
+            actual = table.running Statistic.Maximum "Ticket Price" "Running" ["Flight"]
+            actual.should_equal (table.zip running_max)
+        group_builder.specify "Running mean ignores nothing values" <|
+            # The input table extended with the "Running" column asserted below:
+            #   | Flight | Passenger | Ticket Price | Running
+            #---+--------+-----------+--------------+-------------------------
+            # 0 | BA0123 | A         | 100.5        | 100.5
+            # 1 | BA0123 | B         | 575.99       | 338.245
+            # 2 | SG0456 | A         | Nothing      | 338.245
+            # 3 | BA0123 | C         | Nothing      | 338.245
+            # 4 | SG0456 | E         | 73.77        | 250.08666666666667
+            running_mean = Column.from_vector "Running" [100.5, 338.245, 338.245, 338.245, 250.08666666666667]
+            actual = table.running Statistic.Mean "Ticket Price" "Running"
+            actual.should_equal (table.zip running_mean)
+        group_builder.specify "Running mean ignores nothing values and works when first value is Nothing" <|
+            # The input table extended with the "Running" column asserted below:
+            #   | Flight | Passenger | Ticket Price | Running
+            #---+--------+-----------+--------------+-------------------------
+            # 0 | BA0123 | A         | 100.5        | 100.5
+            # 1 | BA0123 | B         | 575.99       | 338.245
+            # 2 | SG0456 | A         | Nothing      | Nothing
+            # 3 | BA0123 | C         | Nothing      | 338.245
+            # 4 | SG0456 | E         | 73.77        | 73.77
+            running_mean = Column.from_vector "Running" [100.5, 338.245, Nothing, 338.245, 73.77]
+            actual = table.running Statistic.Mean "Ticket Price" "Running" ["Flight"]
+            actual.should_equal (table.zip running_mean)
+    suite_builder.group "different types" group_builder->
+        # Fixture table built below; "Ticket Price" holds integers here.
+        #   | Flight | Passenger | Ticket Price
+        #---+--------+-----------+--------------
+        # 0 | BA0123 | A         | 1
+        # 1 | BA0123 | B         | 2
+        # 2 | SG0456 | A         | 3
+        # 3 | BA0123 | C         | 4
+        # 4 | SG0456 | E         | 5
+        flight_column = ["Flight", ["BA0123", "BA0123", "SG0456", "BA0123", "SG0456"]]
+        passenger_column = ["Passenger", ["A", "B", "A", "C", "E"]]
+        price_column = ["Ticket Price", [1, 2, 3, 4, 5]]
+        table = Table.new [flight_column, passenger_column, price_column]
+        group_builder.specify "Running sum works over an integer column" <|
+            # The inputs are integers, while the expected running sums are asserted as floats.
+            #   | Flight | Passenger | Ticket Price | Running Sum of Ticket Price
+            #---+--------+-----------+--------------+------------------------------
+            # 0 | BA0123 | A         | 1            | 1.0
+            # 1 | BA0123 | B         | 2            | 3.0
+            # 2 | SG0456 | A         | 3            | 6.0
+            # 3 | BA0123 | C         | 4            | 10.0
+            # 4 | SG0456 | E         | 5            | 15.0
+            running_sum = Column.from_vector "Running Sum of Ticket Price" [1.0, 3.0, 6.0, 10.0, 15.0]
+            actual = table.running Statistic.Sum "Ticket Price"
+            actual.should_equal (table.zip running_sum)
+        group_builder.specify "Running sum does not work over a string column" <|
+            outcome = table.running Statistic.Sum "Passenger"
+            outcome.should_fail_with Invalid_Value_Type
+    suite_builder.group "Unsupported statistics" group_builder->
+        # Each case requests a running statistic and expects an Illegal_Argument failure.
+        data = Data.setup
+        group_builder.specify "RSquared is not supported" <|
+            outcome = data.table.running (Statistic.R_Squared [1, 2, 3]) "Ticket Price"
+            outcome.should_fail_with Illegal_Argument
+        group_builder.specify "Covariance is not supported" <|
+            outcome = data.table.running (Statistic.Covariance []) "Ticket Price"
+            outcome.should_fail_with Illegal_Argument
+        group_builder.specify "Pearson is not supported" <|
+            outcome = data.table.running (Statistic.Pearson []) "Ticket Price"
+            outcome.should_fail_with Illegal_Argument
+        group_builder.specify "Spearman is not supported" <|
+            outcome = data.table.running (Statistic.Spearman []) "Ticket Price"
+            outcome.should_fail_with Illegal_Argument

 main filter=Nothing =
    suite = Test.build suite_builder->