Rank Data, Correlation, Covariance, R Squared (#3484)

- Added new `Statistic`s: Covariance, Pearson, Spearman, R Squared - Added `covariance_matrix` function - Added `pearson_correlation` function to compute correlation matrix - Added `rank_data` and Rank_Method type to create rankings of a Vector - Added `spearman_correlation` function to compute Spearman Rank correlation matrix # Important Notes - Added `Panic.throw_wrapped_if_error` and `Panic.handle_wrapped_dataflow_error` to help with errors within a loop. - Removed `Array.set_at` use from `Table.Vector_Builder`
2024-11-23 08:08:34 +03:00 · 2022-05-30 18:13:06 +01:00 · 2022-05-30 18:13:06 +01:00 · 1aa0bb3552
commit 1aa0bb3552
parent dac49a44b5
10 changed files with 509 additions and 29 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -131,6 +131,7 @@
  and made it the default.][3472]
 - [Implemented a `Table.from Text` conversion allowing to parse strings
  representing `Delimited` files without storing them on the filesystem.][3478]
+- [Added rank data, correlation and covariance statistics for `Vector`][3484]

 [debug-shortcuts]:
  https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -204,6 +205,7 @@
 [3472]: https://github.com/enso-org/enso/pull/3472
 [3486]: https://github.com/enso-org/enso/pull/3486
 [3478]: https://github.com/enso-org/enso/pull/3478
+[3484]: https://github.com/enso-org/enso/pull/3484

 #### Enso Compiler

--- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Statistics.enso
+++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Statistics.enso
@ -1,16 +1,25 @@
 from Standard.Base import Boolean, True, False, Nothing, Vector, Number, Any, Error, Array, Panic, Illegal_Argument_Error, Unsupported_Argument_Types
+
 from Standard.Base.Data.Vector import Empty_Error

 import Standard.Base.Data.Ordering.Comparator

+import Standard.Base.Data.Statistics.Rank_Method
+
 polyglot java import org.enso.base.statistics.Moments
 polyglot java import org.enso.base.statistics.CountMinMax
+polyglot java import org.enso.base.statistics.CorrelationStatistics
+polyglot java import org.enso.base.statistics.Rank
+
+polyglot java import java.lang.IllegalArgumentException
+polyglot java import java.lang.ClassCastException
+polyglot java import java.lang.NullPointerException

 type Statistic
    ## PRIVATE
       Convert the Enso Statistic into Java equivalent.
-    to_java : SingleValue
-    to_java = case this of
+    to_moment_statistic : SingleValue
+    to_moment_statistic = case this of
        Sum -> Moments.SUM
        Mean -> Moments.MEAN
        Variance p -> if p then Moments.VARIANCE_POPULATION else Moments.VARIANCE
@ -52,6 +61,32 @@ type Statistic
    ## The sample kurtosis of the values.
    type Kurtosis

+    ## Calculate the Covariance between data and series.
+
+       Arguments:
+       - series: the series to compute the covariance with.
+    type Covariance (series:Vector)
+
+    ## Calculate the Pearson Correlation between data and series.
+
+       Arguments:
+       - series: the series to compute the correlation with.
+    type Pearson (series:Vector)
+
+    ## Calculate the Spearman Rank Correlation between data and series.
+
+       Arguments:
+       - series: the series to compute the correlation with.
+    type Spearman (series:Vector)
+
+    ## Calculate the coefficient of determination between data and predicted
+       series.
+
+       Arguments:
+       - predicted: the series to compute the r_squared with.
+    type R_Squared (predicted:Vector)
+
+
 ## Compute a single statistic on a vector like object.

   Arguments:
@ -69,11 +104,11 @@ compute data statistic=Count =
   - statistics: Set of statistics to calculate.
 compute_bulk : Vector -> [Statistic] -> [Any]
 compute_bulk data statistics=[Count, Sum] =
-
    count_min_max = statistics.any s->((s.is_a Count) || (s.is_a Minimum) || (s.is_a Maximum))

-    java_stats = statistics.map .to_java
+    java_stats = statistics.map .to_moment_statistic
    skip_java_stats = java_stats.all s->s.is_nothing
+
    report_invalid _ =
        statistics.map_with_index i->v->
            if java_stats.at i . is_nothing then Nothing else
@ -97,8 +132,88 @@ compute_bulk data statistics=[Count, Sum] =
            Maximum ->
                if count_min_max_values.comparatorError then (Error.throw Vector.Incomparable_Values_Error) else
                    count_min_max_values.maximum
+            Covariance s -> here.calculate_correlation_statistics data s . covariance
+            Pearson s -> here.calculate_correlation_statistics data s . pearsonCorrelation
+            Spearman s -> here.calculate_spearman_rank data s
+            R_Squared s -> here.calculate_correlation_statistics data s . rSquared
            _ -> stats_array.at i

+
+## Builds the variance-covariance matrix of a set of data series.
+
+   Arguments:
+   - data: The series to compare pairwise; the result has one row and one
+     column per series.
+covariance_matrix : [Vector] -> [Vector]
+covariance_matrix data =
+    pairwise_stats = here.calculate_correlation_statistics_matrix data
+    pairwise_stats.map row->(row.map stat->stat.covariance)
+
+
+## Builds the Pearson correlation matrix of a set of data series.
+
+   Arguments:
+   - data: The series to correlate pairwise; the result has one row and one
+     column per series.
+pearson_correlation : [Vector] -> [Vector]
+pearson_correlation data =
+    pairwise_stats = here.calculate_correlation_statistics_matrix data
+    pairwise_stats.map row->(row.map stat->stat.pearsonCorrelation)
+
+
+## Calculate a Spearman Rank correlation matrix between the input series.
+
+   The matrix is built row by row. Diagonal entries are set to 1, entries
+   below the diagonal are copied from the rows already built, and only the
+   entries above the diagonal are computed.
+
+   Arguments:
+   - data: The input data sets
+spearman_correlation : [Vector] -> [Vector]
+spearman_correlation data =
+    # A dataflow error raised inside the loop is promoted to a wrapped panic
+    # (see `Panic.throw_wrapped_if_error` below) and turned back into a
+    # dataflow error by this handler.
+    Panic.handle_wrapped_dataflow_error <|
+        output = Vector.new_builder data.length
+
+        0.up_to data.length . each i->
+            output.append <|
+                Vector.new data.length j->
+                    # j == i: diagonal; j < i: reuse row j, column i, which has
+                    # already been appended to the builder.
+                    if j == i then 1 else
+                        if j < i then (output.at j . at i) else
+                            Panic.throw_wrapped_if_error <|
+                                here.calculate_spearman_rank (data.at i) (data.at j)
+
+        output.to_vector
+
+
+## PRIVATE
+   Runs the given action and converts two kinds of panic into
+   `Illegal_Argument_Error` dataflow errors.
+
+   Arguments:
+   - function: The (lazily evaluated) action to run.
+wrap_java_call : Any -> Any
+wrap_java_call ~function =
+    # An `Unsupported_Argument_Types` panic is reported with a fixed message.
+    report_unsupported _ = Error.throw (Illegal_Argument_Error ("Can only compute correlations on numerical data sets."))
+    handle_unsupported = Panic.catch Unsupported_Argument_Types handler=report_unsupported
+
+    # A Java `IllegalArgumentException` is reported with the message carried
+    # by the exception itself.
+    report_illegal caught_panic = Error.throw (Illegal_Argument_Error caught_panic.payload.cause.getMessage)
+    handle_illegal = Panic.catch IllegalArgumentException handler=report_illegal
+
+    # Both handlers are partially applied above; the action is supplied here.
+    handle_unsupported <| handle_illegal <| function
+
+
+## PRIVATE
+   Given two series, get a computed CorrelationStatistics object
+
+   Arguments:
+   - x_data: The first series.
+   - y_data: The second series.
+
+   The Java call is made through `wrap_java_call`, so the panics it handles
+   are returned as `Illegal_Argument_Error`.
+calculate_correlation_statistics : Vector -> Vector -> CorrelationStatistics
+calculate_correlation_statistics x_data y_data =
+    here.wrap_java_call <| CorrelationStatistics.compute x_data.to_array y_data.to_array
+
+
+## PRIVATE
+   Given two series, compute the Spearman Rank correlation between them.
+
+   Arguments:
+   - x_data: The first series.
+   - y_data: The second series.
+
+   The Java call is made through `wrap_java_call`, so the panics it handles
+   are returned as `Illegal_Argument_Error`.
+calculate_spearman_rank : Vector -> Vector -> Decimal
+calculate_spearman_rank x_data y_data =
+    here.wrap_java_call <| CorrelationStatistics.spearmanRankCorrelation x_data.to_array y_data.to_array
+
+
+## PRIVATE
+   Given a set of series get CorrelationStatistics objects
+
+   Arguments:
+   - data: The series to compare pairwise.
+calculate_correlation_statistics_matrix : [Vector] -> [CorrelationStatistics]
+calculate_correlation_statistics_matrix data =
+    # Turn the Vector of Vectors into an array of arrays for the Java call.
+    data_array = Vector.new data.length i->(data.at i).to_array . to_array
+    stats_array = here.wrap_java_call <| CorrelationStatistics.computeMatrix data_array
+    # Wrap each row of the returned array back into a Vector.
+    Vector.new stats_array.length i->(Vector.Vector (stats_array.at i))
+
+
 ## Compute a single statistic on the vector.

   Arguments:
@ -115,3 +230,26 @@ Vector.Vector.compute statistic=Count =
 Vector.Vector.compute_bulk : [Statistic] -> [Any]
 Vector.Vector.compute_bulk statistics=[Count, Sum] =
    here.compute_bulk this statistics
+
+
+## Assigns a rank to each value of the input, dealing with equal values
+   according to the method. The largest value receives rank 1.
+
+   Arguments:
+   - input: Input data to rank.
+   - method: Method used to deal with equal values.
+
+   Returns `Illegal_Argument_Error` if the input contains `Nothing` and
+   `Incomparable_Values_Error` if the values cannot be compared.
+rank_data : Vector -> Rank_Method -> Vector
+rank_data input method=Rank_Method.Average =
+    java_method = case method of
+        Rank_Method.Minimum -> Rank.Method.MINIMUM
+        Rank_Method.Maximum -> Rank.Method.MAXIMUM
+        Rank_Method.Average -> Rank.Method.AVERAGE
+        Rank_Method.Ordinal -> Rank.Method.ORDINAL
+        Rank_Method.Dense -> Rank.Method.DENSE
+
+    # The Java side throws NullPointerException for a `Nothing` entry; report
+    # it using the message carried by the exception.
+    report_nullpointer caught_panic = Error.throw (Illegal_Argument_Error caught_panic.payload.cause.getMessage)
+    handle_nullpointer = Panic.catch NullPointerException handler=report_nullpointer
+
+    # The handler must be a function of the caught panic, like the one above,
+    # so the error is only created when a ClassCastException is caught.
+    report_classcast _ = Error.throw Vector.Incomparable_Values_Error
+    handle_classcast = Panic.catch ClassCastException handler=report_classcast
+
+    handle_classcast <| handle_nullpointer <|
+        java_ranks = Rank.rank input.to_array Comparator.new java_method
+        Vector.Vector java_ranks
--- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Statistics/Rank_Method.enso
+++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Statistics/Rank_Method.enso
@ -0,0 +1,18 @@
+
+## Specifies how to handle ranking of equal values.
+
+   In the examples below, two equal values occupy positions 2 and 3 of the
+   ordering.
+type Rank_Method
+    ## Use the mean of all ranks for equal values.
+       Both tied values receive 2.5.
+    type Average
+
+    ## Use the lowest of all ranks for equal values.
+       Both tied values receive 2.
+    type Minimum
+
+    ## Use the highest of all ranks for equal values.
+       Both tied values receive 3.
+    type Maximum
+
+    ## Use the same rank for equal values; the next group of values receives
+       the immediately following rank, so no rank numbers are skipped.
+    type Dense
+
+    ## Equal values are assigned consecutive ranks in the order in which they
+       occur in the input.
+    type Ordinal
--- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Vector.enso
+++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Vector.enso
@ -55,19 +55,22 @@ fill length ~item =
   A vector allows to store an arbitrary number of elements in linear memory. It
   is the recommended data structure for most applications.

+   Arguments:
+   - capacity: Initial capacity of the Vector.Builder
+
   > Example
     Construct a vector using a builder that contains the items 1 to 10.

         example_new_builder =
-              builder = Vector.new_builder
+              builder = Vector.new_builder 10
              do_build start stop =
                  builder.append start
                  if start >= stop then Nothing else
                      @Tail_Call do_build start+1 stop
              do_build 1 10
              builder.to_vector
-new_builder : Builder
-new_builder = Builder.new
+new_builder : Integer -> Builder
+new_builder (capacity=1) = Builder.new capacity

 ## ADVANCED

@ -141,13 +144,7 @@ type Vector
    at : Integer -> Any ! Index_Out_Of_Bounds_Error
    at index =
        actual_index = if index < 0 then this.length + index else index
-        ## TODO [RW] Ideally we do not want an additional check here, but we
-           should catch a Invalid_Array_Index_Error panic. However, such a catch
-           should still properly forward any other panics or dataflow errors
-           which is not fully possible until the approach to handling Panics is
-           improved, as described in the following Pivotal ticket:
-           https://www.pivotaltracker.com/n/projects/2539304/stories/181029230
-        if actual_index>=0 && actual_index<this.length then this.unsafe_at actual_index else
+        Panic.catch Invalid_Array_Index_Error (this.unsafe_at actual_index) _->
            Error.throw (Index_Out_Of_Bounds_Error index this.length)

    ## ADVANCED
@ -1015,12 +1012,15 @@ type Builder

    ## Creates a new builder.

+       Arguments:
+       - capacity: Initial capacity of the Vector.Builder
+
       > Example
         Make a new builder

             Vector.new_builder
-    new : Builder
-    new = Builder (Array.new 1) 0
+    new : Integer->Builder
+    new (capacity=1) = Builder (Array.new capacity) 0

    ## Returns the current capacity (i.e. the size of the underlying storage)
       of this builder.
@ -1088,6 +1088,18 @@ type Builder
            this.append item
            Nothing

+    ## Gets an element from the builder at a specified index (0-based).
+
+       Arguments:
+       - index: The location in the builder to get the element from. The index
+         is also allowed to be negative, then the elements are indexed from the
+         back of the builder, i.e. -1 will correspond to the last element.
+    at : Integer -> Any ! Index_Out_Of_Bounds_Error
+    at index =
+        actual_index = if index < 0 then this.length + index else index
+        # The backing array is allocated by capacity and can be larger than the
+        # number of appended items, so the index is checked against `length`
+        # instead of relying on the array's own bounds.
+        if actual_index>=0 && actual_index<this.length then this.to_array.at actual_index else
+            Error.throw (Index_Out_Of_Bounds_Error index this.length)
+
    ## Checks whether a predicate holds for at least one element of this builder.

       Arguments:
--- a/distribution/lib/Standard/Base/0.0.0-dev/src/Error/Common.enso
+++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Error/Common.enso
@ -386,6 +386,23 @@ type Panic
                True -> caught_panic.convert_to_dataflow_error
                False -> Panic.throw caught_panic

+    ## If a dataflow error had occurred, wrap it in a `Wrapped_Dataflow_Error`
+       and promote to a Panic.
+
+       This is intended to be paired with `handle_wrapped_dataflow_error`,
+       which catches the panic and rethrows the original dataflow error.
+
+       Arguments:
+       - value: value to return if not an error, or rethrow as a Panic.
+    throw_wrapped_if_error : Any -> Any
+    throw_wrapped_if_error ~value =
+        # `value.catch` extracts what the dataflow error carries, so that it
+        # can travel inside the panic payload.
+        if value.is_error then Panic.throw (Wrapped_Dataflow_Error value.catch) else value
+
+    ## Catch any `Wrapped_Dataflow_Error` Panic and rethrow it as a dataflow
+       error.
+
+       Arguments:
+       - action: The code to execute that potentially raised a
+         Wrapped_Dataflow_Error.
+    handle_wrapped_dataflow_error : Any -> Any
+    handle_wrapped_dataflow_error ~action =
+        Panic.catch Wrapped_Dataflow_Error action caught_panic->
+            # The first `payload` is the `Wrapped_Dataflow_Error` carried by
+            # the panic; the second is the value it wraps.
+            Error.throw caught_panic.payload.payload
+
 ## The runtime representation of a syntax error.

   Arguments:
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Vector_Builder.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Vector_Builder.enso
@ -50,8 +50,7 @@ type Vector_Builder
       array = Array.new this.length
       go ix elem = case elem of
           Leaf vec ->
-               vec.map_with_index vi-> elem->
-                   array.set_at ix+vi elem
+               Array.copy vec.to_array 0 array ix vec.length
               ix + vec.length
           Append l r _ ->
               ix2 = go ix l
--- a/std-bits/base/src/main/java/org/enso/base/statistics/CorrelationStatistics.java
+++ b/std-bits/base/src/main/java/org/enso/base/statistics/CorrelationStatistics.java
@ -0,0 +1,96 @@
+package org.enso.base.statistics;
+
+/**
+ * Class to compute covariance and correlations between series.
+ *
+ * <p>An instance accumulates running totals over paired observations; pairs in which either value
+ * is {@code null} or NaN are skipped.
+ */
+public class CorrelationStatistics {
+  private long count = 0;
+  private double totalX = 0.0;
+  private double totalXX = 0.0;
+  private double totalY = 0.0;
+  private double totalYY = 0.0;
+  private double totalXY = 0.0;
+
+  /** Adds one (x, y) pair to the running totals, ignoring pairs with a missing or NaN value. */
+  private void append(Double x, Double y) {
+    if (x == null || x.isNaN() || y == null || y.isNaN()) {
+      return;
+    }
+
+    count++;
+    totalX += x;
+    totalXX += x * x;
+    totalY += y;
+    totalYY += y * y;
+    totalXY += x * y;
+  }
+
+  /**
+   * Covariance of the accumulated pairs, dividing by the number of pairs.
+   *
+   * @return the covariance, or NaN when fewer than two pairs were accumulated.
+   */
+  public double covariance() {
+    if (count < 2) {
+      return Double.NaN;
+    }
+
+    return (totalXY - totalX * totalY / count) / count;
+  }
+
+  /**
+   * Pearson correlation coefficient of the accumulated pairs.
+   *
+   * @return the correlation, or NaN when fewer than two pairs were accumulated.
+   */
+  public double pearsonCorrelation() {
+    if (count < 2) {
+      return Double.NaN;
+    }
+
+    double n_stdev_x = Math.sqrt(count * totalXX - totalX * totalX);
+    double n_stdev_y = Math.sqrt(count * totalYY - totalY * totalY);
+    return (count * totalXY - totalX * totalY) / (n_stdev_x * n_stdev_y);
+  }
+
+  /**
+   * Square of the Pearson correlation coefficient.
+   *
+   * @return the squared correlation, or NaN when the correlation is NaN.
+   */
+  public double rSquared() {
+    double correl = this.pearsonCorrelation();
+    return correl * correl;
+  }
+
+  /**
+   * Create the CorrelationStats between two series
+   *
+   * @param x Array of X values
+   * @param y Array of Y values
+   * @return CorrelationStats object for the 2 series.
+   * @throws IllegalArgumentException if the two arrays differ in length.
+   */
+  public static CorrelationStatistics compute(Double[] x, Double[] y) {
+    if (x.length != y.length) {
+      throw new IllegalArgumentException("Left and right lengths are not the same.");
+    }
+
+    CorrelationStatistics output = new CorrelationStatistics();
+    for (int i = 0; i < x.length; i++) {
+      output.append(x[i], y[i]);
+    }
+    return output;
+  }
+
+  /**
+   * Create the CorrelationStats for every pair of series in {@code data}.
+   *
+   * @param data the series; all must have the same length.
+   * @return a square matrix where entry [i][j] holds the statistics for series i and j. Entries
+   *     below the diagonal share the object of their mirrored entry above it. An empty input
+   *     yields an empty matrix.
+   * @throws IllegalArgumentException if the series differ in length.
+   */
+  public static CorrelationStatistics[][] computeMatrix(Double[][] data) {
+    // With no series there is nothing to compare; data[0] below would otherwise throw.
+    if (data.length == 0) {
+      return new CorrelationStatistics[0][];
+    }
+
+    int len = data[0].length;
+
+    CorrelationStatistics[][] output = new CorrelationStatistics[data.length][];
+    for (int i = 0; i < data.length; i++) {
+      if (data[i].length != len) {
+        throw new IllegalArgumentException("Data lengths are not consistent.");
+      }
+      output[i] = new CorrelationStatistics[data.length];
+      for (int j = 0; j < data.length; j++) {
+        if (j < i) {
+          // Row j is already complete, so reuse the mirrored entry.
+          output[i][j] = output[j][i];
+        } else {
+          output[i][j] = compute(data[i], data[j]);
+        }
+      }
+    }
+    return output;
+  }
+
+  /**
+   * Spearman rank correlation: the Pearson correlation of the two series' ranks, with ties ranked
+   * by the AVERAGE method.
+   *
+   * @param x Array of X values
+   * @param y Array of Y values
+   * @return the rank correlation of the 2 series.
+   */
+  public static double spearmanRankCorrelation(Double[] x, Double[] y) {
+    double[][] pairedRanks = Rank.pairedRanks(x, y, Rank.Method.AVERAGE);
+
+    CorrelationStatistics computation = new CorrelationStatistics();
+    for (int i = 0; i < pairedRanks[0].length; i++) {
+      computation.append(pairedRanks[0][i], pairedRanks[1][i]);
+    }
+    return computation.pearsonCorrelation();
+  }
+}
--- a/std-bits/base/src/main/java/org/enso/base/statistics/Rank.java
+++ b/std-bits/base/src/main/java/org/enso/base/statistics/Rank.java
@ -0,0 +1,95 @@
+package org.enso.base.statistics;
+
+import java.util.*;
+
+public class Rank {
+  private static final Comparator<Object> DOUBLE_COMPARATOR =  (a, b) -> Double.compare((Double)a, (Double)b);
+
+  public enum Method {
+    AVERAGE,
+    MINIMUM,
+    MAXIMUM,
+    DENSE,
+    ORDINAL
+  }
+
+  private record ValueWithIndex(Object value, int index) {
+  }
+
+  public static double[] rank(Object[] input, Comparator<Object> comparator, Method method)
+      throws NullPointerException, ClassCastException
+  {
+    List<ValueWithIndex> tuples = new ArrayList<>(input.length);
+    for(int i = 0; i < input.length; i++) {
+      if (input[i] == null) {
+        throw new NullPointerException("Value is Nothing at index " + i);
+      }
+      tuples.add(new ValueWithIndex(input[i], i));
+    }
+
+    return computeRankFromTuples(tuples, comparator, method);
+  }
+
+  public static double[][] pairedRanks(Double[] x, Double[] y, Method method)
+      throws IllegalArgumentException, NullPointerException, ClassCastException
+  {
+    if (x.length != y.length) {
+      throw new IllegalArgumentException("Left and right lengths are not the same.");
+    }
+
+    List<ValueWithIndex> x_tuples = new ArrayList<>(x.length);
+    List<ValueWithIndex> y_tuples = new ArrayList<>(y.length);
+    for (int i = 0; i < x.length; i++) {
+      if (x[i] == null || Double.isNaN(x[i]) || y[i] == null || Double.isNaN(y[i])) {
+        continue;
+      }
+
+      x_tuples.add(new ValueWithIndex(x[i], x_tuples.size()));
+      y_tuples.add(new ValueWithIndex(y[i], y_tuples.size()));
+    }
+
+    return new double[][] {
+        computeRankFromTuples(x_tuples, DOUBLE_COMPARATOR, method),
+        computeRankFromTuples(y_tuples, DOUBLE_COMPARATOR, method)
+    };
+  }
+
+  private static double[] computeRankFromTuples(List<ValueWithIndex> tuples, Comparator<Object> comparator, Method method)
+      throws NullPointerException, ClassCastException
+  {
+    Comparator<ValueWithIndex> tupleComparator = (a, b) -> {
+      int c = comparator.compare(a.value, b.value);
+      return c == 0 ? Integer.compare(a.index, b.index) : -c;
+    };
+    tuples.sort(tupleComparator);
+
+    double[] output = new double[tuples.size()];
+
+    int index = 0;
+    int dense = 0;
+    while (index < tuples.size()) {
+      dense++;
+      int start = index;
+
+      // Find End of Equal Values
+      while (index < tuples.size() && comparator.compare(tuples.get(start).value, tuples.get(index).value) == 0) {
+        index++;
+      }
+
+      // Build Rank
+      for (int i = start; i < index; i++) {
+        double rank = switch (method) {
+          case MINIMUM -> start + 1;
+          case MAXIMUM -> index;
+          case DENSE -> dense;
+          case AVERAGE -> (start + 1 + index) / 2.0;
+          case ORDINAL -> i + 1;
+        };
+
+        output[tuples.get(i).index] = rank;
+      }
+    }
+
+    return output;
+  }
+}
--- a/test/Tests/src/Data/Statistics_Spec.enso
+++ b/test/Tests/src/Data/Statistics_Spec.enso
@ -1,6 +1,7 @@
-from Standard.Base import Nothing, Vector, Number, True, Illegal_Argument_Error, False
+from Standard.Base import Nothing, Vector, Number, Decimal, True, Illegal_Argument_Error, False

 import Standard.Base.Data.Statistics
+import Standard.Base.Data.Statistics.Rank_Method
 from Standard.Base.Data.Statistics import all

 import Standard.Test
@ -17,18 +18,21 @@ type No_Ord number
 # Tests

 spec =
-    simple_set = [1, 2, 3, 4, 5]
-    number_set = [0.4, -18.56, -16.99, -16.43, -45.84, 13.44, -6.85, 9.68, -8.55, 10.87, 10.38, 33.85, -41.02, 1.87, -26.52, -13.87, -39.06, 25.92, -16.01, 42.01]
-    missing_set = number_set.map_with_index i->v->(if i % 5 == 4 then Nothing else v)
-    with_nans_set = number_set.map_with_index i->v->(if i % 5 == 4 then (if i % 10 == 9 then Number.nan else Nothing) else v)
-    text_set = ["A", "B", Nothing, "D"]
-
-    ord_set = [Ord 10, Ord 2, Nothing, Ord 9]
-    no_ord_set = [No_Ord 10, No_Ord 2, Nothing, No_Ord 9]
-    
    double_error = 0.000001

+    vector_compare values expected =
+        values.each_with_index i->v->
+            case v of
+                Decimal -> v.should_equal (expected.at i) epsilon=double_error
+                _ -> v.should_equal (expected.at i)
+
    Test.group "Statistics" <|
+        simple_set = [1, 2, 3, 4, 5]
+        number_set = [0.4, -18.56, -16.99, -16.43, -45.84, 13.44, -6.85, 9.68, -8.55, 10.87, 10.38, 33.85, -41.02, 1.87, -26.52, -13.87, -39.06, 25.92, -16.01, 42.01]
+        missing_set = number_set.map_with_index i->v->(if i % 5 == 4 then Nothing else v)
+        with_nans_set = number_set.map_with_index i->v->(if i % 5 == 4 then (if i % 10 == 9 then Number.nan else Nothing) else v)
+        text_set = ["A", "B", Nothing, "D"]
+
        Test.specify "should be able to count valid values" <|
            simple_set.compute . should_equal 5
            number_set.compute . should_equal 20
@ -111,8 +115,9 @@ spec =
            stats = [Count, Minimum, Mean, Variance, Skew]
            expected = [20, -45.84, -5.064, 582.0137832, 0.165086552]
            values = number_set.compute_bulk stats
-            values.map_with_index i->v->((expected.at i - v).abs < double_error) . any v->(v == True) . should_equal True
+            vector_compare values expected

+    Test.group "Statistics - empty Vector " <|
        Test.specify "should be able to count and sum on empty Vector" <|
            [].compute . should_equal 0
            [].compute Sum . should_equal 0
@ -127,6 +132,11 @@ spec =
            [].compute Skew . is_nan . should_equal True
            [].compute Kurtosis . is_nan . should_equal True

+    Test.group "Statistics - invalid input" <|
+        text_set = ["A", "B", Nothing, "D"]
+        ord_set = [Ord 10, Ord 2, Nothing, Ord 9]
+        no_ord_set = [No_Ord 10, No_Ord 2, Nothing, No_Ord 9]
+
        Test.specify "should fail with Illegal_Argument_Error on number based statistics for text Vector" <|
            text_set.compute Sum . should_fail_with Illegal_Argument_Error
            text_set.compute Mean . should_fail_with Illegal_Argument_Error
@ -147,4 +157,95 @@ spec =
        Test.specify "should fail with Incomparable_Values_Error on mixed Vectors" <|
            [1, False].compute Minimum . should_fail_with Vector.Incomparable_Values_Error

+    Test.group "Rank Data" <|
+        Test.specify "can rank a Decimal data series" <|
+            values = [409.892906, 0.839952, 796.468572, 126.931298, -405.265005, -476.675817, 441.651325, 796.468572, 78.50094, 340.163324, 234.861926, 409.892906, 226.467105, 234.861926, 126.931298, 637.870512, -71.008044, -386.399663, -126.534337, -476.675817, 78.50094, -386.399663, 409.892906, 868.54485, 669.113037, 669.113037, 0.839952, 407.162613, -476.675817, 126.931298]
+            Statistics.rank_data values . should_equal [9, 21.5, 2.5, 17, 27, 29, 7, 2.5, 19.5, 12, 13.5, 9, 15, 13.5, 17, 6, 23, 25.5, 24, 29, 19.5, 25.5, 9, 1, 4.5, 4.5, 21.5, 11, 29, 17]
+            Statistics.rank_data values Rank_Method.Minimum . should_equal [8, 21, 2, 16, 27, 28, 7, 2, 19, 12, 13, 8, 15, 13, 16, 6, 23, 25, 24, 28, 19, 25, 8, 1, 4, 4, 21, 11, 28, 16]
+            Statistics.rank_data values Rank_Method.Maximum . should_equal [10, 22, 3, 18, 27, 30, 7, 3, 20, 12, 14, 10, 15, 14, 18, 6, 23, 26, 24, 30, 20, 26, 10, 1, 5, 5, 22, 11, 30, 18]
+            Statistics.rank_data values Rank_Method.Ordinal . should_equal [8, 21, 2, 16, 27, 28, 7, 3, 19, 12, 13, 9, 15, 14, 17, 6, 23, 25, 24, 29, 20, 26, 10, 1, 4, 5, 22, 11, 30, 18]
+            Statistics.rank_data values Rank_Method.Dense . should_equal [6, 13, 2, 11, 17, 18, 5, 2, 12, 8, 9, 6, 10, 9, 11, 4, 14, 16, 15, 18, 12, 16, 6, 1, 3, 3, 13, 7, 18, 11]
+
+        Test.specify "can rank an Integer data series" <|
+            values = [10, 1, 124, 10]
+            Statistics.rank_data values . should_equal [2.5, 4, 1, 2.5]
+
+        Test.specify "can rank a Number data series" <|
+            values = [10.0, 1, 12.4, 10]
+            Statistics.rank_data values . should_equal [2.5, 4, 1, 2.5]
+
+        Test.specify "can rank a Text data series" <|
+            values = ["G", "AA", "B", "G", "D"]
+            Statistics.rank_data values . should_equal [1.5, 5, 4, 1.5, 3]
+
+        Test.specify "should fail with Incomparable_Values_Error on custom type without compare_to" <|
+            values = [No_Ord 10, No_Ord 2, No_Ord 9]
+            Statistics.rank_data values . should_fail_with Vector.Incomparable_Values_Error
+
+        Test.specify "should fail with Incomparable_Values_Error on mixed Vectors" <|
+            Statistics.rank_data [1, "A"] . should_fail_with Vector.Incomparable_Values_Error
+
+        Test.specify "should fail with Illegal_Argument_Error on Vectors with Nothing" <|
+            Statistics.rank_data [1, Nothing, 4] . should_fail_with Illegal_Argument_Error
+
+    Test.group "Correlation Statistics" <|
+        series_a = [0.22345,0.258315,0.74663,Nothing,0.686843,0.692246,Nothing,0.401859,0.725442,Nothing,0.963527,0.520363,0.633053,0.397123,Nothing,0.458942,0.036499,0.368194,0.598939,0.296476,0.093746,0.609329]
+        series_b = [0.140743,Nothing,0.574639,0.251683,0.902023,0.08723,0.251813,0.1669,0.234405,Nothing,0.28774,0.471757,0.280681,0.925207,0.919041,0.626234,0.429497,0.358597,0.566118,0.333606,0.828172,0.887829]
+        series_c = [Nothing,0.769797,0.281678,0.462145,0.727132,0.327978,Nothing,0.648639,0.562636,Nothing,0.159836,0.367404,0.877087,0.365483,Nothing,0.931873,0.723546,0.558085,0.163396,0.940997,0.399685,0.617509]
+        series = [series_a, series_b, series_c]
+
+        # The body block is passed to `Test.specify` with `<|`, as in every other spec.
+        Test.specify "can compute Covariance, Correlation and R Squared between a pair of series" <|
+            series_a.compute (Covariance series_b) . should_equal -0.0053554 epsilon=double_error
+            series_a.compute (Pearson series_b) . should_equal -0.08263943 epsilon=double_error
+            series_a.compute (Spearman series_b) . should_equal -0.09313725 epsilon=double_error
+            series_a.compute (R_Squared series_b) . should_equal 0.006829275 epsilon=double_error
+
+        Test.specify "can calculate a covariance matrix" <|
+            matrix = Statistics.covariance_matrix series
+            matrix.length . should_equal 3
+            vector_compare (matrix.at 0) [0.0571699, -0.0053554, -0.02378204]
+            vector_compare (matrix.at 1) [-0.0053554, 0.07707381, -0.00098274]
+            vector_compare (matrix.at 2) [-0.02378204, -0.00098274, 0.05837098]
+
+        Test.specify "can calculate a pearson correlation matrix" <|
+            matrix = Statistics.pearson_correlation series
+            matrix.length . should_equal 3
+            vector_compare (matrix.at 0) [1, -0.08263943, -0.40469045]
+            vector_compare (matrix.at 1) [-0.08263943, 1, -0.01537537]
+            vector_compare (matrix.at 2) [-0.40469045, -0.01537537, 1]
+
+        Test.specify "can calculate a spearman rank correlation matrix" <|
+            matrix = Statistics.spearman_correlation series
+            matrix.length . should_equal 3
+            vector_compare (matrix.at 0) [1, -0.09313725, -0.43382353]
+            vector_compare (matrix.at 1) [-0.09313725, 1, 0]
+            vector_compare (matrix.at 2) [-0.43382353, 0, 1]
+
+        Test.specify "should fail with Illegal_Argument_Error if different lengths" <|
+            data = [[1,2,3,4],[10,20,30]]
+            data.first.compute (Covariance data.second) . should_fail_with Illegal_Argument_Error
+            data.first.compute (Pearson data.second) . should_fail_with Illegal_Argument_Error
+            data.first.compute (Spearman data.second) . should_fail_with Illegal_Argument_Error
+            data.first.compute (R_Squared data.second) . should_fail_with Illegal_Argument_Error
+            Statistics.covariance_matrix data . should_fail_with Illegal_Argument_Error
+            Statistics.pearson_correlation data . should_fail_with Illegal_Argument_Error
+            Statistics.spearman_correlation data . should_fail_with Illegal_Argument_Error
+
+        Test.specify "should fail with Illegal_Argument_Error if not number based" <|
+            text = [["A","BC","CD"], ["0", "1", "2"], ["H", "I", "J"]]
+            text.first.compute (Covariance text.second) . should_fail_with Illegal_Argument_Error
+            text.first.compute (Pearson text.second) . should_fail_with Illegal_Argument_Error
+            text.first.compute (Spearman text.second) . should_fail_with Illegal_Argument_Error
+            text.first.compute (R_Squared text.second) . should_fail_with Illegal_Argument_Error
+            Statistics.covariance_matrix text . should_fail_with Illegal_Argument_Error
+            Statistics.pearson_correlation text . should_fail_with Illegal_Argument_Error
+            Statistics.spearman_correlation text . should_fail_with Illegal_Argument_Error
+
+    Test.group "Statistics - invalid input" <|
+        Test.specify "should fail with Illegal_Argument_Error on number based statistics for text Vector" <|
+            series = [["A", "B", Nothing, "D"], ["A", "B", Nothing, "D"]]
+            Statistics.covariance_matrix series . should_fail_with Illegal_Argument_Error
+            Statistics.pearson_correlation series . should_fail_with Illegal_Argument_Error
+
+
 main = Test.Suite.run_main here.spec
--- a/test/Tests/src/Main.enso
+++ b/test/Tests/src/Main.enso
@ -34,6 +34,7 @@ import project.Data.Ref_Spec
 import project.Data.Text_Spec
 import project.Data.Time.Spec as Time_Spec
 import project.Data.Vector_Spec
+import project.Data.Statistics_Spec
 import project.Data.Text.Regex_Spec
 import project.Data.Text.Utils_Spec
 import project.Data.Text.Default_Regex_Engine_Spec
@ -104,4 +105,5 @@ main = Test.Suite.run_main <|
    Time_Spec.spec
    Uri_Spec.spec
    Vector_Spec.spec
+    Statistics_Spec.spec
    Warnings_Spec.spec