Rank Data, Correlation, Covariance, R Squared (#3484)

- Added new `Statistic`s: Covariance, Pearson, Spearman, R Squared
- Added `covariance_matrix` function
- Added `pearson_correlation` function to compute correlation matrix
- Added `rank_data` and Rank_Method type to create rankings of a Vector
- Added `spearman_correlation` function to compute Spearman Rank correlation matrix

# Important Notes
- Added `Panic.throw_wrapped_if_error` and `Panic.handle_wrapped_dataflow_error` to help with errors within a loop.
- Removed `Array.set_at` use from `Table.Vector_Builder`
This commit is contained in:
James Dunkerley 2022-05-30 18:13:06 +01:00 committed by GitHub
parent dac49a44b5
commit 1aa0bb3552
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 509 additions and 29 deletions

View File

@ -131,6 +131,7 @@
and made it the default.][3472]
- [Implemented a `Table.from Text` conversion allowing to parse strings
representing `Delimited` files without storing them on the filesystem.][3478]
- [Added rank data, correlation and covariance statistics for `Vector`][3484]
[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -204,6 +205,7 @@
[3472]: https://github.com/enso-org/enso/pull/3472
[3486]: https://github.com/enso-org/enso/pull/3486
[3478]: https://github.com/enso-org/enso/pull/3478
[3484]: https://github.com/enso-org/enso/pull/3484
#### Enso Compiler

View File

@ -1,16 +1,25 @@
from Standard.Base import Boolean, True, False, Nothing, Vector, Number, Any, Error, Array, Panic, Illegal_Argument_Error, Unsupported_Argument_Types
from Standard.Base.Data.Vector import Empty_Error
import Standard.Base.Data.Ordering.Comparator
import Standard.Base.Data.Statistics.Rank_Method
polyglot java import org.enso.base.statistics.Moments
polyglot java import org.enso.base.statistics.CountMinMax
polyglot java import org.enso.base.statistics.CorrelationStatistics
polyglot java import org.enso.base.statistics.Rank
polyglot java import java.lang.IllegalArgumentException
polyglot java import java.lang.ClassCastException
polyglot java import java.lang.NullPointerException
type Statistic
## PRIVATE
Convert the Enso Statistic into Java equivalent.
to_java : SingleValue
to_java = case this of
to_moment_statistic : SingleValue
to_moment_statistic = case this of
Sum -> Moments.SUM
Mean -> Moments.MEAN
Variance p -> if p then Moments.VARIANCE_POPULATION else Moments.VARIANCE
@ -52,6 +61,32 @@ type Statistic
## The sample kurtosis of the values.
type Kurtosis
## Calculate the Covariance between data and series.
Arguments:
- series: the series to compute the covariance with.
type Covariance (series:Vector)
## Calculate the Pearson Correlation between data and series.
Arguments:
- series: the series to compute the correlation with.
type Pearson (series:Vector)
## Calculate the Spearman Rank Correlation between data and series.
Arguments:
- series: the series to compute the correlation with.
type Spearman (series:Vector)
## Calculate the coefficient of determination between data and predicted
series.
Arguments:
- predicted: the series to compute the r_squared with.
type R_Squared (predicted:Vector)
## Compute a single statistic on a vector like object.
Arguments:
@ -69,11 +104,11 @@ compute data statistic=Count =
- statistics: Set of statistics to calculate.
compute_bulk : Vector -> [Statistic] -> [Any]
compute_bulk data statistics=[Count, Sum] =
count_min_max = statistics.any s->((s.is_a Count) || (s.is_a Minimum) || (s.is_a Maximum))
java_stats = statistics.map .to_java
java_stats = statistics.map .to_moment_statistic
skip_java_stats = java_stats.all s->s.is_nothing
report_invalid _ =
statistics.map_with_index i->v->
if java_stats.at i . is_nothing then Nothing else
@ -97,8 +132,88 @@ compute_bulk data statistics=[Count, Sum] =
Maximum ->
if count_min_max_values.comparatorError then (Error.throw Vector.Incomparable_Values_Error) else
count_min_max_values.maximum
Covariance s -> here.calculate_correlation_statistics data s . covariance
Pearson s -> here.calculate_correlation_statistics data s . pearsonCorrelation
Spearman s -> here.calculate_spearman_rank data s
R_Squared s -> here.calculate_correlation_statistics data s . rSquared
_ -> stats_array.at i
## Calculate a variance-covariance matrix between the input series.
Every entry is the `covariance` of the pairwise statistics object returned by
`calculate_correlation_statistics_matrix` for the corresponding two series.
Arguments:
- data: The input data sets, one Vector per series.
covariance_matrix : [Vector] -> [Vector]
covariance_matrix data =
stats_vectors = here.calculate_correlation_statistics_matrix data
stats_vectors.map v->(v.map .covariance)
## Calculate a Pearson correlation matrix between the input series.
Every entry is the `pearsonCorrelation` of the pairwise statistics object
returned by `calculate_correlation_statistics_matrix` for the corresponding
two series.
Arguments:
- data: The input data sets, one Vector per series.
pearson_correlation : [Vector] -> [Vector]
pearson_correlation data =
stats_vectors = here.calculate_correlation_statistics_matrix data
stats_vectors.map v->(v.map .pearsonCorrelation)
## Calculate a Spearman Rank correlation matrix between the input series.
The diagonal is set to 1 without calling into Java, and entries below the
diagonal are copied from the rows already built, so each pair of series is
only computed once. A dataflow error raised for any pair is wrapped into a
panic inside the loop and unwrapped again at the top, so it becomes the
result of the whole call.
Arguments:
- data: The input data sets, one Vector per series.
spearman_correlation : [Vector] -> [Vector]
spearman_correlation data =
Panic.handle_wrapped_dataflow_error <|
output = Vector.new_builder data.length
0.up_to data.length . each i->
output.append <|
Vector.new data.length j->
if j == i then 1 else
if j < i then (output.at j . at i) else
Panic.throw_wrapped_if_error <|
here.calculate_spearman_rank (data.at i) (data.at j)
output.to_vector
## PRIVATE
Runs the suspended `function` and converts the panics it may raise into
`Illegal_Argument_Error` dataflow errors: `Unsupported_Argument_Types` gets a
fixed "numerical data sets" message, while a Java `IllegalArgumentException`
keeps the message of the original exception.
wrap_java_call : Any -> Any
wrap_java_call ~function =
report_unsupported _ = Error.throw (Illegal_Argument_Error ("Can only compute correlations on numerical data sets."))
handle_unsupported = Panic.catch Unsupported_Argument_Types handler=report_unsupported
report_illegal caught_panic = Error.throw (Illegal_Argument_Error caught_panic.payload.cause.getMessage)
handle_illegal = Panic.catch IllegalArgumentException handler=report_illegal
handle_unsupported <| handle_illegal <| function
## PRIVATE
Given two series, get a computed CorrelationStatistics object.
The Java call goes through `wrap_java_call`, so the panics it handles are
returned as `Illegal_Argument_Error` dataflow errors.
calculate_correlation_statistics : Vector -> Vector -> CorrelationStatistics
calculate_correlation_statistics x_data y_data =
here.wrap_java_call <| CorrelationStatistics.compute x_data.to_array y_data.to_array
## PRIVATE
Given two series, compute the Spearman Rank correlation.
The Java call goes through `wrap_java_call`, so the panics it handles are
returned as `Illegal_Argument_Error` dataflow errors.
calculate_spearman_rank : Vector -> Vector -> Decimal
calculate_spearman_rank x_data y_data =
here.wrap_java_call <| CorrelationStatistics.spearmanRankCorrelation x_data.to_array y_data.to_array
## PRIVATE
Given a set of series, get the CorrelationStatistics objects for every pair
of series, as a Vector of Vectors (one inner Vector per input series).
The input Vectors are converted to an array of arrays before being passed to
Java, and the returned array of arrays is wrapped back into Vectors.
calculate_correlation_statistics_matrix : [Vector] -> [CorrelationStatistics]
calculate_correlation_statistics_matrix data =
data_array = Vector.new data.length i->(data.at i).to_array . to_array
stats_array = here.wrap_java_call <| CorrelationStatistics.computeMatrix data_array
Vector.new stats_array.length i->(Vector.Vector (stats_array.at i))
## Compute a single statistic on the vector.
Arguments:
@ -115,3 +230,26 @@ Vector.Vector.compute statistic=Count =
Vector.Vector.compute_bulk : [Statistic] -> [Any]
Vector.Vector.compute_bulk statistics=[Count, Sum] =
here.compute_bulk this statistics
## Assigns a rank to each value of input, dealing with equal values according to the method.
A Java `NullPointerException` raised while ranking is returned as an
`Illegal_Argument_Error` carrying the Java message, and a Java
`ClassCastException` is returned as `Vector.Incomparable_Values_Error`.
Arguments:
- input: Input data to rank.
- method: Method used to deal with equal values (defaults to `Rank_Method.Average`).
rank_data : Vector -> Rank_Method -> Vector
rank_data input method=Rank_Method.Average =
java_method = case method of
Rank_Method.Minimum -> Rank.Method.MINIMUM
Rank_Method.Maximum -> Rank.Method.MAXIMUM
Rank_Method.Average -> Rank.Method.AVERAGE
Rank_Method.Ordinal -> Rank.Method.ORDINAL
Rank_Method.Dense -> Rank.Method.DENSE
report_nullpointer caught_panic = Error.throw (Illegal_Argument_Error caught_panic.payload.cause.getMessage)
handle_nullpointer = Panic.catch NullPointerException handler=report_nullpointer
handle_classcast = Panic.catch ClassCastException handler=(Error.throw Vector.Incomparable_Values_Error)
handle_classcast <| handle_nullpointer <|
java_ranks = Rank.rank input.to_array Comparator.new java_method
Vector.Vector java_ranks

View File

@ -0,0 +1,18 @@
## Specifies how to handle ranking of equal values.
type Rank_Method
## Use the mean of all ranks for equal values.
type Average
## Use the lowest of all ranks for equal values.
type Minimum
## Use the highest of all ranks for equal values.
type Maximum
## Use the same rank value for equal values; the next group of values gets
the immediately following rank number.
type Dense
## Equal values are assigned the next rank in the order that they occur.
type Ordinal

View File

@ -55,19 +55,22 @@ fill length ~item =
A vector allows to store an arbitrary number of elements in linear memory. It
is the recommended data structure for most applications.
Arguments:
- capacity: Initial capacity of the Vector.Builder
> Example
Construct a vector using a builder that contains the items 1 to 10.
example_new_builder =
builder = Vector.new_builder
builder = Vector.new_builder 10
do_build start stop =
builder.append start
if start >= stop then Nothing else
@Tail_Call do_build start+1 stop
do_build 1 10
builder.to_vector
new_builder : Builder
new_builder = Builder.new
new_builder : Integer -> Builder
new_builder (capacity=1) = Builder.new capacity
## ADVANCED
@ -141,13 +144,7 @@ type Vector
at : Integer -> Any ! Index_Out_Of_Bounds_Error
at index =
actual_index = if index < 0 then this.length + index else index
## TODO [RW] Ideally we do not want an additional check here, but we
should catch a Invalid_Array_Index_Error panic. However, such a catch
should still properly forward any other panics or dataflow errors
which is not fully possible until the approach to handling Panics is
improved, as described in the following Pivotal ticket:
https://www.pivotaltracker.com/n/projects/2539304/stories/181029230
if actual_index>=0 && actual_index<this.length then this.unsafe_at actual_index else
Panic.catch Invalid_Array_Index_Error (this.unsafe_at actual_index) _->
Error.throw (Index_Out_Of_Bounds_Error index this.length)
## ADVANCED
@ -1015,12 +1012,15 @@ type Builder
## Creates a new builder.
Arguments:
- capacity: Initial capacity of the Vector.Builder
> Example
Make a new builder
Vector.new_builder
new : Builder
new = Builder (Array.new 1) 0
new : Integer->Builder
new (capacity=1) = Builder (Array.new capacity) 0
## Returns the current capacity (i.e. the size of the underlying storage)
of this builder.
@ -1088,6 +1088,18 @@ type Builder
this.append item
Nothing
## Gets an element from the builder at a specified index (0-based).

   Arguments:
   - index: The location in the builder to get the element from. The index is
     also allowed to be negative, then the elements are indexed from the back
     of the builder, i.e. -1 will correspond to the last element.

   The index is validated against `length`, the number of appended elements,
   and not against the storage array: `new` allocates that array with
   `capacity` slots, so it can be longer than `length` and reading past
   `length` would return a slot that was never filled.
at : Integer -> Any ! Index_Out_Of_Bounds_Error
at index =
    actual_index = if index < 0 then this.length + index else index
    if actual_index>=0 && actual_index<this.length then this.to_array.at actual_index else
        Error.throw (Index_Out_Of_Bounds_Error index this.length)
## Checks whether a predicate holds for at least one element of this builder.
Arguments:

View File

@ -386,6 +386,23 @@ type Panic
True -> caught_panic.convert_to_dataflow_error
False -> Panic.throw caught_panic
## If a dataflow error has occurred, wrap it in a `Wrapped_Dataflow_Error` and promote to a Panic.
This lets an error escape from inside a loop or callback; it is meant to be
paired with `Panic.handle_wrapped_dataflow_error` around the whole operation.
Arguments:
- value: suspended value to return if it is not an error; if it is an error,
its payload is rethrown as a Panic.
throw_wrapped_if_error : Any -> Any
throw_wrapped_if_error ~value =
if value.is_error then Panic.throw (Wrapped_Dataflow_Error value.catch) else value
## Catch any `Wrapped_Dataflow_Error` Panic and rethrow it as a dataflow error.
The rethrown error carries the payload that was wrapped inside the
`Wrapped_Dataflow_Error`, not the wrapper itself.
Arguments:
- action: The suspended code to execute that potentially raises a
`Wrapped_Dataflow_Error` Panic.
handle_wrapped_dataflow_error : Any -> Any
handle_wrapped_dataflow_error ~action =
Panic.catch Wrapped_Dataflow_Error action caught_panic->
Error.throw caught_panic.payload.payload
## The runtime representation of a syntax error.
Arguments:

View File

@ -50,8 +50,7 @@ type Vector_Builder
array = Array.new this.length
go ix elem = case elem of
Leaf vec ->
vec.map_with_index vi-> elem->
array.set_at ix+vi elem
Array.copy vec.to_array 0 array ix vec.length
ix + vec.length
Append l r _ ->
ix2 = go ix l

View File

@ -0,0 +1,96 @@
package org.enso.base.statistics;
/**
 * Class to compute covariance and correlations between series.
 *
 * <p>An instance accumulates running totals over the (x, y) pairs of two series. Pairs in which
 * either value is {@code null} or NaN are skipped and do not contribute to any statistic.
 */
public class CorrelationStatistics {
  private long count = 0;
  private double totalX = 0.0;
  private double totalXX = 0.0;
  private double totalY = 0.0;
  private double totalYY = 0.0;
  private double totalXY = 0.0;

  /** Adds one (x, y) pair to the running totals, ignoring pairs with a null or NaN member. */
  private void append(Double x, Double y) {
    if (x == null || x.isNaN() || y == null || y.isNaN()) {
      return;
    }

    count++;
    totalX += x;
    totalXX += x * x;
    totalY += y;
    totalYY += y * y;
    totalXY += x * y;
  }

  /**
   * Covariance of the accumulated pairs, dividing by the number of pairs.
   *
   * @return the covariance, or NaN when fewer than two valid pairs were accumulated.
   */
  public double covariance() {
    if (count < 2) {
      return Double.NaN;
    }

    return (totalXY - totalX * totalY / count) / count;
  }

  /**
   * Pearson correlation coefficient of the accumulated pairs.
   *
   * @return the correlation, or NaN when fewer than two valid pairs were accumulated.
   */
  public double pearsonCorrelation() {
    if (count < 2) {
      return Double.NaN;
    }

    double n_stdev_x = Math.sqrt(count * totalXX - totalX * totalX);
    double n_stdev_y = Math.sqrt(count * totalYY - totalY * totalY);
    return (count * totalXY - totalX * totalY) / (n_stdev_x * n_stdev_y);
  }

  /**
   * Coefficient of determination, computed as the square of the Pearson correlation.
   *
   * @return the squared correlation (NaN whenever {@link #pearsonCorrelation()} is NaN).
   */
  public double rSquared() {
    double correl = this.pearsonCorrelation();
    return correl * correl;
  }

  /**
   * Create the CorrelationStats between two series
   *
   * @param x Array of X values
   * @param y Array of Y values
   * @return CorrelationStats object for the 2 series.
   * @throws IllegalArgumentException if the two arrays have different lengths.
   */
  public static CorrelationStatistics compute(Double[] x, Double[] y) {
    if (x.length != y.length) {
      throw new IllegalArgumentException("Left and right lengths are not the same.");
    }

    CorrelationStatistics output = new CorrelationStatistics();
    for (int i = 0; i < x.length; i++) {
      output.append(x[i], y[i]);
    }
    return output;
  }

  /**
   * Create the CorrelationStats for every pair of series in the data set.
   *
   * <p>Only the diagonal and the upper triangle are computed; each entry below the diagonal
   * reuses the object already computed for the mirrored position.
   *
   * @param data Array of series; all series must have the same length.
   * @return square matrix of CorrelationStats objects, empty when {@code data} is empty.
   * @throws IllegalArgumentException if the series do not all have the same length.
   */
  public static CorrelationStatistics[][] computeMatrix(Double[][] data) {
    // An empty data set has no first row to take the reference length from.
    if (data.length == 0) {
      return new CorrelationStatistics[0][];
    }

    // Validate every row before computing anything. Checking inside the main loop came too
    // late: row 0 is paired with every other row first, so compute() reported the mismatch
    // with its own message before this check could ever fire.
    int len = data[0].length;
    for (Double[] row : data) {
      if (row.length != len) {
        throw new IllegalArgumentException("Data lengths are not consistent.");
      }
    }

    CorrelationStatistics[][] output = new CorrelationStatistics[data.length][];
    for (int i = 0; i < data.length; i++) {
      output[i] = new CorrelationStatistics[data.length];
      for (int j = 0; j < data.length; j++) {
        if (j < i) {
          output[i][j] = output[j][i];
        } else {
          output[i][j] = compute(data[i], data[j]);
        }
      }
    }
    return output;
  }

  /**
   * Spearman Rank correlation between two series: the Pearson correlation of the paired
   * average ranks produced by {@code Rank.pairedRanks}.
   *
   * @param x Array of X values
   * @param y Array of Y values
   * @return the rank correlation, or NaN when fewer than two valid pairs are available.
   */
  public static double spearmanRankCorrelation(Double[] x, Double[] y) {
    double[][] pairedRanks = Rank.pairedRanks(x, y, Rank.Method.AVERAGE);

    CorrelationStatistics computation = new CorrelationStatistics();
    for (int i = 0; i < pairedRanks[0].length; i++) {
      computation.append(pairedRanks[0][i], pairedRanks[1][i]);
    }
    return computation.pearsonCorrelation();
  }
}

View File

@ -0,0 +1,95 @@
package org.enso.base.statistics;
import java.util.*;
public class Rank {
private static final Comparator<Object> DOUBLE_COMPARATOR = (a, b) -> Double.compare((Double)a, (Double)b);
public enum Method {
AVERAGE,
MINIMUM,
MAXIMUM,
DENSE,
ORDINAL
}
private record ValueWithIndex(Object value, int index) {
}
public static double[] rank(Object[] input, Comparator<Object> comparator, Method method)
throws NullPointerException, ClassCastException
{
List<ValueWithIndex> tuples = new ArrayList<>(input.length);
for(int i = 0; i < input.length; i++) {
if (input[i] == null) {
throw new NullPointerException("Value is Nothing at index " + i);
}
tuples.add(new ValueWithIndex(input[i], i));
}
return computeRankFromTuples(tuples, comparator, method);
}
public static double[][] pairedRanks(Double[] x, Double[] y, Method method)
throws IllegalArgumentException, NullPointerException, ClassCastException
{
if (x.length != y.length) {
throw new IllegalArgumentException("Left and right lengths are not the same.");
}
List<ValueWithIndex> x_tuples = new ArrayList<>(x.length);
List<ValueWithIndex> y_tuples = new ArrayList<>(y.length);
for (int i = 0; i < x.length; i++) {
if (x[i] == null || Double.isNaN(x[i]) || y[i] == null || Double.isNaN(y[i])) {
continue;
}
x_tuples.add(new ValueWithIndex(x[i], x_tuples.size()));
y_tuples.add(new ValueWithIndex(y[i], y_tuples.size()));
}
return new double[][] {
computeRankFromTuples(x_tuples, DOUBLE_COMPARATOR, method),
computeRankFromTuples(y_tuples, DOUBLE_COMPARATOR, method)
};
}
private static double[] computeRankFromTuples(List<ValueWithIndex> tuples, Comparator<Object> comparator, Method method)
throws NullPointerException, ClassCastException
{
Comparator<ValueWithIndex> tupleComparator = (a, b) -> {
int c = comparator.compare(a.value, b.value);
return c == 0 ? Integer.compare(a.index, b.index) : -c;
};
tuples.sort(tupleComparator);
double[] output = new double[tuples.size()];
int index = 0;
int dense = 0;
while (index < tuples.size()) {
dense++;
int start = index;
// Find End of Equal Values
while (index < tuples.size() && comparator.compare(tuples.get(start).value, tuples.get(index).value) == 0) {
index++;
}
// Build Rank
for (int i = start; i < index; i++) {
double rank = switch (method) {
case MINIMUM -> start + 1;
case MAXIMUM -> index;
case DENSE -> dense;
case AVERAGE -> (start + 1 + index) / 2.0;
case ORDINAL -> i + 1;
};
output[tuples.get(i).index] = rank;
}
}
return output;
}
}

View File

@ -1,6 +1,7 @@
from Standard.Base import Nothing, Vector, Number, True, Illegal_Argument_Error, False
from Standard.Base import Nothing, Vector, Number, Decimal, True, Illegal_Argument_Error, False
import Standard.Base.Data.Statistics
import Standard.Base.Data.Statistics.Rank_Method
from Standard.Base.Data.Statistics import all
import Standard.Test
@ -17,18 +18,21 @@ type No_Ord number
# Tests
spec =
simple_set = [1, 2, 3, 4, 5]
number_set = [0.4, -18.56, -16.99, -16.43, -45.84, 13.44, -6.85, 9.68, -8.55, 10.87, 10.38, 33.85, -41.02, 1.87, -26.52, -13.87, -39.06, 25.92, -16.01, 42.01]
missing_set = number_set.map_with_index i->v->(if i % 5 == 4 then Nothing else v)
with_nans_set = number_set.map_with_index i->v->(if i % 5 == 4 then (if i % 10 == 9 then Number.nan else Nothing) else v)
text_set = ["A", "B", Nothing, "D"]
ord_set = [Ord 10, Ord 2, Nothing, Ord 9]
no_ord_set = [No_Ord 10, No_Ord 2, Nothing, No_Ord 9]
double_error = 0.000001
vector_compare values expected =
values.each_with_index i->v->
case v of
Decimal -> v.should_equal (expected.at i) epsilon=double_error
_ -> v.should_equal (expected.at i)
Test.group "Statistics" <|
simple_set = [1, 2, 3, 4, 5]
number_set = [0.4, -18.56, -16.99, -16.43, -45.84, 13.44, -6.85, 9.68, -8.55, 10.87, 10.38, 33.85, -41.02, 1.87, -26.52, -13.87, -39.06, 25.92, -16.01, 42.01]
missing_set = number_set.map_with_index i->v->(if i % 5 == 4 then Nothing else v)
with_nans_set = number_set.map_with_index i->v->(if i % 5 == 4 then (if i % 10 == 9 then Number.nan else Nothing) else v)
text_set = ["A", "B", Nothing, "D"]
Test.specify "should be able to count valid values" <|
simple_set.compute . should_equal 5
number_set.compute . should_equal 20
@ -111,8 +115,9 @@ spec =
stats = [Count, Minimum, Mean, Variance, Skew]
expected = [20, -45.84, -5.064, 582.0137832, 0.165086552]
values = number_set.compute_bulk stats
values.map_with_index i->v->((expected.at i - v).abs < double_error) . any v->(v == True) . should_equal True
vector_compare values expected
Test.group "Statistics - empty Vector " <|
Test.specify "should be able to count and sum on empty Vector" <|
[].compute . should_equal 0
[].compute Sum . should_equal 0
@ -127,6 +132,11 @@ spec =
[].compute Skew . is_nan . should_equal True
[].compute Kurtosis . is_nan . should_equal True
Test.group "Statistics - invalid input" <|
text_set = ["A", "B", Nothing, "D"]
ord_set = [Ord 10, Ord 2, Nothing, Ord 9]
no_ord_set = [No_Ord 10, No_Ord 2, Nothing, No_Ord 9]
Test.specify "should fail with Illegal_Argument_Error on number based statistics for text Vector" <|
text_set.compute Sum . should_fail_with Illegal_Argument_Error
text_set.compute Mean . should_fail_with Illegal_Argument_Error
@ -147,4 +157,95 @@ spec =
Test.specify "should fail with Incomparable_Values_Error on mixed Vectors" <|
[1, False].compute Minimum . should_fail_with Vector.Incomparable_Values_Error
Test.group "Rank Data" <|
Test.specify "can rank a Decimal data series" <|
values = [409.892906, 0.839952, 796.468572, 126.931298, -405.265005, -476.675817, 441.651325, 796.468572, 78.50094, 340.163324, 234.861926, 409.892906, 226.467105, 234.861926, 126.931298, 637.870512, -71.008044, -386.399663, -126.534337, -476.675817, 78.50094, -386.399663, 409.892906, 868.54485, 669.113037, 669.113037, 0.839952, 407.162613, -476.675817, 126.931298]
Statistics.rank_data values . should_equal [9, 21.5, 2.5, 17, 27, 29, 7, 2.5, 19.5, 12, 13.5, 9, 15, 13.5, 17, 6, 23, 25.5, 24, 29, 19.5, 25.5, 9, 1, 4.5, 4.5, 21.5, 11, 29, 17]
Statistics.rank_data values Rank_Method.Minimum . should_equal [8, 21, 2, 16, 27, 28, 7, 2, 19, 12, 13, 8, 15, 13, 16, 6, 23, 25, 24, 28, 19, 25, 8, 1, 4, 4, 21, 11, 28, 16]
Statistics.rank_data values Rank_Method.Maximum . should_equal [10, 22, 3, 18, 27, 30, 7, 3, 20, 12, 14, 10, 15, 14, 18, 6, 23, 26, 24, 30, 20, 26, 10, 1, 5, 5, 22, 11, 30, 18]
Statistics.rank_data values Rank_Method.Ordinal . should_equal [8, 21, 2, 16, 27, 28, 7, 3, 19, 12, 13, 9, 15, 14, 17, 6, 23, 25, 24, 29, 20, 26, 10, 1, 4, 5, 22, 11, 30, 18]
Statistics.rank_data values Rank_Method.Dense . should_equal [6, 13, 2, 11, 17, 18, 5, 2, 12, 8, 9, 6, 10, 9, 11, 4, 14, 16, 15, 18, 12, 16, 6, 1, 3, 3, 13, 7, 18, 11]
Test.specify "can rank an Integer data series" <|
values = [10, 1, 124, 10]
Statistics.rank_data values . should_equal [2.5, 4, 1, 2.5]
Test.specify "can rank a Number data series" <|
values = [10.0, 1, 12.4, 10]
Statistics.rank_data values . should_equal [2.5, 4, 1, 2.5]
Test.specify "can rank a Text data series" <|
values = ["G", "AA", "B", "G", "D"]
Statistics.rank_data values . should_equal [1.5, 5, 4, 1.5, 3]
Test.specify "should fail with Incomparable_Values_Error on custom type without compare_to" <|
values = [No_Ord 10, No_Ord 2, No_Ord 9]
Statistics.rank_data values . should_fail_with Vector.Incomparable_Values_Error
Test.specify "should fail with Incomparable_Values_Error on mixed Vectors" <|
Statistics.rank_data [1, "A"] . should_fail_with Vector.Incomparable_Values_Error
Test.specify "should fail with Illegal_Argument_Error on Vectors with Nothing" <|
Statistics.rank_data [1, Nothing, 4] . should_fail_with Illegal_Argument_Error
Test.group "Correlation Statistics" <|
series_a = [0.22345,0.258315,0.74663,Nothing,0.686843,0.692246,Nothing,0.401859,0.725442,Nothing,0.963527,0.520363,0.633053,0.397123,Nothing,0.458942,0.036499,0.368194,0.598939,0.296476,0.093746,0.609329]
series_b = [0.140743,Nothing,0.574639,0.251683,0.902023,0.08723,0.251813,0.1669,0.234405,Nothing,0.28774,0.471757,0.280681,0.925207,0.919041,0.626234,0.429497,0.358597,0.566118,0.333606,0.828172,0.887829]
series_c = [Nothing,0.769797,0.281678,0.462145,0.727132,0.327978,Nothing,0.648639,0.562636,Nothing,0.159836,0.367404,0.877087,0.365483,Nothing,0.931873,0.723546,0.558085,0.163396,0.940997,0.399685,0.617509]
series = [series_a, series_b, series_c]
# The `<|` passes the indented assertions to `Test.specify` as the test body,
# as every other specification in this suite does.
Test.specify "can compute Covariance, Correlation and R Squared between a pair of series" <|
    series_a.compute (Covariance series_b) . should_equal -0.0053554 epsilon=double_error
    series_a.compute (Pearson series_b) . should_equal -0.08263943 epsilon=double_error
    series_a.compute (Spearman series_b) . should_equal -0.09313725 epsilon=double_error
    series_a.compute (R_Squared series_b) . should_equal 0.006829275 epsilon=double_error
Test.specify "can calculate a covariance matrix" <|
matrix = Statistics.covariance_matrix series
matrix.length . should_equal 3
vector_compare (matrix.at 0) [0.0571699, -0.0053554, -0.02378204]
vector_compare (matrix.at 1) [-0.0053554, 0.07707381, -0.00098274]
vector_compare (matrix.at 2) [-0.02378204, -0.00098274, 0.05837098]
Test.specify "can calculate a pearson correlation matrix" <|
matrix = Statistics.pearson_correlation series
matrix.length . should_equal 3
vector_compare (matrix.at 0) [1, -0.08263943, -0.40469045]
vector_compare (matrix.at 1) [-0.08263943, 1, -0.01537537]
vector_compare (matrix.at 2) [-0.40469045, -0.01537537, 1]
Test.specify "can calculate a spearman rank correlation matrix" <|
matrix = Statistics.spearman_correlation series
matrix.length . should_equal 3
vector_compare (matrix.at 0) [1, -0.09313725, -0.43382353]
vector_compare (matrix.at 1) [-0.09313725, 1, 0]
vector_compare (matrix.at 2) [-0.43382353, 0, 1]
Test.specify "should fail with Illegal_Argument_Error if different lengths" <|
data = [[1,2,3,4],[10,20,30]]
data.first.compute (Covariance data.second) . should_fail_with Illegal_Argument_Error
data.first.compute (Pearson data.second) . should_fail_with Illegal_Argument_Error
data.first.compute (Spearman data.second) . should_fail_with Illegal_Argument_Error
data.first.compute (R_Squared data.second) . should_fail_with Illegal_Argument_Error
Statistics.covariance_matrix data . should_fail_with Illegal_Argument_Error
Statistics.pearson_correlation data . should_fail_with Illegal_Argument_Error
Statistics.spearman_correlation data . should_fail_with Illegal_Argument_Error
Test.specify "should fail with Illegal_Argument_Error if not number based" <|
text = [["A","BC","CD"], ["0", "1", "2"], ["H", "I", "J"]]
text.first.compute (Covariance text.second) . should_fail_with Illegal_Argument_Error
text.first.compute (Pearson text.second) . should_fail_with Illegal_Argument_Error
text.first.compute (Spearman text.second) . should_fail_with Illegal_Argument_Error
text.first.compute (R_Squared text.second) . should_fail_with Illegal_Argument_Error
Statistics.covariance_matrix text . should_fail_with Illegal_Argument_Error
Statistics.pearson_correlation text . should_fail_with Illegal_Argument_Error
Statistics.spearman_correlation text . should_fail_with Illegal_Argument_Error
Test.group "Statistics - invalid input" <|
Test.specify "should fail with Illegal_Argument_Error on number based statistics for text Vector" <|
series = [["A", "B", Nothing, "D"], ["A", "B", Nothing, "D"]]
Statistics.covariance_matrix series . should_fail_with Illegal_Argument_Error
Statistics.pearson_correlation series . should_fail_with Illegal_Argument_Error
main = Test.Suite.run_main here.spec

View File

@ -34,6 +34,7 @@ import project.Data.Ref_Spec
import project.Data.Text_Spec
import project.Data.Time.Spec as Time_Spec
import project.Data.Vector_Spec
import project.Data.Statistics_Spec
import project.Data.Text.Regex_Spec
import project.Data.Text.Utils_Spec
import project.Data.Text.Default_Regex_Engine_Spec
@ -104,4 +105,5 @@ main = Test.Suite.run_main <|
Time_Spec.spec
Uri_Spec.spec
Vector_Spec.spec
Statistics_Spec.spec
Warnings_Spec.spec