diff --git a/RELEASES.md b/RELEASES.md
index 5c7a70522ab..652154ab709 100644
--- a/RELEASES.md
+++ b/RELEASES.md
@@ -4,6 +4,8 @@
 
 - Added support for fetching tables from Google Spreadsheets.
   ([#1976](https://github.com/enso-org/enso/pull/1976)).
+- Added support for certain statistical functions of table columns
+  ([#1990](https://github.com/enso-org/enso/pull/1990)).
 
 # Enso 0.2.28 (2021-09-02)
 
diff --git a/distribution/lib/Standard/Table/0.2.29-SNAPSHOT/src/Data/Column.enso b/distribution/lib/Standard/Table/0.2.29-SNAPSHOT/src/Data/Column.enso
index dfb2088d503..8846b4fecfd 100644
--- a/distribution/lib/Standard/Table/0.2.29-SNAPSHOT/src/Data/Column.enso
+++ b/distribution/lib/Standard/Table/0.2.29-SNAPSHOT/src/Data/Column.enso
@@ -920,6 +920,52 @@ type Column
             (Vector.Vector v).reduce (+) / v.length
         this.java_column.aggregate 'mean' vec_mean True
 
+    ## Computes the variance of the sample represented by this column.
+
+       Arguments:
+       - degrees_of_freedom_correction: a correction to account for the
+         missing degrees of freedom in the sample. The default value of `1`
+         computes a sample variance. Setting it to `0` will compute population
+         variance instead.
+
+       The sum of squared deviations from the mean is divided by
+       `length - degrees_of_freedom_correction`, so the result is only
+       meaningful when the column has more elements than the correction.
+    variance degrees_of_freedom_correction=1 =
+        mean = this.mean
+        shifted = this - mean
+        sq = shifted * shifted
+        sq.sum / (this.length - degrees_of_freedom_correction)
+
+    ## Computes the standard deviation of the sample represented by this column.
+
+       Arguments:
+       - degrees_of_freedom_correction: a correction to account for the
+         missing degrees of freedom in the sample. The default value of `1`
+         computes a sample standard deviation. Setting it to `0` will compute
+         population standard deviation instead.
+
+       This is the square root of `variance` computed with the same correction.
+    standard_deviation degrees_of_freedom_correction=1 =
+        this.variance degrees_of_freedom_correction . sqrt
+
+    ## Computes the coefficient of determination of a given prediction column.
+
+       Arguments:
+       - predictions: the column predicting the values of this column.
+
+       The result is `1 - ss_res / ss_tot`, where `ss_res` is the sum of
+       squared differences between this column and `predictions`, and `ss_tot`
+       is the sum of squared differences between this column and its mean. The
+       result is only meaningful when this column is not constant, as `ss_tot`
+       is zero otherwise.
+    r_squared predictions =
+        prediction_diff = this - predictions
+        ss_res = prediction_diff*prediction_diff . sum
+        ss_tot_lin = this - this.mean
+        ss_tot = ss_tot_lin*ss_tot_lin . sum
+        1 - ss_res / ss_tot
+
     ## UNSTABLE
 
        Sorts the column according to the specified rules.
diff --git a/test/Table_Tests/src/Column_Spec.enso b/test/Table_Tests/src/Column_Spec.enso
index e679693231d..7ed3095a4db 100644
--- a/test/Table_Tests/src/Column_Spec.enso
+++ b/test/Table_Tests/src/Column_Spec.enso
@@ -64,3 +64,23 @@ spec = Test.group "Columns" <|
         col = Examples.decimal_column.set_index Examples.integer_column
         col.index.to_vector . should_equal Examples.integer_column.to_vector
 
+    Test.specify "should allow computing variance and standard deviation" <|
+        const = Column.from_vector 'const' [1, 1, 1, 1, 1]
+        const.variance . should_equal 0
+        const.standard_deviation . should_equal 0
+
+        rand = Column.from_vector 'random' [10.0, 4.2, 6.8, 6.2, 7.2]
+        rand.variance . should_equal 4.372 epsilon=(10 ^ -6)
+        rand.variance degrees_of_freedom_correction=0 . should_equal 3.4976 epsilon=(10 ^ -6)
+        rand.standard_deviation . should_equal 2.090932806 epsilon=(10 ^ -6)
+        rand.standard_deviation degrees_of_freedom_correction=0 . should_equal 1.870187156 epsilon=(10 ^ -6)
+
+    Test.specify "should allow computing the R² score of a prediction" <|
+        sample = Column.from_vector 'sample' [1,2,3,4,5]
+        mean_pred = Column.from_vector 'mean' [3,3,3,3,3]
+        perfect_pred = Column.from_vector 'perfect' [1,2,3,4,5]
+        bad_pred = Column.from_vector 'bad' [5,4,3,2,1]
+
+        sample.r_squared mean_pred . should_equal 0
+        sample.r_squared perfect_pred . should_equal 1
+        sample.r_squared bad_pred . should_equal -3