Statistical functions (#1990)

This commit is contained in:
Marcin Kostrzewa 2021-09-06 14:48:09 +02:00 committed by GitHub
parent a81257b402
commit 4f4e472ddf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 57 additions and 0 deletions

View File

@ -4,6 +4,8 @@
- Added support for fetching tables from Google Spreadsheets.
([#1976](https://github.com/enso-org/enso/pull/1976)).
- Added statistical functions on table columns: `variance`,
`standard_deviation` and `r_squared`
([#1990](https://github.com/enso-org/enso/pull/1990)).
# Enso 0.2.28 (2021-09-02)

View File

@ -920,6 +920,41 @@ type Column
(Vector.Vector v).reduce (+) / v.length
this.java_column.aggregate 'mean' vec_mean True
## Computes the variance of the values stored in this column.

   The result is the sum of squared deviations from the column mean, divided
   by the column length reduced by `degrees_of_freedom_correction`.

   Arguments:
   - degrees_of_freedom_correction: the amount subtracted from the column
     length to form the divisor. The default value of `1` yields the sample
     variance; passing `0` yields the population variance instead.
variance degrees_of_freedom_correction=1 =
    deviations = this - this.mean
    squared_sum = (deviations * deviations).sum
    divisor = this.length - degrees_of_freedom_correction
    squared_sum / divisor
## Computes the standard deviation of the values stored in this column, i.e.
   the square root of its variance.

   Arguments:
   - degrees_of_freedom_correction: passed unchanged to `variance`. The
     default value of `1` yields the sample standard deviation; passing `0`
     yields the population standard deviation instead.
standard_deviation degrees_of_freedom_correction=1 =
    variance_value = this.variance degrees_of_freedom_correction
    variance_value.sqrt
## Computes the coefficient of determination (R²) of a prediction column with
   respect to this column.

   The result is `1 - ss_res / ss_tot`, where `ss_res` is the sum of squared
   differences between this column and `predictions`, and `ss_tot` is the sum
   of squared differences between this column and its own mean.

   Arguments:
   - predictions: the column predicting the values of this column.
r_squared predictions =
    residuals = this - predictions
    residual_sum = (residuals * residuals).sum
    centered = this - this.mean
    total_sum = (centered * centered).sum
    1 - (residual_sum / total_sum)
## UNSTABLE
Sorts the column according to the specified rules.

View File

@ -64,3 +64,23 @@ spec = Test.group "Columns" <|
col = Examples.decimal_column.set_index Examples.integer_column
col.index.to_vector . should_equal Examples.integer_column.to_vector
Test.specify "should allow computing variance and standard deviation" <|
    # A constant column has no spread, so both statistics are zero.
    const = Column.from_vector 'const' [1, 1, 1, 1, 1]
    const.variance . should_equal 0
    const.standard_deviation . should_equal 0

    # Sum of squared deviations from the mean (6.88) is 17.488, giving
    # 17.488 / 4 = 4.372 (sample) and 17.488 / 5 = 3.4976 (population).
    rand = Column.from_vector 'random' [10.0, 4.2, 6.8, 6.2, 7.2]
    # The results are computed in floating point, so compare with a tolerance
    # rather than exact equality, as the standard deviation checks do.
    rand.variance . should_equal 4.372 epsilon=(10 ^ -6)
    rand.variance degrees_of_freedom_correction=0 . should_equal 3.4976 epsilon=(10 ^ -6)
    rand.standard_deviation . should_equal 2.090932806 epsilon=(10 ^ -6)
    rand.standard_deviation degrees_of_freedom_correction=0 . should_equal 1.870187156 epsilon=(10 ^ -6)
Test.specify "should allow computing the R² score of a prediction" <|
    observed = Column.from_vector 'sample' [1,2,3,4,5]

    # Predicting the mean everywhere gives ss_res == ss_tot, hence a score of 0.
    at_mean = Column.from_vector 'mean' [3,3,3,3,3]
    observed.r_squared at_mean . should_equal 0

    # Predictions identical to the observations leave no residual: score 1.
    exact = Column.from_vector 'perfect' [1,2,3,4,5]
    observed.r_squared exact . should_equal 1

    # Reversed predictions give ss_res = 40 against ss_tot = 10: 1 - 4 = -3.
    inverted = Column.from_vector 'bad' [5,4,3,2,1]
    observed.r_squared inverted . should_equal -3