mirror of
https://github.com/enso-org/enso.git
synced 2024-12-23 00:52:09 +03:00
Statistical functions (#1990)
This commit is contained in:
parent
a81257b402
commit
4f4e472ddf
@ -4,6 +4,8 @@
|
||||
|
||||
- Added support for fetching tables from Google Spreadsheets.
|
||||
([#1976](https://github.com/enso-org/enso/pull/1976)).
|
||||
- Added the `variance`, `standard_deviation` and `r_squared` column statistics
|
||||
([#1990](https://github.com/enso-org/enso/pull/1990)).
|
||||
|
||||
# Enso 0.2.28 (2021-09-02)
|
||||
|
||||
|
@ -920,6 +920,41 @@ type Column
|
||||
(Vector.Vector v).reduce (+) / v.length
|
||||
this.java_column.aggregate 'mean' vec_mean True
|
||||
|
||||
## Computes the variance of the sample represented by this column.

   The result is the sum of the squared differences between each element
   and `this.mean`, divided by
   `this.length - degrees_of_freedom_correction`.

   Arguments:
   - degrees_of_freedom_correction: the amount subtracted from the column
     length in the divisor, accounting for the degrees of freedom missing
     from the sample. The default of `1` yields the sample variance;
     passing `0` yields the population variance.

   NOTE(review): the divisor is not checked, so a column whose length equals
   the correction divides by zero - confirm that this is acceptable.
   NOTE(review): the divisor uses `this.length`; confirm how entries that
   are missing should be counted.
variance degrees_of_freedom_correction=1 =
    deviations = this - this.mean
    squared_total = (deviations * deviations).sum
    divisor = this.length - degrees_of_freedom_correction
    squared_total / divisor
|
||||
|
||||
## Computes the standard deviation of the sample represented by this column.

   The result is the square root of `this.variance`, computed with the same
   correction.

   Arguments:
   - degrees_of_freedom_correction: forwarded unchanged to `variance`, where
     it accounts for the degrees of freedom missing from the sample. The
     default of `1` yields the sample standard deviation; passing `0` yields
     the population standard deviation.
standard_deviation degrees_of_freedom_correction=1 =
    spread = this.variance degrees_of_freedom_correction
    spread.sqrt
|
||||
|
||||
## Computes the coefficient of determination of a given prediction column.

   The result is `1 - ss_res / ss_tot`, where `ss_res` is the sum of squared
   differences between this column and `predictions`, and `ss_tot` is the
   sum of squared differences between this column and its own mean.

   Arguments:
   - predictions: the column predicting the values of this column.

   NOTE(review): `ss_tot` is zero for a constant column and the division is
   not guarded - confirm the intended result in that case.
r_squared predictions =
    residuals = this - predictions
    residual_sum_of_squares = (residuals * residuals).sum
    deviations = this - this.mean
    total_sum_of_squares = (deviations * deviations).sum
    unexplained_fraction = residual_sum_of_squares / total_sum_of_squares
    1 - unexplained_fraction
|
||||
|
||||
|
||||
## UNSTABLE
|
||||
|
||||
Sorts the column according to the specified rules.
|
||||
|
@ -64,3 +64,23 @@ spec = Test.group "Columns" <|
|
||||
col = Examples.decimal_column.set_index Examples.integer_column
|
||||
col.index.to_vector . should_equal Examples.integer_column.to_vector
|
||||
|
||||
Test.specify "should allow computing variance and standard deviation" <|
    # A constant column has no spread, so both statistics are zero.
    const = Column.from_vector 'const' [1, 1, 1, 1, 1]
    const.variance . should_equal 0
    const.standard_deviation . should_equal 0

    # Mean is 6.88 and the squared deviations sum to 17.488, giving
    # 17.488 / 4 = 4.372 (sample) and 17.488 / 5 = 3.4976 (population).
    # The results are computed in floating point, so every comparison uses
    # a tolerance instead of exact equality.
    rand = Column.from_vector 'random' [10.0, 4.2, 6.8, 6.2, 7.2]
    rand.variance . should_equal 4.372 epsilon=(10 ^ -6)
    rand.variance degrees_of_freedom_correction=0 . should_equal 3.4976 epsilon=(10 ^ -6)
    rand.standard_deviation . should_equal 2.090932806 epsilon=(10 ^ -6)
    rand.standard_deviation degrees_of_freedom_correction=0 . should_equal 1.870187156 epsilon=(10 ^ -6)
|
||||
|
||||
Test.specify "should allow computing the R² score of a prediction" <|
    # Observed values: mean 3, total sum of squares 10.
    observed = Column.from_vector 'sample' [1,2,3,4,5]

    # Predicting the mean everywhere leaves the residual sum of squares
    # equal to the total sum of squares, so the score is 1 - 10/10 = 0.
    constant_mean = Column.from_vector 'mean' [3,3,3,3,3]
    observed.r_squared constant_mean . should_equal 0

    # An exact prediction has no residuals, so the score is 1.
    exact = Column.from_vector 'perfect' [1,2,3,4,5]
    observed.r_squared exact . should_equal 1

    # The reversed prediction has residual sum of squares 40, so the score
    # is 1 - 40/10 = -3.
    reversed = Column.from_vector 'bad' [5,4,3,2,1]
    observed.r_squared reversed . should_equal -3
|
||||
|
Loading…
Reference in New Issue
Block a user