Carp/core/Statistics.carp
2018-02-02 07:19:10 +01:00

181 lines
5.1 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

(use Int)
(use Double)
(defmodule Statistics
(deftype Summary [
sum Double,
min Double,
max Double,
mean Double,
median Double,
var Double,
stdev Double,
stdev-pct Double,
median-abs-dev Double,
median-abs-dev-pct Double,
quartiles (Array Double),
iqr Double
])
(defn sum [data]
(if (= 0 (Array.count data))
0.0
(let [total @(Array.nth data 0)]
(do
(for [i 1 (Array.count data)]
(set! total (+ total @(Array.nth data i))))
total))))
(defn mean [data]
(/ (Statistics.sum data) (from-int (Array.count data))))
(defn _pp [a mean]
(let [sum 0.0]
(do
(for [i 0 (Array.count a)]
(let [tmp (Double.- (Double.copy (Array.nth a i)) mean)]
(set! sum (Double.* tmp tmp))))
sum)))
(defn _xx [a mean]
(let [sum 0.0]
(do
(for [i 0 (Array.count a)]
(set! sum (Double.- (Double.copy (Array.nth a i)) mean)))
sum)))
(defn _ss [data]
(let [m (mean data)
tmp (_xx data m)]
(Double.- (_pp data m)
(Double./ (Double.* tmp tmp)
(Double.from-int (Array.count data))))))
(defn median [data]
(let [n (Array.count data)
sorted (Array.sort @data)]
(cond (= n 0) 0.0
(= (mod n 2) 1) @(Array.nth data (/ n 2))
(let [mid (/ n 2)] ; else
(/ (+ (the Double @(Array.nth data (dec mid)))
@(Array.nth data mid))
2.0)))))
(defn low-median [data]
(let [n (Array.count data)
sorted (Array.sort @data)]
(cond (= n 0) 0.0
(= (mod n 2) 1) @(Array.nth data (/ n 2))
@(Array.nth data (dec (/ n 2)))))) ; else
(defn high-median [data]
(let [n (Array.count data)
sorted (Array.sort @data)]
(if (= n 0)
0.0
@(Array.nth data (/ n 2)))))
(defn grouped-median [data interval]
(let [n (Array.count data)
sorted (Array.sort @data)]
(cond (= n 0) 0.0
(= n 1) @(Array.nth data 0)
(let [x @(Array.nth data (/ n 2)) ; else
l (- x (/ (from-int interval) 2.0))
cf (Array.index-of data x)
f (Array.element-count data x)]
(+ l (/ (* (from-int interval) (- (/ (from-int n) 2.0) (from-int cf)))
(from-int f)))))))
(defn variance [data]
(let [n (Array.count data)
ss (_ss data)]
(/ ss (from-int (dec n)))))
(defn pvariance [data]
(let [n (Array.count data)
ss (_ss data)]
(/ ss (from-int n))))
(defn stdev [data]
(Double.sqrt (variance data)))
(defn pstdev [data]
(Double.sqrt (pvariance data)))
(defn stdev-pct [data]
(* (/ (stdev data) (mean data)) 100.0))
(defn median-abs-dev [data]
(let [med (median data)
zero 0.0
abs-devs (Array.replicate (Array.count data) &zero)
n 1.4826] ; taken from Rust and R, because thats how its done apparently
(do
(for [i 0 (Array.count data)]
(Array.aset! &abs-devs i (abs (- med @(Array.nth data i)))))
(* (median &abs-devs) n))))
(defn median-abs-dev-pct [data]
(* (/ (median-abs-dev data) (median data)) 100.0))
(defn percentile-of-sorted [sorted pct]
(cond
(Int.= 0 (Array.count sorted)) -1.0 ; should abort here
(Double.< pct 0.0) -1.0 ; should abort here
(Double.> pct 100.0) -1.0 ; should abort here
(Int.= 1 (Array.count sorted)) @(Array.nth sorted 0)
(Double.= 100.0 pct) @(Array.nth sorted (Int.dec (Array.count sorted)))
(let [len (Int.dec (Array.count sorted))
rank (Double.* (Double./ pct 100.0) (Double.from-int len))
lrank (Double.floor rank)
d (Double.- rank lrank)
n (Double.to-int lrank)
lo @(Array.nth sorted n)
hi @(Array.nth sorted (Int.inc n))]
(Double.+ lo (Double.* d (Double.- hi lo))))))
(defn quartiles [data]
(let [tmp (Array.sort @data)
first 25.0
second 50.0
third 75.0
a (percentile-of-sorted &tmp first)
b (percentile-of-sorted &tmp second)
c (percentile-of-sorted &tmp third)]
[a b c]))
(defn iqr [data]
(let [s &(quartiles data)]
(the Double (- @(Array.nth s 2) @(Array.nth s 0)))))
(defn winsorize [samples pct]
(let [tmp &(Array.sort @samples)
lo (Statistics.percentile-of-sorted tmp pct)
hi (Statistics.percentile-of-sorted tmp (Double.- 100.0 pct))]
(do
(for [i 0 (Array.count tmp)]
(let [samp @(Array.nth tmp i)]
(cond
(> samp hi) (Array.aset! tmp i hi)
(< samp lo) (Array.aset! tmp i lo)
())))
(Array.copy tmp))))
(defn summary [samples]
(Summary.init
(Statistics.sum samples)
(Array.minimum samples)
(Array.maximum samples)
(mean samples)
(median samples)
(variance samples)
(stdev samples)
(stdev-pct samples)
(median-abs-dev samples)
(median-abs-dev-pct samples)
(quartiles samples)
(iqr samples)))
)