Carp/core/Statistics.carp

181 lines
5.2 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

(use Int)
(use Double)
(defmodule Statistics
(deftype Summary [
sum Double,
min Double,
max Double,
mean Double,
median Double,
var Double,
stdev Double,
stdev-pct Double,
median-abs-dev Double,
median-abs-dev-pct Double,
quartiles (Array Double),
iqr Double
])
(defn sum [data]
(if (= 0 (Array.length data))
0.0
(let [total @(Array.nth data 0)]
(do
(for [i 1 (Array.length data)]
(set! total (+ total @(Array.nth data i))))
total))))
(defn mean [data]
(/ (Statistics.sum data) (from-int (Array.length data))))
(defn _pp [a mean]
(let [sum 0.0]
(do
(for [i 0 (Array.length a)]
(let [tmp (Double.- (Double.copy (Array.nth a i)) mean)]
(set! sum (Double.* tmp tmp))))
sum)))
(defn _xx [a mean]
(let [sum 0.0]
(do
(for [i 0 (Array.length a)]
(set! sum (Double.- (Double.copy (Array.nth a i)) mean)))
sum)))
(defn _ss [data]
(let [m (mean data)
tmp (_xx data m)]
(Double.- (_pp data m)
(Double./ (Double.* tmp tmp)
(Double.from-int (Array.length data))))))
(defn median [data]
(let [n (Array.length data)
sorted (Array.sort @data)]
(cond (= n 0) 0.0
(= (mod n 2) 1) @(Array.nth data (/ n 2))
(let [mid (/ n 2)] ; else
(/ (+ (the Double @(Array.nth data (dec mid)))
@(Array.nth data mid))
2.0)))))
(defn low-median [data]
(let [n (Array.length data)
sorted (Array.sort @data)]
(cond (= n 0) 0.0
(= (mod n 2) 1) @(Array.nth data (/ n 2))
@(Array.nth data (dec (/ n 2)))))) ; else
(defn high-median [data]
(let [n (Array.length data)
sorted (Array.sort @data)]
(if (= n 0)
0.0
@(Array.nth data (/ n 2)))))
(defn grouped-median [data interval]
(let [n (Array.length data)
sorted (Array.sort @data)]
(cond (= n 0) 0.0
(= n 1) @(Array.nth data 0)
(let [x @(Array.nth data (/ n 2)) ; else
l (- x (/ (from-int interval) 2.0))
cf (Array.index-of data x)
f (Array.element-count data &x)]
(+ l (/ (* (from-int interval) (- (/ (from-int n) 2.0) (from-int cf)))
(from-int f)))))))
(defn variance [data]
(let [n (Array.length data)
ss (_ss data)]
(/ ss (from-int (dec n)))))
(defn pvariance [data]
(let [n (Array.length data)
ss (_ss data)]
(/ ss (from-int n))))
(defn stdev [data]
(Double.sqrt (variance data)))
(defn pstdev [data]
(Double.sqrt (pvariance data)))
(defn stdev-pct [data]
(* (/ (stdev data) (mean data)) 100.0))
(defn median-abs-dev [data]
(let [med (median data)
zero 0.0
abs-devs (Array.replicate (Array.length data) &zero)
n 1.4826] ; taken from Rust and R, because thats how its done apparently
(do
(for [i 0 (Array.length data)]
(Array.aset! &abs-devs i (abs (- med @(Array.nth data i)))))
(* (median &abs-devs) n))))
(defn median-abs-dev-pct [data]
(* (/ (median-abs-dev data) (median data)) 100.0))
(defn percentile-of-sorted [sorted pct]
(cond
(Int.= 0 (Array.length sorted)) -1.0 ; should abort here
(Double.< pct 0.0) -1.0 ; should abort here
(Double.> pct 100.0) -1.0 ; should abort here
(Int.= 1 (Array.length sorted)) @(Array.nth sorted 0)
(Double.= 100.0 pct) @(Array.nth sorted (Int.dec (Array.length sorted)))
(let [len (Int.dec (Array.length sorted))
rank (Double.* (Double./ pct 100.0) (Double.from-int len))
lrank (Double.floor rank)
d (Double.- rank lrank)
n (Double.to-int lrank)
lo @(Array.nth sorted n)
hi @(Array.nth sorted (Int.inc n))]
(Double.+ lo (Double.* d (Double.- hi lo))))))
(defn quartiles [data]
(let [tmp (Array.sort @data)
first 25.0
second 50.0
third 75.0
a (percentile-of-sorted &tmp first)
b (percentile-of-sorted &tmp second)
c (percentile-of-sorted &tmp third)]
[a b c]))
(defn iqr [data]
(let [s &(quartiles data)]
(the Double (- @(Array.nth s 2) @(Array.nth s 0)))))
(defn winsorize [samples pct]
(let [tmp &(Array.sort @samples)
lo (Statistics.percentile-of-sorted tmp pct)
hi (Statistics.percentile-of-sorted tmp (Double.- 100.0 pct))]
(do
(for [i 0 (Array.length tmp)]
(let [samp @(Array.nth tmp i)]
(cond
(> samp hi) (Array.aset! tmp i hi)
(< samp lo) (Array.aset! tmp i lo)
())))
(Array.copy tmp))))
(defn summary [samples]
(Summary.init
(Statistics.sum samples)
(Array.minimum samples)
(Array.maximum samples)
(mean samples)
(median samples)
(variance samples)
(stdev samples)
(stdev-pct samples)
(median-abs-dev samples)
(median-abs-dev-pct samples)
(quartiles samples)
(iqr samples)))
)