Add doc, powersums and mean

Rename "whole" to "cumulative"
This commit is contained in:
Harendra Kumar 2022-03-24 22:19:42 +05:30
parent f95fcbd82f
commit 0eadd04ada
4 changed files with 135 additions and 21 deletions

View File

@ -89,9 +89,24 @@ main =
, benchWithFold numElements "sum (window size 1000)"
(Ring.slidingWindow 1000 Window.sum)
, benchWithFold numElements "sum (entire stream)"
(Window.whole Window.sum)
(Window.cumulative Window.sum)
, benchWithFold numElements "sum (Data.Fold)"
Fold.sum
, benchWithFold numElements "mean (window size 100)"
(Ring.slidingWindow 100 Window.mean)
, benchWithFold numElements "mean (window size 1000)"
(Ring.slidingWindow 1000 Window.mean)
, benchWithFold numElements "mean (entire stream)"
(Window.cumulative Window.mean)
, benchWithFold numElements "mean (Data.Fold)"
Fold.mean
, benchWithFold numElements "powerSum 2 (window size 100)"
(Ring.slidingWindow 100 (Window.powerSum 2))
, benchWithFold numElements "powerSum 2 (entire stream)"
(Window.cumulative (Window.powerSum 2))
]
, bgroup
"scan"
@ -111,5 +126,9 @@ main =
(Ring.slidingWindow 100 Window.sum)
, benchWithPostscan numElements "sum (window size 1000)"
(Ring.slidingWindow 1000 Window.sum)
, benchWithPostscan numElements "mean (window size 100)"
(Ring.slidingWindow 100 Window.mean)
, benchWithPostscan numElements "mean (window size 1000)"
(Ring.slidingWindow 1000 Window.mean)
]
]

View File

@ -1,13 +1,54 @@
-- |
-- Module : Streamly.Internal.Data.Fold.Window
-- Copyright : (c) 2020 Composewell Technologies
-- License : Apache-2.0
-- Maintainer : streamly@composewell.com
-- Stability : experimental
-- Portability : GHC
--
-- Simple incremental statistical measures over a stream of data. All
-- operations use numerically stable floating point arithmetic.
--
-- Measurements can be performed over the entire input stream or on a sliding
-- window of fixed or variable size. Where possible, measures are computed
-- online without buffering the input stream.
--
-- Currently there is no overflow detection.
--
-- For more advanced statistical measures see the @streamly-statistics@
-- package.
module Streamly.Internal.Data.Fold.Window
( lmap
, length
, whole
, sum
, sumInt
, minimum
, maximum
, range
)
(
-- * Incremental Folds
-- | Folds of type @Fold m (a, Maybe a) b@ are incremental sliding window
-- folds. An input of the form @(a, Nothing)@ indicates that the element
-- @a@ is being inserted in the window without ejecting an old value,
-- increasing the window size by 1. An input of the form @(a, Just b)@
-- indicates that the element @a@ is being inserted in the window and the
-- old element @b@ is being removed from the window; the window size
-- remains the same. The window size can only increase and never decrease.
--
-- You can compute the statistics over the entire stream using sliding
-- window folds by keeping the second element of the input tuple as
-- @Nothing@.
--
lmap
, cumulative
-- ** Sums
, length
, sum
, sumInt
, powerSum
, powerSumFrac
-- ** Location
, minimum
, maximum
, range
, mean
)
where
import Data.Bifunctor(bimap)
@ -40,14 +81,14 @@ import qualified Streamly.Data.Fold as Fold
lmap :: (c -> a) -> Fold m (a, Maybe a) b -> Fold m (c, Maybe c) b
lmap f = Fold.lmap (bimap f (f <$>))
-- | Convert a rolling fold to a normal fold using the entire input stream as a
-- single window.
-- | Convert an incremental fold to a cumulative fold using the entire input
-- stream as a single window.
--
-- >>> whole f = Fold.lmap (\x -> (x, Nothing)) f
-- >>> cumulative f = Fold.lmap (\x -> (x, Nothing)) f
--
{-# INLINE whole #-}
whole :: Fold m (a, Maybe a) b -> Fold m a b
whole = Fold.lmap (, Nothing)
-- Every element is fed as a pure insertion (paired with 'Nothing'), so nothing
-- is ever ejected from the window.
{-# INLINE cumulative #-}
cumulative :: Fold m (a, Maybe a) b -> Fold m a b
cumulative = Fold.lmap (\x -> (x, Nothing))
-------------------------------------------------------------------------------
-- Sum
@ -85,6 +126,12 @@ sumInt = Fold step initial extract
--
-- | Sum of all the elements in a rolling window:
--
-- \(S = \sum_{i=1}^n x_{i}\)
--
-- This is the first power sum.
--
-- >>> sum = powerSum 1
--
-- Uses Kahan-Babuska-Neumaier style summation for numerical stability of
-- floating precision arithmetic.
--
@ -127,6 +174,10 @@ sum = Fold step initial extract
-- | The number of elements in the rolling window.
--
-- This is the \(0\)th power sum.
--
-- >>> length = powerSum 0
--
{-# INLINE length #-}
length :: (Monad m, Num b) => Fold m (a, Maybe a) b
length = Fold.foldl' step 0
@ -136,6 +187,28 @@ length = Fold.foldl' step 0
step w (_, Nothing) = w + 1
step w _ = w
-- | Power sum of order \(k\) over a rolling window, i.e. the sum of every
-- element of the window raised to the \(k\)th power:
--
-- \(S_k = \sum_{i=1}^n x_{i}^k\)
--
-- Each element is raised using '^' before being fed to 'sum', so @k@ must be
-- non-negative. See 'powerSumFrac' for negative or fractional powers.
--
-- >>> powerSum k = lmap (^ k) sum
--
-- /Space/: \(\mathcal{O}(1)\)
--
-- /Time/: \(\mathcal{O}(n)\)
{-# INLINE powerSum #-}
powerSum :: (Monad m, Num a) => Int -> Fold m (a, Maybe a) a
powerSum k = lmap (\x -> x ^ k) sum
-- | A variant of 'powerSum' in which the power may be negative or fractional.
-- For positive integral powers prefer 'powerSum', which is faster.
--
-- >>> powerSumFrac p = lmap (** p) sum
--
{-# INLINE powerSumFrac #-}
powerSumFrac :: (Monad m, Floating a) => a -> Fold m (a, Maybe a) a
powerSumFrac p = lmap (\x -> x ** p) sum
-------------------------------------------------------------------------------
-- Location
-------------------------------------------------------------------------------
@ -149,8 +222,8 @@ length = Fold.foldl' step 0
--
-- | The minimum element in a rolling window.
--
-- If you want to compute the minimum of the entire stream Fold.minimum from streamly
-- package would be much faster.
-- If you want to compute the minimum of the entire stream, @Fold.minimum@ from
-- the streamly package would be much faster.
--
-- /Time/: \(\mathcal{O}(n*w)\) where \(w\) is the window size.
--
@ -199,8 +272,8 @@ minimum = Fold step initial extract
--
-- | The maximum element in a rolling window.
--
-- If you want to compute the maximum of the entire stream Fold.maximum from streamly
-- package would be much faster.
-- If you want to compute the maximum of the entire stream, @Fold.maximum@ from
-- the streamly package would be much faster.
--
-- /Time/: \(\mathcal{O}(n*w)\) where \(w\) is the window size.
--
@ -246,6 +319,23 @@ maximum = Fold step initial extract
$ fromMaybe (0, error "max: Empty stream")
$ DQ.head q
-- | Arithmetic mean of the elements in a sliding window:
--
-- \(\mu = \frac{\sum_{i=1}^n x_{i}}{n}\)
--
-- When applied to a sliding window this is known as the Simple Moving Average
-- (SMA); when applied to the entire stream it is known as the Cumulative
-- Moving Average (CMA).
--
-- The result is the windowed 'sum' divided by the windowed 'length', both
-- computed over the same input.
--
-- >>> mean = Fold.teeWith (/) sum length
--
-- /Space/: \(\mathcal{O}(1)\)
--
-- /Time/: \(\mathcal{O}(n)\)
{-# INLINE mean #-}
mean :: forall m a. (Monad m, Fractional a) => Fold m (a, Maybe a) a
mean = Fold.teeWith (\total count -> total / count) sum length
-- | The difference between the maximum and minimum elements of a rolling window.
--

View File

@ -244,7 +244,6 @@ library
, Streamly.Internal.Data.Fold.Step
, Streamly.Internal.Data.Refold.Type
, Streamly.Internal.Data.Fold.Type
, Streamly.Internal.Data.Fold.Window
, Streamly.Internal.Data.Stream.StreamD.Step
, Streamly.Internal.Data.Stream.StreamD.Type
, Streamly.Internal.Data.Stream.StreamDK.Type
@ -286,6 +285,7 @@ library
, Streamly.Internal.Data.Unfold.Enumeration
, Streamly.Internal.Data.Fold.Tee
, Streamly.Internal.Data.Fold
, Streamly.Internal.Data.Fold.Window
, Streamly.Internal.Data.Parser
, Streamly.Internal.Data.Pipe
, Streamly.Internal.Data.Stream.Serial

View File

@ -29,6 +29,7 @@ main = hspec $ do
$ c1 >= -1 * deviationLimit && c1 <= deviationLimit
describe "Sum" $ testFunc sum
describe "mean" $ testFunc mean
describe "Correctness" $ do
let winSize = 3
@ -59,3 +60,7 @@ main = hspec $ do
let scanInf = [1, 2, 3, 4, 5, 12] :: [Double]
scanWin = [1, 2, 3, 3, 3, 9] :: [Double]
testFunc testCase2 sum scanInf scanWin
describe "mean" $ do
let scanInf = [1, 1, 1, 1, 1, 2] :: [Double]
scanWin = [1, 1, 1, 1, 1, 3] :: [Double]
testFunc testCase2 mean scanInf scanWin