Add iterative benchmarks for Functor/transformers

* Also factor out the existing iteration utilities and use the same everywhere
2024-11-12 22:25:41 +03:00 · 2020-06-22 20:06:43 +05:30 · 2020-06-22 20:06:43 +05:30 · b4cef11d19
commit b4cef11d19
parent 1a331cb402
3 changed files with 199 additions and 118 deletions
--- a/benchmark/Streamly/Benchmark/Prelude/Serial.hs
+++ b/benchmark/Streamly/Benchmark/Prelude/Serial.hs
@ -25,7 +25,8 @@
 import Control.DeepSeq (NFData(..))
 import Control.Monad (when)
 import Control.Monad.IO.Class (MonadIO(..))
-import Control.Monad.State.Strict (StateT, get, put)
+import Control.Monad.State.Strict (StateT, get, put, MonadState)
+import qualified Control.Monad.State.Strict as State
 import Data.Functor.Identity (Identity, runIdentity)
 import Data.IORef (newIORef, modifyIORef')
 import GHC.Generics (Generic)
@ -824,7 +825,6 @@ o_1_space_mapping value =

        -- Mapping
        , benchIOSink value "map" (mapN serially 1)
-        , benchIOSink value "fmap" (fmapN serially 1)
        , bench "sequence" $ nfIO $ randomRIO (1, 1000) >>= \n ->
              sequence serially (sourceUnfoldrAction value n)
        , benchIOSink value "mapM" (mapM serially 1)
@ -844,7 +844,6 @@ o_1_space_mappingX4 :: Int -> [Benchmark]
 o_1_space_mappingX4 value =
    [ bgroup "mappingX4"
        [ benchIOSink value "map" (mapN serially 4)
-        , benchIOSink value "fmap" (fmapN serially 4)
        , benchIOSink value "mapM" (mapM serially 4)

        , benchIOSink value "scan" (scan 4)
@ -853,6 +852,88 @@ o_1_space_mappingX4 value =
        ]
    ]

+-------------------------------------------------------------------------------
+-- Iteration/looping utilities
+-------------------------------------------------------------------------------
+
+{-# INLINE iterateN #-}
+iterateN :: (Int -> a -> a) -> a -> Int -> a
+iterateN g initial count = f count initial
+
+    where
+
+    f (0 :: Int) x = x
+    f i x = f (i - 1) (g i x)
+
+-- Iterate a transformation over a singleton stream
+{-# INLINE iterateSingleton #-}
+iterateSingleton :: S.MonadAsync m
+    => (Int -> SerialT m Int -> SerialT m Int)
+    -> Int
+    -> Int
+    -> SerialT m Int
+iterateSingleton g count n = iterateN g (return n) count
+
+-- XXX need to check why this is slower than the explicit recursion above, even
+-- if the above code is written in a foldr like head recursive way. We also
+-- need to try this with foldlM' once #150 is fixed.
+-- However, it is perhaps best to keep the iteration benchmarks independent of
+-- foldrM and any related fusion issues.
+{-# INLINE _iterateSingleton #-}
+_iterateSingleton ::
+       S.MonadAsync m
+    => (Int -> SerialT m Int -> SerialT m Int)
+    -> Int
+    -> Int
+    -> SerialT m Int
+_iterateSingleton g value n = S.foldrM g (return n) $ sourceIntFromTo value n
+
+-- Apply transformation g count times on a stream of length len
+{-# INLINE iterateSource #-}
+iterateSource ::
+       S.MonadAsync m
+    => (SerialT m Int -> SerialT m Int)
+    -> Int
+    -> Int
+    -> Int
+    -> SerialT m Int
+iterateSource g count len n = f count (sourceUnfoldrM len n)
+
+    where
+
+    f (0 :: Int) stream = stream
+    f i stream = f (i - 1) (g stream)
+
+-------------------------------------------------------------------------------
+-- Functor
+-------------------------------------------------------------------------------
+
+o_1_space_functor :: Int -> [Benchmark]
+o_1_space_functor value =
+    [ bgroup "Functor"
+        [ benchIOSink value "fmap" (fmapN serially 1)
+        , benchIOSink value "fmap x 4" (fmapN serially 4)
+        ]
+    ]
+
+o_n_space_functor :: Int -> [Benchmark]
+o_n_space_functor value =
+    [ bgroup "Functor"
+        [ benchIO "(+) (n times) (baseline)" $ \i0 ->
+            iterateN (\i acc -> acc >>= \n -> return $ i + n) (return i0) value
+        , benchIOSrc serially "(<$) (n times)" $
+            iterateSingleton (<$) value
+        , benchIOSrc serially "fmap (n times)" $
+            iterateSingleton (P.fmap . (+)) value
+        {-
+        , benchIOSrc serially "_(<$) (n times)" $
+            _iterateSingleton (<$) value
+        , benchIOSrc serially "_fmap (n times)" $
+            _iterateSingleton (P.fmap . (+)) value
+        -}
+        ]
+    ]
+
 -------------------------------------------------------------------------------
 -- Size reducing transformations (filtering)
 -------------------------------------------------------------------------------
@ -1202,61 +1283,41 @@ o_1_space_transformations_mixedX4 value =
 -- Iterating a transformation over and over again
 -------------------------------------------------------------------------------

-{-# INLINE iterStreamLen #-}
-iterStreamLen :: Int
-iterStreamLen = 10
-
-{-# INLINE maxIters #-}
-maxIters :: Int
-maxIters = 10000
-
-{-# INLINE iterateSource #-}
-iterateSource ::
-       S.MonadAsync m
-    => (SerialT m Int -> SerialT m Int)
-    -> Int
-    -> Int
-    -> SerialT m Int
-iterateSource g i n = f i (sourceUnfoldrM iterStreamLen n)
-  where
-    f (0 :: Int) m = g m
-    f x m = g (f (x P.- 1) m)
-
 -- this is quadratic
 {-# INLINE iterateScan #-}
-iterateScan :: S.MonadAsync m => Int -> SerialT m Int
-iterateScan = iterateSource (S.scanl' (+) 0) (maxIters `div` 10)
+iterateScan :: S.MonadAsync m => Int -> Int -> Int -> SerialT m Int
+iterateScan = iterateSource (S.scanl' (+) 0)

 -- this is quadratic
 {-# INLINE iterateScanl1 #-}
-iterateScanl1 :: S.MonadAsync m => Int -> SerialT m Int
-iterateScanl1 = iterateSource (S.scanl1' (+)) (maxIters `div` 10)
+iterateScanl1 :: S.MonadAsync m => Int -> Int -> Int -> SerialT m Int
+iterateScanl1 = iterateSource (S.scanl1' (+))

 {-# INLINE iterateMapM #-}
-iterateMapM :: S.MonadAsync m => Int -> SerialT m Int
-iterateMapM = iterateSource (S.mapM return) maxIters
+iterateMapM :: S.MonadAsync m => Int -> Int -> Int -> SerialT m Int
+iterateMapM = iterateSource (S.mapM return)

 {-# INLINE iterateFilterEven #-}
-iterateFilterEven :: S.MonadAsync m => Int -> SerialT m Int
-iterateFilterEven = iterateSource (S.filter even) maxIters
+iterateFilterEven :: S.MonadAsync m => Int -> Int -> Int -> SerialT m Int
+iterateFilterEven = iterateSource (S.filter even)

 {-# INLINE iterateTakeAll #-}
-iterateTakeAll :: S.MonadAsync m => Int -> Int -> SerialT m Int
-iterateTakeAll value = iterateSource (S.take (value + 1)) maxIters
+iterateTakeAll :: S.MonadAsync m => Int -> Int -> Int -> Int -> SerialT m Int
+iterateTakeAll value = iterateSource (S.take (value + 1))

 {-# INLINE iterateDropOne #-}
-iterateDropOne :: S.MonadAsync m => Int -> SerialT m Int
-iterateDropOne = iterateSource (S.drop 1) maxIters
+iterateDropOne :: S.MonadAsync m => Int -> Int -> Int -> SerialT m Int
+iterateDropOne = iterateSource (S.drop 1)

 {-# INLINE iterateDropWhileFalse #-}
-iterateDropWhileFalse :: S.MonadAsync m => Int -> Int -> SerialT m Int
-iterateDropWhileFalse value =
-    iterateSource (S.dropWhile (> (value + 1))) maxIters
+iterateDropWhileFalse :: S.MonadAsync m
+    => Int -> Int -> Int -> Int -> SerialT m Int
+iterateDropWhileFalse value = iterateSource (S.dropWhile (> (value + 1)))

 {-# INLINE iterateDropWhileTrue #-}
-iterateDropWhileTrue :: S.MonadAsync m => Int -> Int -> SerialT m Int
-iterateDropWhileTrue value =
-    iterateSource (S.dropWhile (<= (value + 1))) maxIters
+iterateDropWhileTrue :: S.MonadAsync m
+    => Int -> Int -> Int -> Int -> SerialT m Int
+iterateDropWhileTrue value = iterateSource (S.dropWhile (<= (value + 1)))

 {-# INLINE tail #-}
 tail :: Monad m => SerialT m a -> m ()
@ -1272,23 +1333,31 @@ nullHeadTail s = do

 -- Head recursive operations.
 o_n_stack_iterated :: Int -> [Benchmark]
-o_n_stack_iterated value =
+o_n_stack_iterated value = by10 `seq` by100 `seq`
    [ bgroup "iterated"
-        [ benchIOSrc serially "mapMx10K" iterateMapM
-        , benchIOSrc serially "scanx100" iterateScan
-        , benchIOSrc serially "scanl1x100" iterateScanl1
-        , benchIOSrc serially "filterEvenx10K" iterateFilterEven
-        , benchIOSrc serially "takeAllx10K" (iterateTakeAll value)
-        , benchIOSrc serially "dropOnex10K" iterateDropOne
-        , benchIOSrc serially "dropWhileFalsex10K"
-            (iterateDropWhileFalse value)
-        , benchIOSrc serially "dropWhileTruex10K"
-            (iterateDropWhileTrue value)
+        [ benchIOSrc serially "mapM (n/10 x 10)" $ iterateMapM by10 10
+        , benchIOSrc serially "scanl' (quadratic) (n/100 x 100)" $
+            iterateScan by100 100
+        , benchIOSrc serially "scanl1' (n/10 x 10)" $ iterateScanl1 by10 10
+        , benchIOSrc serially "filterEven (n/10 x 10)" $
+            iterateFilterEven by10 10
+        , benchIOSrc serially "takeAll (n/10 x 10)" $
+            iterateTakeAll value by10 10
+        , benchIOSrc serially "dropOne (n/10 x 10)" $ iterateDropOne by10 10
+        , benchIOSrc serially "dropWhileFalse (n/10 x 10)" $
+            iterateDropWhileFalse value by10 10
+        , benchIOSrc serially "dropWhileTrue (n/10 x 10)" $
+            iterateDropWhileTrue value by10 10
        , benchIOSink value "tail" tail
        , benchIOSink value "nullHeadTail" nullHeadTail
        ]
    ]

+    where
+
+    by10 = value `div` 10
+    by100 = value `div` 100
+
 -------------------------------------------------------------------------------
 -- Pipes
 -------------------------------------------------------------------------------
@ -1546,49 +1615,17 @@ o_1_space_applicative value =
        ]
    ]

-{-# INLINE ap_ #-}
-ap_ :: Int -> Int -> IO ()
-ap_ value n =
-      S.drain
-    $ S.foldrM
-        (\x xs -> pure x *> xs)
-        (return () :: SerialT IO ())
-    $ sourceUnfoldrMSerial value n
-
-{-# INLINE apRev_ #-}
-apRev_ :: Int -> Int -> IO ()
-apRev_ value n =
-      S.drain
-    $ S.foldrM
-        (\x xs -> xs <* pure x)
-        (return () :: SerialT IO ())
-    $ sourceUnfoldrMSerial value n
-
-{-# INLINE liftA2 #-}
-liftA2 :: Int -> Int -> IO ()
-liftA2 value n =
-      S.drain
-    $ S.foldrM
-        (\x xs -> AP.liftA2 (+) (pure x) xs)
-        (S.yield 0 :: SerialT IO Int)
-    $ sourceUnfoldrMSerial value n
-
-{-# INLINE ap #-}
-ap :: Int -> Int -> IO ()
-ap value n =
-      S.drain
-    $ S.foldrM
-        (\x xs -> (P.fmap (+) (pure x) <*> xs))
-        (S.yield 0 :: SerialT IO Int)
-    $ sourceUnfoldrMSerial value n
-
 o_n_space_applicative :: Int -> [Benchmark]
 o_n_space_applicative value =
-    [ bgroup "applicative"
-        [ benchIO "(*>)" (ap_ value)
-        , benchIO "(<*)" (apRev_ value)
-        , benchIO "(<*>)" (ap value)
-        , benchIO "liftA2" (liftA2 value)
+    [ bgroup "Applicative"
+        [ benchIOSrc serially "(*>) (n times)" $
+            iterateSingleton ((*>) . pure) value
+        , benchIOSrc serially "(<*) (n times)" $
+            iterateSingleton (\x xs -> xs <* pure x) value
+        , benchIOSrc serially "(<*>) (n times)" $
+            iterateSingleton (\x xs -> (P.fmap (+) (pure x) <*> xs)) value
+        , benchIOSrc serially "liftA2 (n times)" $
+            iterateSingleton (\x xs -> AP.liftA2 (+) (pure x) xs) value
        ]
    ]

@ -1598,7 +1635,7 @@ o_n_space_applicative value =

 o_1_space_monad :: Int -> [Benchmark]
 o_1_space_monad value =
-    [ bgroup "monad"
+    [ bgroup "Monad"
        [ benchIO "outer product (sqrt n x sqrt n)" $ toNullM value serially
        , benchIO "outer product (sqrt n x sqrt n) (filterAllOut)" $
            filterAllOutM value serially
@ -1613,30 +1650,13 @@ o_1_space_monad value =
        ]
    ]

-{-# INLINE bind_ #-}
-bind_ :: Int -> Int -> IO ()
-bind_ value n =
-      S.drain
-    $ S.foldrM
-        (\x xs -> pure x >> xs)
-        (return () :: SerialT IO ())
-    $ sourceUnfoldrMSerial value n
-
-
-{-# INLINE bind #-}
-bind :: Int -> Int -> IO ()
-bind value n =
-      S.drain
-    $ S.foldrM
-        (\x xs -> xs >>= \y -> return (x + y))
-        (S.yield 0 :: SerialT IO Int)
-    $ sourceUnfoldrMSerial value n
-
 o_n_space_monad :: Int -> [Benchmark]
 o_n_space_monad value =
-    [ bgroup "monad"
-        [ benchIO "(>>)" (bind_ value)
-        , benchIO "(>>=)" (bind value)
+    [ bgroup "Monad"
+        [ benchIOSrc serially "(>>) (n times)" $
+            iterateSingleton ((>>) . pure) value
+        , benchIOSrc serially "(>>=) (n times)" $
+            iterateSingleton (\x xs -> xs >>= \y -> return (x + y)) value
        , benchIO "outer product (sqrt n x sqrt n) (toList)" $
            toListM value serially
        , benchIO "outer product (sqrt n x sqrt n) (toListSome)" $
@ -1678,6 +1698,54 @@ o_1_space_hoisting value =
        ]
    ]

+{-# INLINE iterateStateIO #-}
+iterateStateIO ::
+       (S.MonadAsync m)
+    => Int
+    -> StateT Int m Int
+iterateStateIO n = do
+    x <- get
+    if x > n
+    then do
+        put (x - 1)
+        iterateStateIO n
+    else return x
+
+{-# INLINE iterateStateT #-}
+iterateStateT :: Int -> SerialT (StateT Int IO) Int
+iterateStateT n = do
+    x <- get
+    if x > n
+    then do
+        put (x - 1)
+        iterateStateT n
+    else return x
+
+{-# INLINE iterateState #-}
+iterateState ::
+       (S.MonadAsync m, MonadState Int m)
+    => Int
+    -> SerialT m Int
+iterateState n = do
+    x <- get
+    if x > n
+    then do
+        put (x - 1)
+        iterateState n
+    else return x
+
+o_n_space_transformer :: Int -> [Benchmark]
+o_n_space_transformer value =
+    [ bgroup "transformer"
+        [ benchIO "StateT Int IO (n times) (baseline)" $ \n ->
+            State.evalStateT (iterateStateIO n) value
+        , benchIO "SerialT (StateT Int IO) (n times)" $ \n ->
+            State.evalStateT (S.drain (iterateStateT n)) value
+        , benchIO "MonadState Int m => SerialT m Int" $ \n ->
+            State.evalStateT (S.drain (iterateState n)) value
+        ]
+    ]
+
 -------------------------------------------------------------------------------
 -- Main
 -------------------------------------------------------------------------------
@ -1703,6 +1771,7 @@ main = do
            , o_1_space_elimination_multi_stream size

            -- transformation
+            , o_1_space_functor size
            , o_1_space_mapping size
            , o_1_space_mappingX4 size
            , o_1_space_filtering size
@ -1747,7 +1816,9 @@ main = do
            , o_n_space_grouping size

            -- multi-stream
+            , o_n_space_functor size
            , o_n_space_applicative size
            , o_n_space_monad size
+            , o_n_space_transformer size
            ]
        ]
--- a/bin/bench-exec-one.sh
+++ b/bin/bench-exec-one.sh
@ -35,9 +35,11 @@ bench_rts_opts_specific () {
    Prelude.Parallel/o-n-heap/monad-outer-product/*) echo -n "-M256M" ;;
    Prelude.Parallel/o-n-space/monad-outer-product/*) echo -n "-K4M -M256M" ;;

+    Prelude.Serial/o-n-space/Functor/*) echo -n "-K4M -M256M" ;;
+    Prelude.Serial/o-n-space/Applicative/*) echo -n "-K8M -M256M" ;;
+    Prelude.Serial/o-n-space/Monad/*) echo -n "-K8M -M256M" ;;
+    Prelude.Serial/o-n-space/transformer/*) echo -n "-K8M -M256M" ;;
    Prelude.Serial/o-n-space/grouping/*) echo -n "" ;;
-    Prelude.Serial/o-n-space/applicative/*) echo -n "-K8M -M256M" ;;
-    Prelude.Serial/o-n-space/monad/*) echo -n "-K8M -M64M" ;;
    Prelude.Serial/o-n-space/*) echo -n "-K4M" ;;

    Prelude.WSerial/o-n-space/*) echo -n "-K4M" ;;
--- a/src/Streamly/Internal/Prelude.hs
+++ b/src/Streamly/Internal/Prelude.hs
@ -2119,6 +2119,10 @@ scanx :: (IsStream t, Monad m) => (x -> a -> x) -> x -> (x -> b) -> t m a -> t m
 scanx = P.scanlx'

 -- XXX this needs to be concurrent
+-- XXX because of the use of D.cons for appending, scanlM' has quadratic
+-- complexity when iterated over a stream. We should use StreamK style scanlM'
+-- for linear performance on iteration.
+--
 -- | Like 'scanl'' but with a monadic fold function.
 --
 -- @since 0.4.0
@ -2143,6 +2147,10 @@ scanlMAfter' :: (IsStream t, Monad m)
 scanlMAfter' step initial done stream =
    fromStreamD $ D.scanlMAfter' step initial done $ toStreamD stream

+-- XXX because of the use of D.cons for appending, scanl' has quadratic
+-- complexity when iterated over a stream. We should use StreamK style scanlM'
+-- for linear performance on iteration.
+
 -- | Strict left scan. Like 'map', 'scanl'' too is a one to one transformation,
 -- however it adds an extra element.
 --