Add support for benchmarking with tasty-bench

tasty-bench has fewer dependencies and is agile to keep up with new GHC versions. This change is especially motivated by support for GHC 9.0.1. gauge depends on foundation/basement which lagging much behind and seem to be unmaintained.
2024-09-11 08:25:40 +03:00 · 2021-04-19 21:23:37 +05:30 · 2021-04-19 21:23:37 +05:30 · b68baf3c51
commit b68baf3c51
parent 86b3a509b4
26 changed files with 296 additions and 185 deletions
--- a/.github/workflows/haskell.yml
+++ b/.github/workflows/haskell.yml
@ -41,16 +41,15 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        name: [8.10.4+stack+lts-17.8, 8.10.2+macOS, 8.8.3+inspection+fusion-plugin+Werror, 8.8.4+stack+lts-16.31, 8.6.5+fusion-plugin, 8.6.5+streamk, 8.4.4+debug, doctests]
+        name: [9.0.1, 8.10.4+stack+lts-17.8, 8.10.2+macOS, 8.8.3+inspection+fusion-plugin+Werror, 8.8.4+stack+lts-16.31, 8.6.5+fusion-plugin, 8.6.5+streamk, 8.4.4+debug, doctests]
-        cabal_version: ["3.2"]
+        cabal_version: ["3.4"]
        include:
-          #- name: 9.0.1+no-bench
+          - name: 9.0.1
-          #  ghc_version: 9.0.1
+            ghc_version: 9.0.1
-          #  build: cabal-v2
+            build: cabal-v2
-          #  cabal_build_options: "--allow-newer=hsc2hs"
+            cabal_build_options: "--allow-newer=hsc2hs"
-          #  cabal_build_targets: "streamly streamly-tests"
+            disable_sdist_build: "y"
-          #  disable_sdist_build: "y"
+            runner: ubuntu-latest
          #  runner: ubuntu-latest
          - name: 8.10.4+stack+lts-17.8
            build: stack
            resolver: lts-17.8
--- a/benchmark/Streamly/Benchmark/Data/Array.hs
+++ b/benchmark/Streamly/Benchmark/Data/Array.hs
@ -183,9 +183,7 @@ defStreamSize = defaultStreamSize
 #endif
 main :: IO ()
-main = do
+main = runWithCLIOpts defStreamSize allBenchmarks
    (value, cfg, benches) <- parseCLIOpts defStreamSize
    value `seq` runMode (mode cfg) cfg benches (allBenchmarks value)
    where
--- a/benchmark/Streamly/Benchmark/Data/Array/Stream/Foreign.hs
+++ b/benchmark/Streamly/Benchmark/Data/Array/Stream/Foreign.hs
@ -192,14 +192,12 @@ moduleName = "Data.Array.Stream.Foreign"
 main :: IO ()
 main = do
    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
    env <- mkHandleBenchEnv
-    value `seq` runMode (mode cfg) cfg benches
+    runWithCLIOpts defaultStreamSize (allBenchmarks env)
        (allBenchmarks env)
    where
-    allBenchmarks env =
+    allBenchmarks env _ =
        [ bgroup (o_1_space_prefix moduleName)
          ( o_1_space_read_chunked env
          ++ o_1_space_copy_toChunks_group_ungroup env
--- a/benchmark/Streamly/Benchmark/Data/Fold.hs
+++ b/benchmark/Streamly/Benchmark/Data/Fold.hs
@ -319,9 +319,7 @@ o_n_heap_serial value =
 -------------------------------------------------------------------------------
 main :: IO ()
-main = do
+main = runWithCLIOpts defaultStreamSize allBenchmarks
    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
    value `seq` runMode (mode cfg) cfg benches (allBenchmarks value)
    where
--- a/benchmark/Streamly/Benchmark/Data/Parser.hs
+++ b/benchmark/Streamly/Benchmark/Data/Parser.hs
@ -395,16 +395,26 @@ o_n_heap_serial value =
 -- Driver
 -------------------------------------------------------------------------------
 getArray :: (Int -> [Benchmark]) -> IO [Array.Array Int]
 #ifndef MIN_VERSION_gauge
 getArray f = do
    (value, _) <- parseCLIOpts defaultStreamSize $ bgroup "All" (f 0)
    IP.toList $ IP.arraysOf 100 $ sourceUnfoldrM value 0
 #else
 getArray _ = do
    (value, _, _) <- parseCLIOpts defaultStreamSize
    IP.toList $ IP.arraysOf 100 $ sourceUnfoldrM value 0
 #endif
 main :: IO ()
 main = do
    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
    env <- mkHandleBenchEnv
-    arrays <- IP.toList $ IP.arraysOf 100 $ sourceUnfoldrM value 0
+    arrays <- getArray (allBenchmarks env [])
-    value `seq` runMode (mode cfg) cfg benches (allBenchmarks env value arrays)
+    runWithCLIOpts defaultStreamSize (allBenchmarks env arrays)
    where
-    allBenchmarks env value arrays =
+    allBenchmarks env arrays value =
        [ bgroup (o_1_space_prefix moduleName) (o_1_space_serial value)
        , bgroup
              (o_1_space_prefix moduleName ++ "/filesystem")
--- a/benchmark/Streamly/Benchmark/Data/Parser/ParserD.hs
+++ b/benchmark/Streamly/Benchmark/Data/Parser/ParserD.hs
@ -5,6 +5,7 @@
 -- License     : BSD-3-Clause
 -- Maintainer  : streamly@composewell.com
 {-# LANGUAGE CPP #-}
 {-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {-# OPTIONS_GHC -fspec-constr-recursive=4 #-}
@ -358,16 +359,25 @@ o_n_space_serial value =
 -- Driver
 -------------------------------------------------------------------------------
 getArray :: (Int -> [Benchmark]) -> IO [Array.Array Int]
 #ifndef MIN_VERSION_gauge
 getArray f = do
    (value, _) <- parseCLIOpts defaultStreamSize $ bgroup "All" (f 0)
    IP.toList $ IP.arraysOf 100 $ sourceUnfoldrM value 0
 #else
 getArray _ = do
    (value, _, _) <- parseCLIOpts defaultStreamSize
    IP.toList $ IP.arraysOf 100 $ sourceUnfoldrM value 0
 #endif
 main :: IO ()
 main = do
-    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
+    arrays <- getArray (allBenchmarks [])
-    arraysSmall <- IP.toList $ IP.arraysOf 100 $ sourceUnfoldrM value 0
+    runWithCLIOpts defaultStreamSize (allBenchmarks arrays)
    value `seq` runMode (mode cfg) cfg benches
        (allBenchmarks value arraysSmall)
    where
-    allBenchmarks value arraysSmall =
+    allBenchmarks arraysSmall value =
        [ bgroup (o_1_space_prefix moduleName) (o_1_space_serial value)
        , bgroup (o_1_space_prefix moduleName) (o_1_space_serial_spanning value)
        , bgroup (o_1_space_prefix moduleName) (o_1_space_serial_nested value)
--- a/benchmark/Streamly/Benchmark/Data/Parser/ParserK.hs
+++ b/benchmark/Streamly/Benchmark/Data/Parser/ParserK.hs
@ -153,9 +153,7 @@ o_n_heap_serial value =
 -------------------------------------------------------------------------------
 main :: IO ()
-main = do
+main = runWithCLIOpts defaultStreamSize allBenchmarks
    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
    value `seq` runMode (mode cfg) cfg benches (allBenchmarks value)
    where
--- a/benchmark/Streamly/Benchmark/Data/Unfold.hs
+++ b/benchmark/Streamly/Benchmark/Data/Unfold.hs
@ -799,13 +799,12 @@ o_1_space_copy_read_exceptions env =
 main :: IO ()
 main = do
    (size, cfg, benches) <- parseCLIOpts defaultStreamSize
    env <- mkHandleBenchEnv
-    size `seq` runMode (mode cfg) cfg benches (allBenchmarks size env)
+    runWithCLIOpts defaultStreamSize (allBenchmarks env)
    where
-    allBenchmarks size env =
+    allBenchmarks env size =
        [ bgroup (o_1_space_prefix moduleName)
            $ Prelude.concat
                  [ o_1_space_transformation_input size
--- a/benchmark/Streamly/Benchmark/FileSystem/Handle.hs
+++ b/benchmark/Streamly/Benchmark/FileSystem/Handle.hs
@ -43,13 +43,12 @@ moduleName = "FileSystem.Handle"
 main :: IO ()
 main = do
    (_, cfg, benches) <- parseCLIOpts defaultStreamSize
    env <- mkHandleBenchEnv
-    runMode (mode cfg) cfg benches (allBenchmarks env)
+    runWithCLIOpts defaultStreamSize (allBenchmarks env)
    where
-    allBenchmarks env =
+    allBenchmarks env _ =
        [ bgroup (o_1_space_prefix moduleName) $ Prelude.concat
            [ RO.allBenchmarks env
            , RW.allBenchmarks env
--- a/benchmark/Streamly/Benchmark/Prelude/Ahead.hs
+++ b/benchmark/Streamly/Benchmark/Prelude/Ahead.hs
@ -117,9 +117,7 @@ o_n_space_outerProduct value =
 -------------------------------------------------------------------------------
 main :: IO ()
-main = do
+main = runWithCLIOpts defaultStreamSize allBenchmarks
    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
    value `seq` runMode (mode cfg) cfg benches (allBenchmarks value)
    where
--- a/benchmark/Streamly/Benchmark/Prelude/Async.hs
+++ b/benchmark/Streamly/Benchmark/Prelude/Async.hs
@ -183,9 +183,8 @@ o_n_space_outerProduct value =
 -------------------------------------------------------------------------------
 main :: IO ()
-main = do
+main = runWithCLIOpts defaultStreamSize allBenchmarks
-    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
+
    value `seq` runMode (mode cfg) cfg benches (allBenchmarks value)
    where
--- a/benchmark/Streamly/Benchmark/Prelude/Concurrent.hs
+++ b/benchmark/Streamly/Benchmark/Prelude/Concurrent.hs
@ -1,4 +1,3 @@
 {-# LANGUAGE RankNTypes #-}
 -- |
 -- Module      : Main
 -- Copyright   : (c) 2018 Composewell Technologies
@ -6,6 +5,8 @@
 -- License     : BSD3
 -- Maintainer  : streamly@composewell.com
 {-# LANGUAGE RankNTypes #-}
 import Control.Concurrent
 import Control.Monad (when, replicateM)
 import Streamly.Prelude
@ -77,13 +78,7 @@ concatGroup buflen threads usec n =
 main :: IO ()
 main =
-  defaultMainWith (defaultConfig
+    defaultMain
    { timeLimit = Just 0
    , minSamples = Just 1
    , minDuration = 0
    , includeFirstIter = True
    , quickMode = True
    })
    [ -- bgroup "append/buf-1-threads-10k-0sec"  (appendGroup 1 10000 0)
    -- , bgroup "append/buf-100-threads-100k-0sec"  (appendGroup 100 100000 0)
--- a/benchmark/Streamly/Benchmark/Prelude/Parallel.hs
+++ b/benchmark/Streamly/Benchmark/Prelude/Parallel.hs
@ -219,9 +219,7 @@ o_n_space_outerProduct value =
 -------------------------------------------------------------------------------
 main :: IO ()
-main = do
+main = runWithCLIOpts defaultStreamSize allBenchmarks
    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
    value `seq` runMode (mode cfg) cfg benches (allBenchmarks value)
    where
--- a/benchmark/Streamly/Benchmark/Prelude/Rate.hs
+++ b/benchmark/Streamly/Benchmark/Prelude/Rate.hs
@ -1,4 +1,5 @@
 {-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE CPP #-}
 -- |
 -- Module      : Main
@ -87,12 +88,12 @@ o_1_space_async value =
 o_1_space_ahead :: Int -> [Benchmark]
 o_1_space_ahead value =
    [ bgroup
-          "aheadly"
+        "aheadly"
-          [ benchIOSrc fromAhead "avgRate/1M" $ avgRate value 1000000
+        [ benchIOSrc fromAhead "avgRate/1M" $ avgRate value 1000000
-          , benchIOSrc fromAhead "minRate/1M" $ minRate value 1000000
+        , benchIOSrc fromAhead "minRate/1M" $ minRate value 1000000
-          , benchIOSrc fromAhead "maxRate/1M" $ maxRate value 1000000
+        , benchIOSrc fromAhead "maxRate/1M" $ maxRate value 1000000
-          , benchIOSrc fromAsync "constRate/1M" $ constRate value 1000000
+        , benchIOSrc fromAsync "constRate/1M" $ constRate value 1000000
-          ]
+        ]
    ]
 -------------------------------------------------------------------------------
@ -100,13 +101,10 @@ o_1_space_ahead value =
 -------------------------------------------------------------------------------
 main :: IO ()
-main = do
+main = runWithCLIOpts defaultStreamSize allBenchmarks
    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
    value `seq` runMode (mode cfg) cfg benches (allBenchmarks value)
    where
    allBenchmarks value =
        [ bgroup (o_1_space_prefix moduleName)
-              $ concat [o_1_space_async value, o_1_space_ahead value]
+        $ concat [o_1_space_async value, o_1_space_ahead value]]
        ]
--- a/benchmark/Streamly/Benchmark/Prelude/Serial.hs
+++ b/benchmark/Streamly/Benchmark/Prelude/Serial.hs
@ -22,7 +22,6 @@ import qualified Serial.Transformation1 as Transformation1
 import qualified Serial.Transformation2 as Transformation2
 import qualified Serial.Transformation3 as Transformation3
 import Gauge hiding (env)
 import Streamly.Benchmark.Common
 moduleName :: String
@ -37,12 +36,12 @@ moduleName = "Prelude.Serial"
 --
 main :: IO ()
 main = do
    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
    env <- mkHandleBenchEnv
-    value `seq` runMode (mode cfg) cfg benches (allBenchmarks value env)
+    runWithCLIOpts defaultStreamSize (allBenchmarks env)
    where
-    allBenchmarks size env = Prelude.concat
+    allBenchmarks env size = Prelude.concat
        [ Generation.benchmarks moduleName size
        , Elimination.benchmarks moduleName size
        , Exceptions.benchmarks moduleName env
--- a/benchmark/Streamly/Benchmark/Prelude/WAsync.hs
+++ b/benchmark/Streamly/Benchmark/Prelude/WAsync.hs
@ -5,6 +5,8 @@
 -- License     : BSD3
 -- Maintainer  : streamly@composewell.com
 {-# LANGUAGE CPP #-}
 import Prelude hiding (mapM)
 import Streamly.Prelude (fromWAsync, fromSerial, wAsync, maxBuffer, maxThreads)
@ -167,10 +169,8 @@ o_n_space_outerProduct value =
 -------------------------------------------------------------------------------
 main :: IO ()
-main = do
+main = runWithCLIOpts defaultStreamSize allBenchmarks
-    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
+    
    value `seq` runMode (mode cfg) cfg benches (allBenchmarks value)
    where
    allBenchmarks value =
--- a/benchmark/Streamly/Benchmark/Prelude/WSerial.hs
+++ b/benchmark/Streamly/Benchmark/Prelude/WSerial.hs
@ -201,9 +201,8 @@ o_n_space_outerProduct value =
 -- passed using the --stream-size option.
 --
 main :: IO ()
-main = do
+main = runWithCLIOpts defaultStreamSize allBenchmarks
-    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
+
    value `seq` runMode (mode cfg) cfg benches (allBenchmarks value)
    where
--- a/benchmark/Streamly/Benchmark/Prelude/ZipAsync.hs
+++ b/benchmark/Streamly/Benchmark/Prelude/ZipAsync.hs
@ -6,6 +6,7 @@
 -- Maintainer  : streamly@composewell.com
 {-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE CPP #-}
 import Streamly.Prelude (fromSerial)
 import qualified Streamly.Prelude  as S
@ -69,9 +70,7 @@ o_1_space_outerProduct value =
 -------------------------------------------------------------------------------
 main :: IO ()
-main = do
+main = runWithCLIOpts defaultStreamSize allBenchmarks
    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
    value `seq` runMode (mode cfg) cfg benches (allBenchmarks value)
    where
--- a/benchmark/Streamly/Benchmark/Prelude/ZipSerial.hs
+++ b/benchmark/Streamly/Benchmark/Prelude/ZipSerial.hs
@ -60,8 +60,8 @@ zipWith count n =
    S.drain $
    S.zipWith
        (,)
-        (S.fromSerial $ sourceUnfoldrM count n)
+        (S.fromSerial $ Main.sourceUnfoldrM count n)
-        (S.fromSerial $ sourceUnfoldrM count (n + 1))
+        (S.fromSerial $ Main.sourceUnfoldrM count (n + 1))
 #ifdef INSPECTION
 inspect $ hasNoTypeClasses 'zipWith
@ -75,8 +75,8 @@ zipWithM count n =
    S.drain $
    S.zipWithM
        (curry return)
-        (sourceUnfoldrM count n)
+        (Main.sourceUnfoldrM count n)
-        (sourceUnfoldrM count (n + 1))
+        (Main.sourceUnfoldrM count (n + 1))
 #ifdef INSPECTION
 inspect $ hasNoTypeClasses 'zipWithM
@ -125,9 +125,7 @@ o_1_space_outerProduct value =
 -- passed using the --stream-size option.
 --
 main :: IO ()
-main = do
+main = runWithCLIOpts defaultStreamSize allBenchmarks    
    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
    value `seq` runMode (mode cfg) cfg benches (allBenchmarks value)
    where
--- a/benchmark/Streamly/Benchmark/Unicode/Stream.hs
+++ b/benchmark/Streamly/Benchmark/Unicode/Stream.hs
@ -288,9 +288,8 @@ o_1_space_decode_encode_read env =
 main :: IO ()
 main = do
    (_, cfg, benches) <- parseCLIOpts defaultStreamSize
    env <- mkHandleBenchEnv
-    runMode (mode cfg) cfg benches (allBenchmarks env)
+    defaultMain (allBenchmarks env)
    where
--- a/benchmark/lib/Streamly/Benchmark/Common.hs
+++ b/benchmark/lib/Streamly/Benchmark/Common.hs
@ -16,7 +16,9 @@ module Streamly.Benchmark.Common
    , o_n_heap_prefix
    , o_n_stack_prefix
   -- , parseEnvOpts
    , parseCLIOpts
    , runWithCLIOpts
    , benchIOSink1
    , benchPure
@ -31,19 +33,34 @@ module Streamly.Benchmark.Common
    , mkListString
    , defaultStreamSize
    , BenchOpts(..)
 #ifndef MIN_VERSION_gauge
    , OptionDescription(..)
    , includingOptions
    , lookupOption
    , defaultMainWithIngredients
    , parseOptions
 #endif
    )
 where
-import Control.DeepSeq (NFData(..))
+#ifdef MIN_VERSION_gauge
 import Control.Exception (evaluate)
 import Control.Monad (when)
-import Data.Functor.Identity (Identity, runIdentity)
+import Text.Read (readMaybe)
 import Data.List (scanl')
 import Data.Maybe (mapMaybe)
 import System.Console.GetOpt
       (OptDescr(..), ArgDescr(..), ArgOrder(..), getOpt')
 import System.Environment (getArgs, lookupEnv, setEnv)
-import Text.Read (readMaybe)
+#else
 import Data.Proxy
 import Test.Tasty.Ingredients.Basic
 import Test.Tasty.Options
 import Test.Tasty.Runners
 #endif
 import Control.DeepSeq (NFData(..))
 import Data.Functor.Identity (Identity, runIdentity)
 import System.Random (randomRIO)
 import qualified Streamly.Prelude as S
@ -136,6 +153,7 @@ defaultStreamSize = 100000
 newtype BenchOpts = StreamSize Int deriving Show
 #ifdef MIN_VERSION_gauge
 getStreamSize :: String -> Int
 getStreamSize size =
    case (readMaybe size :: Maybe Int) of
@ -145,7 +163,7 @@ getStreamSize size =
 options :: [OptDescr BenchOpts]
 options =
    [
-      Option [] ["stream-size"] (ReqArg getSize "COUNT") "Stream element count"
+        System.Console.GetOpt.Option [] ["stream-size"] (ReqArg getSize "COUNT") "Stream element count"
    ]
    where
@ -169,7 +187,6 @@ deleteOptArgs (Just prev, _) opt =
 parseCLIOpts :: Int -> IO (Int, Config, [String])
 parseCLIOpts defStreamSize = do
    args <- getArgs
    -- Parse custom options
    let (opts, _, _, errs) = getOpt' Permute options args
    when (not $ null errs) $ error $ concat errs
@ -207,3 +224,33 @@ parseCLIOpts defStreamSize = do
                }
    let (cfg, benches) = parseWith config args'
    streamSize `seq` return (streamSize, cfg, benches)
 #else
 instance IsOption BenchOpts where
    defaultValue = StreamSize defaultStreamSize
    parseValue = fmap StreamSize . safeRead
    optionName = pure "stream-size"
    optionHelp = pure "StreamSize used in benchmarks"
 parseCLIOpts :: Int -> Benchmark -> IO (Int, [Ingredient])
 parseCLIOpts cDefSize benches = do
    let customOpts  = [Test.Tasty.Options.Option (Proxy :: Proxy BenchOpts)]
        ingredients = includingOptions customOpts : benchIngredients
    opts <- parseOptions ingredients benches
    let StreamSize size = lookupOption opts
    print $ "Stream-Size = " ++ show size
    if size == defaultStreamSize        -- LONG option is not set
    then return (cDefSize, ingredients) -- use custom defaut size of Benchmark
    else return (size, ingredients)     -- LONG option is set use large stream size
 #endif
 runWithCLIOpts :: Int -> (Int -> [Benchmark]) -> IO ()
 runWithCLIOpts cDefSize f = do
 #ifdef MIN_VERSION_gauge
    (value, cfg, benches) <- parseCLIOpts cDefSize
    value `seq` runMode (mode cfg) cfg benches (f value)
 #else
    (value, ingredients) <- parseCLIOpts cDefSize $ bgroup "All" (f 0)
    value `seq` defaultMainWithIngredients ingredients $ bgroup "All" (f value)
 #endif
--- a/benchmark/lib/Streamly/Benchmark/Common/Handle.hs
+++ b/benchmark/lib/Streamly/Benchmark/Common/Handle.hs
@ -86,6 +86,29 @@ withScaling env str =
       then str
       else str ++ " (1/" ++ show factor ++ ")"
 getHandles :: BenchEnv -> (RefHandles -> Handles) -> IO Handles
 getHandles env mkHandles = do
    r <- readIORef $ href env
    -- close old handles
    hClose $ smallInH r
    hClose $ bigInH r
    hClose $ outputH r
    -- reopen
    smallInHandle <- openFile (smallInFile env) ReadMode
    bigInHandle <- openFile (bigInFile env) ReadMode
    outHandle <- openFile outfile WriteMode
    let refHandles = RefHandles
            { smallInH = smallInHandle
            , bigInH = bigInHandle
            , outputH = outHandle
            }
    -- update
    writeIORef (href env) $ refHandles
    return $ mkHandles refHandles
 mkBenchCommon ::
       NFData b
    => (RefHandles -> Handles)
@ -94,30 +117,11 @@ mkBenchCommon ::
    -> (Handle -> Handle -> IO b)
    -> Benchmark
 mkBenchCommon mkHandles name env action =
-    bench name $ perRunEnv (do
+     bench name $ nfIO $ do
-            r <- readIORef $ href env
+        -- XXX adds significant cpu time to the benchmarks
-
+        -- tasty-bench should provide a better way to do this
-            -- close old handles
+        (Handles h1 h2) <- getHandles env mkHandles
-            hClose $ smallInH r
+        action h1 h2
            hClose $ bigInH r
            hClose $ outputH r
            -- reopen
            smallInHandle <- openFile (smallInFile env) ReadMode
            bigInHandle <- openFile (bigInFile env) ReadMode
            outHandle <- openFile outfile WriteMode
            let refHandles = RefHandles
                    { smallInH = smallInHandle
                    , bigInH = bigInHandle
                    , outputH = outHandle
                    }
            -- update
            writeIORef (href env) $ refHandles
            return $ mkHandles refHandles
        )
        (\(Handles h1 h2) -> action h1 h2)
 mkBench ::
    NFData b => String -> BenchEnv -> (Handle -> Handle -> IO b) -> Benchmark
--- a/benchmark/streamly-benchmarks.cabal
+++ b/benchmark/streamly-benchmarks.cabal
@ -54,6 +54,11 @@ flag opt
  manual: True
  default: True
 flag use-gauge
  description: Use gauge instead of tasty-bench for benchmarking
  manual: True
  default: False
 -------------------------------------------------------------------------------
 -- Common stanzas
 -------------------------------------------------------------------------------
@ -113,13 +118,22 @@ common bench-depends
    -- other libraries
    , streamly            >= 0.7.0
    , random              >= 1.0   && < 2.0
    , gauge               >= 0.2.4 && < 0.3
    , transformers        >= 0.4   && < 0.6
    , containers          >= 0.5   && < 0.7
    , typed-process     >= 0.2.3 && < 0.3
    , directory         >= 1.2.2 && < 1.4
    , ghc-prim          >= 0.4   && < 0.8
  if flag(use-gauge)    
    build-depends:  gauge >= 0.2.4 && < 0.3
  else    
    build-depends:    tasty-bench >= 0.2.5 && < 0.3
                    , tasty     >= 1.4.1
    mixins: tasty-bench
      (Test.Tasty.Bench as Gauge     
      , Test.Tasty.Bench as Gauge.Main
      )
  if flag(fusion-plugin) && !impl(ghcjs) && !impl(ghc < 8.6)
    build-depends:
        fusion-plugin     >= 0.2   && < 0.3
--- a/bin/bench-exec-one.sh
+++ b/bin/bench-exec-one.sh
@ -100,9 +100,15 @@ bench_rts_opts_specific () {
 # Speed options
 #------------------------------------------------------------------------------
-# Do not keep time limit as 0 otherwise GC stats may remain 0 in some cases.
+if test "$USE_GAUGE" -eq 0
-SUPER_QUICK_OPTIONS="--quick --min-duration 0 --time-limit 0.01 --include-first-iter"
+then
-QUICKER_OPTIONS="--min-samples 3 --time-limit 1"
+  SUPER_QUICK_OPTIONS="--stdev 1000000"
  QUICKER_OPTIONS="--stdev 1000"
 else
  # Do not keep time limit as 0 otherwise GC stats may remain 0 in some cases.
  SUPER_QUICK_OPTIONS="--quick --min-duration 0 --time-limit 0.01 --include-first-iter"
  QUICKER_OPTIONS="--min-samples 3 --time-limit 1"
 fi
 # For certain long benchmarks if the user has not requested super quick
 # mode we anyway use a slightly quicker mode.
@ -152,13 +158,38 @@ bench_quick_opts () {
  esac
 }
-last=""
+bench_output_file() {
    local bench_name=$1
    echo "charts/$bench_name/results.csv"
 }
 BENCH_NAME_ORIG=""
 for i in "$@"
 do
-    BENCH_NAME="$last"
+    BENCH_NAME_ORIG="$i"
    last="$i"
 done
 if test "$USE_GAUGE" -eq 0
 then
  # XXX this is a hack to make the "/" separated names used in the functions
  # determining options based on benchmark name. For tasty-bench the benchmark
  # names are separated by "." instead of "/".
  BENCH_NAME=$(echo $BENCH_NAME_ORIG | sed -e s/^All\.//)
  BENCH_NAME1=$(echo $BENCH_NAME | cut -f1 -d '/')
  BENCH_NAME2=$(echo $BENCH_NAME | cut -f2- -d '/' | sed -e 's/\./\//g')
  BENCH_NAME="$BENCH_NAME1/$BENCH_NAME2"
  JOB_OPT=" -j 1"
 else
  BENCH_NAME=$BENCH_NAME_ORIG
  JOB_OPT=""
 fi
 if test "$LONG" -eq 0
 then
  SIZE_OPT=""
 else
  SIZE_OPT="--stream-size 1000000"
 fi
 RTS_OPTIONS=\
 "+RTS -T \
 $(bench_exe_rts_opts $(basename $BENCH_EXEC_PATH)) \
@ -168,17 +199,27 @@ $RTS_OPTIONS \
 -RTS"
 QUICK_BENCH_OPTIONS="\
 $(if test "$QUICK_MODE" -ne 0; then echo $SUPER_QUICK_OPTIONS; else :; fi)
 $(bench_exe_quick_opts $(basename $BENCH_EXEC_PATH)) \
 $(bench_quick_opts $BENCH_NAME)"
-if test -n "$STREAM_SIZE"
+output_file=$(bench_output_file $(basename $BENCH_EXEC_PATH))
-then
+mkdir -p `dirname $output_file`
  STREAM_LEN=$(env LC_ALL=en_US.UTF-8 printf "\--stream-size %'.f\n" $STREAM_SIZE)
 fi
 echo "$BENCH_NAME: \
 $STREAM_LEN \
 $QUICK_BENCH_OPTIONS \
 $RTS_OPTIONS"
-$BENCH_EXEC_PATH $RTS_OPTIONS "$@" $QUICK_BENCH_OPTIONS
+rm -f ${output_file}.tmp
 if test $USE_GAUGE -eq 0
 then
  BENCH_NAME_ESC=$(echo "$BENCH_NAME_ORIG" | sed -e 's/\\/\\\\/g' | sed -e 's/"/\\"/g')
  $BENCH_EXEC_PATH $SIZE_OPT $JOB_OPT $RTS_OPTIONS $QUICK_BENCH_OPTIONS --csv=${output_file}.tmp \
    -p '$0 == "'"$BENCH_NAME_ESC"'"'
  tail -n +2 ${output_file}.tmp | \
    awk 'BEGIN {FPAT = "([^,]+)|(\"[^\"]+\")";OFS=","} {$2=$2/1000000000000;print}' >> $output_file
 else
  $BENCH_EXEC_PATH $SIZE_OPT $RTS_OPTIONS $QUICK_BENCH_OPTIONS --csvraw=${output_file}.tmp \
    -m exact "$BENCH_NAME"
  tail -n +2 ${output_file}.tmp >> $output_file
 fi
--- a/bin/bench.sh
+++ b/bin/bench.sh
@ -142,62 +142,71 @@ bench_output_file() {
    echo "charts/$bench_name/results.csv"
 }
-# --min-duration 0 means exactly one iteration per sample. We use a million
+run_bench_target () {
-# iterations in the benchmarking code explicitly and do not use the iterations
+  local package_name=$1
-# done by the benchmarking tool.
+  local component=$2
-#
+  local target_name=$3
-# Benchmarking tool by default discards the first iteration to remove
+  local output_file=$(bench_output_file $target_name)
 # aberrations due to initial evaluations etc. We do not discard it because we
 # are anyway doing iterations in the benchmarking code and many of them so that
 # any constant factor gets amortized and anyway it is a cost that we pay in
 # real life.
 #
 # We can pass --min-samples value from the command line as second argument
 # after the benchmark name in case we want to use more than one sample.
-# $1: bench name
+  local target_prog
-# $2: bench executable
+  target_prog=$(cabal_target_prog $package_name $component $target_name) || \
-target_exe_extra_args () {
+    die "Cannot find executable for target $target_name"
  local bench_name=$1
  local bench_prog=$2
-  local output_file=$(bench_output_file $bench_name)
+  echo "Running executable $target_name ..."
  mkdir -p `dirname $output_file`
-  local QUICK_OPTS="--quick --min-duration 0"
+  # Needed by bench-exec-one.sh
-  local SPEED_OPTIONS
+  export BENCH_EXEC_PATH=$target_prog
-  if test "$LONG" -eq 0
+  if test "$LONG" -ne 0
  then
-    if test "$SLOW" -eq 0
+      BENCH_ARGS="-p /$target_name\/o-1-space/"
-    then
+      STREAM_SIZE=10000000
-        if test "$QUICK_MODE" -eq 0
+      export STREAM_SIZE
        then
          # default mode, not super quick, not slow
          SPEED_OPTIONS="$QUICK_OPTS --min-samples 10 --time-limit 1"
        else
          # super quick but less accurate
          # When the time-limit is too low and the benchmark is tiny,
          # then if the number of iterations is very small the GC stats
          # may remain 0.  So keep the time-limit at a minimum of 10 ms
          # to collect significant stats. The problem was observed in
          # the Prelude.Serial/reverse' benchmark.
          SPEED_OPTIONS="$QUICK_OPTS --time-limit 0.01 --include-first-iter"
        fi
    else
      # Slow but more accurate mode
      SPEED_OPTIONS="--min-duration 0"
    fi
  else
      # large stream size, always super quick
      GAUGE_ARGS="$GAUGE_ARGS $bench_name/o-1-space"
      SPEED_OPTIONS="--stream-size 10000000 $QUICK_OPTS --include-first-iter"
  fi
-  echo "$SPEED_OPTIONS \
+  local MATCH=""
-    --csvraw=$output_file \
+  if test "$USE_GAUGE" -eq 0
-    -v 2 \
+  then
-    --measure-with "$SCRIPT_DIR/bench-exec-one.sh" \
+    if test "$LONG" -ne 0
-    $GAUGE_ARGS"
+    then
      MATCH="-p /$target_name\/o-1-space/"
    else
        if test -n "$GAUGE_ARGS"
        then
          local GAUGE_ARGS1=$(echo "$GAUGE_ARGS" | sed -e 's/\//\\\//g')
          MATCH="-p /$GAUGE_ARGS1/"
        fi
    fi
    echo "Name,cpuTime,2*Stdev (ps),Allocated,bytesCopied" >> $output_file
    $target_prog -l $MATCH \
      | grep "^All" \
      | while read -r name; do bin/bench-exec-one.sh "$name"; done
  else
    if test "$LONG" -ne 0
    then
      MATCH="$target_name/o-1-space"
    else
      MATCH="$GAUGE_ARGS"
    fi
    echo "name,iters,time,cycles,cpuTime,utime,stime,maxrss,minflt,majflt,nvcsw,nivcsw,allocated,numGcs,bytesCopied,mutatorWallSeconds,mutatorCpuSeconds,gcWallSeconds,gcCpuSeconds" >> $output_file
    # XXX We may have to use "sort | awk" to keep only benchmark names with
    # shortest prefix e.g. "a/b/c" and "a/b", we should only keep "a/b"
    # otherwise benchmarks will run multiple times.
    $target_prog -l \
      | grep "^$target_name" \
      | grep "^$MATCH" \
      | sort | paste -sd "," - | awk 'BEGIN {FS=","} {t="XU987"; for(i=1;i<=NF;i++) if (substr($i,1,length(t)) != t) {print $i; t=$i}}' \
      | while read -r name; do bin/bench-exec-one.sh "$name"; done
  fi
 }
 # $1: package name
 # $2: component
 # $3: targets
 run_bench_targets() {
    for i in $3
    do
      run_bench_target $1 $2 $i
    done
 }
 run_benches_comparing() {
@ -217,14 +226,14 @@ run_benches_comparing() {
    git checkout "$BASE" || die "Checkout of base commit [$BASE] failed"
    $BUILD_BENCH || die "build failed"
-    run_targets streamly-benchmarks b "$bench_list" target_exe_extra_args
+    run_bench_targets streamly-benchmarks b "$bench_list" target_exe_extra_args
    echo "Checking out candidate commit [$CANDIDATE] for benchmarking"
    git checkout "$CANDIDATE" || \
        die "Checkout of candidate [$CANDIDATE] commit failed"
    $BUILD_BENCH || die "build failed"
-    run_targets streamly-benchmarks b "$bench_list" target_exe_extra_args
+    run_bench_targets streamly-benchmarks b "$bench_list" target_exe_extra_args
    # XXX reset back to the original commit
 }
@ -248,7 +257,7 @@ run_measurements() {
  if test "$COMPARE" = "0"
  then
-    run_targets streamly-benchmarks b "$bench_list" target_exe_extra_args
+    run_bench_targets streamly-benchmarks b "$bench_list" target_exe_extra_args
  else
    run_benches_comparing "$bench_list"
  fi
@ -274,6 +283,8 @@ run_reports() {
 cd $SCRIPT_DIR/..
 USE_GAUGE=0
 export USE_GAUGE
 USE_GIT_CABAL=1
 set_common_vars
@ -323,7 +334,7 @@ do
    --long) LONG=1; shift ;;
    --graphs) GRAPH=1; shift ;;
    --no-measure) MEASURE=0; shift ;;
-    --dev-build) RUNNING_DEVBUILD=1; shift ;;
+    --dev-build) RUNNING_DEVBUILD=1; shift ;;    
    --) shift; break ;;
    -*|--*) echo "Unknown flags: $*"; echo; print_help ;;
    *) break ;;
@ -409,6 +420,7 @@ then
  run_build "$BUILD_BENCH" streamly-benchmarks bench "$TARGETS"
  export QUICK_MODE
  export RTS_OPTIONS
  export LONG
  run_measurements "$TARGETS"
 fi
--- a/stack.yaml
+++ b/stack.yaml
@ -1,4 +1,4 @@
-resolver: lts-16.12
+resolver: lts-17.9
 packages:
 - '.'
 - './benchmark'
@ -8,6 +8,8 @@ extra-deps:
 - QuickCheck-2.14.1@sha256:01e46d7b0a8d3148288ec977625f62d5516ebb5031a50c63f0453863301b4a79,7736
 - random-1.2.0@sha256:4969b16ef23e9a57a415abcd39d982c81844031557b66349651b0fe3255b8986,6094
 - splitmix-0.1.0.1@sha256:22f9662e7e8b173421872241edd39350078a9ed4bb9e9f503948c5b483c79276,5253
 - tasty-bench-0.2.5@sha256:4f00fdafc3aba6e48366a3684c427618d98c0b1b130cd9cab35c9498796c9a65,1497
 - tasty-1.4.1@sha256:69e90e965543faf0fc2c8e486d6c1d8cf81fd108e2c4541234c41490f392f94f,2638
 #allow-newer: true
 flags: {}