Add support for benchmarking with tasty-bench

tasty-bench has fewer dependencies and is agile to keep up with new GHC versions. This change is especially motivated by support for GHC 9.0.1. gauge depends on foundation/basement which lagging much behind and seem to be unmaintained.
2024-08-15 11:20:22 +03:00 · 2021-04-19 21:23:37 +05:30 · 2021-04-19 21:23:37 +05:30 · b68baf3c51
commit b68baf3c51
parent 86b3a509b4
26 changed files with 296 additions and 185 deletions
--- a/.github/workflows/haskell.yml
+++ b/.github/workflows/haskell.yml
@ -41,16 +41,15 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        name: [8.10.4+stack+lts-17.8, 8.10.2+macOS, 8.8.3+inspection+fusion-plugin+Werror, 8.8.4+stack+lts-16.31, 8.6.5+fusion-plugin, 8.6.5+streamk, 8.4.4+debug, doctests]
-        cabal_version: ["3.2"]
+        name: [9.0.1, 8.10.4+stack+lts-17.8, 8.10.2+macOS, 8.8.3+inspection+fusion-plugin+Werror, 8.8.4+stack+lts-16.31, 8.6.5+fusion-plugin, 8.6.5+streamk, 8.4.4+debug, doctests]
+        cabal_version: ["3.4"]
        include:
-          #- name: 9.0.1+no-bench
-          #  ghc_version: 9.0.1
-          #  build: cabal-v2
-          #  cabal_build_options: "--allow-newer=hsc2hs"
-          #  cabal_build_targets: "streamly streamly-tests"
-          #  disable_sdist_build: "y"
-          #  runner: ubuntu-latest
+          - name: 9.0.1
+            ghc_version: 9.0.1
+            build: cabal-v2
+            cabal_build_options: "--allow-newer=hsc2hs"
+            disable_sdist_build: "y"
+            runner: ubuntu-latest
          - name: 8.10.4+stack+lts-17.8
            build: stack
            resolver: lts-17.8
--- a/benchmark/Streamly/Benchmark/Data/Array.hs
+++ b/benchmark/Streamly/Benchmark/Data/Array.hs
@ -183,9 +183,7 @@ defStreamSize = defaultStreamSize
 #endif

 main :: IO ()
-main = do
-    (value, cfg, benches) <- parseCLIOpts defStreamSize
-    value `seq` runMode (mode cfg) cfg benches (allBenchmarks value)
+main = runWithCLIOpts defStreamSize allBenchmarks

    where

--- a/benchmark/Streamly/Benchmark/Data/Array/Stream/Foreign.hs
+++ b/benchmark/Streamly/Benchmark/Data/Array/Stream/Foreign.hs
@ -192,14 +192,12 @@ moduleName = "Data.Array.Stream.Foreign"

 main :: IO ()
 main = do
-    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
    env <- mkHandleBenchEnv
-    value `seq` runMode (mode cfg) cfg benches
-        (allBenchmarks env)
+    runWithCLIOpts defaultStreamSize (allBenchmarks env)

    where

-    allBenchmarks env =
+    allBenchmarks env _ =
        [ bgroup (o_1_space_prefix moduleName)
          ( o_1_space_read_chunked env
          ++ o_1_space_copy_toChunks_group_ungroup env
--- a/benchmark/Streamly/Benchmark/Data/Fold.hs
+++ b/benchmark/Streamly/Benchmark/Data/Fold.hs
@ -319,9 +319,7 @@ o_n_heap_serial value =
 -------------------------------------------------------------------------------

 main :: IO ()
-main = do
-    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
-    value `seq` runMode (mode cfg) cfg benches (allBenchmarks value)
+main = runWithCLIOpts defaultStreamSize allBenchmarks

    where

--- a/benchmark/Streamly/Benchmark/Data/Parser.hs
+++ b/benchmark/Streamly/Benchmark/Data/Parser.hs
@ -395,16 +395,26 @@ o_n_heap_serial value =
 -- Driver
 -------------------------------------------------------------------------------

+getArray :: (Int -> [Benchmark]) -> IO [Array.Array Int]
+#ifndef MIN_VERSION_gauge
+getArray f = do
+    (value, _) <- parseCLIOpts defaultStreamSize $ bgroup "All" (f 0)
+    IP.toList $ IP.arraysOf 100 $ sourceUnfoldrM value 0
+#else
+getArray _ = do
+    (value, _, _) <- parseCLIOpts defaultStreamSize
+    IP.toList $ IP.arraysOf 100 $ sourceUnfoldrM value 0
+#endif
+
 main :: IO ()
 main = do
-    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
    env <- mkHandleBenchEnv
-    arrays <- IP.toList $ IP.arraysOf 100 $ sourceUnfoldrM value 0
-    value `seq` runMode (mode cfg) cfg benches (allBenchmarks env value arrays)
+    arrays <- getArray (allBenchmarks env [])
+    runWithCLIOpts defaultStreamSize (allBenchmarks env arrays)

    where

-    allBenchmarks env value arrays =
+    allBenchmarks env arrays value =
        [ bgroup (o_1_space_prefix moduleName) (o_1_space_serial value)
        , bgroup
              (o_1_space_prefix moduleName ++ "/filesystem")
--- a/benchmark/Streamly/Benchmark/Data/Parser/ParserD.hs
+++ b/benchmark/Streamly/Benchmark/Data/Parser/ParserD.hs
@ -5,6 +5,7 @@
 -- License     : BSD-3-Clause
 -- Maintainer  : streamly@composewell.com

+{-# LANGUAGE CPP #-}
 {-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {-# OPTIONS_GHC -fspec-constr-recursive=4 #-}
@ -358,16 +359,25 @@ o_n_space_serial value =
 -- Driver
 -------------------------------------------------------------------------------

+getArray :: (Int -> [Benchmark]) -> IO [Array.Array Int]
+#ifndef MIN_VERSION_gauge
+getArray f = do
+    (value, _) <- parseCLIOpts defaultStreamSize $ bgroup "All" (f 0)
+    IP.toList $ IP.arraysOf 100 $ sourceUnfoldrM value 0
+#else
+getArray _ = do
+    (value, _, _) <- parseCLIOpts defaultStreamSize
+    IP.toList $ IP.arraysOf 100 $ sourceUnfoldrM value 0
+#endif
+
 main :: IO ()
 main = do
-    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
-    arraysSmall <- IP.toList $ IP.arraysOf 100 $ sourceUnfoldrM value 0
-    value `seq` runMode (mode cfg) cfg benches
-        (allBenchmarks value arraysSmall)
+    arrays <- getArray (allBenchmarks [])
+    runWithCLIOpts defaultStreamSize (allBenchmarks arrays)

    where

-    allBenchmarks value arraysSmall =
+    allBenchmarks arraysSmall value =
        [ bgroup (o_1_space_prefix moduleName) (o_1_space_serial value)
        , bgroup (o_1_space_prefix moduleName) (o_1_space_serial_spanning value)
        , bgroup (o_1_space_prefix moduleName) (o_1_space_serial_nested value)
--- a/benchmark/Streamly/Benchmark/Data/Parser/ParserK.hs
+++ b/benchmark/Streamly/Benchmark/Data/Parser/ParserK.hs
@ -153,9 +153,7 @@ o_n_heap_serial value =
 -------------------------------------------------------------------------------

 main :: IO ()
-main = do
-    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
-    value `seq` runMode (mode cfg) cfg benches (allBenchmarks value)
+main = runWithCLIOpts defaultStreamSize allBenchmarks

    where

--- a/benchmark/Streamly/Benchmark/Data/Unfold.hs
+++ b/benchmark/Streamly/Benchmark/Data/Unfold.hs
@ -799,13 +799,12 @@ o_1_space_copy_read_exceptions env =

 main :: IO ()
 main = do
-    (size, cfg, benches) <- parseCLIOpts defaultStreamSize
    env <- mkHandleBenchEnv
-    size `seq` runMode (mode cfg) cfg benches (allBenchmarks size env)
+    runWithCLIOpts defaultStreamSize (allBenchmarks env)

    where

-    allBenchmarks size env =
+    allBenchmarks env size =
        [ bgroup (o_1_space_prefix moduleName)
            $ Prelude.concat
                  [ o_1_space_transformation_input size
--- a/benchmark/Streamly/Benchmark/FileSystem/Handle.hs
+++ b/benchmark/Streamly/Benchmark/FileSystem/Handle.hs
@ -43,13 +43,12 @@ moduleName = "FileSystem.Handle"

 main :: IO ()
 main = do
-    (_, cfg, benches) <- parseCLIOpts defaultStreamSize
    env <- mkHandleBenchEnv
-    runMode (mode cfg) cfg benches (allBenchmarks env)
+    runWithCLIOpts defaultStreamSize (allBenchmarks env)

    where

-    allBenchmarks env =
+    allBenchmarks env _ =
        [ bgroup (o_1_space_prefix moduleName) $ Prelude.concat
            [ RO.allBenchmarks env
            , RW.allBenchmarks env
--- a/benchmark/Streamly/Benchmark/Prelude/Ahead.hs
+++ b/benchmark/Streamly/Benchmark/Prelude/Ahead.hs
@ -117,9 +117,7 @@ o_n_space_outerProduct value =
 -------------------------------------------------------------------------------

 main :: IO ()
-main = do
-    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
-    value `seq` runMode (mode cfg) cfg benches (allBenchmarks value)
+main = runWithCLIOpts defaultStreamSize allBenchmarks

    where

--- a/benchmark/Streamly/Benchmark/Prelude/Async.hs
+++ b/benchmark/Streamly/Benchmark/Prelude/Async.hs
@ -183,9 +183,8 @@ o_n_space_outerProduct value =
 -------------------------------------------------------------------------------

 main :: IO ()
-main = do
-    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
-    value `seq` runMode (mode cfg) cfg benches (allBenchmarks value)
+main = runWithCLIOpts defaultStreamSize allBenchmarks
+

    where

--- a/benchmark/Streamly/Benchmark/Prelude/Concurrent.hs
+++ b/benchmark/Streamly/Benchmark/Prelude/Concurrent.hs
@ -1,4 +1,3 @@
-{-# LANGUAGE RankNTypes #-}
 -- |
 -- Module      : Main
 -- Copyright   : (c) 2018 Composewell Technologies
@ -6,6 +5,8 @@
 -- License     : BSD3
 -- Maintainer  : streamly@composewell.com

+{-# LANGUAGE RankNTypes #-}
+
 import Control.Concurrent
 import Control.Monad (when, replicateM)
 import Streamly.Prelude
@ -77,13 +78,7 @@ concatGroup buflen threads usec n =

 main :: IO ()
 main =
-  defaultMainWith (defaultConfig
-    { timeLimit = Just 0
-    , minSamples = Just 1
-    , minDuration = 0
-    , includeFirstIter = True
-    , quickMode = True
-    })
+    defaultMain

    [ -- bgroup "append/buf-1-threads-10k-0sec"  (appendGroup 1 10000 0)
    -- , bgroup "append/buf-100-threads-100k-0sec"  (appendGroup 100 100000 0)
--- a/benchmark/Streamly/Benchmark/Prelude/Parallel.hs
+++ b/benchmark/Streamly/Benchmark/Prelude/Parallel.hs
@ -219,9 +219,7 @@ o_n_space_outerProduct value =
 -------------------------------------------------------------------------------

 main :: IO ()
-main = do
-    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
-    value `seq` runMode (mode cfg) cfg benches (allBenchmarks value)
+main = runWithCLIOpts defaultStreamSize allBenchmarks

    where

--- a/benchmark/Streamly/Benchmark/Prelude/Rate.hs
+++ b/benchmark/Streamly/Benchmark/Prelude/Rate.hs
@ -1,4 +1,5 @@
 {-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE CPP #-}

 -- |
 -- Module      : Main
@ -87,12 +88,12 @@ o_1_space_async value =
 o_1_space_ahead :: Int -> [Benchmark]
 o_1_space_ahead value =
    [ bgroup
-          "aheadly"
-          [ benchIOSrc fromAhead "avgRate/1M" $ avgRate value 1000000
-          , benchIOSrc fromAhead "minRate/1M" $ minRate value 1000000
-          , benchIOSrc fromAhead "maxRate/1M" $ maxRate value 1000000
-          , benchIOSrc fromAsync "constRate/1M" $ constRate value 1000000
-          ]
+        "aheadly"
+        [ benchIOSrc fromAhead "avgRate/1M" $ avgRate value 1000000
+        , benchIOSrc fromAhead "minRate/1M" $ minRate value 1000000
+        , benchIOSrc fromAhead "maxRate/1M" $ maxRate value 1000000
+        , benchIOSrc fromAsync "constRate/1M" $ constRate value 1000000
+        ]
    ]

 -------------------------------------------------------------------------------
@ -100,13 +101,10 @@ o_1_space_ahead value =
 -------------------------------------------------------------------------------

 main :: IO ()
-main = do
-    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
-    value `seq` runMode (mode cfg) cfg benches (allBenchmarks value)
+main = runWithCLIOpts defaultStreamSize allBenchmarks

    where

    allBenchmarks value =
        [ bgroup (o_1_space_prefix moduleName)
-              $ concat [o_1_space_async value, o_1_space_ahead value]
-        ]
+        $ concat [o_1_space_async value, o_1_space_ahead value]]
--- a/benchmark/Streamly/Benchmark/Prelude/Serial.hs
+++ b/benchmark/Streamly/Benchmark/Prelude/Serial.hs
@ -22,7 +22,6 @@ import qualified Serial.Transformation1 as Transformation1
 import qualified Serial.Transformation2 as Transformation2
 import qualified Serial.Transformation3 as Transformation3

-import Gauge hiding (env)
 import Streamly.Benchmark.Common

 moduleName :: String
@ -37,12 +36,12 @@ moduleName = "Prelude.Serial"
 --
 main :: IO ()
 main = do
-    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
    env <- mkHandleBenchEnv
-    value `seq` runMode (mode cfg) cfg benches (allBenchmarks value env)
+    runWithCLIOpts defaultStreamSize (allBenchmarks env)
+
    where

-    allBenchmarks size env = Prelude.concat
+    allBenchmarks env size = Prelude.concat
        [ Generation.benchmarks moduleName size
        , Elimination.benchmarks moduleName size
        , Exceptions.benchmarks moduleName env
--- a/benchmark/Streamly/Benchmark/Prelude/WAsync.hs
+++ b/benchmark/Streamly/Benchmark/Prelude/WAsync.hs
@ -5,6 +5,8 @@
 -- License     : BSD3
 -- Maintainer  : streamly@composewell.com

+{-# LANGUAGE CPP #-}
+
 import Prelude hiding (mapM)

 import Streamly.Prelude (fromWAsync, fromSerial, wAsync, maxBuffer, maxThreads)
@ -167,10 +169,8 @@ o_n_space_outerProduct value =
 -------------------------------------------------------------------------------

 main :: IO ()
-main = do
-    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
-    value `seq` runMode (mode cfg) cfg benches (allBenchmarks value)
-
+main = runWithCLIOpts defaultStreamSize allBenchmarks
+    
    where

    allBenchmarks value =
--- a/benchmark/Streamly/Benchmark/Prelude/WSerial.hs
+++ b/benchmark/Streamly/Benchmark/Prelude/WSerial.hs
@ -201,9 +201,8 @@ o_n_space_outerProduct value =
 -- passed using the --stream-size option.
 --
 main :: IO ()
-main = do
-    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
-    value `seq` runMode (mode cfg) cfg benches (allBenchmarks value)
+main = runWithCLIOpts defaultStreamSize allBenchmarks
+

    where

--- a/benchmark/Streamly/Benchmark/Prelude/ZipAsync.hs
+++ b/benchmark/Streamly/Benchmark/Prelude/ZipAsync.hs
@ -6,6 +6,7 @@
 -- Maintainer  : streamly@composewell.com

 {-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE CPP #-}

 import Streamly.Prelude (fromSerial)
 import qualified Streamly.Prelude  as S
@ -69,9 +70,7 @@ o_1_space_outerProduct value =
 -------------------------------------------------------------------------------

 main :: IO ()
-main = do
-    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
-    value `seq` runMode (mode cfg) cfg benches (allBenchmarks value)
+main = runWithCLIOpts defaultStreamSize allBenchmarks

    where

--- a/benchmark/Streamly/Benchmark/Prelude/ZipSerial.hs
+++ b/benchmark/Streamly/Benchmark/Prelude/ZipSerial.hs
@ -60,8 +60,8 @@ zipWith count n =
    S.drain $
    S.zipWith
        (,)
-        (S.fromSerial $ sourceUnfoldrM count n)
-        (S.fromSerial $ sourceUnfoldrM count (n + 1))
+        (S.fromSerial $ Main.sourceUnfoldrM count n)
+        (S.fromSerial $ Main.sourceUnfoldrM count (n + 1))

 #ifdef INSPECTION
 inspect $ hasNoTypeClasses 'zipWith
@ -75,8 +75,8 @@ zipWithM count n =
    S.drain $
    S.zipWithM
        (curry return)
-        (sourceUnfoldrM count n)
-        (sourceUnfoldrM count (n + 1))
+        (Main.sourceUnfoldrM count n)
+        (Main.sourceUnfoldrM count (n + 1))

 #ifdef INSPECTION
 inspect $ hasNoTypeClasses 'zipWithM
@ -125,9 +125,7 @@ o_1_space_outerProduct value =
 -- passed using the --stream-size option.
 --
 main :: IO ()
-main = do
-    (value, cfg, benches) <- parseCLIOpts defaultStreamSize
-    value `seq` runMode (mode cfg) cfg benches (allBenchmarks value)
+main = runWithCLIOpts defaultStreamSize allBenchmarks    

    where

--- a/benchmark/Streamly/Benchmark/Unicode/Stream.hs
+++ b/benchmark/Streamly/Benchmark/Unicode/Stream.hs
@ -288,9 +288,8 @@ o_1_space_decode_encode_read env =

 main :: IO ()
 main = do
-    (_, cfg, benches) <- parseCLIOpts defaultStreamSize
    env <- mkHandleBenchEnv
-    runMode (mode cfg) cfg benches (allBenchmarks env)
+    defaultMain (allBenchmarks env)

    where

--- a/benchmark/lib/Streamly/Benchmark/Common.hs
+++ b/benchmark/lib/Streamly/Benchmark/Common.hs
@ -16,7 +16,9 @@ module Streamly.Benchmark.Common
    , o_n_heap_prefix
    , o_n_stack_prefix

+   -- , parseEnvOpts
    , parseCLIOpts
+    , runWithCLIOpts

    , benchIOSink1
    , benchPure
@ -31,19 +33,34 @@ module Streamly.Benchmark.Common
    , mkListString

    , defaultStreamSize
+    , BenchOpts(..)
+#ifndef MIN_VERSION_gauge
+    , OptionDescription(..)
+    , includingOptions
+    , lookupOption
+    , defaultMainWithIngredients
+    , parseOptions
+#endif
    )
 where

-import Control.DeepSeq (NFData(..))
+#ifdef MIN_VERSION_gauge
 import Control.Exception (evaluate)
 import Control.Monad (when)
-import Data.Functor.Identity (Identity, runIdentity)
+import Text.Read (readMaybe)
 import Data.List (scanl')
 import Data.Maybe (mapMaybe)
 import System.Console.GetOpt
       (OptDescr(..), ArgDescr(..), ArgOrder(..), getOpt')
 import System.Environment (getArgs, lookupEnv, setEnv)
-import Text.Read (readMaybe)
+#else
+import Data.Proxy
+import Test.Tasty.Ingredients.Basic
+import Test.Tasty.Options
+import Test.Tasty.Runners
+#endif
+import Control.DeepSeq (NFData(..))
+import Data.Functor.Identity (Identity, runIdentity)
 import System.Random (randomRIO)

 import qualified Streamly.Prelude as S
@ -136,6 +153,7 @@ defaultStreamSize = 100000

 newtype BenchOpts = StreamSize Int deriving Show

+#ifdef MIN_VERSION_gauge
 getStreamSize :: String -> Int
 getStreamSize size =
    case (readMaybe size :: Maybe Int) of
@ -145,7 +163,7 @@ getStreamSize size =
 options :: [OptDescr BenchOpts]
 options =
    [
-      Option [] ["stream-size"] (ReqArg getSize "COUNT") "Stream element count"
+        System.Console.GetOpt.Option [] ["stream-size"] (ReqArg getSize "COUNT") "Stream element count"
    ]

    where
@ -169,7 +187,6 @@ deleteOptArgs (Just prev, _) opt =
 parseCLIOpts :: Int -> IO (Int, Config, [String])
 parseCLIOpts defStreamSize = do
    args <- getArgs
-
    -- Parse custom options
    let (opts, _, _, errs) = getOpt' Permute options args
    when (not $ null errs) $ error $ concat errs
@ -207,3 +224,33 @@ parseCLIOpts defStreamSize = do
                }
    let (cfg, benches) = parseWith config args'
    streamSize `seq` return (streamSize, cfg, benches)
+
+#else
+instance IsOption BenchOpts where
+    defaultValue = StreamSize defaultStreamSize
+    parseValue = fmap StreamSize . safeRead
+    optionName = pure "stream-size"
+    optionHelp = pure "StreamSize used in benchmarks"
+
+parseCLIOpts :: Int -> Benchmark -> IO (Int, [Ingredient])
+parseCLIOpts cDefSize benches = do
+    let customOpts  = [Test.Tasty.Options.Option (Proxy :: Proxy BenchOpts)]
+        ingredients = includingOptions customOpts : benchIngredients
+    opts <- parseOptions ingredients benches
+    let StreamSize size = lookupOption opts
+    print $ "Stream-Size = " ++ show size
+    if size == defaultStreamSize        -- LONG option is not set
+    then return (cDefSize, ingredients) -- use custom defaut size of Benchmark
+    else return (size, ingredients)     -- LONG option is set use large stream size
+#endif
+
+runWithCLIOpts :: Int -> (Int -> [Benchmark]) -> IO ()
+runWithCLIOpts cDefSize f = do
+
+#ifdef MIN_VERSION_gauge
+    (value, cfg, benches) <- parseCLIOpts cDefSize
+    value `seq` runMode (mode cfg) cfg benches (f value)
+#else
+    (value, ingredients) <- parseCLIOpts cDefSize $ bgroup "All" (f 0)
+    value `seq` defaultMainWithIngredients ingredients $ bgroup "All" (f value)
+#endif
--- a/benchmark/lib/Streamly/Benchmark/Common/Handle.hs
+++ b/benchmark/lib/Streamly/Benchmark/Common/Handle.hs
@ -86,6 +86,29 @@ withScaling env str =
       then str
       else str ++ " (1/" ++ show factor ++ ")"

+getHandles :: BenchEnv -> (RefHandles -> Handles) -> IO Handles
+getHandles env mkHandles = do
+    r <- readIORef $ href env
+    -- close old handles
+    hClose $ smallInH r
+    hClose $ bigInH r
+    hClose $ outputH r
+
+    -- reopen
+    smallInHandle <- openFile (smallInFile env) ReadMode
+    bigInHandle <- openFile (bigInFile env) ReadMode
+    outHandle <- openFile outfile WriteMode
+
+    let refHandles = RefHandles
+            { smallInH = smallInHandle
+            , bigInH = bigInHandle
+            , outputH = outHandle
+            }
+
+    -- update
+    writeIORef (href env) $ refHandles
+    return $ mkHandles refHandles
+
 mkBenchCommon ::
       NFData b
    => (RefHandles -> Handles)
@ -94,30 +117,11 @@ mkBenchCommon ::
    -> (Handle -> Handle -> IO b)
    -> Benchmark
 mkBenchCommon mkHandles name env action =
-    bench name $ perRunEnv (do
-            r <- readIORef $ href env
-
-            -- close old handles
-            hClose $ smallInH r
-            hClose $ bigInH r
-            hClose $ outputH r
-
-            -- reopen
-            smallInHandle <- openFile (smallInFile env) ReadMode
-            bigInHandle <- openFile (bigInFile env) ReadMode
-            outHandle <- openFile outfile WriteMode
-
-            let refHandles = RefHandles
-                    { smallInH = smallInHandle
-                    , bigInH = bigInHandle
-                    , outputH = outHandle
-                    }
-
-            -- update
-            writeIORef (href env) $ refHandles
-            return $ mkHandles refHandles
-        )
-        (\(Handles h1 h2) -> action h1 h2)
+     bench name $ nfIO $ do
+        -- XXX adds significant cpu time to the benchmarks
+        -- tasty-bench should provide a better way to do this
+        (Handles h1 h2) <- getHandles env mkHandles
+        action h1 h2

 mkBench ::
    NFData b => String -> BenchEnv -> (Handle -> Handle -> IO b) -> Benchmark
--- a/benchmark/streamly-benchmarks.cabal
+++ b/benchmark/streamly-benchmarks.cabal
@ -54,6 +54,11 @@ flag opt
  manual: True
  default: True

+flag use-gauge
+  description: Use gauge instead of tasty-bench for benchmarking
+  manual: True
+  default: False
+
 -------------------------------------------------------------------------------
 -- Common stanzas
 -------------------------------------------------------------------------------
@ -113,13 +118,22 @@ common bench-depends
    -- other libraries
    , streamly            >= 0.7.0
    , random              >= 1.0   && < 2.0
-    , gauge               >= 0.2.4 && < 0.3
    , transformers        >= 0.4   && < 0.6
    , containers          >= 0.5   && < 0.7
    , typed-process     >= 0.2.3 && < 0.3
    , directory         >= 1.2.2 && < 1.4
    , ghc-prim          >= 0.4   && < 0.8

+  if flag(use-gauge)    
+    build-depends:  gauge >= 0.2.4 && < 0.3
+  else    
+    build-depends:    tasty-bench >= 0.2.5 && < 0.3
+                    , tasty     >= 1.4.1
+    mixins: tasty-bench
+      (Test.Tasty.Bench as Gauge     
+      , Test.Tasty.Bench as Gauge.Main
+      )
+
  if flag(fusion-plugin) && !impl(ghcjs) && !impl(ghc < 8.6)
    build-depends:
        fusion-plugin     >= 0.2   && < 0.3
--- a/bin/bench-exec-one.sh
+++ b/bin/bench-exec-one.sh
@ -100,9 +100,15 @@ bench_rts_opts_specific () {
 # Speed options
 #------------------------------------------------------------------------------

-# Do not keep time limit as 0 otherwise GC stats may remain 0 in some cases.
-SUPER_QUICK_OPTIONS="--quick --min-duration 0 --time-limit 0.01 --include-first-iter"
-QUICKER_OPTIONS="--min-samples 3 --time-limit 1"
+if test "$USE_GAUGE" -eq 0
+then
+  SUPER_QUICK_OPTIONS="--stdev 1000000"
+  QUICKER_OPTIONS="--stdev 1000"
+else
+  # Do not keep time limit as 0 otherwise GC stats may remain 0 in some cases.
+  SUPER_QUICK_OPTIONS="--quick --min-duration 0 --time-limit 0.01 --include-first-iter"
+  QUICKER_OPTIONS="--min-samples 3 --time-limit 1"
+fi

 # For certain long benchmarks if the user has not requested super quick
 # mode we anyway use a slightly quicker mode.
@ -152,13 +158,38 @@ bench_quick_opts () {
  esac
 }

-last=""
+bench_output_file() {
+    local bench_name=$1
+    echo "charts/$bench_name/results.csv"
+}
+
+BENCH_NAME_ORIG=""
 for i in "$@"
 do
-    BENCH_NAME="$last"
-    last="$i"
+    BENCH_NAME_ORIG="$i"
 done

+if test "$USE_GAUGE" -eq 0
+then
+  # XXX this is a hack to make the "/" separated names used in the functions
+  # determining options based on benchmark name. For tasty-bench the benchmark
+  # names are separated by "." instead of "/".
+  BENCH_NAME=$(echo $BENCH_NAME_ORIG | sed -e s/^All\.//)
+  BENCH_NAME1=$(echo $BENCH_NAME | cut -f1 -d '/')
+  BENCH_NAME2=$(echo $BENCH_NAME | cut -f2- -d '/' | sed -e 's/\./\//g')
+  BENCH_NAME="$BENCH_NAME1/$BENCH_NAME2"
+  JOB_OPT=" -j 1"
+else
+  BENCH_NAME=$BENCH_NAME_ORIG
+  JOB_OPT=""
+fi
+if test "$LONG" -eq 0
+then
+  SIZE_OPT=""
+else
+  SIZE_OPT="--stream-size 1000000"
+fi
+
 RTS_OPTIONS=\
 "+RTS -T \
 $(bench_exe_rts_opts $(basename $BENCH_EXEC_PATH)) \
@ -168,17 +199,27 @@ $RTS_OPTIONS \
 -RTS"

 QUICK_BENCH_OPTIONS="\
+$(if test "$QUICK_MODE" -ne 0; then echo $SUPER_QUICK_OPTIONS; else :; fi)
 $(bench_exe_quick_opts $(basename $BENCH_EXEC_PATH)) \
 $(bench_quick_opts $BENCH_NAME)"

-if test -n "$STREAM_SIZE"
-then
-  STREAM_LEN=$(env LC_ALL=en_US.UTF-8 printf "\--stream-size %'.f\n" $STREAM_SIZE)
-fi
+output_file=$(bench_output_file $(basename $BENCH_EXEC_PATH))
+mkdir -p `dirname $output_file`

 echo "$BENCH_NAME: \
-$STREAM_LEN \
 $QUICK_BENCH_OPTIONS \
 $RTS_OPTIONS"

-$BENCH_EXEC_PATH $RTS_OPTIONS "$@" $QUICK_BENCH_OPTIONS
+rm -f ${output_file}.tmp
+if test $USE_GAUGE -eq 0
+then
+  BENCH_NAME_ESC=$(echo "$BENCH_NAME_ORIG" | sed -e 's/\\/\\\\/g' | sed -e 's/"/\\"/g')
+  $BENCH_EXEC_PATH $SIZE_OPT $JOB_OPT $RTS_OPTIONS $QUICK_BENCH_OPTIONS --csv=${output_file}.tmp \
+    -p '$0 == "'"$BENCH_NAME_ESC"'"'
+  tail -n +2 ${output_file}.tmp | \
+    awk 'BEGIN {FPAT = "([^,]+)|(\"[^\"]+\")";OFS=","} {$2=$2/1000000000000;print}' >> $output_file
+else
+  $BENCH_EXEC_PATH $SIZE_OPT $RTS_OPTIONS $QUICK_BENCH_OPTIONS --csvraw=${output_file}.tmp \
+    -m exact "$BENCH_NAME"
+  tail -n +2 ${output_file}.tmp >> $output_file
+fi
--- a/bin/bench.sh
+++ b/bin/bench.sh
@ -142,62 +142,71 @@ bench_output_file() {
    echo "charts/$bench_name/results.csv"
 }

-# --min-duration 0 means exactly one iteration per sample. We use a million
-# iterations in the benchmarking code explicitly and do not use the iterations
-# done by the benchmarking tool.
-#
-# Benchmarking tool by default discards the first iteration to remove
-# aberrations due to initial evaluations etc. We do not discard it because we
-# are anyway doing iterations in the benchmarking code and many of them so that
-# any constant factor gets amortized and anyway it is a cost that we pay in
-# real life.
-#
-# We can pass --min-samples value from the command line as second argument
-# after the benchmark name in case we want to use more than one sample.
+run_bench_target () {
+  local package_name=$1
+  local component=$2
+  local target_name=$3
+  local output_file=$(bench_output_file $target_name)

-# $1: bench name
-# $2: bench executable
-target_exe_extra_args () {
-  local bench_name=$1
-  local bench_prog=$2
+  local target_prog
+  target_prog=$(cabal_target_prog $package_name $component $target_name) || \
+    die "Cannot find executable for target $target_name"

-  local output_file=$(bench_output_file $bench_name)
-  mkdir -p `dirname $output_file`
+  echo "Running executable $target_name ..."

-  local QUICK_OPTS="--quick --min-duration 0"
-  local SPEED_OPTIONS
-  if test "$LONG" -eq 0
+  # Needed by bench-exec-one.sh
+  export BENCH_EXEC_PATH=$target_prog
+  if test "$LONG" -ne 0
  then
-    if test "$SLOW" -eq 0
-    then
-        if test "$QUICK_MODE" -eq 0
-        then
-          # default mode, not super quick, not slow
-          SPEED_OPTIONS="$QUICK_OPTS --min-samples 10 --time-limit 1"
-        else
-          # super quick but less accurate
-          # When the time-limit is too low and the benchmark is tiny,
-          # then if the number of iterations is very small the GC stats
-          # may remain 0.  So keep the time-limit at a minimum of 10 ms
-          # to collect significant stats. The problem was observed in
-          # the Prelude.Serial/reverse' benchmark.
-          SPEED_OPTIONS="$QUICK_OPTS --time-limit 0.01 --include-first-iter"
-        fi
-    else
-      # Slow but more accurate mode
-      SPEED_OPTIONS="--min-duration 0"
-    fi
-  else
-      # large stream size, always super quick
-      GAUGE_ARGS="$GAUGE_ARGS $bench_name/o-1-space"
-      SPEED_OPTIONS="--stream-size 10000000 $QUICK_OPTS --include-first-iter"
+      BENCH_ARGS="-p /$target_name\/o-1-space/"
+      STREAM_SIZE=10000000
+      export STREAM_SIZE
  fi

-  echo "$SPEED_OPTIONS \
-    --csvraw=$output_file \
-    -v 2 \
-    --measure-with "$SCRIPT_DIR/bench-exec-one.sh" \
-    $GAUGE_ARGS"
+  local MATCH=""
+  if test "$USE_GAUGE" -eq 0
+  then
+    if test "$LONG" -ne 0
+    then
+      MATCH="-p /$target_name\/o-1-space/"
+    else
+        if test -n "$GAUGE_ARGS"
+        then
+          local GAUGE_ARGS1=$(echo "$GAUGE_ARGS" | sed -e 's/\//\\\//g')
+          MATCH="-p /$GAUGE_ARGS1/"
+        fi
+    fi
+    echo "Name,cpuTime,2*Stdev (ps),Allocated,bytesCopied" >> $output_file
+    $target_prog -l $MATCH \
+      | grep "^All" \
+      | while read -r name; do bin/bench-exec-one.sh "$name"; done
+  else
+    if test "$LONG" -ne 0
+    then
+      MATCH="$target_name/o-1-space"
+    else
+      MATCH="$GAUGE_ARGS"
+    fi
+    echo "name,iters,time,cycles,cpuTime,utime,stime,maxrss,minflt,majflt,nvcsw,nivcsw,allocated,numGcs,bytesCopied,mutatorWallSeconds,mutatorCpuSeconds,gcWallSeconds,gcCpuSeconds" >> $output_file
+    # XXX We may have to use "sort | awk" to keep only benchmark names with
+    # shortest prefix e.g. "a/b/c" and "a/b", we should only keep "a/b"
+    # otherwise benchmarks will run multiple times.
+    $target_prog -l \
+      | grep "^$target_name" \
+      | grep "^$MATCH" \
+      | sort | paste -sd "," - | awk 'BEGIN {FS=","} {t="XU987"; for(i=1;i<=NF;i++) if (substr($i,1,length(t)) != t) {print $i; t=$i}}' \
+      | while read -r name; do bin/bench-exec-one.sh "$name"; done
+  fi
+}
+
+# $1: package name
+# $2: component
+# $3: targets
+run_bench_targets() {
+    for i in $3
+    do
+      run_bench_target $1 $2 $i
+    done
 }

 run_benches_comparing() {
@ -217,14 +226,14 @@ run_benches_comparing() {
    git checkout "$BASE" || die "Checkout of base commit [$BASE] failed"

    $BUILD_BENCH || die "build failed"
-    run_targets streamly-benchmarks b "$bench_list" target_exe_extra_args
+    run_bench_targets streamly-benchmarks b "$bench_list" target_exe_extra_args

    echo "Checking out candidate commit [$CANDIDATE] for benchmarking"
    git checkout "$CANDIDATE" || \
        die "Checkout of candidate [$CANDIDATE] commit failed"

    $BUILD_BENCH || die "build failed"
-    run_targets streamly-benchmarks b "$bench_list" target_exe_extra_args
+    run_bench_targets streamly-benchmarks b "$bench_list" target_exe_extra_args
    # XXX reset back to the original commit
 }

@ -248,7 +257,7 @@ run_measurements() {

  if test "$COMPARE" = "0"
  then
-    run_targets streamly-benchmarks b "$bench_list" target_exe_extra_args
+    run_bench_targets streamly-benchmarks b "$bench_list" target_exe_extra_args
  else
    run_benches_comparing "$bench_list"
  fi
@ -274,6 +283,8 @@ run_reports() {

 cd $SCRIPT_DIR/..

+USE_GAUGE=0
+export USE_GAUGE
 USE_GIT_CABAL=1
 set_common_vars

@ -323,7 +334,7 @@ do
    --long) LONG=1; shift ;;
    --graphs) GRAPH=1; shift ;;
    --no-measure) MEASURE=0; shift ;;
-    --dev-build) RUNNING_DEVBUILD=1; shift ;;
+    --dev-build) RUNNING_DEVBUILD=1; shift ;;    
    --) shift; break ;;
    -*|--*) echo "Unknown flags: $*"; echo; print_help ;;
    *) break ;;
@ -409,6 +420,7 @@ then
  run_build "$BUILD_BENCH" streamly-benchmarks bench "$TARGETS"
  export QUICK_MODE
  export RTS_OPTIONS
+  export LONG
  run_measurements "$TARGETS"
 fi

--- a/stack.yaml
+++ b/stack.yaml
@ -1,4 +1,4 @@
-resolver: lts-16.12
+resolver: lts-17.9
 packages:
 - '.'
 - './benchmark'
@ -8,6 +8,8 @@ extra-deps:
 - QuickCheck-2.14.1@sha256:01e46d7b0a8d3148288ec977625f62d5516ebb5031a50c63f0453863301b4a79,7736
 - random-1.2.0@sha256:4969b16ef23e9a57a415abcd39d982c81844031557b66349651b0fe3255b8986,6094
 - splitmix-0.1.0.1@sha256:22f9662e7e8b173421872241edd39350078a9ed4bb9e9f503948c5b483c79276,5253
+- tasty-bench-0.2.5@sha256:4f00fdafc3aba6e48366a3684c427618d98c0b1b130cd9cab35c9498796c9a65,1497
+- tasty-1.4.1@sha256:69e90e965543faf0fc2c8e486d6c1d8cf81fd108e2c4541234c41490f392f94f,2638

 #allow-newer: true
 flags: {}