diff --git a/benchmark/Streamly/Benchmark/Data/Array/Stream/Foreign.hs b/benchmark/Streamly/Benchmark/Data/Array/Stream/Foreign.hs new file mode 100644 index 00000000..33280d97 --- /dev/null +++ b/benchmark/Streamly/Benchmark/Data/Array/Stream/Foreign.hs @@ -0,0 +1,207 @@ +-- | +-- Module : Streamly.Benchmark.Data.ParserD +-- Copyright : (c) 2020 Composewell Technologies +-- +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com + +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} + +#ifdef __HADDOCK_VERSION__ +#undef INSPECTION +#endif + +#ifdef INSPECTION +{-# LANGUAGE TemplateHaskell #-} +{-# OPTIONS_GHC -fplugin Test.Inspection.Plugin #-} +#endif + +module Main + ( + main + ) where + +import Data.Functor.Identity (runIdentity) +import Data.Word (Word8) +import System.IO (Handle) +import Prelude hiding () + +import qualified Streamly.Prelude as Stream +import qualified Streamly.Internal.Data.Array.Foreign as Array +import qualified Streamly.Internal.Data.Array.Stream.Foreign as ArrayStream +import qualified Streamly.Internal.FileSystem.Handle as Handle +import qualified Streamly.Internal.Unicode.Stream as Unicode + +import Gauge hiding (env) +import Streamly.Benchmark.Common +import Streamly.Benchmark.Common.Handle + +#ifdef INSPECTION +import Foreign.Storable (Storable) +import Streamly.Internal.Data.Stream.StreamD.Type (Step(..)) +import Test.Inspection +#endif + +------------------------------------------------------------------------------- +-- read chunked using toChunks +------------------------------------------------------------------------------- + +-- | Get the last byte from a file bytestream. +toChunksLast :: Handle -> IO (Maybe Word8) +toChunksLast inh = do + let s = Handle.toChunks inh + larr <- Stream.last s + return $ case larr of + Nothing -> Nothing + Just arr -> Array.readIndex arr (Array.length arr - 1) + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'toChunksLast +inspect $ 'toChunksLast `hasNoType` ''Step +#endif + +-- | Count the number of bytes in a file. +toChunksSumLengths :: Handle -> IO Int +toChunksSumLengths inh = + let s = Handle.toChunks inh + in Stream.sum (Stream.map Array.length s) + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'toChunksSumLengths +inspect $ 'toChunksSumLengths `hasNoType` ''Step +#endif + +-- | Sum the bytes in a file. +toChunksCountBytes :: Handle -> IO Word8 +toChunksCountBytes inh = do + let foldlArr' f z = runIdentity . Stream.foldl' f z . Array.toStream + let s = Handle.toChunks inh + Stream.foldl' (\acc arr -> acc + foldlArr' (+) 0 arr) 0 s + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'toChunksCountBytes +inspect $ 'toChunksCountBytes `hasNoType` ''Step +#endif + +toChunksDecodeUtf8Arrays :: Handle -> IO () +toChunksDecodeUtf8Arrays = + Stream.drain . Unicode.decodeUtf8Arrays . Handle.toChunks + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'toChunksDecodeUtf8Arrays +-- inspect $ 'toChunksDecodeUtf8ArraysLenient `hasNoType` ''Step +#endif + +------------------------------------------------------------------------------- +-- Splitting +------------------------------------------------------------------------------- + +-- | Count the number of lines in a file. +toChunksSplitOnSuffix :: Handle -> IO Int +toChunksSplitOnSuffix = + Stream.length . ArrayStream.splitOnSuffix 10 . Handle.toChunks + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'toChunksSplitOnSuffix +inspect $ 'toChunksSplitOnSuffix `hasNoType` ''Step +#endif + +-- XXX use a word splitting combinator instead of splitOn and test it. +-- | Count the number of words in a file. +toChunksSplitOn :: Handle -> IO Int +toChunksSplitOn = Stream.length . ArrayStream.splitOn 32 . Handle.toChunks + +#ifdef INSPECTION +inspect $ hasNoTypeClasses 'toChunksSplitOn +inspect $ 'toChunksSplitOn `hasNoType` ''Step +#endif + +o_1_space_read_chunked :: BenchEnv -> [Benchmark] +o_1_space_read_chunked env = + -- read using toChunks instead of read + [ bgroup "reduce/toChunks" + [ mkBench "Stream.last" env $ \inH _ -> + toChunksLast inH + -- Note: this cannot be fairly compared with GNU wc -c or wc -m as + -- wc uses lseek to just determine the file size rather than reading + -- and counting characters. + , mkBench "Stream.sum . Stream.map Array.length" env $ \inH _ -> + toChunksSumLengths inH + , mkBench "splitOnSuffix" env $ \inH _ -> + toChunksSplitOnSuffix inH + , mkBench "splitOn" env $ \inH _ -> + toChunksSplitOn inH + , mkBench "countBytes" env $ \inH _ -> + toChunksCountBytes inH + , mkBenchSmall "decodeUtf8Arrays" env $ \inH _ -> + toChunksDecodeUtf8Arrays inH + ] + ] + +------------------------------------------------------------------------------- +-- copy with group/ungroup transformations +------------------------------------------------------------------------------- + +-- | Lines and unlines +{-# NOINLINE copyChunksSplitInterposeSuffix #-} +copyChunksSplitInterposeSuffix :: Handle -> Handle -> IO () +copyChunksSplitInterposeSuffix inh outh = + Stream.fold (Handle.write outh) + $ ArrayStream.interposeSuffix 10 + $ ArrayStream.splitOnSuffix 10 + $ Handle.toChunks inh + +#ifdef INSPECTION +inspect $ hasNoTypeClassesExcept 'copyChunksSplitInterposeSuffix [''Storable] +inspect $ 'copyChunksSplitInterposeSuffix `hasNoType` ''Step +#endif + +-- | Words and unwords +{-# NOINLINE copyChunksSplitInterpose #-} +copyChunksSplitInterpose :: Handle -> Handle -> IO () +copyChunksSplitInterpose inh outh = + Stream.fold (Handle.write outh) + $ ArrayStream.interpose 32 + -- XXX this is not correct word splitting combinator + $ ArrayStream.splitOn 32 + $ Handle.toChunks inh + +#ifdef INSPECTION +inspect $ hasNoTypeClassesExcept 'copyChunksSplitInterpose [''Storable] +inspect $ 'copyChunksSplitInterpose `hasNoType` ''Step +#endif + +o_1_space_copy_toChunks_group_ungroup :: BenchEnv -> [Benchmark] +o_1_space_copy_toChunks_group_ungroup env = + [ bgroup "copy/toChunks/group-ungroup" + [ mkBench "interposeSuffix . splitOnSuffix" env $ \inh outh -> + copyChunksSplitInterposeSuffix inh outh + , mkBenchSmall "interpose . splitOn" env $ \inh outh -> + copyChunksSplitInterpose inh outh + ] + ] + +------------------------------------------------------------------------------- +-- Driver +------------------------------------------------------------------------------- + +moduleName :: String +moduleName = "Data.Array.Stream.Foreign" + +main :: IO () +main = do + (value, cfg, benches) <- parseCLIOpts defaultStreamSize + env <- mkHandleBenchEnv + value `seq` runMode (mode cfg) cfg benches + (allBenchmarks env) + + where + + allBenchmarks env = + [ bgroup (o_1_space_prefix moduleName) + ( o_1_space_read_chunked env + ++ o_1_space_copy_toChunks_group_ungroup env + ) + ] diff --git a/benchmark/Streamly/Benchmark/Data/Parser/ParserD.hs b/benchmark/Streamly/Benchmark/Data/Parser/ParserD.hs index e5c3be56..f45fc26c 100644 --- a/benchmark/Streamly/Benchmark/Data/Parser/ParserD.hs +++ b/benchmark/Streamly/Benchmark/Data/Parser/ParserD.hs @@ -361,17 +361,18 @@ o_n_space_serial value = main :: IO () main = do (value, cfg, benches) <- parseCLIOpts defaultStreamSize - arrays <- IP.toList $ IP.arraysOf 100 $ sourceUnfoldrM value 0 - value `seq` runMode (mode cfg) cfg benches (allBenchmarks value arrays) + arraysSmall <- IP.toList $ IP.arraysOf 100 $ sourceUnfoldrM value 0 + value `seq` runMode (mode cfg) cfg benches + (allBenchmarks value arraysSmall) where - allBenchmarks value arrays = + allBenchmarks value arraysSmall = [ bgroup (o_1_space_prefix moduleName) (o_1_space_serial value) , bgroup (o_1_space_prefix moduleName) (o_1_space_serial_spanning value) , bgroup (o_1_space_prefix moduleName) (o_1_space_serial_nested value) , bgroup (o_1_space_prefix moduleName) - (o_1_space_serial_unfold value arrays) + (o_1_space_serial_unfold value arraysSmall) , bgroup (o_n_heap_prefix moduleName) (o_n_heap_serial value) , bgroup (o_n_space_prefix moduleName) (o_n_space_serial value) ] diff --git a/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs b/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs index a8a353df..32855b74 100644 --- a/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs +++ b/benchmark/Streamly/Benchmark/FileSystem/Handle/Read.hs @@ -22,7 +22,6 @@ module Handle.Read (allBenchmarks) where -import Data.Functor.Identity (runIdentity) import Data.Word (Word8) import GHC.Magic (inline) #if __GLASGOW_HASKELL__ >= 802 @@ -38,7 +37,6 @@ import qualified Streamly.Internal.Data.Array.Foreign.Type as AT import qualified Streamly.Internal.Data.Fold as FL import qualified Streamly.Internal.Data.Stream.IsStream as IP import qualified Streamly.Internal.FileSystem.Handle as IFH -import qualified Streamly.Internal.Data.Array.Stream.Foreign as AS import qualified Streamly.Internal.Unicode.Stream as IUS import qualified Streamly.Prelude as S import qualified Streamly.Unicode.Stream as SS @@ -57,97 +55,6 @@ import qualified Streamly.Internal.Data.Unfold as IUF import Test.Inspection #endif -------------------------------------------------------------------------------- --- read chunked using toChunks -------------------------------------------------------------------------------- - --- | Get the last byte from a file bytestream. -toChunksLast :: Handle -> IO (Maybe Word8) -toChunksLast inh = do - let s = IFH.toChunks inh - larr <- S.last s - return $ case larr of - Nothing -> Nothing - Just arr -> A.readIndex arr (A.length arr - 1) - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'toChunksLast -inspect $ 'toChunksLast `hasNoType` ''Step -#endif - --- | Count the number of bytes in a file. -toChunksSumLengths :: Handle -> IO Int -toChunksSumLengths inh = - let s = IFH.toChunks inh - in S.sum (S.map A.length s) - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'toChunksSumLengths -inspect $ 'toChunksSumLengths `hasNoType` ''Step -#endif - --- | Count the number of lines in a file. -toChunksSplitOnSuffix :: Handle -> IO Int -toChunksSplitOnSuffix = S.length . AS.splitOnSuffix 10 . IFH.toChunks - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'toChunksSplitOnSuffix -inspect $ 'toChunksSplitOnSuffix `hasNoType` ''Step -#endif - --- XXX use a word splitting combinator instead of splitOn and test it. --- | Count the number of words in a file. -toChunksSplitOn :: Handle -> IO Int -toChunksSplitOn = S.length . AS.splitOn 32 . IFH.toChunks - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'toChunksSplitOn -inspect $ 'toChunksSplitOn `hasNoType` ''Step -#endif - --- | Sum the bytes in a file. -toChunksCountBytes :: Handle -> IO Word8 -toChunksCountBytes inh = do - let foldlArr' f z = runIdentity . S.foldl' f z . A.toStream - let s = IFH.toChunks inh - S.foldl' (\acc arr -> acc + foldlArr' (+) 0 arr) 0 s - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'toChunksCountBytes -inspect $ 'toChunksCountBytes `hasNoType` ''Step -#endif - -toChunksDecodeUtf8Arrays :: Handle -> IO () -toChunksDecodeUtf8Arrays = - S.drain . IUS.decodeUtf8Arrays . IFH.toChunks - -#ifdef INSPECTION -inspect $ hasNoTypeClasses 'toChunksDecodeUtf8Arrays --- inspect $ 'toChunksDecodeUtf8ArraysLenient `hasNoType` ''Step -#endif - -o_1_space_read_chunked :: BenchEnv -> [Benchmark] -o_1_space_read_chunked env = - -- read using toChunks instead of read - [ bgroup "reduce/toChunks" - [ mkBench "S.last" env $ \inH _ -> - toChunksLast inH - -- Note: this cannot be fairly compared with GNU wc -c or wc -m as - -- wc uses lseek to just determine the file size rather than reading - -- and counting characters. - , mkBench "S.sum . S.map A.length" env $ \inH _ -> - toChunksSumLengths inH - , mkBench "AS.splitOnSuffix" env $ \inH _ -> - toChunksSplitOnSuffix inH - , mkBench "AS.splitOn" env $ \inH _ -> - toChunksSplitOn inH - , mkBench "countBytes" env $ \inH _ -> - toChunksCountBytes inH - , mkBenchSmall "US.decodeUtf8Arrays" env $ \inH _ -> - toChunksDecodeUtf8Arrays inH - ] - ] - -- TBD reading with unfold ------------------------------------------------------------------------------- @@ -388,8 +295,7 @@ o_1_space_reduce_read_grouped env = allBenchmarks :: BenchEnv -> [Benchmark] allBenchmarks env = Prelude.concat - [ o_1_space_read_chunked env - , o_1_space_reduce_read env + [ o_1_space_reduce_read env , o_1_space_reduce_toBytes env , o_1_space_reduce_read_grouped env ] diff --git a/benchmark/Streamly/Benchmark/FileSystem/Handle/ReadWrite.hs b/benchmark/Streamly/Benchmark/FileSystem/Handle/ReadWrite.hs index a821c25d..783fc746 100644 --- a/benchmark/Streamly/Benchmark/FileSystem/Handle/ReadWrite.hs +++ b/benchmark/Streamly/Benchmark/FileSystem/Handle/ReadWrite.hs @@ -29,7 +29,6 @@ import Streamly.Internal.Data.Array.Foreign.Type (defaultChunkSize) import qualified Streamly.FileSystem.Handle as FH import qualified Streamly.Internal.Data.Unfold as IUF import qualified Streamly.Internal.FileSystem.Handle as IFH -import qualified Streamly.Internal.Data.Array.Stream.Foreign as AS import qualified Streamly.Data.Array.Foreign as A import qualified Streamly.Prelude as S @@ -37,7 +36,6 @@ import Gauge hiding (env) import Streamly.Benchmark.Common.Handle #ifdef INSPECTION -import Foreign.Storable (Storable) import Streamly.Internal.Data.Stream.StreamD.Type (Step(..)) import qualified Streamly.Internal.Data.Stream.StreamD.Type as D @@ -206,48 +204,6 @@ o_1_space_copy env = ] ] -------------------------------------------------------------------------------- --- copy with group/ungroup transformations -------------------------------------------------------------------------------- - --- | Lines and unlines -copyChunksSplitInterposeSuffix :: Handle -> Handle -> IO () -copyChunksSplitInterposeSuffix inh outh = - S.fold (IFH.write outh) - $ AS.interposeSuffix 10 - $ AS.splitOnSuffix 10 - $ IFH.toChunks inh - -#ifdef INSPECTION -inspect $ hasNoTypeClassesExcept 'copyChunksSplitInterposeSuffix [''Storable] -inspect $ 'copyChunksSplitInterposeSuffix `hasNoType` ''Step -#endif - --- | Words and unwords -copyChunksSplitInterpose :: Handle -> Handle -> IO () -copyChunksSplitInterpose inh outh = - S.fold (IFH.write outh) - $ AS.interpose 32 - -- XXX this is not correct word splitting combinator - $ AS.splitOn 32 - $ IFH.toChunks inh - -#ifdef INSPECTION -inspect $ hasNoTypeClassesExcept 'copyChunksSplitInterpose [''Storable] -inspect $ 'copyChunksSplitInterpose `hasNoType` ''Step -#endif - -o_1_space_copy_toChunks_group_ungroup :: BenchEnv -> [Benchmark] -o_1_space_copy_toChunks_group_ungroup env = - [ bgroup "copy/toChunks/group-ungroup" - [ mkBench "AS.interposeSuffix . AS.splitOnSuffix" env $ \inh outh -> - copyChunksSplitInterposeSuffix inh outh - , mkBenchSmall "AS.interpose . AS.splitOn" env $ \inh outh -> - copyChunksSplitInterpose inh outh - ] - ] - - ------------------------------------------------------------------------------- -- ------------------------------------------------------------------------------- @@ -258,5 +214,4 @@ allBenchmarks env = Prelude.concat , o_1_space_copy_read env , o_1_space_copy_fromBytes env , o_1_space_copy env - , o_1_space_copy_toChunks_group_ungroup env ] diff --git a/benchmark/streamly-benchmarks.cabal b/benchmark/streamly-benchmarks.cabal index d9c88164..9b0eaa4b 100644 --- a/benchmark/streamly-benchmarks.cabal +++ b/benchmark/streamly-benchmarks.cabal @@ -299,7 +299,6 @@ benchmark Data.Parser.ParserD if flag(limit-build-mem) ghc-options: +RTS -M750M -RTS - benchmark Data.Parser.ParserK import: bench-options type: exitcode-stdio-1.0 @@ -458,6 +457,21 @@ benchmark Data.Array.Foreign other-modules: Streamly.Benchmark.Data.ArrayOps cpp-options: -DMEMORY_ARRAY +------------------------------------------------------------------------------- +-- Array Stream Benchmarks +------------------------------------------------------------------------------- + +benchmark Data.Array.Stream.Foreign + import: bench-options + type: exitcode-stdio-1.0 + hs-source-dirs: Streamly/Benchmark/Data/Array/Stream + main-is: Foreign.hs + if impl(ghcjs) + buildable: False + else + buildable: True + build-depends: exceptions >= 0.8 && < 0.11 + ------------------------------------------------------------------------------- -- FileIO Benchmarks ------------------------------------------------------------------------------- diff --git a/hie.yaml b/hie.yaml index 2144d5b2..c4179b3e 100644 --- a/hie.yaml +++ b/hie.yaml @@ -4,6 +4,8 @@ cradle: config: cradle: cabal: + - path: "./benchmark/Streamly/Benchmark/Data/Array/Stream/Foreign.hs" + component: "bench:Data.Array.Stream.Foreign" - path: "./benchmark/Streamly/Benchmark/Data/Fold.hs" component: "bench:Data.Fold" - path: "./benchmark/Streamly/Benchmark/Data/Parser.hs" diff --git a/streamly.cabal b/streamly.cabal index a1956d8d..4c4229ed 100644 --- a/streamly.cabal +++ b/streamly.cabal @@ -71,6 +71,7 @@ extra-source-files: benchmark/*.hs benchmark/README.md benchmark/Streamly/Benchmark/Data/*.hs + benchmark/Streamly/Benchmark/Data/Array/Stream/Foreign.hs benchmark/Streamly/Benchmark/Data/Parser/*.hs benchmark/Streamly/Benchmark/Data/Stream/*.hs benchmark/Streamly/Benchmark/FileSystem/*.hs