Add some word/unword, word counting benchmarks

This commit is contained in:
Harendra Kumar 2019-09-15 18:37:50 +05:30
parent 0b2f615930
commit 661f84262e
6 changed files with 119 additions and 26 deletions

View File

@ -136,7 +136,7 @@ matrix:
#- env: BUILD=stack RESOLVER=nightly #- env: BUILD=stack RESOLVER=nightly
# addons: {apt: {packages: [cabal-install-1.24], sources: [hvr-ghc]}} # addons: {apt: {packages: [cabal-install-1.24], sources: [hvr-ghc]}}
- env: BUILD=stack RESOLVER=lts-13 GHCVER=8.6 STACK_BUILD_OPTIONS="--flag streamly:examples-sdl --flag streamly:benchmark --flag streamly:inspection" - env: BUILD=stack RESOLVER=lts-13 GHCVER=8.6 STACK_BUILD_OPTIONS="--flag streamly:examples-sdl --flag streamly:benchmark"
addons: {apt: {packages: [cabal-install-2.4,libsdl1.2-dev], sources: [hvr-ghc]}} addons: {apt: {packages: [cabal-install-2.4,libsdl1.2-dev], sources: [hvr-ghc]}}
#- env: BUILD=stack RESOLVER=lts-12 GHCVER=8.4 #- env: BUILD=stack RESOLVER=lts-12 GHCVER=8.4

View File

@ -79,6 +79,9 @@ main = do
, mkBench "linecount" href $ do , mkBench "linecount" href $ do
Handles inh _ <- readIORef href Handles inh _ <- readIORef href
BFA.countLines inh BFA.countLines inh
, mkBench "wordcount" href $ do
Handles inh _ <- readIORef href
BFA.countWords inh
, mkBench "sum" href $ do , mkBench "sum" href $ do
Handles inh _ <- readIORef href Handles inh _ <- readIORef href
BFA.sumBytes inh BFA.sumBytes inh
@ -99,6 +102,9 @@ main = do
, mkBench "linecountU" href $ do , mkBench "linecountU" href $ do
Handles inh _ <- readIORef href Handles inh _ <- readIORef href
BFS.countLinesU inh BFS.countLinesU inh
, mkBench "wordcount" href $ do
Handles inh _ <- readIORef href
BFS.countWords inh
, mkBench "sum" href $ do , mkBench "sum" href $ do
Handles inh _ <- readIORef href Handles inh _ <- readIORef href
BFS.sumBytes inh BFS.sumBytes inh
@ -155,6 +161,12 @@ main = do
, mkBench "words-unwords" href $ do , mkBench "words-unwords" href $ do
Handles inh outh <- readIORef href Handles inh outh <- readIORef href
BFS.wordsUnwordsCopy inh outh BFS.wordsUnwordsCopy inh outh
, mkBench "words-unwords-word8" href $ do
Handles inh outh <- readIORef href
BFS.wordsUnwordsCopyWord8 inh outh
, mkBench "words-unwords-arrays" href $ do
Handles inh outh <- readIORef href
BFA.wordsUnwordsCopy inh outh
] ]
, bgroup "splitting" , bgroup "splitting"

View File

@ -23,10 +23,12 @@ module Streamly.Benchmark.FileIO.Array
last last
, countBytes , countBytes
, countLines , countLines
, countWords
, sumBytes , sumBytes
, cat , cat
, copy , copy
, linesUnlinesCopy , linesUnlinesCopy
, wordsUnwordsCopy
) )
where where
@ -83,6 +85,17 @@ inspect $ hasNoTypeClasses 'countLines
inspect $ 'countLines `hasNoType` ''Step inspect $ 'countLines `hasNoType` ''Step
#endif #endif
-- XXX use a word splitting combinator instead of splitOn and test it.
-- | Count the number of lines in a file.
{-# INLINE countWords #-}
countWords :: Handle -> IO Int
countWords = S.length . A.splitOn 32 . FH.readArrays
#ifdef INSPECTION
inspect $ hasNoTypeClasses 'countWords
inspect $ 'countWords `hasNoType` ''Step
#endif
-- | Sum the bytes in a file. -- | Sum the bytes in a file.
{-# INLINE sumBytes #-} {-# INLINE sumBytes #-}
sumBytes :: Handle -> IO Word8 sumBytes :: Handle -> IO Word8
@ -132,3 +145,18 @@ linesUnlinesCopy inh outh =
inspect $ hasNoTypeClassesExcept 'linesUnlinesCopy [''Storable] inspect $ hasNoTypeClassesExcept 'linesUnlinesCopy [''Storable]
-- inspect $ 'linesUnlinesCopy `hasNoType` ''Step -- inspect $ 'linesUnlinesCopy `hasNoType` ''Step
#endif #endif
-- | Words and unwords
{-# INLINE wordsUnwordsCopy #-}
wordsUnwordsCopy :: Handle -> Handle -> IO ()
wordsUnwordsCopy inh outh =
S.runFold (FH.writeArraysInChunksOf (1024*1024) outh)
$ S.intersperse (A.fromList [32])
-- XXX use a word splitting combinator
$ A.splitOn 32
$ FH.readArraysOf (1024*1024) inh
#ifdef INSPECTION
inspect $ hasNoTypeClassesExcept 'wordsUnwordsCopy [''Storable]
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''Step
#endif

View File

@ -25,11 +25,13 @@ module Streamly.Benchmark.FileIO.Stream
, countBytes , countBytes
, countLines , countLines
, countLinesU , countLinesU
, countWords
, sumBytes , sumBytes
, cat , cat
, catStreamWrite , catStreamWrite
, copy , copy
, linesUnlinesCopy , linesUnlinesCopy
, wordsUnwordsCopyWord8
, wordsUnwordsCopy , wordsUnwordsCopy
, copyCodecChar8 , copyCodecChar8
, copyCodecUtf8 , copyCodecUtf8
@ -105,6 +107,21 @@ inspect $ 'countLines `hasNoType` ''AT.FlattenState
inspect $ 'countLines `hasNoType` ''D.ConcatMapUState inspect $ 'countLines `hasNoType` ''D.ConcatMapUState
#endif #endif
-- | Count the number of words in a file.
{-# INLINE countWords #-}
countWords :: Handle -> IO Int
countWords =
S.length
. SS.foldWords FL.drain
. SS.decodeChar8
. FH.read
#ifdef INSPECTION
inspect $ hasNoTypeClasses 'countWords
-- inspect $ 'countWords `hasNoType` ''Step
-- inspect $ 'countWords `hasNoType` ''D.ConcatMapUState
#endif
-- | Count the number of lines in a file. -- | Count the number of lines in a file.
{-# INLINE countLinesU #-} {-# INLINE countLinesU #-}
countLinesU :: Handle -> IO Int countLinesU :: Handle -> IO Int
@ -253,30 +270,6 @@ inspect $ hasNoTypeClassesExcept 'linesUnlinesCopy [''Storable]
-- inspect $ 'linesUnlinesCopy `hasNoType` ''D.ConcatMapUState -- inspect $ 'linesUnlinesCopy `hasNoType` ''D.ConcatMapUState
#endif #endif
-- | Word, unwords and copy
{-# INLINE wordsUnwordsCopy #-}
wordsUnwordsCopy :: Handle -> Handle -> IO ()
wordsUnwordsCopy inh outh =
S.runFold (FH.write outh)
$ SS.encodeChar8
$ SS.unwords
$ SS.words
$ SS.decodeChar8
$ FH.read inh
#ifdef INSPECTION
-- inspect $ hasNoTypeClasses 'wordsUnwordsCopy
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''Step
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''AT.FlattenState
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''D.ConcatMapUState
#endif
lf :: Word8
lf = fromIntegral (ord '\n')
toarr :: String -> A.Array Word8
toarr = A.fromList . map (fromIntegral . ord)
foreign import ccall unsafe "u_iswspace" foreign import ccall unsafe "u_iswspace"
iswspace :: Int -> Int iswspace :: Int -> Int
@ -295,9 +288,65 @@ isSpace c
where where
uc = fromIntegral (ord c) :: Word uc = fromIntegral (ord c) :: Word
{-# INLINE isSp #-}
isSp :: Word8 -> Bool isSp :: Word8 -> Bool
isSp = isSpace . chr . fromIntegral isSp = isSpace . chr . fromIntegral
-- | Word, unwords and copy
{-# INLINE wordsUnwordsCopyWord8 #-}
wordsUnwordsCopyWord8 :: Handle -> Handle -> IO ()
wordsUnwordsCopyWord8 inh outh =
S.runFold (FH.write outh)
$ Internal.concatMapU Internal.fromList
$ S.intersperse [32]
$ S.wordsBy isSp FL.toList
$ FH.read inh
#ifdef INSPECTION
inspect $ hasNoTypeClasses 'wordsUnwordsCopyWord8
-- inspect $ 'wordsUnwordsCopyWord8 `hasNoType` ''Step
-- inspect $ 'wordsUnwordsCopyWord8 `hasNoType` ''D.ConcatMapUState
#endif
-- | Word, unwords and copy
{-# INLINE wordsUnwordsCopy #-}
wordsUnwordsCopy :: Handle -> Handle -> IO ()
wordsUnwordsCopy inh outh =
S.runFold (FH.write outh)
$ SS.encodeChar8
$ Internal.concatMapU Internal.fromList
$ S.intersperse " "
-- Array allocation is too expensive for such small strings. So just use
-- lists instead.
--
-- -- $ SS.unwords
-- -- $ SS.words
--
-- XXX This pipeline does not fuse with wordsBy but fuses with splitOn
-- with -funfolding-use-threshold=300. With wordsBy it does not fuse
-- even with high limits for inlining and spec-constr ghc options. With
-- -funfolding-use-threshold=400 it performs pretty well and there
-- is no evidence in the core that a join point involving Step
-- constructors is not getting inlined. Not being able to fuse at all in
-- this case could be an unknown issue, need more investigation.
$ S.wordsBy isSpace FL.toList
-- -- $ S.splitOn isSpace FL.toList
$ SS.decodeChar8
$ FH.read inh
#ifdef INSPECTION
-- inspect $ hasNoTypeClasses 'wordsUnwordsCopy
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''Step
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''AT.FlattenState
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''D.ConcatMapUState
#endif
lf :: Word8
lf = fromIntegral (ord '\n')
toarr :: String -> A.Array Word8
toarr = A.fromList . map (fromIntegral . ord)
-- | Split on line feed. -- | Split on line feed.
{-# INLINE splitOn #-} {-# INLINE splitOn #-}
splitOn :: Handle -> IO Int splitOn :: Handle -> IO Int

View File

@ -89,6 +89,7 @@ module Streamly.Internal
-- * Streamly.Unfold Experimental Exports -- * Streamly.Unfold Experimental Exports
, Unfold (..) , Unfold (..)
, fromList
-- * Streamly.Memory.Array -- * Streamly.Memory.Array
, readU , readU
@ -102,7 +103,8 @@ import Streamly.Streams.Combinators (inspectMode)
import Streamly.Streams.Parallel (tapAsync, parallelFst, parallelMin) import Streamly.Streams.Parallel (tapAsync, parallelFst, parallelMin)
import Streamly.Streams.Serial (wSerialFst, wSerialMin) import Streamly.Streams.Serial (wSerialFst, wSerialMin)
import Streamly.Unfold.Types (Unfold(..)) import Streamly.Unfold.Types (Unfold(..))
import Streamly.Unfold (fromList)
import Streamly.Fold.Internal import Streamly.Fold.Internal
import Streamly.Fold.Types import Streamly.Fold.Types
import Streamly.Prelude.Internal import Streamly.Prelude.Internal hiding (fromList)

View File

@ -598,6 +598,8 @@ benchmark array
benchmark fileio benchmark fileio
import: bench-options import: bench-options
type: exitcode-stdio-1.0 type: exitcode-stdio-1.0
-- A value of 400 works better for some benchmarks, however, it takes
-- extraordinary amount of time to compile with that.
ghc-options: -funfolding-use-threshold=150 ghc-options: -funfolding-use-threshold=150
hs-source-dirs: benchmark hs-source-dirs: benchmark
main-is: FileIO.hs main-is: FileIO.hs