Add some word/unword, word counting benchmarks

This commit is contained in:
Harendra Kumar 2019-09-15 18:37:50 +05:30
parent 0b2f615930
commit 661f84262e
6 changed files with 119 additions and 26 deletions

View File

@ -136,7 +136,7 @@ matrix:
#- env: BUILD=stack RESOLVER=nightly
# addons: {apt: {packages: [cabal-install-1.24], sources: [hvr-ghc]}}
- env: BUILD=stack RESOLVER=lts-13 GHCVER=8.6 STACK_BUILD_OPTIONS="--flag streamly:examples-sdl --flag streamly:benchmark --flag streamly:inspection"
- env: BUILD=stack RESOLVER=lts-13 GHCVER=8.6 STACK_BUILD_OPTIONS="--flag streamly:examples-sdl --flag streamly:benchmark"
addons: {apt: {packages: [cabal-install-2.4,libsdl1.2-dev], sources: [hvr-ghc]}}
#- env: BUILD=stack RESOLVER=lts-12 GHCVER=8.4

View File

@ -79,6 +79,9 @@ main = do
, mkBench "linecount" href $ do
Handles inh _ <- readIORef href
BFA.countLines inh
, mkBench "wordcount" href $ do
Handles inh _ <- readIORef href
BFA.countWords inh
, mkBench "sum" href $ do
Handles inh _ <- readIORef href
BFA.sumBytes inh
@ -99,6 +102,9 @@ main = do
, mkBench "linecountU" href $ do
Handles inh _ <- readIORef href
BFS.countLinesU inh
, mkBench "wordcount" href $ do
Handles inh _ <- readIORef href
BFS.countWords inh
, mkBench "sum" href $ do
Handles inh _ <- readIORef href
BFS.sumBytes inh
@ -155,6 +161,12 @@ main = do
, mkBench "words-unwords" href $ do
Handles inh outh <- readIORef href
BFS.wordsUnwordsCopy inh outh
, mkBench "words-unwords-word8" href $ do
Handles inh outh <- readIORef href
BFS.wordsUnwordsCopyWord8 inh outh
, mkBench "words-unwords-arrays" href $ do
Handles inh outh <- readIORef href
BFA.wordsUnwordsCopy inh outh
]
, bgroup "splitting"

View File

@ -23,10 +23,12 @@ module Streamly.Benchmark.FileIO.Array
last
, countBytes
, countLines
, countWords
, sumBytes
, cat
, copy
, linesUnlinesCopy
, wordsUnwordsCopy
)
where
@ -83,6 +85,17 @@ inspect $ hasNoTypeClasses 'countLines
inspect $ 'countLines `hasNoType` ''Step
#endif
-- XXX use a word splitting combinator instead of splitOn and test it.
-- | Count the number of lines in a file.
{-# INLINE countWords #-}
countWords :: Handle -> IO Int
countWords = S.length . A.splitOn 32 . FH.readArrays
#ifdef INSPECTION
inspect $ hasNoTypeClasses 'countWords
inspect $ 'countWords `hasNoType` ''Step
#endif
-- | Sum the bytes in a file.
{-# INLINE sumBytes #-}
sumBytes :: Handle -> IO Word8
@ -132,3 +145,18 @@ linesUnlinesCopy inh outh =
inspect $ hasNoTypeClassesExcept 'linesUnlinesCopy [''Storable]
-- inspect $ 'linesUnlinesCopy `hasNoType` ''Step
#endif
-- | Words and unwords
{-# INLINE wordsUnwordsCopy #-}
wordsUnwordsCopy :: Handle -> Handle -> IO ()
wordsUnwordsCopy inh outh =
S.runFold (FH.writeArraysInChunksOf (1024*1024) outh)
$ S.intersperse (A.fromList [32])
-- XXX use a word splitting combinator
$ A.splitOn 32
$ FH.readArraysOf (1024*1024) inh
#ifdef INSPECTION
inspect $ hasNoTypeClassesExcept 'wordsUnwordsCopy [''Storable]
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''Step
#endif

View File

@ -25,11 +25,13 @@ module Streamly.Benchmark.FileIO.Stream
, countBytes
, countLines
, countLinesU
, countWords
, sumBytes
, cat
, catStreamWrite
, copy
, linesUnlinesCopy
, wordsUnwordsCopyWord8
, wordsUnwordsCopy
, copyCodecChar8
, copyCodecUtf8
@ -105,6 +107,21 @@ inspect $ 'countLines `hasNoType` ''AT.FlattenState
inspect $ 'countLines `hasNoType` ''D.ConcatMapUState
#endif
-- | Count the number of words in a file.
{-# INLINE countWords #-}
countWords :: Handle -> IO Int
countWords =
S.length
. SS.foldWords FL.drain
. SS.decodeChar8
. FH.read
#ifdef INSPECTION
inspect $ hasNoTypeClasses 'countWords
-- inspect $ 'countWords `hasNoType` ''Step
-- inspect $ 'countWords `hasNoType` ''D.ConcatMapUState
#endif
-- | Count the number of lines in a file.
{-# INLINE countLinesU #-}
countLinesU :: Handle -> IO Int
@ -253,30 +270,6 @@ inspect $ hasNoTypeClassesExcept 'linesUnlinesCopy [''Storable]
-- inspect $ 'linesUnlinesCopy `hasNoType` ''D.ConcatMapUState
#endif
-- | Word, unwords and copy
{-# INLINE wordsUnwordsCopy #-}
wordsUnwordsCopy :: Handle -> Handle -> IO ()
wordsUnwordsCopy inh outh =
S.runFold (FH.write outh)
$ SS.encodeChar8
$ SS.unwords
$ SS.words
$ SS.decodeChar8
$ FH.read inh
#ifdef INSPECTION
-- inspect $ hasNoTypeClasses 'wordsUnwordsCopy
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''Step
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''AT.FlattenState
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''D.ConcatMapUState
#endif
lf :: Word8
lf = fromIntegral (ord '\n')
toarr :: String -> A.Array Word8
toarr = A.fromList . map (fromIntegral . ord)
foreign import ccall unsafe "u_iswspace"
iswspace :: Int -> Int
@ -295,9 +288,65 @@ isSpace c
where
uc = fromIntegral (ord c) :: Word
{-# INLINE isSp #-}
isSp :: Word8 -> Bool
isSp = isSpace . chr . fromIntegral
-- | Word, unwords and copy
{-# INLINE wordsUnwordsCopyWord8 #-}
wordsUnwordsCopyWord8 :: Handle -> Handle -> IO ()
wordsUnwordsCopyWord8 inh outh =
S.runFold (FH.write outh)
$ Internal.concatMapU Internal.fromList
$ S.intersperse [32]
$ S.wordsBy isSp FL.toList
$ FH.read inh
#ifdef INSPECTION
inspect $ hasNoTypeClasses 'wordsUnwordsCopyWord8
-- inspect $ 'wordsUnwordsCopyWord8 `hasNoType` ''Step
-- inspect $ 'wordsUnwordsCopyWord8 `hasNoType` ''D.ConcatMapUState
#endif
-- | Word, unwords and copy
{-# INLINE wordsUnwordsCopy #-}
wordsUnwordsCopy :: Handle -> Handle -> IO ()
wordsUnwordsCopy inh outh =
S.runFold (FH.write outh)
$ SS.encodeChar8
$ Internal.concatMapU Internal.fromList
$ S.intersperse " "
-- Array allocation is too expensive for such small strings. So just use
-- lists instead.
--
-- -- $ SS.unwords
-- -- $ SS.words
--
-- XXX This pipeline does not fuse with wordsBy but fuses with splitOn
-- with -funfolding-use-threshold=300. With wordsBy it does not fuse
-- even with high limits for inlining and spec-constr ghc options. With
-- -funfolding-use-threshold=400 it performs pretty well and there
-- is no evidence in the core that a join point involving Step
-- constructors is not getting inlined. Not being able to fuse at all in
-- this case could be an unknown issue, need more investigation.
$ S.wordsBy isSpace FL.toList
-- -- $ S.splitOn isSpace FL.toList
$ SS.decodeChar8
$ FH.read inh
#ifdef INSPECTION
-- inspect $ hasNoTypeClasses 'wordsUnwordsCopy
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''Step
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''AT.FlattenState
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''D.ConcatMapUState
#endif
lf :: Word8
lf = fromIntegral (ord '\n')
toarr :: String -> A.Array Word8
toarr = A.fromList . map (fromIntegral . ord)
-- | Split on line feed.
{-# INLINE splitOn #-}
splitOn :: Handle -> IO Int

View File

@ -89,6 +89,7 @@ module Streamly.Internal
-- * Streamly.Unfold Experimental Exports
, Unfold (..)
, fromList
-- * Streamly.Memory.Array
, readU
@ -102,7 +103,8 @@ import Streamly.Streams.Combinators (inspectMode)
import Streamly.Streams.Parallel (tapAsync, parallelFst, parallelMin)
import Streamly.Streams.Serial (wSerialFst, wSerialMin)
import Streamly.Unfold.Types (Unfold(..))
import Streamly.Unfold (fromList)
import Streamly.Fold.Internal
import Streamly.Fold.Types
import Streamly.Prelude.Internal
import Streamly.Prelude.Internal hiding (fromList)

View File

@ -598,6 +598,8 @@ benchmark array
benchmark fileio
import: bench-options
type: exitcode-stdio-1.0
-- A value of 400 works better for some benchmarks, however, it takes
-- extraordinary amount of time to compile with that.
ghc-options: -funfolding-use-threshold=150
hs-source-dirs: benchmark
main-is: FileIO.hs