mirror of
https://github.com/ilyakooo0/streamly.git
synced 2024-10-06 13:17:10 +03:00
Add some word/unword, word counting benchmarks
This commit is contained in:
parent
0b2f615930
commit
661f84262e
@ -136,7 +136,7 @@ matrix:
|
||||
#- env: BUILD=stack RESOLVER=nightly
|
||||
# addons: {apt: {packages: [cabal-install-1.24], sources: [hvr-ghc]}}
|
||||
|
||||
- env: BUILD=stack RESOLVER=lts-13 GHCVER=8.6 STACK_BUILD_OPTIONS="--flag streamly:examples-sdl --flag streamly:benchmark --flag streamly:inspection"
|
||||
- env: BUILD=stack RESOLVER=lts-13 GHCVER=8.6 STACK_BUILD_OPTIONS="--flag streamly:examples-sdl --flag streamly:benchmark"
|
||||
addons: {apt: {packages: [cabal-install-2.4,libsdl1.2-dev], sources: [hvr-ghc]}}
|
||||
|
||||
#- env: BUILD=stack RESOLVER=lts-12 GHCVER=8.4
|
||||
|
@ -79,6 +79,9 @@ main = do
|
||||
, mkBench "linecount" href $ do
|
||||
Handles inh _ <- readIORef href
|
||||
BFA.countLines inh
|
||||
, mkBench "wordcount" href $ do
|
||||
Handles inh _ <- readIORef href
|
||||
BFA.countWords inh
|
||||
, mkBench "sum" href $ do
|
||||
Handles inh _ <- readIORef href
|
||||
BFA.sumBytes inh
|
||||
@ -99,6 +102,9 @@ main = do
|
||||
, mkBench "linecountU" href $ do
|
||||
Handles inh _ <- readIORef href
|
||||
BFS.countLinesU inh
|
||||
, mkBench "wordcount" href $ do
|
||||
Handles inh _ <- readIORef href
|
||||
BFS.countWords inh
|
||||
, mkBench "sum" href $ do
|
||||
Handles inh _ <- readIORef href
|
||||
BFS.sumBytes inh
|
||||
@ -155,6 +161,12 @@ main = do
|
||||
, mkBench "words-unwords" href $ do
|
||||
Handles inh outh <- readIORef href
|
||||
BFS.wordsUnwordsCopy inh outh
|
||||
, mkBench "words-unwords-word8" href $ do
|
||||
Handles inh outh <- readIORef href
|
||||
BFS.wordsUnwordsCopyWord8 inh outh
|
||||
, mkBench "words-unwords-arrays" href $ do
|
||||
Handles inh outh <- readIORef href
|
||||
BFA.wordsUnwordsCopy inh outh
|
||||
]
|
||||
|
||||
, bgroup "splitting"
|
||||
|
@ -23,10 +23,12 @@ module Streamly.Benchmark.FileIO.Array
|
||||
last
|
||||
, countBytes
|
||||
, countLines
|
||||
, countWords
|
||||
, sumBytes
|
||||
, cat
|
||||
, copy
|
||||
, linesUnlinesCopy
|
||||
, wordsUnwordsCopy
|
||||
)
|
||||
where
|
||||
|
||||
@ -83,6 +85,17 @@ inspect $ hasNoTypeClasses 'countLines
|
||||
inspect $ 'countLines `hasNoType` ''Step
|
||||
#endif
|
||||
|
||||
-- XXX use a word splitting combinator instead of splitOn and test it.
|
||||
-- | Count the number of lines in a file.
|
||||
{-# INLINE countWords #-}
|
||||
countWords :: Handle -> IO Int
|
||||
countWords = S.length . A.splitOn 32 . FH.readArrays
|
||||
|
||||
#ifdef INSPECTION
|
||||
inspect $ hasNoTypeClasses 'countWords
|
||||
inspect $ 'countWords `hasNoType` ''Step
|
||||
#endif
|
||||
|
||||
-- | Sum the bytes in a file.
|
||||
{-# INLINE sumBytes #-}
|
||||
sumBytes :: Handle -> IO Word8
|
||||
@ -132,3 +145,18 @@ linesUnlinesCopy inh outh =
|
||||
inspect $ hasNoTypeClassesExcept 'linesUnlinesCopy [''Storable]
|
||||
-- inspect $ 'linesUnlinesCopy `hasNoType` ''Step
|
||||
#endif
|
||||
|
||||
-- | Words and unwords
|
||||
{-# INLINE wordsUnwordsCopy #-}
|
||||
wordsUnwordsCopy :: Handle -> Handle -> IO ()
|
||||
wordsUnwordsCopy inh outh =
|
||||
S.runFold (FH.writeArraysInChunksOf (1024*1024) outh)
|
||||
$ S.intersperse (A.fromList [32])
|
||||
-- XXX use a word splitting combinator
|
||||
$ A.splitOn 32
|
||||
$ FH.readArraysOf (1024*1024) inh
|
||||
|
||||
#ifdef INSPECTION
|
||||
inspect $ hasNoTypeClassesExcept 'wordsUnwordsCopy [''Storable]
|
||||
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''Step
|
||||
#endif
|
||||
|
@ -25,11 +25,13 @@ module Streamly.Benchmark.FileIO.Stream
|
||||
, countBytes
|
||||
, countLines
|
||||
, countLinesU
|
||||
, countWords
|
||||
, sumBytes
|
||||
, cat
|
||||
, catStreamWrite
|
||||
, copy
|
||||
, linesUnlinesCopy
|
||||
, wordsUnwordsCopyWord8
|
||||
, wordsUnwordsCopy
|
||||
, copyCodecChar8
|
||||
, copyCodecUtf8
|
||||
@ -105,6 +107,21 @@ inspect $ 'countLines `hasNoType` ''AT.FlattenState
|
||||
inspect $ 'countLines `hasNoType` ''D.ConcatMapUState
|
||||
#endif
|
||||
|
||||
-- | Count the number of words in a file.
|
||||
{-# INLINE countWords #-}
|
||||
countWords :: Handle -> IO Int
|
||||
countWords =
|
||||
S.length
|
||||
. SS.foldWords FL.drain
|
||||
. SS.decodeChar8
|
||||
. FH.read
|
||||
|
||||
#ifdef INSPECTION
|
||||
inspect $ hasNoTypeClasses 'countWords
|
||||
-- inspect $ 'countWords `hasNoType` ''Step
|
||||
-- inspect $ 'countWords `hasNoType` ''D.ConcatMapUState
|
||||
#endif
|
||||
|
||||
-- | Count the number of lines in a file.
|
||||
{-# INLINE countLinesU #-}
|
||||
countLinesU :: Handle -> IO Int
|
||||
@ -253,30 +270,6 @@ inspect $ hasNoTypeClassesExcept 'linesUnlinesCopy [''Storable]
|
||||
-- inspect $ 'linesUnlinesCopy `hasNoType` ''D.ConcatMapUState
|
||||
#endif
|
||||
|
||||
-- | Word, unwords and copy
|
||||
{-# INLINE wordsUnwordsCopy #-}
|
||||
wordsUnwordsCopy :: Handle -> Handle -> IO ()
|
||||
wordsUnwordsCopy inh outh =
|
||||
S.runFold (FH.write outh)
|
||||
$ SS.encodeChar8
|
||||
$ SS.unwords
|
||||
$ SS.words
|
||||
$ SS.decodeChar8
|
||||
$ FH.read inh
|
||||
|
||||
#ifdef INSPECTION
|
||||
-- inspect $ hasNoTypeClasses 'wordsUnwordsCopy
|
||||
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''Step
|
||||
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''AT.FlattenState
|
||||
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''D.ConcatMapUState
|
||||
#endif
|
||||
|
||||
lf :: Word8
|
||||
lf = fromIntegral (ord '\n')
|
||||
|
||||
toarr :: String -> A.Array Word8
|
||||
toarr = A.fromList . map (fromIntegral . ord)
|
||||
|
||||
foreign import ccall unsafe "u_iswspace"
|
||||
iswspace :: Int -> Int
|
||||
|
||||
@ -295,9 +288,65 @@ isSpace c
|
||||
where
|
||||
uc = fromIntegral (ord c) :: Word
|
||||
|
||||
{-# INLINE isSp #-}
|
||||
isSp :: Word8 -> Bool
|
||||
isSp = isSpace . chr . fromIntegral
|
||||
|
||||
-- | Word, unwords and copy
|
||||
{-# INLINE wordsUnwordsCopyWord8 #-}
|
||||
wordsUnwordsCopyWord8 :: Handle -> Handle -> IO ()
|
||||
wordsUnwordsCopyWord8 inh outh =
|
||||
S.runFold (FH.write outh)
|
||||
$ Internal.concatMapU Internal.fromList
|
||||
$ S.intersperse [32]
|
||||
$ S.wordsBy isSp FL.toList
|
||||
$ FH.read inh
|
||||
|
||||
#ifdef INSPECTION
|
||||
inspect $ hasNoTypeClasses 'wordsUnwordsCopyWord8
|
||||
-- inspect $ 'wordsUnwordsCopyWord8 `hasNoType` ''Step
|
||||
-- inspect $ 'wordsUnwordsCopyWord8 `hasNoType` ''D.ConcatMapUState
|
||||
#endif
|
||||
|
||||
-- | Word, unwords and copy
|
||||
{-# INLINE wordsUnwordsCopy #-}
|
||||
wordsUnwordsCopy :: Handle -> Handle -> IO ()
|
||||
wordsUnwordsCopy inh outh =
|
||||
S.runFold (FH.write outh)
|
||||
$ SS.encodeChar8
|
||||
$ Internal.concatMapU Internal.fromList
|
||||
$ S.intersperse " "
|
||||
-- Array allocation is too expensive for such small strings. So just use
|
||||
-- lists instead.
|
||||
--
|
||||
-- -- $ SS.unwords
|
||||
-- -- $ SS.words
|
||||
--
|
||||
-- XXX This pipeline does not fuse with wordsBy but fuses with splitOn
|
||||
-- with -funfolding-use-threshold=300. With wordsBy it does not fuse
|
||||
-- even with high limits for inlining and spec-constr ghc options. With
|
||||
-- -funfolding-use-threshold=400 it performs pretty well and there
|
||||
-- is no evidence in the core that a join point involving Step
|
||||
-- constructors is not getting inlined. Not being able to fuse at all in
|
||||
-- this case could be an unknown issue, need more investigation.
|
||||
$ S.wordsBy isSpace FL.toList
|
||||
-- -- $ S.splitOn isSpace FL.toList
|
||||
$ SS.decodeChar8
|
||||
$ FH.read inh
|
||||
|
||||
#ifdef INSPECTION
|
||||
-- inspect $ hasNoTypeClasses 'wordsUnwordsCopy
|
||||
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''Step
|
||||
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''AT.FlattenState
|
||||
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''D.ConcatMapUState
|
||||
#endif
|
||||
|
||||
lf :: Word8
|
||||
lf = fromIntegral (ord '\n')
|
||||
|
||||
toarr :: String -> A.Array Word8
|
||||
toarr = A.fromList . map (fromIntegral . ord)
|
||||
|
||||
-- | Split on line feed.
|
||||
{-# INLINE splitOn #-}
|
||||
splitOn :: Handle -> IO Int
|
||||
|
@ -89,6 +89,7 @@ module Streamly.Internal
|
||||
|
||||
-- * Streamly.Unfold Experimental Exports
|
||||
, Unfold (..)
|
||||
, fromList
|
||||
|
||||
-- * Streamly.Memory.Array
|
||||
, readU
|
||||
@ -102,7 +103,8 @@ import Streamly.Streams.Combinators (inspectMode)
|
||||
import Streamly.Streams.Parallel (tapAsync, parallelFst, parallelMin)
|
||||
import Streamly.Streams.Serial (wSerialFst, wSerialMin)
|
||||
import Streamly.Unfold.Types (Unfold(..))
|
||||
import Streamly.Unfold (fromList)
|
||||
|
||||
import Streamly.Fold.Internal
|
||||
import Streamly.Fold.Types
|
||||
import Streamly.Prelude.Internal
|
||||
import Streamly.Prelude.Internal hiding (fromList)
|
||||
|
@ -598,6 +598,8 @@ benchmark array
|
||||
benchmark fileio
|
||||
import: bench-options
|
||||
type: exitcode-stdio-1.0
|
||||
-- A value of 400 works better for some benchmarks, however, it takes
|
||||
-- extraordinary amount of time to compile with that.
|
||||
ghc-options: -funfolding-use-threshold=150
|
||||
hs-source-dirs: benchmark
|
||||
main-is: FileIO.hs
|
||||
|
Loading…
Reference in New Issue
Block a user