mirror of
https://github.com/ilyakooo0/streamly.git
synced 2024-10-06 13:17:10 +03:00
Add some word/unword, word counting benchmarks
This commit is contained in:
parent
0b2f615930
commit
661f84262e
@ -136,7 +136,7 @@ matrix:
|
|||||||
#- env: BUILD=stack RESOLVER=nightly
|
#- env: BUILD=stack RESOLVER=nightly
|
||||||
# addons: {apt: {packages: [cabal-install-1.24], sources: [hvr-ghc]}}
|
# addons: {apt: {packages: [cabal-install-1.24], sources: [hvr-ghc]}}
|
||||||
|
|
||||||
- env: BUILD=stack RESOLVER=lts-13 GHCVER=8.6 STACK_BUILD_OPTIONS="--flag streamly:examples-sdl --flag streamly:benchmark --flag streamly:inspection"
|
- env: BUILD=stack RESOLVER=lts-13 GHCVER=8.6 STACK_BUILD_OPTIONS="--flag streamly:examples-sdl --flag streamly:benchmark"
|
||||||
addons: {apt: {packages: [cabal-install-2.4,libsdl1.2-dev], sources: [hvr-ghc]}}
|
addons: {apt: {packages: [cabal-install-2.4,libsdl1.2-dev], sources: [hvr-ghc]}}
|
||||||
|
|
||||||
#- env: BUILD=stack RESOLVER=lts-12 GHCVER=8.4
|
#- env: BUILD=stack RESOLVER=lts-12 GHCVER=8.4
|
||||||
|
@ -79,6 +79,9 @@ main = do
|
|||||||
, mkBench "linecount" href $ do
|
, mkBench "linecount" href $ do
|
||||||
Handles inh _ <- readIORef href
|
Handles inh _ <- readIORef href
|
||||||
BFA.countLines inh
|
BFA.countLines inh
|
||||||
|
, mkBench "wordcount" href $ do
|
||||||
|
Handles inh _ <- readIORef href
|
||||||
|
BFA.countWords inh
|
||||||
, mkBench "sum" href $ do
|
, mkBench "sum" href $ do
|
||||||
Handles inh _ <- readIORef href
|
Handles inh _ <- readIORef href
|
||||||
BFA.sumBytes inh
|
BFA.sumBytes inh
|
||||||
@ -99,6 +102,9 @@ main = do
|
|||||||
, mkBench "linecountU" href $ do
|
, mkBench "linecountU" href $ do
|
||||||
Handles inh _ <- readIORef href
|
Handles inh _ <- readIORef href
|
||||||
BFS.countLinesU inh
|
BFS.countLinesU inh
|
||||||
|
, mkBench "wordcount" href $ do
|
||||||
|
Handles inh _ <- readIORef href
|
||||||
|
BFS.countWords inh
|
||||||
, mkBench "sum" href $ do
|
, mkBench "sum" href $ do
|
||||||
Handles inh _ <- readIORef href
|
Handles inh _ <- readIORef href
|
||||||
BFS.sumBytes inh
|
BFS.sumBytes inh
|
||||||
@ -155,6 +161,12 @@ main = do
|
|||||||
, mkBench "words-unwords" href $ do
|
, mkBench "words-unwords" href $ do
|
||||||
Handles inh outh <- readIORef href
|
Handles inh outh <- readIORef href
|
||||||
BFS.wordsUnwordsCopy inh outh
|
BFS.wordsUnwordsCopy inh outh
|
||||||
|
, mkBench "words-unwords-word8" href $ do
|
||||||
|
Handles inh outh <- readIORef href
|
||||||
|
BFS.wordsUnwordsCopyWord8 inh outh
|
||||||
|
, mkBench "words-unwords-arrays" href $ do
|
||||||
|
Handles inh outh <- readIORef href
|
||||||
|
BFA.wordsUnwordsCopy inh outh
|
||||||
]
|
]
|
||||||
|
|
||||||
, bgroup "splitting"
|
, bgroup "splitting"
|
||||||
|
@ -23,10 +23,12 @@ module Streamly.Benchmark.FileIO.Array
|
|||||||
last
|
last
|
||||||
, countBytes
|
, countBytes
|
||||||
, countLines
|
, countLines
|
||||||
|
, countWords
|
||||||
, sumBytes
|
, sumBytes
|
||||||
, cat
|
, cat
|
||||||
, copy
|
, copy
|
||||||
, linesUnlinesCopy
|
, linesUnlinesCopy
|
||||||
|
, wordsUnwordsCopy
|
||||||
)
|
)
|
||||||
where
|
where
|
||||||
|
|
||||||
@ -83,6 +85,17 @@ inspect $ hasNoTypeClasses 'countLines
|
|||||||
inspect $ 'countLines `hasNoType` ''Step
|
inspect $ 'countLines `hasNoType` ''Step
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
-- XXX use a word splitting combinator instead of splitOn and test it.
|
||||||
|
-- | Count the number of lines in a file.
|
||||||
|
{-# INLINE countWords #-}
|
||||||
|
countWords :: Handle -> IO Int
|
||||||
|
countWords = S.length . A.splitOn 32 . FH.readArrays
|
||||||
|
|
||||||
|
#ifdef INSPECTION
|
||||||
|
inspect $ hasNoTypeClasses 'countWords
|
||||||
|
inspect $ 'countWords `hasNoType` ''Step
|
||||||
|
#endif
|
||||||
|
|
||||||
-- | Sum the bytes in a file.
|
-- | Sum the bytes in a file.
|
||||||
{-# INLINE sumBytes #-}
|
{-# INLINE sumBytes #-}
|
||||||
sumBytes :: Handle -> IO Word8
|
sumBytes :: Handle -> IO Word8
|
||||||
@ -132,3 +145,18 @@ linesUnlinesCopy inh outh =
|
|||||||
inspect $ hasNoTypeClassesExcept 'linesUnlinesCopy [''Storable]
|
inspect $ hasNoTypeClassesExcept 'linesUnlinesCopy [''Storable]
|
||||||
-- inspect $ 'linesUnlinesCopy `hasNoType` ''Step
|
-- inspect $ 'linesUnlinesCopy `hasNoType` ''Step
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
-- | Words and unwords
|
||||||
|
{-# INLINE wordsUnwordsCopy #-}
|
||||||
|
wordsUnwordsCopy :: Handle -> Handle -> IO ()
|
||||||
|
wordsUnwordsCopy inh outh =
|
||||||
|
S.runFold (FH.writeArraysInChunksOf (1024*1024) outh)
|
||||||
|
$ S.intersperse (A.fromList [32])
|
||||||
|
-- XXX use a word splitting combinator
|
||||||
|
$ A.splitOn 32
|
||||||
|
$ FH.readArraysOf (1024*1024) inh
|
||||||
|
|
||||||
|
#ifdef INSPECTION
|
||||||
|
inspect $ hasNoTypeClassesExcept 'wordsUnwordsCopy [''Storable]
|
||||||
|
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''Step
|
||||||
|
#endif
|
||||||
|
@ -25,11 +25,13 @@ module Streamly.Benchmark.FileIO.Stream
|
|||||||
, countBytes
|
, countBytes
|
||||||
, countLines
|
, countLines
|
||||||
, countLinesU
|
, countLinesU
|
||||||
|
, countWords
|
||||||
, sumBytes
|
, sumBytes
|
||||||
, cat
|
, cat
|
||||||
, catStreamWrite
|
, catStreamWrite
|
||||||
, copy
|
, copy
|
||||||
, linesUnlinesCopy
|
, linesUnlinesCopy
|
||||||
|
, wordsUnwordsCopyWord8
|
||||||
, wordsUnwordsCopy
|
, wordsUnwordsCopy
|
||||||
, copyCodecChar8
|
, copyCodecChar8
|
||||||
, copyCodecUtf8
|
, copyCodecUtf8
|
||||||
@ -105,6 +107,21 @@ inspect $ 'countLines `hasNoType` ''AT.FlattenState
|
|||||||
inspect $ 'countLines `hasNoType` ''D.ConcatMapUState
|
inspect $ 'countLines `hasNoType` ''D.ConcatMapUState
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
-- | Count the number of words in a file.
|
||||||
|
{-# INLINE countWords #-}
|
||||||
|
countWords :: Handle -> IO Int
|
||||||
|
countWords =
|
||||||
|
S.length
|
||||||
|
. SS.foldWords FL.drain
|
||||||
|
. SS.decodeChar8
|
||||||
|
. FH.read
|
||||||
|
|
||||||
|
#ifdef INSPECTION
|
||||||
|
inspect $ hasNoTypeClasses 'countWords
|
||||||
|
-- inspect $ 'countWords `hasNoType` ''Step
|
||||||
|
-- inspect $ 'countWords `hasNoType` ''D.ConcatMapUState
|
||||||
|
#endif
|
||||||
|
|
||||||
-- | Count the number of lines in a file.
|
-- | Count the number of lines in a file.
|
||||||
{-# INLINE countLinesU #-}
|
{-# INLINE countLinesU #-}
|
||||||
countLinesU :: Handle -> IO Int
|
countLinesU :: Handle -> IO Int
|
||||||
@ -253,30 +270,6 @@ inspect $ hasNoTypeClassesExcept 'linesUnlinesCopy [''Storable]
|
|||||||
-- inspect $ 'linesUnlinesCopy `hasNoType` ''D.ConcatMapUState
|
-- inspect $ 'linesUnlinesCopy `hasNoType` ''D.ConcatMapUState
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
-- | Word, unwords and copy
|
|
||||||
{-# INLINE wordsUnwordsCopy #-}
|
|
||||||
wordsUnwordsCopy :: Handle -> Handle -> IO ()
|
|
||||||
wordsUnwordsCopy inh outh =
|
|
||||||
S.runFold (FH.write outh)
|
|
||||||
$ SS.encodeChar8
|
|
||||||
$ SS.unwords
|
|
||||||
$ SS.words
|
|
||||||
$ SS.decodeChar8
|
|
||||||
$ FH.read inh
|
|
||||||
|
|
||||||
#ifdef INSPECTION
|
|
||||||
-- inspect $ hasNoTypeClasses 'wordsUnwordsCopy
|
|
||||||
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''Step
|
|
||||||
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''AT.FlattenState
|
|
||||||
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''D.ConcatMapUState
|
|
||||||
#endif
|
|
||||||
|
|
||||||
lf :: Word8
|
|
||||||
lf = fromIntegral (ord '\n')
|
|
||||||
|
|
||||||
toarr :: String -> A.Array Word8
|
|
||||||
toarr = A.fromList . map (fromIntegral . ord)
|
|
||||||
|
|
||||||
foreign import ccall unsafe "u_iswspace"
|
foreign import ccall unsafe "u_iswspace"
|
||||||
iswspace :: Int -> Int
|
iswspace :: Int -> Int
|
||||||
|
|
||||||
@ -295,9 +288,65 @@ isSpace c
|
|||||||
where
|
where
|
||||||
uc = fromIntegral (ord c) :: Word
|
uc = fromIntegral (ord c) :: Word
|
||||||
|
|
||||||
|
{-# INLINE isSp #-}
|
||||||
isSp :: Word8 -> Bool
|
isSp :: Word8 -> Bool
|
||||||
isSp = isSpace . chr . fromIntegral
|
isSp = isSpace . chr . fromIntegral
|
||||||
|
|
||||||
|
-- | Word, unwords and copy
|
||||||
|
{-# INLINE wordsUnwordsCopyWord8 #-}
|
||||||
|
wordsUnwordsCopyWord8 :: Handle -> Handle -> IO ()
|
||||||
|
wordsUnwordsCopyWord8 inh outh =
|
||||||
|
S.runFold (FH.write outh)
|
||||||
|
$ Internal.concatMapU Internal.fromList
|
||||||
|
$ S.intersperse [32]
|
||||||
|
$ S.wordsBy isSp FL.toList
|
||||||
|
$ FH.read inh
|
||||||
|
|
||||||
|
#ifdef INSPECTION
|
||||||
|
inspect $ hasNoTypeClasses 'wordsUnwordsCopyWord8
|
||||||
|
-- inspect $ 'wordsUnwordsCopyWord8 `hasNoType` ''Step
|
||||||
|
-- inspect $ 'wordsUnwordsCopyWord8 `hasNoType` ''D.ConcatMapUState
|
||||||
|
#endif
|
||||||
|
|
||||||
|
-- | Word, unwords and copy
|
||||||
|
{-# INLINE wordsUnwordsCopy #-}
|
||||||
|
wordsUnwordsCopy :: Handle -> Handle -> IO ()
|
||||||
|
wordsUnwordsCopy inh outh =
|
||||||
|
S.runFold (FH.write outh)
|
||||||
|
$ SS.encodeChar8
|
||||||
|
$ Internal.concatMapU Internal.fromList
|
||||||
|
$ S.intersperse " "
|
||||||
|
-- Array allocation is too expensive for such small strings. So just use
|
||||||
|
-- lists instead.
|
||||||
|
--
|
||||||
|
-- -- $ SS.unwords
|
||||||
|
-- -- $ SS.words
|
||||||
|
--
|
||||||
|
-- XXX This pipeline does not fuse with wordsBy but fuses with splitOn
|
||||||
|
-- with -funfolding-use-threshold=300. With wordsBy it does not fuse
|
||||||
|
-- even with high limits for inlining and spec-constr ghc options. With
|
||||||
|
-- -funfolding-use-threshold=400 it performs pretty well and there
|
||||||
|
-- is no evidence in the core that a join point involving Step
|
||||||
|
-- constructors is not getting inlined. Not being able to fuse at all in
|
||||||
|
-- this case could be an unknown issue, need more investigation.
|
||||||
|
$ S.wordsBy isSpace FL.toList
|
||||||
|
-- -- $ S.splitOn isSpace FL.toList
|
||||||
|
$ SS.decodeChar8
|
||||||
|
$ FH.read inh
|
||||||
|
|
||||||
|
#ifdef INSPECTION
|
||||||
|
-- inspect $ hasNoTypeClasses 'wordsUnwordsCopy
|
||||||
|
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''Step
|
||||||
|
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''AT.FlattenState
|
||||||
|
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''D.ConcatMapUState
|
||||||
|
#endif
|
||||||
|
|
||||||
|
lf :: Word8
|
||||||
|
lf = fromIntegral (ord '\n')
|
||||||
|
|
||||||
|
toarr :: String -> A.Array Word8
|
||||||
|
toarr = A.fromList . map (fromIntegral . ord)
|
||||||
|
|
||||||
-- | Split on line feed.
|
-- | Split on line feed.
|
||||||
{-# INLINE splitOn #-}
|
{-# INLINE splitOn #-}
|
||||||
splitOn :: Handle -> IO Int
|
splitOn :: Handle -> IO Int
|
||||||
|
@ -89,6 +89,7 @@ module Streamly.Internal
|
|||||||
|
|
||||||
-- * Streamly.Unfold Experimental Exports
|
-- * Streamly.Unfold Experimental Exports
|
||||||
, Unfold (..)
|
, Unfold (..)
|
||||||
|
, fromList
|
||||||
|
|
||||||
-- * Streamly.Memory.Array
|
-- * Streamly.Memory.Array
|
||||||
, readU
|
, readU
|
||||||
@ -102,7 +103,8 @@ import Streamly.Streams.Combinators (inspectMode)
|
|||||||
import Streamly.Streams.Parallel (tapAsync, parallelFst, parallelMin)
|
import Streamly.Streams.Parallel (tapAsync, parallelFst, parallelMin)
|
||||||
import Streamly.Streams.Serial (wSerialFst, wSerialMin)
|
import Streamly.Streams.Serial (wSerialFst, wSerialMin)
|
||||||
import Streamly.Unfold.Types (Unfold(..))
|
import Streamly.Unfold.Types (Unfold(..))
|
||||||
|
import Streamly.Unfold (fromList)
|
||||||
|
|
||||||
import Streamly.Fold.Internal
|
import Streamly.Fold.Internal
|
||||||
import Streamly.Fold.Types
|
import Streamly.Fold.Types
|
||||||
import Streamly.Prelude.Internal
|
import Streamly.Prelude.Internal hiding (fromList)
|
||||||
|
@ -598,6 +598,8 @@ benchmark array
|
|||||||
benchmark fileio
|
benchmark fileio
|
||||||
import: bench-options
|
import: bench-options
|
||||||
type: exitcode-stdio-1.0
|
type: exitcode-stdio-1.0
|
||||||
|
-- A value of 400 works better for some benchmarks, however, it takes
|
||||||
|
-- extraordinary amount of time to compile with that.
|
||||||
ghc-options: -funfolding-use-threshold=150
|
ghc-options: -funfolding-use-threshold=150
|
||||||
hs-source-dirs: benchmark
|
hs-source-dirs: benchmark
|
||||||
main-is: FileIO.hs
|
main-is: FileIO.hs
|
||||||
|
Loading…
Reference in New Issue
Block a user