mirror of
https://github.com/composewell/streamly.git
synced 2024-10-27 20:18:55 +03:00
324 lines
11 KiB
Haskell
324 lines
11 KiB
Haskell
--
|
|
-- Module : Streamly.Unicode.Stream
|
|
-- Copyright : (c) 2019 Composewell Technologies
|
|
-- License : BSD-3-Clause
|
|
-- Maintainer : streamly@composewell.com
|
|
-- Stability : experimental
|
|
-- Portability : GHC
|
|
|
|
{-# LANGUAGE CPP #-}
|
|
{-# LANGUAGE ScopedTypeVariables #-}
|
|
|
|
#ifdef __HADDOCK_VERSION__
|
|
#undef INSPECTION
|
|
#endif
|
|
|
|
#ifdef INSPECTION
|
|
{-# LANGUAGE TemplateHaskell #-}
|
|
{-# OPTIONS_GHC -fplugin Test.Inspection.Plugin #-}
|
|
#endif
|
|
|
|
import Prelude hiding (last, length)
|
|
import System.IO (Handle)
|
|
|
|
import qualified Streamly.Data.Array.Foreign as Array
|
|
import qualified Streamly.Data.Fold as Fold
|
|
import qualified Streamly.FileSystem.Handle as Handle
|
|
import qualified Streamly.Internal.Data.Stream.IsStream as Stream
|
|
import qualified Streamly.Internal.Data.Unfold as Unfold
|
|
import qualified Streamly.Internal.FileSystem.Handle as Handle
|
|
import qualified Streamly.Internal.Unicode.Array.Char as UnicodeArr
|
|
import qualified Streamly.Internal.Unicode.Stream as Unicode
|
|
|
|
import Gauge hiding (env)
|
|
import Streamly.Benchmark.Common
|
|
import Streamly.Benchmark.Common.Handle
|
|
|
|
#ifdef INSPECTION
|
|
import Foreign.Storable (Storable)
|
|
import Streamly.Internal.Data.Stream.StreamD.Type (Step(..))
|
|
import qualified Streamly.Internal.Data.Fold.Type as Fold
|
|
import qualified Streamly.Internal.Data.Tuple.Strict as Strict
|
|
import qualified Streamly.Internal.Data.Array.Foreign.Type as Array
|
|
import qualified Streamly.Internal.Data.Array.Foreign.Mut.Type as MArray
|
|
|
|
import Test.Inspection
|
|
#endif
|
|
|
|
moduleName :: String
|
|
moduleName = "Unicode.Stream"
|
|
|
|
-- | Copy file
|
|
{-# NOINLINE copyCodecUtf8ArraysLenient #-}
|
|
copyCodecUtf8ArraysLenient :: Handle -> Handle -> IO ()
|
|
copyCodecUtf8ArraysLenient inh outh =
|
|
Stream.fold (Handle.write outh)
|
|
$ Unicode.encodeUtf8'
|
|
$ Unicode.decodeUtf8Arrays
|
|
$ Handle.getChunks inh
|
|
|
|
#ifdef INSPECTION
|
|
inspect $ hasNoTypeClasses 'copyCodecUtf8ArraysLenient
|
|
-- inspect $ 'copyCodecUtf8ArraysLenient `hasNoType` ''Step
|
|
#endif
|
|
|
|
o_1_space_decode_encode_chunked :: BenchEnv -> [Benchmark]
|
|
o_1_space_decode_encode_chunked env =
|
|
[ bgroup "decode-encode/toChunks"
|
|
[
|
|
mkBenchSmall "encodeUtf8' . decodeUtf8Arrays" env $ \inH outH ->
|
|
copyCodecUtf8ArraysLenient inH outH
|
|
]
|
|
]
|
|
|
|
-------------------------------------------------------------------------------
|
|
-- copy with group/ungroup transformations
|
|
-------------------------------------------------------------------------------
|
|
|
|
{-# NOINLINE linesUnlinesCopy #-}
|
|
linesUnlinesCopy :: Handle -> Handle -> IO ()
|
|
linesUnlinesCopy inh outh =
|
|
Stream.fold (Handle.write outh)
|
|
$ Unicode.encodeLatin1'
|
|
$ Unicode.unlines Unfold.fromList
|
|
$ Stream.splitOnSuffix (== '\n') Fold.toList
|
|
$ Unicode.decodeLatin1
|
|
$ Stream.unfold Handle.read inh
|
|
|
|
{-# NOINLINE linesUnlinesArrayWord8Copy #-}
|
|
linesUnlinesArrayWord8Copy :: Handle -> Handle -> IO ()
|
|
linesUnlinesArrayWord8Copy inh outh =
|
|
Stream.fold (Handle.write outh)
|
|
$ Stream.interposeSuffix 10 Array.read
|
|
$ Stream.splitOnSuffix (== 10) Array.write
|
|
$ Stream.unfold Handle.read inh
|
|
|
|
-- XXX splitSuffixOn requires -funfolding-use-threshold=150 for better fusion
|
|
-- | Lines and unlines
|
|
{-# NOINLINE linesUnlinesArrayCharCopy #-}
|
|
linesUnlinesArrayCharCopy :: Handle -> Handle -> IO ()
|
|
linesUnlinesArrayCharCopy inh outh =
|
|
Stream.fold (Handle.write outh)
|
|
$ Unicode.encodeLatin1'
|
|
$ UnicodeArr.unlines
|
|
$ UnicodeArr.lines
|
|
$ Unicode.decodeLatin1
|
|
$ Stream.unfold Handle.read inh
|
|
|
|
#ifdef INSPECTION
|
|
inspect $ hasNoTypeClassesExcept 'linesUnlinesArrayCharCopy [''Storable]
|
|
-- inspect $ 'linesUnlinesArrayCharCopy `hasNoType` ''Step
|
|
#endif
|
|
|
|
-- XXX to write this we need to be able to map decodeUtf8 on the Array.read fold.
|
|
-- For that we have to write decodeUtf8 as a Pipe.
|
|
{-
|
|
{-# INLINE linesUnlinesArrayUtf8Copy #-}
|
|
linesUnlinesArrayUtf8Copy :: Handle -> Handle -> IO ()
|
|
linesUnlinesArrayUtf8Copy inh outh =
|
|
Stream.fold (Handle.write outh)
|
|
$ Unicode.encodeLatin1'
|
|
$ Stream.intercalate (Array.fromList [10]) (pipe Unicode.decodeUtf8P Array.read)
|
|
$ Stream.splitOnSuffix (== '\n') (IFold.map Unicode.encodeUtf8' Array.write)
|
|
$ Unicode.decodeLatin1
|
|
$ Stream.unfold Handle.read inh
|
|
-}
|
|
|
|
-- | Word, unwords and copy
|
|
{-# NOINLINE wordsUnwordsCopyWord8 #-}
|
|
wordsUnwordsCopyWord8 :: Handle -> Handle -> IO ()
|
|
wordsUnwordsCopyWord8 inh outh =
|
|
Stream.fold (Handle.write outh)
|
|
$ Stream.interposeSuffix 32 Unfold.fromList
|
|
$ Stream.wordsBy isSp Fold.toList
|
|
$ Stream.unfold Handle.read inh
|
|
|
|
#ifdef INSPECTION
|
|
inspect $ hasNoTypeClasses 'wordsUnwordsCopyWord8
|
|
-- inspect $ 'wordsUnwordsCopyWord8 `hasNoType` ''Step
|
|
#endif
|
|
|
|
-- | Word, unwords and copy
|
|
{-# NOINLINE wordsUnwordsCopy #-}
|
|
wordsUnwordsCopy :: Handle -> Handle -> IO ()
|
|
wordsUnwordsCopy inh outh =
|
|
Stream.fold (Handle.write outh)
|
|
$ Unicode.encodeLatin1'
|
|
$ Unicode.unwords Unfold.fromList
|
|
-- XXX This pipeline does not fuse with wordsBy but fuses with splitOn
|
|
-- with -funfolding-use-threshold=300. With wordsBy it does not fuse
|
|
-- even with high limits for inlining and spec-constr ghc options. With
|
|
-- -funfolding-use-threshold=400 it performs pretty well and there
|
|
-- is no evidence in the core that a join point involving Step
|
|
-- constructors is not getting inlined. Not being able to fuse at all in
|
|
-- this case could be an unknown issue, need more investigation.
|
|
$ Stream.wordsBy isSpace Fold.toList
|
|
-- -- $ Stream.splitOn isSpace Fold.toList
|
|
$ Unicode.decodeLatin1
|
|
$ Stream.unfold Handle.read inh
|
|
|
|
#ifdef INSPECTION
|
|
-- inspect $ hasNoTypeClasses 'wordsUnwordsCopy
|
|
-- inspect $ 'wordsUnwordsCopy `hasNoType` ''Step
|
|
#endif
|
|
|
|
{-# NOINLINE wordsUnwordsCharArrayCopy #-}
|
|
wordsUnwordsCharArrayCopy :: Handle -> Handle -> IO ()
|
|
wordsUnwordsCharArrayCopy inh outh =
|
|
Stream.fold (Handle.write outh)
|
|
$ Unicode.encodeLatin1'
|
|
$ UnicodeArr.unwords
|
|
$ UnicodeArr.words
|
|
$ Unicode.decodeLatin1
|
|
$ Stream.unfold Handle.read inh
|
|
|
|
o_1_space_copy_read_group_ungroup :: BenchEnv -> [Benchmark]
|
|
o_1_space_copy_read_group_ungroup env =
|
|
[ bgroup "ungroup-group"
|
|
[ mkBenchSmall "unlines . splitOnSuffix ([Word8])" env
|
|
$ \inh outh -> linesUnlinesCopy inh outh
|
|
, mkBenchSmall "interposeSuffix . splitOnSuffix (Array Word8)" env
|
|
$ \inh outh -> linesUnlinesArrayWord8Copy inh outh
|
|
, mkBenchSmall "UnicodeArr.unlines . UnicodeArr.lines (Array Char)" env
|
|
$ \inh outh -> linesUnlinesArrayCharCopy inh outh
|
|
|
|
, mkBenchSmall "interposeSuffix . wordsBy ([Word8])" env
|
|
$ \inh outh -> wordsUnwordsCopyWord8 inh outh
|
|
, mkBenchSmall "unwords . wordsBy ([Char])" env
|
|
$ \inh outh -> wordsUnwordsCopy inh outh
|
|
, mkBenchSmall "UnicodeArr.unwords . UnicodeArr.words (Array Char)" env
|
|
$ \inh outh -> wordsUnwordsCharArrayCopy inh outh
|
|
]
|
|
]
|
|
|
|
-------------------------------------------------------------------------------
|
|
-- copy unfold
|
|
-------------------------------------------------------------------------------
|
|
|
|
-- | Copy file (encodeLatin1')
|
|
{-# NOINLINE copyStreamLatin1' #-}
|
|
copyStreamLatin1' :: Handle -> Handle -> IO ()
|
|
copyStreamLatin1' inh outh =
|
|
Stream.fold (Handle.write outh)
|
|
$ Unicode.encodeLatin1'
|
|
$ Unicode.decodeLatin1
|
|
$ Stream.unfold Handle.read inh
|
|
|
|
#ifdef INSPECTION
|
|
inspect $ hasNoTypeClasses 'copyStreamLatin1'
|
|
inspect $ 'copyStreamLatin1' `hasNoType` ''Step
|
|
inspect $ 'copyStreamLatin1' `hasNoType` ''Unfold.ConcatState -- Handle.read/UF.many
|
|
inspect $ 'copyStreamLatin1' `hasNoType` ''MArray.ReadUState -- Handle.read/Array.read
|
|
|
|
inspect $ 'copyStreamLatin1' `hasNoType` ''Fold.Step
|
|
inspect $ 'copyStreamLatin1' `hasNoType` ''Array.ArrayUnsafe -- Handle.write/writeNUnsafe
|
|
inspect $ 'copyStreamLatin1' `hasNoType` ''Strict.Tuple3' -- Handle.write/chunksOf
|
|
#endif
|
|
|
|
-- | Copy file (encodeLatin1)
|
|
{-# NOINLINE copyStreamLatin1 #-}
|
|
copyStreamLatin1 :: Handle -> Handle -> IO ()
|
|
copyStreamLatin1 inh outh =
|
|
Stream.fold (Handle.write outh)
|
|
$ Unicode.encodeLatin1
|
|
$ Unicode.decodeLatin1
|
|
$ Stream.unfold Handle.read inh
|
|
|
|
#ifdef INSPECTION
|
|
inspect $ hasNoTypeClasses 'copyStreamLatin1
|
|
inspect $ 'copyStreamLatin1 `hasNoType` ''Step
|
|
inspect $ 'copyStreamLatin1 `hasNoType` ''Unfold.ConcatState -- Handle.read/UF.many
|
|
inspect $ 'copyStreamLatin1 `hasNoType` ''MArray.ReadUState -- Handle.read/Array.read
|
|
|
|
inspect $ 'copyStreamLatin1 `hasNoType` ''Fold.ManyState
|
|
inspect $ 'copyStreamLatin1 `hasNoType` ''Fold.Step
|
|
inspect $ 'copyStreamLatin1 `hasNoType` ''Array.ArrayUnsafe -- Handle.write/writeNUnsafe
|
|
inspect $ 'copyStreamLatin1 `hasNoType` ''Strict.Tuple3' -- Handle.write/chunksOf
|
|
#endif
|
|
|
|
-- | Copy file
|
|
_copyStreamUtf8' :: Handle -> Handle -> IO ()
|
|
_copyStreamUtf8' inh outh =
|
|
Stream.fold (Handle.write outh)
|
|
$ Unicode.encodeUtf8'
|
|
$ Unicode.decodeUtf8'
|
|
$ Stream.unfold Handle.read inh
|
|
|
|
#ifdef INSPECTION
|
|
inspect $ hasNoTypeClasses '_copyStreamUtf8'
|
|
-- inspect $ '_copyStreamUtf8 `hasNoType` ''Step
|
|
-- inspect $ '_copyStreamUtf8 `hasNoType` ''Array.FlattenState
|
|
-- inspect $ '_copyStreamUtf8 `hasNoType` ''D.ConcatMapUState
|
|
#endif
|
|
|
|
-- | Copy file
|
|
{-# NOINLINE copyStreamUtf8 #-}
|
|
copyStreamUtf8 :: Handle -> Handle -> IO ()
|
|
copyStreamUtf8 inh outh =
|
|
Stream.fold (Handle.write outh)
|
|
$ Unicode.encodeUtf8
|
|
$ Unicode.decodeUtf8
|
|
$ Stream.unfold Handle.read inh
|
|
|
|
#ifdef INSPECTION
|
|
inspect $ hasNoTypeClasses 'copyStreamUtf8
|
|
-- inspect $ 'copyStreamUtf8Lax `hasNoType` ''Step
|
|
-- inspect $ 'copyStreamUtf8Lax `hasNoType` ''Array.FlattenState
|
|
-- inspect $ 'copyStreamUtf8Lax `hasNoType` ''D.ConcatMapUState
|
|
#endif
|
|
|
|
{-# NOINLINE _copyStreamUtf8'Fold #-}
|
|
_copyStreamUtf8'Fold :: Handle -> Handle -> IO ()
|
|
_copyStreamUtf8'Fold inh outh =
|
|
Stream.fold (Handle.write outh)
|
|
$ Unicode.encodeUtf8
|
|
$ Stream.foldMany Unicode.writeCharUtf8'
|
|
$ Stream.unfold Handle.read inh
|
|
|
|
{-# NOINLINE _copyStreamUtf8Parser #-}
|
|
_copyStreamUtf8Parser :: Handle -> Handle -> IO ()
|
|
_copyStreamUtf8Parser inh outh =
|
|
Stream.fold (Handle.write outh)
|
|
$ Unicode.encodeUtf8
|
|
$ Stream.parseMany
|
|
(Unicode.parseCharUtf8With Unicode.TransliterateCodingFailure)
|
|
$ Stream.unfold Handle.read inh
|
|
|
|
o_1_space_decode_encode_read :: BenchEnv -> [Benchmark]
|
|
o_1_space_decode_encode_read env =
|
|
[ bgroup "decode-encode"
|
|
[
|
|
-- This needs an ascii file, as decode just errors out.
|
|
mkBench "encodeLatin1' . decodeLatin1" env $ \inh outh ->
|
|
copyStreamLatin1' inh outh
|
|
, mkBench "encodeLatin1 . decodeLatin1" env $ \inh outh ->
|
|
copyStreamLatin1 inh outh
|
|
#ifdef INCLUDE_STRICT_UTF8
|
|
-- Requires valid unicode input
|
|
, mkBench "encodeUtf8' . decodeUtf8'" env $ \inh outh ->
|
|
_copyStreamUtf8' inh outh
|
|
, mkBench "encodeUtf8' . foldMany writeCharUtf8'" env $ \inh outh ->
|
|
_copyStreamUtf8'Fold inh outh
|
|
#endif
|
|
, mkBenchSmall "encodeUtf8 . parseMany parseCharUtf8" env
|
|
$ \inh outh -> _copyStreamUtf8Parser inh outh
|
|
, mkBenchSmall "encodeUtf8 . decodeUtf8" env $ \inh outh ->
|
|
copyStreamUtf8 inh outh
|
|
]
|
|
]
|
|
|
|
main :: IO ()
|
|
main = do
|
|
env <- mkHandleBenchEnv
|
|
defaultMain (allBenchmarks env)
|
|
|
|
where
|
|
|
|
allBenchmarks env =
|
|
[ bgroup (o_1_space_prefix moduleName) $ Prelude.concat
|
|
[ o_1_space_copy_read_group_ungroup env
|
|
, o_1_space_decode_encode_chunked env
|
|
, o_1_space_decode_encode_read env
|
|
]
|
|
]
|