streamly/examples/HandleIO.hs
2020-01-08 21:03:20 +05:30

108 lines
3.8 KiB
Haskell

import Data.Char (ord)
import System.Environment (getArgs)
import System.IO (IOMode(..), hSeek, SeekMode(..))
import qualified Streamly.Data.Fold as FL
import qualified Streamly.FileSystem.Handle as FH
import qualified System.IO as FH
import qualified Streamly.Memory.Array as A
import qualified Streamly.Prelude as S
-- import qualified Streamly.FileSystem.FD as FH
import qualified Streamly.Internal.Data.Fold as FL
import qualified Streamly.Internal.Data.Unicode.Stream as US
import qualified Streamly.Internal.Memory.ArrayStream as AS
import qualified Streamly.Internal.Prelude as S
-- Byte-stream cat: copy everything readable from the given handle to stdout.
--
-- FH.read pulls the input in 32KB chunks and flattens them into a stream of
-- bytes; FH.write consumes that byte stream, regroups it into 32KB chunks and
-- writes those chunks to stdout.
--
_cat :: FH.Handle -> IO ()
_cat = S.fold (FH.write FH.stdout) . S.unfold FH.read
-- Chunked cat, more efficient than the byte-stream '_cat'. The input is read
-- in chunks of 256KB and each chunk is written to stdout as-is, without being
-- flattened into a byte stream in between.
cat :: FH.Handle -> IO ()
cat src = S.fold (FH.writeChunks FH.stdout) chunks
  where
    -- Read buffer size in bytes (256KB).
    bufferSize = 256 * 1024
    chunks = S.unfold FH.readChunksWithBufferOf (bufferSize, src)
-- Byte-stream cp: copy the contents of the source handle to the destination
-- handle.
--
-- FH.read pulls the source in 32KB chunks and flattens them into a stream of
-- bytes; FH.write consumes that byte stream, regroups it into 32KB chunks and
-- writes those chunks to the destination.
_cp :: FH.Handle -> FH.Handle -> IO ()
_cp src dst = S.fold sink (S.unfold FH.read src)
  where
    sink = FH.write dst
-- Chunked copy, more efficient than the byte-stream version '_cp'. Reads the
-- source handle in 256KB chunks and writes those chunks, unchanged, to the
-- destination handle.
--
-- Neither handle is closed here; the caller owns both 'src' and 'dst'.
cp :: FH.Handle -> FH.Handle -> IO ()
cp src dst =
    S.fold (FH.writeChunks dst)
        $ S.unfold FH.readChunksWithBufferOf (256 * 1024, src)
-- Code point of a character, converted to whichever numeric type the caller
-- needs (used in this file to compare characters against stream elements).
ord' :: Num a => Char -> a
ord' c = fromIntegral (ord c)
-- Count lines, like wc -l.
--
-- Char stream version: the input is read as a byte stream, decoded as
-- Latin-1 characters and split into lines; the number of lines is printed.
_wcl :: FH.Handle -> IO ()
_wcl src = do
    lineCount <-
        S.length
            . US.lines FL.drain
            . US.decodeLatin1
            $ S.unfold FH.read src
    print lineCount
-- Count lines, like wc -l, working on chunks. More efficient than '_wcl':
-- the chunks read from the handle are split on the newline byte directly
-- instead of first being converted into a byte stream.
wcl :: FH.Handle -> IO ()
wcl src = do
    lineCount <- S.length (AS.splitOn newline (S.unfold FH.readChunks src))
    print lineCount
  where
    -- Byte value 10 is ASCII line feed ('\n').
    newline = 10
-- grep -c
--
-- Count the occurrences of a pattern in a file and print the count. The byte
-- stream is split on the pattern; the printed value is the number of
-- resulting segments minus one.
grepc :: String -> FH.Handle -> IO ()
grepc pat src = do
    segments <-
        S.length
            $ S.splitOnSeq needle FL.drain
            $ S.unfold FH.read src
    print (segments - 1)
  where
    -- The pattern as an array of byte values, one per character.
    needle = A.fromList (map ord' pat)
-- Print the average line length of a file.
--
-- The byte stream is split on '\n' and each segment is reduced to its length;
-- the sum of those lengths divided by their count is printed as a Double.
avgll :: FH.Handle -> IO ()
avgll src = do
    mean <- S.fold average lineLengths
    print mean
  where
    lineLengths =
        S.splitWithSuffix (== ord' '\n') FL.length (S.unfold FH.read src)
    -- Run the sum and the count over the same stream and divide the results.
    average = (/) <$> asDouble FL.sum <*> asDouble FL.length
    asDouble = fmap (fromIntegral :: Int -> Double)
-- Print a histogram of line lengths in a file.
--
-- The byte stream is split on '\n' and each segment is reduced to its length.
-- Lengths are grouped into buckets of width 10 (bucket 0 holds lengths 0-9,
-- bucket 1 holds 10-19, and so on); every length of 90 or more is counted in
-- bucket 9. The number of lines in each bucket is printed.
llhisto :: FH.Handle -> IO ()
llhisto src = print =<< (S.fold (FL.classify FL.length)
    $ S.map bucket
    $ S.splitWithSuffix (== ord' '\n') FL.length
    $ S.unfold FH.read src)
  where
    -- Pair each length with its bucket index. This uses `div`, not `mod`:
    -- n `mod` 10 never exceeds 9, which would make the clamp to bucket 9 dead
    -- code and group lines by the last digit of their length instead of by
    -- magnitude.
    bucket n = (min 9 (n `div` 10), n)
-- Run every example in turn on the file named by the first command line
-- argument. Any further arguments are ignored.
--
-- Fails with a usage message when no file name is given. Both the input file
-- and the copy destination are opened with withFile, so they are flushed and
-- closed even if one of the examples throws.
main :: IO ()
main = do
    args <- getArgs
    case args of
        [] -> ioError (userError "usage: HandleIO <input-file>")
        (name : _) -> FH.withFile name ReadMode runExamples
  where
    runExamples src = do
        -- Every example consumes the handle to the end, so seek back to the
        -- start of the file before each one.
        let rewind = hSeek src AbsoluteSeek 0
        rewind >> putStrLn "cat" >> cat src -- Unix cat program
        rewind >> putStr "wcl " >> wcl src -- Unix wc -l program
        rewind >> putStr "grepc " >> grepc "aaaa" src -- Unix grep -c program
        rewind >> putStr "avgll " >> avgll src -- get average line length
        rewind >> putStr "llhisto " >> llhisto src -- get line length histogram
        FH.withFile "dst-xyz.txt" WriteMode $ \dst ->
            rewind >> putStr "cp " >> cp src dst -- Unix cp program