mirror of
https://github.com/composewell/streamly.git
synced 2024-11-10 12:47:22 +03:00
108 lines
3.8 KiB
Haskell
108 lines
3.8 KiB
Haskell
import Data.Char (ord)
|
|
import System.Environment (getArgs)
|
|
import System.IO (IOMode(..), hSeek, SeekMode(..))
|
|
|
|
import qualified Streamly.Data.Fold as FL
|
|
import qualified Streamly.FileSystem.Handle as FH
|
|
import qualified System.IO as FH
|
|
import qualified Streamly.Memory.Array as A
|
|
import qualified Streamly.Prelude as S
|
|
-- import qualified Streamly.FileSystem.FD as FH
|
|
|
|
import qualified Streamly.Internal.Data.Fold as FL
|
|
import qualified Streamly.Internal.Data.Unicode.Stream as US
|
|
import qualified Streamly.Internal.Memory.ArrayStream as AS
|
|
import qualified Streamly.Internal.Prelude as S
|
|
|
|
-- Stream the contents of a file to stdout as a byte stream.
--
-- FH.read pulls the file in 32KB chunks and flattens those chunks into a
-- byte stream. FH.write consumes that byte stream, regroups it into 32KB
-- chunks and writes them to stdout.
--
_cat :: FH.Handle -> IO ()
_cat = S.fold (FH.write FH.stdout) . S.unfold FH.read
|
|
|
|
-- Chunked variant of _cat, more efficient than going through a byte stream.
-- Reads the file in 256KB chunks and writes each chunk to stdout as is.
cat :: FH.Handle -> IO ()
cat src = S.fold sink chunks
    where
    -- Read buffer size in bytes (256KB).
    chunkSize = 256 * 1024
    chunks = S.unfold FH.readChunksWithBufferOf (chunkSize, src)
    sink = FH.writeChunks FH.stdout
|
|
|
|
-- Copy a source file to a destination file through a byte stream.
--
-- FH.read pulls the source in 32KB chunks and flattens those chunks into a
-- byte stream. FH.write consumes that byte stream, regroups it into 32KB
-- chunks and writes them to the destination handle.
_cp :: FH.Handle -> FH.Handle -> IO ()
_cp src dst = S.fold sink bytes
    where
    bytes = S.unfold FH.read src
    sink = FH.write dst
|
|
|
|
-- Chunked variant of _cp, more efficient than going through a byte stream.
-- Reads the source in 256KB chunks and writes each chunk to the destination
-- handle as is.
cp :: FH.Handle -> FH.Handle -> IO ()
cp src dst = S.fold sink chunks
    where
    -- Read buffer size in bytes (256KB).
    chunkSize = 256 * 1024
    chunks = S.unfold FH.readChunksWithBufferOf (chunkSize, src)
    sink = FH.writeChunks dst
|
|
|
|
-- Code point of a character, converted with fromIntegral to whatever numeric
-- type the caller needs (used below to compare characters against bytes).
ord' :: Num a => Char -> a
ord' c = fromIntegral (ord c)
|
|
|
|
-- Count lines like wc -l.
--
-- Char stream version. Reads the input as a byte stream, decodes it as
-- Latin-1, splits the characters into lines and prints the number of lines.
_wcl :: FH.Handle -> IO ()
_wcl src = do
    -- Each line is folded with FL.drain: only the count matters.
    n <- S.length . US.lines FL.drain . US.decodeLatin1 $ S.unfold FH.read src
    print n
|
|
|
|
-- More efficient chunked version of _wcl. Reads chunks from the input handle
-- and splits the chunks directly instead of converting them into a byte
-- stream first, then prints the number of resulting pieces.
wcl :: FH.Handle -> IO ()
wcl src = do
    -- 10 is the byte value of '\n'.
    n <- S.length . AS.splitOn 10 $ S.unfold FH.readChunks src
    print n
|
|
|
|
-- grep -c
--
-- Count the occurrences of a pattern in a file.
--
-- The byte stream is split on the pattern; the number of resulting segments
-- minus one is printed as the occurrence count.
grepc :: String -> FH.Handle -> IO ()
grepc pat src = do
    segments <- S.length (S.splitOnSeq needle FL.drain (S.unfold FH.read src))
    print (segments - 1)
    where
    -- The pattern as an array of element values, built with ord'.
    needle = A.fromList (map ord' pat)
|
|
|
|
-- Compute the average line length in a file.
--
-- The byte stream is split at every '\n' and each segment is folded to its
-- length with FL.length; the sum of those lengths divided by their count is
-- printed as a Double.
avgll :: FH.Handle -> IO ()
avgll src = do
    mean <- S.fold meanFold lineLengths
    print mean
    where
    lineLengths =
        S.splitWithSuffix (== ord' '\n') FL.length (S.unfold FH.read src)

    -- Runs the sum and the count in a single pass, then divides them.
    meanFold = (/) <$> (toDouble <$> FL.sum) <*> (toDouble <$> FL.length)

    toDouble :: Int -> Double
    toDouble = fromIntegral
|
|
|
|
-- Histogram of line lengths in a file.
--
-- The byte stream is split at every '\n' and each segment is folded to its
-- length with FL.length. Every length is tagged with a bucket number and
-- FL.classify FL.length counts how many lines fall into each bucket; the
-- resulting bucket -> count mapping is printed.
llhisto :: FH.Handle -> IO ()
llhisto src = print =<< (S.fold (FL.classify FL.length)
        $ S.map bucket
        $ S.splitWithSuffix (== ord' '\n') FL.length
        $ S.unfold FH.read src)
    where
    -- Buckets are ranges of ten: lengths 0-9 go to bucket 0, 10-19 to
    -- bucket 1, and so on; everything from 90 upwards shares bucket 9.
    -- `div` (not `mod`) is required here: `n `mod` 10` is always below 10,
    -- which made the cap unreachable and grouped lines by the last digit of
    -- their length instead of by range.
    bucket n = (min 9 (n `div` 10), n)
|
|
|
|
-- Run every example against the file named by the first command line
-- argument (any further arguments are ignored), then copy that file to
-- "dst-xyz.txt".
--
-- All examples share one read handle, which is rewound to the start of the
-- file before each of them. Both handles are opened with withFile so that
-- they are closed, and the destination flushed, even if an example throws.
main :: IO ()
main = do
    args <- getArgs
    name <- case args of
        (path : _) -> return path
        -- Fail with a readable message instead of the opaque `head []`
        -- error.
        [] -> ioError (userError "missing argument: path of the input file")

    FH.withFile name ReadMode $ \src -> do
        let rewind = hSeek src AbsoluteSeek 0

        rewind >> putStrLn "cat"    >> cat src          -- Unix cat program
        rewind >> putStr "wcl "     >> wcl src          -- Unix wc -l program
        rewind >> putStr "grepc "   >> grepc "aaaa" src -- Unix grep -c program
        rewind >> putStr "avgll "   >> avgll src        -- get average line length
        rewind >> putStr "llhisto " >> llhisto src      -- get line length histogram

        FH.withFile "dst-xyz.txt" WriteMode $ \dst ->
            rewind >> putStr "cp " >> cp src dst        -- Unix cp program
|