refactored siphon type and improved testing

This commit is contained in:
Andrew Martin 2016-07-04 21:42:19 -04:00
parent e3c254a82e
commit 3bfd8265bc
8 changed files with 124 additions and 66 deletions

View File

@ -19,6 +19,7 @@ library
Siphon.ByteString.Char8
Siphon
Siphon.Types
Siphon.Content
Siphon.Encoding
Siphon.Decoding
Siphon.Internal
@ -48,6 +49,9 @@ test-suite siphon-test
, QuickCheck
, text
, bytestring
, pipes
, HUnit
, test-framework-hunit
ghc-options: -threaded -rtsopts -with-rtsopts=-N
default-language: Haskell2010

View File

@ -0,0 +1,5 @@
module Siphon.Content
( byteStringChar8
) where
import Siphon.Internal

View File

@ -16,11 +16,6 @@ import qualified Data.Attoparsec.ByteString as AttoByteString
import qualified Data.ByteString.Char8 as ByteString
import qualified Data.Attoparsec.Types as Atto
byteStringChar8 :: SiphonDecoding ByteString ByteString
byteStringChar8 = SiphonDecoding
(AttoByteString.parse (row comma))
ByteString.null
-- unrow :: c1 -> (Vector c2,c1)
--
-- row :: _
@ -47,18 +42,18 @@ mkParseError i ctxs msg = id
-- | This is seldom useful but is included for completeness.
headlessPipe :: Monad m
=> SiphonDecoding c1 c2
-> Decoding Headless c2 a
-> Pipe c1 a m (DecodingRowError Headless c2)
=> Siphon c
-> Decoding Headless c a
-> Pipe c a m (DecodingRowError Headless c)
headlessPipe sd decoding = uncheckedPipe requiredLength 0 sd indexedDecoding Nothing
where
indexedDecoding = Decoding.headlessToIndexed decoding
requiredLength = Decoding.length indexedDecoding
indexedPipe :: Monad m
=> SiphonDecoding c1 c2
-> Decoding (Indexed Headless) c2 a
-> Pipe c1 a m (DecodingRowError Headless c2)
=> Siphon c
-> Decoding (Indexed Headless) c a
-> Pipe c a m (DecodingRowError Headless c)
indexedPipe sd decoding = do
(firstRow, mleftovers) <- consumeGeneral sd mkParseError
let req = Decoding.maxIndex decoding
@ -72,10 +67,10 @@ indexedPipe sd decoding = do
uncheckedPipe vlen 1 sd decoding mleftovers
headedPipe :: (Monad m, Eq c2)
=> SiphonDecoding c1 c2
-> Decoding Headed c2 a
-> Pipe c1 a m (DecodingRowError Headed c2)
headedPipe :: (Monad m, Eq c)
=> Siphon c
-> Decoding Headed c a
-> Pipe c a m (DecodingRowError Headed c)
headedPipe sd decoding = do
(headers, mleftovers) <- consumeGeneral sd mkParseError
case Decoding.headedToIndexed headers decoding of
@ -88,10 +83,10 @@ headedPipe sd decoding = do
uncheckedPipe :: Monad m
=> Int -- ^ expected length of each row
-> Int -- ^ index of first row, usually zero or one
-> SiphonDecoding c1 c2
-> Decoding (Indexed f) c2 a
-> Maybe c1
-> Pipe c1 a m (DecodingRowError f c2)
-> Siphon c
-> Decoding (Indexed f) c a
-> Maybe c
-> Pipe c a m (DecodingRowError f c)
uncheckedPipe requiredLength ix sd d mleftovers =
pipeGeneral ix sd mkParseError checkedRunWithRow mleftovers
where
@ -103,19 +98,19 @@ uncheckedPipe requiredLength ix sd d mleftovers =
else Decoding.uncheckedRunWithRow rowIx d v
consumeGeneral :: Monad m
=> SiphonDecoding c1 c2
=> Siphon c
-> (Int -> [String] -> String -> e)
-> Consumer' c1 m (Vector c2, Maybe c1)
-> Consumer' c m (Vector c, Maybe c)
consumeGeneral = error "ahh"
pipeGeneral :: Monad m
=> Int -- ^ index of first row, usually zero or one
-> SiphonDecoding c1 c2
-> Siphon c
-> (Int -> [String] -> String -> e)
-> (Int -> Vector c2 -> Either e a)
-> Maybe c1 -- ^ leftovers that should be handled first
-> Pipe c1 a m e
pipeGeneral initIx (SiphonDecoding parse isNull) wrapParseError decodeRow mleftovers =
-> (Int -> Vector c -> Either e a)
-> Maybe c -- ^ leftovers that should be handled first
-> Pipe c a m e
pipeGeneral initIx (Siphon _ _ parse isNull) wrapParseError decodeRow mleftovers =
case mleftovers of
Nothing -> go1 initIx
Just leftovers -> handleResult initIx (parse leftovers)

View File

@ -6,24 +6,30 @@ import Pipes (Pipe,yield)
import qualified Pipes.Prelude as Pipes
import qualified Colonnade.Encoding as Encoding
row :: Siphon c1 c2
-> Encoding f c1 a
row :: Siphon c
-> Encoding f c a
-> a
-> c2
row (Siphon escape intercalate) e =
-> c
row (Siphon escape intercalate _ _) e =
intercalate . Encoding.runRow escape e
header :: Siphon c1 c2
-> Encoding Headed c1 a
-> c2
header (Siphon escape intercalate) e =
header :: Siphon c
-> Encoding Headed c a
-> c
header (Siphon escape intercalate _ _) e =
intercalate (Encoding.runHeader escape e)
pipe :: Monad m => Siphon c1 c2 -> Encoding f c1 a -> Pipe a c2 m x
pipe :: Monad m
=> Siphon c
-> Encoding f c a
-> Pipe a c m x
pipe siphon encoding = Pipes.map (row siphon encoding)
pipeWithHeader :: Monad m => Siphon c1 c2 -> Encoding Headed c1 a -> Pipe a c2 m x
pipeWithHeader siphon encoding = do
headedPipe :: Monad m
=> Siphon c
-> Encoding Headed c a
-> Pipe a c m x
headedPipe siphon encoding = do
yield (header siphon encoding)
pipe siphon encoding

View File

@ -29,31 +29,50 @@ import qualified Data.ByteString.Unsafe as S
import qualified Data.Vector as V
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as LByteString
import qualified Data.ByteString.Builder as Builder
import Data.Word (Word8)
import Data.Vector (Vector)
import Data.ByteString (ByteString)
import Data.Coerce (coerce)
import Siphon.Types
import Control.Applicative
import Data.Monoid
-- parse :: Monad m
-- => SiphonDecoding c1 c2
-- -> Atto.Parser a b -- ^ Attoparsec parser
-- -> Pipes.Parser a m (Maybe (Either ParsingError b)) -- ^ Pipes parser
-- parse parser = S.StateT $ \p0 -> do
-- x <- nextSkipEmpty p0
-- case x of
-- Left r -> return (Nothing, return r)
-- Right (a,p1) -> step (yield a >>) (_parse parser a) p1
-- where
-- step diffP res p0 = case res of
-- Fail _ c m -> return (Just (Left (ParsingError c m)), diffP p0)
-- Done a b -> return (Just (Right b), yield a >> p0)
-- Partial k -> do
-- x <- nextSkipEmpty p0
-- case x of
-- Left e -> step diffP (k mempty) (return e)
-- Right (a,p1) -> step (diffP . (yield a >>)) (k a) p1
byteStringChar8 :: Siphon ByteString
byteStringChar8 = Siphon
escape
encodeRow
(A.parse (row comma))
B.null
encodeRow :: Vector (Escaped ByteString) -> ByteString
encodeRow = id
. flip B.append (B.singleton newline)
. B.intercalate (B.singleton comma)
. V.toList
. coerce
escape :: ByteString -> Escaped ByteString
escape t = case B.find (\c -> c == newline || c == comma || c == doubleQuote) t of
Nothing -> Escaped t
Just _ -> escapeAlways t
-- | This implementation is definitely suboptimal.
-- A better option (which would waste a little space
-- but would be much faster) would be to build the
-- new bytestring by writing to a buffer directly.
escapeAlways :: ByteString -> Escaped ByteString
escapeAlways t = Escaped $ LByteString.toStrict $ Builder.toLazyByteString $
Builder.word8 doubleQuote
<> B.foldl
(\ acc b -> acc <> if b == doubleQuote
then Builder.byteString
(B.pack [doubleQuote,doubleQuote])
else Builder.word8 b)
mempty
t
<> Builder.word8 doubleQuote
-- | Specialized version of 'sepBy1'' which is faster due to not
-- accepting an arbitrary separator.

View File

@ -7,12 +7,13 @@ import Data.Coerce (coerce)
import qualified Data.Text as Text
import qualified Data.Vector as Vector
siphon :: Siphon Text Text
siphon :: Siphon Text
siphon = Siphon escape encodeRow
(error "siphon: uhoent") (error "siphon: uheokj")
encodeRow :: Vector (Escaped Text) -> Text
encodeRow = id
. Text.intercalate (Text.pack ",")
. Text.intercalate (Text.singleton ',')
. Vector.toList
. coerce

View File

@ -5,12 +5,19 @@ import qualified Data.Attoparsec.Types as Atto
newtype Escaped c = Escaped { getEscaped :: c }
data Siphon c = Siphon
{ siphonEscape :: !(c -> Escaped c)
, siphonIntercalate :: !(Vector (Escaped c) -> c)
, siphonParseRow :: c -> Atto.IResult c (Vector c)
, siphonNull :: c -> Bool
}
-- | Consider changing out the use of 'Vector' here
-- with the humble list instead. It might fuse away
-- better. Not sure though.
data Siphon c1 c2 = Siphon
{ siphonEscape :: !(c1 -> Escaped c2)
, siphonIntercalate :: !(Vector (Escaped c2) -> c2)
data SiphonX c1 c2 = SiphonX
{ siphonXEscape :: !(c1 -> Escaped c2)
, siphonXIntercalate :: !(Vector (Escaped c2) -> c2)
}
data SiphonDecoding c1 c2 = SiphonDecoding

View File

@ -1,11 +1,16 @@
{-# LANGUAGE OverloadedStrings #-}
module Main (main) where
import Test.QuickCheck (Gen, Arbitrary(..), choose)
import Test.HUnit (Assertion,(@?=))
import Test.Framework (defaultMain, testGroup, Test)
import Test.Framework.Providers.QuickCheck2 (testProperty)
import Test.Framework.Providers.HUnit (testCase)
import Data.ByteString (ByteString)
import Data.Either.Combinators
import Colonnade.Types
import Data.Functor.Identity
import Data.Functor.Contravariant (contramap)
import Data.Functor.Contravariant.Divisible (divided,conquered)
import qualified Data.ByteString.Builder as Builder
@ -14,21 +19,27 @@ import qualified Data.ByteString as ByteString
import qualified Data.ByteString.Char8 as BC8
import qualified Colonnade.Decoding as Decoding
import qualified Colonnade.Encoding as Encoding
import qualified Siphon.Encoding as SE
import qualified Siphon.Decoding as SD
import qualified Siphon.Content as SC
import qualified Pipes.Prelude as Pipes
import Pipes
main :: IO ()
main = defaultMain tests
tests :: [Test]
tests = []
tests =
[ testGroup "ByteString encode/decode"
[ testProperty "Headless Isomorphism (int,char,bool)"
$ propEncodeDecodeIso
(ipv4ToTextNaive)
(ipv4FromTextNaive)
[ testCase "Headless Encoding (int,char,bool)" testEncodingA
, testProperty "Headless Isomorphism (int,char,bool)"
$ propIsoPipe $
(SE.pipe SC.byteStringChar8 encodingA)
>->
(void $ SD.headlessPipe SC.byteStringChar8 decodingA)
]
]
byteStringDecodeInt :: ByteString -> Either String Int
byteStringDecodeInt b = do
(a,bsRem) <- maybe (Left "could not parse int") Right (BC8.readInt b)
@ -78,6 +89,16 @@ encodingA = contramap tripleToPairs
tripleToPairs :: (a,b,c) -> (a,(b,(c,())))
tripleToPairs (a,b,c) = (a,(b,(c,())))
propIsoPipe :: Eq a => Pipe a a Identity () -> [a] -> Bool
propIsoPipe p as = (Pipes.toList $ each as >-> p) == as
testEncodingA :: Assertion
testEncodingA =
( ByteString.concat $ Pipes.toList $
Pipes.yield (4,'c',False) >-> SE.pipe SC.byteStringChar8 encodingA
) @?= "4,c,false\n"
propEncodeDecodeIso :: Eq a => (a -> b) -> (b -> Maybe a) -> a -> Bool
propEncodeDecodeIso f g a = g (f a) == Just a