improve csv decoding

This commit is contained in:
Andrew Martin 2016-07-02 20:34:35 -04:00
parent b8da6c0fab
commit 45de414367
4 changed files with 151 additions and 36 deletions

View File

@ -1,5 +1,6 @@
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE BangPatterns #-}
module Colonnade.Decoding where
import Colonnade.Internal (EitherWrap(..))
@ -24,24 +25,47 @@ headless f = DecodingAp Headless f (DecodingPure id)
headed :: content -> (content -> Either String a) -> Decoding Headed content a
headed h f = DecodingAp (Headed h) f (DecodingPure id)
-- | This function uses 'unsafeIndex' to access
-- elements of the 'Vector'.
uncheckedRunWithRow ::
Int
-> Decoding (Indexed f) content a
-> Vector content
-> Either (DecodingRowError f content) a
uncheckedRunWithRow i d v = mapLeft (DecodingRowError i . RowErrorDecode) (uncheckedRun d v)
-- | This function does not check to make sure that the indicies in
-- the 'Decoding' are in the 'Vector'.
uncheckedRun :: forall content a f.
Vector content
-> Decoding (Indexed f) content a
-> Either (DecodingErrors f content) a
uncheckedRun v = getEitherWrap . go
Decoding (Indexed f) content a
-> Vector content
-> Either (DecodingCellErrors f content) a
uncheckedRun dc v = getEitherWrap (go dc)
where
go :: forall b.
Decoding (Indexed f) content b
-> EitherWrap (DecodingErrors f content) b
-> EitherWrap (DecodingCellErrors f content) b
go (DecodingPure b) = EitherWrap (Right b)
go (DecodingAp ixed@(Indexed ix h) decode apNext) =
let rnext = go apNext
content = Vector.unsafeIndex v ix
rcurrent = mapLeft (DecodingErrors . Vector.singleton . DecodingError content ixed) (decode content)
rcurrent = mapLeft (DecodingCellErrors . Vector.singleton . DecodingCellError content ixed) (decode content)
in rnext <*> (EitherWrap rcurrent)
headlessToIndexed :: forall c a.
Decoding Headless c a -> Decoding (Indexed Headless) c a
headlessToIndexed = go 0 where
go :: forall b. Int -> Decoding Headless c b -> Decoding (Indexed Headless) c b
go !ix (DecodingPure a) = DecodingPure a
go !ix (DecodingAp Headless decode apNext) =
DecodingAp (Indexed ix Headless) decode (go (ix + 1) apNext)
length :: forall f c a. Decoding f c a -> Int
length = go 0 where
go :: forall b. Int -> Decoding f c b -> Int
go !a (DecodingPure _) = a
go !a (DecodingAp _ _ apNext) = go (a + 1) apNext
-- | Maps over a 'Decoding' that expects headers, converting these
-- expected headers into the indices of the columns that they
-- correspond to.

View File

@ -9,8 +9,10 @@ module Colonnade.Types
, Headless(..)
, Indexed(..)
, HeadingErrors(..)
, DecodingError(..)
, DecodingErrors(..)
, DecodingCellError(..)
, DecodingRowError(..)
, DecodingCellErrors(..)
, RowError(..)
) where
import Data.Vector (Vector)
@ -29,8 +31,8 @@ data Headless a = Headless
deriving (Eq,Ord,Functor,Show,Read)
data Indexed f a = Indexed
{ indexedIndex :: Int
, indexedHeading :: f a
{ indexedIndex :: !Int
, indexedHeading :: !(f a)
} deriving (Eq,Ord,Functor,Show,Read)
data HeadingErrors content = HeadingErrors
@ -45,18 +47,31 @@ instance Monoid (HeadingErrors content) where
mappend (HeadingErrors a1 b1) (HeadingErrors a2 b2) = HeadingErrors
(a1 Vector.++ a2) (b1 Vector.++ b2)
data DecodingError f content = DecodingError
{ decodingErrorContent :: content
, decodingErrorHeader :: Indexed f content
, decodingErrorMessage :: String
data DecodingCellError f content = DecodingCellError
{ decodingCellErrorContent :: !content
, decodingCellErrorHeader :: !(Indexed f content)
, decodingCellErrorMessage :: !String
} deriving (Show,Read)
-- instance (Show (f content), Typeable content) => Exception (DecodingError f content)
newtype DecodingErrors f content = DecodingErrors
{ getDecodingErrors :: Vector (DecodingError f content)
newtype DecodingCellErrors f content = DecodingCellErrors
{ getDecodingCellErrors :: Vector (DecodingCellError f content)
} deriving (Monoid,Show,Read)
-- newtype ParseRowError = ParseRowError String
data DecodingRowError f content = DecodingRowError
{ decodingRowErrorRow :: !Int
, decodingRowErrorError :: !(RowError f content)
}
data RowError f content
= RowErrorParse !String -- ^ Error occurred parsing the document into cells
| RowErrorDecode !(DecodingCellErrors f content) -- ^ Error decoding the content
| RowErrorSize !Int !Int -- ^ Wrong number of cells in the row
| RowErrorHeading !(HeadingErrors content)
-- instance (Show (f content), Typeable content) => Exception (DecodingErrors f content)
instance Contravariant Headless where

View File

@ -1,13 +1,17 @@
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE BangPatterns #-}
module Siphon.Decoding where
import Siphon.Types
import Colonnade.Types
import Siphon.Internal (row,comma)
import Data.Text (Text)
import Data.ByteString (ByteString)
import Pipes (yield,Pipe,Consumer',Producer,await)
import Data.Vector (Vector)
import qualified Data.Vector as Vector
import qualified Colonnade.Decoding as Decoding
import qualified Data.Attoparsec.ByteString as AttoByteString
import qualified Data.ByteString.Char8 as ByteString
import qualified Data.Attoparsec.Types as Atto
@ -24,24 +28,96 @@ byteStringChar8 = SiphonDecoding
-- -> Vector c
-- -> Either DecodingErrors a
pipe :: Monad m
=> SiphonDecoding c1 c2
-> Atto.Parser c1 (WithEnd c2)
-> Pipe c1 (Vector c2) m String
pipe (SiphonDecoding parse isNull) p = go1 where
go1 = do
-- decodeVectorPipe ::
-- Monad m
-- => Decoding (Indexed f) c a
-- -> Pipe (Vector c) a m ()
-- decodeVectorPipe
mkParseError :: Int -> [String] -> String -> DecodingRowError f content
mkParseError i ctxs msg = id
$ DecodingRowError i
$ RowErrorParse $ concat
[ "Contexts: ["
, concat ctxs
, "], Error Message: ["
, msg
, "]"
]
headlessPipe :: Monad m
=> SiphonDecoding c1 c2
-> Decoding Headless c2 a
-> Pipe c1 a m (DecodingRowError Headless c2)
headlessPipe sd decoding = uncheckedPipe requiredLength 0 sd indexedDecoding Nothing
where
indexedDecoding = Decoding.headlessToIndexed decoding
requiredLength = Decoding.length indexedDecoding
headedPipe :: (Monad m, Eq c2)
=> SiphonDecoding c1 c2
-> Decoding Headed c2 a
-> Pipe c1 a m (DecodingRowError Headed c2)
headedPipe sd decoding = do
(headers, mleftovers) <- consumeGeneral sd mkParseError
case Decoding.headedToIndexed headers decoding of
Left headingErrs -> return (DecodingRowError 0 (RowErrorHeading headingErrs))
Right indexedDecoding ->
let requiredLength = Decoding.length indexedDecoding
in uncheckedPipe requiredLength 1 sd indexedDecoding mleftovers
uncheckedPipe :: Monad m
=> Int -- ^ expected length of each row
-> Int -- ^ index of first row, usually zero or one
-> SiphonDecoding c1 c2
-> Decoding (Indexed f) c2 a
-> Maybe c1
-> Pipe c1 a m (DecodingRowError f c2)
uncheckedPipe requiredLength ix sd d mleftovers =
pipeGeneral ix sd mkParseError checkedRunWithRow mleftovers
where
checkedRunWithRow rowIx v =
let vlen = Vector.length v in
if vlen /= requiredLength
then Left $ DecodingRowError rowIx
$ RowErrorSize requiredLength vlen
else Decoding.uncheckedRunWithRow rowIx d v
consumeGeneral :: Monad m
=> SiphonDecoding c1 c2
-> (Int -> [String] -> String -> e)
-> Consumer' c1 m (Vector c2, Maybe c1)
consumeGeneral = error "ahh"
pipeGeneral :: Monad m
=> Int -- ^ index of first row, usually zero or one
-> SiphonDecoding c1 c2
-> (Int -> [String] -> String -> e)
-> (Int -> Vector c2 -> Either e a)
-> Maybe c1 -- ^ leftovers that should be handled first
-> Pipe c1 a m e
pipeGeneral initIx (SiphonDecoding parse isNull) wrapParseError decodeRow mleftovers =
case mleftovers of
Nothing -> go1 initIx
Just leftovers -> handleResult initIx (parse leftovers)
where
go1 !ix = do
c1 <- awaitSkip isNull
handleResult (parse c1)
go2 c1 = handleResult (parse c1)
go3 k = do
handleResult ix (parse c1)
go2 !ix c1 = handleResult ix (parse c1)
go3 !ix k = do
c1 <- awaitSkip isNull
handleResult (k c1)
handleResult r = case r of
Atto.Fail _ _ _ -> error "ahh"
handleResult ix (k c1)
handleResult !ix r = case r of
Atto.Fail _ ctxs msg -> return $ wrapParseError ix ctxs msg
Atto.Done c1 v -> do
yield v
if isNull c1 then go1 else go2 c1
Atto.Partial k -> go3 k
case decodeRow ix v of
Left err -> return err
Right r -> do
yield r
if isNull c1 then go1 ix else go2 ix c1
Atto.Partial k -> go3 ix k
awaitSkip :: Monad m
=> (a -> Bool)

View File

@ -18,10 +18,10 @@ data SiphonDecoding c1 c2 = SiphonDecoding
, siphonDecodingNull :: c1 -> Bool
}
data WithEnd c = WithEnd
{ withEndEnded :: Bool
, withEndContent :: c
}
-- data WithEnd c = WithEnd
-- { withEndEnded :: !Bool
-- , withEndContent :: !c
-- }
-- data SiphonDecodingError
-- { clarify