Begin overhaul of siphon

2024-09-11 06:45:41 +03:00 · 2017-06-09 19:28:32 -04:00 · 2017-06-09 19:28:32 -04:00 · 03e9e3734b
commit 03e9e3734b
parent fca7d72085
5 changed files with 660 additions and 132 deletions
--- a/.gitignore
+++ b/.gitignore
@ -25,5 +25,6 @@ tmp/
 tags
 TAGS
 colonnade/ex1.hs
+colonnade/result

 reflex-dom-colonnade/result
--- a/siphon/siphon.cabal
+++ b/siphon/siphon.cabal
@ -1,29 +1,22 @@
-name:                siphon
-version:             0.6
-synopsis:            Encode and decode CSV files
-description:         Please see README.md
-homepage:            https://github.com/andrewthad/colonnade#readme
-license:             BSD3
-license-file:        LICENSE
-author:              Andrew Martin
-maintainer:          andrew.thaddeus@gmail.com
-copyright:           2016 Andrew Martin
-category:            web
-build-type:          Simple
-cabal-version:       >=1.10
+name: siphon
+version: 0.7
+synopsis: Encode and decode CSV files
+description: Please see README.md
+homepage: https://github.com/andrewthad/colonnade#readme
+license: BSD3
+license-file: LICENSE
+author: Andrew Martin
+maintainer: andrew.thaddeus@gmail.com
+copyright: 2016 Andrew Martin
+category: web
+build-type: Simple
+cabal-version: >=1.10

 library
  hs-source-dirs:      src
  exposed-modules:
-    Siphon.Text
-    Siphon.ByteString.Char8
    Siphon
    Siphon.Types
-    Siphon.Content
-    Siphon.Encoding
-    Siphon.Decoding
-    Siphon.Internal
-    Siphon.Internal.Text
  build-depends:
      base >= 4.7 && < 5
    , colonnade >= 1.1 && < 1.2
@ -31,8 +24,9 @@ library
    , bytestring
    , contravariant
    , vector
-    , pipes
+    , streaming
    , attoparsec
+    , transformers
  default-language:    Haskell2010

 test-suite siphon-test
--- a/siphon/src/Siphon.hs
+++ b/siphon/src/Siphon.hs
@ -1,11 +1,595 @@
-module Siphon where
+{-# LANGUAGE DeriveFunctor #-}
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE RankNTypes #-}

-- encode :: Pipe a (Vector c) m x
-- encode
-- decode :: Pipe (Vector c) a m x
+-- {-# OPTIONS_GHC -Wall -Werr -fno-warn-unused-imports #-}

-- encode ::
+module Siphon
+  ( Siphon
+  , SiphonError
+  , Indexed(..)
+  , decodeHeadedChar8Csv
+  , humanizeSiphonError
+  ) where

-- row :: Vector (Escaped Text) -> Text
-- row = Vector.
+import Siphon.Types
+import Data.Monoid
+import Control.Applicative
+import Control.Monad
+
+import qualified Data.ByteString.Char8 as BC8
+import qualified Data.Attoparsec.ByteString as A
+import qualified Data.Attoparsec.Lazy as AL
+import qualified Data.Attoparsec.Zepto as Z
+import qualified Data.ByteString as S
+import qualified Data.ByteString.Unsafe as S
+import qualified Data.Vector as V
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as LByteString
+import qualified Data.ByteString.Builder as Builder
+import qualified Data.Text as T
+import qualified Data.List as L
+import qualified Streaming as SM
+import qualified Streaming.Prelude as SMP
+import qualified Data.Attoparsec.Types as ATYP
+import qualified Colonnade.Encode as CE
+
+import Control.Monad.Trans.Class
+import Data.ByteString.Builder (toLazyByteString,byteString)
+import Data.Attoparsec.ByteString.Char8 (char, endOfInput, string)
+import Data.Word (Word8)
+import Data.Vector (Vector)
+import Data.ByteString (ByteString)
+import Data.Coerce (coerce)
+import Data.Char (chr)
+import Streaming (Stream,Of(..))
+
+newtype Escaped c = Escaped { getEscaped :: c }
+data Ended = EndedYes | EndedNo
+data CellResult c = CellResultData !c | CellResultNewline !Ended
+
+decodeHeadedChar8Csv :: Monad m 
+  => Siphon CE.Headed ByteString a
+  -> Stream (Of ByteString) m () -- ^ encoded csv
+  -> Stream (Of a) m (Maybe (SiphonError ByteString))
+decodeHeadedChar8Csv headedSiphon s1 = do
+  e <- lift (consumeHeaderRowChar8 s1)
+  case e of
+    Left err -> return (Just err)
+    Right (v :> s2) -> case headedToIndexed v headedSiphon of
+      Left err -> return (Just err)
+      Right ixedSiphon -> do
+        let requiredLength = V.length v
+        consumeBodyChar8 1 requiredLength ixedSiphon s2
+
+encodeHeadedChar8Csv :: Monad m 
+  => Colonnade CE.Headed ByteString a
+  -> Stream (Of a) m r
+  -> Stream (Of ByteString) m r
+encodeHeadedChar8Csv headedSiphon s1 = do
+  yield (header siphon encoding)
+  pipe siphon encoding
+
+encodeGeneralCsv :: Monad m
+  => (c -> Escaped c)
+  -> c -- ^ separator
+  -> Colonnade f a c
+  -> Stream (Of a) m r
+  -> Stream (Of c) m r
+encodeGeneralCsv escapeFunc separatorStr colonnade = do
+  Pipes.map (row siphon encoding)
+
+encodeHeader :: Siphon c -> Colonnade Headed a c -> c
+  => (c -> Escaped c)
+  -> c -- ^ separator
+  -> Colonnade f a c
+  -> Stream (Of c) m r
+encodeHeader escapeFunc separatorStr colonnade = SMP.mapM_ $ \a -> do
+  let (vs,ws) = V.splitAt 1 (CE.getColonnade colonnade)
+  -- we only need to do this split because the first cell
+  -- gets treated differently than the others. It does not
+  -- get a separator added before it.
+  V.forM_ vs $ \(CE.OneColonnade _ encode) -> yield (getEscaped (escapeFunc (encode a)))
+  V.forM_ ws $ \(CE.OneColonnade _ encode) -> do
+    yield separator
+    yeied (getEscaped (escapeFunc (encode a)))
+
+encodeRow :: 
+  => (c -> Escaped c)
+  -> c -- ^ separator
+  -> Colonnade f a c
+  -> Stream (Of a) m r
+  -> Stream (Of c) m r
+encodeRow escapeFunc separatorStr colonnade = SMP.mapM_ $ \a -> do
+  let (vs,ws) = V.splitAt 1 (CE.getColonnade colonnade)
+  -- we only need to do this split because the first cell
+  -- gets treated differently than the others. It does not
+  -- get a separator added before it.
+  V.forM_ vs $ \(CE.OneColonnade _ encode) -> yield (getEscaped (escapeFunc (encode a)))
+  V.forM_ ws $ \(CE.OneColonnade _ encode) -> do
+    yield separator
+    yeied (getEscaped (escapeFunc (encode a)))
+
+data IndexedHeader a = IndexedHeader
+  { indexedHeaderIndexed :: {-# UNPACK #-} !Int
+  , indexedHeaderHeader :: !a
+  } 
+
+-- | Maps over a 'Decolonnade' that expects headers, converting these
+--   expected headers into the indices of the columns that they
+--   correspond to.
+headedToIndexed :: forall c a. Eq c
+  => Vector c -- ^ Headers in the source document
+  -> Siphon CE.Headed c a -- ^ Decolonnade that contains expected headers
+  -> Either (SiphonError c) (Siphon IndexedHeader c a)
+headedToIndexed v =
+    mapLeft (\(HeaderErrors a b c) -> SiphonError 0 (RowErrorHeaders a b c)) 
+  . getEitherWrap
+  . go
+  where
+  go :: forall b.
+        Siphon CE.Headed c b
+     -> EitherWrap (HeaderErrors c) (Siphon IndexedHeader c b)
+  go (SiphonPure b) = EitherWrap (Right (SiphonPure b))
+  go (SiphonAp (CE.Headed h) decode apNext) =
+    let rnext = go apNext
+        ixs = V.elemIndices h v
+        ixsLen = V.length ixs
+        rcurrent
+          | ixsLen == 1 = Right (V.unsafeIndex ixs 0)
+          | ixsLen == 0 = Left (HeaderErrors V.empty (V.singleton h) V.empty)
+          | otherwise   =
+              let dups = V.singleton (V.map (\ix -> CellError ix (V.unsafeIndex v ix)) ixs)
+               in Left (HeaderErrors dups V.empty V.empty)
+    in (\ix nextSiphon -> SiphonAp (IndexedHeader ix h) decode nextSiphon)
+       <$> EitherWrap rcurrent
+       <*> rnext
+
+data HeaderErrors c = HeaderErrors !(Vector (Vector (CellError c))) !(Vector c) !(Vector Int)
+
+instance Monoid (HeaderErrors c) where
+  mempty = HeaderErrors mempty mempty mempty
+  mappend (HeaderErrors a1 b1 c1) (HeaderErrors a2 b2 c2) = HeaderErrors
+    (mappend a1 a2) (mappend b1 b2) (mappend c1 c2)
+
+-- byteStringChar8 :: Siphon ByteString
+-- byteStringChar8 = Siphon
+--   escape
+--   encodeRow
+--   (A.parse (row comma))
+--   B.null
+
+encodeRow :: Vector (Escaped ByteString) -> ByteString
+encodeRow = id
+  . flip B.append (B.singleton newline)
+  . B.intercalate (B.singleton comma)
+  . V.toList
+  . coerce
+
+escape :: ByteString -> Escaped ByteString
+escape t = case B.find (\c -> c == newline || c == cr || c == comma || c == doubleQuote) t of
+  Nothing -> Escaped t
+  Just _  -> escapeAlways t
+
+-- | This implementation is definitely suboptimal.
+-- A better option (which would waste a little space
+-- but would be much faster) would be to build the
+-- new bytestring by writing to a buffer directly.
+escapeAlways :: ByteString -> Escaped ByteString
+escapeAlways t = Escaped $ LByteString.toStrict $ Builder.toLazyByteString $
+     Builder.word8 doubleQuote
+  <> B.foldl
+      (\ acc b -> acc <> if b == doubleQuote
+          then Builder.byteString
+            (B.pack [doubleQuote,doubleQuote])
+          else Builder.word8 b)
+      mempty
+      t
+  <> Builder.word8 doubleQuote
+
+-- | Specialized version of 'sepBy1'' which is faster due to not
+-- accepting an arbitrary separator.
+sepByDelim1' :: AL.Parser a
+             -> Word8  -- ^ Field delimiter
+             -> AL.Parser [a]
+sepByDelim1' p !delim = liftM2' (:) p loop
+  where
+    loop = do
+        mb <- A.peekWord8
+        case mb of
+            Just b | b == delim -> liftM2' (:) (A.anyWord8 *> p) loop
+            _                   -> pure []
+{-# INLINE sepByDelim1' #-}
+
+-- | Parse a record, not including the terminating line separator. The
+-- terminating line separate is not included as the last record in a
+-- CSV file is allowed to not have a terminating line separator. You
+-- most likely want to use the 'endOfLine' parser in combination with
+-- this parser.
+-- row :: Word8  -- ^ Field delimiter
+--     -> AL.Parser (Vector ByteString)
+-- row !delim = rowNoNewline delim <* endOfLine
+-- {-# INLINE row #-}
+-- 
+-- rowNoNewline :: Word8  -- ^ Field delimiter
+--              -> AL.Parser (Vector ByteString)
+-- rowNoNewline !delim = V.fromList <$!> field delim `sepByDelim1'` delim
+-- {-# INLINE rowNoNewline #-}
+-- 
+-- removeBlankLines :: [Vector ByteString] -> [Vector ByteString]
+-- removeBlankLines = filter (not . blankLine)
+
+-- | Parse a field. The field may be in either the escaped or
+--   non-escaped format. The return value is unescaped. This
+--   parser will consume the comma that comes after a field
+--   but not a newline that follows a field. If we are positioned
+--   at a newline when it starts, that newline will be consumed
+--   and we return CellResultNewline.
+field :: Word8 -> AL.Parser (CellResult ByteString)
+field !delim = do
+  mb <- A.peekWord8
+  -- We purposely don't use <|> as we want to commit to the first
+  -- choice if we see a double quote.
+  case mb of
+    Just b
+      | b == delim -> do
+          bs <- escapedField delim
+          return (CellResultData bs)
+      | b == 10 || b == 13 -> do
+          _ <- eatNewlines
+          isEnd <- A.atEnd
+          if isEnd
+            then return (CellResultNewline EndedYes)
+            else return (CellResultNewline EndedNo)
+      | otherwise -> do
+          bs <- unescapedField delim
+          return (CellResultData bs)
+    Nothing -> return (CellResultNewline EndedYes)
+{-# INLINE field #-}
+
+eatNewlines :: AL.Parser S.ByteString
+eatNewlines = A.takeWhile (\x -> x == 10 || x == 13)
+
+escapedField :: Word8 -> AL.Parser S.ByteString
+escapedField !delim = do
+  _ <- dquote
+  -- The scan state is 'True' if the previous character was a double
+  -- quote.  We need to drop a trailing double quote left by scan.
+  s <- S.init <$> (A.scan False $ \s c -> if c == doubleQuote
+                                          then Just (not s)
+                                          else if s then Nothing
+                                               else Just False)
+  A.skip (== delim)
+  if doubleQuote `S.elem` s
+      then case Z.parse unescape s of
+          Right r  -> return r
+          Left err -> fail err
+      else return s
+
+-- | Consume an unescaped field. If it ends with a newline,
+--   leave that in tact. If it ends with a comma, consume the comma.
+unescapedField :: Word8 -> AL.Parser S.ByteString
+unescapedField !delim = 
+  ( A.takeWhile $ \c -> 
+    c /= doubleQuote &&
+    c /= newline &&
+    c /= delim &&
+    c /= cr
+  ) <* A.skip (== delim)
+
+dquote :: AL.Parser Char
+dquote = char '"'
+
+-- | This could be improved. We could avoid the builder and just
+-- write to a buffer directly.
+unescape :: Z.Parser S.ByteString
+unescape = (LByteString.toStrict . toLazyByteString) <$!> go mempty where
+  go acc = do
+    h <- Z.takeWhile (/= doubleQuote)
+    let rest = do
+          start <- Z.take 2
+          if (S.unsafeHead start == doubleQuote &&
+              S.unsafeIndex start 1 == doubleQuote)
+              then go (acc `mappend` byteString h `mappend` byteString (BC8.singleton '"'))
+              else fail "invalid CSV escape sequence"
+    done <- Z.atEnd
+    if done
+      then return (acc `mappend` byteString h)
+      else rest
+
+-- | Is this an empty record (i.e. a blank line)?
+blankLine :: V.Vector B.ByteString -> Bool
+blankLine v = V.length v == 1 && (B.null (V.head v))
+
+-- | A version of 'liftM2' that is strict in the result of its first
+-- action.
+liftM2' :: (Monad m) => (a -> b -> c) -> m a -> m b -> m c
+liftM2' f a b = do
+    !x <- a
+    y <- b
+    return (f x y)
+{-# INLINE liftM2' #-}
+
+
+-- | Match either a single newline character @\'\\n\'@, or a carriage
+-- return followed by a newline character @\"\\r\\n\"@, or a single
+-- carriage return @\'\\r\'@.
+endOfLine :: A.Parser ()
+endOfLine = (A.word8 newline *> return ()) <|> (string (BC8.pack "\r\n") *> return ()) <|> (A.word8 cr *> return ())
+{-# INLINE endOfLine #-}
+
+doubleQuote, newline, cr, comma :: Word8
+doubleQuote = 34
+newline = 10
+cr = 13
+comma = 44
+
+-- | This adds one to the index because text editors consider
+--   line number to be one-based, not zero-based.
+humanizeSiphonError :: Eq c => (c -> String) -> SiphonError c -> String
+humanizeSiphonError toStr (SiphonError ix e) = unlines
+  $ ("Decolonnade error on line " ++ show (ix + 1) ++ " of file.")
+  : ("Error Category: " ++ descr)
+  : map ("  " ++) errDescrs
+  where (descr,errDescrs) = prettyRowError toStr e
+
+prettyRowError :: Eq c => (c -> String) -> RowError c -> (String, [String])
+prettyRowError toStr x = case x of
+  RowErrorParse -> (,) "CSV Parsing"
+    [ "The cells were malformed."
+    ]
+  RowErrorSize reqLen actualLen -> (,) "Row Length"
+    [ "Expected the row to have exactly " ++ show reqLen ++ " cells."
+    , "The row only has " ++ show actualLen ++ " cells."
+    ]
+  RowErrorHeaderSize reqLen actualLen -> (,) "Minimum Header Length"
+    [ "Expected the row to have at least " ++ show reqLen ++ " cells."
+    , "The row only has " ++ show actualLen ++ " cells."
+    ]
+  RowErrorMalformed column -> (,) "Text Decolonnade"
+    [ "Tried to decode input input in column " ++ columnNumToLetters column ++ " text"
+    , "There is a mistake in the encoding of the text."
+    ]
+  RowErrorHeaders dupErrs namedErrs unnamedErrs -> (,) "Missing Headers" $ concat
+    [ if V.length namedErrs > 0 then prettyNamedMissingHeaders toStr namedErrs else []
+    , if V.length unnamedErrs > 0 then ["Missing unnamed headers"] else []
+    , if V.length dupErrs > 0 then prettyHeadingErrors toStr dupErrs else []
+    ]
+  RowErrorDecode errs -> (,) "Cell Decolonnade" (prettyCellErrors toStr errs)
+
+prettyCellErrors :: (c -> String) -> Vector (CellError c) -> [String]
+prettyCellErrors toStr errs = drop 1 $
+  flip concatMap errs $ \(CellError ix content) ->
+    let str = toStr content in
+    [ "-----------"
+    , "Column " ++ columnNumToLetters ix
+    , "Cell Content Length: " ++ show (Prelude.length str)
+    , "Cell Content: " ++ if null str
+        then "[empty cell]"
+        else str
+    ]
+
+prettyNamedMissingHeaders :: (c -> String) -> Vector c -> [String]
+prettyNamedMissingHeaders conv missing = concat
+  [ concatMap (\h -> ["The header " ++ conv h ++ " was missing."]) missing
+  ]
+
+prettyHeadingErrors :: forall c. Eq c
+  => (c -> String) -> Vector (Vector (CellError c)) -> [String]
+prettyHeadingErrors conv missing = join (V.toList (fmap f missing))
+  where
+  f :: Vector (CellError c) -> [String]
+  f v
+    | not (V.null w) && V.all (== V.head w) (V.tail w) =
+        [ "The header ["
+        , conv (V.head w)
+        , "] appears in columns "
+        , L.intercalate ", " (V.toList (V.map (\(CellError ix _) -> columnNumToLetters ix) v))
+        ]
+    | otherwise = multiMsg : V.toList
+        (V.map (\(CellError ix content) -> "  Column " ++ columnNumToLetters ix ++ ": " ++ conv content) v)
+    where
+    w :: Vector c
+    w = V.map cellErrorContent v
+    multiMsg :: String
+    multiMsg = "Multiple headers matched the same predicate:"
+
+columnNumToLetters :: Int -> String
+columnNumToLetters i
+  | i >= 0 && i < 25 = [chr (i + 65)]
+  | otherwise = "Beyond Z. Fix this."
+
+newtype EitherWrap a b = EitherWrap
+  { getEitherWrap :: Either a b
+  } deriving (Functor)
+
+instance Monoid a => Applicative (EitherWrap a) where
+  pure = EitherWrap . Right
+  EitherWrap (Left a1) <*> EitherWrap (Left a2) = EitherWrap (Left (mappend a1 a2))
+  EitherWrap (Left a1) <*> EitherWrap (Right _) = EitherWrap (Left a1)
+  EitherWrap (Right _) <*> EitherWrap (Left a2) = EitherWrap (Left a2)
+  EitherWrap (Right f) <*> EitherWrap (Right b) = EitherWrap (Right (f b))
+
+mapLeft :: (a -> b) -> Either a c -> Either b c
+mapLeft _ (Right a) = Right a
+mapLeft f (Left a) = Left (f a)
+
+consumeHeaderRowChar8 :: Monad m
+  => Stream (Of ByteString) m ()
+  -> m (Either (SiphonError ByteString) (Of (Vector ByteString) (Stream (Of ByteString) m ())))
+consumeHeaderRowChar8 = consumeHeaderRow (A.parse (field comma)) B.null B.empty (\() -> True)
+
+consumeBodyChar8 :: forall m a. Monad m
+  => Int -- ^ index of first row, usually zero or one
+  -> Int -- ^ Required row length
+  -> Siphon IndexedHeader ByteString a
+  -> Stream (Of ByteString) m ()
+  -> Stream (Of a) m (Maybe (SiphonError ByteString))
+consumeBodyChar8 = consumeBody (A.parse (field comma)) B.null B.empty (\() -> True)
+
+consumeHeaderRow :: forall m r c. Monad m
+  => (c -> ATYP.IResult c (CellResult c))
+  -> (c -> Bool) -- ^ true if null string
+  -> c
+  -> (r -> Bool) -- ^ true if termination is acceptable
+  -> Stream (Of c) m r
+  -> m (Either (SiphonError c) (Of (Vector c) (Stream (Of c) m r)))
+consumeHeaderRow parseCell isNull emptyStr isGood s0 = go 0 StrictListNil s0
+  where
+  go :: Int
+     -> StrictList c
+     -> Stream (Of c) m r
+     -> m (Either (SiphonError c) (Of (Vector c) (Stream (Of c) m r)))
+  go !cellsLen !cells !s1 = do
+    e <- skipWhile isNull s1
+    case e of
+      Left r -> return $ if isGood r
+        then Right (reverseVectorStrictList cellsLen cells :> return r)
+        else Left (SiphonError 0 RowErrorParse)
+      Right (c :> s2) -> handleResult cellsLen cells (parseCell c) s2
+  handleResult :: Int -> StrictList c 
+               -> ATYP.IResult c (CellResult c)
+               -> Stream (Of c) m r
+               -> m (Either (SiphonError c) (Of (Vector c) (Stream (Of c) m r)))
+  handleResult !cellsLen !cells !result s1 = case result of
+    ATYP.Fail _ _ _ -> return $ Left $ SiphonError 0 RowErrorParse
+    ATYP.Done !c1 !res -> case res of
+      -- it might be wrong to ignore whether or not the stream has ended
+      CellResultNewline _ -> do
+        let v = reverseVectorStrictList cellsLen cells
+        return (Right (v :> (SMP.yield c1 >> s1)))
+      CellResultData !cd -> if isNull c1
+        then go (cellsLen + 1) (StrictListCons cd cells) s1
+        else handleResult (cellsLen + 1) (StrictListCons cd cells) (parseCell c1) s1
+    ATYP.Partial k -> do
+      e <- skipWhile isNull s1
+      case e of
+        Left r -> handleResult cellsLen cells (k emptyStr) (return r)
+        Right (c1 :> s2) -> handleResult cellsLen cells (k c1) s2
+
+consumeBody :: forall m r c a. Monad m
+  => (c -> ATYP.IResult c (CellResult c))
+  -> (c -> Bool)
+  -> c
+  -> (r -> Bool) -- ^ True if termination is acceptable. False if it is because of a decoding error.
+  -> Int -- ^ index of first row, usually zero or one
+  -> Int -- ^ Required row length
+  -> Siphon IndexedHeader c a
+  -> Stream (Of c) m r
+  -> Stream (Of a) m (Maybe (SiphonError c))
+consumeBody parseCell isNull emptyStr isGood row0 reqLen siphon s0 = go row0 0 StrictListNil s0
+  where
+  go :: Int -> Int -> StrictList c -> Stream (Of c) m r -> Stream (Of a) m (Maybe (SiphonError c))
+  go !row !cellsLen !cells !s1 = do
+    e <- lift (skipWhile isNull s1)
+    case e of
+      Left r -> return $ if isGood r
+        then Nothing
+        else Just (SiphonError row RowErrorParse)
+      Right (c :> s2) -> handleResult row cellsLen cells (parseCell c) s2
+  handleResult :: Int -> Int -> StrictList c 
+               -> ATYP.IResult c (CellResult c)
+               -> Stream (Of c) m r
+               -> Stream (Of a) m (Maybe (SiphonError c))
+  handleResult !row !cellsLen !cells !result s1 = case result of
+    ATYP.Fail _ _ _ -> return $ Just $ SiphonError row RowErrorParse
+    ATYP.Done !c1 !res -> case res of
+      CellResultNewline ended -> do
+        case decodeRow row (reverseVectorStrictList cellsLen cells) of
+          Left err -> return (Just err)
+          Right a -> do
+            SMP.yield a
+            case ended of
+              EndedYes -> do
+                e <- lift (SM.inspect s1)
+                case e of
+                  Left r -> return $ if isGood r
+                    then Nothing
+                    else Just (SiphonError row RowErrorParse)
+                  Right _ -> error "siphon: logical error, stream should be exhausted"
+              EndedNo -> if isNull c1 
+                then go (row + 1) 0 StrictListNil s1
+                else handleResult (row + 1) 0 StrictListNil (parseCell c1) s1
+      CellResultData !cd -> if isNull c1
+        then go row (cellsLen + 1) (StrictListCons cd cells) s1
+        else handleResult row (cellsLen + 1) (StrictListCons cd cells) (parseCell c1) s1
+    ATYP.Partial k -> do
+      e <- lift (skipWhile isNull s1)
+      case e of
+        Left r -> handleResult row cellsLen cells (k emptyStr) (return r)
+        Right (c1 :> s2) -> handleResult row cellsLen cells (k c1) s2
+  decodeRow :: Int -> Vector c -> Either (SiphonError c) a
+  decodeRow rowIx v =
+    let vlen = V.length v in
+    if vlen /= reqLen
+      then Left $ SiphonError rowIx $ RowErrorSize reqLen vlen
+      else uncheckedRunWithRow rowIx siphon v
+
+-- | You must pass the length of the list and as the first argument.
+reverseVectorStrictList :: Int -> StrictList c -> Vector c
+reverseVectorStrictList _ _ = error "write me"
+
+skipWhile :: forall m a r. Monad m
+  => (a -> Bool)
+  -> Stream (Of a) m r
+  -> m (Either r (Of a (Stream (Of a) m r)))
+skipWhile f = go where
+  go :: Stream (Of a) m r
+     -> m (Either r (Of a (Stream (Of a) m r)))
+  go s1 = do
+    e <- SM.inspect s1
+    case e of
+      Left _ -> return e
+      Right (a :> s2) -> if f a
+        then go s2
+        else return e
+
+-- | Strict in the spine and in the values
+data StrictList a = StrictListNil | StrictListCons !a !(StrictList a)
+
+-- | This function uses 'unsafeIndex' to access
+--   elements of the 'Vector'.
+uncheckedRunWithRow ::
+     Int
+  -> Siphon IndexedHeader c a
+  -> Vector c
+  -> Either (SiphonError c) a
+uncheckedRunWithRow i d v = mapLeft (SiphonError i . RowErrorDecode) (uncheckedRun d v)
+
+-- | This function does not check to make sure that the indicies in
+--   the 'Decolonnade' are in the 'Vector'. Only use this if you have
+--   already verified that none of the indices in the siphon are
+--   out of the bounds.
+uncheckedRun :: forall c a.
+     Siphon IndexedHeader c a
+  -> Vector c
+  -> Either (Vector (CellError c)) a
+uncheckedRun dc v = getEitherWrap (go dc)
+  where
+  go :: forall b.
+        Siphon IndexedHeader c b
+     -> EitherWrap (Vector (CellError c)) b
+  go (SiphonPure b) = EitherWrap (Right b)
+  go (SiphonAp (IndexedHeader ix _) decode apNext) =
+    let rnext = go apNext
+        content = V.unsafeIndex v ix
+        rcurrent = maybe
+          (Left (V.singleton (CellError ix content)))
+          Right
+          (decode content)
+    in rnext <*> (EitherWrap rcurrent)
+
+siphonLength :: forall f c a. Siphon f c a -> Int
+siphonLength = go 0 where
+  go :: forall b. Int -> Siphon f c b -> Int
+  go !a (SiphonPure _) = a
+  go !a (SiphonAp _ _ apNext) = go (a + 1) apNext
+
+maxIndex :: forall c a. Siphon IndexedHeader c a -> Int
+maxIndex = go 0 where
+  go :: forall b. Int -> Siphon IndexedHeader c b -> Int
+  go !ix (SiphonPure _) = ix
+  go !ix1 (SiphonAp (IndexedHeader ix2 _) _ apNext) =
+    go (max ix1 ix2) apNext

--- a/siphon/src/Siphon/Types.hs
+++ b/siphon/src/Siphon/Types.hs
@ -2,121 +2,76 @@
 {-# LANGUAGE DeriveFunctor #-}
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}

-module Siphon.Types where
+{-# OPTIONS_GHC -Wall -Werror #-}
+
+module Siphon.Types
+  ( Siphon(..)
+  , Indexed(..)
+  , SiphonError(..)
+  , RowError(..)
+  , CellError(..)
+  ) where

 import Data.Vector (Vector)
 import Control.Exception (Exception)
 import Data.Typeable (Typeable)
-import qualified Data.Vector as Vector
-import qualified Data.Attoparsec.Types as Atto

-newtype Escaped c = Escaped { getEscaped :: c }
-
-data Siphon c = Siphon
-  { siphonEscape      :: !(c -> Escaped c)
-  , siphonIntercalate :: !(Vector (Escaped c) -> c)
-  , siphonParseRow    :: c -> Atto.IResult c (Vector c)
-  , siphonNull        :: c -> Bool
-  }
-
-data DecolonnadeCellError f content = DecolonnadeCellError
-  { decodingCellErrorContent :: !content
-  , decodingCellErrorHeader  :: !(Indexed f content)
-  , decodingCellErrorMessage :: !String
+data CellError c = CellError
+  { cellErrorColumn :: !Int
+  , cellErrorContent :: !c
  } deriving (Show,Read,Eq)

-- instance (Show (f content), Typeable content) => Exception (DecolonnadeError f content)
-
-data Indexed f a = Indexed
-  { indexedIndex :: !Int
-  , indexedHeading :: !(f a)
+newtype Indexed a = Indexed
+  { indexedIndex :: Int
  } deriving (Eq,Ord,Functor,Show,Read)

-newtype DecolonnadeCellErrors f content = DecolonnadeCellErrors
-  { getDecolonnadeCellErrors :: Vector (DecolonnadeCellError f content)
-  } deriving (Monoid,Show,Read,Eq)
-
-- newtype ParseRowError = ParseRowError String
-
-- TODO: rewrite the instances for this by hand. They
-- currently use FlexibleContexts.
-data DecolonnadeRowError f content = DecolonnadeRowError
-  { decodingRowErrorRow   :: !Int
-  , decodingRowErrorError :: !(RowError f content)
+data SiphonError c = SiphonError
+  { siphonErrorRow :: !Int
+  , siphonErrorCause :: !(RowError c)
  } deriving (Show,Read,Eq)

-- TODO: rewrite the instances for this by hand. They
-- currently use FlexibleContexts.
-data RowError f content
-  = RowErrorParse !String -- ^ Error occurred parsing the document into cells
-  | RowErrorDecode !(DecolonnadeCellErrors f content) -- ^ Error decoding the content
-  | RowErrorSize !Int !Int -- ^ Wrong number of cells in the row
-  | RowErrorHeading !(HeadingErrors content)
-  | RowErrorMinSize !Int !Int
-  | RowErrorMalformed !String -- ^ Error decoding unicode content
+instance (Show c, Typeable c) => Exception (SiphonError c)
+
+data RowError c
+  = RowErrorParse
+    -- ^ Error occurred parsing the document into cells
+  | RowErrorDecode !(Vector (CellError c))
+    -- ^ Error decoding the content
+  | RowErrorSize !Int !Int
+    -- ^ Wrong number of cells in the row
+  | RowErrorHeaders !(Vector (Vector (CellError c))) !(Vector c) !(Vector Int)
+    -- ^ Three parts:
+    --   (a) Multiple header cells matched the same expected cell, 
+    --   (b) Headers that were missing, 
+    --   (c) Missing headers that were lambdas. They cannot be
+    --   shown so instead their positions in the 'Siphon' are given.
+  | RowErrorHeaderSize !Int !Int
+    -- ^ Not enough cells in header, expected, actual
+  | RowErrorMalformed !Int
+    -- ^ Error decoding unicode content, column number
  deriving (Show,Read,Eq)

-data HeadingErrors content = HeadingErrors
-  { headingErrorsMissing   :: Vector content       -- ^ headers that were missing
-  , headingErrorsDuplicate :: Vector (content,Int) -- ^ headers that occurred more than once
-  } deriving (Show,Read,Eq)
-
-instance (Show content, Typeable content) => Exception (HeadingErrors content)
-
-instance Monoid (HeadingErrors content) where
-  mempty = HeadingErrors Vector.empty Vector.empty
-  mappend (HeadingErrors a1 b1) (HeadingErrors a2 b2) = HeadingErrors
-    (a1 Vector.++ a2) (b1 Vector.++ b2)
-
 -- | This just actually a specialization of the free applicative.
 --   Check out @Control.Applicative.Free@ in the @free@ library to
 --   learn more about this. The meanings of the fields are documented
 --   slightly more in the source code. Unfortunately, haddock does not
 --   play nicely with GADTs.
-data Decolonnade f content a where
-  DecolonnadePure :: !a -- function
-               -> Decolonnade f content a
-  DecolonnadeAp :: !(f content) -- header
-             -> !(content -> Either String a) -- decoding function
-             -> !(Decolonnade f content (a -> b)) -- next decoding
-             -> Decolonnade f content b
+data Siphon f c a where
+  SiphonPure ::
+       !a -- function
+    -> Siphon f c a
+  SiphonAp ::
+       !(f c) -- header
+    -> !(c -> Maybe a) -- decoding function
+    -> !(Siphon f c (a -> b)) -- next decoding
+    -> Siphon f c b

-instance Functor (Decolonnade f content) where
-  fmap f (DecolonnadePure a) = DecolonnadePure (f a)
-  fmap f (DecolonnadeAp h c apNext) = DecolonnadeAp h c ((f .) <$> apNext)
+instance Functor (Siphon f c) where
+  fmap f (SiphonPure a) = SiphonPure (f a)
+  fmap f (SiphonAp h c apNext) = SiphonAp h c ((f .) <$> apNext)

-instance Applicative (Decolonnade f content) where
-  pure = DecolonnadePure
-  DecolonnadePure f <*> y = fmap f y
-  DecolonnadeAp h c y <*> z = DecolonnadeAp h c (flip <$> y <*> z)
-
-- -- | This type is provided for convenience with @pipes-text@
-- data CsvResult f c
--   = CsvResultSuccess
--   | CsvResultTextDecodeError
--   | CsvResultDecodeError (DecodingRowError f c)
--   deriving (Show,Read,Eq)
-
-
-- | Consider changing out the use of 'Vector' here
-- with the humble list instead. It might fuse away
-- better. Not sure though.
-- data SiphonX c1 c2 = SiphonX
--   { siphonXEscape :: !(c1 -> Escaped c2)
--   , siphonXIntercalate :: !(Vector (Escaped c2) -> c2)
--   }
--
-- data SiphonDecoding c1 c2 = SiphonDecoding
--   { siphonDecodingParse :: c1 -> Atto.IResult c1 (Vector c2)
--   , siphonDecodingNull  :: c1 -> Bool
--   }
-
-- data WithEnd c = WithEnd
--   { withEndEnded :: !Bool
--   , withEndContent :: !c
--   }
-
-- data SiphonDecodingError
--   { clarify
--   }
+instance Applicative (Siphon f c) where
+  pure = SiphonPure
+  SiphonPure f <*> y = fmap f y
+  SiphonAp h c y <*> z = SiphonAp h c (flip <$> y <*> z)

--- a/stack.yaml
+++ b/stack.yaml
@ -38,19 +38,13 @@ resolver: lts-8.0
 packages:
 - 'colonnade'
 - 'yesod-colonnade'
- 'reflex-dom-colonnade'
 - 'blaze-colonnade'
 - 'siphon'
 - 'geolite-csv'
 # Dependency packages to be pulled from upstream that are not in the resolver
 # (e.g., acme-missiles-0.3)
 extra-deps:
- 'reflex-dom-0.3'
- 'ref-tf-0.4'
- 'reflex-0.4.0'
- 'haskell-src-exts-1.16.0.1'
- 'syb-0.5.1'
- 'ip-0.8.4'
+- 'ip-0.9'

 # Override default flag values for local packages and extra-deps
 flags: {}