Correct handling of CRLF in siphon

This commit is contained in:
Andrew Martin 2021-03-01 16:14:43 -05:00
parent a894241528
commit 3d9fd928bc
2 changed files with 20 additions and 2 deletions

View File

@@ -388,7 +388,11 @@ escapedField = do
trailChar <- case mb of
Just b
| b == comma -> A.anyWord8 >> return TrailCharComma
| b == newline || b == cr -> A.anyWord8 >> return TrailCharNewline
| b == newline -> A.anyWord8 >> return TrailCharNewline
| b == cr -> do
_ <- A.anyWord8
_ <- A.word8 newline
return TrailCharNewline
| otherwise -> fail "encountered double quote after escaped field"
Nothing -> return TrailCharEnd
if doubleQuote `S.elem` s
@@ -412,7 +416,11 @@ unescapedField !delim = do
case mb of
Just b
| b == comma -> A.anyWord8 >> return (bs,TrailCharComma)
| b == newline || b == cr -> A.anyWord8 >> return (bs,TrailCharNewline)
| b == newline -> A.anyWord8 >> return (bs,TrailCharNewline)
| b == cr -> do
_ <- A.anyWord8
_ <- A.word8 newline
return (bs,TrailCharNewline)
| otherwise -> fail "encountered double quote in unescaped field"
Nothing -> return (bs,TrailCharEnd)

View File

@@ -108,6 +108,16 @@ tests =
]
)
) @?= (["drew","martin, drew"] :> Nothing)
, testCase "Headed Decoding (escaped characters, character per chunk, CRLF)"
$ ( runIdentity . SMP.toList )
( S.decodeCsvUtf8 decodingF
( mapM_ (SMP.yield . BC8.singleton) $ concat
[ "name\r\n"
, "drew\r\n"
, "\"martin, drew\"\r\n"
]
)
) @?= (["drew","martin, drew"] :> Nothing)
, testProperty "Headed Isomorphism (int,char,bool)"
$ propIsoStream BC8.unpack
(S.decodeCsvUtf8 decodingB)