Fix the backtrack buffer management for fromParserK

This commit results in worse performance because now we are double
buffering once in ParserD and once in ParserK. This can potentially be
fixed but would require bigger changes to unify the backtracking buffer
management for ParserD and ParserK.
This commit is contained in:
Harendra Kumar 2020-07-22 20:46:49 +05:30
parent 8a37cd8b9d
commit ec97cec4d2
6 changed files with 358 additions and 325 deletions

View File

@ -62,18 +62,18 @@ benchIOSink value name f =
-------------------------------------------------------------------------------
#ifdef FROM_PARSERK
#define PARSE_OP (IP.parseD . PRD.fromParserK)
#define PARSE_OP (IP.parseD . PR.fromParserK)
#else
#define PARSE_OP IP.parseK
#endif
{-# INLINE satisfy #-}
satisfy :: MonadCatch m => (a -> Bool) -> PR.Parser m a a
satisfy = PRD.toParserK . PRD.satisfy
satisfy = PR.toParserK . PRD.satisfy
{-# INLINE any #-}
any :: MonadCatch m => (a -> Bool) -> PR.Parser m a Bool
any = PRD.toParserK . PRD.any
any = PR.toParserK . PRD.any
{-# INLINE anyK #-}
anyK :: (MonadCatch m, Ord a) => a -> SerialT m a -> m Bool
@ -81,7 +81,7 @@ anyK value = PARSE_OP (any (> value))
{-# INLINE all #-}
all :: MonadCatch m => (a -> Bool) -> PR.Parser m a Bool
all = PRD.toParserK . PRD.all
all = PR.toParserK . PRD.all
{-# INLINE allK #-}
allK :: (MonadCatch m, Ord a) => a -> SerialT m a -> m Bool
@ -89,7 +89,7 @@ allK value = PARSE_OP (all (<= value))
{-# INLINE take #-}
take :: MonadCatch m => Int -> PR.Parser m a ()
take value = PRD.toParserK $ PRD.take value FL.drain
take value = PR.toParserK $ PRD.take value FL.drain
{-# INLINE takeK #-}
takeK :: MonadCatch m => Int -> SerialT m a -> m ()
@ -97,7 +97,7 @@ takeK value = PARSE_OP (take value)
{-# INLINE takeWhile #-}
takeWhile :: MonadCatch m => (a -> Bool) -> PR.Parser m a ()
takeWhile p = PRD.toParserK $ PRD.takeWhile p FL.drain
takeWhile p = PR.toParserK $ PRD.takeWhile p FL.drain
{-# INLINE takeWhileK #-}
takeWhileK :: MonadCatch m => Int -> SerialT m Int -> m ()

View File

@ -70,6 +70,12 @@ bench_rts_opts_specific () {
Data.Parser/o-1-space/some) echo -n "-K8M" ;;
Data.Parser.ParserD/o-1-space/manyTill) echo -n "-K4M" ;;
Data.Parser/o-1-space/manyTill) echo -n "-K4M" ;;
Data.Parser/o-n-heap/manyAlt) echo -n "-K4M -M128M" ;;
Data.Parser/o-n-heap/someAlt) echo -n "-K4M -M128M" ;;
Data.Parser.ParserK/o-n-heap/manyAlt) echo -n "-K4M -M128M" ;;
Data.Parser.ParserK/o-n-heap/someAlt) echo -n "-K4M -M128M" ;;
Data.Parser.ParserK/o-n-heap/sequence) echo -n "-M64M";;
Data.Parser.ParserK/o-n-heap/sequenceA) echo -n "-M64M";;
Data.SmallArray/o-1-sp*) echo -n "-K128K" ;;
*) echo -n "" ;;

View File

@ -219,7 +219,7 @@ import qualified Streamly.Internal.Data.Parser.ParserK.Types as K
--
{-# INLINE fromFold #-}
fromFold :: MonadCatch m => Fold m a b -> Parser m a b
fromFold = D.toParserK . D.fromFold
fromFold = K.toParserK . D.fromFold
-------------------------------------------------------------------------------
-- Terminating but not failing folds
@ -231,7 +231,7 @@ fromFold = D.toParserK . D.fromFold
--
{-# INLINE any #-}
any :: MonadCatch m => (a -> Bool) -> Parser m a Bool
any = D.toParserK . D.any
any = K.toParserK . D.any
-- |
-- >>> S.parse (PR.all (== 0)) $ S.fromList [1,0,1]
@ -239,7 +239,7 @@ any = D.toParserK . D.any
--
{-# INLINE all #-}
all :: MonadCatch m => (a -> Bool) -> Parser m a Bool
all = D.toParserK . D.all
all = K.toParserK . D.all
-- This is the dual of stream "yield".
--
@ -249,9 +249,9 @@ all = D.toParserK . D.all
--
{-# INLINE [3] yield #-}
yield :: MonadCatch m => b -> Parser m a b
yield = D.toParserK . D.yield
yield = K.toParserK . D.yield
{-# RULES "yield fallback to CPS" [2]
forall a. D.toParserK (D.yield a) = K.yield a #-}
forall a. K.toParserK (D.yield a) = K.yield a #-}
-- This is the dual of stream "yieldM".
--
@ -262,7 +262,7 @@ yield = D.toParserK . D.yield
--
{-# INLINE yieldM #-}
yieldM :: MonadCatch m => m b -> Parser m a b
yieldM = D.toParserK . D.yieldM
yieldM = K.toParserK . D.yieldM
-- This is the dual of "nil".
--
@ -273,9 +273,9 @@ yieldM = D.toParserK . D.yieldM
--
{-# INLINE [3] die #-}
die :: MonadCatch m => String -> Parser m a b
die = D.toParserK . D.die
die = K.toParserK . D.die
{-# RULES "die fallback to CPS" [2]
forall a. D.toParserK (D.die a) = K.die a #-}
forall a. K.toParserK (D.die a) = K.die a #-}
-- This is the dual of "nilM".
--
@ -286,7 +286,7 @@ die = D.toParserK . D.die
--
{-# INLINE dieM #-}
dieM :: MonadCatch m => m String -> Parser m a b
dieM = D.toParserK . D.dieM
dieM = K.toParserK . D.dieM
-------------------------------------------------------------------------------
-- Failing Parsers
@ -306,7 +306,7 @@ dieM = D.toParserK . D.dieM
--
{-# INLINE peek #-}
peek :: MonadCatch m => Parser m a a
peek = D.toParserK D.peek
peek = K.toParserK D.peek
-- | Succeeds if we are at the end of input, fails otherwise.
--
@ -317,7 +317,7 @@ peek = D.toParserK D.peek
--
{-# INLINE eof #-}
eof :: MonadCatch m => Parser m a ()
eof = D.toParserK D.eof
eof = K.toParserK D.eof
-- | Returns the next element if it passes the predicate, fails otherwise.
--
@ -328,7 +328,7 @@ eof = D.toParserK D.eof
--
{-# INLINE satisfy #-}
satisfy :: MonadCatch m => (a -> Bool) -> Parser m a a
satisfy = D.toParserK . D.satisfy
satisfy = K.toParserK . D.satisfy
-------------------------------------------------------------------------------
-- Taking elements
@ -355,7 +355,7 @@ satisfy = D.toParserK . D.satisfy
{-# INLINE takeBetween #-}
takeBetween :: -- MonadCatch m =>
Int -> Int -> Fold m a b -> Parser m a b
takeBetween _m _n = undefined -- D.toParserK . D.takeBetween m n
takeBetween _m _n = undefined -- K.toParserK . D.takeBetween m n
--
-- $take
@ -384,7 +384,7 @@ takeBetween _m _n = undefined -- D.toParserK . D.takeBetween m n
--
{-# INLINE take #-}
take :: MonadCatch m => Int -> Fold m a b -> Parser m a b
take n = D.toParserK . D.take n
take n = K.toParserK . D.take n
-- | Stops after taking exactly @n@ input elements.
--
@ -398,7 +398,7 @@ take n = D.toParserK . D.take n
--
{-# INLINE takeEQ #-}
takeEQ :: MonadCatch m => Int -> Fold m a b -> Parser m a b
takeEQ n = D.toParserK . D.takeEQ n
takeEQ n = K.toParserK . D.takeEQ n
-- | Take at least @n@ input elements, but can collect more.
--
@ -415,7 +415,7 @@ takeEQ n = D.toParserK . D.takeEQ n
--
{-# INLINE takeGE #-}
takeGE :: MonadCatch m => Int -> Fold m a b -> Parser m a b
takeGE n = D.toParserK . D.takeGE n
takeGE n = K.toParserK . D.takeGE n
-- $takeWhile
-- Note: This is called @takeWhileP@ and @munch@ in some parser libraries.
@ -439,7 +439,7 @@ takeGE n = D.toParserK . D.takeGE n
{-# INLINE takeWhileP #-}
takeWhileP :: -- MonadCatch m =>
(a -> Bool) -> Parser m a b -> Parser m a b
takeWhileP _cond = undefined -- D.toParserK . D.takeWhileP cond
takeWhileP _cond = undefined -- K.toParserK . D.takeWhileP cond
-- | Collect stream elements until an element fails the predicate. The element
-- on which the predicate fails is returned back to the input stream.
@ -460,7 +460,7 @@ takeWhileP _cond = undefined -- D.toParserK . D.takeWhileP cond
--
{-# INLINE takeWhile #-}
takeWhile :: MonadCatch m => (a -> Bool) -> Fold m a b -> Parser m a b
takeWhile cond = D.toParserK . D.takeWhile cond
takeWhile cond = K.toParserK . D.takeWhile cond
-- | Like 'takeWhile' but takes at least one element otherwise fails.
--
@ -468,7 +468,7 @@ takeWhile cond = D.toParserK . D.takeWhile cond
--
{-# INLINE takeWhile1 #-}
takeWhile1 :: MonadCatch m => (a -> Bool) -> Fold m a b -> Parser m a b
takeWhile1 cond = D.toParserK . D.takeWhile1 cond
takeWhile1 cond = K.toParserK . D.takeWhile1 cond
-- | Like 'sliceSepBy' but uses a 'Parser' instead of a 'Fold' to collect the
-- input. @sliceSepByP cond parser@ parses a slice of the input using @parser@
@ -487,7 +487,7 @@ takeWhile1 cond = D.toParserK . D.takeWhile1 cond
{-# INLINABLE sliceSepByP #-}
sliceSepByP :: -- MonadCatch m =>
(a -> Bool) -> Parser m a b -> Parser m a b
sliceSepByP _cond = undefined -- D.toParserK . D.sliceSepByP cond
sliceSepByP _cond = undefined -- K.toParserK . D.sliceSepByP cond
-- Note: Keep this consistent with S.splitOn. In fact we should eliminate
-- S.splitOn in favor of the parser.
@ -536,7 +536,7 @@ sliceSepByP _cond = undefined -- D.toParserK . D.sliceSepByP cond
-- /Internal/
{-# INLINABLE sliceSepBy #-}
sliceSepBy :: MonadCatch m => (a -> Bool) -> Fold m a b -> Parser m a b
sliceSepBy cond = D.toParserK . D.sliceSepBy cond
sliceSepBy cond = K.toParserK . D.sliceSepBy cond
-- | Like 'sliceSepBy' but does not drop the separator element, instead
-- separator is emitted as a separate element in the output.
@ -545,7 +545,7 @@ sliceSepBy cond = D.toParserK . D.sliceSepBy cond
{-# INLINABLE sliceSepWith #-}
sliceSepWith :: -- MonadCatch m =>
(a -> Bool) -> Fold m a b -> Parser m a b
sliceSepWith _cond = undefined -- D.toParserK . D.sliceSepBy cond
sliceSepWith _cond = undefined -- K.toParserK . D.sliceSepBy cond
-- | Collect stream elements until an element succeeds the predicate. Also take
-- the element on which the predicate succeeded. The succeeding element is
@ -619,7 +619,7 @@ sliceBeginWith = undefined
{-# INLINABLE sliceSepByMax #-}
sliceSepByMax :: MonadCatch m
=> (a -> Bool) -> Int -> Fold m a b -> Parser m a b
sliceSepByMax cond cnt = D.toParserK . D.sliceSepByMax cond cnt
sliceSepByMax cond cnt = K.toParserK . D.sliceSepByMax cond cnt
-- | Like 'sliceSepBy' but the separator elements can be escaped using an
-- escape char determined by the second predicate.
@ -629,7 +629,7 @@ sliceSepByMax cond cnt = D.toParserK . D.sliceSepByMax cond cnt
escapedSliceSepBy :: -- MonadCatch m =>
(a -> Bool) -> (a -> Bool) -> Fold m a b -> Parser m a b
escapedSliceSepBy _cond _esc = undefined
-- D.toParserK . D.escapedSliceSepBy cond esc
-- K.toParserK . D.escapedSliceSepBy cond esc
-- | @escapedFrameBy begin end escape@ parses a string framed using @begin@ and
-- @end@ as the frame begin and end marker elements and @escape@ as an escaping
@ -655,7 +655,7 @@ escapedSliceSepBy _cond _esc = undefined
escapedFrameBy :: -- MonadCatch m =>
(a -> Bool) -> (a -> Bool) -> (a -> Bool) -> Fold m a b -> Parser m a b
escapedFrameBy _begin _end _escape _p = undefined
-- D.toParserK . D.frameBy begin end escape p
-- K.toParserK . D.frameBy begin end escape p
-- | Like 'splitOn' but strips leading, trailing, and repeated separators.
-- Therefore, @".a..b."@ having '.' as the separator would be parsed as
@ -710,7 +710,7 @@ groupBy = undefined
--
{-# INLINE eqBy #-}
eqBy :: MonadCatch m => (a -> a -> Bool) -> [a] -> Parser m a ()
eqBy cmp = D.toParserK . D.eqBy cmp
eqBy cmp = K.toParserK . D.eqBy cmp
-------------------------------------------------------------------------------
-- nested parsers
@ -742,7 +742,7 @@ eqBy cmp = D.toParserK . D.eqBy cmp
splitWith :: MonadCatch m
=> (a -> b -> c) -> Parser m x a -> Parser m x b -> Parser m x c
splitWith f p1 p2 =
D.toParserK $ D.splitWith f (D.fromParserK p1) (D.fromParserK p2)
K.toParserK $ D.splitWith f (K.fromParserK p1) (K.fromParserK p2)
-- | Sequential parser application ignoring the output of the first parser.
-- Apply two parsers sequentially to an input stream. The input is provided to
@ -764,7 +764,7 @@ splitWith f p1 p2 =
--
{-# INLINE split_ #-}
split_ :: MonadCatch m => Parser m x a -> Parser m x b -> Parser m x b
split_ p1 p2 = D.toParserK $ D.split_ (D.fromParserK p1) (D.fromParserK p2)
split_ p1 p2 = K.toParserK $ D.split_ (K.fromParserK p1) (K.fromParserK p2)
-- | @teeWith f p1 p2@ distributes its input to both @p1@ and @p2@ until both
-- of them succeed or anyone of them fails and combines their output using @f@.
@ -776,7 +776,7 @@ split_ p1 p2 = D.toParserK $ D.split_ (D.fromParserK p1) (D.fromParserK p2)
teeWith :: MonadCatch m
=> (a -> b -> c) -> Parser m x a -> Parser m x b -> Parser m x c
teeWith f p1 p2 =
D.toParserK $ D.teeWith f (D.fromParserK p1) (D.fromParserK p2)
K.toParserK $ D.teeWith f (K.fromParserK p1) (K.fromParserK p2)
-- | Like 'teeWith' but ends parsing and zips the results, if available,
-- whenever the first parser ends.
@ -787,7 +787,7 @@ teeWith f p1 p2 =
teeWithFst :: MonadCatch m
=> (a -> b -> c) -> Parser m x a -> Parser m x b -> Parser m x c
teeWithFst f p1 p2 =
D.toParserK $ D.teeWithFst f (D.fromParserK p1) (D.fromParserK p2)
K.toParserK $ D.teeWithFst f (K.fromParserK p1) (K.fromParserK p2)
-- | Like 'teeWith' but ends parsing and zips the results, if available,
-- whenever any of the parsers ends or fails.
@ -798,7 +798,7 @@ teeWithFst f p1 p2 =
teeWithMin :: MonadCatch m
=> (a -> b -> c) -> Parser m x a -> Parser m x b -> Parser m x c
teeWithMin f p1 p2 =
D.toParserK $ D.teeWithMin f (D.fromParserK p1) (D.fromParserK p2)
K.toParserK $ D.teeWithMin f (K.fromParserK p1) (K.fromParserK p2)
-- | Sequential alternative. Apply the input to the first parser and return the
-- result if the parser succeeds. If the first parser fails then backtrack and
@ -818,7 +818,7 @@ teeWithMin f p1 p2 =
--
{-# INLINE alt #-}
alt :: MonadCatch m => Parser m x a -> Parser m x a -> Parser m x a
alt p1 p2 = D.toParserK $ D.alt (D.fromParserK p1) (D.fromParserK p2)
alt p1 p2 = K.toParserK $ D.alt (K.fromParserK p1) (K.fromParserK p2)
-- | Shortest alternative. Apply both parsers in parallel but choose the result
-- from the one which consumed least input i.e. take the shortest succeeding
@ -829,7 +829,7 @@ alt p1 p2 = D.toParserK $ D.alt (D.fromParserK p1) (D.fromParserK p2)
{-# INLINE shortest #-}
shortest :: MonadCatch m
=> Parser m x a -> Parser m x a -> Parser m x a
shortest p1 p2 = D.toParserK $ D.shortest (D.fromParserK p1) (D.fromParserK p2)
shortest p1 p2 = K.toParserK $ D.shortest (K.fromParserK p1) (K.fromParserK p2)
-- | Longest alternative. Apply both parsers in parallel but choose the result
-- from the one which consumed more input i.e. take the longest succeeding
@ -840,7 +840,7 @@ shortest p1 p2 = D.toParserK $ D.shortest (D.fromParserK p1) (D.fromParserK p2)
{-# INLINE longest #-}
longest :: MonadCatch m
=> Parser m x a -> Parser m x a -> Parser m x a
longest p1 p2 = D.toParserK $ D.longest (D.fromParserK p1) (D.fromParserK p2)
longest p1 p2 = K.toParserK $ D.longest (K.fromParserK p1) (K.fromParserK p2)
-- | Run a parser without consuming the input.
--
@ -848,7 +848,7 @@ longest p1 p2 = D.toParserK $ D.longest (D.fromParserK p1) (D.fromParserK p2)
--
{-# INLINE lookAhead #-}
lookAhead :: MonadCatch m => Parser m a b -> Parser m a b
lookAhead p = D.toParserK $ D.lookAhead $ D.fromParserK p
lookAhead p = K.toParserK $ D.lookAhead $ K.fromParserK p
-------------------------------------------------------------------------------
-- Interleaving
@ -915,7 +915,7 @@ sequence _f _p = undefined
{-# INLINE concatMap #-}
concatMap :: MonadCatch m
=> (b -> Parser m a c) -> Parser m a b -> Parser m a c
concatMap f p = D.toParserK $ D.concatMap (D.fromParserK . f) (D.fromParserK p)
concatMap f p = K.toParserK $ D.concatMap (K.fromParserK . f) (K.fromParserK p)
-------------------------------------------------------------------------------
-- Alternative Collection
@ -951,7 +951,7 @@ choice _ps = undefined
{-# INLINE manyP #-}
manyP :: -- MonadCatch m =>
Parser m b c -> Parser m a b -> Parser m a c
manyP _f _p = undefined -- D.toParserK $ D.manyP f (D.fromParserK p)
manyP _f _p = undefined -- K.toParserK $ D.manyP f (K.fromParserK p)
-- | Collect zero or more parses. Apply the parser repeatedly on the input
-- stream, stop when the parser fails, accumulate zero or more parse results
@ -964,7 +964,7 @@ manyP _f _p = undefined -- D.toParserK $ D.manyP f (D.fromParserK p)
--
{-# INLINE many #-}
many :: MonadCatch m => Fold m b c -> Parser m a b -> Parser m a c
many f p = D.toParserK $ D.many f (D.fromParserK p)
many f p = K.toParserK $ D.many f (K.fromParserK p)
-- many = countBetween 0 maxBound
-- | Collect one or more parses. Apply the supplied parser repeatedly on the
@ -978,7 +978,7 @@ many f p = D.toParserK $ D.many f (D.fromParserK p)
--
{-# INLINE some #-}
some :: MonadCatch m => Fold m b c -> Parser m a b -> Parser m a c
some f p = D.toParserK $ D.some f (D.fromParserK p)
some f p = K.toParserK $ D.some f (K.fromParserK p)
-- some f p = manyP (takeGE 1 f) p
-- many = countBetween 1 maxBound
@ -1023,7 +1023,7 @@ count n = countBetween n n
manyTillP :: -- MonadCatch m =>
Parser m b c -> Parser m a b -> Parser m a x -> Parser m a c
manyTillP _f _p1 _p2 = undefined
-- D.toParserK $ D.manyTillP f (D.fromParserK p1) (D.fromParserK p2)
-- K.toParserK $ D.manyTillP f (K.fromParserK p1) (K.fromParserK p2)
-- | @manyTill f collect test@ tries the parser @test@ on the input, if @test@
-- fails it backtracks and tries @collect@, after @collect@ succeeds @test@ is
@ -1037,7 +1037,7 @@ manyTillP _f _p1 _p2 = undefined
manyTill :: MonadCatch m
=> Fold m b c -> Parser m a b -> Parser m a x -> Parser m a c
manyTill f p1 p2 =
D.toParserK $ D.manyTill f (D.fromParserK p1) (D.fromParserK p2)
K.toParserK $ D.manyTill f (K.fromParserK p1) (K.fromParserK p2)
-- | @manyThen f collect recover@ repeats the parser @collect@ on the input and
-- collects the output in the supplied fold. If the the parser @collect@ fails,

View File

@ -19,8 +19,6 @@ module Streamly.Internal.Data.Parser.ParserD
-- First order parsers
-- * Accumulators
, fromFold
, toParserK
, fromParserK
, any
, all
, yield
@ -160,14 +158,9 @@ where
import Control.Exception (assert)
import Control.Monad.Catch (MonadCatch, MonadThrow(..))
import Prelude
hiding (any, all, take, takeWhile, sequence, concatMap)
import Prelude hiding (any, all, take, takeWhile, sequence, concatMap)
import Streamly.Internal.Data.Fold.Types (Fold(..))
import qualified Streamly.Internal.Data.Parser.ParserK.Types as K
import qualified Streamly.Internal.Data.Zipper as Z
import Streamly.Internal.Data.Parser.ParserD.Tee
import Streamly.Internal.Data.Parser.ParserD.Types
import Streamly.Internal.Data.Strict
@ -188,75 +181,6 @@ fromFold (Fold fstep finitial fextract) = Parser step finitial fextract
step s a = Partial 0 <$> fstep s a
-------------------------------------------------------------------------------
-- Convert to and from CPS style parser representation
-------------------------------------------------------------------------------
-- | Convert a direct style 'Parser' to a CPS style 'K.Parser'.
--
-- /Internal/
--
{-# INLINE_LATE toParserK #-}
toParserK :: MonadCatch m => Parser m a b -> K.Parser m a b
toParserK (Parser step initial extract) =
K.MkParser $ K.parse step initial extract
-- XXX The CPS style parsers use a zipper buffering the data, if a parserD is
-- driving the parserK then it would also be buffering the same data. For
-- ParserD, instead of maintaining the buffer in the common driver, each parser
-- can have its own buffering and we can return the unconsumed buffer in the
-- end. That way the zipper is maintained by the parser. If the parser fails
-- then it has to return all of the input. It is anyway maintained by
-- intermediate level parsers in a composition, so the only difference would be
-- that even the leaf levels parsers would do it. If we abstract the zipper
-- maintainance then it may not be too unwieldy.
--
-- | Convert a CPS style 'K.Parser' to a direct style 'Parser'.
--
-- /Unimplemented/
--
{-# INLINE_LATE fromParserK #-}
fromParserK :: MonadThrow m => K.Parser m a b -> Parser m a b
fromParserK parser = Parser step initial extract
where
initial = return Nothing
step Nothing a = do
let yieldk _ (K.Done b) = return $ K.Stop b
yieldk _ (K.Error e) = return $ K.Failed e
r <- K.runParser parser (Z.Zipper [] [] [a]) yieldk
case r of
-- XXX it should return the leftover count
K.Stop b -> return $ Done 0 b
K.Failed e -> return $ Error e
-- XXX when it pauses it may not have the result available, so it
-- is not necessarily Partial. It could be Skip instead.
-- Always returning Skip may lead to buffer accumulation.
-- XXX In case of Partial the backtrack count is always 0 because
-- ParserK would never ask to backtrack. Even for Skip it would
-- always be zero. The only place where it may need the ParserD
-- buffer is the final "Done" event where it can say there is a
-- leftover.
K.Partial cont -> return $ Partial 0 $ Just cont
step (Just cont) a = do
r <- cont (Just a)
case r of
K.Stop b -> return $ Done 0 b
K.Failed e -> return $ Error e
K.Partial cont1 -> return $ Partial 0 $ Just cont1
extract Nothing = throwM $ ParseError "end of input"
extract (Just cont) = K.extractParse cont
#ifndef DISABLE_FUSION
{-# RULES "fromParserK/toParserK fusion" [2]
forall s. toParserK (fromParserK s) = s #-}
{-# RULES "toParserK/fromParserK fusion" [2]
forall s. fromParserK (toParserK s) = s #-}
#endif
-------------------------------------------------------------------------------
-- Terminating but not failing folds
-------------------------------------------------------------------------------

View File

@ -20,15 +20,13 @@
module Streamly.Internal.Data.Parser.ParserK.Types
(
Parse (..)
, Driver (..)
, Parser (..)
, extractParse
Parser (..)
, yield
, die
-- * Parsing
, parse
-- * Conversion
, toParserK
, fromParserK
)
where
@ -46,7 +44,7 @@ import Streamly.Internal.Data.Zipper (Zipper (..))
import Prelude hiding (splitAt)
import qualified Streamly.Internal.Data.Zipper as Z
import qualified Streamly.Internal.Data.Parser.ParserD.Types as PD
import qualified Streamly.Internal.Data.Parser.ParserD.Types as D
-- | The parse driver result. The driver may stop with a final result, pause
-- with a continuation to resume, or fail with an error.
@ -54,15 +52,17 @@ import qualified Streamly.Internal.Data.Parser.ParserD.Types as PD
-- /Internal/
--
data Driver m a r =
Stop r
Stop !Int r
-- XXX we can use a "resume" and a "stop" continuations instead of Maybe.
-- measure if that works any better.
| Partial (Maybe a -> m (Driver m a r))
| Continue (Maybe a -> m (Driver m a r))
| Failed String
instance Functor m => Functor (Driver m a) where
fmap f (Stop r) = Stop (f r)
fmap f (Stop n r) = Stop n (f r)
fmap f (Partial yld) = Partial (fmap (fmap f) . yld)
fmap f (Continue yld) = Continue (fmap (fmap f) . yld)
fmap _ (Failed e) = Failed e
-- The parser's result.
@ -89,6 +89,220 @@ newtype Parser m a b = MkParser
-> m (Driver m a r)
}
-------------------------------------------------------------------------------
-- Convert direct style 'D.Parser' to CPS style 'Parser'
-------------------------------------------------------------------------------
-- Inlined definition. Without the inline "serially/parser/take" benchmark
-- degrades and splitParse does not fuse. Even using "inline" at the callsite
-- does not help.
{-# INLINE splitAt #-}
splitAt :: Int -> [a] -> ([a],[a])
splitAt n ls
| n <= 0 = ([], ls)
| otherwise = splitAt' n ls
where
splitAt' :: Int -> [a] -> ([a], [a])
splitAt' _ [] = ([], [])
splitAt' 1 (x:xs) = ([x], xs)
splitAt' m (x:xs) = (x:xs', xs'')
where
(xs', xs'') = splitAt' (m - 1) xs
-- XXX Unlike the direct style folds/parsers, the initial action in CPS parsers
-- is not performed when the fold is initialized. It is performed when the
-- first element is processed by the fold or if no elements are processed then
-- at the extraction. We should either make the direct folds like this or make
-- the CPS folds behavior also like the direct ones.
--
-- | Parse a stream zipper using a direct style parser's @step@, @initial@ and
-- @extract@ functions.
--
{-# INLINE_NORMAL parse #-}
parse
:: MonadCatch m
=> (s -> a -> m (D.Step s b))
-> m s
-> (s -> m b)
-> Zipper a
-> (Zipper a -> Parse b -> m (Driver m a r))
-> m (Driver m a r)
-- The case when no checkpoints exist
parse pstep initial extract (Zipper [] backward forward) cont =
case forward of
[] -> return $ Continue (parseCont backward initial)
_ -> goBuf backward forward initial
where
parseCont back acc (Just x) = goSingle back x acc
parseCont back acc Nothing = do
pst <- acc
r <- try $ extract pst
-- NOTE: Any data held in the backtrack buffer is considered as unused
-- data if we extract the parser result without the parser finishing.
let z = Zipper [] back []
case r of
Left (e :: D.ParseError) ->
cont z (Error (displayException e))
Right b -> cont z (Done b)
{-# INLINE goSingle #-}
goSingle back x !pst = do
r <- pst
pRes <- pstep r x
case pRes of
D.Partial 0 pst1 -> return $ Partial (parseCont [] (return pst1))
D.Partial n pst1 -> do
assert (n <= length (x:back)) (return ())
let src0 = Prelude.take n (x:back)
src = Prelude.reverse src0
goBuf [] src (return pst1)
D.Continue 0 pst1 ->
return $ Continue (parseCont (x:back) (return pst1))
D.Continue n pst1 -> do
assert (n <= length (x:back)) (return ())
let (src0, buf1) = splitAt n (x:back)
src = Prelude.reverse src0
goBuf buf1 src (return pst1)
D.Done n b -> do
assert (n <= length (x:back)) (return ())
let (src0, buf1) = splitAt n (x:back)
src = Prelude.reverse src0
cont (Zipper [] buf1 src) (Done b)
D.Error err -> cont (Zipper [] (x:back) []) (Error err)
goBuf back [] !acc = return $ Continue (parseCont back acc)
goBuf back (x:xs) !pst = do
r <- pst
pRes <- pstep r x
case pRes of
D.Partial 0 pst1 ->
goBuf [] xs (return pst1)
D.Partial n pst1 -> do
assert (n <= length (x:back)) (return ())
let src0 = Prelude.take n (x:back)
src = Prelude.reverse src0
goBuf [] (src ++ xs) (return pst1)
D.Continue 0 pst1 -> goBuf (x:back) xs (return pst1)
D.Continue n pst1 -> do
assert (n <= length (x:back)) (return ())
let (src0, buf1) = splitAt n (x:back)
src = Prelude.reverse src0 ++ xs
goBuf buf1 src (return pst1)
D.Done n b -> do
assert (n <= length (x:back)) (return ())
let (src0, buf1) = splitAt n (x:back)
src = Prelude.reverse src0 ++ xs
cont (Zipper [] buf1 src) (Done b)
D.Error err -> cont (Zipper [] (x:back) xs) (Error err)
-- The case when checkpoints exist
-- XXX code duplication alert!
parse pstep initial extract (Zipper (cp:cps) backward forward) cont =
case forward of
[] -> return $ Continue (parseCont 0 backward initial)
_ -> goBuf 0 backward forward initial
where
parseCont cnt back acc (Just x) = goSingle cnt back x acc
parseCont cnt back acc Nothing = do
pst <- acc
r <- try $ extract pst
let z = Zipper (cp + cnt : cps) back []
case r of
Left (e :: D.ParseError) ->
cont z (Error (displayException e))
Right b -> cont z (Done b)
{-# INLINE goSingle #-}
goSingle cnt back x !pst = do
r <- pst
pRes <- pstep r x
let cnt1 = cnt + 1
case pRes of
D.Partial 0 pst1 ->
return $ Partial (parseCont cnt1 [] (return pst1))
D.Partial n pst1 -> do
assert (n <= length (x:back)) (return ())
let src0 = Prelude.take n (x:back)
src = Prelude.reverse src0
goBuf (cnt1 - n) [] src (return pst1)
D.Continue 0 pst1 ->
return $ Continue (parseCont cnt1 (x:back) (return pst1))
D.Continue n pst1 -> do
assert (n <= length (x:back)) (return ())
let (src0, buf1) = splitAt n (x:back)
src = Prelude.reverse src0
assert (cnt1 - n >= 0) (return ())
goBuf (cnt1 - n) buf1 src (return pst1)
D.Done n b -> do
assert (n <= length (x:back)) (return ())
let (src0, buf1) = splitAt n (x:back)
src = Prelude.reverse src0
assert (cp + cnt1 - n >= 0) (return ())
cont (Zipper (cp + cnt1 - n : cps) buf1 src) (Done b)
D.Error err ->
cont (Zipper (cp + cnt1 : cps) (x:back) []) (Error err)
goBuf cnt back [] !acc = return $ Continue (parseCont cnt back acc)
goBuf cnt back (x:xs) !pst = do
r <- pst
pRes <- pstep r x
let cnt1 = cnt + 1
case pRes of
D.Partial 0 pst1 ->
goBuf cnt1 [] xs (return pst1)
D.Partial n pst1 -> do
assert (n <= length (x:back)) (return ())
let src0 = Prelude.take n (x:back)
src = Prelude.reverse src0
goBuf (cnt1 - n) [] (src ++ xs) (return pst1)
D.Continue 0 pst1 -> goBuf cnt1 (x:back) xs (return pst1)
D.Continue n pst1 -> do
assert (n <= length (x:back)) (return ())
let (src0, buf1) = splitAt n (x:back)
src = Prelude.reverse src0 ++ xs
assert (cnt1 - n >= 0) (return ())
goBuf (cnt1 - n) buf1 src (return pst1)
D.Done n b -> do
assert (n <= length (x:back)) (return ())
let (src0, buf1) = splitAt n (x:back)
src = Prelude.reverse src0 ++ xs
assert (cp + cnt1 - n >= 0) (return ())
cont (Zipper (cp + cnt1 - n : cps) buf1 src) (Done b)
D.Error err ->
cont (Zipper (cp + cnt1 : cps) (x:back) xs) (Error err)
-- | Convert a direct style 'Parser' to a CPS style 'K.Parser'.
--
-- /Internal/
--
{-# INLINE_LATE toParserK #-}
toParserK :: MonadCatch m => D.Parser m a b -> Parser m a b
toParserK (D.Parser step initial extract) =
MkParser $ parse step initial extract
-------------------------------------------------------------------------------
-- Convert CPS style 'Parser' to direct style 'D.Parser'
-------------------------------------------------------------------------------
-- | A continuation to extract the result when a CPS parser is done.
{-# INLINE parserDone #-}
parserDone :: Monad m => Zipper a -> Parse b -> m (Driver m a b)
parserDone (Z.Zipper [] bwd fwd) (Done b) =
-- When the parser is Done, we consider the data in the
-- backtracking buffer as well as in the forward buffer as
-- unconsumed input.
-- XXX Will it be more efficient/worth it to maintain a count
-- in the Zipper?
return $ Stop (length bwd + length fwd) b
parserDone (Z.Zipper cps _ _) (Done _) =
error $ "Bug: fromParserK: when checkpoints exist: " ++ show cps
parserDone _ (Error e) = return $ Failed e
-- | When there is no more input to feed, extract the result from the Parser.
--
-- /Internal/
@ -97,9 +311,66 @@ extractParse :: MonadThrow m => (Maybe a -> m (Driver m a b)) -> m b
extractParse cont = do
r <- cont Nothing
case r of
Stop b -> return b
Stop _ b -> return b
Partial _ -> error "Bug: extractParse got Partial"
Failed e -> throwM $ PD.ParseError e
Continue _ -> error "Bug: extractParse got Continue"
Failed e -> throwM $ D.ParseError e
-- XXX The CPS style parsers use a zipper buffering the data, if a parserD is
-- driving the parserK then it would also be buffering the same data. For
-- ParserD, instead of maintaining the buffer in the common driver, each parser
-- can have its own buffering and we can return the unconsumed buffer in the
-- end. That way the zipper is maintained by the parser. If the parser fails
-- then it has to return all of the input. It is anyway maintained by
-- intermediate level parsers in a composition, so the only difference would be
-- that even the leaf levels parsers would do it. If we abstract the zipper
-- maintainance then it may not be too unwieldy.
data FromParserK b e c = FPKDone !b | FPKError !e | FPKCont c
-- | Convert a CPS style 'Parser' to a direct style 'D.Parser'.
--
-- /Internal/
--
{-# INLINE_LATE fromParserK #-}
fromParserK :: MonadThrow m => Parser m a b -> D.Parser m a b
fromParserK parser = D.Parser step initial extract
where
initial = do
r <- runParser parser Z.nil parserDone
return $ case r of
Stop 0 b -> FPKDone b
Stop _ _ -> error "Bug: fromParserK: Stop n in initial"
Failed e -> FPKError e
Partial cont -> FPKCont cont -- XXX can we get this?
Continue cont -> FPKCont cont
step (FPKDone b) _ = return $ D.Done 1 b
step (FPKError e) _ = return $ D.Error e
step (FPKCont cont) a = do
r <- cont (Just a)
return $ case r of
Stop n b -> D.Done n b
Failed e -> D.Error e
Partial cont1 -> D.Partial 0 (FPKCont cont1)
Continue cont1 -> D.Continue 0 (FPKCont cont1)
extract (FPKDone b) = return b
extract (FPKError e) = throwM $ D.ParseError e
extract (FPKCont cont) = extractParse cont
#ifndef DISABLE_FUSION
{-# RULES "fromParserK/toParserK fusion" [2]
forall s. toParserK (fromParserK s) = s #-}
{-# RULES "toParserK/fromParserK fusion" [2]
forall s. fromParserK (toParserK s) = s #-}
#endif
-------------------------------------------------------------------------------
-- Functor
-------------------------------------------------------------------------------
-- | Maps a function over the output of the parser.
--
@ -109,7 +380,13 @@ instance Functor m => Functor (Parser m a) where
let yld z res = yieldk z (fmap f res)
in runParser parser zipper yld
-- | See 'Streamly.Internal.Data.Parser.yield'.
-------------------------------------------------------------------------------
-- Sequential applicative
-------------------------------------------------------------------------------
-- This is the dual of stream "yield".
--
-- | A parser that always yields a pure value without consuming any input.
--
-- /Internal/
--
@ -117,10 +394,6 @@ instance Functor m => Functor (Parser m a) where
yield :: b -> Parser m a b
yield b = MkParser (\zipper yieldk -> yieldk zipper (Done b))
-------------------------------------------------------------------------------
-- Sequential applicative
-------------------------------------------------------------------------------
-- | 'Applicative' form of 'Streamly.Internal.Data.Parser.splitWith'. Note that
-- this operation does not fuse, use 'Streamly.Internal.Data.Parser.splitWith'
-- when fusion is important.
@ -145,7 +418,14 @@ instance Monad m => Applicative (Parser m a) where
yield1 z (Error e) = yieldk z (Error e)
in runParser m1 zipper yield1
-- | See 'Streamly.Internal.Data.Parser.die'.
-------------------------------------------------------------------------------
-- Monad
-------------------------------------------------------------------------------
-- This is the dual of "nil".
--
-- | A parser that always fails with an error message without consuming
-- any input.
--
-- /Internal/
--
@ -219,6 +499,10 @@ instance Monad m => Fail.MonadFail (Parser m a) where
fail = die
#endif
-------------------------------------------------------------------------------
-- Alternative
-------------------------------------------------------------------------------
-- XXX one way to backtrack is to ask the input driver to create and release
-- checkpoints, like we do in the zipper. The other way is to pass around the
-- zipper itself. Currently in the direct style code we manage the checkpoints
@ -279,184 +563,3 @@ instance Monad m => MonadPlus (Parser m a) where
{-# INLINE mplus #-}
mplus = (<|>)
-------------------------------------------------------------------------------
-- Parse driver for a Zipper
-------------------------------------------------------------------------------
-- Inlined definition. Without the inline "serially/parser/take" benchmark
-- degrades and splitParse does not fuse. Even using "inline" at the callsite
-- does not help.
{-# INLINE splitAt #-}
splitAt :: Int -> [a] -> ([a],[a])
splitAt n ls
| n <= 0 = ([], ls)
| otherwise = splitAt' n ls
where
splitAt' :: Int -> [a] -> ([a], [a])
splitAt' _ [] = ([], [])
splitAt' 1 (x:xs) = ([x], xs)
splitAt' m (x:xs) = (x:xs', xs'')
where
(xs', xs'') = splitAt' (m - 1) xs
-- | Parse a stream zipper using a direct style parser's @step@, @initial@ and
-- @extract@ functions.
--
{-# INLINE_NORMAL parse #-}
parse
:: MonadCatch m
=> (s -> a -> m (PD.Step s b))
-> m s
-> (s -> m b)
-> Zipper a
-> (Zipper a -> Parse b -> m (Driver m a r))
-> m (Driver m a r)
-- The case when no checkpoints exist
parse pstep initial extract (Zipper [] backward forward) cont =
case forward of
[] -> return $ Partial (parseCont backward initial)
_ -> goBuf backward forward initial
where
parseCont back acc (Just x) = goSingle back x acc
parseCont back acc Nothing = do
pst <- acc
r <- try $ extract pst
let z = Zipper [] back []
case r of
Left (e :: PD.ParseError) ->
cont z (Error (displayException e))
Right b -> cont z (Done b)
{-# INLINE goSingle #-}
goSingle back x !pst = do
r <- pst
pRes <- pstep r x
case pRes of
-- XXX return Partial pause
PD.Partial 0 pst1 -> return $ Partial (parseCont [] (return pst1))
PD.Partial n pst1 -> do
assert (n <= length (x:back)) (return ())
let src0 = Prelude.take n (x:back)
src = Prelude.reverse src0
goBuf [] src (return pst1)
PD.Continue 0 pst1 ->
return $ Partial (parseCont (x:back) (return pst1))
PD.Continue n pst1 -> do
assert (n <= length (x:back)) (return ())
let (src0, buf1) = splitAt n (x:back)
src = Prelude.reverse src0
goBuf buf1 src (return pst1)
PD.Done n b -> do
assert (n <= length (x:back)) (return ())
let (src0, buf1) = splitAt n (x:back)
src = Prelude.reverse src0
cont (Zipper [] buf1 src) (Done b)
PD.Error err -> cont (Zipper [] (x:back) []) (Error err)
goBuf back [] !acc = return $ Partial (parseCont back acc)
goBuf back (x:xs) !pst = do
r <- pst
pRes <- pstep r x
case pRes of
PD.Partial 0 pst1 ->
goBuf [] xs (return pst1)
PD.Partial n pst1 -> do
assert (n <= length (x:back)) (return ())
let src0 = Prelude.take n (x:back)
src = Prelude.reverse src0
goBuf [] (src ++ xs) (return pst1)
PD.Continue 0 pst1 -> goBuf (x:back) xs (return pst1)
PD.Continue n pst1 -> do
assert (n <= length (x:back)) (return ())
let (src0, buf1) = splitAt n (x:back)
src = Prelude.reverse src0 ++ xs
goBuf buf1 src (return pst1)
PD.Done n b -> do
assert (n <= length (x:back)) (return ())
let (src0, buf1) = splitAt n (x:back)
src = Prelude.reverse src0 ++ xs
cont (Zipper [] buf1 src) (Done b)
PD.Error err -> cont (Zipper [] (x:back) xs) (Error err)
-- The case when checkpoints exist
-- XXX code duplication alert!
parse pstep initial extract (Zipper (cp:cps) backward forward) cont =
case forward of
[] -> return $ Partial (parseCont 0 backward initial)
_ -> goBuf 0 backward forward initial
where
parseCont cnt back acc (Just x) = goSingle cnt back x acc
parseCont _ back acc Nothing = do
pst <- acc
r <- try $ extract pst
let z = Zipper [] back []
case r of
Left (e :: PD.ParseError) ->
cont z (Error (displayException e))
Right b -> cont z (Done b)
{-# INLINE goSingle #-}
goSingle cnt back x !pst = do
r <- pst
pRes <- pstep r x
let cnt1 = cnt + 1
case pRes of
-- XXX return Partial pause
PD.Partial 0 pst1 ->
return $ Partial (parseCont cnt1 [] (return pst1))
PD.Partial n pst1 -> do
assert (n <= length (x:back)) (return ())
let src0 = Prelude.take n (x:back)
src = Prelude.reverse src0
goBuf (cnt1 - n) [] src (return pst1)
PD.Continue 0 pst1 ->
return $ Partial (parseCont cnt1 (x:back) (return pst1))
PD.Continue n pst1 -> do
assert (n <= length (x:back)) (return ())
let (src0, buf1) = splitAt n (x:back)
src = Prelude.reverse src0
assert (cnt1 - n >= 0) (return ())
goBuf (cnt1 - n) buf1 src (return pst1)
PD.Done n b -> do
assert (n <= length (x:back)) (return ())
let (src0, buf1) = splitAt n (x:back)
src = Prelude.reverse src0
assert (cp + cnt1 - n >= 0) (return ())
cont (Zipper (cp + cnt1 - n : cps) buf1 src) (Done b)
PD.Error err ->
cont (Zipper (cp + cnt1 : cps) (x:back) []) (Error err)
goBuf cnt back [] !acc = return $ Partial (parseCont cnt back acc)
goBuf cnt back (x:xs) !pst = do
r <- pst
pRes <- pstep r x
let cnt1 = cnt + 1
case pRes of
PD.Partial 0 pst1 ->
goBuf cnt1 [] xs (return pst1)
PD.Partial n pst1 -> do
assert (n <= length (x:back)) (return ())
let src0 = Prelude.take n (x:back)
src = Prelude.reverse src0
goBuf (cnt1 - n) [] (src ++ xs) (return pst1)
PD.Continue 0 pst1 -> goBuf cnt1 (x:back) xs (return pst1)
PD.Continue n pst1 -> do
assert (n <= length (x:back)) (return ())
let (src0, buf1) = splitAt n (x:back)
src = Prelude.reverse src0 ++ xs
assert (cnt1 - n >= 0) (return ())
goBuf (cnt1 - n) buf1 src (return pst1)
PD.Done n b -> do
assert (n <= length (x:back)) (return ())
let (src0, buf1) = splitAt n (x:back)
src = Prelude.reverse src0 ++ xs
assert (cp + cnt1 - n >= 0) (return ())
cont (Zipper (cp + cnt1 - n : cps) buf1 src) (Done b)
PD.Error err ->
cont (Zipper (cp + cnt1 : cps) (x:back) xs) (Error err)

View File

@ -1374,7 +1374,7 @@ parseK = parse
--
{-# INLINE [3] parse #-}
parse :: MonadThrow m => Parser m a b -> SerialT m a -> m b
parse = parseD . PRD.fromParserK
parse = parseD . PRK.fromParserK
------------------------------------------------------------------------------
-- Specialized folds
@ -3494,7 +3494,7 @@ parseMany
-> t m a
-> t m b
parseMany p m =
D.fromStreamD $ D.parseMany (PRD.fromParserK p) (D.toStreamD m)
D.fromStreamD $ D.parseMany (PRK.fromParserK p) (D.toStreamD m)
-- | @parseManyTill collect test stream@ tries the parser @test@ on the input,
-- if @test@ fails it backtracks and tries @collect@, after @collect@ succeeds
@ -3533,7 +3533,7 @@ parseIterate
-> t m a
-> t m b
parseIterate f i m = D.fromStreamD $
D.parseIterate (PRD.fromParserK . f) i (D.toStreamD m)
D.parseIterate (PRK.fromParserK . f) i (D.toStreamD m)
------------------------------------------------------------------------------
-- Grouping/Splitting