From 36bbd3bc1c524767866ca79add5d679cf536ecfb Mon Sep 17 00:00:00 2001 From: Harendra Kumar Date: Sat, 4 Feb 2023 07:54:52 +0530 Subject: [PATCH] Remove the rewrite rule based Parser wrapper layer Now we have explicit "Parser" and "ParserK" types for direct style and CPS parsers, respectively. The programmer can use either type depending on the use case. --- benchmark/Streamly/Benchmark/Data/Parser.hs | 76 +- .../Streamly/Benchmark/Data/Parser/ParserK.hs | 55 +- core/src/Streamly/Data/Parser.hs | 31 + core/src/Streamly/Data/Parser/ParserK.hs | 54 + .../Streamly/Internal/Data/Fold/Chunked.hs | 2 +- core/src/Streamly/Internal/Data/Parser.hs | 1538 +--------------- .../Internal/Data/Parser/Chunked/Type.hs | 2 + .../Streamly/Internal/Data/Parser/ParserD.hs | 886 ++++++++-- .../Internal/Data/Parser/ParserD/Type.hs | 121 +- .../Streamly/Internal/Data/Parser/ParserDK.hs | 1544 +++++++++++++++++ .../Internal/Data/Parser/ParserK/Type.hs | 5 +- .../Streamly/Internal/Data/Producer/Source.hs | 4 +- .../Streamly/Internal/Data/Stream/Chunked.hs | 4 +- .../Internal/Data/Stream/StreamD/Eliminate.hs | 4 +- .../Internal/Data/Stream/StreamD/Nesting.hs | 4 +- .../Streamly/Internal/Data/Stream/StreamK.hs | 2 +- .../Streamly/Internal/Serialize/FromBytes.hs | 14 +- core/src/Streamly/Internal/Unicode/Stream.hs | 2 +- core/streamly-core.cabal | 2 + .../Data/Stream/IsStream/Eliminate.hs | 4 +- .../Internal/Data/Stream/IsStream/Reduce.hs | 7 +- test/Streamly/Test/Data/Parser.hs | 5 +- 22 files changed, 2615 insertions(+), 1751 deletions(-) create mode 100644 core/src/Streamly/Data/Parser/ParserK.hs create mode 100644 core/src/Streamly/Internal/Data/Parser/ParserDK.hs diff --git a/benchmark/Streamly/Benchmark/Data/Parser.hs b/benchmark/Streamly/Benchmark/Data/Parser.hs index e30371402..c9c9e1f41 100644 --- a/benchmark/Streamly/Benchmark/Data/Parser.hs +++ b/benchmark/Streamly/Benchmark/Data/Parser.hs @@ -18,7 +18,6 @@ module Main ) where import Control.DeepSeq (NFData(..)) -import Data.Foldable (asum) import Data.Functor (($>)) import Data.Monoid (Sum(..)) import GHC.Magic (inline) @@ -30,8 +29,6 @@ import Streamly.Internal.Data.Stream.StreamD (Stream) import Prelude hiding (any, all, take, sequence, sequence_, sequenceA, takeWhile, dropWhile) -import qualified Data.Traversable as TR -import qualified Data.Foldable as F import qualified Control.Applicative as AP import qualified Streamly.FileSystem.Handle as Handle import qualified Streamly.Internal.Data.Array as Array @@ -113,18 +110,6 @@ benchIOSink value name f = -- Parsers ------------------------------------------------------------------------------- -{-# INLINE one #-} -one :: Monad m => Int -> Stream m Int -> m (Either ParseError (Maybe Int)) -one value = Stream.parse p - - where - - p = do - m <- PR.fromFold Fold.one - case m of - Just i -> if i >= value then pure m else p - Nothing -> pure Nothing - {-# INLINE takeBetween #-} takeBetween :: Monad m => Int -> Stream m a -> m (Either ParseError ()) takeBetween value = Stream.parse (PR.takeBetween 0 value Fold.drain) @@ -187,14 +172,20 @@ wordBy :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) wordBy value = Stream.parse (PR.wordBy (>= value) Fold.drain) {-# INLINE sepByWords #-} -sepByWords :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -sepByWords _ = Stream.parse (wrds even Fold.drain) +sepByWords :: Monad m => Stream m Int -> m (Either ParseError ()) +sepByWords = Stream.parse (wrds even Fold.drain) where wrds p = PR.sepBy (PR.takeWhile (not . p) Fold.drain) (PR.dropWhile p) +-- Returning a list to compare with the sepBy1 in ParserK +{-# INLINE sepBy1 #-} +sepBy1 :: Monad m => Stream m Int -> m (Either ParseError [Int]) +sepBy1 xs = do + Stream.parse (PR.sepBy1 (PR.satisfy odd) (PR.satisfy even) Fold.toList) xs + {-# INLINE sepByWords1 #-} -sepByWords1 :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -sepByWords1 _ = Stream.parse (wrds even Fold.drain) +sepByWords1 :: Monad m => Stream m Int -> m (Either ParseError ()) +sepByWords1 = Stream.parse (wrds even Fold.drain) where wrds p = PR.sepBy1 (PR.takeWhile (not . p) Fold.drain) (PR.dropWhile p) @@ -368,34 +359,6 @@ lookAhead :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) lookAhead value = Stream.parse (PR.lookAhead (PR.takeWhile (<= value) Fold.drain) $> ()) -{-# INLINE sequenceA #-} -sequenceA :: Monad m => Int -> Stream m Int -> m Int -sequenceA value xs = do - x <- Stream.parse (TR.sequenceA (replicate value (PR.satisfy (> 0)))) xs - return $ length x - -{-# INLINE sequenceA_ #-} -sequenceA_ :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -sequenceA_ value = - Stream.parse (F.sequenceA_ $ replicate value (PR.satisfy (> 0))) - -{-# INLINE sequence #-} -sequence :: Monad m => Int -> Stream m Int -> m Int -sequence value xs = do - x <- Stream.parse (TR.sequence (replicate value (PR.satisfy (> 0)))) xs - return $ length x - -{-# INLINE sequence_ #-} -sequence_ :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) -sequence_ value = - Stream.parse (F.sequence_ $ replicate value (PR.satisfy (> 0))) - -{-# INLINE choiceAsum #-} -choiceAsum :: Monad m => Int -> Stream m Int -> m (Either ParseError Int) -choiceAsum value = - Stream.parse (asum (replicate value (PR.satisfy (< 0))) - AP.<|> PR.satisfy (> 0)) - {- {-# INLINE choice #-} choice :: Monad m => Int -> Stream m Int -> m (Either ParseError Int) @@ -429,7 +392,8 @@ parseIterate n = {-# INLINE concatSequence #-} concatSequence :: Monad m => Stream m Int -> m (Either ParseError ()) -concatSequence = Stream.parse $ PR.concatSequence Fold.drain $ Stream.repeat PR.one +concatSequence = + Stream.parse $ PR.sequence (Stream.repeat PR.one) Fold.drain {-# INLINE parseManyGroupBy #-} parseManyGroupBy :: Monad m => (Int -> Int -> Bool) -> Stream m Int -> m () @@ -449,8 +413,7 @@ instance NFData ParseError where o_1_space_serial :: Int -> [Benchmark] o_1_space_serial value = - [ benchIOSink value "one (fold)" $ one value - , benchIOSink value "takeBetween" $ takeBetween value + [ benchIOSink value "takeBetween" $ takeBetween value , benchIOSink value "takeEQ" $ takeEQ value , benchIOSink value "takeWhile" $ takeWhile value , benchIOSink value "takeWhileP" $ takeWhileP value @@ -462,8 +425,9 @@ o_1_space_serial value = , benchIOSink value "groupBy" $ groupBy , benchIOSink value "groupByRolling" $ groupByRolling , benchIOSink value "wordBy" $ wordBy value - , benchIOSink value "sepBy (words)" $ sepByWords value - , benchIOSink value "sepBy1 (words)" $ sepByWords1 value + , benchIOSink value "sepBy (words)" sepByWords + , benchIOSink value "sepBy1" sepBy1 + , benchIOSink value "sepBy1 (words)" sepByWords1 , benchIOSink value "deintercalate" $ deintercalate value , benchIOSink value "splitAp" $ splitAp value , benchIOSink value "splitApBefore" $ splitApBefore value @@ -513,17 +477,9 @@ o_n_heap_serial value = -- lookahead benchmark holds the entire input till end benchIOSink value "lookAhead" $ lookAhead value - -- accumulates the results in a list - , benchIOSink value "sequence" $ sequence value - , benchIOSink value "sequenceA" $ sequenceA value - - -- XXX why should this take O(n) heap, it discards the results? - , benchIOSink value "sequence_" $ sequence_ value - , benchIOSink value "sequenceA_" $ sequenceA_ value -- non-linear time complexity (parserD) , benchIOSink value "split_" $ split_ value -- XXX why O(n) heap? - , benchIOSink value "choice (asum)" $ choiceAsum value -- , benchIOSink value "choice" $ choice value -- These show non-linear time complexity. diff --git a/benchmark/Streamly/Benchmark/Data/Parser/ParserK.hs b/benchmark/Streamly/Benchmark/Data/Parser/ParserK.hs index e47204a6e..975738100 100644 --- a/benchmark/Streamly/Benchmark/Data/Parser/ParserK.hs +++ b/benchmark/Streamly/Benchmark/Data/Parser/ParserK.hs @@ -19,11 +19,13 @@ import Data.Foldable (asum) import Streamly.Internal.Data.Parser (ParseError(..)) import Streamly.Internal.Data.Stream.StreamD (Stream) import System.Random (randomRIO) -import Prelude hiding (any, all, take, sequence, sequenceA, takeWhile) +import Prelude hiding + (any, all, take, sequence, sequence_, sequenceA, takeWhile) import qualified Control.Applicative as AP import qualified Data.Foldable as F import qualified Data.Traversable as TR +import qualified Streamly.Data.Parser.ParserK as ParserK import qualified Streamly.Internal.Data.Fold as FL import qualified Streamly.Internal.Data.Parser.ParserK.Type as PR import qualified Streamly.Internal.Data.Parser.ParserD as PRD @@ -59,11 +61,19 @@ benchIOSink value name f = -- Parsers ------------------------------------------------------------------------------- -#ifdef FROM_PARSERK -#define PARSE_OP (Stream.parseD . PRD.fromParserK) -#else -#define PARSE_OP Stream.parse -#endif +#define PARSE_OP (Stream.parse . PRD.fromParserK) + +{-# INLINE one #-} +one :: Monad m => Int -> Stream m Int -> m (Either ParseError (Maybe Int)) +one value = Stream.parse (ParserK.toParser p) + + where + + p = do + m <- ParserK.fromFold FL.one + case m of + Just i -> if i >= value then pure m else p + Nothing -> pure Nothing {-# INLINE satisfy #-} satisfy :: Monad m => (a -> Bool) -> PR.Parser a m a @@ -106,6 +116,13 @@ sequence value xs = do x <- PARSE_OP (TR.sequence list) xs return $ Prelude.length x +{-# INLINE sequence_ #-} +sequence_ :: Monad m => Int -> Stream m Int -> m (Either ParseError ()) +sequence_ value = + let parser = satisfy (> 0) + list = Prelude.replicate value parser + in PARSE_OP (F.sequence_ list) + {-# INLINE manyAlt #-} manyAlt :: Monad m => Stream m Int -> m Int manyAlt xs = do @@ -141,17 +158,40 @@ o_1_space_serial value = , benchIOSink value "splitApp" $ splitApp value ] +{-# INLINE sepBy1 #-} +sepBy1 :: Monad m => Stream m Int -> m Int +sepBy1 xs = do + x <- PARSE_OP (parser (satisfy odd) (satisfy even)) xs + return $ Prelude.length x + + where + + parser p sep = do + x <- p + fmap (x :) $ AP.many (sep >> p) + -- O(n) heap beacuse of accumulation of the list in strict IO monad? o_n_heap_serial :: Int -> [Benchmark] o_n_heap_serial value = - [ benchIOSink value "sequenceA" $ sequenceA value + [ + -- accumulates the results in a list + -- XXX why should this take O(n) heap, it discards the results? + benchIOSink value "sequence_" $ sequence_ value , benchIOSink value "sequenceA_" $ sequenceA_ value , benchIOSink value "sequence" $ sequence value + , benchIOSink value "sequenceA" $ sequenceA value , benchIOSink value "manyAlt" manyAlt + , benchIOSink value "sepBy1" sepBy1 , benchIOSink value "someAlt" someAlt , benchIOSink value "choice" $ choice value ] +-- O(n) heap beacuse of accumulation of the list in strict IO monad? +o_1_space_recursive :: Int -> [Benchmark] +o_1_space_recursive value = + [ benchIOSink value "one (recursive)" $ one value + ] + ------------------------------------------------------------------------------- -- Driver ------------------------------------------------------------------------------- @@ -163,5 +203,6 @@ main = runWithCLIOpts defaultStreamSize allBenchmarks allBenchmarks value = [ bgroup (o_1_space_prefix moduleName) (o_1_space_serial value) + , bgroup (o_1_space_prefix moduleName) (o_1_space_recursive value) , bgroup (o_n_heap_prefix moduleName) (o_n_heap_serial value) ] diff --git a/core/src/Streamly/Data/Parser.hs b/core/src/Streamly/Data/Parser.hs index fe4adc4ae..d77cc6c25 100644 --- a/core/src/Streamly/Data/Parser.hs +++ b/core/src/Streamly/Data/Parser.hs @@ -17,6 +17,37 @@ -- (which should be the case almost always) you can just use 'fromEffect' to -- execute the lower layer monad effects. -- +-- == Performance Notes +-- +-- This module is designed for fusion, inline the operations in this module for +-- fusion to occur, avoid using these operations in recursive calls, avoid +-- operations like 'sequence', 'asum' on these parsers. If you need these then +-- use the 'ParserK' module instead. +-- +-- The 'Parser' type represents a stream consumer by composing state as data +-- which enables stream fusion. Stream fusion generates a tight loop without +-- any constructor allocations between the stages, providing C like performance +-- for the loop. Stream fusion works when multiple functions are combined in a +-- pipeline statically. Therefore, the operations in this module must be +-- inlined and must not be used recursively to allow for stream fusion. +-- +-- Using the 'Parser' type parsing operations like 'one', 'splitWith' etc. +-- degrade quadratically (O(n^2)) when combined many times. If you need to +-- combine these operations, say more than 50 times in a single loop, then you +-- should use the continuation style parser type 'ParserK' instead. Also, if +-- you need to use these operations in a recursive loop you should use +-- 'ParserK' instead. +-- +-- The 'ParserK' type represents a stream consumer by composing function calls, +-- therefore, a function call overhead is incurred at each composition. It is +-- quite fast in general but may be a few times slower than a fused parser. +-- However, it allows for scalable dynamic composition especially parsers can +-- be used in recursive calls. Using the 'ParserK' type operations like +-- 'splitWith' provide linear (O(n)) performance with respect to the number of +-- compositions.. +-- +-- 'Parser' and 'ParserK' types can be interconverted. +-- module Streamly.Data.Parser ( -- * Parser Type diff --git a/core/src/Streamly/Data/Parser/ParserK.hs b/core/src/Streamly/Data/Parser/ParserK.hs new file mode 100644 index 000000000..1b4529b04 --- /dev/null +++ b/core/src/Streamly/Data/Parser/ParserK.hs @@ -0,0 +1,54 @@ +-- | +-- Module : Streamly.Data.Parser.ParserK +-- Copyright : (c) 2023 Composewell Technologies +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com +-- Stability : pre-release +-- Portability : GHC +-- +-- Parsers using Continuation Passing Style (CPS). See notes in +-- "Streamly.Data.Parser" module to know when to use this module. +-- +-- To run a 'ParserK' convert it to a 'Parser' and then run it. +-- +module Streamly.Data.Parser.ParserK + ( + -- * Parser Type + ParserK + , Parser + + -- * Parsers + -- ** Conversions + , fromFold + , fromParser + , toParser + + -- ** Without Input + , fromPure + , fromEffect + , die + ) + +where + +import Streamly.Internal.Data.Fold (Fold) +import Streamly.Internal.Data.Parser.ParserK.Type +import qualified Streamly.Internal.Data.Parser.ParserD as ParserD + +-- | Convert a 'Fold' to a 'ParserK'. +-- +{-# INLINE fromFold #-} +fromFold :: Monad m => Fold m a b -> ParserK a m b +fromFold = ParserD.toParserK . ParserD.fromFold + +-- | Convert a 'Parser' to a 'ParserK'. +-- +{-# INLINE fromParser #-} +fromParser :: Monad m => ParserD.Parser a m b -> ParserK a m b +fromParser = ParserD.toParserK + +-- | Convert a 'ParserK' to a 'Parser'. +-- +{-# INLINE toParser #-} +toParser :: Monad m => ParserK a m b -> ParserD.Parser a m b +toParser = ParserD.fromParserK diff --git a/core/src/Streamly/Internal/Data/Fold/Chunked.hs b/core/src/Streamly/Internal/Data/Fold/Chunked.hs index 079bfa0d3..db784e243 100644 --- a/core/src/Streamly/Internal/Data/Fold/Chunked.hs +++ b/core/src/Streamly/Internal/Data/Fold/Chunked.hs @@ -182,7 +182,7 @@ fromParserD (ParserD.Parser step1 initial1 extract1) = {-# INLINE fromParser #-} fromParser :: forall m a b. (MonadIO m, Unbox a) => Parser.Parser a m b -> ChunkFold m a b -fromParser = fromParserD . ParserD.fromParserK +fromParser = fromParserD -- | Adapt an array stream fold. -- diff --git a/core/src/Streamly/Internal/Data/Parser.hs b/core/src/Streamly/Internal/Data/Parser.hs index 2847f8661..0a0f88cb3 100644 --- a/core/src/Streamly/Internal/Data/Parser.hs +++ b/core/src/Streamly/Internal/Data/Parser.hs @@ -1,1544 +1,14 @@ -{-# OPTIONS_GHC -Wno-orphans #-} - -- | -- Module : Streamly.Internal.Data.Parser --- Copyright : (c) 2020 Composewell Technologies +-- Copyright : (c) 2019 Composewell Technologies -- License : BSD-3-Clause -- Maintainer : streamly@composewell.com --- Stability : pre-release +-- Stability : experimental -- Portability : GHC -- --- Fast backtracking parsers with stream fusion and native streaming --- capability. --- --- 'Applicative' and 'Control.Applicative.Alternative' type class based --- combinators from the --- --- package can also be used with the 'Parser' type. However, there are two --- important differences between @parser-combinators@ and the equivalent ones --- provided in this module in terms of performance: --- --- 1) @parser-combinators@ use plain Haskell lists to collect the results, in a --- strict Monad like IO, the results are necessarily buffered before they can --- be consumed. This may not perform optimally in streaming applications --- processing large amounts of data. Equivalent combinators in this module can --- consume the results of parsing using a 'Fold', thus providing a scalability --- and a composable consumer. --- --- 2) Several combinators in this module can be many times faster because of --- stream fusion. For example, 'Streamly.Internal.Data.Parser.many' combinator --- in this module is much faster than the 'Control.Applicative.many' combinator --- of 'Control.Applicative.Alternative' type class. --- --- = Errors --- --- Failing parsers in this module throw the 'D.ParseError' exception. --- --- = Naming --- --- As far as possible, try that the names of the combinators in this module are --- consistent with: --- --- * --- * --- * --- * --- * - module Streamly.Internal.Data.Parser - ( - K.Parser (..) - , D.ParseError (..) - , D.Step (..) - - -- * Downgrade to Fold - , toFold - - -- First order parsers - -- * Accumulators - , fromFold - , fromFoldMaybe - , fromPure - , fromEffect - , die - , dieM - - -- * Map on input - , lmap - , lmapM - , filter - - -- * Map on output - , rmapM - - -- * Element parsers - , peek - - -- All of these can be expressed in terms of either - , one - , oneEq - , oneNotEq - , oneOf - , noneOf - , eof - , satisfy - , maybe - , either - - -- * Sequence parsers (tokenizers) - -- - -- | Parsers chained in series, if one parser terminates the composition - -- terminates. - - , lookAhead - - -- ** By length - -- | Grab a sequence of input elements without inspecting them - , takeBetween - -- , take -- takeBetween 0 n - , takeEQ -- takeBetween n n - , takeGE -- takeBetween n maxBound - , takeP - - -- Grab a sequence of input elements by inspecting them - -- ** Exact match - , listEq - , listEqBy - , streamEqBy - , subsequenceBy - - -- ** By predicate - , takeWhileP - , takeWhile - -- $takeWhile - , takeWhile1 - , dropWhile - - -- ** Separators - , takeEndBy - , takeEndBy_ - , takeEndByEsc - -- , takeEndByEsc_ - , takeStartBy - , takeStartBy_ - , takeEitherSepBy - , wordBy - -- , wordByEsc - - -- ** By comparing - , groupBy - , groupByRolling - , groupByRollingEither - - -- ** Framing - -- , takeFramedBy - , takeFramedBy_ - , takeFramedByEsc_ - , takeFramedByGeneric - , wordFramedBy - , wordQuotedBy - - -- | Unimplemented - -- - -- @ - -- , prefixOf -- match any prefix of a given string - -- , suffixOf -- match any suffix of a given string - -- , infixOf -- match any substring of a given string - -- @ - - -- Second order parsers (parsers using parsers) - -- * Binary Combinators - - -- ** Sequential Applicative - , splitWith - , split_ - -{- - -- ** Parallel Applicatives - , teeWith - , teeWithFst - , teeWithMin - -- , teeTill -- like manyTill but parallel --} - - -- ** Sequential Interleaving - -- Use two folds, run a primary parser, its rejected values go to the - -- secondary parser. - , deintercalate - -- , deintercalatePrefix - -- , deintercalateSuffix - - -- *** Special cases - -- | TODO: traditional implmentations of these may be of limited use. For - -- example, consider parsing lines separated by @\\r\\n@. The main parser - -- will have to detect and exclude the sequence @\\r\\n@ anyway so that we - -- can apply the "sep" parser. - -- - -- We can instead implement these as special cases of deintercalate. - -- - -- @ - -- , endBy - -- , sepEndBy - -- , beginBy - -- , sepBeginBy - -- , sepAroundBy - -- @ - , sepBy1 - , sepBy - - -- ** Sequential Alternative - , alt - -{- - -- ** Parallel Alternatives - , shortest - , longest - -- , fastest --} - - -- * N-ary Combinators - -- ** Sequential Collection - , concatSequence - , concatMap - - -- ** Sequential Repetition - , count - , countBetween - - , manyP - , many - , some - , manyTillP - , manyTill - , manyThen - - -- * Distribution - -- - -- | A simple and stupid impl would be to just convert the stream to an - -- array and give the array reference to all consumers. The array can be - -- grown on demand by any consumer and truncated when nonbody needs it. - - -- ** Distribute to collection - -- ** Distribute to repetition - - -- ** Interleaved collection - -- | - -- - -- 1. Round robin - -- 2. Priority based - , roundRobin - - -- ** Collection of Alternatives - -- | Unimplemented - -- - -- @ - -- , shortestN - -- , longestN - -- , fastestN -- first N successful in time - -- , choiceN -- first N successful in position - -- @ - -- , choice -- first successful in position - - -- ** Repeated Alternatives - , retryMaxTotal - , retryMaxSuccessive - , retry - - -- * Deprecated - , next + ( module Streamly.Internal.Data.Parser.ParserD ) where -import Streamly.Internal.Data.Fold.Type (Fold(..)) -import Streamly.Internal.Data.Parser.ParserK.Type (Parser) - -import qualified Data.Foldable as Foldable -import qualified Streamly.Internal.Data.Fold.Type as FL -import qualified Streamly.Internal.Data.Parser.ParserD as D -import qualified Streamly.Internal.Data.Parser.ParserK.Type as K - -#ifdef USE_STREAMK -import Streamly.Internal.Data.StreamK (Stream) -import qualified Streamly.Internal.Data.StreamK as Stream -#else -import Streamly.Internal.Data.Stream.StreamD.Type (Stream) -import qualified Streamly.Internal.Data.Stream.StreamD.Type as Stream -#endif - -import Prelude hiding - ( any, all, dropWhile, take, takeWhile, sequence, concatMap, maybe, either - , filter ) - --- --- $setup --- >>> :m --- >>> import Prelude hiding (any, all, dropWhile, take, takeWhile, sequence, concatMap, maybe, either, filter) --- >>> import Control.Applicative ((<|>)) --- >>> import Data.Char (isSpace) --- >>> import qualified Data.Maybe as Maybe --- >>> import qualified Data.Foldable as Foldable --- >>> import qualified Streamly.Data.Stream as Stream --- >>> import qualified Streamly.Internal.Data.Stream as Stream (parse, parseMany) --- >>> import qualified Streamly.Internal.Data.Fold as Fold --- >>> import qualified Streamly.Internal.Data.Parser as Parser - -------------------------------------------------------------------------------- --- Downgrade a parser to a Fold -------------------------------------------------------------------------------- --- --- | Make a 'Fold' from a 'Parser'. The fold just throws an exception if the --- parser fails or tries to backtrack. --- --- This can be useful in combinators that accept a Fold and we know that a --- Parser cannot fail or failure exception is acceptable as there is no way to --- recover. --- --- /Pre-release/ --- -{-# INLINE toFold #-} -toFold :: Monad m => Parser a m b -> Fold m a b -toFold p = D.toFold $ D.fromParserK p - -------------------------------------------------------------------------------- --- Upgrade folds to parses -------------------------------------------------------------------------------- --- --- | Make a 'Parser' from a 'Fold'. --- -{-# INLINE fromFold #-} -fromFold :: Monad m => Fold m a b -> Parser a m b -fromFold = D.toParserK . D.fromFold - --- | Convert a Maybe returning fold to an error returning parser. The first --- argument is the error message that the parser would return when the fold --- returns Nothing. --- --- /Pre-release/ --- -{-# INLINE fromFoldMaybe #-} -fromFoldMaybe :: Monad m => String -> Fold m a (Maybe b) -> Parser a m b -fromFoldMaybe err = D.toParserK . D.fromFoldMaybe err - -------------------------------------------------------------------------------- --- Terminating but not failing folds -------------------------------------------------------------------------------- --- --- This is the dual of stream "fromPure". --- --- | A parser that always yields a pure value without consuming any input. --- -{-# INLINE [3] fromPure #-} -fromPure :: Monad m => b -> Parser a m b -fromPure = D.toParserK . D.fromPure -{-# RULES "fromPure fallback to CPS" [2] - forall a. D.toParserK (D.fromPure a) = K.fromPure a #-} - --- This is the dual of stream "fromEffect". --- --- | A parser that always yields the result of an effectful action without --- consuming any input. --- -{-# INLINE fromEffect #-} -fromEffect :: Monad m => m b -> Parser a m b -fromEffect = K.fromEffect -- D.toParserK . D.fromEffect - --- This is the dual of "nil". --- --- | A parser that always fails with an error message without consuming --- any input. --- -{-# INLINE [3] die #-} -die :: Monad m => String -> Parser a m b -die = D.toParserK . D.die -{-# RULES "die fallback to CPS" [2] - forall a. D.toParserK (D.die a) = K.die a #-} - --- This is the dual of "nilM". --- --- | A parser that always fails with an effectful error message and without --- consuming any input. --- --- /Pre-release/ --- -{-# INLINE dieM #-} -dieM :: Monad m => m String -> Parser a m b -dieM = D.toParserK . D.dieM - -------------------------------------------------------------------------------- --- Map on input -------------------------------------------------------------------------------- - --- | @lmap f parser@ maps the function @f@ on the input of the parser. --- --- >>> Stream.parse (Parser.lmap (\x -> x * x) (Parser.fromFold Fold.sum)) (Stream.enumerateFromTo 1 100) --- Right 338350 --- --- > lmap = Parser.lmapM return --- -{-# INLINE lmap #-} -lmap :: Monad m => (a -> b) -> Parser b m r -> Parser a m r -lmap f p = D.toParserK $ D.lmap f $ D.fromParserK p - --- | @lmapM f parser@ maps the monadic function @f@ on the input of the parser. --- -{-# INLINE lmapM #-} -lmapM :: Monad m => (a -> m b) -> Parser b m r -> Parser a m r -lmapM f p = D.toParserK $ D.lmapM f $ D.fromParserK p - --- | @rmapM f parser@ maps the monadic function @f@ on the output of the parser. --- --- >>> rmap = fmap -{-# INLINE rmapM #-} -rmapM :: Monad m => (b -> m c) -> Parser a m b -> Parser a m c -rmapM f p = D.toParserK $ D.rmapM f $ D.fromParserK p - --- | Include only those elements that pass a predicate. --- --- >>> Stream.parse (Parser.filter (> 5) (Parser.fromFold Fold.sum)) $ Stream.fromList [1..10] --- Right 40 --- -{-# INLINE filter #-} -filter :: Monad m => (a -> Bool) -> Parser a m b -> Parser a m b -filter f p = D.toParserK $ D.filter f $ D.fromParserK p - -------------------------------------------------------------------------------- --- Failing Parsers -------------------------------------------------------------------------------- - --- | Peek the head element of a stream, without consuming it. Fails if it --- encounters end of input. --- --- >>> Stream.parse ((,) <$> Parser.peek <*> Parser.satisfy (> 0)) $ Stream.fromList [1] --- Right (1,1) --- --- @ --- peek = lookAhead (satisfy True) --- @ --- -{-# INLINE peek #-} -peek :: Monad m => Parser a m a -peek = D.toParserK D.peek - --- | Succeeds if we are at the end of input, fails otherwise. --- --- >>> Stream.parse ((,) <$> Parser.satisfy (> 0) <*> Parser.eof) $ Stream.fromList [1] --- Right (1,()) --- -{-# INLINE eof #-} -eof :: Monad m => Parser a m () -eof = D.toParserK D.eof - --- | Returns the next element if it passes the predicate, fails otherwise. --- --- >>> Stream.parse (Parser.satisfy (== 1)) $ Stream.fromList [1,0,1] --- Right 1 --- --- >>> toMaybe f x = if f x then Just x else Nothing --- >>> satisfy f = Parser.maybe (toMaybe f) --- -{-# INLINE satisfy #-} -satisfy :: Monad m => (a -> Bool) -> Parser a m a -satisfy = D.toParserK . D.satisfy - --- | Consume one element from the head of the stream. Fails if it encounters --- end of input. --- --- >>> one = Parser.satisfy $ const True --- -{-# INLINE one #-} -one :: Monad m => Parser a m a -one = satisfy $ const True - --- Alternate names: "only", "onlyThis". - --- | Match a specific element. --- --- >>> oneEq x = Parser.satisfy (== x) --- -{-# INLINE oneEq #-} -oneEq :: (Monad m, Eq a) => a -> Parser a m a -oneEq x = satisfy (== x) - --- Alternate names: "exclude", "notThis". - --- | Match anything other than the supplied element. --- --- >>> oneNotEq x = Parser.satisfy (/= x) --- -{-# INLINE oneNotEq #-} -oneNotEq :: (Monad m, Eq a) => a -> Parser a m a -oneNotEq x = satisfy (/= x) - --- | Match any one of the elements in the supplied list. --- --- >>> oneOf xs = Parser.satisfy (`Foldable.elem` xs) --- --- When performance matters a pattern matching predicate could be more --- efficient than a 'Foldable' datatype: --- --- @ --- let p x = --- case x of --- 'a' -> True --- 'e' -> True --- _ -> False --- in satisfy p --- @ --- --- GHC may use a binary search instead of linear search in the list. --- Alternatively, you can also use an array instead of list for storage and --- search. --- -{-# INLINE oneOf #-} -oneOf :: (Monad m, Eq a, Foldable f) => f a -> Parser a m a -oneOf xs = satisfy (`Foldable.elem` xs) - --- | See performance notes in 'oneOf'. --- --- >>> noneOf xs = Parser.satisfy (`Foldable.notElem` xs) --- -{-# INLINE noneOf #-} -noneOf :: (Monad m, Eq a, Foldable f) => f a -> Parser a m a -noneOf xs = satisfy (`Foldable.notElem` xs) - --- | Return the next element of the input. Returns 'Nothing' --- on end of input. Also known as 'head'. --- --- /Pre-release/ --- -{-# DEPRECATED next "Please use \"fromFold Fold.one\" instead" #-} -{-# INLINE next #-} -next :: Monad m => Parser a m (Maybe a) -next = D.toParserK D.next - --- | Map a 'Maybe' returning function on the next element in the stream. The --- parser fails if the function returns 'Nothing' otherwise returns the 'Just' --- value. --- --- >>> toEither = Maybe.maybe (Left "maybe: predicate failed") Right --- >>> maybe f = Parser.either (toEither . f) --- --- >>> maybe f = Parser.fromFoldMaybe "maybe: predicate failed" (Fold.maybe f) --- --- /Pre-release/ --- -{-# INLINE maybe #-} -maybe :: Monad m => (a -> Maybe b) -> Parser a m b -maybe = D.toParserK . D.maybe - --- | Map an 'Either' returning function on the next element in the stream. If --- the function returns 'Left err', the parser fails with the error message --- @err@ otherwise returns the 'Right' value. --- --- /Pre-release/ --- -{-# INLINE either #-} -either :: Monad m => (a -> Either String b) -> Parser a m b -either = D.toParserK . D.either - -------------------------------------------------------------------------------- --- Taking elements -------------------------------------------------------------------------------- - --- | @takeBetween m n@ takes a minimum of @m@ and a maximum of @n@ input --- elements and folds them using the supplied fold. --- --- Stops after @n@ elements. --- Fails if the stream ends before @m@ elements could be taken. --- --- Examples: - --- --- @ --- >>> :{ --- takeBetween' low high ls = Stream.parse prsr (Stream.fromList ls) --- where prsr = Parser.takeBetween low high Fold.toList --- :} --- --- @ --- --- >>> takeBetween' 2 4 [1, 2, 3, 4, 5] --- Right [1,2,3,4] --- --- >>> takeBetween' 2 4 [1, 2] --- Right [1,2] --- --- >>> takeBetween' 2 4 [1] --- Left (ParseError "takeBetween: Expecting alteast 2 elements, got 1") --- --- >>> takeBetween' 0 0 [1, 2] --- Right [] --- --- >>> takeBetween' 0 1 [] --- Right [] --- --- @takeBetween@ is the most general take operation, other take operations can --- be defined in terms of takeBetween. For example: --- --- >>> take n = Parser.takeBetween 0 n --- >>> takeEQ n = Parser.takeBetween n n --- >>> takeGE n = Parser.takeBetween n maxBound --- --- /Pre-release/ --- -{-# INLINE takeBetween #-} -takeBetween :: Monad m => - Int -> Int -> Fold m a b -> Parser a m b -takeBetween m n = D.toParserK . D.takeBetween m n - --- | Stops after taking exactly @n@ input elements. --- --- * Stops - after consuming @n@ elements. --- * Fails - if the stream or the collecting fold ends before it can collect --- exactly @n@ elements. --- --- >>> Stream.parse (Parser.takeEQ 4 Fold.toList) $ Stream.fromList [1,0,1] --- Left (ParseError "takeEQ: Expecting exactly 4 elements, input terminated on 3") --- -{-# INLINE takeEQ #-} -takeEQ :: Monad m => Int -> Fold m a b -> Parser a m b -takeEQ n = D.toParserK . D.takeEQ n - --- | Take at least @n@ input elements, but can collect more. --- --- * Stops - when the collecting fold stops. --- * Fails - if the stream or the collecting fold ends before producing @n@ --- elements. --- --- >>> Stream.parse (Parser.takeGE 4 Fold.toList) $ Stream.fromList [1,0,1] --- Left (ParseError "takeGE: Expecting at least 4 elements, input terminated on 3") --- --- >>> Stream.parse (Parser.takeGE 4 Fold.toList) $ Stream.fromList [1,0,1,0,1] --- Right [1,0,1,0,1] --- --- /Pre-release/ --- -{-# INLINE takeGE #-} -takeGE :: Monad m => Int -> Fold m a b -> Parser a m b -takeGE n = D.toParserK . D.takeGE n - -------------------------------------------------------------------------------- --- Take until a condition -------------------------------------------------------------------------------- - --- $takeWhile --- Note: This is called @takeWhileP@ and @munch@ in some parser libraries. - --- XXX We should perhaps use only takeWhileP and rename it to takeWhile. --- --- | Like 'takeWhile' but uses a 'Parser' instead of a 'Fold' to collect the --- input. The combinator stops when the condition fails or if the collecting --- parser stops. --- --- Other interesting parsers can be implemented in terms of this parser: --- --- >>> takeWhile1 cond p = Parser.takeWhileP cond (Parser.takeBetween 1 maxBound p) --- >>> takeWhileBetween cond m n p = Parser.takeWhileP cond (Parser.takeBetween m n p) --- --- Stops: when the condition fails or the collecting parser stops. --- Fails: when the collecting parser fails. --- --- /Pre-release/ --- -{-# INLINE takeWhileP #-} -takeWhileP :: Monad m => (a -> Bool) -> Parser a m b -> Parser a m b -takeWhileP cond p = D.toParserK $ D.takeWhileP cond (D.fromParserK p) - --- | Collect stream elements until an element fails the predicate. The element --- on which the predicate fails is returned back to the input stream. --- --- * Stops - when the predicate fails or the collecting fold stops. --- * Fails - never. --- --- >>> Stream.parse (Parser.takeWhile (== 0) Fold.toList) $ Stream.fromList [0,0,1,0,1] --- Right [0,0] --- --- >>> takeWhile cond f = Parser.takeWhileP cond (Parser.fromFold f) --- --- We can implement a @breakOn@ using 'takeWhile': --- --- @ --- breakOn p = takeWhile (not p) --- @ --- -{-# INLINE takeWhile #-} -takeWhile :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b -takeWhile cond = D.toParserK . D.takeWhile cond --- takeWhile cond f = takeWhileP cond (fromFold f) - --- | Like 'takeWhile' but takes at least one element otherwise fails. --- --- >>> takeWhile1 cond p = Parser.takeWhileP cond (Parser.takeBetween 1 maxBound p) --- -{-# INLINE takeWhile1 #-} -takeWhile1 :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b -takeWhile1 cond = D.toParserK . D.takeWhile1 cond --- takeWhile1 cond f = takeWhileP cond (takeBetween 1 maxBound f) - --- | Drain the input as long as the predicate succeeds, running the effects and --- discarding the results. --- --- This is also called @skipWhile@ in some parsing libraries. --- --- >>> dropWhile p = Parser.takeWhile p Fold.drain --- -{-# INLINE dropWhile #-} -dropWhile :: Monad m => (a -> Bool) -> Parser a m () -dropWhile p = takeWhile p FL.drain - -------------------------------------------------------------------------------- --- Separators -------------------------------------------------------------------------------- - --- XXX We can remove Maybe from esc -{-# INLINE takeFramedByGeneric #-} -takeFramedByGeneric :: Monad m => - Maybe (a -> Bool) - -> Maybe (a -> Bool) - -> Maybe (a -> Bool) - -> Fold m a b - -> Parser a m b -takeFramedByGeneric esc begin end f = - D.toParserK $ D.takeFramedByGeneric esc begin end f - --- | @takeEndBy cond parser@ parses a token that ends by a separator chosen by --- the supplied predicate. The separator is also taken with the token. --- --- This can be combined with other parsers to implement other interesting --- parsers as follows: --- --- >>> takeEndByLE cond n p = Parser.takeEndBy cond (Parser.fromFold $ Fold.take n p) --- >>> takeEndByBetween cond m n p = Parser.takeEndBy cond (Parser.takeBetween m n p) --- --- >>> takeEndBy = Parser.takeEndByEsc (const False) --- --- See also "Streamly.Data.Fold.takeEndBy". Unlike the fold, the collecting --- parser in the takeEndBy parser can decide whether to fail or not if the --- stream does not end with separator. --- --- /Pre-release/ --- -{-# INLINE takeEndBy #-} -takeEndBy :: Monad m => (a -> Bool) -> Parser a m b -> Parser a m b -takeEndBy cond = D.toParserK . D.takeEndBy cond . D.fromParserK --- takeEndBy = takeEndByEsc (const False) - --- | Like 'takeEndBy' but the separator is dropped. --- --- See also "Streamly.Data.Fold.takeEndBy_". --- --- /Pre-release/ --- -{-# INLINE takeEndBy_ #-} -takeEndBy_ :: Monad m => (a -> Bool) -> Parser a m b -> Parser a m b -takeEndBy_ cond = D.toParserK . D.takeEndBy_ cond . D.fromParserK -{- -takeEndBy_ isEnd p = - takeFramedByGeneric Nothing Nothing (Just isEnd) (toFold p) --} - --- | Take either the separator or the token. Separator is a Left value and --- token is Right value. --- --- /Unimplemented/ -{-# INLINE takeEitherSepBy #-} -takeEitherSepBy :: -- Monad m => - (a -> Bool) -> Fold m (Either a b) c -> Parser a m c -takeEitherSepBy _cond = undefined -- D.toParserK . D.takeEitherSepBy cond - --- | Parse a token that starts with an element chosen by the predicate. The --- parser fails if the input does not start with the selected element. --- --- * Stops - when the predicate succeeds in non-leading position. --- * Fails - when the predicate fails in the leading position. --- --- >>> splitWithPrefix p f = Stream.parseMany (Parser.takeStartBy p f) --- --- Examples: - --- --- >>> p = Parser.takeStartBy (== ',') Fold.toList --- >>> leadingComma = Stream.parse p . Stream.fromList --- >>> leadingComma "a,b" --- Left (ParseError "takeStartBy: missing frame start") --- ... --- >>> leadingComma ",," --- Right "," --- >>> leadingComma ",a,b" --- Right ",a" --- >>> leadingComma "" --- Right "" --- --- /Pre-release/ --- -{-# INLINE takeStartBy #-} -takeStartBy :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b -takeStartBy cond = D.toParserK . D.takeStartBy cond - --- | Like 'takeStartBy' but drops the separator. --- --- >>> takeStartBy_ isBegin = Parser.takeFramedByGeneric Nothing (Just isBegin) Nothing --- -{-# INLINE takeStartBy_ #-} -takeStartBy_ :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b -takeStartBy_ isBegin = takeFramedByGeneric Nothing (Just isBegin) Nothing - -------------------------------------------------------------------------------- --- Quoting and Escaping -------------------------------------------------------------------------------- - --- | Like 'takeEndBy' but the separator elements can be escaped using an --- escape char determined by the first predicate. The escape characters are --- removed. --- --- /pre-release/ -{-# INLINE takeEndByEsc #-} -takeEndByEsc :: Monad m => - (a -> Bool) -> (a -> Bool) -> Parser a m b -> Parser a m b -takeEndByEsc isEsc isEnd p = - D.toParserK $ D.takeEndByEsc isEsc isEnd (D.fromParserK p) - --- | @takeFramedByEsc_ isEsc isBegin isEnd fold@ parses a token framed using a --- begin and end predicate, and an escape character. The frame begin and end --- characters lose their special meaning if preceded by the escape character. --- --- Nested frames are allowed if begin and end markers are different, nested --- frames must be balanced unless escaped, nested frame markers are emitted as --- it is. --- --- For example, --- --- >>> p = Parser.takeFramedByEsc_ (== '\\') (== '{') (== '}') Fold.toList --- >>> Stream.parse p $ Stream.fromList "{hello}" --- Right "hello" --- >>> Stream.parse p $ Stream.fromList "{hello {world}}" --- Right "hello {world}" --- >>> Stream.parse p $ Stream.fromList "{hello \\{world}" --- Right "hello {world" --- >>> Stream.parse p $ Stream.fromList "{hello {world}" --- Left (ParseError "takeFramedByEsc_: missing frame end") --- --- /Pre-release/ -{-# INLINE takeFramedByEsc_ #-} -takeFramedByEsc_ :: Monad m => - (a -> Bool) -> (a -> Bool) -> (a -> Bool) -> Fold m a b -> Parser a m b -takeFramedByEsc_ isEsc isBegin isEnd f = - D.toParserK $ D.takeFramedByEsc_ isEsc isBegin isEnd f --- takeEndByEsc_ isEsc isEnd p = --- takeFramedByGeneric (Just isEsc) Nothing (Just isEnd) (toFold p) - --- | @takeFramedBy_ isBegin isEnd fold@ parses a token framed by a begin and an --- end predicate. --- --- >>> takeFramedBy_ = Parser.takeFramedByEsc_ (const False) --- -{-# INLINE takeFramedBy_ #-} -takeFramedBy_ :: Monad m => - (a -> Bool) -> (a -> Bool) -> Fold m a b -> Parser a m b -takeFramedBy_ isBegin isEnd f = D.toParserK $ D.takeFramedBy_ isBegin isEnd f --- takeFramedBy_ isBegin isEnd = --- takeFramedByGeneric (Just (const False)) (Just isBegin) (Just isEnd) - -------------------------------------------------------------------------------- --- Grouping and words -------------------------------------------------------------------------------- - --- Note we can also get words using something like: --- sepBy FL.toList (takeWhile (not . p) Fold.toList) (dropWhile p) --- --- But that won't be as efficient and ergonomic. --- --- | Like 'splitOn' but strips leading, trailing, and repeated separators. --- Therefore, @".a..b."@ having '.' as the separator would be parsed as --- @["a","b"]@. In other words, its like parsing words from whitespace --- separated text. --- --- * Stops - when it finds a word separator after a non-word element --- * Fails - never. --- --- >>> wordBy = Parser.wordFramedBy (const False) (const False) (const False) --- --- @ --- S.wordsBy pred f = S.parseMany (PR.wordBy pred f) --- @ --- -{-# INLINE wordBy #-} -wordBy :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b -wordBy f = D.toParserK . D.wordBy f - --- | Like 'wordBy' but treats anything inside a pair of quotes as a single --- word, the quotes can be escaped by an escape character. Recursive quotes --- are possible if quote begin and end characters are different, quotes must be --- balanced. Outermost quotes are stripped. --- --- >>> braces = Parser.wordFramedBy (== '\\') (== '{') (== '}') isSpace Fold.toList --- >>> Stream.parse braces $ Stream.fromList "{ab} cd" --- Right "ab" --- >>> Stream.parse braces $ Stream.fromList "{ab}{cd}" --- Right "abcd" --- >>> Stream.parse braces $ Stream.fromList "a{b} cd" --- Right "ab" --- >>> Stream.parse braces $ Stream.fromList "a{{b}} cd" --- Right "a{b}" --- --- >>> quotes = Parser.wordFramedBy (== '\\') (== '"') (== '"') isSpace Fold.toList --- >>> Stream.parse quotes $ Stream.fromList "\"a\"\"b\"" --- Right "ab" --- -{-# INLINE wordFramedBy #-} -wordFramedBy :: Monad m => - (a -> Bool) -- ^ Escape - -> (a -> Bool) -- ^ left quote - -> (a -> Bool) -- ^ right quote - -> (a -> Bool) -- ^ word seperator - -> Fold m a b - -> Parser a m b -wordFramedBy isEsc isBegin isEnd isSpc = - D.toParserK . D.wordFramedBy isEsc isBegin isEnd isSpc - --- | Like 'wordFramedBy' but the closing quote is determined by the opening --- quote. The first quote begin starts a quote that is closed by its --- corresponding closing quote. --- --- 'wordFramedBy' and 'wordQuotedBy' both allow multiple quote characters based --- on the predicates but 'wordQuotedBy' always fixes the quote at the first --- occurrence and then it is closed only by the corresponding closing quote. --- Therefore, other quoting characters can be embedded inside it as normal --- characters. On the other hand, 'wordFramedBy' would close the quote as soon --- as it encounters any of the closing quotes. --- --- >>> q = (`elem` ['"', '\'']) --- >>> p kQ = Parser.wordQuotedBy kQ (== '\\') q q id isSpace Fold.toList --- --- >>> Stream.parse (p False) $ Stream.fromList "a\"b'c\";'d\"e'f ghi" --- Right "ab'c;d\"ef" --- --- >>> Stream.parse (p True) $ Stream.fromList "a\"b'c\";'d\"e'f ghi" --- Right "a\"b'c\";'d\"e'f" --- -{-# INLINE wordQuotedBy #-} -wordQuotedBy :: (Monad m, Eq a) => - Bool -- ^ keep the quotes in the output - -> (a -> Bool) -- ^ Escape - -> (a -> Bool) -- ^ left quote - -> (a -> Bool) -- ^ right quote - -> (a -> a) -- ^ get right quote from left quote - -> (a -> Bool) -- ^ word seperator - -> Fold m a b - -> Parser a m b -wordQuotedBy keepQuotes isEsc isBegin isEnd toRight isSpc = - D.toParserK . D.wordQuotedBy keepQuotes isEsc isBegin isEnd toRight isSpc - --- | Given an input stream @[a,b,c,...]@ and a comparison function @cmp@, the --- parser assigns the element @a@ to the first group, then if @a \`cmp` b@ is --- 'True' @b@ is also assigned to the same group. If @a \`cmp` c@ is 'True' --- then @c@ is also assigned to the same group and so on. When the comparison --- fails the parser is terminated. Each group is folded using the 'Fold' @f@ and --- the result of the fold is the result of the parser. --- --- * Stops - when the comparison fails. --- * Fails - never. --- --- >>> :{ --- runGroupsBy eq = --- Stream.fold Fold.toList --- . Stream.parseMany (Parser.groupBy eq Fold.toList) --- . Stream.fromList --- :} --- --- >>> runGroupsBy (<) [] --- [] --- --- >>> runGroupsBy (<) [1] --- [Right [1]] --- --- >>> runGroupsBy (<) [3, 5, 4, 1, 2, 0] --- [Right [3,5,4],Right [1,2],Right [0]] --- -{-# INLINE groupBy #-} -groupBy :: Monad m => (a -> a -> Bool) -> Fold m a b -> Parser a m b -groupBy eq = D.toParserK . D.groupBy eq - --- | Unlike 'groupBy' this combinator performs a rolling comparison of two --- successive elements in the input stream. Assuming the input stream --- is @[a,b,c,...]@ and the comparison function is @cmp@, the parser --- first assigns the element @a@ to the first group, then if @a \`cmp` b@ is --- 'True' @b@ is also assigned to the same group. If @b \`cmp` c@ is 'True' --- then @c@ is also assigned to the same group and so on. When the comparison --- fails the parser is terminated. Each group is folded using the 'Fold' @f@ and --- the result of the fold is the result of the parser. --- --- * Stops - when the comparison fails. --- * Fails - never. --- --- >>> :{ --- runGroupsByRolling eq = --- Stream.fold Fold.toList --- . Stream.parseMany (Parser.groupByRolling eq Fold.toList) --- . Stream.fromList --- :} --- --- >>> runGroupsByRolling (<) [] --- [] --- --- >>> runGroupsByRolling (<) [1] --- [Right [1]] --- --- >>> runGroupsByRolling (<) [3, 5, 4, 1, 2, 0] --- [Right [3,5],Right [4],Right [1,2],Right [0]] --- --- /Pre-release/ --- -{-# INLINE groupByRolling #-} -groupByRolling :: Monad m => (a -> a -> Bool) -> Fold m a b -> Parser a m b -groupByRolling eq = D.toParserK . D.groupByRolling eq - --- | Like 'groupByRolling', but if the predicate is 'True' then collects using --- the first fold as long as the predicate holds 'True', if the predicate is --- 'False' collects using the second fold as long as it remains 'False'. --- Returns 'Left' for the first case and 'Right' for the second case. --- --- For example, if we want to detect sorted sequences in a stream, both --- ascending and descending cases we can use 'groupByRollingEither (<=) --- Fold.toList Fold.toList'. --- --- /Pre-release/ -{-# INLINE groupByRollingEither #-} -groupByRollingEither :: Monad m => - (a -> a -> Bool) -> Fold m a b -> Fold m a c -> Parser a m (Either b c) -groupByRollingEither eq f1 = D.toParserK . D.groupByRollingEither eq f1 - --- | Like 'listEqBy' but uses a stream instead of a list and does not return --- the stream. --- --- See also: "Streamly.Data.Stream.streamEqBy" --- -{-# INLINE streamEqBy #-} -streamEqBy :: Monad m => (a -> a -> Bool) -> Stream m a -> Parser a m () -streamEqBy cmp = D.toParserK . D.streamEqBy cmp - --- | Match the given sequence of elements using the given comparison function. --- Returns the original sequence if successful. --- --- Definition: --- --- >>> listEqBy cmp xs = Parser.streamEqBy cmp (Stream.fromList xs) *> Parser.fromPure xs --- --- Examples: --- --- >>> Stream.parse (Parser.listEqBy (==) "string") $ Stream.fromList "string" --- Right "string" --- --- >>> Stream.parse (Parser.listEqBy (==) "mismatch") $ Stream.fromList "match" --- Left (ParseError "streamEqBy: mismtach occurred") --- -{-# INLINE listEqBy #-} -listEqBy :: Monad m => (a -> a -> Bool) -> [a] -> Parser a m [a] --- listEqBy cmp xs = D.toParserK (D.listEqBy cmp xs) -listEqBy cmp xs = streamEqBy cmp (Stream.fromList xs) *> fromPure xs - --- Rename to "list". --- | Match the input sequence with the supplied list and return it if --- successful. --- --- >>> listEq = Parser.listEqBy (==) --- -{-# INLINE listEq #-} -listEq :: (Monad m, Eq a) => [a] -> Parser a m [a] -listEq = listEqBy (==) - --- | Match if the input stream is a subsequence of the argument stream i.e. all --- the elements of the input stream occur, in order, in the argument stream. --- The elements do not have to occur consecutively. A sequence is considered a --- subsequence of itself. -{-# INLINE subsequenceBy #-} -subsequenceBy :: -- Monad m => - (a -> a -> Bool) -> Stream m a -> Parser a m () -subsequenceBy = undefined - -{- --- Should go in Data.Parser.Regex in streamly package so that it can depend on --- regex backends. -{-# INLINE regexPosix #-} -regexPosix :: -- Monad m => - Regex -> Parser m a (Maybe (Array (MatchOffset, MatchLength))) -regexPosix = undefined - -{-# INLINE regexPCRE #-} -regexPCRE :: -- Monad m => - Regex -> Parser m a (Maybe (Array (MatchOffset, MatchLength))) -regexPCRE = undefined --} - -------------------------------------------------------------------------------- --- nested parsers -------------------------------------------------------------------------------- - --- | Sequential parser application. Apply two parsers sequentially to an input --- stream. The input is provided to the first parser, when it is done the --- remaining input is provided to the second parser. If both the parsers --- succeed their outputs are combined using the supplied function. The --- operation fails if any of the parsers fail. --- --- Note: This is a parsing dual of appending streams using --- 'Streamly.Data.Stream.append', it splits the streams using two parsers and zips --- the results. --- --- This implementation is strict in the second argument, therefore, the --- following will fail: --- --- >>> Stream.parse (Parser.splitWith const (Parser.satisfy (> 0)) undefined) $ Stream.fromList [1] --- *** Exception: Prelude.undefined --- ... --- --- Compare with 'Applicative' instance method '<*>'. This implementation allows --- stream fusion but has quadratic complexity. This can fuse with other --- operations and can be faster than 'Applicative' instance for small number --- (less than 8) of compositions. --- --- Many combinators can be expressed using @splitWith@ and other parser --- primitives. Some common idioms are described below, --- --- @ --- span :: (a -> Bool) -> Fold m a b -> Fold m a b -> Parser a m b --- span pred f1 f2 = splitWith (,) ('takeWhile' pred f1) ('fromFold' f2) --- @ --- --- @ --- spanBy :: (a -> a -> Bool) -> Fold m a b -> Fold m a b -> Parser a m b --- spanBy eq f1 f2 = splitWith (,) ('groupBy' eq f1) ('fromFold' f2) --- @ --- --- @ --- spanByRolling :: (a -> a -> Bool) -> Fold m a b -> Fold m a b -> Parser a m b --- spanByRolling eq f1 f2 = splitWith (,) ('groupByRolling' eq f1) ('fromFold' f2) --- @ --- --- /Pre-release/ --- -{-# INLINE splitWith #-} -splitWith :: Monad m - => (a -> b -> c) -> Parser x m a -> Parser x m b -> Parser x m c -splitWith f p1 p2 = - D.toParserK $ D.splitWith f (D.fromParserK p1) (D.fromParserK p2) - --- | Sequential parser application ignoring the output of the first parser. --- Apply two parsers sequentially to an input stream. The input is provided to --- the first parser, when it is done the remaining input is provided to the --- second parser. The output of the parser is the output of the second parser. --- The operation fails if any of the parsers fail. --- --- This implementation is strict in the second argument, therefore, the --- following will fail: --- --- >>> Stream.parse (Parser.split_ (Parser.satisfy (> 0)) undefined) $ Stream.fromList [1] --- *** Exception: Prelude.undefined --- ... --- --- Compare with 'Applicative' instance method '*>'. This implementation allows --- stream fusion but has quadratic complexity. This can fuse with other --- operations, and can be faster than 'Applicative' instance for small --- number (less than 8) of compositions. --- --- /Pre-release/ --- -{-# INLINE split_ #-} -split_ :: Monad m => Parser x m a -> Parser x m b -> Parser x m b -split_ p1 p2 = D.toParserK $ D.split_ (D.fromParserK p1) (D.fromParserK p2) - -{- --- | @teeWith f p1 p2@ distributes its input to both @p1@ and @p2@ until both --- of them succeed or anyone of them fails and combines their output using @f@. --- The parser succeeds if both the parsers succeed. --- --- /Pre-release/ --- -{-# INLINE teeWith #-} -teeWith :: Monad m - => (a -> b -> c) -> Parser x m a -> Parser x m b -> Parser x m c -teeWith f p1 p2 = - D.toParserK $ D.teeWith f (D.fromParserK p1) (D.fromParserK p2) - --- | Like 'teeWith' but ends parsing and zips the results, if available, --- whenever the first parser ends. --- --- /Pre-release/ --- -{-# INLINE teeWithFst #-} -teeWithFst :: Monad m - => (a -> b -> c) -> Parser x m a -> Parser x m b -> Parser x m c -teeWithFst f p1 p2 = - D.toParserK $ D.teeWithFst f (D.fromParserK p1) (D.fromParserK p2) - --- | Like 'teeWith' but ends parsing and zips the results, if available, --- whenever any of the parsers ends or fails. --- --- /Unimplemented/ --- -{-# INLINE teeWithMin #-} -teeWithMin :: Monad m - => (a -> b -> c) -> Parser x m a -> Parser x m b -> Parser x m c -teeWithMin f p1 p2 = - D.toParserK $ D.teeWithMin f (D.fromParserK p1) (D.fromParserK p2) --} - --- | Sequential alternative. Apply the input to the first parser and return the --- result if the parser succeeds. If the first parser fails then backtrack and --- apply the same input to the second parser and return the result. --- --- Note: This implementation is not lazy in the second argument. The following --- will fail: --- --- >>> Stream.parse (Parser.satisfy (> 0) `Parser.alt` undefined) $ Stream.fromList [1..10] --- Right 1 --- --- Compare with 'Alternative' instance method '<|>'. This implementation allows --- stream fusion but has quadratic complexity. This can fuse with other --- operations and can be much faster than 'Alternative' instance for small --- number (less than 8) of alternatives. --- --- /Pre-release/ --- -{-# INLINE alt #-} -alt :: Monad m => Parser x m a -> Parser x m a -> Parser x m a -alt p1 p2 = D.toParserK $ D.alt (D.fromParserK p1) (D.fromParserK p2) - -{- --- | Shortest alternative. Apply both parsers in parallel but choose the result --- from the one which consumed least input i.e. take the shortest succeeding --- parse. --- --- /Pre-release/ --- -{-# INLINE shortest #-} -shortest :: Monad m - => Parser x m a -> Parser x m a -> Parser x m a -shortest p1 p2 = D.toParserK $ D.shortest (D.fromParserK p1) (D.fromParserK p2) - --- | Longest alternative. Apply both parsers in parallel but choose the result --- from the one which consumed more input i.e. take the longest succeeding --- parse. --- --- /Pre-release/ --- -{-# INLINE longest #-} -longest :: Monad m - => Parser x m a -> Parser x m a -> Parser x m a -longest p1 p2 = D.toParserK $ D.longest (D.fromParserK p1) (D.fromParserK p2) --} - --- | Run a parser without consuming the input. --- -{-# INLINE lookAhead #-} -lookAhead :: Monad m => Parser a m b -> Parser a m b -lookAhead p = D.toParserK $ D.lookAhead $ D.fromParserK p - --- | Takes at-most @n@ input elements. --- --- * Stops - when the collecting parser stops. --- * Fails - when the collecting parser fails. --- --- >>> Stream.parse (Parser.takeP 4 (Parser.takeEQ 2 Fold.toList)) $ Stream.fromList [1, 2, 3, 4, 5] --- Right [1,2] --- --- >>> Stream.parse (Parser.takeP 4 (Parser.takeEQ 5 Fold.toList)) $ Stream.fromList [1, 2, 3, 4, 5] --- Left (ParseError "takeEQ: Expecting exactly 5 elements, input terminated on 4") --- --- /Internal/ -{-# INLINE takeP #-} -takeP :: Monad m => Int -> Parser a m b -> Parser a m b -takeP i p = D.toParserK $ D.takeP i $ D.fromParserK p - -------------------------------------------------------------------------------- --- Sequential Collection -------------------------------------------------------------------------------- --- --- | @concatSequence f p@ collects sequential parses of parsers in a --- serial stream @p@ using the fold @f@. Fails if the input ends or any --- of the parsers fail. --- --- An even more efficient implementation can use ParserD type Parser in --- the stream. --- --- /Pre-release/ --- -{-# INLINE concatSequence #-} -concatSequence :: - Monad m => - Fold m b c -> Stream m (Parser a m b) -> Parser a m c -concatSequence f p = - let sp = fmap D.fromParserK p - in D.toParserK $ D.sequence sp f - --- | Map a 'Parser' returning function on the result of a 'Parser'. --- --- Compare with 'Monad' instance method '>>='. This implementation allows --- stream fusion but has quadratic complexity. This can fuse with other --- operations and can be much faster than 'Monad' instance for small number --- (less than 8) of compositions. --- --- /Pre-release/ --- -{-# INLINE concatMap #-} -concatMap :: Monad m - => (b -> Parser a m c) -> Parser a m b -> Parser a m c -concatMap f p = D.toParserK $ D.concatMap (D.fromParserK . f) (D.fromParserK p) - -{- -------------------------------------------------------------------------------- --- Alternative Collection -------------------------------------------------------------------------------- --- --- | @choice parsers@ applies the @parsers@ in order and returns the first --- successful parse. --- --- This is same as 'asum' but more efficient. --- --- /Broken/ --- -{-# INLINE choice #-} -choice :: - (Functor t, Foldable t, Monad m) => t (Parser a m b) -> Parser a m b -choice ps = D.toParserK $ D.choice $ D.fromParserK <$> ps --} - -------------------------------------------------------------------------------- --- Sequential Repetition -------------------------------------------------------------------------------- - --- | Like 'many' but uses a 'Parser' instead of a 'Fold' to collect the --- results. Parsing stops or fails if the collecting parser stops or fails. --- --- /Unimplemented/ --- -{-# INLINE manyP #-} -manyP :: -- Monad m => - Parser a m b -> Parser b m c -> Parser a m c -manyP _p _f = undefined -- D.toParserK $ D.manyP (D.fromParserK p) f - --- | Collect zero or more parses. Apply the supplied parser repeatedly on the --- input stream and push the parse results to a downstream fold. --- --- Stops: when the downstream fold stops or the parser fails. --- Fails: never, produces zero or more results. --- --- >>> many = Parser.countBetween 0 maxBound --- --- Compare with 'Control.Applicative.many'. --- -{-# INLINE many #-} -many :: Monad m => Parser a m b -> Fold m b c -> Parser a m c -many p f = D.toParserK $ D.many (D.fromParserK p) f - --- Note: many1 would perhaps be a better name for this and consistent with --- other names like takeWhile1. But we retain the name "some" for --- compatibility. --- --- | Collect one or more parses. Apply the supplied parser repeatedly on the --- input stream and push the parse results to a downstream fold. --- --- Stops: when the downstream fold stops or the parser fails. --- Fails: if it stops without producing a single result. --- --- >>> some p f = Parser.manyP p (Parser.takeGE 1 f) --- >>> some = Parser.countBetween 1 maxBound --- --- Compare with 'Control.Applicative.some'. --- -{-# INLINE some #-} -some :: Monad m => Parser a m b -> Fold m b c -> Parser a m c -some p f = D.toParserK $ D.some (D.fromParserK p) f --- some p f = manyP p (takeGE 1 f) --- some = countBetween 1 maxBound - --- | @countBetween m n f p@ collects between @m@ and @n@ sequential parses of --- parser @p@ using the fold @f@. Stop after collecting @n@ results. Fails if --- the input ends or the parser fails before @m@ results are collected. --- --- >>> countBetween m n p f = Parser.manyP p (Parser.takeBetween m n f) --- --- /Unimplemented/ --- -{-# INLINE countBetween #-} -countBetween :: -- Monad m => - Int -> Int -> Parser a m b -> Fold m b c -> Parser a m c -countBetween _m _n _p = undefined --- countBetween m n p f = manyP p (takeBetween m n f) - --- | @count n f p@ collects exactly @n@ sequential parses of parser @p@ using --- the fold @f@. Fails if the input ends or the parser fails before @n@ --- results are collected. --- --- >>> count n = Parser.countBetween n n --- >>> count n p f = Parser.manyP p (Parser.takeEQ n f) --- --- /Unimplemented/ --- -{-# INLINE count #-} -count :: -- Monad m => - Int -> Parser a m b -> Fold m b c -> Parser a m c -count n = countBetween n n --- count n p f = manyP p (takeEQ n f) - --- | Like 'manyTill' but uses a 'Parser' to collect the results instead of a --- 'Fold'. Parsing stops or fails if the collecting parser stops or fails. --- --- We can implemnent parsers like the following using 'manyTillP': --- --- @ --- countBetweenTill m n f p = manyTillP (takeBetween m n f) p --- @ --- --- /Unimplemented/ --- -{-# INLINE manyTillP #-} -manyTillP :: -- Monad m => - Parser a m b -> Parser a m x -> Parser b m c -> Parser a m c -manyTillP _p1 _p2 _f = undefined - -- D.toParserK $ D.manyTillP (D.fromParserK p1) (D.fromParserK p2) f - --- | @manyTill chunking test f@ tries the parser @test@ on the input, if @test@ --- fails it backtracks and tries @chunking@, after @chunking@ succeeds @test@ is --- tried again and so on. The parser stops when @test@ succeeds. The output of --- @test@ is discarded and the output of @chunking@ is accumulated by the --- supplied fold. The parser fails if @chunking@ fails. --- --- Stops when the fold @f@ stops. --- -{-# INLINE manyTill #-} -manyTill :: Monad m - => Parser a m b -- ^ Chunking parser. Parses chunks of input. - -> Parser a m x -- ^ Test parser. Parsing stops when this parser succeeds - -- else backtract and run the chunking parser. - -> Fold m b c -- ^ Folds the output of the chunking parser. - -> Parser a m c -manyTill collect test f = - D.toParserK $ D.manyTill (D.fromParserK collect) (D.fromParserK test) f - --- | @manyThen f collect recover@ repeats the parser @collect@ on the input and --- collects the output in the supplied fold. If the the parser @collect@ fails, --- parser @recover@ is run until it stops and then we start repeating the --- parser @collect@ again. The parser fails if the recovery parser fails. --- --- For example, this can be used to find a key frame in a video stream after an --- error. --- --- /Unimplemented/ --- -{-# INLINE manyThen #-} -manyThen :: -- (Foldable t, Monad m) => - Parser a m b -> Parser a m x -> Fold m b c -> Parser a m c -manyThen _parser _recover _f = undefined - -------------------------------------------------------------------------------- --- Interleaving -------------------------------------------------------------------------------- --- --- To deinterleave we can chain two parsers one behind the other. The input is --- given to the first parser and the input definitively rejected by the first --- parser is given to the second parser. --- --- We can either have the parsers themselves buffer the input or use the shared --- global buffer to hold it until none of the parsers need it. When the first --- parser returns Skip (i.e. rewind) we let the second parser consume the --- rejected input and when it is done we move the cursor forward to the first --- parser again. This will require a "move forward" command as well. --- --- To implement grep we can use three parsers, one to find the pattern, one --- to store the context behind the pattern and one to store the context in --- front of the pattern. When a match occurs we need to emit the accumulator of --- all the three parsers. One parser can count the line numbers to provide the --- line number info. - --- XXX rename this to intercalate --- | Apply two parsers alternately to an input stream. The input stream is --- considered an interleaving of two patterns. The two parsers represent the --- two patterns. --- -{-# INLINE deintercalate #-} -deintercalate :: Monad m => - Parser a m x - -> Parser a m y - -> Fold m (Either x y) z - -> Parser a m z -deintercalate contentL contentR sink = - D.toParserK - $ D.deintercalate - (D.fromParserK contentL) (D.fromParserK contentR) sink - --- | Parse items separated by a separator parsed by the supplied parser. At --- least one item must be present for the parser to succeed. --- --- Note that this can go in infinite loop if both the parsers fail on some --- input. Detection of that would make the implementation more complex. --- -{-# INLINE sepBy1 #-} -sepBy1 :: Monad m => - Parser a m b -> Parser a m x -> Fold m b c -> Parser a m c -sepBy1 p sep sink = do - x <- p - f <- fromEffect $ FL.reduce sink - f1 <- fromEffect $ FL.snoc f x - many (sep >> p) f1 - --- | Run the content parser first, when it is done, the separator parser is --- run, when it is done content parser is run again and so on. If none of the --- parsers consumes an input then parser returns a failure. --- --- >>> sepBy p1 p2 sink = Parser.deintercalate p1 p2 (Fold.catLefts sink) --- >>> sepBy content sep sink = Parser.sepBy1 content sep sink <|> return mempty --- -{-# INLINE sepBy #-} -sepBy :: Monad m => - Parser a m b -> Parser a m x -> Fold m b c -> Parser a m c -sepBy content sep sink = - D.toParserK $ D.sepBy (D.fromParserK content) (D.fromParserK sep) sink --- sepBy sink = deintercalate (FL.catLefts sink) - -------------------------------------------------------------------------------- --- Interleaving a collection of parsers -------------------------------------------------------------------------------- --- --- | Apply a collection of parsers to an input stream in a round robin fashion. --- Each parser is applied until it stops and then we repeat starting with the --- the first parser again. --- --- /Unimplemented/ --- -{-# INLINE roundRobin #-} -roundRobin :: -- (Foldable t, Monad m) => - t (Parser a m b) -> Fold m b c -> Parser a m c -roundRobin _ps _f = undefined - -------------------------------------------------------------------------------- --- Repeated Alternatives -------------------------------------------------------------------------------- - --- | Keep trying a parser up to a maximum of @n@ failures. When the parser --- fails the input consumed till now is dropped and the new instance is tried --- on the fresh input. --- --- /Unimplemented/ --- -{-# INLINE retryMaxTotal #-} -retryMaxTotal :: -- (Monad m) => - Int -> Parser a m b -> Fold m b c -> Parser a m c -retryMaxTotal _n _p _f = undefined - --- | Like 'retryMaxTotal' but aborts after @n@ successive failures. --- --- /Unimplemented/ --- -{-# INLINE retryMaxSuccessive #-} -retryMaxSuccessive :: -- (Monad m) => - Int -> Parser a m b -> Fold m b c -> Parser a m c -retryMaxSuccessive _n _p _f = undefined - --- | Keep trying a parser until it succeeds. When the parser fails the input --- consumed till now is dropped and the new instance is tried on the fresh --- input. --- --- /Unimplemented/ --- -{-# INLINE retry #-} -retry :: -- (Monad m) => - Parser a m b -> Parser a m b -retry _p = undefined +import Streamly.Internal.Data.Parser.ParserD diff --git a/core/src/Streamly/Internal/Data/Parser/Chunked/Type.hs b/core/src/Streamly/Internal/Data/Parser/Chunked/Type.hs index 71e229d61..9672b4f4a 100644 --- a/core/src/Streamly/Internal/Data/Parser/Chunked/Type.hs +++ b/core/src/Streamly/Internal/Data/Parser/Chunked/Type.hs @@ -57,6 +57,8 @@ data Step a m r = -- Array a -> m (Step a m r), m (Step a m r) -- XXX The Array is the only difference from element parser, we can pass -- this as parameter? + -- XXX Unify element and chunked parser, by using the argument as + -- None | Single a | Chunk (Array a). | Partial !Int (Maybe (Array a) -> m (Step a m r)) | Continue !Int (Maybe (Array a) -> m (Step a m r)) | Error !Int String diff --git a/core/src/Streamly/Internal/Data/Parser/ParserD.hs b/core/src/Streamly/Internal/Data/Parser/ParserD.hs index f49759221..6043e57ab 100644 --- a/core/src/Streamly/Internal/Data/Parser/ParserD.hs +++ b/core/src/Streamly/Internal/Data/Parser/ParserD.hs @@ -1,5 +1,3 @@ -#include "inline.hs" - -- | -- Module : Streamly.Internal.Data.Parser.ParserD -- Copyright : (c) 2020 Composewell Technologies @@ -8,7 +6,42 @@ -- Stability : experimental -- Portability : GHC -- --- Direct style parser implementation with stream fusion. +-- Fast backtracking parsers with stream fusion and native streaming +-- capability. +-- +-- 'Applicative' and 'Control.Applicative.Alternative' type class based +-- combinators from the +-- +-- package can also be used with the 'Parser' type. However, there are two +-- important differences between @parser-combinators@ and the equivalent ones +-- provided in this module in terms of performance: +-- +-- 1) @parser-combinators@ use plain Haskell lists to collect the results, in a +-- strict Monad like IO, the results are necessarily buffered before they can +-- be consumed. This may not perform optimally in streaming applications +-- processing large amounts of data. Equivalent combinators in this module can +-- consume the results of parsing using a 'Fold', thus providing a scalability +-- and a composable consumer. +-- +-- 2) Several combinators in this module can be many times faster because of +-- stream fusion. For example, 'Streamly.Internal.Data.Parser.many' combinator +-- in this module is much faster than the 'Control.Applicative.many' combinator +-- of 'Control.Applicative.Alternative' type class. +-- +-- = Errors +-- +-- Failing parsers in this module throw the 'D.ParseError' exception. +-- +-- = Naming +-- +-- As far as possible, try that the names of the combinators in this module are +-- consistent with: +-- +-- * +-- * +-- * +-- * +-- * module Streamly.Internal.Data.Parser.ParserD ( @@ -16,7 +49,6 @@ module Streamly.Internal.Data.Parser.ParserD , ParseError (..) , Step (..) , Initial (..) - , rmapM -- * Conversion to/from ParserK , fromParserK @@ -40,17 +72,26 @@ module Streamly.Internal.Data.Parser.ParserD , postscan , filter + -- * Map on output + , rmapM + -- * Element parsers , peek + + -- All of these can be expressed in terms of either + , one + , oneEq + , oneNotEq + , oneOf + , noneOf , eof , satisfy - , next , maybe , either - -- * Sequence parsers + -- * Sequence parsers (tokenizers) -- - -- Parsers chained in series, if one parser terminates the composition + -- | Parsers chained in series, if one parser terminates the composition -- terminates. Currently we are using folds to collect the output of the -- parsers but we can use Parsers instead of folds to make the composition -- more powerful. For example, we can do: @@ -58,41 +99,54 @@ module Streamly.Internal.Data.Parser.ParserD -- takeEndByOrMax cond n p = takeEndBy cond (take n p) -- takeEndByBetween cond m n p = takeEndBy cond (takeBetween m n p) -- takeWhileBetween cond m n p = takeWhile cond (takeBetween m n p) + , lookAhead - -- Grab a sequence of input elements without inspecting them + -- ** By length + -- | Grab a sequence of input elements without inspecting them , takeBetween - -- , take -- take -- takeBetween 0 n + -- , take -- takeBetween 0 n , takeEQ -- takeBetween n n , takeGE -- takeBetween n maxBound -- , takeGE1 -- take1 -- takeBetween 1 n , takeP -- Grab a sequence of input elements by inspecting them - , lookAhead + -- ** Exact match + , listEq + , listEqBy + , streamEqBy + , subsequenceBy + + -- ** By predicate , takeWhile , takeWhileP , takeWhile1 + , dropWhile - -- Separators + -- ** Separators , takeEndBy , takeEndBy_ , takeEndByEsc + -- , takeEndByEsc_ , takeStartBy - , takeFramedBy_ - , takeFramedByEsc_ - , takeFramedByGeneric - - -- Words and grouping + , takeStartBy_ + , takeEitherSepBy , wordBy - , wordFramedBy - , wordQuotedBy + + -- ** By comparing , groupBy , groupByRolling , groupByRollingEither + -- ** Framing + -- , takeFramedBy + , takeFramedBy_ + , takeFramedByEsc_ + , takeFramedByGeneric + , wordFramedBy + , wordQuotedBy + -- Matching strings - , listEqBy - , streamEqBy -- , prefixOf -- match any prefix of a given string -- , suffixOf -- match any suffix of a given string -- , infixOf -- match any substring of a given string @@ -121,6 +175,25 @@ module Streamly.Internal.Data.Parser.ParserD -- Use two folds, run a primary parser, its rejected values go to the -- secondary parser. , deintercalate + -- , deintercalatePrefix + -- , deintercalateSuffix + + -- *** Special cases + -- | TODO: traditional implmentations of these may be of limited use. For + -- example, consider parsing lines separated by @\\r\\n@. The main parser + -- will have to detect and exclude the sequence @\\r\\n@ anyway so that we + -- can apply the "sep" parser. + -- + -- We can instead implement these as special cases of deintercalate. + -- + -- @ + -- , endBy + -- , sepEndBy + -- , beginBy + -- , sepBeginBy + -- , sepAroundBy + -- @ + , sepBy1 , sepBy -- ** Sequential Alternative @@ -132,6 +205,7 @@ module Streamly.Internal.Data.Parser.ParserD , longest -- , fastest -} + -- * N-ary Combinators -- ** Sequential Collection , sequence @@ -142,25 +216,12 @@ module Streamly.Internal.Data.Parser.ParserD , countBetween -- , countBetweenTill - , many , manyP + , many , some + , manyTillP , manyTill - - -- -- ** Special cases - -- XXX traditional implmentations of these may be of limited use. For - -- example, consider parsing lines separated by "\r\n". The main parser - -- will have to detect and exclude the sequence "\r\n" anyway so that we - -- can apply the "sep" parser. - -- - -- We can instead implement these as special cases of deintercalate. - -- - -- , endBy - -- , sepBy - -- , sepEndBy - -- , beginBy - -- , sepBeginBy - -- , sepAroundBy + , manyThen -- -- * Distribution -- @@ -171,24 +232,32 @@ module Streamly.Internal.Data.Parser.ParserD -- -- ** Distribute to collection -- -- ** Distribute to repetition - -- -- ** Interleaved collection - -- Round robin - -- Priority based + -- ** Interleaved collection + -- | + -- + -- 1. Round robin + -- 2. Priority based + , roundRobin + -- -- ** Interleaved repetition -- repeat one parser and when it fails run an error recovery parser -- e.g. to find a key frame in the stream after an error -- ** Collection of Alternatives + -- | Unimplemented + -- + -- @ -- , shortestN -- , longestN -- , fastestN -- first N successful in time -- , choiceN -- first N successful in position + -- @ -- , choice -- first successful in position - -- -- ** Repeated Alternatives - -- , retryMax -- try N times - -- , retryUntil -- try until successful - -- , retryUntilN -- try until successful n times + -- ** Repeated Alternatives + , retryMaxTotal + , retryMaxSuccessive + , retry -- ** Zipping Input , zipWithM @@ -196,9 +265,13 @@ module Streamly.Internal.Data.Parser.ParserD , indexed , makeIndexFilter , sampleFromthen + + -- * Deprecated + , next ) where +#include "inline.hs" #include "assert.hs" import Control.Monad (when) @@ -209,14 +282,16 @@ import Streamly.Internal.Data.SVar.Type (defState) import Streamly.Internal.Data.Either.Strict (Either'(..)) import Streamly.Internal.Data.Maybe.Strict (Maybe'(..)) import Streamly.Internal.Data.Tuple.Strict (Tuple'(..)) +import Streamly.Internal.Data.Stream.StreamD.Type (Stream) +import qualified Data.Foldable as Foldable import qualified Streamly.Internal.Data.Fold.Type as FL import qualified Streamly.Internal.Data.Stream.StreamD.Type as D import qualified Streamly.Internal.Data.Stream.StreamD.Generate as D import Prelude hiding (any, all, take, takeWhile, sequence, concatMap, maybe, either, span - , zip, filter) + , zip, filter, dropWhile) -- import Streamly.Internal.Data.Parser.ParserD.Tee import Streamly.Internal.Data.Parser.ParserD.Type @@ -224,19 +299,27 @@ import Streamly.Internal.Data.Parser.ParserD.Type -- $setup -- >>> :m -- >>> import Prelude hiding () +-- >>> import Control.Applicative +-- >>> import Data.Char (isSpace) +-- >>> import qualified Data.Foldable as Foldable -- >>> import qualified Data.Maybe as Maybe -- >>> import qualified Streamly.Data.Stream as Stream -- >>> import qualified Streamly.Internal.Data.Stream as Stream --- >>> import qualified Streamly.Data.Fold as Fold +-- >>> import qualified Streamly.Internal.Data.Fold as Fold -- >>> import qualified Streamly.Internal.Data.Parser as Parser ------------------------------------------------------------------------------- -- Downgrade a parser to a Fold ------------------------------------------------------------------------------- --- | See 'Streamly.Internal.Data.Parser.toFold'. +-- | Make a 'Fold' from a 'Parser'. The fold just throws an exception if the +-- parser fails or tries to backtrack. -- --- /Internal/ +-- This can be useful in combinators that accept a Fold and we know that a +-- Parser cannot fail or failure exception is acceptable as there is no way to +-- recover. +-- +-- /Pre-release/ -- {-# INLINE toFold #-} toFold :: Monad m => Parser a m b -> Fold m a b @@ -280,10 +363,8 @@ toFold (Parser pstep pinitial pextract) = Fold step initial extract ------------------------------------------------------------------------------- -- Upgrade folds to parses ------------------------------------------------------------------------------- --- --- | See 'Streamly.Internal.Data.Parser.fromFold'. --- --- /Pre-release/ + +-- | Make a 'Parser' from a 'Fold'. -- {-# INLINE fromFold #-} fromFold :: Monad m => Fold m a b -> Parser a m b @@ -307,7 +388,9 @@ fromFold (Fold fstep finitial fextract) = Parser step initial extract extract = fmap (Done 0) . fextract --- | Convert Maybe returning folds to error returning parsers. +-- | Convert a Maybe returning fold to an error returning parser. The first +-- argument is the error message that the parser would return when the fold +-- returns Nothing. -- -- /Pre-release/ -- @@ -348,9 +431,15 @@ fromFoldMaybe errMsg (Fold fstep finitial fextract) = -- Failing Parsers ------------------------------------------------------------------------------- --- | See 'Streamly.Internal.Data.Parser.peek'. +-- | Peek the head element of a stream, without consuming it. Fails if it +-- encounters end of input. -- --- /Pre-release/ +-- >>> Stream.parse ((,) <$> Parser.peek <*> Parser.satisfy (> 0)) $ Stream.fromList [1] +-- Right (1,1) +-- +-- @ +-- peek = lookAhead (satisfy True) +-- @ -- {-# INLINE peek #-} peek :: Monad m => Parser a m a @@ -364,9 +453,10 @@ peek = Parser step initial extract extract () = return $ Error "peek: end of input" --- | See 'Streamly.Internal.Data.Parser.eof'. +-- | Succeeds if we are at the end of input, fails otherwise. -- --- /Pre-release/ +-- >>> Stream.parse ((,) <$> Parser.satisfy (> 0) <*> Parser.eof) $ Stream.fromList [1] +-- Right (1,()) -- {-# INLINE eof #-} eof :: Monad m => Parser a m () @@ -380,10 +470,12 @@ eof = Parser step initial extract extract () = return $ Done 0 () --- | See 'Streamly.Internal.Data.Parser.next'. +-- | Return the next element of the input. Returns 'Nothing' +-- on end of input. Also known as 'head'. -- -- /Pre-release/ -- +{-# DEPRECATED next "Please use \"fromFold Fold.one\" instead" #-} {-# INLINE next #-} next :: Monad m => Parser a m (Maybe a) next = Parser step initial extract @@ -396,7 +488,9 @@ next = Parser step initial extract extract () = pure $ Done 0 Nothing --- | See 'Streamly.Internal.Data.Parser.either'. +-- | Map an 'Either' returning function on the next element in the stream. If +-- the function returns 'Left err', the parser fails with the error message +-- @err@ otherwise returns the 'Right' value. -- -- /Pre-release/ -- @@ -415,11 +509,15 @@ either f = Parser step initial extract extract () = return $ Error "end of input" --- | See 'Streamly.Internal.Data.Parser.maybe'. +-- | Map a 'Maybe' returning function on the next element in the stream. The +-- parser fails if the function returns 'Nothing' otherwise returns the 'Just' +-- value. -- -- >>> toEither = Maybe.maybe (Left "maybe: predicate failed") Right -- >>> maybe f = Parser.either (toEither . f) -- +-- >>> maybe f = Parser.fromFoldMaybe "maybe: predicate failed" (Fold.maybe f) +-- -- /Pre-release/ -- {-# INLINE maybe #-} @@ -438,13 +536,14 @@ maybe parserF = Parser step initial extract extract () = return $ Error "maybe: end of input" --- | See 'Streamly.Internal.Data.Parser.satisfy'. +-- | Returns the next element if it passes the predicate, fails otherwise. +-- +-- >>> Stream.parse (Parser.satisfy (== 1)) $ Stream.fromList [1,0,1] +-- Right 1 -- -- >>> toMaybe f x = if f x then Just x else Nothing -- >>> satisfy f = Parser.maybe (toMaybe f) -- --- /Pre-release/ --- {-# INLINE satisfy #-} satisfy :: Monad m => (a -> Bool) -> Parser a m a -- satisfy predicate = maybe (\a -> if predicate a then Just a else Nothing) @@ -461,6 +560,67 @@ satisfy predicate = Parser step initial extract extract () = return $ Error "satisfy: end of input" +-- | Consume one element from the head of the stream. Fails if it encounters +-- end of input. +-- +-- >>> one = Parser.satisfy $ const True +-- +{-# INLINE one #-} +one :: Monad m => Parser a m a +one = satisfy $ const True + +-- Alternate names: "only", "onlyThis". + +-- | Match a specific element. +-- +-- >>> oneEq x = Parser.satisfy (== x) +-- +{-# INLINE oneEq #-} +oneEq :: (Monad m, Eq a) => a -> Parser a m a +oneEq x = satisfy (== x) + +-- Alternate names: "exclude", "notThis". + +-- | Match anything other than the supplied element. +-- +-- >>> oneNotEq x = Parser.satisfy (/= x) +-- +{-# INLINE oneNotEq #-} +oneNotEq :: (Monad m, Eq a) => a -> Parser a m a +oneNotEq x = satisfy (/= x) + +-- | Match any one of the elements in the supplied list. +-- +-- >>> oneOf xs = Parser.satisfy (`Foldable.elem` xs) +-- +-- When performance matters a pattern matching predicate could be more +-- efficient than a 'Foldable' datatype: +-- +-- @ +-- let p x = +-- case x of +-- 'a' -> True +-- 'e' -> True +-- _ -> False +-- in satisfy p +-- @ +-- +-- GHC may use a binary search instead of linear search in the list. +-- Alternatively, you can also use an array instead of list for storage and +-- search. +-- +{-# INLINE oneOf #-} +oneOf :: (Monad m, Eq a, Foldable f) => f a -> Parser a m a +oneOf xs = satisfy (`Foldable.elem` xs) + +-- | See performance notes in 'oneOf'. +-- +-- >>> noneOf xs = Parser.satisfy (`Foldable.notElem` xs) +-- +{-# INLINE noneOf #-} +noneOf :: (Monad m, Eq a, Foldable f) => f a -> Parser a m a +noneOf xs = satisfy (`Foldable.notElem` xs) + ------------------------------------------------------------------------------- -- Taking elements ------------------------------------------------------------------------------- @@ -469,7 +629,43 @@ satisfy predicate = Parser step initial extract {-# ANN type Tuple'Fused Fuse #-} data Tuple'Fused a b = Tuple'Fused !a !b deriving Show --- | See 'Streamly.Internal.Data.Parser.takeBetween'. +-- | @takeBetween m n@ takes a minimum of @m@ and a maximum of @n@ input +-- elements and folds them using the supplied fold. +-- +-- Stops after @n@ elements. +-- Fails if the stream ends before @m@ elements could be taken. +-- +-- Examples: - +-- +-- @ +-- >>> :{ +-- takeBetween' low high ls = Stream.parse prsr (Stream.fromList ls) +-- where prsr = Parser.takeBetween low high Fold.toList +-- :} +-- +-- @ +-- +-- >>> takeBetween' 2 4 [1, 2, 3, 4, 5] +-- Right [1,2,3,4] +-- +-- >>> takeBetween' 2 4 [1, 2] +-- Right [1,2] +-- +-- >>> takeBetween' 2 4 [1] +-- Left (ParseError "takeBetween: Expecting alteast 2 elements, got 1") +-- +-- >>> takeBetween' 0 0 [1, 2] +-- Right [] +-- +-- >>> takeBetween' 0 1 [] +-- Right [] +-- +-- @takeBetween@ is the most general take operation, other take operations can +-- be defined in terms of takeBetween. For example: +-- +-- >>> take n = Parser.takeBetween 0 n +-- >>> takeEQ n = Parser.takeBetween n n +-- >>> takeGE n = Parser.takeBetween n maxBound -- -- /Pre-release/ -- @@ -546,9 +742,14 @@ takeBetween low high (Fold fstep finitial fextract) = | i >= low && i <= high = fmap IDone (fextract s) | otherwise = return $ IError (f i) --- | See 'Streamly.Internal.Data.Parser.takeEQ'. +-- | Stops after taking exactly @n@ input elements. -- --- /Pre-release/ +-- * Stops - after consuming @n@ elements. +-- * Fails - if the stream or the collecting fold ends before it can collect +-- exactly @n@ elements. +-- +-- >>> Stream.parse (Parser.takeEQ 4 Fold.toList) $ Stream.fromList [1,0,1] +-- Left (ParseError "takeEQ: Expecting exactly 4 elements, input terminated on 3") -- {-# INLINE takeEQ #-} takeEQ :: Monad m => Int -> Fold m a b -> Parser a m b @@ -602,7 +803,17 @@ takeEQ n (Fold fstep finitial fextract) = Parser step initial extract $ "takeEQ: Expecting exactly " ++ show cnt ++ " elements, input terminated on " ++ show i --- | See 'Streamly.Internal.Data.Parser.takeGE'. +-- | Take at least @n@ input elements, but can collect more. +-- +-- * Stops - when the collecting fold stops. +-- * Fails - if the stream or the collecting fold ends before producing @n@ +-- elements. +-- +-- >>> Stream.parse (Parser.takeGE 4 Fold.toList) $ Stream.fromList [1,0,1] +-- Left (ParseError "takeGE: Expecting at least 4 elements, input terminated on 3") +-- +-- >>> Stream.parse (Parser.takeGE 4 Fold.toList) $ Stream.fromList [1,0,1,0,1] +-- Right [1,0,1,0,1] -- -- /Pre-release/ -- @@ -659,7 +870,19 @@ takeGE n (Fold fstep finitial fextract) = Parser step initial extract -- Conditional splitting ------------------------------------------------------------------------------- --- | See 'Streamly.Internal.Data.Parser.takeWhileP'. +-- XXX We should perhaps use only takeWhileP and rename it to takeWhile. + +-- | Like 'takeWhile' but uses a 'Parser' instead of a 'Fold' to collect the +-- input. The combinator stops when the condition fails or if the collecting +-- parser stops. +-- +-- Other interesting parsers can be implemented in terms of this parser: +-- +-- >>> takeWhile1 cond p = Parser.takeWhileP cond (Parser.takeBetween 1 maxBound p) +-- >>> takeWhileBetween cond m n p = Parser.takeWhileP cond (Parser.takeBetween m n p) +-- +-- Stops: when the condition fails or the collecting parser stops. +-- Fails: when the collecting parser fails. -- -- /Pre-release/ -- @@ -682,12 +905,26 @@ takeWhileP predicate (Parser pstep pinitial pextract) = Partial _ _ -> error "Bug: takeWhileP: Partial in extract" Continue n s1 -> return $ Continue (n + 1) s1 --- | See 'Streamly.Internal.Data.Parser.takeWhile'. +-- | Collect stream elements until an element fails the predicate. The element +-- on which the predicate fails is returned back to the input stream. -- --- /Pre-release/ +-- * Stops - when the predicate fails or the collecting fold stops. +-- * Fails - never. +-- +-- >>> Stream.parse (Parser.takeWhile (== 0) Fold.toList) $ Stream.fromList [0,0,1,0,1] +-- Right [0,0] +-- +-- >>> takeWhile cond f = Parser.takeWhileP cond (Parser.fromFold f) +-- +-- We can implement a @breakOn@ using 'takeWhile': +-- +-- @ +-- breakOn p = takeWhile (not p) +-- @ -- {-# INLINE takeWhile #-} takeWhile :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b +-- takeWhile cond f = takeWhileP cond (fromFold f) takeWhile predicate (Fold fstep finitial fextract) = Parser step initial extract @@ -711,12 +948,13 @@ takeWhile predicate (Fold fstep finitial fextract) = extract s = fmap (Done 0) (fextract s) --- | See 'Streamly.Internal.Data.Parser.takeWhile1'. +-- | Like 'takeWhile' but takes at least one element otherwise fails. -- --- /Pre-release/ +-- >>> takeWhile1 cond p = Parser.takeWhileP cond (Parser.takeBetween 1 maxBound p) -- {-# INLINE takeWhile1 #-} takeWhile1 :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b +-- takeWhile1 cond f = takeWhileP cond (takeBetween 1 maxBound f) takeWhile1 predicate (Fold fstep finitial fextract) = Parser step initial extract @@ -753,6 +991,17 @@ takeWhile1 predicate (Fold fstep finitial fextract) = extract (Left' _) = return $ Error "takeWhile1: end of input" extract (Right' s) = fmap (Done 0) (fextract s) +-- | Drain the input as long as the predicate succeeds, running the effects and +-- discarding the results. +-- +-- This is also called @skipWhile@ in some parsing libraries. +-- +-- >>> dropWhile p = Parser.takeWhile p Fold.drain +-- +{-# INLINE dropWhile #-} +dropWhile :: Monad m => (a -> Bool) -> Parser a m () +dropWhile p = takeWhile p FL.drain + ------------------------------------------------------------------------------- -- Separators ------------------------------------------------------------------------------- @@ -760,6 +1009,7 @@ takeWhile1 predicate (Fold fstep finitial fextract) = data FramedEscState s = FrameEscInit !s | FrameEscGo !s !Int | FrameEscEsc !s !Int +-- XXX We can remove Maybe from esc {-# INLINE takeFramedByGeneric #-} takeFramedByGeneric :: Monad m => Maybe (a -> Bool) @@ -857,12 +1107,26 @@ takeFramedByGeneric esc begin end (Fold fstep finitial fextract) = Nothing -> err "takeFramedByGeneric: missing closing frame" extract (FrameEscEsc _ _) = err "takeFramedByGeneric: trailing escape" --- | See 'Streamly.Internal.Data.Parser.takeEndBy'. +-- | @takeEndBy cond parser@ parses a token that ends by a separator chosen by +-- the supplied predicate. The separator is also taken with the token. +-- +-- This can be combined with other parsers to implement other interesting +-- parsers as follows: +-- +-- >>> takeEndByLE cond n p = Parser.takeEndBy cond (Parser.fromFold $ Fold.take n p) +-- >>> takeEndByBetween cond m n p = Parser.takeEndBy cond (Parser.takeBetween m n p) +-- +-- >>> takeEndBy = Parser.takeEndByEsc (const False) +-- +-- See also "Streamly.Data.Fold.takeEndBy". Unlike the fold, the collecting +-- parser in the takeEndBy parser can decide whether to fail or not if the +-- stream does not end with separator. -- -- /Pre-release/ -- {-# INLINE takeEndBy #-} takeEndBy :: Monad m => (a -> Bool) -> Parser a m b -> Parser a m b +-- takeEndBy = takeEndByEsc (const False) takeEndBy cond (Parser pstep pinitial pextract) = Parser step initial pextract @@ -877,10 +1141,11 @@ takeEndBy cond (Parser pstep pinitial pextract) = then return res else extractStep pextract res --- | See 'Streamly.Internal.Data.Parser.takeEndByEsc'. --- --- /Pre-release/ +-- | Like 'takeEndBy' but the separator elements can be escaped using an +-- escape char determined by the first predicate. The escape characters are +-- removed. -- +-- /pre-release/ {-# INLINE takeEndByEsc #-} takeEndByEsc :: Monad m => (a -> Bool) -> (a -> Bool) -> Parser a m b -> Parser a m b @@ -909,12 +1174,18 @@ takeEndByEsc isEsc isSep (Parser pstep pinitial pextract) = extract (Right' _) = return $ Error "takeEndByEsc: trailing escape" --- | See 'Streamly.Internal.Data.Parser.takeEndBy_'. +-- | Like 'takeEndBy' but the separator is dropped. +-- +-- See also "Streamly.Data.Fold.takeEndBy_". -- -- /Pre-release/ -- {-# INLINE takeEndBy_ #-} takeEndBy_ :: (a -> Bool) -> Parser a m b -> Parser a m b +{- +takeEndBy_ isEnd p = + takeFramedByGeneric Nothing Nothing (Just isEnd) (toFold p) +-} takeEndBy_ cond (Parser pstep pinitial pextract) = Parser step pinitial pextract @@ -926,11 +1197,39 @@ takeEndBy_ cond (Parser pstep pinitial pextract) = then pextract s else pstep s a --- | See 'Streamly.Internal.Data.Parser.takeStartBy'. +-- | Take either the separator or the token. Separator is a Left value and +-- token is Right value. +-- +-- /Unimplemented/ +{-# INLINE takeEitherSepBy #-} +takeEitherSepBy :: -- Monad m => + (a -> Bool) -> Fold m (Either a b) c -> Parser a m c +takeEitherSepBy _cond = undefined -- D.toParserK . D.takeEitherSepBy cond + +-- | Parse a token that starts with an element chosen by the predicate. The +-- parser fails if the input does not start with the selected element. +-- +-- * Stops - when the predicate succeeds in non-leading position. +-- * Fails - when the predicate fails in the leading position. +-- +-- >>> splitWithPrefix p f = Stream.parseMany (Parser.takeStartBy p f) +-- +-- Examples: - +-- +-- >>> p = Parser.takeStartBy (== ',') Fold.toList +-- >>> leadingComma = Stream.parse p . Stream.fromList +-- >>> leadingComma "a,b" +-- Left (ParseError "takeStartBy: missing frame start") +-- ... +-- >>> leadingComma ",," +-- Right "," +-- >>> leadingComma ",a,b" +-- Right ",a" +-- >>> leadingComma "" +-- Right "" -- -- /Pre-release/ -- - {-# INLINE takeStartBy #-} takeStartBy :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b takeStartBy cond (Fold fstep finitial fextract) = @@ -966,9 +1265,40 @@ takeStartBy cond (Fold fstep finitial fextract) = extract (Left' s) = fmap (Done 0) $ fextract s extract (Right' s) = fmap (Done 0) $ fextract s +-- | Like 'takeStartBy' but drops the separator. +-- +-- >>> takeStartBy_ isBegin = Parser.takeFramedByGeneric Nothing (Just isBegin) Nothing +-- +{-# INLINE takeStartBy_ #-} +takeStartBy_ :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b +takeStartBy_ isBegin = takeFramedByGeneric Nothing (Just isBegin) Nothing + +-- | @takeFramedByEsc_ isEsc isBegin isEnd fold@ parses a token framed using a +-- begin and end predicate, and an escape character. The frame begin and end +-- characters lose their special meaning if preceded by the escape character. +-- +-- Nested frames are allowed if begin and end markers are different, nested +-- frames must be balanced unless escaped, nested frame markers are emitted as +-- it is. +-- +-- For example, +-- +-- >>> p = Parser.takeFramedByEsc_ (== '\\') (== '{') (== '}') Fold.toList +-- >>> Stream.parse p $ Stream.fromList "{hello}" +-- Right "hello" +-- >>> Stream.parse p $ Stream.fromList "{hello {world}}" +-- Right "hello {world}" +-- >>> Stream.parse p $ Stream.fromList "{hello \\{world}" +-- Right "hello {world" +-- >>> Stream.parse p $ Stream.fromList "{hello {world}" +-- Left (ParseError "takeFramedByEsc_: missing frame end") +-- +-- /Pre-release/ {-# INLINE takeFramedByEsc_ #-} takeFramedByEsc_ :: Monad m => (a -> Bool) -> (a -> Bool) -> (a -> Bool) -> Fold m a b -> Parser a m b +-- takeFramedByEsc_ isEsc isEnd p = +-- takeFramedByGeneric (Just isEsc) Nothing (Just isEnd) (toFold p) takeFramedByEsc_ isEsc isBegin isEnd (Fold fstep finitial fextract) = Parser step initial extract @@ -1017,9 +1347,16 @@ takeFramedByEsc_ isEsc isBegin isEnd (Fold fstep finitial fextract) = data FramedState s = FrameInit !s | FrameGo !s Int +-- | @takeFramedBy_ isBegin isEnd fold@ parses a token framed by a begin and an +-- end predicate. +-- +-- >>> takeFramedBy_ = Parser.takeFramedByEsc_ (const False) +-- {-# INLINE takeFramedBy_ #-} takeFramedBy_ :: Monad m => (a -> Bool) -> (a -> Bool) -> Fold m a b -> Parser a m b +-- takeFramedBy_ isBegin isEnd = +-- takeFramedByGeneric (Just (const False)) (Just isBegin) (Just isEnd) takeFramedBy_ isBegin isEnd (Fold fstep finitial fextract) = Parser step initial extract @@ -1064,8 +1401,24 @@ takeFramedBy_ isBegin isEnd (Fold fstep finitial fextract) = data WordByState s b = WBLeft !s | WBWord !s | WBRight !b --- | See 'Streamly.Internal.Data.Parser.wordBy'. +-- Note we can also get words using something like: +-- sepBy FL.toList (takeWhile (not . p) Fold.toList) (dropWhile p) -- +-- But that won't be as efficient and ergonomic. + +-- | Like 'splitOn' but strips leading, trailing, and repeated separators. +-- Therefore, @".a..b."@ having '.' as the separator would be parsed as +-- @["a","b"]@. In other words, its like parsing words from whitespace +-- separated text. +-- +-- * Stops - when it finds a word separator after a non-word element +-- * Fails - never. +-- +-- >>> wordBy = Parser.wordFramedBy (const False) (const False) (const False) +-- +-- @ +-- S.wordsBy pred f = S.parseMany (PR.wordBy pred f) +-- @ -- {-# INLINE wordBy #-} wordBy :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b @@ -1114,7 +1467,24 @@ data WordFramedState s b = | WordFramedEsc !s !Int | WordFramedSkipPost !b --- | See 'Streamly.Internal.Data.Parser.wordFramedBy' +-- | Like 'wordBy' but treats anything inside a pair of quotes as a single +-- word, the quotes can be escaped by an escape character. Recursive quotes +-- are possible if quote begin and end characters are different, quotes must be +-- balanced. Outermost quotes are stripped. +-- +-- >>> braces = Parser.wordFramedBy (== '\\') (== '{') (== '}') isSpace Fold.toList +-- >>> Stream.parse braces $ Stream.fromList "{ab} cd" +-- Right "ab" +-- >>> Stream.parse braces $ Stream.fromList "{ab}{cd}" +-- Right "abcd" +-- >>> Stream.parse braces $ Stream.fromList "a{b} cd" +-- Right "ab" +-- >>> Stream.parse braces $ Stream.fromList "a{{b}} cd" +-- Right "a{b}" +-- +-- >>> quotes = Parser.wordFramedBy (== '\\') (== '"') (== '"') isSpace Fold.toList +-- >>> Stream.parse quotes $ Stream.fromList "\"a\"\"b\"" +-- Right "ab" -- {-# INLINE wordFramedBy #-} wordFramedBy :: Monad m => @@ -1206,6 +1576,26 @@ data WordQuotedState s b a = | WordQuotedEsc !s !Int a | WordQuotedSkipPost !b +-- | Like 'wordFramedBy' but the closing quote is determined by the opening +-- quote. The first quote begin starts a quote that is closed by its +-- corresponding closing quote. +-- +-- 'wordFramedBy' and 'wordQuotedBy' both allow multiple quote characters based +-- on the predicates but 'wordQuotedBy' always fixes the quote at the first +-- occurrence and then it is closed only by the corresponding closing quote. +-- Therefore, other quoting characters can be embedded inside it as normal +-- characters. On the other hand, 'wordFramedBy' would close the quote as soon +-- as it encounters any of the closing quotes. +-- +-- >>> q = (`elem` ['"', '\'']) +-- >>> p kQ = Parser.wordQuotedBy kQ (== '\\') q q id isSpace Fold.toList +-- +-- >>> Stream.parse (p False) $ Stream.fromList "a\"b'c\";'d\"e'f ghi" +-- Right "ab'c;d\"ef" +-- +-- >>> Stream.parse (p True) $ Stream.fromList "a\"b'c\";'d\"e'f ghi" +-- Right "a\"b'c\";'d\"e'f" +-- {-# INLINE wordQuotedBy #-} wordQuotedBy :: (Monad m, Eq a) => Bool -- ^ keep the quotes in the output @@ -1313,7 +1703,31 @@ data GroupByState a s = GroupByInit !s | GroupByGrouping !a !s --- | See 'Streamly.Internal.Data.Parser.groupBy'. +-- | Given an input stream @[a,b,c,...]@ and a comparison function @cmp@, the +-- parser assigns the element @a@ to the first group, then if @a \`cmp` b@ is +-- 'True' @b@ is also assigned to the same group. If @a \`cmp` c@ is 'True' +-- then @c@ is also assigned to the same group and so on. When the comparison +-- fails the parser is terminated. Each group is folded using the 'Fold' @f@ and +-- the result of the fold is the result of the parser. +-- +-- * Stops - when the comparison fails. +-- * Fails - never. +-- +-- >>> :{ +-- runGroupsBy eq = +-- Stream.fold Fold.toList +-- . Stream.parseMany (Parser.groupBy eq Fold.toList) +-- . Stream.fromList +-- :} +-- +-- >>> runGroupsBy (<) [] +-- [] +-- +-- >>> runGroupsBy (<) [1] +-- [Right [1]] +-- +-- >>> runGroupsBy (<) [3, 5, 4, 1, 2, 0] +-- [Right [3,5,4],Right [1,2],Right [0]] -- {-# INLINE groupBy #-} groupBy :: Monad m => (a -> a -> Bool) -> Fold m a b -> Parser a m b @@ -1345,7 +1759,35 @@ groupBy eq (Fold fstep finitial fextract) = Parser step initial extract extract (GroupByInit s) = fmap (Done 0) $ fextract s extract (GroupByGrouping _ s) = fmap (Done 0) $ fextract s --- | See 'Streamly.Internal.Data.Parser.groupByRolling'. +-- | Unlike 'groupBy' this combinator performs a rolling comparison of two +-- successive elements in the input stream. Assuming the input stream +-- is @[a,b,c,...]@ and the comparison function is @cmp@, the parser +-- first assigns the element @a@ to the first group, then if @a \`cmp` b@ is +-- 'True' @b@ is also assigned to the same group. If @b \`cmp` c@ is 'True' +-- then @c@ is also assigned to the same group and so on. When the comparison +-- fails the parser is terminated. Each group is folded using the 'Fold' @f@ and +-- the result of the fold is the result of the parser. +-- +-- * Stops - when the comparison fails. +-- * Fails - never. +-- +-- >>> :{ +-- runGroupsByRolling eq = +-- Stream.fold Fold.toList +-- . Stream.parseMany (Parser.groupByRolling eq Fold.toList) +-- . Stream.fromList +-- :} +-- +-- >>> runGroupsByRolling (<) [] +-- [] +-- +-- >>> runGroupsByRolling (<) [1] +-- [Right [1]] +-- +-- >>> runGroupsByRolling (<) [3, 5, 4, 1, 2, 0] +-- [Right [3,5],Right [4],Right [1,2],Right [0]] +-- +-- /Pre-release/ -- {-# INLINE groupByRolling #-} groupByRolling :: Monad m => (a -> a -> Bool) -> Fold m a b -> Parser a m b @@ -1384,6 +1826,16 @@ data GroupByStatePair a s1 s2 | GroupByGroupingPairL !a !s1 !s2 | GroupByGroupingPairR !a !s1 !s2 +-- | Like 'groupByRolling', but if the predicate is 'True' then collects using +-- the first fold as long as the predicate holds 'True', if the predicate is +-- 'False' collects using the second fold as long as it remains 'False'. +-- Returns 'Left' for the first case and 'Right' for the second case. +-- +-- For example, if we want to detect sorted sequences in a stream, both +-- ascending and descending cases we can use 'groupByRollingEither (<=) +-- Fold.toList Fold.toList'. +-- +-- /Pre-release/ {-# INLINE groupByRollingEither #-} groupByRollingEither :: Monad m => (a -> a -> Bool) -> Fold m a b -> Fold m a c -> Parser a m (Either b c) @@ -1468,13 +1920,26 @@ groupByRollingEither -- XXX use an Unfold instead of a list? -- XXX custom combinators for matching list, array and stream? -- XXX rename to listBy? + +-- | Match the given sequence of elements using the given comparison function. +-- Returns the original sequence if successful. -- --- | See 'Streamly.Internal.Data.Parser.streamEqBy'. +-- Definition: -- --- /Pre-release/ +-- >>> listEqBy cmp xs = Parser.streamEqBy cmp (Stream.fromList xs) *> Parser.fromPure xs +-- +-- Examples: +-- +-- >>> Stream.parse (Parser.listEqBy (==) "string") $ Stream.fromList "string" +-- Right "string" +-- +-- >>> Stream.parse (Parser.listEqBy (==) "mismatch") $ Stream.fromList "match" +-- Left (ParseError "streamEqBy: mismtach occurred") -- {-# INLINE listEqBy #-} listEqBy :: Monad m => (a -> a -> Bool) -> [a] -> Parser a m [a] +listEqBy cmp xs = streamEqBy cmp (D.fromList xs) *> fromPure xs +{- listEqBy cmp str = Parser step initial extract where @@ -1501,8 +1966,13 @@ listEqBy cmp str = Parser step initial extract $ Error $ "listEqBy: end of input, yet to match " ++ show (length xs) ++ " elements" +-} --- | Like 'listEqBy' but uses a stream instead of a list +-- | Like 'listEqBy' but uses a stream instead of a list and does not return +-- the stream. +-- +-- See also: "Streamly.Data.Stream.streamEqBy" +-- {-# INLINE streamEqBy #-} streamEqBy :: Monad m => (a -> a -> Bool) -> D.Stream m a -> Parser a m () streamEqBy cmp (D.Stream sstep state) = Parser step initial extract @@ -1540,6 +2010,39 @@ streamEqBy cmp (D.Stream sstep state) = Parser step initial extract extract _ = return $ Error "streamEqBy: end of input" +-- Rename to "list". +-- | Match the input sequence with the supplied list and return it if +-- successful. +-- +-- >>> listEq = Parser.listEqBy (==) +-- +{-# INLINE listEq #-} +listEq :: (Monad m, Eq a) => [a] -> Parser a m [a] +listEq = listEqBy (==) + +-- | Match if the input stream is a subsequence of the argument stream i.e. all +-- the elements of the input stream occur, in order, in the argument stream. +-- The elements do not have to occur consecutively. A sequence is considered a +-- subsequence of itself. +{-# INLINE subsequenceBy #-} +subsequenceBy :: -- Monad m => + (a -> a -> Bool) -> Stream m a -> Parser a m () +subsequenceBy = undefined + +{- +-- Should go in Data.Parser.Regex in streamly package so that it can depend on +-- regex backends. +{-# INLINE regexPosix #-} +regexPosix :: -- Monad m => + Regex -> Parser m a (Maybe (Array (MatchOffset, MatchLength))) +regexPosix = undefined + +{-# INLINE regexPCRE #-} +regexPCRE :: -- Monad m => + Regex -> Parser m a (Maybe (Array (MatchOffset, MatchLength))) +regexPCRE = undefined +-} + ------------------------------------------------------------------------------- -- Transformations on input ------------------------------------------------------------------------------- @@ -1548,7 +2051,7 @@ streamEqBy cmp (D.Stream sstep state) = Parser step initial extract -- parser can always return a Continue in initial when we feed the fold's -- initial result to it. We can work this around for postscan by introducing an -- initial state and calling "initial" only on the first input. --- + -- | Stateful scan on the input of a parser using a Fold. -- -- /Unimplemented/ @@ -1709,7 +2212,16 @@ spanByRolling eq f1 f2 = -- nested parsers ------------------------------------------------------------------------------- --- | See 'Streamly.Internal.Data.Parser.takeP'. +-- | Takes at-most @n@ input elements. +-- +-- * Stops - when the collecting parser stops. +-- * Fails - when the collecting parser fails. +-- +-- >>> Stream.parse (Parser.takeP 4 (Parser.takeEQ 2 Fold.toList)) $ Stream.fromList [1, 2, 3, 4, 5] +-- Right [1,2] +-- +-- >>> Stream.parse (Parser.takeP 4 (Parser.takeEQ 5 Fold.toList)) $ Stream.fromList [1, 2, 3, 4, 5] +-- Left (ParseError "takeEQ: Expecting exactly 5 elements, input terminated on 4") -- -- /Internal/ {-# INLINE takeP #-} @@ -1783,9 +2295,7 @@ takeP lim (Parser pstep pinitial pextract) = Parser step initial extract Error err -> IError err _ -> error "Bug: takeP invalid state in initial" --- | See 'Streamly.Internal.Data.Parser.lookahead'. --- --- /Pre-release/ +-- | Run a parser without consuming the input. -- {-# INLINE lookAhead #-} lookAhead :: Monad m => Parser a m b -> Parser a m b @@ -1822,14 +2332,32 @@ lookAhead (Parser step1 initial1 _) = Parser step initial extract ------------------------------------------------------------------------------- -- Interleaving ------------------------------------------------------------------------------- +-- +-- To deinterleave we can chain two parsers one behind the other. The input is +-- given to the first parser and the input definitively rejected by the first +-- parser is given to the second parser. +-- +-- We can either have the parsers themselves buffer the input or use the shared +-- global buffer to hold it until none of the parsers need it. When the first +-- parser returns Skip (i.e. rewind) we let the second parser consume the +-- rejected input and when it is done we move the cursor forward to the first +-- parser again. This will require a "move forward" command as well. +-- +-- To implement grep we can use three parsers, one to find the pattern, one +-- to store the context behind the pattern and one to store the context in +-- front of the pattern. When a match occurs we need to emit the accumulator of +-- all the three parsers. One parser can count the line numbers to provide the +-- line number info. data DeintercalateState fs sp ss = DeintercalateL !fs !sp | DeintercalateR !fs !ss !Bool --- | See 'Streamly.Internal.Data.Parser.deintercalate'. --- --- /Internal/ +-- XXX rename this to intercalate + +-- | Apply two parsers alternately to an input stream. The input stream is +-- considered an interleaving of two patterns. The two parsers represent the +-- two patterns. -- {-# INLINE deintercalate #-} deintercalate :: Monad m => @@ -1926,8 +2454,13 @@ data SepByState fs sp ss = SepByInit !fs !sp | SepBySeparator !fs !ss !Bool --- This is a special case of deintercalate and can be easily implemented in --- terms of deintercalate. +-- | Run the content parser first, when it is done, the separator parser is +-- run, when it is done content parser is run again and so on. If none of the +-- parsers consumes an input then parser returns a failure. +-- +-- >>> sepBy p1 p2 sink = Parser.deintercalate p1 p2 (Fold.catLefts sink) +-- >>> sepBy content sep sink = Parser.sepBy1 content sep sink <|> return mempty +-- {-# INLINE sepBy #-} sepBy :: Monad m => Parser a m b -> Parser a m x -> Fold m b c -> Parser a m c @@ -2000,11 +2533,49 @@ sepBy extract (SepBySeparator fs _ _) = fmap (Done 0) $ fextract fs +-- XXX This can be implemented using refold, parse one and then continue +-- collecting the rest in that. + +-- | Parse items separated by a separator parsed by the supplied parser. At +-- least one item must be present for the parser to succeed. +-- +-- Note that this can go in infinite loop if both the parsers fail on some +-- input. Detection of that would make the implementation more complex. +-- +{-# INLINE sepBy1 #-} +sepBy1 :: Monad m => + Parser a m b -> Parser a m x -> Fold m b c -> Parser a m c +sepBy1 p sep sink = do + x <- p + f <- fromEffect $ FL.reduce sink + f1 <- fromEffect $ FL.snoc f x + many (sep >> p) f1 + +------------------------------------------------------------------------------- +-- Interleaving a collection of parsers +------------------------------------------------------------------------------- +-- +-- | Apply a collection of parsers to an input stream in a round robin fashion. +-- Each parser is applied until it stops and then we repeat starting with the +-- the first parser again. +-- +-- /Unimplemented/ +-- +{-# INLINE roundRobin #-} +roundRobin :: -- (Foldable t, Monad m) => + t (Parser a m b) -> Fold m b c -> Parser a m c +roundRobin _ps _f = undefined + ------------------------------------------------------------------------------- -- Sequential Collection ------------------------------------------------------------------------------- + +-- | @sequence f p@ collects sequential parses of parsers in a +-- serial stream @p@ using the fold @f@. Fails if the input ends or any +-- of the parsers fail. +-- +-- /Pre-release/ -- --- | See 'Streamly.Internal.Data.Parser.sequence'. {-# INLINE sequence #-} sequence :: Monad m => D.Stream m (Parser a m b) -> Fold m b c -> Parser a m c @@ -2089,7 +2660,10 @@ sequence (D.Stream sstep sstate) (Fold fstep finitial fextract) = ------------------------------------------------------------------------------- {- --- | See 'Streamly.Internal.Data.Parser.choice'. +-- | @choice parsers@ applies the @parsers@ in order and returns the first +-- successful parse. +-- +-- This is same as 'asum' but more efficient. -- -- /Broken/ -- @@ -2102,23 +2676,45 @@ choice = foldl1 shortest -- Sequential Repetition ------------------------------------------------------------------------------- +-- | Like 'many' but uses a 'Parser' instead of a 'Fold' to collect the +-- results. Parsing stops or fails if the collecting parser stops or fails. +-- +-- /Unimplemented/ +-- {-# INLINE manyP #-} manyP :: -- MonadCatch m => Parser a m b -> Parser b m c -> Parser a m c manyP _p _f = undefined --- | See 'Streamly.Internal.Data.Parser.many'. +-- | Collect zero or more parses. Apply the supplied parser repeatedly on the +-- input stream and push the parse results to a downstream fold. -- --- /Pre-release/ +-- Stops: when the downstream fold stops or the parser fails. +-- Fails: never, produces zero or more results. +-- +-- >>> many = Parser.countBetween 0 maxBound +-- +-- Compare with 'Control.Applicative.many'. -- {-# INLINE many #-} many :: Monad m => Parser a m b -> Fold m b c -> Parser a m c many = splitMany -- many = countBetween 0 maxBound --- | See 'Streamly.Internal.Data.Parser.some'. +-- Note: many1 would perhaps be a better name for this and consistent with +-- other names like takeWhile1. But we retain the name "some" for +-- compatibility. + +-- | Collect one or more parses. Apply the supplied parser repeatedly on the +-- input stream and push the parse results to a downstream fold. -- --- /Pre-release/ +-- Stops: when the downstream fold stops or the parser fails. +-- Fails: if it stops without producing a single result. +-- +-- >>> some p f = Parser.manyP p (Parser.takeGE 1 f) +-- >>> some = Parser.countBetween 1 maxBound +-- +-- Compare with 'Control.Applicative.some'. -- {-# INLINE some #-} some :: Monad m => Parser a m b -> Fold m b c -> Parser a m c @@ -2126,7 +2722,11 @@ some = splitSome -- some p f = manyP p (takeGE 1 f) -- some = countBetween 1 maxBound --- | See 'Streamly.Internal.Data.Parser.countBetween'. +-- | @countBetween m n f p@ collects between @m@ and @n@ sequential parses of +-- parser @p@ using the fold @f@. Stop after collecting @n@ results. Fails if +-- the input ends or the parser fails before @m@ results are collected. +-- +-- >>> countBetween m n p f = Parser.manyP p (Parser.takeBetween m n f) -- -- /Unimplemented/ -- @@ -2136,7 +2736,12 @@ countBetween :: -- MonadCatch m => countBetween _m _n _p = undefined -- countBetween m n p f = manyP p (takeBetween m n f) --- | See 'Streamly.Internal.Data.Parser.count'. +-- | @count n f p@ collects exactly @n@ sequential parses of parser @p@ using +-- the fold @f@. Fails if the input ends or the parser fails before @n@ +-- results are collected. +-- +-- >>> count n = Parser.countBetween n n +-- >>> count n p f = Parser.manyP p (Parser.takeEQ n f) -- -- /Unimplemented/ -- @@ -2146,13 +2751,34 @@ count :: -- MonadCatch m => count n = countBetween n n -- count n p f = manyP p (takeEQ n f) +-- | Like 'manyTill' but uses a 'Parser' to collect the results instead of a +-- 'Fold'. Parsing stops or fails if the collecting parser stops or fails. +-- +-- We can implemnent parsers like the following using 'manyTillP': +-- +-- @ +-- countBetweenTill m n f p = manyTillP (takeBetween m n f) p +-- @ +-- +-- /Unimplemented/ +-- +{-# INLINE manyTillP #-} +manyTillP :: -- Monad m => + Parser a m b -> Parser a m x -> Parser b m c -> Parser a m c +manyTillP _p1 _p2 _f = undefined + -- D.toParserK $ D.manyTillP (D.fromParserK p1) (D.fromParserK p2) f + data ManyTillState fs sr sl = ManyTillR Int fs sr | ManyTillL Int fs sl --- | See 'Streamly.Internal.Data.Parser.manyTill'. +-- | @manyTill chunking test f@ tries the parser @test@ on the input, if @test@ +-- fails it backtracks and tries @chunking@, after @chunking@ succeeds @test@ is +-- tried again and so on. The parser stops when @test@ succeeds. The output of +-- @test@ is discarded and the output of @chunking@ is accumulated by the +-- supplied fold. The parser fails if @chunking@ fails. -- --- /Pre-release/ +-- Stops when the fold @f@ stops. -- {-# INLINE manyTill #-} manyTill :: Monad m @@ -2245,3 +2871,53 @@ manyTill (Parser stepL initialL extractL) return $ Continue n (ManyTillL 0 fs s) Partial _ _ -> error "Partial in extract" extract (ManyTillR _ fs _) = fmap (Done 0) $ fextract fs + +-- | @manyThen f collect recover@ repeats the parser @collect@ on the input and +-- collects the output in the supplied fold. If the the parser @collect@ fails, +-- parser @recover@ is run until it stops and then we start repeating the +-- parser @collect@ again. The parser fails if the recovery parser fails. +-- +-- For example, this can be used to find a key frame in a video stream after an +-- error. +-- +-- /Unimplemented/ +-- +{-# INLINE manyThen #-} +manyThen :: -- (Foldable t, Monad m) => + Parser a m b -> Parser a m x -> Fold m b c -> Parser a m c +manyThen _parser _recover _f = undefined + +------------------------------------------------------------------------------- +-- Repeated Alternatives +------------------------------------------------------------------------------- + +-- | Keep trying a parser up to a maximum of @n@ failures. When the parser +-- fails the input consumed till now is dropped and the new instance is tried +-- on the fresh input. +-- +-- /Unimplemented/ +-- +{-# INLINE retryMaxTotal #-} +retryMaxTotal :: -- (Monad m) => + Int -> Parser a m b -> Fold m b c -> Parser a m c +retryMaxTotal _n _p _f = undefined + +-- | Like 'retryMaxTotal' but aborts after @n@ successive failures. +-- +-- /Unimplemented/ +-- +{-# INLINE retryMaxSuccessive #-} +retryMaxSuccessive :: -- (Monad m) => + Int -> Parser a m b -> Fold m b c -> Parser a m c +retryMaxSuccessive _n _p _f = undefined + +-- | Keep trying a parser until it succeeds. When the parser fails the input +-- consumed till now is dropped and the new instance is tried on the fresh +-- input. +-- +-- /Unimplemented/ +-- +{-# INLINE retry #-} +retry :: -- (Monad m) => + Parser a m b -> Parser a m b +retry _p = undefined diff --git a/core/src/Streamly/Internal/Data/Parser/ParserD/Type.hs b/core/src/Streamly/Internal/Data/Parser/ParserD/Type.hs index 7ffcd9602..0a10b92eb 100644 --- a/core/src/Streamly/Internal/Data/Parser/ParserD/Type.hs +++ b/core/src/Streamly/Internal/Data/Parser/ParserD/Type.hs @@ -214,6 +214,7 @@ import Fusion.Plugin.Types (Fuse(..)) import Streamly.Internal.Data.Fold.Type (Fold(..), toList) import Streamly.Internal.Data.Tuple.Strict (Tuple3'(..)) +import qualified Control.Monad.Fail as Fail import qualified Streamly.Internal.Data.Fold.Type as FL import qualified Streamly.Internal.Data.Parser.ParserK.Type as K @@ -224,6 +225,7 @@ import Prelude hiding (concatMap, filter) -- >>> import Control.Applicative ((<|>)) -- >>> import Data.Bifunctor (second) -- >>> import Prelude hiding (concatMap) +-- >>> import qualified Streamly.Data.Fold as Fold -- >>> import qualified Streamly.Data.Stream as Stream -- >>> import qualified Streamly.Internal.Data.Stream as Stream (parse) -- >>> import qualified Streamly.Internal.Data.Parser as Parser @@ -625,9 +627,9 @@ fromParserK parser = Parser step initial extract -- Mapping on the output ------------------------------------------------------------------------------ --- | Map a monadic function on the output of a parser. +-- | @rmapM f parser@ maps the monadic function @f@ on the output of the parser. -- --- /Pre-release/ +-- >>> rmap = fmap {-# INLINE rmapM #-} rmapM :: Monad m => (b -> m c) -> Parser a m b -> Parser a m c rmapM f (Parser step initial extract) = @@ -644,17 +646,14 @@ rmapM f (Parser step initial extract) = IError err -> return $ IError err step1 s a = step s a >>= mapMStep f --- | See 'Streamly.Internal.Data.Parser.fromPure'. --- --- /Pre-release/ +-- | A parser that always yields a pure value without consuming any input. -- {-# INLINE_NORMAL fromPure #-} fromPure :: Monad m => b -> Parser a m b fromPure b = Parser undefined (pure $ IDone b) undefined --- | See 'Streamly.Internal.Data.Parser.fromEffect'. --- --- /Pre-release/ +-- | A parser that always yields the result of an effectful action without +-- consuming any input. -- {-# INLINE fromEffect #-} fromEffect :: Monad m => m b -> Parser a m b @@ -667,14 +666,52 @@ fromEffect b = Parser undefined (IDone <$> b) undefined {-# ANN type SeqParseState Fuse #-} data SeqParseState sl f sr = SeqParseL sl | SeqParseR f sr --- | See 'Streamly.Internal.Data.Parser.splitWith'. --- -- Note: this implementation of splitWith is fast because of stream fusion but -- has quadratic time complexity, because each composition adds a new branch -- that each subsequent parse's input element has to go through, therefore, it -- cannot scale to a large number of compositions. After around 100 -- compositions the performance starts dipping rapidly beyond a CPS style -- unfused implementation. + +-- | Sequential parser application. Apply two parsers sequentially to an input +-- stream. The input is provided to the first parser, when it is done the +-- remaining input is provided to the second parser. If both the parsers +-- succeed their outputs are combined using the supplied function. The +-- operation fails if any of the parsers fail. +-- +-- Note: This is a parsing dual of appending streams using +-- 'Streamly.Data.Stream.append', it splits the streams using two parsers and zips +-- the results. +-- +-- This implementation is strict in the second argument, therefore, the +-- following will fail: +-- +-- >>> Stream.parse (Parser.splitWith const (Parser.satisfy (> 0)) undefined) $ Stream.fromList [1] +-- *** Exception: Prelude.undefined +-- ... +-- +-- Compare with 'Applicative' instance method '<*>'. This implementation allows +-- stream fusion but has quadratic complexity. This can fuse with other +-- operations and can be faster than 'Applicative' instance for small number +-- (less than 8) of compositions. +-- +-- Many combinators can be expressed using @splitWith@ and other parser +-- primitives. Some common idioms are described below, +-- +-- @ +-- span :: (a -> Bool) -> Fold m a b -> Fold m a b -> Parser a m b +-- span pred f1 f2 = splitWith (,) ('takeWhile' pred f1) ('fromFold' f2) +-- @ +-- +-- @ +-- spanBy :: (a -> a -> Bool) -> Fold m a b -> Fold m a b -> Parser a m b +-- spanBy eq f1 f2 = splitWith (,) ('groupBy' eq f1) ('fromFold' f2) +-- @ +-- +-- @ +-- spanByRolling :: (a -> a -> Bool) -> Fold m a b -> Fold m a b -> Parser a m b +-- spanByRolling eq f1 f2 = splitWith (,) ('groupByRolling' eq f1) ('fromFold' f2) +-- @ -- -- /Pre-release/ -- @@ -824,7 +861,24 @@ noErrorUnsafeSplitWith func (Parser stepL initialL extractL) data SeqAState sl sr = SeqAL sl | SeqAR sr -- This turns out to be slightly faster than splitWith --- | See 'Streamly.Internal.Data.Parser.split_'. + +-- | Sequential parser application ignoring the output of the first parser. +-- Apply two parsers sequentially to an input stream. The input is provided to +-- the first parser, when it is done the remaining input is provided to the +-- second parser. The output of the parser is the output of the second parser. +-- The operation fails if any of the parsers fail. +-- +-- This implementation is strict in the second argument, therefore, the +-- following will fail: +-- +-- >>> Stream.parse (Parser.split_ (Parser.satisfy (> 0)) undefined) $ Stream.fromList [1] +-- *** Exception: Prelude.undefined +-- ... +-- +-- Compare with 'Applicative' instance method '*>'. This implementation allows +-- stream fusion but has quadratic complexity. This can fuse with other +-- operations, and can be faster than 'Applicative' instance for small +-- number (less than 8) of compositions. -- -- /Pre-release/ -- @@ -962,7 +1016,20 @@ data AltParseState sl sr = AltParseL Int sl | AltParseR sr -- each subsequent alternative's input element has to go through, therefore, it -- cannot scale to a large number of compositions --- | See 'Streamly.Internal.Data.Parser.alt'. +-- | Sequential alternative. Apply the input to the first parser and return the +-- result if the parser succeeds. If the first parser fails then backtrack and +-- apply the same input to the second parser and return the result. +-- +-- Note: This implementation is not lazy in the second argument. The following +-- will fail: +-- +-- >> Stream.parse (Parser.satisfy (> 0) `Parser.alt` undefined) $ Stream.fromList [1..10] +-- *** Exception: Prelude.undefined +-- +-- Compare with ParserK 'Alternative' instance method '<|>'. This +-- implementation allows stream fusion but has quadratic complexity. This can +-- fuse with other operations and can be much faster than 'Alternative' +-- instance for small number (less than 8) of alternatives. -- -- /Time Complexity:/ O(n^2) where n is the number of compositions. -- @@ -1263,15 +1330,15 @@ splitSome (Parser step1 initial1 extract1) (Fold fstep finitial fextract) = assertM(n == cnt) return (Continue n (Tuple3' s1 0 (Right fs))) --- | See 'Streamly.Internal.Data.Parser.die'. --- --- /Pre-release/ +-- | A parser that always fails with an error message without consuming +-- any input. -- {-# INLINE_NORMAL die #-} die :: Monad m => String -> Parser a m b die err = Parser undefined (pure (IError err)) undefined --- | See 'Streamly.Internal.Data.Parser.dieM'. +-- | A parser that always fails with an effectful error message and without +-- consuming any input. -- -- /Pre-release/ -- @@ -1292,7 +1359,7 @@ dieM err = Parser undefined (IError <$> err) undefined -- Note: The implementation of '<|>' is not lazy in the second -- argument. The following code will fail: -- --- >>> Stream.parse (ParserD.toParserK $ ParserD.satisfy (> 0) <|> undefined) $ Stream.fromList [1..10] +-- >>> Stream.parse (ParserD.satisfy (> 0) <|> undefined) $ Stream.fromList [1..10] -- *** Exception: Prelude.undefined -- ... -- @@ -1314,7 +1381,7 @@ data ConcatParseState sl m a b = ConcatParseL sl | forall s. ConcatParseR (s -> a -> m (Step s b)) s (s -> m (Step s b)) --- | See 'Streamly.Internal.Data.Parser.concatMap'. +-- | Map a 'Parser' returning function on the result of a 'Parser'. -- -- /Pre-release/ -- @@ -1469,6 +1536,10 @@ instance Monad m => Monad (Parser a m) where {-# INLINE (>>) #-} (>>) = (*>) +instance Monad m => Fail.MonadFail (Parser a m) where + {-# INLINE fail #-} + fail = die + -- | See documentation of 'Streamly.Internal.Data.Parser.ParserK.Type.Parser'. -- instance Monad m => MonadPlus (Parser a m) where @@ -1486,6 +1557,13 @@ instance (Monad m, MonadIO m) => MonadIO (Parser a m) where -- Mapping on input ------------------------------------------------------------------------------ +-- | @lmap f parser@ maps the function @f@ on the input of the parser. +-- +-- >>> Stream.parse (Parser.lmap (\x -> x * x) (Parser.fromFold Fold.sum)) (Stream.enumerateFromTo 1 100) +-- Right 338350 +-- +-- > lmap = Parser.lmapM return +-- {-# INLINE lmap #-} lmap :: (a -> b) -> Parser b m r -> Parser a m r lmap f (Parser step begin done) = Parser step1 begin done @@ -1494,6 +1572,8 @@ lmap f (Parser step begin done) = Parser step1 begin done step1 x a = step x (f a) +-- | @lmapM f parser@ maps the monadic function @f@ on the input of the parser. +-- {-# INLINE lmapM #-} lmapM :: Monad m => (a -> m b) -> Parser b m r -> Parser a m r lmapM f (Parser step begin done) = Parser step1 begin done @@ -1502,6 +1582,11 @@ lmapM f (Parser step begin done) = Parser step1 begin done step1 x a = f a >>= step x +-- | Include only those elements that pass a predicate. +-- +-- >>> Stream.parse (Parser.filter (> 5) (Parser.fromFold Fold.sum)) $ Stream.fromList [1..10] +-- Right 40 +-- {-# INLINE filter #-} filter :: Monad m => (a -> Bool) -> Parser a m b -> Parser a m b filter f (Parser step initial extract) = Parser step1 initial extract diff --git a/core/src/Streamly/Internal/Data/Parser/ParserDK.hs b/core/src/Streamly/Internal/Data/Parser/ParserDK.hs new file mode 100644 index 000000000..2847f8661 --- /dev/null +++ b/core/src/Streamly/Internal/Data/Parser/ParserDK.hs @@ -0,0 +1,1544 @@ +{-# OPTIONS_GHC -Wno-orphans #-} + +-- | +-- Module : Streamly.Internal.Data.Parser +-- Copyright : (c) 2020 Composewell Technologies +-- License : BSD-3-Clause +-- Maintainer : streamly@composewell.com +-- Stability : pre-release +-- Portability : GHC +-- +-- Fast backtracking parsers with stream fusion and native streaming +-- capability. +-- +-- 'Applicative' and 'Control.Applicative.Alternative' type class based +-- combinators from the +-- +-- package can also be used with the 'Parser' type. However, there are two +-- important differences between @parser-combinators@ and the equivalent ones +-- provided in this module in terms of performance: +-- +-- 1) @parser-combinators@ use plain Haskell lists to collect the results, in a +-- strict Monad like IO, the results are necessarily buffered before they can +-- be consumed. This may not perform optimally in streaming applications +-- processing large amounts of data. Equivalent combinators in this module can +-- consume the results of parsing using a 'Fold', thus providing a scalability +-- and a composable consumer. +-- +-- 2) Several combinators in this module can be many times faster because of +-- stream fusion. For example, 'Streamly.Internal.Data.Parser.many' combinator +-- in this module is much faster than the 'Control.Applicative.many' combinator +-- of 'Control.Applicative.Alternative' type class. +-- +-- = Errors +-- +-- Failing parsers in this module throw the 'D.ParseError' exception. +-- +-- = Naming +-- +-- As far as possible, try that the names of the combinators in this module are +-- consistent with: +-- +-- * +-- * +-- * +-- * +-- * + +module Streamly.Internal.Data.Parser + ( + K.Parser (..) + , D.ParseError (..) + , D.Step (..) + + -- * Downgrade to Fold + , toFold + + -- First order parsers + -- * Accumulators + , fromFold + , fromFoldMaybe + , fromPure + , fromEffect + , die + , dieM + + -- * Map on input + , lmap + , lmapM + , filter + + -- * Map on output + , rmapM + + -- * Element parsers + , peek + + -- All of these can be expressed in terms of either + , one + , oneEq + , oneNotEq + , oneOf + , noneOf + , eof + , satisfy + , maybe + , either + + -- * Sequence parsers (tokenizers) + -- + -- | Parsers chained in series, if one parser terminates the composition + -- terminates. + + , lookAhead + + -- ** By length + -- | Grab a sequence of input elements without inspecting them + , takeBetween + -- , take -- takeBetween 0 n + , takeEQ -- takeBetween n n + , takeGE -- takeBetween n maxBound + , takeP + + -- Grab a sequence of input elements by inspecting them + -- ** Exact match + , listEq + , listEqBy + , streamEqBy + , subsequenceBy + + -- ** By predicate + , takeWhileP + , takeWhile + -- $takeWhile + , takeWhile1 + , dropWhile + + -- ** Separators + , takeEndBy + , takeEndBy_ + , takeEndByEsc + -- , takeEndByEsc_ + , takeStartBy + , takeStartBy_ + , takeEitherSepBy + , wordBy + -- , wordByEsc + + -- ** By comparing + , groupBy + , groupByRolling + , groupByRollingEither + + -- ** Framing + -- , takeFramedBy + , takeFramedBy_ + , takeFramedByEsc_ + , takeFramedByGeneric + , wordFramedBy + , wordQuotedBy + + -- | Unimplemented + -- + -- @ + -- , prefixOf -- match any prefix of a given string + -- , suffixOf -- match any suffix of a given string + -- , infixOf -- match any substring of a given string + -- @ + + -- Second order parsers (parsers using parsers) + -- * Binary Combinators + + -- ** Sequential Applicative + , splitWith + , split_ + +{- + -- ** Parallel Applicatives + , teeWith + , teeWithFst + , teeWithMin + -- , teeTill -- like manyTill but parallel +-} + + -- ** Sequential Interleaving + -- Use two folds, run a primary parser, its rejected values go to the + -- secondary parser. + , deintercalate + -- , deintercalatePrefix + -- , deintercalateSuffix + + -- *** Special cases + -- | TODO: traditional implmentations of these may be of limited use. For + -- example, consider parsing lines separated by @\\r\\n@. The main parser + -- will have to detect and exclude the sequence @\\r\\n@ anyway so that we + -- can apply the "sep" parser. + -- + -- We can instead implement these as special cases of deintercalate. + -- + -- @ + -- , endBy + -- , sepEndBy + -- , beginBy + -- , sepBeginBy + -- , sepAroundBy + -- @ + , sepBy1 + , sepBy + + -- ** Sequential Alternative + , alt + +{- + -- ** Parallel Alternatives + , shortest + , longest + -- , fastest +-} + + -- * N-ary Combinators + -- ** Sequential Collection + , concatSequence + , concatMap + + -- ** Sequential Repetition + , count + , countBetween + + , manyP + , many + , some + , manyTillP + , manyTill + , manyThen + + -- * Distribution + -- + -- | A simple and stupid impl would be to just convert the stream to an + -- array and give the array reference to all consumers. The array can be + -- grown on demand by any consumer and truncated when nonbody needs it. + + -- ** Distribute to collection + -- ** Distribute to repetition + + -- ** Interleaved collection + -- | + -- + -- 1. Round robin + -- 2. Priority based + , roundRobin + + -- ** Collection of Alternatives + -- | Unimplemented + -- + -- @ + -- , shortestN + -- , longestN + -- , fastestN -- first N successful in time + -- , choiceN -- first N successful in position + -- @ + -- , choice -- first successful in position + + -- ** Repeated Alternatives + , retryMaxTotal + , retryMaxSuccessive + , retry + + -- * Deprecated + , next + ) +where + +import Streamly.Internal.Data.Fold.Type (Fold(..)) +import Streamly.Internal.Data.Parser.ParserK.Type (Parser) + +import qualified Data.Foldable as Foldable +import qualified Streamly.Internal.Data.Fold.Type as FL +import qualified Streamly.Internal.Data.Parser.ParserD as D +import qualified Streamly.Internal.Data.Parser.ParserK.Type as K + +#ifdef USE_STREAMK +import Streamly.Internal.Data.StreamK (Stream) +import qualified Streamly.Internal.Data.StreamK as Stream +#else +import Streamly.Internal.Data.Stream.StreamD.Type (Stream) +import qualified Streamly.Internal.Data.Stream.StreamD.Type as Stream +#endif + +import Prelude hiding + ( any, all, dropWhile, take, takeWhile, sequence, concatMap, maybe, either + , filter ) + +-- +-- $setup +-- >>> :m +-- >>> import Prelude hiding (any, all, dropWhile, take, takeWhile, sequence, concatMap, maybe, either, filter) +-- >>> import Control.Applicative ((<|>)) +-- >>> import Data.Char (isSpace) +-- >>> import qualified Data.Maybe as Maybe +-- >>> import qualified Data.Foldable as Foldable +-- >>> import qualified Streamly.Data.Stream as Stream +-- >>> import qualified Streamly.Internal.Data.Stream as Stream (parse, parseMany) +-- >>> import qualified Streamly.Internal.Data.Fold as Fold +-- >>> import qualified Streamly.Internal.Data.Parser as Parser + +------------------------------------------------------------------------------- +-- Downgrade a parser to a Fold +------------------------------------------------------------------------------- +-- +-- | Make a 'Fold' from a 'Parser'. The fold just throws an exception if the +-- parser fails or tries to backtrack. +-- +-- This can be useful in combinators that accept a Fold and we know that a +-- Parser cannot fail or failure exception is acceptable as there is no way to +-- recover. +-- +-- /Pre-release/ +-- +{-# INLINE toFold #-} +toFold :: Monad m => Parser a m b -> Fold m a b +toFold p = D.toFold $ D.fromParserK p + +------------------------------------------------------------------------------- +-- Upgrade folds to parses +------------------------------------------------------------------------------- +-- +-- | Make a 'Parser' from a 'Fold'. +-- +{-# INLINE fromFold #-} +fromFold :: Monad m => Fold m a b -> Parser a m b +fromFold = D.toParserK . D.fromFold + +-- | Convert a Maybe returning fold to an error returning parser. The first +-- argument is the error message that the parser would return when the fold +-- returns Nothing. +-- +-- /Pre-release/ +-- +{-# INLINE fromFoldMaybe #-} +fromFoldMaybe :: Monad m => String -> Fold m a (Maybe b) -> Parser a m b +fromFoldMaybe err = D.toParserK . D.fromFoldMaybe err + +------------------------------------------------------------------------------- +-- Terminating but not failing folds +------------------------------------------------------------------------------- +-- +-- This is the dual of stream "fromPure". +-- +-- | A parser that always yields a pure value without consuming any input. +-- +{-# INLINE [3] fromPure #-} +fromPure :: Monad m => b -> Parser a m b +fromPure = D.toParserK . D.fromPure +{-# RULES "fromPure fallback to CPS" [2] + forall a. D.toParserK (D.fromPure a) = K.fromPure a #-} + +-- This is the dual of stream "fromEffect". +-- +-- | A parser that always yields the result of an effectful action without +-- consuming any input. +-- +{-# INLINE fromEffect #-} +fromEffect :: Monad m => m b -> Parser a m b +fromEffect = K.fromEffect -- D.toParserK . D.fromEffect + +-- This is the dual of "nil". +-- +-- | A parser that always fails with an error message without consuming +-- any input. +-- +{-# INLINE [3] die #-} +die :: Monad m => String -> Parser a m b +die = D.toParserK . D.die +{-# RULES "die fallback to CPS" [2] + forall a. D.toParserK (D.die a) = K.die a #-} + +-- This is the dual of "nilM". +-- +-- | A parser that always fails with an effectful error message and without +-- consuming any input. +-- +-- /Pre-release/ +-- +{-# INLINE dieM #-} +dieM :: Monad m => m String -> Parser a m b +dieM = D.toParserK . D.dieM + +------------------------------------------------------------------------------- +-- Map on input +------------------------------------------------------------------------------- + +-- | @lmap f parser@ maps the function @f@ on the input of the parser. +-- +-- >>> Stream.parse (Parser.lmap (\x -> x * x) (Parser.fromFold Fold.sum)) (Stream.enumerateFromTo 1 100) +-- Right 338350 +-- +-- > lmap = Parser.lmapM return +-- +{-# INLINE lmap #-} +lmap :: Monad m => (a -> b) -> Parser b m r -> Parser a m r +lmap f p = D.toParserK $ D.lmap f $ D.fromParserK p + +-- | @lmapM f parser@ maps the monadic function @f@ on the input of the parser. +-- +{-# INLINE lmapM #-} +lmapM :: Monad m => (a -> m b) -> Parser b m r -> Parser a m r +lmapM f p = D.toParserK $ D.lmapM f $ D.fromParserK p + +-- | @rmapM f parser@ maps the monadic function @f@ on the output of the parser. +-- +-- >>> rmap = fmap +{-# INLINE rmapM #-} +rmapM :: Monad m => (b -> m c) -> Parser a m b -> Parser a m c +rmapM f p = D.toParserK $ D.rmapM f $ D.fromParserK p + +-- | Include only those elements that pass a predicate. +-- +-- >>> Stream.parse (Parser.filter (> 5) (Parser.fromFold Fold.sum)) $ Stream.fromList [1..10] +-- Right 40 +-- +{-# INLINE filter #-} +filter :: Monad m => (a -> Bool) -> Parser a m b -> Parser a m b +filter f p = D.toParserK $ D.filter f $ D.fromParserK p + +------------------------------------------------------------------------------- +-- Failing Parsers +------------------------------------------------------------------------------- + +-- | Peek the head element of a stream, without consuming it. Fails if it +-- encounters end of input. +-- +-- >>> Stream.parse ((,) <$> Parser.peek <*> Parser.satisfy (> 0)) $ Stream.fromList [1] +-- Right (1,1) +-- +-- @ +-- peek = lookAhead (satisfy True) +-- @ +-- +{-# INLINE peek #-} +peek :: Monad m => Parser a m a +peek = D.toParserK D.peek + +-- | Succeeds if we are at the end of input, fails otherwise. +-- +-- >>> Stream.parse ((,) <$> Parser.satisfy (> 0) <*> Parser.eof) $ Stream.fromList [1] +-- Right (1,()) +-- +{-# INLINE eof #-} +eof :: Monad m => Parser a m () +eof = D.toParserK D.eof + +-- | Returns the next element if it passes the predicate, fails otherwise. +-- +-- >>> Stream.parse (Parser.satisfy (== 1)) $ Stream.fromList [1,0,1] +-- Right 1 +-- +-- >>> toMaybe f x = if f x then Just x else Nothing +-- >>> satisfy f = Parser.maybe (toMaybe f) +-- +{-# INLINE satisfy #-} +satisfy :: Monad m => (a -> Bool) -> Parser a m a +satisfy = D.toParserK . D.satisfy + +-- | Consume one element from the head of the stream. Fails if it encounters +-- end of input. +-- +-- >>> one = Parser.satisfy $ const True +-- +{-# INLINE one #-} +one :: Monad m => Parser a m a +one = satisfy $ const True + +-- Alternate names: "only", "onlyThis". + +-- | Match a specific element. +-- +-- >>> oneEq x = Parser.satisfy (== x) +-- +{-# INLINE oneEq #-} +oneEq :: (Monad m, Eq a) => a -> Parser a m a +oneEq x = satisfy (== x) + +-- Alternate names: "exclude", "notThis". + +-- | Match anything other than the supplied element. +-- +-- >>> oneNotEq x = Parser.satisfy (/= x) +-- +{-# INLINE oneNotEq #-} +oneNotEq :: (Monad m, Eq a) => a -> Parser a m a +oneNotEq x = satisfy (/= x) + +-- | Match any one of the elements in the supplied list. +-- +-- >>> oneOf xs = Parser.satisfy (`Foldable.elem` xs) +-- +-- When performance matters a pattern matching predicate could be more +-- efficient than a 'Foldable' datatype: +-- +-- @ +-- let p x = +-- case x of +-- 'a' -> True +-- 'e' -> True +-- _ -> False +-- in satisfy p +-- @ +-- +-- GHC may use a binary search instead of linear search in the list. +-- Alternatively, you can also use an array instead of list for storage and +-- search. +-- +{-# INLINE oneOf #-} +oneOf :: (Monad m, Eq a, Foldable f) => f a -> Parser a m a +oneOf xs = satisfy (`Foldable.elem` xs) + +-- | See performance notes in 'oneOf'. +-- +-- >>> noneOf xs = Parser.satisfy (`Foldable.notElem` xs) +-- +{-# INLINE noneOf #-} +noneOf :: (Monad m, Eq a, Foldable f) => f a -> Parser a m a +noneOf xs = satisfy (`Foldable.notElem` xs) + +-- | Return the next element of the input. Returns 'Nothing' +-- on end of input. Also known as 'head'. +-- +-- /Pre-release/ +-- +{-# DEPRECATED next "Please use \"fromFold Fold.one\" instead" #-} +{-# INLINE next #-} +next :: Monad m => Parser a m (Maybe a) +next = D.toParserK D.next + +-- | Map a 'Maybe' returning function on the next element in the stream. The +-- parser fails if the function returns 'Nothing' otherwise returns the 'Just' +-- value. +-- +-- >>> toEither = Maybe.maybe (Left "maybe: predicate failed") Right +-- >>> maybe f = Parser.either (toEither . f) +-- +-- >>> maybe f = Parser.fromFoldMaybe "maybe: predicate failed" (Fold.maybe f) +-- +-- /Pre-release/ +-- +{-# INLINE maybe #-} +maybe :: Monad m => (a -> Maybe b) -> Parser a m b +maybe = D.toParserK . D.maybe + +-- | Map an 'Either' returning function on the next element in the stream. If +-- the function returns 'Left err', the parser fails with the error message +-- @err@ otherwise returns the 'Right' value. +-- +-- /Pre-release/ +-- +{-# INLINE either #-} +either :: Monad m => (a -> Either String b) -> Parser a m b +either = D.toParserK . D.either + +------------------------------------------------------------------------------- +-- Taking elements +------------------------------------------------------------------------------- + +-- | @takeBetween m n@ takes a minimum of @m@ and a maximum of @n@ input +-- elements and folds them using the supplied fold. +-- +-- Stops after @n@ elements. +-- Fails if the stream ends before @m@ elements could be taken. +-- +-- Examples: - +-- +-- @ +-- >>> :{ +-- takeBetween' low high ls = Stream.parse prsr (Stream.fromList ls) +-- where prsr = Parser.takeBetween low high Fold.toList +-- :} +-- +-- @ +-- +-- >>> takeBetween' 2 4 [1, 2, 3, 4, 5] +-- Right [1,2,3,4] +-- +-- >>> takeBetween' 2 4 [1, 2] +-- Right [1,2] +-- +-- >>> takeBetween' 2 4 [1] +-- Left (ParseError "takeBetween: Expecting alteast 2 elements, got 1") +-- +-- >>> takeBetween' 0 0 [1, 2] +-- Right [] +-- +-- >>> takeBetween' 0 1 [] +-- Right [] +-- +-- @takeBetween@ is the most general take operation, other take operations can +-- be defined in terms of takeBetween. For example: +-- +-- >>> take n = Parser.takeBetween 0 n +-- >>> takeEQ n = Parser.takeBetween n n +-- >>> takeGE n = Parser.takeBetween n maxBound +-- +-- /Pre-release/ +-- +{-# INLINE takeBetween #-} +takeBetween :: Monad m => + Int -> Int -> Fold m a b -> Parser a m b +takeBetween m n = D.toParserK . D.takeBetween m n + +-- | Stops after taking exactly @n@ input elements. +-- +-- * Stops - after consuming @n@ elements. +-- * Fails - if the stream or the collecting fold ends before it can collect +-- exactly @n@ elements. +-- +-- >>> Stream.parse (Parser.takeEQ 4 Fold.toList) $ Stream.fromList [1,0,1] +-- Left (ParseError "takeEQ: Expecting exactly 4 elements, input terminated on 3") +-- +{-# INLINE takeEQ #-} +takeEQ :: Monad m => Int -> Fold m a b -> Parser a m b +takeEQ n = D.toParserK . D.takeEQ n + +-- | Take at least @n@ input elements, but can collect more. +-- +-- * Stops - when the collecting fold stops. +-- * Fails - if the stream or the collecting fold ends before producing @n@ +-- elements. +-- +-- >>> Stream.parse (Parser.takeGE 4 Fold.toList) $ Stream.fromList [1,0,1] +-- Left (ParseError "takeGE: Expecting at least 4 elements, input terminated on 3") +-- +-- >>> Stream.parse (Parser.takeGE 4 Fold.toList) $ Stream.fromList [1,0,1,0,1] +-- Right [1,0,1,0,1] +-- +-- /Pre-release/ +-- +{-# INLINE takeGE #-} +takeGE :: Monad m => Int -> Fold m a b -> Parser a m b +takeGE n = D.toParserK . D.takeGE n + +------------------------------------------------------------------------------- +-- Take until a condition +------------------------------------------------------------------------------- + +-- $takeWhile +-- Note: This is called @takeWhileP@ and @munch@ in some parser libraries. + +-- XXX We should perhaps use only takeWhileP and rename it to takeWhile. +-- +-- | Like 'takeWhile' but uses a 'Parser' instead of a 'Fold' to collect the +-- input. The combinator stops when the condition fails or if the collecting +-- parser stops. +-- +-- Other interesting parsers can be implemented in terms of this parser: +-- +-- >>> takeWhile1 cond p = Parser.takeWhileP cond (Parser.takeBetween 1 maxBound p) +-- >>> takeWhileBetween cond m n p = Parser.takeWhileP cond (Parser.takeBetween m n p) +-- +-- Stops: when the condition fails or the collecting parser stops. +-- Fails: when the collecting parser fails. +-- +-- /Pre-release/ +-- +{-# INLINE takeWhileP #-} +takeWhileP :: Monad m => (a -> Bool) -> Parser a m b -> Parser a m b +takeWhileP cond p = D.toParserK $ D.takeWhileP cond (D.fromParserK p) + +-- | Collect stream elements until an element fails the predicate. The element +-- on which the predicate fails is returned back to the input stream. +-- +-- * Stops - when the predicate fails or the collecting fold stops. +-- * Fails - never. +-- +-- >>> Stream.parse (Parser.takeWhile (== 0) Fold.toList) $ Stream.fromList [0,0,1,0,1] +-- Right [0,0] +-- +-- >>> takeWhile cond f = Parser.takeWhileP cond (Parser.fromFold f) +-- +-- We can implement a @breakOn@ using 'takeWhile': +-- +-- @ +-- breakOn p = takeWhile (not p) +-- @ +-- +{-# INLINE takeWhile #-} +takeWhile :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b +takeWhile cond = D.toParserK . D.takeWhile cond +-- takeWhile cond f = takeWhileP cond (fromFold f) + +-- | Like 'takeWhile' but takes at least one element otherwise fails. +-- +-- >>> takeWhile1 cond p = Parser.takeWhileP cond (Parser.takeBetween 1 maxBound p) +-- +{-# INLINE takeWhile1 #-} +takeWhile1 :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b +takeWhile1 cond = D.toParserK . D.takeWhile1 cond +-- takeWhile1 cond f = takeWhileP cond (takeBetween 1 maxBound f) + +-- | Drain the input as long as the predicate succeeds, running the effects and +-- discarding the results. +-- +-- This is also called @skipWhile@ in some parsing libraries. +-- +-- >>> dropWhile p = Parser.takeWhile p Fold.drain +-- +{-# INLINE dropWhile #-} +dropWhile :: Monad m => (a -> Bool) -> Parser a m () +dropWhile p = takeWhile p FL.drain + +------------------------------------------------------------------------------- +-- Separators +------------------------------------------------------------------------------- + +-- XXX We can remove Maybe from esc +{-# INLINE takeFramedByGeneric #-} +takeFramedByGeneric :: Monad m => + Maybe (a -> Bool) + -> Maybe (a -> Bool) + -> Maybe (a -> Bool) + -> Fold m a b + -> Parser a m b +takeFramedByGeneric esc begin end f = + D.toParserK $ D.takeFramedByGeneric esc begin end f + +-- | @takeEndBy cond parser@ parses a token that ends by a separator chosen by +-- the supplied predicate. The separator is also taken with the token. +-- +-- This can be combined with other parsers to implement other interesting +-- parsers as follows: +-- +-- >>> takeEndByLE cond n p = Parser.takeEndBy cond (Parser.fromFold $ Fold.take n p) +-- >>> takeEndByBetween cond m n p = Parser.takeEndBy cond (Parser.takeBetween m n p) +-- +-- >>> takeEndBy = Parser.takeEndByEsc (const False) +-- +-- See also "Streamly.Data.Fold.takeEndBy". Unlike the fold, the collecting +-- parser in the takeEndBy parser can decide whether to fail or not if the +-- stream does not end with separator. +-- +-- /Pre-release/ +-- +{-# INLINE takeEndBy #-} +takeEndBy :: Monad m => (a -> Bool) -> Parser a m b -> Parser a m b +takeEndBy cond = D.toParserK . D.takeEndBy cond . D.fromParserK +-- takeEndBy = takeEndByEsc (const False) + +-- | Like 'takeEndBy' but the separator is dropped. +-- +-- See also "Streamly.Data.Fold.takeEndBy_". +-- +-- /Pre-release/ +-- +{-# INLINE takeEndBy_ #-} +takeEndBy_ :: Monad m => (a -> Bool) -> Parser a m b -> Parser a m b +takeEndBy_ cond = D.toParserK . D.takeEndBy_ cond . D.fromParserK +{- +takeEndBy_ isEnd p = + takeFramedByGeneric Nothing Nothing (Just isEnd) (toFold p) +-} + +-- | Take either the separator or the token. Separator is a Left value and +-- token is Right value. +-- +-- /Unimplemented/ +{-# INLINE takeEitherSepBy #-} +takeEitherSepBy :: -- Monad m => + (a -> Bool) -> Fold m (Either a b) c -> Parser a m c +takeEitherSepBy _cond = undefined -- D.toParserK . D.takeEitherSepBy cond + +-- | Parse a token that starts with an element chosen by the predicate. The +-- parser fails if the input does not start with the selected element. +-- +-- * Stops - when the predicate succeeds in non-leading position. +-- * Fails - when the predicate fails in the leading position. +-- +-- >>> splitWithPrefix p f = Stream.parseMany (Parser.takeStartBy p f) +-- +-- Examples: - +-- +-- >>> p = Parser.takeStartBy (== ',') Fold.toList +-- >>> leadingComma = Stream.parse p . Stream.fromList +-- >>> leadingComma "a,b" +-- Left (ParseError "takeStartBy: missing frame start") +-- ... +-- >>> leadingComma ",," +-- Right "," +-- >>> leadingComma ",a,b" +-- Right ",a" +-- >>> leadingComma "" +-- Right "" +-- +-- /Pre-release/ +-- +{-# INLINE takeStartBy #-} +takeStartBy :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b +takeStartBy cond = D.toParserK . D.takeStartBy cond + +-- | Like 'takeStartBy' but drops the separator. +-- +-- >>> takeStartBy_ isBegin = Parser.takeFramedByGeneric Nothing (Just isBegin) Nothing +-- +{-# INLINE takeStartBy_ #-} +takeStartBy_ :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b +takeStartBy_ isBegin = takeFramedByGeneric Nothing (Just isBegin) Nothing + +------------------------------------------------------------------------------- +-- Quoting and Escaping +------------------------------------------------------------------------------- + +-- | Like 'takeEndBy' but the separator elements can be escaped using an +-- escape char determined by the first predicate. The escape characters are +-- removed. +-- +-- /pre-release/ +{-# INLINE takeEndByEsc #-} +takeEndByEsc :: Monad m => + (a -> Bool) -> (a -> Bool) -> Parser a m b -> Parser a m b +takeEndByEsc isEsc isEnd p = + D.toParserK $ D.takeEndByEsc isEsc isEnd (D.fromParserK p) + +-- | @takeFramedByEsc_ isEsc isBegin isEnd fold@ parses a token framed using a +-- begin and end predicate, and an escape character. The frame begin and end +-- characters lose their special meaning if preceded by the escape character. +-- +-- Nested frames are allowed if begin and end markers are different, nested +-- frames must be balanced unless escaped, nested frame markers are emitted as +-- it is. +-- +-- For example, +-- +-- >>> p = Parser.takeFramedByEsc_ (== '\\') (== '{') (== '}') Fold.toList +-- >>> Stream.parse p $ Stream.fromList "{hello}" +-- Right "hello" +-- >>> Stream.parse p $ Stream.fromList "{hello {world}}" +-- Right "hello {world}" +-- >>> Stream.parse p $ Stream.fromList "{hello \\{world}" +-- Right "hello {world" +-- >>> Stream.parse p $ Stream.fromList "{hello {world}" +-- Left (ParseError "takeFramedByEsc_: missing frame end") +-- +-- /Pre-release/ +{-# INLINE takeFramedByEsc_ #-} +takeFramedByEsc_ :: Monad m => + (a -> Bool) -> (a -> Bool) -> (a -> Bool) -> Fold m a b -> Parser a m b +takeFramedByEsc_ isEsc isBegin isEnd f = + D.toParserK $ D.takeFramedByEsc_ isEsc isBegin isEnd f +-- takeEndByEsc_ isEsc isEnd p = +-- takeFramedByGeneric (Just isEsc) Nothing (Just isEnd) (toFold p) + +-- | @takeFramedBy_ isBegin isEnd fold@ parses a token framed by a begin and an +-- end predicate. +-- +-- >>> takeFramedBy_ = Parser.takeFramedByEsc_ (const False) +-- +{-# INLINE takeFramedBy_ #-} +takeFramedBy_ :: Monad m => + (a -> Bool) -> (a -> Bool) -> Fold m a b -> Parser a m b +takeFramedBy_ isBegin isEnd f = D.toParserK $ D.takeFramedBy_ isBegin isEnd f +-- takeFramedBy_ isBegin isEnd = +-- takeFramedByGeneric (Just (const False)) (Just isBegin) (Just isEnd) + +------------------------------------------------------------------------------- +-- Grouping and words +------------------------------------------------------------------------------- + +-- Note we can also get words using something like: +-- sepBy FL.toList (takeWhile (not . p) Fold.toList) (dropWhile p) +-- +-- But that won't be as efficient and ergonomic. +-- +-- | Like 'splitOn' but strips leading, trailing, and repeated separators. +-- Therefore, @".a..b."@ having '.' as the separator would be parsed as +-- @["a","b"]@. In other words, its like parsing words from whitespace +-- separated text. +-- +-- * Stops - when it finds a word separator after a non-word element +-- * Fails - never. +-- +-- >>> wordBy = Parser.wordFramedBy (const False) (const False) (const False) +-- +-- @ +-- S.wordsBy pred f = S.parseMany (PR.wordBy pred f) +-- @ +-- +{-# INLINE wordBy #-} +wordBy :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b +wordBy f = D.toParserK . D.wordBy f + +-- | Like 'wordBy' but treats anything inside a pair of quotes as a single +-- word, the quotes can be escaped by an escape character. Recursive quotes +-- are possible if quote begin and end characters are different, quotes must be +-- balanced. Outermost quotes are stripped. +-- +-- >>> braces = Parser.wordFramedBy (== '\\') (== '{') (== '}') isSpace Fold.toList +-- >>> Stream.parse braces $ Stream.fromList "{ab} cd" +-- Right "ab" +-- >>> Stream.parse braces $ Stream.fromList "{ab}{cd}" +-- Right "abcd" +-- >>> Stream.parse braces $ Stream.fromList "a{b} cd" +-- Right "ab" +-- >>> Stream.parse braces $ Stream.fromList "a{{b}} cd" +-- Right "a{b}" +-- +-- >>> quotes = Parser.wordFramedBy (== '\\') (== '"') (== '"') isSpace Fold.toList +-- >>> Stream.parse quotes $ Stream.fromList "\"a\"\"b\"" +-- Right "ab" +-- +{-# INLINE wordFramedBy #-} +wordFramedBy :: Monad m => + (a -> Bool) -- ^ Escape + -> (a -> Bool) -- ^ left quote + -> (a -> Bool) -- ^ right quote + -> (a -> Bool) -- ^ word seperator + -> Fold m a b + -> Parser a m b +wordFramedBy isEsc isBegin isEnd isSpc = + D.toParserK . D.wordFramedBy isEsc isBegin isEnd isSpc + +-- | Like 'wordFramedBy' but the closing quote is determined by the opening +-- quote. The first quote begin starts a quote that is closed by its +-- corresponding closing quote. +-- +-- 'wordFramedBy' and 'wordQuotedBy' both allow multiple quote characters based +-- on the predicates but 'wordQuotedBy' always fixes the quote at the first +-- occurrence and then it is closed only by the corresponding closing quote. +-- Therefore, other quoting characters can be embedded inside it as normal +-- characters. On the other hand, 'wordFramedBy' would close the quote as soon +-- as it encounters any of the closing quotes. +-- +-- >>> q = (`elem` ['"', '\'']) +-- >>> p kQ = Parser.wordQuotedBy kQ (== '\\') q q id isSpace Fold.toList +-- +-- >>> Stream.parse (p False) $ Stream.fromList "a\"b'c\";'d\"e'f ghi" +-- Right "ab'c;d\"ef" +-- +-- >>> Stream.parse (p True) $ Stream.fromList "a\"b'c\";'d\"e'f ghi" +-- Right "a\"b'c\";'d\"e'f" +-- +{-# INLINE wordQuotedBy #-} +wordQuotedBy :: (Monad m, Eq a) => + Bool -- ^ keep the quotes in the output + -> (a -> Bool) -- ^ Escape + -> (a -> Bool) -- ^ left quote + -> (a -> Bool) -- ^ right quote + -> (a -> a) -- ^ get right quote from left quote + -> (a -> Bool) -- ^ word seperator + -> Fold m a b + -> Parser a m b +wordQuotedBy keepQuotes isEsc isBegin isEnd toRight isSpc = + D.toParserK . D.wordQuotedBy keepQuotes isEsc isBegin isEnd toRight isSpc + +-- | Given an input stream @[a,b,c,...]@ and a comparison function @cmp@, the +-- parser assigns the element @a@ to the first group, then if @a \`cmp` b@ is +-- 'True' @b@ is also assigned to the same group. If @a \`cmp` c@ is 'True' +-- then @c@ is also assigned to the same group and so on. When the comparison +-- fails the parser is terminated. Each group is folded using the 'Fold' @f@ and +-- the result of the fold is the result of the parser. +-- +-- * Stops - when the comparison fails. +-- * Fails - never. +-- +-- >>> :{ +-- runGroupsBy eq = +-- Stream.fold Fold.toList +-- . Stream.parseMany (Parser.groupBy eq Fold.toList) +-- . Stream.fromList +-- :} +-- +-- >>> runGroupsBy (<) [] +-- [] +-- +-- >>> runGroupsBy (<) [1] +-- [Right [1]] +-- +-- >>> runGroupsBy (<) [3, 5, 4, 1, 2, 0] +-- [Right [3,5,4],Right [1,2],Right [0]] +-- +{-# INLINE groupBy #-} +groupBy :: Monad m => (a -> a -> Bool) -> Fold m a b -> Parser a m b +groupBy eq = D.toParserK . D.groupBy eq + +-- | Unlike 'groupBy' this combinator performs a rolling comparison of two +-- successive elements in the input stream. Assuming the input stream +-- is @[a,b,c,...]@ and the comparison function is @cmp@, the parser +-- first assigns the element @a@ to the first group, then if @a \`cmp` b@ is +-- 'True' @b@ is also assigned to the same group. If @b \`cmp` c@ is 'True' +-- then @c@ is also assigned to the same group and so on. When the comparison +-- fails the parser is terminated. Each group is folded using the 'Fold' @f@ and +-- the result of the fold is the result of the parser. +-- +-- * Stops - when the comparison fails. +-- * Fails - never. +-- +-- >>> :{ +-- runGroupsByRolling eq = +-- Stream.fold Fold.toList +-- . Stream.parseMany (Parser.groupByRolling eq Fold.toList) +-- . Stream.fromList +-- :} +-- +-- >>> runGroupsByRolling (<) [] +-- [] +-- +-- >>> runGroupsByRolling (<) [1] +-- [Right [1]] +-- +-- >>> runGroupsByRolling (<) [3, 5, 4, 1, 2, 0] +-- [Right [3,5],Right [4],Right [1,2],Right [0]] +-- +-- /Pre-release/ +-- +{-# INLINE groupByRolling #-} +groupByRolling :: Monad m => (a -> a -> Bool) -> Fold m a b -> Parser a m b +groupByRolling eq = D.toParserK . D.groupByRolling eq + +-- | Like 'groupByRolling', but if the predicate is 'True' then collects using +-- the first fold as long as the predicate holds 'True', if the predicate is +-- 'False' collects using the second fold as long as it remains 'False'. +-- Returns 'Left' for the first case and 'Right' for the second case. +-- +-- For example, if we want to detect sorted sequences in a stream, both +-- ascending and descending cases we can use 'groupByRollingEither (<=) +-- Fold.toList Fold.toList'. +-- +-- /Pre-release/ +{-# INLINE groupByRollingEither #-} +groupByRollingEither :: Monad m => + (a -> a -> Bool) -> Fold m a b -> Fold m a c -> Parser a m (Either b c) +groupByRollingEither eq f1 = D.toParserK . D.groupByRollingEither eq f1 + +-- | Like 'listEqBy' but uses a stream instead of a list and does not return +-- the stream. +-- +-- See also: "Streamly.Data.Stream.streamEqBy" +-- +{-# INLINE streamEqBy #-} +streamEqBy :: Monad m => (a -> a -> Bool) -> Stream m a -> Parser a m () +streamEqBy cmp = D.toParserK . D.streamEqBy cmp + +-- | Match the given sequence of elements using the given comparison function. +-- Returns the original sequence if successful. +-- +-- Definition: +-- +-- >>> listEqBy cmp xs = Parser.streamEqBy cmp (Stream.fromList xs) *> Parser.fromPure xs +-- +-- Examples: +-- +-- >>> Stream.parse (Parser.listEqBy (==) "string") $ Stream.fromList "string" +-- Right "string" +-- +-- >>> Stream.parse (Parser.listEqBy (==) "mismatch") $ Stream.fromList "match" +-- Left (ParseError "streamEqBy: mismtach occurred") +-- +{-# INLINE listEqBy #-} +listEqBy :: Monad m => (a -> a -> Bool) -> [a] -> Parser a m [a] +-- listEqBy cmp xs = D.toParserK (D.listEqBy cmp xs) +listEqBy cmp xs = streamEqBy cmp (Stream.fromList xs) *> fromPure xs + +-- Rename to "list". +-- | Match the input sequence with the supplied list and return it if +-- successful. +-- +-- >>> listEq = Parser.listEqBy (==) +-- +{-# INLINE listEq #-} +listEq :: (Monad m, Eq a) => [a] -> Parser a m [a] +listEq = listEqBy (==) + +-- | Match if the input stream is a subsequence of the argument stream i.e. all +-- the elements of the input stream occur, in order, in the argument stream. +-- The elements do not have to occur consecutively. A sequence is considered a +-- subsequence of itself. +{-# INLINE subsequenceBy #-} +subsequenceBy :: -- Monad m => + (a -> a -> Bool) -> Stream m a -> Parser a m () +subsequenceBy = undefined + +{- +-- Should go in Data.Parser.Regex in streamly package so that it can depend on +-- regex backends. +{-# INLINE regexPosix #-} +regexPosix :: -- Monad m => + Regex -> Parser m a (Maybe (Array (MatchOffset, MatchLength))) +regexPosix = undefined + +{-# INLINE regexPCRE #-} +regexPCRE :: -- Monad m => + Regex -> Parser m a (Maybe (Array (MatchOffset, MatchLength))) +regexPCRE = undefined +-} + +------------------------------------------------------------------------------- +-- nested parsers +------------------------------------------------------------------------------- + +-- | Sequential parser application. Apply two parsers sequentially to an input +-- stream. The input is provided to the first parser, when it is done the +-- remaining input is provided to the second parser. If both the parsers +-- succeed their outputs are combined using the supplied function. The +-- operation fails if any of the parsers fail. +-- +-- Note: This is a parsing dual of appending streams using +-- 'Streamly.Data.Stream.append', it splits the streams using two parsers and zips +-- the results. +-- +-- This implementation is strict in the second argument, therefore, the +-- following will fail: +-- +-- >>> Stream.parse (Parser.splitWith const (Parser.satisfy (> 0)) undefined) $ Stream.fromList [1] +-- *** Exception: Prelude.undefined +-- ... +-- +-- Compare with 'Applicative' instance method '<*>'. This implementation allows +-- stream fusion but has quadratic complexity. This can fuse with other +-- operations and can be faster than 'Applicative' instance for small number +-- (less than 8) of compositions. +-- +-- Many combinators can be expressed using @splitWith@ and other parser +-- primitives. Some common idioms are described below, +-- +-- @ +-- span :: (a -> Bool) -> Fold m a b -> Fold m a b -> Parser a m b +-- span pred f1 f2 = splitWith (,) ('takeWhile' pred f1) ('fromFold' f2) +-- @ +-- +-- @ +-- spanBy :: (a -> a -> Bool) -> Fold m a b -> Fold m a b -> Parser a m b +-- spanBy eq f1 f2 = splitWith (,) ('groupBy' eq f1) ('fromFold' f2) +-- @ +-- +-- @ +-- spanByRolling :: (a -> a -> Bool) -> Fold m a b -> Fold m a b -> Parser a m b +-- spanByRolling eq f1 f2 = splitWith (,) ('groupByRolling' eq f1) ('fromFold' f2) +-- @ +-- +-- /Pre-release/ +-- +{-# INLINE splitWith #-} +splitWith :: Monad m + => (a -> b -> c) -> Parser x m a -> Parser x m b -> Parser x m c +splitWith f p1 p2 = + D.toParserK $ D.splitWith f (D.fromParserK p1) (D.fromParserK p2) + +-- | Sequential parser application ignoring the output of the first parser. +-- Apply two parsers sequentially to an input stream. The input is provided to +-- the first parser, when it is done the remaining input is provided to the +-- second parser. The output of the parser is the output of the second parser. +-- The operation fails if any of the parsers fail. +-- +-- This implementation is strict in the second argument, therefore, the +-- following will fail: +-- +-- >>> Stream.parse (Parser.split_ (Parser.satisfy (> 0)) undefined) $ Stream.fromList [1] +-- *** Exception: Prelude.undefined +-- ... +-- +-- Compare with 'Applicative' instance method '*>'. This implementation allows +-- stream fusion but has quadratic complexity. This can fuse with other +-- operations, and can be faster than 'Applicative' instance for small +-- number (less than 8) of compositions. +-- +-- /Pre-release/ +-- +{-# INLINE split_ #-} +split_ :: Monad m => Parser x m a -> Parser x m b -> Parser x m b +split_ p1 p2 = D.toParserK $ D.split_ (D.fromParserK p1) (D.fromParserK p2) + +{- +-- | @teeWith f p1 p2@ distributes its input to both @p1@ and @p2@ until both +-- of them succeed or anyone of them fails and combines their output using @f@. +-- The parser succeeds if both the parsers succeed. +-- +-- /Pre-release/ +-- +{-# INLINE teeWith #-} +teeWith :: Monad m + => (a -> b -> c) -> Parser x m a -> Parser x m b -> Parser x m c +teeWith f p1 p2 = + D.toParserK $ D.teeWith f (D.fromParserK p1) (D.fromParserK p2) + +-- | Like 'teeWith' but ends parsing and zips the results, if available, +-- whenever the first parser ends. +-- +-- /Pre-release/ +-- +{-# INLINE teeWithFst #-} +teeWithFst :: Monad m + => (a -> b -> c) -> Parser x m a -> Parser x m b -> Parser x m c +teeWithFst f p1 p2 = + D.toParserK $ D.teeWithFst f (D.fromParserK p1) (D.fromParserK p2) + +-- | Like 'teeWith' but ends parsing and zips the results, if available, +-- whenever any of the parsers ends or fails. +-- +-- /Unimplemented/ +-- +{-# INLINE teeWithMin #-} +teeWithMin :: Monad m + => (a -> b -> c) -> Parser x m a -> Parser x m b -> Parser x m c +teeWithMin f p1 p2 = + D.toParserK $ D.teeWithMin f (D.fromParserK p1) (D.fromParserK p2) +-} + +-- | Sequential alternative. Apply the input to the first parser and return the +-- result if the parser succeeds. If the first parser fails then backtrack and +-- apply the same input to the second parser and return the result. +-- +-- Note: This implementation is not lazy in the second argument. The following +-- will fail: +-- +-- >>> Stream.parse (Parser.satisfy (> 0) `Parser.alt` undefined) $ Stream.fromList [1..10] +-- Right 1 +-- +-- Compare with 'Alternative' instance method '<|>'. This implementation allows +-- stream fusion but has quadratic complexity. This can fuse with other +-- operations and can be much faster than 'Alternative' instance for small +-- number (less than 8) of alternatives. +-- +-- /Pre-release/ +-- +{-# INLINE alt #-} +alt :: Monad m => Parser x m a -> Parser x m a -> Parser x m a +alt p1 p2 = D.toParserK $ D.alt (D.fromParserK p1) (D.fromParserK p2) + +{- +-- | Shortest alternative. Apply both parsers in parallel but choose the result +-- from the one which consumed least input i.e. take the shortest succeeding +-- parse. +-- +-- /Pre-release/ +-- +{-# INLINE shortest #-} +shortest :: Monad m + => Parser x m a -> Parser x m a -> Parser x m a +shortest p1 p2 = D.toParserK $ D.shortest (D.fromParserK p1) (D.fromParserK p2) + +-- | Longest alternative. Apply both parsers in parallel but choose the result +-- from the one which consumed more input i.e. take the longest succeeding +-- parse. +-- +-- /Pre-release/ +-- +{-# INLINE longest #-} +longest :: Monad m + => Parser x m a -> Parser x m a -> Parser x m a +longest p1 p2 = D.toParserK $ D.longest (D.fromParserK p1) (D.fromParserK p2) +-} + +-- | Run a parser without consuming the input. +-- +{-# INLINE lookAhead #-} +lookAhead :: Monad m => Parser a m b -> Parser a m b +lookAhead p = D.toParserK $ D.lookAhead $ D.fromParserK p + +-- | Takes at-most @n@ input elements. +-- +-- * Stops - when the collecting parser stops. +-- * Fails - when the collecting parser fails. +-- +-- >>> Stream.parse (Parser.takeP 4 (Parser.takeEQ 2 Fold.toList)) $ Stream.fromList [1, 2, 3, 4, 5] +-- Right [1,2] +-- +-- >>> Stream.parse (Parser.takeP 4 (Parser.takeEQ 5 Fold.toList)) $ Stream.fromList [1, 2, 3, 4, 5] +-- Left (ParseError "takeEQ: Expecting exactly 5 elements, input terminated on 4") +-- +-- /Internal/ +{-# INLINE takeP #-} +takeP :: Monad m => Int -> Parser a m b -> Parser a m b +takeP i p = D.toParserK $ D.takeP i $ D.fromParserK p + +------------------------------------------------------------------------------- +-- Sequential Collection +------------------------------------------------------------------------------- +-- +-- | @concatSequence f p@ collects sequential parses of parsers in a +-- serial stream @p@ using the fold @f@. Fails if the input ends or any +-- of the parsers fail. +-- +-- An even more efficient implementation can use ParserD type Parser in +-- the stream. +-- +-- /Pre-release/ +-- +{-# INLINE concatSequence #-} +concatSequence :: + Monad m => + Fold m b c -> Stream m (Parser a m b) -> Parser a m c +concatSequence f p = + let sp = fmap D.fromParserK p + in D.toParserK $ D.sequence sp f + +-- | Map a 'Parser' returning function on the result of a 'Parser'. +-- +-- Compare with 'Monad' instance method '>>='. This implementation allows +-- stream fusion but has quadratic complexity. This can fuse with other +-- operations and can be much faster than 'Monad' instance for small number +-- (less than 8) of compositions. +-- +-- /Pre-release/ +-- +{-# INLINE concatMap #-} +concatMap :: Monad m + => (b -> Parser a m c) -> Parser a m b -> Parser a m c +concatMap f p = D.toParserK $ D.concatMap (D.fromParserK . f) (D.fromParserK p) + +{- +------------------------------------------------------------------------------- +-- Alternative Collection +------------------------------------------------------------------------------- +-- +-- | @choice parsers@ applies the @parsers@ in order and returns the first +-- successful parse. +-- +-- This is same as 'asum' but more efficient. +-- +-- /Broken/ +-- +{-# INLINE choice #-} +choice :: + (Functor t, Foldable t, Monad m) => t (Parser a m b) -> Parser a m b +choice ps = D.toParserK $ D.choice $ D.fromParserK <$> ps +-} + +------------------------------------------------------------------------------- +-- Sequential Repetition +------------------------------------------------------------------------------- + +-- | Like 'many' but uses a 'Parser' instead of a 'Fold' to collect the +-- results. Parsing stops or fails if the collecting parser stops or fails. +-- +-- /Unimplemented/ +-- +{-# INLINE manyP #-} +manyP :: -- Monad m => + Parser a m b -> Parser b m c -> Parser a m c +manyP _p _f = undefined -- D.toParserK $ D.manyP (D.fromParserK p) f + +-- | Collect zero or more parses. Apply the supplied parser repeatedly on the +-- input stream and push the parse results to a downstream fold. +-- +-- Stops: when the downstream fold stops or the parser fails. +-- Fails: never, produces zero or more results. +-- +-- >>> many = Parser.countBetween 0 maxBound +-- +-- Compare with 'Control.Applicative.many'. +-- +{-# INLINE many #-} +many :: Monad m => Parser a m b -> Fold m b c -> Parser a m c +many p f = D.toParserK $ D.many (D.fromParserK p) f + +-- Note: many1 would perhaps be a better name for this and consistent with +-- other names like takeWhile1. But we retain the name "some" for +-- compatibility. +-- +-- | Collect one or more parses. Apply the supplied parser repeatedly on the +-- input stream and push the parse results to a downstream fold. +-- +-- Stops: when the downstream fold stops or the parser fails. +-- Fails: if it stops without producing a single result. +-- +-- >>> some p f = Parser.manyP p (Parser.takeGE 1 f) +-- >>> some = Parser.countBetween 1 maxBound +-- +-- Compare with 'Control.Applicative.some'. +-- +{-# INLINE some #-} +some :: Monad m => Parser a m b -> Fold m b c -> Parser a m c +some p f = D.toParserK $ D.some (D.fromParserK p) f +-- some p f = manyP p (takeGE 1 f) +-- some = countBetween 1 maxBound + +-- | @countBetween m n f p@ collects between @m@ and @n@ sequential parses of +-- parser @p@ using the fold @f@. Stop after collecting @n@ results. Fails if +-- the input ends or the parser fails before @m@ results are collected. +-- +-- >>> countBetween m n p f = Parser.manyP p (Parser.takeBetween m n f) +-- +-- /Unimplemented/ +-- +{-# INLINE countBetween #-} +countBetween :: -- Monad m => + Int -> Int -> Parser a m b -> Fold m b c -> Parser a m c +countBetween _m _n _p = undefined +-- countBetween m n p f = manyP p (takeBetween m n f) + +-- | @count n f p@ collects exactly @n@ sequential parses of parser @p@ using +-- the fold @f@. Fails if the input ends or the parser fails before @n@ +-- results are collected. +-- +-- >>> count n = Parser.countBetween n n +-- >>> count n p f = Parser.manyP p (Parser.takeEQ n f) +-- +-- /Unimplemented/ +-- +{-# INLINE count #-} +count :: -- Monad m => + Int -> Parser a m b -> Fold m b c -> Parser a m c +count n = countBetween n n +-- count n p f = manyP p (takeEQ n f) + +-- | Like 'manyTill' but uses a 'Parser' to collect the results instead of a +-- 'Fold'. Parsing stops or fails if the collecting parser stops or fails. +-- +-- We can implemnent parsers like the following using 'manyTillP': +-- +-- @ +-- countBetweenTill m n f p = manyTillP (takeBetween m n f) p +-- @ +-- +-- /Unimplemented/ +-- +{-# INLINE manyTillP #-} +manyTillP :: -- Monad m => + Parser a m b -> Parser a m x -> Parser b m c -> Parser a m c +manyTillP _p1 _p2 _f = undefined + -- D.toParserK $ D.manyTillP (D.fromParserK p1) (D.fromParserK p2) f + +-- | @manyTill chunking test f@ tries the parser @test@ on the input, if @test@ +-- fails it backtracks and tries @chunking@, after @chunking@ succeeds @test@ is +-- tried again and so on. The parser stops when @test@ succeeds. The output of +-- @test@ is discarded and the output of @chunking@ is accumulated by the +-- supplied fold. The parser fails if @chunking@ fails. +-- +-- Stops when the fold @f@ stops. +-- +{-# INLINE manyTill #-} +manyTill :: Monad m + => Parser a m b -- ^ Chunking parser. Parses chunks of input. + -> Parser a m x -- ^ Test parser. Parsing stops when this parser succeeds + -- else backtract and run the chunking parser. + -> Fold m b c -- ^ Folds the output of the chunking parser. + -> Parser a m c +manyTill collect test f = + D.toParserK $ D.manyTill (D.fromParserK collect) (D.fromParserK test) f + +-- | @manyThen f collect recover@ repeats the parser @collect@ on the input and +-- collects the output in the supplied fold. If the the parser @collect@ fails, +-- parser @recover@ is run until it stops and then we start repeating the +-- parser @collect@ again. The parser fails if the recovery parser fails. +-- +-- For example, this can be used to find a key frame in a video stream after an +-- error. +-- +-- /Unimplemented/ +-- +{-# INLINE manyThen #-} +manyThen :: -- (Foldable t, Monad m) => + Parser a m b -> Parser a m x -> Fold m b c -> Parser a m c +manyThen _parser _recover _f = undefined + +------------------------------------------------------------------------------- +-- Interleaving +------------------------------------------------------------------------------- +-- +-- To deinterleave we can chain two parsers one behind the other. The input is +-- given to the first parser and the input definitively rejected by the first +-- parser is given to the second parser. +-- +-- We can either have the parsers themselves buffer the input or use the shared +-- global buffer to hold it until none of the parsers need it. When the first +-- parser returns Skip (i.e. rewind) we let the second parser consume the +-- rejected input and when it is done we move the cursor forward to the first +-- parser again. This will require a "move forward" command as well. +-- +-- To implement grep we can use three parsers, one to find the pattern, one +-- to store the context behind the pattern and one to store the context in +-- front of the pattern. When a match occurs we need to emit the accumulator of +-- all the three parsers. One parser can count the line numbers to provide the +-- line number info. + +-- XXX rename this to intercalate +-- | Apply two parsers alternately to an input stream. The input stream is +-- considered an interleaving of two patterns. The two parsers represent the +-- two patterns. +-- +{-# INLINE deintercalate #-} +deintercalate :: Monad m => + Parser a m x + -> Parser a m y + -> Fold m (Either x y) z + -> Parser a m z +deintercalate contentL contentR sink = + D.toParserK + $ D.deintercalate + (D.fromParserK contentL) (D.fromParserK contentR) sink + +-- | Parse items separated by a separator parsed by the supplied parser. At +-- least one item must be present for the parser to succeed. +-- +-- Note that this can go in infinite loop if both the parsers fail on some +-- input. Detection of that would make the implementation more complex. +-- +{-# INLINE sepBy1 #-} +sepBy1 :: Monad m => + Parser a m b -> Parser a m x -> Fold m b c -> Parser a m c +sepBy1 p sep sink = do + x <- p + f <- fromEffect $ FL.reduce sink + f1 <- fromEffect $ FL.snoc f x + many (sep >> p) f1 + +-- | Run the content parser first, when it is done, the separator parser is +-- run, when it is done content parser is run again and so on. If none of the +-- parsers consumes an input then parser returns a failure. +-- +-- >>> sepBy p1 p2 sink = Parser.deintercalate p1 p2 (Fold.catLefts sink) +-- >>> sepBy content sep sink = Parser.sepBy1 content sep sink <|> return mempty +-- +{-# INLINE sepBy #-} +sepBy :: Monad m => + Parser a m b -> Parser a m x -> Fold m b c -> Parser a m c +sepBy content sep sink = + D.toParserK $ D.sepBy (D.fromParserK content) (D.fromParserK sep) sink +-- sepBy sink = deintercalate (FL.catLefts sink) + +------------------------------------------------------------------------------- +-- Interleaving a collection of parsers +------------------------------------------------------------------------------- +-- +-- | Apply a collection of parsers to an input stream in a round robin fashion. +-- Each parser is applied until it stops and then we repeat starting with the +-- the first parser again. +-- +-- /Unimplemented/ +-- +{-# INLINE roundRobin #-} +roundRobin :: -- (Foldable t, Monad m) => + t (Parser a m b) -> Fold m b c -> Parser a m c +roundRobin _ps _f = undefined + +------------------------------------------------------------------------------- +-- Repeated Alternatives +------------------------------------------------------------------------------- + +-- | Keep trying a parser up to a maximum of @n@ failures. When the parser +-- fails the input consumed till now is dropped and the new instance is tried +-- on the fresh input. +-- +-- /Unimplemented/ +-- +{-# INLINE retryMaxTotal #-} +retryMaxTotal :: -- (Monad m) => + Int -> Parser a m b -> Fold m b c -> Parser a m c +retryMaxTotal _n _p _f = undefined + +-- | Like 'retryMaxTotal' but aborts after @n@ successive failures. +-- +-- /Unimplemented/ +-- +{-# INLINE retryMaxSuccessive #-} +retryMaxSuccessive :: -- (Monad m) => + Int -> Parser a m b -> Fold m b c -> Parser a m c +retryMaxSuccessive _n _p _f = undefined + +-- | Keep trying a parser until it succeeds. When the parser fails the input +-- consumed till now is dropped and the new instance is tried on the fresh +-- input. +-- +-- /Unimplemented/ +-- +{-# INLINE retry #-} +retry :: -- (Monad m) => + Parser a m b -> Parser a m b +retry _p = undefined diff --git a/core/src/Streamly/Internal/Data/Parser/ParserK/Type.hs b/core/src/Streamly/Internal/Data/Parser/ParserK/Type.hs index 1e52889dc..9584dbc59 100644 --- a/core/src/Streamly/Internal/Data/Parser/ParserK/Type.hs +++ b/core/src/Streamly/Internal/Data/Parser/ParserK/Type.hs @@ -20,7 +20,8 @@ module Streamly.Internal.Data.Parser.ParserK.Type ( Step (..) , Parse (..) - , Parser (..) + , Parser (..) -- XXX Stop exporting this + , ParserK , fromPure , fromEffect , die @@ -101,6 +102,8 @@ newtype Parser a m b = MkParser -> m (Step m a r) } +type ParserK = Parser + ------------------------------------------------------------------------------- -- Functor ------------------------------------------------------------------------------- diff --git a/core/src/Streamly/Internal/Data/Producer/Source.hs b/core/src/Streamly/Internal/Data/Producer/Source.hs index bb58def16..47abae70c 100644 --- a/core/src/Streamly/Internal/Data/Producer/Source.hs +++ b/core/src/Streamly/Internal/Data/Producer/Source.hs @@ -282,7 +282,7 @@ parseManyD parser reader = Producer step return return -- /Pre-release/ {-# INLINE parseMany #-} parseMany :: Monad m => - ParserK.Parser a m b + ParserD.Parser a m b -> Producer m (Source x a) a -> Producer m (Source x a) (Either ParseError b) -parseMany parser = parseManyD (ParserD.fromParserK parser) +parseMany = parseManyD diff --git a/core/src/Streamly/Internal/Data/Stream/Chunked.hs b/core/src/Streamly/Internal/Data/Stream/Chunked.hs index 42e6f5096..e1f7dbd5e 100644 --- a/core/src/Streamly/Internal/Data/Stream/Chunked.hs +++ b/core/src/Streamly/Internal/Data/Stream/Chunked.hs @@ -96,7 +96,7 @@ import qualified Streamly.Internal.Data.Array.Mut.Stream as AS import qualified Streamly.Internal.Data.Fold.Type as FL (Fold(..), Step(..)) import qualified Streamly.Internal.Data.Parser as PR import qualified Streamly.Internal.Data.Parser.ParserD as PRD - (Parser(..), Initial(..), fromParserK) + (Parser(..), Initial(..)) import qualified Streamly.Internal.Data.Stream.StreamD as D import qualified Streamly.Internal.Data.Stream.StreamK as K @@ -789,7 +789,7 @@ parseBreak :: parseBreak p s = fmap fromStreamD <$> parseBreakD (PRD.fromParserK p) (toStreamD s) -} -parseBreak p = parseBreakK (PRD.fromParserK p) +parseBreak = parseBreakK ------------------------------------------------------------------------------- -- Elimination - Running Array Folds and parsers diff --git a/core/src/Streamly/Internal/Data/Stream/StreamD/Eliminate.hs b/core/src/Streamly/Internal/Data/Stream/StreamD/Eliminate.hs index c32a3523e..7d18a4851 100644 --- a/core/src/Streamly/Internal/Data/Stream/StreamD/Eliminate.hs +++ b/core/src/Streamly/Internal/Data/Stream/StreamD/Eliminate.hs @@ -181,7 +181,7 @@ parseD parser strm = do -- {-# INLINE [3] parse #-} parse :: Monad m => PR.Parser a m b -> Stream m a -> m (Either ParseError b) -parse = parseD . PRD.fromParserK +parse = parseD -- | Run a 'Parse' over a stream and return rest of the Stream. {-# INLINE_NORMAL parseBreakD #-} @@ -347,7 +347,7 @@ parseBreakD (PRD.Parser pstep initial extract) stream@(Stream step state) = do -- {-# INLINE parseBreak #-} parseBreak :: Monad m => PR.Parser a m b -> Stream m a -> m (Either ParseError b, Stream m a) -parseBreak p = parseBreakD (PRD.fromParserK p) +parseBreak = parseBreakD ------------------------------------------------------------------------------ -- Specialized Folds diff --git a/core/src/Streamly/Internal/Data/Stream/StreamD/Nesting.hs b/core/src/Streamly/Internal/Data/Stream/StreamD/Nesting.hs index bc57264c9..01642e494 100644 --- a/core/src/Streamly/Internal/Data/Stream/StreamD/Nesting.hs +++ b/core/src/Streamly/Internal/Data/Stream/StreamD/Nesting.hs @@ -1620,7 +1620,7 @@ parseMany => PR.Parser a m b -> Stream m a -> Stream m (Either ParseError b) -parseMany p = parseManyD (PRD.fromParserK p) +parseMany = parseManyD -- | Apply a stream of parsers to an input stream and emit the results in the -- output stream. @@ -1890,7 +1890,7 @@ parseIterate -> b -> Stream m a -> Stream m (Either ParseError b) -parseIterate f = parseIterateD (PRD.fromParserK . f) +parseIterate = parseIterateD ------------------------------------------------------------------------------ -- Grouping diff --git a/core/src/Streamly/Internal/Data/Stream/StreamK.hs b/core/src/Streamly/Internal/Data/Stream/StreamK.hs index f721b8df0..fc563799c 100644 --- a/core/src/Streamly/Internal/Data/Stream/StreamK.hs +++ b/core/src/Streamly/Internal/Data/Stream/StreamK.hs @@ -1231,7 +1231,7 @@ parseBreakD (PR.Parser pstep initial extract) stream = do {-# INLINE parseBreak #-} parseBreak :: Monad m => Parser.Parser a m b -> Stream m a -> m (Either ParseError b, Stream m a) -parseBreak p = parseBreakD (PR.fromParserK p) +parseBreak = parseBreakD {-# INLINE parse #-} parse :: Monad m => diff --git a/core/src/Streamly/Internal/Serialize/FromBytes.hs b/core/src/Streamly/Internal/Serialize/FromBytes.hs index 9cf56e642..4771f8fbc 100644 --- a/core/src/Streamly/Internal/Serialize/FromBytes.hs +++ b/core/src/Streamly/Internal/Serialize/FromBytes.hs @@ -54,7 +54,7 @@ import qualified Streamly.Internal.Data.Array as A import qualified Streamly.Internal.Data.Parser as PR (fromPure, either, satisfy, takeEQ) import qualified Streamly.Internal.Data.Parser.ParserD as PRD - (Parser(..), Initial(..), Step(..), toParserK) + (Parser(..), Initial(..), Step(..)) -- Note: The () type does not need to have an on-disk representation in theory. -- But we use a concrete representation for it so that we count how many () @@ -155,7 +155,7 @@ word16beD = PRD.Parser step initial extract -- {-# INLINE word16be #-} word16be :: Monad m => Parser Word8 m Word16 -word16be = PRD.toParserK word16beD +word16be = word16beD -- | Little endian (LSB first) Word16 {-# INLINE word16leD #-} @@ -180,7 +180,7 @@ word16leD = PRD.Parser step initial extract -- {-# INLINE word16le #-} word16le :: Monad m => Parser Word8 m Word16 -word16le = PRD.toParserK word16leD +word16le = word16leD -- | Big endian (MSB first) Word32 {-# INLINE word32beD #-} @@ -207,7 +207,7 @@ word32beD = PRD.Parser step initial extract -- {-# INLINE word32be #-} word32be :: Monad m => Parser Word8 m Word32 -word32be = PRD.toParserK word32beD +word32be = word32beD -- | Little endian (LSB first) Word32 {-# INLINE word32leD #-} @@ -233,7 +233,7 @@ word32leD = PRD.Parser step initial extract -- {-# INLINE word32le #-} word32le :: Monad m => Parser Word8 m Word32 -word32le = PRD.toParserK word32leD +word32le = word32leD -- | Big endian (MSB first) Word64 {-# INLINE word64beD #-} @@ -260,7 +260,7 @@ word64beD = PRD.Parser step initial extract -- {-# INLINE word64be #-} word64be :: Monad m => Parser Word8 m Word64 -word64be = PRD.toParserK word64beD +word64be = word64beD -- | Little endian (LSB first) Word64 {-# INLINE word64leD #-} @@ -286,7 +286,7 @@ word64leD = PRD.Parser step initial extract -- {-# INLINE word64le #-} word64le :: Monad m => Parser Word8 m Word64 -word64le = PRD.toParserK word64leD +word64le = word64leD {-# INLINE int8 #-} int8 :: Monad m => Parser Word8 m Int8 diff --git a/core/src/Streamly/Internal/Unicode/Stream.hs b/core/src/Streamly/Internal/Unicode/Stream.hs index 42de2037e..df9ff28d7 100644 --- a/core/src/Streamly/Internal/Unicode/Stream.hs +++ b/core/src/Streamly/Internal/Unicode/Stream.hs @@ -540,7 +540,7 @@ writeCharUtf8' = ParserD.toFold (parseCharUtf8WithD ErrorOnCodingFailure) {-# INLINE parseCharUtf8With #-} parseCharUtf8With :: Monad m => CodingFailureMode -> Parser.Parser Word8 m Char -parseCharUtf8With = ParserD.toParserK . parseCharUtf8WithD +parseCharUtf8With = parseCharUtf8WithD -- XXX write it as a parser and use parseMany to decode a stream, need to check -- if that preserves the same performance. Or we can use a resumable parser diff --git a/core/streamly-core.cabal b/core/streamly-core.cabal index f819424a6..acf9c2196 100644 --- a/core/streamly-core.cabal +++ b/core/streamly-core.cabal @@ -390,6 +390,7 @@ library , Streamly.Data.Array.Mut , Streamly.Data.Fold , Streamly.Data.Parser + , Streamly.Data.Parser.ParserK , Streamly.Data.Stream , Streamly.Data.Stream.StreamK , Streamly.Data.Unfold @@ -419,6 +420,7 @@ library , Streamly.Internal.Data.Stream.Cross , Streamly.Internal.Data.List , Streamly.Data.Stream.Zip + , Streamly.Internal.Data.Parser.ParserDK build-depends: -- streamly-base diff --git a/src/Streamly/Internal/Data/Stream/IsStream/Eliminate.hs b/src/Streamly/Internal/Data/Stream/IsStream/Eliminate.hs index d3854e8a5..7c3827478 100644 --- a/src/Streamly/Internal/Data/Stream/IsStream/Eliminate.hs +++ b/src/Streamly/Internal/Data/Stream/IsStream/Eliminate.hs @@ -382,7 +382,7 @@ parseD p = D.parseD p . toStreamD -- /Internal/ {-# INLINE parseK #-} parseK :: Monad m => PRK.Parser a m b -> SerialT m a -> m (Either PRD.ParseError b) -parseK = parse +parseK p = parse (PRD.fromParserK p) -- | Parse a stream using the supplied 'Parser'. -- @@ -404,7 +404,7 @@ parseK = parse -- {-# INLINE [3] parse #-} parse :: Monad m => Parser a m b -> SerialT m a -> m (Either PRD.ParseError b) -parse = parseD . PRD.fromParserK +parse = parseD ------------------------------------------------------------------------------ -- Specific Fold Functions diff --git a/src/Streamly/Internal/Data/Stream/IsStream/Reduce.hs b/src/Streamly/Internal/Data/Stream/IsStream/Reduce.hs index fe64babd2..02515fac2 100644 --- a/src/Streamly/Internal/Data/Stream/IsStream/Reduce.hs +++ b/src/Streamly/Internal/Data/Stream/IsStream/Reduce.hs @@ -189,8 +189,7 @@ import qualified Streamly.Internal.Data.Array.Type as A import qualified Streamly.Internal.Data.Fold as FL (Fold, Step(..), takeEndBy_, takeEndBy, catMaybes, take) import qualified Streamly.Internal.Data.IsMap as IsMap -import qualified Streamly.Internal.Data.Parser.ParserD as PRD - (Parser(..), fromParserK) +import qualified Streamly.Internal.Data.Parser.ParserD as PRD (Parser(..)) import qualified Streamly.Internal.Data.Stream.IsStream.Type as IsStream import qualified Streamly.Internal.Data.Stream.StreamD as D ( foldMany @@ -388,7 +387,7 @@ parseMany -> t m a -> t m (Either ParseError b) parseMany p m = - fromStreamD $ D.parseManyD (PRD.fromParserK p) (toStreamD m) + fromStreamD $ D.parseManyD p (toStreamD m) -- | Same as parseMany but for StreamD streams. -- @@ -456,7 +455,7 @@ parseIterate -> t m a -> t m (Either ParseError b) parseIterate f i m = fromStreamD $ - D.parseIterateD (PRD.fromParserK . f) i (toStreamD m) + D.parseIterateD f i (toStreamD m) ------------------------------------------------------------------------------ -- Grouping diff --git a/test/Streamly/Test/Data/Parser.hs b/test/Streamly/Test/Data/Parser.hs index abd6361e1..ee19491ff 100644 --- a/test/Streamly/Test/Data/Parser.hs +++ b/test/Streamly/Test/Data/Parser.hs @@ -13,6 +13,7 @@ import Test.QuickCheck.Monadic (monadicIO, assert, run) import Prelude hiding (sequence) +import qualified Control.Monad.Fail as Fail import qualified Data.List as List import qualified Prelude import qualified Streamly.Internal.Data.Array as A @@ -97,7 +98,7 @@ dieM = parserFail :: Property parserFail = property $ - case runIdentity $ S.parse (fail err) (S.fromList [0 :: Int]) of + case runIdentity $ S.parse (Fail.fail err) (S.fromList [0 :: Int]) of Right _ -> False Left (ParseError e) -> err == e where @@ -255,7 +256,7 @@ takeGE = let list_length = Prelude.length ls in - case runIdentity $S.parse (P.takeGE n FL.toList) (S.fromList ls) of + case runIdentity $ S.parse (P.takeGE n FL.toList) (S.fromList ls) of Right parsed_list -> if n <= list_length then checkListEqual parsed_list ls