Merge pull request #231 from mrkkrp/add-more-combinators

* Add ‘takeP’, ‘takeRest’, and ‘atEnd’
* Fix a bug in ‘char'’
This commit is contained in:
Mark Karpov 2017-07-04 15:34:40 +07:00 committed by GitHub
commit 0eff2d0103
8 changed files with 256 additions and 92 deletions

View File

@ -77,8 +77,11 @@
signalling non-trivial failures. Signatures of some functions (`failure`,
`token`) have been changed accordingly.
* Added `takeWhileP` and `takeWhile1P` to `MonadParsec`. Added `skipWhileP`,
`skipWhile1P` as derivatives from those primitive combinators.
* Added `takeWhileP`, `takeWhile1P` and `takeP` to `MonadParsec`.
* Added `takeRest` non-primitive combinator to consume the rest of input.
* Added `atEnd` which returns `True` when end of input has been reached.
* Dropped `oneOf'` and `noneOf'` from `Text.Megaparsec.Char`. These were
seldom (if ever) used and are easily re-implemented.

View File

@ -81,6 +81,9 @@ similar to those found in Attoparsec:
* `takeWhile` and `takeWhile1` are about 150 times faster than approaches
involving `many`, `manyTill` and other similar combinators.
* `takeP` allows to grab n tokens from the stream and returns them as a
“chunk” of the stream.
So now that we have matched the main “performance boosters” of Attoparsec,
Megaparsec 6 is not significantly slower than Attoparsec if you write your
parser carefully.
@ -273,7 +276,7 @@ differences between the two libraries:
foo”, “in expression x”, etc. This is not possible with Parsec.
* Megaparsec is faster and supports efficient operations on top of `tokens`,
`takeWhileP`, and `takeWhile1P` just like Attoparsec.
`takeWhileP`, `takeWhile1P`, `takeP` just like Attoparsec.
* Megaparsec is ~~better~~ supported.

View File

@ -83,8 +83,8 @@ module Text.Megaparsec
, unexpected
, match
, region
, skipWhileP
, skipWhile1P
, takeRest
, atEnd
-- * Parser state combinators
, getInput
, setInput
@ -116,7 +116,7 @@ import Data.Data (Data)
import Data.List.NonEmpty (NonEmpty (..))
import Data.Maybe (fromJust)
import Data.Proxy
import Data.Semigroup
import Data.Semigroup hiding (option)
import Data.Set (Set)
import Data.Typeable (Typeable)
import Debug.Trace
@ -693,7 +693,7 @@ class (Stream s, A.Alternative m, MonadPlus m)
-- > testChar x =
-- > if f x
-- > then Right x
-- > else Left (Set.singleton (Tokens (x:|[])), Set.empty)
-- > else Left (pure (Tokens (x:|[])), Set.empty)
token
:: (Token s -> Either ( Maybe (ErrorItem (Token s))
@ -743,7 +743,7 @@ class (Stream s, A.Alternative m, MonadPlus m)
-- The following equations should clarify the behavior:
--
-- > takeWhileP (Just "foo") f = many (satisfy f <?> "foo")
-- > takeWhileP Nothing f = many (satisfy f)
-- > takeWhileP Nothing f = many (satisfy f)
--
-- The combinator never fails, although it may parse an empty chunk.
--
@ -755,7 +755,8 @@ class (Stream s, A.Alternative m, MonadPlus m)
-> m (Tokens s) -- ^ A chunk of matching tokens
-- | Similar to 'takeWhileP', but fails if it can't parse at least one
-- token.
-- token. Note that the combinator either succeeds or fails without
-- consuming any input, so 'try' is not necessary with it.
--
-- @since 6.0.0
@ -764,6 +765,27 @@ class (Stream s, A.Alternative m, MonadPlus m)
-> (Token s -> Bool) -- ^ Predicate to use to test tokens
-> m (Tokens s) -- ^ A chunk of matching tokens
-- | Extract the specified number of tokens from the input stream and
-- return them packed as a chunk of stream. If there is not enough tokens
-- in the stream, a parse error will be signaled. It's guaranteed that if
-- the parser succeeds, the requested number of tokens will be returned.
--
-- The parser is roughly equivalent to:
--
-- > takeP (Just "foo") n = count n (anyChar <?> "foo")
-- > takeP Nothing n = count n anyChar
--
-- Note that if the combinator fails due to insufficient number of tokens
-- in the input stream, it backtracks automatically. No 'try' is necessary
-- with 'takeP'.
--
-- @since 6.0.0
takeP
:: Maybe String -- ^ Name for a single token in the row
-> Int -- ^ How many tokens to extract
-> m (Tokens s) -- ^ A chunk of matching tokens
-- | Return the full parser state as a 'State' record.
getParserState :: m (State s)
@ -786,6 +808,7 @@ instance (Ord e, Stream s) => MonadParsec e s (ParsecT e s m) where
tokens = pTokens
takeWhileP = pTakeWhileP
takeWhile1P = pTakeWhile1P
takeP = pTakeP
getParserState = pGetParserState
updateParserState = pUpdateParserState
@ -972,6 +995,27 @@ pTakeWhile1P ml f = ParsecT $ \(State input (pos:|z) tp w) cok _ _ eerr ->
in cok ts (State input' (npos:|z) (tp + len) w) hs
{-# INLINE pTakeWhile1P #-}
pTakeP :: forall e s m. Stream s
=> Maybe String
-> Int
-> ParsecT e s m (Tokens s)
pTakeP ml n = ParsecT $ \s@(State input (pos:|z) tp w) cok _ _ eerr ->
let pxy = Proxy :: Proxy s
el = Label <$> (ml >>= NE.nonEmpty)
ps = maybe E.empty E.singleton el
in case takeN_ n input of
Nothing ->
eerr (TrivialError (pos:|z) (pure EndOfInput) ps) s
Just (ts, input') ->
let len = chunkLength pxy ts
!apos = positionAtN pxy pos ts
!npos = advanceN pxy w pos ts
in if len /= n
then eerr (TrivialError (npos:|z) (pure EndOfInput) ps)
(State input (apos:|z) tp w)
else cok ts (State input' (npos:|z) (tp + len) w) mempty
{-# INLINE pTakeP #-}
pGetParserState :: ParsecT e s m (State s)
pGetParserState = ParsecT $ \s _ _ eok _ -> eok s s mempty
{-# INLINE pGetParserState #-}
@ -1004,6 +1048,7 @@ instance MonadParsec e s m => MonadParsec e s (L.StateT st m) where
tokens e ts = lift (tokens e ts)
takeWhileP l f = lift (takeWhileP l f)
takeWhile1P l f = lift (takeWhile1P l f)
takeP l n = lift (takeP l n)
getParserState = lift getParserState
updateParserState f = lift (updateParserState f)
@ -1025,6 +1070,7 @@ instance MonadParsec e s m => MonadParsec e s (S.StateT st m) where
tokens e ts = lift (tokens e ts)
takeWhileP l f = lift (takeWhileP l f)
takeWhile1P l f = lift (takeWhile1P l f)
takeP l n = lift (takeP l n)
getParserState = lift getParserState
updateParserState f = lift (updateParserState f)
@ -1043,6 +1089,7 @@ instance MonadParsec e s m => MonadParsec e s (L.ReaderT r m) where
tokens e ts = lift (tokens e ts)
takeWhileP l f = lift (takeWhileP l f)
takeWhile1P l f = lift (takeWhile1P l f)
takeP l n = lift (takeP l n)
getParserState = lift getParserState
updateParserState f = lift (updateParserState f)
@ -1064,6 +1111,7 @@ instance (Monoid w, MonadParsec e s m) => MonadParsec e s (L.WriterT w m) where
tokens e ts = lift (tokens e ts)
takeWhileP l f = lift (takeWhileP l f)
takeWhile1P l f = lift (takeWhile1P l f)
takeP l n = lift (takeP l n)
getParserState = lift getParserState
updateParserState f = lift (updateParserState f)
@ -1085,6 +1133,7 @@ instance (Monoid w, MonadParsec e s m) => MonadParsec e s (S.WriterT w m) where
tokens e ts = lift (tokens e ts)
takeWhileP l f = lift (takeWhileP l f)
takeWhile1P l f = lift (takeWhile1P l f)
takeP l n = lift (takeP l n)
getParserState = lift getParserState
updateParserState f = lift (updateParserState f)
@ -1108,6 +1157,7 @@ instance (Monoid w, MonadParsec e s m) => MonadParsec e s (L.RWST r w st m) wher
tokens e ts = lift (tokens e ts)
takeWhileP l f = lift (takeWhileP l f)
takeWhile1P l f = lift (takeWhile1P l f)
takeP l n = lift (takeP l n)
getParserState = lift getParserState
updateParserState f = lift (updateParserState f)
@ -1131,6 +1181,7 @@ instance (Monoid w, MonadParsec e s m) => MonadParsec e s (S.RWST r w st m) wher
tokens e ts = lift (tokens e ts)
takeWhileP l f = lift (takeWhileP l f)
takeWhile1P l f = lift (takeWhile1P l f)
takeP l n = lift (takeP l n)
getParserState = lift getParserState
updateParserState f = lift (updateParserState f)
@ -1149,6 +1200,7 @@ instance MonadParsec e s m => MonadParsec e s (IdentityT m) where
tokens e ts = lift $ tokens e ts
takeWhileP l f = lift (takeWhileP l f)
takeWhile1P l f = lift (takeWhile1P l f)
takeP l n = lift (takeP l n)
getParserState = lift getParserState
updateParserState f = lift $ updateParserState f
@ -1228,27 +1280,24 @@ region f m = do
Right x -> return x
{-# INLINEABLE region #-}
-- | The same as 'takeWhileP', but discards the result.
-- | Consume the rest of the input and return it as a chunk. This parser
-- never fails, but may return an empty chunk.
--
-- > takeRest = takeWhileP Nothing (const True)
--
-- @since 6.0.0
skipWhileP :: MonadParsec e s m
=> Maybe String -- ^ Name of a single token in the row
-> (Token s -> Bool) -- ^ Predicate to use to test tokens
-> m ()
skipWhileP l f = void (takeWhileP l f)
{-# INLINE skipWhileP #-}
takeRest :: MonadParsec e s m => m (Tokens s)
takeRest = takeWhileP Nothing (const True)
{-# INLINE takeRest #-}
-- | The same as 'takeWhile1P', but discards the result.
-- | Return 'True' when end of input has been reached.
--
-- @since 6.0.0
skipWhile1P :: MonadParsec e s m
=> Maybe String -- ^ Name of a single token in the row
-> (Token s -> Bool) -- ^ Predicate to use to test tokens
-> m ()
skipWhile1P l f = void (takeWhile1P l f)
{-# INLINE skipWhile1P #-}
atEnd :: MonadParsec e s m => m Bool
atEnd = option False (True <$ eof)
{-# INLINE atEnd #-}
----------------------------------------------------------------------------
-- Parser state combinators

View File

@ -49,6 +49,8 @@ where
import Control.Applicative
import Data.Char
import Data.Functor (void)
import Data.Maybe (fromMaybe)
import Data.Proxy
import Data.Word (Word8)
import qualified Text.Megaparsec.Char as C
@ -91,7 +93,7 @@ tab = C.char 9
-- See also: 'skipMany' and 'spaceChar'.
space :: (MonadParsec e s m, Token s ~ Word8) => m ()
space = skipWhileP (Just "white space") isSpace'
space = void $ takeWhileP (Just "white space") isSpace'
{-# INLINE space #-}
-- | Skip /one/ or more white space characters.
@ -99,7 +101,7 @@ space = skipWhileP (Just "white space") isSpace'
-- See also: 'skipSome' and 'spaceChar'.
space1 :: (MonadParsec e s m, Token s ~ Word8) => m ()
space1 = skipWhile1P (Just "white space") isSpace'
space1 = void $ takeWhile1P (Just "white space") isSpace'
{-# INLINE space1 #-}
----------------------------------------------------------------------------
@ -198,12 +200,14 @@ asciiChar = C.satisfy (< 128) <?> "ASCII character"
-- expecting 'E' or 'e'
char' :: (MonadParsec e s m, Token s ~ Word8) => Token s -> m (Token s)
char' c = choice [C.char c, C.char (swapCase c)]
char' c = choice
[ C.char c
, C.char (fromMaybe c (swapCase c)) ]
where
swapCase x
| isUpper g = fromChar (toLower g)
| isLower g = fromChar (toUpper g)
| otherwise = x
| otherwise = Nothing
where
g = toChar x
{-# INLINE char' #-}
@ -229,6 +233,9 @@ toChar = chr . fromIntegral
-- | Convert a char to byte.
fromChar :: Char -> Word8
fromChar = fromIntegral . ord
fromChar :: Char -> Maybe Word8
fromChar x = let p = ord x in
if p > 0xff
then Nothing
else Just (fromIntegral p)
{-# INLINE fromChar #-}

View File

@ -61,6 +61,7 @@ where
import Control.Applicative
import Data.Char
import Data.Function (on)
import Data.Functor (void)
import Data.List.NonEmpty (NonEmpty (..))
import Data.Proxy
import qualified Data.CaseInsensitive as CI
@ -109,7 +110,7 @@ tab = char '\t'
-- See also: 'skipMany' and 'spaceChar'.
space :: (MonadParsec e s m, Token s ~ Char) => m ()
space = skipWhileP (Just "white space") isSpace
space = void $ takeWhileP (Just "white space") isSpace
{-# INLINE space #-}
-- | Skip /one/ or more white space characters.
@ -119,7 +120,7 @@ space = skipWhileP (Just "white space") isSpace
-- @since 6.0.0
space1 :: (MonadParsec e s m, Token s ~ Char) => m ()
space1 = skipWhile1P (Just "white space") isSpace
space1 = void $ takeWhile1P (Just "white space") isSpace
{-# INLINE space1 #-}
----------------------------------------------------------------------------

View File

@ -155,7 +155,7 @@ skipLineComment :: (MonadParsec e s m, Token s ~ Char)
=> Tokens s -- ^ Line comment prefix
-> m ()
skipLineComment prefix =
C.string prefix *> skipWhileP (Just "character") (/= '\n')
C.string prefix *> void (takeWhileP (Just "character") (/= '\n'))
-- | @skipBlockComment start end@ skips non-nested block comment starting
-- with @start@ and ending with @end@.

View File

@ -6,6 +6,7 @@ module Text.Megaparsec.ByteSpec (spec) where
import Control.Monad
import Data.ByteString (ByteString)
import Data.Char
import Data.Maybe (fromMaybe)
import Data.Proxy
import Data.Semigroup ((<>))
import Data.Void
@ -238,10 +239,13 @@ toChar = chr . fromIntegral
-- | Covert a char to byte.
fromChar :: Char -> Word8
fromChar = fromIntegral . ord
fromChar :: Char -> Maybe Word8
fromChar x = let p = ord x in
if p > 0xff
then Nothing
else Just (fromIntegral p)
-- | Lift char transformation to byte transformation.
liftChar :: (Char -> Char) -> Word8 -> Word8
liftChar f = fromChar . f . toChar
liftChar f x = (fromMaybe x . fromChar . f . toChar) x

View File

@ -137,19 +137,49 @@ spec = do
stateTokensProcessed + length stateInput }
runParser' p st `shouldBe` (st', Right stateInput)
describe "takeWhile1P" $
it "updates position in stream correctly" $
property $ \st@State {..} -> not (null stateInput) ==> do
let p = takeWhile1P Nothing (const True) :: CustomParser [Span]
st' = st
{ stateInput = []
, statePos =
case stateInput of
[] -> statePos
xs -> let _:|z = statePos in spanEnd (last xs) :| z
, stateTokensProcessed =
stateTokensProcessed + length stateInput }
runParser' p st `shouldBe` (st', Right stateInput)
describe "takeWhile1P" $ do
context "when stream is prefixed with matching tokens" $
it "updates position in stream correctly" $
property $ \st@State {..} -> not (null stateInput) ==> do
let p = takeWhile1P Nothing (const True) :: CustomParser [Span]
st' = st
{ stateInput = []
, statePos =
case stateInput of
[] -> statePos
xs -> let _:|z = statePos in spanEnd (last xs) :| z
, stateTokensProcessed =
stateTokensProcessed + length stateInput }
runParser' p st `shouldBe` (st', Right stateInput)
context "when stream is not prefixed with at least one matching token" $
it "updates position in stream correctly" $
property $ \st@State {..} -> do
let p = takeWhile1P Nothing (const False) :: CustomParser [Span]
fst (runParser' p st) `shouldBe` st
describe "takeP" $ do
context "when stream has enough tokens" $
it "updates position in stream correctly" $
property $ \st@State {..} -> not (null stateInput) ==> do
let p = takeP Nothing (length stateInput) :: CustomParser [Span]
st' = st
{ stateInput = []
, statePos =
case stateInput of
[] -> statePos
xs -> let _:|z = statePos in spanEnd (last xs) :| z
, stateTokensProcessed =
stateTokensProcessed + length stateInput }
runParser' p st `shouldBe` (st', Right stateInput)
context "when stream has not enough tokens" $
it "updates position in stream correctly" $
property $ \st@State {..} -> not (null stateInput) ==> do
let p = takeP Nothing (1 + length stateInput) :: CustomParser [Span]
(pos:|z) = statePos
st' = st
{ statePos = positionAtN
(Proxy :: Proxy [Span]) pos stateInput :| z }
fst (runParser' p st) `shouldBe` st'
describe "getNextTokenPosition" $ do
context "when input stream is empty" $
@ -1014,6 +1044,13 @@ spec = do
pe = err (posN 6 s) (elabel "bar")
grs p s (`shouldFailWith` pe)
grs' p s (`failsLeaving` "")
context "without label (testing hints)" $
it "there are no hints" $ do
let p :: MonadParsec Void String m => m String
p = takeWhileP Nothing (== 'a') <* empty
s = "aaa"
grs p s (`shouldFailWith` err (posN 3 s) mempty)
grs' p s (`failsLeaving` "")
describe "takeWhile1P" $ do
context "when stream is prefixed with matching tokens" $
@ -1034,13 +1071,21 @@ spec = do
pe = err posI (utok '3' <> elabel "foo")
grs p s (`shouldFailWith` pe)
grs' p s (`failsLeaving` s)
context "when stream is empty" $
it "signals correct parse error" $ do
let p :: MonadParsec Void String m => m String
p = takeWhile1P (Just "foo") isLetter
pe = err posI (ueof <> elabel "foo")
grs p "" (`shouldFailWith` pe)
grs' p "" (`failsLeaving` "")
context "when stream is empty" $ do
context "with label" $
it "signals correct parse error" $ do
let p :: MonadParsec Void String m => m String
p = takeWhile1P (Just "foo") isLetter
pe = err posI (ueof <> elabel "foo")
grs p "" (`shouldFailWith` pe)
grs' p "" (`failsLeaving` "")
context "without label" $
it "signals correct parse error" $ do
let p :: MonadParsec Void String m => m String
p = takeWhile1P Nothing isLetter
pe = err posI ueof
grs p "" (`shouldFailWith` pe)
grs' p "" (`failsLeaving` "")
context "with two takeWhile1P in a row (testing hints)" $ do
let p :: MonadParsec Void String m => m String
p = do
@ -1059,6 +1104,69 @@ spec = do
pe = err (posN 6 s) (elabel "bar")
grs p s (`shouldFailWith` pe)
grs' p s (`failsLeaving` "")
context "without label (testing hints)" $
it "there are no hints" $ do
let p :: MonadParsec Void String m => m String
p = takeWhile1P Nothing (== 'a') <* empty
s = "aaa"
grs p s (`shouldFailWith` err (posN 3 s) mempty)
grs' p s (`failsLeaving` "")
describe "takeP" $ do
context "when taking 0 tokens" $ do
context "when stream is empty" $
it "succeeds returning zero-length chunk" $ do
let p :: MonadParsec Void String m => m String
p = takeP Nothing 0
grs p "" (`shouldParse` "")
context "when stream is not empty" $
it "succeeds returning zero-length chunk" $
property $ \s -> not (null s) ==> do
let p :: MonadParsec Void String m => m String
p = takeP Nothing 0
grs p s (`shouldParse` "")
grs' p s (`succeedsLeaving` s)
context "when taking >0 tokens" $ do
context "when stream is empty" $ do
context "with label" $
it "signals correct parse error" $
property $ \(Positive n) -> do
let p :: MonadParsec Void String m => m String
p = takeP (Just "foo") n
pe = err posI (ueof <> elabel "foo")
grs p "" (`shouldFailWith` pe)
grs' p "" (`failsLeaving` "")
context "without label" $
it "signals correct parse error" $
property $ \(Positive n) -> do
let p :: MonadParsec Void String m => m String
p = takeP Nothing n
pe = err posI ueof
grs p "" (`shouldFailWith` pe)
context "when stream has not enough tokens" $
it "signals correct parse error" $
property $ \(Positive n) s -> length s < n && not (null s) ==> do
let p :: MonadParsec Void String m => m String
p = takeP (Just "foo") n
pe = err (posN n s) (ueof <> elabel "foo")
grs p s (`shouldFailWith` pe)
grs' p s (`failsLeaving` s)
context "when stream has enough tokens" $
it "succeeds returning the extracted tokens" $
property $ \(Positive n) s -> length s >= n ==> do
let p :: MonadParsec Void String m => m String
p = takeP (Just "foo") n
(s0,s1) = splitAt n s
grs p s (`shouldParse` s0)
grs' p s (`succeedsLeaving` s1)
context "when failing right after takeP (testing hints)" $
it "there are no hints to influence the parse error" $
property $ \(Positive n) s -> length s >= n ==> do
let p :: MonadParsec Void String m => m String
p = takeP (Just "foo") n <* empty
pe = err (posN n s) mempty
grs p s (`shouldFailWith` pe)
grs' p s (`failsLeaving` drop n s)
describe "derivatives from primitive combinators" $ do
@ -1107,45 +1215,34 @@ spec = do
st = st' { statePos = errorPos e }
runParser' p st `shouldBe` (st { statePos = finalPos }, Left r)
describe "skipWhileP" $ do
describe "takeRest" $
it "returns rest of the input" $
property $ \st@State {..} -> do
let p :: Parser String
p = takeRest
(pos:|z) = statePos
st' = st
{ stateInput = []
, statePos = advanceN
(Proxy :: Proxy String)
stateTabWidth
pos
stateInput :| z
, stateTokensProcessed =
stateTokensProcessed + length stateInput }
runParser' p st `shouldBe` (st', Right stateInput)
describe "atEnd" $ do
let p :: Parser Bool
p = atEnd
context "when stream is empty" $
it "returns True" $
prs p "" `shouldParse` True
context "when stream is not empty" $
it "consumes all matching tokens, zero or more" $
it "returns False" $
property $ \s -> not (null s) ==> do
let p :: MonadParsec Void String m => m ()
p = skipWhileP Nothing isLetter
grs p s (`shouldParse` ())
grs' p s (`succeedsLeaving` dropWhile isLetter s)
context "when stream is empty" $
it "succeeds returning empty chunk" $ do
let p :: MonadParsec Void String m => m ()
p = skipWhileP Nothing isLetter
grs p "" (`shouldParse` ())
grs' p "" (`succeedsLeaving` "")
describe "skipWhile1P" $ do
context "when stream is prefixed with matching tokens" $
it "consumes the tokens" $
property $ \s' -> do
let p :: MonadParsec Void String m => m ()
p = skipWhile1P Nothing isLetter
s = 'a' : s'
grs p s (`shouldParse` ())
grs' p s (`succeedsLeaving` dropWhile isLetter s)
context "when stream is not prefixed with at least one matching token" $
it "signals correct parse error" $
property $ \s' -> do
let p :: MonadParsec Void String m => m ()
p = skipWhile1P (Just "foo") isLetter
s = '3' : s'
pe = err posI (utok '3' <> elabel "foo")
grs p s (`shouldFailWith` pe)
grs' p s (`failsLeaving` s)
context "when stream is empty" $
it "signals correct parse error" $ do
let p :: MonadParsec Void String m => m ()
p = skipWhile1P (Just "foo") isLetter
pe = err posI (ueof <> elabel "foo")
grs p "" (`shouldFailWith` pe)
grs' p "" (`failsLeaving` "")
prs p s `shouldParse` False
prs' p s `succeedsLeaving` s
describe "combinators for manipulating parser state" $ do