mirror of
https://github.com/mrkkrp/megaparsec.git
synced 2024-12-28 18:54:34 +03:00
Auto-backtracking for ‘tokens’ and friends
This commit is contained in:
parent
4c5aae7098
commit
3baa263eab
@ -11,6 +11,15 @@
|
||||
* The `count` combinator now works with `Applicative` instances (previously
|
||||
it worked only with instances of `Alternative`). It's now also faster.
|
||||
|
||||
* `tokens` and parsers built upon it (such as `string` and `string'`)
|
||||
backtrack automatically on failure now, that is, when they fail, they
|
||||
never consume any input. This is done to make their consumption model
|
||||
match how error messages are reported (which becomes an important thing as
|
||||
user gets more control with primitives like `withRecovery`). This means,
|
||||
in particular, that it's no longer necessary to use `try` with
|
||||
`tokens`-based parsers. This new feature *does not* affect performance in
|
||||
any way.
|
||||
|
||||
* New primitive parser `withRecovery` added. The parser allows to recover
|
||||
from parse errors “on-the-fly” and continue parsing. Once parsing is
|
||||
finished, several parse errors may be reported or ignored altogether.
|
||||
|
@ -100,10 +100,7 @@ endBy1 p sep = some (p <* sep)
|
||||
-- parser @end@ succeeds. Returns the list of values returned by @p@. This
|
||||
-- parser can be used to scan comments:
|
||||
--
|
||||
-- > simpleComment = string "<!--" >> manyTill anyChar (try $ string "-->")
|
||||
--
|
||||
-- Note that we need to use 'try' since parsers @anyChar@ and @string
|
||||
-- \"-->\"@ overlap and @string \"-->\"@ could consume input before failing.
|
||||
-- > simpleComment = string "<!--" >> manyTill anyChar (string "-->")
|
||||
|
||||
manyTill :: Alternative m => m a -> m end -> m [a]
|
||||
manyTill p end = ([] <$ end) <|> someTill p end
|
||||
|
@ -73,9 +73,6 @@ data Operator m a
|
||||
-- > binary name f = InfixL (reservedOp name >> return f)
|
||||
-- > prefix name f = Prefix (reservedOp name >> return f)
|
||||
-- > postfix name f = Postfix (reservedOp name >> return f)
|
||||
--
|
||||
-- Please note that multi-character operators should use 'try' in order to
|
||||
-- be reported correctly in error messages.
|
||||
|
||||
makeExprParser :: MonadParsec s m t => m a -> [[Operator m a]] -> m a
|
||||
makeExprParser = foldl addPrecLevel
|
||||
|
@ -148,7 +148,7 @@ skipLineComment :: MonadParsec s m Char
|
||||
=> String -- ^ Line comment prefix
|
||||
-> m ()
|
||||
skipLineComment prefix = p >> void (manyTill C.anyChar n)
|
||||
where p = try (C.string prefix)
|
||||
where p = C.string prefix
|
||||
n = lookAhead C.newline
|
||||
|
||||
-- | @skipBlockComment start end@ skips non-nested block comment starting
|
||||
@ -159,8 +159,8 @@ skipBlockComment :: MonadParsec s m Char
|
||||
-> String -- ^ End of block comment
|
||||
-> m ()
|
||||
skipBlockComment start end = p >> void (manyTill C.anyChar n)
|
||||
where p = try (C.string start)
|
||||
n = try (C.string end)
|
||||
where p = C.string start
|
||||
n = C.string end
|
||||
|
||||
-- Indentation
|
||||
|
||||
|
@ -458,6 +458,11 @@ class (A.Alternative m, Monad m, Stream s t)
|
||||
-- 1:1:
|
||||
-- unexpected "le"
|
||||
-- expecting "let" or "lexical"
|
||||
--
|
||||
-- Please note that as of Megaparsec 4.4.0, 'string' backtracks
|
||||
-- automatically (see 'tokens'), so it does not need 'try'. However, the
|
||||
-- examples above demonstrate the idea behind 'try' so well that it was
|
||||
-- decided to keep them.
|
||||
|
||||
try :: m a -> m a
|
||||
|
||||
@ -524,6 +529,22 @@ class (A.Alternative m, Monad m, Stream s t)
|
||||
-- This can be used for example to write 'Text.Megaparsec.Char.string':
|
||||
--
|
||||
-- > string = tokens updatePosString (==)
|
||||
--
|
||||
-- Note that beginning from Megaparsec 4.4.0, this is an auto-backtracking
|
||||
-- primitive, which means that if it fails, it never consumes any
|
||||
-- input. This is done to make its consumption model match how error
|
||||
-- messages for this primitive are reported (which becomes an important
|
||||
-- thing as user gets more control with primitives like 'withRecovery'):
|
||||
--
|
||||
-- >>> parseTest (string "abc") "abd"
|
||||
-- 1:1:
|
||||
-- unexpected "abd"
|
||||
-- expecting "abc"
|
||||
--
|
||||
-- This means, in particular, that it's no longer necessary to use 'try'
|
||||
-- with 'tokens'-based parsers, such as 'Text.Megaparsec.Char.string' and
|
||||
-- 'Text.Megaparsec.Char.string''. This new feature /does not/ affect
|
||||
-- performance in any way.
|
||||
|
||||
tokens :: Eq t
|
||||
=> (Int -> SourcePos -> [t] -> SourcePos)
|
||||
@ -568,7 +589,8 @@ pLabel l p = ParsecT $ \s cok cerr eok eerr ->
|
||||
in unParser p s cok' cerr eok' eerr'
|
||||
|
||||
pTry :: ParsecT s m a -> ParsecT s m a
|
||||
pTry p = ParsecT $ \s cok _ eok eerr -> unParser p s cok eerr eok eerr
|
||||
pTry p = ParsecT $ \s cok _ eok eerr ->
|
||||
unParser p s cok eerr eok eerr
|
||||
{-# INLINE pTry #-}
|
||||
|
||||
pLookAhead :: ParsecT s m a -> ParsecT s m a
|
||||
@ -632,7 +654,7 @@ pTokens :: Stream s t
|
||||
-> [t]
|
||||
-> ParsecT s m [t]
|
||||
pTokens _ _ [] = ParsecT $ \s _ _ eok _ -> eok [] s mempty
|
||||
pTokens nextpos test tts = ParsecT $ \s@(State input pos w) cok cerr _ eerr ->
|
||||
pTokens nextpos test tts = ParsecT $ \s@(State input pos w) cok _ _ eerr ->
|
||||
let r = showToken . reverse
|
||||
errExpect x = setErrorMessage (Expected $ showToken tts)
|
||||
(newErrorMessage (Unexpected x) pos)
|
||||
@ -641,13 +663,12 @@ pTokens nextpos test tts = ParsecT $ \s@(State input pos w) cok cerr _ eerr ->
|
||||
s' = State rs pos' w
|
||||
in cok (reverse is) s' mempty
|
||||
walk (t:ts) is rs =
|
||||
let errorCont = if null is then eerr else cerr
|
||||
what = if null is then eoi else r is
|
||||
let what = if null is then eoi else r is
|
||||
in case uncons rs of
|
||||
Nothing -> errorCont (errExpect what) s
|
||||
Nothing -> eerr (errExpect what) s
|
||||
Just (x,xs)
|
||||
| test t x -> walk ts (x:is) xs
|
||||
| otherwise -> errorCont (errExpect $ r (x:is)) s
|
||||
| otherwise -> eerr (errExpect $ r (x:is)) s
|
||||
in walk tts [] input
|
||||
{-# INLINE pTokens #-}
|
||||
|
||||
|
@ -1,4 +1,3 @@
|
||||
|
||||
module Bugs.Bug9 (main) where
|
||||
|
||||
import Control.Applicative (empty)
|
||||
@ -41,7 +40,7 @@ integer :: Parser Integer
|
||||
integer = lexeme L.integer
|
||||
|
||||
operator :: String -> Parser String
|
||||
operator = try . L.symbol sc
|
||||
operator = L.symbol sc
|
||||
|
||||
parseTopLevel :: Parser Expr
|
||||
parseTopLevel = parseExpr <* eof
|
||||
|
@ -131,8 +131,7 @@ prop_eol s = checkParser eol r s
|
||||
| "\r\n" `isPrefixOf` s = posErr 2 s [uneCh (s !! 2), exEof]
|
||||
| otherwise = posErr 0 s [ uneStr (take 2 s)
|
||||
, uneCh '\r'
|
||||
, exSpec "crlf newline"
|
||||
, exSpec "newline" ]
|
||||
, exSpec "end of line" ]
|
||||
|
||||
prop_tab :: String -> Property
|
||||
prop_tab = checkChar tab (== '\t') (Just "tab")
|
||||
|
@ -34,7 +34,7 @@ import Control.Applicative
|
||||
import Data.Char (isLetter, toUpper)
|
||||
import Data.Foldable (asum)
|
||||
import Data.List (isPrefixOf)
|
||||
import Data.Maybe (maybeToList, fromMaybe)
|
||||
import Data.Maybe (maybeToList)
|
||||
|
||||
import Control.Monad.Cont
|
||||
import Control.Monad.Except
|
||||
@ -152,7 +152,7 @@ prop_alternative_1 s0 s1
|
||||
| s0 `isPrefixOf` s1 =
|
||||
checkParser p (posErr s0l s1 [uneCh (s1 !! s0l), exEof]) s1
|
||||
| otherwise = checkParser p (Right s0) s0 .&&. checkParser p (Right s1) s1
|
||||
where p = try (string s0) <|> string s1
|
||||
where p = string s0 <|> string s1
|
||||
s0l = length s0
|
||||
|
||||
prop_alternative_2 :: Char -> Char -> Char -> Bool -> Property
|
||||
@ -166,8 +166,8 @@ prop_alternative_2 a b c l = checkParser p r s
|
||||
|
||||
prop_alternative_3 :: Property
|
||||
prop_alternative_3 = checkParser p r s
|
||||
where p = asum [empty, try (string ">>>"), empty, return "foo"] <?> "bar"
|
||||
p' = bsum [empty, try (string ">>>"), empty, return "foo"] <?> "bar"
|
||||
where p = asum [empty, string ">>>", empty, return "foo"] <?> "bar"
|
||||
p' = bsum [empty, string ">>>", empty, return "foo"] <?> "bar"
|
||||
bsum = foldl (<|>) empty
|
||||
r = simpleParse p' s
|
||||
s = ">>"
|
||||
@ -333,24 +333,23 @@ prop_hidden_0 a' b' c' = checkParser p r s
|
||||
| otherwise = Right s
|
||||
s = abcRow a b c
|
||||
|
||||
prop_hidden_1 :: String -> NonEmptyList Char -> String -> Property
|
||||
prop_hidden_1 a c' s = checkParser p r s
|
||||
prop_hidden_1 :: NonEmptyList Char -> String -> Property
|
||||
prop_hidden_1 c' s = checkParser p r s
|
||||
where c = getNonEmpty c'
|
||||
p = fromMaybe a <$> optional (hidden $ string c)
|
||||
r | null s = Right a
|
||||
| c == s = Right s
|
||||
| head c /= head s = posErr 0 s [uneCh (head s), exEof]
|
||||
| otherwise = simpleParse (string c) s
|
||||
cn = length c
|
||||
p = optional (hidden $ string c)
|
||||
r | null s = Right Nothing
|
||||
| c == s = Right (Just s)
|
||||
| c `isPrefixOf` s = posErr cn s [uneCh (s !! cn), exEof]
|
||||
| otherwise = posErr 0 s [uneCh (head s), exEof]
|
||||
|
||||
prop_try :: String -> String -> String -> Property
|
||||
prop_try pre s1' s2' = checkParser p r s
|
||||
where s1 = pre ++ s1'
|
||||
s2 = pre ++ s2'
|
||||
p = try (string s1) <|> string s2
|
||||
r | s == s1 || s == s2 = Right s
|
||||
| otherwise = posErr 0 s $ (if null s then uneEof else uneStr pre)
|
||||
: [uneStr pre, exStr s1, exStr s2]
|
||||
s = pre
|
||||
prop_try :: Char -> Char -> Char -> Property
|
||||
prop_try pre ch1 ch2 = checkParser p r s
|
||||
where s1 = sequence [char pre, char ch1]
|
||||
s2 = sequence [char pre, char ch2]
|
||||
p = try s1 <|> s2
|
||||
r = posErr 1 s [uneEof, exCh ch1, exCh ch2]
|
||||
s = [pre]
|
||||
|
||||
prop_lookAhead_0 :: Bool -> Bool -> Bool -> Property
|
||||
prop_lookAhead_0 a b c = checkParser p r s
|
||||
@ -523,15 +522,13 @@ stateFromInput s = State s (initialPos "") defaultTabWidth
|
||||
|
||||
-- IdentityT instance of MonadParsec
|
||||
|
||||
prop_IdentityT_try :: String -> String -> String -> Property
|
||||
prop_IdentityT_try pre s1' s2' = checkParser (runIdentityT p) r s
|
||||
where s1 = pre ++ s1'
|
||||
s2 = pre ++ s2'
|
||||
p = try (string s1) <|> string s2
|
||||
r | s == s1 || s == s2 = Right s
|
||||
| otherwise = posErr 0 s $ (if null s then uneEof else uneStr pre)
|
||||
: [uneStr pre, exStr s1, exStr s2]
|
||||
s = pre
|
||||
prop_IdentityT_try :: Char -> Char -> Char -> Property
|
||||
prop_IdentityT_try pre ch1 ch2 = checkParser (runIdentityT p) r s
|
||||
where s1 = sequence [char pre, char ch1]
|
||||
s2 = sequence [char pre, char ch2]
|
||||
p = try s1 <|> s2
|
||||
r = posErr 1 s [uneEof, exCh ch1, exCh ch2]
|
||||
s = [pre]
|
||||
|
||||
prop_IdentityT_notFollowedBy :: NonNegative Int -> NonNegative Int
|
||||
-> NonNegative Int -> Property
|
||||
@ -544,17 +541,16 @@ prop_IdentityT_notFollowedBy a' b' c' = checkParser (runIdentityT p) r s
|
||||
|
||||
-- ReaderT instance of MonadParsec
|
||||
|
||||
prop_ReaderT_try :: String -> String -> String -> Property
|
||||
prop_ReaderT_try pre s1' s2' = checkParser (runReaderT p (s1', s2')) r s
|
||||
where s1 = pre ++ s1'
|
||||
s2 = pre ++ s2'
|
||||
getS1 = asks ((pre ++) . fst)
|
||||
getS2 = asks ((pre ++) . snd)
|
||||
p = try (string =<< getS1) <|> (string =<< getS2)
|
||||
r | s == s1 || s == s2 = Right s
|
||||
| otherwise = posErr 0 s $ (if null s then uneEof else uneStr pre)
|
||||
: [uneStr pre, exStr s1, exStr s2]
|
||||
s = pre
|
||||
prop_ReaderT_try :: Char -> Char -> Char -> Property
|
||||
prop_ReaderT_try pre ch1 ch2 = checkParser (runReaderT p (s1, s2)) r s
|
||||
where s1 = pre : [ch1]
|
||||
s2 = pre : [ch2]
|
||||
getS1 = asks fst
|
||||
getS2 = asks snd
|
||||
p = try (g =<< getS1) <|> (g =<< getS2)
|
||||
g = sequence . fmap char
|
||||
r = posErr 1 s [uneEof, exCh ch1, exCh ch2]
|
||||
s = [pre]
|
||||
|
||||
prop_ReaderT_notFollowedBy :: NonNegative Int -> NonNegative Int
|
||||
-> NonNegative Int -> Property
|
||||
|
Loading…
Reference in New Issue
Block a user