Auto-backtracking for ‘tokens’ and friends

This commit is contained in:
mrkkrp 2016-02-17 23:32:48 +06:00
parent 4c5aae7098
commit 3baa263eab
8 changed files with 78 additions and 60 deletions

View File

@ -11,6 +11,15 @@
* The `count` combinator now works with `Applicative` instances (previously
it worked only with instances of `Alternative`). It's now also faster.
* `tokens` and parsers built upon it (such as `string` and `string'`)
backtrack automatically on failure now, that is, when they fail, they
never consume any input. This is done to make their consumption model
match how error messages are reported (which becomes an important thing as
user gets more control with primitives like `withRecovery`). This means,
in particular, that it's no longer necessary to use `try` with
`tokens`-based parsers. This new feature *does not* affect performance in
any way.
* New primitive parser `withRecovery` added. The parser allows to recover
from parse errors “on-the-fly” and continue parsing. Once parsing is
finished, several parse errors may be reported or ignored altogether.

View File

@ -100,10 +100,7 @@ endBy1 p sep = some (p <* sep)
-- parser @end@ succeeds. Returns the list of values returned by @p@. This
-- parser can be used to scan comments:
--
-- > simpleComment = string "<!--" >> manyTill anyChar (try $ string "-->")
--
-- Note that we need to use 'try' since parsers @anyChar@ and @string
-- \"-->\"@ overlap and @string \"-->\"@ could consume input before failing.
-- > simpleComment = string "<!--" >> manyTill anyChar (string "-->")
manyTill :: Alternative m => m a -> m end -> m [a]
manyTill p end = ([] <$ end) <|> someTill p end

View File

@ -73,9 +73,6 @@ data Operator m a
-- > binary name f = InfixL (reservedOp name >> return f)
-- > prefix name f = Prefix (reservedOp name >> return f)
-- > postfix name f = Postfix (reservedOp name >> return f)
--
-- Please note that multi-character operators should use 'try' in order to
-- be reported correctly in error messages.
makeExprParser :: MonadParsec s m t => m a -> [[Operator m a]] -> m a
makeExprParser = foldl addPrecLevel

View File

@ -148,7 +148,7 @@ skipLineComment :: MonadParsec s m Char
=> String -- ^ Line comment prefix
-> m ()
skipLineComment prefix = p >> void (manyTill C.anyChar n)
where p = try (C.string prefix)
where p = C.string prefix
n = lookAhead C.newline
-- | @skipBlockComment start end@ skips non-nested block comment starting
@ -159,8 +159,8 @@ skipBlockComment :: MonadParsec s m Char
-> String -- ^ End of block comment
-> m ()
skipBlockComment start end = p >> void (manyTill C.anyChar n)
where p = try (C.string start)
n = try (C.string end)
where p = C.string start
n = C.string end
-- Indentation

View File

@ -458,6 +458,11 @@ class (A.Alternative m, Monad m, Stream s t)
-- 1:1:
-- unexpected "le"
-- expecting "let" or "lexical"
--
-- Please note that as of Megaparsec 4.4.0, 'string' backtracks
-- automatically (see 'tokens'), so it does not need 'try'. However, the
-- examples above demonstrate the idea behind 'try' so well that it was
-- decided to keep them.
try :: m a -> m a
@ -524,6 +529,22 @@ class (A.Alternative m, Monad m, Stream s t)
-- This can be used for example to write 'Text.Megaparsec.Char.string':
--
-- > string = tokens updatePosString (==)
--
-- Note that beginning from Megaparsec 4.4.0, this is an auto-backtracking
-- primitive, which means that if it fails, it never consumes any
-- input. This is done to make its consumption model match how error
-- messages for this primitive are reported (which becomes an important
-- thing as user gets more control with primitives like 'withRecovery'):
--
-- >>> parseTest (string "abc") "abd"
-- 1:1:
-- unexpected "abd"
-- expecting "abc"
--
-- This means, in particular, that it's no longer necessary to use 'try'
-- with 'tokens'-based parsers, such as 'Text.Megaparsec.Char.string' and
-- 'Text.Megaparsec.Char.string''. This new feature /does not/ affect
-- performance in any way.
tokens :: Eq t
=> (Int -> SourcePos -> [t] -> SourcePos)
@ -568,7 +589,8 @@ pLabel l p = ParsecT $ \s cok cerr eok eerr ->
in unParser p s cok' cerr eok' eerr'
pTry :: ParsecT s m a -> ParsecT s m a
pTry p = ParsecT $ \s cok _ eok eerr -> unParser p s cok eerr eok eerr
pTry p = ParsecT $ \s cok _ eok eerr ->
unParser p s cok eerr eok eerr
{-# INLINE pTry #-}
pLookAhead :: ParsecT s m a -> ParsecT s m a
@ -632,7 +654,7 @@ pTokens :: Stream s t
-> [t]
-> ParsecT s m [t]
pTokens _ _ [] = ParsecT $ \s _ _ eok _ -> eok [] s mempty
pTokens nextpos test tts = ParsecT $ \s@(State input pos w) cok cerr _ eerr ->
pTokens nextpos test tts = ParsecT $ \s@(State input pos w) cok _ _ eerr ->
let r = showToken . reverse
errExpect x = setErrorMessage (Expected $ showToken tts)
(newErrorMessage (Unexpected x) pos)
@ -641,13 +663,12 @@ pTokens nextpos test tts = ParsecT $ \s@(State input pos w) cok cerr _ eerr ->
s' = State rs pos' w
in cok (reverse is) s' mempty
walk (t:ts) is rs =
let errorCont = if null is then eerr else cerr
what = if null is then eoi else r is
let what = if null is then eoi else r is
in case uncons rs of
Nothing -> errorCont (errExpect what) s
Nothing -> eerr (errExpect what) s
Just (x,xs)
| test t x -> walk ts (x:is) xs
| otherwise -> errorCont (errExpect $ r (x:is)) s
| otherwise -> eerr (errExpect $ r (x:is)) s
in walk tts [] input
{-# INLINE pTokens #-}

View File

@ -1,4 +1,3 @@
module Bugs.Bug9 (main) where
import Control.Applicative (empty)
@ -41,7 +40,7 @@ integer :: Parser Integer
integer = lexeme L.integer
operator :: String -> Parser String
operator = try . L.symbol sc
operator = L.symbol sc
parseTopLevel :: Parser Expr
parseTopLevel = parseExpr <* eof

View File

@ -131,8 +131,7 @@ prop_eol s = checkParser eol r s
| "\r\n" `isPrefixOf` s = posErr 2 s [uneCh (s !! 2), exEof]
| otherwise = posErr 0 s [ uneStr (take 2 s)
, uneCh '\r'
, exSpec "crlf newline"
, exSpec "newline" ]
, exSpec "end of line" ]
prop_tab :: String -> Property
prop_tab = checkChar tab (== '\t') (Just "tab")

View File

@ -34,7 +34,7 @@ import Control.Applicative
import Data.Char (isLetter, toUpper)
import Data.Foldable (asum)
import Data.List (isPrefixOf)
import Data.Maybe (maybeToList, fromMaybe)
import Data.Maybe (maybeToList)
import Control.Monad.Cont
import Control.Monad.Except
@ -152,7 +152,7 @@ prop_alternative_1 s0 s1
| s0 `isPrefixOf` s1 =
checkParser p (posErr s0l s1 [uneCh (s1 !! s0l), exEof]) s1
| otherwise = checkParser p (Right s0) s0 .&&. checkParser p (Right s1) s1
where p = try (string s0) <|> string s1
where p = string s0 <|> string s1
s0l = length s0
prop_alternative_2 :: Char -> Char -> Char -> Bool -> Property
@ -166,8 +166,8 @@ prop_alternative_2 a b c l = checkParser p r s
prop_alternative_3 :: Property
prop_alternative_3 = checkParser p r s
where p = asum [empty, try (string ">>>"), empty, return "foo"] <?> "bar"
p' = bsum [empty, try (string ">>>"), empty, return "foo"] <?> "bar"
where p = asum [empty, string ">>>", empty, return "foo"] <?> "bar"
p' = bsum [empty, string ">>>", empty, return "foo"] <?> "bar"
bsum = foldl (<|>) empty
r = simpleParse p' s
s = ">>"
@ -333,24 +333,23 @@ prop_hidden_0 a' b' c' = checkParser p r s
| otherwise = Right s
s = abcRow a b c
prop_hidden_1 :: String -> NonEmptyList Char -> String -> Property
prop_hidden_1 a c' s = checkParser p r s
prop_hidden_1 :: NonEmptyList Char -> String -> Property
prop_hidden_1 c' s = checkParser p r s
where c = getNonEmpty c'
p = fromMaybe a <$> optional (hidden $ string c)
r | null s = Right a
| c == s = Right s
| head c /= head s = posErr 0 s [uneCh (head s), exEof]
| otherwise = simpleParse (string c) s
cn = length c
p = optional (hidden $ string c)
r | null s = Right Nothing
| c == s = Right (Just s)
| c `isPrefixOf` s = posErr cn s [uneCh (s !! cn), exEof]
| otherwise = posErr 0 s [uneCh (head s), exEof]
prop_try :: String -> String -> String -> Property
prop_try pre s1' s2' = checkParser p r s
where s1 = pre ++ s1'
s2 = pre ++ s2'
p = try (string s1) <|> string s2
r | s == s1 || s == s2 = Right s
| otherwise = posErr 0 s $ (if null s then uneEof else uneStr pre)
: [uneStr pre, exStr s1, exStr s2]
s = pre
prop_try :: Char -> Char -> Char -> Property
prop_try pre ch1 ch2 = checkParser p r s
where s1 = sequence [char pre, char ch1]
s2 = sequence [char pre, char ch2]
p = try s1 <|> s2
r = posErr 1 s [uneEof, exCh ch1, exCh ch2]
s = [pre]
prop_lookAhead_0 :: Bool -> Bool -> Bool -> Property
prop_lookAhead_0 a b c = checkParser p r s
@ -523,15 +522,13 @@ stateFromInput s = State s (initialPos "") defaultTabWidth
-- IdentityT instance of MonadParsec
prop_IdentityT_try :: String -> String -> String -> Property
prop_IdentityT_try pre s1' s2' = checkParser (runIdentityT p) r s
where s1 = pre ++ s1'
s2 = pre ++ s2'
p = try (string s1) <|> string s2
r | s == s1 || s == s2 = Right s
| otherwise = posErr 0 s $ (if null s then uneEof else uneStr pre)
: [uneStr pre, exStr s1, exStr s2]
s = pre
prop_IdentityT_try :: Char -> Char -> Char -> Property
prop_IdentityT_try pre ch1 ch2 = checkParser (runIdentityT p) r s
where s1 = sequence [char pre, char ch1]
s2 = sequence [char pre, char ch2]
p = try s1 <|> s2
r = posErr 1 s [uneEof, exCh ch1, exCh ch2]
s = [pre]
prop_IdentityT_notFollowedBy :: NonNegative Int -> NonNegative Int
-> NonNegative Int -> Property
@ -544,17 +541,16 @@ prop_IdentityT_notFollowedBy a' b' c' = checkParser (runIdentityT p) r s
-- ReaderT instance of MonadParsec
prop_ReaderT_try :: String -> String -> String -> Property
prop_ReaderT_try pre s1' s2' = checkParser (runReaderT p (s1', s2')) r s
where s1 = pre ++ s1'
s2 = pre ++ s2'
getS1 = asks ((pre ++) . fst)
getS2 = asks ((pre ++) . snd)
p = try (string =<< getS1) <|> (string =<< getS2)
r | s == s1 || s == s2 = Right s
| otherwise = posErr 0 s $ (if null s then uneEof else uneStr pre)
: [uneStr pre, exStr s1, exStr s2]
s = pre
prop_ReaderT_try :: Char -> Char -> Char -> Property
prop_ReaderT_try pre ch1 ch2 = checkParser (runReaderT p (s1, s2)) r s
where s1 = pre : [ch1]
s2 = pre : [ch2]
getS1 = asks fst
getS2 = asks snd
p = try (g =<< getS1) <|> (g =<< getS2)
g = sequence . fmap char
r = posErr 1 s [uneEof, exCh ch1, exCh ch2]
s = [pre]
prop_ReaderT_notFollowedBy :: NonNegative Int -> NonNegative Int
-> NonNegative Int -> Property