diff --git a/CHANGELOG.md b/CHANGELOG.md index a18d1af..b7df1c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,15 @@ * The `count` combinator now works with `Applicative` instances (previously it worked only with instances of `Alternative`). It's now also faster. +* `tokens` and parsers built upon it (such as `string` and `string'`) + backtrack automatically on failure now, that is, when they fail, they + never consume any input. This is done to make their consumption model + match how error messages are reported (which becomes an important thing as + user gets more control with primitives like `withRecovery`). This means, + in particular, that it's no longer necessary to use `try` with + `tokens`-based parsers. This new feature *does not* affect performance in + any way. + * New primitive parser `withRecovery` added. The parser allows to recover from parse errors “on-the-fly” and continue parsing. Once parsing is finished, several parse errors may be reported or ignored altogether. diff --git a/Text/Megaparsec/Combinator.hs b/Text/Megaparsec/Combinator.hs index 5d80449..582438c 100644 --- a/Text/Megaparsec/Combinator.hs +++ b/Text/Megaparsec/Combinator.hs @@ -100,10 +100,7 @@ endBy1 p sep = some (p <* sep) -- parser @end@ succeeds. Returns the list of values returned by @p@. This -- parser can be used to scan comments: -- --- > simpleComment = string "") --- --- Note that we need to use 'try' since parsers @anyChar@ and @string --- \"-->\"@ overlap and @string \"-->\"@ could consume input before failing. +-- > simpleComment = string "") manyTill :: Alternative m => m a -> m end -> m [a] manyTill p end = ([] <$ end) <|> someTill p end diff --git a/Text/Megaparsec/Expr.hs b/Text/Megaparsec/Expr.hs index 1be71fb..a355bee 100644 --- a/Text/Megaparsec/Expr.hs +++ b/Text/Megaparsec/Expr.hs @@ -73,9 +73,6 @@ data Operator m a -- > binary name f = InfixL (reservedOp name >> return f) -- > prefix name f = Prefix (reservedOp name >> return f) -- > postfix name f = Postfix (reservedOp name >> return f) --- --- Please note that multi-character operators should use 'try' in order to --- be reported correctly in error messages. makeExprParser :: MonadParsec s m t => m a -> [[Operator m a]] -> m a makeExprParser = foldl addPrecLevel diff --git a/Text/Megaparsec/Lexer.hs b/Text/Megaparsec/Lexer.hs index 8d807eb..bd2c8ed 100644 --- a/Text/Megaparsec/Lexer.hs +++ b/Text/Megaparsec/Lexer.hs @@ -148,7 +148,7 @@ skipLineComment :: MonadParsec s m Char => String -- ^ Line comment prefix -> m () skipLineComment prefix = p >> void (manyTill C.anyChar n) - where p = try (C.string prefix) + where p = C.string prefix n = lookAhead C.newline -- | @skipBlockComment start end@ skips non-nested block comment starting @@ -159,8 +159,8 @@ skipBlockComment :: MonadParsec s m Char -> String -- ^ End of block comment -> m () skipBlockComment start end = p >> void (manyTill C.anyChar n) - where p = try (C.string start) - n = try (C.string end) + where p = C.string start + n = C.string end -- Indentation diff --git a/Text/Megaparsec/Prim.hs b/Text/Megaparsec/Prim.hs index c85e5e0..b0d3e9f 100644 --- a/Text/Megaparsec/Prim.hs +++ b/Text/Megaparsec/Prim.hs @@ -458,6 +458,11 @@ class (A.Alternative m, Monad m, Stream s t) -- 1:1: -- unexpected "le" -- expecting "let" or "lexical" + -- + -- Please note that as of Megaparsec 4.4.0, 'string' backtracks + -- automatically (see 'tokens'), so it does not need 'try'. However, the + -- examples above demonstrate the idea behind 'try' so well that it was + -- decided to keep them. try :: m a -> m a @@ -524,6 +529,22 @@ class (A.Alternative m, Monad m, Stream s t) -- This can be used for example to write 'Text.Megaparsec.Char.string': -- -- > string = tokens updatePosString (==) + -- + -- Note that beginning from Megaparsec 4.4.0, this is an auto-backtracking + -- primitive, which means that if it fails, it never consumes any + -- input. This is done to make its consumption model match how error + -- messages for this primitive are reported (which becomes an important + -- thing as user gets more control with primitives like 'withRecovery'): + -- + -- >>> parseTest (string "abc") "abd" + -- 1:1: + -- unexpected "abd" + -- expecting "abc" + -- + -- This means, in particular, that it's no longer necessary to use 'try' + -- with 'tokens'-based parsers, such as 'Text.Megaparsec.Char.string' and + -- 'Text.Megaparsec.Char.string''. This new feature /does not/ affect + -- performance in any way. tokens :: Eq t => (Int -> SourcePos -> [t] -> SourcePos) @@ -568,7 +589,8 @@ pLabel l p = ParsecT $ \s cok cerr eok eerr -> in unParser p s cok' cerr eok' eerr' pTry :: ParsecT s m a -> ParsecT s m a -pTry p = ParsecT $ \s cok _ eok eerr -> unParser p s cok eerr eok eerr +pTry p = ParsecT $ \s cok _ eok eerr -> + unParser p s cok eerr eok eerr {-# INLINE pTry #-} pLookAhead :: ParsecT s m a -> ParsecT s m a @@ -632,7 +654,7 @@ pTokens :: Stream s t -> [t] -> ParsecT s m [t] pTokens _ _ [] = ParsecT $ \s _ _ eok _ -> eok [] s mempty -pTokens nextpos test tts = ParsecT $ \s@(State input pos w) cok cerr _ eerr -> +pTokens nextpos test tts = ParsecT $ \s@(State input pos w) cok _ _ eerr -> let r = showToken . reverse errExpect x = setErrorMessage (Expected $ showToken tts) (newErrorMessage (Unexpected x) pos) @@ -641,13 +663,12 @@ pTokens nextpos test tts = ParsecT $ \s@(State input pos w) cok cerr _ eerr -> s' = State rs pos' w in cok (reverse is) s' mempty walk (t:ts) is rs = - let errorCont = if null is then eerr else cerr - what = if null is then eoi else r is + let what = if null is then eoi else r is in case uncons rs of - Nothing -> errorCont (errExpect what) s + Nothing -> eerr (errExpect what) s Just (x,xs) | test t x -> walk ts (x:is) xs - | otherwise -> errorCont (errExpect $ r (x:is)) s + | otherwise -> eerr (errExpect $ r (x:is)) s in walk tts [] input {-# INLINE pTokens #-} diff --git a/old-tests/Bugs/Bug9.hs b/old-tests/Bugs/Bug9.hs index 6f2b607..48ffad7 100644 --- a/old-tests/Bugs/Bug9.hs +++ b/old-tests/Bugs/Bug9.hs @@ -1,4 +1,3 @@ - module Bugs.Bug9 (main) where import Control.Applicative (empty) @@ -41,7 +40,7 @@ integer :: Parser Integer integer = lexeme L.integer operator :: String -> Parser String -operator = try . L.symbol sc +operator = L.symbol sc parseTopLevel :: Parser Expr parseTopLevel = parseExpr <* eof diff --git a/tests/Char.hs b/tests/Char.hs index 166d00e..fb213dc 100644 --- a/tests/Char.hs +++ b/tests/Char.hs @@ -131,8 +131,7 @@ prop_eol s = checkParser eol r s | "\r\n" `isPrefixOf` s = posErr 2 s [uneCh (s !! 2), exEof] | otherwise = posErr 0 s [ uneStr (take 2 s) , uneCh '\r' - , exSpec "crlf newline" - , exSpec "newline" ] + , exSpec "end of line" ] prop_tab :: String -> Property prop_tab = checkChar tab (== '\t') (Just "tab") diff --git a/tests/Prim.hs b/tests/Prim.hs index fd7bffe..7ab6cd6 100644 --- a/tests/Prim.hs +++ b/tests/Prim.hs @@ -34,7 +34,7 @@ import Control.Applicative import Data.Char (isLetter, toUpper) import Data.Foldable (asum) import Data.List (isPrefixOf) -import Data.Maybe (maybeToList, fromMaybe) +import Data.Maybe (maybeToList) import Control.Monad.Cont import Control.Monad.Except @@ -152,7 +152,7 @@ prop_alternative_1 s0 s1 | s0 `isPrefixOf` s1 = checkParser p (posErr s0l s1 [uneCh (s1 !! s0l), exEof]) s1 | otherwise = checkParser p (Right s0) s0 .&&. checkParser p (Right s1) s1 - where p = try (string s0) <|> string s1 + where p = string s0 <|> string s1 s0l = length s0 prop_alternative_2 :: Char -> Char -> Char -> Bool -> Property @@ -166,8 +166,8 @@ prop_alternative_2 a b c l = checkParser p r s prop_alternative_3 :: Property prop_alternative_3 = checkParser p r s - where p = asum [empty, try (string ">>>"), empty, return "foo"] "bar" - p' = bsum [empty, try (string ">>>"), empty, return "foo"] "bar" + where p = asum [empty, string ">>>", empty, return "foo"] "bar" + p' = bsum [empty, string ">>>", empty, return "foo"] "bar" bsum = foldl (<|>) empty r = simpleParse p' s s = ">>" @@ -333,24 +333,23 @@ prop_hidden_0 a' b' c' = checkParser p r s | otherwise = Right s s = abcRow a b c -prop_hidden_1 :: String -> NonEmptyList Char -> String -> Property -prop_hidden_1 a c' s = checkParser p r s +prop_hidden_1 :: NonEmptyList Char -> String -> Property +prop_hidden_1 c' s = checkParser p r s where c = getNonEmpty c' - p = fromMaybe a <$> optional (hidden $ string c) - r | null s = Right a - | c == s = Right s - | head c /= head s = posErr 0 s [uneCh (head s), exEof] - | otherwise = simpleParse (string c) s + cn = length c + p = optional (hidden $ string c) + r | null s = Right Nothing + | c == s = Right (Just s) + | c `isPrefixOf` s = posErr cn s [uneCh (s !! cn), exEof] + | otherwise = posErr 0 s [uneCh (head s), exEof] -prop_try :: String -> String -> String -> Property -prop_try pre s1' s2' = checkParser p r s - where s1 = pre ++ s1' - s2 = pre ++ s2' - p = try (string s1) <|> string s2 - r | s == s1 || s == s2 = Right s - | otherwise = posErr 0 s $ (if null s then uneEof else uneStr pre) - : [uneStr pre, exStr s1, exStr s2] - s = pre +prop_try :: Char -> Char -> Char -> Property +prop_try pre ch1 ch2 = checkParser p r s + where s1 = sequence [char pre, char ch1] + s2 = sequence [char pre, char ch2] + p = try s1 <|> s2 + r = posErr 1 s [uneEof, exCh ch1, exCh ch2] + s = [pre] prop_lookAhead_0 :: Bool -> Bool -> Bool -> Property prop_lookAhead_0 a b c = checkParser p r s @@ -523,15 +522,13 @@ stateFromInput s = State s (initialPos "") defaultTabWidth -- IdentityT instance of MonadParsec -prop_IdentityT_try :: String -> String -> String -> Property -prop_IdentityT_try pre s1' s2' = checkParser (runIdentityT p) r s - where s1 = pre ++ s1' - s2 = pre ++ s2' - p = try (string s1) <|> string s2 - r | s == s1 || s == s2 = Right s - | otherwise = posErr 0 s $ (if null s then uneEof else uneStr pre) - : [uneStr pre, exStr s1, exStr s2] - s = pre +prop_IdentityT_try :: Char -> Char -> Char -> Property +prop_IdentityT_try pre ch1 ch2 = checkParser (runIdentityT p) r s + where s1 = sequence [char pre, char ch1] + s2 = sequence [char pre, char ch2] + p = try s1 <|> s2 + r = posErr 1 s [uneEof, exCh ch1, exCh ch2] + s = [pre] prop_IdentityT_notFollowedBy :: NonNegative Int -> NonNegative Int -> NonNegative Int -> Property @@ -544,17 +541,16 @@ prop_IdentityT_notFollowedBy a' b' c' = checkParser (runIdentityT p) r s -- ReaderT instance of MonadParsec -prop_ReaderT_try :: String -> String -> String -> Property -prop_ReaderT_try pre s1' s2' = checkParser (runReaderT p (s1', s2')) r s - where s1 = pre ++ s1' - s2 = pre ++ s2' - getS1 = asks ((pre ++) . fst) - getS2 = asks ((pre ++) . snd) - p = try (string =<< getS1) <|> (string =<< getS2) - r | s == s1 || s == s2 = Right s - | otherwise = posErr 0 s $ (if null s then uneEof else uneStr pre) - : [uneStr pre, exStr s1, exStr s2] - s = pre +prop_ReaderT_try :: Char -> Char -> Char -> Property +prop_ReaderT_try pre ch1 ch2 = checkParser (runReaderT p (s1, s2)) r s + where s1 = pre : [ch1] + s2 = pre : [ch2] + getS1 = asks fst + getS2 = asks snd + p = try (g =<< getS1) <|> (g =<< getS2) + g = sequence . fmap char + r = posErr 1 s [uneEof, exCh ch1, exCh ch2] + s = [pre] prop_ReaderT_notFollowedBy :: NonNegative Int -> NonNegative Int -> NonNegative Int -> Property