From 3edbe9f54aca995de9139c7b0b8c6ff8ed72ce8c Mon Sep 17 00:00:00 2001 From: mrkkrp Date: Tue, 9 Feb 2016 13:55:53 +0600 Subject: [PATCH 1/7] Implement the recovery feature Close #80. --- Text/Megaparsec.hs | 1 + Text/Megaparsec/Prim.hs | 62 +++++++++++++++++++++++++++++++++++------ stack.yaml | 2 +- 3 files changed, 56 insertions(+), 9 deletions(-) diff --git a/Text/Megaparsec.hs b/Text/Megaparsec.hs index 1010c0b..70698d9 100644 --- a/Text/Megaparsec.hs +++ b/Text/Megaparsec.hs @@ -67,6 +67,7 @@ module Text.Megaparsec , try , lookAhead , notFollowedBy + , withRecovery , eof , token , tokens diff --git a/Text/Megaparsec/Prim.hs b/Text/Megaparsec/Prim.hs index fc89a1a..c85e5e0 100644 --- a/Text/Megaparsec/Prim.hs +++ b/Text/Megaparsec/Prim.hs @@ -388,9 +388,9 @@ pZero = ParsecT $ \s@(State _ pos _) _ _ _ eerr -> pPlus :: ParsecT s m a -> ParsecT s m a -> ParsecT s m a pPlus m n = ParsecT $ \s cok cerr eok eerr -> let meerr err ms = - let ncerr err' s' = cerr (mergeError err' err) (longestMatch ms s') + let ncerr err' s' = cerr (err' <> err) (longestMatch ms s') neok x s' hs = eok x s' (toHints err <> hs) - neerr err' s' = eerr (mergeError err' err) (longestMatch ms s') + neerr err' s' = eerr (err' <> err) (longestMatch ms s') in unParser n s cok ncerr neok neerr in unParser m s cok cerr eok meerr {-# INLINE pPlus #-} @@ -474,6 +474,21 @@ class (A.Alternative m, Monad m, Stream s t) notFollowedBy :: m a -> m () + -- | @withRecovery r p@ allows continue parsing even if parser @p@ + -- fails. In this case @r@ is called with actual 'ParseError' as its + -- argument. Typical usage is to return value signifying failure to parse + -- this particular object and to consume some part of input up to start of + -- next object. + -- + -- Note that if @r@ fails, original error message is reported as if + -- without 'withRecovery'. In no way recovering parser @r@ can influence + -- error messages. + + withRecovery + :: (ParseError -> m a) -- ^ How to recover from failure + -> m a -- ^ Original parser + -> m a -- ^ Parser that can recover from failures + -- | This parser only succeeds at the end of the input. eof :: m () @@ -533,6 +548,7 @@ instance Stream s t => MonadParsec s (ParsecT s m) t where try = pTry lookAhead = pLookAhead notFollowedBy = pNotFollowedBy + withRecovery = pWithRecovery eof = pEof token = pToken tokens = pTokens @@ -570,6 +586,24 @@ pNotFollowedBy p = ParsecT $ \s@(State input pos _) _ _ eok eerr -> eerr' _ _ = eok () s mempty in unParser p s cok' cerr' eok' eerr' +pWithRecovery :: Stream s t + => (ParseError -> ParsecT s m a) + -> ParsecT s m a + -> ParsecT s m a +pWithRecovery r p = ParsecT $ \s cok cerr eok eerr -> + let mcerr err ms = + let rcerr _ _ = cerr err ms + reok x s' _ = eok x s' (toHints err) + reerr _ _ = cerr err ms + in unParser (r err) ms cok rcerr reok reerr + meerr err ms = + let rcerr _ _ = eerr err ms + reok x s' _ = eok x s' (toHints err) + reerr _ _ = eerr err ms + in unParser (r err) ms cok rcerr reok reerr + in unParser p s cok mcerr eok meerr +{-# INLINE pWithRecovery #-} + pEof :: Stream s t => ParsecT s m () pEof = label eoi $ ParsecT $ \s@(State input pos _) _ _ eok eerr -> case uncons input of @@ -830,13 +864,15 @@ parseFromFile p filename = runParser p filename <$> fromFile filename instance (MonadPlus m, MonadParsec s m t) => MonadParsec s (L.StateT e m) t where + failure = lift . failure label n (L.StateT m) = L.StateT $ label n . m try (L.StateT m) = L.StateT $ try . m lookAhead (L.StateT m) = L.StateT $ \s -> (,s) . fst <$> lookAhead (m s) notFollowedBy (L.StateT m) = L.StateT $ \s -> notFollowedBy (fst <$> m s) >> return ((),s) - failure = lift . failure + withRecovery r (L.StateT m) = L.StateT $ \s -> + withRecovery (\e -> L.runStateT (r e) s) (m s) eof = lift eof token f e = lift $ token f e tokens f e ts = lift $ tokens f e ts @@ -845,13 +881,15 @@ instance (MonadPlus m, MonadParsec s m t) => instance (MonadPlus m, MonadParsec s m t) => MonadParsec s (S.StateT e m) t where + failure = lift . failure label n (S.StateT m) = S.StateT $ label n . m try (S.StateT m) = S.StateT $ try . m lookAhead (S.StateT m) = S.StateT $ \s -> (,s) . fst <$> lookAhead (m s) notFollowedBy (S.StateT m) = S.StateT $ \s -> notFollowedBy (fst <$> m s) >> return ((),s) - failure = lift . failure + withRecovery r (S.StateT m) = S.StateT $ \s -> + withRecovery (\e -> S.runStateT (r e) s) (m s) eof = lift eof token f e = lift $ token f e tokens f e ts = lift $ tokens f e ts @@ -860,11 +898,13 @@ instance (MonadPlus m, MonadParsec s m t) instance (MonadPlus m, MonadParsec s m t) => MonadParsec s (L.ReaderT e m) t where + failure = lift . failure label n (L.ReaderT m) = L.ReaderT $ label n . m try (L.ReaderT m) = L.ReaderT $ try . m lookAhead (L.ReaderT m) = L.ReaderT $ lookAhead . m notFollowedBy (L.ReaderT m) = L.ReaderT $ notFollowedBy . m - failure = lift . failure + withRecovery r (L.ReaderT m) = L.ReaderT $ \s -> + withRecovery (\e -> L.runReaderT (r e) s) (m s) eof = lift eof token f e = lift $ token f e tokens f e ts = lift $ tokens f e ts @@ -873,13 +913,15 @@ instance (MonadPlus m, MonadParsec s m t) instance (MonadPlus m, Monoid w, MonadParsec s m t) => MonadParsec s (L.WriterT w m) t where + failure = lift . failure label n (L.WriterT m) = L.WriterT $ label n m try (L.WriterT m) = L.WriterT $ try m lookAhead (L.WriterT m) = L.WriterT $ (,mempty) . fst <$> lookAhead m notFollowedBy (L.WriterT m) = L.WriterT $ (,mempty) <$> notFollowedBy (fst <$> m) - failure = lift . failure + withRecovery r (L.WriterT m) = L.WriterT $ + withRecovery (L.runWriterT . r) m eof = lift eof token f e = lift $ token f e tokens f e ts = lift $ tokens f e ts @@ -888,13 +930,15 @@ instance (MonadPlus m, Monoid w, MonadParsec s m t) instance (MonadPlus m, Monoid w, MonadParsec s m t) => MonadParsec s (S.WriterT w m) t where + failure = lift . failure label n (S.WriterT m) = S.WriterT $ label n m try (S.WriterT m) = S.WriterT $ try m lookAhead (S.WriterT m) = S.WriterT $ (,mempty) . fst <$> lookAhead m notFollowedBy (S.WriterT m) = S.WriterT $ (,mempty) <$> notFollowedBy (fst <$> m) - failure = lift . failure + withRecovery r (S.WriterT m) = S.WriterT $ + withRecovery (S.runWriterT . r) m eof = lift eof token f e = lift $ token f e tokens f e ts = lift $ tokens f e ts @@ -903,11 +947,13 @@ instance (MonadPlus m, Monoid w, MonadParsec s m t) instance (Monad m, MonadParsec s m t) => MonadParsec s (IdentityT m) t where + failure = lift . failure label n (IdentityT m) = IdentityT $ label n m try = IdentityT . try . runIdentityT lookAhead (IdentityT m) = IdentityT $ lookAhead m notFollowedBy (IdentityT m) = IdentityT $ notFollowedBy m - failure = lift . failure + withRecovery r (IdentityT m) = IdentityT $ + withRecovery (runIdentityT . r) m eof = lift eof token f e = lift $ token f e tokens f e ts = lift $ tokens f e ts diff --git a/stack.yaml b/stack.yaml index 3a5b901..2275083 100644 --- a/stack.yaml +++ b/stack.yaml @@ -1,3 +1,3 @@ -resolver: lts-4.0 +resolver: nightly-2016-02-08 packages: - '.' From 4c5aae709824e9cd84bc48efcffdd427300af02e Mon Sep 17 00:00:00 2001 From: mrkkrp Date: Fri, 12 Feb 2016 22:12:30 +0600 Subject: [PATCH 2/7] =?UTF-8?q?Update=20=E2=80=98CHANGELOG.md=E2=80=99=20a?= =?UTF-8?q?nd=20=E2=80=98README.md=E2=80=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 4 ++++ README.md | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 17936fb..a18d1af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ * The `count` combinator now works with `Applicative` instances (previously it worked only with instances of `Alternative`). It's now also faster. +* New primitive parser `withRecovery` added. The parser allows to recover + from parse errors “on-the-fly” and continue parsing. Once parsing is + finished, several parse errors may be reported or ignored altogether. + ## Megaparsec 4.3.0 * Canonicalized `Applicative`/`Monad` instances. Thanks to Herbert Valerio diff --git a/README.md b/README.md index d864af4..65de215 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,10 @@ via combination of these primitives: * `notFollowedBy` succeeds when its argument fails, it does not consume input. +* `withRecovery` allows to recover from parse errors “on-the-fly” and + continue parsing. Once parsing is finished, several parse errors may be + reported or ignored altogether. + * `eof` only succeeds at the end of input. * `token` is used to parse single token. From 3baa263eab4732315368c57216085e88f1c9aa3e Mon Sep 17 00:00:00 2001 From: mrkkrp Date: Wed, 17 Feb 2016 23:32:48 +0600 Subject: [PATCH 3/7] =?UTF-8?q?Auto-backtracking=20for=20=E2=80=98tokens?= =?UTF-8?q?=E2=80=99=20and=20friends?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 9 +++++ Text/Megaparsec/Combinator.hs | 5 +-- Text/Megaparsec/Expr.hs | 3 -- Text/Megaparsec/Lexer.hs | 6 +-- Text/Megaparsec/Prim.hs | 33 ++++++++++++--- old-tests/Bugs/Bug9.hs | 3 +- tests/Char.hs | 3 +- tests/Prim.hs | 76 +++++++++++++++++------------------ 8 files changed, 78 insertions(+), 60 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a18d1af..b7df1c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,15 @@ * The `count` combinator now works with `Applicative` instances (previously it worked only with instances of `Alternative`). It's now also faster. +* `tokens` and parsers built upon it (such as `string` and `string'`) + backtrack automatically on failure now, that is, when they fail, they + never consume any input. This is done to make their consumption model + match how error messages are reported (which becomes an important thing as + user gets more control with primitives like `withRecovery`). This means, + in particular, that it's no longer necessary to use `try` with + `tokens`-based parsers. This new feature *does not* affect performance in + any way. + * New primitive parser `withRecovery` added. The parser allows to recover from parse errors “on-the-fly” and continue parsing. Once parsing is finished, several parse errors may be reported or ignored altogether. diff --git a/Text/Megaparsec/Combinator.hs b/Text/Megaparsec/Combinator.hs index 5d80449..582438c 100644 --- a/Text/Megaparsec/Combinator.hs +++ b/Text/Megaparsec/Combinator.hs @@ -100,10 +100,7 @@ endBy1 p sep = some (p <* sep) -- parser @end@ succeeds. Returns the list of values returned by @p@. This -- parser can be used to scan comments: -- --- > simpleComment = string "") --- --- Note that we need to use 'try' since parsers @anyChar@ and @string --- \"-->\"@ overlap and @string \"-->\"@ could consume input before failing. +-- > simpleComment = string "") manyTill :: Alternative m => m a -> m end -> m [a] manyTill p end = ([] <$ end) <|> someTill p end diff --git a/Text/Megaparsec/Expr.hs b/Text/Megaparsec/Expr.hs index 1be71fb..a355bee 100644 --- a/Text/Megaparsec/Expr.hs +++ b/Text/Megaparsec/Expr.hs @@ -73,9 +73,6 @@ data Operator m a -- > binary name f = InfixL (reservedOp name >> return f) -- > prefix name f = Prefix (reservedOp name >> return f) -- > postfix name f = Postfix (reservedOp name >> return f) --- --- Please note that multi-character operators should use 'try' in order to --- be reported correctly in error messages. makeExprParser :: MonadParsec s m t => m a -> [[Operator m a]] -> m a makeExprParser = foldl addPrecLevel diff --git a/Text/Megaparsec/Lexer.hs b/Text/Megaparsec/Lexer.hs index 8d807eb..bd2c8ed 100644 --- a/Text/Megaparsec/Lexer.hs +++ b/Text/Megaparsec/Lexer.hs @@ -148,7 +148,7 @@ skipLineComment :: MonadParsec s m Char => String -- ^ Line comment prefix -> m () skipLineComment prefix = p >> void (manyTill C.anyChar n) - where p = try (C.string prefix) + where p = C.string prefix n = lookAhead C.newline -- | @skipBlockComment start end@ skips non-nested block comment starting @@ -159,8 +159,8 @@ skipBlockComment :: MonadParsec s m Char -> String -- ^ End of block comment -> m () skipBlockComment start end = p >> void (manyTill C.anyChar n) - where p = try (C.string start) - n = try (C.string end) + where p = C.string start + n = C.string end -- Indentation diff --git a/Text/Megaparsec/Prim.hs b/Text/Megaparsec/Prim.hs index c85e5e0..b0d3e9f 100644 --- a/Text/Megaparsec/Prim.hs +++ b/Text/Megaparsec/Prim.hs @@ -458,6 +458,11 @@ class (A.Alternative m, Monad m, Stream s t) -- 1:1: -- unexpected "le" -- expecting "let" or "lexical" + -- + -- Please note that as of Megaparsec 4.4.0, 'string' backtracks + -- automatically (see 'tokens'), so it does not need 'try'. However, the + -- examples above demonstrate the idea behind 'try' so well that it was + -- decided to keep them. try :: m a -> m a @@ -524,6 +529,22 @@ class (A.Alternative m, Monad m, Stream s t) -- This can be used for example to write 'Text.Megaparsec.Char.string': -- -- > string = tokens updatePosString (==) + -- + -- Note that beginning from Megaparsec 4.4.0, this is an auto-backtracking + -- primitive, which means that if it fails, it never consumes any + -- input. This is done to make its consumption model match how error + -- messages for this primitive are reported (which becomes an important + -- thing as user gets more control with primitives like 'withRecovery'): + -- + -- >>> parseTest (string "abc") "abd" + -- 1:1: + -- unexpected "abd" + -- expecting "abc" + -- + -- This means, in particular, that it's no longer necessary to use 'try' + -- with 'tokens'-based parsers, such as 'Text.Megaparsec.Char.string' and + -- 'Text.Megaparsec.Char.string''. This new feature /does not/ affect + -- performance in any way. tokens :: Eq t => (Int -> SourcePos -> [t] -> SourcePos) @@ -568,7 +589,8 @@ pLabel l p = ParsecT $ \s cok cerr eok eerr -> in unParser p s cok' cerr eok' eerr' pTry :: ParsecT s m a -> ParsecT s m a -pTry p = ParsecT $ \s cok _ eok eerr -> unParser p s cok eerr eok eerr +pTry p = ParsecT $ \s cok _ eok eerr -> + unParser p s cok eerr eok eerr {-# INLINE pTry #-} pLookAhead :: ParsecT s m a -> ParsecT s m a @@ -632,7 +654,7 @@ pTokens :: Stream s t -> [t] -> ParsecT s m [t] pTokens _ _ [] = ParsecT $ \s _ _ eok _ -> eok [] s mempty -pTokens nextpos test tts = ParsecT $ \s@(State input pos w) cok cerr _ eerr -> +pTokens nextpos test tts = ParsecT $ \s@(State input pos w) cok _ _ eerr -> let r = showToken . reverse errExpect x = setErrorMessage (Expected $ showToken tts) (newErrorMessage (Unexpected x) pos) @@ -641,13 +663,12 @@ pTokens nextpos test tts = ParsecT $ \s@(State input pos w) cok cerr _ eerr -> s' = State rs pos' w in cok (reverse is) s' mempty walk (t:ts) is rs = - let errorCont = if null is then eerr else cerr - what = if null is then eoi else r is + let what = if null is then eoi else r is in case uncons rs of - Nothing -> errorCont (errExpect what) s + Nothing -> eerr (errExpect what) s Just (x,xs) | test t x -> walk ts (x:is) xs - | otherwise -> errorCont (errExpect $ r (x:is)) s + | otherwise -> eerr (errExpect $ r (x:is)) s in walk tts [] input {-# INLINE pTokens #-} diff --git a/old-tests/Bugs/Bug9.hs b/old-tests/Bugs/Bug9.hs index 6f2b607..48ffad7 100644 --- a/old-tests/Bugs/Bug9.hs +++ b/old-tests/Bugs/Bug9.hs @@ -1,4 +1,3 @@ - module Bugs.Bug9 (main) where import Control.Applicative (empty) @@ -41,7 +40,7 @@ integer :: Parser Integer integer = lexeme L.integer operator :: String -> Parser String -operator = try . L.symbol sc +operator = L.symbol sc parseTopLevel :: Parser Expr parseTopLevel = parseExpr <* eof diff --git a/tests/Char.hs b/tests/Char.hs index 166d00e..fb213dc 100644 --- a/tests/Char.hs +++ b/tests/Char.hs @@ -131,8 +131,7 @@ prop_eol s = checkParser eol r s | "\r\n" `isPrefixOf` s = posErr 2 s [uneCh (s !! 2), exEof] | otherwise = posErr 0 s [ uneStr (take 2 s) , uneCh '\r' - , exSpec "crlf newline" - , exSpec "newline" ] + , exSpec "end of line" ] prop_tab :: String -> Property prop_tab = checkChar tab (== '\t') (Just "tab") diff --git a/tests/Prim.hs b/tests/Prim.hs index fd7bffe..7ab6cd6 100644 --- a/tests/Prim.hs +++ b/tests/Prim.hs @@ -34,7 +34,7 @@ import Control.Applicative import Data.Char (isLetter, toUpper) import Data.Foldable (asum) import Data.List (isPrefixOf) -import Data.Maybe (maybeToList, fromMaybe) +import Data.Maybe (maybeToList) import Control.Monad.Cont import Control.Monad.Except @@ -152,7 +152,7 @@ prop_alternative_1 s0 s1 | s0 `isPrefixOf` s1 = checkParser p (posErr s0l s1 [uneCh (s1 !! s0l), exEof]) s1 | otherwise = checkParser p (Right s0) s0 .&&. checkParser p (Right s1) s1 - where p = try (string s0) <|> string s1 + where p = string s0 <|> string s1 s0l = length s0 prop_alternative_2 :: Char -> Char -> Char -> Bool -> Property @@ -166,8 +166,8 @@ prop_alternative_2 a b c l = checkParser p r s prop_alternative_3 :: Property prop_alternative_3 = checkParser p r s - where p = asum [empty, try (string ">>>"), empty, return "foo"] "bar" - p' = bsum [empty, try (string ">>>"), empty, return "foo"] "bar" + where p = asum [empty, string ">>>", empty, return "foo"] "bar" + p' = bsum [empty, string ">>>", empty, return "foo"] "bar" bsum = foldl (<|>) empty r = simpleParse p' s s = ">>" @@ -333,24 +333,23 @@ prop_hidden_0 a' b' c' = checkParser p r s | otherwise = Right s s = abcRow a b c -prop_hidden_1 :: String -> NonEmptyList Char -> String -> Property -prop_hidden_1 a c' s = checkParser p r s +prop_hidden_1 :: NonEmptyList Char -> String -> Property +prop_hidden_1 c' s = checkParser p r s where c = getNonEmpty c' - p = fromMaybe a <$> optional (hidden $ string c) - r | null s = Right a - | c == s = Right s - | head c /= head s = posErr 0 s [uneCh (head s), exEof] - | otherwise = simpleParse (string c) s + cn = length c + p = optional (hidden $ string c) + r | null s = Right Nothing + | c == s = Right (Just s) + | c `isPrefixOf` s = posErr cn s [uneCh (s !! cn), exEof] + | otherwise = posErr 0 s [uneCh (head s), exEof] -prop_try :: String -> String -> String -> Property -prop_try pre s1' s2' = checkParser p r s - where s1 = pre ++ s1' - s2 = pre ++ s2' - p = try (string s1) <|> string s2 - r | s == s1 || s == s2 = Right s - | otherwise = posErr 0 s $ (if null s then uneEof else uneStr pre) - : [uneStr pre, exStr s1, exStr s2] - s = pre +prop_try :: Char -> Char -> Char -> Property +prop_try pre ch1 ch2 = checkParser p r s + where s1 = sequence [char pre, char ch1] + s2 = sequence [char pre, char ch2] + p = try s1 <|> s2 + r = posErr 1 s [uneEof, exCh ch1, exCh ch2] + s = [pre] prop_lookAhead_0 :: Bool -> Bool -> Bool -> Property prop_lookAhead_0 a b c = checkParser p r s @@ -523,15 +522,13 @@ stateFromInput s = State s (initialPos "") defaultTabWidth -- IdentityT instance of MonadParsec -prop_IdentityT_try :: String -> String -> String -> Property -prop_IdentityT_try pre s1' s2' = checkParser (runIdentityT p) r s - where s1 = pre ++ s1' - s2 = pre ++ s2' - p = try (string s1) <|> string s2 - r | s == s1 || s == s2 = Right s - | otherwise = posErr 0 s $ (if null s then uneEof else uneStr pre) - : [uneStr pre, exStr s1, exStr s2] - s = pre +prop_IdentityT_try :: Char -> Char -> Char -> Property +prop_IdentityT_try pre ch1 ch2 = checkParser (runIdentityT p) r s + where s1 = sequence [char pre, char ch1] + s2 = sequence [char pre, char ch2] + p = try s1 <|> s2 + r = posErr 1 s [uneEof, exCh ch1, exCh ch2] + s = [pre] prop_IdentityT_notFollowedBy :: NonNegative Int -> NonNegative Int -> NonNegative Int -> Property @@ -544,17 +541,16 @@ prop_IdentityT_notFollowedBy a' b' c' = checkParser (runIdentityT p) r s -- ReaderT instance of MonadParsec -prop_ReaderT_try :: String -> String -> String -> Property -prop_ReaderT_try pre s1' s2' = checkParser (runReaderT p (s1', s2')) r s - where s1 = pre ++ s1' - s2 = pre ++ s2' - getS1 = asks ((pre ++) . fst) - getS2 = asks ((pre ++) . snd) - p = try (string =<< getS1) <|> (string =<< getS2) - r | s == s1 || s == s2 = Right s - | otherwise = posErr 0 s $ (if null s then uneEof else uneStr pre) - : [uneStr pre, exStr s1, exStr s2] - s = pre +prop_ReaderT_try :: Char -> Char -> Char -> Property +prop_ReaderT_try pre ch1 ch2 = checkParser (runReaderT p (s1, s2)) r s + where s1 = pre : [ch1] + s2 = pre : [ch2] + getS1 = asks fst + getS2 = asks snd + p = try (g =<< getS1) <|> (g =<< getS2) + g = sequence . fmap char + r = posErr 1 s [uneEof, exCh ch1, exCh ch2] + s = [pre] prop_ReaderT_notFollowedBy :: NonNegative Int -> NonNegative Int -> NonNegative Int -> Property From 1285699b74bb5d1f45860ae2a1db2339da184b64 Mon Sep 17 00:00:00 2001 From: mrkkrp Date: Thu, 18 Feb 2016 00:12:27 +0600 Subject: [PATCH 4/7] Improve coverage of older primitives --- megaparsec.cabal | 2 ++ tests/Prim.hs | 42 +++++++++++++++++++++++++++++++++++++----- tests/Util.hs | 13 +++---------- 3 files changed, 42 insertions(+), 15 deletions(-) diff --git a/megaparsec.cabal b/megaparsec.cabal index f2f8921..821fdaf 100644 --- a/megaparsec.cabal +++ b/megaparsec.cabal @@ -144,10 +144,12 @@ test-suite tests , Prim , Util build-depends: base >= 4.6 && < 5 + , HUnit >= 1.2 && < 1.4 , QuickCheck >= 2.4 && < 3 , megaparsec >= 4.3 , mtl == 2.* , test-framework >= 0.6 && < 1 + , test-framework-hunit >= 0.3 && < 0.4 , test-framework-quickcheck2 >= 0.3 && < 0.4 , transformers >= 0.4 && < 0.6 default-extensions: CPP diff --git a/tests/Prim.hs b/tests/Prim.hs index 7ab6cd6..8c50360 100644 --- a/tests/Prim.hs +++ b/tests/Prim.hs @@ -47,8 +47,10 @@ import qualified Control.Monad.Writer.Lazy as L import qualified Control.Monad.Writer.Strict as S import Test.Framework +import Test.Framework.Providers.HUnit (testCase) import Test.Framework.Providers.QuickCheck2 (testProperty) import Test.QuickCheck hiding (label) +import Test.HUnit (Assertion, (@?=)) import Text.Megaparsec.Char import Text.Megaparsec.Error @@ -96,9 +98,15 @@ tests = testGroup "Primitive parser combinators" , testProperty "combinator lookAhead" prop_lookAhead_0 , testProperty "combinator lookAhead hints" prop_lookAhead_1 , testProperty "combinator lookAhead messages" prop_lookAhead_2 + , testCase "combinator lookAhead cerr" case_lookAhead_3 , testProperty "combinator notFollowedBy" prop_notFollowedBy_0 , testProperty "combinator notFollowedBy twice" prop_notFollowedBy_1 , testProperty "combinator notFollowedBy eof" prop_notFollowedBy_2 + , testCase "combinator notFollowedBy cerr" case_notFollowedBy_3a + , testCase "combinator notFollowedBy cerr" case_notFollowedBy_3b + , testCase "combinator notFollowedBy eerr" case_notFollowedBy_4a + , testCase "combinator notFollowedBy eerr" case_notFollowedBy_4b + , testCase "combinator eof return value" case_eof , testProperty "combinator token" prop_token , testProperty "combinator tokens" prop_tokens , testProperty "parser state position" prop_state_pos @@ -203,7 +211,7 @@ prop_alternative_6 a b c = checkParser p r s r | c = posErr ab s $ [uneCh 'c', exEof] ++ [exCh 'a' | not a && not b] ++ [exCh 'b' | not b] | otherwise = Right s - s = abcRow' a b c + s = abcRow a b c ab = fromEnum a + fromEnum b -- Monad instance @@ -363,7 +371,7 @@ prop_lookAhead_0 a b c = checkParser p r s | h == 'b' = posErr 0 s [uneCh 'b', exCh 'a'] | h == 'c' = posErr 0 s [uneCh 'c', exSpec "label"] | otherwise = posErr 1 s [uneCh (s !! 1), exEof] - s = abcRow' a b c + s = abcRow a b c prop_lookAhead_1 :: String -> Property prop_lookAhead_1 s = checkParser p r s @@ -379,7 +387,13 @@ prop_lookAhead_2 a b c = checkParser p r s r | null s = posErr 0 s [uneEof, exCh 'a'] | a = posErr 0 s [uneCh 'a', exCh 'b'] | otherwise = posErr 0 s [uneCh (head s), exCh 'a'] - s = abcRow' a b c + s = abcRow a b c + +case_lookAhead_3 :: Assertion +case_lookAhead_3 = parse p "" s @?= posErr 1 s [msg emsg] + where p = lookAhead (char 'a' *> fail emsg) :: Parser String + emsg = "ops!" + s = "abc" prop_notFollowedBy_0 :: NonNegative Int -> NonNegative Int -> NonNegative Int -> Property @@ -410,8 +424,26 @@ prop_notFollowedBy_2 a' b' c' = checkParser p r s | otherwise = posErr a s [uneEof, exCh 'a'] s = abcRow a b c --- We omit tests for 'eof' here because it's used virtually everywhere, it's --- already thoroughly tested. +case_notFollowedBy_3a :: Assertion +case_notFollowedBy_3a = parse p "" "ab" @?= Right () + where p = notFollowedBy (char 'a' *> char 'c') + +case_notFollowedBy_3b :: Assertion +case_notFollowedBy_3b = parse p "" s @?= posErr 0 s [uneCh 'a', exCh 'c'] + where p = notFollowedBy (char 'a' *> char 'd') <* char 'c' + s = "ab" + +case_notFollowedBy_4a :: Assertion +case_notFollowedBy_4a = parse p "" "ab" @?= Right () + where p = notFollowedBy (fail "ops!") + +case_notFollowedBy_4b :: Assertion +case_notFollowedBy_4b = parse p "" s @?= posErr 0 s [uneCh 'a', exCh 'c'] + where p = notFollowedBy (fail "ops!") <* char 'c' + s = "ab" + +case_eof :: Assertion +case_eof = parse eof "" "" @?= Right () prop_token :: String -> Property prop_token s = checkParser p r s diff --git a/tests/Util.hs b/tests/Util.hs index 5e99593..3f89328 100644 --- a/tests/Util.hs +++ b/tests/Util.hs @@ -34,7 +34,6 @@ module Util , (/=\) , (!=!) , abcRow - , abcRow' , posErr , uneCh , uneStr @@ -132,15 +131,9 @@ n !=! m = simpleParse n "" === simpleParse m "" -- @a@ times, character “b” repeated @b@ times, and finally character “c” -- repeated @c@ times. -abcRow :: Int -> Int -> Int -> String -abcRow a b c = replicate a 'a' ++ replicate b 'b' ++ replicate c 'c' - --- | @abcRow' a b c@ generates string that includes character “a” if @a@ is --- 'True', then optionally character “b” if @b@ is 'True', then character --- “c” if @c@ is 'True'. - -abcRow' :: Bool -> Bool -> Bool -> String -abcRow' a b c = abcRow (fromEnum a) (fromEnum b) (fromEnum c) +abcRow :: Enum a => a -> a -> a -> String +abcRow a b c = f a 'a' ++ f b 'b' ++ f c 'c' + where f x = replicate (fromEnum x) -- | @posErr pos s ms@ is an easy way to model result of parser that -- fails. @pos@ is how many tokens (characters) has been consumed before From 4ff95dd2a8c6d5f42b4fa587bcaf81cf6b876509 Mon Sep 17 00:00:00 2001 From: mrkkrp Date: Thu, 18 Feb 2016 02:07:08 +0600 Subject: [PATCH 5/7] =?UTF-8?q?Add=20tests=20for=20=E2=80=98withRecovery?= =?UTF-8?q?=E2=80=99=20primitive?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/Prim.hs | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/tests/Prim.hs b/tests/Prim.hs index 8c50360..dd341f1 100644 --- a/tests/Prim.hs +++ b/tests/Prim.hs @@ -53,6 +53,7 @@ import Test.QuickCheck hiding (label) import Test.HUnit (Assertion, (@?=)) import Text.Megaparsec.Char +import Text.Megaparsec.Combinator import Text.Megaparsec.Error import Text.Megaparsec.Pos import Text.Megaparsec.Prim @@ -106,6 +107,17 @@ tests = testGroup "Primitive parser combinators" , testCase "combinator notFollowedBy cerr" case_notFollowedBy_3b , testCase "combinator notFollowedBy eerr" case_notFollowedBy_4a , testCase "combinator notFollowedBy eerr" case_notFollowedBy_4b + , testProperty "combinator withRecovery" prop_withRecovery_0 + , testCase "combinator withRecovery eok" case_withRecovery_1 + , testCase "combinator withRecovery meerr-rcerr" case_withRecovery_2 + , testCase "combinator withRecovery meerr-reok" case_withRecovery_3a + , testCase "combinator withRecovery meerr-reok" case_withRecovery_3b + , testCase "combinator withRecovery mcerr-rcok" case_withRecovery_4a + , testCase "combinator withRecovery mcerr-rcok" case_withRecovery_4b + , testCase "combinator withRecovery mcerr-rcerr" case_withRecovery_5 + , testCase "combinator withRecovery mcerr-reok" case_withRecovery_6a + , testCase "combinator withRecovery mcerr-reok" case_withRecovery_6b + , testCase "combinator withRecovery mcerr-reerr" case_withRecovery_7 , testCase "combinator eof return value" case_eof , testProperty "combinator token" prop_token , testProperty "combinator tokens" prop_tokens @@ -442,6 +454,81 @@ case_notFollowedBy_4b = parse p "" s @?= posErr 0 s [uneCh 'a', exCh 'c'] where p = notFollowedBy (fail "ops!") <* char 'c' s = "ab" +prop_withRecovery_0 :: NonNegative Int -> NonNegative Int + -> NonNegative Int -> Property +prop_withRecovery_0 a' b' c' = checkParser p r s + where [a,b,c] = getNonNegative <$> [a',b',c'] + p = v <$> + withRecovery (\e -> Left e <$ g 'b') (Right <$> g 'a') <*> g 'c' + v (Right x) y = Right (x ++ y) + v (Left m) _ = Left m + g = count' 1 3 . char + r | a == 0 && b == 0 && c == 0 = posErr 0 s [uneEof, exCh 'a'] + | a == 0 && b == 0 && c > 3 = posErr 0 s [uneCh 'c', exCh 'a'] + | a == 0 && b == 0 = posErr 0 s [uneCh 'c', exCh 'a'] + | a == 0 && b > 3 = posErr 3 s [uneCh 'b', exCh 'a', exCh 'c'] + | a == 0 && c == 0 = posErr b s [uneEof, exCh 'a', exCh 'c'] + | a == 0 && c > 3 = posErr (b + 3) s [uneCh 'c', exEof] + | a == 0 = Right (posErr 0 s [uneCh 'b', exCh 'a']) + | a > 3 = posErr 3 s [uneCh 'a', exCh 'c'] + | b == 0 && c == 0 = posErr a s $ [uneEof, exCh 'c'] ++ ma + | b == 0 && c > 3 = posErr (a + 3) s [uneCh 'c', exEof] + | b == 0 = Right (Right s) + | otherwise = posErr a s $ [uneCh 'b', exCh 'c'] ++ ma + ma = [exCh 'a' | a < 3] + s = abcRow a b c + +case_withRecovery_1 :: Assertion +case_withRecovery_1 = parse p "" "abc" @?= Right "foo" + where p = withRecovery (const $ return "bar") (return "foo") + +case_withRecovery_2 :: Assertion +case_withRecovery_2 = parse p "" s @?= posErr 0 s [uneCh 'a', exStr "cba"] + where p = withRecovery (\_ -> char 'a' *> fail "ops!") (string "cba") + s = "abc" + +case_withRecovery_3a :: Assertion +case_withRecovery_3a = parse p "" "abc" @?= Right "abd" + where p = withRecovery (const $ return "abd") (string "cba") + +case_withRecovery_3b :: Assertion +case_withRecovery_3b = parse p "" s @?= posErr 0 s r + where p = withRecovery (const $ return "abd") (string "cba") <* char 'd' + r = [uneCh 'a', exStr "cba", exCh 'd'] + s = "abc" + +case_withRecovery_4a :: Assertion +case_withRecovery_4a = parse p "" "abc" @?= Right "bc" + where p = withRecovery (const $ string "bc") (char 'a' *> fail "ops!") + +case_withRecovery_4b :: Assertion +case_withRecovery_4b = parse p "" s @?= posErr 3 s [uneEof, exCh 'f'] + where p = withRecovery (const $ string "bc") h <* char 'f' + h = char 'a' *> char 'd' *> pure "foo" + s = "abc" + +case_withRecovery_5 :: Assertion +case_withRecovery_5 = parse p "" s @?= posErr 1 s [msg emsg] + where p :: Parser String + p = withRecovery (\_ -> char 'b' *> fail emsg) (char 'a' *> fail emsg) + emsg = "ops!" + s = "abc" + +case_withRecovery_6a :: Assertion +case_withRecovery_6a = parse p "" "abc" @?= Right "abd" + where p = withRecovery (const $ return "abd") (char 'a' *> fail "ops!") + +case_withRecovery_6b :: Assertion +case_withRecovery_6b = parse p "" "abc" @?= posErr 0 s r + where p = withRecovery (const $ return 'g') (char 'a' *> char 'd') <* char 'f' + r = [uneCh 'b', exCh 'd', exCh 'f'] + s = "abc" + +case_withRecovery_7 :: Assertion +case_withRecovery_7 = parse p "" s @?= posErr 1 s [uneCh 'b', exCh 'd'] + where p = withRecovery (const $ fail "ops!") (char 'a' *> char 'd') + s = "abc" + case_eof :: Assertion case_eof = parse eof "" "" @?= Right () From e9df1ba30a69c6beec2761319bb7fa7afde1691b Mon Sep 17 00:00:00 2001 From: mrkkrp Date: Thu, 18 Feb 2016 02:42:37 +0600 Subject: [PATCH 6/7] =?UTF-8?q?Correct=20=E2=80=98withRecovery=E2=80=99=20?= =?UTF-8?q?so=20that=20it=20passes=20the=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Text/Megaparsec/Prim.hs | 14 ++++++++------ tests/Prim.hs | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/Text/Megaparsec/Prim.hs b/Text/Megaparsec/Prim.hs index b0d3e9f..d80cbff 100644 --- a/Text/Megaparsec/Prim.hs +++ b/Text/Megaparsec/Prim.hs @@ -614,15 +614,17 @@ pWithRecovery :: Stream s t -> ParsecT s m a pWithRecovery r p = ParsecT $ \s cok cerr eok eerr -> let mcerr err ms = - let rcerr _ _ = cerr err ms + let rcok x s' _ = cok x s' mempty + rcerr _ _ = cerr err ms reok x s' _ = eok x s' (toHints err) - reerr _ _ = cerr err ms - in unParser (r err) ms cok rcerr reok reerr + reerr _ _ = cerr err ms + in unParser (r err) ms rcok rcerr reok reerr meerr err ms = - let rcerr _ _ = eerr err ms + let rcok x s' _ = cok x s' (toHints err) + rcerr _ _ = eerr err ms reok x s' _ = eok x s' (toHints err) - reerr _ _ = eerr err ms - in unParser (r err) ms cok rcerr reok reerr + reerr _ _ = eerr err ms + in unParser (r err) ms rcok rcerr reok reerr in unParser p s cok mcerr eok meerr {-# INLINE pWithRecovery #-} diff --git a/tests/Prim.hs b/tests/Prim.hs index dd341f1..193a6cb 100644 --- a/tests/Prim.hs +++ b/tests/Prim.hs @@ -519,7 +519,7 @@ case_withRecovery_6a = parse p "" "abc" @?= Right "abd" where p = withRecovery (const $ return "abd") (char 'a' *> fail "ops!") case_withRecovery_6b :: Assertion -case_withRecovery_6b = parse p "" "abc" @?= posErr 0 s r +case_withRecovery_6b = parse p "" "abc" @?= posErr 1 s r where p = withRecovery (const $ return 'g') (char 'a' *> char 'd') <* char 'f' r = [uneCh 'b', exCh 'd', exCh 'f'] s = "abc" From 4f5d73f230acc3c6ea28995a3beed7ef9e860be8 Mon Sep 17 00:00:00 2001 From: mrkkrp Date: Thu, 18 Feb 2016 13:22:38 +0600 Subject: [PATCH 7/7] Remove statement about performance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit More recent benchmarks made after all changes show no considerable difference. It's faster than Parsec for sure, so… --- CHANGELOG.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b7df1c7..ab7a6dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,10 +3,7 @@ * Now state returned on failure is the exact state of parser at the moment when it failed, which makes incremental parsing feature much better and opens possibilities for features like “on-the-fly” recovering from parse - errors. This made `<|>` operator slower, it's now about 9 % slower than - equivalent Parsec's operator and 28 % slower than previous version of - Megaparsec. However, other combinators showed no performance degradation - and Megaparsec is still generally faster than Parsec. + errors. * The `count` combinator now works with `Applicative` instances (previously it worked only with instances of `Alternative`). It's now also faster.