Merge pull request #82 from mrkkrp/state-on-failure

Support getting actual parser state on failure
This commit is contained in:
Mark Karpov 2016-02-08 11:49:17 +05:00
commit fe9e4d736e
5 changed files with 129 additions and 59 deletions

View File

@ -1,3 +1,16 @@
## Megaparsec 4.4.0
* Now state returned on failure is the exact state of parser at the moment
when it failed, which makes incremental parsing feature much better and
opens possibilities for features like “on-the-fly” recovering from parse
errors. This made `<|>` operator slower, it's now about 9 % slower than
equivalent Parsec's operator and 28 % slower than previous version of
Megaparsec. However, other combinators showed no performance degradation
and Megaparsec is still generally faster than Parsec.
* The `count` combinator now works with `Applicative` instances (previously
it worked only with instances of `Alternative`). It's now also faster.
## Megaparsec 4.3.0
* Canonicalized `Applicative`/`Monad` instances. Thanks to Herbert Valerio

View File

@ -36,6 +36,7 @@ import Data.Foldable (asum)
#if !MIN_VERSION_base(4,8,0)
import Data.Foldable (Foldable)
import Data.Traversable (sequenceA)
#endif
-- | @between open close p@ parses @open@, followed by @p@ and @close@.
@ -57,13 +58,11 @@ choice = asum
-- | @count n p@ parses @n@ occurrences of @p@. If @n@ is smaller or
-- equal to zero, the parser equals to @return []@. Returns a list of @n@
-- values.
--
-- This parser is defined in terms of 'count'', like this:
--
-- > count n = count' n n
count :: Alternative m => Int -> m a -> m [a]
count n = count' n n
count :: Applicative m => Int -> m a -> m [a]
count n p
| n <= 0 = pure []
| otherwise = sequenceA (replicate n p)
{-# INLINE count #-}
-- | @count\' m n p@ parses from @m@ to @n@ occurrences of @p@. If @n@ is
@ -108,7 +107,6 @@ endBy1 p sep = some (p <* sep)
manyTill :: Alternative m => m a -> m end -> m [a]
manyTill p end = ([] <$ end) <|> someTill p end
{-# INLINE manyTill #-}
-- | @someTill p end@ works similarly to @manyTill p end@, but @p@ should
-- succeed at least once.
@ -148,7 +146,6 @@ sepBy1 p sep = (:) <$> p <*> many (sep *> p)
sepEndBy :: Alternative m => m a -> m sep -> m [a]
sepEndBy p sep = sepEndBy1 p sep <|> pure []
{-# INLINE sepEndBy #-}
-- | @sepEndBy1 p sep@ parses /one/ or more occurrences of @p@,
-- separated and optionally ended by @sep@. Returns a list of values

View File

@ -171,6 +171,7 @@ mergeError e1@(ParseError pos1 _) e2@(ParseError pos2 ms2) =
LT -> e2
EQ -> addErrorMessages ms2 e1
GT -> e1
{-# INLINE mergeError #-}
-- | @showMessages ms@ transforms list of error messages @ms@ into
-- their textual representation.

View File

@ -82,6 +82,17 @@ data State s = State
, stateTabWidth :: !Int }
deriving (Show, Eq)
-- | From two states, return the one with greater textual position. If the
-- positions are equal, prefer the latter state.
longestMatch :: State s -> State s -> State s
longestMatch s1@(State _ pos1 _) s2@(State _ pos2 _) =
case pos1 `compare` pos2 of
LT -> s2
EQ -> s2
GT -> s1
{-# INLINE longestMatch #-}
-- | All information available after parsing. This includes consumption of
-- input, success (with return value) or failure (with parse error), parser
-- state at the end of parsing.
@ -140,14 +151,18 @@ toHints err = Hints hints
-- Note that if resulting continuation gets 'ParseError' where all messages
-- are created with 'Message' constructor, hints are ignored.
withHints :: Hints -> (ParseError -> m b) -> ParseError -> m b
withHints
:: Hints -- ^ Hints to use
-> (ParseError -> State s -> m b) -- ^ Continuation to influence
-> ParseError -- ^ First argument of resulting continuation
-> State s -- ^ Second argument of resulting continuation
-> m b
withHints (Hints xs) c e =
let isMessage (Message _) = True
isMessage _ = False
in (if all isMessage (errorMessages e)
then c
else c . addErrorMessages (Expected <$> concat xs))
e
in if all isMessage (errorMessages e)
then c e
else c (addErrorMessages (Expected <$> concat xs) e)
-- | @accHints hs c@ results in “OK” continuation that will add given hints
-- @hs@ to third argument of original continuation @c@.
@ -221,7 +236,7 @@ instance StorableStream TL.Text Char where
-- that takes five arguments:
--
-- * State. It includes input stream, position in input stream and
-- user's backtracking state.
-- current value of tab width.
--
-- * “Consumed-OK” continuation (cok). This is just a function that
-- takes three arguments: result of parsing, state after parsing, and
@ -231,9 +246,8 @@ instance StorableStream TL.Text Char where
--
-- * “Consumed-error” continuation (cerr). This function is called when
-- some part of input stream has been consumed and parsing resulted in
-- an error. When error happens, parsing stops and we're only interested
-- in error message, so this continuation takes 'ParseError' as its only
-- argument.
-- an error. This continuation takes 'ParseError' and state information
-- at the time error occurred.
--
-- * “Empty-OK” continuation (eok). The function takes the same
-- arguments as “consumed-OK” continuation. “Empty-OK” is called when no
@ -241,8 +255,8 @@ instance StorableStream TL.Text Char where
--
-- * “Empty-error” continuation (eerr). The function is called when no
-- input has been consumed, but nonetheless parsing resulted in an
-- error. Just like “consumed-error”, the continuation take single
-- argument — 'ParseError' record.
-- error. Just like “consumed-error”, the continuation takes
-- 'ParseError' record and state information.
--
-- You call specific continuation when you want to proceed in that specific
-- branch of control flow.
@ -258,9 +272,9 @@ type Parsec s = ParsecT s Identity
newtype ParsecT s m a = ParsecT
{ unParser :: forall b. State s
-> (a -> State s -> Hints -> m b) -- consumed-OK
-> (ParseError -> m b) -- consumed-error
-> (ParseError -> State s -> m b) -- consumed-error
-> (a -> State s -> Hints -> m b) -- empty-OK
-> (ParseError -> m b) -- empty-error
-> (ParseError -> State s -> m b) -- empty-error
-> m b }
instance Functor (ParsecT s m) where
@ -284,7 +298,7 @@ instance A.Alternative (ParsecT s m) where
manyAcc :: ParsecT s m a -> ParsecT s m [a]
manyAcc p = ParsecT $ \s cok cerr eok _ ->
let errToHints c err = c (toHints err)
let errToHints c err _ = c (toHints err)
walk xs x s' _ =
unParser p s'
(seq xs $ walk $ x:xs) -- consumed-OK
@ -317,8 +331,8 @@ pBind m k = ParsecT $ \s cok cerr eok eerr ->
{-# INLINE pBind #-}
pFail :: String -> ParsecT s m a
pFail msg = ParsecT $ \s _ _ _ eerr ->
eerr $ newErrorMessage (Message msg) (statePos s)
pFail msg = ParsecT $ \s@(State _ pos _) _ _ _ eerr ->
eerr (newErrorMessage (Message msg) pos) s
{-# INLINE pFail #-}
-- | Low-level creation of the ParsecT type.
@ -330,11 +344,11 @@ mkPT k = ParsecT $ \s cok cerr eok eerr -> do
Consumed ->
case result of
OK x -> cok x s' mempty
Error e -> cerr e
Error e -> cerr e s'
Virgin ->
case result of
OK x -> eok x s' mempty
Error e -> eerr e
Error e -> eerr e s'
instance MonadIO m => MonadIO (ParsecT s m) where
liftIO = lift . liftIO
@ -364,14 +378,15 @@ instance MonadPlus (ParsecT s m) where
mplus = pPlus
pZero :: ParsecT s m a
pZero = ParsecT $ \(State _ pos _) _ _ _ eerr -> eerr $ newErrorUnknown pos
pZero = ParsecT $ \s@(State _ pos _) _ _ _ eerr ->
eerr (newErrorUnknown pos) s
pPlus :: ParsecT s m a -> ParsecT s m a -> ParsecT s m a
pPlus m n = ParsecT $ \s cok cerr eok eerr ->
let meerr err =
let ncerr err' = cerr (err' <> err)
neok x s' hs = eok x s' (toHints err <> hs)
neerr err' = eerr (err' <> err)
let meerr err ms =
let ncerr err' s' = cerr (mergeError err' err) (longestMatch ms s')
neok x s' hs = eok x s' (toHints err <> hs)
neerr err' s' = eerr (mergeError err' err) (longestMatch ms s')
in unParser n s cok ncerr neok neerr
in unParser m s cok cerr eok meerr
{-# INLINE pPlus #-}
@ -515,8 +530,8 @@ instance Stream s t => MonadParsec s (ParsecT s m) t where
updateParserState = pUpdateParserState
pFailure :: [Message] -> ParsecT s m a
pFailure msgs = ParsecT $ \(State _ pos _) _ _ _ eerr ->
eerr $ newErrorMessages msgs pos
pFailure msgs = ParsecT $ \s@(State _ pos _) _ _ _ eerr ->
eerr (newErrorMessages msgs pos) s
pLabel :: String -> ParsecT s m a -> ParsecT s m a
pLabel l p = ParsecT $ \s cok cerr eok eerr ->
@ -539,54 +554,56 @@ pLookAhead p = ParsecT $ \s _ cerr eok eerr ->
pNotFollowedBy :: Stream s t => ParsecT s m a -> ParsecT s m ()
pNotFollowedBy p = ParsecT $ \s@(State input pos _) _ _ eok eerr ->
let l = maybe eoi (showToken . fst) (uncons input)
cok' _ _ _ = eerr $ unexpectedErr l pos
cerr' _ = eok () s mempty
eok' _ _ _ = eerr $ unexpectedErr l pos
eerr' _ = eok () s mempty
cok' _ _ _ = eerr (unexpectedErr l pos) s
cerr' _ _ = eok () s mempty
eok' _ _ _ = eerr (unexpectedErr l pos) s
eerr' _ _ = eok () s mempty
in unParser p s cok' cerr' eok' eerr'
pEof :: Stream s t => ParsecT s m ()
pEof = label eoi $ ParsecT $ \s@(State input pos _) _ _ eok eerr ->
case uncons input of
Nothing -> eok () s mempty
Just (x,_) -> eerr $ unexpectedErr (showToken x) pos
Just (x,_) -> eerr (unexpectedErr (showToken x) pos) s
{-# INLINE pEof #-}
pToken :: Stream s t
=> (Int -> SourcePos -> t -> SourcePos)
-> (t -> Either [Message] a)
-> ParsecT s m a
pToken nextpos test = ParsecT $ \(State input pos w) cok _ _ eerr ->
pToken nextpos test = ParsecT $ \s@(State input pos w) cok _ _ eerr ->
case uncons input of
Nothing -> eerr $ unexpectedErr eoi pos
Nothing -> eerr (unexpectedErr eoi pos) s
Just (c,cs) ->
case test c of
Left ms -> eerr $ addErrorMessages ms (newErrorUnknown pos)
Left ms -> eerr (addErrorMessages ms (newErrorUnknown pos)) s
Right x -> let newpos = nextpos w pos c
newstate = State cs newpos w
in seq newpos $ seq newstate $ cok x newstate mempty
{-# INLINE pToken #-}
pTokens :: Stream s t
=> (Int -> SourcePos -> [t] -> SourcePos)
-> (t -> t -> Bool)
-> [t]
-> ParsecT s m [t]
=> (Int -> SourcePos -> [t] -> SourcePos)
-> (t -> t -> Bool)
-> [t]
-> ParsecT s m [t]
pTokens _ _ [] = ParsecT $ \s _ _ eok _ -> eok [] s mempty
pTokens nextpos test tts = ParsecT $ \(State input pos w) cok cerr _ eerr ->
let errExpect x = setErrorMessage (Expected $ showToken tts)
(newErrorMessage (Unexpected x) pos)
walk [] is rs = let pos' = nextpos w pos tts
s' = State rs pos' w
in cok (reverse is) s' mempty
pTokens nextpos test tts = ParsecT $ \s@(State input pos w) cok cerr _ eerr ->
let r = showToken . reverse
errExpect x = setErrorMessage (Expected $ showToken tts)
(newErrorMessage (Unexpected x) pos)
walk [] is rs =
let pos' = nextpos w pos tts
s' = State rs pos' w
in cok (reverse is) s' mempty
walk (t:ts) is rs =
let errorCont = if null is then eerr else cerr
what = if null is then eoi else showToken $ reverse is
what = if null is then eoi else r is
in case uncons rs of
Nothing -> errorCont . errExpect $ what
Nothing -> errorCont (errExpect what) s
Just (x,xs)
| test t x -> walk ts (x:is) xs
| otherwise -> errorCont . errExpect . showToken $ reverse (x:is)
| otherwise -> errorCont (errExpect $ r (x:is)) s
in walk tts [] input
{-# INLINE pTokens #-}
@ -778,10 +795,10 @@ runParsecT :: Monad m
-> State s -- ^ Initial state
-> m (Reply s a)
runParsecT p s = unParser p s cok cerr eok eerr
where cok a s' _ = return $ Reply s' Consumed (OK a)
cerr err = return $ Reply s Consumed (Error err)
eok a s' _ = return $ Reply s' Virgin (OK a)
eerr err = return $ Reply s Virgin (Error err)
where cok a s' _ = return $ Reply s' Consumed (OK a)
cerr err s' = return $ Reply s' Consumed (Error err)
eok a s' _ = return $ Reply s' Virgin (OK a)
eerr err s' = return $ Reply s' Virgin (Error err)
-- | @parseFromFile p filename@ runs parser @p@ on the input read from
-- @filename@. Returns either a 'ParseError' ('Left') or a value of type @a@

View File

@ -51,7 +51,7 @@ import Test.Framework.Providers.QuickCheck2 (testProperty)
import Test.QuickCheck hiding (label)
import Text.Megaparsec.Char
import Text.Megaparsec.Error (Message (..), ParseError, newErrorMessages)
import Text.Megaparsec.Error
import Text.Megaparsec.Pos
import Text.Megaparsec.Prim
import Text.Megaparsec.String
@ -107,6 +107,10 @@ tests = testGroup "Primitive parser combinators"
, testProperty "parser state general" prop_state
, testProperty "custom state parsing" prop_runParser'
, testProperty "custom state parsing (transformer)" prop_runParserT'
, testProperty "state on failure (mplus)" prop_stOnFail_0
, testProperty "state on failure (tab)" prop_stOnFail_1
, testProperty "state on failure (eof)" prop_stOnFail_2
, testProperty "state on failure (notFollowedBy)" prop_stOnFail_3
, testProperty "IdentityT try" prop_IdentityT_try
, testProperty "IdentityT notFollowedBy" prop_IdentityT_notFollowedBy
, testProperty "ReaderT try" prop_ReaderT_try
@ -479,6 +483,44 @@ emulateStrParsing st@(State i pos t) s =
in (st, Left $ newErrorMessages (exStr s : [uneStuff]) pos)
where l = length $ takeWhile id $ zipWith (==) s i
-- Additional tests to check returned state on failure
prop_stOnFail_0 :: Positive Int -> Positive Int -> Property
prop_stOnFail_0 na' nb' = runParser' p (stateFromInput s) === (i, r)
where i = let (Left x) = r in State "" (errorPos x) defaultTabWidth
na = getPositive na'
nb = getPositive nb'
p = try (many (char 'a') <* many (char 'b') <* char 'c')
<|> (many (char 'a') <* char 'c')
r = posErr (na + nb) s [exCh 'b', exCh 'c', uneEof]
s = replicate na 'a' ++ replicate nb 'b'
prop_stOnFail_1 :: Positive Int -> Positive Int -> Property
prop_stOnFail_1 na' t' = runParser' p (stateFromInput s) === (i, r)
where i = let (Left x) = r in State "" (errorPos x) t
na = getPositive na'
t = getPositive t'
p = many (char 'a') <* setTabWidth t <* fail myMsg
r = posErr na s [msg myMsg]
s = replicate na 'a'
myMsg = "failing now!"
prop_stOnFail_2 :: String -> Char -> Property
prop_stOnFail_2 s' ch = runParser' p (stateFromInput s) === (i, r)
where i = let (Left x) = r in State [ch] (errorPos x) defaultTabWidth
r = posErr (length s') s [uneCh ch, exEof]
p = string s' <* eof
s = s' ++ [ch]
prop_stOnFail_3 :: String -> Property
prop_stOnFail_3 s = runParser' p (stateFromInput s) === (i, r)
where i = let (Left x) = r in State s (errorPos x) defaultTabWidth
r = posErr 0 s [if null s then uneEof else uneCh (head s)]
p = notFollowedBy (string s)
stateFromInput :: Stream s t => s -> State s
stateFromInput s = State s (initialPos "") defaultTabWidth
-- IdentityT instance of MonadParsec
prop_IdentityT_try :: String -> String -> String -> Property