mirror of
https://github.com/mrkkrp/megaparsec.git
synced 2024-12-28 18:54:34 +03:00
Merge pull request #82 from mrkkrp/state-on-failure
Support getting actual parser state on failure
This commit is contained in:
commit
fe9e4d736e
13
CHANGELOG.md
13
CHANGELOG.md
@ -1,3 +1,16 @@
|
||||
## Megaparsec 4.4.0
|
||||
|
||||
* Now state returned on failure is the exact state of parser at the moment
|
||||
when it failed, which makes incremental parsing feature much better and
|
||||
opens possibilities for features like “on-the-fly” recovering from parse
|
||||
errors. This made `<|>` operator slower, it's now about 9 % slower than
|
||||
equivalent Parsec's operator and 28 % slower than previous version of
|
||||
Megaparsec. However, other combinators showed no performance degradation
|
||||
and Megaparsec is still generally faster than Parsec.
|
||||
|
||||
* The `count` combinator now works with `Applicative` instances (previously
|
||||
it worked only with instances of `Alternative`). It's now also faster.
|
||||
|
||||
## Megaparsec 4.3.0
|
||||
|
||||
* Canonicalized `Applicative`/`Monad` instances. Thanks to Herbert Valerio
|
||||
|
@ -36,6 +36,7 @@ import Data.Foldable (asum)
|
||||
|
||||
#if !MIN_VERSION_base(4,8,0)
|
||||
import Data.Foldable (Foldable)
|
||||
import Data.Traversable (sequenceA)
|
||||
#endif
|
||||
|
||||
-- | @between open close p@ parses @open@, followed by @p@ and @close@.
|
||||
@ -57,13 +58,11 @@ choice = asum
|
||||
-- | @count n p@ parses @n@ occurrences of @p@. If @n@ is smaller or
|
||||
-- equal to zero, the parser equals to @return []@. Returns a list of @n@
|
||||
-- values.
|
||||
--
|
||||
-- This parser is defined in terms of 'count'', like this:
|
||||
--
|
||||
-- > count n = count' n n
|
||||
|
||||
count :: Alternative m => Int -> m a -> m [a]
|
||||
count n = count' n n
|
||||
count :: Applicative m => Int -> m a -> m [a]
|
||||
count n p
|
||||
| n <= 0 = pure []
|
||||
| otherwise = sequenceA (replicate n p)
|
||||
{-# INLINE count #-}
|
||||
|
||||
-- | @count\' m n p@ parses from @m@ to @n@ occurrences of @p@. If @n@ is
|
||||
@ -108,7 +107,6 @@ endBy1 p sep = some (p <* sep)
|
||||
|
||||
manyTill :: Alternative m => m a -> m end -> m [a]
|
||||
manyTill p end = ([] <$ end) <|> someTill p end
|
||||
{-# INLINE manyTill #-}
|
||||
|
||||
-- | @someTill p end@ works similarly to @manyTill p end@, but @p@ should
|
||||
-- succeed at least once.
|
||||
@ -148,7 +146,6 @@ sepBy1 p sep = (:) <$> p <*> many (sep *> p)
|
||||
|
||||
sepEndBy :: Alternative m => m a -> m sep -> m [a]
|
||||
sepEndBy p sep = sepEndBy1 p sep <|> pure []
|
||||
{-# INLINE sepEndBy #-}
|
||||
|
||||
-- | @sepEndBy1 p sep@ parses /one/ or more occurrences of @p@,
|
||||
-- separated and optionally ended by @sep@. Returns a list of values
|
||||
|
@ -171,6 +171,7 @@ mergeError e1@(ParseError pos1 _) e2@(ParseError pos2 ms2) =
|
||||
LT -> e2
|
||||
EQ -> addErrorMessages ms2 e1
|
||||
GT -> e1
|
||||
{-# INLINE mergeError #-}
|
||||
|
||||
-- | @showMessages ms@ transforms list of error messages @ms@ into
|
||||
-- their textual representation.
|
||||
|
@ -82,6 +82,17 @@ data State s = State
|
||||
, stateTabWidth :: !Int }
|
||||
deriving (Show, Eq)
|
||||
|
||||
-- | From two states, return the one with greater textual position. If the
|
||||
-- positions are equal, prefer the latter state.
|
||||
|
||||
longestMatch :: State s -> State s -> State s
|
||||
longestMatch s1@(State _ pos1 _) s2@(State _ pos2 _) =
|
||||
case pos1 `compare` pos2 of
|
||||
LT -> s2
|
||||
EQ -> s2
|
||||
GT -> s1
|
||||
{-# INLINE longestMatch #-}
|
||||
|
||||
-- | All information available after parsing. This includes consumption of
|
||||
-- input, success (with return value) or failure (with parse error), parser
|
||||
-- state at the end of parsing.
|
||||
@ -140,14 +151,18 @@ toHints err = Hints hints
|
||||
-- Note that if resulting continuation gets 'ParseError' where all messages
|
||||
-- are created with 'Message' constructor, hints are ignored.
|
||||
|
||||
withHints :: Hints -> (ParseError -> m b) -> ParseError -> m b
|
||||
withHints
|
||||
:: Hints -- ^ Hints to use
|
||||
-> (ParseError -> State s -> m b) -- ^ Continuation to influence
|
||||
-> ParseError -- ^ First argument of resulting continuation
|
||||
-> State s -- ^ Second argument of resulting continuation
|
||||
-> m b
|
||||
withHints (Hints xs) c e =
|
||||
let isMessage (Message _) = True
|
||||
isMessage _ = False
|
||||
in (if all isMessage (errorMessages e)
|
||||
then c
|
||||
else c . addErrorMessages (Expected <$> concat xs))
|
||||
e
|
||||
in if all isMessage (errorMessages e)
|
||||
then c e
|
||||
else c (addErrorMessages (Expected <$> concat xs) e)
|
||||
|
||||
-- | @accHints hs c@ results in “OK” continuation that will add given hints
|
||||
-- @hs@ to third argument of original continuation @c@.
|
||||
@ -221,7 +236,7 @@ instance StorableStream TL.Text Char where
|
||||
-- that takes five arguments:
|
||||
--
|
||||
-- * State. It includes input stream, position in input stream and
|
||||
-- user's backtracking state.
|
||||
-- current value of tab width.
|
||||
--
|
||||
-- * “Consumed-OK” continuation (cok). This is just a function that
|
||||
-- takes three arguments: result of parsing, state after parsing, and
|
||||
@ -231,9 +246,8 @@ instance StorableStream TL.Text Char where
|
||||
--
|
||||
-- * “Consumed-error” continuation (cerr). This function is called when
|
||||
-- some part of input stream has been consumed and parsing resulted in
|
||||
-- an error. When error happens, parsing stops and we're only interested
|
||||
-- in error message, so this continuation takes 'ParseError' as its only
|
||||
-- argument.
|
||||
-- an error. This continuation takes 'ParseError' and state information
|
||||
-- at the time error occurred.
|
||||
--
|
||||
-- * “Empty-OK” continuation (eok). The function takes the same
|
||||
-- arguments as “consumed-OK” continuation. “Empty-OK” is called when no
|
||||
@ -241,8 +255,8 @@ instance StorableStream TL.Text Char where
|
||||
--
|
||||
-- * “Empty-error” continuation (eerr). The function is called when no
|
||||
-- input has been consumed, but nonetheless parsing resulted in an
|
||||
-- error. Just like “consumed-error”, the continuation take single
|
||||
-- argument — 'ParseError' record.
|
||||
-- error. Just like “consumed-error”, the continuation takes
|
||||
-- 'ParseError' record and state information.
|
||||
--
|
||||
-- You call specific continuation when you want to proceed in that specific
|
||||
-- branch of control flow.
|
||||
@ -258,9 +272,9 @@ type Parsec s = ParsecT s Identity
|
||||
newtype ParsecT s m a = ParsecT
|
||||
{ unParser :: forall b. State s
|
||||
-> (a -> State s -> Hints -> m b) -- consumed-OK
|
||||
-> (ParseError -> m b) -- consumed-error
|
||||
-> (ParseError -> State s -> m b) -- consumed-error
|
||||
-> (a -> State s -> Hints -> m b) -- empty-OK
|
||||
-> (ParseError -> m b) -- empty-error
|
||||
-> (ParseError -> State s -> m b) -- empty-error
|
||||
-> m b }
|
||||
|
||||
instance Functor (ParsecT s m) where
|
||||
@ -284,7 +298,7 @@ instance A.Alternative (ParsecT s m) where
|
||||
|
||||
manyAcc :: ParsecT s m a -> ParsecT s m [a]
|
||||
manyAcc p = ParsecT $ \s cok cerr eok _ ->
|
||||
let errToHints c err = c (toHints err)
|
||||
let errToHints c err _ = c (toHints err)
|
||||
walk xs x s' _ =
|
||||
unParser p s'
|
||||
(seq xs $ walk $ x:xs) -- consumed-OK
|
||||
@ -317,8 +331,8 @@ pBind m k = ParsecT $ \s cok cerr eok eerr ->
|
||||
{-# INLINE pBind #-}
|
||||
|
||||
pFail :: String -> ParsecT s m a
|
||||
pFail msg = ParsecT $ \s _ _ _ eerr ->
|
||||
eerr $ newErrorMessage (Message msg) (statePos s)
|
||||
pFail msg = ParsecT $ \s@(State _ pos _) _ _ _ eerr ->
|
||||
eerr (newErrorMessage (Message msg) pos) s
|
||||
{-# INLINE pFail #-}
|
||||
|
||||
-- | Low-level creation of the ParsecT type.
|
||||
@ -330,11 +344,11 @@ mkPT k = ParsecT $ \s cok cerr eok eerr -> do
|
||||
Consumed ->
|
||||
case result of
|
||||
OK x -> cok x s' mempty
|
||||
Error e -> cerr e
|
||||
Error e -> cerr e s'
|
||||
Virgin ->
|
||||
case result of
|
||||
OK x -> eok x s' mempty
|
||||
Error e -> eerr e
|
||||
Error e -> eerr e s'
|
||||
|
||||
instance MonadIO m => MonadIO (ParsecT s m) where
|
||||
liftIO = lift . liftIO
|
||||
@ -364,14 +378,15 @@ instance MonadPlus (ParsecT s m) where
|
||||
mplus = pPlus
|
||||
|
||||
pZero :: ParsecT s m a
|
||||
pZero = ParsecT $ \(State _ pos _) _ _ _ eerr -> eerr $ newErrorUnknown pos
|
||||
pZero = ParsecT $ \s@(State _ pos _) _ _ _ eerr ->
|
||||
eerr (newErrorUnknown pos) s
|
||||
|
||||
pPlus :: ParsecT s m a -> ParsecT s m a -> ParsecT s m a
|
||||
pPlus m n = ParsecT $ \s cok cerr eok eerr ->
|
||||
let meerr err =
|
||||
let ncerr err' = cerr (err' <> err)
|
||||
neok x s' hs = eok x s' (toHints err <> hs)
|
||||
neerr err' = eerr (err' <> err)
|
||||
let meerr err ms =
|
||||
let ncerr err' s' = cerr (mergeError err' err) (longestMatch ms s')
|
||||
neok x s' hs = eok x s' (toHints err <> hs)
|
||||
neerr err' s' = eerr (mergeError err' err) (longestMatch ms s')
|
||||
in unParser n s cok ncerr neok neerr
|
||||
in unParser m s cok cerr eok meerr
|
||||
{-# INLINE pPlus #-}
|
||||
@ -515,8 +530,8 @@ instance Stream s t => MonadParsec s (ParsecT s m) t where
|
||||
updateParserState = pUpdateParserState
|
||||
|
||||
pFailure :: [Message] -> ParsecT s m a
|
||||
pFailure msgs = ParsecT $ \(State _ pos _) _ _ _ eerr ->
|
||||
eerr $ newErrorMessages msgs pos
|
||||
pFailure msgs = ParsecT $ \s@(State _ pos _) _ _ _ eerr ->
|
||||
eerr (newErrorMessages msgs pos) s
|
||||
|
||||
pLabel :: String -> ParsecT s m a -> ParsecT s m a
|
||||
pLabel l p = ParsecT $ \s cok cerr eok eerr ->
|
||||
@ -539,54 +554,56 @@ pLookAhead p = ParsecT $ \s _ cerr eok eerr ->
|
||||
pNotFollowedBy :: Stream s t => ParsecT s m a -> ParsecT s m ()
|
||||
pNotFollowedBy p = ParsecT $ \s@(State input pos _) _ _ eok eerr ->
|
||||
let l = maybe eoi (showToken . fst) (uncons input)
|
||||
cok' _ _ _ = eerr $ unexpectedErr l pos
|
||||
cerr' _ = eok () s mempty
|
||||
eok' _ _ _ = eerr $ unexpectedErr l pos
|
||||
eerr' _ = eok () s mempty
|
||||
cok' _ _ _ = eerr (unexpectedErr l pos) s
|
||||
cerr' _ _ = eok () s mempty
|
||||
eok' _ _ _ = eerr (unexpectedErr l pos) s
|
||||
eerr' _ _ = eok () s mempty
|
||||
in unParser p s cok' cerr' eok' eerr'
|
||||
|
||||
pEof :: Stream s t => ParsecT s m ()
|
||||
pEof = label eoi $ ParsecT $ \s@(State input pos _) _ _ eok eerr ->
|
||||
case uncons input of
|
||||
Nothing -> eok () s mempty
|
||||
Just (x,_) -> eerr $ unexpectedErr (showToken x) pos
|
||||
Just (x,_) -> eerr (unexpectedErr (showToken x) pos) s
|
||||
{-# INLINE pEof #-}
|
||||
|
||||
pToken :: Stream s t
|
||||
=> (Int -> SourcePos -> t -> SourcePos)
|
||||
-> (t -> Either [Message] a)
|
||||
-> ParsecT s m a
|
||||
pToken nextpos test = ParsecT $ \(State input pos w) cok _ _ eerr ->
|
||||
pToken nextpos test = ParsecT $ \s@(State input pos w) cok _ _ eerr ->
|
||||
case uncons input of
|
||||
Nothing -> eerr $ unexpectedErr eoi pos
|
||||
Nothing -> eerr (unexpectedErr eoi pos) s
|
||||
Just (c,cs) ->
|
||||
case test c of
|
||||
Left ms -> eerr $ addErrorMessages ms (newErrorUnknown pos)
|
||||
Left ms -> eerr (addErrorMessages ms (newErrorUnknown pos)) s
|
||||
Right x -> let newpos = nextpos w pos c
|
||||
newstate = State cs newpos w
|
||||
in seq newpos $ seq newstate $ cok x newstate mempty
|
||||
{-# INLINE pToken #-}
|
||||
|
||||
pTokens :: Stream s t
|
||||
=> (Int -> SourcePos -> [t] -> SourcePos)
|
||||
-> (t -> t -> Bool)
|
||||
-> [t]
|
||||
-> ParsecT s m [t]
|
||||
=> (Int -> SourcePos -> [t] -> SourcePos)
|
||||
-> (t -> t -> Bool)
|
||||
-> [t]
|
||||
-> ParsecT s m [t]
|
||||
pTokens _ _ [] = ParsecT $ \s _ _ eok _ -> eok [] s mempty
|
||||
pTokens nextpos test tts = ParsecT $ \(State input pos w) cok cerr _ eerr ->
|
||||
let errExpect x = setErrorMessage (Expected $ showToken tts)
|
||||
(newErrorMessage (Unexpected x) pos)
|
||||
walk [] is rs = let pos' = nextpos w pos tts
|
||||
s' = State rs pos' w
|
||||
in cok (reverse is) s' mempty
|
||||
pTokens nextpos test tts = ParsecT $ \s@(State input pos w) cok cerr _ eerr ->
|
||||
let r = showToken . reverse
|
||||
errExpect x = setErrorMessage (Expected $ showToken tts)
|
||||
(newErrorMessage (Unexpected x) pos)
|
||||
walk [] is rs =
|
||||
let pos' = nextpos w pos tts
|
||||
s' = State rs pos' w
|
||||
in cok (reverse is) s' mempty
|
||||
walk (t:ts) is rs =
|
||||
let errorCont = if null is then eerr else cerr
|
||||
what = if null is then eoi else showToken $ reverse is
|
||||
what = if null is then eoi else r is
|
||||
in case uncons rs of
|
||||
Nothing -> errorCont . errExpect $ what
|
||||
Nothing -> errorCont (errExpect what) s
|
||||
Just (x,xs)
|
||||
| test t x -> walk ts (x:is) xs
|
||||
| otherwise -> errorCont . errExpect . showToken $ reverse (x:is)
|
||||
| otherwise -> errorCont (errExpect $ r (x:is)) s
|
||||
in walk tts [] input
|
||||
{-# INLINE pTokens #-}
|
||||
|
||||
@ -778,10 +795,10 @@ runParsecT :: Monad m
|
||||
-> State s -- ^ Initial state
|
||||
-> m (Reply s a)
|
||||
runParsecT p s = unParser p s cok cerr eok eerr
|
||||
where cok a s' _ = return $ Reply s' Consumed (OK a)
|
||||
cerr err = return $ Reply s Consumed (Error err)
|
||||
eok a s' _ = return $ Reply s' Virgin (OK a)
|
||||
eerr err = return $ Reply s Virgin (Error err)
|
||||
where cok a s' _ = return $ Reply s' Consumed (OK a)
|
||||
cerr err s' = return $ Reply s' Consumed (Error err)
|
||||
eok a s' _ = return $ Reply s' Virgin (OK a)
|
||||
eerr err s' = return $ Reply s' Virgin (Error err)
|
||||
|
||||
-- | @parseFromFile p filename@ runs parser @p@ on the input read from
|
||||
-- @filename@. Returns either a 'ParseError' ('Left') or a value of type @a@
|
||||
|
@ -51,7 +51,7 @@ import Test.Framework.Providers.QuickCheck2 (testProperty)
|
||||
import Test.QuickCheck hiding (label)
|
||||
|
||||
import Text.Megaparsec.Char
|
||||
import Text.Megaparsec.Error (Message (..), ParseError, newErrorMessages)
|
||||
import Text.Megaparsec.Error
|
||||
import Text.Megaparsec.Pos
|
||||
import Text.Megaparsec.Prim
|
||||
import Text.Megaparsec.String
|
||||
@ -107,6 +107,10 @@ tests = testGroup "Primitive parser combinators"
|
||||
, testProperty "parser state general" prop_state
|
||||
, testProperty "custom state parsing" prop_runParser'
|
||||
, testProperty "custom state parsing (transformer)" prop_runParserT'
|
||||
, testProperty "state on failure (mplus)" prop_stOnFail_0
|
||||
, testProperty "state on failure (tab)" prop_stOnFail_1
|
||||
, testProperty "state on failure (eof)" prop_stOnFail_2
|
||||
, testProperty "state on failure (notFollowedBy)" prop_stOnFail_3
|
||||
, testProperty "IdentityT try" prop_IdentityT_try
|
||||
, testProperty "IdentityT notFollowedBy" prop_IdentityT_notFollowedBy
|
||||
, testProperty "ReaderT try" prop_ReaderT_try
|
||||
@ -479,6 +483,44 @@ emulateStrParsing st@(State i pos t) s =
|
||||
in (st, Left $ newErrorMessages (exStr s : [uneStuff]) pos)
|
||||
where l = length $ takeWhile id $ zipWith (==) s i
|
||||
|
||||
-- Additional tests to check returned state on failure
|
||||
|
||||
prop_stOnFail_0 :: Positive Int -> Positive Int -> Property
|
||||
prop_stOnFail_0 na' nb' = runParser' p (stateFromInput s) === (i, r)
|
||||
where i = let (Left x) = r in State "" (errorPos x) defaultTabWidth
|
||||
na = getPositive na'
|
||||
nb = getPositive nb'
|
||||
p = try (many (char 'a') <* many (char 'b') <* char 'c')
|
||||
<|> (many (char 'a') <* char 'c')
|
||||
r = posErr (na + nb) s [exCh 'b', exCh 'c', uneEof]
|
||||
s = replicate na 'a' ++ replicate nb 'b'
|
||||
|
||||
prop_stOnFail_1 :: Positive Int -> Positive Int -> Property
|
||||
prop_stOnFail_1 na' t' = runParser' p (stateFromInput s) === (i, r)
|
||||
where i = let (Left x) = r in State "" (errorPos x) t
|
||||
na = getPositive na'
|
||||
t = getPositive t'
|
||||
p = many (char 'a') <* setTabWidth t <* fail myMsg
|
||||
r = posErr na s [msg myMsg]
|
||||
s = replicate na 'a'
|
||||
myMsg = "failing now!"
|
||||
|
||||
prop_stOnFail_2 :: String -> Char -> Property
|
||||
prop_stOnFail_2 s' ch = runParser' p (stateFromInput s) === (i, r)
|
||||
where i = let (Left x) = r in State [ch] (errorPos x) defaultTabWidth
|
||||
r = posErr (length s') s [uneCh ch, exEof]
|
||||
p = string s' <* eof
|
||||
s = s' ++ [ch]
|
||||
|
||||
prop_stOnFail_3 :: String -> Property
|
||||
prop_stOnFail_3 s = runParser' p (stateFromInput s) === (i, r)
|
||||
where i = let (Left x) = r in State s (errorPos x) defaultTabWidth
|
||||
r = posErr 0 s [if null s then uneEof else uneCh (head s)]
|
||||
p = notFollowedBy (string s)
|
||||
|
||||
stateFromInput :: Stream s t => s -> State s
|
||||
stateFromInput s = State s (initialPos "") defaultTabWidth
|
||||
|
||||
-- IdentityT instance of MonadParsec
|
||||
|
||||
prop_IdentityT_try :: String -> String -> String -> Property
|
||||
|
Loading…
Reference in New Issue
Block a user