megaparsec/Text/Megaparsec/Prim.hs
mrkkrp fce6c3187c eliminated user state and written ‘MonadParsec’
Close # 27.

Backtracking user state can be achieved via combination of ‘StateT’
monad transformer and ‘ParsecT’:

  StateT StateType (ParsecT s m a)

This user state can be more flexible. This fact renders current built-in
user state redundant.

To help work with this new approach (combining monad transformers more
freely) we introduce ‘MonadParsec’ MTL-style type class. All tools that
come with Megaparsec library were modified to work smoothly with any
instance of ‘MonadParsec’, not only ‘ParsecT’.
2015-09-18 15:33:44 +06:00

678 lines
23 KiB
Haskell
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- |
-- Module : Text.Megaparsec.Prim
-- Copyright : © 2015 Megaparsec contributors
-- © 2007 Paolo Martini
-- © 19992001 Daan Leijen
-- License : BSD3
--
-- Maintainer : Mark Karpov <markkarpov@opmbx.org>
-- Stability : experimental
-- Portability : portable
--
-- The primitive parser combinators.
{-# OPTIONS_HADDOCK not-home #-}
module Text.Megaparsec.Prim
( -- * Used data-types
State (..)
, Stream (..)
, Consumed (..)
, Reply (..)
, Parsec
, ParsecT
-- * Primitive combinators
, MonadParsec (..)
-- * Parser state combinators
, getPosition
, setPosition
, getInput
, setInput
, setParserState
-- * Running parser
, runParser
, runParserT
, parse
, parse'
, parseTest )
where
import Data.Bool (bool)
import Data.Monoid
import Control.Monad
import Control.Monad.Cont.Class
import Control.Monad.Error.Class
import Control.Monad.Identity
import Control.Monad.Reader.Class
import Control.Monad.State.Class hiding (state)
import Control.Monad.Trans
import qualified Control.Applicative as A
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import Text.Megaparsec.Error
import Text.Megaparsec.Pos
import Text.Megaparsec.ShowToken
-- | This is Megaparsec state, it's parametrized over stream type @s@.
data State s = State
{ stateInput :: s
, statePos :: !SourcePos }
deriving (Show, Eq)
-- | An instance of @Stream s t@ has stream type @s@, and token type @t@
-- determined by the stream.
--
-- Some rough guidelines for a “correct” instance of Stream:
--
-- * @unfoldM uncons@ gives the @[t]@ corresponding to the stream.
-- * A @Stream@ instance is responsible for maintaining the “position
-- within the stream” in the stream state @s@. This is trivial unless
-- you are using the monad in a non-trivial way.
class (ShowToken t, ShowToken [t]) => Stream s t | s -> t where
uncons :: s -> Maybe (t, s)
instance (ShowToken t, ShowToken [t]) => Stream [t] t where
uncons [] = Nothing
uncons (t:ts) = Just (t, ts)
{-# INLINE uncons #-}
instance Stream B.ByteString Char where
uncons = B.uncons
{-# INLINE uncons #-}
instance Stream BL.ByteString Char where
uncons = BL.uncons
{-# INLINE uncons #-}
instance Stream T.Text Char where
uncons = T.uncons
{-# INLINE uncons #-}
instance Stream TL.Text Char where
uncons = TL.uncons
{-# INLINE uncons #-}
-- | This data structure represents an aspect of result of parser's
-- work. The two constructors have the following meaning:
--
-- * @Cosumed@ is a wrapper for result when some part of input stream
-- was consumed.
-- * @Empty@ is a wrapper for result when no input was consumed.
--
-- See also: 'Reply'.
data Consumed a = Consumed a | Empty !a
-- | This data structure represents an aspect of result of parser's
-- work. The two constructors have the following meaning:
--
-- * @Ok@ for successfully run parser.
-- * @Error@ for failed parser.
--
-- See also 'Consumed'.
data Reply s a = Ok a !(State s) | Error ParseError
-- | 'Hints' represent collection of strings to be included into 'ParserError'
-- as “expected” messages when a parser fails without consuming input right
-- after successful parser that produced the hints.
--
-- For example, without hints you could get:
--
-- >>> parseTest (many (char 'r') <* eof) "ra"
-- parse error at line 1, column 2:
-- unexpected 'a'
-- expecting end of input
--
-- we're getting better error messages with help of hints:
--
-- >>> parseTest (many (char 'r') <* eof) "ra"
-- parse error at line 1, column 2:
-- unexpected 'a'
-- expecting 'r' or end of input
newtype Hints = Hints [[String]] deriving Monoid
-- | Convert 'ParseError' record into 'Hints'.
toHints :: ParseError -> Hints
toHints err = Hints hints
where hints = if null msgs then [] else [messageString <$> msgs]
msgs = filter ((== 1) . fromEnum) $ errorMessages err
-- | @withHints hs c@ makes “error” continuation @c@ use given hints @hs@.
withHints :: Hints -> (ParseError -> m b) -> ParseError -> m b
withHints (Hints xs) c = c . addHints
where addHints err = foldr addErrorMessage err (Expected <$> concat xs)
-- | @accHints hs c@ results in “OK” continuation that will add given hints
-- @hs@ to third argument of original continuation @c@.
accHints :: Hints -> (a -> State s -> Hints -> m b) ->
a -> State s -> Hints -> m b
accHints hs1 c x s hs2 = c x s (hs1 <> hs2)
-- | Replace most recent group of hints (if any) with given string. Used in
-- 'label' combinator.
refreshLastHint :: Hints -> String -> Hints
refreshLastHint (Hints []) _ = Hints []
refreshLastHint (Hints (_:xs)) "" = Hints xs
refreshLastHint (Hints (_:xs)) l = Hints ([l]:xs)
-- If you're reading this, you may be interested in how Megaparsec works on
-- lower level. That's quite simple. 'ParsecT' is a wrapper around function
-- that takes five arguments:
--
-- * State. It includes input stream, position in input stream and
-- user's backtracking state.
--
-- * “Consumed-OK” continuation (cok). This is just a function that
-- takes three arguments: result of parsing, state after parsing, and
-- hints (see their description above). This continuation is called when
-- something has been consumed during parsing and result is OK (no error
-- occurred).
--
-- * “Consumed-error” continuation (cerr). This function is called when
-- some part of input stream has been consumed and parsing resulted in
-- an error. When error happens, parsing stops and we're only interested
-- in error message, so this continuation takes 'ParseError' as its only
-- argument.
--
-- * “Empty-OK” continuation (eok). The function takes the same
-- arguments as “consumed-OK” continuation. “Empty-OK” is called when no
-- input has been consumed and no error occurred.
--
-- * “Empty-error” continuation (eerr). The function is called when no
-- input has been consumed, but nonetheless parsing resulted in an
-- error. Just like “consumed-error”, the continuation take single
-- argument — 'ParseError' record.
--
-- You call specific continuation when you want to proceed in that specific
-- branch of control flow.
-- | @Parsec@ is non-transformer variant of more general @ParsecT@
-- monad-transformer.
type Parsec s = ParsecT s Identity
-- | @ParsecT s m a@ is a parser with stream type @s@, underlying monad @m@
-- and return type @a@.
newtype ParsecT s m a = ParsecT
{ unParser :: forall b. State s
-> (a -> State s -> Hints -> m b) -- consumed-OK
-> (ParseError -> m b) -- consumed-error
-> (a -> State s -> Hints -> m b) -- empty-OK
-> (ParseError -> m b) -- empty-error
-> m b }
instance Functor (ParsecT s m) where
fmap = pMap
pMap :: (a -> b) -> ParsecT s m a -> ParsecT s m b
pMap f p = ParsecT $ \s cok cerr eok eerr ->
unParser p s (cok . f) cerr (eok . f) eerr
{-# INLINE pMap #-}
instance A.Applicative (ParsecT s m) where
pure = return
(<*>) = ap
(*>) = (>>)
p1 <* p2 = do { x1 <- p1 ; void p2 ; return x1 }
instance A.Alternative (ParsecT s m) where
empty = mzero
(<|>) = mplus
many p = reverse <$> manyAcc p
manyAcc :: ParsecT s m a -> ParsecT s m [a]
manyAcc p = ParsecT $ \s cok cerr eok _ ->
let errToHints c err = c (toHints err)
walk xs x s' _ =
unParser p s'
(seq xs $ walk $ x:xs) -- consumed-OK
cerr -- consumed-error
manyErr -- empty-OK
(errToHints $ cok (x:xs) s') -- empty-error
in unParser p s (walk []) cerr manyErr (errToHints $ eok [] s)
manyErr :: a
manyErr = error
"Text.Megaparsec.Prim.many: combinator 'many' is applied to a parser \
\that accepts an empty string."
instance Monad (ParsecT s m) where
return = pReturn
(>>=) = pBind
fail = pFail
pReturn :: a -> ParsecT s m a
pReturn x = ParsecT $ \s _ _ eok _ -> eok x s mempty
{-# INLINE pReturn #-}
pBind :: ParsecT s m a -> (a -> ParsecT s m b) -> ParsecT s m b
pBind m k = ParsecT $ \s cok cerr eok eerr ->
let mcok x s' hs = unParser (k x) s' cok cerr
(accHints hs cok) (withHints hs cerr)
meok x s' hs = unParser (k x) s' cok cerr
(accHints hs eok) (withHints hs eerr)
in unParser m s mcok cerr meok eerr
{-# INLINE pBind #-}
pFail :: String -> ParsecT s m a
pFail msg = ParsecT $ \s _ _ _ eerr ->
eerr $ newErrorMessage (Message msg) (statePos s)
{-# INLINE pFail #-}
-- | Low-level creation of the ParsecT type.
mkPT :: Monad m => (State s -> m (Consumed (m (Reply s a)))) -> ParsecT s m a
mkPT k = ParsecT $ \s cok cerr eok eerr -> do
cons <- k s
case cons of
Consumed mrep -> do
rep <- mrep
case rep of
Ok x s' -> cok x s' mempty
Error err -> cerr err
Empty mrep -> do
rep <- mrep
case rep of
Ok x s' -> eok x s' mempty
Error err -> eerr err
instance MonadIO m => MonadIO (ParsecT s m) where
liftIO = lift . liftIO
instance MonadReader r m => MonadReader r (ParsecT s m) where
ask = lift ask
local f p = mkPT $ \s -> local f (runParsecT p s)
instance MonadState s m => MonadState s (ParsecT s' m) where
get = lift get
put = lift . put
instance MonadCont m => MonadCont (ParsecT s m) where
callCC f = mkPT $ \s ->
callCC $ \c ->
runParsecT (f (\a -> mkPT $ \s' -> c (pack s' a))) s
where pack s a = Empty $ return (Ok a s)
instance MonadError e m => MonadError e (ParsecT s m) where
throwError = lift . throwError
p `catchError` h = mkPT $ \s ->
runParsecT p s `catchError` \e ->
runParsecT (h e) s
instance MonadPlus (ParsecT s m) where
mzero = pZero
mplus = pPlus
pZero :: ParsecT s m a
pZero = ParsecT $ \(State _ pos) _ _ _ eerr -> eerr $ newErrorUnknown pos
pPlus :: ParsecT s m a -> ParsecT s m a -> ParsecT s m a
pPlus m n = ParsecT $ \s cok cerr eok eerr ->
let meerr err =
let ncerr err' = cerr (mergeError err' err)
neok x s' hs = eok x s' (toHints err <> hs)
neerr err' = eerr (mergeError err' err)
in unParser n s cok ncerr neok neerr
in unParser m s cok cerr eok meerr
{-# INLINE pPlus #-}
instance MonadTrans (ParsecT s) where
lift amb = ParsecT $ \s _ _ eok _ -> amb >>= \a -> eok a s mempty
-- Primitive combinators
infix 0 <?>
-- | Type class describing parsers independent of input type.
class (A.Alternative m, Monad m, Stream s t) =>
MonadParsec s m t | m -> s t where
-- | The parser @unexpected msg@ always fails with an unexpected error
-- message @msg@ without consuming any input.
--
-- The parsers 'fail', ('<?>') and @unexpected@ are the three parsers used
-- to generate error messages. Of these, only ('<?>') is commonly used.
unexpected :: String -> m a
-- | The parser @p \<?> msg@ behaves as parser @p@, but whenever the
-- parser @p@ fails /without consuming any input/, it replaces expect
-- error messages with the expect error message @msg@.
--
-- This is normally used at the end of a set alternatives where we want to
-- return an error message in terms of a higher level construct rather
-- than returning all possible characters. For example, if the @expr@
-- parser from the “try” example would fail, the error message is: “…:
-- expecting expression”. Without the @(\<?>)@ combinator, the message
-- would be like “…: expecting \"let\" or letter”, which is less friendly.
(<?>) :: m a -> String -> m a
(<?>) = flip label
-- | A synonym for @(\<?>)@, but as a function instead of an operator.
label :: String -> m a -> m a
label = flip (<?>)
-- | @hidden p@ behaves just like parser @p@, but it doesn't show any
-- “expected” tokens in error message when @p@ fails.
hidden :: m a -> m a
hidden = label ""
-- | The parser @try p@ behaves like parser @p@, except that it
-- pretends that it hasn't consumed any input when an error occurs.
--
-- This combinator is used whenever arbitrary look ahead is needed. Since
-- it pretends that it hasn't consumed any input when @p@ fails, the
-- ('A.<|>') combinator will try its second alternative even when the
-- first parser failed while consuming input.
--
-- For example, here is a parser that will /try/ (sorry for the pun) to
-- parse word “let” or “lexical”:
--
-- >>> parseTest (string "let" <|> string "lexical") "lexical"
-- parse error at line 1, column 1:
-- unexpected "lex"
-- expecting "let"
--
-- First parser consumed “le” and failed, @string "lexical"@ couldn't
-- succeed with “xical” as its input! Things get much better with help of
-- @try@:
--
-- >>> parseTest (try (string "let") <|> string "lexical") "lexical"
-- "lexical"
--
-- @try@ also improves error messages in case of overlapping alternatives,
-- because Megaparsec's hint system can be used:
--
-- >>> parseTest (try (string "let") <|> string "lexical") "le"
-- parse error at line 1, column 1:
-- unexpected "le"
-- expecting "let" or "lexical"
try :: m a -> m a
-- | @lookAhead p@ parses @p@ without consuming any input.
--
-- If @p@ fails and consumes some input, so does @lookAhead@. Combine with
-- 'try' if this is undesirable.
lookAhead :: m a -> m a
-- | @notFollowedBy p@ only succeeds when parser @p@ fails. This parser
-- does not consume any input and can be used to implement the “longest
-- match” rule.
notFollowedBy :: m a -> m ()
-- | This parser only succeeds at the end of the input.
eof :: m ()
-- | The parser @token nextPos testTok@ accepts a token @t@ with result
-- @x@ when the function @testTok t@ returns @'Just' x@. The position of
-- the /next/ token should be returned when @nextPos@ is called with the
-- current source position @pos@, the current token @t@ and the rest of
-- the tokens @toks@, @nextPos pos t toks@.
--
-- This is the most primitive combinator for accepting tokens. For
-- example, the 'Text.Megaparsec.Char.char' parser could be implemented
-- as:
--
-- > char c = token nextPos testChar
-- > where testChar x = if x == c then Just x else Nothing
-- > nextPos pos x xs = updatePosChar pos x
token :: (SourcePos -> t -> s -> SourcePos) -- ^ Next position calculating function.
-> (t -> Either [Message] a) -- ^ Matching function for the token to parse.
-> m a
-- | The parser @tokens posFromTok test@ parses list of tokens and returns
-- it. The resulting parser will use 'showToken' to pretty-print the
-- collection of tokens. Supplied predicate @test@ is used to check
-- equality of given and parsed tokens.
--
-- This can be used to example to write 'Text.Megaparsec.Char.string':
--
-- > string = tokens updatePosString (==)
tokens :: Eq t =>
(SourcePos -> [t] -> SourcePos) -- ^ Computes position of tokens.
-> (t -> t -> Bool) -- ^ Predicate to check equality of tokens.
-> [t] -- ^ List of tokens to parse
-> m [t]
-- | Returns the full parser state as a 'State' record.
getParserState :: m (State s)
-- | @updateParserState f@ applies function @f@ to the parser state.
updateParserState :: (State s -> State s) -> m ()
instance Stream s t => MonadParsec s (ParsecT s m) t where
unexpected = pUnexpected
label = pLabel
try = pTry
lookAhead = pLookAhead
notFollowedBy = pNotFollowedBy
eof = pEof
token = pToken
tokens = pTokens
getParserState = pGetParserState
updateParserState = pUpdateParserState
pUnexpected :: String -> ParsecT s m a
pUnexpected msg = ParsecT $ \(State _ pos) _ _ _ eerr ->
eerr $ newErrorMessage (Unexpected msg) pos
pLabel :: String -> ParsecT s m a -> ParsecT s m a
pLabel l p = ParsecT $ \s cok cerr eok eerr ->
let cok' x s' hs = cok x s' $ refreshLastHint hs l
eok' x s' hs = eok x s' $ refreshLastHint hs l
eerr' err = eerr $ setErrorMessage (Expected l) err
in unParser p s cok' cerr eok' eerr'
pTry :: ParsecT s m a -> ParsecT s m a
pTry p = ParsecT $ \s cok _ eok eerr -> unParser p s cok eerr eok eerr
{-# INLINE pTry #-}
pLookAhead :: ParsecT s m a -> ParsecT s m a
pLookAhead p = ParsecT $ \s _ cerr eok eerr ->
let eok' a _ _ = eok a s mempty
in unParser p s eok' cerr eok' eerr
{-# INLINE pLookAhead #-}
pNotFollowedBy :: Stream s t => ParsecT s m a -> ParsecT s m ()
pNotFollowedBy p = ParsecT $ \s@(State input pos) _ _ eok eerr ->
let l = maybe eoi (showToken . fst) (uncons input)
cok' _ _ _ = eerr $ unexpectedErr l pos
cerr' _ = eok () s mempty
eok' _ _ _ = eerr $ unexpectedErr l pos
eerr' _ = eok () s mempty
in unParser p s cok' cerr' eok' eerr'
pEof :: Stream s t => ParsecT s m ()
pEof = label eoi $ ParsecT $ \s@(State input pos) _ _ eok eerr ->
case uncons input of
Nothing -> eok () s mempty
Just (x,_) -> eerr $ unexpectedErr (showToken x) pos
{-# INLINE pEof #-}
pToken :: Stream s t =>
(SourcePos -> t -> s -> SourcePos)
-> (t -> Either [Message] a)
-> ParsecT s m a
pToken nextpos test = ParsecT $ \(State input pos) cok _ _ eerr ->
case uncons input of
Nothing -> eerr $ unexpectedErr eoi pos
Just (c,cs) ->
case test c of
Left ms -> eerr $ foldr addErrorMessage (newErrorUnknown pos) ms
Right x -> let newpos = nextpos pos c cs
newstate = State cs newpos
in seq newpos $ seq newstate $ cok x newstate mempty
{-# INLINE pToken #-}
pTokens :: Stream s t =>
(SourcePos -> [t] -> SourcePos)
-> (t -> t -> Bool)
-> [t]
-> ParsecT s m [t]
pTokens _ _ [] = ParsecT $ \s _ _ eok _ -> eok [] s mempty
pTokens nextpos test tts = ParsecT $ \(State input pos) cok cerr _ eerr ->
let errExpect x = setErrorMessage (Expected $ showToken tts)
(newErrorMessage (Unexpected x) pos)
walk [] _ rs = let pos' = nextpos pos tts
s' = State rs pos'
in cok tts s' mempty
walk (t:ts) is rs =
let errorCont = if null is then eerr else cerr
what = bool (showToken $ reverse is) "end of input" (null is)
in case uncons rs of
Nothing -> errorCont . errExpect $ what
Just (x,xs)
| test t x -> walk ts (x:is) xs
| otherwise -> errorCont . errExpect . showToken $ reverse (x:is)
in walk tts [] input
{-# INLINE pTokens #-}
pGetParserState :: ParsecT s m (State s)
pGetParserState = ParsecT $ \s _ _ eok _ -> eok s s mempty
{-# INLINE pGetParserState #-}
pUpdateParserState :: (State s -> State s) -> ParsecT s m ()
pUpdateParserState f = ParsecT $ \s _ _ eok _ -> eok () (f s) mempty
{-# INLINE pUpdateParserState #-}
unexpectedErr :: String -> SourcePos -> ParseError
unexpectedErr msg = newErrorMessage (Unexpected msg)
eoi :: String
eoi = "end of input"
-- Parser state combinators
-- | Returns the current source position. See also 'SourcePos'.
getPosition :: MonadParsec s m t => m SourcePos
getPosition = statePos <$> getParserState
-- | @setPosition pos@ sets the current source position to @pos@.
setPosition :: MonadParsec s m t => SourcePos -> m ()
setPosition pos = updateParserState (\(State s _) -> State s pos)
-- | Returns the current input.
getInput :: MonadParsec s m t => m s
getInput = stateInput <$> getParserState
-- | @setInput input@ continues parsing with @input@. The 'getInput' and
-- @setInput@ functions can for example be used to deal with #include files.
setInput :: MonadParsec s m t => s -> m ()
setInput s = updateParserState (\(State _ pos) -> State s pos)
-- | @setParserState st@ set the full parser state to @st@.
setParserState :: MonadParsec s m t => State s -> m ()
setParserState st = updateParserState (const st)
-- Running a parser
-- | @parse p file input@ runs a parser @p@ over 'Identity'. The
-- @file@ is only used in error messages and may be the empty
-- string. Returns either a 'ParseError' ('Left') or a value of type @a@
-- ('Right'). This is a synonym for 'runParser'.
--
-- > main = case (parse numbers "" "11, 2, 43") of
-- > Left err -> print err
-- > Right xs -> print (sum xs)
-- >
-- > numbers = commaSep integer
parse :: Stream s t => Parsec s a -> SourceName -> s -> Either ParseError a
parse = runParser
-- | @parse' p input@ runs parser @p@ on @input@ and returns result
-- inside 'Just' on success and 'Nothing' on failure. This function also
-- parses 'eof', so all input should be consumed by the parser @p@.
--
-- The function is supposed to be useful for lightweight parsing, where
-- error messages (and thus file name) are not important and entire input
-- should be parsed. For example it can be used when parsing of single
-- number according to specification of its format is desired.
parse' :: Stream s t => Parsec s a -> s -> Maybe a
parse' p s =
case parse (p <* eof) "" s of
Left _ -> Nothing
Right x -> Just x
-- | The expression @parseTest p input@ applies a parser @p@ against
-- input @input@ and prints the result to stdout. Used for testing.
parseTest :: (Stream s t, Show a) => Parsec s a -> s -> IO ()
parseTest p input =
case parse p "" input of
Left err -> putStr "parse error at " >> print err
Right x -> print x
-- | The most general way to run a parser over the 'Identity' monad.
-- @runParser p file input@ runs parser @p@ on the input list of tokens
-- @input@, obtained from source @file@. The @file@ is only used in error
-- messages and may be the empty string. Returns either a 'ParseError'
-- ('Left') or a value of type @a@ ('Right').
--
-- > parseFromFile p file = runParser p file <$> readFile file
runParser :: Stream s t => Parsec s a -> SourceName -> s -> Either ParseError a
runParser p name s = runIdentity $ runParserT p name s
-- | The most general way to run a parser. @runParserT p file input@ runs
-- parser @p@ on the input list of tokens @input@, obtained from source
-- @file@. The @file@ is only used in error messages and may be the empty
-- string. Returns a computation in the underlying monad @m@ that return
-- either a 'ParseError' ('Left') or a value of type @a@ ('Right').
runParserT :: (Monad m, Stream s t) =>
ParsecT s m a -> SourceName -> s -> m (Either ParseError a)
runParserT p name s = do
res <- runParsecT p $ State s (initialPos name)
r <- parserReply res
case r of
Ok x _ -> return $ Right x
Error err -> return $ Left err
where parserReply res =
case res of
Consumed r -> r
Empty r -> r
-- | Low-level unpacking of the ParsecT type. 'runParserT' and 'runParser'
-- are built upon this.
runParsecT :: Monad m => ParsecT s m a -> State s -> m (Consumed (m (Reply s a)))
runParsecT p s = unParser p s cok cerr eok eerr
where cok a s' _ = return . Consumed . return $ Ok a s'
cerr err = return . Consumed . return $ Error err
eok a s' _ = return . Empty . return $ Ok a s'
eerr err = return . Empty . return $ Error err