2008-01-13 20:53:15 +03:00
-----------------------------------------------------------------------------
-- |
-- Module : Text.Parsec.Prim
-- Copyright : (c) Daan Leijen 1999-2001, (c) Paolo Martini 2007
-- License : BSD-style (see the LICENSE file)
--
2008-01-20 07:44:41 +03:00
-- Maintainer : derek.a.elkins@gmail.com
2008-01-13 20:53:15 +03:00
-- Stability : provisional
-- Portability : portable
--
-- The primitive parser combinators.
--
-----------------------------------------------------------------------------
2008-02-05 08:45:50 +03:00
{- # LANGUAGE MultiParamTypeClasses, FunctionalDependencies, FlexibleContexts,
2014-09-25 04:43:08 +04:00
UndecidableInstances #- }
2014-03-24 05:08:26 +04:00
{- # OPTIONS_HADDOCK not - home # -}
2008-01-13 20:53:15 +03:00
2010-03-04 06:03:44 +03:00
module Text.Parsec.Prim
( unknownError
, sysUnExpectError
, unexpected
, ParsecT
, runParsecT
, mkPT
, Parsec
, Consumed ( .. )
, Reply ( .. )
, State ( .. )
, parsecMap
, parserReturn
, parserBind
, mergeErrorReply
, parserFail
, parserZero
, parserPlus
, ( <?> )
, ( <|> )
, label
, labels
2011-02-20 19:29:20 +03:00
, lookAhead
2010-03-04 06:03:44 +03:00
, Stream ( .. )
, tokens
, try
, token
, tokenPrim
, tokenPrimEx
, many
, skipMany
, manyAccum
, runPT
, runP
, runParserT
, runParser
, parse
, parseTest
, getPosition
, getInput
, setPosition
, setInput
, getParserState
, setParserState
, updateParserState
, getState
, putState
, modifyState
, setState
, updateState
) where
2008-01-13 20:53:15 +03:00
2014-04-09 23:01:24 +04:00
import qualified Data.ByteString.Lazy.Char8 as CL
import qualified Data.ByteString.Char8 as C
import qualified Data.Text as Text
import qualified Data.Text.Lazy as TextL
2008-01-20 01:06:33 +03:00
import qualified Control.Applicative as Applicative ( Applicative ( .. ) , Alternative ( .. ) )
2008-02-13 07:32:24 +03:00
import Control.Monad ()
2008-01-13 20:53:15 +03:00
import Control.Monad.Trans
import Control.Monad.Identity
2008-02-05 08:45:50 +03:00
import Control.Monad.Reader.Class
import Control.Monad.State.Class
import Control.Monad.Cont.Class
import Control.Monad.Error.Class
2008-01-13 20:53:15 +03:00
import Text.Parsec.Pos
import Text.Parsec.Error
2008-01-20 01:06:33 +03:00
2008-02-13 07:32:24 +03:00
unknownError :: State s u -> ParseError
2008-01-13 20:53:15 +03:00
unknownError state = newErrorUnknown ( statePos state )
2008-01-20 01:06:33 +03:00
2008-02-13 07:32:24 +03:00
sysUnExpectError :: String -> SourcePos -> Reply s u a
2008-01-13 20:53:15 +03:00
sysUnExpectError msg pos = Error ( newErrorMessage ( SysUnExpect msg ) pos )
2008-01-20 01:06:33 +03:00
2008-01-22 04:37:52 +03:00
-- | The parser @unexpected msg@ always fails with an unexpected error
-- message @msg@ without consuming any input.
--
-- The parsers 'fail', ('<?>') and @unexpected@ are the three parsers
-- used to generate error messages. Of these, only ('<?>') is commonly
-- used. For an example of the use of @unexpected@, see the definition
-- of 'Text.Parsec.Combinator.notFollowedBy'.
2008-01-13 20:53:15 +03:00
unexpected :: ( Stream s m t ) => String -> ParsecT s u m a
unexpected msg
2009-03-02 03:20:00 +03:00
= ParsecT $ \ s _ _ _ eerr ->
eerr $ newErrorMessage ( UnExpect msg ) ( statePos s )
2008-01-13 20:53:15 +03:00
-- | ParserT monad transformer and Parser type
2008-01-22 04:37:52 +03:00
-- | @ParsecT s u m a@ is a parser with stream type @s@, user state type @u@,
2009-02-15 01:28:22 +03:00
-- underlying monad @m@ and return type @a@. Parsec is strict in the user state.
-- If this is undesirable, simply used a data type like @data Box a = Box a@ and
-- the state type @Box YourStateType@ to add a level of indirection.
2008-01-22 04:37:52 +03:00
2009-03-02 03:20:00 +03:00
newtype ParsecT s u m a
= ParsecT { unParser :: forall b .
State s u
-> ( a -> State s u -> ParseError -> m b ) -- consumed ok
-> ( ParseError -> m b ) -- consumed err
-> ( a -> State s u -> ParseError -> m b ) -- empty ok
-> ( ParseError -> m b ) -- empty err
-> m b
}
2010-03-04 06:03:27 +03:00
-- | Low-level unpacking of the ParsecT type. To run your parser, please look to
-- runPT, runP, runParserT, runParser and other such functions.
2009-03-02 03:20:00 +03:00
runParsecT :: Monad m => ParsecT s u m a -> State s u -> m ( Consumed ( m ( Reply s u a ) ) )
runParsecT p s = unParser p s cok cerr eok eerr
where cok a s' err = return . Consumed . return $ Ok a s' err
cerr err = return . Consumed . return $ Error err
eok a s' err = return . Empty . return $ Ok a s' err
eerr err = return . Empty . return $ Error err
2010-03-04 06:03:27 +03:00
-- | Low-level creation of the ParsecT type. You really shouldn't have to do this.
2009-03-02 03:20:00 +03:00
mkPT :: Monad m => ( State s u -> m ( Consumed ( m ( Reply s u a ) ) ) ) -> ParsecT s u m a
mkPT k = ParsecT $ \ s cok cerr eok eerr -> do
cons <- k s
case cons of
Consumed mrep -> do
rep <- mrep
case rep of
Ok x s' err -> cok x s' err
Error err -> cerr err
Empty mrep -> do
rep <- mrep
case rep of
Ok x s' err -> eok x s' err
Error err -> eerr err
2008-01-13 20:53:15 +03:00
2008-02-17 06:53:24 +03:00
type Parsec s u = ParsecT s u Identity
2008-01-13 20:53:15 +03:00
data Consumed a = Consumed a
| Empty ! a
2008-01-22 04:37:52 +03:00
2009-02-15 01:28:22 +03:00
data Reply s u a = Ok a ! ( State s u ) ParseError
2008-01-13 20:53:15 +03:00
| Error ParseError
data State s u = State {
stateInput :: s ,
statePos :: ! SourcePos ,
stateUser :: ! u
}
instance Functor Consumed where
fmap f ( Consumed x ) = Consumed ( f x )
fmap f ( Empty x ) = Empty ( f x )
instance Functor ( Reply s u ) where
fmap f ( Ok x s e ) = Ok ( f x ) s e
2008-02-13 07:32:24 +03:00
fmap _ ( Error e ) = Error e -- XXX
2008-01-13 20:53:15 +03:00
2009-03-02 03:20:00 +03:00
instance Functor ( ParsecT s u m ) where
2008-01-13 20:53:15 +03:00
fmap f p = parsecMap f p
2009-03-02 03:20:00 +03:00
parsecMap :: ( a -> b ) -> ParsecT s u m a -> ParsecT s u m b
2008-01-13 20:53:15 +03:00
parsecMap f p
2009-03-02 03:20:00 +03:00
= ParsecT $ \ s cok cerr eok eerr ->
unParser p s ( cok . f ) cerr ( eok . f ) eerr
2008-01-13 20:53:15 +03:00
2009-03-02 03:20:00 +03:00
instance Applicative . Applicative ( ParsecT s u m ) where
2008-01-14 02:54:41 +03:00
pure = return
2008-01-20 01:06:33 +03:00
( <*> ) = ap -- TODO: Can this be optimized?
2009-03-02 03:20:00 +03:00
instance Applicative . Alternative ( ParsecT s u m ) where
2008-01-20 01:06:33 +03:00
empty = mzero
( <|> ) = mplus
2008-01-13 20:53:15 +03:00
2009-03-02 03:20:00 +03:00
instance Monad ( ParsecT s u m ) where
2008-01-13 20:53:15 +03:00
return x = parserReturn x
p >>= f = parserBind p f
fail msg = parserFail msg
2008-02-05 08:45:50 +03:00
instance ( MonadIO m ) => MonadIO ( ParsecT s u m ) where
liftIO = lift . liftIO
instance ( MonadReader r m ) => MonadReader r ( ParsecT s u m ) where
ask = lift ask
2009-03-02 03:20:00 +03:00
local f p = mkPT $ \ s -> local f ( runParsecT p s )
2008-02-05 08:45:50 +03:00
-- I'm presuming the user might want a separate, non-backtracking
-- state aside from the Parsec user state.
instance ( MonadState s m ) => MonadState s ( ParsecT s' u m ) where
get = lift get
put = lift . put
instance ( MonadCont m ) => MonadCont ( ParsecT s u m ) where
2009-03-02 03:20:00 +03:00
callCC f = mkPT $ \ s ->
2008-02-05 08:45:50 +03:00
callCC $ \ c ->
2009-03-02 03:20:00 +03:00
runParsecT ( f ( \ a -> mkPT $ \ s' -> c ( pack s' a ) ) ) s
2008-02-05 08:45:50 +03:00
where pack s a = Empty $ return ( Ok a s ( unknownError s ) )
instance ( MonadError e m ) => MonadError e ( ParsecT s u m ) where
throwError = lift . throwError
2009-03-02 03:20:00 +03:00
p ` catchError ` h = mkPT $ \ s ->
2008-02-05 08:45:50 +03:00
runParsecT p s ` catchError ` \ e ->
runParsecT ( h e ) s
2009-03-02 03:20:00 +03:00
parserReturn :: a -> ParsecT s u m a
2008-01-13 20:53:15 +03:00
parserReturn x
2009-03-02 03:20:00 +03:00
= ParsecT $ \ s _ _ eok _ ->
eok x s ( unknownError s )
parserBind :: ParsecT s u m a -> ( a -> ParsecT s u m b ) -> ParsecT s u m b
{- # INLINE parserBind # -}
parserBind m k
= ParsecT $ \ s cok cerr eok eerr ->
let
-- consumed-okay case for m
mcok x s err =
let
-- if (k x) consumes, those go straigt up
pcok = cok
pcerr = cerr
-- if (k x) doesn't consume input, but is okay,
-- we still return in the consumed continuation
2010-10-04 02:55:54 +04:00
peok x s err' = cok x s ( mergeError err err' )
2009-03-02 03:20:00 +03:00
-- if (k x) doesn't consume input, but errors,
-- we return the error in the 'consumed-error'
-- continuation
peerr err' = cerr ( mergeError err err' )
in unParser ( k x ) s pcok pcerr peok peerr
-- empty-ok case for m
meok x s err =
let
-- in these cases, (k x) can return as empty
pcok = cok
2010-10-04 02:55:54 +04:00
peok x s err' = eok x s ( mergeError err err' )
2009-03-02 03:20:00 +03:00
pcerr = cerr
peerr err' = eerr ( mergeError err err' )
in unParser ( k x ) s pcok pcerr peok peerr
-- consumed-error case for m
mcerr = cerr
-- empty-error case for m
meerr = eerr
in unParser m s mcok mcerr meok meerr
2008-01-13 20:53:15 +03:00
2008-02-13 07:32:24 +03:00
mergeErrorReply :: ParseError -> Reply s u a -> Reply s u a
2008-01-13 20:53:15 +03:00
mergeErrorReply err1 reply -- XXX where to put it?
= case reply of
Ok x state err2 -> Ok x state ( mergeError err1 err2 )
Error err2 -> Error ( mergeError err1 err2 )
2009-03-02 03:20:00 +03:00
parserFail :: String -> ParsecT s u m a
2008-01-13 20:53:15 +03:00
parserFail msg
2009-03-02 03:20:00 +03:00
= ParsecT $ \ s _ _ _ eerr ->
eerr $ newErrorMessage ( Message msg ) ( statePos s )
2008-01-13 20:53:15 +03:00
2009-03-02 03:20:00 +03:00
instance MonadPlus ( ParsecT s u m ) where
2008-01-13 20:53:15 +03:00
mzero = parserZero
mplus p1 p2 = parserPlus p1 p2
2008-01-22 04:37:52 +03:00
-- | @parserZero@ always fails without consuming any input. @parserZero@ is defined
-- equal to the 'mzero' member of the 'MonadPlus' class and to the 'Control.Applicative.empty' member
2015-01-24 09:59:22 +03:00
-- of the 'Control.Applicative.Alternative' class.
2008-01-22 04:37:52 +03:00
2009-03-02 03:20:00 +03:00
parserZero :: ParsecT s u m a
2008-01-13 20:53:15 +03:00
parserZero
2009-03-02 03:20:00 +03:00
= ParsecT $ \ s _ _ _ eerr ->
eerr $ unknownError s
parserPlus :: ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
{- # INLINE parserPlus # -}
parserPlus m n
= ParsecT $ \ s cok cerr eok eerr ->
let
meerr err =
let
neok y s' err' = eok y s' ( mergeError err err' )
neerr err' = eerr $ mergeError err err'
in unParser n s cok cerr neok neerr
in unParser m s cok cerr eok meerr
2008-01-13 20:53:15 +03:00
instance MonadTrans ( ParsecT s u ) where
2009-03-02 03:20:00 +03:00
lift amb = ParsecT $ \ s _ _ eok _ -> do
a <- amb
eok a s $ unknownError s
2008-01-13 20:53:15 +03:00
infix 0 <?>
infixr 1 <|>
2014-03-24 05:06:48 +04:00
-- | The parser @p \<?> msg@ behaves as parser @p@, but whenever the
2008-01-22 04:37:52 +03:00
-- parser @p@ fails /without consuming any input/, it replaces expect
-- error messages with the expect error message @msg@.
--
-- This is normally used at the end of a set alternatives where we want
-- to return an error message in terms of a higher level construct
-- rather than returning all possible characters. For example, if the
-- @expr@ parser from the 'try' example would fail, the error
-- message is: '...: expecting expression'. Without the @(\<?>)@
-- combinator, the message would be like '...: expecting \"let\" or
-- letter', which is less friendly.
2009-03-02 03:20:00 +03:00
( <?> ) :: ( ParsecT s u m a ) -> String -> ( ParsecT s u m a )
2008-01-13 20:53:15 +03:00
p <?> msg = label p msg
2008-01-22 04:37:52 +03:00
-- | This combinator implements choice. The parser @p \<|> q@ first
-- applies @p@. If it succeeds, the value of @p@ is returned. If @p@
-- fails /without consuming any input/, parser @q@ is tried. This
-- combinator is defined equal to the 'mplus' member of the 'MonadPlus'
-- class and the ('Control.Applicative.<|>') member of 'Control.Applicative.Alternative'.
--
-- The parser is called /predictive/ since @q@ is only tried when
-- parser @p@ didn't consume any input (i.e.. the look ahead is 1).
-- This non-backtracking behaviour allows for both an efficient
-- implementation of the parser combinators and the generation of good
-- error messages.
2009-03-02 03:20:00 +03:00
( <|> ) :: ( ParsecT s u m a ) -> ( ParsecT s u m a ) -> ( ParsecT s u m a )
2008-01-13 20:53:15 +03:00
p1 <|> p2 = mplus p1 p2
2014-10-29 20:31:48 +03:00
-- | A synonym for @\<?>@, but as a function instead of an operator.
2009-03-02 03:20:00 +03:00
label :: ParsecT s u m a -> String -> ParsecT s u m a
2008-01-13 20:53:15 +03:00
label p msg
= labels p [ msg ]
2009-03-02 03:20:00 +03:00
labels :: ParsecT s u m a -> [ String ] -> ParsecT s u m a
labels p msgs =
ParsecT $ \ s cok cerr eok eerr ->
let eok' x s' error = eok x s' $ if errorIsUnknown error
then error
else setExpectErrors error msgs
eerr' err = eerr $ setExpectErrors err msgs
in unParser p s cok cerr eok' eerr'
where
setExpectErrors err [] = setErrorMessage ( Expect " " ) err
setExpectErrors err [ msg ] = setErrorMessage ( Expect msg ) err
setExpectErrors err ( msg : msgs )
= foldr ( \ msg' err' -> addErrorMessage ( Expect msg' ) err' )
( setErrorMessage ( Expect msg ) err ) msgs
2008-01-13 20:53:15 +03:00
2008-03-06 04:17:54 +03:00
-- TODO: There should be a stronger statement that can be made about this
2008-01-22 04:37:52 +03:00
-- | An instance of @Stream@ has stream type @s@, underlying monad @m@ and token type @t@ determined by the stream
2008-03-06 04:17:54 +03:00
--
-- Some rough guidelines for a \"correct\" instance of Stream:
--
-- * unfoldM uncons gives the [t] corresponding to the stream
2008-03-06 04:25:31 +03:00
--
-- * A @Stream@ instance is responsible for maintaining the \"position within the stream\" in the stream state @s@. This is trivial unless you are using the monad in a non-trivial way.
2008-03-06 04:17:54 +03:00
2008-01-13 20:53:15 +03:00
class ( Monad m ) => Stream s m t | s -> t where
uncons :: s -> m ( Maybe ( t , s ) )
2014-03-24 18:53:21 +04:00
instance ( Monad m ) => Stream [ tok ] m tok where
uncons [] = return $ Nothing
uncons ( t : ts ) = return $ Just ( t , ts )
{- # INLINE uncons # -}
2014-04-09 23:01:24 +04:00
instance ( Monad m ) => Stream CL . ByteString m Char where
uncons = return . CL . uncons
instance ( Monad m ) => Stream C . ByteString m Char where
uncons = return . C . uncons
instance ( Monad m ) => Stream Text . Text m Char where
uncons = return . Text . uncons
{- # INLINE uncons # -}
instance ( Monad m ) => Stream TextL . Text m Char where
uncons = return . TextL . uncons
{- # INLINE uncons # -}
2014-09-25 04:43:08 +04:00
tokens :: ( Stream s m t , Eq t )
2008-01-13 20:53:15 +03:00
=> ( [ t ] -> String ) -- Pretty print a list of tokens
-> ( SourcePos -> [ t ] -> SourcePos )
-> [ t ] -- List of tokens to parse
-> ParsecT s u m [ t ]
2009-03-02 03:20:00 +03:00
{- # INLINE tokens # -}
2008-01-13 20:53:15 +03:00
tokens _ _ []
2009-03-02 03:20:00 +03:00
= ParsecT $ \ s _ _ eok _ ->
eok [] s $ unknownError s
2008-02-13 07:32:24 +03:00
tokens showTokens nextposs tts @ ( tok : toks )
2014-09-25 04:43:08 +04:00
= ParsecT $ \ ( State input pos u ) cok cerr eok eerr ->
2008-01-13 20:53:15 +03:00
let
2014-09-25 04:43:08 +04:00
errEof = ( setErrorMessage ( Expect ( showTokens tts ) )
( newErrorMessage ( SysUnExpect " " ) pos ) )
2009-03-02 03:20:00 +03:00
2014-09-25 04:43:08 +04:00
errExpect x = ( setErrorMessage ( Expect ( showTokens tts ) )
( newErrorMessage ( SysUnExpect ( showTokens [ x ] ) ) pos ) )
2009-03-02 03:20:00 +03:00
2014-09-25 04:43:08 +04:00
walk [] rs = ok rs
walk ( t : ts ) rs = do
2008-01-13 20:53:15 +03:00
sr <- uncons rs
case sr of
2014-09-25 04:43:08 +04:00
Nothing -> cerr $ errEof
Just ( x , xs ) | t == x -> walk ts xs
| otherwise -> cerr $ errExpect x
2009-03-02 03:20:00 +03:00
2014-09-25 04:43:08 +04:00
ok rs = let pos' = nextposs pos tts
s' = State rs pos' u
in cok tts s' ( newErrorUnknown pos' )
2008-01-13 20:53:15 +03:00
in do
sr <- uncons input
2009-03-02 03:20:00 +03:00
case sr of
2014-09-25 04:43:08 +04:00
Nothing -> eerr $ errEof
2008-01-13 20:53:15 +03:00
Just ( x , xs )
2014-09-25 04:43:08 +04:00
| tok == x -> walk toks xs
| otherwise -> eerr $ errExpect x
2008-01-13 20:53:15 +03:00
2008-01-22 04:37:52 +03:00
-- | The parser @try p@ behaves like parser @p@, except that it
-- pretends that it hasn't consumed any input when an error occurs.
--
-- This combinator is used whenever arbitrary look ahead is needed.
-- Since it pretends that it hasn't consumed any input when @p@ fails,
-- the ('<|>') combinator will try its second alternative even when the
-- first parser failed while consuming input.
--
-- The @try@ combinator can for example be used to distinguish
-- identifiers and reserved words. Both reserved words and identifiers
-- are a sequence of letters. Whenever we expect a certain reserved
-- word where we can also expect an identifier we have to use the @try@
-- combinator. Suppose we write:
--
-- > expr = letExpr <|> identifier <?> "expression"
-- >
-- > letExpr = do{ string "let"; ... }
-- > identifier = many1 letter
--
-- If the user writes \"lexical\", the parser fails with: @unexpected
-- \'x\', expecting \'t\' in \"let\"@. Indeed, since the ('<|>') combinator
-- only tries alternatives when the first alternative hasn't consumed
-- input, the @identifier@ parser is never tried (because the prefix
-- \"le\" of the @string \"let\"@ parser is already consumed). The
-- right behaviour can be obtained by adding the @try@ combinator:
--
-- > expr = letExpr <|> identifier <?> "expression"
-- >
-- > letExpr = do{ try (string "let"); ... }
-- > identifier = many1 letter
2009-03-02 03:20:00 +03:00
try :: ParsecT s u m a -> ParsecT s u m a
try p =
2011-02-20 19:24:49 +03:00
ParsecT $ \ s cok _ eok eerr ->
unParser p s cok eerr eok eerr
2008-01-13 20:53:15 +03:00
2011-02-20 19:29:20 +03:00
-- | @lookAhead p@ parses @p@ without consuming any input.
--
-- If @p@ fails and consumes some input, so does @lookAhead@. Combine with 'try'
-- if this is undesirable.
lookAhead :: ( Stream s m t ) => ParsecT s u m a -> ParsecT s u m a
2014-09-10 17:23:34 +04:00
lookAhead p =
ParsecT $ \ s _ cerr eok eerr -> do
let eok' a _ _ = eok a s ( newErrorUnknown ( statePos s ) )
unParser p s eok' cerr eok' eerr
2011-02-20 19:29:20 +03:00
2011-12-29 02:29:53 +04:00
-- | The parser @token showTok posFromTok testTok@ accepts a token @t@
2008-01-22 04:37:52 +03:00
-- with result @x@ when the function @testTok t@ returns @'Just' x@. The
-- source position of the @t@ should be returned by @posFromTok t@ and
-- the token can be shown using @showTok t@.
--
-- This combinator is expressed in terms of 'tokenPrim'.
-- It is used to accept user defined token streams. For example,
-- suppose that we have a stream of basic tokens tupled with source
-- positions. We can than define a parser that accepts single tokens as:
--
-- > mytoken x
-- > = token showTok posFromTok testTok
-- > where
-- > showTok (pos,t) = show t
-- > posFromTok (pos,t) = pos
-- > testTok (pos,t) = if x == t then Just t else Nothing
2008-01-13 20:53:15 +03:00
token :: ( Stream s Identity t )
2008-01-22 04:37:52 +03:00
=> ( t -> String ) -- ^ Token pretty-printing function.
-> ( t -> SourcePos ) -- ^ Computes the position of a token.
-> ( t -> Maybe a ) -- ^ Matching function for the token to parse.
2008-01-13 20:53:15 +03:00
-> Parsec s u a
2008-02-13 07:32:24 +03:00
token showToken tokpos test = tokenPrim showToken nextpos test
2008-01-13 20:53:15 +03:00
where
nextpos _ tok ts = case runIdentity ( uncons ts ) of
Nothing -> tokpos tok
Just ( tok' , _ ) -> tokpos tok'
2011-12-29 02:29:53 +04:00
-- | The parser @tokenPrim showTok nextPos testTok@ accepts a token @t@
2008-01-22 04:37:52 +03:00
-- with result @x@ when the function @testTok t@ returns @'Just' x@. The
-- token can be shown using @showTok t@. The position of the /next/
-- token should be returned when @nextPos@ is called with the current
-- source position @pos@, the current token @t@ and the rest of the
-- tokens @toks@, @nextPos pos t toks@.
--
-- This is the most primitive combinator for accepting tokens. For
-- example, the 'Text.Parsec.Char.char' parser could be implemented as:
--
-- > char c
-- > = tokenPrim showChar nextPos testChar
-- > where
-- > showChar x = "'" ++ x ++ "'"
-- > testChar x = if x == c then Just x else Nothing
-- > nextPos pos x xs = updatePosChar pos x
2008-01-13 20:53:15 +03:00
tokenPrim :: ( Stream s m t )
2008-01-22 04:37:52 +03:00
=> ( t -> String ) -- ^ Token pretty-printing function.
-> ( SourcePos -> t -> s -> SourcePos ) -- ^ Next position calculating function.
-> ( t -> Maybe a ) -- ^ Matching function for the token to parse.
2008-01-13 20:53:15 +03:00
-> ParsecT s u m a
2009-03-02 03:20:00 +03:00
{- # INLINE tokenPrim # -}
2008-02-13 07:32:24 +03:00
tokenPrim showToken nextpos test = tokenPrimEx showToken nextpos Nothing test
2008-01-13 20:53:15 +03:00
tokenPrimEx :: ( Stream s m t )
2008-01-22 08:13:24 +03:00
=> ( t -> String )
2008-01-13 20:53:15 +03:00
-> ( SourcePos -> t -> s -> SourcePos )
-> Maybe ( SourcePos -> t -> s -> u -> u )
2008-01-22 08:13:24 +03:00
-> ( t -> Maybe a )
2008-01-13 20:53:15 +03:00
-> ParsecT s u m a
2009-03-02 03:20:00 +03:00
{- # INLINE tokenPrimEx # -}
tokenPrimEx showToken nextpos Nothing test
= ParsecT $ \ ( State input pos user ) cok cerr eok eerr -> do
r <- uncons input
case r of
Nothing -> eerr $ unexpectError " " pos
Just ( c , cs )
-> case test c of
Just x -> let newpos = nextpos pos c cs
newstate = State cs newpos user
in seq newpos $ seq newstate $
cok x newstate ( newErrorUnknown newpos )
Nothing -> eerr $ unexpectError ( showToken c ) pos
tokenPrimEx showToken nextpos ( Just nextState ) test
= ParsecT $ \ ( State input pos user ) cok cerr eok eerr -> do
r <- uncons input
case r of
Nothing -> eerr $ unexpectError " " pos
Just ( c , cs )
-> case test c of
Just x -> let newpos = nextpos pos c cs
newUser = nextState pos c cs user
newstate = State cs newpos newUser
in seq newpos $ seq newstate $
cok x newstate $ newErrorUnknown newpos
Nothing -> eerr $ unexpectError ( showToken c ) pos
unexpectError msg pos = newErrorMessage ( SysUnExpect msg ) pos
2008-01-13 20:53:15 +03:00
2008-01-22 04:37:52 +03:00
-- | @many p@ applies the parser @p@ /zero/ or more times. Returns a
-- list of the returned values of @p@.
--
-- > identifier = do{ c <- letter
-- > ; cs <- many (alphaNum <|> char '_')
-- > ; return (c:cs)
-- > }
2009-03-02 03:20:00 +03:00
many :: ParsecT s u m a -> ParsecT s u m [ a ]
2008-01-13 20:53:15 +03:00
many p
= do xs <- manyAccum ( : ) p
return ( reverse xs )
2008-01-22 04:37:52 +03:00
-- | @skipMany p@ applies the parser @p@ /zero/ or more times, skipping
-- its result.
--
-- > spaces = skipMany space
2009-03-02 03:20:00 +03:00
skipMany :: ParsecT s u m a -> ParsecT s u m ()
2008-01-13 20:53:15 +03:00
skipMany p
2008-02-13 07:32:24 +03:00
= do manyAccum ( \ _ _ -> [] ) p
2008-01-13 20:53:15 +03:00
return ()
2009-03-02 03:20:00 +03:00
manyAccum :: ( a -> [ a ] -> [ a ] )
2008-01-13 20:53:15 +03:00
-> ParsecT s u m a
-> ParsecT s u m [ a ]
2009-03-02 03:20:00 +03:00
manyAccum acc p =
2009-11-23 21:06:31 +03:00
ParsecT $ \ s cok cerr eok eerr ->
2009-03-02 03:20:00 +03:00
let walk xs x s' err =
unParser p s'
( seq xs $ walk $ acc x xs ) -- consumed-ok
cerr -- consumed-err
manyErr -- empty-ok
2009-11-22 20:15:01 +03:00
( \ e -> cok ( acc x xs ) s' e ) -- empty-err
2009-11-23 21:06:31 +03:00
in unParser p s ( walk [] ) cerr manyErr ( \ e -> eok [] s e )
2009-03-02 03:20:00 +03:00
manyErr = error " Text.ParserCombinators.Parsec.Prim.many: combinator 'many' is applied to a parser that accepts an empty string. "
2008-01-13 20:53:15 +03:00
2008-01-22 04:37:52 +03:00
-- < Running a parser: monadic (runPT) and pure (runP)
2008-01-13 20:53:15 +03:00
runPT :: ( Stream s m t )
=> ParsecT s u m a -> u -> SourceName -> s -> m ( Either ParseError a )
runPT p u name s
= do res <- runParsecT p ( State s ( initialPos name ) u )
r <- parserReply res
case r of
Ok x _ _ -> return ( Right x )
Error err -> return ( Left err )
where
parserReply res
= case res of
Consumed r -> r
Empty r -> r
runP :: ( Stream s Identity t )
=> Parsec s u a -> u -> SourceName -> s -> Either ParseError a
runP p u name s = runIdentity $ runPT p u name s
2008-01-22 04:37:52 +03:00
-- | The most general way to run a parser. @runParserT p state filePath
-- input@ runs parser @p@ on the input list of tokens @input@,
-- obtained from source @filePath@ with the initial user state @st@.
-- The @filePath@ is only used in error messages and may be the empty
-- string. Returns a computation in the underlying monad @m@ that return either a 'ParseError' ('Left') or a
-- value of type @a@ ('Right').
runParserT :: ( Stream s m t )
=> ParsecT s u m a -> u -> SourceName -> s -> m ( Either ParseError a )
runParserT = runPT
-- | The most general way to run a parser over the Identity monad. @runParser p state filePath
-- input@ runs parser @p@ on the input list of tokens @input@,
-- obtained from source @filePath@ with the initial user state @st@.
-- The @filePath@ is only used in error messages and may be the empty
-- string. Returns either a 'ParseError' ('Left') or a
-- value of type @a@ ('Right').
--
-- > parseFromFile p fname
-- > = do{ input <- readFile fname
-- > ; return (runParser p () fname input)
-- > }
2008-01-13 20:53:15 +03:00
runParser :: ( Stream s Identity t )
=> Parsec s u a -> u -> SourceName -> s -> Either ParseError a
runParser = runP
2008-01-22 04:37:52 +03:00
-- | @parse p filePath input@ runs a parser @p@ over Identity without user
-- state. The @filePath@ is only used in error messages and may be the
-- empty string. Returns either a 'ParseError' ('Left')
-- or a value of type @a@ ('Right').
--
-- > main = case (parse numbers "" "11, 2, 43") of
-- > Left err -> print err
-- > Right xs -> print (sum xs)
-- >
-- > numbers = commaSep integer
2008-01-13 20:53:15 +03:00
parse :: ( Stream s Identity t )
=> Parsec s () a -> SourceName -> s -> Either ParseError a
parse p = runP p ()
2008-01-22 04:37:52 +03:00
-- | The expression @parseTest p input@ applies a parser @p@ against
-- input @input@ and prints the result to stdout. Used for testing
-- parsers.
2008-01-13 20:53:15 +03:00
parseTest :: ( Stream s Identity t , Show a )
=> Parsec s () a -> s -> IO ()
parseTest p input
= case parse p " " input of
Left err -> do putStr " parse error at "
print err
Right x -> print x
2008-01-22 04:37:52 +03:00
-- < Parser state combinators
-- | Returns the current source position. See also 'SourcePos'.
2008-01-13 20:53:15 +03:00
getPosition :: ( Monad m ) => ParsecT s u m SourcePos
getPosition = do state <- getParserState
return ( statePos state )
2008-01-22 04:37:52 +03:00
-- | Returns the current input
2008-01-13 20:53:15 +03:00
getInput :: ( Monad m ) => ParsecT s u m s
getInput = do state <- getParserState
return ( stateInput state )
2008-01-22 04:37:52 +03:00
-- | @setPosition pos@ sets the current source position to @pos@.
2008-01-13 20:53:15 +03:00
setPosition :: ( Monad m ) => SourcePos -> ParsecT s u m ()
setPosition pos
= do updateParserState ( \ ( State input _ user ) -> State input pos user )
return ()
2008-01-22 04:37:52 +03:00
-- | @setInput input@ continues parsing with @input@. The 'getInput' and
-- @setInput@ functions can for example be used to deal with #include
-- files.
2008-01-13 20:53:15 +03:00
setInput :: ( Monad m ) => s -> ParsecT s u m ()
setInput input
= do updateParserState ( \ ( State _ pos user ) -> State input pos user )
return ()
2008-01-22 04:37:52 +03:00
-- | Returns the full parser state as a 'State' record.
2008-01-13 20:53:15 +03:00
getParserState :: ( Monad m ) => ParsecT s u m ( State s u )
getParserState = updateParserState id
2008-01-22 04:37:52 +03:00
-- | @setParserState st@ set the full parser state to @st@.
2008-01-13 20:53:15 +03:00
setParserState :: ( Monad m ) => State s u -> ParsecT s u m ( State s u )
setParserState st = updateParserState ( const st )
2008-01-22 04:37:52 +03:00
-- | @updateParserState f@ applies function @f@ to the parser state.
2009-03-02 03:20:00 +03:00
updateParserState :: ( State s u -> State s u ) -> ParsecT s u m ( State s u )
updateParserState f =
ParsecT $ \ s _ _ eok _ ->
let s' = f s
in eok s' s' $ unknownError s'
2008-01-13 20:53:15 +03:00
2008-01-22 04:37:52 +03:00
-- < User state combinators
-- | Returns the current user state.
2008-01-13 20:53:15 +03:00
getState :: ( Monad m ) => ParsecT s u m u
getState = stateUser ` liftM ` getParserState
2008-01-22 04:37:52 +03:00
-- | @putState st@ set the user state to @st@.
2008-01-13 20:53:15 +03:00
putState :: ( Monad m ) => u -> ParsecT s u m ()
putState u = do updateParserState $ \ s -> s { stateUser = u }
return ()
2015-05-17 21:18:05 +03:00
-- | @modifyState f@ applies function @f@ to the user state. Suppose
2008-01-22 04:37:52 +03:00
-- that we want to count identifiers in a source, we could use the user
-- state as:
--
-- > expr = do{ x <- identifier
2015-05-17 21:18:05 +03:00
-- > ; modifyState (+1)
2008-01-22 04:37:52 +03:00
-- > ; return (Id x)
-- > }
2008-01-13 20:53:15 +03:00
modifyState :: ( Monad m ) => ( u -> u ) -> ParsecT s u m ()
modifyState f = do updateParserState $ \ s -> s { stateUser = f ( stateUser s ) }
return ()
-- XXX Compat
2008-01-22 04:37:52 +03:00
-- | An alias for putState for backwards compatibility.
2008-01-13 20:53:15 +03:00
setState :: ( Monad m ) => u -> ParsecT s u m ()
setState = putState
2008-01-20 01:06:33 +03:00
2008-01-22 04:37:52 +03:00
-- | An alias for modifyState for backwards compatibility.
2008-01-13 20:53:15 +03:00
updateState :: ( Monad m ) => ( u -> u ) -> ParsecT s u m ()
updateState = modifyState