2020-04-23 22:28:20 +03:00
|
|
|
|
{-# LANGUAGE FlexibleContexts #-}
|
|
|
|
|
{-# LANGUAGE FlexibleInstances #-}
|
|
|
|
|
{-# LANGUAGE MultiParamTypeClasses #-}
|
|
|
|
|
{-# LANGUAGE RankNTypes #-}
|
2020-10-12 23:24:28 +03:00
|
|
|
|
{-# LANGUAGE Safe #-}
|
2020-04-23 22:28:20 +03:00
|
|
|
|
{-# LANGUAGE ScopedTypeVariables #-}
|
|
|
|
|
{-# LANGUAGE TypeFamilies #-}
|
|
|
|
|
{-# LANGUAGE UndecidableInstances #-}
|
|
|
|
|
|
2015-07-28 16:32:19 +03:00
|
|
|
|
-- |
|
2015-08-01 19:24:45 +03:00
|
|
|
|
-- Module : Text.Megaparsec
|
2019-07-02 22:41:24 +03:00
|
|
|
|
-- Copyright : © 2015–present Megaparsec contributors
|
2015-07-30 19:20:37 +03:00
|
|
|
|
-- © 2007 Paolo Martini
|
|
|
|
|
-- © 1999–2001 Daan Leijen
|
2015-10-30 14:26:45 +03:00
|
|
|
|
-- License : FreeBSD
|
2015-07-28 16:32:19 +03:00
|
|
|
|
--
|
2017-05-24 15:30:10 +03:00
|
|
|
|
-- Maintainer : Mark Karpov <markkarpov92@gmail.com>
|
2015-07-29 11:38:32 +03:00
|
|
|
|
-- Stability : experimental
|
2015-07-28 16:32:19 +03:00
|
|
|
|
-- Portability : portable
|
|
|
|
|
--
|
|
|
|
|
-- This module includes everything you need to get started writing a parser.
|
2016-05-14 12:59:18 +03:00
|
|
|
|
-- If you are new to Megaparsec and don't know where to begin, take a look
|
2019-11-23 01:16:51 +03:00
|
|
|
|
-- at the tutorial <https://markkarpov.com/tutorial/megaparsec.html>.
|
2015-07-28 16:32:19 +03:00
|
|
|
|
--
|
2017-06-14 22:12:18 +03:00
|
|
|
|
-- In addition to the "Text.Megaparsec" module, which exports and re-exports
|
2021-04-25 16:49:13 +03:00
|
|
|
|
-- almost everything that you may need, we advise to import
|
2017-07-03 14:34:00 +03:00
|
|
|
|
-- "Text.Megaparsec.Char" if you plan to work with a stream of 'Char' tokens
|
2017-07-04 13:12:35 +03:00
|
|
|
|
-- or "Text.Megaparsec.Byte" if you intend to parse binary data.
|
2015-08-03 21:15:16 +03:00
|
|
|
|
--
|
2017-06-13 19:14:31 +03:00
|
|
|
|
-- It is common to start working with the library by defining a type synonym
|
|
|
|
|
-- like this:
|
2015-08-03 21:15:16 +03:00
|
|
|
|
--
|
2017-06-29 08:13:22 +03:00
|
|
|
|
-- > type Parser = Parsec Void Text
|
|
|
|
|
-- > ^ ^
|
|
|
|
|
-- > | |
|
2018-08-20 19:35:10 +03:00
|
|
|
|
-- > Custom error component Input stream type
|
2015-08-03 21:15:16 +03:00
|
|
|
|
--
|
2018-08-20 19:35:10 +03:00
|
|
|
|
-- Then you can write type signatures like @Parser 'Int'@—for a parser that
|
2017-06-13 19:14:31 +03:00
|
|
|
|
-- returns an 'Int' for example.
|
2016-12-26 13:41:30 +03:00
|
|
|
|
--
|
2017-07-28 07:28:16 +03:00
|
|
|
|
-- Similarly (since it's known to cause confusion), you should use
|
2018-08-20 19:35:10 +03:00
|
|
|
|
-- 'ParseErrorBundle' type parametrized like this:
|
2017-07-28 07:28:16 +03:00
|
|
|
|
--
|
2018-08-20 19:35:10 +03:00
|
|
|
|
-- > ParseErrorBundle Text Void
|
|
|
|
|
-- > ^ ^
|
|
|
|
|
-- > | |
|
|
|
|
|
-- > Input stream type Custom error component (the same you used in Parser)
|
2017-07-28 07:28:16 +03:00
|
|
|
|
--
|
2017-07-04 13:12:35 +03:00
|
|
|
|
-- Megaparsec uses some type-level machinery to provide flexibility without
|
|
|
|
|
-- compromising on type safety. Thus type signatures are sometimes necessary
|
2018-11-07 08:21:00 +03:00
|
|
|
|
-- to avoid ambiguous types. If you're seeing an error message that reads
|
2017-07-04 13:12:35 +03:00
|
|
|
|
-- like “Type variable @e0@ is ambiguous …”, you need to give an explicit
|
|
|
|
|
-- signature to your parser to resolve the ambiguity. It's a good idea to
|
|
|
|
|
-- provide type signatures for all top-level definitions.
|
2015-08-01 19:24:45 +03:00
|
|
|
|
module Text.Megaparsec
|
2017-06-14 22:12:18 +03:00
|
|
|
|
( -- * Re-exports
|
2017-07-30 19:14:08 +03:00
|
|
|
|
-- $reexports
|
2020-04-23 22:28:20 +03:00
|
|
|
|
module Text.Megaparsec.Pos,
|
|
|
|
|
module Text.Megaparsec.Error,
|
|
|
|
|
module Text.Megaparsec.Stream,
|
|
|
|
|
module Control.Monad.Combinators,
|
|
|
|
|
|
2017-06-14 22:12:18 +03:00
|
|
|
|
-- * Data types
|
2020-04-23 22:28:20 +03:00
|
|
|
|
State (..),
|
|
|
|
|
PosState (..),
|
|
|
|
|
Parsec,
|
|
|
|
|
ParsecT,
|
|
|
|
|
|
2017-06-14 22:12:18 +03:00
|
|
|
|
-- * Running parser
|
2020-04-23 22:28:20 +03:00
|
|
|
|
parse,
|
|
|
|
|
parseMaybe,
|
|
|
|
|
parseTest,
|
|
|
|
|
runParser,
|
|
|
|
|
runParser',
|
|
|
|
|
runParserT,
|
|
|
|
|
runParserT',
|
|
|
|
|
|
2017-06-14 22:12:18 +03:00
|
|
|
|
-- * Primitive combinators
|
2020-04-23 22:28:20 +03:00
|
|
|
|
MonadParsec (..),
|
|
|
|
|
|
2019-11-06 01:24:04 +03:00
|
|
|
|
-- * Signaling parse errors
|
|
|
|
|
-- $parse-errors
|
2020-04-23 22:28:20 +03:00
|
|
|
|
failure,
|
|
|
|
|
fancyFailure,
|
|
|
|
|
unexpected,
|
|
|
|
|
customFailure,
|
|
|
|
|
region,
|
|
|
|
|
registerParseError,
|
|
|
|
|
registerFailure,
|
|
|
|
|
registerFancyFailure,
|
|
|
|
|
|
2017-06-14 22:12:18 +03:00
|
|
|
|
-- * Derivatives of primitive combinators
|
2020-04-23 22:28:20 +03:00
|
|
|
|
single,
|
|
|
|
|
satisfy,
|
|
|
|
|
anySingle,
|
|
|
|
|
anySingleBut,
|
|
|
|
|
oneOf,
|
|
|
|
|
noneOf,
|
|
|
|
|
chunk,
|
|
|
|
|
(<?>),
|
|
|
|
|
match,
|
|
|
|
|
takeRest,
|
|
|
|
|
atEnd,
|
|
|
|
|
|
2017-06-14 22:12:18 +03:00
|
|
|
|
-- * Parser state combinators
|
2020-04-23 22:28:20 +03:00
|
|
|
|
getInput,
|
|
|
|
|
setInput,
|
|
|
|
|
getSourcePos,
|
|
|
|
|
getOffset,
|
|
|
|
|
setOffset,
|
|
|
|
|
setParserState,
|
|
|
|
|
)
|
2015-07-28 16:32:19 +03:00
|
|
|
|
where
|
2008-01-13 20:53:15 +03:00
|
|
|
|
|
2017-12-31 12:01:16 +03:00
|
|
|
|
import Control.Monad.Combinators
|
2017-06-14 22:12:18 +03:00
|
|
|
|
import Control.Monad.Identity
|
|
|
|
|
import Data.List.NonEmpty (NonEmpty (..))
|
2020-04-23 22:28:20 +03:00
|
|
|
|
import qualified Data.List.NonEmpty as NE
|
2017-07-02 19:56:01 +03:00
|
|
|
|
import Data.Maybe (fromJust)
|
2019-11-05 14:09:05 +03:00
|
|
|
|
import Data.Set (Set)
|
2020-04-23 22:28:20 +03:00
|
|
|
|
import qualified Data.Set as E
|
2018-03-27 17:40:20 +03:00
|
|
|
|
import Text.Megaparsec.Class
|
2017-07-09 20:12:23 +03:00
|
|
|
|
import Text.Megaparsec.Error
|
2018-03-27 17:40:20 +03:00
|
|
|
|
import Text.Megaparsec.Internal
|
2017-07-09 20:12:23 +03:00
|
|
|
|
import Text.Megaparsec.Pos
|
2018-03-27 17:40:20 +03:00
|
|
|
|
import Text.Megaparsec.State
|
2017-07-09 20:12:23 +03:00
|
|
|
|
import Text.Megaparsec.Stream
|
2015-08-01 17:39:20 +03:00
|
|
|
|
|
2017-07-30 19:14:08 +03:00
|
|
|
|
-- $reexports
|
|
|
|
|
--
|
2018-03-27 18:19:55 +03:00
|
|
|
|
-- Note that we re-export monadic combinators from
|
|
|
|
|
-- "Control.Monad.Combinators" because these are more efficient than
|
|
|
|
|
-- 'Applicative'-based ones. Thus 'many' and 'some' may clash with the
|
|
|
|
|
-- functions from "Control.Applicative". You need to hide the functions like
|
|
|
|
|
-- this:
|
|
|
|
|
--
|
|
|
|
|
-- > import Control.Applicative hiding (many, some)
|
|
|
|
|
--
|
2017-12-31 12:05:27 +03:00
|
|
|
|
-- Also note that you can import "Control.Monad.Combinators.NonEmpty" if you
|
|
|
|
|
-- wish that combinators like 'some' return 'NonEmpty' lists. The module
|
|
|
|
|
-- lives in the @parser-combinators@ package (you need at least version
|
|
|
|
|
-- /0.4.0/).
|
2017-07-30 19:14:08 +03:00
|
|
|
|
--
|
|
|
|
|
-- This module is intended to be imported qualified:
|
|
|
|
|
--
|
2017-12-31 12:05:27 +03:00
|
|
|
|
-- > import qualified Control.Monad.Combinators.NonEmpty as NE
|
2018-08-20 19:35:10 +03:00
|
|
|
|
--
|
|
|
|
|
-- Other modules of interest are:
|
|
|
|
|
--
|
|
|
|
|
-- * "Control.Monad.Combinators.Expr" for parsing of expressions.
|
|
|
|
|
-- * "Control.Applicative.Permutations" for parsing of permutations
|
|
|
|
|
-- phrases.
|
2017-07-30 19:14:08 +03:00
|
|
|
|
|
2017-06-14 22:12:18 +03:00
|
|
|
|
----------------------------------------------------------------------------
|
|
|
|
|
-- Data types
|
|
|
|
|
|
2017-07-25 12:43:24 +03:00
|
|
|
|
-- | 'Parsec' is a non-transformer variant of the more general 'ParsecT'
|
2017-06-14 22:12:18 +03:00
|
|
|
|
-- monad transformer.
|
|
|
|
|
type Parsec e s = ParsecT e s Identity
|
|
|
|
|
|
|
|
|
|
----------------------------------------------------------------------------
|
|
|
|
|
-- Running a parser
|
|
|
|
|
|
2018-09-04 22:24:28 +03:00
|
|
|
|
-- | @'parse' p file input@ runs parser @p@ over 'Identity' (see
|
|
|
|
|
-- 'runParserT' if you're using the 'ParsecT' monad transformer; 'parse'
|
|
|
|
|
-- itself is just a synonym for 'runParser'). It returns either a
|
|
|
|
|
-- 'ParseErrorBundle' ('Left') or a value of type @a@ ('Right').
|
|
|
|
|
-- 'errorBundlePretty' can be used to turn 'ParseErrorBundle' into the
|
|
|
|
|
-- string representation of the error message. See "Text.Megaparsec.Error"
|
|
|
|
|
-- if you need to do more advanced error analysis.
|
|
|
|
|
--
|
|
|
|
|
-- > main = case parse numbers "" "11,2,43" of
|
|
|
|
|
-- > Left bundle -> putStr (errorBundlePretty bundle)
|
2017-06-14 22:12:18 +03:00
|
|
|
|
-- > Right xs -> print (sum xs)
|
|
|
|
|
-- >
|
2018-09-04 22:24:28 +03:00
|
|
|
|
-- > numbers = decimal `sepBy` char ','
|
2020-04-23 22:28:20 +03:00
|
|
|
|
parse ::
|
|
|
|
|
-- | Parser to run
|
|
|
|
|
Parsec e s a ->
|
|
|
|
|
-- | Name of source file
|
|
|
|
|
String ->
|
|
|
|
|
-- | Input for parser
|
|
|
|
|
s ->
|
|
|
|
|
Either (ParseErrorBundle s e) a
|
2017-06-14 22:12:18 +03:00
|
|
|
|
parse = runParser
|
|
|
|
|
|
2017-07-04 13:12:35 +03:00
|
|
|
|
-- | @'parseMaybe' p input@ runs the parser @p@ on @input@ and returns the
|
2017-06-14 22:12:18 +03:00
|
|
|
|
-- result inside 'Just' on success and 'Nothing' on failure. This function
|
|
|
|
|
-- also parses 'eof', so if the parser doesn't consume all of its input, it
|
|
|
|
|
-- will fail.
|
|
|
|
|
--
|
|
|
|
|
-- The function is supposed to be useful for lightweight parsing, where
|
2018-11-07 08:21:00 +03:00
|
|
|
|
-- error messages (and thus file names) are not important and entire input
|
2021-04-25 16:49:13 +03:00
|
|
|
|
-- should be consumed. For example, it can be used for parsing of a single
|
|
|
|
|
-- number according to a specification of its format.
|
2017-06-28 10:08:26 +03:00
|
|
|
|
parseMaybe :: (Ord e, Stream s) => Parsec e s a -> s -> Maybe a
|
2017-06-14 22:12:18 +03:00
|
|
|
|
parseMaybe p s =
|
|
|
|
|
case parse (p <* eof) "" s of
|
2020-04-23 22:28:20 +03:00
|
|
|
|
Left _ -> Nothing
|
2017-06-14 22:12:18 +03:00
|
|
|
|
Right x -> Just x
|
|
|
|
|
|
2021-04-25 16:49:13 +03:00
|
|
|
|
-- | The expression @'parseTest' p input@ applies the parser @p@ on the
|
2017-07-04 13:12:35 +03:00
|
|
|
|
-- input @input@ and prints the result to stdout. Useful for testing.
|
2020-04-23 22:28:20 +03:00
|
|
|
|
parseTest ::
|
|
|
|
|
( ShowErrorComponent e,
|
|
|
|
|
Show a,
|
2020-06-26 14:55:55 +03:00
|
|
|
|
VisualStream s,
|
|
|
|
|
TraversableStream s
|
2020-04-23 22:28:20 +03:00
|
|
|
|
) =>
|
|
|
|
|
-- | Parser to run
|
|
|
|
|
Parsec e s a ->
|
|
|
|
|
-- | Input for parser
|
|
|
|
|
s ->
|
|
|
|
|
IO ()
|
2017-06-14 22:12:18 +03:00
|
|
|
|
parseTest p input =
|
|
|
|
|
case parse p "" input of
|
2020-04-23 22:28:20 +03:00
|
|
|
|
Left e -> putStr (errorBundlePretty e)
|
2017-07-25 10:36:40 +03:00
|
|
|
|
Right x -> print x
|
|
|
|
|
|
2017-07-04 13:12:35 +03:00
|
|
|
|
-- | @'runParser' p file input@ runs parser @p@ on the input stream of
|
|
|
|
|
-- tokens @input@, obtained from source @file@. The @file@ is only used in
|
2018-11-07 08:21:00 +03:00
|
|
|
|
-- error messages and may be the empty string. Returns either a
|
|
|
|
|
-- 'ParseErrorBundle' ('Left') or a value of type @a@ ('Right').
|
2017-06-14 22:12:18 +03:00
|
|
|
|
--
|
|
|
|
|
-- > parseFromFile p file = runParser p file <$> readFile file
|
2020-04-23 22:28:20 +03:00
|
|
|
|
runParser ::
|
|
|
|
|
-- | Parser to run
|
|
|
|
|
Parsec e s a ->
|
|
|
|
|
-- | Name of source file
|
|
|
|
|
String ->
|
|
|
|
|
-- | Input for parser
|
|
|
|
|
s ->
|
|
|
|
|
Either (ParseErrorBundle s e) a
|
2017-06-14 22:12:18 +03:00
|
|
|
|
runParser p name s = snd $ runParser' p (initialState name s)
|
|
|
|
|
|
|
|
|
|
-- | The function is similar to 'runParser' with the difference that it
|
2021-04-25 16:49:13 +03:00
|
|
|
|
-- accepts and returns the parser state. This allows us e.g. to specify
|
|
|
|
|
-- arbitrary textual position at the beginning of parsing. This is the most
|
|
|
|
|
-- general way to run a parser over the 'Identity' monad.
|
2017-06-14 22:12:18 +03:00
|
|
|
|
--
|
|
|
|
|
-- @since 4.2.0
|
2020-04-23 22:28:20 +03:00
|
|
|
|
runParser' ::
|
|
|
|
|
-- | Parser to run
|
|
|
|
|
Parsec e s a ->
|
|
|
|
|
-- | Initial state
|
|
|
|
|
State s e ->
|
|
|
|
|
(State s e, Either (ParseErrorBundle s e) a)
|
2017-06-14 22:12:18 +03:00
|
|
|
|
runParser' p = runIdentity . runParserT' p
|
|
|
|
|
|
2017-07-04 13:12:35 +03:00
|
|
|
|
-- | @'runParserT' p file input@ runs parser @p@ on the input list of tokens
|
2017-06-14 22:12:18 +03:00
|
|
|
|
-- @input@, obtained from source @file@. The @file@ is only used in error
|
|
|
|
|
-- messages and may be the empty string. Returns a computation in the
|
2018-11-07 08:21:00 +03:00
|
|
|
|
-- underlying monad @m@ that returns either a 'ParseErrorBundle' ('Left') or
|
|
|
|
|
-- a value of type @a@ ('Right').
|
2020-04-23 22:28:20 +03:00
|
|
|
|
runParserT ::
|
|
|
|
|
Monad m =>
|
|
|
|
|
-- | Parser to run
|
|
|
|
|
ParsecT e s m a ->
|
|
|
|
|
-- | Name of source file
|
|
|
|
|
String ->
|
|
|
|
|
-- | Input for parser
|
|
|
|
|
s ->
|
|
|
|
|
m (Either (ParseErrorBundle s e) a)
|
2018-10-26 16:07:46 +03:00
|
|
|
|
runParserT p name s = snd <$> runParserT' p (initialState name s)
|
2017-06-14 22:12:18 +03:00
|
|
|
|
|
|
|
|
|
-- | This function is similar to 'runParserT', but like 'runParser'' it
|
|
|
|
|
-- accepts and returns parser state. This is thus the most general way to
|
|
|
|
|
-- run a parser.
|
|
|
|
|
--
|
|
|
|
|
-- @since 4.2.0
|
2020-04-23 22:28:20 +03:00
|
|
|
|
runParserT' ::
|
|
|
|
|
Monad m =>
|
|
|
|
|
-- | Parser to run
|
|
|
|
|
ParsecT e s m a ->
|
|
|
|
|
-- | Initial state
|
|
|
|
|
State s e ->
|
|
|
|
|
m (State s e, Either (ParseErrorBundle s e) a)
|
2017-06-14 22:12:18 +03:00
|
|
|
|
runParserT' p s = do
|
|
|
|
|
(Reply s' _ result) <- runParsecT p s
|
2020-04-23 22:28:20 +03:00
|
|
|
|
let toBundle es =
|
|
|
|
|
ParseErrorBundle
|
|
|
|
|
{ bundleErrors =
|
|
|
|
|
NE.sortWith errorOffset es,
|
|
|
|
|
bundlePosState = statePosState s
|
|
|
|
|
}
|
2018-07-15 16:01:35 +03:00
|
|
|
|
return $ case result of
|
2019-11-06 01:24:04 +03:00
|
|
|
|
OK x ->
|
|
|
|
|
case NE.nonEmpty (stateParseErrors s') of
|
|
|
|
|
Nothing -> (s', Right x)
|
|
|
|
|
Just de -> (s', Left (toBundle de))
|
2018-07-15 16:01:35 +03:00
|
|
|
|
Error e ->
|
2019-11-06 01:24:04 +03:00
|
|
|
|
(s', Left (toBundle (e :| stateParseErrors s')))
|
2017-06-14 22:12:18 +03:00
|
|
|
|
|
2021-04-25 16:49:13 +03:00
|
|
|
|
-- | Given the name of source file and the input construct the initial state
|
|
|
|
|
-- for a parser.
|
2019-11-06 01:24:04 +03:00
|
|
|
|
initialState :: String -> s -> State s e
|
2020-04-23 22:28:20 +03:00
|
|
|
|
initialState name s =
|
|
|
|
|
State
|
|
|
|
|
{ stateInput = s,
|
|
|
|
|
stateOffset = 0,
|
|
|
|
|
statePosState =
|
|
|
|
|
PosState
|
|
|
|
|
{ pstateInput = s,
|
|
|
|
|
pstateOffset = 0,
|
|
|
|
|
pstateSourcePos = initialPos name,
|
|
|
|
|
pstateTabWidth = defaultTabWidth,
|
|
|
|
|
pstateLinePrefix = ""
|
|
|
|
|
},
|
|
|
|
|
stateParseErrors = []
|
2018-08-20 19:35:10 +03:00
|
|
|
|
}
|
2017-06-14 22:12:18 +03:00
|
|
|
|
|
2019-11-06 01:24:04 +03:00
|
|
|
|
----------------------------------------------------------------------------
|
|
|
|
|
-- Signaling parse errors
|
|
|
|
|
|
|
|
|
|
-- $parse-errors
|
|
|
|
|
--
|
|
|
|
|
-- The most general function to fail and end parsing is 'parseError'. These
|
|
|
|
|
-- are built on top of it. The section also includes functions starting with
|
|
|
|
|
-- the @register@ prefix which allow users to register “delayed”
|
|
|
|
|
-- 'ParseError's.
|
|
|
|
|
|
|
|
|
|
-- | Stop parsing and report a trivial 'ParseError'.
|
|
|
|
|
--
|
|
|
|
|
-- @since 6.0.0
|
2020-04-23 22:28:20 +03:00
|
|
|
|
failure ::
|
|
|
|
|
MonadParsec e s m =>
|
|
|
|
|
-- | Unexpected item (if any)
|
|
|
|
|
Maybe (ErrorItem (Token s)) ->
|
|
|
|
|
-- | Expected items
|
|
|
|
|
Set (ErrorItem (Token s)) ->
|
|
|
|
|
m a
|
2019-11-06 01:24:04 +03:00
|
|
|
|
failure us ps = do
|
|
|
|
|
o <- getOffset
|
|
|
|
|
parseError (TrivialError o us ps)
|
|
|
|
|
{-# INLINE failure #-}
|
|
|
|
|
|
|
|
|
|
-- | Stop parsing and report a fancy 'ParseError'. To report a single custom
|
|
|
|
|
-- parse error, see 'Text.Megaparsec.customFailure'.
|
|
|
|
|
--
|
|
|
|
|
-- @since 6.0.0
|
2020-04-23 22:28:20 +03:00
|
|
|
|
fancyFailure ::
|
|
|
|
|
MonadParsec e s m =>
|
|
|
|
|
-- | Fancy error components
|
|
|
|
|
Set (ErrorFancy e) ->
|
|
|
|
|
m a
|
2019-11-06 01:24:04 +03:00
|
|
|
|
fancyFailure xs = do
|
|
|
|
|
o <- getOffset
|
|
|
|
|
parseError (FancyError o xs)
|
|
|
|
|
{-# INLINE fancyFailure #-}
|
|
|
|
|
|
|
|
|
|
-- | The parser @'unexpected' item@ fails with an error message telling
|
|
|
|
|
-- about unexpected item @item@ without consuming any input.
|
|
|
|
|
--
|
|
|
|
|
-- > unexpected item = failure (Just item) Set.empty
|
|
|
|
|
unexpected :: MonadParsec e s m => ErrorItem (Token s) -> m a
|
|
|
|
|
unexpected item = failure (Just item) E.empty
|
|
|
|
|
{-# INLINE unexpected #-}
|
|
|
|
|
|
|
|
|
|
-- | Report a custom parse error. For a more general version, see
|
|
|
|
|
-- 'fancyFailure'.
|
|
|
|
|
--
|
|
|
|
|
-- > customFailure = fancyFailure . Set.singleton . ErrorCustom
|
|
|
|
|
--
|
|
|
|
|
-- @since 6.3.0
|
|
|
|
|
customFailure :: MonadParsec e s m => e -> m a
|
|
|
|
|
customFailure = fancyFailure . E.singleton . ErrorCustom
|
|
|
|
|
{-# INLINE customFailure #-}
|
|
|
|
|
|
|
|
|
|
-- | Specify how to process 'ParseError's that happen inside of this
|
|
|
|
|
-- wrapper. This applies to both normal and delayed 'ParseError's.
|
|
|
|
|
--
|
|
|
|
|
-- As a side-effect of the implementation the inner computation will start
|
2022-10-21 21:26:14 +03:00
|
|
|
|
-- with an empty collection of delayed errors and they will be updated and
|
2019-11-06 01:24:04 +03:00
|
|
|
|
-- “restored” on the way out of 'region'.
|
|
|
|
|
--
|
|
|
|
|
-- @since 5.3.0
|
2020-04-23 22:28:20 +03:00
|
|
|
|
region ::
|
|
|
|
|
MonadParsec e s m =>
|
|
|
|
|
-- | How to process 'ParseError's
|
|
|
|
|
(ParseError s e -> ParseError s e) ->
|
|
|
|
|
-- | The “region” that the processing applies to
|
|
|
|
|
m a ->
|
|
|
|
|
m a
|
2019-11-06 01:24:04 +03:00
|
|
|
|
region f m = do
|
|
|
|
|
deSoFar <- stateParseErrors <$> getParserState
|
|
|
|
|
updateParserState $ \s ->
|
2020-04-23 22:28:20 +03:00
|
|
|
|
s {stateParseErrors = []}
|
2019-11-06 01:24:04 +03:00
|
|
|
|
r <- observing m
|
|
|
|
|
updateParserState $ \s ->
|
2020-04-23 22:28:20 +03:00
|
|
|
|
s {stateParseErrors = (f <$> stateParseErrors s) ++ deSoFar}
|
2019-11-06 01:24:04 +03:00
|
|
|
|
case r of
|
|
|
|
|
Left err -> parseError (f err)
|
|
|
|
|
Right x -> return x
|
|
|
|
|
{-# INLINEABLE region #-}
|
|
|
|
|
|
|
|
|
|
-- | Register a 'ParseError' for later reporting. This action does not end
|
|
|
|
|
-- parsing and has no effect except for adding the given 'ParseError' to the
|
|
|
|
|
-- collection of “delayed” 'ParseError's which will be taken into
|
2021-04-25 16:49:13 +03:00
|
|
|
|
-- consideration at the end of parsing. Only if this collection is empty the
|
2019-11-06 01:24:04 +03:00
|
|
|
|
-- parser will succeed. This is the main way to report several parse errors
|
|
|
|
|
-- at once.
|
|
|
|
|
--
|
|
|
|
|
-- @since 8.0.0
|
|
|
|
|
registerParseError :: MonadParsec e s m => ParseError s e -> m ()
|
|
|
|
|
registerParseError e = updateParserState $ \s ->
|
2020-04-23 22:28:20 +03:00
|
|
|
|
s {stateParseErrors = e : stateParseErrors s}
|
2019-11-06 01:24:04 +03:00
|
|
|
|
{-# INLINE registerParseError #-}
|
|
|
|
|
|
|
|
|
|
-- | Like 'failure', but for delayed 'ParseError's.
|
|
|
|
|
--
|
|
|
|
|
-- @since 8.0.0
|
2020-04-23 22:28:20 +03:00
|
|
|
|
registerFailure ::
|
|
|
|
|
MonadParsec e s m =>
|
|
|
|
|
-- | Unexpected item (if any)
|
|
|
|
|
Maybe (ErrorItem (Token s)) ->
|
|
|
|
|
-- | Expected items
|
|
|
|
|
Set (ErrorItem (Token s)) ->
|
|
|
|
|
m ()
|
2019-11-06 01:24:04 +03:00
|
|
|
|
registerFailure us ps = do
|
|
|
|
|
o <- getOffset
|
|
|
|
|
registerParseError (TrivialError o us ps)
|
|
|
|
|
{-# INLINE registerFailure #-}
|
|
|
|
|
|
|
|
|
|
-- | Like 'fancyFailure', but for delayed 'ParseError's.
|
|
|
|
|
--
|
|
|
|
|
-- @since 8.0.0
|
2020-04-23 22:28:20 +03:00
|
|
|
|
registerFancyFailure ::
|
|
|
|
|
MonadParsec e s m =>
|
|
|
|
|
-- | Fancy error components
|
|
|
|
|
Set (ErrorFancy e) ->
|
|
|
|
|
m ()
|
2019-11-06 01:24:04 +03:00
|
|
|
|
registerFancyFailure xs = do
|
|
|
|
|
o <- getOffset
|
|
|
|
|
registerParseError (FancyError o xs)
|
|
|
|
|
{-# INLINE registerFancyFailure #-}
|
|
|
|
|
|
2017-06-14 22:12:18 +03:00
|
|
|
|
----------------------------------------------------------------------------
|
|
|
|
|
-- Derivatives of primitive combinators
|
|
|
|
|
|
2018-01-08 18:39:39 +03:00
|
|
|
|
-- | @'single' t@ only matches the single token @t@.
|
|
|
|
|
--
|
|
|
|
|
-- > semicolon = single ';'
|
|
|
|
|
--
|
|
|
|
|
-- See also: 'token', 'anySingle', 'Text.Megaparsec.Byte.char',
|
|
|
|
|
-- 'Text.Megaparsec.Char.char'.
|
|
|
|
|
--
|
|
|
|
|
-- @since 7.0.0
|
2020-04-23 22:28:20 +03:00
|
|
|
|
single ::
|
|
|
|
|
MonadParsec e s m =>
|
|
|
|
|
-- | Token to match
|
|
|
|
|
Token s ->
|
|
|
|
|
m (Token s)
|
2018-01-08 18:39:39 +03:00
|
|
|
|
single t = token testToken expected
|
|
|
|
|
where
|
|
|
|
|
testToken x = if x == t then Just x else Nothing
|
2020-04-23 22:28:20 +03:00
|
|
|
|
expected = E.singleton (Tokens (t :| []))
|
2018-01-08 18:39:39 +03:00
|
|
|
|
{-# INLINE single #-}
|
|
|
|
|
|
|
|
|
|
-- | The parser @'satisfy' f@ succeeds for any token for which the supplied
|
2018-11-07 08:21:00 +03:00
|
|
|
|
-- function @f@ returns 'True'.
|
2018-01-08 18:39:39 +03:00
|
|
|
|
--
|
|
|
|
|
-- > digitChar = satisfy isDigit <?> "digit"
|
|
|
|
|
-- > oneOf cs = satisfy (`elem` cs)
|
|
|
|
|
--
|
2020-12-14 14:08:57 +03:00
|
|
|
|
-- __Performance note__: when you need to parse a single token, it is often
|
|
|
|
|
-- a good idea to use 'satisfy' with the right predicate function instead of
|
|
|
|
|
-- creating a complex parser using the combinators.
|
|
|
|
|
--
|
2018-01-08 18:39:39 +03:00
|
|
|
|
-- See also: 'anySingle', 'anySingleBut', 'oneOf', 'noneOf'.
|
|
|
|
|
--
|
|
|
|
|
-- @since 7.0.0
|
2020-04-23 22:28:20 +03:00
|
|
|
|
satisfy ::
|
|
|
|
|
MonadParsec e s m =>
|
|
|
|
|
-- | Predicate to apply
|
|
|
|
|
(Token s -> Bool) ->
|
|
|
|
|
m (Token s)
|
2018-01-08 18:39:39 +03:00
|
|
|
|
satisfy f = token testChar E.empty
|
|
|
|
|
where
|
|
|
|
|
testChar x = if f x then Just x else Nothing
|
|
|
|
|
{-# INLINE satisfy #-}
|
|
|
|
|
|
|
|
|
|
-- | Parse and return a single token. It's a good idea to attach a 'label'
|
2018-11-07 08:21:00 +03:00
|
|
|
|
-- to this parser.
|
2018-01-08 18:39:39 +03:00
|
|
|
|
--
|
|
|
|
|
-- > anySingle = satisfy (const True)
|
|
|
|
|
--
|
|
|
|
|
-- See also: 'satisfy', 'anySingleBut'.
|
|
|
|
|
--
|
|
|
|
|
-- @since 7.0.0
|
|
|
|
|
anySingle :: MonadParsec e s m => m (Token s)
|
|
|
|
|
anySingle = satisfy (const True)
|
|
|
|
|
{-# INLINE anySingle #-}
|
|
|
|
|
|
|
|
|
|
-- | Match any token but the given one. It's a good idea to attach a 'label'
|
2018-11-07 08:21:00 +03:00
|
|
|
|
-- to this parser.
|
2018-01-08 18:39:39 +03:00
|
|
|
|
--
|
|
|
|
|
-- > anySingleBut t = satisfy (/= t)
|
|
|
|
|
--
|
|
|
|
|
-- See also: 'single', 'anySingle', 'satisfy'.
|
|
|
|
|
--
|
|
|
|
|
-- @since 7.0.0
|
2020-04-23 22:28:20 +03:00
|
|
|
|
anySingleBut ::
|
|
|
|
|
MonadParsec e s m =>
|
|
|
|
|
-- | Token we should not match
|
|
|
|
|
Token s ->
|
|
|
|
|
m (Token s)
|
2018-01-08 18:39:39 +03:00
|
|
|
|
anySingleBut t = satisfy (/= t)
|
|
|
|
|
{-# INLINE anySingleBut #-}
|
|
|
|
|
|
|
|
|
|
-- | @'oneOf' ts@ succeeds if the current token is in the supplied
|
|
|
|
|
-- collection of tokens @ts@. Returns the parsed token. Note that this
|
|
|
|
|
-- parser cannot automatically generate the “expected” component of error
|
|
|
|
|
-- message, so usually you should label it manually with 'label' or ('<?>').
|
|
|
|
|
--
|
|
|
|
|
-- > oneOf cs = satisfy (`elem` cs)
|
|
|
|
|
--
|
|
|
|
|
-- See also: 'satisfy'.
|
|
|
|
|
--
|
|
|
|
|
-- > digit = oneOf ['0'..'9'] <?> "digit"
|
|
|
|
|
--
|
|
|
|
|
-- __Performance note__: prefer 'satisfy' when you can because it's faster
|
|
|
|
|
-- when you have only a couple of tokens to compare to:
|
|
|
|
|
--
|
|
|
|
|
-- > quoteFast = satisfy (\x -> x == '\'' || x == '\"')
|
|
|
|
|
-- > quoteSlow = oneOf "'\""
|
|
|
|
|
--
|
|
|
|
|
-- @since 7.0.0
|
2020-04-23 22:28:20 +03:00
|
|
|
|
oneOf ::
|
|
|
|
|
(Foldable f, MonadParsec e s m) =>
|
|
|
|
|
-- | Collection of matching tokens
|
|
|
|
|
f (Token s) ->
|
|
|
|
|
m (Token s)
|
2018-01-08 18:39:39 +03:00
|
|
|
|
oneOf cs = satisfy (`elem` cs)
|
|
|
|
|
{-# INLINE oneOf #-}
|
|
|
|
|
|
|
|
|
|
-- | As the dual of 'oneOf', @'noneOf' ts@ succeeds if the current token
|
|
|
|
|
-- /not/ in the supplied list of tokens @ts@. Returns the parsed character.
|
|
|
|
|
-- Note that this parser cannot automatically generate the “expected”
|
|
|
|
|
-- component of error message, so usually you should label it manually with
|
|
|
|
|
-- 'label' or ('<?>').
|
|
|
|
|
--
|
|
|
|
|
-- > noneOf cs = satisfy (`notElem` cs)
|
|
|
|
|
--
|
|
|
|
|
-- See also: 'satisfy'.
|
|
|
|
|
--
|
2018-11-07 08:21:00 +03:00
|
|
|
|
-- __Performance note__: prefer 'satisfy' and 'anySingleBut' when you can
|
2018-01-08 18:39:39 +03:00
|
|
|
|
-- because it's faster.
|
|
|
|
|
--
|
|
|
|
|
-- @since 7.0.0
|
2020-04-23 22:28:20 +03:00
|
|
|
|
noneOf ::
|
|
|
|
|
(Foldable f, MonadParsec e s m) =>
|
|
|
|
|
-- | Collection of taken we should not match
|
|
|
|
|
f (Token s) ->
|
|
|
|
|
m (Token s)
|
2018-01-08 18:39:39 +03:00
|
|
|
|
noneOf cs = satisfy (`notElem` cs)
|
|
|
|
|
{-# INLINE noneOf #-}
|
|
|
|
|
|
|
|
|
|
-- | @'chunk' chk@ only matches the chunk @chk@.
|
|
|
|
|
--
|
|
|
|
|
-- > divOrMod = chunk "div" <|> chunk "mod"
|
|
|
|
|
--
|
|
|
|
|
-- See also: 'tokens', 'Text.Megaparsec.Char.string',
|
|
|
|
|
-- 'Text.Megaparsec.Byte.string'.
|
|
|
|
|
--
|
|
|
|
|
-- @since 7.0.0
|
2020-04-23 22:28:20 +03:00
|
|
|
|
chunk ::
|
|
|
|
|
MonadParsec e s m =>
|
|
|
|
|
-- | Chunk to match
|
|
|
|
|
Tokens s ->
|
|
|
|
|
m (Tokens s)
|
2018-01-08 18:39:39 +03:00
|
|
|
|
chunk = tokens (==)
|
|
|
|
|
{-# INLINE chunk #-}
|
|
|
|
|
|
2017-06-14 22:12:18 +03:00
|
|
|
|
-- | A synonym for 'label' in the form of an operator.
|
|
|
|
|
infix 0 <?>
|
|
|
|
|
|
|
|
|
|
(<?>) :: MonadParsec e s m => m a -> String -> m a
|
|
|
|
|
(<?>) = flip label
|
2017-07-02 19:56:01 +03:00
|
|
|
|
{-# INLINE (<?>) #-}
|
2017-06-14 22:12:18 +03:00
|
|
|
|
|
2017-07-25 12:43:24 +03:00
|
|
|
|
-- | Return both the result of a parse and a chunk of input that was
|
2018-09-04 22:24:28 +03:00
|
|
|
|
-- consumed during parsing. This relies on the change of the 'stateOffset'
|
|
|
|
|
-- value to evaluate how many tokens were consumed. If you mess with it
|
|
|
|
|
-- manually in the argument parser, prepare for troubles.
|
2015-08-01 17:39:20 +03:00
|
|
|
|
--
|
2017-06-14 22:12:18 +03:00
|
|
|
|
-- @since 5.3.0
|
2017-07-02 19:56:01 +03:00
|
|
|
|
match :: MonadParsec e s m => m a -> m (Tokens s, a)
|
2017-06-14 22:12:18 +03:00
|
|
|
|
match p = do
|
2020-04-23 22:28:20 +03:00
|
|
|
|
o <- getOffset
|
|
|
|
|
s <- getInput
|
|
|
|
|
r <- p
|
2018-08-20 19:35:10 +03:00
|
|
|
|
o' <- getOffset
|
2017-07-02 19:56:01 +03:00
|
|
|
|
-- NOTE The 'fromJust' call here should never fail because if the stream
|
|
|
|
|
-- is empty before 'p' (the only case when 'takeN_' can return 'Nothing'
|
|
|
|
|
-- as per its invariants), (tp' - tp) won't be greater than 0, and in that
|
|
|
|
|
-- case 'Just' is guaranteed to be returned as per another invariant of
|
|
|
|
|
-- 'takeN_'.
|
2018-08-20 19:35:10 +03:00
|
|
|
|
return ((fst . fromJust) (takeN_ (o' - o) s), r)
|
2017-07-02 19:56:01 +03:00
|
|
|
|
{-# INLINEABLE match #-}
|
2017-06-14 22:12:18 +03:00
|
|
|
|
|
2017-07-03 15:34:53 +03:00
|
|
|
|
-- | Consume the rest of the input and return it as a chunk. This parser
|
2018-09-04 22:24:28 +03:00
|
|
|
|
-- never fails, but may return the empty chunk.
|
2017-07-03 15:34:53 +03:00
|
|
|
|
--
|
|
|
|
|
-- > takeRest = takeWhileP Nothing (const True)
|
2017-07-03 14:34:00 +03:00
|
|
|
|
--
|
|
|
|
|
-- @since 6.0.0
|
2017-07-03 15:34:53 +03:00
|
|
|
|
takeRest :: MonadParsec e s m => m (Tokens s)
|
|
|
|
|
takeRest = takeWhileP Nothing (const True)
|
|
|
|
|
{-# INLINE takeRest #-}
|
2017-07-02 19:56:01 +03:00
|
|
|
|
|
2017-07-03 15:34:53 +03:00
|
|
|
|
-- | Return 'True' when end of input has been reached.
|
2017-07-03 14:34:00 +03:00
|
|
|
|
--
|
2018-01-08 18:39:39 +03:00
|
|
|
|
-- > atEnd = option False (True <$ hidden eof)
|
|
|
|
|
--
|
2017-07-03 14:34:00 +03:00
|
|
|
|
-- @since 6.0.0
|
2017-07-03 15:34:53 +03:00
|
|
|
|
atEnd :: MonadParsec e s m => m Bool
|
2017-12-17 09:03:56 +03:00
|
|
|
|
atEnd = option False (True <$ hidden eof)
|
2017-07-03 15:34:53 +03:00
|
|
|
|
{-# INLINE atEnd #-}
|
2017-06-14 22:12:18 +03:00
|
|
|
|
|
|
|
|
|
----------------------------------------------------------------------------
|
|
|
|
|
-- Parser state combinators
|
|
|
|
|
|
|
|
|
|
-- | Return the current input.
|
|
|
|
|
getInput :: MonadParsec e s m => m s
|
|
|
|
|
getInput = stateInput <$> getParserState
|
2018-05-01 10:23:52 +03:00
|
|
|
|
{-# INLINE getInput #-}
|
2015-08-01 17:39:20 +03:00
|
|
|
|
|
2018-01-08 18:39:39 +03:00
|
|
|
|
-- | @'setInput' input@ continues parsing with @input@.
|
2017-06-14 22:12:18 +03:00
|
|
|
|
setInput :: MonadParsec e s m => s -> m ()
|
2019-11-06 01:24:04 +03:00
|
|
|
|
setInput s = updateParserState (\(State _ o pst de) -> State s o pst de)
|
2018-05-01 10:23:52 +03:00
|
|
|
|
{-# INLINE setInput #-}
|
2017-06-14 22:12:18 +03:00
|
|
|
|
|
2018-08-20 19:35:10 +03:00
|
|
|
|
-- | Return the current source position. This function /is not cheap/, do
|
|
|
|
|
-- not call it e.g. on matching of every token, that's a bad idea. Still you
|
|
|
|
|
-- can use it to get 'SourcePos' to attach to things that you parse.
|
2018-01-08 18:39:39 +03:00
|
|
|
|
--
|
2018-11-07 08:21:00 +03:00
|
|
|
|
-- The function works under the assumption that we move in the input stream
|
|
|
|
|
-- only forwards and never backwards, which is always true unless the user
|
|
|
|
|
-- abuses the library.
|
2015-08-01 17:39:20 +03:00
|
|
|
|
--
|
2018-08-20 19:35:10 +03:00
|
|
|
|
-- @since 7.0.0
|
2020-06-26 14:55:55 +03:00
|
|
|
|
getSourcePos :: (TraversableStream s, MonadParsec e s m) => m SourcePos
|
2018-08-20 19:35:10 +03:00
|
|
|
|
getSourcePos = do
|
|
|
|
|
st <- getParserState
|
2019-07-13 18:11:49 +03:00
|
|
|
|
let pst = reachOffsetNoLine (stateOffset st) (statePosState st)
|
2020-04-23 22:28:20 +03:00
|
|
|
|
setParserState st {statePosState = pst}
|
2019-07-13 18:11:49 +03:00
|
|
|
|
return (pstateSourcePos pst)
|
2018-08-20 19:35:10 +03:00
|
|
|
|
{-# INLINE getSourcePos #-}
|
2017-06-14 22:12:18 +03:00
|
|
|
|
|
|
|
|
|
-- | Get the number of tokens processed so far.
|
|
|
|
|
--
|
2018-08-20 19:35:10 +03:00
|
|
|
|
-- See also: 'setOffset'.
|
2018-01-08 18:39:39 +03:00
|
|
|
|
--
|
2018-08-20 19:35:10 +03:00
|
|
|
|
-- @since 7.0.0
|
|
|
|
|
getOffset :: MonadParsec e s m => m Int
|
|
|
|
|
getOffset = stateOffset <$> getParserState
|
|
|
|
|
{-# INLINE getOffset #-}
|
2017-06-14 22:12:18 +03:00
|
|
|
|
|
|
|
|
|
-- | Set the number of tokens processed so far.
|
2015-08-01 17:39:20 +03:00
|
|
|
|
--
|
2018-08-20 19:35:10 +03:00
|
|
|
|
-- See also: 'getOffset'.
|
2018-01-08 18:39:39 +03:00
|
|
|
|
--
|
2018-08-20 19:35:10 +03:00
|
|
|
|
-- @since 7.0.0
|
|
|
|
|
setOffset :: MonadParsec e s m => Int -> m ()
|
2019-11-06 01:24:04 +03:00
|
|
|
|
setOffset o = updateParserState $ \(State s _ pst de) ->
|
|
|
|
|
State s o pst de
|
2018-08-20 19:35:10 +03:00
|
|
|
|
{-# INLINE setOffset #-}
|
2017-06-14 22:12:18 +03:00
|
|
|
|
|
2017-07-04 13:12:35 +03:00
|
|
|
|
-- | @'setParserState' st@ sets the parser state to @st@.
|
2018-01-08 18:39:39 +03:00
|
|
|
|
--
|
|
|
|
|
-- See also: 'getParserState', 'updateParserState'.
|
2019-11-06 01:24:04 +03:00
|
|
|
|
setParserState :: MonadParsec e s m => State s e -> m ()
|
2017-06-14 22:12:18 +03:00
|
|
|
|
setParserState st = updateParserState (const st)
|
2018-05-01 10:23:52 +03:00
|
|
|
|
{-# INLINE setParserState #-}
|