First attempt to outline Megaparsec 5.0.0

This is rather a sketch, we need to work on documentation, tests, and
perhaps on performance, but it should show the direction Megaparsec
5.0.0 is taking.
This commit is contained in:
mrkkrp 2016-04-17 20:08:13 +07:00
parent a3254f5371
commit acbae63a21
16 changed files with 669 additions and 659 deletions

View File

@ -43,7 +43,7 @@ script:
--test-option=--maximum-generated-tests=1000
- cabal sdist
- if [ "$CABALVER" != "1.16" ]; then
cabal haddock | grep "100%" | wc -l | grep "15";
cabal haddock | grep "100%" | wc -l | grep "14";
fi
after_script:

View File

@ -120,19 +120,21 @@ module Text.Megaparsec
, satisfy
, string
, string'
-- * Error messages
, Message (..)
, messageString
, badMessage
, ParseError
, errorPos
, errorMessages
, errorIsUnknown
-- * Textual source position
, SourcePos
, sourceName
, sourceLine
, sourceColumn
, Pos
, mkPos
, unPos
, unsafePos
, InvalidPosException (..)
, SourcePos (..)
, initialPos
, sourcePosPretty
-- * Error messages
, MessageItem (..)
, ErrorComponent (..)
, ParseError (..)
, ShowToken (..)
, ShowErrorComponent (..)
-- * Low-level operations
, Stream (..)
, State (..)

View File

@ -20,4 +20,4 @@ import qualified Data.ByteString as B
-- @Parser@ type and easily change it by importing different “type
-- modules”. This one is for strict byte-strings.
type Parser = Parsec B.ByteString
type Parser = Parsec String B.ByteString

View File

@ -20,4 +20,4 @@ import qualified Data.ByteString.Lazy as B
-- @Parser@ type and easily change it by importing different “type
-- modules”. This one is for lazy byte-strings.
type Parser = Parsec B.ByteString
type Parser = Parsec String B.ByteString

View File

@ -13,6 +13,7 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeFamilies #-}
module Text.Megaparsec.Char
( -- * Simple parsers
@ -58,11 +59,11 @@ where
import Control.Applicative ((<|>))
import Data.Char
import Data.Maybe (fromJust)
import qualified Data.Set as E
import Text.Megaparsec.Combinator
import Text.Megaparsec.Error (Message (..))
import Text.Megaparsec.Error
import Text.Megaparsec.Prim
import Text.Megaparsec.ShowToken
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative ((<$>), pure)
@ -75,13 +76,13 @@ import Prelude hiding (any, elem, notElem)
-- | Parses a newline character.
newline :: MonadParsec s m Char => m Char
newline :: (MonadParsec e s m, Token s ~ Char) => m Char
newline = char '\n'
-- | Parses a carriage return character followed by a newline
-- character. Returns sequence of characters parsed.
crlf :: MonadParsec s m Char => m String
crlf :: (MonadParsec e s m, Token s ~ Char) => m String
crlf = string "\r\n"
-- | Parses a CRLF (see 'crlf') or LF (see 'newline') end of line.
@ -89,19 +90,19 @@ crlf = string "\r\n"
--
-- > eol = (pure <$> newline) <|> crlf
eol :: MonadParsec s m Char => m String
eol :: (MonadParsec e s m, Token s ~ Char) => m String
eol = (pure <$> newline) <|> crlf <?> "end of line"
-- | Parses a tab character.
tab :: MonadParsec s m Char => m Char
tab :: (MonadParsec e s m, Token s ~ Char) => m Char
tab = char '\t'
-- | Skips /zero/ or more white space characters.
--
-- See also: 'skipMany' and 'spaceChar'.
space :: MonadParsec s m Char => m ()
space :: (MonadParsec e s m, Token s ~ Char) => m ()
space = skipMany spaceChar
----------------------------------------------------------------------------
@ -110,32 +111,32 @@ space = skipMany spaceChar
-- | Parses control characters, which are the non-printing characters of the
-- Latin-1 subset of Unicode.
controlChar :: MonadParsec s m Char => m Char
controlChar :: (MonadParsec e s m, Token s ~ Char) => m Char
controlChar = satisfy isControl <?> "control character"
-- | Parses a Unicode space character, and the control characters: tab,
-- newline, carriage return, form feed, and vertical tab.
spaceChar :: MonadParsec s m Char => m Char
spaceChar :: (MonadParsec e s m, Token s ~ Char) => m Char
spaceChar = satisfy isSpace <?> "white space"
-- | Parses an upper-case or title-case alphabetic Unicode character. Title
-- case is used by a small number of letter ligatures like the
-- single-character form of Lj.
upperChar :: MonadParsec s m Char => m Char
upperChar :: (MonadParsec e s m, Token s ~ Char) => m Char
upperChar = satisfy isUpper <?> "uppercase letter"
-- | Parses a lower-case alphabetic Unicode character.
lowerChar :: MonadParsec s m Char => m Char
lowerChar :: (MonadParsec e s m, Token s ~ Char) => m Char
lowerChar = satisfy isLower <?> "lowercase letter"
-- | Parses alphabetic Unicode characters: lower-case, upper-case and
-- title-case letters, plus letters of case-less scripts and modifiers
-- letters.
letterChar :: MonadParsec s m Char => m Char
letterChar :: (MonadParsec e s m, Token s ~ Char) => m Char
letterChar = satisfy isLetter <?> "letter"
-- | Parses alphabetic or numeric digit Unicode characters.
@ -144,76 +145,76 @@ letterChar = satisfy isLetter <?> "letter"
-- parser but not by 'digitChar'. Such digits may be part of identifiers but
-- are not used by the printer and reader to represent numbers.
alphaNumChar :: MonadParsec s m Char => m Char
alphaNumChar :: (MonadParsec e s m, Token s ~ Char) => m Char
alphaNumChar = satisfy isAlphaNum <?> "alphanumeric character"
-- | Parses printable Unicode characters: letters, numbers, marks,
-- punctuation, symbols and spaces.
printChar :: MonadParsec s m Char => m Char
printChar :: (MonadParsec e s m, Token s ~ Char) => m Char
printChar = satisfy isPrint <?> "printable character"
-- | Parses an ASCII digit, i.e between “0” and “9”.
digitChar :: MonadParsec s m Char => m Char
digitChar :: (MonadParsec e s m, Token s ~ Char) => m Char
digitChar = satisfy isDigit <?> "digit"
-- | Parses an octal digit, i.e. between “0” and “7”.
octDigitChar :: MonadParsec s m Char => m Char
octDigitChar :: (MonadParsec e s m, Token s ~ Char) => m Char
octDigitChar = satisfy isOctDigit <?> "octal digit"
-- | Parses a hexadecimal digit, i.e. between “0” and “9”, or “a” and “f”,
-- or “A” and “F”.
hexDigitChar :: MonadParsec s m Char => m Char
hexDigitChar :: (MonadParsec e s m, Token s ~ Char) => m Char
hexDigitChar = satisfy isHexDigit <?> "hexadecimal digit"
-- | Parses Unicode mark characters, for example accents and the like, which
-- combine with preceding characters.
markChar :: MonadParsec s m Char => m Char
markChar :: (MonadParsec e s m, Token s ~ Char) => m Char
markChar = satisfy isMark <?> "mark character"
-- | Parses Unicode numeric characters, including digits from various
-- scripts, Roman numerals, et cetera.
numberChar :: MonadParsec s m Char => m Char
numberChar :: (MonadParsec e s m, Token s ~ Char) => m Char
numberChar = satisfy isNumber <?> "numeric character"
-- | Parses Unicode punctuation characters, including various kinds of
-- connectors, brackets and quotes.
punctuationChar :: MonadParsec s m Char => m Char
punctuationChar :: (MonadParsec e s m, Token s ~ Char) => m Char
punctuationChar = satisfy isPunctuation <?> "punctuation"
-- | Parses Unicode symbol characters, including mathematical and currency
-- symbols.
symbolChar :: MonadParsec s m Char => m Char
symbolChar :: (MonadParsec e s m, Token s ~ Char) => m Char
symbolChar = satisfy isSymbol <?> "symbol"
-- | Parses Unicode space and separator characters.
separatorChar :: MonadParsec s m Char => m Char
separatorChar :: (MonadParsec e s m, Token s ~ Char) => m Char
separatorChar = satisfy isSeparator <?> "separator"
-- | Parses a character from the first 128 characters of the Unicode character set,
-- corresponding to the ASCII character set.
asciiChar :: MonadParsec s m Char => m Char
asciiChar :: (MonadParsec e s m, Token s ~ Char) => m Char
asciiChar = satisfy isAscii <?> "ASCII character"
-- | Parses a character from the first 256 characters of the Unicode
-- character set, corresponding to the ISO 8859-1 (Latin-1) character set.
latin1Char :: MonadParsec s m Char => m Char
latin1Char :: (MonadParsec e s m, Token s ~ Char) => m Char
latin1Char = satisfy isLatin1 <?> "Latin-1 character"
-- | @charCategory cat@ Parses character in Unicode General Category @cat@,
-- see 'Data.Char.GeneralCategory'.
charCategory :: MonadParsec s m Char => GeneralCategory -> m Char
charCategory :: (MonadParsec e s m, Token s ~ Char) => GeneralCategory -> m Char
charCategory cat = satisfy ((== cat) . generalCategory) <?> categoryName cat
-- | Returns human-readable name of Unicode General Category.
@ -259,7 +260,7 @@ categoryName cat =
--
-- > semicolon = char ';'
char :: MonadParsec s m Char => Char -> m Char
char :: (MonadParsec e s m, Token s ~ Char) => Char -> m Char
char c = satisfy (== c) <?> showToken c
-- | The same as 'char' but case-insensitive. This parser returns actually
@ -272,7 +273,7 @@ char c = satisfy (== c) <?> showToken c
-- unexpected 'G'
-- expecting 'E' or 'e'
char' :: MonadParsec s m Char => Char -> m Char
char' :: (MonadParsec e s m, Token s ~ Char) => Char -> m Char
char' c = choice [char c, char $ swapCase c]
where
swapCase x
@ -282,7 +283,7 @@ char' c = choice [char c, char $ swapCase c]
-- | This parser succeeds for any character. Returns the parsed character.
anyChar :: MonadParsec s m Char => m Char
anyChar :: (MonadParsec e s m, Token s ~ Char) => m Char
anyChar = satisfy (const True) <?> "character"
-- | @oneOf cs@ succeeds if the current character is in the supplied
@ -295,7 +296,7 @@ anyChar = satisfy (const True) <?> "character"
--
-- > digit = oneOf ['0'..'9'] <?> "digit"
oneOf :: (Foldable f, MonadParsec s m Char) => f Char -> m Char
oneOf :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char
oneOf cs = satisfy (`elem` cs)
-- | The same as 'oneOf', but case-insensitive. Returns the parsed character
@ -303,21 +304,21 @@ oneOf cs = satisfy (`elem` cs)
--
-- > vowel = oneOf' "aeiou" <?> "vowel"
oneOf' :: (Foldable f, MonadParsec s m Char) => f Char -> m Char
oneOf' :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char
oneOf' cs = satisfy (`elemi` cs)
-- | As the dual of 'oneOf', @noneOf cs@ succeeds if the current
-- character /not/ in the supplied list of characters @cs@. Returns the
-- parsed character.
noneOf :: (Foldable f, MonadParsec s m Char) => f Char -> m Char
noneOf :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char
noneOf cs = satisfy (`notElem` cs)
-- | The same as 'noneOf', but case-insensitive.
--
-- > consonant = noneOf' "aeiou" <?> "consonant"
noneOf' :: (Foldable f, MonadParsec s m Char) => f Char -> m Char
noneOf' :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char
noneOf' cs = satisfy (`notElemi` cs)
-- | The parser @satisfy f@ succeeds for any character for which the
@ -327,12 +328,12 @@ noneOf' cs = satisfy (`notElemi` cs)
-- > digitChar = satisfy isDigit <?> "digit"
-- > oneOf cs = satisfy (`elem` cs)
satisfy :: MonadParsec s m Char => (Char -> Bool) -> m Char
satisfy :: (MonadParsec e s m, Token s ~ Char) => (Char -> Bool) -> m Char
satisfy f = token testChar
where testChar x =
if f x
then Right x
else Left . pure . Unexpected . showToken $ x
else Left (E.singleton (Token x), E.empty, E.empty)
----------------------------------------------------------------------------
-- Sequence of characters
@ -342,7 +343,7 @@ satisfy f = token testChar
--
-- > divOrMod = string "div" <|> string "mod"
string :: MonadParsec s m Char => String -> m String
string :: (MonadParsec e s m, Token s ~ Char) => String -> m String
string = tokens (==)
-- | The same as 'string', but case-insensitive. On success returns string
@ -351,7 +352,7 @@ string = tokens (==)
-- >>> parseTest (string' "foobar") "foObAr"
-- "foObAr"
string' :: MonadParsec s m Char => String -> m String
string' :: (MonadParsec e s m, Token s ~ Char) => String -> m String
string' = tokens casei
----------------------------------------------------------------------------

View File

@ -13,221 +13,219 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE FlexibleInstances #-}
module Text.Megaparsec.Error
( Message (..)
, isUnexpected
, isExpected
, isMessage
, messageString
, badMessage
, ParseError
, errorPos
, errorMessages
, errorIsUnknown
, newErrorMessage
, newErrorMessages
, newErrorUnknown
, addErrorMessage
, addErrorMessages
, setErrorMessage
, setErrorPos
, mergeError
, showMessages )
( MessageItem (..)
, ErrorComponent (..)
, ParseError (..)
, ShowToken (..)
, ShowErrorComponent (..)
, parseErrorPretty
, sourcePosStackPretty )
where
import Control.Exception (Exception)
import Data.Foldable (find, concat)
import Control.Monad.Catch
import Data.Foldable (concat)
import Data.List (intercalate)
import Data.List.NonEmpty (NonEmpty (..))
import Data.Maybe (fromMaybe, fromJust)
import Data.Semigroup (Semigroup((<>)))
import Data.Semigroup
import Data.Set (Set)
import Data.Typeable (Typeable)
import Prelude hiding (concat)
import qualified Data.List.NonEmpty as NE
import qualified Data.Set as E
import Text.Megaparsec.Pos
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative ((<$>))
import Data.Foldable (foldMap)
import Data.Monoid (Monoid(..))
import Data.Monoid (Monoid (..))
#endif
-- | This data type represents parse error messages.
-- | Data type that represents default components of parse error message.
-- The data type is parametrized over token type @t@.
data Message
= Unexpected !String -- ^ Parser ran into an unexpected token
| Expected !String -- ^ What is expected instead
| Message !String -- ^ General-purpose error message component
deriving (Show, Eq, Ord)
data MessageItem t
= Token t
| TokenStream (NonEmpty t)
| Label (NonEmpty Char)
| EndOfInput
deriving (Show, Read, Eq, Ord, Typeable)
-- | Check if given 'Message' is created with 'Unexpected' constructor.
--
-- @since 4.4.0
-- | The type class defines how to represent information about various
-- exceptional situations in given data type. Data types that are used as
-- custom data component in 'ParseError' must be instances of this type
-- class.
isUnexpected :: Message -> Bool
isUnexpected (Unexpected _) = True
isUnexpected _ = False
{-# INLINE isUnexpected #-}
class Ord e => ErrorComponent e where
-- | Check if given 'Message' is created with 'Expected' constructor.
--
-- @since 4.4.0
-- | Represent message passed to 'fail' in parser monad.
isExpected :: Message -> Bool
isExpected (Expected _) = True
isExpected _ = False
{-# INLINE isExpected #-}
representFail :: String -> e
-- | Check if given 'Message' is created with 'Message' constructor.
--
-- @since 4.4.0
-- | Represent exception thrown in parser monad. (It implements
-- 'Control.Monad.Catch.MonadThrow').
isMessage :: Message -> Bool
isMessage (Message _) = True
isMessage _ = False
{-# INLINE isMessage #-}
representException :: Exception e' => e' -> e
-- | Extract the message string from an error message.
-- | Represent information about incorrect indentation.
messageString :: Message -> String
messageString (Unexpected s) = s
messageString (Expected s) = s
messageString (Message s) = s
{-# INLINE messageString #-}
representIndentation
:: Pos -- ^ Actual indentation level
-> Pos -- ^ Expected indentation level
-> e
-- | Test if message string is empty.
instance ErrorComponent [Char] where
representFail = id
representException = ("exception: " ++) . show
representIndentation = undefined -- TODO
badMessage :: Message -> Bool
badMessage = null . messageString
{-# INLINE badMessage #-}
-- TODO More instances?
-- | The data type @ParseError@ represents parse errors. It provides the
-- source position ('SourcePos') of the error and a list of error messages
-- ('Message').
-- stack of source positions, set of expected and unexpected tokens as well
-- as set of custom associated data. The data type is parametrized over
-- token type @t@ and custom data @e@.
--
-- Note that stack of source positions contains current position as its
-- head, and the rest of positions allows to track full sequence of include
-- files with topmost source file at the end of the list.
--
-- 'Semigroup' (or 'Monoid') instance of the data type allows to merge parse
-- errors from different branches of parsing. When merging two
-- 'ParseError's, longest match is preferred; if positions are the same,
-- collections of message items are combined.
data ParseError = ParseError
{ -- | Extract the source position from 'ParseError'.
errorPos :: !SourcePos
-- | Extract the list of error messages from 'ParseError'.
, errorMessages :: [Message] }
deriving (Eq, Typeable)
data ParseError t e = ParseError
{ errorPos :: NonEmpty SourcePos -- ^ Stack of source positions
, errorUnexpected :: Set (MessageItem t) -- ^ Unexpected items
, errorExpected :: Set (MessageItem t) -- ^ Expected items
, errorData :: Set e -- ^ Associated data, if any
} deriving (Show, Read, Eq, Typeable)
instance Show ParseError where
show e = show (errorPos e) ++ ":\n" ++ showMessages (errorMessages e)
instance Monoid ParseError where
mempty = newErrorUnknown (initialPos "")
mappend = (<>)
instance Semigroup ParseError where
instance (Ord t, Ord e) => Semigroup (ParseError t e) where
(<>) = mergeError
{-# INLINE (<>) #-}
instance Exception ParseError
instance (Ord t, Ord e) => Monoid (ParseError t e) where
mempty = ParseError (initialPos "" :| []) E.empty E.empty E.empty
mappend = (<>)
{-# INLINE mappend #-}
-- | Test whether given 'ParseError' has associated collection of error
-- messages. Return @True@ if it has none and @False@ otherwise.
errorIsUnknown :: ParseError -> Bool
errorIsUnknown (ParseError _ ms) = null ms
-- | @newErrorMessage m pos@ creates 'ParseError' with message @m@ and
-- associated position @pos@. If message @m@ has empty message string, it
-- won't be included.
newErrorMessage :: Message -> SourcePos -> ParseError
newErrorMessage m = newErrorMessages [m]
-- | @newErrorMessages ms pos@ creates 'ParseError' with messages @ms@ and
-- associated position @pos@.
--
-- @since 4.2.0
newErrorMessages :: [Message] -> SourcePos -> ParseError
newErrorMessages ms pos = addErrorMessages ms (newErrorUnknown pos)
-- | @newErrorUnknown pos@ creates 'ParseError' without any associated
-- message but with specified position @pos@.
newErrorUnknown :: SourcePos -> ParseError
newErrorUnknown pos = ParseError pos []
-- | @addErrorMessage m err@ returns @err@ with message @m@ added. This
-- function makes sure that list of messages is always sorted and doesn't
-- contain duplicates or messages with empty message strings.
addErrorMessage :: Message -> ParseError -> ParseError
addErrorMessage m (ParseError pos ms) =
ParseError pos $ if badMessage m then ms else pre ++ [m] ++ post
where pre = filter (< m) ms
post = filter (> m) ms
{-# INLINE addErrorMessage #-}
-- | @addErrorMessages ms err@ returns @err@ with messages @ms@ added. The
-- function is defined in terms of 'addErrorMessage'.
--
-- @since 4.2.0
addErrorMessages :: [Message] -> ParseError -> ParseError
addErrorMessages ms err = foldr addErrorMessage err ms
{-# INLINE addErrorMessages #-}
-- | @setErrorMessage m err@ returns @err@ with message @m@ added. This
-- function also deletes all existing error messages that were created with
-- the same constructor as @m@. If message @m@ has empty message string, the
-- function does not add the message to the result (it still deletes all
-- messages of the same type, though).
setErrorMessage :: Message -> ParseError -> ParseError
setErrorMessage m (ParseError pos ms) =
if badMessage m then err else addErrorMessage m err
where err = ParseError pos (filter (not . f) ms)
f = fromJust $ find ($ m) [isUnexpected, isExpected, isMessage]
-- | @setErrorPos pos err@ returns 'ParseError' identical to @err@, but with
-- position @pos@.
setErrorPos :: SourcePos -> ParseError -> ParseError
setErrorPos pos (ParseError _ ms) = ParseError pos ms
instance (Show t, Typeable t, Show e, Typeable e) => Exception (ParseError t e)
-- | Merge two error data structures into one joining their collections of
-- messages and preferring longest match. In other words, earlier error
-- message items and preferring longest match. In other words, earlier error
-- message is discarded. This may seem counter-intuitive, but @mergeError@
-- is only used to merge error messages of alternative branches of parsing
-- and in this case longest match should be preferred.
mergeError :: ParseError -> ParseError -> ParseError
mergeError e1@(ParseError pos1 _) e2@(ParseError pos2 ms2) =
mergeError :: (Ord t, Ord e)
=> ParseError t e -> ParseError t e -> ParseError t e
mergeError e1@(ParseError pos1 u1 p1 x1) e2@(ParseError pos2 u2 p2 x2) =
case pos1 `compare` pos2 of
LT -> e2
EQ -> addErrorMessages ms2 e1
EQ -> ParseError pos1 (E.union u1 u2) (E.union p1 p2) (E.union x1 x2)
GT -> e1
{-# INLINE mergeError #-}
-- | @showMessages ms@ transforms list of error messages @ms@ into
-- | Type class 'ShowToken' includes methods that allow to pretty-print
-- single token as well as stream of tokens. This is used for rendering of
-- error messages.
class ShowToken a where
-- | Pretty-print given token. This is used to get token representation
-- to use in error messages.
showToken :: a -> String
-- | Pretty-print non-empty stream of tokens.
showTokenStream :: NonEmpty a -> String
instance ShowToken Char where
showToken = charPretty
showTokenStream = stringPretty
-- | @charPretty ch@ returns user-friendly string representation of given
-- character @ch@, suitable for using in error messages.
charPretty :: Char -> String
charPretty '\0' = "null"
charPretty '\a' = "bell"
charPretty '\b' = "backspace"
charPretty '\t' = "tab"
charPretty '\n' = "newline"
charPretty '\v' = "vertical tab"
charPretty '\f' = "form feed"
charPretty '\r' = "carriage return"
charPretty ' ' = "space"
charPretty x = "'" ++ [x] ++ "'"
-- | @stringPretty s@ returns pretty representation of string @s@. This is
-- used when printing string tokens in error messages.
stringPretty :: NonEmpty Char -> String
stringPretty (x:|[]) = charPretty x
stringPretty ('\r':|"\n") = "crlf newline"
stringPretty xs = "\"" ++ NE.toList xs ++ "\""
-- | The type class defines how to print custom data component of
-- 'ParseError'.
class ShowErrorComponent a where
-- | Pretty-print custom data component of 'ParseError'.
showErrorComponent :: a -> String
instance ShowToken t => ShowErrorComponent (MessageItem t) where
showErrorComponent (Token t) = showToken t
showErrorComponent (TokenStream ts) = showTokenStream ts
showErrorComponent (Label label) = NE.toList label
showErrorComponent EndOfInput = "end of input"
instance ShowErrorComponent String where
showErrorComponent = id
-- TODO Instances
-- | Pretty-print 'ParseError'.
parseErrorPretty :: (ShowToken t, ShowErrorComponent e)
=> ParseError t e -- ^ Parse error to render
-> String -- ^ Result of rendering
parseErrorPretty (ParseError pos us ps xs) =
sourcePosStackPretty pos ++ ":\n" ++
if E.null us && E.null ps && E.null xs
then "unknown parse error\n"
else concat
[ messageItemsPretty "unexpected " us
, messageItemsPretty "expecting " ps
, unlines (showErrorComponent <$> E.toAscList xs) ]
-- | Pretty-print stack of source positions.
sourcePosStackPretty :: NonEmpty SourcePos -> String
sourcePosStackPretty ms = concatMap f rest ++ sourcePosPretty pos
where (pos :| rest') = ms
rest = reverse rest'
f p = "in file included from " ++ sourcePosPretty p ++ "\n"
-- | @messagesPretty ms@ transforms list of error messages @ms@ into
-- their textual representation.
showMessages :: [Message] -> String
showMessages [] = "unknown parse error"
showMessages ms = tail $ foldMap (fromMaybe "") (zipWith f ns rs)
where (unexpected, ms') = span isUnexpected ms
(expected, messages) = span isExpected ms'
f prefix m = (prefix ++) <$> m
ns = ["\nunexpected ","\nexpecting ","\n"]
rs = (renderMsgs orList <$> [unexpected, expected]) ++
[renderMsgs (concat . NE.intersperse "\n") messages]
-- | Render collection of messages. If the collection is empty, return
-- 'Nothing', otherwise return textual representation of the messages inside
-- 'Just'.
renderMsgs
:: (NonEmpty String -> String) -- ^ Function to combine results
-> [Message] -- ^ Collection of messages to render
-> Maybe String -- ^ Result, if any
-- renderMsgs _ [] = Nothing
renderMsgs f ms = f . fmap messageString <$> NE.nonEmpty ms
messageItemsPretty :: ShowErrorComponent t
=> String -- ^ Prefix to prepend
-> Set t -- ^ Collection of messages
-> String -- ^ Result of rendering
messageItemsPretty prefix ts
| E.null ts = ""
| otherwise =
let f = orList . NE.fromList . E.toAscList . E.map showErrorComponent
in prefix ++ f ts ++ "\n"
-- | Print a pretty list where items are separated with commas and the word
-- “or” according to rules of English punctuation.

View File

@ -74,7 +74,7 @@ data Operator m a
-- > prefix name f = Prefix (f <$ symbol name)
-- > postfix name f = Postfix (f <$ symbol name)
makeExprParser :: MonadParsec s m t
makeExprParser :: MonadParsec e s m
=> m a -- ^ Term parser
-> [[Operator m a]] -- ^ Operator table, see 'Operator'
-> m a -- ^ Resulting expression parser
@ -83,7 +83,7 @@ makeExprParser = foldl addPrecLevel
-- | @addPrecLevel p ops@ adds ability to parse operators in table @ops@ to
-- parser @p@.
addPrecLevel :: MonadParsec s m t => m a -> [Operator m a] -> m a
addPrecLevel :: MonadParsec e s m => m a -> [Operator m a] -> m a
addPrecLevel term ops =
term' >>= \x -> choice [ras' x, las' x, nas' x, return x] <?> "operator"
where (ras, las, nas, prefix, postfix) = foldr splitOp ([],[],[],[],[]) ops
@ -96,7 +96,7 @@ addPrecLevel term ops =
-- optional prefix and postfix unary operators. Parsers @prefix@ and
-- @postfix@ are allowed to fail, in this case 'id' is used.
pTerm :: MonadParsec s m t => m (a -> a) -> m a -> m (a -> a) -> m a
pTerm :: MonadParsec e s m => m (a -> a) -> m a -> m (a -> a) -> m a
pTerm prefix term postfix = do
pre <- option id (hidden prefix)
x <- term
@ -107,7 +107,7 @@ pTerm prefix term postfix = do
-- with parser @p@, then returns result of the operator application on @x@
-- and the term.
pInfixN :: MonadParsec s m t => m (a -> a -> a) -> m a -> a -> m a
pInfixN :: MonadParsec e s m => m (a -> a -> a) -> m a -> a -> m a
pInfixN op p x = do
f <- op
y <- p
@ -117,7 +117,7 @@ pInfixN op p x = do
-- with parser @p@, then returns result of the operator application on @x@
-- and the term.
pInfixL :: MonadParsec s m t => m (a -> a -> a) -> m a -> a -> m a
pInfixL :: MonadParsec e s m => m (a -> a -> a) -> m a -> a -> m a
pInfixL op p x = do
f <- op
y <- p
@ -128,7 +128,7 @@ pInfixL op p x = do
-- term with parser @p@, then returns result of the operator application on
-- @x@ and the term.
pInfixR :: MonadParsec s m t => m (a -> a -> a) -> m a -> a -> m a
pInfixR :: MonadParsec e s m => m (a -> a -> a) -> m a -> a -> m a
pInfixR op p x = do
f <- op
y <- p >>= \r -> pInfixR op p r <|> return r

View File

@ -20,6 +20,7 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeFamilies #-}
module Text.Megaparsec.Lexer
( -- * White space
@ -54,11 +55,12 @@ import Control.Monad (void)
import Data.Char (readLitChar)
import Data.Maybe (listToMaybe, fromMaybe, isJust)
import Data.Scientific (Scientific, toRealFloat)
import qualified Data.List.NonEmpty as NE
import Text.Megaparsec.Combinator
import Text.Megaparsec.Error
import Text.Megaparsec.Pos
import Text.Megaparsec.Prim
import Text.Megaparsec.ShowToken
import qualified Text.Megaparsec.Char as C
#if !MIN_VERSION_base(4,8,0)
@ -93,7 +95,7 @@ import Control.Applicative ((<$>), (<*), (*>), (<*>), pure)
-- to consume any white space before the first lexeme (i.e. at the beginning
-- of the file).
space :: MonadParsec s m Char
space :: (MonadParsec e s m, Token s ~ Char)
=> m () -- ^ A parser for a space character (e.g. 'C.spaceChar')
-> m () -- ^ A parser for a line comment (e.g. 'skipLineComment')
-> m () -- ^ A parser for a block comment (e.g. 'skipBlockComment')
@ -107,7 +109,7 @@ space ch line block = hidden . skipMany $ choice [ch, line, block]
-- > lexeme = L.lexeme spaceConsumer
-- > integer = lexeme L.integer
lexeme :: MonadParsec s m Char
lexeme :: (MonadParsec e s m, Token s ~ Char)
=> m () -- ^ How to consume white space after lexeme
-> m a -- ^ How to parse actual lexeme
-> m a
@ -128,7 +130,7 @@ lexeme spc p = p <* spc
-- > colon = symbol ":"
-- > dot = symbol "."
symbol :: MonadParsec s m Char
symbol :: (MonadParsec e s m, Token s ~ Char)
=> m () -- ^ How to consume white space after lexeme
-> String -- ^ String to parse
-> m String
@ -137,7 +139,7 @@ symbol spc = lexeme spc . C.string
-- | Case-insensitive version of 'symbol'. This may be helpful if you're
-- working with case-insensitive languages.
symbol' :: MonadParsec s m Char
symbol' :: (MonadParsec e s m, Token s ~ Char)
=> m () -- ^ How to consume white space after lexeme
-> String -- ^ String to parse (case-insensitive)
-> m String
@ -148,7 +150,7 @@ symbol' spc = lexeme spc . C.string'
-- consume the newline. Newline is either supposed to be consumed by 'space'
-- parser or picked up manually.
skipLineComment :: MonadParsec s m Char
skipLineComment :: (MonadParsec e s m, Token s ~ Char)
=> String -- ^ Line comment prefix
-> m ()
skipLineComment prefix = p >> void (manyTill C.anyChar n)
@ -158,7 +160,7 @@ skipLineComment prefix = p >> void (manyTill C.anyChar n)
-- | @skipBlockComment start end@ skips non-nested block comment starting
-- with @start@ and ending with @end@.
skipBlockComment :: MonadParsec s m Char
skipBlockComment :: (MonadParsec e s m, Token s ~ Char)
=> String -- ^ Start of block comment
-> String -- ^ End of block comment
-> m ()
@ -171,7 +173,7 @@ skipBlockComment start end = p >> void (manyTill C.anyChar n)
--
-- @since 5.0.0
skipBlockCommentNested :: MonadParsec s m Char
skipBlockCommentNested :: (MonadParsec e s m, Token s ~ Char)
=> String -- ^ Start of block comment
-> String -- ^ End of block comment
-> m ()
@ -191,8 +193,8 @@ skipBlockCommentNested start end = p >> void (manyTill e n)
--
-- @since 4.3.0
indentLevel :: MonadParsec s m t => m Int
indentLevel = sourceColumn <$> getPosition
indentLevel :: MonadParsec e s m => m Pos
indentLevel = sourceColumn . NE.head <$> getPosition
-- | @indentGuard spaceConsumer test@ first consumes all white space
-- (indentation) with @spaceConsumer@ parser, then it checks column
@ -205,10 +207,10 @@ indentLevel = sourceColumn <$> getPosition
-- indentation. Use returned value to check indentation on every subsequent
-- line according to syntax of your language.
indentGuard :: MonadParsec s m Char
indentGuard :: (MonadParsec e s m, Token s ~ Char)
=> m () -- ^ How to consume indentation (white space)
-> (Int -> Bool) -- ^ Predicate checking indentation level
-> m Int -- ^ Current column (indentation level)
-> (Pos -> Bool) -- ^ Predicate checking indentation level
-> m Pos -- ^ Current column (indentation level)
indentGuard spc p = do
spc
lvl <- indentLevel
@ -222,11 +224,11 @@ indentGuard spc p = do
--
-- @since 4.3.0
nonIndented :: MonadParsec s m Char
nonIndented :: (MonadParsec e s m, Token s ~ Char)
=> m () -- ^ How to consume indentation (white space)
-> m a -- ^ How to parse actual data
-> m a
nonIndented sc p = indentGuard sc (== 1) *> p
nonIndented sc p = indentGuard sc (== unsafePos 1) *> p
-- | The data type represents available behaviors for parsing of indented
-- tokens. This is used in 'indentBlock', which see.
@ -236,12 +238,12 @@ nonIndented sc p = indentGuard sc (== 1) *> p
data IndentOpt m a b
= IndentNone a
-- ^ Parse no indented tokens, just return the value
| IndentMany (Maybe Int) ([b] -> m a) (m b)
| IndentMany (Maybe Pos) ([b] -> m a) (m b)
-- ^ Parse many indented tokens (possibly zero), use given indentation
-- level (if 'Nothing', use level of the first indented token); the
-- second argument tells how to get final result, and third argument
-- describes how to parse indented token
| IndentSome (Maybe Int) ([b] -> m a) (m b)
| IndentSome (Maybe Pos) ([b] -> m a) (m b)
-- ^ Just like 'ManyIndent', but requires at least one indented token to
-- be present
@ -256,7 +258,7 @@ data IndentOpt m a b
--
-- @since 4.3.0
indentBlock :: MonadParsec s m Char
indentBlock :: (MonadParsec e s m, Token s ~ Char)
=> m () -- ^ How to consume indentation (white space)
-> m (IndentOpt m a b) -- ^ How to parse “reference” token
-> m a
@ -277,9 +279,9 @@ indentBlock sc r = do
-- | Grab indented items. This is a helper for 'indentBlock', it's not a
-- part of public API.
indentedItems :: MonadParsec s m Char
=> Int -- ^ Reference indentation level
-> Int -- ^ Level of the first indented item ('lookAhead'ed)
indentedItems :: (MonadParsec e s m, Token s ~ Char)
=> Pos -- ^ Reference indentation level
-> Pos -- ^ Level of the first indented item ('lookAhead'ed)
-> m () -- ^ How to consume indentation (white space)
-> m b -- ^ How to parse indented tokens
-> m [b]
@ -315,14 +317,14 @@ ii = "incorrect indentation"
--
-- > stringLiteral = char '"' >> manyTill L.charLiteral (char '"')
charLiteral :: MonadParsec s m Char => m Char
charLiteral :: (MonadParsec e s m, Token s ~ Char) => m Char
charLiteral = label "literal character" $ do
-- The @~@ is needed to avoid requiring a MonadFail constraint,
-- and we do know that r will be non-empty if count' succeeds.
~r@(x:_) <- lookAhead $ count' 1 8 C.anyChar
case listToMaybe (readLitChar r) of
Just (c, r') -> count (length r - length r') C.anyChar >> return c
Nothing -> unexpected (showToken x)
Nothing -> unexpected (Token x)
----------------------------------------------------------------------------
-- Numbers
@ -332,13 +334,13 @@ charLiteral = label "literal character" $ do
--
-- If you need to parse signed integers, see 'signed' combinator.
integer :: MonadParsec s m Char => m Integer
integer :: (MonadParsec e s m, Token s ~ Char) => m Integer
integer = decimal <?> "integer"
-- | The same as 'integer', but 'integer' is 'label'ed with “integer” label,
-- while this parser is labeled with “decimal integer”.
decimal :: MonadParsec s m Char => m Integer
decimal :: (MonadParsec e s m, Token s ~ Char) => m Integer
decimal = nump "" C.digitChar <?> "decimal integer"
-- | Parse an integer in hexadecimal representation. Representation of
@ -351,7 +353,7 @@ decimal = nump "" C.digitChar <?> "decimal integer"
--
-- > hexadecimal = char '0' >> char' 'x' >> L.hexadecimal
hexadecimal :: MonadParsec s m Char => m Integer
hexadecimal :: (MonadParsec e s m, Token s ~ Char) => m Integer
hexadecimal = nump "0x" C.hexDigitChar <?> "hexadecimal integer"
-- | Parse an integer in octal representation. Representation of octal
@ -360,14 +362,14 @@ hexadecimal = nump "0x" C.hexDigitChar <?> "hexadecimal integer"
-- of the programmer to parse correct prefix before parsing the number
-- itself.
octal :: MonadParsec s m Char => m Integer
octal :: (MonadParsec e s m, Token s ~ Char) => m Integer
octal = nump "0o" C.octDigitChar <?> "octal integer"
-- | @nump prefix p@ parses /one/ or more characters with @p@ parser, then
-- prepends @prefix@ to returned value and tries to interpret the result as
-- an integer according to Haskell syntax.
nump :: MonadParsec s m Char => String -> m Char -> m Integer
nump :: (MonadParsec e s m, Token s ~ Char) => String -> m Char -> m Integer
nump prefix baseDigit = read . (prefix ++) <$> some baseDigit
-- | Parse floating point value as 'Scientific' number. 'Scientific' is
@ -381,7 +383,7 @@ nump prefix baseDigit = read . (prefix ++) <$> some baseDigit
--
-- @since 5.0.0
scientific :: MonadParsec s m Char => m Scientific
scientific :: (MonadParsec e s m, Token s ~ Char) => m Scientific
scientific = label "floating point number" (read <$> f)
where f = (++) <$> some C.digitChar <*> (fraction <|> fExp)
@ -390,13 +392,13 @@ scientific = label "floating point number" (read <$> f)
--
-- > float = toRealFloat <$> scientific
float :: MonadParsec s m Char => m Double
float :: (MonadParsec e s m, Token s ~ Char) => m Double
float = toRealFloat <$> scientific
-- | This is a helper for 'float' parser. It parses fractional part of
-- floating point number, that is, dot and everything after it.
fraction :: MonadParsec s m Char => m String
fraction :: (MonadParsec e s m, Token s ~ Char) => m String
fraction = do
void (C.char '.')
d <- some C.digitChar
@ -405,7 +407,7 @@ fraction = do
-- | This helper parses exponent of floating point numbers.
fExp :: MonadParsec s m Char => m String
fExp :: (MonadParsec e s m, Token s ~ Char) => m String
fExp = do
expChar <- C.char' 'e'
signStr <- option "" (pure <$> choice (C.char <$> "+-"))
@ -417,7 +419,7 @@ fExp = do
-- 'Data.Scientific.floatingOrInteger' from "Data.Scientific" to test and
-- extract integer or real values.
number :: MonadParsec s m Char => m Scientific
number :: (MonadParsec e s m, Token s ~ Char) => m Scientific
number = label "number" (read <$> f)
where f = (++) <$> some C.digitChar <*> option "" (fraction <|> fExp)
@ -432,11 +434,11 @@ number = label "number" (read <$> f)
-- > integer = lexeme L.integer
-- > signedInteger = L.signed spaceConsumer integer
signed :: (MonadParsec s m Char, Num a) => m () -> m a -> m a
signed :: (MonadParsec e s m, Token s ~ Char, Num a) => m () -> m a -> m a
signed spc p = ($) <$> option id (lexeme spc sign) <*> p
-- | Parse a sign and return either 'id' or 'negate' according to parsed
-- sign.
sign :: (MonadParsec s m Char, Num a) => m (a -> a)
sign :: (MonadParsec e s m, Token s ~ Char, Num a) => m (a -> a)
sign = (C.char '+' *> return id) <|> (C.char '-' *> return negate)

View File

@ -59,7 +59,9 @@ data Branch s m a = forall b. Branch (PermParser s m (b -> a)) (m b)
-- > <||> char 'b'
-- > <|?> ('_', char 'c')
makePermParser :: MonadParsec s m t => PermParser s m a -> m a
makePermParser :: MonadParsec e s m
=> PermParser s m a -- ^ Given permutation parser
-> m a -- ^ Normal parser built from it
makePermParser (Perm def xs) = choice (fmap branch xs ++ empty)
where empty = case def of
Nothing -> []
@ -79,16 +81,22 @@ makePermParser (Perm def xs) = choice (fmap branch xs ++ empty)
-- by the parsers. The function @f@ gets its parameters in the order in
-- which the parsers are specified, but actual input can be in any order.
(<$$>) :: MonadParsec s m t => (a -> b) -> m a -> PermParser s m b
(<$$>) :: MonadParsec e s m
=> (a -> b) -- ^ Function to use on result of parsing
-> m a -- ^ Normal parser
-> PermParser s m b -- ^ Permutation parser build from it
f <$$> p = newperm f <||> p
-- | The expression @f \<$?> (x, p)@ creates a fresh permutation parser
-- consisting of parser @p@. The the final result of the permutation parser
-- is the function @f@ applied to the return value of @p@. The parser @p@ is
-- consisting of parser @p@. The final result of the permutation parser is
-- the function @f@ applied to the return value of @p@. The parser @p@ is
-- optional — if it cannot be applied, the default value @x@ will be used
-- instead.
(<$?>) :: MonadParsec s m t => (a -> b) -> (a, m a) -> PermParser s m b
(<$?>) :: MonadParsec e s m
=> (a -> b) -- ^ Function to use on result of parsing
-> (a, m a) -- ^ Default value and parser
-> PermParser s m b -- ^ Permutation parser
f <$?> xp = newperm f <|?> xp
-- | The expression @perm \<||> p@ adds parser @p@ to the permutation
@ -96,8 +104,10 @@ f <$?> xp = newperm f <|?> xp
-- the optional combinator ('<|?>') instead. Returns a new permutation
-- parser that includes @p@.
(<||>) :: MonadParsec s m t
=> PermParser s m (a -> b) -> m a -> PermParser s m b
(<||>) :: MonadParsec e s m
=> PermParser s m (a -> b) -- ^ Given permutation parser
-> m a -- ^ Parser to add (should not accept empty input)
-> PermParser s m b -- ^ Resulting parser
(<||>) = add
-- | The expression @perm \<||> (x, p)@ adds parser @p@ to the
@ -105,25 +115,32 @@ f <$?> xp = newperm f <|?> xp
-- applied, the default value @x@ will be used instead. Returns a new
-- permutation parser that includes the optional parser @p@.
(<|?>) :: MonadParsec s m t
=> PermParser s m (a -> b) -> (a, m a) -> PermParser s m b
(<|?>) :: MonadParsec e s m
=> PermParser s m (a -> b) -- ^ Given permutation parser
-> (a, m a) -- ^ Default value and parser
-> PermParser s m b -- ^ Resulting parser
perm <|?> (x, p) = addopt perm x p
newperm :: (a -> b) -> PermParser s m (a -> b)
newperm f = Perm (Just f) []
add :: MonadParsec s m t => PermParser s m (a -> b) -> m a -> PermParser s m b
add :: MonadParsec e s m => PermParser s m (a -> b) -> m a -> PermParser s m b
add perm@(Perm _mf fs) p = Perm Nothing (first : fmap insert fs)
where first = Branch perm p
insert (Branch perm' p') = Branch (add (mapPerms flip perm') p) p'
addopt :: MonadParsec s m t
=> PermParser s m (a -> b) -> a -> m a -> PermParser s m b
addopt :: MonadParsec e s m
=> PermParser s m (a -> b)
-> a
-> m a
-> PermParser s m b
addopt perm@(Perm mf fs) x p = Perm (fmap ($ x) mf) (first : fmap insert fs)
where first = Branch perm p
insert (Branch perm' p') = Branch (addopt (mapPerms flip perm') x p) p'
mapPerms :: MonadParsec s m t
=> (a -> b) -> PermParser s m a -> PermParser s m b
mapPerms :: MonadParsec e s m
=> (a -> b)
-> PermParser s m a
-> PermParser s m b
mapPerms f (Perm x xs) = Perm (fmap f x) (fmap mapBranch xs)
where mapBranch (Branch perm p) = Branch (mapPerms (f .) perm) p

View File

@ -12,119 +12,108 @@
-- Textual source position.
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE TupleSections #-}
module Text.Megaparsec.Pos
( SourcePos
, sourceName
, sourceLine
, sourceColumn
, InvalidTextualPosition (..)
, newPos
( -- * Abstract position
Pos
, mkPos
, unPos
, unsafePos
, InvalidPosException (..)
-- * Source position
, SourcePos (..)
, initialPos
, incSourceLine
, incSourceColumn
, setSourceName
, setSourceLine
, setSourceColumn
, sourcePosPretty
-- * Helpers implementing default behaviors
, defaultUpdatePos
, defaultTabWidth )
where
import Control.Exception (Exception, throw)
import Control.Monad.Catch
import Data.Semigroup
import Data.Typeable (Typeable)
-- | The abstract data type @SourcePos@ represents source positions. It
-- contains the name of the source (i.e. file name), a line number and a
-- column number. @SourcePos@ is an instance of the 'Show', 'Eq' and 'Ord'
-- class.
----------------------------------------------------------------------------
-- Pos
-- | Positive integer that is used to represent line number, column number,
-- and similar things like indentation level.
newtype Pos = Pos Word deriving (Show, Eq, Ord)
-- | Construction of 'Pos' from 'Word'. Zero values will throw 'InvalidPos'.
mkPos :: (MonadThrow m, Integral a) => a -> m Pos
mkPos x =
if x < 1
then throwM InvalidPosException
else (return . Pos . fromIntegral) x
{-# INLINE mkPos #-}
-- | Dangerous construction of 'Pos'. Use when you know for sure that
-- argument is positive.
unsafePos :: Word -> Pos
unsafePos x =
if x < 1
then error "Text.Megaparsec.Pos.unsafePos"
else Pos x
{-# INLINE unsafePos #-}
-- | Extract 'Word' from 'Pos'.
unPos :: Pos -> Word
unPos (Pos x) = x
{-# INLINE unPos #-}
instance Semigroup Pos where
(Pos x) <> (Pos y) = Pos (x + y)
{-# INLINE (<>) #-}
instance Read Pos where
readsPrec d =
readParen (d > 10) $ \r1 -> do
("Pos", r2) <- lex r1
(x, r3) <- readsPrec 11 r2
(,r3) <$> mkPos (x :: Integer)
-- | The exception is thrown by 'mkPos' when its argument is not a positive
-- number.
data InvalidPosException = InvalidPosException deriving (Eq, Show, Typeable)
instance Exception InvalidPosException
----------------------------------------------------------------------------
-- Source position
-- | The data type @SourcePos@ represents source positions. It contains the
-- name of the source file, a line number and a column number.
data SourcePos = SourcePos
{ -- | Extract the name of the source from a source position.
sourceName :: !String
-- | Extract the line number from a source position.
, sourceLine :: !Int
-- | Extract the column number from a source position.
, sourceColumn :: !Int }
deriving (Eq, Ord)
{ sourceName :: String -- ^ Name of source file
, sourceLine :: !Pos -- ^ Line number
, sourceColumn :: !Pos -- ^ Column number
} deriving (Show, Read, Eq, Ord)
instance Show SourcePos where
show (SourcePos n l c)
| null n = showLC
| otherwise = n ++ ":" ++ showLC
where showLC = show l ++ ":" ++ show c
-- | This exception is thrown when some action on 'SourcePos' is performed
-- that would make column number or line number inside this data structure
-- non-positive.
--
-- The 'InvalidTextualPosition' structure includes in order:
--
-- * name of file
-- * line number (possibly non-positive value)
-- * column number (possibly non-positive value)
data InvalidTextualPosition =
InvalidTextualPosition String Int Int
deriving (Eq, Show, Typeable)
instance Exception InvalidTextualPosition
-- | Create a new 'SourcePos' with the given source name, line number and
-- column number.
--
-- If line number of column number is not positive, 'InvalidTextualPosition'
-- will be thrown.
newPos :: String -- ^ File name
-> Int -- ^ Line number, minimum is 1
-> Int -- ^ Column number, minimum is 1
-> SourcePos
newPos n l c =
if l < 1 || c < 1
then throw $ InvalidTextualPosition n l c
else SourcePos n l c
{-# INLINE newPos #-}
-- | Create a new 'SourcePos' with the given source name, and line number
-- and column number set to 1, the upper left.
-- | Construct initial position (line 1, column 1) given name of source
-- file.
initialPos :: String -> SourcePos
initialPos name = newPos name 1 1
{-# INLINE initialPos #-}
initialPos n = SourcePos n u u
where u = unsafePos 1
-- | Increment the line number of a source position. If resulting line
-- number is not positive, 'InvalidTextualPosition' will be thrown.
-- | Pretty-print a 'SourcePos'.
incSourceLine :: Int -> SourcePos -> SourcePos
incSourceLine d (SourcePos n l c) = newPos n (l + d) c
{-# INLINE incSourceLine #-}
sourcePosPretty :: SourcePos -> String
sourcePosPretty (SourcePos n l c)
| null n = showLC
| otherwise = n ++ ":" ++ showLC
where showLC = show (unPos l) ++ ":" ++ show (unPos c)
-- | Increment the column number of a source position. If resulting column
-- number is not positive, 'InvalidTextualPosition' will be thrown.
incSourceColumn :: Int -> SourcePos -> SourcePos
incSourceColumn d (SourcePos n l c) = newPos n l (c + d)
{-# INLINE incSourceColumn #-}
-- | Set the name of the source.
setSourceName :: String -> SourcePos -> SourcePos
setSourceName n (SourcePos _ l c) = newPos n l c
{-# INLINE setSourceName #-}
-- | Set the line number of a source position. If the line number is not
-- positive, 'InvalidTextualPosition' will be thrown.
setSourceLine :: Int -> SourcePos -> SourcePos
setSourceLine l (SourcePos n _ c) = newPos n l c
{-# INLINE setSourceLine #-}
-- | Set the column number of a source position. If the line number is not
-- positive, 'InvalidTextualPosition' will be thrown.
setSourceColumn :: Int -> SourcePos -> SourcePos
setSourceColumn c (SourcePos n l _) = newPos n l c
{-# INLINE setSourceColumn #-}
----------------------------------------------------------------------------
-- Helpers implementing default behavior
-- | Update a source position given a character. The first argument
-- specifies tab width. If the character is a newline (\'\\n\') the line
@ -132,28 +121,26 @@ setSourceColumn c (SourcePos n l _) = newPos n l c
-- column number is incremented to the nearest tab position, i.e. @column +
-- width - ((column - 1) \`rem\` width)@. In all other cases, the column is
-- incremented by 1.
--
-- If given tab width is not positive, 'defaultTabWidth' will be used.
--
-- @since 5.0.0
defaultUpdatePos
:: Int -- ^ Tab width
:: Pos -- ^ Tab width
-> SourcePos -- ^ Current position
-> Char -- ^ Current token
-> (SourcePos, SourcePos) -- ^ Actual position and incremented position
defaultUpdatePos width apos@(SourcePos n l c) ch = (apos, npos)
where
u = unsafePos 1
w = unPos width
c' = unPos c
npos =
case ch of
'\n' -> SourcePos n (l + 1) 1
'\t' -> let w = if width < 1 then defaultTabWidth else width
in SourcePos n l (c + w - ((c - 1) `rem` w))
_ -> SourcePos n l (c + 1)
'\n' -> SourcePos n (l <> u) u
'\t' -> SourcePos n l (unsafePos $ c' + w - ((c' - 1) `rem` w))
_ -> SourcePos n l (c <> u)
-- | Value of tab width used by default. Always prefer this constant when
-- you want to refer to default tab width because actual value /may/ change
-- in future. Current value is @8@.
defaultTabWidth :: Int
defaultTabWidth = 8
defaultTabWidth :: Pos
defaultTabWidth = unsafePos 8

View File

@ -21,6 +21,7 @@
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UndecidableInstances #-}
{-# OPTIONS_HADDOCK not-home #-}
@ -53,7 +54,7 @@ module Text.Megaparsec.Prim
where
import Control.Monad
import qualified Control.Monad.Fail as Fail
import Control.Monad.Catch (Exception, MonadThrow (..))
import Control.Monad.Cont.Class
import Control.Monad.Error.Class
import Control.Monad.Identity
@ -62,23 +63,28 @@ import Control.Monad.State.Class hiding (state)
import Control.Monad.Trans
import Control.Monad.Trans.Identity
import Data.Foldable (foldl')
import Data.List.NonEmpty (NonEmpty (..))
import Data.Monoid hiding ((<>))
import Data.Proxy
import Data.Semigroup
import qualified Control.Applicative as A
import qualified Control.Monad.Trans.Reader as L
import qualified Control.Monad.Trans.State.Lazy as L
import qualified Control.Monad.Trans.State.Strict as S
import qualified Control.Monad.Trans.Writer.Lazy as L
import Data.Set (Set)
import Prelude hiding (all)
import qualified Control.Applicative as A
import qualified Control.Monad.Fail as Fail
import qualified Control.Monad.Trans.Reader as L
import qualified Control.Monad.Trans.State.Lazy as L
import qualified Control.Monad.Trans.State.Strict as S
import qualified Control.Monad.Trans.Writer.Lazy as L
import qualified Control.Monad.Trans.Writer.Strict as S
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.List.NonEmpty as NE
import qualified Data.Set as E
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import Text.Megaparsec.Error
import Text.Megaparsec.Pos
import Text.Megaparsec.ShowToken
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative ((<$>), (<*), pure)
@ -91,8 +97,8 @@ import Control.Applicative ((<$>), (<*), pure)
data State s = State
{ stateInput :: s
, statePos :: !SourcePos
, stateTabWidth :: !Int }
, statePos :: !(NonEmpty SourcePos)
, stateTabWidth :: Pos }
deriving (Show, Eq)
-- | From two states, return the one with greater textual position. If the
@ -112,7 +118,7 @@ longestMatch s1@(State _ pos1 _) s2@(State _ pos2 _) =
--
-- See also: 'Consumption', 'Result'.
data Reply s a = Reply !(State s) Consumption (Result a)
data Reply e s a = Reply !(State s) Consumption (Result (Token s) e a)
-- | This data structure represents an aspect of result of parser's
-- work.
@ -128,9 +134,9 @@ data Consumption
--
-- See also: 'Consumption', 'Reply'.
data Result a
= OK a -- ^ Parser succeeded
| Error ParseError -- ^ Parser failed
data Result t e a
= OK a -- ^ Parser succeeded
| Error (ParseError t e) -- ^ Parser failed
-- | 'Hints' represent collection of strings to be included into 'ParserError'
-- as “expected” messages when a parser fails without consuming input right
@ -150,64 +156,67 @@ data Result a
-- unexpected 'a'
-- expecting 'r' or end of input
newtype Hints = Hints [[String]] deriving (Monoid, Semigroup)
newtype Hints t = Hints [Set (MessageItem t)] deriving (Semigroup, Monoid)
-- | Convert 'ParseError' record into 'Hints'.
toHints :: ParseError -> Hints
toHints err = Hints hints
where hints = if null msgs then [] else [messageString <$> msgs]
msgs = filter isExpected (errorMessages err)
toHints :: ParseError t e -> Hints t
toHints = Hints . pure . errorExpected
{-# INLINE toHints #-}
-- | @withHints hs c@ makes “error” continuation @c@ use given hints @hs@.
--
-- Note that if resulting continuation gets 'ParseError' where all messages
-- are created with 'Message' constructor, hints are ignored.
-- Note that if resulting continuation gets 'ParseError' that has only
-- custom data in it (no “unexpected” or “expected” items), hints are
-- ignored.
withHints
:: Hints -- ^ Hints to use
-> (ParseError -> State s -> m b) -- ^ Continuation to influence
-> ParseError -- ^ First argument of resulting continuation
withHints :: Ord (Token s)
=> Hints (Token s) -- ^ Hints to use
-> (ParseError (Token s) e -> State s -> m b) -- ^ Continuation to influence
-> ParseError (Token s) e -- ^ First argument of resulting continuation
-> State s -- ^ Second argument of resulting continuation
-> m b
withHints (Hints xs) c e =
if all isMessage (errorMessages e)
withHints (Hints ps') c e@(ParseError pos us ps xs) =
if E.null us && E.null ps && not (E.null xs)
then c e
else c (addErrorMessages (Expected <$> concat xs) e)
else c (ParseError pos us (E.unions (ps : ps')) xs)
{-# INLINE withHints #-}
-- | @accHints hs c@ results in “OK” continuation that will add given hints
-- @hs@ to third argument of original continuation @c@.
accHints
:: Hints -- ^ 'Hints' to add
-> (a -> State s -> Hints -> m b) -- ^ An “OK” continuation to alter
:: Hints t -- ^ 'Hints' to add
-> (a -> State s -> Hints t -> m b) -- ^ An “OK” continuation to alter
-> a -- ^ First argument of resulting continuation
-> State s -- ^ Second argument of resulting continuation
-> Hints -- ^ Third argument of resulting continuation
-> Hints t -- ^ Third argument of resulting continuation
-> m b
accHints hs1 c x s hs2 = c x s (hs1 <> hs2)
{-# INLINE accHints #-}
-- | Replace most recent group of hints (if any) with given string. Used in
-- 'label' combinator.
-- | Replace most recent group of hints (if any) with given 'Message' (or
-- delete it if 'Nothing' is given). Used in 'label' combinator.
refreshLastHint :: Hints -> String -> Hints
refreshLastHint :: Hints t -> Maybe (MessageItem t) -> Hints t
refreshLastHint (Hints []) _ = Hints []
refreshLastHint (Hints (_:xs)) "" = Hints xs
refreshLastHint (Hints (_:xs)) l = Hints ([l]:xs)
refreshLastHint (Hints (_:xs)) Nothing = Hints xs
refreshLastHint (Hints (_:xs)) (Just m) = Hints (E.singleton m : xs)
{-# INLINE refreshLastHint #-}
-- | An instance of @Stream s t@ has stream type @s@, and token type @t@
-- determined by the stream.
class (ShowToken t, ShowToken [t]) => Stream s t | s -> t where
class Ord (Token s) => Stream s where
-- | Type of token in stream.
type Token s
-- | Get next token from the stream. If the stream is empty, return
-- 'Nothing'.
uncons :: s -> Maybe (t, s)
uncons :: s -> Maybe (Token s, s)
-- | Update position in stream given tab width, current position, and
-- current token. The result is a tuple where the first element will be
@ -226,45 +235,48 @@ class (ShowToken t, ShowToken [t]) => Stream s t | s -> t where
-- stream with happy\/alex), then the best strategy is to use the start
-- position as actual element position and provide the end position of the
-- token as incremented one.
--
-- @since 5.0.0
updatePos
:: Proxy s -- ^ Proxy clarifying stream type
-> Int -- ^ Tab width
-> Pos -- ^ Tab width
-> SourcePos -- ^ Current position
-> t -- ^ Current token
-> Token s -- ^ Current token
-> (SourcePos, SourcePos) -- ^ Actual position and incremented position
instance Stream [Char] Char where
uncons [] = Nothing
instance Stream [Char] where
type Token String = Char
uncons [] = Nothing
uncons (t:ts) = Just (t, ts)
updatePos = const defaultUpdatePos
{-# INLINE uncons #-}
updatePos = const defaultUpdatePos
{-# INLINE updatePos #-}
instance Stream B.ByteString Char where
uncons = B.uncons
updatePos = const defaultUpdatePos
instance Stream B.ByteString where
type Token B.ByteString = Char
uncons = B.uncons
{-# INLINE uncons #-}
updatePos = const defaultUpdatePos
{-# INLINE updatePos #-}
instance Stream BL.ByteString Char where
uncons = BL.uncons
updatePos = const defaultUpdatePos
instance Stream BL.ByteString where
type Token BL.ByteString = Char
uncons = BL.uncons
{-# INLINE uncons #-}
updatePos = const defaultUpdatePos
{-# INLINE updatePos #-}
instance Stream T.Text Char where
uncons = T.uncons
updatePos = const defaultUpdatePos
instance Stream T.Text where
type Token T.Text = Char
uncons = T.uncons
{-# INLINE uncons #-}
updatePos = const defaultUpdatePos
{-# INLINE updatePos #-}
instance Stream TL.Text Char where
uncons = TL.uncons
updatePos = const defaultUpdatePos
instance Stream TL.Text where
type Token TL.Text = Char
uncons = TL.uncons
{-# INLINE uncons #-}
updatePos = const defaultUpdatePos
{-# INLINE updatePos #-}
-- If you're reading this, you may be interested in how Megaparsec works on
@ -300,39 +312,40 @@ instance Stream TL.Text Char where
-- | @Parsec@ is non-transformer variant of more general 'ParsecT'
-- monad transformer.
type Parsec s = ParsecT s Identity
type Parsec e s = ParsecT e s Identity
-- | @ParsecT s m a@ is a parser with stream type @s@, underlying monad @m@
-- and return type @a@.
-- | @ParsecT e s m a@ is a parser with custom data component of error @e@,
-- stream type @s@, underlying monad @m@ and return type @a@.
newtype ParsecT s m a = ParsecT
{ unParser :: forall b. State s
-> (a -> State s -> Hints -> m b) -- consumed-OK
-> (ParseError -> State s -> m b) -- consumed-error
-> (a -> State s -> Hints -> m b) -- empty-OK
-> (ParseError -> State s -> m b) -- empty-error
-> m b }
newtype ParsecT e s m a = ParsecT
{ unParser
:: forall b. State s
-> (a -> State s -> Hints (Token s) -> m b) -- consumed-OK
-> (ParseError (Token s) e -> State s -> m b) -- consumed-error
-> (a -> State s -> Hints (Token s) -> m b) -- empty-OK
-> (ParseError (Token s) e -> State s -> m b) -- empty-error
-> m b }
instance Functor (ParsecT s m) where
instance Functor (ParsecT e s m) where
fmap = pMap
pMap :: (a -> b) -> ParsecT s m a -> ParsecT s m b
pMap :: (a -> b) -> ParsecT e s m a -> ParsecT e s m b
pMap f p = ParsecT $ \s cok cerr eok eerr ->
unParser p s (cok . f) cerr (eok . f) eerr
{-# INLINE pMap #-}
instance A.Applicative (ParsecT s m) where
instance (ErrorComponent e, Stream s) => A.Applicative (ParsecT e s m) where
pure = pPure
(<*>) = ap
p1 *> p2 = p1 `pBind` const p2
p1 <* p2 = do { x1 <- p1 ; void p2 ; return x1 }
instance A.Alternative (ParsecT s m) where
instance (ErrorComponent e, Stream s) => A.Alternative (ParsecT e s m) where
empty = mzero
(<|>) = mplus
many p = reverse <$> manyAcc p
manyAcc :: ParsecT s m a -> ParsecT s m [a]
manyAcc :: ParsecT e s m a -> ParsecT e s m [a]
manyAcc p = ParsecT $ \s cok cerr eok _ ->
let errToHints c err _ = c (toHints err)
walk xs x s' _ =
@ -348,35 +361,47 @@ manyErr = error $
"Text.Megaparsec.Prim.many: combinator 'many' is applied to a parser"
++ " that accepts an empty string."
instance Monad (ParsecT s m) where
instance (ErrorComponent e, Stream s)
=> Monad (ParsecT e s m) where
return = pure
(>>=) = pBind
fail = Fail.fail
instance Fail.MonadFail (ParsecT s m) where
fail = pFail
pPure :: a -> ParsecT s m a
pPure :: a -> ParsecT e s m a
pPure x = ParsecT $ \s _ _ eok _ -> eok x s mempty
{-# INLINE pPure #-}
pBind :: ParsecT s m a -> (a -> ParsecT s m b) -> ParsecT s m b
pBind :: Stream s => ParsecT e s m a -> (a -> ParsecT e s m b) -> ParsecT e s m b
pBind m k = ParsecT $ \s cok cerr eok eerr ->
let mcok x s' hs = unParser (k x) s' cok cerr
(accHints hs cok) (withHints hs cerr)
(accHints hs cok) (withHints hs cerr)
meok x s' hs = unParser (k x) s' cok cerr
(accHints hs eok) (withHints hs eerr)
(accHints hs eok) (withHints hs eerr)
in unParser m s mcok cerr meok eerr
{-# INLINE pBind #-}
pFail :: String -> ParsecT s m a
instance (ErrorComponent e, Stream s)
=> Fail.MonadFail (ParsecT e s m) where
fail = pFail
pFail :: ErrorComponent e => String -> ParsecT e s m a
pFail msg = ParsecT $ \s@(State _ pos _) _ _ _ eerr ->
eerr (newErrorMessage (Message msg) pos) s
eerr (ParseError pos E.empty E.empty d) s
where d = E.singleton (representFail msg)
{-# INLINE pFail #-}
instance (ErrorComponent e, Stream s)
=> MonadThrow (ParsecT e s m) where
throwM = pThrowM
pThrowM :: (Exception e', ErrorComponent e) => e' -> ParsecT e s m a
pThrowM e = ParsecT $ \s@(State _ pos _) _ _ _ eerr ->
eerr (ParseError pos E.empty E.empty d) s
where d = E.singleton (representException e)
-- | Low-level creation of the 'ParsecT' type.
mkPT :: Monad m => (State s -> m (Reply s a)) -> ParsecT s m a
mkPT :: Monad m => (State s -> m (Reply e s a)) -> ParsecT e s m a
mkPT k = ParsecT $ \s cok cerr eok eerr -> do
(Reply s' consumption result) <- k s
case consumption of
@ -389,39 +414,48 @@ mkPT k = ParsecT $ \s cok cerr eok eerr -> do
OK x -> eok x s' mempty
Error e -> eerr e s'
instance MonadIO m => MonadIO (ParsecT s m) where
instance (ErrorComponent e, Stream s, MonadIO m)
=> MonadIO (ParsecT e s m) where
liftIO = lift . liftIO
instance MonadReader r m => MonadReader r (ParsecT s m) where
instance (ErrorComponent e, Stream s, MonadReader r m)
=> MonadReader r (ParsecT e s m) where
ask = lift ask
local f p = mkPT $ \s -> local f (runParsecT p s)
instance MonadState s m => MonadState s (ParsecT s' m) where
instance (ErrorComponent e, Stream s, MonadState st m)
=> MonadState st (ParsecT e s m) where
get = lift get
put = lift . put
instance MonadCont m => MonadCont (ParsecT s m) where
instance (ErrorComponent e, Stream s, MonadCont m)
=> MonadCont (ParsecT e s m) where
callCC f = mkPT $ \s ->
callCC $ \c ->
runParsecT (f (\a -> mkPT $ \s' -> c (pack s' a))) s
where pack s a = Reply s Virgin (OK a)
instance MonadError e m => MonadError e (ParsecT s m) where
instance (ErrorComponent e, Stream s, MonadError e' m)
=> MonadError e' (ParsecT e s m) where
throwError = lift . throwError
p `catchError` h = mkPT $ \s ->
runParsecT p s `catchError` \e ->
runParsecT (h e) s
instance MonadPlus (ParsecT s m) where
instance (ErrorComponent e, Stream s)
=> MonadPlus (ParsecT e s m) where
mzero = pZero
mplus = pPlus
pZero :: ParsecT s m a
pZero :: ParsecT e s m a
pZero = ParsecT $ \s@(State _ pos _) _ _ _ eerr ->
eerr (newErrorUnknown pos) s
eerr (ParseError pos E.empty E.empty E.empty) s
{-# INLINE pZero #-}
pPlus :: ParsecT s m a -> ParsecT s m a -> ParsecT s m a
pPlus :: (ErrorComponent e, Stream s)
=> ParsecT e s m a
-> ParsecT e s m a
-> ParsecT e s m a
pPlus m n = ParsecT $ \s cok cerr eok eerr ->
let meerr err ms =
let ncerr err' s' = cerr (err' <> err) (longestMatch ms s')
@ -431,16 +465,17 @@ pPlus m n = ParsecT $ \s cok cerr eok eerr ->
in unParser m s cok cerr eok meerr
{-# INLINE pPlus #-}
instance MonadTrans (ParsecT s) where
lift amb = ParsecT $ \s _ _ eok _ -> amb >>= \a -> eok a s mempty
instance MonadTrans (ParsecT e s) where
lift amb = ParsecT $ \s _ _ eok _ ->
amb >>= \a -> eok a s mempty
----------------------------------------------------------------------------
-- Primitive combinators
-- | Type class describing parsers independent of input type.
class (A.Alternative m, MonadPlus m, Stream s t)
=> MonadParsec s m t | m -> s t where
class (A.Alternative m, MonadPlus m, Stream s, ErrorComponent e)
=> MonadParsec e s m | m -> e s where
-- | The most general way to stop parsing and report 'ParseError'.
--
@ -450,7 +485,11 @@ class (A.Alternative m, MonadPlus m, Stream s t)
--
-- @since 4.2.0
failure :: [Message] -> m a
failure
:: Set (MessageItem (Token s)) -- ^ Unexpected items
-> Set (MessageItem (Token s)) -- ^ Expected items
-> Set e -- ^ Custom data
-> m a
-- | The parser @label name p@ behaves as parser @p@, but whenever the
-- parser @p@ fails /without consuming any input/, it replaces names of
@ -529,7 +568,7 @@ class (A.Alternative m, MonadPlus m, Stream s t)
-- @since 4.4.0
withRecovery
:: (ParseError -> m a) -- ^ How to recover from failure
:: (ParseError (Token s) e -> m a) -- ^ How to recover from failure
-> m a -- ^ Original parser
-> m a -- ^ Parser that can recover from failures
@ -550,15 +589,16 @@ class (A.Alternative m, MonadPlus m, Stream s t)
-- > then Right x
-- > else Left . pure . Unexpected . showToken $ x
token
:: (t -> Either [Message] a)
token -- FIXME fix docs
:: (Token s -> Either ( Set (MessageItem (Token s))
, Set (MessageItem (Token s))
, Set e ) a)
-- ^ Matching function for the token to parse
-> m a
-- | The parser @tokens test@ parses list of tokens and returns it. The
-- resulting parser will use 'showToken' to pretty-print the collection of
-- tokens in error messages. Supplied predicate @test@ is used to check
-- equality of given and parsed tokens.
-- | The parser @tokens test@ parses list of tokens and returns it.
-- Supplied predicate @test@ is used to check equality of given and parsed
-- tokens.
--
-- This can be used for example to write 'Text.Megaparsec.Char.string':
--
@ -580,12 +620,12 @@ class (A.Alternative m, MonadPlus m, Stream s t)
-- 'Text.Megaparsec.Char.string''. This new feature /does not/ affect
-- performance in any way.
tokens :: Eq t
=> (t -> t -> Bool)
tokens :: Eq (Token s)
=> (Token s -> Token s -> Bool)
-- ^ Predicate to check equality of tokens
-> [t]
-> [Token s]
-- ^ List of tokens to parse
-> m [t]
-> m [Token s]
-- | Returns the full parser state as a 'State' record.
@ -595,7 +635,7 @@ class (A.Alternative m, MonadPlus m, Stream s t)
updateParserState :: (State s -> State s) -> m ()
instance Stream s t => MonadParsec s (ParsecT s m) t where
instance (ErrorComponent e, Stream s) => MonadParsec e s (ParsecT e s m) where
failure = pFailure
label = pLabel
try = pTry
@ -608,45 +648,52 @@ instance Stream s t => MonadParsec s (ParsecT s m) t where
getParserState = pGetParserState
updateParserState = pUpdateParserState
pFailure :: [Message] -> ParsecT s m a
pFailure msgs = ParsecT $ \s@(State _ pos _) _ _ _ eerr ->
eerr (newErrorMessages msgs pos) s
pFailure
:: Set (MessageItem (Token s))
-> Set (MessageItem (Token s))
-> Set e
-> ParsecT e s m a
pFailure us ps xs = ParsecT $ \s@(State _ pos _) _ _ _ eerr ->
eerr (ParseError pos us ps xs) s
{-# INLINE pFailure #-}
pLabel :: String -> ParsecT s m a -> ParsecT s m a
pLabel :: String -> ParsecT e s m a -> ParsecT e s m a
pLabel l p = ParsecT $ \s cok cerr eok eerr ->
let l' = if null l then l else "rest of " ++ l
cok' x s' hs = cok x s' $ refreshLastHint hs l'
eok' x s' hs = eok x s' $ refreshLastHint hs l
eerr' err = eerr $ setErrorMessage (Expected l) err
let el = Label <$> NE.nonEmpty l
cl = Label . (NE.fromList "rest of " <>) <$> NE.nonEmpty l
cok' x s' hs = cok x s' (refreshLastHint hs cl)
eok' x s' hs = eok x s' (refreshLastHint hs el)
eerr' err = eerr err
{ errorExpected = maybe E.empty E.singleton el }
in unParser p s cok' cerr eok' eerr'
{-# INLINE pLabel #-}
pTry :: ParsecT s m a -> ParsecT s m a
pTry :: ParsecT e s m a -> ParsecT e s m a
pTry p = ParsecT $ \s cok _ eok eerr ->
unParser p s cok eerr eok eerr
{-# INLINE pTry #-}
pLookAhead :: ParsecT s m a -> ParsecT s m a
pLookAhead :: ParsecT e s m a -> ParsecT e s m a
pLookAhead p = ParsecT $ \s _ cerr eok eerr ->
let eok' a _ _ = eok a s mempty
in unParser p s eok' cerr eok' eerr
{-# INLINE pLookAhead #-}
pNotFollowedBy :: Stream s t => ParsecT s m a -> ParsecT s m ()
pNotFollowedBy :: Stream s => ParsecT e s m a -> ParsecT e s m ()
pNotFollowedBy p = ParsecT $ \s@(State input pos _) _ _ eok eerr ->
let l = maybe eoi (showToken . fst) (uncons input)
cok' _ _ _ = eerr (unexpectedErr l pos) s
let what = maybe EndOfInput (Token . fst) (uncons input)
unexpect u = ParseError pos (E.singleton u) E.empty E.empty
cok' _ _ _ = eerr (unexpect what) s
cerr' _ _ = eok () s mempty
eok' _ _ _ = eerr (unexpectedErr l pos) s
eok' _ _ _ = eerr (unexpect what) s
eerr' _ _ = eok () s mempty
in unParser p s cok' cerr' eok' eerr'
{-# INLINE pNotFollowedBy #-}
pWithRecovery
:: (ParseError -> ParsecT s m a)
-> ParsecT s m a
-> ParsecT s m a
:: (ParseError (Token s) e -> ParsecT e s m a)
-> ParsecT e s m a
-> ParsecT e s m a
pWithRecovery r p = ParsecT $ \s cok cerr eok eerr ->
let mcerr err ms =
let rcok x s' _ = cok x s' mempty
@ -663,60 +710,76 @@ pWithRecovery r p = ParsecT $ \s cok cerr eok eerr ->
in unParser p s cok mcerr eok meerr
{-# INLINE pWithRecovery #-}
pEof :: Stream s t => ParsecT s m ()
pEof = label eoi $ ParsecT $ \s@(State input pos _) _ _ eok eerr ->
pEof :: forall e s m. Stream s => ParsecT e s m ()
pEof = ParsecT $ \s@(State input (pos:|z) w) _ _ eok eerr ->
case uncons input of
Nothing -> eok () s mempty
Just (x,_) -> eerr (unexpectedErr (showToken x) pos) s
Just (x,_) ->
let !apos = fst (updatePos (Proxy :: Proxy s) w pos x)
in eerr (ParseError (apos:|z)
(E.singleton $ Token x)
(E.singleton EndOfInput)
E.empty) s
{-# INLINE pEof #-}
pToken :: forall s m t a. Stream s t
=> (t -> Either [Message] a)
-> ParsecT s m a
pToken test = ParsecT $ \s@(State input pos w) cok _ _ eerr ->
pToken :: forall e s m a. Stream s
=> (Token s -> Either ( Set (MessageItem (Token s))
, Set (MessageItem (Token s))
, Set e ) a)
-> ParsecT e s m a
pToken test = ParsecT $ \s@(State input (pos:|z) w) cok _ _ eerr ->
case uncons input of
Nothing -> eerr (unexpectedErr eoi pos) s
Nothing -> eerr (ParseError (pos:|z)
E.empty
(E.singleton EndOfInput)
E.empty) s
Just (c,cs) ->
let (!apos, !npos) = updatePos (Proxy :: Proxy s) w pos c
in case test c of
Left ms -> eerr (newErrorMessages ms apos) s
Left (us, ps, xs) -> eerr (ParseError (apos:|z) us ps xs) s
Right x ->
let !newstate = State cs npos w
let !newstate = State cs (npos:|z) w
in cok x newstate mempty
{-# INLINE pToken #-}
pTokens :: forall s m t. Stream s t
=> (t -> t -> Bool)
-> [t]
-> ParsecT s m [t]
pTokens :: forall e s m. Stream s
=> (Token s -> Token s -> Bool)
-> [Token s]
-> ParsecT e s m [Token s]
pTokens _ [] = ParsecT $ \s _ _ eok _ -> eok [] s mempty
pTokens test tts = ParsecT $ \s@(State input pos w) cok _ _ eerr ->
let r = showToken . reverse
y = Proxy :: Proxy s
errExpect x = setErrorMessage (Expected $ showToken tts)
(newErrorMessage (Unexpected x) pos)
pTokens test tts = ParsecT $ \s@(State input (pos:|z) w) cok _ _ eerr ->
let y = Proxy :: Proxy s
unexpect u = ParseError (pos:|z)
(E.singleton u)
(E.singleton . TokenStream . NE.fromList $ tts)
E.empty
go [] is rs =
let !npos = foldl' (\p t -> snd $ updatePos y w p t) pos tts
!newstate = State rs npos w
!newstate = State rs (npos:|z) w
in cok (reverse is) newstate mempty
go (t:ts) is rs =
let what = if null is then eoi else r is
let what = case NE.nonEmpty is of
Nothing -> EndOfInput
Just xs -> TokenStream (NE.reverse xs)
in case uncons rs of
Nothing -> eerr (errExpect what) s
Nothing -> eerr (unexpect what) s
Just (x,xs) ->
let !apos = fst (updatePos y w pos t)
!newstate = State input apos w
!newstate = State input (apos:|z) w
in if test t x
then go ts (x:is) xs
else eerr (errExpect $ r (x:is)) newstate
else eerr (unexpect .
TokenStream .
NE.fromList .
reverse $ (x:is)) newstate
in go tts [] input
{-# INLINE pTokens #-}
pGetParserState :: ParsecT s m (State s)
pGetParserState :: ParsecT e s m (State s)
pGetParserState = ParsecT $ \s _ _ eok _ -> eok s s mempty
{-# INLINE pGetParserState #-}
pUpdateParserState :: (State s -> State s) -> ParsecT s m ()
pUpdateParserState :: (State s -> State s) -> ParsecT e s m ()
pUpdateParserState f = ParsecT $ \s _ _ eok _ -> eok () (f s) mempty
{-# INLINE pUpdateParserState #-}
@ -724,65 +787,56 @@ pUpdateParserState f = ParsecT $ \s _ _ eok _ -> eok () (f s) mempty
infix 0 <?>
(<?>) :: MonadParsec s m t => m a -> String -> m a
(<?>) :: MonadParsec e s m => m a -> String -> m a
(<?>) = flip label
-- | The parser @unexpected msg@ always fails with an unexpected error
-- message @msg@ without consuming any input.
--
-- The parsers 'fail', 'label' and 'unexpected' are the three parsers used
-- to generate error messages. Of these, only 'label' is commonly used.
-- | The parser @unexpected item@ always fails with an error message telling
-- about unexpected item @item@ without consuming any input.
unexpected :: MonadParsec s m t => String -> m a
unexpected = failure . pure . Unexpected
unexpectedErr :: String -> SourcePos -> ParseError
unexpectedErr msg = newErrorMessage (Unexpected msg)
eoi :: String
eoi = "end of input"
unexpected :: MonadParsec e s m => MessageItem (Token s) -> m a
unexpected item = failure (E.singleton item) E.empty E.empty
----------------------------------------------------------------------------
-- Parser state combinators
-- | Returns the current input.
getInput :: MonadParsec s m t => m s
getInput :: MonadParsec e s m => m s
getInput = stateInput <$> getParserState
-- | @setInput input@ continues parsing with @input@. The 'getInput' and
-- @setInput@ functions can for example be used to deal with #include files.
setInput :: MonadParsec s m t => s -> m ()
setInput :: MonadParsec e s m => s -> m ()
setInput s = updateParserState (\(State _ pos w) -> State s pos w)
-- | Returns the current source position.
--
-- See also: 'SourcePos'.
getPosition :: MonadParsec s m t => m SourcePos
getPosition :: MonadParsec e s m => m (NonEmpty SourcePos)
getPosition = statePos <$> getParserState
-- | @setPosition pos@ sets the current source position to @pos@.
setPosition :: MonadParsec s m t => SourcePos -> m ()
setPosition :: MonadParsec e s m => NonEmpty SourcePos -> m ()
setPosition pos = updateParserState (\(State s _ w) -> State s pos w)
-- | Returns tab width. Default tab width is equal to 'defaultTabWidth'. You
-- can set different tab width with help of 'setTabWidth'.
getTabWidth :: MonadParsec s m t => m Int
getTabWidth :: MonadParsec e s m => m Pos
getTabWidth = stateTabWidth <$> getParserState
-- | Set tab width. If argument of the function is not positive number,
-- 'defaultTabWidth' will be used.
setTabWidth :: MonadParsec s m t => Int -> m ()
setTabWidth :: MonadParsec e s m => Pos -> m ()
setTabWidth w = updateParserState (\(State s pos _) -> State s pos w)
-- | @setParserState st@ set the full parser state to @st@.
setParserState :: MonadParsec s m t => State s -> m ()
setParserState :: MonadParsec e s m => State s -> m ()
setParserState st = updateParserState (const st)
----------------------------------------------------------------------------
@ -802,10 +856,10 @@ setParserState st = updateParserState (const st)
-- > numbers = commaSep integer
parse
:: Parsec s a -- ^ Parser to run
-> String -- ^ Name of source file
-> s -- ^ Input for parser
-> Either ParseError a
:: Parsec e s a -- ^ Parser to run
-> String -- ^ Name of source file
-> s -- ^ Input for parser
-> Either (ParseError (Token s) e) a
parse = runParser
-- | @parseMaybe p input@ runs parser @p@ on @input@ and returns result
@ -818,19 +872,22 @@ parse = runParser
-- should be parsed. For example it can be used when parsing of single
-- number according to specification of its format is desired.
parseMaybe :: Stream s t => Parsec s a -> s -> Maybe a
parseMaybe :: (ErrorComponent e, Stream s) => Parsec e s a -> s -> Maybe a
parseMaybe p s =
case parse (p <* eof) "" s of
Left _ -> Nothing
Right x -> Just x
-- | The expression @parseTest p input@ applies a parser @p@ against
-- input @input@ and prints the result to stdout. Used for testing.
-- | The expression @parseTest p input@ applies a parser @p@ against input
-- @input@ and prints the result to stdout. Useful for testing.
parseTest :: Show a => Parsec s a -> s -> IO ()
parseTest :: (ShowErrorComponent e, ShowToken (Token s), Show a)
=> Parsec e s a -- ^ Parser to run
-> s -- ^ Input for parser
-> IO ()
parseTest p input =
case parse p "" input of
Left e -> print e
Left e -> putStrLn (parseErrorPretty e)
Right x -> print x
-- | @runParser p file input@ runs parser @p@ on the input list of tokens
@ -841,10 +898,10 @@ parseTest p input =
-- > parseFromFile p file = runParser p file <$> readFile file
runParser
:: Parsec s a -- ^ Parser to run
:: Parsec e s a -- ^ Parser to run
-> String -- ^ Name of source file
-> s -- ^ Input for parser
-> Either ParseError a
-> Either (ParseError (Token s) e) a
runParser p name s = snd $ runParser' p (initialState name s)
-- | The function is similar to 'runParser' with the difference that it
@ -855,9 +912,9 @@ runParser p name s = snd $ runParser' p (initialState name s)
-- @since 4.2.0
runParser'
:: Parsec s a -- ^ Parser to run
:: Parsec e s a -- ^ Parser to run
-> State s -- ^ Initial state
-> (State s, Either ParseError a)
-> (State s, Either (ParseError (Token s) e) a)
runParser' p = runIdentity . runParserT' p
-- | @runParserT p file input@ runs parser @p@ on the input list of tokens
@ -867,10 +924,10 @@ runParser' p = runIdentity . runParserT' p
-- value of type @a@ ('Right').
runParserT :: Monad m
=> ParsecT s m a -- ^ Parser to run
=> ParsecT e s m a -- ^ Parser to run
-> String -- ^ Name of source file
-> s -- ^ Input for parser
-> m (Either ParseError a)
-> m (Either (ParseError (Token s) e) a)
runParserT p name s = snd `liftM` runParserT' p (initialState name s)
-- | This function is similar to 'runParserT', but like 'runParser'' it
@ -880,9 +937,9 @@ runParserT p name s = snd `liftM` runParserT' p (initialState name s)
-- @since 4.2.0
runParserT' :: Monad m
=> ParsecT s m a -- ^ Parser to run
=> ParsecT e s m a -- ^ Parser to run
-> State s -- ^ Initial state
-> m (State s, Either ParseError a)
-> m (State s, Either (ParseError (Token s) e) a)
runParserT' p s = do
(Reply s' _ result) <- runParsecT p s
case result of
@ -892,15 +949,15 @@ runParserT' p s = do
-- | Given name of source file and input construct initial state for parser.
initialState :: String -> s -> State s
initialState name s = State s (initialPos name) defaultTabWidth
initialState name s = State s (initialPos name :| []) defaultTabWidth
-- | Low-level unpacking of the 'ParsecT' type. 'runParserT' and 'runParser'
-- are built upon this.
runParsecT :: Monad m
=> ParsecT s m a -- ^ Parser to run
=> ParsecT e s m a -- ^ Parser to run
-> State s -- ^ Initial state
-> m (Reply s a)
-> m (Reply e s a)
runParsecT p s = unParser p s cok cerr eok eerr
where cok a s' _ = return $ Reply s' Consumed (OK a)
cerr err s' = return $ Reply s' Consumed (Error err)
@ -910,8 +967,8 @@ runParsecT p s = unParser p s cok cerr eok eerr
----------------------------------------------------------------------------
-- Instances of 'MonadParsec'
instance MonadParsec s m t => MonadParsec s (L.StateT e m) t where
failure = lift . failure
instance MonadParsec e s m => MonadParsec e s (L.StateT st m) where
failure us ps xs = lift (failure us ps xs)
label n (L.StateT m) = L.StateT $ label n . m
try (L.StateT m) = L.StateT $ try . m
lookAhead (L.StateT m) = L.StateT $ \s ->
@ -922,12 +979,12 @@ instance MonadParsec s m t => MonadParsec s (L.StateT e m) t where
withRecovery (\e -> L.runStateT (r e) s) (m s)
eof = lift eof
token = lift . token
tokens e ts = lift $ tokens e ts
tokens e ts = lift (tokens e ts)
getParserState = lift getParserState
updateParserState f = lift $ updateParserState f
updateParserState f = lift (updateParserState f)
instance MonadParsec s m t => MonadParsec s (S.StateT e m) t where
failure = lift . failure
instance MonadParsec e s m => MonadParsec e s (S.StateT st m) where
failure us ps xs = lift (failure us ps xs)
label n (S.StateT m) = S.StateT $ label n . m
try (S.StateT m) = S.StateT $ try . m
lookAhead (S.StateT m) = S.StateT $ \s ->
@ -938,12 +995,12 @@ instance MonadParsec s m t => MonadParsec s (S.StateT e m) t where
withRecovery (\e -> S.runStateT (r e) s) (m s)
eof = lift eof
token = lift . token
tokens e ts = lift $ tokens e ts
tokens e ts = lift (tokens e ts)
getParserState = lift getParserState
updateParserState f = lift $ updateParserState f
updateParserState f = lift (updateParserState f)
instance MonadParsec s m t => MonadParsec s (L.ReaderT e m) t where
failure = lift . failure
instance MonadParsec e s m => MonadParsec e s (L.ReaderT st m) where
failure us ps xs = lift (failure us ps xs)
label n (L.ReaderT m) = L.ReaderT $ label n . m
try (L.ReaderT m) = L.ReaderT $ try . m
lookAhead (L.ReaderT m) = L.ReaderT $ lookAhead . m
@ -952,12 +1009,12 @@ instance MonadParsec s m t => MonadParsec s (L.ReaderT e m) t where
withRecovery (\e -> L.runReaderT (r e) s) (m s)
eof = lift eof
token = lift . token
tokens e ts = lift $ tokens e ts
tokens e ts = lift (tokens e ts)
getParserState = lift getParserState
updateParserState f = lift $ updateParserState f
updateParserState f = lift (updateParserState f)
instance (Monoid w, MonadParsec s m t) => MonadParsec s (L.WriterT w m) t where
failure = lift . failure
instance (Monoid w, MonadParsec e s m) => MonadParsec e s (L.WriterT w m) where
failure us ps xs = lift (failure us ps xs)
label n (L.WriterT m) = L.WriterT $ label n m
try (L.WriterT m) = L.WriterT $ try m
lookAhead (L.WriterT m) = L.WriterT $
@ -968,12 +1025,12 @@ instance (Monoid w, MonadParsec s m t) => MonadParsec s (L.WriterT w m) t where
withRecovery (L.runWriterT . r) m
eof = lift eof
token = lift . token
tokens e ts = lift $ tokens e ts
tokens e ts = lift (tokens e ts)
getParserState = lift getParserState
updateParserState f = lift $ updateParserState f
updateParserState f = lift (updateParserState f)
instance (Monoid w, MonadParsec s m t) => MonadParsec s (S.WriterT w m) t where
failure = lift . failure
instance (Monoid w, MonadParsec e s m) => MonadParsec e s (S.WriterT w m) where
failure us ps xs = lift (failure us ps xs)
label n (S.WriterT m) = S.WriterT $ label n m
try (S.WriterT m) = S.WriterT $ try m
lookAhead (S.WriterT m) = S.WriterT $
@ -984,12 +1041,12 @@ instance (Monoid w, MonadParsec s m t) => MonadParsec s (S.WriterT w m) t where
withRecovery (S.runWriterT . r) m
eof = lift eof
token = lift . token
tokens e ts = lift $ tokens e ts
tokens e ts = lift (tokens e ts)
getParserState = lift getParserState
updateParserState f = lift $ updateParserState f
updateParserState f = lift (updateParserState f)
instance MonadParsec s m t => MonadParsec s (IdentityT m) t where
failure = lift . failure
instance MonadParsec e s m => MonadParsec e s (IdentityT m) where
failure us ps xs = lift (failure us ps xs)
label n (IdentityT m) = IdentityT $ label n m
try = IdentityT . try . runIdentityT
lookAhead (IdentityT m) = IdentityT $ lookAhead m

View File

@ -1,55 +0,0 @@
-- |
-- Module : Text.Megaparsec.ShowToken
-- Copyright : © 20152016 Megaparsec contributors
-- License : FreeBSD
--
-- Maintainer : Mark Karpov <markkarpov@opmbx.org>
-- Stability : experimental
-- Portability : portable
--
-- Pretty printing function and instances for use in error messages.
{-# LANGUAGE FlexibleInstances #-}
module Text.Megaparsec.ShowToken (ShowToken (..)) where
-- | Typeclass 'ShowToken' defines single function 'showToken' that can be
-- used to “pretty-print” various tokens. By default, all commonly used
-- instances are defined, but you can add your own, of course.
class Show a => ShowToken a where
-- | Pretty-print given token. This is used to get token representation
-- for use in error messages.
showToken :: a -> String
instance ShowToken Char where
showToken = prettyChar
-- | @prettyChar ch@ returns user-friendly string representation of given
-- character @ch@, suitable for using in error messages, for example.
prettyChar :: Char -> String
prettyChar '\0' = "null"
prettyChar '\a' = "bell"
prettyChar '\b' = "backspace"
prettyChar '\t' = "tab"
prettyChar '\n' = "newline"
prettyChar '\v' = "vertical tab"
prettyChar '\f' = "form feed"
prettyChar '\r' = "carriage return"
prettyChar ' ' = "space"
prettyChar x = "'" ++ [x] ++ "'"
instance ShowToken [Char] where
showToken = prettyString
-- | @prettyString s@ returns pretty representation of string @s@. This is
-- used when printing string tokens in error messages.
prettyString :: String -> String
prettyString "" = ""
prettyString [x] = prettyChar x
prettyString "\r\n" = "crlf newline"
prettyString xs = "\"" ++ xs ++ "\""

View File

@ -19,4 +19,4 @@ import Text.Megaparsec.Prim
-- @Parser@ type and easily change it by importing different “type
-- modules”. This one is for strings.
type Parser = Parsec String
type Parser = Parsec String String

View File

@ -20,4 +20,4 @@ import qualified Data.Text as T
-- @Parser@ type and easily change it by importing different “type
-- modules”. This one is for strict text.
type Parser = Parsec T.Text
type Parser = Parsec String T.Text

View File

@ -20,4 +20,4 @@ import qualified Data.Text.Lazy as T
-- @Parser@ type and easily change it by importing different “type
-- modules”. This one is for lazy text.
type Parser = Parsec T.Text
type Parser = Parsec String T.Text

View File

@ -58,6 +58,8 @@ flag dev
library
build-depends: base >= 4.6 && < 5
, bytestring
, containers >= 0.5 && < 0.6
, exceptions >= 0.6 && < 0.9
, mtl == 2.*
, scientific >= 0.3.1 && < 0.4
, text >= 0.2
@ -82,7 +84,6 @@ library
, Text.Megaparsec.Perm
, Text.Megaparsec.Pos
, Text.Megaparsec.Prim
, Text.Megaparsec.ShowToken
, Text.Megaparsec.String
, Text.Megaparsec.Text
, Text.Megaparsec.Text.Lazy