Move general-enough combinators to ‘Text.Megaparsec’ (#267)

This commit is contained in:
Mark Karpov 2018-01-08 22:39:39 +07:00 committed by GitHub
parent cb762351d0
commit 4fac009b48
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 378 additions and 281 deletions

View File

@ -1,5 +1,29 @@
## Megaparsec 7.0.0
* Moved some general combinators from `Text.Megaparsec.Char` and
`Text.Megaparsec.Byte` to `Text.Megaparsec`, renaming some of them for
clarity.
Practical consequences:
* Now there is the `single` combinator that is a generalization of `char`
for arbitrary streams. `Text.Megaparsec.Char` and `Text.Megaparsec.Byte`
still contain `char` as type-constrained versions of `single`.
* Similarly, now there is the `chunk` combinator that is a generalization
of `string` for arbitrary streams. `Text.Megaparsec.Char` and
`Text.Megaparsec.Byte` still contain `string` as type-constrained
versions of `single`.
* `satisfy` does not depend on type of token, and so it now lives in
`Text.Megaparsec`.
* `anyToken` was renamed to `anySingle` and moved to `Text.Megaparsec`.
* `notChar` was renamed to `anySingleBut` and moved to `Text.Megaparsec`.
* `oneOf` and `noneOf` were moved to `Text.Megaparsec`.
* Simplified the type of the `token` primitive. It now takes just a matching
function `Token s -> Maybe a` as the first argument and the collection of
expected items `Set (ErrorItem (Token s))` as the second argument. This

View File

@ -94,6 +94,13 @@ module Text.Megaparsec
-- * Primitive combinators
, MonadParsec (..)
-- * Derivatives of primitive combinators
, single
, satisfy
, anySingle
, anySingleBut
, oneOf
, noneOf
, chunk
, (<?>)
, unexpected
, customFailure
@ -155,6 +162,8 @@ import qualified Data.Set as E
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative
import Data.Foldable (Foldable, elem, notElem)
import Prelude hiding (elem, notElem)
#endif
-- $reexports
@ -771,16 +780,19 @@ class (Stream s, A.Alternative m, MonadPlus m)
eof :: m ()
-- | The parser @'token' test exp@ accepts a token @t@ with result @x@
-- when the function @test t@ returns @'Just' x@. @exp@ specifies the
-- collection of expected items to report in error messages.
-- | The parser @'token' test expected@ accepts a token @t@ with result
-- @x@ when the function @test t@ returns @'Just' x@. @expected@ specifies
-- the collection of expected items to report in error messages.
--
-- This is the most primitive combinator for accepting tokens. For
-- example, the 'Text.Megaparsec.Char.satisfy' parser is implemented as:
--
-- > satisfy f = token testChar E.empty
-- > satisfy f = token testToken E.empty
-- > where
-- > testChar x = if f x then Just x else Nothing
-- > testToken x = if f x then Just x else Nothing
--
-- __Note__: type signature of this primitive was changed in the version
-- /7.0.0/.
token
:: (Token s -> Maybe a)
@ -789,14 +801,14 @@ class (Stream s, A.Alternative m, MonadPlus m)
-- ^ Expected items (in case of an error)
-> m a
-- | The parser @'tokens' test@ parses a chunk of input and returns it.
-- Supplied predicate @test@ is used to check equality of given and parsed
-- chunks after a candidate chunk of correct length is fetched from the
-- stream.
-- | The parser @'tokens' test chk@ parses a chunk of input @chk@ and
-- returns it. The supplied predicate @test@ is used to check equality of
-- given and parsed chunks after a candidate chunk of correct length is
-- fetched from the stream.
--
-- This can be used for example to write 'Text.Megaparsec.Char.string':
-- This can be used for example to write 'Text.Megaparsec.chunk':
--
-- > string = tokens (==)
-- > chunk = tokens (==)
--
-- Note that beginning from Megaparsec 4.4.0, this is an auto-backtracking
-- primitive, which means that if it fails, it never consumes any input.
@ -1307,6 +1319,132 @@ fixs' _ (Right (b,s,w)) = (Right b, s, w)
----------------------------------------------------------------------------
-- Derivatives of primitive combinators
-- | @'single' t@ only matches the single token @t@.
--
-- > semicolon = single ';'
--
-- See also: 'token', 'anySingle', 'Text.Megaparsec.Byte.char',
-- 'Text.Megaparsec.Char.char'.
--
-- @since 7.0.0
single :: MonadParsec e s m
=> Token s -- ^ Token to match
-> m (Token s)
single t = token testToken expected
where
testToken x = if x == t then Just x else Nothing
expected = E.singleton (Tokens (t:|[]))
{-# INLINE single #-}
-- | The parser @'satisfy' f@ succeeds for any token for which the supplied
-- function @f@ returns 'True'. Returns the character that is actually
-- parsed.
--
-- > digitChar = satisfy isDigit <?> "digit"
-- > oneOf cs = satisfy (`elem` cs)
--
-- See also: 'anySingle', 'anySingleBut', 'oneOf', 'noneOf'.
--
-- @since 7.0.0
satisfy :: MonadParsec e s m
=> (Token s -> Bool) -- ^ Predicate to apply
-> m (Token s)
satisfy f = token testChar E.empty
where
testChar x = if f x then Just x else Nothing
{-# INLINE satisfy #-}
-- | Parse and return a single token. It's a good idea to attach a 'label'
-- to this parser manually.
--
-- > anySingle = satisfy (const True)
--
-- See also: 'satisfy', 'anySingleBut'.
--
-- @since 7.0.0
anySingle :: MonadParsec e s m => m (Token s)
anySingle = satisfy (const True)
{-# INLINE anySingle #-}
-- | Match any token but the given one. It's a good idea to attach a 'label'
-- to this parser manually.
--
-- > anySingleBut t = satisfy (/= t)
--
-- See also: 'single', 'anySingle', 'satisfy'.
--
-- @since 7.0.0
anySingleBut :: MonadParsec e s m
=> Token s -- ^ Token we should not match
-> m (Token s)
anySingleBut t = satisfy (/= t)
{-# INLINE anySingleBut #-}
-- | @'oneOf' ts@ succeeds if the current token is in the supplied
-- collection of tokens @ts@. Returns the parsed token. Note that this
-- parser cannot automatically generate the “expected” component of error
-- message, so usually you should label it manually with 'label' or ('<?>').
--
-- > oneOf cs = satisfy (`elem` cs)
--
-- See also: 'satisfy'.
--
-- > digit = oneOf ['0'..'9'] <?> "digit"
--
-- __Performance note__: prefer 'satisfy' when you can because it's faster
-- when you have only a couple of tokens to compare to:
--
-- > quoteFast = satisfy (\x -> x == '\'' || x == '\"')
-- > quoteSlow = oneOf "'\""
--
-- @since 7.0.0
oneOf :: (Foldable f, MonadParsec e s m)
=> f (Token s) -- ^ Collection of matching tokens
-> m (Token s)
oneOf cs = satisfy (`elem` cs)
{-# INLINE oneOf #-}
-- | As the dual of 'oneOf', @'noneOf' ts@ succeeds if the current token
-- /not/ in the supplied list of tokens @ts@. Returns the parsed character.
-- Note that this parser cannot automatically generate the “expected”
-- component of error message, so usually you should label it manually with
-- 'label' or ('<?>').
--
-- > noneOf cs = satisfy (`notElem` cs)
--
-- See also: 'satisfy'.
--
-- __Performance note__: prefer 'satisfy' and 'singleBut' when you can
-- because it's faster.
--
-- @since 7.0.0
noneOf :: (Foldable f, MonadParsec e s m)
=> f (Token s) -- ^ Collection of taken we should not match
-> m (Token s)
noneOf cs = satisfy (`notElem` cs)
{-# INLINE noneOf #-}
-- | @'chunk' chk@ only matches the chunk @chk@.
--
-- > divOrMod = chunk "div" <|> chunk "mod"
--
-- See also: 'tokens', 'Text.Megaparsec.Char.string',
-- 'Text.Megaparsec.Byte.string'.
--
-- @since 7.0.0
chunk :: MonadParsec e s m
=> Tokens s -- ^ Chunk to match
-> m (Tokens s)
chunk = tokens (==)
{-# INLINE chunk #-}
-- | A synonym for 'label' in the form of an operator.
infix 0 <?>
@ -1318,10 +1456,10 @@ infix 0 <?>
-- | The parser @'unexpected' item@ fails with an error message telling
-- about unexpected item @item@ without consuming any input.
--
-- > unexpected item = failure (pure item) Set.empty
-- > unexpected item = failure (Just item) Set.empty
unexpected :: MonadParsec e s m => ErrorItem (Token s) -> m a
unexpected item = failure (pure item) E.empty
unexpected item = failure (Just item) E.empty
{-# INLINE unexpected #-}
-- | Report a custom parse error. For a more general version, see
@ -1396,6 +1534,8 @@ takeRest = takeWhileP Nothing (const True)
-- | Return 'True' when end of input has been reached.
--
-- > atEnd = option False (True <$ hidden eof)
--
-- @since 6.0.0
atEnd :: MonadParsec e s m => m Bool
@ -1410,15 +1550,14 @@ atEnd = option False (True <$ hidden eof)
getInput :: MonadParsec e s m => m s
getInput = stateInput <$> getParserState
-- | @'setInput' input@ continues parsing with @input@. The 'getInput' and
-- 'setInput' functions can for example be used to deal with include files.
-- | @'setInput' input@ continues parsing with @input@.
setInput :: MonadParsec e s m => s -> m ()
setInput s = updateParserState (\(State _ pos tp w) -> State s pos tp w)
-- | Return the current source position.
--
-- See also: 'setPosition', 'pushPosition', 'popPosition', and 'SourcePos'.
-- See also: 'getNextTokenPosition'.
getPosition :: MonadParsec e s m => m SourcePos
getPosition = NE.head . statePos <$> getParserState
@ -1426,6 +1565,8 @@ getPosition = NE.head . statePos <$> getParserState
-- | Get the position where the next token in the stream begins. If the
-- stream is empty, return 'Nothing'.
--
-- See also: 'getPosition'.
--
-- @since 5.3.0
getNextTokenPosition :: forall e s m. MonadParsec e s m => m (Maybe SourcePos)
@ -1443,10 +1584,10 @@ setPosition :: MonadParsec e s m => SourcePos -> m ()
setPosition pos = updateParserState $ \(State s (_:|z) tp w) ->
State s (pos:|z) tp w
-- | Push a position into stack of positions and continue parsing working
-- | Push a position to the stack of positions and continue parsing working
-- with this position. Useful for working with include files and the like.
--
-- See also: 'getPosition', 'setPosition', 'popPosition', and 'SourcePos'.
-- See also: 'popPosition'.
--
-- @since 5.0.0
@ -1458,7 +1599,7 @@ pushPosition pos = updateParserState $ \(State s z tp w) ->
-- element (in that case the stack of positions remains the same). This is
-- how to return to previous source file after 'pushPosition'.
--
-- See also: 'getPosition', 'setPosition', 'pushPosition', and 'SourcePos'.
-- See also: 'pushPosition'.
--
-- @since 5.0.0
@ -1470,6 +1611,8 @@ popPosition = updateParserState $ \(State s z tp w) ->
-- | Get the number of tokens processed so far.
--
-- See also: 'setTokensProcessed'.
--
-- @since 6.0.0
getTokensProcessed :: MonadParsec e s m => m Int
@ -1477,6 +1620,8 @@ getTokensProcessed = stateTokensProcessed <$> getParserState
-- | Set the number of tokens processed so far.
--
-- See also: 'getTokensProcessed'.
--
-- @since 6.0.0
setTokensProcessed :: MonadParsec e s m => Int -> m ()
@ -1486,18 +1631,24 @@ setTokensProcessed tp = updateParserState $ \(State s pos _ w) ->
-- | Return the tab width. The default tab width is equal to
-- 'defaultTabWidth'. You can set a different tab width with the help of
-- 'setTabWidth'.
--
-- See also: 'setTabWidth'.
getTabWidth :: MonadParsec e s m => m Pos
getTabWidth = stateTabWidth <$> getParserState
-- | Set tab width. If the argument of the function is not a positive
-- number, 'defaultTabWidth' will be used.
--
-- See also: 'getTabWidth'.
setTabWidth :: MonadParsec e s m => Pos -> m ()
setTabWidth w = updateParserState $ \(State s pos tp _) ->
State s pos tp w
-- | @'setParserState' st@ sets the parser state to @st@.
--
-- See also: 'getParserState', 'updateParserState'.
setParserState :: MonadParsec e s m => State s -> m ()
setParserState st = updateParserState (const st)
@ -1615,4 +1766,4 @@ streamTake :: forall s. Stream s => Int -> s -> [Token s]
streamTake n s =
case fst <$> takeN_ n s of
Nothing -> []
Just chunk -> chunkToTokens (Proxy :: Proxy s) chunk
Just chk -> chunkToTokens (Proxy :: Proxy s) chk

View File

@ -34,16 +34,11 @@ module Text.Megaparsec.Byte
, octDigitChar
, hexDigitChar
, asciiChar
-- * More general parsers
, C.char
-- * Single byte
, char
, char'
, C.anyChar
, C.notChar
, C.oneOf
, C.noneOf
, C.satisfy
-- * Sequence of bytes
, C.string
, string
, C.string' )
where
@ -62,14 +57,14 @@ import qualified Text.Megaparsec.Char as C
-- | Parse a newline byte.
newline :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
newline = C.char 10
newline = char 10
{-# INLINE newline #-}
-- | Parse a carriage return character followed by a newline character.
-- Return the sequence of characters parsed.
crlf :: forall e s m. (MonadParsec e s m, Token s ~ Word8) => m (Tokens s)
crlf = C.string (tokensToChunk (Proxy :: Proxy s) [13,10])
crlf = string (tokensToChunk (Proxy :: Proxy s) [13,10])
{-# INLINE crlf #-}
-- | Parse a CRLF (see 'crlf') or LF (see 'newline') end of line. Return the
@ -84,7 +79,7 @@ eol = (tokenToChunk (Proxy :: Proxy s) <$> newline)
-- | Parse a tab character.
tab :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
tab = C.char 9
tab = char 9
{-# INLINE tab #-}
-- | Skip /zero/ or more white space characters.
@ -109,51 +104,51 @@ space1 = void $ takeWhile1P (Just "white space") isSpace'
-- | Parse a control character.
controlChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
controlChar = C.satisfy (isControl . toChar) <?> "control character"
controlChar = satisfy (isControl . toChar) <?> "control character"
{-# INLINE controlChar #-}
-- | Parse a space character, and the control characters: tab, newline,
-- carriage return, form feed, and vertical tab.
spaceChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
spaceChar = C.satisfy isSpace' <?> "white space"
spaceChar = satisfy isSpace' <?> "white space"
{-# INLINE spaceChar #-}
-- | Parse an upper-case character.
upperChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
upperChar = C.satisfy (isUpper . toChar) <?> "uppercase letter"
upperChar = satisfy (isUpper . toChar) <?> "uppercase letter"
{-# INLINE upperChar #-}
-- | Parse a lower-case alphabetic character.
lowerChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
lowerChar = C.satisfy (isLower . toChar) <?> "lowercase letter"
lowerChar = satisfy (isLower . toChar) <?> "lowercase letter"
{-# INLINE lowerChar #-}
-- | Parse an alphabetic character: lower-case or upper-case.
letterChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
letterChar = C.satisfy (isLetter . toChar) <?> "letter"
letterChar = satisfy (isLetter . toChar) <?> "letter"
{-# INLINE letterChar #-}
-- | Parse an alphabetic or digit characters.
alphaNumChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
alphaNumChar = C.satisfy (isAlphaNum . toChar) <?> "alphanumeric character"
alphaNumChar = satisfy (isAlphaNum . toChar) <?> "alphanumeric character"
{-# INLINE alphaNumChar #-}
-- | Parse a printable character: letter, number, mark, punctuation, symbol
-- or space.
printChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
printChar = C.satisfy (isPrint . toChar) <?> "printable character"
printChar = satisfy (isPrint . toChar) <?> "printable character"
{-# INLINE printChar #-}
-- | Parse an ASCII digit, i.e between “0” and “9”.
digitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
digitChar = C.satisfy isDigit' <?> "digit"
digitChar = satisfy isDigit' <?> "digit"
where
isDigit' x = x >= 48 && x <= 57
{-# INLINE digitChar #-}
@ -161,7 +156,7 @@ digitChar = C.satisfy isDigit' <?> "digit"
-- | Parse an octal digit, i.e. between “0” and “7”.
octDigitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
octDigitChar = C.satisfy isOctDigit' <?> "octal digit"
octDigitChar = satisfy isOctDigit' <?> "octal digit"
where
isOctDigit' x = x >= 48 && x <= 55
{-# INLINE octDigitChar #-}
@ -170,18 +165,26 @@ octDigitChar = C.satisfy isOctDigit' <?> "octal digit"
-- “A” and “F”.
hexDigitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
hexDigitChar = C.satisfy (isHexDigit . toChar) <?> "hexadecimal digit"
hexDigitChar = satisfy (isHexDigit . toChar) <?> "hexadecimal digit"
{-# INLINE hexDigitChar #-}
-- | Parse a character from the first 128 characters of the Unicode
-- character set, corresponding to the ASCII character set.
asciiChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
asciiChar = C.satisfy (< 128) <?> "ASCII character"
asciiChar = satisfy (< 128) <?> "ASCII character"
{-# INLINE asciiChar #-}
----------------------------------------------------------------------------
-- More general parsers
-- Single byte
-- | A type-constrained version of 'single'.
--
-- > newline = char 10
char :: (MonadParsec e s m, Token s ~ Word8) => Token s -> m (Token s)
char = single
{-# INLINE char #-}
-- | The same as 'char' but case-insensitive. This parser returns the
-- actually parsed character preserving its case.
@ -195,8 +198,8 @@ asciiChar = C.satisfy (< 128) <?> "ASCII character"
char' :: (MonadParsec e s m, Token s ~ Word8) => Token s -> m (Token s)
char' c = choice
[ C.char c
, C.char (fromMaybe c (swapCase c)) ]
[ char c
, char (fromMaybe c (swapCase c)) ]
where
swapCase x
| isUpper g = fromChar (toLower g)
@ -206,6 +209,15 @@ char' c = choice
g = toChar x
{-# INLINE char' #-}
----------------------------------------------------------------------------
-- Sequence of bytes
-- | A type-constrained version of 'chunk'.
string :: (MonadParsec e s m, Token s ~ Word8) => Tokens s -> m (Tokens s)
string = chunk
{-# INLINE string #-}
----------------------------------------------------------------------------
-- Helpers

View File

@ -68,7 +68,7 @@ skipBlockComment :: (MonadParsec e s m, Token s ~ Word8)
=> Tokens s -- ^ Start of block comment
-> Tokens s -- ^ End of block comment
-> m ()
skipBlockComment start end = p >> void (manyTill anyChar n)
skipBlockComment start end = p >> void (manyTill anySingle n)
where
p = string start
n = string end
@ -85,7 +85,7 @@ skipBlockCommentNested :: (MonadParsec e s m, Token s ~ Word8)
-> m ()
skipBlockCommentNested start end = p >> void (manyTill e n)
where
e = skipBlockCommentNested start end <|> void anyChar
e = skipBlockCommentNested start end <|> void anySingle
p = string start
n = string end
{-# INLINEABLE skipBlockCommentNested #-}

View File

@ -45,14 +45,9 @@ module Text.Megaparsec.Char
, latin1Char
, charCategory
, categoryName
-- * More general parsers
-- * Single character
, char
, char'
, anyChar
, notChar
, oneOf
, noneOf
, satisfy
-- * Sequence of characters
, string
, string' )
@ -62,16 +57,9 @@ import Control.Applicative
import Data.Char
import Data.Function (on)
import Data.Functor (void)
import Data.List.NonEmpty (NonEmpty (..))
import Data.Proxy
import Text.Megaparsec
import qualified Data.CaseInsensitive as CI
import qualified Data.Set as E
#if !MIN_VERSION_base(4,8,0)
import Data.Foldable (Foldable (), elem, notElem)
import Prelude hiding (elem, notElem)
#endif
----------------------------------------------------------------------------
-- Simple parsers
@ -289,17 +277,14 @@ categoryName = \case
NotAssigned -> "non-assigned Unicode character"
----------------------------------------------------------------------------
-- More general parsers
-- Single character
-- | @'char' c@ parses a single character @c@.
-- | A type-constrained version of 'single'.
--
-- > semicolon = char ';'
char :: MonadParsec e s m => Token s -> m (Token s)
char c = token testChar expected
where
testChar x = if x == c then Just x else Nothing
expected = E.singleton (Tokens (c:|[]))
char :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s)
char = single
{-# INLINE char #-}
-- | The same as 'char' but case-insensitive. This parser returns the
@ -321,87 +306,13 @@ char' c = choice [char c, char (swapCase c)]
| otherwise = x
{-# INLINE char' #-}
-- | This parser succeeds for any character. Returns the parsed character.
anyChar :: MonadParsec e s m => m (Token s)
anyChar = satisfy (const True) <?> "character"
{-# INLINE anyChar #-}
-- | Match any character but the given one. It's a good idea to attach a
-- 'label' to this parser manually.
--
-- @since 6.0.0
notChar :: MonadParsec e s m => Token s -> m (Token s)
notChar c = satisfy (/= c)
{-# INLINE notChar #-}
-- | @'oneOf' cs@ succeeds if the current character is in the supplied
-- collection of characters @cs@. Returns the parsed character. Note that
-- this parser cannot automatically generate the “expected” component of
-- error message, so usually you should label it manually with 'label' or
-- ('<?>').
--
-- See also: 'satisfy'.
--
-- > digit = oneOf ['0'..'9'] <?> "digit"
--
-- __Performance note__: prefer 'satisfy' when you can because it's faster
-- when you have only a couple of tokens to compare to:
--
-- > quoteFast = satisfy (\x -> x == '\'' || x == '\"')
-- > quoteSlow = oneOf "'\""
oneOf :: (Foldable f, MonadParsec e s m)
=> f (Token s)
-> m (Token s)
oneOf cs = satisfy (`elem` cs)
{-# INLINE oneOf #-}
-- | As the dual of 'oneOf', @'noneOf' cs@ succeeds if the current character
-- /not/ in the supplied list of characters @cs@. Returns the parsed
-- character. Note that this parser cannot automatically generate the
-- “expected” component of error message, so usually you should label it
-- manually with 'label' or ('<?>').
--
-- See also: 'satisfy'.
--
-- __Performance note__: prefer 'satisfy' and 'notChar' when you can because
-- it's faster.
noneOf :: (Foldable f, MonadParsec e s m)
=> f (Token s)
-> m (Token s)
noneOf cs = satisfy (`notElem` cs)
{-# INLINE noneOf #-}
-- | The parser @'satisfy' f@ succeeds for any character for which the
-- supplied function @f@ returns 'True'. Returns the character that is
-- actually parsed.
--
-- > digitChar = satisfy isDigit <?> "digit"
-- > oneOf cs = satisfy (`elem` cs)
satisfy :: MonadParsec e s m
=> (Token s -> Bool) -- ^ Predicate to apply
-> m (Token s)
satisfy f = token testChar E.empty
where
testChar x = if f x then Just x else Nothing
{-# INLINE satisfy #-}
----------------------------------------------------------------------------
-- Sequence of characters
-- | @'string' s@ parses a sequence of characters given by @s@. Returns the
-- parsed string (i.e. @s@).
--
-- > divOrMod = string "div" <|> string "mod"
-- | A type-constrained version of 'chunk'.
string :: MonadParsec e s m
=> Tokens s
-> m (Tokens s)
string = tokens (==)
string :: (MonadParsec e s m, Token s ~ Char) => Tokens s -> m (Tokens s)
string = chunk
{-# INLINE string #-}
-- | The same as 'string', but case-insensitive. On success returns string

View File

@ -143,7 +143,7 @@ lexeme spc p = p <* spc
-- > colon = symbol ":"
-- > dot = symbol "."
symbol :: MonadParsec e s m
symbol :: (MonadParsec e s m, Token s ~ Char)
=> m () -- ^ How to consume white space after lexeme
-> Tokens s -- ^ Symbol to parse
-> m (Tokens s)
@ -179,7 +179,7 @@ skipBlockComment :: (MonadParsec e s m, Token s ~ Char)
=> Tokens s -- ^ Start of block comment
-> Tokens s -- ^ End of block comment
-> m ()
skipBlockComment start end = p >> void (manyTill C.anyChar n)
skipBlockComment start end = p >> void (manyTill anySingle n)
where
p = C.string start
n = C.string end
@ -196,7 +196,7 @@ skipBlockCommentNested :: (MonadParsec e s m, Token s ~ Char)
-> m ()
skipBlockCommentNested start end = p >> void (manyTill e n)
where
e = skipBlockCommentNested start end <|> void C.anyChar
e = skipBlockCommentNested start end <|> void anySingle
p = C.string start
n = C.string end
{-# INLINEABLE skipBlockCommentNested #-}
@ -397,9 +397,9 @@ charLiteral :: (MonadParsec e s m, Token s ~ Char) => m Char
charLiteral = label "literal character" $ do
-- The @~@ is needed to avoid requiring a MonadFail constraint,
-- and we do know that r will be non-empty if count' succeeds.
~r@(x:_) <- lookAhead $ count' 1 8 C.anyChar
~r@(x:_) <- lookAhead (count' 1 8 anySingle)
case listToMaybe (Char.readLitChar r) of
Just (c, r') -> count (length r - length r') C.anyChar >> return c
Just (c, r') -> c <$ count (length r - length r') anySingle
Nothing -> unexpected (Tokens (x:|[]))
{-# INLINEABLE charLiteral #-}

View File

@ -8,8 +8,8 @@
-- Portability : portable
--
-- Textual source position. The position includes name of file, line number,
-- and column number. List of such positions can be used to model a stack of
-- include files.
-- and column number. A non-empty list of such positions can be used to
-- model a stack of include files.
--
-- You probably do not want to import this module directly because
-- "Text.Megaparsec" re-exports it anyway.

View File

@ -104,15 +104,6 @@ spec = do
describe "spaceChar" $
checkCharRange "white space" [9,10,11,12,13,32,160] spaceChar
-- describe "upperChar" $
-- checkCharPred "uppercase letter" (isUpper . toChar) upperChar
-- describe "lowerChar" $
-- checkCharPred "lowercase letter" (isLower . toChar) lowerChar
-- describe "letterChar" $
-- checkCharPred "letter" (isAlpha . toChar) letterChar
describe "alphaNumChar" $
checkCharPred "alphanumeric character" (isAlphaNum . toChar) alphaNumChar
@ -128,9 +119,6 @@ spec = do
describe "hexDigitChar" $
checkCharRange "hexadecimal digit" ([48..57] ++ [97..102] ++ [65..70]) hexDigitChar
-- describe "asciiChar" $
-- checkCharPred "ASCII character" (isAscii . toChar) asciiChar
describe "char'" $ do
context "when stream begins with the character specified as argument" $
it "parses the character" $

View File

@ -206,7 +206,8 @@ spec = do
describe "char'" $ do
let goodChar x =
(toUpper x == toLower x) || (isUpper x || isLower x)
(toUpper x == toLower x) ||
(((toUpper x == x) || (toLower x == x)) && (isUpper x || isLower x))
context "when stream begins with the character specified as argument" $
it "parses the character" $
property $ \ch s -> goodChar ch ==> do
@ -229,75 +230,6 @@ spec = do
let ms = ueof <> etok (toLower ch) <> etok (toUpper ch)
prs (char' ch) "" `shouldFailWith` err posI ms
describe "anyChar" $ do
context "when stream is not empty" $
it "succeeds consuming next character in the stream" $
property $ \ch s -> do
let s' = ch : s
prs anyChar s' `shouldParse` ch
prs' anyChar s' `succeedsLeaving` s
context "when stream is empty" $
it "signals correct parse error" $
prs anyChar "" `shouldFailWith` err posI (ueof <> elabel "character")
describe "notChar" $ do
context "when stream begins with the character specified as argument" $
it "signals correct parse error" $
property $ \ch s' -> do
let p = notChar ch
s = ch : s'
prs p s `shouldFailWith` err posI (utok ch)
prs' p s `failsLeaving` s
context "when stream does not begin with the character specified as argument" $
it "parses first character in the stream" $
property $ \ch s -> not (null s) && ch /= head s ==> do
let p = notChar ch
prs p s `shouldParse` head s
prs' p s `succeedsLeaving` tail s
context "when stream is empty" $
it "signals correct parse error" $
prs (notChar 'a') "" `shouldFailWith` err posI ueof
describe "oneOf" $ do
context "when stream begins with one of specified characters" $
it "parses the character" $
property $ \chs' n s -> do
let chs = getNonEmpty chs'
ch = chs !! (getNonNegative n `rem` length chs)
s' = ch : s
prs (oneOf chs) s' `shouldParse` ch
prs' (oneOf chs) s' `succeedsLeaving` s
context "when stream does not begin with any of specified characters" $
it "signals correct parse error" $
property $ \chs ch s -> ch `notElem` (chs :: String) ==> do
let s' = ch : s
prs (oneOf chs) s' `shouldFailWith` err posI (utok ch)
prs' (oneOf chs) s' `failsLeaving` s'
context "when stream is empty" $
it "signals correct parse error" $
property $ \chs ->
prs (oneOf (chs :: String)) "" `shouldFailWith` err posI ueof
describe "noneOf" $ do
context "when stream does not begin with any of specified characters" $
it "parses the character" $
property $ \chs ch s -> ch `notElem` (chs :: String) ==> do
let s' = ch : s
prs (noneOf chs) s' `shouldParse` ch
prs' (noneOf chs) s' `succeedsLeaving` s
context "when stream begins with one of specified characters" $
it "signals correct parse error" $
property $ \chs' n s -> do
let chs = getNonEmpty chs'
ch = chs !! (getNonNegative n `rem` length chs)
s' = ch : s
prs (noneOf chs) s' `shouldFailWith` err posI (utok ch)
prs' (noneOf chs) s' `failsLeaving` s'
context "when stream is empty" $
it "signals correct parse error" $
property $ \chs ->
prs (noneOf (chs :: String)) "" `shouldFailWith` err posI ueof
describe "string" $ do
context "when stream is prefixed with given string" $
it "parses the string" $

View File

@ -28,24 +28,24 @@ spec = do
chunkToTokens sproxy (tokensToChunk sproxy ts) === ts
describe "chunkToTokens" $
it "chunk is isomorphic to list of tokens" $
property $ \chunk ->
tokensToChunk sproxy (chunkToTokens sproxy chunk) === chunk
property $ \chk ->
tokensToChunk sproxy (chunkToTokens sproxy chk) === chk
describe "chunkLength" $
it "returns correct length of given chunk" $
property $ \chunk ->
chunkLength sproxy chunk === length chunk
property $ \chk ->
chunkLength sproxy chk === length chk
describe "chunkEmpty" $
it "only true when chunkLength returns 0" $
property $ \chunk ->
chunkEmpty sproxy chunk === (chunkLength sproxy chunk <= 0)
property $ \chk ->
chunkEmpty sproxy chk === (chunkLength sproxy chk <= 0)
describe "positionAt1" $
it "just returns the given position" $
property $ \pos t ->
positionAt1 sproxy pos t === pos
describe "positionAtN" $
it "just returns the given position" $
property $ \pos chunk ->
positionAtN sproxy pos chunk === pos
property $ \pos chk ->
positionAtN sproxy pos chk === pos
describe "advance1" $ do
context "when given newline" $
it "works correctly" $
@ -102,24 +102,24 @@ spec = do
chunkToTokens bproxy (tokensToChunk bproxy ts) === ts
describe "chunkToTokens" $
it "chunk is isomorphic to list of tokens" $
property $ \chunk ->
tokensToChunk bproxy (chunkToTokens bproxy chunk) === chunk
property $ \chk ->
tokensToChunk bproxy (chunkToTokens bproxy chk) === chk
describe "chunkLength" $
it "returns correct length of given chunk" $
property $ \chunk ->
chunkLength bproxy chunk === B.length chunk
property $ \chk ->
chunkLength bproxy chk === B.length chk
describe "chunkEmpty" $
it "only true when chunkLength returns 0" $
property $ \chunk ->
chunkEmpty bproxy chunk === (chunkLength bproxy chunk <= 0)
property $ \chk ->
chunkEmpty bproxy chk === (chunkLength bproxy chk <= 0)
describe "positionAt1" $
it "just returns the given position" $
property $ \pos t ->
positionAt1 bproxy pos t === pos
describe "positionAtN" $
it "just returns the given position" $
property $ \pos chunk ->
positionAtN bproxy pos chunk === pos
property $ \pos chk ->
positionAtN bproxy pos chk === pos
describe "advance1" $ do
context "when given newline" $
it "works correctly" $
@ -177,24 +177,24 @@ spec = do
chunkToTokens blproxy (tokensToChunk blproxy ts) === ts
describe "chunkToTokens" $
it "chunk is isomorphic to list of tokens" $
property $ \chunk ->
tokensToChunk blproxy (chunkToTokens blproxy chunk) === chunk
property $ \chk ->
tokensToChunk blproxy (chunkToTokens blproxy chk) === chk
describe "chunkLength" $
it "returns correct length of given chunk" $
property $ \chunk ->
chunkLength blproxy chunk === fromIntegral (BL.length chunk)
property $ \chk ->
chunkLength blproxy chk === fromIntegral (BL.length chk)
describe "chunkEmpty" $
it "only true when chunkLength returns 0" $
property $ \chunk ->
chunkEmpty blproxy chunk === (chunkLength blproxy chunk <= 0)
property $ \chk ->
chunkEmpty blproxy chk === (chunkLength blproxy chk <= 0)
describe "positionAt1" $
it "just returns the given position" $
property $ \pos t ->
positionAt1 blproxy pos t === pos
describe "positionAtN" $
it "just returns the given position" $
property $ \pos chunk ->
positionAtN blproxy pos chunk === pos
property $ \pos chk ->
positionAtN blproxy pos chk === pos
describe "advance1" $ do
context "when given newline" $
it "works correctly" $
@ -252,24 +252,24 @@ spec = do
chunkToTokens tproxy (tokensToChunk tproxy ts) === ts
describe "chunkToTokens" $
it "chunk is isomorphic to list of tokens" $
property $ \chunk ->
tokensToChunk tproxy (chunkToTokens tproxy chunk) === chunk
property $ \chk ->
tokensToChunk tproxy (chunkToTokens tproxy chk) === chk
describe "chunkLength" $
it "returns correct length of given chunk" $
property $ \chunk ->
chunkLength tproxy chunk === T.length chunk
property $ \chk ->
chunkLength tproxy chk === T.length chk
describe "chunkEmpty" $
it "only true when chunkLength returns 0" $
property $ \chunk ->
chunkEmpty tproxy chunk === (chunkLength tproxy chunk <= 0)
property $ \chk ->
chunkEmpty tproxy chk === (chunkLength tproxy chk <= 0)
describe "positionAt1" $
it "just returns the given position" $
property $ \pos t ->
positionAt1 tproxy pos t === pos
describe "positionAtN" $
it "just returns the given position" $
property $ \pos chunk ->
positionAtN tproxy pos chunk === pos
property $ \pos chk ->
positionAtN tproxy pos chk === pos
describe "advance1" $ do
context "when given newline" $
it "works correctly" $
@ -326,24 +326,24 @@ spec = do
chunkToTokens tlproxy (tokensToChunk tlproxy ts) === ts
describe "chunkToTokens" $
it "chunk is isomorphic to list of tokens" $
property $ \chunk ->
tokensToChunk tlproxy (chunkToTokens tlproxy chunk) === chunk
property $ \chk ->
tokensToChunk tlproxy (chunkToTokens tlproxy chk) === chk
describe "chunkLength" $
it "returns correct length of given chunk" $
property $ \chunk ->
chunkLength tlproxy chunk === fromIntegral (TL.length chunk)
property $ \chk ->
chunkLength tlproxy chk === fromIntegral (TL.length chk)
describe "chunkEmpty" $
it "only true when chunkLength returns 0" $
property $ \chunk ->
chunkEmpty tlproxy chunk === (chunkLength tlproxy chunk <= 0)
property $ \chk ->
chunkEmpty tlproxy chk === (chunkLength tlproxy chk <= 0)
describe "positionAt1" $
it "just returns the given position" $
property $ \pos t ->
positionAt1 tlproxy pos t === pos
describe "positionAtN" $
it "just returns the given position" $
property $ \pos chunk ->
positionAtN tlproxy pos chunk === pos
property $ \pos chk ->
positionAtN tlproxy pos chk === pos
describe "advance1" $ do
context "when given newline" $
it "works correctly" $

View File

@ -38,12 +38,12 @@ import qualified Control.Monad.State.Lazy as L
import qualified Control.Monad.State.Strict as S
import qualified Control.Monad.Writer.Lazy as L
import qualified Control.Monad.Writer.Strict as S
import qualified Data.ByteString as BS
import qualified Data.List as DL
import qualified Data.List.NonEmpty as NE
import qualified Data.Semigroup as G
import qualified Data.Set as E
import qualified Data.Text as T
import qualified Data.ByteString as BS
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative hiding (many, some)
@ -223,17 +223,17 @@ spec = do
describe "equivalence to 'string'" $ do
it "for String" $ property $ \s i ->
eqParser
(string s)
(chunk s)
(fromString s)
(i :: String)
it "for Text" $ property $ \s i ->
eqParser
(string (T.pack s))
(chunk (T.pack s))
(fromString s)
(i :: T.Text)
it "for ByteString" $ property $ \s i ->
eqParser
(string (fromString s :: BS.ByteString))
(chunk (fromString s :: BS.ByteString))
(fromString s)
(i :: BS.ByteString)
it "can handle Unicode" $ do
@ -314,7 +314,7 @@ spec = do
it "parses the string" $
property $ \s0 s1 s -> not (s1 `isPrefixOf` s0) ==> do
let s' = s0 ++ s
p = string s0 <|> string s1
p = chunk s0 <|> chunk s1
prs p s' `shouldParse` s0
prs' p s' `succeedsLeaving` s
context "stream begins with the second string" $
@ -1212,6 +1212,85 @@ spec = do
describe "derivatives from primitive combinators" $ do
-- NOTE 'single' is tested via 'char' in "Text.Megaparsec.Char" and
-- "Text.Megaparsec.Byte".
describe "anySingle" $ do
let p :: MonadParsec Void String m => m Char
p = anySingle
context "when stream is not empty" $
it "succeeds consuming next character in the stream" $
property $ \ch s -> do
let s' = ch : s
grs p s' (`shouldParse` ch)
grs' p s' (`succeedsLeaving` s)
context "when stream is empty" $
it "signals correct parse error" $
grs p "" (`shouldFailWith` err posI ueof)
describe "anySingleBut" $ do
context "when stream begins with the character specified as argument" $
it "signals correct parse error" $
property $ \ch s' -> do
let p :: MonadParsec Void String m => m Char
p = anySingleBut ch
s = ch : s'
grs p s (`shouldFailWith` err posI (utok ch))
grs' p s (`failsLeaving` s)
context "when stream does not begin with the character specified as argument" $
it "parses first character in the stream" $
property $ \ch s -> not (null s) && ch /= head s ==> do
let p :: MonadParsec Void String m => m Char
p = anySingleBut ch
grs p s (`shouldParse` head s)
grs' p s (`succeedsLeaving` tail s)
context "when stream is empty" $
it "signals correct parse error" $
grs (anySingleBut 'a') "" (`shouldFailWith` err posI ueof)
describe "oneOf" $ do
context "when stream begins with one of specified characters" $
it "parses the character" $
property $ \chs' n s -> do
let chs = getNonEmpty chs'
ch = chs !! (getNonNegative n `rem` length chs)
s' = ch : s
grs (oneOf chs) s' (`shouldParse` ch)
grs' (oneOf chs) s' (`succeedsLeaving` s)
context "when stream does not begin with any of specified characters" $
it "signals correct parse error" $
property $ \chs ch s -> ch `notElem` (chs :: String) ==> do
let s' = ch : s
grs (oneOf chs) s' (`shouldFailWith` err posI (utok ch))
grs' (oneOf chs) s' (`failsLeaving` s')
context "when stream is empty" $
it "signals correct parse error" $
property $ \chs ->
grs (oneOf (chs :: String)) "" (`shouldFailWith` err posI ueof)
describe "noneOf" $ do
context "when stream does not begin with any of specified characters" $
it "parses the character" $
property $ \chs ch s -> ch `notElem` (chs :: String) ==> do
let s' = ch : s
grs (noneOf chs) s' (`shouldParse` ch)
grs' (noneOf chs) s' (`succeedsLeaving` s)
context "when stream begins with one of specified characters" $
it "signals correct parse error" $
property $ \chs' n s -> do
let chs = getNonEmpty chs'
ch = chs !! (getNonNegative n `rem` length chs)
s' = ch : s
grs (noneOf chs) s' (`shouldFailWith` err posI (utok ch))
grs' (noneOf chs) s' (`failsLeaving` s')
context "when stream is empty" $
it "signals correct parse error" $
property $ \chs ->
grs (noneOf (chs :: String)) "" (`shouldFailWith` err posI ueof)
-- NOTE 'chunk' is tested via 'string' in "Text.Megaparsec.Char" and
-- "Text.Megaparsec.Byte".
describe "unexpected" $
it "signals correct parse error" $
property $ \item -> do
@ -1406,7 +1485,7 @@ spec = do
describe "notFollowedBy" $
it "generally works" $
property $ \a' b' c' -> do
let p = many (char =<< ask) <* notFollowedBy eof <* many anyChar
let p = many (char =<< ask) <* notFollowedBy eof <* many anySingle
[a,b,c] = getNonNegative <$> [a',b',c']
s = abcRow a b c
if b > 0 || c > 0
@ -1435,7 +1514,7 @@ spec = do
let p = do
L.put n
let notEof = notFollowedBy (L.modify (* 2) >> eof)
some (try (anyChar <* notEof)) <* char 'x'
some (try (anySingle <* notEof)) <* char 'x'
prs (L.runStateT p 0) "abx" `shouldParse` ("ab", n :: Integer)
describe "observing" $ do
@ -1475,7 +1554,7 @@ spec = do
let p = do
S.put n
let notEof = notFollowedBy (S.modify (* 2) >> eof)
some (try (anyChar <* notEof)) <* char 'x'
some (try (anySingle <* notEof)) <* char 'x'
prs (S.runStateT p 0) "abx" `shouldParse` ("ab", n :: Integer)
describe "observing" $ do