Move general-enough combinators to ‘Text.Megaparsec’ (#267)

2024-11-26 23:02:07 +03:00 · 2018-01-08 22:39:39 +07:00 · 2018-01-08 22:39:39 +07:00 · 4fac009b48
commit 4fac009b48
parent cb762351d0
11 changed files with 378 additions and 281 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -1,5 +1,29 @@
 ## Megaparsec 7.0.0

+* Moved some general combinators from `Text.Megaparsec.Char` and
+  `Text.Megaparsec.Byte` to `Text.Megaparsec`, renaming some of them for
+  clarity.
+
+  Practical consequences:
+
+  * Now there is the `single` combinator that is a generalization of `char`
+    for arbitrary streams. `Text.Megaparsec.Char` and `Text.Megaparsec.Byte`
+    still contain `char` as type-constrained versions of `single`.
+
+  * Similarly, now there is the `chunk` combinator that is a generalization
+    of `string` for arbitrary streams. `Text.Megaparsec.Char` and
+    `Text.Megaparsec.Byte` still contain `string` as type-constrained
+    versions of `single`.
+
+  * `satisfy` does not depend on type of token, and so it now lives in
+    `Text.Megaparsec`.
+
+  * `anyToken` was renamed to `anySingle` and moved to `Text.Megaparsec`.
+
+  * `notChar` was renamed to `anySingleBut` and moved to `Text.Megaparsec`.
+
+  * `oneOf` and `noneOf` were moved to `Text.Megaparsec`.
+
 * Simplified the type of the `token` primitive. It now takes just a matching
  function `Token s -> Maybe a` as the first argument and the collection of
  expected items `Set (ErrorItem (Token s))` as the second argument. This
--- a/Text/Megaparsec.hs
+++ b/Text/Megaparsec.hs
@ -94,6 +94,13 @@ module Text.Megaparsec
    -- * Primitive combinators
  , MonadParsec (..)
    -- * Derivatives of primitive combinators
+  , single
+  , satisfy
+  , anySingle
+  , anySingleBut
+  , oneOf
+  , noneOf
+  , chunk
  , (<?>)
  , unexpected
  , customFailure
@ -155,6 +162,8 @@ import qualified Data.Set                          as E

 #if !MIN_VERSION_base(4,8,0)
 import Control.Applicative
+import Data.Foldable (Foldable, elem, notElem)
+import Prelude hiding (elem, notElem)
 #endif

 -- $reexports
@ -771,16 +780,19 @@ class (Stream s, A.Alternative m, MonadPlus m)

  eof :: m ()

-  -- | The parser @'token' test exp@ accepts a token @t@ with result @x@
-  -- when the function @test t@ returns @'Just' x@. @exp@ specifies the
-  -- collection of expected items to report in error messages.
+  -- | The parser @'token' test expected@ accepts a token @t@ with result
+  -- @x@ when the function @test t@ returns @'Just' x@. @expected@ specifies
+  -- the collection of expected items to report in error messages.
  --
  -- This is the most primitive combinator for accepting tokens. For
  -- example, the 'Text.Megaparsec.Char.satisfy' parser is implemented as:
  --
-  -- > satisfy f = token testChar E.empty
+  -- > satisfy f = token testToken E.empty
  -- >   where
-  -- >     testChar x = if f x then Just x else Nothing
+  -- >     testToken x = if f x then Just x else Nothing
+  --
+  -- __Note__: type signature of this primitive was changed in the version
+  -- /7.0.0/.

  token
    :: (Token s -> Maybe a)
@ -789,14 +801,14 @@ class (Stream s, A.Alternative m, MonadPlus m)
       -- ^ Expected items (in case of an error)
    -> m a

-  -- | The parser @'tokens' test@ parses a chunk of input and returns it.
-  -- Supplied predicate @test@ is used to check equality of given and parsed
-  -- chunks after a candidate chunk of correct length is fetched from the
-  -- stream.
+  -- | The parser @'tokens' test chk@ parses a chunk of input @chk@ and
+  -- returns it. The supplied predicate @test@ is used to check equality of
+  -- given and parsed chunks after a candidate chunk of correct length is
+  -- fetched from the stream.
  --
-  -- This can be used for example to write 'Text.Megaparsec.Char.string':
+  -- This can be used for example to write 'Text.Megaparsec.chunk':
  --
-  -- > string = tokens (==)
+  -- > chunk = tokens (==)
  --
  -- Note that beginning from Megaparsec 4.4.0, this is an auto-backtracking
  -- primitive, which means that if it fails, it never consumes any input.
@ -1307,6 +1319,132 @@ fixs' _ (Right (b,s,w)) = (Right b, s, w)
 ----------------------------------------------------------------------------
 -- Derivatives of primitive combinators

+-- | @'single' t@ only matches the single token @t@.
+--
+-- > semicolon = single ';'
+--
+-- See also: 'token', 'anySingle', 'Text.Megaparsec.Byte.char',
+-- 'Text.Megaparsec.Char.char'.
+--
+-- @since 7.0.0
+
+single :: MonadParsec e s m
+  => Token s           -- ^ Token to match
+  -> m (Token s)
+single t = token testToken expected
+  where
+    testToken x = if x == t then Just x else Nothing
+    expected    = E.singleton (Tokens (t:|[]))
+{-# INLINE single #-}
+
+-- | The parser @'satisfy' f@ succeeds for any token for which the supplied
+-- function @f@ returns 'True'. Returns the character that is actually
+-- parsed.
+--
+-- > digitChar = satisfy isDigit <?> "digit"
+-- > oneOf cs  = satisfy (`elem` cs)
+--
+-- See also: 'anySingle', 'anySingleBut', 'oneOf', 'noneOf'.
+--
+-- @since 7.0.0
+
+satisfy :: MonadParsec e s m
+  => (Token s -> Bool) -- ^ Predicate to apply
+  -> m (Token s)
+satisfy f = token testChar E.empty
+  where
+    testChar x = if f x then Just x else Nothing
+{-# INLINE satisfy #-}
+
+-- | Parse and return a single token. It's a good idea to attach a 'label'
+-- to this parser manually.
+--
+-- > anySingle = satisfy (const True)
+--
+-- See also: 'satisfy', 'anySingleBut'.
+--
+-- @since 7.0.0
+
+anySingle :: MonadParsec e s m => m (Token s)
+anySingle = satisfy (const True)
+{-# INLINE anySingle #-}
+
+-- | Match any token but the given one. It's a good idea to attach a 'label'
+-- to this parser manually.
+--
+-- > anySingleBut t = satisfy (/= t)
+--
+-- See also: 'single', 'anySingle', 'satisfy'.
+--
+-- @since 7.0.0
+
+anySingleBut :: MonadParsec e s m
+  => Token s           -- ^ Token we should not match
+  -> m (Token s)
+anySingleBut t = satisfy (/= t)
+{-# INLINE anySingleBut #-}
+
+-- | @'oneOf' ts@ succeeds if the current token is in the supplied
+-- collection of tokens @ts@. Returns the parsed token. Note that this
+-- parser cannot automatically generate the “expected” component of error
+-- message, so usually you should label it manually with 'label' or ('<?>').
+--
+-- > oneOf cs = satisfy (`elem` cs)
+--
+-- See also: 'satisfy'.
+--
+-- > digit = oneOf ['0'..'9'] <?> "digit"
+--
+-- __Performance note__: prefer 'satisfy' when you can because it's faster
+-- when you have only a couple of tokens to compare to:
+--
+-- > quoteFast = satisfy (\x -> x == '\'' || x == '\"')
+-- > quoteSlow = oneOf "'\""
+--
+-- @since 7.0.0
+
+oneOf :: (Foldable f, MonadParsec e s m)
+  => f (Token s)       -- ^ Collection of matching tokens
+  -> m (Token s)
+oneOf cs = satisfy (`elem` cs)
+{-# INLINE oneOf #-}
+
+-- | As the dual of 'oneOf', @'noneOf' ts@ succeeds if the current token
+-- /not/ in the supplied list of tokens @ts@. Returns the parsed character.
+-- Note that this parser cannot automatically generate the “expected”
+-- component of error message, so usually you should label it manually with
+-- 'label' or ('<?>').
+--
+-- > noneOf cs = satisfy (`notElem` cs)
+--
+-- See also: 'satisfy'.
+--
+-- __Performance note__: prefer 'satisfy' and 'singleBut' when you can
+-- because it's faster.
+--
+-- @since 7.0.0
+
+noneOf :: (Foldable f, MonadParsec e s m)
+  => f (Token s)       -- ^ Collection of taken we should not match
+  -> m (Token s)
+noneOf cs = satisfy (`notElem` cs)
+{-# INLINE noneOf #-}
+
+-- | @'chunk' chk@ only matches the chunk @chk@.
+--
+-- > divOrMod = chunk "div" <|> chunk "mod"
+--
+-- See also: 'tokens', 'Text.Megaparsec.Char.string',
+-- 'Text.Megaparsec.Byte.string'.
+--
+-- @since 7.0.0
+
+chunk :: MonadParsec e s m
+  => Tokens s          -- ^ Chunk to match
+  -> m (Tokens s)
+chunk = tokens (==)
+{-# INLINE chunk #-}
+
 -- | A synonym for 'label' in the form of an operator.

 infix 0 <?>
@ -1318,10 +1456,10 @@ infix 0 <?>
 -- | The parser @'unexpected' item@ fails with an error message telling
 -- about unexpected item @item@ without consuming any input.
 --
-- > unexpected item = failure (pure item) Set.empty
+-- > unexpected item = failure (Just item) Set.empty

 unexpected :: MonadParsec e s m => ErrorItem (Token s) -> m a
-unexpected item = failure (pure item) E.empty
+unexpected item = failure (Just item) E.empty
 {-# INLINE unexpected #-}

 -- | Report a custom parse error. For a more general version, see
@ -1396,6 +1534,8 @@ takeRest = takeWhileP Nothing (const True)

 -- | Return 'True' when end of input has been reached.
 --
+-- > atEnd = option False (True <$ hidden eof)
+--
 -- @since 6.0.0

 atEnd :: MonadParsec e s m => m Bool
@ -1410,15 +1550,14 @@ atEnd = option False (True <$ hidden eof)
 getInput :: MonadParsec e s m => m s
 getInput = stateInput <$> getParserState

-- | @'setInput' input@ continues parsing with @input@. The 'getInput' and
-- 'setInput' functions can for example be used to deal with include files.
+-- | @'setInput' input@ continues parsing with @input@.

 setInput :: MonadParsec e s m => s -> m ()
 setInput s = updateParserState (\(State _ pos tp w) -> State s pos tp w)

 -- | Return the current source position.
 --
-- See also: 'setPosition', 'pushPosition', 'popPosition', and 'SourcePos'.
+-- See also: 'getNextTokenPosition'.

 getPosition :: MonadParsec e s m => m SourcePos
 getPosition = NE.head . statePos <$> getParserState
@ -1426,6 +1565,8 @@ getPosition = NE.head . statePos <$> getParserState
 -- | Get the position where the next token in the stream begins. If the
 -- stream is empty, return 'Nothing'.
 --
+-- See also: 'getPosition'.
+--
 -- @since 5.3.0

 getNextTokenPosition :: forall e s m. MonadParsec e s m => m (Maybe SourcePos)
@ -1443,10 +1584,10 @@ setPosition :: MonadParsec e s m => SourcePos -> m ()
 setPosition pos = updateParserState $ \(State s (_:|z) tp w) ->
  State s (pos:|z) tp w

-- | Push a position into stack of positions and continue parsing working
+-- | Push a position to the stack of positions and continue parsing working
 -- with this position. Useful for working with include files and the like.
 --
-- See also: 'getPosition', 'setPosition', 'popPosition', and 'SourcePos'.
+-- See also: 'popPosition'.
 --
 -- @since 5.0.0

@ -1458,7 +1599,7 @@ pushPosition pos = updateParserState $ \(State s z tp w) ->
 -- element (in that case the stack of positions remains the same). This is
 -- how to return to previous source file after 'pushPosition'.
 --
-- See also: 'getPosition', 'setPosition', 'pushPosition', and 'SourcePos'.
+-- See also: 'pushPosition'.
 --
 -- @since 5.0.0

@ -1470,6 +1611,8 @@ popPosition = updateParserState $ \(State s z tp w) ->

 -- | Get the number of tokens processed so far.
 --
+-- See also: 'setTokensProcessed'.
+--
 -- @since 6.0.0

 getTokensProcessed :: MonadParsec e s m => m Int
@ -1477,6 +1620,8 @@ getTokensProcessed = stateTokensProcessed <$> getParserState

 -- | Set the number of tokens processed so far.
 --
+-- See also: 'getTokensProcessed'.
+--
 -- @since 6.0.0

 setTokensProcessed :: MonadParsec e s m => Int -> m ()
@ -1486,18 +1631,24 @@ setTokensProcessed tp = updateParserState $ \(State s pos _ w) ->
 -- | Return the tab width. The default tab width is equal to
 -- 'defaultTabWidth'. You can set a different tab width with the help of
 -- 'setTabWidth'.
+--
+-- See also: 'setTabWidth'.

 getTabWidth :: MonadParsec e s m => m Pos
 getTabWidth = stateTabWidth <$> getParserState

 -- | Set tab width. If the argument of the function is not a positive
 -- number, 'defaultTabWidth' will be used.
+--
+-- See also: 'getTabWidth'.

 setTabWidth :: MonadParsec e s m => Pos -> m ()
 setTabWidth w = updateParserState $ \(State s pos tp _) ->
  State s pos tp w

 -- | @'setParserState' st@ sets the parser state to @st@.
+--
+-- See also: 'getParserState', 'updateParserState'.

 setParserState :: MonadParsec e s m => State s -> m ()
 setParserState st = updateParserState (const st)
@ -1615,4 +1766,4 @@ streamTake :: forall s. Stream s => Int -> s -> [Token s]
 streamTake n s =
  case fst <$> takeN_ n s of
    Nothing -> []
-    Just chunk -> chunkToTokens (Proxy :: Proxy s) chunk
+    Just chk -> chunkToTokens (Proxy :: Proxy s) chk
--- a/Text/Megaparsec/Byte.hs
+++ b/Text/Megaparsec/Byte.hs
@ -34,16 +34,11 @@ module Text.Megaparsec.Byte
  , octDigitChar
  , hexDigitChar
  , asciiChar
-    -- * More general parsers
-  , C.char
+    -- * Single byte
+  , char
  , char'
-  , C.anyChar
-  , C.notChar
-  , C.oneOf
-  , C.noneOf
-  , C.satisfy
    -- * Sequence of bytes
-  , C.string
+  , string
  , C.string' )
 where

@ -62,14 +57,14 @@ import qualified Text.Megaparsec.Char as C
 -- | Parse a newline byte.

 newline :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
-newline = C.char 10
+newline = char 10
 {-# INLINE newline #-}

 -- | Parse a carriage return character followed by a newline character.
 -- Return the sequence of characters parsed.

 crlf :: forall e s m. (MonadParsec e s m, Token s ~ Word8) => m (Tokens s)
-crlf = C.string (tokensToChunk (Proxy :: Proxy s) [13,10])
+crlf = string (tokensToChunk (Proxy :: Proxy s) [13,10])
 {-# INLINE crlf #-}

 -- | Parse a CRLF (see 'crlf') or LF (see 'newline') end of line. Return the
@ -84,7 +79,7 @@ eol = (tokenToChunk (Proxy :: Proxy s) <$> newline)
 -- | Parse a tab character.

 tab :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
-tab = C.char 9
+tab = char 9
 {-# INLINE tab #-}

 -- | Skip /zero/ or more white space characters.
@ -109,51 +104,51 @@ space1 = void $ takeWhile1P (Just "white space") isSpace'
 -- | Parse a control character.

 controlChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
-controlChar = C.satisfy (isControl . toChar) <?> "control character"
+controlChar = satisfy (isControl . toChar) <?> "control character"
 {-# INLINE controlChar #-}

 -- | Parse a space character, and the control characters: tab, newline,
 -- carriage return, form feed, and vertical tab.

 spaceChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
-spaceChar = C.satisfy isSpace' <?> "white space"
+spaceChar = satisfy isSpace' <?> "white space"
 {-# INLINE spaceChar #-}

 -- | Parse an upper-case character.

 upperChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
-upperChar = C.satisfy (isUpper . toChar) <?> "uppercase letter"
+upperChar = satisfy (isUpper . toChar) <?> "uppercase letter"
 {-# INLINE upperChar #-}

 -- | Parse a lower-case alphabetic character.

 lowerChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
-lowerChar = C.satisfy (isLower . toChar) <?> "lowercase letter"
+lowerChar = satisfy (isLower . toChar) <?> "lowercase letter"
 {-# INLINE lowerChar #-}

 -- | Parse an alphabetic character: lower-case or upper-case.

 letterChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
-letterChar = C.satisfy (isLetter . toChar) <?> "letter"
+letterChar = satisfy (isLetter . toChar) <?> "letter"
 {-# INLINE letterChar #-}

 -- | Parse an alphabetic or digit characters.

 alphaNumChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
-alphaNumChar = C.satisfy (isAlphaNum . toChar) <?> "alphanumeric character"
+alphaNumChar = satisfy (isAlphaNum . toChar) <?> "alphanumeric character"
 {-# INLINE alphaNumChar #-}

 -- | Parse a printable character: letter, number, mark, punctuation, symbol
 -- or space.

 printChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
-printChar = C.satisfy (isPrint . toChar) <?> "printable character"
+printChar = satisfy (isPrint . toChar) <?> "printable character"
 {-# INLINE printChar #-}

 -- | Parse an ASCII digit, i.e between “0” and “9”.

 digitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
-digitChar = C.satisfy isDigit' <?> "digit"
+digitChar = satisfy isDigit' <?> "digit"
  where
    isDigit' x = x >= 48 && x <= 57
 {-# INLINE digitChar #-}
@ -161,7 +156,7 @@ digitChar = C.satisfy isDigit' <?> "digit"
 -- | Parse an octal digit, i.e. between “0” and “7”.

 octDigitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
-octDigitChar = C.satisfy isOctDigit' <?> "octal digit"
+octDigitChar = satisfy isOctDigit' <?> "octal digit"
  where
    isOctDigit' x = x >= 48 && x <= 55
 {-# INLINE octDigitChar #-}
@ -170,18 +165,26 @@ octDigitChar = C.satisfy isOctDigit' <?> "octal digit"
 -- “A” and “F”.

 hexDigitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
-hexDigitChar = C.satisfy (isHexDigit . toChar) <?> "hexadecimal digit"
+hexDigitChar = satisfy (isHexDigit . toChar) <?> "hexadecimal digit"
 {-# INLINE hexDigitChar #-}

 -- | Parse a character from the first 128 characters of the Unicode
 -- character set, corresponding to the ASCII character set.

 asciiChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
-asciiChar = C.satisfy (< 128) <?> "ASCII character"
+asciiChar = satisfy (< 128) <?> "ASCII character"
 {-# INLINE asciiChar #-}

 ----------------------------------------------------------------------------
-- More general parsers
+-- Single byte
+
+-- | A type-constrained version of 'single'.
+--
+-- > newline = char 10
+
+char :: (MonadParsec e s m, Token s ~ Word8) => Token s -> m (Token s)
+char = single
+{-# INLINE char #-}

 -- | The same as 'char' but case-insensitive. This parser returns the
 -- actually parsed character preserving its case.
@ -195,8 +198,8 @@ asciiChar = C.satisfy (< 128) <?> "ASCII character"

 char' :: (MonadParsec e s m, Token s ~ Word8) => Token s -> m (Token s)
 char' c = choice
-  [ C.char c
-  , C.char (fromMaybe c (swapCase c)) ]
+  [ char c
+  , char (fromMaybe c (swapCase c)) ]
  where
    swapCase x
      | isUpper g = fromChar (toLower g)
@ -206,6 +209,15 @@ char' c = choice
        g = toChar x
 {-# INLINE char' #-}

+----------------------------------------------------------------------------
+-- Sequence of bytes
+
+-- | A type-constrained version of 'chunk'.
+
+string :: (MonadParsec e s m, Token s ~ Word8) => Tokens s -> m (Tokens s)
+string = chunk
+{-# INLINE string #-}
+
 ----------------------------------------------------------------------------
 -- Helpers

--- a/Text/Megaparsec/Byte/Lexer.hs
+++ b/Text/Megaparsec/Byte/Lexer.hs
@ -68,7 +68,7 @@ skipBlockComment :: (MonadParsec e s m, Token s ~ Word8)
  => Tokens s          -- ^ Start of block comment
  -> Tokens s          -- ^ End of block comment
  -> m ()
-skipBlockComment start end = p >> void (manyTill anyChar n)
+skipBlockComment start end = p >> void (manyTill anySingle n)
  where
    p = string start
    n = string end
@ -85,7 +85,7 @@ skipBlockCommentNested :: (MonadParsec e s m, Token s ~ Word8)
  -> m ()
 skipBlockCommentNested start end = p >> void (manyTill e n)
  where
-    e = skipBlockCommentNested start end <|> void anyChar
+    e = skipBlockCommentNested start end <|> void anySingle
    p = string start
    n = string end
 {-# INLINEABLE skipBlockCommentNested #-}
--- a/Text/Megaparsec/Char.hs
+++ b/Text/Megaparsec/Char.hs
@ -45,14 +45,9 @@ module Text.Megaparsec.Char
  , latin1Char
  , charCategory
  , categoryName
-    -- * More general parsers
+    -- * Single character
  , char
  , char'
-  , anyChar
-  , notChar
-  , oneOf
-  , noneOf
-  , satisfy
    -- * Sequence of characters
  , string
  , string' )
@ -62,16 +57,9 @@ import Control.Applicative
 import Data.Char
 import Data.Function (on)
 import Data.Functor (void)
-import Data.List.NonEmpty (NonEmpty (..))
 import Data.Proxy
 import Text.Megaparsec
 import qualified Data.CaseInsensitive as CI
-import qualified Data.Set             as E
-
-#if !MIN_VERSION_base(4,8,0)
-import Data.Foldable (Foldable (), elem, notElem)
-import Prelude hiding (elem, notElem)
-#endif

 ----------------------------------------------------------------------------
 -- Simple parsers
@ -289,17 +277,14 @@ categoryName = \case
  NotAssigned          -> "non-assigned Unicode character"

 ----------------------------------------------------------------------------
-- More general parsers
+-- Single character

-- | @'char' c@ parses a single character @c@.
+-- | A type-constrained version of 'single'.
 --
 -- > semicolon = char ';'

-char :: MonadParsec e s m => Token s -> m (Token s)
-char c = token testChar expected
-  where
-    testChar x = if x == c then Just x else Nothing
-    expected   = E.singleton (Tokens (c:|[]))
+char :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s)
+char = single
 {-# INLINE char #-}

 -- | The same as 'char' but case-insensitive. This parser returns the
@ -321,87 +306,13 @@ char' c = choice [char c, char (swapCase c)]
      | otherwise = x
 {-# INLINE char' #-}

-- | This parser succeeds for any character. Returns the parsed character.
-
-anyChar :: MonadParsec e s m => m (Token s)
-anyChar = satisfy (const True) <?> "character"
-{-# INLINE anyChar #-}
-
-- | Match any character but the given one. It's a good idea to attach a
-- 'label' to this parser manually.
--
-- @since 6.0.0
-
-notChar :: MonadParsec e s m => Token s -> m (Token s)
-notChar c = satisfy (/= c)
-{-# INLINE notChar #-}
-
-- | @'oneOf' cs@ succeeds if the current character is in the supplied
-- collection of characters @cs@. Returns the parsed character. Note that
-- this parser cannot automatically generate the “expected” component of
-- error message, so usually you should label it manually with 'label' or
-- ('<?>').
--
-- See also: 'satisfy'.
--
-- > digit = oneOf ['0'..'9'] <?> "digit"
--
-- __Performance note__: prefer 'satisfy' when you can because it's faster
-- when you have only a couple of tokens to compare to:
--
-- > quoteFast = satisfy (\x -> x == '\'' || x == '\"')
-- > quoteSlow = oneOf "'\""
-
-oneOf :: (Foldable f, MonadParsec e s m)
-  => f (Token s)
-  -> m (Token s)
-oneOf cs = satisfy (`elem` cs)
-{-# INLINE oneOf #-}
-
-- | As the dual of 'oneOf', @'noneOf' cs@ succeeds if the current character
-- /not/ in the supplied list of characters @cs@. Returns the parsed
-- character. Note that this parser cannot automatically generate the
-- “expected” component of error message, so usually you should label it
-- manually with 'label' or ('<?>').
--
-- See also: 'satisfy'.
--
-- __Performance note__: prefer 'satisfy' and 'notChar' when you can because
-- it's faster.
-
-noneOf :: (Foldable f, MonadParsec e s m)
-  => f (Token s)
-  -> m (Token s)
-noneOf cs = satisfy (`notElem` cs)
-{-# INLINE noneOf #-}
-
-- | The parser @'satisfy' f@ succeeds for any character for which the
-- supplied function @f@ returns 'True'. Returns the character that is
-- actually parsed.
--
-- > digitChar = satisfy isDigit <?> "digit"
-- > oneOf cs  = satisfy (`elem` cs)
-
-satisfy :: MonadParsec e s m
-  => (Token s -> Bool) -- ^ Predicate to apply
-  -> m (Token s)
-satisfy f = token testChar E.empty
-  where
-    testChar x = if f x then Just x else Nothing
-{-# INLINE satisfy #-}
-
 ----------------------------------------------------------------------------
 -- Sequence of characters

-- | @'string' s@ parses a sequence of characters given by @s@. Returns the
-- parsed string (i.e. @s@).
--
-- > divOrMod = string "div" <|> string "mod"
+-- | A type-constrained version of 'chunk'.

-string :: MonadParsec e s m
-  => Tokens s
-  -> m (Tokens s)
-string = tokens (==)
+string :: (MonadParsec e s m, Token s ~ Char) => Tokens s -> m (Tokens s)
+string = chunk
 {-# INLINE string #-}

 -- | The same as 'string', but case-insensitive. On success returns string
--- a/Text/Megaparsec/Char/Lexer.hs
+++ b/Text/Megaparsec/Char/Lexer.hs
@ -143,7 +143,7 @@ lexeme spc p = p <* spc
 -- > colon     = symbol ":"
 -- > dot       = symbol "."

-symbol :: MonadParsec e s m
+symbol :: (MonadParsec e s m, Token s ~ Char)
  => m ()              -- ^ How to consume white space after lexeme
  -> Tokens s          -- ^ Symbol to parse
  -> m (Tokens s)
@ -179,7 +179,7 @@ skipBlockComment :: (MonadParsec e s m, Token s ~ Char)
  => Tokens s          -- ^ Start of block comment
  -> Tokens s          -- ^ End of block comment
  -> m ()
-skipBlockComment start end = p >> void (manyTill C.anyChar n)
+skipBlockComment start end = p >> void (manyTill anySingle n)
  where
    p = C.string start
    n = C.string end
@ -196,7 +196,7 @@ skipBlockCommentNested :: (MonadParsec e s m, Token s ~ Char)
  -> m ()
 skipBlockCommentNested start end = p >> void (manyTill e n)
  where
-    e = skipBlockCommentNested start end <|> void C.anyChar
+    e = skipBlockCommentNested start end <|> void anySingle
    p = C.string start
    n = C.string end
 {-# INLINEABLE skipBlockCommentNested #-}
@ -397,9 +397,9 @@ charLiteral :: (MonadParsec e s m, Token s ~ Char) => m Char
 charLiteral = label "literal character" $ do
  -- The @~@ is needed to avoid requiring a MonadFail constraint,
  -- and we do know that r will be non-empty if count' succeeds.
-  ~r@(x:_) <- lookAhead $ count' 1 8 C.anyChar
+  ~r@(x:_) <- lookAhead (count' 1 8 anySingle)
  case listToMaybe (Char.readLitChar r) of
-    Just (c, r') -> count (length r - length r') C.anyChar >> return c
+    Just (c, r') -> c <$ count (length r - length r') anySingle
    Nothing      -> unexpected (Tokens (x:|[]))
 {-# INLINEABLE charLiteral #-}

--- a/Text/Megaparsec/Pos.hs
+++ b/Text/Megaparsec/Pos.hs
@ -8,8 +8,8 @@
 -- Portability :  portable
 --
 -- Textual source position. The position includes name of file, line number,
-- and column number. List of such positions can be used to model a stack of
-- include files.
+-- and column number. A non-empty list of such positions can be used to
+-- model a stack of include files.
 --
 -- You probably do not want to import this module directly because
 -- "Text.Megaparsec" re-exports it anyway.
--- a/tests/Text/Megaparsec/ByteSpec.hs
+++ b/tests/Text/Megaparsec/ByteSpec.hs
@ -104,15 +104,6 @@ spec = do
  describe "spaceChar" $
    checkCharRange "white space" [9,10,11,12,13,32,160] spaceChar

-  -- describe "upperChar" $
-  --   checkCharPred "uppercase letter" (isUpper . toChar) upperChar
-
-  -- describe "lowerChar" $
-  --   checkCharPred "lowercase letter" (isLower . toChar) lowerChar
-
-  -- describe "letterChar" $
-  --   checkCharPred "letter" (isAlpha . toChar) letterChar
-
  describe "alphaNumChar" $
    checkCharPred "alphanumeric character" (isAlphaNum . toChar) alphaNumChar

@ -128,9 +119,6 @@ spec = do
  describe "hexDigitChar" $
    checkCharRange "hexadecimal digit" ([48..57] ++ [97..102] ++ [65..70]) hexDigitChar

-  -- describe "asciiChar" $
-  --   checkCharPred "ASCII character" (isAscii . toChar) asciiChar
-
  describe "char'" $ do
    context "when stream begins with the character specified as argument" $
      it "parses the character" $
--- a/tests/Text/Megaparsec/CharSpec.hs
+++ b/tests/Text/Megaparsec/CharSpec.hs
@ -206,7 +206,8 @@ spec = do

  describe "char'" $ do
    let goodChar x =
-          (toUpper x == toLower x) || (isUpper x || isLower x)
+          (toUpper x == toLower x) ||
+          (((toUpper x == x) || (toLower x == x)) && (isUpper x || isLower x))
    context "when stream begins with the character specified as argument" $
      it "parses the character" $
        property $ \ch s -> goodChar ch ==> do
@ -229,75 +230,6 @@ spec = do
          let ms = ueof <> etok (toLower ch) <> etok (toUpper ch)
          prs  (char' ch) "" `shouldFailWith` err posI ms

-  describe "anyChar" $ do
-    context "when stream is not empty" $
-      it "succeeds consuming next character in the stream" $
-        property $ \ch s -> do
-          let s' = ch : s
-          prs  anyChar s' `shouldParse`     ch
-          prs' anyChar s' `succeedsLeaving` s
-    context "when stream is empty" $
-      it "signals correct parse error" $
-        prs anyChar "" `shouldFailWith` err posI (ueof <> elabel "character")
-
-  describe "notChar" $ do
-    context "when stream begins with the character specified as argument" $
-      it "signals correct parse error" $
-        property $ \ch s' -> do
-          let p = notChar ch
-              s = ch : s'
-          prs p s `shouldFailWith` err posI (utok ch)
-          prs' p s `failsLeaving` s
-    context "when stream does not begin with the character specified as argument" $
-      it "parses first character in the stream" $
-        property $ \ch s -> not (null s) && ch /= head s ==> do
-          let p = notChar ch
-          prs  p s `shouldParse` head s
-          prs' p s `succeedsLeaving` tail s
-    context "when stream is empty" $
-      it "signals correct parse error" $
-        prs (notChar 'a') "" `shouldFailWith` err posI ueof
-
-  describe "oneOf" $ do
-    context "when stream begins with one of specified characters" $
-      it "parses the character" $
-        property $ \chs' n s -> do
-          let chs = getNonEmpty chs'
-              ch  = chs !! (getNonNegative n `rem` length chs)
-              s'  = ch : s
-          prs  (oneOf chs) s' `shouldParse`     ch
-          prs' (oneOf chs) s' `succeedsLeaving` s
-    context "when stream does not begin with any of specified characters" $
-      it "signals correct parse error" $
-        property $ \chs ch s  -> ch `notElem` (chs :: String) ==> do
-          let s' = ch : s
-          prs  (oneOf chs) s' `shouldFailWith` err posI (utok ch)
-          prs' (oneOf chs) s' `failsLeaving`   s'
-    context "when stream is empty" $
-      it "signals correct parse error" $
-        property $ \chs ->
-          prs (oneOf (chs :: String)) "" `shouldFailWith` err posI ueof
-
-  describe "noneOf" $ do
-    context "when stream does not begin with any of specified characters" $
-      it "parses the character" $
-        property $ \chs ch s  -> ch `notElem` (chs :: String) ==> do
-          let s' = ch : s
-          prs  (noneOf chs) s' `shouldParse`     ch
-          prs' (noneOf chs) s' `succeedsLeaving` s
-    context "when stream begins with one of specified characters" $
-      it "signals correct parse error" $
-        property $ \chs' n s -> do
-          let chs = getNonEmpty chs'
-              ch  = chs !! (getNonNegative n `rem` length chs)
-              s'  = ch : s
-          prs  (noneOf chs) s' `shouldFailWith` err posI (utok ch)
-          prs' (noneOf chs) s' `failsLeaving`   s'
-    context "when stream is empty" $
-      it "signals correct parse error" $
-        property $ \chs ->
-          prs (noneOf (chs :: String)) "" `shouldFailWith` err posI ueof
-
  describe "string" $ do
    context "when stream is prefixed with given string" $
      it "parses the string" $
--- a/tests/Text/Megaparsec/StreamSpec.hs
+++ b/tests/Text/Megaparsec/StreamSpec.hs
@ -28,24 +28,24 @@ spec = do
          chunkToTokens sproxy (tokensToChunk sproxy ts) === ts
    describe "chunkToTokens" $
      it "chunk is isomorphic to list of tokens" $
-        property $ \chunk ->
-          tokensToChunk sproxy (chunkToTokens sproxy chunk) === chunk
+        property $ \chk ->
+          tokensToChunk sproxy (chunkToTokens sproxy chk) === chk
    describe "chunkLength" $
      it "returns correct length of given chunk" $
-        property $ \chunk ->
-          chunkLength sproxy chunk === length chunk
+        property $ \chk ->
+          chunkLength sproxy chk === length chk
    describe "chunkEmpty" $
      it "only true when chunkLength returns 0" $
-        property $ \chunk ->
-          chunkEmpty sproxy chunk === (chunkLength sproxy chunk <= 0)
+        property $ \chk ->
+          chunkEmpty sproxy chk === (chunkLength sproxy chk <= 0)
    describe "positionAt1" $
      it "just returns the given position" $
        property $ \pos t ->
          positionAt1 sproxy pos t === pos
    describe "positionAtN" $
      it "just returns the given position" $
-        property $ \pos chunk ->
-          positionAtN sproxy pos chunk === pos
+        property $ \pos chk ->
+          positionAtN sproxy pos chk === pos
    describe "advance1" $ do
      context "when given newline" $
        it "works correctly" $
@ -102,24 +102,24 @@ spec = do
          chunkToTokens bproxy (tokensToChunk bproxy ts) === ts
    describe "chunkToTokens" $
      it "chunk is isomorphic to list of tokens" $
-        property $ \chunk ->
-          tokensToChunk bproxy (chunkToTokens bproxy chunk) === chunk
+        property $ \chk ->
+          tokensToChunk bproxy (chunkToTokens bproxy chk) === chk
    describe "chunkLength" $
      it "returns correct length of given chunk" $
-        property $ \chunk ->
-          chunkLength bproxy chunk === B.length chunk
+        property $ \chk ->
+          chunkLength bproxy chk === B.length chk
    describe "chunkEmpty" $
      it "only true when chunkLength returns 0" $
-        property $ \chunk ->
-          chunkEmpty bproxy chunk === (chunkLength bproxy chunk <= 0)
+        property $ \chk ->
+          chunkEmpty bproxy chk === (chunkLength bproxy chk <= 0)
    describe "positionAt1" $
      it "just returns the given position" $
        property $ \pos t ->
          positionAt1 bproxy pos t === pos
    describe "positionAtN" $
      it "just returns the given position" $
-        property $ \pos chunk ->
-          positionAtN bproxy pos chunk === pos
+        property $ \pos chk ->
+          positionAtN bproxy pos chk === pos
    describe "advance1" $ do
      context "when given newline" $
        it "works correctly" $
@ -177,24 +177,24 @@ spec = do
          chunkToTokens blproxy (tokensToChunk blproxy ts) === ts
    describe "chunkToTokens" $
      it "chunk is isomorphic to list of tokens" $
-        property $ \chunk ->
-          tokensToChunk blproxy (chunkToTokens blproxy chunk) === chunk
+        property $ \chk ->
+          tokensToChunk blproxy (chunkToTokens blproxy chk) === chk
    describe "chunkLength" $
      it "returns correct length of given chunk" $
-        property $ \chunk ->
-          chunkLength blproxy chunk === fromIntegral (BL.length chunk)
+        property $ \chk ->
+          chunkLength blproxy chk === fromIntegral (BL.length chk)
    describe "chunkEmpty" $
      it "only true when chunkLength returns 0" $
-        property $ \chunk ->
-          chunkEmpty blproxy chunk === (chunkLength blproxy chunk <= 0)
+        property $ \chk ->
+          chunkEmpty blproxy chk === (chunkLength blproxy chk <= 0)
    describe "positionAt1" $
      it "just returns the given position" $
        property $ \pos t ->
          positionAt1 blproxy pos t === pos
    describe "positionAtN" $
      it "just returns the given position" $
-        property $ \pos chunk ->
-          positionAtN blproxy pos chunk === pos
+        property $ \pos chk ->
+          positionAtN blproxy pos chk === pos
    describe "advance1" $ do
      context "when given newline" $
        it "works correctly" $
@ -252,24 +252,24 @@ spec = do
          chunkToTokens tproxy (tokensToChunk tproxy ts) === ts
    describe "chunkToTokens" $
      it "chunk is isomorphic to list of tokens" $
-        property $ \chunk ->
-          tokensToChunk tproxy (chunkToTokens tproxy chunk) === chunk
+        property $ \chk ->
+          tokensToChunk tproxy (chunkToTokens tproxy chk) === chk
    describe "chunkLength" $
      it "returns correct length of given chunk" $
-        property $ \chunk ->
-          chunkLength tproxy chunk === T.length chunk
+        property $ \chk ->
+          chunkLength tproxy chk === T.length chk
    describe "chunkEmpty" $
      it "only true when chunkLength returns 0" $
-        property $ \chunk ->
-          chunkEmpty tproxy chunk === (chunkLength tproxy chunk <= 0)
+        property $ \chk ->
+          chunkEmpty tproxy chk === (chunkLength tproxy chk <= 0)
    describe "positionAt1" $
      it "just returns the given position" $
        property $ \pos t ->
          positionAt1 tproxy pos t === pos
    describe "positionAtN" $
      it "just returns the given position" $
-        property $ \pos chunk ->
-          positionAtN tproxy pos chunk === pos
+        property $ \pos chk ->
+          positionAtN tproxy pos chk === pos
    describe "advance1" $ do
      context "when given newline" $
        it "works correctly" $
@ -326,24 +326,24 @@ spec = do
          chunkToTokens tlproxy (tokensToChunk tlproxy ts) === ts
    describe "chunkToTokens" $
      it "chunk is isomorphic to list of tokens" $
-        property $ \chunk ->
-          tokensToChunk tlproxy (chunkToTokens tlproxy chunk) === chunk
+        property $ \chk ->
+          tokensToChunk tlproxy (chunkToTokens tlproxy chk) === chk
    describe "chunkLength" $
      it "returns correct length of given chunk" $
-        property $ \chunk ->
-          chunkLength tlproxy chunk === fromIntegral (TL.length chunk)
+        property $ \chk ->
+          chunkLength tlproxy chk === fromIntegral (TL.length chk)
    describe "chunkEmpty" $
      it "only true when chunkLength returns 0" $
-        property $ \chunk ->
-          chunkEmpty tlproxy chunk === (chunkLength tlproxy chunk <= 0)
+        property $ \chk ->
+          chunkEmpty tlproxy chk === (chunkLength tlproxy chk <= 0)
    describe "positionAt1" $
      it "just returns the given position" $
        property $ \pos t ->
          positionAt1 tlproxy pos t === pos
    describe "positionAtN" $
      it "just returns the given position" $
-        property $ \pos chunk ->
-          positionAtN tlproxy pos chunk === pos
+        property $ \pos chk ->
+          positionAtN tlproxy pos chk === pos
    describe "advance1" $ do
      context "when given newline" $
        it "works correctly" $
--- a/tests/Text/MegaparsecSpec.hs
+++ b/tests/Text/MegaparsecSpec.hs
@ -38,12 +38,12 @@ import qualified Control.Monad.State.Lazy    as L
 import qualified Control.Monad.State.Strict  as S
 import qualified Control.Monad.Writer.Lazy   as L
 import qualified Control.Monad.Writer.Strict as S
+import qualified Data.ByteString             as BS
 import qualified Data.List                   as DL
 import qualified Data.List.NonEmpty          as NE
 import qualified Data.Semigroup              as G
 import qualified Data.Set                    as E
 import qualified Data.Text                   as T
-import qualified Data.ByteString             as BS

 #if !MIN_VERSION_base(4,8,0)
 import Control.Applicative hiding (many, some)
@ -223,17 +223,17 @@ spec = do
    describe "equivalence to 'string'" $ do
      it "for String" $ property $ \s i ->
        eqParser
-          (string s)
+          (chunk s)
          (fromString s)
          (i :: String)
      it "for Text" $ property $ \s i ->
        eqParser
-          (string (T.pack s))
+          (chunk (T.pack s))
          (fromString s)
          (i :: T.Text)
      it "for ByteString" $ property $ \s i ->
        eqParser
-          (string (fromString s :: BS.ByteString))
+          (chunk (fromString s :: BS.ByteString))
          (fromString s)
          (i :: BS.ByteString)
    it "can handle Unicode" $ do
@ -314,7 +314,7 @@ spec = do
          it "parses the string" $
            property $ \s0 s1 s -> not (s1 `isPrefixOf` s0) ==> do
              let s' = s0 ++ s
-                  p = string s0 <|> string s1
+                  p = chunk s0 <|> chunk s1
              prs  p s' `shouldParse` s0
              prs' p s' `succeedsLeaving` s
        context "stream begins with the second string" $
@ -1212,6 +1212,85 @@ spec = do

  describe "derivatives from primitive combinators" $ do

+    -- NOTE 'single' is tested via 'char' in "Text.Megaparsec.Char" and
+    -- "Text.Megaparsec.Byte".
+
+    describe "anySingle" $ do
+      let p :: MonadParsec Void String m => m Char
+          p = anySingle
+      context "when stream is not empty" $
+        it "succeeds consuming next character in the stream" $
+          property $ \ch s -> do
+            let s' = ch : s
+            grs  p s' (`shouldParse`     ch)
+            grs' p s' (`succeedsLeaving` s)
+      context "when stream is empty" $
+        it "signals correct parse error" $
+          grs p "" (`shouldFailWith` err posI ueof)
+
+    describe "anySingleBut" $ do
+      context "when stream begins with the character specified as argument" $
+        it "signals correct parse error" $
+          property $ \ch s' -> do
+            let p :: MonadParsec Void String m => m Char
+                p = anySingleBut ch
+                s = ch : s'
+            grs  p s (`shouldFailWith` err posI (utok ch))
+            grs' p s (`failsLeaving` s)
+      context "when stream does not begin with the character specified as argument" $
+        it "parses first character in the stream" $
+          property $ \ch s -> not (null s) && ch /= head s ==> do
+            let p :: MonadParsec Void String m => m Char
+                p = anySingleBut ch
+            grs  p s (`shouldParse` head s)
+            grs' p s (`succeedsLeaving` tail s)
+      context "when stream is empty" $
+        it "signals correct parse error" $
+          grs (anySingleBut 'a') "" (`shouldFailWith` err posI ueof)
+
+    describe "oneOf" $ do
+      context "when stream begins with one of specified characters" $
+        it "parses the character" $
+          property $ \chs' n s -> do
+            let chs = getNonEmpty chs'
+                ch  = chs !! (getNonNegative n `rem` length chs)
+                s'  = ch : s
+            grs  (oneOf chs) s' (`shouldParse`     ch)
+            grs' (oneOf chs) s' (`succeedsLeaving` s)
+      context "when stream does not begin with any of specified characters" $
+        it "signals correct parse error" $
+          property $ \chs ch s  -> ch `notElem` (chs :: String) ==> do
+            let s' = ch : s
+            grs  (oneOf chs) s' (`shouldFailWith` err posI (utok ch))
+            grs' (oneOf chs) s' (`failsLeaving`   s')
+      context "when stream is empty" $
+        it "signals correct parse error" $
+          property $ \chs ->
+            grs (oneOf (chs :: String)) "" (`shouldFailWith` err posI ueof)
+
+    describe "noneOf" $ do
+      context "when stream does not begin with any of specified characters" $
+        it "parses the character" $
+          property $ \chs ch s  -> ch `notElem` (chs :: String) ==> do
+            let s' = ch : s
+            grs  (noneOf chs) s' (`shouldParse`     ch)
+            grs' (noneOf chs) s' (`succeedsLeaving` s)
+      context "when stream begins with one of specified characters" $
+        it "signals correct parse error" $
+          property $ \chs' n s -> do
+            let chs = getNonEmpty chs'
+                ch  = chs !! (getNonNegative n `rem` length chs)
+                s'  = ch : s
+            grs  (noneOf chs) s' (`shouldFailWith` err posI (utok ch))
+            grs' (noneOf chs) s' (`failsLeaving`   s')
+      context "when stream is empty" $
+        it "signals correct parse error" $
+          property $ \chs ->
+            grs (noneOf (chs :: String)) "" (`shouldFailWith` err posI ueof)
+
+    -- NOTE 'chunk' is tested via 'string' in "Text.Megaparsec.Char" and
+    -- "Text.Megaparsec.Byte".
+
    describe "unexpected" $
      it "signals correct parse error" $
        property $ \item -> do
@ -1406,7 +1485,7 @@ spec = do
    describe "notFollowedBy" $
      it "generally works" $
        property $ \a' b' c' -> do
-          let p = many (char =<< ask) <* notFollowedBy eof <* many anyChar
+          let p = many (char =<< ask) <* notFollowedBy eof <* many anySingle
              [a,b,c] = getNonNegative <$> [a',b',c']
              s = abcRow a b c
          if b > 0 || c > 0
@ -1435,7 +1514,7 @@ spec = do
          let p = do
                L.put n
                let notEof = notFollowedBy (L.modify (* 2) >> eof)
-                some (try (anyChar <* notEof)) <* char 'x'
+                some (try (anySingle <* notEof)) <* char 'x'
          prs (L.runStateT p 0) "abx" `shouldParse` ("ab", n :: Integer)

    describe "observing" $ do
@ -1475,7 +1554,7 @@ spec = do
          let p = do
                S.put n
                let notEof = notFollowedBy (S.modify (* 2) >> eof)
-                some (try (anyChar <* notEof)) <* char 'x'
+                some (try (anySingle <* notEof)) <* char 'x'
          prs (S.runStateT p 0) "abx" `shouldParse` ("ab", n :: Integer)

    describe "observing" $ do