Extending stream (#229)

2024-11-26 23:02:07 +03:00 · 2017-07-02 23:56:01 +07:00 · 2017-07-02 23:56:01 +07:00 · 45f30ae7e1
commit 45f30ae7e1
parent 785d006f02
21 changed files with 1423 additions and 585 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -1,5 +1,7 @@
 ## Megaparsec 6.0.0

+### General
+
 * Re-organized the module hierarchy. Some modules such as
  `Text.Megaparsec.Prim` do not exist anymore. Stream definitions were moved
  to `Text.Megaparsec.Stream`. Generic combinators are now re-exported from
@ -11,10 +13,30 @@
 * Dropped per-stream modules, the `Parser` type synonym is to be defined
  manually by user.

-* Control characters in parse error are displayed in a readable form even
-  when they are part of strings, for example: `{<newline>` (`{` followed by
-  the newline character). Previously control characters were rendered in
-  readable form only as standalone tokens.
+* Added a `MonadFix` instance for `ParsecT`.
+
+* More lightweight dependency tree, dropped `exceptions` and `QuickCheck`
+  dependencies.
+
+* Added dependency on `case-insensitive`.
+
+### Source positions
+
+* Now `Pos` contains an `Int` inside, not `Word`.
+
+* Dropped `unsafePos` and changed type of `mkPos` so it throws from pure
+  code if its argument is not a positive `Int`.
+
+* Added `pos1` constant that represents the `Pos` with value 1 inside.
+
+* Made `InvalidPosException` contain the invalid `Int` value that was passed
+  to `mkPos`.
+
+### Parse errors
+
+* Changed the definition of `ParseError` to have separate data constructors
+  for “trivial” errors (unexpected/expected tokens) and “fancy” errors
+  (everything else).

 * Removed the `ErrorComponent` type class, added `ErrorFancy` instead.
  `ErrorFancy` is a sum type which can represent `fail` messages, incorrect
@ -23,41 +45,39 @@
  every instance of `ErrorComponent` needed to have constructors for `fail`
  and indentation massages anyway, leading to duplication of code.

-* Changed the definition of `ParseError` to have separate data constructors
-  for “trivial” errors (unexpected/expected tokens) and “fancy” errors
-  (everything else).
+* Added `Functor` instances for `ErrorItem` and `ErrorFancy`.

 * Added the function `errorPos` to get error positions from `ParseError`
  (previously it was a record selector in `ParseError`).

-* Changed signatures of `failure` and `token`, they only can signal trivial
-  errors now.
-
-* Added a new method of `MonadParsec` type class called `fancyFailure` for
-  signalling non-trivial failures.
-
-  Signatures of some functions (`failure`, `token`) have been changed
-  accordingly.
-
-* Added `Functor` instances for `ErrorItem` and `ErrorFancy`.
-
-* Dropped `unsafePos` and changed type of `mkPos` so it throws from pure
-  code if its argument is not a positive `Word`.
-
-* Added `pos1` constant that represents the `Pos` with value 1 inside.
-
-* `defaultUpdatePos` has been moved from `Text.Megaparsec.Pos` to
-  `Text.Megaparsec.Stream`.
+* Control characters in parse error are displayed in a readable form even
+  when they are part of strings, for example: `{<newline>` (`{` followed by
+  the newline character). Previously control characters were rendered in
+  readable form only as standalone tokens.

 * Added `Text.Megaparsec.Error.Builder` module to help construct
  `ParseError`s easily. Useful for testing and debugging, previously we had
  something like that in the `hspec-megaparsec` package, but it does not
  hurt to ship it with the library.

-* More lightweight dependency tree, dropped `exceptions` and `QuickCheck`
-  dependencies.
+### Stream

-* Added a `MonadFix` instance for `ParsecT`.
+* Introduced the new `Text.Megaparsec.Stream` module that is the home of
+  `Stream` type class. In version 6, the type class has been extended
+  significantly to improve performance and make some combinators more
+  general.
+
+### Combinators
+
+* Changed signatures of `failure` and `token`, they only can signal trivial
+  errors now.
+
+* Added a new method of `MonadParsec` type class called `fancyFailure` for
+  signalling non-trivial failures. Signatures of some functions (`failure`,
+  `token`) have been changed accordingly.
+
+* Added `takeWhileP` and `takeWhile1P` to `MonadParsec`. Added `skipWhileP`,
+  `skipWhile1P` as derivatives from those primitive combinators.

 ## Megaparsec 5.3.1

--- a/README.md
+++ b/README.md
@ -54,33 +54,14 @@ On the other hand `ParsecT` is an instance of many type classes as well. The
 most useful ones are `Monad`, `Applicative`, `Alternative`, and
 `MonadParsec`.

-The module
-[`Text.Megaparsec.Combinator`](https://hackage.haskell.org/package/megaparsec/docs/Text-Megaparsec-Combinator.html) (its
-functions are included in `Text.Megaparsec`) contains traditional, general
-combinators that work with instances of `Applicative` and `Alternative`.
-
-Let's enumerate methods of the `MonadParsec` type class. The class abstracts
-primitive functions of Megaparsec parsing. The rest of the library is built
-via combination of these primitives:
+Megaparsec includes all functionality that is available in Parsec plus
+features some combinators that are missing in other parsing libraries:

 * `failure` allows to fail reporting a parse error with unexpected and
  expected items.

 * `fancyFailure` allows to fail reporting custom error messages.

-* `label` allows to add a “label” to a parser, so if it fails the user will
-  see the label instead of an automatically deduced expected token.
-
-* `hidden` hides a parser from error messages altogether. This is the
-  recommended way to hide things, prefer it to the `label ""` approach.
-
-* `try` enables backtracking in parsing.
-
-* `lookAhead` allows to parse input without consuming it.
-
-* `notFollowedBy` succeeds when its argument fails and does not consume
-  input.
-
 * `withRecovery` allows to recover from parse errors “on-the-fly” and
  continue parsing. Once parsing is finished, several parse errors may be
  reported or ignored altogether.
@ -88,21 +69,21 @@ via combination of these primitives:
 * `observing` allows to “observe” parse errors without ending parsing (they
  are returned in `Left`, while normal results are wrapped in `Right`).

-* `eof` only succeeds at the end of input.
+In addition to that, Megaparsec 6 features high-performance combinators
+similar to those found in Attoparsec:

-* `token` is used to parse a single token.
+* `tokens` makes it easy to parse several tokens in a row. This is about 100
+  time faster than matching a string token by token. `string` and `string'`
+  are built on top of this combinator. `tokens` returns “chunk” of original
+  input, meaning that if you parse `Text`, it'll return `Text` without any
+  repacking.

-* `tokens` makes it easy to parse several tokens in a row.
+* `takeWhile` and `takeWhile1` are about 150 times faster than approaches
+  involving `many`, `manyTill` and other similar combinators.

-* `getParserState` returns the full parser state.
-
-* `updateParserState` applies a given function on the parser state.
-
-This list of core functions is longer than in some other libraries. Our goal
-is efficient, readable implementations, and rich functionality, not minimal
-number of primitive combinators. You can read the comprehensive description
-of every primitive function in
-[Megaparsec documentation](https://hackage.haskell.org/package/megaparsec/docs/Text-Megaparsec-Prim.html).
+So now that we have matched the main “performance boosters” of Attoparsec,
+Megaparsec 6 is not significantly slower than Attoparsec if you write your
+parser carefully.

 Megaparsec can currently work with the following types of input stream
 out-of-the-box:
@ -120,14 +101,8 @@ Megaparsec 5 introduces well-typed error messages and the ability to use
 custom data types to adjust the library to specific domain of interest. No
 need to use a shapeless bunch of strings anymore.

-The default error component (`Dec`) has constructors corresponding to the
-`fail` function and indentation-related error messages. It is a decent
-option that should work out-of-box for most parsing needs, while you are
-free to use your own custom error component when necessary.
-
-This new design allowed Megaparsec 5 to have much more helpful error
-messages for indentation-sensitive parsing instead of the plain “incorrect
-indentation” phrase.
+The design of parse errors has been revised in version 6 significantly, but
+custom errors are still easy (probably even easier now).

 ### Alex and Happy support

@ -137,6 +112,10 @@ gives you full control over textual positions that are used to report token
 positions in error messages. You can update current position on per
 character basis or extract it from token.

+The design of the `Stream` type class has been changed significantly in
+version 6, but user can still work with custom streams of tokens without
+problems.
+
 ### Character parsing

 Megaparsec has decent support for Unicode-aware character parsing. Functions
@ -248,6 +227,11 @@ So, if you work with something human-readable where size of input data is
 usually not huge, just go with Megaparsec, otherwise Attoparsec may be a
 better choice.

+Since version 6, Megaparsec features the same fast primitives that
+Attoparsec has, so in many cases the difference in speed is not that big.
+Megaparsec now aims to be “one size fits all” ultimate solution to parsing,
+so it can be used even to parse low-level binary formats.
+
 ### Megaparsec vs Parsec

 Since Megaparsec is a fork of Parsec, we are bound to list the main
@ -288,7 +272,8 @@ differences between the two libraries:
  tag”, e.g. we could build a context stack like “in function definition
  foo”, “in expression x”, etc. This is not possible with Parsec.

-* Megaparsec is faster.
+* Megaparsec is faster and supports efficient operations on top of `tokens`,
+  `takeWhileP`, and `takeWhile1P` just like Attoparsec.

 * Megaparsec is ~~better~~ supported.

--- a/Text/Megaparsec.hs
+++ b/Text/Megaparsec.hs
@ -32,10 +32,9 @@
 -- Megaparsec 6 uses some type-level machinery to provide flexibility
 -- without compromising on type safety. Thus type signatures are sometimes
 -- necessary to avoid ambiguous types. If you're seeing a error message that
-- reads like “Ambiguous type variable @e0@ arising from … prevents the
-- constraint … from being resolved”, you need to give an explicit signature
-- to your parser to resolve the ambiguity. It's a good idea to provide type
-- signatures for all top-level definitions.
+-- reads like “Type variable @e0@ is ambiguous …”, you need to give an
+-- explicit signature to your parser to resolve the ambiguity. It's a good
+-- idea to provide type signatures for all top-level definitions.
 --
 -- Megaparsec is capable of a lot. Apart from this standard functionality
 -- you can parse permutation phrases with "Text.Megaparsec.Perm",
@ -84,6 +83,8 @@ module Text.Megaparsec
  , unexpected
  , match
  , region
+  , skipWhileP
+  , skipWhile1P
    -- * Parser state combinators
  , getInput
  , setInput
@ -112,17 +113,14 @@ import Control.Monad.State.Class hiding (state)
 import Control.Monad.Trans
 import Control.Monad.Trans.Identity
 import Data.Data (Data)
-import Data.Foldable (foldl')
-import Data.List (genericTake)
 import Data.List.NonEmpty (NonEmpty (..))
-import Data.Monoid hiding ((<>))
+import Data.Maybe (fromJust)
 import Data.Proxy
 import Data.Semigroup
 import Data.Set (Set)
 import Data.Typeable (Typeable)
 import Debug.Trace
 import GHC.Generics
-import Prelude hiding (all)
 import qualified Control.Applicative               as A
 import qualified Control.Monad.Fail                as Fail
 import qualified Control.Monad.RWS.Lazy            as L
@ -141,7 +139,6 @@ import Text.Megaparsec.Stream

 #if !MIN_VERSION_base(4,8,0)
 import Control.Applicative
-import Data.Word (Word)
 #endif

 ----------------------------------------------------------------------------
@ -154,7 +151,7 @@ data State s = State
    -- ^ Current input (already processed input is removed from the stream)
  , statePos :: NonEmpty SourcePos
    -- ^ Current position (column + line number) with support for include files
-  , stateTokensProcessed :: {-# UNPACK #-} !Word
+  , stateTokensProcessed :: {-# UNPACK #-} !Int
    -- ^ Number of processed tokens so far
    --
    -- @since 5.2.0
@ -384,7 +381,7 @@ instance (Ord e, Stream s) => MonadPlus (ParsecT e s m) where

 pZero :: ParsecT e s m a
 pZero = ParsecT $ \s@(State _ pos _ _) _ _ _ eerr ->
-  eerr (TrivialError pos E.empty E.empty) s
+  eerr (TrivialError pos Nothing E.empty) s
 {-# INLINE pZero #-}

 pPlus :: (Ord e, Stream s)
@ -570,7 +567,7 @@ class (Stream s, A.Alternative m, MonadPlus m)
  -- @since 6.0.0

  failure
-    :: Set (ErrorItem (Token s)) -- ^ Unexpected items
+    :: Maybe (ErrorItem (Token s)) -- ^ Unexpected item
    -> Set (ErrorItem (Token s)) -- ^ Expected items
    -> m a

@ -699,17 +696,18 @@ class (Stream s, A.Alternative m, MonadPlus m)
  -- >         else Left (Set.singleton (Tokens (x:|[])), Set.empty)

  token
-    :: (Token s -> Either ( Set (ErrorItem (Token s))
+    :: (Token s -> Either ( Maybe (ErrorItem (Token s))
                          , Set (ErrorItem (Token s)) ) a)
       -- ^ Matching function for the token to parse, it allows to construct
-       -- arbitrary error message on failure as well; sets in the tuple are:
-       -- unexpected and expected items
+       -- arbitrary error message on failure as well; things in the tuple
+       -- are: unexpected item (if any) and expected items
    -> Maybe (Token s) -- ^ Token to report when input stream is empty
    -> m a

-  -- | The parser @'tokens' test@ parses a list of tokens and returns it.
+  -- | The parser @'tokens' test@ parses a chunk of input and returns it.
  -- Supplied predicate @test@ is used to check equality of given and parsed
-  -- tokens.
+  -- chunks after a candidate chunk of correct length is fetched from the
+  -- stream.
  --
  -- This can be used for example to write 'Text.Megaparsec.Char.string':
  --
@ -732,11 +730,37 @@ class (Stream s, A.Alternative m, MonadPlus m)
  -- performance in any way.

  tokens
-    :: (Token s -> Token s -> Bool)
+    :: (Tokens s -> Tokens s -> Bool)
       -- ^ Predicate to check equality of tokens
-    -> [Token s]
+    -> Tokens s
       -- ^ List of tokens to parse
-    -> m [Token s]
+    -> m (Tokens s)
+
+  -- | Parse /zero/ or more tokens for which the supplied predicate holds.
+  -- Try to use this as much as possible because for many streams the
+  -- combinator is much faster than parsers built with 'many' and @satisfy@.
+  --
+  -- The following equations should clarify the behavior:
+  --
+  -- > takeWhileP (Just "foo") f = many (satisfy f <?> "foo")
+  -- > takeWhileP Nothing f      = many (satisfy f)
+  --
+  -- The combinator never fails, although it may parse an empty chunk.
+  --
+  -- @since 6.0.0
+
+  takeWhileP
+    :: Maybe String    -- ^ Name for a single token in the row
+    -> (Token s -> Bool) -- ^ Predicate to use to test tokens
+    -> m (Tokens s)    -- ^ A chunk of matching tokens
+
+  -- | Similar to 'takeWhileP', but fails if it can't parse at least one
+  -- token.
+
+  takeWhile1P
+    :: Maybe String    -- ^ Name for a single token in the row
+    -> (Token s -> Bool) -- ^ Predicate to use to test tokens
+    -> m (Tokens s)    -- ^ A chunk of matching tokens

  -- | Return the full parser state as a 'State' record.

@ -747,7 +771,7 @@ class (Stream s, A.Alternative m, MonadPlus m)
  updateParserState :: (State s -> State s) -> m ()

 instance (Ord e, Stream s) => MonadParsec e s (ParsecT e s m) where
-  failure           = pTrivialFailure
+  failure           = pFailure
  fancyFailure      = pFancyFailure
  label             = pLabel
  try               = pTry
@ -758,16 +782,18 @@ instance (Ord e, Stream s) => MonadParsec e s (ParsecT e s m) where
  eof               = pEof
  token             = pToken
  tokens            = pTokens
+  takeWhileP        = pTakeWhileP
+  takeWhile1P       = pTakeWhile1P
  getParserState    = pGetParserState
  updateParserState = pUpdateParserState

-pTrivialFailure
-  :: Set (ErrorItem (Token s))
+pFailure
+  :: Maybe (ErrorItem (Token s))
  -> Set (ErrorItem (Token s))
  -> ParsecT e s m a
-pTrivialFailure us ps = ParsecT $ \s@(State _ pos _ _) _ _ _ eerr ->
+pFailure us ps = ParsecT $ \s@(State _ pos _ _) _ _ _ eerr ->
  eerr (TrivialError pos us ps) s
-{-# INLINE pTrivialFailure #-}
+{-# INLINE pFailure #-}

 pFancyFailure
  :: Set (ErrorFancy e)
@ -804,8 +830,8 @@ pLookAhead p = ParsecT $ \s _ cerr eok eerr ->

 pNotFollowedBy :: Stream s => ParsecT e s m a -> ParsecT e s m ()
 pNotFollowedBy p = ParsecT $ \s@(State input pos _ _) _ _ eok eerr ->
-  let what = maybe EndOfInput (Tokens . nes . fst) (uncons input)
-      unexpect u = TrivialError pos (E.singleton u) E.empty
+  let what = maybe EndOfInput (Tokens . nes . fst) (take1_ input)
+      unexpect u = TrivialError pos (pure u) E.empty
      cok' _ _ _ = eerr (unexpect what) s
      cerr'  _ _ = eok () s mempty
      eok' _ _ _ = eerr (unexpect what) s
@ -844,83 +870,106 @@ pObserving p = ParsecT $ \s cok _ eok _ ->

 pEof :: forall e s m. Stream s => ParsecT e s m ()
 pEof = ParsecT $ \s@(State input (pos:|z) tp w) _ _ eok eerr ->
-  case uncons input of
+  case take1_ input of
    Nothing    -> eok () s mempty
    Just (x,_) ->
-      let !apos = fst (updatePos (Proxy :: Proxy s) w pos x)
-          us    = (E.singleton . Tokens . nes) x
+      let !apos = positionAt1 (Proxy :: Proxy s) pos x
+          us    = (pure . Tokens . nes) x
          ps    = E.singleton EndOfInput
      in eerr (TrivialError (apos:|z) us ps)
          (State input (apos:|z) tp w)
 {-# INLINE pEof #-}

 pToken :: forall e s m a. Stream s
-  => (Token s -> Either ( Set (ErrorItem (Token s))
+  => (Token s -> Either ( Maybe (ErrorItem (Token s))
                        , Set (ErrorItem (Token s)) ) a)
  -> Maybe (Token s)
  -> ParsecT e s m a
 pToken test mtoken = ParsecT $ \s@(State input (pos:|z) tp w) cok _ _ eerr ->
-  case uncons input of
+  case take1_ input of
    Nothing ->
-      let us = E.singleton EndOfInput
+      let us = pure EndOfInput
          ps = maybe E.empty (E.singleton . Tokens . nes) mtoken
      in eerr (TrivialError (pos:|z) us ps) s
    Just (c,cs) ->
-      let (apos, npos) = updatePos (Proxy :: Proxy s) w pos c
-      in case test c of
+      case test c of
        Left (us, ps) ->
-          apos `seq` eerr
-            (TrivialError (apos:|z) us ps)
-            (State input (apos:|z) tp w)
+          let !apos = positionAt1 (Proxy :: Proxy s) pos c
+          in eerr (TrivialError (apos:|z) us ps)
+                  (State input (apos:|z) tp w)
        Right x ->
-          let newstate = State cs (npos:|z) (tp + 1) w
-          in npos `seq` cok x newstate mempty
+          let !npos = advance1 (Proxy :: Proxy s) w pos c
+              newstate = State cs (npos:|z) (tp + 1) w
+          in cok x newstate mempty
 {-# INLINE pToken #-}

 pTokens :: forall e s m. Stream s
-  => (Token s -> Token s -> Bool)
-  -> [Token s]
-  -> ParsecT e s m [Token s]
-pTokens _ [] = ParsecT $ \s _ _ eok _ -> eok [] s mempty
-pTokens test tts = ParsecT $ \s@(State input (pos:|z) tp w) cok _ _ eerr ->
-  let updatePos' = updatePos (Proxy :: Proxy s) w
-      toTokens   = Tokens . NE.fromList . reverse
+  => (Tokens s -> Tokens s -> Bool)
+  -> Tokens s
+  -> ParsecT e s m (Tokens s)
+pTokens f tts = ParsecT $ \s@(State input (pos:|z) tp w) cok _ _ eerr ->
+  let pxy = Proxy :: Proxy s
      unexpect pos' u =
-        let us = E.singleton u
-            ps = (E.singleton . Tokens . NE.fromList) tts
+        let us = pure u
+            ps = (E.singleton . Tokens . NE.fromList . chunkToTokens pxy) tts
        in TrivialError pos' us ps
-      go _ [] is rs =
-        let ris   = reverse is
-            (npos, tp') = foldl'
-              (\(p, n) t -> (snd (updatePos' p t), n + 1))
-              (pos, tp)
-              ris
-        in cok ris (State rs (npos:|z) tp' w) mempty
-      go apos (t:ts) is rs =
-        case uncons rs of
-          Nothing ->
-            apos `seq` eerr
-              (unexpect (apos:|z) (toTokens is))
-              (State input (apos:|z) tp w)
-          Just (x,xs) ->
-            if test t x
-              then go apos ts (x:is) xs
-              else apos `seq` eerr
-                     (unexpect (apos:|z) . toTokens $ x:is)
-                     (State input (apos:|z) tp w)
-  in case uncons input of
-       Nothing ->
-         eerr (unexpect (pos:|z) EndOfInput) s
-       Just (x,xs) ->
-         let t:ts = tts
-             apos = fst (updatePos' pos x)
-         in if test t x
-              then go apos ts [x] xs
-              else apos `seq` eerr
-                     (unexpect (apos:|z) $ Tokens (nes x))
-                     (State input (apos:|z) tp w)
+      len = chunkLength pxy tts
+  in case takeN_ len input of
+    Nothing ->
+      eerr (unexpect (pos:|z) EndOfInput) s
+    Just (tts', input') ->
+      if f tts tts'
+        then let !npos = advanceN pxy w pos tts'
+             in cok tts' (State input' (npos:|z) (tp + len) w) mempty
+        else let !apos = positionAtN pxy pos tts'
+                 ps = (Tokens . NE.fromList . chunkToTokens pxy) tts'
+             in eerr (unexpect (apos:|z) ps) (State input (apos:|z) tp w)
 {-# INLINE pTokens #-}

+pTakeWhileP :: forall e s m. Stream s
+  => Maybe String
+  -> (Token s -> Bool)
+  -> ParsecT e s m (Tokens s)
+pTakeWhileP ml f = ParsecT $ \(State input (pos:|z) tp w) cok _ eok _ ->
+  let pxy = Proxy :: Proxy s
+      (ts, input') = takeWhile_ f input
+      !npos = advanceN pxy w pos ts
+      len = chunkLength pxy ts
+      hs =
+        case ml >>= NE.nonEmpty of
+          Nothing -> mempty
+          Just l -> (Hints . pure . E.singleton . Label) l
+  in if chunkEmpty pxy ts
+       then eok ts (State input' (npos:|z) (tp + len) w) hs
+       else cok ts (State input' (npos:|z) (tp + len) w) hs
+{-# INLINE pTakeWhileP #-}
+
+pTakeWhile1P :: forall e s m. Stream s
+  => Maybe String
+  -> (Token s -> Bool)
+  -> ParsecT e s m (Tokens s)
+pTakeWhile1P ml f = ParsecT $ \(State input (pos:|z) tp w) cok _ _ eerr ->
+  let pxy = Proxy :: Proxy s
+      (ts, input') = takeWhile_ f input
+      len = chunkLength pxy ts
+      el = Label <$> (ml >>= NE.nonEmpty)
+      hs =
+        case el of
+          Nothing -> mempty
+          Just l -> (Hints . pure . E.singleton) l
+  in if chunkEmpty pxy ts
+       then let !apos = positionAtN pxy pos ts
+                us    = pure $
+                  case take1_ input of
+                    Nothing -> EndOfInput
+                    Just (t,_) -> Tokens (nes t)
+                ps    = maybe E.empty E.singleton el
+            in eerr (TrivialError (apos:|z) us ps)
+                    (State input (apos:|z) tp w)
+       else let !npos = advanceN pxy w pos ts
+            in cok ts (State input' (npos:|z) (tp + len) w) hs
+{-# INLINE pTakeWhile1P #-}
+
 pGetParserState :: ParsecT e s m (State s)
 pGetParserState = ParsecT $ \s _ _ eok _ -> eok s s mempty
 {-# INLINE pGetParserState #-}
@ -951,6 +1000,8 @@ instance MonadParsec e s m => MonadParsec e s (L.StateT st m) where
  eof                        = lift eof
  token test mt              = lift (token test mt)
  tokens e ts                = lift (tokens e ts)
+  takeWhileP l f             = lift (takeWhileP l f)
+  takeWhile1P l f            = lift (takeWhile1P l f)
  getParserState             = lift getParserState
  updateParserState f        = lift (updateParserState f)

@ -970,6 +1021,8 @@ instance MonadParsec e s m => MonadParsec e s (S.StateT st m) where
  eof                        = lift eof
  token test mt              = lift (token test mt)
  tokens e ts                = lift (tokens e ts)
+  takeWhileP l f             = lift (takeWhileP l f)
+  takeWhile1P l f            = lift (takeWhile1P l f)
  getParserState             = lift getParserState
  updateParserState f        = lift (updateParserState f)

@ -986,6 +1039,8 @@ instance MonadParsec e s m => MonadParsec e s (L.ReaderT r m) where
  eof                         = lift eof
  token test mt               = lift (token test mt)
  tokens e ts                 = lift (tokens e ts)
+  takeWhileP l f              = lift (takeWhileP l f)
+  takeWhile1P l f             = lift (takeWhile1P l f)
  getParserState              = lift getParserState
  updateParserState f         = lift (updateParserState f)

@ -1005,6 +1060,8 @@ instance (Monoid w, MonadParsec e s m) => MonadParsec e s (L.WriterT w m) where
  eof                         = lift eof
  token test mt               = lift (token test mt)
  tokens e ts                 = lift (tokens e ts)
+  takeWhileP l f              = lift (takeWhileP l f)
+  takeWhile1P l f             = lift (takeWhile1P l f)
  getParserState              = lift getParserState
  updateParserState f         = lift (updateParserState f)

@ -1024,6 +1081,8 @@ instance (Monoid w, MonadParsec e s m) => MonadParsec e s (S.WriterT w m) where
  eof                         = lift eof
  token test mt               = lift (token test mt)
  tokens e ts                 = lift (tokens e ts)
+  takeWhileP l f              = lift (takeWhileP l f)
+  takeWhile1P l f             = lift (takeWhile1P l f)
  getParserState              = lift getParserState
  updateParserState f         = lift (updateParserState f)

@ -1045,6 +1104,8 @@ instance (Monoid w, MonadParsec e s m) => MonadParsec e s (L.RWST r w st m) wher
  eof                         = lift eof
  token test mt               = lift (token test mt)
  tokens e ts                 = lift (tokens e ts)
+  takeWhileP l f              = lift (takeWhileP l f)
+  takeWhile1P l f             = lift (takeWhile1P l f)
  getParserState              = lift getParserState
  updateParserState f         = lift (updateParserState f)

@ -1066,6 +1127,8 @@ instance (Monoid w, MonadParsec e s m) => MonadParsec e s (S.RWST r w st m) wher
  eof                         = lift eof
  token test mt               = lift (token test mt)
  tokens e ts                 = lift (tokens e ts)
+  takeWhileP l f              = lift (takeWhileP l f)
+  takeWhile1P l f             = lift (takeWhile1P l f)
  getParserState              = lift getParserState
  updateParserState f         = lift (updateParserState f)

@ -1082,6 +1145,8 @@ instance MonadParsec e s m => MonadParsec e s (IdentityT m) where
  eof                         = lift eof
  token test mt               = lift (token test mt)
  tokens e ts                 = lift $ tokens e ts
+  takeWhileP l f              = lift (takeWhileP l f)
+  takeWhile1P l f             = lift (takeWhile1P l f)
  getParserState              = lift getParserState
  updateParserState f         = lift $ updateParserState f

@ -1104,6 +1169,7 @@ infix 0 <?>

 (<?>) :: MonadParsec e s m => m a -> String -> m a
 (<?>) = flip label
+{-# INLINE (<?>) #-}

 -- | The parser @unexpected item@ fails with an error message telling about
 -- unexpected item @item@ without consuming any input.
@ -1111,7 +1177,7 @@ infix 0 <?>
 -- > unexpected item = failure (Set.singleton item) Set.empty

 unexpected :: MonadParsec e s m => ErrorItem (Token s) -> m a
-unexpected item = failure (E.singleton item) E.empty
+unexpected item = failure (pure item) E.empty
 {-# INLINE unexpected #-}

 -- | Return both the result of a parse and the list of tokens that were
@ -1120,13 +1186,19 @@ unexpected item = failure (E.singleton item) E.empty
 --
 -- @since 5.3.0

-match :: MonadParsec e s m => m a -> m ([Token s], a)
+match :: MonadParsec e s m => m a -> m (Tokens s, a)
 match p = do
  tp  <- getTokensProcessed
  s   <- getInput
  r   <- p
  tp' <- getTokensProcessed
-  return (streamTake (tp' - tp) s, r)
+  -- NOTE The 'fromJust' call here should never fail because if the stream
+  -- is empty before 'p' (the only case when 'takeN_' can return 'Nothing'
+  -- as per its invariants), (tp' - tp) won't be greater than 0, and in that
+  -- case 'Just' is guaranteed to be returned as per another invariant of
+  -- 'takeN_'.
+  return ((fst . fromJust) (takeN_ (tp' - tp) s), r)
+{-# INLINEABLE match #-}

 -- | Specify how to process 'ParseError's that happen inside of this
 -- wrapper. As a side effect of the current implementation changing
@ -1152,6 +1224,25 @@ region f m = do
          updateParserState $ \st -> st { statePos = pos }
          fancyFailure xs
    Right x -> return x
+{-# INLINEABLE region #-}
+
+-- | The same as 'takeWhileP', but discards the result.
+
+skipWhileP :: MonadParsec e s m
+  => Maybe String      -- ^ Name of a single token in the row
+  -> (Token s -> Bool) -- ^ Predicate to use to test tokens
+  -> m ()
+skipWhileP l f = void (takeWhileP l f)
+{-# INLINE skipWhileP #-}
+
+-- | The same as 'takeWhile1P', but discards the result.
+
+skipWhile1P :: MonadParsec e s m
+  => Maybe String      -- ^ Name of a single token in the row
+  -> (Token s -> Bool) -- ^ Predicate to use to test tokens
+  -> m ()
+skipWhile1P l f = void (takeWhile1P l f)
+{-# INLINE skipWhile1P #-}

 ----------------------------------------------------------------------------
 -- Parser state combinators
@ -1182,8 +1273,9 @@ getPosition = NE.head . statePos <$> getParserState
 getNextTokenPosition :: forall e s m. MonadParsec e s m => m (Maybe SourcePos)
 getNextTokenPosition = do
  State {..} <- getParserState
-  let f = fst . updatePos (Proxy :: Proxy s) stateTabWidth (NE.head statePos)
-  return (f . fst <$> uncons stateInput)
+  let f = positionAt1 (Proxy :: Proxy s) (NE.head statePos)
+  return (f . fst <$> take1_ stateInput)
+{-# INLINEABLE getNextTokenPosition #-}

 -- | @setPosition pos@ sets the current source position to @pos@.
 --
@ -1220,16 +1312,16 @@ popPosition = updateParserState $ \(State s z tp w) ->

 -- | Get the number of tokens processed so far.
 --
-- @since 5.2.0
+-- @since 6.0.0

-getTokensProcessed :: MonadParsec e s m => m Word
+getTokensProcessed :: MonadParsec e s m => m Int
 getTokensProcessed = stateTokensProcessed <$> getParserState

 -- | Set the number of tokens processed so far.
 --
-- @since 5.2.0
+-- @since 6.0.0

-setTokensProcessed :: MonadParsec e s m => Word -> m ()
+setTokensProcessed :: MonadParsec e s m => Int -> m ()
 setTokensProcessed tp = updateParserState $ \(State s pos _ w) ->
  State s pos tp w

@ -1295,6 +1387,7 @@ dbg :: forall e s m a.
  -> ParsecT e s m a   -- ^ Parser that prints debugging messages
 dbg lbl p = ParsecT $ \s cok cerr eok eerr ->
  let l = dbgLog lbl :: DbgItem s e a -> String
+      unfold = streamTake 40
      cok' x s' hs = flip trace (cok x s' hs) $
        l (DbgIn (unfold (stateInput s))) ++
        l (DbgCOK (streamTake (streamDelta s s') (stateInput s)) x)
@ -1355,18 +1448,13 @@ showStream ts =
 streamDelta
  :: State s           -- ^ State of parser before consumption
  -> State s           -- ^ State of parser after consumption
-  -> Word              -- ^ Number of consumed tokens
+  -> Int               -- ^ Number of consumed tokens
 streamDelta s0 s1 = stateTokensProcessed s1 - stateTokensProcessed s0

 -- | Extract a given number of tokens from the stream.

-streamTake :: Stream s => Word -> s -> [Token s]
-streamTake n s = genericTake n (unfold s)
-
-- | A custom version of 'unfold' that matches signature of the 'uncons'
-- method in the 'Stream' type class we use.
-
-unfold :: Stream s => s -> [Token s]
-unfold s = case uncons s of
-  Nothing -> []
-  Just (t, s') -> t : unfold s'
+streamTake :: forall s. Stream s => Int -> s -> [Token s]
+streamTake n s =
+  case fst <$> takeN_ n s of
+    Nothing -> []
+    Just chunk -> chunkToTokens (Proxy :: Proxy s) chunk
--- a/Text/Megaparsec/Char.hs
+++ b/Text/Megaparsec/Char.hs
@ -11,10 +11,11 @@
 --
 -- Commonly used character parsers.

-{-# LANGUAGE CPP              #-}
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE LambdaCase       #-}
-{-# LANGUAGE TypeFamilies     #-}
+{-# LANGUAGE CPP                 #-}
+{-# LANGUAGE FlexibleContexts    #-}
+{-# LANGUAGE LambdaCase          #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeFamilies        #-}

 module Text.Megaparsec.Char
  ( -- * Simple parsers
@ -59,8 +60,11 @@ where

 import Control.Applicative
 import Data.Char
+import Data.Function (on)
 import Data.List.NonEmpty (NonEmpty (..))
-import qualified Data.Set as E
+import Data.Proxy
+import qualified Data.CaseInsensitive as CI
+import qualified Data.Set             as E

 import Text.Megaparsec

@ -74,29 +78,29 @@ import Prelude hiding (any, elem, notElem)

 -- | Parse a newline character.

-newline :: (MonadParsec e s m, Token s ~ Char) => m Char
+newline :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
 newline = char '\n'
 {-# INLINE newline #-}

 -- | Parse a carriage return character followed by a newline character.
 -- Return the sequence of characters parsed.

-crlf :: (MonadParsec e s m, Token s ~ Char) => m String
-crlf = string "\r\n"
+crlf :: forall e s m. (MonadParsec e s m, Token s ~ Char) => m (Tokens s)
+crlf = string (tokensToChunk (Proxy :: Proxy s) "\r\n")
 {-# INLINE crlf #-}

 -- | Parse a CRLF (see 'crlf') or LF (see 'newline') end of line. Return the
 -- sequence of characters parsed.
--
-- > eol = (pure <$> newline) <|> crlf <?> "end of line"

-eol :: (MonadParsec e s m, Token s ~ Char) => m String
-eol = (pure <$> newline) <|> crlf <?> "end of line"
+eol :: forall e s m. (MonadParsec e s m, Token s ~ Char) => m (Tokens s)
+eol = (tokenToChunk (Proxy :: Proxy s) <$> newline)
+  <|> crlf
+  <?> "end of line"
 {-# INLINE eol #-}

 -- | Parse a tab character.

-tab :: (MonadParsec e s m, Token s ~ Char) => m Char
+tab :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
 tab = char '\t'
 {-# INLINE tab #-}

@ -105,7 +109,7 @@ tab = char '\t'
 -- See also: 'skipMany' and 'spaceChar'.

 space :: (MonadParsec e s m, Token s ~ Char) => m ()
-space = skipMany spaceChar
+space = skipWhileP (Just "white space") isSpace
 {-# INLINE space #-}

 ----------------------------------------------------------------------------
@ -114,14 +118,14 @@ space = skipMany spaceChar
 -- | Parse a control character (a non-printing character of the Latin-1
 -- subset of Unicode).

-controlChar :: (MonadParsec e s m, Token s ~ Char) => m Char
+controlChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
 controlChar = satisfy isControl <?> "control character"
 {-# INLINE controlChar #-}

 -- | Parse a Unicode space character, and the control characters: tab,
 -- newline, carriage return, form feed, and vertical tab.

-spaceChar :: (MonadParsec e s m, Token s ~ Char) => m Char
+spaceChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
 spaceChar = satisfy isSpace <?> "white space"
 {-# INLINE spaceChar #-}

@ -129,20 +133,20 @@ spaceChar = satisfy isSpace <?> "white space"
 -- case is used by a small number of letter ligatures like the
 -- single-character form of Lj.

-upperChar :: (MonadParsec e s m, Token s ~ Char) => m Char
+upperChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
 upperChar = satisfy isUpper <?> "uppercase letter"
 {-# INLINE upperChar #-}

 -- | Parse a lower-case alphabetic Unicode character.

-lowerChar :: (MonadParsec e s m, Token s ~ Char) => m Char
+lowerChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
 lowerChar = satisfy isLower <?> "lowercase letter"
 {-# INLINE lowerChar #-}

 -- | Parse an alphabetic Unicode character: lower-case, upper-case, or
 -- title-case letter, or a letter of case-less scripts\/modifier letter.

-letterChar :: (MonadParsec e s m, Token s ~ Char) => m Char
+letterChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
 letterChar = satisfy isLetter <?> "letter"
 {-# INLINE letterChar #-}

@ -152,88 +156,90 @@ letterChar = satisfy isLetter <?> "letter"
 -- parser but not by 'digitChar'. Such digits may be part of identifiers but
 -- are not used by the printer and reader to represent numbers.

-alphaNumChar :: (MonadParsec e s m, Token s ~ Char) => m Char
+alphaNumChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
 alphaNumChar = satisfy isAlphaNum <?> "alphanumeric character"
 {-# INLINE alphaNumChar #-}

 -- | Parse a printable Unicode character: letter, number, mark, punctuation,
 -- symbol or space.

-printChar :: (MonadParsec e s m, Token s ~ Char) => m Char
+printChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
 printChar = satisfy isPrint <?> "printable character"
 {-# INLINE printChar #-}

 -- | Parse an ASCII digit, i.e between “0” and “9”.

-digitChar :: (MonadParsec e s m, Token s ~ Char) => m Char
+digitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
 digitChar = satisfy isDigit <?> "digit"
 {-# INLINE digitChar #-}

 -- | Parse an octal digit, i.e. between “0” and “7”.

-octDigitChar :: (MonadParsec e s m, Token s ~ Char) => m Char
+octDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
 octDigitChar = satisfy isOctDigit <?> "octal digit"
 {-# INLINE octDigitChar #-}

 -- | Parse a hexadecimal digit, i.e. between “0” and “9”, or “a” and “f”, or
 -- “A” and “F”.

-hexDigitChar :: (MonadParsec e s m, Token s ~ Char) => m Char
+hexDigitChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
 hexDigitChar = satisfy isHexDigit <?> "hexadecimal digit"
 {-# INLINE hexDigitChar #-}

 -- | Parse a Unicode mark character (accents and the like), which combines
 -- with preceding characters.

-markChar :: (MonadParsec e s m, Token s ~ Char) => m Char
+markChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
 markChar = satisfy isMark <?> "mark character"
 {-# INLINE markChar #-}

 -- | Parse a Unicode numeric character, including digits from various
 -- scripts, Roman numerals, etc.

-numberChar :: (MonadParsec e s m, Token s ~ Char) => m Char
+numberChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
 numberChar = satisfy isNumber <?> "numeric character"
 {-# INLINE numberChar #-}

 -- | Parse a Unicode punctuation character, including various kinds of
 -- connectors, brackets and quotes.

-punctuationChar :: (MonadParsec e s m, Token s ~ Char) => m Char
+punctuationChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
 punctuationChar = satisfy isPunctuation <?> "punctuation"
 {-# INLINE punctuationChar #-}

 -- | Parse a Unicode symbol characters, including mathematical and currency
 -- symbols.

-symbolChar :: (MonadParsec e s m, Token s ~ Char) => m Char
+symbolChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
 symbolChar = satisfy isSymbol <?> "symbol"
 {-# INLINE symbolChar #-}

 -- | Parse a Unicode space and separator characters.

-separatorChar :: (MonadParsec e s m, Token s ~ Char) => m Char
+separatorChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
 separatorChar = satisfy isSeparator <?> "separator"
 {-# INLINE separatorChar #-}

 -- | Parse a character from the first 128 characters of the Unicode
 -- character set, corresponding to the ASCII character set.

-asciiChar :: (MonadParsec e s m, Token s ~ Char) => m Char
+asciiChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
 asciiChar = satisfy isAscii <?> "ASCII character"
 {-# INLINE asciiChar #-}

 -- | Parse a character from the first 256 characters of the Unicode
 -- character set, corresponding to the ISO 8859-1 (Latin-1) character set.

-latin1Char :: (MonadParsec e s m, Token s ~ Char) => m Char
+latin1Char :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
 latin1Char = satisfy isLatin1 <?> "Latin-1 character"
 {-# INLINE latin1Char #-}

 -- | @'charCategory' cat@ parses character in Unicode General Category
 -- @cat@, see 'Data.Char.GeneralCategory'.

-charCategory :: (MonadParsec e s m, Token s ~ Char) => GeneralCategory -> m Char
+charCategory :: (MonadParsec e s m, Token s ~ Char)
+  => GeneralCategory
+  -> m (Token s)
 charCategory cat = satisfy ((== cat) . generalCategory) <?> categoryName cat
 {-# INLINE charCategory #-}

@ -279,14 +285,14 @@ categoryName = \case
 --
 -- > semicolon = char ';'

-char :: (MonadParsec e s m, Token s ~ Char) => Char -> m Char
+char :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s)
 char c = token testChar (Just c)
  where
-    f x = E.singleton (Tokens (x:|[]))
+    f x = Tokens (x:|[])
    testChar x =
      if x == c
        then Right x
-        else Left (f x, f c)
+        else Left (pure (f x), E.singleton (f c))
 {-# INLINE char #-}

 -- | The same as 'char' but case-insensitive. This parser returns the
@ -299,7 +305,7 @@ char c = token testChar (Just c)
 -- unexpected 'G'
 -- expecting 'E' or 'e'

-char' :: (MonadParsec e s m, Token s ~ Char) => Char -> m Char
+char' :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s)
 char' c = choice [char c, char $ swapCase c]
  where
    swapCase x
@ -310,7 +316,7 @@ char' c = choice [char c, char $ swapCase c]

 -- | This parser succeeds for any character. Returns the parsed character.

-anyChar :: (MonadParsec e s m, Token s ~ Char) => m Char
+anyChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
 anyChar = satisfy (const True) <?> "character"
 {-# INLINE anyChar #-}

@ -324,7 +330,8 @@ anyChar = satisfy (const True) <?> "character"
 --
 -- > digit = oneOf ['0'..'9'] <?> "digit"

-oneOf :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char
+oneOf :: (Foldable f, MonadParsec e s m, Token s ~ Char)
+  => f (Token s) -> m (Token s)
 oneOf cs = satisfy (`elem` cs)
 {-# INLINE oneOf #-}

@ -333,7 +340,9 @@ oneOf cs = satisfy (`elem` cs)
 --
 -- > vowel = oneOf' "aeiou" <?> "vowel"

-oneOf' :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char
+oneOf' :: (Foldable f, MonadParsec e s m, Token s ~ Char)
+  => f (Token s)
+  -> m (Token s)
 oneOf' cs = satisfy (`elemi` cs)
 {-# INLINE oneOf' #-}

@ -341,7 +350,9 @@ oneOf' cs = satisfy (`elemi` cs)
 -- /not/ in the supplied list of characters @cs@. Returns the parsed
 -- character.

-noneOf :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char
+noneOf :: (Foldable f, MonadParsec e s m, Token s ~ Char)
+  => f (Token s)
+  -> m (Token s)
 noneOf cs = satisfy (`notElem` cs)
 {-# INLINE noneOf #-}

@ -349,7 +360,9 @@ noneOf cs = satisfy (`notElem` cs)
 --
 -- > consonant = noneOf' "aeiou" <?> "consonant"

-noneOf' :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char
+noneOf' :: (Foldable f, MonadParsec e s m, Token s ~ Char)
+  => f (Token s)
+  -> m (Token s)
 noneOf' cs = satisfy (`notElemi` cs)
 {-# INLINE noneOf' #-}

@ -360,13 +373,15 @@ noneOf' cs = satisfy (`notElemi` cs)
 -- > digitChar = satisfy isDigit <?> "digit"
 -- > oneOf cs  = satisfy (`elem` cs)

-satisfy :: (MonadParsec e s m, Token s ~ Char) => (Char -> Bool) -> m Char
+satisfy :: (MonadParsec e s m, Token s ~ Char)
+  => (Token s -> Bool) -- ^ Predicate to apply
+  -> m (Token s)
 satisfy f = token testChar Nothing
  where
    testChar x =
      if f x
        then Right x
-        else Left (E.singleton (Tokens (x:|[])), E.empty)
+        else Left (pure (Tokens (x:|[])), E.empty)
 {-# INLINE satisfy #-}

 ----------------------------------------------------------------------------
@ -377,7 +392,9 @@ satisfy f = token testChar Nothing
 --
 -- > divOrMod = string "div" <|> string "mod"

-string :: (MonadParsec e s m, Token s ~ Char) => String -> m String
+string :: MonadParsec e s m
+  => Tokens s
+  -> m (Tokens s)
 string = tokens (==)
 {-# INLINE string #-}

@ -387,8 +404,10 @@ string = tokens (==)
 -- >>> parseTest (string' "foobar") "foObAr"
 -- "foObAr"

-string' :: (MonadParsec e s m, Token s ~ Char) => String -> m String
-string' = tokens casei
+string' :: (MonadParsec e s m, CI.FoldCase (Tokens s))
+  => Tokens s
+  -> m (Tokens s)
+string' = tokens ((==) `on` CI.mk)
 {-# INLINE string' #-}

 ----------------------------------------------------------------------------
--- a/Text/Megaparsec/Error.hs
+++ b/Text/Megaparsec/Error.hs
@ -39,7 +39,7 @@ import Control.Exception
 import Data.Data (Data)
 import Data.List (intercalate)
 import Data.List.NonEmpty (NonEmpty (..))
-import Data.Maybe (fromMaybe)
+import Data.Maybe (fromMaybe, isNothing)
 import Data.Semigroup
 import Data.Set (Set)
 import Data.Typeable (Typeable)
@ -91,10 +91,8 @@ instance NFData a => NFData (ErrorFancy a) where
  rnf (ErrorIndentation ord ref act) = ord `seq` rnf ref `seq` rnf act
  rnf (ErrorCustom a) = rnf a

-- | 'ParseError' represents… parse errors. It provides the stack of source
-- positions, a set of expected and unexpected tokens as well as a set of
-- custom associated data. The data type is parametrized over the token type
-- @t@ and the custom data @e@.
+-- | 'ParseError' represents… parse errors. The data type is parametrized
+-- over the token type @t@ and the custom data @e@.
 --
 -- Note that the stack of source positions contains current position as its
 -- head, and the rest of positions allows to track full sequence of include
@ -109,10 +107,10 @@ instance NFData a => NFData (ErrorFancy a) where
 -- @since 6.0.0

 data ParseError t e
-  = TrivialError (NonEmpty SourcePos) (Set (ErrorItem t)) (Set (ErrorItem t))
+  = TrivialError (NonEmpty SourcePos) (Maybe (ErrorItem t)) (Set (ErrorItem t))
    -- ^ Trivial errors, generated by Megaparsec's machinery. The data
-    -- constructor includes the stack of source positions, unexpected, and
-    -- expected tokens.
+    -- constructor includes the stack of source positions, unexpected token
+    -- (if any), and expected tokens.
  | FancyError (NonEmpty SourcePos) (Set (ErrorFancy e))
    -- ^ Fancy, custom errors.
  deriving (Show, Read, Eq, Data, Typeable, Generic)
@ -124,7 +122,7 @@ instance (Ord t, Ord e) => Semigroup (ParseError t e) where
  {-# INLINE (<>) #-}

 instance (Ord t, Ord e) => Monoid (ParseError t e) where
-  mempty  = TrivialError (initialPos "" :| []) E.empty E.empty
+  mempty  = TrivialError (initialPos "" :| []) Nothing E.empty
  mappend = (<>)
  {-# INLINE mappend #-}

@ -164,12 +162,25 @@ mergeError e1 e2 =
    EQ ->
      case (e1, e2) of
        (TrivialError s1 u1 p1, TrivialError _ u2 p2) ->
-          TrivialError s1 (E.union u1 u2) (E.union p1 p2)
+          TrivialError s1 (n u1 u2) (E.union p1 p2)
        (FancyError {}, TrivialError {}) -> e1
        (TrivialError {}, FancyError {}) -> e2
        (FancyError s1 x1, FancyError _ x2) ->
          FancyError s1 (E.union x1 x2)
    GT -> e1
+  where
+    -- NOTE The logic behind this merging is that since we only combine
+    -- parse errors that happen at exactly the same position, all the
+    -- unexpected items will be prefixes of input stream at that position or
+    -- labels referring to the same thing. Our aim here is to choose the
+    -- longest prefix (merging with labels and end of input is somewhat
+    -- arbitrary, but is necessary because otherwise we can't make
+    -- ParseError lawful Monoid and have nice parse errors at the same
+    -- time).
+    n Nothing  Nothing = Nothing
+    n (Just x) Nothing = Just x
+    n Nothing (Just y) = Just y
+    n (Just x) (Just y) = Just (max x y)
 {-# INLINE mergeError #-}

 -- | Type class 'ShowToken' includes methods that allow to pretty-print
@ -257,13 +268,13 @@ parseErrorTextPretty
  => ParseError t e    -- ^ Parse error to render
  -> String            -- ^ Result of rendering
 parseErrorTextPretty (TrivialError _ us ps) =
-  if E.null us && E.null ps
+  if isNothing us && E.null ps
    then "unknown parse error\n"
-    else messageItemsPretty "unexpected " us <>
+    else messageItemsPretty "unexpected " (maybe E.empty E.singleton us) <>
         messageItemsPretty "expecting "  ps
 parseErrorTextPretty (FancyError _ xs) =
  if E.null xs
-    then "unknown parse error\n"
+    then "unknown fancy parse error\n"
    else unlines (showErrorComponent <$> E.toAscList xs)

 ----------------------------------------------------------------------------
--- a/Text/Megaparsec/Error/Builder.hs
+++ b/Text/Megaparsec/Error/Builder.hs
@ -13,7 +13,7 @@
 --
 -- @since 6.0.0

-{-# LANGUAGE BangPatterns        #-}
+{-# LANGUAGE CPP                 #-}
 {-# LANGUAGE DeriveDataTypeable  #-}
 {-# LANGUAGE DeriveGeneric       #-}
 {-# LANGUAGE ScopedTypeVariables #-}
@ -53,19 +53,28 @@ import Text.Megaparsec.Stream
 import qualified Data.List.NonEmpty as NE
 import qualified Data.Set           as E

+#if !MIN_VERSION_base(4,8,0)
+import Control.Applicative
+#endif
+
 ----------------------------------------------------------------------------
 -- Data types

 -- | Auxiliary type for construction of trivial parse errors.

-data ET t = ET (Set (ErrorItem t)) (Set (ErrorItem t))
+data ET t = ET (Maybe (ErrorItem t)) (Set (ErrorItem t))
  deriving (Eq, Ord, Data, Typeable, Generic)

 instance Ord t => Semigroup (ET t) where
-  ET us0 ps0 <> ET us1 ps1 = ET (E.union us0 us1) (E.union ps0 ps1)
+  ET us0 ps0 <> ET us1 ps1 = ET (n us0 us1) (E.union ps0 ps1)
+    where
+      n Nothing  Nothing = Nothing
+      n (Just x) Nothing = Just x
+      n Nothing (Just y) = Just y
+      n (Just x) (Just y) = Just (max x y)

 instance Ord t => Monoid (ET t) where
-  mempty  = ET E.empty E.empty
+  mempty  = ET Nothing E.empty
  mappend = (<>)

 -- | Auxiliary type for construction of fancy parse errors.
@ -113,20 +122,15 @@ posI = initialPos "" :| []
 -- | @posN n s@ returns source position achieved by applying 'updatePos'
 -- method corresponding to type of stream @s@ @n@ times.

-posN :: forall s n. (Stream s, Integral n)
-  => n
+posN :: forall s. Stream s
+  => Int
  -> s
  -> NonEmpty SourcePos
-posN n see = f (initialPos "") see n :| []
-  where
-    f p s !i =
-      if i > 0
-        then case uncons s of
-          Nothing -> p
-          Just (t,s') ->
-            let p' = snd $ updatePos (Proxy :: Proxy s) defaultTabWidth p t
-            in f p' s' (i - 1)
-        else p
+posN n s =
+  case takeN_ n s of
+    Nothing -> posI
+    Just (ts, _) ->
+      advanceN (Proxy :: Proxy s) defaultTabWidth (initialPos "") ts :| []

 ----------------------------------------------------------------------------
 -- Error components
@ -195,12 +199,12 @@ canonicalizeTokens ts =
 -- | Lift an unexpected item into 'ET'.

 unexp :: ErrorItem t -> ET t
-unexp u = ET (E.singleton u) E.empty
+unexp u = ET (pure u) E.empty

 -- | Lift an expected item into 'ET'.

 expe :: ErrorItem t -> ET t
-expe p = ET E.empty (E.singleton p)
+expe p = ET Nothing (E.singleton p)

 -- | Make a singleton non-empty list from a value.

--- a/Text/Megaparsec/Lexer.hs
+++ b/Text/Megaparsec/Lexer.hs
@ -59,7 +59,8 @@ import Data.Char (readLitChar)
 import Data.List.NonEmpty (NonEmpty (..))
 import Data.Maybe (listToMaybe, fromMaybe, isJust)
 import Data.Scientific (Scientific, toRealFloat)
-import qualified Data.Set as E
+import qualified Data.CaseInsensitive as CI
+import qualified Data.Set             as E

 import Text.Megaparsec
 import qualified Text.Megaparsec.Char as C
@ -129,17 +130,17 @@ lexeme spc p = p <* spc

 symbol :: (MonadParsec e s m, Token s ~ Char)
  => m ()              -- ^ How to consume white space after lexeme
-  -> String            -- ^ String to parse
-  -> m String
+  -> Tokens s          -- ^ String to parse
+  -> m (Tokens s)
 symbol spc = lexeme spc . C.string

 -- | Case-insensitive version of 'symbol'. This may be helpful if you're
 -- working with case-insensitive languages.

-symbol' :: (MonadParsec e s m, Token s ~ Char)
+symbol' :: (MonadParsec e s m, Token s ~ Char, CI.FoldCase (Tokens s))
  => m ()              -- ^ How to consume white space after lexeme
-  -> String            -- ^ String to parse (case-insensitive)
-  -> m String
+  -> Tokens s          -- ^ String to parse (case-insensitive)
+  -> m (Tokens s)
 symbol' spc = lexeme spc . C.string'

 -- | Given comment prefix this function returns a parser that skips line
@ -148,22 +149,22 @@ symbol' spc = lexeme spc . C.string'
 -- 'space' parser or picked up manually.

 skipLineComment :: (MonadParsec e s m, Token s ~ Char)
-  => String            -- ^ Line comment prefix
+  => Tokens s          -- ^ Line comment prefix
  -> m ()
-skipLineComment prefix = p >> void (manyTill C.anyChar n)
-  where p = C.string prefix
-        n = lookAhead (void C.newline) <|> eof
+skipLineComment prefix =
+  C.string prefix *> skipWhileP (Just "character") (/= '\n')

 -- | @skipBlockComment start end@ skips non-nested block comment starting
 -- with @start@ and ending with @end@.

 skipBlockComment :: (MonadParsec e s m, Token s ~ Char)
-  => String            -- ^ Start of block comment
-  -> String            -- ^ End of block comment
+  => Tokens s          -- ^ Start of block comment
+  -> Tokens s          -- ^ End of block comment
  -> m ()
 skipBlockComment start end = p >> void (manyTill C.anyChar n)
-  where p = C.string start
-        n = C.string end
+  where
+    p = C.string start
+    n = C.string end

 -- | @skipBlockCommentNested start end@ skips possibly nested block comment
 -- starting with @start@ and ending with @end@.
@ -171,13 +172,14 @@ skipBlockComment start end = p >> void (manyTill C.anyChar n)
 -- @since 5.0.0

 skipBlockCommentNested :: (MonadParsec e s m, Token s ~ Char)
-  => String            -- ^ Start of block comment
-  -> String            -- ^ End of block comment
+  => Tokens s          -- ^ Start of block comment
+  -> Tokens s          -- ^ End of block comment
  -> m ()
 skipBlockCommentNested start end = p >> void (manyTill e n)
-  where e = skipBlockCommentNested start end <|> void C.anyChar
-        p = C.string start
-        n = C.string end
+  where
+    e = skipBlockCommentNested start end <|> void C.anyChar
+    p = C.string start
+    n = C.string end

 ----------------------------------------------------------------------------
 -- Indentation
@ -359,14 +361,6 @@ lineFold sc action =
 -- string literals:
 --
 -- > stringLiteral = char '"' >> manyTill L.charLiteral (char '"')
--
-- If you want to write @stringLiteral@ that adheres to the Haskell report
-- though, you'll need to take care of the @\\&@ combination which is not a
-- character, but can be used to separate characters (as in @\"\\291\\&4\"@
-- which is two characters long):
--
-- > stringLiteral = catMaybes <$> (char '"' >> manyTill ch (char '"'))
-- >   where ch = (Just <$> L.charLiteral) <|> (Nothing <$ string "\\&")

 charLiteral :: (MonadParsec e s m, Token s ~ Char) => m Char
 charLiteral = label "literal character" $ do
--- a/Text/Megaparsec/Pos.hs
+++ b/Text/Megaparsec/Pos.hs
@ -14,7 +14,6 @@
 -- You probably do not want to import this module because "Text.Megaparsec"
 -- re-exports it anyway.

-{-# LANGUAGE CPP                        #-}
 {-# LANGUAGE DeriveDataTypeable         #-}
 {-# LANGUAGE DeriveGeneric              #-}
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
@ -40,10 +39,6 @@ import Data.Semigroup
 import Data.Typeable (Typeable)
 import GHC.Generics

-#if !MIN_VERSION_base(4,8,0)
-import Data.Word (Word)
-#endif
-
 ----------------------------------------------------------------------------
 -- Abstract position

@ -54,7 +49,7 @@ import Data.Word (Word)
 --
 -- @since 5.0.0

-newtype Pos = Pos Word
+newtype Pos = Pos Int
  deriving (Show, Eq, Ord, Data, Typeable, NFData)

 -- | Construction of 'Pos' from 'Word'. The function throws
@ -62,18 +57,18 @@ newtype Pos = Pos Word
 --
 -- @since 6.0.0

-mkPos :: Word -> Pos
+mkPos :: Int -> Pos
 mkPos a =
-  if a == 0
-    then throw InvalidPosException
+  if a <= 0
+    then throw (InvalidPosException a)
    else Pos a
 {-# INLINE mkPos #-}

-- | Extract 'Word' from 'Pos'.
+-- | Extract 'Int' from 'Pos'.
 --
-- @since 5.0.0
+-- @since 6.0.0

-unPos :: Pos -> Word
+unPos :: Pos -> Int
 unPos (Pos w) = w
 {-# INLINE unPos #-}

@ -102,14 +97,14 @@ instance Read Pos where
    readParen (d > 10) $ \r1 -> do
      ("Pos", r2) <- lex r1
      (x,     r3) <- readsPrec 11 r2
-      return (mkPos (x :: Word) ,r3)
+      return (mkPos x, r3)

 -- | The exception is thrown by 'mkPos' when its argument is not a positive
 -- number.
 --
 -- @since 5.0.0

-data InvalidPosException = InvalidPosException
+data InvalidPosException = InvalidPosException Int
  -- ^ The first value is the minimal allowed value, the second value is the
  -- actual value that was passed to 'mkPos'.
  deriving (Eq, Show, Data, Typeable, Generic)
--- a/Text/Megaparsec/Stream.hs
+++ b/Text/Megaparsec/Stream.hs
@ -11,124 +11,275 @@
 --
 -- You probably do not want to import this module because "Text.Megaparsec"
 -- re-exports it anyway.
+--
+-- @since 6.0.0

+{-# LANGUAGE CPP               #-}
 {-# LANGUAGE FlexibleContexts  #-}
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE TypeFamilies      #-}

 module Text.Megaparsec.Stream
-  ( Stream (..)
-  , defaultUpdatePos )
+  ( Stream (..) )
 where

+import Data.List (foldl')
 import Data.Proxy
 import Data.Semigroup ((<>))
+import Data.Word (Word8)
 import Text.Megaparsec.Pos
-import qualified Data.ByteString.Char8      as B
-import qualified Data.ByteString.Lazy.Char8 as BL
-import qualified Data.Text                  as T
-import qualified Data.Text.Lazy             as TL
+import qualified Data.ByteString      as B
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.Text            as T
+import qualified Data.Text.Lazy       as TL

-- | An instance of @Stream s@ has stream type @s@. Token type is determined
-- by the stream and can be found via 'Token' type function.
+#if !MIN_VERSION_base(4,8,0)
+import Control.Applicative
+#endif

-class Ord (Token s) => Stream s where
+-- | Type class for inputs that can be consumed by the library.

-  -- | Type of token in stream.
-  --
-  -- @since 5.0.0
+class (Ord (Token s), Ord (Tokens s)) => Stream s where
+
+  -- | Type of token in the stream.

  type Token s :: *

-  -- | Get next token from the stream. If the stream is empty, return
-  -- 'Nothing'.
+  -- | Type of “chunk” of the stream.

-  uncons :: s -> Maybe (Token s, s)
+  type Tokens s :: *

-  -- | Update position in stream given tab width, current position, and
-  -- current token. The result is a tuple where the first element will be
-  -- used to report parse errors for current token, while the second element
-  -- is the incremented position that will be stored in the parser's state.
-  -- The stored (incremented) position is used whenever position can't
-  -- be\/shouldn't be updated by consuming a token. For example, when using
-  -- 'failure', we don't grab a new token (we need to fail right were we are
-  -- now), so error position will be taken from parser's state.
+  -- | Lift a single token to chunk to the stream. The default
+  -- implementation is:
  --
-  -- When you work with streams where elements do not contain information
-  -- about their position in input, the result is usually consists of the
-  -- third argument unchanged and incremented position calculated with
-  -- respect to current token. This is how default instances of 'Stream'
-  -- work (they use 'defaultUpdatePos', which may be a good starting point
-  -- for your own position-advancing function).
+  -- > tokenToChunk pxy = tokensToChunk pxy . pure
  --
-  -- When you wish to deal with a stream of tokens where every token “knows”
-  -- its start and end position in input (for example, you have produced the
-  -- stream with Happy\/Alex), then the best strategy is to use the start
-  -- position as the actual element position and provide the end position of
-  -- the token as the incremented one.
-  --
-  -- @since 5.0.0
+  -- However for some types of stream there may be a more efficient way to
+  -- lift.

-  updatePos
-    :: Proxy s -- ^ Proxy clarifying stream type ('Token' is not injective)
+  tokenToChunk  :: Proxy s -> Token s -> Tokens s
+  tokenToChunk pxy = tokensToChunk pxy . pure
+
+  -- | The first method that establishes isomorphism between list of tokens
+  -- and chunk of the stream. Valid implementation should satisfy:
+  --
+  -- > chunkToTokens pxy (tokensToChunk pxy ts) == ts
+
+  tokensToChunk :: Proxy s -> [Token s] -> Tokens s
+
+  -- | The second method that establishes isomorphism between list of tokens
+  -- and chunk of the stream. Valid implementation should satisfy:
+  --
+  -- > tokensToChunk pxy (chunkToTokens pxy chunk) == chunk
+
+  chunkToTokens :: Proxy s -> Tokens s -> [Token s]
+
+  -- | Return length of a chunk of the stream.
+
+  chunkLength :: Proxy s -> Tokens s -> Int
+
+  -- | Check if a chunk of the stream is empty. The default implementation
+  -- is in terms of the more general 'chunkLength':
+  --
+  -- > chunkEmpty pxy ts = chunkLength pxy ts <= 0
+  --
+  -- However for many streams there may be a more efficient implementation.
+
+  chunkEmpty :: Proxy s -> Tokens s -> Bool
+  chunkEmpty pxy ts = chunkLength pxy ts <= 0
+  {-# INLINE chunkEmpty #-}
+
+  -- | Set source position for a given token. By default, the given
+  -- 'SourcePos' (second argument) is just returned without looking at the
+  -- token. This method is important when your stream is a collection of
+  -- tokens where every token knows where it begins in the original input.
+
+  positionAt1
+    :: Proxy s         -- ^ 'Proxy' clarifying the type of stream
+    -> SourcePos       -- ^ Current position
+    -> Token s         -- ^ Current token
+    -> SourcePos       -- ^ Position of the token
+  positionAt1 Proxy = defaultPositionTo
+  {-# INLINE positionAt1 #-}
+
+  -- | The same as 'positionAt1', but for chunks of the stream. The function
+  -- should return the position where the entire chunk begins. Again, by
+  -- default the second argument is returned without modifications and the
+  -- chunk is not looked at.
+
+  positionAtN
+    :: Proxy s         -- ^ 'Proxy' clarifying the type of stream
+    -> SourcePos       -- ^ Current position
+    -> Tokens s        -- ^ Current chunk
+    -> SourcePos       -- ^ Position of the chunk
+  positionAtN Proxy = defaultPositionTo
+  {-# INLINE positionAtN #-}
+
+  -- | Advance position given a single token. The returned position is the
+  -- position right after the token, or position where the token ends.
+
+  advance1
+    :: Proxy s         -- ^ 'Proxy' clarifying the type of stream
    -> Pos             -- ^ Tab width
    -> SourcePos       -- ^ Current position
    -> Token s         -- ^ Current token
-    -> (SourcePos, SourcePos) -- ^ Actual position and incremented position
+    -> SourcePos       -- ^ Advanced position
+
+  -- | Advance position given a chunk of stream. The returned position is
+  -- the position right after the chunk, or position where the chunk ends.
+
+  advanceN
+    :: Proxy s         -- ^ 'Proxy' clarifying the type of stream
+    -> Pos             -- ^ Tab width
+    -> SourcePos       -- ^ Current position
+    -> Tokens s        -- ^ Current token
+    -> SourcePos       -- ^ Advanced position
+
+  -- | Extract a single token form the stream. Return 'Nothing' if the
+  -- stream is empty.
+
+  take1_ :: s -> Maybe (Token s, s)
+
+  -- | @'takeN_' n s@ should try to extract a chunk of length @n@, or if the
+  -- stream is too short, the rest of the stream. Valid implementation
+  -- should follow the rules:
+  --
+  --     * If the requested length @n@ is 0 (or less), 'Nothing' should
+  --       never be returned, instead @'Just' (\"\", s)@ should be returned,
+  --       where @\"\"@ stands for the empty chunk, and @s@ is the original
+  --       stream (second argument).
+  --     * If the requested length is greater than 0 and the stream is
+  --       empty, 'Nothing' should be returned indicating end of input.
+  --     * In other cases, take chunk of length @n@ (or shorter if the
+  --       stream is not long enough) from the input stream and return the
+  --       chunk along with the rest of the stream.
+
+  takeN_ :: Int -> s -> Maybe (Tokens s, s)
+
+  -- | Extract chunk of the stream taking tokens while the supplied
+  -- predicate returns 'True'. Return the chunk and the rest of the stream.
+  --
+  -- For many types of streams, the method allows for significant
+  -- performance improvements, although it is not strictly necessary from
+  -- conceptual point of view.
+
+  takeWhile_ :: (Token s -> Bool) -> s -> (Tokens s, s)

 instance Stream String where
  type Token String = Char
-  uncons [] = Nothing
-  uncons (t:ts) = Just (t, ts)
-  {-# INLINE uncons #-}
-  updatePos = const defaultUpdatePos
-  {-# INLINE updatePos #-}
+  type Tokens String = String
+  tokenToChunk Proxy = pure
+  tokensToChunk Proxy = id
+  chunkToTokens Proxy = id
+  chunkLength Proxy = length
+  chunkEmpty Proxy = null
+  advance1 Proxy = defaultAdvance1
+  advanceN Proxy w = foldl' (defaultAdvance1 w)
+  take1_ [] = Nothing
+  take1_ (t:ts) = Just (t, ts)
+  takeN_ n s
+    | n <= 0    = Just ("", s)
+    | null s    = Nothing
+    | otherwise = Just (splitAt n s)
+  takeWhile_ = span

 instance Stream B.ByteString where
-  type Token B.ByteString = Char
-  uncons = B.uncons
-  {-# INLINE uncons #-}
-  updatePos = const defaultUpdatePos
-  {-# INLINE updatePos #-}
+  type Token B.ByteString = Word8
+  type Tokens B.ByteString = B.ByteString
+  tokenToChunk Proxy = B.singleton
+  tokensToChunk Proxy = B.pack
+  chunkToTokens Proxy = B.unpack
+  chunkLength Proxy = B.length
+  chunkEmpty Proxy = B.null
+  advance1 Proxy = defaultAdvance1
+  advanceN Proxy w = B.foldl' (defaultAdvance1 w)
+  take1_ = B.uncons
+  takeN_ n s
+    | n <= 0    = Just (B.empty, s)
+    | B.null s  = Nothing
+    | otherwise = Just (B.splitAt n s)
+  takeWhile_ = B.span

 instance Stream BL.ByteString where
-  type Token BL.ByteString = Char
-  uncons = BL.uncons
-  {-# INLINE uncons #-}
-  updatePos = const defaultUpdatePos
-  {-# INLINE updatePos #-}
+  type Token BL.ByteString = Word8
+  type Tokens BL.ByteString = BL.ByteString
+  tokenToChunk Proxy = BL.singleton
+  tokensToChunk Proxy = BL.pack
+  chunkToTokens Proxy = BL.unpack
+  chunkLength Proxy = fromIntegral . BL.length
+  chunkEmpty Proxy = BL.null
+  advance1 Proxy = defaultAdvance1
+  advanceN Proxy w = BL.foldl' (defaultAdvance1 w)
+  take1_ = BL.uncons
+  takeN_ n s
+    | n <= 0    = Just (BL.empty, s)
+    | BL.null s = Nothing
+    | otherwise = Just (BL.splitAt (fromIntegral n) s)
+  takeWhile_ = BL.span

 instance Stream T.Text where
  type Token T.Text = Char
-  uncons = T.uncons
-  {-# INLINE uncons #-}
-  updatePos = const defaultUpdatePos
-  {-# INLINE updatePos #-}
+  type Tokens T.Text = T.Text
+  tokenToChunk Proxy = T.singleton
+  tokensToChunk Proxy = T.pack
+  chunkToTokens Proxy = T.unpack
+  chunkLength Proxy = T.length
+  chunkEmpty Proxy = T.null
+  advance1 Proxy = defaultAdvance1
+  advanceN Proxy w = T.foldl' (defaultAdvance1 w)
+  take1_ = T.uncons
+  takeN_ n s
+    | n <= 0    = Just (T.empty, s)
+    | T.null s  = Nothing
+    | otherwise = Just (T.splitAt n s)
+  takeWhile_ = T.span

 instance Stream TL.Text where
-  type Token TL.Text = Char
-  uncons = TL.uncons
-  {-# INLINE uncons #-}
-  updatePos = const defaultUpdatePos
-  {-# INLINE updatePos #-}
+  type Token TL.Text  = Char
+  type Tokens TL.Text = TL.Text
+  tokenToChunk Proxy = TL.singleton
+  tokensToChunk Proxy = TL.pack
+  chunkToTokens Proxy = TL.unpack
+  chunkLength Proxy = fromIntegral . TL.length
+  chunkEmpty Proxy = TL.null
+  advance1 Proxy = defaultAdvance1
+  advanceN Proxy w = TL.foldl' (defaultAdvance1 w)
+  take1_ = TL.uncons
+  takeN_ n s
+    | n <= 0    = Just (TL.empty, s)
+    | TL.null s = Nothing
+    | otherwise = Just (TL.splitAt (fromIntegral n) s)
+  takeWhile_ = TL.span

-- | Update a source position given a character. The first argument
-- specifies the tab width. If the character is a newline (\'\\n\') the line
-- number is incremented by 1. If the character is a tab (\'\\t\') the
-- column number is incremented to the nearest tab position. In all other
-- cases, the column is incremented by 1.
+----------------------------------------------------------------------------
+-- Helpers

-defaultUpdatePos
-  :: Pos               -- ^ Tab width
+-- | Default positioning function designed to work with simple streams where
+-- tokens do not contain info about their position in the stream. Thus it
+-- just returns the given 'SourcePos' without re-positioning.
+
+defaultPositionTo :: SourcePos -> a -> SourcePos
+defaultPositionTo pos _ = pos
+{-# INLINE defaultPositionTo #-}
+
+-- | Update a source position given a token. The first argument specifies
+-- the tab width. If the character is a newline (\'\\n\') the line number is
+-- incremented by 1 and column number is reset to 1. If the character is a
+-- tab (\'\\t\') the column number is incremented to the nearest tab
+-- position. In all other cases, the column is incremented by 1.
+
+defaultAdvance1 :: Enum t
+  => Pos               -- ^ Tab width
  -> SourcePos         -- ^ Current position
-  -> Char              -- ^ Current token
-  -> (SourcePos, SourcePos) -- ^ Actual position and incremented position
-defaultUpdatePos width apos@(SourcePos n l c) ch = (apos, npos)
+  -> t                 -- ^ Current token
+  -> SourcePos         -- ^ Incremented position
+defaultAdvance1 width (SourcePos n l c) t = npos
  where
    w  = unPos width
    c' = unPos c
    npos =
-      case ch of
-        '\n' -> SourcePos n (l <> pos1) pos1
-        '\t' -> SourcePos n l (mkPos $ c' + w - ((c' - 1) `rem` w))
-        _    -> SourcePos n l (c <> pos1)
+      case fromEnum t of
+        10 -> SourcePos n (l <> pos1) pos1
+        9  -> SourcePos n l (mkPos $ c' + w - ((c' - 1) `rem` w))
+        _   -> SourcePos n l (c <> pos1)
+{-# INLINE defaultAdvance1 #-}
--- a/bench/memory/Main.hs
+++ b/bench/memory/Main.hs
@ -1,15 +1,20 @@
+{-# LANGUAGE OverloadedStrings #-}
+
 module Main (main) where

 import Control.DeepSeq
 import Control.Monad
+import Data.Semigroup ((<>))
+import Data.Text (Text)
 import Data.Void
 import Text.Megaparsec
 import Text.Megaparsec.Char
 import Weigh
+import qualified Data.Text as T

 -- | The type of parser that consumes 'String's.

-type Parser = Parsec Void String
+type Parser = Parsec Void Text

 main :: IO ()
 main = mainWith $ do
@ -18,7 +23,7 @@ main = mainWith $ do
  bparser "string'"  manyAs (string' . fst)
  bparser "many"     manyAs (const $ many (char 'a'))
  bparser "some"     manyAs (const $ some (char 'a'))
-  bparser "choice"   (const "b") (choice . fmap char . manyAsB . snd)
+  bparser "choice"   (const "b") (choice . fmap char . manyAsB' . snd)
  bparser "count"    manyAs (\(_,n) -> count n (char 'a'))
  bparser "count'"   manyAs (\(_,n) -> count' 1 n (char 'a'))
  bparser "endBy"    manyAbs' (const $ endBy (char 'a') (char 'b'))
@ -33,13 +38,15 @@ main = mainWith $ do
  bparser "skipSome" manyAs (const $ skipSome (char 'a'))
  bparser "skipManyTill" manyAsB (const $ skipManyTill (char 'a') (char 'b'))
  bparser "skipSomeTill" manyAsB (const $ skipSomeTill (char 'a') (char 'b'))
+  bparser "takeWhileP" manyAs (const $ takeWhileP Nothing (== 'a'))
+  bparser "takeWhile1P" manyAs (const $ takeWhile1P Nothing (== 'a'))

 -- | Perform a series of measurements with the same parser.

 bparser :: NFData a
  => String            -- ^ Name of the benchmark group
-  -> (Int -> String)   -- ^ How to construct input
-  -> ((String, Int) -> Parser a) -- ^ The parser receiving its future input
+  -> (Int -> Text)     -- ^ How to construct input
+  -> ((Text, Int) -> Parser a) -- ^ The parser receiving its future input
  -> Weigh ()
 bparser name f p = forM_ stdSeries $ \i -> do
  let arg      = (f i,i)
@ -56,20 +63,25 @@ stdSeries = [500,1000,2000,4000]

 -- | Generate that many \'a\' characters.

-manyAs :: Int -> String
-manyAs n = replicate n 'a'
+manyAs :: Int -> Text
+manyAs n = T.replicate n "a"

 -- | Like 'manyAs', but interspersed with \'b\'s.

-manyAbs :: Int -> String
-manyAbs n = take (if even n then n + 1 else n) (cycle "ab")
+manyAbs :: Int -> Text
+manyAbs n = T.take (if even n then n + 1 else n) (T.replicate n "ab")

 -- | Like 'manyAs', but with a \'b\' added to the end.

-manyAsB :: Int -> String
-manyAsB n = replicate n 'a' ++ "b"
+manyAsB :: Int -> Text
+manyAsB n = manyAs n <> "b"
+
+-- | Like 'manyAsB', but returns a 'String'.
+
+manyAsB' :: Int -> String
+manyAsB' n = replicate n 'a' ++ "b"

 -- | Like 'manyAbs', but ends in a \'b\'.

-manyAbs' :: Int -> String
-manyAbs' n = take (if even n then n else n + 1) (cycle "ab")
+manyAbs' :: Int -> Text
+manyAbs' n = T.take (if even n then n else n + 1) (T.replicate n "ab")
--- a/bench/speed/Main.hs
+++ b/bench/speed/Main.hs
@ -1,14 +1,19 @@
+{-# LANGUAGE OverloadedStrings #-}
+
 module Main (main) where

 import Control.DeepSeq
 import Criterion.Main
+import Data.Semigroup ((<>))
+import Data.Text (Text)
 import Data.Void
 import Text.Megaparsec
 import Text.Megaparsec.Char
+import qualified Data.Text as T

 -- | The type of parser that consumes 'String's.

-type Parser = Parsec Void String
+type Parser = Parsec Void Text

 main :: IO ()
 main = defaultMain
@ -16,7 +21,7 @@ main = defaultMain
  , bparser "string'"  manyAs (string' . fst)
  , bparser "many"     manyAs (const $ many (char 'a'))
  , bparser "some"     manyAs (const $ some (char 'a'))
-  , bparser "choice"   (const "b") (choice . fmap char . manyAsB . snd)
+  , bparser "choice"   (const "b") (choice . fmap char . manyAsB' . snd)
  , bparser "count"    manyAs (\(_,n) -> count n (char 'a'))
  , bparser "count'"   manyAs (\(_,n) -> count' 1 n (char 'a'))
  , bparser "endBy"    manyAbs' (const $ endBy (char 'a') (char 'b'))
@ -31,14 +36,16 @@ main = defaultMain
  , bparser "skipSome" manyAs (const $ skipSome (char 'a'))
  , bparser "skipManyTill" manyAsB (const $ skipManyTill (char 'a') (char 'b'))
  , bparser "skipSomeTill" manyAsB (const $ skipSomeTill (char 'a') (char 'b'))
+  , bparser "takeWhileP" manyAs (const $ takeWhileP Nothing (== 'a'))
+  , bparser "takeWhile1P" manyAs (const $ takeWhile1P Nothing (== 'a'))
  ]

 -- | Perform a series to measurements with the same parser.

 bparser :: NFData a
  => String            -- ^ Name of the benchmark group
-  -> (Int -> String)   -- ^ How to construct input
-  -> ((String, Int) -> Parser a) -- ^ The parser receiving its future input
+  -> (Int -> Text)     -- ^ How to construct input
+  -> ((Text, Int) -> Parser a) -- ^ The parser receiving its future input
  -> Benchmark         -- ^ The benchmark
 bparser name f p = bgroup name (bs <$> stdSeries)
  where
@ -55,20 +62,25 @@ stdSeries = [500,1000,2000,4000]

 -- | Generate that many \'a\' characters.

-manyAs :: Int -> String
-manyAs n = replicate n 'a'
+manyAs :: Int -> Text
+manyAs n = T.replicate n "a"
+
+-- | Like 'manyAs', but interspersed with \'b\'s.
+
+manyAbs :: Int -> Text
+manyAbs n = T.take (if even n then n + 1 else n) (T.replicate n "ab")

 -- | Like 'manyAs', but with a \'b\' added to the end.

-manyAsB :: Int -> String
-manyAsB n = replicate n 'a' ++ "b"
+manyAsB :: Int -> Text
+manyAsB n = manyAs n <> "b"

-- | Like 'manyAs', but interspersed with \'b\'s and ends in a \'a\'.
+-- | Like 'manyAsB', but returns a 'String'.

-manyAbs :: Int -> String
-manyAbs n = take (if even n then n + 1 else n) (cycle "ab")
+manyAsB' :: Int -> String
+manyAsB' n = replicate n 'a' ++ "b"

 -- | Like 'manyAbs', but ends in a \'b\'.

-manyAbs' :: Int -> String
-manyAbs' n = take (if even n then n else n + 1) (cycle "ab")
+manyAbs' :: Int -> Text
+manyAbs' n = T.take (if even n then n else n + 1) (T.replicate n "ab")
--- a/megaparsec.cabal
+++ b/megaparsec.cabal
@ -35,6 +35,7 @@ flag dev
 library
  build-depends:      base         >= 4.7   && < 5.0
                    , bytestring   >= 0.2   && < 0.11
+                    , case-insensitive >= 1.2 && < 1.3
                    , containers   >= 0.5   && < 0.6
                    , deepseq      >= 1.3   && < 1.5
                    , mtl          >= 2.0   && < 3.0
@ -79,6 +80,7 @@ test-suite tests
                    , Text.Megaparsec.LexerSpec
                    , Text.Megaparsec.PermSpec
                    , Text.Megaparsec.PosSpec
+                    , Text.Megaparsec.StreamSpec
                    , Text.MegaparsecSpec
  build-depends:      QuickCheck   >= 2.7   && < 2.11
                    , base         >= 4.7   && < 5.0
@ -91,9 +93,8 @@ test-suite tests
                    , scientific   >= 0.3.1 && < 0.4
                    , text         >= 0.2   && < 1.3
                    , transformers >= 0.4   && < 0.6
-
  if !impl(ghc >= 8.0)
-    build-depends:    semigroups     == 0.18.*
+    build-depends:    semigroups   == 0.18.*
  if !impl(ghc >= 7.10)
    build-depends:    void         == 0.7.*
  default-language:   Haskell2010
@ -106,6 +107,9 @@ benchmark bench-speed
                    , criterion    >= 0.6.2.1 && < 1.3
                    , deepseq      >= 1.3  && < 1.5
                    , megaparsec
+                    , text         >= 0.2  && < 1.3
+  if !impl(ghc >= 8.0)
+    build-depends:    semigroups   == 0.18.*
  if !impl(ghc >= 7.10)
    build-depends:    void         == 0.7.*
  if flag(dev)
@ -121,7 +125,10 @@ benchmark bench-memory
  build-depends:      base         >= 4.7  && < 5.0
                    , deepseq      >= 1.3  && < 1.5
                    , megaparsec
+                    , text         >= 0.2  && < 1.3
                    , weigh        >= 0.0.4
+  if !impl(ghc >= 8.0)
+    build-depends:    semigroups   == 0.18.*
  if !impl(ghc >= 7.10)
    build-depends:    void         == 0.7.*
  if flag(dev)
--- a/tests/Control/Applicative/CombinatorsSpec.hs
+++ b/tests/Control/Applicative/CombinatorsSpec.hs
@ -26,7 +26,9 @@ spec = do
    if b > 0
      then prs_ p s `shouldFailWith` err (posN (length pre + n + b) s)
        ( etoks post <> etok c <>
-          (if length post == b then ueof else utoks [post !! b]) )
+          if length post == b
+            then ueof
+            else utoks (drop b post) )
      else prs_ p s `shouldParse` z

  describe "choice" . it "works" . property $ \cs' s' -> do
--- a/tests/Test/Hspec/Megaparsec/AdHoc.hs
+++ b/tests/Test/Hspec/Megaparsec/AdHoc.hs
@ -23,8 +23,8 @@ where
 import Control.Monad
 import Control.Monad.Reader
 import Control.Monad.Trans.Identity
-import Data.Foldable (foldl')
 import Data.List.NonEmpty (NonEmpty (..))
+import Data.Proxy
 import Data.Void
 import Test.Hspec
 import Test.Hspec.Megaparsec
@ -36,8 +36,12 @@ import qualified Control.Monad.State.Lazy    as L
 import qualified Control.Monad.State.Strict  as S
 import qualified Control.Monad.Writer.Lazy   as L
 import qualified Control.Monad.Writer.Strict as S
+import qualified Data.ByteString             as B
+import qualified Data.ByteString.Lazy        as BL
 import qualified Data.List.NonEmpty          as NE
 import qualified Data.Set                    as E
+import qualified Data.Text                   as T
+import qualified Data.Text.Lazy              as TL

 #if !MIN_VERSION_base(4,8,0)
 import Control.Applicative
@ -138,8 +142,7 @@ updatePosString
  -> SourcePos         -- ^ Initial position
  -> String            -- ^ 'String' — collection of tokens to process
  -> SourcePos         -- ^ Final position
-updatePosString w = foldl' f
-  where f p t = snd (defaultUpdatePos w p t)
+updatePosString = advanceN (Proxy :: Proxy String)

 -- | Make a singleton non-empty list from a value.

@ -206,7 +209,7 @@ instance (Arbitrary t, Ord t, Arbitrary e, Ord e)
  arbitrary = oneof
    [ TrivialError
      <$> (NE.fromList . getNonEmpty <$> arbitrary)
-      <*> (E.fromList <$> arbitrary)
+      <*> arbitrary
      <*> (E.fromList <$> arbitrary)
    , FancyError
      <$> (NE.fromList . getNonEmpty <$> arbitrary)
@ -218,3 +221,15 @@ instance Arbitrary a => Arbitrary (State a) where
    <*> (NE.fromList . getNonEmpty <$> arbitrary)
    <*> choose (1, 10000)
    <*> (mkPos <$> choose (1, 20))
+
+instance Arbitrary T.Text where
+  arbitrary = T.pack <$> arbitrary
+
+instance Arbitrary TL.Text where
+  arbitrary = TL.pack <$> arbitrary
+
+instance Arbitrary B.ByteString where
+  arbitrary = B.pack <$> arbitrary
+
+instance Arbitrary BL.ByteString where
+  arbitrary = BL.pack <$> arbitrary
--- a/tests/Text/Megaparsec/CharSpec.hs
+++ b/tests/Text/Megaparsec/CharSpec.hs
@ -48,8 +48,7 @@ spec = do
      it "signals correct parse error" $
        property $ \ch -> ch /= '\n' ==> do
          let s = ['\r',ch]
-          prs eol s `shouldFailWith` err posI
-            (utoks s <> utok '\r' <> elabel "end of line")
+          prs eol s `shouldFailWith` err posI (utoks s <> elabel "end of line")
    context "when input stream is '\\r'" $
      it "signals correct parse error" $
        prs eol "\r" `shouldFailWith` err posI
@ -59,7 +58,7 @@ spec = do
        property $ \ch s -> (ch `notElem` "\r\n") ==> do
          let s' = ch : s
          prs eol s' `shouldFailWith` err posI
-            (utok ch <> elabel "end of line")
+            (utoks (take 2 s') <> elabel "end of line")
    context "when stream is empty" $
      it "signals correct parse error" $
        prs eol "" `shouldFailWith` err posI
@ -317,9 +316,9 @@ spec = do
    context "when stream is not prefixed with given string" $
      it "signals correct parse error" $
        property $ \str s -> not (str `isPrefixOf` s) ==> do
-          let n = length (takeWhile (uncurry (==)) (zip str s)) + 1
-              common = take n s
-          prs  (string str) s `shouldFailWith` err posI (utoks common <> etoks str)
+          let us = take (length str) s
+          prs (string str) s `shouldFailWith`
+            err posI (utoks us <> etoks str)

  describe "string'" $ do
    context "when stream is prefixed with given string" $
@ -332,9 +331,9 @@ spec = do
    context "when stream is not prefixed with given string" $
      it "signals correct parse error" $
        property $ \str s -> not (str `isPrefixOfI` s) ==> do
-          let n = length (takeWhile (uncurry casei) (zip str s)) + 1
-              common = take n s
-          prs  (string' str) s `shouldFailWith` err posI (utoks common <> etoks str)
+          let us = take (length str) s
+          prs  (string' str) s `shouldFailWith`
+            err posI (utoks us <> etoks str)

 ----------------------------------------------------------------------------
 -- Helpers
@ -351,7 +350,8 @@ checkStrLit name ts p = do
    it "signals correct parse error" $
      property $ \ch s -> ch /= head ts ==> do
       let s' = ch : s
-       prs  p s' `shouldFailWith` err posI (utok ch <> etoks ts)
+           us = take (length ts) s'
+       prs  p s' `shouldFailWith` err posI (utoks us <> etoks ts)
       prs' p s' `failsLeaving`   s'
  context "when stream is empty" $
    it "signals correct parse error" $
--- a/tests/Text/Megaparsec/ErrorSpec.hs
+++ b/tests/Text/Megaparsec/ErrorSpec.hs
@ -11,6 +11,7 @@ import Test.Hspec
 import Test.Hspec.Megaparsec.AdHoc ()
 import Test.QuickCheck
 import Text.Megaparsec.Error
+import Text.Megaparsec.Error.Builder
 import Text.Megaparsec.Pos
 import qualified Data.List.NonEmpty as NE
 import qualified Data.Semigroup     as S
@ -54,10 +55,14 @@ spec = do
      property $ \x y ->
        errorPos (x <> y :: PE) === max (errorPos x) (errorPos y)
    context "when combining two trivial parse errors at the same position" $
-      it "merges their unexpected and expected items" $
+      it "merges their unexpected and expected items" $ do
+        let n Nothing  Nothing = Nothing
+            n (Just x) Nothing = Just x
+            n Nothing (Just y) = Just y
+            n (Just x) (Just y) = Just (max x y)
        property $ \pos us0 ps0 us1 ps1 ->
          TrivialError pos us0 ps0 <> TrivialError pos us1 ps1 `shouldBe`
-            (TrivialError pos (E.union us0 us1) (E.union ps0 ps1) :: PE)
+            (TrivialError pos (n us0 us1) (E.union ps0 ps1) :: PE)
    context "when combining two fancy parse errors at the same position" $
      it "merges their custom items" $
        property $ \pos xs0 xs1 ->
@ -178,7 +183,7 @@ spec = do
      property (contains errorPos sourcePosPretty)
    it "result contains representation of unexpected items" $ do
      let f (TrivialError _ us _) = us
-          f _                     = E.empty
+          f _                     = Nothing
      property (contains f showErrorComponent)
    it "result contains representation of expected items" $ do
      let f (TrivialError _ _ ps) = ps
@ -188,6 +193,11 @@ spec = do
      let f (FancyError _ xs) = xs
          f _                 = E.empty
      property (contains f showErrorComponent)
+    it "several fancy errors look not so bad" $ do
+      let pe :: PE
+          pe = errFancy posI $
+            mempty <> fancy (ErrorFail "foo") <> fancy (ErrorFail "bar")
+      parseErrorPretty pe `shouldBe` "1:1:\nbar\nfoo\n"

  describe "sourcePosStackPretty" $
    it "result never ends with a newline " $
@ -196,8 +206,12 @@ spec = do
        in sourcePosStackPretty pos `shouldNotSatisfy` ("\n" `isSuffixOf`)

  describe "parseErrorTextPretty" $ do
-    it "shows unknown ParseError correctly" $
-      parseErrorTextPretty (mempty :: PE) `shouldBe` "unknown parse error\n"
+    it "shows trivial unknown ParseError correctly" $
+      parseErrorTextPretty (mempty :: PE)
+        `shouldBe` "unknown parse error\n"
+    it "shows fancy unknown ParseError correctly" $
+      parseErrorTextPretty (FancyError posI E.empty :: PE)
+        `shouldBe` "unknown fancy parse error\n"
    it "result always ends with a newline" $
      property $ \x ->
        parseErrorTextPretty (x :: PE)
--- a/tests/Text/Megaparsec/ExprSpec.hs
+++ b/tests/Text/Megaparsec/ExprSpec.hs
@ -32,9 +32,8 @@ spec =
    context "when term is missing" $
      it "signals correct parse error" $ do
        let p = expr <* eof
-            n = 1 :: Integer
-        prs p "-" `shouldFailWith` err (posN n "-") (ueof <> elabel "term")
-        prs p "(" `shouldFailWith` err (posN n "(") (ueof <> elabel "term")
+        prs p "-" `shouldFailWith` err (posN 1 "-") (ueof <> elabel "term")
+        prs p "(" `shouldFailWith` err (posN 1 "(") (ueof <> elabel "term")
        prs p "*" `shouldFailWith` err posI (utok '*' <> elabel "term")
    context "operator is missing" $
      it "signals correct parse error" $
@ -123,16 +122,16 @@ arbitraryN2 n = elements [Sum,Sub,Pro,Div,Exp] <*> leaf <*> leaf
 -- Some helpers are put here since we don't want to depend on
 -- "Text.Megaparsec.Lexer".

-lexeme :: (MonadParsec e s m, Token s ~ Char) => m a -> m a
+lexeme :: Parser a -> Parser a
 lexeme p = p <* hidden space

-symbol :: (MonadParsec e s m, Token s ~ Char) => String -> m String
+symbol :: String -> Parser String
 symbol = lexeme . string

-parens :: (MonadParsec e s m, Token s ~ Char) => m a -> m a
+parens :: Parser a -> Parser a
 parens = between (symbol "(") (symbol ")")

-integer :: (MonadParsec e s m, Token s ~ Char) => m Integer
+integer :: Parser Integer
 integer = lexeme (read <$> some digitChar <?> "integer")

 -- Here we use a table of operators that makes use of all features of
@ -140,18 +139,19 @@ integer = lexeme (read <$> some digitChar <?> "integer")
 -- but valid expressions and render them to get their textual
 -- representation.

-expr :: (MonadParsec e s m, Token s ~ Char) => m Node
+expr :: Parser Node
 expr = makeExprParser term table

-term :: (MonadParsec e s m, Token s ~ Char) => m Node
+term :: Parser Node
 term = parens expr <|> (Val <$> integer) <?> "term"

-table :: (MonadParsec e s m, Token s ~ Char) => [[Operator m Node]]
-table = [ [ Prefix  (symbol "-" *> pure Neg)
-          , Postfix (symbol "!" *> pure Fac)
-          , InfixN  (symbol "%" *> pure Mod) ]
-        , [ InfixR  (symbol "^" *> pure Exp) ]
-        , [ InfixL  (symbol "*" *> pure Pro)
-          , InfixL  (symbol "/" *> pure Div) ]
-        , [ InfixL  (symbol "+" *> pure Sum)
-          , InfixL  (symbol "-" *> pure Sub)] ]
+table :: [[Operator Parser Node]]
+table =
+  [ [ Prefix  (symbol "-" *> pure Neg)
+    , Postfix (symbol "!" *> pure Fac)
+    , InfixN  (symbol "%" *> pure Mod) ]
+  , [ InfixR  (symbol "^" *> pure Exp) ]
+  , [ InfixL  (symbol "*" *> pure Pro)
+    , InfixL  (symbol "/" *> pure Div) ]
+  , [ InfixL  (symbol "+" *> pure Sum)
+    , InfixL  (symbol "-" *> pure Sub)] ]
--- a/tests/Text/Megaparsec/LexerSpec.hs
+++ b/tests/Text/Megaparsec/LexerSpec.hs
@ -144,7 +144,7 @@ spec = do
             | col2 <= col1 -> prs p s `shouldFailWith`
               errFancy (posN (getIndent l2 + g 2) s) (ii GT col1 col2)
             | col3 == col2 -> prs p s `shouldFailWith`
-               err (posN (getIndent l3 + g 3) s) (utok (head sblb) <> etoks sblc <> eeof)
+               err (posN (getIndent l3 + g 3) s) (utoks sblb <> etoks sblc <> eeof)
             | col3 <= col0 -> prs p s `shouldFailWith`
               err (posN (getIndent l3 + g 3) s) (utok (head sblb) <> eeof)
             | col3 < col1 -> prs p s `shouldFailWith`
--- a/tests/Text/Megaparsec/PosSpec.hs
+++ b/tests/Text/Megaparsec/PosSpec.hs
@ -5,18 +5,18 @@ import Data.Function (on)
 import Data.List (isInfixOf)
 import Data.Semigroup ((<>))
 import Test.Hspec
-import Test.Hspec.Megaparsec.AdHoc
+import Test.Hspec.Megaparsec.AdHoc ()
 import Test.QuickCheck
 import Text.Megaparsec.Pos
-import Text.Megaparsec.Stream (defaultUpdatePos)

 spec :: Spec
 spec = do

  describe "mkPos" $ do
-    context "when the argument is 0" $
+    context "when the argument is a non-positive number" $
      it "throws InvalidPosException" $
-        evaluate (mkPos 0) `shouldThrow` (== InvalidPosException)
+        property $ \n -> n <= 0 ==>
+          evaluate (mkPos n) `shouldThrow` (== InvalidPosException n)
    context "when the argument is not 0" $
      it "returns Pos with the given value" $
        property $ \n ->
@ -61,28 +61,3 @@ spec = do
    it "displays column number" $
      property $ \x ->
        (show . unPos . sourceColumn) x `isInfixOf` sourcePosPretty x
-
-  describe "defaultUpdatePos" $ do
-    it "returns actual position unchanged" $
-      property $ \w pos ch ->
-        fst (defaultUpdatePos w pos ch) === pos
-    it "does not change file name" $
-      property $ \w pos ch ->
-        (sourceName . snd) (defaultUpdatePos w pos ch) === sourceName pos
-    context "when given newline character" $
-      it "increments line number" $
-        property $ \w pos ->
-          (sourceLine . snd) (defaultUpdatePos w pos '\n')
-            === (sourceLine pos <> pos1)
-    context "when given tab character" $
-      it "shits column number to next tab position" $
-        property $ \w pos ->
-          let c  = sourceColumn pos
-              c' = (sourceColumn . snd) (defaultUpdatePos w pos '\t')
-          in c' > c .&&. (((unPos c' - 1) `rem` unPos w) == 0)
-    context "when given character other than newline or tab" $
-      it "increments column number by one" $
-        property $ \w pos ch ->
-          (ch /= '\n' && ch /= '\t') ==>
-          (sourceColumn . snd) (defaultUpdatePos w pos ch)
-            === (sourceColumn pos <> pos1)
--- a/tests/Text/Megaparsec/StreamSpec.hs
+++ b/tests/Text/Megaparsec/StreamSpec.hs
@ -0,0 +1,414 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+module Text.Megaparsec.StreamSpec (spec) where
+
+import Data.Char (isLetter, chr)
+import Data.Proxy
+import Data.Semigroup ((<>))
+import Test.Hspec
+import Test.Hspec.Megaparsec.AdHoc
+import Test.QuickCheck
+import Text.Megaparsec
+import qualified Data.ByteString      as B
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.Text            as T
+import qualified Data.Text.Lazy       as TL
+
+spec :: Spec
+spec = do
+
+  describe "String instance of Stream" $ do
+    describe "tokenToChunk" $
+      it "produces the same result as singleton with tokensToChunk" $
+        property $ \ch ->
+          tokenToChunk sproxy ch === tokensToChunk sproxy [ch]
+    describe "tokensToChunk" $
+      it "list of tokens is isomorphic to chunk" $
+        property $ \ts ->
+          chunkToTokens sproxy (tokensToChunk sproxy ts) === ts
+    describe "chunkToTokens" $
+      it "chunk is isomorphic to list of tokens" $
+        property $ \chunk ->
+          tokensToChunk sproxy (chunkToTokens sproxy chunk) === chunk
+    describe "chunkLength" $
+      it "returns correct length of given chunk" $
+        property $ \chunk ->
+          chunkLength sproxy chunk === length chunk
+    describe "chunkEmpty" $
+      it "only true when chunkLength returns 0" $
+        property $ \chunk ->
+          chunkEmpty sproxy chunk === (chunkLength sproxy chunk <= 0)
+    describe "positionAt1" $
+      it "just returns the given position" $
+        property $ \pos t ->
+          positionAt1 sproxy pos t === pos
+    describe "positionAtN" $
+      it "just returns the given position" $
+        property $ \pos chunk ->
+          positionAtN sproxy pos chunk === pos
+    describe "advance1" $ do
+      context "when given newline" $
+        it "works correctly" $
+          property $ \w pos@(SourcePos n l _) ->
+            advance1 sproxy w pos '\n' === SourcePos n (l <> pos1) pos1
+      context "when given tab" $
+        it "works correctly" $
+          property $ \w pos@(SourcePos n l c) ->
+            advance1 sproxy w pos '\t' === SourcePos n l (toNextTab w c)
+      context "when given other character" $
+        it "works correctly" $
+          property $ \ch w pos@(SourcePos n l c) ->
+            (ch /= '\n' && ch /= '\t') ==>
+              advance1 sproxy w pos ch === SourcePos n l (c <> pos1)
+    describe "advanceN" $
+      it "works correctly" $
+        advanceN sproxy defaultTabWidth (initialPos "") "something\n\tfoo"
+          === SourcePos "" (mkPos 2) (mkPos 12)
+    describe "take1_" $ do
+      context "when input in empty" $
+        it "returns Nothing" $
+          take1_ ("" :: String) === Nothing
+      context "when input is not empty" $
+        it "unconses a token" $
+          property $ \s -> not (null s) ==>
+            take1_ (s :: String) === Just (head s, tail s)
+    describe "takeN_" $ do
+      context "requested length is 0" $
+        it "returns Just empty chunk and original stream" $
+          property $ \s ->
+            takeN_ 0 (s :: String) === Just ("", s)
+      context "requested length is greater than 0" $ do
+        context "stream is empty" $
+          it "returns Nothing" $
+            property $ \(Positive n) ->
+              takeN_ n ("" :: String) === Nothing
+        context "stream is not empty" $
+          it "returns a chunk of correct length and rest of the stream" $
+            property $ \(Positive n) s -> not (null s) ==>
+              takeN_ n (s :: String) === Just (splitAt n s)
+    describe "takeWhile_" $
+      it "extracts a chunk that is a prefix consisting of matching tokens" $
+        property $ \s ->
+          takeWhile_ isLetter s === span isLetter s
+
+  describe "ByteString instance of Stream" $ do
+    describe "tokenToChunk" $
+      it "produces the same result as singleton with tokensToChunk" $
+        property $ \ch ->
+          tokenToChunk bproxy ch === tokensToChunk bproxy [ch]
+    describe "tokensToChunk" $
+      it "list of tokens is isomorphic to chunk" $
+        property $ \ts ->
+          chunkToTokens bproxy (tokensToChunk bproxy ts) === ts
+    describe "chunkToTokens" $
+      it "chunk is isomorphic to list of tokens" $
+        property $ \chunk ->
+          tokensToChunk bproxy (chunkToTokens bproxy chunk) === chunk
+    describe "chunkLength" $
+      it "returns correct length of given chunk" $
+        property $ \chunk ->
+          chunkLength bproxy chunk === B.length chunk
+    describe "chunkEmpty" $
+      it "only true when chunkLength returns 0" $
+        property $ \chunk ->
+          chunkEmpty bproxy chunk === (chunkLength bproxy chunk <= 0)
+    describe "positionAt1" $
+      it "just returns the given position" $
+        property $ \pos t ->
+          positionAt1 bproxy pos t === pos
+    describe "positionAtN" $
+      it "just returns the given position" $
+        property $ \pos chunk ->
+          positionAtN bproxy pos chunk === pos
+    describe "advance1" $ do
+      context "when given newline" $
+        it "works correctly" $
+          property $ \w pos@(SourcePos n l _) ->
+            advance1 bproxy w pos 10 === SourcePos n (l <> pos1) pos1
+      context "when given tab" $
+        it "works correctly" $
+          property $ \w pos@(SourcePos n l c) ->
+            advance1 bproxy w pos 9 === SourcePos n l (toNextTab w c)
+      context "when given other character" $
+        it "works correctly" $
+          property $ \ch w pos@(SourcePos n l c) ->
+            (ch /= 10 && ch /= 9) ==>
+              advance1 bproxy w pos ch === SourcePos n l (c <> pos1)
+    describe "advanceN" $
+      it "works correctly" $
+        advanceN bproxy defaultTabWidth (initialPos "") "something\n\tfoo"
+          === SourcePos "" (mkPos 2) (mkPos 12)
+    describe "take1_" $ do
+      context "when input in empty" $
+        it "returns Nothing" $
+          take1_ ("" :: B.ByteString) === Nothing
+      context "when input is not empty" $
+        it "unconses a token" $
+          property $ \s -> not (B.null s) ==>
+            take1_ (s :: B.ByteString) === B.uncons s
+    describe "takeN_" $ do
+      context "requested length is 0" $
+        it "returns Just empty chunk and original stream" $
+          property $ \s ->
+            takeN_ 0 (s :: B.ByteString) === Just ("", s)
+      context "requested length is greater than 0" $ do
+        context "stream is empty" $
+          it "returns Nothing" $
+            property $ \(Positive n) ->
+              takeN_ n ("" :: B.ByteString) === Nothing
+        context "stream is not empty" $
+          it "returns a chunk of correct length and rest of the stream" $
+            property $ \(Positive n) s -> not (B.null s) ==>
+              takeN_ n (s :: B.ByteString) === Just (B.splitAt n s)
+    describe "takeWhile_" $
+      it "extracts a chunk that is a prefix consisting of matching tokens" $
+        property $ \s ->
+          let f = isLetter . chr . fromIntegral
+          in takeWhile_ f s === B.span f s
+
+  describe "Lazy ByteString instance of Stream" $ do
+    describe "tokenToChunk" $
+      it "produces the same result as singleton with tokensToChunk" $
+        property $ \ch ->
+          tokenToChunk blproxy ch === tokensToChunk blproxy [ch]
+    describe "tokensToChunk" $
+      it "list of tokens is isomorphic to chunk" $
+        property $ \ts ->
+          chunkToTokens blproxy (tokensToChunk blproxy ts) === ts
+    describe "chunkToTokens" $
+      it "chunk is isomorphic to list of tokens" $
+        property $ \chunk ->
+          tokensToChunk blproxy (chunkToTokens blproxy chunk) === chunk
+    describe "chunkLength" $
+      it "returns correct length of given chunk" $
+        property $ \chunk ->
+          chunkLength blproxy chunk === fromIntegral (BL.length chunk)
+    describe "chunkEmpty" $
+      it "only true when chunkLength returns 0" $
+        property $ \chunk ->
+          chunkEmpty blproxy chunk === (chunkLength blproxy chunk <= 0)
+    describe "positionAt1" $
+      it "just returns the given position" $
+        property $ \pos t ->
+          positionAt1 blproxy pos t === pos
+    describe "positionAtN" $
+      it "just returns the given position" $
+        property $ \pos chunk ->
+          positionAtN blproxy pos chunk === pos
+    describe "advance1" $ do
+      context "when given newline" $
+        it "works correctly" $
+          property $ \w pos@(SourcePos n l _) ->
+            advance1 blproxy w pos 10 === SourcePos n (l <> pos1) pos1
+      context "when given tab" $
+        it "works correctly" $
+          property $ \w pos@(SourcePos n l c) ->
+            advance1 blproxy w pos 9 === SourcePos n l (toNextTab w c)
+      context "when given other character" $
+        it "works correctly" $
+          property $ \ch w pos@(SourcePos n l c) ->
+            (ch /= 10 && ch /= 9) ==>
+              advance1 blproxy w pos ch === SourcePos n l (c <> pos1)
+    describe "advanceN" $
+      it "works correctly" $
+        advanceN blproxy defaultTabWidth (initialPos "") "something\n\tfoo"
+          === SourcePos "" (mkPos 2) (mkPos 12)
+    describe "take1_" $ do
+      context "when input in empty" $
+        it "returns Nothing" $
+          take1_ ("" :: BL.ByteString) === Nothing
+      context "when input is not empty" $
+        it "unconses a token" $
+          property $ \s -> not (BL.null s) ==>
+            take1_ (s :: BL.ByteString) === BL.uncons s
+    describe "takeN_" $ do
+      context "requested length is 0" $
+        it "returns Just empty chunk and original stream" $
+          property $ \s ->
+            takeN_ 0 (s :: BL.ByteString) === Just ("", s)
+      context "requested length is greater than 0" $ do
+        context "stream is empty" $
+          it "returns Nothing" $
+            property $ \(Positive n) ->
+              takeN_ n ("" :: BL.ByteString) === Nothing
+        context "stream is not empty" $
+          it "returns a chunk of correct length and rest of the stream" $
+            property $ \(Positive n) s -> not (BL.null s) ==>
+              takeN_ n (s :: BL.ByteString) === Just (BL.splitAt (fromIntegral n) s)
+    describe "takeWhile_" $
+      it "extracts a chunk that is a prefix consisting of matching tokens" $
+        property $ \s ->
+          let f = isLetter . chr . fromIntegral
+          in takeWhile_ f s === BL.span f s
+
+  describe "Text instance of Stream" $ do
+    describe "tokenToChunk" $
+      it "produces the same result as singleton with tokensToChunk" $
+        property $ \ch ->
+          tokenToChunk tproxy ch === tokensToChunk tproxy [ch]
+    describe "tokensToChunk" $
+      it "list of tokens is isomorphic to chunk" $
+        property $ \ts ->
+          chunkToTokens tproxy (tokensToChunk tproxy ts) === ts
+    describe "chunkToTokens" $
+      it "chunk is isomorphic to list of tokens" $
+        property $ \chunk ->
+          tokensToChunk tproxy (chunkToTokens tproxy chunk) === chunk
+    describe "chunkLength" $
+      it "returns correct length of given chunk" $
+        property $ \chunk ->
+          chunkLength tproxy chunk === T.length chunk
+    describe "chunkEmpty" $
+      it "only true when chunkLength returns 0" $
+        property $ \chunk ->
+          chunkEmpty tproxy chunk === (chunkLength tproxy chunk <= 0)
+    describe "positionAt1" $
+      it "just returns the given position" $
+        property $ \pos t ->
+          positionAt1 tproxy pos t === pos
+    describe "positionAtN" $
+      it "just returns the given position" $
+        property $ \pos chunk ->
+          positionAtN tproxy pos chunk === pos
+    describe "advance1" $ do
+      context "when given newline" $
+        it "works correctly" $
+          property $ \w pos@(SourcePos n l _) ->
+            advance1 tproxy w pos '\n' === SourcePos n (l <> pos1) pos1
+      context "when given tab" $
+        it "works correctly" $
+          property $ \w pos@(SourcePos n l c) ->
+            advance1 tproxy w pos '\t' === SourcePos n l (toNextTab w c)
+      context "when given other character" $
+        it "works correctly" $
+          property $ \ch w pos@(SourcePos n l c) ->
+            (ch /= '\n' && ch /= '\t') ==>
+              advance1 tproxy w pos ch === SourcePos n l (c <> pos1)
+    describe "advanceN" $
+      it "works correctly" $
+        advanceN tproxy defaultTabWidth (initialPos "") "something\n\tfoo"
+          === SourcePos "" (mkPos 2) (mkPos 12)
+    describe "take1_" $ do
+      context "when input in empty" $
+        it "returns Nothing" $
+          take1_ ("" :: T.Text) === Nothing
+      context "when input is not empty" $
+        it "unconses a token" $
+          property $ \s -> not (T.null s) ==>
+            take1_ (s :: T.Text) === T.uncons s
+    describe "takeN_" $ do
+      context "requested length is 0" $
+        it "returns Just empty chunk and original stream" $
+          property $ \s ->
+            takeN_ 0 (s :: T.Text) === Just ("", s)
+      context "requested length is greater than 0" $ do
+        context "stream is empty" $
+          it "returns Nothing" $
+            property $ \(Positive n) ->
+              takeN_ n ("" :: T.Text) === Nothing
+        context "stream is not empty" $
+          it "returns a chunk of correct length and rest of the stream" $
+            property $ \(Positive n) s -> not (T.null s) ==>
+              takeN_ n (s :: T.Text) === Just (T.splitAt n s)
+    describe "takeWhile_" $
+      it "extracts a chunk that is a prefix consisting of matching tokens" $
+        property $ \s ->
+          takeWhile_ isLetter s === T.span isLetter s
+
+  describe "Lazy Text instance of Stream" $ do
+    describe "tokenToChunk" $
+      it "produces the same result as singleton with tokensToChunk" $
+        property $ \ch ->
+          tokenToChunk tlproxy ch === tokensToChunk tlproxy [ch]
+    describe "tokensToChunk" $
+      it "list of tokens is isomorphic to chunk" $
+        property $ \ts ->
+          chunkToTokens tlproxy (tokensToChunk tlproxy ts) === ts
+    describe "chunkToTokens" $
+      it "chunk is isomorphic to list of tokens" $
+        property $ \chunk ->
+          tokensToChunk tlproxy (chunkToTokens tlproxy chunk) === chunk
+    describe "chunkLength" $
+      it "returns correct length of given chunk" $
+        property $ \chunk ->
+          chunkLength tlproxy chunk === fromIntegral (TL.length chunk)
+    describe "chunkEmpty" $
+      it "only true when chunkLength returns 0" $
+        property $ \chunk ->
+          chunkEmpty tlproxy chunk === (chunkLength tlproxy chunk <= 0)
+    describe "positionAt1" $
+      it "just returns the given position" $
+        property $ \pos t ->
+          positionAt1 tlproxy pos t === pos
+    describe "positionAtN" $
+      it "just returns the given position" $
+        property $ \pos chunk ->
+          positionAtN tlproxy pos chunk === pos
+    describe "advance1" $ do
+      context "when given newline" $
+        it "works correctly" $
+          property $ \w pos@(SourcePos n l _) ->
+            advance1 tlproxy w pos '\n' === SourcePos n (l <> pos1) pos1
+      context "when given tab" $
+        it "works correctly" $
+          property $ \w pos@(SourcePos n l c) ->
+            advance1 tlproxy w pos '\t' === SourcePos n l (toNextTab w c)
+      context "when given other character" $
+        it "works correctly" $
+          property $ \ch w pos@(SourcePos n l c) ->
+            (ch /= '\n' && ch /= '\t') ==>
+              advance1 tlproxy w pos ch === SourcePos n l (c <> pos1)
+    describe "advanceN" $
+      it "works correctly" $
+        advanceN tlproxy defaultTabWidth (initialPos "") "something\n\tfoo"
+          === SourcePos "" (mkPos 2) (mkPos 12)
+    describe "take1_" $ do
+      context "when input in empty" $
+        it "returns Nothing" $
+          take1_ ("" :: TL.Text) === Nothing
+      context "when input is not empty" $
+        it "unconses a token" $
+          property $ \s -> not (TL.null s) ==>
+            take1_ (s :: TL.Text) === TL.uncons s
+    describe "takeN_" $ do
+      context "requested length is 0" $
+        it "returns Just empty chunk and original stream" $
+          property $ \s ->
+            takeN_ 0 (s :: TL.Text) === Just ("", s)
+      context "requested length is greater than 0" $ do
+        context "stream is empty" $
+          it "returns Nothing" $
+            property $ \(Positive n) ->
+              takeN_ n ("" :: TL.Text) === Nothing
+        context "stream is not empty" $
+          it "returns a chunk of correct length and rest of the stream" $
+            property $ \(Positive n) s -> not (TL.null s) ==>
+              takeN_ n (s :: TL.Text) === Just (TL.splitAt (fromIntegral n) s)
+    describe "takeWhile_" $
+      it "extracts a chunk that is a prefix consisting of matching tokens" $
+        property $ \s ->
+          takeWhile_ isLetter s === TL.span isLetter s
+
+----------------------------------------------------------------------------
+-- Helpers
+
+toNextTab :: Pos -> Pos -> Pos
+toNextTab w' c' = mkPos $ c + w - ((c - 1) `rem` w)
+  where
+     w = unPos w'
+     c = unPos c'
+
+sproxy :: Proxy String
+sproxy = Proxy
+
+bproxy :: Proxy B.ByteString
+bproxy = Proxy
+
+blproxy :: Proxy BL.ByteString
+blproxy = Proxy
+
+tproxy :: Proxy T.Text
+tproxy = Proxy
+
+tlproxy :: Proxy TL.Text
+tlproxy = Proxy
--- a/tests/Text/MegaparsecSpec.hs
+++ b/tests/Text/MegaparsecSpec.hs
@ -15,15 +15,15 @@ import Control.Monad.Cont
 import Control.Monad.Except
 import Control.Monad.Identity
 import Control.Monad.Reader
-import Data.Char (toUpper, chr)
+import Data.Char (toUpper, isLetter)
 import Data.Foldable (asum, concat)
-import Data.List (isPrefixOf, foldl')
+import Data.Function (on)
+import Data.List (isPrefixOf)
 import Data.List.NonEmpty (NonEmpty (..))
 import Data.Maybe (fromMaybe, listToMaybe, isJust)
 import Data.Monoid
 import Data.Proxy
 import Data.Void
-import Data.Word (Word8)
 import Prelude hiding (span, concat)
 import Test.Hspec
 import Test.Hspec.Megaparsec
@ -37,13 +37,10 @@ import qualified Control.Monad.State.Lazy    as L
 import qualified Control.Monad.State.Strict  as S
 import qualified Control.Monad.Writer.Lazy   as L
 import qualified Control.Monad.Writer.Strict as S
-import qualified Data.ByteString.Char8       as B
-import qualified Data.ByteString.Lazy.Char8  as BL
+import qualified Data.List                   as DL
 import qualified Data.List.NonEmpty          as NE
 import qualified Data.Semigroup              as G
 import qualified Data.Set                    as E
-import qualified Data.Text                   as T
-import qualified Data.Text.Lazy              as TL

 #if !MIN_VERSION_QuickCheck(2,8,2)
 instance (Arbitrary a, Ord a) => Arbitrary (E.Set a) where
@ -54,50 +51,6 @@ instance (Arbitrary a, Ord a) => Arbitrary (E.Set a) where
 spec :: Spec
 spec = do

-  describe "non-String instances of Stream" $ do
-    context "lazy ByteString" $ do
-      it "unconses correctly" $
-        property $ \ch' n -> do
-          let p  = many (char ch) :: Parsec Void BL.ByteString String
-              s  = replicate (getNonNegative n) ch
-              ch = byteToChar ch'
-          parse p "" (BL.pack s) `shouldParse` s
-      it "updates position like with String" $
-        property $ \w pos ch ->
-          updatePos (Proxy :: Proxy BL.ByteString) w pos ch `shouldBe`
-          updatePos (Proxy :: Proxy String) w pos ch
-    context "strict ByteString" $ do
-      it "unconses correctly" $
-        property $ \ch' n -> do
-          let p  = many (char ch) :: Parsec Void B.ByteString String
-              s  = replicate (getNonNegative n) ch
-              ch = byteToChar ch'
-          parse p "" (B.pack s) `shouldParse` s
-      it "updates position like with String" $
-        property $ \w pos ch ->
-          updatePos (Proxy :: Proxy B.ByteString) w pos ch `shouldBe`
-          updatePos (Proxy :: Proxy String) w pos ch
-    context "lazy Text" $ do
-      it "unconses correctly" $
-        property $ \ch n -> do
-          let p = many (char ch) :: Parsec Void TL.Text String
-              s = replicate (getNonNegative n) ch
-          parse p "" (TL.pack s) `shouldParse` s
-      it "updates position like with String" $
-        property $ \w pos ch ->
-          updatePos (Proxy :: Proxy TL.Text) w pos ch `shouldBe`
-          updatePos (Proxy :: Proxy String) w pos ch
-    context "strict Text" $ do
-      it "unconses correctly" $
-        property $ \ch n -> do
-          let p = many (char ch) :: Parsec Void T.Text String
-              s = replicate (getNonNegative n) ch
-          parse p "" (T.pack s) `shouldParse` s
-      it "updates position like with String" $
-        property $ \w pos ch ->
-          updatePos (Proxy :: Proxy T.Text) w pos ch `shouldBe`
-          updatePos (Proxy :: Proxy String) w pos ch
-
  describe "position in custom stream" $ do

    describe "eof" $
@ -144,18 +97,19 @@ spec = do
              , Left (err apos $ utok h <> etok span))

    describe "tokens" $
-      it "updates position is stream correctly" $
+      it "updates position in stream correctly" $
        property $ \st' ts -> forAll (incCoincidence st' ts) $ \st@State {..} -> do
          let p = tokens compareTokens ts :: CustomParser [Span]
-              compareTokens x y = spanBody x == spanBody y
-              updatePos' = updatePos (Proxy :: Proxy [Span]) stateTabWidth
-              il = length . takeWhile id $ zipWith compareTokens stateInput ts
+              compareTokens = (==) `on` fmap spanBody
+              compareToken  = (==) `on` spanBody
+              il = length . takeWhile id $ zipWith compareToken stateInput ts
              tl = length ts
              consumed = take il stateInput
              (apos, npos) =
                let (pos:|z) = statePos
-                in ( spanStart (head stateInput) :| z
-                   , foldl' (\q t -> snd (updatePos' q t)) pos consumed :| z )
+                    pxy = Proxy :: Proxy [Span]
+                in ( positionAt1 pxy pos (head stateInput) :| z
+                   , advanceN pxy stateTabWidth pos consumed :| z )
          if | null ts -> runParser' p st `shouldBe` (st, Right [])
             | null stateInput -> runParser' p st `shouldBe`
               ( st
@ -163,11 +117,39 @@ spec = do
             | il == tl -> runParser' p st `shouldBe`
               ( st { statePos             = npos
                    , stateTokensProcessed = stateTokensProcessed + fromIntegral tl
-                    , stateInput           = drop (length ts) stateInput }
+                    , stateInput           = drop tl stateInput }
               , Right consumed )
             | otherwise -> runParser' p st `shouldBe`
               ( st { statePos = apos }
-               , Left (err apos $ utoks (take (il + 1) stateInput) <> etoks ts) )
+               , Left (err apos $ utoks (take tl stateInput) <> etoks ts) )
+
+    describe "takeWhileP" $
+      it "updates position in stream correctly" $
+        property $ \st@State {..} -> do
+          let p = takeWhileP Nothing (const True) :: CustomParser [Span]
+              st' = st
+                { stateInput = []
+                , statePos   =
+                    case stateInput of
+                      [] -> statePos
+                      xs -> let _:|z = statePos in spanEnd (last xs) :| z
+                , stateTokensProcessed =
+                    stateTokensProcessed + length stateInput }
+          runParser' p st `shouldBe` (st', Right stateInput)
+
+    describe "takeWhile1P" $
+      it "updates position in stream correctly" $
+        property $ \st@State {..} -> not (null stateInput) ==> do
+          let p = takeWhile1P Nothing (const True) :: CustomParser [Span]
+              st' = st
+                { stateInput = []
+                , statePos   =
+                    case stateInput of
+                      [] -> statePos
+                      xs -> let _:|z = statePos in spanEnd (last xs) :| z
+                , stateTokensProcessed =
+                    stateTokensProcessed + length stateInput }
+          runParser' p st `shouldBe` (st', Right stateInput)

    describe "getNextTokenPosition" $ do
      context "when input stream is empty" $
@ -285,14 +267,11 @@ spec = do
          it "signals correct error message" $
            property $ \s0 s1 s -> not (s0 `isPrefixOf` s) && not (s1 `isPrefixOf` s) ==> do
              let p = string s0 <|> string s1
-                  z0' = toFirstMismatch (==) s0 s
-                  z1' = toFirstMismatch (==) s1 s
+                  z = take (max (length s0) (length s1)) s
              prs  p s `shouldFailWith` err posI
                (etoks s0 <>
                 etoks s1 <>
-                 (if null s then ueof else mempty) <>
-                 (if null z0' then mempty else utoks z0') <>
-                 (if null z1' then mempty else utoks z1'))
+                 (if null s then ueof else utoks z))
      context "with two complex parsers" $ do
        context "when stream begins with matching character" $
          it "parses it" $
@ -530,51 +509,6 @@ spec = do

  describe "primitive combinators" $ do

-    describe "unexpected" $
-      it "signals correct parse error" $
-        property $ \item -> do
-          let p :: MonadParsec Void String m => m ()
-              p = void (unexpected item)
-          grs p "" (`shouldFailWith` TrivialError posI (E.singleton item) E.empty)
-
-    describe "match" $
-      it "return consumed tokens along with the result" $
-        property $ \str -> do
-          let p  = match (string str)
-          prs  p str `shouldParse`     (str,str)
-          prs' p str `succeedsLeaving` ""
-
-    describe "region" $ do
-      context "when inner parser succeeds" $
-        it "has no effect" $
-          property $ \st e n -> do
-            let p :: Parser Int
-                p = region (const e) (pure n)
-            runParser' p st `shouldBe` (st, Right (n :: Int))
-      context "when inner parser fails" $
-        it "the given function is used on the parse error" $
-          property $ \st' e pos' -> do
-            let p :: Parsec Int String Int
-                p = region f $
-                  case e of
-                    TrivialError _ us ps -> failure us ps
-                    FancyError   _ xs    -> fancyFailure   xs
-                f (TrivialError pos us ps) = FancyError
-                  (max pos pos')
-                  (E.singleton . ErrorCustom $ E.size us + E.size ps)
-                f (FancyError pos xs) = FancyError
-                  (max pos pos')
-                  (E.singleton . ErrorCustom $ E.size xs)
-                r = FancyError
-                  (max (errorPos e) pos')
-                  (E.singleton . ErrorCustom $
-                    case e of
-                      TrivialError _ us ps -> E.size us + E.size ps
-                      FancyError   _ xs    -> E.size xs )
-                finalPos = max (errorPos e) pos'
-                st = st' { statePos = errorPos e }
-            runParser' p st `shouldBe` (st { statePos = finalPos }, Left r)
-
    describe "failure" $
      it "signals correct parse error" $
        property $ \us ps -> do
@ -997,8 +931,11 @@ spec = do
            grs' eof s (`failsLeaving` s)

    describe "token" $ do
-      let f x = E.singleton (Tokens $ nes x)
-          testChar a x = if x == a then Right x else Left (f x, f a)
+      let f x = Tokens (nes x)
+          testChar a x =
+            if x == a
+              then Right x
+              else Left (pure (f x), E.singleton (f a))
      context "when supplied predicate is satisfied" $
        it "succeeds" $
          property $ \a as mtok -> do
@ -1013,7 +950,7 @@ spec = do
            let p :: MonadParsec Void String m => m Char
                p = token (testChar b) mtok
                s = a : as
-                us = E.singleton (Tokens $ nes a)
+                us = pure (Tokens $ nes a)
                ps = E.singleton (Tokens $ nes b)
            grs  p s (`shouldFailWith` TrivialError posI us ps)
            grs' p s (`failsLeaving` s)
@ -1022,7 +959,7 @@ spec = do
          property $ \a mtok -> do
            let p :: MonadParsec Void String m => m Char
                p = token (testChar a) mtok
-                us = E.singleton EndOfInput
+                us = pure EndOfInput
                ps = maybe E.empty (E.singleton . Tokens . nes) mtok
            grs p "" (`shouldFailWith` TrivialError posI us ps)

@ -1040,10 +977,176 @@ spec = do
          property $ \str s -> not (str `isPrefixOf` s) ==> do
            let p :: MonadParsec Void String m => m String
                p = tokens (==) str
-                z = toFirstMismatch (==) str s
+                z = take (length str) s
            grs  p s (`shouldFailWith` err posI (utoks z <> etoks str))
            grs' p s (`failsLeaving` s)

+    describe "takeWhileP" $ do
+      context "when stream is not empty" $
+        it "consumes all matching tokens, zero or more" $
+          property $ \s -> not (null s) ==> do
+            let p :: MonadParsec Void String m => m String
+                p = takeWhileP Nothing isLetter
+                (z,zs) = DL.span isLetter s
+            grs  p s (`shouldParse` z)
+            grs' p s (`succeedsLeaving` zs)
+      context "when stream is empty" $
+        it "succeeds returning empty chunk" $ do
+          let p :: MonadParsec Void String m => m String
+              p = takeWhileP Nothing isLetter
+          grs  p "" (`shouldParse` "")
+          grs' p "" (`succeedsLeaving` "")
+      context "with two takeWhileP in a row (testing hints)" $ do
+        let p :: MonadParsec Void String m => m String
+            p = do
+              void $ takeWhileP (Just "foo") (== 'a')
+              void $ takeWhileP (Just "bar") (== 'b')
+              empty
+        context "when the second one does not consume" $
+          it "hints are combined properly" $ do
+            let s = "aaa"
+                pe = err (posN 3 s) (elabel "foo" <> elabel "bar")
+            grs  p s (`shouldFailWith` pe)
+            grs' p s (`failsLeaving` "")
+        context "when the second one consumes" $
+          it "only hints of the second one affect parse error" $ do
+            let s = "aaabbb"
+                pe = err (posN 6 s) (elabel "bar")
+            grs  p s (`shouldFailWith` pe)
+            grs' p s (`failsLeaving` "")
+
+    describe "takeWhile1P" $ do
+      context "when stream is prefixed with matching tokens" $
+        it "consumes the tokens" $
+          property $ \s' -> do
+            let p :: MonadParsec Void String m => m String
+                p = takeWhile1P Nothing isLetter
+                s = 'a' : s'
+                (z,zs) = DL.span isLetter s
+            grs  p s (`shouldParse` z)
+            grs' p s (`succeedsLeaving` zs)
+      context "when stream is not prefixed with at least one matching token" $
+        it "signals correct parse error" $
+          property $ \s' -> do
+            let p :: MonadParsec Void String m => m String
+                p = takeWhile1P (Just "foo") isLetter
+                s = '3' : s'
+                pe = err posI (utok '3' <> elabel "foo")
+            grs  p s (`shouldFailWith` pe)
+            grs' p s (`failsLeaving` s)
+      context "when stream is empty" $
+        it "signals correct parse error" $ do
+          let p :: MonadParsec Void String m => m String
+              p = takeWhile1P (Just "foo") isLetter
+              pe = err posI (ueof <> elabel "foo")
+          grs  p "" (`shouldFailWith` pe)
+          grs' p "" (`failsLeaving` "")
+      context "with two takeWhile1P in a row (testing hints)" $ do
+        let p :: MonadParsec Void String m => m String
+            p = do
+              void $ takeWhile1P (Just "foo") (== 'a')
+              void $ takeWhile1P (Just "bar") (== 'b')
+              empty
+        context "when the second one does not consume" $
+          it "hints are combined properly" $ do
+            let s = "aaa"
+                pe = err (posN 3 s) (ueof <> elabel "foo" <> elabel "bar")
+            grs  p s (`shouldFailWith` pe)
+            grs' p s (`failsLeaving` "")
+        context "when the second one consumes" $
+          it "only hints of the second one affect parse error" $ do
+            let s = "aaabbb"
+                pe = err (posN 6 s) (elabel "bar")
+            grs  p s (`shouldFailWith` pe)
+            grs' p s (`failsLeaving` "")
+
+  describe "derivatives from primitive combinators" $ do
+
+    describe "unexpected" $
+      it "signals correct parse error" $
+        property $ \item -> do
+          let p :: MonadParsec Void String m => m ()
+              p = void (unexpected item)
+          grs p "" (`shouldFailWith` TrivialError posI (pure item) E.empty)
+
+    describe "match" $
+      it "return consumed tokens along with the result" $
+        property $ \str -> do
+          let p  = match (string str)
+          prs  p str `shouldParse`     (str,str)
+          prs' p str `succeedsLeaving` ""
+
+    describe "region" $ do
+      context "when inner parser succeeds" $
+        it "has no effect" $
+          property $ \st e n -> do
+            let p :: Parser Int
+                p = region (const e) (pure n)
+            runParser' p st `shouldBe` (st, Right (n :: Int))
+      context "when inner parser fails" $
+        it "the given function is used on the parse error" $
+          property $ \st' e pos' -> do
+            let p :: Parsec Int String Int
+                p = region f $
+                  case e of
+                    TrivialError _ us ps -> failure us ps
+                    FancyError   _ xs    -> fancyFailure   xs
+                f (TrivialError pos us ps) = FancyError
+                  (max pos pos')
+                  (E.singleton . ErrorCustom $ maybe 0 (const 1) us + E.size ps)
+                f (FancyError pos xs) = FancyError
+                  (max pos pos')
+                  (E.singleton . ErrorCustom $ E.size xs)
+                r = FancyError
+                  (max (errorPos e) pos')
+                  (E.singleton . ErrorCustom $
+                    case e of
+                      TrivialError _ us ps -> maybe 0 (const 1) us + E.size ps
+                      FancyError   _ xs    -> E.size xs )
+                finalPos = max (errorPos e) pos'
+                st = st' { statePos = errorPos e }
+            runParser' p st `shouldBe` (st { statePos = finalPos }, Left r)
+
+    describe "skipWhileP" $ do
+      context "when stream is not empty" $
+        it "consumes all matching tokens, zero or more" $
+          property $ \s -> not (null s) ==> do
+            let p :: MonadParsec Void String m => m ()
+                p = skipWhileP Nothing isLetter
+            grs  p s (`shouldParse` ())
+            grs' p s (`succeedsLeaving` dropWhile isLetter s)
+      context "when stream is empty" $
+        it "succeeds returning empty chunk" $ do
+          let p :: MonadParsec Void String m => m ()
+              p = skipWhileP Nothing isLetter
+          grs  p "" (`shouldParse` ())
+          grs' p "" (`succeedsLeaving` "")
+    describe "skipWhile1P" $ do
+      context "when stream is prefixed with matching tokens" $
+        it "consumes the tokens" $
+          property $ \s' -> do
+            let p :: MonadParsec Void String m => m ()
+                p = skipWhile1P Nothing isLetter
+                s = 'a' : s'
+            grs  p s (`shouldParse` ())
+            grs' p s (`succeedsLeaving` dropWhile isLetter s)
+      context "when stream is not prefixed with at least one matching token" $
+        it "signals correct parse error" $
+          property $ \s' -> do
+            let p :: MonadParsec Void String m => m ()
+                p = skipWhile1P (Just "foo") isLetter
+                s = '3' : s'
+                pe = err posI (utok '3' <> elabel "foo")
+            grs  p s (`shouldFailWith` pe)
+            grs' p s (`failsLeaving` s)
+      context "when stream is empty" $
+        it "signals correct parse error" $ do
+          let p :: MonadParsec Void String m => m ()
+              p = skipWhile1P (Just "foo") isLetter
+              pe = err posI (ueof <> elabel "foo")
+          grs  p "" (`shouldFailWith` pe)
+          grs' p "" (`failsLeaving` "")
+
  describe "combinators for manipulating parser state" $ do

    describe "setInput and getInput" $
@ -1495,9 +1598,6 @@ spec = do
 ----------------------------------------------------------------------------
 -- Helpers

-byteToChar :: Word8 -> Char
-byteToChar = chr . fromIntegral
-
 -- | This data type represents tokens in custom input stream.

 data Span = Span
@ -1508,9 +1608,26 @@ data Span = Span

 instance Stream [Span] where
  type Token [Span] = Span
-  uncons [] = Nothing
-  uncons (t:ts) = Just (t, ts)
-  updatePos _ _ _ (Span start end _) = (start, end)
+  type Tokens [Span] = [Span]
+  tokenToChunk Proxy = pure
+  tokensToChunk Proxy = id
+  chunkToTokens Proxy = id
+  chunkLength Proxy = length
+  chunkEmpty Proxy = null
+  positionAt1 Proxy _ (Span start _ _) = start
+  positionAtN Proxy pos [] = pos
+  positionAtN Proxy _ (Span start _ _:_) = start
+  advance1 Proxy _ _ (Span _ end _) = end
+  advanceN Proxy _ pos [] = pos
+  advanceN Proxy _ _ ts =
+    let Span _ end _ = last ts in end
+  take1_ [] = Nothing
+  take1_ (t:ts) = Just (t, ts)
+  takeN_ n s
+    | n <= 0   = Just ([], s)
+    | null s   = Nothing
+    | otherwise = Just (splitAt n s)
+  takeWhile_ = DL.span

 instance Arbitrary Span where
  arbitrary = do
@ -1527,11 +1644,11 @@ type CustomParser = Parsec Void [Span]
 pSpan :: Span -> CustomParser Span
 pSpan span = token testToken (Just span)
  where
-    f = E.singleton . Tokens . nes
+    f = Tokens . nes
    testToken x =
      if spanBody x == spanBody span
        then Right span
-        else Left (f x, f span)
+        else Left (pure (f x), E.singleton (f span))

 incCoincidence :: State [Span] -> [Span] -> Gen (State [Span])
 incCoincidence st ts = do
@ -1545,7 +1662,10 @@ emulateStrParsing
  -> String
  -> (State String, Either (ParseError Char Void) String)
 emulateStrParsing st@(State i (pos:|z) tp w) s =
-  if l == length s
-    then (State (drop l i) (updatePosString w pos s :| z) (tp + fromIntegral l) w, Right s)
-    else (st, Left $ err (pos:|z) (etoks s <> utoks (take (l + 1) i)))
-  where l = length (takeWhile id $ zipWith (==) s i)
+  if s == take l i
+    then ( State (drop l i) (updatePosString w pos s :| z) (tp + fromIntegral l) w
+         , Right s )
+    else ( st
+         , Left $ err (pos:|z) (etoks s <> utoks (take l i)) )
+  where
+    l = length s