mirror of
https://github.com/mrkkrp/megaparsec.git
synced 2024-11-26 23:02:07 +03:00
Move general-enough combinators to ‘Text.Megaparsec’ (#267)
This commit is contained in:
parent
cb762351d0
commit
4fac009b48
24
CHANGELOG.md
24
CHANGELOG.md
@ -1,5 +1,29 @@
|
||||
## Megaparsec 7.0.0
|
||||
|
||||
* Moved some general combinators from `Text.Megaparsec.Char` and
|
||||
`Text.Megaparsec.Byte` to `Text.Megaparsec`, renaming some of them for
|
||||
clarity.
|
||||
|
||||
Practical consequences:
|
||||
|
||||
* Now there is the `single` combinator that is a generalization of `char`
|
||||
for arbitrary streams. `Text.Megaparsec.Char` and `Text.Megaparsec.Byte`
|
||||
still contain `char` as type-constrained versions of `single`.
|
||||
|
||||
* Similarly, now there is the `chunk` combinator that is a generalization
|
||||
of `string` for arbitrary streams. `Text.Megaparsec.Char` and
|
||||
`Text.Megaparsec.Byte` still contain `string` as type-constrained
|
||||
versions of `single`.
|
||||
|
||||
* `satisfy` does not depend on type of token, and so it now lives in
|
||||
`Text.Megaparsec`.
|
||||
|
||||
* `anyToken` was renamed to `anySingle` and moved to `Text.Megaparsec`.
|
||||
|
||||
* `notChar` was renamed to `anySingleBut` and moved to `Text.Megaparsec`.
|
||||
|
||||
* `oneOf` and `noneOf` were moved to `Text.Megaparsec`.
|
||||
|
||||
* Simplified the type of the `token` primitive. It now takes just a matching
|
||||
function `Token s -> Maybe a` as the first argument and the collection of
|
||||
expected items `Set (ErrorItem (Token s))` as the second argument. This
|
||||
|
@ -94,6 +94,13 @@ module Text.Megaparsec
|
||||
-- * Primitive combinators
|
||||
, MonadParsec (..)
|
||||
-- * Derivatives of primitive combinators
|
||||
, single
|
||||
, satisfy
|
||||
, anySingle
|
||||
, anySingleBut
|
||||
, oneOf
|
||||
, noneOf
|
||||
, chunk
|
||||
, (<?>)
|
||||
, unexpected
|
||||
, customFailure
|
||||
@ -155,6 +162,8 @@ import qualified Data.Set as E
|
||||
|
||||
#if !MIN_VERSION_base(4,8,0)
|
||||
import Control.Applicative
|
||||
import Data.Foldable (Foldable, elem, notElem)
|
||||
import Prelude hiding (elem, notElem)
|
||||
#endif
|
||||
|
||||
-- $reexports
|
||||
@ -771,16 +780,19 @@ class (Stream s, A.Alternative m, MonadPlus m)
|
||||
|
||||
eof :: m ()
|
||||
|
||||
-- | The parser @'token' test exp@ accepts a token @t@ with result @x@
|
||||
-- when the function @test t@ returns @'Just' x@. @exp@ specifies the
|
||||
-- collection of expected items to report in error messages.
|
||||
-- | The parser @'token' test expected@ accepts a token @t@ with result
|
||||
-- @x@ when the function @test t@ returns @'Just' x@. @expected@ specifies
|
||||
-- the collection of expected items to report in error messages.
|
||||
--
|
||||
-- This is the most primitive combinator for accepting tokens. For
|
||||
-- example, the 'Text.Megaparsec.Char.satisfy' parser is implemented as:
|
||||
--
|
||||
-- > satisfy f = token testChar E.empty
|
||||
-- > satisfy f = token testToken E.empty
|
||||
-- > where
|
||||
-- > testChar x = if f x then Just x else Nothing
|
||||
-- > testToken x = if f x then Just x else Nothing
|
||||
--
|
||||
-- __Note__: type signature of this primitive was changed in the version
|
||||
-- /7.0.0/.
|
||||
|
||||
token
|
||||
:: (Token s -> Maybe a)
|
||||
@ -789,14 +801,14 @@ class (Stream s, A.Alternative m, MonadPlus m)
|
||||
-- ^ Expected items (in case of an error)
|
||||
-> m a
|
||||
|
||||
-- | The parser @'tokens' test@ parses a chunk of input and returns it.
|
||||
-- Supplied predicate @test@ is used to check equality of given and parsed
|
||||
-- chunks after a candidate chunk of correct length is fetched from the
|
||||
-- stream.
|
||||
-- | The parser @'tokens' test chk@ parses a chunk of input @chk@ and
|
||||
-- returns it. The supplied predicate @test@ is used to check equality of
|
||||
-- given and parsed chunks after a candidate chunk of correct length is
|
||||
-- fetched from the stream.
|
||||
--
|
||||
-- This can be used for example to write 'Text.Megaparsec.Char.string':
|
||||
-- This can be used for example to write 'Text.Megaparsec.chunk':
|
||||
--
|
||||
-- > string = tokens (==)
|
||||
-- > chunk = tokens (==)
|
||||
--
|
||||
-- Note that beginning from Megaparsec 4.4.0, this is an auto-backtracking
|
||||
-- primitive, which means that if it fails, it never consumes any input.
|
||||
@ -1307,6 +1319,132 @@ fixs' _ (Right (b,s,w)) = (Right b, s, w)
|
||||
----------------------------------------------------------------------------
|
||||
-- Derivatives of primitive combinators
|
||||
|
||||
-- | @'single' t@ only matches the single token @t@.
|
||||
--
|
||||
-- > semicolon = single ';'
|
||||
--
|
||||
-- See also: 'token', 'anySingle', 'Text.Megaparsec.Byte.char',
|
||||
-- 'Text.Megaparsec.Char.char'.
|
||||
--
|
||||
-- @since 7.0.0
|
||||
|
||||
single :: MonadParsec e s m
|
||||
=> Token s -- ^ Token to match
|
||||
-> m (Token s)
|
||||
single t = token testToken expected
|
||||
where
|
||||
testToken x = if x == t then Just x else Nothing
|
||||
expected = E.singleton (Tokens (t:|[]))
|
||||
{-# INLINE single #-}
|
||||
|
||||
-- | The parser @'satisfy' f@ succeeds for any token for which the supplied
|
||||
-- function @f@ returns 'True'. Returns the character that is actually
|
||||
-- parsed.
|
||||
--
|
||||
-- > digitChar = satisfy isDigit <?> "digit"
|
||||
-- > oneOf cs = satisfy (`elem` cs)
|
||||
--
|
||||
-- See also: 'anySingle', 'anySingleBut', 'oneOf', 'noneOf'.
|
||||
--
|
||||
-- @since 7.0.0
|
||||
|
||||
satisfy :: MonadParsec e s m
|
||||
=> (Token s -> Bool) -- ^ Predicate to apply
|
||||
-> m (Token s)
|
||||
satisfy f = token testChar E.empty
|
||||
where
|
||||
testChar x = if f x then Just x else Nothing
|
||||
{-# INLINE satisfy #-}
|
||||
|
||||
-- | Parse and return a single token. It's a good idea to attach a 'label'
|
||||
-- to this parser manually.
|
||||
--
|
||||
-- > anySingle = satisfy (const True)
|
||||
--
|
||||
-- See also: 'satisfy', 'anySingleBut'.
|
||||
--
|
||||
-- @since 7.0.0
|
||||
|
||||
anySingle :: MonadParsec e s m => m (Token s)
|
||||
anySingle = satisfy (const True)
|
||||
{-# INLINE anySingle #-}
|
||||
|
||||
-- | Match any token but the given one. It's a good idea to attach a 'label'
|
||||
-- to this parser manually.
|
||||
--
|
||||
-- > anySingleBut t = satisfy (/= t)
|
||||
--
|
||||
-- See also: 'single', 'anySingle', 'satisfy'.
|
||||
--
|
||||
-- @since 7.0.0
|
||||
|
||||
anySingleBut :: MonadParsec e s m
|
||||
=> Token s -- ^ Token we should not match
|
||||
-> m (Token s)
|
||||
anySingleBut t = satisfy (/= t)
|
||||
{-# INLINE anySingleBut #-}
|
||||
|
||||
-- | @'oneOf' ts@ succeeds if the current token is in the supplied
|
||||
-- collection of tokens @ts@. Returns the parsed token. Note that this
|
||||
-- parser cannot automatically generate the “expected” component of error
|
||||
-- message, so usually you should label it manually with 'label' or ('<?>').
|
||||
--
|
||||
-- > oneOf cs = satisfy (`elem` cs)
|
||||
--
|
||||
-- See also: 'satisfy'.
|
||||
--
|
||||
-- > digit = oneOf ['0'..'9'] <?> "digit"
|
||||
--
|
||||
-- __Performance note__: prefer 'satisfy' when you can because it's faster
|
||||
-- when you have only a couple of tokens to compare to:
|
||||
--
|
||||
-- > quoteFast = satisfy (\x -> x == '\'' || x == '\"')
|
||||
-- > quoteSlow = oneOf "'\""
|
||||
--
|
||||
-- @since 7.0.0
|
||||
|
||||
oneOf :: (Foldable f, MonadParsec e s m)
|
||||
=> f (Token s) -- ^ Collection of matching tokens
|
||||
-> m (Token s)
|
||||
oneOf cs = satisfy (`elem` cs)
|
||||
{-# INLINE oneOf #-}
|
||||
|
||||
-- | As the dual of 'oneOf', @'noneOf' ts@ succeeds if the current token
|
||||
-- /not/ in the supplied list of tokens @ts@. Returns the parsed character.
|
||||
-- Note that this parser cannot automatically generate the “expected”
|
||||
-- component of error message, so usually you should label it manually with
|
||||
-- 'label' or ('<?>').
|
||||
--
|
||||
-- > noneOf cs = satisfy (`notElem` cs)
|
||||
--
|
||||
-- See also: 'satisfy'.
|
||||
--
|
||||
-- __Performance note__: prefer 'satisfy' and 'singleBut' when you can
|
||||
-- because it's faster.
|
||||
--
|
||||
-- @since 7.0.0
|
||||
|
||||
noneOf :: (Foldable f, MonadParsec e s m)
|
||||
=> f (Token s) -- ^ Collection of taken we should not match
|
||||
-> m (Token s)
|
||||
noneOf cs = satisfy (`notElem` cs)
|
||||
{-# INLINE noneOf #-}
|
||||
|
||||
-- | @'chunk' chk@ only matches the chunk @chk@.
|
||||
--
|
||||
-- > divOrMod = chunk "div" <|> chunk "mod"
|
||||
--
|
||||
-- See also: 'tokens', 'Text.Megaparsec.Char.string',
|
||||
-- 'Text.Megaparsec.Byte.string'.
|
||||
--
|
||||
-- @since 7.0.0
|
||||
|
||||
chunk :: MonadParsec e s m
|
||||
=> Tokens s -- ^ Chunk to match
|
||||
-> m (Tokens s)
|
||||
chunk = tokens (==)
|
||||
{-# INLINE chunk #-}
|
||||
|
||||
-- | A synonym for 'label' in the form of an operator.
|
||||
|
||||
infix 0 <?>
|
||||
@ -1318,10 +1456,10 @@ infix 0 <?>
|
||||
-- | The parser @'unexpected' item@ fails with an error message telling
|
||||
-- about unexpected item @item@ without consuming any input.
|
||||
--
|
||||
-- > unexpected item = failure (pure item) Set.empty
|
||||
-- > unexpected item = failure (Just item) Set.empty
|
||||
|
||||
unexpected :: MonadParsec e s m => ErrorItem (Token s) -> m a
|
||||
unexpected item = failure (pure item) E.empty
|
||||
unexpected item = failure (Just item) E.empty
|
||||
{-# INLINE unexpected #-}
|
||||
|
||||
-- | Report a custom parse error. For a more general version, see
|
||||
@ -1396,6 +1534,8 @@ takeRest = takeWhileP Nothing (const True)
|
||||
|
||||
-- | Return 'True' when end of input has been reached.
|
||||
--
|
||||
-- > atEnd = option False (True <$ hidden eof)
|
||||
--
|
||||
-- @since 6.0.0
|
||||
|
||||
atEnd :: MonadParsec e s m => m Bool
|
||||
@ -1410,15 +1550,14 @@ atEnd = option False (True <$ hidden eof)
|
||||
getInput :: MonadParsec e s m => m s
|
||||
getInput = stateInput <$> getParserState
|
||||
|
||||
-- | @'setInput' input@ continues parsing with @input@. The 'getInput' and
|
||||
-- 'setInput' functions can for example be used to deal with include files.
|
||||
-- | @'setInput' input@ continues parsing with @input@.
|
||||
|
||||
setInput :: MonadParsec e s m => s -> m ()
|
||||
setInput s = updateParserState (\(State _ pos tp w) -> State s pos tp w)
|
||||
|
||||
-- | Return the current source position.
|
||||
--
|
||||
-- See also: 'setPosition', 'pushPosition', 'popPosition', and 'SourcePos'.
|
||||
-- See also: 'getNextTokenPosition'.
|
||||
|
||||
getPosition :: MonadParsec e s m => m SourcePos
|
||||
getPosition = NE.head . statePos <$> getParserState
|
||||
@ -1426,6 +1565,8 @@ getPosition = NE.head . statePos <$> getParserState
|
||||
-- | Get the position where the next token in the stream begins. If the
|
||||
-- stream is empty, return 'Nothing'.
|
||||
--
|
||||
-- See also: 'getPosition'.
|
||||
--
|
||||
-- @since 5.3.0
|
||||
|
||||
getNextTokenPosition :: forall e s m. MonadParsec e s m => m (Maybe SourcePos)
|
||||
@ -1443,10 +1584,10 @@ setPosition :: MonadParsec e s m => SourcePos -> m ()
|
||||
setPosition pos = updateParserState $ \(State s (_:|z) tp w) ->
|
||||
State s (pos:|z) tp w
|
||||
|
||||
-- | Push a position into stack of positions and continue parsing working
|
||||
-- | Push a position to the stack of positions and continue parsing working
|
||||
-- with this position. Useful for working with include files and the like.
|
||||
--
|
||||
-- See also: 'getPosition', 'setPosition', 'popPosition', and 'SourcePos'.
|
||||
-- See also: 'popPosition'.
|
||||
--
|
||||
-- @since 5.0.0
|
||||
|
||||
@ -1458,7 +1599,7 @@ pushPosition pos = updateParserState $ \(State s z tp w) ->
|
||||
-- element (in that case the stack of positions remains the same). This is
|
||||
-- how to return to previous source file after 'pushPosition'.
|
||||
--
|
||||
-- See also: 'getPosition', 'setPosition', 'pushPosition', and 'SourcePos'.
|
||||
-- See also: 'pushPosition'.
|
||||
--
|
||||
-- @since 5.0.0
|
||||
|
||||
@ -1470,6 +1611,8 @@ popPosition = updateParserState $ \(State s z tp w) ->
|
||||
|
||||
-- | Get the number of tokens processed so far.
|
||||
--
|
||||
-- See also: 'setTokensProcessed'.
|
||||
--
|
||||
-- @since 6.0.0
|
||||
|
||||
getTokensProcessed :: MonadParsec e s m => m Int
|
||||
@ -1477,6 +1620,8 @@ getTokensProcessed = stateTokensProcessed <$> getParserState
|
||||
|
||||
-- | Set the number of tokens processed so far.
|
||||
--
|
||||
-- See also: 'getTokensProcessed'.
|
||||
--
|
||||
-- @since 6.0.0
|
||||
|
||||
setTokensProcessed :: MonadParsec e s m => Int -> m ()
|
||||
@ -1486,18 +1631,24 @@ setTokensProcessed tp = updateParserState $ \(State s pos _ w) ->
|
||||
-- | Return the tab width. The default tab width is equal to
|
||||
-- 'defaultTabWidth'. You can set a different tab width with the help of
|
||||
-- 'setTabWidth'.
|
||||
--
|
||||
-- See also: 'setTabWidth'.
|
||||
|
||||
getTabWidth :: MonadParsec e s m => m Pos
|
||||
getTabWidth = stateTabWidth <$> getParserState
|
||||
|
||||
-- | Set tab width. If the argument of the function is not a positive
|
||||
-- number, 'defaultTabWidth' will be used.
|
||||
--
|
||||
-- See also: 'getTabWidth'.
|
||||
|
||||
setTabWidth :: MonadParsec e s m => Pos -> m ()
|
||||
setTabWidth w = updateParserState $ \(State s pos tp _) ->
|
||||
State s pos tp w
|
||||
|
||||
-- | @'setParserState' st@ sets the parser state to @st@.
|
||||
--
|
||||
-- See also: 'getParserState', 'updateParserState'.
|
||||
|
||||
setParserState :: MonadParsec e s m => State s -> m ()
|
||||
setParserState st = updateParserState (const st)
|
||||
@ -1615,4 +1766,4 @@ streamTake :: forall s. Stream s => Int -> s -> [Token s]
|
||||
streamTake n s =
|
||||
case fst <$> takeN_ n s of
|
||||
Nothing -> []
|
||||
Just chunk -> chunkToTokens (Proxy :: Proxy s) chunk
|
||||
Just chk -> chunkToTokens (Proxy :: Proxy s) chk
|
||||
|
@ -34,16 +34,11 @@ module Text.Megaparsec.Byte
|
||||
, octDigitChar
|
||||
, hexDigitChar
|
||||
, asciiChar
|
||||
-- * More general parsers
|
||||
, C.char
|
||||
-- * Single byte
|
||||
, char
|
||||
, char'
|
||||
, C.anyChar
|
||||
, C.notChar
|
||||
, C.oneOf
|
||||
, C.noneOf
|
||||
, C.satisfy
|
||||
-- * Sequence of bytes
|
||||
, C.string
|
||||
, string
|
||||
, C.string' )
|
||||
where
|
||||
|
||||
@ -62,14 +57,14 @@ import qualified Text.Megaparsec.Char as C
|
||||
-- | Parse a newline byte.
|
||||
|
||||
newline :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
newline = C.char 10
|
||||
newline = char 10
|
||||
{-# INLINE newline #-}
|
||||
|
||||
-- | Parse a carriage return character followed by a newline character.
|
||||
-- Return the sequence of characters parsed.
|
||||
|
||||
crlf :: forall e s m. (MonadParsec e s m, Token s ~ Word8) => m (Tokens s)
|
||||
crlf = C.string (tokensToChunk (Proxy :: Proxy s) [13,10])
|
||||
crlf = string (tokensToChunk (Proxy :: Proxy s) [13,10])
|
||||
{-# INLINE crlf #-}
|
||||
|
||||
-- | Parse a CRLF (see 'crlf') or LF (see 'newline') end of line. Return the
|
||||
@ -84,7 +79,7 @@ eol = (tokenToChunk (Proxy :: Proxy s) <$> newline)
|
||||
-- | Parse a tab character.
|
||||
|
||||
tab :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
tab = C.char 9
|
||||
tab = char 9
|
||||
{-# INLINE tab #-}
|
||||
|
||||
-- | Skip /zero/ or more white space characters.
|
||||
@ -109,51 +104,51 @@ space1 = void $ takeWhile1P (Just "white space") isSpace'
|
||||
-- | Parse a control character.
|
||||
|
||||
controlChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
controlChar = C.satisfy (isControl . toChar) <?> "control character"
|
||||
controlChar = satisfy (isControl . toChar) <?> "control character"
|
||||
{-# INLINE controlChar #-}
|
||||
|
||||
-- | Parse a space character, and the control characters: tab, newline,
|
||||
-- carriage return, form feed, and vertical tab.
|
||||
|
||||
spaceChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
spaceChar = C.satisfy isSpace' <?> "white space"
|
||||
spaceChar = satisfy isSpace' <?> "white space"
|
||||
{-# INLINE spaceChar #-}
|
||||
|
||||
-- | Parse an upper-case character.
|
||||
|
||||
upperChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
upperChar = C.satisfy (isUpper . toChar) <?> "uppercase letter"
|
||||
upperChar = satisfy (isUpper . toChar) <?> "uppercase letter"
|
||||
{-# INLINE upperChar #-}
|
||||
|
||||
-- | Parse a lower-case alphabetic character.
|
||||
|
||||
lowerChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
lowerChar = C.satisfy (isLower . toChar) <?> "lowercase letter"
|
||||
lowerChar = satisfy (isLower . toChar) <?> "lowercase letter"
|
||||
{-# INLINE lowerChar #-}
|
||||
|
||||
-- | Parse an alphabetic character: lower-case or upper-case.
|
||||
|
||||
letterChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
letterChar = C.satisfy (isLetter . toChar) <?> "letter"
|
||||
letterChar = satisfy (isLetter . toChar) <?> "letter"
|
||||
{-# INLINE letterChar #-}
|
||||
|
||||
-- | Parse an alphabetic or digit characters.
|
||||
|
||||
alphaNumChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
alphaNumChar = C.satisfy (isAlphaNum . toChar) <?> "alphanumeric character"
|
||||
alphaNumChar = satisfy (isAlphaNum . toChar) <?> "alphanumeric character"
|
||||
{-# INLINE alphaNumChar #-}
|
||||
|
||||
-- | Parse a printable character: letter, number, mark, punctuation, symbol
|
||||
-- or space.
|
||||
|
||||
printChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
printChar = C.satisfy (isPrint . toChar) <?> "printable character"
|
||||
printChar = satisfy (isPrint . toChar) <?> "printable character"
|
||||
{-# INLINE printChar #-}
|
||||
|
||||
-- | Parse an ASCII digit, i.e between “0” and “9”.
|
||||
|
||||
digitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
digitChar = C.satisfy isDigit' <?> "digit"
|
||||
digitChar = satisfy isDigit' <?> "digit"
|
||||
where
|
||||
isDigit' x = x >= 48 && x <= 57
|
||||
{-# INLINE digitChar #-}
|
||||
@ -161,7 +156,7 @@ digitChar = C.satisfy isDigit' <?> "digit"
|
||||
-- | Parse an octal digit, i.e. between “0” and “7”.
|
||||
|
||||
octDigitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
octDigitChar = C.satisfy isOctDigit' <?> "octal digit"
|
||||
octDigitChar = satisfy isOctDigit' <?> "octal digit"
|
||||
where
|
||||
isOctDigit' x = x >= 48 && x <= 55
|
||||
{-# INLINE octDigitChar #-}
|
||||
@ -170,18 +165,26 @@ octDigitChar = C.satisfy isOctDigit' <?> "octal digit"
|
||||
-- “A” and “F”.
|
||||
|
||||
hexDigitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
hexDigitChar = C.satisfy (isHexDigit . toChar) <?> "hexadecimal digit"
|
||||
hexDigitChar = satisfy (isHexDigit . toChar) <?> "hexadecimal digit"
|
||||
{-# INLINE hexDigitChar #-}
|
||||
|
||||
-- | Parse a character from the first 128 characters of the Unicode
|
||||
-- character set, corresponding to the ASCII character set.
|
||||
|
||||
asciiChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
asciiChar = C.satisfy (< 128) <?> "ASCII character"
|
||||
asciiChar = satisfy (< 128) <?> "ASCII character"
|
||||
{-# INLINE asciiChar #-}
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
-- More general parsers
|
||||
-- Single byte
|
||||
|
||||
-- | A type-constrained version of 'single'.
|
||||
--
|
||||
-- > newline = char 10
|
||||
|
||||
char :: (MonadParsec e s m, Token s ~ Word8) => Token s -> m (Token s)
|
||||
char = single
|
||||
{-# INLINE char #-}
|
||||
|
||||
-- | The same as 'char' but case-insensitive. This parser returns the
|
||||
-- actually parsed character preserving its case.
|
||||
@ -195,8 +198,8 @@ asciiChar = C.satisfy (< 128) <?> "ASCII character"
|
||||
|
||||
char' :: (MonadParsec e s m, Token s ~ Word8) => Token s -> m (Token s)
|
||||
char' c = choice
|
||||
[ C.char c
|
||||
, C.char (fromMaybe c (swapCase c)) ]
|
||||
[ char c
|
||||
, char (fromMaybe c (swapCase c)) ]
|
||||
where
|
||||
swapCase x
|
||||
| isUpper g = fromChar (toLower g)
|
||||
@ -206,6 +209,15 @@ char' c = choice
|
||||
g = toChar x
|
||||
{-# INLINE char' #-}
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
-- Sequence of bytes
|
||||
|
||||
-- | A type-constrained version of 'chunk'.
|
||||
|
||||
string :: (MonadParsec e s m, Token s ~ Word8) => Tokens s -> m (Tokens s)
|
||||
string = chunk
|
||||
{-# INLINE string #-}
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
-- Helpers
|
||||
|
||||
|
@ -68,7 +68,7 @@ skipBlockComment :: (MonadParsec e s m, Token s ~ Word8)
|
||||
=> Tokens s -- ^ Start of block comment
|
||||
-> Tokens s -- ^ End of block comment
|
||||
-> m ()
|
||||
skipBlockComment start end = p >> void (manyTill anyChar n)
|
||||
skipBlockComment start end = p >> void (manyTill anySingle n)
|
||||
where
|
||||
p = string start
|
||||
n = string end
|
||||
@ -85,7 +85,7 @@ skipBlockCommentNested :: (MonadParsec e s m, Token s ~ Word8)
|
||||
-> m ()
|
||||
skipBlockCommentNested start end = p >> void (manyTill e n)
|
||||
where
|
||||
e = skipBlockCommentNested start end <|> void anyChar
|
||||
e = skipBlockCommentNested start end <|> void anySingle
|
||||
p = string start
|
||||
n = string end
|
||||
{-# INLINEABLE skipBlockCommentNested #-}
|
||||
|
@ -45,14 +45,9 @@ module Text.Megaparsec.Char
|
||||
, latin1Char
|
||||
, charCategory
|
||||
, categoryName
|
||||
-- * More general parsers
|
||||
-- * Single character
|
||||
, char
|
||||
, char'
|
||||
, anyChar
|
||||
, notChar
|
||||
, oneOf
|
||||
, noneOf
|
||||
, satisfy
|
||||
-- * Sequence of characters
|
||||
, string
|
||||
, string' )
|
||||
@ -62,16 +57,9 @@ import Control.Applicative
|
||||
import Data.Char
|
||||
import Data.Function (on)
|
||||
import Data.Functor (void)
|
||||
import Data.List.NonEmpty (NonEmpty (..))
|
||||
import Data.Proxy
|
||||
import Text.Megaparsec
|
||||
import qualified Data.CaseInsensitive as CI
|
||||
import qualified Data.Set as E
|
||||
|
||||
#if !MIN_VERSION_base(4,8,0)
|
||||
import Data.Foldable (Foldable (), elem, notElem)
|
||||
import Prelude hiding (elem, notElem)
|
||||
#endif
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
-- Simple parsers
|
||||
@ -289,17 +277,14 @@ categoryName = \case
|
||||
NotAssigned -> "non-assigned Unicode character"
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
-- More general parsers
|
||||
-- Single character
|
||||
|
||||
-- | @'char' c@ parses a single character @c@.
|
||||
-- | A type-constrained version of 'single'.
|
||||
--
|
||||
-- > semicolon = char ';'
|
||||
|
||||
char :: MonadParsec e s m => Token s -> m (Token s)
|
||||
char c = token testChar expected
|
||||
where
|
||||
testChar x = if x == c then Just x else Nothing
|
||||
expected = E.singleton (Tokens (c:|[]))
|
||||
char :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s)
|
||||
char = single
|
||||
{-# INLINE char #-}
|
||||
|
||||
-- | The same as 'char' but case-insensitive. This parser returns the
|
||||
@ -321,87 +306,13 @@ char' c = choice [char c, char (swapCase c)]
|
||||
| otherwise = x
|
||||
{-# INLINE char' #-}
|
||||
|
||||
-- | This parser succeeds for any character. Returns the parsed character.
|
||||
|
||||
anyChar :: MonadParsec e s m => m (Token s)
|
||||
anyChar = satisfy (const True) <?> "character"
|
||||
{-# INLINE anyChar #-}
|
||||
|
||||
-- | Match any character but the given one. It's a good idea to attach a
|
||||
-- 'label' to this parser manually.
|
||||
--
|
||||
-- @since 6.0.0
|
||||
|
||||
notChar :: MonadParsec e s m => Token s -> m (Token s)
|
||||
notChar c = satisfy (/= c)
|
||||
{-# INLINE notChar #-}
|
||||
|
||||
-- | @'oneOf' cs@ succeeds if the current character is in the supplied
|
||||
-- collection of characters @cs@. Returns the parsed character. Note that
|
||||
-- this parser cannot automatically generate the “expected” component of
|
||||
-- error message, so usually you should label it manually with 'label' or
|
||||
-- ('<?>').
|
||||
--
|
||||
-- See also: 'satisfy'.
|
||||
--
|
||||
-- > digit = oneOf ['0'..'9'] <?> "digit"
|
||||
--
|
||||
-- __Performance note__: prefer 'satisfy' when you can because it's faster
|
||||
-- when you have only a couple of tokens to compare to:
|
||||
--
|
||||
-- > quoteFast = satisfy (\x -> x == '\'' || x == '\"')
|
||||
-- > quoteSlow = oneOf "'\""
|
||||
|
||||
oneOf :: (Foldable f, MonadParsec e s m)
|
||||
=> f (Token s)
|
||||
-> m (Token s)
|
||||
oneOf cs = satisfy (`elem` cs)
|
||||
{-# INLINE oneOf #-}
|
||||
|
||||
-- | As the dual of 'oneOf', @'noneOf' cs@ succeeds if the current character
|
||||
-- /not/ in the supplied list of characters @cs@. Returns the parsed
|
||||
-- character. Note that this parser cannot automatically generate the
|
||||
-- “expected” component of error message, so usually you should label it
|
||||
-- manually with 'label' or ('<?>').
|
||||
--
|
||||
-- See also: 'satisfy'.
|
||||
--
|
||||
-- __Performance note__: prefer 'satisfy' and 'notChar' when you can because
|
||||
-- it's faster.
|
||||
|
||||
noneOf :: (Foldable f, MonadParsec e s m)
|
||||
=> f (Token s)
|
||||
-> m (Token s)
|
||||
noneOf cs = satisfy (`notElem` cs)
|
||||
{-# INLINE noneOf #-}
|
||||
|
||||
-- | The parser @'satisfy' f@ succeeds for any character for which the
|
||||
-- supplied function @f@ returns 'True'. Returns the character that is
|
||||
-- actually parsed.
|
||||
--
|
||||
-- > digitChar = satisfy isDigit <?> "digit"
|
||||
-- > oneOf cs = satisfy (`elem` cs)
|
||||
|
||||
satisfy :: MonadParsec e s m
|
||||
=> (Token s -> Bool) -- ^ Predicate to apply
|
||||
-> m (Token s)
|
||||
satisfy f = token testChar E.empty
|
||||
where
|
||||
testChar x = if f x then Just x else Nothing
|
||||
{-# INLINE satisfy #-}
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
-- Sequence of characters
|
||||
|
||||
-- | @'string' s@ parses a sequence of characters given by @s@. Returns the
|
||||
-- parsed string (i.e. @s@).
|
||||
--
|
||||
-- > divOrMod = string "div" <|> string "mod"
|
||||
-- | A type-constrained version of 'chunk'.
|
||||
|
||||
string :: MonadParsec e s m
|
||||
=> Tokens s
|
||||
-> m (Tokens s)
|
||||
string = tokens (==)
|
||||
string :: (MonadParsec e s m, Token s ~ Char) => Tokens s -> m (Tokens s)
|
||||
string = chunk
|
||||
{-# INLINE string #-}
|
||||
|
||||
-- | The same as 'string', but case-insensitive. On success returns string
|
||||
|
@ -143,7 +143,7 @@ lexeme spc p = p <* spc
|
||||
-- > colon = symbol ":"
|
||||
-- > dot = symbol "."
|
||||
|
||||
symbol :: MonadParsec e s m
|
||||
symbol :: (MonadParsec e s m, Token s ~ Char)
|
||||
=> m () -- ^ How to consume white space after lexeme
|
||||
-> Tokens s -- ^ Symbol to parse
|
||||
-> m (Tokens s)
|
||||
@ -179,7 +179,7 @@ skipBlockComment :: (MonadParsec e s m, Token s ~ Char)
|
||||
=> Tokens s -- ^ Start of block comment
|
||||
-> Tokens s -- ^ End of block comment
|
||||
-> m ()
|
||||
skipBlockComment start end = p >> void (manyTill C.anyChar n)
|
||||
skipBlockComment start end = p >> void (manyTill anySingle n)
|
||||
where
|
||||
p = C.string start
|
||||
n = C.string end
|
||||
@ -196,7 +196,7 @@ skipBlockCommentNested :: (MonadParsec e s m, Token s ~ Char)
|
||||
-> m ()
|
||||
skipBlockCommentNested start end = p >> void (manyTill e n)
|
||||
where
|
||||
e = skipBlockCommentNested start end <|> void C.anyChar
|
||||
e = skipBlockCommentNested start end <|> void anySingle
|
||||
p = C.string start
|
||||
n = C.string end
|
||||
{-# INLINEABLE skipBlockCommentNested #-}
|
||||
@ -397,9 +397,9 @@ charLiteral :: (MonadParsec e s m, Token s ~ Char) => m Char
|
||||
charLiteral = label "literal character" $ do
|
||||
-- The @~@ is needed to avoid requiring a MonadFail constraint,
|
||||
-- and we do know that r will be non-empty if count' succeeds.
|
||||
~r@(x:_) <- lookAhead $ count' 1 8 C.anyChar
|
||||
~r@(x:_) <- lookAhead (count' 1 8 anySingle)
|
||||
case listToMaybe (Char.readLitChar r) of
|
||||
Just (c, r') -> count (length r - length r') C.anyChar >> return c
|
||||
Just (c, r') -> c <$ count (length r - length r') anySingle
|
||||
Nothing -> unexpected (Tokens (x:|[]))
|
||||
{-# INLINEABLE charLiteral #-}
|
||||
|
||||
|
@ -8,8 +8,8 @@
|
||||
-- Portability : portable
|
||||
--
|
||||
-- Textual source position. The position includes name of file, line number,
|
||||
-- and column number. List of such positions can be used to model a stack of
|
||||
-- include files.
|
||||
-- and column number. A non-empty list of such positions can be used to
|
||||
-- model a stack of include files.
|
||||
--
|
||||
-- You probably do not want to import this module directly because
|
||||
-- "Text.Megaparsec" re-exports it anyway.
|
||||
|
@ -104,15 +104,6 @@ spec = do
|
||||
describe "spaceChar" $
|
||||
checkCharRange "white space" [9,10,11,12,13,32,160] spaceChar
|
||||
|
||||
-- describe "upperChar" $
|
||||
-- checkCharPred "uppercase letter" (isUpper . toChar) upperChar
|
||||
|
||||
-- describe "lowerChar" $
|
||||
-- checkCharPred "lowercase letter" (isLower . toChar) lowerChar
|
||||
|
||||
-- describe "letterChar" $
|
||||
-- checkCharPred "letter" (isAlpha . toChar) letterChar
|
||||
|
||||
describe "alphaNumChar" $
|
||||
checkCharPred "alphanumeric character" (isAlphaNum . toChar) alphaNumChar
|
||||
|
||||
@ -128,9 +119,6 @@ spec = do
|
||||
describe "hexDigitChar" $
|
||||
checkCharRange "hexadecimal digit" ([48..57] ++ [97..102] ++ [65..70]) hexDigitChar
|
||||
|
||||
-- describe "asciiChar" $
|
||||
-- checkCharPred "ASCII character" (isAscii . toChar) asciiChar
|
||||
|
||||
describe "char'" $ do
|
||||
context "when stream begins with the character specified as argument" $
|
||||
it "parses the character" $
|
||||
|
@ -206,7 +206,8 @@ spec = do
|
||||
|
||||
describe "char'" $ do
|
||||
let goodChar x =
|
||||
(toUpper x == toLower x) || (isUpper x || isLower x)
|
||||
(toUpper x == toLower x) ||
|
||||
(((toUpper x == x) || (toLower x == x)) && (isUpper x || isLower x))
|
||||
context "when stream begins with the character specified as argument" $
|
||||
it "parses the character" $
|
||||
property $ \ch s -> goodChar ch ==> do
|
||||
@ -229,75 +230,6 @@ spec = do
|
||||
let ms = ueof <> etok (toLower ch) <> etok (toUpper ch)
|
||||
prs (char' ch) "" `shouldFailWith` err posI ms
|
||||
|
||||
describe "anyChar" $ do
|
||||
context "when stream is not empty" $
|
||||
it "succeeds consuming next character in the stream" $
|
||||
property $ \ch s -> do
|
||||
let s' = ch : s
|
||||
prs anyChar s' `shouldParse` ch
|
||||
prs' anyChar s' `succeedsLeaving` s
|
||||
context "when stream is empty" $
|
||||
it "signals correct parse error" $
|
||||
prs anyChar "" `shouldFailWith` err posI (ueof <> elabel "character")
|
||||
|
||||
describe "notChar" $ do
|
||||
context "when stream begins with the character specified as argument" $
|
||||
it "signals correct parse error" $
|
||||
property $ \ch s' -> do
|
||||
let p = notChar ch
|
||||
s = ch : s'
|
||||
prs p s `shouldFailWith` err posI (utok ch)
|
||||
prs' p s `failsLeaving` s
|
||||
context "when stream does not begin with the character specified as argument" $
|
||||
it "parses first character in the stream" $
|
||||
property $ \ch s -> not (null s) && ch /= head s ==> do
|
||||
let p = notChar ch
|
||||
prs p s `shouldParse` head s
|
||||
prs' p s `succeedsLeaving` tail s
|
||||
context "when stream is empty" $
|
||||
it "signals correct parse error" $
|
||||
prs (notChar 'a') "" `shouldFailWith` err posI ueof
|
||||
|
||||
describe "oneOf" $ do
|
||||
context "when stream begins with one of specified characters" $
|
||||
it "parses the character" $
|
||||
property $ \chs' n s -> do
|
||||
let chs = getNonEmpty chs'
|
||||
ch = chs !! (getNonNegative n `rem` length chs)
|
||||
s' = ch : s
|
||||
prs (oneOf chs) s' `shouldParse` ch
|
||||
prs' (oneOf chs) s' `succeedsLeaving` s
|
||||
context "when stream does not begin with any of specified characters" $
|
||||
it "signals correct parse error" $
|
||||
property $ \chs ch s -> ch `notElem` (chs :: String) ==> do
|
||||
let s' = ch : s
|
||||
prs (oneOf chs) s' `shouldFailWith` err posI (utok ch)
|
||||
prs' (oneOf chs) s' `failsLeaving` s'
|
||||
context "when stream is empty" $
|
||||
it "signals correct parse error" $
|
||||
property $ \chs ->
|
||||
prs (oneOf (chs :: String)) "" `shouldFailWith` err posI ueof
|
||||
|
||||
describe "noneOf" $ do
|
||||
context "when stream does not begin with any of specified characters" $
|
||||
it "parses the character" $
|
||||
property $ \chs ch s -> ch `notElem` (chs :: String) ==> do
|
||||
let s' = ch : s
|
||||
prs (noneOf chs) s' `shouldParse` ch
|
||||
prs' (noneOf chs) s' `succeedsLeaving` s
|
||||
context "when stream begins with one of specified characters" $
|
||||
it "signals correct parse error" $
|
||||
property $ \chs' n s -> do
|
||||
let chs = getNonEmpty chs'
|
||||
ch = chs !! (getNonNegative n `rem` length chs)
|
||||
s' = ch : s
|
||||
prs (noneOf chs) s' `shouldFailWith` err posI (utok ch)
|
||||
prs' (noneOf chs) s' `failsLeaving` s'
|
||||
context "when stream is empty" $
|
||||
it "signals correct parse error" $
|
||||
property $ \chs ->
|
||||
prs (noneOf (chs :: String)) "" `shouldFailWith` err posI ueof
|
||||
|
||||
describe "string" $ do
|
||||
context "when stream is prefixed with given string" $
|
||||
it "parses the string" $
|
||||
|
@ -28,24 +28,24 @@ spec = do
|
||||
chunkToTokens sproxy (tokensToChunk sproxy ts) === ts
|
||||
describe "chunkToTokens" $
|
||||
it "chunk is isomorphic to list of tokens" $
|
||||
property $ \chunk ->
|
||||
tokensToChunk sproxy (chunkToTokens sproxy chunk) === chunk
|
||||
property $ \chk ->
|
||||
tokensToChunk sproxy (chunkToTokens sproxy chk) === chk
|
||||
describe "chunkLength" $
|
||||
it "returns correct length of given chunk" $
|
||||
property $ \chunk ->
|
||||
chunkLength sproxy chunk === length chunk
|
||||
property $ \chk ->
|
||||
chunkLength sproxy chk === length chk
|
||||
describe "chunkEmpty" $
|
||||
it "only true when chunkLength returns 0" $
|
||||
property $ \chunk ->
|
||||
chunkEmpty sproxy chunk === (chunkLength sproxy chunk <= 0)
|
||||
property $ \chk ->
|
||||
chunkEmpty sproxy chk === (chunkLength sproxy chk <= 0)
|
||||
describe "positionAt1" $
|
||||
it "just returns the given position" $
|
||||
property $ \pos t ->
|
||||
positionAt1 sproxy pos t === pos
|
||||
describe "positionAtN" $
|
||||
it "just returns the given position" $
|
||||
property $ \pos chunk ->
|
||||
positionAtN sproxy pos chunk === pos
|
||||
property $ \pos chk ->
|
||||
positionAtN sproxy pos chk === pos
|
||||
describe "advance1" $ do
|
||||
context "when given newline" $
|
||||
it "works correctly" $
|
||||
@ -102,24 +102,24 @@ spec = do
|
||||
chunkToTokens bproxy (tokensToChunk bproxy ts) === ts
|
||||
describe "chunkToTokens" $
|
||||
it "chunk is isomorphic to list of tokens" $
|
||||
property $ \chunk ->
|
||||
tokensToChunk bproxy (chunkToTokens bproxy chunk) === chunk
|
||||
property $ \chk ->
|
||||
tokensToChunk bproxy (chunkToTokens bproxy chk) === chk
|
||||
describe "chunkLength" $
|
||||
it "returns correct length of given chunk" $
|
||||
property $ \chunk ->
|
||||
chunkLength bproxy chunk === B.length chunk
|
||||
property $ \chk ->
|
||||
chunkLength bproxy chk === B.length chk
|
||||
describe "chunkEmpty" $
|
||||
it "only true when chunkLength returns 0" $
|
||||
property $ \chunk ->
|
||||
chunkEmpty bproxy chunk === (chunkLength bproxy chunk <= 0)
|
||||
property $ \chk ->
|
||||
chunkEmpty bproxy chk === (chunkLength bproxy chk <= 0)
|
||||
describe "positionAt1" $
|
||||
it "just returns the given position" $
|
||||
property $ \pos t ->
|
||||
positionAt1 bproxy pos t === pos
|
||||
describe "positionAtN" $
|
||||
it "just returns the given position" $
|
||||
property $ \pos chunk ->
|
||||
positionAtN bproxy pos chunk === pos
|
||||
property $ \pos chk ->
|
||||
positionAtN bproxy pos chk === pos
|
||||
describe "advance1" $ do
|
||||
context "when given newline" $
|
||||
it "works correctly" $
|
||||
@ -177,24 +177,24 @@ spec = do
|
||||
chunkToTokens blproxy (tokensToChunk blproxy ts) === ts
|
||||
describe "chunkToTokens" $
|
||||
it "chunk is isomorphic to list of tokens" $
|
||||
property $ \chunk ->
|
||||
tokensToChunk blproxy (chunkToTokens blproxy chunk) === chunk
|
||||
property $ \chk ->
|
||||
tokensToChunk blproxy (chunkToTokens blproxy chk) === chk
|
||||
describe "chunkLength" $
|
||||
it "returns correct length of given chunk" $
|
||||
property $ \chunk ->
|
||||
chunkLength blproxy chunk === fromIntegral (BL.length chunk)
|
||||
property $ \chk ->
|
||||
chunkLength blproxy chk === fromIntegral (BL.length chk)
|
||||
describe "chunkEmpty" $
|
||||
it "only true when chunkLength returns 0" $
|
||||
property $ \chunk ->
|
||||
chunkEmpty blproxy chunk === (chunkLength blproxy chunk <= 0)
|
||||
property $ \chk ->
|
||||
chunkEmpty blproxy chk === (chunkLength blproxy chk <= 0)
|
||||
describe "positionAt1" $
|
||||
it "just returns the given position" $
|
||||
property $ \pos t ->
|
||||
positionAt1 blproxy pos t === pos
|
||||
describe "positionAtN" $
|
||||
it "just returns the given position" $
|
||||
property $ \pos chunk ->
|
||||
positionAtN blproxy pos chunk === pos
|
||||
property $ \pos chk ->
|
||||
positionAtN blproxy pos chk === pos
|
||||
describe "advance1" $ do
|
||||
context "when given newline" $
|
||||
it "works correctly" $
|
||||
@ -252,24 +252,24 @@ spec = do
|
||||
chunkToTokens tproxy (tokensToChunk tproxy ts) === ts
|
||||
describe "chunkToTokens" $
|
||||
it "chunk is isomorphic to list of tokens" $
|
||||
property $ \chunk ->
|
||||
tokensToChunk tproxy (chunkToTokens tproxy chunk) === chunk
|
||||
property $ \chk ->
|
||||
tokensToChunk tproxy (chunkToTokens tproxy chk) === chk
|
||||
describe "chunkLength" $
|
||||
it "returns correct length of given chunk" $
|
||||
property $ \chunk ->
|
||||
chunkLength tproxy chunk === T.length chunk
|
||||
property $ \chk ->
|
||||
chunkLength tproxy chk === T.length chk
|
||||
describe "chunkEmpty" $
|
||||
it "only true when chunkLength returns 0" $
|
||||
property $ \chunk ->
|
||||
chunkEmpty tproxy chunk === (chunkLength tproxy chunk <= 0)
|
||||
property $ \chk ->
|
||||
chunkEmpty tproxy chk === (chunkLength tproxy chk <= 0)
|
||||
describe "positionAt1" $
|
||||
it "just returns the given position" $
|
||||
property $ \pos t ->
|
||||
positionAt1 tproxy pos t === pos
|
||||
describe "positionAtN" $
|
||||
it "just returns the given position" $
|
||||
property $ \pos chunk ->
|
||||
positionAtN tproxy pos chunk === pos
|
||||
property $ \pos chk ->
|
||||
positionAtN tproxy pos chk === pos
|
||||
describe "advance1" $ do
|
||||
context "when given newline" $
|
||||
it "works correctly" $
|
||||
@ -326,24 +326,24 @@ spec = do
|
||||
chunkToTokens tlproxy (tokensToChunk tlproxy ts) === ts
|
||||
describe "chunkToTokens" $
|
||||
it "chunk is isomorphic to list of tokens" $
|
||||
property $ \chunk ->
|
||||
tokensToChunk tlproxy (chunkToTokens tlproxy chunk) === chunk
|
||||
property $ \chk ->
|
||||
tokensToChunk tlproxy (chunkToTokens tlproxy chk) === chk
|
||||
describe "chunkLength" $
|
||||
it "returns correct length of given chunk" $
|
||||
property $ \chunk ->
|
||||
chunkLength tlproxy chunk === fromIntegral (TL.length chunk)
|
||||
property $ \chk ->
|
||||
chunkLength tlproxy chk === fromIntegral (TL.length chk)
|
||||
describe "chunkEmpty" $
|
||||
it "only true when chunkLength returns 0" $
|
||||
property $ \chunk ->
|
||||
chunkEmpty tlproxy chunk === (chunkLength tlproxy chunk <= 0)
|
||||
property $ \chk ->
|
||||
chunkEmpty tlproxy chk === (chunkLength tlproxy chk <= 0)
|
||||
describe "positionAt1" $
|
||||
it "just returns the given position" $
|
||||
property $ \pos t ->
|
||||
positionAt1 tlproxy pos t === pos
|
||||
describe "positionAtN" $
|
||||
it "just returns the given position" $
|
||||
property $ \pos chunk ->
|
||||
positionAtN tlproxy pos chunk === pos
|
||||
property $ \pos chk ->
|
||||
positionAtN tlproxy pos chk === pos
|
||||
describe "advance1" $ do
|
||||
context "when given newline" $
|
||||
it "works correctly" $
|
||||
|
@ -38,12 +38,12 @@ import qualified Control.Monad.State.Lazy as L
|
||||
import qualified Control.Monad.State.Strict as S
|
||||
import qualified Control.Monad.Writer.Lazy as L
|
||||
import qualified Control.Monad.Writer.Strict as S
|
||||
import qualified Data.ByteString as BS
|
||||
import qualified Data.List as DL
|
||||
import qualified Data.List.NonEmpty as NE
|
||||
import qualified Data.Semigroup as G
|
||||
import qualified Data.Set as E
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.ByteString as BS
|
||||
|
||||
#if !MIN_VERSION_base(4,8,0)
|
||||
import Control.Applicative hiding (many, some)
|
||||
@ -223,17 +223,17 @@ spec = do
|
||||
describe "equivalence to 'string'" $ do
|
||||
it "for String" $ property $ \s i ->
|
||||
eqParser
|
||||
(string s)
|
||||
(chunk s)
|
||||
(fromString s)
|
||||
(i :: String)
|
||||
it "for Text" $ property $ \s i ->
|
||||
eqParser
|
||||
(string (T.pack s))
|
||||
(chunk (T.pack s))
|
||||
(fromString s)
|
||||
(i :: T.Text)
|
||||
it "for ByteString" $ property $ \s i ->
|
||||
eqParser
|
||||
(string (fromString s :: BS.ByteString))
|
||||
(chunk (fromString s :: BS.ByteString))
|
||||
(fromString s)
|
||||
(i :: BS.ByteString)
|
||||
it "can handle Unicode" $ do
|
||||
@ -314,7 +314,7 @@ spec = do
|
||||
it "parses the string" $
|
||||
property $ \s0 s1 s -> not (s1 `isPrefixOf` s0) ==> do
|
||||
let s' = s0 ++ s
|
||||
p = string s0 <|> string s1
|
||||
p = chunk s0 <|> chunk s1
|
||||
prs p s' `shouldParse` s0
|
||||
prs' p s' `succeedsLeaving` s
|
||||
context "stream begins with the second string" $
|
||||
@ -1212,6 +1212,85 @@ spec = do
|
||||
|
||||
describe "derivatives from primitive combinators" $ do
|
||||
|
||||
-- NOTE 'single' is tested via 'char' in "Text.Megaparsec.Char" and
|
||||
-- "Text.Megaparsec.Byte".
|
||||
|
||||
describe "anySingle" $ do
|
||||
let p :: MonadParsec Void String m => m Char
|
||||
p = anySingle
|
||||
context "when stream is not empty" $
|
||||
it "succeeds consuming next character in the stream" $
|
||||
property $ \ch s -> do
|
||||
let s' = ch : s
|
||||
grs p s' (`shouldParse` ch)
|
||||
grs' p s' (`succeedsLeaving` s)
|
||||
context "when stream is empty" $
|
||||
it "signals correct parse error" $
|
||||
grs p "" (`shouldFailWith` err posI ueof)
|
||||
|
||||
describe "anySingleBut" $ do
|
||||
context "when stream begins with the character specified as argument" $
|
||||
it "signals correct parse error" $
|
||||
property $ \ch s' -> do
|
||||
let p :: MonadParsec Void String m => m Char
|
||||
p = anySingleBut ch
|
||||
s = ch : s'
|
||||
grs p s (`shouldFailWith` err posI (utok ch))
|
||||
grs' p s (`failsLeaving` s)
|
||||
context "when stream does not begin with the character specified as argument" $
|
||||
it "parses first character in the stream" $
|
||||
property $ \ch s -> not (null s) && ch /= head s ==> do
|
||||
let p :: MonadParsec Void String m => m Char
|
||||
p = anySingleBut ch
|
||||
grs p s (`shouldParse` head s)
|
||||
grs' p s (`succeedsLeaving` tail s)
|
||||
context "when stream is empty" $
|
||||
it "signals correct parse error" $
|
||||
grs (anySingleBut 'a') "" (`shouldFailWith` err posI ueof)
|
||||
|
||||
describe "oneOf" $ do
|
||||
context "when stream begins with one of specified characters" $
|
||||
it "parses the character" $
|
||||
property $ \chs' n s -> do
|
||||
let chs = getNonEmpty chs'
|
||||
ch = chs !! (getNonNegative n `rem` length chs)
|
||||
s' = ch : s
|
||||
grs (oneOf chs) s' (`shouldParse` ch)
|
||||
grs' (oneOf chs) s' (`succeedsLeaving` s)
|
||||
context "when stream does not begin with any of specified characters" $
|
||||
it "signals correct parse error" $
|
||||
property $ \chs ch s -> ch `notElem` (chs :: String) ==> do
|
||||
let s' = ch : s
|
||||
grs (oneOf chs) s' (`shouldFailWith` err posI (utok ch))
|
||||
grs' (oneOf chs) s' (`failsLeaving` s')
|
||||
context "when stream is empty" $
|
||||
it "signals correct parse error" $
|
||||
property $ \chs ->
|
||||
grs (oneOf (chs :: String)) "" (`shouldFailWith` err posI ueof)
|
||||
|
||||
describe "noneOf" $ do
|
||||
context "when stream does not begin with any of specified characters" $
|
||||
it "parses the character" $
|
||||
property $ \chs ch s -> ch `notElem` (chs :: String) ==> do
|
||||
let s' = ch : s
|
||||
grs (noneOf chs) s' (`shouldParse` ch)
|
||||
grs' (noneOf chs) s' (`succeedsLeaving` s)
|
||||
context "when stream begins with one of specified characters" $
|
||||
it "signals correct parse error" $
|
||||
property $ \chs' n s -> do
|
||||
let chs = getNonEmpty chs'
|
||||
ch = chs !! (getNonNegative n `rem` length chs)
|
||||
s' = ch : s
|
||||
grs (noneOf chs) s' (`shouldFailWith` err posI (utok ch))
|
||||
grs' (noneOf chs) s' (`failsLeaving` s')
|
||||
context "when stream is empty" $
|
||||
it "signals correct parse error" $
|
||||
property $ \chs ->
|
||||
grs (noneOf (chs :: String)) "" (`shouldFailWith` err posI ueof)
|
||||
|
||||
-- NOTE 'chunk' is tested via 'string' in "Text.Megaparsec.Char" and
|
||||
-- "Text.Megaparsec.Byte".
|
||||
|
||||
describe "unexpected" $
|
||||
it "signals correct parse error" $
|
||||
property $ \item -> do
|
||||
@ -1406,7 +1485,7 @@ spec = do
|
||||
describe "notFollowedBy" $
|
||||
it "generally works" $
|
||||
property $ \a' b' c' -> do
|
||||
let p = many (char =<< ask) <* notFollowedBy eof <* many anyChar
|
||||
let p = many (char =<< ask) <* notFollowedBy eof <* many anySingle
|
||||
[a,b,c] = getNonNegative <$> [a',b',c']
|
||||
s = abcRow a b c
|
||||
if b > 0 || c > 0
|
||||
@ -1435,7 +1514,7 @@ spec = do
|
||||
let p = do
|
||||
L.put n
|
||||
let notEof = notFollowedBy (L.modify (* 2) >> eof)
|
||||
some (try (anyChar <* notEof)) <* char 'x'
|
||||
some (try (anySingle <* notEof)) <* char 'x'
|
||||
prs (L.runStateT p 0) "abx" `shouldParse` ("ab", n :: Integer)
|
||||
|
||||
describe "observing" $ do
|
||||
@ -1475,7 +1554,7 @@ spec = do
|
||||
let p = do
|
||||
S.put n
|
||||
let notEof = notFollowedBy (S.modify (* 2) >> eof)
|
||||
some (try (anyChar <* notEof)) <* char 'x'
|
||||
some (try (anySingle <* notEof)) <* char 'x'
|
||||
prs (S.runStateT p 0) "abx" `shouldParse` ("ab", n :: Integer)
|
||||
|
||||
describe "observing" $ do
|
||||
|
Loading…
Reference in New Issue
Block a user