mirror of
https://github.com/mrkkrp/megaparsec.git
synced 2024-11-26 23:02:07 +03:00
Char and byte modules (#230)
This commit is contained in:
parent
45f30ae7e1
commit
3b9812bf76
@ -33,7 +33,7 @@ script:
|
||||
- travis_wait 60 cabal build
|
||||
- cabal test --show-details=always --test-option=--qc-max-success=1000
|
||||
- cabal sdist
|
||||
- cabal haddock | grep "100%" | wc -l | grep "9"
|
||||
- cabal haddock | grep "100%" | wc -l | grep "10"
|
||||
|
||||
after_script:
|
||||
- export PATH=~/.cabal/bin:$PATH
|
||||
|
21
CHANGELOG.md
21
CHANGELOG.md
@ -5,10 +5,11 @@
|
||||
* Re-organized the module hierarchy. Some modules such as
|
||||
`Text.Megaparsec.Prim` do not exist anymore. Stream definitions were moved
|
||||
to `Text.Megaparsec.Stream`. Generic combinators are now re-exported from
|
||||
the `Control.Applicative.Combinators`. Just import `Text.Megaparsec` and
|
||||
you should be OK. Then add `Text.Megaparsec.Char` if you are working with
|
||||
a stream of `Char`s, then add qualified modules you need (permutation
|
||||
parsing, lexing, expression parsing, etc.).
|
||||
the `Control.Applicative.Combinators` from the package
|
||||
`parser-combinators`. Just import `Text.Megaparsec` and you should be OK.
|
||||
Then add `Text.Megaparsec.Char` if you are working with a stream of
|
||||
`Char`s, then add qualified modules you need (permutation parsing, lexing,
|
||||
expression parsing, etc.).
|
||||
|
||||
* Dropped per-stream modules, the `Parser` type synonym is to be defined
|
||||
manually by user.
|
||||
@ -79,6 +80,18 @@
|
||||
* Added `takeWhileP` and `takeWhile1P` to `MonadParsec`. Added `skipWhileP`,
|
||||
`skipWhile1P` as derivatives from those primitive combinators.
|
||||
|
||||
* Dropped `oneOf'` and `noneOf'` from `Text.Megaparsec.Char`. These were
|
||||
seldom (if ever) used and are easily re-implemented.
|
||||
|
||||
* Added `notChar` in `Text.Megaparsec.Char`.
|
||||
|
||||
* Added `space1` in `Text.Megaprasec.Char`. This parser is like `space` but
|
||||
requires at least one space character to be present to succeed.
|
||||
|
||||
* Added new module `Text.Megaparsec.Byte`, which is similar to
|
||||
`Text.Megaparsec.Char`, but target token type is `Word8` instead of
|
||||
`Char`.
|
||||
|
||||
## Megaparsec 5.3.1
|
||||
|
||||
* Various updates to the docs.
|
||||
|
10
README.md
10
README.md
@ -72,11 +72,11 @@ features some combinators that are missing in other parsing libraries:
|
||||
In addition to that, Megaparsec 6 features high-performance combinators
|
||||
similar to those found in Attoparsec:
|
||||
|
||||
* `tokens` makes it easy to parse several tokens in a row. This is about 100
|
||||
time faster than matching a string token by token. `string` and `string'`
|
||||
are built on top of this combinator. `tokens` returns “chunk” of original
|
||||
input, meaning that if you parse `Text`, it'll return `Text` without any
|
||||
repacking.
|
||||
* `tokens` makes it easy to parse several tokens in a row (`string` and
|
||||
`string'` are built on top of this primitive). This is about 100 times
|
||||
faster than matching a string token by token. `tokens` returns “chunk” of
|
||||
original input, meaning that if you parse `Text`, it'll return `Text`
|
||||
without any repacking.
|
||||
|
||||
* `takeWhile` and `takeWhile1` are about 150 times faster than approaches
|
||||
involving `many`, `manyTill` and other similar combinators.
|
||||
|
@ -15,8 +15,8 @@
|
||||
--
|
||||
-- In addition to the "Text.Megaparsec" module, which exports and re-exports
|
||||
-- most everything that you may need, we advise to import
|
||||
-- "Text.Megaparsec.Char" if you plan to work with a stream of 'Char'
|
||||
-- tokens.
|
||||
-- "Text.Megaparsec.Char" if you plan to work with a stream of 'Char' tokens
|
||||
-- or "Text.Megaparsec.Byte" if you indend to parse binary data.
|
||||
--
|
||||
-- It is common to start working with the library by defining a type synonym
|
||||
-- like this:
|
||||
@ -756,6 +756,8 @@ class (Stream s, A.Alternative m, MonadPlus m)
|
||||
|
||||
-- | Similar to 'takeWhileP', but fails if it can't parse at least one
|
||||
-- token.
|
||||
--
|
||||
-- @since 6.0.0
|
||||
|
||||
takeWhile1P
|
||||
:: Maybe String -- ^ Name for a single token in the row
|
||||
@ -1227,6 +1229,8 @@ region f m = do
|
||||
{-# INLINEABLE region #-}
|
||||
|
||||
-- | The same as 'takeWhileP', but discards the result.
|
||||
--
|
||||
-- @since 6.0.0
|
||||
|
||||
skipWhileP :: MonadParsec e s m
|
||||
=> Maybe String -- ^ Name of a single token in the row
|
||||
@ -1236,6 +1240,8 @@ skipWhileP l f = void (takeWhileP l f)
|
||||
{-# INLINE skipWhileP #-}
|
||||
|
||||
-- | The same as 'takeWhile1P', but discards the result.
|
||||
--
|
||||
-- @since 6.0.0
|
||||
|
||||
skipWhile1P :: MonadParsec e s m
|
||||
=> Maybe String -- ^ Name of a single token in the row
|
||||
|
234
Text/Megaparsec/Byte.hs
Normal file
234
Text/Megaparsec/Byte.hs
Normal file
@ -0,0 +1,234 @@
|
||||
-- |
|
||||
-- Module : Text.Megaparsec.Byte
|
||||
-- Copyright : © 2015–2017 Megaparsec contributors
|
||||
-- License : FreeBSD
|
||||
--
|
||||
-- Maintainer : Mark Karpov <markkarpov92@gmail.com>
|
||||
-- Stability : experimental
|
||||
-- Portability : portable
|
||||
--
|
||||
-- Commonly used binary parsers.
|
||||
--
|
||||
-- @since 6.0.0
|
||||
|
||||
{-# LANGUAGE ScopedTypeVariables #-}
|
||||
{-# LANGUAGE TypeFamilies #-}
|
||||
|
||||
module Text.Megaparsec.Byte
|
||||
( -- * Simple parsers
|
||||
newline
|
||||
, crlf
|
||||
, eol
|
||||
, tab
|
||||
, space
|
||||
, space1
|
||||
-- * Categories of characters
|
||||
, controlChar
|
||||
, spaceChar
|
||||
, upperChar
|
||||
, lowerChar
|
||||
, letterChar
|
||||
, alphaNumChar
|
||||
, printChar
|
||||
, digitChar
|
||||
, octDigitChar
|
||||
, hexDigitChar
|
||||
, asciiChar
|
||||
-- * More general parsers
|
||||
, C.char
|
||||
, char'
|
||||
, C.anyChar
|
||||
, C.notChar
|
||||
, C.oneOf
|
||||
, C.noneOf
|
||||
, C.satisfy
|
||||
-- * Sequence of bytes
|
||||
, C.string
|
||||
, C.string' )
|
||||
where
|
||||
|
||||
import Control.Applicative
|
||||
import Data.Char
|
||||
import Data.Proxy
|
||||
import Data.Word (Word8)
|
||||
import qualified Text.Megaparsec.Char as C
|
||||
|
||||
import Text.Megaparsec
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
-- Simple parsers
|
||||
|
||||
-- | Parse a newline byte.
|
||||
|
||||
newline :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
newline = C.char 10
|
||||
{-# INLINE newline #-}
|
||||
|
||||
-- | Parse a carriage return character followed by a newline character.
|
||||
-- Return the sequence of characters parsed.
|
||||
|
||||
crlf :: forall e s m. (MonadParsec e s m, Token s ~ Word8) => m (Tokens s)
|
||||
crlf = C.string (tokensToChunk (Proxy :: Proxy s) [13,10])
|
||||
{-# INLINE crlf #-}
|
||||
|
||||
-- | Parse a CRLF (see 'crlf') or LF (see 'newline') end of line. Return the
|
||||
-- sequence of characters parsed.
|
||||
|
||||
eol :: forall e s m. (MonadParsec e s m, Token s ~ Word8) => m (Tokens s)
|
||||
eol = (tokenToChunk (Proxy :: Proxy s) <$> newline)
|
||||
<|> crlf
|
||||
<?> "end of line"
|
||||
{-# INLINE eol #-}
|
||||
|
||||
-- | Parse a tab character.
|
||||
|
||||
tab :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
tab = C.char 9
|
||||
{-# INLINE tab #-}
|
||||
|
||||
-- | Skip /zero/ or more white space characters.
|
||||
--
|
||||
-- See also: 'skipMany' and 'spaceChar'.
|
||||
|
||||
space :: (MonadParsec e s m, Token s ~ Word8) => m ()
|
||||
space = skipWhileP (Just "white space") isSpace'
|
||||
{-# INLINE space #-}
|
||||
|
||||
-- | Skip /one/ or more white space characters.
|
||||
--
|
||||
-- See also: 'skipSome' and 'spaceChar'.
|
||||
|
||||
space1 :: (MonadParsec e s m, Token s ~ Word8) => m ()
|
||||
space1 = skipWhile1P (Just "white space") isSpace'
|
||||
{-# INLINE space1 #-}
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
-- Categories of characters
|
||||
|
||||
-- | Parse a control character (a non-printing character of the Latin-1
|
||||
-- subset of Unicode).
|
||||
|
||||
controlChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
controlChar = C.satisfy (isControl . toChar) <?> "control character"
|
||||
{-# INLINE controlChar #-}
|
||||
|
||||
-- | Parse a space character, and the control characters: tab, newline,
|
||||
-- carriage return, form feed, and vertical tab.
|
||||
|
||||
spaceChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
spaceChar = C.satisfy isSpace' <?> "white space"
|
||||
{-# INLINE spaceChar #-}
|
||||
|
||||
-- | Parse an upper-case character.
|
||||
|
||||
upperChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
upperChar = C.satisfy (isUpper . toChar) <?> "uppercase letter"
|
||||
{-# INLINE upperChar #-}
|
||||
|
||||
-- | Parse a lower-case alphabetic character.
|
||||
|
||||
lowerChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
lowerChar = C.satisfy (isLower . toChar) <?> "lowercase letter"
|
||||
{-# INLINE lowerChar #-}
|
||||
|
||||
-- | Parse an alphabetic character: lower-case or upper-case.
|
||||
|
||||
letterChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
letterChar = C.satisfy (isLetter . toChar) <?> "letter"
|
||||
{-# INLINE letterChar #-}
|
||||
|
||||
-- | Parse an alphabetic or numeric digit Unicode characters.
|
||||
--
|
||||
-- Note that the numeric digits outside the ASCII range are parsed by this
|
||||
-- parser but not by 'digitChar'. Such digits may be part of identifiers but
|
||||
-- are not used by the printer and reader to represent numbers.
|
||||
|
||||
alphaNumChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
alphaNumChar = C.satisfy (isAlphaNum . toChar) <?> "alphanumeric character"
|
||||
{-# INLINE alphaNumChar #-}
|
||||
|
||||
-- | Parse a printable character: letter, number, mark, punctuation, symbol
|
||||
-- or space.
|
||||
|
||||
printChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
printChar = C.satisfy (isPrint . toChar) <?> "printable character"
|
||||
{-# INLINE printChar #-}
|
||||
|
||||
-- | Parse an ASCII digit, i.e between “0” and “9”.
|
||||
|
||||
digitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
digitChar = C.satisfy isDigit' <?> "digit"
|
||||
where
|
||||
isDigit' x = x >= 48 && x <= 57
|
||||
{-# INLINE digitChar #-}
|
||||
|
||||
-- | Parse an octal digit, i.e. between “0” and “7”.
|
||||
|
||||
octDigitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
octDigitChar = C.satisfy isOctDigit' <?> "octal digit"
|
||||
where
|
||||
isOctDigit' x = x >= 48 && x <= 55
|
||||
{-# INLINE octDigitChar #-}
|
||||
|
||||
-- | Parse a hexadecimal digit, i.e. between “0” and “9”, or “a” and “f”, or
|
||||
-- “A” and “F”.
|
||||
|
||||
hexDigitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
hexDigitChar = C.satisfy (isHexDigit . toChar) <?> "hexadecimal digit"
|
||||
{-# INLINE hexDigitChar #-}
|
||||
|
||||
-- | Parse a character from the first 128 characters of the Unicode
|
||||
-- character set, corresponding to the ASCII character set.
|
||||
|
||||
asciiChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
asciiChar = C.satisfy (< 128) <?> "ASCII character"
|
||||
{-# INLINE asciiChar #-}
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
-- More general parsers
|
||||
|
||||
-- | The same as 'char' but case-insensitive. This parser returns the
|
||||
-- actually parsed character preserving its case.
|
||||
--
|
||||
-- >>> parseTest (char' 'e') "E"
|
||||
-- 'E'
|
||||
-- >>> parseTest (char' 'e') "G"
|
||||
-- 1:1:
|
||||
-- unexpected 'G'
|
||||
-- expecting 'E' or 'e'
|
||||
|
||||
char' :: (MonadParsec e s m, Token s ~ Word8) => Token s -> m (Token s)
|
||||
char' c = choice [C.char c, C.char (swapCase c)]
|
||||
where
|
||||
swapCase x
|
||||
| isUpper g = fromChar (toLower g)
|
||||
| isLower g = fromChar (toUpper g)
|
||||
| otherwise = x
|
||||
where
|
||||
g = toChar x
|
||||
{-# INLINE char' #-}
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
-- Helpers
|
||||
|
||||
-- | 'Word8'-specialized version of 'isSpace'.
|
||||
|
||||
isSpace' :: Word8 -> Bool
|
||||
isSpace' x
|
||||
| x >= 9 && x <= 13 = True
|
||||
| x == 32 = True
|
||||
| x == 160 = True
|
||||
| otherwise = False
|
||||
{-# INLINE isSpace' #-}
|
||||
|
||||
-- | Convert a byte to char.
|
||||
|
||||
toChar :: Word8 -> Char
|
||||
toChar = chr . fromIntegral
|
||||
{-# INLINE toChar #-}
|
||||
|
||||
-- | Convert a char to byte.
|
||||
|
||||
fromChar :: Char -> Word8
|
||||
fromChar = fromIntegral . ord
|
||||
{-# INLINE fromChar #-}
|
@ -24,6 +24,7 @@ module Text.Megaparsec.Char
|
||||
, eol
|
||||
, tab
|
||||
, space
|
||||
, space1
|
||||
-- * Categories of characters
|
||||
, controlChar
|
||||
, spaceChar
|
||||
@ -48,10 +49,9 @@ module Text.Megaparsec.Char
|
||||
, char
|
||||
, char'
|
||||
, anyChar
|
||||
, notChar
|
||||
, oneOf
|
||||
, oneOf'
|
||||
, noneOf
|
||||
, noneOf'
|
||||
, satisfy
|
||||
-- * Sequence of characters
|
||||
, string
|
||||
@ -69,8 +69,8 @@ import qualified Data.Set as E
|
||||
import Text.Megaparsec
|
||||
|
||||
#if !MIN_VERSION_base(4,8,0)
|
||||
import Data.Foldable (Foldable (), any, elem, notElem)
|
||||
import Prelude hiding (any, elem, notElem)
|
||||
import Data.Foldable (Foldable (), elem, notElem)
|
||||
import Prelude hiding (elem, notElem)
|
||||
#endif
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
@ -112,6 +112,16 @@ space :: (MonadParsec e s m, Token s ~ Char) => m ()
|
||||
space = skipWhileP (Just "white space") isSpace
|
||||
{-# INLINE space #-}
|
||||
|
||||
-- | Skip /one/ or more white space characters.
|
||||
--
|
||||
-- See also: 'skipSome' and 'spaceChar'.
|
||||
--
|
||||
-- @since 6.0.0
|
||||
|
||||
space1 :: (MonadParsec e s m, Token s ~ Char) => m ()
|
||||
space1 = skipWhile1P (Just "white space") isSpace
|
||||
{-# INLINE space1 #-}
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
-- Categories of characters
|
||||
|
||||
@ -285,7 +295,7 @@ categoryName = \case
|
||||
--
|
||||
-- > semicolon = char ';'
|
||||
|
||||
char :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s)
|
||||
char :: MonadParsec e s m => Token s -> m (Token s)
|
||||
char c = token testChar (Just c)
|
||||
where
|
||||
f x = Tokens (x:|[])
|
||||
@ -306,7 +316,7 @@ char c = token testChar (Just c)
|
||||
-- expecting 'E' or 'e'
|
||||
|
||||
char' :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s)
|
||||
char' c = choice [char c, char $ swapCase c]
|
||||
char' c = choice [char c, char (swapCase c)]
|
||||
where
|
||||
swapCase x
|
||||
| isUpper x = toLower x
|
||||
@ -316,10 +326,19 @@ char' c = choice [char c, char $ swapCase c]
|
||||
|
||||
-- | This parser succeeds for any character. Returns the parsed character.
|
||||
|
||||
anyChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
|
||||
anyChar :: MonadParsec e s m => m (Token s)
|
||||
anyChar = satisfy (const True) <?> "character"
|
||||
{-# INLINE anyChar #-}
|
||||
|
||||
-- | Match any character but the given one. It's a good idea to attach a
|
||||
-- 'label' to this parser manually.
|
||||
--
|
||||
-- @since 6.0.0
|
||||
|
||||
notChar :: MonadParsec e s m => Token s -> m (Token s)
|
||||
notChar c = satisfy (/= c)
|
||||
{-# INLINE notChar #-}
|
||||
|
||||
-- | @'oneOf' cs@ succeeds if the current character is in the supplied
|
||||
-- collection of characters @cs@. Returns the parsed character. Note that
|
||||
-- this parser cannot automatically generate the “expected” component of
|
||||
@ -329,43 +348,36 @@ anyChar = satisfy (const True) <?> "character"
|
||||
-- See also: 'satisfy'.
|
||||
--
|
||||
-- > digit = oneOf ['0'..'9'] <?> "digit"
|
||||
--
|
||||
-- __Performance note__: prefer 'satisfy' when you can because it's faster
|
||||
-- when you have only a couple of tokens to compare to:
|
||||
--
|
||||
-- > quoteFast = satisfy (\x -> x == '\'' || x == '\"')
|
||||
-- > quoteSlow = oneOf "'\""
|
||||
|
||||
oneOf :: (Foldable f, MonadParsec e s m, Token s ~ Char)
|
||||
=> f (Token s) -> m (Token s)
|
||||
oneOf :: (Foldable f, MonadParsec e s m)
|
||||
=> f (Token s)
|
||||
-> m (Token s)
|
||||
oneOf cs = satisfy (`elem` cs)
|
||||
{-# INLINE oneOf #-}
|
||||
|
||||
-- | The same as 'oneOf', but case-insensitive. Returns the parsed character
|
||||
-- preserving its case.
|
||||
--
|
||||
-- > vowel = oneOf' "aeiou" <?> "vowel"
|
||||
|
||||
oneOf' :: (Foldable f, MonadParsec e s m, Token s ~ Char)
|
||||
=> f (Token s)
|
||||
-> m (Token s)
|
||||
oneOf' cs = satisfy (`elemi` cs)
|
||||
{-# INLINE oneOf' #-}
|
||||
|
||||
-- | As the dual of 'oneOf', @noneOf cs@ succeeds if the current character
|
||||
-- /not/ in the supplied list of characters @cs@. Returns the parsed
|
||||
-- character.
|
||||
-- character. Note that this parser cannot automatically generate the
|
||||
-- “expected” component of error message, so usually you should label it
|
||||
-- manually with 'label' or ('<?>').
|
||||
--
|
||||
-- See also: 'satisfy'.
|
||||
--
|
||||
-- __Performance note__: prefer 'satisfy' and 'notChar' when you can because
|
||||
-- it's faster.
|
||||
|
||||
noneOf :: (Foldable f, MonadParsec e s m, Token s ~ Char)
|
||||
noneOf :: (Foldable f, MonadParsec e s m)
|
||||
=> f (Token s)
|
||||
-> m (Token s)
|
||||
noneOf cs = satisfy (`notElem` cs)
|
||||
{-# INLINE noneOf #-}
|
||||
|
||||
-- | The same as 'noneOf', but case-insensitive.
|
||||
--
|
||||
-- > consonant = noneOf' "aeiou" <?> "consonant"
|
||||
|
||||
noneOf' :: (Foldable f, MonadParsec e s m, Token s ~ Char)
|
||||
=> f (Token s)
|
||||
-> m (Token s)
|
||||
noneOf' cs = satisfy (`notElemi` cs)
|
||||
{-# INLINE noneOf' #-}
|
||||
|
||||
-- | The parser @'satisfy' f@ succeeds for any character for which the
|
||||
-- supplied function @f@ returns 'True'. Returns the character that is
|
||||
-- actually parsed.
|
||||
@ -373,7 +385,7 @@ noneOf' cs = satisfy (`notElemi` cs)
|
||||
-- > digitChar = satisfy isDigit <?> "digit"
|
||||
-- > oneOf cs = satisfy (`elem` cs)
|
||||
|
||||
satisfy :: (MonadParsec e s m, Token s ~ Char)
|
||||
satisfy :: MonadParsec e s m
|
||||
=> (Token s -> Bool) -- ^ Predicate to apply
|
||||
-> m (Token s)
|
||||
satisfy f = token testChar Nothing
|
||||
@ -409,24 +421,3 @@ string' :: (MonadParsec e s m, CI.FoldCase (Tokens s))
|
||||
-> m (Tokens s)
|
||||
string' = tokens ((==) `on` CI.mk)
|
||||
{-# INLINE string' #-}
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
-- Helpers
|
||||
|
||||
-- | Case-insensitive equality test for characters.
|
||||
|
||||
casei :: Char -> Char -> Bool
|
||||
casei x y = toUpper x == toUpper y
|
||||
{-# INLINE casei #-}
|
||||
|
||||
-- | Case-insensitive 'elem'.
|
||||
|
||||
elemi :: Foldable f => Char -> f Char -> Bool
|
||||
elemi = any . casei
|
||||
{-# INLINE elemi #-}
|
||||
|
||||
-- | Case-insensitive 'notElem'.
|
||||
|
||||
notElemi :: Foldable f => Char -> f Char -> Bool
|
||||
notElemi c = not . elemi c
|
||||
{-# INLINE notElemi #-}
|
||||
|
@ -36,6 +36,7 @@ where
|
||||
|
||||
import Control.DeepSeq
|
||||
import Control.Exception
|
||||
import Data.Char (chr)
|
||||
import Data.Data (Data)
|
||||
import Data.List (intercalate)
|
||||
import Data.List.NonEmpty (NonEmpty (..))
|
||||
@ -44,6 +45,7 @@ import Data.Semigroup
|
||||
import Data.Set (Set)
|
||||
import Data.Typeable (Typeable)
|
||||
import Data.Void
|
||||
import Data.Word (Word8)
|
||||
import GHC.Generics
|
||||
import Prelude hiding (concat)
|
||||
import qualified Data.List.NonEmpty as NE
|
||||
@ -199,6 +201,9 @@ class ShowToken a where
|
||||
instance ShowToken Char where
|
||||
showTokens = stringPretty
|
||||
|
||||
instance ShowToken Word8 where
|
||||
showTokens = stringPretty . fmap (chr . fromIntegral)
|
||||
|
||||
-- | The type class defines how to print custom data component of
|
||||
-- 'ParseError'.
|
||||
--
|
||||
|
@ -68,16 +68,18 @@ import qualified Text.Megaparsec.Char as C
|
||||
----------------------------------------------------------------------------
|
||||
-- White space
|
||||
|
||||
-- | @space spaceChar lineComment blockComment@ produces parser that can
|
||||
-- parse white space in general. It's expected that you create such a parser
|
||||
-- once and pass it to other functions in this module as needed (when you
|
||||
-- see @spaceConsumer@ in documentation, usually it means that something
|
||||
-- like 'space' is expected there).
|
||||
-- | @space sc lineComment blockComment@ produces parser that can parse
|
||||
-- white space in general. It's expected that you create such a parser once
|
||||
-- and pass it to other functions in this module as needed (when you see
|
||||
-- @spaceConsumer@ in documentation, usually it means that something like
|
||||
-- 'space' is expected there).
|
||||
--
|
||||
-- @spaceChar@ is used to parse trivial space characters. You can use
|
||||
-- 'C.spaceChar' from "Text.Megaparsec.Char" for this purpose as well as
|
||||
-- your own parser (if you don't want to automatically consume newlines, for
|
||||
-- example).
|
||||
-- @sc@ is used to parse blocks of space characters. You can use 'C.space1'
|
||||
-- from "Text.Megaparsec.Char" for this purpose as well as your own parser
|
||||
-- (if you don't want to automatically consume newlines, for example). Make
|
||||
-- sure the parser does not succeed on empty input though. In earlier
|
||||
-- version 'C.spaceChar' was recommended, but now parsers based on
|
||||
-- 'takeWhile1P' are preferred because of their speed.
|
||||
--
|
||||
-- @lineComment@ is used to parse line comments. You can use
|
||||
-- 'skipLineComment' if you don't need anything special.
|
||||
@ -94,7 +96,8 @@ import qualified Text.Megaparsec.Char as C
|
||||
-- of the file).
|
||||
|
||||
space :: MonadParsec e s m
|
||||
=> m () -- ^ A parser for a space character (e.g. @'void' 'C.spaceChar'@)
|
||||
=> m () -- ^ A parser for space characters which does not accept empty
|
||||
-- input (e.g. 'C.space1')
|
||||
-> m () -- ^ A parser for a line comment (e.g. 'skipLineComment')
|
||||
-> m () -- ^ A parser for a block comment (e.g. 'skipBlockComment')
|
||||
-> m ()
|
||||
@ -128,7 +131,7 @@ lexeme spc p = p <* spc
|
||||
-- > colon = symbol ":"
|
||||
-- > dot = symbol "."
|
||||
|
||||
symbol :: (MonadParsec e s m, Token s ~ Char)
|
||||
symbol :: MonadParsec e s m
|
||||
=> m () -- ^ How to consume white space after lexeme
|
||||
-> Tokens s -- ^ String to parse
|
||||
-> m (Tokens s)
|
||||
@ -137,7 +140,7 @@ symbol spc = lexeme spc . C.string
|
||||
-- | Case-insensitive version of 'symbol'. This may be helpful if you're
|
||||
-- working with case-insensitive languages.
|
||||
|
||||
symbol' :: (MonadParsec e s m, Token s ~ Char, CI.FoldCase (Tokens s))
|
||||
symbol' :: (MonadParsec e s m, CI.FoldCase (Tokens s))
|
||||
=> m () -- ^ How to consume white space after lexeme
|
||||
-> Tokens s -- ^ String to parse (case-insensitive)
|
||||
-> m (Tokens s)
|
||||
|
@ -49,6 +49,7 @@ library
|
||||
if !impl(ghc >= 7.10)
|
||||
build-depends: void == 0.7.*
|
||||
exposed-modules: Text.Megaparsec
|
||||
, Text.Megaparsec.Byte
|
||||
, Text.Megaparsec.Char
|
||||
, Text.Megaparsec.Error
|
||||
, Text.Megaparsec.Error.Builder
|
||||
@ -74,6 +75,7 @@ test-suite tests
|
||||
other-modules: Control.Applicative.CombinatorsSpec
|
||||
, Test.Hspec.Megaparsec
|
||||
, Test.Hspec.Megaparsec.AdHoc
|
||||
, Text.Megaparsec.ByteSpec
|
||||
, Text.Megaparsec.CharSpec
|
||||
, Text.Megaparsec.ErrorSpec
|
||||
, Text.Megaparsec.ExprSpec
|
||||
|
247
tests/Text/Megaparsec/ByteSpec.hs
Normal file
247
tests/Text/Megaparsec/ByteSpec.hs
Normal file
@ -0,0 +1,247 @@
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Text.Megaparsec.ByteSpec (spec) where
|
||||
|
||||
import Control.Monad
|
||||
import Data.ByteString (ByteString)
|
||||
import Data.Char
|
||||
import Data.Proxy
|
||||
import Data.Semigroup ((<>))
|
||||
import Data.Void
|
||||
import Data.Word (Word8)
|
||||
import Test.Hspec
|
||||
import Test.Hspec.Megaparsec
|
||||
import Test.Hspec.Megaparsec.AdHoc (nes)
|
||||
import Test.QuickCheck
|
||||
import Text.Megaparsec
|
||||
import Text.Megaparsec.Byte
|
||||
import qualified Data.ByteString as B
|
||||
|
||||
#if !MIN_VERSION_base(4,8,0)
|
||||
import Control.Applicative
|
||||
#endif
|
||||
|
||||
type Parser = Parsec Void ByteString
|
||||
|
||||
spec :: Spec
|
||||
spec = do
|
||||
|
||||
describe "newline" $
|
||||
checkStrLit "newline" "\n" (tokenToChunk bproxy <$> newline)
|
||||
|
||||
describe "csrf" $
|
||||
checkStrLit "crlf newline" "\r\n" crlf
|
||||
|
||||
describe "eol" $ do
|
||||
context "when stream begins with a newline" $
|
||||
it "succeeds returning the newline" $
|
||||
property $ \s -> do
|
||||
let s' = "\n" <> s
|
||||
prs eol s' `shouldParse` "\n"
|
||||
prs' eol s' `succeedsLeaving` s
|
||||
context "when stream begins with CRLF sequence" $
|
||||
it "parses the CRLF sequence" $
|
||||
property $ \s -> do
|
||||
let s' = "\r\n" <> s
|
||||
prs eol s' `shouldParse` "\r\n"
|
||||
prs' eol s' `succeedsLeaving` s
|
||||
context "when stream begins with '\\r', but it's not followed by '\\n'" $
|
||||
it "signals correct parse error" $
|
||||
property $ \ch -> ch /= 10 ==> do
|
||||
let s = "\r" <> B.singleton ch
|
||||
prs eol s `shouldFailWith`
|
||||
err posI (utoks (B.unpack s) <> elabel "end of line")
|
||||
context "when input stream is '\\r'" $
|
||||
it "signals correct parse error" $
|
||||
prs eol "\r" `shouldFailWith` err posI
|
||||
(utok 13 <> elabel "end of line")
|
||||
context "when stream does not begin with newline or CRLF sequence" $
|
||||
it "signals correct parse error" $
|
||||
property $ \ch s -> (ch /= 13 && ch /= 10) ==> do
|
||||
let s' = B.singleton ch <> s
|
||||
prs eol s' `shouldFailWith` err posI
|
||||
(utoks (B.unpack $ B.take 2 s') <> elabel "end of line")
|
||||
context "when stream is empty" $
|
||||
it "signals correct parse error" $
|
||||
prs eol "" `shouldFailWith` err posI
|
||||
(ueof <> elabel "end of line")
|
||||
|
||||
describe "tab" $
|
||||
checkStrLit "tab" "\t" (tokenToChunk bproxy <$> tab)
|
||||
|
||||
describe "space" $
|
||||
it "consumes space up to first non-space character" $
|
||||
property $ \s' -> do
|
||||
let (s0,s1) = B.partition isSpace' s'
|
||||
s = s0 <> s1
|
||||
prs space s `shouldParse` ()
|
||||
prs' space s `succeedsLeaving` s1
|
||||
|
||||
describe "space1" $ do
|
||||
context "when stream does not start with a space character" $
|
||||
it "signals correct parse error" $
|
||||
property $ \ch s' -> not (isSpace' ch) ==> do
|
||||
let (s0,s1) = B.partition isSpace' s'
|
||||
s = B.singleton ch <> s0 <> s1
|
||||
prs space1 s `shouldFailWith` err posI (utok ch <> elabel "white space")
|
||||
prs' space1 s `failsLeaving` s
|
||||
context "when stream starts with a space character" $
|
||||
it "consumes space up to first non-space character" $
|
||||
property $ \s' -> do
|
||||
let (s0,s1) = B.partition isSpace' s'
|
||||
s = " " <> s0 <> s1
|
||||
prs space1 s `shouldParse` ()
|
||||
prs' space1 s `succeedsLeaving` s1
|
||||
context "when stream is empty" $
|
||||
it "signals correct parse error" $
|
||||
prs space1 "" `shouldFailWith` err posI (ueof <> elabel "white space")
|
||||
|
||||
describe "controlChar" $
|
||||
checkCharPred "control character" (isControl . toChar) controlChar
|
||||
|
||||
describe "spaceChar" $
|
||||
checkCharRange "white space" [9,10,11,12,13,32,160] spaceChar
|
||||
|
||||
-- describe "upperChar" $
|
||||
-- checkCharPred "uppercase letter" (isUpper . toChar) upperChar
|
||||
|
||||
-- describe "lowerChar" $
|
||||
-- checkCharPred "lowercase letter" (isLower . toChar) lowerChar
|
||||
|
||||
-- describe "letterChar" $
|
||||
-- checkCharPred "letter" (isAlpha . toChar) letterChar
|
||||
|
||||
describe "alphaNumChar" $
|
||||
checkCharPred "alphanumeric character" (isAlphaNum . toChar) alphaNumChar
|
||||
|
||||
describe "printChar" $
|
||||
checkCharPred "printable character" (isPrint . toChar) printChar
|
||||
|
||||
describe "digitChar" $
|
||||
checkCharRange "digit" [48..57] digitChar
|
||||
|
||||
describe "octDigitChar" $
|
||||
checkCharRange "octal digit" [48..55] octDigitChar
|
||||
|
||||
describe "hexDigitChar" $
|
||||
checkCharRange "hexadecimal digit" ([48..57] ++ [97..102] ++ [65..70]) hexDigitChar
|
||||
|
||||
-- describe "asciiChar" $
|
||||
-- checkCharPred "ASCII character" (isAscii . toChar) asciiChar
|
||||
|
||||
describe "char'" $ do
|
||||
context "when stream begins with the character specified as argument" $
|
||||
it "parses the character" $
|
||||
property $ \ch s -> do
|
||||
let sl = B.cons (liftChar toLower ch) s
|
||||
su = B.cons (liftChar toUpper ch) s
|
||||
prs (char' ch) sl `shouldParse` liftChar toLower ch
|
||||
prs (char' ch) su `shouldParse` liftChar toUpper ch
|
||||
prs' (char' ch) sl `succeedsLeaving` s
|
||||
prs' (char' ch) su `succeedsLeaving` s
|
||||
context "when stream does not begin with the character specified as argument" $
|
||||
it "signals correct parse error" $
|
||||
property $ \ch ch' s -> liftChar toLower ch /= liftChar toLower ch' ==> do
|
||||
let s' = B.cons ch' s
|
||||
ms = utok ch' <> etok (liftChar toLower ch) <> etok (liftChar toUpper ch)
|
||||
prs (char' ch) s' `shouldFailWith` err posI ms
|
||||
prs' (char' ch) s' `failsLeaving` s'
|
||||
context "when stream is empty" $
|
||||
it "signals correct parse error" $
|
||||
property $ \ch -> do
|
||||
let ms = ueof <> etok (liftChar toLower ch) <> etok (liftChar toUpper ch)
|
||||
prs (char' ch) "" `shouldFailWith` err posI ms
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
-- Helpers
|
||||
|
||||
checkStrLit :: String -> ByteString -> Parser ByteString -> SpecWith ()
|
||||
checkStrLit name ts p = do
|
||||
context ("when stream begins with " ++ name) $
|
||||
it ("parses the " ++ name) $
|
||||
property $ \s -> do
|
||||
let s' = ts <> s
|
||||
prs p s' `shouldParse` ts
|
||||
prs' p s' `succeedsLeaving` s
|
||||
context ("when stream does not begin with " ++ name) $
|
||||
it "signals correct parse error" $
|
||||
property $ \ch s -> ch /= B.head ts ==> do
|
||||
let s' = B.cons ch s
|
||||
us = B.unpack $ B.take (B.length ts) s'
|
||||
ps = B.unpack ts
|
||||
prs p s' `shouldFailWith` err posI (utoks us <> etoks ps)
|
||||
prs' p s' `failsLeaving` s'
|
||||
context "when stream is empty" $
|
||||
it "signals correct parse error" $
|
||||
prs p "" `shouldFailWith` err posI (ueof <> etoks (B.unpack ts))
|
||||
|
||||
checkCharPred :: String -> (Word8 -> Bool) -> Parser Word8 -> SpecWith ()
|
||||
checkCharPred name f p = do
|
||||
context ("when stream begins with " ++ name) $
|
||||
it ("parses the " ++ name) $
|
||||
property $ \ch s -> f ch ==> do
|
||||
let s' = B.singleton ch <> s
|
||||
prs p s' `shouldParse` ch
|
||||
prs' p s' `succeedsLeaving` s
|
||||
context ("when stream does not begin with " ++ name) $
|
||||
it "signals correct parse error" $
|
||||
property $ \ch s -> not (f ch) ==> do
|
||||
let s' = B.singleton ch <> s
|
||||
prs p s' `shouldFailWith` err posI (utok ch <> elabel name)
|
||||
prs' p s' `failsLeaving` s'
|
||||
context "when stream is empty" $
|
||||
it "signals correct parse error" $
|
||||
prs p "" `shouldFailWith` err posI (ueof <> elabel name)
|
||||
|
||||
checkCharRange :: String -> [Word8] -> Parser Word8 -> SpecWith ()
|
||||
checkCharRange name tchs p = do
|
||||
forM_ tchs $ \tch ->
|
||||
context ("when stream begins with " ++ showTokens (nes tch)) $
|
||||
it ("parses the " ++ showTokens (nes tch)) $
|
||||
property $ \s -> do
|
||||
let s' = B.singleton tch <> s
|
||||
prs p s' `shouldParse` tch
|
||||
prs' p s' `succeedsLeaving` s
|
||||
context "when stream is empty" $
|
||||
it "signals correct parse error" $
|
||||
prs p "" `shouldFailWith` err posI (ueof <> elabel name)
|
||||
|
||||
prs
|
||||
:: Parser a -- ^ Parser to run
|
||||
-> ByteString -- ^ Input for the parser
|
||||
-> Either (ParseError Word8 Void) a -- ^ Result of parsing
|
||||
prs p = parse p ""
|
||||
|
||||
prs'
|
||||
:: Parser a -- ^ Parser to run
|
||||
-> ByteString -- ^ Input for the parser
|
||||
-> (State ByteString, Either (ParseError Word8 Void) a) -- ^ Result of parsing
|
||||
prs' p s = runParser' p (initialState s)
|
||||
|
||||
bproxy :: Proxy ByteString
|
||||
bproxy = Proxy
|
||||
|
||||
-- | 'Word8'-specialized version of 'isSpace'.
|
||||
|
||||
isSpace' :: Word8 -> Bool
|
||||
isSpace' x
|
||||
| x >= 9 && x <= 13 = True
|
||||
| x == 32 = True
|
||||
| x == 160 = True
|
||||
| otherwise = False
|
||||
|
||||
-- | Convert a byte to char.
|
||||
|
||||
toChar :: Word8 -> Char
|
||||
toChar = chr . fromIntegral
|
||||
|
||||
-- | Covert a char to byte.
|
||||
|
||||
fromChar :: Char -> Word8
|
||||
fromChar = fromIntegral . ord
|
||||
|
||||
-- | Lift char transformation to byte transformation.
|
||||
|
||||
liftChar :: (Char -> Char) -> Word8 -> Word8
|
||||
liftChar f = fromChar . f . toChar
|
@ -7,7 +7,6 @@ import Control.Monad
|
||||
import Data.Char
|
||||
import Data.List (partition, isPrefixOf)
|
||||
import Data.Monoid ((<>))
|
||||
import Data.Void
|
||||
import Test.Hspec
|
||||
import Test.Hspec.Megaparsec
|
||||
import Test.Hspec.Megaparsec.AdHoc
|
||||
@ -68,12 +67,31 @@ spec = do
|
||||
checkStrLit "tab" "\t" (pure <$> tab)
|
||||
|
||||
describe "space" $
|
||||
it "consumes it up to first non-space character" $
|
||||
property $ \s -> do
|
||||
let (s0,s1) = partition isSpace s
|
||||
s' = s0 ++ s1
|
||||
prs space s' `shouldParse` ()
|
||||
prs' space s' `succeedsLeaving` s1
|
||||
it "consumes space up to first non-space character" $
|
||||
property $ \s' -> do
|
||||
let (s0,s1) = partition isSpace s'
|
||||
s = s0 ++ s1
|
||||
prs space s `shouldParse` ()
|
||||
prs' space s `succeedsLeaving` s1
|
||||
|
||||
describe "space1" $ do
|
||||
context "when stream does not start with a space character" $
|
||||
it "signals correct parse error" $
|
||||
property $ \ch s' -> not (isSpace ch) ==> do
|
||||
let (s0,s1) = partition isSpace s'
|
||||
s = ch : s0 ++ s1
|
||||
prs space1 s `shouldFailWith` err posI (utok ch <> elabel "white space")
|
||||
prs' space1 s `failsLeaving` s
|
||||
context "when stream starts with a space character" $
|
||||
it "consumes space up to first non-space character" $
|
||||
property $ \s' -> do
|
||||
let (s0,s1) = partition isSpace s'
|
||||
s = ' ' : s0 ++ s1
|
||||
prs space1 s `shouldParse` ()
|
||||
prs' space1 s `succeedsLeaving` s1
|
||||
context "when stream is empty" $
|
||||
it "signals correct parse error" $
|
||||
prs space1 "" `shouldFailWith` err posI (ueof <> elabel "white space")
|
||||
|
||||
describe "controlChar" $
|
||||
checkCharPred "control character" isControl controlChar
|
||||
@ -220,6 +238,24 @@ spec = do
|
||||
it "signals correct parse error" $
|
||||
prs anyChar "" `shouldFailWith` err posI (ueof <> elabel "character")
|
||||
|
||||
describe "notChar" $ do
|
||||
context "when stream begins with the character specified as argument" $
|
||||
it "signals correct parse error" $
|
||||
property $ \ch s' -> do
|
||||
let p = notChar ch
|
||||
s = ch : s'
|
||||
prs p s `shouldFailWith` err posI (utok ch)
|
||||
prs' p s `failsLeaving` s
|
||||
context "when stream does not begin with the character specified as argument" $
|
||||
it "parses first character in the stream" $
|
||||
property $ \ch s -> not (null s) && ch /= head s ==> do
|
||||
let p = notChar ch
|
||||
prs p s `shouldParse` head s
|
||||
prs' p s `succeedsLeaving` tail s
|
||||
context "when stream is empty" $
|
||||
it "signals correct parse error" $
|
||||
prs (notChar 'a') "" `shouldFailWith` err posI ueof
|
||||
|
||||
describe "oneOf" $ do
|
||||
context "when stream begins with one of specified characters" $
|
||||
it "parses the character" $
|
||||
@ -240,29 +276,6 @@ spec = do
|
||||
property $ \chs ->
|
||||
prs (oneOf (chs :: String)) "" `shouldFailWith` err posI ueof
|
||||
|
||||
describe "oneOf'" $ do
|
||||
context "when stream begins with one of specified characters" $
|
||||
it "parses the character" $
|
||||
property $ \chs' n s -> do
|
||||
let chs = getNonEmpty chs'
|
||||
ch = chs !! (getNonNegative n `rem` length chs)
|
||||
sl = toLower ch : s
|
||||
su = toUpper ch : s
|
||||
prs (oneOf' chs) sl `shouldParse` toLower ch
|
||||
prs (oneOf' chs) su `shouldParse` toUpper ch
|
||||
prs' (oneOf' chs) sl `succeedsLeaving` s
|
||||
prs' (oneOf' chs) su `succeedsLeaving` s
|
||||
context "when stream does not begin with any of specified characters" $
|
||||
it "signals correct parse error" $
|
||||
property $ \chs ch s -> ch `notElemi` (chs :: String) ==> do
|
||||
let s' = ch : s
|
||||
prs (oneOf' chs) s' `shouldFailWith` err posI (utok ch)
|
||||
prs' (oneOf' chs) s' `failsLeaving` s'
|
||||
context "when stream is empty" $
|
||||
it "signals correct parse error" $
|
||||
property $ \chs ->
|
||||
prs (oneOf' (chs :: String)) "" `shouldFailWith` err posI ueof
|
||||
|
||||
describe "noneOf" $ do
|
||||
context "when stream does not begin with any of specified characters" $
|
||||
it "parses the character" $
|
||||
@ -283,29 +296,6 @@ spec = do
|
||||
property $ \chs ->
|
||||
prs (noneOf (chs :: String)) "" `shouldFailWith` err posI ueof
|
||||
|
||||
describe "noneOf'" $ do
|
||||
context "when stream does not begin with any of specified characters" $
|
||||
it "parses the character" $
|
||||
property $ \chs ch s -> ch `notElemi` (chs :: String) ==> do
|
||||
let sl = toLower ch : s
|
||||
su = toUpper ch : s
|
||||
prs (noneOf' chs) sl `shouldParse` toLower ch
|
||||
prs (noneOf' chs) su `shouldParse` toUpper ch
|
||||
prs' (noneOf' chs) sl `succeedsLeaving` s
|
||||
prs' (noneOf' chs) su `succeedsLeaving` s
|
||||
context "when stream begins with one of specified characters" $
|
||||
it "signals correct parse error" $
|
||||
property $ \chs' n s -> do
|
||||
let chs = getNonEmpty chs'
|
||||
ch = chs !! (getNonNegative n `rem` length chs)
|
||||
s' = ch : s
|
||||
prs (noneOf' chs) s' `shouldFailWith` err posI (utok ch)
|
||||
prs' (noneOf' chs) s' `failsLeaving` s'
|
||||
context "when stream is empty" $
|
||||
it "signals correct parse error" $
|
||||
property $ \chs ->
|
||||
prs (noneOf' (chs :: String)) "" `shouldFailWith` err posI ueof
|
||||
|
||||
describe "string" $ do
|
||||
context "when stream is prefixed with given string" $
|
||||
it "parses the string" $
|
||||
@ -338,7 +328,7 @@ spec = do
|
||||
----------------------------------------------------------------------------
|
||||
-- Helpers
|
||||
|
||||
checkStrLit :: String -> String -> Parsec Void String String -> SpecWith ()
|
||||
checkStrLit :: String -> String -> Parser String -> SpecWith ()
|
||||
checkStrLit name ts p = do
|
||||
context ("when stream begins with " ++ name) $
|
||||
it ("parses the " ++ name) $
|
||||
@ -357,7 +347,7 @@ checkStrLit name ts p = do
|
||||
it "signals correct parse error" $
|
||||
prs p "" `shouldFailWith` err posI (ueof <> etoks ts)
|
||||
|
||||
checkCharPred :: String -> (Char -> Bool) -> Parsec Void String Char -> SpecWith ()
|
||||
checkCharPred :: String -> (Char -> Bool) -> Parser Char -> SpecWith ()
|
||||
checkCharPred name f p = do
|
||||
context ("when stream begins with " ++ name) $
|
||||
it ("parses the " ++ name) $
|
||||
@ -375,7 +365,7 @@ checkCharPred name f p = do
|
||||
it "signals correct parse error" $
|
||||
prs p "" `shouldFailWith` err posI (ueof <> elabel name)
|
||||
|
||||
checkCharRange :: String -> String -> Parsec Void String Char -> SpecWith ()
|
||||
checkCharRange :: String -> String -> Parser Char -> SpecWith ()
|
||||
checkCharRange name tchs p = do
|
||||
forM_ tchs $ \tch ->
|
||||
context ("when stream begins with " ++ showTokens (nes tch)) $
|
||||
@ -384,12 +374,6 @@ checkCharRange name tchs p = do
|
||||
let s' = tch : s
|
||||
prs p s' `shouldParse` tch
|
||||
prs' p s' `succeedsLeaving` s
|
||||
-- context ("when stream does not begin with " ++ name) $
|
||||
-- it "signals correct parse error" $
|
||||
-- property $ \ch s -> ch `notElem` tchs ==> do
|
||||
-- let s' = ch : s
|
||||
-- prs p s' `shouldFailWith` err posI (utok ch <> elabel name)
|
||||
-- prs' p s' `failsLeaving` s'
|
||||
context "when stream is empty" $
|
||||
it "signals correct parse error" $
|
||||
prs p "" `shouldFailWith` err posI (ueof <> elabel name)
|
||||
@ -406,16 +390,6 @@ fuzzyCase s = zipWith f s <$> vector (length s)
|
||||
casei :: Char -> Char -> Bool
|
||||
casei x y = toUpper x == toUpper y
|
||||
|
||||
-- | Case-insensitive 'elem'.
|
||||
|
||||
elemi :: Char -> String -> Bool
|
||||
elemi c = any (casei c)
|
||||
|
||||
-- | Case-insensitive 'notElem'.
|
||||
|
||||
notElemi :: Char -> String -> Bool
|
||||
notElemi c = not . elemi c
|
||||
|
||||
-- | The 'isPrefixOf' function takes two 'String's and returns 'True' iff
|
||||
-- the first list is a prefix of the second with case-insensitive
|
||||
-- comparison.
|
||||
|
@ -7,6 +7,7 @@ import Data.List (isInfixOf, isSuffixOf)
|
||||
import Data.List.NonEmpty (NonEmpty (..))
|
||||
import Data.Monoid
|
||||
import Data.Void
|
||||
import Data.Word (Word8)
|
||||
import Test.Hspec
|
||||
import Test.Hspec.Megaparsec.AdHoc ()
|
||||
import Test.QuickCheck
|
||||
@ -173,6 +174,13 @@ spec = do
|
||||
it "shows control characters in long strings property"
|
||||
(f "{\n" "\"{<newline>\"")
|
||||
|
||||
describe "showTokens (Word8 instance)" $
|
||||
it "basically works" $ do
|
||||
-- NOTE Currently the Word8 instance is defined via Char intance, so
|
||||
-- the testing is rather shallow.
|
||||
let ts = NE.fromList [10,48,49,50] :: NonEmpty Word8
|
||||
showTokens ts `shouldBe` "\"<newline>012\""
|
||||
|
||||
describe "parseErrorPretty" $ do
|
||||
it "shows unknown ParseError correctly" $
|
||||
parseErrorPretty (mempty :: PE) `shouldBe` "1:1:\nunknown parse error\n"
|
||||
|
Loading…
Reference in New Issue
Block a user