Char and byte modules (#230)

This commit is contained in:
Mark Karpov 2017-07-03 18:34:00 +07:00 committed by GitHub
parent 45f30ae7e1
commit 3b9812bf76
12 changed files with 633 additions and 150 deletions

View File

@ -33,7 +33,7 @@ script:
- travis_wait 60 cabal build
- cabal test --show-details=always --test-option=--qc-max-success=1000
- cabal sdist
- cabal haddock | grep "100%" | wc -l | grep "9"
- cabal haddock | grep "100%" | wc -l | grep "10"
after_script:
- export PATH=~/.cabal/bin:$PATH

View File

@ -5,10 +5,11 @@
* Re-organized the module hierarchy. Some modules such as
`Text.Megaparsec.Prim` do not exist anymore. Stream definitions were moved
to `Text.Megaparsec.Stream`. Generic combinators are now re-exported from
the `Control.Applicative.Combinators`. Just import `Text.Megaparsec` and
you should be OK. Then add `Text.Megaparsec.Char` if you are working with
a stream of `Char`s, then add qualified modules you need (permutation
parsing, lexing, expression parsing, etc.).
the `Control.Applicative.Combinators` from the package
`parser-combinators`. Just import `Text.Megaparsec` and you should be OK.
Then add `Text.Megaparsec.Char` if you are working with a stream of
`Char`s, then add qualified modules you need (permutation parsing, lexing,
expression parsing, etc.).
* Dropped per-stream modules, the `Parser` type synonym is to be defined
manually by user.
@ -79,6 +80,18 @@
* Added `takeWhileP` and `takeWhile1P` to `MonadParsec`. Added `skipWhileP`,
`skipWhile1P` as derivatives from those primitive combinators.
* Dropped `oneOf'` and `noneOf'` from `Text.Megaparsec.Char`. These were
seldom (if ever) used and are easily re-implemented.
* Added `notChar` in `Text.Megaparsec.Char`.
* Added `space1` in `Text.Megaprasec.Char`. This parser is like `space` but
requires at least one space character to be present to succeed.
* Added new module `Text.Megaparsec.Byte`, which is similar to
`Text.Megaparsec.Char`, but target token type is `Word8` instead of
`Char`.
## Megaparsec 5.3.1
* Various updates to the docs.

View File

@ -72,11 +72,11 @@ features some combinators that are missing in other parsing libraries:
In addition to that, Megaparsec 6 features high-performance combinators
similar to those found in Attoparsec:
* `tokens` makes it easy to parse several tokens in a row. This is about 100
time faster than matching a string token by token. `string` and `string'`
are built on top of this combinator. `tokens` returns “chunk” of original
input, meaning that if you parse `Text`, it'll return `Text` without any
repacking.
* `tokens` makes it easy to parse several tokens in a row (`string` and
`string'` are built on top of this primitive). This is about 100 times
faster than matching a string token by token. `tokens` returns “chunk” of
original input, meaning that if you parse `Text`, it'll return `Text`
without any repacking.
* `takeWhile` and `takeWhile1` are about 150 times faster than approaches
involving `many`, `manyTill` and other similar combinators.

View File

@ -15,8 +15,8 @@
--
-- In addition to the "Text.Megaparsec" module, which exports and re-exports
-- most everything that you may need, we advise to import
-- "Text.Megaparsec.Char" if you plan to work with a stream of 'Char'
-- tokens.
-- "Text.Megaparsec.Char" if you plan to work with a stream of 'Char' tokens
-- or "Text.Megaparsec.Byte" if you indend to parse binary data.
--
-- It is common to start working with the library by defining a type synonym
-- like this:
@ -756,6 +756,8 @@ class (Stream s, A.Alternative m, MonadPlus m)
-- | Similar to 'takeWhileP', but fails if it can't parse at least one
-- token.
--
-- @since 6.0.0
takeWhile1P
:: Maybe String -- ^ Name for a single token in the row
@ -1227,6 +1229,8 @@ region f m = do
{-# INLINEABLE region #-}
-- | The same as 'takeWhileP', but discards the result.
--
-- @since 6.0.0
skipWhileP :: MonadParsec e s m
=> Maybe String -- ^ Name of a single token in the row
@ -1236,6 +1240,8 @@ skipWhileP l f = void (takeWhileP l f)
{-# INLINE skipWhileP #-}
-- | The same as 'takeWhile1P', but discards the result.
--
-- @since 6.0.0
skipWhile1P :: MonadParsec e s m
=> Maybe String -- ^ Name of a single token in the row

234
Text/Megaparsec/Byte.hs Normal file
View File

@ -0,0 +1,234 @@
-- |
-- Module : Text.Megaparsec.Byte
-- Copyright : © 20152017 Megaparsec contributors
-- License : FreeBSD
--
-- Maintainer : Mark Karpov <markkarpov92@gmail.com>
-- Stability : experimental
-- Portability : portable
--
-- Commonly used binary parsers.
--
-- @since 6.0.0
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
module Text.Megaparsec.Byte
( -- * Simple parsers
newline
, crlf
, eol
, tab
, space
, space1
-- * Categories of characters
, controlChar
, spaceChar
, upperChar
, lowerChar
, letterChar
, alphaNumChar
, printChar
, digitChar
, octDigitChar
, hexDigitChar
, asciiChar
-- * More general parsers
, C.char
, char'
, C.anyChar
, C.notChar
, C.oneOf
, C.noneOf
, C.satisfy
-- * Sequence of bytes
, C.string
, C.string' )
where
import Control.Applicative
import Data.Char
import Data.Proxy
import Data.Word (Word8)
import qualified Text.Megaparsec.Char as C
import Text.Megaparsec
----------------------------------------------------------------------------
-- Simple parsers
-- | Parse a newline byte.
newline :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
newline = C.char 10
{-# INLINE newline #-}
-- | Parse a carriage return character followed by a newline character.
-- Return the sequence of characters parsed.
crlf :: forall e s m. (MonadParsec e s m, Token s ~ Word8) => m (Tokens s)
crlf = C.string (tokensToChunk (Proxy :: Proxy s) [13,10])
{-# INLINE crlf #-}
-- | Parse a CRLF (see 'crlf') or LF (see 'newline') end of line. Return the
-- sequence of characters parsed.
eol :: forall e s m. (MonadParsec e s m, Token s ~ Word8) => m (Tokens s)
eol = (tokenToChunk (Proxy :: Proxy s) <$> newline)
<|> crlf
<?> "end of line"
{-# INLINE eol #-}
-- | Parse a tab character.
tab :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
tab = C.char 9
{-# INLINE tab #-}
-- | Skip /zero/ or more white space characters.
--
-- See also: 'skipMany' and 'spaceChar'.
space :: (MonadParsec e s m, Token s ~ Word8) => m ()
space = skipWhileP (Just "white space") isSpace'
{-# INLINE space #-}
-- | Skip /one/ or more white space characters.
--
-- See also: 'skipSome' and 'spaceChar'.
space1 :: (MonadParsec e s m, Token s ~ Word8) => m ()
space1 = skipWhile1P (Just "white space") isSpace'
{-# INLINE space1 #-}
----------------------------------------------------------------------------
-- Categories of characters
-- | Parse a control character (a non-printing character of the Latin-1
-- subset of Unicode).
controlChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
controlChar = C.satisfy (isControl . toChar) <?> "control character"
{-# INLINE controlChar #-}
-- | Parse a space character, and the control characters: tab, newline,
-- carriage return, form feed, and vertical tab.
spaceChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
spaceChar = C.satisfy isSpace' <?> "white space"
{-# INLINE spaceChar #-}
-- | Parse an upper-case character.
upperChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
upperChar = C.satisfy (isUpper . toChar) <?> "uppercase letter"
{-# INLINE upperChar #-}
-- | Parse a lower-case alphabetic character.
lowerChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
lowerChar = C.satisfy (isLower . toChar) <?> "lowercase letter"
{-# INLINE lowerChar #-}
-- | Parse an alphabetic character: lower-case or upper-case.
letterChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
letterChar = C.satisfy (isLetter . toChar) <?> "letter"
{-# INLINE letterChar #-}
-- | Parse an alphabetic or numeric digit Unicode characters.
--
-- Note that the numeric digits outside the ASCII range are parsed by this
-- parser but not by 'digitChar'. Such digits may be part of identifiers but
-- are not used by the printer and reader to represent numbers.
alphaNumChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
alphaNumChar = C.satisfy (isAlphaNum . toChar) <?> "alphanumeric character"
{-# INLINE alphaNumChar #-}
-- | Parse a printable character: letter, number, mark, punctuation, symbol
-- or space.
printChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
printChar = C.satisfy (isPrint . toChar) <?> "printable character"
{-# INLINE printChar #-}
-- | Parse an ASCII digit, i.e between “0” and “9”.
digitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
digitChar = C.satisfy isDigit' <?> "digit"
where
isDigit' x = x >= 48 && x <= 57
{-# INLINE digitChar #-}
-- | Parse an octal digit, i.e. between “0” and “7”.
octDigitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
octDigitChar = C.satisfy isOctDigit' <?> "octal digit"
where
isOctDigit' x = x >= 48 && x <= 55
{-# INLINE octDigitChar #-}
-- | Parse a hexadecimal digit, i.e. between “0” and “9”, or “a” and “f”, or
-- “A” and “F”.
hexDigitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
hexDigitChar = C.satisfy (isHexDigit . toChar) <?> "hexadecimal digit"
{-# INLINE hexDigitChar #-}
-- | Parse a character from the first 128 characters of the Unicode
-- character set, corresponding to the ASCII character set.
asciiChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
asciiChar = C.satisfy (< 128) <?> "ASCII character"
{-# INLINE asciiChar #-}
----------------------------------------------------------------------------
-- More general parsers
-- | The same as 'char' but case-insensitive. This parser returns the
-- actually parsed character preserving its case.
--
-- >>> parseTest (char' 'e') "E"
-- 'E'
-- >>> parseTest (char' 'e') "G"
-- 1:1:
-- unexpected 'G'
-- expecting 'E' or 'e'
char' :: (MonadParsec e s m, Token s ~ Word8) => Token s -> m (Token s)
char' c = choice [C.char c, C.char (swapCase c)]
where
swapCase x
| isUpper g = fromChar (toLower g)
| isLower g = fromChar (toUpper g)
| otherwise = x
where
g = toChar x
{-# INLINE char' #-}
----------------------------------------------------------------------------
-- Helpers
-- | 'Word8'-specialized version of 'isSpace'.
isSpace' :: Word8 -> Bool
isSpace' x
| x >= 9 && x <= 13 = True
| x == 32 = True
| x == 160 = True
| otherwise = False
{-# INLINE isSpace' #-}
-- | Convert a byte to char.
toChar :: Word8 -> Char
toChar = chr . fromIntegral
{-# INLINE toChar #-}
-- | Convert a char to byte.
fromChar :: Char -> Word8
fromChar = fromIntegral . ord
{-# INLINE fromChar #-}

View File

@ -24,6 +24,7 @@ module Text.Megaparsec.Char
, eol
, tab
, space
, space1
-- * Categories of characters
, controlChar
, spaceChar
@ -48,10 +49,9 @@ module Text.Megaparsec.Char
, char
, char'
, anyChar
, notChar
, oneOf
, oneOf'
, noneOf
, noneOf'
, satisfy
-- * Sequence of characters
, string
@ -69,8 +69,8 @@ import qualified Data.Set as E
import Text.Megaparsec
#if !MIN_VERSION_base(4,8,0)
import Data.Foldable (Foldable (), any, elem, notElem)
import Prelude hiding (any, elem, notElem)
import Data.Foldable (Foldable (), elem, notElem)
import Prelude hiding (elem, notElem)
#endif
----------------------------------------------------------------------------
@ -112,6 +112,16 @@ space :: (MonadParsec e s m, Token s ~ Char) => m ()
space = skipWhileP (Just "white space") isSpace
{-# INLINE space #-}
-- | Skip /one/ or more white space characters.
--
-- See also: 'skipSome' and 'spaceChar'.
--
-- @since 6.0.0
space1 :: (MonadParsec e s m, Token s ~ Char) => m ()
space1 = skipWhile1P (Just "white space") isSpace
{-# INLINE space1 #-}
----------------------------------------------------------------------------
-- Categories of characters
@ -285,7 +295,7 @@ categoryName = \case
--
-- > semicolon = char ';'
char :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s)
char :: MonadParsec e s m => Token s -> m (Token s)
char c = token testChar (Just c)
where
f x = Tokens (x:|[])
@ -306,7 +316,7 @@ char c = token testChar (Just c)
-- expecting 'E' or 'e'
char' :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s)
char' c = choice [char c, char $ swapCase c]
char' c = choice [char c, char (swapCase c)]
where
swapCase x
| isUpper x = toLower x
@ -316,10 +326,19 @@ char' c = choice [char c, char $ swapCase c]
-- | This parser succeeds for any character. Returns the parsed character.
anyChar :: (MonadParsec e s m, Token s ~ Char) => m (Token s)
anyChar :: MonadParsec e s m => m (Token s)
anyChar = satisfy (const True) <?> "character"
{-# INLINE anyChar #-}
-- | Match any character but the given one. It's a good idea to attach a
-- 'label' to this parser manually.
--
-- @since 6.0.0
notChar :: MonadParsec e s m => Token s -> m (Token s)
notChar c = satisfy (/= c)
{-# INLINE notChar #-}
-- | @'oneOf' cs@ succeeds if the current character is in the supplied
-- collection of characters @cs@. Returns the parsed character. Note that
-- this parser cannot automatically generate the “expected” component of
@ -329,43 +348,36 @@ anyChar = satisfy (const True) <?> "character"
-- See also: 'satisfy'.
--
-- > digit = oneOf ['0'..'9'] <?> "digit"
--
-- __Performance note__: prefer 'satisfy' when you can because it's faster
-- when you have only a couple of tokens to compare to:
--
-- > quoteFast = satisfy (\x -> x == '\'' || x == '\"')
-- > quoteSlow = oneOf "'\""
oneOf :: (Foldable f, MonadParsec e s m, Token s ~ Char)
=> f (Token s) -> m (Token s)
oneOf :: (Foldable f, MonadParsec e s m)
=> f (Token s)
-> m (Token s)
oneOf cs = satisfy (`elem` cs)
{-# INLINE oneOf #-}
-- | The same as 'oneOf', but case-insensitive. Returns the parsed character
-- preserving its case.
--
-- > vowel = oneOf' "aeiou" <?> "vowel"
oneOf' :: (Foldable f, MonadParsec e s m, Token s ~ Char)
=> f (Token s)
-> m (Token s)
oneOf' cs = satisfy (`elemi` cs)
{-# INLINE oneOf' #-}
-- | As the dual of 'oneOf', @noneOf cs@ succeeds if the current character
-- /not/ in the supplied list of characters @cs@. Returns the parsed
-- character.
-- character. Note that this parser cannot automatically generate the
-- “expected” component of error message, so usually you should label it
-- manually with 'label' or ('<?>').
--
-- See also: 'satisfy'.
--
-- __Performance note__: prefer 'satisfy' and 'notChar' when you can because
-- it's faster.
noneOf :: (Foldable f, MonadParsec e s m, Token s ~ Char)
noneOf :: (Foldable f, MonadParsec e s m)
=> f (Token s)
-> m (Token s)
noneOf cs = satisfy (`notElem` cs)
{-# INLINE noneOf #-}
-- | The same as 'noneOf', but case-insensitive.
--
-- > consonant = noneOf' "aeiou" <?> "consonant"
noneOf' :: (Foldable f, MonadParsec e s m, Token s ~ Char)
=> f (Token s)
-> m (Token s)
noneOf' cs = satisfy (`notElemi` cs)
{-# INLINE noneOf' #-}
-- | The parser @'satisfy' f@ succeeds for any character for which the
-- supplied function @f@ returns 'True'. Returns the character that is
-- actually parsed.
@ -373,7 +385,7 @@ noneOf' cs = satisfy (`notElemi` cs)
-- > digitChar = satisfy isDigit <?> "digit"
-- > oneOf cs = satisfy (`elem` cs)
satisfy :: (MonadParsec e s m, Token s ~ Char)
satisfy :: MonadParsec e s m
=> (Token s -> Bool) -- ^ Predicate to apply
-> m (Token s)
satisfy f = token testChar Nothing
@ -409,24 +421,3 @@ string' :: (MonadParsec e s m, CI.FoldCase (Tokens s))
-> m (Tokens s)
string' = tokens ((==) `on` CI.mk)
{-# INLINE string' #-}
----------------------------------------------------------------------------
-- Helpers
-- | Case-insensitive equality test for characters.
casei :: Char -> Char -> Bool
casei x y = toUpper x == toUpper y
{-# INLINE casei #-}
-- | Case-insensitive 'elem'.
elemi :: Foldable f => Char -> f Char -> Bool
elemi = any . casei
{-# INLINE elemi #-}
-- | Case-insensitive 'notElem'.
notElemi :: Foldable f => Char -> f Char -> Bool
notElemi c = not . elemi c
{-# INLINE notElemi #-}

View File

@ -36,6 +36,7 @@ where
import Control.DeepSeq
import Control.Exception
import Data.Char (chr)
import Data.Data (Data)
import Data.List (intercalate)
import Data.List.NonEmpty (NonEmpty (..))
@ -44,6 +45,7 @@ import Data.Semigroup
import Data.Set (Set)
import Data.Typeable (Typeable)
import Data.Void
import Data.Word (Word8)
import GHC.Generics
import Prelude hiding (concat)
import qualified Data.List.NonEmpty as NE
@ -199,6 +201,9 @@ class ShowToken a where
instance ShowToken Char where
showTokens = stringPretty
instance ShowToken Word8 where
showTokens = stringPretty . fmap (chr . fromIntegral)
-- | The type class defines how to print custom data component of
-- 'ParseError'.
--

View File

@ -68,16 +68,18 @@ import qualified Text.Megaparsec.Char as C
----------------------------------------------------------------------------
-- White space
-- | @space spaceChar lineComment blockComment@ produces parser that can
-- parse white space in general. It's expected that you create such a parser
-- once and pass it to other functions in this module as needed (when you
-- see @spaceConsumer@ in documentation, usually it means that something
-- like 'space' is expected there).
-- | @space sc lineComment blockComment@ produces parser that can parse
-- white space in general. It's expected that you create such a parser once
-- and pass it to other functions in this module as needed (when you see
-- @spaceConsumer@ in documentation, usually it means that something like
-- 'space' is expected there).
--
-- @spaceChar@ is used to parse trivial space characters. You can use
-- 'C.spaceChar' from "Text.Megaparsec.Char" for this purpose as well as
-- your own parser (if you don't want to automatically consume newlines, for
-- example).
-- @sc@ is used to parse blocks of space characters. You can use 'C.space1'
-- from "Text.Megaparsec.Char" for this purpose as well as your own parser
-- (if you don't want to automatically consume newlines, for example). Make
-- sure the parser does not succeed on empty input though. In earlier
-- version 'C.spaceChar' was recommended, but now parsers based on
-- 'takeWhile1P' are preferred because of their speed.
--
-- @lineComment@ is used to parse line comments. You can use
-- 'skipLineComment' if you don't need anything special.
@ -94,7 +96,8 @@ import qualified Text.Megaparsec.Char as C
-- of the file).
space :: MonadParsec e s m
=> m () -- ^ A parser for a space character (e.g. @'void' 'C.spaceChar'@)
=> m () -- ^ A parser for space characters which does not accept empty
-- input (e.g. 'C.space1')
-> m () -- ^ A parser for a line comment (e.g. 'skipLineComment')
-> m () -- ^ A parser for a block comment (e.g. 'skipBlockComment')
-> m ()
@ -128,7 +131,7 @@ lexeme spc p = p <* spc
-- > colon = symbol ":"
-- > dot = symbol "."
symbol :: (MonadParsec e s m, Token s ~ Char)
symbol :: MonadParsec e s m
=> m () -- ^ How to consume white space after lexeme
-> Tokens s -- ^ String to parse
-> m (Tokens s)
@ -137,7 +140,7 @@ symbol spc = lexeme spc . C.string
-- | Case-insensitive version of 'symbol'. This may be helpful if you're
-- working with case-insensitive languages.
symbol' :: (MonadParsec e s m, Token s ~ Char, CI.FoldCase (Tokens s))
symbol' :: (MonadParsec e s m, CI.FoldCase (Tokens s))
=> m () -- ^ How to consume white space after lexeme
-> Tokens s -- ^ String to parse (case-insensitive)
-> m (Tokens s)

View File

@ -49,6 +49,7 @@ library
if !impl(ghc >= 7.10)
build-depends: void == 0.7.*
exposed-modules: Text.Megaparsec
, Text.Megaparsec.Byte
, Text.Megaparsec.Char
, Text.Megaparsec.Error
, Text.Megaparsec.Error.Builder
@ -74,6 +75,7 @@ test-suite tests
other-modules: Control.Applicative.CombinatorsSpec
, Test.Hspec.Megaparsec
, Test.Hspec.Megaparsec.AdHoc
, Text.Megaparsec.ByteSpec
, Text.Megaparsec.CharSpec
, Text.Megaparsec.ErrorSpec
, Text.Megaparsec.ExprSpec

View File

@ -0,0 +1,247 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
module Text.Megaparsec.ByteSpec (spec) where
import Control.Monad
import Data.ByteString (ByteString)
import Data.Char
import Data.Proxy
import Data.Semigroup ((<>))
import Data.Void
import Data.Word (Word8)
import Test.Hspec
import Test.Hspec.Megaparsec
import Test.Hspec.Megaparsec.AdHoc (nes)
import Test.QuickCheck
import Text.Megaparsec
import Text.Megaparsec.Byte
import qualified Data.ByteString as B
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative
#endif
type Parser = Parsec Void ByteString
spec :: Spec
spec = do
describe "newline" $
checkStrLit "newline" "\n" (tokenToChunk bproxy <$> newline)
describe "csrf" $
checkStrLit "crlf newline" "\r\n" crlf
describe "eol" $ do
context "when stream begins with a newline" $
it "succeeds returning the newline" $
property $ \s -> do
let s' = "\n" <> s
prs eol s' `shouldParse` "\n"
prs' eol s' `succeedsLeaving` s
context "when stream begins with CRLF sequence" $
it "parses the CRLF sequence" $
property $ \s -> do
let s' = "\r\n" <> s
prs eol s' `shouldParse` "\r\n"
prs' eol s' `succeedsLeaving` s
context "when stream begins with '\\r', but it's not followed by '\\n'" $
it "signals correct parse error" $
property $ \ch -> ch /= 10 ==> do
let s = "\r" <> B.singleton ch
prs eol s `shouldFailWith`
err posI (utoks (B.unpack s) <> elabel "end of line")
context "when input stream is '\\r'" $
it "signals correct parse error" $
prs eol "\r" `shouldFailWith` err posI
(utok 13 <> elabel "end of line")
context "when stream does not begin with newline or CRLF sequence" $
it "signals correct parse error" $
property $ \ch s -> (ch /= 13 && ch /= 10) ==> do
let s' = B.singleton ch <> s
prs eol s' `shouldFailWith` err posI
(utoks (B.unpack $ B.take 2 s') <> elabel "end of line")
context "when stream is empty" $
it "signals correct parse error" $
prs eol "" `shouldFailWith` err posI
(ueof <> elabel "end of line")
describe "tab" $
checkStrLit "tab" "\t" (tokenToChunk bproxy <$> tab)
describe "space" $
it "consumes space up to first non-space character" $
property $ \s' -> do
let (s0,s1) = B.partition isSpace' s'
s = s0 <> s1
prs space s `shouldParse` ()
prs' space s `succeedsLeaving` s1
describe "space1" $ do
context "when stream does not start with a space character" $
it "signals correct parse error" $
property $ \ch s' -> not (isSpace' ch) ==> do
let (s0,s1) = B.partition isSpace' s'
s = B.singleton ch <> s0 <> s1
prs space1 s `shouldFailWith` err posI (utok ch <> elabel "white space")
prs' space1 s `failsLeaving` s
context "when stream starts with a space character" $
it "consumes space up to first non-space character" $
property $ \s' -> do
let (s0,s1) = B.partition isSpace' s'
s = " " <> s0 <> s1
prs space1 s `shouldParse` ()
prs' space1 s `succeedsLeaving` s1
context "when stream is empty" $
it "signals correct parse error" $
prs space1 "" `shouldFailWith` err posI (ueof <> elabel "white space")
describe "controlChar" $
checkCharPred "control character" (isControl . toChar) controlChar
describe "spaceChar" $
checkCharRange "white space" [9,10,11,12,13,32,160] spaceChar
-- describe "upperChar" $
-- checkCharPred "uppercase letter" (isUpper . toChar) upperChar
-- describe "lowerChar" $
-- checkCharPred "lowercase letter" (isLower . toChar) lowerChar
-- describe "letterChar" $
-- checkCharPred "letter" (isAlpha . toChar) letterChar
describe "alphaNumChar" $
checkCharPred "alphanumeric character" (isAlphaNum . toChar) alphaNumChar
describe "printChar" $
checkCharPred "printable character" (isPrint . toChar) printChar
describe "digitChar" $
checkCharRange "digit" [48..57] digitChar
describe "octDigitChar" $
checkCharRange "octal digit" [48..55] octDigitChar
describe "hexDigitChar" $
checkCharRange "hexadecimal digit" ([48..57] ++ [97..102] ++ [65..70]) hexDigitChar
-- describe "asciiChar" $
-- checkCharPred "ASCII character" (isAscii . toChar) asciiChar
describe "char'" $ do
context "when stream begins with the character specified as argument" $
it "parses the character" $
property $ \ch s -> do
let sl = B.cons (liftChar toLower ch) s
su = B.cons (liftChar toUpper ch) s
prs (char' ch) sl `shouldParse` liftChar toLower ch
prs (char' ch) su `shouldParse` liftChar toUpper ch
prs' (char' ch) sl `succeedsLeaving` s
prs' (char' ch) su `succeedsLeaving` s
context "when stream does not begin with the character specified as argument" $
it "signals correct parse error" $
property $ \ch ch' s -> liftChar toLower ch /= liftChar toLower ch' ==> do
let s' = B.cons ch' s
ms = utok ch' <> etok (liftChar toLower ch) <> etok (liftChar toUpper ch)
prs (char' ch) s' `shouldFailWith` err posI ms
prs' (char' ch) s' `failsLeaving` s'
context "when stream is empty" $
it "signals correct parse error" $
property $ \ch -> do
let ms = ueof <> etok (liftChar toLower ch) <> etok (liftChar toUpper ch)
prs (char' ch) "" `shouldFailWith` err posI ms
----------------------------------------------------------------------------
-- Helpers
checkStrLit :: String -> ByteString -> Parser ByteString -> SpecWith ()
checkStrLit name ts p = do
context ("when stream begins with " ++ name) $
it ("parses the " ++ name) $
property $ \s -> do
let s' = ts <> s
prs p s' `shouldParse` ts
prs' p s' `succeedsLeaving` s
context ("when stream does not begin with " ++ name) $
it "signals correct parse error" $
property $ \ch s -> ch /= B.head ts ==> do
let s' = B.cons ch s
us = B.unpack $ B.take (B.length ts) s'
ps = B.unpack ts
prs p s' `shouldFailWith` err posI (utoks us <> etoks ps)
prs' p s' `failsLeaving` s'
context "when stream is empty" $
it "signals correct parse error" $
prs p "" `shouldFailWith` err posI (ueof <> etoks (B.unpack ts))
checkCharPred :: String -> (Word8 -> Bool) -> Parser Word8 -> SpecWith ()
checkCharPred name f p = do
context ("when stream begins with " ++ name) $
it ("parses the " ++ name) $
property $ \ch s -> f ch ==> do
let s' = B.singleton ch <> s
prs p s' `shouldParse` ch
prs' p s' `succeedsLeaving` s
context ("when stream does not begin with " ++ name) $
it "signals correct parse error" $
property $ \ch s -> not (f ch) ==> do
let s' = B.singleton ch <> s
prs p s' `shouldFailWith` err posI (utok ch <> elabel name)
prs' p s' `failsLeaving` s'
context "when stream is empty" $
it "signals correct parse error" $
prs p "" `shouldFailWith` err posI (ueof <> elabel name)
checkCharRange :: String -> [Word8] -> Parser Word8 -> SpecWith ()
checkCharRange name tchs p = do
forM_ tchs $ \tch ->
context ("when stream begins with " ++ showTokens (nes tch)) $
it ("parses the " ++ showTokens (nes tch)) $
property $ \s -> do
let s' = B.singleton tch <> s
prs p s' `shouldParse` tch
prs' p s' `succeedsLeaving` s
context "when stream is empty" $
it "signals correct parse error" $
prs p "" `shouldFailWith` err posI (ueof <> elabel name)
prs
:: Parser a -- ^ Parser to run
-> ByteString -- ^ Input for the parser
-> Either (ParseError Word8 Void) a -- ^ Result of parsing
prs p = parse p ""
prs'
:: Parser a -- ^ Parser to run
-> ByteString -- ^ Input for the parser
-> (State ByteString, Either (ParseError Word8 Void) a) -- ^ Result of parsing
prs' p s = runParser' p (initialState s)
bproxy :: Proxy ByteString
bproxy = Proxy
-- | 'Word8'-specialized version of 'isSpace'.
isSpace' :: Word8 -> Bool
isSpace' x
| x >= 9 && x <= 13 = True
| x == 32 = True
| x == 160 = True
| otherwise = False
-- | Convert a byte to char.
toChar :: Word8 -> Char
toChar = chr . fromIntegral
-- | Covert a char to byte.
fromChar :: Char -> Word8
fromChar = fromIntegral . ord
-- | Lift char transformation to byte transformation.
liftChar :: (Char -> Char) -> Word8 -> Word8
liftChar f = fromChar . f . toChar

View File

@ -7,7 +7,6 @@ import Control.Monad
import Data.Char
import Data.List (partition, isPrefixOf)
import Data.Monoid ((<>))
import Data.Void
import Test.Hspec
import Test.Hspec.Megaparsec
import Test.Hspec.Megaparsec.AdHoc
@ -68,12 +67,31 @@ spec = do
checkStrLit "tab" "\t" (pure <$> tab)
describe "space" $
it "consumes it up to first non-space character" $
property $ \s -> do
let (s0,s1) = partition isSpace s
s' = s0 ++ s1
prs space s' `shouldParse` ()
prs' space s' `succeedsLeaving` s1
it "consumes space up to first non-space character" $
property $ \s' -> do
let (s0,s1) = partition isSpace s'
s = s0 ++ s1
prs space s `shouldParse` ()
prs' space s `succeedsLeaving` s1
describe "space1" $ do
context "when stream does not start with a space character" $
it "signals correct parse error" $
property $ \ch s' -> not (isSpace ch) ==> do
let (s0,s1) = partition isSpace s'
s = ch : s0 ++ s1
prs space1 s `shouldFailWith` err posI (utok ch <> elabel "white space")
prs' space1 s `failsLeaving` s
context "when stream starts with a space character" $
it "consumes space up to first non-space character" $
property $ \s' -> do
let (s0,s1) = partition isSpace s'
s = ' ' : s0 ++ s1
prs space1 s `shouldParse` ()
prs' space1 s `succeedsLeaving` s1
context "when stream is empty" $
it "signals correct parse error" $
prs space1 "" `shouldFailWith` err posI (ueof <> elabel "white space")
describe "controlChar" $
checkCharPred "control character" isControl controlChar
@ -220,6 +238,24 @@ spec = do
it "signals correct parse error" $
prs anyChar "" `shouldFailWith` err posI (ueof <> elabel "character")
describe "notChar" $ do
context "when stream begins with the character specified as argument" $
it "signals correct parse error" $
property $ \ch s' -> do
let p = notChar ch
s = ch : s'
prs p s `shouldFailWith` err posI (utok ch)
prs' p s `failsLeaving` s
context "when stream does not begin with the character specified as argument" $
it "parses first character in the stream" $
property $ \ch s -> not (null s) && ch /= head s ==> do
let p = notChar ch
prs p s `shouldParse` head s
prs' p s `succeedsLeaving` tail s
context "when stream is empty" $
it "signals correct parse error" $
prs (notChar 'a') "" `shouldFailWith` err posI ueof
describe "oneOf" $ do
context "when stream begins with one of specified characters" $
it "parses the character" $
@ -240,29 +276,6 @@ spec = do
property $ \chs ->
prs (oneOf (chs :: String)) "" `shouldFailWith` err posI ueof
describe "oneOf'" $ do
context "when stream begins with one of specified characters" $
it "parses the character" $
property $ \chs' n s -> do
let chs = getNonEmpty chs'
ch = chs !! (getNonNegative n `rem` length chs)
sl = toLower ch : s
su = toUpper ch : s
prs (oneOf' chs) sl `shouldParse` toLower ch
prs (oneOf' chs) su `shouldParse` toUpper ch
prs' (oneOf' chs) sl `succeedsLeaving` s
prs' (oneOf' chs) su `succeedsLeaving` s
context "when stream does not begin with any of specified characters" $
it "signals correct parse error" $
property $ \chs ch s -> ch `notElemi` (chs :: String) ==> do
let s' = ch : s
prs (oneOf' chs) s' `shouldFailWith` err posI (utok ch)
prs' (oneOf' chs) s' `failsLeaving` s'
context "when stream is empty" $
it "signals correct parse error" $
property $ \chs ->
prs (oneOf' (chs :: String)) "" `shouldFailWith` err posI ueof
describe "noneOf" $ do
context "when stream does not begin with any of specified characters" $
it "parses the character" $
@ -283,29 +296,6 @@ spec = do
property $ \chs ->
prs (noneOf (chs :: String)) "" `shouldFailWith` err posI ueof
describe "noneOf'" $ do
context "when stream does not begin with any of specified characters" $
it "parses the character" $
property $ \chs ch s -> ch `notElemi` (chs :: String) ==> do
let sl = toLower ch : s
su = toUpper ch : s
prs (noneOf' chs) sl `shouldParse` toLower ch
prs (noneOf' chs) su `shouldParse` toUpper ch
prs' (noneOf' chs) sl `succeedsLeaving` s
prs' (noneOf' chs) su `succeedsLeaving` s
context "when stream begins with one of specified characters" $
it "signals correct parse error" $
property $ \chs' n s -> do
let chs = getNonEmpty chs'
ch = chs !! (getNonNegative n `rem` length chs)
s' = ch : s
prs (noneOf' chs) s' `shouldFailWith` err posI (utok ch)
prs' (noneOf' chs) s' `failsLeaving` s'
context "when stream is empty" $
it "signals correct parse error" $
property $ \chs ->
prs (noneOf' (chs :: String)) "" `shouldFailWith` err posI ueof
describe "string" $ do
context "when stream is prefixed with given string" $
it "parses the string" $
@ -338,7 +328,7 @@ spec = do
----------------------------------------------------------------------------
-- Helpers
checkStrLit :: String -> String -> Parsec Void String String -> SpecWith ()
checkStrLit :: String -> String -> Parser String -> SpecWith ()
checkStrLit name ts p = do
context ("when stream begins with " ++ name) $
it ("parses the " ++ name) $
@ -357,7 +347,7 @@ checkStrLit name ts p = do
it "signals correct parse error" $
prs p "" `shouldFailWith` err posI (ueof <> etoks ts)
checkCharPred :: String -> (Char -> Bool) -> Parsec Void String Char -> SpecWith ()
checkCharPred :: String -> (Char -> Bool) -> Parser Char -> SpecWith ()
checkCharPred name f p = do
context ("when stream begins with " ++ name) $
it ("parses the " ++ name) $
@ -375,7 +365,7 @@ checkCharPred name f p = do
it "signals correct parse error" $
prs p "" `shouldFailWith` err posI (ueof <> elabel name)
checkCharRange :: String -> String -> Parsec Void String Char -> SpecWith ()
checkCharRange :: String -> String -> Parser Char -> SpecWith ()
checkCharRange name tchs p = do
forM_ tchs $ \tch ->
context ("when stream begins with " ++ showTokens (nes tch)) $
@ -384,12 +374,6 @@ checkCharRange name tchs p = do
let s' = tch : s
prs p s' `shouldParse` tch
prs' p s' `succeedsLeaving` s
-- context ("when stream does not begin with " ++ name) $
-- it "signals correct parse error" $
-- property $ \ch s -> ch `notElem` tchs ==> do
-- let s' = ch : s
-- prs p s' `shouldFailWith` err posI (utok ch <> elabel name)
-- prs' p s' `failsLeaving` s'
context "when stream is empty" $
it "signals correct parse error" $
prs p "" `shouldFailWith` err posI (ueof <> elabel name)
@ -406,16 +390,6 @@ fuzzyCase s = zipWith f s <$> vector (length s)
casei :: Char -> Char -> Bool
casei x y = toUpper x == toUpper y
-- | Case-insensitive 'elem'.
elemi :: Char -> String -> Bool
elemi c = any (casei c)
-- | Case-insensitive 'notElem'.
notElemi :: Char -> String -> Bool
notElemi c = not . elemi c
-- | The 'isPrefixOf' function takes two 'String's and returns 'True' iff
-- the first list is a prefix of the second with case-insensitive
-- comparison.

View File

@ -7,6 +7,7 @@ import Data.List (isInfixOf, isSuffixOf)
import Data.List.NonEmpty (NonEmpty (..))
import Data.Monoid
import Data.Void
import Data.Word (Word8)
import Test.Hspec
import Test.Hspec.Megaparsec.AdHoc ()
import Test.QuickCheck
@ -173,6 +174,13 @@ spec = do
it "shows control characters in long strings property"
(f "{\n" "\"{<newline>\"")
describe "showTokens (Word8 instance)" $
it "basically works" $ do
-- NOTE Currently the Word8 instance is defined via Char intance, so
-- the testing is rather shallow.
let ts = NE.fromList [10,48,49,50] :: NonEmpty Word8
showTokens ts `shouldBe` "\"<newline>012\""
describe "parseErrorPretty" $ do
it "shows unknown ParseError correctly" $
parseErrorPretty (mempty :: PE) `shouldBe` "1:1:\nunknown parse error\n"