From 4fac009b485410ba190971bea16c4d573b2a5c08 Mon Sep 17 00:00:00 2001 From: Mark Karpov Date: Mon, 8 Jan 2018 22:39:39 +0700 Subject: [PATCH] =?UTF-8?q?Move=20general-enough=20combinators=20to=20?= =?UTF-8?q?=E2=80=98Text.Megaparsec=E2=80=99=20(#267)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 24 ++++ Text/Megaparsec.hs | 191 +++++++++++++++++++++++++--- Text/Megaparsec/Byte.hs | 62 +++++---- Text/Megaparsec/Byte/Lexer.hs | 4 +- Text/Megaparsec/Char.hs | 105 ++------------- Text/Megaparsec/Char/Lexer.hs | 10 +- Text/Megaparsec/Pos.hs | 4 +- tests/Text/Megaparsec/ByteSpec.hs | 12 -- tests/Text/Megaparsec/CharSpec.hs | 72 +---------- tests/Text/Megaparsec/StreamSpec.hs | 80 ++++++------ tests/Text/MegaparsecSpec.hs | 95 ++++++++++++-- 11 files changed, 378 insertions(+), 281 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fc8f635..1891d23 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,29 @@ ## Megaparsec 7.0.0 +* Moved some general combinators from `Text.Megaparsec.Char` and + `Text.Megaparsec.Byte` to `Text.Megaparsec`, renaming some of them for + clarity. + + Practical consequences: + + * Now there is the `single` combinator that is a generalization of `char` + for arbitrary streams. `Text.Megaparsec.Char` and `Text.Megaparsec.Byte` + still contain `char` as type-constrained versions of `single`. + + * Similarly, now there is the `chunk` combinator that is a generalization + of `string` for arbitrary streams. `Text.Megaparsec.Char` and + `Text.Megaparsec.Byte` still contain `string` as type-constrained + versions of `single`. + + * `satisfy` does not depend on type of token, and so it now lives in + `Text.Megaparsec`. + + * `anyToken` was renamed to `anySingle` and moved to `Text.Megaparsec`. + + * `notChar` was renamed to `anySingleBut` and moved to `Text.Megaparsec`. + + * `oneOf` and `noneOf` were moved to `Text.Megaparsec`. + * Simplified the type of the `token` primitive. It now takes just a matching function `Token s -> Maybe a` as the first argument and the collection of expected items `Set (ErrorItem (Token s))` as the second argument. This diff --git a/Text/Megaparsec.hs b/Text/Megaparsec.hs index 8b794c1..d496e73 100644 --- a/Text/Megaparsec.hs +++ b/Text/Megaparsec.hs @@ -94,6 +94,13 @@ module Text.Megaparsec -- * Primitive combinators , MonadParsec (..) -- * Derivatives of primitive combinators + , single + , satisfy + , anySingle + , anySingleBut + , oneOf + , noneOf + , chunk , () , unexpected , customFailure @@ -155,6 +162,8 @@ import qualified Data.Set as E #if !MIN_VERSION_base(4,8,0) import Control.Applicative +import Data.Foldable (Foldable, elem, notElem) +import Prelude hiding (elem, notElem) #endif -- $reexports @@ -771,16 +780,19 @@ class (Stream s, A.Alternative m, MonadPlus m) eof :: m () - -- | The parser @'token' test exp@ accepts a token @t@ with result @x@ - -- when the function @test t@ returns @'Just' x@. @exp@ specifies the - -- collection of expected items to report in error messages. + -- | The parser @'token' test expected@ accepts a token @t@ with result + -- @x@ when the function @test t@ returns @'Just' x@. @expected@ specifies + -- the collection of expected items to report in error messages. -- -- This is the most primitive combinator for accepting tokens. For -- example, the 'Text.Megaparsec.Char.satisfy' parser is implemented as: -- - -- > satisfy f = token testChar E.empty + -- > satisfy f = token testToken E.empty -- > where - -- > testChar x = if f x then Just x else Nothing + -- > testToken x = if f x then Just x else Nothing + -- + -- __Note__: type signature of this primitive was changed in the version + -- /7.0.0/. token :: (Token s -> Maybe a) @@ -789,14 +801,14 @@ class (Stream s, A.Alternative m, MonadPlus m) -- ^ Expected items (in case of an error) -> m a - -- | The parser @'tokens' test@ parses a chunk of input and returns it. - -- Supplied predicate @test@ is used to check equality of given and parsed - -- chunks after a candidate chunk of correct length is fetched from the - -- stream. + -- | The parser @'tokens' test chk@ parses a chunk of input @chk@ and + -- returns it. The supplied predicate @test@ is used to check equality of + -- given and parsed chunks after a candidate chunk of correct length is + -- fetched from the stream. -- - -- This can be used for example to write 'Text.Megaparsec.Char.string': + -- This can be used for example to write 'Text.Megaparsec.chunk': -- - -- > string = tokens (==) + -- > chunk = tokens (==) -- -- Note that beginning from Megaparsec 4.4.0, this is an auto-backtracking -- primitive, which means that if it fails, it never consumes any input. @@ -1307,6 +1319,132 @@ fixs' _ (Right (b,s,w)) = (Right b, s, w) ---------------------------------------------------------------------------- -- Derivatives of primitive combinators +-- | @'single' t@ only matches the single token @t@. +-- +-- > semicolon = single ';' +-- +-- See also: 'token', 'anySingle', 'Text.Megaparsec.Byte.char', +-- 'Text.Megaparsec.Char.char'. +-- +-- @since 7.0.0 + +single :: MonadParsec e s m + => Token s -- ^ Token to match + -> m (Token s) +single t = token testToken expected + where + testToken x = if x == t then Just x else Nothing + expected = E.singleton (Tokens (t:|[])) +{-# INLINE single #-} + +-- | The parser @'satisfy' f@ succeeds for any token for which the supplied +-- function @f@ returns 'True'. Returns the character that is actually +-- parsed. +-- +-- > digitChar = satisfy isDigit "digit" +-- > oneOf cs = satisfy (`elem` cs) +-- +-- See also: 'anySingle', 'anySingleBut', 'oneOf', 'noneOf'. +-- +-- @since 7.0.0 + +satisfy :: MonadParsec e s m + => (Token s -> Bool) -- ^ Predicate to apply + -> m (Token s) +satisfy f = token testChar E.empty + where + testChar x = if f x then Just x else Nothing +{-# INLINE satisfy #-} + +-- | Parse and return a single token. It's a good idea to attach a 'label' +-- to this parser manually. +-- +-- > anySingle = satisfy (const True) +-- +-- See also: 'satisfy', 'anySingleBut'. +-- +-- @since 7.0.0 + +anySingle :: MonadParsec e s m => m (Token s) +anySingle = satisfy (const True) +{-# INLINE anySingle #-} + +-- | Match any token but the given one. It's a good idea to attach a 'label' +-- to this parser manually. +-- +-- > anySingleBut t = satisfy (/= t) +-- +-- See also: 'single', 'anySingle', 'satisfy'. +-- +-- @since 7.0.0 + +anySingleBut :: MonadParsec e s m + => Token s -- ^ Token we should not match + -> m (Token s) +anySingleBut t = satisfy (/= t) +{-# INLINE anySingleBut #-} + +-- | @'oneOf' ts@ succeeds if the current token is in the supplied +-- collection of tokens @ts@. Returns the parsed token. Note that this +-- parser cannot automatically generate the “expected” component of error +-- message, so usually you should label it manually with 'label' or (''). +-- +-- > oneOf cs = satisfy (`elem` cs) +-- +-- See also: 'satisfy'. +-- +-- > digit = oneOf ['0'..'9'] "digit" +-- +-- __Performance note__: prefer 'satisfy' when you can because it's faster +-- when you have only a couple of tokens to compare to: +-- +-- > quoteFast = satisfy (\x -> x == '\'' || x == '\"') +-- > quoteSlow = oneOf "'\"" +-- +-- @since 7.0.0 + +oneOf :: (Foldable f, MonadParsec e s m) + => f (Token s) -- ^ Collection of matching tokens + -> m (Token s) +oneOf cs = satisfy (`elem` cs) +{-# INLINE oneOf #-} + +-- | As the dual of 'oneOf', @'noneOf' ts@ succeeds if the current token +-- /not/ in the supplied list of tokens @ts@. Returns the parsed character. +-- Note that this parser cannot automatically generate the “expected” +-- component of error message, so usually you should label it manually with +-- 'label' or (''). +-- +-- > noneOf cs = satisfy (`notElem` cs) +-- +-- See also: 'satisfy'. +-- +-- __Performance note__: prefer 'satisfy' and 'singleBut' when you can +-- because it's faster. +-- +-- @since 7.0.0 + +noneOf :: (Foldable f, MonadParsec e s m) + => f (Token s) -- ^ Collection of taken we should not match + -> m (Token s) +noneOf cs = satisfy (`notElem` cs) +{-# INLINE noneOf #-} + +-- | @'chunk' chk@ only matches the chunk @chk@. +-- +-- > divOrMod = chunk "div" <|> chunk "mod" +-- +-- See also: 'tokens', 'Text.Megaparsec.Char.string', +-- 'Text.Megaparsec.Byte.string'. +-- +-- @since 7.0.0 + +chunk :: MonadParsec e s m + => Tokens s -- ^ Chunk to match + -> m (Tokens s) +chunk = tokens (==) +{-# INLINE chunk #-} + -- | A synonym for 'label' in the form of an operator. infix 0 @@ -1318,10 +1456,10 @@ infix 0 -- | The parser @'unexpected' item@ fails with an error message telling -- about unexpected item @item@ without consuming any input. -- --- > unexpected item = failure (pure item) Set.empty +-- > unexpected item = failure (Just item) Set.empty unexpected :: MonadParsec e s m => ErrorItem (Token s) -> m a -unexpected item = failure (pure item) E.empty +unexpected item = failure (Just item) E.empty {-# INLINE unexpected #-} -- | Report a custom parse error. For a more general version, see @@ -1396,6 +1534,8 @@ takeRest = takeWhileP Nothing (const True) -- | Return 'True' when end of input has been reached. -- +-- > atEnd = option False (True <$ hidden eof) +-- -- @since 6.0.0 atEnd :: MonadParsec e s m => m Bool @@ -1410,15 +1550,14 @@ atEnd = option False (True <$ hidden eof) getInput :: MonadParsec e s m => m s getInput = stateInput <$> getParserState --- | @'setInput' input@ continues parsing with @input@. The 'getInput' and --- 'setInput' functions can for example be used to deal with include files. +-- | @'setInput' input@ continues parsing with @input@. setInput :: MonadParsec e s m => s -> m () setInput s = updateParserState (\(State _ pos tp w) -> State s pos tp w) -- | Return the current source position. -- --- See also: 'setPosition', 'pushPosition', 'popPosition', and 'SourcePos'. +-- See also: 'getNextTokenPosition'. getPosition :: MonadParsec e s m => m SourcePos getPosition = NE.head . statePos <$> getParserState @@ -1426,6 +1565,8 @@ getPosition = NE.head . statePos <$> getParserState -- | Get the position where the next token in the stream begins. If the -- stream is empty, return 'Nothing'. -- +-- See also: 'getPosition'. +-- -- @since 5.3.0 getNextTokenPosition :: forall e s m. MonadParsec e s m => m (Maybe SourcePos) @@ -1443,10 +1584,10 @@ setPosition :: MonadParsec e s m => SourcePos -> m () setPosition pos = updateParserState $ \(State s (_:|z) tp w) -> State s (pos:|z) tp w --- | Push a position into stack of positions and continue parsing working +-- | Push a position to the stack of positions and continue parsing working -- with this position. Useful for working with include files and the like. -- --- See also: 'getPosition', 'setPosition', 'popPosition', and 'SourcePos'. +-- See also: 'popPosition'. -- -- @since 5.0.0 @@ -1458,7 +1599,7 @@ pushPosition pos = updateParserState $ \(State s z tp w) -> -- element (in that case the stack of positions remains the same). This is -- how to return to previous source file after 'pushPosition'. -- --- See also: 'getPosition', 'setPosition', 'pushPosition', and 'SourcePos'. +-- See also: 'pushPosition'. -- -- @since 5.0.0 @@ -1470,6 +1611,8 @@ popPosition = updateParserState $ \(State s z tp w) -> -- | Get the number of tokens processed so far. -- +-- See also: 'setTokensProcessed'. +-- -- @since 6.0.0 getTokensProcessed :: MonadParsec e s m => m Int @@ -1477,6 +1620,8 @@ getTokensProcessed = stateTokensProcessed <$> getParserState -- | Set the number of tokens processed so far. -- +-- See also: 'getTokensProcessed'. +-- -- @since 6.0.0 setTokensProcessed :: MonadParsec e s m => Int -> m () @@ -1486,18 +1631,24 @@ setTokensProcessed tp = updateParserState $ \(State s pos _ w) -> -- | Return the tab width. The default tab width is equal to -- 'defaultTabWidth'. You can set a different tab width with the help of -- 'setTabWidth'. +-- +-- See also: 'setTabWidth'. getTabWidth :: MonadParsec e s m => m Pos getTabWidth = stateTabWidth <$> getParserState -- | Set tab width. If the argument of the function is not a positive -- number, 'defaultTabWidth' will be used. +-- +-- See also: 'getTabWidth'. setTabWidth :: MonadParsec e s m => Pos -> m () setTabWidth w = updateParserState $ \(State s pos tp _) -> State s pos tp w -- | @'setParserState' st@ sets the parser state to @st@. +-- +-- See also: 'getParserState', 'updateParserState'. setParserState :: MonadParsec e s m => State s -> m () setParserState st = updateParserState (const st) @@ -1615,4 +1766,4 @@ streamTake :: forall s. Stream s => Int -> s -> [Token s] streamTake n s = case fst <$> takeN_ n s of Nothing -> [] - Just chunk -> chunkToTokens (Proxy :: Proxy s) chunk + Just chk -> chunkToTokens (Proxy :: Proxy s) chk diff --git a/Text/Megaparsec/Byte.hs b/Text/Megaparsec/Byte.hs index fcc1615..5004add 100644 --- a/Text/Megaparsec/Byte.hs +++ b/Text/Megaparsec/Byte.hs @@ -34,16 +34,11 @@ module Text.Megaparsec.Byte , octDigitChar , hexDigitChar , asciiChar - -- * More general parsers - , C.char + -- * Single byte + , char , char' - , C.anyChar - , C.notChar - , C.oneOf - , C.noneOf - , C.satisfy -- * Sequence of bytes - , C.string + , string , C.string' ) where @@ -62,14 +57,14 @@ import qualified Text.Megaparsec.Char as C -- | Parse a newline byte. newline :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) -newline = C.char 10 +newline = char 10 {-# INLINE newline #-} -- | Parse a carriage return character followed by a newline character. -- Return the sequence of characters parsed. crlf :: forall e s m. (MonadParsec e s m, Token s ~ Word8) => m (Tokens s) -crlf = C.string (tokensToChunk (Proxy :: Proxy s) [13,10]) +crlf = string (tokensToChunk (Proxy :: Proxy s) [13,10]) {-# INLINE crlf #-} -- | Parse a CRLF (see 'crlf') or LF (see 'newline') end of line. Return the @@ -84,7 +79,7 @@ eol = (tokenToChunk (Proxy :: Proxy s) <$> newline) -- | Parse a tab character. tab :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) -tab = C.char 9 +tab = char 9 {-# INLINE tab #-} -- | Skip /zero/ or more white space characters. @@ -109,51 +104,51 @@ space1 = void $ takeWhile1P (Just "white space") isSpace' -- | Parse a control character. controlChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) -controlChar = C.satisfy (isControl . toChar) "control character" +controlChar = satisfy (isControl . toChar) "control character" {-# INLINE controlChar #-} -- | Parse a space character, and the control characters: tab, newline, -- carriage return, form feed, and vertical tab. spaceChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) -spaceChar = C.satisfy isSpace' "white space" +spaceChar = satisfy isSpace' "white space" {-# INLINE spaceChar #-} -- | Parse an upper-case character. upperChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) -upperChar = C.satisfy (isUpper . toChar) "uppercase letter" +upperChar = satisfy (isUpper . toChar) "uppercase letter" {-# INLINE upperChar #-} -- | Parse a lower-case alphabetic character. lowerChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) -lowerChar = C.satisfy (isLower . toChar) "lowercase letter" +lowerChar = satisfy (isLower . toChar) "lowercase letter" {-# INLINE lowerChar #-} -- | Parse an alphabetic character: lower-case or upper-case. letterChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) -letterChar = C.satisfy (isLetter . toChar) "letter" +letterChar = satisfy (isLetter . toChar) "letter" {-# INLINE letterChar #-} -- | Parse an alphabetic or digit characters. alphaNumChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) -alphaNumChar = C.satisfy (isAlphaNum . toChar) "alphanumeric character" +alphaNumChar = satisfy (isAlphaNum . toChar) "alphanumeric character" {-# INLINE alphaNumChar #-} -- | Parse a printable character: letter, number, mark, punctuation, symbol -- or space. printChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) -printChar = C.satisfy (isPrint . toChar) "printable character" +printChar = satisfy (isPrint . toChar) "printable character" {-# INLINE printChar #-} -- | Parse an ASCII digit, i.e between “0” and “9”. digitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) -digitChar = C.satisfy isDigit' "digit" +digitChar = satisfy isDigit' "digit" where isDigit' x = x >= 48 && x <= 57 {-# INLINE digitChar #-} @@ -161,7 +156,7 @@ digitChar = C.satisfy isDigit' "digit" -- | Parse an octal digit, i.e. between “0” and “7”. octDigitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) -octDigitChar = C.satisfy isOctDigit' "octal digit" +octDigitChar = satisfy isOctDigit' "octal digit" where isOctDigit' x = x >= 48 && x <= 55 {-# INLINE octDigitChar #-} @@ -170,18 +165,26 @@ octDigitChar = C.satisfy isOctDigit' "octal digit" -- “A” and “F”. hexDigitChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) -hexDigitChar = C.satisfy (isHexDigit . toChar) "hexadecimal digit" +hexDigitChar = satisfy (isHexDigit . toChar) "hexadecimal digit" {-# INLINE hexDigitChar #-} -- | Parse a character from the first 128 characters of the Unicode -- character set, corresponding to the ASCII character set. asciiChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s) -asciiChar = C.satisfy (< 128) "ASCII character" +asciiChar = satisfy (< 128) "ASCII character" {-# INLINE asciiChar #-} ---------------------------------------------------------------------------- --- More general parsers +-- Single byte + +-- | A type-constrained version of 'single'. +-- +-- > newline = char 10 + +char :: (MonadParsec e s m, Token s ~ Word8) => Token s -> m (Token s) +char = single +{-# INLINE char #-} -- | The same as 'char' but case-insensitive. This parser returns the -- actually parsed character preserving its case. @@ -195,8 +198,8 @@ asciiChar = C.satisfy (< 128) "ASCII character" char' :: (MonadParsec e s m, Token s ~ Word8) => Token s -> m (Token s) char' c = choice - [ C.char c - , C.char (fromMaybe c (swapCase c)) ] + [ char c + , char (fromMaybe c (swapCase c)) ] where swapCase x | isUpper g = fromChar (toLower g) @@ -206,6 +209,15 @@ char' c = choice g = toChar x {-# INLINE char' #-} +---------------------------------------------------------------------------- +-- Sequence of bytes + +-- | A type-constrained version of 'chunk'. + +string :: (MonadParsec e s m, Token s ~ Word8) => Tokens s -> m (Tokens s) +string = chunk +{-# INLINE string #-} + ---------------------------------------------------------------------------- -- Helpers diff --git a/Text/Megaparsec/Byte/Lexer.hs b/Text/Megaparsec/Byte/Lexer.hs index 1101ad6..3c47bf2 100644 --- a/Text/Megaparsec/Byte/Lexer.hs +++ b/Text/Megaparsec/Byte/Lexer.hs @@ -68,7 +68,7 @@ skipBlockComment :: (MonadParsec e s m, Token s ~ Word8) => Tokens s -- ^ Start of block comment -> Tokens s -- ^ End of block comment -> m () -skipBlockComment start end = p >> void (manyTill anyChar n) +skipBlockComment start end = p >> void (manyTill anySingle n) where p = string start n = string end @@ -85,7 +85,7 @@ skipBlockCommentNested :: (MonadParsec e s m, Token s ~ Word8) -> m () skipBlockCommentNested start end = p >> void (manyTill e n) where - e = skipBlockCommentNested start end <|> void anyChar + e = skipBlockCommentNested start end <|> void anySingle p = string start n = string end {-# INLINEABLE skipBlockCommentNested #-} diff --git a/Text/Megaparsec/Char.hs b/Text/Megaparsec/Char.hs index 6a18385..4684213 100644 --- a/Text/Megaparsec/Char.hs +++ b/Text/Megaparsec/Char.hs @@ -45,14 +45,9 @@ module Text.Megaparsec.Char , latin1Char , charCategory , categoryName - -- * More general parsers + -- * Single character , char , char' - , anyChar - , notChar - , oneOf - , noneOf - , satisfy -- * Sequence of characters , string , string' ) @@ -62,16 +57,9 @@ import Control.Applicative import Data.Char import Data.Function (on) import Data.Functor (void) -import Data.List.NonEmpty (NonEmpty (..)) import Data.Proxy import Text.Megaparsec import qualified Data.CaseInsensitive as CI -import qualified Data.Set as E - -#if !MIN_VERSION_base(4,8,0) -import Data.Foldable (Foldable (), elem, notElem) -import Prelude hiding (elem, notElem) -#endif ---------------------------------------------------------------------------- -- Simple parsers @@ -289,17 +277,14 @@ categoryName = \case NotAssigned -> "non-assigned Unicode character" ---------------------------------------------------------------------------- --- More general parsers +-- Single character --- | @'char' c@ parses a single character @c@. +-- | A type-constrained version of 'single'. -- -- > semicolon = char ';' -char :: MonadParsec e s m => Token s -> m (Token s) -char c = token testChar expected - where - testChar x = if x == c then Just x else Nothing - expected = E.singleton (Tokens (c:|[])) +char :: (MonadParsec e s m, Token s ~ Char) => Token s -> m (Token s) +char = single {-# INLINE char #-} -- | The same as 'char' but case-insensitive. This parser returns the @@ -321,87 +306,13 @@ char' c = choice [char c, char (swapCase c)] | otherwise = x {-# INLINE char' #-} --- | This parser succeeds for any character. Returns the parsed character. - -anyChar :: MonadParsec e s m => m (Token s) -anyChar = satisfy (const True) "character" -{-# INLINE anyChar #-} - --- | Match any character but the given one. It's a good idea to attach a --- 'label' to this parser manually. --- --- @since 6.0.0 - -notChar :: MonadParsec e s m => Token s -> m (Token s) -notChar c = satisfy (/= c) -{-# INLINE notChar #-} - --- | @'oneOf' cs@ succeeds if the current character is in the supplied --- collection of characters @cs@. Returns the parsed character. Note that --- this parser cannot automatically generate the “expected” component of --- error message, so usually you should label it manually with 'label' or --- (''). --- --- See also: 'satisfy'. --- --- > digit = oneOf ['0'..'9'] "digit" --- --- __Performance note__: prefer 'satisfy' when you can because it's faster --- when you have only a couple of tokens to compare to: --- --- > quoteFast = satisfy (\x -> x == '\'' || x == '\"') --- > quoteSlow = oneOf "'\"" - -oneOf :: (Foldable f, MonadParsec e s m) - => f (Token s) - -> m (Token s) -oneOf cs = satisfy (`elem` cs) -{-# INLINE oneOf #-} - --- | As the dual of 'oneOf', @'noneOf' cs@ succeeds if the current character --- /not/ in the supplied list of characters @cs@. Returns the parsed --- character. Note that this parser cannot automatically generate the --- “expected” component of error message, so usually you should label it --- manually with 'label' or (''). --- --- See also: 'satisfy'. --- --- __Performance note__: prefer 'satisfy' and 'notChar' when you can because --- it's faster. - -noneOf :: (Foldable f, MonadParsec e s m) - => f (Token s) - -> m (Token s) -noneOf cs = satisfy (`notElem` cs) -{-# INLINE noneOf #-} - --- | The parser @'satisfy' f@ succeeds for any character for which the --- supplied function @f@ returns 'True'. Returns the character that is --- actually parsed. --- --- > digitChar = satisfy isDigit "digit" --- > oneOf cs = satisfy (`elem` cs) - -satisfy :: MonadParsec e s m - => (Token s -> Bool) -- ^ Predicate to apply - -> m (Token s) -satisfy f = token testChar E.empty - where - testChar x = if f x then Just x else Nothing -{-# INLINE satisfy #-} - ---------------------------------------------------------------------------- -- Sequence of characters --- | @'string' s@ parses a sequence of characters given by @s@. Returns the --- parsed string (i.e. @s@). --- --- > divOrMod = string "div" <|> string "mod" +-- | A type-constrained version of 'chunk'. -string :: MonadParsec e s m - => Tokens s - -> m (Tokens s) -string = tokens (==) +string :: (MonadParsec e s m, Token s ~ Char) => Tokens s -> m (Tokens s) +string = chunk {-# INLINE string #-} -- | The same as 'string', but case-insensitive. On success returns string diff --git a/Text/Megaparsec/Char/Lexer.hs b/Text/Megaparsec/Char/Lexer.hs index 7d350ba..f0d0972 100644 --- a/Text/Megaparsec/Char/Lexer.hs +++ b/Text/Megaparsec/Char/Lexer.hs @@ -143,7 +143,7 @@ lexeme spc p = p <* spc -- > colon = symbol ":" -- > dot = symbol "." -symbol :: MonadParsec e s m +symbol :: (MonadParsec e s m, Token s ~ Char) => m () -- ^ How to consume white space after lexeme -> Tokens s -- ^ Symbol to parse -> m (Tokens s) @@ -179,7 +179,7 @@ skipBlockComment :: (MonadParsec e s m, Token s ~ Char) => Tokens s -- ^ Start of block comment -> Tokens s -- ^ End of block comment -> m () -skipBlockComment start end = p >> void (manyTill C.anyChar n) +skipBlockComment start end = p >> void (manyTill anySingle n) where p = C.string start n = C.string end @@ -196,7 +196,7 @@ skipBlockCommentNested :: (MonadParsec e s m, Token s ~ Char) -> m () skipBlockCommentNested start end = p >> void (manyTill e n) where - e = skipBlockCommentNested start end <|> void C.anyChar + e = skipBlockCommentNested start end <|> void anySingle p = C.string start n = C.string end {-# INLINEABLE skipBlockCommentNested #-} @@ -397,9 +397,9 @@ charLiteral :: (MonadParsec e s m, Token s ~ Char) => m Char charLiteral = label "literal character" $ do -- The @~@ is needed to avoid requiring a MonadFail constraint, -- and we do know that r will be non-empty if count' succeeds. - ~r@(x:_) <- lookAhead $ count' 1 8 C.anyChar + ~r@(x:_) <- lookAhead (count' 1 8 anySingle) case listToMaybe (Char.readLitChar r) of - Just (c, r') -> count (length r - length r') C.anyChar >> return c + Just (c, r') -> c <$ count (length r - length r') anySingle Nothing -> unexpected (Tokens (x:|[])) {-# INLINEABLE charLiteral #-} diff --git a/Text/Megaparsec/Pos.hs b/Text/Megaparsec/Pos.hs index 8354292..198c19c 100644 --- a/Text/Megaparsec/Pos.hs +++ b/Text/Megaparsec/Pos.hs @@ -8,8 +8,8 @@ -- Portability : portable -- -- Textual source position. The position includes name of file, line number, --- and column number. List of such positions can be used to model a stack of --- include files. +-- and column number. A non-empty list of such positions can be used to +-- model a stack of include files. -- -- You probably do not want to import this module directly because -- "Text.Megaparsec" re-exports it anyway. diff --git a/tests/Text/Megaparsec/ByteSpec.hs b/tests/Text/Megaparsec/ByteSpec.hs index 61f9724..e533d4a 100644 --- a/tests/Text/Megaparsec/ByteSpec.hs +++ b/tests/Text/Megaparsec/ByteSpec.hs @@ -104,15 +104,6 @@ spec = do describe "spaceChar" $ checkCharRange "white space" [9,10,11,12,13,32,160] spaceChar - -- describe "upperChar" $ - -- checkCharPred "uppercase letter" (isUpper . toChar) upperChar - - -- describe "lowerChar" $ - -- checkCharPred "lowercase letter" (isLower . toChar) lowerChar - - -- describe "letterChar" $ - -- checkCharPred "letter" (isAlpha . toChar) letterChar - describe "alphaNumChar" $ checkCharPred "alphanumeric character" (isAlphaNum . toChar) alphaNumChar @@ -128,9 +119,6 @@ spec = do describe "hexDigitChar" $ checkCharRange "hexadecimal digit" ([48..57] ++ [97..102] ++ [65..70]) hexDigitChar - -- describe "asciiChar" $ - -- checkCharPred "ASCII character" (isAscii . toChar) asciiChar - describe "char'" $ do context "when stream begins with the character specified as argument" $ it "parses the character" $ diff --git a/tests/Text/Megaparsec/CharSpec.hs b/tests/Text/Megaparsec/CharSpec.hs index f9f6178..5076dc6 100644 --- a/tests/Text/Megaparsec/CharSpec.hs +++ b/tests/Text/Megaparsec/CharSpec.hs @@ -206,7 +206,8 @@ spec = do describe "char'" $ do let goodChar x = - (toUpper x == toLower x) || (isUpper x || isLower x) + (toUpper x == toLower x) || + (((toUpper x == x) || (toLower x == x)) && (isUpper x || isLower x)) context "when stream begins with the character specified as argument" $ it "parses the character" $ property $ \ch s -> goodChar ch ==> do @@ -229,75 +230,6 @@ spec = do let ms = ueof <> etok (toLower ch) <> etok (toUpper ch) prs (char' ch) "" `shouldFailWith` err posI ms - describe "anyChar" $ do - context "when stream is not empty" $ - it "succeeds consuming next character in the stream" $ - property $ \ch s -> do - let s' = ch : s - prs anyChar s' `shouldParse` ch - prs' anyChar s' `succeedsLeaving` s - context "when stream is empty" $ - it "signals correct parse error" $ - prs anyChar "" `shouldFailWith` err posI (ueof <> elabel "character") - - describe "notChar" $ do - context "when stream begins with the character specified as argument" $ - it "signals correct parse error" $ - property $ \ch s' -> do - let p = notChar ch - s = ch : s' - prs p s `shouldFailWith` err posI (utok ch) - prs' p s `failsLeaving` s - context "when stream does not begin with the character specified as argument" $ - it "parses first character in the stream" $ - property $ \ch s -> not (null s) && ch /= head s ==> do - let p = notChar ch - prs p s `shouldParse` head s - prs' p s `succeedsLeaving` tail s - context "when stream is empty" $ - it "signals correct parse error" $ - prs (notChar 'a') "" `shouldFailWith` err posI ueof - - describe "oneOf" $ do - context "when stream begins with one of specified characters" $ - it "parses the character" $ - property $ \chs' n s -> do - let chs = getNonEmpty chs' - ch = chs !! (getNonNegative n `rem` length chs) - s' = ch : s - prs (oneOf chs) s' `shouldParse` ch - prs' (oneOf chs) s' `succeedsLeaving` s - context "when stream does not begin with any of specified characters" $ - it "signals correct parse error" $ - property $ \chs ch s -> ch `notElem` (chs :: String) ==> do - let s' = ch : s - prs (oneOf chs) s' `shouldFailWith` err posI (utok ch) - prs' (oneOf chs) s' `failsLeaving` s' - context "when stream is empty" $ - it "signals correct parse error" $ - property $ \chs -> - prs (oneOf (chs :: String)) "" `shouldFailWith` err posI ueof - - describe "noneOf" $ do - context "when stream does not begin with any of specified characters" $ - it "parses the character" $ - property $ \chs ch s -> ch `notElem` (chs :: String) ==> do - let s' = ch : s - prs (noneOf chs) s' `shouldParse` ch - prs' (noneOf chs) s' `succeedsLeaving` s - context "when stream begins with one of specified characters" $ - it "signals correct parse error" $ - property $ \chs' n s -> do - let chs = getNonEmpty chs' - ch = chs !! (getNonNegative n `rem` length chs) - s' = ch : s - prs (noneOf chs) s' `shouldFailWith` err posI (utok ch) - prs' (noneOf chs) s' `failsLeaving` s' - context "when stream is empty" $ - it "signals correct parse error" $ - property $ \chs -> - prs (noneOf (chs :: String)) "" `shouldFailWith` err posI ueof - describe "string" $ do context "when stream is prefixed with given string" $ it "parses the string" $ diff --git a/tests/Text/Megaparsec/StreamSpec.hs b/tests/Text/Megaparsec/StreamSpec.hs index a850c22..1f65735 100644 --- a/tests/Text/Megaparsec/StreamSpec.hs +++ b/tests/Text/Megaparsec/StreamSpec.hs @@ -28,24 +28,24 @@ spec = do chunkToTokens sproxy (tokensToChunk sproxy ts) === ts describe "chunkToTokens" $ it "chunk is isomorphic to list of tokens" $ - property $ \chunk -> - tokensToChunk sproxy (chunkToTokens sproxy chunk) === chunk + property $ \chk -> + tokensToChunk sproxy (chunkToTokens sproxy chk) === chk describe "chunkLength" $ it "returns correct length of given chunk" $ - property $ \chunk -> - chunkLength sproxy chunk === length chunk + property $ \chk -> + chunkLength sproxy chk === length chk describe "chunkEmpty" $ it "only true when chunkLength returns 0" $ - property $ \chunk -> - chunkEmpty sproxy chunk === (chunkLength sproxy chunk <= 0) + property $ \chk -> + chunkEmpty sproxy chk === (chunkLength sproxy chk <= 0) describe "positionAt1" $ it "just returns the given position" $ property $ \pos t -> positionAt1 sproxy pos t === pos describe "positionAtN" $ it "just returns the given position" $ - property $ \pos chunk -> - positionAtN sproxy pos chunk === pos + property $ \pos chk -> + positionAtN sproxy pos chk === pos describe "advance1" $ do context "when given newline" $ it "works correctly" $ @@ -102,24 +102,24 @@ spec = do chunkToTokens bproxy (tokensToChunk bproxy ts) === ts describe "chunkToTokens" $ it "chunk is isomorphic to list of tokens" $ - property $ \chunk -> - tokensToChunk bproxy (chunkToTokens bproxy chunk) === chunk + property $ \chk -> + tokensToChunk bproxy (chunkToTokens bproxy chk) === chk describe "chunkLength" $ it "returns correct length of given chunk" $ - property $ \chunk -> - chunkLength bproxy chunk === B.length chunk + property $ \chk -> + chunkLength bproxy chk === B.length chk describe "chunkEmpty" $ it "only true when chunkLength returns 0" $ - property $ \chunk -> - chunkEmpty bproxy chunk === (chunkLength bproxy chunk <= 0) + property $ \chk -> + chunkEmpty bproxy chk === (chunkLength bproxy chk <= 0) describe "positionAt1" $ it "just returns the given position" $ property $ \pos t -> positionAt1 bproxy pos t === pos describe "positionAtN" $ it "just returns the given position" $ - property $ \pos chunk -> - positionAtN bproxy pos chunk === pos + property $ \pos chk -> + positionAtN bproxy pos chk === pos describe "advance1" $ do context "when given newline" $ it "works correctly" $ @@ -177,24 +177,24 @@ spec = do chunkToTokens blproxy (tokensToChunk blproxy ts) === ts describe "chunkToTokens" $ it "chunk is isomorphic to list of tokens" $ - property $ \chunk -> - tokensToChunk blproxy (chunkToTokens blproxy chunk) === chunk + property $ \chk -> + tokensToChunk blproxy (chunkToTokens blproxy chk) === chk describe "chunkLength" $ it "returns correct length of given chunk" $ - property $ \chunk -> - chunkLength blproxy chunk === fromIntegral (BL.length chunk) + property $ \chk -> + chunkLength blproxy chk === fromIntegral (BL.length chk) describe "chunkEmpty" $ it "only true when chunkLength returns 0" $ - property $ \chunk -> - chunkEmpty blproxy chunk === (chunkLength blproxy chunk <= 0) + property $ \chk -> + chunkEmpty blproxy chk === (chunkLength blproxy chk <= 0) describe "positionAt1" $ it "just returns the given position" $ property $ \pos t -> positionAt1 blproxy pos t === pos describe "positionAtN" $ it "just returns the given position" $ - property $ \pos chunk -> - positionAtN blproxy pos chunk === pos + property $ \pos chk -> + positionAtN blproxy pos chk === pos describe "advance1" $ do context "when given newline" $ it "works correctly" $ @@ -252,24 +252,24 @@ spec = do chunkToTokens tproxy (tokensToChunk tproxy ts) === ts describe "chunkToTokens" $ it "chunk is isomorphic to list of tokens" $ - property $ \chunk -> - tokensToChunk tproxy (chunkToTokens tproxy chunk) === chunk + property $ \chk -> + tokensToChunk tproxy (chunkToTokens tproxy chk) === chk describe "chunkLength" $ it "returns correct length of given chunk" $ - property $ \chunk -> - chunkLength tproxy chunk === T.length chunk + property $ \chk -> + chunkLength tproxy chk === T.length chk describe "chunkEmpty" $ it "only true when chunkLength returns 0" $ - property $ \chunk -> - chunkEmpty tproxy chunk === (chunkLength tproxy chunk <= 0) + property $ \chk -> + chunkEmpty tproxy chk === (chunkLength tproxy chk <= 0) describe "positionAt1" $ it "just returns the given position" $ property $ \pos t -> positionAt1 tproxy pos t === pos describe "positionAtN" $ it "just returns the given position" $ - property $ \pos chunk -> - positionAtN tproxy pos chunk === pos + property $ \pos chk -> + positionAtN tproxy pos chk === pos describe "advance1" $ do context "when given newline" $ it "works correctly" $ @@ -326,24 +326,24 @@ spec = do chunkToTokens tlproxy (tokensToChunk tlproxy ts) === ts describe "chunkToTokens" $ it "chunk is isomorphic to list of tokens" $ - property $ \chunk -> - tokensToChunk tlproxy (chunkToTokens tlproxy chunk) === chunk + property $ \chk -> + tokensToChunk tlproxy (chunkToTokens tlproxy chk) === chk describe "chunkLength" $ it "returns correct length of given chunk" $ - property $ \chunk -> - chunkLength tlproxy chunk === fromIntegral (TL.length chunk) + property $ \chk -> + chunkLength tlproxy chk === fromIntegral (TL.length chk) describe "chunkEmpty" $ it "only true when chunkLength returns 0" $ - property $ \chunk -> - chunkEmpty tlproxy chunk === (chunkLength tlproxy chunk <= 0) + property $ \chk -> + chunkEmpty tlproxy chk === (chunkLength tlproxy chk <= 0) describe "positionAt1" $ it "just returns the given position" $ property $ \pos t -> positionAt1 tlproxy pos t === pos describe "positionAtN" $ it "just returns the given position" $ - property $ \pos chunk -> - positionAtN tlproxy pos chunk === pos + property $ \pos chk -> + positionAtN tlproxy pos chk === pos describe "advance1" $ do context "when given newline" $ it "works correctly" $ diff --git a/tests/Text/MegaparsecSpec.hs b/tests/Text/MegaparsecSpec.hs index a65951f..d28fa4d 100644 --- a/tests/Text/MegaparsecSpec.hs +++ b/tests/Text/MegaparsecSpec.hs @@ -38,12 +38,12 @@ import qualified Control.Monad.State.Lazy as L import qualified Control.Monad.State.Strict as S import qualified Control.Monad.Writer.Lazy as L import qualified Control.Monad.Writer.Strict as S +import qualified Data.ByteString as BS import qualified Data.List as DL import qualified Data.List.NonEmpty as NE import qualified Data.Semigroup as G import qualified Data.Set as E import qualified Data.Text as T -import qualified Data.ByteString as BS #if !MIN_VERSION_base(4,8,0) import Control.Applicative hiding (many, some) @@ -223,17 +223,17 @@ spec = do describe "equivalence to 'string'" $ do it "for String" $ property $ \s i -> eqParser - (string s) + (chunk s) (fromString s) (i :: String) it "for Text" $ property $ \s i -> eqParser - (string (T.pack s)) + (chunk (T.pack s)) (fromString s) (i :: T.Text) it "for ByteString" $ property $ \s i -> eqParser - (string (fromString s :: BS.ByteString)) + (chunk (fromString s :: BS.ByteString)) (fromString s) (i :: BS.ByteString) it "can handle Unicode" $ do @@ -314,7 +314,7 @@ spec = do it "parses the string" $ property $ \s0 s1 s -> not (s1 `isPrefixOf` s0) ==> do let s' = s0 ++ s - p = string s0 <|> string s1 + p = chunk s0 <|> chunk s1 prs p s' `shouldParse` s0 prs' p s' `succeedsLeaving` s context "stream begins with the second string" $ @@ -1212,6 +1212,85 @@ spec = do describe "derivatives from primitive combinators" $ do + -- NOTE 'single' is tested via 'char' in "Text.Megaparsec.Char" and + -- "Text.Megaparsec.Byte". + + describe "anySingle" $ do + let p :: MonadParsec Void String m => m Char + p = anySingle + context "when stream is not empty" $ + it "succeeds consuming next character in the stream" $ + property $ \ch s -> do + let s' = ch : s + grs p s' (`shouldParse` ch) + grs' p s' (`succeedsLeaving` s) + context "when stream is empty" $ + it "signals correct parse error" $ + grs p "" (`shouldFailWith` err posI ueof) + + describe "anySingleBut" $ do + context "when stream begins with the character specified as argument" $ + it "signals correct parse error" $ + property $ \ch s' -> do + let p :: MonadParsec Void String m => m Char + p = anySingleBut ch + s = ch : s' + grs p s (`shouldFailWith` err posI (utok ch)) + grs' p s (`failsLeaving` s) + context "when stream does not begin with the character specified as argument" $ + it "parses first character in the stream" $ + property $ \ch s -> not (null s) && ch /= head s ==> do + let p :: MonadParsec Void String m => m Char + p = anySingleBut ch + grs p s (`shouldParse` head s) + grs' p s (`succeedsLeaving` tail s) + context "when stream is empty" $ + it "signals correct parse error" $ + grs (anySingleBut 'a') "" (`shouldFailWith` err posI ueof) + + describe "oneOf" $ do + context "when stream begins with one of specified characters" $ + it "parses the character" $ + property $ \chs' n s -> do + let chs = getNonEmpty chs' + ch = chs !! (getNonNegative n `rem` length chs) + s' = ch : s + grs (oneOf chs) s' (`shouldParse` ch) + grs' (oneOf chs) s' (`succeedsLeaving` s) + context "when stream does not begin with any of specified characters" $ + it "signals correct parse error" $ + property $ \chs ch s -> ch `notElem` (chs :: String) ==> do + let s' = ch : s + grs (oneOf chs) s' (`shouldFailWith` err posI (utok ch)) + grs' (oneOf chs) s' (`failsLeaving` s') + context "when stream is empty" $ + it "signals correct parse error" $ + property $ \chs -> + grs (oneOf (chs :: String)) "" (`shouldFailWith` err posI ueof) + + describe "noneOf" $ do + context "when stream does not begin with any of specified characters" $ + it "parses the character" $ + property $ \chs ch s -> ch `notElem` (chs :: String) ==> do + let s' = ch : s + grs (noneOf chs) s' (`shouldParse` ch) + grs' (noneOf chs) s' (`succeedsLeaving` s) + context "when stream begins with one of specified characters" $ + it "signals correct parse error" $ + property $ \chs' n s -> do + let chs = getNonEmpty chs' + ch = chs !! (getNonNegative n `rem` length chs) + s' = ch : s + grs (noneOf chs) s' (`shouldFailWith` err posI (utok ch)) + grs' (noneOf chs) s' (`failsLeaving` s') + context "when stream is empty" $ + it "signals correct parse error" $ + property $ \chs -> + grs (noneOf (chs :: String)) "" (`shouldFailWith` err posI ueof) + + -- NOTE 'chunk' is tested via 'string' in "Text.Megaparsec.Char" and + -- "Text.Megaparsec.Byte". + describe "unexpected" $ it "signals correct parse error" $ property $ \item -> do @@ -1406,7 +1485,7 @@ spec = do describe "notFollowedBy" $ it "generally works" $ property $ \a' b' c' -> do - let p = many (char =<< ask) <* notFollowedBy eof <* many anyChar + let p = many (char =<< ask) <* notFollowedBy eof <* many anySingle [a,b,c] = getNonNegative <$> [a',b',c'] s = abcRow a b c if b > 0 || c > 0 @@ -1435,7 +1514,7 @@ spec = do let p = do L.put n let notEof = notFollowedBy (L.modify (* 2) >> eof) - some (try (anyChar <* notEof)) <* char 'x' + some (try (anySingle <* notEof)) <* char 'x' prs (L.runStateT p 0) "abx" `shouldParse` ("ab", n :: Integer) describe "observing" $ do @@ -1475,7 +1554,7 @@ spec = do let p = do S.put n let notEof = notFollowedBy (S.modify (* 2) >> eof) - some (try (anyChar <* notEof)) <* char 'x' + some (try (anySingle <* notEof)) <* char 'x' prs (S.runStateT p 0) "abx" `shouldParse` ("ab", n :: Integer) describe "observing" $ do