2015-07-28 16:32:19 +03:00
|
|
|
|
-- |
|
2015-08-01 19:24:45 +03:00
|
|
|
|
-- Module : Text.Megaparsec.Char
|
|
|
|
|
-- Copyright : © 2015 Megaparsec contributors
|
2015-07-30 19:20:37 +03:00
|
|
|
|
-- © 2007 Paolo Martini
|
|
|
|
|
-- © 1999–2001 Daan Leijen
|
2015-07-28 16:32:19 +03:00
|
|
|
|
-- License : BSD3
|
|
|
|
|
--
|
|
|
|
|
-- Maintainer : Mark Karpov <markkarpov@opmbx.org>
|
2015-07-29 11:38:32 +03:00
|
|
|
|
-- Stability : experimental
|
2015-07-28 16:32:19 +03:00
|
|
|
|
-- Portability : portable
|
|
|
|
|
--
|
|
|
|
|
-- Commonly used character parsers.
|
|
|
|
|
|
2015-08-01 19:24:45 +03:00
|
|
|
|
module Text.Megaparsec.Char
|
2015-07-28 16:32:19 +03:00
|
|
|
|
( oneOf
|
|
|
|
|
, noneOf
|
|
|
|
|
, spaces
|
|
|
|
|
, space
|
|
|
|
|
, newline
|
|
|
|
|
, crlf
|
2015-08-08 18:17:27 +03:00
|
|
|
|
, eol
|
2015-07-28 16:32:19 +03:00
|
|
|
|
, tab
|
2015-08-12 15:41:22 +03:00
|
|
|
|
, letter
|
2015-07-28 16:32:19 +03:00
|
|
|
|
, upper
|
|
|
|
|
, lower
|
|
|
|
|
, digit
|
|
|
|
|
, hexDigit
|
|
|
|
|
, octDigit
|
2015-08-12 15:41:22 +03:00
|
|
|
|
, alphaNum
|
2015-07-28 16:32:19 +03:00
|
|
|
|
, char
|
|
|
|
|
, anyChar
|
|
|
|
|
, satisfy
|
|
|
|
|
, string )
|
|
|
|
|
where
|
|
|
|
|
|
2015-08-01 17:39:20 +03:00
|
|
|
|
import Control.Applicative ((<|>))
|
2015-07-28 16:32:19 +03:00
|
|
|
|
import Data.Char
|
|
|
|
|
|
2015-08-12 15:41:22 +03:00
|
|
|
|
import Text.Megaparsec.Combinator
|
2015-08-01 19:24:45 +03:00
|
|
|
|
import Text.Megaparsec.Pos
|
|
|
|
|
import Text.Megaparsec.Prim
|
2015-08-08 18:17:27 +03:00
|
|
|
|
import Text.Megaparsec.ShowToken
|
2015-07-28 16:32:19 +03:00
|
|
|
|
|
|
|
|
|
-- | @oneOf cs@ accepts the current character when it occurs in the list of
-- characters @cs@, and returns that character. It is 'satisfy' applied to
-- a membership test on @cs@.
--
-- > vowel = oneOf "aeiou" <?> "vowel"

oneOf :: Stream s m Char => String -> ParsecT s u m Char
oneOf = satisfy . flip elem
|
|
|
|
|
|
|
|
|
|
-- | The dual of 'oneOf': @noneOf cs@ accepts the current character when it
-- is /not/ in the list of characters @cs@, and returns that character.
--
-- > consonant = noneOf "aeiou" <?> "consonant"

noneOf :: Stream s m Char => String -> ParsecT s u m Char
noneOf = satisfy . flip notElem
|
|
|
|
|
|
|
|
|
|
-- | Skips /zero/ or more white space characters by running 'space' under
-- 'skipMany'. The result carries no value.

spaces :: Stream s m Char => ParsecT s u m ()
spaces = skipMany whiteSpaceChar
  where whiteSpaceChar = space
|
2015-07-28 16:32:19 +03:00
|
|
|
|
|
2015-08-12 15:41:22 +03:00
|
|
|
|
-- | Parses a single white space character, i.e. any character for which
-- 'isSpace' returns 'True', and returns it.

space :: Stream s m Char => ParsecT s u m Char
space = satisfy isSpace <?> expected
  where expected = "white space"
|
2015-07-28 16:32:19 +03:00
|
|
|
|
|
2015-08-12 15:41:22 +03:00
|
|
|
|
-- | Parses a newline (line feed, @'\\n'@) character and returns it.

newline :: Stream s m Char => ParsecT s u m Char
newline = char lineFeed <?> "newline"
  where lineFeed = '\n'
|
2015-07-28 16:32:19 +03:00
|
|
|
|
|
2015-08-12 15:41:22 +03:00
|
|
|
|
-- | Parses a carriage return character immediately followed by a newline
-- character. Returns the two-character sequence that was parsed.

crlf :: Stream s m Char => ParsecT s u m String
crlf = string ['\r', '\n']
|
2015-07-28 16:32:19 +03:00
|
|
|
|
|
2015-08-12 15:41:22 +03:00
|
|
|
|
-- | Parses an end of line: either a lone LF (see 'newline') or a CRLF pair
-- (see 'crlf'). The LF alternative is tried first. Returns the sequence of
-- characters parsed.
--
-- > eol = (pure <$> newline) <|> crlf

eol :: Stream s m Char => ParsecT s u m String
eol = (lf <|> crlf) <?> "end of line"
  where
    -- 'newline' yields a single 'Char'; wrap it in a list so that both
    -- alternatives produce a 'String'.
    lf = fmap (: []) newline
|
2015-07-28 16:32:19 +03:00
|
|
|
|
|
2015-08-12 15:41:22 +03:00
|
|
|
|
-- | Parses a tab (@'\\t'@) character and returns it.

tab :: Stream s m Char => ParsecT s u m Char
tab = char horizontalTab <?> "tab"
  where horizontalTab = '\t'
|
|
|
|
|
|
2015-08-12 15:41:22 +03:00
|
|
|
|
-- | Parses an alphabetic character, i.e. any character for which 'isAlpha'
-- returns 'True', and returns it.

letter :: Stream s m Char => ParsecT s u m Char
letter = satisfy isAlpha <?> expected
  where expected = "letter"
|
|
|
|
|
|
|
|
|
|
-- | Parses an upper case letter, i.e. any character for which 'isUpper'
-- returns 'True', and returns it.

upper :: Stream s m Char => ParsecT s u m Char
upper =
  let expected = "uppercase letter"
  in satisfy isUpper <?> expected
|
|
|
|
|
|
2015-08-12 15:41:22 +03:00
|
|
|
|
-- | Parses a lower case letter, i.e. any character for which 'isLower'
-- returns 'True', and returns it.

lower :: Stream s m Char => ParsecT s u m Char
lower =
  let expected = "lowercase letter"
  in satisfy isLower <?> expected
|
|
|
|
|
|
|
|
|
|
-- | Parses a digit, i.e. any character for which 'isDigit' returns 'True',
-- and returns it.

digit :: Stream s m Char => ParsecT s u m Char
digit = satisfy isDigit <?> expected
  where expected = "digit"
|
|
|
|
|
|
2015-08-12 15:41:22 +03:00
|
|
|
|
-- | Parses a hexadecimal digit: a digit, or a letter between “a” and “f”
-- or between “A” and “F” (any character for which 'isHexDigit' returns
-- 'True'). Returns the parsed character.

hexDigit :: Stream s m Char => ParsecT s u m Char
hexDigit = satisfy isHexDigit <?> expected
  where expected = "hexadecimal digit"
|
|
|
|
|
|
2015-08-12 15:41:22 +03:00
|
|
|
|
-- | Parses an octal digit: a character between “0” and “7” (any character
-- for which 'isOctDigit' returns 'True'). Returns the parsed character.

octDigit :: Stream s m Char => ParsecT s u m Char
octDigit =
  let expected = "octal digit"
  in satisfy isOctDigit <?> expected
|
|
|
|
|
|
2015-08-12 15:41:22 +03:00
|
|
|
|
-- | Parses a letter or digit, i.e. any character for which 'isAlphaNum'
-- returns 'True', and returns it.

alphaNum :: Stream s m Char => ParsecT s u m Char
alphaNum = satisfy isAlphaNum <?> expected
  where expected = "letter or digit"
|
|
|
|
|
|
2015-07-28 16:32:19 +03:00
|
|
|
|
-- | @char c@ parses exactly the character @c@ and returns it. The parser is
-- labelled with @showToken c@.
--
-- > semicolon = char ';'

char :: Stream s m Char => Char -> ParsecT s u m Char
char c = satisfy isWanted <?> showToken c
  where isWanted x = x == c
|
2015-07-28 16:32:19 +03:00
|
|
|
|
|
|
|
|
|
-- | Accepts whatever the current character is and returns it. This is
-- 'satisfy' with a predicate that is always 'True'.

anyChar :: Stream s m Char => ParsecT s u m Char
anyChar = satisfy (\_ -> True) <?> "character"
|
2015-07-28 16:32:19 +03:00
|
|
|
|
|
|
|
|
|
-- | The parser @satisfy f@ succeeds for any character for which the
-- predicate @f@ returns 'True', and returns the character that was
-- actually parsed. It is built on 'tokenPrim'.
--
-- > digit = satisfy isDigit
-- > oneOf cs = satisfy (`elem` cs)

satisfy :: Stream s m Char => (Char -> Bool) -> ParsecT s u m Char
satisfy f = tokenPrim advance accept
  where
    -- New source position after the character; the third argument passed
    -- by 'tokenPrim' is not needed here.
    advance pos c _ = updatePosChar pos c
    -- Keep the character only when the predicate holds.
    accept c
      | f c       = Just c
      | otherwise = Nothing
|
|
|
|
|
|
|
|
|
|
-- | @string s@ parses the exact sequence of characters @s@ and returns the
-- parsed string (i.e. @s@). It is 'tokens' specialised with
-- 'updatePosString' for position tracking.
--
-- > divOrMod = string "div" <|> string "mod"

string :: Stream s m Char => String -> ParsecT s u m String
string s = tokens updatePosString s
|