megaparsec/Text/Megaparsec/Char.hs

164 lines
4.4 KiB
Haskell
Raw Normal View History

2015-07-28 16:32:19 +03:00
-- |
-- Module : Text.Megaparsec.Char
-- Copyright : © 2015 Megaparsec contributors
-- © 2007 Paolo Martini
-- © 1999–2001 Daan Leijen
2015-07-28 16:32:19 +03:00
-- License : BSD3
--
-- Maintainer : Mark Karpov <markkarpov@opmbx.org>
2015-07-29 11:38:32 +03:00
-- Stability : experimental
2015-07-28 16:32:19 +03:00
-- Portability : portable
--
-- Commonly used character parsers.
module Text.Megaparsec.Char
2015-07-28 16:32:19 +03:00
( oneOf
, noneOf
, spaces
, space
, newline
, crlf
, eol
2015-07-28 16:32:19 +03:00
, tab
2015-08-12 15:41:22 +03:00
, letter
2015-07-28 16:32:19 +03:00
, upper
, lower
, digit
, hexDigit
, octDigit
2015-08-12 15:41:22 +03:00
, alphaNum
2015-07-28 16:32:19 +03:00
, char
, anyChar
, satisfy
, string )
where
import Control.Applicative ((<|>))
2015-07-28 16:32:19 +03:00
import Data.Char
2015-08-12 15:41:22 +03:00
import Text.Megaparsec.Combinator
import Text.Megaparsec.Pos
import Text.Megaparsec.Prim
import Text.Megaparsec.ShowToken
2015-07-28 16:32:19 +03:00
-- | @oneOf cs@ accepts the current character only when it is a member of
-- the list @cs@, and returns that character. It is a thin wrapper around
-- 'satisfy'.
--
-- > vowel = oneOf "aeiou" <?> "vowel"
oneOf :: Stream s m Char => String -> ParsecT s u m Char
oneOf cs = satisfy isMember
  where
    -- Membership test against the supplied character list.
    isMember c = c `elem` cs
-- | The dual of 'oneOf': @noneOf cs@ succeeds if the current character is
-- /not/ in the supplied list of characters @cs@. Returns the parsed
-- character.
--
-- > consonant = noneOf "aeiou" <?> "consonant"
noneOf :: Stream s m Char => String -> ParsecT s u m Char
noneOf cs = satisfy isExcluded
  where
    -- True for every character that does not occur in the supplied list.
    isExcluded c = c `notElem` cs
-- | Skips /zero/ or more white space characters by applying 'space'
-- repeatedly through 'skipMany'. The skipped characters are discarded.
spaces :: Stream s m Char => ParsecT s u m ()
spaces = skipMany whiteSpaceChar
  where
    -- A single white space character, as recognised by 'space'.
    whiteSpaceChar = space
2015-07-28 16:32:19 +03:00
2015-08-12 15:41:22 +03:00
-- | Parses a single white space character, that is, any character for
-- which 'isSpace' holds. Returns the parsed character.
space :: Stream s m Char => ParsecT s u m Char
space = whiteSpaceChar <?> "white space"
  where
    whiteSpaceChar = satisfy isSpace
2015-07-28 16:32:19 +03:00
2015-08-12 15:41:22 +03:00
-- | Parses a newline (line feed, @\'\\n\'@) character and returns it.
newline :: Stream s m Char => ParsecT s u m Char
newline = lineFeed <?> "newline"
  where
    lineFeed = char '\n'
2015-07-28 16:32:19 +03:00
2015-08-12 15:41:22 +03:00
-- | Parses a carriage return character immediately followed by a newline
-- character. Returns the two-character sequence that was parsed.
crlf :: Stream s m Char => ParsecT s u m String
crlf = string carriageReturnLineFeed
  where
    carriageReturnLineFeed = "\r\n"
2015-07-28 16:32:19 +03:00
2015-08-12 15:41:22 +03:00
-- | Parses an end of line: either a lone LF (see 'newline') or a CRLF
-- pair (see 'crlf'). The LF alternative is tried first. Returns the
-- sequence of characters parsed.
--
-- > eol = (pure <$> newline) <|> crlf
eol :: Stream s m Char => ParsecT s u m String
eol = (lineFeed <|> crlf) <?> "end of line"
  where
    -- 'newline' yields a single 'Char'; wrap it so that both alternatives
    -- produce a 'String'.
    lineFeed = pure <$> newline
2015-07-28 16:32:19 +03:00
2015-08-12 15:41:22 +03:00
-- | Parses a horizontal tab (@\'\\t\'@) character and returns it.
tab :: Stream s m Char => ParsecT s u m Char
tab = tabChar <?> "tab"
  where
    tabChar = char '\t'
2015-08-12 15:41:22 +03:00
-- | Parses a letter, that is, any character for which 'isAlpha' holds.
-- Returns the parsed character.
letter :: Stream s m Char => ParsecT s u m Char
letter = alphabetic <?> "letter"
  where
    alphabetic = satisfy isAlpha
-- | Parses an upper case letter, that is, any character for which
-- 'isUpper' holds. Returns the parsed character.
upper :: Stream s m Char => ParsecT s u m Char
upper = upperCase <?> "uppercase letter"
  where
    upperCase = satisfy isUpper
2015-08-12 15:41:22 +03:00
-- | Parses a lower case letter, that is, any character for which
-- 'isLower' holds. Returns the parsed character.
lower :: Stream s m Char => ParsecT s u m Char
lower = lowerCase <?> "lowercase letter"
  where
    lowerCase = satisfy isLower
-- | Parses a digit, that is, any character for which 'isDigit' holds.
-- Returns the parsed character.
digit :: Stream s m Char => ParsecT s u m Char
digit = decimalDigit <?> "digit"
  where
    decimalDigit = satisfy isDigit
2015-08-12 15:41:22 +03:00
-- | Parses a hexadecimal digit: a digit, or a letter in the range
-- “a”–“f” or “A”–“F” (any character for which 'isHexDigit' holds).
-- Returns the parsed character.
hexDigit :: Stream s m Char => ParsecT s u m Char
hexDigit = hexadecimal <?> "hexadecimal digit"
  where
    hexadecimal = satisfy isHexDigit
2015-08-12 15:41:22 +03:00
-- | Parses an octal digit: a character in the range “0”–“7” (any
-- character for which 'isOctDigit' holds). Returns the parsed character.
octDigit :: Stream s m Char => ParsecT s u m Char
octDigit = octal <?> "octal digit"
  where
    octal = satisfy isOctDigit
2015-08-12 15:41:22 +03:00
-- | Parses a letter or a digit, that is, any character for which
-- 'isAlphaNum' holds. Returns the parsed character.
alphaNum :: Stream s m Char => ParsecT s u m Char
alphaNum = alphanumeric <?> "letter or digit"
  where
    alphanumeric = satisfy isAlphaNum
2015-07-28 16:32:19 +03:00
-- | @char c@ parses exactly the character @c@ and returns it. The parser
-- is labelled with the result of 'showToken' applied to @c@.
--
-- > semicolon = char ';'
char :: Stream s m Char => Char -> ParsecT s u m Char
char c = satisfy matches <?> expected
  where
    -- Accept only a character equal to the requested one.
    matches x = x == c
    -- Label attached to this parser.
    expected = showToken c
2015-07-28 16:32:19 +03:00
-- | Accepts whatever character comes next, without any restriction, and
-- returns it.
anyChar :: Stream s m Char => ParsecT s u m Char
anyChar = satisfy (\_ -> True) <?> "character"
2015-07-28 16:32:19 +03:00
-- | @satisfy f@ parses one character and succeeds only if the predicate
-- @f@ returns 'True' for it. The character that was actually parsed is
-- returned.
--
-- > digit = satisfy isDigit
-- > oneOf cs = satisfy (`elem` cs)
satisfy :: Stream s m Char => (Char -> Bool) -> ParsecT s u m Char
satisfy f = tokenPrim advance accept
  where
    -- New source position after the character; the third argument passed
    -- by 'tokenPrim' is not needed.
    advance pos c _ = updatePosChar pos c
    -- Keep the character only when the predicate approves of it.
    accept c
      | f c       = Just c
      | otherwise = Nothing
-- | @string s@ parses the exact sequence of characters given by @s@ and
-- returns the parsed string (i.e. @s@). It is implemented with 'tokens',
-- using 'updatePosString' to advance the source position.
--
-- > divOrMod = string "div" <|> string "mod"
string :: Stream s m Char => String -> ParsecT s u m String
string s = tokens updatePosString s