megaparsec/Text/MegaParsec/Token.hs

597 lines
20 KiB
Haskell
Raw Normal View History

2008-01-13 20:53:15 +03:00
-- |
2015-07-28 16:32:19 +03:00
-- Module : Text.MegaParsec.Token
-- Copyright : © 2015 MegaParsec contributors
-- © 2007 Paolo Martini
-- © 19992001 Daan Leijen
2015-07-28 16:32:19 +03:00
-- License : BSD3
--
-- Maintainer : Mark Karpov <markkarpov@opmbx.org>
2015-07-29 11:38:32 +03:00
-- Stability : experimental
2008-01-20 09:39:18 +03:00
-- Portability : non-portable (uses local universal quantification: PolymorphicComponents)
2015-07-28 16:32:19 +03:00
--
-- A helper module to parse lexical elements (tokens). See 'makeTokenParser'
-- for a description of how to use it.
2008-01-13 20:53:15 +03:00
2008-02-13 07:32:24 +03:00
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
2008-01-13 20:53:15 +03:00
2015-07-28 16:32:19 +03:00
module Text.MegaParsec.Token
2015-07-30 21:36:54 +03:00
( LanguageDef (..)
, TokenParser (..)
2015-07-28 16:32:19 +03:00
, makeTokenParser )
where
2008-01-13 20:53:15 +03:00
2015-07-28 16:32:19 +03:00
import Data.Char (isAlpha, toLower, toUpper, isSpace, digitToInt)
import Data.List (nub, sort)
2015-07-30 18:45:06 +03:00
2015-07-30 21:36:54 +03:00
import Control.Monad (void)
2015-07-28 16:32:19 +03:00
import Text.MegaParsec.Prim
import Text.MegaParsec.Char
import Text.MegaParsec.Combinator
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
-- Language definition
2015-07-30 21:36:54 +03:00
-- | The @LanguageDef@ type is a record that contains all parameterizable
2015-07-30 18:45:06 +03:00
-- features of the "Text.Parsec.Token" module. The module
-- "Text.Parsec.Language" contains some default definitions.
2015-07-30 21:36:54 +03:00
data LanguageDef s u m =
2015-07-30 18:45:06 +03:00
LanguageDef {
2015-07-28 16:32:19 +03:00
-- | Describes the start of a block comment. Use the empty string if the
2015-07-28 16:32:19 +03:00
-- language doesn't support block comments. For example \"\/*\".
2015-07-30 18:45:06 +03:00
commentStart :: String
-- | Describes the end of a block comment. Use the empty string if the
2015-07-28 16:32:19 +03:00
-- language doesn't support block comments. For example \"*\/\".
2015-07-30 18:45:06 +03:00
, commentEnd :: String
-- | Describes the start of a line comment. Use the empty string if the
2015-07-28 16:32:19 +03:00
-- language doesn't support line comments. For example \"\/\/\".
2015-07-30 18:45:06 +03:00
, commentLine :: String
2015-07-28 16:32:19 +03:00
-- | Set to 'True' if the language supports nested block comments.
2015-07-30 18:45:06 +03:00
, nestedComments :: Bool
-- | This parser should accept any start characters of identifiers. For
2015-07-28 16:32:19 +03:00
-- example @letter \<|> char \'_\'@.
2015-07-30 18:45:06 +03:00
, identStart :: ParsecT s u m Char
-- | This parser should accept any legal tail characters of identifiers.
2015-07-28 16:32:19 +03:00
-- For example @alphaNum \<|> char \'_\'@.
2015-07-30 18:45:06 +03:00
, identLetter :: ParsecT s u m Char
-- | This parser should accept any start characters of operators. For
2015-07-28 16:32:19 +03:00
-- example @oneOf \":!#$%&*+.\/\<=>?\@\\\\^|-~\"@
2015-07-30 18:45:06 +03:00
, opStart :: ParsecT s u m Char
-- | This parser should accept any legal tail characters of operators.
-- Note that this parser should even be defined if the language doesn't
2015-07-30 18:45:06 +03:00
-- support user-defined operators, or otherwise the 'reservedOp' parser
-- won't work correctly.
2015-07-30 18:45:06 +03:00
, opLetter :: ParsecT s u m Char
2015-07-28 16:32:19 +03:00
-- | The list of reserved identifiers.
2015-07-30 18:45:06 +03:00
, reservedNames :: [String]
2015-07-28 16:32:19 +03:00
-- | The list of reserved operators.
2015-07-30 18:45:06 +03:00
, reservedOpNames :: [String]
2015-07-28 16:32:19 +03:00
-- | Set to 'True' if the language is case sensitive.
2015-07-30 18:45:06 +03:00
, caseSensitive :: Bool }
2008-01-13 20:53:15 +03:00
2015-07-30 21:36:54 +03:00
-- Token parser
-- | The type of the record that holds lexical parsers that work on
-- @s@ streams with state @u@ over a monad @m@.
2015-07-30 21:36:54 +03:00
data TokenParser s u m =
2015-07-30 18:45:06 +03:00
TokenParser {
2015-07-30 18:45:06 +03:00
-- | This lexeme parser parses a legal identifier. Returns the identifier
-- string. This parser will fail on identifiers that are reserved
-- words. Legal identifier (start) characters and reserved words are
-- defined in the 'LanguageDef' that is passed to 'makeTokenParser'. An
-- @identifier@ is treated as a single token using 'try'.
2015-07-28 16:32:19 +03:00
2015-07-30 18:45:06 +03:00
identifier :: ParsecT s u m String
2015-07-30 18:45:06 +03:00
-- | The lexeme parser @reserved name@ parses @symbol
-- name@, but it also checks that the @name@ is not a prefix of a valid
-- identifier. A @reserved@ word is treated as a single token using
-- 'try'.
2015-07-30 18:45:06 +03:00
, reserved :: String -> ParsecT s u m ()
2015-07-30 18:45:06 +03:00
-- | This lexeme parser parses a legal operator. Returns the name of the
-- operator. This parser will fail on any operators that are reserved
-- operators. Legal operator (start) characters and reserved operators
-- are defined in the 'LanguageDef' that is passed to
-- 'makeTokenParser'. An @operator@ is treated as a single token using
-- 'try'.
2015-07-30 18:45:06 +03:00
, operator :: ParsecT s u m String
2015-07-30 18:45:06 +03:00
-- |The lexeme parser @reservedOp name@ parses @symbol
-- name@, but it also checks that the @name@ is not a prefix of a valid
-- operator. A @reservedOp@ is treated as a single token using 'try'.
2015-07-30 18:45:06 +03:00
, reservedOp :: String -> ParsecT s u m ()
2015-07-30 18:45:06 +03:00
-- | This lexeme parser parses a single literal character. Returns the
-- literal character value. This parsers deals correctly with escape
-- sequences. The literal character is parsed according to the grammar
-- rules defined in the Haskell report (which matches most programming
-- languages quite closely).
2015-07-30 18:45:06 +03:00
, charLiteral :: ParsecT s u m Char
2015-07-30 18:45:06 +03:00
-- | This lexeme parser parses a literal string. Returns the literal
-- string value. This parsers deals correctly with escape sequences and
-- gaps. The literal string is parsed according to the grammar rules
-- defined in the Haskell report (which matches most programming
-- languages quite closely).
2015-07-30 18:45:06 +03:00
, stringLiteral :: ParsecT s u m String
2015-07-30 18:45:06 +03:00
-- | This lexeme parser parses a natural number (a positive whole
-- number). Returns the value of the number. The number can be specified
-- in 'decimal', 'hexadecimal' or 'octal'. The number is parsed
-- according to the grammar rules in the Haskell report.
2015-07-30 18:45:06 +03:00
, natural :: ParsecT s u m Integer
2015-07-30 18:45:06 +03:00
-- | This lexeme parser parses an integer (a whole number). This parser
-- is like 'natural' except that it can be prefixed with sign
-- (i.e. \'-\' or \'+\'). Returns the value of the number. The number
-- can be specified in 'decimal', 'hexadecimal' or 'octal'. The number
-- is parsed according to the grammar rules in the Haskell report.
2015-07-28 16:32:19 +03:00
2015-07-30 18:45:06 +03:00
, integer :: ParsecT s u m Integer
2015-07-30 18:45:06 +03:00
-- | This lexeme parser parses a floating point value. Returns the value
-- of the number. The number is parsed according to the grammar rules
-- defined in the Haskell report.
2015-07-30 18:45:06 +03:00
, float :: ParsecT s u m Double
2015-07-30 18:45:06 +03:00
-- | This lexeme parser parses either 'natural' or a 'float'.
-- Returns the value of the number. This parsers deals with any overlap
-- in the grammar rules for naturals and floats. The number is parsed
-- according to the grammar rules defined in the Haskell report.
2015-07-30 18:45:06 +03:00
, naturalOrFloat :: ParsecT s u m (Either Integer Double)
2015-07-30 18:45:06 +03:00
-- | Parses a positive whole number in the decimal system. Returns the
-- value of the number.
2015-07-30 18:45:06 +03:00
, decimal :: ParsecT s u m Integer
2015-07-30 18:45:06 +03:00
-- | Parses a positive whole number in the hexadecimal system. The number
-- should be prefixed with \"0x\" or \"0X\". Returns the value of the
-- number.
2015-07-30 18:45:06 +03:00
, hexadecimal :: ParsecT s u m Integer
2015-07-30 18:45:06 +03:00
-- | Parses a positive whole number in the octal system. The number
-- should be prefixed with \"0o\" or \"0O\". Returns the value of the
-- number.
2015-07-30 18:45:06 +03:00
, octal :: ParsecT s u m Integer
2015-07-30 18:45:06 +03:00
-- | Lexeme parser @symbol s@ parses 'string' @s@ and skips
-- trailing white space.
2015-07-30 18:45:06 +03:00
, symbol :: String -> ParsecT s u m String
2015-07-30 18:45:06 +03:00
-- | @lexeme p@ first applies parser @p@ and than the 'whiteSpace'
-- parser, returning the value of @p@. Every lexical token (lexeme) is
-- defined using @lexeme@, this way every parse starts at a point
-- without white space. Parsers that use @lexeme@ are called /lexeme/
-- parsers in this document.
--
-- The only point where the 'whiteSpace' parser should be called
-- explicitly is the start of the main parser in order to skip any
-- leading white space.
2015-07-30 18:45:06 +03:00
, lexeme :: forall a. ParsecT s u m a -> ParsecT s u m a
2015-07-30 18:45:06 +03:00
-- | Parses any white space. White space consists of /zero/ or more
-- occurrences of a 'space', a line comment or a block (multi line)
-- comment. Block comments may be nested. How comments are started and
-- ended is defined in the 'LanguageDef' that is passed to
-- 'makeTokenParser'.
2015-07-30 18:45:06 +03:00
, whiteSpace :: ParsecT s u m ()
2015-07-30 18:45:06 +03:00
-- | Lexeme parser @parens p@ parses @p@ enclosed in parenthesis,
-- returning the value of @p@.
2015-07-30 18:45:06 +03:00
, parens :: forall a. ParsecT s u m a -> ParsecT s u m a
2015-07-30 18:45:06 +03:00
-- | Lexeme parser @braces p@ parses @p@ enclosed in braces (\'{\' and
-- \'}\'), returning the value of @p@.
2015-07-30 18:45:06 +03:00
, braces :: forall a. ParsecT s u m a -> ParsecT s u m a
2015-07-30 18:45:06 +03:00
-- | Lexeme parser @angles p@ parses @p@ enclosed in angle brackets (\'\<\'
-- and \'>\'), returning the value of @p@.
2015-07-30 18:45:06 +03:00
, angles :: forall a. ParsecT s u m a -> ParsecT s u m a
2015-07-30 18:45:06 +03:00
-- | Lexeme parser @brackets p@ parses @p@ enclosed in brackets (\'[\'
-- and \']\'), returning the value of @p@.
2015-07-30 18:45:06 +03:00
, brackets :: forall a. ParsecT s u m a -> ParsecT s u m a
2015-07-30 18:45:06 +03:00
-- | Lexeme parser |semi| parses the character \';\' and skips any
-- trailing white space. Returns the string \";\".
2015-07-30 18:45:06 +03:00
, semi :: ParsecT s u m String
2015-07-30 18:45:06 +03:00
-- | Lexeme parser @comma@ parses the character \',\' and skips any
-- trailing white space. Returns the string \",\".
2015-07-30 18:45:06 +03:00
, comma :: ParsecT s u m String
2015-07-30 18:45:06 +03:00
-- | Lexeme parser @colon@ parses the character \':\' and skips any
-- trailing white space. Returns the string \":\".
2015-07-30 18:45:06 +03:00
, colon :: ParsecT s u m String
2015-07-30 18:45:06 +03:00
-- | Lexeme parser @dot@ parses the character \'.\' and skips any
-- trailing white space. Returns the string \".\".
2015-07-30 18:45:06 +03:00
, dot :: ParsecT s u m String
2015-07-30 18:45:06 +03:00
-- | Lexeme parser @semiSep p@ parses /zero/ or more occurrences of @p@
-- separated by 'semi'. Returns a list of values returned by @p@.
2015-07-30 18:45:06 +03:00
, semiSep :: forall a . ParsecT s u m a -> ParsecT s u m [a]
2015-07-30 18:45:06 +03:00
-- | Lexeme parser @semiSep1 p@ parses /one/ or more occurrences of @p@
-- separated by 'semi'. Returns a list of values returned by @p@.
2015-07-30 18:45:06 +03:00
, semiSep1 :: forall a . ParsecT s u m a -> ParsecT s u m [a]
2015-07-30 18:45:06 +03:00
-- | Lexeme parser @commaSep p@ parses /zero/ or more occurrences of
-- @p@ separated by 'comma'. Returns a list of values returned by @p@.
2015-07-30 18:45:06 +03:00
, commaSep :: forall a . ParsecT s u m a -> ParsecT s u m [a]
2015-07-30 18:45:06 +03:00
-- | Lexeme parser @commaSep1 p@ parses /one/ or more occurrences of
-- @p@ separated by 'comma'. Returns a list of values returned by @p@.
2015-07-30 18:45:06 +03:00
, commaSep1 :: forall a . ParsecT s u m a -> ParsecT s u m [a] }
2015-07-30 18:45:06 +03:00
-- Given a LanguageDef, create a token parser
2015-07-30 21:36:54 +03:00
-- | The expression @makeTokenParser language@ creates a 'TokenParser'
2015-07-30 18:45:06 +03:00
-- record that contains lexical parsers that are defined using the
-- definitions in the @language@ record.
--
2015-07-30 18:45:06 +03:00
-- The use of this function is quite stylized — one imports the appropriate
-- language definition and selects the lexical parsers that are needed from
2015-07-30 21:36:54 +03:00
-- the resulting 'TokenParser'.
--
2015-07-30 18:45:06 +03:00
-- > module Main (main) where
-- >
2015-07-30 18:45:06 +03:00
-- > import Text.Parsec
-- > import qualified Text.Parsec.Token as Token
-- > import Text.Parsec.Language (haskellDef)
-- >
2015-07-30 18:45:06 +03:00
-- > -- The parser
-- > ...
2015-07-28 16:32:19 +03:00
-- >
2015-07-30 18:45:06 +03:00
-- > expr = parens expr
-- > <|> identifier
-- > <|> ...
-- >
2015-07-30 18:45:06 +03:00
-- > -- The lexer
-- > lexer = Token.makeTokenParser haskellDef
2015-07-28 16:32:19 +03:00
-- >
2015-07-30 18:45:06 +03:00
-- > parens = Token.parens lexer
-- > braces = Token.braces lexer
-- > identifier = Token.identifier lexer
-- > reserved = Token.reserved lexer
-- > ...
2015-07-30 21:36:54 +03:00
makeTokenParser :: Stream s m Char => LanguageDef s u m -> TokenParser s u m
2015-07-30 18:45:06 +03:00
makeTokenParser languageDef =
TokenParser
{ identifier = identifier
, reserved = reserved
, operator = operator
, reservedOp = reservedOp
, charLiteral = charLiteral
, stringLiteral = stringLiteral
, natural = natural
, integer = integer
, float = float
, naturalOrFloat = naturalOrFloat
, decimal = decimal
, hexadecimal = hexadecimal
, octal = octal
, symbol = symbol
, lexeme = lexeme
, whiteSpace = whiteSpace
, parens = parens
, braces = braces
, angles = angles
, brackets = brackets
, semi = semi
, comma = comma
, colon = colon
, dot = dot
, semiSep = semiSep
, semiSep1 = semiSep1
, commaSep = commaSep
, commaSep1 = commaSep1 }
2008-01-13 20:53:15 +03:00
where
2015-07-30 18:45:06 +03:00
-- bracketing
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
parens = between (symbol "(") (symbol ")")
braces = between (symbol "{") (symbol "}")
angles = between (symbol "<") (symbol ">")
brackets = between (symbol "[") (symbol "]")
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
semi = symbol ";"
comma = symbol ","
dot = symbol "."
colon = symbol ":"
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
commaSep = (`sepBy` comma)
semiSep = (`sepBy` semi)
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
commaSep1 = (`sepBy1` comma)
semiSep1 = (`sepBy1` semi)
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
-- chars & strings
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
charLiteral = lexeme ( between (char '\'')
(char '\'' <?> "end of character")
characterChar )
<?> "character"
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
characterChar = charLetter <|> charEscape <?> "literal character"
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
charEscape = char '\\' >> escapeCode
charLetter = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026'))
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
stringLiteral =
lexeme ((foldr (maybe id (:)) "" <$>
between (char '"') (char '"' <?> "end of string")
(many stringChar)) <?> "literal string")
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
stringChar = (Just <$> stringLetter) <|> stringEscape <?> "string character"
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
stringLetter = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026'))
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
stringEscape = char '\\' >>
( (escapeGap >> return Nothing) <|>
(escapeEmpty >> return Nothing) <|>
(Just <$> escapeCode) )
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
escapeEmpty = char '&'
escapeGap = many1 space >> char '\\' <?> "end of string gap"
2008-01-13 20:53:15 +03:00
-- escape codes
2015-07-30 18:45:06 +03:00
escapeCode = charEsc <|> charNum <|> charAscii <|> charControl
<?> "escape code"
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
charControl = toEnum . subtract 64 . fromEnum <$> (char '^' >> upper)
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
charNum = toEnum . fromInteger <$>
( decimal <|>
(char 'o' >> number 8 octDigit) <|>
(char 'x' >> number 16 hexDigit) )
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
charEsc = choice (parseEsc <$> escMap)
where parseEsc (c, code) = char c >> return code
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
charAscii = choice (parseAscii <$> asciiMap)
where parseAscii (asc, code) = try (string asc >> return code)
2008-01-13 20:53:15 +03:00
-- escape code tables
2015-07-30 18:45:06 +03:00
escMap = zip "abfnrtv\\\"\'" "\a\b\f\n\r\t\v\\\"\'"
asciiMap = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2)
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
ascii2codes = ["BS","HT","LF","VT","FF","CR","SO","SI","EM",
"FS","GS","RS","US","SP"]
ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK","BEL",
"DLE","DC1","DC2","DC3","DC4","NAK","SYN","ETB",
"CAN","SUB","ESC","DEL"]
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
ascii2 = "\b\t\n\v\f\r\SO\SI\EM\FS\GS\RS\US "
ascii3 = "\NUL\SOH\STX\ETX\EOT\ENQ\ACK\a\DLE\DC1\DC2\DC3\DC4\NAK\SYN\ETB\CAN\SUB\ESC\DEL"
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
-- numbers
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
naturalOrFloat = lexeme natFloat <?> "number"
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
float = lexeme floating <?> "float"
integer = lexeme int <?> "integer"
natural = lexeme nat <?> "natural"
2008-01-13 20:53:15 +03:00
-- floats
2015-07-30 18:45:06 +03:00
floating = decimal >>= fractExponent
natFloat = (char '0' >> zeroNumFloat) <|> decimalFloat
zeroNumFloat = (Left <$> (hexadecimal <|> octal)) <|>
decimalFloat <|>
fractFloat 0 <|>
return (Left 0)
decimalFloat = decimal >>= \n -> option (Left n) (fractFloat n)
fractFloat n = Right <$> fractExponent n
fractExponent n =
do { fract <- fraction
; expo <- option 1.0 exponent'
; return $ (n' + fract) * expo
} <|> ((* n') <$> exponent')
where n' = fromInteger n
fraction =
do { void $ char '.'
; digits <- many1 digit <?> "fraction"
; return $ foldr op 0.0 digits
} <?> "fraction"
where op d f = (f + fromIntegral (digitToInt d)) / 10.0
exponent' =
do { void $ oneOf "eE"
; f <- sign
; e <- decimal <?> "exponent"
; return $ power (f e)
} <?> "exponent"
where power e
| e < 0 = 1.0 / power (-e)
| otherwise = fromInteger (10^e)
2008-01-13 20:53:15 +03:00
-- integers and naturals
2015-07-30 18:45:06 +03:00
int = ($) <$> lexeme sign <*> nat
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
sign = (char '-' >> return negate) <|> (char '+' >> return id) <|> return id
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
nat = zeroNumber <|> decimal
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
zeroNumber = (char '0' >> (hexadecimal <|> octal <|> decimal <|> return 0))
<?> ""
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
decimal = number 10 digit
hexadecimal = oneOf "xX" >> number 16 hexDigit
octal = oneOf "oO" >> number 8 octDigit
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
number base baseDigit = do
digits <- many1 baseDigit
let n = foldl (\x d -> base * x + toInteger (digitToInt d)) 0 digits
n `seq` return n
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
-- operators & reserved ops
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
reservedOp name =
lexeme $ try $ do
void $ string name
notFollowedBy (opLetter languageDef) <?> ("end of " ++ show name)
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
operator =
lexeme $ try $ do
name <- oper
if isReservedOp name
then unexpected ("reserved operator " ++ show name)
else return name
oper = ((:) <$> opStart languageDef <*> many (opLetter languageDef))
<?> "operator"
isReservedOp = isReserved . sort $ reservedOpNames languageDef
-- identifiers & reserved words
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
reserved name =
lexeme $ try $ do
void $ caseString name
notFollowedBy (identLetter languageDef) <?> ("end of " ++ show name)
caseString name
| caseSensitive languageDef = string name
| otherwise = walk name >> return name
where walk = foldr (\c -> ((caseChar c <?> show name) >>)) (return ())
caseChar c
| isAlpha c = char (toLower c) <|> char (toUpper c)
| otherwise = char c
identifier =
lexeme $ try $ do
name <- ident
if isReservedName name
then unexpected ("reserved word " ++ show name)
else return name
ident = ((:) <$> identStart languageDef <*> many (identLetter languageDef))
<?> "identifier"
isReservedName name = isReserved theReservedNames caseName
where caseName
| caseSensitive languageDef = name
| otherwise = toLower <$> name
isReserved names name = scan names
where scan [] = False
scan (r:rs) = case compare r name of
LT -> scan rs
EQ -> True
GT -> False
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
theReservedNames
| caseSensitive languageDef = sort reserved
| otherwise = sort . fmap (fmap toLower) $ reserved
where reserved = reservedNames languageDef
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
-- white space & symbols
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
symbol = lexeme . string
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
lexeme p = p <* whiteSpace
2008-01-13 20:53:15 +03:00
whiteSpace
2015-07-30 18:45:06 +03:00
| noLine && noMulti = skipMany (simpleSpace <?> "")
| noLine = skipMany (simpleSpace <|>
multiLineComment <?> "")
| noMulti = skipMany (simpleSpace <|>
oneLineComment <?> "")
| otherwise = skipMany (simpleSpace <|>
oneLineComment <|>
multiLineComment <?> "")
2008-01-13 20:53:15 +03:00
where
noLine = null (commentLine languageDef)
noMulti = null (commentStart languageDef)
2015-07-30 18:45:06 +03:00
simpleSpace = skipMany1 (satisfy isSpace)
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
oneLineComment = void (try (string (commentLine languageDef))
>> skipMany (satisfy (/= '\n')))
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
multiLineComment = try (string (commentStart languageDef)) >> inComment
2008-01-13 20:53:15 +03:00
2015-07-30 18:45:06 +03:00
inComment = if nestedComments languageDef
then inCommentMulti
else inCommentSingle
2008-01-13 20:53:15 +03:00
inCommentMulti
2015-07-30 18:45:06 +03:00
= void (try . string $ commentEnd languageDef)
<|> (multiLineComment >> inCommentMulti)
<|> (skipMany1 (noneOf startEnd) >> inCommentMulti)
<|> (oneOf startEnd >> inCommentMulti)
<?> "end of comment"
2008-01-13 20:53:15 +03:00
inCommentSingle
2015-07-30 18:45:06 +03:00
= void (try . string $ commentEnd languageDef)
<|> (skipMany1 (noneOf startEnd) >> inCommentSingle)
<|> (oneOf startEnd >> inCommentSingle)
<?> "end of comment"
startEnd = nub $ (++) <$> commentEnd <*> commentStart $ languageDef