-- megaparsec/Text/Megaparsec/Token.hs

-- |
-- Module      :  Text.Megaparsec.Token
-- Copyright   :  © 2015 Megaparsec contributors
--                © 2007 Paolo Martini
--                © 1999–2001 Daan Leijen
-- License     :  BSD3
--
-- Maintainer  :  Mark Karpov <markkarpov@opmbx.org>
-- Stability   :  experimental
-- Portability :  non-portable (uses local universal quantification: PolymorphicComponents)
--
-- A helper module to parse lexical elements (tokens). See 'makeTokenParser'
-- for a description of how to use it.

{-# OPTIONS_GHC -fno-warn-name-shadowing #-}

module Text.Megaparsec.Token
  ( LanguageDef (..)
  , TokenParser (..)
  , makeTokenParser )
where

import Control.Applicative ((<|>), many, some)
import Control.Monad (void)
import Data.Char (isAlpha, toLower, toUpper)
import Data.List (sort)

import Text.Megaparsec.Prim
import Text.Megaparsec.Char
import Text.Megaparsec.Combinator

-- Language definition

-- | The @LanguageDef@ type is a record that contains all parameters used to
-- control features of the "Text.Megaparsec.Token" module. The module
-- "Text.Megaparsec.Language" contains some default definitions.

data LanguageDef s u m =
  LanguageDef {

  -- NOTE: the four comment-related fields below ('commentStart',
  -- 'commentEnd', 'commentLine' and 'nestedComments') are currently not
  -- consulted by 'makeTokenParser': its 'whiteSpace' is defined as plain
  -- @hidden space@ and the comment-skipping code is commented out.

  -- | Describes the start of a block comment. Use the empty string if the
  -- language doesn't support block comments.

    commentStart :: String

  -- | Describes the end of a block comment. Use the empty string if the
  -- language doesn't support block comments.

  , commentEnd :: String

  -- | Describes the start of a line comment. Use the empty string if the
  -- language doesn't support line comments.

  , commentLine :: String

  -- | Set to 'True' if the language supports nested block comments.

  , nestedComments :: Bool

  -- | This parser should accept any start characters of identifiers, for
  -- example @letterChar \<|> char \'_\'@.

  , identStart :: ParsecT s u m Char

  -- | This parser should accept any legal tail characters of identifiers,
  -- for example @alphaNumChar \<|> char \'_\'@.

  , identLetter :: ParsecT s u m Char

  -- | This parser should accept any start characters of operators, for
  -- example @oneOf \":!#$%&*+.\/\<=>?\@\\\\^|-~\"@

  , opStart :: ParsecT s u m Char

  -- | This parser should accept any legal tail characters of operators.
  -- Note that this parser must be defined even if the language doesn't
  -- support user-defined operators, because the 'reservedOp' parser uses
  -- it to check that a reserved operator is not followed by further
  -- operator characters.

  , opLetter :: ParsecT s u m Char

  -- | The list of reserved identifiers.

  , reservedNames :: [String]

  -- | The list of reserved operators.

  , reservedOpNames :: [String]

  -- | Set to 'True' if the language is case sensitive. When 'False',
  -- 'reserved' accepts every alphabetic character of the keyword in either
  -- case, and 'identifier' compares the lower-cased name against the
  -- lower-cased 'reservedNames'.

  , caseSensitive :: Bool }

-- Token parser

-- | The type of the record that holds lexical parsers that work on
-- @s@ streams with state @u@ over a monad @m@.

data TokenParser s u m =
  TokenParser {

  -- | The lexeme parser parses a legal identifier. Returns the identifier
  -- string. This parser will fail on identifiers that are reserved
  -- words. Legal identifier (start) characters and reserved words are
  -- defined in the 'LanguageDef' that is passed to 'makeTokenParser'.

    identifier :: ParsecT s u m String

  -- | The lexeme parser @reserved name@ parses @symbol name@, but it also
  -- checks that the @name@ is not a prefix of a valid identifier.

  , reserved :: String -> ParsecT s u m ()

  -- | The lexeme parser parses a legal operator. Returns the name of the
  -- operator. This parser will fail on any operators that are reserved
  -- operators. Legal operator (start) characters and reserved operators are
  -- defined in the 'LanguageDef' that is passed to 'makeTokenParser'.

  , operator :: ParsecT s u m String

  -- | The lexeme parser @reservedOp name@ parses @symbol name@, but it
  -- also checks that the @name@ is not a prefix of a valid operator.

  , reservedOp :: String -> ParsecT s u m ()

  -- | The lexeme parser parses a single literal character. Returns the
  -- literal character value. This parser deals correctly with escape
  -- sequences. The literal character is parsed according to the grammar
  -- rules defined in the Haskell report (which matches most programming
  -- languages quite closely).

  , charLiteral :: ParsecT s u m Char

  -- | The lexeme parser parses a literal string. Returns the literal
  -- string value. This parser deals correctly with escape sequences and
  -- gaps. The literal string is parsed according to the grammar rules
  -- defined in the Haskell report (which matches most programming languages
  -- quite closely).

  , stringLiteral :: ParsecT s u m String

  -- | The lexeme parser parses an integer (a whole number). This parser
  -- /does not/ parse sign. Returns the value of the number.
  --
  -- Note that 'makeTokenParser' currently defines this field as 'decimal',
  -- so only decimal notation is accepted; use 'hexadecimal' or 'octal'
  -- explicitly for the other bases.

  , integer :: ParsecT s u m Integer

  -- | This is just like 'integer', except it can parse sign.

  , integer' :: ParsecT s u m Integer

  -- | The lexeme parser parses a positive whole number in the decimal
  -- system. Returns the value of the number.

  , decimal :: ParsecT s u m Integer

  -- | The lexeme parser parses a positive whole number in the hexadecimal
  -- system. The number should be prefixed with “0x” or “0X”. Returns the
  -- value of the number.

  , hexadecimal :: ParsecT s u m Integer

  -- | The lexeme parser parses a positive whole number in the octal
  -- system. The number should be prefixed with “0o” or “0O”. Returns the
  -- value of the number.

  , octal :: ParsecT s u m Integer

  -- | @signed p@ tries to parse sign (i.e. “+”, “-”, or nothing) and
  -- then runs parser @p@, changing sign of its result accordingly. Note
  -- that there may be white space after the sign but not before it.

  , signed :: forall a . Num a => ParsecT s u m a -> ParsecT s u m a

  -- | The lexeme parser parses a floating point value. Returns the value
  -- of the number. The number is parsed according to the grammar rules
  -- defined in the Haskell report, sign is /not/ parsed, use 'float'' to
  -- achieve parsing of signed floating point values.

  , float :: ParsecT s u m Double

  -- | This is just like 'float', except it can parse sign.

  , float' :: ParsecT s u m Double

  -- | The lexeme parser parses either 'integer' or a 'float'.
  -- Returns the value of the number. This parser deals with any overlap in
  -- the grammar rules for integers and floats. The number is parsed
  -- according to the grammar rules defined in the Haskell report.

  , number :: ParsecT s u m (Either Integer Double)

  -- | This is just like 'number', except it can parse sign.

  , number' :: ParsecT s u m (Either Integer Double)

  -- | Lexeme parser @symbol s@ parses 'string' @s@ and skips
  -- trailing white space.

  , symbol :: String -> ParsecT s u m String

  -- | @lexeme p@ first applies parser @p@ and then the 'whiteSpace'
  -- parser, returning the value of @p@. Every lexical token (lexeme) is
  -- defined using @lexeme@, this way every parse starts at a point without
  -- white space. Parsers that use @lexeme@ are called /lexeme/ parsers in
  -- this document.
  --
  -- The only point where the 'whiteSpace' parser should be called
  -- explicitly is the start of the main parser in order to skip any leading
  -- white space.

  , lexeme :: forall a. ParsecT s u m a -> ParsecT s u m a

  -- | Parses any white space. White space consists of /zero/ or more
  -- occurrences of a 'space', a line comment or a block (multi line)
  -- comment. Block comments may be nested. How comments are started and
  -- ended is defined in the 'LanguageDef' that is passed to
  -- 'makeTokenParser'.
  --
  -- Note that 'makeTokenParser' currently defines this field as
  -- @hidden space@ only; its comment-skipping code is commented out, so
  -- line and block comments are /not/ skipped at the moment.

  , whiteSpace :: ParsecT s u m ()

  -- | Lexeme parser @parens p@ parses @p@ enclosed in parentheses,
  -- returning the value of @p@.

  , parens :: forall a. ParsecT s u m a -> ParsecT s u m a

  -- | Lexeme parser @braces p@ parses @p@ enclosed in braces (“{” and
  -- “}”), returning the value of @p@.

  , braces :: forall a. ParsecT s u m a -> ParsecT s u m a

  -- | Lexeme parser @angles p@ parses @p@ enclosed in angle brackets (“\<”
  -- and “>”), returning the value of @p@.

  , angles :: forall a. ParsecT s u m a -> ParsecT s u m a

  -- | Lexeme parser @brackets p@ parses @p@ enclosed in brackets (“[”
  -- and “]”), returning the value of @p@.

  , brackets :: forall a. ParsecT s u m a -> ParsecT s u m a

  -- | Lexeme parser @semicolon@ parses the character “;” and skips any
  -- trailing white space. Returns the string “;”.

  , semicolon :: ParsecT s u m String

  -- | Lexeme parser @comma@ parses the character “,” and skips any
  -- trailing white space. Returns the string “,”.

  , comma :: ParsecT s u m String

  -- | Lexeme parser @colon@ parses the character “:” and skips any
  -- trailing white space. Returns the string “:”.

  , colon :: ParsecT s u m String

  -- | Lexeme parser @dot@ parses the character “.” and skips any
  -- trailing white space. Returns the string “.”.

  , dot :: ParsecT s u m String

  -- | Lexeme parser @semicolonSep p@ parses /zero/ or more occurrences of
  -- @p@ separated by 'semicolon'. Returns a list of values returned by @p@.

  , semicolonSep :: forall a . ParsecT s u m a -> ParsecT s u m [a]

  -- | Lexeme parser @semicolonSep1 p@ parses /one/ or more occurrences of
  -- @p@ separated by 'semicolon'. Returns a list of values returned by @p@.

  , semicolonSep1 :: forall a . ParsecT s u m a -> ParsecT s u m [a]

  -- | Lexeme parser @commaSep p@ parses /zero/ or more occurrences of
  -- @p@ separated by 'comma'. Returns a list of values returned by @p@.

  , commaSep :: forall a . ParsecT s u m a -> ParsecT s u m [a]

  -- | Lexeme parser @commaSep1 p@ parses /one/ or more occurrences of
  -- @p@ separated by 'comma'. Returns a list of values returned by @p@.

  , commaSep1 :: forall a . ParsecT s u m a -> ParsecT s u m [a] }

-- Given a LanguageDef, create a token parser

-- | The expression @makeTokenParser language@ creates a 'TokenParser'
-- record that contains lexical parsers that are defined using the
-- definitions in the @language@ record.
--
-- The use of this function is quite stylized — one imports the appropriate
-- language definition and selects the lexical parsers that are needed from
-- the resulting 'TokenParser'.
--
-- > module Main (main) where
-- >
-- > import Text.Megaparsec
-- > import qualified Text.Megaparsec.Token as Token
-- > import Text.Megaparsec.Language (haskellDef)
-- >
-- > -- The parser
-- > ...
-- >
-- > expr =  parens expr
-- >     <|> identifier
-- >     <|> ...
-- >
-- > -- The lexer
-- > lexer      = Token.makeTokenParser haskellDef
-- >
-- > parens     = Token.parens     lexer
-- > braces     = Token.braces     lexer
-- > identifier = Token.identifier lexer
-- > reserved   = Token.reserved   lexer
-- > ...

makeTokenParser :: Stream s m Char => LanguageDef s u m -> TokenParser s u m
makeTokenParser languageDef =
  -- Every record field is filled with the local binding of the same name
  -- defined in the @where@ clause below (hence the name-shadowing warning
  -- is switched off for this module).
  TokenParser
  { identifier    = identifier
  , reserved      = reserved
  , operator      = operator
  , reservedOp    = reservedOp

  , charLiteral   = charLiteral
  , stringLiteral = stringLiteral

  , integer       = integer
  , integer'      = integer'
  , decimal       = decimal
  , hexadecimal   = hexadecimal
  , octal         = octal
  , signed        = signed
  , float         = float
  , float'        = float'
  , number        = number
  , number'       = number'

  , symbol        = symbol
  , lexeme        = lexeme
  , whiteSpace    = whiteSpace

  , parens        = parens
  , braces        = braces
  , angles        = angles
  , brackets      = brackets
  , semicolon     = semicolon
  , comma         = comma
  , colon         = colon
  , dot           = dot
  , semicolonSep  = semicolonSep
  , semicolonSep1 = semicolonSep1
  , commaSep      = commaSep
  , commaSep1     = commaSep1 }
  where

  -- bracketing
  --
  -- Each combinator wraps its argument between two 'symbol's, so white
  -- space after the opening and the closing delimiter is skipped.

  parens    = between (symbol "(") (symbol ")")
  braces    = between (symbol "{") (symbol "}")
  angles    = between (symbol "<") (symbol ">")
  brackets  = between (symbol "[") (symbol "]")

  -- punctuation lexemes; each returns the matched string

  semicolon = symbol ";"
  comma     = symbol ","
  dot       = symbol "."
  colon     = symbol ":"

  -- separated lists: zero or more items

  commaSep  = (`sepBy` comma)
  semicolonSep = (`sepBy` semicolon)

  -- separated lists: one or more items

  commaSep1 = (`sepBy1` comma)
  semicolonSep1 = (`sepBy1` semicolon)

  -- chars & strings

  -- A character literal: exactly one 'characterChar' between single
  -- quotes. Only the literal as a whole is a lexeme, so white space is
  -- skipped after the closing quote, not inside the quotes.

  charLiteral = lexeme ( between (char '\'')
                                 (char '\'' <?> "end of character")
                                 characterChar )
                <?> "character"

  -- Either a plain character or a backslash escape.

  characterChar = charLetter <|> charEscape <?> "literal character"

  charEscape = char '\\' >> escapeCode

  -- Plain characters are everything except the single quote, the backslash
  -- and code points up to 26 (@'\026'@ is a decimal escape).

  charLetter = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026'))

  -- A string literal: 'stringChar's between double quotes. Each
  -- 'stringChar' yields @Maybe Char@; the fold drops the 'Nothing's that
  -- are produced by string gaps and by the empty escape @\\&@.

  stringLiteral =
      lexeme ((foldr (maybe id (:)) "" <$>
               between (char '"') (char '"' <?> "end of string")
                           (many stringChar)) <?> "literal string")

  stringChar = (Just <$> stringLetter) <|> stringEscape <?> "string character"

  -- Same rule as 'charLetter', but the excluded quote is the double quote.

  stringLetter = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026'))

  -- After a backslash: a gap or @&@ contribute no character, anything else
  -- must be a regular escape code.

  stringEscape = char '\\' >>
                 ( (escapeGap >> return Nothing)   <|>
                   (escapeEmpty >> return Nothing) <|>
                   (Just <$> escapeCode) )

  escapeEmpty = char '&'

  -- The leading backslash has already been consumed by 'stringEscape':
  -- a gap is one or more white space characters closed by a backslash.

  escapeGap   = some spaceChar >> char '\\' <?> "end of string gap"

  -- escape codes

  -- Alternatives are tried in order; the label covers the whole choice
  -- because @\<?>@ binds weaker than @\<|>@.

  escapeCode = charEsc <|> charNum <|> charAscii <|> charControl
               <?> "escape code"

  -- Single-character escapes listed in 'escMap'.

  charEsc = choice (parseEsc <$> escMap)
      where parseEsc (c, code) = char c >> return code

  -- Numeric escape: a decimal code, an @o@-prefixed octal code or an
  -- @x@-prefixed hexadecimal code. The digits are read with the raw 'nump'
  -- helper and /not/ with the 'decimal' lexeme: the lexeme would skip white
  -- space that follows the escape inside the literal (turning @"\65 b"@
  -- into @"Ab"@ instead of @"A b"@). Codes above the largest 'Char' are
  -- reported as a parse error instead of letting 'toEnum' throw.

  charNum = do
    code <- nump "" digitChar
            <|> (char 'o' >> nump "0o" octDigitChar)
            <|> (char 'x' >> nump "0x" hexDigitChar)
    if code > toInteger (fromEnum (maxBound :: Char))
    then unexpected "character code out of range"
    else return (toEnum (fromInteger code))

  -- Named ASCII escapes. 'try' is required because several names share a
  -- prefix (e.g. @SO@ and @SOH@); 'asciiMap' lists the three-letter names
  -- first so that the longer name wins.

  charAscii = choice (parseAscii <$> asciiMap)
      where parseAscii (asc, code) = try (string asc >> return code)

  -- Control escape @^X@: the code of the upper-case character minus 64.

  charControl = toEnum . subtract 64 . fromEnum <$> (char '^' >> upperChar)

  -- escape code tables
  --
  -- The name lists and the character strings below are paired up
  -- positionally by 'zip', so they must stay in the same order.

  escMap      = zip "abfnrtv\\\"\'" "\a\b\f\n\r\t\v\\\"\'"
  asciiMap    = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2)

  ascii2codes = ["BS","HT","LF","VT","FF","CR","SO","SI","EM",
                 "FS","GS","RS","US","SP"]
  ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK","BEL",
                 "DLE","DC1","DC2","DC3","DC4","NAK","SYN","ETB",
                 "CAN","SUB","ESC","DEL"]

  ascii2 = "\b\t\n\v\f\r\SO\SI\EM\FS\GS\RS\US "
  ascii3 = "\NUL\SOH\STX\ETX\EOT\ENQ\ACK\a\DLE\DC1\DC2\DC3\DC4\NAK\SYN\ETB\CAN\SUB\ESC\DEL"

  -- numbers — integers

  -- NOTE: 'integer' is currently decimal-only; hexadecimal and octal
  -- notation is available through the dedicated parsers below.

  integer  = decimal
  integer' = signed integer

  decimal     = lexeme (nump "" digitChar <?> "integer")
  hexadecimal = lexeme $ char '0' >> oneOf "xX" >> nump "0x" hexDigitChar
  octal       = lexeme $ char '0' >> oneOf "oO" >> nump "0o" octDigitChar

  -- @nump prefix baseDigit@ parses one or more @baseDigit@s (no white space
  -- is skipped) and converts them with 'read' after prepending @prefix@
  -- (@""@, @"0o"@ or @"0x"@) so that 'read' picks the right base.
  -- 'read' is partial in general; this presumably cannot fail here because
  -- @baseDigit@ only yields digits valid for @prefix@ — verify against the
  -- digit parsers in "Text.Megaparsec.Char".

  nump prefix baseDigit = read . (prefix ++) <$> some baseDigit

  -- Optional sign (white space allowed after it), applied to the result
  -- of @p@.

  signed p = ($) <$> option id (lexeme sign) <*> p

  sign :: (Stream s m Char, Num a) => ParsecT s u m (a -> a)
  sign = (char '+' *> return id) <|> (char '-' *> return negate)

  -- numbers — floats

  float  = lexeme ffloat <?> "float"
  float' = signed float

  -- The textual form is collected as a 'String' and converted with 'read'.
  -- After the integral digits either a fraction or an exponent is
  -- mandatory, so a bare integer is not accepted as a float.

  ffloat = read <$> ffloat'
    where
      ffloat' = do
        decimal <- fDec
        rest <- fraction <|> fExp
        return $ decimal ++ rest

  -- @.digits@ optionally followed by an exponent.

  fraction = do
    void $ char '.'
    decimal <- fDec
    exp <- option "" fExp
    return $ '.' : decimal ++  exp

  fDec = some digitChar

  -- Exponent: @e@ or @E@, an optional sign, then digits.

  fExp = do
    expChar <- oneOf "eE"
    signStr <- option "" (pure <$> oneOf "+-")
    decimal <- fDec
    return $ expChar : signStr ++ decimal

  -- numbers — a more general case
  --
  -- The float alternative goes first under 'try', so input that turns out
  -- not to be a float is re-parsed as an integer from the same position.

  number  = (Right <$> try float)  <|> (Left <$> integer)  <?> "number"
  number' = (Right <$> try float') <|> (Left <$> integer') <?> "number"

  -- operators & reserved ops

  -- Matches @name@ literally and then requires that no operator tail
  -- character follows, so @name@ is not merely a prefix of a longer
  -- operator.

  reservedOp name =
      lexeme $ try $ do
        void $ string name
        notFollowedBy (opLetter languageDef) <?> ("end of " ++ show name)

  -- Parses any operator and rejects it if it is listed in
  -- 'reservedOpNames'; 'try' makes the rejection backtrack.

  operator =
      lexeme $ try $ do
        name <- oper
        if isReservedOp name
        then unexpected ("reserved operator " ++ show name)
        else return name

  oper = ((:) <$> opStart languageDef <*> many (opLetter languageDef))
         <?> "operator"

  isReservedOp = isReserved . sort $ reservedOpNames languageDef

  -- identifiers & reserved words

  -- Matches the keyword (respecting 'caseSensitive') and then requires
  -- that no identifier tail character follows.

  reserved name =
      lexeme $ try $ do
        void $ caseString name
        notFollowedBy (identLetter languageDef) <?> ("end of " ++ show name)

  -- In the case-insensitive mode every alphabetic character of @name@ is
  -- accepted in either case; the result is always @name@ as it was given,
  -- not the spelling found in the input.

  caseString name
      | caseSensitive languageDef = string name
      | otherwise                 = walk name >> return name
      where walk = foldr (\c -> ((caseChar c <?> show name) >>)) (return ())
            caseChar c
                | isAlpha c = char (toLower c) <|> char (toUpper c)
                | otherwise = char c

  -- Parses any identifier and rejects it if it is a reserved name; 'try'
  -- makes the rejection backtrack.

  identifier =
      lexeme $ try $ do
        name <- ident
        if isReservedName name
        then unexpected ("reserved word " ++ show name)
        else return name

  ident = ((:) <$> identStart languageDef <*> many (identLetter languageDef))
          <?> "identifier"

  -- In the case-insensitive mode the candidate is lower-cased before the
  -- lookup, matching the lower-cased entries of 'theReservedNames'.

  isReservedName name = isReserved theReservedNames caseName
      where caseName
                | caseSensitive languageDef = name
                | otherwise                 = toLower <$> name

  -- Membership test on a /sorted/ list: the scan stops as soon as an entry
  -- greater than @name@ is seen.

  isReserved names name = scan names
      where scan []     = False
            scan (r:rs) = case compare r name of
                            LT  -> scan rs
                            EQ  -> True
                            GT  -> False

  -- Reserved names, sorted for 'isReserved' (and lower-cased first when
  -- the language is case-insensitive).

  theReservedNames
      | caseSensitive languageDef = sort reserved
      | otherwise                 = sort . fmap (fmap toLower) $ reserved
      where reserved = reservedNames languageDef

  -- white space & symbols

  symbol = lexeme . string

  -- Runs @p@, then skips trailing white space, returning the result of @p@.

  lexeme p = p <* whiteSpace

  -- NOTE: comments are not skipped yet — the comment-aware definition is
  -- kept commented out right below.

  whiteSpace = hidden space -- FIXME: write it in a decent manner
      -- \| noLine && noMulti = skipMany (space            <?> "")
      -- \| noLine            = skipMany (space            <|>
      --                                 multiLineComment <?> "")
      -- \| noMulti           = skipMany (space            <|>
      --                                 oneLineComment   <?> "")
      -- \| otherwise         = skipMany (space            <|>
      --                                 oneLineComment   <|>
      --                                 multiLineComment <?> "")
      -- where
      --   noLine  = null (commentLine languageDef)
      --   noMulti = null (commentStart languageDef)

  -- oneLineComment = void (try (string (commentLine languageDef))
  --                       >> skipMany (satisfy (/= '\n')))

  -- multiLineComment = try (string (commentStart languageDef)) >> inComment

  -- inComment = if nestedComments languageDef
  --             then inCommentMulti
  --             else inCommentSingle

  -- inCommentMulti
  --     =  void (try . string $ commentEnd languageDef)
  --    <|> (multiLineComment            >> inCommentMulti)
  --    <|> (skipSome (noneOf startEnd) >> inCommentMulti)
  --    <|> (oneOf startEnd              >> inCommentMulti)
  --    <?> "end of comment"

  -- inCommentSingle
  --     =  void (try . string $ commentEnd languageDef)
  --    <|> (skipSome (noneOf startEnd) >> inCommentSingle)
  --    <|> (oneOf startEnd              >> inCommentSingle)
  --    <?> "end of comment"

  -- startEnd = nub $ (++) <$> commentEnd <*> commentStart $ languageDef
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
+								-- |
-												renamed ‘MegaParsec’ → ‘Megaparsec’, close #10

											
										
										
											2015-08-01 19:24:45 +03:00
+								-- Module      :  Text.Megaparsec.Token
 								-- Copyright   :  © 2015 Megaparsec contributors
-												cosmetic changes in copyright (headers)

											
										
										
											2015-07-30 19:20:37 +03:00
+								--                © 2007 Paolo Martini
 								--                © 1999–2001 Daan Leijen
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								-- License     :  BSD3
 								--
 								-- Maintainer  :  Mark Karpov <markkarpov@opmbx.org>
-												refactoring, phase 2

											
										
										
											2015-07-29 11:38:32 +03:00
+								-- Stability   :  experimental
-												More accurate extension pragmas

											
										
										
											2008-01-20 09:39:18 +03:00
+								-- Portability :  non-portable (uses local universal quantification: PolymorphicComponents)
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								--
-												Tweaking Text.Parsec.Token's documentation

											
										
										
											2008-01-22 08:25:34 +03:00
+								-- A helper module to parse lexical elements (tokens). See 'makeTokenParser'
 								-- for a description of how to use it.
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												Clean most warnings

											
										
										
											2008-02-13 07:32:24 +03:00
+								{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												renamed ‘MegaParsec’ → ‘Megaparsec’, close #10

											
										
										
											2015-08-01 19:24:45 +03:00
+								module Text.Megaparsec.Token
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  ( LanguageDef (..)
 								  , TokenParser (..)
 								  , makeTokenParser )
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								where
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												re-export ‘(<|>)’, ‘many’, ‘some’, and ‘optional’, fixes #9

These functions are now re-exported from ‘Control.Applicative’
module. ‘many’ and ‘some’ are now part of ‘Alternative’ instance of
‘ParsecT’.

Note that these functions are re-exported only in ‘Text.MegaParsec’
module, but not in ‘Text.MegaParsec.Prim’ to avoid duplication of
floating doc-strings. Others internal modules now just casually import
‘Control.Applicative’ for their needs.

Note that ‘many1’ was renamed to ‘some’, the same is done for other
parsers that had ‘many1’ part in their names (for consistency).

											
										
										
											2015-08-01 17:39:20 +03:00
+								import Control.Applicative ((<|>), many, some)
 								import Control.Monad (void)
-												extend collection of character parsers, close #16

Added new character parsers in ‘Text.Megaparsec.Char’:

* ‘controlChar’
* ‘printChar’
* ‘markChar’
* ‘numberChar’
* ‘punctuationChar’
* ‘symbolChar’
* ‘separatorChar’
* ‘asciiChar’
* ‘latin1Char’
* ‘charCategory’

Renamed some parsers:

‘spaces’   → ‘space’
‘space’    → ‘spaceChar’
‘lower’    → ‘lowerChar’
‘upper’    → ‘upperChar’
‘letter’   → ‘letterChar’
‘alphaNum’ → ‘alphaNumChar’
‘digit’    → ‘digitChar’
‘octDigit’ → ‘octDigitChar’
‘hexDigit’ → ‘hexDigitChar’

Descriptions of old parsers have been updated to accent some
Unicode-specific moments. For example, old description of ‘letter’
stated that it parses letters from “a” to “z” and from “A” to “Z”. This
is wrong, since it used ‘Data.Char.isAlpha’ predicate internally and
thus parsed many more characters.

											
										
										
											2015-08-12 20:00:03 +03:00
+								import Data.Char (isAlpha, toLower, toUpper)
-												temporarily simplify token parsing

The improved error messages in Megaparsec are quite sensitive to how
parsers are written, which parts of parser are labeled, etc. Current
implementation of token parsers in ‘Text.Megaparsec.Token’ is written
without this in mind. We will improve the module later, for now let us
rewrite/simplify some parts to avoid failing tests.

											
										
										
											2015-08-19 22:11:21 +03:00
+								import Data.List (sort)
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
-												renamed ‘MegaParsec’ → ‘Megaparsec’, close #10

											
										
										
											2015-08-01 19:24:45 +03:00
+								import Text.Megaparsec.Prim
 								import Text.Megaparsec.Char
 								import Text.Megaparsec.Combinator
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- Language definition
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												various minor changes

											
										
										
											2015-08-12 15:41:22 +03:00
+								-- | The @LanguageDef@ type is a record that contains all parameters used to
 								-- control features of the "Text.Megaparsec.Token" module. The module
 								-- "Text.Megaparsec.Language" contains some default definitions.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												improved documentation, fixes #1

											
										
										
											2015-07-30 21:36:54 +03:00
+								data LanguageDef s u m =
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  LanguageDef {
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | Describes the start of a block comment. Use the empty string if the
 								  -- language doesn't support block comments.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								    commentStart :: String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | Describes the end of a block comment. Use the empty string if the
 								  -- language doesn't support block comments.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , commentEnd :: String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | Describes the start of a line comment. Use the empty string if the
 								  -- language doesn't support line comments.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , commentLine :: String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | Set to 'True' if the language supports nested block comments.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , nestedComments :: Bool
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | This parser should accept any start characters of identifiers, for
 								  -- example @letter \<|> char \'_\'@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , identStart :: ParsecT s u m Char
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | This parser should accept any legal tail characters of identifiers,
 								  -- for example @alphaNum \<|> char \'_\'@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , identLetter :: ParsecT s u m Char
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | This parser should accept any start characters of operators, for
 								  -- example @oneOf \":!#$%&*+.\/\<=>?\@\\\\^|-~\"@
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , opStart :: ParsecT s u m Char
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | This parser should accept any legal tail characters of operators.
 								  -- Note that this parser should even be defined if the language doesn't
 								  -- support user-defined operators, or otherwise the 'reservedOp' parser
 								  -- won't work correctly.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , opLetter :: ParsecT s u m Char
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | The list of reserved identifiers.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , reservedNames :: [String]
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | The list of reserved operators.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , reservedOpNames :: [String]
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | Set to 'True' if the language is case sensitive.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , caseSensitive :: Bool }
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												improved documentation, fixes #1

											
										
										
											2015-07-30 21:36:54 +03:00
+								-- Token parser
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
 								-- | The type of the record that holds lexical parsers that work on
 								-- @s@ streams with state @u@ over a monad @m@.
-												improved documentation, fixes #1

											
										
										
											2015-07-30 21:36:54 +03:00
+								data TokenParser s u m =
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  TokenParser {
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | The lexeme parser parses a legal identifier. Returns the identifier
 								  -- string. This parser will fail on identifiers that are reserved
 								  -- words. Legal identifier (start) characters and reserved words are
 								  -- defined in the 'LanguageDef' that is passed to 'makeTokenParser'.
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								    identifier :: ParsecT s u m String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | The lexeme parser @reserved name@ parses @symbol name@, but it also
 								  -- checks that the @name@ is not a prefix of a valid identifier.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , reserved :: String -> ParsecT s u m ()
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | The lexeme parser parses a legal operator. Returns the name of the
 								  -- operator. This parser will fail on any operators that are reserved
 								  -- operators. Legal operator (start) characters and reserved operators are
 								  -- defined in the 'LanguageDef' that is passed to 'makeTokenParser'.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , operator :: ParsecT s u m String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | The lexeme parser @reservedOp name@ parses @symbol name@, but it
 								  -- also checks that the @name@ is not a prefix of a valid operator.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , reservedOp :: String -> ParsecT s u m ()
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | The lexeme parser parses a single literal character. Returns the
 								  -- literal character value. This parser deals correctly with escape
 								  -- sequences. The literal character is parsed according to the grammar
 								  -- rules defined in the Haskell report (which matches most programming
 								  -- languages quite closely).
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , charLiteral :: ParsecT s u m Char
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | The lexeme parser parses a literal string. Returns the literal
 								  -- string value. This parser deals correctly with escape sequences and
 								  -- gaps. The literal string is parsed according to the grammar rules
 								  -- defined in the Haskell report (which matches most programming languages
 								  -- quite closely).
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , stringLiteral :: ParsecT s u m String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | The lexeme parser parses an integer (a whole number). This parser
 								  -- /does not/ parse sign. Returns the value of the number. The number can
 								  -- be specified in 'decimal', 'hexadecimal' or 'octal'. The number is
 								  -- parsed according to the grammar rules in the Haskell report.
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , integer :: ParsecT s u m Integer
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | This is just like 'integer', except it can parse sign.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , integer' :: ParsecT s u m Integer
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | The lexeme parser parses a positive whole number in the decimal system.
 								  -- Returns the value of the number.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , decimal :: ParsecT s u m Integer
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | The lexeme parser parses a positive whole number in the hexadecimal
 								  -- system. The number should be prefixed with “0x” or “0X”. Returns the
 								  -- value of the number.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , hexadecimal :: ParsecT s u m Integer
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | The lexeme parser parses a positive whole number in the octal system.
 								  -- The number should be prefixed with “0o” or “0O”. Returns the value of
 								  -- the number.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , octal :: ParsecT s u m Integer
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | @signed p@ tries to parse sign (i.e. “+”, “-”, or nothing) and
 								  -- then runs parser @p@, changing sign of its result accordingly. Note
 								  -- that there may be white space after the sign but not before it.
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , signed :: forall a . Num a => ParsecT s u m a -> ParsecT s u m a
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | The lexeme parser parses a floating point value. Returns the value
 								  -- of the number. The number is parsed according to the grammar rules
 								  -- defined in the Haskell report, sign is /not/ parsed, use 'float'' to
 								  -- achieve parsing of signed floating point values.
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , float :: ParsecT s u m Double
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | This is just like 'float', except it can parse sign.
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , float' :: ParsecT s u m Double
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | The lexeme parser parses either 'integer' or a 'float'.
 								  -- Returns the value of the number. This parser deals with any overlap in
 								  -- the grammar rules for integers and floats. The number is parsed
 								  -- according to the grammar rules defined in the Haskell report.
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , number :: ParsecT s u m (Either Integer Double)
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | This is just like 'number', except it can parse sign.
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , number' :: ParsecT s u m (Either Integer Double)
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | Lexeme parser @symbol s@ parses 'string' @s@ and skips
 								  -- trailing white space.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , symbol :: String -> ParsecT s u m String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | @lexeme p@ first applies parser @p@ and then the 'whiteSpace'
 								  -- parser, returning the value of @p@. Every lexical token (lexeme) is
 								  -- defined using @lexeme@, this way every parse starts at a point without
 								  -- white space. Parsers that use @lexeme@ are called /lexeme/ parsers in
 								  -- this document.
 								  --
 								  -- The only point where the 'whiteSpace' parser should be called
 								  -- explicitly is the start of the main parser in order to skip any leading
 								  -- white space.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , lexeme :: forall a. ParsecT s u m a -> ParsecT s u m a
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | Parses any white space. White space consists of /zero/ or more
 								  -- occurrences of a 'space', a line comment or a block (multi line)
 								  -- comment. Block comments may be nested. How comments are started and
 								  -- ended is defined in the 'LanguageDef' that is passed to
 								  -- 'makeTokenParser'.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , whiteSpace :: ParsecT s u m ()
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | Lexeme parser @parens p@ parses @p@ enclosed in parentheses,
 								  -- returning the value of @p@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , parens :: forall a. ParsecT s u m a -> ParsecT s u m a
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | Lexeme parser @braces p@ parses @p@ enclosed in braces (“{” and
 								  -- “}”), returning the value of @p@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , braces :: forall a. ParsecT s u m a -> ParsecT s u m a
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | Lexeme parser @angles p@ parses @p@ enclosed in angle brackets (“\<”
 								  -- and “>”), returning the value of @p@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , angles :: forall a. ParsecT s u m a -> ParsecT s u m a
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | Lexeme parser @brackets p@ parses @p@ enclosed in brackets (“[”
 								  -- and “]”), returning the value of @p@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , brackets :: forall a. ParsecT s u m a -> ParsecT s u m a
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | Lexeme parser @semicolon@ parses the character “;” and skips any
 								  -- trailing white space. Returns the string “;”.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , semicolon :: ParsecT s u m String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | Lexeme parser @comma@ parses the character “,” and skips any
 								  -- trailing white space. Returns the string “,”.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , comma :: ParsecT s u m String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | Lexeme parser @colon@ parses the character “:” and skips any
 								  -- trailing white space. Returns the string “:”.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , colon :: ParsecT s u m String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | Lexeme parser @dot@ parses the character “.” and skips any
 								  -- trailing white space. Returns the string “.”.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , dot :: ParsecT s u m String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | Lexeme parser @semicolonSep p@ parses /zero/ or more occurrences of @p@
 								  -- separated by 'semicolon'. Returns a list of values returned by @p@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , semicolonSep :: forall a . ParsecT s u m a -> ParsecT s u m [a]
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | Lexeme parser @semicolonSep1 p@ parses /one/ or more occurrences of @p@
 								  -- separated by 'semicolon'. Returns a list of values returned by @p@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , semicolonSep1 :: forall a . ParsecT s u m a -> ParsecT s u m [a]
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | Lexeme parser @commaSep p@ parses /zero/ or more occurrences of
 								  -- @p@ separated by 'comma'. Returns a list of values returned by @p@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , commaSep :: forall a . ParsecT s u m a -> ParsecT s u m [a]
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- | Lexeme parser @commaSep1 p@ parses /one/ or more occurrences of
 								  -- @p@ separated by 'comma'. Returns a list of values returned by @p@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , commaSep1 :: forall a . ParsecT s u m a -> ParsecT s u m [a] }
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- Given a LanguageDef, create a token parser
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												improved documentation, fixes #1

											
										
										
											2015-07-30 21:36:54 +03:00
+								-- | The expression @makeTokenParser language@ creates a 'TokenParser'
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- record that contains lexical parsers that are defined using the
 								-- definitions in the @language@ record.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
+								--
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- The use of this function is quite stylized — one imports the appropriate
 								-- language definition and selects the lexical parsers that are needed from
-												improved documentation, fixes #1

											
										
										
											2015-07-30 21:36:54 +03:00
+								-- the resulting 'TokenParser'.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
+								--
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- > module Main (main) where
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
+								-- >
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- > import Text.Megaparsec
 								-- > import qualified Text.Megaparsec.Token as Token
 								-- > import Text.Megaparsec.Language (haskellDef)
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
+								-- >
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- > -- The parser
 								-- > ...
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								-- >
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- > expr =  parens expr
 								-- >     <|> identifier
 								-- >     <|> ...
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
+								-- >
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- > -- The lexer
 								-- > lexer      = Token.makeTokenParser haskellDef
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								-- >
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- > parens     = Token.parens     lexer
 								-- > braces     = Token.braces     lexer
 								-- > identifier = Token.identifier lexer
 								-- > reserved   = Token.reserved   lexer
 								-- > ...
-												improved documentation, fixes #1

											
										
										
											2015-07-30 21:36:54 +03:00
+								makeTokenParser :: Stream s m Char => LanguageDef s u m -> TokenParser s u m
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								makeTokenParser languageDef =
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  TokenParser
 								  { identifier    = identifier
 								  , reserved      = reserved
 								  , operator      = operator
 								  , reservedOp    = reservedOp
 								  , charLiteral   = charLiteral
 								  , stringLiteral = stringLiteral
 								  , integer       = integer
 								  , integer'      = integer'
 								  , decimal       = decimal
 								  , hexadecimal   = hexadecimal
 								  , octal         = octal
 								  , signed        = signed
 								  , float         = float
 								  , float'        = float'
 								  , number        = number
 								  , number'       = number'
 								  , symbol        = symbol
 								  , lexeme        = lexeme
 								  , whiteSpace    = whiteSpace
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  , parens        = parens
 								  , braces        = braces
 								  , angles        = angles
 								  , brackets      = brackets
 								  , semicolon     = semicolon
 								  , comma         = comma
 								  , colon         = colon
 								  , dot           = dot
 								  , semicolonSep  = semicolonSep
 								  , semicolonSep1 = semicolonSep1
 								  , commaSep      = commaSep
 								  , commaSep1     = commaSep1 }
 								  where
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- bracketing
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  parens    = between (symbol "(") (symbol ")")
 								  braces    = between (symbol "{") (symbol "}")
 								  angles    = between (symbol "<") (symbol ">")
 								  brackets  = between (symbol "[") (symbol "]")
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  semicolon = symbol ";"
 								  comma     = symbol ","
 								  dot       = symbol "."
 								  colon     = symbol ":"
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  commaSep  = (`sepBy` comma)
 								  semicolonSep = (`sepBy` semicolon)
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  commaSep1 = (`sepBy1` comma)
 								  semicolonSep1 = (`sepBy1` semicolon)
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- chars & strings
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  charLiteral = lexeme ( between (char '\'')
 								                                 (char '\'' <?> "end of character")
 								                                 characterChar )
 								                <?> "character"
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  characterChar = charLetter <|> charEscape <?> "literal character"
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  charEscape = char '\\' >> escapeCode
 								  charLetter = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026'))
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  stringLiteral =
 								      lexeme ((foldr (maybe id (:)) "" <$>
 								               between (char '"') (char '"' <?> "end of string")
 								                           (many stringChar)) <?> "literal string")
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  stringChar = (Just <$> stringLetter) <|> stringEscape <?> "string character"
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  stringLetter = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026'))
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  stringEscape = char '\\' >>
 								                 ( (escapeGap >> return Nothing)   <|>
 								                   (escapeEmpty >> return Nothing) <|>
 								                   (Just <$> escapeCode) )
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  escapeEmpty = char '&'
 								  escapeGap   = some spaceChar >> char '\\' <?> "end of string gap"
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- escape codes
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  escapeCode = charEsc <|> charNum <|> charAscii <|> charControl
 								               <?> "escape code"
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  charEsc = choice (parseEsc <$> escMap)
 								      where parseEsc (c, code) = char c >> return code
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  charNum = toEnum . fromInteger <$>
 								            -- Numeric escape: decimal digits, or 'o' / 'x' followed by
 								            -- octal / hexadecimal digits. The decimal digits are read
 								            -- with the raw 'nump' helper and not with 'decimal', because
 								            -- 'decimal' is a lexeme parser: it would also skip the white
 								            -- space that follows the escape inside a literal.
 								            ( nump "" digitChar <|>
 								             (char 'o' >> nump "0o" octDigitChar) <|>
 								             (char 'x' >> nump "0x" hexDigitChar) )
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  charAscii = choice (parseAscii <$> asciiMap)
 								      where parseAscii (asc, code) = try (string asc >> return code)
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  charControl = toEnum . subtract 64 . fromEnum <$> (char '^' >> upperChar)
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- escape code tables
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  escMap      = zip "abfnrtv\\\"\'" "\a\b\f\n\r\t\v\\\"\'"
 								  asciiMap    = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2)
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  ascii2codes = ["BS","HT","LF","VT","FF","CR","SO","SI","EM",
 								                 "FS","GS","RS","US","SP"]
 								  ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK","BEL",
 								                 "DLE","DC1","DC2","DC3","DC4","NAK","SYN","ETB",
 								                 "CAN","SUB","ESC","DEL"]
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  ascii2 = "\b\t\n\v\f\r\SO\SI\EM\FS\GS\RS\US "
 								  ascii3 = "\NUL\SOH\STX\ETX\EOT\ENQ\ACK\a\DLE\DC1\DC2\DC3\DC4\NAK\SYN\ETB\CAN\SUB\ESC\DEL"
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- numbers — integers
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
-												temporarily simplify token parsing

The improved error messages in Megaparsec are quite sensitive to how
parsers are written, which parts of parser are labeled, etc. Current
implementation of token parsers in ‘Text.Megaparsec.Token’ is written
without this in mind. We will improve the module later, for now let us
rewrite/simplify some parts to avoid failing tests.

											
										
										
											2015-08-19 22:11:21 +03:00
+								  integer  = decimal
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  integer' = signed integer
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
-												temporarily simplify token parsing

The improved error messages in Megaparsec are quite sensitive to how
parsers are written, which parts of parser are labeled, etc. Current
implementation of token parsers in ‘Text.Megaparsec.Token’ is written
without this in mind. We will improve the module later, for now let us
rewrite/simplify some parts to avoid failing tests.

											
										
										
											2015-08-19 22:11:21 +03:00
+								  decimal     = lexeme (nump "" digitChar <?> "integer")
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  hexadecimal = lexeme $ char '0' >> oneOf "xX" >> nump "0x" hexDigitChar
 								  octal       = lexeme $ char '0' >> oneOf "oO" >> nump "0o" octDigitChar
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  nump prefix baseDigit = read . (prefix ++) <$> some baseDigit
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  signed p = ($) <$> option id (lexeme sign) <*> p
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  sign :: (Stream s m Char, Num a) => ParsecT s u m (a -> a)
 								  sign = (char '+' *> return id) <|> (char '-' *> return negate)
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  -- numbers — floats
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												cosmetic changes (indentation, etc)

											
										
										
											2015-08-12 20:51:06 +03:00
+								  float  = lexeme ffloat <?> "float" -- unsigned float, then trailing white space
 								  -- Same as 'float', with an optional leading sign handled by 'signed'.
 								  float' = signed float
 								  -- Reads a floating point literal: an integer part that must be
 								  -- followed either by a fractional part or by an exponent.
 								  ffloat = read <$> literal
 								    where
 								      literal = (++) <$> fDec <*> (fraction <|> fExp)
 								  -- Fractional part of a float: a dot, at least one digit and an
 								  -- optional exponent. The dot is kept in the returned text.
 								  fraction = assemble <$> (char '.' >> fDec) <*> option "" fExp
 								    where
 								      assemble digits expo = '.' : digits ++ expo
 								  fDec = some digitChar -- one or more decimal digits, returned as a string
 								  -- Exponent of a float: 'e' or 'E', an optional sign and at least
 								  -- one digit. The matched text is returned as is.
 								  fExp = build <$> oneOf "eE" <*> option "" (pure <$> oneOf "+-") <*> fDec
 								    where
 								      build marker sgn digits = marker : sgn ++ digits
 								  -- numbers — a more general case
 								  -- A float is tried first, with backtracking via 'try'; if that
 								  -- fails an integer is parsed instead.
 								  number  = (fmap Right (try float)  <|> fmap Left integer)  <?> "number"
 								  number' = (fmap Right (try float') <|> fmap Left integer') <?> "number"
 								  -- operators & reserved ops
 								  -- Parses the given reserved operator, which must not be followed
 								  -- by another operator letter. The whole attempt is wrapped in 'try'.
 								  reservedOp name = lexeme (try (string name >> end))
 								    where
 								      end = notFollowedBy (opLetter languageDef) <?> ("end of " ++ show name)
 								  -- Parses an operator and rejects it when it is one of the reserved
 								  -- operators of the language.
 								  operator = lexeme (try (oper >>= check))
 								    where
 								      check name
 								        | isReservedOp name = unexpected ("reserved operator " ++ show name)
 								        | otherwise         = return name
 								  -- Raw operator: a start character followed by any number of
 								  -- operator letters.
 								  oper = rawOper <?> "operator"
 								    where
 								      rawOper = do
 								        start <- opStart languageDef
 								        rest  <- many (opLetter languageDef)
 								        return (start : rest)
 								  -- Membership test against the sorted list of reserved operators.
 								  isReservedOp = isReserved (sort (reservedOpNames languageDef))
 								  -- identifiers & reserved words
 								  -- Parses the given reserved word, which must not be followed by
 								  -- another identifier letter. The whole attempt is wrapped in 'try'.
 								  reserved name = lexeme (try (caseString name >> end))
 								    where
 								      end = notFollowedBy (identLetter languageDef) <?> ("end of " ++ show name)
 								  -- Matches the given word. When the language is not case sensitive,
 								  -- every letter is accepted in either case and the word is returned
 								  -- exactly as it was passed in.
 								  caseString name =
 								      if caseSensitive languageDef
 								      then string name
 								      else mapM_ anyCase name >> return name
 								      where anyCase c = eitherCase c <?> show name
 								            eitherCase c
 								                | isAlpha c = char (toLower c) <|> char (toUpper c)
 								                | otherwise = char c
 								  -- Parses an identifier and rejects it when it is one of the
 								  -- reserved words of the language.
 								  identifier = lexeme (try (ident >>= check))
 								    where
 								      check name
 								        | isReservedName name = unexpected ("reserved word " ++ show name)
 								        | otherwise           = return name
 								  -- Raw identifier: a start character followed by any number of
 								  -- identifier letters.
 								  ident = rawIdent <?> "identifier"
 								    where
 								      rawIdent = do
 								        start <- identStart languageDef
 								        rest  <- many (identLetter languageDef)
 								        return (start : rest)
 								  -- Checks a candidate identifier against the reserved words, folding
 								  -- it to lower case first when the language is not case sensitive.
 								  isReservedName name = isReserved theReservedNames normalized
 								      where normalized = if caseSensitive languageDef
 								                         then name
 								                         else map toLower name
 								  -- Looks the name up in a list that is expected to be sorted in
 								  -- ascending order: smaller entries are skipped and the first
 								  -- remaining entry decides the result.
 								  isReserved names name =
 								      case dropWhile (< name) names of
 								        (candidate:_) -> candidate == name
 								        []            -> False
 								  -- Reserved words in sorted order, lower-cased when the language is
 								  -- not case sensitive.
 								  theReservedNames = sort (normalize <$> reservedNames languageDef)
 								      where normalize
 								                | caseSensitive languageDef = id
 								                | otherwise                 = fmap toLower
 								  -- white space & symbols
 								  -- Parses the given string as a lexeme.
 								  symbol name = lexeme (string name)
 								  -- Runs the parser, then skips white space, keeping the parser's result.
 								  lexeme p = do
 								    result <- p
 								    whiteSpace
 								    return result
-												temporarily simplify token parsing

The improved error messages in Megaparsec are quite sensitive to how
parsers are written, which parts of parser are labeled, etc. Current
implementation of token parsers in ‘Text.Megaparsec.Token’ is written
without this in mind. We will improve the module later, for now let us
rewrite/simplify some parts to avoid failing tests.

											
										
										
											2015-08-19 22:11:21 +03:00
+								  whiteSpace = hidden space -- FIXME: write it in a decent manner
 								      -- \| noLine && noMulti = skipMany (space            <?> "")
 								      -- \| noLine            = skipMany (space            <|>
 								      --                                 multiLineComment <?> "")
 								      -- \| noMulti           = skipMany (space            <|>
 								      --                                 oneLineComment   <?> "")
 								      -- \| otherwise         = skipMany (space            <|>
 								      --                                 oneLineComment   <|>
 								      --                                 multiLineComment <?> "")
 								      -- where
 								      --   noLine  = null (commentLine languageDef)
 								      --   noMulti = null (commentStart languageDef)
 								  -- oneLineComment = void (try (string (commentLine languageDef))
 								  --                       >> skipMany (satisfy (/= '\n')))
 								  -- multiLineComment = try (string (commentStart languageDef)) >> inComment
 								  -- inComment = if nestedComments languageDef
 								  --             then inCommentMulti
 								  --             else inCommentSingle
 								  -- inCommentMulti
 								  --     =  void (try . string $ commentEnd languageDef)
 								  --    <|> (multiLineComment            >> inCommentMulti)
 								  --    <|> (skipSome (noneOf startEnd) >> inCommentMulti)
 								  --    <|> (oneOf startEnd              >> inCommentMulti)
 								  --    <?> "end of comment"
 								  -- inCommentSingle
 								  --     =  void (try . string $ commentEnd languageDef)
 								  --    <|> (skipSome (noneOf startEnd) >> inCommentSingle)
 								  --    <|> (oneOf startEnd              >> inCommentSingle)
 								  --    <?> "end of comment"
 								  -- startEnd = nub $ (++) <$> commentEnd <*> commentStart $ languageDef