megaparsec/Text/Megaparsec/Token.hs

-- |
-- Module      :  Text.Megaparsec.Token
-- Copyright   :  © 2015 Megaparsec contributors
--                © 2007 Paolo Martini
--                © 1999–2001 Daan Leijen
-- License     :  BSD3
--
-- Maintainer  :  Mark Karpov <markkarpov@opmbx.org>
-- Stability   :  experimental
-- Portability :  non-portable (uses local universal quantification: PolymorphicComponents)
--
-- A helper module to parse lexical elements (tokens). See 'makeTokenParser'
-- for a description of how to use it.

{-# OPTIONS_GHC -fno-warn-name-shadowing #-}

module Text.Megaparsec.Token
    ( LanguageDef (..)
    , TokenParser (..)
    , makeTokenParser )
where

import Control.Applicative ((<|>), many, some)
import Control.Monad (void)
import Data.Char (isAlpha, toLower, toUpper, isSpace)
import Data.List (nub, sort)

import Text.Megaparsec.Prim
import Text.Megaparsec.Char
import Text.Megaparsec.Combinator

-- Language definition

-- | The @LanguageDef@ type is a record that contains all parameterizable
-- features of the "Text.Megaparsec.Token" module. The module
-- "Text.Megaparsec.Language" contains some default definitions.

data LanguageDef s u m =
    LanguageDef {

    -- | The string that opens a block comment, for example \"\/*\". Use the
    -- empty string if the language doesn't support block comments: the
    -- white space parser built by 'makeTokenParser' skips block comments
    -- only when this string is non-empty.

      commentStart :: String

    -- | The string that closes a block comment, for example \"*\/\". Use
    -- the empty string if the language doesn't support block comments.
    -- Note that only 'commentStart' is inspected to decide whether block
    -- comments are enabled.

    , commentEnd :: String

    -- | The string that opens a line comment, for example \"\/\/\". A line
    -- comment extends up to (but not including) the next newline
    -- character. Use the empty string if the language doesn't support line
    -- comments.

    , commentLine :: String

    -- | Set to 'True' if the language supports nested block comments, i.e.
    -- if a 'commentStart' inside a block comment opens an inner comment
    -- that needs its own 'commentEnd'.

    , nestedComments :: Bool

    -- | This parser should accept any start characters of identifiers. For
    -- example @letter \<|> char \'_\'@.

    , identStart :: ParsecT s u m Char

    -- | This parser should accept any legal tail characters of identifiers.
    -- For example @alphaNum \<|> char \'_\'@. It is also used by the
    -- 'reserved' parser to check that a reserved word is not immediately
    -- followed by further identifier characters.

    , identLetter :: ParsecT s u m Char

    -- | This parser should accept any start characters of operators. For
    -- example @oneOf \":!#$%&*+.\/\<=>?\@\\\\^|-~\"@

    , opStart :: ParsecT s u m Char

    -- | This parser should accept any legal tail characters of operators.
    -- Note that this parser should be defined even if the language doesn't
    -- support user-defined operators, because the 'reservedOp' parser uses
    -- it to check that a reserved operator is not immediately followed by
    -- further operator characters.

    , opLetter :: ParsecT s u m Char

    -- | The list of reserved identifiers. The 'identifier' parser rejects
    -- every name in this list. When 'caseSensitive' is 'False' both the
    -- list and the parsed identifier are lower-cased before comparison.

    , reservedNames :: [String]

    -- | The list of reserved operators. The 'operator' parser rejects
    -- every name in this list.

    , reservedOpNames :: [String]

    -- | Set to 'True' if the language is case sensitive. This affects how
    -- 'reserved' matches its argument and how 'identifier' detects
    -- reserved words.

    , caseSensitive :: Bool }

-- Token parser

-- | The type of the record that holds lexical parsers that work on
-- @s@ streams with state @u@ over a monad @m@.

data TokenParser s u m =
    TokenParser {

    -- | This lexeme parser parses a legal identifier and returns the
    -- identifier string. It fails on identifiers that are reserved words.
    -- Legal identifier (start) characters and reserved words are defined
    -- in the 'LanguageDef' that is passed to 'makeTokenParser'. An
    -- @identifier@ is treated as a single token using 'try'.

      identifier :: ParsecT s u m String

    -- | The lexeme parser @reserved name@ parses @symbol name@, but it
    -- also checks that the @name@ is not a prefix of a valid identifier
    -- (i.e. that it is not followed by an identifier letter). When the
    -- language is not case sensitive, every alphabetic character of @name@
    -- is matched in either case. A @reserved@ word is treated as a single
    -- token using 'try'.

    , reserved :: String -> ParsecT s u m ()

    -- | This lexeme parser parses a legal operator and returns the name of
    -- the operator. It fails on operators that are reserved operators.
    -- Legal operator (start) characters and reserved operators are defined
    -- in the 'LanguageDef' that is passed to 'makeTokenParser'. An
    -- @operator@ is treated as a single token using 'try'.

    , operator :: ParsecT s u m String

    -- | The lexeme parser @reservedOp name@ parses @symbol name@, but it
    -- also checks that the @name@ is not a prefix of a valid operator
    -- (i.e. that it is not followed by an operator letter). A @reservedOp@
    -- is treated as a single token using 'try'.

    , reservedOp :: String -> ParsecT s u m ()

    -- | This lexeme parser parses a single literal character enclosed in
    -- single quotes and returns the character value. Escape sequences are
    -- recognised. The literal character is parsed according to the grammar
    -- rules defined in the Haskell report (which matches most programming
    -- languages quite closely).

    , charLiteral :: ParsecT s u m Char

    -- | This lexeme parser parses a literal string enclosed in double
    -- quotes and returns the string value. Escape sequences and string
    -- gaps are recognised. The literal string is parsed according to the
    -- grammar rules defined in the Haskell report (which matches most
    -- programming languages quite closely).

    , stringLiteral :: ParsecT s u m String

    -- | This lexeme parser parses an integer (a whole number) and returns
    -- its value. This parser /does not/ parse sign. In the current
    -- implementation it is the same parser as 'decimal' (with the label
    -- \"integer\"), so only decimal notation is accepted; use
    -- 'hexadecimal' or 'octal' explicitly for the other bases.

    , integer :: ParsecT s u m Integer

    -- | This is just like 'integer', except it can parse sign.

    , integer' :: ParsecT s u m Integer

    -- | This lexeme parser parses an unsigned whole number in the decimal
    -- system (one or more digits). Returns the value of the number.

    , decimal :: ParsecT s u m Integer

    -- | This lexeme parser parses an unsigned whole number in the
    -- hexadecimal system. The number should be prefixed with \"0x\" or
    -- \"0X\". Returns the value of the number.

    , hexadecimal :: ParsecT s u m Integer

    -- | This lexeme parser parses an unsigned whole number in the octal
    -- system. The number should be prefixed with \"0o\" or \"0O\". Returns
    -- the value of the number.

    , octal :: ParsecT s u m Integer

    -- | @signed p@ tries to parse sign (i.e. \'+\', \'-\', or nothing) and
    -- then runs parser @p@, changing sign of its result accordingly. Note
    -- that there may be white space after the sign but not before it.

    , signed :: forall a . Num a => ParsecT s u m a -> ParsecT s u m a

    -- | This lexeme parser parses a floating point value and returns the
    -- value of the number. The integer part must be followed by a
    -- fractional part (optionally with an exponent) or by an exponent
    -- alone. Sign is /not/ parsed, use 'signed' to achieve parsing of
    -- signed floating point values.

    , float :: ParsecT s u m Double

    -- | This is just like 'float', except it can parse sign.

    , float' :: ParsecT s u m Double

    -- | This lexeme parser parses either a 'float' or an 'integer' and
    -- returns the value of the number. A 'float' is attempted first (with
    -- backtracking), so the overlap in the grammar rules for integers and
    -- floats is handled here.

    , number :: ParsecT s u m (Either Integer Double)

    -- | This is just like 'number', except it can parse sign.

    , number' :: ParsecT s u m (Either Integer Double)

    -- | Lexeme parser @symbol s@ parses 'string' @s@ and skips
    -- trailing white space.

    , symbol :: String -> ParsecT s u m String

    -- | @lexeme p@ first applies parser @p@ and then the 'whiteSpace'
    -- parser, returning the value of @p@. Every lexical token (lexeme) is
    -- defined using @lexeme@, this way every parse starts at a point
    -- without white space. Parsers that use @lexeme@ are called /lexeme/
    -- parsers in this document.
    --
    -- The only point where the 'whiteSpace' parser should be called
    -- explicitly is the start of the main parser in order to skip any
    -- leading white space.

    , lexeme :: forall a. ParsecT s u m a -> ParsecT s u m a

    -- | Parses any white space. White space consists of /zero/ or more
    -- occurrences of a white space character, a line comment or a block
    -- (multi line) comment. Block comments may be nested if the
    -- 'LanguageDef' says so. How comments are started and ended is defined
    -- in the 'LanguageDef' that is passed to 'makeTokenParser'.

    , whiteSpace :: ParsecT s u m ()

    -- | Lexeme parser @parens p@ parses @p@ enclosed in parentheses,
    -- returning the value of @p@.

    , parens :: forall a. ParsecT s u m a -> ParsecT s u m a

    -- | Lexeme parser @braces p@ parses @p@ enclosed in braces (\'{\' and
    -- \'}\'), returning the value of @p@.

    , braces :: forall a. ParsecT s u m a -> ParsecT s u m a

    -- | Lexeme parser @angles p@ parses @p@ enclosed in angle brackets (\'\<\'
    -- and \'>\'), returning the value of @p@.

    , angles :: forall a. ParsecT s u m a -> ParsecT s u m a

    -- | Lexeme parser @brackets p@ parses @p@ enclosed in brackets (\'[\'
    -- and \']\'), returning the value of @p@.

    , brackets :: forall a. ParsecT s u m a -> ParsecT s u m a

    -- | Lexeme parser @semi@ parses the character \';\' and skips any
    -- trailing white space. Returns the string \";\".

    , semi :: ParsecT s u m String

    -- | Lexeme parser @comma@ parses the character \',\' and skips any
    -- trailing white space. Returns the string \",\".

    , comma :: ParsecT s u m String

    -- | Lexeme parser @colon@ parses the character \':\' and skips any
    -- trailing white space. Returns the string \":\".

    , colon :: ParsecT s u m String

    -- | Lexeme parser @dot@ parses the character \'.\' and skips any
    -- trailing white space. Returns the string \".\".

    , dot :: ParsecT s u m String

    -- | Lexeme parser @semiSep p@ parses /zero/ or more occurrences of @p@
    -- separated by 'semi'. Returns a list of values returned by @p@.

    , semiSep :: forall a . ParsecT s u m a -> ParsecT s u m [a]

    -- | Lexeme parser @semiSep1 p@ parses /one/ or more occurrences of @p@
    -- separated by 'semi'. Returns a list of values returned by @p@.

    , semiSep1 :: forall a . ParsecT s u m a -> ParsecT s u m [a]

    -- | Lexeme parser @commaSep p@ parses /zero/ or more occurrences of
    -- @p@ separated by 'comma'. Returns a list of values returned by @p@.

    , commaSep :: forall a . ParsecT s u m a -> ParsecT s u m [a]

    -- | Lexeme parser @commaSep1 p@ parses /one/ or more occurrences of
    -- @p@ separated by 'comma'. Returns a list of values returned by @p@.

    , commaSep1 :: forall a . ParsecT s u m a -> ParsecT s u m [a] }

-- Given a LanguageDef, create a token parser

-- | The expression @makeTokenParser language@ creates a 'TokenParser'
-- record that contains lexical parsers that are defined using the
-- definitions in the @language@ record.
--
-- The use of this function is quite stylized — one imports the appropriate
-- language definition and selects the lexical parsers that are needed from
-- the resulting 'TokenParser'.
--
-- > module Main (main) where
-- >
-- > import Text.Megaparsec
-- > import qualified Text.Megaparsec.Token as Token
-- > import Text.Megaparsec.Language (haskellDef)
-- >
-- > -- The parser
-- > ...
-- >
-- > expr =  parens expr
-- >     <|> identifier
-- >     <|> ...
-- >
-- > -- The lexer
-- > lexer      = Token.makeTokenParser haskellDef
-- >
-- > parens     = Token.parens     lexer
-- > braces     = Token.braces     lexer
-- > identifier = Token.identifier lexer
-- > reserved   = Token.reserved   lexer
-- > ...

makeTokenParser :: Stream s m Char => LanguageDef s u m -> TokenParser s u m
makeTokenParser languageDef =
    -- Every field is bound to the local definition of the same name from
    -- the 'where' clause below; the local names deliberately shadow the
    -- record selectors (name-shadowing warnings are switched off for this
    -- module).
    TokenParser
    { identifier    = identifier
    , reserved      = reserved
    , operator      = operator
    , reservedOp    = reservedOp

    , charLiteral   = charLiteral
    , stringLiteral = stringLiteral

    , integer       = integer
    , integer'      = integer'
    , decimal       = decimal
    , hexadecimal   = hexadecimal
    , octal         = octal
    , signed        = signed
    , float         = float
    , float'        = float'
    , number        = number
    , number'       = number'

    , symbol        = symbol
    , lexeme        = lexeme
    , whiteSpace    = whiteSpace

    , parens        = parens
    , braces        = braces
    , angles        = angles
    , brackets      = brackets
    , semi          = semi
    , comma         = comma
    , colon         = colon
    , dot           = dot
    , semiSep       = semiSep
    , semiSep1      = semiSep1
    , commaSep      = commaSep
    , commaSep1     = commaSep1 }
    where

    -- bracketing

    parens    = between (symbol "(") (symbol ")")
    braces    = between (symbol "{") (symbol "}")
    angles    = between (symbol "<") (symbol ">")
    brackets  = between (symbol "[") (symbol "]")

    semi      = symbol ";"
    comma     = symbol ","
    dot       = symbol "."
    colon     = symbol ":"

    commaSep  = (`sepBy` comma)
    semiSep   = (`sepBy` semi)

    commaSep1 = (`sepBy1` comma)
    semiSep1  = (`sepBy1` semi)

    -- chars & strings

    -- Only the literal as a whole is a lexeme: nothing between the quotes
    -- may skip white space, otherwise the content of the literal changes.
    charLiteral = lexeme ( between (char '\'')
                                   (char '\'' <?> "end of character")
                                   characterChar )
                  <?> "character"

    characterChar = charLetter <|> charEscape <?> "literal character"

    charEscape = char '\\' >> escapeCode
    -- '\026' is a decimal escape (code point 26), so the quote, the
    -- backslash and every character with code <= 26 are rejected here.
    charLetter = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026'))

    -- Each string element yields 'Just' a character or 'Nothing' (for a gap
    -- or the empty escape "\&"); the fold keeps the 'Just' values in order.
    stringLiteral =
        lexeme ((foldr (maybe id (:)) "" <$>
                 between (char '"') (char '"' <?> "end of string")
                             (many stringChar)) <?> "literal string")

    stringChar = (Just <$> stringLetter) <|> stringEscape <?> "string character"

    stringLetter = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026'))

    stringEscape = char '\\' >>
                   ( (escapeGap >> return Nothing)   <|>
                     (escapeEmpty >> return Nothing) <|>
                     (Just <$> escapeCode) )

    escapeEmpty = char '&'
    escapeGap   = some space >> char '\\' <?> "end of string gap"

    -- escape codes

    escapeCode = charEsc <|> charNum <|> charAscii <|> charControl
                 <?> "escape code"

    charEsc = choice (parseEsc <$> escMap)
        where parseEsc (c, code) = char c >> return code

    -- Numeric escape: decimal digits, or 'o' + octal digits, or 'x' + hex
    -- digits. The digits are read with the raw 'nump' parser rather than
    -- the 'decimal' lexeme: a lexeme would skip white space (and comments)
    -- that follow the escape, silently dropping them from the literal.
    -- Codes above the largest 'Char' are reported as a parse error instead
    -- of being passed to 'toEnum', which would raise an exception (or wrap
    -- around through 'Int' for very large values).
    charNum = do
      code <- nump "" digit
          <|> (char 'o' >> nump "0o" octDigit)
          <|> (char 'x' >> nump "0x" hexDigit)
      if code > toInteger (fromEnum (maxBound :: Char))
        then unexpected ("out-of-range character code " ++ show code)
        else return (toEnum (fromInteger code))

    -- 'try' lets a failed name (e.g. "SOH" on input "SO") backtrack so that
    -- a later, shorter name can still match; three-letter names come first
    -- in 'asciiMap'.
    charAscii = choice (parseAscii <$> asciiMap)
        where parseAscii (asc, code) = try (string asc >> return code)

    -- "^A" denotes the character whose code is that of 'A' minus 64.
    charControl = toEnum . subtract 64 . fromEnum <$> (char '^' >> upper)

    -- escape code tables

    escMap      = zip "abfnrtv\\\"\'" "\a\b\f\n\r\t\v\\\"\'"
    asciiMap    = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2)

    ascii2codes = ["BS","HT","LF","VT","FF","CR","SO","SI","EM",
                   "FS","GS","RS","US","SP"]
    ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK","BEL",
                   "DLE","DC1","DC2","DC3","DC4","NAK","SYN","ETB",
                   "CAN","SUB","ESC","DEL"]

    ascii2 = "\b\t\n\v\f\r\SO\SI\EM\FS\GS\RS\US "
    ascii3 = "\NUL\SOH\STX\ETX\EOT\ENQ\ACK\a\DLE\DC1\DC2\DC3\DC4\NAK\SYN\ETB\CAN\SUB\ESC\DEL"

    -- numbers — integers

    integer  = decimal <?> "integer"
    integer' = signed integer

    decimal     = lexeme $ nump "" digit
    hexadecimal = lexeme $ char '0' >> oneOf "xX" >> nump "0x" hexDigit
    octal       = lexeme $ char '0' >> oneOf "oO" >> nump "0o" octDigit

    -- Parses one or more 'baseDigit's and converts them with 'read' after
    -- prepending 'prefix' ("", "0o" or "0x") so that 'read' picks the base.
    -- This is not a lexeme: no white space is skipped.
    nump prefix baseDigit = read . (prefix ++) <$> some baseDigit

    -- The sign itself is a lexeme, hence white space may follow it.
    signed p = ($) <$> option id (lexeme sign) <*> p

    sign :: (Stream s m Char, Num a) => ParsecT s u m (a -> a)
    sign = (char '+' *> return id) <|> (char '-' *> return negate)

    -- numbers — floats

    float  = lexeme ffloat <?> "float"
    float' = signed float

    -- Collects the textual form of the number (integer part followed by
    -- either a fraction or an exponent) and converts it with 'read'.
    ffloat = read <$> ffloat'
      where
        ffloat' = do
          decimal <- fDec
          rest <- fraction <|> fExp
          return $ decimal ++ rest

    fraction = do
      void $ char '.'
      decimal <- fDec
      exp <- option "" fExp
      return $ '.' : decimal ++  exp

    fDec = some digit

    fExp = do
      expChar <- oneOf "eE"
      signStr <- option "" (pure <$> oneOf "+-")
      decimal <- fDec
      return $ expChar : signStr ++ decimal

    -- numbers — a more general case

    -- 'float' is tried first and backtracks, so plain integers fall
    -- through to the second alternative.
    number  = (Right <$> try float)  <|> (Left <$> integer)  <?> "number"
    number' = (Right <$> try float') <|> (Left <$> integer') <?> "number"

    -- operators & reserved ops

    reservedOp name =
        lexeme $ try $ do
          void $ string name
          notFollowedBy (opLetter languageDef) <?> ("end of " ++ show name)

    operator =
        lexeme $ try $ do
          name <- oper
          if isReservedOp name
          then unexpected ("reserved operator " ++ show name)
          else return name

    oper = ((:) <$> opStart languageDef <*> many (opLetter languageDef))
           <?> "operator"

    isReservedOp = isReserved . sort $ reservedOpNames languageDef

    -- identifiers & reserved words

    reserved name =
        lexeme $ try $ do
          void $ caseString name
          notFollowedBy (identLetter languageDef) <?> ("end of " ++ show name)

    -- Matches 'name' exactly, or, for case-insensitive languages, character
    -- by character with alphabetic characters accepted in either case. The
    -- result is always 'name' as given.
    caseString name
        | caseSensitive languageDef = string name
        | otherwise                 = walk name >> return name
        where walk = foldr (\c -> ((caseChar c <?> show name) >>)) (return ())
              caseChar c
                  | isAlpha c = char (toLower c) <|> char (toUpper c)
                  | otherwise = char c

    identifier =
        lexeme $ try $ do
          name <- ident
          if isReservedName name
          then unexpected ("reserved word " ++ show name)
          else return name

    ident = ((:) <$> identStart languageDef <*> many (identLetter languageDef))
            <?> "identifier"

    -- For case-insensitive languages the candidate is lower-cased, matching
    -- the lower-cased table in 'theReservedNames'.
    isReservedName name = isReserved theReservedNames caseName
        where caseName
                  | caseSensitive languageDef = name
                  | otherwise                 = toLower <$> name

    -- Membership test on a list that must be sorted in ascending order:
    -- the scan stops as soon as an element greater than 'name' is seen.
    isReserved names name = scan names
        where scan []     = False
              scan (r:rs) = case compare r name of
                              LT  -> scan rs
                              EQ  -> True
                              GT  -> False

    theReservedNames
        | caseSensitive languageDef = sort reserved
        | otherwise                 = sort . fmap (fmap toLower) $ reserved
        where reserved = reservedNames languageDef

    -- white space & symbols

    symbol = lexeme . string

    lexeme p = p <* whiteSpace

    -- Which comment parsers take part is decided once, from the language
    -- definition: an empty 'commentLine' disables line comments and an
    -- empty 'commentStart' disables block comments. The empty label hides
    -- white space from "expected ..." messages.
    whiteSpace
        | noLine && noMulti = skipMany (simpleSpace      <?> "")
        | noLine            = skipMany (simpleSpace      <|>
                                        multiLineComment <?> "")
        | noMulti           = skipMany (simpleSpace      <|>
                                        oneLineComment   <?> "")
        | otherwise         = skipMany (simpleSpace      <|>
                                        oneLineComment   <|>
                                        multiLineComment <?> "")
        where
          noLine  = null (commentLine languageDef)
          noMulti = null (commentStart languageDef)

    simpleSpace = skipSome (satisfy isSpace)

    -- Skips everything up to, but not including, the next newline.
    oneLineComment = void (try (string (commentLine languageDef))
                          >> skipMany (satisfy (/= '\n')))

    multiLineComment = try (string (commentStart languageDef)) >> inComment

    inComment = if nestedComments languageDef
                then inCommentMulti
                else inCommentSingle

    -- Body of a block comment when nesting is allowed: stop at the end
    -- marker, recurse into an inner comment, skip a run of characters that
    -- cannot start either marker, or skip a single such character that did
    -- not begin a marker after all.
    inCommentMulti
        =  void (try . string $ commentEnd languageDef)
       <|> (multiLineComment            >> inCommentMulti)
       <|> (skipSome (noneOf startEnd) >> inCommentMulti)
       <|> (oneOf startEnd              >> inCommentMulti)
       <?> "end of comment"

    -- Same as above but without recognising inner comments.
    inCommentSingle
        =  void (try . string $ commentEnd languageDef)
       <|> (skipSome (noneOf startEnd) >> inCommentSingle)
       <|> (oneOf startEnd              >> inCommentSingle)
       <?> "end of comment"

    -- All distinct characters occurring in the comment end or start marker.
    startEnd = nub $ (++) <$> commentEnd <*> commentStart $ languageDef
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
+								-- |
-												renamed ‘MegaParsec’ → ‘Megaparsec’, close #10

											
										
										
											2015-08-01 19:24:45 +03:00
+								-- Module      :  Text.Megaparsec.Token
 								-- Copyright   :  © 2015 Megaparsec contributors
-												cosmetic changes in copyright (headers)

											
										
										
											2015-07-30 19:20:37 +03:00
+								--                © 2007 Paolo Martini
 								--                © 1999–2001 Daan Leijen
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								-- License     :  BSD3
 								--
 								-- Maintainer  :  Mark Karpov <markkarpov@opmbx.org>
-												refactoring, phase 2

											
										
										
											2015-07-29 11:38:32 +03:00
+								-- Stability   :  experimental
-												More accurate extension pragmas

											
										
										
											2008-01-20 09:39:18 +03:00
+								-- Portability :  non-portable (uses local universal quantification: PolymorphicComponents)
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								--
-												Tweaking Text.Parsec.Token's documentation

											
										
										
											2008-01-22 08:25:34 +03:00
+								-- A helper module to parse lexical elements (tokens). See 'makeTokenParser'
 								-- for a description of how to use it.
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												Clean most warnings

											
										
										
											2008-02-13 07:32:24 +03:00
+								{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												renamed ‘MegaParsec’ → ‘Megaparsec’, close #10

											
										
										
											2015-08-01 19:24:45 +03:00
+								module Text.Megaparsec.Token
-												improved documentation, fixes #1

											
										
										
											2015-07-30 21:36:54 +03:00
+								    ( LanguageDef (..)
 								    , TokenParser (..)
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								    , makeTokenParser )
 								where
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												re-export ‘(<|>)’, ‘many’, ‘some’, and ‘optional’, fixes #9

These functions are now re-exported from ‘Control.Applicative’
module. ‘many’ and ‘some’ are now part of ‘Alternative’ instance of
‘ParsecT’.

Note that these functions are re-exported only in ‘Text.MegaParsec’
module, but not in ‘Text.MegaParsec.Prim’ to avoid duplication of
floating doc-strings. Others internal modules now just casually import
‘Control.Applicative’ for their needs.

Note that ‘many1’ was renamed to ‘some’, the same is done for other
parsers that had ‘many1’ part in their names (for consistency).

											
										
										
											2015-08-01 17:39:20 +03:00
+								import Control.Applicative ((<|>), many, some)
 								import Control.Monad (void)
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								import Data.Char (isAlpha, toLower, toUpper, isSpace)
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								import Data.List (nub, sort)
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
-												renamed ‘MegaParsec’ → ‘Megaparsec’, close #10

											
										
										
											2015-08-01 19:24:45 +03:00
+								import Text.Megaparsec.Prim
 								import Text.Megaparsec.Char
 								import Text.Megaparsec.Combinator
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- Language definition
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												improved documentation, fixes #1

											
										
										
											2015-07-30 21:36:54 +03:00
+								-- | The @LanguageDef@ type is a record that contains all parameterizable
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- features of the "Text.Parsec.Token" module. The module
 								-- "Text.Parsec.Language" contains some default definitions.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												improved documentation, fixes #1

											
										
										
											2015-07-30 21:36:54 +03:00
+								data LanguageDef s u m =
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    LanguageDef {
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
+								    -- | Describes the start of a block comment. Use the empty string if the
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								    -- language doesn't support block comments. For example \"\/*\".
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								      commentStart :: String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
 								    -- | Describes the end of a block comment. Use the empty string if the
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								    -- language doesn't support block comments. For example \"*\/\".
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , commentEnd :: String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
 								    -- | Describes the start of a line comment. Use the empty string if the
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								    -- language doesn't support line comments. For example \"\/\/\".
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , commentLine :: String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								    -- | Set to 'True' if the language supports nested block comments.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , nestedComments :: Bool
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
 								    -- | This parser should accept any start characters of identifiers. For
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								    -- example @letter \<|> char \'_\'@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , identStart :: ParsecT s u m Char
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
 								    -- | This parser should accept any legal tail characters of identifiers.
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								    -- For example @alphaNum \<|> char \'_\'@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , identLetter :: ParsecT s u m Char
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
 								    -- | This parser should accept any start characters of operators. For
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								    -- example @oneOf \":!#$%&*+.\/\<=>?\@\\\\^|-~\"@
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , opStart :: ParsecT s u m Char
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
 								    -- | This parser should accept any legal tail characters of operators.
 								    -- Note that this parser should even be defined if the language doesn't
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- support user-defined operators, or otherwise the 'reservedOp' parser
 								    -- won't work correctly.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , opLetter :: ParsecT s u m Char
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								    -- | The list of reserved identifiers.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , reservedNames :: [String]
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								    -- | The list of reserved operators.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , reservedOpNames :: [String]
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								    -- | Set to 'True' if the language is case sensitive.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , caseSensitive :: Bool }
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												improved documentation, fixes #1

											
										
										
											2015-07-30 21:36:54 +03:00
+								-- Token parser
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
 								-- | The type of the record that holds lexical parsers that work on
 								-- @s@ streams with state @u@ over a monad @m@.
-												improved documentation, fixes #1

											
										
										
											2015-07-30 21:36:54 +03:00
+								data TokenParser s u m =
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    TokenParser {
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- | The lexeme parser parses a legal identifier. Returns the identifier
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- string. This parser will fail on identifiers that are reserved
 								    -- words. Legal identifier (start) characters and reserved words are
 								    -- defined in the 'LanguageDef' that is passed to 'makeTokenParser'. An
 								    -- @identifier@ is treated as a single token using 'try'.
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								      identifier :: ParsecT s u m String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | The lexeme parser @reserved name@ parses @symbol
 								    -- name@, but it also checks that the @name@ is not a prefix of a valid
 								    -- identifier. A @reserved@ word is treated as a single token using
 								    -- 'try'.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , reserved :: String -> ParsecT s u m ()
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- | The lexeme parser parses a legal operator. Returns the name of the
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- operator. This parser will fail on any operators that are reserved
 								    -- operators. Legal operator (start) characters and reserved operators
 								    -- are defined in the 'LanguageDef' that is passed to
 								    -- 'makeTokenParser'. An @operator@ is treated as a single token using
 								    -- 'try'.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , operator :: ParsecT s u m String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | The lexeme parser @reservedOp name@ parses @symbol
 								    -- name@, but it also checks that the @name@ is not a prefix of a valid
 								    -- operator. A @reservedOp@ is treated as a single token using 'try'.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , reservedOp :: String -> ParsecT s u m ()
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- | The lexeme parser parses a single literal character. Returns the
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- literal character value. This parser deals correctly with escape
 								    -- sequences. The literal character is parsed according to the grammar
 								    -- rules defined in the Haskell report (which matches most programming
 								    -- languages quite closely).
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , charLiteral :: ParsecT s u m Char
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- | The lexeme parser parses a literal string. Returns the literal
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- string value. This parser deals correctly with escape sequences and
 								    -- gaps. The literal string is parsed according to the grammar rules
 								    -- defined in the Haskell report (which matches most programming
 								    -- languages quite closely).
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , stringLiteral :: ParsecT s u m String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- | The lexeme parser parses an integer (a whole number). This parser
 								    -- /does not/ parse sign. Returns the value of the number. The number
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- can be specified in 'decimal', 'hexadecimal' or 'octal'. The number
 								    -- is parsed according to the grammar rules in the Haskell report.
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , integer :: ParsecT s u m Integer
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- | This is just like 'integer', except it can parse sign.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    , integer' :: ParsecT s u m Integer
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- | The lexeme parser parses a positive whole number in the decimal system.
 								    -- Returns the value of the number.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , decimal :: ParsecT s u m Integer
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- | The lexeme parser parses a positive whole number in the hexadecimal
 								    -- system. The number should be prefixed with \"0x\" or \"0X\". Returns
 								    -- the value of the number.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , hexadecimal :: ParsecT s u m Integer
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- | The lexeme parser parses a positive whole number in the octal system.
 								    -- The number should be prefixed with \"0o\" or \"0O\". Returns the
 								    -- value of the number.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , octal :: ParsecT s u m Integer
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- | @signed p@ tries to parse sign (i.e. \'+\', \'-\', or nothing) and
 								    -- then runs parser @p@, changing sign of its result accordingly. Note
 								    -- that there may be white space after the sign but not before it.
 								    , signed :: forall a . Num a => ParsecT s u m a -> ParsecT s u m a
 								    -- | The lexeme parser parses a floating point value. Returns the value
 								    -- of the number. The number is parsed according to the grammar rules
 								    -- defined in the Haskell report, sign is /not/ parsed, use 'signed' to
 								    -- achieve parsing of signed floating point values.
 								    , float :: ParsecT s u m Double
 								    -- | This is just like 'float', except it can parse sign.
 								    , float' :: ParsecT s u m Double
 								    -- | The lexeme parser parses either 'integer' or a 'float'.
 								    -- Returns the value of the number. This parser deals with any overlap
 								    -- in the grammar rules for integers and floats. The number is parsed
 								    -- according to the grammar rules defined in the Haskell report.
 								    , number :: ParsecT s u m (Either Integer Double)
 								    -- | This is just like 'number', except it can parse sign.
 								    , number' :: ParsecT s u m (Either Integer Double)
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @symbol s@ parses 'string' @s@ and skips
 								    -- trailing white space.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , symbol :: String -> ParsecT s u m String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | @lexeme p@ first applies parser @p@ and then the 'whiteSpace'
 								    -- parser, returning the value of @p@. Every lexical token (lexeme) is
 								    -- defined using @lexeme@, this way every parse starts at a point
 								    -- without white space. Parsers that use @lexeme@ are called /lexeme/
 								    -- parsers in this document.
 								    --
 								    -- The only point where the 'whiteSpace' parser should be called
 								    -- explicitly is the start of the main parser in order to skip any
 								    -- leading white space.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , lexeme :: forall a. ParsecT s u m a -> ParsecT s u m a
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Parses any white space. White space consists of /zero/ or more
 								    -- occurrences of a 'space', a line comment or a block (multi line)
 								    -- comment. Block comments may be nested. How comments are started and
 								    -- ended is defined in the 'LanguageDef' that is passed to
 								    -- 'makeTokenParser'.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , whiteSpace :: ParsecT s u m ()
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @parens p@ parses @p@ enclosed in parentheses,
 								    -- returning the value of @p@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , parens :: forall a. ParsecT s u m a -> ParsecT s u m a
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @braces p@ parses @p@ enclosed in braces (\'{\' and
 								    -- \'}\'), returning the value of @p@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , braces :: forall a. ParsecT s u m a -> ParsecT s u m a
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @angles p@ parses @p@ enclosed in angle brackets (\'\<\'
 								    -- and \'>\'), returning the value of @p@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , angles :: forall a. ParsecT s u m a -> ParsecT s u m a
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @brackets p@ parses @p@ enclosed in brackets (\'[\'
 								    -- and \']\'), returning the value of @p@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , brackets :: forall a. ParsecT s u m a -> ParsecT s u m a
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @semi@ parses the character \';\' and skips any
 								    -- trailing white space. Returns the string \";\".
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , semi :: ParsecT s u m String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @comma@ parses the character \',\' and skips any
 								    -- trailing white space. Returns the string \",\".
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , comma :: ParsecT s u m String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @colon@ parses the character \':\' and skips any
 								    -- trailing white space. Returns the string \":\".
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , colon :: ParsecT s u m String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @dot@ parses the character \'.\' and skips any
 								    -- trailing white space. Returns the string \".\".
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , dot :: ParsecT s u m String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @semiSep p@ parses /zero/ or more occurrences of @p@
 								    -- separated by 'semi'. Returns a list of values returned by @p@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , semiSep :: forall a . ParsecT s u m a -> ParsecT s u m [a]
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @semiSep1 p@ parses /one/ or more occurrences of @p@
 								    -- separated by 'semi'. Returns a list of values returned by @p@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , semiSep1 :: forall a . ParsecT s u m a -> ParsecT s u m [a]
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @commaSep p@ parses /zero/ or more occurrences of
 								    -- @p@ separated by 'comma'. Returns a list of values returned by @p@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , commaSep :: forall a . ParsecT s u m a -> ParsecT s u m [a]
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @commaSep1 p@ parses /one/ or more occurrences of
 								    -- @p@ separated by 'comma'. Returns a list of values returned by @p@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , commaSep1 :: forall a . ParsecT s u m a -> ParsecT s u m [a] }
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- Given a LanguageDef, create a token parser
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												improved documentation, fixes #1

											
										
										
											2015-07-30 21:36:54 +03:00
+								-- | The expression @makeTokenParser language@ creates a 'TokenParser'
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- record that contains lexical parsers that are defined using the
 								-- definitions in the @language@ record.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
+								--
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- The use of this function is quite stylized — one imports the appropriate
 								-- language definition and selects the lexical parsers that are needed from
-												improved documentation, fixes #1

											
										
										
											2015-07-30 21:36:54 +03:00
+								-- the resulting 'TokenParser'.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
+								--
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- > module Main (main) where
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
+								-- >
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- > import Text.Parsec
 								-- > import qualified Text.Parsec.Token as Token
 								-- > import Text.Parsec.Language (haskellDef)
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
+								-- >
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- > -- The parser
 								-- > ...
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								-- >
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- > expr =  parens expr
 								-- >     <|> identifier
 								-- >     <|> ...
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
+								-- >
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- > -- The lexer
 								-- > lexer      = Token.makeTokenParser haskellDef
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								-- >
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- > parens     = Token.parens     lexer
 								-- > braces     = Token.braces     lexer
 								-- > identifier = Token.identifier lexer
 								-- > reserved   = Token.reserved   lexer
 								-- > ...
-												improved documentation, fixes #1

											
										
										
											2015-07-30 21:36:54 +03:00
+								makeTokenParser :: Stream s m Char => LanguageDef s u m -> TokenParser s u m
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								makeTokenParser languageDef =
 								    TokenParser
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    { identifier    = identifier
 								    , reserved      = reserved
 								    , operator      = operator
 								    , reservedOp    = reservedOp
 								    , charLiteral   = charLiteral
 								    , stringLiteral = stringLiteral
 								    , integer       = integer
 								    , integer'      = integer'
 								    , decimal       = decimal
 								    , hexadecimal   = hexadecimal
 								    , octal         = octal
 								    , signed        = signed
 								    , float         = float
 								    , float'        = float'
 								    , number        = number
 								    , number'       = number'
 								    , symbol        = symbol
 								    , lexeme        = lexeme
 								    , whiteSpace    = whiteSpace
 								    , parens        = parens
 								    , braces        = braces
 								    , angles        = angles
 								    , brackets      = brackets
 								    , semi          = semi
 								    , comma         = comma
 								    , colon         = colon
 								    , dot           = dot
 								    , semiSep       = semiSep
 								    , semiSep1      = semiSep1
 								    , commaSep      = commaSep
 								    , commaSep1     = commaSep1 }
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
+								    where
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- bracketing
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    parens    = between (symbol "(") (symbol ")")
 								    braces    = between (symbol "{") (symbol "}")
 								    angles    = between (symbol "<") (symbol ">")
 								    brackets  = between (symbol "[") (symbol "]")
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    semi      = symbol ";"
 								    comma     = symbol ","
 								    dot       = symbol "."
 								    colon     = symbol ":"
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    commaSep  = (`sepBy` comma)
 								    semiSep   = (`sepBy` semi)
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    commaSep1 = (`sepBy1` comma)
 								    semiSep1  = (`sepBy1` semi)
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- chars & strings
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    charLiteral = lexeme ( between (char '\'')
 								                                   (char '\'' <?> "end of character")
 								                                   characterChar )
 								                  <?> "character"
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    characterChar = charLetter <|> charEscape <?> "literal character"
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    charEscape = char '\\' >> escapeCode
 								    charLetter = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026'))
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    stringLiteral =
 								        lexeme ((foldr (maybe id (:)) "" <$>
 								                 between (char '"') (char '"' <?> "end of string")
 								                             (many stringChar)) <?> "literal string")
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    stringChar = (Just <$> stringLetter) <|> stringEscape <?> "string character"
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    stringLetter = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026'))
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    stringEscape = char '\\' >>
 								                   ( (escapeGap >> return Nothing)   <|>
 								                     (escapeEmpty >> return Nothing) <|>
 								                     (Just <$> escapeCode) )
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    escapeEmpty = char '&'
-												re-export ‘(<|>)’, ‘many’, ‘some’, and ‘optional’, fixes #9

These functions are now re-exported from ‘Control.Applicative’
module. ‘many’ and ‘some’ are now part of ‘Alternative’ instance of
‘ParsecT’.

Note that these functions are re-exported only in ‘Text.MegaParsec’
module, but not in ‘Text.MegaParsec.Prim’ to avoid duplication of
floating doc-strings. Others internal modules now just casually import
‘Control.Applicative’ for their needs.

Note that ‘many1’ was renamed to ‘some’, the same is done for other
parsers that had ‘many1’ part in their names (for consistency).

											
										
										
											2015-08-01 17:39:20 +03:00
+								    escapeGap   = some space >> char '\\' <?> "end of string gap"
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
 								    -- escape codes
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    escapeCode = charEsc <|> charNum <|> charAscii <|> charControl
 								                 <?> "escape code"
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    charEsc = choice (parseEsc <$> escMap)
 								        where parseEsc (c, code) = char c >> return code
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    charNum = toEnum . fromInteger <$>
 								              ( decimal <|>
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								               (char 'o' >> nump "0o" octDigit) <|>
 								               (char 'x' >> nump "0x" hexDigit) )
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    charAscii = choice (parseAscii <$> asciiMap)
 								        where parseAscii (asc, code) = try (string asc >> return code)
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    charControl = toEnum . subtract 64 . fromEnum <$> (char '^' >> upper)
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
+								    -- escape code tables
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    escMap      = zip "abfnrtv\\\"\'" "\a\b\f\n\r\t\v\\\"\'"
 								    asciiMap    = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2)
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    ascii2codes = ["BS","HT","LF","VT","FF","CR","SO","SI","EM",
 								                   "FS","GS","RS","US","SP"]
 								    ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK","BEL",
 								                   "DLE","DC1","DC2","DC3","DC4","NAK","SYN","ETB",
 								                   "CAN","SUB","ESC","DEL"]
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    ascii2 = "\b\t\n\v\f\r\SO\SI\EM\FS\GS\RS\US "
 								    ascii3 = "\NUL\SOH\STX\ETX\EOT\ENQ\ACK\a\DLE\DC1\DC2\DC3\DC4\NAK\SYN\ETB\CAN\SUB\ESC\DEL"
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- numbers — integers
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
-												remove word ‘unsigned’ from descriptions

											
										
										
											2015-07-31 22:31:31 +03:00
+								    integer  = decimal <?> "integer"
 								    integer' = signed integer
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    decimal     = lexeme $ nump "" digit
 								    hexadecimal = lexeme $ char '0' >> oneOf "xX" >> nump "0x" hexDigit
 								    octal       = lexeme $ char '0' >> oneOf "oO" >> nump "0o" octDigit
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
-												re-export ‘(<|>)’, ‘many’, ‘some’, and ‘optional’, fixes #9

These functions are now re-exported from ‘Control.Applicative’
module. ‘many’ and ‘some’ are now part of ‘Alternative’ instance of
‘ParsecT’.

Note that these functions are re-exported only in ‘Text.MegaParsec’
module, but not in ‘Text.MegaParsec.Prim’ to avoid duplication of
floating doc-strings. Others internal modules now just casually import
‘Control.Applicative’ for their needs.

Note that ‘many1’ was renamed to ‘some’, the same is done for other
parsers that had ‘many1’ part in their names (for consistency).

											
										
										
											2015-08-01 17:39:20 +03:00
+								    nump prefix baseDigit = read . (prefix ++) <$> some baseDigit
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    signed p = ($) <$> option id (lexeme sign) <*> p
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    sign :: (Stream s m Char, Num a) => ParsecT s u m (a -> a)
 								    sign = (char '+' *> return id) <|> (char '-' *> return negate)
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- numbers — floats
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
-												remove word ‘unsigned’ from descriptions

											
										
										
											2015-07-31 22:31:31 +03:00
+								    float  = lexeme ffloat <?> "float"
 								    float' = signed float
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												Remove `try` in float parser.

According to this post
(http://blog.ezyang.com/2014/05/parsec-try-a-or-b-considered-harmful/),
the error message could be confusing if using `try`. `"3.a"` is one case
to expose this behavior.

											
										
										
											2015-08-01 00:44:26 +03:00
+								    ffloat = read <$> ffloat'
 								      where
 								        ffloat' = do
 								          decimal <- fDec
 								          rest <- fraction <|> fExp
 								          return $ decimal ++ rest
 								    fraction = do
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								      void $ char '.'
-												Remove `try` in float parser.

According to this post
(http://blog.ezyang.com/2014/05/parsec-try-a-or-b-considered-harmful/),
the error message could be confusing if using `try`. `"3.a"` is one case
to expose this behavior.

											
										
										
											2015-08-01 00:44:26 +03:00
+								      decimal <- fDec
 								      exp <- option "" fExp
 								      return $ '.' : decimal ++  exp
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												re-export ‘(<|>)’, ‘many’, ‘some’, and ‘optional’, fixes #9

These functions are now re-exported from ‘Control.Applicative’
module. ‘many’ and ‘some’ are now part of ‘Alternative’ instance of
‘ParsecT’.

Note that these functions are re-exported only in ‘Text.MegaParsec’
module, but not in ‘Text.MegaParsec.Prim’ to avoid duplication of
floating doc-strings. Others internal modules now just casually import
‘Control.Applicative’ for their needs.

Note that ‘many1’ was renamed to ‘some’, the same is done for other
parsers that had ‘many1’ part in their names (for consistency).

											
										
										
											2015-08-01 17:39:20 +03:00
+								    fDec = some digit
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    fExp = do
 								      expChar <- oneOf "eE"
 								      signStr <- option "" (pure <$> oneOf "+-")
 								      decimal <- fDec
 								      return $ expChar : signStr ++ decimal
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- numbers — a more general case
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												remove word ‘unsigned’ from descriptions

											
										
										
											2015-07-31 22:31:31 +03:00
+								    number  = (Right <$> try float)  <|> (Left <$> integer)  <?> "number"
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    number' = (Right <$> try float') <|> (Left <$> integer') <?> "number"
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- operators & reserved ops
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    reservedOp name =
 								        lexeme $ try $ do
 								          void $ string name
 								          notFollowedBy (opLetter languageDef) <?> ("end of " ++ show name)
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    operator =
 								        lexeme $ try $ do
 								          name <- oper
 								          if isReservedOp name
 								          then unexpected ("reserved operator " ++ show name)
 								          else return name
 								    oper = ((:) <$> opStart languageDef <*> many (opLetter languageDef))
 								           <?> "operator"
 								    isReservedOp = isReserved . sort $ reservedOpNames languageDef
 								    -- identifiers & reserved words
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    reserved name =
 								        lexeme $ try $ do
 								          void $ caseString name
 								          notFollowedBy (identLetter languageDef) <?> ("end of " ++ show name)
 								    caseString name
 								        | caseSensitive languageDef = string name
 								        | otherwise                 = walk name >> return name
 								        where walk = foldr (\c -> ((caseChar c <?> show name) >>)) (return ())
 								              caseChar c
 								                  | isAlpha c = char (toLower c) <|> char (toUpper c)
 								                  | otherwise = char c
 								    identifier =
 								        lexeme $ try $ do
 								          name <- ident
 								          if isReservedName name
 								          then unexpected ("reserved word " ++ show name)
 								          else return name
 								    ident = ((:) <$> identStart languageDef <*> many (identLetter languageDef))
 								            <?> "identifier"
 								    isReservedName name = isReserved theReservedNames caseName
 								        where caseName
 								                  | caseSensitive languageDef = name
 								                  | otherwise                 = toLower <$> name
 								    isReserved names name = scan names
 								        where scan []     = False
 								              scan (r:rs) = case compare r name of
 								                              LT  -> scan rs
 								                              EQ  -> True
 								                              GT  -> False
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    theReservedNames
 								        | caseSensitive languageDef = sort reserved
 								        | otherwise                 = sort . fmap (fmap toLower) $ reserved
 								        where reserved = reservedNames languageDef
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- white space & symbols
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    symbol = lexeme . string
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    lexeme p = p <* whiteSpace
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
 								    -- Skips any number of white space runs and comments. A comment parser
 								    -- takes part only when the language definition declares that comment
 								    -- style, i.e. when 'commentLine' / 'commentStart' is non-empty.
 								    -- Every guard labels its parser with "" through '<?>' (presumably to
 								    -- keep white space out of "expected ..." messages; confirm against the
 								    -- '<?>' documentation).
 								    whiteSpace
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								        | noLine && noMulti = skipMany (simpleSpace      <?> "")
 								        | noLine            = skipMany (simpleSpace      <|>
 								                                        multiLineComment <?> "")
 								        | noMulti           = skipMany (simpleSpace      <|>
 								                                        oneLineComment   <?> "")
 								        | otherwise         = skipMany (simpleSpace      <|>
 								                                        oneLineComment   <|>
 								                                        multiLineComment <?> "")
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
+								        where
 								          noLine  = null (commentLine languageDef) -- no line-comment opener declared
 								          noMulti = null (commentStart languageDef) -- no block-comment opener declared
-												re-export ‘(<|>)’, ‘many’, ‘some’, and ‘optional’, fixes #9

These functions are now re-exported from ‘Control.Applicative’
module. ‘many’ and ‘some’ are now part of ‘Alternative’ instance of
‘ParsecT’.

Note that these functions are re-exported only in ‘Text.MegaParsec’
module, but not in ‘Text.MegaParsec.Prim’ to avoid duplication of
floating doc-strings. Others internal modules now just casually import
‘Control.Applicative’ for their needs.

Note that ‘many1’ was renamed to ‘some’, the same is done for other
parsers that had ‘many1’ part in their names (for consistency).

											
										
										
											2015-08-01 17:39:20 +03:00
+								    simpleSpace = skipSome (satisfy isSpace) -- a non-empty run of 'isSpace' characters, result discarded
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    oneLineComment = void (try (string (commentLine languageDef)) -- line-comment opener, wrapped in 'try'
 								                          >> skipMany (satisfy (/= '\n'))) -- rest of the line; the '\n' itself is not consumed
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    multiLineComment = try (string (commentStart languageDef)) >> inComment -- block-comment opener, then the body parser
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    inComment = if nestedComments languageDef
 								                then inCommentMulti   -- body parser that recurses into nested block comments
 								                else inCommentSingle  -- body parser that gives openers no special treatment
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
 								    -- Body of a block comment when nesting is enabled; 'multiLineComment'
 								    -- reaches it (through 'inComment') after matching the opener.
 								    -- Alternatives, in order:
 								    --   1. the terminator ('commentEnd'), which finishes this comment;
 								    --   2. a nested block comment, after which this body continues;
 								    --   3. a non-empty run of characters found in neither delimiter
 								    --      (see 'startEnd'), after which this body continues;
 								    --   4. a single delimiter character, after which this body continues.
 								    -- Alternatives 2-4 recurse, so success is only reached through 1.
 								    -- The whole parser is labelled "end of comment".
 								    inCommentMulti
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								        =  void (try . string $ commentEnd languageDef)
 								       <|> (multiLineComment            >> inCommentMulti)
-												re-export ‘(<|>)’, ‘many’, ‘some’, and ‘optional’, fixes #9

These functions are now re-exported from ‘Control.Applicative’
module. ‘many’ and ‘some’ are now part of ‘Alternative’ instance of
‘ParsecT’.

Note that these functions are re-exported only in ‘Text.MegaParsec’
module, but not in ‘Text.MegaParsec.Prim’ to avoid duplication of
floating doc-strings. Others internal modules now just casually import
‘Control.Applicative’ for their needs.

Note that ‘many1’ was renamed to ‘some’, the same is done for other
parsers that had ‘many1’ part in their names (for consistency).

											
										
										
											2015-08-01 17:39:20 +03:00
+								       <|> (skipSome (noneOf startEnd) >> inCommentMulti)
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								       <|> (oneOf startEnd              >> inCommentMulti)
 								       <?> "end of comment"
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
 								    -- Body of a block comment when nesting is disabled. Alternatives, in
 								    -- order:
 								    --   1. the terminator ('commentEnd'), which finishes the comment;
 								    --   2. a non-empty run of characters found in neither delimiter
 								    --      (see 'startEnd'), after which this body continues;
 								    --   3. a single delimiter character, after which this body continues.
 								    -- There is no alternative for a nested opener here: its characters are
 								    -- consumed by 2 and 3 like any other comment text.
 								    -- The whole parser is labelled "end of comment".
 								    inCommentSingle
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								        =  void (try . string $ commentEnd languageDef)
-												re-export ‘(<|>)’, ‘many’, ‘some’, and ‘optional’, fixes #9

These functions are now re-exported from ‘Control.Applicative’
module. ‘many’ and ‘some’ are now part of ‘Alternative’ instance of
‘ParsecT’.

Note that these functions are re-exported only in ‘Text.MegaParsec’
module, but not in ‘Text.MegaParsec.Prim’ to avoid duplication of
floating doc-strings. Others internal modules now just casually import
‘Control.Applicative’ for their needs.

Note that ‘many1’ was renamed to ‘some’, the same is done for other
parsers that had ‘many1’ part in their names (for consistency).

											
										
										
											2015-08-01 17:39:20 +03:00
+								       <|> (skipSome (noneOf startEnd) >> inCommentSingle)
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								       <|> (oneOf startEnd              >> inCommentSingle)
 								       <?> "end of comment"
 								    -- Distinct characters that occur in the block-comment terminator or
 								    -- opener, terminator characters first.
 								    startEnd = nub (commentEnd languageDef ++ commentStart languageDef)