megaparsec/Text/MegaParsec/Token.hs

-- |
-- Module      :  Text.MegaParsec.Token
-- Copyright   :  © 2015 MegaParsec contributors
--                © 2007 Paolo Martini
--                © 1999–2001 Daan Leijen
-- License     :  BSD3
--
-- Maintainer  :  Mark Karpov <markkarpov@opmbx.org>
-- Stability   :  experimental
-- Portability :  non-portable (uses local universal quantification: PolymorphicComponents)
--
-- A helper module to parse lexical elements (tokens). See 'makeTokenParser'
-- for a description of how to use it.

{-# OPTIONS_GHC -fno-warn-name-shadowing #-}

module Text.MegaParsec.Token
    ( LanguageDef (..)
    , TokenParser (..)
    , makeTokenParser )
where

import Data.Char (isAlpha, toLower, toUpper, isSpace)
import Data.List (nub, sort)

import Control.Monad (void)

import Text.MegaParsec.Prim
import Text.MegaParsec.Char
import Text.MegaParsec.Combinator

-- Language definition

-- | The @LanguageDef@ type is a record that contains all parameterizable
-- features of the "Text.MegaParsec.Token" module. A value of this type is
-- passed to 'makeTokenParser', which builds the lexical parsers from it.
-- The module "Text.Parsec.Language" contains some default definitions.
--
-- The type parameters @s@, @u@ and @m@ are those of the 'ParsecT' parsers
-- stored in the record.

-- NOTE(review): the reference to "Text.Parsec.Language" predates the rename
-- of this module to Text.MegaParsec.Token; presumably it should name the
-- MegaParsec counterpart — confirm the module name before changing it.

data LanguageDef s u m =
    LanguageDef {

    -- | Describes the start of a block comment. Use the empty string if the
    -- language doesn't support block comments. For example \"\/*\".
    --
    -- 'makeTokenParser' decides whether block comments are enabled by
    -- checking whether this string is empty.

      commentStart :: String

    -- | Describes the end of a block comment. Use the empty string if the
    -- language doesn't support block comments. For example \"*\/\".

    , commentEnd :: String

    -- | Describes the start of a line comment. Use the empty string if the
    -- language doesn't support line comments. For example \"\/\/\".
    --
    -- A line comment extends up to, but not including, the next newline
    -- character.

    , commentLine :: String

    -- | Set to 'True' if the language supports nested block comments.

    , nestedComments :: Bool

    -- | This parser should accept any start characters of identifiers. For
    -- example @letter \<|> char \'_\'@.

    , identStart :: ParsecT s u m Char

    -- | This parser should accept any legal tail characters of identifiers.
    -- For example @alphaNum \<|> char \'_\'@.
    --
    -- It is also used by 'reserved' to check that a reserved word is not
    -- immediately followed by another identifier character.

    , identLetter :: ParsecT s u m Char

    -- | This parser should accept any start characters of operators. For
    -- example @oneOf \":!#$%&*+.\/\<=>?\@\\\\^|-~\"@.

    , opStart :: ParsecT s u m Char

    -- | This parser should accept any legal tail characters of operators.
    -- Note that this parser should even be defined if the language doesn't
    -- support user-defined operators, or otherwise the 'reservedOp' parser
    -- won't work correctly.

    , opLetter :: ParsecT s u m Char

    -- | The list of reserved identifiers. If the language is not case
    -- sensitive (see 'caseSensitive'), the names are converted to lower
    -- case before identifiers are compared against them.

    , reservedNames :: [String]

    -- | The list of reserved operators.

    , reservedOpNames :: [String]

    -- | Set to 'True' if the language is case sensitive. When 'False',
    -- 'reserved' accepts every alphabetic character of a reserved word in
    -- either case, and 'identifier' compares the lower-cased identifier
    -- against the lower-cased 'reservedNames'.

    , caseSensitive :: Bool }

-- Token parser

-- | The type of the record that holds lexical parsers that work on
-- @s@ streams with state @u@ over a monad @m@. Values of this type are
-- created with 'makeTokenParser'.

data TokenParser s u m =
    TokenParser {

    -- | The lexeme parser parses a legal identifier. Returns the identifier
    -- string. This parser will fail on identifiers that are reserved
    -- words. Legal identifier (start) characters and reserved words are
    -- defined in the 'LanguageDef' that is passed to 'makeTokenParser'. An
    -- @identifier@ is treated as a single token using 'try'.

      identifier :: ParsecT s u m String

    -- | The lexeme parser @reserved name@ parses @symbol
    -- name@, but it also checks that the @name@ is not a prefix of a valid
    -- identifier. A @reserved@ word is treated as a single token using
    -- 'try'. If the language is not case sensitive, every alphabetic
    -- character of @name@ is accepted in either case.

    , reserved :: String -> ParsecT s u m ()

    -- | The lexeme parser parses a legal operator. Returns the name of the
    -- operator. This parser will fail on any operators that are reserved
    -- operators. Legal operator (start) characters and reserved operators
    -- are defined in the 'LanguageDef' that is passed to
    -- 'makeTokenParser'. An @operator@ is treated as a single token using
    -- 'try'.

    , operator :: ParsecT s u m String

    -- | The lexeme parser @reservedOp name@ parses @symbol
    -- name@, but it also checks that the @name@ is not a prefix of a valid
    -- operator. A @reservedOp@ is treated as a single token using 'try'.

    , reservedOp :: String -> ParsecT s u m ()

    -- | The lexeme parser parses a single literal character. Returns the
    -- literal character value. This parser deals correctly with escape
    -- sequences. The literal character is parsed according to the grammar
    -- rules defined in the Haskell report (which matches most programming
    -- languages quite closely).

    , charLiteral :: ParsecT s u m Char

    -- | The lexeme parser parses a literal string. Returns the literal
    -- string value. This parser deals correctly with escape sequences and
    -- gaps. The literal string is parsed according to the grammar rules
    -- defined in the Haskell report (which matches most programming
    -- languages quite closely).

    , stringLiteral :: ParsecT s u m String

    -- | The lexeme parser parses an integer (a whole number). This parser
    -- /does not/ parse sign. Returns the value of the number. As built by
    -- 'makeTokenParser', this parser is the same as 'decimal' (labelled
    -- \"integer\"); use 'hexadecimal' or 'octal' explicitly to parse
    -- numbers in other bases.

    , integer :: ParsecT s u m Integer

    -- | This is just like 'integer', except it can parse sign.

    , integer' :: ParsecT s u m Integer

    -- | The lexeme parser parses an unsigned whole number in the decimal
    -- system. Returns the value of the number.

    , decimal :: ParsecT s u m Integer

    -- | The lexeme parser parses an unsigned whole number in the
    -- hexadecimal system. The number should be prefixed with \"0x\" or
    -- \"0X\". Returns the value of the number.

    , hexadecimal :: ParsecT s u m Integer

    -- | The lexeme parser parses an unsigned whole number in the octal
    -- system. The number should be prefixed with \"0o\" or \"0O\". Returns
    -- the value of the number.

    , octal :: ParsecT s u m Integer

    -- | @signed p@ tries to parse sign (i.e. \'+\', \'-\', or nothing) and
    -- then runs parser @p@, changing sign of its result accordingly. Note
    -- that there may be white space after the sign but not before it.

    , signed :: forall a . Num a => ParsecT s u m a -> ParsecT s u m a

    -- | The lexeme parser parses a floating point value. Returns the value
    -- of the number. The number is parsed according to the grammar rules
    -- defined in the Haskell report, sign is /not/ parsed, use 'signed' to
    -- achieve parsing of signed floating point values.

    , float :: ParsecT s u m Double

    -- | This is just like 'float', except it can parse sign.

    , float' :: ParsecT s u m Double

    -- | The lexeme parser parses either 'integer' or a 'float'.
    -- Returns the value of the number. This parser deals with any overlap
    -- in the grammar rules for integers and floats (a 'float' is tried
    -- first, falling back to 'integer'). The number is parsed
    -- according to the grammar rules defined in the Haskell report.

    , number :: ParsecT s u m (Either Integer Double)

    -- | This is just like 'number', except it can parse sign.

    , number' :: ParsecT s u m (Either Integer Double)

    -- | Lexeme parser @symbol s@ parses 'string' @s@ and skips
    -- trailing white space.

    , symbol :: String -> ParsecT s u m String

    -- | @lexeme p@ first applies parser @p@ and then the 'whiteSpace'
    -- parser, returning the value of @p@. Every lexical token (lexeme) is
    -- defined using @lexeme@, this way every parse starts at a point
    -- without white space. Parsers that use @lexeme@ are called /lexeme/
    -- parsers in this document.
    --
    -- The only point where the 'whiteSpace' parser should be called
    -- explicitly is the start of the main parser in order to skip any
    -- leading white space.

    , lexeme :: forall a. ParsecT s u m a -> ParsecT s u m a

    -- | Parses any white space. White space consists of /zero/ or more
    -- occurrences of a 'space', a line comment or a block (multi line)
    -- comment. Block comments may be nested if 'nestedComments' is set.
    -- How comments are started and ended is defined in the 'LanguageDef'
    -- that is passed to 'makeTokenParser'.

    , whiteSpace :: ParsecT s u m ()

    -- | Lexeme parser @parens p@ parses @p@ enclosed in parentheses,
    -- returning the value of @p@.

    , parens :: forall a. ParsecT s u m a -> ParsecT s u m a

    -- | Lexeme parser @braces p@ parses @p@ enclosed in braces (\'{\' and
    -- \'}\'), returning the value of @p@.

    , braces :: forall a. ParsecT s u m a -> ParsecT s u m a

    -- | Lexeme parser @angles p@ parses @p@ enclosed in angle brackets (\'\<\'
    -- and \'>\'), returning the value of @p@.

    , angles :: forall a. ParsecT s u m a -> ParsecT s u m a

    -- | Lexeme parser @brackets p@ parses @p@ enclosed in brackets (\'[\'
    -- and \']\'), returning the value of @p@.

    , brackets :: forall a. ParsecT s u m a -> ParsecT s u m a

    -- | Lexeme parser @semi@ parses the character \';\' and skips any
    -- trailing white space. Returns the string \";\".

    , semi :: ParsecT s u m String

    -- | Lexeme parser @comma@ parses the character \',\' and skips any
    -- trailing white space. Returns the string \",\".

    , comma :: ParsecT s u m String

    -- | Lexeme parser @colon@ parses the character \':\' and skips any
    -- trailing white space. Returns the string \":\".

    , colon :: ParsecT s u m String

    -- | Lexeme parser @dot@ parses the character \'.\' and skips any
    -- trailing white space. Returns the string \".\".

    , dot :: ParsecT s u m String

    -- | Lexeme parser @semiSep p@ parses /zero/ or more occurrences of @p@
    -- separated by 'semi'. Returns a list of values returned by @p@.

    , semiSep :: forall a . ParsecT s u m a -> ParsecT s u m [a]

    -- | Lexeme parser @semiSep1 p@ parses /one/ or more occurrences of @p@
    -- separated by 'semi'. Returns a list of values returned by @p@.

    , semiSep1 :: forall a . ParsecT s u m a -> ParsecT s u m [a]

    -- | Lexeme parser @commaSep p@ parses /zero/ or more occurrences of
    -- @p@ separated by 'comma'. Returns a list of values returned by @p@.

    , commaSep :: forall a . ParsecT s u m a -> ParsecT s u m [a]

    -- | Lexeme parser @commaSep1 p@ parses /one/ or more occurrences of
    -- @p@ separated by 'comma'. Returns a list of values returned by @p@.

    , commaSep1 :: forall a . ParsecT s u m a -> ParsecT s u m [a] }

-- Given a LanguageDef, create a token parser

-- | The expression @makeTokenParser language@ creates a 'TokenParser'
-- record that contains lexical parsers that are defined using the
-- definitions in the @language@ record.
--
-- The use of this function is quite stylized — one imports the appropriate
-- language definition and selects the lexical parsers that are needed from
-- the resulting 'TokenParser'.
--
-- > module Main (main) where
-- >
-- > import Text.Parsec
-- > import qualified Text.MegaParsec.Token as Token
-- > import Text.Parsec.Language (haskellDef)
-- >
-- > -- The parser
-- > ...
-- >
-- > expr =  parens expr
-- >     <|> identifier
-- >     <|> ...
-- >
-- > -- The lexer
-- > lexer      = Token.makeTokenParser haskellDef
-- >
-- > parens     = Token.parens     lexer
-- > braces     = Token.braces     lexer
-- > identifier = Token.identifier lexer
-- > reserved   = Token.reserved   lexer
-- > ...

-- NOTE(review): the example above still imports "Text.Parsec" and
-- "Text.Parsec.Language"; presumably these should be the MegaParsec
-- counterparts — confirm the module names before updating the example.

makeTokenParser :: Stream s m Char => LanguageDef s u m -> TokenParser s u m
makeTokenParser languageDef =
    -- Every field is filled in with the local definition of the same name
    -- from the @where@ clause below; this deliberate shadowing of the record
    -- selectors is why the name-shadowing warning is disabled for the module.
    TokenParser
    { identifier    = identifier
    , reserved      = reserved
    , operator      = operator
    , reservedOp    = reservedOp

    , charLiteral   = charLiteral
    , stringLiteral = stringLiteral

    , integer       = integer
    , integer'      = integer'
    , decimal       = decimal
    , hexadecimal   = hexadecimal
    , octal         = octal
    , signed        = signed
    , float         = float
    , float'        = float'
    , number        = number
    , number'       = number'

    , symbol        = symbol
    , lexeme        = lexeme
    , whiteSpace    = whiteSpace

    , parens        = parens
    , braces        = braces
    , angles        = angles
    , brackets      = brackets
    , semi          = semi
    , comma         = comma
    , colon         = colon
    , dot           = dot
    , semiSep       = semiSep
    , semiSep1      = semiSep1
    , commaSep      = commaSep
    , commaSep1     = commaSep1 }
    where

    -- bracketing

    -- Each bracketing combinator runs its argument between an opening and a
    -- closing 'symbol', so white space after either delimiter is skipped.
    parens   p = enclosedBy "(" ")" p
    braces   p = enclosedBy "{" "}" p
    angles   p = enclosedBy "<" ">" p
    brackets p = enclosedBy "[" "]" p

    enclosedBy open close = between (symbol open) (symbol close)

    -- Punctuation lexemes; each returns the string it matched.
    semi  = symbol ";"
    comma = symbol ","
    colon = symbol ":"
    dot   = symbol "."

    -- Zero or more occurrences of @p@ separated by the given punctuation.
    commaSep p = p `sepBy` comma
    semiSep  p = p `sepBy` semi

    -- One or more occurrences of @p@ separated by the given punctuation.
    commaSep1 p = p `sepBy1` comma
    semiSep1  p = p `sepBy1` semi

    -- chars & strings

    -- A character literal: a single 'characterChar' wrapped in single
    -- quotes, followed by optional white space.
    charLiteral = lexeme quoted <?> "character"
        where quoted     = between openQuote closeQuote characterChar
              openQuote  = char '\''
              closeQuote = char '\'' <?> "end of character"

    -- Either a plain character or a backslash escape.
    characterChar = charLetter <|> charEscape <?> "literal character"

    -- A backslash followed by an escape code.
    charEscape = do
      _ <- char '\\'
      escapeCode

    -- Any character above '\026' other than the single quote and backslash.
    charLetter = satisfy plain
        where plain c = c > '\026' && c `notElem` "'\\"

    -- A string literal: string characters between double quotes. Gaps and
    -- the empty escape yield 'Nothing' and so contribute no character to the
    -- result.
    stringLiteral = lexeme (contents <?> "literal string")
        where contents = keep <$> between (char '"') closing (many stringChar)
              closing  = char '"' <?> "end of string"
              keep cs  = [c | Just c <- cs]

    stringChar = fmap Just stringLetter <|> stringEscape <?> "string character"

    -- Any character above '\026' other than the double quote and backslash.
    stringLetter = satisfy plain
        where plain c = c > '\026' && c `notElem` "\"\\"

    -- A backslash followed by a gap, the empty escape, or an escape code.
    stringEscape = do
        _ <- char '\\'
        nothing escapeGap <|> nothing escapeEmpty <|> fmap Just escapeCode
      where nothing p = p >> return Nothing

    escapeEmpty = char '&'

    -- White space terminated by a backslash (the opening backslash has
    -- already been consumed by 'stringEscape').
    escapeGap = gap <?> "end of string gap"
        where gap = many1 space >> char '\\'

    -- escape codes

    -- The part of an escape sequence that follows the backslash.
    escapeCode = alternatives <?> "escape code"
        where alternatives = charEsc <|> charNum <|> charAscii <|> charControl

    -- Single-character escapes, looked up in 'escMap'.
    charEsc = choice [ char c >> return code | (c, code) <- escMap ]

    -- Numeric escape code: a decimal number, or an octal or hexadecimal
    -- number introduced by @o@ or @x@ respectively.
    --
    -- The digits are read with 'nump' directly rather than with the lexeme
    -- parser 'decimal': this parser runs /inside/ character and string
    -- literals, where white space following the digits is part of the
    -- literal and must not be skipped.
    --
    -- Codes beyond the largest 'Char' are reported as a parse error instead
    -- of letting 'toEnum' throw a runtime exception.
    charNum = do
        code <- nump "" digit
            <|> (char 'o' >> nump "0o" octDigit)
            <|> (char 'x' >> nump "0x" hexDigit)
        if code > maxCode
          then unexpected ("out-of-range character code " ++ show code)
          else return (toEnum (fromInteger code))
      where maxCode = toInteger (fromEnum (maxBound :: Char))

    -- Named ASCII escapes, looked up in 'asciiMap'. Every alternative is
    -- wrapped in 'try' because several names share a prefix.
    charAscii = choice [ try (string asc >> return code) | (asc, code) <- asciiMap ]

    -- Control escape: a caret followed by an upper-case letter, mapped to the
    -- character whose code is 64 less than that of the letter.
    charControl = do
      _    <- char '^'
      ctrl <- upper
      return (toEnum (fromEnum ctrl - 64))

    -- escape code tables

    -- Single-character escape names paired with the characters they denote.
    escMap = zip names values
        where names  = "abfnrtv\\\"\'"
              values = "\a\b\f\n\r\t\v\\\"\'"

    -- Named ASCII escapes paired with their characters. The three-letter
    -- names come first so that they are tried before any two-letter name
    -- that is a prefix of one of them.
    asciiMap = zip ascii3codes ascii3 ++ zip ascii2codes ascii2

    ascii2codes =
        [ "BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI", "EM"
        , "FS", "GS", "RS", "US", "SP" ]

    ascii3codes =
        [ "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL"
        , "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB"
        , "CAN", "SUB", "ESC", "DEL" ]

    ascii2 = "\b\t\n\v\f\r\SO\SI\EM\FS\GS\RS\US "
    ascii3 = "\NUL\SOH\STX\ETX\EOT\ENQ\ACK\a\DLE\DC1\DC2\DC3\DC4\NAK\SYN\ETB\CAN\SUB\ESC\DEL"

    -- numbers — integers

    -- 'integer' is 'decimal' under a different label; the primed variant
    -- additionally accepts a sign.
    integer  = decimal <?> "integer"
    integer' = signed integer

    decimal     = lexeme (nump "" digit)
    hexadecimal = lexeme (prefixed "xX" "0x" hexDigit)
    octal       = lexeme (prefixed "oO" "0o" octDigit)

    -- A zero, one of the base marker characters, then the digits.
    prefixed markers prefix baseDigit = do
      _ <- char '0'
      _ <- oneOf markers
      nump prefix baseDigit

    -- Reads one or more digits; @prefix@ is prepended so that 'read' picks
    -- the intended base.
    nump prefix baseDigit = do
      digits <- many1 baseDigit
      return (read (prefix ++ digits))

    -- Optional sign (which may be followed by white space), then @p@ with
    -- the sign applied to its result.
    signed p = do
      applySign <- option id (lexeme sign)
      applySign <$> p

    sign :: (Stream s m Char, Num a) => ParsecT s u m (a -> a)
    sign = (char '+' >> return id) <|> (char '-' >> return negate)

    -- numbers — floats

    float  = lexeme ffloat <?> "float"
    float' = signed float

    -- Integral digits followed by either a fractional part or an exponent;
    -- the collected text is converted with 'read'.
    ffloat = read <$> literal
        where literal = (++) <$> fDec <*> (fraction <|> fExp)

    -- A dot, digits, and an optional exponent.
    fraction = build <$> (char '.' >> fDec) <*> option "" fExp
        where build digits expPart = '.' : digits ++ expPart

    fDec = many1 digit

    -- Exponent marker, optional sign, digits.
    fExp = build <$> oneOf "eE" <*> option "" signChar <*> fDec
        where signChar                    = (: []) <$> oneOf "+-"
              build marker signStr digits = marker : signStr ++ digits

    -- numbers — a more general case

    -- A float is tried first (with backtracking), then an integer.
    number  = fmap Right (try float)  <|> fmap Left integer  <?> "number"
    number' = fmap Right (try float') <|> fmap Left integer' <?> "number"

    -- operators & reserved ops

    -- The given operator, provided no further operator character follows.
    reservedOp name = lexeme (try (string name >> endOfOp))
        where endOfOp = notFollowedBy (opLetter languageDef)
                        <?> ("end of " ++ show name)

    -- Any operator that is not listed among the reserved operators.
    operator = lexeme (try (oper >>= rejectReserved))
        where rejectReserved name
                  | isReservedOp name = unexpected ("reserved operator " ++ show name)
                  | otherwise         = return name

    oper = rawOperator <?> "operator"
        where rawOperator = do
                c  <- opStart languageDef
                cs <- many (opLetter languageDef)
                return (c : cs)

    isReservedOp = isReserved (sort (reservedOpNames languageDef))

    -- identifiers & reserved words

    -- The given word, provided no further identifier character follows.
    reserved name = lexeme (try (caseString name >> endOfWord))
        where endOfWord = notFollowedBy (identLetter languageDef)
                          <?> ("end of " ++ show name)

    -- Matches @name@ exactly in case-sensitive languages; otherwise every
    -- alphabetic character is accepted in either case and @name@ itself is
    -- returned.
    caseString name
        | caseSensitive languageDef = string name
        | otherwise                 = mapM_ anyCase name >> return name
        where anyCase c
                  | isAlpha c = (char (toLower c) <|> char (toUpper c)) <?> show name
                  | otherwise = char c <?> show name

    -- Any identifier that is not a reserved word.
    identifier = lexeme (try (ident >>= rejectReserved))
        where rejectReserved name
                  | isReservedName name = unexpected ("reserved word " ++ show name)
                  | otherwise           = return name

    ident = rawIdent <?> "identifier"
        where rawIdent = do
                c  <- identStart languageDef
                cs <- many (identLetter languageDef)
                return (c : cs)

    -- Looks the name up among the reserved words, lower-casing it first
    -- when the language is not case sensitive.
    isReservedName name = isReserved theReservedNames normalized
        where normalized = if caseSensitive languageDef
                           then name
                           else map toLower name

    -- Membership test on a list sorted in ascending order: skip the smaller
    -- entries and compare with the first one that is not smaller.
    isReserved names name =
        case dropWhile (< name) names of
          (candidate : _) -> candidate == name
          []              -> False

    -- Reserved words, sorted, and lower-cased when the language is not case
    -- sensitive.
    theReservedNames = sort (normalize <$> reservedNames languageDef)
        where normalize
                  | caseSensitive languageDef = id
                  | otherwise                 = map toLower

    -- white space & symbols

    symbol name = lexeme (string name)

    -- Runs @p@, skips the white space after it, and returns the result of
    -- @p@.
    lexeme p = do
      result <- p
      whiteSpace
      return result

    -- Skips spaces together with whichever comment forms the language
    -- defines; a comment form is enabled when its start string is non-empty.
    whiteSpace = skipMany (skippable <?> "")
        where
          skippable = case (hasLine, hasMulti) of
            (False, False) -> simpleSpace
            (False, True ) -> simpleSpace <|> multiLineComment
            (True , False) -> simpleSpace <|> oneLineComment
            (True , True ) -> simpleSpace <|> oneLineComment <|> multiLineComment
          hasLine  = not (null (commentLine languageDef))
          hasMulti = not (null (commentStart languageDef))

    simpleSpace = skipMany1 (satisfy isSpace)

    -- The line comment opener and everything up to (not including) the next
    -- newline.
    oneLineComment = void (opener >> restOfLine)
        where opener     = try (string (commentLine languageDef))
              restOfLine = skipMany (satisfy (/= '\n'))

    multiLineComment = do
      _ <- try (string (commentStart languageDef))
      inComment

    -- Body of a block comment, up to and including its end marker.
    inComment
        | nestedComments languageDef = inCommentMulti
        | otherwise                  = inCommentSingle

    -- Nesting-aware body: an inner comment opener starts a nested comment
    -- that must be closed before the outer one continues.
    inCommentMulti = body <?> "end of comment"
        where body     = closer <|> nested <|> ordinary <|> stray
              closer   = void (try (string (commentEnd languageDef)))
              nested   = multiLineComment >> inCommentMulti
              ordinary = skipMany1 (noneOf startEnd) >> inCommentMulti
              stray    = oneOf startEnd >> inCommentMulti

    -- Flat body: the first end marker closes the comment.
    inCommentSingle = body <?> "end of comment"
        where body     = closer <|> ordinary <|> stray
              closer   = void (try (string (commentEnd languageDef)))
              ordinary = skipMany1 (noneOf startEnd) >> inCommentSingle
              stray    = oneOf startEnd >> inCommentSingle

    -- The distinct characters occurring in the comment end and start
    -- markers.
    startEnd = nub (commentEnd languageDef ++ commentStart languageDef)
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
+								-- |
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								-- Module      :  Text.MegaParsec.Token
-												cosmetic changes in copyright (headers)

											
										
										
											2015-07-30 19:20:37 +03:00
+								-- Copyright   :  © 2015 MegaParsec contributors
 								--                © 2007 Paolo Martini
 								--                © 1999–2001 Daan Leijen
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								-- License     :  BSD3
 								--
 								-- Maintainer  :  Mark Karpov <markkarpov@opmbx.org>
-												refactoring, phase 2

											
										
										
											2015-07-29 11:38:32 +03:00
+								-- Stability   :  experimental
-												More accurate extension pragmas

											
										
										
											2008-01-20 09:39:18 +03:00
+								-- Portability :  non-portable (uses local universal quantification: PolymorphicComponents)
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								--
-												Tweaking Text.Parsec.Token's documentation

											
										
										
											2008-01-22 08:25:34 +03:00
+								-- A helper module to parse lexical elements (tokens). See 'makeTokenParser'
 								-- for a description of how to use it.
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												Clean most warnings

											
										
										
											2008-02-13 07:32:24 +03:00
+								{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								module Text.MegaParsec.Token
-												improved documentation, fixes #1

											
										
										
											2015-07-30 21:36:54 +03:00
+								    ( LanguageDef (..)
 								    , TokenParser (..)
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								    , makeTokenParser )
 								where
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								import Data.Char (isAlpha, toLower, toUpper, isSpace)
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								import Data.List (nub, sort)
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
-												improved documentation, fixes #1

											
										
										
											2015-07-30 21:36:54 +03:00
+								import Control.Monad (void)
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
 								import Text.MegaParsec.Prim
 								import Text.MegaParsec.Char
 								import Text.MegaParsec.Combinator
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- Language definition
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												improved documentation, fixes #1

											
										
										
											2015-07-30 21:36:54 +03:00
+								-- | The @LanguageDef@ type is a record that contains all parameterizable
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- features of the "Text.Parsec.Token" module. The module
 								-- "Text.Parsec.Language" contains some default definitions.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												improved documentation, fixes #1

											
										
										
											2015-07-30 21:36:54 +03:00
+								data LanguageDef s u m =
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    LanguageDef {
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
+								    -- | Describes the start of a block comment. Use the empty string if the
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								    -- language doesn't support block comments. For example \"\/*\".
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								      commentStart :: String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
 								    -- | Describes the end of a block comment. Use the empty string if the
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								    -- language doesn't support block comments. For example \"*\/\".
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , commentEnd :: String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
 								    -- | Describes the start of a line comment. Use the empty string if the
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								    -- language doesn't support line comments. For example \"\/\/\".
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , commentLine :: String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								    -- | Set to 'True' if the language supports nested block comments.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , nestedComments :: Bool
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
 								    -- | This parser should accept any start characters of identifiers. For
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								    -- example @letter \<|> char \'_\'@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , identStart :: ParsecT s u m Char
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
 								    -- | This parser should accept any legal tail characters of identifiers.
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								    -- For example @alphaNum \<|> char \'_\'@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , identLetter :: ParsecT s u m Char
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
 								    -- | This parser should accept any start characters of operators. For
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								    -- example @oneOf \":!#$%&*+.\/\<=>?\@\\\\^|-~\"@
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , opStart :: ParsecT s u m Char
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
 								    -- | This parser should accept any legal tail characters of operators.
 								    -- Note that this parser should even be defined if the language doesn't
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- support user-defined operators, or otherwise the 'reservedOp' parser
 								    -- won't work correctly.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , opLetter :: ParsecT s u m Char
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								    -- | The list of reserved identifiers.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , reservedNames :: [String]
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								    -- | The list of reserved operators.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , reservedOpNames :: [String]
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								    -- | Set to 'True' if the language is case sensitive.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , caseSensitive :: Bool }
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												improved documentation, fixes #1

											
										
										
											2015-07-30 21:36:54 +03:00
+								-- Token parser
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
 								-- | The type of the record that holds lexical parsers that work on
 								-- @s@ streams with state @u@ over a monad @m@.
-												improved documentation, fixes #1

											
										
										
											2015-07-30 21:36:54 +03:00
+								data TokenParser s u m =
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    TokenParser {
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- | The lexeme parser parses a legal identifier. Returns the identifier
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- string. This parser will fail on identifiers that are reserved
 								    -- words. Legal identifier (start) characters and reserved words are
 								    -- defined in the 'LanguageDef' that is passed to 'makeTokenParser'. An
 								    -- @identifier@ is treated as a single token using 'try'.
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								      identifier :: ParsecT s u m String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | The lexeme parser @reserved name@ parses @symbol
 								    -- name@, but it also checks that the @name@ is not a prefix of a valid
 								    -- identifier. A @reserved@ word is treated as a single token using
 								    -- 'try'.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , reserved :: String -> ParsecT s u m ()
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- | The lexeme parser parses a legal operator. Returns the name of the
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- operator. This parser will fail on any operators that are reserved
 								    -- operators. Legal operator (start) characters and reserved operators
 								    -- are defined in the 'LanguageDef' that is passed to
 								    -- 'makeTokenParser'. An @operator@ is treated as a single token using
 								    -- 'try'.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , operator :: ParsecT s u m String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | The lexeme parser @reservedOp name@ parses @symbol
 								    -- name@, but it also checks that the @name@ is not a prefix of a valid
 								    -- operator. A @reservedOp@ is treated as a single token using 'try'.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , reservedOp :: String -> ParsecT s u m ()
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- | The lexeme parser parses a single literal character. Returns the
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- literal character value. This parser deals correctly with escape
 								    -- sequences. The literal character is parsed according to the grammar
 								    -- rules defined in the Haskell report (which matches most programming
 								    -- languages quite closely).
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , charLiteral :: ParsecT s u m Char
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- | The lexeme parser parses a literal string. Returns the literal
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- string value. This parser deals correctly with escape sequences and
 								    -- gaps. The literal string is parsed according to the grammar rules
 								    -- defined in the Haskell report (which matches most programming
 								    -- languages quite closely).
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , stringLiteral :: ParsecT s u m String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- | The lexeme parser parses an integer (a whole number). This parser
 								    -- /does not/ parse sign. Returns the value of the number. The number
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- can be specified in 'decimal', 'hexadecimal' or 'octal'. The number
 								    -- is parsed according to the grammar rules in the Haskell report.
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , integer :: ParsecT s u m Integer
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- | This is just like 'integer', except it can parse sign.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    , integer' :: ParsecT s u m Integer
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- | The lexeme parser parses a positive whole number in the decimal system.
 								    -- Returns the value of the number.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , decimal :: ParsecT s u m Integer
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- | The lexeme parser parses a positive whole number in the hexadecimal
 								    -- system. The number should be prefixed with \"0x\" or \"0X\". Returns
 								    -- the value of the number.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , hexadecimal :: ParsecT s u m Integer
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- | The lexeme parser parses a positive whole number in the octal system.
 								    -- The number should be prefixed with \"0o\" or \"0O\". Returns the
 								    -- value of the number.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , octal :: ParsecT s u m Integer
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- | @signed p@ tries to parse sign (i.e. \'+\', \'-\', or nothing) and
 								    -- then runs parser @p@, changing sign of its result accordingly. Note
 								    -- that there may be white space after the sign but not before it.
 								    , signed :: forall a . Num a => ParsecT s u m a -> ParsecT s u m a
 								    -- | The lexeme parser parses a floating point value. Returns the value
 								    -- of the number. The number is parsed according to the grammar rules
 								    -- defined in the Haskell report, sign is /not/ parsed, use 'signed' to
 								    -- achieve parsing of signed floating point values.
 								    , float :: ParsecT s u m Double
 								    -- | This is just like 'float', except it can parse sign.
 								    , float' :: ParsecT s u m Double
 								    -- | The lexeme parser parses either 'integer' or a 'float'.
 								    -- Returns the value of the number. This parser deals with any overlap
 								    -- in the grammar rules for integers and floats. The number is parsed
 								    -- according to the grammar rules defined in the Haskell report.
 								    , number :: ParsecT s u m (Either Integer Double)
 								    -- | This is just like 'number', except it can parse sign.
 								    , number' :: ParsecT s u m (Either Integer Double)
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @symbol s@ parses 'string' @s@ and skips
 								    -- trailing white space.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , symbol :: String -> ParsecT s u m String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | @lexeme p@ first applies parser @p@ and then the 'whiteSpace'
 								    -- parser, returning the value of @p@. Every lexical token (lexeme) is
 								    -- defined using @lexeme@, this way every parse starts at a point
 								    -- without white space. Parsers that use @lexeme@ are called /lexeme/
 								    -- parsers in this document.
 								    --
 								    -- The only point where the 'whiteSpace' parser should be called
 								    -- explicitly is the start of the main parser in order to skip any
 								    -- leading white space.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , lexeme :: forall a. ParsecT s u m a -> ParsecT s u m a
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Parses any white space. White space consists of /zero/ or more
 								    -- occurrences of a 'space', a line comment or a block (multi line)
 								    -- comment. Block comments may be nested. How comments are started and
 								    -- ended is defined in the 'LanguageDef' that is passed to
 								    -- 'makeTokenParser'.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , whiteSpace :: ParsecT s u m ()
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @parens p@ parses @p@ enclosed in parentheses,
 								    -- returning the value of @p@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , parens :: forall a. ParsecT s u m a -> ParsecT s u m a
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @braces p@ parses @p@ enclosed in braces (\'{\' and
 								    -- \'}\'), returning the value of @p@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , braces :: forall a. ParsecT s u m a -> ParsecT s u m a
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @angles p@ parses @p@ enclosed in angle brackets (\'\<\'
 								    -- and \'>\'), returning the value of @p@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , angles :: forall a. ParsecT s u m a -> ParsecT s u m a
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @brackets p@ parses @p@ enclosed in brackets (\'[\'
 								    -- and \']\'), returning the value of @p@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , brackets :: forall a. ParsecT s u m a -> ParsecT s u m a
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @semi@ parses the character \';\' and skips any
 								    -- trailing white space. Returns the string \";\".
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , semi :: ParsecT s u m String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @comma@ parses the character \',\' and skips any
 								    -- trailing white space. Returns the string \",\".
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , comma :: ParsecT s u m String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @colon@ parses the character \':\' and skips any
 								    -- trailing white space. Returns the string \":\".
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , colon :: ParsecT s u m String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @dot@ parses the character \'.\' and skips any
 								    -- trailing white space. Returns the string \".\".
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , dot :: ParsecT s u m String
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @semiSep p@ parses /zero/ or more occurrences of @p@
 								    -- separated by 'semi'. Returns a list of values returned by @p@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , semiSep :: forall a . ParsecT s u m a -> ParsecT s u m [a]
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @semiSep1 p@ parses /one/ or more occurrences of @p@
 								    -- separated by 'semi'. Returns a list of values returned by @p@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , semiSep1 :: forall a . ParsecT s u m a -> ParsecT s u m [a]
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @commaSep p@ parses /zero/ or more occurrences of
 								    -- @p@ separated by 'comma'. Returns a list of values returned by @p@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , commaSep :: forall a . ParsecT s u m a -> ParsecT s u m [a]
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- | Lexeme parser @commaSep1 p@ parses /one/ or more occurrences of
 								    -- @p@ separated by 'comma'. Returns a list of values returned by @p@.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    , commaSep1 :: forall a . ParsecT s u m a -> ParsecT s u m [a] }
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- Given a LanguageDef, create a token parser
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
-												improved documentation, fixes #1

											
										
										
											2015-07-30 21:36:54 +03:00
+								-- | The expression @makeTokenParser language@ creates a 'TokenParser'
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- record that contains lexical parsers that are defined using the
 								-- definitions in the @language@ record.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
+								--
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- The use of this function is quite stylized — one imports the appropriate
 								-- language definition and selects the lexical parsers that are needed from
-												improved documentation, fixes #1

											
										
										
											2015-07-30 21:36:54 +03:00
+								-- the resulting 'TokenParser'.
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
+								--
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- > module Main (main) where
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
+								-- >
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- > import Text.Parsec
 								-- > import qualified Text.Parsec.Token as Token
 								-- > import Text.Parsec.Language (haskellDef)
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
+								-- >
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- > -- The parser
 								-- > ...
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								-- >
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- > expr =  parens expr
 								-- >     <|> identifier
 								-- >     <|> ...
-												Haddock documentation for Text.Parsec.Token

											
										
										
											2008-01-22 08:14:30 +03:00
+								-- >
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- > -- The lexer
 								-- > lexer      = Token.makeTokenParser haskellDef
-												refactoring, phase 1

											
										
										
											2015-07-28 16:32:19 +03:00
+								-- >
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								-- > parens     = Token.parens     lexer
 								-- > braces     = Token.braces     lexer
 								-- > identifier = Token.identifier lexer
 								-- > reserved   = Token.reserved   lexer
 								-- > ...
-												improved documentation, fixes #1

											
										
										
											2015-07-30 21:36:54 +03:00
+								makeTokenParser :: Stream s m Char => LanguageDef s u m -> TokenParser s u m
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								makeTokenParser languageDef =
 								    TokenParser
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    { identifier    = identifier
 								    , reserved      = reserved
 								    , operator      = operator
 								    , reservedOp    = reservedOp
 								    , charLiteral   = charLiteral
 								    , stringLiteral = stringLiteral
 								    , integer       = integer
 								    , integer'      = integer'
 								    , decimal       = decimal
 								    , hexadecimal   = hexadecimal
 								    , octal         = octal
 								    , signed        = signed
 								    , float         = float
 								    , float'        = float'
 								    , number        = number
 								    , number'       = number'
 								    , symbol        = symbol
 								    , lexeme        = lexeme
 								    , whiteSpace    = whiteSpace
 								    , parens        = parens
 								    , braces        = braces
 								    , angles        = angles
 								    , brackets      = brackets
 								    , semi          = semi
 								    , comma         = comma
 								    , colon         = colon
 								    , dot           = dot
 								    , semiSep       = semiSep
 								    , semiSep1      = semiSep1
 								    , commaSep      = commaSep
 								    , commaSep1     = commaSep1 }
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
+								    where
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- bracketing
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    parens    = between (symbol "(") (symbol ")")
 								    braces    = between (symbol "{") (symbol "}")
 								    angles    = between (symbol "<") (symbol ">")
 								    brackets  = between (symbol "[") (symbol "]")
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    semi      = symbol ";"
 								    comma     = symbol ","
 								    dot       = symbol "."
 								    colon     = symbol ":"
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    commaSep  = (`sepBy` comma)
 								    semiSep   = (`sepBy` semi)
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    commaSep1 = (`sepBy1` comma)
 								    semiSep1  = (`sepBy1` semi)
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- chars & strings
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    charLiteral = lexeme ( between (char '\'')
 								                                   (char '\'' <?> "end of character")
 								                                   characterChar )
 								                  <?> "character"
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    characterChar = charLetter <|> charEscape <?> "literal character"
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    charEscape = char '\\' >> escapeCode
 								    charLetter = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026'))
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    stringLiteral =
 								        lexeme ((foldr (maybe id (:)) "" <$>
 								                 between (char '"') (char '"' <?> "end of string")
 								                             (many stringChar)) <?> "literal string")
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    stringChar = (Just <$> stringLetter) <|> stringEscape <?> "string character"
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    stringLetter = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026'))
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    stringEscape = char '\\' >>
 								                   ( (escapeGap >> return Nothing)   <|>
 								                     (escapeEmpty >> return Nothing) <|>
 								                     (Just <$> escapeCode) )
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    escapeEmpty = char '&'
 								    escapeGap   = many1 space >> char '\\' <?> "end of string gap"
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
 								    -- escape codes
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    escapeCode = charEsc <|> charNum <|> charAscii <|> charControl
 								                 <?> "escape code"
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    charEsc = choice (parseEsc <$> escMap)
 								        where parseEsc (c, code) = char c >> return code
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    charNum = toEnum . fromInteger <$>
 								              ( decimal <|>
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								               (char 'o' >> nump "0o" octDigit) <|>
 								               (char 'x' >> nump "0x" hexDigit) )
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    charAscii = choice (parseAscii <$> asciiMap)
 								        where parseAscii (asc, code) = try (string asc >> return code)
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    charControl = toEnum . subtract 64 . fromEnum <$> (char '^' >> upper)
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
+								    -- escape code tables
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    escMap      = zip "abfnrtv\\\"\'" "\a\b\f\n\r\t\v\\\"\'"
 								    asciiMap    = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2)
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    ascii2codes = ["BS","HT","LF","VT","FF","CR","SO","SI","EM",
 								                   "FS","GS","RS","US","SP"]
 								    ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK","BEL",
 								                   "DLE","DC1","DC2","DC3","DC4","NAK","SYN","ETB",
 								                   "CAN","SUB","ESC","DEL"]
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    ascii2 = "\b\t\n\v\f\r\SO\SI\EM\FS\GS\RS\US "
 								    ascii3 = "\NUL\SOH\STX\ETX\EOT\ENQ\ACK\a\DLE\DC1\DC2\DC3\DC4\NAK\SYN\ETB\CAN\SUB\ESC\DEL"
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- numbers — integers
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
-												remove word ‘unsigned’ from descriptions

											
										
										
											2015-07-31 22:31:31 +03:00
+								    integer  = decimal <?> "integer"
 								    integer' = signed integer
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    decimal     = lexeme $ nump "" digit
 								    hexadecimal = lexeme $ char '0' >> oneOf "xX" >> nump "0x" hexDigit
 								    octal       = lexeme $ char '0' >> oneOf "oO" >> nump "0o" octDigit
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    nump prefix baseDigit = read . (prefix ++) <$> many1 baseDigit
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    signed p = ($) <$> option id (lexeme sign) <*> p
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    sign :: (Stream s m Char, Num a) => ParsecT s u m (a -> a)
 								    sign = (char '+' *> return id) <|> (char '-' *> return negate)
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- numbers — floats
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
-												remove word ‘unsigned’ from descriptions

											
										
										
											2015-07-31 22:31:31 +03:00
+								    float  = lexeme ffloat <?> "float"
 								    float' = signed float
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												Remove `try` in float parser.

According to this post
(http://blog.ezyang.com/2014/05/parsec-try-a-or-b-considered-harmful/),
the error message could be confusing if using `try`. `"3.a"` is one case
to expose this behavior.

											
										
										
											2015-08-01 00:44:26 +03:00
+								    ffloat = read <$> ffloat'
 								      where
 								        ffloat' = do
 								          decimal <- fDec
 								          rest <- fraction <|> fExp
 								          return $ decimal ++ rest
 								    fraction = do
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								      void $ char '.'
-												Remove `try` in float parser.

According to this post
(http://blog.ezyang.com/2014/05/parsec-try-a-or-b-considered-harmful/),
the error message could be confusing if using `try`. `"3.a"` is one case
to expose this behavior.

											
										
										
											2015-08-01 00:44:26 +03:00
+								      decimal <- fDec
 								      exp <- option "" fExp
 								      return $ '.' : decimal ++  exp
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    fDec = many1 digit
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    fExp = do
 								      expChar <- oneOf "eE"
 								      signStr <- option "" (pure <$> oneOf "+-")
 								      decimal <- fDec
 								      return $ expChar : signStr ++ decimal
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    -- numbers — a more general case
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												remove word ‘unsigned’ from descriptions

											
										
										
											2015-07-31 22:31:31 +03:00
+								    number  = (Right <$> try float)  <|> (Left <$> integer)  <?> "number"
-												rewritten parsing of numbers, fixes #2 and #3

Changed how numbers are parsed because they were parsed in a naïf and
hairy way. Added tests for #2 and #3 (in old Parsec project these are
number 35 and 39 respectively).

* Since Haskell report doesn't say anything about sign, I've made
  ‘integer’ and ‘float’ parse numbers without sign.

* Removed ‘natural’ parser, it's equal to new ‘integer’ now.

* Renamed ‘naturalOrFloat’ → ‘number’ — this doesn't parse sign too.

* Added new combinator ‘signed’ to parse all sorts of signed numbers.

* For the sake of convenience I've added ‘integer'’, ‘float'’, and
 ‘number'’ combinators that also can parse signed numbers out of box.

											
										
										
											2015-07-31 14:30:38 +03:00
+								    number' = (Right <$> try float') <|> (Left <$> integer') <?> "number"
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- operators & reserved ops
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    reservedOp name =
 								        lexeme $ try $ do
 								          void $ string name
 								          notFollowedBy (opLetter languageDef) <?> ("end of " ++ show name)
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    operator =
 								        lexeme $ try $ do
 								          name <- oper
 								          if isReservedOp name
 								          then unexpected ("reserved operator " ++ show name)
 								          else return name
 								    oper = ((:) <$> opStart languageDef <*> many (opLetter languageDef))
 								           <?> "operator"
 								    isReservedOp = isReserved . sort $ reservedOpNames languageDef
 								    -- identifiers & reserved words
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    reserved name =
 								        lexeme $ try $ do
 								          void $ caseString name
 								          notFollowedBy (identLetter languageDef) <?> ("end of " ++ show name)
 								    caseString name
 								        | caseSensitive languageDef = string name
 								        | otherwise                 = walk name >> return name
 								        where walk = foldr (\c -> ((caseChar c <?> show name) >>)) (return ())
 								              caseChar c
 								                  | isAlpha c = char (toLower c) <|> char (toUpper c)
 								                  | otherwise = char c
 								    identifier =
 								        lexeme $ try $ do
 								          name <- ident
 								          if isReservedName name
 								          then unexpected ("reserved word " ++ show name)
 								          else return name
 								    ident = ((:) <$> identStart languageDef <*> many (identLetter languageDef))
 								            <?> "identifier"
 								    isReservedName name = isReserved theReservedNames caseName
 								        where caseName
 								                  | caseSensitive languageDef = name
 								                  | otherwise                 = toLower <$> name
 								    isReserved names name = scan names
 								        where scan []     = False
 								              scan (r:rs) = case compare r name of
 								                              LT  -> scan rs
 								                              EQ  -> True
 								                              GT  -> False
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    theReservedNames
 								        | caseSensitive languageDef = sort reserved
 								        | otherwise                 = sort . fmap (fmap toLower) $ reserved
 								        where reserved = reservedNames languageDef
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    -- white space & symbols
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    symbol = lexeme . string
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    lexeme p = p <* whiteSpace
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
 								    whiteSpace
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								        | noLine && noMulti = skipMany (simpleSpace      <?> "")
 								        | noLine            = skipMany (simpleSpace      <|>
 								                                        multiLineComment <?> "")
 								        | noMulti           = skipMany (simpleSpace      <|>
 								                                        oneLineComment   <?> "")
 								        | otherwise         = skipMany (simpleSpace      <|>
 								                                        oneLineComment   <|>
 								                                        multiLineComment <?> "")
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
+								        where
 								          noLine  = null (commentLine languageDef)
 								          noMulti = null (commentStart languageDef)
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    simpleSpace = skipMany1 (satisfy isSpace)
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    oneLineComment = void (try (string (commentLine languageDef))
 								                          >> skipMany (satisfy (/= '\n')))
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    multiLineComment = try (string (commentStart languageDef)) >> inComment
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								    inComment = if nestedComments languageDef
 								                then inCommentMulti
 								                else inCommentSingle
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
 								    inCommentMulti
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								        =  void (try . string $ commentEnd languageDef)
 								       <|> (multiLineComment            >> inCommentMulti)
 								       <|> (skipMany1 (noneOf startEnd) >> inCommentMulti)
 								       <|> (oneOf startEnd              >> inCommentMulti)
 								       <?> "end of comment"
-												Initial import

											
										
										
											2008-01-13 20:53:15 +03:00
 								    inCommentSingle
-												refactoring, phase 3

											
										
										
											2015-07-30 18:45:06 +03:00
+								        =  void (try . string $ commentEnd languageDef)
 								       <|> (skipMany1 (noneOf startEnd) >> inCommentSingle)
 								       <|> (oneOf startEnd              >> inCommentSingle)
 								       <?> "end of comment"
 								    startEnd = nub $ (++) <$> commentEnd <*> commentStart $ languageDef