mirror of
https://github.com/mrkkrp/megaparsec.git
synced 2024-12-24 16:51:38 +03:00
cosmetic changes (indentation, etc)
This commit is contained in:
parent
77a54394b5
commit
287a777e6c
@ -40,114 +40,113 @@
|
||||
-- imported explicitly along with two modules mentioned above.
|
||||
|
||||
module Text.Megaparsec
|
||||
(
|
||||
-- * Parsers
|
||||
ParsecT
|
||||
, Parsec
|
||||
, token
|
||||
, tokens
|
||||
, runParserT
|
||||
, runParser
|
||||
, parse
|
||||
, parseMaybe
|
||||
, parseTest
|
||||
, getPosition
|
||||
, getInput
|
||||
, getState
|
||||
, putState
|
||||
, modifyState
|
||||
-- * Combinators
|
||||
, (A.<|>)
|
||||
-- $assocbo
|
||||
, A.many
|
||||
-- $many
|
||||
, A.some
|
||||
-- $some
|
||||
, A.optional
|
||||
-- $optional
|
||||
, (<?>)
|
||||
, label
|
||||
, try
|
||||
, unexpected
|
||||
, choice
|
||||
, skipMany
|
||||
, skipSome
|
||||
, count
|
||||
, between
|
||||
, option
|
||||
, optionMaybe
|
||||
, sepBy
|
||||
, sepBy1
|
||||
, endBy
|
||||
, endBy1
|
||||
, sepEndBy
|
||||
, sepEndBy1
|
||||
, chainl
|
||||
, chainl1
|
||||
, chainr
|
||||
, chainr1
|
||||
, eof
|
||||
, notFollowedBy
|
||||
, manyTill
|
||||
, lookAhead
|
||||
, anyToken
|
||||
-- * Character parsing
|
||||
, newline
|
||||
, crlf
|
||||
, eol
|
||||
, tab
|
||||
, space
|
||||
, controlChar
|
||||
, spaceChar
|
||||
, upperChar
|
||||
, lowerChar
|
||||
, letterChar
|
||||
, alphaNumChar
|
||||
, printChar
|
||||
, digitChar
|
||||
, octDigitChar
|
||||
, hexDigitChar
|
||||
, markChar
|
||||
, numberChar
|
||||
, punctuationChar
|
||||
, symbolChar
|
||||
, separatorChar
|
||||
, asciiChar
|
||||
, latin1Char
|
||||
, charCategory
|
||||
, char
|
||||
, anyChar
|
||||
, oneOf
|
||||
, noneOf
|
||||
, satisfy
|
||||
, string
|
||||
-- * Error messages
|
||||
, Message (..)
|
||||
, messageString
|
||||
, badMessage
|
||||
, ParseError
|
||||
, errorPos
|
||||
, errorMessages
|
||||
, errorIsUnknown
|
||||
-- * Position
|
||||
, SourcePos
|
||||
, SourceName
|
||||
, Line
|
||||
, Column
|
||||
, sourceName
|
||||
, sourceLine
|
||||
, sourceColumn
|
||||
-- * Low-level operations
|
||||
, Stream (..)
|
||||
, Consumed (..)
|
||||
, Reply (..)
|
||||
, State (..)
|
||||
, tokenPrim
|
||||
, getParserState
|
||||
, setParserState
|
||||
, updateParserState
|
||||
, setPosition
|
||||
, setInput )
|
||||
( -- * Parsers
|
||||
ParsecT
|
||||
, Parsec
|
||||
, token
|
||||
, tokens
|
||||
, runParserT
|
||||
, runParser
|
||||
, parse
|
||||
, parseMaybe
|
||||
, parseTest
|
||||
, getPosition
|
||||
, getInput
|
||||
, getState
|
||||
, putState
|
||||
, modifyState
|
||||
-- * Combinators
|
||||
, (A.<|>)
|
||||
-- $assocbo
|
||||
, A.many
|
||||
-- $many
|
||||
, A.some
|
||||
-- $some
|
||||
, A.optional
|
||||
-- $optional
|
||||
, (<?>)
|
||||
, label
|
||||
, try
|
||||
, unexpected
|
||||
, choice
|
||||
, skipMany
|
||||
, skipSome
|
||||
, count
|
||||
, between
|
||||
, option
|
||||
, optionMaybe
|
||||
, sepBy
|
||||
, sepBy1
|
||||
, endBy
|
||||
, endBy1
|
||||
, sepEndBy
|
||||
, sepEndBy1
|
||||
, chainl
|
||||
, chainl1
|
||||
, chainr
|
||||
, chainr1
|
||||
, eof
|
||||
, notFollowedBy
|
||||
, manyTill
|
||||
, lookAhead
|
||||
, anyToken
|
||||
-- * Character parsing
|
||||
, newline
|
||||
, crlf
|
||||
, eol
|
||||
, tab
|
||||
, space
|
||||
, controlChar
|
||||
, spaceChar
|
||||
, upperChar
|
||||
, lowerChar
|
||||
, letterChar
|
||||
, alphaNumChar
|
||||
, printChar
|
||||
, digitChar
|
||||
, octDigitChar
|
||||
, hexDigitChar
|
||||
, markChar
|
||||
, numberChar
|
||||
, punctuationChar
|
||||
, symbolChar
|
||||
, separatorChar
|
||||
, asciiChar
|
||||
, latin1Char
|
||||
, charCategory
|
||||
, char
|
||||
, anyChar
|
||||
, oneOf
|
||||
, noneOf
|
||||
, satisfy
|
||||
, string
|
||||
-- * Error messages
|
||||
, Message (..)
|
||||
, messageString
|
||||
, badMessage
|
||||
, ParseError
|
||||
, errorPos
|
||||
, errorMessages
|
||||
, errorIsUnknown
|
||||
-- * Position
|
||||
, SourcePos
|
||||
, SourceName
|
||||
, Line
|
||||
, Column
|
||||
, sourceName
|
||||
, sourceLine
|
||||
, sourceColumn
|
||||
-- * Low-level operations
|
||||
, Stream (..)
|
||||
, Consumed (..)
|
||||
, Reply (..)
|
||||
, State (..)
|
||||
, tokenPrim
|
||||
, getParserState
|
||||
, setParserState
|
||||
, updateParserState
|
||||
, setPosition
|
||||
, setInput )
|
||||
where
|
||||
|
||||
import qualified Control.Applicative as A
|
||||
|
@ -11,9 +11,9 @@
|
||||
-- Convenience definitions for working with 'C.ByteString'.
|
||||
|
||||
module Text.Megaparsec.ByteString
|
||||
( Parser
|
||||
, GenParser
|
||||
, parseFromFile )
|
||||
( Parser
|
||||
, GenParser
|
||||
, parseFromFile )
|
||||
where
|
||||
|
||||
import Text.Megaparsec.Error
|
||||
|
@ -11,9 +11,9 @@
|
||||
-- Convenience definitions for working with lazy 'C.ByteString'.
|
||||
|
||||
module Text.Megaparsec.ByteString.Lazy
|
||||
( Parser
|
||||
, GenParser
|
||||
, parseFromFile )
|
||||
( Parser
|
||||
, GenParser
|
||||
, parseFromFile )
|
||||
where
|
||||
|
||||
import Text.Megaparsec.Error
|
||||
|
@ -12,36 +12,36 @@
|
||||
-- Commonly used character parsers.
|
||||
|
||||
module Text.Megaparsec.Char
|
||||
( newline
|
||||
, crlf
|
||||
, eol
|
||||
, tab
|
||||
, space
|
||||
, controlChar
|
||||
, spaceChar
|
||||
, upperChar
|
||||
, lowerChar
|
||||
, letterChar
|
||||
, alphaNumChar
|
||||
, printChar
|
||||
, digitChar
|
||||
, octDigitChar
|
||||
, hexDigitChar
|
||||
, markChar
|
||||
, numberChar
|
||||
, punctuationChar
|
||||
, symbolChar
|
||||
, separatorChar
|
||||
, asciiChar
|
||||
, latin1Char
|
||||
, charCategory
|
||||
, categoryName
|
||||
, char
|
||||
, anyChar
|
||||
, oneOf
|
||||
, noneOf
|
||||
, satisfy
|
||||
, string )
|
||||
( newline
|
||||
, crlf
|
||||
, eol
|
||||
, tab
|
||||
, space
|
||||
, controlChar
|
||||
, spaceChar
|
||||
, upperChar
|
||||
, lowerChar
|
||||
, letterChar
|
||||
, alphaNumChar
|
||||
, printChar
|
||||
, digitChar
|
||||
, octDigitChar
|
||||
, hexDigitChar
|
||||
, markChar
|
||||
, numberChar
|
||||
, punctuationChar
|
||||
, symbolChar
|
||||
, separatorChar
|
||||
, asciiChar
|
||||
, latin1Char
|
||||
, charCategory
|
||||
, categoryName
|
||||
, char
|
||||
, anyChar
|
||||
, oneOf
|
||||
, noneOf
|
||||
, satisfy
|
||||
, string )
|
||||
where
|
||||
|
||||
import Control.Applicative ((<|>))
|
||||
@ -267,8 +267,8 @@ noneOf cs = satisfy (`notElem` cs)
|
||||
|
||||
satisfy :: Stream s m Char => (Char -> Bool) -> ParsecT s u m Char
|
||||
satisfy f = tokenPrim nextPos testChar
|
||||
where nextPos pos x _ = updatePosChar pos x
|
||||
testChar x = if f x then Just x else Nothing
|
||||
where nextPos pos x _ = updatePosChar pos x
|
||||
testChar x = if f x then Just x else Nothing
|
||||
|
||||
-- | @string s@ parses a sequence of characters given by @s@. Returns
|
||||
-- the parsed string (i.e. @s@).
|
||||
|
@ -12,28 +12,28 @@
|
||||
-- Commonly used generic combinators.
|
||||
|
||||
module Text.Megaparsec.Combinator
|
||||
( choice
|
||||
, count
|
||||
, between
|
||||
, option
|
||||
, optionMaybe
|
||||
, skipMany
|
||||
, skipSome
|
||||
, sepBy
|
||||
, sepBy1
|
||||
, endBy
|
||||
, endBy1
|
||||
, sepEndBy
|
||||
, sepEndBy1
|
||||
, chainl
|
||||
, chainl1
|
||||
, chainr
|
||||
, chainr1
|
||||
, eof
|
||||
, notFollowedBy
|
||||
, manyTill
|
||||
, lookAhead
|
||||
, anyToken )
|
||||
( choice
|
||||
, count
|
||||
, between
|
||||
, option
|
||||
, optionMaybe
|
||||
, skipMany
|
||||
, skipSome
|
||||
, sepBy
|
||||
, sepBy1
|
||||
, endBy
|
||||
, endBy1
|
||||
, sepEndBy
|
||||
, sepEndBy1
|
||||
, chainl
|
||||
, chainl1
|
||||
, chainr
|
||||
, chainr1
|
||||
, eof
|
||||
, notFollowedBy
|
||||
, manyTill
|
||||
, lookAhead
|
||||
, anyToken )
|
||||
where
|
||||
|
||||
import Control.Applicative ((<|>), many, some)
|
||||
@ -64,13 +64,6 @@ option x p = p <|> return x
|
||||
optionMaybe :: Stream s m t => ParsecT s u m a -> ParsecT s u m (Maybe a)
|
||||
optionMaybe p = option Nothing (Just <$> p)
|
||||
|
||||
-- -- | @optional p@ tries to apply parser @p@. It will parse @p@ or nothing.
|
||||
-- -- It only fails if @p@ fails after consuming input. It discards the result
|
||||
-- -- of @p@.
|
||||
|
||||
-- optional :: Stream s m t => ParsecT s u m a -> ParsecT s u m ()
|
||||
-- optional p = (p *> return ()) <|> return ()
|
||||
|
||||
-- | @between open close p@ parses @open@, followed by @p@ and @close@.
|
||||
-- Returns the value returned by @p@.
|
||||
--
|
||||
@ -190,7 +183,7 @@ chainl p op x = chainl1 p op <|> return x
|
||||
chainl1 :: Stream s m t =>
|
||||
ParsecT s u m a -> ParsecT s u m (a -> a -> a) -> ParsecT s u m a
|
||||
chainl1 p op = p >>= rest
|
||||
where rest x = ((($ x) <$> op <*> p) >>= rest) <|> return x
|
||||
where rest x = ((($ x) <$> op <*> p) >>= rest) <|> return x
|
||||
|
||||
-- | @chainr1 p op@ parses /one/ or more occurrences of |p|,
|
||||
-- separated by @op@ Returns a value obtained by a /right/ associative
|
||||
@ -200,7 +193,7 @@ chainl1 p op = p >>= rest
|
||||
chainr1 :: Stream s m t =>
|
||||
ParsecT s u m a -> ParsecT s u m (a -> a -> a) -> ParsecT s u m a
|
||||
chainr1 p op = p >>= rest
|
||||
where rest x = (($ x) <$> op <*> chainr1 p op) <|> return x
|
||||
where rest x = (($ x) <$> op <*> chainr1 p op) <|> return x
|
||||
|
||||
-- | This parser only succeeds at the end of the input. This is not a
|
||||
-- primitive parser but it is defined using 'notFollowedBy'.
|
||||
|
@ -12,20 +12,20 @@
|
||||
-- Parse errors.
|
||||
|
||||
module Text.Megaparsec.Error
|
||||
( Message (..)
|
||||
, messageString
|
||||
, badMessage
|
||||
, ParseError
|
||||
, errorPos
|
||||
, errorMessages
|
||||
, errorIsUnknown
|
||||
, newErrorMessage
|
||||
, newErrorUnknown
|
||||
, addErrorMessage
|
||||
, setErrorMessage
|
||||
, setErrorPos
|
||||
, mergeError
|
||||
, showMessages )
|
||||
( Message (..)
|
||||
, messageString
|
||||
, badMessage
|
||||
, ParseError
|
||||
, errorPos
|
||||
, errorMessages
|
||||
, errorIsUnknown
|
||||
, newErrorMessage
|
||||
, newErrorUnknown
|
||||
, addErrorMessage
|
||||
, setErrorMessage
|
||||
, setErrorPos
|
||||
, mergeError
|
||||
, showMessages )
|
||||
where
|
||||
|
||||
import Data.Bool (bool)
|
||||
@ -43,27 +43,27 @@ import Text.Megaparsec.Pos
|
||||
-- The fine distinction between different kinds of parse errors allows the
|
||||
-- system to generate quite good error messages for the user.
|
||||
|
||||
data Message = Unexpected !String
|
||||
| Expected !String
|
||||
| Message !String
|
||||
deriving Show
|
||||
data Message
|
||||
= Unexpected !String
|
||||
| Expected !String
|
||||
| Message !String
|
||||
deriving Show
|
||||
|
||||
instance Enum Message where
|
||||
fromEnum (Unexpected _) = 0
|
||||
fromEnum (Expected _) = 1
|
||||
fromEnum (Message _) = 2
|
||||
toEnum _ = error "Text.Megaparsec.Error: toEnum is undefined for Message"
|
||||
fromEnum (Unexpected _) = 0
|
||||
fromEnum (Expected _) = 1
|
||||
fromEnum (Message _) = 2
|
||||
toEnum _ = error "Text.Megaparsec.Error: toEnum is undefined for Message"
|
||||
|
||||
instance Eq Message where
|
||||
m1 == m2 =
|
||||
fromEnum m1 == fromEnum m2 && messageString m1 == messageString m2
|
||||
m1 == m2 = fromEnum m1 == fromEnum m2 && messageString m1 == messageString m2
|
||||
|
||||
instance Ord Message where
|
||||
compare m1 m2 =
|
||||
case compare (fromEnum m1) (fromEnum m2) of
|
||||
LT -> LT
|
||||
EQ -> compare (messageString m1) (messageString m2)
|
||||
GT -> GT
|
||||
compare m1 m2 =
|
||||
case compare (fromEnum m1) (fromEnum m2) of
|
||||
LT -> LT
|
||||
EQ -> compare (messageString m1) (messageString m2)
|
||||
GT -> GT
|
||||
|
||||
-- | Extract the message string from an error message.
|
||||
|
||||
@ -84,16 +84,16 @@ badMessage = null . messageString
|
||||
-- 'Eq' type classes.
|
||||
|
||||
data ParseError = ParseError
|
||||
{ -- | Extract the source position from @ParseError@.
|
||||
errorPos :: !SourcePos
|
||||
-- | Extract the list of error messages from @ParseError@.
|
||||
, errorMessages :: [Message] }
|
||||
{ -- | Extract the source position from @ParseError@.
|
||||
errorPos :: !SourcePos
|
||||
-- | Extract the list of error messages from @ParseError@.
|
||||
, errorMessages :: [Message] }
|
||||
|
||||
instance Show ParseError where
|
||||
show e = show (errorPos e) ++ ":\n" ++ showMessages (errorMessages e)
|
||||
show e = show (errorPos e) ++ ":\n" ++ showMessages (errorMessages e)
|
||||
|
||||
instance Eq ParseError where
|
||||
l == r = errorPos l == errorPos r && errorMessages l == errorMessages r
|
||||
l == r = errorPos l == errorPos r && errorMessages l == errorMessages r
|
||||
|
||||
-- | Test whether given @ParseError@ has associated collection of error
|
||||
-- messages. Return @True@ if it has none and @False@ otherwise.
|
||||
@ -122,9 +122,9 @@ newErrorMessage m pos = ParseError pos $ bool [m] [] (badMessage m)
|
||||
|
||||
addErrorMessage :: Message -> ParseError -> ParseError
|
||||
addErrorMessage m (ParseError pos ms) =
|
||||
ParseError pos $ bool (pre ++ [m] ++ post) ms (badMessage m)
|
||||
where pre = filter (< m) ms
|
||||
post = filter (> m) ms
|
||||
ParseError pos $ bool (pre ++ [m] ++ post) ms (badMessage m)
|
||||
where pre = filter (< m) ms
|
||||
post = filter (> m) ms
|
||||
|
||||
-- | @setErrorMessage m err@ returns @err@ with message @m@ added. This
|
||||
-- function also deletes all existing error messages that were created with
|
||||
@ -133,8 +133,8 @@ addErrorMessage m (ParseError pos ms) =
|
||||
|
||||
setErrorMessage :: Message -> ParseError -> ParseError
|
||||
setErrorMessage m e@(ParseError pos ms) =
|
||||
bool (addErrorMessage m $ ParseError pos xs) e (badMessage m)
|
||||
where xs = filter ((/= fromEnum m) . fromEnum) ms
|
||||
bool (addErrorMessage m $ ParseError pos xs) e (badMessage m)
|
||||
where xs = filter ((/= fromEnum m) . fromEnum) ms
|
||||
|
||||
-- | @setErrorPos pos err@ returns @ParseError@ identical to @err@, but with
|
||||
-- position @pos@.
|
||||
@ -147,31 +147,31 @@ setErrorPos pos (ParseError _ ms) = ParseError pos ms
|
||||
|
||||
mergeError :: ParseError -> ParseError -> ParseError
|
||||
mergeError e1@(ParseError pos1 ms1) e2@(ParseError pos2 ms2) =
|
||||
case pos1 `compare` pos2 of
|
||||
LT -> e1
|
||||
EQ -> foldr addErrorMessage (ParseError pos1 ms1) ms2
|
||||
GT -> e2
|
||||
case pos1 `compare` pos2 of
|
||||
LT -> e1
|
||||
EQ -> foldr addErrorMessage (ParseError pos1 ms1) ms2
|
||||
GT -> e2
|
||||
|
||||
-- | @showMessages ms@ transforms list of error messages @ms@ into
|
||||
-- their textual representation.
|
||||
|
||||
showMessages :: [Message] -> String
|
||||
showMessages [] = "unknown parse error"
|
||||
showMessages ms = intercalate "\n" $
|
||||
filter (not . null) [unexpected', expected', messages']
|
||||
where (unexpected, ms') = span ((== 0) . fromEnum) ms
|
||||
(expected, messages) = span ((== 1) . fromEnum) ms'
|
||||
showMessages ms =
|
||||
intercalate "\n" $ filter (not . null) [unexpected', expected', messages']
|
||||
where (unexpected, ms') = span ((== 0) . fromEnum) ms
|
||||
(expected, messages) = span ((== 1) . fromEnum) ms'
|
||||
|
||||
unexpected' = showMany "unexpected " unexpected
|
||||
expected' = showMany "expecting " expected
|
||||
messages' = showMany "" messages
|
||||
unexpected' = showMany "unexpected " unexpected
|
||||
expected' = showMany "expecting " expected
|
||||
messages' = showMany "" messages
|
||||
|
||||
showMany pre msgs =
|
||||
case messageString <$> msgs of
|
||||
[] -> ""
|
||||
xs | null pre -> commasOr xs
|
||||
| otherwise -> pre ++ commasOr xs
|
||||
showMany pre msgs =
|
||||
case messageString <$> msgs of
|
||||
[] -> ""
|
||||
xs | null pre -> commasOr xs
|
||||
| otherwise -> pre ++ commasOr xs
|
||||
|
||||
commasOr [] = ""
|
||||
commasOr [x] = x
|
||||
commasOr xs = intercalate ", " (init xs) ++ " or " ++ last xs
|
||||
commasOr [] = ""
|
||||
commasOr [x] = x
|
||||
commasOr xs = intercalate ", " (init xs) ++ " or " ++ last xs
|
||||
|
@ -13,10 +13,10 @@
|
||||
-- operators.
|
||||
|
||||
module Text.Megaparsec.Expr
|
||||
( Assoc (..)
|
||||
, Operator (..)
|
||||
, OperatorTable
|
||||
, buildExpressionParser )
|
||||
( Assoc (..)
|
||||
, Operator (..)
|
||||
, OperatorTable
|
||||
, buildExpressionParser )
|
||||
where
|
||||
|
||||
import Control.Applicative ((<|>))
|
||||
@ -29,18 +29,18 @@ import Text.Megaparsec.Prim
|
||||
-- or none.
|
||||
|
||||
data Assoc
|
||||
= AssocNone
|
||||
| AssocLeft
|
||||
| AssocRight
|
||||
= AssocNone
|
||||
| AssocLeft
|
||||
| AssocRight
|
||||
|
||||
-- | This data type specifies operators that work on values of type @a@.
|
||||
-- An operator is either binary infix or unary prefix or postfix. A binary
|
||||
-- operator has also an associated associativity.
|
||||
|
||||
data Operator s u m a
|
||||
= Infix (ParsecT s u m (a -> a -> a)) Assoc
|
||||
| Prefix (ParsecT s u m (a -> a))
|
||||
| Postfix (ParsecT s u m (a -> a))
|
||||
= Infix (ParsecT s u m (a -> a -> a)) Assoc
|
||||
| Prefix (ParsecT s u m (a -> a))
|
||||
| Postfix (ParsecT s u m (a -> a))
|
||||
|
||||
-- | An @OperatorTable s u m a@ is a list of @Operator s u m a@
|
||||
-- lists. The list is ordered in descending precedence. All operators in one
|
||||
@ -83,48 +83,48 @@ buildExpressionParser ops simpleExpr = foldl' makeParser simpleExpr ops
|
||||
makeParser :: (Foldable t, Stream s m t1) =>
|
||||
ParsecT s u m b -> t (Operator s u m b) -> ParsecT s u m b
|
||||
makeParser term ops =
|
||||
termP >>= \x -> rasP x <|> lasP x <|> nasP x <|> return x <?> "operator"
|
||||
where (ras, las, nas, prefix, postfix) = foldr splitOp ([],[],[],[],[]) ops
|
||||
termP >>= \x -> rasP x <|> lasP x <|> nasP x <|> return x <?> "operator"
|
||||
where (ras, las, nas, prefix, postfix) = foldr splitOp ([],[],[],[],[]) ops
|
||||
|
||||
rasOp = choice ras
|
||||
lasOp = choice las
|
||||
nasOp = choice nas
|
||||
prefixOp = choice prefix <?> ""
|
||||
postfixOp = choice postfix <?> ""
|
||||
rasOp = choice ras
|
||||
lasOp = choice las
|
||||
nasOp = choice nas
|
||||
prefixOp = choice prefix <?> ""
|
||||
postfixOp = choice postfix <?> ""
|
||||
|
||||
ambigious assoc op =
|
||||
try $ op >> fail ("ambiguous use of a " ++ assoc
|
||||
++ " associative operator")
|
||||
ambigious assoc op =
|
||||
try $ op >> fail ("ambiguous use of a " ++ assoc
|
||||
++ " associative operator")
|
||||
|
||||
ambigiousRight = ambigious "right" rasOp
|
||||
ambigiousLeft = ambigious "left" lasOp
|
||||
ambigiousNon = ambigious "non" nasOp
|
||||
ambigiousRight = ambigious "right" rasOp
|
||||
ambigiousLeft = ambigious "left" lasOp
|
||||
ambigiousNon = ambigious "non" nasOp
|
||||
|
||||
termP = do
|
||||
pre <- prefixP
|
||||
x <- term
|
||||
post <- postfixP
|
||||
return $ post (pre x)
|
||||
termP = do
|
||||
pre <- prefixP
|
||||
x <- term
|
||||
post <- postfixP
|
||||
return $ post (pre x)
|
||||
|
||||
postfixP = postfixOp <|> return id
|
||||
prefixP = prefixOp <|> return id
|
||||
postfixP = postfixOp <|> return id
|
||||
prefixP = prefixOp <|> return id
|
||||
|
||||
rasP x = do { f <- rasOp; y <- termP >>= rasP1; return (f x y)}
|
||||
<|> ambigiousLeft
|
||||
<|> ambigiousNon
|
||||
rasP x = do { f <- rasOp; y <- termP >>= rasP1; return (f x y)}
|
||||
<|> ambigiousLeft
|
||||
<|> ambigiousNon
|
||||
|
||||
rasP1 x = rasP x <|> return x
|
||||
rasP1 x = rasP x <|> return x
|
||||
|
||||
lasP x = do { f <- lasOp; y <- termP; lasP1 (f x y) }
|
||||
<|> ambigiousRight
|
||||
<|> ambigiousNon
|
||||
lasP x = do { f <- lasOp; y <- termP; lasP1 (f x y) }
|
||||
<|> ambigiousRight
|
||||
<|> ambigiousNon
|
||||
|
||||
lasP1 x = lasP x <|> return x
|
||||
lasP1 x = lasP x <|> return x
|
||||
|
||||
nasP x = do
|
||||
f <- nasOp
|
||||
y <- termP
|
||||
ambigiousRight <|> ambigiousLeft <|> ambigiousNon <|> return (f x y)
|
||||
nasP x = do
|
||||
f <- nasOp
|
||||
y <- termP
|
||||
ambigiousRight <|> ambigiousLeft <|> ambigiousNon <|> return (f x y)
|
||||
|
||||
splitOp :: Operator s u m a ->
|
||||
( [ParsecT s u m (a -> a -> a)]
|
||||
@ -138,11 +138,11 @@ splitOp :: Operator s u m a ->
|
||||
, [ParsecT s u m (a -> a)]
|
||||
, [ParsecT s u m (a -> a)] )
|
||||
splitOp (Infix op assoc) (ras, las, nas, prefix, postfix) =
|
||||
case assoc of
|
||||
AssocNone -> (ras, las, op:nas, prefix, postfix)
|
||||
AssocLeft -> (ras, op:las, nas, prefix, postfix)
|
||||
AssocRight -> (op:ras, las, nas, prefix, postfix)
|
||||
case assoc of
|
||||
AssocNone -> (ras, las, op:nas, prefix, postfix)
|
||||
AssocLeft -> (ras, op:las, nas, prefix, postfix)
|
||||
AssocRight -> (op:ras, las, nas, prefix, postfix)
|
||||
splitOp (Prefix op) (ras, las, nas, prefix, postfix) =
|
||||
(ras, las, nas, op:prefix, postfix)
|
||||
(ras, las, nas, op:prefix, postfix)
|
||||
splitOp (Postfix op) (ras, las, nas, prefix, postfix) =
|
||||
(ras, las, nas, prefix, op:postfix)
|
||||
(ras, las, nas, prefix, op:postfix)
|
||||
|
@ -13,12 +13,12 @@
|
||||
-- to instantiate a token parser (see "Text.Megaparsec.Token").
|
||||
|
||||
module Text.Megaparsec.Language
|
||||
( LanguageDef
|
||||
, emptyDef
|
||||
, haskellStyle
|
||||
, javaStyle
|
||||
, haskellDef
|
||||
, mondrianDef )
|
||||
( LanguageDef
|
||||
, emptyDef
|
||||
, haskellStyle
|
||||
, javaStyle
|
||||
, haskellDef
|
||||
, mondrianDef )
|
||||
where
|
||||
|
||||
import Control.Monad.Identity
|
||||
@ -34,18 +34,18 @@ import Text.Megaparsec.Token
|
||||
|
||||
emptyDef :: LanguageDef String st Identity
|
||||
emptyDef =
|
||||
LanguageDef
|
||||
{ commentStart = ""
|
||||
, commentEnd = ""
|
||||
, commentLine = ""
|
||||
, nestedComments = True
|
||||
, identStart = letterChar <|> char '_'
|
||||
, identLetter = alphaNumChar <|> oneOf "_'"
|
||||
, opStart = opLetter emptyDef
|
||||
, opLetter = oneOf ":!#$%&*+./<=>?@\\^|-~"
|
||||
, reservedOpNames = []
|
||||
, reservedNames = []
|
||||
, caseSensitive = True }
|
||||
LanguageDef
|
||||
{ commentStart = ""
|
||||
, commentEnd = ""
|
||||
, commentLine = ""
|
||||
, nestedComments = True
|
||||
, identStart = letterChar <|> char '_'
|
||||
, identLetter = alphaNumChar <|> oneOf "_'"
|
||||
, opStart = opLetter emptyDef
|
||||
, opLetter = oneOf ":!#$%&*+./<=>?@\\^|-~"
|
||||
, reservedOpNames = []
|
||||
, reservedNames = []
|
||||
, caseSensitive = True }
|
||||
|
||||
-- | This is a minimal token definition for Haskell-style languages. It
|
||||
-- defines the style of comments, valid identifiers and case sensitivity. It
|
||||
@ -53,18 +53,18 @@ emptyDef =
|
||||
|
||||
haskellStyle :: LanguageDef String u Identity
|
||||
haskellStyle =
|
||||
emptyDef
|
||||
{ commentStart = "{-"
|
||||
, commentEnd = "-}"
|
||||
, commentLine = "--"
|
||||
, nestedComments = True
|
||||
, identStart = letterChar
|
||||
, identLetter = alphaNumChar <|> oneOf "_'"
|
||||
, opStart = opLetter haskellStyle
|
||||
, opLetter = oneOf ":!#$%&*+./<=>?@\\^|-~"
|
||||
, reservedOpNames = []
|
||||
, reservedNames = []
|
||||
, caseSensitive = True }
|
||||
emptyDef
|
||||
{ commentStart = "{-"
|
||||
, commentEnd = "-}"
|
||||
, commentLine = "--"
|
||||
, nestedComments = True
|
||||
, identStart = letterChar
|
||||
, identLetter = alphaNumChar <|> oneOf "_'"
|
||||
, opStart = opLetter haskellStyle
|
||||
, opLetter = oneOf ":!#$%&*+./<=>?@\\^|-~"
|
||||
, reservedOpNames = []
|
||||
, reservedNames = []
|
||||
, caseSensitive = True }
|
||||
|
||||
-- | This is a minimal token definition for Java-style languages. It
|
||||
-- defines the style of comments, valid identifiers and case sensitivity. It
|
||||
@ -72,44 +72,44 @@ haskellStyle =
|
||||
|
||||
javaStyle :: LanguageDef String u Identity
|
||||
javaStyle =
|
||||
emptyDef
|
||||
{ commentStart = "/*"
|
||||
, commentEnd = "*/"
|
||||
, commentLine = "//"
|
||||
, nestedComments = True
|
||||
, identStart = letterChar
|
||||
, identLetter = alphaNumChar <|> oneOf "_'"
|
||||
, reservedNames = []
|
||||
, reservedOpNames = []
|
||||
, caseSensitive = False }
|
||||
emptyDef
|
||||
{ commentStart = "/*"
|
||||
, commentEnd = "*/"
|
||||
, commentLine = "//"
|
||||
, nestedComments = True
|
||||
, identStart = letterChar
|
||||
, identLetter = alphaNumChar <|> oneOf "_'"
|
||||
, reservedNames = []
|
||||
, reservedOpNames = []
|
||||
, caseSensitive = False }
|
||||
|
||||
-- | The language definition for the Haskell language.
|
||||
|
||||
haskellDef :: LanguageDef String u Identity
|
||||
haskellDef =
|
||||
haskell98Def
|
||||
{ identLetter = identLetter haskell98Def <|> char '#'
|
||||
, reservedNames = reservedNames haskell98Def ++
|
||||
[ "foreign", "import", "export", "primitive"
|
||||
, "_ccall_", "_casm_", "forall"] }
|
||||
haskell98Def
|
||||
{ identLetter = identLetter haskell98Def <|> char '#'
|
||||
, reservedNames = reservedNames haskell98Def ++
|
||||
[ "foreign", "import", "export", "primitive"
|
||||
, "_ccall_", "_casm_", "forall"] }
|
||||
|
||||
-- | The language definition for the language Haskell98.
|
||||
|
||||
haskell98Def :: LanguageDef String u Identity
|
||||
haskell98Def =
|
||||
haskellStyle
|
||||
{ reservedOpNames = ["::","..","=","\\","|","<-","->","@","~","=>"]
|
||||
, reservedNames = [ "let", "in", "case", "of", "if", "then", "else"
|
||||
, "data", "type", "class", "default", "deriving"
|
||||
, "do", "import", "infix", "infixl", "infixr"
|
||||
, "instance", "module", "newtype", "where"
|
||||
, "primitive" ] }
|
||||
haskellStyle
|
||||
{ reservedOpNames = ["::","..","=","\\","|","<-","->","@","~","=>"]
|
||||
, reservedNames = [ "let", "in", "case", "of", "if", "then", "else"
|
||||
, "data", "type", "class", "default", "deriving"
|
||||
, "do", "import", "infix", "infixl", "infixr"
|
||||
, "instance", "module", "newtype", "where"
|
||||
, "primitive" ] }
|
||||
|
||||
-- | The language definition for the language Mondrian.
|
||||
|
||||
mondrianDef :: LanguageDef String u Identity
|
||||
mondrianDef =
|
||||
javaStyle
|
||||
{ reservedNames = [ "case", "class", "default", "extends"
|
||||
, "import", "in", "let", "new", "of", "package" ]
|
||||
, caseSensitive = True }
|
||||
javaStyle
|
||||
{ reservedNames = [ "case", "class", "default", "extends"
|
||||
, "import", "in", "let", "new", "of", "package" ]
|
||||
, caseSensitive = True }
|
||||
|
@ -15,12 +15,12 @@
|
||||
-- Workshop 2001.
|
||||
|
||||
module Text.Megaparsec.Perm
|
||||
( StreamPermParser -- abstract
|
||||
, permute
|
||||
, (<||>)
|
||||
, (<$$>)
|
||||
, (<|?>)
|
||||
, (<$?>) )
|
||||
( StreamPermParser -- abstract
|
||||
, permute
|
||||
, (<||>)
|
||||
, (<$$>)
|
||||
, (<|?>)
|
||||
, (<$?>) )
|
||||
where
|
||||
|
||||
import Control.Monad.Identity
|
||||
@ -89,7 +89,7 @@ f <$?> xp = newperm f <|?> xp
|
||||
data StreamPermParser s st a = Perm (Maybe a) [StreamBranch s st a]
|
||||
|
||||
data StreamBranch s st a =
|
||||
forall b. Branch (StreamPermParser s st (b -> a)) (Parsec s st b)
|
||||
forall b. Branch (StreamPermParser s st (b -> a)) (Parsec s st b)
|
||||
|
||||
-- | The parser @permute perm@ parses a permutation of parser described
|
||||
-- by @perm@. For example, suppose we want to parse a permutation of: an
|
||||
@ -123,8 +123,7 @@ add perm@(Perm _mf fs) p = Perm Nothing (first : fmap insert fs)
|
||||
|
||||
addopt :: Stream s Identity tok => StreamPermParser s st (a -> b) ->
|
||||
a -> Parsec s st a -> StreamPermParser s st b
|
||||
addopt perm@(Perm mf fs) x p
|
||||
= Perm (fmap ($ x) mf) (first:map insert fs)
|
||||
addopt perm@(Perm mf fs) x p = Perm (fmap ($ x) mf) (first : map insert fs)
|
||||
where first = Branch perm p
|
||||
insert (Branch perm' p') = Branch (addopt (mapPerms flip perm') x p) p'
|
||||
|
||||
|
@ -12,22 +12,22 @@
|
||||
-- Textual source positions.
|
||||
|
||||
module Text.Megaparsec.Pos
|
||||
( SourceName
|
||||
, Line
|
||||
, Column
|
||||
, SourcePos
|
||||
, sourceLine
|
||||
, sourceColumn
|
||||
, sourceName
|
||||
, incSourceLine
|
||||
, incSourceColumn
|
||||
, setSourceLine
|
||||
, setSourceColumn
|
||||
, setSourceName
|
||||
, newPos
|
||||
, initialPos
|
||||
, updatePosChar
|
||||
, updatePosString )
|
||||
( SourceName
|
||||
, Line
|
||||
, Column
|
||||
, SourcePos
|
||||
, sourceLine
|
||||
, sourceColumn
|
||||
, sourceName
|
||||
, incSourceLine
|
||||
, incSourceColumn
|
||||
, setSourceLine
|
||||
, setSourceColumn
|
||||
, setSourceName
|
||||
, newPos
|
||||
, initialPos
|
||||
, updatePosChar
|
||||
, updatePosString )
|
||||
where
|
||||
|
||||
import Data.Data (Data)
|
||||
@ -52,13 +52,13 @@ type Column = Int
|
||||
-- class.
|
||||
|
||||
data SourcePos = SourcePos
|
||||
{ -- | Extract the name of the source from a source position.
|
||||
sourceName :: SourceName
|
||||
-- | Extract the line number from a source position.
|
||||
, sourceLine :: !Line
|
||||
-- | Extract the column number from a source position.
|
||||
, sourceColumn :: !Column }
|
||||
deriving (Eq, Ord, Data, Typeable)
|
||||
{ -- | Extract the name of the source from a source position.
|
||||
sourceName :: SourceName
|
||||
-- | Extract the line number from a source position.
|
||||
, sourceLine :: !Line
|
||||
-- | Extract the column number from a source position.
|
||||
, sourceColumn :: !Column }
|
||||
deriving (Eq, Ord, Data, Typeable)
|
||||
|
||||
instance Show SourcePos where
|
||||
show (SourcePos n l c)
|
||||
@ -113,11 +113,11 @@ setSourceColumn (SourcePos n l _) = SourcePos n l
|
||||
|
||||
updatePosChar :: SourcePos -> Char -> SourcePos
|
||||
updatePosChar (SourcePos n l c) ch =
|
||||
case ch of
|
||||
'\n' -> SourcePos n (l + 1) 1
|
||||
'\r' -> SourcePos n l 1
|
||||
'\t' -> SourcePos n l (c + 8 - ((c - 1) `rem` 8))
|
||||
_ -> SourcePos n l (c + 1)
|
||||
case ch of
|
||||
'\n' -> SourcePos n (l + 1) 1
|
||||
'\r' -> SourcePos n l 1
|
||||
'\t' -> SourcePos n l (c + 8 - ((c - 1) `rem` 8))
|
||||
_ -> SourcePos n l (c + 1)
|
||||
|
||||
-- | The expression @updatePosString pos s@ updates the source position
|
||||
-- @pos@ by calling 'updatePosChar' on every character in @s@, i.e. @foldl
|
||||
|
@ -14,39 +14,39 @@
|
||||
{-# OPTIONS_HADDOCK not-home #-}
|
||||
|
||||
module Text.Megaparsec.Prim
|
||||
( State (..)
|
||||
, Stream (..)
|
||||
, Consumed (..)
|
||||
, Reply (..)
|
||||
, ParsecT
|
||||
, Parsec
|
||||
, runParsecT
|
||||
, mkPT
|
||||
, unknownError
|
||||
, unexpected
|
||||
, mergeErrorReply
|
||||
, (<?>)
|
||||
, label
|
||||
, runParserT
|
||||
, runParser
|
||||
, parse
|
||||
, parseMaybe
|
||||
, parseTest
|
||||
, try
|
||||
, lookAhead
|
||||
, token
|
||||
, tokens
|
||||
, tokenPrim
|
||||
, getPosition
|
||||
, getInput
|
||||
, setPosition
|
||||
, setInput
|
||||
, getParserState
|
||||
, setParserState
|
||||
, updateParserState
|
||||
, getState
|
||||
, putState
|
||||
, modifyState )
|
||||
( State (..)
|
||||
, Stream (..)
|
||||
, Consumed (..)
|
||||
, Reply (..)
|
||||
, ParsecT
|
||||
, Parsec
|
||||
, runParsecT
|
||||
, mkPT
|
||||
, unknownError
|
||||
, unexpected
|
||||
, mergeErrorReply
|
||||
, (<?>)
|
||||
, label
|
||||
, runParserT
|
||||
, runParser
|
||||
, parse
|
||||
, parseMaybe
|
||||
, parseTest
|
||||
, try
|
||||
, lookAhead
|
||||
, token
|
||||
, tokens
|
||||
, tokenPrim
|
||||
, getPosition
|
||||
, getInput
|
||||
, setPosition
|
||||
, setInput
|
||||
, getParserState
|
||||
, setParserState
|
||||
, updateParserState
|
||||
, getState
|
||||
, putState
|
||||
, modifyState )
|
||||
where
|
||||
|
||||
import Data.Bool (bool)
|
||||
@ -75,9 +75,9 @@ import Text.Megaparsec.ShowToken
|
||||
-- user state @u@.
|
||||
|
||||
data State s u = State
|
||||
{ stateInput :: s
|
||||
, statePos :: !SourcePos
|
||||
, stateUser :: !u }
|
||||
{ stateInput :: s
|
||||
, statePos :: !SourcePos
|
||||
, stateUser :: !u }
|
||||
|
||||
-- | An instance of @Stream s m t@ has stream type @s@, underlying monad @m@
|
||||
-- and token type @t@ determined by the stream.
|
||||
@ -90,26 +90,26 @@ data State s u = State
|
||||
-- you are using the monad in a non-trivial way.
|
||||
|
||||
class (Monad m, ShowToken t) => Stream s m t | s -> t where
|
||||
uncons :: s -> m (Maybe (t, s))
|
||||
uncons :: s -> m (Maybe (t, s))
|
||||
|
||||
instance (Monad m, ShowToken t) => Stream [t] m t where
|
||||
uncons [] = return Nothing
|
||||
uncons (t:ts) = return $ Just (t, ts)
|
||||
{-# INLINE uncons #-}
|
||||
uncons [] = return Nothing
|
||||
uncons (t:ts) = return $ Just (t, ts)
|
||||
{-# INLINE uncons #-}
|
||||
|
||||
instance Monad m => Stream CL.ByteString m Char where
|
||||
uncons = return . CL.uncons
|
||||
uncons = return . CL.uncons
|
||||
|
||||
instance Monad m => Stream C.ByteString m Char where
|
||||
uncons = return . C.uncons
|
||||
uncons = return . C.uncons
|
||||
|
||||
instance Monad m => Stream T.Text m Char where
|
||||
uncons = return . T.uncons
|
||||
{-# INLINE uncons #-}
|
||||
uncons = return . T.uncons
|
||||
{-# INLINE uncons #-}
|
||||
|
||||
instance Monad m => Stream TL.Text m Char where
|
||||
uncons = return . TL.uncons
|
||||
{-# INLINE uncons #-}
|
||||
uncons = return . TL.uncons
|
||||
{-# INLINE uncons #-}
|
||||
|
||||
-- | This data structure represents an aspect of result of parser's
|
||||
-- work. The two constructors have the following meaning:
|
||||
@ -120,8 +120,7 @@ instance Monad m => Stream TL.Text m Char where
|
||||
--
|
||||
-- You shouldn't really need to know this. See also: 'Reply'.
|
||||
|
||||
data Consumed a = Consumed a
|
||||
| Empty !a
|
||||
data Consumed a = Consumed a | Empty !a
|
||||
|
||||
-- | This data structure represents an aspect of result of parser's
|
||||
-- work. The two constructors have the following meaning:
|
||||
@ -131,8 +130,7 @@ data Consumed a = Consumed a
|
||||
--
|
||||
-- You shouldn't really need to know this. See also 'Consumed'.
|
||||
|
||||
data Reply s u a = Ok a !(State s u) ParseError
|
||||
| Error ParseError
|
||||
data Reply s u a = Ok a !(State s u) ParseError | Error ParseError
|
||||
|
||||
-- | @ParsecT s u m a@ is a parser with stream type @s@, user state type @u@,
|
||||
-- underlying monad @m@ and return type @a@. Parsec is strict in the user
|
||||
@ -141,12 +139,12 @@ data Reply s u a = Ok a !(State s u) ParseError
|
||||
-- indirection.
|
||||
|
||||
newtype ParsecT s u m a = ParsecT
|
||||
{ unParser :: forall b . State s u
|
||||
-> (a -> State s u -> ParseError -> m b) -- consumed ok
|
||||
-> (ParseError -> m b) -- consumed err
|
||||
-> (a -> State s u -> ParseError -> m b) -- empty ok
|
||||
-> (ParseError -> m b) -- empty err
|
||||
-> m b }
|
||||
{ unParser :: forall b . State s u
|
||||
-> (a -> State s u -> ParseError -> m b) -- consumed ok
|
||||
-> (ParseError -> m b) -- consumed err
|
||||
-> (a -> State s u -> ParseError -> m b) -- empty ok
|
||||
-> (ParseError -> m b) -- empty err
|
||||
-> m b }
|
||||
|
||||
-- | @Parsec@ is non-transformer variant of more general @ParsecT@
|
||||
-- monad-transformer.
|
||||
@ -158,23 +156,23 @@ instance Functor (ParsecT s u m) where
|
||||
|
||||
parsecMap :: (a -> b) -> ParsecT s u m a -> ParsecT s u m b
|
||||
parsecMap f p = ParsecT $ \s cok cerr eok eerr ->
|
||||
unParser p s (cok . f) cerr (eok . f) eerr
|
||||
unParser p s (cok . f) cerr (eok . f) eerr
|
||||
|
||||
instance A.Applicative (ParsecT s u m) where
|
||||
pure = return
|
||||
(<*>) = ap
|
||||
(*>) = (>>)
|
||||
p1 <* p2 = do { x1 <- p1 ; void p2 ; return x1 }
|
||||
pure = return
|
||||
(<*>) = ap
|
||||
(*>) = (>>)
|
||||
p1 <* p2 = do { x1 <- p1 ; void p2 ; return x1 }
|
||||
|
||||
instance A.Alternative (ParsecT s u m) where
|
||||
empty = mzero
|
||||
(<|>) = mplus
|
||||
many p = reverse <$> manyAccum (:) p
|
||||
empty = mzero
|
||||
(<|>) = mplus
|
||||
many p = reverse <$> manyAccum (:) p
|
||||
|
||||
instance Monad (ParsecT s u m) where
|
||||
return = parserReturn
|
||||
(>>=) = parserBind
|
||||
fail = parserFail
|
||||
return = parserReturn
|
||||
(>>=) = parserBind
|
||||
fail = parserFail
|
||||
|
||||
parserReturn :: a -> ParsecT s u m a
|
||||
parserReturn x = ParsecT $ \s _ _ eok _ -> eok x s (unknownError s)
|
||||
@ -182,42 +180,42 @@ parserReturn x = ParsecT $ \s _ _ eok _ -> eok x s (unknownError s)
|
||||
parserBind :: ParsecT s u m a -> (a -> ParsecT s u m b) -> ParsecT s u m b
|
||||
{-# INLINE parserBind #-}
|
||||
parserBind m k = ParsecT $ \s cok cerr eok eerr ->
|
||||
let
|
||||
-- consumed-okay case for m
|
||||
mcok x st err =
|
||||
let
|
||||
-- if (k x) consumes, those go straight up
|
||||
pcok = cok
|
||||
pcerr = cerr
|
||||
-- if (k x) doesn't consume input, but is okay, we still
|
||||
-- return in the consumed continuation
|
||||
peok x' s' err' = cok x' s' (mergeError err err')
|
||||
-- if (k x) doesn't consume input, but errors, we return the
|
||||
-- error in the 'consumed-error' continuation
|
||||
peerr err' = cerr (mergeError err err')
|
||||
in unParser (k x) st pcok pcerr peok peerr
|
||||
let
|
||||
-- consumed-okay case for m
|
||||
mcok x st err =
|
||||
let
|
||||
-- if (k x) consumes, those go straight up
|
||||
pcok = cok
|
||||
pcerr = cerr
|
||||
-- if (k x) doesn't consume input, but is okay, we still return in
|
||||
-- the consumed continuation
|
||||
peok x' s' err' = cok x' s' (mergeError err err')
|
||||
-- if (k x) doesn't consume input, but errors, we return the
|
||||
-- error in the 'consumed-error' continuation
|
||||
peerr err' = cerr (mergeError err err')
|
||||
in unParser (k x) st pcok pcerr peok peerr
|
||||
|
||||
-- empty-ok case for m
|
||||
meok x st err =
|
||||
let
|
||||
-- in these cases, (k x) can return as empty
|
||||
pcok = cok
|
||||
peok x' s' err' = eok x' s' (mergeError err err')
|
||||
pcerr = cerr
|
||||
peerr err' = eerr (mergeError err err')
|
||||
in unParser (k x) st pcok pcerr peok peerr
|
||||
-- empty-ok case for m
|
||||
meok x st err =
|
||||
let
|
||||
-- in these cases, (k x) can return as empty
|
||||
pcok = cok
|
||||
peok x' s' err' = eok x' s' (mergeError err err')
|
||||
pcerr = cerr
|
||||
peerr err' = eerr (mergeError err err')
|
||||
in unParser (k x) st pcok pcerr peok peerr
|
||||
|
||||
-- consumed-error case for m
|
||||
mcerr = cerr
|
||||
-- consumed-error case for m
|
||||
mcerr = cerr
|
||||
|
||||
-- empty-error case for m
|
||||
meerr = eerr
|
||||
-- empty-error case for m
|
||||
meerr = eerr
|
||||
|
||||
in unParser m s mcok mcerr meok meerr
|
||||
in unParser m s mcok mcerr meok meerr
|
||||
|
||||
parserFail :: String -> ParsecT s u m a
|
||||
parserFail msg = ParsecT $ \s _ _ _ eerr ->
|
||||
eerr $ newErrorMessage (Message msg) (statePos s)
|
||||
eerr $ newErrorMessage (Message msg) (statePos s)
|
||||
|
||||
-- | Low-level unpacking of the ParsecT type. To actually run parser see
|
||||
-- 'runParserT' and 'runParser'.
|
||||
@ -225,10 +223,10 @@ parserFail msg = ParsecT $ \s _ _ _ eerr ->
|
||||
runParsecT :: Monad m =>
|
||||
ParsecT s u m a -> State s u -> m (Consumed (m (Reply s u a)))
|
||||
runParsecT p s = unParser p s cok cerr eok eerr
|
||||
where cok a s' err = return . Consumed . return $ Ok a s' err
|
||||
cerr err = return . Consumed . return $ Error err
|
||||
eok a s' err = return . Empty . return $ Ok a s' err
|
||||
eerr err = return . Empty . return $ Error err
|
||||
where cok a s' err = return . Consumed . return $ Ok a s' err
|
||||
cerr err = return . Consumed . return $ Error err
|
||||
eok a s' err = return . Empty . return $ Ok a s' err
|
||||
eerr err = return . Empty . return $ Error err
|
||||
|
||||
-- | Low-level creation of the ParsecT type. You really shouldn't have to do
|
||||
-- this.
|
||||
@ -236,66 +234,63 @@ runParsecT p s = unParser p s cok cerr eok eerr
|
||||
mkPT :: Monad m =>
|
||||
(State s u -> m (Consumed (m (Reply s u a)))) -> ParsecT s u m a
|
||||
mkPT k = ParsecT $ \s cok cerr eok eerr -> do
|
||||
cons <- k s
|
||||
case cons of
|
||||
Consumed mrep -> do
|
||||
rep <- mrep
|
||||
case rep of
|
||||
Ok x s' err -> cok x s' err
|
||||
Error err -> cerr err
|
||||
Empty mrep -> do
|
||||
rep <- mrep
|
||||
case rep of
|
||||
Ok x s' err -> eok x s' err
|
||||
Error err -> eerr err
|
||||
cons <- k s
|
||||
case cons of
|
||||
Consumed mrep -> do
|
||||
rep <- mrep
|
||||
case rep of
|
||||
Ok x s' err -> cok x s' err
|
||||
Error err -> cerr err
|
||||
Empty mrep -> do
|
||||
rep <- mrep
|
||||
case rep of
|
||||
Ok x s' err -> eok x s' err
|
||||
Error err -> eerr err
|
||||
|
||||
instance MonadIO m => MonadIO (ParsecT s u m) where
|
||||
liftIO = lift . liftIO
|
||||
liftIO = lift . liftIO
|
||||
|
||||
instance MonadReader r m => MonadReader r (ParsecT s u m) where
|
||||
ask = lift ask
|
||||
local f p = mkPT $ \s -> local f (runParsecT p s)
|
||||
ask = lift ask
|
||||
local f p = mkPT $ \s -> local f (runParsecT p s)
|
||||
|
||||
instance MonadState s m => MonadState s (ParsecT s' u m) where
|
||||
get = lift get
|
||||
put = lift . put
|
||||
get = lift get
|
||||
put = lift . put
|
||||
|
||||
instance MonadCont m => MonadCont (ParsecT s u m) where
|
||||
callCC f = mkPT $ \s ->
|
||||
callCC $ \c ->
|
||||
runParsecT (f (\a -> mkPT $ \s' -> c (pack s' a))) s
|
||||
callCC f = mkPT $ \s ->
|
||||
callCC $ \c ->
|
||||
runParsecT (f (\a -> mkPT $ \s' -> c (pack s' a))) s
|
||||
|
||||
where pack s a= Empty $ return (Ok a s (unknownError s))
|
||||
where pack s a= Empty $ return (Ok a s (unknownError s))
|
||||
|
||||
instance MonadError e m => MonadError e (ParsecT s u m) where
|
||||
throwError = lift . throwError
|
||||
p `catchError` h = mkPT $ \s ->
|
||||
runParsecT p s `catchError` \e ->
|
||||
runParsecT (h e) s
|
||||
throwError = lift . throwError
|
||||
p `catchError` h = mkPT $ \s ->
|
||||
runParsecT p s `catchError` \e ->
|
||||
runParsecT (h e) s
|
||||
|
||||
instance MonadPlus (ParsecT s u m) where
|
||||
mzero = parserZero
|
||||
mplus = parserPlus
|
||||
mzero = parserZero
|
||||
mplus = parserPlus
|
||||
|
||||
parserZero :: ParsecT s u m a
|
||||
parserZero = ParsecT $ \s _ _ _ eerr -> eerr $ unknownError s
|
||||
|
||||
parserPlus :: ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
|
||||
{-# INLINE parserPlus #-}
|
||||
parserPlus m n
|
||||
= ParsecT $ \s cok cerr eok eerr ->
|
||||
let
|
||||
meerr err =
|
||||
let
|
||||
neok y s' err' = eok y s' (mergeError err err')
|
||||
neerr err' = eerr $ mergeError err err'
|
||||
in unParser n s cok cerr neok neerr
|
||||
in unParser m s cok cerr eok meerr
|
||||
parserPlus m n = ParsecT $ \s cok cerr eok eerr ->
|
||||
let meerr err =
|
||||
let neok y s' err' = eok y s' (mergeError err err')
|
||||
neerr err' = eerr $ mergeError err err'
|
||||
in unParser n s cok cerr neok neerr
|
||||
in unParser m s cok cerr eok meerr
|
||||
|
||||
instance MonadTrans (ParsecT s u) where
|
||||
lift amb = ParsecT $ \s _ _ eok _ -> do
|
||||
a <- amb
|
||||
eok a s $ unknownError s
|
||||
lift amb = ParsecT $ \s _ _ eok _ -> do
|
||||
a <- amb
|
||||
eok a s (unknownError s)
|
||||
|
||||
-- Errors
|
||||
|
||||
@ -313,19 +308,19 @@ unknownError state = newErrorUnknown (statePos state)
|
||||
|
||||
unexpected :: Stream s m t => String -> ParsecT s u m a
|
||||
unexpected msg = ParsecT $ \s _ _ _ eerr ->
|
||||
eerr $ newErrorMessage (Unexpected msg) (statePos s)
|
||||
eerr $ newErrorMessage (Unexpected msg) (statePos s)
|
||||
|
||||
-- | @mergeErrorReply e reply@ returns @reply@ with error @e@ added.
|
||||
|
||||
mergeErrorReply :: ParseError -> Reply s u a -> Reply s u a
|
||||
mergeErrorReply e1 reply
|
||||
= case reply of
|
||||
Ok x state e2 -> Ok x state (mergeError e1 e2)
|
||||
Error e2 -> Error (mergeError e1 e2)
|
||||
= case reply of
|
||||
Ok x state e2 -> Ok x state (mergeError e1 e2)
|
||||
Error e2 -> Error (mergeError e1 e2)
|
||||
|
||||
-- Basic combinators
|
||||
|
||||
infix 0 <?>
|
||||
infix 0 <?>
|
||||
|
||||
-- | The parser @p \<?> msg@ behaves as parser @p@, but whenever the
|
||||
-- parser @p@ fails /without consuming any input/, it replaces expect error
|
||||
@ -348,17 +343,17 @@ label p msg = labels p [msg]
|
||||
|
||||
labels :: ParsecT s u m a -> [String] -> ParsecT s u m a
|
||||
labels p msgs = ParsecT $ \s cok cerr eok eerr ->
|
||||
let eok' x s' error' = eok x s' $ if errorIsUnknown error'
|
||||
then error'
|
||||
else setExpectErrors error' msgs
|
||||
eerr' err = eerr $ setExpectErrors err msgs
|
||||
in unParser p s cok cerr eok' eerr'
|
||||
where
|
||||
setExpectErrors err [] = setErrorMessage (Expected "end of input") err
|
||||
setExpectErrors err [m] = setErrorMessage (Expected m) err
|
||||
setExpectErrors err (m:ms)
|
||||
= foldr (\msg' err' -> addErrorMessage (Expected msg') err')
|
||||
(setErrorMessage (Expected m) err) ms
|
||||
let eok' x s' error' = eok x s' $ if errorIsUnknown error'
|
||||
then error'
|
||||
else setExpectErrors error' msgs
|
||||
eerr' err = eerr $ setExpectErrors err msgs
|
||||
in unParser p s cok cerr eok' eerr'
|
||||
where
|
||||
setExpectErrors err [] = setErrorMessage (Expected "end of input") err
|
||||
setExpectErrors err [m] = setErrorMessage (Expected m) err
|
||||
setExpectErrors err (m:ms)
|
||||
= foldr (\msg' err' -> addErrorMessage (Expected msg') err')
|
||||
(setErrorMessage (Expected m) err) ms
|
||||
|
||||
-- Running a parser
|
||||
|
||||
@ -371,16 +366,15 @@ labels p msgs = ParsecT $ \s cok cerr eok eerr ->
|
||||
|
||||
runParserT :: Stream s m t =>
|
||||
ParsecT s u m a -> u -> SourceName -> s -> m (Either ParseError a)
|
||||
runParserT p u name s
|
||||
= do res <- runParsecT p (State s (initialPos name) u)
|
||||
r <- parserReply res
|
||||
case r of
|
||||
Ok x _ _ -> return (Right x)
|
||||
Error err -> return (Left err)
|
||||
where parserReply res
|
||||
= case res of
|
||||
Consumed r -> r
|
||||
Empty r -> r
|
||||
runParserT p u name s = do
|
||||
res <- runParsecT p (State s (initialPos name) u)
|
||||
r <- parserReply res
|
||||
case r of
|
||||
Ok x _ _ -> return (Right x)
|
||||
Error err -> return (Left err)
|
||||
where parserReply res = case res of
|
||||
Consumed r -> r
|
||||
Empty r -> r
|
||||
|
||||
-- | The most general way to run a parser over the identity monad.
|
||||
-- @runParser p state filePath input@ runs parser @p@ on the input list of
|
||||
@ -415,18 +409,18 @@ parse p = runParser p ()
|
||||
|
||||
parseMaybe :: Stream s Identity t => Parsec s () a -> s -> Maybe a
|
||||
parseMaybe p s =
|
||||
case parse p "" s of
|
||||
Left _ -> Nothing
|
||||
Right x -> Just x
|
||||
case parse p "" s of
|
||||
Left _ -> Nothing
|
||||
Right x -> Just x
|
||||
|
||||
-- | The expression @parseTest p input@ applies a parser @p@ against
|
||||
-- input @input@ and prints the result to stdout. Used for testing.
|
||||
|
||||
parseTest :: (Stream s Identity t, Show a) => Parsec s () a -> s -> IO ()
|
||||
parseTest p input =
|
||||
case parse p "" input of
|
||||
Left err -> putStr "parse error at " >> print err
|
||||
Right x -> print x
|
||||
case parse p "" input of
|
||||
Left err -> putStr "parse error at " >> print err
|
||||
Right x -> print x
|
||||
|
||||
-- | The parser @try p@ behaves like parser @p@, except that it
|
||||
-- pretends that it hasn't consumed any input when an error occurs.
|
||||
@ -469,8 +463,8 @@ try p = ParsecT $ \s cok _ eok eerr -> unParser p s cok eerr eok eerr
|
||||
|
||||
lookAhead :: Stream s m t => ParsecT s u m a -> ParsecT s u m a
|
||||
lookAhead p = ParsecT $ \s _ cerr eok eerr -> do
|
||||
let eok' a _ _ = eok a s (newErrorUnknown (statePos s))
|
||||
unParser p s eok' cerr eok' eerr
|
||||
let eok' a _ _ = eok a s (newErrorUnknown (statePos s))
|
||||
unParser p s eok' cerr eok' eerr
|
||||
|
||||
-- | The parser @token posFromTok testTok@ accepts a token @t@ with result
|
||||
-- @x@ when the function @testTok t@ returns @'Just' x@. The source position
|
||||
@ -491,10 +485,10 @@ token :: Stream s Identity t =>
|
||||
-> (t -> Maybe a) -- ^ Matching function for the token to parse.
|
||||
-> Parsec s u a
|
||||
token tokpos = tokenPrim nextpos
|
||||
where nextpos _ tok ts =
|
||||
case runIdentity (uncons ts) of
|
||||
Nothing -> tokpos tok
|
||||
Just (tok', _) -> tokpos tok'
|
||||
where nextpos _ tok ts =
|
||||
case runIdentity (uncons ts) of
|
||||
Nothing -> tokpos tok
|
||||
Just (tok', _) -> tokpos tok'
|
||||
|
||||
-- | The parser @tokens posFromTok@ parses list of tokens and returns
|
||||
-- it. The resulting parser will use 'showToken' to pretty-print the
|
||||
@ -510,26 +504,25 @@ tokens :: (Stream s m t, Eq t, ShowToken [t]) =>
|
||||
-> ParsecT s u m [t]
|
||||
{-# INLINE tokens #-}
|
||||
tokens _ [] = ParsecT $ \s _ _ eok _ -> eok [] s $ unknownError s
|
||||
tokens nextposs tts
|
||||
= ParsecT $ \(State input pos u) cok cerr _ eerr ->
|
||||
let errExpect x = setErrorMessage (Expected $ showToken tts)
|
||||
(newErrorMessage (Unexpected x) pos)
|
||||
tokens nextposs tts = ParsecT $ \(State input pos u) cok cerr _ eerr ->
|
||||
let errExpect x = setErrorMessage (Expected $ showToken tts)
|
||||
(newErrorMessage (Unexpected x) pos)
|
||||
|
||||
walk [] _ rs = let pos' = nextposs pos tts
|
||||
s' = State rs pos' u
|
||||
in cok tts s' $ newErrorUnknown pos'
|
||||
walk (t:ts) i rs = do
|
||||
sr <- uncons rs
|
||||
let errorCont = if i == 0 then eerr else cerr
|
||||
what = bool (showToken $ take i tts) "end of input" (i == 0)
|
||||
case sr of
|
||||
Nothing -> errorCont . errExpect $ what
|
||||
Just (x,xs)
|
||||
| t == x -> walk ts (succ i) xs
|
||||
| otherwise -> errorCont . errExpect . showToken $
|
||||
take i tts ++ [x]
|
||||
walk [] _ rs = let pos' = nextposs pos tts
|
||||
s' = State rs pos' u
|
||||
in cok tts s' $ newErrorUnknown pos'
|
||||
walk (t:ts) i rs = do
|
||||
sr <- uncons rs
|
||||
let errorCont = if i == 0 then eerr else cerr
|
||||
what = bool (showToken $ take i tts) "end of input" (i == 0)
|
||||
case sr of
|
||||
Nothing -> errorCont . errExpect $ what
|
||||
Just (x,xs)
|
||||
| t == x -> walk ts (succ i) xs
|
||||
| otherwise -> errorCont . errExpect . showToken $
|
||||
take i tts ++ [x]
|
||||
|
||||
in walk tts 0 input
|
||||
in walk tts 0 input
|
||||
|
||||
-- | The parser @tokenPrim nextPos testTok@ accepts a token @t@ with result
|
||||
-- @x@ when the function @testTok t@ returns @'Just' x@. The position of the
|
||||
@ -560,48 +553,46 @@ tokenPrimEx :: Stream s m t =>
|
||||
|
||||
tokenPrimEx nextpos Nothing test
|
||||
= ParsecT $ \(State input pos user) cok _ _ eerr -> do
|
||||
r <- uncons input
|
||||
case r of
|
||||
Nothing -> eerr $ unexpectError "end of input" pos
|
||||
Just (c,cs)
|
||||
-> case test c of
|
||||
Just x -> let newpos = nextpos pos c cs
|
||||
newstate = State cs newpos user
|
||||
in seq newpos $ seq newstate $
|
||||
cok x newstate (newErrorUnknown newpos)
|
||||
Nothing -> eerr $ unexpectError (showToken c) pos
|
||||
r <- uncons input
|
||||
case r of
|
||||
Nothing -> eerr $ unexpectError "end of input" pos
|
||||
Just (c,cs)
|
||||
-> case test c of
|
||||
Just x -> let newpos = nextpos pos c cs
|
||||
newstate = State cs newpos user
|
||||
in seq newpos $ seq newstate $
|
||||
cok x newstate (newErrorUnknown newpos)
|
||||
Nothing -> eerr $ unexpectError (showToken c) pos
|
||||
|
||||
tokenPrimEx nextpos (Just nextState) test
|
||||
= ParsecT $ \(State input pos user) cok _ _ eerr -> do
|
||||
r <- uncons input
|
||||
case r of
|
||||
Nothing -> eerr $ unexpectError "end of input" pos
|
||||
Just (c,cs)
|
||||
-> case test c of
|
||||
Just x -> let newpos = nextpos pos c cs
|
||||
newUser = nextState pos c cs user
|
||||
newstate = State cs newpos newUser
|
||||
in seq newpos $ seq newstate $
|
||||
cok x newstate (newErrorUnknown newpos)
|
||||
Nothing -> eerr $ unexpectError (showToken c) pos
|
||||
r <- uncons input
|
||||
case r of
|
||||
Nothing -> eerr $ unexpectError "end of input" pos
|
||||
Just (c,cs)
|
||||
-> case test c of
|
||||
Just x -> let newpos = nextpos pos c cs
|
||||
newUser = nextState pos c cs user
|
||||
newstate = State cs newpos newUser
|
||||
in seq newpos $ seq newstate $
|
||||
cok x newstate (newErrorUnknown newpos)
|
||||
Nothing -> eerr $ unexpectError (showToken c) pos
|
||||
|
||||
unexpectError :: String -> SourcePos -> ParseError
|
||||
unexpectError msg = newErrorMessage (Unexpected msg)
|
||||
|
||||
manyAccum :: (a -> [a] -> [a]) -> ParsecT s u m a -> ParsecT s u m [a]
|
||||
manyAccum acc p =
|
||||
ParsecT $ \s cok cerr eok _ ->
|
||||
let walk xs x s' _ =
|
||||
unParser p s'
|
||||
(seq xs $ walk $ acc x xs) -- consumed-ok
|
||||
cerr -- consumed-err
|
||||
manyErr -- empty-ok
|
||||
(cok (acc x xs) s') -- empty-err
|
||||
in unParser p s (walk []) cerr manyErr (eok [] s)
|
||||
manyAccum acc p = ParsecT $ \s cok cerr eok _ ->
|
||||
let walk xs x s' _ =
|
||||
unParser p s'
|
||||
(seq xs $ walk $ acc x xs) -- consumed-ok
|
||||
cerr -- consumed-err
|
||||
manyErr -- empty-ok
|
||||
(cok (acc x xs) s') -- empty-err
|
||||
in unParser p s (walk []) cerr manyErr (eok [] s)
|
||||
|
||||
manyErr :: forall t . t
|
||||
manyErr =
|
||||
error
|
||||
manyErr = error
|
||||
"Text.Megaparsec.Prim.many: combinator 'many' is applied to a parser \
|
||||
\that accepts an empty string."
|
||||
|
||||
@ -641,8 +632,8 @@ setParserState st = updateParserState (const st)
|
||||
-- | @updateParserState f@ applies function @f@ to the parser state.
|
||||
|
||||
updateParserState :: (State s u -> State s u) -> ParsecT s u m (State s u)
|
||||
updateParserState f =
|
||||
ParsecT $ \s _ _ eok _ -> let s' = f s in eok s' s' $ unknownError s'
|
||||
updateParserState f = ParsecT $ \s _ _ eok _ ->
|
||||
let s' = f s in eok s' s' $ unknownError s'
|
||||
|
||||
-- User state combinators
|
||||
|
||||
|
@ -16,10 +16,10 @@ module Text.Megaparsec.ShowToken (ShowToken (..)) where
|
||||
-- instances are defined, but you can add your own, of course.
|
||||
|
||||
class Show a => ShowToken a where
|
||||
showToken :: a -> String
|
||||
showToken :: a -> String
|
||||
|
||||
instance ShowToken Char where
|
||||
showToken = prettyChar
|
||||
showToken = prettyChar
|
||||
|
||||
-- | @prettyChar ch@ returns user-friendly string representation of given
|
||||
-- character @ch@, suitable for using in error messages, for example.
|
||||
@ -37,7 +37,7 @@ prettyChar ' ' = "space"
|
||||
prettyChar x = "'" ++ [x] ++ "'"
|
||||
|
||||
instance ShowToken String where
|
||||
showToken = prettyString
|
||||
showToken = prettyString
|
||||
|
||||
-- | @prettyString s@ returns pretty representation of string @s@. This is
|
||||
-- used when printing string tokens in error messages.
|
||||
|
@ -11,9 +11,9 @@
|
||||
-- Make Strings an instance of 'Stream' with 'Char' token type.
|
||||
|
||||
module Text.Megaparsec.String
|
||||
( Parser
|
||||
, GenParser
|
||||
, parseFromFile )
|
||||
( Parser
|
||||
, GenParser
|
||||
, parseFromFile )
|
||||
where
|
||||
|
||||
import Text.Megaparsec.Error
|
||||
|
@ -11,9 +11,9 @@
|
||||
-- Convenience definitions for working with 'Text.Text'.
|
||||
|
||||
module Text.Megaparsec.Text
|
||||
( Parser
|
||||
, GenParser
|
||||
, parseFromFile )
|
||||
( Parser
|
||||
, GenParser
|
||||
, parseFromFile )
|
||||
where
|
||||
|
||||
import Text.Megaparsec.Error
|
||||
|
@ -11,9 +11,9 @@
|
||||
-- Convenience definitions for working with lazy 'Text.Text'.
|
||||
|
||||
module Text.Megaparsec.Text.Lazy
|
||||
( Parser
|
||||
, GenParser
|
||||
, parseFromFile )
|
||||
( Parser
|
||||
, GenParser
|
||||
, parseFromFile )
|
||||
where
|
||||
|
||||
import Text.Megaparsec.Error
|
||||
|
@ -15,9 +15,9 @@
|
||||
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
|
||||
|
||||
module Text.Megaparsec.Token
|
||||
( LanguageDef (..)
|
||||
, TokenParser (..)
|
||||
, makeTokenParser )
|
||||
( LanguageDef (..)
|
||||
, TokenParser (..)
|
||||
, makeTokenParser )
|
||||
where
|
||||
|
||||
import Control.Applicative ((<|>), many, some)
|
||||
@ -36,60 +36,60 @@ import Text.Megaparsec.Combinator
|
||||
-- "Text.Megaparsec.Language" contains some default definitions.
|
||||
|
||||
data LanguageDef s u m =
|
||||
LanguageDef {
|
||||
LanguageDef {
|
||||
|
||||
-- | Describes the start of a block comment. Use the empty string if the
|
||||
-- language doesn't support block comments.
|
||||
-- | Describes the start of a block comment. Use the empty string if the
|
||||
-- language doesn't support block comments.
|
||||
|
||||
commentStart :: String
|
||||
commentStart :: String
|
||||
|
||||
-- | Describes the end of a block comment. Use the empty string if the
|
||||
-- language doesn't support block comments.
|
||||
-- | Describes the end of a block comment. Use the empty string if the
|
||||
-- language doesn't support block comments.
|
||||
|
||||
, commentEnd :: String
|
||||
, commentEnd :: String
|
||||
|
||||
-- | Describes the start of a line comment. Use the empty string if the
|
||||
-- language doesn't support line comments.
|
||||
-- | Describes the start of a line comment. Use the empty string if the
|
||||
-- language doesn't support line comments.
|
||||
|
||||
, commentLine :: String
|
||||
, commentLine :: String
|
||||
|
||||
-- | Set to 'True' if the language supports nested block comments.
|
||||
-- | Set to 'True' if the language supports nested block comments.
|
||||
|
||||
, nestedComments :: Bool
|
||||
, nestedComments :: Bool
|
||||
|
||||
-- | This parser should accept any start characters of identifiers, for
|
||||
-- example @letter \<|> char \'_\'@.
|
||||
-- | This parser should accept any start characters of identifiers, for
|
||||
-- example @letter \<|> char \'_\'@.
|
||||
|
||||
, identStart :: ParsecT s u m Char
|
||||
, identStart :: ParsecT s u m Char
|
||||
|
||||
-- | This parser should accept any legal tail characters of identifiers,
|
||||
-- for example @alphaNum \<|> char \'_\'@.
|
||||
-- | This parser should accept any legal tail characters of identifiers,
|
||||
-- for example @alphaNum \<|> char \'_\'@.
|
||||
|
||||
, identLetter :: ParsecT s u m Char
|
||||
, identLetter :: ParsecT s u m Char
|
||||
|
||||
-- | This parser should accept any start characters of operators, for
|
||||
-- example @oneOf \":!#$%&*+.\/\<=>?\@\\\\^|-~\"@
|
||||
-- | This parser should accept any start characters of operators, for
|
||||
-- example @oneOf \":!#$%&*+.\/\<=>?\@\\\\^|-~\"@
|
||||
|
||||
, opStart :: ParsecT s u m Char
|
||||
, opStart :: ParsecT s u m Char
|
||||
|
||||
-- | This parser should accept any legal tail characters of operators.
|
||||
-- Note that this parser should even be defined if the language doesn't
|
||||
-- support user-defined operators, or otherwise the 'reservedOp' parser
|
||||
-- won't work correctly.
|
||||
-- | This parser should accept any legal tail characters of operators.
|
||||
-- Note that this parser should even be defined if the language doesn't
|
||||
-- support user-defined operators, or otherwise the 'reservedOp' parser
|
||||
-- won't work correctly.
|
||||
|
||||
, opLetter :: ParsecT s u m Char
|
||||
, opLetter :: ParsecT s u m Char
|
||||
|
||||
-- | The list of reserved identifiers.
|
||||
-- | The list of reserved identifiers.
|
||||
|
||||
, reservedNames :: [String]
|
||||
, reservedNames :: [String]
|
||||
|
||||
-- | The list of reserved operators.
|
||||
-- | The list of reserved operators.
|
||||
|
||||
, reservedOpNames :: [String]
|
||||
, reservedOpNames :: [String]
|
||||
|
||||
-- | Set to 'True' if the language is case sensitive.
|
||||
-- | Set to 'True' if the language is case sensitive.
|
||||
|
||||
, caseSensitive :: Bool }
|
||||
, caseSensitive :: Bool }
|
||||
|
||||
-- Token parser
|
||||
|
||||
@ -97,189 +97,188 @@ data LanguageDef s u m =
|
||||
-- @s@ streams with state @u@ over a monad @m@.
|
||||
|
||||
data TokenParser s u m =
|
||||
TokenParser {
|
||||
TokenParser {
|
||||
|
||||
-- | The lexeme parser parses a legal identifier. Returns the identifier
|
||||
-- string. This parser will fail on identifiers that are reserved
|
||||
-- words. Legal identifier (start) characters and reserved words are
|
||||
-- defined in the 'LanguageDef' that is passed to 'makeTokenParser'.
|
||||
-- | The lexeme parser parses a legal identifier. Returns the identifier
|
||||
-- string. This parser will fail on identifiers that are reserved
|
||||
-- words. Legal identifier (start) characters and reserved words are
|
||||
-- defined in the 'LanguageDef' that is passed to 'makeTokenParser'.
|
||||
|
||||
identifier :: ParsecT s u m String
|
||||
identifier :: ParsecT s u m String
|
||||
|
||||
-- | The lexeme parser @reserved name@ parses @symbol name@, but it also
|
||||
-- checks that the @name@ is not a prefix of a valid identifier.
|
||||
-- | The lexeme parser @reserved name@ parses @symbol name@, but it also
|
||||
-- checks that the @name@ is not a prefix of a valid identifier.
|
||||
|
||||
, reserved :: String -> ParsecT s u m ()
|
||||
, reserved :: String -> ParsecT s u m ()
|
||||
|
||||
-- | The lexeme parser parses a legal operator. Returns the name of the
|
||||
-- operator. This parser will fail on any operators that are reserved
|
||||
-- operators. Legal operator (start) characters and reserved operators
|
||||
-- are defined in the 'LanguageDef' that is passed to
|
||||
-- 'makeTokenParser'.
|
||||
-- | The lexeme parser parses a legal operator. Returns the name of the
|
||||
-- operator. This parser will fail on any operators that are reserved
|
||||
-- operators. Legal operator (start) characters and reserved operators are
|
||||
-- defined in the 'LanguageDef' that is passed to 'makeTokenParser'.
|
||||
|
||||
, operator :: ParsecT s u m String
|
||||
, operator :: ParsecT s u m String
|
||||
|
||||
-- | The lexeme parser @reservedOp name@ parses @symbol name@, but it
|
||||
-- also checks that the @name@ is not a prefix of a valid operator.
|
||||
-- | The lexeme parser @reservedOp name@ parses @symbol name@, but it
|
||||
-- also checks that the @name@ is not a prefix of a valid operator.
|
||||
|
||||
, reservedOp :: String -> ParsecT s u m ()
|
||||
, reservedOp :: String -> ParsecT s u m ()
|
||||
|
||||
-- | The lexeme parser parses a single literal character. Returns the
|
||||
-- literal character value. This parsers deals correctly with escape
|
||||
-- sequences. The literal character is parsed according to the grammar
|
||||
-- rules defined in the Haskell report (which matches most programming
|
||||
-- languages quite closely).
|
||||
-- | The lexeme parser parses a single literal character. Returns the
|
||||
-- literal character value. This parsers deals correctly with escape
|
||||
-- sequences. The literal character is parsed according to the grammar
|
||||
-- rules defined in the Haskell report (which matches most programming
|
||||
-- languages quite closely).
|
||||
|
||||
, charLiteral :: ParsecT s u m Char
|
||||
, charLiteral :: ParsecT s u m Char
|
||||
|
||||
-- | The lexeme parser parses a literal string. Returns the literal
|
||||
-- string value. This parsers deals correctly with escape sequences and
|
||||
-- gaps. The literal string is parsed according to the grammar rules
|
||||
-- defined in the Haskell report (which matches most programming
|
||||
-- languages quite closely).
|
||||
-- | The lexeme parser parses a literal string. Returns the literal
|
||||
-- string value. This parsers deals correctly with escape sequences and
|
||||
-- gaps. The literal string is parsed according to the grammar rules
|
||||
-- defined in the Haskell report (which matches most programming languages
|
||||
-- quite closely).
|
||||
|
||||
, stringLiteral :: ParsecT s u m String
|
||||
, stringLiteral :: ParsecT s u m String
|
||||
|
||||
-- | The lexeme parser parses an integer (a whole number). This parser
|
||||
-- /does not/ parse sign. Returns the value of the number. The number
|
||||
-- can be specified in 'decimal', 'hexadecimal' or 'octal'. The number
|
||||
-- is parsed according to the grammar rules in the Haskell report.
|
||||
-- | The lexeme parser parses an integer (a whole number). This parser
|
||||
-- /does not/ parse sign. Returns the value of the number. The number can
|
||||
-- be specified in 'decimal', 'hexadecimal' or 'octal'. The number is
|
||||
-- parsed according to the grammar rules in the Haskell report.
|
||||
|
||||
, integer :: ParsecT s u m Integer
|
||||
, integer :: ParsecT s u m Integer
|
||||
|
||||
-- | This is just like 'integer', except it can parse sign.
|
||||
-- | This is just like 'integer', except it can parse sign.
|
||||
|
||||
, integer' :: ParsecT s u m Integer
|
||||
, integer' :: ParsecT s u m Integer
|
||||
|
||||
-- | The lexeme parses a positive whole number in the decimal system.
|
||||
-- Returns the value of the number.
|
||||
-- | The lexeme parses a positive whole number in the decimal system.
|
||||
-- Returns the value of the number.
|
||||
|
||||
, decimal :: ParsecT s u m Integer
|
||||
, decimal :: ParsecT s u m Integer
|
||||
|
||||
-- | The lexeme parses a positive whole number in the hexadecimal
|
||||
-- system. The number should be prefixed with “0x” or “0X”. Returns the
|
||||
-- value of the number.
|
||||
-- | The lexeme parses a positive whole number in the hexadecimal
|
||||
-- system. The number should be prefixed with “0x” or “0X”. Returns the
|
||||
-- value of the number.
|
||||
|
||||
, hexadecimal :: ParsecT s u m Integer
|
||||
, hexadecimal :: ParsecT s u m Integer
|
||||
|
||||
-- | The lexeme parses a positive whole number in the octal system.
|
||||
-- The number should be prefixed with “0o” or “0O”. Returns the value of
|
||||
-- the number.
|
||||
-- | The lexeme parses a positive whole number in the octal system.
|
||||
-- The number should be prefixed with “0o” or “0O”. Returns the value of
|
||||
-- the number.
|
||||
|
||||
, octal :: ParsecT s u m Integer
|
||||
, octal :: ParsecT s u m Integer
|
||||
|
||||
-- | @signed p@ tries to parse sign (i.e. “+”, “-”, or nothing) and
|
||||
-- then runs parser @p@, changing sign of its result accordingly. Note
|
||||
-- that there may be white space after the sign but not before it.
|
||||
-- | @signed p@ tries to parse sign (i.e. “+”, “-”, or nothing) and
|
||||
-- then runs parser @p@, changing sign of its result accordingly. Note
|
||||
-- that there may be white space after the sign but not before it.
|
||||
|
||||
, signed :: forall a . Num a => ParsecT s u m a -> ParsecT s u m a
|
||||
, signed :: forall a . Num a => ParsecT s u m a -> ParsecT s u m a
|
||||
|
||||
-- | The lexeme parser parses a floating point value. Returns the value
|
||||
-- of the number. The number is parsed according to the grammar rules
|
||||
-- defined in the Haskell report, sign is /not/ parsed, use 'float\'' to
|
||||
-- achieve parsing of signed floating point values.
|
||||
-- | The lexeme parser parses a floating point value. Returns the value
|
||||
-- of the number. The number is parsed according to the grammar rules
|
||||
-- defined in the Haskell report, sign is /not/ parsed, use 'float'' to
|
||||
-- achieve parsing of signed floating point values.
|
||||
|
||||
, float :: ParsecT s u m Double
|
||||
, float :: ParsecT s u m Double
|
||||
|
||||
-- | This is just like 'float', except it can parse sign.
|
||||
-- | This is just like 'float', except it can parse sign.
|
||||
|
||||
, float' :: ParsecT s u m Double
|
||||
, float' :: ParsecT s u m Double
|
||||
|
||||
-- | The lexeme parser parses either 'integer' or a 'float'.
|
||||
-- Returns the value of the number. This parser deals with any overlap
|
||||
-- in the grammar rules for integers and floats. The number is parsed
|
||||
-- according to the grammar rules defined in the Haskell report.
|
||||
-- | The lexeme parser parses either 'integer' or a 'float'.
|
||||
-- Returns the value of the number. This parser deals with any overlap in
|
||||
-- the grammar rules for integers and floats. The number is parsed
|
||||
-- according to the grammar rules defined in the Haskell report.
|
||||
|
||||
, number :: ParsecT s u m (Either Integer Double)
|
||||
, number :: ParsecT s u m (Either Integer Double)
|
||||
|
||||
-- | This is just like 'number', except it can parse sign.
|
||||
-- | This is just like 'number', except it can parse sign.
|
||||
|
||||
, number' :: ParsecT s u m (Either Integer Double)
|
||||
, number' :: ParsecT s u m (Either Integer Double)
|
||||
|
||||
-- | Lexeme parser @symbol s@ parses 'string' @s@ and skips
|
||||
-- trailing white space.
|
||||
-- | Lexeme parser @symbol s@ parses 'string' @s@ and skips
|
||||
-- trailing white space.
|
||||
|
||||
, symbol :: String -> ParsecT s u m String
|
||||
, symbol :: String -> ParsecT s u m String
|
||||
|
||||
-- | @lexeme p@ first applies parser @p@ and than the 'whiteSpace'
|
||||
-- parser, returning the value of @p@. Every lexical token (lexeme) is
|
||||
-- defined using @lexeme@, this way every parse starts at a point
|
||||
-- without white space. Parsers that use @lexeme@ are called /lexeme/
|
||||
-- parsers in this document.
|
||||
--
|
||||
-- The only point where the 'whiteSpace' parser should be called
|
||||
-- explicitly is the start of the main parser in order to skip any
|
||||
-- leading white space.
|
||||
-- | @lexeme p@ first applies parser @p@ and than the 'whiteSpace'
|
||||
-- parser, returning the value of @p@. Every lexical token (lexeme) is
|
||||
-- defined using @lexeme@, this way every parse starts at a point without
|
||||
-- white space. Parsers that use @lexeme@ are called /lexeme/ parsers in
|
||||
-- this document.
|
||||
--
|
||||
-- The only point where the 'whiteSpace' parser should be called
|
||||
-- explicitly is the start of the main parser in order to skip any leading
|
||||
-- white space.
|
||||
|
||||
, lexeme :: forall a. ParsecT s u m a -> ParsecT s u m a
|
||||
, lexeme :: forall a. ParsecT s u m a -> ParsecT s u m a
|
||||
|
||||
-- | Parses any white space. White space consists of /zero/ or more
|
||||
-- occurrences of a 'space', a line comment or a block (multi line)
|
||||
-- comment. Block comments may be nested. How comments are started and
|
||||
-- ended is defined in the 'LanguageDef' that is passed to
|
||||
-- 'makeTokenParser'.
|
||||
-- | Parses any white space. White space consists of /zero/ or more
|
||||
-- occurrences of a 'space', a line comment or a block (multi line)
|
||||
-- comment. Block comments may be nested. How comments are started and
|
||||
-- ended is defined in the 'LanguageDef' that is passed to
|
||||
-- 'makeTokenParser'.
|
||||
|
||||
, whiteSpace :: ParsecT s u m ()
|
||||
, whiteSpace :: ParsecT s u m ()
|
||||
|
||||
-- | Lexeme parser @parens p@ parses @p@ enclosed in parenthesis,
|
||||
-- returning the value of @p@.
|
||||
-- | Lexeme parser @parens p@ parses @p@ enclosed in parenthesis,
|
||||
-- returning the value of @p@.
|
||||
|
||||
, parens :: forall a. ParsecT s u m a -> ParsecT s u m a
|
||||
, parens :: forall a. ParsecT s u m a -> ParsecT s u m a
|
||||
|
||||
-- | Lexeme parser @braces p@ parses @p@ enclosed in braces (“{” and
|
||||
-- “}”), returning the value of @p@.
|
||||
-- | Lexeme parser @braces p@ parses @p@ enclosed in braces (“{” and
|
||||
-- “}”), returning the value of @p@.
|
||||
|
||||
, braces :: forall a. ParsecT s u m a -> ParsecT s u m a
|
||||
, braces :: forall a. ParsecT s u m a -> ParsecT s u m a
|
||||
|
||||
-- | Lexeme parser @angles p@ parses @p@ enclosed in angle brackets (“\<”
|
||||
-- and “>”), returning the value of @p@.
|
||||
-- | Lexeme parser @angles p@ parses @p@ enclosed in angle brackets (“\<”
|
||||
-- and “>”), returning the value of @p@.
|
||||
|
||||
, angles :: forall a. ParsecT s u m a -> ParsecT s u m a
|
||||
, angles :: forall a. ParsecT s u m a -> ParsecT s u m a
|
||||
|
||||
-- | Lexeme parser @brackets p@ parses @p@ enclosed in brackets (“[”
|
||||
-- and “]”), returning the value of @p@.
|
||||
-- | Lexeme parser @brackets p@ parses @p@ enclosed in brackets (“[”
|
||||
-- and “]”), returning the value of @p@.
|
||||
|
||||
, brackets :: forall a. ParsecT s u m a -> ParsecT s u m a
|
||||
, brackets :: forall a. ParsecT s u m a -> ParsecT s u m a
|
||||
|
||||
-- | Lexeme parser @semicolon@ parses the character “;” and skips any
|
||||
-- trailing white space. Returns the string “;”.
|
||||
-- | Lexeme parser @semicolon@ parses the character “;” and skips any
|
||||
-- trailing white space. Returns the string “;”.
|
||||
|
||||
, semicolon :: ParsecT s u m String
|
||||
, semicolon :: ParsecT s u m String
|
||||
|
||||
-- | Lexeme parser @comma@ parses the character “,” and skips any
|
||||
-- trailing white space. Returns the string “,”.
|
||||
-- | Lexeme parser @comma@ parses the character “,” and skips any
|
||||
-- trailing white space. Returns the string “,”.
|
||||
|
||||
, comma :: ParsecT s u m String
|
||||
, comma :: ParsecT s u m String
|
||||
|
||||
-- | Lexeme parser @colon@ parses the character “:” and skips any
|
||||
-- trailing white space. Returns the string “:”.
|
||||
-- | Lexeme parser @colon@ parses the character “:” and skips any
|
||||
-- trailing white space. Returns the string “:”.
|
||||
|
||||
, colon :: ParsecT s u m String
|
||||
, colon :: ParsecT s u m String
|
||||
|
||||
-- | Lexeme parser @dot@ parses the character “.” and skips any
|
||||
-- trailing white space. Returns the string “.”.
|
||||
-- | Lexeme parser @dot@ parses the character “.” and skips any
|
||||
-- trailing white space. Returns the string “.”.
|
||||
|
||||
, dot :: ParsecT s u m String
|
||||
, dot :: ParsecT s u m String
|
||||
|
||||
-- | Lexeme parser @semiSep p@ parses /zero/ or more occurrences of @p@
|
||||
-- separated by 'semicolon'. Returns a list of values returned by @p@.
|
||||
-- | Lexeme parser @semiSep p@ parses /zero/ or more occurrences of @p@
|
||||
-- separated by 'semicolon'. Returns a list of values returned by @p@.
|
||||
|
||||
, semicolonSep :: forall a . ParsecT s u m a -> ParsecT s u m [a]
|
||||
, semicolonSep :: forall a . ParsecT s u m a -> ParsecT s u m [a]
|
||||
|
||||
-- | Lexeme parser @semiSep1 p@ parses /one/ or more occurrences of @p@
|
||||
-- separated by 'semi'. Returns a list of values returned by @p@.
|
||||
-- | Lexeme parser @semiSep1 p@ parses /one/ or more occurrences of @p@
|
||||
-- separated by 'semi'. Returns a list of values returned by @p@.
|
||||
|
||||
, semicolonSep1 :: forall a . ParsecT s u m a -> ParsecT s u m [a]
|
||||
, semicolonSep1 :: forall a . ParsecT s u m a -> ParsecT s u m [a]
|
||||
|
||||
-- | Lexeme parser @commaSep p@ parses /zero/ or more occurrences of
|
||||
-- @p@ separated by 'comma'. Returns a list of values returned by @p@.
|
||||
-- | Lexeme parser @commaSep p@ parses /zero/ or more occurrences of
|
||||
-- @p@ separated by 'comma'. Returns a list of values returned by @p@.
|
||||
|
||||
, commaSep :: forall a . ParsecT s u m a -> ParsecT s u m [a]
|
||||
, commaSep :: forall a . ParsecT s u m a -> ParsecT s u m [a]
|
||||
|
||||
-- | Lexeme parser @commaSep1 p@ parses /one/ or more occurrences of
|
||||
-- @p@ separated by 'comma'. Returns a list of values returned by @p@.
|
||||
-- | Lexeme parser @commaSep1 p@ parses /one/ or more occurrences of
|
||||
-- @p@ separated by 'comma'. Returns a list of values returned by @p@.
|
||||
|
||||
, commaSep1 :: forall a . ParsecT s u m a -> ParsecT s u m [a] }
|
||||
, commaSep1 :: forall a . ParsecT s u m a -> ParsecT s u m [a] }
|
||||
|
||||
-- Given a LanguageDef, create a token parser
|
||||
|
||||
@ -315,272 +314,272 @@ data TokenParser s u m =
|
||||
|
||||
makeTokenParser :: Stream s m Char => LanguageDef s u m -> TokenParser s u m
|
||||
makeTokenParser languageDef =
|
||||
TokenParser
|
||||
{ identifier = identifier
|
||||
, reserved = reserved
|
||||
, operator = operator
|
||||
, reservedOp = reservedOp
|
||||
TokenParser
|
||||
{ identifier = identifier
|
||||
, reserved = reserved
|
||||
, operator = operator
|
||||
, reservedOp = reservedOp
|
||||
|
||||
, charLiteral = charLiteral
|
||||
, stringLiteral = stringLiteral
|
||||
, charLiteral = charLiteral
|
||||
, stringLiteral = stringLiteral
|
||||
|
||||
, integer = integer
|
||||
, integer' = integer'
|
||||
, decimal = decimal
|
||||
, hexadecimal = hexadecimal
|
||||
, octal = octal
|
||||
, signed = signed
|
||||
, float = float
|
||||
, float' = float'
|
||||
, number = number
|
||||
, number' = number'
|
||||
, integer = integer
|
||||
, integer' = integer'
|
||||
, decimal = decimal
|
||||
, hexadecimal = hexadecimal
|
||||
, octal = octal
|
||||
, signed = signed
|
||||
, float = float
|
||||
, float' = float'
|
||||
, number = number
|
||||
, number' = number'
|
||||
|
||||
, symbol = symbol
|
||||
, lexeme = lexeme
|
||||
, whiteSpace = whiteSpace
|
||||
, symbol = symbol
|
||||
, lexeme = lexeme
|
||||
, whiteSpace = whiteSpace
|
||||
|
||||
, parens = parens
|
||||
, braces = braces
|
||||
, angles = angles
|
||||
, brackets = brackets
|
||||
, semicolon = semicolon
|
||||
, comma = comma
|
||||
, colon = colon
|
||||
, dot = dot
|
||||
, semicolonSep = semicolonSep
|
||||
, semicolonSep1 = semicolonSep1
|
||||
, commaSep = commaSep
|
||||
, commaSep1 = commaSep1 }
|
||||
, parens = parens
|
||||
, braces = braces
|
||||
, angles = angles
|
||||
, brackets = brackets
|
||||
, semicolon = semicolon
|
||||
, comma = comma
|
||||
, colon = colon
|
||||
, dot = dot
|
||||
, semicolonSep = semicolonSep
|
||||
, semicolonSep1 = semicolonSep1
|
||||
, commaSep = commaSep
|
||||
, commaSep1 = commaSep1 }
|
||||
where
|
||||
|
||||
-- bracketing
|
||||
|
||||
parens = between (symbol "(") (symbol ")")
|
||||
braces = between (symbol "{") (symbol "}")
|
||||
angles = between (symbol "<") (symbol ">")
|
||||
brackets = between (symbol "[") (symbol "]")
|
||||
|
||||
semicolon = symbol ";"
|
||||
comma = symbol ","
|
||||
dot = symbol "."
|
||||
colon = symbol ":"
|
||||
|
||||
commaSep = (`sepBy` comma)
|
||||
semicolonSep = (`sepBy` semicolon)
|
||||
|
||||
commaSep1 = (`sepBy1` comma)
|
||||
semicolonSep1 = (`sepBy1` semicolon)
|
||||
|
||||
-- chars & strings
|
||||
|
||||
charLiteral = lexeme ( between (char '\'')
|
||||
(char '\'' <?> "end of character")
|
||||
characterChar )
|
||||
<?> "character"
|
||||
|
||||
characterChar = charLetter <|> charEscape <?> "literal character"
|
||||
|
||||
charEscape = char '\\' >> escapeCode
|
||||
charLetter = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026'))
|
||||
|
||||
stringLiteral =
|
||||
lexeme ((foldr (maybe id (:)) "" <$>
|
||||
between (char '"') (char '"' <?> "end of string")
|
||||
(many stringChar)) <?> "literal string")
|
||||
|
||||
stringChar = (Just <$> stringLetter) <|> stringEscape <?> "string character"
|
||||
|
||||
stringLetter = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026'))
|
||||
|
||||
stringEscape = char '\\' >>
|
||||
( (escapeGap >> return Nothing) <|>
|
||||
(escapeEmpty >> return Nothing) <|>
|
||||
(Just <$> escapeCode) )
|
||||
|
||||
escapeEmpty = char '&'
|
||||
escapeGap = some spaceChar >> char '\\' <?> "end of string gap"
|
||||
|
||||
-- escape codes
|
||||
|
||||
escapeCode = charEsc <|> charNum <|> charAscii <|> charControl
|
||||
<?> "escape code"
|
||||
|
||||
charEsc = choice (parseEsc <$> escMap)
|
||||
where parseEsc (c, code) = char c >> return code
|
||||
|
||||
charNum = toEnum . fromInteger <$>
|
||||
( decimal <|>
|
||||
(char 'o' >> nump "0o" octDigitChar) <|>
|
||||
(char 'x' >> nump "0x" hexDigitChar) )
|
||||
|
||||
charAscii = choice (parseAscii <$> asciiMap)
|
||||
where parseAscii (asc, code) = try (string asc >> return code)
|
||||
|
||||
charControl = toEnum . subtract 64 . fromEnum <$> (char '^' >> upperChar)
|
||||
|
||||
-- escape code tables
|
||||
|
||||
escMap = zip "abfnrtv\\\"\'" "\a\b\f\n\r\t\v\\\"\'"
|
||||
asciiMap = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2)
|
||||
|
||||
ascii2codes = ["BS","HT","LF","VT","FF","CR","SO","SI","EM",
|
||||
"FS","GS","RS","US","SP"]
|
||||
ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK","BEL",
|
||||
"DLE","DC1","DC2","DC3","DC4","NAK","SYN","ETB",
|
||||
"CAN","SUB","ESC","DEL"]
|
||||
|
||||
ascii2 = "\b\t\n\v\f\r\SO\SI\EM\FS\GS\RS\US "
|
||||
ascii3 = "\NUL\SOH\STX\ETX\EOT\ENQ\ACK\a\DLE\DC1\DC2\DC3\DC4\NAK\SYN\ETB\CAN\SUB\ESC\DEL"
|
||||
|
||||
-- numbers — integers
|
||||
|
||||
integer = decimal <?> "integer"
|
||||
integer' = signed integer
|
||||
|
||||
decimal = lexeme $ nump "" digitChar
|
||||
hexadecimal = lexeme $ char '0' >> oneOf "xX" >> nump "0x" hexDigitChar
|
||||
octal = lexeme $ char '0' >> oneOf "oO" >> nump "0o" octDigitChar
|
||||
|
||||
nump prefix baseDigit = read . (prefix ++) <$> some baseDigit
|
||||
|
||||
signed p = ($) <$> option id (lexeme sign) <*> p
|
||||
|
||||
sign :: (Stream s m Char, Num a) => ParsecT s u m (a -> a)
|
||||
sign = (char '+' *> return id) <|> (char '-' *> return negate)
|
||||
|
||||
-- numbers — floats
|
||||
|
||||
float = lexeme ffloat <?> "float"
|
||||
float' = signed float
|
||||
|
||||
ffloat = read <$> ffloat'
|
||||
where
|
||||
ffloat' = do
|
||||
decimal <- fDec
|
||||
rest <- fraction <|> fExp
|
||||
return $ decimal ++ rest
|
||||
|
||||
-- bracketing
|
||||
fraction = do
|
||||
void $ char '.'
|
||||
decimal <- fDec
|
||||
exp <- option "" fExp
|
||||
return $ '.' : decimal ++ exp
|
||||
|
||||
parens = between (symbol "(") (symbol ")")
|
||||
braces = between (symbol "{") (symbol "}")
|
||||
angles = between (symbol "<") (symbol ">")
|
||||
brackets = between (symbol "[") (symbol "]")
|
||||
fDec = some digitChar
|
||||
|
||||
semicolon = symbol ";"
|
||||
comma = symbol ","
|
||||
dot = symbol "."
|
||||
colon = symbol ":"
|
||||
fExp = do
|
||||
expChar <- oneOf "eE"
|
||||
signStr <- option "" (pure <$> oneOf "+-")
|
||||
decimal <- fDec
|
||||
return $ expChar : signStr ++ decimal
|
||||
|
||||
commaSep = (`sepBy` comma)
|
||||
semicolonSep = (`sepBy` semicolon)
|
||||
-- numbers — a more general case
|
||||
|
||||
commaSep1 = (`sepBy1` comma)
|
||||
semicolonSep1 = (`sepBy1` semicolon)
|
||||
number = (Right <$> try float) <|> (Left <$> integer) <?> "number"
|
||||
number' = (Right <$> try float') <|> (Left <$> integer') <?> "number"
|
||||
|
||||
-- chars & strings
|
||||
-- operators & reserved ops
|
||||
|
||||
charLiteral = lexeme ( between (char '\'')
|
||||
(char '\'' <?> "end of character")
|
||||
characterChar )
|
||||
<?> "character"
|
||||
reservedOp name =
|
||||
lexeme $ try $ do
|
||||
void $ string name
|
||||
notFollowedBy (opLetter languageDef) <?> ("end of " ++ show name)
|
||||
|
||||
characterChar = charLetter <|> charEscape <?> "literal character"
|
||||
operator =
|
||||
lexeme $ try $ do
|
||||
name <- oper
|
||||
if isReservedOp name
|
||||
then unexpected ("reserved operator " ++ show name)
|
||||
else return name
|
||||
|
||||
charEscape = char '\\' >> escapeCode
|
||||
charLetter = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026'))
|
||||
oper = ((:) <$> opStart languageDef <*> many (opLetter languageDef))
|
||||
<?> "operator"
|
||||
|
||||
stringLiteral =
|
||||
lexeme ((foldr (maybe id (:)) "" <$>
|
||||
between (char '"') (char '"' <?> "end of string")
|
||||
(many stringChar)) <?> "literal string")
|
||||
isReservedOp = isReserved . sort $ reservedOpNames languageDef
|
||||
|
||||
stringChar = (Just <$> stringLetter) <|> stringEscape <?> "string character"
|
||||
-- identifiers & reserved words
|
||||
|
||||
stringLetter = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026'))
|
||||
reserved name =
|
||||
lexeme $ try $ do
|
||||
void $ caseString name
|
||||
notFollowedBy (identLetter languageDef) <?> ("end of " ++ show name)
|
||||
|
||||
stringEscape = char '\\' >>
|
||||
( (escapeGap >> return Nothing) <|>
|
||||
(escapeEmpty >> return Nothing) <|>
|
||||
(Just <$> escapeCode) )
|
||||
caseString name
|
||||
| caseSensitive languageDef = string name
|
||||
| otherwise = walk name >> return name
|
||||
where walk = foldr (\c -> ((caseChar c <?> show name) >>)) (return ())
|
||||
caseChar c
|
||||
| isAlpha c = char (toLower c) <|> char (toUpper c)
|
||||
| otherwise = char c
|
||||
|
||||
escapeEmpty = char '&'
|
||||
escapeGap = some spaceChar >> char '\\' <?> "end of string gap"
|
||||
identifier =
|
||||
lexeme $ try $ do
|
||||
name <- ident
|
||||
if isReservedName name
|
||||
then unexpected ("reserved word " ++ show name)
|
||||
else return name
|
||||
|
||||
-- escape codes
|
||||
ident = ((:) <$> identStart languageDef <*> many (identLetter languageDef))
|
||||
<?> "identifier"
|
||||
|
||||
escapeCode = charEsc <|> charNum <|> charAscii <|> charControl
|
||||
<?> "escape code"
|
||||
isReservedName name = isReserved theReservedNames caseName
|
||||
where caseName
|
||||
| caseSensitive languageDef = name
|
||||
| otherwise = toLower <$> name
|
||||
|
||||
charEsc = choice (parseEsc <$> escMap)
|
||||
where parseEsc (c, code) = char c >> return code
|
||||
isReserved names name = scan names
|
||||
where scan [] = False
|
||||
scan (r:rs) = case compare r name of
|
||||
LT -> scan rs
|
||||
EQ -> True
|
||||
GT -> False
|
||||
|
||||
charNum = toEnum . fromInteger <$>
|
||||
( decimal <|>
|
||||
(char 'o' >> nump "0o" octDigitChar) <|>
|
||||
(char 'x' >> nump "0x" hexDigitChar) )
|
||||
theReservedNames
|
||||
| caseSensitive languageDef = sort reserved
|
||||
| otherwise = sort . fmap (fmap toLower) $ reserved
|
||||
where reserved = reservedNames languageDef
|
||||
|
||||
charAscii = choice (parseAscii <$> asciiMap)
|
||||
where parseAscii (asc, code) = try (string asc >> return code)
|
||||
-- white space & symbols
|
||||
|
||||
charControl = toEnum . subtract 64 . fromEnum <$> (char '^' >> upperChar)
|
||||
symbol = lexeme . string
|
||||
|
||||
-- escape code tables
|
||||
lexeme p = p <* whiteSpace
|
||||
|
||||
escMap = zip "abfnrtv\\\"\'" "\a\b\f\n\r\t\v\\\"\'"
|
||||
asciiMap = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2)
|
||||
|
||||
ascii2codes = ["BS","HT","LF","VT","FF","CR","SO","SI","EM",
|
||||
"FS","GS","RS","US","SP"]
|
||||
ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK","BEL",
|
||||
"DLE","DC1","DC2","DC3","DC4","NAK","SYN","ETB",
|
||||
"CAN","SUB","ESC","DEL"]
|
||||
|
||||
ascii2 = "\b\t\n\v\f\r\SO\SI\EM\FS\GS\RS\US "
|
||||
ascii3 = "\NUL\SOH\STX\ETX\EOT\ENQ\ACK\a\DLE\DC1\DC2\DC3\DC4\NAK\SYN\ETB\CAN\SUB\ESC\DEL"
|
||||
|
||||
-- numbers — integers
|
||||
|
||||
integer = decimal <?> "integer"
|
||||
integer' = signed integer
|
||||
|
||||
decimal = lexeme $ nump "" digitChar
|
||||
hexadecimal = lexeme $ char '0' >> oneOf "xX" >> nump "0x" hexDigitChar
|
||||
octal = lexeme $ char '0' >> oneOf "oO" >> nump "0o" octDigitChar
|
||||
|
||||
nump prefix baseDigit = read . (prefix ++) <$> some baseDigit
|
||||
|
||||
signed p = ($) <$> option id (lexeme sign) <*> p
|
||||
|
||||
sign :: (Stream s m Char, Num a) => ParsecT s u m (a -> a)
|
||||
sign = (char '+' *> return id) <|> (char '-' *> return negate)
|
||||
|
||||
-- numbers — floats
|
||||
|
||||
float = lexeme ffloat <?> "float"
|
||||
float' = signed float
|
||||
|
||||
ffloat = read <$> ffloat'
|
||||
whiteSpace
|
||||
| noLine && noMulti = skipMany (simpleSpace <?> "")
|
||||
| noLine = skipMany (simpleSpace <|>
|
||||
multiLineComment <?> "")
|
||||
| noMulti = skipMany (simpleSpace <|>
|
||||
oneLineComment <?> "")
|
||||
| otherwise = skipMany (simpleSpace <|>
|
||||
oneLineComment <|>
|
||||
multiLineComment <?> "")
|
||||
where
|
||||
ffloat' = do
|
||||
decimal <- fDec
|
||||
rest <- fraction <|> fExp
|
||||
return $ decimal ++ rest
|
||||
noLine = null (commentLine languageDef)
|
||||
noMulti = null (commentStart languageDef)
|
||||
|
||||
fraction = do
|
||||
void $ char '.'
|
||||
decimal <- fDec
|
||||
exp <- option "" fExp
|
||||
return $ '.' : decimal ++ exp
|
||||
simpleSpace = skipSome spaceChar
|
||||
|
||||
fDec = some digitChar
|
||||
oneLineComment = void (try (string (commentLine languageDef))
|
||||
>> skipMany (satisfy (/= '\n')))
|
||||
|
||||
fExp = do
|
||||
expChar <- oneOf "eE"
|
||||
signStr <- option "" (pure <$> oneOf "+-")
|
||||
decimal <- fDec
|
||||
return $ expChar : signStr ++ decimal
|
||||
multiLineComment = try (string (commentStart languageDef)) >> inComment
|
||||
|
||||
-- numbers — a more general case
|
||||
inComment = if nestedComments languageDef
|
||||
then inCommentMulti
|
||||
else inCommentSingle
|
||||
|
||||
number = (Right <$> try float) <|> (Left <$> integer) <?> "number"
|
||||
number' = (Right <$> try float') <|> (Left <$> integer') <?> "number"
|
||||
inCommentMulti
|
||||
= void (try . string $ commentEnd languageDef)
|
||||
<|> (multiLineComment >> inCommentMulti)
|
||||
<|> (skipSome (noneOf startEnd) >> inCommentMulti)
|
||||
<|> (oneOf startEnd >> inCommentMulti)
|
||||
<?> "end of comment"
|
||||
|
||||
-- operators & reserved ops
|
||||
inCommentSingle
|
||||
= void (try . string $ commentEnd languageDef)
|
||||
<|> (skipSome (noneOf startEnd) >> inCommentSingle)
|
||||
<|> (oneOf startEnd >> inCommentSingle)
|
||||
<?> "end of comment"
|
||||
|
||||
reservedOp name =
|
||||
lexeme $ try $ do
|
||||
void $ string name
|
||||
notFollowedBy (opLetter languageDef) <?> ("end of " ++ show name)
|
||||
|
||||
operator =
|
||||
lexeme $ try $ do
|
||||
name <- oper
|
||||
if isReservedOp name
|
||||
then unexpected ("reserved operator " ++ show name)
|
||||
else return name
|
||||
|
||||
oper = ((:) <$> opStart languageDef <*> many (opLetter languageDef))
|
||||
<?> "operator"
|
||||
|
||||
isReservedOp = isReserved . sort $ reservedOpNames languageDef
|
||||
|
||||
-- identifiers & reserved words
|
||||
|
||||
reserved name =
|
||||
lexeme $ try $ do
|
||||
void $ caseString name
|
||||
notFollowedBy (identLetter languageDef) <?> ("end of " ++ show name)
|
||||
|
||||
caseString name
|
||||
| caseSensitive languageDef = string name
|
||||
| otherwise = walk name >> return name
|
||||
where walk = foldr (\c -> ((caseChar c <?> show name) >>)) (return ())
|
||||
caseChar c
|
||||
| isAlpha c = char (toLower c) <|> char (toUpper c)
|
||||
| otherwise = char c
|
||||
|
||||
identifier =
|
||||
lexeme $ try $ do
|
||||
name <- ident
|
||||
if isReservedName name
|
||||
then unexpected ("reserved word " ++ show name)
|
||||
else return name
|
||||
|
||||
ident = ((:) <$> identStart languageDef <*> many (identLetter languageDef))
|
||||
<?> "identifier"
|
||||
|
||||
isReservedName name = isReserved theReservedNames caseName
|
||||
where caseName
|
||||
| caseSensitive languageDef = name
|
||||
| otherwise = toLower <$> name
|
||||
|
||||
isReserved names name = scan names
|
||||
where scan [] = False
|
||||
scan (r:rs) = case compare r name of
|
||||
LT -> scan rs
|
||||
EQ -> True
|
||||
GT -> False
|
||||
|
||||
theReservedNames
|
||||
| caseSensitive languageDef = sort reserved
|
||||
| otherwise = sort . fmap (fmap toLower) $ reserved
|
||||
where reserved = reservedNames languageDef
|
||||
|
||||
-- white space & symbols
|
||||
|
||||
symbol = lexeme . string
|
||||
|
||||
lexeme p = p <* whiteSpace
|
||||
|
||||
whiteSpace
|
||||
| noLine && noMulti = skipMany (simpleSpace <?> "")
|
||||
| noLine = skipMany (simpleSpace <|>
|
||||
multiLineComment <?> "")
|
||||
| noMulti = skipMany (simpleSpace <|>
|
||||
oneLineComment <?> "")
|
||||
| otherwise = skipMany (simpleSpace <|>
|
||||
oneLineComment <|>
|
||||
multiLineComment <?> "")
|
||||
where
|
||||
noLine = null (commentLine languageDef)
|
||||
noMulti = null (commentStart languageDef)
|
||||
|
||||
simpleSpace = skipSome spaceChar
|
||||
|
||||
oneLineComment = void (try (string (commentLine languageDef))
|
||||
>> skipMany (satisfy (/= '\n')))
|
||||
|
||||
multiLineComment = try (string (commentStart languageDef)) >> inComment
|
||||
|
||||
inComment = if nestedComments languageDef
|
||||
then inCommentMulti
|
||||
else inCommentSingle
|
||||
|
||||
inCommentMulti
|
||||
= void (try . string $ commentEnd languageDef)
|
||||
<|> (multiLineComment >> inCommentMulti)
|
||||
<|> (skipSome (noneOf startEnd) >> inCommentMulti)
|
||||
<|> (oneOf startEnd >> inCommentMulti)
|
||||
<?> "end of comment"
|
||||
|
||||
inCommentSingle
|
||||
= void (try . string $ commentEnd languageDef)
|
||||
<|> (skipSome (noneOf startEnd) >> inCommentSingle)
|
||||
<|> (oneOf startEnd >> inCommentSingle)
|
||||
<?> "end of comment"
|
||||
|
||||
startEnd = nub $ (++) <$> commentEnd <*> commentStart $ languageDef
|
||||
startEnd = nub $ (++) <$> commentEnd <*> commentStart $ languageDef
|
||||
|
@ -75,36 +75,36 @@ tests = testGroup "Character parsers"
|
||||
|
||||
instance Arbitrary GeneralCategory where
|
||||
arbitrary = elements
|
||||
[ UppercaseLetter
|
||||
, LowercaseLetter
|
||||
, TitlecaseLetter
|
||||
, ModifierLetter
|
||||
, OtherLetter
|
||||
, NonSpacingMark
|
||||
, SpacingCombiningMark
|
||||
, EnclosingMark
|
||||
, DecimalNumber
|
||||
, LetterNumber
|
||||
, OtherNumber
|
||||
, ConnectorPunctuation
|
||||
, DashPunctuation
|
||||
, OpenPunctuation
|
||||
, ClosePunctuation
|
||||
, InitialQuote
|
||||
, FinalQuote
|
||||
, OtherPunctuation
|
||||
, MathSymbol
|
||||
, CurrencySymbol
|
||||
, ModifierSymbol
|
||||
, OtherSymbol
|
||||
, Space
|
||||
, LineSeparator
|
||||
, ParagraphSeparator
|
||||
, Control
|
||||
, Format
|
||||
, Surrogate
|
||||
, PrivateUse
|
||||
, NotAssigned ]
|
||||
[ UppercaseLetter
|
||||
, LowercaseLetter
|
||||
, TitlecaseLetter
|
||||
, ModifierLetter
|
||||
, OtherLetter
|
||||
, NonSpacingMark
|
||||
, SpacingCombiningMark
|
||||
, EnclosingMark
|
||||
, DecimalNumber
|
||||
, LetterNumber
|
||||
, OtherNumber
|
||||
, ConnectorPunctuation
|
||||
, DashPunctuation
|
||||
, OpenPunctuation
|
||||
, ClosePunctuation
|
||||
, InitialQuote
|
||||
, FinalQuote
|
||||
, OtherPunctuation
|
||||
, MathSymbol
|
||||
, CurrencySymbol
|
||||
, ModifierSymbol
|
||||
, OtherSymbol
|
||||
, Space
|
||||
, LineSeparator
|
||||
, ParagraphSeparator
|
||||
, Control
|
||||
, Format
|
||||
, Surrogate
|
||||
, PrivateUse
|
||||
, NotAssigned ]
|
||||
|
||||
prop_newline :: String -> Property
|
||||
prop_newline = checkChar newline (== '\n') (Just "newline")
|
||||
@ -122,15 +122,15 @@ prop_tab = checkChar tab (== '\t') (Just "tab")
|
||||
|
||||
prop_space :: String -> Property
|
||||
prop_space s = checkParser space r s
|
||||
where r = case findIndex (not . isSpace) s of
|
||||
Just x ->
|
||||
let ch = s !! x
|
||||
in posErr x s
|
||||
[ uneCh ch
|
||||
, uneCh ch
|
||||
, exSpec "white space"
|
||||
, exStr "" ]
|
||||
Nothing -> Right ()
|
||||
where r = case findIndex (not . isSpace) s of
|
||||
Just x ->
|
||||
let ch = s !! x
|
||||
in posErr x s
|
||||
[ uneCh ch
|
||||
, uneCh ch
|
||||
, exSpec "white space"
|
||||
, exStr "" ]
|
||||
Nothing -> Right ()
|
||||
|
||||
prop_controlChar :: String -> Property
|
||||
prop_controlChar = checkChar controlChar isControl (Just "control character")
|
||||
@ -148,8 +148,8 @@ prop_letterChar :: String -> Property
|
||||
prop_letterChar = checkChar letterChar isAlpha (Just "letter")
|
||||
|
||||
prop_alphaNumChar :: String -> Property
|
||||
prop_alphaNumChar = checkChar alphaNumChar isAlphaNum
|
||||
(Just "alphanumeric character")
|
||||
prop_alphaNumChar =
|
||||
checkChar alphaNumChar isAlphaNum (Just "alphanumeric character")
|
||||
|
||||
prop_printChar :: String -> Property
|
||||
prop_printChar = checkChar printChar isPrint (Just "printable character")
|
||||
@ -170,8 +170,8 @@ prop_numberChar :: String -> Property
|
||||
prop_numberChar = checkChar numberChar isNumber (Just "numeric character")
|
||||
|
||||
prop_punctuationChar :: String -> Property
|
||||
prop_punctuationChar = checkChar punctuationChar isPunctuation
|
||||
(Just "punctuation")
|
||||
prop_punctuationChar =
|
||||
checkChar punctuationChar isPunctuation (Just "punctuation")
|
||||
|
||||
prop_symbolChar :: String -> Property
|
||||
prop_symbolChar = checkChar symbolChar isSymbol (Just "symbol")
|
||||
|
@ -57,15 +57,15 @@ tests = testGroup "Parse errors"
|
||||
, testProperty "message components are visible" prop_visibleMsgs ]
|
||||
|
||||
instance Arbitrary Message where
|
||||
arbitrary = ($) <$> elements constructors <*> arbitrary
|
||||
where constructors = [Unexpected, Expected, Message]
|
||||
arbitrary = ($) <$> elements constructors <*> arbitrary
|
||||
where constructors = [Unexpected, Expected, Message]
|
||||
|
||||
instance Arbitrary ParseError where
|
||||
arbitrary = do
|
||||
ms <- listOf arbitrary
|
||||
pe <- oneof [ newErrorUnknown <$> arbitrary
|
||||
, newErrorMessage <$> arbitrary <*> arbitrary ]
|
||||
return $ foldr addErrorMessage pe ms
|
||||
arbitrary = do
|
||||
ms <- listOf arbitrary
|
||||
pe <- oneof [ newErrorUnknown <$> arbitrary
|
||||
, newErrorMessage <$> arbitrary <*> arbitrary ]
|
||||
return $ foldr addErrorMessage pe ms
|
||||
|
||||
prop_messageString :: Message -> Bool
|
||||
prop_messageString m@(Unexpected s) = s == messageString m
|
||||
@ -74,57 +74,57 @@ prop_messageString m@(Message s) = s == messageString m
|
||||
|
||||
prop_newErrorMessage :: Message -> SourcePos -> Bool
|
||||
prop_newErrorMessage msg pos = added && errorPos new == pos
|
||||
where new = newErrorMessage msg pos
|
||||
added = errorMessages new == bool [msg] [] (badMessage msg)
|
||||
where new = newErrorMessage msg pos
|
||||
added = errorMessages new == bool [msg] [] (badMessage msg)
|
||||
|
||||
prop_wellFormedMessages :: ParseError -> Bool
|
||||
prop_wellFormedMessages err = wellFormed $ errorMessages err
|
||||
|
||||
prop_parseErrorCopy :: ParseError -> Bool
|
||||
prop_parseErrorCopy err =
|
||||
foldr addErrorMessage (newErrorUnknown pos) messages == err
|
||||
where pos = errorPos err
|
||||
messages = errorMessages err
|
||||
foldr addErrorMessage (newErrorUnknown pos) msgs == err
|
||||
where pos = errorPos err
|
||||
msgs = errorMessages err
|
||||
|
||||
prop_setErrorPos :: SourcePos -> ParseError -> Bool
|
||||
prop_setErrorPos pos err =
|
||||
errorPos new == pos && errorMessages new == errorMessages err
|
||||
where new = setErrorPos pos err
|
||||
errorPos new == pos && errorMessages new == errorMessages err
|
||||
where new = setErrorPos pos err
|
||||
|
||||
prop_addErrorMessage :: Message -> ParseError -> Bool
|
||||
prop_addErrorMessage msg err =
|
||||
wellFormed msgs && (badMessage msg || added)
|
||||
where new = addErrorMessage msg err
|
||||
msgs = errorMessages new
|
||||
added = msg `elem` msgs && not (errorIsUnknown new)
|
||||
wellFormed msgs && (badMessage msg || added)
|
||||
where new = addErrorMessage msg err
|
||||
msgs = errorMessages new
|
||||
added = msg `elem` msgs && not (errorIsUnknown new)
|
||||
|
||||
prop_setErrorMessage :: Message -> ParseError -> Bool
|
||||
prop_setErrorMessage msg err =
|
||||
wellFormed msgs && (badMessage msg || (added && unique))
|
||||
where new = setErrorMessage msg err
|
||||
msgs = errorMessages new
|
||||
added = msg `elem` msgs && not (errorIsUnknown new)
|
||||
unique = length (filter (== fromEnum msg) (fromEnum <$> msgs)) == 1
|
||||
wellFormed msgs && (badMessage msg || (added && unique))
|
||||
where new = setErrorMessage msg err
|
||||
msgs = errorMessages new
|
||||
added = msg `elem` msgs && not (errorIsUnknown new)
|
||||
unique = length (filter (== fromEnum msg) (fromEnum <$> msgs)) == 1
|
||||
|
||||
prop_mergeErrorPos :: ParseError -> ParseError -> Bool
|
||||
prop_mergeErrorPos e1 e2 = errorPos (mergeError e1 e2) == min pos1 pos2
|
||||
where pos1 = errorPos e1
|
||||
pos2 = errorPos e2
|
||||
where pos1 = errorPos e1
|
||||
pos2 = errorPos e2
|
||||
|
||||
prop_mergeErrorMsgs :: ParseError -> ParseError -> Bool
|
||||
prop_mergeErrorMsgs e1 e2' = errorPos e1 /= errorPos e2 || wellFormed msgsm
|
||||
where e2 = setErrorPos (errorPos e1) e2'
|
||||
msgsm = errorMessages $ mergeError e1 e2
|
||||
where e2 = setErrorPos (errorPos e1) e2'
|
||||
msgsm = errorMessages $ mergeError e1 e2
|
||||
|
||||
prop_visiblePos :: ParseError -> Bool
|
||||
prop_visiblePos err = show (errorPos err) `isPrefixOf` show err
|
||||
|
||||
prop_visibleMsgs :: ParseError -> Bool
|
||||
prop_visibleMsgs err = all (`isInfixOf` shown) (errorMessages err >>= f)
|
||||
where shown = show err
|
||||
f (Unexpected s) = ["unexpected", s]
|
||||
f (Expected s) = ["expecting", s]
|
||||
f (Message s) = [s]
|
||||
where shown = show err
|
||||
f (Unexpected s) = ["unexpected", s]
|
||||
f (Expected s) = ["expecting", s]
|
||||
f (Message s) = [s]
|
||||
|
||||
-- | @iwellFormed xs@ checks that list @xs@ is sorted and contains no
|
||||
-- duplicates and no empty messages.
|
||||
|
103
tests/Pos.hs
103
tests/Pos.hs
@ -55,7 +55,7 @@ tests = testGroup "Textual source positions"
|
||||
, testProperty "position updating" prop_updating ]
|
||||
|
||||
instance Arbitrary SourcePos where
|
||||
arbitrary = newPos <$> fileName <*> choose (1, 1000) <*> choose (0, 100)
|
||||
arbitrary = newPos <$> fileName <*> choose (1, 1000) <*> choose (0, 100)
|
||||
|
||||
fileName :: Gen SourceName
|
||||
fileName = do
|
||||
@ -64,89 +64,90 @@ fileName = do
|
||||
extension <- simpleName
|
||||
frequency [ (1, return [])
|
||||
, (7, return $ intercalate delimiter dirs ++ "." ++ extension)]
|
||||
where simpleName = listOf1 (arbitrary `suchThat` isAlphaNum)
|
||||
where simpleName = listOf1 (arbitrary `suchThat` isAlphaNum)
|
||||
|
||||
prop_components :: SourcePos -> Bool
|
||||
prop_components pos = pos == copy
|
||||
where copy = newPos (sourceName pos) (sourceLine pos) (sourceColumn pos)
|
||||
where copy = newPos (sourceName pos) (sourceLine pos) (sourceColumn pos)
|
||||
|
||||
prop_showFileName :: SourcePos -> Bool
|
||||
prop_showFileName pos = if null name
|
||||
then '"'`notElem` shown
|
||||
else ("\"" ++ name ++ "\"") `isInfixOf` shown
|
||||
where name = sourceName pos
|
||||
shown = show pos
|
||||
prop_showFileName pos =
|
||||
if null name
|
||||
then '"'`notElem` shown
|
||||
else ("\"" ++ name ++ "\"") `isInfixOf` shown
|
||||
where name = sourceName pos
|
||||
shown = show pos
|
||||
|
||||
prop_showLine :: SourcePos -> Bool
|
||||
prop_showLine pos = ("line " ++ line) `isInfixOf` show pos
|
||||
where line = show $ sourceLine pos
|
||||
where line = show $ sourceLine pos
|
||||
|
||||
prop_showColumn :: SourcePos -> Bool
|
||||
prop_showColumn pos = ("column " ++ column) `isInfixOf` show pos
|
||||
where column = show $ sourceColumn pos
|
||||
where column = show $ sourceColumn pos
|
||||
|
||||
prop_initialPos :: SourceName -> Bool
|
||||
prop_initialPos n =
|
||||
sourceName ipos == n &&
|
||||
sourceLine ipos == 1 &&
|
||||
sourceColumn ipos == 1
|
||||
where ipos = initialPos n
|
||||
sourceName ipos == n &&
|
||||
sourceLine ipos == 1 &&
|
||||
sourceColumn ipos == 1
|
||||
where ipos = initialPos n
|
||||
|
||||
prop_incSourceLine :: SourcePos -> NonNegative Int -> Bool
|
||||
prop_incSourceLine pos l =
|
||||
d sourceName id pos incp &&
|
||||
d sourceLine (+ l') pos incp &&
|
||||
d sourceColumn id pos incp
|
||||
where l' = getNonNegative l
|
||||
incp = incSourceLine pos l'
|
||||
d sourceName id pos incp &&
|
||||
d sourceLine (+ l') pos incp &&
|
||||
d sourceColumn id pos incp
|
||||
where l' = getNonNegative l
|
||||
incp = incSourceLine pos l'
|
||||
|
||||
prop_incSourceColumn :: SourcePos -> NonNegative Int -> Bool
|
||||
prop_incSourceColumn pos c =
|
||||
d sourceName id pos incp &&
|
||||
d sourceLine id pos incp &&
|
||||
d sourceColumn (+ c') pos incp
|
||||
where c' = getNonNegative c
|
||||
incp = incSourceColumn pos c'
|
||||
d sourceName id pos incp &&
|
||||
d sourceLine id pos incp &&
|
||||
d sourceColumn (+ c') pos incp
|
||||
where c' = getNonNegative c
|
||||
incp = incSourceColumn pos c'
|
||||
|
||||
prop_setSourceName :: SourcePos -> SourceName -> Bool
|
||||
prop_setSourceName pos n =
|
||||
d sourceName (const n) pos setp &&
|
||||
d sourceLine id pos setp &&
|
||||
d sourceColumn id pos setp
|
||||
where setp = setSourceName pos n
|
||||
d sourceName (const n) pos setp &&
|
||||
d sourceLine id pos setp &&
|
||||
d sourceColumn id pos setp
|
||||
where setp = setSourceName pos n
|
||||
|
||||
prop_setSourceLine :: SourcePos -> Positive Int -> Bool
|
||||
prop_setSourceLine pos l =
|
||||
d sourceName id pos setp &&
|
||||
d sourceLine (const l') pos setp &&
|
||||
d sourceColumn id pos setp
|
||||
where l' = getPositive l
|
||||
setp = setSourceLine pos l'
|
||||
d sourceName id pos setp &&
|
||||
d sourceLine (const l') pos setp &&
|
||||
d sourceColumn id pos setp
|
||||
where l' = getPositive l
|
||||
setp = setSourceLine pos l'
|
||||
|
||||
prop_setSourceColumn :: SourcePos -> NonNegative Int -> Bool
|
||||
prop_setSourceColumn pos c =
|
||||
d sourceName id pos setp &&
|
||||
d sourceLine id pos setp &&
|
||||
d sourceColumn (const c') pos setp
|
||||
where c' = getNonNegative c
|
||||
setp = setSourceColumn pos c'
|
||||
d sourceName id pos setp &&
|
||||
d sourceLine id pos setp &&
|
||||
d sourceColumn (const c') pos setp
|
||||
where c' = getNonNegative c
|
||||
setp = setSourceColumn pos c'
|
||||
|
||||
prop_updating :: SourcePos -> String -> Bool
|
||||
prop_updating pos "" = updatePosString pos "" == pos
|
||||
prop_updating pos s =
|
||||
d sourceName id pos updated &&
|
||||
d sourceLine (+ inclines) pos updated &&
|
||||
cols >= mincols && ((last s /= '\t') || ((cols - 1) `rem` 8 == 0))
|
||||
where updated = updatePosString pos s
|
||||
cols = sourceColumn updated
|
||||
newlines = elemIndices '\n' s
|
||||
creturns = elemIndices '\r' s
|
||||
inclines = length newlines
|
||||
total = length s
|
||||
allctrls = newlines ++ creturns
|
||||
mincols = if null allctrls
|
||||
then total + sourceColumn pos
|
||||
else total - maximum allctrls
|
||||
d sourceName id pos updated &&
|
||||
d sourceLine (+ inclines) pos updated &&
|
||||
cols >= mincols && ((last s /= '\t') || ((cols - 1) `rem` 8 == 0))
|
||||
where updated = updatePosString pos s
|
||||
cols = sourceColumn updated
|
||||
newlines = elemIndices '\n' s
|
||||
creturns = elemIndices '\r' s
|
||||
inclines = length newlines
|
||||
total = length s
|
||||
allctrls = newlines ++ creturns
|
||||
mincols = if null allctrls
|
||||
then total + sourceColumn pos
|
||||
else total - maximum allctrls
|
||||
|
||||
d :: Eq b => (a -> b) -> (b -> b) -> a -> a -> Bool
|
||||
d f g x y = g (f x) == f y
|
||||
|
@ -28,17 +28,17 @@
|
||||
-- possibility of such damage.
|
||||
|
||||
module Util
|
||||
( checkParser
|
||||
, simpleParse
|
||||
, checkChar
|
||||
, checkString
|
||||
, posErr
|
||||
, uneStr
|
||||
, uneCh
|
||||
, exStr
|
||||
, exCh
|
||||
, exSpec
|
||||
, showToken )
|
||||
( checkParser
|
||||
, simpleParse
|
||||
, checkChar
|
||||
, checkString
|
||||
, posErr
|
||||
, uneStr
|
||||
, uneCh
|
||||
, exStr
|
||||
, exCh
|
||||
, exSpec
|
||||
, showToken )
|
||||
where
|
||||
|
||||
import Data.Maybe (maybeToList)
|
||||
@ -76,12 +76,12 @@ simpleParse p = parse (p <* eof) ""
|
||||
checkChar :: Parser Char -> (Char -> Bool) ->
|
||||
Maybe String -> String -> Property
|
||||
checkChar p f l' s = checkParser p r s
|
||||
where h = head s
|
||||
l = exSpec <$> maybeToList l'
|
||||
r | null s = posErr 0 s (uneStr "" : l)
|
||||
| length s == 1 && f h = Right h
|
||||
| not (f h) = posErr 0 s (uneCh h : l)
|
||||
| otherwise = posErr 1 s [uneCh (s !! 1), exStr ""]
|
||||
where h = head s
|
||||
l = exSpec <$> maybeToList l'
|
||||
r | null s = posErr 0 s (uneStr "" : l)
|
||||
| length s == 1 && f h = Right h
|
||||
| not (f h) = posErr 0 s (uneCh h : l)
|
||||
| otherwise = posErr 1 s [uneCh (s !! 1), exStr ""]
|
||||
|
||||
-- | @checkString p a label s@ runs parser @p@ on input @s@ and checks if
|
||||
-- the result is equal to @a@ and also quality of error messages. @label@ is
|
||||
@ -89,14 +89,14 @@ checkChar p f l' s = checkParser p r s
|
||||
|
||||
checkString :: Parser String -> String -> String -> String -> Property
|
||||
checkString p a' l s' = checkParser p (w a' 0 s') s'
|
||||
where w [] _ [] = Right a'
|
||||
w [] i (s:_) = posErr i s' [uneCh s, exStr ""]
|
||||
w _ 0 [] = posErr 0 s' [uneStr "", exSpec l]
|
||||
w _ i [] = posErr 0 s' [uneStr (take i s'), exSpec l]
|
||||
w (a:as) i (s:ss)
|
||||
| a == s = w as i' ss
|
||||
| otherwise = posErr 0 s' [uneStr (take i' s'), exSpec l]
|
||||
where i' = succ i
|
||||
where w [] _ [] = Right a'
|
||||
w [] i (s:_) = posErr i s' [uneCh s, exStr ""]
|
||||
w _ 0 [] = posErr 0 s' [uneStr "", exSpec l]
|
||||
w _ i [] = posErr 0 s' [uneStr (take i s'), exSpec l]
|
||||
w (a:as) i (s:ss)
|
||||
| a == s = w as i' ss
|
||||
| otherwise = posErr 0 s' [uneStr (take i' s'), exSpec l]
|
||||
where i' = succ i
|
||||
|
||||
-- | @posErr pos s ms@ is an easy way to model result of parser that
|
||||
-- fails. @pos@ is how many tokens (characters) has been consumed before
|
||||
@ -106,7 +106,7 @@ checkString p a' l s' = checkParser p (w a' 0 s') s'
|
||||
|
||||
posErr :: Int -> String -> [Message] -> Either ParseError a
|
||||
posErr pos s = Left . foldr addErrorMessage (newErrorUnknown errPos)
|
||||
where errPos = updatePosString (initialPos "") (take pos s)
|
||||
where errPos = updatePosString (initialPos "") (take pos s)
|
||||
|
||||
-- | @uneStr s@ returns message created with 'UnExpect' constructor that
|
||||
-- tells the system that string @s@ is unexpected. This can be used to
|
||||
|
Loading…
Reference in New Issue
Block a user