mirror of
https://github.com/mrkkrp/megaparsec.git
synced 2024-11-27 15:32:14 +03:00
refactoring, phase 2
This commit is contained in:
parent
227667f829
commit
137ce0a521
@ -4,7 +4,7 @@
|
||||
-- License : BSD3
|
||||
--
|
||||
-- Maintainer : Mark Karpov <markkarpov@opmbx.org>
|
||||
-- Stability : provisional
|
||||
-- Stability : experimental
|
||||
-- Portability : portable
|
||||
--
|
||||
-- This module includes everything you need to get started writing a parser.
|
||||
@ -13,10 +13,8 @@
|
||||
-- parse the result of your own tokenizer you should start with the following
|
||||
-- imports:
|
||||
--
|
||||
-- @
|
||||
-- import Text.MegaParsec.Prim
|
||||
-- import Text.MegaParsec.Combinator
|
||||
-- @
|
||||
-- > import Text.MegaParsec.Prim
|
||||
-- > import Text.MegaParsec.Combinator
|
||||
--
|
||||
-- Then you can implement your own version of 'satisfy' on top of the
|
||||
-- 'tokenPrim' primitive.
|
||||
@ -119,8 +117,6 @@ module Text.MegaParsec
|
||||
, setPosition
|
||||
, setInput
|
||||
-- * Other
|
||||
, setState
|
||||
, updateState
|
||||
, parsecMap
|
||||
, parserReturn
|
||||
, parserBind
|
||||
|
@ -4,7 +4,7 @@
|
||||
-- License : BSD3
|
||||
--
|
||||
-- Maintainer : Mark Karpov <markkarpov@opmbx.org>
|
||||
-- Stability : provisional
|
||||
-- Stability : experimental
|
||||
-- Portability : portable
|
||||
--
|
||||
-- Convenience definitions for working with 'C.ByteString's.
|
||||
@ -27,13 +27,11 @@ type GenParser t st = Parsec C.ByteString st
|
||||
-- input read from @filePath@ using 'ByteString.Char8.readFile'. Returns
|
||||
-- either a 'ParseError' ('Left') or a value of type @a@ ('Right').
|
||||
--
|
||||
-- @
|
||||
-- main = do
|
||||
-- result <- parseFromFile numbers "digits.txt"
|
||||
-- case result of
|
||||
-- Left err -> print err
|
||||
-- Right xs -> print (sum xs)
|
||||
-- @
|
||||
-- > main = do
|
||||
-- > result <- parseFromFile numbers "digits.txt"
|
||||
-- > case result of
|
||||
-- > Left err -> print err
|
||||
-- > Right xs -> print (sum xs)
|
||||
|
||||
parseFromFile :: Parser a -> String -> IO (Either ParseError a)
|
||||
parseFromFile p fname = runP p () fname <$> C.readFile fname
|
||||
|
@ -4,7 +4,7 @@
|
||||
-- License : BSD3
|
||||
--
|
||||
-- Maintainer : Mark Karpov <markkarpov@opmbx.org>
|
||||
-- Stability : provisional
|
||||
-- Stability : experimental
|
||||
-- Portability : portable
|
||||
--
|
||||
-- Convenience definitions for working with lazy 'C.ByteString's.
|
||||
@ -28,13 +28,11 @@ type GenParser t st = Parsec C.ByteString st
|
||||
-- 'ByteString.Lazy.Char8.readFile'. Returns either a 'ParseError' ('Left')
|
||||
-- or a value of type @a@ ('Right').
|
||||
--
|
||||
-- @
|
||||
-- main = do
|
||||
-- result <- parseFromFile numbers "digits.txt"
|
||||
-- case result of
|
||||
-- Left err -> print err
|
||||
-- Right xs -> print (sum xs)
|
||||
-- @
|
||||
-- > main = do
|
||||
-- > result <- parseFromFile numbers "digits.txt"
|
||||
-- > case result of
|
||||
-- > Left err -> print err
|
||||
-- > Right xs -> print (sum xs)
|
||||
|
||||
parseFromFile :: Parser a -> String -> IO (Either ParseError a)
|
||||
parseFromFile p fname = runP p () fname <$> C.readFile fname
|
||||
|
@ -4,13 +4,11 @@
|
||||
-- License : BSD3
|
||||
--
|
||||
-- Maintainer : Mark Karpov <markkarpov@opmbx.org>
|
||||
-- Stability : provisional
|
||||
-- Stability : experimental
|
||||
-- Portability : portable
|
||||
--
|
||||
-- Commonly used character parsers.
|
||||
|
||||
{-# LANGUAGE FlexibleContexts #-}
|
||||
|
||||
module Text.MegaParsec.Char
|
||||
( oneOf
|
||||
, noneOf
|
||||
@ -129,7 +127,7 @@ octDigit = satisfy isOctDigit <?> "octal digit"
|
||||
|
||||
-- | @char c@ parses a single character @c@.
|
||||
--
|
||||
-- > semiColon = char ';'
|
||||
-- > semiColon = char ';'
|
||||
|
||||
char :: Stream s m Char => Char -> ParsecT s u m Char
|
||||
char c = satisfy (== c) <?> show [c]
|
||||
|
@ -4,7 +4,7 @@
|
||||
-- License : BSD3
|
||||
--
|
||||
-- Maintainer : Mark Karpov <markkarpov@opmbx.org>
|
||||
-- Stability : provisional
|
||||
-- Stability : experimental
|
||||
-- Portability : portable
|
||||
--
|
||||
-- Commonly used generic combinators.
|
||||
@ -168,7 +168,7 @@ chainl :: Stream s m t => ParsecT s u m a ->
|
||||
ParsecT s u m (a -> a -> a) -> a -> ParsecT s u m a
|
||||
chainl p op x = chainl1 p op <|> return x
|
||||
|
||||
-- | @chainl1 p op x@ parses /one/ or more occurrences of @p@,
|
||||
-- | @chainl1 p op@ parses /one/ or more occurrences of @p@,
|
||||
-- separated by @op@ Returns a value obtained by a /left/ associative
|
||||
-- application of all functions returned by @op@ to the values returned by
|
||||
-- @p@. This parser can for example be used to eliminate left recursion
|
||||
@ -186,28 +186,18 @@ chainl p op x = chainl1 p op <|> return x
|
||||
|
||||
chainl1 :: Stream s m t =>
|
||||
ParsecT s u m a -> ParsecT s u m (a -> a -> a) -> ParsecT s u m a
|
||||
chainl1 p op = do{ x <- p; rest x }
|
||||
where rest x = do{ f <- op
|
||||
; y <- p
|
||||
; rest (f x y)
|
||||
}
|
||||
<|> return x
|
||||
chainl1 p op = p >>= rest
|
||||
where rest x = ((($ x) <$> op <*> p) >>= rest) <|> return x
|
||||
|
||||
-- | @chainr1 p op x@ parses /one/ or more occurrences of |p|,
|
||||
-- | @chainr1 p op@ parses /one/ or more occurrences of |p|,
|
||||
-- separated by @op@ Returns a value obtained by a /right/ associative
|
||||
-- application of all functions returned by @op@ to the values returned by
|
||||
-- @p@.
|
||||
|
||||
chainr1 :: Stream s m t =>
|
||||
ParsecT s u m a -> ParsecT s u m (a -> a -> a) -> ParsecT s u m a
|
||||
chainr1 p op = scan
|
||||
where
|
||||
scan = do{ x <- p; rest x }
|
||||
rest x = do{ f <- op
|
||||
; y <- scan
|
||||
; return (f x y)
|
||||
}
|
||||
<|> return x
|
||||
chainr1 p op = p >>= rest
|
||||
where rest x = (($ x) <$> op <*> chainr1 p op) <|> return x
|
||||
|
||||
-- | The parser @anyToken@ accepts any kind of token. It is for example
|
||||
-- used to implement 'eof'. Returns the accepted token.
|
||||
@ -240,17 +230,11 @@ notFollowedBy p = try ((try p >>= (unexpected . show)) <|> return ())
|
||||
-- parser @end@ succeeds. Returns the list of values returned by @p@. This
|
||||
-- parser can be used to scan comments:
|
||||
--
|
||||
-- > simpleComment = do{ string "<!--"
|
||||
-- > ; manyTill anyChar (try (string "-->"))
|
||||
-- > }
|
||||
-- > simpleComment = string "<!--" >> manyTill anyChar (string "-->")
|
||||
--
|
||||
-- Note the overlapping parsers @anyChar@ and @string \"-->\"@, and
|
||||
-- therefore the use of the 'try' combinator.
|
||||
-- Note that although parsers @anyChar@ and @string \"-->\"@ overlap, the
|
||||
-- combinator uses 'try' internally to parse @end@, so it's OK.
|
||||
|
||||
manyTill :: Stream s m t =>
|
||||
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
|
||||
manyTill p end = scan
|
||||
where
|
||||
scan = do{ end; return [] }
|
||||
<|>
|
||||
do{ x <- p; xs <- scan; return (x:xs) }
|
||||
manyTill p end = (try end *> return []) <|> ((:) <$> p <*> manyTill p end)
|
||||
|
@ -4,7 +4,7 @@
|
||||
-- License : BSD3
|
||||
--
|
||||
-- Maintainer : Mark Karpov <markkarpov@opmbx.org>
|
||||
-- Stability : provisional
|
||||
-- Stability : experimental
|
||||
-- Portability : portable
|
||||
--
|
||||
-- Parse errors.
|
||||
@ -25,36 +25,36 @@ module Text.MegaParsec.Error
|
||||
, mergeError )
|
||||
where
|
||||
|
||||
import Data.List (nub, sort)
|
||||
import Data.List (nub, sort, intercalate)
|
||||
|
||||
import Text.MegaParsec.Pos
|
||||
|
||||
-- | This abstract data type represents parse error messages. There are
|
||||
-- four kinds of messages:
|
||||
--
|
||||
-- > data Message = SysUnExpect String
|
||||
-- > | UnExpect String
|
||||
-- > | Expect String
|
||||
-- > | Message String
|
||||
-- > data Message = SysUnExpect String
|
||||
-- > | UnExpect String
|
||||
-- > | Expect String
|
||||
-- > | Message String
|
||||
--
|
||||
-- The fine distinction between different kinds of parse errors allows
|
||||
-- the system to generate quite good error messages for the user. It
|
||||
-- also allows error messages that are formatted in different
|
||||
-- languages. Each kind of message is generated by different combinators:
|
||||
-- The fine distinction between different kinds of parse errors allows the
|
||||
-- system to generate quite good error messages for the user. It also allows
|
||||
-- error messages that are formatted in different languages. Each kind of
|
||||
-- message is generated by different combinators:
|
||||
--
|
||||
-- * A 'SysUnExpect' message is automatically generated by the
|
||||
-- 'Text.Parsec.Combinator.satisfy' combinator. The argument is the
|
||||
-- unexpected input.
|
||||
--
|
||||
-- * A 'UnExpect' message is generated by the 'Text.Parsec.Prim.unexpected'
|
||||
-- combinator. The argument describes the
|
||||
-- unexpected item.
|
||||
-- * A 'UnExpect' message is generated by the
|
||||
-- 'Text.Parsec.Prim.unexpected' combinator. The argument describes
|
||||
-- the unexpected item.
|
||||
--
|
||||
-- * A 'Expect' message is generated by the 'Text.Parsec.Prim.<?>'
|
||||
-- combinator. The argument describes the expected item.
|
||||
--
|
||||
-- * A 'Message' message is generated by the 'fail'
|
||||
-- combinator. The argument is some general parser message.
|
||||
-- * A 'Message' message is generated by the 'fail' combinator. The
|
||||
-- argument is some general parser message.
|
||||
|
||||
data Message = SysUnExpect !String -- @ library generated unexpect
|
||||
| UnExpect !String -- @ unexpected something
|
||||
@ -68,19 +68,11 @@ instance Enum Message where
|
||||
fromEnum (Message _) = 3
|
||||
toEnum _ = error "toEnum is undefined for Message"
|
||||
|
||||
-- < Return 'True' only when 'compare' would return 'EQ'.
|
||||
|
||||
instance Eq Message where
|
||||
|
||||
m1 == m2 = fromEnum m1 == fromEnum m2
|
||||
|
||||
-- < Compares two error messages without looking at their content. Only
|
||||
-- the constructors are compared where:
|
||||
--
|
||||
-- > 'SysUnExpect' < 'UnExpect' < 'Expect' < 'Message'
|
||||
|
||||
instance Ord Message where
|
||||
compare msg1 msg2 = compare (fromEnum msg1) (fromEnum msg2)
|
||||
compare m1 m2 = compare (fromEnum m1) (fromEnum m2)
|
||||
|
||||
-- | Extract the message string from an error message
|
||||
|
||||
@ -91,124 +83,91 @@ messageString (Expect s) = s
|
||||
messageString (Message s) = s
|
||||
|
||||
-- | The abstract data type @ParseError@ represents parse errors. It
|
||||
-- provides the source position ('SourcePos') of the error
|
||||
-- and a list of error messages ('Message'). A @ParseError@
|
||||
-- can be returned by the function 'Text.Parsec.Prim.parse'. @ParseError@ is an
|
||||
-- instance of the 'Show' and 'Eq' classes.
|
||||
-- provides the source position ('SourcePos') of the error and a list of
|
||||
-- error messages ('Message'). A @ParseError@ can be returned by the
|
||||
-- function 'Text.Parsec.Prim.parse'. @ParseError@ is an instance of the
|
||||
-- 'Show' and 'Eq' classes.
|
||||
|
||||
data ParseError = ParseError !SourcePos [Message]
|
||||
|
||||
-- | Extracts the source position from the parse error
|
||||
|
||||
errorPos :: ParseError -> SourcePos
|
||||
errorPos (ParseError pos _msgs)
|
||||
= pos
|
||||
|
||||
-- | Extracts the list of error messages from the parse error
|
||||
|
||||
errorMessages :: ParseError -> [Message]
|
||||
errorMessages (ParseError _pos msgs)
|
||||
= sort msgs
|
||||
|
||||
errorIsUnknown :: ParseError -> Bool
|
||||
errorIsUnknown (ParseError _pos msgs)
|
||||
= null msgs
|
||||
|
||||
-- < Create parse errors
|
||||
|
||||
newErrorUnknown :: SourcePos -> ParseError
|
||||
newErrorUnknown pos
|
||||
= ParseError pos []
|
||||
|
||||
newErrorMessage :: Message -> SourcePos -> ParseError
|
||||
newErrorMessage msg pos
|
||||
= ParseError pos [msg]
|
||||
|
||||
addErrorMessage :: Message -> ParseError -> ParseError
|
||||
addErrorMessage msg (ParseError pos msgs)
|
||||
= ParseError pos (msg:msgs)
|
||||
|
||||
setErrorPos :: SourcePos -> ParseError -> ParseError
|
||||
setErrorPos pos (ParseError _ msgs)
|
||||
= ParseError pos msgs
|
||||
|
||||
setErrorMessage :: Message -> ParseError -> ParseError
|
||||
setErrorMessage msg (ParseError pos msgs)
|
||||
= ParseError pos (msg : filter (msg /=) msgs)
|
||||
|
||||
mergeError :: ParseError -> ParseError -> ParseError
|
||||
mergeError e1@(ParseError pos1 msgs1) e2@(ParseError pos2 msgs2)
|
||||
-- prefer meaningful errors
|
||||
| null msgs2 && not (null msgs1) = e1
|
||||
| null msgs1 && not (null msgs2) = e2
|
||||
| otherwise
|
||||
= case pos1 `compare` pos2 of
|
||||
-- select the longest match
|
||||
EQ -> ParseError pos1 (msgs1 ++ msgs2)
|
||||
GT -> e1
|
||||
LT -> e2
|
||||
|
||||
instance Show ParseError where
|
||||
show err
|
||||
= show (errorPos err) ++ ":" ++
|
||||
showErrorMessages "or" "unknown parse error"
|
||||
"expecting" "unexpected" "end of input"
|
||||
(errorMessages err)
|
||||
show e = show (errorPos e) ++ ":" ++ showErrorMessages (errorMessages e)
|
||||
|
||||
instance Eq ParseError where
|
||||
l == r
|
||||
= errorPos l == errorPos r && messageStrs l == messageStrs r
|
||||
where
|
||||
messageStrs = map messageString . errorMessages
|
||||
l == r = errorPos l == errorPos r && mStrs l == mStrs r
|
||||
where mStrs = fmap messageString . errorMessages
|
||||
|
||||
-- Language independent show function
|
||||
-- | Extract the source position from the parse error.
|
||||
|
||||
-- TODO
|
||||
-- < The standard function for showing error messages. Formats a list of
|
||||
-- error messages in English. This function is used in the |Show|
|
||||
-- instance of |ParseError <#ParseError>|. The resulting string will be
|
||||
-- formatted like:
|
||||
--
|
||||
-- |unexpected /{The first UnExpect or a SysUnExpect message}/;
|
||||
-- expecting /{comma separated list of Expect messages}/;
|
||||
-- /{comma separated list of Message messages}/
|
||||
errorPos :: ParseError -> SourcePos
|
||||
errorPos (ParseError pos _) = pos
|
||||
|
||||
showErrorMessages ::
|
||||
String -> String -> String -> String -> String -> [Message] -> String
|
||||
showErrorMessages msgOr msgUnknown msgExpecting msgUnExpected msgEndOfInput msgs
|
||||
| null msgs = msgUnknown
|
||||
| otherwise = concat $ map ("\n"++) $ clean $
|
||||
[showSysUnExpect,showUnExpect,showExpect,showMessages]
|
||||
-- | Extract the list of error messages from the parse error.
|
||||
|
||||
errorMessages :: ParseError -> [Message]
|
||||
errorMessages (ParseError _ ms) = sort ms
|
||||
|
||||
errorIsUnknown :: ParseError -> Bool
|
||||
errorIsUnknown (ParseError _ ms) = null ms
|
||||
|
||||
-- Creation of parse errors
|
||||
|
||||
newErrorUnknown :: SourcePos -> ParseError
|
||||
newErrorUnknown pos = ParseError pos []
|
||||
|
||||
newErrorMessage :: Message -> SourcePos -> ParseError
|
||||
newErrorMessage m pos = ParseError pos [m]
|
||||
|
||||
addErrorMessage :: Message -> ParseError -> ParseError
|
||||
addErrorMessage m (ParseError pos ms) = ParseError pos (m:ms)
|
||||
|
||||
setErrorPos :: SourcePos -> ParseError -> ParseError
|
||||
setErrorPos pos (ParseError _ ms) = ParseError pos ms
|
||||
|
||||
setErrorMessage :: Message -> ParseError -> ParseError
|
||||
setErrorMessage m (ParseError pos ms) = ParseError pos (m : filter (m /=) ms)
|
||||
|
||||
mergeError :: ParseError -> ParseError -> ParseError
|
||||
mergeError e1@(ParseError pos1 ms1) e2@(ParseError pos2 ms2)
|
||||
-- prefer meaningful errors
|
||||
| null ms2 && not (null ms1) = e1
|
||||
| null ms1 && not (null ms2) = e2
|
||||
| otherwise
|
||||
= case pos1 `compare` pos2 of
|
||||
-- select the longest match
|
||||
EQ -> ParseError pos1 (ms1 ++ ms2)
|
||||
GT -> e1
|
||||
LT -> e2
|
||||
|
||||
showErrorMessages :: [Message] -> String
|
||||
showErrorMessages ms
|
||||
| null ms = "unknown parse error"
|
||||
| otherwise = concatMap ("\n" ++) $ clean
|
||||
[showSysUnExpect, showUnExpect, showExpect, showMessages]
|
||||
where
|
||||
(sysUnExpect,msgs1) = span ((SysUnExpect "") ==) msgs
|
||||
(unExpect,msgs2) = span ((UnExpect "") ==) msgs1
|
||||
(expect,messages) = span ((Expect "") ==) msgs2
|
||||
(sysUnExpect, ms1) = span (SysUnExpect "" ==) ms
|
||||
(unExpect, ms2) = span (UnExpect "" ==) ms1
|
||||
(expect, messages) = span (Expect "" ==) ms2
|
||||
|
||||
showExpect = showMany msgExpecting expect
|
||||
showUnExpect = showMany msgUnExpected unExpect
|
||||
showSysUnExpect | not (null unExpect) ||
|
||||
null sysUnExpect = ""
|
||||
| null firstMsg = msgUnExpected ++ " " ++ msgEndOfInput
|
||||
| otherwise = msgUnExpected ++ " " ++ firstMsg
|
||||
where
|
||||
firstMsg = messageString (head sysUnExpect)
|
||||
firstMsg = messageString (head sysUnExpect)
|
||||
showExpect = showMany "expecting" expect
|
||||
showUnExpect = showMany "unexpected" unExpect
|
||||
showSysUnExpect
|
||||
| not (null unExpect) || null sysUnExpect = ""
|
||||
| null firstMsg = "unexpected end of input"
|
||||
| otherwise = "unexpected " ++ firstMsg
|
||||
showMessages = showMany "" messages
|
||||
|
||||
showMessages = showMany "" messages
|
||||
showMany pre msgs =
|
||||
case clean (messageString <$> msgs) of
|
||||
[] -> ""
|
||||
xs | null pre -> commasOr xs
|
||||
| otherwise -> pre ++ " " ++ commasOr xs
|
||||
|
||||
-- helpers
|
||||
showMany pre msgs = case clean (map messageString msgs) of
|
||||
[] -> ""
|
||||
ms | null pre -> commasOr ms
|
||||
| otherwise -> pre ++ " " ++ commasOr ms
|
||||
commasOr [] = ""
|
||||
commasOr [x] = x
|
||||
commasOr xs = commaSep (init xs) ++ " or " ++ last xs
|
||||
|
||||
commasOr [] = ""
|
||||
commasOr [m] = m
|
||||
commasOr ms = commaSep (init ms) ++ " " ++ msgOr ++ " " ++ last ms
|
||||
commaSep = intercalate ", " . clean
|
||||
|
||||
commaSep = separate ", " . clean
|
||||
|
||||
separate _ [] = ""
|
||||
separate _ [m] = m
|
||||
separate sep (m:ms) = m ++ sep ++ separate sep ms
|
||||
|
||||
clean = nub . filter (not . null)
|
||||
clean = nub . filter (not . null)
|
||||
|
@ -4,145 +4,108 @@
|
||||
-- License : BSD3
|
||||
--
|
||||
-- Maintainer : Mark Karpov <markkarpov@opmbx.org>
|
||||
-- Stability : provisional
|
||||
-- Stability : experimental
|
||||
-- Portability : non-portable (uses non-portable module Text.Parsec.Token)
|
||||
--
|
||||
-- A helper module that defines some language definitions that can be used
|
||||
-- to instantiate a token parser (see "Text.Parsec.Token").
|
||||
|
||||
module Text.MegaParsec.Language
|
||||
( haskellDef
|
||||
, haskell
|
||||
, mondrianDef
|
||||
, mondrian
|
||||
( LanguageDef
|
||||
, GenLanguageDef
|
||||
, emptyDef
|
||||
, haskellStyle
|
||||
, javaStyle
|
||||
, LanguageDef
|
||||
, GenLanguageDef )
|
||||
, haskellDef
|
||||
, mondrianDef )
|
||||
where
|
||||
|
||||
import Text.MegaParsec
|
||||
import Text.MegaParsec.Token
|
||||
|
||||
-----------------------------------------------------------
|
||||
-- Styles: haskellStyle, javaStyle
|
||||
-----------------------------------------------------------
|
||||
|
||||
-- | This is a minimal token definition for Haskell style languages. It
|
||||
-- defines the style of comments, valid identifiers and case
|
||||
-- sensitivity. It does not define any reserved words or operators.
|
||||
|
||||
haskellStyle :: LanguageDef st
|
||||
haskellStyle = emptyDef
|
||||
{ commentStart = "{-"
|
||||
, commentEnd = "-}"
|
||||
, commentLine = "--"
|
||||
, nestedComments = True
|
||||
, identStart = letter
|
||||
, identLetter = alphaNum <|> oneOf "_'"
|
||||
, opStart = opLetter haskellStyle
|
||||
, opLetter = oneOf ":!#$%&*+./<=>?@\\^|-~"
|
||||
, reservedOpNames= []
|
||||
, reservedNames = []
|
||||
, caseSensitive = True
|
||||
}
|
||||
|
||||
-- | This is a minimal token definition for Java style languages. It
|
||||
-- defines the style of comments, valid identifiers and case
|
||||
-- sensitivity. It does not define any reserved words or operators.
|
||||
|
||||
javaStyle :: LanguageDef st
|
||||
javaStyle = emptyDef
|
||||
{ commentStart = "/*"
|
||||
, commentEnd = "*/"
|
||||
, commentLine = "//"
|
||||
, nestedComments = True
|
||||
, identStart = letter
|
||||
, identLetter = alphaNum <|> oneOf "_'"
|
||||
, reservedNames = []
|
||||
, reservedOpNames= []
|
||||
, caseSensitive = False
|
||||
}
|
||||
|
||||
-----------------------------------------------------------
|
||||
-- minimal language definition
|
||||
--------------------------------------------------------
|
||||
|
||||
-- | This is the most minimal token definition. It is recommended to use
|
||||
-- this definition as the basis for other definitions. @emptyDef@ has
|
||||
-- no reserved names or operators, is case sensitive and doesn't accept
|
||||
-- this definition as the basis for other definitions. @emptyDef@ has no
|
||||
-- reserved names or operators, is case sensitive and doesn't accept
|
||||
-- comments, identifiers or operators.
|
||||
|
||||
emptyDef :: LanguageDef st
|
||||
emptyDef = LanguageDef
|
||||
{ commentStart = ""
|
||||
, commentEnd = ""
|
||||
, commentLine = ""
|
||||
, nestedComments = True
|
||||
, identStart = letter <|> char '_'
|
||||
, identLetter = alphaNum <|> oneOf "_'"
|
||||
, opStart = opLetter emptyDef
|
||||
, opLetter = oneOf ":!#$%&*+./<=>?@\\^|-~"
|
||||
, reservedOpNames= []
|
||||
, reservedNames = []
|
||||
, caseSensitive = True
|
||||
}
|
||||
emptyDef :: LanguageDef st
|
||||
emptyDef =
|
||||
LanguageDef
|
||||
{ commentStart = ""
|
||||
, commentEnd = ""
|
||||
, commentLine = ""
|
||||
, nestedComments = True
|
||||
, identStart = letter <|> char '_'
|
||||
, identLetter = alphaNum <|> oneOf "_'"
|
||||
, opStart = opLetter emptyDef
|
||||
, opLetter = oneOf ":!#$%&*+./<=>?@\\^|-~"
|
||||
, reservedOpNames = []
|
||||
, reservedNames = []
|
||||
, caseSensitive = True }
|
||||
|
||||
-- | This is a minimal token definition for Haskell style languages. It
|
||||
-- defines the style of comments, valid identifiers and case sensitivity. It
|
||||
-- does not define any reserved words or operators.
|
||||
|
||||
haskellStyle :: LanguageDef st
|
||||
haskellStyle =
|
||||
emptyDef
|
||||
{ commentStart = "{-"
|
||||
, commentEnd = "-}"
|
||||
, commentLine = "--"
|
||||
, nestedComments = True
|
||||
, identStart = letter
|
||||
, identLetter = alphaNum <|> oneOf "_'"
|
||||
, opStart = opLetter haskellStyle
|
||||
, opLetter = oneOf ":!#$%&*+./<=>?@\\^|-~"
|
||||
, reservedOpNames = []
|
||||
, reservedNames = []
|
||||
, caseSensitive = True }
|
||||
|
||||
-----------------------------------------------------------
|
||||
-- Haskell
|
||||
-----------------------------------------------------------
|
||||
-- | This is a minimal token definition for Java style languages. It
|
||||
-- defines the style of comments, valid identifiers and case sensitivity. It
|
||||
-- does not define any reserved words or operators.
|
||||
|
||||
-- | A lexer for the haskell language.
|
||||
|
||||
haskell :: TokenParser st
|
||||
haskell = makeTokenParser haskellDef
|
||||
javaStyle :: LanguageDef st
|
||||
javaStyle =
|
||||
emptyDef
|
||||
{ commentStart = "/*"
|
||||
, commentEnd = "*/"
|
||||
, commentLine = "//"
|
||||
, nestedComments = True
|
||||
, identStart = letter
|
||||
, identLetter = alphaNum <|> oneOf "_'"
|
||||
, reservedNames = []
|
||||
, reservedOpNames = []
|
||||
, caseSensitive = False }
|
||||
|
||||
-- | The language definition for the Haskell language.
|
||||
|
||||
haskellDef :: LanguageDef st
|
||||
haskellDef = haskell98Def
|
||||
{ identLetter = identLetter haskell98Def <|> char '#'
|
||||
, reservedNames = reservedNames haskell98Def ++
|
||||
["foreign","import","export","primitive"
|
||||
,"_ccall_","_casm_"
|
||||
,"forall"
|
||||
]
|
||||
}
|
||||
haskellDef =
|
||||
haskell98Def
|
||||
{ identLetter = identLetter haskell98Def <|> char '#'
|
||||
, reservedNames = reservedNames haskell98Def ++
|
||||
[ "foreign", "import", "export", "primitive"
|
||||
, "_ccall_", "_casm_", "forall"] }
|
||||
|
||||
-- | The language definition for the language Haskell98.
|
||||
|
||||
haskell98Def :: LanguageDef st
|
||||
haskell98Def = haskellStyle
|
||||
{ reservedOpNames= ["::","..","=","\\","|","<-","->","@","~","=>"]
|
||||
, reservedNames = ["let","in","case","of","if","then","else",
|
||||
"data","type",
|
||||
"class","default","deriving","do","import",
|
||||
"infix","infixl","infixr","instance","module",
|
||||
"newtype","where",
|
||||
"primitive"
|
||||
-- "as","qualified","hiding"
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
-----------------------------------------------------------
|
||||
-- Mondrian
|
||||
-----------------------------------------------------------
|
||||
|
||||
-- | A lexer for the mondrian language.
|
||||
|
||||
mondrian :: TokenParser st
|
||||
mondrian = makeTokenParser mondrianDef
|
||||
haskell98Def =
|
||||
haskellStyle
|
||||
{ reservedOpNames = ["::","..","=","\\","|","<-","->","@","~","=>"]
|
||||
, reservedNames = [ "let", "in", "case", "of", "if", "then", "else"
|
||||
, "data", "type", "class", "default", "deriving"
|
||||
, "do", "import", "infix", "infixl", "infixr"
|
||||
, "instance", "module", "newtype", "where"
|
||||
, "primitive" ] }
|
||||
|
||||
-- | The language definition for the language Mondrian.
|
||||
|
||||
mondrianDef :: LanguageDef st
|
||||
mondrianDef = javaStyle
|
||||
{ reservedNames = [ "case", "class", "default", "extends"
|
||||
, "import", "in", "let", "new", "of", "package"
|
||||
]
|
||||
, caseSensitive = True
|
||||
}
|
||||
mondrianDef =
|
||||
javaStyle
|
||||
{ reservedNames = [ "case", "class", "default", "extends"
|
||||
, "import", "in", "let", "new", "of", "package" ]
|
||||
, caseSensitive = True }
|
||||
|
@ -4,7 +4,7 @@
|
||||
-- License : BSD3
|
||||
--
|
||||
-- Maintainer : Mark Karpov <markkarpov@opmbx.org>
|
||||
-- Stability : provisional
|
||||
-- Stability : experimental
|
||||
-- Portability : non-portable (uses existentially quantified data constructors)
|
||||
--
|
||||
-- This module implements permutation parsers. The algorithm used is fairly
|
||||
@ -14,8 +14,6 @@
|
||||
-- /Parsing Permutation Phrases,/ by Arthur Baars, Andres Loh and Doaitse
|
||||
-- Swierstra. Published as a functional pearl at the Haskell Workshop 2001.
|
||||
|
||||
{-# LANGUAGE ExistentialQuantification #-}
|
||||
|
||||
module Text.MegaParsec.Perm
|
||||
( PermParser
|
||||
, StreamPermParser -- abstract
|
||||
|
@ -4,7 +4,7 @@
|
||||
-- License : BSD3
|
||||
--
|
||||
-- Maintainer : Mark Karpov <markkarpov@opmbx.org>
|
||||
-- Stability : provisional
|
||||
-- Stability : experimental
|
||||
-- Portability : portable
|
||||
--
|
||||
-- Textual source positions.
|
||||
@ -29,6 +29,7 @@ module Text.MegaParsec.Pos
|
||||
where
|
||||
|
||||
import Data.Data (Data)
|
||||
import Data.List (foldl')
|
||||
import Data.Typeable (Typeable)
|
||||
|
||||
-- Source positions: a file name, a line and a column upper left is (1,1)
|
||||
@ -42,8 +43,20 @@ type Column = Int
|
||||
-- column number. @SourcePos@ is an instance of the 'Show', 'Eq' and 'Ord'
|
||||
-- class.
|
||||
|
||||
data SourcePos = SourcePos SourceName !Line !Column
|
||||
deriving ( Eq, Ord, Data, Typeable)
|
||||
data SourcePos = SourcePos
|
||||
{ -- | Extract the name of the source from a source position.
|
||||
sourceName :: SourceName
|
||||
-- | Extract the line number from a source position.
|
||||
, sourceLine :: !Line
|
||||
-- | Extract the column number from a source position.
|
||||
, sourceColumn :: !Column }
|
||||
deriving (Eq, Ord, Data, Typeable)
|
||||
|
||||
instance Show SourcePos where
|
||||
show (SourcePos n l c)
|
||||
| null n = showLC
|
||||
| otherwise = "\"" ++ n ++ "\" " ++ showLC
|
||||
where showLC = "(line " ++ show l ++ ", column " ++ show c ++ ")"
|
||||
|
||||
-- | Create a new 'SourcePos' with the given source name,
|
||||
-- line number and column number.
|
||||
@ -55,76 +68,49 @@ newPos = SourcePos
|
||||
-- and line number and column number set to 1, the upper left.
|
||||
|
||||
initialPos :: SourceName -> SourcePos
|
||||
initialPos name
|
||||
= newPos name 1 1
|
||||
initialPos name = newPos name 1 1
|
||||
|
||||
-- | Extracts the name of the source from a source position.
|
||||
|
||||
sourceName :: SourcePos -> SourceName
|
||||
sourceName (SourcePos name _line _column) = name
|
||||
|
||||
-- | Extracts the line number from a source position.
|
||||
|
||||
sourceLine :: SourcePos -> Line
|
||||
sourceLine (SourcePos _name line _column) = line
|
||||
|
||||
-- | Extracts the column number from a source position.
|
||||
|
||||
sourceColumn :: SourcePos -> Column
|
||||
sourceColumn (SourcePos _name _line column) = column
|
||||
|
||||
-- | Increments the line number of a source position.
|
||||
-- | Increment the line number of a source position.
|
||||
|
||||
incSourceLine :: SourcePos -> Line -> SourcePos
|
||||
incSourceLine (SourcePos name line column) n = SourcePos name (line+n) column
|
||||
incSourceLine (SourcePos n l c) d = SourcePos n (l + d) c
|
||||
|
||||
-- | Increments the column number of a source position.
|
||||
|
||||
incSourceColumn :: SourcePos -> Column -> SourcePos
|
||||
incSourceColumn (SourcePos name line column) n = SourcePos name line (column+n)
|
||||
incSourceColumn (SourcePos n l c) d = SourcePos n l (c + d)
|
||||
|
||||
-- | Set the name of the source.
|
||||
|
||||
setSourceName :: SourcePos -> SourceName -> SourcePos
|
||||
setSourceName (SourcePos _name line column) n = SourcePos n line column
|
||||
setSourceName (SourcePos _ l c) n = SourcePos n l c
|
||||
|
||||
-- | Set the line number of a source position.
|
||||
|
||||
setSourceLine :: SourcePos -> Line -> SourcePos
|
||||
setSourceLine (SourcePos name _line column) n = SourcePos name n column
|
||||
setSourceLine (SourcePos n _ c) l = SourcePos n l c
|
||||
|
||||
-- | Set the column number of a source position.
|
||||
|
||||
setSourceColumn :: SourcePos -> Column -> SourcePos
|
||||
setSourceColumn (SourcePos name line _column) n = SourcePos name line n
|
||||
setSourceColumn (SourcePos n l _) = SourcePos n l
|
||||
|
||||
-- | The expression @updatePosString pos s@ updates the source position
|
||||
-- @pos@ by calling 'updatePosChar' on every character in @s@, ie.
|
||||
-- @foldl updatePosChar pos string@.
|
||||
-- @pos@ by calling 'updatePosChar' on every character in @s@, i.e. @foldl
|
||||
-- updatePosChar pos string@.
|
||||
|
||||
updatePosString :: SourcePos -> String -> SourcePos
|
||||
updatePosString pos string
|
||||
= foldl updatePosChar pos string
|
||||
updatePosString = foldl' updatePosChar
|
||||
|
||||
-- | Update a source position given a character. If the character is a
|
||||
-- newline (\'\\n\') or carriage return (\'\\r\') the line number is
|
||||
-- incremented by 1. If the character is a tab (\'\t\') the column
|
||||
-- number is incremented to the nearest 8'th column, ie. @column + 8 -
|
||||
-- ((column-1) \`mod\` 8)@. In all other cases, the column is
|
||||
-- incremented by 1.
|
||||
-- incremented by 1. If the character is a tab (\'\t\') the column number is
|
||||
-- incremented to the nearest 8'th column, i.e. @column + 8 - ((column-1)
|
||||
-- \`mod\` 8)@. In all other cases, the column is incremented by 1.
|
||||
|
||||
updatePosChar :: SourcePos -> Char -> SourcePos
|
||||
updatePosChar (SourcePos name line column) c
|
||||
= case c of
|
||||
'\n' -> SourcePos name (line+1) 1
|
||||
'\t' -> SourcePos name line (column + 8 - ((column-1) `mod` 8))
|
||||
_ -> SourcePos name line (column + 1)
|
||||
|
||||
instance Show SourcePos where
|
||||
show (SourcePos name line column)
|
||||
| null name = showLineColumn
|
||||
| otherwise = "\"" ++ name ++ "\" " ++ showLineColumn
|
||||
where
|
||||
showLineColumn = "(line " ++ show line ++
|
||||
", column " ++ show column ++
|
||||
")"
|
||||
updatePosChar :: SourcePos -> Char -> SourcePos
|
||||
updatePosChar (SourcePos n l c) ch =
|
||||
case ch of
|
||||
'\n' -> SourcePos n (l + 1) 1
|
||||
'\t' -> SourcePos n l (c + 8 - ((c - 1) `mod` 8))
|
||||
_ -> SourcePos n l (c + 1)
|
||||
|
@ -4,7 +4,7 @@
|
||||
-- License : BSD3
|
||||
--
|
||||
-- Maintainer : Mark Karpov <markkarpov@opmbx.org>
|
||||
-- Stability : provisional
|
||||
-- Stability : experimental
|
||||
-- Portability : portable
|
||||
--
|
||||
-- Make Strings an instance of 'Stream' with 'Char' token type.
|
||||
|
@ -4,7 +4,7 @@
|
||||
-- License : BSD3
|
||||
--
|
||||
-- Maintainer : Mark Karpov <markkarpov@opmbx.org>
|
||||
-- Stability : provisional
|
||||
-- Stability : experimental
|
||||
-- Portability : portable
|
||||
--
|
||||
-- Convenience definitions for working with 'Text.Text'.
|
||||
|
@ -4,7 +4,7 @@
|
||||
-- License : BSD3
|
||||
--
|
||||
-- Maintainer : Mark Karpov <markkarpov@opmbx.org>
|
||||
-- Stability : provisional
|
||||
-- Stability : experimental
|
||||
-- Portability : portable
|
||||
--
|
||||
-- Convenience definitions for working with lazy 'Text.Text'.
|
||||
@ -27,13 +27,11 @@ type GenParser st = Parsec T.Text st
|
||||
-- input read from @filePath@ using 'Prelude.readFile'. Returns either a
|
||||
-- 'ParseError' ('Left') or a value of type @a@ ('Right').
|
||||
--
|
||||
-- @
|
||||
-- main = do
|
||||
-- result <- parseFromFile numbers "digits.txt"
|
||||
-- case result of
|
||||
-- Left err -> print err
|
||||
-- Right xs -> print (sum xs)
|
||||
-- @
|
||||
-- > main = do
|
||||
-- > result <- parseFromFile numbers "digits.txt"
|
||||
-- > case result of
|
||||
-- > Left err -> print err
|
||||
-- > Right xs -> print (sum xs)
|
||||
|
||||
parseFromFile :: Parser a -> String -> IO (Either ParseError a)
|
||||
parseFromFile p fname = runP p () fname <$> T.readFile fname
|
||||
|
@ -4,13 +4,12 @@
|
||||
-- License : BSD3
|
||||
--
|
||||
-- Maintainer : Mark Karpov <markkarpov@opmbx.org>
|
||||
-- Stability : provisional
|
||||
-- Stability : experimental
|
||||
-- Portability : non-portable (uses local universal quantification: PolymorphicComponents)
|
||||
--
|
||||
-- A helper module to parse lexical elements (tokens). See 'makeTokenParser'
|
||||
-- for a description of how to use it.
|
||||
|
||||
{-# LANGUAGE PolymorphicComponents #-}
|
||||
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
|
||||
|
||||
module Text.MegaParsec.Token
|
||||
|
@ -29,10 +29,12 @@
|
||||
|
||||
name: megaparsec
|
||||
version: 4.0.0
|
||||
cabal-version: >= 1.8
|
||||
cabal-version: >= 1.10
|
||||
license: BSD3
|
||||
license-file: LICENSE.md
|
||||
author: Daan Leijen <daan@microsoft.com>, Paolo Martini <paolo@nemail.it>
|
||||
author: Daan Leijen <daan@microsoft.com>,
|
||||
Paolo Martini <paolo@nemail.it>,
|
||||
MegaParsec contributors
|
||||
maintainer: Mark Karpov <markkarpov@opmbx.org>
|
||||
homepage: https://github.com/mrkkrp/megaparsec
|
||||
bug-reports: https://github.com/mrkkrp/megaparsec/issues
|
||||
@ -51,13 +53,15 @@ library
|
||||
, mtl
|
||||
, bytestring
|
||||
, text >= 0.2 && < 1.3
|
||||
extensions: ExistentialQuantification
|
||||
, PolymorphicComponents
|
||||
, MultiParamTypeClasses
|
||||
, FlexibleInstances
|
||||
default-extensions:
|
||||
DeriveDataTypeable
|
||||
, ExistentialQuantification
|
||||
, FlexibleContexts
|
||||
, DeriveDataTypeable
|
||||
, CPP
|
||||
, FlexibleInstances
|
||||
, FunctionalDependencies
|
||||
, MultiParamTypeClasses
|
||||
, PolymorphicComponents
|
||||
, UndecidableInstances
|
||||
exposed-modules: Text.MegaParsec
|
||||
, Text.MegaParsec.String
|
||||
, Text.MegaParsec.ByteString
|
||||
@ -73,27 +77,29 @@ library
|
||||
, Text.MegaParsec.Expr
|
||||
, Text.MegaParsec.Language
|
||||
, Text.MegaParsec.Perm
|
||||
ghc-options: -O2 -Wall
|
||||
ghc-options: -O2 -Wall
|
||||
default-language: Haskell2010
|
||||
|
||||
-- The test-suite should be rewritten using QuickCheck. For now let the old
|
||||
-- tests be here.
|
||||
|
||||
test-suite tests
|
||||
main-is: Main.hs
|
||||
hs-source-dirs: test
|
||||
type: exitcode-stdio-1.0
|
||||
other-modules: Bugs
|
||||
main-is: Main.hs
|
||||
hs-source-dirs: test
|
||||
type: exitcode-stdio-1.0
|
||||
other-modules: Bugs
|
||||
, Bugs.Bug2
|
||||
, Bugs.Bug6
|
||||
, Bugs.Bug9
|
||||
, Util
|
||||
build-depends: base
|
||||
build-depends: base
|
||||
, megaparsec >= 4.0.0
|
||||
, HUnit == 1.2.*
|
||||
, test-framework >= 0.6 && < 0.9
|
||||
, test-framework-hunit >= 0.2 && < 0.4
|
||||
ghc-options: -O2 -Wall
|
||||
ghc-options: -O2 -Wall
|
||||
default-language: Haskell2010
|
||||
|
||||
source-repository head
|
||||
type: git
|
||||
location: https://github.com/mrkkrp/megaparsec.git
|
||||
type: git
|
||||
location: https://github.com/mrkkrp/megaparsec.git
|
||||
|
Loading…
Reference in New Issue
Block a user