mirror of
https://github.com/mrkkrp/megaparsec.git
synced 2024-12-18 13:51:58 +03:00
202 lines
6.7 KiB
Haskell
202 lines
6.7 KiB
Haskell
-- |
|
||
-- Module : Text.MegaParsec.Error
|
||
-- Copyright : © 2015 MegaParsec contributors
|
||
-- © 2007 Paolo Martini
|
||
-- © 1999–2001 Daan Leijen
|
||
-- License : BSD3
|
||
--
|
||
-- Maintainer : Mark Karpov <markkarpov@opmbx.org>
|
||
-- Stability : experimental
|
||
-- Portability : portable
|
||
--
|
||
-- Parse errors.
|
||
|
||
module Text.MegaParsec.Error
|
||
( Message (SysUnExpect, UnExpect, Expect, Message)
|
||
, messageString
|
||
, ParseError
|
||
, errorPos
|
||
, errorMessages
|
||
, errorIsUnknown
|
||
, showErrorMessages
|
||
, newErrorMessage
|
||
, newErrorUnknown
|
||
, addErrorMessage
|
||
, setErrorPos
|
||
, setErrorMessage
|
||
, mergeError )
|
||
where
|
||
|
||
import Data.List (nub, sort, intercalate)
|
||
|
||
import Text.MegaParsec.Pos
|
||
|
||
-- | This abstract data type represents parse error messages. There are
|
||
-- four kinds of messages:
|
||
--
|
||
-- > data Message = SysUnExpect String
|
||
-- > | UnExpect String
|
||
-- > | Expect String
|
||
-- > | Message String
|
||
--
|
||
-- The fine distinction between different kinds of parse errors allows the
|
||
-- system to generate quite good error messages for the user. It also allows
|
||
-- error messages that are formatted in different languages. Each kind of
|
||
-- message is generated by different combinators:
|
||
--
|
||
-- * A 'SysUnExpect' message is automatically generated by the
|
||
-- 'Text.Parsec.Combinator.satisfy' combinator. The argument is the
|
||
-- unexpected input.
|
||
--
|
||
-- * A 'UnExpect' message is generated by the
|
||
-- 'Text.Parsec.Prim.unexpected' combinator. The argument describes
|
||
-- the unexpected item.
|
||
--
|
||
-- * A 'Expect' message is generated by the 'Text.Parsec.Prim.<?>'
|
||
-- combinator. The argument describes the expected item.
|
||
--
|
||
-- * A 'Message' message is generated by the 'fail' combinator. The
|
||
-- argument is some general parser message.
|
||
|
||
data Message = SysUnExpect !String -- @ library generated unexpect
|
||
| UnExpect !String -- @ unexpected something
|
||
| Expect !String -- @ expecting something
|
||
| Message !String -- @ raw message
|
||
|
||
instance Enum Message where
|
||
fromEnum (SysUnExpect _) = 0
|
||
fromEnum (UnExpect _) = 1
|
||
fromEnum (Expect _) = 2
|
||
fromEnum (Message _) = 3
|
||
toEnum _ = error "toEnum is undefined for Message"
|
||
|
||
instance Eq Message where
|
||
m1 == m2 = fromEnum m1 == fromEnum m2
|
||
|
||
instance Ord Message where
|
||
compare m1 m2 = compare (fromEnum m1) (fromEnum m2)
|
||
|
||
-- | Extract the message string from an error message
|
||
|
||
messageString :: Message -> String
|
||
messageString (SysUnExpect s) = s
|
||
messageString (UnExpect s) = s
|
||
messageString (Expect s) = s
|
||
messageString (Message s) = s
|
||
|
||
-- | The abstract data type @ParseError@ represents parse errors. It
|
||
-- provides the source position ('SourcePos') of the error and a list of
|
||
-- error messages ('Message'). A @ParseError@ can be returned by the
|
||
-- function 'Text.Parsec.Prim.parse'. @ParseError@ is an instance of the
|
||
-- 'Show' and 'Eq' classes.
|
||
|
||
data ParseError = ParseError !SourcePos [Message]
|
||
|
||
instance Show ParseError where
|
||
show e = show (errorPos e) ++ ":" ++ showErrorMessages (errorMessages e)
|
||
|
||
instance Eq ParseError where
|
||
l == r = errorPos l == errorPos r && mStrs l == mStrs r
|
||
where mStrs = fmap messageString . errorMessages
|
||
|
||
-- | Extract the source position from the parse error.
|
||
|
||
errorPos :: ParseError -> SourcePos
|
||
errorPos (ParseError pos _) = pos
|
||
|
||
-- | Extract the list of error messages from the parse error.
|
||
|
||
errorMessages :: ParseError -> [Message]
|
||
errorMessages (ParseError _ ms) = sort ms
|
||
|
||
-- | Test whether given @ParseError@ has associated collection of error
|
||
-- messages. Return @True@ if it has none and @False@ otherwise.
|
||
|
||
errorIsUnknown :: ParseError -> Bool
|
||
errorIsUnknown (ParseError _ ms) = null ms
|
||
|
||
-- Creation of parse errors
|
||
|
||
-- | @newErrorUnknown pos@ creates @ParseError@ without any associated
|
||
-- message but with specified position @pos@.
|
||
|
||
newErrorUnknown :: SourcePos -> ParseError
|
||
newErrorUnknown pos = ParseError pos []
|
||
|
||
-- | @newErrorMessage m pos@ creates @ParseError@ with message @m@ and
|
||
-- associated position @pos@.
|
||
|
||
newErrorMessage :: Message -> SourcePos -> ParseError
|
||
newErrorMessage m pos = ParseError pos [m]
|
||
|
||
-- | @addErrorMessage m err@ returns @ParseError@ @err@ with message @m@
|
||
-- added.
|
||
|
||
addErrorMessage :: Message -> ParseError -> ParseError
|
||
addErrorMessage m (ParseError pos ms) = ParseError pos (m:ms)
|
||
|
||
-- | @setErrorPos pos err@ returns @ParseError@ identical to @err@, but with
|
||
-- position @pos@.
|
||
|
||
setErrorPos :: SourcePos -> ParseError -> ParseError
|
||
setErrorPos pos (ParseError _ ms) = ParseError pos ms
|
||
|
||
-- | @setErrorMessage m err@ returns @ParseError@ identical to @err@, but
|
||
-- with added message @m@. This is different from 'addErrorMessage' in that
|
||
-- it makes sure that collection of messages contains only one instance of
|
||
-- @m@.
|
||
|
||
setErrorMessage :: Message -> ParseError -> ParseError
|
||
setErrorMessage m (ParseError pos ms) = ParseError pos (m : filter (m /=) ms)
|
||
|
||
-- | Merge two error data structures into one joining their collections of
|
||
-- messages and preferring longest match (that is, greater position).
|
||
|
||
mergeError :: ParseError -> ParseError -> ParseError
|
||
mergeError e1@(ParseError pos1 ms1) e2@(ParseError pos2 ms2)
|
||
-- prefer meaningful errors
|
||
| null ms2 && not (null ms1) = e1
|
||
| null ms1 && not (null ms2) = e2
|
||
| otherwise
|
||
= case pos1 `compare` pos2 of
|
||
-- select the longest match
|
||
EQ -> ParseError pos1 (ms1 ++ ms2)
|
||
GT -> e1
|
||
LT -> e2
|
||
|
||
-- | @showErrorMessages ms@ transforms list of error messages @ms@ into
|
||
-- their textual representation.
|
||
|
||
showErrorMessages :: [Message] -> String
|
||
showErrorMessages ms
|
||
| null ms = "unknown parse error"
|
||
| otherwise = concatMap ("\n" ++) $ clean
|
||
[showSysUnExpect, showUnExpect, showExpect, showMessages]
|
||
where
|
||
(sysUnExpect, ms1) = span (SysUnExpect "" ==) ms
|
||
(unExpect, ms2) = span (UnExpect "" ==) ms1
|
||
(expect, messages) = span (Expect "" ==) ms2
|
||
|
||
firstMsg = messageString (head sysUnExpect)
|
||
showExpect = showMany "expecting" expect
|
||
showUnExpect = showMany "unexpected" unExpect
|
||
showSysUnExpect
|
||
| not (null unExpect) || null sysUnExpect = ""
|
||
| null firstMsg = "unexpected end of input"
|
||
| otherwise = "unexpected " ++ firstMsg
|
||
showMessages = showMany "" messages
|
||
|
||
showMany pre msgs =
|
||
case clean (messageString <$> msgs) of
|
||
[] -> ""
|
||
xs | null pre -> commasOr xs
|
||
| otherwise -> pre ++ " " ++ commasOr xs
|
||
|
||
commasOr [] = ""
|
||
commasOr [x] = x
|
||
commasOr xs = commaSep (init xs) ++ " or " ++ last xs
|
||
|
||
commaSep = intercalate ", " . clean
|
||
|
||
clean = nub . filter (not . null)
|