mirror of
https://github.com/mrkkrp/megaparsec.git
synced 2024-12-26 09:44:22 +03:00
b8990ab042
This fixes a regression introduced by: Sun Feb 20 18:24:22 EET 2011 Roman Cheplyaka <roma@ro-che.info> * Choose the longest match when merging error messages The source of the regression is that parsec sometimes generates dummy (aka "unknown") error messages when no actual error has occurred. So, when merging errors, before simply looking at the positions we should check if one of them is unknown and just ignore it. Reported by Matthias Hörmann.
206 lines
6.9 KiB
Haskell
206 lines
6.9 KiB
Haskell
-----------------------------------------------------------------------------
|
|
-- |
|
|
-- Module : Text.Parsec.Error
|
|
-- Copyright : (c) Daan Leijen 1999-2001, (c) Paolo Martini 2007
|
|
-- License : BSD-style (see the LICENSE file)
|
|
--
|
|
-- Maintainer : derek.a.elkins@gmail.com
|
|
-- Stability : provisional
|
|
-- Portability : portable
|
|
--
|
|
-- Parse errors
|
|
--
|
|
-----------------------------------------------------------------------------
|
|
|
|
module Text.Parsec.Error
|
|
( Message ( SysUnExpect, UnExpect, Expect, Message )
|
|
, messageString
|
|
, ParseError, errorPos, errorMessages, errorIsUnknown
|
|
, showErrorMessages
|
|
, newErrorMessage, newErrorUnknown
|
|
, addErrorMessage, setErrorPos, setErrorMessage
|
|
, mergeError
|
|
) where
|
|
|
|
import Data.List ( nub, sort )
|
|
|
|
import Text.Parsec.Pos
|
|
|
|
-- | This abstract data type represents parse error messages. There are
|
|
-- four kinds of messages:
|
|
--
|
|
-- > data Message = SysUnExpect String
|
|
-- > | UnExpect String
|
|
-- > | Expect String
|
|
-- > | Message String
|
|
--
|
|
-- The fine distinction between different kinds of parse errors allows
|
|
-- the system to generate quite good error messages for the user. It
|
|
-- also allows error messages that are formatted in different
|
|
-- languages. Each kind of message is generated by different combinators:
|
|
--
|
|
-- * A 'SysUnExpect' message is automatically generated by the
|
|
-- 'Text.Parsec.Combinator.satisfy' combinator. The argument is the
|
|
-- unexpected input.
|
|
--
|
|
-- * A 'UnExpect' message is generated by the 'Text.Parsec.Prim.unexpected'
|
|
-- combinator. The argument describes the
|
|
-- unexpected item.
|
|
--
|
|
-- * A 'Expect' message is generated by the 'Text.Parsec.Prim.<?>'
|
|
-- combinator. The argument describes the expected item.
|
|
--
|
|
-- * A 'Message' message is generated by the 'fail'
|
|
-- combinator. The argument is some general parser message.
|
|
|
|
data Message = SysUnExpect !String -- @ library generated unexpect
|
|
| UnExpect !String -- @ unexpected something
|
|
| Expect !String -- @ expecting something
|
|
| Message !String -- @ raw message
|
|
|
|
instance Enum Message where
|
|
fromEnum (SysUnExpect _) = 0
|
|
fromEnum (UnExpect _) = 1
|
|
fromEnum (Expect _) = 2
|
|
fromEnum (Message _) = 3
|
|
toEnum _ = error "toEnum is undefined for Message"
|
|
|
|
-- < Return 'True' only when 'compare' would return 'EQ'.
|
|
|
|
instance Eq Message where
|
|
|
|
m1 == m2 = fromEnum m1 == fromEnum m2
|
|
|
|
-- < Compares two error messages without looking at their content. Only
|
|
-- the constructors are compared where:
|
|
--
|
|
-- > 'SysUnExpect' < 'UnExpect' < 'Expect' < 'Message'
|
|
|
|
instance Ord Message where
|
|
compare msg1 msg2 = compare (fromEnum msg1) (fromEnum msg2)
|
|
|
|
-- | Extract the message string from an error message
|
|
|
|
messageString :: Message -> String
|
|
messageString (SysUnExpect s) = s
|
|
messageString (UnExpect s) = s
|
|
messageString (Expect s) = s
|
|
messageString (Message s) = s
|
|
|
|
-- | The abstract data type @ParseError@ represents parse errors. It
|
|
-- provides the source position ('SourcePos') of the error
|
|
-- and a list of error messages ('Message'). A @ParseError@
|
|
-- can be returned by the function 'Text.Parsec.Prim.parse'. @ParseError@ is an
|
|
-- instance of the 'Show' class.
|
|
|
|
data ParseError = ParseError !SourcePos [Message]
|
|
|
|
-- | Extracts the source position from the parse error
|
|
|
|
errorPos :: ParseError -> SourcePos
|
|
errorPos (ParseError pos _msgs)
|
|
= pos
|
|
|
|
-- | Extracts the list of error messages from the parse error
|
|
|
|
errorMessages :: ParseError -> [Message]
|
|
errorMessages (ParseError _pos msgs)
|
|
= sort msgs
|
|
|
|
errorIsUnknown :: ParseError -> Bool
|
|
errorIsUnknown (ParseError _pos msgs)
|
|
= null msgs
|
|
|
|
-- < Create parse errors
|
|
|
|
newErrorUnknown :: SourcePos -> ParseError
|
|
newErrorUnknown pos
|
|
= ParseError pos []
|
|
|
|
newErrorMessage :: Message -> SourcePos -> ParseError
|
|
newErrorMessage msg pos
|
|
= ParseError pos [msg]
|
|
|
|
addErrorMessage :: Message -> ParseError -> ParseError
|
|
addErrorMessage msg (ParseError pos msgs)
|
|
= ParseError pos (msg:msgs)
|
|
|
|
setErrorPos :: SourcePos -> ParseError -> ParseError
|
|
setErrorPos pos (ParseError _ msgs)
|
|
= ParseError pos msgs
|
|
|
|
setErrorMessage :: Message -> ParseError -> ParseError
|
|
setErrorMessage msg (ParseError pos msgs)
|
|
= ParseError pos (msg : filter (msg /=) msgs)
|
|
|
|
mergeError :: ParseError -> ParseError -> ParseError
|
|
mergeError e1@(ParseError pos1 msgs1) e2@(ParseError pos2 msgs2)
|
|
-- prefer meaningful errors
|
|
| null msgs2 && not (null msgs1) = e1
|
|
| null msgs1 && not (null msgs2) = e2
|
|
| otherwise
|
|
= case pos1 `compare` pos2 of
|
|
-- select the longest match
|
|
EQ -> ParseError pos1 (msgs1 ++ msgs2)
|
|
GT -> e1
|
|
LT -> e2
|
|
|
|
instance Show ParseError where
|
|
show err
|
|
= show (errorPos err) ++ ":" ++
|
|
showErrorMessages "or" "unknown parse error"
|
|
"expecting" "unexpected" "end of input"
|
|
(errorMessages err)
|
|
|
|
-- Language independent show function
|
|
|
|
-- TODO
|
|
-- < The standard function for showing error messages. Formats a list of
|
|
-- error messages in English. This function is used in the |Show|
|
|
-- instance of |ParseError <#ParseError>|. The resulting string will be
|
|
-- formatted like:
|
|
--
|
|
-- |unexpected /{The first UnExpect or a SysUnExpect message}/;
|
|
-- expecting /{comma separated list of Expect messages}/;
|
|
-- /{comma separated list of Message messages}/
|
|
|
|
showErrorMessages ::
|
|
String -> String -> String -> String -> String -> [Message] -> String
|
|
showErrorMessages msgOr msgUnknown msgExpecting msgUnExpected msgEndOfInput msgs
|
|
| null msgs = msgUnknown
|
|
| otherwise = concat $ map ("\n"++) $ clean $
|
|
[showSysUnExpect,showUnExpect,showExpect,showMessages]
|
|
where
|
|
(sysUnExpect,msgs1) = span ((SysUnExpect "") ==) msgs
|
|
(unExpect,msgs2) = span ((UnExpect "") ==) msgs1
|
|
(expect,messages) = span ((Expect "") ==) msgs2
|
|
|
|
showExpect = showMany msgExpecting expect
|
|
showUnExpect = showMany msgUnExpected unExpect
|
|
showSysUnExpect | not (null unExpect) ||
|
|
null sysUnExpect = ""
|
|
| null firstMsg = msgUnExpected ++ " " ++ msgEndOfInput
|
|
| otherwise = msgUnExpected ++ " " ++ firstMsg
|
|
where
|
|
firstMsg = messageString (head sysUnExpect)
|
|
|
|
showMessages = showMany "" messages
|
|
|
|
-- helpers
|
|
showMany pre msgs = case clean (map messageString msgs) of
|
|
[] -> ""
|
|
ms | null pre -> commasOr ms
|
|
| otherwise -> pre ++ " " ++ commasOr ms
|
|
|
|
commasOr [] = ""
|
|
commasOr [m] = m
|
|
commasOr ms = commaSep (init ms) ++ " " ++ msgOr ++ " " ++ last ms
|
|
|
|
commaSep = seperate ", " . clean
|
|
|
|
seperate _ [] = ""
|
|
seperate _ [m] = m
|
|
seperate sep (m:ms) = m ++ sep ++ seperate sep ms
|
|
|
|
clean = nub . filter (not . null)
|