megaparsec/Text/Parsec/Error.hs
Roman Cheplyaka b8990ab042 When merging error messages, prefer known messages to unknown ones
This fixes a regression introduced by:

Sun Feb 20 18:24:22 EET 2011  Roman Cheplyaka <roma@ro-che.info>
  * Choose the longest match when merging error messages

The source of the regression is that parsec sometimes generates dummy (aka
"unknown") error messages when no actual error has occurred.

So, when merging errors, before simply looking at the positions we should check
if one of them is unknown and just ignore it.

Reported by Matthias Hörmann.
2012-05-30 22:38:09 +00:00

206 lines
6.9 KiB
Haskell

-----------------------------------------------------------------------------
-- |
-- Module : Text.Parsec.Error
-- Copyright : (c) Daan Leijen 1999-2001, (c) Paolo Martini 2007
-- License : BSD-style (see the LICENSE file)
--
-- Maintainer : derek.a.elkins@gmail.com
-- Stability : provisional
-- Portability : portable
--
-- Parse errors
--
-----------------------------------------------------------------------------
module Text.Parsec.Error
( Message ( SysUnExpect, UnExpect, Expect, Message )
, messageString
, ParseError, errorPos, errorMessages, errorIsUnknown
, showErrorMessages
, newErrorMessage, newErrorUnknown
, addErrorMessage, setErrorPos, setErrorMessage
, mergeError
) where
import Data.List ( nub, sort )
import Text.Parsec.Pos
-- | This abstract data type represents parse error messages. There are
-- four kinds of messages:
--
-- > data Message = SysUnExpect String
-- > | UnExpect String
-- > | Expect String
-- > | Message String
--
-- The fine distinction between different kinds of parse errors allows
-- the system to generate quite good error messages for the user. It
-- also allows error messages that are formatted in different
-- languages. Each kind of message is generated by different combinators:
--
-- * A 'SysUnExpect' message is automatically generated by the
-- 'Text.Parsec.Combinator.satisfy' combinator. The argument is the
-- unexpected input.
--
-- * A 'UnExpect' message is generated by the 'Text.Parsec.Prim.unexpected'
-- combinator. The argument describes the
-- unexpected item.
--
-- * A 'Expect' message is generated by the 'Text.Parsec.Prim.<?>'
-- combinator. The argument describes the expected item.
--
-- * A 'Message' message is generated by the 'fail'
-- combinator. The argument is some general parser message.
data Message = SysUnExpect !String -- @ library generated unexpect
| UnExpect !String -- @ unexpected something
| Expect !String -- @ expecting something
| Message !String -- @ raw message
instance Enum Message where
fromEnum (SysUnExpect _) = 0
fromEnum (UnExpect _) = 1
fromEnum (Expect _) = 2
fromEnum (Message _) = 3
toEnum _ = error "toEnum is undefined for Message"
-- < Return 'True' only when 'compare' would return 'EQ'.
instance Eq Message where
m1 == m2 = fromEnum m1 == fromEnum m2
-- < Compares two error messages without looking at their content. Only
-- the constructors are compared where:
--
-- > 'SysUnExpect' < 'UnExpect' < 'Expect' < 'Message'
instance Ord Message where
compare msg1 msg2 = compare (fromEnum msg1) (fromEnum msg2)
-- | Extract the message string from an error message
messageString :: Message -> String
messageString (SysUnExpect s) = s
messageString (UnExpect s) = s
messageString (Expect s) = s
messageString (Message s) = s
-- | The abstract data type @ParseError@ represents parse errors. It
-- provides the source position ('SourcePos') of the error
-- and a list of error messages ('Message'). A @ParseError@
-- can be returned by the function 'Text.Parsec.Prim.parse'. @ParseError@ is an
-- instance of the 'Show' class.
data ParseError = ParseError !SourcePos [Message]
-- | Extracts the source position from the parse error
errorPos :: ParseError -> SourcePos
errorPos (ParseError pos _msgs)
= pos
-- | Extracts the list of error messages from the parse error
errorMessages :: ParseError -> [Message]
errorMessages (ParseError _pos msgs)
= sort msgs
errorIsUnknown :: ParseError -> Bool
errorIsUnknown (ParseError _pos msgs)
= null msgs
-- < Create parse errors
newErrorUnknown :: SourcePos -> ParseError
newErrorUnknown pos
= ParseError pos []
newErrorMessage :: Message -> SourcePos -> ParseError
newErrorMessage msg pos
= ParseError pos [msg]
addErrorMessage :: Message -> ParseError -> ParseError
addErrorMessage msg (ParseError pos msgs)
= ParseError pos (msg:msgs)
setErrorPos :: SourcePos -> ParseError -> ParseError
setErrorPos pos (ParseError _ msgs)
= ParseError pos msgs
setErrorMessage :: Message -> ParseError -> ParseError
setErrorMessage msg (ParseError pos msgs)
= ParseError pos (msg : filter (msg /=) msgs)
mergeError :: ParseError -> ParseError -> ParseError
mergeError e1@(ParseError pos1 msgs1) e2@(ParseError pos2 msgs2)
-- prefer meaningful errors
| null msgs2 && not (null msgs1) = e1
| null msgs1 && not (null msgs2) = e2
| otherwise
= case pos1 `compare` pos2 of
-- select the longest match
EQ -> ParseError pos1 (msgs1 ++ msgs2)
GT -> e1
LT -> e2
instance Show ParseError where
show err
= show (errorPos err) ++ ":" ++
showErrorMessages "or" "unknown parse error"
"expecting" "unexpected" "end of input"
(errorMessages err)
-- Language independent show function
-- TODO
-- < The standard function for showing error messages. Formats a list of
-- error messages in English. This function is used in the |Show|
-- instance of |ParseError <#ParseError>|. The resulting string will be
-- formatted like:
--
-- |unexpected /{The first UnExpect or a SysUnExpect message}/;
-- expecting /{comma separated list of Expect messages}/;
-- /{comma separated list of Message messages}/
showErrorMessages ::
String -> String -> String -> String -> String -> [Message] -> String
showErrorMessages msgOr msgUnknown msgExpecting msgUnExpected msgEndOfInput msgs
| null msgs = msgUnknown
| otherwise = concat $ map ("\n"++) $ clean $
[showSysUnExpect,showUnExpect,showExpect,showMessages]
where
(sysUnExpect,msgs1) = span ((SysUnExpect "") ==) msgs
(unExpect,msgs2) = span ((UnExpect "") ==) msgs1
(expect,messages) = span ((Expect "") ==) msgs2
showExpect = showMany msgExpecting expect
showUnExpect = showMany msgUnExpected unExpect
showSysUnExpect | not (null unExpect) ||
null sysUnExpect = ""
| null firstMsg = msgUnExpected ++ " " ++ msgEndOfInput
| otherwise = msgUnExpected ++ " " ++ firstMsg
where
firstMsg = messageString (head sysUnExpect)
showMessages = showMany "" messages
-- helpers
showMany pre msgs = case clean (map messageString msgs) of
[] -> ""
ms | null pre -> commasOr ms
| otherwise -> pre ++ " " ++ commasOr ms
commasOr [] = ""
commasOr [m] = m
commasOr ms = commaSep (init ms) ++ " " ++ msgOr ++ " " ++ last ms
commaSep = seperate ", " . clean
seperate _ [] = ""
seperate _ [m] = m
seperate sep (m:ms) = m ++ sep ++ seperate sep ms
clean = nub . filter (not . null)