mirror of
https://github.com/mrkkrp/megaparsec.git
synced 2024-11-23 19:38:05 +03:00
494 lines
16 KiB
Haskell
494 lines
16 KiB
Haskell
-- |
|
||
-- Module : Text.Megaparsec.Error
|
||
-- Copyright : © 2015–2017 Megaparsec contributors
|
||
-- License : FreeBSD
|
||
--
|
||
-- Maintainer : Mark Karpov <markkarpov92@gmail.com>
|
||
-- Stability : experimental
|
||
-- Portability : portable
|
||
--
|
||
-- Parse errors. Current version of Megaparsec supports well-typed errors
|
||
-- instead of 'String'-based ones. This gives a lot of flexibility in
|
||
-- describing what exactly went wrong as well as a way to return arbitrary
|
||
-- data in case of failure.
|
||
--
|
||
-- You probably do not want to import this module directly because
|
||
-- "Text.Megaparsec" re-exports it anyway.
|
||
|
||
{-# LANGUAGE BangPatterns #-}
|
||
{-# LANGUAGE CPP #-}
|
||
{-# LANGUAGE DeriveDataTypeable #-}
|
||
{-# LANGUAGE DeriveFunctor #-}
|
||
{-# LANGUAGE DeriveGeneric #-}
|
||
{-# LANGUAGE FlexibleContexts #-}
|
||
{-# LANGUAGE FlexibleInstances #-}
|
||
{-# LANGUAGE ScopedTypeVariables #-}
|
||
|
||
module Text.Megaparsec.Error
|
||
( -- * Parse error type
|
||
ErrorItem (..)
|
||
, ErrorFancy (..)
|
||
, ParseError (..)
|
||
, errorPos
|
||
-- * Pretty-printing
|
||
, ShowToken (..)
|
||
, LineToken (..)
|
||
, ShowErrorComponent (..)
|
||
, parseErrorPretty
|
||
, parseErrorPretty'
|
||
, parseErrorPretty_
|
||
, sourcePosStackPretty
|
||
, parseErrorTextPretty )
|
||
where
|
||
|
||
import Control.DeepSeq
|
||
import Control.Exception
|
||
import Data.Char (chr)
|
||
import Data.Data (Data)
|
||
import Data.List (intercalate)
|
||
import Data.List.NonEmpty (NonEmpty (..))
|
||
import Data.Maybe (fromMaybe, isNothing)
|
||
import Data.Proxy
|
||
import Data.Semigroup
|
||
import Data.Set (Set)
|
||
import Data.Typeable (Typeable)
|
||
import Data.Void
|
||
import Data.Word (Word8)
|
||
import GHC.Generics
|
||
import Prelude hiding (concat)
|
||
import Text.Megaparsec.Pos
|
||
import Text.Megaparsec.Stream
|
||
import qualified Data.List.NonEmpty as NE
|
||
import qualified Data.Set as E
|
||
|
||
#if !MIN_VERSION_base(4,8,0)
|
||
import Control.Applicative
|
||
#endif
|
||
|
||
----------------------------------------------------------------------------
|
||
-- Parse error type
|
||
|
||
-- | Data type that is used to represent “unexpected\/expected” items in
|
||
-- 'ParseError'. The data type is parametrized over the token type @t@.
|
||
--
|
||
-- @since 5.0.0
|
||
|
||
data ErrorItem t
|
||
= Tokens (NonEmpty t) -- ^ Non-empty stream of tokens
|
||
| Label (NonEmpty Char) -- ^ Label (cannot be empty)
|
||
| EndOfInput -- ^ End of input
|
||
deriving (Show, Read, Eq, Ord, Data, Typeable, Generic, Functor)
|
||
|
||
instance NFData t => NFData (ErrorItem t)
|
||
|
||
-- | Additional error data, extendable by user. When no custom data is
|
||
-- necessary, the type is typically indexed by 'Void' to “cancel” the
|
||
-- 'ErrorCustom' constructor.
|
||
--
|
||
-- @since 6.0.0
|
||
|
||
data ErrorFancy e
|
||
= ErrorFail String
|
||
-- ^ 'fail' has been used in parser monad
|
||
| ErrorIndentation Ordering Pos Pos
|
||
-- ^ Incorrect indentation error: desired ordering between reference
|
||
-- level and actual level, reference indentation level, actual
|
||
-- indentation level
|
||
| ErrorCustom e
|
||
-- ^ Custom error data, can be conveniently disabled by indexing
|
||
-- 'ErrorFancy' by 'Void'
|
||
deriving (Show, Read, Eq, Ord, Data, Typeable, Generic, Functor)
|
||
|
||
instance NFData a => NFData (ErrorFancy a) where
|
||
rnf (ErrorFail str) = rnf str
|
||
rnf (ErrorIndentation ord ref act) = ord `seq` rnf ref `seq` rnf act
|
||
rnf (ErrorCustom a) = rnf a
|
||
|
||
-- | @'ParseError' t e@ represents a parse error parametrized over the token
|
||
-- type @t@ and the custom data @e@.
|
||
--
|
||
-- Note that the stack of source positions contains current position as its
|
||
-- head, and the rest of positions allows to track full sequence of include
|
||
-- files with topmost source file at the end of the list.
|
||
--
|
||
-- 'Semigroup' and 'Monoid' instances of the data type allow to merge parse
|
||
-- errors from different branches of parsing. When merging two
|
||
-- 'ParseError's, the longest match is preferred; if positions are the same,
|
||
-- custom data sets and collections of message items are combined. Note that
|
||
-- fancy errors take precedence over trivial errors in merging.
|
||
--
|
||
-- @since 6.0.0
|
||
|
||
data ParseError t e
|
||
= TrivialError (NonEmpty SourcePos) (Maybe (ErrorItem t)) (Set (ErrorItem t))
|
||
-- ^ Trivial errors, generated by Megaparsec's machinery. The data
|
||
-- constructor includes the stack of source positions, unexpected token
|
||
-- (if any), and expected tokens.
|
||
| FancyError (NonEmpty SourcePos) (Set (ErrorFancy e))
|
||
-- ^ Fancy, custom errors.
|
||
deriving (Show, Read, Eq, Data, Typeable, Generic)
|
||
|
||
instance (NFData t, NFData e) => NFData (ParseError t e)
|
||
|
||
instance (Ord t, Ord e) => Semigroup (ParseError t e) where
|
||
(<>) = mergeError
|
||
{-# INLINE (<>) #-}
|
||
|
||
instance (Ord t, Ord e) => Monoid (ParseError t e) where
|
||
mempty = TrivialError (initialPos "" :| []) Nothing E.empty
|
||
mappend = (<>)
|
||
{-# INLINE mappend #-}
|
||
|
||
instance ( Show t
|
||
, Ord t
|
||
, ShowToken t
|
||
, Typeable t
|
||
, Show e
|
||
, ShowErrorComponent e
|
||
, Typeable e )
|
||
=> Exception (ParseError t e) where
|
||
#if MIN_VERSION_base(4,8,0)
|
||
displayException = parseErrorPretty
|
||
#endif
|
||
|
||
-- | Get position of given 'ParseError'.
|
||
--
|
||
-- @since 6.0.0
|
||
|
||
errorPos :: ParseError t e -> NonEmpty SourcePos
|
||
errorPos (TrivialError p _ _) = p
|
||
errorPos (FancyError p _) = p
|
||
|
||
-- | Merge two error data structures into one joining their collections of
|
||
-- message items and preferring the longest match. In other words, earlier
|
||
-- error message is discarded. This may seem counter-intuitive, but
|
||
-- 'mergeError' is only used to merge error messages of alternative branches
|
||
-- of parsing and in this case longest match should be preferred.
|
||
|
||
mergeError :: (Ord t, Ord e)
|
||
=> ParseError t e
|
||
-> ParseError t e
|
||
-> ParseError t e
|
||
mergeError e1 e2 =
|
||
case errorPos e1 `compare` errorPos e2 of
|
||
LT -> e2
|
||
EQ ->
|
||
case (e1, e2) of
|
||
(TrivialError s1 u1 p1, TrivialError _ u2 p2) ->
|
||
TrivialError s1 (n u1 u2) (E.union p1 p2)
|
||
(FancyError {}, TrivialError {}) -> e1
|
||
(TrivialError {}, FancyError {}) -> e2
|
||
(FancyError s1 x1, FancyError _ x2) ->
|
||
FancyError s1 (E.union x1 x2)
|
||
GT -> e1
|
||
where
|
||
-- NOTE The logic behind this merging is that since we only combine
|
||
-- parse errors that happen at exactly the same position, all the
|
||
-- unexpected items will be prefixes of input stream at that position or
|
||
-- labels referring to the same thing. Our aim here is to choose the
|
||
-- longest prefix (merging with labels and end of input is somewhat
|
||
-- arbitrary, but is necessary because otherwise we can't make
|
||
-- ParseError lawful Monoid and have nice parse errors at the same
|
||
-- time).
|
||
n Nothing Nothing = Nothing
|
||
n (Just x) Nothing = Just x
|
||
n Nothing (Just y) = Just y
|
||
n (Just x) (Just y) = Just (max x y)
|
||
{-# INLINE mergeError #-}
|
||
|
||
----------------------------------------------------------------------------
|
||
-- Pretty-printing
|
||
|
||
-- | Type class 'ShowToken' includes methods that allow to pretty-print
|
||
-- single token as well as stream of tokens. This is used for rendering of
|
||
-- error messages.
|
||
--
|
||
-- @since 5.0.0
|
||
|
||
class ShowToken a where
|
||
|
||
-- | Pretty-print non-empty stream of tokens. This function is also used
|
||
-- to print single tokens (represented as singleton lists).
|
||
|
||
showTokens :: NonEmpty a -> String
|
||
|
||
instance ShowToken Char where
|
||
showTokens = stringPretty
|
||
|
||
instance ShowToken Word8 where
|
||
showTokens = stringPretty . fmap (chr . fromIntegral)
|
||
|
||
-- | Type class for tokens that support operations necessary for selecting
|
||
-- and displaying relevant line of input.
|
||
--
|
||
-- @since 6.0.0
|
||
|
||
class LineToken a where
|
||
|
||
-- | Convert a token to a 'Char'. This is used to print relevant line from
|
||
-- input stream by turning a list of tokens into a 'String'.
|
||
|
||
tokenAsChar :: a -> Char
|
||
|
||
-- | Check if given token is a newline or contains newline.
|
||
|
||
tokenIsNewline :: a -> Bool
|
||
|
||
instance LineToken Char where
|
||
tokenAsChar = id
|
||
tokenIsNewline x = x == '\n'
|
||
|
||
instance LineToken Word8 where
|
||
tokenAsChar = chr . fromIntegral
|
||
tokenIsNewline x = x == 10
|
||
|
||
-- | The type class defines how to print custom data component of
|
||
-- 'ParseError'.
|
||
--
|
||
-- @since 5.0.0
|
||
|
||
class Ord a => ShowErrorComponent a where
|
||
|
||
-- | Pretty-print custom data component of 'ParseError'.
|
||
|
||
showErrorComponent :: a -> String
|
||
|
||
instance (Ord t, ShowToken t) => ShowErrorComponent (ErrorItem t) where
|
||
showErrorComponent (Tokens ts) = showTokens ts
|
||
showErrorComponent (Label label) = NE.toList label
|
||
showErrorComponent EndOfInput = "end of input"
|
||
|
||
instance ShowErrorComponent e => ShowErrorComponent (ErrorFancy e) where
|
||
showErrorComponent (ErrorFail msg) = msg
|
||
showErrorComponent (ErrorIndentation ord ref actual) =
|
||
"incorrect indentation (got " <> show (unPos actual) <>
|
||
", should be " <> p <> show (unPos ref) <> ")"
|
||
where
|
||
p = case ord of
|
||
LT -> "less than "
|
||
EQ -> "equal to "
|
||
GT -> "greater than "
|
||
showErrorComponent (ErrorCustom a) = showErrorComponent a
|
||
|
||
instance ShowErrorComponent Void where
|
||
showErrorComponent = absurd
|
||
|
||
-- | Pretty-print a 'ParseError'. The rendered 'String' always ends with a
|
||
-- newline.
|
||
--
|
||
-- @since 5.0.0
|
||
|
||
parseErrorPretty
|
||
:: ( Ord t
|
||
, ShowToken t
|
||
, ShowErrorComponent e )
|
||
=> ParseError t e -- ^ Parse error to render
|
||
-> String -- ^ Result of rendering
|
||
parseErrorPretty e =
|
||
sourcePosStackPretty (errorPos e) <> ":\n" <> parseErrorTextPretty e
|
||
|
||
-- | Pretty-print a 'ParseError' and display the line on which the parse
|
||
-- error occurred. The rendered 'String' always ends with a newline.
|
||
--
|
||
-- Note that if you work with include files and have a stack of
|
||
-- 'SourcePos'es in 'ParseError', it's up to you to provide correct input
|
||
-- stream corresponding to the file in which parse error actually happened.
|
||
--
|
||
-- 'parseErrorPretty'' is defined in terms of the more general
|
||
-- 'parseErrorPretty_' function which allows to specify tab width as well:
|
||
--
|
||
-- > parseErrorPretty' = parseErrorPretty_ defaultTabWidth
|
||
--
|
||
-- @since 6.0.0
|
||
|
||
parseErrorPretty'
|
||
:: ( ShowToken (Token s)
|
||
, LineToken (Token s)
|
||
, ShowErrorComponent e
|
||
, Stream s )
|
||
=> s -- ^ Original input stream
|
||
-> ParseError (Token s) e -- ^ Parse error to render
|
||
-> String -- ^ Result of rendering
|
||
parseErrorPretty' = parseErrorPretty_ defaultTabWidth
|
||
|
||
-- | Just like 'parseErrorPretty'', but allows to specify tab width.
|
||
--
|
||
-- @since 6.1.0
|
||
|
||
parseErrorPretty_
|
||
:: forall s e.
|
||
( ShowToken (Token s)
|
||
, LineToken (Token s)
|
||
, ShowErrorComponent e
|
||
, Stream s )
|
||
=> Pos -- ^ Tab width
|
||
-> s -- ^ Original input stream
|
||
-> ParseError (Token s) e -- ^ Parse error to render
|
||
-> String -- ^ Result of rendering
|
||
parseErrorPretty_ w s e =
|
||
sourcePosStackPretty (errorPos e) <> ":\n" <>
|
||
padding <> "|\n" <>
|
||
lineNumber <> " | " <> rline <> "\n" <>
|
||
padding <> "| " <> rpadding <> "^\n" <>
|
||
parseErrorTextPretty e
|
||
where
|
||
epos = NE.last (errorPos e)
|
||
lineNumber = (show . unPos . sourceLine) epos
|
||
padding = replicate (length lineNumber + 1) ' '
|
||
rpadding = replicate (unPos (sourceColumn epos) - 1) ' '
|
||
rline =
|
||
case rline' of
|
||
[] -> "<empty line>"
|
||
xs -> expandTab w xs
|
||
rline' = fmap tokenAsChar . chunkToTokens (Proxy :: Proxy s) $
|
||
selectLine (sourceLine epos) s
|
||
|
||
-- | Pretty-print a stack of source positions.
|
||
--
|
||
-- @since 5.0.0
|
||
|
||
sourcePosStackPretty :: NonEmpty SourcePos -> String
|
||
sourcePosStackPretty ms = mconcat (f <$> rest) <> sourcePosPretty pos
|
||
where
|
||
(pos :| rest') = ms
|
||
rest = reverse rest'
|
||
f p = "in file included from " <> sourcePosPretty p <> ",\n"
|
||
|
||
-- | Pretty-print a textual part of a 'ParseError', that is, everything
|
||
-- except stack of source positions. The rendered staring always ends with a
|
||
-- new line.
|
||
--
|
||
-- @since 5.1.0
|
||
|
||
parseErrorTextPretty
|
||
:: ( Ord t
|
||
, ShowToken t
|
||
, ShowErrorComponent e )
|
||
=> ParseError t e -- ^ Parse error to render
|
||
-> String -- ^ Result of rendering
|
||
parseErrorTextPretty (TrivialError _ us ps) =
|
||
if isNothing us && E.null ps
|
||
then "unknown parse error\n"
|
||
else messageItemsPretty "unexpected " (maybe E.empty E.singleton us) <>
|
||
messageItemsPretty "expecting " ps
|
||
parseErrorTextPretty (FancyError _ xs) =
|
||
if E.null xs
|
||
then "unknown fancy parse error\n"
|
||
else unlines (showErrorComponent <$> E.toAscList xs)
|
||
|
||
----------------------------------------------------------------------------
|
||
-- Helpers
|
||
|
||
-- | @stringPretty s@ returns pretty representation of string @s@. This is
|
||
-- used when printing string tokens in error messages.
|
||
|
||
stringPretty :: NonEmpty Char -> String
|
||
stringPretty (x:|[]) = charPretty x
|
||
stringPretty ('\r':|"\n") = "crlf newline"
|
||
stringPretty xs = "\"" <> concatMap f (NE.toList xs) <> "\""
|
||
where
|
||
f ch =
|
||
case charPretty' ch of
|
||
Nothing -> [ch]
|
||
Just pretty -> "<" <> pretty <> ">"
|
||
|
||
-- | @charPretty ch@ returns user-friendly string representation of given
|
||
-- character @ch@, suitable for using in error messages.
|
||
|
||
charPretty :: Char -> String
|
||
charPretty ' ' = "space"
|
||
charPretty ch = fromMaybe ("'" <> [ch] <> "'") (charPretty' ch)
|
||
|
||
-- | If the given character has a pretty representation, return that,
|
||
-- otherwise 'Nothing'. This is an internal helper.
|
||
|
||
charPretty' :: Char -> Maybe String
|
||
charPretty' '\NUL' = pure "null"
|
||
charPretty' '\SOH' = pure "start of heading"
|
||
charPretty' '\STX' = pure "start of text"
|
||
charPretty' '\ETX' = pure "end of text"
|
||
charPretty' '\EOT' = pure "end of transmission"
|
||
charPretty' '\ENQ' = pure "enquiry"
|
||
charPretty' '\ACK' = pure "acknowledge"
|
||
charPretty' '\BEL' = pure "bell"
|
||
charPretty' '\BS' = pure "backspace"
|
||
charPretty' '\t' = pure "tab"
|
||
charPretty' '\n' = pure "newline"
|
||
charPretty' '\v' = pure "vertical tab"
|
||
charPretty' '\f' = pure "form feed"
|
||
charPretty' '\r' = pure "carriage return"
|
||
charPretty' '\SO' = pure "shift out"
|
||
charPretty' '\SI' = pure "shift in"
|
||
charPretty' '\DLE' = pure "data link escape"
|
||
charPretty' '\DC1' = pure "device control one"
|
||
charPretty' '\DC2' = pure "device control two"
|
||
charPretty' '\DC3' = pure "device control three"
|
||
charPretty' '\DC4' = pure "device control four"
|
||
charPretty' '\NAK' = pure "negative acknowledge"
|
||
charPretty' '\SYN' = pure "synchronous idle"
|
||
charPretty' '\ETB' = pure "end of transmission block"
|
||
charPretty' '\CAN' = pure "cancel"
|
||
charPretty' '\EM' = pure "end of medium"
|
||
charPretty' '\SUB' = pure "substitute"
|
||
charPretty' '\ESC' = pure "escape"
|
||
charPretty' '\FS' = pure "file separator"
|
||
charPretty' '\GS' = pure "group separator"
|
||
charPretty' '\RS' = pure "record separator"
|
||
charPretty' '\US' = pure "unit separator"
|
||
charPretty' '\DEL' = pure "delete"
|
||
charPretty' '\160' = pure "non-breaking space"
|
||
charPretty' _ = Nothing
|
||
|
||
-- | Transforms a list of error messages into their textual representation.
|
||
|
||
messageItemsPretty :: ShowErrorComponent a
|
||
=> String -- ^ Prefix to prepend
|
||
-> Set a -- ^ Collection of messages
|
||
-> String -- ^ Result of rendering
|
||
messageItemsPretty prefix ts
|
||
| E.null ts = ""
|
||
| otherwise =
|
||
let f = orList . NE.fromList . E.toAscList . E.map showErrorComponent
|
||
in prefix <> f ts <> "\n"
|
||
|
||
-- | Print a pretty list where items are separated with commas and the word
|
||
-- “or” according to the rules of English punctuation.
|
||
|
||
orList :: NonEmpty String -> String
|
||
orList (x:|[]) = x
|
||
orList (x:|[y]) = x <> " or " <> y
|
||
orList xs = intercalate ", " (NE.init xs) <> ", or " <> NE.last xs
|
||
|
||
-- | Select a line from input stream given its number.
|
||
|
||
selectLine
|
||
:: forall s. (LineToken (Token s), Stream s)
|
||
=> Pos -- ^ Number of line to select
|
||
-> s -- ^ Input stream
|
||
-> Tokens s -- ^ Selected line
|
||
selectLine l = go pos1
|
||
where
|
||
go !n !s =
|
||
if n == l
|
||
then fst (takeWhile_ notNewline s)
|
||
else go (n <> pos1) (stripNewline $ snd (takeWhile_ notNewline s))
|
||
notNewline = not . tokenIsNewline
|
||
stripNewline s =
|
||
case take1_ s of
|
||
Nothing -> s
|
||
Just (_, s') -> s'
|
||
|
||
-- | Replace tab characters with given number of spaces.
|
||
|
||
expandTab
|
||
:: Pos
|
||
-> String
|
||
-> String
|
||
expandTab w' = go 0
|
||
where
|
||
go 0 [] = []
|
||
go 0 ('\t':xs) = go w xs
|
||
go 0 (x:xs) = x : go 0 xs
|
||
go !n xs = ' ' : go (n - 1) xs
|
||
w = unPos w'
|