From 3daa70f15a82bf60ccf3335e1d92fb6733d3b260 Mon Sep 17 00:00:00 2001 From: mrkkrp Date: Thu, 6 Aug 2015 16:37:08 +0600 Subject: [PATCH] representation of tokens in error messages, fixed #12 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Type class ‘ShowToken’ introduced to pretty-print tokens. * For now, we have defined instances for ‘String’ and ‘Char’. --- CHANGELOG.md | 3 ++ Text/Megaparsec/Char.hs | 7 ++- Text/Megaparsec/Combinator.hs | 12 +++--- Text/Megaparsec/Prim.hs | 81 ++++++++++++++++------------------- Text/Megaparsec/ShowToken.hs | 47 ++++++++++++++++++++ megaparsec.cabal | 15 ++++--- 6 files changed, 104 insertions(+), 61 deletions(-) create mode 100644 Text/Megaparsec/ShowToken.hs diff --git a/CHANGELOG.md b/CHANGELOG.md index c7a0e69..ef3f467 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,9 @@ * The following functions are now re-exported from ‘Control.Applicative’: `(<|>)`, `many`, `some`, `optional`. See #9. +* Introduces type class `ShowToken` and improved representation of + characters and stings in error messages, see #12. + * Added comprehensive QuickCheck test suite. * Added benchmarks. diff --git a/Text/Megaparsec/Char.hs b/Text/Megaparsec/Char.hs index 8cf0650..709c79e 100644 --- a/Text/Megaparsec/Char.hs +++ b/Text/Megaparsec/Char.hs @@ -148,9 +148,8 @@ anyChar = satisfy (const True) -- > oneOf cs = satisfy (`elem` cs) satisfy :: Stream s m Char => (Char -> Bool) -> ParsecT s u m Char -satisfy f = tokenPrim showCh nextPos testChar - where showCh x = show [x] - nextPos pos x _ = updatePosChar pos x +satisfy f = tokenPrim nextPos testChar + where nextPos pos x _ = updatePosChar pos x testChar x = if f x then Just x else Nothing -- | @string s@ parses a sequence of characters given by @s@. Returns @@ -159,4 +158,4 @@ satisfy f = tokenPrim showCh nextPos testChar -- > divOrMod = string "div" <|> string "mod" string :: Stream s m Char => String -> ParsecT s u m String -string = tokens show updatePosString +string = tokens updatePosString diff --git a/Text/Megaparsec/Combinator.hs b/Text/Megaparsec/Combinator.hs index 80a9289..75a74f7 100644 --- a/Text/Megaparsec/Combinator.hs +++ b/Text/Megaparsec/Combinator.hs @@ -39,6 +39,7 @@ import Control.Applicative ((<|>), many, some) import Control.Monad import Text.Megaparsec.Prim +import Text.Megaparsec.ShowToken -- | @choice ps@ tries to apply the parsers in the list @ps@ in order, -- until one of them succeeds. Returns the value of the succeeding parser. @@ -195,15 +196,15 @@ chainr1 p op = p >>= rest -- | The parser @anyToken@ accepts any kind of token. It is for example -- used to implement 'eof'. Returns the accepted token. -anyToken :: (Stream s m t, Show t) => ParsecT s u m t -anyToken = tokenPrim show (\pos _ _ -> pos) Just +anyToken :: Stream s m t => ParsecT s u m t +anyToken = tokenPrim (\pos _ _ -> pos) Just -- | This parser only succeeds at the end of the input. This is not a -- primitive parser but it is defined using 'notFollowedBy'. -- -- > eof = notFollowedBy anyToken "end of input" -eof :: (Stream s m t, Show t) => ParsecT s u m () +eof :: Stream s m t => ParsecT s u m () eof = notFollowedBy anyToken "end of input" -- | @notFollowedBy p@ only succeeds when parser @p@ fails. This parser @@ -216,8 +217,9 @@ eof = notFollowedBy anyToken "end of input" -- -- > keywordLet = try (string "let" >> notFollowedBy alphaNum) -notFollowedBy :: (Stream s m t, Show a) => ParsecT s u m a -> ParsecT s u m () -notFollowedBy p = try ((try p >>= (unexpected . show)) <|> return ()) +notFollowedBy :: (Stream s m t, ShowToken a) => + ParsecT s u m a -> ParsecT s u m () +notFollowedBy p = try ((try p >>= (unexpected . showToken)) <|> return ()) -- | @manyTill p end@ applies parser @p@ /zero/ or more times until -- parser @end@ succeeds. Returns the list of values returned by @p@. This diff --git a/Text/Megaparsec/Prim.hs b/Text/Megaparsec/Prim.hs index 29e5f66..a27b370 100644 --- a/Text/Megaparsec/Prim.hs +++ b/Text/Megaparsec/Prim.hs @@ -67,8 +67,9 @@ import Control.Monad.Error.Class import qualified Control.Applicative as A -import Text.Megaparsec.Pos import Text.Megaparsec.Error +import Text.Megaparsec.Pos +import Text.Megaparsec.ShowToken -- | This is Parsec state, this is parametrized over stream type @s@, and -- user state @u@. @@ -89,10 +90,10 @@ data State s u = State -- within the stream\" in the stream state @s@. This is trivial unless -- you are using the monad in a non-trivial way. -class Monad m => Stream s m t | s -> t where +class (Monad m, ShowToken t) => Stream s m t | s -> t where uncons :: s -> m (Maybe (t, s)) -instance Monad m => Stream [tok] m tok where +instance (Monad m, ShowToken t) => Stream [t] m t where uncons [] = return Nothing uncons (t:ts) = return $ Just (t, ts) {-# INLINE uncons #-} @@ -492,57 +493,51 @@ lookAhead p = ParsecT $ \s _ cerr eok eerr -> do let eok' a _ _ = eok a s (newErrorUnknown (statePos s)) unParser p s eok' cerr eok' eerr --- | The parser @token showTok posFromTok testTok@ accepts a token @t@ --- with result @x@ when the function @testTok t@ returns @'Just' x@. The --- source position of the @t@ should be returned by @posFromTok t@ and the --- token can be shown using @showTok t@. +-- | The parser @token posFromTok testTok@ accepts a token @t@ with result +-- @x@ when the function @testTok t@ returns @'Just' x@. The source position +-- of the @t@ should be returned by @posFromTok t@. Token will be shown with +-- 'showToken' function. -- -- This combinator is expressed in terms of 'tokenPrim'. It is used to -- accept user defined token streams. For example, suppose that we have a -- stream of basic tokens tupled with source positions. We can than define a -- parser that accepts single tokens as: -- --- > mytoken x = token showTok posFromTok testTok --- > where --- > showTok (pos,t) = show t --- > posFromTok (pos,t) = pos --- > testTok (pos,t) = if x == t then Just t else Nothing +-- > mytoken x = token posFromTok testTok +-- > where posFromTok (pos,t) = pos +-- > testTok (pos,t) = if x == t then Just t else Nothing token :: Stream s Identity t => - (t -> String) -- ^ Token pretty-printing function. - -> (t -> SourcePos) -- ^ Computes the position of a token. + (t -> SourcePos) -- ^ Computes the position of a token. -> (t -> Maybe a) -- ^ Matching function for the token to parse. -> Parsec s u a -token showToken tokpos = tokenPrim showToken nextpos +token tokpos = tokenPrim nextpos where nextpos _ tok ts = case runIdentity (uncons ts) of Nothing -> tokpos tok Just (tok', _) -> tokpos tok' --- | The parser @tokens showToks posFromTok@ parses list of tokens and --- returns it. The resulting parser will use @showToks@ to pretty-print the +-- | The parser @tokens posFromTok@ parses list of tokens and returns +-- it. The resulting parser will use @showToks@ to pretty-print the -- collection of tokens. -- -- This can be used to example to write 'Text.Megaparsec.Char.string': -- --- > string = tokens show updatePosString +-- > string = tokens updatePosString -tokens :: (Stream s m t, Eq t) => - ([t] -> String) -- ^ Pretty print a list of tokens - -> (SourcePos -> [t] -> SourcePos) -- ^ Computes position of tokens. +tokens :: (Stream s m t, Eq t, ShowToken [t]) => + (SourcePos -> [t] -> SourcePos) -- ^ Computes position of tokens. -> [t] -- ^ List of tokens to parse -> ParsecT s u m [t] {-# INLINE tokens #-} -tokens _ _ [] - = ParsecT $ \s _ _ eok _ -> - eok [] s $ unknownError s -tokens showTokens nextposs tts@(tok:toks) +tokens _ [] = ParsecT $ \s _ _ eok _ -> eok [] s $ unknownError s +tokens nextposs tts@(tok:toks) = ParsecT $ \(State input pos u) cok cerr _ eerr -> let - errEof = addErrorMessage (Expect (showTokens tts)) + errEof = addErrorMessage (Expect (showToken tts)) (newErrorMessage (SysUnExpect "") pos) - errExpect x = addErrorMessage (Expect (showTokens tts)) - (newErrorMessage (SysUnExpect (showTokens [x])) pos) + errExpect x = addErrorMessage (Expect (showToken tts)) + (newErrorMessage (SysUnExpect (showToken [x])) pos) walk [] rs = ok rs walk (t:ts) rs = do @@ -564,38 +559,34 @@ tokens showTokens nextposs tts@(tok:toks) | tok == x -> walk toks xs | otherwise -> eerr $ errExpect x --- | The parser @tokenPrim showTok nextPos testTok@ accepts a token @t@ --- with result @x@ when the function @testTok t@ returns @'Just' x@. The --- token can be shown using @showTok t@. The position of the /next/ token --- should be returned when @nextPos@ is called with the current source --- position @pos@, the current token @t@ and the rest of the tokens @toks@, --- @nextPos pos t toks@. +-- | The parser @tokenPrim nextPos testTok@ accepts a token @t@ with result +-- @x@ when the function @testTok t@ returns @'Just' x@. The position of the +-- /next/ token should be returned when @nextPos@ is called with the current +-- source position @pos@, the current token @t@ and the rest of the tokens +-- @toks@, @nextPos pos t toks@. -- -- This is the most primitive combinator for accepting tokens. For example, -- the 'Text.Megaparsec.Char.char' parser could be implemented as: -- --- > char c = tokenPrim showChar nextPos testChar --- > where showChar x = "'" ++ x ++ "'" --- > testChar x = if x == c then Just x else Nothing +-- > char c = tokenPrim nextPos testChar +-- > where testChar x = if x == c then Just x else Nothing -- > nextPos pos x xs = updatePosChar pos x tokenPrim :: Stream s m t => - (t -> String) -- ^ Token pretty-printing function. - -> (SourcePos -> t -> s -> SourcePos) -- ^ Next position calculating function. + (SourcePos -> t -> s -> SourcePos) -- ^ Next position calculating function. -> (t -> Maybe a) -- ^ Matching function for the token to parse. -> ParsecT s u m a {-# INLINE tokenPrim #-} -tokenPrim showToken nextpos = tokenPrimEx showToken nextpos Nothing +tokenPrim nextpos = tokenPrimEx nextpos Nothing -tokenPrimEx :: Stream s m t - => (t -> String) - -> (SourcePos -> t -> s -> SourcePos) +tokenPrimEx :: Stream s m t => + (SourcePos -> t -> s -> SourcePos) -> Maybe (SourcePos -> t -> s -> u -> u) -> (t -> Maybe a) -> ParsecT s u m a {-# INLINE tokenPrimEx #-} -tokenPrimEx showToken nextpos Nothing test +tokenPrimEx nextpos Nothing test = ParsecT $ \(State input pos user) cok _ _ eerr -> do r <- uncons input case r of @@ -608,7 +599,7 @@ tokenPrimEx showToken nextpos Nothing test cok x newstate (newErrorUnknown newpos) Nothing -> eerr $ unexpectError (showToken c) pos -tokenPrimEx showToken nextpos (Just nextState) test +tokenPrimEx nextpos (Just nextState) test = ParsecT $ \(State input pos user) cok _ _ eerr -> do r <- uncons input case r of diff --git a/Text/Megaparsec/ShowToken.hs b/Text/Megaparsec/ShowToken.hs new file mode 100644 index 0000000..519b08e --- /dev/null +++ b/Text/Megaparsec/ShowToken.hs @@ -0,0 +1,47 @@ +-- | +-- Module : Text.Megaparsec.ShowToken +-- Copyright : © 2015 Megaparsec contributors +-- License : BSD3 +-- +-- Maintainer : Mark Karpov +-- Stability : experimental +-- Portability : portable +-- +-- Pretty printing function and instances for use in error messages. + +module Text.Megaparsec.ShowToken (ShowToken (..)) where + +-- | Typeclass 'ShowToken' defines single function 'showToken' that can be +-- used to “pretty-print” various tokens. By default, all commonly used +-- instances are defined, but you can add your own, of course. + +class Show a => ShowToken a where + showToken :: a -> String + +instance ShowToken Char where + showToken = prettyChar + +-- | @prettyChar ch@ returns user-friendly string representation of given +-- character @ch@, suitable for using in error messages, for example. + +prettyChar :: Char -> String +prettyChar '\0' = "null" +prettyChar '\a' = "bell" +prettyChar '\b' = "backspace" +prettyChar '\t' = "tab" +prettyChar '\n' = "newline" +prettyChar '\v' = "vertical tab" +prettyChar '\f' = "form feed" +prettyChar '\r' = "carriage return" +prettyChar x = "'" ++ [x] ++ "'" + +instance ShowToken String where + showToken = prettyString + +-- | @prettyString s@ returns pretty representation of string @s@. This is +-- used when printing string tokens in error messages. + +prettyString :: String -> String +prettyString "" = "" +-- prettyString [x] = prettyChar x -- FIXME enable this later +prettyString xs = "\"" ++ xs ++ "\"" diff --git a/megaparsec.cabal b/megaparsec.cabal index 991a2c9..000b32e 100644 --- a/megaparsec.cabal +++ b/megaparsec.cabal @@ -64,20 +64,21 @@ library , PolymorphicComponents , UndecidableInstances exposed-modules: Text.Megaparsec - , Text.Megaparsec.String , Text.Megaparsec.ByteString , Text.Megaparsec.ByteString.Lazy - , Text.Megaparsec.Text - , Text.Megaparsec.Text.Lazy - , Text.Megaparsec.Pos - , Text.Megaparsec.Error - , Text.Megaparsec.Prim , Text.Megaparsec.Char , Text.Megaparsec.Combinator - , Text.Megaparsec.Token + , Text.Megaparsec.Error , Text.Megaparsec.Expr , Text.Megaparsec.Language , Text.Megaparsec.Perm + , Text.Megaparsec.Pos + , Text.Megaparsec.Prim + , Text.Megaparsec.ShowToken + , Text.Megaparsec.String + , Text.Megaparsec.Text + , Text.Megaparsec.Text.Lazy + , Text.Megaparsec.Token ghc-options: -O2 -Wall default-language: Haskell2010