mirror of
https://github.com/mrkkrp/megaparsec.git
synced 2024-12-18 13:51:58 +03:00
358 lines
12 KiB
Haskell
358 lines
12 KiB
Haskell
--
|
||
-- QuickCheck tests for Megaparsec, utility functions for parser testing.
|
||
--
|
||
-- Copyright © 2015–2016 Megaparsec contributors
|
||
--
|
||
-- Redistribution and use in source and binary forms, with or without
|
||
-- modification, are permitted provided that the following conditions are
|
||
-- met:
|
||
--
|
||
-- * Redistributions of source code must retain the above copyright notice,
|
||
-- this list of conditions and the following disclaimer.
|
||
--
|
||
-- * Redistributions in binary form must reproduce the above copyright
|
||
-- notice, this list of conditions and the following disclaimer in the
|
||
-- documentation and/or other materials provided with the distribution.
|
||
--
|
||
-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS “AS IS” AND ANY
|
||
-- EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||
-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||
-- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||
-- DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||
-- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||
-- OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||
-- HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||
-- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||
-- ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||
-- POSSIBILITY OF SUCH DAMAGE.
|
||
|
||
{-# LANGUAGE CPP #-}
|
||
{-# LANGUAGE FlexibleContexts #-}
|
||
{-# LANGUAGE RankNTypes #-}
|
||
{-# OPTIONS -fno-warn-orphans #-}
|
||
|
||
module Util
|
||
( checkParser
|
||
, checkParser'
|
||
, checkCase
|
||
, simpleParse
|
||
, checkChar
|
||
, checkString
|
||
, updatePosString
|
||
, (/=\)
|
||
, (!=!)
|
||
, abcRow
|
||
, EC (..)
|
||
, posErr
|
||
, posErr'
|
||
, utok
|
||
, utoks
|
||
, ulabel
|
||
, ueof
|
||
, etok
|
||
, etoks
|
||
, elabel
|
||
, eeof
|
||
, cstm )
|
||
where
|
||
|
||
import Control.Monad.Reader
|
||
import Control.Monad.Trans.Identity
|
||
import Data.Foldable (foldl')
|
||
import Data.List.NonEmpty (NonEmpty (..))
|
||
import Data.Maybe (mapMaybe, maybeToList)
|
||
import qualified Control.Monad.State.Lazy as L
|
||
import qualified Control.Monad.State.Strict as S
|
||
import qualified Control.Monad.Writer.Lazy as L
|
||
import qualified Control.Monad.Writer.Strict as S
|
||
import qualified Data.List.NonEmpty as NE
|
||
import qualified Data.Set as E
|
||
|
||
import Test.QuickCheck
|
||
import Test.HUnit (Assertion, (@?=))
|
||
|
||
import Text.Megaparsec.Error
|
||
import Text.Megaparsec.Pos
|
||
import Text.Megaparsec.Prim
|
||
import Text.Megaparsec.String
|
||
|
||
#if !MIN_VERSION_base(4,8,0)
|
||
import Control.Applicative ((<$>), (<*>), (<*))
|
||
#endif
|
||
|
||
-- | @checkParser p r s@ tries to run parser @p@ on input @s@ to parse
|
||
-- entire @s@. Result of the parsing is compared with expected result @r@,
|
||
-- it should match, otherwise the property doesn't hold and the test fails.
|
||
|
||
checkParser :: (Eq a, Show a)
|
||
=> Parser a -- ^ Parser to test
|
||
-> Either (ParseError Char Dec) a -- ^ Expected result of parsing
|
||
-> String -- ^ Input for the parser
|
||
-> Property -- ^ Resulting property
|
||
checkParser p r s = simpleParse p s === r
|
||
|
||
-- | A variant of 'checkParser' that runs given parser code with all
|
||
-- standard instances of 'MonadParsec'. Useful when testing primitive
|
||
-- combinators.
|
||
|
||
checkParser' :: (Eq a, Show a)
|
||
=> (forall m. MonadParsec Dec String m => m a) -- ^ Parser to test
|
||
-> Either (ParseError Char Dec) a -- ^ Expected result of parsing
|
||
-> String -- ^ Input for the parser
|
||
-> Property -- ^ Resulting property
|
||
checkParser' p r s = conjoin
|
||
[ checkParser p r s
|
||
, checkParser (runIdentityT p) r s
|
||
, checkParser (runReaderT p ()) r s
|
||
, checkParser (L.evalStateT p ()) r s
|
||
, checkParser (S.evalStateT p ()) r s
|
||
, checkParser (evalWriterTL p) r s
|
||
, checkParser (evalWriterTS p) r s ]
|
||
|
||
-- | Similar to 'checkParser', but produces HUnit's 'Assertion's instead.
|
||
|
||
checkCase :: (Eq a, Show a)
|
||
=> (forall m. MonadParsec Dec String m => m a) -- ^ Parser to test
|
||
-> Either (ParseError Char Dec) a -- ^ Expected result of parsing
|
||
-> String -- ^ Input for the parser
|
||
-> Assertion -- ^ Resulting assertion
|
||
checkCase p r s = do
|
||
parse p "" s @?= r
|
||
parse (runIdentityT p) "" s @?= r
|
||
parse (runReaderT p ()) "" s @?= r
|
||
parse (L.evalStateT p ()) "" s @?= r
|
||
parse (S.evalStateT p ()) "" s @?= r
|
||
parse (evalWriterTL p) "" s @?= r
|
||
parse (evalWriterTS p) "" s @?= r
|
||
|
||
evalWriterTL :: Monad m => L.WriterT [Int] m a -> m a
|
||
evalWriterTL = liftM fst . L.runWriterT
|
||
evalWriterTS :: Monad m => S.WriterT [Int] m a -> m a
|
||
evalWriterTS = liftM fst . S.runWriterT
|
||
|
||
-- | @simpleParse p s@ runs parser @p@ on input @s@ and returns corresponding
|
||
-- result of type @Either ParseError a@, where @a@ is type of parsed
|
||
-- value. This parser tries to parser end of file too and name of input file
|
||
-- is always empty string.
|
||
|
||
simpleParse :: Parser a -> String -> Either (ParseError Char Dec) a
|
||
simpleParse p = parse (p <* eof) ""
|
||
|
||
-- | @checkChar p test label s@ runs parser @p@ on input @s@ and checks if
|
||
-- the parser correctly parses single character that satisfies @test@. The
|
||
-- character may be labelled, in this case @label@ is used to check quality
|
||
-- of error messages.
|
||
|
||
checkChar
|
||
:: Parser Char -- ^ Parser to run
|
||
-> (Char -> Bool) -- ^ Predicate to test parsed char
|
||
-> Maybe (ErrorItem Char) -- ^ Representation to use in error messages
|
||
-> String -- ^ Input stream
|
||
-> Property -- ^ Resulting property
|
||
checkChar p f rep' s = checkParser p r s
|
||
where h = head s
|
||
rep = Expected <$> maybeToList rep'
|
||
r | null s = posErr 0 s (ueof : rep)
|
||
| length s == 1 && f h = Right h
|
||
| not (f h) = posErr 0 s (utok h : rep)
|
||
| otherwise = posErr 1 s [utok (s !! 1), eeof]
|
||
|
||
-- | @checkString p a test label s@ runs parser @p@ on input @s@ and checks if
|
||
-- the result is equal to @a@ and also quality of error messages. @test@ is
|
||
-- used to compare tokens. @label@ is used as expected representation of
|
||
-- parser's result in error messages.
|
||
|
||
checkString
|
||
:: Parser String -- ^ Parser to run
|
||
-> String -- ^ Expected result
|
||
-> (Char -> Char -> Bool) -- ^ Function used to compare tokens
|
||
-> String -- ^ Input stream
|
||
-> Property
|
||
checkString p a' test s' = checkParser p (w a' 0 s') s'
|
||
where w [] _ [] = Right s'
|
||
w [] i (s:_) = posErr i s' [utok s, eeof]
|
||
w _ 0 [] = posErr 0 s' [ueof, etoks a']
|
||
w _ i [] = posErr 0 s' [utoks (take i s'), etoks a']
|
||
w (a:as) i (s:ss)
|
||
| test a s = w as i' ss
|
||
| otherwise = posErr 0 s' [utoks (take i' s'), etoks a']
|
||
where i' = succ i
|
||
|
||
-- | A helper function that is used to advance 'SourcePos' given a 'String'.
|
||
|
||
updatePosString
|
||
:: Pos -- ^ Tab width
|
||
-> SourcePos -- ^ Initial position
|
||
-> String -- ^ 'String' — collection of tokens to process
|
||
-> SourcePos -- ^ Final position
|
||
updatePosString w = foldl' f
|
||
where f p t = snd (defaultUpdatePos w p t)
|
||
|
||
infix 4 /=\ -- preserve whitespace on automatic trim
|
||
|
||
-- | @p /=\\ x@ runs parser @p@ on empty input and compares its result
|
||
-- (which should be successful) with @x@. Succeeds when the result is equal
|
||
-- to @x@, prints counterexample on failure.
|
||
|
||
(/=\) :: (Eq a, Show a) => Parser a -> a -> Property
|
||
p /=\ x = simpleParse p "" === Right x
|
||
|
||
infix 4 !=!
|
||
|
||
-- | @n !=! m@ represents property that holds when results of running @n@
|
||
-- and @m@ parsers are identical. This is useful when checking monad laws
|
||
-- for example.
|
||
|
||
(!=!) :: (Eq a, Show a) => Parser a -> Parser a -> Property
|
||
n !=! m = simpleParse n "" === simpleParse m ""
|
||
|
||
-- | @abcRow a b c@ generates string consisting of character “a” repeated
|
||
-- @a@ times, character “b” repeated @b@ times, and finally character “c”
|
||
-- repeated @c@ times.
|
||
|
||
abcRow :: Enum a => a -> a -> a -> String
|
||
abcRow a b c = f a 'a' ++ f b 'b' ++ f c 'c'
|
||
where f x = replicate (fromEnum x)
|
||
|
||
-- | A component of parse error, useful for fast and dirty construction of
|
||
-- parse errors with 'posErr' and other helpers.
|
||
|
||
data EC
|
||
= Unexpected (ErrorItem Char)
|
||
| Expected (ErrorItem Char)
|
||
| Custom Dec
|
||
|
||
instance Arbitrary a => Arbitrary (NonEmpty a) where
|
||
arbitrary = NE.fromList . getNonEmpty <$> arbitrary
|
||
|
||
instance Arbitrary t => Arbitrary (ErrorItem t) where
|
||
arbitrary = oneof
|
||
[ Tokens <$> arbitrary
|
||
, Label <$> arbitrary
|
||
, return EndOfInput ]
|
||
|
||
instance Arbitrary Pos where
|
||
arbitrary = unsafePos . getPositive <$> arbitrary
|
||
|
||
instance Arbitrary SourcePos where
|
||
arbitrary = SourcePos
|
||
<$> shortString
|
||
<*> (unsafePos <$> choose (1, 1000))
|
||
<*> (unsafePos <$> choose (1, 100))
|
||
|
||
instance Arbitrary Dec where
|
||
arbitrary = oneof
|
||
[ DecFail <$> shortString
|
||
, DecIndentation <$> arbitrary <*> arbitrary <*> arbitrary ]
|
||
|
||
instance (Arbitrary t, Ord t, Arbitrary e, Ord e)
|
||
=> Arbitrary (ParseError t e) where
|
||
arbitrary = ParseError
|
||
<$> arbitrary
|
||
<*> arbitrary
|
||
<*> arbitrary
|
||
<*> arbitrary
|
||
|
||
shortString :: Gen String
|
||
shortString = sized $ \n -> do
|
||
k <- choose (0, n `div` 2)
|
||
vectorOf k arbitrary
|
||
|
||
-- | @posErr pos s ms@ is an easy way to model result of parser that fails.
|
||
-- @pos@ is how many tokens (characters) has been consumed before failure.
|
||
-- @s@ is input of the parser. @ms@ is a list, collection of 'Message's. See
|
||
-- 'utok', 'utoks', 'ulabel', 'ueof', 'etok', 'etoks', 'elabel', and 'eeof'
|
||
-- for easy ways to create error messages.
|
||
|
||
posErr
|
||
:: Int -- ^ How many tokens to drop from beginning of steam
|
||
-> String -- ^ The input stream (just a 'String' here)
|
||
-> [EC] -- ^ Collection of error components
|
||
-> Either (ParseError Char Dec) a -- ^ 'ParseError' inside of 'Left'
|
||
posErr i s = posErr' (pos :| [])
|
||
where pos = updatePosString defaultTabWidth (initialPos "") (take i s)
|
||
|
||
-- | The same as 'posErr', but 'SourcePos' should be provided directly.
|
||
|
||
posErr'
|
||
:: NonEmpty SourcePos -- ^ Position of the error
|
||
-> [EC] -- ^ Collection of error components
|
||
-> Either (ParseError Char Dec) a -- ^ 'ParseError' inside of 'Left'
|
||
posErr' pos ecs = Left ParseError
|
||
{ errorPos = pos
|
||
, errorUnexpected = E.fromList (mapMaybe getUnexpected ecs)
|
||
, errorExpected = E.fromList (mapMaybe getExpected ecs)
|
||
, errorCustom = E.fromList (mapMaybe getCustom ecs) }
|
||
where
|
||
getUnexpected (Unexpected x) = Just x
|
||
getUnexpected _ = Nothing
|
||
getExpected (Expected x) = Just x
|
||
getExpected _ = Nothing
|
||
getCustom (Custom x) = Just x
|
||
getCustom _ = Nothing
|
||
|
||
-- | Construct “unexpected token” error component.
|
||
|
||
utok :: Char -> EC
|
||
utok = Unexpected . Tokens . nes
|
||
|
||
-- | Construct “unexpected steam” error component. This function respects
|
||
-- some conventions described in 'canonicalizeStream'.
|
||
|
||
utoks :: String -> EC
|
||
utoks = Unexpected . canonicalizeStream
|
||
|
||
-- | Construct “unexpected label” error component. Do not use with empty
|
||
-- strings.
|
||
|
||
ulabel :: String -> EC
|
||
ulabel = Unexpected . Label . NE.fromList
|
||
|
||
-- | Construct “unexpected end of input” error component.
|
||
|
||
ueof :: EC
|
||
ueof = Unexpected EndOfInput
|
||
|
||
-- | Construct “expecting token” error component.
|
||
|
||
etok :: Char -> EC
|
||
etok = Expected . Tokens . nes
|
||
|
||
-- | Construct “expecting stream” error component. This function respects
|
||
-- some conventions described in 'canonicalizeStream'.
|
||
|
||
etoks :: String -> EC
|
||
etoks = Expected . canonicalizeStream
|
||
|
||
-- | Construct “expecting label” error component. Do not use with empty
|
||
-- strings.
|
||
|
||
elabel :: String -> EC
|
||
elabel = Expected . Label . NE.fromList
|
||
|
||
-- | Construct “expecting end of input” component.
|
||
|
||
eeof :: EC
|
||
eeof = Expected EndOfInput
|
||
|
||
-- | Construct error component consisting of custom data.
|
||
|
||
cstm :: Dec -> EC
|
||
cstm = Custom
|
||
|
||
-- | Construct appropriate 'MessageItem' representation for given token
|
||
-- stream. Empty string produces 'EndOfInput', single token — a 'Token', and
|
||
-- in other cases the 'TokenStream' constructor is used.
|
||
|
||
canonicalizeStream :: String -> ErrorItem Char
|
||
canonicalizeStream stream =
|
||
case NE.nonEmpty stream of
|
||
Nothing -> EndOfInput
|
||
Just xs -> Tokens xs
|
||
|
||
-- | Make a singleton non-empty list from a value.
|
||
|
||
nes :: a -> NonEmpty a
|
||
nes x = x :| []
|
||
{-# INLINE nes #-}
|