megaparsec/tests/Util.hs

349 lines
12 KiB
Haskell
Raw Normal View History

--
-- QuickCheck tests for Megaparsec, utility functions for parser testing.
--
2016-01-09 15:56:33 +03:00
-- Copyright © 20152016 Megaparsec contributors
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions are
-- met:
--
-- * Redistributions of source code must retain the above copyright notice,
-- this list of conditions and the following disclaimer.
--
-- * Redistributions in binary form must reproduce the above copyright
-- notice, this list of conditions and the following disclaimer in the
-- documentation and/or other materials provided with the distribution.
--
-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS “AS IS” AND ANY
-- EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
-- DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-- OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-- HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-- ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-- POSSIBILITY OF SUCH DAMAGE.
{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RankNTypes #-}
2016-04-24 16:21:07 +03:00
{-# OPTIONS -fno-warn-orphans #-}
module Util
2015-08-12 20:51:06 +03:00
( checkParser
, checkParser'
, checkCase
2015-08-12 20:51:06 +03:00
, simpleParse
, checkChar
, checkString
, updatePosString
, (/=\)
, (!=!)
, abcRow
2016-04-24 16:21:07 +03:00
, EC (..)
2015-08-12 20:51:06 +03:00
, posErr
2016-04-24 16:21:07 +03:00
, posErr'
, utok
, utoks
, ulabel
, ueof
, etok
, etoks
, elabel
, eeof
, cstm )
where
import Control.Monad.Reader
import Control.Monad.Trans.Identity
import Data.Foldable (foldl')
2016-04-24 16:21:07 +03:00
import Data.List.NonEmpty (NonEmpty (..))
import Data.Maybe (mapMaybe, maybeToList)
import qualified Control.Monad.State.Lazy as L
import qualified Control.Monad.State.Strict as S
import qualified Control.Monad.Writer.Lazy as L
import qualified Control.Monad.Writer.Strict as S
2016-04-24 16:21:07 +03:00
import qualified Data.List.NonEmpty as NE
import qualified Data.Set as E
import Test.QuickCheck
import Test.HUnit (Assertion, (@?=))
import Text.Megaparsec.Error
import Text.Megaparsec.Pos
import Text.Megaparsec.Prim
import Text.Megaparsec.String
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative ((<$>), (<*))
#endif
-- | @checkParser p r s@ tries to run parser @p@ on input @s@ to parse
-- entire @s@. Result of the parsing is compared with expected result @r@,
-- it should match, otherwise the property doesn't hold and the test fails.
checkParser :: (Eq a, Show a)
=> Parser a -- ^ Parser to test
2016-04-24 16:21:07 +03:00
-> Either (ParseError Char Dec) a -- ^ Expected result of parsing
-> String -- ^ Input for the parser
-> Property -- ^ Resulting property
checkParser p r s = simpleParse p s === r
-- | A variant of 'checkParser' that runs given parser code with all
-- standard instances of 'MonadParsec'. Useful when testing primitive
-- combinators.
checkParser' :: (Eq a, Show a)
2016-04-24 16:21:07 +03:00
=> (forall m. MonadParsec Dec String m => m a) -- ^ Parser to test
-> Either (ParseError Char Dec) a -- ^ Expected result of parsing
-> String -- ^ Input for the parser
-> Property -- ^ Resulting property
checkParser' p r s = conjoin
[ checkParser p r s
, checkParser (runIdentityT p) r s
, checkParser (runReaderT p ()) r s
, checkParser (L.evalStateT p ()) r s
, checkParser (S.evalStateT p ()) r s
, checkParser (evalWriterTL p) r s
, checkParser (evalWriterTS p) r s ]
-- | Similar to 'checkParser', but produces HUnit's 'Assertion's instead.
checkCase :: (Eq a, Show a)
2016-04-24 16:21:07 +03:00
=> (forall m. MonadParsec Dec String m => m a) -- ^ Parser to test
-> Either (ParseError Char Dec) a -- ^ Expected result of parsing
-> String -- ^ Input for the parser
-> Assertion -- ^ Resulting assertion
checkCase p r s = do
parse p "" s @?= r
parse (runIdentityT p) "" s @?= r
parse (runReaderT p ()) "" s @?= r
parse (L.evalStateT p ()) "" s @?= r
parse (S.evalStateT p ()) "" s @?= r
parse (evalWriterTL p) "" s @?= r
parse (evalWriterTS p) "" s @?= r
evalWriterTL :: Monad m => L.WriterT [Int] m a -> m a
evalWriterTL = liftM fst . L.runWriterT
evalWriterTS :: Monad m => S.WriterT [Int] m a -> m a
evalWriterTS = liftM fst . S.runWriterT
-- | @simpleParse p s@ runs parser @p@ on input @s@ and returns corresponding
-- result of type @Either ParseError a@, where @a@ is type of parsed
-- value. This parser tries to parser end of file too and name of input file
-- is always empty string.
2016-04-24 16:21:07 +03:00
simpleParse :: Parser a -> String -> Either (ParseError Char Dec) a
simpleParse p = parse (p <* eof) ""
-- | @checkChar p test label s@ runs parser @p@ on input @s@ and checks if
-- the parser correctly parses single character that satisfies @test@. The
-- character may be labelled, in this case @label@ is used to check quality
-- of error messages.
2016-04-24 16:21:07 +03:00
checkChar
:: Parser Char -- ^ Parser to run
-> (Char -> Bool) -- ^ Predicate to test parsed char
-> Maybe (MessageItem Char) -- ^ Representation to use in error messages
-> String -- ^ Input stream
-> Property
checkChar p f rep' s = checkParser p r s
2015-08-12 20:51:06 +03:00
where h = head s
2016-04-24 16:21:07 +03:00
rep = Expected <$> maybeToList rep'
r | null s = posErr 0 s (ueof : rep)
2015-08-12 20:51:06 +03:00
| length s == 1 && f h = Right h
2016-04-24 16:21:07 +03:00
| not (f h) = posErr 0 s (utok h : rep)
| otherwise = posErr 1 s [utok (s !! 1), eeof]
-- | @checkString p a test label s@ runs parser @p@ on input @s@ and checks if
-- the result is equal to @a@ and also quality of error messages. @test@ is
-- used to compare tokens. @label@ is used as expected representation of
-- parser's result in error messages.
2016-04-24 16:21:07 +03:00
checkString
:: Parser String -- ^ Parser to run
-> String -- ^ Expected result
-> (Char -> Char -> Bool) -- ^ Function used to compare tokens
-> String -- ^ Input stream
-> Property
checkString p a' test s' = checkParser p (w a' 0 s') s'
where w [] _ [] = Right s'
2016-04-24 16:21:07 +03:00
w [] i (s:_) = posErr i s' [utok s, eeof]
w _ 0 [] = posErr 0 s' [ueof, etoks a']
w _ i [] = posErr 0 s' [utoks (take i s'), etoks a']
2015-08-12 20:51:06 +03:00
w (a:as) i (s:ss)
| test a s = w as i' ss
2016-04-24 16:21:07 +03:00
| otherwise = posErr 0 s' [utoks (take i' s'), etoks a']
2015-08-12 20:51:06 +03:00
where i' = succ i
-- | A helper function that is used to advance 'SourcePos' given a 'String'.
updatePosString
2016-04-24 16:21:07 +03:00
:: Pos -- ^ Tab width
-> SourcePos -- ^ Initial position
-> String -- ^ 'String' — collection of tokens to process
-> SourcePos -- ^ Final position
updatePosString w = foldl' f
where f p t = snd (defaultUpdatePos w p t)
infix 4 /=\ -- preserve whitespace on automatic trim
-- | @p /=\\ x@ runs parser @p@ on empty input and compares its result
-- (which should be successful) with @x@. Succeeds when the result is equal
-- to @x@, prints counterexample on failure.
(/=\) :: (Eq a, Show a) => Parser a -> a -> Property
p /=\ x = simpleParse p "" === Right x
infix 4 !=!
-- | @n !=! m@ represents property that holds when results of running @n@
-- and @m@ parsers are identical. This is useful when checking monad laws
-- for example.
(!=!) :: (Eq a, Show a) => Parser a -> Parser a -> Property
n !=! m = simpleParse n "" === simpleParse m ""
-- | @abcRow a b c@ generates string consisting of character “a” repeated
-- @a@ times, character “b” repeated @b@ times, and finally character “c”
-- repeated @c@ times.
2016-02-17 21:12:27 +03:00
abcRow :: Enum a => a -> a -> a -> String
abcRow a b c = f a 'a' ++ f b 'b' ++ f c 'c'
where f x = replicate (fromEnum x)
2016-04-24 16:21:07 +03:00
-- | A component of parse error, useful for fast and dirty construction of
-- parse errors with 'posErr' and other helpers.
data EC
= Unexpected (MessageItem Char)
| Expected (MessageItem Char)
| Custom Dec
instance Arbitrary a => Arbitrary (NonEmpty a) where
arbitrary = NE.fromList . getNonEmpty <$> arbitrary
instance Arbitrary t => Arbitrary (MessageItem t) where
arbitrary = oneof
[ Token <$> arbitrary
, TokenStream <$> arbitrary
, Label <$> arbitrary
, return EndOfInput ]
instance Arbitrary Pos where
arbitrary = unsafePos . getPositive <$> arbitrary
instance Arbitrary SourcePos where
arbitrary = SourcePos
<$> arbitrary
<*> (unsafePos <$> choose (1, 1000))
<*> (unsafePos <$> choose (1, 100))
instance Arbitrary Dec where
arbitrary = oneof
[ DecFail <$> arbitrary
, DecIndentation <$> arbitrary <*> arbitrary <*> arbitrary ]
instance (Arbitrary t, Ord t, Arbitrary e, Ord e)
=> Arbitrary (ParseError t e) where
arbitrary = ParseError
<$> arbitrary
<*> arbitrary
<*> arbitrary
<*> arbitrary
-- | @posErr pos s ms@ is an easy way to model result of parser that
-- fails. @pos@ is how many tokens (characters) has been consumed before
-- failure. @s@ is input of the parser. @ms@ is a list, collection of
-- 'Message's. See 'uneStr', 'uneCh', 'uneSpec', 'exStr', 'exCh', and
-- 'exSpec' for easy ways to create error messages.
2016-04-24 16:21:07 +03:00
posErr
:: Int -- ^ How many tokens to drop from beginning of steam
-> String -- ^ The input stream (just a 'String' here)
-> [EC] -- ^ Collection of error components
-> Either (ParseError Char Dec) a -- ^ 'ParseError' inside of 'Left'
posErr i s = posErr' (pos :| [])
where pos = updatePosString defaultTabWidth (initialPos "") (take i s)
-- | The same as 'posErr', but 'SourcePos' should be provided directly.
posErr'
:: NonEmpty SourcePos -- ^ Position of the error
-> [EC] -- ^ Collection of error components
-> Either (ParseError Char Dec) a -- ^ 'ParseError' inside of 'Left'
posErr' pos ecs = Left ParseError
{ errorPos = pos
, errorUnexpected = E.fromList (mapMaybe getUnexpected ecs)
, errorExpected = E.fromList (mapMaybe getExpected ecs)
, errorData = E.fromList (mapMaybe getCustom ecs) }
where
getUnexpected (Unexpected x) = Just x
getUnexpected _ = Nothing
getExpected (Expected x) = Just x
getExpected _ = Nothing
getCustom (Custom x) = Just x
getCustom _ = Nothing
-- | Construct “unexpected token” error component.
utok :: Char -> EC
utok = Unexpected . Token
2016-04-24 16:21:07 +03:00
-- | Construct “unexpected steam” error component. This function respects
-- some conventions described in 'canonicalizeStream'.
2016-04-24 16:21:07 +03:00
utoks :: String -> EC
utoks = Unexpected . canonicalizeStream
2016-04-24 16:21:07 +03:00
-- | Construct “unexpected label” error component. Do not use with empty
-- strings.
2016-04-24 16:21:07 +03:00
ulabel :: String -> EC
ulabel = Unexpected . Label . NE.fromList
2016-04-24 16:21:07 +03:00
-- | Construct “unexpected end of input” error component.
2016-04-24 16:21:07 +03:00
ueof :: EC
ueof = Unexpected EndOfInput
2016-04-24 16:21:07 +03:00
-- | Construct “expecting token” error component.
2016-04-24 16:21:07 +03:00
etok :: Char -> EC
etok = Expected . Token
2016-04-24 16:21:07 +03:00
-- | Construct “expecting stream” error component. This function respects
-- some conventions described in 'canonicalizeStream'.
2016-04-24 16:21:07 +03:00
etoks :: String -> EC
etoks = Expected . canonicalizeStream
2016-04-24 16:21:07 +03:00
-- | Construct “expecting label” error component. Do not use with empty
-- strings.
2016-04-24 16:21:07 +03:00
elabel :: String -> EC
elabel = Expected . Label . NE.fromList
2016-04-24 16:21:07 +03:00
-- | Construct “expecting end of input” component.
2016-04-24 16:21:07 +03:00
eeof :: EC
eeof = Expected EndOfInput
2016-04-24 16:21:07 +03:00
-- | Construct error component consisting of custom data.
2016-04-24 16:21:07 +03:00
cstm :: Dec -> EC
cstm = Custom
2016-04-24 16:21:07 +03:00
-- | Construct appropriate 'MessageItem' representation for given token
-- stream. Empty string produces 'EndOfInput', single token — a 'Token', and
-- in other cases the 'TokenStream' constructor is used.
2016-04-24 16:21:07 +03:00
canonicalizeStream :: String -> MessageItem Char
canonicalizeStream stream =
case NE.nonEmpty stream of
Nothing -> EndOfInput
Just (x:|[]) -> Token x
Just xs -> TokenStream xs