megaparsec/tests/Util.hs
2016-04-26 00:37:06 +07:00

358 lines
12 KiB
Haskell
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

--
-- QuickCheck tests for Megaparsec, utility functions for parser testing.
--
-- Copyright © 20152016 Megaparsec contributors
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions are
-- met:
--
-- * Redistributions of source code must retain the above copyright notice,
-- this list of conditions and the following disclaimer.
--
-- * Redistributions in binary form must reproduce the above copyright
-- notice, this list of conditions and the following disclaimer in the
-- documentation and/or other materials provided with the distribution.
--
-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS “AS IS” AND ANY
-- EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
-- DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-- OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-- HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-- ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-- POSSIBILITY OF SUCH DAMAGE.
{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RankNTypes #-}
{-# OPTIONS -fno-warn-orphans #-}
module Util
( checkParser
, checkParser'
, checkCase
, simpleParse
, checkChar
, checkString
, updatePosString
, (/=\)
, (!=!)
, abcRow
, EC (..)
, posErr
, posErr'
, utok
, utoks
, ulabel
, ueof
, etok
, etoks
, elabel
, eeof
, cstm )
where
import Control.Monad.Reader
import Control.Monad.Trans.Identity
import Data.Foldable (foldl')
import Data.List.NonEmpty (NonEmpty (..))
import Data.Maybe (mapMaybe, maybeToList)
import qualified Control.Monad.State.Lazy as L
import qualified Control.Monad.State.Strict as S
import qualified Control.Monad.Writer.Lazy as L
import qualified Control.Monad.Writer.Strict as S
import qualified Data.List.NonEmpty as NE
import qualified Data.Set as E
import Test.QuickCheck
import Test.HUnit (Assertion, (@?=))
import Text.Megaparsec.Error
import Text.Megaparsec.Pos
import Text.Megaparsec.Prim
import Text.Megaparsec.String
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative ((<$>), (<*>), (<*))
#endif
-- | @checkParser p r s@ tries to run parser @p@ on input @s@ to parse
-- entire @s@. Result of the parsing is compared with expected result @r@,
-- it should match, otherwise the property doesn't hold and the test fails.
checkParser :: (Eq a, Show a)
=> Parser a -- ^ Parser to test
-> Either (ParseError Char Dec) a -- ^ Expected result of parsing
-> String -- ^ Input for the parser
-> Property -- ^ Resulting property
checkParser p r s = simpleParse p s === r
-- | A variant of 'checkParser' that runs given parser code with all
-- standard instances of 'MonadParsec'. Useful when testing primitive
-- combinators.
checkParser' :: (Eq a, Show a)
=> (forall m. MonadParsec Dec String m => m a) -- ^ Parser to test
-> Either (ParseError Char Dec) a -- ^ Expected result of parsing
-> String -- ^ Input for the parser
-> Property -- ^ Resulting property
checkParser' p r s = conjoin
[ checkParser p r s
, checkParser (runIdentityT p) r s
, checkParser (runReaderT p ()) r s
, checkParser (L.evalStateT p ()) r s
, checkParser (S.evalStateT p ()) r s
, checkParser (evalWriterTL p) r s
, checkParser (evalWriterTS p) r s ]
-- | Similar to 'checkParser', but produces HUnit's 'Assertion's instead.
checkCase :: (Eq a, Show a)
=> (forall m. MonadParsec Dec String m => m a) -- ^ Parser to test
-> Either (ParseError Char Dec) a -- ^ Expected result of parsing
-> String -- ^ Input for the parser
-> Assertion -- ^ Resulting assertion
checkCase p r s = do
parse p "" s @?= r
parse (runIdentityT p) "" s @?= r
parse (runReaderT p ()) "" s @?= r
parse (L.evalStateT p ()) "" s @?= r
parse (S.evalStateT p ()) "" s @?= r
parse (evalWriterTL p) "" s @?= r
parse (evalWriterTS p) "" s @?= r
evalWriterTL :: Monad m => L.WriterT [Int] m a -> m a
evalWriterTL = liftM fst . L.runWriterT
evalWriterTS :: Monad m => S.WriterT [Int] m a -> m a
evalWriterTS = liftM fst . S.runWriterT
-- | @simpleParse p s@ runs parser @p@ on input @s@ and returns corresponding
-- result of type @Either ParseError a@, where @a@ is type of parsed
-- value. This parser tries to parser end of file too and name of input file
-- is always empty string.
simpleParse :: Parser a -> String -> Either (ParseError Char Dec) a
simpleParse p = parse (p <* eof) ""
-- | @checkChar p test label s@ runs parser @p@ on input @s@ and checks if
-- the parser correctly parses single character that satisfies @test@. The
-- character may be labelled, in this case @label@ is used to check quality
-- of error messages.
checkChar
:: Parser Char -- ^ Parser to run
-> (Char -> Bool) -- ^ Predicate to test parsed char
-> Maybe (ErrorItem Char) -- ^ Representation to use in error messages
-> String -- ^ Input stream
-> Property -- ^ Resulting property
checkChar p f rep' s = checkParser p r s
where h = head s
rep = Expected <$> maybeToList rep'
r | null s = posErr 0 s (ueof : rep)
| length s == 1 && f h = Right h
| not (f h) = posErr 0 s (utok h : rep)
| otherwise = posErr 1 s [utok (s !! 1), eeof]
-- | @checkString p a test label s@ runs parser @p@ on input @s@ and checks if
-- the result is equal to @a@ and also quality of error messages. @test@ is
-- used to compare tokens. @label@ is used as expected representation of
-- parser's result in error messages.
checkString
:: Parser String -- ^ Parser to run
-> String -- ^ Expected result
-> (Char -> Char -> Bool) -- ^ Function used to compare tokens
-> String -- ^ Input stream
-> Property
checkString p a' test s' = checkParser p (w a' 0 s') s'
where w [] _ [] = Right s'
w [] i (s:_) = posErr i s' [utok s, eeof]
w _ 0 [] = posErr 0 s' [ueof, etoks a']
w _ i [] = posErr 0 s' [utoks (take i s'), etoks a']
w (a:as) i (s:ss)
| test a s = w as i' ss
| otherwise = posErr 0 s' [utoks (take i' s'), etoks a']
where i' = succ i
-- | A helper function that is used to advance 'SourcePos' given a 'String'.
updatePosString
:: Pos -- ^ Tab width
-> SourcePos -- ^ Initial position
-> String -- ^ 'String' — collection of tokens to process
-> SourcePos -- ^ Final position
updatePosString w = foldl' f
where f p t = snd (defaultUpdatePos w p t)
infix 4 /=\ -- preserve whitespace on automatic trim
-- | @p /=\\ x@ runs parser @p@ on empty input and compares its result
-- (which should be successful) with @x@. Succeeds when the result is equal
-- to @x@, prints counterexample on failure.
(/=\) :: (Eq a, Show a) => Parser a -> a -> Property
p /=\ x = simpleParse p "" === Right x
infix 4 !=!
-- | @n !=! m@ represents property that holds when results of running @n@
-- and @m@ parsers are identical. This is useful when checking monad laws
-- for example.
(!=!) :: (Eq a, Show a) => Parser a -> Parser a -> Property
n !=! m = simpleParse n "" === simpleParse m ""
-- | @abcRow a b c@ generates string consisting of character “a” repeated
-- @a@ times, character “b” repeated @b@ times, and finally character “c”
-- repeated @c@ times.
abcRow :: Enum a => a -> a -> a -> String
abcRow a b c = f a 'a' ++ f b 'b' ++ f c 'c'
where f x = replicate (fromEnum x)
-- | A component of parse error, useful for fast and dirty construction of
-- parse errors with 'posErr' and other helpers.
data EC
= Unexpected (ErrorItem Char)
| Expected (ErrorItem Char)
| Custom Dec
instance Arbitrary a => Arbitrary (NonEmpty a) where
arbitrary = NE.fromList . getNonEmpty <$> arbitrary
instance Arbitrary t => Arbitrary (ErrorItem t) where
arbitrary = oneof
[ Tokens <$> arbitrary
, Label <$> arbitrary
, return EndOfInput ]
instance Arbitrary Pos where
arbitrary = unsafePos . getPositive <$> arbitrary
instance Arbitrary SourcePos where
arbitrary = SourcePos
<$> shortString
<*> (unsafePos <$> choose (1, 1000))
<*> (unsafePos <$> choose (1, 100))
instance Arbitrary Dec where
arbitrary = oneof
[ DecFail <$> shortString
, DecIndentation <$> arbitrary <*> arbitrary <*> arbitrary ]
instance (Arbitrary t, Ord t, Arbitrary e, Ord e)
=> Arbitrary (ParseError t e) where
arbitrary = ParseError
<$> arbitrary
<*> arbitrary
<*> arbitrary
<*> arbitrary
shortString :: Gen String
shortString = sized $ \n -> do
k <- choose (0, n `div` 2)
vectorOf k arbitrary
-- | @posErr pos s ms@ is an easy way to model result of parser that fails.
-- @pos@ is how many tokens (characters) has been consumed before failure.
-- @s@ is input of the parser. @ms@ is a list, collection of 'Message's. See
-- 'utok', 'utoks', 'ulabel', 'ueof', 'etok', 'etoks', 'elabel', and 'eeof'
-- for easy ways to create error messages.
posErr
:: Int -- ^ How many tokens to drop from beginning of steam
-> String -- ^ The input stream (just a 'String' here)
-> [EC] -- ^ Collection of error components
-> Either (ParseError Char Dec) a -- ^ 'ParseError' inside of 'Left'
posErr i s = posErr' (pos :| [])
where pos = updatePosString defaultTabWidth (initialPos "") (take i s)
-- | The same as 'posErr', but 'SourcePos' should be provided directly.
posErr'
:: NonEmpty SourcePos -- ^ Position of the error
-> [EC] -- ^ Collection of error components
-> Either (ParseError Char Dec) a -- ^ 'ParseError' inside of 'Left'
posErr' pos ecs = Left ParseError
{ errorPos = pos
, errorUnexpected = E.fromList (mapMaybe getUnexpected ecs)
, errorExpected = E.fromList (mapMaybe getExpected ecs)
, errorCustom = E.fromList (mapMaybe getCustom ecs) }
where
getUnexpected (Unexpected x) = Just x
getUnexpected _ = Nothing
getExpected (Expected x) = Just x
getExpected _ = Nothing
getCustom (Custom x) = Just x
getCustom _ = Nothing
-- | Construct “unexpected token” error component.
utok :: Char -> EC
utok = Unexpected . Tokens . nes
-- | Construct “unexpected steam” error component. This function respects
-- some conventions described in 'canonicalizeStream'.
utoks :: String -> EC
utoks = Unexpected . canonicalizeStream
-- | Construct “unexpected label” error component. Do not use with empty
-- strings.
ulabel :: String -> EC
ulabel = Unexpected . Label . NE.fromList
-- | Construct “unexpected end of input” error component.
ueof :: EC
ueof = Unexpected EndOfInput
-- | Construct “expecting token” error component.
etok :: Char -> EC
etok = Expected . Tokens . nes
-- | Construct “expecting stream” error component. This function respects
-- some conventions described in 'canonicalizeStream'.
etoks :: String -> EC
etoks = Expected . canonicalizeStream
-- | Construct “expecting label” error component. Do not use with empty
-- strings.
elabel :: String -> EC
elabel = Expected . Label . NE.fromList
-- | Construct “expecting end of input” component.
eeof :: EC
eeof = Expected EndOfInput
-- | Construct error component consisting of custom data.
cstm :: Dec -> EC
cstm = Custom
-- | Construct appropriate 'MessageItem' representation for given token
-- stream. Empty string produces 'EndOfInput', single token — a 'Token', and
-- in other cases the 'TokenStream' constructor is used.
canonicalizeStream :: String -> ErrorItem Char
canonicalizeStream stream =
case NE.nonEmpty stream of
Nothing -> EndOfInput
Just xs -> Tokens xs
-- | Make a singleton non-empty list from a value.
nes :: a -> NonEmpty a
nes x = x :| []
{-# INLINE nes #-}