megaparsec/tests/Text/Megaparsec/CharSpec.hs
2016-09-25 18:42:21 +03:00

451 lines
17 KiB
Haskell
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

--
-- Tests for Megaparsec's character parsers.
--
-- Copyright © 20152016 Megaparsec contributors
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions are
-- met:
--
-- * Redistributions of source code must retain the above copyright notice,
-- this list of conditions and the following disclaimer.
--
-- * Redistributions in binary form must reproduce the above copyright
-- notice, this list of conditions and the following disclaimer in the
-- documentation and/or other materials provided with the distribution.
--
-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS “AS IS” AND ANY
-- EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
-- DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-- OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-- HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-- ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-- POSSIBILITY OF SUCH DAMAGE.
{-# LANGUAGE CPP #-}
{-# OPTIONS -fno-warn-orphans #-}
module Text.Megaparsec.CharSpec (spec) where
import Control.Monad
import Data.Char
import Data.List (partition, isPrefixOf)
import Data.Monoid ((<>))
import Test.Hspec
import Test.Hspec.Megaparsec
import Test.Hspec.Megaparsec.AdHoc
import Test.QuickCheck
import Text.Megaparsec.Char
import Text.Megaparsec.Error
import Text.Megaparsec.Prim
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative
#endif
instance Arbitrary GeneralCategory where
arbitrary = elements [minBound..maxBound]
spec :: Spec
spec = do
describe "newline" $
checkStrLit "newline" "\n" (pure <$> newline)
describe "csrf" $
checkStrLit "crlf newline" "\r\n" crlf
describe "eol" $ do
context "when stream begins with a newline" $
it "succeeds returning the newline" $
property $ \s -> do
let s' = '\n' : s
prs eol s' `shouldParse` "\n"
prs' eol s' `succeedsLeaving` s
context "when stream begins with CRLF sequence" $
it "parses the CSRF sequence" $
property $ \s -> do
let s' = '\r' : '\n' : s
prs eol s' `shouldParse` "\r\n"
prs' eol s' `succeedsLeaving` s
context "when stream begins with '\\r', but it's not followed by '\\n'" $
it "signals correct parse error" $
property $ \ch -> ch /= '\n' ==> do
let s = ['\r',ch]
prs eol s `shouldFailWith` err posI
(utoks s <> utok '\r' <> elabel "end of line")
context "when input stream is '\\r'" $
it "signals correct parse error" $
prs eol "\r" `shouldFailWith` err posI
(utok '\r' <> elabel "end of line")
context "when stream does not begin with newline or CSRF sequence" $
it "signals correct parse error" $
property $ \ch s -> (ch `notElem` "\r\n") ==> do
let s' = ch : s
prs eol s' `shouldFailWith` err posI
(utok ch <> elabel "end of line")
context "when stream is empty" $
it "signals correct parse error" $
prs eol "" `shouldFailWith` err posI
(ueof <> elabel "end of line")
describe "tab" $
checkStrLit "tab" "\t" (pure <$> tab)
describe "space" $
it "consumes it up to first non-space character" $
property $ \s -> do
let (s0,s1) = partition isSpace s
s' = s0 ++ s1
prs space s' `shouldParse` ()
prs' space s' `succeedsLeaving` s1
describe "controlChar" $
checkCharPred "control character" isControl controlChar
describe "spaceChar" $
checkCharRange "white space" " \160\t\n\r\f\v" spaceChar
describe "upperChar" $
checkCharPred "uppercase letter" isUpper upperChar
describe "lowerChar" $
checkCharPred "lowercase letter" isLower lowerChar
describe "letterChar" $
checkCharPred "letter" isAlpha letterChar
describe "alphaNumChar" $
checkCharPred "alphanumeric character" isAlphaNum alphaNumChar
describe "printChar" $
checkCharPred "printable character" isPrint printChar
describe "digitChar" $
checkCharRange "digit" ['0'..'9'] digitChar
describe "octDigitChar" $
checkCharRange "octal digit" ['0'..'7'] octDigitChar
describe "hexDigitChar" $
checkCharRange "hexadecimal digit" (['0'..'9'] ++ ['a'..'f'] ++ ['A'..'F']) hexDigitChar
describe "markChar" $
checkCharRange "mark character" "" markChar
describe "numberChar" $
let xs = "\185\178\179\188\189\190" ++ ['0'..'9']
in checkCharRange "numeric character" xs numberChar
describe "punctuationChar" $
checkCharPred "punctuation" isPunctuation punctuationChar
describe "symbolChar" $
#if MIN_VERSION_base(4,8,0)
checkCharRange "symbol" "<>$£`~|×÷^®°¸¯=¬+¤±¢¨´©¥¦" symbolChar
#else
checkCharRange "symbol" "<>$£`~|×÷^®°¸¯=¬+¤±¢¨´©¥¦§¶" symbolChar
#endif
describe "separatorChar" $
checkCharRange "separator" " \160" separatorChar
describe "asciiChar" $
checkCharPred "ASCII character" isAscii asciiChar
describe "latin1Char" $ do
context "when stream begins with Latin-1 character" $
it "parses the Latin-1 character" $
property $ \ch s -> isLatin1 ch ==> do
let s' = ch : s
prs latin1Char s' `shouldParse` ch
prs' latin1Char s' `succeedsLeaving` s
context "when stream does not begin with Latin-1 character" $
it "signals correct parse error" $ do
prs latin1Char "б" `shouldFailWith`
err posI (utok 'б' <> elabel "Latin-1 character")
prs' latin1Char "в" `failsLeaving` "в"
context "when stream is empty" $
it "signals correct parse error" $
prs latin1Char "" `shouldFailWith` err posI (ueof <> elabel "Latin-1 character")
describe "charCategory" $ do
context "when parser corresponding to general category of next char is used" $
it "succeeds" $
property $ \ch s -> do
let s' = ch : s
g = generalCategory ch
prs (charCategory g) s' `shouldParse` ch
prs' (charCategory g) s' `succeedsLeaving` s
context "when parser's category does not match next character's category" $
it "fails" $
property $ \g ch s -> (generalCategory ch /= g) ==> do
let s' = ch : s
prs (charCategory g) s' `shouldFailWith`
err posI (utok ch <> elabel (categoryName g))
prs' (charCategory g) s' `failsLeaving` s'
context "when stream is empty" $
it "signals correct parse error" $
property $ \g ->
prs (charCategory g) "" `shouldFailWith`
err posI (ueof <> elabel (categoryName g))
describe "char" $ do
context "when stream begins with the character specified as argument" $
it "parses the character" $
property $ \ch s -> do
let s' = ch : s
prs (char ch) s' `shouldParse` ch
prs' (char ch) s' `succeedsLeaving` s
context "when stream does not begin with the character specified as argument" $
it "signals correct parse error" $
property $ \ch ch' s -> ch /= ch' ==> do
let s' = ch' : s
prs (char ch) s' `shouldFailWith` err posI (utok ch' <> etok ch)
prs' (char ch) s' `failsLeaving` s'
context "when stream is empty" $
it "signals correct parse error" $
property $ \ch ->
prs (char ch) "" `shouldFailWith` err posI (ueof <> etok ch)
describe "char'" $ do
context "when stream begins with the character specified as argument" $
it "parses the character" $
property $ \ch s -> do
let sl = toLower ch : s
su = toUpper ch : s
prs (char' ch) sl `shouldParse` toLower ch
prs (char' ch) su `shouldParse` toUpper ch
prs' (char' ch) sl `succeedsLeaving` s
prs' (char' ch) su `succeedsLeaving` s
context "when stream does not begin with the character specified as argument" $
it "signals correct parse error" $
property $ \ch ch' s -> toLower ch /= toLower ch' ==> do
let s' = ch' : s
ms = utok ch' <> etok (toLower ch) <> etok (toUpper ch)
prs (char' ch) s' `shouldFailWith` err posI ms
prs' (char' ch) s' `failsLeaving` s'
context "when stream is empty" $
it "signals correct parse error" $
property $ \ch -> do
let ms = ueof <> etok (toLower ch) <> etok (toUpper ch)
prs (char' ch) "" `shouldFailWith` err posI ms
describe "anyChar" $ do
context "when stream is not empty" $
it "succeeds consuming next character in the stream" $
property $ \ch s -> do
let s' = ch : s
prs anyChar s' `shouldParse` ch
prs' anyChar s' `succeedsLeaving` s
context "when stream is empty" $
it "signals correct parse error" $
prs anyChar "" `shouldFailWith` err posI (ueof <> elabel "character")
describe "oneOf" $ do
context "when stream begins with one of specified characters" $
it "parses the character" $
property $ \chs' n s -> do
let chs = getNonEmpty chs'
ch = chs !! (getNonNegative n `rem` length chs)
s' = ch : s
prs (oneOf chs) s' `shouldParse` ch
prs' (oneOf chs) s' `succeedsLeaving` s
context "when stream does not begin with any of specified characters" $
it "signals correct parse error" $
property $ \chs ch s -> ch `notElem` (chs :: String) ==> do
let s' = ch : s
prs (oneOf chs) s' `shouldFailWith` err posI (utok ch)
prs' (oneOf chs) s' `failsLeaving` s'
context "when stream is empty" $
it "signals correct parse error" $
property $ \chs ->
prs (oneOf (chs :: String)) "" `shouldFailWith` err posI ueof
describe "oneOf'" $ do
context "when stream begins with one of specified characters" $
it "parses the character" $
property $ \chs' n s -> do
let chs = getNonEmpty chs'
ch = chs !! (getNonNegative n `rem` length chs)
sl = toLower ch : s
su = toUpper ch : s
prs (oneOf' chs) sl `shouldParse` toLower ch
prs (oneOf' chs) su `shouldParse` toUpper ch
prs' (oneOf' chs) sl `succeedsLeaving` s
prs' (oneOf' chs) su `succeedsLeaving` s
context "when stream does not begin with any of specified characters" $
it "signals correct parse error" $
property $ \chs ch s -> ch `notElemi` (chs :: String) ==> do
let s' = ch : s
prs (oneOf' chs) s' `shouldFailWith` err posI (utok ch)
prs' (oneOf' chs) s' `failsLeaving` s'
context "when stream is empty" $
it "signals correct parse error" $
property $ \chs ->
prs (oneOf' (chs :: String)) "" `shouldFailWith` err posI ueof
describe "noneOf" $ do
context "when stream does not begin with any of specified characters" $
it "parses the character" $
property $ \chs ch s -> ch `notElem` (chs :: String) ==> do
let s' = ch : s
prs (noneOf chs) s' `shouldParse` ch
prs' (noneOf chs) s' `succeedsLeaving` s
context "when stream begins with one of specified characters" $
it "signals correct parse error" $
property $ \chs' n s -> do
let chs = getNonEmpty chs'
ch = chs !! (getNonNegative n `rem` length chs)
s' = ch : s
prs (noneOf chs) s' `shouldFailWith` err posI (utok ch)
prs' (noneOf chs) s' `failsLeaving` s'
context "when stream is empty" $
it "signals correct parse error" $
property $ \chs ->
prs (noneOf (chs :: String)) "" `shouldFailWith` err posI ueof
describe "noneOf'" $ do
context "when stream does not begin with any of specified characters" $
it "parses the character" $
property $ \chs ch s -> ch `notElemi` (chs :: String) ==> do
let sl = toLower ch : s
su = toUpper ch : s
prs (noneOf' chs) sl `shouldParse` toLower ch
prs (noneOf' chs) su `shouldParse` toUpper ch
prs' (noneOf' chs) sl `succeedsLeaving` s
prs' (noneOf' chs) su `succeedsLeaving` s
context "when stream begins with one of specified characters" $
it "signals correct parse error" $
property $ \chs' n s -> do
let chs = getNonEmpty chs'
ch = chs !! (getNonNegative n `rem` length chs)
s' = ch : s
prs (noneOf' chs) s' `shouldFailWith` err posI (utok ch)
prs' (noneOf' chs) s' `failsLeaving` s'
context "when stream is empty" $
it "signals correct parse error" $
property $ \chs ->
prs (noneOf' (chs :: String)) "" `shouldFailWith` err posI ueof
describe "string" $ do
context "when stream is prefixed with given string" $
it "parses the string" $
property $ \str s -> do
let s' = str ++ s
prs (string str) s' `shouldParse` str
prs' (string str) s' `succeedsLeaving` s
context "when stream is not prefixed with given string" $
it "signals correct parse error" $
property $ \str s -> not (str `isPrefixOf` s) ==> do
let n = length (takeWhile (uncurry (==)) (zip str s)) + 1
common = take n s
prs (string str) s `shouldFailWith` err posI (utoks common <> etoks str)
describe "string'" $ do
context "when stream is prefixed with given string" $
it "parses the string" $
property $ \str s ->
forAll (fuzzyCase str) $ \str' -> do
let s' = str' ++ s
prs (string' str) s' `shouldParse` str'
prs' (string' str) s' `succeedsLeaving` s
context "when stream is not prefixed with given string" $
it "signals correct parse error" $
property $ \str s -> not (str `isPrefixOfI` s) ==> do
let n = length (takeWhile (uncurry casei) (zip str s)) + 1
common = take n s
prs (string' str) s `shouldFailWith` err posI (utoks common <> etoks str)
----------------------------------------------------------------------------
-- Helpers
checkStrLit :: String -> String -> Parsec Dec String String -> SpecWith ()
checkStrLit name ts p = do
context ("when stream begins with " ++ name) $
it ("parses the " ++ name) $
property $ \s -> do
let s' = ts ++ s
prs p s' `shouldParse` ts
prs' p s' `succeedsLeaving` s
context ("when stream does not begin with " ++ name) $
it "signals correct parse error" $
property $ \ch s -> ch /= head ts ==> do
let s' = ch : s
prs p s' `shouldFailWith` err posI (utok ch <> etoks ts)
prs' p s' `failsLeaving` s'
context "when stream is empty" $
it "signals correct parse error" $
prs p "" `shouldFailWith` err posI (ueof <> etoks ts)
checkCharPred :: String -> (Char -> Bool) -> Parsec Dec String Char -> SpecWith ()
checkCharPred name f p = do
context ("when stream begins with " ++ name) $
it ("parses the " ++ name) $
property $ \ch s -> f ch ==> do
let s' = ch : s
prs p s' `shouldParse` ch
prs' p s' `succeedsLeaving` s
context ("when stream does not begin with " ++ name) $
it "signals correct parse error" $
property $ \ch s -> not (f ch) ==> do
let s' = ch : s
prs p s' `shouldFailWith` err posI (utok ch <> elabel name)
prs' p s' `failsLeaving` s'
context "when stream is empty" $
it "signals correct parse error" $
prs p "" `shouldFailWith` err posI (ueof <> elabel name)
checkCharRange :: String -> String -> Parsec Dec String Char -> SpecWith ()
checkCharRange name tchs p = do
forM_ tchs $ \tch ->
context ("when stream begins with " ++ showTokens (nes tch)) $
it ("parses the " ++ showTokens (nes tch)) $
property $ \s -> do
let s' = tch : s
prs p s' `shouldParse` tch
prs' p s' `succeedsLeaving` s
context ("when stream does not begin with " ++ name) $
it "signals correct parse error" $
property $ \ch s -> ch `notElem` tchs ==> do
let s' = ch : s
prs p s' `shouldFailWith` err posI (utok ch <> elabel name)
prs' p s' `failsLeaving` s'
context "when stream is empty" $
it "signals correct parse error" $
prs p "" `shouldFailWith` err posI (ueof <> elabel name)
-- | Randomly change the case in the given string.
fuzzyCase :: String -> Gen String
fuzzyCase s = zipWith f s <$> vector (length s)
where f k True = if isLower k then toUpper k else toLower k
f k False = k
-- | Case-insensitive equality test for characters.
casei :: Char -> Char -> Bool
casei x y = toUpper x == toUpper y
-- | Case-insensitive 'elem'.
elemi :: Char -> String -> Bool
elemi c = any (casei c)
-- | Case-insensitive 'notElem'.
notElemi :: Char -> String -> Bool
notElemi c = not . elemi c
-- | The 'isPrefixOf' function takes two 'String's and returns 'True' iff
-- the first list is a prefix of the second with case-insensitive
-- comparison.
isPrefixOfI :: String -> String -> Bool
isPrefixOfI [] _ = True
isPrefixOfI _ [] = False
isPrefixOfI (x:xs) (y:ys) = x `casei` y && isPrefixOf xs ys