mirror of
https://github.com/mrkkrp/megaparsec.git
synced 2024-12-18 13:51:58 +03:00
451 lines
17 KiB
Haskell
451 lines
17 KiB
Haskell
--
|
||
-- Tests for Megaparsec's character parsers.
|
||
--
|
||
-- Copyright © 2015–2016 Megaparsec contributors
|
||
--
|
||
-- Redistribution and use in source and binary forms, with or without
|
||
-- modification, are permitted provided that the following conditions are
|
||
-- met:
|
||
--
|
||
-- * Redistributions of source code must retain the above copyright notice,
|
||
-- this list of conditions and the following disclaimer.
|
||
--
|
||
-- * Redistributions in binary form must reproduce the above copyright
|
||
-- notice, this list of conditions and the following disclaimer in the
|
||
-- documentation and/or other materials provided with the distribution.
|
||
--
|
||
-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS “AS IS” AND ANY
|
||
-- EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||
-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||
-- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||
-- DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||
-- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||
-- OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||
-- HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||
-- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||
-- ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||
-- POSSIBILITY OF SUCH DAMAGE.
|
||
|
||
{-# LANGUAGE CPP #-}
|
||
{-# OPTIONS -fno-warn-orphans #-}
|
||
|
||
module Text.Megaparsec.CharSpec (spec) where
|
||
|
||
import Control.Monad
|
||
import Data.Char
|
||
import Data.List (partition, isPrefixOf)
|
||
import Data.Monoid ((<>))
|
||
import Test.Hspec
|
||
import Test.Hspec.Megaparsec
|
||
import Test.Hspec.Megaparsec.AdHoc
|
||
import Test.QuickCheck
|
||
import Text.Megaparsec.Char
|
||
import Text.Megaparsec.Error
|
||
import Text.Megaparsec.Prim
|
||
|
||
#if !MIN_VERSION_base(4,8,0)
|
||
import Control.Applicative
|
||
#endif
|
||
|
||
instance Arbitrary GeneralCategory where
|
||
arbitrary = elements [minBound..maxBound]
|
||
|
||
spec :: Spec
|
||
spec = do
|
||
|
||
describe "newline" $
|
||
checkStrLit "newline" "\n" (pure <$> newline)
|
||
|
||
describe "csrf" $
|
||
checkStrLit "crlf newline" "\r\n" crlf
|
||
|
||
describe "eol" $ do
|
||
context "when stream begins with a newline" $
|
||
it "succeeds returning the newline" $
|
||
property $ \s -> do
|
||
let s' = '\n' : s
|
||
prs eol s' `shouldParse` "\n"
|
||
prs' eol s' `succeedsLeaving` s
|
||
context "when stream begins with CRLF sequence" $
|
||
it "parses the CSRF sequence" $
|
||
property $ \s -> do
|
||
let s' = '\r' : '\n' : s
|
||
prs eol s' `shouldParse` "\r\n"
|
||
prs' eol s' `succeedsLeaving` s
|
||
context "when stream begins with '\\r', but it's not followed by '\\n'" $
|
||
it "signals correct parse error" $
|
||
property $ \ch -> ch /= '\n' ==> do
|
||
let s = ['\r',ch]
|
||
prs eol s `shouldFailWith` err posI
|
||
(utoks s <> utok '\r' <> elabel "end of line")
|
||
context "when input stream is '\\r'" $
|
||
it "signals correct parse error" $
|
||
prs eol "\r" `shouldFailWith` err posI
|
||
(utok '\r' <> elabel "end of line")
|
||
context "when stream does not begin with newline or CSRF sequence" $
|
||
it "signals correct parse error" $
|
||
property $ \ch s -> (ch `notElem` "\r\n") ==> do
|
||
let s' = ch : s
|
||
prs eol s' `shouldFailWith` err posI
|
||
(utok ch <> elabel "end of line")
|
||
context "when stream is empty" $
|
||
it "signals correct parse error" $
|
||
prs eol "" `shouldFailWith` err posI
|
||
(ueof <> elabel "end of line")
|
||
|
||
describe "tab" $
|
||
checkStrLit "tab" "\t" (pure <$> tab)
|
||
|
||
describe "space" $
|
||
it "consumes it up to first non-space character" $
|
||
property $ \s -> do
|
||
let (s0,s1) = partition isSpace s
|
||
s' = s0 ++ s1
|
||
prs space s' `shouldParse` ()
|
||
prs' space s' `succeedsLeaving` s1
|
||
|
||
describe "controlChar" $
|
||
checkCharPred "control character" isControl controlChar
|
||
|
||
describe "spaceChar" $
|
||
checkCharRange "white space" " \160\t\n\r\f\v" spaceChar
|
||
|
||
describe "upperChar" $
|
||
checkCharPred "uppercase letter" isUpper upperChar
|
||
|
||
describe "lowerChar" $
|
||
checkCharPred "lowercase letter" isLower lowerChar
|
||
|
||
describe "letterChar" $
|
||
checkCharPred "letter" isAlpha letterChar
|
||
|
||
describe "alphaNumChar" $
|
||
checkCharPred "alphanumeric character" isAlphaNum alphaNumChar
|
||
|
||
describe "printChar" $
|
||
checkCharPred "printable character" isPrint printChar
|
||
|
||
describe "digitChar" $
|
||
checkCharRange "digit" ['0'..'9'] digitChar
|
||
|
||
describe "octDigitChar" $
|
||
checkCharRange "octal digit" ['0'..'7'] octDigitChar
|
||
|
||
describe "hexDigitChar" $
|
||
checkCharRange "hexadecimal digit" (['0'..'9'] ++ ['a'..'f'] ++ ['A'..'F']) hexDigitChar
|
||
|
||
describe "markChar" $
|
||
checkCharRange "mark character" "" markChar
|
||
|
||
describe "numberChar" $
|
||
let xs = "\185\178\179\188\189\190" ++ ['0'..'9']
|
||
in checkCharRange "numeric character" xs numberChar
|
||
|
||
describe "punctuationChar" $
|
||
checkCharPred "punctuation" isPunctuation punctuationChar
|
||
|
||
describe "symbolChar" $
|
||
#if MIN_VERSION_base(4,8,0)
|
||
checkCharRange "symbol" "<>$£`~|×÷^®°¸¯=¬+¤±¢¨´©¥¦" symbolChar
|
||
#else
|
||
checkCharRange "symbol" "<>$£`~|×÷^®°¸¯=¬+¤±¢¨´©¥¦§¶" symbolChar
|
||
#endif
|
||
describe "separatorChar" $
|
||
checkCharRange "separator" " \160" separatorChar
|
||
|
||
describe "asciiChar" $
|
||
checkCharPred "ASCII character" isAscii asciiChar
|
||
|
||
describe "latin1Char" $ do
|
||
context "when stream begins with Latin-1 character" $
|
||
it "parses the Latin-1 character" $
|
||
property $ \ch s -> isLatin1 ch ==> do
|
||
let s' = ch : s
|
||
prs latin1Char s' `shouldParse` ch
|
||
prs' latin1Char s' `succeedsLeaving` s
|
||
context "when stream does not begin with Latin-1 character" $
|
||
it "signals correct parse error" $ do
|
||
prs latin1Char "б" `shouldFailWith`
|
||
err posI (utok 'б' <> elabel "Latin-1 character")
|
||
prs' latin1Char "в" `failsLeaving` "в"
|
||
context "when stream is empty" $
|
||
it "signals correct parse error" $
|
||
prs latin1Char "" `shouldFailWith` err posI (ueof <> elabel "Latin-1 character")
|
||
|
||
describe "charCategory" $ do
|
||
context "when parser corresponding to general category of next char is used" $
|
||
it "succeeds" $
|
||
property $ \ch s -> do
|
||
let s' = ch : s
|
||
g = generalCategory ch
|
||
prs (charCategory g) s' `shouldParse` ch
|
||
prs' (charCategory g) s' `succeedsLeaving` s
|
||
context "when parser's category does not match next character's category" $
|
||
it "fails" $
|
||
property $ \g ch s -> (generalCategory ch /= g) ==> do
|
||
let s' = ch : s
|
||
prs (charCategory g) s' `shouldFailWith`
|
||
err posI (utok ch <> elabel (categoryName g))
|
||
prs' (charCategory g) s' `failsLeaving` s'
|
||
context "when stream is empty" $
|
||
it "signals correct parse error" $
|
||
property $ \g ->
|
||
prs (charCategory g) "" `shouldFailWith`
|
||
err posI (ueof <> elabel (categoryName g))
|
||
|
||
describe "char" $ do
|
||
context "when stream begins with the character specified as argument" $
|
||
it "parses the character" $
|
||
property $ \ch s -> do
|
||
let s' = ch : s
|
||
prs (char ch) s' `shouldParse` ch
|
||
prs' (char ch) s' `succeedsLeaving` s
|
||
context "when stream does not begin with the character specified as argument" $
|
||
it "signals correct parse error" $
|
||
property $ \ch ch' s -> ch /= ch' ==> do
|
||
let s' = ch' : s
|
||
prs (char ch) s' `shouldFailWith` err posI (utok ch' <> etok ch)
|
||
prs' (char ch) s' `failsLeaving` s'
|
||
context "when stream is empty" $
|
||
it "signals correct parse error" $
|
||
property $ \ch ->
|
||
prs (char ch) "" `shouldFailWith` err posI (ueof <> etok ch)
|
||
|
||
describe "char'" $ do
|
||
context "when stream begins with the character specified as argument" $
|
||
it "parses the character" $
|
||
property $ \ch s -> do
|
||
let sl = toLower ch : s
|
||
su = toUpper ch : s
|
||
prs (char' ch) sl `shouldParse` toLower ch
|
||
prs (char' ch) su `shouldParse` toUpper ch
|
||
prs' (char' ch) sl `succeedsLeaving` s
|
||
prs' (char' ch) su `succeedsLeaving` s
|
||
context "when stream does not begin with the character specified as argument" $
|
||
it "signals correct parse error" $
|
||
property $ \ch ch' s -> toLower ch /= toLower ch' ==> do
|
||
let s' = ch' : s
|
||
ms = utok ch' <> etok (toLower ch) <> etok (toUpper ch)
|
||
prs (char' ch) s' `shouldFailWith` err posI ms
|
||
prs' (char' ch) s' `failsLeaving` s'
|
||
context "when stream is empty" $
|
||
it "signals correct parse error" $
|
||
property $ \ch -> do
|
||
let ms = ueof <> etok (toLower ch) <> etok (toUpper ch)
|
||
prs (char' ch) "" `shouldFailWith` err posI ms
|
||
|
||
describe "anyChar" $ do
|
||
context "when stream is not empty" $
|
||
it "succeeds consuming next character in the stream" $
|
||
property $ \ch s -> do
|
||
let s' = ch : s
|
||
prs anyChar s' `shouldParse` ch
|
||
prs' anyChar s' `succeedsLeaving` s
|
||
context "when stream is empty" $
|
||
it "signals correct parse error" $
|
||
prs anyChar "" `shouldFailWith` err posI (ueof <> elabel "character")
|
||
|
||
describe "oneOf" $ do
|
||
context "when stream begins with one of specified characters" $
|
||
it "parses the character" $
|
||
property $ \chs' n s -> do
|
||
let chs = getNonEmpty chs'
|
||
ch = chs !! (getNonNegative n `rem` length chs)
|
||
s' = ch : s
|
||
prs (oneOf chs) s' `shouldParse` ch
|
||
prs' (oneOf chs) s' `succeedsLeaving` s
|
||
context "when stream does not begin with any of specified characters" $
|
||
it "signals correct parse error" $
|
||
property $ \chs ch s -> ch `notElem` (chs :: String) ==> do
|
||
let s' = ch : s
|
||
prs (oneOf chs) s' `shouldFailWith` err posI (utok ch)
|
||
prs' (oneOf chs) s' `failsLeaving` s'
|
||
context "when stream is empty" $
|
||
it "signals correct parse error" $
|
||
property $ \chs ->
|
||
prs (oneOf (chs :: String)) "" `shouldFailWith` err posI ueof
|
||
|
||
describe "oneOf'" $ do
|
||
context "when stream begins with one of specified characters" $
|
||
it "parses the character" $
|
||
property $ \chs' n s -> do
|
||
let chs = getNonEmpty chs'
|
||
ch = chs !! (getNonNegative n `rem` length chs)
|
||
sl = toLower ch : s
|
||
su = toUpper ch : s
|
||
prs (oneOf' chs) sl `shouldParse` toLower ch
|
||
prs (oneOf' chs) su `shouldParse` toUpper ch
|
||
prs' (oneOf' chs) sl `succeedsLeaving` s
|
||
prs' (oneOf' chs) su `succeedsLeaving` s
|
||
context "when stream does not begin with any of specified characters" $
|
||
it "signals correct parse error" $
|
||
property $ \chs ch s -> ch `notElemi` (chs :: String) ==> do
|
||
let s' = ch : s
|
||
prs (oneOf' chs) s' `shouldFailWith` err posI (utok ch)
|
||
prs' (oneOf' chs) s' `failsLeaving` s'
|
||
context "when stream is empty" $
|
||
it "signals correct parse error" $
|
||
property $ \chs ->
|
||
prs (oneOf' (chs :: String)) "" `shouldFailWith` err posI ueof
|
||
|
||
describe "noneOf" $ do
|
||
context "when stream does not begin with any of specified characters" $
|
||
it "parses the character" $
|
||
property $ \chs ch s -> ch `notElem` (chs :: String) ==> do
|
||
let s' = ch : s
|
||
prs (noneOf chs) s' `shouldParse` ch
|
||
prs' (noneOf chs) s' `succeedsLeaving` s
|
||
context "when stream begins with one of specified characters" $
|
||
it "signals correct parse error" $
|
||
property $ \chs' n s -> do
|
||
let chs = getNonEmpty chs'
|
||
ch = chs !! (getNonNegative n `rem` length chs)
|
||
s' = ch : s
|
||
prs (noneOf chs) s' `shouldFailWith` err posI (utok ch)
|
||
prs' (noneOf chs) s' `failsLeaving` s'
|
||
context "when stream is empty" $
|
||
it "signals correct parse error" $
|
||
property $ \chs ->
|
||
prs (noneOf (chs :: String)) "" `shouldFailWith` err posI ueof
|
||
|
||
describe "noneOf'" $ do
|
||
context "when stream does not begin with any of specified characters" $
|
||
it "parses the character" $
|
||
property $ \chs ch s -> ch `notElemi` (chs :: String) ==> do
|
||
let sl = toLower ch : s
|
||
su = toUpper ch : s
|
||
prs (noneOf' chs) sl `shouldParse` toLower ch
|
||
prs (noneOf' chs) su `shouldParse` toUpper ch
|
||
prs' (noneOf' chs) sl `succeedsLeaving` s
|
||
prs' (noneOf' chs) su `succeedsLeaving` s
|
||
context "when stream begins with one of specified characters" $
|
||
it "signals correct parse error" $
|
||
property $ \chs' n s -> do
|
||
let chs = getNonEmpty chs'
|
||
ch = chs !! (getNonNegative n `rem` length chs)
|
||
s' = ch : s
|
||
prs (noneOf' chs) s' `shouldFailWith` err posI (utok ch)
|
||
prs' (noneOf' chs) s' `failsLeaving` s'
|
||
context "when stream is empty" $
|
||
it "signals correct parse error" $
|
||
property $ \chs ->
|
||
prs (noneOf' (chs :: String)) "" `shouldFailWith` err posI ueof
|
||
|
||
describe "string" $ do
|
||
context "when stream is prefixed with given string" $
|
||
it "parses the string" $
|
||
property $ \str s -> do
|
||
let s' = str ++ s
|
||
prs (string str) s' `shouldParse` str
|
||
prs' (string str) s' `succeedsLeaving` s
|
||
context "when stream is not prefixed with given string" $
|
||
it "signals correct parse error" $
|
||
property $ \str s -> not (str `isPrefixOf` s) ==> do
|
||
let n = length (takeWhile (uncurry (==)) (zip str s)) + 1
|
||
common = take n s
|
||
prs (string str) s `shouldFailWith` err posI (utoks common <> etoks str)
|
||
|
||
describe "string'" $ do
|
||
context "when stream is prefixed with given string" $
|
||
it "parses the string" $
|
||
property $ \str s ->
|
||
forAll (fuzzyCase str) $ \str' -> do
|
||
let s' = str' ++ s
|
||
prs (string' str) s' `shouldParse` str'
|
||
prs' (string' str) s' `succeedsLeaving` s
|
||
context "when stream is not prefixed with given string" $
|
||
it "signals correct parse error" $
|
||
property $ \str s -> not (str `isPrefixOfI` s) ==> do
|
||
let n = length (takeWhile (uncurry casei) (zip str s)) + 1
|
||
common = take n s
|
||
prs (string' str) s `shouldFailWith` err posI (utoks common <> etoks str)
|
||
|
||
----------------------------------------------------------------------------
|
||
-- Helpers
|
||
|
||
checkStrLit :: String -> String -> Parsec Dec String String -> SpecWith ()
|
||
checkStrLit name ts p = do
|
||
context ("when stream begins with " ++ name) $
|
||
it ("parses the " ++ name) $
|
||
property $ \s -> do
|
||
let s' = ts ++ s
|
||
prs p s' `shouldParse` ts
|
||
prs' p s' `succeedsLeaving` s
|
||
context ("when stream does not begin with " ++ name) $
|
||
it "signals correct parse error" $
|
||
property $ \ch s -> ch /= head ts ==> do
|
||
let s' = ch : s
|
||
prs p s' `shouldFailWith` err posI (utok ch <> etoks ts)
|
||
prs' p s' `failsLeaving` s'
|
||
context "when stream is empty" $
|
||
it "signals correct parse error" $
|
||
prs p "" `shouldFailWith` err posI (ueof <> etoks ts)
|
||
|
||
checkCharPred :: String -> (Char -> Bool) -> Parsec Dec String Char -> SpecWith ()
|
||
checkCharPred name f p = do
|
||
context ("when stream begins with " ++ name) $
|
||
it ("parses the " ++ name) $
|
||
property $ \ch s -> f ch ==> do
|
||
let s' = ch : s
|
||
prs p s' `shouldParse` ch
|
||
prs' p s' `succeedsLeaving` s
|
||
context ("when stream does not begin with " ++ name) $
|
||
it "signals correct parse error" $
|
||
property $ \ch s -> not (f ch) ==> do
|
||
let s' = ch : s
|
||
prs p s' `shouldFailWith` err posI (utok ch <> elabel name)
|
||
prs' p s' `failsLeaving` s'
|
||
context "when stream is empty" $
|
||
it "signals correct parse error" $
|
||
prs p "" `shouldFailWith` err posI (ueof <> elabel name)
|
||
|
||
checkCharRange :: String -> String -> Parsec Dec String Char -> SpecWith ()
|
||
checkCharRange name tchs p = do
|
||
forM_ tchs $ \tch ->
|
||
context ("when stream begins with " ++ showTokens (nes tch)) $
|
||
it ("parses the " ++ showTokens (nes tch)) $
|
||
property $ \s -> do
|
||
let s' = tch : s
|
||
prs p s' `shouldParse` tch
|
||
prs' p s' `succeedsLeaving` s
|
||
context ("when stream does not begin with " ++ name) $
|
||
it "signals correct parse error" $
|
||
property $ \ch s -> ch `notElem` tchs ==> do
|
||
let s' = ch : s
|
||
prs p s' `shouldFailWith` err posI (utok ch <> elabel name)
|
||
prs' p s' `failsLeaving` s'
|
||
context "when stream is empty" $
|
||
it "signals correct parse error" $
|
||
prs p "" `shouldFailWith` err posI (ueof <> elabel name)
|
||
|
||
-- | Randomly change the case in the given string.
|
||
|
||
fuzzyCase :: String -> Gen String
|
||
fuzzyCase s = zipWith f s <$> vector (length s)
|
||
where f k True = if isLower k then toUpper k else toLower k
|
||
f k False = k
|
||
|
||
-- | Case-insensitive equality test for characters.
|
||
|
||
casei :: Char -> Char -> Bool
|
||
casei x y = toUpper x == toUpper y
|
||
|
||
-- | Case-insensitive 'elem'.
|
||
|
||
elemi :: Char -> String -> Bool
|
||
elemi c = any (casei c)
|
||
|
||
-- | Case-insensitive 'notElem'.
|
||
|
||
notElemi :: Char -> String -> Bool
|
||
notElemi c = not . elemi c
|
||
|
||
-- | The 'isPrefixOf' function takes two 'String's and returns 'True' iff
|
||
-- the first list is a prefix of the second with case-insensitive
|
||
-- comparison.
|
||
|
||
isPrefixOfI :: String -> String -> Bool
|
||
isPrefixOfI [] _ = True
|
||
isPrefixOfI _ [] = False
|
||
isPrefixOfI (x:xs) (y:ys) = x `casei` y && isPrefixOf xs ys
|