mirror of
https://github.com/mrkkrp/megaparsec.git
synced 2024-12-24 00:33:22 +03:00
481 lines
18 KiB
Haskell
481 lines
18 KiB
Haskell
--
|
||
-- Tests for Megaparsec's lexer.
|
||
--
|
||
-- Copyright © 2015–2016 Megaparsec contributors
|
||
--
|
||
-- Redistribution and use in source and binary forms, with or without
|
||
-- modification, are permitted provided that the following conditions are
|
||
-- met:
|
||
--
|
||
-- * Redistributions of source code must retain the above copyright notice,
|
||
-- this list of conditions and the following disclaimer.
|
||
--
|
||
-- * Redistributions in binary form must reproduce the above copyright
|
||
-- notice, this list of conditions and the following disclaimer in the
|
||
-- documentation and/or other materials provided with the distribution.
|
||
--
|
||
-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS “AS IS” AND ANY
|
||
-- EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||
-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||
-- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||
-- DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||
-- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||
-- OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||
-- HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||
-- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||
-- ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||
-- POSSIBILITY OF SUCH DAMAGE.
|
||
|
||
{-# LANGUAGE CPP #-}
|
||
{-# LANGUAGE FlexibleContexts #-}
|
||
{-# LANGUAGE MultiWayIf #-}
|
||
{-# LANGUAGE TupleSections #-}
|
||
{-# LANGUAGE TypeFamilies #-}
|
||
|
||
module Text.Megaparsec.LexerSpec (spec) where
|
||
|
||
import Control.Applicative
|
||
import Control.Monad (void)
|
||
import Data.Char hiding (ord)
|
||
import Data.List (isInfixOf)
|
||
import Data.Maybe
|
||
import Data.Monoid ((<>))
|
||
import Data.Scientific (fromFloatDigits)
|
||
import Numeric (showInt, showHex, showOct)
|
||
import Test.Hspec
|
||
import Test.Hspec.Megaparsec
|
||
import Test.Hspec.Megaparsec.AdHoc
|
||
import Test.QuickCheck
|
||
import Text.Megaparsec.Error
|
||
import Text.Megaparsec.Lexer
|
||
import Text.Megaparsec.Pos
|
||
import Text.Megaparsec.Prim
|
||
import Text.Megaparsec.String
|
||
import qualified Text.Megaparsec.Char as C
|
||
|
||
spec :: Spec
|
||
spec = do
|
||
|
||
describe "space" $
|
||
it "consumes any sort of white space" $
|
||
property $ forAll mkWhiteSpace $ \s -> do
|
||
prs scn s `shouldParse` ()
|
||
prs' scn s `succeedsLeaving` ""
|
||
|
||
describe "symbol" $
|
||
context "when stream begins with the symbol" $
|
||
it "parses the symbol and trailing whitespace" $
|
||
property $ forAll mkSymbol $ \s -> do
|
||
let p = symbol scn y
|
||
y = takeWhile (not . isSpace) s
|
||
prs p s `shouldParse` y
|
||
prs' p s `succeedsLeaving` ""
|
||
|
||
describe "symbol'" $
|
||
context "when stream begins with the symbol" $
|
||
it "parses the symbol and trailing whitespace" $
|
||
property $ forAll mkSymbol $ \s -> do
|
||
let p = symbol' scn (toUpper <$> y)
|
||
y = takeWhile (not . isSpace) s
|
||
prs p s `shouldParse` y
|
||
prs' p s `succeedsLeaving` ""
|
||
|
||
describe "skipLineComment" $
|
||
context "when there is no newline at the end of line" $
|
||
it "is picked up successfully" $ do
|
||
let p = space (void C.spaceChar) (skipLineComment "//") empty <* eof
|
||
s = " // this line comment doesn't have a newline at the end "
|
||
prs p s `shouldParse` ()
|
||
prs' p s `succeedsLeaving` ""
|
||
|
||
describe "skipBlockCommentNested" $
|
||
context "when it runs into nested block comments" $
|
||
it "parses them all right" $ do
|
||
let p = space (void C.spaceChar) empty
|
||
(skipBlockCommentNested "/*" "*/") <* eof
|
||
s = " /* foo bar /* baz */ quux */ "
|
||
prs p s `shouldParse` ()
|
||
prs' p s `succeedsLeaving` ""
|
||
|
||
describe "indentLevel" $
|
||
it "returns current indentation level (column)" $
|
||
property $ \pos -> do
|
||
let p = setPosition pos *> indentLevel
|
||
prs p "" `shouldParse` sourceColumn pos
|
||
|
||
describe "incorrectIndent" $
|
||
it "signals correct parse error" $
|
||
property $ \ord ref actual -> do
|
||
let p :: Parser ()
|
||
p = incorrectIndent ord ref actual
|
||
prs p "" `shouldFailWith` err posI (ii ord ref actual)
|
||
|
||
describe "indentGuard" $
|
||
it "works as intended" $
|
||
property $ \n -> do
|
||
let mki = mkIndent sbla (getSmall $ getNonNegative n)
|
||
forAll ((,,) <$> mki <*> mki <*> mki) $ \(l0,l1,l2) -> do
|
||
let (col0, col1, col2) = (getCol l0, getCol l1, getCol l2)
|
||
fragments = [l0,l1,l2]
|
||
g x = sum (length <$> take x fragments)
|
||
s = concat fragments
|
||
p = ip GT pos1 >>=
|
||
\x -> sp >> ip EQ x >> sp >> ip GT x >> sp >> scn
|
||
ip = indentGuard scn
|
||
sp = void (symbol sc sbla <* C.eol)
|
||
if | col0 <= pos1 ->
|
||
prs p s `shouldFailWith` err posI (ii GT pos1 col0)
|
||
| col1 /= col0 ->
|
||
prs p s `shouldFailWith` err (posN (getIndent l1 + g 1) s) (ii EQ col0 col1)
|
||
| col2 <= col0 ->
|
||
prs p s `shouldFailWith` err (posN (getIndent l2 + g 2) s) (ii GT col0 col2)
|
||
| otherwise ->
|
||
prs p s `shouldParse` ()
|
||
|
||
describe "nonIdented" $
|
||
it "works as intended" $
|
||
property $ forAll (mkIndent sbla 0) $ \s -> do
|
||
let p = nonIndented scn (symbol scn sbla)
|
||
i = getIndent s
|
||
if i == 0
|
||
then prs p s `shouldParse` sbla
|
||
else prs p s `shouldFailWith` err (posN i s) (ii EQ pos1 (getCol s))
|
||
|
||
describe "indentBlock" $ do
|
||
it "works as indented" $
|
||
property $ \mn'' -> do
|
||
let mkBlock = do
|
||
l0 <- mkIndent sbla 0
|
||
l1 <- mkIndent sblb ib
|
||
l2 <- mkIndent sblc (ib + 2)
|
||
l3 <- mkIndent sblb ib
|
||
l4 <- mkIndent' sblc (ib + 2)
|
||
return (l0,l1,l2,l3,l4)
|
||
ib = fromMaybe 2 mn'
|
||
mn' = getSmall . getPositive <$> mn''
|
||
mn = unsafePos . fromIntegral <$> mn'
|
||
forAll mkBlock $ \(l0,l1,l2,l3,l4) -> do
|
||
let (col0, col1, col2, col3, col4) =
|
||
(getCol l0, getCol l1, getCol l2, getCol l3, getCol l4)
|
||
fragments = [l0,l1,l2,l3,l4]
|
||
g x = sum (length <$> take x fragments)
|
||
s = concat fragments
|
||
p = lvla <* eof
|
||
lvla = indentBlock scn $ IndentMany mn (l sbla) lvlb <$ b sbla
|
||
lvlb = indentBlock scn $ IndentSome Nothing (l sblb) lvlc <$ b sblb
|
||
lvlc = indentBlock scn $ IndentNone sblc <$ b sblc
|
||
b = symbol sc
|
||
l x = return . (x,)
|
||
ib' = unsafePos (fromIntegral ib)
|
||
if | col1 <= col0 -> prs p s `shouldFailWith`
|
||
err (posN (getIndent l1 + g 1) s) (utok (head sblb) <> eeof)
|
||
| isJust mn && col1 /= ib' -> prs p s `shouldFailWith`
|
||
err (posN (getIndent l1 + g 1) s) (ii EQ ib' col1)
|
||
| col2 <= col1 -> prs p s `shouldFailWith`
|
||
err (posN (getIndent l2 + g 2) s) (ii GT col1 col2)
|
||
| col3 == col2 -> prs p s `shouldFailWith`
|
||
err (posN (getIndent l3 + g 3) s) (utok (head sblb) <> etoks sblc)
|
||
| col3 <= col0 -> prs p s `shouldFailWith`
|
||
err (posN (getIndent l3 + g 3) s) (utok (head sblb) <> eeof)
|
||
| col3 < col1 -> prs p s `shouldFailWith`
|
||
err (posN (getIndent l3 + g 3) s) (ii EQ col1 col3)
|
||
| col3 > col1 -> prs p s `shouldFailWith`
|
||
err (posN (getIndent l3 + g 3) s) (ii EQ col2 col3)
|
||
| col4 <= col3 -> prs p s `shouldFailWith`
|
||
err (posN (getIndent l4 + g 4) s) (ii GT col3 col4)
|
||
| otherwise -> prs p s `shouldParse`
|
||
(sbla, [(sblb, [sblc]), (sblb, [sblc])])
|
||
it "IndentMany works as intended" $
|
||
property $ forAll (mkIndent sbla 0) $ \s -> do
|
||
let p = lvla
|
||
lvla = indentBlock scn $ IndentMany Nothing (l sbla) lvlb <$ b sbla
|
||
lvlb = b sblb
|
||
b = symbol sc
|
||
l x = return . (x,)
|
||
prs p s `shouldParse` (sbla, [])
|
||
prs' p s `succeedsLeaving` ""
|
||
|
||
describe "lineFold" $
|
||
it "works as intended" $
|
||
property $ do
|
||
let mkFold = do
|
||
l0 <- mkInterspace sbla 0
|
||
l1 <- mkInterspace sblb 1
|
||
l2 <- mkInterspace sblc 1
|
||
return (l0,l1,l2)
|
||
forAll mkFold $ \(l0,l1,l2) -> do
|
||
let p = lineFold scn $ \sc' -> do
|
||
a <- symbol sc' sbla
|
||
b <- symbol sc' sblb
|
||
c <- symbol scn sblc
|
||
return (a, b, c)
|
||
getEnd x = last x == '\n'
|
||
fragments = [l0,l1,l2]
|
||
g x = sum (length <$> take x fragments)
|
||
s = concat fragments
|
||
(col0, col1, col2) = (getCol l0, getCol l1, getCol l2)
|
||
(end0, end1) = (getEnd l0, getEnd l1)
|
||
if | end0 && col1 <= col0 -> prs p s `shouldFailWith`
|
||
err (posN (getIndent l1 + g 1) s) (ii GT col0 col1)
|
||
| end1 && col2 <= col0 -> prs p s `shouldFailWith`
|
||
err (posN (getIndent l2 + g 2) s) (ii GT col0 col2)
|
||
| otherwise -> prs p s `shouldParse` (sbla, sblb, sblc)
|
||
|
||
describe "charLiteral" $ do
|
||
context "when stream begins with a literal character" $
|
||
it "parses it" $
|
||
property $ \ch -> do
|
||
let p = charLiteral
|
||
s = showLitChar ch ""
|
||
prs p s `shouldParse` ch
|
||
prs' p s `succeedsLeaving` ""
|
||
context "when stream does not begin with a literal character" $
|
||
it "signals correct parse error" $ do
|
||
let p = charLiteral
|
||
s = "\\"
|
||
prs p s `shouldFailWith` err posI (utok '\\' <> elabel "literal character")
|
||
prs' p s `failsLeaving` s
|
||
context "when stream is empty" $
|
||
it "signals correct parse error" $ do
|
||
let p = charLiteral
|
||
prs p "" `shouldFailWith` err posI (ueof <> elabel "literal character")
|
||
|
||
describe "integer" $ do
|
||
context "when stream begins with decimal digits" $
|
||
it "they are parsed as an integer" $
|
||
property $ \n' -> do
|
||
let p = integer
|
||
n = getNonNegative n'
|
||
s = showInt n ""
|
||
prs p s `shouldParse` n
|
||
prs' p s `succeedsLeaving` ""
|
||
context "when stream does not begin with decimal digits" $
|
||
it "signals correct parse error" $
|
||
property $ \a as -> not (isDigit a) ==> do
|
||
let p = integer
|
||
s = a : as
|
||
prs p s `shouldFailWith` err posI (utok a <> elabel "integer")
|
||
context "when stream is empty" $
|
||
it "signals correct parse error" $
|
||
prs integer "" `shouldFailWith`
|
||
err posI (ueof <> elabel "integer")
|
||
|
||
describe "decimal" $ do
|
||
context "when stream begins with decimal digits" $
|
||
it "they are parsed as an integer" $
|
||
property $ \n' -> do
|
||
let p = decimal
|
||
n = getNonNegative n'
|
||
s = showInt n ""
|
||
prs p s `shouldParse` n
|
||
prs' p s `succeedsLeaving` ""
|
||
context "when stream does not begin with decimal digits" $
|
||
it "signals correct parse error" $
|
||
property $ \a as -> not (isDigit a) ==> do
|
||
let p = decimal
|
||
s = a : as
|
||
prs p s `shouldFailWith` err posI (utok a <> elabel "decimal integer")
|
||
context "when stream is empty" $
|
||
it "signals correct parse error" $
|
||
prs decimal "" `shouldFailWith`
|
||
err posI (ueof <> elabel "decimal integer")
|
||
|
||
describe "hexadecimal" $ do
|
||
context "when stream begins with hexadecimal digits" $
|
||
it "they are parsed as an integer" $
|
||
property $ \n' -> do
|
||
let p = hexadecimal
|
||
n = getNonNegative n'
|
||
s = showHex n ""
|
||
prs p s `shouldParse` n
|
||
prs' p s `succeedsLeaving` ""
|
||
context "when stream does not begin with hexadecimal digits" $
|
||
it "signals correct parse error" $
|
||
property $ \a as -> not (isHexDigit a) ==> do
|
||
let p = hexadecimal
|
||
s = a : as
|
||
prs p s `shouldFailWith`
|
||
err posI (utok a <> elabel "hexadecimal integer")
|
||
context "when stream is empty" $
|
||
it "signals correct parse error" $
|
||
prs hexadecimal "" `shouldFailWith`
|
||
err posI (ueof <> elabel "hexadecimal integer")
|
||
|
||
describe "octal" $ do
|
||
context "when stream begins with octal digits" $
|
||
it "they are parsed as an integer" $
|
||
property $ \n' -> do
|
||
let p = octal
|
||
n = getNonNegative n'
|
||
s = showOct n ""
|
||
prs p s `shouldParse` n
|
||
prs' p s `succeedsLeaving` ""
|
||
context "when stream does not begin with octal digits" $
|
||
it "signals correct parse error" $
|
||
property $ \a as -> not (isOctDigit a) ==> do
|
||
let p = octal
|
||
s = a : as
|
||
prs p s `shouldFailWith`
|
||
err posI (utok a <> elabel "octal integer")
|
||
context "when stream is empty" $
|
||
it "signals correct parse error" $
|
||
prs octal "" `shouldFailWith`
|
||
err posI (ueof <> elabel "octal integer")
|
||
|
||
describe "float" $ do
|
||
context "when stream begins with a float" $
|
||
it "parses it" $
|
||
property $ \n' -> do
|
||
let p = float
|
||
n = getNonNegative n'
|
||
s = show n
|
||
prs p s `shouldParse` n
|
||
prs' p s `succeedsLeaving` ""
|
||
context "when stream does not begin with a float" $
|
||
it "signals correct parse error" $
|
||
property $ \a as -> not (isDigit a) ==> do
|
||
let p = float
|
||
s = a : as
|
||
prs p s `shouldFailWith`
|
||
err posI (utok a <> elabel "floating point number")
|
||
prs' p s `failsLeaving` s
|
||
context "when stream begins with a decimal number" $
|
||
it "signals correct parse error" $
|
||
property $ \n' -> do
|
||
let p = float
|
||
n = getNonNegative n'
|
||
s = show (n :: Integer)
|
||
prs p s `shouldFailWith` err (posN (length s) s)
|
||
(ueof <> etok '.' <> etok 'E' <> etok 'e' <> elabel "digit")
|
||
prs' p s `failsLeaving` ""
|
||
context "when stream is empty" $
|
||
it "signals correct parse error" $
|
||
prs float "" `shouldFailWith`
|
||
err posI (ueof <> elabel "floating point number")
|
||
|
||
describe "number" $ do
|
||
context "when stream begins with a number" $
|
||
it "parses it" $
|
||
property $ \n' -> do
|
||
let p = number
|
||
s = either (show . getNonNegative) (show . getNonNegative)
|
||
(n' :: Either (NonNegative Integer) (NonNegative Double))
|
||
prs p s `shouldParse` case n' of
|
||
Left x -> fromIntegral (getNonNegative x)
|
||
Right x -> fromFloatDigits (getNonNegative x)
|
||
prs' p s `succeedsLeaving` ""
|
||
context "when stream is empty" $
|
||
it "signals correct parse error" $
|
||
prs number "" `shouldFailWith`
|
||
err posI (ueof <> elabel "number")
|
||
|
||
describe "signed" $ do
|
||
context "with integer" $
|
||
it "parses signed integers" $
|
||
property $ \n -> do
|
||
let p = signed (hidden C.space) integer
|
||
s = show n
|
||
prs p s `shouldParse` n
|
||
prs' p s `succeedsLeaving` ""
|
||
context "with float" $
|
||
it "parses signed floats" $
|
||
property $ \n -> do
|
||
let p = signed (hidden C.space) float
|
||
s = show n
|
||
prs p s `shouldParse` n
|
||
prs' p s `succeedsLeaving` ""
|
||
context "with number" $
|
||
it "parses singed numbers" $
|
||
property $ \n -> do
|
||
let p = signed (hidden C.space) number
|
||
s = either show show (n :: Either Integer Double)
|
||
prs p s `shouldParse` case n of
|
||
Left x -> fromIntegral x
|
||
Right x -> fromFloatDigits x
|
||
context "when number is prefixed with plus sign" $
|
||
it "parses the number" $
|
||
property $ \n' -> do
|
||
let p = signed (hidden C.space) integer
|
||
n = getNonNegative n'
|
||
s = show n
|
||
prs p s `shouldParse` n
|
||
prs' p s `succeedsLeaving` ""
|
||
context "when number is prefixed with white space" $
|
||
it "signals correct parse error" $
|
||
property $ \n -> do
|
||
let p = signed (hidden C.space) integer
|
||
s = ' ' : show (n :: Integer)
|
||
prs p s `shouldFailWith` err posI
|
||
(utok ' ' <> etok '+' <> etok '-' <> elabel "integer")
|
||
prs' p s `failsLeaving` s
|
||
context "when there is white space between sign and digits" $
|
||
it "parses it all right" $ do
|
||
let p = signed (hidden C.space) integer
|
||
s = "- 123"
|
||
prs p s `shouldParse` (-123)
|
||
prs' p s `succeedsLeaving` ""
|
||
|
||
----------------------------------------------------------------------------
|
||
-- Helpers
|
||
|
||
mkWhiteSpace :: Gen String
|
||
mkWhiteSpace = concat <$> listOf whiteUnit
|
||
where whiteUnit = oneof [whiteChars, whiteLine, whiteBlock]
|
||
|
||
mkSymbol :: Gen String
|
||
mkSymbol = (++) <$> symbolName <*> whiteChars
|
||
|
||
mkInterspace :: String -> Int -> Gen String
|
||
mkInterspace x n = oneof [si, mkIndent x n]
|
||
where si = (++ x) <$> listOf (elements " \t")
|
||
|
||
mkIndent :: String -> Int -> Gen String
|
||
mkIndent x n = (++) <$> mkIndent' x n <*> eol
|
||
where eol = frequency [(5, return "\n"), (1, listOf1 (return '\n'))]
|
||
|
||
mkIndent' :: String -> Int -> Gen String
|
||
mkIndent' x n = concat <$> sequence [spc, sym, tra]
|
||
where spc = frequency [(5, vectorOf n itm), (1, listOf itm)]
|
||
tra = listOf itm
|
||
itm = elements " \t"
|
||
sym = return x
|
||
|
||
whiteChars :: Gen String
|
||
whiteChars = listOf (elements "\t\n ")
|
||
|
||
whiteLine :: Gen String
|
||
whiteLine = commentOut <$> arbitrary `suchThat` goodEnough
|
||
where commentOut x = "//" ++ x ++ "\n"
|
||
goodEnough x = '\n' `notElem` x
|
||
|
||
whiteBlock :: Gen String
|
||
whiteBlock = commentOut <$> arbitrary `suchThat` goodEnough
|
||
where commentOut x = "/*" ++ x ++ "*/"
|
||
goodEnough x = not $ "*/" `isInfixOf` x
|
||
|
||
symbolName :: Gen String
|
||
symbolName = listOf $ arbitrary `suchThat` isAlphaNum
|
||
|
||
sc :: Parser ()
|
||
sc = space (void $ C.oneOf " \t") empty empty
|
||
|
||
scn :: Parser ()
|
||
scn = space (void C.spaceChar) l b
|
||
where l = skipLineComment "//"
|
||
b = skipBlockComment "/*" "*/"
|
||
|
||
getIndent :: String -> Int
|
||
getIndent = length . takeWhile isSpace
|
||
|
||
getCol :: String -> Pos
|
||
getCol x = sourceColumn .
|
||
updatePosString defaultTabWidth (initialPos "") $ take (getIndent x) x
|
||
|
||
sbla, sblb, sblc :: String
|
||
sbla = "aaa"
|
||
sblb = "bbb"
|
||
sblc = "ccc"
|
||
|
||
ii :: Ordering -> Pos -> Pos -> EC Char Dec
|
||
ii ord ref actual = cstm (DecIndentation ord ref actual)
|