mirror of
https://github.com/mrkkrp/megaparsec.git
synced 2024-11-23 19:38:05 +03:00
Merge pull request #76 from mrkkrp/advanced-indent
Advanced combinators for indentation-sensitive parsing
This commit is contained in:
commit
61dc477eea
15
CHANGELOG.md
15
CHANGELOG.md
@ -1,3 +1,18 @@
|
||||
## Megaparsec 4.3.0
|
||||
|
||||
* Canonicalized `Applicative`/`Monad` instances. Thanks to Herbert Valerio
|
||||
Riedel.
|
||||
|
||||
* Custom messages in `ParseError` are printed each on its own line.
|
||||
|
||||
* Now accumulated hints are not used with `ParseError` records that have
|
||||
only custom messages in them (created with `Message` constructor, as
|
||||
opposed to `Unexpected` or `Expected`). This strips “expected” line from
|
||||
custom error messages where it's unlikely to be relevant anyway.
|
||||
|
||||
* Added higher-level combinators for indentation-sensitive grammars:
|
||||
`indentLevel`, `nonIndented`, and `indentBlock`.
|
||||
|
||||
## Megaparsec 4.2.0
|
||||
|
||||
* Made `newPos` constructor and other functions in `Text.Megaparsec.Pos`
|
||||
|
@ -19,14 +19,19 @@
|
||||
-- > import qualified Text.Megaparsec.Lexer as L
|
||||
|
||||
module Text.Megaparsec.Lexer
|
||||
( -- * White space and indentation
|
||||
( -- * White space
|
||||
space
|
||||
, lexeme
|
||||
, symbol
|
||||
, symbol'
|
||||
, indentGuard
|
||||
, skipLineComment
|
||||
, skipBlockComment
|
||||
-- * Indentation
|
||||
, indentLevel
|
||||
, indentGuard
|
||||
, nonIndented
|
||||
, IndentOpt (..)
|
||||
, indentBlock
|
||||
-- * Character and string literals
|
||||
, charLiteral
|
||||
-- * Numbers
|
||||
@ -40,10 +45,10 @@ module Text.Megaparsec.Lexer
|
||||
, signed )
|
||||
where
|
||||
|
||||
import Control.Applicative ((<|>), some)
|
||||
import Control.Applicative ((<|>), some, optional)
|
||||
import Control.Monad (void)
|
||||
import Data.Char (readLitChar)
|
||||
import Data.Maybe (listToMaybe)
|
||||
import Data.Maybe (listToMaybe, fromMaybe)
|
||||
import Prelude hiding (negate)
|
||||
import qualified Prelude
|
||||
|
||||
@ -85,10 +90,10 @@ import Control.Applicative ((<$>), (<*), (*>), (<*>), pure)
|
||||
-- of the file).
|
||||
|
||||
space :: MonadParsec s m Char
|
||||
=> m () -- ^ A parser for a space character (e.g. 'C.spaceChar')
|
||||
-> m () -- ^ A parser for a line comment (e.g. 'skipLineComment')
|
||||
-> m () -- ^ A parser for a block comment (e.g. 'skipBlockComment')
|
||||
-> m ()
|
||||
=> m () -- ^ A parser for a space character (e.g. 'C.spaceChar')
|
||||
-> m () -- ^ A parser for a line comment (e.g. 'skipLineComment')
|
||||
-> m () -- ^ A parser for a block comment (e.g. 'skipBlockComment')
|
||||
-> m ()
|
||||
space ch line block = hidden . skipMany $ choice [ch, line, block]
|
||||
|
||||
-- | This is wrapper for lexemes. Typical usage is to supply first argument
|
||||
@ -98,7 +103,10 @@ space ch line block = hidden . skipMany $ choice [ch, line, block]
|
||||
-- > lexeme = L.lexeme spaceConsumer
|
||||
-- > integer = lexeme L.integer
|
||||
|
||||
lexeme :: MonadParsec s m Char => m () -> m a -> m a
|
||||
lexeme :: MonadParsec s m Char
|
||||
=> m () -- ^ How to consume white space after lexeme
|
||||
-> m a -- ^ How to parse actual lexeme
|
||||
-> m a
|
||||
lexeme spc p = p <* spc
|
||||
|
||||
-- | This is a helper to parse symbols, i.e. verbatim strings. You pass the
|
||||
@ -116,15 +124,55 @@ lexeme spc p = p <* spc
|
||||
-- > colon = symbol ":"
|
||||
-- > dot = symbol "."
|
||||
|
||||
symbol :: MonadParsec s m Char => m () -> String -> m String
|
||||
symbol :: MonadParsec s m Char
|
||||
=> m () -- ^ How to consume white space after lexeme
|
||||
-> String -- ^ String to parse
|
||||
-> m String
|
||||
symbol spc = lexeme spc . C.string
|
||||
|
||||
-- | Case-insensitive version of 'symbol'. This may be helpful if you're
|
||||
-- working with case-insensitive languages.
|
||||
|
||||
symbol' :: MonadParsec s m Char => m () -> String -> m String
|
||||
symbol' :: MonadParsec s m Char
|
||||
=> m () -- ^ How to consume white space after lexeme
|
||||
-> String -- ^ String to parse (case-insensitive)
|
||||
-> m String
|
||||
symbol' spc = lexeme spc . C.string'
|
||||
|
||||
-- | Given comment prefix this function returns parser that skips line
|
||||
-- comments. Note that it stops just before newline character but doesn't
|
||||
-- consume the newline. Newline is either supposed to be consumed by 'space'
|
||||
-- parser or picked up manually.
|
||||
|
||||
skipLineComment :: MonadParsec s m Char
|
||||
=> String -- ^ Line comment prefix
|
||||
-> m ()
|
||||
skipLineComment prefix = p >> void (manyTill C.anyChar n)
|
||||
where p = try (C.string prefix)
|
||||
n = lookAhead C.newline
|
||||
|
||||
-- | @skipBlockComment start end@ skips non-nested block comment starting
|
||||
-- with @start@ and ending with @end@.
|
||||
|
||||
skipBlockComment :: MonadParsec s m Char
|
||||
=> String -- ^ Start of block comment
|
||||
-> String -- ^ End of block comment
|
||||
-> m ()
|
||||
skipBlockComment start end = p >> void (manyTill C.anyChar n)
|
||||
where p = try (C.string start)
|
||||
n = try (C.string end)
|
||||
|
||||
-- Indentation
|
||||
|
||||
-- | Return current indentation level.
|
||||
--
|
||||
-- The function is a simple shortcut defined as:
|
||||
--
|
||||
-- > indentLevel = sourceColumn <$> getPosition
|
||||
|
||||
indentLevel :: MonadParsec s m t => m Int
|
||||
indentLevel = sourceColumn <$> getPosition
|
||||
|
||||
-- | @indentGuard spaceConsumer test@ first consumes all white space
|
||||
-- (indentation) with @spaceConsumer@ parser, then it checks column
|
||||
-- position. It should satisfy supplied predicate @test@, otherwise the
|
||||
@ -136,31 +184,88 @@ symbol' spc = lexeme spc . C.string'
|
||||
-- indentation. Use returned value to check indentation on every subsequent
|
||||
-- line according to syntax of your language.
|
||||
|
||||
indentGuard :: MonadParsec s m Char => m () -> (Int -> Bool) -> m Int
|
||||
indentGuard :: MonadParsec s m Char
|
||||
=> m () -- ^ How to consume indentation (white space)
|
||||
-> (Int -> Bool) -- ^ Predicate checking indentation level
|
||||
-> m Int -- ^ Current column (indentation level)
|
||||
indentGuard spc p = do
|
||||
spc
|
||||
pos <- sourceColumn <$> getPosition
|
||||
if p pos
|
||||
then return pos
|
||||
else fail "incorrect indentation"
|
||||
lvl <- indentLevel
|
||||
if p lvl
|
||||
then return lvl
|
||||
else fail ii
|
||||
|
||||
-- | Given comment prefix this function returns parser that skips line
|
||||
-- comments. Note that it stops just before newline character but doesn't
|
||||
-- consume the newline. Newline is either supposed to be consumed by 'space'
|
||||
-- parser or picked up manually.
|
||||
-- | Parse non-indented construction. This ensures that there is no
|
||||
-- indentation before actual data. Useful, for example, as a wrapper for
|
||||
-- top-level function definitions.
|
||||
|
||||
skipLineComment :: MonadParsec s m Char => String -> m ()
|
||||
skipLineComment prefix = p >> void (manyTill C.anyChar n)
|
||||
where p = try $ C.string prefix
|
||||
n = lookAhead C.newline
|
||||
nonIndented :: MonadParsec s m Char
|
||||
=> m () -- ^ How to consume indentation (white space)
|
||||
-> m a -- ^ How to parse actual data
|
||||
-> m a
|
||||
nonIndented sc p = indentGuard sc (== 1) *> p
|
||||
|
||||
-- | @skipBlockComment start end@ skips non-nested block comment starting
|
||||
-- with @start@ and ending with @end@.
|
||||
-- | The data type represents available behaviors for parsing of indented
|
||||
-- tokens. This is used in 'indentBlock', which see.
|
||||
|
||||
skipBlockComment :: MonadParsec s m Char => String -> String -> m ()
|
||||
skipBlockComment start end = p >> void (manyTill C.anyChar n)
|
||||
where p = try $ C.string start
|
||||
n = try $ C.string end
|
||||
data IndentOpt m a b
|
||||
= IndentNone a
|
||||
-- ^ Parse no indented tokens, just return the value
|
||||
| IndentMany (Maybe Int) ([b] -> m a) (m b)
|
||||
-- ^ Parse many indented tokens (possibly zero), use given indentation
|
||||
-- level (if 'Nothing', use level of the first indented token); the
|
||||
-- second argument tells how to get final result, and third argument
|
||||
-- describes how to parse indented token
|
||||
| IndentSome (Maybe Int) ([b] -> m a) (m b)
|
||||
-- ^ Just like 'ManyIndent', but requires at least one indented token to
|
||||
-- be present
|
||||
|
||||
-- | Parse a “reference” token and a number of other tokens that have
|
||||
-- greater (but the same) level of indentation than that of “reference”
|
||||
-- token. Reference token can influence parsing, see 'IndentOpt' for more
|
||||
-- information.
|
||||
--
|
||||
-- Tokens /must not/ consume newlines after them. On the other hand, the
|
||||
-- first argument of this function /should/ consume newlines among other
|
||||
-- white space characters.
|
||||
|
||||
indentBlock :: MonadParsec String m Char
|
||||
=> m () -- ^ How to consume indentation (white space)
|
||||
-> m (IndentOpt m a b) -- ^ How to parse “reference” token
|
||||
-> m a
|
||||
indentBlock sc r = do
|
||||
ref <- indentGuard sc (const True)
|
||||
a <- r
|
||||
case a of
|
||||
IndentNone x -> return x
|
||||
IndentMany indent f p -> do
|
||||
mlvl <- lookAhead . optional . try $ C.eol *> indentGuard sc (> ref)
|
||||
case mlvl of
|
||||
Nothing -> sc *> f []
|
||||
Just lvl -> indentedItems ref (fromMaybe lvl indent) sc p >>= f
|
||||
IndentSome indent f p -> do
|
||||
lvl <- lookAhead . try $ C.eol *> indentGuard sc (> ref)
|
||||
indentedItems ref (fromMaybe lvl indent) sc p >>= f
|
||||
|
||||
-- | Grab indented items. This is a helper for 'indentBlock', it's not a
|
||||
-- part of public API.
|
||||
|
||||
indentedItems :: MonadParsec String m Char
|
||||
=> Int -- ^ Reference indentation level
|
||||
-> Int -- ^ Level of the first indented item ('lookAhead'ed)
|
||||
-> m () -- ^ How to consume indentation (white space)
|
||||
-> m b -- ^ How to parse indented tokens
|
||||
-> m [b]
|
||||
indentedItems ref lvl sc p = go
|
||||
where
|
||||
go = (sc *> indentLevel) >>= re
|
||||
re pos
|
||||
| pos <= ref = return []
|
||||
| pos == lvl = (:) <$> p <*> go
|
||||
| otherwise = fail ii
|
||||
|
||||
ii :: String
|
||||
ii = "incorrect indentation"
|
||||
|
||||
-- Character and string literals
|
||||
|
||||
@ -256,21 +361,18 @@ nump prefix baseDigit = read . (prefix ++) <$> some baseDigit
|
||||
-- If you need to parse signed floats, see 'signed'.
|
||||
|
||||
float :: MonadParsec s m Char => m Double
|
||||
float = label "float" $ read <$> f
|
||||
where f = do
|
||||
d <- some C.digitChar
|
||||
rest <- fraction <|> fExp
|
||||
return $ d ++ rest
|
||||
float = label "float" (read <$> f)
|
||||
where f = (++) <$> some C.digitChar <*> (fraction <|> fExp)
|
||||
|
||||
-- | This is a helper for 'float' parser. It parses fractional part of
|
||||
-- floating point number, that is, dot and everything after it.
|
||||
|
||||
fraction :: MonadParsec s m Char => m String
|
||||
fraction = do
|
||||
void $ C.char '.'
|
||||
void (C.char '.')
|
||||
d <- some C.digitChar
|
||||
e <- option "" fExp
|
||||
return $ '.' : d ++ e
|
||||
return ('.' : d ++ e)
|
||||
|
||||
-- | This helper parses exponent of floating point numbers.
|
||||
|
||||
@ -279,7 +381,7 @@ fExp = do
|
||||
expChar <- C.char' 'e'
|
||||
signStr <- option "" (pure <$> choice (C.char <$> "+-"))
|
||||
d <- some C.digitChar
|
||||
return $ expChar : signStr ++ d
|
||||
return (expChar : signStr ++ d)
|
||||
|
||||
-- | Parse a number: either integer or floating point. The parser can handle
|
||||
-- overlapping grammars graciously.
|
||||
|
@ -136,9 +136,18 @@ toHints err = Hints hints
|
||||
msgs = filter ((== 1) . fromEnum) $ errorMessages err
|
||||
|
||||
-- | @withHints hs c@ makes “error” continuation @c@ use given hints @hs@.
|
||||
--
|
||||
-- Note that if resulting continuation gets 'ParseError' where all messages
|
||||
-- are created with 'Message' constructor, hints are ignored.
|
||||
|
||||
withHints :: Hints -> (ParseError -> m b) -> ParseError -> m b
|
||||
withHints (Hints xs) c = c . addErrorMessages (Expected <$> concat xs)
|
||||
withHints (Hints xs) c e =
|
||||
let isMessage (Message _) = True
|
||||
isMessage _ = False
|
||||
in (if all isMessage (errorMessages e)
|
||||
then c
|
||||
else c . addErrorMessages (Expected <$> concat xs))
|
||||
e
|
||||
|
||||
-- | @accHints hs c@ results in “OK” continuation that will add given hints
|
||||
-- @hs@ to third argument of original continuation @c@.
|
||||
@ -183,7 +192,7 @@ instance Stream TL.Text Char where
|
||||
{-# INLINE uncons #-}
|
||||
|
||||
-- | @StorableStream@ abstracts ability of some streams to be stored in a
|
||||
-- file. This is used by polymorphic function 'readFromFile'.
|
||||
-- file. This is used by the polymorphic function 'parseFromFile'.
|
||||
|
||||
class Stream s t => StorableStream s t where
|
||||
|
||||
@ -265,7 +274,7 @@ pMap f p = ParsecT $ \s cok cerr eok eerr ->
|
||||
instance A.Applicative (ParsecT s m) where
|
||||
pure = pPure
|
||||
(<*>) = ap
|
||||
p1 *> p2 = p1 `pBind` \_ -> p2
|
||||
p1 *> p2 = p1 `pBind` const p2
|
||||
p1 <* p2 = do { x1 <- p1 ; void p2 ; return x1 }
|
||||
|
||||
instance A.Alternative (ParsecT s m) where
|
||||
|
195
tests/Lexer.hs
195
tests/Lexer.hs
@ -27,6 +27,8 @@
|
||||
-- ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
-- POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
{-# LANGUAGE TupleSections #-}
|
||||
|
||||
module Lexer (tests) where
|
||||
|
||||
import Control.Applicative (empty)
|
||||
@ -39,7 +41,7 @@ import Data.Char
|
||||
, isSpace
|
||||
, toLower )
|
||||
import Data.List (findIndices, isInfixOf, find)
|
||||
import Data.Maybe (listToMaybe, maybeToList, isNothing, fromJust)
|
||||
import Data.Maybe
|
||||
import Numeric (showInt, showHex, showOct, showSigned)
|
||||
|
||||
import Test.Framework
|
||||
@ -53,10 +55,11 @@ import Text.Megaparsec.Prim
|
||||
import Text.Megaparsec.String
|
||||
import qualified Text.Megaparsec.Char as C
|
||||
|
||||
import Prim () -- 'Arbitrary' instance for 'SourcePos'
|
||||
import Util
|
||||
|
||||
#if !MIN_VERSION_base(4,8,0)
|
||||
import Control.Applicative ((<$>), (<*), (<*>))
|
||||
import Control.Applicative ((<$>), (<*), (<*>), (<$))
|
||||
#endif
|
||||
|
||||
tests :: Test
|
||||
@ -64,7 +67,11 @@ tests = testGroup "Lexer"
|
||||
[ testProperty "space combinator" prop_space
|
||||
, testProperty "symbol combinator" prop_symbol
|
||||
, testProperty "symbol' combinator" prop_symbol'
|
||||
, testProperty "indentLevel" prop_indentLevel
|
||||
, testProperty "indentGuard combinator" prop_indentGuard
|
||||
, testProperty "nonIndented combinator" prop_nonIndented
|
||||
, testProperty "indentBlock combinator" prop_indentBlock
|
||||
, testProperty "indentBlock (many)" prop_indentMany
|
||||
, testProperty "charLiteral" prop_charLiteral
|
||||
, testProperty "integer" prop_integer
|
||||
, testProperty "decimal" prop_decimal
|
||||
@ -77,25 +84,25 @@ tests = testGroup "Lexer"
|
||||
, testProperty "number 2 (signed)" prop_number_2
|
||||
, testProperty "signed" prop_signed ]
|
||||
|
||||
newtype WhiteSpace = WhiteSpace
|
||||
{ getWhiteSpace :: String }
|
||||
deriving (Show, Eq)
|
||||
-- White space
|
||||
|
||||
instance Arbitrary WhiteSpace where
|
||||
arbitrary = WhiteSpace . concat <$> listOf whiteUnit
|
||||
mkWhiteSpace :: Gen String
|
||||
mkWhiteSpace = concat <$> listOf whiteUnit
|
||||
where whiteUnit = oneof [whiteChars, whiteLine, whiteBlock]
|
||||
|
||||
newtype Symbol = Symbol
|
||||
{ getSymbol :: String }
|
||||
deriving (Show, Eq)
|
||||
mkSymbol :: Gen String
|
||||
mkSymbol = (++) <$> symbolName <*> whiteChars
|
||||
|
||||
instance Arbitrary Symbol where
|
||||
arbitrary = Symbol <$> ((++) <$> symbolName <*> whiteChars)
|
||||
|
||||
whiteUnit :: Gen String
|
||||
whiteUnit = oneof [whiteChars, whiteLine, whiteBlock]
|
||||
mkIndent :: String -> Int -> Gen String
|
||||
mkIndent x n = concat <$> sequence [spc, sym, tra, eol]
|
||||
where spc = frequency [(5, vectorOf n itm), (1, listOf itm)]
|
||||
tra = listOf itm
|
||||
itm = elements " \t"
|
||||
sym = return x
|
||||
eol = frequency [(5, return "\n"), (1, listOf1 (return '\n'))]
|
||||
|
||||
whiteChars :: Gen String
|
||||
whiteChars = listOf $ elements "\t\n "
|
||||
whiteChars = listOf (elements "\t\n ")
|
||||
|
||||
whiteLine :: Gen String
|
||||
whiteLine = commentOut <$> arbitrary `suchThat` goodEnough
|
||||
@ -118,52 +125,124 @@ sc = space (void C.spaceChar) l b
|
||||
sc' :: Parser ()
|
||||
sc' = space (void $ C.oneOf " \t") empty empty
|
||||
|
||||
prop_space :: WhiteSpace -> Property
|
||||
prop_space w = checkParser p r s
|
||||
prop_space :: Property
|
||||
prop_space = forAll mkWhiteSpace (checkParser p r)
|
||||
where p = sc
|
||||
r = Right ()
|
||||
s = getWhiteSpace w
|
||||
|
||||
prop_symbol :: Symbol -> Maybe Char -> Property
|
||||
prop_symbol = parseSymbol (symbol sc) id
|
||||
prop_symbol :: Maybe Char -> Property
|
||||
prop_symbol t = forAll mkSymbol $ \s ->
|
||||
parseSymbol (symbol sc) id s t
|
||||
|
||||
prop_symbol' :: Symbol -> Maybe Char -> Property
|
||||
prop_symbol' = parseSymbol (symbol' sc) (fmap toLower)
|
||||
prop_symbol' :: Maybe Char -> Property
|
||||
prop_symbol' t = forAll mkSymbol $ \s ->
|
||||
parseSymbol (symbol' sc) (fmap toLower) s t
|
||||
|
||||
parseSymbol :: (String -> Parser String) -> (String -> String)
|
||||
-> Symbol -> Maybe Char -> Property
|
||||
parseSymbol
|
||||
:: (String -> Parser String)
|
||||
-> (String -> String)
|
||||
-> String
|
||||
-> Maybe Char
|
||||
-> Property
|
||||
parseSymbol p' f s' t = checkParser p r s
|
||||
where p = p' (f g)
|
||||
r | g == s || isSpace (last s) = Right g
|
||||
| otherwise = posErr (length s - 1) s [uneCh (last s), exEof]
|
||||
g = takeWhile (not . isSpace) s
|
||||
s = getSymbol s' ++ maybeToList t
|
||||
s = s' ++ maybeToList t
|
||||
|
||||
newtype IndLine = IndLine
|
||||
{ getIndLine :: String }
|
||||
deriving (Show, Eq)
|
||||
-- Indentation
|
||||
|
||||
instance Arbitrary IndLine where
|
||||
arbitrary = IndLine . concat <$> sequence [spc, sym, spc, eol]
|
||||
where spc = listOf (elements " \t")
|
||||
sym = return "xxx"
|
||||
eol = return "\n"
|
||||
prop_indentLevel :: SourcePos -> Property
|
||||
prop_indentLevel pos = p /=\ sourceColumn pos
|
||||
where p = setPosition pos >> indentLevel
|
||||
|
||||
prop_indentGuard :: IndLine -> IndLine -> IndLine -> Property
|
||||
prop_indentGuard l0 l1 l2 = checkParser p r s
|
||||
where p = ip (> 1) >>= \x -> sp >> ip (== x) >> sp >> ip (> x) >> sp
|
||||
ip = indentGuard sc'
|
||||
sp = void $ symbol sc' "xxx" <* C.eol
|
||||
r | f' l0 <= 1 = posErr 0 s msg'
|
||||
| f' l1 /= f' l0 = posErr (f l1 + g [l0]) s msg'
|
||||
| f' l2 <= f' l0 = posErr (f l2 + g [l0, l1]) s msg'
|
||||
prop_indentGuard :: NonNegative Int -> Property
|
||||
prop_indentGuard n =
|
||||
forAll ((,,) <$> mki <*> mki <*> mki) $ \(l0,l1,l2) ->
|
||||
let r | getCol l0 <= 1 = posErr 0 s ii
|
||||
| getCol l1 /= getCol l0 = posErr (getIndent l1 + g 1) s ii
|
||||
| getCol l2 <= getCol l0 = posErr (getIndent l2 + g 2) s ii
|
||||
| otherwise = Right ()
|
||||
msg' = [msg "incorrect indentation"]
|
||||
f = length . takeWhile isSpace . getIndLine
|
||||
f' x = sourceColumn $ updatePosString defaultTabWidth (initialPos "") $
|
||||
take (f x) (getIndLine x)
|
||||
g xs = sum $ length . getIndLine <$> xs
|
||||
s = concat $ getIndLine <$> [l0, l1, l2]
|
||||
fragments = [l0,l1,l2]
|
||||
g x = sum (length <$> take x fragments)
|
||||
s = concat fragments
|
||||
in checkParser p r s
|
||||
where mki = mkIndent sbla (getNonNegative n)
|
||||
p = ip (> 1) >>= \x -> sp >> ip (== x) >> sp >> ip (> x) >> sp >> sc
|
||||
ip = indentGuard sc
|
||||
sp = void (symbol sc' sbla <* C.eol)
|
||||
|
||||
prop_nonIndented :: Property
|
||||
prop_nonIndented = forAll (mkIndent sbla 0) $ \s ->
|
||||
let i = getIndent s
|
||||
r | i == 0 = Right sbla
|
||||
| otherwise = posErr i s ii
|
||||
in checkParser p r s
|
||||
where p = nonIndented sc (symbol sc sbla)
|
||||
|
||||
prop_indentBlock :: Maybe (Positive Int) -> Property
|
||||
prop_indentBlock mn' = forAll mkBlock $ \(l0,l1,l2,l3,l4) ->
|
||||
let r | getCol l1 <= getCol l0 =
|
||||
posErr (getIndent l1 + g 1) s [uneCh (head sblb), exEof]
|
||||
| isJust mn && getCol l1 /= ib =
|
||||
posErr (getIndent l1 + g 1) s ii
|
||||
| getCol l2 <= getCol l1 =
|
||||
posErr (getIndent l2 + g 2) s ii
|
||||
| getCol l3 == getCol l2 =
|
||||
posErr (getIndent l3 + g 3) s [uneCh (head sblb), exStr sblc]
|
||||
| getCol l3 <= getCol l0 =
|
||||
posErr (getIndent l3 + g 3) s [uneCh (head sblb), exEof]
|
||||
| getCol l3 /= getCol l1 =
|
||||
posErr (getIndent l3 + g 3) s ii
|
||||
| getCol l4 <= getCol l3 =
|
||||
posErr (getIndent l4 + g 4) s ii
|
||||
| otherwise = Right (sbla, [(sblb, [sblc]), (sblb, [sblc])])
|
||||
fragments = [l0,l1,l2,l3,l4]
|
||||
g x = sum (length <$> take x fragments)
|
||||
s = concat fragments
|
||||
in checkParser p r s
|
||||
where mkBlock = do
|
||||
l0 <- mkIndent sbla 0
|
||||
l1 <- mkIndent sblb ib
|
||||
l2 <- mkIndent sblc (ib + 2)
|
||||
l3 <- mkIndent sblb ib
|
||||
l4 <- mkIndent sblc (ib + 2)
|
||||
return (l0,l1,l2,l3,l4)
|
||||
p = lvla
|
||||
lvla = indentBlock sc $ IndentMany mn (l sbla) lvlb <$ b sbla
|
||||
lvlb = indentBlock sc $ IndentSome Nothing (l sblb) lvlc <$ b sblb
|
||||
lvlc = indentBlock sc $ IndentNone sblc <$ b sblc
|
||||
b = symbol sc'
|
||||
l x = return . (x,)
|
||||
mn = getPositive <$> mn'
|
||||
ib = fromMaybe 2 mn
|
||||
|
||||
prop_indentMany :: Property
|
||||
prop_indentMany = forAll (mkIndent "xxx" 0) (checkParser p r)
|
||||
where r = Right (sbla, [])
|
||||
p = lvla
|
||||
lvla = indentBlock sc $ IndentMany Nothing (l sbla) lvlb <$ b sbla
|
||||
lvlb = b sblb
|
||||
b = symbol sc'
|
||||
l x = return . (x,)
|
||||
|
||||
getIndent :: String -> Int
|
||||
getIndent = length . takeWhile isSpace
|
||||
|
||||
getCol :: String -> Int
|
||||
getCol x = sourceColumn $
|
||||
updatePosString defaultTabWidth (initialPos "") $ take (getIndent x) x
|
||||
|
||||
sbla, sblb, sblc :: String
|
||||
sbla = "xxx"
|
||||
sblb = "yyy"
|
||||
sblc = "zzz"
|
||||
|
||||
ii :: [Message]
|
||||
ii = [msg "incorrect indentation"]
|
||||
|
||||
-- Character and string literals
|
||||
|
||||
prop_charLiteral :: String -> Bool -> Property
|
||||
prop_charLiteral t i = checkParser charLiteral r s
|
||||
@ -176,6 +255,8 @@ prop_charLiteral t i = checkParser charLiteral r s
|
||||
l = length s - length g
|
||||
s = if null t || i then t else showLitChar (head t) (tail t)
|
||||
|
||||
-- Numbers
|
||||
|
||||
prop_integer :: NonNegative Integer -> Int -> Property
|
||||
prop_integer n' i = checkParser integer r s
|
||||
where (r, s) = quasiCorrupted n' i showInt "integer"
|
||||
@ -227,11 +308,11 @@ prop_signed n i plus = checkParser p r s
|
||||
where p = signed (hidden C.space) integer
|
||||
r | i > length z = Right n
|
||||
| otherwise = posErr i s $ uneCh '?' :
|
||||
(if i <= 0 then [exCh '+', exCh '-'] else []) ++
|
||||
[exSpec $ if isNothing . find isDigit $ take i s
|
||||
then "integer"
|
||||
else "rest of integer"]
|
||||
++ [exEof | i > head (findIndices isDigit s)]
|
||||
(if i <= 0 then [exCh '+', exCh '-'] else []) ++
|
||||
[exSpec $ if isNothing . find isDigit $ take i s
|
||||
then "integer"
|
||||
else "rest of integer"] ++
|
||||
[exEof | i > head (findIndices isDigit s)]
|
||||
z = let bar = showSigned showInt 0 n ""
|
||||
in if n < 0 || plus then bar else '+' : bar
|
||||
s = if i <= length z then take i z ++ "?" ++ drop i z else z
|
||||
@ -243,9 +324,9 @@ quasiCorrupted n' i shower l = (r, s)
|
||||
where n = getNonNegative n'
|
||||
r | i > length z = Right n
|
||||
| otherwise = posErr i s $ uneCh '?' :
|
||||
[ exEof | i > 0 ] ++
|
||||
[if i <= 0 || null l
|
||||
then exSpec l
|
||||
else exSpec $ "rest of " ++ l]
|
||||
[ exEof | i > 0 ] ++
|
||||
[if i <= 0 || null l
|
||||
then exSpec l
|
||||
else exSpec $ "rest of " ++ l]
|
||||
z = shower n ""
|
||||
s = if i <= length z then take i z ++ "?" ++ drop i z else z
|
||||
|
Loading…
Reference in New Issue
Block a user