mirror of
https://github.com/mrkkrp/megaparsec.git
synced 2024-11-23 19:38:05 +03:00
implemented flexible tab width
Closes #38. Now tab width can be manipulated with via the following functions: * ‘getTabWidth’ * ‘setTabWidth’ Other auxiliary changes were performed, such as updating of ‘updatePosChar’. This also corrects a bit obsolete descriptions of some functions.
This commit is contained in:
parent
38abf590db
commit
d0409a897e
@ -34,6 +34,10 @@
|
||||
* Added new primitive combinator `hidden p` which hides “expected” tokens in
|
||||
error message when parser `p` fails.
|
||||
|
||||
* Tab width is not hard-coded anymore. It can be manipulated via
|
||||
`getTabWidth` and `setTabWidth`. Default tab-width is `defaultTabWidth`,
|
||||
which is 8.
|
||||
|
||||
### Error messages
|
||||
|
||||
* Introduced type class `ShowToken` and improved representation of
|
||||
|
@ -139,10 +139,12 @@ module Text.Megaparsec
|
||||
, Consumed (..)
|
||||
, Reply (..)
|
||||
, State (..)
|
||||
, getPosition
|
||||
, setPosition
|
||||
, getInput
|
||||
, setInput
|
||||
, getPosition
|
||||
, setPosition
|
||||
, getTabWidth
|
||||
, setTabWidth
|
||||
, getParserState
|
||||
, setParserState
|
||||
, updateParserState )
|
||||
|
@ -316,11 +316,10 @@ noneOf' = noneOf . extendi
|
||||
-- > oneOf cs = satisfy (`elem` cs)
|
||||
|
||||
satisfy :: MonadParsec s m Char => (Char -> Bool) -> m Char
|
||||
satisfy f = token nextPos testChar
|
||||
where nextPos pos x _ = updatePosChar pos x
|
||||
testChar x = if f x
|
||||
then Right x
|
||||
else Left . pure . Unexpected . showToken $ x
|
||||
satisfy f = token updatePosChar testChar
|
||||
where testChar x = if f x
|
||||
then Right x
|
||||
else Left . pure . Unexpected . showToken $ x
|
||||
|
||||
-- | @string s@ parses a sequence of characters given by @s@. Returns
|
||||
-- the parsed string (i.e. @s@).
|
||||
|
@ -24,7 +24,8 @@ module Text.Megaparsec.Pos
|
||||
, newPos
|
||||
, initialPos
|
||||
, updatePosChar
|
||||
, updatePosString )
|
||||
, updatePosString
|
||||
, defaultTabWidth )
|
||||
where
|
||||
|
||||
import Data.Data (Data)
|
||||
@ -88,24 +89,34 @@ setSourceLine (SourcePos n _ c) l = SourcePos n l c
|
||||
setSourceColumn :: SourcePos -> Int -> SourcePos
|
||||
setSourceColumn (SourcePos n l _) = SourcePos n l
|
||||
|
||||
-- | Update a source position given a character. If the character is a
|
||||
-- newline (\'\\n\') the line number is incremented by 1. If the character
|
||||
-- is carriage return (\'\\r\') the line number is unaltered, but column
|
||||
-- number is reset to 1. If the character is a tab (\'\\t\') the column
|
||||
-- number is incremented to the nearest 8'th column, i.e. @column + 8 -
|
||||
-- ((column - 1) \`rem\` 8)@. In all other cases, the column is incremented
|
||||
-- by 1.
|
||||
-- | Update a source position given a character. The first argument
|
||||
-- specifies tab width. If the character is a newline (\'\\n\') the line
|
||||
-- number is incremented by 1. If the character is a tab (\'\\t\') the
|
||||
-- column number is incremented to the nearest tab position, i.e. @column +
|
||||
-- width - ((column - 1) \`rem\` width)@. In all other cases, the column is
|
||||
-- incremented by 1.
|
||||
--
|
||||
-- If given tab width is not positive, 'defaultTabWidth' will be used.
|
||||
|
||||
updatePosChar :: SourcePos -> Char -> SourcePos
|
||||
updatePosChar (SourcePos n l c) ch =
|
||||
updatePosChar :: Int -> SourcePos -> Char -> SourcePos
|
||||
updatePosChar width (SourcePos n l c) ch =
|
||||
case ch of
|
||||
'\n' -> SourcePos n (l + 1) 1
|
||||
'\t' -> SourcePos n l (c + 8 - ((c - 1) `rem` 8))
|
||||
'\t' -> let w = if width < 1 then defaultTabWidth else width
|
||||
in SourcePos n l (c + w - ((c - 1) `rem` w))
|
||||
_ -> SourcePos n l (c + 1)
|
||||
|
||||
-- | The expression @updatePosString pos s@ updates the source position
|
||||
-- @pos@ by calling 'updatePosChar' on every character in @s@, i.e. @foldl
|
||||
-- updatePosChar pos string@.
|
||||
|
||||
updatePosString :: SourcePos -> String -> SourcePos
|
||||
updatePosString = foldl' updatePosChar
|
||||
updatePosString :: Int -> SourcePos -> String -> SourcePos
|
||||
updatePosString w = foldl' (updatePosChar w)
|
||||
|
||||
-- | Value of tab width used by default. This is used as fall-back by
|
||||
-- 'updatePosChar' and possibly in other cases. Always prefer this constant
|
||||
-- when you want to refer to default tab width because actual value /may/
|
||||
-- change in future. Current value is @8@.
|
||||
|
||||
defaultTabWidth :: Int
|
||||
defaultTabWidth = 8
|
||||
|
@ -25,10 +25,12 @@ module Text.Megaparsec.Prim
|
||||
, MonadParsec (..)
|
||||
, (<?>)
|
||||
-- * Parser state combinators
|
||||
, getPosition
|
||||
, setPosition
|
||||
, getInput
|
||||
, setInput
|
||||
, getPosition
|
||||
, setPosition
|
||||
, getTabWidth
|
||||
, setTabWidth
|
||||
, setParserState
|
||||
-- * Running parser
|
||||
, runParser
|
||||
@ -68,8 +70,9 @@ import Text.Megaparsec.ShowToken
|
||||
-- | This is Megaparsec state, it's parametrized over stream type @s@.
|
||||
|
||||
data State s = State
|
||||
{ stateInput :: s
|
||||
, statePos :: !SourcePos }
|
||||
{ stateInput :: s
|
||||
, statePos :: !SourcePos
|
||||
, stateTabWidth :: !Int }
|
||||
deriving (Show, Eq)
|
||||
|
||||
-- | An instance of @Stream s t@ has stream type @s@, and token type @t@
|
||||
@ -318,7 +321,7 @@ instance MonadPlus (ParsecT s m) where
|
||||
mplus = pPlus
|
||||
|
||||
pZero :: ParsecT s m a
|
||||
pZero = ParsecT $ \(State _ pos) _ _ _ eerr -> eerr $ newErrorUnknown pos
|
||||
pZero = ParsecT $ \(State _ pos _) _ _ _ eerr -> eerr $ newErrorUnknown pos
|
||||
|
||||
pPlus :: ParsecT s m a -> ParsecT s m a -> ParsecT s m a
|
||||
pPlus m n = ParsecT $ \s cok cerr eok eerr ->
|
||||
@ -414,35 +417,37 @@ class (A.Alternative m, Monad m, Stream s t) =>
|
||||
eof :: m ()
|
||||
|
||||
-- | The parser @token nextPos testTok@ accepts a token @t@ with result
|
||||
-- @x@ when the function @testTok t@ returns @'Just' x@. The position of
|
||||
-- @x@ when the function @testTok t@ returns @'Right' x@. The position of
|
||||
-- the /next/ token should be returned when @nextPos@ is called with the
|
||||
-- current source position @pos@, the current token @t@ and the rest of
|
||||
-- the tokens @toks@, @nextPos pos t toks@.
|
||||
-- tab width, current source position, and the current token.
|
||||
--
|
||||
-- This is the most primitive combinator for accepting tokens. For
|
||||
-- example, the 'Text.Megaparsec.Char.char' parser could be implemented
|
||||
-- as:
|
||||
--
|
||||
-- > char c = token nextPos testChar
|
||||
-- > where testChar x = if x == c then Just x else Nothing
|
||||
-- > nextPos pos x xs = updatePosChar pos x
|
||||
-- > char c = token updatePosChar testChar
|
||||
-- > where testChar x = if x == c
|
||||
-- > then Right x
|
||||
-- > else Left . pure . Unexpected . showToken $ x
|
||||
|
||||
token :: (SourcePos -> t -> s -> SourcePos) -- ^ Next position calculating function.
|
||||
-> (t -> Either [Message] a) -- ^ Matching function for the token to parse.
|
||||
token :: (Int -> SourcePos -> t -> SourcePos) -- ^ Next position calculating function
|
||||
-> (t -> Either [Message] a) -- ^ Matching function for the token to parse
|
||||
-> m a
|
||||
|
||||
-- | The parser @tokens posFromTok test@ parses list of tokens and returns
|
||||
-- it. The resulting parser will use 'showToken' to pretty-print the
|
||||
-- collection of tokens. Supplied predicate @test@ is used to check
|
||||
-- equality of given and parsed tokens.
|
||||
-- it. @posFromTok@ is called with three arguments: tab width, initial
|
||||
-- position, and collection of tokens to parse. The resulting parser will
|
||||
-- use 'showToken' to pretty-print the collection of tokens in error
|
||||
-- messages. Supplied predicate @test@ is used to check equality of given
|
||||
-- and parsed tokens.
|
||||
--
|
||||
-- This can be used to example to write 'Text.Megaparsec.Char.string':
|
||||
--
|
||||
-- > string = tokens updatePosString (==)
|
||||
|
||||
tokens :: Eq t =>
|
||||
(SourcePos -> [t] -> SourcePos) -- ^ Computes position of tokens.
|
||||
-> (t -> t -> Bool) -- ^ Predicate to check equality of tokens.
|
||||
(Int -> SourcePos -> [t] -> SourcePos) -- ^ Computes position of tokens
|
||||
-> (t -> t -> Bool) -- ^ Predicate to check equality of tokens
|
||||
-> [t] -- ^ List of tokens to parse
|
||||
-> m [t]
|
||||
|
||||
@ -467,7 +472,7 @@ instance Stream s t => MonadParsec s (ParsecT s m) t where
|
||||
updateParserState = pUpdateParserState
|
||||
|
||||
pUnexpected :: String -> ParsecT s m a
|
||||
pUnexpected msg = ParsecT $ \(State _ pos) _ _ _ eerr ->
|
||||
pUnexpected msg = ParsecT $ \(State _ pos _) _ _ _ eerr ->
|
||||
eerr $ newErrorMessage (Unexpected msg) pos
|
||||
|
||||
pLabel :: String -> ParsecT s m a -> ParsecT s m a
|
||||
@ -489,7 +494,7 @@ pLookAhead p = ParsecT $ \s _ cerr eok eerr ->
|
||||
{-# INLINE pLookAhead #-}
|
||||
|
||||
pNotFollowedBy :: Stream s t => ParsecT s m a -> ParsecT s m ()
|
||||
pNotFollowedBy p = ParsecT $ \s@(State input pos) _ _ eok eerr ->
|
||||
pNotFollowedBy p = ParsecT $ \s@(State input pos _) _ _ eok eerr ->
|
||||
let l = maybe eoi (showToken . fst) (uncons input)
|
||||
cok' _ _ _ = eerr $ unexpectedErr l pos
|
||||
cerr' _ = eok () s mempty
|
||||
@ -498,38 +503,38 @@ pNotFollowedBy p = ParsecT $ \s@(State input pos) _ _ eok eerr ->
|
||||
in unParser p s cok' cerr' eok' eerr'
|
||||
|
||||
pEof :: Stream s t => ParsecT s m ()
|
||||
pEof = label eoi $ ParsecT $ \s@(State input pos) _ _ eok eerr ->
|
||||
pEof = label eoi $ ParsecT $ \s@(State input pos _) _ _ eok eerr ->
|
||||
case uncons input of
|
||||
Nothing -> eok () s mempty
|
||||
Just (x,_) -> eerr $ unexpectedErr (showToken x) pos
|
||||
{-# INLINE pEof #-}
|
||||
|
||||
pToken :: Stream s t =>
|
||||
(SourcePos -> t -> s -> SourcePos)
|
||||
(Int -> SourcePos -> t -> SourcePos)
|
||||
-> (t -> Either [Message] a)
|
||||
-> ParsecT s m a
|
||||
pToken nextpos test = ParsecT $ \(State input pos) cok _ _ eerr ->
|
||||
pToken nextpos test = ParsecT $ \(State input pos w) cok _ _ eerr ->
|
||||
case uncons input of
|
||||
Nothing -> eerr $ unexpectedErr eoi pos
|
||||
Just (c,cs) ->
|
||||
case test c of
|
||||
Left ms -> eerr $ foldr addErrorMessage (newErrorUnknown pos) ms
|
||||
Right x -> let newpos = nextpos pos c cs
|
||||
newstate = State cs newpos
|
||||
Right x -> let newpos = nextpos w pos c
|
||||
newstate = State cs newpos w
|
||||
in seq newpos $ seq newstate $ cok x newstate mempty
|
||||
{-# INLINE pToken #-}
|
||||
|
||||
pTokens :: Stream s t =>
|
||||
(SourcePos -> [t] -> SourcePos)
|
||||
(Int -> SourcePos -> [t] -> SourcePos)
|
||||
-> (t -> t -> Bool)
|
||||
-> [t]
|
||||
-> ParsecT s m [t]
|
||||
pTokens _ _ [] = ParsecT $ \s _ _ eok _ -> eok [] s mempty
|
||||
pTokens nextpos test tts = ParsecT $ \(State input pos) cok cerr _ eerr ->
|
||||
pTokens nextpos test tts = ParsecT $ \(State input pos w) cok cerr _ eerr ->
|
||||
let errExpect x = setErrorMessage (Expected $ showToken tts)
|
||||
(newErrorMessage (Unexpected x) pos)
|
||||
walk [] is rs = let pos' = nextpos pos tts
|
||||
s' = State rs pos'
|
||||
walk [] is rs = let pos' = nextpos w pos tts
|
||||
s' = State rs pos' w
|
||||
in cok (reverse is) s' mempty
|
||||
walk (t:ts) is rs =
|
||||
let errorCont = if null is then eerr else cerr
|
||||
@ -563,16 +568,6 @@ eoi = "end of input"
|
||||
|
||||
-- Parser state combinators
|
||||
|
||||
-- | Returns the current source position. See also 'SourcePos'.
|
||||
|
||||
getPosition :: MonadParsec s m t => m SourcePos
|
||||
getPosition = statePos <$> getParserState
|
||||
|
||||
-- | @setPosition pos@ sets the current source position to @pos@.
|
||||
|
||||
setPosition :: MonadParsec s m t => SourcePos -> m ()
|
||||
setPosition pos = updateParserState (\(State s _) -> State s pos)
|
||||
|
||||
-- | Returns the current input.
|
||||
|
||||
getInput :: MonadParsec s m t => m s
|
||||
@ -582,7 +577,29 @@ getInput = stateInput <$> getParserState
|
||||
-- @setInput@ functions can for example be used to deal with #include files.
|
||||
|
||||
setInput :: MonadParsec s m t => s -> m ()
|
||||
setInput s = updateParserState (\(State _ pos) -> State s pos)
|
||||
setInput s = updateParserState (\(State _ pos w) -> State s pos w)
|
||||
|
||||
-- | Returns the current source position. See also 'SourcePos'.
|
||||
|
||||
getPosition :: MonadParsec s m t => m SourcePos
|
||||
getPosition = statePos <$> getParserState
|
||||
|
||||
-- | @setPosition pos@ sets the current source position to @pos@.
|
||||
|
||||
setPosition :: MonadParsec s m t => SourcePos -> m ()
|
||||
setPosition pos = updateParserState (\(State s _ w) -> State s pos w)
|
||||
|
||||
-- | Returns tab width. Default tab width is equal to 'defaultTabWidth'. You
|
||||
-- can set different tab width with help of 'setTabWidth'.
|
||||
|
||||
getTabWidth :: MonadParsec s m t => m Int
|
||||
getTabWidth = stateTabWidth <$> getParserState
|
||||
|
||||
-- | Set tab width. If argument of the function is not positive number,
|
||||
-- 'defaultTabWidth' will be used.
|
||||
|
||||
setTabWidth :: MonadParsec s m t => Int -> m ()
|
||||
setTabWidth w = updateParserState (\(State s pos _) -> State s pos w)
|
||||
|
||||
-- | @setParserState st@ set the full parser state to @st@.
|
||||
|
||||
@ -649,7 +666,7 @@ runParser p name s = runIdentity $ runParserT p name s
|
||||
runParserT :: (Monad m, Stream s t) =>
|
||||
ParsecT s m a -> String -> s -> m (Either ParseError a)
|
||||
runParserT p name s = do
|
||||
res <- runParsecT p $ State s (initialPos name)
|
||||
res <- runParsecT p $ State s (initialPos name) defaultTabWidth
|
||||
r <- parserReply res
|
||||
case r of
|
||||
Ok x _ -> return $ Right x
|
||||
|
@ -155,8 +155,8 @@ prop_indentGuard l0 l1 l2 = checkParser p r s
|
||||
| otherwise = Right ()
|
||||
msg' = [msg "incorrect indentation"]
|
||||
f = length . takeWhile isSpace . getIndLine
|
||||
f' x = sourceColumn $
|
||||
updatePosString (initialPos "") $ take (f x) (getIndLine x)
|
||||
f' x = sourceColumn $ updatePosString defaultTabWidth (initialPos "") $
|
||||
take (f x) (getIndLine x)
|
||||
g xs = sum $ length . getIndLine <$> xs
|
||||
s = concat $ getIndLine <$> [l0, l1, l2]
|
||||
|
||||
|
11
tests/Pos.hs
11
tests/Pos.hs
@ -132,13 +132,14 @@ prop_setSourceColumn pos c =
|
||||
where c' = getNonNegative c
|
||||
setp = setSourceColumn pos c'
|
||||
|
||||
prop_updating :: SourcePos -> String -> Bool
|
||||
prop_updating pos "" = updatePosString pos "" == pos
|
||||
prop_updating pos s =
|
||||
prop_updating :: Int -> SourcePos -> String -> Bool
|
||||
prop_updating w pos "" = updatePosString w pos "" == pos
|
||||
prop_updating w' pos s =
|
||||
d sourceName id pos updated &&
|
||||
d sourceLine (+ inclines) pos updated &&
|
||||
cols >= mincols && ((last s /= '\t') || ((cols - 1) `rem` 8 == 0))
|
||||
where updated = updatePosString pos s
|
||||
cols >= mincols && ((last s /= '\t') || ((cols - 1) `rem` w == 0))
|
||||
where w = if w' < 1 then defaultTabWidth else w
|
||||
updated = updatePosString w' pos s
|
||||
cols = sourceColumn updated
|
||||
newlines = elemIndices '\n' s
|
||||
creturns = elemIndices '\r' s
|
||||
|
@ -90,6 +90,7 @@ tests = testGroup "Primitive parser combinators"
|
||||
, testProperty "combinator tokens" prop_tokens
|
||||
, testProperty "parser state position" prop_state_pos
|
||||
, testProperty "parser state input" prop_state_input
|
||||
, testProperty "parser state tab width" prop_state_tab
|
||||
, testProperty "parser state general" prop_state
|
||||
, testProperty "IdentityT try" prop_IdentityT_try
|
||||
, testProperty "IdentityT notFollowedBy" prop_IdentityT_notFollowedBy
|
||||
@ -101,7 +102,7 @@ tests = testGroup "Primitive parser combinators"
|
||||
, testProperty "WriterT" prop_WriterT ]
|
||||
|
||||
instance Arbitrary (State String) where
|
||||
arbitrary = State <$> arbitrary <*> arbitrary
|
||||
arbitrary = State <$> arbitrary <*> arbitrary <*> arbitrary
|
||||
|
||||
-- Functor instance
|
||||
|
||||
@ -347,11 +348,10 @@ prop_notFollowedBy_2 a' b' c' = checkParser p r s
|
||||
|
||||
prop_token :: String -> Property
|
||||
prop_token s = checkParser p r s
|
||||
where p = token nextPos testChar
|
||||
nextPos pos x _ = updatePosChar pos x
|
||||
testChar x = if isLetter x
|
||||
then Right x
|
||||
else Left . pure . Unexpected . showToken $ x
|
||||
where p = token updatePosChar testChar
|
||||
testChar x = if isLetter x
|
||||
then Right x
|
||||
else Left . pure . Unexpected . showToken $ x
|
||||
h = head s
|
||||
r | null s = posErr 0 s [uneEof]
|
||||
| isLetter h && length s == 1 = Right (head s)
|
||||
@ -379,12 +379,16 @@ prop_state_input s = p /=\ s
|
||||
guard (null st1)
|
||||
return result
|
||||
|
||||
prop_state_tab :: Int -> Property
|
||||
prop_state_tab w = p /=\ w
|
||||
where p = setTabWidth w >> getTabWidth
|
||||
|
||||
prop_state :: State String -> State String -> Property
|
||||
prop_state s1 s2 = runParser p "" "" === Right (f s2 s1)
|
||||
where f (State s1' pos) (State s2' _) = State (max s1' s2' ) pos
|
||||
where f (State s1' pos w) (State s2' _ _) = State (max s1' s2' ) pos w
|
||||
p = do
|
||||
st <- getParserState
|
||||
guard (st == State "" (initialPos ""))
|
||||
guard (st == State "" (initialPos "") defaultTabWidth)
|
||||
setParserState s1
|
||||
updateParserState (f s2)
|
||||
getParserState
|
||||
|
@ -137,7 +137,7 @@ abcRow' a b c = abcRow (fromEnum a) (fromEnum b) (fromEnum c)
|
||||
|
||||
posErr :: Int -> String -> [Message] -> Either ParseError a
|
||||
posErr pos s = Left . foldr addErrorMessage (newErrorUnknown errPos)
|
||||
where errPos = updatePosString (initialPos "") (take pos s)
|
||||
where errPos = updatePosString defaultTabWidth (initialPos "") (take pos s)
|
||||
|
||||
-- | @uneCh s@ returns message created with 'Unexpected' constructor that
|
||||
-- tells the system that char @s@ is unexpected.
|
||||
|
Loading…
Reference in New Issue
Block a user