implemented flexible tab width

Closes #38.

Now tab width can be manipulated with via the following functions:

* ‘getTabWidth’
* ‘setTabWidth’

Other auxiliary changes were performed, such as updating of
‘updatePosChar’.

This also corrects a bit obsolete descriptions of some functions.
This commit is contained in:
mrkkrp 2015-09-23 16:47:17 +06:00
parent 38abf590db
commit d0409a897e
9 changed files with 115 additions and 77 deletions

View File

@ -34,6 +34,10 @@
* Added new primitive combinator `hidden p` which hides “expected” tokens in
error message when parser `p` fails.
* Tab width is not hard-coded anymore. It can be manipulated via
`getTabWidth` and `setTabWidth`. Default tab-width is `defaultTabWidth`,
which is 8.
### Error messages
* Introduced type class `ShowToken` and improved representation of

View File

@ -139,10 +139,12 @@ module Text.Megaparsec
, Consumed (..)
, Reply (..)
, State (..)
, getPosition
, setPosition
, getInput
, setInput
, getPosition
, setPosition
, getTabWidth
, setTabWidth
, getParserState
, setParserState
, updateParserState )

View File

@ -316,11 +316,10 @@ noneOf' = noneOf . extendi
-- > oneOf cs = satisfy (`elem` cs)
satisfy :: MonadParsec s m Char => (Char -> Bool) -> m Char
satisfy f = token nextPos testChar
where nextPos pos x _ = updatePosChar pos x
testChar x = if f x
then Right x
else Left . pure . Unexpected . showToken $ x
satisfy f = token updatePosChar testChar
where testChar x = if f x
then Right x
else Left . pure . Unexpected . showToken $ x
-- | @string s@ parses a sequence of characters given by @s@. Returns
-- the parsed string (i.e. @s@).

View File

@ -24,7 +24,8 @@ module Text.Megaparsec.Pos
, newPos
, initialPos
, updatePosChar
, updatePosString )
, updatePosString
, defaultTabWidth )
where
import Data.Data (Data)
@ -88,24 +89,34 @@ setSourceLine (SourcePos n _ c) l = SourcePos n l c
setSourceColumn :: SourcePos -> Int -> SourcePos
setSourceColumn (SourcePos n l _) = SourcePos n l
-- | Update a source position given a character. If the character is a
-- newline (\'\\n\') the line number is incremented by 1. If the character
-- is carriage return (\'\\r\') the line number is unaltered, but column
-- number is reset to 1. If the character is a tab (\'\\t\') the column
-- number is incremented to the nearest 8'th column, i.e. @column + 8 -
-- ((column - 1) \`rem\` 8)@. In all other cases, the column is incremented
-- by 1.
-- | Update a source position given a character. The first argument
-- specifies tab width. If the character is a newline (\'\\n\') the line
-- number is incremented by 1. If the character is a tab (\'\\t\') the
-- column number is incremented to the nearest tab position, i.e. @column +
-- width - ((column - 1) \`rem\` width)@. In all other cases, the column is
-- incremented by 1.
--
-- If given tab width is not positive, 'defaultTabWidth' will be used.
updatePosChar :: SourcePos -> Char -> SourcePos
updatePosChar (SourcePos n l c) ch =
updatePosChar :: Int -> SourcePos -> Char -> SourcePos
updatePosChar width (SourcePos n l c) ch =
case ch of
'\n' -> SourcePos n (l + 1) 1
'\t' -> SourcePos n l (c + 8 - ((c - 1) `rem` 8))
'\t' -> let w = if width < 1 then defaultTabWidth else width
in SourcePos n l (c + w - ((c - 1) `rem` w))
_ -> SourcePos n l (c + 1)
-- | The expression @updatePosString pos s@ updates the source position
-- @pos@ by calling 'updatePosChar' on every character in @s@, i.e. @foldl
-- updatePosChar pos string@.
updatePosString :: SourcePos -> String -> SourcePos
updatePosString = foldl' updatePosChar
updatePosString :: Int -> SourcePos -> String -> SourcePos
updatePosString w = foldl' (updatePosChar w)
-- | Value of tab width used by default. This is used as fall-back by
-- 'updatePosChar' and possibly in other cases. Always prefer this constant
-- when you want to refer to default tab width because actual value /may/
-- change in future. Current value is @8@.
defaultTabWidth :: Int
defaultTabWidth = 8

View File

@ -25,10 +25,12 @@ module Text.Megaparsec.Prim
, MonadParsec (..)
, (<?>)
-- * Parser state combinators
, getPosition
, setPosition
, getInput
, setInput
, getPosition
, setPosition
, getTabWidth
, setTabWidth
, setParserState
-- * Running parser
, runParser
@ -68,8 +70,9 @@ import Text.Megaparsec.ShowToken
-- | This is Megaparsec state, it's parametrized over stream type @s@.
data State s = State
{ stateInput :: s
, statePos :: !SourcePos }
{ stateInput :: s
, statePos :: !SourcePos
, stateTabWidth :: !Int }
deriving (Show, Eq)
-- | An instance of @Stream s t@ has stream type @s@, and token type @t@
@ -318,7 +321,7 @@ instance MonadPlus (ParsecT s m) where
mplus = pPlus
pZero :: ParsecT s m a
pZero = ParsecT $ \(State _ pos) _ _ _ eerr -> eerr $ newErrorUnknown pos
pZero = ParsecT $ \(State _ pos _) _ _ _ eerr -> eerr $ newErrorUnknown pos
pPlus :: ParsecT s m a -> ParsecT s m a -> ParsecT s m a
pPlus m n = ParsecT $ \s cok cerr eok eerr ->
@ -414,35 +417,37 @@ class (A.Alternative m, Monad m, Stream s t) =>
eof :: m ()
-- | The parser @token nextPos testTok@ accepts a token @t@ with result
-- @x@ when the function @testTok t@ returns @'Just' x@. The position of
-- @x@ when the function @testTok t@ returns @'Right' x@. The position of
-- the /next/ token should be returned when @nextPos@ is called with the
-- current source position @pos@, the current token @t@ and the rest of
-- the tokens @toks@, @nextPos pos t toks@.
-- tab width, current source position, and the current token.
--
-- This is the most primitive combinator for accepting tokens. For
-- example, the 'Text.Megaparsec.Char.char' parser could be implemented
-- as:
--
-- > char c = token nextPos testChar
-- > where testChar x = if x == c then Just x else Nothing
-- > nextPos pos x xs = updatePosChar pos x
-- > char c = token updatePosChar testChar
-- > where testChar x = if x == c
-- > then Right x
-- > else Left . pure . Unexpected . showToken $ x
token :: (SourcePos -> t -> s -> SourcePos) -- ^ Next position calculating function.
-> (t -> Either [Message] a) -- ^ Matching function for the token to parse.
token :: (Int -> SourcePos -> t -> SourcePos) -- ^ Next position calculating function
-> (t -> Either [Message] a) -- ^ Matching function for the token to parse
-> m a
-- | The parser @tokens posFromTok test@ parses list of tokens and returns
-- it. The resulting parser will use 'showToken' to pretty-print the
-- collection of tokens. Supplied predicate @test@ is used to check
-- equality of given and parsed tokens.
-- it. @posFromTok@ is called with three arguments: tab width, initial
-- position, and collection of tokens to parse. The resulting parser will
-- use 'showToken' to pretty-print the collection of tokens in error
-- messages. Supplied predicate @test@ is used to check equality of given
-- and parsed tokens.
--
-- This can be used to example to write 'Text.Megaparsec.Char.string':
--
-- > string = tokens updatePosString (==)
tokens :: Eq t =>
(SourcePos -> [t] -> SourcePos) -- ^ Computes position of tokens.
-> (t -> t -> Bool) -- ^ Predicate to check equality of tokens.
(Int -> SourcePos -> [t] -> SourcePos) -- ^ Computes position of tokens
-> (t -> t -> Bool) -- ^ Predicate to check equality of tokens
-> [t] -- ^ List of tokens to parse
-> m [t]
@ -467,7 +472,7 @@ instance Stream s t => MonadParsec s (ParsecT s m) t where
updateParserState = pUpdateParserState
pUnexpected :: String -> ParsecT s m a
pUnexpected msg = ParsecT $ \(State _ pos) _ _ _ eerr ->
pUnexpected msg = ParsecT $ \(State _ pos _) _ _ _ eerr ->
eerr $ newErrorMessage (Unexpected msg) pos
pLabel :: String -> ParsecT s m a -> ParsecT s m a
@ -489,7 +494,7 @@ pLookAhead p = ParsecT $ \s _ cerr eok eerr ->
{-# INLINE pLookAhead #-}
pNotFollowedBy :: Stream s t => ParsecT s m a -> ParsecT s m ()
pNotFollowedBy p = ParsecT $ \s@(State input pos) _ _ eok eerr ->
pNotFollowedBy p = ParsecT $ \s@(State input pos _) _ _ eok eerr ->
let l = maybe eoi (showToken . fst) (uncons input)
cok' _ _ _ = eerr $ unexpectedErr l pos
cerr' _ = eok () s mempty
@ -498,38 +503,38 @@ pNotFollowedBy p = ParsecT $ \s@(State input pos) _ _ eok eerr ->
in unParser p s cok' cerr' eok' eerr'
pEof :: Stream s t => ParsecT s m ()
pEof = label eoi $ ParsecT $ \s@(State input pos) _ _ eok eerr ->
pEof = label eoi $ ParsecT $ \s@(State input pos _) _ _ eok eerr ->
case uncons input of
Nothing -> eok () s mempty
Just (x,_) -> eerr $ unexpectedErr (showToken x) pos
{-# INLINE pEof #-}
pToken :: Stream s t =>
(SourcePos -> t -> s -> SourcePos)
(Int -> SourcePos -> t -> SourcePos)
-> (t -> Either [Message] a)
-> ParsecT s m a
pToken nextpos test = ParsecT $ \(State input pos) cok _ _ eerr ->
pToken nextpos test = ParsecT $ \(State input pos w) cok _ _ eerr ->
case uncons input of
Nothing -> eerr $ unexpectedErr eoi pos
Just (c,cs) ->
case test c of
Left ms -> eerr $ foldr addErrorMessage (newErrorUnknown pos) ms
Right x -> let newpos = nextpos pos c cs
newstate = State cs newpos
Right x -> let newpos = nextpos w pos c
newstate = State cs newpos w
in seq newpos $ seq newstate $ cok x newstate mempty
{-# INLINE pToken #-}
pTokens :: Stream s t =>
(SourcePos -> [t] -> SourcePos)
(Int -> SourcePos -> [t] -> SourcePos)
-> (t -> t -> Bool)
-> [t]
-> ParsecT s m [t]
pTokens _ _ [] = ParsecT $ \s _ _ eok _ -> eok [] s mempty
pTokens nextpos test tts = ParsecT $ \(State input pos) cok cerr _ eerr ->
pTokens nextpos test tts = ParsecT $ \(State input pos w) cok cerr _ eerr ->
let errExpect x = setErrorMessage (Expected $ showToken tts)
(newErrorMessage (Unexpected x) pos)
walk [] is rs = let pos' = nextpos pos tts
s' = State rs pos'
walk [] is rs = let pos' = nextpos w pos tts
s' = State rs pos' w
in cok (reverse is) s' mempty
walk (t:ts) is rs =
let errorCont = if null is then eerr else cerr
@ -563,16 +568,6 @@ eoi = "end of input"
-- Parser state combinators
-- | Returns the current source position. See also 'SourcePos'.
getPosition :: MonadParsec s m t => m SourcePos
getPosition = statePos <$> getParserState
-- | @setPosition pos@ sets the current source position to @pos@.
setPosition :: MonadParsec s m t => SourcePos -> m ()
setPosition pos = updateParserState (\(State s _) -> State s pos)
-- | Returns the current input.
getInput :: MonadParsec s m t => m s
@ -582,7 +577,29 @@ getInput = stateInput <$> getParserState
-- @setInput@ functions can for example be used to deal with #include files.
setInput :: MonadParsec s m t => s -> m ()
setInput s = updateParserState (\(State _ pos) -> State s pos)
setInput s = updateParserState (\(State _ pos w) -> State s pos w)
-- | Returns the current source position. See also 'SourcePos'.
getPosition :: MonadParsec s m t => m SourcePos
getPosition = statePos <$> getParserState
-- | @setPosition pos@ sets the current source position to @pos@.
setPosition :: MonadParsec s m t => SourcePos -> m ()
setPosition pos = updateParserState (\(State s _ w) -> State s pos w)
-- | Returns tab width. Default tab width is equal to 'defaultTabWidth'. You
-- can set different tab width with help of 'setTabWidth'.
getTabWidth :: MonadParsec s m t => m Int
getTabWidth = stateTabWidth <$> getParserState
-- | Set tab width. If argument of the function is not positive number,
-- 'defaultTabWidth' will be used.
setTabWidth :: MonadParsec s m t => Int -> m ()
setTabWidth w = updateParserState (\(State s pos _) -> State s pos w)
-- | @setParserState st@ set the full parser state to @st@.
@ -649,7 +666,7 @@ runParser p name s = runIdentity $ runParserT p name s
runParserT :: (Monad m, Stream s t) =>
ParsecT s m a -> String -> s -> m (Either ParseError a)
runParserT p name s = do
res <- runParsecT p $ State s (initialPos name)
res <- runParsecT p $ State s (initialPos name) defaultTabWidth
r <- parserReply res
case r of
Ok x _ -> return $ Right x

View File

@ -155,8 +155,8 @@ prop_indentGuard l0 l1 l2 = checkParser p r s
| otherwise = Right ()
msg' = [msg "incorrect indentation"]
f = length . takeWhile isSpace . getIndLine
f' x = sourceColumn $
updatePosString (initialPos "") $ take (f x) (getIndLine x)
f' x = sourceColumn $ updatePosString defaultTabWidth (initialPos "") $
take (f x) (getIndLine x)
g xs = sum $ length . getIndLine <$> xs
s = concat $ getIndLine <$> [l0, l1, l2]

View File

@ -132,13 +132,14 @@ prop_setSourceColumn pos c =
where c' = getNonNegative c
setp = setSourceColumn pos c'
prop_updating :: SourcePos -> String -> Bool
prop_updating pos "" = updatePosString pos "" == pos
prop_updating pos s =
prop_updating :: Int -> SourcePos -> String -> Bool
prop_updating w pos "" = updatePosString w pos "" == pos
prop_updating w' pos s =
d sourceName id pos updated &&
d sourceLine (+ inclines) pos updated &&
cols >= mincols && ((last s /= '\t') || ((cols - 1) `rem` 8 == 0))
where updated = updatePosString pos s
cols >= mincols && ((last s /= '\t') || ((cols - 1) `rem` w == 0))
where w = if w' < 1 then defaultTabWidth else w
updated = updatePosString w' pos s
cols = sourceColumn updated
newlines = elemIndices '\n' s
creturns = elemIndices '\r' s

View File

@ -90,6 +90,7 @@ tests = testGroup "Primitive parser combinators"
, testProperty "combinator tokens" prop_tokens
, testProperty "parser state position" prop_state_pos
, testProperty "parser state input" prop_state_input
, testProperty "parser state tab width" prop_state_tab
, testProperty "parser state general" prop_state
, testProperty "IdentityT try" prop_IdentityT_try
, testProperty "IdentityT notFollowedBy" prop_IdentityT_notFollowedBy
@ -101,7 +102,7 @@ tests = testGroup "Primitive parser combinators"
, testProperty "WriterT" prop_WriterT ]
instance Arbitrary (State String) where
arbitrary = State <$> arbitrary <*> arbitrary
arbitrary = State <$> arbitrary <*> arbitrary <*> arbitrary
-- Functor instance
@ -347,11 +348,10 @@ prop_notFollowedBy_2 a' b' c' = checkParser p r s
prop_token :: String -> Property
prop_token s = checkParser p r s
where p = token nextPos testChar
nextPos pos x _ = updatePosChar pos x
testChar x = if isLetter x
then Right x
else Left . pure . Unexpected . showToken $ x
where p = token updatePosChar testChar
testChar x = if isLetter x
then Right x
else Left . pure . Unexpected . showToken $ x
h = head s
r | null s = posErr 0 s [uneEof]
| isLetter h && length s == 1 = Right (head s)
@ -379,12 +379,16 @@ prop_state_input s = p /=\ s
guard (null st1)
return result
prop_state_tab :: Int -> Property
prop_state_tab w = p /=\ w
where p = setTabWidth w >> getTabWidth
prop_state :: State String -> State String -> Property
prop_state s1 s2 = runParser p "" "" === Right (f s2 s1)
where f (State s1' pos) (State s2' _) = State (max s1' s2' ) pos
where f (State s1' pos w) (State s2' _ _) = State (max s1' s2' ) pos w
p = do
st <- getParserState
guard (st == State "" (initialPos ""))
guard (st == State "" (initialPos "") defaultTabWidth)
setParserState s1
updateParserState (f s2)
getParserState

View File

@ -137,7 +137,7 @@ abcRow' a b c = abcRow (fromEnum a) (fromEnum b) (fromEnum c)
posErr :: Int -> String -> [Message] -> Either ParseError a
posErr pos s = Left . foldr addErrorMessage (newErrorUnknown errPos)
where errPos = updatePosString (initialPos "") (take pos s)
where errPos = updatePosString defaultTabWidth (initialPos "") (take pos s)
-- | @uneCh s@ returns message created with 'Unexpected' constructor that
-- tells the system that char @s@ is unexpected.