mirror of
https://github.com/mrkkrp/megaparsec.git
synced 2024-11-26 23:02:07 +03:00
Add ‘hspace’ and ‘hspace1’ functions
These functions are slightly more specific versions of the existing ‘space’ functions. They accept only “horizontal whitespace”.
This commit is contained in:
parent
8c7acbf376
commit
46f276a0cc
@ -1,3 +1,8 @@
|
||||
## Megaparsec 8.1.0
|
||||
|
||||
* Added the functions `hspace` and `hspace1` to the `Text.Megaparsec.Char`
|
||||
and `Text.Megaparsec.Byte` modules.
|
||||
|
||||
## Megaparsec 8.0.0
|
||||
|
||||
* The methods `failure` and `fancyFailure` of `MonadParsec` are now ordinary
|
||||
|
@ -20,7 +20,9 @@ module Text.Megaparsec.Byte
|
||||
eol,
|
||||
tab,
|
||||
space,
|
||||
hspace,
|
||||
space1,
|
||||
hspace1,
|
||||
|
||||
-- * Categories of characters
|
||||
controlChar,
|
||||
@ -47,7 +49,7 @@ module Text.Megaparsec.Byte
|
||||
where
|
||||
|
||||
import Control.Applicative
|
||||
import Data.Char hiding (toLower, toUpper)
|
||||
import Data.Char hiding (isSpace, toLower, toUpper)
|
||||
import Data.Functor (void)
|
||||
import Data.Proxy
|
||||
import Data.Word (Word8)
|
||||
@ -86,16 +88,30 @@ tab = char 9
|
||||
--
|
||||
-- See also: 'skipMany' and 'spaceChar'.
|
||||
space :: (MonadParsec e s m, Token s ~ Word8) => m ()
|
||||
space = void $ takeWhileP (Just "white space") isSpace'
|
||||
space = void $ takeWhileP (Just "white space") isSpace
|
||||
{-# INLINE space #-}
|
||||
|
||||
-- | Like 'space', but does not accept newlines and carriage returns.
|
||||
--
|
||||
-- @since 8.1.0
|
||||
hspace :: (MonadParsec e s m, Token s ~ Word8) => m ()
|
||||
hspace = void $ takeWhileP (Just "white space") isHSpace
|
||||
{-# INLINE hspace #-}
|
||||
|
||||
-- | Skip /one/ or more white space characters.
|
||||
--
|
||||
-- See also: 'skipSome' and 'spaceChar'.
|
||||
space1 :: (MonadParsec e s m, Token s ~ Word8) => m ()
|
||||
space1 = void $ takeWhile1P (Just "white space") isSpace'
|
||||
space1 = void $ takeWhile1P (Just "white space") isSpace
|
||||
{-# INLINE space1 #-}
|
||||
|
||||
-- | Like 'space1', but does not accept newlines and carriage returns.
|
||||
--
|
||||
-- @since 8.1.0
|
||||
hspace1 :: (MonadParsec e s m, Token s ~ Word8) => m ()
|
||||
hspace1 = void $ takeWhile1P (Just "white space") isHSpace
|
||||
{-# INLINE hspace1 #-}
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
-- Categories of characters
|
||||
|
||||
@ -107,7 +123,7 @@ controlChar = satisfy (isControl . toChar) <?> "control character"
|
||||
-- | Parse a space character, and the control characters: tab, newline,
|
||||
-- carriage return, form feed, and vertical tab.
|
||||
spaceChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
|
||||
spaceChar = satisfy isSpace' <?> "white space"
|
||||
spaceChar = satisfy isSpace <?> "white space"
|
||||
{-# INLINE spaceChar #-}
|
||||
|
||||
-- | Parse an upper-case character.
|
||||
@ -201,14 +217,25 @@ char' c =
|
||||
----------------------------------------------------------------------------
|
||||
-- Helpers
|
||||
|
||||
-- | 'Word8'-specialized version of 'isSpace'.
|
||||
isSpace' :: Word8 -> Bool
|
||||
isSpace' x
|
||||
-- | 'Word8'-specialized version of 'Data.Char.isSpace'.
|
||||
isSpace :: Word8 -> Bool
|
||||
isSpace x
|
||||
| x >= 9 && x <= 13 = True
|
||||
| x == 32 = True
|
||||
| x == 160 = True
|
||||
| otherwise = False
|
||||
{-# INLINE isSpace' #-}
|
||||
{-# INLINE isSpace #-}
|
||||
|
||||
-- | Like 'isSpace', but does not accept newlines and carriage returns.
|
||||
isHSpace :: Word8 -> Bool
|
||||
isHSpace x
|
||||
| x == 9 = True
|
||||
| x == 11 = True
|
||||
| x == 12 = True
|
||||
| x == 32 = True
|
||||
| x == 160 = True
|
||||
| otherwise = False
|
||||
{-# INLINE isHSpace #-}
|
||||
|
||||
-- | Convert a byte to char.
|
||||
toChar :: Word8 -> Char
|
||||
|
@ -22,7 +22,9 @@ module Text.Megaparsec.Char
|
||||
eol,
|
||||
tab,
|
||||
space,
|
||||
hspace,
|
||||
space1,
|
||||
hspace1,
|
||||
|
||||
-- * Categories of characters
|
||||
controlChar,
|
||||
@ -98,6 +100,13 @@ space :: (MonadParsec e s m, Token s ~ Char) => m ()
|
||||
space = void $ takeWhileP (Just "white space") isSpace
|
||||
{-# INLINE space #-}
|
||||
|
||||
-- | Like 'space', but does not accept newlines and carriage returns.
|
||||
--
|
||||
-- @since 8.1.0
|
||||
hspace :: (MonadParsec e s m, Token s ~ Char) => m ()
|
||||
hspace = void $ takeWhileP (Just "white space") isHSpace
|
||||
{-# INLINE hspace #-}
|
||||
|
||||
-- | Skip /one/ or more white space characters.
|
||||
--
|
||||
-- See also: 'skipSome' and 'spaceChar'.
|
||||
@ -107,6 +116,13 @@ space1 :: (MonadParsec e s m, Token s ~ Char) => m ()
|
||||
space1 = void $ takeWhile1P (Just "white space") isSpace
|
||||
{-# INLINE space1 #-}
|
||||
|
||||
-- | Like 'space1', but does not accept newlines and carriage returns.
|
||||
--
|
||||
-- @since 8.1.0
|
||||
hspace1 :: (MonadParsec e s m, Token s ~ Char) => m ()
|
||||
hspace1 = void $ takeWhile1P (Just "white space") isHSpace
|
||||
{-# INLINE hspace1 #-}
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
-- Categories of characters
|
||||
|
||||
@ -291,3 +307,10 @@ char' c =
|
||||
char (toTitle c)
|
||||
]
|
||||
{-# INLINE char' #-}
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
-- Helpers
|
||||
|
||||
-- | Is it a horizontal space character?
|
||||
isHSpace :: Char -> Bool
|
||||
isHSpace x = isSpace x && x /= '\n' && x /= '\r'
|
||||
|
@ -256,7 +256,8 @@ indentBlock sc r = do
|
||||
pos <- C.eol *> indentGuard sc GT ref
|
||||
let lvl = fromMaybe pos indent
|
||||
x <-
|
||||
if | pos <= ref -> incorrectIndent GT ref pos
|
||||
if
|
||||
| pos <= ref -> incorrectIndent GT ref pos
|
||||
| pos == lvl -> p
|
||||
| otherwise -> incorrectIndent EQ lvl pos
|
||||
xs <- indentedItems ref lvl sc p
|
||||
@ -285,7 +286,8 @@ indentedItems ref lvl sc p = go
|
||||
if done
|
||||
then return []
|
||||
else
|
||||
if | pos <= ref -> return []
|
||||
if
|
||||
| pos <= ref -> return []
|
||||
| pos == lvl -> (:) <$> p <*> go
|
||||
| otherwise -> incorrectIndent EQ lvl pos
|
||||
|
||||
|
@ -367,7 +367,8 @@ reachOffset'
|
||||
let SourcePos n l c = apos
|
||||
c' = unPos c
|
||||
w = unPos pstateTabWidth
|
||||
in if | ch == newlineTok ->
|
||||
in if
|
||||
| ch == newlineTok ->
|
||||
St
|
||||
(SourcePos n (l <> pos1) pos1)
|
||||
id
|
||||
@ -417,7 +418,8 @@ reachOffsetNoLine'
|
||||
go (SourcePos n l c) ch =
|
||||
let c' = unPos c
|
||||
w = unPos pstateTabWidth
|
||||
in if | ch == newlineTok ->
|
||||
in if
|
||||
| ch == newlineTok ->
|
||||
SourcePos n (l <> pos1) pos1
|
||||
| ch == tabTok ->
|
||||
SourcePos n l (mkPos $ c' + w - ((c' - 1) `rem` w))
|
||||
|
@ -6,7 +6,7 @@ module Text.Megaparsec.ByteSpec (spec) where
|
||||
import Control.Monad
|
||||
import Data.ByteString (ByteString)
|
||||
import qualified Data.ByteString as B
|
||||
import Data.Char
|
||||
import Data.Char hiding (isSpace)
|
||||
import Data.Maybe (fromMaybe)
|
||||
import Data.Void
|
||||
import Data.Word (Word8)
|
||||
@ -82,17 +82,26 @@ spec = do
|
||||
$ it "consumes space up to first non-space character"
|
||||
$ property
|
||||
$ \s' -> do
|
||||
let (s0, s1) = B.partition isSpace' s'
|
||||
let (s0, s1) = B.partition isSpace s'
|
||||
s = s0 <> s1
|
||||
prs space s `shouldParse` ()
|
||||
prs' space s `succeedsLeaving` s1
|
||||
|
||||
describe "hspace"
|
||||
$ it "consumes space up to first non-space character"
|
||||
$ property
|
||||
$ \s' -> do
|
||||
let (s0, s1) = B.partition isHSpace s'
|
||||
s = s0 <> s1
|
||||
prs hspace s `shouldParse` ()
|
||||
prs' hspace s `succeedsLeaving` s1
|
||||
|
||||
describe "space1" $ do
|
||||
context "when stream does not start with a space character"
|
||||
$ it "signals correct parse error"
|
||||
$ property
|
||||
$ \ch s' -> not (isSpace' ch) ==> do
|
||||
let (s0, s1) = B.partition isSpace' s'
|
||||
$ \ch s' -> not (isSpace ch) ==> do
|
||||
let (s0, s1) = B.partition isSpace s'
|
||||
s = B.singleton ch <> s0 <> s1
|
||||
prs space1 s `shouldFailWith` err 0 (utok ch <> elabel "white space")
|
||||
prs' space1 s `failsLeaving` s
|
||||
@ -100,7 +109,7 @@ spec = do
|
||||
$ it "consumes space up to first non-space character"
|
||||
$ property
|
||||
$ \s' -> do
|
||||
let (s0, s1) = B.partition isSpace' s'
|
||||
let (s0, s1) = B.partition isSpace s'
|
||||
s = " " <> s0 <> s1
|
||||
prs space1 s `shouldParse` ()
|
||||
prs' space1 s `succeedsLeaving` s1
|
||||
@ -108,6 +117,27 @@ spec = do
|
||||
$ it "signals correct parse error"
|
||||
$ prs space1 "" `shouldFailWith` err 0 (ueof <> elabel "white space")
|
||||
|
||||
describe "hspace1" $ do
|
||||
context "when stream does not start with a space character"
|
||||
$ it "signals correct parse error"
|
||||
$ property
|
||||
$ \ch s' -> not (isSpace ch) ==> do
|
||||
let (s0, s1) = B.partition isHSpace s'
|
||||
s = B.singleton ch <> s0 <> s1
|
||||
prs hspace1 s `shouldFailWith` err 0 (utok ch <> elabel "white space")
|
||||
prs' hspace1 s `failsLeaving` s
|
||||
context "when stream starts with a space character"
|
||||
$ it "consumes space up to first non-space character"
|
||||
$ property
|
||||
$ \s' -> do
|
||||
let (s0, s1) = B.partition isHSpace s'
|
||||
s = " " <> s0 <> s1
|
||||
prs hspace1 s `shouldParse` ()
|
||||
prs' hspace1 s `succeedsLeaving` s1
|
||||
context "when stream is empty"
|
||||
$ it "signals correct parse error"
|
||||
$ prs hspace1 "" `shouldFailWith` err 0 (ueof <> elabel "white space")
|
||||
|
||||
describe "controlChar" $
|
||||
checkCharPred "control character" (isControl . toChar) controlChar
|
||||
|
||||
@ -234,14 +264,24 @@ prs' ::
|
||||
(State ByteString Void, Either (ParseErrorBundle ByteString Void) a)
|
||||
prs' p s = runParser' p (initialState s)
|
||||
|
||||
-- | 'Word8'-specialized version of 'isSpace'.
|
||||
isSpace' :: Word8 -> Bool
|
||||
isSpace' x
|
||||
-- | 'Word8'-specialized version of 'Data.Char.isSpace'.
|
||||
isSpace :: Word8 -> Bool
|
||||
isSpace x
|
||||
| x >= 9 && x <= 13 = True
|
||||
| x == 32 = True
|
||||
| x == 160 = True
|
||||
| otherwise = False
|
||||
|
||||
-- | Like 'isSpace', but does not accept newlines and carriage returns.
|
||||
isHSpace :: Word8 -> Bool
|
||||
isHSpace x
|
||||
| x == 9 = True
|
||||
| x == 11 = True
|
||||
| x == 12 = True
|
||||
| x == 32 = True
|
||||
| x == 160 = True
|
||||
| otherwise = False
|
||||
|
||||
-- | Lift char transformation to byte transformation.
|
||||
liftChar :: (Char -> Char) -> Word8 -> Word8
|
||||
liftChar f x = (fromMaybe x . fromChar . f . toChar) x
|
||||
|
@ -131,7 +131,8 @@ spec = do
|
||||
>>= \x -> sp >> ip EQ x >> sp >> ip GT x >> sp >> scn
|
||||
ip = indentGuard scn
|
||||
sp = void (symbol sc sbla <* C.eol)
|
||||
if | col0 <= pos1 ->
|
||||
if
|
||||
| col0 <= pos1 ->
|
||||
prs p s `shouldFailWith` errFancy 0 (ii GT pos1 col0)
|
||||
| col1 /= col0 ->
|
||||
prs p s `shouldFailWith` errFancy (getIndent l1 + g 1) (ii EQ col0 col1)
|
||||
@ -178,7 +179,8 @@ spec = do
|
||||
b = symbol sc
|
||||
l x = return . (x,)
|
||||
ib' = mkPos (fromIntegral ib)
|
||||
if | col1 <= col0 ->
|
||||
if
|
||||
| col1 <= col0 ->
|
||||
prs p s
|
||||
`shouldFailWith` err (getIndent l1 + g 1) (utok (head sblb) <> eeof)
|
||||
| isJust mn && col1 /= ib' ->
|
||||
@ -282,7 +284,8 @@ spec = do
|
||||
s = concat fragments
|
||||
(col0, col1, col2) = (getCol l0, getCol l1, getCol l2)
|
||||
(end0, end1) = (getEnd l0, getEnd l1)
|
||||
if | end0 && col1 <= col0 ->
|
||||
if
|
||||
| end0 && col1 <= col0 ->
|
||||
prs p s
|
||||
`shouldFailWith` errFancy (getIndent l1 + g 1) (ii GT col0 col1)
|
||||
| end1 && col2 <= col0 ->
|
||||
|
@ -84,6 +84,15 @@ spec = do
|
||||
prs space s `shouldParse` ()
|
||||
prs' space s `succeedsLeaving` s1
|
||||
|
||||
describe "hspace"
|
||||
$ it "consumes space up to first non-space character"
|
||||
$ property
|
||||
$ \s' -> do
|
||||
let (s0, s1) = partition isHSpace s'
|
||||
s = s0 ++ s1
|
||||
prs hspace s `shouldParse` ()
|
||||
prs' hspace s `succeedsLeaving` s1
|
||||
|
||||
describe "space1" $ do
|
||||
context "when stream does not start with a space character"
|
||||
$ it "signals correct parse error"
|
||||
@ -105,6 +114,27 @@ spec = do
|
||||
$ it "signals correct parse error"
|
||||
$ prs space1 "" `shouldFailWith` err 0 (ueof <> elabel "white space")
|
||||
|
||||
describe "hspace1" $ do
|
||||
context "when stream does not start with a space character"
|
||||
$ it "signals correct parse error"
|
||||
$ property
|
||||
$ \ch s' -> not (isHSpace ch) ==> do
|
||||
let (s0, s1) = partition isHSpace s'
|
||||
s = ch : s0 ++ s1
|
||||
prs hspace1 s `shouldFailWith` err 0 (utok ch <> elabel "white space")
|
||||
prs' hspace1 s `failsLeaving` s
|
||||
context "when stream starts with a space character"
|
||||
$ it "consumes space up to first non-space character"
|
||||
$ property
|
||||
$ \s' -> do
|
||||
let (s0, s1) = partition isHSpace s'
|
||||
s = ' ' : s0 ++ s1
|
||||
prs hspace1 s `shouldParse` ()
|
||||
prs' hspace1 s `succeedsLeaving` s1
|
||||
context "when stream is empty"
|
||||
$ it "signals correct parse error"
|
||||
$ prs hspace1 "" `shouldFailWith` err 0 (ueof <> elabel "white space")
|
||||
|
||||
describe "controlChar" $
|
||||
checkCharPred "control character" isControl controlChar
|
||||
|
||||
@ -374,3 +404,7 @@ casei x y =
|
||||
x == toLower y
|
||||
|| x == toUpper y
|
||||
|| x == toTitle y
|
||||
|
||||
-- | Is it a horizontal space character?
|
||||
isHSpace :: Char -> Bool
|
||||
isHSpace x = isSpace x && x /= '\n' && x /= '\r'
|
||||
|
@ -1410,7 +1410,7 @@ spec = do
|
||||
runParser' p st `shouldBe` (st', Right stateInput)
|
||||
|
||||
describe "atEnd" $ do
|
||||
let p , p' :: Parser Bool
|
||||
let p, p' :: Parser Bool
|
||||
p = atEnd
|
||||
p' = p <* empty
|
||||
context "when stream is empty" $ do
|
||||
|
Loading…
Reference in New Issue
Block a user