Add ‘hspace’ and ‘hspace1’ functions

These functions are slightly more specific versions of the existing ‘space’
functions. They accept only “horizontal whitespace”.
This commit is contained in:
Mark Karpov 2020-05-10 22:03:56 +02:00
parent 8c7acbf376
commit 46f276a0cc
9 changed files with 160 additions and 24 deletions

View File

@ -1,3 +1,8 @@
## Megaparsec 8.1.0
* Added the functions `hspace` and `hspace1` to the `Text.Megaparsec.Char`
and `Text.Megaparsec.Byte` modules.
## Megaparsec 8.0.0
* The methods `failure` and `fancyFailure` of `MonadParsec` are now ordinary

View File

@ -20,7 +20,9 @@ module Text.Megaparsec.Byte
eol,
tab,
space,
hspace,
space1,
hspace1,
-- * Categories of characters
controlChar,
@ -47,7 +49,7 @@ module Text.Megaparsec.Byte
where
import Control.Applicative
import Data.Char hiding (toLower, toUpper)
import Data.Char hiding (isSpace, toLower, toUpper)
import Data.Functor (void)
import Data.Proxy
import Data.Word (Word8)
@ -86,16 +88,30 @@ tab = char 9
--
-- See also: 'skipMany' and 'spaceChar'.
space :: (MonadParsec e s m, Token s ~ Word8) => m ()
space = void $ takeWhileP (Just "white space") isSpace'
space = void $ takeWhileP (Just "white space") isSpace
{-# INLINE space #-}
-- | Like 'space', but does not accept newlines and carriage returns.
--
-- @since 8.1.0
hspace :: (MonadParsec e s m, Token s ~ Word8) => m ()
hspace = void $ takeWhileP (Just "white space") isHSpace
{-# INLINE hspace #-}
-- | Skip /one/ or more white space characters.
--
-- See also: 'skipSome' and 'spaceChar'.
space1 :: (MonadParsec e s m, Token s ~ Word8) => m ()
space1 = void $ takeWhile1P (Just "white space") isSpace'
space1 = void $ takeWhile1P (Just "white space") isSpace
{-# INLINE space1 #-}
-- | Like 'space1', but does not accept newlines and carriage returns.
--
-- @since 8.1.0
hspace1 :: (MonadParsec e s m, Token s ~ Word8) => m ()
hspace1 = void $ takeWhile1P (Just "white space") isHSpace
{-# INLINE hspace1 #-}
----------------------------------------------------------------------------
-- Categories of characters
@ -107,7 +123,7 @@ controlChar = satisfy (isControl . toChar) <?> "control character"
-- | Parse a space character, and the control characters: tab, newline,
-- carriage return, form feed, and vertical tab.
spaceChar :: (MonadParsec e s m, Token s ~ Word8) => m (Token s)
spaceChar = satisfy isSpace' <?> "white space"
spaceChar = satisfy isSpace <?> "white space"
{-# INLINE spaceChar #-}
-- | Parse an upper-case character.
@ -201,14 +217,25 @@ char' c =
----------------------------------------------------------------------------
-- Helpers
-- | 'Word8'-specialized version of 'isSpace'.
isSpace' :: Word8 -> Bool
isSpace' x
-- | 'Word8'-specialized version of 'Data.Char.isSpace'.
isSpace :: Word8 -> Bool
isSpace x
| x >= 9 && x <= 13 = True
| x == 32 = True
| x == 160 = True
| otherwise = False
{-# INLINE isSpace' #-}
{-# INLINE isSpace #-}
-- | Like 'isSpace', but does not accept newlines and carriage returns.
isHSpace :: Word8 -> Bool
isHSpace x
| x == 9 = True
| x == 11 = True
| x == 12 = True
| x == 32 = True
| x == 160 = True
| otherwise = False
{-# INLINE isHSpace #-}
-- | Convert a byte to char.
toChar :: Word8 -> Char

View File

@ -22,7 +22,9 @@ module Text.Megaparsec.Char
eol,
tab,
space,
hspace,
space1,
hspace1,
-- * Categories of characters
controlChar,
@ -98,6 +100,13 @@ space :: (MonadParsec e s m, Token s ~ Char) => m ()
space = void $ takeWhileP (Just "white space") isSpace
{-# INLINE space #-}
-- | Like 'space', but does not accept newlines and carriage returns.
--
-- @since 8.1.0
hspace :: (MonadParsec e s m, Token s ~ Char) => m ()
hspace = void $ takeWhileP (Just "white space") isHSpace
{-# INLINE hspace #-}
-- | Skip /one/ or more white space characters.
--
-- See also: 'skipSome' and 'spaceChar'.
@ -107,6 +116,13 @@ space1 :: (MonadParsec e s m, Token s ~ Char) => m ()
space1 = void $ takeWhile1P (Just "white space") isSpace
{-# INLINE space1 #-}
-- | Like 'space1', but does not accept newlines and carriage returns.
--
-- @since 8.1.0
hspace1 :: (MonadParsec e s m, Token s ~ Char) => m ()
hspace1 = void $ takeWhile1P (Just "white space") isHSpace
{-# INLINE hspace1 #-}
----------------------------------------------------------------------------
-- Categories of characters
@ -291,3 +307,10 @@ char' c =
char (toTitle c)
]
{-# INLINE char' #-}
----------------------------------------------------------------------------
-- Helpers
-- | Is it a horizontal space character?
isHSpace :: Char -> Bool
isHSpace x = isSpace x && x /= '\n' && x /= '\r'

View File

@ -256,7 +256,8 @@ indentBlock sc r = do
pos <- C.eol *> indentGuard sc GT ref
let lvl = fromMaybe pos indent
x <-
if | pos <= ref -> incorrectIndent GT ref pos
if
| pos <= ref -> incorrectIndent GT ref pos
| pos == lvl -> p
| otherwise -> incorrectIndent EQ lvl pos
xs <- indentedItems ref lvl sc p
@ -285,7 +286,8 @@ indentedItems ref lvl sc p = go
if done
then return []
else
if | pos <= ref -> return []
if
| pos <= ref -> return []
| pos == lvl -> (:) <$> p <*> go
| otherwise -> incorrectIndent EQ lvl pos

View File

@ -367,7 +367,8 @@ reachOffset'
let SourcePos n l c = apos
c' = unPos c
w = unPos pstateTabWidth
in if | ch == newlineTok ->
in if
| ch == newlineTok ->
St
(SourcePos n (l <> pos1) pos1)
id
@ -417,7 +418,8 @@ reachOffsetNoLine'
go (SourcePos n l c) ch =
let c' = unPos c
w = unPos pstateTabWidth
in if | ch == newlineTok ->
in if
| ch == newlineTok ->
SourcePos n (l <> pos1) pos1
| ch == tabTok ->
SourcePos n l (mkPos $ c' + w - ((c' - 1) `rem` w))

View File

@ -6,7 +6,7 @@ module Text.Megaparsec.ByteSpec (spec) where
import Control.Monad
import Data.ByteString (ByteString)
import qualified Data.ByteString as B
import Data.Char
import Data.Char hiding (isSpace)
import Data.Maybe (fromMaybe)
import Data.Void
import Data.Word (Word8)
@ -82,17 +82,26 @@ spec = do
$ it "consumes space up to first non-space character"
$ property
$ \s' -> do
let (s0, s1) = B.partition isSpace' s'
let (s0, s1) = B.partition isSpace s'
s = s0 <> s1
prs space s `shouldParse` ()
prs' space s `succeedsLeaving` s1
describe "hspace"
$ it "consumes space up to first non-space character"
$ property
$ \s' -> do
let (s0, s1) = B.partition isHSpace s'
s = s0 <> s1
prs hspace s `shouldParse` ()
prs' hspace s `succeedsLeaving` s1
describe "space1" $ do
context "when stream does not start with a space character"
$ it "signals correct parse error"
$ property
$ \ch s' -> not (isSpace' ch) ==> do
let (s0, s1) = B.partition isSpace' s'
$ \ch s' -> not (isSpace ch) ==> do
let (s0, s1) = B.partition isSpace s'
s = B.singleton ch <> s0 <> s1
prs space1 s `shouldFailWith` err 0 (utok ch <> elabel "white space")
prs' space1 s `failsLeaving` s
@ -100,7 +109,7 @@ spec = do
$ it "consumes space up to first non-space character"
$ property
$ \s' -> do
let (s0, s1) = B.partition isSpace' s'
let (s0, s1) = B.partition isSpace s'
s = " " <> s0 <> s1
prs space1 s `shouldParse` ()
prs' space1 s `succeedsLeaving` s1
@ -108,6 +117,27 @@ spec = do
$ it "signals correct parse error"
$ prs space1 "" `shouldFailWith` err 0 (ueof <> elabel "white space")
describe "hspace1" $ do
context "when stream does not start with a space character"
$ it "signals correct parse error"
$ property
$ \ch s' -> not (isSpace ch) ==> do
let (s0, s1) = B.partition isHSpace s'
s = B.singleton ch <> s0 <> s1
prs hspace1 s `shouldFailWith` err 0 (utok ch <> elabel "white space")
prs' hspace1 s `failsLeaving` s
context "when stream starts with a space character"
$ it "consumes space up to first non-space character"
$ property
$ \s' -> do
let (s0, s1) = B.partition isHSpace s'
s = " " <> s0 <> s1
prs hspace1 s `shouldParse` ()
prs' hspace1 s `succeedsLeaving` s1
context "when stream is empty"
$ it "signals correct parse error"
$ prs hspace1 "" `shouldFailWith` err 0 (ueof <> elabel "white space")
describe "controlChar" $
checkCharPred "control character" (isControl . toChar) controlChar
@ -234,14 +264,24 @@ prs' ::
(State ByteString Void, Either (ParseErrorBundle ByteString Void) a)
prs' p s = runParser' p (initialState s)
-- | 'Word8'-specialized version of 'isSpace'.
isSpace' :: Word8 -> Bool
isSpace' x
-- | 'Word8'-specialized version of 'Data.Char.isSpace'.
isSpace :: Word8 -> Bool
isSpace x
| x >= 9 && x <= 13 = True
| x == 32 = True
| x == 160 = True
| otherwise = False
-- | Like 'isSpace', but does not accept newlines and carriage returns.
isHSpace :: Word8 -> Bool
isHSpace x
| x == 9 = True
| x == 11 = True
| x == 12 = True
| x == 32 = True
| x == 160 = True
| otherwise = False
-- | Lift char transformation to byte transformation.
liftChar :: (Char -> Char) -> Word8 -> Word8
liftChar f x = (fromMaybe x . fromChar . f . toChar) x

View File

@ -131,7 +131,8 @@ spec = do
>>= \x -> sp >> ip EQ x >> sp >> ip GT x >> sp >> scn
ip = indentGuard scn
sp = void (symbol sc sbla <* C.eol)
if | col0 <= pos1 ->
if
| col0 <= pos1 ->
prs p s `shouldFailWith` errFancy 0 (ii GT pos1 col0)
| col1 /= col0 ->
prs p s `shouldFailWith` errFancy (getIndent l1 + g 1) (ii EQ col0 col1)
@ -178,7 +179,8 @@ spec = do
b = symbol sc
l x = return . (x,)
ib' = mkPos (fromIntegral ib)
if | col1 <= col0 ->
if
| col1 <= col0 ->
prs p s
`shouldFailWith` err (getIndent l1 + g 1) (utok (head sblb) <> eeof)
| isJust mn && col1 /= ib' ->
@ -282,7 +284,8 @@ spec = do
s = concat fragments
(col0, col1, col2) = (getCol l0, getCol l1, getCol l2)
(end0, end1) = (getEnd l0, getEnd l1)
if | end0 && col1 <= col0 ->
if
| end0 && col1 <= col0 ->
prs p s
`shouldFailWith` errFancy (getIndent l1 + g 1) (ii GT col0 col1)
| end1 && col2 <= col0 ->

View File

@ -84,6 +84,15 @@ spec = do
prs space s `shouldParse` ()
prs' space s `succeedsLeaving` s1
describe "hspace"
$ it "consumes space up to first non-space character"
$ property
$ \s' -> do
let (s0, s1) = partition isHSpace s'
s = s0 ++ s1
prs hspace s `shouldParse` ()
prs' hspace s `succeedsLeaving` s1
describe "space1" $ do
context "when stream does not start with a space character"
$ it "signals correct parse error"
@ -105,6 +114,27 @@ spec = do
$ it "signals correct parse error"
$ prs space1 "" `shouldFailWith` err 0 (ueof <> elabel "white space")
describe "hspace1" $ do
context "when stream does not start with a space character"
$ it "signals correct parse error"
$ property
$ \ch s' -> not (isHSpace ch) ==> do
let (s0, s1) = partition isHSpace s'
s = ch : s0 ++ s1
prs hspace1 s `shouldFailWith` err 0 (utok ch <> elabel "white space")
prs' hspace1 s `failsLeaving` s
context "when stream starts with a space character"
$ it "consumes space up to first non-space character"
$ property
$ \s' -> do
let (s0, s1) = partition isHSpace s'
s = ' ' : s0 ++ s1
prs hspace1 s `shouldParse` ()
prs' hspace1 s `succeedsLeaving` s1
context "when stream is empty"
$ it "signals correct parse error"
$ prs hspace1 "" `shouldFailWith` err 0 (ueof <> elabel "white space")
describe "controlChar" $
checkCharPred "control character" isControl controlChar
@ -374,3 +404,7 @@ casei x y =
x == toLower y
|| x == toUpper y
|| x == toTitle y
-- | Is it a horizontal space character?
isHSpace :: Char -> Bool
isHSpace x = isSpace x && x /= '\n' && x /= '\r'

View File

@ -1410,7 +1410,7 @@ spec = do
runParser' p st `shouldBe` (st', Right stateInput)
describe "atEnd" $ do
let p , p' :: Parser Bool
let p, p' :: Parser Bool
p = atEnd
p' = p <* empty
context "when stream is empty" $ do