mirror of
https://github.com/typeable/inflections-hs.git
synced 2024-08-15 17:10:21 +03:00
Parser tests and fixes
This commit is contained in:
parent
477a36de5c
commit
4d3bb48d7b
@ -9,6 +9,13 @@
|
||||
|
||||
* Renamed `defaultMap` to `defaultTransliterations`.
|
||||
|
||||
* Words now can contain digits (recognized by all parsers).
|
||||
|
||||
* `parseSnakeCase` now is not confused when a word happens to have prefix
|
||||
coinciding with an acronym. This is harder to fix for `parseCamelCase`
|
||||
because acronym may contain capital letters, so old behavior is preserved
|
||||
for `parseCamelCase` for now.
|
||||
|
||||
* `parseCamelCase` and `parseSnakeCase` take any instance of `Foldable` as a
|
||||
collection of acronyms, not just lists.
|
||||
|
||||
|
@ -28,6 +28,7 @@ import qualified Data.Text as T
|
||||
import Prelude hiding (Word)
|
||||
#else
|
||||
import Data.Foldable
|
||||
import Prelude hiding (elem)
|
||||
#endif
|
||||
|
||||
-- | Parse a CamelCase string.
|
||||
|
@ -27,6 +27,7 @@ import qualified Data.Text as T
|
||||
import Prelude hiding (Word)
|
||||
#else
|
||||
import Data.Foldable
|
||||
import Prelude hiding (elem)
|
||||
#endif
|
||||
|
||||
-- | Parse a snake_case string.
|
||||
@ -48,25 +49,14 @@ parseSnakeCase acronyms = parse (parser acronyms) ""
|
||||
parser :: (Foldable f, Functor f)
|
||||
=> f (Word 'Acronym)
|
||||
-> Parser [SomeWord]
|
||||
parser acronyms = ((a <|> n) `sepBy` char '_') <* eof
|
||||
where
|
||||
n = SomeWord <$> word
|
||||
a = SomeWord <$> acronym acronyms
|
||||
parser acronyms = (pWord acronyms `sepBy` char '_') <* eof
|
||||
|
||||
acronym :: (Foldable f, Functor f)
|
||||
pWord :: (Foldable f, Functor f)
|
||||
=> f (Word 'Acronym)
|
||||
-> Parser (Word 'Acronym)
|
||||
acronym acronyms = do
|
||||
x <- T.pack <$> choice (string . T.unpack . unWord <$> acronyms)
|
||||
case mkAcronym x of
|
||||
Nothing -> empty -- cannot happen if the system is sound
|
||||
Just acr -> return acr
|
||||
{-# INLINE acronym #-}
|
||||
|
||||
word :: Parser (Word 'Normal)
|
||||
word = do
|
||||
x <- T.pack <$> (some lowerChar <|> some digitChar)
|
||||
case mkWord x of
|
||||
Nothing -> empty -- cannot happen if the system is sound
|
||||
Just wrd -> return wrd
|
||||
{-# INLINE word #-}
|
||||
-> Parser SomeWord
|
||||
pWord acronyms = do
|
||||
let acs = unWord <$> acronyms
|
||||
r <- T.pack <$> some alphaNumChar
|
||||
if r `elem` acs
|
||||
then maybe empty (return . SomeWord) (mkAcronym r)
|
||||
else maybe empty (return . SomeWord) (mkWord r)
|
||||
|
@ -106,6 +106,9 @@ data SomeWord where
|
||||
instance Show SomeWord where
|
||||
show (SomeWord w) = show w
|
||||
|
||||
instance Eq SomeWord where
|
||||
x == y = unSomeWord id x == unSomeWord id y
|
||||
|
||||
-- | Extract 'Text' from 'SomeWord' and apply given function only if the
|
||||
-- word inside wasn't an acronym.
|
||||
--
|
||||
|
@ -1,8 +1,47 @@
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Text.Inflections.Parse.CamelCaseSpec
|
||||
( spec )
|
||||
where
|
||||
|
||||
import Test.Hspec
|
||||
import Test.Hspec.Megaparsec
|
||||
import Text.Inflections
|
||||
|
||||
spec :: Spec
|
||||
spec = return ()
|
||||
spec =
|
||||
describe "parseCamelCase" $ do
|
||||
context "when given no acronyms" $ do
|
||||
context "when first word is capitalized" $
|
||||
it "parses CamelCase correctly" $ do
|
||||
r <- mapM (fmap SomeWord . mkWord) ["One","Two","Three"]
|
||||
parseCamelCase [] "OneTwoThree" `shouldParse` r
|
||||
context "when first word is not capitalized" $
|
||||
it "parses camelCase correctly" $ do
|
||||
r <- mapM (fmap SomeWord . mkWord) ["one","Two","Three"]
|
||||
parseCamelCase [] "oneTwoThree" `shouldParse` r
|
||||
context "when there are digits in the words" $
|
||||
it "parses CamelCase correctly" $ do
|
||||
r <- mapM (fmap SomeWord . mkWord) ["one1two","Three3"]
|
||||
parseCamelCase [] "one1twoThree3" `shouldParse` r
|
||||
context "when given some acronyms" $ do
|
||||
context "when first word is capitalized" $
|
||||
it "parses CamelCase correctly" $ do
|
||||
a <- mkAcronym "BOO"
|
||||
r <- mapM (fmap SomeWord . mkWord) ["One","BOO","One"]
|
||||
parseCamelCase [a] "OneBOOOne" `shouldParse` r
|
||||
context "when first word is not capitalized" $
|
||||
it "parses camelCase correctly" $ do
|
||||
a <- mkAcronym "BOO"
|
||||
r <- mapM (fmap SomeWord . mkWord) ["one","Two","BOO"]
|
||||
parseCamelCase [a] "oneTwoBOO" `shouldParse` r
|
||||
context "when there are digits in the words" $
|
||||
it "parses CamelCase correctly" $ do
|
||||
a <- mkAcronym "BOO"
|
||||
r <- mapM (fmap SomeWord . mkWord) ["one1two","Three3"]
|
||||
parseCamelCase [a] "one1twoThree3" `shouldParse` r
|
||||
context "when a word has a suffix coinciding with a acronym" $
|
||||
it "is still parsed correctly as a normala word" $ do
|
||||
a <- mkAcronym "boo"
|
||||
r <- mapM (fmap SomeWord . mkWord) ["fooboo","Bar"]
|
||||
parseCamelCase [a] "foobooBar" `shouldParse` r
|
||||
|
@ -1,8 +1,35 @@
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Text.Inflections.Parse.SnakeCaseSpec
|
||||
( spec )
|
||||
where
|
||||
|
||||
import Test.Hspec
|
||||
import Test.Hspec.Megaparsec
|
||||
import Text.Inflections
|
||||
|
||||
spec :: Spec
|
||||
spec = return ()
|
||||
spec =
|
||||
describe "parseSnakeCase" $ do
|
||||
context "when given no acronyms" $ do
|
||||
it "parses snake_case correctly" $ do
|
||||
r <- mapM (fmap SomeWord . mkWord) ["OneTwo","Three"]
|
||||
parseSnakeCase [] "OneTwo_Three" `shouldParse` r
|
||||
it "handles digits in the words correctly" $ do
|
||||
r <- mapM (fmap SomeWord . mkWord) ["one4a","two00"]
|
||||
parseSnakeCase [] "one4a_two00" `shouldParse` r
|
||||
context "when given some acronyms" $ do
|
||||
it "parses snake_case correctly" $ do
|
||||
a <- mkAcronym "BOO"
|
||||
r <- mapM (fmap SomeWord . mkWord) ["BOO","one","one"]
|
||||
parseSnakeCase [a] "BOO_one_one" `shouldParse` r
|
||||
context "when acronym happens to be a prefix of other word" $
|
||||
it "parses the word correctly" $ do
|
||||
a <- mkAcronym "BOO"
|
||||
r <- mapM (fmap SomeWord . mkWord) ["one","BOOtwo"]
|
||||
parseSnakeCase [a] "one_BOOtwo" `shouldParse` r
|
||||
context "when acronym happens to be a suffix of other word" $
|
||||
it "parses the word correctly" $ do
|
||||
a <- mkAcronym "BOO"
|
||||
r <- mapM (fmap SomeWord . mkWord) ["oneBOO","two"]
|
||||
parseSnakeCase [a] "oneBOO_two" `shouldParse` r
|
||||
|
Loading…
Reference in New Issue
Block a user