Parser tests and fixes

This commit is contained in:
mrkkrp 2016-12-21 18:49:44 +03:00
parent 477a36de5c
commit 4d3bb48d7b
6 changed files with 89 additions and 22 deletions

View File

@ -9,6 +9,13 @@
* Renamed `defaultMap` to `defaultTransliterations`.
* Words now can contain digits (recognized by all parsers).
* `parseSnakeCase` now is not confused when a word happens to have prefix
coinciding with an acronym. This is harder to fix for `parseCamelCase`
because acronym may contain capital letters, so old behavior is preserved
for `parseCamelCase` for now.
* `parseCamelCase` and `parseSnakeCase` take any instance of `Foldable` as a
collection of acronyms, not just lists.

View File

@ -28,6 +28,7 @@ import qualified Data.Text as T
import Prelude hiding (Word)
#else
import Data.Foldable
import Prelude hiding (elem)
#endif
-- | Parse a CamelCase string.

View File

@ -27,6 +27,7 @@ import qualified Data.Text as T
import Prelude hiding (Word)
#else
import Data.Foldable
import Prelude hiding (elem)
#endif
-- | Parse a snake_case string.
@ -48,25 +49,14 @@ parseSnakeCase acronyms = parse (parser acronyms) ""
parser :: (Foldable f, Functor f)
=> f (Word 'Acronym)
-> Parser [SomeWord]
parser acronyms = ((a <|> n) `sepBy` char '_') <* eof
where
n = SomeWord <$> word
a = SomeWord <$> acronym acronyms
parser acronyms = (pWord acronyms `sepBy` char '_') <* eof
acronym :: (Foldable f, Functor f)
pWord :: (Foldable f, Functor f)
=> f (Word 'Acronym)
-> Parser (Word 'Acronym)
acronym acronyms = do
x <- T.pack <$> choice (string . T.unpack . unWord <$> acronyms)
case mkAcronym x of
Nothing -> empty -- cannot happen if the system is sound
Just acr -> return acr
{-# INLINE acronym #-}
word :: Parser (Word 'Normal)
word = do
x <- T.pack <$> (some lowerChar <|> some digitChar)
case mkWord x of
Nothing -> empty -- cannot happen if the system is sound
Just wrd -> return wrd
{-# INLINE word #-}
-> Parser SomeWord
pWord acronyms = do
let acs = unWord <$> acronyms
r <- T.pack <$> some alphaNumChar
if r `elem` acs
then maybe empty (return . SomeWord) (mkAcronym r)
else maybe empty (return . SomeWord) (mkWord r)

View File

@ -106,6 +106,9 @@ data SomeWord where
instance Show SomeWord where
show (SomeWord w) = show w
instance Eq SomeWord where
x == y = unSomeWord id x == unSomeWord id y
-- | Extract 'Text' from 'SomeWord' and apply given function only if the
-- word inside wasn't an acronym.
--

View File

@ -1,8 +1,47 @@
{-# LANGUAGE OverloadedStrings #-}
module Text.Inflections.Parse.CamelCaseSpec
( spec )
where
import Test.Hspec
import Test.Hspec.Megaparsec
import Text.Inflections
spec :: Spec
spec = return ()
spec =
describe "parseCamelCase" $ do
context "when given no acronyms" $ do
context "when first word is capitalized" $
it "parses CamelCase correctly" $ do
r <- mapM (fmap SomeWord . mkWord) ["One","Two","Three"]
parseCamelCase [] "OneTwoThree" `shouldParse` r
context "when first word is not capitalized" $
it "parses camelCase correctly" $ do
r <- mapM (fmap SomeWord . mkWord) ["one","Two","Three"]
parseCamelCase [] "oneTwoThree" `shouldParse` r
context "when there are digits in the words" $
it "parses CamelCase correctly" $ do
r <- mapM (fmap SomeWord . mkWord) ["one1two","Three3"]
parseCamelCase [] "one1twoThree3" `shouldParse` r
context "when given some acronyms" $ do
context "when first word is capitalized" $
it "parses CamelCase correctly" $ do
a <- mkAcronym "BOO"
r <- mapM (fmap SomeWord . mkWord) ["One","BOO","One"]
parseCamelCase [a] "OneBOOOne" `shouldParse` r
context "when first word is not capitalized" $
it "parses camelCase correctly" $ do
a <- mkAcronym "BOO"
r <- mapM (fmap SomeWord . mkWord) ["one","Two","BOO"]
parseCamelCase [a] "oneTwoBOO" `shouldParse` r
context "when there are digits in the words" $
it "parses CamelCase correctly" $ do
a <- mkAcronym "BOO"
r <- mapM (fmap SomeWord . mkWord) ["one1two","Three3"]
parseCamelCase [a] "one1twoThree3" `shouldParse` r
context "when a word has a suffix coinciding with a acronym" $
it "is still parsed correctly as a normala word" $ do
a <- mkAcronym "boo"
r <- mapM (fmap SomeWord . mkWord) ["fooboo","Bar"]
parseCamelCase [a] "foobooBar" `shouldParse` r

View File

@ -1,8 +1,35 @@
{-# LANGUAGE OverloadedStrings #-}
module Text.Inflections.Parse.SnakeCaseSpec
( spec )
where
import Test.Hspec
import Test.Hspec.Megaparsec
import Text.Inflections
spec :: Spec
spec = return ()
spec =
describe "parseSnakeCase" $ do
context "when given no acronyms" $ do
it "parses snake_case correctly" $ do
r <- mapM (fmap SomeWord . mkWord) ["OneTwo","Three"]
parseSnakeCase [] "OneTwo_Three" `shouldParse` r
it "handles digits in the words correctly" $ do
r <- mapM (fmap SomeWord . mkWord) ["one4a","two00"]
parseSnakeCase [] "one4a_two00" `shouldParse` r
context "when given some acronyms" $ do
it "parses snake_case correctly" $ do
a <- mkAcronym "BOO"
r <- mapM (fmap SomeWord . mkWord) ["BOO","one","one"]
parseSnakeCase [a] "BOO_one_one" `shouldParse` r
context "when acronym happens to be a prefix of other word" $
it "parses the word correctly" $ do
a <- mkAcronym "BOO"
r <- mapM (fmap SomeWord . mkWord) ["one","BOOtwo"]
parseSnakeCase [a] "one_BOOtwo" `shouldParse` r
context "when acronym happens to be a suffix of other word" $
it "parses the word correctly" $ do
a <- mkAcronym "BOO"
r <- mapM (fmap SomeWord . mkWord) ["oneBOO","two"]
parseSnakeCase [a] "oneBOO_two" `shouldParse` r