Parser tests and fixes

2024-08-15 17:10:21 +03:00 · 2016-12-21 18:49:44 +03:00 · 2016-12-21 18:49:44 +03:00 · 4d3bb48d7b
commit 4d3bb48d7b
parent 477a36de5c
6 changed files with 89 additions and 22 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -9,6 +9,13 @@

 * Renamed `defaultMap` to `defaultTransliterations`.

+* Words now can contain digits (recognized by all parsers).
+
+* `parseSnakeCase` now is not confused when a word happens to have prefix
+  coinciding with an acronym. This is harder to fix for `parseCamelCase`
+  because acronym may contain capital letters, so old behavior is preserved
+  for `parseCamelCase` for now.
+
 * `parseCamelCase` and `parseSnakeCase` take any instance of `Foldable` as a
  collection of acronyms, not just lists.

--- a/Text/Inflections/Parse/CamelCase.hs
+++ b/Text/Inflections/Parse/CamelCase.hs
@ -28,6 +28,7 @@ import qualified Data.Text as T
 import Prelude hiding (Word)
 #else
 import Data.Foldable
+import Prelude hiding (elem)
 #endif

 -- | Parse a CamelCase string.
--- a/Text/Inflections/Parse/SnakeCase.hs
+++ b/Text/Inflections/Parse/SnakeCase.hs
@ -27,6 +27,7 @@ import qualified Data.Text as T
 import Prelude hiding (Word)
 #else
 import Data.Foldable
+import Prelude hiding (elem)
 #endif

 -- | Parse a snake_case string.
@ -48,25 +49,14 @@ parseSnakeCase acronyms = parse (parser acronyms) ""
 parser :: (Foldable f, Functor f)
  => f (Word 'Acronym)
  -> Parser [SomeWord]
-parser acronyms = ((a <|> n) `sepBy` char '_') <* eof
-  where
-    n = SomeWord <$> word
-    a = SomeWord <$> acronym acronyms
+parser acronyms = (pWord acronyms `sepBy` char '_') <* eof

-acronym :: (Foldable f, Functor f)
+pWord :: (Foldable f, Functor f)
  => f (Word 'Acronym)
-  -> Parser (Word 'Acronym)
-acronym acronyms = do
-  x <- T.pack <$> choice (string . T.unpack . unWord <$> acronyms)
-  case mkAcronym x of
-    Nothing -> empty -- cannot happen if the system is sound
-    Just acr -> return acr
-{-# INLINE acronym #-}
-
-word :: Parser (Word 'Normal)
-word = do
-  x <- T.pack <$> (some lowerChar <|> some digitChar)
-  case mkWord x of
-    Nothing -> empty -- cannot happen if the system is sound
-    Just wrd -> return wrd
-{-# INLINE word #-}
+  -> Parser SomeWord
+pWord acronyms = do
+  let acs = unWord <$> acronyms
+  r <- T.pack <$> some alphaNumChar
+  if r `elem` acs
+    then maybe empty (return . SomeWord) (mkAcronym r)
+    else maybe empty (return . SomeWord) (mkWord    r)
--- a/Text/Inflections/Types.hs
+++ b/Text/Inflections/Types.hs
@ -106,6 +106,9 @@ data SomeWord where
 instance Show SomeWord where
  show (SomeWord w) = show w

+instance Eq SomeWord where
+  x == y = unSomeWord id x == unSomeWord id y
+
 -- | Extract 'Text' from 'SomeWord' and apply given function only if the
 -- word inside wasn't an acronym.
 --
--- a/test/Text/Inflections/Parse/CamelCaseSpec.hs
+++ b/test/Text/Inflections/Parse/CamelCaseSpec.hs
@ -1,8 +1,47 @@
+{-# LANGUAGE OverloadedStrings #-}
+
 module Text.Inflections.Parse.CamelCaseSpec
  ( spec )
 where

 import Test.Hspec
+import Test.Hspec.Megaparsec
+import Text.Inflections

 spec :: Spec
-spec = return ()
+spec =
+  describe "parseCamelCase" $ do
+    context "when given no acronyms" $ do
+      context "when first word is capitalized" $
+        it "parses CamelCase correctly" $ do
+          r <- mapM (fmap SomeWord . mkWord) ["One","Two","Three"]
+          parseCamelCase [] "OneTwoThree" `shouldParse` r
+      context "when first word is not capitalized" $
+        it "parses camelCase correctly" $ do
+          r <- mapM (fmap SomeWord . mkWord) ["one","Two","Three"]
+          parseCamelCase [] "oneTwoThree" `shouldParse` r
+      context "when there are digits in the words" $
+        it "parses CamelCase correctly" $ do
+          r <- mapM (fmap SomeWord . mkWord) ["one1two","Three3"]
+          parseCamelCase [] "one1twoThree3" `shouldParse` r
+    context "when given some acronyms" $ do
+      context "when first word is capitalized" $
+        it "parses CamelCase correctly" $ do
+          a <- mkAcronym "BOO"
+          r <- mapM (fmap SomeWord . mkWord) ["One","BOO","One"]
+          parseCamelCase [a] "OneBOOOne" `shouldParse` r
+      context "when first word is not capitalized" $
+        it "parses camelCase correctly" $ do
+          a <- mkAcronym "BOO"
+          r <- mapM (fmap SomeWord . mkWord) ["one","Two","BOO"]
+          parseCamelCase [a] "oneTwoBOO" `shouldParse` r
+      context "when there are digits in the words" $
+        it "parses CamelCase correctly" $ do
+          a <- mkAcronym "BOO"
+          r <- mapM (fmap SomeWord . mkWord) ["one1two","Three3"]
+          parseCamelCase [a] "one1twoThree3" `shouldParse` r
+      context "when a word has a suffix coinciding with a acronym" $
+        it "is still parsed correctly as a normala word" $ do
+          a <- mkAcronym "boo"
+          r <- mapM (fmap SomeWord . mkWord) ["fooboo","Bar"]
+          parseCamelCase [a] "foobooBar" `shouldParse` r
--- a/test/Text/Inflections/Parse/SnakeCaseSpec.hs
+++ b/test/Text/Inflections/Parse/SnakeCaseSpec.hs
@ -1,8 +1,35 @@
+{-# LANGUAGE OverloadedStrings #-}
+
 module Text.Inflections.Parse.SnakeCaseSpec
  ( spec )
 where

 import Test.Hspec
+import Test.Hspec.Megaparsec
+import Text.Inflections

 spec :: Spec
-spec = return ()
+spec =
+  describe "parseSnakeCase" $ do
+    context "when given no acronyms" $ do
+      it "parses snake_case correctly" $ do
+        r <- mapM (fmap SomeWord . mkWord) ["OneTwo","Three"]
+        parseSnakeCase [] "OneTwo_Three" `shouldParse` r
+      it "handles digits in the words correctly" $ do
+        r <- mapM (fmap SomeWord . mkWord) ["one4a","two00"]
+        parseSnakeCase [] "one4a_two00" `shouldParse` r
+    context "when given some acronyms" $ do
+      it "parses snake_case correctly" $ do
+        a <- mkAcronym "BOO"
+        r <- mapM (fmap SomeWord . mkWord) ["BOO","one","one"]
+        parseSnakeCase [a] "BOO_one_one" `shouldParse` r
+      context "when acronym happens to be a prefix of other word" $
+        it "parses the word correctly" $ do
+          a <- mkAcronym "BOO"
+          r <- mapM (fmap SomeWord . mkWord) ["one","BOOtwo"]
+          parseSnakeCase [a] "one_BOOtwo" `shouldParse` r
+      context "when acronym happens to be a suffix of other word" $
+        it "parses the word correctly" $ do
+          a <- mkAcronym "BOO"
+          r <- mapM (fmap SomeWord . mkWord) ["oneBOO","two"]
+          parseSnakeCase [a] "oneBOO_two" `shouldParse` r