diff --git a/CHANGELOG.md b/CHANGELOG.md index f70cef6..26b542d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,13 @@ * Renamed `defaultMap` to `defaultTransliterations`. +* Words now can contain digits (recognized by all parsers). + +* `parseSnakeCase` now is not confused when a word happens to have prefix + coinciding with an acronym. This is harder to fix for `parseCamelCase` + because acronym may contain capital letters, so old behavior is preserved + for `parseCamelCase` for now. + * `parseCamelCase` and `parseSnakeCase` take any instance of `Foldable` as a collection of acronyms, not just lists. diff --git a/Text/Inflections/Parse/CamelCase.hs b/Text/Inflections/Parse/CamelCase.hs index e491ae9..340d93b 100644 --- a/Text/Inflections/Parse/CamelCase.hs +++ b/Text/Inflections/Parse/CamelCase.hs @@ -28,6 +28,7 @@ import qualified Data.Text as T import Prelude hiding (Word) #else import Data.Foldable +import Prelude hiding (elem) #endif -- | Parse a CamelCase string. diff --git a/Text/Inflections/Parse/SnakeCase.hs b/Text/Inflections/Parse/SnakeCase.hs index b64c9b1..19d6a24 100644 --- a/Text/Inflections/Parse/SnakeCase.hs +++ b/Text/Inflections/Parse/SnakeCase.hs @@ -27,6 +27,7 @@ import qualified Data.Text as T import Prelude hiding (Word) #else import Data.Foldable +import Prelude hiding (elem) #endif -- | Parse a snake_case string. @@ -48,25 +49,14 @@ parseSnakeCase acronyms = parse (parser acronyms) "" parser :: (Foldable f, Functor f) => f (Word 'Acronym) -> Parser [SomeWord] -parser acronyms = ((a <|> n) `sepBy` char '_') <* eof - where - n = SomeWord <$> word - a = SomeWord <$> acronym acronyms +parser acronyms = (pWord acronyms `sepBy` char '_') <* eof -acronym :: (Foldable f, Functor f) +pWord :: (Foldable f, Functor f) => f (Word 'Acronym) - -> Parser (Word 'Acronym) -acronym acronyms = do - x <- T.pack <$> choice (string . T.unpack . unWord <$> acronyms) - case mkAcronym x of - Nothing -> empty -- cannot happen if the system is sound - Just acr -> return acr -{-# INLINE acronym #-} - -word :: Parser (Word 'Normal) -word = do - x <- T.pack <$> (some lowerChar <|> some digitChar) - case mkWord x of - Nothing -> empty -- cannot happen if the system is sound - Just wrd -> return wrd -{-# INLINE word #-} + -> Parser SomeWord +pWord acronyms = do + let acs = unWord <$> acronyms + r <- T.pack <$> some alphaNumChar + if r `elem` acs + then maybe empty (return . SomeWord) (mkAcronym r) + else maybe empty (return . SomeWord) (mkWord r) diff --git a/Text/Inflections/Types.hs b/Text/Inflections/Types.hs index c27ecde..33b82c2 100644 --- a/Text/Inflections/Types.hs +++ b/Text/Inflections/Types.hs @@ -106,6 +106,9 @@ data SomeWord where instance Show SomeWord where show (SomeWord w) = show w +instance Eq SomeWord where + x == y = unSomeWord id x == unSomeWord id y + -- | Extract 'Text' from 'SomeWord' and apply given function only if the -- word inside wasn't an acronym. -- diff --git a/test/Text/Inflections/Parse/CamelCaseSpec.hs b/test/Text/Inflections/Parse/CamelCaseSpec.hs index ac2d2f3..b4ae233 100644 --- a/test/Text/Inflections/Parse/CamelCaseSpec.hs +++ b/test/Text/Inflections/Parse/CamelCaseSpec.hs @@ -1,8 +1,47 @@ +{-# LANGUAGE OverloadedStrings #-} + module Text.Inflections.Parse.CamelCaseSpec ( spec ) where import Test.Hspec +import Test.Hspec.Megaparsec +import Text.Inflections spec :: Spec -spec = return () +spec = + describe "parseCamelCase" $ do + context "when given no acronyms" $ do + context "when first word is capitalized" $ + it "parses CamelCase correctly" $ do + r <- mapM (fmap SomeWord . mkWord) ["One","Two","Three"] + parseCamelCase [] "OneTwoThree" `shouldParse` r + context "when first word is not capitalized" $ + it "parses camelCase correctly" $ do + r <- mapM (fmap SomeWord . mkWord) ["one","Two","Three"] + parseCamelCase [] "oneTwoThree" `shouldParse` r + context "when there are digits in the words" $ + it "parses CamelCase correctly" $ do + r <- mapM (fmap SomeWord . mkWord) ["one1two","Three3"] + parseCamelCase [] "one1twoThree3" `shouldParse` r + context "when given some acronyms" $ do + context "when first word is capitalized" $ + it "parses CamelCase correctly" $ do + a <- mkAcronym "BOO" + r <- mapM (fmap SomeWord . mkWord) ["One","BOO","One"] + parseCamelCase [a] "OneBOOOne" `shouldParse` r + context "when first word is not capitalized" $ + it "parses camelCase correctly" $ do + a <- mkAcronym "BOO" + r <- mapM (fmap SomeWord . mkWord) ["one","Two","BOO"] + parseCamelCase [a] "oneTwoBOO" `shouldParse` r + context "when there are digits in the words" $ + it "parses CamelCase correctly" $ do + a <- mkAcronym "BOO" + r <- mapM (fmap SomeWord . mkWord) ["one1two","Three3"] + parseCamelCase [a] "one1twoThree3" `shouldParse` r + context "when a word has a suffix coinciding with a acronym" $ + it "is still parsed correctly as a normala word" $ do + a <- mkAcronym "boo" + r <- mapM (fmap SomeWord . mkWord) ["fooboo","Bar"] + parseCamelCase [a] "foobooBar" `shouldParse` r diff --git a/test/Text/Inflections/Parse/SnakeCaseSpec.hs b/test/Text/Inflections/Parse/SnakeCaseSpec.hs index d4fb069..c8494c2 100644 --- a/test/Text/Inflections/Parse/SnakeCaseSpec.hs +++ b/test/Text/Inflections/Parse/SnakeCaseSpec.hs @@ -1,8 +1,35 @@ +{-# LANGUAGE OverloadedStrings #-} + module Text.Inflections.Parse.SnakeCaseSpec ( spec ) where import Test.Hspec +import Test.Hspec.Megaparsec +import Text.Inflections spec :: Spec -spec = return () +spec = + describe "parseSnakeCase" $ do + context "when given no acronyms" $ do + it "parses snake_case correctly" $ do + r <- mapM (fmap SomeWord . mkWord) ["OneTwo","Three"] + parseSnakeCase [] "OneTwo_Three" `shouldParse` r + it "handles digits in the words correctly" $ do + r <- mapM (fmap SomeWord . mkWord) ["one4a","two00"] + parseSnakeCase [] "one4a_two00" `shouldParse` r + context "when given some acronyms" $ do + it "parses snake_case correctly" $ do + a <- mkAcronym "BOO" + r <- mapM (fmap SomeWord . mkWord) ["BOO","one","one"] + parseSnakeCase [a] "BOO_one_one" `shouldParse` r + context "when acronym happens to be a prefix of other word" $ + it "parses the word correctly" $ do + a <- mkAcronym "BOO" + r <- mapM (fmap SomeWord . mkWord) ["one","BOOtwo"] + parseSnakeCase [a] "one_BOOtwo" `shouldParse` r + context "when acronym happens to be a suffix of other word" $ + it "parses the word correctly" $ do + a <- mkAcronym "BOO" + r <- mapM (fmap SomeWord . mkWord) ["oneBOO","two"] + parseSnakeCase [a] "oneBOO_two" `shouldParse` r