From 212d0cb3e70e5f8ccdf041e2e231b0492555b76a Mon Sep 17 00:00:00 2001 From: mrkkrp Date: Fri, 16 Dec 2016 20:12:09 +0300 Subject: [PATCH 01/12] Implement a more type safe API --- .travis.yml | 2 +- Text/Inflections.hs | 50 ++++------ Text/Inflections/Camelize.hs | 24 ++--- Text/Inflections/Dasherize.hs | 23 +++-- Text/Inflections/Humanize.hs | 18 ++-- Text/Inflections/Parse/Acronym.hs | 32 ------- Text/Inflections/Parse/CamelCase.hs | 39 +++++--- Text/Inflections/Parse/SnakeCase.hs | 48 ++++++---- Text/Inflections/Parse/Types.hs | 54 ----------- Text/Inflections/Titleize.hs | 15 +-- Text/Inflections/Types.hs | 122 ++++++++++++++++++++++++ Text/Inflections/Underscore.hs | 14 +-- inflections.cabal | 3 +- test/Text/Inflections/DasherizeSpec.hs | 13 ++- test/Text/Inflections/HumanizeSpec.hs | 24 +++-- test/Text/Inflections/TitleizeSpec.hs | 19 ++-- test/Text/Inflections/UnderscoreSpec.hs | 13 ++- 17 files changed, 287 insertions(+), 226 deletions(-) delete mode 100644 Text/Inflections/Parse/Acronym.hs delete mode 100644 Text/Inflections/Parse/Types.hs create mode 100644 Text/Inflections/Types.hs diff --git a/.travis.yml b/.travis.yml index 7c81bb2..9b7d724 100644 --- a/.travis.yml +++ b/.travis.yml @@ -39,7 +39,7 @@ script: *) cabal test --show-details=always ;; esac - cabal sdist - - cabal haddock | grep "100%" | wc -l | grep "14" + - cabal haddock | grep "100%" | wc -l | grep "13" # after_script: # - cabal install hpc-coveralls diff --git a/Text/Inflections.hs b/Text/Inflections.hs index 6c7e814..cebcfde 100644 --- a/Text/Inflections.hs +++ b/Text/Inflections.hs @@ -71,36 +71,26 @@ code or bug reports: -} module Text.Inflections - ( camelize - , camelizeCustom - - , dasherize - - , humanize - - , underscore - - , titleize - - , Transliterations - , defaultMap - - , parameterize - , parameterizeCustom - - , transliterate - , transliterateCustom - - , ordinal - , ordinalize - - , parseSnakeCase - , parseCamelCase + ( camelize + , camelizeCustom + , dasherize + , humanize + , underscore + , titleize + , Transliterations + , defaultMap + , parameterize + , parameterizeCustom + , transliterate + , transliterateCustom + , ordinal + , ordinalize + , parseSnakeCase + , parseCamelCase -- * Often used combinators - , toUnderscore - , toDashed - , toCamelCased - ) + , toUnderscore + , toDashed + , toCamelCased ) where import Control.Monad (liftM) @@ -114,9 +104,9 @@ import Text.Inflections.Ordinal (ordinal, ordinalize) import Text.Inflections.Parameterize (parameterize, parameterizeCustom) import Text.Inflections.Parse.CamelCase (parseCamelCase) import Text.Inflections.Parse.SnakeCase (parseSnakeCase) -import Text.Inflections.Parse.Types import Text.Inflections.Titleize (titleize) import Text.Inflections.Transliterate (transliterate, transliterateCustom) +import Text.Inflections.Types import Text.Inflections.Underscore (underscore) import Text.Megaparsec diff --git a/Text/Inflections/Camelize.hs b/Text/Inflections/Camelize.hs index 5991524..bb80524 100644 --- a/Text/Inflections/Camelize.hs +++ b/Text/Inflections/Camelize.hs @@ -18,34 +18,30 @@ module Text.Inflections.Camelize where import Data.Text (Text) -import Text.Inflections.Parse.Types +import Text.Inflections.Types import qualified Data.Text as T -#if MIN_VERSION_base(4,8,0) -import Prelude hiding (Word) -#else +#if !MIN_VERSION_base(4,8,0) import Control.Applicative #endif --- |Turns a an input Word List in into CamelCase. Returns the CamelCase String. +-- | Turn an input word list in into CamelCase. -- -- >>> camelize [ Word "foo", Acronym "bar", Word "bazz" ] -- "FoobarBazz" camelize - :: [Word] -- ^ Input Words to separate with underscores - -> Text -- ^ The camelized 'Text' + :: [SomeWord] -- ^ Input words + -> Text -- ^ The camelized 'Text' camelize = camelizeCustom True -{-# INLINE camelize #-} --- |Turns an input Word List into a CamelCase String. +-- | Turn an input word list into a CamelCase String. -- -- >>> camelizeCustom False [ Word "foo", Acronym "bar", Word "bazz" ] -- "foobarBazz" camelizeCustom - :: Bool -- ^ Whether to capitalize the first character in the output String - -> [Word] -- ^ The input Words - -> Text -- ^ The camelized 'Text' + :: Bool -- ^ Whether to capitalize the first character in the output String + -> [SomeWord] -- ^ The input Words + -> Text -- ^ The camelized 'Text' camelizeCustom _ [] = "" camelizeCustom c (x:xs) = T.concat $ - mapWord (if c then T.toTitle else T.toLower) x : (mapWord T.toTitle <$> xs) -{-# INLINE camelizeCustom #-} + unSomeWord (if c then T.toTitle else T.toLower) x : (unSomeWord T.toTitle <$> xs) diff --git a/Text/Inflections/Dasherize.hs b/Text/Inflections/Dasherize.hs index 4746e96..8b46aa8 100644 --- a/Text/Inflections/Dasherize.hs +++ b/Text/Inflections/Dasherize.hs @@ -9,25 +9,24 @@ -- -- Conversion to dasherized phrases. -{-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} -module Text.Inflections.Dasherize ( dasherize ) where +module Text.Inflections.Dasherize + ( dasherize ) +where import Data.Text (Text) -import Text.Inflections.Parse.Types +import Text.Inflections.Types import qualified Data.Text as T -#if MIN_VERSION_base(4,8,0) -import Prelude hiding (Word) -#endif - -- | Replaces underscores in a snake_cased string with dashes (hyphens). -- --- >>> dasherize [ Word "foo", Acronym "bar", Word "bazz" ] +-- >>> foo <- SomeWord <$> mkWord "foo" +-- >>> bar <- SomeWord <$> mkAcronym "bar" +-- >>> bazz <- SomeWord <$> mkWord "bazz" +-- >>> dasherize [foo,bar,bazz] -- "foo-bar-bazz" dasherize - :: [Word] -- ^ Input Words to separate with dashes - -> Text -- ^ The dasherized String -dasherize = T.intercalate "-" . fmap (mapWord T.toLower) -{-# INLINE dasherize #-} + :: [SomeWord] -- ^ Input Words to separate with dashes + -> Text -- ^ The dasherized String +dasherize = T.intercalate "-" . fmap (unSomeWord T.toLower) diff --git a/Text/Inflections/Humanize.hs b/Text/Inflections/Humanize.hs index e25765d..6defa50 100644 --- a/Text/Inflections/Humanize.hs +++ b/Text/Inflections/Humanize.hs @@ -17,12 +17,10 @@ module Text.Inflections.Humanize where import Data.Text (Text) -import Text.Inflections.Parse.Types +import Text.Inflections.Types import qualified Data.Text as T -#if MIN_VERSION_base(4,8,0) -import Prelude hiding (Word) -#else +#if !MIN_VERSION_base(4,8,0) import Control.Applicative #endif @@ -30,13 +28,15 @@ import Control.Applicative -- 'Text.Inflections.Titleize.titleize', this is meant for creating pretty -- output. -- --- >>> humanize [ Word "foo", Acronym "bar", Word "bazz" ] +-- >>> humanize [Word "foo", Acronym "bar", Word "bazz"] -- "Foo bar bazz" +-- +-- Note that as of version 0.3.0.0 @Word@ and @Acronym@ constructors are +-- hidden, but you still can construct them with 'mkWord' and 'mkAcronym'. humanize - :: [Word] -- ^ List of Words, first of which will be capitalized - -> Text -- ^ The humanized output + :: [SomeWord] -- ^ List of Words, first of which will be capitalized + -> Text -- ^ The humanized output humanize xs' = - case mapWord (T.replace "_" " ") <$> xs' of + case unSomeWord (T.replace "_" " ") <$> xs' of [] -> "" (x:xs) -> T.unwords $ T.toTitle x : (T.toLower <$> xs) -{-# INLINE humanize #-} diff --git a/Text/Inflections/Parse/Acronym.hs b/Text/Inflections/Parse/Acronym.hs deleted file mode 100644 index cadf723..0000000 --- a/Text/Inflections/Parse/Acronym.hs +++ /dev/null @@ -1,32 +0,0 @@ --- | --- Module : Text.Inflections.Parse.Acronym --- Copyright : © 2016 Justin Leitgeb --- License : MIT --- --- Maintainer : Justin Leitgeb --- Stability : experimental --- Portability : portable --- --- Parser for acronyms. - -{-# LANGUAGE CPP #-} - -module Text.Inflections.Parse.Acronym - ( acronym ) -where - -import Data.Text (Text) -import Text.Inflections.Parse.Types -import Text.Megaparsec -import Text.Megaparsec.Text -import qualified Data.Text as T - -#if MIN_VERSION_base(4,8,0) -import Prelude hiding (Word) -#endif - --- | Parser that accepts a 'Text' from given collection and turns it into --- an 'Acronym'. -acronym :: [Text] -> Parser Word -acronym = fmap (Acronym . T.pack) . choice . fmap (string . T.unpack) -{-# INLINE acronym #-} diff --git a/Text/Inflections/Parse/CamelCase.hs b/Text/Inflections/Parse/CamelCase.hs index 6ef5571..fcb4663 100644 --- a/Text/Inflections/Parse/CamelCase.hs +++ b/Text/Inflections/Parse/CamelCase.hs @@ -10,26 +10,25 @@ -- Parser for camel case “symbols”. {-# LANGUAGE CPP #-} +{-# LANGUAGE DataKinds #-} {-# LANGUAGE OverloadedStrings #-} module Text.Inflections.Parse.CamelCase ( parseCamelCase ) where +import Control.Applicative import Data.Text (Text) -import Text.Inflections.Parse.Acronym (acronym) -import Text.Inflections.Parse.Types (Word(..)) +import Text.Inflections.Types import Text.Megaparsec import Text.Megaparsec.Text import qualified Data.Text as T #if MIN_VERSION_base(4,8,0) import Prelude hiding (Word) -#else -import Control.Applicative #endif --- |Parse a CamelCase string. +-- | Parse a CamelCase string. -- -- >>> parseCamelCase ["Bar"] "FooBarBazz" -- Right [Word "Foo",Acronym "Bar",Word "Bazz"] @@ -37,20 +36,32 @@ import Control.Applicative -- Left "(unknown)" (line 1, column 4): -- unexpected '_' parseCamelCase - :: [Text] -- ^ Collection of acronyms - -> Text -- ^ Input - -> Either (ParseError Char Dec) [Word] -- ^ Result of parsing + :: [Word 'Acronym] -- ^ Collection of acronyms + -> Text -- ^ Input + -> Either (ParseError Char Dec) [SomeWord] -- ^ Result of parsing parseCamelCase acronyms = parse (parser acronyms) "" parser - :: [Text] -- ^ Collection of acronyms - -> Parser [Word] -- ^ CamelCase parser -parser acronyms = many (acronym acronyms <|> word) <* eof -{-# INLINE parser #-} + :: [Word 'Acronym] -- ^ Collection of acronyms + -> Parser [SomeWord] -- ^ CamelCase parser +parser acronyms = many (a <|> n) <* eof + where + n = SomeWord <$> word + a = SomeWord <$> acronym acronyms -word :: Parser Word +acronym :: [Word 'Acronym] -> Parser (Word 'Acronym) +acronym acronyms = do + x <- T.pack <$> choice (string . T.unpack . unWord <$> acronyms) + case mkAcronym x of + Nothing -> empty -- cannot happen if the system is sound + Just acr -> return acr +{-# INLINE acronym #-} + +word :: Parser (Word 'Normal) word = do firstChar <- upperChar <|> lowerChar restChars <- many $ lowerChar <|> digitChar - return . Word . T.pack $ firstChar : restChars + case (mkWord . T.pack) (firstChar : restChars) of + Nothing -> empty -- cannot happen if the system is sound + Just wrd -> return wrd {-# INLINE word #-} diff --git a/Text/Inflections/Parse/SnakeCase.hs b/Text/Inflections/Parse/SnakeCase.hs index 0d9b207..ae01ce2 100644 --- a/Text/Inflections/Parse/SnakeCase.hs +++ b/Text/Inflections/Parse/SnakeCase.hs @@ -9,47 +9,59 @@ -- -- Parser for snake case “symbols”. -{-# LANGUAGE CPP #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE DataKinds #-} module Text.Inflections.Parse.SnakeCase ( parseSnakeCase ) where +import Control.Applicative import Data.Text (Text) -import Text.Inflections.Parse.Acronym (acronym) -import Text.Inflections.Parse.Types (Word(..)) +import Text.Inflections.Types import Text.Megaparsec import Text.Megaparsec.Text import qualified Data.Text as T #if MIN_VERSION_base(4,8,0) import Prelude hiding (Word) -#else -import Control.Applicative #endif --- |Parses a snake_case string. +-- | Parse a snake_case string. -- --- >>> parseSnakeCase ["bar"] "foo_bar_bazz" +-- >>> bar <- mkAcronym "bar" +-- >>> parseSnakeCase [bar] "foo_bar_bazz" -- Right [Word "foo",Acronym "bar",Word "bazz"] +-- -- >>> parseSnakeCase [] "fooBarBazz" -- Left "(unknown)" (line 1, column 4): -- unexpected 'B' parseSnakeCase - :: [Text] -- ^ Collection of acronyms + :: [Word 'Acronym] -- ^ Collection of acronyms -> Text -- ^ Input - -> Either (ParseError Char Dec) [Word] -- ^ Result of parsing + -> Either (ParseError Char Dec) [SomeWord] -- ^ Result of parsing parseSnakeCase acronyms = parse (parser acronyms) "" parser - :: [Text] - -> Parser [Word] -parser acronyms = do - ws <- (acronym acronyms <|> word) `sepBy` char '_' - eof - return ws -{-# INLINE parser #-} + :: [Word 'Acronym] + -> Parser [SomeWord] +parser acronyms = ((a <|> n) `sepBy` char '_') <* eof + where + n = SomeWord <$> word + a = SomeWord <$> acronym acronyms -word :: Parser Word -word = Word . T.pack <$> (some lowerChar <|> some digitChar) +acronym :: [Word 'Acronym] -> Parser (Word 'Acronym) +acronym acronyms = do + x <- T.pack <$> choice (string . T.unpack . unWord <$> acronyms) + case mkAcronym x of + Nothing -> empty -- cannot happen if the system is sound + Just acr -> return acr +{-# INLINE acronym #-} + +word :: Parser (Word 'Normal) +word = do + x <- T.pack <$> (some lowerChar <|> some digitChar) + case mkWord x of + Nothing -> empty -- cannot happen if the system is sound + Just wrd -> return wrd {-# INLINE word #-} diff --git a/Text/Inflections/Parse/Types.hs b/Text/Inflections/Parse/Types.hs deleted file mode 100644 index 7951e28..0000000 --- a/Text/Inflections/Parse/Types.hs +++ /dev/null @@ -1,54 +0,0 @@ --- | --- Module : Text.Inflections.Parse.Types --- Copyright : © 2016 Justin Leitgeb --- License : MIT --- --- Maintainer : Justin Leitgeb --- Stability : experimental --- Portability : portable --- --- Types used in the library. - -{-# LANGUAGE CPP #-} -{-# LANGUAGE DeriveDataTypeable #-} - -module Text.Inflections.Parse.Types - ( Word (..) -- FIXME we should not export the constructor - , unWord - , mapWord - , InflectionException (..) ) -where - -import Control.Monad.Catch (Exception) -import Data.Text (Text) -import Data.Typeable (Typeable) -import Text.Megaparsec - -#if MIN_VERSION_base(4,8,0) -import Prelude hiding (Word) -#endif - --- | A 'String' that should be kept whole through applied inflections -data Word - = Word Text -- ^ A word that may be transformed by inflection - | Acronym Text -- ^ A word that may not be transformed by inflections - deriving (Show, Eq) - --- | Get a 'Text' value from 'Word'. -unWord :: Word -> Text -unWord (Word s) = s -unWord (Acronym s) = s -{-# INLINE unWord #-} - --- | Apply 'Text' transforming function to a 'Word' unless it's a 'Acronym'. -mapWord :: (Text -> Text) -> Word -> Text -mapWord f (Word s) = f s -mapWord _ (Acronym s) = s -{-# INLINE mapWord #-} - --- | The exceptions that is thrown when parsing of input fails. - -data InflectionException = InflectionParsingFailed (ParseError Char Dec) - deriving (Eq, Show, Typeable) - -instance Exception InflectionException diff --git a/Text/Inflections/Titleize.hs b/Text/Inflections/Titleize.hs index ee6429b..8f0d4d0 100644 --- a/Text/Inflections/Titleize.hs +++ b/Text/Inflections/Titleize.hs @@ -9,26 +9,19 @@ -- -- Conversion to titleized phrases. -{-# LANGUAGE CPP #-} - module Text.Inflections.Titleize ( titleize ) where import Data.Text (Text) -import Text.Inflections.Parse.Types +import Text.Inflections.Types import qualified Data.Text as T -#if MIN_VERSION_base(4,8,0) -import Prelude hiding (Word) -#endif - -- | Capitalize all the Words in the input list. -- -- >>> titleize [ Word "foo", Acronym "bar", Word "bazz" ] -- "Foo bar Bazz" titleize - :: [Word] -- ^ List of Words, first of which will be capitalized - -> Text -- ^ The titleized String -titleize = T.unwords . fmap (mapWord T.toTitle) -{-# INLINE titleize #-} + :: [SomeWord] -- ^ List of Words, first of which will be capitalized + -> Text -- ^ The titleized String +titleize = T.unwords . fmap (unSomeWord T.toTitle) diff --git a/Text/Inflections/Types.hs b/Text/Inflections/Types.hs new file mode 100644 index 0000000..ca183a8 --- /dev/null +++ b/Text/Inflections/Types.hs @@ -0,0 +1,122 @@ +-- | +-- Module : Text.Inflections.Types +-- Copyright : © 2016 Justin Leitgeb +-- License : MIT +-- +-- Maintainer : Justin Leitgeb +-- Stability : experimental +-- Portability : portable +-- +-- Types used in the library. Usually you don't need to import this module +-- and "Text.Inflections" should be enough. + +{-# LANGUAGE CPP #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE KindSignatures #-} + +module Text.Inflections.Types + ( Word + , WordType (..) + , mkWord + , mkAcronym + , unWord + , SomeWord (..) + , unSomeWord + , InflectionException (..) ) +where + +import Control.Monad.Catch +import Data.Char (isSpace) +import Data.Data (Data) +import Data.Text (Text) +import Data.Typeable (Typeable) +import GHC.Generics (Generic) +import Text.Megaparsec +import qualified Data.Text as T + +#if MIN_VERSION_base(4,8,0) +import Prelude hiding (Word) +#endif + +-- | Create a word from given 'Text'. The input should not contain spaces or +-- 'InflectionInvalidWord' will be thrown. + +mkWord :: MonadThrow m => Text -> m (Word 'Normal) +mkWord txt = + if T.any isSpace txt + then throwM (InflectionInvalidWord txt) + else return (Word txt) + +-- | Create an acronym from given 'Text'. The input should not contain +-- spaces or 'InflectionInvalidAcronym' will be thrown. Acronym is different +-- from normal word by that it may not be transformed by inflections. + +mkAcronym :: MonadThrow m => Text -> m (Word 'Acronym) +mkAcronym txt = + if T.any isSpace txt + then throwM (InflectionInvalidAcronym txt) + else return (Word txt) + +-- | A 'Text' value that should be kept whole through applied inflections. +data Word (t :: WordType) = Word Text + deriving (Eq, Ord, Show) + +instance Show (Word 'Normal) where + show (Word x) = "Word " ++ show x + +instance Show (Word 'Acronym) where + show (Word x) = "Acronym " ++ show x + +-- | A type-level tag for words. + +data WordType = Normal | Acronym + +-- | Get a 'Text' value from 'Word'. +unWord :: Word t -> Text +unWord (Word s) = s + +-- | An existential wrapper that allows to keep words and acronyms in single +-- list for example. The only thing that receiver of 'SomeWord' can do is to +-- apply 'unWord' on it, of course. This is faciliated by 'unSomeWord'. + +data SomeWord where + SomeWord :: Transformable (Word t) => Word t -> SomeWord + -- NOTE The constraint is only needed because GHC is not smart enough + -- (yet?) to figure out that t cannot be anything other than Normal and + -- Acronym and thus all cases are already covered by the instances + -- provided below. + +instance Show SomeWord where + show (SomeWord w) = show w + +-- | Extract 'Text' from 'SomeWord' and apply given function only if the +-- word inside wasn't an acronym. + +unSomeWord :: (Text -> Text) -> SomeWord -> Text +unSomeWord f (SomeWord w) = transform f w + +-- | Non public stuff. + +class Transformable a where + transform :: (Text -> Text) -> a -> Text + +instance Transformable (Word 'Normal) where + transform f = f . unWord + +instance Transformable (Word 'Acronym) where + transform _ = unWord + +-- | The exceptions that is thrown when parsing of input fails. + +data InflectionException + = InflectionParsingFailed (ParseError Char Dec) + | InflectionInvalidWord Text + | InflectionInvalidAcronym Text + deriving (Eq, Show, Typeable, Data, Generic) + +instance Exception InflectionException diff --git a/Text/Inflections/Underscore.hs b/Text/Inflections/Underscore.hs index b965bd1..3f80fce 100644 --- a/Text/Inflections/Underscore.hs +++ b/Text/Inflections/Underscore.hs @@ -9,7 +9,6 @@ -- -- Conversion to phrases separated by underscores. -{-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} module Text.Inflections.Underscore @@ -17,19 +16,14 @@ module Text.Inflections.Underscore where import Data.Text (Text) -import Text.Inflections.Parse.Types +import Text.Inflections.Types import qualified Data.Text as T -#if MIN_VERSION_base(4,8,0) -import Prelude hiding (Word) -#endif - -- |Turns a CamelCase string into an underscore_separated 'Text'. -- -- >>> underscore [ Word "foo", Acronym "bar", Word "bazz" ] -- "foo_bar_bazz" underscore - :: [Word] -- ^ Input Words to separate with underscores - -> Text -- ^ The underscored String -underscore = T.intercalate "_" . fmap (mapWord T.toLower) -{-# INLINE underscore #-} + :: [SomeWord] -- ^ Input Words to separate with underscores + -> Text -- ^ The underscored String +underscore = T.intercalate "_" . fmap (unSomeWord T.toLower) diff --git a/inflections.cabal b/inflections.cabal index c8f3451..c21a1fd 100644 --- a/inflections.cabal +++ b/inflections.cabal @@ -29,7 +29,7 @@ flag dev library exposed-modules: Text.Inflections - , Text.Inflections.Parse.Types + , Text.Inflections.Types other-modules: Text.Inflections.Data , Text.Inflections.Camelize @@ -37,7 +37,6 @@ library , Text.Inflections.Humanize , Text.Inflections.Ordinal , Text.Inflections.Parameterize - , Text.Inflections.Parse.Acronym , Text.Inflections.Parse.CamelCase , Text.Inflections.Parse.SnakeCase , Text.Inflections.Titleize diff --git a/test/Text/Inflections/DasherizeSpec.hs b/test/Text/Inflections/DasherizeSpec.hs index 7c5f7ea..050629c 100644 --- a/test/Text/Inflections/DasherizeSpec.hs +++ b/test/Text/Inflections/DasherizeSpec.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} module Text.Inflections.DasherizeSpec @@ -7,9 +8,15 @@ where import Test.Hspec import Text.Inflections (dasherize) -import Text.Inflections.Parse.Types (Word (..)) +import Text.Inflections.Types + +#if !MIN_VERSION_base(4,8,0) +import Control.Applicative +#endif spec :: Spec spec = describe "dasherize" $ - it "dasherizes a collection of words sentence" $ - dasherize [Word "foo", Word "bar"] `shouldBe` "foo-bar" + it "dasherizes a collection of words sentence" $ do + foo <- SomeWord <$> mkWord "foo" + bar <- SomeWord <$> mkWord "bar" + dasherize [foo,bar] `shouldBe` "foo-bar" diff --git a/test/Text/Inflections/HumanizeSpec.hs b/test/Text/Inflections/HumanizeSpec.hs index 3f36e30..7b742c7 100644 --- a/test/Text/Inflections/HumanizeSpec.hs +++ b/test/Text/Inflections/HumanizeSpec.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} module Text.Inflections.HumanizeSpec (spec) where @@ -5,13 +6,22 @@ module Text.Inflections.HumanizeSpec (spec) where import Test.Hspec import Text.Inflections (humanize) -import Text.Inflections.Parse.Types (Word(..)) +import Text.Inflections.Types + +#if !MIN_VERSION_base(4,8,0) +import Control.Applicative +#endif spec :: Spec spec = describe "humazine" $ do - it "converts snake case to a human-readable string" $ - humanize [Word "employee", Word "salary"] `shouldBe` "Employee salary" - it "turns underscores into spaces" $ - humanize [Word "employee", Word "has_salary"] `shouldBe` "Employee has salary" - it "capitalizes the first word of a sentence" $ - humanize [Word "underground"] `shouldBe` "Underground" + it "converts snake case to a human-readable string" $ do + employee <- SomeWord <$> mkWord "employee" + salary <- SomeWord <$> mkWord "salary" + humanize [employee,salary] `shouldBe` "Employee salary" + it "turns underscores into spaces" $ do + employee <- SomeWord <$> mkWord "employee" + hasSalary <- SomeWord <$> mkWord "has_salary" + humanize [employee, hasSalary] `shouldBe` "Employee has salary" + it "capitalizes the first word of a sentence" $ do + underground <- SomeWord <$> mkWord "underground" + humanize [underground] `shouldBe` "Underground" diff --git a/test/Text/Inflections/TitleizeSpec.hs b/test/Text/Inflections/TitleizeSpec.hs index 82f7c68..4359ff2 100644 --- a/test/Text/Inflections/TitleizeSpec.hs +++ b/test/Text/Inflections/TitleizeSpec.hs @@ -1,15 +1,22 @@ +{-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} module Text.Inflections.TitleizeSpec (spec) where import Test.Hspec - import Text.Inflections (titleize) -import Text.Inflections.Parse.Types (Word(..)) +import Text.Inflections.Types + +#if !MIN_VERSION_base(4,8,0) +import Control.Applicative +#endif spec :: Spec spec = describe "titleize" $ do - it "converts two words to title case" $ - titleize [Word "Employee", Word "Salary"] `shouldBe` "Employee Salary" - it "converts one word to title case" $ - titleize [Word "underground"] `shouldBe` "Underground" + it "converts two words to title case" $ do + employee <- SomeWord <$> mkWord "Employee" + salary <- SomeWord <$> mkWord "Salary" + titleize [employee,salary] `shouldBe` "Employee Salary" + it "converts one word to title case" $ do + underground <- SomeWord <$> mkWord "underground" + titleize [underground] `shouldBe` "Underground" diff --git a/test/Text/Inflections/UnderscoreSpec.hs b/test/Text/Inflections/UnderscoreSpec.hs index eaaa705..c128299 100644 --- a/test/Text/Inflections/UnderscoreSpec.hs +++ b/test/Text/Inflections/UnderscoreSpec.hs @@ -1,12 +1,19 @@ +{-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} module Text.Inflections.UnderscoreSpec (spec) where import Test.Hspec import Text.Inflections (underscore) -import Text.Inflections.Parse.Types (Word(..)) +import Text.Inflections.Types + +#if !MIN_VERSION_base(4,8,0) +import Control.Applicative +#endif spec :: Spec spec = describe "underscore" $ - it "converts a word list to snake case" $ - underscore [Word "test", Word "this"] `shouldBe` "test_this" + it "converts a word list to snake case" $ do + test <- SomeWord <$> mkWord "test" + this <- SomeWord <$> mkWord "this" + underscore [test, this] `shouldBe` "test_this" From feac25f60c45ac26d2a0a29a5d010b4ecfdc8713 Mon Sep 17 00:00:00 2001 From: mrkkrp Date: Wed, 21 Dec 2016 15:13:34 +0300 Subject: [PATCH 02/12] We don't want to have overlapping instances --- Text/Inflections/Types.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Text/Inflections/Types.hs b/Text/Inflections/Types.hs index ca183a8..90c01cf 100644 --- a/Text/Inflections/Types.hs +++ b/Text/Inflections/Types.hs @@ -64,7 +64,7 @@ mkAcronym txt = -- | A 'Text' value that should be kept whole through applied inflections. data Word (t :: WordType) = Word Text - deriving (Eq, Ord, Show) + deriving (Eq, Ord) instance Show (Word 'Normal) where show (Word x) = "Word " ++ show x @@ -85,7 +85,7 @@ unWord (Word s) = s -- apply 'unWord' on it, of course. This is faciliated by 'unSomeWord'. data SomeWord where - SomeWord :: Transformable (Word t) => Word t -> SomeWord + SomeWord :: (Transformable (Word t), Show (Word t)) => Word t -> SomeWord -- NOTE The constraint is only needed because GHC is not smart enough -- (yet?) to figure out that t cannot be anything other than Normal and -- Acronym and thus all cases are already covered by the instances From 008c07098267f2f27c4ea2ddb9c52b9b6d2208f2 Mon Sep 17 00:00:00 2001 From: mrkkrp Date: Wed, 21 Dec 2016 15:14:17 +0300 Subject: [PATCH 03/12] Add some @since declarations --- Text/Inflections/Types.hs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Text/Inflections/Types.hs b/Text/Inflections/Types.hs index 90c01cf..011b22e 100644 --- a/Text/Inflections/Types.hs +++ b/Text/Inflections/Types.hs @@ -45,6 +45,8 @@ import Prelude hiding (Word) -- | Create a word from given 'Text'. The input should not contain spaces or -- 'InflectionInvalidWord' will be thrown. +-- +-- @since 0.3.0.0 mkWord :: MonadThrow m => Text -> m (Word 'Normal) mkWord txt = @@ -55,6 +57,8 @@ mkWord txt = -- | Create an acronym from given 'Text'. The input should not contain -- spaces or 'InflectionInvalidAcronym' will be thrown. Acronym is different -- from normal word by that it may not be transformed by inflections. +-- +-- @since 0.3.0.0 mkAcronym :: MonadThrow m => Text -> m (Word 'Acronym) mkAcronym txt = @@ -63,6 +67,7 @@ mkAcronym txt = else return (Word txt) -- | A 'Text' value that should be kept whole through applied inflections. + data Word (t :: WordType) = Word Text deriving (Eq, Ord) @@ -73,16 +78,23 @@ instance Show (Word 'Acronym) where show (Word x) = "Acronym " ++ show x -- | A type-level tag for words. +-- +-- @since 0.3.0.0 data WordType = Normal | Acronym -- | Get a 'Text' value from 'Word'. +-- +-- @since 0.3.0.0 + unWord :: Word t -> Text unWord (Word s) = s -- | An existential wrapper that allows to keep words and acronyms in single -- list for example. The only thing that receiver of 'SomeWord' can do is to -- apply 'unWord' on it, of course. This is faciliated by 'unSomeWord'. +-- +-- @since 0.3.0.0 data SomeWord where SomeWord :: (Transformable (Word t), Show (Word t)) => Word t -> SomeWord @@ -96,6 +108,8 @@ instance Show SomeWord where -- | Extract 'Text' from 'SomeWord' and apply given function only if the -- word inside wasn't an acronym. +-- +-- @since 0.3.0.0 unSomeWord :: (Text -> Text) -> SomeWord -> Text unSomeWord f (SomeWord w) = transform f w @@ -112,6 +126,8 @@ instance Transformable (Word 'Acronym) where transform _ = unWord -- | The exceptions that is thrown when parsing of input fails. +-- +-- @since 0.3.0.0 data InflectionException = InflectionParsingFailed (ParseError Char Dec) From da792fdfec7b46447ce624fabb3c3d8a8b7a5ba6 Mon Sep 17 00:00:00 2001 From: mrkkrp Date: Wed, 21 Dec 2016 15:15:20 +0300 Subject: [PATCH 04/12] =?UTF-8?q?Hide=20=E2=80=98Text.Inflections.Types?= =?UTF-8?q?=E2=80=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Text/Inflections.hs | 13 +++++++++++++ inflections.cabal | 2 +- test/Text/Inflections/DasherizeSpec.hs | 3 +-- test/Text/Inflections/HumanizeSpec.hs | 3 +-- test/Text/Inflections/TitleizeSpec.hs | 3 +-- test/Text/Inflections/UnderscoreSpec.hs | 3 +-- 6 files changed, 18 insertions(+), 9 deletions(-) diff --git a/Text/Inflections.hs b/Text/Inflections.hs index cebcfde..202d1b3 100644 --- a/Text/Inflections.hs +++ b/Text/Inflections.hs @@ -87,6 +87,15 @@ module Text.Inflections , ordinalize , parseSnakeCase , parseCamelCase + -- * Types and helpers + , Word + , WordType (..) + , mkWord + , mkAcronym + , unWord + , SomeWord (..) + , unSomeWord + , InflectionException (..) -- * Often used combinators , toUnderscore , toDashed @@ -110,6 +119,10 @@ import Text.Inflections.Types import Text.Inflections.Underscore (underscore) import Text.Megaparsec +#if MIN_VERSION_base(4,8,0) +import Prelude hiding (Word) +#endif + -- | Transforms CamelCasedString to snake_cased_string_with_underscores. In -- case of failed parsing 'InflectionException' is thrown. -- diff --git a/inflections.cabal b/inflections.cabal index c21a1fd..7ed9b17 100644 --- a/inflections.cabal +++ b/inflections.cabal @@ -29,7 +29,6 @@ flag dev library exposed-modules: Text.Inflections - , Text.Inflections.Types other-modules: Text.Inflections.Data , Text.Inflections.Camelize @@ -41,6 +40,7 @@ library , Text.Inflections.Parse.SnakeCase , Text.Inflections.Titleize , Text.Inflections.Transliterate + , Text.Inflections.Types , Text.Inflections.Underscore if flag(dev) diff --git a/test/Text/Inflections/DasherizeSpec.hs b/test/Text/Inflections/DasherizeSpec.hs index 050629c..7b3d514 100644 --- a/test/Text/Inflections/DasherizeSpec.hs +++ b/test/Text/Inflections/DasherizeSpec.hs @@ -7,8 +7,7 @@ where import Test.Hspec -import Text.Inflections (dasherize) -import Text.Inflections.Types +import Text.Inflections #if !MIN_VERSION_base(4,8,0) import Control.Applicative diff --git a/test/Text/Inflections/HumanizeSpec.hs b/test/Text/Inflections/HumanizeSpec.hs index 7b742c7..a31ece5 100644 --- a/test/Text/Inflections/HumanizeSpec.hs +++ b/test/Text/Inflections/HumanizeSpec.hs @@ -5,8 +5,7 @@ module Text.Inflections.HumanizeSpec (spec) where import Test.Hspec -import Text.Inflections (humanize) -import Text.Inflections.Types +import Text.Inflections #if !MIN_VERSION_base(4,8,0) import Control.Applicative diff --git a/test/Text/Inflections/TitleizeSpec.hs b/test/Text/Inflections/TitleizeSpec.hs index 4359ff2..6218cf2 100644 --- a/test/Text/Inflections/TitleizeSpec.hs +++ b/test/Text/Inflections/TitleizeSpec.hs @@ -4,8 +4,7 @@ module Text.Inflections.TitleizeSpec (spec) where import Test.Hspec -import Text.Inflections (titleize) -import Text.Inflections.Types +import Text.Inflections #if !MIN_VERSION_base(4,8,0) import Control.Applicative diff --git a/test/Text/Inflections/UnderscoreSpec.hs b/test/Text/Inflections/UnderscoreSpec.hs index c128299..9e21cfa 100644 --- a/test/Text/Inflections/UnderscoreSpec.hs +++ b/test/Text/Inflections/UnderscoreSpec.hs @@ -4,8 +4,7 @@ module Text.Inflections.UnderscoreSpec (spec) where import Test.Hspec -import Text.Inflections (underscore) -import Text.Inflections.Types +import Text.Inflections #if !MIN_VERSION_base(4,8,0) import Control.Applicative From 8560ec9fcef6b925eed459aa0f2cce1f7443370d Mon Sep 17 00:00:00 2001 From: mrkkrp Date: Wed, 21 Dec 2016 15:15:47 +0300 Subject: [PATCH 05/12] Update the docs to reflect the current state of the code --- Text/Inflections.hs | 163 +++++++++++++++------------- Text/Inflections/Camelize.hs | 10 +- Text/Inflections/Dasherize.hs | 6 +- Text/Inflections/Data.hs | 16 ++- Text/Inflections/Humanize.hs | 12 +- Text/Inflections/Ordinal.hs | 29 +++-- Text/Inflections/Parameterize.hs | 8 +- Text/Inflections/Parse/CamelCase.hs | 7 +- Text/Inflections/Parse/SnakeCase.hs | 3 +- Text/Inflections/Titleize.hs | 9 +- Text/Inflections/Transliterate.hs | 7 +- Text/Inflections/Underscore.hs | 9 +- 12 files changed, 155 insertions(+), 124 deletions(-) diff --git a/Text/Inflections.hs b/Text/Inflections.hs index 202d1b3..1f2fd0d 100644 --- a/Text/Inflections.hs +++ b/Text/Inflections.hs @@ -1,92 +1,103 @@ -{- | -Module : Text.Inflections -Description : Rails-like inflections library for common String transformations. -Copyright : (c) Justin Leitgeb -License : MIT +-- | +-- Module : Text.Inflections +-- Description : Rails-like inflections library for common String transformations. +-- Copyright : (c) Justin Leitgeb +-- License : MIT +-- +-- Maintainer : justin@stackbuilders.com +-- Stability : unstable +-- Portability : portable +-- +-- This module provides methods for common String transformations, similar +-- to the Inflections library found in Rails: +-- +-- +-- +-- While many of the functions in this library are the same as in +-- implementations in Rails' ActiveSupport, the philosophy of this library +-- is fundamentally different. Where Rails tries to be as permissive as +-- possible, and return a String when given any input, this library tries to +-- output strings that make sense according to the function that is called. +-- +-- When you look closely at many of the functions in Rails' inflections +-- library, you will notice that many of them are partial. That is, they +-- only have well-defined output for some of the possible inputs to the +-- function allowed by the type system. As an example, let's take the +-- @underscore@ function. In Rails, it works like this: +-- +-- >>> "fooBar".underscore +-- "foo_bar" +-- +-- Looks OK so far. However, it's also easy to produce less expected results: +-- +-- >>> "foo bar".underscore +-- "foo bar" +-- +-- The output isn't underscored - it contains a space! It turns out that +-- some of the functions from Inflections in ActiveSupport are /partial/. +-- I.e., the outputs are really only specified for a certain range of the +-- inputs allowed by the String type. +-- +-- In the Haskell inflections library, we aim to deliver more predictable +-- results by separating the parsing of strings into tokens from the +-- application of transformations. Let's see an example. +-- +-- First, we tokenize an underscored String using 'parseSnakeCase': +-- +-- >>> parseSnakeCase [] "foo_bar" +-- Right [Word "foo",Word "bar"] +-- +-- We can chain together the tokenization of the input String and the +-- transformation to CamelCase by using 'Control.Monad.LiftM': +-- +-- >>> import Control.Monad (liftM) +-- >>> liftM camelize $ parseSnakeCase "foo_bar" +-- Right "FooBar" +-- +-- By separating out the tokenization from the application of inflections, +-- we also end up with useful libraries for validating input which can be +-- used independently: +-- +-- >>> parseSnakeCase [] "fooBar" +-- 1:4: +-- unexpected 'B' +-- expecting '_', end of input, or lowercase letter +-- +-- As of version 0.3.0.0, we don't permit creation of invalid 'Word's by +-- using of the smart constructors 'mkWord' and 'mkAcronym'. This is done +-- because not every 'Text' value is a valid 'Word', as it should not +-- contain whitespace, for example. Normal words have the type @'Word' +-- 'Normal'@, while acronyms have the type @'Word' 'Acronym'@. If you need +-- to have several words\/acronyms in a single list, use the existential +-- wrapper 'SomeWord'. Parsing functions now produce 'SomeWord's. +-- +-- This library is still a work-in-progress, and contributions are welcome +-- for missing pieces and to fix bugs. Please see the Github page to +-- contribute with code or bug reports: +-- +-- -Maintainer : justin@stackbuilders.com -Stability : unstable -Portability : portable - -This module provides methods for common String transformations, similar to the -"Inflections" library found in Rails: - - - -While many of the functions in this library are the same as in implementations -in Rails' ActiveSupport, the philosophy of this library is fundamentally -different. Where Rails tries to be as permissive as possible, and return a -String when given any input, this library tries to output strings that make -sense according to the function that is called. - -When you look closely at many of the functions in Rails' inflections -library, you will notice that many of them are partial. That is, they only -have well-defined output for some of the possible inputs to the function allowed -by the type system. As an example, let's take the @underscore@ function. In -Rails, it works like this: - ->>> "fooBar".underscore -"foo_bar" - -Looks ok so far. However, it's also easy to produce less expected results: - ->>> "foo bar".underscore -"foo bar" - -The output isn't underscored - it contains a space! It turns out that some of -the functions from Inflections in ActiveSupport are /partial/. Ie., the outputs -are really only specified for a certain range of the inputs allowed by the -String type. - -In the Haskell inflections library, we aim to deliver more predictable results -by separating the parsing of strings into tokens from the application of -transformations. Let's see an example. - -First, we tokenize an underscored String using 'parseSnakeCase': - ->>> parseSnakeCase [] "foo_bar" -Right [Word "foo",Word "bar"] - -We can chain together the tokenization of the input String and the -transformation to CamelCase by using 'Control.Monad.LiftM': - ->>> import Control.Monad (liftM) ->>> liftM camelize $ parseSnakeCase "foo_bar" - -By separating out the tokenization from the application of inflections, we also -end up with useful libraries for validating input which can be used -independently: - ->>> parseSnakeCase [] "fooBar" -Left "(unknown)" (line 1, column 4): -unexpected 'B' -expecting lowercase letter, "_" or end of input - -This library is still a work-in-progress, and contributions are welcome for -missing pieces and to fix bugs. Please see the Github page to contribute with -code or bug reports: - - - --} +{-# LANGUAGE CPP #-} module Text.Inflections - ( camelize + ( -- * Parsing + parseSnakeCase + , parseCamelCase + -- * Rendering + , camelize , camelizeCustom , dasherize , humanize , underscore , titleize , Transliterations - , defaultMap + , defaultTransliterations , parameterize , parameterizeCustom , transliterate , transliterateCustom - , ordinal , ordinalize - , parseSnakeCase - , parseCamelCase + , ordinal -- * Types and helpers , Word , WordType (..) @@ -107,7 +118,7 @@ import Control.Monad.Catch (MonadThrow (..)) import Data.Text (Text) import Text.Inflections.Camelize (camelize, camelizeCustom) import Text.Inflections.Dasherize (dasherize) -import Text.Inflections.Data (Transliterations, defaultMap) +import Text.Inflections.Data (Transliterations, defaultTransliterations) import Text.Inflections.Humanize (humanize) import Text.Inflections.Ordinal (ordinal, ordinalize) import Text.Inflections.Parameterize (parameterize, parameterizeCustom) diff --git a/Text/Inflections/Camelize.hs b/Text/Inflections/Camelize.hs index bb80524..5f32aeb 100644 --- a/Text/Inflections/Camelize.hs +++ b/Text/Inflections/Camelize.hs @@ -27,7 +27,10 @@ import Control.Applicative -- | Turn an input word list in into CamelCase. -- --- >>> camelize [ Word "foo", Acronym "bar", Word "bazz" ] +-- >>> foo <- SomeWord <$> mkWord "foo" +-- >>> bar <- SomeWord <$> mkAcronym "bar" +-- >>> bazz <- SomeWord <$> mkAcronym "bazz" +-- >>> camelize [foo,bar,bazz] -- "FoobarBazz" camelize :: [SomeWord] -- ^ Input words @@ -36,7 +39,10 @@ camelize = camelizeCustom True -- | Turn an input word list into a CamelCase String. -- --- >>> camelizeCustom False [ Word "foo", Acronym "bar", Word "bazz" ] +-- >>> foo <- SomeWord <$> mkWord "foo" +-- >>> bar <- SomeWord <$> mkAcronym "bar" +-- >>> bazz <- SomeWord <$> mkAcronym "bazz" +-- >>> camelizeCustom False [foo,bar,bazz] -- "foobarBazz" camelizeCustom :: Bool -- ^ Whether to capitalize the first character in the output String diff --git a/Text/Inflections/Dasherize.hs b/Text/Inflections/Dasherize.hs index 8b46aa8..dce733b 100644 --- a/Text/Inflections/Dasherize.hs +++ b/Text/Inflections/Dasherize.hs @@ -19,7 +19,7 @@ import Data.Text (Text) import Text.Inflections.Types import qualified Data.Text as T --- | Replaces underscores in a snake_cased string with dashes (hyphens). +-- | Produce a string with words separated by dashes (hyphens). -- -- >>> foo <- SomeWord <$> mkWord "foo" -- >>> bar <- SomeWord <$> mkAcronym "bar" @@ -27,6 +27,6 @@ import qualified Data.Text as T -- >>> dasherize [foo,bar,bazz] -- "foo-bar-bazz" dasherize - :: [SomeWord] -- ^ Input Words to separate with dashes - -> Text -- ^ The dasherized String + :: [SomeWord] -- ^ Input words to separate with dashes + -> Text -- ^ The dasherized 'Text' dasherize = T.intercalate "-" . fmap (unSomeWord T.toLower) diff --git a/Text/Inflections/Data.hs b/Text/Inflections/Data.hs index 50b91b2..84922f0 100644 --- a/Text/Inflections/Data.hs +++ b/Text/Inflections/Data.hs @@ -9,19 +9,25 @@ -- -- Auxiliary data used in the library. -module Text.Inflections.Data where +module Text.Inflections.Data + ( Transliterations + , defaultTransliterations ) +where import Data.HashMap.Strict (HashMap) import qualified Data.HashMap.Strict as M --- |A 'Data.Map.Map' containing mappings from international characters to +-- | A 'HashMap' containing mappings from international characters to -- sequences approximating these characters within the ASCII range. type Transliterations = HashMap Char String --- |These default transliterations stolen from the Ruby i18n library - see +-- | These default transliterations are stolen from the Ruby i18n library - +-- see -- . -defaultMap :: Transliterations -defaultMap = M.fromList [ +-- +-- NOTE: before version 0.3.0.0 this was called @defaultMap@. +defaultTransliterations :: Transliterations +defaultTransliterations = M.fromList [ ('À', "A"), ('Á', "A"), ('Â', "A"), ('Ã', "A"), ('Ä', "A"), ('Å', "A"), ('Æ', "AE"), ('Ç', "C"), ('È', "E"), ('É', "E"), ('Ê', "E"), ('Ë', "E"), ('Ì', "I"), ('Í', "I"), ('Î', "I"), ('Ï', "I"), ('Ð', "D"), ('Ñ', "N"), diff --git a/Text/Inflections/Humanize.hs b/Text/Inflections/Humanize.hs index 6defa50..d6838b7 100644 --- a/Text/Inflections/Humanize.hs +++ b/Text/Inflections/Humanize.hs @@ -24,17 +24,17 @@ import qualified Data.Text as T import Control.Applicative #endif --- |Capitalizes the first word and turns underscores into spaces. Like +-- | Capitalize the first word and separate words with spaces. Like -- 'Text.Inflections.Titleize.titleize', this is meant for creating pretty -- output. -- --- >>> humanize [Word "foo", Acronym "bar", Word "bazz"] +-- >>> foo <- SomeWord <$> mkWord "foo" +-- >>> bar <- SomeWord <$> mkAcronym "bar" +-- >>> bazz <- SomeWord <$> mkWord "bazz" +-- >>> humanize [foo,bar,bazz] -- "Foo bar bazz" --- --- Note that as of version 0.3.0.0 @Word@ and @Acronym@ constructors are --- hidden, but you still can construct them with 'mkWord' and 'mkAcronym'. humanize - :: [SomeWord] -- ^ List of Words, first of which will be capitalized + :: [SomeWord] -- ^ List of words, first of which will be capitalized -> Text -- ^ The humanized output humanize xs' = case unSomeWord (T.replace "_" " ") <$> xs' of diff --git a/Text/Inflections/Ordinal.hs b/Text/Inflections/Ordinal.hs index ab424de..bd8cfcc 100644 --- a/Text/Inflections/Ordinal.hs +++ b/Text/Inflections/Ordinal.hs @@ -12,14 +12,26 @@ {-# LANGUAGE OverloadedStrings #-} module Text.Inflections.Ordinal - ( ordinal - , ordinalize ) + ( ordinalize + , ordinal ) where import Data.Monoid ((<>)) import Data.Text (Text) import qualified Data.Text as T +-- |Turns a number into an ordinal string used to denote the position in an +-- ordered sequence such as 1st, 2nd, 3rd, 4th. +-- +-- >>> ordinalize 1 +-- "1st" +-- >>> ordinalize 2 +-- "2nd" +-- >>> ordinalize 10 +-- "10th" +ordinalize :: (Integral a, Show a) => a -> Text +ordinalize n = T.pack (show n) <> ordinal n + -- |Returns the suffix that should be added to a number to denote the position -- in an ordered sequence such as 1st, 2nd, 3rd, 4th. -- @@ -39,16 +51,3 @@ ordinal number where abs_number = abs number remainder10 = abs_number `mod` 10 remainder100 = abs_number `mod` 100 - --- |Turns a number into an ordinal string used to denote the position in an --- ordered sequence such as 1st, 2nd, 3rd, 4th. --- --- >>> ordinalize 1 --- "1st" --- >>> ordinalize 2 --- "2nd" --- >>> ordinalize 10 --- "10th" -ordinalize :: (Integral a, Show a) => a -> Text -ordinalize n = T.pack (show n) <> ordinal n -{-# INLINE ordinalize #-} diff --git a/Text/Inflections/Parameterize.hs b/Text/Inflections/Parameterize.hs index 7e96e81..23ab6c5 100644 --- a/Text/Inflections/Parameterize.hs +++ b/Text/Inflections/Parameterize.hs @@ -27,13 +27,13 @@ import qualified Data.Text as T import Control.Applicative #endif --- |Replaces special characters in a string so that it may be used as part of a --- 'pretty' URL. Uses the default transliterations in this library. +-- | Replace special characters in a string so that it may be used as part +-- of a 'pretty' URL. Uses the 'defaultTransliterations'. parameterize :: Text -> Text -parameterize = parameterizeCustom defaultMap +parameterize = parameterizeCustom defaultTransliterations {-# INLINE parameterize #-} --- |Transliterate 'Text' with a custom transliteration table. +-- | Transliterate 'Text' with a custom transliteration table. parameterizeCustom :: Transliterations -> Text -> Text parameterizeCustom m txt = (T.intercalate "-" . T.words) (T.unfoldr f ("", txt)) where diff --git a/Text/Inflections/Parse/CamelCase.hs b/Text/Inflections/Parse/CamelCase.hs index fcb4663..6b40bd4 100644 --- a/Text/Inflections/Parse/CamelCase.hs +++ b/Text/Inflections/Parse/CamelCase.hs @@ -30,11 +30,14 @@ import Prelude hiding (Word) -- | Parse a CamelCase string. -- --- >>> parseCamelCase ["Bar"] "FooBarBazz" +-- >>> bar <- mkAcronym "bar" +-- >>> parseCamelCase [bar] "FooBarBazz" -- Right [Word "Foo",Acronym "Bar",Word "Bazz"] +-- -- >>> parseCamelCase [] "foo_bar_bazz" --- Left "(unknown)" (line 1, column 4): +-- 1:4: -- unexpected '_' +-- expecting end of input, lowercase letter, or uppercase letter parseCamelCase :: [Word 'Acronym] -- ^ Collection of acronyms -> Text -- ^ Input diff --git a/Text/Inflections/Parse/SnakeCase.hs b/Text/Inflections/Parse/SnakeCase.hs index ae01ce2..9650f81 100644 --- a/Text/Inflections/Parse/SnakeCase.hs +++ b/Text/Inflections/Parse/SnakeCase.hs @@ -34,8 +34,9 @@ import Prelude hiding (Word) -- Right [Word "foo",Acronym "bar",Word "bazz"] -- -- >>> parseSnakeCase [] "fooBarBazz" --- Left "(unknown)" (line 1, column 4): +-- 1:4: -- unexpected 'B' +-- expecting '_', end of input, or lowercase letter parseSnakeCase :: [Word 'Acronym] -- ^ Collection of acronyms -> Text -- ^ Input diff --git a/Text/Inflections/Titleize.hs b/Text/Inflections/Titleize.hs index 8f0d4d0..0d9c0ef 100644 --- a/Text/Inflections/Titleize.hs +++ b/Text/Inflections/Titleize.hs @@ -19,9 +19,12 @@ import qualified Data.Text as T -- | Capitalize all the Words in the input list. -- --- >>> titleize [ Word "foo", Acronym "bar", Word "bazz" ] +-- >>> foo <- SomeWord <$> mkWord "foo" +-- >>> bar <- SomeWord <$> mkAcronym "bar" +-- >>> bazz <- SomeWord <$> mkWord "bazz" +-- >>> titleize [foo,bar,bazz] -- "Foo bar Bazz" titleize - :: [SomeWord] -- ^ List of Words, first of which will be capitalized - -> Text -- ^ The titleized String + :: [SomeWord] -- ^ List of words, first of which will be capitalized + -> Text -- ^ The titleized 'Text' titleize = T.unwords . fmap (unSomeWord T.toTitle) diff --git a/Text/Inflections/Transliterate.hs b/Text/Inflections/Transliterate.hs index 8aa5b6c..21f3254 100644 --- a/Text/Inflections/Transliterate.hs +++ b/Text/Inflections/Transliterate.hs @@ -27,15 +27,14 @@ import qualified Data.Text as T import Control.Applicative #endif --- |Returns a String after default approximations for changing Unicode +-- | Returns a 'Text' after default approximations for changing Unicode -- characters to a valid ASCII range are applied. If you want to supplement -- the default approximations with your own, you should use the -- 'transliterateCustom' function instead of 'transliterate'. transliterate :: Text -> Text -transliterate = transliterateCustom "?" defaultMap -{-# INLINE transliterate #-} +transliterate = transliterateCustom "?" defaultTransliterations --- |Returns a String after default approximations for changing Unicode +-- | Returns a 'Text' after default approximations for changing Unicode -- characters to a valid ASCII range are applied. transliterateCustom :: String -- ^ The default replacement diff --git a/Text/Inflections/Underscore.hs b/Text/Inflections/Underscore.hs index 3f80fce..9848d76 100644 --- a/Text/Inflections/Underscore.hs +++ b/Text/Inflections/Underscore.hs @@ -19,11 +19,14 @@ import Data.Text (Text) import Text.Inflections.Types import qualified Data.Text as T --- |Turns a CamelCase string into an underscore_separated 'Text'. +-- | Separate given words by underscores. -- --- >>> underscore [ Word "foo", Acronym "bar", Word "bazz" ] +-- >>> foo <- SomeWord <$> mkWord "foo" +-- >>> bar <- SomeWord <$> mkAcronym "bar" +-- >>> bazz <- SomeWord <$> mkWord "bazz" +-- >>> underscore [foo,bar,bazz] -- "foo_bar_bazz" underscore - :: [SomeWord] -- ^ Input Words to separate with underscores + :: [SomeWord] -- ^ Input words to separate with underscores -> Text -- ^ The underscored String underscore = T.intercalate "_" . fmap (unSomeWord T.toLower) From d9f226a81d5b04e39717e5d4307a7de5d816e076 Mon Sep 17 00:00:00 2001 From: mrkkrp Date: Wed, 21 Dec 2016 15:16:00 +0300 Subject: [PATCH 06/12] =?UTF-8?q?Update=20the=20=E2=80=9CCHANGELOG.md?= =?UTF-8?q?=E2=80=9D=20file?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 41d06bf..caa0fbf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,14 +1,18 @@ ## Inflections 0.3.0.0 +* A more type-safe API forbidding creation of invalid words. + +* Made the API use `Text` instead of `String` (which significally improved + speed). + +* Switched to Megaparsec 5 for parsing. + +* Renamed `defaultMap` to `defaultTransliterations`. + * Added the `CHANGELOG.md` file. * Switched test suite to Hspec. -* Switched to Megaparsec 5 for parsing. - -* Made the API use `Text` instead of `String` (which significally improved - speed). - * The `toUnderscore`, `toDashed`, and `toCamelCased` are not partial anymore, now they operate in `MonadThrow`. From 48e14a31ff3b988586fc425345cb16ed9ad403f2 Mon Sep 17 00:00:00 2001 From: mrkkrp Date: Wed, 21 Dec 2016 15:29:07 +0300 Subject: [PATCH 07/12] Fix the build Oldish GHC does not understand @since. --- Text/Inflections/Types.hs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Text/Inflections/Types.hs b/Text/Inflections/Types.hs index 011b22e..c27ecde 100644 --- a/Text/Inflections/Types.hs +++ b/Text/Inflections/Types.hs @@ -46,7 +46,7 @@ import Prelude hiding (Word) -- | Create a word from given 'Text'. The input should not contain spaces or -- 'InflectionInvalidWord' will be thrown. -- --- @since 0.3.0.0 +-- /since 0.3.0.0/ mkWord :: MonadThrow m => Text -> m (Word 'Normal) mkWord txt = @@ -58,7 +58,7 @@ mkWord txt = -- spaces or 'InflectionInvalidAcronym' will be thrown. Acronym is different -- from normal word by that it may not be transformed by inflections. -- --- @since 0.3.0.0 +-- /since 0.3.0.0/ mkAcronym :: MonadThrow m => Text -> m (Word 'Acronym) mkAcronym txt = @@ -79,13 +79,13 @@ instance Show (Word 'Acronym) where -- | A type-level tag for words. -- --- @since 0.3.0.0 +-- /since 0.3.0.0/ data WordType = Normal | Acronym -- | Get a 'Text' value from 'Word'. -- --- @since 0.3.0.0 +-- /since 0.3.0.0/ unWord :: Word t -> Text unWord (Word s) = s @@ -94,7 +94,7 @@ unWord (Word s) = s -- list for example. The only thing that receiver of 'SomeWord' can do is to -- apply 'unWord' on it, of course. This is faciliated by 'unSomeWord'. -- --- @since 0.3.0.0 +-- /since 0.3.0.0/ data SomeWord where SomeWord :: (Transformable (Word t), Show (Word t)) => Word t -> SomeWord @@ -109,7 +109,7 @@ instance Show SomeWord where -- | Extract 'Text' from 'SomeWord' and apply given function only if the -- word inside wasn't an acronym. -- --- @since 0.3.0.0 +-- /since 0.3.0.0/ unSomeWord :: (Text -> Text) -> SomeWord -> Text unSomeWord f (SomeWord w) = transform f w @@ -127,7 +127,7 @@ instance Transformable (Word 'Acronym) where -- | The exceptions that is thrown when parsing of input fails. -- --- @since 0.3.0.0 +-- /since 0.3.0.0/ data InflectionException = InflectionParsingFailed (ParseError Char Dec) From 2a2d02847e86a3a8633d85b34e15957c30dd870f Mon Sep 17 00:00:00 2001 From: mrkkrp Date: Wed, 21 Dec 2016 15:54:27 +0300 Subject: [PATCH 08/12] Introduce a more flexible error handling --- CHANGELOG.md | 4 ++- Text/Inflections.hs | 60 +++++++++++++++++++----------------- test/Text/InflectionsSpec.hs | 10 +++--- 3 files changed, 40 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index caa0fbf..3be9498 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,9 @@ * Switched test suite to Hspec. * The `toUnderscore`, `toDashed`, and `toCamelCased` are not partial - anymore, now they operate in `MonadThrow`. + anymore. They return parse error in `Left` just like parsing functions, + but this result can be lifted into any instance of `MonadThrow` with + `betterThrow` helper. * Improved documentation. diff --git a/Text/Inflections.hs b/Text/Inflections.hs index 1f2fd0d..ef64b6e 100644 --- a/Text/Inflections.hs +++ b/Text/Inflections.hs @@ -48,10 +48,9 @@ -- Right [Word "foo",Word "bar"] -- -- We can chain together the tokenization of the input String and the --- transformation to CamelCase by using 'Control.Monad.LiftM': +-- transformation to CamelCase by using 'fmap': -- --- >>> import Control.Monad (liftM) --- >>> liftM camelize $ parseSnakeCase "foo_bar" +-- >>> camelize <$> parseSnakeCase [] "foo_bar" -- Right "FooBar" -- -- By separating out the tokenization from the application of inflections, @@ -110,10 +109,10 @@ module Text.Inflections -- * Often used combinators , toUnderscore , toDashed - , toCamelCased ) + , toCamelCased + , betterThrow ) where -import Control.Monad (liftM) import Control.Monad.Catch (MonadThrow (..)) import Data.Text (Text) import Text.Inflections.Camelize (camelize, camelizeCustom) @@ -134,43 +133,48 @@ import Text.Megaparsec import Prelude hiding (Word) #endif --- | Transforms CamelCasedString to snake_cased_string_with_underscores. In --- case of failed parsing 'InflectionException' is thrown. +-- | Transforms CamelCasedString to snake_cased_string_with_underscores. +-- +-- > toUnderscore = fmap underscore . parseCamelCase [] -- -- >>> toUnderscore "FooBarBazz" -- "foo_bar_bazz" -toUnderscore :: MonadThrow m => Text -> m Text -toUnderscore = liftM underscore . handleEither . parseCamelCase [] -{-# INLINE toUnderscore #-} +toUnderscore :: Text -> Either (ParseError Char Dec) Text +toUnderscore = fmap underscore . parseCamelCase [] --- | Transforms CamelCasedString to snake-cased-string-with-dashes. In case --- of failed parsing 'InflectionException' is thrown. +-- | Transforms CamelCasedString to snake-cased-string-with-dashes. +-- +-- > toDashed = fmap dasherize . parseCamelCase [] -- -- >>> toDashed "FooBarBazz" -- "foo-bar-bazz" -toDashed :: MonadThrow m => Text -> m Text -toDashed = liftM dasherize . handleEither . parseCamelCase [] -{-# INLINE toDashed #-} +toDashed :: Text -> Either (ParseError Char Dec) Text +toDashed = fmap dasherize . parseCamelCase [] -- | Transforms underscored_text to CamelCasedText. If first argument is -- 'True' then FirstCharacter in result string will be in upper case. If --- 'False' then firstCharacter will be in lower case. In case of failed --- parsing 'InflectionException' is thrown. +-- 'False' then firstCharacter will be in lower case. +-- +-- > toCamelCased t = fmap (camelizeCustom t) . parseSnakeCase [] -- -- >>> toCamelCased True "foo_bar_bazz" -- "FooBarBazz" -- >>> toCamelCased False "foo_bar_bazz" -- "fooBarBazz" -toCamelCased :: MonadThrow m - => Bool -- ^ Capitalize the first character +toCamelCased + :: Bool -- ^ Capitalize the first character -> Text -- ^ Input - -> m Text -- ^ Ouput -toCamelCased t = liftM (camelizeCustom t) . handleEither . parseSnakeCase [] -{-# INLINE toCamelCased #-} + -> Either (ParseError Char Dec) Text -- ^ Ouput +toCamelCased t = fmap (camelizeCustom t) . parseSnakeCase [] --- | Take an 'Either' that can contain a parser error and throw it if --- necessary. If everything is OK, just return 'Right' value. -handleEither :: MonadThrow m => Either (ParseError Char Dec) a -> m a -handleEither (Left err) = throwM (InflectionParsingFailed err) -handleEither (Right x) = return x -{-# INLINE handleEither #-} +-- | Lift something of type @'Either' ('ParseError' 'Char' 'Dec') a@ to +-- an instance of 'MonadThrow'. Useful when you want to shortcut on parsing +-- failures and you're in an instance of 'MonadThrow'. +-- +-- This throws 'InflectionParsingFailed' if given value is inside 'Left'. +-- +-- /since 0.3.0.0/ + +betterThrow :: MonadThrow m => Either (ParseError Char Dec) a -> m a +betterThrow (Left err) = throwM (InflectionParsingFailed err) +betterThrow (Right x) = return x diff --git a/test/Text/InflectionsSpec.hs b/test/Text/InflectionsSpec.hs index e95a796..fd9b335 100644 --- a/test/Text/InflectionsSpec.hs +++ b/test/Text/InflectionsSpec.hs @@ -9,16 +9,16 @@ spec :: Spec spec = do describe "toUnderscore" $ do it "converts camel case to snake case" $ - toUnderscore "camelCasedText" `shouldReturn` "camel_cased_text" + toUnderscore "camelCasedText" `shouldBe` Right "camel_cased_text" it "converts camel case to snake case with numbers" $ - toUnderscore "ipv4Address" `shouldReturn` "ipv4_address" + toUnderscore "ipv4Address" `shouldBe` Right "ipv4_address" describe "toDashed" $ it "converts camel case to dashed" $ - toDashed "camelCasedText" `shouldReturn` "camel-cased-text" + toDashed "camelCasedText" `shouldBe` Right "camel-cased-text" describe "toCamelCased" $ do context "when the first argument is False" $ it "converts snake case to camel case" $ - toCamelCased False "underscored_text" `shouldReturn` "underscoredText" + toCamelCased False "underscored_text" `shouldBe` Right "underscoredText" context "when the first argument is True" $ it "converts snake case to camel case" $ - toCamelCased True "underscored_text" `shouldReturn` "UnderscoredText" + toCamelCased True "underscored_text" `shouldBe` Right "UnderscoredText" From 0a43bc06903db88d27b33249766535efb374e61c Mon Sep 17 00:00:00 2001 From: mrkkrp Date: Wed, 21 Dec 2016 16:17:53 +0300 Subject: [PATCH 09/12] Add more tests --- inflections.cabal | 5 ++++- stack.yaml | 3 +++ test/Text/Inflections/Parse/CamelCaseSpec.hs | 8 ++++++++ test/Text/Inflections/Parse/SnakeCaseSpec.hs | 8 ++++++++ test/Text/InflectionsSpec.hs | 19 +++++++++++++++++-- 5 files changed, 40 insertions(+), 3 deletions(-) create mode 100644 test/Text/Inflections/Parse/CamelCaseSpec.hs create mode 100644 test/Text/Inflections/Parse/SnakeCaseSpec.hs diff --git a/inflections.cabal b/inflections.cabal index 7ed9b17..12ce9ef 100644 --- a/inflections.cabal +++ b/inflections.cabal @@ -62,7 +62,8 @@ test-suite test , QuickCheck >= 2.7.6 && < 3.0 , base >= 4.6 && < 5.0 , hspec >= 2.0 && < 3.0 - , megaparsec >= 5.0 && < 6.0 + , hspec-megaparsec >= 0.3 && < 0.4 + , megaparsec >= 5.1 && < 6.0 , text >= 0.2 && < 1.3 if flag(dev) ghc-options: -Wall -Werror @@ -73,6 +74,8 @@ test-suite test , Text.Inflections.HumanizeSpec , Text.Inflections.OrdinalSpec , Text.Inflections.ParametrizeSpec + , Text.Inflections.Parse.CamelCaseSpec + , Text.Inflections.Parse.SnakeCaseSpec , Text.Inflections.TitleizeSpec , Text.Inflections.TransliterateSpec , Text.Inflections.UnderscoreSpec diff --git a/stack.yaml b/stack.yaml index 36801c9..9085c49 100644 --- a/stack.yaml +++ b/stack.yaml @@ -1,3 +1,6 @@ resolver: lts-7.13 packages: - '.' +extra-deps: +- hspec-megaparsec-0.3.0 +- megaparsec-5.1.2 diff --git a/test/Text/Inflections/Parse/CamelCaseSpec.hs b/test/Text/Inflections/Parse/CamelCaseSpec.hs new file mode 100644 index 0000000..ac2d2f3 --- /dev/null +++ b/test/Text/Inflections/Parse/CamelCaseSpec.hs @@ -0,0 +1,8 @@ +module Text.Inflections.Parse.CamelCaseSpec + ( spec ) +where + +import Test.Hspec + +spec :: Spec +spec = return () diff --git a/test/Text/Inflections/Parse/SnakeCaseSpec.hs b/test/Text/Inflections/Parse/SnakeCaseSpec.hs new file mode 100644 index 0000000..d4fb069 --- /dev/null +++ b/test/Text/Inflections/Parse/SnakeCaseSpec.hs @@ -0,0 +1,8 @@ +module Text.Inflections.Parse.SnakeCaseSpec + ( spec ) +where + +import Test.Hspec + +spec :: Spec +spec = return () diff --git a/test/Text/InflectionsSpec.hs b/test/Text/InflectionsSpec.hs index fd9b335..2b0f739 100644 --- a/test/Text/InflectionsSpec.hs +++ b/test/Text/InflectionsSpec.hs @@ -3,22 +3,37 @@ module Text.InflectionsSpec (spec) where import Test.Hspec -import Text.Inflections (toUnderscore, toDashed, toCamelCased) +import Test.QuickCheck +import Text.Inflections spec :: Spec spec = do + describe "toUnderscore" $ do it "converts camel case to snake case" $ toUnderscore "camelCasedText" `shouldBe` Right "camel_cased_text" it "converts camel case to snake case with numbers" $ toUnderscore "ipv4Address" `shouldBe` Right "ipv4_address" + describe "toDashed" $ it "converts camel case to dashed" $ toDashed "camelCasedText" `shouldBe` Right "camel-cased-text" + describe "toCamelCased" $ do context "when the first argument is False" $ it "converts snake case to camel case" $ toCamelCased False "underscored_text" `shouldBe` Right "underscoredText" context "when the first argument is True" $ - it "converts snake case to camel case" $ + it "converts snake case to camel case with the first word capitalized" $ toCamelCased True "underscored_text" `shouldBe` Right "UnderscoredText" + + describe "betterThrow" $ do + context "when given a parse error" $ + it "throws the correct exception" $ + property $ \err -> + betterThrow (Left err) `shouldThrow` + (== InflectionParsingFailed err) + context "when given a value in Right" $ + it "returns the value" $ + property $ \x -> + betterThrow (Right x) `shouldReturn` (x :: Int) From 477a36de5cfbfe933d3a0d8177213bf3a41c4ace Mon Sep 17 00:00:00 2001 From: mrkkrp Date: Wed, 21 Dec 2016 17:38:42 +0300 Subject: [PATCH 10/12] Make collection type for acronyms polymorphic --- CHANGELOG.md | 3 +++ Text/Inflections/Parse/CamelCase.hs | 14 +++++++++----- Text/Inflections/Parse/SnakeCase.hs | 14 +++++++++----- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3be9498..f70cef6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ * Renamed `defaultMap` to `defaultTransliterations`. +* `parseCamelCase` and `parseSnakeCase` take any instance of `Foldable` as a + collection of acronyms, not just lists. + * Added the `CHANGELOG.md` file. * Switched test suite to Hspec. diff --git a/Text/Inflections/Parse/CamelCase.hs b/Text/Inflections/Parse/CamelCase.hs index 6b40bd4..e491ae9 100644 --- a/Text/Inflections/Parse/CamelCase.hs +++ b/Text/Inflections/Parse/CamelCase.hs @@ -26,6 +26,8 @@ import qualified Data.Text as T #if MIN_VERSION_base(4,8,0) import Prelude hiding (Word) +#else +import Data.Foldable #endif -- | Parse a CamelCase string. @@ -38,21 +40,23 @@ import Prelude hiding (Word) -- 1:4: -- unexpected '_' -- expecting end of input, lowercase letter, or uppercase letter -parseCamelCase - :: [Word 'Acronym] -- ^ Collection of acronyms +parseCamelCase :: (Foldable f, Functor f) + => f (Word 'Acronym) -- ^ Collection of acronyms -> Text -- ^ Input -> Either (ParseError Char Dec) [SomeWord] -- ^ Result of parsing parseCamelCase acronyms = parse (parser acronyms) "" -parser - :: [Word 'Acronym] -- ^ Collection of acronyms +parser :: (Foldable f, Functor f) + => f (Word 'Acronym) -- ^ Collection of acronyms -> Parser [SomeWord] -- ^ CamelCase parser parser acronyms = many (a <|> n) <* eof where n = SomeWord <$> word a = SomeWord <$> acronym acronyms -acronym :: [Word 'Acronym] -> Parser (Word 'Acronym) +acronym :: (Foldable f, Functor f) + => f (Word 'Acronym) + -> Parser (Word 'Acronym) acronym acronyms = do x <- T.pack <$> choice (string . T.unpack . unWord <$> acronyms) case mkAcronym x of diff --git a/Text/Inflections/Parse/SnakeCase.hs b/Text/Inflections/Parse/SnakeCase.hs index 9650f81..b64c9b1 100644 --- a/Text/Inflections/Parse/SnakeCase.hs +++ b/Text/Inflections/Parse/SnakeCase.hs @@ -25,6 +25,8 @@ import qualified Data.Text as T #if MIN_VERSION_base(4,8,0) import Prelude hiding (Word) +#else +import Data.Foldable #endif -- | Parse a snake_case string. @@ -37,21 +39,23 @@ import Prelude hiding (Word) -- 1:4: -- unexpected 'B' -- expecting '_', end of input, or lowercase letter -parseSnakeCase - :: [Word 'Acronym] -- ^ Collection of acronyms +parseSnakeCase :: (Foldable f, Functor f) + => f (Word 'Acronym) -- ^ Collection of acronyms -> Text -- ^ Input -> Either (ParseError Char Dec) [SomeWord] -- ^ Result of parsing parseSnakeCase acronyms = parse (parser acronyms) "" -parser - :: [Word 'Acronym] +parser :: (Foldable f, Functor f) + => f (Word 'Acronym) -> Parser [SomeWord] parser acronyms = ((a <|> n) `sepBy` char '_') <* eof where n = SomeWord <$> word a = SomeWord <$> acronym acronyms -acronym :: [Word 'Acronym] -> Parser (Word 'Acronym) +acronym :: (Foldable f, Functor f) + => f (Word 'Acronym) + -> Parser (Word 'Acronym) acronym acronyms = do x <- T.pack <$> choice (string . T.unpack . unWord <$> acronyms) case mkAcronym x of From 4d3bb48d7bac7f9d81254d028da74a8213f79833 Mon Sep 17 00:00:00 2001 From: mrkkrp Date: Wed, 21 Dec 2016 18:49:44 +0300 Subject: [PATCH 11/12] Parser tests and fixes --- CHANGELOG.md | 7 ++++ Text/Inflections/Parse/CamelCase.hs | 1 + Text/Inflections/Parse/SnakeCase.hs | 30 +++++--------- Text/Inflections/Types.hs | 3 ++ test/Text/Inflections/Parse/CamelCaseSpec.hs | 41 +++++++++++++++++++- test/Text/Inflections/Parse/SnakeCaseSpec.hs | 29 +++++++++++++- 6 files changed, 89 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f70cef6..26b542d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,13 @@ * Renamed `defaultMap` to `defaultTransliterations`. +* Words now can contain digits (recognized by all parsers). + +* `parseSnakeCase` now is not confused when a word happens to have prefix + coinciding with an acronym. This is harder to fix for `parseCamelCase` + because acronym may contain capital letters, so old behavior is preserved + for `parseCamelCase` for now. + * `parseCamelCase` and `parseSnakeCase` take any instance of `Foldable` as a collection of acronyms, not just lists. diff --git a/Text/Inflections/Parse/CamelCase.hs b/Text/Inflections/Parse/CamelCase.hs index e491ae9..340d93b 100644 --- a/Text/Inflections/Parse/CamelCase.hs +++ b/Text/Inflections/Parse/CamelCase.hs @@ -28,6 +28,7 @@ import qualified Data.Text as T import Prelude hiding (Word) #else import Data.Foldable +import Prelude hiding (elem) #endif -- | Parse a CamelCase string. diff --git a/Text/Inflections/Parse/SnakeCase.hs b/Text/Inflections/Parse/SnakeCase.hs index b64c9b1..19d6a24 100644 --- a/Text/Inflections/Parse/SnakeCase.hs +++ b/Text/Inflections/Parse/SnakeCase.hs @@ -27,6 +27,7 @@ import qualified Data.Text as T import Prelude hiding (Word) #else import Data.Foldable +import Prelude hiding (elem) #endif -- | Parse a snake_case string. @@ -48,25 +49,14 @@ parseSnakeCase acronyms = parse (parser acronyms) "" parser :: (Foldable f, Functor f) => f (Word 'Acronym) -> Parser [SomeWord] -parser acronyms = ((a <|> n) `sepBy` char '_') <* eof - where - n = SomeWord <$> word - a = SomeWord <$> acronym acronyms +parser acronyms = (pWord acronyms `sepBy` char '_') <* eof -acronym :: (Foldable f, Functor f) +pWord :: (Foldable f, Functor f) => f (Word 'Acronym) - -> Parser (Word 'Acronym) -acronym acronyms = do - x <- T.pack <$> choice (string . T.unpack . unWord <$> acronyms) - case mkAcronym x of - Nothing -> empty -- cannot happen if the system is sound - Just acr -> return acr -{-# INLINE acronym #-} - -word :: Parser (Word 'Normal) -word = do - x <- T.pack <$> (some lowerChar <|> some digitChar) - case mkWord x of - Nothing -> empty -- cannot happen if the system is sound - Just wrd -> return wrd -{-# INLINE word #-} + -> Parser SomeWord +pWord acronyms = do + let acs = unWord <$> acronyms + r <- T.pack <$> some alphaNumChar + if r `elem` acs + then maybe empty (return . SomeWord) (mkAcronym r) + else maybe empty (return . SomeWord) (mkWord r) diff --git a/Text/Inflections/Types.hs b/Text/Inflections/Types.hs index c27ecde..33b82c2 100644 --- a/Text/Inflections/Types.hs +++ b/Text/Inflections/Types.hs @@ -106,6 +106,9 @@ data SomeWord where instance Show SomeWord where show (SomeWord w) = show w +instance Eq SomeWord where + x == y = unSomeWord id x == unSomeWord id y + -- | Extract 'Text' from 'SomeWord' and apply given function only if the -- word inside wasn't an acronym. -- diff --git a/test/Text/Inflections/Parse/CamelCaseSpec.hs b/test/Text/Inflections/Parse/CamelCaseSpec.hs index ac2d2f3..b4ae233 100644 --- a/test/Text/Inflections/Parse/CamelCaseSpec.hs +++ b/test/Text/Inflections/Parse/CamelCaseSpec.hs @@ -1,8 +1,47 @@ +{-# LANGUAGE OverloadedStrings #-} + module Text.Inflections.Parse.CamelCaseSpec ( spec ) where import Test.Hspec +import Test.Hspec.Megaparsec +import Text.Inflections spec :: Spec -spec = return () +spec = + describe "parseCamelCase" $ do + context "when given no acronyms" $ do + context "when first word is capitalized" $ + it "parses CamelCase correctly" $ do + r <- mapM (fmap SomeWord . mkWord) ["One","Two","Three"] + parseCamelCase [] "OneTwoThree" `shouldParse` r + context "when first word is not capitalized" $ + it "parses camelCase correctly" $ do + r <- mapM (fmap SomeWord . mkWord) ["one","Two","Three"] + parseCamelCase [] "oneTwoThree" `shouldParse` r + context "when there are digits in the words" $ + it "parses CamelCase correctly" $ do + r <- mapM (fmap SomeWord . mkWord) ["one1two","Three3"] + parseCamelCase [] "one1twoThree3" `shouldParse` r + context "when given some acronyms" $ do + context "when first word is capitalized" $ + it "parses CamelCase correctly" $ do + a <- mkAcronym "BOO" + r <- mapM (fmap SomeWord . mkWord) ["One","BOO","One"] + parseCamelCase [a] "OneBOOOne" `shouldParse` r + context "when first word is not capitalized" $ + it "parses camelCase correctly" $ do + a <- mkAcronym "BOO" + r <- mapM (fmap SomeWord . mkWord) ["one","Two","BOO"] + parseCamelCase [a] "oneTwoBOO" `shouldParse` r + context "when there are digits in the words" $ + it "parses CamelCase correctly" $ do + a <- mkAcronym "BOO" + r <- mapM (fmap SomeWord . mkWord) ["one1two","Three3"] + parseCamelCase [a] "one1twoThree3" `shouldParse` r + context "when a word has a suffix coinciding with a acronym" $ + it "is still parsed correctly as a normala word" $ do + a <- mkAcronym "boo" + r <- mapM (fmap SomeWord . mkWord) ["fooboo","Bar"] + parseCamelCase [a] "foobooBar" `shouldParse` r diff --git a/test/Text/Inflections/Parse/SnakeCaseSpec.hs b/test/Text/Inflections/Parse/SnakeCaseSpec.hs index d4fb069..c8494c2 100644 --- a/test/Text/Inflections/Parse/SnakeCaseSpec.hs +++ b/test/Text/Inflections/Parse/SnakeCaseSpec.hs @@ -1,8 +1,35 @@ +{-# LANGUAGE OverloadedStrings #-} + module Text.Inflections.Parse.SnakeCaseSpec ( spec ) where import Test.Hspec +import Test.Hspec.Megaparsec +import Text.Inflections spec :: Spec -spec = return () +spec = + describe "parseSnakeCase" $ do + context "when given no acronyms" $ do + it "parses snake_case correctly" $ do + r <- mapM (fmap SomeWord . mkWord) ["OneTwo","Three"] + parseSnakeCase [] "OneTwo_Three" `shouldParse` r + it "handles digits in the words correctly" $ do + r <- mapM (fmap SomeWord . mkWord) ["one4a","two00"] + parseSnakeCase [] "one4a_two00" `shouldParse` r + context "when given some acronyms" $ do + it "parses snake_case correctly" $ do + a <- mkAcronym "BOO" + r <- mapM (fmap SomeWord . mkWord) ["BOO","one","one"] + parseSnakeCase [a] "BOO_one_one" `shouldParse` r + context "when acronym happens to be a prefix of other word" $ + it "parses the word correctly" $ do + a <- mkAcronym "BOO" + r <- mapM (fmap SomeWord . mkWord) ["one","BOOtwo"] + parseSnakeCase [a] "one_BOOtwo" `shouldParse` r + context "when acronym happens to be a suffix of other word" $ + it "parses the word correctly" $ do + a <- mkAcronym "BOO" + r <- mapM (fmap SomeWord . mkWord) ["oneBOO","two"] + parseSnakeCase [a] "oneBOO_two" `shouldParse` r From 2348bb717c36a9c668b9b3780fbc6c6f19213830 Mon Sep 17 00:00:00 2001 From: mrkkrp Date: Wed, 21 Dec 2016 20:01:14 +0300 Subject: [PATCH 12/12] =?UTF-8?q?Better=20define=20what=20a=20word=20is,?= =?UTF-8?q?=20tests=20for=20=E2=80=98Types.hs=E2=80=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Text/Inflections/Types.hs | 24 +++++++------ inflections.cabal | 1 + test/Text/Inflections/HumanizeSpec.hs | 5 +-- test/Text/Inflections/TypesSpec.hs | 51 +++++++++++++++++++++++++++ 4 files changed, 68 insertions(+), 13 deletions(-) create mode 100644 test/Text/Inflections/TypesSpec.hs diff --git a/Text/Inflections/Types.hs b/Text/Inflections/Types.hs index 33b82c2..06b42f5 100644 --- a/Text/Inflections/Types.hs +++ b/Text/Inflections/Types.hs @@ -31,7 +31,7 @@ module Text.Inflections.Types where import Control.Monad.Catch -import Data.Char (isSpace) +import Data.Char (isAlphaNum) import Data.Data (Data) import Data.Text (Text) import Data.Typeable (Typeable) @@ -43,28 +43,30 @@ import qualified Data.Text as T import Prelude hiding (Word) #endif --- | Create a word from given 'Text'. The input should not contain spaces or +-- | Create a word from given 'Text'. The input should consist of only +-- alpha-numeric characters (no white spaces or punctuation) -- 'InflectionInvalidWord' will be thrown. -- -- /since 0.3.0.0/ mkWord :: MonadThrow m => Text -> m (Word 'Normal) mkWord txt = - if T.any isSpace txt - then throwM (InflectionInvalidWord txt) - else return (Word txt) + if T.all isAlphaNum txt + then return (Word txt) + else throwM (InflectionInvalidWord txt) --- | Create an acronym from given 'Text'. The input should not contain --- spaces or 'InflectionInvalidAcronym' will be thrown. Acronym is different --- from normal word by that it may not be transformed by inflections. +-- | Create an acronym from given 'Text'. The input should consist of only +-- alpha-numeric characters 'InflectionInvalidAcronym' will be thrown. +-- Acronym is different from normal word by that it may not be transformed +-- by inflections (also see 'unSomeWord'). -- -- /since 0.3.0.0/ mkAcronym :: MonadThrow m => Text -> m (Word 'Acronym) mkAcronym txt = - if T.any isSpace txt - then throwM (InflectionInvalidAcronym txt) - else return (Word txt) + if T.all isAlphaNum txt + then return (Word txt) + else throwM (InflectionInvalidAcronym txt) -- | A 'Text' value that should be kept whole through applied inflections. diff --git a/inflections.cabal b/inflections.cabal index 12ce9ef..03b4bbb 100644 --- a/inflections.cabal +++ b/inflections.cabal @@ -78,5 +78,6 @@ test-suite test , Text.Inflections.Parse.SnakeCaseSpec , Text.Inflections.TitleizeSpec , Text.Inflections.TransliterateSpec + , Text.Inflections.TypesSpec , Text.Inflections.UnderscoreSpec , Text.InflectionsSpec diff --git a/test/Text/Inflections/HumanizeSpec.hs b/test/Text/Inflections/HumanizeSpec.hs index a31ece5..ed013c4 100644 --- a/test/Text/Inflections/HumanizeSpec.hs +++ b/test/Text/Inflections/HumanizeSpec.hs @@ -19,8 +19,9 @@ spec = describe "humazine" $ do humanize [employee,salary] `shouldBe` "Employee salary" it "turns underscores into spaces" $ do employee <- SomeWord <$> mkWord "employee" - hasSalary <- SomeWord <$> mkWord "has_salary" - humanize [employee, hasSalary] `shouldBe` "Employee has salary" + has <- SomeWord <$> mkWord "has" + salary <- SomeWord <$> mkWord "salary" + humanize [employee, has, salary] `shouldBe` "Employee has salary" it "capitalizes the first word of a sentence" $ do underground <- SomeWord <$> mkWord "underground" humanize [underground] `shouldBe` "Underground" diff --git a/test/Text/Inflections/TypesSpec.hs b/test/Text/Inflections/TypesSpec.hs new file mode 100644 index 0000000..e0b7205 --- /dev/null +++ b/test/Text/Inflections/TypesSpec.hs @@ -0,0 +1,51 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} + +module Text.Inflections.TypesSpec + ( spec ) +where + +import Test.Hspec +import Text.Inflections +import qualified Data.Text as T + +#if !MIN_VERSION_base(4,8,0) +import Control.Applicative +#endif + +spec :: Spec +spec = do + describe "mkWord" $ do + context "when provided a correct Text value" $ + it "creates a normal word" $ do + x <- mkWord "foo" + unWord x `shouldBe` "foo" + context "when provided an incorrect Text value" $ + it "throws the correct exception" $ do + mkWord "foo bar" `shouldThrow` (== InflectionInvalidWord "foo bar") + mkWord "foo_" `shouldThrow` (== InflectionInvalidWord "foo_") + mkWord "&$?%" `shouldThrow` (== InflectionInvalidWord "&$?%") + describe "mkAcronym" $ do + context "when provided a correct Text value" $ + it "creates an acronym" $ do + x <- mkAcronym "foo" + unWord x `shouldBe` "foo" + context "when providde an incorrect Text value" $ + it "throws the correct exception" $ do + mkAcronym "foo bar" `shouldThrow` (== InflectionInvalidAcronym "foo bar") + mkAcronym "foo_" `shouldThrow` (== InflectionInvalidAcronym "foo_") + mkAcronym "&$?%" `shouldThrow` (== InflectionInvalidAcronym "&$?%") + describe "unWord" $ + it "extracts the inner Text value" $ do + (unWord <$> mkWord "foo") `shouldReturn` "foo" + (unWord <$> mkWord "bar") `shouldReturn` "bar" + (unWord <$> mkAcronym "baz") `shouldReturn` "baz" + describe "unSomeWord" $ do + context "when inner value is a normal word" $ + it "Text is extracted and the given function applied" $ do + x <- SomeWord <$> mkWord "word" + unSomeWord T.toUpper x `shouldBe` "WORD" + context "when inner value is an acronym" $ + it "Text is extracted, but the function is not applied" $ do + x <- SomeWord <$> mkAcronym "acronym" + unSomeWord T.toUpper x `shouldBe` "acronym"