Merge pull request #38 from stackbuilders/refactoring-use-text

Major refactoring, switch to ‘Text’
2024-08-15 17:10:21 +03:00 · 2016-07-05 14:53:12 +04:00 · 2016-07-05 14:53:12 +04:00 · 03234409a0
commit 03234409a0
parent 4fdc9d6ac9 ad52094310
22 changed files with 292 additions and 293 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -6,6 +6,12 @@

 * Switched to Megaparsec 5 for parsing.

+* Made the API use `Text` instead of `String` (which significally improved
+  speed).
+
+* The `toUnderscore`, `toDashed`, and `toCamelCased` are not partial
+  anymore, now they operate in `MonadThrow`.
+
 * Improved documentation.

 ## Inflections 0.2.0.1
--- a/Text/Inflections.hs
+++ b/Text/Inflections.hs
@ -103,66 +103,57 @@ module Text.Inflections
    )
 where

-import Text.Inflections.Data ( Transliterations, defaultMap )
+import Control.Monad (liftM)
+import Control.Monad.Catch (MonadThrow (..))
+import Data.Text (Text)
+import Text.Inflections.Camelize (camelize, camelizeCustom)
+import Text.Inflections.Dasherize (dasherize)
+import Text.Inflections.Data (Transliterations, defaultMap)
+import Text.Inflections.Humanize (humanize)
+import Text.Inflections.Ordinal (ordinal, ordinalize)
+import Text.Inflections.Parameterize (parameterize, parameterizeCustom)
+import Text.Inflections.Parse.CamelCase (parseCamelCase)
+import Text.Inflections.Parse.SnakeCase (parseSnakeCase)
+import Text.Inflections.Parse.Types
+import Text.Inflections.Titleize (titleize)
+import Text.Inflections.Transliterate (transliterate, transliterateCustom)
+import Text.Inflections.Underscore (underscore)
+import Text.Megaparsec

-import Text.Inflections.Parameterize ( parameterize, parameterizeCustom )
-
-import Text.Inflections.Underscore ( underscore )
-
-import Text.Inflections.Camelize ( camelize, camelizeCustom )
-
-import Text.Inflections.Humanize ( humanize )
-
-import Text.Inflections.Titleize ( titleize )
-
-import Text.Inflections.Transliterate ( transliterate, transliterateCustom )
-
-import Text.Inflections.Dasherize ( dasherize )
-
-import Text.Inflections.Ordinal ( ordinal, ordinalize )
-
-import Text.Inflections.Parse.SnakeCase ( parseSnakeCase )
-
-import Text.Inflections.Parse.Types ( mapWord )
-
-import Text.Inflections.Parse.CamelCase ( parseCamelCase )
-
-import Data.Char ( toLower )
-
-- | Transforms CamelCasedString to
-- snake_cased_string_with_underscores. Throws exception if parsing failed
+-- | Transforms CamelCasedString to snake_cased_string_with_underscores. In
+-- case of failed parsing 'InflectionException' is thrown.
 --
 -- >>> toUnderscore "FooBarBazz"
 -- "foo_bar_bazz"
-toUnderscore :: String -> String
-toUnderscore =
-    underscore
-    . either (error .  ("toUnderscore: " ++) . show) id
-    . parseCamelCase []
+toUnderscore :: MonadThrow m => Text -> m Text
+toUnderscore = liftM underscore . handleEither . parseCamelCase []
+{-# INLINE toUnderscore #-}

-- | Transforms CamelCasedString to snake-cased-string-with-dashes. Throws
-- exception if parsing failed.
+-- | Transforms CamelCasedString to snake-cased-string-with-dashes. In case
+-- of failed parsing 'InflectionException' is thrown.
 --
 -- >>> toDashed "FooBarBazz"
 -- "foo-bar-bazz"
-toDashed :: String -> String
-toDashed =
-    dasherize
-    . map (mapWord (map toLower))
-    . either (error . ("toDashed: " ++) . show) id
-    . parseCamelCase []
+toDashed :: MonadThrow m => Text -> m Text
+toDashed = liftM dasherize . handleEither . parseCamelCase []
+{-# INLINE toDashed #-}

 -- | Transforms underscored_text to CamelCasedText. If first argument is
 -- 'True' then FirstCharacter in result string will be in upper case. If
-- 'False' then firstCharacter will be in lower case. Throws exception if
-- parsing failed
+-- 'False' then firstCharacter will be in lower case. In case of failed
+-- parsing 'InflectionException' is thrown.
 --
 -- >>> toCamelCased True "foo_bar_bazz"
 -- "FooBarBazz"
 -- >>> toCamelCased False "foo_bar_bazz"
 -- "fooBarBazz"
-toCamelCased :: Bool -> String -> String
-toCamelCased t =
-    camelizeCustom t
-    . either (error . ("toCamelCased: " ++) . show) id
-    . parseSnakeCase []
+toCamelCased :: MonadThrow m => Bool -> Text -> m Text
+toCamelCased t = liftM (camelizeCustom t) . handleEither . parseSnakeCase []
+{-# INLINE toCamelCased #-}
+
+-- | Take an 'Either' that can contain a parser error and throw it if
+-- necessary. If everything is OK, just return 'Right' value.
+handleEither :: MonadThrow m => Either (ParseError Char Dec) a -> m a
+handleEither (Left err) = throwM (InflectionParsingFailed err)
+handleEither (Right  x) = return x
+{-# INLINE handleEither #-}
--- a/Text/Inflections/Camelize.hs
+++ b/Text/Inflections/Camelize.hs
@ -9,13 +9,23 @@
 --
 -- Conversion to CamelCased phrases.

-module Text.Inflections.Camelize ( camelize, camelizeCustom ) where
+{-# LANGUAGE CPP               #-}
+{-# LANGUAGE OverloadedStrings #-}

-import Text.Inflections.Parse.Types (Word(..))
+module Text.Inflections.Camelize
+  ( camelize
+  , camelizeCustom )
+where

-import Data.Char (toUpper, toLower)
+import Data.Text (Text)
+import Text.Inflections.Parse.Types
+import qualified Data.Text as T

-import Prelude (String, Bool(..), concatMap, (.), zip, ($), repeat)
+#if MIN_VERSION_base(4,8,0)
+import Prelude hiding (Word)
+#else
+import Control.Applicative
+#endif

 -- |Turns a an input Word List in into CamelCase. Returns the CamelCase String.
 --
@ -23,8 +33,9 @@ import Prelude (String, Bool(..), concatMap, (.), zip, ($), repeat)
 -- "FoobarBazz"
 camelize
  :: [Word] -- ^ Input Words to separate with underscores
-  -> String -- ^ The camelized String
+  -> Text   -- ^ The camelized 'Text'
 camelize = camelizeCustom True
+{-# INLINE camelize #-}

 -- |Turns an input Word List into a CamelCase String.
 --
@ -33,16 +44,8 @@ camelize = camelizeCustom True
 camelizeCustom
  :: Bool   -- ^ Whether to capitalize the first character in the output String
  -> [Word] -- ^ The input Words
-  -> String -- ^ The camelized String
-camelizeCustom isFirstCap = concatMap (caseForWord isFirstCap) . isFirstList
-
-caseForWord :: Bool -> (Word, Bool) -> String
-caseForWord True (Word (c:cs), True)  = toUpper c : cs
-caseForWord False (Word (c:cs), True) = toLower c : cs
-caseForWord _ (Word (c:cs), _)        = toUpper c : cs
-caseForWord _ (Word [], _)            = []
-caseForWord _ (Acronym s, _)          = s
-
-- |Returns list with Bool indicating if an element is first.
-isFirstList :: [a] -> [(a, Bool)]
-isFirstList xs = zip xs $ True : repeat False
+  -> Text   -- ^ The camelized 'Text'
+camelizeCustom _ []     = ""
+camelizeCustom c (x:xs) = T.concat $
+  (mapWord (if c then T.toTitle else T.toLower) x) : (mapWord T.toTitle <$> xs)
+{-# INLINE camelizeCustom #-}
--- a/Text/Inflections/Dasherize.hs
+++ b/Text/Inflections/Dasherize.hs
@ -9,12 +9,14 @@
 --
 -- Conversion to dasherized phrases.

-{-# LANGUAGE CPP #-}
+{-# LANGUAGE CPP               #-}
+{-# LANGUAGE OverloadedStrings #-}

 module Text.Inflections.Dasherize ( dasherize ) where

-import Data.List (intercalate)
+import Data.Text (Text)
 import Text.Inflections.Parse.Types
+import qualified Data.Text as T

 #if MIN_VERSION_base(4,8,0)
 import Prelude hiding (Word)
@ -26,5 +28,6 @@ import Prelude hiding (Word)
 -- "foo-bar-bazz"
 dasherize
  :: [Word] -- ^ Input Words to separate with dashes
-  -> String -- ^ The dasherized String
-dasherize = intercalate "-" . fmap unWord
+  -> Text   -- ^ The dasherized String
+dasherize = T.intercalate "-" . fmap (mapWord T.toLower)
+{-# INLINE dasherize #-}
--- a/Text/Inflections/Data.hs
+++ b/Text/Inflections/Data.hs
@ -11,12 +11,12 @@

 module Text.Inflections.Data where

-import Data.Map (Map)
-import qualified Data.Map as M
+import Data.HashMap.Strict (HashMap)
+import qualified Data.HashMap.Strict as M

 -- |A 'Data.Map.Map' containing mappings from international characters to
 -- sequences approximating these characters within the ASCII range.
-type Transliterations = Map Char String
+type Transliterations = HashMap Char String

 -- |These default transliterations stolen from the Ruby i18n library - see
 -- <https://github.com/svenfuchs/i18n/blob/master/lib/i18n/backend/transliterator.rb#L41:L69>.
--- a/Text/Inflections/Humanize.hs
+++ b/Text/Inflections/Humanize.hs
@ -9,32 +9,34 @@
 --
 -- Conversion to “humanized” phrases.

-module Text.Inflections.Humanize (humanize) where
+{-# LANGUAGE CPP               #-}
+{-# LANGUAGE OverloadedStrings #-}

-import Text.Inflections.Parse.Types (Word(..))
+module Text.Inflections.Humanize
+  ( humanize )
+where

-import Data.Char (toUpper)
+import Data.Text (Text)
+import Text.Inflections.Parse.Types
+import qualified Data.Text as T

-import Prelude (String, Bool(..), (.), map, zip, ($), unwords, repeat)
+#if MIN_VERSION_base(4,8,0)
+import Prelude hiding (Word)
+#else
+import Control.Applicative
+#endif

-- |Capitalizes the first word and turns underscores into spaces. Like titleize,
-- this is meant for creating pretty output.
+-- |Capitalizes the first word and turns underscores into spaces. Like
+-- 'Text.Inflections.Titleize.titleize', this is meant for creating pretty
+-- output.
 --
 -- >>> humanize [ Word "foo", Acronym "bar", Word "bazz" ]
 -- "Foo bar bazz"
 humanize
  :: [Word] -- ^ List of Words, first of which will be capitalized
-  -> String -- ^ The humanized output
-humanize = unwords . map caseForWord . isFirstList
-
-- |Returns list with Bool indicating if an element is first.
-isFirstList :: [a] -> [(a, Bool)]
-isFirstList xs = zip xs $ True : repeat False
-
-- | Convert given 'Word' to capitalized 'String' when associated Boolean
-- value is 'True'.
-caseForWord :: (Word, Bool) -> String
-caseForWord (Word (c:cs), True)  = toUpper c : cs
-caseForWord (Word s, False)      = s
-caseForWord (Word [], _)         = []
-caseForWord (Acronym s, _)       = s  -- Acronyms are left intact
+  -> Text   -- ^ The humanized output
+humanize xs' =
+  case mapWord (T.replace "_" " ") <$> xs' of
+    []     -> ""
+    (x:xs) -> T.unwords $ T.toTitle x : (T.toLower <$> xs)
+{-# INLINE humanize #-}
--- a/Text/Inflections/Ordinal.hs
+++ b/Text/Inflections/Ordinal.hs
@ -9,9 +9,17 @@
 --
 -- Conversion to spelled ordinal numbers.

-module Text.Inflections.Ordinal (ordinal, ordinalize)
+{-# LANGUAGE OverloadedStrings #-}
+
+module Text.Inflections.Ordinal
+  ( ordinal
+  , ordinalize )
 where

+import Data.Monoid ((<>))
+import Data.Text (Text)
+import qualified Data.Text as T
+
 -- |Returns the suffix that should be added to a number to denote the position
 -- in an ordered sequence such as 1st, 2nd, 3rd, 4th.
 --
@ -21,7 +29,7 @@ where
 -- "nd"
 -- >>> ordinal 10
 -- "th"
-ordinal :: Integral a => a -> String
+ordinal :: Integral a => a -> Text
 ordinal number
        | remainder100 `elem` [11..13] = "th"
        | remainder10 == 1             = "st"
@ -41,5 +49,6 @@ ordinal number
 -- "2nd"
 -- >>> ordinalize 10
 -- "10th"
-ordinalize :: (Integral a, Show a) => a -> String
-ordinalize n = show n ++ ordinal n
+ordinalize :: (Integral a, Show a) => a -> Text
+ordinalize n = T.pack (show n) <> ordinal n
+{-# INLINE ordinalize #-}
--- a/Text/Inflections/Parameterize.hs
+++ b/Text/Inflections/Parameterize.hs
@ -9,124 +9,40 @@
 --
 -- Parametrization for strings, useful for transliteration.

-{-# LANGUAGE CPP #-}
+{-# LANGUAGE CPP               #-}
+{-# LANGUAGE OverloadedStrings #-}

 module Text.Inflections.Parameterize
  ( parameterize
  , parameterizeCustom )
 where

-import Control.Monad (guard)
-import Data.Char (isAscii, isAsciiLower, isAsciiUpper, isDigit, toLower)
-import Data.List (group)
-import Data.Maybe (mapMaybe)
+import Data.Char (isAscii, isAlphaNum, isPunctuation, toLower)
+import Data.Text (Text)
 import Text.Inflections.Data
-import Text.Megaparsec
-import Text.Megaparsec.String
-import qualified Data.Map as Map
+import qualified Data.HashMap.Strict as M
+import qualified Data.Text           as T

 #if !MIN_VERSION_base(4,8,0)
 import Control.Applicative
 #endif

-- | Parametrizable character.
-
-data PChar
-  = UCase      {-# UNPACK #-} !Char -- ^ Uppercase charater
-  | Acceptable String
-  -- ^ Since some of the transliterating approximations expand from one
-  -- Unicode to two ASCII chars (eg., œ to oe), we represent this as a
-  -- String.
-  | Separator                       -- ^ Separator
-  | Underscore                      -- ^ Underscore
-  | OtherAscii {-# UNPACK #-} !Char -- ^ Other ASCII charater
-  | NonAscii   {-# UNPACK #-} !Char -- ^ Non-ASCII charater
-  deriving (Eq, Show)
-
-- |Matches 'acceptable' characters for parameterization purposes.
-acceptableParser :: Parser PChar
-acceptableParser = do
-  c <- satisfy ((||) <$> isAsciiLower <*> isDigit)
-  return $ Acceptable [c]
-
-parser :: Parser [PChar]
-parser = many $ label "a parametrizable character" $ choice
-  [ acceptableParser
-  , UCase      <$> satisfy isAsciiUpper
-  , Separator  <$  char '-'
-  , Underscore <$  char '_'
-  , OtherAscii <$> asciiChar
-  , NonAscii   <$> satisfy (not . isAscii) ]
-
-- | Parser that accepts rows of parametrizable characters.
-- | Check if given char is “acceptable”, that is, it's lowercase ASCII
-- letter or digit.
-isValidParamChar :: Char -> Bool
-isValidParamChar c = isAsciiLower c || isDigit c
-
-- |Given a Transliteration table and a PChar, returns Maybe PChar indicating
-- how this character should appear in a URL.
-parameterizeChar :: Transliterations -> PChar -> Maybe PChar
-parameterizeChar _  (UCase c)      = Just $ Acceptable [toLower c]
-parameterizeChar _  (Acceptable c) = Just $ Acceptable c
-parameterizeChar _  Separator      = Just Separator
-parameterizeChar _  Underscore     = Just Underscore
-parameterizeChar _  (OtherAscii _) = Just Separator
-parameterizeChar ts (NonAscii c)   = transliteratePCharCustom ts c
-
-- |Look up character in transliteration list. Accepts a Transliteration map
-- which has Chars as keys and Strings as values for approximating common
-- international Unicode characters within the ASCII range.
-transliteratePCharCustom :: Transliterations -> Char -> Maybe PChar
-transliteratePCharCustom ts c = do
-  -- We may have expanded into multiple characters during
-  -- transliteration, so check validity of all characters in
-  -- result.
-  v <- Map.lookup c ts
-  guard (all isValidParamChar v)
-  return (Acceptable v)
-
-- |Turns PChar tokens into their String representation.
-pCharToC :: PChar -> String
-pCharToC (UCase c)        = [c]
-pCharToC (Acceptable str) = str
-pCharToC Separator        = "-"
-pCharToC Underscore       = "_"
-pCharToC (OtherAscii c)   = [c]
-pCharToC (NonAscii c)     = [c]
-
-- |Reduce sequences of separators down to only one separator.
-squeezeSeparators :: [PChar] -> [PChar]
-squeezeSeparators ps = concatMap squashSeparatorGroup $ group ps
-    where squashSeparatorGroup g = case head g of
-                                     Separator -> [Separator] -- only take head
-                                     _         -> g           -- don't change
-
 -- |Replaces special characters in a string so that it may be used as part of a
-- 'pretty' URL. Uses the default transliterations in this library
-parameterize :: String -> String
+-- 'pretty' URL. Uses the default transliterations in this library.
+parameterize :: Text -> Text
 parameterize = parameterizeCustom defaultMap
+{-# INLINE parameterize #-}

-- |Transliterate a String with a custom transliteration table.
-parameterizeCustom :: Transliterations -> String -> String
-parameterizeCustom ts s =
-    case parsed of
-      Right ast -> (concatMap pCharToC . squeezeSeparators .
-                    trimUnwanted wanted . mapMaybe (parameterizeChar ts))
-                   ast
-
-      -- Note that this should never fail, since we accommodate all Unicode
-      -- characters as valid input.
-      Left err -> fail $ "Parse failed, please report a bug! Error: " ++
-                         show err
-
-    where parsed = parse parser  "" s
-          wanted :: [PChar] -- All valid URL chars - we shouldn't trim these.
-          wanted = Underscore :
-                   map (Acceptable . (: [])) (['a'..'z'] ++ ['0'..'9'])
-
-- |Trim non-wanted elements from the beginning and end of list.
-trimUnwanted :: Eq a => [a] -> [a] -> [a]
-trimUnwanted wanted = dropWhile notWanted . reverse . dropWhile notWanted
-                      . reverse
-  where notWanted = (`notElem` wanted)
+-- |Transliterate 'Text' with a custom transliteration table.
+parameterizeCustom :: Transliterations -> Text -> Text
+parameterizeCustom m txt = (T.intercalate "-" . T.words) (T.unfoldr f ("", txt))
+  where
+    f ("", t) = uncurry g <$> T.uncons t
+    f ((x:xs), t) = Just (x, (xs, t))
+    g x xs
+      | (isAscii x && isAlphaNum x) || x == '_' = (toLower x, ("", xs))
+      | isPunctuation x = (' ', ("", xs))
+      | otherwise =
+          case toLower <$> M.lookupDefault " " x m of
+            ""     -> (' ', ("",xs))
+            (y:ys) -> (y,   (ys,xs))
--- a/Text/Inflections/Parse/Acronym.hs
+++ b/Text/Inflections/Parse/Acronym.hs
@ -11,17 +11,22 @@

 {-# LANGUAGE CPP #-}

-module Text.Inflections.Parse.Acronym ( acronym ) where
+module Text.Inflections.Parse.Acronym
+  ( acronym )
+where

+import Data.Text (Text)
 import Text.Inflections.Parse.Types
 import Text.Megaparsec
-import Text.Megaparsec.String
+import Text.Megaparsec.Text
+import qualified Data.Text as T

 #if MIN_VERSION_base(4,8,0)
 import Prelude hiding (Word)
 #endif

-- | Parser that accepts a string from given collection and turns it into
+-- | Parser that accepts a 'Text' from given collection and turns it into
 -- an 'Acronym'.
-acronym :: [String] -> Parser Word
-acronym = fmap Acronym . choice . fmap string
+acronym :: [Text] -> Parser Word
+acronym = fmap (Acronym . T.pack) . choice . fmap (string . T.unpack)
+{-# INLINE acronym #-}
--- a/Text/Inflections/Parse/CamelCase.hs
+++ b/Text/Inflections/Parse/CamelCase.hs
@ -9,15 +9,19 @@
 --
 -- Parser for camel case “symbols”.

-{-# LANGUAGE CPP #-}
+{-# LANGUAGE CPP               #-}
+{-# LANGUAGE OverloadedStrings #-}

-module Text.Inflections.Parse.CamelCase ( parseCamelCase )
+module Text.Inflections.Parse.CamelCase
+  ( parseCamelCase )
 where

+import Data.Text (Text)
 import Text.Inflections.Parse.Acronym (acronym)
 import Text.Inflections.Parse.Types (Word(..))
 import Text.Megaparsec
-import Text.Megaparsec.String
+import Text.Megaparsec.Text
+import qualified Data.Text as T

 #if MIN_VERSION_base(4,8,0)
 import Prelude hiding (Word)
@ -25,7 +29,7 @@ import Prelude hiding (Word)
 import Control.Applicative
 #endif

-- |Parses a CamelCase string.
+-- |Parse a CamelCase string.
 --
 -- >>> parseCamelCase ["Bar"] "FooBarBazz"
 -- Right [Word "Foo",Acronym "Bar",Word "Bazz"]
@ -33,18 +37,20 @@ import Control.Applicative
 -- Left "(unknown)" (line 1, column 4):
 -- unexpected '_'
 parseCamelCase
-  :: [String]          -- ^ Collection of acronyms
-  -> String            -- ^ Input
+  :: [Text]          -- ^ Collection of acronyms
+  -> Text            -- ^ Input
  -> Either (ParseError Char Dec) [Word] -- ^ Result of parsing
 parseCamelCase acronyms = parse (parser acronyms) ""

 parser
-  :: [String]          -- ^ Collection of acronyms
+  :: [Text]            -- ^ Collection of acronyms
  -> Parser [Word]     -- ^ CamelCase parser
 parser acronyms = many (acronym acronyms <|> word) <* eof
+{-# INLINE parser #-}

 word :: Parser Word
 word = do
  firstChar <- upperChar <|> lowerChar
  restChars <- many lowerChar
-  return . Word $ firstChar : restChars
+  return . Word . T.pack $ firstChar : restChars
+{-# INLINE word #-}
--- a/Text/Inflections/Parse/SnakeCase.hs
+++ b/Text/Inflections/Parse/SnakeCase.hs
@ -11,13 +11,16 @@

 {-# LANGUAGE CPP #-}

-module Text.Inflections.Parse.SnakeCase ( parseSnakeCase )
+module Text.Inflections.Parse.SnakeCase
+  ( parseSnakeCase )
 where

+import Data.Text (Text)
 import Text.Inflections.Parse.Acronym (acronym)
 import Text.Inflections.Parse.Types (Word(..))
 import Text.Megaparsec
-import Text.Megaparsec.String
+import Text.Megaparsec.Text
+import qualified Data.Text as T

 #if MIN_VERSION_base(4,8,0)
 import Prelude hiding (Word)
@ -33,17 +36,20 @@ import Control.Applicative
 -- Left "(unknown)" (line 1, column 4):
 -- unexpected 'B'
 parseSnakeCase
-  :: [String]          -- ^ Collection of acronyms
-  -> String            -- ^ Input
+  :: [Text]            -- ^ Collection of acronyms
+  -> Text              -- ^ Input
  -> Either (ParseError Char Dec) [Word] -- ^ Result of parsing
 parseSnakeCase acronyms = parse (parser acronyms) ""

 parser
-  :: [String] -> Parser [Word]
+  :: [Text]
+  -> Parser [Word]
 parser acronyms = do
  ws <- (acronym acronyms <|> word) `sepBy` char '_'
  eof
  return ws
+{-# INLINE parser #-}

 word :: Parser Word
-word = Word <$> (some lowerChar <|> some digitChar)
+word = Word . T.pack <$> (some lowerChar <|> some digitChar)
+{-# INLINE word #-}
--- a/Text/Inflections/Parse/Types.hs
+++ b/Text/Inflections/Parse/Types.hs
@ -9,32 +9,46 @@
 --
 -- Types used in the library.

-{-# LANGUAGE CPP #-}
+{-# LANGUAGE CPP                #-}
+{-# LANGUAGE DeriveDataTypeable #-}

 module Text.Inflections.Parse.Types
  ( Word (..) -- FIXME we should not export the constructor
  , unWord
-  , mapWord )
+  , mapWord
+  , InflectionException (..) )
 where

+import Control.Monad.Catch (Exception)
+import Data.Text (Text)
+import Data.Typeable (Typeable)
+import Text.Megaparsec
+
 #if MIN_VERSION_base(4,8,0)
 import Prelude hiding (Word)
 #endif

 -- | A 'String' that should be kept whole through applied inflections
 data Word
-  = Word String -- ^ A word that may be transformed by inflection
-  | Acronym String -- ^ A word that may not be transformed by inflections
+  = Word    Text -- ^ A word that may be transformed by inflection
+  | Acronym Text -- ^ A word that may not be transformed by inflections
  deriving (Show, Eq)

-- | Get a 'String' from 'Word'.
-unWord :: Word -> String
+-- | Get a 'Text' value from 'Word'.
+unWord :: Word -> Text
 unWord (Word    s) = s
 unWord (Acronym s) = s
 {-# INLINE unWord #-}

-- | Apply 'String' transforming function to a 'Word'.
-mapWord :: (String -> String) -> Word -> Word
-mapWord f (Word    s) = Word (f s)
-mapWord f (Acronym s) = Acronym (f s)
+-- | Apply 'Text' transforming function to a 'Word' unless it's a 'Acronym'.
+mapWord :: (Text -> Text) -> Word -> Text
+mapWord f (Word    s) = f s
+mapWord _ (Acronym s) = s
 {-# INLINE mapWord #-}
+
+-- | The exceptions that is thrown when parsing of input fails.
+
+data InflectionException = InflectionParsingFailed (ParseError Char Dec)
+  deriving (Eq, Show, Typeable)
+
+instance Exception InflectionException
--- a/Text/Inflections/Titleize.hs
+++ b/Text/Inflections/Titleize.hs
@ -9,25 +9,26 @@
 --
 -- Conversion to titleized phrases.

-module Text.Inflections.Titleize (titleize) where
+{-# LANGUAGE CPP #-}

-import Text.Inflections.Parse.Types (Word(..))
+module Text.Inflections.Titleize
+  ( titleize )
+where

-import Data.Char (toUpper)
+import Data.Text (Text)
+import Text.Inflections.Parse.Types
+import qualified Data.Text as T

-import Prelude (String, unwords, map, ($))
+#if MIN_VERSION_base(4,8,0)
+import Prelude hiding (Word)
+#endif

-- | Capitalizes all the Words in the input list.
+-- | Capitalize all the Words in the input list.
 --
 -- >>> titleize [ Word "foo", Acronym "bar", Word "bazz" ]
 -- "Foo bar Bazz"
 titleize
  :: [Word] -- ^ List of Words, first of which will be capitalized
-  -> String -- ^ The titleized String
-titleize s = unwords $ map upperCaseWord s
-
-- | Transform 'Word' into an upper-cased 'String'.
-upperCaseWord :: Word -> String
-upperCaseWord (Word (c:cs))  = toUpper c : cs
-upperCaseWord (Word [])      = []
-upperCaseWord (Acronym s)    = s  -- Acronyms are left intact
+  -> Text   -- ^ The titleized String
+titleize = T.unwords . fmap (mapWord T.toTitle)
+{-# INLINE titleize #-}
--- a/Text/Inflections/Transliterate.hs
+++ b/Text/Inflections/Transliterate.hs
@ -9,30 +9,47 @@
 --
 -- Support for transliteration.

+{-# LANGUAGE CPP               #-}
+{-# LANGUAGE OverloadedStrings #-}
+
 module Text.Inflections.Transliterate
    ( transliterate
-    , transliterateCustom
-    )
+    , transliterateCustom )
 where

 import Data.Char (isAscii)
-import Data.Maybe(fromMaybe)
+import Data.Text (Text)
 import Text.Inflections.Data
-import qualified Data.Map as Map
+import qualified Data.HashMap.Strict as M
+import qualified Data.Text           as T

-- |Returns a String after default approximations for changing Unicode characters
-- to a valid ASCII range are applied. If you want to supplement the default
-- approximations with your own, you should use the transliterateCustom
-- function instead of transliterate.
-transliterate :: String -> String
+#if !MIN_VERSION_base(4,8,0)
+import Control.Applicative
+#endif
+
+-- |Returns a String after default approximations for changing Unicode
+-- characters to a valid ASCII range are applied. If you want to supplement
+-- the default approximations with your own, you should use the
+-- 'transliterateCustom' function instead of 'transliterate'.
+transliterate :: Text -> Text
 transliterate = transliterateCustom "?" defaultMap
+{-# INLINE transliterate #-}

-- |Returns a String after default approximations for changing Unicode characters
-- to a valid ASCII range are applied.
-transliterateCustom :: String -> Transliterations -> String -> String
-transliterateCustom replacement ts = concatMap lookupCharTransliteration
-  where lookupCharTransliteration c =
-          if isAscii c then -- Don't bother looking up Chars in ASCII range
-            [c]
-          else
-            fromMaybe replacement (Map.lookup c ts)
+-- |Returns a String after default approximations for changing Unicode
+-- characters to a valid ASCII range are applied.
+transliterateCustom
+  :: String            -- ^ The default replacement
+  -> Transliterations  -- ^ The table of transliterations
+  -> Text              -- ^ The input
+  -> Text              -- ^ The output
+transliterateCustom replacement m txt = T.unfoldr f ("", txt)
+  where
+    f ("", t) = uncurry g <$> T.uncons t
+    f ((x:xs), t) = Just (x, (xs, t))
+    g x xs =
+      if isAscii x
+        then (x, ("", xs))
+        else
+          case M.lookupDefault replacement x m of
+            ""     -> ('?', ("",xs))
+            (y:ys) -> (y,   (ys,xs))
--- a/Text/Inflections/Underscore.hs
+++ b/Text/Inflections/Underscore.hs
@ -9,25 +9,27 @@
 --
 -- Conversion to phrases separated by underscores.

-module Text.Inflections.Underscore ( underscore ) where
+{-# LANGUAGE CPP               #-}
+{-# LANGUAGE OverloadedStrings #-}

-import Text.Inflections.Parse.Types (Word(..))
+module Text.Inflections.Underscore
+  ( underscore )
+where

-import Data.Char (toLower)
-import Data.List (intercalate)
+import Data.Text (Text)
+import Text.Inflections.Parse.Types
+import qualified Data.Text as T

-import Prelude (String, (.), map)
+#if MIN_VERSION_base(4,8,0)
+import Prelude hiding (Word)
+#endif

-- |Turns a CamelCase string into an underscore_separated String.
+-- |Turns a CamelCase string into an underscore_separated 'Text'.
 --
 -- >>> underscore [ Word "foo", Acronym "bar", Word "bazz" ]
 -- "foo_bar_bazz"
 underscore
  :: [Word] -- ^ Input Words to separate with underscores
-  -> String -- ^ The underscored String
-underscore = intercalate "_" . map toDowncasedString
-
-- | Transform 'Word' into a down-cased 'String'.
-toDowncasedString :: Word -> String
-toDowncasedString (Acronym s) = map toLower s
-toDowncasedString (Word s)    = map toLower s
+  -> Text   -- ^ The underscored String
+underscore = T.intercalate "_" . fmap (mapWord T.toLower)
+{-# INLINE underscore #-}
--- a/inflections.cabal
+++ b/inflections.cabal
@ -49,8 +49,10 @@ library
  else
    ghc-options:      -O2 -Wall
  build-depends:       base         >= 4.6   && < 4.10
-                     , containers   >= 0.5   && < 0.6
+                     , exceptions   >= 0.6   && < 0.9
                     , megaparsec   >= 5.0   && < 6.0
+                     , text         >= 0.2   && < 1.3
+                     , unordered-containers >= 0.2.7 && < 0.3
  default-language:    Haskell2010

 test-suite test
@ -58,11 +60,11 @@ test-suite test
  hs-source-dirs: test
  main-is: Spec.hs
  build-depends:       inflections
-                     , base         >= 4.2   && < 4.10
                     , QuickCheck   >= 2.7.6 && < 3.0
-                     , containers   >= 0.5   && < 0.6
+                     , base         >= 4.2   && < 4.10
                     , hspec        >= 2.0   && < 3.0
                     , megaparsec   >= 5.0   && < 6.0
+                     , text         >= 0.2   && < 1.3
  if flag(dev)
    ghc-options:      -Wall -Werror
  else
--- a/test/Text/Inflections/HumanizeSpec.hs
+++ b/test/Text/Inflections/HumanizeSpec.hs
@ -1,3 +1,5 @@
+{-# LANGUAGE OverloadedStrings #-}
+
 module Text.Inflections.HumanizeSpec (spec) where

 import Test.Hspec
--- a/test/Text/Inflections/OrdinalSpec.hs
+++ b/test/Text/Inflections/OrdinalSpec.hs
@ -1,9 +1,12 @@
-{-# LANGUAGE CPP #-}
+{-# LANGUAGE CPP               #-}
+{-# LANGUAGE OverloadedStrings #-}

 module Text.Inflections.OrdinalSpec (spec) where

+import Data.Text (Text)
 import Test.Hspec
 import Test.QuickCheck.Property
+import qualified Data.Text as T

 import Text.Inflections (ordinal, ordinalize)

@ -57,11 +60,12 @@ fullOrdinals = do
 ordinalReturnsNotEmpty :: Spec
 ordinalReturnsNotEmpty =
  it "never returns empty" $ property $
-    property <$> not . null . (ordinal :: Integer -> String)
+    property <$> not . T.null . (ordinal :: Integer -> Text)

 ordinalizeContainsTheSameNumber :: Spec
 ordinalizeContainsTheSameNumber =
  it "always returns the number as part of the result" $ property ordinalizeSamePrefix

 ordinalizeSamePrefix :: Integer -> Bool
-ordinalizeSamePrefix n = show n == take (length $ show n) (ordinalize n)
+ordinalizeSamePrefix n = T.pack s == T.take (length s) (ordinalize n)
+  where s = show n
--- a/test/Text/Inflections/PropertiesSpec.hs
+++ b/test/Text/Inflections/PropertiesSpec.hs
@ -1,9 +1,12 @@
+{-# LANGUAGE OverloadedStrings #-}
+
 module Text.Inflections.PropertiesSpec (spec) where

 import Data.Char (toLower)
 import Data.List (group)
 import Test.Hspec
 import Test.QuickCheck
+import qualified Data.Text as T

 import Text.Inflections
 import Text.Inflections.Parse.Types (Word(..))
@ -46,44 +49,45 @@ dasherizeSpacedSentence =
 onlyValidCharacters :: Spec
 onlyValidCharacters =
  it "returns only valid characters" (property onlyValidCharactersPredicate)
-    where onlyValidCharactersPredicate sf = all (`elem` (alphaNumerics ++ "-_")) $ parameterize sf
+    where onlyValidCharactersPredicate sf
+            = T.all (`elem` (alphaNumerics ++ "-_")) $ parameterize (T.pack sf)

 notBeginWithSeparator :: Spec
 notBeginWithSeparator =
  it "never returns a string beginning ending with a separator" (property notBeginWithSeparatorPredicate)
      where
        notBeginWithSeparatorPredicate s =
-            let parameterized = parameterize s in
-            (not . null) parameterized ==> head parameterized /= '-'
+            let parameterized = parameterize (T.pack s) in
+            (not . T.null) parameterized ==> T.head parameterized /= '-'

 notEndWithSeparator :: Spec
 notEndWithSeparator =
  it "never returns a string beginning with a separator" (property notBeginWithSeparatorPredicate)
     where
       notBeginWithSeparatorPredicate s =
-            let parameterized = parameterize s in
-            (not . null) parameterized ==> last parameterized /= '-'
+            let parameterized = parameterize (T.pack s) in
+            (not . T.null) parameterized ==> T.last parameterized /= '-'

 noMissingAlphanumerics :: Spec
 noMissingAlphanumerics =
  it "returns every alphanumeric character from the input" (property noMissingAlphanumericsPredicate)
  where noMissingAlphanumericsPredicate s =
-            let parameterized = parameterize s in
+            let parameterized = parameterize (T.pack s) in
            all (\c -> c `notElem` alphaNumerics ||
                c `elem` (alphaNumerics ++ "-") &&
-                c `elem` parameterized) $ map toLower s
+                c `elem` T.unpack parameterized) $ map toLower s

 noMoreThanOneHyphen :: Spec
 noMoreThanOneHyphen =
  it "never returns a string with a sequence of dashes" (property noMoreThanOneHyphenPredicate)
  where noMoreThanOneHyphenPredicate s =
-            let parameterized = parameterize s in longestSequenceOf '-' parameterized <= 1
+            let parameterized = parameterize (T.pack s)
+            in longestSequenceOf '-' (T.unpack parameterized) <= 1

 longestSequenceOf :: Char -> String -> Int
 longestSequenceOf _ [] = 0
 longestSequenceOf c s =
    if null subseqLengths then 0 else maximum subseqLengths
-
  where subseqLengths = (map length . filter (\str -> head str == c) . group) s

 alphaNumerics :: String
--- a/test/Text/Inflections/TitleizeSpec.hs
+++ b/test/Text/Inflections/TitleizeSpec.hs
@ -1,3 +1,5 @@
+{-# LANGUAGE OverloadedStrings #-}
+
 module Text.Inflections.TitleizeSpec (spec) where

 import Test.Hspec
--- a/test/Text/Inflections/UnderscoreSpec.hs
+++ b/test/Text/Inflections/UnderscoreSpec.hs
@ -1,3 +1,5 @@
+{-# LANGUAGE OverloadedStrings #-}
+
 module Text.Inflections.UnderscoreSpec (spec) where

 import Test.Hspec
--- a/test/Text/InflectionsSpec.hs
+++ b/test/Text/InflectionsSpec.hs
@ -1,3 +1,5 @@
+{-# LANGUAGE OverloadedStrings #-}
+
 module Text.InflectionsSpec (spec) where

 import Test.Hspec
@ -12,14 +14,14 @@ spec = do
 camelCaseToSnakeCase :: Spec
 camelCaseToSnakeCase =
  it "converts camel case snake case" $
-    toUnderscore "camelCasedText" `shouldBe` "camel_cased_text"
+    toUnderscore "camelCasedText" `shouldReturn` "camel_cased_text"

 camelCaseToDashed :: Spec
 camelCaseToDashed =
  it "converts camel case to dashed" $
-    toDashed "camelCasedText" `shouldBe` "camel-cased-text"
+    toDashed "camelCasedText" `shouldReturn` "camel-cased-text"

 snakeCaseToCamelCase :: Spec
 snakeCaseToCamelCase =
  it "converts snake case to camel case" $
-     toCamelCased False "underscored_text" `shouldBe` "underscoredText"
+     toCamelCased False "underscored_text" `shouldReturn` "underscoredText"