mirror of
https://github.com/typeable/inflections-hs.git
synced 2024-08-15 17:10:21 +03:00
Merge pull request #38 from stackbuilders/refactoring-use-text
Major refactoring, switch to ‘Text’
This commit is contained in:
commit
03234409a0
@ -6,6 +6,12 @@
|
||||
|
||||
* Switched to Megaparsec 5 for parsing.
|
||||
|
||||
* Made the API use `Text` instead of `String` (which significally improved
|
||||
speed).
|
||||
|
||||
* The `toUnderscore`, `toDashed`, and `toCamelCased` are not partial
|
||||
anymore, now they operate in `MonadThrow`.
|
||||
|
||||
* Improved documentation.
|
||||
|
||||
## Inflections 0.2.0.1
|
||||
|
@ -103,66 +103,57 @@ module Text.Inflections
|
||||
)
|
||||
where
|
||||
|
||||
import Text.Inflections.Data ( Transliterations, defaultMap )
|
||||
import Control.Monad (liftM)
|
||||
import Control.Monad.Catch (MonadThrow (..))
|
||||
import Data.Text (Text)
|
||||
import Text.Inflections.Camelize (camelize, camelizeCustom)
|
||||
import Text.Inflections.Dasherize (dasherize)
|
||||
import Text.Inflections.Data (Transliterations, defaultMap)
|
||||
import Text.Inflections.Humanize (humanize)
|
||||
import Text.Inflections.Ordinal (ordinal, ordinalize)
|
||||
import Text.Inflections.Parameterize (parameterize, parameterizeCustom)
|
||||
import Text.Inflections.Parse.CamelCase (parseCamelCase)
|
||||
import Text.Inflections.Parse.SnakeCase (parseSnakeCase)
|
||||
import Text.Inflections.Parse.Types
|
||||
import Text.Inflections.Titleize (titleize)
|
||||
import Text.Inflections.Transliterate (transliterate, transliterateCustom)
|
||||
import Text.Inflections.Underscore (underscore)
|
||||
import Text.Megaparsec
|
||||
|
||||
import Text.Inflections.Parameterize ( parameterize, parameterizeCustom )
|
||||
|
||||
import Text.Inflections.Underscore ( underscore )
|
||||
|
||||
import Text.Inflections.Camelize ( camelize, camelizeCustom )
|
||||
|
||||
import Text.Inflections.Humanize ( humanize )
|
||||
|
||||
import Text.Inflections.Titleize ( titleize )
|
||||
|
||||
import Text.Inflections.Transliterate ( transliterate, transliterateCustom )
|
||||
|
||||
import Text.Inflections.Dasherize ( dasherize )
|
||||
|
||||
import Text.Inflections.Ordinal ( ordinal, ordinalize )
|
||||
|
||||
import Text.Inflections.Parse.SnakeCase ( parseSnakeCase )
|
||||
|
||||
import Text.Inflections.Parse.Types ( mapWord )
|
||||
|
||||
import Text.Inflections.Parse.CamelCase ( parseCamelCase )
|
||||
|
||||
import Data.Char ( toLower )
|
||||
|
||||
-- | Transforms CamelCasedString to
|
||||
-- snake_cased_string_with_underscores. Throws exception if parsing failed
|
||||
-- | Transforms CamelCasedString to snake_cased_string_with_underscores. In
|
||||
-- case of failed parsing 'InflectionException' is thrown.
|
||||
--
|
||||
-- >>> toUnderscore "FooBarBazz"
|
||||
-- "foo_bar_bazz"
|
||||
toUnderscore :: String -> String
|
||||
toUnderscore =
|
||||
underscore
|
||||
. either (error . ("toUnderscore: " ++) . show) id
|
||||
. parseCamelCase []
|
||||
toUnderscore :: MonadThrow m => Text -> m Text
|
||||
toUnderscore = liftM underscore . handleEither . parseCamelCase []
|
||||
{-# INLINE toUnderscore #-}
|
||||
|
||||
-- | Transforms CamelCasedString to snake-cased-string-with-dashes. Throws
|
||||
-- exception if parsing failed.
|
||||
-- | Transforms CamelCasedString to snake-cased-string-with-dashes. In case
|
||||
-- of failed parsing 'InflectionException' is thrown.
|
||||
--
|
||||
-- >>> toDashed "FooBarBazz"
|
||||
-- "foo-bar-bazz"
|
||||
toDashed :: String -> String
|
||||
toDashed =
|
||||
dasherize
|
||||
. map (mapWord (map toLower))
|
||||
. either (error . ("toDashed: " ++) . show) id
|
||||
. parseCamelCase []
|
||||
toDashed :: MonadThrow m => Text -> m Text
|
||||
toDashed = liftM dasherize . handleEither . parseCamelCase []
|
||||
{-# INLINE toDashed #-}
|
||||
|
||||
-- | Transforms underscored_text to CamelCasedText. If first argument is
|
||||
-- 'True' then FirstCharacter in result string will be in upper case. If
|
||||
-- 'False' then firstCharacter will be in lower case. Throws exception if
|
||||
-- parsing failed
|
||||
-- 'False' then firstCharacter will be in lower case. In case of failed
|
||||
-- parsing 'InflectionException' is thrown.
|
||||
--
|
||||
-- >>> toCamelCased True "foo_bar_bazz"
|
||||
-- "FooBarBazz"
|
||||
-- >>> toCamelCased False "foo_bar_bazz"
|
||||
-- "fooBarBazz"
|
||||
toCamelCased :: Bool -> String -> String
|
||||
toCamelCased t =
|
||||
camelizeCustom t
|
||||
. either (error . ("toCamelCased: " ++) . show) id
|
||||
. parseSnakeCase []
|
||||
toCamelCased :: MonadThrow m => Bool -> Text -> m Text
|
||||
toCamelCased t = liftM (camelizeCustom t) . handleEither . parseSnakeCase []
|
||||
{-# INLINE toCamelCased #-}
|
||||
|
||||
-- | Take an 'Either' that can contain a parser error and throw it if
|
||||
-- necessary. If everything is OK, just return 'Right' value.
|
||||
handleEither :: MonadThrow m => Either (ParseError Char Dec) a -> m a
|
||||
handleEither (Left err) = throwM (InflectionParsingFailed err)
|
||||
handleEither (Right x) = return x
|
||||
{-# INLINE handleEither #-}
|
||||
|
@ -9,13 +9,23 @@
|
||||
--
|
||||
-- Conversion to CamelCased phrases.
|
||||
|
||||
module Text.Inflections.Camelize ( camelize, camelizeCustom ) where
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
import Text.Inflections.Parse.Types (Word(..))
|
||||
module Text.Inflections.Camelize
|
||||
( camelize
|
||||
, camelizeCustom )
|
||||
where
|
||||
|
||||
import Data.Char (toUpper, toLower)
|
||||
import Data.Text (Text)
|
||||
import Text.Inflections.Parse.Types
|
||||
import qualified Data.Text as T
|
||||
|
||||
import Prelude (String, Bool(..), concatMap, (.), zip, ($), repeat)
|
||||
#if MIN_VERSION_base(4,8,0)
|
||||
import Prelude hiding (Word)
|
||||
#else
|
||||
import Control.Applicative
|
||||
#endif
|
||||
|
||||
-- |Turns a an input Word List in into CamelCase. Returns the CamelCase String.
|
||||
--
|
||||
@ -23,8 +33,9 @@ import Prelude (String, Bool(..), concatMap, (.), zip, ($), repeat)
|
||||
-- "FoobarBazz"
|
||||
camelize
|
||||
:: [Word] -- ^ Input Words to separate with underscores
|
||||
-> String -- ^ The camelized String
|
||||
-> Text -- ^ The camelized 'Text'
|
||||
camelize = camelizeCustom True
|
||||
{-# INLINE camelize #-}
|
||||
|
||||
-- |Turns an input Word List into a CamelCase String.
|
||||
--
|
||||
@ -33,16 +44,8 @@ camelize = camelizeCustom True
|
||||
camelizeCustom
|
||||
:: Bool -- ^ Whether to capitalize the first character in the output String
|
||||
-> [Word] -- ^ The input Words
|
||||
-> String -- ^ The camelized String
|
||||
camelizeCustom isFirstCap = concatMap (caseForWord isFirstCap) . isFirstList
|
||||
|
||||
caseForWord :: Bool -> (Word, Bool) -> String
|
||||
caseForWord True (Word (c:cs), True) = toUpper c : cs
|
||||
caseForWord False (Word (c:cs), True) = toLower c : cs
|
||||
caseForWord _ (Word (c:cs), _) = toUpper c : cs
|
||||
caseForWord _ (Word [], _) = []
|
||||
caseForWord _ (Acronym s, _) = s
|
||||
|
||||
-- |Returns list with Bool indicating if an element is first.
|
||||
isFirstList :: [a] -> [(a, Bool)]
|
||||
isFirstList xs = zip xs $ True : repeat False
|
||||
-> Text -- ^ The camelized 'Text'
|
||||
camelizeCustom _ [] = ""
|
||||
camelizeCustom c (x:xs) = T.concat $
|
||||
(mapWord (if c then T.toTitle else T.toLower) x) : (mapWord T.toTitle <$> xs)
|
||||
{-# INLINE camelizeCustom #-}
|
||||
|
@ -9,12 +9,14 @@
|
||||
--
|
||||
-- Conversion to dasherized phrases.
|
||||
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Text.Inflections.Dasherize ( dasherize ) where
|
||||
|
||||
import Data.List (intercalate)
|
||||
import Data.Text (Text)
|
||||
import Text.Inflections.Parse.Types
|
||||
import qualified Data.Text as T
|
||||
|
||||
#if MIN_VERSION_base(4,8,0)
|
||||
import Prelude hiding (Word)
|
||||
@ -26,5 +28,6 @@ import Prelude hiding (Word)
|
||||
-- "foo-bar-bazz"
|
||||
dasherize
|
||||
:: [Word] -- ^ Input Words to separate with dashes
|
||||
-> String -- ^ The dasherized String
|
||||
dasherize = intercalate "-" . fmap unWord
|
||||
-> Text -- ^ The dasherized String
|
||||
dasherize = T.intercalate "-" . fmap (mapWord T.toLower)
|
||||
{-# INLINE dasherize #-}
|
||||
|
@ -11,12 +11,12 @@
|
||||
|
||||
module Text.Inflections.Data where
|
||||
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map as M
|
||||
import Data.HashMap.Strict (HashMap)
|
||||
import qualified Data.HashMap.Strict as M
|
||||
|
||||
-- |A 'Data.Map.Map' containing mappings from international characters to
|
||||
-- sequences approximating these characters within the ASCII range.
|
||||
type Transliterations = Map Char String
|
||||
type Transliterations = HashMap Char String
|
||||
|
||||
-- |These default transliterations stolen from the Ruby i18n library - see
|
||||
-- <https://github.com/svenfuchs/i18n/blob/master/lib/i18n/backend/transliterator.rb#L41:L69>.
|
||||
|
@ -9,32 +9,34 @@
|
||||
--
|
||||
-- Conversion to “humanized” phrases.
|
||||
|
||||
module Text.Inflections.Humanize (humanize) where
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
import Text.Inflections.Parse.Types (Word(..))
|
||||
module Text.Inflections.Humanize
|
||||
( humanize )
|
||||
where
|
||||
|
||||
import Data.Char (toUpper)
|
||||
import Data.Text (Text)
|
||||
import Text.Inflections.Parse.Types
|
||||
import qualified Data.Text as T
|
||||
|
||||
import Prelude (String, Bool(..), (.), map, zip, ($), unwords, repeat)
|
||||
#if MIN_VERSION_base(4,8,0)
|
||||
import Prelude hiding (Word)
|
||||
#else
|
||||
import Control.Applicative
|
||||
#endif
|
||||
|
||||
-- |Capitalizes the first word and turns underscores into spaces. Like titleize,
|
||||
-- this is meant for creating pretty output.
|
||||
-- |Capitalizes the first word and turns underscores into spaces. Like
|
||||
-- 'Text.Inflections.Titleize.titleize', this is meant for creating pretty
|
||||
-- output.
|
||||
--
|
||||
-- >>> humanize [ Word "foo", Acronym "bar", Word "bazz" ]
|
||||
-- "Foo bar bazz"
|
||||
humanize
|
||||
:: [Word] -- ^ List of Words, first of which will be capitalized
|
||||
-> String -- ^ The humanized output
|
||||
humanize = unwords . map caseForWord . isFirstList
|
||||
|
||||
-- |Returns list with Bool indicating if an element is first.
|
||||
isFirstList :: [a] -> [(a, Bool)]
|
||||
isFirstList xs = zip xs $ True : repeat False
|
||||
|
||||
-- | Convert given 'Word' to capitalized 'String' when associated Boolean
|
||||
-- value is 'True'.
|
||||
caseForWord :: (Word, Bool) -> String
|
||||
caseForWord (Word (c:cs), True) = toUpper c : cs
|
||||
caseForWord (Word s, False) = s
|
||||
caseForWord (Word [], _) = []
|
||||
caseForWord (Acronym s, _) = s -- Acronyms are left intact
|
||||
-> Text -- ^ The humanized output
|
||||
humanize xs' =
|
||||
case mapWord (T.replace "_" " ") <$> xs' of
|
||||
[] -> ""
|
||||
(x:xs) -> T.unwords $ T.toTitle x : (T.toLower <$> xs)
|
||||
{-# INLINE humanize #-}
|
||||
|
@ -9,9 +9,17 @@
|
||||
--
|
||||
-- Conversion to spelled ordinal numbers.
|
||||
|
||||
module Text.Inflections.Ordinal (ordinal, ordinalize)
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Text.Inflections.Ordinal
|
||||
( ordinal
|
||||
, ordinalize )
|
||||
where
|
||||
|
||||
import Data.Monoid ((<>))
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
|
||||
-- |Returns the suffix that should be added to a number to denote the position
|
||||
-- in an ordered sequence such as 1st, 2nd, 3rd, 4th.
|
||||
--
|
||||
@ -21,7 +29,7 @@ where
|
||||
-- "nd"
|
||||
-- >>> ordinal 10
|
||||
-- "th"
|
||||
ordinal :: Integral a => a -> String
|
||||
ordinal :: Integral a => a -> Text
|
||||
ordinal number
|
||||
| remainder100 `elem` [11..13] = "th"
|
||||
| remainder10 == 1 = "st"
|
||||
@ -41,5 +49,6 @@ ordinal number
|
||||
-- "2nd"
|
||||
-- >>> ordinalize 10
|
||||
-- "10th"
|
||||
ordinalize :: (Integral a, Show a) => a -> String
|
||||
ordinalize n = show n ++ ordinal n
|
||||
ordinalize :: (Integral a, Show a) => a -> Text
|
||||
ordinalize n = T.pack (show n) <> ordinal n
|
||||
{-# INLINE ordinalize #-}
|
||||
|
@ -9,124 +9,40 @@
|
||||
--
|
||||
-- Parametrization for strings, useful for transliteration.
|
||||
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Text.Inflections.Parameterize
|
||||
( parameterize
|
||||
, parameterizeCustom )
|
||||
where
|
||||
|
||||
import Control.Monad (guard)
|
||||
import Data.Char (isAscii, isAsciiLower, isAsciiUpper, isDigit, toLower)
|
||||
import Data.List (group)
|
||||
import Data.Maybe (mapMaybe)
|
||||
import Data.Char (isAscii, isAlphaNum, isPunctuation, toLower)
|
||||
import Data.Text (Text)
|
||||
import Text.Inflections.Data
|
||||
import Text.Megaparsec
|
||||
import Text.Megaparsec.String
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.HashMap.Strict as M
|
||||
import qualified Data.Text as T
|
||||
|
||||
#if !MIN_VERSION_base(4,8,0)
|
||||
import Control.Applicative
|
||||
#endif
|
||||
|
||||
-- | Parametrizable character.
|
||||
|
||||
data PChar
|
||||
= UCase {-# UNPACK #-} !Char -- ^ Uppercase charater
|
||||
| Acceptable String
|
||||
-- ^ Since some of the transliterating approximations expand from one
|
||||
-- Unicode to two ASCII chars (eg., œ to oe), we represent this as a
|
||||
-- String.
|
||||
| Separator -- ^ Separator
|
||||
| Underscore -- ^ Underscore
|
||||
| OtherAscii {-# UNPACK #-} !Char -- ^ Other ASCII charater
|
||||
| NonAscii {-# UNPACK #-} !Char -- ^ Non-ASCII charater
|
||||
deriving (Eq, Show)
|
||||
|
||||
-- |Matches 'acceptable' characters for parameterization purposes.
|
||||
acceptableParser :: Parser PChar
|
||||
acceptableParser = do
|
||||
c <- satisfy ((||) <$> isAsciiLower <*> isDigit)
|
||||
return $ Acceptable [c]
|
||||
|
||||
parser :: Parser [PChar]
|
||||
parser = many $ label "a parametrizable character" $ choice
|
||||
[ acceptableParser
|
||||
, UCase <$> satisfy isAsciiUpper
|
||||
, Separator <$ char '-'
|
||||
, Underscore <$ char '_'
|
||||
, OtherAscii <$> asciiChar
|
||||
, NonAscii <$> satisfy (not . isAscii) ]
|
||||
|
||||
-- | Parser that accepts rows of parametrizable characters.
|
||||
-- | Check if given char is “acceptable”, that is, it's lowercase ASCII
|
||||
-- letter or digit.
|
||||
isValidParamChar :: Char -> Bool
|
||||
isValidParamChar c = isAsciiLower c || isDigit c
|
||||
|
||||
-- |Given a Transliteration table and a PChar, returns Maybe PChar indicating
|
||||
-- how this character should appear in a URL.
|
||||
parameterizeChar :: Transliterations -> PChar -> Maybe PChar
|
||||
parameterizeChar _ (UCase c) = Just $ Acceptable [toLower c]
|
||||
parameterizeChar _ (Acceptable c) = Just $ Acceptable c
|
||||
parameterizeChar _ Separator = Just Separator
|
||||
parameterizeChar _ Underscore = Just Underscore
|
||||
parameterizeChar _ (OtherAscii _) = Just Separator
|
||||
parameterizeChar ts (NonAscii c) = transliteratePCharCustom ts c
|
||||
|
||||
-- |Look up character in transliteration list. Accepts a Transliteration map
|
||||
-- which has Chars as keys and Strings as values for approximating common
|
||||
-- international Unicode characters within the ASCII range.
|
||||
transliteratePCharCustom :: Transliterations -> Char -> Maybe PChar
|
||||
transliteratePCharCustom ts c = do
|
||||
-- We may have expanded into multiple characters during
|
||||
-- transliteration, so check validity of all characters in
|
||||
-- result.
|
||||
v <- Map.lookup c ts
|
||||
guard (all isValidParamChar v)
|
||||
return (Acceptable v)
|
||||
|
||||
-- |Turns PChar tokens into their String representation.
|
||||
pCharToC :: PChar -> String
|
||||
pCharToC (UCase c) = [c]
|
||||
pCharToC (Acceptable str) = str
|
||||
pCharToC Separator = "-"
|
||||
pCharToC Underscore = "_"
|
||||
pCharToC (OtherAscii c) = [c]
|
||||
pCharToC (NonAscii c) = [c]
|
||||
|
||||
-- |Reduce sequences of separators down to only one separator.
|
||||
squeezeSeparators :: [PChar] -> [PChar]
|
||||
squeezeSeparators ps = concatMap squashSeparatorGroup $ group ps
|
||||
where squashSeparatorGroup g = case head g of
|
||||
Separator -> [Separator] -- only take head
|
||||
_ -> g -- don't change
|
||||
|
||||
-- |Replaces special characters in a string so that it may be used as part of a
|
||||
-- 'pretty' URL. Uses the default transliterations in this library
|
||||
parameterize :: String -> String
|
||||
-- 'pretty' URL. Uses the default transliterations in this library.
|
||||
parameterize :: Text -> Text
|
||||
parameterize = parameterizeCustom defaultMap
|
||||
{-# INLINE parameterize #-}
|
||||
|
||||
-- |Transliterate a String with a custom transliteration table.
|
||||
parameterizeCustom :: Transliterations -> String -> String
|
||||
parameterizeCustom ts s =
|
||||
case parsed of
|
||||
Right ast -> (concatMap pCharToC . squeezeSeparators .
|
||||
trimUnwanted wanted . mapMaybe (parameterizeChar ts))
|
||||
ast
|
||||
|
||||
-- Note that this should never fail, since we accommodate all Unicode
|
||||
-- characters as valid input.
|
||||
Left err -> fail $ "Parse failed, please report a bug! Error: " ++
|
||||
show err
|
||||
|
||||
where parsed = parse parser "" s
|
||||
wanted :: [PChar] -- All valid URL chars - we shouldn't trim these.
|
||||
wanted = Underscore :
|
||||
map (Acceptable . (: [])) (['a'..'z'] ++ ['0'..'9'])
|
||||
|
||||
-- |Trim non-wanted elements from the beginning and end of list.
|
||||
trimUnwanted :: Eq a => [a] -> [a] -> [a]
|
||||
trimUnwanted wanted = dropWhile notWanted . reverse . dropWhile notWanted
|
||||
. reverse
|
||||
where notWanted = (`notElem` wanted)
|
||||
-- |Transliterate 'Text' with a custom transliteration table.
|
||||
parameterizeCustom :: Transliterations -> Text -> Text
|
||||
parameterizeCustom m txt = (T.intercalate "-" . T.words) (T.unfoldr f ("", txt))
|
||||
where
|
||||
f ("", t) = uncurry g <$> T.uncons t
|
||||
f ((x:xs), t) = Just (x, (xs, t))
|
||||
g x xs
|
||||
| (isAscii x && isAlphaNum x) || x == '_' = (toLower x, ("", xs))
|
||||
| isPunctuation x = (' ', ("", xs))
|
||||
| otherwise =
|
||||
case toLower <$> M.lookupDefault " " x m of
|
||||
"" -> (' ', ("",xs))
|
||||
(y:ys) -> (y, (ys,xs))
|
||||
|
@ -11,17 +11,22 @@
|
||||
|
||||
{-# LANGUAGE CPP #-}
|
||||
|
||||
module Text.Inflections.Parse.Acronym ( acronym ) where
|
||||
module Text.Inflections.Parse.Acronym
|
||||
( acronym )
|
||||
where
|
||||
|
||||
import Data.Text (Text)
|
||||
import Text.Inflections.Parse.Types
|
||||
import Text.Megaparsec
|
||||
import Text.Megaparsec.String
|
||||
import Text.Megaparsec.Text
|
||||
import qualified Data.Text as T
|
||||
|
||||
#if MIN_VERSION_base(4,8,0)
|
||||
import Prelude hiding (Word)
|
||||
#endif
|
||||
|
||||
-- | Parser that accepts a string from given collection and turns it into
|
||||
-- | Parser that accepts a 'Text' from given collection and turns it into
|
||||
-- an 'Acronym'.
|
||||
acronym :: [String] -> Parser Word
|
||||
acronym = fmap Acronym . choice . fmap string
|
||||
acronym :: [Text] -> Parser Word
|
||||
acronym = fmap (Acronym . T.pack) . choice . fmap (string . T.unpack)
|
||||
{-# INLINE acronym #-}
|
||||
|
@ -9,15 +9,19 @@
|
||||
--
|
||||
-- Parser for camel case “symbols”.
|
||||
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Text.Inflections.Parse.CamelCase ( parseCamelCase )
|
||||
module Text.Inflections.Parse.CamelCase
|
||||
( parseCamelCase )
|
||||
where
|
||||
|
||||
import Data.Text (Text)
|
||||
import Text.Inflections.Parse.Acronym (acronym)
|
||||
import Text.Inflections.Parse.Types (Word(..))
|
||||
import Text.Megaparsec
|
||||
import Text.Megaparsec.String
|
||||
import Text.Megaparsec.Text
|
||||
import qualified Data.Text as T
|
||||
|
||||
#if MIN_VERSION_base(4,8,0)
|
||||
import Prelude hiding (Word)
|
||||
@ -25,7 +29,7 @@ import Prelude hiding (Word)
|
||||
import Control.Applicative
|
||||
#endif
|
||||
|
||||
-- |Parses a CamelCase string.
|
||||
-- |Parse a CamelCase string.
|
||||
--
|
||||
-- >>> parseCamelCase ["Bar"] "FooBarBazz"
|
||||
-- Right [Word "Foo",Acronym "Bar",Word "Bazz"]
|
||||
@ -33,18 +37,20 @@ import Control.Applicative
|
||||
-- Left "(unknown)" (line 1, column 4):
|
||||
-- unexpected '_'
|
||||
parseCamelCase
|
||||
:: [String] -- ^ Collection of acronyms
|
||||
-> String -- ^ Input
|
||||
:: [Text] -- ^ Collection of acronyms
|
||||
-> Text -- ^ Input
|
||||
-> Either (ParseError Char Dec) [Word] -- ^ Result of parsing
|
||||
parseCamelCase acronyms = parse (parser acronyms) ""
|
||||
|
||||
parser
|
||||
:: [String] -- ^ Collection of acronyms
|
||||
:: [Text] -- ^ Collection of acronyms
|
||||
-> Parser [Word] -- ^ CamelCase parser
|
||||
parser acronyms = many (acronym acronyms <|> word) <* eof
|
||||
{-# INLINE parser #-}
|
||||
|
||||
word :: Parser Word
|
||||
word = do
|
||||
firstChar <- upperChar <|> lowerChar
|
||||
restChars <- many lowerChar
|
||||
return . Word $ firstChar : restChars
|
||||
return . Word . T.pack $ firstChar : restChars
|
||||
{-# INLINE word #-}
|
||||
|
@ -11,13 +11,16 @@
|
||||
|
||||
{-# LANGUAGE CPP #-}
|
||||
|
||||
module Text.Inflections.Parse.SnakeCase ( parseSnakeCase )
|
||||
module Text.Inflections.Parse.SnakeCase
|
||||
( parseSnakeCase )
|
||||
where
|
||||
|
||||
import Data.Text (Text)
|
||||
import Text.Inflections.Parse.Acronym (acronym)
|
||||
import Text.Inflections.Parse.Types (Word(..))
|
||||
import Text.Megaparsec
|
||||
import Text.Megaparsec.String
|
||||
import Text.Megaparsec.Text
|
||||
import qualified Data.Text as T
|
||||
|
||||
#if MIN_VERSION_base(4,8,0)
|
||||
import Prelude hiding (Word)
|
||||
@ -33,17 +36,20 @@ import Control.Applicative
|
||||
-- Left "(unknown)" (line 1, column 4):
|
||||
-- unexpected 'B'
|
||||
parseSnakeCase
|
||||
:: [String] -- ^ Collection of acronyms
|
||||
-> String -- ^ Input
|
||||
:: [Text] -- ^ Collection of acronyms
|
||||
-> Text -- ^ Input
|
||||
-> Either (ParseError Char Dec) [Word] -- ^ Result of parsing
|
||||
parseSnakeCase acronyms = parse (parser acronyms) ""
|
||||
|
||||
parser
|
||||
:: [String] -> Parser [Word]
|
||||
:: [Text]
|
||||
-> Parser [Word]
|
||||
parser acronyms = do
|
||||
ws <- (acronym acronyms <|> word) `sepBy` char '_'
|
||||
eof
|
||||
return ws
|
||||
{-# INLINE parser #-}
|
||||
|
||||
word :: Parser Word
|
||||
word = Word <$> (some lowerChar <|> some digitChar)
|
||||
word = Word . T.pack <$> (some lowerChar <|> some digitChar)
|
||||
{-# INLINE word #-}
|
||||
|
@ -9,32 +9,46 @@
|
||||
--
|
||||
-- Types used in the library.
|
||||
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE DeriveDataTypeable #-}
|
||||
|
||||
module Text.Inflections.Parse.Types
|
||||
( Word (..) -- FIXME we should not export the constructor
|
||||
, unWord
|
||||
, mapWord )
|
||||
, mapWord
|
||||
, InflectionException (..) )
|
||||
where
|
||||
|
||||
import Control.Monad.Catch (Exception)
|
||||
import Data.Text (Text)
|
||||
import Data.Typeable (Typeable)
|
||||
import Text.Megaparsec
|
||||
|
||||
#if MIN_VERSION_base(4,8,0)
|
||||
import Prelude hiding (Word)
|
||||
#endif
|
||||
|
||||
-- | A 'String' that should be kept whole through applied inflections
|
||||
data Word
|
||||
= Word String -- ^ A word that may be transformed by inflection
|
||||
| Acronym String -- ^ A word that may not be transformed by inflections
|
||||
= Word Text -- ^ A word that may be transformed by inflection
|
||||
| Acronym Text -- ^ A word that may not be transformed by inflections
|
||||
deriving (Show, Eq)
|
||||
|
||||
-- | Get a 'String' from 'Word'.
|
||||
unWord :: Word -> String
|
||||
-- | Get a 'Text' value from 'Word'.
|
||||
unWord :: Word -> Text
|
||||
unWord (Word s) = s
|
||||
unWord (Acronym s) = s
|
||||
{-# INLINE unWord #-}
|
||||
|
||||
-- | Apply 'String' transforming function to a 'Word'.
|
||||
mapWord :: (String -> String) -> Word -> Word
|
||||
mapWord f (Word s) = Word (f s)
|
||||
mapWord f (Acronym s) = Acronym (f s)
|
||||
-- | Apply 'Text' transforming function to a 'Word' unless it's a 'Acronym'.
|
||||
mapWord :: (Text -> Text) -> Word -> Text
|
||||
mapWord f (Word s) = f s
|
||||
mapWord _ (Acronym s) = s
|
||||
{-# INLINE mapWord #-}
|
||||
|
||||
-- | The exceptions that is thrown when parsing of input fails.
|
||||
|
||||
data InflectionException = InflectionParsingFailed (ParseError Char Dec)
|
||||
deriving (Eq, Show, Typeable)
|
||||
|
||||
instance Exception InflectionException
|
||||
|
@ -9,25 +9,26 @@
|
||||
--
|
||||
-- Conversion to titleized phrases.
|
||||
|
||||
module Text.Inflections.Titleize (titleize) where
|
||||
{-# LANGUAGE CPP #-}
|
||||
|
||||
import Text.Inflections.Parse.Types (Word(..))
|
||||
module Text.Inflections.Titleize
|
||||
( titleize )
|
||||
where
|
||||
|
||||
import Data.Char (toUpper)
|
||||
import Data.Text (Text)
|
||||
import Text.Inflections.Parse.Types
|
||||
import qualified Data.Text as T
|
||||
|
||||
import Prelude (String, unwords, map, ($))
|
||||
#if MIN_VERSION_base(4,8,0)
|
||||
import Prelude hiding (Word)
|
||||
#endif
|
||||
|
||||
-- | Capitalizes all the Words in the input list.
|
||||
-- | Capitalize all the Words in the input list.
|
||||
--
|
||||
-- >>> titleize [ Word "foo", Acronym "bar", Word "bazz" ]
|
||||
-- "Foo bar Bazz"
|
||||
titleize
|
||||
:: [Word] -- ^ List of Words, first of which will be capitalized
|
||||
-> String -- ^ The titleized String
|
||||
titleize s = unwords $ map upperCaseWord s
|
||||
|
||||
-- | Transform 'Word' into an upper-cased 'String'.
|
||||
upperCaseWord :: Word -> String
|
||||
upperCaseWord (Word (c:cs)) = toUpper c : cs
|
||||
upperCaseWord (Word []) = []
|
||||
upperCaseWord (Acronym s) = s -- Acronyms are left intact
|
||||
-> Text -- ^ The titleized String
|
||||
titleize = T.unwords . fmap (mapWord T.toTitle)
|
||||
{-# INLINE titleize #-}
|
||||
|
@ -9,30 +9,47 @@
|
||||
--
|
||||
-- Support for transliteration.
|
||||
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Text.Inflections.Transliterate
|
||||
( transliterate
|
||||
, transliterateCustom
|
||||
)
|
||||
, transliterateCustom )
|
||||
where
|
||||
|
||||
import Data.Char (isAscii)
|
||||
import Data.Maybe(fromMaybe)
|
||||
import Data.Text (Text)
|
||||
import Text.Inflections.Data
|
||||
import qualified Data.Map as Map
|
||||
import qualified Data.HashMap.Strict as M
|
||||
import qualified Data.Text as T
|
||||
|
||||
-- |Returns a String after default approximations for changing Unicode characters
|
||||
-- to a valid ASCII range are applied. If you want to supplement the default
|
||||
-- approximations with your own, you should use the transliterateCustom
|
||||
-- function instead of transliterate.
|
||||
transliterate :: String -> String
|
||||
#if !MIN_VERSION_base(4,8,0)
|
||||
import Control.Applicative
|
||||
#endif
|
||||
|
||||
-- |Returns a String after default approximations for changing Unicode
|
||||
-- characters to a valid ASCII range are applied. If you want to supplement
|
||||
-- the default approximations with your own, you should use the
|
||||
-- 'transliterateCustom' function instead of 'transliterate'.
|
||||
transliterate :: Text -> Text
|
||||
transliterate = transliterateCustom "?" defaultMap
|
||||
{-# INLINE transliterate #-}
|
||||
|
||||
-- |Returns a String after default approximations for changing Unicode characters
|
||||
-- to a valid ASCII range are applied.
|
||||
transliterateCustom :: String -> Transliterations -> String -> String
|
||||
transliterateCustom replacement ts = concatMap lookupCharTransliteration
|
||||
where lookupCharTransliteration c =
|
||||
if isAscii c then -- Don't bother looking up Chars in ASCII range
|
||||
[c]
|
||||
else
|
||||
fromMaybe replacement (Map.lookup c ts)
|
||||
-- |Returns a String after default approximations for changing Unicode
|
||||
-- characters to a valid ASCII range are applied.
|
||||
transliterateCustom
|
||||
:: String -- ^ The default replacement
|
||||
-> Transliterations -- ^ The table of transliterations
|
||||
-> Text -- ^ The input
|
||||
-> Text -- ^ The output
|
||||
transliterateCustom replacement m txt = T.unfoldr f ("", txt)
|
||||
where
|
||||
f ("", t) = uncurry g <$> T.uncons t
|
||||
f ((x:xs), t) = Just (x, (xs, t))
|
||||
g x xs =
|
||||
if isAscii x
|
||||
then (x, ("", xs))
|
||||
else
|
||||
case M.lookupDefault replacement x m of
|
||||
"" -> ('?', ("",xs))
|
||||
(y:ys) -> (y, (ys,xs))
|
||||
|
@ -9,25 +9,27 @@
|
||||
--
|
||||
-- Conversion to phrases separated by underscores.
|
||||
|
||||
module Text.Inflections.Underscore ( underscore ) where
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
import Text.Inflections.Parse.Types (Word(..))
|
||||
module Text.Inflections.Underscore
|
||||
( underscore )
|
||||
where
|
||||
|
||||
import Data.Char (toLower)
|
||||
import Data.List (intercalate)
|
||||
import Data.Text (Text)
|
||||
import Text.Inflections.Parse.Types
|
||||
import qualified Data.Text as T
|
||||
|
||||
import Prelude (String, (.), map)
|
||||
#if MIN_VERSION_base(4,8,0)
|
||||
import Prelude hiding (Word)
|
||||
#endif
|
||||
|
||||
-- |Turns a CamelCase string into an underscore_separated String.
|
||||
-- |Turns a CamelCase string into an underscore_separated 'Text'.
|
||||
--
|
||||
-- >>> underscore [ Word "foo", Acronym "bar", Word "bazz" ]
|
||||
-- "foo_bar_bazz"
|
||||
underscore
|
||||
:: [Word] -- ^ Input Words to separate with underscores
|
||||
-> String -- ^ The underscored String
|
||||
underscore = intercalate "_" . map toDowncasedString
|
||||
|
||||
-- | Transform 'Word' into a down-cased 'String'.
|
||||
toDowncasedString :: Word -> String
|
||||
toDowncasedString (Acronym s) = map toLower s
|
||||
toDowncasedString (Word s) = map toLower s
|
||||
-> Text -- ^ The underscored String
|
||||
underscore = T.intercalate "_" . fmap (mapWord T.toLower)
|
||||
{-# INLINE underscore #-}
|
||||
|
@ -49,8 +49,10 @@ library
|
||||
else
|
||||
ghc-options: -O2 -Wall
|
||||
build-depends: base >= 4.6 && < 4.10
|
||||
, containers >= 0.5 && < 0.6
|
||||
, exceptions >= 0.6 && < 0.9
|
||||
, megaparsec >= 5.0 && < 6.0
|
||||
, text >= 0.2 && < 1.3
|
||||
, unordered-containers >= 0.2.7 && < 0.3
|
||||
default-language: Haskell2010
|
||||
|
||||
test-suite test
|
||||
@ -58,11 +60,11 @@ test-suite test
|
||||
hs-source-dirs: test
|
||||
main-is: Spec.hs
|
||||
build-depends: inflections
|
||||
, base >= 4.2 && < 4.10
|
||||
, QuickCheck >= 2.7.6 && < 3.0
|
||||
, containers >= 0.5 && < 0.6
|
||||
, base >= 4.2 && < 4.10
|
||||
, hspec >= 2.0 && < 3.0
|
||||
, megaparsec >= 5.0 && < 6.0
|
||||
, text >= 0.2 && < 1.3
|
||||
if flag(dev)
|
||||
ghc-options: -Wall -Werror
|
||||
else
|
||||
|
@ -1,3 +1,5 @@
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Text.Inflections.HumanizeSpec (spec) where
|
||||
|
||||
import Test.Hspec
|
||||
|
@ -1,9 +1,12 @@
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Text.Inflections.OrdinalSpec (spec) where
|
||||
|
||||
import Data.Text (Text)
|
||||
import Test.Hspec
|
||||
import Test.QuickCheck.Property
|
||||
import qualified Data.Text as T
|
||||
|
||||
import Text.Inflections (ordinal, ordinalize)
|
||||
|
||||
@ -57,11 +60,12 @@ fullOrdinals = do
|
||||
ordinalReturnsNotEmpty :: Spec
|
||||
ordinalReturnsNotEmpty =
|
||||
it "never returns empty" $ property $
|
||||
property <$> not . null . (ordinal :: Integer -> String)
|
||||
property <$> not . T.null . (ordinal :: Integer -> Text)
|
||||
|
||||
ordinalizeContainsTheSameNumber :: Spec
|
||||
ordinalizeContainsTheSameNumber =
|
||||
it "always returns the number as part of the result" $ property ordinalizeSamePrefix
|
||||
|
||||
ordinalizeSamePrefix :: Integer -> Bool
|
||||
ordinalizeSamePrefix n = show n == take (length $ show n) (ordinalize n)
|
||||
ordinalizeSamePrefix n = T.pack s == T.take (length s) (ordinalize n)
|
||||
where s = show n
|
||||
|
@ -1,9 +1,12 @@
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Text.Inflections.PropertiesSpec (spec) where
|
||||
|
||||
import Data.Char (toLower)
|
||||
import Data.List (group)
|
||||
import Test.Hspec
|
||||
import Test.QuickCheck
|
||||
import qualified Data.Text as T
|
||||
|
||||
import Text.Inflections
|
||||
import Text.Inflections.Parse.Types (Word(..))
|
||||
@ -46,44 +49,45 @@ dasherizeSpacedSentence =
|
||||
onlyValidCharacters :: Spec
|
||||
onlyValidCharacters =
|
||||
it "returns only valid characters" (property onlyValidCharactersPredicate)
|
||||
where onlyValidCharactersPredicate sf = all (`elem` (alphaNumerics ++ "-_")) $ parameterize sf
|
||||
where onlyValidCharactersPredicate sf
|
||||
= T.all (`elem` (alphaNumerics ++ "-_")) $ parameterize (T.pack sf)
|
||||
|
||||
notBeginWithSeparator :: Spec
|
||||
notBeginWithSeparator =
|
||||
it "never returns a string beginning ending with a separator" (property notBeginWithSeparatorPredicate)
|
||||
where
|
||||
notBeginWithSeparatorPredicate s =
|
||||
let parameterized = parameterize s in
|
||||
(not . null) parameterized ==> head parameterized /= '-'
|
||||
let parameterized = parameterize (T.pack s) in
|
||||
(not . T.null) parameterized ==> T.head parameterized /= '-'
|
||||
|
||||
notEndWithSeparator :: Spec
|
||||
notEndWithSeparator =
|
||||
it "never returns a string beginning with a separator" (property notBeginWithSeparatorPredicate)
|
||||
where
|
||||
notBeginWithSeparatorPredicate s =
|
||||
let parameterized = parameterize s in
|
||||
(not . null) parameterized ==> last parameterized /= '-'
|
||||
let parameterized = parameterize (T.pack s) in
|
||||
(not . T.null) parameterized ==> T.last parameterized /= '-'
|
||||
|
||||
noMissingAlphanumerics :: Spec
|
||||
noMissingAlphanumerics =
|
||||
it "returns every alphanumeric character from the input" (property noMissingAlphanumericsPredicate)
|
||||
where noMissingAlphanumericsPredicate s =
|
||||
let parameterized = parameterize s in
|
||||
let parameterized = parameterize (T.pack s) in
|
||||
all (\c -> c `notElem` alphaNumerics ||
|
||||
c `elem` (alphaNumerics ++ "-") &&
|
||||
c `elem` parameterized) $ map toLower s
|
||||
c `elem` T.unpack parameterized) $ map toLower s
|
||||
|
||||
noMoreThanOneHyphen :: Spec
|
||||
noMoreThanOneHyphen =
|
||||
it "never returns a string with a sequence of dashes" (property noMoreThanOneHyphenPredicate)
|
||||
where noMoreThanOneHyphenPredicate s =
|
||||
let parameterized = parameterize s in longestSequenceOf '-' parameterized <= 1
|
||||
let parameterized = parameterize (T.pack s)
|
||||
in longestSequenceOf '-' (T.unpack parameterized) <= 1
|
||||
|
||||
longestSequenceOf :: Char -> String -> Int
|
||||
longestSequenceOf _ [] = 0
|
||||
longestSequenceOf c s =
|
||||
if null subseqLengths then 0 else maximum subseqLengths
|
||||
|
||||
where subseqLengths = (map length . filter (\str -> head str == c) . group) s
|
||||
|
||||
alphaNumerics :: String
|
||||
|
@ -1,3 +1,5 @@
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Text.Inflections.TitleizeSpec (spec) where
|
||||
|
||||
import Test.Hspec
|
||||
|
@ -1,3 +1,5 @@
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Text.Inflections.UnderscoreSpec (spec) where
|
||||
|
||||
import Test.Hspec
|
||||
|
@ -1,3 +1,5 @@
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Text.InflectionsSpec (spec) where
|
||||
|
||||
import Test.Hspec
|
||||
@ -12,14 +14,14 @@ spec = do
|
||||
camelCaseToSnakeCase :: Spec
|
||||
camelCaseToSnakeCase =
|
||||
it "converts camel case snake case" $
|
||||
toUnderscore "camelCasedText" `shouldBe` "camel_cased_text"
|
||||
toUnderscore "camelCasedText" `shouldReturn` "camel_cased_text"
|
||||
|
||||
camelCaseToDashed :: Spec
|
||||
camelCaseToDashed =
|
||||
it "converts camel case to dashed" $
|
||||
toDashed "camelCasedText" `shouldBe` "camel-cased-text"
|
||||
toDashed "camelCasedText" `shouldReturn` "camel-cased-text"
|
||||
|
||||
snakeCaseToCamelCase :: Spec
|
||||
snakeCaseToCamelCase =
|
||||
it "converts snake case to camel case" $
|
||||
toCamelCased False "underscored_text" `shouldBe` "underscoredText"
|
||||
toCamelCased False "underscored_text" `shouldReturn` "underscoredText"
|
||||
|
Loading…
Reference in New Issue
Block a user