commit 2c22ed5daa93bb962a2433c4459ddad4f67b5384 Author: Justin Leitgeb Date: Sun Feb 23 08:34:46 2014 -0500 First commit diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..05ca57f --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +Copyright (c) 2014 Justin Leitgeb + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..9133bd1 --- /dev/null +++ b/README.md @@ -0,0 +1,33 @@ +# String Inflections for Haskell + +This library is a partial port of the [String Inflector](http://api.rubyonrails.org/classes/ActiveSupport/Inflector.html) from Ruby on Rails. Specifically, it implements the [`parameterize`](http://api.rubyonrails.org/classes/ActiveSupport/Inflector.html#method-i-parameterize) and [`dasherize`](http://api.rubyonrails.org/classes/ActiveSupport/Inflector.html#method-i-dasherize) functions from the Inflector. + +## Usage + +The most common usage of this library at this point is to parameterize a URL. This is accomplished as follows: + +```haskell +λ: parameterize defaultTransliterations "¡Feliz año nuevo!" +"feliz-ano-nuevo" +``` + +## Customization + +Part of parameterizing strings is approximating all characters in the input encoding to ASCII characters. This library copies the character approximation table from the Ruby i18n library. This data structure is provided as `defaultCharacterTransliterations`. You can provide your own transliteration map by passing a Map structure (from Data.Map) to the `parameterize` function. + +## TODO + +I'd like this library to implement other functions found in the Rails inflections library. If you need one of those functions, please submit a pull request! + +## Author + +Justin Leitgeb + +## Contributing + +You may submit pull requests to this repository on GitHub. Please add property +tests for any functional changes that you make to this library. + +## License + +MIT - see [the LICENSE file](LICENSE). diff --git a/Setup.hs b/Setup.hs new file mode 100644 index 0000000..9a994af --- /dev/null +++ b/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/Text/Inflections.hs b/Text/Inflections.hs new file mode 100644 index 0000000..fc05ef1 --- /dev/null +++ b/Text/Inflections.hs @@ -0,0 +1,156 @@ +{-# LANGUAGE FlexibleContexts, NoMonomorphismRestriction #-} + +module Text.Inflections + ( dasherize + , parameterize + , defaultTransliterations + ) where + +import Data.Char (toLower, isAsciiLower, isAsciiUpper, isAscii, isDigit) +import qualified Text.Parsec as P +import Control.Applicative +import qualified Text.ParserCombinators.Parsec.Char as C +import Data.List (group) +import Data.Maybe (mapMaybe) +import qualified Data.Map as Map + +type Transliterations = Map.Map Char String + +data PChar = UCase Char + -- Since some of the transliterating approximations expand from + -- one Unicode to two ASCII chars (eg., œ to oe), we represent + -- this as a String. + | Acceptable String + | Separator + | Underscore + | OtherAscii Char + | NonAscii Char + deriving (Eq, Show) + +-- |Replaces special characters in a string so that it may be used as part of a +-- 'pretty' URL. +parameterize :: Transliterations -> String -> String +parameterize ts s = + case parsed of + Right ast -> (concatMap pCharToC . squeezeSeparators . + trimUnwanted wanted . mapMaybe (parameterizeChar ts)) + ast + + -- Note that this should never fail, since we accommodate all Unicode + -- characters as valid input. + Left err -> fail $ "Parse failed, please report a bug! Error: " ++ + show err + + where parsed = P.parse parameterizableString "" s + wanted :: [PChar] -- All valid URL chars - we shouldn't trim these. + wanted = Underscore : + map (Acceptable . (: [])) (['a'..'z'] ++ ['0'..'9']) + +-- |Replaces underscores with dashes in the string. +dasherize :: String -> String +dasherize = map (\c -> if c == ' ' then '-' else c) + + +-- Private functions + +-- |Matches 'acceptable' characters for parameterization purposes. +acceptableParser :: P.Stream s m Char => P.ParsecT s u m PChar +acceptableParser = do + c <- C.satisfy isValidParamChar + return $ Acceptable [c] + +parameterizableString :: P.Stream s m Char => P.ParsecT s u m [PChar] +parameterizableString = P.many $ P.choice [ + acceptableParser + , UCase <$> C.satisfy isAsciiUpper + , C.char '-' >> return Separator + , C.char '_' >> return Underscore + , OtherAscii <$> C.satisfy isAscii + , NonAscii <$> C.satisfy (not . isAscii) + ] + +-- |Look up character in transliteration list. +transliterate :: Transliterations -> Char -> Maybe PChar +transliterate ts c = + case Map.lookup c ts of + Just v -> -- We may have expanded into multiple characters during + -- transliteration, so check validity of all characters in + -- result. + if all isValidParamChar v then + Just $ Acceptable v + else + Nothing + + Nothing -> Nothing + +isValidParamChar :: Char -> Bool +isValidParamChar c = isAsciiLower c || isDigit c + +-- |Given a Transliteration table and a PChar, returns Maybe PChar indicating +-- how this character should appear in a URL. +parameterizeChar :: Transliterations -> PChar -> Maybe PChar +parameterizeChar _ (UCase c) = Just $ Acceptable [toLower c] +parameterizeChar _ (Acceptable c) = Just $ Acceptable c +parameterizeChar _ Separator = Just Separator +parameterizeChar _ Underscore = Just Underscore +parameterizeChar _ (OtherAscii _) = Just Separator +parameterizeChar ts (NonAscii c) = transliterate ts c + +-- |Turns PChar tokens into their String representation. +pCharToC :: PChar -> String +pCharToC (UCase c) = [c] +pCharToC (Acceptable str) = str +pCharToC Separator = "-" +pCharToC Underscore = "_" +pCharToC (OtherAscii c) = [c] +pCharToC (NonAscii c) = [c] + +-- |These default transliterations stolen from the Ruby i18n library - +-- https://github.com/svenfuchs/i18n/blob/master/lib/i18n/backend/transliterator.rb#L41:L69 +defaultTransliterations :: Map.Map Char String +defaultTransliterations = Map.fromList [ + ('À', "A"), ('Á', "A"), ('Â', "A"), ('Ã', "A"), ('Ä', "A"), ('Å', "A"), + ('Æ', "AE"), ('Ç', "C"), ('È', "E"), ('É', "E"), ('Ê', "E"), ('Ë', "E"), + ('Ì', "I"), ('Í', "I"), ('Î', "I"), ('Ï', "I"), ('Ð', "D"), ('Ñ', "N"), + ('Ò', "O"), ('Ó', "O"), ('Ô', "O"), ('Õ', "O"), ('Ö', "O"), ('×', "x"), + ('Ø', "O"), ('Ù', "U"), ('Ú', "U"), ('Û', "U"), ('Ü', "U"), ('Ý', "Y"), + ('Þ', "Th"), ('ß', "ss"), ('à', "a"), ('á', "a"), ('â', "a"), ('ã', "a"), + ('ä', "a"), ('å', "a"), ('æ', "ae"), ('ç', "c"), ('è', "e"), ('é', "e"), + ('ê', "e"), ('ë', "e"), ('ì', "i"), ('í', "i"), ('î', "i"), ('ï', "i"), + ('ð', "d"), ('ñ', "n"), ('ò', "o"), ('ó', "o"), ('ô', "o"), ('õ', "o"), + ('ö', "o"), ('ø', "o"), ('ù', "u"), ('ú', "u"), ('û', "u"), ('ü', "u"), + ('ý', "y"), ('þ', "th"), ('ÿ', "y"), ('Ā', "A"), ('ā', "a"), ('Ă', "A"), + ('ă', "a"), ('Ą', "A"), ('ą', "a"), ('Ć', "C"), ('ć', "c"), ('Ĉ', "C"), + ('ĉ', "c"), ('Ċ', "C"), ('ċ', "c"), ('Č', "C"), ('č', "c"), ('Ď', "D"), + ('ď', "d"), ('Đ', "D"), ('đ', "d"), ('Ē', "E"), ('ē', "e"), ('Ĕ', "E"), + ('ĕ', "e"), ('Ė', "E"), ('ė', "e"), ('Ę', "E"), ('ę', "e"), ('Ě', "E"), + ('ě', "e"), ('Ĝ', "G"), ('ĝ', "g"), ('Ğ', "G"), ('ğ', "g"), ('Ġ', "G"), + ('ġ', "g"), ('Ģ', "G"), ('ģ', "g"), ('Ĥ', "H"), ('ĥ', "h"), ('Ħ', "H"), + ('ħ', "h"), ('Ĩ', "I"), ('ĩ', "i"), ('Ī', "I"), ('ī', "i"), ('Ĭ', "I"), + ('ĭ', "i"), ('Į', "I"), ('į', "i"), ('İ', "I"), ('ı', "i"), ('IJ', "IJ"), + ('ij', "ij"), ('Ĵ', "J"), ('ĵ', "j"), ('Ķ', "K"), ('ķ', "k"), ('ĸ', "k"), + ('Ĺ', "L"), ('ĺ', "l"), ('Ļ', "L"), ('ļ', "l"), ('Ľ', "L"), ('ľ', "l"), + ('Ŀ', "L"), ('ŀ', "l"), ('Ł', "L"), ('ł', "l"), ('Ń', "N"), ('ń', "n"), + ('Ņ', "N"), ('ņ', "n"), ('Ň', "N"), ('ň', "n"), ('ʼn', "'n"), ('Ŋ', "NG"), + ('ŋ', "ng"), ('Ō', "O"), ('ō', "o"), ('Ŏ', "O"), ('ŏ', "o"), ('Ő', "O"), + ('ő', "o"), ('Œ', "OE"), ('œ', "oe"), ('Ŕ', "R"), ('ŕ', "r"), ('Ŗ', "R"), + ('ŗ', "r"), ('Ř', "R"), ('ř', "r"), ('Ś', "S"), ('ś', "s"), ('Ŝ', "S"), + ('ŝ', "s"), ('Ş', "S"), ('ş', "s"), ('Š', "S"), ('š', "s"), ('Ţ', "T"), + ('ţ', "t"), ('Ť', "T"), ('ť', "t"), ('Ŧ', "T"), ('ŧ', "t"), ('Ũ', "U"), + ('ũ', "u"), ('Ū', "U"), ('ū', "u"), ('Ŭ', "U"), ('ŭ', "u"), ('Ů', "U"), + ('ů', "u"), ('Ű', "U"), ('ű', "u"), ('Ų', "U"), ('ų', "u"), ('Ŵ', "W"), + ('ŵ', "w"), ('Ŷ', "Y"), ('ŷ', "y"), ('Ÿ', "Y"), ('Ź', "Z"), ('ź', "z"), + ('Ż', "Z"), ('ż', "z"), ('Ž', "Z"), ('ž', "z")] + +-- |Reduce sequences of separators down to only one separator. +squeezeSeparators :: [PChar] -> [PChar] +squeezeSeparators ps = concatMap squashSeparatorGroup $ group ps + where squashSeparatorGroup g = case head g of + Separator -> [Separator] -- only take head + _ -> g -- don't change + +-- |Trim non-wanted elements from the beginning and end of list. +trimUnwanted :: Eq a => [a] -> [a] -> [a] +trimUnwanted wanted xs = dropWhile notWanted $ reverse $ dropWhile notWanted $ + reverse xs + where notWanted elt = elt `notElem` wanted diff --git a/inflections.cabal b/inflections.cabal new file mode 100644 index 0000000..d5bd4c5 --- /dev/null +++ b/inflections.cabal @@ -0,0 +1,35 @@ +name: inflections +version: 0.1.0.0 +synopsis: Inflections library for Haskell +description: + Inflections provides methods for singularization, pluralization, dasherizing, etc. The library is based on Rails' inflections library. +license: MIT +license-file: LICENSE +author: Justin Leitgeb +maintainer: justin@stackbuilders.com +copyright: 2014 Justin Leitgeb +category: Text +build-type: Simple +-- extra-source-files: +cabal-version: >=1.10 + +library + exposed-modules: Text.Inflections + ghc-options: -Wall + build-depends: base >=4.6 && <4.7, parsec, containers + default-language: Haskell2010 + +test-suite test + type: exitcode-stdio-1.0 + hs-source-dirs: test + main-is: Suite.hs + build-depends: + inflections + , base >=4.6 && <4.7 + , test-framework + , HUnit + , QuickCheck + , test-framework-hunit + , test-framework-quickcheck2 + , containers + default-language: Haskell2010 diff --git a/test/Suite.hs b/test/Suite.hs new file mode 100644 index 0000000..997e41a --- /dev/null +++ b/test/Suite.hs @@ -0,0 +1,8 @@ +module Main where + +import Test.Framework (defaultMain) + +import qualified Text.Inflections.Tests + +main :: IO () +main = defaultMain Text.Inflections.Tests.tests diff --git a/test/Text/Inflections/Tests.hs b/test/Text/Inflections/Tests.hs new file mode 100644 index 0000000..98d40b7 --- /dev/null +++ b/test/Text/Inflections/Tests.hs @@ -0,0 +1,81 @@ +module Text.Inflections.Tests where + +import Test.HUnit hiding (Test) + +import Test.Framework.Providers.QuickCheck2 (testProperty) + +import Test.QuickCheck +import Test.QuickCheck.Arbitrary + +import Test.Framework (Test, testGroup) + +import Data.List (all, group) +import Data.Char (toLower) +import Data.Map (fromList) + +import Text.Inflections + +{-# ANN module "HLint: ignore Use camelCase" #-} + +tests :: [Test] +tests = [testGroup "dasherize" + [ testProperty "Substitutes spaces for hyphens" prop_dasherize1 + ], + + testGroup "parameterize" + [ testProperty "Contains only valid chars" + prop_parameterize1 + , testProperty "Does not begin with a separator character" + prop_parameterize2 + , testProperty "Does not end in a separator character" + prop_parameterize3 + , testProperty "All alphanumerics in input exist in output" + prop_parameterize4 + , testProperty "Doesn't have subsequences of more than one hyphen" + prop_parameterize5 + ] + ] + + +prop_dasherize1 :: String -> Property +prop_dasherize1 s = + '-' `notElem` s ==> numMatching '-' (dasherize s) == numMatching ' ' s + +prop_parameterize1 :: String -> Bool +prop_parameterize1 sf = all (`elem` (alphaNumerics ++ "-_")) $ + parameterize defaultTransliterations sf + +prop_parameterize2 :: String -> Property +prop_parameterize2 s = + (not . null) parameterized ==> head parameterized /= '-' + where parameterized = parameterize defaultTransliterations s + +prop_parameterize3 :: String -> Property +prop_parameterize3 s = + (not . null) parameterized ==> last parameterized /= '-' + where parameterized = parameterize defaultTransliterations s + +prop_parameterize4 :: String -> Bool +prop_parameterize4 s = all (\c -> c `notElem` alphaNumerics || + c `elem` (alphaNumerics ++ "-") && + c `elem` parameterized) $ map toLower s + where parameterized = parameterize defaultTransliterations s + +prop_parameterize5 :: String -> Bool +prop_parameterize5 s = longestSequenceOf '-' parameterized <= 1 + where parameterized = parameterize defaultTransliterations s + + +-- Helper functions and shared tests + +longestSequenceOf :: Char -> String -> Int +longestSequenceOf c [] = 0 +longestSequenceOf c s = + if null subseqLengths then 0 else maximum subseqLengths + + where subseqLengths = (map length . filter (\str -> head str == c) . group) s + +numMatching char str = length $ filter (== char) str + +alphaNumerics :: String +alphaNumerics = ['a'..'z'] ++ ['0'..'9']