mirror of
https://github.com/thma/LtuPatternFactory.git
synced 2025-01-06 03:23:19 +03:00
update MapReduce example
This commit is contained in:
parent
d19f5159f0
commit
834d63ac90
@ -30,9 +30,12 @@ executable LtuPatternFactory
|
||||
, Coerce
|
||||
, IdiomBrackets
|
||||
, Interpreter
|
||||
, Infinity
|
||||
, MapReduce
|
||||
, DependencyInjection
|
||||
, CheapskateRenderer
|
||||
, CMarkGFMRenderer
|
||||
|
||||
main-is: Main.hs
|
||||
default-language: Haskell2010
|
||||
build-depends: base >= 4.7 && < 5
|
||||
@ -47,6 +50,7 @@ executable LtuPatternFactory
|
||||
, cmark-gfm
|
||||
, cheapskate
|
||||
, blaze-html
|
||||
, parallel
|
||||
|
||||
test-suite LtuPatternFactory-Demo
|
||||
type: exitcode-stdio-1.0
|
||||
@ -65,6 +69,8 @@ test-suite LtuPatternFactory-Demo
|
||||
, Coerce
|
||||
, IdiomBrackets
|
||||
, Interpreter
|
||||
, Infinity
|
||||
, MapReduce
|
||||
, DependencyInjection
|
||||
, CheapskateRenderer
|
||||
, CMarkGFMRenderer
|
||||
@ -83,4 +89,5 @@ test-suite LtuPatternFactory-Demo
|
||||
, cmark-gfm
|
||||
, cheapskate
|
||||
, blaze-html
|
||||
, parallel
|
||||
default-language: Haskell2010
|
||||
|
@ -16,6 +16,7 @@ import Singleton
|
||||
import Strategy
|
||||
import TemplateMethod
|
||||
import Visitor
|
||||
import MapReduce
|
||||
|
||||
main :: IO ()
|
||||
main = do
|
||||
@ -35,3 +36,4 @@ main = do
|
||||
demoDI
|
||||
interpreterDemo
|
||||
infinityDemo
|
||||
mapReduceDemo
|
||||
|
@ -1,38 +1,56 @@
|
||||
module MapReduce where
|
||||
|
||||
import Data.Char (toLower)
|
||||
import Data.List (sort, group)
|
||||
import Control.Arrow ((&&&))
|
||||
import Data.Map as Map hiding (map, filter)
|
||||
import Data.Monoid
|
||||
import Control.Arrow ((&&&))
|
||||
import Control.Category ((>>>))
|
||||
import Data.Char (toLower)
|
||||
import Data.List (group, sort)
|
||||
import Data.Map as Map hiding (filter, map, foldr)
|
||||
import Data.Monoid
|
||||
import Control.Parallel (pseq)
|
||||
import Control.Parallel.Strategies (rseq, using, parMap)
|
||||
|
||||
stringToWordCountMap :: String -> Map.Map String Int
|
||||
stringToWordCountMap = Map.fromList . map (head &&& length) . group . sort . words . map toLower
|
||||
newtype WordCountMap = WordCountMap { getMap :: Map.Map String Int} deriving (Show)
|
||||
|
||||
combineWordCountMaps :: Map.Map String Int -> Map.Map String Int -> Map.Map String Int
|
||||
combineWordCountMaps = Map.unionWith (+)
|
||||
instance Semigroup WordCountMap where
|
||||
a <> b = WordCountMap $ Map.unionWith (+) (getMap a) (getMap b)
|
||||
instance Monoid WordCountMap where
|
||||
mempty = WordCountMap Map.empty
|
||||
|
||||
reduceWordCountMaps :: [Map.Map String Int] -> Map.Map String Int
|
||||
reduceWordCountMaps [x] = x
|
||||
reduceWordCountMaps (x:xs) = combineWordCountMaps x (reduceWordCountMaps xs)
|
||||
|
||||
--countWords :: String -> Map.Map String Int
|
||||
--countWords =
|
||||
stringToWordCountMap :: String -> WordCountMap
|
||||
stringToWordCountMap =
|
||||
map toLower >>> words >>> -- convert to lowercase and split into a list of words
|
||||
sort >>> group >>> -- sort the words and group all equal words to sub-lists
|
||||
map (head &&& length) >>> -- for each of those list of grouped words: form a pair (word, frequency)
|
||||
Map.fromList >>> -- create a Map from the list of (word, frequency) pairs
|
||||
WordCountMap -- wrap as WordCountMap
|
||||
|
||||
simpleMapReduce
|
||||
:: (a -> b) -- map function
|
||||
-> ([b] -> c) -- reduce function
|
||||
-> [a] -- list to map over
|
||||
-> c -- result
|
||||
simpleMapReduce mapFunc reduceFunc = reduceFunc . Prelude.map mapFunc
|
||||
reduceWordCountMaps :: [WordCountMap] -> WordCountMap
|
||||
reduceWordCountMaps = WordCountMap . foldr (Map.unionWith (+) . getMap) empty
|
||||
|
||||
simpleMapReduce ::
|
||||
(a -> b) -- map function
|
||||
-> ([b] -> c) -- reduce function
|
||||
-> [a] -- list to map over
|
||||
-> c -- result
|
||||
simpleMapReduce mapFunc reduceFunc = reduceFunc . map mapFunc
|
||||
|
||||
alphabetic :: Char -> Bool
|
||||
alphabetic char = char `elem` (" \t\n\r" ++ ['a'..'z'] ++ ['A'..'Z'])
|
||||
|
||||
parMapReduce :: (a -> b) -> ([b] -> c) -> [a] -> c
|
||||
parMapReduce mapFunc reduceFunc input =
|
||||
mapResult `pseq` reduceResult
|
||||
where mapResult = parMap rseq mapFunc input
|
||||
reduceResult = reduceFunc mapResult `using` rseq
|
||||
|
||||
filterBy :: String -> Char -> Bool
|
||||
filterBy notAllowedChars char =
|
||||
char `notElem` notAllowedChars
|
||||
|
||||
mapReduceDemo = do
|
||||
contents <- readFile "LICENSE"
|
||||
let linesInFile = lines $ filter (filterBy ".,;:!?()[]{}\"'") contents
|
||||
let result = simpleMapReduce stringToWordCountMap reduceWordCountMaps linesInFile
|
||||
putStrLn $ "The file has " ++ show (length (lines contents)) ++ " lines!"
|
||||
putStrLn $ "result = " ++ show result ++ "."
|
||||
contents <- readFile "LICENSE"
|
||||
let linesInFile = lines $ filter alphabetic contents
|
||||
let result =
|
||||
simpleMapReduce stringToWordCountMap reduceWordCountMaps linesInFile
|
||||
putStrLn $ "The file has " ++ show (length linesInFile) ++ " lines"
|
||||
putStrLn $ "result = " ++ show (getMap result)
|
||||
print $ getMap $ foldMap stringToWordCountMap linesInFile
|
||||
print $ getMap $ parMapReduce stringToWordCountMap reduceWordCountMaps linesInFile
|
||||
|
Loading…
Reference in New Issue
Block a user