1
1
mirror of https://github.com/thma/LtuPatternFactory.git synced 2025-01-06 03:23:19 +03:00

update MapReduce example

This commit is contained in:
thma 2019-01-05 18:22:43 +01:00
parent d19f5159f0
commit 834d63ac90
3 changed files with 55 additions and 28 deletions

View File

@ -30,9 +30,12 @@ executable LtuPatternFactory
, Coerce
, IdiomBrackets
, Interpreter
, Infinity
, MapReduce
, DependencyInjection
, CheapskateRenderer
, CMarkGFMRenderer
main-is: Main.hs
default-language: Haskell2010
build-depends: base >= 4.7 && < 5
@ -47,6 +50,7 @@ executable LtuPatternFactory
, cmark-gfm
, cheapskate
, blaze-html
, parallel
test-suite LtuPatternFactory-Demo
type: exitcode-stdio-1.0
@ -65,6 +69,8 @@ test-suite LtuPatternFactory-Demo
, Coerce
, IdiomBrackets
, Interpreter
, Infinity
, MapReduce
, DependencyInjection
, CheapskateRenderer
, CMarkGFMRenderer
@ -83,4 +89,5 @@ test-suite LtuPatternFactory-Demo
, cmark-gfm
, cheapskate
, blaze-html
, parallel
default-language: Haskell2010

View File

@ -16,6 +16,7 @@ import Singleton
import Strategy
import TemplateMethod
import Visitor
import MapReduce
main :: IO ()
main = do
@ -35,3 +36,4 @@ main = do
demoDI
interpreterDemo
infinityDemo
mapReduceDemo

View File

@ -1,38 +1,56 @@
module MapReduce where
import Data.Char (toLower)
import Data.List (sort, group)
import Control.Arrow ((&&&))
import Data.Map as Map hiding (map, filter)
import Data.Monoid
import Control.Arrow ((&&&))
import Control.Category ((>>>))
import Data.Char (toLower)
import Data.List (group, sort)
import Data.Map as Map hiding (filter, map, foldr)
import Data.Monoid
import Control.Parallel (pseq)
import Control.Parallel.Strategies (rseq, using, parMap)
stringToWordCountMap :: String -> Map.Map String Int
stringToWordCountMap = Map.fromList . map (head &&& length) . group . sort . words . map toLower
newtype WordCountMap = WordCountMap { getMap :: Map.Map String Int} deriving (Show)
combineWordCountMaps :: Map.Map String Int -> Map.Map String Int -> Map.Map String Int
combineWordCountMaps = Map.unionWith (+)
instance Semigroup WordCountMap where
a <> b = WordCountMap $ Map.unionWith (+) (getMap a) (getMap b)
instance Monoid WordCountMap where
mempty = WordCountMap Map.empty
reduceWordCountMaps :: [Map.Map String Int] -> Map.Map String Int
reduceWordCountMaps [x] = x
reduceWordCountMaps (x:xs) = combineWordCountMaps x (reduceWordCountMaps xs)
--countWords :: String -> Map.Map String Int
--countWords =
stringToWordCountMap :: String -> WordCountMap
stringToWordCountMap =
map toLower >>> words >>> -- convert to lowercase and split into a list of words
sort >>> group >>> -- sort the words and group all equal words to sub-lists
map (head &&& length) >>> -- for each of those list of grouped words: form a pair (word, frequency)
Map.fromList >>> -- create a Map from the list of (word, frequency) pairs
WordCountMap -- wrap as WordCountMap
simpleMapReduce
:: (a -> b) -- map function
-> ([b] -> c) -- reduce function
-> [a] -- list to map over
-> c -- result
simpleMapReduce mapFunc reduceFunc = reduceFunc . Prelude.map mapFunc
reduceWordCountMaps :: [WordCountMap] -> WordCountMap
reduceWordCountMaps = WordCountMap . foldr (Map.unionWith (+) . getMap) empty
simpleMapReduce ::
(a -> b) -- map function
-> ([b] -> c) -- reduce function
-> [a] -- list to map over
-> c -- result
simpleMapReduce mapFunc reduceFunc = reduceFunc . map mapFunc
alphabetic :: Char -> Bool
alphabetic char = char `elem` (" \t\n\r" ++ ['a'..'z'] ++ ['A'..'Z'])
parMapReduce :: (a -> b) -> ([b] -> c) -> [a] -> c
parMapReduce mapFunc reduceFunc input =
mapResult `pseq` reduceResult
where mapResult = parMap rseq mapFunc input
reduceResult = reduceFunc mapResult `using` rseq
filterBy :: String -> Char -> Bool
filterBy notAllowedChars char =
char `notElem` notAllowedChars
mapReduceDemo = do
contents <- readFile "LICENSE"
let linesInFile = lines $ filter (filterBy ".,;:!?()[]{}\"'") contents
let result = simpleMapReduce stringToWordCountMap reduceWordCountMaps linesInFile
putStrLn $ "The file has " ++ show (length (lines contents)) ++ " lines!"
putStrLn $ "result = " ++ show result ++ "."
contents <- readFile "LICENSE"
let linesInFile = lines $ filter alphabetic contents
let result =
simpleMapReduce stringToWordCountMap reduceWordCountMaps linesInFile
putStrLn $ "The file has " ++ show (length linesInFile) ++ " lines"
putStrLn $ "result = " ++ show (getMap result)
print $ getMap $ foldMap stringToWordCountMap linesInFile
print $ getMap $ parMapReduce stringToWordCountMap reduceWordCountMaps linesInFile