taggy/bench/vs-tagsoup.hs
2014-06-12 20:44:44 +02:00

66 lines
1.8 KiB
Haskell

{-# LANGUAGE OverloadedStrings #-}
module Main where
import Criterion.Main
import qualified Text.Taggy as Taggy
import qualified Text.HTML.TagSoup as Tagsoup
import qualified Data.Text.Lazy as T
import qualified Data.Text.Lazy.IO as T
import qualified Data.Vector as V
-- | Sizes fed to 'linkBench'; each one selects a @links_<size>.html@
-- input file. Currently only referenced from the commented-out
-- "links" group in 'main'.
sizes :: [Int]
sizes = take 3 (iterate (* 10) 50)
-- | A second, larger list of sizes, continuing the times-ten
-- progression of 'sizes'. Not referenced in the visible part of this
-- file.
sizes' :: [Int]
sizes' = take 3 (iterate (* 10) 50000)
-- | Build a benchmark group called @label@ that runs every parser
-- under comparison on the file @fp@.
--
-- Each action reads and parses the file, and is handed to criterion
-- through 'whnfIO'.
benchOnFile :: String -> FilePath -> Benchmark
benchOnFile label fp =
  bgroup label [bench name action | (name, action) <- parsers]
  where
    -- (benchmark name, action to measure), in reporting order.
    parsers =
      [ ("tagsoup", whnfIO (tagsoup fp))
      , ("taggy", whnfIO (taggy fp))
      , ("taggy-entities", whnfIO (taggyEntities fp))
      ]
-- | Read @html_files/<fp>@ as lazy text, parse it with tagsoup's
-- 'Tagsoup.parseTags', and collect the resulting tags into a vector.
tagsoup :: FilePath -> IO (V.Vector (Tagsoup.Tag T.Text))
tagsoup fp = fmap (V.fromList . Tagsoup.parseTags) (T.readFile path)
  where
    path = "html_files/" ++ fp
-- | Read @html_files/<fp>@ as lazy text, parse it with
-- @Taggy.taggyWith False@, and collect the resulting tags into a
-- vector.
--
-- NOTE(review): the 'False' flag presumably disables HTML entity
-- conversion (compare 'taggyEntities') -- confirm against Text.Taggy.
taggy :: FilePath -> IO (V.Vector Taggy.Tag)
taggy fp = fmap (V.fromList . Taggy.taggyWith False) (T.readFile path)
  where
    path = "html_files/" ++ fp
-- | Read @html_files/<fp>@ as lazy text, parse it with
-- @Taggy.taggyWith True@, and collect the resulting tags into a
-- vector.
--
-- NOTE(review): the 'True' flag presumably enables HTML entity
-- conversion, going by this function's name -- confirm against
-- Text.Taggy.
taggyEntities :: FilePath -> IO (V.Vector Taggy.Tag)
taggyEntities fp = fmap (V.fromList . Taggy.taggyWith True) (T.readFile path)
  where
    path = "html_files/" ++ fp
-- | Benchmark group for the input file @links_<size>.html@, labelled
-- with the decimal rendering of @size@.
linkBench :: Int -> Benchmark
linkBench size = benchOnFile sizeLabel ("links_" ++ sizeLabel ++ ".html")
  where
    -- Used both as the group label and inside the file name.
    sizeLabel = show size
-- | Entry point: run the parser comparison once per saved HTML page.
main :: IO ()
main = defaultMain pageBenchmarks
  where
    -- One benchmark group per page, in the order listed in 'pages'.
    -- Disabled extra group, kept for reference:
    --   ++ [bgroup "links" $ map linkBench sizes]
    pageBenchmarks = map (uncurry benchOnFile) pages

    -- (group label, file name handed to 'benchOnFile').
    pages :: [(String, FilePath)]
    pages =
      [ ("alpmestan.com index", "alpmestan.html")
      , ("google search", "googling-haskell.html")
      , ("youtube", "youtube.html")
      , ( "wikipedia - history of mathematics"
        , "wikipedia_history_of_mathematics.html"
        )
      , ("worldslongestwebsite.com", "worldslongestwebsite.com.html")
      , ("/r/haskell", "haskell_reddit.html")
      ]