From 668a8adc2e39fa6480f68007d93fa5a0e72ac412 Mon Sep 17 00:00:00 2001 From: Zejun Wu Date: Mon, 8 Sep 2014 12:02:25 -0700 Subject: [PATCH] Collect time in each data sources in each round Summary: Collect time in each data sources in each round Test Plan: test in haxlsh test the overhead in replay Reviewed By: smarlow@fb.com Subscribers: anfarmer, ldbrandy, watashi, smarlow, akr, bnitka, jcoens FB internal diff: D1521346 Tasks: 4589842 --- Haxl/Core/Monad.hs | 60 +++++++++++++++++++++++++++------- Haxl/Core/Types.hs | 42 +++++++++++++++++++----- tests/TestExampleDataSource.hs | 13 +++++--- 3 files changed, 90 insertions(+), 25 deletions(-) diff --git a/Haxl/Core/Monad.hs b/Haxl/Core/Monad.hs index a842401..93767d9 100644 --- a/Haxl/Core/Monad.hs +++ b/Haxl/Core/Monad.hs @@ -10,6 +10,7 @@ {-# LANGUAGE ExistentialQuantification #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} +{-# LANGUAGE MultiWayIf #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -42,6 +43,7 @@ import Haxl.Core.Exception import Haxl.Core.RequestStore import Haxl.Core.Util import Haxl.Core.DataCache as DataCache +import Util.Time import qualified Data.Text as Text import Control.Exception (Exception(..), SomeException) @@ -401,10 +403,6 @@ performFetches env reqs = do getReq :: [BlockedFetch r] -> r a getReq = undefined - ifReport f 1 $ - modifyIORef' sref $ \(Stats rounds) -> - Stats (RoundStats (HashMap.fromList roundstats) : rounds) - ifTrace f 1 $ printf "Batch data fetch (%s)\n" $ intercalate (", "::String) $ @@ -423,24 +421,44 @@ performFetches env reqs = do e = DataSourceError $ "data source not initialized: " <> dataSourceName req Just state -> - return $ wrapFetch reqs $ fetch state f (userEnv env) reqs + return $ wrapFetchInCatch reqs $ fetch state f (userEnv env) reqs fetches <- mapM applyFetch jobs - scheduleFetches fetches + times <- + if report f >= 2 + then do + (refs, timedfetches) <- mapAndUnzipM wrapFetchInTimer fetches + scheduleFetches timedfetches + mapM (fmap Just . readIORef) refs + else do + scheduleFetches fetches + return $ repeat Nothing + + let dsroundstats = HashMap.fromList + [ (name, DataSourceRoundStats { dataSourceFetches = fetches + , dataSourceTime = time + }) + | ((name, fetches), time) <- zip roundstats times] + + t1 <- getCurrentTime + let roundtime = realToFrac (diffUTCTime t1 t0) :: Double + + ifReport f 1 $ + modifyIORef' sref $ \(Stats rounds) -> + Stats (RoundStats (microsecs roundtime) dsroundstats: rounds) + + ifTrace f 1 $ + printf "Batch data fetch done (%.2fs)\n" (realToFrac roundtime :: Double) - ifTrace f 1 $ do - t1 <- getCurrentTime - printf "Batch data fetch done (%.2fs)\n" - (realToFrac (diffUTCTime t1 t0) :: Double) -- Catch exceptions arising from the data source and stuff them into -- the appropriate requests. We don't want any exceptions propagating -- directly from the data sources, because we want the exception to be -- thrown by dataFetch instead. -- -wrapFetch :: [BlockedFetch req] -> PerformFetch -> PerformFetch -wrapFetch reqs fetch = +wrapFetchInCatch :: [BlockedFetch req] -> PerformFetch -> PerformFetch +wrapFetchInCatch reqs fetch = case fetch of SyncFetch io -> SyncFetch (io `Control.Exception.catch` handler) @@ -456,6 +474,24 @@ wrapFetch reqs fetch = void $ tryTakeResult rvar putResult rvar (except e) +wrapFetchInTimer :: PerformFetch -> IO (IORef Microseconds, PerformFetch) +wrapFetchInTimer f = do + r <- newIORef 0 + case f of + SyncFetch io -> return (r, SyncFetch (time io >>= writeIORef r)) + AsyncFetch f -> do + inner_r <- newIORef 0 + return (r, AsyncFetch $ \inner -> do + total <- time (f (time inner >>= writeIORef inner_r)) + inner_t <- readIORef inner_r + writeIORef r (total - inner_t)) + +time :: IO () -> IO Microseconds +time io = microsecs . fst <$> timeIt io + +microsecs :: Double -> Microseconds +microsecs t = round (t * 10^(6::Int)) + -- | Start all the async fetches first, then perform the sync fetches before -- getting the results of the async fetches. scheduleFetches :: [PerformFetch] -> IO() diff --git a/Haxl/Core/Types.hs b/Haxl/Core/Types.hs index a19ad6a..c4b9087 100644 --- a/Haxl/Core/Types.hs +++ b/Haxl/Core/Types.hs @@ -13,6 +13,7 @@ {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE Rank2Types #-} +{-# LANGUAGE RecordWildCards #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeFamilies #-} @@ -31,6 +32,8 @@ module Haxl.Core.Types ( -- * Statistics Stats(..), RoundStats(..), + DataSourceRoundStats(..), + Microseconds, emptyStats, numRounds, numFetches, @@ -65,15 +68,15 @@ module Haxl.Core.Types ( ) where import Control.Applicative +import Control.Concurrent.MVar import Control.Exception -import Data.Typeable -import Data.Text (Text) +import Control.Monad import Data.Aeson import Data.Hashable -import Control.Concurrent.MVar -import Control.Monad -import qualified Data.HashMap.Strict as HashMap import Data.HashMap.Strict (HashMap) +import qualified Data.HashMap.Strict as HashMap +import Data.Text (Text) +import Data.Typeable (Typeable) #if __GLASGOW_HASKELL__ < 708 import Haxl.Core.Util (tryReadMVar) @@ -113,6 +116,8 @@ ifTrace flags i = when (trace flags >= i) . void ifReport :: (Functor m, Monad m) => Flags -> Int -> m a -> m () ifReport flags i = when (report flags >= i) . void +type Microseconds = Int + -- | Stats that we collect along the way. newtype Stats = Stats [RoundStats] deriving ToJSON @@ -120,11 +125,32 @@ newtype Stats = Stats [RoundStats] -- | Maps data source name to the number of requests made in that round. -- The map only contains entries for sources that made requests in that -- round. -newtype RoundStats = RoundStats (HashMap Text Int) - deriving ToJSON +data RoundStats = RoundStats + { roundTime :: Microseconds + , roundDataSources :: HashMap Text DataSourceRoundStats + } + +instance ToJSON RoundStats where + toJSON RoundStats{..} = object + [ "time" .= roundTime + , "dataSources" .= roundDataSources + ] + +-- | Detailed stats of each data source in each round. +data DataSourceRoundStats = DataSourceRoundStats + { dataSourceFetches :: Int + , dataSourceTime :: Maybe Microseconds + } + +instance ToJSON DataSourceRoundStats where + toJSON DataSourceRoundStats{..} = object [k .= v | (k, Just v) <- + [ ("fetches", Just dataSourceFetches) + , ("time", dataSourceTime) + ]] fetchesInRound :: RoundStats -> Int -fetchesInRound (RoundStats hm) = sum $ HashMap.elems hm +fetchesInRound (RoundStats _ hm) = + sum $ map dataSourceFetches $ HashMap.elems hm emptyStats :: Stats emptyStats = Stats [] diff --git a/tests/TestExampleDataSource.hs b/tests/TestExampleDataSource.hs index 4973472..a5a722c 100644 --- a/tests/TestExampleDataSource.hs +++ b/tests/TestExampleDataSource.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE OverloadedStrings, RebindableSyntax #-} +{-# LANGUAGE OverloadedStrings, RebindableSyntax, MultiWayIf #-} module TestExampleDataSource (tests) where import Haxl.Prelude as Haxl @@ -47,12 +47,15 @@ exampleTest = TestCase $ do assertEqual "runTests" x (2 + 3) -- Should be just one fetching round: - Stats s <- readIORef (statsRef env) - assertEqual "rounds" 1 (length s) + Stats stats <- readIORef (statsRef env) + assertEqual "rounds" 1 (length stats) -- With two fetches: - let reqs = case head s of RoundStats m -> HashMap.lookup "ExampleDataSource" m - assertEqual "reqs" (Just 2) reqs + assertBool "reqs" $ + if | RoundStats { roundDataSources = m } : _ <- stats, + Just (DataSourceRoundStats { dataSourceFetches = 2 }) + <- HashMap.lookup "ExampleDataSource" m -> True + | otherwise -> False -- Test side-effect ordering