Fix time measurement issue

1) We were taking a max of the latest timestamp and the timer maintained time
   which is incorrect.

2) We were computing the timeout incorrectly making it twice the actual
   timeout.

Also, maintain sessionCount in the state. We will use this later to put a limit
of the number of sessions.
This commit is contained in:
Harendra Kumar 2020-02-06 21:51:27 +05:30
parent 036ca6a557
commit eea9aa1721

View File

@ -1,5 +1,7 @@
{-# LANGUAGE CPP #-} {-# LANGUAGE CPP #-}
{-# LANGUAGE RankNTypes #-} {-# LANGUAGE RankNTypes #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleContexts #-}
#if __GLASGOW_HASKELL__ >= 800 #if __GLASGOW_HASKELL__ >= 800
@ -449,6 +451,9 @@ import Control.Monad.State.Strict (StateT)
import Control.Monad.Trans (MonadTrans(..)) import Control.Monad.Trans (MonadTrans(..))
import Control.Monad.Trans.Control (MonadBaseControl) import Control.Monad.Trans.Control (MonadBaseControl)
import Data.Functor.Identity (Identity (..)) import Data.Functor.Identity (Identity (..))
#if __GLASGOW_HASKELL__ >= 800
import Data.Kind (Type)
#endif
import Data.Heap (Entry(..)) import Data.Heap (Entry(..))
import Data.Maybe (isJust, fromJust, isNothing) import Data.Maybe (isJust, fromJust, isNothing)
import Foreign.Storable (Storable) import Foreign.Storable (Storable)
@ -480,7 +485,7 @@ import Streamly.Internal.Data.Stream.Serial (SerialT, WSerialT)
import Streamly.Internal.Data.Stream.Zip (ZipSerialM) import Streamly.Internal.Data.Stream.Zip (ZipSerialM)
import Streamly.Internal.Data.Pipe.Types (Pipe (..)) import Streamly.Internal.Data.Pipe.Types (Pipe (..))
import Streamly.Internal.Data.Time.Units import Streamly.Internal.Data.Time.Units
(AbsTime, MilliSecond64(..), addToAbsTime, diffAbsTime, toRelTime, (AbsTime, MilliSecond64(..), addToAbsTime, toRelTime,
toAbsTime, TimeUnit64) toAbsTime, TimeUnit64)
import Streamly.Internal.Mutable.Prim.Var (Prim, Var) import Streamly.Internal.Mutable.Prim.Var (Prim, Var)
@ -3967,49 +3972,83 @@ classifyKeepAliveChunks
classifyKeepAliveChunks spanout = classifyChunksBy spanout True classifyKeepAliveChunks spanout = classifyChunksBy spanout True
-} -}
-- | @classifySessionsBy tick timeout reset f stream@ groups together all input #if __GLASGOW_HASKELL__ < 800
-- stream elements that belong to the same session. @timeout@ is the maximum #define Type *
-- lifetime of a session in seconds. All elements belonging to a session are #endif
-- purged after this duration. If "reset" is 'Ture' then the timeout is reset
-- after every event received in the session. Session duration is measured data SessionState t m k a b = SessionState
-- using the timestamp of the first element seen for that session. To detect { sessionCurTime :: !AbsTime -- ^ time since last event
-- session timeouts, a monotonic event time clock is maintained using the , sessionEventTime :: !AbsTime -- ^ time as per last event
-- timestamps seen in the inputs and a timer with a tick duration specified by , sessionCount :: !Int -- ^ total number sessions in progress
-- @tick@. , sessionTimerHeap :: H.Heap (H.Entry AbsTime k) -- ^ heap for timeouts
, sessionKeyValueMap :: Map.Map k a -- ^ Stored sessions for keys
, sessionOutputStream :: t (m :: Type -> Type) (k, b) -- ^ Completed sessions
}
#undef Type
-- | @classifySessionsBy tick timeout reset f stream@ groups timed events in an
-- input event stream into sessions based on a session key. Each element in the
-- stream is an event consisting of a 4-tuple @(session key, sesssion data,
-- isClosingEvent, timestamp)@. @session key@ is a key that uniquely
-- identifies a session. All the events belonging to a session are folded using
-- the fold @f@ until an event with @isClosingEvent@ flag set arrives or a
-- timeout has occurred. The session key and the result of the fold are
-- emitted in the output stream when the session is purged.
-- --
-- @session key@ is a key that uniquely identifies the session for the given -- When @reset@ is 'False', @timeout@ is the maximum lifetime of a session in
-- element, @timestamp@ characterizes the time when the input element was -- seconds, measured from the @timestamp@ of the first event in that session.
-- generated, this is an absolute time measured from some @Epoch@. @session -- When "reset" is 'True' then the timeout is reset after every event received
-- close@ is a boolean indicating whether this element marks the closing of the -- in the session, in other words timeout is measured from the timestamp of the
-- session. When an input element with @session close@ set to @True@ is seen -- last event in the session.
-- the session is purged immediately.
-- --
-- All the input elements belonging to a session are collected using the fold -- @timestamp@ in an event characterizes the time when the input event was
-- @f@. The session key and the fold result are emitted in the output stream -- generated, this is an absolute time measured from some @Epoch@. The notion
-- when the session is purged either via the session close event or via the -- of current time is maintained by a monotonic event time clock using the
-- session lifetime timeout. -- timestamps seen in the input stream. The latest timestamp seen till now is
-- used as the base for the current time. When no new events are seen, a timer
-- is started with a tick duration specified by @tick@. This timer is used to
-- detect session timeouts in the absence of new events.
--
-- /Internal/
-- --
-- @since 0.7.0
{-# INLINABLE classifySessionsBy #-} {-# INLINABLE classifySessionsBy #-}
classifySessionsBy classifySessionsBy
:: (IsStream t, MonadAsync m, Ord k) :: (IsStream t, MonadAsync m, Ord k)
=> Double -- ^ timer tick in seconds => Double -- ^ timer tick in seconds
-> Double -- ^ session timeout -> Double -- ^ session timeout in seconds
-> Bool -- ^ reset the timeout when an event is received -> Bool -- ^ reset the timeout when an event is received
-> Fold m a b -- ^ Fold to be applied to session events -> Fold m a b -- ^ Fold to be applied to session events
-> t m (k, a, Bool, AbsTime) -- ^ session key, timestamp, close event, data -> t m (k, a, Bool, AbsTime) -- ^ session key, data, timestamp, close event
-> t m (k, b) -> t m (k, b) -- ^ session key, fold result
classifySessionsBy tick timeout reset (Fold step initial extract) str = classifySessionsBy tick timeout reset (Fold step initial extract) str =
concatMap (\(Tuple4' _ _ _ s) -> s) $ scanlM' sstep szero stream concatMap (\session -> sessionOutputStream session)
$ scanlM' sstep szero stream
where where
timeoutMs = toRelTime (round (timeout * 1000) :: MilliSecond64) timeoutMs = toRelTime (round (timeout * 1000) :: MilliSecond64)
tickMs = toRelTime (round (tick * 1000) :: MilliSecond64) tickMs = toRelTime (round (tick * 1000) :: MilliSecond64)
szero = Tuple4' (toAbsTime (0 :: MilliSecond64)) H.empty Map.empty K.nil szero = SessionState
{ sessionCurTime = toAbsTime (0 :: MilliSecond64)
, sessionEventTime = toAbsTime (0 :: MilliSecond64)
, sessionCount = 0
, sessionTimerHeap = H.empty
, sessionKeyValueMap = Map.empty
, sessionOutputStream = K.nil
}
-- XXX If there are too many sessions in progress we may want to put a
-- limit on the number of sessions because of memory consumption limits. We
-- can use two different strategies in this case:
--
-- 1) Eject old sessions or sessions beyond a certain/lower timeout
-- threshold even before timeout, effectively reduce the timeout.
-- 2) Drop creation of new sessions but keep accepting new events for the
-- old ones.
--
-- Got a new stream input element -- Got a new stream input element
sstep (Tuple4' evTime hp mp _) (Just (key, a, closing, ts)) = sstep (session@SessionState{..}) (Just (key, a, closing, ts)) =
-- XXX we should use a heap in pinned memory to scale it to a large -- XXX we should use a heap in pinned memory to scale it to a large
-- size -- size
-- --
@ -4027,66 +4066,90 @@ classifySessionsBy tick timeout reset (Fold step initial extract) str =
-- XXX if the key is an Int, we can also use an IntMap for slightly -- XXX if the key is an Int, we can also use an IntMap for slightly
-- better performance. -- better performance.
-- --
let accumulate v = do let curTime = max sessionEventTime ts
accumulate v = do
Tuple' _ old <- maybe (initial >>= return . Tuple' ts) return v Tuple' _ old <- maybe (initial >>= return . Tuple' ts) return v
new <- step old a new <- step old a
return $ Tuple' ts new return $ Tuple' ts new
in if closing in if closing
then do then do
let (r, mp') = Map.updateLookupWithKey (\_ _ -> Nothing) key mp let (r, mp') = Map.updateLookupWithKey (\_ _ -> Nothing) key
sessionKeyValueMap
Tuple' _ acc <- accumulate r Tuple' _ acc <- accumulate r
res <- extract acc res <- extract acc
return $ Tuple4' evTime hp mp' (yield (key, res)) return $ session
{ sessionCurTime = curTime
, sessionEventTime = curTime
, sessionCount = sessionCount - 1
, sessionKeyValueMap = mp'
, sessionOutputStream = yield (key, res)
}
else do else do
let r = Map.lookup key mp let r = Map.lookup key sessionKeyValueMap
acc <- accumulate r acc <- accumulate r
let mp' = Map.insert key acc mp let mp' = Map.insert key acc sessionKeyValueMap
let hp' = let (hp', count) =
case r of case r of
Nothing -> Nothing ->
let expiry = addToAbsTime ts timeoutMs let expiry = addToAbsTime ts timeoutMs
in H.insert (Entry expiry key) hp in (H.insert (Entry expiry key)
Just _ -> hp sessionTimerHeap, sessionCount + 1)
Just _ -> (sessionTimerHeap, sessionCount)
-- Event time is maintained as monotonically increasing -- Event time is maintained as monotonically increasing
-- time. If we have lagged behind any of the timestamps -- time. If we have lagged behind any of the timestamps
-- seen then we increase it to match the latest time seen -- seen then we increase it to match the latest time seen
-- in the timestamps. We also increase it on timer ticks. -- in the timestamps. We also increase it on timer ticks.
return $ Tuple4' (max evTime ts) hp' mp' K.nil return $ SessionState
{ sessionCurTime = curTime
, sessionEventTime = curTime
, sessionCount = count
, sessionTimerHeap = hp'
, sessionKeyValueMap = mp'
, sessionOutputStream = K.nil
}
-- Got a timer tick event -- Got a timer tick event
-- XXX can we yield the entries without accumulating them? -- XXX can we yield the entries without accumulating them?
sstep (Tuple4' evTime heap sessions _) Nothing = do sstep (session@SessionState{..}) Nothing = do
(hp', mp', out) <- go heap sessions K.nil (hp', mp', out, count) <- go sessionTimerHeap sessionKeyValueMap K.nil 0
return $ Tuple4' curTime hp' mp' out return $ session
{ sessionCurTime = curTime
, sessionCount = sessionCount - count
, sessionTimerHeap = hp'
, sessionKeyValueMap = mp'
, sessionOutputStream = out
}
where where
curTime = addToAbsTime evTime tickMs curTime = addToAbsTime sessionCurTime tickMs
go hp mp out = do go hp mp out cnt = do
let hres = H.uncons hp let hres = H.uncons hp
case hres of case hres of
Just (Entry ts key, hp') -> do Just (Entry expiry key, hp') -> do
let duration = diffAbsTime curTime ts if curTime >= expiry
if duration >= timeoutMs
then do then do
let (r, mp') = Map.updateLookupWithKey let (r, mp') = Map.updateLookupWithKey
(\_ _ -> Nothing) key mp (\_ _ -> Nothing) key mp
case r of case r of
Nothing -> go hp' mp' out Nothing -> go hp' mp' out cnt
Just (Tuple' latestTS acc) -> do Just (Tuple' latestTS acc) -> do
let dur = diffAbsTime curTime latestTS let expiry' = addToAbsTime latestTS timeoutMs
if dur >= timeoutMs || not reset if curTime >= expiry' || not reset
then do then do
sess <- extract acc sess <- extract acc
go hp' mp' ((key, sess) `K.cons` out) go hp' mp' ((key, sess) `K.cons` out)
(cnt + 1)
else else
-- reset the session timeout -- reset the session timeout
let expiry = addToAbsTime latestTS timeoutMs -- XXX purge anyway if the session size
hp'' = H.insert (Entry expiry key) hp' -- goes beyond maximum session size.
mp'' = Map.insert key (Tuple' latestTS acc) mp' let hp'' = H.insert (Entry expiry' key) hp'
in go hp'' mp'' out mp'' = Map.insert key
else return (hp, mp, out) (Tuple' latestTS acc) mp'
Nothing -> return (hp, mp, out) in go hp'' mp'' out cnt
else return (hp, mp, out, cnt)
Nothing -> return (hp, mp, out, cnt)
-- merge timer events in the stream -- merge timer events in the stream
stream = Serial.map Just str `Par.parallel` repeatM timer stream = Serial.map Just str `Par.parallel` repeatM timer
@ -4098,7 +4161,12 @@ classifySessionsBy tick timeout reset (Fold step initial extract) str =
-- received within the session window. The session times out and gets closed -- received within the session window. The session times out and gets closed
-- only if no event is received within the specified session window size. -- only if no event is received within the specified session window size.
-- --
-- @since 0.7.0 -- @
-- classifyKeepAliveSessions timeout = classifySessionsBy 1 timeout True
-- @
--
-- /Internal/
--
{-# INLINABLE classifyKeepAliveSessions #-} {-# INLINABLE classifyKeepAliveSessions #-}
classifyKeepAliveSessions classifyKeepAliveSessions
:: (IsStream t, MonadAsync m, Ord k) :: (IsStream t, MonadAsync m, Ord k)
@ -4147,7 +4215,12 @@ classifyChunksOf wsize = classifyChunksBy wsize False
-- session window end, a monotonic event time clock is maintained synced with -- session window end, a monotonic event time clock is maintained synced with
-- the timestamps with a clock resolution of 1 second. -- the timestamps with a clock resolution of 1 second.
-- --
-- @since 0.7.0 -- @
-- classifySessionsOf interval = classifySessionsBy 1 interval False
-- @
--
-- /Internal/
--
{-# INLINABLE classifySessionsOf #-} {-# INLINABLE classifySessionsOf #-}
classifySessionsOf classifySessionsOf
:: (IsStream t, MonadAsync m, Ord k) :: (IsStream t, MonadAsync m, Ord k)