server: process event triggers with a timeout

PR-URL: https://github.com/hasura/graphql-engine-mono/pull/8823
GitOrigin-RevId: 4a38ef993fffe018ae16e232f2abb5d30c604855
This commit is contained in:
Puru Gupta 2023-04-24 14:57:22 +05:30 committed by hasura-bot
parent 1698f9dd91
commit 44d9987e92

View File

@ -95,6 +95,7 @@ import System.Metrics.Gauge qualified as EKG.Gauge
import System.Metrics.Prometheus.Counter qualified as Prometheus.Counter
import System.Metrics.Prometheus.Gauge qualified as Prometheus.Gauge
import System.Metrics.Prometheus.Histogram qualified as Prometheus.Histogram
import System.Timeout.Lifted (timeout)
newtype EventInternalErr
= EventInternalErr QErr
@ -270,6 +271,12 @@ logFetchedEventsStatistics logger backendEvents =
{-# ANN processEventQueue ("HLint: ignore Use withAsync" :: String) #-}
-- | `upperBoundEventTriggerTimeout` is the maximum amount of time
-- an event trigger can take to process. This function is intended
-- to use with a timeout.
upperBoundEventTriggerTimeout :: DiffTime
upperBoundEventTriggerTimeout = minutes 30
-- | Service events from our in-DB queue.
--
-- There are a few competing concerns and constraints here; we want to...
@ -442,7 +449,8 @@ processEventQueue logger statsLogger httpMgr getSchemaCache getEventEngineCtx ac
processEvent ::
forall io r b.
( MonadIO io,
( MonadBaseControl IO io,
MonadIO io,
MonadReader r io,
Has HTTP.Manager r,
Has (L.Logger L.Hasura) r,
@ -490,7 +498,6 @@ processEventQueue logger statsLogger httpMgr getSchemaCache getEventEngineCtx ac
runExceptT (setRetry sourceConfig e (addUTCTime 60 currentTime) maintenanceModeVersion)
>>= flip onLeft logQErr
Right eti -> trace (spanName eti) do
eventExecutionStartTime <- liftIO getCurrentTime
let webhook = wciCachedValue $ etiWebhookInfo eti
retryConf = etiRetryConf eti
timeoutSeconds = fromMaybe defaultTimeoutSeconds (rcTimeoutSec retryConf)
@ -501,6 +508,9 @@ processEventQueue logger statsLogger httpMgr getSchemaCache getEventEngineCtx ac
extraLogCtx = ExtraLogContext (epId ep) (Just $ etiName eti)
requestTransform = etiRequestTransform eti
responseTransform = mkResponseTransform <$> etiResponseTransform eti
eventTriggerProcessingTimeout = maybe upperBoundEventTriggerTimeout (min upperBoundEventTriggerTimeout . fromIntegral) (rcTimeoutSec retryConf)
eventTriggerProcessAction = do
eventExecutionStartTime <- liftIO getCurrentTime
eitherReqRes <-
runExceptT $
mkRequest headers httpTimeout payload requestTransform (_envVarValue webhook) >>= \reqDetails -> do
@ -567,6 +577,16 @@ processEventQueue logger statsLogger httpMgr getSchemaCache getEventEngineCtx ac
-- Record an Event Error
recordError' @b sourceConfig e Nothing PESetError maintenanceModeVersion >>= flip onLeft logQErr
-- Try to process the event trigger with a timeout of min(`uppserBoundEventTriggerTimeout`, event's response timeout),
-- so that we're never blocked forever while processing a single event trigger.
--
-- If the request times out, then process it as an erroneous invocation and move on.
timeout (fromInteger (diffTimeToMicroSeconds eventTriggerProcessingTimeout)) eventTriggerProcessAction
`onNothingM` do
let eventTriggerTimeoutMessage = "Event Trigger " <> etiName eti <<> " timed out while processing."
processError @b sourceConfig e retryConf logHeaders J.Null maintenanceModeVersion eventTriggerMetrics (HOther $ T.unpack eventTriggerTimeoutMessage)
>>= flip onLeft logQErr
-- removing an event from the _eeCtxLockedEvents after the event has been processed:
removeEventTriggerEventFromLockedEvents sourceName (eId e) leEvents