mirror of
https://github.com/hasura/graphql-engine.git
synced 2024-12-14 17:02:49 +03:00
server: add new metric for event trigger observability
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/8380 Co-authored-by: Rob Dominguez <24390149+robertjdominguez@users.noreply.github.com> GitOrigin-RevId: df7d5c53668fc84e7d70d471e29774136f5d560a
This commit is contained in:
parent
677a972956
commit
b7bae6dfec
@ -122,6 +122,20 @@ curl 'http://127.0.0.1:8080/v1/metrics' -H 'Authorization: Bearer <secret>'
|
||||
<td>none</td>
|
||||
<td>Compare this number to the <a href="/latest/deployment/graphql-engine-flags/reference/#events-http-pool-size">HTTP pool size</a>. Consider increasing the pool size if the metric is near the currently configured value.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>hasura_event_processed_total</code></td>
|
||||
<td>Total number of events processed</td>
|
||||
<td>Counter</td>
|
||||
<td>• "status": success|failed</td>
|
||||
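<td>Represents the Event Trigger egress, i.e. events that have finished processing (delivered successfully, or failed with no further retry scheduled).</td>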
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>hasura_event_invocations_total</code></td>
|
||||
<td>Total number of event webhook invocations</td>
|
||||
<td>Counter</td>
|
||||
<td>• "status": success|failed</td>
|
||||
<td>Represents the Event Trigger webhook HTTP requests made.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>hasura_postgres_connections</code></td>
|
||||
<td>Current number of active PostgreSQL connections</td>
|
||||
|
@ -554,13 +554,19 @@ processEventQueue logger statsLogger httpMgr getSchemaCache getEventEngineCtx ac
|
||||
Prometheus.Histogram.observe (eventWebhookProcessingTime eventTriggerMetrics) eventWebhookProcessingTime'
|
||||
EKG.Distribution.add (smEventProcessingTime serverMetrics) eventProcessingTime'
|
||||
Prometheus.Histogram.observe (eventProcessingTime eventTriggerMetrics) eventProcessingTime'
|
||||
Left (HTTPError reqBody err) ->
|
||||
processError @b sourceConfig e retryConf logHeaders reqBody maintenanceModeVersion err >>= flip onLeft logQErr
|
||||
Left (TransformationError _ err) -> do
|
||||
L.unLogger logger $ L.UnstructuredLog L.LevelError (SB.fromLBS $ J.encode err)
|
||||
Prometheus.Counter.inc (eventProcessedTotalSuccess eventTriggerMetrics)
|
||||
Prometheus.Counter.inc (eventInvocationTotalSuccess eventTriggerMetrics)
|
||||
Left eventError -> do
|
||||
-- TODO (paritosh): We can also add a label to the metric to indicate the type of error
|
||||
liftIO $ Prometheus.Counter.inc (eventInvocationTotalFailure eventTriggerMetrics)
|
||||
case eventError of
|
||||
(HTTPError reqBody err) ->
|
||||
processError @b sourceConfig e retryConf logHeaders reqBody maintenanceModeVersion eventTriggerMetrics err >>= flip onLeft logQErr
|
||||
(TransformationError _ err) -> do
|
||||
L.unLogger logger $ L.UnstructuredLog L.LevelError (SB.fromLBS $ J.encode err)
|
||||
|
||||
-- Record an Event Error
|
||||
recordError' @b sourceConfig e Nothing PESetError maintenanceModeVersion >>= flip onLeft logQErr
|
||||
-- Record an Event Error
|
||||
recordError' @b sourceConfig e Nothing PESetError maintenanceModeVersion >>= flip onLeft logQErr
|
||||
-- removing an event from the _eeCtxLockedEvents after the event has been processed:
|
||||
removeEventTriggerEventFromLockedEvents sourceName (eId e) leEvents
|
||||
|
||||
@ -608,9 +614,10 @@ processError ::
|
||||
[HeaderConf] ->
|
||||
J.Value ->
|
||||
MaintenanceMode MaintenanceModeVersion ->
|
||||
EventTriggerMetrics ->
|
||||
HTTPErr a ->
|
||||
m (Either QErr ())
|
||||
processError sourceConfig e retryConf reqHeaders ep maintenanceModeVersion err = do
|
||||
processError sourceConfig e retryConf reqHeaders ep maintenanceModeVersion eventTriggerMetrics err = do
|
||||
let invocation = case err of
|
||||
HClient httpException ->
|
||||
let statusMaybe = getHTTPExceptionStatus httpException
|
||||
@ -623,16 +630,17 @@ processError sourceConfig e retryConf reqHeaders ep maintenanceModeVersion err =
|
||||
HOther detail -> do
|
||||
let errMsg = SB.fromLBS $ J.encode detail
|
||||
mkInvocation (eId e) ep (Just 500) reqHeaders errMsg []
|
||||
retryOrError <- retryOrSetError e retryConf err
|
||||
retryOrError <- retryOrSetError e retryConf eventTriggerMetrics err
|
||||
recordError @b sourceConfig e invocation retryOrError maintenanceModeVersion
|
||||
|
||||
retryOrSetError ::
|
||||
MonadIO m =>
|
||||
Event b ->
|
||||
RetryConf ->
|
||||
EventTriggerMetrics ->
|
||||
HTTPErr a ->
|
||||
m ProcessEventError
|
||||
retryOrSetError e retryConf err = do
|
||||
retryOrSetError e retryConf eventTriggerMetrics err = do
|
||||
let mretryHeader = getRetryAfterHeaderFromError err
|
||||
tries = eTries e
|
||||
mretryHeaderSeconds = mretryHeader >>= parseRetryHeader
|
||||
@ -640,7 +648,9 @@ retryOrSetError e retryConf err = do
|
||||
noRetryHeader = isNothing mretryHeaderSeconds
|
||||
-- current_try = tries + 1 , allowed_total_tries = rcNumRetries retryConf + 1
|
||||
if triesExhausted && noRetryHeader
|
||||
then pure PESetError
|
||||
then do
|
||||
liftIO $ Prometheus.Counter.inc (eventProcessedTotalFailure eventTriggerMetrics)
|
||||
pure PESetError
|
||||
else do
|
||||
currentTime <- liftIO getCurrentTime
|
||||
let delay = fromMaybe (rcIntervalSec retryConf) mretryHeaderSeconds
|
||||
|
@ -60,7 +60,11 @@ data EventTriggerMetrics = EventTriggerMetrics
|
||||
eventWebhookProcessingTime :: Histogram,
|
||||
eventProcessingTime :: Histogram,
|
||||
eventTriggerBytesReceived :: Counter,
|
||||
eventTriggerBytesSent :: Counter
|
||||
eventTriggerBytesSent :: Counter,
|
||||
eventProcessedTotalSuccess :: Counter,
|
||||
eventProcessedTotalFailure :: Counter,
|
||||
eventInvocationTotalSuccess :: Counter,
|
||||
eventInvocationTotalFailure :: Counter
|
||||
}
|
||||
|
||||
-- | Create dummy mutable references without associating them to a metrics
|
||||
@ -99,6 +103,10 @@ makeDummyEventTriggerMetrics = do
|
||||
eventProcessingTime <- Histogram.new []
|
||||
eventTriggerBytesReceived <- Counter.new
|
||||
eventTriggerBytesSent <- Counter.new
|
||||
eventProcessedTotalSuccess <- Counter.new
|
||||
eventProcessedTotalFailure <- Counter.new
|
||||
eventInvocationTotalSuccess <- Counter.new
|
||||
eventInvocationTotalFailure <- Counter.new
|
||||
pure EventTriggerMetrics {..}
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
Loading…
Reference in New Issue
Block a user