mirror of
https://github.com/hasura/graphql-engine.git
synced 2024-12-15 01:12:56 +03:00
server: add new metric for event trigger observability
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/8380
Co-authored-by: Rob Dominguez <24390149+robertjdominguez@users.noreply.github.com>
GitOrigin-RevId: df7d5c53668fc84e7d70d471e29774136f5d560a
This commit is contained in:
parent
677a972956
commit
b7bae6dfec
@ -122,6 +122,20 @@ curl 'http://127.0.0.1:8080/v1/metrics' -H 'Authorization: Bearer <secret>'
|
|||||||
<td>none</td>
|
<td>none</td>
|
||||||
<td>Compare this number to the <a href="/latest/deployment/graphql-engine-flags/reference/#events-http-pool-size">HTTP pool size</a>. Consider increasing it if the metric is near the current configured value.</td>
|
<td>Compare this number to the <a href="/latest/deployment/graphql-engine-flags/reference/#events-http-pool-size">HTTP pool size</a>. Consider increasing it if the metric is near the current configured value.</td>
|
||||||
</tr>
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><code>hasura_event_processed_total</code></td>
|
||||||
|
<td>Total number of events processed</td>
|
||||||
|
<td>Counter</td>
|
||||||
|
<td>• "status": success|failed</td>
|
||||||
|
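<td>Represents the Event Trigger egress, i.e. events that have finished processing: either delivered successfully, or marked as failed once their retries were exhausted.</td>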
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td><code>hasura_event_invocations_total</code></td>
|
||||||
|
<td>Total number of Event Trigger webhook invocations</td>
|
||||||
|
<td>Counter</td>
|
||||||
|
<td>• "status": success|failed</td>
|
||||||
|
<td>Represents the Event Trigger webhook HTTP requests made.</td>
|
||||||
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><code>hasura_postgres_connections</code></td>
|
<td><code>hasura_postgres_connections</code></td>
|
||||||
<td>Current number of active PostgreSQL connections</td>
|
<td>Current number of active PostgreSQL connections</td>
|
||||||
|
@ -554,9 +554,15 @@ processEventQueue logger statsLogger httpMgr getSchemaCache getEventEngineCtx ac
|
|||||||
Prometheus.Histogram.observe (eventWebhookProcessingTime eventTriggerMetrics) eventWebhookProcessingTime'
|
Prometheus.Histogram.observe (eventWebhookProcessingTime eventTriggerMetrics) eventWebhookProcessingTime'
|
||||||
EKG.Distribution.add (smEventProcessingTime serverMetrics) eventProcessingTime'
|
EKG.Distribution.add (smEventProcessingTime serverMetrics) eventProcessingTime'
|
||||||
Prometheus.Histogram.observe (eventProcessingTime eventTriggerMetrics) eventProcessingTime'
|
Prometheus.Histogram.observe (eventProcessingTime eventTriggerMetrics) eventProcessingTime'
|
||||||
Left (HTTPError reqBody err) ->
|
Prometheus.Counter.inc (eventProcessedTotalSuccess eventTriggerMetrics)
|
||||||
processError @b sourceConfig e retryConf logHeaders reqBody maintenanceModeVersion err >>= flip onLeft logQErr
|
Prometheus.Counter.inc (eventInvocationTotalSuccess eventTriggerMetrics)
|
||||||
Left (TransformationError _ err) -> do
|
Left eventError -> do
|
||||||
|
-- TODO (paritosh): We can also add a label to the metric to indicate the type of error
|
||||||
|
liftIO $ Prometheus.Counter.inc (eventInvocationTotalFailure eventTriggerMetrics)
|
||||||
|
case eventError of
|
||||||
|
(HTTPError reqBody err) ->
|
||||||
|
processError @b sourceConfig e retryConf logHeaders reqBody maintenanceModeVersion eventTriggerMetrics err >>= flip onLeft logQErr
|
||||||
|
(TransformationError _ err) -> do
|
||||||
L.unLogger logger $ L.UnstructuredLog L.LevelError (SB.fromLBS $ J.encode err)
|
L.unLogger logger $ L.UnstructuredLog L.LevelError (SB.fromLBS $ J.encode err)
|
||||||
|
|
||||||
-- Record an Event Error
|
-- Record an Event Error
|
||||||
@ -608,9 +614,10 @@ processError ::
|
|||||||
[HeaderConf] ->
|
[HeaderConf] ->
|
||||||
J.Value ->
|
J.Value ->
|
||||||
MaintenanceMode MaintenanceModeVersion ->
|
MaintenanceMode MaintenanceModeVersion ->
|
||||||
|
EventTriggerMetrics ->
|
||||||
HTTPErr a ->
|
HTTPErr a ->
|
||||||
m (Either QErr ())
|
m (Either QErr ())
|
||||||
processError sourceConfig e retryConf reqHeaders ep maintenanceModeVersion err = do
|
processError sourceConfig e retryConf reqHeaders ep maintenanceModeVersion eventTriggerMetrics err = do
|
||||||
let invocation = case err of
|
let invocation = case err of
|
||||||
HClient httpException ->
|
HClient httpException ->
|
||||||
let statusMaybe = getHTTPExceptionStatus httpException
|
let statusMaybe = getHTTPExceptionStatus httpException
|
||||||
@ -623,16 +630,17 @@ processError sourceConfig e retryConf reqHeaders ep maintenanceModeVersion err =
|
|||||||
HOther detail -> do
|
HOther detail -> do
|
||||||
let errMsg = SB.fromLBS $ J.encode detail
|
let errMsg = SB.fromLBS $ J.encode detail
|
||||||
mkInvocation (eId e) ep (Just 500) reqHeaders errMsg []
|
mkInvocation (eId e) ep (Just 500) reqHeaders errMsg []
|
||||||
retryOrError <- retryOrSetError e retryConf err
|
retryOrError <- retryOrSetError e retryConf eventTriggerMetrics err
|
||||||
recordError @b sourceConfig e invocation retryOrError maintenanceModeVersion
|
recordError @b sourceConfig e invocation retryOrError maintenanceModeVersion
|
||||||
|
|
||||||
retryOrSetError ::
|
retryOrSetError ::
|
||||||
MonadIO m =>
|
MonadIO m =>
|
||||||
Event b ->
|
Event b ->
|
||||||
RetryConf ->
|
RetryConf ->
|
||||||
|
EventTriggerMetrics ->
|
||||||
HTTPErr a ->
|
HTTPErr a ->
|
||||||
m ProcessEventError
|
m ProcessEventError
|
||||||
retryOrSetError e retryConf err = do
|
retryOrSetError e retryConf eventTriggerMetrics err = do
|
||||||
let mretryHeader = getRetryAfterHeaderFromError err
|
let mretryHeader = getRetryAfterHeaderFromError err
|
||||||
tries = eTries e
|
tries = eTries e
|
||||||
mretryHeaderSeconds = mretryHeader >>= parseRetryHeader
|
mretryHeaderSeconds = mretryHeader >>= parseRetryHeader
|
||||||
@ -640,7 +648,9 @@ retryOrSetError e retryConf err = do
|
|||||||
noRetryHeader = isNothing mretryHeaderSeconds
|
noRetryHeader = isNothing mretryHeaderSeconds
|
||||||
-- current_try = tries + 1 , allowed_total_tries = rcNumRetries retryConf + 1
|
-- current_try = tries + 1 , allowed_total_tries = rcNumRetries retryConf + 1
|
||||||
if triesExhausted && noRetryHeader
|
if triesExhausted && noRetryHeader
|
||||||
then pure PESetError
|
then do
|
||||||
|
liftIO $ Prometheus.Counter.inc (eventProcessedTotalFailure eventTriggerMetrics)
|
||||||
|
pure PESetError
|
||||||
else do
|
else do
|
||||||
currentTime <- liftIO getCurrentTime
|
currentTime <- liftIO getCurrentTime
|
||||||
let delay = fromMaybe (rcIntervalSec retryConf) mretryHeaderSeconds
|
let delay = fromMaybe (rcIntervalSec retryConf) mretryHeaderSeconds
|
||||||
|
@ -60,7 +60,11 @@ data EventTriggerMetrics = EventTriggerMetrics
|
|||||||
eventWebhookProcessingTime :: Histogram,
|
eventWebhookProcessingTime :: Histogram,
|
||||||
eventProcessingTime :: Histogram,
|
eventProcessingTime :: Histogram,
|
||||||
eventTriggerBytesReceived :: Counter,
|
eventTriggerBytesReceived :: Counter,
|
||||||
eventTriggerBytesSent :: Counter
|
eventTriggerBytesSent :: Counter,
|
||||||
|
eventProcessedTotalSuccess :: Counter,
|
||||||
|
eventProcessedTotalFailure :: Counter,
|
||||||
|
eventInvocationTotalSuccess :: Counter,
|
||||||
|
eventInvocationTotalFailure :: Counter
|
||||||
}
|
}
|
||||||
|
|
||||||
-- | Create dummy mutable references without associating them to a metrics
|
-- | Create dummy mutable references without associating them to a metrics
|
||||||
@ -99,6 +103,10 @@ makeDummyEventTriggerMetrics = do
|
|||||||
eventProcessingTime <- Histogram.new []
|
eventProcessingTime <- Histogram.new []
|
||||||
eventTriggerBytesReceived <- Counter.new
|
eventTriggerBytesReceived <- Counter.new
|
||||||
eventTriggerBytesSent <- Counter.new
|
eventTriggerBytesSent <- Counter.new
|
||||||
|
eventProcessedTotalSuccess <- Counter.new
|
||||||
|
eventProcessedTotalFailure <- Counter.new
|
||||||
|
eventInvocationTotalSuccess <- Counter.new
|
||||||
|
eventInvocationTotalFailure <- Counter.new
|
||||||
pure EventTriggerMetrics {..}
|
pure EventTriggerMetrics {..}
|
||||||
|
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
|
Loading…
Reference in New Issue
Block a user