mirror of
https://github.com/hasura/graphql-engine.git
synced 2024-12-04 20:06:35 +03:00
server: add dynamic labels `trigger_name` and `source_name` to existing event trigger metrics
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/9265
GitOrigin-RevId: 6fb6504f1a476ea6c8b810e067770920757e8dc6
This commit is contained in:
parent
6d27ad97ae
commit
e3df24507d
@ -142,21 +142,21 @@ consider looking into the performance of your database.
|
||||
|
||||
Total number of events invoked. Represents the Event Trigger webhook HTTP requests made.
|
||||
|
||||
| | |
|
||||
| ------ | -------------------------------- |
|
||||
| Name | `hasura_event_invocations_total` |
|
||||
| Type | Counter |
|
||||
| Labels | `status`: success \| failed |
|
||||
| | |
|
||||
| ------ | ---------------------------------------------------------- |
|
||||
| Name | `hasura_event_invocations_total` |
|
||||
| Type | Counter |
|
||||
| Labels | `status`: success \| failed, `source_name`, `trigger_name` |
|
||||
|
||||
### Hasura event processed total
|
||||
|
||||
Total number of events processed. Represents the Event Trigger egress.
|
||||
|
||||
| | |
|
||||
| ------ | ------------------------------ |
|
||||
| Name | `hasura_event_processed_total` |
|
||||
| Type | Counter |
|
||||
| Labels | `status`: success \| failed |
|
||||
| | |
|
||||
| ------ | ---------------------------------------------------------- |
|
||||
| Name | `hasura_event_processed_total` |
|
||||
| Type | Counter |
|
||||
| Labels | `status`: success \| failed, `source_name`, `trigger_name` |
|
||||
|
||||
### Hasura event processing time
|
||||
|
||||
@ -167,7 +167,7 @@ This metric can be considered as the end-to-end processing time for an event.
|
||||
| ------ | --------------------------------------------------------------------- |
|
||||
| Name | `hasura_event_processing_time_seconds` |
|
||||
| Type | Histogram<br /><br />Buckets: 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100 |
|
||||
| Labels | none |
|
||||
| Labels | `source_name`, `trigger_name` |
|
||||
|
||||
### Hasura event queue time
|
||||
|
||||
@ -180,7 +180,7 @@ server.
|
||||
| ------ | --------------------------------------------------------------------- |
|
||||
| Name | `hasura_event_queue_time_seconds` |
|
||||
| Type | Histogram<br /><br />Buckets: 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100 |
|
||||
| Labels | none |
|
||||
| Labels | `source_name`, `trigger_name` |
|
||||
|
||||
### Hasura event trigger HTTP workers
|
||||
|
||||
@ -203,7 +203,7 @@ processing time indicates slow webhook, you should try to optimize the event web
|
||||
| ------ | ------------------------------------------------------------ |
|
||||
| Name | `hasura_event_webhook_processing_time_seconds` |
|
||||
| Type | Histogram<br /><br />Buckets: 0.01, 0.03, 0.1, 0.3, 1, 3, 10 |
|
||||
| Labels | none |
|
||||
| Labels | `source_name`, `trigger_name` |
|
||||
|
||||
### Hasura events fetched per batch
|
||||
|
||||
|
@ -89,6 +89,8 @@ import Refined.Unsafe (unsafeRefine)
|
||||
import System.Metrics.Distribution qualified as EKG.Distribution
|
||||
import System.Metrics.Gauge qualified as EKG.Gauge
|
||||
import System.Metrics.Prometheus.Counter qualified as Prometheus.Counter
|
||||
import System.Metrics.Prometheus.CounterVector (CounterVector)
|
||||
import System.Metrics.Prometheus.CounterVector qualified as CounterVector
|
||||
import System.Metrics.Prometheus.Gauge qualified as Prometheus.Gauge
|
||||
import System.Metrics.Prometheus.Histogram qualified as Prometheus.Histogram
|
||||
import System.Timeout.Lifted (timeout)
|
||||
@ -472,7 +474,13 @@ processEventQueue logger statsLogger httpMgr getSchemaCache getEventEngineCtx ac
|
||||
eventProcessTime <- liftIO getCurrentTime
|
||||
let eventQueueTime = realToFrac $ diffUTCTime eventProcessTime eventFetchedTime
|
||||
_ <- liftIO $ EKG.Distribution.add (smEventQueueTime serverMetrics) eventQueueTime
|
||||
liftIO $ Prometheus.Histogram.observe (eventQueueTimeSeconds eventTriggerMetrics) eventQueueTime
|
||||
liftIO $
|
||||
observeHistogramWithLabel
|
||||
getPrometheusMetricsGranularity
|
||||
True
|
||||
(eventQueueTimeSeconds eventTriggerMetrics)
|
||||
(DynamicEventTriggerLabel (tmName (eTrigger e)) sourceName)
|
||||
eventQueueTime
|
||||
|
||||
cache <- liftIO getSchemaCache
|
||||
|
||||
@ -566,16 +574,39 @@ processEventQueue logger statsLogger httpMgr getSchemaCache getEventEngineCtx ac
|
||||
-- `eventStartTime`) used here in calculation are all UTC time.
|
||||
eventStartTime = fromMaybe (eCreatedAtUTC e) (eRetryAtUTC e)
|
||||
eventProcessingTime' = realToFrac $ diffUTCTime eventExecutionFinishTime eventStartTime
|
||||
observeHistogramWithLabel getPrometheusMetricsGranularity True (eventProcessingTime eventTriggerMetrics) (TriggerNameLabel (etiName eti)) eventProcessingTime'
|
||||
observeHistogramWithLabel
|
||||
getPrometheusMetricsGranularity
|
||||
True
|
||||
(eventProcessingTime eventTriggerMetrics)
|
||||
(DynamicEventTriggerLabel (etiName eti) sourceName)
|
||||
eventProcessingTime'
|
||||
liftIO $ do
|
||||
EKG.Distribution.add (smEventWebhookProcessingTime serverMetrics) eventWebhookProcessingTime'
|
||||
Prometheus.Histogram.observe (eventWebhookProcessingTime eventTriggerMetrics) eventWebhookProcessingTime'
|
||||
observeHistogramWithLabel
|
||||
getPrometheusMetricsGranularity
|
||||
True
|
||||
(eventWebhookProcessingTime eventTriggerMetrics)
|
||||
(DynamicEventTriggerLabel (etiName eti) sourceName)
|
||||
eventWebhookProcessingTime'
|
||||
EKG.Distribution.add (smEventProcessingTime serverMetrics) eventProcessingTime'
|
||||
Prometheus.Counter.inc (eventProcessedTotalSuccess eventTriggerMetrics)
|
||||
Prometheus.Counter.inc (eventInvocationTotalSuccess eventTriggerMetrics)
|
||||
incEventTriggerCounterWithLabel
|
||||
getPrometheusMetricsGranularity
|
||||
True
|
||||
(eventProcessedTotal eventTriggerMetrics)
|
||||
(EventStatusWithTriggerLabel eventSuccessLabel (Just (DynamicEventTriggerLabel (etiName eti) sourceName)))
|
||||
incEventTriggerCounterWithLabel
|
||||
getPrometheusMetricsGranularity
|
||||
True
|
||||
(eventInvocationTotal eventTriggerMetrics)
|
||||
(EventStatusWithTriggerLabel eventSuccessLabel (Just (DynamicEventTriggerLabel (etiName eti) sourceName)))
|
||||
Left eventError -> do
|
||||
-- TODO (paritosh): We can also add a label to the metric to indicate the type of error
|
||||
liftIO $ Prometheus.Counter.inc (eventInvocationTotalFailure eventTriggerMetrics)
|
||||
liftIO $
|
||||
incEventTriggerCounterWithLabel
|
||||
getPrometheusMetricsGranularity
|
||||
True
|
||||
(eventInvocationTotal eventTriggerMetrics)
|
||||
(EventStatusWithTriggerLabel eventFailedLabel (Just (DynamicEventTriggerLabel (etiName eti) sourceName)))
|
||||
case eventError of
|
||||
(HTTPError reqBody err) ->
|
||||
processError @b sourceConfig e retryConf logHeaders reqBody maintenanceModeVersion eventTriggerMetrics err >>= flip onLeft logQErr
|
||||
@ -633,7 +664,8 @@ processSuccess sourceConfig e reqHeaders ep maintenanceModeVersion resp = do
|
||||
processError ::
|
||||
forall b m a.
|
||||
( MonadIO m,
|
||||
BackendEventTrigger b
|
||||
BackendEventTrigger b,
|
||||
MonadGetPolicies m
|
||||
) =>
|
||||
SourceConfig b ->
|
||||
Event b ->
|
||||
@ -661,13 +693,16 @@ processError sourceConfig e retryConf reqHeaders ep maintenanceModeVersion event
|
||||
recordError @b sourceConfig e invocation retryOrError maintenanceModeVersion
|
||||
|
||||
retryOrSetError ::
|
||||
MonadIO m =>
|
||||
( MonadIO m,
|
||||
MonadGetPolicies m
|
||||
) =>
|
||||
Event b ->
|
||||
RetryConf ->
|
||||
EventTriggerMetrics ->
|
||||
HTTPErr a ->
|
||||
m ProcessEventError
|
||||
retryOrSetError e retryConf eventTriggerMetrics err = do
|
||||
getPrometheusMetricsGranularity <- runGetPrometheusMetricsGranularity
|
||||
let mretryHeader = getRetryAfterHeaderFromError err
|
||||
tries = eTries e
|
||||
mretryHeaderSeconds = mretryHeader >>= parseRetryHeader
|
||||
@ -676,7 +711,12 @@ retryOrSetError e retryConf eventTriggerMetrics err = do
|
||||
-- current_try = tries + 1 , allowed_total_tries = rcNumRetries retryConf + 1
|
||||
if triesExhausted && noRetryHeader
|
||||
then do
|
||||
liftIO $ Prometheus.Counter.inc (eventProcessedTotalFailure eventTriggerMetrics)
|
||||
liftIO $
|
||||
incEventTriggerCounterWithLabel
|
||||
getPrometheusMetricsGranularity
|
||||
True
|
||||
(eventProcessedTotal eventTriggerMetrics)
|
||||
(EventStatusWithTriggerLabel eventFailedLabel (Just (DynamicEventTriggerLabel (tmName (eTrigger e)) (eSource e))))
|
||||
pure PESetError
|
||||
else do
|
||||
currentTime <- liftIO getCurrentTime
|
||||
@ -732,3 +772,18 @@ getEventTriggerInfoFromEvent sc e = do
|
||||
<> "' on table '"
|
||||
<> table <<> "' not found"
|
||||
)
|
||||
|
||||
-- | Increment an event trigger 'CounterVector' by way of
-- 'recordMetricWithLabel'.
--
-- Two candidate increments are prepared and handed over, together with the
-- granularity getter and the @alwaysObserve@ flag:
--
--   * one keyed by the label exactly as the caller supplied it, and
--   * one keyed by the same status but with the dynamic trigger labels
--     replaced by 'Nothing'.
--
-- Which of the two actually runs is decided by 'recordMetricWithLabel', whose
-- definition is not part of this function.
incEventTriggerCounterWithLabel ::
  (MonadIO m) =>
  (IO GranularPrometheusMetricsState) ->
  -- should the metric be observed without a label when granularMetricsState is OFF
  Bool ->
  CounterVector EventStatusWithTriggerLabel ->
  EventStatusWithTriggerLabel ->
  m ()
incEventTriggerCounterWithLabel getMetricState alwaysObserve counterVector fullLabel@(EventStatusWithTriggerLabel status _) =
  recordMetricWithLabel getMetricState alwaysObserve incWithTriggerLabels incStatusOnly
  where
    -- Increment keyed by the caller's label, dynamic part included.
    incWithTriggerLabels = liftIO $ CounterVector.inc counterVector fullLabel
    -- Increment keyed by the status alone.
    incStatusOnly = liftIO $ CounterVector.inc counterVector statusOnlyLabel
    statusOnlyLabel = EventStatusWithTriggerLabel status Nothing
|
||||
|
@ -19,7 +19,13 @@ module Hasura.Server.Prometheus
|
||||
decWebsocketConnections,
|
||||
ScheduledTriggerMetrics (..),
|
||||
SubscriptionMetrics (..),
|
||||
TriggerNameLabel (..),
|
||||
DynamicEventTriggerLabel (..),
|
||||
ResponseStatus (..),
|
||||
responseStatusToLabelValue,
|
||||
EventStatusLabel (..),
|
||||
eventSuccessLabel,
|
||||
eventFailedLabel,
|
||||
EventStatusWithTriggerLabel (..),
|
||||
GranularPrometheusMetricsState (..),
|
||||
observeHistogramWithLabel,
|
||||
SubscriptionKindLabel (..),
|
||||
@ -39,12 +45,15 @@ import Data.Int (Int64)
|
||||
import Hasura.GraphQL.ParameterizedQueryHash
|
||||
import Hasura.GraphQL.Transport.HTTP.Protocol (OperationName (..))
|
||||
import Hasura.Prelude
|
||||
import Hasura.RQL.Types.Common (SourceName, sourceNameToText)
|
||||
import Hasura.RQL.Types.EventTrigger (TriggerName, triggerNameToTxt)
|
||||
import Hasura.Server.Types (GranularPrometheusMetricsState (..))
|
||||
import Language.GraphQL.Draft.Syntax qualified as G
|
||||
import System.Metrics.Prometheus (ToLabels (..))
|
||||
import System.Metrics.Prometheus.Counter (Counter)
|
||||
import System.Metrics.Prometheus.Counter qualified as Counter
|
||||
import System.Metrics.Prometheus.CounterVector (CounterVector)
|
||||
import System.Metrics.Prometheus.CounterVector qualified as CounterVector
|
||||
import System.Metrics.Prometheus.Gauge (Gauge)
|
||||
import System.Metrics.Prometheus.Gauge qualified as Gauge
|
||||
import System.Metrics.Prometheus.GaugeVector qualified as GaugeVector
|
||||
@ -85,16 +94,14 @@ data GraphQLRequestMetrics = GraphQLRequestMetrics
|
||||
data EventTriggerMetrics = EventTriggerMetrics
|
||||
{ eventTriggerHTTPWorkers :: Gauge,
|
||||
eventsFetchedPerBatch :: Gauge,
|
||||
eventQueueTimeSeconds :: Histogram,
|
||||
eventQueueTimeSeconds :: HistogramVector (Maybe DynamicEventTriggerLabel),
|
||||
eventsFetchTimePerBatch :: Histogram,
|
||||
eventWebhookProcessingTime :: Histogram,
|
||||
eventProcessingTime :: HistogramVector (Maybe TriggerNameLabel),
|
||||
eventWebhookProcessingTime :: HistogramVector (Maybe DynamicEventTriggerLabel),
|
||||
eventProcessingTime :: HistogramVector (Maybe DynamicEventTriggerLabel),
|
||||
eventTriggerBytesReceived :: Counter,
|
||||
eventTriggerBytesSent :: Counter,
|
||||
eventProcessedTotalSuccess :: Counter,
|
||||
eventProcessedTotalFailure :: Counter,
|
||||
eventInvocationTotalSuccess :: Counter,
|
||||
eventInvocationTotalFailure :: Counter
|
||||
eventProcessedTotal :: CounterVector EventStatusWithTriggerLabel,
|
||||
eventInvocationTotal :: CounterVector EventStatusWithTriggerLabel
|
||||
}
|
||||
|
||||
data ScheduledTriggerMetrics = ScheduledTriggerMetrics
|
||||
@ -159,16 +166,14 @@ makeDummyEventTriggerMetrics :: IO EventTriggerMetrics
|
||||
makeDummyEventTriggerMetrics = do
|
||||
eventTriggerHTTPWorkers <- Gauge.new
|
||||
eventsFetchedPerBatch <- Gauge.new
|
||||
eventQueueTimeSeconds <- Histogram.new []
|
||||
eventQueueTimeSeconds <- HistogramVector.new []
|
||||
eventsFetchTimePerBatch <- Histogram.new []
|
||||
eventWebhookProcessingTime <- Histogram.new []
|
||||
eventWebhookProcessingTime <- HistogramVector.new []
|
||||
eventProcessingTime <- HistogramVector.new []
|
||||
eventTriggerBytesReceived <- Counter.new
|
||||
eventTriggerBytesSent <- Counter.new
|
||||
eventProcessedTotalSuccess <- Counter.new
|
||||
eventProcessedTotalFailure <- Counter.new
|
||||
eventInvocationTotalSuccess <- Counter.new
|
||||
eventInvocationTotalFailure <- Counter.new
|
||||
eventProcessedTotal <- CounterVector.new
|
||||
eventInvocationTotal <- CounterVector.new
|
||||
pure EventTriggerMetrics {..}
|
||||
|
||||
makeDummyScheduledTriggerMetrics :: IO ScheduledTriggerMetrics
|
||||
@ -250,12 +255,44 @@ modifyConnectionsGauge ::
|
||||
modifyConnectionsGauge f (ConnectionsGauge ref) =
|
||||
atomicModifyIORef' ref $ \connections -> (f connections, ())
|
||||
|
||||
newtype TriggerNameLabel = TriggerNameLabel TriggerName
|
||||
-- | Pair of dynamic labels for event trigger metrics: the name of a trigger
-- together with the name of a source.
--
-- The deriving clause names its strategy explicitly (@stock@), in line with
-- the other label types declared in this module.
data DynamicEventTriggerLabel = DynamicEventTriggerLabel
  { -- | Name of the event trigger.
    _detlTriggerName :: TriggerName,
    -- | Name of the source.
    _detlSourceName :: SourceName
  }
  deriving stock (Ord, Eq)
|
||||
|
||||
instance ToLabels (Maybe TriggerNameLabel) where
|
||||
instance ToLabels (Maybe DynamicEventTriggerLabel) where
|
||||
toLabels Nothing = Map.empty
|
||||
toLabels (Just (TriggerNameLabel triggerName)) = Map.singleton "trigger_name" (triggerNameToTxt triggerName)
|
||||
toLabels (Just (DynamicEventTriggerLabel triggerName sourceName)) = Map.fromList $ [("trigger_name", triggerNameToTxt triggerName), ("source_name", sourceNameToText sourceName)]
|
||||
|
||||
-- | Outcome of a response: it either succeeded or failed.
data ResponseStatus = Success | Failed

-- TODO: Make this a method of a new typeclass of the metrics library

-- | Render a 'ResponseStatus' as the text used for a metric label value:
-- @"success"@ for 'Success' and @"failed"@ for 'Failed'.
responseStatusToLabelValue :: ResponseStatus -> Text
responseStatusToLabelValue Success = "success"
responseStatusToLabelValue Failed = "failed"
|
||||
|
||||
-- | Metric label holding the outcome of an event as text.
--
-- NOTE(review): the 'ToLabels' instance is derived generically; presumably the
-- field name @status@ becomes the label key, so confirm against the metrics
-- library before renaming the field.
newtype EventStatusLabel = EventStatusLabel
  { status :: Text
  }
  deriving stock (Generic, Ord, Eq)
  deriving anyclass (ToLabels)

-- | The 'EventStatusLabel' built from the label value of 'Success'.
eventSuccessLabel :: EventStatusLabel
eventSuccessLabel = EventStatusLabel (responseStatusToLabelValue Success)

-- | The 'EventStatusLabel' built from the label value of 'Failed'.
eventFailedLabel :: EventStatusLabel
eventFailedLabel = EventStatusLabel (responseStatusToLabelValue Failed)
|
||||
|
||||
-- | An event status label, optionally accompanied by the dynamic
-- trigger/source labels.
data EventStatusWithTriggerLabel = EventStatusWithTriggerLabel
  { -- | Outcome of the event.
    _eswtlStatus :: EventStatusLabel,
    -- | Dynamic labels, or 'Nothing' when they are left out.
    _eswtlDynamicLabels :: Maybe DynamicEventTriggerLabel
  }
  deriving stock (Generic, Ord, Eq)

-- | The rendered labels are a @"status"@ entry combined (via '<>') with the
-- labels of the optional dynamic part.
instance ToLabels EventStatusWithTriggerLabel where
  toLabels (EventStatusWithTriggerLabel statusLabel dynamicLabels) =
    HashMap.fromList [("status", status statusLabel)] <> toLabels dynamicLabels
|
||||
|
||||
data SubscriptionKindLabel = SubscriptionKindLabel
|
||||
{ subscription_kind :: Text
|
||||
|
Loading…
Reference in New Issue
Block a user