mirror of
https://github.com/hasura/graphql-engine.git
synced 2024-09-11 10:46:25 +03:00
pro/server: add monitoring metrics for OTLP traces export
re. https://hasurahq.atlassian.net/browse/INFRA-832 PR-URL: https://github.com/hasura/graphql-engine-mono/pull/10080 Co-authored-by: Toan Nguyen <1615675+hgiasac@users.noreply.github.com> GitOrigin-RevId: 8de1c0a1703037cc9955da01f2593c2db15dc189
This commit is contained in:
parent
75f0629c5d
commit
f6bbda77eb
@ -430,3 +430,30 @@ Health check status of a particular data source, corresponding to the output of
|
||||
| Name | `hasura_source_health` |
|
||||
| Type | Gauge |
|
||||
| Labels | `source_name`: name of the database |
|
||||
|
||||
### OpenTelemetry OTLP Export Metrics
|
||||
|
||||
These metrics allow for monitoring the reliability and performance of OTLP
|
||||
exports of telemetry data.
|
||||
|
||||
#### Hasura OTLP Sent Spans
|
||||
|
||||
Total number of successfully exported trace spans.
|
||||
|
||||
| | |
|
||||
| ------ | -------------------------------------------------------------- |
|
||||
| Name | `hasura_otel_sent_spans` |
|
||||
| Type | Counter |
|
||||
| Labels | none |
|
||||
|
||||
#### Hasura OTLP Dropped Spans
|
||||
|
||||
Total number of trace spans dropped due to either high trace volume that filled
|
||||
the buffer, or errors during send (e.g. a timeout or error response from the
|
||||
collector).
|
||||
|
||||
| | |
|
||||
| ------ | -------------------------------------------------------------- |
|
||||
| Name | `hasura_otel_dropped_spans` |
|
||||
| Type | Counter |
|
||||
| Labels | `reason`: buffer_full \| send_failed |
|
||||
|
@ -8,6 +8,7 @@ module Hasura.Server.Prometheus
|
||||
GraphQLRequestMetrics (..),
|
||||
EventTriggerMetrics (..),
|
||||
CacheRequestMetrics (..),
|
||||
OpenTelemetryMetrics (..),
|
||||
makeDummyPrometheusMetrics,
|
||||
ConnectionsGauge,
|
||||
Connections (..),
|
||||
@ -77,7 +78,8 @@ data PrometheusMetrics = PrometheusMetrics
|
||||
pmSubscriptionMetrics :: SubscriptionMetrics,
|
||||
pmWebsocketMsgQueueTimeSeconds :: Histogram,
|
||||
pmWebsocketMsgWriteTimeSeconds :: Histogram,
|
||||
pmCacheRequestMetrics :: CacheRequestMetrics
|
||||
pmCacheRequestMetrics :: CacheRequestMetrics,
|
||||
pmOpenTelemetryMetrics :: OpenTelemetryMetrics
|
||||
}
|
||||
|
||||
data GraphQLRequestMetrics = GraphQLRequestMetrics
|
||||
@ -133,6 +135,20 @@ data CacheRequestMetrics = CacheRequestMetrics
|
||||
crmCacheMisses :: Counter
|
||||
}
|
||||
|
||||
-- | Counters describing OpenTelemetry export. Trace spans and logs each have
-- one "sent" counter and two "dropped" counters, the latter split by where
-- the drop happened: in the send buffer, or while sending to the collector.
data OpenTelemetryMetrics = OpenTelemetryMetrics
  { -- | Trace spans that were exported
    otmSentSpans :: Counter,
    -- | Trace spans discarded because the send buffer was full
    otmDroppedSpansInBuffer :: Counter,
    -- | Trace spans discarded because sending to the collector failed with
    -- some error, even after retrying
    otmDroppedSpansInSend :: Counter,
    -- | Logs that were exported (going by the name, the log counterpart of
    -- 'otmSentSpans')
    otmSentLogs :: Counter,
    -- | Going by the name, the log counterpart of 'otmDroppedSpansInBuffer'
    otmDroppedLogsInBuffer :: Counter,
    -- | Going by the name, the log counterpart of 'otmDroppedSpansInSend'
    otmDroppedLogsInSend :: Counter
  }
|
||||
|
||||
-- | Create dummy mutable references without associating them to a metrics
|
||||
-- store.
|
||||
makeDummyPrometheusMetrics :: IO PrometheusMetrics
|
||||
@ -149,6 +165,7 @@ makeDummyPrometheusMetrics = do
|
||||
pmWebsocketMsgQueueTimeSeconds <- Histogram.new []
|
||||
pmWebsocketMsgWriteTimeSeconds <- Histogram.new []
|
||||
pmCacheRequestMetrics <- makeDummyCacheRequestMetrics
|
||||
pmOpenTelemetryMetrics <- makeDummyOpenTelemetryMetrics
|
||||
pure PrometheusMetrics {..}
|
||||
|
||||
makeDummyGraphQLRequestMetrics :: IO GraphQLRequestMetrics
|
||||
@ -209,6 +226,16 @@ makeDummyCacheRequestMetrics = do
|
||||
crmCacheMisses <- Counter.new
|
||||
pure CacheRequestMetrics {..}
|
||||
|
||||
-- | Build an 'OpenTelemetryMetrics' value in which each of the six counters
-- is freshly allocated with 'Counter.new'.
makeDummyOpenTelemetryMetrics :: IO OpenTelemetryMetrics
makeDummyOpenTelemetryMetrics = do
  -- Span counters
  sentSpans <- Counter.new
  droppedSpansInSend <- Counter.new
  droppedSpansInBuffer <- Counter.new
  -- Log counters
  sentLogs <- Counter.new
  droppedLogsInSend <- Counter.new
  droppedLogsInBuffer <- Counter.new
  pure
    OpenTelemetryMetrics
      { otmSentSpans = sentSpans,
        otmDroppedSpansInBuffer = droppedSpansInBuffer,
        otmDroppedSpansInSend = droppedSpansInSend,
        otmSentLogs = sentLogs,
        otmDroppedLogsInBuffer = droppedLogsInBuffer,
        otmDroppedLogsInSend = droppedLogsInSend
      }
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
-- | A mutable reference for atomically sampling the number of websocket
|
||||
|
Loading…
Reference in New Issue
Block a user