add operation_name and parameterized_query_hash to hasura_graphql_requests_total metric

PR-URL: https://github.com/hasura/graphql-engine-mono/pull/10725
GitOrigin-RevId: b19df2a17e94e72dde790a54e01e290b581aa560
This commit is contained in:
paritosh-08 2024-03-18 20:12:43 +05:30 committed by hasura-bot
parent 4b1cf9bee7
commit b0a3ff1d0f
8 changed files with 181 additions and 129 deletions

View File

@ -44,11 +44,11 @@ buckets, you should consider [tuning the performance](/deployment/performance-tu
Number of GraphQL requests received, representing the GraphQL query/mutation traffic on the server.
| | |
| ------ | -------------------------------------------------------------- |
| Name | `hasura_graphql_requests_total` |
| Type | Counter |
| Labels | `operation_type`: query \| mutation \| subscription \| unknown |
| | |
| ------ | -------------------------------------------------------------------------------------------------------------------------------------------------- |
| Name | `hasura_graphql_requests_total` |
| Type | Counter |
| Labels | `operation_type`: query \| mutation \| subscription \| unknown, `response_status`: success \| failed, `operation_name`, `parameterized_query_hash` |
The `unknown` operation type will be returned for queries that fail authorization, parsing, or certain validations. The
`response_status` label will be `success` for successful requests and `failed` for failed requests.

View File

@ -39,7 +39,7 @@ import Hasura.RQL.Types.Common (SourceName)
import Hasura.RQL.Types.Roles (RoleName)
import Hasura.RQL.Types.Subscription (SubscriptionType (..))
import Hasura.Server.Logging (ModelInfo (..), ModelInfoLog (..))
import Hasura.Server.Prometheus (PrometheusMetrics (..), SubscriptionMetrics (..), liveQuerySubscriptionLabel, recordSubcriptionMetric)
import Hasura.Server.Prometheus (PrometheusMetrics (..), SubscriptionMetrics (..), liveQuerySubscriptionLabel, recordSubscriptionMetric)
import Hasura.Server.Types (GranularPrometheusMetricsState (..), ModelInfoLogState (..))
import Refined (unrefine)
import System.Metrics.Prometheus.Gauge qualified as Prometheus.Gauge
@ -121,7 +121,7 @@ pollLiveQuery pollerId pollerResponseState lqOpts (sourceName, sourceConfig) rol
(queryExecutionTime, mxRes) <- runDBSubscription @b sourceConfig query (over (each . _2) C._csVariables cohorts) resolvedConnectionTemplate
let dbExecTimeMetric = submDBExecTotalTime $ pmSubscriptionMetrics $ prometheusMetrics
recordSubcriptionMetric
recordSubscriptionMetric
granularPrometheusMetricsState
True
operationNamesMap
@ -215,7 +215,7 @@ pollLiveQuery pollerId pollerResponseState lqOpts (sourceName, sourceConfig) rol
when (modelInfoLogStatus' == ModelInfoLogOn) $ do
for_ (modelInfoList) $ \(ModelInfoPart modelName modelType modelSourceName modelSourceType modelQueryType) -> do
L.unLogger logger $ ModelInfoLog L.LevelInfo $ ModelInfo modelName (toTxt modelType) (toTxt <$> modelSourceName) (toTxt <$> modelSourceType) (toTxt modelQueryType) False
recordSubcriptionMetric
recordSubscriptionMetric
granularPrometheusMetricsState
True
operationNamesMap

View File

@ -41,7 +41,7 @@ import Hasura.RQL.Types.Roles (RoleName)
import Hasura.RQL.Types.Subscription (SubscriptionType (..))
import Hasura.SQL.Value (TxtEncodedVal (..))
import Hasura.Server.Logging (ModelInfo (..), ModelInfoLog (..))
import Hasura.Server.Prometheus (PrometheusMetrics (..), SubscriptionMetrics (..), recordSubcriptionMetric, streamingSubscriptionLabel)
import Hasura.Server.Prometheus (PrometheusMetrics (..), SubscriptionMetrics (..), recordSubscriptionMetric, streamingSubscriptionLabel)
import Hasura.Server.Types (GranularPrometheusMetricsState (..), ModelInfoLogState (..))
import Language.GraphQL.Draft.Syntax qualified as G
import Refined (unrefine)
@ -289,7 +289,7 @@ pollStreamingQuery pollerId pollerResponseState streamingQueryOpts (sourceName,
(over (each . _2) C._csVariables $ fmap (fmap fst) cohorts)
resolvedConnectionTemplate
let dbExecTimeMetric = submDBExecTotalTime $ pmSubscriptionMetrics $ prometheusMetrics
recordSubcriptionMetric
recordSubscriptionMetric
granularPrometheusMetricsState
True
operationNames
@ -470,7 +470,7 @@ pollStreamingQuery pollerId pollerResponseState streamingQueryOpts (sourceName,
unLogger logger $ ModelInfoLog LevelInfo $ ModelInfo modelName (toTxt modelType) (toTxt <$> modelSourceName) (toTxt <$> modelSourceType) (toTxt modelQueryType) False
postPollHook pollDetails
let totalTimeMetric = submTotalTime $ pmSubscriptionMetrics $ prometheusMetrics
recordSubcriptionMetric
recordSubscriptionMetric
granularPrometheusMetricsState
True
operationNames

View File

@ -56,7 +56,7 @@ import Hasura.RQL.Types.Common (SourceName)
import Hasura.SQL.AnyBackend qualified as AB
import Hasura.Server.Metrics (ServerMetrics (..))
import Hasura.Server.Prometheus
( DynamicSubscriptionLabel (..),
( DynamicGraphqlOperationLabel (..),
PrometheusMetrics (..),
SubscriptionLabel (..),
SubscriptionMetrics (..),
@ -258,7 +258,7 @@ addLiveQuery
liftIO $ Prometheus.Gauge.inc $ submActiveLiveQueryPollers $ pmSubscriptionMetrics $ prometheusMetrics
liftIO $ EKG.Gauge.inc $ smActiveSubscriptions serverMetrics
let promMetricGranularLabel = SubscriptionLabel liveQuerySubscriptionLabel (Just $ DynamicSubscriptionLabel (Just parameterizedQueryHash) operationName)
let promMetricGranularLabel = SubscriptionLabel liveQuerySubscriptionLabel (Just $ DynamicGraphqlOperationLabel (Just parameterizedQueryHash) operationName)
promMetricLabel = SubscriptionLabel liveQuerySubscriptionLabel Nothing
let numSubscriptionMetric = submActiveSubscriptions $ pmSubscriptionMetrics $ prometheusMetrics
recordMetricWithLabel
@ -390,7 +390,7 @@ addStreamSubscriptionQuery
EKG.Gauge.inc $ smActiveSubscriptions serverMetrics
EKG.Gauge.inc $ smActiveStreamingSubscriptions serverMetrics
let promMetricGranularLabel = SubscriptionLabel streamingSubscriptionLabel (Just $ DynamicSubscriptionLabel (Just parameterizedQueryHash) operationName)
let promMetricGranularLabel = SubscriptionLabel streamingSubscriptionLabel (Just $ DynamicGraphqlOperationLabel (Just parameterizedQueryHash) operationName)
promMetricLabel = SubscriptionLabel streamingSubscriptionLabel Nothing
numSubscriptionMetric = submActiveSubscriptions $ pmSubscriptionMetrics $ prometheusMetrics
recordMetricWithLabel
@ -470,7 +470,7 @@ removeLiveQuery logger serverMetrics prometheusMetrics lqState lqId@(SubscriberD
<*> TMap.null newOps
when cohortIsEmpty $ TMap.delete cohortId cohortMap
handlerIsEmpty <- TMap.null cohortMap
let promMetricGranularLabel = SubscriptionLabel liveQuerySubscriptionLabel (Just $ DynamicSubscriptionLabel (Just parameterizedQueryHash) maybeOperationName)
let promMetricGranularLabel = SubscriptionLabel liveQuerySubscriptionLabel (Just $ DynamicGraphqlOperationLabel (Just parameterizedQueryHash) maybeOperationName)
promMetricLabel = SubscriptionLabel liveQuerySubscriptionLabel Nothing
-- when there is no need for handler i.e, this happens to be the last
-- operation, take the ref for the polling thread to cancel it
@ -569,7 +569,7 @@ removeStreamingQuery logger serverMetrics prometheusMetrics subscriptionState (S
<*> TMap.null newOps
when cohortIsEmpty $ TMap.delete currentCohortId cohortMap
handlerIsEmpty <- TMap.null cohortMap
let promMetricGranularLabel = SubscriptionLabel streamingSubscriptionLabel (Just $ DynamicSubscriptionLabel (Just parameterizedQueryHash) maybeOperationName)
let promMetricGranularLabel = SubscriptionLabel streamingSubscriptionLabel (Just $ DynamicGraphqlOperationLabel (Just parameterizedQueryHash) maybeOperationName)
promMetricLabel = SubscriptionLabel streamingSubscriptionLabel Nothing
-- when there is no need for handler i.e,
-- operation, take the ref for the polling thread to cancel it

View File

@ -88,8 +88,11 @@ import Hasura.Server.Limits
import Hasura.Server.Logging
import Hasura.Server.Logging qualified as L
import Hasura.Server.Prometheus
( GraphQLRequestMetrics (..),
( GranularPrometheusMetricsState,
GraphQLRequestMetrics (..),
PrometheusMetrics (..),
ResponseStatus (..),
recordGraphqlOperationMetric,
)
import Hasura.Server.Telemetry.Counters qualified as Telem
import Hasura.Server.Types (HeaderPrecedence, ModelInfoLogState (..), MonadGetPolicies (..), ReadOnlyMode (..), RemoteSchemaResponsePriority (..), RequestId (..))
@ -100,7 +103,7 @@ import Hasura.Tracing qualified as Tracing
import Language.GraphQL.Draft.Syntax qualified as G
import Network.HTTP.Types qualified as HTTP
import Network.Wai.Extended qualified as Wai
import System.Metrics.Prometheus.Counter qualified as Prometheus.Counter
import System.Metrics.Prometheus.CounterVector qualified as Prometheus.CounterVector
import System.Metrics.Prometheus.Histogram qualified as Prometheus.Histogram
-- | Encapsulates a function that stores a query response in the cache.
@ -329,12 +332,13 @@ runGQ ::
ResponseInternalErrorsConfig ->
m (GQLQueryOperationSuccessLog, HttpResponse (Maybe GQResponse, EncJSON))
runGQ env sqlGenCtx sc enableAL readOnlyMode remoteSchemaResponsePriority headerPrecedence prometheusMetrics logger agentLicenseKey reqId userInfo ipAddress reqHeaders queryType reqUnparsed responseErrorsConfig = do
granularPrometheusMetricsState <- runGetPrometheusMetricsGranularity
getModelInfoLogStatus' <- runGetModelInfoLogStatus
modelInfoLogStatus <- liftIO getModelInfoLogStatus'
let gqlMetrics = pmGraphQLRequestMetrics prometheusMetrics
(totalTime, (response, parameterizedQueryHash, gqlOpType, modelInfoListForLogging, queryCachedStatus)) <- withElapsedTime $ do
(reqParsed, runLimits, queryParts) <- Tracing.newSpan "Parse GraphQL" $ observeGQLQueryError gqlMetrics Nothing $ do
(totalTime, (response, parameterizedQueryHash, gqlOpType, gqlOperationName, modelInfoListForLogging, queryCachedStatus)) <- withElapsedTime $ do
(reqParsed, runLimits, queryParts) <- Tracing.newSpan "Parse GraphQL" $ observeGQLQueryError granularPrometheusMetricsState gqlMetrics Nothing (_grOperationName reqUnparsed) Nothing $ do
-- 1. Run system authorization on the 'reqUnparsed :: GQLReqUnparsed' query.
reqParsed <-
E.checkGQLExecution userInfo (reqHeaders, ipAddress) enableAL sc reqUnparsed reqId
@ -348,7 +352,8 @@ runGQ env sqlGenCtx sc enableAL readOnlyMode remoteSchemaResponsePriority header
return (reqParsed, runLimits, queryParts)
let gqlOpType = G._todType queryParts
observeGQLQueryError gqlMetrics (Just gqlOpType) $ do
let gqlOperationName = getOpNameFromParsedReq reqParsed
observeGQLQueryError granularPrometheusMetricsState gqlMetrics (Just gqlOpType) gqlOperationName Nothing $ do
-- 3. Construct the remainder of the execution plan.
let maybeOperationName = _unOperationName <$> getOpNameFromParsedReq reqParsed
for_ maybeOperationName $ \nm ->
@ -374,13 +379,13 @@ runGQ env sqlGenCtx sc enableAL readOnlyMode remoteSchemaResponsePriority header
-- 4. Execute the execution plan producing a 'AnnotatedResponse'.
(response, queryCachedStatus, modelInfoFromExecution) <- executePlan reqParsed runLimits execPlan
return (response, parameterizedQueryHash, gqlOpType, ((modelInfoList <> (modelInfoFromExecution))), queryCachedStatus)
return (response, parameterizedQueryHash, gqlOpType, gqlOperationName, ((modelInfoList <> (modelInfoFromExecution))), queryCachedStatus)
-- 5. Record telemetry
recordTimings totalTime response
-- 6. Record Prometheus metrics (query successes)
liftIO $ recordGQLQuerySuccess gqlMetrics totalTime gqlOpType
liftIO $ recordGQLQuerySuccess granularPrometheusMetricsState gqlMetrics totalTime gqlOperationName parameterizedQueryHash gqlOpType
-- 7. Return the response along with logging metadata.
let requestSize = LBS.length $ J.encode reqUnparsed
@ -603,42 +608,45 @@ runGQ env sqlGenCtx sc enableAL readOnlyMode remoteSchemaResponsePriority header
( MonadIO n,
MonadError e n
) =>
IO GranularPrometheusMetricsState ->
GraphQLRequestMetrics ->
Maybe G.OperationType ->
Maybe OperationName ->
Maybe ParameterizedQueryHash ->
n a ->
n a
observeGQLQueryError gqlMetrics mOpType action =
observeGQLQueryError granularPrometheusMetricsState gqlMetrics mOpType mOpName mQHash action =
catchError (fmap Right action) (pure . Left) >>= \case
Right result ->
pure result
Left err -> do
case mOpType of
Nothing ->
liftIO $ Prometheus.Counter.inc (gqlRequestsUnknownFailure gqlMetrics)
Just opType -> case opType of
G.OperationTypeQuery ->
liftIO $ Prometheus.Counter.inc (gqlRequestsQueryFailure gqlMetrics)
G.OperationTypeMutation ->
liftIO $ Prometheus.Counter.inc (gqlRequestsMutationFailure gqlMetrics)
G.OperationTypeSubscription ->
-- We do not collect metrics for subscriptions at the request level.
pure ()
recordGraphqlOperationMetric
granularPrometheusMetricsState
mOpType
Failed
mOpName
mQHash
(Prometheus.CounterVector.inc $ gqlRequests gqlMetrics)
throwError err
-- Tally and record execution times for successful GraphQL requests.
recordGQLQuerySuccess ::
GraphQLRequestMetrics -> DiffTime -> G.OperationType -> IO ()
recordGQLQuerySuccess gqlMetrics totalTime = \case
G.OperationTypeQuery -> liftIO $ do
Prometheus.Counter.inc (gqlRequestsQuerySuccess gqlMetrics)
Prometheus.Histogram.observe (gqlExecutionTimeSecondsQuery gqlMetrics) (realToFrac totalTime)
G.OperationTypeMutation -> liftIO $ do
Prometheus.Counter.inc (gqlRequestsMutationSuccess gqlMetrics)
Prometheus.Histogram.observe (gqlExecutionTimeSecondsMutation gqlMetrics) (realToFrac totalTime)
G.OperationTypeSubscription ->
-- We do not collect metrics for subscriptions at the request level.
-- Furthermore, we do not serve GraphQL subscriptions over HTTP.
pure ()
IO GranularPrometheusMetricsState -> GraphQLRequestMetrics -> DiffTime -> Maybe OperationName -> ParameterizedQueryHash -> G.OperationType -> IO ()
recordGQLQuerySuccess granularPrometheusMetricsState gqlMetrics totalTime opName qHash opType = do
recordGraphqlOperationMetric
granularPrometheusMetricsState
(Just opType)
Success
opName
(Just qHash)
(Prometheus.CounterVector.inc $ gqlRequests gqlMetrics)
case opType of
G.OperationTypeQuery -> liftIO $ Prometheus.Histogram.observe (gqlExecutionTimeSecondsQuery gqlMetrics) (realToFrac totalTime)
G.OperationTypeMutation -> liftIO $ Prometheus.Histogram.observe (gqlExecutionTimeSecondsMutation gqlMetrics) (realToFrac totalTime)
G.OperationTypeSubscription ->
-- We do not collect metrics for subscriptions at the request level.
-- Furthermore, we do not serve GraphQL subscriptions over HTTP.
pure ()
coalescePostgresMutations ::
EB.ExecutionPlan ->

View File

@ -101,6 +101,8 @@ import Hasura.Server.Metrics (ServerMetrics (..))
import Hasura.Server.Prometheus
( GraphQLRequestMetrics (..),
PrometheusMetrics (..),
ResponseStatus (..),
recordGraphqlOperationMetric,
)
import Hasura.Server.Telemetry.Counters qualified as Telem
import Hasura.Server.Types (GranularPrometheusMetricsState (..), HeaderPrecedence, ModelInfoLogState (..), MonadGetPolicies (..), RemoteSchemaResponsePriority, RequestId, getRequestId)
@ -115,7 +117,7 @@ import Network.HTTP.Types qualified as HTTP
import Network.WebSockets qualified as WS
import Refined (unrefine)
import StmContainers.Map qualified as STMMap
import System.Metrics.Prometheus.Counter qualified as Prometheus.Counter
import System.Metrics.Prometheus.CounterVector qualified as Prometheus.CounterVector
import System.Metrics.Prometheus.Histogram qualified as Prometheus.Histogram
-- | 'ES.SubscriberDetails' comes from 'Hasura.GraphQL.Execute.LiveQuery.State.addLiveQuery'. We use
@ -451,6 +453,7 @@ onStart ::
onStart enabledLogTypes agentLicenseKey serverEnv wsConn shouldCaptureVariables (StartMsg opId q) onMessageActions responseErrorsConfig headerPrecedence = catchAndIgnore $ do
modelInfoLogStatus' <- runGetModelInfoLogStatus
modelInfoLogStatus <- liftIO modelInfoLogStatus'
granularPrometheusMetricsState <- runGetPrometheusMetricsGranularity
timerTot <- startTimer
op <- liftIO $ STM.atomically $ STMMap.lookup opId opMap
@ -458,7 +461,7 @@ onStart enabledLogTypes agentLicenseKey serverEnv wsConn shouldCaptureVariables
-- we process all operations on a websocket connection serially:
when (isJust op)
$ withComplete
$ sendStartErr
$ sendStartErr granularPrometheusMetricsState (snd =<< op)
$ "an operation already exists with this id: "
<> unOperationId opId
@ -467,10 +470,10 @@ onStart enabledLogTypes agentLicenseKey serverEnv wsConn shouldCaptureVariables
CSInitialised WsClientState {..} -> return (wscsUserInfo, wscsReqHeaders, wscsIpAddress)
CSInitError initErr -> do
let e = "cannot start as connection_init failed with: " <> initErr
withComplete $ sendStartErr e
withComplete $ sendStartErr granularPrometheusMetricsState (_grOperationName q) e
CSNotInitialised _ _ -> do
let e = "start received before the connection is initialised"
withComplete $ sendStartErr e
withComplete $ sendStartErr granularPrometheusMetricsState (_grOperationName q) e
(requestId, reqHdrs) <- liftIO $ getRequestId origReqHdrs
sc <- liftIO $ getSchemaCacheWithVersion appStateRef
@ -488,9 +491,9 @@ onStart enabledLogTypes agentLicenseKey serverEnv wsConn shouldCaptureVariables
(reqParsed, queryParts) <- Tracing.newSpan "Parse GraphQL" $ do
reqParsedE <- lift $ E.checkGQLExecution userInfo (reqHdrs, ipAddress) enableAL sc q requestId
reqParsed <- onLeft reqParsedE (withComplete . preExecErr requestId Nothing)
reqParsed <- onLeft reqParsedE (withComplete . preExecErr granularPrometheusMetricsState requestId Nothing (_grOperationName q) Nothing)
queryPartsE <- runExceptT $ getSingleOperation reqParsed
queryParts <- onLeft queryPartsE (withComplete . preExecErr requestId Nothing)
queryParts <- onLeft queryPartsE (withComplete . preExecErr granularPrometheusMetricsState requestId Nothing (getOpNameFromParsedReq reqParsed) Nothing)
pure (reqParsed, queryParts)
let gqlOpType = G._todType queryParts
@ -519,7 +522,7 @@ onStart enabledLogTypes agentLicenseKey serverEnv wsConn shouldCaptureVariables
responseErrorsConfig
headerPrecedence
(parameterizedQueryHash, execPlan, modelInfoList) <- onLeft execPlanE (withComplete . preExecErr requestId (Just gqlOpType))
(parameterizedQueryHash, execPlan, modelInfoList) <- onLeft execPlanE (withComplete . preExecErr granularPrometheusMetricsState requestId (Just gqlOpType) opName Nothing)
case execPlan of
E.QueryExecutionPlan queryPlan asts dirMap -> do
@ -535,7 +538,7 @@ onStart enabledLogTypes agentLicenseKey serverEnv wsConn shouldCaptureVariables
ResponseCached cachedResponseData -> do
logQueryLog logger $ QueryLog q Nothing requestId QueryLogKindCached
let reportedExecutionTime = 0
liftIO $ recordGQLQuerySuccess reportedExecutionTime gqlOpType
liftIO $ recordGQLQuerySuccess granularPrometheusMetricsState reportedExecutionTime opName parameterizedQueryHash gqlOpType
modelInfoLogging modelInfoList True modelInfoLogStatus
sendSuccResp cachedResponseData opName parameterizedQueryHash $ ES.SubscriptionMetadata reportedExecutionTime
ResponseUncached storeResponseM -> do
@ -582,7 +585,7 @@ onStart enabledLogTypes agentLicenseKey serverEnv wsConn shouldCaptureVariables
let (allResponses', allModelInfo) = unzip allResponses
pure $ (AnnotatedResponsePart 0 Telem.Local (encJFromList (map arpResponse allResponses')) [], concat allModelInfo)
in getResponse
sendResultFromFragments Telem.Query timerTot requestId conclusion opName parameterizedQueryHash gqlOpType modelInfoList modelInfoLogStatus
sendResultFromFragments granularPrometheusMetricsState Telem.Query timerTot requestId conclusion opName parameterizedQueryHash gqlOpType modelInfoList modelInfoLogStatus
case (storeResponseM, conclusion) of
(Just ResponseCacher {..}, Right results) -> do
let (key, (compositeValue')) = unzip $ InsOrdHashMap.toList results
@ -608,7 +611,7 @@ onStart enabledLogTypes agentLicenseKey serverEnv wsConn shouldCaptureVariables
$ doQErr
$ runPGMutationTransaction requestId q userInfo logger sourceConfig resolvedConnectionTemplate pgMutations
-- we do not construct result fragments since we have only one result
handleResult requestId gqlOpType resp \(telemTimeIO_DT, results) -> do
handleResult granularPrometheusMetricsState requestId gqlOpType opName parameterizedQueryHash resp \(telemTimeIO_DT, results) -> do
let telemQueryType = Telem.Query
telemLocality = Telem.Local
telemTimeIO = convertDuration telemTimeIO_DT
@ -618,7 +621,7 @@ onStart enabledLogTypes agentLicenseKey serverEnv wsConn shouldCaptureVariables
$ ES.SubscriptionMetadata telemTimeIO_DT
-- Telemetry. NOTE: don't time network IO:
Telem.recordTimingMetric Telem.RequestDimensions {..} Telem.RequestTimings {..}
liftIO $ recordGQLQuerySuccess totalTime gqlOpType
liftIO $ recordGQLQuerySuccess granularPrometheusMetricsState totalTime opName parameterizedQueryHash gqlOpType
-- we are not in the transaction case; proceeding normally
Nothing -> do
@ -666,7 +669,7 @@ onStart enabledLogTypes agentLicenseKey serverEnv wsConn shouldCaptureVariables
let (allResponses', allModelInfo) = unzip allResponses
pure $ (AnnotatedResponsePart 0 Telem.Local (encJFromList (map arpResponse allResponses')) [], concat allModelInfo)
in getResponse
sendResultFromFragments Telem.Query timerTot requestId conclusion opName parameterizedQueryHash gqlOpType modelInfoList modelInfoLogStatus
sendResultFromFragments granularPrometheusMetricsState Telem.Query timerTot requestId conclusion opName parameterizedQueryHash gqlOpType modelInfoList modelInfoLogStatus
liftIO $ sendCompleted (Just requestId) (Just parameterizedQueryHash)
E.SubscriptionExecutionPlan (subExec, modifier) -> do
case subExec of
@ -718,11 +721,10 @@ onStart enabledLogTypes agentLicenseKey serverEnv wsConn shouldCaptureVariables
asyncActionQueryLive
E.SEOnSourceDB (E.SSLivequery actionIds liveQueryBuilder) -> do
actionLogMapE <- fmap fst <$> runExceptT (EA.fetchActionLogResponses actionIds)
actionLogMap <- onLeft actionLogMapE (withComplete . preExecErr requestId (Just gqlOpType))
granularPrometheusMetricsState <- runGetPrometheusMetricsGranularity
actionLogMap <- onLeft actionLogMapE (withComplete . preExecErr granularPrometheusMetricsState requestId (Just gqlOpType) opName (Just parameterizedQueryHash))
modelInfoLogStatus'' <- runGetModelInfoLogStatus
opMetadataE <- liftIO $ startLiveQuery opName liveQueryBuilder parameterizedQueryHash requestId actionLogMap granularPrometheusMetricsState modifier modelInfoLogStatus''
lqId <- onLeft opMetadataE (withComplete . preExecErr requestId (Just gqlOpType))
lqId <- onLeft opMetadataE (withComplete . preExecErr granularPrometheusMetricsState requestId (Just gqlOpType) opName (Just parameterizedQueryHash))
-- Update async action query subscription state
case NE.nonEmpty (toList actionIds) of
Nothing -> do
@ -747,11 +749,16 @@ onStart enabledLogTypes agentLicenseKey serverEnv wsConn shouldCaptureVariables
onUnexpectedException
asyncActionQueryLive
E.SEOnSourceDB (E.SSStreaming rootFieldName streamQueryBuilder) -> do
granularPrometheusMetricsState <- runGetPrometheusMetricsGranularity
modelInfoLogStatus'' <- runGetModelInfoLogStatus
liftIO $ startStreamingQuery rootFieldName streamQueryBuilder parameterizedQueryHash requestId granularPrometheusMetricsState modifier modelInfoLogStatus''
liftIO $ Prometheus.Counter.inc (gqlRequestsSubscriptionSuccess gqlMetrics)
recordGraphqlOperationMetric
granularPrometheusMetricsState
(Just G.OperationTypeSubscription)
Success
opName
(Just parameterizedQueryHash)
(Prometheus.CounterVector.inc $ gqlRequests gqlMetrics)
liftIO $ logOpEv ODStarted (Just requestId) (Just parameterizedQueryHash)
where
sendDataMsg = WS._wsaGetDataMessageType onMessageActions
@ -787,18 +794,21 @@ onStart enabledLogTypes agentLicenseKey serverEnv wsConn shouldCaptureVariables
handleResult ::
forall a.
IO GranularPrometheusMetricsState ->
RequestId ->
G.OperationType ->
Maybe OperationName ->
ParameterizedQueryHash ->
Either (Either GQExecError QErr) a ->
(a -> ExceptT () m ()) ->
ExceptT () m ()
handleResult requestId gqlOpType r f = case r of
Left (Left err) -> postExecErr' gqlOpType err
Left (Right err) -> postExecErr requestId gqlOpType err
handleResult granularPrometheusMetricsState requestId gqlOpType mOpName pqh r f = case r of
Left (Left err) -> postExecErr' granularPrometheusMetricsState gqlOpType mOpName pqh err
Left (Right err) -> postExecErr granularPrometheusMetricsState requestId gqlOpType mOpName pqh err
Right results -> f results
sendResultFromFragments telemQueryType timerTot requestId r opName pqh gqlOpType modelInfoList getModelInfoLogStatus =
handleResult requestId gqlOpType r \results -> do
sendResultFromFragments granularPrometheusMetricsState telemQueryType timerTot requestId r opName pqh gqlOpType modelInfoList getModelInfoLogStatus =
handleResult granularPrometheusMetricsState requestId gqlOpType opName pqh r \results -> do
let (key, (compositeValue')) = unzip $ InsOrdHashMap.toList results
(annotatedResp, model) = unzip compositeValue'
results' = InsOrdHashMap.fromList $ zip key annotatedResp
@ -814,7 +824,7 @@ onStart enabledLogTypes agentLicenseKey serverEnv wsConn shouldCaptureVariables
-- Telemetry. NOTE: don't time network IO:
Telem.recordTimingMetric Telem.RequestDimensions {..} Telem.RequestTimings {..}
modelInfoLogging (modelInfoList <> modelInfoList') False getModelInfoLogStatus
liftIO $ (recordGQLQuerySuccess totalTime gqlOpType)
liftIO $ (recordGQLQuerySuccess granularPrometheusMetricsState totalTime opName pqh gqlOpType)
runRemoteGQ ::
RequestId ->
@ -885,7 +895,7 @@ onStart enabledLogTypes agentLicenseKey serverEnv wsConn shouldCaptureVariables
getErrFn ERTLegacy = encodeQErr
getErrFn ERTGraphqlCompliant = encodeGQLErr
sendStartErr e = do
sendStartErr granularPrometheusMetricsState mOpName e = do
let errFn = getErrFn errRespTy
sendMsg wsConn
$ SMErr
@ -893,7 +903,7 @@ onStart enabledLogTypes agentLicenseKey serverEnv wsConn shouldCaptureVariables
$ errFn False
$ err400 StartFailed e
liftIO $ logOpEv (ODProtoErr e) Nothing Nothing
liftIO $ reportGQLQueryError Nothing
liftIO $ reportGQLQueryError granularPrometheusMetricsState mOpName Nothing Nothing
liftIO $ closeConnAction wsConn opId (T.unpack e)
sendCompleted reqId paramQueryHash = do
@ -901,24 +911,27 @@ onStart enabledLogTypes agentLicenseKey serverEnv wsConn shouldCaptureVariables
logOpEv ODCompleted reqId paramQueryHash
postExecErr ::
IO GranularPrometheusMetricsState ->
RequestId ->
G.OperationType ->
Maybe OperationName ->
ParameterizedQueryHash ->
QErr ->
ExceptT () m ()
postExecErr reqId gqlOpType qErr = do
postExecErr granularPrometheusMetricsState reqId gqlOpType mOpName pqh qErr = do
let errFn = getErrFn errRespTy False
liftIO $ logOpEv (ODQueryErr qErr) (Just reqId) Nothing
postExecErr' gqlOpType $ GQExecError $ pure $ errFn qErr
postExecErr' granularPrometheusMetricsState gqlOpType mOpName pqh $ GQExecError $ pure $ errFn qErr
postExecErr' :: G.OperationType -> GQExecError -> ExceptT () m ()
postExecErr' gqlOpType qErr =
postExecErr' :: IO GranularPrometheusMetricsState -> G.OperationType -> Maybe OperationName -> ParameterizedQueryHash -> GQExecError -> ExceptT () m ()
postExecErr' granularPrometheusMetricsState gqlOpType mOpName pqh qErr =
liftIO $ do
reportGQLQueryError (Just gqlOpType)
reportGQLQueryError granularPrometheusMetricsState mOpName (Just pqh) (Just gqlOpType)
postExecErrAction wsConn opId qErr
-- why wouldn't pre exec error use graphql response?
preExecErr reqId mGqlOpType qErr = do
liftIO $ reportGQLQueryError mGqlOpType
preExecErr granularPrometheusMetricsState reqId mGqlOpType mOpName pqh qErr = do
liftIO $ reportGQLQueryError granularPrometheusMetricsState mOpName pqh mGqlOpType
liftIO $ sendError reqId qErr
sendError reqId qErr = do
@ -1049,30 +1062,32 @@ onStart enabledLogTypes agentLicenseKey serverEnv wsConn shouldCaptureVariables
catchAndIgnore :: ExceptT () m () -> m ()
catchAndIgnore m = void $ runExceptT m
reportGQLQueryError :: Maybe G.OperationType -> IO ()
reportGQLQueryError = \case
Nothing ->
liftIO $ Prometheus.Counter.inc (gqlRequestsUnknownFailure gqlMetrics)
Just opType -> case opType of
G.OperationTypeQuery ->
liftIO $ Prometheus.Counter.inc (gqlRequestsQueryFailure gqlMetrics)
G.OperationTypeMutation ->
liftIO $ Prometheus.Counter.inc (gqlRequestsMutationFailure gqlMetrics)
G.OperationTypeSubscription ->
liftIO $ Prometheus.Counter.inc (gqlRequestsSubscriptionFailure gqlMetrics)
reportGQLQueryError :: IO GranularPrometheusMetricsState -> Maybe OperationName -> Maybe ParameterizedQueryHash -> Maybe G.OperationType -> IO ()
reportGQLQueryError granularPrometheusMetricsState mOpName mQHash mOpType =
recordGraphqlOperationMetric
granularPrometheusMetricsState
mOpType
Failed
mOpName
mQHash
(Prometheus.CounterVector.inc $ gqlRequests gqlMetrics)
-- Tally and record execution times for successful GraphQL requests.
recordGQLQuerySuccess :: DiffTime -> G.OperationType -> IO ()
recordGQLQuerySuccess totalTime = \case
G.OperationTypeQuery -> liftIO $ do
Prometheus.Counter.inc (gqlRequestsQuerySuccess gqlMetrics)
Prometheus.Histogram.observe (gqlExecutionTimeSecondsQuery gqlMetrics) (realToFrac totalTime)
G.OperationTypeMutation -> liftIO $ do
Prometheus.Counter.inc (gqlRequestsMutationSuccess gqlMetrics)
Prometheus.Histogram.observe (gqlExecutionTimeSecondsMutation gqlMetrics) (realToFrac totalTime)
G.OperationTypeSubscription ->
-- We do not collect metrics for subscriptions at the request level.
pure ()
recordGQLQuerySuccess :: IO GranularPrometheusMetricsState -> DiffTime -> Maybe OperationName -> ParameterizedQueryHash -> G.OperationType -> IO ()
recordGQLQuerySuccess granularPrometheusMetricsState totalTime mOpName qHash opType = do
recordGraphqlOperationMetric
granularPrometheusMetricsState
(Just opType)
Success
mOpName
(Just qHash)
(Prometheus.CounterVector.inc $ gqlRequests gqlMetrics)
case opType of
G.OperationTypeQuery -> liftIO $ Prometheus.Histogram.observe (gqlExecutionTimeSecondsQuery gqlMetrics) (realToFrac totalTime)
G.OperationTypeMutation -> liftIO $ Prometheus.Histogram.observe (gqlExecutionTimeSecondsMutation gqlMetrics) (realToFrac totalTime)
G.OperationTypeSubscription ->
-- We do not collect metrics for subscriptions at the request level.
pure ()
onMessage ::
( MonadIO m,

View File

@ -72,7 +72,7 @@ import Hasura.Server.Auth (AuthMode, compareAuthMode)
import Hasura.Server.Cors (CorsPolicy)
import Hasura.Server.Init.Config (AllowListStatus (..), WSConnectionInitTimeout (..))
import Hasura.Server.Prometheus
( DynamicSubscriptionLabel (..),
( DynamicGraphqlOperationLabel (..),
PrometheusMetrics (..),
recordMetricWithLabel,
)
@ -639,8 +639,8 @@ createServerApp getMetricsConfig wsConnInitTimeout (WSServer logger@(L.Logger wr
messageDetails = MessageDetails (SB.fromLBS msg) messageLength
parameterizedQueryHash = wsInfo >>= _wseiParameterizedQueryHash
operationName = wsInfo >>= _wseiOperationName
promMetricGranularLabel = DynamicSubscriptionLabel parameterizedQueryHash operationName
promMetricLabel = DynamicSubscriptionLabel Nothing Nothing
promMetricGranularLabel = DynamicGraphqlOperationLabel parameterizedQueryHash operationName
promMetricLabel = DynamicGraphqlOperationLabel Nothing Nothing
websocketBytesSentMetric = pmWebSocketBytesSent prometheusMetrics
granularPrometheusMetricsState <- runGetPrometheusMetricsGranularity
liftIO $ do

View File

@ -31,11 +31,13 @@ module Hasura.Server.Prometheus
observeHistogramWithLabel,
SubscriptionKindLabel (..),
SubscriptionLabel (..),
DynamicSubscriptionLabel (..),
DynamicGraphqlOperationLabel (..),
streamingSubscriptionLabel,
liveQuerySubscriptionLabel,
recordMetricWithLabel,
recordSubcriptionMetric,
recordSubscriptionMetric,
GraphQLRequestsLabels,
recordGraphqlOperationMetric,
)
where
@ -71,7 +73,7 @@ data PrometheusMetrics = PrometheusMetrics
pmGraphQLRequestMetrics :: GraphQLRequestMetrics,
pmEventTriggerMetrics :: EventTriggerMetrics,
pmWebSocketBytesReceived :: Counter,
pmWebSocketBytesSent :: CounterVector DynamicSubscriptionLabel,
pmWebSocketBytesSent :: CounterVector DynamicGraphqlOperationLabel,
pmActionBytesReceived :: Counter,
pmActionBytesSent :: Counter,
pmScheduledTriggerMetrics :: ScheduledTriggerMetrics,
@ -83,13 +85,7 @@ data PrometheusMetrics = PrometheusMetrics
}
data GraphQLRequestMetrics = GraphQLRequestMetrics
{ gqlRequestsQuerySuccess :: Counter,
gqlRequestsQueryFailure :: Counter,
gqlRequestsMutationSuccess :: Counter,
gqlRequestsMutationFailure :: Counter,
gqlRequestsSubscriptionSuccess :: Counter,
gqlRequestsSubscriptionFailure :: Counter,
gqlRequestsUnknownFailure :: Counter,
{ gqlRequests :: CounterVector GraphQLRequestsLabels,
gqlExecutionTimeSecondsQuery :: Histogram,
gqlExecutionTimeSecondsMutation :: Histogram
}
@ -170,13 +166,7 @@ makeDummyPrometheusMetrics = do
makeDummyGraphQLRequestMetrics :: IO GraphQLRequestMetrics
makeDummyGraphQLRequestMetrics = do
gqlRequestsQuerySuccess <- Counter.new
gqlRequestsQueryFailure <- Counter.new
gqlRequestsMutationSuccess <- Counter.new
gqlRequestsMutationFailure <- Counter.new
gqlRequestsSubscriptionSuccess <- Counter.new
gqlRequestsSubscriptionFailure <- Counter.new
gqlRequestsUnknownFailure <- Counter.new
gqlRequests <- CounterVector.new
gqlExecutionTimeSecondsQuery <- Histogram.new []
gqlExecutionTimeSecondsMutation <- Histogram.new []
pure GraphQLRequestMetrics {..}
@ -295,6 +285,7 @@ instance ToLabels (Maybe DynamicEventTriggerLabel) where
toLabels (Just (DynamicEventTriggerLabel triggerName sourceName)) = Map.fromList $ [("trigger_name", triggerNameToTxt triggerName), ("source_name", sourceNameToText sourceName)]
data ResponseStatus = Success | Failed
deriving stock (Generic, Ord, Eq)
-- TODO: Make this a method of a new typeclass of the metrics library
responseStatusToLabelValue :: ResponseStatus -> Text
@ -335,21 +326,21 @@ streamingSubscriptionLabel = SubscriptionKindLabel "streaming"
liveQuerySubscriptionLabel :: SubscriptionKindLabel
liveQuerySubscriptionLabel = SubscriptionKindLabel "live-query"
data DynamicSubscriptionLabel = DynamicSubscriptionLabel
data DynamicGraphqlOperationLabel = DynamicGraphqlOperationLabel
{ _dslParamQueryHash :: Maybe ParameterizedQueryHash,
_dslOperationName :: Maybe OperationName
}
deriving stock (Generic, Ord, Eq)
instance ToLabels DynamicSubscriptionLabel where
toLabels (DynamicSubscriptionLabel hash opName) =
instance ToLabels DynamicGraphqlOperationLabel where
toLabels (DynamicGraphqlOperationLabel hash opName) =
Map.fromList
$ maybe [] (\pqh -> [("parameterized_query_hash", bsToTxt $ unParamQueryHash pqh)]) hash
<> maybe [] (\op -> [("operation_name", G.unName $ _unOperationName op)]) opName
data SubscriptionLabel = SubscriptionLabel
{ _slKind :: SubscriptionKindLabel,
_slDynamicLabels :: Maybe DynamicSubscriptionLabel
_slDynamicLabels :: Maybe DynamicGraphqlOperationLabel
}
deriving stock (Generic, Ord, Eq)
@ -357,6 +348,25 @@ instance ToLabels SubscriptionLabel where
toLabels (SubscriptionLabel kind Nothing) = Map.fromList $ [("subscription_kind", subscription_kind kind)]
toLabels (SubscriptionLabel kind (Just dl)) = (Map.fromList $ [("subscription_kind", subscription_kind kind)]) <> toLabels dl
-- TODO: Make this a method of a new typeclass of the metrics library
opTypeToLabelValue :: Maybe G.OperationType -> Text
opTypeToLabelValue = \case
(Just G.OperationTypeQuery) -> "query"
(Just G.OperationTypeMutation) -> "mutation"
(Just G.OperationTypeSubscription) -> "subscription"
Nothing -> "unknown"
data GraphQLRequestsLabels = GraphQLRequestsLabels
{ operation_type :: Maybe G.OperationType,
response_status :: ResponseStatus,
dynamic_label :: Maybe DynamicGraphqlOperationLabel
}
deriving stock (Generic, Ord, Eq)
instance ToLabels (GraphQLRequestsLabels) where
toLabels (GraphQLRequestsLabels op_type res_status dynamic_labels) =
(HashMap.fromList $ [("operation_type", opTypeToLabelValue op_type), ("response_status", responseStatusToLabelValue res_status)]) <> (fromMaybe mempty (toLabels <$> dynamic_labels))
-- | Record metrics with dynamic label
recordMetricWithLabel ::
(MonadIO m) =>
@ -401,7 +411,7 @@ observeHistogramWithLabel getMetricState alwaysObserve histogramVector label val
-- | Record a subscription metric for all the operation names present in the subscription.
-- Use this when you want to update the same value of the metric for all the operation names.
recordSubcriptionMetric ::
recordSubscriptionMetric ::
(MonadIO m) =>
(IO GranularPrometheusMetricsState) ->
-- should the metric be observed without a label when granularMetricsState is OFF
@ -412,11 +422,11 @@ recordSubcriptionMetric ::
-- the mertic action to perform
(SubscriptionLabel -> IO ()) ->
m ()
recordSubcriptionMetric getMetricState alwaysObserve operationNamesMap parameterizedQueryHash subscriptionKind metricAction = do
recordSubscriptionMetric getMetricState alwaysObserve operationNamesMap parameterizedQueryHash subscriptionKind metricAction = do
-- if no operation names are present, then emit metric with only param query hash as dynamic label
if (null operationNamesMap)
then do
let promMetricGranularLabel = SubscriptionLabel subscriptionKind (Just $ DynamicSubscriptionLabel (Just parameterizedQueryHash) Nothing)
let promMetricGranularLabel = SubscriptionLabel subscriptionKind (Just $ DynamicGraphqlOperationLabel (Just parameterizedQueryHash) Nothing)
promMetricLabel = SubscriptionLabel subscriptionKind Nothing
recordMetricWithLabel
getMetricState
@ -427,10 +437,29 @@ recordSubcriptionMetric getMetricState alwaysObserve operationNamesMap parameter
do
let operationNames = HashMap.keys operationNamesMap
for_ operationNames $ \opName -> do
let promMetricGranularLabel = SubscriptionLabel subscriptionKind (Just $ DynamicSubscriptionLabel (Just parameterizedQueryHash) opName)
let promMetricGranularLabel = SubscriptionLabel subscriptionKind (Just $ DynamicGraphqlOperationLabel (Just parameterizedQueryHash) opName)
promMetricLabel = SubscriptionLabel subscriptionKind Nothing
recordMetricWithLabel
getMetricState
alwaysObserve
(metricAction promMetricGranularLabel)
(metricAction promMetricLabel)
recordGraphqlOperationMetric ::
(MonadIO m) =>
(IO GranularPrometheusMetricsState) ->
Maybe G.OperationType ->
ResponseStatus ->
Maybe OperationName ->
Maybe ParameterizedQueryHash ->
(GraphQLRequestsLabels -> IO ()) ->
m ()
recordGraphqlOperationMetric getMetricState operationType responseStatus operationName parameterizedQueryHash metricAction = do
let dynamicLabel = DynamicGraphqlOperationLabel parameterizedQueryHash operationName
promMetricGranularLabel = GraphQLRequestsLabels operationType responseStatus (Just dynamicLabel)
promMetricLabel = GraphQLRequestsLabels operationType responseStatus Nothing
recordMetricWithLabel
getMetricState
True
(metricAction promMetricGranularLabel)
(metricAction promMetricLabel)