mirror of
https://github.com/hasura/graphql-engine.git
synced 2025-01-05 14:27:59 +03:00
0346224444
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/8769 GitOrigin-RevId: 66f2cbfb620d641e672a4074554d9d324a18c591
322 lines
12 KiB
Haskell
322 lines
12 KiB
Haskell
{-# LANGUAGE ScopedTypeVariables #-}
|
|
|
|
-- |
|
|
-- Send anonymized metrics to the telemetry server regarding usage of various
|
|
-- features of Hasura.
|
|
--
|
|
-- The general workflow for telemetry is as follows:
|
|
--
|
|
-- 1. We generate metrics for each backend in the graphql-engine code and send it to 'telemetryUrl'.
|
|
-- The relevant types can be found in "Hasura.Server.Telemetry.Types".
|
|
-- 2. The 'telemetryUrl' endpoint is handled by code in:
|
|
-- <https://github.com/hasura/hasura-analytics/tree/hge-upgrade>, specifically
|
|
-- <manager/main.go> and <manager/analytics.go>.
|
|
-- This server endpoint receives the telemetry payload and sends it to another graphql-engine
|
|
-- which runs locally and is backed by a postgres database.
|
|
-- The database schema for the telemetry endpoint can also be found in the same repo under <hge/migrations/>.
|
|
-- 3. The information from the postgres db can be viewed in Metabase:
|
|
-- <https://metabase.telemetry.hasura.io/browse/2/schema/public>.
|
|
--
|
|
-- For more information about telemetry in general, visit the user-facing docs on the topic:
|
|
-- <https://hasura.io/docs/latest/graphql/core/guides/telemetry>.
|
|
module Hasura.Server.Telemetry
|
|
( runTelemetry,
|
|
mkTelemetryLog,
|
|
)
|
|
where
|
|
|
|
import CI qualified
|
|
import Control.Concurrent.Extended qualified as C
|
|
import Control.Exception (try)
|
|
import Control.Lens
|
|
import Data.Aeson qualified as A
|
|
import Data.ByteString.Lazy qualified as BL
|
|
import Data.HashMap.Strict qualified as HM
|
|
import Data.HashMap.Strict qualified as Map
|
|
import Data.List qualified as L
|
|
import Data.List.Extended qualified as L
|
|
import Data.Text qualified as T
|
|
import Data.Text.Conversions (UTF8 (..), decodeText)
|
|
import Hasura.App.State qualified as State
|
|
import Hasura.HTTP
|
|
import Hasura.Logging
|
|
import Hasura.NativeQuery.Cache (NativeQueryInfo (_nqiArguments))
|
|
import Hasura.Prelude
|
|
import Hasura.RQL.Types.Action
|
|
import Hasura.RQL.Types.Common
|
|
import Hasura.RQL.Types.CustomTypes
|
|
import Hasura.RQL.Types.Metadata.Instances ()
|
|
import Hasura.RQL.Types.Relationships.Local
|
|
import Hasura.RQL.Types.SchemaCache
|
|
import Hasura.RQL.Types.Source
|
|
import Hasura.RQL.Types.Table
|
|
import Hasura.SQL.AnyBackend qualified as Any
|
|
import Hasura.SQL.Backend (BackendType)
|
|
import Hasura.SQL.Tag
|
|
import Hasura.Server.AppStateRef qualified as HGE
|
|
import Hasura.Server.Init.Config
|
|
import Hasura.Server.Telemetry.Counters (dumpServiceTimingMetrics)
|
|
import Hasura.Server.Telemetry.Types
|
|
import Hasura.Server.Types
|
|
import Hasura.Server.Version
|
|
import Hasura.Session
|
|
import Network.HTTP.Client qualified as HTTP
|
|
import Network.HTTP.Types qualified as HTTP
|
|
import Network.Wreq qualified as Wreq
|
|
|
|
-- * Logging and error handling
|
|
|
|
-- | Logging related
|
|
data TelemetryLog = TelemetryLog
|
|
{ _tlLogLevel :: !LogLevel,
|
|
_tlType :: !Text,
|
|
_tlMessage :: !Text,
|
|
_tlHttpError :: !(Maybe TelemetryHttpError)
|
|
}
|
|
deriving (Show)
|
|
|
|
data TelemetryHttpError = TelemetryHttpError
|
|
{ tlheStatus :: !(Maybe HTTP.Status),
|
|
tlheUrl :: !Text,
|
|
tlheHttpException :: !(Maybe HttpException),
|
|
tlheResponse :: !(Maybe Text)
|
|
}
|
|
deriving (Show)
|
|
|
|
instance A.ToJSON TelemetryLog where
|
|
toJSON tl =
|
|
A.object
|
|
[ "type" A..= _tlType tl,
|
|
"message" A..= _tlMessage tl,
|
|
"http_error" A..= (A.toJSON <$> _tlHttpError tl)
|
|
]
|
|
|
|
instance A.ToJSON TelemetryHttpError where
|
|
toJSON tlhe =
|
|
A.object
|
|
[ "status_code" A..= (HTTP.statusCode <$> tlheStatus tlhe),
|
|
"url" A..= tlheUrl tlhe,
|
|
"response" A..= tlheResponse tlhe,
|
|
"http_exception" A..= (A.toJSON <$> tlheHttpException tlhe)
|
|
]
|
|
|
|
instance ToEngineLog TelemetryLog Hasura where
|
|
toEngineLog tl = (_tlLogLevel tl, ELTInternal ILTTelemetry, A.toJSON tl)
|
|
|
|
mkHttpError ::
|
|
Text ->
|
|
Maybe (Wreq.Response BL.ByteString) ->
|
|
Maybe HttpException ->
|
|
TelemetryHttpError
|
|
mkHttpError url mResp httpEx =
|
|
case mResp of
|
|
Nothing -> TelemetryHttpError Nothing url httpEx Nothing
|
|
Just resp ->
|
|
let status = resp ^. Wreq.responseStatus
|
|
body = decodeText $ UTF8 (resp ^. Wreq.responseBody)
|
|
in TelemetryHttpError (Just status) url httpEx body
|
|
|
|
mkTelemetryLog :: Text -> Text -> Maybe TelemetryHttpError -> TelemetryLog
|
|
mkTelemetryLog = TelemetryLog LevelInfo
|
|
|
|
-- * Endpoint
|
|
|
|
telemetryUrl :: Text
|
|
telemetryUrl = "https://telemetry.hasura.io/v1/http"
|
|
|
|
-- * Execution
|
|
|
|
-- | An infinite loop that sends updated telemetry data ('Metrics') every 24
|
|
-- hours. The send time depends on when the server was started and will
|
|
-- naturally drift.
|
|
runTelemetry ::
|
|
forall m impl.
|
|
( MonadIO m,
|
|
State.HasAppEnv m
|
|
) =>
|
|
Logger Hasura ->
|
|
-- | an action that always returns the latest schema cache ref
|
|
HGE.AppStateRef impl ->
|
|
MetadataDbId ->
|
|
PGVersion ->
|
|
m Void
|
|
runTelemetry (Logger logger) appStateRef metadataDbUid pgVersion = do
|
|
State.AppEnv {..} <- State.askAppEnv
|
|
let options = wreqOptions appEnvManager []
|
|
forever $ liftIO $ do
|
|
telemetryStatus <- State.acEnableTelemetry <$> HGE.getAppContext appStateRef
|
|
case telemetryStatus of
|
|
TelemetryEnabled -> do
|
|
schemaCache <- HGE.getSchemaCache appStateRef
|
|
serviceTimings <- dumpServiceTimingMetrics
|
|
experimentalFeatures <- State.acExperimentalFeatures <$> HGE.getAppContext appStateRef
|
|
ci <- CI.getCI
|
|
-- Creates a telemetry payload for a specific backend.
|
|
let telemetryForSource :: forall (b :: BackendType). HasTag b => SourceInfo b -> TelemetryPayload
|
|
telemetryForSource =
|
|
mkTelemetryPayload
|
|
metadataDbUid
|
|
appEnvInstanceId
|
|
currentVersion
|
|
pgVersion
|
|
ci
|
|
serviceTimings
|
|
(scRemoteSchemas schemaCache)
|
|
(scActions schemaCache)
|
|
experimentalFeatures
|
|
telemetries =
|
|
map
|
|
(\sourceinfo -> (Any.dispatchAnyBackend @HasTag) sourceinfo telemetryForSource)
|
|
(HM.elems (scSources schemaCache))
|
|
payloads = A.encode <$> telemetries
|
|
|
|
for_ payloads $ \payload -> do
|
|
logger $ debugLBS $ "metrics_info: " <> payload
|
|
resp <- try $ Wreq.postWith options (T.unpack telemetryUrl) payload
|
|
either logHttpEx handleHttpResp resp
|
|
C.sleep $ days 1
|
|
TelemetryDisabled -> C.sleep $ seconds 1
|
|
where
|
|
logHttpEx :: HTTP.HttpException -> IO ()
|
|
logHttpEx ex = do
|
|
let httpErr = Just $ mkHttpError telemetryUrl Nothing (Just $ HttpException ex)
|
|
logger $ mkTelemetryLog "http_exception" "http exception occurred" httpErr
|
|
|
|
handleHttpResp resp = do
|
|
let statusCode = resp ^. Wreq.responseStatus . Wreq.statusCode
|
|
logger $ debugLBS $ "http_success: " <> resp ^. Wreq.responseBody
|
|
when (statusCode /= 200) $ do
|
|
let httpErr = Just $ mkHttpError telemetryUrl (Just resp) Nothing
|
|
logger $ mkTelemetryLog "http_error" "failed to post telemetry" httpErr
|
|
|
|
-- * Generate metrics
|
|
|
|
-- | Generate a telemetry payload for a specific source by computing their
|
|
-- relevant metrics.
|
|
-- Additional information that may not be relevant to a particular source
|
|
-- such as service timing, remote schemas and actions, will be reported
|
|
-- only with the default source.
|
|
mkTelemetryPayload ::
|
|
forall (b :: BackendType).
|
|
HasTag b =>
|
|
MetadataDbId ->
|
|
InstanceId ->
|
|
Version ->
|
|
PGVersion ->
|
|
Maybe CI.CI ->
|
|
ServiceTimingMetrics ->
|
|
RemoteSchemaMap ->
|
|
ActionCache ->
|
|
HashSet ExperimentalFeature ->
|
|
SourceInfo b ->
|
|
TelemetryPayload
|
|
mkTelemetryPayload metadataDbId instanceId version pgVersion ci serviceTimings remoteSchemaMap actionCache experimentalFeatures sourceInfo =
|
|
let topic = versionToTopic version
|
|
sourceMetadata =
|
|
SourceMetadata
|
|
{ _smDbUid = forDefaultSource (mdDbIdToDbUid metadataDbId),
|
|
_smDbKind = reify $ backendTag @b,
|
|
_smDbVersion = forDefaultSource (pgToDbVersion pgVersion)
|
|
}
|
|
-- We use this function to attach additional information that is not associated
|
|
-- with a particular source, such as service timing, remote schemas and actions.
|
|
-- These will be reported only with the default source.
|
|
forDefaultSource :: forall a. a -> Maybe a
|
|
forDefaultSource =
|
|
if _siName sourceInfo == SNDefault
|
|
then Just
|
|
else const Nothing
|
|
metrics =
|
|
computeMetrics
|
|
sourceInfo
|
|
(forDefaultSource serviceTimings)
|
|
(forDefaultSource remoteSchemaMap)
|
|
(forDefaultSource actionCache)
|
|
telemetry =
|
|
HasuraTelemetry metadataDbId instanceId version ci sourceMetadata metrics experimentalFeatures
|
|
in TelemetryPayload topic telemetry
|
|
|
|
-- | Compute the relevant metrics for a specific source.
|
|
computeMetrics ::
|
|
forall (b :: BackendType).
|
|
SourceInfo b ->
|
|
Maybe ServiceTimingMetrics ->
|
|
Maybe RemoteSchemaMap ->
|
|
Maybe ActionCache ->
|
|
Metrics
|
|
computeMetrics sourceInfo _mtServiceTimings remoteSchemaMap actionCache =
|
|
let _mtTables = countSourceTables (isNothing . _tciViewInfo . _tiCoreInfo)
|
|
_mtViews = countSourceTables (isJust . _tciViewInfo . _tiCoreInfo)
|
|
_mtEnumTables = countSourceTables (isJust . _tciEnumValues . _tiCoreInfo)
|
|
allRels = join $ Map.elems $ Map.map (getRels . _tciFieldInfoMap . _tiCoreInfo) sourceTableCache
|
|
(manualRels, autoRels) = L.partition riIsManual allRels
|
|
_mtRelationships = RelationshipMetric (length manualRels) (length autoRels)
|
|
rolePerms = join $ Map.elems $ Map.map permsOfTbl sourceTableCache
|
|
_pmRoles = length $ L.uniques $ fst <$> rolePerms
|
|
allPerms = snd <$> rolePerms
|
|
_pmInsert = calcPerms _permIns allPerms
|
|
_pmSelect = calcPerms _permSel allPerms
|
|
_pmUpdate = calcPerms _permUpd allPerms
|
|
_pmDelete = calcPerms _permDel allPerms
|
|
_mtPermissions =
|
|
PermissionMetric {..}
|
|
_mtEventTriggers =
|
|
Map.size $
|
|
Map.filter (not . Map.null) $
|
|
Map.map _tiEventTriggerInfoMap sourceTableCache
|
|
_mtRemoteSchemas = Map.size <$> remoteSchemaMap
|
|
_mtFunctions = Map.size $ Map.filter (not . isSystemDefined . _fiSystemDefined) sourceFunctionCache
|
|
_mtActions = computeActionsMetrics <$> actionCache
|
|
_mtNativeQueries = countNativeQueries (HM.elems $ _siNativeQueries sourceInfo)
|
|
in Metrics {..}
|
|
where
|
|
sourceTableCache = _siTables sourceInfo
|
|
sourceFunctionCache = _siFunctions sourceInfo
|
|
countSourceTables predicate = length . filter predicate $ Map.elems sourceTableCache
|
|
|
|
calcPerms :: (RolePermInfo b -> Maybe a) -> [RolePermInfo b] -> Int
|
|
calcPerms fn perms = length $ mapMaybe fn perms
|
|
|
|
permsOfTbl :: TableInfo b -> [(RoleName, RolePermInfo b)]
|
|
permsOfTbl = Map.toList . _tiRolePermInfoMap
|
|
|
|
countNativeQueries :: [NativeQueryInfo b] -> NativeQueriesMetrics
|
|
countNativeQueries =
|
|
foldMap
|
|
( \logimo ->
|
|
if null (_nqiArguments logimo)
|
|
then mempty {_nqmWithoutParameters = 1}
|
|
else mempty {_nqmWithParameters = 1}
|
|
)
|
|
|
|
-- | Compute the relevant metrics for actions from the action cache.
|
|
computeActionsMetrics :: ActionCache -> ActionMetric
|
|
computeActionsMetrics actionCache =
|
|
ActionMetric syncActionsLen asyncActionsLen queryActionsLen typeRelationships customTypesLen
|
|
where
|
|
actions = Map.elems actionCache
|
|
syncActionsLen = length . filter ((== ActionMutation ActionSynchronous) . _adType . _aiDefinition) $ actions
|
|
asyncActionsLen = length . filter ((== ActionMutation ActionAsynchronous) . _adType . _aiDefinition) $ actions
|
|
queryActionsLen = length . filter ((== ActionQuery) . _adType . _aiDefinition) $ actions
|
|
|
|
outputTypesLen = length . L.nub . map (_adOutputType . _aiDefinition) $ actions
|
|
inputTypesLen = length . L.nub . concatMap (map _argType . _adArguments . _aiDefinition) $ actions
|
|
customTypesLen = inputTypesLen + outputTypesLen
|
|
|
|
typeRelationships =
|
|
length
|
|
. L.nub
|
|
. concatMap
|
|
( \aInfo -> case (snd . _aiOutputType $ aInfo) of
|
|
AOTObject aot -> map _atrName $ _aotRelationships aot
|
|
AOTScalar _ -> []
|
|
)
|
|
$ actions
|
|
|
|
-- | Decide which topic (telemetry table) we should use based on the version.
|
|
versionToTopic :: Version -> Topic
|
|
versionToTopic = \case
|
|
VersionDev _ -> Topic "server_metrics_v2_test"
|
|
VersionRelease _ -> Topic "server_metrics_v2"
|
|
VersionCE _ -> Topic "server_metrics_v2"
|