graphql-engine/server/src-lib/Hasura/Server/API/V2Query.hs
Antoine Leblanc cf531b05cb Rewrite Tracing to allow for only one TraceT in the entire stack.
This PR is on top of #7789.

### Description

This PR entirely rewrites the API of the Tracing library, to make `interpTraceT` a thing of the past. Before this change, we ran traces by sticking a `TraceT` on top of whatever we were doing. This had several major drawbacks:
- we were carrying a bunch of `TraceT` across the codebase, and the entire codebase had to know about it
- we needed to carry a second class constraint around (`HasReporterM`) to be able to run all of those traces
- we kept having to do stack rewriting with `interpTraceT`, which went from inconvenient to horrible
- we had to declare several behavioral instances on `TraceT m`

This PR rewrites all of `Tracing` using a more conventional model: there is ONE `TraceT` at the bottom of the stack, and there is an associated class constraint `MonadTrace`: any part of the code that happens to satisfy `MonadTrace` is able to create new traces. We NEVER have to do stack rewriting, `interpTraceT` is gone, and `TraceT` and `Reporter` become implementation details that 99% of the code is blissfully unaware of: code that needs to do tracing only needs to declare that the monad in which it operates implements `MonadTrace`.

In doing so, this PR revealed **several bugs in the codebase**: places where we were expecting to trace something, but due to the default instance of `HasReporterM IO` we would actually not do anything. This PR also splits the code of `Tracing` into more bite-sized modules, with the goal of potentially moving to `server/lib` down the line.

### Remaining work

This PR is a draft; what's left to do is:
- [x] make Pro compile; i haven't updated `HasuraPro/Main` yet
- [x] document Tracing by writing a note that explains how to use the library, and the meaning of "reporter", "trace" and "span", as well as the pitfalls
- [x] discuss some of the trade-offs in the implementation, which is why i'm opening this PR already despite it not fully building yet
- [x] it depends on #7789 being merged first

PR-URL: https://github.com/hasura/graphql-engine-mono/pull/7791
GitOrigin-RevId: cadd32d039134c93ddbf364599a2f4dd988adea8
2023-03-13 17:38:39 +00:00

225 lines
7.9 KiB
Haskell

{-# LANGUAGE ViewPatterns #-}
-- | The RQL query ('/v2/query')
module Hasura.Server.API.V2Query
( RQLQuery,
queryModifiesSchema,
runQuery,
)
where
import Control.Concurrent.Async.Lifted (mapConcurrently)
import Control.Lens (preview, _Right)
import Control.Monad.Trans.Control (MonadBaseControl)
import Data.Aeson
import Data.Aeson.Types (Parser)
import Data.Environment qualified as Env
import Data.Text qualified as T
import GHC.Generics.Extended (constrName)
import Hasura.Backends.BigQuery.DDL.RunSQL qualified as BigQuery
import Hasura.Backends.DataConnector.Adapter.RunSQL qualified as DataConnector
import Hasura.Backends.DataConnector.Adapter.Types (DataConnectorName, mkDataConnectorName)
import Hasura.Backends.MSSQL.DDL.RunSQL qualified as MSSQL
import Hasura.Backends.MySQL.SQL qualified as MySQL
import Hasura.Backends.Postgres.DDL.RunSQL qualified as Postgres
import Hasura.Base.Error
import Hasura.EncJSON
import Hasura.GraphQL.Execute.Backend
import Hasura.Metadata.Class
import Hasura.Prelude
import Hasura.RQL.DDL.Schema
import Hasura.RQL.DML.Count
import Hasura.RQL.DML.Delete
import Hasura.RQL.DML.Insert
import Hasura.RQL.DML.Select
import Hasura.RQL.DML.Types
( CountQuery,
DeleteQuery,
InsertQuery,
SelectQuery,
UpdateQuery,
)
import Hasura.RQL.DML.Update
import Hasura.RQL.Types.Metadata
import Hasura.RQL.Types.Run
import Hasura.RQL.Types.SchemaCache.Build
import Hasura.RQL.Types.Source
import Hasura.SQL.Backend
import Hasura.Server.Types
import Hasura.Services
import Hasura.Session
import Hasura.Tracing qualified as Tracing
import Language.GraphQL.Draft.Syntax qualified as GQL
-- | A single request accepted by the @/v2/query@ endpoint.
--
-- Every constructor corresponds to one or more values of the request's
-- @type@ field, as recognised by the 'FromJSON' instance of this type.
data RQLQuery
  = -- | @insert@
    RQInsert !InsertQuery
  | -- | @select@
    RQSelect !SelectQuery
  | -- | @update@
    RQUpdate !UpdateQuery
  | -- | @delete@
    RQDelete !DeleteQuery
  | -- | @count@
    RQCount !CountQuery
  | -- | @run_sql@ or @pg_run_sql@
    RQRunSql !Postgres.RunSQL
  | -- | @mssql_run_sql@
    RQMssqlRunSql !MSSQL.MSSQLRunSQL
  | -- | @citus_run_sql@
    RQCitusRunSql !Postgres.RunSQL
  | -- | @cockroach_run_sql@
    RQCockroachRunSql !Postgres.RunSQL
  | -- | @mysql_run_sql@
    RQMysqlRunSql !MySQL.RunSQL
  | -- | @bigquery_run_sql@
    RQBigqueryRunSql !BigQuery.BigQueryRunSQL
  | -- | @<name>_run_sql@ for any other @<name>@ that forms a valid data
    -- connector name; the name is kept alongside the payload.
    RQDataConnectorRunSql !DataConnectorName !DataConnector.DataConnectorRunSQL
  | -- | @bigquery_database_inspection@
    RQBigqueryDatabaseInspection !BigQuery.BigQueryRunSQL
  | -- | @bulk@: a list of queries run one after the other.
    RQBulk ![RQLQuery]
  | -- | @concurrent_bulk@: a variant of 'RQBulk' whose queries are run
    -- concurrently. The whole request is rejected if any query in the list
    -- modifies the schema.
    --
    -- This is mainly used by the graphql-engine console.
    RQConcurrentBulk [RQLQuery]
  deriving (Generic)
-- | Written by hand (rather than derived) so that a "wildcard"
-- @<name>_run_sql@ type can be delegated to the data connector called
-- @<name>@.
--
-- The request must be a JSON object with a @type@ field selecting the
-- constructor and an @args@ field holding that constructor's payload.
instance FromJSON RQLQuery where
  parseJSON = withObject "RQLQuery" $ \obj -> do
    queryType <- obj .: "type"
    let -- The payload parser is polymorphic: each alternative below
        -- instantiates it at the argument type of its own constructor.
        parseArgs :: forall a. FromJSON a => Parser a
        parseArgs = obj .: "args"
        -- Strip the "_run_sql" suffix and try to turn what remains into a
        -- data connector name; 'Nothing' if any step fails.
        dataConnectorFor ty = do
          prefix <- T.stripSuffix "_run_sql" ty
          gqlName <- GQL.mkName prefix
          preview _Right (mkDataConnectorName gqlName)
    case queryType of
      "insert" -> RQInsert <$> parseArgs
      "select" -> RQSelect <$> parseArgs
      "update" -> RQUpdate <$> parseArgs
      "delete" -> RQDelete <$> parseArgs
      "count" -> RQCount <$> parseArgs
      -- Optionally, we can specify a `pg_` prefix. This primarily makes some
      -- string interpolation easier in the cross-backend tests.
      "run_sql" -> RQRunSql <$> parseArgs
      "pg_run_sql" -> RQRunSql <$> parseArgs
      "mssql_run_sql" -> RQMssqlRunSql <$> parseArgs
      "citus_run_sql" -> RQCitusRunSql <$> parseArgs
      "cockroach_run_sql" -> RQCockroachRunSql <$> parseArgs
      "mysql_run_sql" -> RQMysqlRunSql <$> parseArgs
      "bigquery_run_sql" -> RQBigqueryRunSql <$> parseArgs
      -- The wildcard comes after every built-in "*_run_sql" name, so those
      -- always take precedence over a data connector of the same name.
      (dataConnectorFor -> Just connectorName) ->
        RQDataConnectorRunSql connectorName <$> parseArgs
      "bigquery_database_inspection" -> RQBigqueryDatabaseInspection <$> parseArgs
      "bulk" -> RQBulk <$> parseArgs
      "concurrent_bulk" -> RQConcurrentBulk <$> parseArgs
      _ -> fail ("Unrecognised RQLQuery type: " <> T.unpack queryType)
-- | Entry point for a @/v2/query@ request.
--
-- Steps, in order:
--
-- 1. reject the request with a 400 if read-only mode is enabled and
--    'queryModifiesUserDB' holds for the query;
-- 2. fetch the current metadata together with its resource version;
-- 3. run the query with 'runQueryM' on top of that metadata and the given
--    schema cache;
-- 4. if 'queryModifiesSchema' holds for the query, store the updated metadata
--    and notify schema cache sync, or fail with a 500 when maintenance mode
--    is enabled.
--
-- Returns the encoded response along with the updated schema cache.
runQuery ::
  ( MonadIO m,
    MonadBaseControl IO m,
    MonadError QErr m,
    Tracing.MonadTrace m,
    MonadMetadataStorage m,
    MonadResolveSource m,
    MonadQueryTags m,
    ProvidesHasuraServices m
  ) =>
  Env.Environment ->
  InstanceId ->
  UserInfo ->
  RebuildableSchemaCache ->
  ServerConfigCtx ->
  RQLQuery ->
  m (EncJSON, RebuildableSchemaCache)
runQuery env instanceId userInfo schemaCache serverConfigCtx rqlQuery = do
  let readOnlyModeOn = _sccReadOnlyMode serverConfigCtx == ReadOnlyModeEnabled
  when (readOnlyModeOn && queryModifiesUserDB rqlQuery) $
    throw400 NotSupported "Cannot run write queries when read-only mode is enabled"

  (metadata, fetchedVersion) <-
    Tracing.newSpan "fetchMetadata" (liftEitherM fetchMetadata)

  -- We can use defaults here unconditionally, since there is no MD export function in V2Query
  ((response, newMetadata), newCache, cacheInvalidations) <-
    runQueryM env rqlQuery
      & runMetadataT metadata (_sccMetadataDefaults serverConfigCtx)
      & runCacheRWT schemaCache
      & peelRun (RunCtx userInfo serverConfigCtx)

  when (queryModifiesSchema rqlQuery) $
    persistMetadata fetchedVersion newMetadata cacheInvalidations
  pure (response, newCache)
  where
    -- Store the updated metadata and tell other instances about it; refused
    -- outright while maintenance mode is enabled.
    persistMetadata fetchedVersion newMetadata cacheInvalidations =
      case _sccMaintenanceMode serverConfigCtx of
        MaintenanceModeEnabled () ->
          throw500 "metadata cannot be modified in maintenance mode"
        MaintenanceModeDisabled -> do
          -- set modified metadata in storage
          storedVersion <-
            Tracing.newSpan "setMetadata" $
              liftEitherM (setMetadata fetchedVersion newMetadata)
          -- notify schema cache sync
          Tracing.newSpan "notifySchemaCacheSync" $
            liftEitherM (notifySchemaCacheSync storedVersion instanceId cacheInvalidations)
-- | Whether the query is treated as modifying the schema.
--
-- DML queries never are. For the Postgres-family and MSSQL @run_sql@ variants
-- the decision is delegated to the backend's
-- @isSchemaCacheBuildRequiredRunSQL@. A bulk query modifies the schema if any
-- of its members does.
queryModifiesSchema :: RQLQuery -> Bool
queryModifiesSchema query = case query of
  -- Plain DML
  RQInsert {} -> False
  RQSelect {} -> False
  RQUpdate {} -> False
  RQDelete {} -> False
  RQCount {} -> False
  -- Raw SQL where the backend inspects the statement
  RQRunSql sql -> Postgres.isSchemaCacheBuildRequiredRunSQL sql
  RQCitusRunSql sql -> Postgres.isSchemaCacheBuildRequiredRunSQL sql
  RQCockroachRunSql sql -> Postgres.isSchemaCacheBuildRequiredRunSQL sql
  RQMssqlRunSql sql -> MSSQL.isSchemaCacheBuildRequiredRunSQL sql
  -- Raw SQL and inspection that are never considered schema-modifying
  RQMysqlRunSql {} -> False
  RQBigqueryRunSql {} -> False
  RQDataConnectorRunSql {} -> False
  RQBigqueryDatabaseInspection {} -> False
  -- Containers: look inside
  RQBulk queries -> any queryModifiesSchema queries
  RQConcurrentBulk queries -> any queryModifiesSchema queries
-- | Run a single 'RQLQuery' by dispatching to the matching backend or DML
-- handler.
--
-- The whole execution is wrapped in a tracing span named after the query's
-- constructor. Bulk queries recurse into this function, so each member gets
-- its own nested span:
--
-- * 'RQBulk' runs its members through 'indexedMapM';
-- * 'RQConcurrentBulk' runs its members with 'mapConcurrently', after
--   failing with a 500 if any member modifies the schema.
runQueryM ::
  ( MonadError QErr m,
    MonadIO m,
    MonadBaseControl IO m,
    UserInfoM m,
    CacheRWM m,
    HasServerConfigCtx m,
    Tracing.MonadTrace m,
    MetadataM m,
    MonadQueryTags m
  ) =>
  Env.Environment ->
  RQLQuery ->
  m EncJSON
runQueryM env rq =
  Tracing.newSpan spanName $ case rq of
    -- DML
    RQInsert insertQ -> runInsert insertQ
    RQSelect selectQ -> runSelect selectQ
    RQUpdate updateQ -> runUpdate updateQ
    RQDelete deleteQ -> runDelete deleteQ
    RQCount countQ -> runCount countQ
    -- Raw SQL, Postgres family
    RQRunSql sql -> Postgres.runRunSQL @'Vanilla sql
    RQCitusRunSql sql -> Postgres.runRunSQL @'Citus sql
    RQCockroachRunSql sql -> Postgres.runRunSQL @'Cockroach sql
    -- Raw SQL, other backends
    RQMssqlRunSql sql -> MSSQL.runSQL sql
    RQMysqlRunSql sql -> MySQL.runSQL sql
    RQBigqueryRunSql sql -> BigQuery.runSQL sql
    RQDataConnectorRunSql connectorName sql -> DataConnector.runSQL connectorName sql
    RQBigqueryDatabaseInspection inspection -> BigQuery.runDatabaseInspection inspection
    -- Bulk
    RQBulk queries -> fmap encJFromList (indexedMapM (runQueryM env) queries)
    RQConcurrentBulk queries -> do
      when (queryModifiesSchema rq) $
        throw500 "Only read-only queries are allowed in a concurrent_bulk"
      fmap encJFromList (mapConcurrently (runQueryM env) queries)
  where
    -- The span is named after the constructor of the query being run.
    spanName = T.pack (constrName rq)
-- | Whether the query is treated as writing to a user database.
--
-- 'runQuery' uses this to reject requests while read-only mode is enabled.
-- The classification is by query kind only: every @run_sql@ variant counts as
-- a write regardless of the statement it carries, while @select@, @count@ and
-- BigQuery database inspection never do. Bulk queries are writes if any of
-- their members is.
queryModifiesUserDB :: RQLQuery -> Bool
queryModifiesUserDB = \case
  RQInsert _ -> True
  RQSelect _ -> False
  RQUpdate _ -> True
  RQDelete _ -> True
  RQCount _ -> False
  RQRunSql _ -> True
  RQCitusRunSql _ -> True
  RQCockroachRunSql _ -> True
  RQMssqlRunSql _ -> True
  RQMysqlRunSql _ -> True
  RQBigqueryRunSql _ -> True
  RQDataConnectorRunSql _ _ -> True
  RQBigqueryDatabaseInspection _ -> False
  RQBulk queries -> any queryModifiesUserDB queries
  -- 'runQueryM' only rejects a concurrent bulk whose members modify the
  -- *schema*; it does not stop members from writing data. The members
  -- therefore have to be inspected here exactly like those of 'RQBulk',
  -- otherwise wrapping a write in a @concurrent_bulk@ would bypass read-only
  -- mode.
  RQConcurrentBulk queries -> any queryModifiesUserDB queries