2021-09-24 20:01:40 +03:00
{- # LANGUAGE CPP # -}
2022-03-16 03:39:21 +03:00
{- # LANGUAGE TemplateHaskell # -}
2021-09-24 20:01:40 +03:00
-- | This file contains the handlers that are used within websocket server.
--
-- This module exports three main handlers for the websocket server ('onConn',
-- 'onMessage', 'onClose'), and two helpers for sending messages to the client
-- ('sendMsg', 'sendCloseWithMsg').
--
-- NOTE!
-- The handler functions 'onClose', 'onMessage', etc. depend for correctness on two properties:
-- - they run with async exceptions masked
-- - they do not race on the same connection
2018-07-20 10:22:46 +03:00
module Hasura.GraphQL.Transport.WebSocket
2021-09-24 20:01:40 +03:00
( onConn ,
2021-09-24 01:56:37 +03:00
onMessage ,
onClose ,
sendMsg ,
sendCloseWithMsg ,
2023-06-13 12:22:36 +03:00
mkCloseWebsocketsOnMetadataChangeAction ,
runWebsocketCloseOnMetadataChangeAction ,
WebsocketCloseOnMetadataChangeAction ,
2021-09-24 01:56:37 +03:00
)
where
2018-07-20 10:22:46 +03:00
2021-09-24 01:56:37 +03:00
import Control.Concurrent.Extended ( sleep )
import Control.Concurrent.STM qualified as STM
2023-03-14 20:46:13 +03:00
import Control.Monad.Morph ( hoist )
2021-09-24 01:56:37 +03:00
import Control.Monad.Trans.Control qualified as MC
import Data.Aeson qualified as J
import Data.Aeson.Casing qualified as J
2023-06-02 08:28:17 +03:00
import Data.Aeson.Encoding qualified as J
2021-09-24 01:56:37 +03:00
import Data.Aeson.TH qualified as J
2021-10-29 17:42:07 +03:00
import Data.ByteString ( ByteString )
2021-09-24 01:56:37 +03:00
import Data.ByteString.Lazy qualified as LBS
import Data.CaseInsensitive qualified as CI
import Data.Dependent.Map qualified as DM
2023-04-26 18:42:13 +03:00
import Data.HashMap.Strict qualified as HashMap
2023-04-27 10:41:55 +03:00
import Data.HashMap.Strict.InsOrd qualified as InsOrdHashMap
2021-09-24 01:56:37 +03:00
import Data.HashSet qualified as Set
import Data.List.NonEmpty qualified as NE
import Data.String
import Data.Text qualified as T
import Data.Text.Encoding qualified as TE
2023-05-31 08:47:40 +03:00
import Data.Text.Extended ( ( <>> ) )
2023-04-25 23:28:03 +03:00
import Data.Time.Clock
2021-09-24 01:56:37 +03:00
import Data.Time.Clock qualified as TC
2021-11-04 15:38:57 +03:00
import Data.Word ( Word16 )
2021-09-24 01:56:37 +03:00
import GHC.AssertNF.CPP
2023-03-30 19:31:50 +03:00
import Hasura.App.State
2023-04-05 11:57:19 +03:00
import Hasura.Backends.DataConnector.Agent.Client ( AgentLicenseKey )
2021-09-24 01:56:37 +03:00
import Hasura.Backends.Postgres.Instances.Transport ( runPGMutationTransaction )
import Hasura.Base.Error
2023-04-05 11:57:19 +03:00
import Hasura.CredentialCache
2021-09-24 01:56:37 +03:00
import Hasura.EncJSON
import Hasura.GraphQL.Execute qualified as E
import Hasura.GraphQL.Execute.Action qualified as EA
import Hasura.GraphQL.Execute.Backend qualified as EB
import Hasura.GraphQL.Execute.RemoteJoin qualified as RJ
2022-03-21 13:39:49 +03:00
import Hasura.GraphQL.Execute.Subscription.Plan qualified as ES
import Hasura.GraphQL.Execute.Subscription.Poll qualified as ES
import Hasura.GraphQL.Execute.Subscription.State qualified as ES
2021-09-24 01:56:37 +03:00
import Hasura.GraphQL.Logging
2022-04-07 17:41:43 +03:00
import Hasura.GraphQL.Namespace ( RootFieldAlias ( .. ) )
2021-09-24 01:56:37 +03:00
import Hasura.GraphQL.ParameterizedQueryHash ( ParameterizedQueryHash )
import Hasura.GraphQL.Parser.Directives ( cached )
import Hasura.GraphQL.Transport.Backend
import Hasura.GraphQL.Transport.HTTP
import Hasura.GraphQL.Transport.HTTP.Protocol
import Hasura.GraphQL.Transport.Instances ( )
import Hasura.GraphQL.Transport.WebSocket.Protocol
import Hasura.GraphQL.Transport.WebSocket.Server qualified as WS
import Hasura.GraphQL.Transport.WebSocket.Types
2023-06-13 12:22:36 +03:00
import Hasura.GraphQL.Transport.WebSocket.Types qualified as WS
2021-09-24 01:56:37 +03:00
import Hasura.Logging qualified as L
import Hasura.Metadata.Class
import Hasura.Prelude
2023-03-31 00:18:11 +03:00
import Hasura.QueryTags
2022-11-10 02:30:42 +03:00
import Hasura.RQL.Types.Common ( MetricsConfig ( _mcAnalyzeQueryVariables ) )
2021-10-29 17:42:07 +03:00
import Hasura.RQL.Types.ResultCustomization
2022-11-10 02:30:42 +03:00
import Hasura.RQL.Types.SchemaCache ( scApiLimits , scMetricsConfig )
scaffolding for remote-schemas module
The main aim of the PR is:
1. To set up a module structure for 'remote-schemas' package.
2. Move parts of the remote schema codebase into the new module structure to validate it.
## Notes to the reviewer
Why a PR with large-ish diff?
1. We've been making progress on the MM project but we don't yet know how long it is going to take us to get to the first milestone. To understand this better, we need to figure out the unknowns as soon as possible. Hence I've taken a stab at the first two items in the [end-state](https://gist.github.com/0x777/ca2bdc4284d21c3eec153b51dea255c9) document to figure out the unknowns. Unsurprisingly, there are a bunch of issues that we haven't discussed earlier. These are documented in the 'open questions' section.
1. The diff is large but that is only code moved around and I've added a section that documents how things are moved. In addition, there are a fair number of PR comments to help with the review process.
## Changes in the PR
### Module structure
Sets up the module structure as follows:
```
Hasura/
RemoteSchema/
Metadata/
Types.hs
SchemaCache/
Types.hs
Permission.hs
RemoteRelationship.hs
Build.hs
MetadataAPI/
Types.hs
Execute.hs
```
### 1. Types representing metadata are moved
Types that capture metadata information (currently scattered across several RQL modules) are moved into `Hasura.RemoteSchema.Metadata.Types`.
- This new module only depends on very 'core' modules such as
`Hasura.Session` for the notion of roles and `Hasura.Incremental` for `Cacheable` typeclass.
- The requirement on database modules is avoided by generalizing the remote schemas metadata to accept an arbitrary 'r' for a remote relationship
definition.
### 2. SchemaCache related types and build logic have been moved
Types that represent remote schemas information in SchemaCache are moved into `Hasura.RemoteSchema.SchemaCache.Types`.
Similar to `H.RS.Metadata.Types`, this module depends on 'core' modules except for `Hasura.GraphQL.Parser.Variable`. It has something to do with remote relationships but I haven't spent time looking into it. The validation of 'remote relationships to remote schema' is also something that needs to be looked at.
Rips out the logic that builds remote schema's SchemaCache information from the monolithic `buildSchemaCacheRule` and moves it into `Hasura.RemoteSchema.SchemaCache.Build`. Further, the `.SchemaCache.Permission` and `.SchemaCache.RemoteRelationship` have been created from existing modules that capture schema cache building logic for those two components.
This was a fair amount of work. On main, currently remote schema's SchemaCache information is built in two phases - in the first phase, 'permissions' and 'remote relationships' are ignored and in the second phase they are filled in.
While remote relationships can only be resolved after partially resolving sources and other remote schemas, the same isn't true for permissions. Further, most of the work that is done to resolve remote relationships can be moved to the first phase so that the second phase can be a very simple traversal.
This is the approach that was taken - resolve permissions and as much as remote relationships information in the first phase.
### 3. Metadata APIs related types and build logic have been moved
The types that represent remote schema related metadata APIs and the execution logic have been moved to `Hasura.RemoteSchema.MetadataAPI.Types` and `.Execute` modules respectively.
## Open questions:
1. `Hasura.RemoteSchema.Metadata.Types` is so called because I was hoping that all of the metadata related APIs of remote schema can be brought in at `Hasura.RemoteSchema.Metadata.API`. However, as metadata APIs depended on functions from `SchemaCache` module (see [1](https://github.com/hasura/graphql-engine-mono/blob/ceba6d62264603ee5d279814677b29bcc43ecaea/server/src-lib/Hasura/RQL/DDL/RemoteSchema.hs#L55) and [2](https://github.com/hasura/graphql-engine-mono/blob/ceba6d62264603ee5d279814677b29bcc43ecaea/server/src-lib/Hasura/RQL/DDL/RemoteSchema.hs#L91), it made more sense to create a separate top-level module for `MetadataAPI`s.
Maybe we can just have `Hasura.RemoteSchema.Metadata` and get rid of the extra nesting or have `Hasura.RemoteSchema.Metadata.{Core,Permission,RemoteRelationship}` if we want to break them down further.
1. `buildRemoteSchemas` in `H.RS.SchemaCache.Build` has the following type:
```haskell
buildRemoteSchemas ::
( ArrowChoice arr,
Inc.ArrowDistribute arr,
ArrowWriter (Seq CollectedInfo) arr,
Inc.ArrowCache m arr,
MonadIO m,
HasHttpManagerM m,
Inc.Cacheable remoteRelationshipDefinition,
ToJSON remoteRelationshipDefinition,
MonadError QErr m
) =>
Env.Environment ->
( (Inc.Dependency (HashMap RemoteSchemaName Inc.InvalidationKey), OrderedRoles),
[RemoteSchemaMetadataG remoteRelationshipDefinition]
)
`arr` HashMap RemoteSchemaName (PartiallyResolvedRemoteSchemaCtxG remoteRelationshipDefinition, MetadataObject)
```
Note the dependence on `CollectedInfo` which is defined as
```haskell
data CollectedInfo
= CIInconsistency InconsistentMetadata
| CIDependency
MetadataObject
-- ^ for error reporting on missing dependencies
SchemaObjId
SchemaDependency
deriving (Eq)
```
this pretty much means that remote schemas is dependent on types from databases, actions, ....
How do we fix this? Maybe introduce a typeclass such as `ArrowCollectRemoteSchemaDependencies` which is defined in `Hasura.RemoteSchema` and then implemented in graphql-engine?
1. The dependency on `buildSchemaCacheFor` in `.MetadataAPI.Execute` which has the following signature:
```haskell
buildSchemaCacheFor ::
(QErrM m, CacheRWM m, MetadataM m) =>
MetadataObjId ->
MetadataModifier ->
```
This can be easily resolved if we restrict what the metadata APIs are allowed to do. Currently, they have unfettered access to modify the SchemaCache (the `CacheRWM` constraint):
```haskell
runAddRemoteSchema ::
( QErrM m,
CacheRWM m,
MonadIO m,
HasHttpManagerM m,
MetadataM m,
Tracing.MonadTrace m
) =>
Env.Environment ->
AddRemoteSchemaQuery ->
m EncJSON
```
If this is instead changed to restrict remote schema APIs to only modifying remote schema metadata (while still having access to the remote schemas part of the schema cache), this dependency is completely removed.
```haskell
runAddRemoteSchema ::
( QErrM m,
MonadIO m,
HasHttpManagerM m,
MonadReader RemoteSchemasSchemaCache m,
MonadState RemoteSchemaMetadata m,
Tracing.MonadTrace m
) =>
Env.Environment ->
AddRemoteSchemaQuery ->
m RemoteSchemeMetadataObjId
```
The idea is that the core graphql-engine would call these functions and then call
`buildSchemaCacheFor`.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6291
GitOrigin-RevId: 51357148c6404afe70219afa71bd1d59bdf4ffc6
2022-10-21 06:13:07 +03:00
import Hasura.RemoteSchema.SchemaCache
2021-09-24 01:56:37 +03:00
import Hasura.SQL.AnyBackend qualified as AB
2023-03-17 13:29:07 +03:00
import Hasura.Server.AppStateRef
2021-09-24 01:56:37 +03:00
import Hasura.Server.Auth
( AuthMode ,
UserAuthentication ,
resolveUserInfo ,
)
import Hasura.Server.Cors
import Hasura.Server.Init.Config ( KeepAliveDelay ( .. ) )
2021-09-29 19:20:06 +03:00
import Hasura.Server.Limits
( HasResourceLimits ( .. ) ,
ResourceLimits ( .. ) ,
)
2021-09-24 01:56:37 +03:00
import Hasura.Server.Metrics ( ServerMetrics ( .. ) )
2022-07-24 00:18:01 +03:00
import Hasura.Server.Prometheus
( GraphQLRequestMetrics ( .. ) ,
PrometheusMetrics ( .. ) ,
)
2021-09-24 01:56:37 +03:00
import Hasura.Server.Telemetry.Counters qualified as Telem
2023-06-25 16:46:35 +03:00
import Hasura.Server.Types ( GranularPrometheusMetricsState ( .. ) , MonadGetPolicies ( .. ) , RequestId , getInputValidationSetting , getRequestId )
harmonize network manager handling
## Description
### I want to speak to the `Manager`
Oh boy. This PR is both fairly straightforward and overreaching, so let's break it down.
For most network access, we need a [`HTTP.Manager`](https://hackage.haskell.org/package/http-client-0.1.0.0/docs/Network-HTTP-Client-Manager.html). It is created only once, at the top level, when starting the engine, and is then threaded through the application to wherever we need to make a network call. As of main, the way we do this is not standardized: most of the GraphQL execution code passes it "manually" as a function argument throughout the code. We also have a custom monad constraint, `HasHttpManagerM`, that describes a monad's ability to provide a manager. And, finally, several parts of the code store the manager in some kind of argument structure, such as `RunT`'s `RunCtx`.
This PR's first goal is to harmonize all of this: we always create the manager at the root, and we already have it when we do our very first `runReaderT`. Wouldn't it make sense for the rest of the code to not manually pass it anywhere, to not store it anywhere, but to always rely on the current monad providing it? This is, in short, what this PR does: it implements a constraint on the base monads, so that they provide the manager, and removes most explicit passing from the code.
### First come, first served
One way this PR goes a tiny bit further than "just" doing the aforementioned harmonization is that it starts the process of implementing the "Services oriented architecture" roughly outlined in this [draft document](https://docs.google.com/document/d/1FAigqrST0juU1WcT4HIxJxe1iEBwTuBZodTaeUvsKqQ/edit?usp=sharing). Instead of using the existing `HasHTTPManagerM`, this PR revamps it into the `ProvidesNetwork` service.
The idea is, again, that we should make all "external" dependencies of the engine, all things that the core of the engine doesn't care about, a "service". This allows us to define clear APIs for features, to choose different implementations based on which version of the engine we're running, harmonizes our many scattered monadic constraints... Which is why this service is called "Network": we can refine it, moving forward, to be the constraint that defines how all network communication is to operate, instead of relying on disparate classes constraint or hardcoded decisions. A comment in the code clarifies this intent.
### Side-effects? In my Haskell?
This PR also unavoidably touches some other aspects of the codebase. One such example: it introduces `Hasura.App.AppContext`, named after `HasuraPro.Context.AppContext`: a name for the reader structure at the base level. It also transforms `Handler` from a type alias to a newtype, as `Handler` is where we actually enforce HTTP limits; but without `Handler` being a distinct type, any code path could simply do a `runExceptT $ runReader` and forget to enforce them.
(As a rule of thumb, i am starting to consider any straggling `runReaderT` or `runExceptT` as a code smell: we should not stack / unstack monads haphazardly, and every layer should be an opaque `newtype` with a corresponding run function.)
## Further work
In several places, i have left TODOs when i have encountered things that suggest that we should do further unrelated cleanups. I'll write down the follow-up steps, either in the aforementioned document or on slack. But, in short, at a glance, in approximate order, we could:
- delete `ExecutionCtx` as it is only a subset of `ServerCtx`, and remove one more `runReaderT` call
- delete `ServerConfigCtx` as it is only a subset of `ServerCtx`, and remove it from `RunCtx`
- remove `ServerCtx` from `HandlerCtx`, and make it part of `AppContext`, or even make it the `AppContext` altogether (since, at least for the OSS version, `AppContext` is there again only a subset)
- remove `CacheBuildParams` and `CacheBuild` altogether, as they're just a distinct stack that is a `ReaderT` on top of `IO` that contains, you guessed it, the same thing as `ServerCtx`
- move `RunT` out of `RQL.Types` and rename it, since after the previous cleanups **it only contains `UserInfo`**; it could be bundled with the authentication service, made a small implementation detail in `Hasura.Server.Auth`
- rename `PGMetadaStorageT` to something a bit more accurate, such as `App`, and enforce its IO base
This would significantly simplify our complex stack. From there, or in parallel, we can start moving existing dependencies as Services. For the purpose of supporting read replicas entitlement, we could move `MonadResolveSource` to a `SourceResolver` service, as attempted in #7653, and transform `UserAuthenticationM` into an `Authentication` service.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/7736
GitOrigin-RevId: 68cce710eb9e7d752bda1ba0c49541d24df8209f
2023-02-22 18:53:52 +03:00
import Hasura.Services.Network
2021-09-24 01:56:37 +03:00
import Hasura.Session
import Hasura.Tracing qualified as Tracing
2021-10-29 17:42:07 +03:00
import Language.GraphQL.Draft.Syntax ( Name ( .. ) )
2022-07-24 00:18:01 +03:00
import Language.GraphQL.Draft.Syntax qualified as G
2021-09-24 01:56:37 +03:00
import ListT qualified
2022-02-16 10:08:51 +03:00
import Network.HTTP.Client qualified as HTTP
import Network.HTTP.Types qualified as HTTP
2021-09-24 01:56:37 +03:00
import Network.WebSockets qualified as WS
2022-09-21 21:01:48 +03:00
import Refined ( unrefine )
2021-09-24 01:56:37 +03:00
import StmContainers.Map qualified as STMMap
2022-07-24 00:18:01 +03:00
import System.Metrics.Prometheus.Counter qualified as Prometheus . Counter
import System.Metrics.Prometheus.Histogram qualified as Prometheus . Histogram
2019-07-11 08:37:06 +03:00
2022-04-22 22:53:12 +03:00
-- | 'ES.SubscriberDetails' comes from 'Hasura.GraphQL.Execute.LiveQuery.State.addLiveQuery'. We use
2020-03-05 20:59:26 +03:00
-- this to track a connection's operations so we can remove them from 'LiveQueryState', and
-- log.
--
2020-03-20 09:46:45 +03:00
-- NOTE!: This must be kept consistent with the global 'LiveQueryState', in 'onClose'
2020-03-05 20:59:26 +03:00
-- and 'onStart'.
2018-07-20 10:22:46 +03:00
--
-- 'OpDetail' is the per-operation status carried in the '_odOperationType'
-- field of 'OperationDetails' (and therefore in every 'EOperation' event).
-- 'logWSEvent' treats 'ODProtoErr' and 'ODQueryErr' as errors and every other
-- constructor as informational.
--
-- NOTE(review): the Haddock text preceding this declaration talks about
-- 'ES.SubscriberDetails' and 'LiveQueryState', neither of which this type
-- mentions - it may have been written for a different declaration; confirm.
data OpDetail
2018-10-16 14:49:24 +03:00
= ODStarted
| ODProtoErr ! Text
| ODQueryErr ! QErr
| ODCompleted
| ODStopped
2022-07-01 14:47:20 +03:00
deriving ( Eq )
2021-09-24 01:56:37 +03:00
-- JSON encoding: the constructor name loses its first two characters (the
-- @OD@ prefix) and is snake-cased to form the tag; the value is encoded as a
-- tagged object using the tag/contents field names given below.
$ ( J . deriveToJSON
J . defaultOptions
{ J . constructorTagModifier = J . snakeCase . drop 2 ,
J . sumEncoding = J . TaggedObject " type " " detail "
}
''OpDetail
)
-- | Everything that is logged about a single operation; this is the payload
-- of the 'EOperation' constructor of 'WSEvent'.
--
-- Only the operation id and the 'OpDetail' status are mandatory; the request
-- id, operation name, unparsed query and parameterized query hash are all
-- optional. All fields are strict.
data OperationDetails = OperationDetails
{ _odOperationId :: ! OperationId ,
_odRequestId :: ! ( Maybe RequestId ) ,
_odOperationName :: ! ( Maybe OperationName ) ,
_odOperationType :: ! OpDetail ,
_odQuery :: ! ( Maybe GQLReqUnparsed ) ,
_odParameterizedQueryHash :: ! ( Maybe ParameterizedQueryHash )
}
2022-07-01 14:47:20 +03:00
deriving ( Eq )
2021-09-24 01:56:37 +03:00
2021-01-19 22:14:42 +03:00
-- The ToJSON instance is derived with the project's 'hasuraJSON' options.
$ ( J . deriveToJSON hasuraJSON ''OperationDetails )
2019-07-11 08:37:06 +03:00
2018-07-20 10:22:46 +03:00
-- | The kinds of websocket events that get logged.
--
-- 'logWSEvent' logs 'ERejected' and 'EConnErr' at error level, 'EAccepted'
-- and 'EClosed' at info level, and decides the level of an 'EOperation' from
-- the 'OpDetail' it carries.
data WSEvent
= EAccepted
| ERejected ! QErr
2018-10-16 14:49:24 +03:00
| EConnErr ! ConnErrMsg
2019-07-11 08:37:06 +03:00
| EOperation ! OperationDetails
2018-07-20 10:22:46 +03:00
| EClosed
2022-07-01 14:47:20 +03:00
deriving ( Eq )
2021-09-24 01:56:37 +03:00
-- JSON encoding: the constructor name loses its first character (the @E@
-- prefix) and is snake-cased to form the tag; the value is encoded as a
-- tagged object using the tag/contents field names given below.
$ ( J . deriveToJSON
J . defaultOptions
{ J . constructorTagModifier = J . snakeCase . drop 1 ,
J . sumEncoding = J . TaggedObject " type " " detail "
}
''WSEvent
)
-- | Connection-level information attached to every websocket log line: the
-- websocket id, the token expiry time (when one is known) and an optional
-- free-form message. In this module the message is only populated by
-- 'onConn', which uses it to carry the note logged when the Cookie header is
-- dropped because CORS is disabled.
data WsConnInfo = WsConnInfo
{ _wsciWebsocketId :: ! WS . WSId ,
_wsciTokenExpiry :: ! ( Maybe TC . UTCTime ) ,
_wsciMsg :: ! ( Maybe Text )
}
2022-07-01 14:47:20 +03:00
deriving ( Eq )
2021-09-24 01:56:37 +03:00
2021-01-19 22:14:42 +03:00
-- The ToJSON instance is derived with the project's 'hasuraJSON' options.
$ ( J . deriveToJSON hasuraJSON ''WsConnInfo )
2018-07-20 10:22:46 +03:00
2021-09-24 01:56:37 +03:00
-- | The body of a websocket log line: the session variables of the user (when
-- available), the connection info and the event being reported.
data WSLogInfo = WSLogInfo
{ _wsliUserVars :: ! ( Maybe SessionVariables ) ,
_wsliConnectionInfo :: ! WsConnInfo ,
_wsliEvent :: ! WSEvent
}
2022-07-01 14:47:20 +03:00
deriving ( Eq )
2021-09-24 01:56:37 +03:00
2021-01-19 22:14:42 +03:00
-- The ToJSON instance is derived with the project's 'hasuraJSON' options.
$ ( J . deriveToJSON hasuraJSON ''WSLogInfo )
2019-07-11 08:37:06 +03:00
2021-09-24 01:56:37 +03:00
-- | A complete websocket log entry: a log level paired with the 'WSLogInfo'
-- payload.
data WSLog = WSLog
{ _wslLogLevel :: ! L . LogLevel ,
_wslInfo :: ! WSLogInfo
2019-07-11 08:37:06 +03:00
}
2019-11-26 15:14:21 +03:00
-- A 'WSLog' is emitted with its own log level, the 'L.ELTWebsocketLog' log
-- type, and the JSON encoding of its 'WSLogInfo' as the payload.
instance L . ToEngineLog WSLog L . Hasura where
2019-07-11 08:37:06 +03:00
toEngineLog ( WSLog logLevel wsLog ) =
( logLevel , L . ELTWebsocketLog , J . toJSON wsLog )
2020-04-24 12:10:53 +03:00
-- | Build an info-level 'WSLog' from the user's session variables (if any),
-- the connection info and the event being reported.
mkWsInfoLog :: Maybe SessionVariables -> WsConnInfo -> WSEvent -> WSLog
mkWsInfoLog sessionVars connInfo event =
  WSLog L.LevelInfo (WSLogInfo sessionVars connInfo event)
2020-04-24 12:10:53 +03:00
-- | Build an error-level 'WSLog' from the user's session variables (if any),
-- the connection info and the event being reported.
mkWsErrorLog :: Maybe SessionVariables -> WsConnInfo -> WSEvent -> WSLog
mkWsErrorLog sessionVars connInfo event =
  WSLog L.LevelError (WSLogInfo sessionVars connInfo event)
2018-07-20 10:22:46 +03:00
2021-09-24 01:56:37 +03:00
-- | Log a websocket event for the given connection.
--
-- The log entry carries the connection's websocket id and, when the
-- connection state is 'CSInitialised', the user's session variables and the
-- token expiry time; in any other state both are 'Nothing'.
--
-- Rejections, connection errors and operations that ended in a protocol or
-- query error are logged at error level; everything else at info level.
logWSEvent ::
  (MonadIO m) =>
  L.Logger L.Hasura ->
  WSConn ->
  WSEvent ->
  m ()
logWSEvent (L.Logger writeLog) wsConn event = liftIO $ do
  connState <- STM.readTVarIO connStateVar
  let (sessionVars, tokenExpiry) = case connState of
        CSInitialised clientState ->
          ( Just (_uiSession (wscsUserInfo clientState)),
            wscsTokenExpTime clientState
          )
        _ -> (Nothing, Nothing)
      connInfo = WsConnInfo (WS.getWSId wsConn) tokenExpiry Nothing
  writeLog $ WSLog severity (WSLogInfo sessionVars connInfo event)
  where
    WSConnData connStateVar _ _ _ = WS.getData wsConn

    severity
      | eventIsError event = L.LevelError
      | otherwise = L.LevelInfo

    -- Every constructor is listed explicitly so that adding a new event forces
    -- a decision about its log level.
    eventIsError :: WSEvent -> Bool
    eventIsError EAccepted = False
    eventIsError (ERejected _) = True
    eventIsError (EConnErr _) = True
    eventIsError EClosed = False
    eventIsError (EOperation opDetails) = opIsError (_odOperationType opDetails)

    opIsError :: OpDetail -> Bool
    opIsError ODStarted = False
    opIsError (ODProtoErr _) = True
    opIsError (ODQueryErr _) = True
    opIsError ODCompleted = False
    opIsError ODStopped = False
2021-08-24 19:25:12 +03:00
2023-05-24 16:51:56 +03:00
-- | Encode a 'ServerMsg' and hand it to 'WS.sendMsg' for the given
-- connection, with no event info attached. A timer is started just before
-- the message is handed over and travels with it.
sendMsg :: (MonadIO m) => WSConn -> ServerMsg -> m ()
sendMsg wsConn msg = liftIO $ do
  queueTimer <- startTimer
  let response = WS.WSQueueResponse (encodeServerMsg msg) Nothing queueTimer
  WS.sendMsg wsConn response
2021-08-24 19:25:12 +03:00
2021-11-04 15:38:57 +03:00
-- | Log an 'EClosed' event and close the websocket connection.
--
-- The close code is the supplied @Maybe Word16@ when it is 'Just'; when it is
-- 'Nothing' the code is derived from the 'ServerErrorCode'. The close reason
-- is always the encoded 'ServerErrorCode'.
--
-- When a 'ServerMsg' is supplied it is passed to 'WS.sendMsgAndCloseConn'
-- along with the close code and reason; otherwise only the close code and
-- reason are sent on the raw websocket connection.
sendCloseWithMsg ::
  (MonadIO m) =>
  L.Logger L.Hasura ->
  WSConn ->
  ServerErrorCode ->
  Maybe ServerMsg ->
  Maybe Word16 ->
  m ()
sendCloseWithMsg logger wsConn errCode mErrServerMsg mCode = do
  logWSEvent logger wsConn EClosed
  liftIO $ maybe closeOnly sendThenClose mErrServerMsg
  where
    closeReason = encodeServerErrorMsg errCode
    closeCode = fromMaybe (defaultCloseCode errCode) mCode

    closeOnly =
      WS.sendCloseCode (WS.getRawWebSocketConnection wsConn) closeCode closeReason
    sendThenClose = WS.sendMsgAndCloseConn wsConn closeCode closeReason

    -- The numeric close code that corresponds to each server error code.
    defaultCloseCode :: ServerErrorCode -> Word16
    defaultCloseCode ProtocolError1002 = 1002
    defaultCloseCode (GenericError4400 _) = 4400
    defaultCloseCode Unauthorized4401 = 4401
    defaultCloseCode Forbidden4403 = 4403
    defaultCloseCode ConnectionInitTimeout4408 = 4408
    defaultCloseCode (NonUniqueSubscription4409 _) = 4409
    defaultCloseCode TooManyRequests4429 = 4429
2021-08-24 19:25:12 +03:00
2021-09-24 01:56:37 +03:00
-- | Like 'sendMsg', but also attaches a 'WS.WSEventInfo' describing the
-- message: its event type and operation id (only for 'SMNext' and 'SMData'
-- messages), the operation name, the query execution time taken from the
-- 'ES.SubscriptionMetadata', the size of the encoded response and the
-- parameterized query hash.
sendMsgWithMetadata ::
  (MonadIO m) =>
  WSConn ->
  ServerMsg ->
  Maybe OperationName ->
  Maybe ParameterizedQueryHash ->
  ES.SubscriptionMetadata ->
  m ()
sendMsgWithMetadata wsConn msg opName paramQueryHash (ES.SubscriptionMetadata execTime) = liftIO $ do
  queueTimer <- startTimer
  WS.sendMsg wsConn (WS.WSQueueResponse payload eventInfo queueTimer)
  where
    payload = encodeServerMsg msg

    -- Only data-carrying messages have an event type and an operation id.
    (eventType, eventOpId) = case msg of
      SMNext (DataMsg opId _) -> (Just SMT_GQL_NEXT, Just opId)
      SMData (DataMsg opId _) -> (Just SMT_GQL_DATA, Just opId)
      _ -> (Nothing, Nothing)

    eventInfo =
      Just
        $! WS.WSEventInfo
          { WS._wseiEventType = eventType,
            WS._wseiOperationId = eventOpId,
            WS._wseiOperationName = opName,
            WS._wseiQueryExecutionTime = Just $! realToFrac execTime,
            WS._wseiResponseSize = Just $! LBS.length payload,
            WS._wseiParameterizedQueryHash = paramQueryHash
          }
-- | Handler for an incoming websocket connection request.
--
-- The request is first validated: the request path must be one of the
-- supported endpoints (which also fixes the error-response type and query
-- type of the connection), and, when an @Origin@ header is present, the CORS
-- policy is enforced against it. Websocket handshake headers are then
-- filtered out of the header list that is kept for the connection.
--
-- On success an 'EAccepted' event is logged and the request is accepted with
-- a fresh 'WSConnData' (connection state 'CSNotInitialised'), a keep-alive
-- action and a token-expiry handler. On failure an 'ERejected' event is
-- logged at error level and the request is rejected with the HTTP status of
-- the error and the encoded error as the body.
onConn ::
2023-03-17 13:29:07 +03:00
( MonadIO m , MonadReader ( WSServerEnv impl ) m ) =>
2021-09-24 01:56:37 +03:00
WS . OnConnH m WSConnData
2021-08-24 19:25:12 +03:00
onConn wsId requestHead ipAddress onConnHActions = do
2019-03-04 10:46:53 +03:00
res <- runExceptT $ do
2020-06-08 15:13:01 +03:00
( errType , queryType ) <- checkPath
2019-03-04 10:46:53 +03:00
let reqHdrs = WS . requestHeaders requestHead
-- CORS is only enforced when the request carries an Origin header; without
-- one the request headers are used unchanged.
headers <- maybe ( return reqHdrs ) ( flip enforceCors reqHdrs . snd ) getOrigin
2020-06-08 15:13:01 +03:00
return ( WsHeaders $ filterWsHeaders headers , errType , queryType )
either reject accept res
2019-03-04 10:46:53 +03:00
where
2021-08-24 19:25:12 +03:00
kaAction = WS . _wsaKeepAliveAction onConnHActions
acceptRequest = WS . _wsaAcceptRequest onConnHActions
-- NOTE: the "Keep-Alive" delay is something that's mentioned
-- in the Apollo spec. For 'graphql-ws', we're using the Ping
-- messages that are part of the spec.
-- Runs forever: perform the keep-alive action, then sleep for the configured
-- keep-alive delay.
2022-04-22 22:53:12 +03:00
keepAliveAction keepAliveDelay wsConn =
2023-05-24 16:51:56 +03:00
liftIO
$ forever
$ do
2021-09-24 01:56:37 +03:00
kaAction wsConn
2022-09-21 21:01:48 +03:00
sleep $ seconds ( unrefine $ unKeepAliveDelay keepAliveDelay )
2018-07-20 10:22:46 +03:00
-- Blocks (via STM retry) until the connection is initialised and has a token
-- expiry time, then sleeps for the time remaining until that expiry.
2020-04-03 03:00:13 +03:00
tokenExpiryHandler wsConn = do
2023-05-24 16:51:56 +03:00
expTime <- liftIO
$ STM . atomically
$ do
2021-09-24 01:56:37 +03:00
connState <- STM . readTVar $ ( _wscUser . WS . getData ) wsConn
case connState of
CSNotInitialised _ _ -> STM . retry
CSInitError _ -> STM . retry
CSInitialised clientState -> onNothing ( wscsTokenExpTime clientState ) STM . retry
2019-05-14 09:24:46 +03:00
currTime <- TC . getCurrentTime
2020-05-13 15:33:16 +03:00
sleep $ convertDuration $ TC . diffUTCTime expTime currTime
2019-05-14 09:24:46 +03:00
-- Success path: log the acceptance and build the per-connection data. The
-- connection starts out not initialised, remembering the filtered headers and
-- the client's IP address, with an empty operation map.
2020-06-08 15:13:01 +03:00
accept ( hdrs , errType , queryType ) = do
2020-11-12 12:25:48 +03:00
( L . Logger logger ) <- asks _wseLogger
2021-09-24 01:56:37 +03:00
keepAliveDelay <- asks _wseKeepAliveDelay
2019-07-11 08:37:06 +03:00
logger $ mkWsInfoLog Nothing ( WsConnInfo wsId Nothing Nothing ) EAccepted
2021-09-24 01:56:37 +03:00
connData <-
2023-05-24 16:51:56 +03:00
liftIO
$ WSConnData
<$> STM . newTVarIO ( CSNotInitialised hdrs ipAddress )
<*> STMMap . newIO
<*> pure errType
<*> pure queryType
pure
$ Right
$ WS . AcceptWith
connData
acceptRequest
( keepAliveAction keepAliveDelay )
tokenExpiryHandler
2021-08-24 19:25:12 +03:00
-- Failure path: log the rejection at error level and answer with the status
-- code and message of the error, no extra headers, and the encoded error as
-- the response body.
2018-07-20 10:22:46 +03:00
reject qErr = do
2020-11-12 12:25:48 +03:00
( L . Logger logger ) <- asks _wseLogger
2019-07-11 08:37:06 +03:00
logger $ mkWsErrorLog Nothing ( WsConnInfo wsId Nothing Nothing ) ( ERejected qErr )
2023-05-24 16:51:56 +03:00
return
$ Left
$ WS . RejectRequest
( HTTP . statusCode $ qeStatus qErr )
( HTTP . statusMessage $ qeStatus qErr )
[]
2023-06-02 08:28:17 +03:00
( LBS . toStrict $ J . encodingToLazyByteString $ encodeGQLErr False qErr )
2018-07-20 10:22:46 +03:00
-- Maps the request path to the error-response type and query type of the
-- connection; any other path is rejected with a 404 error.
2019-05-10 09:05:11 +03:00
checkPath = case WS . requestPath requestHead of
2020-06-08 15:13:01 +03:00
" /v1alpha1/graphql " -> return ( ERTLegacy , E . QueryHasura )
2021-09-24 01:56:37 +03:00
" /v1/graphql " -> return ( ERTGraphqlCompliant , E . QueryHasura )
" /v1beta1/relay " -> return ( ERTGraphqlCompliant , E . QueryRelay )
_ ->
2020-07-03 09:30:35 +03:00
throw404 " only '/v1/graphql', '/v1alpha1/graphql' and '/v1beta1/relay' are supported on websockets "
2018-07-20 10:22:46 +03:00
-- The first request header whose name equals "Origin", if any.
2019-03-04 10:46:53 +03:00
getOrigin =
find ( ( == ) " Origin " . fst ) ( WS . requestHeaders requestHead )
2020-11-12 12:25:48 +03:00
-- Applies the configured CORS policy to the request headers and returns the
-- headers to keep, or fails with an access-denied error when the origin is
-- not allowed.
enforceCors origin reqHdrs = do
( L . Logger logger ) <- asks _wseLogger
2023-03-30 19:31:50 +03:00
corsPolicy <- liftIO =<< asks _wseCorsPolicy
2020-11-12 12:25:48 +03:00
case cpConfig corsPolicy of
CCAllowAll -> return reqHdrs
-- With CORS disabled the Cookie header is dropped (and a note is logged)
-- unless reading the cookie has been explicitly enabled.
CCDisabled readCookie ->
if readCookie
2021-09-24 01:56:37 +03:00
then return reqHdrs
else do
lift $ logger $ mkWsInfoLog Nothing ( WsConnInfo wsId Nothing ( Just corsNote ) ) EAccepted
return $ filter ( \ h -> fst h /= " Cookie " ) reqHdrs
2020-11-12 12:25:48 +03:00
CCAllowedOrigins ds
-- if the origin is in our cors domains, no error
2021-09-24 01:56:37 +03:00
| bsToTxt origin ` elem ` dmFqdns ds -> return reqHdrs
2020-11-12 12:25:48 +03:00
-- if current origin is part of wildcard domain list, no error
| inWildcardList ds ( bsToTxt origin ) -> return reqHdrs
-- otherwise error
2021-09-24 01:56:37 +03:00
| otherwise -> corsErr
2019-03-04 10:46:53 +03:00
-- Drops the websocket handshake headers listed below from the header list
-- that is stored with the connection.
filterWsHeaders hdrs = flip filter hdrs $ \ ( n , _ ) ->
2021-09-24 01:56:37 +03:00
n
` notElem ` [ " sec-websocket-key " ,
" sec-websocket-version " ,
" upgrade " ,
" connection "
2019-03-04 10:46:53 +03:00
]
2021-09-24 01:56:37 +03:00
corsErr =
throw400
AccessDenied
" received origin header does not match configured CORS domains "
-- Message attached to the log line emitted when the Cookie header is dropped.
corsNote =
" Cookie is not read when CORS is disabled, because it is a potential "
<> " security issue. If you're already handling CORS before Hasura and enforcing "
<> " CORS on websocket connections, then you can use the flag --ws-read-cookie or "
<> " HASURA_GRAPHQL_WS_READ_COOKIE to force read cookie when CORS is disabled. "
2022-11-29 13:04:51 +03:00
-- | Two-valued flag stating whether query variables should be captured.
-- It exists to avoid boolean blindness: the choice is spelled out by the
-- constructor name instead of being passed around as a bare 'Bool'.
data ShouldCaptureQueryVariables
= CaptureQueryVariables
| DoNotCaptureQueryVariables
2021-09-24 01:56:37 +03:00
onStart ::
2023-03-17 13:29:07 +03:00
forall m impl .
2021-10-13 19:38:56 +03:00
( MonadIO m ,
2021-09-24 01:56:37 +03:00
E . MonadGQLExecutionCheck m ,
MonadQueryLog m ,
2023-03-15 16:05:17 +03:00
MonadExecutionLog m ,
2021-09-24 01:56:37 +03:00
Tracing . MonadTrace m ,
MonadExecuteQuery m ,
MC . MonadBaseControl IO m ,
2023-02-03 04:03:23 +03:00
MonadMetadataStorage m ,
2023-03-31 00:18:11 +03:00
MonadQueryTags m ,
harmonize network manager handling
## Description
### I want to speak to the `Manager`
Oh boy. This PR is both fairly straightforward and overreaching, so let's break it down.
For most network access, we need a [`HTTP.Manager`](https://hackage.haskell.org/package/http-client-0.1.0.0/docs/Network-HTTP-Client-Manager.html). It is created only once, at the top level, when starting the engine, and is then threaded through the application to wherever we need to make a network call. As of main, the way we do this is not standardized: most of the GraphQL execution code passes it "manually" as a function argument throughout the code. We also have a custom monad constraint, `HasHttpManagerM`, that describes a monad's ability to provide a manager. And, finally, several parts of the code store the manager in some kind of argument structure, such as `RunT`'s `RunCtx`.
This PR's first goal is to harmonize all of this: we always create the manager at the root, and we already have it when we do our very first `runReaderT`. Wouldn't it make sense for the rest of the code to not manually pass it anywhere, to not store it anywhere, but to always rely on the current monad providing it? This is, in short, what this PR does: it implements a constraint on the base monads, so that they provide the manager, and removes most explicit passing from the code.
### First come, first served
One way this PR goes a tiny bit further than "just" doing the aforementioned harmonization is that it starts the process of implementing the "Services oriented architecture" roughly outlined in this [draft document](https://docs.google.com/document/d/1FAigqrST0juU1WcT4HIxJxe1iEBwTuBZodTaeUvsKqQ/edit?usp=sharing). Instead of using the existing `HasHTTPManagerM`, this PR revamps it into the `ProvidesNetwork` service.
The idea is, again, that we should make all "external" dependencies of the engine, all things that the core of the engine doesn't care about, a "service". This allows us to define clear APIs for features, to choose different implementations based on which version of the engine we're running, and to harmonize our many scattered monadic constraints... Which is why this service is called "Network": we can refine it, moving forward, to be the constraint that defines how all network communication is to operate, instead of relying on disparate class constraints or hardcoded decisions. A comment in the code clarifies this intent.
### Side-effects? In my Haskell?
This PR also unavoidably touches some other aspects of the codebase. One such example: it introduces `Hasura.App.AppContext`, named after `HasuraPro.Context.AppContext`: a name for the reader structure at the base level. It also transforms `Handler` from a type alias to a newtype, as `Handler` is where we actually enforce HTTP limits; but without `Handler` being a distinct type, any code path could simply do a `runExceptT $ runReader` and forget to enforce them.
(As a rule of thumb, i am starting to consider any straggling `runReaderT` or `runExceptT` as a code smell: we should not stack / unstack monads haphazardly, and every layer should be an opaque `newtype` with a corresponding run function.)
## Further work
In several places, i have left TODOs when i have encountered things that suggest that we should do further unrelated cleanups. I'll write down the follow-up steps, either in the aforementioned document or on slack. But, in short, at a glance, in approximate order, we could:
- delete `ExecutionCtx` as it is only a subset of `ServerCtx`, and remove one more `runReaderT` call
- delete `ServerConfigCtx` as it is only a subset of `ServerCtx`, and remove it from `RunCtx`
- remove `ServerCtx` from `HandlerCtx`, and make it part of `AppContext`, or even make it the `AppContext` altogether (since, at least for the OSS version, `AppContext` is there again only a subset)
- remove `CacheBuildParams` and `CacheBuild` altogether, as they're just a distinct stack that is a `ReaderT` on top of `IO` that contains, you guessed it, the same thing as `ServerCtx`
- move `RunT` out of `RQL.Types` and rename it, since after the previous cleanups **it only contains `UserInfo`**; it could be bundled with the authentication service, made a small implementation detail in `Hasura.Server.Auth`
- rename `PGMetadaStorageT` to something a bit more accurate, such as `App`, and enforce its IO base
This would significantly simplify our complex stack. From there, or in parallel, we can start moving existing dependencies as Services. For the purpose of supporting read replicas entitlement, we could move `MonadResolveSource` to a `SourceResolver` service, as attempted in #7653, and transform `UserAuthenticationM` into an `Authentication` service.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/7736
GitOrigin-RevId: 68cce710eb9e7d752bda1ba0c49541d24df8209f
2023-02-22 18:53:52 +03:00
HasResourceLimits m ,
2023-05-18 15:55:53 +03:00
ProvidesNetwork m ,
MonadGetPolicies m
2021-09-24 01:56:37 +03:00
) =>
HashSet ( L . EngineLogType L . Hasura ) ->
2023-04-05 11:57:19 +03:00
Maybe ( CredentialCache AgentLicenseKey ) ->
2023-03-17 13:29:07 +03:00
WSServerEnv impl ->
2021-09-24 01:56:37 +03:00
WSConn ->
2022-11-29 13:04:51 +03:00
ShouldCaptureQueryVariables ->
2021-09-24 01:56:37 +03:00
StartMsg ->
WS . WSActions WSConnData ->
m ()
2023-04-05 11:57:19 +03:00
onStart enabledLogTypes agentLicenseKey serverEnv wsConn shouldCaptureVariables ( StartMsg opId q ) onMessageActions = catchAndIgnore $ do
2020-01-16 04:56:57 +03:00
timerTot <- startTimer
2021-09-06 15:26:45 +03:00
op <- liftIO $ STM . atomically $ STMMap . lookup opId opMap
let opName = _grOperationName q
2018-07-20 10:22:46 +03:00
2020-03-05 20:59:26 +03:00
-- NOTE: it should be safe to rely on this check later on in this function, since we expect that
-- we process all operations on a websocket connection serially:
2023-05-24 16:51:56 +03:00
when ( isJust op )
$ withComplete
$ sendStartErr
$ " an operation already exists with this id: "
<> unOperationId opId
2018-07-20 10:22:46 +03:00
2019-05-14 09:24:46 +03:00
userInfoM <- liftIO $ STM . readTVarIO userInfoR
2021-05-19 17:07:04 +03:00
( userInfo , origReqHdrs , ipAddress ) <- case userInfoM of
2021-09-24 01:56:37 +03:00
CSInitialised WsClientState { .. } -> return ( wscsUserInfo , wscsReqHeaders , wscsIpAddress )
2018-11-23 16:02:46 +03:00
CSInitError initErr -> do
2022-10-01 17:47:12 +03:00
let e = " cannot start as connection_init failed with: " <> initErr
2022-11-29 13:04:51 +03:00
withComplete $ sendStartErr e
2020-06-16 18:23:06 +03:00
CSNotInitialised _ _ -> do
2019-05-10 09:05:11 +03:00
let e = " start received before the connection is initialised "
2022-11-29 13:04:51 +03:00
withComplete $ sendStartErr e
2018-07-20 10:22:46 +03:00
2022-07-24 00:18:01 +03:00
( requestId , reqHdrs ) <- liftIO $ getRequestId origReqHdrs
2023-03-17 13:29:07 +03:00
( sc , scVer ) <- liftIO $ getSchemaCacheWithVersion appStateRef
2022-11-29 13:04:51 +03:00
2022-10-27 18:34:43 +03:00
operationLimit <- askGraphqlOperationLimit requestId userInfo ( scApiLimits sc )
2021-09-29 19:20:06 +03:00
let runLimits ::
ExceptT ( Either GQExecError QErr ) ( ExceptT () m ) a ->
ExceptT ( Either GQExecError QErr ) ( ExceptT () m ) a
2022-10-27 18:34:43 +03:00
runLimits = withErr Right $ runResourceLimits operationLimit
2021-09-29 19:20:06 +03:00
2023-03-30 19:31:50 +03:00
env <- liftIO $ acEnvironment <$> getAppContext appStateRef
sqlGenCtx <- liftIO $ acSQLGenCtx <$> getAppContext appStateRef
enableAL <- liftIO $ acEnableAllowlist <$> getAppContext appStateRef
2023-06-25 16:46:35 +03:00
inputValidationSetting <- liftIO $ ( getInputValidationSetting . acExperimentalFeatures ) <$> getAppContext appStateRef
2023-03-30 19:31:50 +03:00
2023-05-31 08:47:40 +03:00
( reqParsed , queryParts ) <- Tracing . newSpan " Parse GraphQL " $ do
reqParsedE <- lift $ E . checkGQLExecution userInfo ( reqHdrs , ipAddress ) enableAL sc q requestId
reqParsed <- onLeft reqParsedE ( withComplete . preExecErr requestId Nothing )
queryPartsE <- runExceptT $ getSingleOperation reqParsed
queryParts <- onLeft queryPartsE ( withComplete . preExecErr requestId Nothing )
pure ( reqParsed , queryParts )
2022-07-24 00:18:01 +03:00
let gqlOpType = G . _todType queryParts
maybeOperationName = _unOperationName <$> _grOperationName reqParsed
2023-04-26 19:19:58 +03:00
for_ maybeOperationName $ \ nm ->
-- https://opentelemetry.io/docs/reference/specification/trace/semantic_conventions/instrumentation/graphql/
Tracing . attachMetadata [ ( " graphql.operation.name " , unName nm ) ]
2021-09-24 01:56:37 +03:00
execPlanE <-
2023-05-24 16:51:56 +03:00
runExceptT
$ E . getResolvedExecPlan
2021-09-24 01:56:37 +03:00
env
logger
2022-12-28 06:47:42 +03:00
prometheusMetrics
2021-09-24 01:56:37 +03:00
userInfo
sqlGenCtx
2023-06-25 16:46:35 +03:00
inputValidationSetting
2021-12-08 09:26:46 +03:00
readOnlyMode
2021-09-24 01:56:37 +03:00
sc
scVer
queryType
reqHdrs
2022-07-24 00:18:01 +03:00
q
queryParts
maybeOperationName
2021-09-24 01:56:37 +03:00
requestId
2020-01-16 04:56:57 +03:00
2022-11-29 13:04:51 +03:00
( parameterizedQueryHash , execPlan ) <- onLeft execPlanE ( withComplete . preExecErr requestId ( Just gqlOpType ) )
2019-07-11 08:37:06 +03:00
2019-03-25 21:25:25 +03:00
case execPlan of
2023-04-26 19:19:58 +03:00
E . QueryExecutionPlan queryPlan asts dirMap -> do
2023-05-17 12:21:18 +03:00
let cachedDirective = runIdentity <$> DM . lookup cached dirMap
2021-04-13 10:00:43 +03:00
2020-10-07 11:55:39 +03:00
-- We ignore the response headers (containing TTL information) because
-- WebSockets don't support them.
Rewrite `Tracing` to allow for only one `TraceT` in the entire stack.
This PR is on top of #7789.
### Description
This PR entirely rewrites the API of the Tracing library, to make `interpTraceT` a thing of the past. Before this change, we ran traces by sticking a `TraceT` on top of whatever we were doing. This had several major drawbacks:
- we were carrying a bunch of `TraceT` across the codebase, and the entire codebase had to know about it
- we needed to carry a second class constraint around (`HasReporterM`) to be able to run all of those traces
- we kept having to do stack rewriting with `interpTraceT`, which went from inconvenient to horrible
- we had to declare several behavioral instances on `TraceT m`
This PR rewrites all of `Tracing` using a more conventional model: there is ONE `TraceT` at the bottom of the stack, and there is an associated class constraint `MonadTrace`: any part of the code that happens to satisfy `MonadTrace` is able to create new traces. We NEVER have to do stack rewriting, `interpTraceT` is gone, and `TraceT` and `Reporter` become implementation details that 99% of the code is blissfully unaware of: code that needs to do tracing only needs to declare that the monad in which it operates implements `MonadTrace`.
In doing so, this PR revealed **several bugs in the codebase**: places where we were expecting to trace something, but due to the default instance of `HasReporterM IO` we would actually not do anything. This PR also splits the code of `Tracing` in more byte-sized modules, with the goal of potentially moving to `server/lib` down the line.
### Remaining work
This PR is a draft; what's left to do is:
- [x] make Pro compile; i haven't updated `HasuraPro/Main` yet
- [x] document Tracing by writing a note that explains how to use the library, and the meaning of "reporter", "trace" and "span", as well as the pitfalls
- [x] discuss some of the trade-offs in the implementation, which is why i'm opening this PR already despite it not fully building yet
- [x] it depends on #7789 being merged first
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/7791
GitOrigin-RevId: cadd32d039134c93ddbf364599a2f4dd988adea8
2023-03-13 20:37:16 +03:00
cachedValue <-
2023-05-17 12:21:18 +03:00
cacheLookup queryPlan asts cachedDirective reqParsed userInfo reqHdrs >>= \ case
Rewrite `Tracing` to allow for only one `TraceT` in the entire stack.
This PR is on top of #7789.
### Description
This PR entirely rewrites the API of the Tracing library, to make `interpTraceT` a thing of the past. Before this change, we ran traces by sticking a `TraceT` on top of whatever we were doing. This had several major drawbacks:
- we were carrying a bunch of `TraceT` across the codebase, and the entire codebase had to know about it
- we needed to carry a second class constraint around (`HasReporterM`) to be able to run all of those traces
- we kept having to do stack rewriting with `interpTraceT`, which went from inconvenient to horrible
- we had to declare several behavioral instances on `TraceT m`
This PR rewrites all of `Tracing` using a more conventional model: there is ONE `TraceT` at the bottom of the stack, and there is an associated class constraint `MonadTrace`: any part of the code that happens to satisfy `MonadTrace` is able to create new traces. We NEVER have to do stack rewriting, `interpTraceT` is gone, and `TraceT` and `Reporter` become implementation details that 99% of the code is blissfully unaware of: code that needs to do tracing only needs to declare that the monad in which it operates implements `MonadTrace`.
In doing so, this PR revealed **several bugs in the codebase**: places where we were expecting to trace something, but due to the default instance of `HasReporterM IO` we would actually not do anything. This PR also splits the code of `Tracing` in more byte-sized modules, with the goal of potentially moving to `server/lib` down the line.
### Remaining work
This PR is a draft; what's left to do is:
- [x] make Pro compile; i haven't updated `HasuraPro/Main` yet
- [x] document Tracing by writing a note that explains how to use the library, and the meaning of "reporter", "trace" and "span", as well as the pitfalls
- [x] discuss some of the trade-offs in the implementation, which is why i'm opening this PR already despite it not fully building yet
- [x] it depends on #7789 being merged first
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/7791
GitOrigin-RevId: cadd32d039134c93ddbf364599a2f4dd988adea8
2023-03-13 20:37:16 +03:00
Right ( _responseHeaders , cachedValue ) -> pure cachedValue
Left _err -> throwError ()
2020-10-07 11:55:39 +03:00
case cachedValue of
2023-05-17 12:21:18 +03:00
ResponseCached cachedResponseData -> do
2023-03-15 16:05:17 +03:00
logQueryLog logger $ QueryLog q Nothing requestId QueryLogKindCached
2022-07-24 00:18:01 +03:00
let reportedExecutionTime = 0
liftIO $ recordGQLQuerySuccess reportedExecutionTime gqlOpType
sendSuccResp cachedResponseData opName parameterizedQueryHash $ ES . SubscriptionMetadata reportedExecutionTime
2023-05-17 12:21:18 +03:00
ResponseUncached storeResponseM -> do
2023-05-24 16:51:56 +03:00
conclusion <- runExceptT
$ runLimits
$ forWithKey queryPlan
$ \ fieldName ->
let getResponse = \ case
E . ExecStepDB _headers exists remoteJoins -> doQErr $ do
( telemTimeIO_DT , resp ) <-
AB . dispatchAnyBackend @ BackendTransport
exists
\ ( EB . DBStepInfo _ sourceConfig genSql tx resolvedConnectionTemplate :: EB . DBStepInfo b ) ->
runDBQuery @ b
requestId
q
fieldName
userInfo
logger
agentLicenseKey
sourceConfig
( fmap ( statsToAnyBackend @ b ) tx )
genSql
resolvedConnectionTemplate
finalResponse <-
RJ . processRemoteJoins requestId logger agentLicenseKey env reqHdrs userInfo resp remoteJoins q
pure $ AnnotatedResponsePart telemTimeIO_DT Telem . Local finalResponse []
E . ExecStepRemote rsi resultCustomizer gqlReq remoteJoins -> do
logQueryLog logger $ QueryLog q Nothing requestId QueryLogKindRemoteSchema
runRemoteGQ requestId q fieldName userInfo reqHdrs rsi resultCustomizer gqlReq remoteJoins
E . ExecStepAction actionExecPlan _ remoteJoins -> do
logQueryLog logger $ QueryLog q Nothing requestId QueryLogKindAction
( time , ( resp , _ ) ) <- doQErr $ do
( time , ( resp , hdrs ) ) <- EA . runActionExecution userInfo actionExecPlan
2022-07-25 18:53:25 +03:00
finalResponse <-
2023-04-05 11:57:19 +03:00
RJ . processRemoteJoins requestId logger agentLicenseKey env reqHdrs userInfo resp remoteJoins q
2023-05-24 16:51:56 +03:00
pure ( time , ( finalResponse , hdrs ) )
pure $ AnnotatedResponsePart time Telem . Empty resp []
E . ExecStepRaw json -> do
logQueryLog logger $ QueryLog q Nothing requestId QueryLogKindIntrospection
buildRaw json
E . ExecStepMulti lst -> do
allResponses <- traverse getResponse lst
pure $ AnnotatedResponsePart 0 Telem . Local ( encJFromList ( map arpResponse allResponses ) ) []
in getResponse
2022-11-29 13:04:51 +03:00
sendResultFromFragments Telem . Query timerTot requestId conclusion opName parameterizedQueryHash gqlOpType
2023-05-17 12:21:18 +03:00
case ( storeResponseM , conclusion ) of
( Just ResponseCacher { .. } , Right results ) ->
-- Note: The result of `runStoreResponse` is ignored here since we can't ensure that
2021-08-25 04:52:38 +03:00
-- the WS client will respond correctly to multiple messages.
2023-05-24 16:51:56 +03:00
void
$ runStoreResponse
$ encodeAnnotatedResponseParts results
2023-05-17 12:21:18 +03:00
_ -> pure ()
2021-09-06 15:26:45 +03:00
2022-11-29 13:04:51 +03:00
liftIO $ sendCompleted ( Just requestId ) ( Just parameterizedQueryHash )
2020-10-07 13:23:17 +03:00
E . MutationExecutionPlan mutationPlan -> do
2021-04-01 23:40:31 +03:00
-- See Note [Backwards-compatible transaction optimisation]
case coalescePostgresMutations mutationPlan of
-- we are in the aforementioned case; we circumvent the normal process
2023-01-25 10:12:53 +03:00
Just ( sourceConfig , resolvedConnectionTemplate , pgMutations ) -> do
2021-09-24 01:56:37 +03:00
resp <-
2023-05-24 16:51:56 +03:00
runExceptT
$ runLimits
$ doQErr
$ runPGMutationTransaction requestId q userInfo logger sourceConfig resolvedConnectionTemplate pgMutations
2021-04-01 23:40:31 +03:00
-- we do not construct result fragments since we have only one result
2022-11-29 13:04:51 +03:00
handleResult requestId gqlOpType resp \ ( telemTimeIO_DT , results ) -> do
2021-04-01 23:40:31 +03:00
let telemQueryType = Telem . Query
2021-09-24 01:56:37 +03:00
telemLocality = Telem . Local
telemTimeIO = convertDuration telemTimeIO_DT
2022-07-24 00:18:01 +03:00
totalTime <- timerTot
let telemTimeTot = Seconds totalTime
2023-05-24 16:51:56 +03:00
sendSuccResp ( encodeEncJSONResults results ) opName parameterizedQueryHash
$ ES . SubscriptionMetadata telemTimeIO_DT
2021-04-01 23:40:31 +03:00
-- Telemetry. NOTE: don't time network IO:
2021-09-24 01:56:37 +03:00
Telem . recordTimingMetric Telem . RequestDimensions { .. } Telem . RequestTimings { .. }
2022-07-24 00:18:01 +03:00
liftIO $ recordGQLQuerySuccess totalTime gqlOpType
2021-04-01 23:40:31 +03:00
-- we are not in the transaction case; proceeding normally
Nothing -> do
2023-05-24 16:51:56 +03:00
conclusion <- runExceptT
$ runLimits
$ forWithKey mutationPlan
$ \ fieldName ->
let getResponse = \ case
-- Ignoring response headers since we can't send them over WebSocket
E . ExecStepDB _responseHeaders exists remoteJoins -> doQErr $ do
( telemTimeIO_DT , resp ) <-
AB . dispatchAnyBackend @ BackendTransport
exists
\ ( EB . DBStepInfo _ sourceConfig genSql tx resolvedConnectionTemplate :: EB . DBStepInfo b ) ->
runDBMutation @ b
requestId
q
fieldName
userInfo
logger
agentLicenseKey
sourceConfig
( fmap EB . arResult tx )
genSql
resolvedConnectionTemplate
finalResponse <-
RJ . processRemoteJoins requestId logger agentLicenseKey env reqHdrs userInfo resp remoteJoins q
pure $ AnnotatedResponsePart telemTimeIO_DT Telem . Local finalResponse []
E . ExecStepAction actionExecPlan _ remoteJoins -> do
logQueryLog logger $ QueryLog q Nothing requestId QueryLogKindAction
( time , ( resp , hdrs ) ) <- doQErr $ do
( time , ( resp , hdrs ) ) <- EA . runActionExecution userInfo actionExecPlan
2022-07-25 18:53:25 +03:00
finalResponse <-
2023-04-05 11:57:19 +03:00
RJ . processRemoteJoins requestId logger agentLicenseKey env reqHdrs userInfo resp remoteJoins q
2023-05-24 16:51:56 +03:00
pure ( time , ( finalResponse , hdrs ) )
pure $ AnnotatedResponsePart time Telem . Empty resp $ fromMaybe [] hdrs
E . ExecStepRemote rsi resultCustomizer gqlReq remoteJoins -> do
logQueryLog logger $ QueryLog q Nothing requestId QueryLogKindRemoteSchema
runRemoteGQ requestId q fieldName userInfo reqHdrs rsi resultCustomizer gqlReq remoteJoins
E . ExecStepRaw json -> do
logQueryLog logger $ QueryLog q Nothing requestId QueryLogKindIntrospection
buildRaw json
E . ExecStepMulti lst -> do
allResponses <- traverse getResponse lst
pure $ AnnotatedResponsePart 0 Telem . Local ( encJFromList ( map arpResponse allResponses ) ) []
in getResponse
2022-11-29 13:04:51 +03:00
sendResultFromFragments Telem . Query timerTot requestId conclusion opName parameterizedQueryHash gqlOpType
liftIO $ sendCompleted ( Just requestId ) ( Just parameterizedQueryHash )
2021-03-31 13:39:01 +03:00
E . SubscriptionExecutionPlan subExec -> do
case subExec of
2021-04-28 20:38:05 +03:00
E . SEAsyncActionsWithNoRelationships actions -> do
2023-03-15 16:05:17 +03:00
logQueryLog logger $ QueryLog q Nothing requestId QueryLogKindAction
2021-04-28 20:38:05 +03:00
liftIO do
2021-10-29 17:42:07 +03:00
let allActionIds = map fst $ toList actions
2021-04-28 20:38:05 +03:00
case NE . nonEmpty allActionIds of
2022-11-29 13:04:51 +03:00
Nothing -> sendCompleted ( Just requestId ) ( Just parameterizedQueryHash )
2021-04-28 20:38:05 +03:00
Just actionIds -> do
let sendResponseIO actionLogMap = do
2023-05-24 16:51:56 +03:00
( dTime , resultsE ) <- withElapsedTime
$ runExceptT
$ for actions
$ \ ( actionId , resultBuilder ) -> do
actionLogResponse <-
HashMap . lookup actionId actionLogMap
` onNothing ` throw500 " unexpected: cannot lookup action_id in response map "
liftEither $ resultBuilder actionLogResponse
2021-09-24 01:56:37 +03:00
case resultsE of
2022-11-29 13:04:51 +03:00
Left err -> sendError requestId err
2021-09-24 01:56:37 +03:00
Right results -> do
let dataMsg =
2023-05-24 16:51:56 +03:00
sendDataMsg
$ DataMsg opId
$ pure
$ encJToLBS
$ encodeEncJSONResults results
2022-03-21 13:39:49 +03:00
sendMsgWithMetadata wsConn dataMsg opName ( Just parameterizedQueryHash ) $ ES . SubscriptionMetadata dTime
2021-09-24 01:56:37 +03:00
asyncActionQueryLive =
2023-05-24 16:51:56 +03:00
ES . LAAQNoRelationships
$ ES . LiveAsyncActionQueryWithNoRelationships sendResponseIO ( sendCompleted ( Just requestId ) ( Just parameterizedQueryHash ) )
2021-09-24 01:56:37 +03:00
2022-03-21 13:39:49 +03:00
ES . addAsyncActionLiveQuery
2022-04-07 17:41:43 +03:00
( ES . _ssAsyncActions subscriptionsState )
2021-09-24 01:56:37 +03:00
opId
actionIds
2022-11-29 13:04:51 +03:00
( sendError requestId )
2021-09-24 01:56:37 +03:00
asyncActionQueryLive
2022-04-07 17:41:43 +03:00
E . SEOnSourceDB ( E . SSLivequery actionIds liveQueryBuilder ) -> do
2021-03-31 13:39:01 +03:00
actionLogMapE <- fmap fst <$> runExceptT ( EA . fetchActionLogResponses actionIds )
2022-11-29 13:04:51 +03:00
actionLogMap <- onLeft actionLogMapE ( withComplete . preExecErr requestId ( Just gqlOpType ) )
2023-05-18 15:55:53 +03:00
granularPrometheusMetricsState <- runGetPrometheusMetricsGranularity
opMetadataE <- liftIO $ startLiveQuery liveQueryBuilder parameterizedQueryHash requestId actionLogMap granularPrometheusMetricsState
2022-11-29 13:04:51 +03:00
lqId <- onLeft opMetadataE ( withComplete . preExecErr requestId ( Just gqlOpType ) )
2021-03-31 13:39:01 +03:00
-- Update async action query subscription state
case NE . nonEmpty ( toList actionIds ) of
2021-09-24 01:56:37 +03:00
Nothing -> do
2023-03-15 16:05:17 +03:00
logQueryLog logger $ QueryLog q Nothing requestId ( QueryLogKindDatabase Nothing )
2021-03-31 13:39:01 +03:00
-- No async action query fields present, do nothing.
pure ()
2021-04-28 20:38:05 +03:00
Just nonEmptyActionIds -> do
2023-03-15 16:05:17 +03:00
logQueryLog logger $ QueryLog q Nothing requestId QueryLogKindAction
2021-04-28 20:38:05 +03:00
liftIO $ do
2021-09-24 01:56:37 +03:00
let asyncActionQueryLive =
2023-05-24 16:51:56 +03:00
ES . LAAQOnSourceDB
$ ES . LiveAsyncActionQueryOnSource lqId actionLogMap
$ restartLiveQuery parameterizedQueryHash requestId liveQueryBuilder granularPrometheusMetricsState ( _grOperationName reqParsed )
2021-04-28 20:38:05 +03:00
onUnexpectedException err = do
2022-11-29 13:04:51 +03:00
sendError requestId err
2023-05-18 15:55:53 +03:00
stopOperation serverEnv wsConn opId granularPrometheusMetricsState ( pure () ) -- Don't log in case opId don't exist
2022-03-21 13:39:49 +03:00
ES . addAsyncActionLiveQuery
2022-04-07 17:41:43 +03:00
( ES . _ssAsyncActions subscriptionsState )
2021-09-24 01:56:37 +03:00
opId
nonEmptyActionIds
onUnexpectedException
asyncActionQueryLive
2022-04-07 17:41:43 +03:00
E . SEOnSourceDB ( E . SSStreaming rootFieldName streamQueryBuilder ) -> do
2023-05-18 15:55:53 +03:00
granularPrometheusMetricsState <- runGetPrometheusMetricsGranularity
liftIO $ startStreamingQuery rootFieldName streamQueryBuilder parameterizedQueryHash requestId granularPrometheusMetricsState
2021-03-31 13:39:01 +03:00
2023-04-25 23:28:03 +03:00
liftIO $ Prometheus . Counter . inc ( gqlRequestsSubscriptionSuccess gqlMetrics )
2021-09-06 15:26:45 +03:00
liftIO $ logOpEv ODStarted ( Just requestId ) ( Just parameterizedQueryHash )
2018-07-20 10:22:46 +03:00
where
2021-08-24 19:25:12 +03:00
-- Protocol-specific callbacks, each projected out of the 'WS.WSActions'
-- record that the caller passed in as @onMessageActions@. Binding them here
-- lets the body of the handler refer to them by short names.
--
-- Data-message constructor (used to wrap a 'DataMsg' before sending).
sendDataMsg = WS . _wsaGetDataMessageType onMessageActions
-- Action taken from the record's connection-close field.
closeConnAction = WS . _wsaConnectionCloseAction onMessageActions
-- Action taken from the record's post-execution error message field.
postExecErrAction = WS . _wsaPostExecErrMessageAction onMessageActions
2021-11-04 15:38:57 +03:00
-- Error-message formatter taken from the record.
fmtErrorMessage = WS . _wsaErrorMsgFormat onMessageActions
2021-08-24 19:25:12 +03:00
2021-09-29 19:20:06 +03:00
-- Widen the error type of an 'ExceptT' computation: a 'QErr' failure is
-- re-tagged as the 'Right' alternative of @Either GQExecError QErr@, so that
-- it can be sequenced with steps that may fail with either kind of error.
-- Successful results pass through unchanged.
doQErr ::
2023-05-24 16:51:56 +03:00
( Monad n ) =>
2021-09-29 19:20:06 +03:00
ExceptT QErr n a ->
ExceptT ( Either GQExecError QErr ) n a
2020-10-07 13:23:17 +03:00
doQErr = withExceptT Right
2021-09-29 19:20:06 +03:00
-- Run a transformation @f@ that needs its own, separate error channel @e@
-- around an action that already fails with @f@-typed errors.
--
-- Arguments, in order:
--   * @embed@  - converts an error raised in the extra @e@ layer into the
--                action's own error type.
--   * @f@      - the transformation, operating on the action once it has been
--                lifted underneath an additional @ExceptT e@ layer.
--   * @action@ - the computation being wrapped.
--
-- The result lives in the original @ExceptT f n@ stack again: the extra layer
-- is run immediately, and an error produced in it is rethrown via @embed@.
withErr ::
forall e f n a .
2023-05-24 16:51:56 +03:00
( Monad n ) =>
2021-09-29 19:20:06 +03:00
( e -> f ) ->
( ExceptT e ( ExceptT f n ) a -> ExceptT e ( ExceptT f n ) a ) ->
ExceptT f n a ->
ExceptT f n a
withErr embed f action = do
-- 'lift' places the action under the new outer layer; 'runExceptT' peels that
-- layer off again, exposing its outcome as an 'Either e a'.
res <- runExceptT $ f $ lift action
2022-04-22 22:53:12 +03:00
-- NOTE(review): 'onLeft' is defined elsewhere; presumably it returns the
-- 'Right' value and runs the handler on 'Left' - confirm.
onLeft res ( throwError . embed )
2021-09-29 19:20:06 +03:00
2023-04-27 10:41:55 +03:00
-- Argument-flipped 'InsOrdHashMap.traverseWithKey': takes the map first and
-- the per-key effectful function last, so the function can be written as a
-- trailing lambda at the call site.
forWithKey = flip InsOrdHashMap . traverseWithKey
2020-10-07 13:23:17 +03:00
2020-08-06 17:07:23 +03:00
-- Telemetry transport tag for this handler: always the WebSocket constructor.
-- NOTE(review): presumably picked up by the @Telem.RequestDimensions { .. }@
-- record wildcards used when recording timing metrics - confirm the field name.
telemTransport = Telem . WebSocket
2020-10-07 13:23:17 +03:00
2021-09-29 19:20:06 +03:00
-- Dispatch on the outcome of an execution step.
--
-- Arguments, in order: the request id, the GraphQL operation type, the
-- outcome to inspect, and the continuation to run on success.
--
--   * A 'GQExecError' (outer 'Left', inner 'Left') is handed to
--     @postExecErr'@ together with the operation type.
--   * A 'QErr' (outer 'Left', inner 'Right') is handed to @postExecErr@,
--     which additionally receives the request id.
--   * A successful value is passed to the continuation.
--
-- NOTE(review): @postExecErr@ and @postExecErr'@ are defined outside this
-- view; what they send to the client is not visible here.
handleResult ::
2021-09-24 01:56:37 +03:00
forall a .
RequestId ->
2022-07-24 00:18:01 +03:00
G . OperationType ->
2021-09-24 01:56:37 +03:00
Either ( Either GQExecError QErr ) a ->
( a -> ExceptT () m () ) ->
ExceptT () m ()
2022-11-29 13:04:51 +03:00
handleResult requestId gqlOpType r f = case r of
2022-07-24 00:18:01 +03:00
Left ( Left err ) -> postExecErr' gqlOpType err
2022-11-29 13:04:51 +03:00
Left ( Right err ) -> postExecErr requestId gqlOpType err
2021-09-24 01:56:37 +03:00
Right results -> f results
2021-04-01 23:40:31 +03:00
2022-11-29 13:04:51 +03:00
-- Send the combined result of a multi-field execution to the client and
-- record the associated metrics. Failures in @r@ are routed through
-- 'handleResult'; everything below runs only when every field succeeded.
--
-- @timerTot@ is an action yielding the total elapsed time; @pqh@ is the
-- parameterized query hash forwarded to 'sendSuccResp'.
sendResultFromFragments telemQueryType timerTot requestId r opName pqh gqlOpType =
handleResult requestId gqlOpType r \ results -> do
2021-10-26 14:44:18 +03:00
-- Aggregate the per-field annotations: localities are combined monoidally,
-- IO times are summed.
let telemLocality = foldMap arpLocality results
telemTimeIO = convertDuration $ sum $ fmap arpTimeIO results
2022-07-24 00:18:01 +03:00
-- The timer is read before the response is sent, so that sending the
-- response is not counted in the total.
totalTime <- timerTot
let telemTimeTot = Seconds totalTime
2023-05-24 16:51:56 +03:00
sendSuccResp ( encodeAnnotatedResponseParts results ) opName pqh
$ ES . SubscriptionMetadata
$ sum
$ fmap arpTimeIO results
2021-04-01 23:40:31 +03:00
-- Telemetry. NOTE: don't time network IO:
2021-09-24 01:56:37 +03:00
-- NOTE(review): the record wildcards presumably capture the @telem*@ names
-- in scope (including the @telemQueryType@ parameter) - confirm field names.
Telem . recordTimingMetric Telem . RequestDimensions { .. } Telem . RequestTimings { .. }
2022-07-24 00:18:01 +03:00
liftIO $ recordGQLQuerySuccess totalTime gqlOpType
2021-09-24 01:56:37 +03:00
-- | Execute a single root field against a remote schema.
--
-- Steps, all inside a tracing span named after the root field:
--
-- 1. read the environment from the current app context;
-- 2. run the outgoing request with 'E.execRemoteGQ';
-- 3. extract this root field's value from the response with
--    'extractFieldFromResponse', applying the result customizer;
-- 4. run 'RJ.processRemoteJoins' on the extracted value with the given
--    (optional) remote joins.
--
-- The result is an 'AnnotatedResponsePart' carrying the IO time reported by
-- 'E.execRemoteGQ', the 'Telem.Remote' locality and an empty final list.
-- 'QErr' failures of steps 2 and 4 are re-tagged with 'doQErr'.
runRemoteGQ ::
  RequestId ->
  GQLReqUnparsed ->
  RootFieldAlias ->
  UserInfo ->
  [HTTP.Header] ->
  RemoteSchemaInfo ->
  ResultCustomizer ->
  GQLReqOutgoing ->
  Maybe RJ.RemoteJoins ->
  ExceptT (Either GQExecError QErr) (ExceptT () m) AnnotatedResponsePart
runRemoteGQ requestId reqUnparsed fieldName userInfo reqHdrs rsi resultCustomizer gqlReq remoteJoins = Tracing.newSpan (" Remote schema query for root field " <>> fieldName) $ do
  env <- liftIO $ acEnvironment <$> getAppContext appStateRef
  (telemTimeIO_DT, _respHdrs, resp) <-
    doQErr
      $ E.execRemoteGQ env userInfo reqHdrs (rsDef rsi) gqlReq
  -- 'extractFieldFromResponse' runs one monad layer below; lift it in place.
  value <- hoist lift $ extractFieldFromResponse fieldName resultCustomizer resp
  finalResponse <-
    doQErr
      $ RJ.processRemoteJoins
        requestId
        logger
        agentLicenseKey
        env
        reqHdrs
        userInfo
        -- TODO: avoid encode and decode here
        (encJFromOrderedValue value)
        remoteJoins
        reqUnparsed
  pure $ AnnotatedResponsePart telemTimeIO_DT Telem.Remote finalResponse []
2019-05-29 14:51:09 +03:00
2021-09-24 01:56:37 +03:00
-- Positional destructuring of 'serverEnv'. Only the named positions are bound;
-- the underscore positions are fields that are not used through this pattern.
WSServerEnv
  logger
  subscriptionsState
  appStateRef
  _
  _
  readOnlyMode
  _
  _keepAliveDelay
  _serverMetrics
  prometheusMetrics
  _ = serverEnv
2022-07-24 00:18:01 +03:00
2023-03-30 19:31:50 +03:00
-- Action that reads the current app context from 'appStateRef' and returns
-- the pair (live query options, stream query options). It is passed to the
-- subscription registration functions below.
getSubscriptionOptions = liveAndStreamOptions <$> getAppContext appStateRef
  where
    liveAndStreamOptions appCtx =
      (acLiveQueryOptions appCtx, acStreamQueryOptions appCtx)
2022-07-24 00:18:01 +03:00
-- GraphQL request metrics, projected out of the Prometheus metrics bound above.
gqlMetrics = pmGraphQLRequestMetrics prometheusMetrics
2019-03-05 14:09:02 +03:00
2020-06-08 15:13:01 +03:00
-- Per-connection data attached to 'wsConn', destructured positionally.
WSConnData userInfoR opMap errRespTy queryType = WS.getData wsConn
2023-05-24 16:51:56 +03:00
-- | Log an operation event for the current operation ('opId') on this
-- connection.
--
-- The query is attached to the event only when the query-log log type is
-- enabled, and its variables are dropped unless variable capture is on.
-- See Note [Disable query printing when query-log is disabled]
logOpEv :: (MonadIO n) => OpDetail -> Maybe RequestId -> Maybe ParameterizedQueryHash -> n ()
logOpEv opTy reqId parameterizedQueryHash =
  logWSEvent logger wsConn (EOperation operationDetails)
  where
    operationDetails =
      OperationDetails opId reqId (_grOperationName q) opTy queryToLog parameterizedQueryHash
    queryLogEnabled = Set.member L.ELTQueryLog enabledLogTypes
    queryToLog = censoredReq <$ guard queryLogEnabled
    censoredReq = case shouldCaptureVariables of
      CaptureQueryVariables -> q
      DoNotCaptureQueryVariables -> q {_grVariables = Nothing}
2022-11-29 13:04:51 +03:00
2021-09-24 01:56:37 +03:00
-- Select the error encoder that matches the connection's error response type.
getErrFn respTy = case respTy of
  ERTLegacy -> encodeQErr
  ERTGraphqlCompliant -> encodeGQLErr
2020-06-16 18:23:06 +03:00
2022-11-29 13:04:51 +03:00
-- Reject a start request: send a 400 'StartFailed' error message for this
-- operation, log the protocol error, count the failed query (with no known
-- operation type) and finally run the connection close action with the error
-- text as the reason.
sendStartErr errText = do
  sendMsg wsConn (SMErr (ErrorMsg opId encodedErr))
  liftIO $ do
    logOpEv (ODProtoErr errText) Nothing Nothing
    reportGQLQueryError Nothing
    closeConnAction wsConn opId (T.unpack errText)
  where
    encodedErr = getErrFn errRespTy False (err400 StartFailed errText)
2020-06-16 18:23:06 +03:00
2022-11-29 13:04:51 +03:00
-- Send the completion message for this operation, then log the completion
-- event with the given request id and parameterized query hash.
sendCompleted reqId paramQueryHash =
  sendMsg wsConn (SMComplete (CompletionMsg opId))
    >> logOpEv ODCompleted reqId paramQueryHash
2020-06-16 18:23:06 +03:00
2022-11-10 02:30:42 +03:00
-- | Report a 'QErr' raised during execution: log it as a query error for the
-- given request, encode it with the connection's error encoder and forward it,
-- wrapped in a 'GQExecError', to @postExecErr'@.
postExecErr ::
  RequestId ->
  G.OperationType ->
  QErr ->
  ExceptT () m ()
postExecErr reqId gqlOpType qErr = do
  liftIO $ logOpEv (ODQueryErr qErr) (Just reqId) Nothing
  postExecErr' gqlOpType (GQExecError (pure encodedErr))
  where
    encodedErr = getErrFn errRespTy False qErr
2020-10-07 13:23:17 +03:00
2022-07-24 00:18:01 +03:00
-- | Count a failed query of the given operation type, then hand the execution
-- error to the post-execution error action for this connection and operation.
postExecErr' :: G.OperationType -> GQExecError -> ExceptT () m ()
postExecErr' gqlOpType execErr =
  liftIO
    $ reportGQLQueryError (Just gqlOpType)
    >> postExecErrAction wsConn opId execErr
2018-07-20 10:22:46 +03:00
2018-10-16 14:49:24 +03:00
-- Report an error raised before execution started: count the failed query
-- (the operation type may not be known yet) and send the error to the client.
--
-- Open question from the original author: why wouldn't a pre-exec error use
-- the graphql response?
preExecErr reqId mGqlOpType qErr =
  liftIO $ do
    reportGQLQueryError mGqlOpType
    sendError reqId qErr
2021-03-31 13:39:01 +03:00
2022-11-29 13:04:51 +03:00
-- Log a query error for the given request and send it to the client as an
-- error message for this operation. With the legacy response type the encoded
-- error is sent as is; with the GraphQL-compliant type it is first passed, as
-- a singleton list, through 'fmtErrorMessage'.
sendError reqId qErr = do
  logOpEv (ODQueryErr qErr) (Just reqId) Nothing
  sendMsg wsConn (SMErr (ErrorMsg opId payload))
  where
    encodedErr = getErrFn errRespTy False qErr
    payload = case errRespTy of
      ERTLegacy -> encodedErr
      ERTGraphqlCompliant -> fmtErrorMessage [encodedErr]
2018-10-16 14:49:24 +03:00
2021-09-24 01:56:37 +03:00
-- | Send a successful response for this operation as a data message, together
-- with the operation name, the parameterized query hash and the subscription
-- metadata that 'sendMsgWithMetadata' expects.
sendSuccResp ::
  EncJSON ->
  Maybe OperationName ->
  ParameterizedQueryHash ->
  ES.SubscriptionMetadata ->
  ExceptT () m ()
sendSuccResp encJson opName queryHash subscriptionMetadata =
  sendMsgWithMetadata wsConn dataMsg opName (Just queryHash) subscriptionMetadata
  where
    dataMsg = sendDataMsg (DataMsg opId (pure (encJToLBS encJson)))
2018-10-16 14:49:24 +03:00
2022-11-10 02:30:42 +03:00
-- | Run @action@, send the completion message for this operation (without a
-- request id or query hash), and then abort the enclosing 'ExceptT' with
-- @()@. Because it always ends by throwing, the result type is free.
withComplete ::
  ExceptT () m () ->
  ExceptT () m a
withComplete action =
  action
    >> liftIO (sendCompleted Nothing Nothing)
    >> throwError ()
2023-05-18 15:55:53 +03:00
-- Remove the live query identified by @lqId@ and start it again from
-- @liveQueryBuilder@. Returns the new live query id, or 'Nothing' when
-- 'startLiveQuery' fails (the failure itself is discarded).
restartLiveQuery parameterizedQueryHash requestId liveQueryBuilder granularPrometheusMetricsState maybeOperationName lqId actionLogMap = do
  ES.removeLiveQuery
    logger
    (_wseServerMetrics serverEnv)
    (_wsePrometheusMetrics serverEnv)
    subscriptionsState
    lqId
    granularPrometheusMetricsState
    maybeOperationName
  restarted <-
    startLiveQuery liveQueryBuilder parameterizedQueryHash requestId actionLogMap granularPrometheusMetricsState
  pure $ case restarted of
    Left _ -> Nothing
    Right newLqId -> Just newLqId
2021-03-31 13:39:01 +03:00
2023-05-18 15:55:53 +03:00
-- Build the live query plan with @liveQueryBuilder@ and, if that succeeds,
-- register it with 'ES.addLiveQuery' and record the resulting subscriber in
-- 'opMap' under this operation's id. The result is the builder's error on the
-- 'Left', or the id of the new live query on the 'Right'.
startLiveQuery liveQueryBuilder parameterizedQueryHash requestId actionLogMap granularPrometheusMetricsState = do
  builtPlan <- runExceptT (liveQueryBuilder actionLogMap)

  for builtPlan $ \(sourceName, E.SubscriptionQueryPlan exists) -> do
    let !opName = _grOperationName q
        subscriberMetadata =
          ES.mkSubscriberMetadata (WS.getWSId wsConn) opId opName requestId
    -- NOTE!: we mask async exceptions higher in the call stack, but it's
    -- crucial we don't lose lqId after addLiveQuery returns successfully.
    --
    -- The handler is kept as an inline lambda because it has to work for
    -- whichever backend 'exists' wraps.
    !lqId <- liftIO $ AB.dispatchAnyBackend @BackendTransport
      exists
      \(E.MultiplexedSubscriptionQueryPlan liveQueryPlan) ->
        ES.addLiveQuery
          logger
          (_wseServerMetrics serverEnv)
          (_wsePrometheusMetrics serverEnv)
          subscriberMetadata
          subscriptionsState
          getSubscriptionOptions
          sourceName
          parameterizedQueryHash
          opName
          requestId
          liveQueryPlan
          granularPrometheusMetricsState
          (onChange opName parameterizedQueryHash (ES._sqpNamespace liveQueryPlan))

    -- so we don't write thunks to mutable vars
    liftIO $ $assertNFHere (lqId, opName)
    -- NOTE: see crucial `lookup` check above, ensuring this doesn't clobber:
    STM.atomically (STMMap.insert (LiveQuerySubscriber lqId, opName) opId opMap)
    pure lqId
2023-05-18 15:55:53 +03:00
-- Register a streaming subscription for the given root field with
-- 'ES.addStreamSubscriptionQuery' and record the resulting subscriber in
-- 'opMap' under this operation's id.
startStreamingQuery rootFieldName (sourceName, E.SubscriptionQueryPlan exists) parameterizedQueryHash requestId granularPrometheusMetricsState = do
  let !opName = _grOperationName q
      subscriberMetadata = ES.mkSubscriberMetadata (WS.getWSId wsConn) opId opName requestId
  -- NOTE!: we mask async exceptions higher in the call stack, but it's
  -- crucial we don't lose streamSubscriberId after addStreamSubscriptionQuery
  -- returns successfully.
  --
  -- The id is bound strictly, like the live query id in 'startLiveQuery',
  -- because it is stored in a mutable map right below.
  !streamSubscriberId <- liftIO $ AB.dispatchAnyBackend @BackendTransport
    exists
    \(E.MultiplexedSubscriptionQueryPlan streamQueryPlan) ->
      ES.addStreamSubscriptionQuery
        logger
        (_wseServerMetrics serverEnv)
        (_wsePrometheusMetrics serverEnv)
        subscriberMetadata
        subscriptionsState
        getSubscriptionOptions
        sourceName
        parameterizedQueryHash
        opName
        requestId
        (_rfaAlias rootFieldName)
        streamQueryPlan
        granularPrometheusMetricsState
        (onChange opName parameterizedQueryHash $ ES._sqpNamespace streamQueryPlan)
  liftIO $ $assertNFHere (streamSubscriberId, opName) -- so we don't write thunks to mutable vars
  STM.atomically
    $
    -- NOTE: see crucial `lookup` check above, ensuring this doesn't clobber:
    STMMap.insert (StreamingQuerySubscriber streamSubscriberId, opName) opId opMap
  pure ()
2018-10-16 14:49:24 +03:00
-- Callback invoked with every new subscription result: forwards it to the
-- client over the websocket as a data message for this operation.
--
-- A successful response is sent together with its metadata (operation name,
-- query hash, execution time) and, when the source has a namespace, wrapped
-- in that namespace. Any other result is sent as a plain data message.
onChange :: Maybe OperationName -> ParameterizedQueryHash -> Maybe Name -> ES.OnChange
onChange opName queryHash namespace result = case result of
  Right (ES.SubscriptionResponse payload execTime) ->
    let toLazyPayload = maybe LBS.fromStrict wrapNamespace namespace
        dataMsg = sendDataMsg (DataMsg opId (pure (toLazyPayload payload)))
     in sendMsgWithMetadata
          wsConn
          dataMsg
          opName
          (Just queryHash)
          (ES.SubscriptionMetadata execTime)
  other ->
    let lazyResult = fmap (LBS.fromStrict . ES._lqrPayload) other
     in sendMsg wsConn (sendDataMsg (DataMsg opId lazyResult))
2018-07-20 10:22:46 +03:00
2021-10-29 17:42:07 +03:00
-- Nest the raw database response under the source's namespace: the result is
-- a single-field JSON object keyed by the namespace name.
wrapNamespace :: Name -> ByteString -> LBS.ByteString
wrapNamespace namespace bs = encJToLBS (encJFromAssocList [namespacedField])
  where
    namespacedField = (unName namespace, encJFromBS bs)
2020-06-16 18:23:06 +03:00
-- Run an 'ExceptT' action and discard its outcome, whether it failed or not.
catchAndIgnore :: ExceptT () m () -> m ()
catchAndIgnore action = () <$ runExceptT action
2018-07-20 10:22:46 +03:00
2022-07-24 00:18:01 +03:00
-- Bump the failure counter matching the operation type of a failed GraphQL
-- request; 'Nothing' (operation type not known) goes to the "unknown" counter.
reportGQLQueryError :: Maybe G.OperationType -> IO ()
reportGQLQueryError mOpType =
  Prometheus.Counter.inc $ case mOpType of
    Nothing -> gqlRequestsUnknownFailure gqlMetrics
    Just G.OperationTypeQuery -> gqlRequestsQueryFailure gqlMetrics
    Just G.OperationTypeMutation -> gqlRequestsMutationFailure gqlMetrics
    Just G.OperationTypeSubscription -> gqlRequestsSubscriptionFailure gqlMetrics
2022-07-24 00:18:01 +03:00
-- Record a successful GraphQL request: increment the success counter for its
-- operation type and feed the total execution time into the matching histogram.
recordGQLQuerySuccess :: DiffTime -> G.OperationType -> IO ()
recordGQLQuerySuccess totalTime opType = case opType of
  G.OperationTypeQuery ->
    tally gqlRequestsQuerySuccess gqlExecutionTimeSecondsQuery
  G.OperationTypeMutation ->
    tally gqlRequestsMutationSuccess gqlExecutionTimeSecondsMutation
  -- We do not collect metrics for subscriptions at the request level.
  G.OperationTypeSubscription -> pure ()
  where
    -- Increment the given counter, then observe the elapsed time.
    tally counterOf histogramOf = do
      Prometheus.Counter.inc (counterOf gqlMetrics)
      Prometheus.Histogram.observe (histogramOf gqlMetrics) (realToFrac totalTime)
2021-08-24 19:25:12 +03:00
-- | Handle one raw message received from a websocket client.
--
-- The whole handler runs inside a fresh trace. The payload is decoded as a
-- client message: when decoding fails, the error is logged and the on-error
-- action from @onMessageActions@ is invoked with
-- 'WS.ClientMessageParseFailed'; otherwise the message is dispatched to the
-- handler for its constructor.
onMessage ::
  ( MonadIO m,
    UserAuthentication m,
    E.MonadGQLExecutionCheck m,
    MonadQueryLog m,
    MonadExecutionLog m,
    MonadExecuteQuery m,
    MC.MonadBaseControl IO m,
    MonadMetadataStorage m,
    MonadQueryTags m,
    HasResourceLimits m,
    ProvidesNetwork m,
    Tracing.MonadTrace m,
    MonadGetPolicies m
  ) =>
  HashSet (L.EngineLogType L.Hasura) ->
  IO AuthMode ->
  WSServerEnv impl ->
  WSConn ->
  LBS.ByteString ->
  WS.WSActions WSConnData ->
  Maybe (CredentialCache AgentLicenseKey) ->
  m ()
onMessage enabledLogTypes authMode serverEnv wsConn msgRaw onMessageActions agentLicenseKey =
  Tracing.newTrace (_wseTraceSamplingPolicy serverEnv) " websocket " $
    case J.eitherDecode msgRaw of
      Left parseErr -> do
        let connErr = ConnErrMsg (" parsing ClientMessage failed: " <> T.pack parseErr)
        logWSEvent logger wsConn (EConnErr connErr)
        liftIO (onErrAction wsConn connErr WS.ClientMessageParseFailed)
      -- common to both protocols
      Right (CMConnInit params) ->
        onConnInit
          logger
          (_wseHManager serverEnv)
          wsConn
          authMode
          params
          onErrAction
          keepAliveMessageAction
      Right (CMStart startMsg) -> do
        -- The metrics config of the current schema cache decides whether
        -- query variables are captured for this operation.
        schemaCache <- liftIO (getSchemaCache (_wseAppStateRef serverEnv))
        let shouldCaptureVariables
              | _mcAnalyzeQueryVariables (scMetricsConfig schemaCache) = CaptureQueryVariables
              | otherwise = DoNotCaptureQueryVariables
        onStart enabledLogTypes agentLicenseKey serverEnv wsConn shouldCaptureVariables startMsg onMessageActions
      Right (CMStop stopMsg) ->
        runGetPrometheusMetricsGranularity >>= onStop serverEnv wsConn stopMsg
      -- specific to graphql-ws
      Right (CMPing mPayload) -> onPing wsConn mPayload
      Right (CMPong _mPayload) -> pure ()
      -- specific to apollo clients
      Right CMConnTerm -> liftIO (WS.closeConn wsConn " GQL_CONNECTION_TERMINATE received ")
  where
    logger = _wseLogger serverEnv
    onErrAction = WS._wsaOnErrorMessageAction onMessageActions
    keepAliveMessageAction = WS._wsaKeepAliveAction onMessageActions
-- | Answer a client ping by sending a pong that carries the same optional payload.
onPing :: (MonadIO m) => WSConn -> Maybe PingPongPayload -> m ()
onPing wsConn = liftIO . sendMsg wsConn . SMPong
2023-05-18 15:55:53 +03:00
-- | Handle a client's stop message by stopping the referenced operation.
--
-- If the operation is not present in the connection's operation map, a
-- debug-level line is logged instead.
onStop :: (MonadIO m) => WSServerEnv impl -> WSConn -> StopMsg -> IO GranularPrometheusMetricsState -> m ()
onStop serverEnv wsConn (StopMsg opId) granularPrometheusMetricsState =
  liftIO (stopOperation serverEnv wsConn opId granularPrometheusMetricsState logUnknownOperation)
  where
    -- When a stop message is received for an operation, it may not be present
    -- in OpMap in these cases:
    -- 1. If the operation is a query/mutation - as we remove the operation
    --    from the OpMap as soon as it is executed
    -- 2. A misbehaving client
    -- 3. A bug on our end
    logUnknownOperation :: IO ()
    logUnknownOperation =
      L.unLogger
        (_wseLogger serverEnv)
        (L.UnstructuredLog L.LevelDebug (fromString unknownOperationText))

    unknownOperationText =
      mconcat
        [ " Received STOP for an operation that we have no record for: ",
          show (unOperationId opId),
          " (could be a query/mutation operation or a misbehaving client or a bug) "
        ]
2023-05-18 15:55:53 +03:00
-- | Stop the operation @opId@ on this connection.
--
-- Looks the operation up in the connection's operation map. If it is found,
-- the stop is logged and the live or streaming subscription is removed;
-- otherwise @logWhenOpNotExist@ is run. In both cases the entry for @opId@ is
-- deleted from the map afterwards.
stopOperation :: WSServerEnv impl -> WSConn -> OperationId -> IO GranularPrometheusMetricsState -> IO () -> IO ()
stopOperation serverEnv wsConn opId granularPrometheusMetricsState logWhenOpNotExist = do
  registered <- STM.atomically (STMMap.lookup opId opMap)
  maybe logWhenOpNotExist unsubscribe registered
  STM.atomically (STMMap.delete opId opMap)
  where
    opMap = _wscOpMap (WS.getData wsConn)
    logger = _wseLogger serverEnv
    serverMetrics = _wseServerMetrics serverEnv
    prometheusMetrics = _wsePrometheusMetrics serverEnv
    subscriptionState = _wseSubscriptionState serverEnv

    -- Log the stop event, then remove the subscriber of whichever kind was registered.
    unsubscribe (subscriberDetails, operationName) = do
      logWSEvent logger wsConn . EOperation $
        OperationDetails opId Nothing operationName ODStopped Nothing Nothing
      case subscriberDetails of
        LiveQuerySubscriber lqId ->
          ES.removeLiveQuery
            logger
            serverMetrics
            prometheusMetrics
            subscriptionState
            lqId
            granularPrometheusMetricsState
            operationName
        StreamingQuerySubscriber streamSubscriberId ->
          ES.removeStreamingQuery
            logger
            serverMetrics
            prometheusMetrics
            subscriptionState
            streamSubscriberId
            granularPrometheusMetricsState
            operationName
2018-07-20 10:22:46 +03:00
2021-09-24 01:56:37 +03:00
onConnInit ::
Rewrite `Tracing` to allow for only one `TraceT` in the entire stack.
This PR is on top of #7789.
### Description
This PR entirely rewrites the API of the Tracing library, to make `interpTraceT` a thing of the past. Before this change, we ran traces by sticking a `TraceT` on top of whatever we were doing. This had several major drawbacks:
- we were carrying a bunch of `TraceT` across the codebase, and the entire codebase had to know about it
- we needed to carry a second class constraint around (`HasReporterM`) to be able to run all of those traces
- we kept having to do stack rewriting with `interpTraceT`, which went from inconvenient to horrible
- we had to declare several behavioral instances on `TraceT m`
This PR rewrite all of `Tracing` using a more conventional model: there is ONE `TraceT` at the bottom of the stack, and there is an associated class constraint `MonadTrace`: any part of the code that happens to satisfy `MonadTrace` is able to create new traces. We NEVER have to do stack rewriting, `interpTraceT` is gone, and `TraceT` and `Reporter` become implementation details that 99% of the code is blissfully unaware of: code that needs to do tracing only needs to declare that the monad in which it operates implements `MonadTrace`.
In doing so, this PR revealed **several bugs in the codebase**: places where we were expecting to trace something, but due to the default instance of `HasReporterM IO` we would actually not do anything. This PR also splits the code of `Tracing` in more byte-sized modules, with the goal of potentially moving to `server/lib` down the line.
### Remaining work
This PR is a draft; what's left to do is:
- [x] make Pro compile; i haven't updated `HasuraPro/Main` yet
- [x] document Tracing by writing a note that explains how to use the library, and the meaning of "reporter", "trace" and "span", as well as the pitfalls
- [x] discuss some of the trade-offs in the implementation, which is why i'm opening this PR already despite it not fully building yet
- [x] it depends on #7789 being merged first
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/7791
GitOrigin-RevId: cadd32d039134c93ddbf364599a2f4dd988adea8
2023-03-13 20:37:16 +03:00
( MonadIO m , UserAuthentication m ) =>
2021-09-24 01:56:37 +03:00
L . Logger L . Hasura ->
2022-02-16 10:08:51 +03:00
HTTP . Manager ->
2021-09-24 01:56:37 +03:00
WSConn ->
2023-03-30 19:31:50 +03:00
IO AuthMode ->
2021-09-24 01:56:37 +03:00
Maybe ConnParams ->
-- | this is the message handler for handling errors on initializing a connection from the client
WS . WSOnErrorMessageAction WSConnData ->
-- | this is the message handler for handling "keep-alive" messages to the client
WS . WSKeepAliveMessageAction WSConnData ->
Rewrite `Tracing` to allow for only one `TraceT` in the entire stack.
This PR is on top of #7789.
### Description
This PR entirely rewrites the API of the Tracing library, to make `interpTraceT` a thing of the past. Before this change, we ran traces by sticking a `TraceT` on top of whatever we were doing. This had several major drawbacks:
- we were carrying a bunch of `TraceT` across the codebase, and the entire codebase had to know about it
- we needed to carry a second class constraint around (`HasReporterM`) to be able to run all of those traces
- we kept having to do stack rewriting with `interpTraceT`, which went from inconvenient to horrible
- we had to declare several behavioral instances on `TraceT m`
This PR rewrite all of `Tracing` using a more conventional model: there is ONE `TraceT` at the bottom of the stack, and there is an associated class constraint `MonadTrace`: any part of the code that happens to satisfy `MonadTrace` is able to create new traces. We NEVER have to do stack rewriting, `interpTraceT` is gone, and `TraceT` and `Reporter` become implementation details that 99% of the code is blissfully unaware of: code that needs to do tracing only needs to declare that the monad in which it operates implements `MonadTrace`.
In doing so, this PR revealed **several bugs in the codebase**: places where we were expecting to trace something, but due to the default instance of `HasReporterM IO` we would actually not do anything. This PR also splits the code of `Tracing` in more byte-sized modules, with the goal of potentially moving to `server/lib` down the line.
### Remaining work
This PR is a draft; what's left to do is:
- [x] make Pro compile; i haven't updated `HasuraPro/Main` yet
- [x] document Tracing by writing a note that explains how to use the library, and the meaning of "reporter", "trace" and "span", as well as the pitfalls
- [x] discuss some of the trade-offs in the implementation, which is why i'm opening this PR already despite it not fully building yet
- [x] it depends on #7789 being merged first
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/7791
GitOrigin-RevId: cadd32d039134c93ddbf364599a2f4dd988adea8
2023-03-13 20:37:16 +03:00
m ()
2023-03-30 19:31:50 +03:00
onConnInit logger manager wsConn getAuthMode connParamsM onConnInitErrAction keepAliveMessageAction = do
Rewrite GraphQL schema generation and query parsing (close #2801) (#4111)
Aka “the PDV refactor.” History is preserved on the branch 2801-graphql-schema-parser-refactor.
* [skip ci] remove stale benchmark commit from commit_diff
* [skip ci] Check for root field name conflicts between remotes
* [skip ci] Additionally check for conflicts between remotes and DB
* [skip ci] Check for conflicts in schema when tracking a table
* [skip ci] Fix equality checking in GraphQL AST
* server: fix mishandling of GeoJSON inputs in subscriptions (fix #3239) (#4551)
* Add support for multiple top-level fields in a subscription to improve testability of subscriptions
* Add an internal flag to enable multiple subscriptions
* Add missing call to withConstructorFn in live queries (fix #3239)
Co-authored-by: Alexis King <lexi.lambda@gmail.com>
* Scheduled triggers (close #1914) (#3553)
server: add scheduled triggers
Co-authored-by: Alexis King <lexi.lambda@gmail.com>
Co-authored-by: Marion Schleifer <marion@hasura.io>
Co-authored-by: Karthikeyan Chinnakonda <karthikeyan@hasura.io>
Co-authored-by: Aleksandra Sikora <ola.zxcvbnm@gmail.com>
* dev.sh: bump version due to addition of croniter python dependency
* server: fix an introspection query caching issue (fix #4547) (#4661)
Introspection queries accept variables, but we need to make sure to
also touch the variables that we ignore, so that an introspection
query is marked not reusable if we are not able to build a correct
query plan for it.
A better solution here would be to deal with such unused variables
correctly, so that more introspection queries become reusable.
An even better solution would be to type-safely track *how* to reuse
which variables, rather than to split the reusage marking from the
planning.
Co-authored-by: Tirumarai Selvan <tiru@hasura.io>
* flush log buffer on exception in mkWaiApp ( fix #4772 ) (#4801)
* flush log buffer on exception in mkWaiApp
* add comment to explain the introduced change
* add changelog
* allow logging details of a live query polling thread (#4959)
* changes for poller-log
add various multiplexed query info in poller-log
* minor cleanup, also fixes a bug which will return duplicate data
* Live query poller stats can now be logged
This also removes in-memory stats that are collected about batched
query execution as the log lines when piped into a monitoring tool
will give us better insights.
* allow poller-log to be configurable
* log minimal information in the livequery-poller-log
Other information can be retrieved from /dev/subscriptions/extended
* fix few review comments
* avoid marshalling and unmarshalling from ByteString to EncJSON
* separate out SubscriberId and SubscriberMetadata
Co-authored-by: Anon Ray <rayanon004@gmail.com>
* Don't compile in developer APIs by default
* Tighten up handling of admin secret, more docs
Store the admin secret only as a hash to prevent leaking the secret
inadvertently, and to prevent timing attacks on the secret.
NOTE: best practice for stored user passwords is a function with a
tunable cost like bcrypt, but our threat model is quite different (even
if we thought we could reasonably protect the secret from an attacker
who could read arbitrary regions of memory), and bcrypt is far too slow
(by design) to perform on each request. We'd have to rely on our
(technically savvy) users to choose high entropy passwords in any case.
Referencing #4736
* server/docs: add instructions to fix loss of float precision in PostgreSQL <= 11 (#5187)
This adds a server flag, --pg-connection-options, that can be used to set a PostgreSQL connection parameter, extra_float_digits, that needs to be used to avoid loss of data on older versions of PostgreSQL, which have odd default behavior when returning float values. (fixes #5092)
* [skip ci] Add new commits from master to the commit diff
* [skip ci] serve default directives (skip & include) over introspection
* [skip ci] Update non-Haskell assets with the version on master
* server: refactor GQL execution check and config API (#5094)
Co-authored-by: Vamshi Surabhi <vamshi@hasura.io>
Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com>
* [skip ci] fix js issues in tests by pinning dependencies version
* [skip ci] bump graphql version
* [skip ci] Add note about memory usage
* generalize query execution logic on Postgres (#5110)
* generalize PGExecCtx to support specialized functions for various operations
* fix tests compilation
* allow customising PGExecCtx when starting the web server
* server: changes catalog initialization and logging for pro customization (#5139)
* new typeclass to abstract the logic of QueryLog-ing
* abstract the logic of logging websocket-server logs
introduce a MonadWSLog typeclass
* move catalog initialization to init step
expose a helper function to migrate catalog
create schema cache in initialiseCtx
* expose various modules and functions for pro
* [skip ci] cosmetic change
* [skip ci] fix test calling a mutation that does not exist
* [skip ci] minor text change
* [skip ci] refactored input values
* [skip ci] remove VString Origin
* server: fix updating of headers behaviour in the update cron trigger API and create future events immediately (#5151)
* server: fix bug to update headers in an existing cron trigger and create future events
Co-authored-by: Tirumarai Selvan <tiru@hasura.io>
* Lower stack chunk size in RTS to reduce thread STACK memory (closes #5190)
This reduces memory consumption for new idle subscriptions significantly
(see linked ticket).
The hypothesis is: we fork a lot of threads per websocket, and some of
these use slightly more than the initial 1K stack size, so the first
overflow balloons to 32K, when significantly less is required.
However: running with `+RTS -K1K -xc` did not seem to show evidence of
any overflows! So it's a mystery why this improves things.
GHC should probably also be doubling the stack buffer at each overflow
or doing something even smarter; the knobs we have aren't so helpful.
* [skip ci] fix todo and schema generation for aggregate fields
* 5087 libpq pool leak (#5089)
Shrink libpq buffers to 1MB before returning connection to pool. Closes #5087
See: https://github.com/hasura/pg-client-hs/pull/19
Also related: #3388 #4077
* bump pg-client-hs version (fixes a build issue on some environments) (#5267)
* do not use prepared statements for mutations
* server: unlock scheduled events on graceful shutdown (#4928)
* Fix buggy parsing of new --conn-lifetime flag in 2b0e3774
* [skip ci] remove cherry-picked commit from commit_diff.txt
* server: include additional fields in scheduled trigger webhook payload (#5262)
* include scheduled triggers metadata in the webhook body
Co-authored-by: Tirumarai Selvan <tiru@hasura.io>
* server: call the webhook asynchronously in event triggers (#5352)
* server: call the webhook asynchronously in event triggers
* Expose all modules in Cabal file (#5371)
* [skip ci] update commit_diff.txt
* [skip ci] fix cast exp parser & few TODOs
* [skip ci] fix remote fields arguments
* [skip ci] fix few more TODO, no-op refactor, move resolve/action.hs to execute/action.hs
* Pass environment variables around as a data structure, via @sordina (#5374)
* Pass environment variables around as a data structure, via @sordina
* Resolving build error
* Adding Environment passing note to changelog
* Removing references to ILTPollerLog as this seems to have been reintroduced from a bad merge
* removing commented-out imports
* Language pragmas already set by project
* Linking async thread
* Apply suggestions from code review
Use `runQueryTx` instead of `runLazyTx` for queries.
* remove the non-user facing entry in the changelog
Co-authored-by: Phil Freeman <paf31@cantab.net>
Co-authored-by: Phil Freeman <phil@hasura.io>
Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com>
* [skip ci] fix: restrict remote relationship field generation for hasura queries
* [skip ci] no-op refactor; move insert execution code from schema parser module
* server: call the webhook asynchronously in event triggers (#5352)
* server: call the webhook asynchronously in event triggers
* Expose all modules in Cabal file (#5371)
* [skip ci] update commit_diff.txt
* Pass environment variables around as a data structure, via @sordina (#5374)
* Pass environment variables around as a data structure, via @sordina
* Resolving build error
* Adding Environment passing note to changelog
* Removing references to ILTPollerLog as this seems to have been reintroduced from a bad merge
* removing commented-out imports
* Language pragmas already set by project
* Linking async thread
* Apply suggestions from code review
Use `runQueryTx` instead of `runLazyTx` for queries.
* remove the non-user facing entry in the changelog
Co-authored-by: Phil Freeman <paf31@cantab.net>
Co-authored-by: Phil Freeman <phil@hasura.io>
Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com>
* [skip ci] implement header checking
Probably closes #14 and #3659.
* server: refactor 'pollQuery' to have a hook to process 'PollDetails' (#5391)
Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com>
* update pg-client (#5421)
* [skip ci] update commit_diff
* Fix latency buckets for telemetry data
These must have gotten messed up during a refactor. As a consequence
almost all samples received so far fall into the single erroneous 0 to
1K seconds (originally supposed to be 1ms?) bucket.
I also re-thought what the numbers should be, but these are still
arbitrary and might want adjusting in the future.
* [skip ci] include the latest commit compared against master in commit_diff
* [skip ci] include new commits from master in commit_diff
* [skip ci] improve description generation
* [skip ci] sort all introspect arrays
* [skip ci] allow parsers to specify error codes
* [skip ci] fix integer and float parsing error code
* [skip ci] scalar from json errors are now parse errors
* [skip ci] fixed negative integer error message and code
* [skip ci] Re-fix nullability in relationships
* [skip ci] no-op refactor and removed couple of FIXMEs
* [skip ci] uncomment code in 'deleteMetadataObject'
* [skip ci] Fix re-fix of nullability for relationships
* [skip ci] fix default arguments error code
* [skip ci] updated test error message
!!! WARNING !!!
Since all fields accept `null`, they all are technically optional in
the new schema. Meaning there's no such thing as a missing mandatory
field anymore: a field that doesn't have a default value, and which
therefore isn't labelled as "optional" in the schema, will be assumed
to be null if it's missing, meaning it isn't possible anymore to have
an error for a missing mandatory field. The only possible error is now
when an optional positional argument is omitted but is not the last
positional argument.
* [skip ci] cleanup of int scalar parser
* [skip ci] retro-compatibility of offset as string
* [skip ci] Remove commit from commit_diff.txt
Although strictly speaking we don't know if this will work correctly in PDV
if we would implement query plan caching, the fact is that in the theoretical
case that we would have the same issue in PDV, it would probably apply not just
to introspection, and the fix would be written completely differently. So this
old commit is of no value to us other than the heads-up "make sure query plan
caching works correctly even in the presence of unused variables", which is
already part of the test suite.
* Add MonadTrace and MonadExecuteQuery abstractions (#5383)
* [skip ci] Fix accumulation of input object types
Just like object types, interface types, and union types, we have to avoid
circularities when collecting input types from the GraphQL AST.
Additionally, this fixes equality checks for input object types (whose fields
are unordered, and hence should be compared as sets) and enum types (ditto).
* [skip ci] fix fragment error path
* [skip ci] fix node error code
* [skip ci] fix paths in insert queries
* [skip ci] fix path in objects
* [skip ci] manually alter node id path for consistency
* [skip ci] more node error fixups
* [skip ci] one last relay error message fix
* [skip ci] update commit_diff
* Propagate the trace context to event triggers (#5409)
* Propagate the trace context to event triggers
* Handle missing trace and span IDs
* Store trace context as one LOCAL
* Add migrations
* Documentation
* changelog
* Fix warnings
* Respond to code review suggestions
* Respond to code review
* Undo changelog
* Update CHANGELOG.md
Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com>
* server: log request/response sizes for event triggers (#5463)
* server: log request/response sizes for event triggers
event triggers (and scheduled triggers) now have request/response size
in their logs.
* add changelog entry
* Tracing: Simplify HTTP traced request (#5451)
Remove the Inversion of Control (SuspendRequest) and simplify
the tracing of HTTP Requests.
Co-authored-by: Phil Freeman <phil@hasura.io>
* Attach request ID as tracing metadata (#5456)
* Propagate the trace context to event triggers
* Handle missing trace and span IDs
* Store trace context as one LOCAL
* Add migrations
* Documentation
* Include the request ID as trace metadata
* changelog
* Fix warnings
* Respond to code review suggestions
* Respond to code review
* Undo changelog
* Update CHANGELOG.md
* Typo
Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com>
* server: add logging for action handlers (#5471)
* server: add logging for action handlers
* add changelog entry
* change action-handler log type from internal to non-internal
* fix action-handler-log name
* server: pass http and websocket request to logging context (#5470)
* pass request body to logging context in all cases
* add message size logging on the websocket API
this is required by graphql-engine-pro/#416
* message size logging on websocket API
As we need to log all messages received/sent by the websocket server,
it makes sense to log them as part of the websocket server event logs.
Previously, messages received were logged inside the onMessage handler,
and messages sent were logged only for "data" messages (as a server event log)
* fix review comments
Co-authored-by: Phil Freeman <phil@hasura.io>
* server: stop eventing subsystem threads when shutting down (#5479)
* server: stop eventing subsystem threads when shutting down
* Apply suggestions from code review
Co-authored-by: Karthikeyan Chinnakonda <chkarthikeyan95@gmail.com>
Co-authored-by: Phil Freeman <phil@hasura.io>
Co-authored-by: Phil Freeman <paf31@cantab.net>
Co-authored-by: Karthikeyan Chinnakonda <chkarthikeyan95@gmail.com>
* [skip ci] update commit_diff with new commits added in master
* Bugfix to support 0-size HASURA_GRAPHQL_QUERY_PLAN_CACHE_SIZE
Also some minor refactoring of bounded cache module:
- the maxBound check in `trim` was confusing and unnecessary
- consequently trim was unnecessary for lookupPure
Also add some basic tests
* Support only the bounded cache, with default HASURA_GRAPHQL_QUERY_PLAN_CACHE_SIZE of 4000. Closes #5363
* [skip ci] remove merge commit from commit_diff
* server: Fix compiler warning caused by GHC upgrade (#5489)
Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com>
* [skip ci] update all non server code from master
* [skip ci] aligned object field error message with master
* [skip ci] fix remaining undefined?
* [skip ci] remove unused import
* [skip ci] revert to previous error message, fix tests
* Move nullableType/nonNullableType to Schema.hs
These are functions on Types, not on Parsers.
* [skip ci] fix setup to fix backend only test
the order in which permission checks are performed on the branch is
slightly different than on master, resulting in a slightly different
error if there are no other mutations the user has access to. By
adding update permissions, we go back to the expected case.
* [skip ci] fix insert geojson tests to reflect new paths
* [skip ci] fix enum test for better error message
* [skip ci] fix header test for better error message
* [skip ci] fix fragment cycle test for better error message
* [skip ci] fix error message for type mismatch
* [skip ci] fix variable path in test
* [skip ci] adjust tests after bug fix
* [skip ci] more tests fixing
* Add hdb_catalog.current_setting abstraction for reading Hasura settings
As the comment in the function’s definition explains, this is needed to
work around an awkward Postgres behavior.
* [skip ci] Update CONTRIBUTING.md to mention Node setup for Python tests
* [skip ci] Add missing Python tests env var to CONTRIBUTING.md
* [skip ci] fix order of result when subscription is run with multiple nodes
* [skip ci] no-op refactor: fix a warning in Internal/Parser.hs
* [skip ci] throw error when a subscription contains remote joins
* [skip ci] Enable easier profiling by hiding AssertNF behind a flag
In order to compile a profiling build, run:
$ cabal new-build -f profiling --enable-profiling
* [skip ci] Fix two warnings
We used to lookup the objects that implement a given interface by filtering all
objects in the schema document. However, one of the tests expects us to
generate a warning if the provided `implements` field of an introspection query
specifies an object not implementing some interface. So we use that field
instead.
* [skip ci] Fix warnings by commenting out query plan caching
* [skip ci] improve masking/commenting query caching related code & few warning fixes
* [skip ci] Fixed compiler warnings in graphql-parser-hs
* Sync non-Haskell assets with master
* [skip ci] add a test inserting invalid GraphQL but valid JSON value in a jsonb column
* [skip ci] Avoid converting to/from Map
* [skip ci] Apply some hlint suggestions
* [skip ci] remove redundant constraints from buildLiveQueryPlan and explainGQLQuery
* [skip ci] add NOTEs about missing Tracing constraints in PDV from master
* Remove -fdefer-typed-holes, fix warnings
* Update cabal.project.freeze
* Limit GHC’s heap size to 8GB in CI to avoid the OOM killer
* Commit package-lock.json for Python tests’ remote schema server
* restrict env variables start with HASURA_GRAPHQL_ for headers configuration in actions, event triggers & remote schemas (#5519)
* restrict env variables start with HASURA_GRAPHQL_ for headers definition in actions & event triggers
* update CHANGELOG.md
* Apply suggestions from code review
Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com>
* add test for table_by_pk node when roles doesn't have permission to PK
* [skip ci] fix introspection query if any enum column present in primary key (fix #5200) (#5522)
* [skip ci] test case fix for a6450e126bc2d98bcfd3791501986e4627ce6c6f
* [skip ci] add tests to agg queries when role doesn't have access to any cols
* fix backend test
* Simplify subscription execution
* [skip ci] add test to check if required headers are present while querying
* Suppose, table B is related to table A and to query B certain headers are
necessary, then the test checks that we are throwing error when the header
is not set when B is queried through A
* fix mutations not checking for view mutability
* [skip ci] add variable type checking and corresponding tests
* [skip ci] add test to check if update headers are present while doing an upsert
* [skip ci] add positive counterparts to some of the negative permission tests
* fix args missing their description in introspect
* [skip ci] Remove unused function; insert missing markNotReusable call
* [skip ci] Add a Note about InputValue
* [skip ci] Delete LegacySchema/ 🎉
* [skip ci] Delete GraphQL/{Resolve,Validate}/ 🎉
* [skip ci] Delete top-level Resolve/Validate modules; tidy .cabal file
* [skip ci] Delete LegacySchema top-level module
Somehow I missed this one.
* fix input value to json
* [skip ci] elaborate on JSON objects in GraphQL
* [skip ci] add missing file
* [skip ci] add a test with subscription containing remote joins
* add a test with remote joins in mutation output
* [skip ci] Add some comments to Schema/Mutation.hs
* [skip ci] Remove no longer needed code from RemoteServer.hs
* [skip ci] Use a helper function to generate conflict clause parsers
* [skip ci] fix type checker error in fields with default value
* capitalize the header keys in select_articles_without_required_headers
* Somehow, this was the reason the tests were failing. I have no idea, why!
* [skip ci] Add a long Note about optional fields and nullability
* Improve comments a bit; simplify Schema/Common.hs a bit
* [skip ci] full implementation of 5.8.5 type checking.
* [skip ci] fix validation test teardown
* [skip ci] fix schema stitching test
* fix remote schema ignoring enum nullability
* [skip ci] fix fieldOptional to not discard nullability
* revert nullability of use_spheroid
* fix comment
* add required remote fields with arguments for tests
* [skip ci] add missing docstrings
* [skip ci] fixed description of remote fields
* [skip ci] change docstring for consistency
* fix several schema inconsistencies
* revert behaviour change in function arguments parsing
* fix remaining nullability issues in new schema
* minor no-op refactor; use isListType from graphql-parser-hs
* use nullability of remote schema node, while creating a Remote reln
* fix 'ID' input coercing & action 'ID' type relationship mapping
* include ASTs in MonadExecuteQuery
* needed for PRO code-base
* Delete code for "interfaces implementing ifaces" (draft GraphQL spec)
Previously I started writing some code that adds support for a future GraphQL
feature where interfaces may themselves be sub-types of other interfaces.
However, this code was incomplete, and partially incorrect. So this commit
deletes support for that entirely.
* Ignore a remote schema test during the upgrade/downgrade test
The PDV refactor does a better job at exposing a minimal set of types through
introspection. In particular, not every type that is present in a remote schema
is re-exposed by Hasura. The test
test_schema_stitching.py::TestRemoteSchemaBasic::test_introspection assumed that
all types were re-exposed, which is not required for GraphQL compatibility, in
order to test some aspect of our support for remote schemas.
So while this particular test has been updated on PDV, the PDV branch now does
not pass the old test, which we argue to be incorrect. Hence this test is
disabled while we await a release, after which we can re-enable it.
This also re-enables a test that was previously disabled for similar, though
unrelated, reasons.
* add haddock documentation to the action's field parsers
* Deselecting some tests in server-upgrade
Some tests with current build are failing on server upgrade
which it should not. The response is more accurate than
what it was.
Also the upgrade tests were not throwing errors when the test is
expected to return an error, but succeeds. The test framework is
patched to catch this case.
* [skip ci] Add a long Note about interfaces and object types
* send the response headers back to client after running a query
* Deselect a few more tests during upgrade/downgrade test
* Update commit_diff.txt
* change log kind from db_migrate to catalog_migrate (#5531)
* Show method and complete URI in traced HTTP calls (#5525)
Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com>
* restrict env variables start with HASURA_GRAPHQL_ for headers configuration in actions, event triggers & remote schemas (#5519)
* restrict env variables start with HASURA_GRAPHQL_ for headers definition in actions & event triggers
* update CHANGELOG.md
* Apply suggestions from code review
Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com>
* fix introspection query if any enum column present in primary key (fix #5200) (#5522)
* Fix telemetry reporting of transport (websocket was reported as http)
* add log kinds in cli-migrations image (#5529)
* add log kinds in cli-migrations image
* give hint to resolve timeout error
* minor changes and CHANGELOG
* server: set hasura.tracecontext in RQL mutations [#5542] (#5555)
* server: set hasura.tracecontext in RQL mutations [#5542]
* Update test suite
Co-authored-by: Tirumarai Selvan <tiru@hasura.io>
* Add bulldozer auto-merge and -update configuration
We still need to add the github app (as of time of opening this PR)
Afterwards devs should be able to allow bulldozer to automatically
"update" the branch, merging in parent when it changes, as well as
automatically merge when all checks pass.
This is opt-in by adding the `auto-update-auto-merge` label to the PR.
* Remove 'bulldozer' config, try 'kodiak' for auto-merge
see: https://github.com/chdsbd/kodiak
The main issue that bit us was not being able to auto update forked
branches, also:
https://github.com/palantir/bulldozer/issues/66
https://github.com/palantir/bulldozer/issues/145
* Cherry-picked all commits
* [skip ci] Slightly improve formatting
* Revert "fix introspection query if any enum column present in primary key (fix #5200) (#5522)"
This reverts commit 0f9a5afa59a88f6824f4d63d58db246a5ba3fb03.
This undoes a cherry-pick of 34288e1eb5f2c5dad9e6d1e05453dd52397dc970 that was
already done previously in a6450e126bc2d98bcfd3791501986e4627ce6c6f, and
subsequently fixed for PDV in 70e89dc250f8ddc6e2b7930bbe2b3eeaa6dbe1db
* Do a small bit of tidying in Hasura.GraphQL.Parser.Collect
* Fix cherry-picking work
Some previous cherry-picks ended up modifying code that is commented out
* [skip ci] clarified comment regarding insert representation
* [skip ci] removed obsolete todos
* cosmetic change
* fix action error message
* [skip ci] remove obsolete comment
* [skip ci] synchronize stylish haskell extensions list
* use previously defined scalar names in parsers rather than ad-hoc literals
* Apply most syntax hlint hints.
* Clarify comment on update mutation.
* [skip ci] Clarify what fields should be specified for objects
* Update "_inc" description.
* Use record types rather than tuples for IntrospectionResult and ParsedIntrospection
* Get rid of checkFieldNamesUnique (use Data.List.Extended.duplicates)
* Throw more errors when collecting query root names
* [skip ci] clean column parser comment
* Remove dead code inserted in ab65b39
* avoid converting to non-empty list where not needed
* add note and TODO about the disabled checks in PDV
* minor refactor in remoteField' function
* Unify two getObject methods
* Nitpicks in Remote.hs
* Update CHANGELOG.md
* Revert "Unify two getObject methods"
This reverts commit bd6bb40355b3d189a46c0312eb52225e18be57b3.
We do need two different getObject functions as the corresponding error message is different
* Fix error message in Remote.hs
* Update CHANGELOG.md
Co-authored-by: Auke Booij <auke@tulcod.com>
* Apply suggested Changelog fix.
Co-authored-by: Auke Booij <auke@tulcod.com>
* Fix typo in Changelog.
* [skip ci] Update changelog.
* reuse type names to avoid duplication
* Fix Hashable instance for Definition
The presence of `Maybe Unique`, and an optional description, as part of
`Definition`s, means that `Definition`s that are considered `Eq`ual may get
different hashes. This can happen, for instance, when one object is memoized
but another is not.
* [skip ci] Update commit_diff.txt
* Bump parser version.
* Bump freeze file after changes in parser.
* [skip ci] Incorporate commits from master
* Fix developer flag in server/cabal.project.freeze
Co-authored-by: Auke Booij <auke@tulcod.com>
* Deselect a changed ENUM test for upgrade/downgrade CI
* Deselect test here as well
* [skip ci] remove dead code
* Disable more tests for upgrade/downgrade
* Fix which test gets deselected
* Revert "Add hdb_catalog.current_setting abstraction for reading Hasura settings"
This reverts commit 66e85ab9fbd56cca2c28a80201f6604fbe811b85.
* Remove circular reference in cabal.project.freeze
Co-authored-by: Karthikeyan Chinnakonda <karthikeyan@hasura.io>
Co-authored-by: Auke Booij <auke@hasura.io>
Co-authored-by: Tirumarai Selvan <tiru@hasura.io>
Co-authored-by: Marion Schleifer <marion@hasura.io>
Co-authored-by: Aleksandra Sikora <ola.zxcvbnm@gmail.com>
Co-authored-by: Brandon Simmons <brandon.m.simmons@gmail.com>
Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com>
Co-authored-by: Anon Ray <rayanon004@gmail.com>
Co-authored-by: rakeshkky <12475069+rakeshkky@users.noreply.github.com>
Co-authored-by: Anon Ray <ecthiender@users.noreply.github.com>
Co-authored-by: Vamshi Surabhi <vamshi@hasura.io>
Co-authored-by: Antoine Leblanc <antoine@hasura.io>
Co-authored-by: Brandon Simmons <brandon@hasura.io>
Co-authored-by: Phil Freeman <phil@hasura.io>
Co-authored-by: Lyndon Maydwell <lyndon@sordina.net>
Co-authored-by: Phil Freeman <paf31@cantab.net>
Co-authored-by: Naveen Naidu <naveennaidu479@gmail.com>
Co-authored-by: Karthikeyan Chinnakonda <chkarthikeyan95@gmail.com>
Co-authored-by: Nizar Malangadan <nizar-m@users.noreply.github.com>
Co-authored-by: Antoine Leblanc <crucuny@gmail.com>
Co-authored-by: Auke Booij <auke@tulcod.com>
2020-08-21 20:27:01 +03:00
-- TODO(from master): what should be the behaviour of connection_init message when a
2020-06-16 18:23:06 +03:00
-- connection is already initialized? Currently, we seem to be doing
-- something arbitrary which isn't correct. Ideally, we should stick to
-- this:
--
-- > Allow connection_init message only when the connection state is
-- 'not initialised'. This means that there is no reason for the
-- connection to be in `CSInitError` state.
connState <- liftIO ( STM . readTVarIO ( _wscUser $ WS . getData wsConn ) )
2023-03-30 19:31:50 +03:00
authMode <- liftIO $ getAuthMode
2020-06-16 18:23:06 +03:00
case getIpAddress connState of
Left err -> unexpectedInitError err
Right ipAddress -> do
let headers = mkHeaders connState
2021-02-03 10:10:39 +03:00
res <- resolveUserInfo logger manager headers authMode Nothing
2021-11-09 15:00:21 +03:00
2020-06-16 18:23:06 +03:00
case res of
2020-07-14 22:00:58 +03:00
Left e -> do
2020-06-16 18:23:06 +03:00
let ! initErr = CSInitError $ qeError e
liftIO $ do
2021-09-24 01:56:37 +03:00
$ assertNFHere initErr -- so we don't write thunks to mutable vars
2020-06-16 18:23:06 +03:00
STM . atomically $ STM . writeTVar ( _wscUser $ WS . getData wsConn ) initErr
let connErr = ConnErrMsg $ qeError e
logWSEvent logger wsConn $ EConnErr connErr
2022-10-13 12:32:33 +03:00
liftIO $ onConnInitErrAction wsConn connErr WS . ConnInitFailed
2021-11-09 15:00:21 +03:00
-- we're ignoring the auth headers as headers are irrelevant in websockets
2022-12-15 10:48:18 +03:00
Right ( userInfo , expTimeM , _authHeaders , _ ) -> do
2020-07-14 22:00:58 +03:00
let ! csInit = CSInitialised $ WsClientState userInfo expTimeM paramHeaders ipAddress
2020-06-16 18:23:06 +03:00
liftIO $ do
2021-09-24 01:56:37 +03:00
$ assertNFHere csInit -- so we don't write thunks to mutable vars
2020-06-16 18:23:06 +03:00
STM . atomically $ STM . writeTVar ( _wscUser $ WS . getData wsConn ) csInit
sendMsg wsConn SMConnAck
2021-08-24 19:25:12 +03:00
liftIO $ keepAliveMessageAction wsConn
2020-06-16 18:23:06 +03:00
where
unexpectedInitError e = do
let connErr = ConnErrMsg e
2018-10-25 12:37:57 +03:00
logWSEvent logger wsConn $ EConnErr connErr
2022-10-13 12:32:33 +03:00
liftIO $ onConnInitErrAction wsConn connErr WS . ConnInitFailed
2020-06-16 18:23:06 +03:00
getIpAddress = \ case
2021-09-24 01:56:37 +03:00
CSNotInitialised _ ip -> return ip
CSInitialised WsClientState { .. } -> return wscsIpAddress
CSInitError e -> Left e
2020-06-16 18:23:06 +03:00
2019-03-04 10:46:53 +03:00
mkHeaders st =
paramHeaders ++ getClientHdrs st
paramHeaders =
[ ( CI . mk $ TE . encodeUtf8 h , TE . encodeUtf8 v )
2023-04-26 18:42:13 +03:00
| ( h , v ) <- maybe [] HashMap . toList $ connParamsM >>= _cpHeaders
2019-03-04 10:46:53 +03:00
]
getClientHdrs st = case st of
2020-06-16 18:23:06 +03:00
CSNotInitialised h _ -> unWsHeaders h
2021-09-24 01:56:37 +03:00
_ -> []
-- | Handler invoked when a websocket connection is closed.
--
-- It logs an 'EClosed' event for the connection, reads every entry currently
-- registered in the connection's operation map (in a single STM transaction),
-- and then removes the subscriber behind each entry: live queries via
-- 'ES.removeLiveQuery' and streaming queries via 'ES.removeStreamingQuery'.
--
-- As noted in the module header, the handlers in this module rely on running
-- with async exceptions masked and on not racing on the same connection.
onClose ::
  (MonadIO m) =>
  L.Logger L.Hasura ->
  ServerMetrics ->
  PrometheusMetrics ->
  ES.SubscriptionsState ->
  WSConn ->
  IO GranularPrometheusMetricsState ->
  m ()
onClose logger serverMetrics prometheusMetrics subscriptionsState wsConn granularPrometheusMetricsState = do
  logWSEvent logger wsConn EClosed
  -- Snapshot the registered operations atomically before tearing them down.
  registeredOps <-
    liftIO
      $ STM.atomically
      $ ListT.toList
      $ STMMap.listT (_wscOpMap $ WS.getData wsConn)
  liftIO $ for_ registeredOps stopOperation
  where
    -- Remove the subscriber of one operation-map entry; the map key is not
    -- needed, only the subscriber handle and the operation name.
    stopOperation (_, (subscriber, operationName)) =
      case subscriber of
        LiveQuerySubscriber lqId ->
          ES.removeLiveQuery
            logger
            serverMetrics
            prometheusMetrics
            subscriptionsState
            lqId
            granularPrometheusMetricsState
            operationName
        StreamingQuerySubscriber streamSubscriberId ->
          ES.removeStreamingQuery
            logger
            serverMetrics
            prometheusMetrics
            subscriptionsState
            streamSubscriberId
            granularPrometheusMetricsState
            operationName
2023-06-13 12:22:36 +03:00
-- | A wrapper around the 'IO' action that is run when the metadata changes.
-- Values are built with 'mkCloseWebsocketsOnMetadataChangeAction' and executed
-- with 'runWebsocketCloseOnMetadataChangeAction'.
newtype WebsocketCloseOnMetadataChangeAction = WebsocketCloseOnMetadataChangeAction
  { -- | Execute the wrapped action.
    runWebsocketCloseOnMetadataChangeAction :: IO ()
  }
-- | Build the action that is run when the metadata changes. By default, all
-- websocket connections are closed on a metadata change; the returned action
-- does so by calling 'WS.closeAllConnectionsWithReason' on the given server.
mkCloseWebsocketsOnMetadataChangeAction :: WS.WSServer WS.WSConnData -> WebsocketCloseOnMetadataChangeAction
mkCloseWebsocketsOnMetadataChangeAction wsServer =
  WebsocketCloseOnMetadataChangeAction closeAllConnections
  where
    -- NOTE(review): the leading/trailing spaces inside the two string literals
    -- look like a formatting artifact; they are kept verbatim here. Confirm
    -- before trimming them.
    closeAllConnections =
      WS.closeAllConnectionsWithReason
        wsServer
        " Closing all websocket connections as the metadata has changed "
        " Server state changed, restarting the server "
        id