2021-09-24 01:56:37 +03:00
|
|
|
|
{-# LANGUAGE Arrows #-}
|
|
|
|
|
{-# LANGUAGE OverloadedLabels #-}
|
2020-12-28 15:56:00 +03:00
|
|
|
|
{-# LANGUAGE UndecidableInstances #-}
|
2019-11-20 21:21:30 +03:00
|
|
|
|
|
2021-09-24 01:56:37 +03:00
|
|
|
|
-- | Top-level functions concerned specifically with operations on the schema cache, such as
|
|
|
|
|
-- rebuilding it from the catalog and incorporating schema changes. See the module documentation for
|
|
|
|
|
-- "Hasura.RQL.DDL.Schema" for more details.
|
|
|
|
|
--
|
|
|
|
|
-- __Note__: this module is __mutually recursive__ with other @Hasura.RQL.DDL.Schema.*@ modules, which
|
|
|
|
|
-- both define pieces of the implementation of building the schema cache and define handlers that
|
|
|
|
|
-- trigger schema cache rebuilds.
|
2019-08-14 02:34:37 +03:00
|
|
|
|
module Hasura.RQL.DDL.Schema.Cache
|
2021-09-24 01:56:37 +03:00
|
|
|
|
( RebuildableSchemaCache,
|
|
|
|
|
lastBuiltSchemaCache,
|
|
|
|
|
buildRebuildableSchemaCache,
|
|
|
|
|
buildRebuildableSchemaCacheWithReason,
|
|
|
|
|
CacheRWT,
|
|
|
|
|
runCacheRWT,
|
|
|
|
|
mkBooleanPermissionMap,
|
|
|
|
|
)
|
|
|
|
|
where
|
|
|
|
|
|
|
|
|
|
import Control.Arrow.Extended
|
2022-11-29 04:00:28 +03:00
|
|
|
|
import Control.Arrow.Interpret
|
2021-09-24 01:56:37 +03:00
|
|
|
|
import Control.Lens hiding ((.=))
|
|
|
|
|
import Control.Monad.Trans.Control (MonadBaseControl)
|
|
|
|
|
import Control.Retry qualified as Retry
|
|
|
|
|
import Data.Aeson
|
|
|
|
|
import Data.Either (isLeft)
|
|
|
|
|
import Data.Environment qualified as Env
|
|
|
|
|
import Data.HashMap.Strict.Extended qualified as M
|
2022-03-03 23:12:09 +03:00
|
|
|
|
import Data.HashMap.Strict.InsOrd.Extended qualified as OMap
|
2021-09-24 01:56:37 +03:00
|
|
|
|
import Data.HashSet qualified as HS
|
|
|
|
|
import Data.Proxy
|
|
|
|
|
import Data.Set qualified as S
|
|
|
|
|
import Data.Text.Extended
|
|
|
|
|
import Hasura.Base.Error
|
|
|
|
|
import Hasura.GraphQL.Schema (buildGQLContext)
|
2022-07-12 17:00:15 +03:00
|
|
|
|
import Hasura.GraphQL.Schema.NamingCase
|
2021-09-24 01:56:37 +03:00
|
|
|
|
import Hasura.Incremental qualified as Inc
|
2021-11-09 17:21:48 +03:00
|
|
|
|
import Hasura.Logging
|
2021-09-24 01:56:37 +03:00
|
|
|
|
import Hasura.Metadata.Class
|
|
|
|
|
import Hasura.Prelude
|
|
|
|
|
import Hasura.RQL.DDL.Action
|
|
|
|
|
import Hasura.RQL.DDL.CustomTypes
|
2022-09-13 11:33:44 +03:00
|
|
|
|
import Hasura.RQL.DDL.EventTrigger (MonadEventLogCleanup (..), buildEventTriggerInfo)
|
2021-09-24 01:56:37 +03:00
|
|
|
|
import Hasura.RQL.DDL.InheritedRoles (resolveInheritedRole)
|
2022-02-03 21:58:37 +03:00
|
|
|
|
import Hasura.RQL.DDL.RemoteRelationship (CreateRemoteSchemaRemoteRelationship (..), PartiallyResolvedSource (..), buildRemoteFieldInfo, getRemoteSchemaEntityJoinColumns)
|
2021-09-24 01:56:37 +03:00
|
|
|
|
import Hasura.RQL.DDL.ScheduledTrigger
|
|
|
|
|
import Hasura.RQL.DDL.Schema.Cache.Common
|
|
|
|
|
import Hasura.RQL.DDL.Schema.Cache.Dependencies
|
|
|
|
|
import Hasura.RQL.DDL.Schema.Cache.Fields
|
|
|
|
|
import Hasura.RQL.DDL.Schema.Cache.Permission
|
|
|
|
|
import Hasura.RQL.DDL.Schema.Function
|
|
|
|
|
import Hasura.RQL.DDL.Schema.Table
|
2022-04-27 16:57:28 +03:00
|
|
|
|
import Hasura.RQL.Types.Action
|
|
|
|
|
import Hasura.RQL.Types.Allowlist
|
|
|
|
|
import Hasura.RQL.Types.Backend
|
|
|
|
|
import Hasura.RQL.Types.Column
|
|
|
|
|
import Hasura.RQL.Types.Common
|
|
|
|
|
import Hasura.RQL.Types.CustomTypes
|
2022-03-13 10:40:06 +03:00
|
|
|
|
import Hasura.RQL.Types.Endpoint
|
2022-04-27 16:57:28 +03:00
|
|
|
|
import Hasura.RQL.Types.EventTrigger
|
2021-09-24 01:56:37 +03:00
|
|
|
|
import Hasura.RQL.Types.Eventing.Backend
|
2022-04-27 16:57:28 +03:00
|
|
|
|
import Hasura.RQL.Types.Function
|
|
|
|
|
import Hasura.RQL.Types.Metadata hiding (fmFunction, tmTable)
|
|
|
|
|
import Hasura.RQL.Types.Metadata.Backend
|
|
|
|
|
import Hasura.RQL.Types.Metadata.Object
|
|
|
|
|
import Hasura.RQL.Types.Network
|
2022-11-07 09:54:49 +03:00
|
|
|
|
import Hasura.RQL.Types.OpenTelemetry
|
2022-04-27 16:57:28 +03:00
|
|
|
|
import Hasura.RQL.Types.QueryCollection
|
|
|
|
|
import Hasura.RQL.Types.Relationships.Remote
|
|
|
|
|
import Hasura.RQL.Types.Roles
|
|
|
|
|
import Hasura.RQL.Types.ScheduledTrigger
|
|
|
|
|
import Hasura.RQL.Types.SchemaCache
|
|
|
|
|
import Hasura.RQL.Types.SchemaCache.Build
|
2022-08-19 18:40:26 +03:00
|
|
|
|
import Hasura.RQL.Types.SchemaCache.Instances ()
|
2022-04-27 16:57:28 +03:00
|
|
|
|
import Hasura.RQL.Types.SchemaCacheTypes
|
|
|
|
|
import Hasura.RQL.Types.Source
|
|
|
|
|
import Hasura.RQL.Types.SourceCustomization
|
|
|
|
|
import Hasura.RQL.Types.Table
|
scaffolding for remote-schemas module
The main aim of the PR is:
1. To set up a module structure for 'remote-schemas' package.
2. Move parts of the remote schema codebase into the new module structure to validate it.
## Notes to the reviewer
Why a PR with large-ish diff?
1. We've been making progress on the MM project but we don't yet know how long it is going to take us to get to the first milestone. To understand this better, we need to figure out the unknowns as soon as possible. Hence I've taken a stab at the first two items in the [end-state](https://gist.github.com/0x777/ca2bdc4284d21c3eec153b51dea255c9) document to figure out the unknowns. Unsurprisingly, there are a bunch of issues that we haven't discussed earlier. These are documented in the 'open questions' section.
1. The diff is large but that is only code moved around and I've added a section that documents how things are moved. In addition, there are a fair number of PR comments to help with the review process.
## Changes in the PR
### Module structure
Sets up the module structure as follows:
```
Hasura/
RemoteSchema/
Metadata/
Types.hs
SchemaCache/
Types.hs
Permission.hs
RemoteRelationship.hs
Build.hs
MetadataAPI/
Types.hs
Execute.hs
```
### 1. Types representing metadata are moved
Types that capture metadata information (currently scattered across several RQL modules) are moved into `Hasura.RemoteSchema.Metadata.Types`.
- This new module only depends on very 'core' modules such as
`Hasura.Session` for the notion of roles and `Hasura.Incremental` for `Cacheable` typeclass.
- The requirement on database modules is avoided by generalizing the remote schemas metadata to accept an arbitrary 'r' for a remote relationship
definition.
### 2. SchemaCache related types and build logic have been moved
Types that represent remote schemas information in SchemaCache are moved into `Hasura.RemoteSchema.SchemaCache.Types`.
Similar to `H.RS.Metadata.Types`, this module depends on 'core' modules except for `Hasura.GraphQL.Parser.Variable`. It has something to do with remote relationships but I haven't spent time looking into it. The validation of 'remote relationships to remote schema' is also something that needs to be looked at.
Rips out the logic that builds remote schema's SchemaCache information from the monolithic `buildSchemaCacheRule` and moves it into `Hasura.RemoteSchema.SchemaCache.Build`. Further, the `.SchemaCache.Permission` and `.SchemaCache.RemoteRelationship` have been created from existing modules that capture schema cache building logic for those two components.
This was a fair amount of work. On main, currently remote schema's SchemaCache information is built in two phases - in the first phase, 'permissions' and 'remote relationships' are ignored and in the second phase they are filled in.
While remote relationships can only be resolved after partially resolving sources and other remote schemas, the same isn't true for permissions. Further, most of the work that is done to resolve remote relationships can be moved to the first phase so that the second phase can be a very simple traversal.
This is the approach that was taken - resolve permissions and as much as remote relationships information in the first phase.
### 3. Metadata APIs related types and build logic have been moved
The types that represent remote schema related metadata APIs and the execution logic have been moved to `Hasura.RemoteSchema.MetadataAPI.Types` and `.Execute` modules respectively.
## Open questions:
1. `Hasura.RemoteSchema.Metadata.Types` is so called because I was hoping that all of the metadata related APIs of remote schema can be brought in at `Hasura.RemoteSchema.Metadata.API`. However, as metadata APIs depended on functions from the `SchemaCache` module (see [1](https://github.com/hasura/graphql-engine-mono/blob/ceba6d62264603ee5d279814677b29bcc43ecaea/server/src-lib/Hasura/RQL/DDL/RemoteSchema.hs#L55) and [2](https://github.com/hasura/graphql-engine-mono/blob/ceba6d62264603ee5d279814677b29bcc43ecaea/server/src-lib/Hasura/RQL/DDL/RemoteSchema.hs#L91)), it made more sense to create a separate top-level module for `MetadataAPI`s.
Maybe we can just have `Hasura.RemoteSchema.Metadata` and get rid of the extra nesting or have `Hasura.RemoteSchema.Metadata.{Core,Permission,RemoteRelationship}` if we want to break them down further.
1. `buildRemoteSchemas` in `H.RS.SchemaCache.Build` has the following type:
```haskell
buildRemoteSchemas ::
( ArrowChoice arr,
Inc.ArrowDistribute arr,
ArrowWriter (Seq CollectedInfo) arr,
Inc.ArrowCache m arr,
MonadIO m,
HasHttpManagerM m,
Inc.Cacheable remoteRelationshipDefinition,
ToJSON remoteRelationshipDefinition,
MonadError QErr m
) =>
Env.Environment ->
( (Inc.Dependency (HashMap RemoteSchemaName Inc.InvalidationKey), OrderedRoles),
[RemoteSchemaMetadataG remoteRelationshipDefinition]
)
`arr` HashMap RemoteSchemaName (PartiallyResolvedRemoteSchemaCtxG remoteRelationshipDefinition, MetadataObject)
```
Note the dependence on `CollectedInfo` which is defined as
```haskell
data CollectedInfo
= CIInconsistency InconsistentMetadata
| CIDependency
MetadataObject
-- ^ for error reporting on missing dependencies
SchemaObjId
SchemaDependency
deriving (Eq)
```
this pretty much means that remote schemas is dependent on types from databases, actions, ....
How do we fix this? Maybe introduce a typeclass such as `ArrowCollectRemoteSchemaDependencies` which is defined in `Hasura.RemoteSchema` and then implemented in graphql-engine?
1. The dependency on `buildSchemaCacheFor` in `.MetadataAPI.Execute` which has the following signature:
```haskell
buildSchemaCacheFor ::
(QErrM m, CacheRWM m, MetadataM m) =>
MetadataObjId ->
MetadataModifier ->
```
This can be easily resolved if we restrict what the metadata APIs are allowed to do. Currently, they operate in an unfettered access to modify SchemaCache (the `CacheRWM` constraint):
```haskell
runAddRemoteSchema ::
( QErrM m,
CacheRWM m,
MonadIO m,
HasHttpManagerM m,
MetadataM m,
Tracing.MonadTrace m
) =>
Env.Environment ->
AddRemoteSchemaQuery ->
m EncJSON
```
If this is instead changed to restrict remote schema APIs to only modifying remote schema metadata (while still having access to the remote schemas part of the schema cache), this dependency is completely removed.
```haskell
runAddRemoteSchema ::
( QErrM m,
MonadIO m,
HasHttpManagerM m,
MonadReader RemoteSchemasSchemaCache m,
MonadState RemoteSchemaMetadata m,
Tracing.MonadTrace m
) =>
Env.Environment ->
AddRemoteSchemaQuery ->
m RemoteSchemeMetadataObjId
```
The idea is that the core graphql-engine would call these functions and then call
`buildSchemaCacheFor`.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6291
GitOrigin-RevId: 51357148c6404afe70219afa71bd1d59bdf4ffc6
2022-10-21 06:13:07 +03:00
|
|
|
|
import Hasura.RemoteSchema.Metadata
|
|
|
|
|
import Hasura.RemoteSchema.SchemaCache
|
2021-09-24 01:56:37 +03:00
|
|
|
|
import Hasura.SQL.AnyBackend qualified as AB
|
2022-04-29 05:13:13 +03:00
|
|
|
|
import Hasura.SQL.Backend
|
|
|
|
|
import Hasura.SQL.BackendMap (BackendMap)
|
|
|
|
|
import Hasura.SQL.BackendMap qualified as BackendMap
|
2022-09-05 05:42:59 +03:00
|
|
|
|
import Hasura.SQL.Tag
|
2022-11-02 01:41:22 +03:00
|
|
|
|
import Hasura.Server.Migrate.Version
|
2021-09-24 01:56:37 +03:00
|
|
|
|
import Hasura.Server.Types
|
|
|
|
|
import Hasura.Session
|
|
|
|
|
import Hasura.Tracing qualified as Tracing
|
|
|
|
|
import Language.GraphQL.Draft.Syntax qualified as G
|
|
|
|
|
import Network.HTTP.Client.Manager (HasHttpManagerM (..))
|
2020-12-21 12:11:37 +03:00
|
|
|
|
|
2021-08-09 13:20:04 +03:00
|
|
|
|
{- Note [Roles Inheritance]
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
|
|
Roles may have parent roles defined from which they can inherit permission and this is
|
|
|
|
|
called role inheritance. Roles which have parents can also be parents of other roles.
|
|
|
|
|
So, cycle in roles should be disallowed and this is done in the `orderRoles` function.
|
|
|
|
|
|
|
|
|
|
When the metadata contains a permission for a role for an entity, then it will override the
|
|
|
|
|
inherited permission, if any.
|
|
|
|
|
|
|
|
|
|
Role inheritance works differently for different features:
|
|
|
|
|
|
|
|
|
|
1. Select permissions
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
|
|
See note [Inherited roles architecture for read queries]
|
|
|
|
|
|
|
|
|
|
2. Mutation permissions and remote schema permissions
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
|
|
For mutation and remote schema permissions, an inherited role can only inherit permission
|
|
|
|
|
from its parent roles when the relevant parts of the permissions are equal i.e. the non-relevant
|
|
|
|
|
parts are discarded for the equality, for example, in two remote schema permissions the order
|
|
|
|
|
of the fields in an Object type is discarded.
|
|
|
|
|
|
|
|
|
|
When an inherited role cannot inherit permission from its parents due to a conflict, then we mark
|
|
|
|
|
the inherited role and the entity (remote schema or table) combination as inconsistent in the metadata.
|
|
|
|
|
|
|
|
|
|
3. Actions and Custom function permissions
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
|
|
|
|
|
Currently, actions and custom function permissions can be thought of as a boolean. Either a role has
|
|
|
|
|
permission to the entity or it doesn't, so in these cases there's no possibility of a conflict. An inherited
|
|
|
|
|
role will have access to the action/function if any one of the parents have permission to access the
|
|
|
|
|
action/function.
|
|
|
|
|
|
|
|
|
|
-}
|
|
|
|
|
|
2021-09-24 01:56:37 +03:00
|
|
|
|
-- | Build a fresh 'RebuildableSchemaCache' from the given metadata, using
-- 'CatalogSync' as the build reason. All other arguments are forwarded
-- unchanged to 'buildRebuildableSchemaCacheWithReason'.
buildRebuildableSchemaCache ::
  Logger Hasura ->
  Env.Environment ->
  Metadata ->
  CacheBuild RebuildableSchemaCache
buildRebuildableSchemaCache logger env metadata =
  buildRebuildableSchemaCacheWithReason CatalogSync logger env metadata
|
|
|
|
|
|
2021-09-24 01:56:37 +03:00
|
|
|
|
-- | Build a fresh 'RebuildableSchemaCache' from the given metadata.
--
-- The 'BuildReason' is handed to the build through a reader layer, and the
-- build itself starts from 'initialInvalidationKeys'. The resulting value
-- stores the built schema cache, those same initial invalidation keys, and the
-- rule that can be used to rebuild the cache later.
buildRebuildableSchemaCacheWithReason ::
  BuildReason ->
  Logger Hasura ->
  Env.Environment ->
  Metadata ->
  CacheBuild RebuildableSchemaCache
buildRebuildableSchemaCacheWithReason reason logger env metadata =
  runReaderT
    (Inc.build (buildSchemaCacheRule logger env) (metadata, initialInvalidationKeys))
    reason
    <&> \built ->
      RebuildableSchemaCache
        (Inc.result built)
        initialInvalidationKeys
        (Inc.rebuildRule built)
|
2019-11-20 21:21:30 +03:00
|
|
|
|
|
|
|
|
|
-- | A monad transformer that threads a 'RebuildableSchemaCache' together with
-- the accumulated 'CacheInvalidations' as state over an underlying monad.
--
-- The 'CacheInvalidations' component of the state could actually be collected
-- using @WriterT@, but @WriterT@ implementations prior to transformers-0.5.6.0
-- (which added "Control.Monad.Trans.Writer.CPS") are leaky, and we don’t have
-- that yet.
newtype CacheRWT m a
  = CacheRWT (StateT (RebuildableSchemaCache, CacheInvalidations) m a)
  deriving
    ( -- core hierarchy
      Functor,
      Applicative,
      Monad,
      -- lifting of base-monad effects
      MonadIO,
      MonadBase b,
      MonadBaseControl b,
      -- mtl-style classes passed through to the underlying monad
      MonadReader r,
      MonadError e,
      -- application-specific capabilities
      UserInfoM,
      HasHttpManagerM,
      HasServerConfigCtx,
      MonadMetadataStorage,
      MonadMetadataStorageQueryAPI,
      Tracing.MonadTrace
    )
|
2020-12-28 15:56:00 +03:00
|
|
|
|
|
2022-09-09 11:26:44 +03:00
|
|
|
|
-- | Event log cleanup is delegated to the underlying monad; 'CacheRWT' adds
-- nothing of its own here and simply lifts each operation.
instance (MonadEventLogCleanup m) => MonadEventLogCleanup (CacheRWT m) where
  runLogCleaner = lift . runLogCleaner
  generateCleanupSchedules source trigger config =
    lift (generateCleanupSchedules source trigger config)
|
2022-09-09 11:26:44 +03:00
|
|
|
|
|
2021-09-24 01:56:37 +03:00
|
|
|
|
-- | Run a 'CacheRWT' action starting from the given 'RebuildableSchemaCache'
-- and an empty ('mempty') set of 'CacheInvalidations'.
--
-- Returns the action's result together with the final schema cache and the
-- invalidations accumulated while running it, flattened into a triple.
runCacheRWT ::
  Functor m =>
  RebuildableSchemaCache ->
  CacheRWT m a ->
  m (a, RebuildableSchemaCache, CacheInvalidations)
runCacheRWT cache (CacheRWT action) =
  flatten <$> runStateT action (cache, mempty)
  where
    -- Turn StateT's nested @(result, (cache, invalidations))@ into a flat triple.
    flatten (value, (finalCache, invalidations)) = (value, finalCache, invalidations)
|
2019-11-20 21:21:30 +03:00
|
|
|
|
|
|
|
|
|
-- | Lifting goes through the underlying 'StateT' layer and re-wraps the result.
instance MonadTrans CacheRWT where
  lift action = CacheRWT (lift action)
|
|
|
|
|
|
|
|
|
|
-- | Reading the schema cache returns the last built 'SchemaCache' held in the
-- first component of the state.
instance (Monad m) => CacheRM (CacheRWT m) where
  askSchemaCache = CacheRWT (gets (lastBuiltSchemaCache . fst))
|
2019-11-20 21:21:30 +03:00
|
|
|
|
|
2021-09-24 01:56:37 +03:00
|
|
|
|
instance
  ( MonadIO m,
    MonadError QErr m,
    HasHttpManagerM m,
    MonadResolveSource m,
    HasServerConfigCtx m
  ) =>
  CacheRWM (CacheRWT m)
  where
  -- Rebuild the schema cache from the given metadata using the rebuild rule
  -- kept in the state, after applying the requested invalidations to the
  -- stored invalidation keys. The metadata resource version of the previously
  -- built schema cache is carried over to the new one.
  buildSchemaCacheWithOptions buildReason invalidations metadata = CacheRWT $ do
    (RebuildableSchemaCache previousSC previousKeys storedRule, pendingInvalidations) <- get
    let carriedVersion = scMetadataResourceVersion previousSC
        invalidatedKeys = invalidateKeys invalidations previousKeys
    buildResult <-
      lift $
        runCacheBuildM $
          runReaderT (Inc.build storedRule (metadata, invalidatedKeys)) buildReason
    let rebuiltSC = (Inc.result buildResult) {scMetadataResourceVersion = carriedVersion}
        retainedKeys = dropStaleRemoteSchemaKeys rebuiltSC invalidatedKeys
        -- Both components are forced before being written back to the state.
        !rebuildable = RebuildableSchemaCache rebuiltSC retainedKeys (Inc.rebuildRule buildResult)
        !accumulated = pendingInvalidations <> invalidations
    put (rebuildable, accumulated)
    where
      -- Prunes invalidation keys that no longer exist in the schema to avoid
      -- leaking memory by hanging onto unnecessary keys.
      dropStaleRemoteSchemaKeys builtSC =
        over
          ikRemoteSchemas
          ( M.filterWithKey
              ( \remoteName _ ->
                  -- see Note [Keep invalidation keys for inconsistent objects]
                  remoteName `elem` getAllRemoteSchemas builtSC
              )
          )

  -- Overwrite the metadata resource version recorded in the last built schema
  -- cache; the accumulated invalidations are left untouched.
  setMetadataResourceVersionInSchemaCache resourceVersion = CacheRWT $ do
    (rebuildable, invalidations) <- get
    let stampedSC =
          (lastBuiltSchemaCache rebuildable)
            { scMetadataResourceVersion = Just resourceVersion
            }
    put (rebuildable {lastBuiltSchemaCache = stampedSC}, invalidations)
|
|
|
|
|
|
2022-09-02 09:33:21 +03:00
|
|
|
|
-- | Derive the health check cache from the sources metadata.
--
-- Only sources whose metadata carries a health check configuration
-- ('_smHealthCheckConfig' is a 'Just') get an entry; all others are left out.
buildHealthCheckCache :: Sources -> SourceHealthCheckCache
buildHealthCheckCache sources =
  M.fromList
    [ (name, info)
      | (name, backendSource) <- OMap.toList sources,
        Just info <- [mkSourceHealthCheck backendSource]
    ]
  where
    -- Unwrap the backend-existential source metadata and build the
    -- backend-specific health check info, if any.
    mkSourceHealthCheck :: BackendSourceMetadata -> Maybe BackendSourceHealthCheckInfo
    mkSourceHealthCheck (BackendSourceMetadata anySource) =
      AB.traverseBackend @Backend anySource mkSourceHealthCheckBackend

    -- Pair the source name and connection configuration with the health check
    -- configuration when one is present.
    mkSourceHealthCheckBackend :: SourceMetadata b -> Maybe (SourceHealthCheckInfo b)
    mkSourceHealthCheckBackend source =
      SourceHealthCheckInfo (_smName source) (_smConfiguration source)
        <$> _smHealthCheckConfig source
|
|
|
|
|
|
2022-11-23 19:40:21 +03:00
|
|
|
|
-- | Derive, for every source in the metadata, the connection details needed
-- to ping that source for attribution.
buildSourcePingCache :: Sources -> SourcePingCache
buildSourcePingCache sources =
  M.fromList
    [ (name, mkSourcePing backendSource)
      | (name, backendSource) <- OMap.toList sources
    ]
  where
    -- Unwrap the backend-existential source metadata and build the
    -- backend-specific ping info.
    mkSourcePing :: BackendSourceMetadata -> BackendSourcePingInfo
    mkSourcePing (BackendSourceMetadata anySource) =
      AB.mapBackend anySource mkSourcePingBackend

    -- Ping info consists of the source name and its connection configuration.
    mkSourcePingBackend :: SourceMetadata b -> SourcePingInfo b
    mkSourcePingBackend source =
      SourcePingInfo (_smName source) (_smConfiguration source)
|
|
|
|
|
|
Avoid GraphQL schema rebuild when changing irrelevant Metadata
This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to:
- allowlists
- query collections
- cron triggers
- REST endpoints
- API limits
- metrics config
- GraphQL introspection options
- TLS allow lists
- OpenTelemetry
When is construction of the in-memory GraphQL schema cached between Metadata operations?
Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation.
However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction.
The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.)
We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance.
So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps.
That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema.
Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613
GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
|
|
|
|
{- Note [Avoiding GraphQL schema rebuilds when changing irrelevant Metadata]
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
There are many Metadata operations that don't influence the GraphQL schema. So
|
|
|
|
|
we should be caching its construction.
|
|
|
|
|
|
|
|
|
|
The `Hasura.Incremental` framework allows us to cache such constructions:
|
|
|
|
|
whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and
|
|
|
|
|
`b` the output, we can use the `Inc.cache` combinator to obtain a new arrow
|
|
|
|
|
which is only re-executed when the input `a` changes in a material way. To test
|
|
|
|
|
this, `a` needs an `Eq` instance.
|
|
|
|
|
|
|
|
|
|
We can't simply apply `Inc.cache` to the GraphQL schema cache building phase
|
|
|
|
|
(`buildGQLContext`), because the inputs (components of `BuildOutputs` such as
|
|
|
|
|
`SourceCache`) don't have an `Eq` instance.
|
|
|
|
|
|
|
|
|
|
So the purpose of `buildOutputsAndSchema` is that we cache already at an earlier
|
|
|
|
|
point, encompassing more computation. The Metadata and invalidation keys (which
|
|
|
|
|
have `Eq` instances) are used as a caching key, and `Inc.cache` can be applied
|
|
|
|
|
to the whole sequence of steps.
|
|
|
|
|
|
|
|
|
|
But because of the all-or-nothing nature of caching, it's important that
|
|
|
|
|
`buildOutputsAndSchema` is re-run as little as possible. So the exercise
|
|
|
|
|
becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as
|
|
|
|
|
many Metadata operations as possible can be handled outside of this codepath
|
|
|
|
|
that produces a GraphQL schema.
|
|
|
|
|
-}
|
|
|
|
|
|
2021-09-24 01:56:37 +03:00
|
|
|
|
buildSchemaCacheRule ::
|
2019-11-20 21:21:30 +03:00
|
|
|
|
-- Note: by supplying BuildReason via MonadReader, it does not participate in caching, which is
|
|
|
|
|
-- what we want!
|
2021-10-13 19:38:56 +03:00
|
|
|
|
( ArrowChoice arr,
|
2021-09-24 01:56:37 +03:00
|
|
|
|
Inc.ArrowDistribute arr,
|
|
|
|
|
Inc.ArrowCache m arr,
|
|
|
|
|
MonadIO m,
|
|
|
|
|
MonadBaseControl IO m,
|
|
|
|
|
MonadError QErr m,
|
|
|
|
|
MonadReader BuildReason m,
|
|
|
|
|
HasHttpManagerM m,
|
|
|
|
|
MonadResolveSource m,
|
|
|
|
|
HasServerConfigCtx m
|
|
|
|
|
) =>
|
2021-11-09 17:21:48 +03:00
|
|
|
|
Logger Hasura ->
|
2021-09-24 01:56:37 +03:00
|
|
|
|
Env.Environment ->
|
|
|
|
|
(Metadata, InvalidationKeys) `arr` SchemaCache
|
server: Simplify `BuildOutputs`
A bunch of configurations are retrieved from the Metadata, then stored in the `BuildOutputs` structure, only to then be forwarded to the `SchemaCache`, with extremely little processing in between.
So this simplifies the build pipeline for some parts of the metadata: just construct those things from `Metadata` directly, and store them in the `SchemaCache` without any intermediate container.
Why did we have the detour via `BuildOutputs` in the first place? Parts of the Metadata (codified by `MetadataObjId`) can generate _metadata inconsistencies_ and/or _schema dependencies_, which are related.
- Metadata inconsistencies are warnings that we show to the user, indicating that there's something wrong with their configuration, and they have to fix it.
- Schema dependencies are an internal mechanism that allow us to build a consistent view of the world. For instance, if we have a relationship from DB tables `books` to `authors`, but the `authors` table is inconsistent (e.g. it doesn't exist in the DB), then we have schema dependencies indicating that. The job of `resolveDependencies` is to then drop the relationship, so that we can at least generate a legal GraphQL schema for `books`.
If we never generate a schema dependency for a certain fragment of Metadata, then there is no reason to call `resolveDependencies` on it, and so there is no reason to store it in `BuildOutputs`.
---
The starting point that allows this refactor is to apply Metadata defaults before it reaches `buildAndCollectInfo`, so that metadata-with-defaults can be used elsewhere.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6609
GitOrigin-RevId: df0c4a7ff9451e10e02a40bf26304b26584ba483
2022-11-15 15:02:55 +03:00
|
|
|
|
buildSchemaCacheRule logger env = proc (metadataNoDefaults, invalidationKeys) -> do
|
2020-01-29 23:15:53 +03:00
|
|
|
|
invalidationKeysDep <- Inc.newDependency -< invalidationKeys
|
2022-10-20 15:45:31 +03:00
|
|
|
|
metadataDefaults <- bindA -< askMetadataDefaults
|
server: Simplify `BuildOutputs`
A bunch of configurations are retrieved from the Metadata, then stored in the `BuildOutputs` structure, only to then be forwarded to the `SchemaCache`, with extremely little processing in between.
So this simplifies the build pipeline for some parts of the metadata: just construct those things from `Metadata` directly, and store them in the `SchemaCache` without any intermediate container.
Why did we have the detour via `BuildOutputs` in the first place? Parts of the Metadata (codified by `MetadataObjId`) can generate _metadata inconsistencies_ and/or _schema dependencies_, which are related.
- Metadata inconsistencies are warnings that we show to the user, indicating that there's something wrong with their configuration, and they have to fix it.
- Schema dependencies are an internal mechanism that allow us to build a consistent view of the world. For instance, if we have a relationship from DB tables `books` to `authors`, but the `authors` table is inconsistent (e.g. it doesn't exist in the DB), then we have schema dependencies indicating that. The job of `resolveDependencies` is to then drop the relationship, so that we can at least generate a legal GraphQL schema for `books`.
If we never generate a schema dependency for a certain fragment of Metadata, then there is no reason to call `resolveDependencies` on it, and so there is no reason to store it in `BuildOutputs`.
---
The starting point that allows this refactor is to apply Metadata defaults before it reaches `buildAndCollectInfo`, so that metadata-with-defaults can be used elsewhere.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6609
GitOrigin-RevId: df0c4a7ff9451e10e02a40bf26304b26584ba483
2022-11-15 15:02:55 +03:00
|
|
|
|
let metadata@Metadata {..} = overrideMetadataDefaults metadataNoDefaults metadataDefaults
|
Avoid GraphQL schema rebuild when changing irrelevant Metadata
This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to:
- allowlists
- query collections
- cron triggers
- REST endpoints
- API limits
- metrics config
- GraphQL introspection options
- TLS allow lists
- OpenTelemetry
When is construction of the in-memory GraphQL schema cached between Metadata operations?
Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation.
However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction.
The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.)
We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance.
So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps.
That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema.
Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613
GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
|
|
|
|
metadataDep <- Inc.newDependency -< metadata
|
|
|
|
|
|
|
|
|
|
(inconsistentObjects, (resolvedOutputs, dependencyInconsistentObjects, resolvedDependencies), ((adminIntrospection, gqlContext, gqlContextUnauth, inconsistentRemoteSchemas), (relayContext, relayContextUnauth))) <-
|
|
|
|
|
Inc.cache buildOutputsAndSchema -< (metadataDep, invalidationKeysDep)
|
2020-01-29 23:15:53 +03:00
|
|
|
|
|
2022-11-30 12:11:00 +03:00
|
|
|
|
let (resolvedEndpoints, endpointCollectedInfo) = runIdentity $ runWriterT $ buildRESTEndpoints _metaQueryCollections (OMap.elems _metaRestEndpoints)
|
|
|
|
|
(cronTriggersMap, cronTriggersCollectedInfo) = runIdentity $ runWriterT $ buildCronTriggers (OMap.elems _metaCronTriggers)
|
|
|
|
|
(openTelemetryInfo, openTelemetryCollectedInfo) = runIdentity $ runWriterT $ buildOpenTelemetry _metaOpenTelemetryConfig
|
2021-09-24 01:56:37 +03:00
|
|
|
|
|
2022-11-30 12:11:00 +03:00
|
|
|
|
-- Reports whether an endpoint URL contains a repeated value among the pieces
-- kept by @splitPath Just (const Nothing)@: the pieces mapped to 'Just' are
-- collected, sorted and grouped, and the result is 'True' as soon as one group
-- has more than one member.
-- NOTE(review): presumably the 'Just' pieces are the URL's path variables
-- (this predicate feeds 'DuplicateRestVariables') - confirm against 'splitPath'.
duplicateVariables :: EndpointMetadata a -> Bool
|
2021-09-24 01:56:37 +03:00
|
|
|
|
duplicateVariables m = any ((> 1) . length) $ group $ sort $ catMaybes $ splitPath Just (const Nothing) (_ceUrl m)
|
|
|
|
|
|
|
|
|
|
-- Metadata object identifier of an endpoint: the 'MOEndpoint' constructor
-- applied to the endpoint's name ('_ceName').
endpointObjId :: EndpointMetadata q -> MetadataObjId
|
|
|
|
|
endpointObjId md = MOEndpoint (_ceName md)
|
|
|
|
|
|
|
|
|
|
-- Wraps an endpoint as a 'MetadataObject'. The identifier comes from
-- 'endpointObjId'; the JSON payload is NOT the encoding of @md@ itself but of
-- the result of looking the endpoint's name up in '_metaRestEndpoints', i.e.
-- the definition as stored in the (defaults-applied) metadata.
endpointObject :: EndpointMetadata q -> MetadataObject
|
server: Simplify `BuildOutputs`
A bunch of configurations are retrieved from the Metadata, then stored in the `BuildOutputs` structure, only to then be forwarded to the `SchemaCache`, with extremely little processing in between.
So this simplifies the build pipeline for some parts of the metadata: just construct those things from `Metadata` directly, and store them in the `SchemaCache` without any intermediate container.
Why did we have the detour via `BuildOutputs` in the first place? Parts of the Metadata (codified by `MetadataObjId`) can generate _metadata inconsistencies_ and/or _schema dependencies_, which are related.
- Metadata inconsistencies are warnings that we show to the user, indicating that there's something wrong with their configuration, and they have to fix it.
- Schema dependencies are an internal mechanism that allow us to build a consistent view of the world. For instance, if we have a relationship from DB tables `books` to `authors`, but the `authors` table is inconsistent (e.g. it doesn't exist in the DB), then we have schema dependencies indicating that. The job of `resolveDependencies` is to then drop the relationship, so that we can at least generate a legal GraphQL schema for `books`.
If we never generate a schema dependency for a certain fragment of Metadata, then there is no reason to call `resolveDependencies` on it, and so there is no reason to store it in `BuildOutputs`.
---
The starting point that allows this refactor is to apply Metadata defaults before it reaches `buildAndCollectInfo`, so that metadata-with-defaults can be used elsewhere.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6609
GitOrigin-RevId: df0c4a7ff9451e10e02a40bf26304b26584ba483
2022-11-15 15:02:55 +03:00
|
|
|
|
endpointObject md = MetadataObject (endpointObjId md) (toJSON $ OMap.lookup (_ceName md) _metaRestEndpoints)
|
2021-09-24 01:56:37 +03:00
|
|
|
|
|
2022-03-08 12:48:21 +03:00
|
|
|
|
-- Builds the 'MetadataObject' for a single listed query of a query collection:
-- it is identified by 'MOQueryCollectionsQuery' (collection name plus the
-- query) and carries the query's JSON encoding as its payload.
listedQueryObjects :: (CollectionName, ListedQuery) -> MetadataObject
|
|
|
|
|
listedQueryObjects (cName, lq) = MetadataObject (MOQueryCollectionsQuery cName lq) (toJSON lq)
|
|
|
|
|
|
2021-09-24 01:56:37 +03:00
|
|
|
|
-- Cases of urls that generate invalid segments:
|
|
|
|
|
|
|
|
|
|
-- Reports whether an endpoint URL has an invalid segment: the URL is split
-- with @splitPath id id@ (every piece kept as-is) and the result is 'True' if
-- any piece is the empty string or a lone @":"@.
hasInvalidSegments :: EndpointMetadata query -> Bool
|
|
|
|
|
hasInvalidSegments m = any (`elem` ["", ":"]) (splitPath id id (_ceUrl m))
|
|
|
|
|
|
|
|
|
|
ceUrlTxt = toTxt . _ceUrl
|
|
|
|
|
|
Avoid GraphQL schema rebuild when changing irrelevant Metadata
This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to:
- allowlists
- query collections
- cron triggers
- REST endpoints
- API limits
- metrics config
- GraphQL introspection options
- TLS allow lists
- OpenTelemetry
When is construction of the in-memory GraphQL schema cached between Metadata operations?
Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation.
However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction.
The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.)
We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance.
So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps.
That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema.
Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613
GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
|
|
|
|
endpoints = buildEndpointsTrie (M.elems resolvedEndpoints)
|
2021-09-24 01:56:37 +03:00
|
|
|
|
|
|
|
|
|
duplicateF md = DuplicateRestVariables (ceUrlTxt md) (endpointObject md)
|
Avoid GraphQL schema rebuild when changing irrelevant Metadata
This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to:
- allowlists
- query collections
- cron triggers
- REST endpoints
- API limits
- metrics config
- GraphQL introspection options
- TLS allow lists
- OpenTelemetry
When is construction of the in-memory GraphQL schema cached between Metadata operations?
Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation.
However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction.
The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.)
We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance.
So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps.
That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema.
Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613
GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
|
|
|
|
duplicateRestVariables = map duplicateF $ filter duplicateVariables (M.elems resolvedEndpoints)
|
2021-09-24 01:56:37 +03:00
|
|
|
|
|
|
|
|
|
invalidF md = InvalidRestSegments (ceUrlTxt md) (endpointObject md)
|
Avoid GraphQL schema rebuild when changing irrelevant Metadata
This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to:
- allowlists
- query collections
- cron triggers
- REST endpoints
- API limits
- metrics config
- GraphQL introspection options
- TLS allow lists
- OpenTelemetry
When is construction of the in-memory GraphQL schema cached between Metadata operations?
Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation.
However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction.
The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.)
We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance.
So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps.
That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema.
Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613
GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
|
|
|
|
invalidRestSegments = map invalidF $ filter hasInvalidSegments (M.elems resolvedEndpoints)
|
2021-09-24 01:56:37 +03:00
|
|
|
|
|
|
|
|
|
ambiguousF' ep = MetadataObject (endpointObjId ep) (toJSON ep)
|
|
|
|
|
ambiguousF mds = AmbiguousRestEndpoints (commaSeparated $ map _ceUrl mds) (map ambiguousF' mds)
|
|
|
|
|
ambiguousRestEndpoints = map (ambiguousF . S.elems . snd) $ ambiguousPathsGrouped endpoints
|
2022-03-08 12:48:21 +03:00
|
|
|
|
|
server: Simplify `BuildOutputs`
A bunch of configurations are retrieved from the Metadata, then stored in the `BuildOutputs` structure, only to then be forwarded to the `SchemaCache`, with extremely little processing in between.
So this simplifies the build pipeline for some parts of the metadata: just construct those things from `Metadata` directly, and store them in the `SchemaCache` without any intermediate container.
Why did we have the detour via `BuildOutputs` in the first place? Parts of the Metadata (codified by `MetadataObjId`) can generate _metadata inconsistencies_ and/or _schema dependencies_, which are related.
- Metadata inconsistencies are warnings that we show to the user, indicating that there's something wrong with their configuration, and they have to fix it.
- Schema dependencies are an internal mechanism that allow us to build a consistent view of the world. For instance, if we have a relationship from DB tables `books` to `authors`, but the `authors` table is inconsistent (e.g. it doesn't exist in the DB), then we have schema dependencies indicating that. The job of `resolveDependencies` is to then drop the relationship, so that we can at least generate a legal GraphQL schema for `books`.
If we never generate a schema dependency for a certain fragment of Metadata, then there is no reason to call `resolveDependencies` on it, and so there is no reason to store it in `BuildOutputs`.
---
The starting point that allows this refactor is to apply Metadata defaults before it reaches `buildAndCollectInfo`, so that metadata-with-defaults can be used elsewhere.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6609
GitOrigin-RevId: df0c4a7ff9451e10e02a40bf26304b26584ba483
2022-11-15 15:02:55 +03:00
|
|
|
|
inlinedAllowlist = inlineAllowlist _metaQueryCollections _metaAllowlist
|
|
|
|
|
globalAllowLists = HS.toList . iaGlobal $ inlinedAllowlist
|
2022-04-20 10:32:19 +03:00
|
|
|
|
|
Avoid GraphQL schema rebuild when changing irrelevant Metadata
This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to:
- allowlists
- query collections
- cron triggers
- REST endpoints
- API limits
- metrics config
- GraphQL introspection options
- TLS allow lists
- OpenTelemetry
When is construction of the in-memory GraphQL schema cached between Metadata operations?
Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation.
However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction.
The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.)
We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance.
So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps.
That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema.
Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613
GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
|
|
|
|
-- Endpoints don't generate any dependencies
|
|
|
|
|
endpointInconsistencies = either id absurd <$> toList endpointCollectedInfo
|
|
|
|
|
|
|
|
|
|
-- Cron triggers don't generate any dependencies
|
|
|
|
|
cronTriggersInconsistencies = either id absurd <$> toList cronTriggersCollectedInfo
|
|
|
|
|
|
|
|
|
|
-- OpenTelemetry doesn't generate any dependencies
|
|
|
|
|
openTelemetryInconsistencies = either id absurd <$> toList openTelemetryCollectedInfo
|
|
|
|
|
|
server: Simplify `BuildOutputs`
A bunch of configurations are retrieved from the Metadata, then stored in the `BuildOutputs` structure, only to then be forwarded to the `SchemaCache`, with extremely little processing in between.
So this simplifies the build pipeline for some parts of the metadata: just construct those things from `Metadata` directly, and store them in the `SchemaCache` without any intermediate container.
Why did we have the detour via `BuildOutputs` in the first place? Parts of the Metadata (codified by `MetadataObjId`) can generate _metadata inconsistencies_ and/or _schema dependencies_, which are related.
- Metadata inconsistencies are warnings that we show to the user, indicating that there's something wrong with their configuration, and they have to fix it.
- Schema dependencies are an internal mechanism that allow us to build a consistent view of the world. For instance, if we have a relationship from DB tables `books` to `authors`, but the `authors` table is inconsistent (e.g. it doesn't exist in the DB), then we have schema dependencies indicating that. The job of `resolveDependencies` is to then drop the relationship, so that we can at least generate a legal GraphQL schema for `books`.
If we never generate a schema dependency for a certain fragment of Metadata, then there is no reason to call `resolveDependencies` on it, and so there is no reason to store it in `BuildOutputs`.
---
The starting point that allows this refactor is to apply Metadata defaults before it reaches `buildAndCollectInfo`, so that metadata-with-defaults can be used elsewhere.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6609
GitOrigin-RevId: df0c4a7ff9451e10e02a40bf26304b26584ba483
2022-11-15 15:02:55 +03:00
|
|
|
|
inconsistentQueryCollections <- bindA -< do getInconsistentQueryCollections adminIntrospection _metaQueryCollections listedQueryObjects endpoints globalAllowLists
|
2021-09-24 01:56:37 +03:00
|
|
|
|
|
|
|
|
|
returnA
|
|
|
|
|
-<
|
|
|
|
|
SchemaCache
|
|
|
|
|
{ scSources = _boSources resolvedOutputs,
|
|
|
|
|
scActions = _boActions resolvedOutputs,
|
|
|
|
|
-- TODO this is not the right value: we should track what part of the schema
|
|
|
|
|
-- we can stitch without inconsistencies, I think.
|
|
|
|
|
scRemoteSchemas = fmap fst (_boRemoteSchemas resolvedOutputs), -- remoteSchemaMap
|
server: Simplify `BuildOutputs`
A bunch of configurations are retrieved from the Metadata, then stored in the `BuildOutputs` structure, only to then be forwarded to the `SchemaCache`, with extremely little processing in between.
So this simplifies the build pipeline for some parts of the metadata: just construct those things from `Metadata` directly, and store them in the `SchemaCache` without any intermediate container.
Why did we have the detour via `BuildOutputs` in the first place? Parts of the Metadata (codified by `MetadataObjId`) can generate _metadata inconsistencies_ and/or _schema dependencies_, which are related.
- Metadata inconsistencies are warnings that we show to the user, indicating that there's something wrong with their configuration, and they have to fix it.
- Schema dependencies are an internal mechanism that allow us to build a consistent view of the world. For instance, if we have a relationship from DB tables `books` to `authors`, but the `authors` table is inconsistent (e.g. it doesn't exist in the DB), then we have schema dependencies indicating that. The job of `resolveDependencies` is to then drop the relationship, so that we can at least generate a legal GraphQL schema for `books`.
If we never generate a schema dependency for a certain fragment of Metadata, then there is no reason to call `resolveDependencies` on it, and so there is no reason to store it in `BuildOutputs`.
---
The starting point that allows this refactor is to apply Metadata defaults before it reaches `buildAndCollectInfo`, so that metadata-with-defaults can be used elsewhere.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6609
GitOrigin-RevId: df0c4a7ff9451e10e02a40bf26304b26584ba483
2022-11-15 15:02:55 +03:00
|
|
|
|
scAllowlist = inlinedAllowlist,
|
2021-09-24 01:56:37 +03:00
|
|
|
|
-- , scCustomTypes = _boCustomTypes resolvedOutputs
|
Decouple `Analyse` and `OpenAPI` from remote schema introspection and internal execution details.
### Motivation
#2338 introduced a way to validate REST queries against the metadata after a change, to properly report any inconsistency that would emerge from a change in the underlying structure of our schema. However, the way this was done was quite complex and error-prone. Namely: we would use the generated schema parsers to statically execute an introspection query, similar to the one we use for remote schemas, then parse the resulting bytestring as if it were coming from a remote schema.
This led to several issues: the code was using remote schema primitives, and was associated with remote schema code, despite being unrelated, which led to absurd situations like creating fake `Variable`s whose type was also their name. A lot of the code had to deal with the fact that we might fail to re-parse our own schema. Additionally, some of it was dead code, that for some reason GHC did not warn about? But more fundamentally, this architecture decision creates a dependency between unrelated pieces of the engine: modifying the internal processing of root fields or the introspection of remote schemas now risks impacting the unrelated `OpenAPI` feature.
### Description
This PR decouples that process from the remote schema introspection logic and from the execution engine by making `Analyse` and `OpenAPI` work on the generic `G.SchemaIntrospection` instead. To accomplish this, it:
- adds `GraphQL.Parser.Schema.Convert`, to convert from our "live" schema back to a flat `SchemaIntrospection`
- persists in the schema cache the `admin` introspection generated when building the schema, and uses it both for validation and for generating the `OpenAPI`.
### Known issues and limitations
This adds a bit of memory pressure to the engine, as we persist the entire schema in the schema cache. This might be acceptable in the short-term, but we have several potential ideas going forward should this be a problem:
- cache the result of `Analyze`: when it becomes possible to build the `OpenAPI` purely with the result of `Analyze` without any additional schema information, then we could cache that instead, reducing the footprint
- caching the `OpenAPI`: if it doesn't need to change every time the endpoint is queried, then it should be possible to cache the entire `OpenAPI` object instead of the schema
- cache a copy of the `FieldParsers` used to generate the schema: as those are persisted through the GraphQL `Context`, and are the only input required to generate the `Schema`, making them accessible in the schema cache would allow us to have the exact same feature with no additional memory cost, at the price of a slightly slower and more complicated process (need to rebuild the `Schema` every time we query the OpenAPI endpoint)
- cache nothing at all, and rebuild the admin schema from scratch every time.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/3962
Co-authored-by: paritosh-08 <85472423+paritosh-08@users.noreply.github.com>
GitOrigin-RevId: a8b9808170b231fdf6787983b4a9ed286cde27e0
2022-03-22 10:36:39 +03:00
|
|
|
|
scAdminIntrospection = adminIntrospection,
|
2021-09-24 01:56:37 +03:00
|
|
|
|
scGQLContext = gqlContext,
|
|
|
|
|
scUnauthenticatedGQLContext = gqlContextUnauth,
|
|
|
|
|
scRelayContext = relayContext,
|
|
|
|
|
scUnauthenticatedRelayContext = relayContextUnauth,
|
|
|
|
|
-- , scGCtxMap = gqlSchema
|
|
|
|
|
-- , scDefaultRemoteGCtx = remoteGQLSchema
|
|
|
|
|
scDepMap = resolvedDependencies,
|
Avoid GraphQL schema rebuild when changing irrelevant Metadata
This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to:
- allowlists
- query collections
- cron triggers
- REST endpoints
- API limits
- metrics config
- GraphQL introspection options
- TLS allow lists
- OpenTelemetry
When is construction of the in-memory GraphQL schema cached between Metadata operations?
Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation.
However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction.
The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.)
We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance.
So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps.
That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema.
Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613
GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
|
|
|
|
scCronTriggers = cronTriggersMap,
|
2021-09-24 01:56:37 +03:00
|
|
|
|
scEndpoints = endpoints,
|
|
|
|
|
scInconsistentObjs =
|
|
|
|
|
inconsistentObjects
|
|
|
|
|
<> dependencyInconsistentObjects
|
2022-02-03 21:58:37 +03:00
|
|
|
|
<> toList inconsistentRemoteSchemas
|
2021-09-24 01:56:37 +03:00
|
|
|
|
<> duplicateRestVariables
|
|
|
|
|
<> invalidRestSegments
|
2022-02-08 07:46:57 +03:00
|
|
|
|
<> ambiguousRestEndpoints
|
Avoid GraphQL schema rebuild when changing irrelevant Metadata
This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to:
- allowlists
- query collections
- cron triggers
- REST endpoints
- API limits
- metrics config
- GraphQL introspection options
- TLS allow lists
- OpenTelemetry
When is construction of the in-memory GraphQL schema cached between Metadata operations?
Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation.
However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction.
The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.)
We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance.
So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps.
That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema.
Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613
GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
|
|
|
|
<> endpointInconsistencies
|
|
|
|
|
<> cronTriggersInconsistencies
|
|
|
|
|
<> openTelemetryInconsistencies
|
2022-03-08 12:48:21 +03:00
|
|
|
|
<> inconsistentQueryCollections,
|
server: Simplify `BuildOutputs`
A bunch of configurations are retrieved from the Metadata, then stored in the `BuildOutputs` structure, only to then be forwarded to the `SchemaCache`, with extremely little processing in between.
So this simplifies the build pipeline for some parts of the metadata: just construct those things from `Metadata` directly, and store them in the `SchemaCache` without any intermediate container.
Why did we have the detour via `BuildOutputs` in the first place? Parts of the Metadata (codified by `MetadataObjId`) can generate _metadata inconsistencies_ and/or _schema dependencies_, which are related.
- Metadata inconsistencies are warnings that we show to the user, indicating that there's something wrong with their configuration, and they have to fix it.
- Schema dependencies are an internal mechanism that allows us to build a consistent view of the world. For instance, if we have a relationship from DB tables `books` to `authors`, but the `authors` table is inconsistent (e.g. it doesn't exist in the DB), then we have schema dependencies indicating that. The job of `resolveDependencies` is to then drop the relationship, so that we can at least generate a legal GraphQL schema for `books`.
If we never generate a schema dependency for a certain fragment of Metadata, then there is no reason to call `resolveDependencies` on it, and so there is no reason to store it in `BuildOutputs`.
---
The starting point that allows this refactor is to apply Metadata defaults before it reaches `buildAndCollectInfo`, so that metadata-with-defaults can be used elsewhere.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6609
GitOrigin-RevId: df0c4a7ff9451e10e02a40bf26304b26584ba483
2022-11-15 15:02:55 +03:00
|
|
|
|
scApiLimits = _metaApiLimits,
|
|
|
|
|
scMetricsConfig = _metaMetricsConfig,
|
2021-09-24 01:56:37 +03:00
|
|
|
|
scMetadataResourceVersion = Nothing,
|
server: Simplify `BuildOutputs`
A bunch of configurations are retrieved from the Metadata, then stored in the `BuildOutputs` structure, only to then be forwarded to the `SchemaCache`, with extremely little processing in between.
So this simplifies the build pipeline for some parts of the metadata: just construct those things from `Metadata` directly, and store them in the `SchemaCache` without any intermediate container.
Why did we have the detour via `BuildOutputs` in the first place? Parts of the Metadata (codified by `MetadataObjId`) can generate _metadata inconsistencies_ and/or _schema dependencies_, which are related.
- Metadata inconsistencies are warnings that we show to the user, indicating that there's something wrong with their configuration, and they have to fix it.
- Schema dependencies are an internal mechanism that allows us to build a consistent view of the world. For instance, if we have a relationship from DB tables `books` to `authors`, but the `authors` table is inconsistent (e.g. it doesn't exist in the DB), then we have schema dependencies indicating that. The job of `resolveDependencies` is to then drop the relationship, so that we can at least generate a legal GraphQL schema for `books`.
If we never generate a schema dependency for a certain fragment of Metadata, then there is no reason to call `resolveDependencies` on it, and so there is no reason to store it in `BuildOutputs`.
---
The starting point that allows this refactor is to apply Metadata defaults before it reaches `buildAndCollectInfo`, so that metadata-with-defaults can be used elsewhere.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6609
GitOrigin-RevId: df0c4a7ff9451e10e02a40bf26304b26584ba483
2022-11-15 15:02:55 +03:00
|
|
|
|
scSetGraphqlIntrospectionOptions = _metaSetGraphqlIntrospectionOptions,
|
|
|
|
|
scTlsAllowlist = networkTlsAllowlist _metaNetwork,
|
|
|
|
|
scQueryCollections = _metaQueryCollections,
|
2022-09-05 05:42:59 +03:00
|
|
|
|
scBackendCache = _boBackendCache resolvedOutputs,
|
server: Simplify `BuildOutputs`
A bunch of configurations are retrieved from the Metadata, then stored in the `BuildOutputs` structure, only to then be forwarded to the `SchemaCache`, with extremely little processing in between.
So this simplifies the build pipeline for some parts of the metadata: just construct those things from `Metadata` directly, and store them in the `SchemaCache` without any intermediate container.
Why did we have the detour via `BuildOutputs` in the first place? Parts of the Metadata (codified by `MetadataObjId`) can generate _metadata inconsistencies_ and/or _schema dependencies_, which are related.
- Metadata inconsistencies are warnings that we show to the user, indicating that there's something wrong with their configuration, and they have to fix it.
- Schema dependencies are an internal mechanism that allows us to build a consistent view of the world. For instance, if we have a relationship from DB tables `books` to `authors`, but the `authors` table is inconsistent (e.g. it doesn't exist in the DB), then we have schema dependencies indicating that. The job of `resolveDependencies` is to then drop the relationship, so that we can at least generate a legal GraphQL schema for `books`.
If we never generate a schema dependency for a certain fragment of Metadata, then there is no reason to call `resolveDependencies` on it, and so there is no reason to store it in `BuildOutputs`.
---
The starting point that allows this refactor is to apply Metadata defaults before it reaches `buildAndCollectInfo`, so that metadata-with-defaults can be used elsewhere.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6609
GitOrigin-RevId: df0c4a7ff9451e10e02a40bf26304b26584ba483
2022-11-15 15:02:55 +03:00
|
|
|
|
scSourceHealthChecks = buildHealthCheckCache _metaSources,
|
2022-11-23 19:40:21 +03:00
|
|
|
|
scSourcePingConfig = buildSourcePingCache _metaSources,
|
Avoid GraphQL schema rebuild when changing irrelevant Metadata
This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to:
- allowlists
- query collections
- cron triggers
- REST endpoints
- API limits
- metrics config
- GraphQL introspection options
- TLS allow lists
- OpenTelemetry
When is construction of the in-memory GraphQL schema cached between Metadata operations?
Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation.
However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction.
The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.)
We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance.
So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps.
That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema.
Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613
GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
|
|
|
|
scOpenTelemetryConfig = openTelemetryInfo
|
2021-09-24 01:56:37 +03:00
|
|
|
|
}
|
2019-08-14 02:34:37 +03:00
|
|
|
|
where
|
Avoid GraphQL schema rebuild when changing irrelevant Metadata
This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to:
- allowlists
- query collections
- cron triggers
- REST endpoints
- API limits
- metrics config
- GraphQL introspection options
- TLS allow lists
- OpenTelemetry
When is construction of the in-memory GraphQL schema cached between Metadata operations?
Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation.
However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction.
The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.)
We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance.
So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps.
That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema.
Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613
GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
|
|
|
|
-- See Note [Avoiding GraphQL schema rebuilds when changing irrelevant Metadata]
--
-- Single arrow that goes from the metadata (plus invalidation keys) all the
-- way to the GraphQL contexts. It yields a triple of:
--
--   * the inconsistent objects collected while building the outputs,
--   * the full result of 'resolveDependencies' (resolved outputs,
--     dependency-related inconsistencies, resolved dependencies),
--   * the result of 'buildGQLContext'.
buildOutputsAndSchema = proc (metadataDep, invalidationKeysDep) -> do
  -- Build the outputs while capturing everything emitted on the writer arrow.
  (buildOutputs, collected) <- runWriterA buildAndCollectInfo -< (metadataDep, invalidationKeysDep)
  -- 'Left's go to the inconsistency list, 'Right's still need to be resolved.
  let (inconsistentObjects, pendingDependencies) = partitionEithers (toList collected)
  resolution@(resolvedOutputs, _, _) <- resolveDependencies -< (buildOutputs, pendingDependencies)
  -- Only the resolved outputs are fed into the GraphQL schema construction.
  gqlContexts <-
    bindA
      -< do
        serverConfigCtx <- askServerConfigCtx
        buildGQLContext
          serverConfigCtx
          (_boSources resolvedOutputs)
          (_boRemoteSchemas resolvedOutputs)
          (_boActions resolvedOutputs)
          (_boCustomTypes resolvedOutputs)
  returnA -< (inconsistentObjects, resolution, gqlContexts)
|
|
|
|
|
|
2022-09-05 05:42:59 +03:00
|
|
|
|
-- Resolves the backend info for one specific backend @b@ and returns it as a
-- 'BackendCache' holding exactly that one entry. The invalidation keys for
-- @b@ are looked up in the shared per-backend invalidation map.
resolveBackendInfo' ::
  forall arr m b.
  ( BackendMetadata b,
    ArrowChoice arr,
    Inc.ArrowCache m arr,
    Inc.ArrowDistribute arr,
    ArrowWriter (Seq (Either InconsistentMetadata MetadataDependency)) arr,
    MonadIO m,
    HasHttpManagerM m
  ) =>
  (BackendConfigWrapper b, Inc.Dependency (BackendMap BackendInvalidationKeysWrapper)) `arr` BackendCache
resolveBackendInfo' = proc (configWrapper, invalidationMap) -> do
  -- Narrow the dependency on the whole map down to the keys of backend @b@.
  let invalidationKeys =
        Inc.selectMaybeD
          #unBackendInvalidationKeysWrapper
          (BackendMap.lookupD @b invalidationMap)
  info <- resolveBackendInfo @b logger -< (invalidationKeys, unBackendConfigWrapper configWrapper)
  returnA -< BackendMap.singleton (BackendInfoWrapper @b info)
|
2022-09-01 08:27:57 +03:00
|
|
|
|
|
2022-09-05 05:42:59 +03:00
|
|
|
|
-- Builds the 'BackendCache' for a list of backend configurations by resolving
-- the head of the list with 'resolveBackendInfo'', recursing on the tail, and
-- combining the two results with '<>'. An empty list yields 'mempty'.
resolveBackendCache ::
  forall arr m.
  ( ArrowChoice arr,
    Inc.ArrowCache m arr,
    Inc.ArrowDistribute arr,
    ArrowWriter (Seq (Either InconsistentMetadata MetadataDependency)) arr,
    MonadIO m,
    HasHttpManagerM m
  ) =>
  (Inc.Dependency (BackendMap BackendInvalidationKeysWrapper), [AB.AnyBackend BackendConfigWrapper]) `arr` BackendCache
resolveBackendCache = proc (invalidationMap, configs) ->
  case configs of
    (currentConfig : remainingConfigs) -> do
      -- Dispatch on the existentially wrapped backend of the first config.
      currentInfo <-
        AB.dispatchAnyBackendArrow @BackendMetadata @HasTag resolveBackendInfo' -< (currentConfig, invalidationMap)
      remainingInfos <- resolveBackendCache -< (invalidationMap, remainingConfigs)
      returnA -< currentInfo <> remainingInfos
    [] -> returnA -< mempty
|
2022-09-01 08:27:57 +03:00
|
|
|
|
|
2022-11-29 04:00:28 +03:00
|
|
|
|
-- Attempts to resolve the 'SourceConfig' of a source. The whole arrow is
-- wrapped in 'Inc.cache', and it additionally depends on the invalidation key
-- stored under the source's name. Resolution runs under
-- 'withRecordInconsistency' against the source's metadata object, hence the
-- 'Maybe' in the result.
tryGetSourceConfig ::
  forall b arr m.
  ( ArrowChoice arr,
    Inc.ArrowCache m arr,
    ArrowWriter (Seq (Either InconsistentMetadata MetadataDependency)) arr,
    MonadIO m,
    MonadResolveSource m,
    HasHttpManagerM m,
    BackendMetadata b
  ) =>
  ( Inc.Dependency (HashMap SourceName Inc.InvalidationKey),
    SourceName,
    SourceConnConfiguration b,
    BackendSourceKind b,
    BackendInfo b
  )
    `arr` Maybe (SourceConfig b)
tryGetSourceConfig = Inc.cache proc (invalidationKeys, sourceName, connConfig, backendKind, backendInfo) -> do
  -- Metadata object that any recorded inconsistency is attributed to.
  let sourceObject = MetadataObject (MOSource sourceName) (toJSON sourceName)
  httpManager <- bindA -< askHttpManager
  -- Depend only on this source's own invalidation key.
  Inc.dependOn -< Inc.selectKeyD sourceName invalidationKeys
  (|
    withRecordInconsistency
      (liftEitherA <<< bindA -< resolveSourceConfig @b logger sourceName connConfig backendKind backendInfo env httpManager)
    |) sourceObject
|
|
|
|
|
|
2022-11-29 04:00:28 +03:00
|
|
|
|
-- Attempts to fully resolve a source: first its 'SourceConfig' (through
-- 'tryGetSourceConfig'), then its database metadata. Gives back 'Nothing'
-- when the source config could not be obtained; the database metadata
-- resolution itself runs under 'withRecordInconsistency' against the source's
-- metadata object. A successfully resolved source is also sent to the logger.
tryResolveSource ::
  forall b arr m.
  ( ArrowChoice arr,
    Inc.ArrowCache m arr,
    ArrowWriter (Seq (Either InconsistentMetadata MetadataDependency)) arr,
    MonadIO m,
    MonadBaseControl IO m,
    MonadResolveSource m,
    HasHttpManagerM m,
    BackendMetadata b
  ) =>
  ( Inc.Dependency (HashMap SourceName Inc.InvalidationKey),
    BackendInfoAndSourceMetadata b
  )
    `arr` Maybe (ResolvedSource b)
tryResolveSource = Inc.cache proc
  ( invalidationKeys,
    BackendInfoAndSourceMetadata
      { _bcasmSourceMetadata = sourceMetadata,
        _bcasmBackendInfo = backendInfo
      }
    )
  -> do
    let sourceName = _smName sourceMetadata
        sourceObject = MetadataObject (MOSource sourceName) (toJSON sourceName)

    resolvedConfig <-
      tryGetSourceConfig @b
        -<
          ( invalidationKeys,
            sourceName,
            _smConfiguration sourceMetadata,
            _smKind sourceMetadata,
            backendInfo
          )
    case resolvedConfig of
      Just sourceConfig ->
        (|
          withRecordInconsistency
            ( liftEitherA <<< bindA
                -< do
                  let typeCustomization = getSourceTypeCustomization (_smCustomization sourceMetadata)
                  resolution <- resolveDatabaseMetadata sourceMetadata sourceConfig typeCustomization
                  -- Log the resolved source only when resolution succeeded.
                  for_ resolution (liftIO . unLogger logger)
                  pure resolution
            )
          |) sourceObject
      Nothing -> returnA -< Nothing
|
2020-12-28 15:56:00 +03:00
|
|
|
|
|
2021-05-25 09:50:13 +03:00
|
|
|
|
-- impl notes (swann):
--
-- The cache invalidation key used here is whether any event trigger is
-- present at all; catalog init is re-run when that fact changes. In other
-- words, the cache is invalidated and rebuilt with the catalog only when
-- there is at least one event trigger present. This is correct because the
-- only transition we care about is from zero event triggers to nonzero (not
-- necessarily one, as Anon has observed, because replace_metadata can add
-- multiple event triggers in one go).
--
-- A future optimisation would be to cache, on a per-source basis, whether or
-- not the event catalog itself exists, and to then trigger catalog init when
-- an event trigger is created _but only if_ this cached information says the
-- event catalog doesn't already exist.

initCatalogIfNeeded ::
  forall b arr m.
  ( ArrowChoice arr,
    Inc.ArrowCache m arr,
    MonadIO m,
    BackendMetadata b,
    HasServerConfigCtx m,
    MonadError QErr m,
    MonadBaseControl IO m
  ) =>
  (Proxy b, Bool, SourceConfig b) `arr` (RecreateEventTriggers, SourceCatalogMigrationState)
initCatalogIfNeeded = Inc.cache proc (Proxy, atleastOneTrigger, sourceConfig) -> do
  bindA
    -<
      if not atleastOneTrigger
        then -- without any event trigger there is nothing to initialise
          pure (RETDoNothing, SCMSUninitializedSource)
        else do
          maintenanceMode <- _sccMaintenanceMode <$> askServerConfigCtx
          eventingMode <- _sccEventingMode <$> askServerConfigCtx
          readOnlyMode <- _sccReadOnlyMode <$> askServerConfigCtx

          -- Result used whenever the migration is deliberately not performed.
          let onHold reason = pure (RETDoNothing, SCMSMigrationOnHold reason)

          if
              -- when read-only mode is enabled, don't perform any migrations
              | readOnlyMode == ReadOnlyModeEnabled -> onHold "read-only mode enabled"
              -- when eventing mode is disabled, don't perform any migrations
              | eventingMode == EventingDisabled -> onHold "eventing mode disabled"
              -- when maintenance mode is enabled, don't perform any migrations
              | maintenanceMode == (MaintenanceModeEnabled ()) -> onHold "maintenance mode enabled"
              | otherwise ->
                  -- Preparing the catalog is retried because, in cloud,
                  -- several workers (graphql-engine instances) may try to
                  -- migrate the same source catalog concurrently. Only one of
                  -- them succeeds; the others fail and would otherwise be
                  -- left in an inconsistent state. On retry they see that the
                  -- catalog has already been migrated and do not attempt the
                  -- migration again.
                  liftEither
                    =<< Retry.retrying
                      ( Retry.constantDelay (fromIntegral (diffTimeToMicroSeconds (seconds (Seconds 10))))
                          <> Retry.limitRetries 3
                      )
                      (\_retryStatus outcome -> return (isLeft outcome))
                      (\_retryStatus -> runExceptT (prepareCatalog @b sourceConfig))
|
2021-09-24 01:56:37 +03:00
|
|
|
|
|
|
|
|
|
-- | Build the 'SourceInfo' for one source of backend @b@ from its metadata and
-- the partially resolved inputs gathered earlier in the schema cache build.
--
-- The stages run in this order:
--
--   1. every table's column-only field map is extended by 'addNonColumnFields';
--   2. table permissions are built with 'buildTablePermissions'; a 'Left' coming
--      out of this stage is rethrown in @m@ via 'liftEither';
--   3. the naming convention is resolved from the server configuration and the
--      source customization;
--   4. each tracked function is resolved under 'withRecordInconsistencyM', and
--      only the functions for which that yields a value are kept.
--
-- The @DBTablesMetadata b@ component of the input is ignored.
buildSource ::
  forall b arr m.
  ( ArrowChoice arr,
    ArrowKleisli m arr,
    ArrowWriter (Seq (Either InconsistentMetadata MetadataDependency)) arr,
    HasServerConfigCtx m,
    MonadError QErr m,
    BackendMetadata b,
    GetAggregationPredicatesDeps b
  ) =>
  ( HashMap SourceName (AB.AnyBackend PartiallyResolvedSource),
    SourceMetadata b,
    SourceConfig b,
    HashMap (TableName b) (TableCoreInfoG b (ColumnInfo b) (ColumnInfo b)),
    HashMap (TableName b) (EventTriggerInfoMap b),
    DBTablesMetadata b,
    DBFunctionsMetadata b,
    PartiallyResolvedRemoteSchemaMap,
    OrderedRoles
  )
    `arr` (SourceInfo b)
buildSource = proc (allSources, sourceMetadata, sourceConfig, tablesRawInfo, eventTriggerInfoMaps, _dbTables, dbFunctions, remoteSchemaMap, orderedRoles) -> do
  let SourceMetadata sourceName _backendKind tables functions _ queryTagsConfig sourceCustomization _healthCheckConfig = sourceMetadata
      (_, nonColumnInputs, permissions) = unzip3 (mkTableInputs <$> OMap.elems tables)
      nonColumnInputsByTable = mapFromL _nctiTable nonColumnInputs
      permissionInputsByTable = mapFromL _tpiTable permissions
      -- Pairs up the values of two table-keyed maps; it is an intersection, so
      -- a table missing from either side is left out of the result.
      alignTableMap :: HashMap (TableName b) a -> HashMap (TableName b) c -> HashMap (TableName b) (a, c)
      alignTableMap = M.intersectionWith (,)

  -- Stage 1: relationships and computed fields are added on top of the columns.
  tableCoreInfos :: HashMap (TableName b) (TableCoreInfo b) <-
    interpretWriter
      -< for (alignTableMap tablesRawInfo nonColumnInputsByTable) \(rawInfo, nonColumnInput) -> do
        allFields :: FieldInfoMap (FieldInfo b) <-
          addNonColumnFields allSources sourceName tablesRawInfo (_tciFieldInfoMap rawInfo) remoteSchemaMap dbFunctions nonColumnInput
        pure (rawInfo {_tciFieldInfoMap = allFields})

  -- Stage 2: permissions. The traversal runs in 'ExceptT' so that a failure is
  -- captured as a value first and only rethrown once the writer was interpreted.
  tableCacheOrError <-
    interpretWriter
      -< runExceptT $
        for
          (alignTableMap (alignTableMap tableCoreInfos permissionInputsByTable) eventTriggerInfoMaps)
          \((coreInfo, permissionInputs), eventTriggers) -> do
            permissionInfos <-
              buildTablePermissions sourceName tableCoreInfos (_tciFieldInfoMap coreInfo) permissionInputs orderedRoles
            pure $ TableInfo coreInfo permissionInfos eventTriggers (mkAdminRolePermInfo coreInfo)
  -- A failure here is not recoverable: it means the inherited roles were not
  -- ordered in a way that lets permission building succeed.
  tableCache <- bindA -< liftEither tableCacheOrError

  -- Stage 3: naming convention. The bang patterns are deliberate: leaving these
  -- values unevaluated measurably hurts memory residency in the benchmark.
  !defaultNC <- bindA -< fmap _sccDefaultNamingConvention askServerConfigCtx
  !namingConventionsEnabled <- bindA -< elem EFNamingConventions . _sccExperimentalFeatures <$> askServerConfigCtx
  !namingConv <-
    bindA
      -<
        if namingConventionsEnabled
          then getNamingCase sourceCustomization (namingConventionSupport @b) defaultNC
          else pure HasuraCase
  let resolvedCustomization = mkResolvedSourceCustomization sourceCustomization namingConv

  -- Stage 4: SQL functions.
  maybeFunctionInfos <-
    interpretWriter
      -< for (OMap.elems functions) \(FunctionMetadata qf config functionPermissions comment) -> do
        let metadataObject =
              MetadataObject
                (MOSourceObjId sourceName (AB.mkAnyBackend (SMOFunction @b qf)))
                (toJSON (TrackFunction @b qf))
            schemaObject = SOSourceObj sourceName (AB.mkAnyBackend (SOIFunction @b qf))
            permissionsMap =
              mkBooleanPermissionMap FunctionPermissionInfo (mapFromL _fpmRole functionPermissions) orderedRoles
            addFunctionContext e = "in function " <> qf <<> ": " <> e
        withRecordInconsistencyM metadataObject $ modifyErr addFunctionContext do
          -- a function name absent from the introspected functions is looked up as "no definitions"
          rawFunctionInfo <- handleMultipleFunctions @b qf (fromMaybe [] (M.lookup qf dbFunctions))
          (functionInfo, dep) <-
            buildFunctionInfo sourceName qf (SystemDefined False) config permissionsMap rawFunctionInfo comment namingConv
          recordDependenciesM metadataObject schemaObject [dep]
          pure functionInfo
  -- Entries that came back as 'Nothing' are dropped (presumably the functions
  -- that were recorded as inconsistent; confirm against 'withRecordInconsistencyM').
  let functionCache = mapFromL _fiSQLName (catMaybes maybeFunctionInfos)

  returnA -< SourceInfo sourceName tableCache functionCache sourceConfig queryTagsConfig resolvedCustomization
|
2021-02-14 09:07:52 +03:00
|
|
|
|
|
2021-09-24 01:56:37 +03:00
|
|
|
|
buildAndCollectInfo ::
|
|
|
|
|
forall arr m.
|
|
|
|
|
( ArrowChoice arr,
|
|
|
|
|
Inc.ArrowDistribute arr,
|
|
|
|
|
Inc.ArrowCache m arr,
|
2022-11-15 19:58:51 +03:00
|
|
|
|
ArrowWriter (Seq (Either InconsistentMetadata MetadataDependency)) arr,
|
2021-09-24 01:56:37 +03:00
|
|
|
|
MonadIO m,
|
|
|
|
|
MonadError QErr m,
|
|
|
|
|
MonadReader BuildReason m,
|
|
|
|
|
MonadBaseControl IO m,
|
|
|
|
|
HasHttpManagerM m,
|
|
|
|
|
HasServerConfigCtx m,
|
|
|
|
|
MonadResolveSource m
|
|
|
|
|
) =>
|
Avoid GraphQL schema rebuild when changing irrelevant Metadata
This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to:
- allowlists
- query collections
- cron triggers
- REST endpoints
- API limits
- metrics config
- GraphQL introspection options
- TLS allow lists
- OpenTelemetry
When is construction of the in-memory GraphQL schema cached between Metadata operations?
Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation.
However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction.
The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.)
We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance.
So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps.
That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema.
Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613
GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
|
|
|
|
(Inc.Dependency Metadata, Inc.Dependency InvalidationKeys) `arr` BuildOutputs
|
|
|
|
|
buildAndCollectInfo = proc (metadataDep, invalidationKeys) -> do
|
|
|
|
|
sources <- Inc.dependOn -< Inc.selectD #_metaSources metadataDep
|
|
|
|
|
remoteSchemas <- Inc.dependOn -< Inc.selectD #_metaRemoteSchemas metadataDep
|
|
|
|
|
customTypes <- Inc.dependOn -< Inc.selectD #_metaCustomTypes metadataDep
|
|
|
|
|
actions <- Inc.dependOn -< Inc.selectD #_metaActions metadataDep
|
|
|
|
|
inheritedRoles <- Inc.dependOn -< Inc.selectD #_metaInheritedRoles metadataDep
|
|
|
|
|
backendConfigs <- Inc.dependOn -< Inc.selectD #_metaBackendConfigs metadataDep
|
|
|
|
|
let actionRoles = map _apmRole . _amPermissions =<< OMap.elems actions
|
[Preview] Inherited roles for postgres read queries
fixes #3868
docker image - `hasura/graphql-engine:inherited-roles-preview-48b73a2de`
Note:
To be able to use the inherited roles feature, the graphql-engine should be started with the env variable `HASURA_GRAPHQL_EXPERIMENTAL_FEATURES` set to `inherited_roles`.
Introduction
------------
This PR implements the idea of multiple roles as presented in this [paper](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/FGALanguageICDE07.pdf). The multiple roles feature in this PR can be used via inherited roles. An inherited role is a role which can be created by combining multiple singular roles. For example, if there are two roles `author` and `editor` configured in the graphql-engine, then we can create an inherited role named `combined_author_editor` which combines the select permissions of the `author` and `editor` roles, and then make GraphQL queries using the `combined_author_editor` role.
How are select permissions of different roles combined?
------------------------------------------------------------
A select permission includes 5 things:
1. Columns accessible to the role
2. Row selection filter
3. Limit
4. Allow aggregation
5. Scalar computed fields accessible to the role
Suppose there are two roles, `role1` gives access to the `address` column with row filter `P1` and `role2` gives access to both the `address` and the `phone` column with row filter `P2` and we create a new role `combined_roles` which combines `role1` and `role2`.
Let's say the following GraphQL query is queried with the `combined_roles` role.
```graphql
query {
employees {
address
phone
}
}
```
This will translate to the following SQL query:
```sql
select
(case when (P1 or P2) then address else null end) as address,
(case when P2 then phone else null end) as phone
from employee
where (P1 or P2)
```
The other parameters of the select permission will be combined in the following manner:
1. Limit - Minimum of the limits will be the limit of the inherited role
2. Allow aggregations - If any of the roles allows aggregation, then the inherited role will allow aggregation
3. Scalar computed fields - same as table column fields, as in the above example
APIs for inherited roles:
----------------------
1. `add_inherited_role`
`add_inherited_role` is the [metadata API](https://hasura.io/docs/1.0/graphql/core/api-reference/index.html#schema-metadata-api) to create a new inherited role. It accepts two arguments
`role_name`: the name of the inherited role to be added (String)
`role_set`: list of roles that need to be combined (Array of Strings)
Example:
```json
{
"type": "add_inherited_role",
"args": {
"role_name":"combined_user",
"role_set":[
"user",
"user1"
]
}
}
```
After adding the inherited role, it can be used just like a singular role.
Note:
An inherited role can only be created with non-inherited/singular roles.
2. `drop_inherited_role`
The `drop_inherited_role` API accepts the name of the inherited role and drops it from the metadata. It accepts a single argument:
`role_name`: name of the inherited role to be dropped
Example:
```json
{
"type": "drop_inherited_role",
"args": {
"role_name":"combined_user"
}
}
```
Metadata
---------
The derived roles metadata will be included under the `experimental_features` key while exporting the metadata.
```json
{
"experimental_features": {
"derived_roles": [
{
"role_name": "manager_is_employee_too",
"role_set": [
"employee",
"manager"
]
}
]
}
}
```
Scope
------
Only postgres queries and subscriptions are supported in this PR.
Important points:
-----------------
1. All columns exposed to an inherited role will be marked as `nullable`, this is done so that cell value nullification can be done.
TODOs
-------
- [ ] Tests
- [ ] Test a GraphQL query running with a inherited role without enabling inherited roles in experimental features
- [] Tests for aggregate queries, limit, computed fields, functions, subscriptions (?)
- [ ] Introspection test with a inherited role (nullability changes in a inherited role)
- [ ] Docs
- [ ] Changelog
Co-authored-by: Vamshi Surabhi <6562944+0x777@users.noreply.github.com>
GitOrigin-RevId: 3b8ee1e11f5ceca80fe294f8c074d42fbccfec63
2021-03-08 14:14:13 +03:00
|
|
|
|
remoteSchemaRoles = map _rspmRole . _rsmPermissions =<< OMap.elems remoteSchemas
|
|
|
|
|
sourceRoles =
|
2021-09-24 01:56:37 +03:00
|
|
|
|
HS.fromList $
|
|
|
|
|
concat $
|
2022-08-29 03:58:03 +03:00
|
|
|
|
OMap.elems sources >>= \(BackendSourceMetadata e) ->
|
2022-09-02 09:33:21 +03:00
|
|
|
|
AB.dispatchAnyBackend @Backend e \(SourceMetadata _ _ tables _functions _ _ _ _) -> do
|
2021-09-24 01:56:37 +03:00
|
|
|
|
table <- OMap.elems tables
|
|
|
|
|
pure $
|
|
|
|
|
OMap.keys (_tmInsertPermissions table)
|
|
|
|
|
<> OMap.keys (_tmSelectPermissions table)
|
|
|
|
|
<> OMap.keys (_tmUpdatePermissions table)
|
|
|
|
|
<> OMap.keys (_tmDeletePermissions table)
|
2021-07-17 00:18:58 +03:00
|
|
|
|
inheritedRoleNames = OMap.keys inheritedRoles
|
|
|
|
|
allRoleNames = sourceRoles <> HS.fromList (remoteSchemaRoles <> actionRoles <> inheritedRoleNames)
|
[Preview] Inherited roles for postgres read queries
fixes #3868
docker image - `hasura/graphql-engine:inherited-roles-preview-48b73a2de`
Note:
To be able to use the inherited roles feature, the graphql-engine should be started with the env variable `HASURA_GRAPHQL_EXPERIMENTAL_FEATURES` set to `inherited_roles`.
Introduction
------------
This PR implements the idea of multiple roles as presented in this [paper](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/FGALanguageICDE07.pdf). The multiple roles feature in this PR can be used via inherited roles. An inherited role is a role which can be created by combining multiple singular roles. For example, if there are two roles `author` and `editor` configured in the graphql-engine, then we can create an inherited role named `combined_author_editor` which combines the select permissions of the `author` and `editor` roles, and then make GraphQL queries using the `combined_author_editor` role.
How are select permissions of different roles combined?
------------------------------------------------------------
A select permission includes 5 things:
1. Columns accessible to the role
2. Row selection filter
3. Limit
4. Allow aggregation
5. Scalar computed fields accessible to the role
Suppose there are two roles, `role1` gives access to the `address` column with row filter `P1` and `role2` gives access to both the `address` and the `phone` column with row filter `P2` and we create a new role `combined_roles` which combines `role1` and `role2`.
Let's say the following GraphQL query is queried with the `combined_roles` role.
```graphql
query {
employees {
address
phone
}
}
```
This will translate to the following SQL query:
```sql
select
(case when (P1 or P2) then address else null end) as address,
(case when P2 then phone else null end) as phone
from employee
where (P1 or P2)
```
The other parameters of the select permission will be combined in the following manner:
1. Limit - Minimum of the limits will be the limit of the inherited role
2. Allow aggregations - If any of the roles allows aggregation, then the inherited role will allow aggregation
3. Scalar computed fields - same as table column fields, as in the above example
APIs for inherited roles:
----------------------
1. `add_inherited_role`
`add_inherited_role` is the [metadata API](https://hasura.io/docs/1.0/graphql/core/api-reference/index.html#schema-metadata-api) to create a new inherited role. It accepts two arguments
`role_name`: the name of the inherited role to be added (String)
`role_set`: list of roles that need to be combined (Array of Strings)
Example:
```json
{
"type": "add_inherited_role",
"args": {
"role_name":"combined_user",
"role_set":[
"user",
"user1"
]
}
}
```
After adding the inherited role, it can be used just like a singular role.
Note:
An inherited role can only be created with non-inherited/singular roles.
2. `drop_inherited_role`
The `drop_inherited_role` API accepts the name of the inherited role and drops it from the metadata. It accepts a single argument:
`role_name`: name of the inherited role to be dropped
Example:
```json
{
"type": "drop_inherited_role",
"args": {
"role_name":"combined_user"
}
}
```
Metadata
---------
The derived roles metadata will be included under the `experimental_features` key while exporting the metadata.
```json
{
"experimental_features": {
"derived_roles": [
{
"role_name": "manager_is_employee_too",
"role_set": [
"employee",
"manager"
]
}
]
}
}
```
Scope
------
Only postgres queries and subscriptions are supported in this PR.
Important points:
-----------------
1. All columns exposed to an inherited role will be marked as `nullable`, this is done so that cell value nullification can be done.
TODOs
-------
- [ ] Tests
- [ ] Test a GraphQL query running with a inherited role without enabling inherited roles in experimental features
- [] Tests for aggregate queries, limit, computed fields, functions, subscriptions (?)
- [ ] Introspection test with a inherited role (nullability changes in a inherited role)
- [ ] Docs
- [ ] Changelog
Co-authored-by: Vamshi Surabhi <6562944+0x777@users.noreply.github.com>
GitOrigin-RevId: 3b8ee1e11f5ceca80fe294f8c074d42fbccfec63
2021-03-08 14:14:13 +03:00
|
|
|
|
|
2021-07-17 00:18:58 +03:00
|
|
|
|
-- roles which have some kind of permission (action/remote schema/table/function) set in the metadata
|
2021-08-09 13:20:04 +03:00
|
|
|
|
let metadataRoles = mapFromL _rRoleName $ (`Role` ParentRoles mempty) <$> toList allRoleNames
|
[Preview] Inherited roles for postgres read queries
fixes #3868
docker image - `hasura/graphql-engine:inherited-roles-preview-48b73a2de`
Note:
To be able to use the inherited roles feature, the graphql-engine should be started with the env variable `HASURA_GRAPHQL_EXPERIMENTAL_FEATURES` set to `inherited_roles`.
Introduction
------------
This PR implements the idea of multiple roles as presented in this [paper](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/FGALanguageICDE07.pdf). The multiple roles feature in this PR can be used via inherited roles. An inherited role is a role which can be created by combining multiple singular roles. For example, if there are two roles `author` and `editor` configured in the graphql-engine, then we can create an inherited role named `combined_author_editor`, which combines the select permissions of the `author` and `editor` roles, and then make GraphQL queries using the `combined_author_editor` role.
How are select permissions of different roles combined?
------------------------------------------------------------
A select permission includes 5 things:
1. Columns accessible to the role
2. Row selection filter
3. Limit
4. Allow aggregation
5. Scalar computed fields accessible to the role
Suppose there are two roles, `role1` gives access to the `address` column with row filter `P1` and `role2` gives access to both the `address` and the `phone` column with row filter `P2` and we create a new role `combined_roles` which combines `role1` and `role2`.
Let's say the following GraphQL query is queried with the `combined_roles` role.
```graphql
query {
employees {
address
phone
}
}
```
This will translate to the following SQL query:
```sql
select
(case when (P1 or P2) then address else null end) as address,
(case when P2 then phone else null end) as phone
from employee
where (P1 or P2)
```
The other parameters of the select permission will be combined in the following manner:
1. Limit - Minimum of the limits will be the limit of the inherited role
2. Allow aggregations - If any of the roles allows aggregation, then the inherited role will allow aggregation
3. Scalar computed fields - same as table column fields, as in the above example
APIs for inherited roles:
----------------------
1. `add_inherited_role`
`add_inherited_role` is the [metadata API](https://hasura.io/docs/1.0/graphql/core/api-reference/index.html#schema-metadata-api) to create a new inherited role. It accepts two arguments
`role_name`: the name of the inherited role to be added (String)
`role_set`: list of roles that need to be combined (Array of Strings)
Example:
```json
{
"type": "add_inherited_role",
"args": {
"role_name":"combined_user",
"role_set":[
"user",
"user1"
]
}
}
```
After adding the inherited role, it can be used just like a singular role.
Note:
An inherited role can only be created with non-inherited/singular roles.
2. `drop_inherited_role`
The `drop_inherited_role` API accepts the name of the inherited role and drops it from the metadata. It accepts a single argument:
`role_name`: name of the inherited role to be dropped
Example:
```json
{
"type": "drop_inherited_role",
"args": {
"role_name":"combined_user"
}
}
```
Metadata
---------
The derived roles metadata will be included under the `experimental_features` key while exporting the metadata.
```json
{
"experimental_features": {
"derived_roles": [
{
"role_name": "manager_is_employee_too",
"role_set": [
"employee",
"manager"
]
}
]
}
}
```
Scope
------
Only postgres queries and subscriptions are supported in this PR.
Important points:
-----------------
1. All columns exposed to an inherited role will be marked as `nullable`; this is done so that cell values can be nullified.
TODOs
-------
- [ ] Tests
- [ ] Test a GraphQL query running with an inherited role without enabling inherited roles in experimental features
- [ ] Tests for aggregate queries, limit, computed fields, functions, subscriptions (?)
- [ ] Introspection test with an inherited role (nullability changes in an inherited role)
- [ ] Docs
- [ ] Changelog
Co-authored-by: Vamshi Surabhi <6562944+0x777@users.noreply.github.com>
GitOrigin-RevId: 3b8ee1e11f5ceca80fe294f8c074d42fbccfec63
2021-03-08 14:14:13 +03:00
|
|
|
|
|
2022-11-30 12:11:00 +03:00
|
|
|
|
resolvedInheritedRoles <- interpretWriter -< buildInheritedRoles allRoleNames (OMap.elems inheritedRoles)
|
[Preview] Inherited roles for postgres read queries
fixes #3868
docker image - `hasura/graphql-engine:inherited-roles-preview-48b73a2de`
Note:
To be able to use the inherited roles feature, the graphql-engine should be started with the env variable `HASURA_GRAPHQL_EXPERIMENTAL_FEATURES` set to `inherited_roles`.
Introduction
------------
This PR implements the idea of multiple roles as presented in this [paper](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/FGALanguageICDE07.pdf). The multiple roles feature in this PR can be used via inherited roles. An inherited role is a role which can be created by combining multiple singular roles. For example, if there are two roles `author` and `editor` configured in the graphql-engine, then we can create an inherited role named `combined_author_editor`, which combines the select permissions of the `author` and `editor` roles, and then make GraphQL queries using the `combined_author_editor` role.
How are select permissions of different roles combined?
------------------------------------------------------------
A select permission includes 5 things:
1. Columns accessible to the role
2. Row selection filter
3. Limit
4. Allow aggregation
5. Scalar computed fields accessible to the role
Suppose there are two roles, `role1` gives access to the `address` column with row filter `P1` and `role2` gives access to both the `address` and the `phone` column with row filter `P2` and we create a new role `combined_roles` which combines `role1` and `role2`.
Let's say the following GraphQL query is queried with the `combined_roles` role.
```graphql
query {
employees {
address
phone
}
}
```
This will translate to the following SQL query:
```sql
select
(case when (P1 or P2) then address else null end) as address,
(case when P2 then phone else null end) as phone
from employee
where (P1 or P2)
```
The other parameters of the select permission will be combined in the following manner:
1. Limit - Minimum of the limits will be the limit of the inherited role
2. Allow aggregations - If any of the roles allows aggregation, then the inherited role will allow aggregation
3. Scalar computed fields - same as table column fields, as in the above example
APIs for inherited roles:
----------------------
1. `add_inherited_role`
`add_inherited_role` is the [metadata API](https://hasura.io/docs/1.0/graphql/core/api-reference/index.html#schema-metadata-api) to create a new inherited role. It accepts two arguments
`role_name`: the name of the inherited role to be added (String)
`role_set`: list of roles that need to be combined (Array of Strings)
Example:
```json
{
"type": "add_inherited_role",
"args": {
"role_name":"combined_user",
"role_set":[
"user",
"user1"
]
}
}
```
After adding the inherited role, it can be used just like a singular role.
Note:
An inherited role can only be created with non-inherited/singular roles.
2. `drop_inherited_role`
The `drop_inherited_role` API accepts the name of the inherited role and drops it from the metadata. It accepts a single argument:
`role_name`: name of the inherited role to be dropped
Example:
```json
{
"type": "drop_inherited_role",
"args": {
"role_name":"combined_user"
}
}
```
Metadata
---------
The derived roles metadata will be included under the `experimental_features` key while exporting the metadata.
```json
{
"experimental_features": {
"derived_roles": [
{
"role_name": "manager_is_employee_too",
"role_set": [
"employee",
"manager"
]
}
]
}
}
```
Scope
------
Only postgres queries and subscriptions are supported in this PR.
Important points:
-----------------
1. All columns exposed to an inherited role will be marked as `nullable`; this is done so that cell values can be nullified.
TODOs
-------
- [ ] Tests
- [ ] Test a GraphQL query running with an inherited role without enabling inherited roles in experimental features
- [ ] Tests for aggregate queries, limit, computed fields, functions, subscriptions (?)
- [ ] Introspection test with an inherited role (nullability changes in an inherited role)
- [ ] Docs
- [ ] Changelog
Co-authored-by: Vamshi Surabhi <6562944+0x777@users.noreply.github.com>
GitOrigin-RevId: 3b8ee1e11f5ceca80fe294f8c074d42fbccfec63
2021-03-08 14:14:13 +03:00
|
|
|
|
|
2021-07-17 00:18:58 +03:00
|
|
|
|
let allRoles = resolvedInheritedRoles `M.union` metadataRoles
|
[Preview] Inherited roles for postgres read queries
fixes #3868
docker image - `hasura/graphql-engine:inherited-roles-preview-48b73a2de`
Note:
To be able to use the inherited roles feature, the graphql-engine should be started with the env variable `HASURA_GRAPHQL_EXPERIMENTAL_FEATURES` set to `inherited_roles`.
Introduction
------------
This PR implements the idea of multiple roles as presented in this [paper](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/FGALanguageICDE07.pdf). The multiple roles feature in this PR can be used via inherited roles. An inherited role is a role which can be created by combining multiple singular roles. For example, if there are two roles `author` and `editor` configured in the graphql-engine, then we can create an inherited role named `combined_author_editor`, which combines the select permissions of the `author` and `editor` roles, and then make GraphQL queries using the `combined_author_editor` role.
How are select permissions of different roles combined?
------------------------------------------------------------
A select permission includes 5 things:
1. Columns accessible to the role
2. Row selection filter
3. Limit
4. Allow aggregation
5. Scalar computed fields accessible to the role
Suppose there are two roles, `role1` gives access to the `address` column with row filter `P1` and `role2` gives access to both the `address` and the `phone` column with row filter `P2` and we create a new role `combined_roles` which combines `role1` and `role2`.
Let's say the following GraphQL query is queried with the `combined_roles` role.
```graphql
query {
employees {
address
phone
}
}
```
This will translate to the following SQL query:
```sql
select
(case when (P1 or P2) then address else null end) as address,
(case when P2 then phone else null end) as phone
from employee
where (P1 or P2)
```
The other parameters of the select permission will be combined in the following manner:
1. Limit - Minimum of the limits will be the limit of the inherited role
2. Allow aggregations - If any of the roles allows aggregation, then the inherited role will allow aggregation
3. Scalar computed fields - same as table column fields, as in the above example
APIs for inherited roles:
----------------------
1. `add_inherited_role`
`add_inherited_role` is the [metadata API](https://hasura.io/docs/1.0/graphql/core/api-reference/index.html#schema-metadata-api) to create a new inherited role. It accepts two arguments
`role_name`: the name of the inherited role to be added (String)
`role_set`: list of roles that need to be combined (Array of Strings)
Example:
```json
{
"type": "add_inherited_role",
"args": {
"role_name":"combined_user",
"role_set":[
"user",
"user1"
]
}
}
```
After adding the inherited role, it can be used just like a singular role.
Note:
An inherited role can only be created with non-inherited/singular roles.
2. `drop_inherited_role`
The `drop_inherited_role` API accepts the name of the inherited role and drops it from the metadata. It accepts a single argument:
`role_name`: name of the inherited role to be dropped
Example:
```json
{
"type": "drop_inherited_role",
"args": {
"role_name":"combined_user"
}
}
```
Metadata
---------
The derived roles metadata will be included under the `experimental_features` key while exporting the metadata.
```json
{
"experimental_features": {
"derived_roles": [
{
"role_name": "manager_is_employee_too",
"role_set": [
"employee",
"manager"
]
}
]
}
}
```
Scope
------
Only postgres queries and subscriptions are supported in this PR.
Important points:
-----------------
1. All columns exposed to an inherited role will be marked as `nullable`; this is done so that cell values can be nullified.
TODOs
-------
- [ ] Tests
- [ ] Test a GraphQL query running with an inherited role without enabling inherited roles in experimental features
- [ ] Tests for aggregate queries, limit, computed fields, functions, subscriptions (?)
- [ ] Introspection test with an inherited role (nullability changes in an inherited role)
- [ ] Docs
- [ ] Changelog
Co-authored-by: Vamshi Surabhi <6562944+0x777@users.noreply.github.com>
GitOrigin-RevId: 3b8ee1e11f5ceca80fe294f8c074d42fbccfec63
2021-03-08 14:14:13 +03:00
|
|
|
|
|
2021-07-17 00:18:58 +03:00
|
|
|
|
orderedRoles <- bindA -< orderRoles $ M.elems allRoles
|
2020-12-28 15:56:00 +03:00
|
|
|
|
|
|
|
|
|
-- remote schemas
|
|
|
|
|
let remoteSchemaInvalidationKeys = Inc.selectD #_ikRemoteSchemas invalidationKeys
|
scaffolding for remote-schemas module
The main aim of the PR is:
1. To set up a module structure for 'remote-schemas' package.
2. Move parts of the remote schema codebase into the new module structure to validate it.
## Notes to the reviewer
Why a PR with large-ish diff?
1. We've been making progress on the MM project but we don't yet know how long it is going to take us to get to the first milestone. To understand this better, we need to figure out the unknowns as soon as possible. Hence I've taken a stab at the first two items in the [end-state](https://gist.github.com/0x777/ca2bdc4284d21c3eec153b51dea255c9) document to figure out the unknowns. Unsurprisingly, there are a bunch of issues that we haven't discussed earlier. These are documented in the 'open questions' section.
1. The diff is large but that is only code moved around and I've added a section that documents how things are moved. In addition, there are a fair number of PR comments to help with the review process.
## Changes in the PR
### Module structure
Sets up the module structure as follows:
```
Hasura/
RemoteSchema/
Metadata/
Types.hs
SchemaCache/
Types.hs
Permission.hs
RemoteRelationship.hs
Build.hs
MetadataAPI/
Types.hs
Execute.hs
```
### 1. Types representing metadata are moved
Types that capture metadata information (currently scattered across several RQL modules) are moved into `Hasura.RemoteSchema.Metadata.Types`.
- This new module only depends on very 'core' modules such as
`Hasura.Session` for the notion of roles and `Hasura.Incremental` for `Cacheable` typeclass.
- The requirement on database modules is avoided by generalizing the remote schemas metadata to accept an arbitrary 'r' for a remote relationship
definition.
### 2. SchemaCache related types and build logic have been moved
Types that represent remote schemas information in SchemaCache are moved into `Hasura.RemoteSchema.SchemaCache.Types`.
Similar to `H.RS.Metadata.Types`, this module depends on 'core' modules except for `Hasura.GraphQL.Parser.Variable`. It has something to do with remote relationships but I haven't spent time looking into it. The validation of 'remote relationships to remote schema' is also something that needs to be looked at.
Rips out the logic that builds remote schema's SchemaCache information from the monolithic `buildSchemaCacheRule` and moves it into `Hasura.RemoteSchema.SchemaCache.Build`. Further, the `.SchemaCache.Permission` and `.SchemaCache.RemoteRelationship` have been created from existing modules that capture schema cache building logic for those two components.
This was a fair amount of work. On main, currently remote schema's SchemaCache information is built in two phases - in the first phase, 'permissions' and 'remote relationships' are ignored and in the second phase they are filled in.
While remote relationships can only be resolved after partially resolving sources and other remote schemas, the same isn't true for permissions. Further, most of the work that is done to resolve remote relationships can be moved to the first phase so that the second phase can be a very simple traversal.
This is the approach that was taken - resolve permissions and as much of the remote relationships information as possible in the first phase.
### 3. Metadata APIs related types and build logic have been moved
The types that represent remote schema related metadata APIs and the execution logic have been moved to `Hasura.RemoteSchema.MetadataAPI.Types` and `.Execute` modules respectively.
## Open questions:
1. `Hasura.RemoteSchema.Metadata.Types` is so called because I was hoping that all of the metadata related APIs of remote schema could be brought in at `Hasura.RemoteSchema.Metadata.API`. However, as metadata APIs depended on functions from the `SchemaCache` module (see [1](https://github.com/hasura/graphql-engine-mono/blob/ceba6d62264603ee5d279814677b29bcc43ecaea/server/src-lib/Hasura/RQL/DDL/RemoteSchema.hs#L55) and [2](https://github.com/hasura/graphql-engine-mono/blob/ceba6d62264603ee5d279814677b29bcc43ecaea/server/src-lib/Hasura/RQL/DDL/RemoteSchema.hs#L91)), it made more sense to create a separate top-level module for `MetadataAPI`s.
Maybe we can just have `Hasura.RemoteSchema.Metadata` and get rid of the extra nesting or have `Hasura.RemoteSchema.Metadata.{Core,Permission,RemoteRelationship}` if we want to break them down further.
1. `buildRemoteSchemas` in `H.RS.SchemaCache.Build` has the following type:
```haskell
buildRemoteSchemas ::
( ArrowChoice arr,
Inc.ArrowDistribute arr,
ArrowWriter (Seq CollectedInfo) arr,
Inc.ArrowCache m arr,
MonadIO m,
HasHttpManagerM m,
Inc.Cacheable remoteRelationshipDefinition,
ToJSON remoteRelationshipDefinition,
MonadError QErr m
) =>
Env.Environment ->
( (Inc.Dependency (HashMap RemoteSchemaName Inc.InvalidationKey), OrderedRoles),
[RemoteSchemaMetadataG remoteRelationshipDefinition]
)
`arr` HashMap RemoteSchemaName (PartiallyResolvedRemoteSchemaCtxG remoteRelationshipDefinition, MetadataObject)
```
Note the dependence on `CollectedInfo` which is defined as
```haskell
data CollectedInfo
= CIInconsistency InconsistentMetadata
| CIDependency
MetadataObject
-- ^ for error reporting on missing dependencies
SchemaObjId
SchemaDependency
deriving (Eq)
```
this pretty much means that remote schemas is dependent on types from databases, actions, ....
How do we fix this? Maybe introduce a typeclass such as `ArrowCollectRemoteSchemaDependencies` which is defined in `Hasura.RemoteSchema` and then implemented in graphql-engine?
1. The dependency on `buildSchemaCacheFor` in `.MetadataAPI.Execute` which has the following signature:
```haskell
buildSchemaCacheFor ::
(QErrM m, CacheRWM m, MetadataM m) =>
MetadataObjId ->
MetadataModifier ->
```
This can be easily resolved if we restrict what the metadata APIs are allowed to do. Currently, they operate in an unfettered access to modify SchemaCache (the `CacheRWM` constraint):
```haskell
runAddRemoteSchema ::
( QErrM m,
CacheRWM m,
MonadIO m,
HasHttpManagerM m,
MetadataM m,
Tracing.MonadTrace m
) =>
Env.Environment ->
AddRemoteSchemaQuery ->
m EncJSON
```
This should instead be changed to restrict remote schema APIs to only modify remote schema metadata (while still having access to the remote schemas part of the schema cache); with that change, this dependency is completely removed.
```haskell
runAddRemoteSchema ::
( QErrM m,
MonadIO m,
HasHttpManagerM m,
MonadReader RemoteSchemasSchemaCache m,
MonadState RemoteSchemaMetadata m,
Tracing.MonadTrace m
) =>
Env.Environment ->
AddRemoteSchemaQuery ->
m RemoteSchemeMetadataObjId
```
The idea is that the core graphql-engine would call these functions and then call
`buildSchemaCacheFor`.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6291
GitOrigin-RevId: 51357148c6404afe70219afa71bd1d59bdf4ffc6
2022-10-21 06:13:07 +03:00
|
|
|
|
remoteSchemaMap <- buildRemoteSchemas env -< ((remoteSchemaInvalidationKeys, orderedRoles), OMap.elems remoteSchemas)
|
|
|
|
|
let remoteSchemaCtxMap = M.map fst remoteSchemaMap
|
2021-07-23 02:06:10 +03:00
|
|
|
|
|
2022-08-11 10:52:22 +03:00
|
|
|
|
!defaultNC <- bindA -< _sccDefaultNamingConvention <$> askServerConfigCtx
|
|
|
|
|
!isNamingConventionEnabled <- bindA -< ((EFNamingConventions `elem`) . _sccExperimentalFeatures) <$> askServerConfigCtx
|
2022-05-31 12:08:06 +03:00
|
|
|
|
|
2022-09-14 15:59:37 +03:00
|
|
|
|
let backendInvalidationKeys = Inc.selectD #_ikBackends invalidationKeys
|
|
|
|
|
backendCache <- resolveBackendCache -< (backendInvalidationKeys, BackendMap.elems backendConfigs)
|
2022-09-05 05:42:59 +03:00
|
|
|
|
|
|
|
|
|
let backendInfoAndSourceMetadata = joinBackendInfosToSources backendCache sources
|
2022-09-01 08:27:57 +03:00
|
|
|
|
|
2021-07-23 02:06:10 +03:00
|
|
|
|
-- sources are built in two steps
|
|
|
|
|
-- first we resolve them, and build the table cache
|
2022-11-15 23:13:00 +03:00
|
|
|
|
partiallyResolvedSourcesMaybes <-
|
2021-09-24 01:56:37 +03:00
|
|
|
|
(|
|
|
|
|
|
Inc.keyed
|
|
|
|
|
( \_ exists ->
|
2022-06-27 03:36:53 +03:00
|
|
|
|
AB.dispatchAnyBackendArrow @BackendMetadata @BackendEventTrigger
|
2022-09-05 05:42:59 +03:00
|
|
|
|
( proc (backendInfoAndSourceMetadata, (invalidationKeys, defaultNC, isNamingConventionEnabled)) -> do
|
|
|
|
|
let sourceMetadata = _bcasmSourceMetadata backendInfoAndSourceMetadata
|
2022-04-29 05:13:13 +03:00
|
|
|
|
sourceName = _smName sourceMetadata
|
2021-09-24 01:56:37 +03:00
|
|
|
|
sourceInvalidationsKeys = Inc.selectD #_ikSources invalidationKeys
|
2022-11-29 04:00:28 +03:00
|
|
|
|
maybeResolvedSource <- tryResolveSource -< (sourceInvalidationsKeys, backendInfoAndSourceMetadata)
|
2021-09-24 01:56:37 +03:00
|
|
|
|
case maybeResolvedSource of
|
|
|
|
|
Nothing -> returnA -< Nothing
|
|
|
|
|
Just (source :: ResolvedSource b) -> do
|
|
|
|
|
let metadataInvalidationKey = Inc.selectD #_ikMetadata invalidationKeys
|
|
|
|
|
(tableInputs, _, _) = unzip3 $ map mkTableInputs $ OMap.elems $ _smTables sourceMetadata
|
2022-06-30 08:55:50 +03:00
|
|
|
|
!namingConv = if isNamingConventionEnabled then getNamingConvention (_smCustomization sourceMetadata) defaultNC else HasuraCase
|
2021-09-24 01:56:37 +03:00
|
|
|
|
tablesCoreInfo <-
|
|
|
|
|
buildTableCache
|
|
|
|
|
-<
|
|
|
|
|
( sourceName,
|
|
|
|
|
_rsConfig source,
|
|
|
|
|
_rsTables source,
|
|
|
|
|
tableInputs,
|
2022-05-26 14:54:30 +03:00
|
|
|
|
metadataInvalidationKey,
|
2022-05-31 12:08:06 +03:00
|
|
|
|
namingConv
|
2021-09-24 01:56:37 +03:00
|
|
|
|
)
|
2022-06-27 03:36:53 +03:00
|
|
|
|
|
|
|
|
|
let tablesMetadata = OMap.elems $ _smTables sourceMetadata
|
|
|
|
|
eventTriggers = map (_tmTable &&& OMap.elems . _tmEventTriggers) tablesMetadata
|
|
|
|
|
numEventTriggers = sum $ map (length . snd) eventTriggers
|
|
|
|
|
sourceConfig = _rsConfig source
|
|
|
|
|
|
2022-11-02 01:41:22 +03:00
|
|
|
|
(recreateEventTriggers, sourceCatalogMigrationState) <- initCatalogIfNeeded -< (Proxy :: Proxy b, numEventTriggers > 0, sourceConfig)
|
|
|
|
|
|
|
|
|
|
bindA -< unLogger logger (sourceName, sourceCatalogMigrationState)
|
2022-06-27 03:36:53 +03:00
|
|
|
|
|
|
|
|
|
let alignTableMap :: HashMap (TableName b) a -> HashMap (TableName b) c -> HashMap (TableName b) (a, c)
|
|
|
|
|
alignTableMap = M.intersectionWith (,)
|
|
|
|
|
|
|
|
|
|
eventTriggerInfoMaps <-
|
|
|
|
|
(|
|
|
|
|
|
Inc.keyed
|
|
|
|
|
( \_ (tableCoreInfo, (_, eventTriggerConfs)) ->
|
|
|
|
|
buildTableEventTriggers -< (sourceName, sourceConfig, tableCoreInfo, eventTriggerConfs, metadataInvalidationKey, recreateEventTriggers)
|
|
|
|
|
)
|
|
|
|
|
|) (tablesCoreInfo `alignTableMap` mapFromL fst eventTriggers)
|
|
|
|
|
|
2021-09-24 01:56:37 +03:00
|
|
|
|
returnA
|
|
|
|
|
-<
|
|
|
|
|
Just $
|
|
|
|
|
AB.mkAnyBackend @b $
|
2022-06-27 03:36:53 +03:00
|
|
|
|
PartiallyResolvedSource sourceMetadata source tablesCoreInfo eventTriggerInfoMaps
|
2021-09-24 01:56:37 +03:00
|
|
|
|
)
|
|
|
|
|
-<
|
2022-09-05 05:42:59 +03:00
|
|
|
|
(exists, (invalidationKeys, defaultNC, isNamingConventionEnabled))
|
2021-09-24 01:56:37 +03:00
|
|
|
|
)
|
2022-11-02 23:53:23 +03:00
|
|
|
|
|) (M.fromList $ OMap.toList backendInfoAndSourceMetadata)
|
2022-11-15 23:13:00 +03:00
|
|
|
|
let partiallyResolvedSources = catMaybes partiallyResolvedSourcesMaybes
|
2020-12-28 15:56:00 +03:00
|
|
|
|
|
2021-07-23 02:06:10 +03:00
|
|
|
|
-- then we can build the entire source output
|
|
|
|
|
-- we need to have the table cache of all sources to build cross-sources relationships
|
|
|
|
|
sourcesOutput <-
|
2021-09-24 01:56:37 +03:00
|
|
|
|
(|
|
|
|
|
|
Inc.keyed
|
|
|
|
|
( \_ exists ->
|
2022-08-19 18:40:26 +03:00
|
|
|
|
-- Note that it's a bit of a coincidence that
|
|
|
|
|
-- 'AB.dispatchAnyBackendArrow' accepts exactly two constraints,
|
|
|
|
|
-- and that we happen to want to apply to exactly two
|
|
|
|
|
-- constraints.
|
|
|
|
|
-- Ideally the function should be able to take an arbitrary
|
|
|
|
|
-- number of constraints.
|
|
|
|
|
AB.dispatchAnyBackendArrow @BackendMetadata @GetAggregationPredicatesDeps
|
2021-09-24 01:56:37 +03:00
|
|
|
|
( proc
|
|
|
|
|
( partiallyResolvedSource :: PartiallyResolvedSource b,
|
2022-06-27 03:36:53 +03:00
|
|
|
|
(allResolvedSources, remoteSchemaCtxMap, orderedRoles)
|
2021-09-24 01:56:37 +03:00
|
|
|
|
)
|
|
|
|
|
-> do
|
2022-06-27 03:36:53 +03:00
|
|
|
|
let PartiallyResolvedSource sourceMetadata resolvedSource tablesInfo eventTriggers = partiallyResolvedSource
|
2021-10-29 17:42:07 +03:00
|
|
|
|
ResolvedSource sourceConfig _sourceCustomization tablesMeta functionsMeta scalars = resolvedSource
|
2021-09-24 01:56:37 +03:00
|
|
|
|
so <-
|
2022-11-29 04:00:28 +03:00
|
|
|
|
Inc.cache buildSource
|
2021-09-24 01:56:37 +03:00
|
|
|
|
-<
|
|
|
|
|
( allResolvedSources,
|
|
|
|
|
sourceMetadata,
|
|
|
|
|
sourceConfig,
|
|
|
|
|
tablesInfo,
|
2022-06-27 03:36:53 +03:00
|
|
|
|
eventTriggers,
|
2021-09-24 01:56:37 +03:00
|
|
|
|
tablesMeta,
|
|
|
|
|
functionsMeta,
|
|
|
|
|
remoteSchemaCtxMap,
|
|
|
|
|
orderedRoles
|
|
|
|
|
)
|
2022-11-30 12:11:00 +03:00
|
|
|
|
returnA -< (AB.mkAnyBackend so, BackendMap.singleton scalars)
|
2021-07-23 02:06:10 +03:00
|
|
|
|
)
|
2021-09-24 01:56:37 +03:00
|
|
|
|
-<
|
|
|
|
|
( exists,
|
2022-06-27 03:36:53 +03:00
|
|
|
|
(partiallyResolvedSources, remoteSchemaCtxMap, orderedRoles)
|
2021-09-24 01:56:37 +03:00
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
|) partiallyResolvedSources
|
2021-07-23 02:06:10 +03:00
|
|
|
|
|
2022-02-03 21:58:37 +03:00
|
|
|
|
remoteSchemaCache <-
|
2022-11-30 12:11:00 +03:00
|
|
|
|
interpretWriter
|
|
|
|
|
-< for remoteSchemaMap \(partiallyResolvedRemoteSchemaCtx, metadataObj) -> do
|
|
|
|
|
let remoteSchemaIntrospection = irDoc $ _rscIntroOriginal partiallyResolvedRemoteSchemaCtx
|
|
|
|
|
resolvedSchemaCtx <- for partiallyResolvedRemoteSchemaCtx \PartiallyResolvedRemoteRelationship {..} ->
|
|
|
|
|
buildRemoteSchemaRemoteRelationship partiallyResolvedSources remoteSchemaCtxMap (_rscName partiallyResolvedRemoteSchemaCtx) remoteSchemaIntrospection _prrrTypeName _prrrDefinition
|
|
|
|
|
pure $ (catMaybes resolvedSchemaCtx, metadataObj)
|
2020-02-13 20:38:23 +03:00
|
|
|
|
|
2021-01-20 03:31:53 +03:00
|
|
|
|
-- actions
|
2022-11-30 12:11:00 +03:00
|
|
|
|
(actionCache, annotatedCustomTypes) <-
|
|
|
|
|
interpretWriter
|
|
|
|
|
-< do
|
|
|
|
|
-- custom types
|
|
|
|
|
let scalarsMap = mconcat $ map snd $ M.elems sourcesOutput
|
|
|
|
|
sourcesCache = M.map fst sourcesOutput
|
|
|
|
|
actionList = OMap.elems actions
|
|
|
|
|
maybeResolvedCustomTypes <-
|
|
|
|
|
withRecordInconsistencyM (MetadataObject MOCustomTypes $ toJSON customTypes) $
|
|
|
|
|
resolveCustomTypes sourcesCache customTypes scalarsMap
|
|
|
|
|
case maybeResolvedCustomTypes of
|
|
|
|
|
Just resolvedCustomTypes -> do
|
|
|
|
|
actionCache' <- buildActions resolvedCustomTypes scalarsMap orderedRoles actionList
|
|
|
|
|
pure (actionCache', resolvedCustomTypes)
|
|
|
|
|
-- If the custom types themselves are inconsistent, we can’t really do
|
|
|
|
|
-- anything with actions, so just mark them all inconsistent.
|
|
|
|
|
Nothing -> do
|
|
|
|
|
recordInconsistenciesM
|
|
|
|
|
(map mkActionMetadataObject actionList)
|
|
|
|
|
"custom types are inconsistent"
|
|
|
|
|
pure (mempty, mempty)
|
2020-02-13 20:38:23 +03:00
|
|
|
|
|
Avoid GraphQL schema rebuild when changing irrelevant Metadata
This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to:
- allowlists
- query collections
- cron triggers
- REST endpoints
- API limits
- metrics config
- GraphQL introspection options
- TLS allow lists
- OpenTelemetry
When is construction of the in-memory GraphQL schema cached between Metadata operations?
Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation.
However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction.
The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.)
We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance.
So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps.
That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema.
Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild.
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613
GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
|
|
|
|
returnA
|
|
|
|
|
-<
|
|
|
|
|
BuildOutputs
|
|
|
|
|
{ _boSources = M.map fst sourcesOutput,
|
|
|
|
|
_boActions = actionCache,
|
|
|
|
|
_boRemoteSchemas = remoteSchemaCache,
|
|
|
|
|
_boCustomTypes = annotatedCustomTypes,
|
|
|
|
|
_boRoles = mapFromL _rRoleName $ _unOrderedRoles orderedRoles,
|
|
|
|
|
_boBackendCache = backendCache
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
-- Resolve the OpenTelemetry section of the metadata into an
-- 'OpenTelemetryInfo'.
--
-- * When the status is 'OtelDisabled', both components of the result are
--   'Nothing'.
-- * When the status is 'OtelEnabled', the OTLP exporter config and the batch
--   span processor config are each parsed under 'withRecordInconsistencyM',
--   tagged with their own 'MOOpenTelemetry' metadata object. The exporter is
--   handled first, then the batch span processor.
--
-- NOTE(review): @env@ is not a parameter of this function; it is picked up
-- from the enclosing scope.
buildOpenTelemetry ::
  MonadWriter (Seq (Either InconsistentMetadata md)) m =>
  OpenTelemetryConfig ->
  m OpenTelemetryInfo
buildOpenTelemetry openTelemetryConfig =
  case _ocStatus openTelemetryConfig of
    -- Disable all components if OpenTelemetry export not enabled
    OtelDisabled -> pure $ OpenTelemetryInfo Nothing Nothing
    OtelEnabled -> do
      -- OTLP exporter
      let otlpConfig = _ocExporterOtlp openTelemetryConfig
          otlpObject =
            MetadataObject
              (MOOpenTelemetry OtelSubobjectExporterOtlp)
              (toJSON otlpConfig)
      exporterInfo <-
        withRecordInconsistencyM otlpObject $
          liftEither $
            parseOtelExporterConfig env otlpConfig

      -- Batch span processor
      let spanConfig = _ocBatchSpanProcessor openTelemetryConfig
          spanObject =
            MetadataObject
              (MOOpenTelemetry OtelSubobjectBatchSpanProcessor)
              (toJSON spanConfig)
      spanProcessorInfo <-
        withRecordInconsistencyM spanObject $
          liftEither $
            parseOtelBatchSpanProcessorConfig spanConfig

      -- Disable data types if they are not in the enabled set
      let tracesEnabled = OtelTraces `S.member` _ocEnabledDataTypes openTelemetryConfig
      pure $
        OpenTelemetryInfo
          exporterInfo
          (if tracesEnabled then spanProcessorInfo else Nothing)
|
2021-01-29 04:02:34 +03:00
|
|
|
|
|
2022-11-30 12:11:00 +03:00
|
|
|
|
-- Resolve every REST endpoint in the metadata against the query collections,
-- producing a map keyed by endpoint name ('_ceName').
--
-- Each endpoint is resolved with 'resolveEndpoint' under
-- 'withRecordInconsistencyM'; any error text is prefixed with the endpoint
-- name so the offending endpoint can be identified.
buildRESTEndpoints ::
  MonadWriter (Seq (Either InconsistentMetadata md)) m =>
  QueryCollections ->
  [CreateEndpoint] ->
  m (HashMap EndpointName (EndpointMetadata GQLQueryWithText))
buildRESTEndpoints collections endpoints =
  buildInfoMapM _ceName mkEndpointMetadataObject buildEndpoint endpoints
  where
    -- The metadata object an endpoint is reported under: its id is derived
    -- from the endpoint name and its definition is the endpoint's JSON form.
    mkEndpointMetadataObject createEndpoint =
      MetadataObject (MOEndpoint (_ceName createEndpoint)) (toJSON createEndpoint)

    buildEndpoint createEndpoint =
      withRecordInconsistencyM (mkEndpointMetadataObject createEndpoint) $
        modifyErr addContext $
          resolveEndpoint collections createEndpoint
      where
        addContext err = "in endpoint " <> toTxt (_ceName createEndpoint) <> ": " <> err
|
2021-09-24 01:56:37 +03:00
|
|
|
|
|
|
|
|
|
-- Replace the 'QueryReference' carried by an endpoint with the actual query
-- text it refers to.
--
-- Fails with:
--
--   * 400 'NotExists' when the referenced collection is not in @collections@;
--   * 400 'NotExists' when the collection has no query with the given name;
--   * 405 when the query consists of exactly one typed operation and that
--     operation is a subscription;
--   * 400 'BadRequest' when the query has no executable definitions;
--   * 400 'BadRequest' ("has multiple definitions.") for every other shape.
--
-- NOTE(review): the last branch is a catch-all, so a query with a /single/
-- definition that is not a typed operation is also reported as having
-- "multiple definitions" -- confirm whether that wording is intended.
resolveEndpoint ::
  QErrM m =>
  InsOrdHashMap CollectionName CreateCollection ->
  EndpointMetadata QueryReference ->
  m (EndpointMetadata GQLQueryWithText)
resolveEndpoint collections = traverse $ \(QueryReference collName queryName) -> do
  collection <-
    OMap.lookup collName collections
      `onNothing` throw400 NotExists ("collection with name " <> toTxt collName <> " does not exist")

  let candidateQueries = _cdQueries (_ccDefinition collection)
  listedQuery <-
    find ((== queryName) . _lqName) candidateQueries
      `onNothing` throw400
        NotExists
        ( "query with name "
            <> toTxt queryName
            <> " does not exist in collection "
            <> toTxt collName
        )

  let resolvedQuery = _lqQuery listedQuery
      GQLQueryWithText (_, parsedQuery) = resolvedQuery
      definitions = G.getExecutableDefinitions $ unGQLQuery parsedQuery

  -- Only the shape of the document is validated here; the query itself is
  -- returned unchanged.
  case definitions of
    [] -> throw400 BadRequest $ "query with name " <> toTxt queryName <> " has no definitions."
    [G.ExecutableDefinitionOperation (G.OperationDefinitionTyped operation)]
      | G._todType operation == G.OperationTypeSubscription ->
          throw405 $ "query with name " <> toTxt queryName <> " is a subscription"
      | otherwise -> pure ()
    _ -> throw400 BadRequest $ "query with name " <> toTxt queryName <> " has multiple definitions."

  pure resolvedQuery
|
2021-01-29 04:02:34 +03:00
|
|
|
|
|
2021-09-24 01:56:37 +03:00
|
|
|
|
-- Build the 'MetadataObject' that identifies an event trigger.
--
-- Only the source name, the table name and the trigger configuration of the
-- input tuple are used; the remaining components are ignored (which is why
-- two of them are fully polymorphic). The object id addresses the trigger by
-- name inside its table and source, and the definition is a JSON object with
-- the keys @table@ and @configuration@.
mkEventTriggerMetadataObject ::
  forall b a c.
  Backend b =>
  (a, SourceName, c, TableName b, RecreateEventTriggers, EventTriggerConf b) ->
  MetadataObject
mkEventTriggerMetadataObject (_, source, _, table, _, eventTriggerConf) =
  MetadataObject objectId definition
  where
    triggerObjId = SMOTableObj @b table $ MTOTrigger $ etcName eventTriggerConf
    objectId = MOSourceObjId source $ AB.mkAnyBackend triggerObjId
    definition =
      object
        [ "table" .= table,
          "configuration" .= eventTriggerConf
        ]
|
2019-11-20 21:21:30 +03:00
|
|
|
|
|
2020-05-13 15:33:16 +03:00
|
|
|
|
-- Build the 'MetadataObject' for a cron trigger: the id is derived from the
-- trigger's name ('ctName') and the definition is the trigger's JSON form.
mkCronTriggerMetadataObject catalogCronTrigger =
  MetadataObject objectId (toJSON catalogCronTrigger)
  where
    objectId = MOCronTrigger $ ctName catalogCronTrigger
|
2020-05-13 15:33:16 +03:00
|
|
|
|
|
2021-10-29 07:12:27 +03:00
|
|
|
|
-- Build the 'MetadataObject' for an action. The definition is the JSON form
-- of a 'CreateAction' assembled from the action's name, definition and
-- comment; the fourth field of 'ActionMetadata' is not part of it.
mkActionMetadataObject (ActionMetadata name comment defn _) =
  MetadataObject objectId definition
  where
    objectId = MOAction name
    definition = toJSON $ CreateAction name defn comment
|
2020-04-15 15:03:13 +03:00
|
|
|
|
|
2021-07-17 00:18:58 +03:00
|
|
|
|
-- Build the 'MetadataObject' for an inherited role: the id is derived from
-- the role name and the definition is the JSON form of the whole role.
mkInheritedRoleMetadataObject inheritedRole@(Role roleName _) =
  MetadataObject objectId definition
  where
    objectId = MOInheritedRole roleName
    definition = toJSON inheritedRole
|
|
|
|
|
|
2021-09-24 01:56:37 +03:00
|
|
|
|
-- Build the 'EventTriggerInfoMap' of one table from its event trigger
-- configurations.
--
-- Every configuration is paired with the table-level context (invalidation
-- key, source name, source config, table name and the migration-level
-- 'RecreateEventTriggers' flag) and handed to 'buildInfoMap', keyed by trigger
-- name. For each trigger, inside 'withRecordInconsistency':
--
--   1. the trigger info and its dependencies are computed with
--      'buildEventTriggerInfo';
--   2. if both maintenance mode and read-only mode are disabled:
--
--        * when the build reason is a 'CatalogUpdate' that lists this source,
--          the trigger is (re)created unconditionally through
--          'createTableEventTrigger';
--        * when the build reason is any 'CatalogUpdate', or the migration
--          flag is 'RETRecreate', the cached 'recreateTriggerIfNeeded' arrow
--          is run and then 'createMissingSQLTriggers' is called;
--
--   3. the dependencies are recorded and the info is returned.
buildTableEventTriggers ::
  forall arr m b.
  ( ArrowChoice arr,
    Inc.ArrowDistribute arr,
    ArrowWriter (Seq (Either InconsistentMetadata MetadataDependency)) arr,
    Inc.ArrowCache m arr,
    MonadIO m,
    MonadError QErr m,
    MonadBaseControl IO m,
    MonadReader BuildReason m,
    HasServerConfigCtx m,
    BackendMetadata b,
    BackendEventTrigger b
  ) =>
  ( SourceName,
    SourceConfig b,
    TableCoreInfoG b (ColumnInfo b) (ColumnInfo b),
    [EventTriggerConf b],
    Inc.Dependency Inc.InvalidationKey,
    RecreateEventTriggers
  )
    `arr` (EventTriggerInfoMap b)
buildTableEventTriggers = proc (sourceName, sourceConfig, tableInfo, eventTriggerConfs, metadataInvalidationKey, migrationRecreateEventTriggers) ->
  buildInfoMap (etcName . (^. _6)) (mkEventTriggerMetadataObject @b) buildEventTrigger
    -<
      ( tableInfo,
        map
          ( \eventTriggerConf ->
              ( metadataInvalidationKey,
                sourceName,
                sourceConfig,
                _tciName tableInfo,
                migrationRecreateEventTriggers,
                eventTriggerConf
              )
          )
          eventTriggerConfs
      )
  where
    buildEventTrigger = proc (tableInfo, (metadataInvalidationKey, source, sourceConfig, table, migrationRecreateEventTriggers, eventTriggerConf)) -> do
      let triggerName = etcName eventTriggerConf
          triggerOnReplication = etcTriggerOnReplication eventTriggerConf
          triggerDefinition = etcDefinition eventTriggerConf
          primaryKey = _tciPrimaryKey tableInfo
          tableColumns = M.elems $ _tciFieldInfoMap tableInfo
          metadataObject =
            mkEventTriggerMetadataObject @b
              (metadataInvalidationKey, source, sourceConfig, table, migrationRecreateEventTriggers, eventTriggerConf)
          schemaObjectId =
            SOSourceObj source $
              AB.mkAnyBackend $
                SOITableObj @b table $
                  TOTrigger triggerName
          addTriggerContext e = "in event trigger " <> triggerName <<> ": " <> e
      buildReason <- bindA -< ask
      let -- 'RETRecreate' only for a catalog update that explicitly lists this source.
          reloadMetadataRecreateEventTrigger =
            case buildReason of
              CatalogUpdate (Just sources)
                | source `elem` sources -> RETRecreate
              CatalogUpdate _ -> RETDoNothing
              CatalogSync -> RETDoNothing
          isCatalogUpdate =
            case buildReason of
              CatalogUpdate _ -> True
              CatalogSync -> False
      (|
        withRecordInconsistency
          ( do
              (info, dependencies) <-
                bindErrorA
                  -<
                    modifyErr (addTableContext @b table . addTriggerContext) $
                      buildEventTriggerInfo @b env source table eventTriggerConf
              serverConfigCtx <- bindA -< askServerConfigCtx
              let maintenanceAndReadOnlyDisabled =
                    _sccMaintenanceMode serverConfigCtx == MaintenanceModeDisabled
                      && _sccReadOnlyMode serverConfigCtx == ReadOnlyModeDisabled
              if maintenanceAndReadOnlyDisabled
                then do
                  bindA
                    -<
                      when (reloadMetadataRecreateEventTrigger == RETRecreate) $
                        -- This is the case when the user sets `recreate_event_triggers`
                        -- to `true` in `reload_metadata`, in this case, we recreate
                        -- the SQL trigger by force, even if it may not be necessary
                        liftEitherM $
                          createTableEventTrigger
                            @b
                            serverConfigCtx
                            sourceConfig
                            table
                            tableColumns
                            triggerName
                            triggerOnReplication
                            triggerDefinition
                            primaryKey
                  if isCatalogUpdate || migrationRecreateEventTriggers == RETRecreate
                    then do
                      recreateTriggerIfNeeded
                        -<
                          ( table,
                            tableColumns,
                            triggerName,
                            triggerOnReplication,
                            triggerDefinition,
                            sourceConfig,
                            primaryKey
                          )
                      -- We check if the SQL triggers for the event triggers
                      -- are present. If any SQL triggers are missing, those are
                      -- created.
                      bindA
                        -<
                          createMissingSQLTriggers
                            sourceConfig
                            table
                            (tableColumns, primaryKey)
                            triggerName
                            triggerOnReplication
                            triggerDefinition
                    else returnA -< ()
                else returnA -< ()
              recordDependencies -< (metadataObject, schemaObjectId, dependencies)
              returnA -< info
          )
        |) metadataObject

    recreateTriggerIfNeeded =
      -- Wrapping this arrow in `Inc.cache` means that its response is cached
      -- for the given input: the next time the arrow receives the same input,
      -- the cached response is returned and the computation is not run again.
      Inc.cache
        proc
          ( tableName,
            tableColumns,
            triggerName,
            triggerOnReplication,
            triggerDefinition,
            sourceConfig,
            primaryKey
            )
        ->
          bindA
            -< do
              serverConfigCtx <- askServerConfigCtx
              liftEitherM $
                createTableEventTrigger @b
                  serverConfigCtx
                  sourceConfig
                  tableName
                  tableColumns
                  triggerName
                  triggerOnReplication
                  triggerDefinition
                  primaryKey
|
2021-09-24 01:56:37 +03:00
|
|
|
|
|
|
|
|
|
-- Resolve all cron triggers in the metadata into a map keyed by trigger name.
--
-- Each trigger is resolved with 'resolveCronTrigger' under
-- 'withRecordInconsistencyM', using the object produced by
-- 'mkCronTriggerMetadataObject'; error text is prefixed with the trigger name.
buildCronTriggers ::
  MonadWriter (Seq (Either InconsistentMetadata md)) m =>
  [CronTriggerMetadata] ->
  m (HashMap TriggerName CronTriggerInfo)
buildCronTriggers = buildInfoMapM ctName mkCronTriggerMetadataObject buildCronTrigger
  where
    buildCronTrigger cronTrigger =
      withRecordInconsistencyM (mkCronTriggerMetadataObject cronTrigger) $
        modifyErr addCronTriggerContext $
          resolveCronTrigger env cronTrigger
      where
        triggerName = triggerNameToTxt $ ctName cronTrigger
        addCronTriggerContext e = "in cron trigger " <> triggerName <> ": " <> e
|
2021-09-24 01:56:37 +03:00
|
|
|
|
|
|
|
|
|
-- Resolve all inherited roles into a map keyed by role name.
--
-- Each role is resolved against @allRoles@ with 'resolveInheritedRole' under
-- 'withRecordInconsistencyM'. The dependencies returned by the resolution are
-- recorded against the role's metadata object and its 'SORole' schema object
-- before the resolved role is returned. Error text is prefixed with the role
-- name.
buildInheritedRoles ::
  MonadWriter (Seq (Either InconsistentMetadata MetadataDependency)) m =>
  HashSet RoleName ->
  [InheritedRole] ->
  m (HashMap RoleName Role)
buildInheritedRoles allRoles = buildInfoMapM _rRoleName mkInheritedRoleMetadataObject buildInheritedRole
  where
    buildInheritedRole inheritedRole =
      withRecordInconsistencyM metadataObject $ modifyErr addInheritedRoleContext do
        (resolvedInheritedRole, dependencies) <- resolveInheritedRole allRoles inheritedRole
        recordDependenciesM metadataObject schemaObject dependencies
        pure resolvedInheritedRole
      where
        roleName = _rRoleName inheritedRole
        metadataObject = mkInheritedRoleMetadataObject inheritedRole
        schemaObject = SORole roleName
        addInheritedRoleContext e = "in inherited role " <> roleNameToTxt roleName <> ": " <> e
|
2021-09-24 01:56:37 +03:00
|
|
|
|
|
|
|
|
|
-- Resolve all actions in the metadata into a map keyed by action name.
--
-- For each action the definition is resolved with 'resolveAction' under
-- 'withRecordInconsistencyM'. The permission map is computed from the roles
-- listed in the action's permissions via 'mkBooleanPermissionMap' and
-- @orderedRoles@. The resulting 'ActionInfo' carries the output type taken
-- from the /unresolved/ definition together with the output object returned
-- by 'resolveAction'.
--
-- NOTE(review): the error context uses "; " as separator whereas the sibling
-- builders use ": " -- kept as is, confirm whether this is intentional.
buildActions ::
  MonadWriter (Seq (Either InconsistentMetadata MetadataDependency)) m =>
  AnnotatedCustomTypes ->
  BackendMap ScalarMap ->
  OrderedRoles ->
  [ActionMetadata] ->
  m (HashMap ActionName ActionInfo)
buildActions resolvedCustomTypes scalarsMap orderedRoles = buildInfoMapM _amName mkActionMetadataObject buildAction
  where
    buildAction action@(ActionMetadata name comment def actionPermissions) =
      withRecordInconsistencyM (mkActionMetadataObject action) $ modifyErr addActionContext do
        (resolvedDef, outObject) <- resolveAction env resolvedCustomTypes def scalarsMap
        pure $
          ActionInfo
            name
            (outputType, outObject)
            resolvedDef
            permissionsMap
            (_adForwardClientHeaders resolvedDef)
            comment
      where
        addActionContext e = "in action " <> name <<> "; " <> e
        outputType = unGraphQLType $ _adOutputType def
        metadataPermissionMap =
          mapFromL _apiRole $ map (ActionPermissionInfo . _apmRole) actionPermissions
        permissionsMap =
          mkBooleanPermissionMap ActionPermissionInfo metadataPermissionMap orderedRoles
|
2021-09-24 01:56:37 +03:00
|
|
|
|
|
2022-02-03 21:58:37 +03:00
|
|
|
|
-- Resolve one remote relationship defined on a type of a remote schema.
--
-- Arguments, in order: all partially resolved sources, all partially resolved
-- remote schemas, the name of the remote schema the relationship is defined
-- on, that schema's introspection result, the name of the type carrying the
-- relationship, and the relationship itself.
--
-- The resolution runs under 'withRecordInconsistencyM', hence the 'Maybe' in
-- the result. On success the left-hand-side and right-hand-side dependencies
-- are recorded against the relationship's metadata and schema objects.
-- Error text is prefixed with the relationship name.
buildRemoteSchemaRemoteRelationship ::
  MonadWriter (Seq (Either InconsistentMetadata MetadataDependency)) m =>
  HashMap SourceName (AB.AnyBackend PartiallyResolvedSource) ->
  PartiallyResolvedRemoteSchemaMap ->
  RemoteSchemaName ->
  RemoteSchemaIntrospection ->
  G.Name ->
  RemoteRelationship ->
  m (Maybe (RemoteFieldInfo G.Name))
buildRemoteSchemaRemoteRelationship allSources remoteSchemaMap remoteSchema remoteSchemaIntrospection typeName rr@RemoteRelationship {..} = do
  let metadataObject =
        MetadataObject (MORemoteSchemaRemoteRelationship remoteSchema typeName _rrName) $
          toJSON $
            CreateRemoteSchemaRemoteRelationship remoteSchema typeName _rrName _rrDefinition
      schemaObj = SORemoteSchemaRemoteRelationship remoteSchema typeName _rrName
      -- The trailing space after "relationship" keeps the prefix from running
      -- into the relationship name, matching the other context helpers in
      -- this module ("in event trigger ", "in cron trigger ", ...).
      addRemoteRelationshipContext e = "in remote relationship " <> _rrName <<> ": " <> e
      -- buildRemoteFieldInfo only knows how to construct dependencies on the RHS of the join condition,
      -- so the dependencies on the remote relationship on the LHS entity have to be computed here
      lhsDependencies =
        -- a direct dependency on the remote schema on which this is defined
        [SchemaDependency (SORemoteSchema remoteSchema) DRRemoteRelationship]
  withRecordInconsistencyM metadataObject $ modifyErr addRemoteRelationshipContext do
    allowedLHSJoinFields <- getRemoteSchemaEntityJoinColumns remoteSchema remoteSchemaIntrospection typeName
    (remoteField, rhsDependencies) <-
      buildRemoteFieldInfo (remoteSchemaToLHSIdentifier remoteSchema) allowedLHSJoinFields rr allSources remoteSchemaMap
    recordDependenciesM metadataObject schemaObj (lhsDependencies <> rhsDependencies)
    pure remoteField
|
2022-02-03 21:58:37 +03:00
|
|
|
|
|
2022-09-05 05:42:59 +03:00
|
|
|
|
-- | Pairs the 'BackendInfo' of a backend @b@ with the 'SourceMetadata' of a
-- source of that same backend. Values are produced by
-- 'joinBackendInfosToSources'.
data BackendInfoAndSourceMetadata b = BackendInfoAndSourceMetadata
  { -- | Information about the backend @b@.
    _bcasmBackendInfo :: BackendInfo b,
    -- | Metadata of one source of backend @b@.
    _bcasmSourceMetadata :: SourceMetadata b
  }
  deriving stock (Generic)

deriving instance (Backend b) => Show (BackendInfoAndSourceMetadata b)

deriving instance (Backend b) => Eq (BackendInfoAndSourceMetadata b)
|
2022-04-29 05:13:13 +03:00
|
|
|
|
|
2022-09-05 05:42:59 +03:00
|
|
|
|
-- | Attach to every source's metadata the 'BackendInfo' of its backend.
--
-- The backend info is looked up in the 'BackendCache' by the backend type of
-- the source; when the cache has no entry for that backend, 'mempty' is used
-- instead. Keys and ordering of the input map are those of 'OMap.map'.
joinBackendInfosToSources ::
  BackendCache ->
  InsOrdHashMap SourceName BackendSourceMetadata ->
  InsOrdHashMap SourceName (AB.AnyBackend BackendInfoAndSourceMetadata)
joinBackendInfosToSources backendInfos sources = OMap.map attachBackendInfo sources
  where
    attachBackendInfo abSourceMetadata =
      AB.dispatchAnyBackend @Backend (unBackendSourceMetadata abSourceMetadata) $
        \(sourceMetadata :: SourceMetadata b) ->
          AB.mkAnyBackend @b
            BackendInfoAndSourceMetadata
              { _bcasmBackendInfo =
                  maybe mempty unBackendInfoWrapper (BackendMap.lookup @b backendInfos),
                _bcasmSourceMetadata = sourceMetadata
              }
|
2022-04-29 05:13:13 +03:00
|
|
|
|
|
2020-03-26 14:52:20 +03:00
|
|
|
|
{- Note [Keep invalidation keys for inconsistent objects]
|
|
|
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|
After building the schema cache, we prune InvalidationKeys for objects
|
|
|
|
|
that no longer exist in the schema to avoid leaking memory for objects
|
|
|
|
|
that have been dropped. However, note that we *don’t* want to drop
|
|
|
|
|
keys for objects that are simply inconsistent!
|
|
|
|
|
|
|
|
|
|
Why? The object is still in the metadata, so next time we reload it,
|
|
|
|
|
we’ll reprocess that object. We want to reuse the cache if its
|
|
|
|
|
definition hasn’t changed, but if we dropped the invalidation key, it
|
|
|
|
|
will incorrectly be reprocessed (since the invalidation key changed
|
|
|
|
|
from present to absent). -}
|