graphql-engine/server/src-lib/Hasura/RQL/DDL/Schema/Cache.hs

1319 lines
61 KiB
Haskell
Raw Normal View History

{-# LANGUAGE Arrows #-}
{-# LANGUAGE OverloadedLabels #-}
{-# LANGUAGE UndecidableInstances #-}
-- | Top-level functions concerned specifically with operations on the schema cache, such as
-- rebuilding it from the catalog and incorporating schema changes. See the module documentation for
-- "Hasura.RQL.DDL.Schema" for more details.
--
-- __Note__: this module is __mutually recursive__ with other @Hasura.RQL.DDL.Schema.*@ modules, which
-- both define pieces of the implementation of building the schema cache and define handlers that
-- trigger schema cache rebuilds.
module Hasura.RQL.DDL.Schema.Cache
( RebuildableSchemaCache,
lastBuiltSchemaCache,
buildRebuildableSchemaCache,
buildRebuildableSchemaCacheWithReason,
CacheRWT,
runCacheRWT,
mkBooleanPermissionMap,
)
where
import Control.Arrow.Extended
import Control.Lens hiding ((.=))
import Control.Monad.Trans.Control (MonadBaseControl)
import Control.Retry qualified as Retry
import Data.Aeson
import Data.Either (isLeft)
import Data.Environment qualified as Env
import Data.HashMap.Strict.Extended qualified as M
import Data.HashMap.Strict.InsOrd.Extended qualified as OMap
import Data.HashSet qualified as HS
import Data.Proxy
import Data.Set qualified as S
import Data.Text.Extended
import Hasura.Base.Error
import Hasura.GraphQL.Schema (buildGQLContext)
import Hasura.GraphQL.Schema.NamingCase
import Hasura.Incremental qualified as Inc
import Hasura.Logging
import Hasura.Metadata.Class
import Hasura.Prelude
import Hasura.RQL.DDL.Action
import Hasura.RQL.DDL.CustomTypes
import Hasura.RQL.DDL.EventTrigger (MonadEventLogCleanup (..), buildEventTriggerInfo)
import Hasura.RQL.DDL.InheritedRoles (resolveInheritedRole)
import Hasura.RQL.DDL.RemoteRelationship (CreateRemoteSchemaRemoteRelationship (..), PartiallyResolvedSource (..), buildRemoteFieldInfo, getRemoteSchemaEntityJoinColumns)
import Hasura.RQL.DDL.ScheduledTrigger
import Hasura.RQL.DDL.Schema.Cache.Common
import Hasura.RQL.DDL.Schema.Cache.Dependencies
import Hasura.RQL.DDL.Schema.Cache.Fields
import Hasura.RQL.DDL.Schema.Cache.Permission
import Hasura.RQL.DDL.Schema.Function
import Hasura.RQL.DDL.Schema.Table
import Hasura.RQL.Types.Action
import Hasura.RQL.Types.Allowlist
import Hasura.RQL.Types.Backend
import Hasura.RQL.Types.Column
import Hasura.RQL.Types.Common
import Hasura.RQL.Types.CustomTypes
import Hasura.RQL.Types.Endpoint
import Hasura.RQL.Types.EventTrigger
import Hasura.RQL.Types.Eventing.Backend
import Hasura.RQL.Types.Function
import Hasura.RQL.Types.Metadata hiding (fmFunction, tmTable)
import Hasura.RQL.Types.Metadata.Backend
import Hasura.RQL.Types.Metadata.Object
import Hasura.RQL.Types.Network
import Hasura.RQL.Types.OpenTelemetry
import Hasura.RQL.Types.QueryCollection
import Hasura.RQL.Types.Relationships.Remote
import Hasura.RQL.Types.Roles
import Hasura.RQL.Types.ScheduledTrigger
import Hasura.RQL.Types.SchemaCache
import Hasura.RQL.Types.SchemaCache.Build
import Hasura.RQL.Types.SchemaCache.Instances ()
import Hasura.RQL.Types.SchemaCacheTypes
import Hasura.RQL.Types.Source
import Hasura.RQL.Types.SourceCustomization
import Hasura.RQL.Types.Table
scaffolding for remote-schemas module The main aim of the PR is: 1. To set up a module structure for 'remote-schemas' package. 2. Move parts by the remote schema codebase into the new module structure to validate it. ## Notes to the reviewer Why a PR with large-ish diff? 1. We've been making progress on the MM project but we don't yet know long it is going to take us to get to the first milestone. To understand this better, we need to figure out the unknowns as soon as possible. Hence I've taken a stab at the first two items in the [end-state](https://gist.github.com/0x777/ca2bdc4284d21c3eec153b51dea255c9) document to figure out the unknowns. Unsurprisingly, there are a bunch of issues that we haven't discussed earlier. These are documented in the 'open questions' section. 1. The diff is large but that is only code moved around and I've added a section that documents how things are moved. In addition, there are fair number of PR comments to help with the review process. ## Changes in the PR ### Module structure Sets up the module structure as follows: ``` Hasura/ RemoteSchema/ Metadata/ Types.hs SchemaCache/ Types.hs Permission.hs RemoteRelationship.hs Build.hs MetadataAPI/ Types.hs Execute.hs ``` ### 1. Types representing metadata are moved Types that capture metadata information (currently scattered across several RQL modules) are moved into `Hasura.RemoteSchema.Metadata.Types`. - This new module only depends on very 'core' modules such as `Hasura.Session` for the notion of roles and `Hasura.Incremental` for `Cacheable` typeclass. - The requirement on database modules is avoided by generalizing the remote schemas metadata to accept an arbitrary 'r' for a remote relationship definition. ### 2. SchemaCache related types and build logic have been moved Types that represent remote schemas information in SchemaCache are moved into `Hasura.RemoteSchema.SchemaCache.Types`. Similar to `H.RS.Metadata.Types`, this module depends on 'core' modules except for `Hasura.GraphQL.Parser.Variable`. It has something to do with remote relationships but I haven't spent time looking into it. The validation of 'remote relationships to remote schema' is also something that needs to be looked at. Rips out the logic that builds remote schema's SchemaCache information from the monolithic `buildSchemaCacheRule` and moves it into `Hasura.RemoteSchema.SchemaCache.Build`. Further, the `.SchemaCache.Permission` and `.SchemaCache.RemoteRelationship` have been created from existing modules that capture schema cache building logic for those two components. This was a fair amount of work. On main, currently remote schema's SchemaCache information is built in two phases - in the first phase, 'permissions' and 'remote relationships' are ignored and in the second phase they are filled in. While remote relationships can only be resolved after partially resolving sources and other remote schemas, the same isn't true for permissions. Further, most of the work that is done to resolve remote relationships can be moved to the first phase so that the second phase can be a very simple traversal. This is the approach that was taken - resolve permissions and as much as remote relationships information in the first phase. ### 3. Metadata APIs related types and build logic have been moved The types that represent remote schema related metadata APIs and the execution logic have been moved to `Hasura.RemoteSchema.MetadataAPI.Types` and `.Execute` modules respectively. ## Open questions: 1. `Hasura.RemoteSchema.Metadata.Types` is so called because I was hoping that all of the metadata related APIs of remote schema can be brought in at `Hasura.RemoteSchema.Metadata.API`. However, as metadata APIs depended on functions from `SchemaCache` module (see [1](https://github.com/hasura/graphql-engine-mono/blob/ceba6d62264603ee5d279814677b29bcc43ecaea/server/src-lib/Hasura/RQL/DDL/RemoteSchema.hs#L55) and [2](https://github.com/hasura/graphql-engine-mono/blob/ceba6d62264603ee5d279814677b29bcc43ecaea/server/src-lib/Hasura/RQL/DDL/RemoteSchema.hs#L91), it made more sense to create a separate top-level module for `MetadataAPI`s. Maybe we can just have `Hasura.RemoteSchema.Metadata` and get rid of the extra nesting or have `Hasura.RemoteSchema.Metadata.{Core,Permission,RemoteRelationship}` if we want to break them down further. 1. `buildRemoteSchemas` in `H.RS.SchemaCache.Build` has the following type: ```haskell buildRemoteSchemas :: ( ArrowChoice arr, Inc.ArrowDistribute arr, ArrowWriter (Seq CollectedInfo) arr, Inc.ArrowCache m arr, MonadIO m, HasHttpManagerM m, Inc.Cacheable remoteRelationshipDefinition, ToJSON remoteRelationshipDefinition, MonadError QErr m ) => Env.Environment -> ( (Inc.Dependency (HashMap RemoteSchemaName Inc.InvalidationKey), OrderedRoles), [RemoteSchemaMetadataG remoteRelationshipDefinition] ) `arr` HashMap RemoteSchemaName (PartiallyResolvedRemoteSchemaCtxG remoteRelationshipDefinition, MetadataObject) ``` Note the dependence on `CollectedInfo` which is defined as ```haskell data CollectedInfo = CIInconsistency InconsistentMetadata | CIDependency MetadataObject -- ^ for error reporting on missing dependencies SchemaObjId SchemaDependency deriving (Eq) ``` this pretty much means that remote schemas is dependent on types from databases, actions, .... How do we fix this? Maybe introduce a typeclass such as `ArrowCollectRemoteSchemaDependencies` which is defined in `Hasura.RemoteSchema` and then implemented in graphql-engine? 1. The dependency on `buildSchemaCacheFor` in `.MetadataAPI.Execute` which has the following signature: ```haskell buildSchemaCacheFor :: (QErrM m, CacheRWM m, MetadataM m) => MetadataObjId -> MetadataModifier -> ``` This can be easily resolved if we restrict what the metadata APIs are allowed to do. Currently, they operate in an unfettered access to modify SchemaCache (the `CacheRWM` constraint): ```haskell runAddRemoteSchema :: ( QErrM m, CacheRWM m, MonadIO m, HasHttpManagerM m, MetadataM m, Tracing.MonadTrace m ) => Env.Environment -> AddRemoteSchemaQuery -> m EncJSON ``` This should instead be changed to restrict remote schema APIs to only modify remote schema metadata (but has access to the remote schemas part of the schema cache), this dependency is completely removed. ```haskell runAddRemoteSchema :: ( QErrM m, MonadIO m, HasHttpManagerM m, MonadReader RemoteSchemasSchemaCache m, MonadState RemoteSchemaMetadata m, Tracing.MonadTrace m ) => Env.Environment -> AddRemoteSchemaQuery -> m RemoteSchemeMetadataObjId ``` The idea is that the core graphql-engine would call these functions and then call `buildSchemaCacheFor`. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6291 GitOrigin-RevId: 51357148c6404afe70219afa71bd1d59bdf4ffc6
2022-10-21 06:13:07 +03:00
import Hasura.RemoteSchema.Metadata
import Hasura.RemoteSchema.SchemaCache
import Hasura.SQL.AnyBackend qualified as AB
import Hasura.SQL.Backend
import Hasura.SQL.BackendMap (BackendMap)
import Hasura.SQL.BackendMap qualified as BackendMap
import Hasura.SQL.Tag
import Hasura.Server.Migrate.Version
import Hasura.Server.Types
import Hasura.Session
import Hasura.Tracing qualified as Tracing
import Language.GraphQL.Draft.Syntax qualified as G
import Network.HTTP.Client.Manager (HasHttpManagerM (..))
{- Note [Roles Inheritance]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Roles may have parent roles defined from which they can inherit permission and this is
called as roles inheritance. Roles which have parents can also be parents of other roles.
So, cycle in roles should be disallowed and this is done in the `orderRoles` function.
When the metadata contains a permission for a role for a entity, then it will override the
inherited permission, if any.
Roles inheritance work differently for different features:
1. Select permissions
~~~~~~~~~~~~~~~~~~~~~
See note [Inherited roles architecture for read queries]
2. Mutation permissions and remote schema permissions
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
For mutation and remote schema permissions, an inherited role can only inherit permission
from its parent roles when the relevant parts of the permissions are equal i.e. the non-relevant
parts are discarded for the equality, for example, in two remote schema permissions the order
of the fields in an Object type is discarded.
When an inherited role cannot inherit permission from its parents due to a conflict, then we mark
the inherited role and the entity (remote schema or table) combination as inconsistent in the metadata.
3. Actions and Custom function permissions
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Currently, actions and custom function permissions can be thought of as a boolean. Either a role has
permission to the entity or it doesn't, so in these cases there's no possiblity of a conflict. An inherited
role will have access to the action/function if any one of the parents have permission to access the
action/function.
-}
buildRebuildableSchemaCache ::
Logger Hasura ->
Env.Environment ->
Metadata ->
CacheBuild RebuildableSchemaCache
buildRebuildableSchemaCache =
buildRebuildableSchemaCacheWithReason CatalogSync
buildRebuildableSchemaCacheWithReason ::
BuildReason ->
Logger Hasura ->
Env.Environment ->
Metadata ->
CacheBuild RebuildableSchemaCache
buildRebuildableSchemaCacheWithReason reason logger env metadata = do
result <-
flip runReaderT reason $
Inc.build (buildSchemaCacheRule logger env) (metadata, initialInvalidationKeys)
pure $ RebuildableSchemaCache (Inc.result result) initialInvalidationKeys (Inc.rebuildRule result)
newtype CacheRWT m a
= -- The CacheInvalidations component of the state could actually be collected using WriterT, but
-- WriterT implementations prior to transformers-0.5.6.0 (which added
-- Control.Monad.Trans.Writer.CPS) are leaky, and we dont have that yet.
CacheRWT (StateT (RebuildableSchemaCache, CacheInvalidations) m a)
deriving
( Functor,
Applicative,
Monad,
MonadIO,
MonadReader r,
MonadError e,
UserInfoM,
HasHttpManagerM,
MonadMetadataStorage,
MonadMetadataStorageQueryAPI,
Tracing.MonadTrace,
HasServerConfigCtx,
MonadBase b,
MonadBaseControl b
)
instance (MonadEventLogCleanup m) => MonadEventLogCleanup (CacheRWT m) where
runLogCleaner conf = lift $ runLogCleaner conf
generateCleanupSchedules sourceInfo triggerName cleanupConfig = lift $ generateCleanupSchedules sourceInfo triggerName cleanupConfig
runCacheRWT ::
Functor m =>
RebuildableSchemaCache ->
CacheRWT m a ->
m (a, RebuildableSchemaCache, CacheInvalidations)
runCacheRWT cache (CacheRWT m) =
runStateT m (cache, mempty) <&> \(v, (newCache, invalidations)) -> (v, newCache, invalidations)
instance MonadTrans CacheRWT where
lift = CacheRWT . lift
instance (Monad m) => CacheRM (CacheRWT m) where
askSchemaCache = CacheRWT $ gets (lastBuiltSchemaCache . (^. _1))
instance
( MonadIO m,
MonadError QErr m,
HasHttpManagerM m,
MonadResolveSource m,
HasServerConfigCtx m
) =>
CacheRWM (CacheRWT m)
where
buildSchemaCacheWithOptions buildReason invalidations metadata = CacheRWT do
(RebuildableSchemaCache lastBuiltSC invalidationKeys rule, oldInvalidations) <- get
let metadataVersion = scMetadataResourceVersion lastBuiltSC
newInvalidationKeys = invalidateKeys invalidations invalidationKeys
result <-
lift $
runCacheBuildM $
flip runReaderT buildReason $
Inc.build rule (metadata, newInvalidationKeys)
let schemaCache = (Inc.result result) {scMetadataResourceVersion = metadataVersion}
prunedInvalidationKeys = pruneInvalidationKeys schemaCache newInvalidationKeys
!newCache = RebuildableSchemaCache schemaCache prunedInvalidationKeys (Inc.rebuildRule result)
!newInvalidations = oldInvalidations <> invalidations
put (newCache, newInvalidations)
where
-- Prunes invalidation keys that no longer exist in the schema to avoid leaking memory by
-- hanging onto unnecessary keys.
pruneInvalidationKeys schemaCache = over ikRemoteSchemas $ M.filterWithKey \name _ ->
-- see Note [Keep invalidation keys for inconsistent objects]
name `elem` getAllRemoteSchemas schemaCache
setMetadataResourceVersionInSchemaCache resourceVersion = CacheRWT $ do
(rebuildableSchemaCache, invalidations) <- get
put
( rebuildableSchemaCache
{ lastBuiltSchemaCache =
(lastBuiltSchemaCache rebuildableSchemaCache)
{ scMetadataResourceVersion = Just resourceVersion
}
},
invalidations
)
-- | Generate health checks related cache from sources metadata
buildHealthCheckCache :: Sources -> SourceHealthCheckCache
buildHealthCheckCache sources =
catMaybes $ M.fromList $ map (second mkSourceHealthCheck) (OMap.toList sources)
where
mkSourceHealthCheck :: BackendSourceMetadata -> Maybe BackendSourceHealthCheckInfo
mkSourceHealthCheck (BackendSourceMetadata sourceMetadata) =
AB.traverseBackend @Backend sourceMetadata mkSourceHealthCheckBackend
mkSourceHealthCheckBackend :: SourceMetadata b -> Maybe (SourceHealthCheckInfo b)
mkSourceHealthCheckBackend sourceMetadata =
let sourceName = _smName sourceMetadata
connection = _smConfiguration sourceMetadata
healthCheck = _smHealthCheckConfig sourceMetadata
in SourceHealthCheckInfo sourceName connection <$> healthCheck
-- | Generate cache of source connection details so that we can ping sources for
-- attribution
buildSourcePingCache :: Sources -> SourcePingCache
buildSourcePingCache sources =
M.fromList $ map (second mkSourcePing) (OMap.toList sources)
where
mkSourcePing :: BackendSourceMetadata -> BackendSourcePingInfo
mkSourcePing (BackendSourceMetadata sourceMetadata) =
AB.mapBackend sourceMetadata mkSourcePingBackend
mkSourcePingBackend :: SourceMetadata b -> SourcePingInfo b
mkSourcePingBackend sourceMetadata =
let sourceName = _smName sourceMetadata
connection = _smConfiguration sourceMetadata
in SourcePingInfo sourceName connection
Avoid GraphQL schema rebuild when changing irrelevant Metadata This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to: - allowlists - query collections - cron triggers - REST endpoints - API limits - metrics config - GraphQL introspection options - TLS allow lists - OpenTelemetry When is construction of the in-memory GraphQL schema cached between Metadata operations? Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation. However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction. The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.) We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance. So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps. That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema. Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613 GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
{- Note [Avoiding GraphQL schema rebuilds when changing irrelevant Metadata]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
There are many Metadata operations that don't influence the GraphQL schema. So
we should be caching its construction.
The `Hasura.Incremental` framework allows us to cache such constructions:
whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and
`b` the output, we can use the `Inc.cache` combinator to obtain a new arrow
which is only re-executed when the input `a` changes in a material way. To test
this, `a` needs an `Eq` instance.
We can't simply apply `Inc.cache` to the GraphQL schema cache building phase
(`buildGQLContext`), because the inputs (components of `BuildOutputs` such as
`SourceCache`) don't have an `Eq` instance.
So the purpose of `buildOutputsAndSchema` is that we cach already at an earlier
point, encompassing more computation. The Metadata and invalidation keys (which
have `Eq` instances) are used as a caching key, and `Inc.cache` can be applied
to the whole sequence of steps.
But because of the all-or-nothing nature of caching, it's important that
`buildOutputsAndSchema` is re-run as little as possible. So the exercise
becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as
many Metadata operations as possible can be handled outside of this codepath
that produces a GraphQL schema.
-}
buildSchemaCacheRule ::
-- Note: by supplying BuildReason via MonadReader, it does not participate in caching, which is
-- what we want!
( ArrowChoice arr,
Inc.ArrowDistribute arr,
Inc.ArrowCache m arr,
MonadIO m,
MonadBaseControl IO m,
MonadError QErr m,
MonadReader BuildReason m,
HasHttpManagerM m,
MonadResolveSource m,
HasServerConfigCtx m
) =>
Logger Hasura ->
Env.Environment ->
(Metadata, InvalidationKeys) `arr` SchemaCache
server: Simplify `BuildOutputs` A bunch of configurations are retrieved from the Metadata, then stored in the `BuildOutputs` structure, only to then be forwarded to the `SchemaCache`, with extremely little processing in between. So this simplifies the build pipeline for some parts of the metadata: just construct those things from `Metadata` directly, and store them in the `SchemaCache` without any intermediate container. Why did we have the detour via `BuildOutputs` in the first place? Parts of the Metadata (codified by `MetadataObjId`) can generate _metadata inconsistencies_ and/or _schema dependencies_, which are related. - Metadata inconsistencies are warnings that we show to the user, indicating that there's something wrong with their configuration, and they have to fix it. - Schema dependencies are an internal mechanism that allow us to build a consistent view of the world. For instance, if we have a relationship from DB tables `books` to `authors`, but the `authors` table is inconsistent (e.g. it doesn't exist in the DB), then we have schema dependencies indicating that. The job of `resolveDependencies` is to then drop the relationship, so that we can at least generate a legal GraphQL schema for `books`. If we never generate a schema dependency for a certain fragment of Metadata, then there is no reason to call `resolveDependencies` on it, and so there is no reason to store it in `BuildOutputs`. --- The starting point that allows this refactor is to apply Metadata defaults before it reaches `buildAndCollectInfo`, so that metadata-with-defaults can be used elsewhere. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6609 GitOrigin-RevId: df0c4a7ff9451e10e02a40bf26304b26584ba483
2022-11-15 15:02:55 +03:00
buildSchemaCacheRule logger env = proc (metadataNoDefaults, invalidationKeys) -> do
invalidationKeysDep <- Inc.newDependency -< invalidationKeys
metadataDefaults <- bindA -< askMetadataDefaults
server: Simplify `BuildOutputs` A bunch of configurations are retrieved from the Metadata, then stored in the `BuildOutputs` structure, only to then be forwarded to the `SchemaCache`, with extremely little processing in between. So this simplifies the build pipeline for some parts of the metadata: just construct those things from `Metadata` directly, and store them in the `SchemaCache` without any intermediate container. Why did we have the detour via `BuildOutputs` in the first place? Parts of the Metadata (codified by `MetadataObjId`) can generate _metadata inconsistencies_ and/or _schema dependencies_, which are related. - Metadata inconsistencies are warnings that we show to the user, indicating that there's something wrong with their configuration, and they have to fix it. - Schema dependencies are an internal mechanism that allow us to build a consistent view of the world. For instance, if we have a relationship from DB tables `books` to `authors`, but the `authors` table is inconsistent (e.g. it doesn't exist in the DB), then we have schema dependencies indicating that. The job of `resolveDependencies` is to then drop the relationship, so that we can at least generate a legal GraphQL schema for `books`. If we never generate a schema dependency for a certain fragment of Metadata, then there is no reason to call `resolveDependencies` on it, and so there is no reason to store it in `BuildOutputs`. --- The starting point that allows this refactor is to apply Metadata defaults before it reaches `buildAndCollectInfo`, so that metadata-with-defaults can be used elsewhere. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6609 GitOrigin-RevId: df0c4a7ff9451e10e02a40bf26304b26584ba483
2022-11-15 15:02:55 +03:00
let metadata@Metadata {..} = overrideMetadataDefaults metadataNoDefaults metadataDefaults
Avoid GraphQL schema rebuild when changing irrelevant Metadata This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to: - allowlists - query collections - cron triggers - REST endpoints - API limits - metrics config - GraphQL introspection options - TLS allow lists - OpenTelemetry When is construction of the in-memory GraphQL schema cached between Metadata operations? Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation. However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction. The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.) We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance. So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps. That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema. Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613 GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
metadataDep <- Inc.newDependency -< metadata
(inconsistentObjects, (resolvedOutputs, dependencyInconsistentObjects, resolvedDependencies), ((adminIntrospection, gqlContext, gqlContextUnauth, inconsistentRemoteSchemas), (relayContext, relayContextUnauth))) <-
Inc.cache buildOutputsAndSchema -< (metadataDep, invalidationKeysDep)
Avoid GraphQL schema rebuild when changing irrelevant Metadata This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to: - allowlists - query collections - cron triggers - REST endpoints - API limits - metrics config - GraphQL introspection options - TLS allow lists - OpenTelemetry When is construction of the in-memory GraphQL schema cached between Metadata operations? Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation. However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction. The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.) We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance. So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps. That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema. Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613 GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
(resolvedEndpoints, endpointCollectedInfo) <- runWriterA $ buildInfoMap fst mkEndpointMetadataObject buildEndpoint -< (_metaQueryCollections, OMap.toList _metaRestEndpoints)
(cronTriggersMap, cronTriggersCollectedInfo) <- runWriterA buildCronTriggers -< ((), OMap.elems _metaCronTriggers)
(openTelemetryInfo, openTelemetryCollectedInfo) <- runWriterA buildOpenTelemetry -< _metaOpenTelemetryConfig
let duplicateVariables :: EndpointMetadata a -> Bool
duplicateVariables m = any ((> 1) . length) $ group $ sort $ catMaybes $ splitPath Just (const Nothing) (_ceUrl m)
endpointObjId :: EndpointMetadata q -> MetadataObjId
endpointObjId md = MOEndpoint (_ceName md)
endpointObject :: EndpointMetadata q -> MetadataObject
server: Simplify `BuildOutputs` A bunch of configurations are retrieved from the Metadata, then stored in the `BuildOutputs` structure, only to then be forwarded to the `SchemaCache`, with extremely little processing in between. So this simplifies the build pipeline for some parts of the metadata: just construct those things from `Metadata` directly, and store them in the `SchemaCache` without any intermediate container. Why did we have the detour via `BuildOutputs` in the first place? Parts of the Metadata (codified by `MetadataObjId`) can generate _metadata inconsistencies_ and/or _schema dependencies_, which are related. - Metadata inconsistencies are warnings that we show to the user, indicating that there's something wrong with their configuration, and they have to fix it. - Schema dependencies are an internal mechanism that allow us to build a consistent view of the world. For instance, if we have a relationship from DB tables `books` to `authors`, but the `authors` table is inconsistent (e.g. it doesn't exist in the DB), then we have schema dependencies indicating that. The job of `resolveDependencies` is to then drop the relationship, so that we can at least generate a legal GraphQL schema for `books`. If we never generate a schema dependency for a certain fragment of Metadata, then there is no reason to call `resolveDependencies` on it, and so there is no reason to store it in `BuildOutputs`. --- The starting point that allows this refactor is to apply Metadata defaults before it reaches `buildAndCollectInfo`, so that metadata-with-defaults can be used elsewhere. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6609 GitOrigin-RevId: df0c4a7ff9451e10e02a40bf26304b26584ba483
2022-11-15 15:02:55 +03:00
endpointObject md = MetadataObject (endpointObjId md) (toJSON $ OMap.lookup (_ceName md) _metaRestEndpoints)
listedQueryObjects :: (CollectionName, ListedQuery) -> MetadataObject
listedQueryObjects (cName, lq) = MetadataObject (MOQueryCollectionsQuery cName lq) (toJSON lq)
-- Cases of urls that generate invalid segments:
hasInvalidSegments :: EndpointMetadata query -> Bool
hasInvalidSegments m = any (`elem` ["", ":"]) (splitPath id id (_ceUrl m))
ceUrlTxt = toTxt . _ceUrl
Avoid GraphQL schema rebuild when changing irrelevant Metadata This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to: - allowlists - query collections - cron triggers - REST endpoints - API limits - metrics config - GraphQL introspection options - TLS allow lists - OpenTelemetry When is construction of the in-memory GraphQL schema cached between Metadata operations? Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation. However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction. The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.) We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance. So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps. That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema. Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613 GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
endpoints = buildEndpointsTrie (M.elems resolvedEndpoints)
duplicateF md = DuplicateRestVariables (ceUrlTxt md) (endpointObject md)
Avoid GraphQL schema rebuild when changing irrelevant Metadata This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to: - allowlists - query collections - cron triggers - REST endpoints - API limits - metrics config - GraphQL introspection options - TLS allow lists - OpenTelemetry When is construction of the in-memory GraphQL schema cached between Metadata operations? Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation. However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction. The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.) We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance. So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps. That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema. Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613 GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
duplicateRestVariables = map duplicateF $ filter duplicateVariables (M.elems resolvedEndpoints)
invalidF md = InvalidRestSegments (ceUrlTxt md) (endpointObject md)
Avoid GraphQL schema rebuild when changing irrelevant Metadata This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to: - allowlists - query collections - cron triggers - REST endpoints - API limits - metrics config - GraphQL introspection options - TLS allow lists - OpenTelemetry When is construction of the in-memory GraphQL schema cached between Metadata operations? Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation. However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction. The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.) We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance. So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps. That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema. Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613 GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
invalidRestSegments = map invalidF $ filter hasInvalidSegments (M.elems resolvedEndpoints)
ambiguousF' ep = MetadataObject (endpointObjId ep) (toJSON ep)
ambiguousF mds = AmbiguousRestEndpoints (commaSeparated $ map _ceUrl mds) (map ambiguousF' mds)
ambiguousRestEndpoints = map (ambiguousF . S.elems . snd) $ ambiguousPathsGrouped endpoints
server: Simplify `BuildOutputs` A bunch of configurations are retrieved from the Metadata, then stored in the `BuildOutputs` structure, only to then be forwarded to the `SchemaCache`, with extremely little processing in between. So this simplifies the build pipeline for some parts of the metadata: just construct those things from `Metadata` directly, and store them in the `SchemaCache` without any intermediate container. Why did we have the detour via `BuildOutputs` in the first place? Parts of the Metadata (codified by `MetadataObjId`) can generate _metadata inconsistencies_ and/or _schema dependencies_, which are related. - Metadata inconsistencies are warnings that we show to the user, indicating that there's something wrong with their configuration, and they have to fix it. - Schema dependencies are an internal mechanism that allow us to build a consistent view of the world. For instance, if we have a relationship from DB tables `books` to `authors`, but the `authors` table is inconsistent (e.g. it doesn't exist in the DB), then we have schema dependencies indicating that. The job of `resolveDependencies` is to then drop the relationship, so that we can at least generate a legal GraphQL schema for `books`. If we never generate a schema dependency for a certain fragment of Metadata, then there is no reason to call `resolveDependencies` on it, and so there is no reason to store it in `BuildOutputs`. --- The starting point that allows this refactor is to apply Metadata defaults before it reaches `buildAndCollectInfo`, so that metadata-with-defaults can be used elsewhere. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6609 GitOrigin-RevId: df0c4a7ff9451e10e02a40bf26304b26584ba483
2022-11-15 15:02:55 +03:00
inlinedAllowlist = inlineAllowlist _metaQueryCollections _metaAllowlist
globalAllowLists = HS.toList . iaGlobal $ inlinedAllowlist
Avoid GraphQL schema rebuild when changing irrelevant Metadata This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to: - allowlists - query collections - cron triggers - REST endpoints - API limits - metrics config - GraphQL introspection options - TLS allow lists - OpenTelemetry When is construction of the in-memory GraphQL schema cached between Metadata operations? Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation. However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction. The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.) We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance. So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps. That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema. Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613 GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
-- Endpoints don't generate any dependencies
endpointInconsistencies = either id absurd <$> toList endpointCollectedInfo
-- Cron triggers don't generate any dependencies
cronTriggersInconsistencies = either id absurd <$> toList cronTriggersCollectedInfo
-- OpenTelemerty doesn't generate any dependencies
openTelemetryInconsistencies = either id absurd <$> toList openTelemetryCollectedInfo
server: Simplify `BuildOutputs` A bunch of configurations are retrieved from the Metadata, then stored in the `BuildOutputs` structure, only to then be forwarded to the `SchemaCache`, with extremely little processing in between. So this simplifies the build pipeline for some parts of the metadata: just construct those things from `Metadata` directly, and store them in the `SchemaCache` without any intermediate container. Why did we have the detour via `BuildOutputs` in the first place? Parts of the Metadata (codified by `MetadataObjId`) can generate _metadata inconsistencies_ and/or _schema dependencies_, which are related. - Metadata inconsistencies are warnings that we show to the user, indicating that there's something wrong with their configuration, and they have to fix it. - Schema dependencies are an internal mechanism that allow us to build a consistent view of the world. For instance, if we have a relationship from DB tables `books` to `authors`, but the `authors` table is inconsistent (e.g. it doesn't exist in the DB), then we have schema dependencies indicating that. The job of `resolveDependencies` is to then drop the relationship, so that we can at least generate a legal GraphQL schema for `books`. If we never generate a schema dependency for a certain fragment of Metadata, then there is no reason to call `resolveDependencies` on it, and so there is no reason to store it in `BuildOutputs`. --- The starting point that allows this refactor is to apply Metadata defaults before it reaches `buildAndCollectInfo`, so that metadata-with-defaults can be used elsewhere. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6609 GitOrigin-RevId: df0c4a7ff9451e10e02a40bf26304b26584ba483
2022-11-15 15:02:55 +03:00
inconsistentQueryCollections <- bindA -< do getInconsistentQueryCollections adminIntrospection _metaQueryCollections listedQueryObjects endpoints globalAllowLists
returnA
-<
SchemaCache
{ scSources = _boSources resolvedOutputs,
scActions = _boActions resolvedOutputs,
-- TODO this is not the right value: we should track what part of the schema
-- we can stitch without consistencies, I think.
scRemoteSchemas = fmap fst (_boRemoteSchemas resolvedOutputs), -- remoteSchemaMap
server: Simplify `BuildOutputs` A bunch of configurations are retrieved from the Metadata, then stored in the `BuildOutputs` structure, only to then be forwarded to the `SchemaCache`, with extremely little processing in between. So this simplifies the build pipeline for some parts of the metadata: just construct those things from `Metadata` directly, and store them in the `SchemaCache` without any intermediate container. Why did we have the detour via `BuildOutputs` in the first place? Parts of the Metadata (codified by `MetadataObjId`) can generate _metadata inconsistencies_ and/or _schema dependencies_, which are related. - Metadata inconsistencies are warnings that we show to the user, indicating that there's something wrong with their configuration, and they have to fix it. - Schema dependencies are an internal mechanism that allow us to build a consistent view of the world. For instance, if we have a relationship from DB tables `books` to `authors`, but the `authors` table is inconsistent (e.g. it doesn't exist in the DB), then we have schema dependencies indicating that. The job of `resolveDependencies` is to then drop the relationship, so that we can at least generate a legal GraphQL schema for `books`. If we never generate a schema dependency for a certain fragment of Metadata, then there is no reason to call `resolveDependencies` on it, and so there is no reason to store it in `BuildOutputs`. --- The starting point that allows this refactor is to apply Metadata defaults before it reaches `buildAndCollectInfo`, so that metadata-with-defaults can be used elsewhere. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6609 GitOrigin-RevId: df0c4a7ff9451e10e02a40bf26304b26584ba483
2022-11-15 15:02:55 +03:00
scAllowlist = inlinedAllowlist,
-- , scCustomTypes = _boCustomTypes resolvedOutputs
Decouple `Analyse` and `OpenAPI` from remote schema introspection and internal execution details. ### Motivation #2338 introduced a way to validate REST queries against the metadata after a change, to properly report any inconsistency that would emerge from a change in the underlying structure of our schema. However, the way this was done was quite complex and error-prone. Namely: we would use the generated schema parsers to statically execute an introspection query, similar to the one we use for remote schemas, then parse the resulting bytestring as it were coming from a remote schema. This led to several issues: the code was using remote schema primitives, and was associated with remote schema code, despite being unrelated, which led to absurd situations like creating fake `Variable`s whose type was also their name. A lot of the code had to deal with the fact that we might fail to re-parse our own schema. Additionally, some of it was dead code, that for some reason GHC did not warn about? But more fundamentally, this architecture decision creates a dependency between unrelated pieces of the engine: modifying the internal processing of root fields or the introspection of remote schemas now risks impacting the unrelated `OpenAPI` feature. ### Description This PR decouples that process from the remote schema introspection logic and from the execution engine by making `Analyse` and `OpenAPI` work on the generic `G.SchemaIntrospection` instead. To accomplish this, it: - adds `GraphQL.Parser.Schema.Convert`, to convert from our "live" schema back to a flat `SchemaIntrospection` - persists in the schema cache the `admin` introspection generated when building the schema, and uses it both for validation and for generating the `OpenAPI`. ### Known issues and limitations This adds a bit of memory pressure to the engine, as we persist the entire schema in the schema cache. This might be acceptable in the short-term, but we have several potential ideas going forward should this be a problem: - cache the result of `Analyze`: when it becomes possible to build the `OpenAPI` purely with the result of `Analyze` without any additional schema information, then we could cache that instead, reducing the footprint - caching the `OpenAPI`: if it doesn't need to change every time the endpoint is queried, then it should be possible to cache the entire `OpenAPI` object instead of the schema - cache a copy of the `FieldParsers` used to generate the schema: as those are persisted through the GraphQL `Context`, and are the only input required to generate the `Schema`, making them accessible in the schema cache would allow us to have the exact same feature with no additional memory cost, at the price of a slightly slower and more complicated process (need to rebuild the `Schema` every time we query the OpenAPI endpoint) - cache nothing at all, and rebuild the admin schema from scratch every time. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/3962 Co-authored-by: paritosh-08 <85472423+paritosh-08@users.noreply.github.com> GitOrigin-RevId: a8b9808170b231fdf6787983b4a9ed286cde27e0
2022-03-22 10:36:39 +03:00
scAdminIntrospection = adminIntrospection,
scGQLContext = gqlContext,
scUnauthenticatedGQLContext = gqlContextUnauth,
scRelayContext = relayContext,
scUnauthenticatedRelayContext = relayContextUnauth,
-- , scGCtxMap = gqlSchema
-- , scDefaultRemoteGCtx = remoteGQLSchema
scDepMap = resolvedDependencies,
Avoid GraphQL schema rebuild when changing irrelevant Metadata This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to: - allowlists - query collections - cron triggers - REST endpoints - API limits - metrics config - GraphQL introspection options - TLS allow lists - OpenTelemetry When is construction of the in-memory GraphQL schema cached between Metadata operations? Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation. However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction. The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.) We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance. So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps. That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema. Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613 GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
scCronTriggers = cronTriggersMap,
scEndpoints = endpoints,
scInconsistentObjs =
inconsistentObjects
<> dependencyInconsistentObjects
<> toList inconsistentRemoteSchemas
<> duplicateRestVariables
<> invalidRestSegments
<> ambiguousRestEndpoints
Avoid GraphQL schema rebuild when changing irrelevant Metadata This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to: - allowlists - query collections - cron triggers - REST endpoints - API limits - metrics config - GraphQL introspection options - TLS allow lists - OpenTelemetry When is construction of the in-memory GraphQL schema cached between Metadata operations? Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation. However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction. The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.) We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance. So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps. That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema. Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613 GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
<> endpointInconsistencies
<> cronTriggersInconsistencies
<> openTelemetryInconsistencies
<> inconsistentQueryCollections,
server: Simplify `BuildOutputs` A bunch of configurations are retrieved from the Metadata, then stored in the `BuildOutputs` structure, only to then be forwarded to the `SchemaCache`, with extremely little processing in between. So this simplifies the build pipeline for some parts of the metadata: just construct those things from `Metadata` directly, and store them in the `SchemaCache` without any intermediate container. Why did we have the detour via `BuildOutputs` in the first place? Parts of the Metadata (codified by `MetadataObjId`) can generate _metadata inconsistencies_ and/or _schema dependencies_, which are related. - Metadata inconsistencies are warnings that we show to the user, indicating that there's something wrong with their configuration, and they have to fix it. - Schema dependencies are an internal mechanism that allow us to build a consistent view of the world. For instance, if we have a relationship from DB tables `books` to `authors`, but the `authors` table is inconsistent (e.g. it doesn't exist in the DB), then we have schema dependencies indicating that. The job of `resolveDependencies` is to then drop the relationship, so that we can at least generate a legal GraphQL schema for `books`. If we never generate a schema dependency for a certain fragment of Metadata, then there is no reason to call `resolveDependencies` on it, and so there is no reason to store it in `BuildOutputs`. --- The starting point that allows this refactor is to apply Metadata defaults before it reaches `buildAndCollectInfo`, so that metadata-with-defaults can be used elsewhere. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6609 GitOrigin-RevId: df0c4a7ff9451e10e02a40bf26304b26584ba483
2022-11-15 15:02:55 +03:00
scApiLimits = _metaApiLimits,
scMetricsConfig = _metaMetricsConfig,
scMetadataResourceVersion = Nothing,
server: Simplify `BuildOutputs` A bunch of configurations are retrieved from the Metadata, then stored in the `BuildOutputs` structure, only to then be forwarded to the `SchemaCache`, with extremely little processing in between. So this simplifies the build pipeline for some parts of the metadata: just construct those things from `Metadata` directly, and store them in the `SchemaCache` without any intermediate container. Why did we have the detour via `BuildOutputs` in the first place? Parts of the Metadata (codified by `MetadataObjId`) can generate _metadata inconsistencies_ and/or _schema dependencies_, which are related. - Metadata inconsistencies are warnings that we show to the user, indicating that there's something wrong with their configuration, and they have to fix it. - Schema dependencies are an internal mechanism that allow us to build a consistent view of the world. For instance, if we have a relationship from DB tables `books` to `authors`, but the `authors` table is inconsistent (e.g. it doesn't exist in the DB), then we have schema dependencies indicating that. The job of `resolveDependencies` is to then drop the relationship, so that we can at least generate a legal GraphQL schema for `books`. If we never generate a schema dependency for a certain fragment of Metadata, then there is no reason to call `resolveDependencies` on it, and so there is no reason to store it in `BuildOutputs`. --- The starting point that allows this refactor is to apply Metadata defaults before it reaches `buildAndCollectInfo`, so that metadata-with-defaults can be used elsewhere. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6609 GitOrigin-RevId: df0c4a7ff9451e10e02a40bf26304b26584ba483
2022-11-15 15:02:55 +03:00
scSetGraphqlIntrospectionOptions = _metaSetGraphqlIntrospectionOptions,
scTlsAllowlist = networkTlsAllowlist _metaNetwork,
scQueryCollections = _metaQueryCollections,
scBackendCache = _boBackendCache resolvedOutputs,
server: Simplify `BuildOutputs` A bunch of configurations are retrieved from the Metadata, then stored in the `BuildOutputs` structure, only to then be forwarded to the `SchemaCache`, with extremely little processing in between. So this simplifies the build pipeline for some parts of the metadata: just construct those things from `Metadata` directly, and store them in the `SchemaCache` without any intermediate container. Why did we have the detour via `BuildOutputs` in the first place? Parts of the Metadata (codified by `MetadataObjId`) can generate _metadata inconsistencies_ and/or _schema dependencies_, which are related. - Metadata inconsistencies are warnings that we show to the user, indicating that there's something wrong with their configuration, and they have to fix it. - Schema dependencies are an internal mechanism that allow us to build a consistent view of the world. For instance, if we have a relationship from DB tables `books` to `authors`, but the `authors` table is inconsistent (e.g. it doesn't exist in the DB), then we have schema dependencies indicating that. The job of `resolveDependencies` is to then drop the relationship, so that we can at least generate a legal GraphQL schema for `books`. If we never generate a schema dependency for a certain fragment of Metadata, then there is no reason to call `resolveDependencies` on it, and so there is no reason to store it in `BuildOutputs`. --- The starting point that allows this refactor is to apply Metadata defaults before it reaches `buildAndCollectInfo`, so that metadata-with-defaults can be used elsewhere. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6609 GitOrigin-RevId: df0c4a7ff9451e10e02a40bf26304b26584ba483
2022-11-15 15:02:55 +03:00
scSourceHealthChecks = buildHealthCheckCache _metaSources,
scSourcePingConfig = buildSourcePingCache _metaSources,
Avoid GraphQL schema rebuild when changing irrelevant Metadata This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to: - allowlists - query collections - cron triggers - REST endpoints - API limits - metrics config - GraphQL introspection options - TLS allow lists - OpenTelemetry When is construction of the in-memory GraphQL schema cached between Metadata operations? Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation. However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction. The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.) We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance. So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps. That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema. Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613 GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
scOpenTelemetryConfig = openTelemetryInfo
}
where
Avoid GraphQL schema rebuild when changing irrelevant Metadata This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to: - allowlists - query collections - cron triggers - REST endpoints - API limits - metrics config - GraphQL introspection options - TLS allow lists - OpenTelemetry When is construction of the in-memory GraphQL schema cached between Metadata operations? Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation. However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction. The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.) We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance. So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps. That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema. Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613 GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
-- See Note [Avoiding GraphQL schema rebuilds when changing irrelevant Metadata]
buildOutputsAndSchema = proc (metadataDep, invalidationKeysDep) -> do
(outputs, collectedInfo) <- runWriterA buildAndCollectInfo -< (metadataDep, invalidationKeysDep)
let (inconsistentObjects, unresolvedDependencies) = partitionEithers $ toList collectedInfo
out2@(resolvedOutputs, _dependencyInconsistentObjects, _resolvedDependencies) <- resolveDependencies -< (outputs, unresolvedDependencies)
out3 <-
bindA
-< do
cxt <- askServerConfigCtx
buildGQLContext
cxt
(_boSources resolvedOutputs)
(_boRemoteSchemas resolvedOutputs)
(_boActions resolvedOutputs)
(_boCustomTypes resolvedOutputs)
returnA -< (inconsistentObjects, out2, out3)
resolveBackendInfo' ::
forall arr m b.
( BackendMetadata b,
ArrowChoice arr,
Inc.ArrowCache m arr,
Inc.ArrowDistribute arr,
ArrowWriter (Seq (Either InconsistentMetadata MetadataDependency)) arr,
MonadIO m,
HasHttpManagerM m
) =>
(BackendConfigWrapper b, Inc.Dependency (BackendMap BackendInvalidationKeysWrapper)) `arr` BackendCache
resolveBackendInfo' = proc (backendConfigWrapper, backendInvalidationMap) -> do
let backendInvalidationKeys =
Inc.selectD #unBackendInvalidationKeysWrapper $
Inc.selectMaybeD $
BackendMap.lookupD @b backendInvalidationMap
backendInfo <- resolveBackendInfo @b logger -< (backendInvalidationKeys, unBackendConfigWrapper backendConfigWrapper)
returnA -< BackendMap.singleton (BackendInfoWrapper @b backendInfo)
resolveBackendCache ::
forall arr m.
( ArrowChoice arr,
Inc.ArrowCache m arr,
Inc.ArrowDistribute arr,
ArrowWriter (Seq (Either InconsistentMetadata MetadataDependency)) arr,
MonadIO m,
HasHttpManagerM m
) =>
(Inc.Dependency (BackendMap BackendInvalidationKeysWrapper), [AB.AnyBackend BackendConfigWrapper]) `arr` BackendCache
resolveBackendCache = proc (backendInvalidationMap, backendConfigs) -> do
case backendConfigs of
[] -> returnA -< mempty
(anyBackendConfig : backendConfigs') -> do
backendInfo <-
AB.dispatchAnyBackendArrow @BackendMetadata @HasTag resolveBackendInfo' -< (anyBackendConfig, backendInvalidationMap)
backendInfos <- resolveBackendCache -< (backendInvalidationMap, backendConfigs')
returnA -< backendInfo <> backendInfos
getSourceConfigIfNeeded ::
forall b arr m.
( ArrowChoice arr,
Inc.ArrowCache m arr,
ArrowWriter (Seq (Either InconsistentMetadata MetadataDependency)) arr,
MonadIO m,
MonadResolveSource m,
HasHttpManagerM m,
BackendMetadata b
) =>
( Inc.Dependency (HashMap SourceName Inc.InvalidationKey),
SourceName,
SourceConnConfiguration b,
BackendSourceKind b,
BackendInfo b
)
`arr` Maybe (SourceConfig b)
getSourceConfigIfNeeded = Inc.cache proc (invalidationKeys, sourceName, sourceConfig, backendKind, backendInfo) -> do
let metadataObj = MetadataObject (MOSource sourceName) $ toJSON sourceName
httpMgr <- bindA -< askHttpManager
Inc.dependOn -< Inc.selectKeyD sourceName invalidationKeys
(|
withRecordInconsistency
( liftEitherA <<< bindA -< resolveSourceConfig @b logger sourceName sourceConfig backendKind backendInfo env httpMgr
)
|) metadataObj
resolveSourceIfNeeded ::
forall b arr m.
( ArrowChoice arr,
Inc.ArrowCache m arr,
ArrowWriter (Seq (Either InconsistentMetadata MetadataDependency)) arr,
MonadIO m,
MonadBaseControl IO m,
MonadResolveSource m,
HasHttpManagerM m,
BackendMetadata b
) =>
( Inc.Dependency (HashMap SourceName Inc.InvalidationKey),
BackendInfoAndSourceMetadata b
)
`arr` Maybe (ResolvedSource b)
resolveSourceIfNeeded = Inc.cache proc (invalidationKeys, BackendInfoAndSourceMetadata {..}) -> do
let sourceName = _smName _bcasmSourceMetadata
metadataObj = MetadataObject (MOSource sourceName) $ toJSON sourceName
maybeSourceConfig <- getSourceConfigIfNeeded @b -< (invalidationKeys, sourceName, _smConfiguration _bcasmSourceMetadata, _smKind _bcasmSourceMetadata, _bcasmBackendInfo)
case maybeSourceConfig of
Nothing -> returnA -< Nothing
Just sourceConfig ->
(|
withRecordInconsistency
( liftEitherA <<< bindA
-< do
resSource <- resolveDatabaseMetadata _bcasmSourceMetadata sourceConfig (getSourceTypeCustomization $ _smCustomization _bcasmSourceMetadata)
for_ resSource $ liftIO . unLogger logger
pure resSource
)
|) metadataObj
-- impl notes (swann):
--
-- as our cache invalidation key, we use the fact of the availability of event triggers
-- present, rerunning catalog init when this changes. i.e we invalidate the cache and
-- rebuild it with the catalog only when there is at least one event trigger present.
-- This is correct, because we only care about the transition from zero event triggers
-- to nonzero (not necessarily one, as Anon has observed, because replace_metadata can
-- add multiple event triggers in one go)
--
-- a future optimisation would be to cache, on a per-source basis, whether or not
-- the event catalog itself exists, and to then trigger catalog init when an event
-- trigger is created _but only if_ this cached information says the event catalog
-- doesn't already exist.
initCatalogIfNeeded ::
forall b arr m.
( ArrowChoice arr,
Inc.ArrowCache m arr,
MonadIO m,
BackendMetadata b,
HasServerConfigCtx m,
MonadError QErr m,
MonadBaseControl IO m
) =>
(Proxy b, Bool, SourceConfig b) `arr` (RecreateEventTriggers, SourceCatalogMigrationState)
initCatalogIfNeeded = Inc.cache proc (Proxy, atleastOneTrigger, sourceConfig) -> do
bindA
-< do
if atleastOneTrigger
then do
maintenanceMode <- _sccMaintenanceMode <$> askServerConfigCtx
eventingMode <- _sccEventingMode <$> askServerConfigCtx
readOnlyMode <- _sccReadOnlyMode <$> askServerConfigCtx
if
-- when safe mode is enabled, don't perform any migrations
| readOnlyMode == ReadOnlyModeEnabled -> pure (RETDoNothing, SCMSMigrationOnHold "read-only mode enabled")
-- when eventing mode is disabled, don't perform any migrations
| eventingMode == EventingDisabled -> pure (RETDoNothing, SCMSMigrationOnHold "eventing mode disabled")
-- when maintenance mode is enabled, don't perform any migrations
| maintenanceMode == (MaintenanceModeEnabled ()) -> pure (RETDoNothing, SCMSMigrationOnHold "maintenance mode enabled")
| otherwise -> do
-- The `initCatalogForSource` action is retried here because
-- in cloud there will be multiple workers (graphql-engine instances)
-- trying to migrate the source catalog, when needed. This introduces
-- a race condition as both the workers try to migrate the source catalog
-- concurrently and when one of them succeeds the other ones will fail
-- and be in an inconsistent state. To avoid the inconsistency, we retry
-- migrating the catalog on error and in the retry `initCatalogForSource`
-- will see that the catalog is already migrated, so it won't attempt the
-- migration again
liftEither
=<< Retry.retrying
( Retry.constantDelay (fromIntegral $ diffTimeToMicroSeconds $ seconds $ Seconds 10)
<> Retry.limitRetries 3
)
(const $ return . isLeft)
(const $ runExceptT $ prepareCatalog @b sourceConfig)
else pure (RETDoNothing, SCMSUninitializedSource)
buildSource ::
forall b arr m.
( ArrowChoice arr,
Inc.ArrowDistribute arr,
Inc.ArrowCache m arr,
ArrowWriter (Seq (Either InconsistentMetadata MetadataDependency)) arr,
HasServerConfigCtx m,
MonadError QErr m,
BackendMetadata b,
GetAggregationPredicatesDeps b
) =>
( HashMap SourceName (AB.AnyBackend PartiallyResolvedSource),
SourceMetadata b,
SourceConfig b,
HashMap (TableName b) (TableCoreInfoG b (ColumnInfo b) (ColumnInfo b)),
HashMap (TableName b) (EventTriggerInfoMap b),
DBTablesMetadata b,
DBFunctionsMetadata b,
scaffolding for remote-schemas module The main aim of the PR is: 1. To set up a module structure for 'remote-schemas' package. 2. Move parts by the remote schema codebase into the new module structure to validate it. ## Notes to the reviewer Why a PR with large-ish diff? 1. We've been making progress on the MM project but we don't yet know long it is going to take us to get to the first milestone. To understand this better, we need to figure out the unknowns as soon as possible. Hence I've taken a stab at the first two items in the [end-state](https://gist.github.com/0x777/ca2bdc4284d21c3eec153b51dea255c9) document to figure out the unknowns. Unsurprisingly, there are a bunch of issues that we haven't discussed earlier. These are documented in the 'open questions' section. 1. The diff is large but that is only code moved around and I've added a section that documents how things are moved. In addition, there are fair number of PR comments to help with the review process. ## Changes in the PR ### Module structure Sets up the module structure as follows: ``` Hasura/ RemoteSchema/ Metadata/ Types.hs SchemaCache/ Types.hs Permission.hs RemoteRelationship.hs Build.hs MetadataAPI/ Types.hs Execute.hs ``` ### 1. Types representing metadata are moved Types that capture metadata information (currently scattered across several RQL modules) are moved into `Hasura.RemoteSchema.Metadata.Types`. - This new module only depends on very 'core' modules such as `Hasura.Session` for the notion of roles and `Hasura.Incremental` for `Cacheable` typeclass. - The requirement on database modules is avoided by generalizing the remote schemas metadata to accept an arbitrary 'r' for a remote relationship definition. ### 2. SchemaCache related types and build logic have been moved Types that represent remote schemas information in SchemaCache are moved into `Hasura.RemoteSchema.SchemaCache.Types`. Similar to `H.RS.Metadata.Types`, this module depends on 'core' modules except for `Hasura.GraphQL.Parser.Variable`. It has something to do with remote relationships but I haven't spent time looking into it. The validation of 'remote relationships to remote schema' is also something that needs to be looked at. Rips out the logic that builds remote schema's SchemaCache information from the monolithic `buildSchemaCacheRule` and moves it into `Hasura.RemoteSchema.SchemaCache.Build`. Further, the `.SchemaCache.Permission` and `.SchemaCache.RemoteRelationship` have been created from existing modules that capture schema cache building logic for those two components. This was a fair amount of work. On main, currently remote schema's SchemaCache information is built in two phases - in the first phase, 'permissions' and 'remote relationships' are ignored and in the second phase they are filled in. While remote relationships can only be resolved after partially resolving sources and other remote schemas, the same isn't true for permissions. Further, most of the work that is done to resolve remote relationships can be moved to the first phase so that the second phase can be a very simple traversal. This is the approach that was taken - resolve permissions and as much as remote relationships information in the first phase. ### 3. Metadata APIs related types and build logic have been moved The types that represent remote schema related metadata APIs and the execution logic have been moved to `Hasura.RemoteSchema.MetadataAPI.Types` and `.Execute` modules respectively. ## Open questions: 1. `Hasura.RemoteSchema.Metadata.Types` is so called because I was hoping that all of the metadata related APIs of remote schema can be brought in at `Hasura.RemoteSchema.Metadata.API`. However, as metadata APIs depended on functions from `SchemaCache` module (see [1](https://github.com/hasura/graphql-engine-mono/blob/ceba6d62264603ee5d279814677b29bcc43ecaea/server/src-lib/Hasura/RQL/DDL/RemoteSchema.hs#L55) and [2](https://github.com/hasura/graphql-engine-mono/blob/ceba6d62264603ee5d279814677b29bcc43ecaea/server/src-lib/Hasura/RQL/DDL/RemoteSchema.hs#L91), it made more sense to create a separate top-level module for `MetadataAPI`s. Maybe we can just have `Hasura.RemoteSchema.Metadata` and get rid of the extra nesting or have `Hasura.RemoteSchema.Metadata.{Core,Permission,RemoteRelationship}` if we want to break them down further. 1. `buildRemoteSchemas` in `H.RS.SchemaCache.Build` has the following type: ```haskell buildRemoteSchemas :: ( ArrowChoice arr, Inc.ArrowDistribute arr, ArrowWriter (Seq CollectedInfo) arr, Inc.ArrowCache m arr, MonadIO m, HasHttpManagerM m, Inc.Cacheable remoteRelationshipDefinition, ToJSON remoteRelationshipDefinition, MonadError QErr m ) => Env.Environment -> ( (Inc.Dependency (HashMap RemoteSchemaName Inc.InvalidationKey), OrderedRoles), [RemoteSchemaMetadataG remoteRelationshipDefinition] ) `arr` HashMap RemoteSchemaName (PartiallyResolvedRemoteSchemaCtxG remoteRelationshipDefinition, MetadataObject) ``` Note the dependence on `CollectedInfo` which is defined as ```haskell data CollectedInfo = CIInconsistency InconsistentMetadata | CIDependency MetadataObject -- ^ for error reporting on missing dependencies SchemaObjId SchemaDependency deriving (Eq) ``` this pretty much means that remote schemas is dependent on types from databases, actions, .... How do we fix this? Maybe introduce a typeclass such as `ArrowCollectRemoteSchemaDependencies` which is defined in `Hasura.RemoteSchema` and then implemented in graphql-engine? 1. The dependency on `buildSchemaCacheFor` in `.MetadataAPI.Execute` which has the following signature: ```haskell buildSchemaCacheFor :: (QErrM m, CacheRWM m, MetadataM m) => MetadataObjId -> MetadataModifier -> ``` This can be easily resolved if we restrict what the metadata APIs are allowed to do. Currently, they operate in an unfettered access to modify SchemaCache (the `CacheRWM` constraint): ```haskell runAddRemoteSchema :: ( QErrM m, CacheRWM m, MonadIO m, HasHttpManagerM m, MetadataM m, Tracing.MonadTrace m ) => Env.Environment -> AddRemoteSchemaQuery -> m EncJSON ``` This should instead be changed to restrict remote schema APIs to only modify remote schema metadata (but has access to the remote schemas part of the schema cache), this dependency is completely removed. ```haskell runAddRemoteSchema :: ( QErrM m, MonadIO m, HasHttpManagerM m, MonadReader RemoteSchemasSchemaCache m, MonadState RemoteSchemaMetadata m, Tracing.MonadTrace m ) => Env.Environment -> AddRemoteSchemaQuery -> m RemoteSchemeMetadataObjId ``` The idea is that the core graphql-engine would call these functions and then call `buildSchemaCacheFor`. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6291 GitOrigin-RevId: 51357148c6404afe70219afa71bd1d59bdf4ffc6
2022-10-21 06:13:07 +03:00
PartiallyResolvedRemoteSchemaMap,
OrderedRoles
)
`arr` BackendSourceInfo
buildSource = proc (allSources, sourceMetadata, sourceConfig, tablesRawInfo, eventTriggerInfoMaps, _dbTables, dbFunctions, remoteSchemaMap, orderedRoles) -> do
let SourceMetadata sourceName _backendKind tables functions _ queryTagsConfig sourceCustomization _healthCheckConfig = sourceMetadata
tablesMetadata = OMap.elems tables
(_, nonColumnInputs, permissions) = unzip3 $ map mkTableInputs tablesMetadata
alignTableMap :: HashMap (TableName b) a -> HashMap (TableName b) c -> HashMap (TableName b) (a, c)
alignTableMap = M.intersectionWith (,)
-- relationships and computed fields
let nonColumnsByTable = mapFromL _nctiTable nonColumnInputs
tableCoreInfos :: HashMap (TableName b) (TableCoreInfo b) <-
(|
Inc.keyed
( \_ (tableRawInfo, nonColumnInput) -> do
let columns = _tciFieldInfoMap tableRawInfo
allFields :: FieldInfoMap (FieldInfo b) <- addNonColumnFields -< (allSources, sourceName, tablesRawInfo, columns, remoteSchemaMap, dbFunctions, nonColumnInput)
returnA -< (tableRawInfo {_tciFieldInfoMap = allFields})
)
|) (tablesRawInfo `alignTableMap` nonColumnsByTable)
tableCoreInfosDep <- Inc.newDependency -< tableCoreInfos
-- permissions
tableCache <-
(|
Inc.keyed
( \_ ((tableCoreInfo, permissionInputs), eventTriggerInfos) -> do
let tableFields = _tciFieldInfoMap tableCoreInfo
permissionInfos <-
buildTablePermissions
-<
(Proxy :: Proxy b, sourceName, tableCoreInfosDep, tableFields, permissionInputs, orderedRoles)
Role-invariant schema constructors We build the GraphQL schema by combining building blocks such as `tableSelectionSet` and `columnParser`. These building blocks individually build `{InputFields,Field,}Parser` objects. Those object specify the valid GraphQL schema. Since the GraphQL schema is role-dependent, at some point we need to know what fragment of the GraphQL schema a specific role is allowed to access, and this is stored in `{Sel,Upd,Ins,Del}PermInfo` objects. We have passed around these permission objects as function arguments to the schema building blocks since we first started dealing with permissions during the PDV refactor - see hasura/graphql-engine@5168b99e463199b1934d8645bd6cd37eddb64ae1 in hasura/graphql-engine#4111. This means that, for instance, `tableSelectionSet` has as its type: ```haskell tableSelectionSet :: forall b r m n. MonadBuildSchema b r m n => SourceName -> TableInfo b -> SelPermInfo b -> m (Parser 'Output n (AnnotatedFields b)) ``` There are three reasons to change this. 1. We often pass a `Maybe (xPermInfo b)` instead of a proper `xPermInfo b`, and it's not clear what the intended semantics of this is. Some potential improvements on the data types involved are discussed in issue hasura/graphql-engine-mono#3125. 2. In most cases we also already pass a `TableInfo b`, and together with the `MonadRole` that is usually also in scope, this means that we could look up the required permissions regardless: so passing the permissions explicitly undermines the "single source of truth" principle. Breaking this principle also makes the code more difficult to read. 3. We are working towards role-based parsers (see hasura/graphql-engine-mono#2711), where the `{InputFields,Field,}Parser` objects are constructed in a role-invariant way, so that we have a single object that can be used for all roles. In particular, this means that the schema building blocks _need_ to be constructed in a role-invariant way. While this PR doesn't accomplish that, it does reduce the amount of role-specific arguments being passed, thus fixing hasura/graphql-engine-mono#3068. Concretely, this PR simply drops the `xPermInfo b` argument from almost all schema building blocks. Instead these objects are looked up from the `TableInfo b` as-needed. The resulting code is considerably simpler and shorter. One way to interpret this change is as follows. Before this PR, we figured out permissions at the top-level in `Hasura.GraphQL.Schema`, passing down the obtained `xPermInfo` objects as required. After this PR, we have a bottom-up approach where the schema building blocks themselves decide whether they want to be included for a particular role. So this moves some permission logic out of `Hasura.GraphQL.Schema`, which is very complex. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/3608 GitOrigin-RevId: 51a744f34ec7d57bc8077667ae7f9cb9c4f6c962
2022-02-17 11:16:20 +03:00
returnA -< TableInfo tableCoreInfo permissionInfos eventTriggerInfos (mkAdminRolePermInfo tableCoreInfo)
)
|) (tableCoreInfos `alignTableMap` mapFromL _tpiTable permissions `alignTableMap` eventTriggerInfoMaps)
Resolve source customization at schema cache building time. ### Description This PR attempts to fix several issues with source customization as it relates to remote relationships. There were several issues regarding casing: at the relationship border, we didn't properly set the target source's case, we didn't have access to the list of supported features to decide whether the feature was allowed or not, and we didn't have access to the global default. However, all of that information is available when we build the schema cache, as we do resolve the case of some elements such as function names: we can therefore resolve source information at the same time, and simplify both the root of the schema and the remote relationship border. To do this, this PR introduces a new type, `ResolvedSourceCustomization`, to be used in the Schema Cache, as opposed to the metadata's `SourceCustomization`, following a pattern established by a lot of other types. ### Remaining work and open questions One major point of confusion: it seems to me that we didn't set the case at all across remote relationships, which would suggest we would use the case of the LHS source across the subset of the RHS one that is accessible through the remote relationship, which would in turn "corrupt" the parser cache and might result in the wrong case being used for that source later on. Is that assesment correct, and was I right to fix it? Another one is that we seem not to be using the local case of the RHS to name the field in an object relationship; unless I'm mistaken we only use it for array relationships? Is that intentional? This PR is also missing tests that would show-case the difference, and a changelog entry. To my knowledge, all the tests of this feature are in the python test suite; this could be the opportunity to move them to the hspec suite, but this might be a considerable amount of work? PR-URL: https://github.com/hasura/graphql-engine-mono/pull/5619 GitOrigin-RevId: 51a81b713a74575e82d9f96b51633f158ce3a47b
2022-09-12 19:05:40 +03:00
-- not forcing the evaluation here results in a measurable negative impact
-- on memory residency as measured by our benchmark
!defaultNC <- bindA -< _sccDefaultNamingConvention <$> askServerConfigCtx
!isNamingConventionEnabled <- bindA -< ((EFNamingConventions `elem`) . _sccExperimentalFeatures) <$> askServerConfigCtx
Resolve source customization at schema cache building time. ### Description This PR attempts to fix several issues with source customization as it relates to remote relationships. There were several issues regarding casing: at the relationship border, we didn't properly set the target source's case, we didn't have access to the list of supported features to decide whether the feature was allowed or not, and we didn't have access to the global default. However, all of that information is available when we build the schema cache, as we do resolve the case of some elements such as function names: we can therefore resolve source information at the same time, and simplify both the root of the schema and the remote relationship border. To do this, this PR introduces a new type, `ResolvedSourceCustomization`, to be used in the Schema Cache, as opposed to the metadata's `SourceCustomization`, following a pattern established by a lot of other types. ### Remaining work and open questions One major point of confusion: it seems to me that we didn't set the case at all across remote relationships, which would suggest we would use the case of the LHS source across the subset of the RHS one that is accessible through the remote relationship, which would in turn "corrupt" the parser cache and might result in the wrong case being used for that source later on. Is that assesment correct, and was I right to fix it? Another one is that we seem not to be using the local case of the RHS to name the field in an object relationship; unless I'm mistaken we only use it for array relationships? Is that intentional? This PR is also missing tests that would show-case the difference, and a changelog entry. To my knowledge, all the tests of this feature are in the python test suite; this could be the opportunity to move them to the hspec suite, but this might be a considerable amount of work? PR-URL: https://github.com/hasura/graphql-engine-mono/pull/5619 GitOrigin-RevId: 51a81b713a74575e82d9f96b51633f158ce3a47b
2022-09-12 19:05:40 +03:00
!namingConv <-
bindA
-<
if isNamingConventionEnabled
then getNamingCase sourceCustomization (namingConventionSupport @b) defaultNC
else pure HasuraCase
let resolvedCustomization = mkResolvedSourceCustomization sourceCustomization namingConv
-- sql functions
functionCacheMaybes <-
(|
Inc.keyed
( \_ (FunctionMetadata qf config functionPermissions comment) -> do
let systemDefined = SystemDefined False
definition = toJSON $ TrackFunction @b qf
metadataObject =
MetadataObject
( MOSourceObjId sourceName $
AB.mkAnyBackend $
SMOFunction @b qf
)
definition
schemaObject =
SOSourceObj sourceName $
AB.mkAnyBackend $
SOIFunction @b qf
addFunctionContext e = "in function " <> qf <<> ": " <> e
(|
withRecordInconsistency
( do
(functionInfo, dep) <-
bindErrorA
-< modifyErr addFunctionContext do
let funcDefs = fromMaybe [] $ M.lookup qf dbFunctions
metadataPermissions = mapFromL _fpmRole functionPermissions
permissionsMap = mkBooleanPermissionMap FunctionPermissionInfo metadataPermissions orderedRoles
rawfunctionInfo <- handleMultipleFunctions @b qf funcDefs
buildFunctionInfo sourceName qf systemDefined config permissionsMap rawfunctionInfo comment namingConv
recordDependencies -< (metadataObject, schemaObject, [dep])
returnA -< functionInfo
)
|) metadataObject
)
|) (mapFromL _fmFunction (OMap.elems functions))
let functionCache = catMaybes functionCacheMaybes
Resolve source customization at schema cache building time. ### Description This PR attempts to fix several issues with source customization as it relates to remote relationships. There were several issues regarding casing: at the relationship border, we didn't properly set the target source's case, we didn't have access to the list of supported features to decide whether the feature was allowed or not, and we didn't have access to the global default. However, all of that information is available when we build the schema cache, as we do resolve the case of some elements such as function names: we can therefore resolve source information at the same time, and simplify both the root of the schema and the remote relationship border. To do this, this PR introduces a new type, `ResolvedSourceCustomization`, to be used in the Schema Cache, as opposed to the metadata's `SourceCustomization`, following a pattern established by a lot of other types. ### Remaining work and open questions One major point of confusion: it seems to me that we didn't set the case at all across remote relationships, which would suggest we would use the case of the LHS source across the subset of the RHS one that is accessible through the remote relationship, which would in turn "corrupt" the parser cache and might result in the wrong case being used for that source later on. Is that assesment correct, and was I right to fix it? Another one is that we seem not to be using the local case of the RHS to name the field in an object relationship; unless I'm mistaken we only use it for array relationships? Is that intentional? This PR is also missing tests that would show-case the difference, and a changelog entry. To my knowledge, all the tests of this feature are in the python test suite; this could be the opportunity to move them to the hspec suite, but this might be a considerable amount of work? PR-URL: https://github.com/hasura/graphql-engine-mono/pull/5619 GitOrigin-RevId: 51a81b713a74575e82d9f96b51633f158ce3a47b
2022-09-12 19:05:40 +03:00
returnA -< AB.mkAnyBackend $ SourceInfo sourceName tableCache functionCache sourceConfig queryTagsConfig resolvedCustomization
buildAndCollectInfo ::
forall arr m.
( ArrowChoice arr,
Inc.ArrowDistribute arr,
Inc.ArrowCache m arr,
ArrowWriter (Seq (Either InconsistentMetadata MetadataDependency)) arr,
MonadIO m,
MonadError QErr m,
MonadReader BuildReason m,
MonadBaseControl IO m,
HasHttpManagerM m,
HasServerConfigCtx m,
MonadResolveSource m
) =>
Avoid GraphQL schema rebuild when changing irrelevant Metadata This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to: - allowlists - query collections - cron triggers - REST endpoints - API limits - metrics config - GraphQL introspection options - TLS allow lists - OpenTelemetry When is construction of the in-memory GraphQL schema cached between Metadata operations? Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation. However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction. The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.) We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance. So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps. That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema. Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613 GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
(Inc.Dependency Metadata, Inc.Dependency InvalidationKeys) `arr` BuildOutputs
buildAndCollectInfo = proc (metadataDep, invalidationKeys) -> do
sources <- Inc.dependOn -< Inc.selectD #_metaSources metadataDep
remoteSchemas <- Inc.dependOn -< Inc.selectD #_metaRemoteSchemas metadataDep
customTypes <- Inc.dependOn -< Inc.selectD #_metaCustomTypes metadataDep
actions <- Inc.dependOn -< Inc.selectD #_metaActions metadataDep
inheritedRoles <- Inc.dependOn -< Inc.selectD #_metaInheritedRoles metadataDep
backendConfigs <- Inc.dependOn -< Inc.selectD #_metaBackendConfigs metadataDep
let actionRoles = map _apmRole . _amPermissions =<< OMap.elems actions
[Preview] Inherited roles for postgres read queries fixes #3868 docker image - `hasura/graphql-engine:inherited-roles-preview-48b73a2de` Note: To be able to use the inherited roles feature, the graphql-engine should be started with the env variable `HASURA_GRAPHQL_EXPERIMENTAL_FEATURES` set to `inherited_roles`. Introduction ------------ This PR implements the idea of multiple roles as presented in this [paper](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/FGALanguageICDE07.pdf). The multiple roles feature in this PR can be used via inherited roles. An inherited role is a role which can be created by combining multiple singular roles. For example, if there are two roles `author` and `editor` configured in the graphql-engine, then we can create a inherited role with the name of `combined_author_editor` role which will combine the select permissions of the `author` and `editor` roles and then make GraphQL queries using the `combined_author_editor`. How are select permissions of different roles are combined? ------------------------------------------------------------ A select permission includes 5 things: 1. Columns accessible to the role 2. Row selection filter 3. Limit 4. Allow aggregation 5. Scalar computed fields accessible to the role Suppose there are two roles, `role1` gives access to the `address` column with row filter `P1` and `role2` gives access to both the `address` and the `phone` column with row filter `P2` and we create a new role `combined_roles` which combines `role1` and `role2`. Let's say the following GraphQL query is queried with the `combined_roles` role. ```graphql query { employees { address phone } } ``` This will translate to the following SQL query: ```sql select (case when (P1 or P2) then address else null end) as address, (case when P2 then phone else null end) as phone from employee where (P1 or P2) ``` The other parameters of the select permission will be combined in the following manner: 1. Limit - Minimum of the limits will be the limit of the inherited role 2. Allow aggregations - If any of the role allows aggregation, then the inherited role will allow aggregation 3. Scalar computed fields - same as table column fields, as in the above example APIs for inherited roles: ---------------------- 1. `add_inherited_role` `add_inherited_role` is the [metadata API](https://hasura.io/docs/1.0/graphql/core/api-reference/index.html#schema-metadata-api) to create a new inherited role. It accepts two arguments `role_name`: the name of the inherited role to be added (String) `role_set`: list of roles that need to be combined (Array of Strings) Example: ```json { "type": "add_inherited_role", "args": { "role_name":"combined_user", "role_set":[ "user", "user1" ] } } ``` After adding the inherited role, the inherited role can be used like single roles like earlier Note: An inherited role can only be created with non-inherited/singular roles. 2. `drop_inherited_role` The `drop_inherited_role` API accepts the name of the inherited role and drops it from the metadata. It accepts a single argument: `role_name`: name of the inherited role to be dropped Example: ```json { "type": "drop_inherited_role", "args": { "role_name":"combined_user" } } ``` Metadata --------- The derived roles metadata will be included under the `experimental_features` key while exporting the metadata. ```json { "experimental_features": { "derived_roles": [ { "role_name": "manager_is_employee_too", "role_set": [ "employee", "manager" ] } ] } } ``` Scope ------ Only postgres queries and subscriptions are supported in this PR. Important points: ----------------- 1. All columns exposed to an inherited role will be marked as `nullable`, this is done so that cell value nullification can be done. TODOs ------- - [ ] Tests - [ ] Test a GraphQL query running with a inherited role without enabling inherited roles in experimental features - [] Tests for aggregate queries, limit, computed fields, functions, subscriptions (?) - [ ] Introspection test with a inherited role (nullability changes in a inherited role) - [ ] Docs - [ ] Changelog Co-authored-by: Vamshi Surabhi <6562944+0x777@users.noreply.github.com> GitOrigin-RevId: 3b8ee1e11f5ceca80fe294f8c074d42fbccfec63
2021-03-08 14:14:13 +03:00
remoteSchemaRoles = map _rspmRole . _rsmPermissions =<< OMap.elems remoteSchemas
sourceRoles =
HS.fromList $
concat $
OMap.elems sources >>= \(BackendSourceMetadata e) ->
AB.dispatchAnyBackend @Backend e \(SourceMetadata _ _ tables _functions _ _ _ _) -> do
table <- OMap.elems tables
pure $
OMap.keys (_tmInsertPermissions table)
<> OMap.keys (_tmSelectPermissions table)
<> OMap.keys (_tmUpdatePermissions table)
<> OMap.keys (_tmDeletePermissions table)
inheritedRoleNames = OMap.keys inheritedRoles
allRoleNames = sourceRoles <> HS.fromList (remoteSchemaRoles <> actionRoles <> inheritedRoleNames)
[Preview] Inherited roles for postgres read queries fixes #3868 docker image - `hasura/graphql-engine:inherited-roles-preview-48b73a2de` Note: To be able to use the inherited roles feature, the graphql-engine should be started with the env variable `HASURA_GRAPHQL_EXPERIMENTAL_FEATURES` set to `inherited_roles`. Introduction ------------ This PR implements the idea of multiple roles as presented in this [paper](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/FGALanguageICDE07.pdf). The multiple roles feature in this PR can be used via inherited roles. An inherited role is a role which can be created by combining multiple singular roles. For example, if there are two roles `author` and `editor` configured in the graphql-engine, then we can create a inherited role with the name of `combined_author_editor` role which will combine the select permissions of the `author` and `editor` roles and then make GraphQL queries using the `combined_author_editor`. How are select permissions of different roles are combined? ------------------------------------------------------------ A select permission includes 5 things: 1. Columns accessible to the role 2. Row selection filter 3. Limit 4. Allow aggregation 5. Scalar computed fields accessible to the role Suppose there are two roles, `role1` gives access to the `address` column with row filter `P1` and `role2` gives access to both the `address` and the `phone` column with row filter `P2` and we create a new role `combined_roles` which combines `role1` and `role2`. Let's say the following GraphQL query is queried with the `combined_roles` role. ```graphql query { employees { address phone } } ``` This will translate to the following SQL query: ```sql select (case when (P1 or P2) then address else null end) as address, (case when P2 then phone else null end) as phone from employee where (P1 or P2) ``` The other parameters of the select permission will be combined in the following manner: 1. Limit - Minimum of the limits will be the limit of the inherited role 2. Allow aggregations - If any of the role allows aggregation, then the inherited role will allow aggregation 3. Scalar computed fields - same as table column fields, as in the above example APIs for inherited roles: ---------------------- 1. `add_inherited_role` `add_inherited_role` is the [metadata API](https://hasura.io/docs/1.0/graphql/core/api-reference/index.html#schema-metadata-api) to create a new inherited role. It accepts two arguments `role_name`: the name of the inherited role to be added (String) `role_set`: list of roles that need to be combined (Array of Strings) Example: ```json { "type": "add_inherited_role", "args": { "role_name":"combined_user", "role_set":[ "user", "user1" ] } } ``` After adding the inherited role, the inherited role can be used like single roles like earlier Note: An inherited role can only be created with non-inherited/singular roles. 2. `drop_inherited_role` The `drop_inherited_role` API accepts the name of the inherited role and drops it from the metadata. It accepts a single argument: `role_name`: name of the inherited role to be dropped Example: ```json { "type": "drop_inherited_role", "args": { "role_name":"combined_user" } } ``` Metadata --------- The derived roles metadata will be included under the `experimental_features` key while exporting the metadata. ```json { "experimental_features": { "derived_roles": [ { "role_name": "manager_is_employee_too", "role_set": [ "employee", "manager" ] } ] } } ``` Scope ------ Only postgres queries and subscriptions are supported in this PR. Important points: ----------------- 1. All columns exposed to an inherited role will be marked as `nullable`, this is done so that cell value nullification can be done. TODOs ------- - [ ] Tests - [ ] Test a GraphQL query running with a inherited role without enabling inherited roles in experimental features - [] Tests for aggregate queries, limit, computed fields, functions, subscriptions (?) - [ ] Introspection test with a inherited role (nullability changes in a inherited role) - [ ] Docs - [ ] Changelog Co-authored-by: Vamshi Surabhi <6562944+0x777@users.noreply.github.com> GitOrigin-RevId: 3b8ee1e11f5ceca80fe294f8c074d42fbccfec63
2021-03-08 14:14:13 +03:00
-- roles which have some kind of permission (action/remote schema/table/function) set in the metadata
let metadataRoles = mapFromL _rRoleName $ (`Role` ParentRoles mempty) <$> toList allRoleNames
[Preview] Inherited roles for postgres read queries fixes #3868 docker image - `hasura/graphql-engine:inherited-roles-preview-48b73a2de` Note: To be able to use the inherited roles feature, the graphql-engine should be started with the env variable `HASURA_GRAPHQL_EXPERIMENTAL_FEATURES` set to `inherited_roles`. Introduction ------------ This PR implements the idea of multiple roles as presented in this [paper](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/FGALanguageICDE07.pdf). The multiple roles feature in this PR can be used via inherited roles. An inherited role is a role which can be created by combining multiple singular roles. For example, if there are two roles `author` and `editor` configured in the graphql-engine, then we can create a inherited role with the name of `combined_author_editor` role which will combine the select permissions of the `author` and `editor` roles and then make GraphQL queries using the `combined_author_editor`. How are select permissions of different roles are combined? ------------------------------------------------------------ A select permission includes 5 things: 1. Columns accessible to the role 2. Row selection filter 3. Limit 4. Allow aggregation 5. Scalar computed fields accessible to the role Suppose there are two roles, `role1` gives access to the `address` column with row filter `P1` and `role2` gives access to both the `address` and the `phone` column with row filter `P2` and we create a new role `combined_roles` which combines `role1` and `role2`. Let's say the following GraphQL query is queried with the `combined_roles` role. ```graphql query { employees { address phone } } ``` This will translate to the following SQL query: ```sql select (case when (P1 or P2) then address else null end) as address, (case when P2 then phone else null end) as phone from employee where (P1 or P2) ``` The other parameters of the select permission will be combined in the following manner: 1. Limit - Minimum of the limits will be the limit of the inherited role 2. Allow aggregations - If any of the role allows aggregation, then the inherited role will allow aggregation 3. Scalar computed fields - same as table column fields, as in the above example APIs for inherited roles: ---------------------- 1. `add_inherited_role` `add_inherited_role` is the [metadata API](https://hasura.io/docs/1.0/graphql/core/api-reference/index.html#schema-metadata-api) to create a new inherited role. It accepts two arguments `role_name`: the name of the inherited role to be added (String) `role_set`: list of roles that need to be combined (Array of Strings) Example: ```json { "type": "add_inherited_role", "args": { "role_name":"combined_user", "role_set":[ "user", "user1" ] } } ``` After adding the inherited role, the inherited role can be used like single roles like earlier Note: An inherited role can only be created with non-inherited/singular roles. 2. `drop_inherited_role` The `drop_inherited_role` API accepts the name of the inherited role and drops it from the metadata. It accepts a single argument: `role_name`: name of the inherited role to be dropped Example: ```json { "type": "drop_inherited_role", "args": { "role_name":"combined_user" } } ``` Metadata --------- The derived roles metadata will be included under the `experimental_features` key while exporting the metadata. ```json { "experimental_features": { "derived_roles": [ { "role_name": "manager_is_employee_too", "role_set": [ "employee", "manager" ] } ] } } ``` Scope ------ Only postgres queries and subscriptions are supported in this PR. Important points: ----------------- 1. All columns exposed to an inherited role will be marked as `nullable`, this is done so that cell value nullification can be done. TODOs ------- - [ ] Tests - [ ] Test a GraphQL query running with a inherited role without enabling inherited roles in experimental features - [] Tests for aggregate queries, limit, computed fields, functions, subscriptions (?) - [ ] Introspection test with a inherited role (nullability changes in a inherited role) - [ ] Docs - [ ] Changelog Co-authored-by: Vamshi Surabhi <6562944+0x777@users.noreply.github.com> GitOrigin-RevId: 3b8ee1e11f5ceca80fe294f8c074d42fbccfec63
2021-03-08 14:14:13 +03:00
resolvedInheritedRoles <- buildInheritedRoles -< (allRoleNames, OMap.elems inheritedRoles)
[Preview] Inherited roles for postgres read queries fixes #3868 docker image - `hasura/graphql-engine:inherited-roles-preview-48b73a2de` Note: To be able to use the inherited roles feature, the graphql-engine should be started with the env variable `HASURA_GRAPHQL_EXPERIMENTAL_FEATURES` set to `inherited_roles`. Introduction ------------ This PR implements the idea of multiple roles as presented in this [paper](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/FGALanguageICDE07.pdf). The multiple roles feature in this PR can be used via inherited roles. An inherited role is a role which can be created by combining multiple singular roles. For example, if there are two roles `author` and `editor` configured in the graphql-engine, then we can create a inherited role with the name of `combined_author_editor` role which will combine the select permissions of the `author` and `editor` roles and then make GraphQL queries using the `combined_author_editor`. How are select permissions of different roles are combined? ------------------------------------------------------------ A select permission includes 5 things: 1. Columns accessible to the role 2. Row selection filter 3. Limit 4. Allow aggregation 5. Scalar computed fields accessible to the role Suppose there are two roles, `role1` gives access to the `address` column with row filter `P1` and `role2` gives access to both the `address` and the `phone` column with row filter `P2` and we create a new role `combined_roles` which combines `role1` and `role2`. Let's say the following GraphQL query is queried with the `combined_roles` role. ```graphql query { employees { address phone } } ``` This will translate to the following SQL query: ```sql select (case when (P1 or P2) then address else null end) as address, (case when P2 then phone else null end) as phone from employee where (P1 or P2) ``` The other parameters of the select permission will be combined in the following manner: 1. Limit - Minimum of the limits will be the limit of the inherited role 2. Allow aggregations - If any of the role allows aggregation, then the inherited role will allow aggregation 3. Scalar computed fields - same as table column fields, as in the above example APIs for inherited roles: ---------------------- 1. `add_inherited_role` `add_inherited_role` is the [metadata API](https://hasura.io/docs/1.0/graphql/core/api-reference/index.html#schema-metadata-api) to create a new inherited role. It accepts two arguments `role_name`: the name of the inherited role to be added (String) `role_set`: list of roles that need to be combined (Array of Strings) Example: ```json { "type": "add_inherited_role", "args": { "role_name":"combined_user", "role_set":[ "user", "user1" ] } } ``` After adding the inherited role, the inherited role can be used like single roles like earlier Note: An inherited role can only be created with non-inherited/singular roles. 2. `drop_inherited_role` The `drop_inherited_role` API accepts the name of the inherited role and drops it from the metadata. It accepts a single argument: `role_name`: name of the inherited role to be dropped Example: ```json { "type": "drop_inherited_role", "args": { "role_name":"combined_user" } } ``` Metadata --------- The derived roles metadata will be included under the `experimental_features` key while exporting the metadata. ```json { "experimental_features": { "derived_roles": [ { "role_name": "manager_is_employee_too", "role_set": [ "employee", "manager" ] } ] } } ``` Scope ------ Only postgres queries and subscriptions are supported in this PR. Important points: ----------------- 1. All columns exposed to an inherited role will be marked as `nullable`, this is done so that cell value nullification can be done. TODOs ------- - [ ] Tests - [ ] Test a GraphQL query running with a inherited role without enabling inherited roles in experimental features - [] Tests for aggregate queries, limit, computed fields, functions, subscriptions (?) - [ ] Introspection test with a inherited role (nullability changes in a inherited role) - [ ] Docs - [ ] Changelog Co-authored-by: Vamshi Surabhi <6562944+0x777@users.noreply.github.com> GitOrigin-RevId: 3b8ee1e11f5ceca80fe294f8c074d42fbccfec63
2021-03-08 14:14:13 +03:00
let allRoles = resolvedInheritedRoles `M.union` metadataRoles
[Preview] Inherited roles for postgres read queries fixes #3868 docker image - `hasura/graphql-engine:inherited-roles-preview-48b73a2de` Note: To be able to use the inherited roles feature, the graphql-engine should be started with the env variable `HASURA_GRAPHQL_EXPERIMENTAL_FEATURES` set to `inherited_roles`. Introduction ------------ This PR implements the idea of multiple roles as presented in this [paper](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/FGALanguageICDE07.pdf). The multiple roles feature in this PR can be used via inherited roles. An inherited role is a role which can be created by combining multiple singular roles. For example, if there are two roles `author` and `editor` configured in the graphql-engine, then we can create a inherited role with the name of `combined_author_editor` role which will combine the select permissions of the `author` and `editor` roles and then make GraphQL queries using the `combined_author_editor`. How are select permissions of different roles are combined? ------------------------------------------------------------ A select permission includes 5 things: 1. Columns accessible to the role 2. Row selection filter 3. Limit 4. Allow aggregation 5. Scalar computed fields accessible to the role Suppose there are two roles, `role1` gives access to the `address` column with row filter `P1` and `role2` gives access to both the `address` and the `phone` column with row filter `P2` and we create a new role `combined_roles` which combines `role1` and `role2`. Let's say the following GraphQL query is queried with the `combined_roles` role. ```graphql query { employees { address phone } } ``` This will translate to the following SQL query: ```sql select (case when (P1 or P2) then address else null end) as address, (case when P2 then phone else null end) as phone from employee where (P1 or P2) ``` The other parameters of the select permission will be combined in the following manner: 1. Limit - Minimum of the limits will be the limit of the inherited role 2. Allow aggregations - If any of the role allows aggregation, then the inherited role will allow aggregation 3. Scalar computed fields - same as table column fields, as in the above example APIs for inherited roles: ---------------------- 1. `add_inherited_role` `add_inherited_role` is the [metadata API](https://hasura.io/docs/1.0/graphql/core/api-reference/index.html#schema-metadata-api) to create a new inherited role. It accepts two arguments `role_name`: the name of the inherited role to be added (String) `role_set`: list of roles that need to be combined (Array of Strings) Example: ```json { "type": "add_inherited_role", "args": { "role_name":"combined_user", "role_set":[ "user", "user1" ] } } ``` After adding the inherited role, the inherited role can be used like single roles like earlier Note: An inherited role can only be created with non-inherited/singular roles. 2. `drop_inherited_role` The `drop_inherited_role` API accepts the name of the inherited role and drops it from the metadata. It accepts a single argument: `role_name`: name of the inherited role to be dropped Example: ```json { "type": "drop_inherited_role", "args": { "role_name":"combined_user" } } ``` Metadata --------- The derived roles metadata will be included under the `experimental_features` key while exporting the metadata. ```json { "experimental_features": { "derived_roles": [ { "role_name": "manager_is_employee_too", "role_set": [ "employee", "manager" ] } ] } } ``` Scope ------ Only postgres queries and subscriptions are supported in this PR. Important points: ----------------- 1. All columns exposed to an inherited role will be marked as `nullable`, this is done so that cell value nullification can be done. TODOs ------- - [ ] Tests - [ ] Test a GraphQL query running with a inherited role without enabling inherited roles in experimental features - [] Tests for aggregate queries, limit, computed fields, functions, subscriptions (?) - [ ] Introspection test with a inherited role (nullability changes in a inherited role) - [ ] Docs - [ ] Changelog Co-authored-by: Vamshi Surabhi <6562944+0x777@users.noreply.github.com> GitOrigin-RevId: 3b8ee1e11f5ceca80fe294f8c074d42fbccfec63
2021-03-08 14:14:13 +03:00
orderedRoles <- bindA -< orderRoles $ M.elems allRoles
-- remote schemas
let remoteSchemaInvalidationKeys = Inc.selectD #_ikRemoteSchemas invalidationKeys
scaffolding for remote-schemas module The main aim of the PR is: 1. To set up a module structure for 'remote-schemas' package. 2. Move parts by the remote schema codebase into the new module structure to validate it. ## Notes to the reviewer Why a PR with large-ish diff? 1. We've been making progress on the MM project but we don't yet know long it is going to take us to get to the first milestone. To understand this better, we need to figure out the unknowns as soon as possible. Hence I've taken a stab at the first two items in the [end-state](https://gist.github.com/0x777/ca2bdc4284d21c3eec153b51dea255c9) document to figure out the unknowns. Unsurprisingly, there are a bunch of issues that we haven't discussed earlier. These are documented in the 'open questions' section. 1. The diff is large but that is only code moved around and I've added a section that documents how things are moved. In addition, there are fair number of PR comments to help with the review process. ## Changes in the PR ### Module structure Sets up the module structure as follows: ``` Hasura/ RemoteSchema/ Metadata/ Types.hs SchemaCache/ Types.hs Permission.hs RemoteRelationship.hs Build.hs MetadataAPI/ Types.hs Execute.hs ``` ### 1. Types representing metadata are moved Types that capture metadata information (currently scattered across several RQL modules) are moved into `Hasura.RemoteSchema.Metadata.Types`. - This new module only depends on very 'core' modules such as `Hasura.Session` for the notion of roles and `Hasura.Incremental` for `Cacheable` typeclass. - The requirement on database modules is avoided by generalizing the remote schemas metadata to accept an arbitrary 'r' for a remote relationship definition. ### 2. SchemaCache related types and build logic have been moved Types that represent remote schemas information in SchemaCache are moved into `Hasura.RemoteSchema.SchemaCache.Types`. Similar to `H.RS.Metadata.Types`, this module depends on 'core' modules except for `Hasura.GraphQL.Parser.Variable`. It has something to do with remote relationships but I haven't spent time looking into it. The validation of 'remote relationships to remote schema' is also something that needs to be looked at. Rips out the logic that builds remote schema's SchemaCache information from the monolithic `buildSchemaCacheRule` and moves it into `Hasura.RemoteSchema.SchemaCache.Build`. Further, the `.SchemaCache.Permission` and `.SchemaCache.RemoteRelationship` have been created from existing modules that capture schema cache building logic for those two components. This was a fair amount of work. On main, currently remote schema's SchemaCache information is built in two phases - in the first phase, 'permissions' and 'remote relationships' are ignored and in the second phase they are filled in. While remote relationships can only be resolved after partially resolving sources and other remote schemas, the same isn't true for permissions. Further, most of the work that is done to resolve remote relationships can be moved to the first phase so that the second phase can be a very simple traversal. This is the approach that was taken - resolve permissions and as much as remote relationships information in the first phase. ### 3. Metadata APIs related types and build logic have been moved The types that represent remote schema related metadata APIs and the execution logic have been moved to `Hasura.RemoteSchema.MetadataAPI.Types` and `.Execute` modules respectively. ## Open questions: 1. `Hasura.RemoteSchema.Metadata.Types` is so called because I was hoping that all of the metadata related APIs of remote schema can be brought in at `Hasura.RemoteSchema.Metadata.API`. However, as metadata APIs depended on functions from `SchemaCache` module (see [1](https://github.com/hasura/graphql-engine-mono/blob/ceba6d62264603ee5d279814677b29bcc43ecaea/server/src-lib/Hasura/RQL/DDL/RemoteSchema.hs#L55) and [2](https://github.com/hasura/graphql-engine-mono/blob/ceba6d62264603ee5d279814677b29bcc43ecaea/server/src-lib/Hasura/RQL/DDL/RemoteSchema.hs#L91), it made more sense to create a separate top-level module for `MetadataAPI`s. Maybe we can just have `Hasura.RemoteSchema.Metadata` and get rid of the extra nesting or have `Hasura.RemoteSchema.Metadata.{Core,Permission,RemoteRelationship}` if we want to break them down further. 1. `buildRemoteSchemas` in `H.RS.SchemaCache.Build` has the following type: ```haskell buildRemoteSchemas :: ( ArrowChoice arr, Inc.ArrowDistribute arr, ArrowWriter (Seq CollectedInfo) arr, Inc.ArrowCache m arr, MonadIO m, HasHttpManagerM m, Inc.Cacheable remoteRelationshipDefinition, ToJSON remoteRelationshipDefinition, MonadError QErr m ) => Env.Environment -> ( (Inc.Dependency (HashMap RemoteSchemaName Inc.InvalidationKey), OrderedRoles), [RemoteSchemaMetadataG remoteRelationshipDefinition] ) `arr` HashMap RemoteSchemaName (PartiallyResolvedRemoteSchemaCtxG remoteRelationshipDefinition, MetadataObject) ``` Note the dependence on `CollectedInfo` which is defined as ```haskell data CollectedInfo = CIInconsistency InconsistentMetadata | CIDependency MetadataObject -- ^ for error reporting on missing dependencies SchemaObjId SchemaDependency deriving (Eq) ``` this pretty much means that remote schemas is dependent on types from databases, actions, .... How do we fix this? Maybe introduce a typeclass such as `ArrowCollectRemoteSchemaDependencies` which is defined in `Hasura.RemoteSchema` and then implemented in graphql-engine? 1. The dependency on `buildSchemaCacheFor` in `.MetadataAPI.Execute` which has the following signature: ```haskell buildSchemaCacheFor :: (QErrM m, CacheRWM m, MetadataM m) => MetadataObjId -> MetadataModifier -> ``` This can be easily resolved if we restrict what the metadata APIs are allowed to do. Currently, they operate in an unfettered access to modify SchemaCache (the `CacheRWM` constraint): ```haskell runAddRemoteSchema :: ( QErrM m, CacheRWM m, MonadIO m, HasHttpManagerM m, MetadataM m, Tracing.MonadTrace m ) => Env.Environment -> AddRemoteSchemaQuery -> m EncJSON ``` This should instead be changed to restrict remote schema APIs to only modify remote schema metadata (but has access to the remote schemas part of the schema cache), this dependency is completely removed. ```haskell runAddRemoteSchema :: ( QErrM m, MonadIO m, HasHttpManagerM m, MonadReader RemoteSchemasSchemaCache m, MonadState RemoteSchemaMetadata m, Tracing.MonadTrace m ) => Env.Environment -> AddRemoteSchemaQuery -> m RemoteSchemeMetadataObjId ``` The idea is that the core graphql-engine would call these functions and then call `buildSchemaCacheFor`. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6291 GitOrigin-RevId: 51357148c6404afe70219afa71bd1d59bdf4ffc6
2022-10-21 06:13:07 +03:00
remoteSchemaMap <- buildRemoteSchemas env -< ((remoteSchemaInvalidationKeys, orderedRoles), OMap.elems remoteSchemas)
let remoteSchemaCtxMap = M.map fst remoteSchemaMap
!defaultNC <- bindA -< _sccDefaultNamingConvention <$> askServerConfigCtx
!isNamingConventionEnabled <- bindA -< ((EFNamingConventions `elem`) . _sccExperimentalFeatures) <$> askServerConfigCtx
let backendInvalidationKeys = Inc.selectD #_ikBackends invalidationKeys
backendCache <- resolveBackendCache -< (backendInvalidationKeys, BackendMap.elems backendConfigs)
let backendInfoAndSourceMetadata = joinBackendInfosToSources backendCache sources
-- sources are build in two steps
-- first we resolve them, and build the table cache
partiallyResolvedSourcesMaybes <-
(|
Inc.keyed
( \_ exists ->
AB.dispatchAnyBackendArrow @BackendMetadata @BackendEventTrigger
( proc (backendInfoAndSourceMetadata, (invalidationKeys, defaultNC, isNamingConventionEnabled)) -> do
let sourceMetadata = _bcasmSourceMetadata backendInfoAndSourceMetadata
sourceName = _smName sourceMetadata
sourceInvalidationsKeys = Inc.selectD #_ikSources invalidationKeys
maybeResolvedSource <- resolveSourceIfNeeded -< (sourceInvalidationsKeys, backendInfoAndSourceMetadata)
case maybeResolvedSource of
Nothing -> returnA -< Nothing
Just (source :: ResolvedSource b) -> do
let metadataInvalidationKey = Inc.selectD #_ikMetadata invalidationKeys
(tableInputs, _, _) = unzip3 $ map mkTableInputs $ OMap.elems $ _smTables sourceMetadata
!namingConv = if isNamingConventionEnabled then getNamingConvention (_smCustomization sourceMetadata) defaultNC else HasuraCase
tablesCoreInfo <-
buildTableCache
-<
( sourceName,
_rsConfig source,
_rsTables source,
tableInputs,
metadataInvalidationKey,
namingConv
)
let tablesMetadata = OMap.elems $ _smTables sourceMetadata
eventTriggers = map (_tmTable &&& OMap.elems . _tmEventTriggers) tablesMetadata
numEventTriggers = sum $ map (length . snd) eventTriggers
sourceConfig = _rsConfig source
(recreateEventTriggers, sourceCatalogMigrationState) <- initCatalogIfNeeded -< (Proxy :: Proxy b, numEventTriggers > 0, sourceConfig)
bindA -< unLogger logger (sourceName, sourceCatalogMigrationState)
let alignTableMap :: HashMap (TableName b) a -> HashMap (TableName b) c -> HashMap (TableName b) (a, c)
alignTableMap = M.intersectionWith (,)
eventTriggerInfoMaps <-
(|
Inc.keyed
( \_ (tableCoreInfo, (_, eventTriggerConfs)) ->
buildTableEventTriggers -< (sourceName, sourceConfig, tableCoreInfo, eventTriggerConfs, metadataInvalidationKey, recreateEventTriggers)
)
|) (tablesCoreInfo `alignTableMap` mapFromL fst eventTriggers)
returnA
-<
Just $
AB.mkAnyBackend @b $
PartiallyResolvedSource sourceMetadata source tablesCoreInfo eventTriggerInfoMaps
)
-<
(exists, (invalidationKeys, defaultNC, isNamingConventionEnabled))
)
|) (M.fromList $ OMap.toList backendInfoAndSourceMetadata)
let partiallyResolvedSources = catMaybes partiallyResolvedSourcesMaybes
-- then we can build the entire source output
-- we need to have the table cache of all sources to build cross-sources relationships
sourcesOutput <-
(|
Inc.keyed
( \_ exists ->
-- Note that it's a bit of a coincidence that
-- 'AB.dispatchAnyBackendArrow' accepts exactly two constraints,
-- and that we happen to want to apply to exactly two
-- constraints.
-- Ideally the function should be able to take an arbitrary
-- number of constraints.
AB.dispatchAnyBackendArrow @BackendMetadata @GetAggregationPredicatesDeps
( proc
( partiallyResolvedSource :: PartiallyResolvedSource b,
(allResolvedSources, remoteSchemaCtxMap, orderedRoles)
)
-> do
let PartiallyResolvedSource sourceMetadata resolvedSource tablesInfo eventTriggers = partiallyResolvedSource
ResolvedSource sourceConfig _sourceCustomization tablesMeta functionsMeta scalars = resolvedSource
so <-
buildSource
-<
( allResolvedSources,
sourceMetadata,
sourceConfig,
tablesInfo,
eventTriggers,
tablesMeta,
functionsMeta,
remoteSchemaCtxMap,
orderedRoles
)
returnA -< (so, BackendMap.singleton scalars)
)
-<
( exists,
(partiallyResolvedSources, remoteSchemaCtxMap, orderedRoles)
)
)
|) partiallyResolvedSources
remoteSchemaCache <-
(|
Inc.keyed
( \_ (partiallyResolvedRemoteSchemaCtx, metadataObj) -> do
let remoteSchemaIntrospection = irDoc $ _rscIntroOriginal partiallyResolvedRemoteSchemaCtx
resolvedSchemaCtx <-
(|
traverseA
( \PartiallyResolvedRemoteRelationship {..} ->
buildRemoteSchemaRemoteRelationship
-<
( (partiallyResolvedSources, remoteSchemaCtxMap),
(_rscName partiallyResolvedRemoteSchemaCtx, remoteSchemaIntrospection, _prrrTypeName, _prrrDefinition)
)
)
|) partiallyResolvedRemoteSchemaCtx
returnA -< (catMaybes $ resolvedSchemaCtx, metadataObj)
)
|) remoteSchemaMap
allow custom mutations through actions (#3042) * basic doc for actions * custom_types, sync and async actions * switch to graphql-parser-hs on github * update docs * metadata import/export * webhook calls are now supported * relationships in sync actions * initialise.sql is now in sync with the migration file * fix metadata tests * allow specifying arguments of actions * fix blacklist check on check_build_worthiness job * track custom_types and actions related tables * handlers are now triggered on async actions * default to pgjson unless a field is involved in relationships, for generating definition list * use 'true' for action filter for non admin role * fix create_action_permission sql query * drop permissions when dropping an action * add a hdb_role view (and relationships) to fetch all roles in the system * rename 'webhook' key in action definition to 'handler' * allow templating actions wehook URLs with env vars * add 'update_action' /v1/query type * allow forwarding client headers by setting `forward_client_headers` in action definition * add 'headers' configuration in action definition * handle webhook error response based on status codes * support array relationships for custom types * implement single row mutation, see https://github.com/hasura/graphql-engine/issues/3731 * single row mutation: rename 'pk_columns' -> 'columns' and no-op refactor * use top level primary key inputs for delete_by_pk & account select permissions for single row mutations * use only REST semantics to resolve the webhook response * use 'pk_columns' instead of 'columns' for update_by_pk input * add python basic tests for single row mutations * add action context (name) in webhook payload * Async action response is accessible for non admin roles only if the request session vars equals to action's * clean nulls, empty arrays for actions, custom types in export metadata * async action mutation returns only the UUID of the action * unit tests for URL template parser * Basic sync actions python tests * fix output in async query & add async tests * add admin secret header in async actions python test * document async action architecture in Resolve/Action.hs file * support actions returning array of objects * tests for list type response actions * update docs with actions and custom types metadata API reference * update actions python tests as per #f8e1330 Co-authored-by: Tirumarai Selvan <tirumarai.selvan@gmail.com> Co-authored-by: Aravind Shankar <face11301@gmail.com> Co-authored-by: Rakesh Emmadi <12475069+rakeshkky@users.noreply.github.com>
2020-02-13 20:38:23 +03:00
-- custom types
let scalarsMap = mconcat $ map snd $ M.elems sourcesOutput
sourcesCache = M.map fst sourcesOutput
maybeResolvedCustomTypes <-
(|
withRecordInconsistency
( bindErrorA -< resolveCustomTypes sourcesCache customTypes scalarsMap
)
|) (MetadataObject MOCustomTypes $ toJSON customTypes)
allow custom mutations through actions (#3042) * basic doc for actions * custom_types, sync and async actions * switch to graphql-parser-hs on github * update docs * metadata import/export * webhook calls are now supported * relationships in sync actions * initialise.sql is now in sync with the migration file * fix metadata tests * allow specifying arguments of actions * fix blacklist check on check_build_worthiness job * track custom_types and actions related tables * handlers are now triggered on async actions * default to pgjson unless a field is involved in relationships, for generating definition list * use 'true' for action filter for non admin role * fix create_action_permission sql query * drop permissions when dropping an action * add a hdb_role view (and relationships) to fetch all roles in the system * rename 'webhook' key in action definition to 'handler' * allow templating actions wehook URLs with env vars * add 'update_action' /v1/query type * allow forwarding client headers by setting `forward_client_headers` in action definition * add 'headers' configuration in action definition * handle webhook error response based on status codes * support array relationships for custom types * implement single row mutation, see https://github.com/hasura/graphql-engine/issues/3731 * single row mutation: rename 'pk_columns' -> 'columns' and no-op refactor * use top level primary key inputs for delete_by_pk & account select permissions for single row mutations * use only REST semantics to resolve the webhook response * use 'pk_columns' instead of 'columns' for update_by_pk input * add python basic tests for single row mutations * add action context (name) in webhook payload * Async action response is accessible for non admin roles only if the request session vars equals to action's * clean nulls, empty arrays for actions, custom types in export metadata * async action mutation returns only the UUID of the action * unit tests for URL template parser * Basic sync actions python tests * fix output in async query & add async tests * add admin secret header in async actions python test * document async action architecture in Resolve/Action.hs file * support actions returning array of objects * tests for list type response actions * update docs with actions and custom types metadata API reference * update actions python tests as per #f8e1330 Co-authored-by: Tirumarai Selvan <tirumarai.selvan@gmail.com> Co-authored-by: Aravind Shankar <face11301@gmail.com> Co-authored-by: Rakesh Emmadi <12475069+rakeshkky@users.noreply.github.com>
2020-02-13 20:38:23 +03:00
-- actions
let actionList = OMap.elems actions
Rewrite GraphQL schema generation and query parsing (close #2801) (#4111) Aka “the PDV refactor.” History is preserved on the branch 2801-graphql-schema-parser-refactor. * [skip ci] remove stale benchmark commit from commit_diff * [skip ci] Check for root field name conflicts between remotes * [skip ci] Additionally check for conflicts between remotes and DB * [skip ci] Check for conflicts in schema when tracking a table * [skip ci] Fix equality checking in GraphQL AST * server: fix mishandling of GeoJSON inputs in subscriptions (fix #3239) (#4551) * Add support for multiple top-level fields in a subscription to improve testability of subscriptions * Add an internal flag to enable multiple subscriptions * Add missing call to withConstructorFn in live queries (fix #3239) Co-authored-by: Alexis King <lexi.lambda@gmail.com> * Scheduled triggers (close #1914) (#3553) server: add scheduled triggers Co-authored-by: Alexis King <lexi.lambda@gmail.com> Co-authored-by: Marion Schleifer <marion@hasura.io> Co-authored-by: Karthikeyan Chinnakonda <karthikeyan@hasura.io> Co-authored-by: Aleksandra Sikora <ola.zxcvbnm@gmail.com> * dev.sh: bump version due to addition of croniter python dependency * server: fix an introspection query caching issue (fix #4547) (#4661) Introspection queries accept variables, but we need to make sure to also touch the variables that we ignore, so that an introspection query is marked not reusable if we are not able to build a correct query plan for it. A better solution here would be to deal with such unused variables correctly, so that more introspection queries become reusable. An even better solution would be to type-safely track *how* to reuse which variables, rather than to split the reusage marking from the planning. Co-authored-by: Tirumarai Selvan <tiru@hasura.io> * flush log buffer on exception in mkWaiApp ( fix #4772 ) (#4801) * flush log buffer on exception in mkWaiApp * add comment to explain the introduced change * add changelog * allow logging details of a live query polling thread (#4959) * changes for poller-log add various multiplexed query info in poller-log * minor cleanup, also fixes a bug which will return duplicate data * Live query poller stats can now be logged This also removes in-memory stats that are collected about batched query execution as the log lines when piped into an monitoring tool will give us better insights. * allow poller-log to be configurable * log minimal information in the livequery-poller-log Other information can be retrieved from /dev/subscriptions/extended * fix few review comments * avoid marshalling and unmarshalling from ByteString to EncJSON * separate out SubscriberId and SubscriberMetadata Co-authored-by: Anon Ray <rayanon004@gmail.com> * Don't compile in developer APIs by default * Tighten up handling of admin secret, more docs Store the admin secret only as a hash to prevent leaking the secret inadvertently, and to prevent timing attacks on the secret. NOTE: best practice for stored user passwords is a function with a tunable cost like bcrypt, but our threat model is quite different (even if we thought we could reasonably protect the secret from an attacker who could read arbitrary regions of memory), and bcrypt is far too slow (by design) to perform on each request. We'd have to rely on our (technically savvy) users to choose high entropy passwords in any case. Referencing #4736 * server/docs: add instructions to fix loss of float precision in PostgreSQL <= 11 (#5187) This adds a server flag, --pg-connection-options, that can be used to set a PostgreSQL connection parameter, extra_float_digits, that needs to be used to avoid loss of data on older versions of PostgreSQL, which have odd default behavior when returning float values. (fixes #5092) * [skip ci] Add new commits from master to the commit diff * [skip ci] serve default directives (skip & include) over introspection * [skip ci] Update non-Haskell assets with the version on master * server: refactor GQL execution check and config API (#5094) Co-authored-by: Vamshi Surabhi <vamshi@hasura.io> Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * [skip ci] fix js issues in tests by pinning dependencies version * [skip ci] bump graphql version * [skip ci] Add note about memory usage * generalize query execution logic on Postgres (#5110) * generalize PGExecCtx to support specialized functions for various operations * fix tests compilation * allow customising PGExecCtx when starting the web server * server: changes catalog initialization and logging for pro customization (#5139) * new typeclass to abstract the logic of QueryLog-ing * abstract the logic of logging websocket-server logs introduce a MonadWSLog typeclass * move catalog initialization to init step expose a helper function to migrate catalog create schema cache in initialiseCtx * expose various modules and functions for pro * [skip ci] cosmetic change * [skip ci] fix test calling a mutation that does not exist * [skip ci] minor text change * [skip ci] refactored input values * [skip ci] remove VString Origin * server: fix updating of headers behaviour in the update cron trigger API and create future events immediately (#5151) * server: fix bug to update headers in an existing cron trigger and create future events Co-authored-by: Tirumarai Selvan <tiru@hasura.io> * Lower stack chunk size in RTS to reduce thread STACK memory (closes #5190) This reduces memory consumption for new idle subscriptions significantly (see linked ticket). The hypothesis is: we fork a lot of threads per websocket, and some of these use slightly more than the initial 1K stack size, so the first overflow balloons to 32K, when significantly less is required. However: running with `+RTS -K1K -xc` did not seem to show evidence of any overflows! So it's a mystery why this improves things. GHC should probably also be doubling the stack buffer at each overflow or doing something even smarter; the knobs we have aren't so helpful. * [skip ci] fix todo and schema generation for aggregate fields * 5087 libpq pool leak (#5089) Shrink libpq buffers to 1MB before returning connection to pool. Closes #5087 See: https://github.com/hasura/pg-client-hs/pull/19 Also related: #3388 #4077 * bump pg-client-hs version (fixes a build issue on some environments) (#5267) * do not use prepared statements for mutations * server: unlock scheduled events on graceful shutdown (#4928) * Fix buggy parsing of new --conn-lifetime flag in 2b0e3774 * [skip ci] remove cherry-picked commit from commit_diff.txt * server: include additional fields in scheduled trigger webhook payload (#5262) * include scheduled triggers metadata in the webhook body Co-authored-by: Tirumarai Selvan <tiru@hasura.io> * server: call the webhook asynchronously in event triggers (#5352) * server: call the webhook asynchronosly in event triggers * Expose all modules in Cabal file (#5371) * [skip ci] update commit_diff.txt * [skip ci] fix cast exp parser & few TODOs * [skip ci] fix remote fields arguments * [skip ci] fix few more TODO, no-op refactor, move resolve/action.hs to execute/action.hs * Pass environment variables around as a data structure, via @sordina (#5374) * Pass environment variables around as a data structure, via @sordina * Resolving build error * Adding Environment passing note to changelog * Removing references to ILTPollerLog as this seems to have been reintroduced from a bad merge * removing commented-out imports * Language pragmas already set by project * Linking async thread * Apply suggestions from code review Use `runQueryTx` instead of `runLazyTx` for queries. * remove the non-user facing entry in the changelog Co-authored-by: Phil Freeman <paf31@cantab.net> Co-authored-by: Phil Freeman <phil@hasura.io> Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * [skip ci] fix: restrict remote relationship field generation for hasura queries * [skip ci] no-op refactor; move insert execution code from schema parser module * server: call the webhook asynchronously in event triggers (#5352) * server: call the webhook asynchronosly in event triggers * Expose all modules in Cabal file (#5371) * [skip ci] update commit_diff.txt * Pass environment variables around as a data structure, via @sordina (#5374) * Pass environment variables around as a data structure, via @sordina * Resolving build error * Adding Environment passing note to changelog * Removing references to ILTPollerLog as this seems to have been reintroduced from a bad merge * removing commented-out imports * Language pragmas already set by project * Linking async thread * Apply suggestions from code review Use `runQueryTx` instead of `runLazyTx` for queries. * remove the non-user facing entry in the changelog Co-authored-by: Phil Freeman <paf31@cantab.net> Co-authored-by: Phil Freeman <phil@hasura.io> Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * [skip ci] implement header checking Probably closes #14 and #3659. * server: refactor 'pollQuery' to have a hook to process 'PollDetails' (#5391) Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * update pg-client (#5421) * [skip ci] update commit_diff * Fix latency buckets for telemetry data These must have gotten messed up during a refactor. As a consequence almost all samples received so far fall into the single erroneous 0 to 1K seconds (originally supposed to be 1ms?) bucket. I also re-thought what the numbers should be, but these are still arbitrary and might want adjusting in the future. * [skip ci] include the latest commit compared against master in commit_diff * [skip ci] include new commits from master in commit_diff * [skip ci] improve description generation * [skip ci] sort all introspect arrays * [skip ci] allow parsers to specify error codes * [skip ci] fix integer and float parsing error code * [skip ci] scalar from json errors are now parse errors * [skip ci] fixed negative integer error message and code * [skip ci] Re-fix nullability in relationships * [skip ci] no-op refactor and removed couple of FIXMEs * [skip ci] uncomment code in 'deleteMetadataObject' * [skip ci] Fix re-fix of nullability for relationships * [skip ci] fix default arguments error code * [skip ci] updated test error message !!! WARNING !!! Since all fields accept `null`, they all are technically optional in the new schema. Meaning there's no such thing as a missing mandatory field anymore: a field that doesn't have a default value, and which therefore isn't labelled as "optional" in the schema, will be assumed to be null if it's missing, meaning it isn't possible anymore to have an error for a missing mandatory field. The only possible error is now when a optional positional argument is omitted but is not the last positional argument. * [skip ci] cleanup of int scalar parser * [skip ci] retro-compatibility of offset as string * [skip ci] Remove commit from commit_diff.txt Although strictly speaking we don't know if this will work correctly in PDV if we would implement query plan caching, the fact is that in the theoretical case that we would have the same issue in PDV, it would probably apply not just to introspection, and the fix would be written completely differently. So this old commit is of no value to us other than the heads-up "make sure query plan caching works correctly even in the presence of unused variables", which is already part of the test suite. * Add MonadTrace and MonadExecuteQuery abstractions (#5383) * [skip ci] Fix accumulation of input object types Just like object types, interface types, and union types, we have to avoid circularities when collecting input types from the GraphQL AST. Additionally, this fixes equality checks for input object types (whose fields are unordered, and hence should be compared as sets) and enum types (ditto). * [skip ci] fix fragment error path * [skip ci] fix node error code * [skip ci] fix paths in insert queries * [skip ci] fix path in objects * [skip ci] manually alter node id path for consistency * [skip ci] more node error fixups * [skip ci] one last relay error message fix * [skip ci] update commit_diff * Propagate the trace context to event triggers (#5409) * Propagate the trace context to event triggers * Handle missing trace and span IDs * Store trace context as one LOCAL * Add migrations * Documentation * changelog * Fix warnings * Respond to code review suggestions * Respond to code review * Undo changelog * Update CHANGELOG.md Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * server: log request/response sizes for event triggers (#5463) * server: log request/response sizes for event triggers event triggers (and scheduled triggers) now have request/response size in their logs. * add changelog entry * Tracing: Simplify HTTP traced request (#5451) Remove the Inversion of Control (SuspendRequest) and simplify the tracing of HTTP Requests. Co-authored-by: Phil Freeman <phil@hasura.io> * Attach request ID as tracing metadata (#5456) * Propagate the trace context to event triggers * Handle missing trace and span IDs * Store trace context as one LOCAL * Add migrations * Documentation * Include the request ID as trace metadata * changelog * Fix warnings * Respond to code review suggestions * Respond to code review * Undo changelog * Update CHANGELOG.md * Typo Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * server: add logging for action handlers (#5471) * server: add logging for action handlers * add changelog entry * change action-handler log type from internal to non-internal * fix action-handler-log name * server: pass http and websocket request to logging context (#5470) * pass request body to logging context in all cases * add message size logging on the websocket API this is required by graphql-engine-pro/#416 * message size logging on websocket API As we need to log all messages recieved/sent by the websocket server, it makes sense to log them as part of the websocket server event logs. Previously message recieved were logged inside the onMessage handler, and messages sent were logged only for "data" messages (as a server event log) * fix review comments Co-authored-by: Phil Freeman <phil@hasura.io> * server: stop eventing subsystem threads when shutting down (#5479) * server: stop eventing subsystem threads when shutting down * Apply suggestions from code review Co-authored-by: Karthikeyan Chinnakonda <chkarthikeyan95@gmail.com> Co-authored-by: Phil Freeman <phil@hasura.io> Co-authored-by: Phil Freeman <paf31@cantab.net> Co-authored-by: Karthikeyan Chinnakonda <chkarthikeyan95@gmail.com> * [skip ci] update commit_diff with new commits added in master * Bugfix to support 0-size HASURA_GRAPHQL_QUERY_PLAN_CACHE_SIZE Also some minor refactoring of bounded cache module: - the maxBound check in `trim` was confusing and unnecessary - consequently trim was unnecessary for lookupPure Also add some basic tests * Support only the bounded cache, with default HASURA_GRAPHQL_QUERY_PLAN_CACHE_SIZE of 4000. Closes #5363 * [skip ci] remove merge commit from commit_diff * server: Fix compiler warning caused by GHC upgrade (#5489) Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * [skip ci] update all non server code from master * [skip ci] aligned object field error message with master * [skip ci] fix remaining undefined? * [skip ci] remove unused import * [skip ci] revert to previous error message, fix tests * Move nullableType/nonNullableType to Schema.hs These are functions on Types, not on Parsers. * [skip ci] fix setup to fix backend only test the order in which permission checks are performed on the branch is slightly different than on master, resulting in a slightly different error if there are no other mutations the user has access to. By adding update permissions, we go back to the expected case. * [skip ci] fix insert geojson tests to reflect new paths * [skip ci] fix enum test for better error message * [skip ci] fix header test for better error message * [skip ci] fix fragment cycle test for better error message * [skip ci] fix error message for type mismatch * [skip ci] fix variable path in test * [skip ci] adjust tests after bug fix * [skip ci] more tests fixing * Add hdb_catalog.current_setting abstraction for reading Hasura settings As the comment in the function’s definition explains, this is needed to work around an awkward Postgres behavior. * [skip ci] Update CONTRIBUTING.md to mention Node setup for Python tests * [skip ci] Add missing Python tests env var to CONTRIBUTING.md * [skip ci] fix order of result when subscription is run with multiple nodes * [skip ci] no-op refactor: fix a warning in Internal/Parser.hs * [skip ci] throw error when a subscription contains remote joins * [skip ci] Enable easier profiling by hiding AssertNF behind a flag In order to compile a profiling build, run: $ cabal new-build -f profiling --enable-profiling * [skip ci] Fix two warnings We used to lookup the objects that implement a given interface by filtering all objects in the schema document. However, one of the tests expects us to generate a warning if the provided `implements` field of an introspection query specifies an object not implementing some interface. So we use that field instead. * [skip ci] Fix warnings by commenting out query plan caching * [skip ci] improve masking/commenting query caching related code & few warning fixes * [skip ci] Fixed compiler warnings in graphql-parser-hs * Sync non-Haskell assets with master * [skip ci] add a test inserting invalid GraphQL but valid JSON value in a jsonb column * [skip ci] Avoid converting to/from Map * [skip ci] Apply some hlint suggestions * [skip ci] remove redundant constraints from buildLiveQueryPlan and explainGQLQuery * [skip ci] add NOTEs about missing Tracing constraints in PDV from master * Remove -fdefer-typed-holes, fix warnings * Update cabal.project.freeze * Limit GHC’s heap size to 8GB in CI to avoid the OOM killer * Commit package-lock.json for Python tests’ remote schema server * restrict env variables start with HASURA_GRAPHQL_ for headers configuration in actions, event triggers & remote schemas (#5519) * restrict env variables start with HASURA_GRAPHQL_ for headers definition in actions & event triggers * update CHANGELOG.md * Apply suggestions from code review Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * add test for table_by_pk node when roles doesn't have permission to PK * [skip ci] fix introspection query if any enum column present in primary key (fix #5200) (#5522) * [skip ci] test case fix for a6450e126bc2d98bcfd3791501986e4627ce6c6f * [skip ci] add tests to agg queries when role doesn't have access to any cols * fix backend test * Simplify subscription execution * [skip ci] add test to check if required headers are present while querying * Suppose, table B is related to table A and to query B certain headers are necessary, then the test checks that we are throwing error when the header is not set when B is queried through A * fix mutations not checking for view mutability * [skip ci] add variable type checking and corresponding tests * [skip ci] add test to check if update headers are present while doing an upsert * [skip ci] add positive counterparts to some of the negative permission tests * fix args missing their description in introspect * [skip ci] Remove unused function; insert missing markNotReusable call * [skip ci] Add a Note about InputValue * [skip ci] Delete LegacySchema/ 🎉 * [skip ci] Delete GraphQL/{Resolve,Validate}/ 🎉 * [skip ci] Delete top-level Resolve/Validate modules; tidy .cabal file * [skip ci] Delete LegacySchema top-level module Somehow I missed this one. * fix input value to json * [skip ci] elaborate on JSON objects in GraphQL * [skip ci] add missing file * [skip ci] add a test with subscription containing remote joins * add a test with remote joins in mutation output * [skip ci] Add some comments to Schema/Mutation.hs * [skip ci] Remove no longer needed code from RemoteServer.hs * [skip ci] Use a helper function to generate conflict clause parsers * [skip ci] fix type checker error in fields with default value * capitalize the header keys in select_articles_without_required_headers * Somehow, this was the reason the tests were failing. I have no idea, why! * [skip ci] Add a long Note about optional fields and nullability * Improve comments a bit; simplify Schema/Common.hs a bit * [skip ci] full implementation of 5.8.5 type checking. * [skip ci] fix validation test teardown * [skip ci] fix schema stitching test * fix remote schema ignoring enum nullability * [skip ci] fix fieldOptional to not discard nullability * revert nullability of use_spheroid * fix comment * add required remote fields with arguments for tests * [skip ci] add missing docstrings * [skip ci] fixed description of remote fields * [skip ci] change docstring for consistency * fix several schema inconsistencies * revert behaviour change in function arguments parsing * fix remaining nullability issues in new schema * minor no-op refactor; use isListType from graphql-parser-hs * use nullability of remote schema node, while creating a Remote reln * fix 'ID' input coercing & action 'ID' type relationship mapping * include ASTs in MonadExecuteQuery * needed for PRO code-base * Delete code for "interfaces implementing ifaces" (draft GraphQL spec) Previously I started writing some code that adds support for a future GraphQL feature where interfaces may themselves be sub-types of other interfaces. However, this code was incomplete, and partially incorrect. So this commit deletes support for that entirely. * Ignore a remote schema test during the upgrade/downgrade test The PDV refactor does a better job at exposing a minimal set of types through introspection. In particular, not every type that is present in a remote schema is re-exposed by Hasura. The test test_schema_stitching.py::TestRemoteSchemaBasic::test_introspection assumed that all types were re-exposed, which is not required for GraphQL compatibility, in order to test some aspect of our support for remote schemas. So while this particular test has been updated on PDV, the PDV branch now does not pass the old test, which we argue to be incorrect. Hence this test is disabled while we await a release, after which we can re-enable it. This also re-enables a test that was previously disabled for similar, though unrelated, reasons. * add haddock documentation to the action's field parsers * Deslecting some tests in server-upgrade Some tests with current build are failing on server upgrade which it should not. The response is more accurate than what it was. Also the upgrade tests were not throwing errors when the test is expected to return an error, but succeeds. The test framework is patched to catch this case. * [skip ci] Add a long Note about interfaces and object types * send the response headers back to client after running a query * Deselect a few more tests during upgrade/downgrade test * Update commit_diff.txt * change log kind from db_migrate to catalog_migrate (#5531) * Show method and complete URI in traced HTTP calls (#5525) Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * restrict env variables start with HASURA_GRAPHQL_ for headers configuration in actions, event triggers & remote schemas (#5519) * restrict env variables start with HASURA_GRAPHQL_ for headers definition in actions & event triggers * update CHANGELOG.md * Apply suggestions from code review Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * fix introspection query if any enum column present in primary key (fix #5200) (#5522) * Fix telemetry reporting of transport (websocket was reported as http) * add log kinds in cli-migrations image (#5529) * add log kinds in cli-migrations image * give hint to resolve timeout error * minor changes and CHANGELOG * server: set hasura.tracecontext in RQL mutations [#5542] (#5555) * server: set hasura.tracecontext in RQL mutations [#5542] * Update test suite Co-authored-by: Tirumarai Selvan <tiru@hasura.io> * Add bulldozer auto-merge and -update configuration We still need to add the github app (as of time of opening this PR) Afterwards devs should be able to allow bulldozer to automatically "update" the branch, merging in parent when it changes, as well as automatically merge when all checks pass. This is opt-in by adding the `auto-update-auto-merge` label to the PR. * Remove 'bulldozer' config, try 'kodiak' for auto-merge see: https://github.com/chdsbd/kodiak The main issue that bit us was not being able to auto update forked branches, also: https://github.com/palantir/bulldozer/issues/66 https://github.com/palantir/bulldozer/issues/145 * Cherry-picked all commits * [skip ci] Slightly improve formatting * Revert "fix introspection query if any enum column present in primary key (fix #5200) (#5522)" This reverts commit 0f9a5afa59a88f6824f4d63d58db246a5ba3fb03. This undoes a cherry-pick of 34288e1eb5f2c5dad9e6d1e05453dd52397dc970 that was already done previously in a6450e126bc2d98bcfd3791501986e4627ce6c6f, and subsequently fixed for PDV in 70e89dc250f8ddc6e2b7930bbe2b3eeaa6dbe1db * Do a small bit of tidying in Hasura.GraphQL.Parser.Collect * Fix cherry-picking work Some previous cherry-picks ended up modifying code that is commented out * [skip ci] clarified comment regarding insert representation * [skip ci] removed obsolete todos * cosmetic change * fix action error message * [skip ci] remove obsolete comment * [skip ci] synchronize stylish haskell extensions list * use previously defined scalar names in parsers rather than ad-hoc literals * Apply most syntax hlint hints. * Clarify comment on update mutation. * [skip ci] Clarify what fields should be specified for objects * Update "_inc" description. * Use record types rather than tuples fo IntrospectionResult and ParsedIntrospection * Get rid of checkFieldNamesUnique (use Data.List.Extended.duplicates) * Throw more errors when collecting query root names * [skip ci] clean column parser comment * Remove dead code inserted in ab65b39 * avoid converting to non-empty list where not needed * add note and TODO about the disabled checks in PDV * minor refactor in remoteField' function * Unify two getObject methods * Nitpicks in Remote.hs * Update CHANGELOG.md * Revert "Unify two getObject methods" This reverts commit bd6bb40355b3d189a46c0312eb52225e18be57b3. We do need two different getObject functions as the corresponding error message is different * Fix error message in Remote.hs * Update CHANGELOG.md Co-authored-by: Auke Booij <auke@tulcod.com> * Apply suggested Changelog fix. Co-authored-by: Auke Booij <auke@tulcod.com> * Fix typo in Changelog. * [skip ci] Update changelog. * reuse type names to avoid duplication * Fix Hashable instance for Definition The presence of `Maybe Unique`, and an optional description, as part of `Definition`s, means that `Definition`s that are considered `Eq`ual may get different hashes. This can happen, for instance, when one object is memoized but another is not. * [skip ci] Update commit_diff.txt * Bump parser version. * Bump freeze file after changes in parser. * [skip ci] Incorporate commits from master * Fix developer flag in server/cabal.project.freeze Co-authored-by: Auke Booij <auke@tulcod.com> * Deselect a changed ENUM test for upgrade/downgrade CI * Deselect test here as well * [skip ci] remove dead code * Disable more tests for upgrade/downgrade * Fix which test gets deselected * Revert "Add hdb_catalog.current_setting abstraction for reading Hasura settings" This reverts commit 66e85ab9fbd56cca2c28a80201f6604fbe811b85. * Remove circular reference in cabal.project.freeze Co-authored-by: Karthikeyan Chinnakonda <karthikeyan@hasura.io> Co-authored-by: Auke Booij <auke@hasura.io> Co-authored-by: Tirumarai Selvan <tiru@hasura.io> Co-authored-by: Marion Schleifer <marion@hasura.io> Co-authored-by: Aleksandra Sikora <ola.zxcvbnm@gmail.com> Co-authored-by: Brandon Simmons <brandon.m.simmons@gmail.com> Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> Co-authored-by: Anon Ray <rayanon004@gmail.com> Co-authored-by: rakeshkky <12475069+rakeshkky@users.noreply.github.com> Co-authored-by: Anon Ray <ecthiender@users.noreply.github.com> Co-authored-by: Vamshi Surabhi <vamshi@hasura.io> Co-authored-by: Antoine Leblanc <antoine@hasura.io> Co-authored-by: Brandon Simmons <brandon@hasura.io> Co-authored-by: Phil Freeman <phil@hasura.io> Co-authored-by: Lyndon Maydwell <lyndon@sordina.net> Co-authored-by: Phil Freeman <paf31@cantab.net> Co-authored-by: Naveen Naidu <naveennaidu479@gmail.com> Co-authored-by: Karthikeyan Chinnakonda <chkarthikeyan95@gmail.com> Co-authored-by: Nizar Malangadan <nizar-m@users.noreply.github.com> Co-authored-by: Antoine Leblanc <crucuny@gmail.com> Co-authored-by: Auke Booij <auke@tulcod.com>
2020-08-21 20:27:01 +03:00
(actionCache, annotatedCustomTypes) <- case maybeResolvedCustomTypes of
Just resolvedCustomTypes -> do
actionCache' <- buildActions -< ((resolvedCustomTypes, scalarsMap, orderedRoles), actionList)
Rewrite GraphQL schema generation and query parsing (close #2801) (#4111) Aka “the PDV refactor.” History is preserved on the branch 2801-graphql-schema-parser-refactor. * [skip ci] remove stale benchmark commit from commit_diff * [skip ci] Check for root field name conflicts between remotes * [skip ci] Additionally check for conflicts between remotes and DB * [skip ci] Check for conflicts in schema when tracking a table * [skip ci] Fix equality checking in GraphQL AST * server: fix mishandling of GeoJSON inputs in subscriptions (fix #3239) (#4551) * Add support for multiple top-level fields in a subscription to improve testability of subscriptions * Add an internal flag to enable multiple subscriptions * Add missing call to withConstructorFn in live queries (fix #3239) Co-authored-by: Alexis King <lexi.lambda@gmail.com> * Scheduled triggers (close #1914) (#3553) server: add scheduled triggers Co-authored-by: Alexis King <lexi.lambda@gmail.com> Co-authored-by: Marion Schleifer <marion@hasura.io> Co-authored-by: Karthikeyan Chinnakonda <karthikeyan@hasura.io> Co-authored-by: Aleksandra Sikora <ola.zxcvbnm@gmail.com> * dev.sh: bump version due to addition of croniter python dependency * server: fix an introspection query caching issue (fix #4547) (#4661) Introspection queries accept variables, but we need to make sure to also touch the variables that we ignore, so that an introspection query is marked not reusable if we are not able to build a correct query plan for it. A better solution here would be to deal with such unused variables correctly, so that more introspection queries become reusable. An even better solution would be to type-safely track *how* to reuse which variables, rather than to split the reusage marking from the planning. Co-authored-by: Tirumarai Selvan <tiru@hasura.io> * flush log buffer on exception in mkWaiApp ( fix #4772 ) (#4801) * flush log buffer on exception in mkWaiApp * add comment to explain the introduced change * add changelog * allow logging details of a live query polling thread (#4959) * changes for poller-log add various multiplexed query info in poller-log * minor cleanup, also fixes a bug which will return duplicate data * Live query poller stats can now be logged This also removes in-memory stats that are collected about batched query execution as the log lines when piped into an monitoring tool will give us better insights. * allow poller-log to be configurable * log minimal information in the livequery-poller-log Other information can be retrieved from /dev/subscriptions/extended * fix few review comments * avoid marshalling and unmarshalling from ByteString to EncJSON * separate out SubscriberId and SubscriberMetadata Co-authored-by: Anon Ray <rayanon004@gmail.com> * Don't compile in developer APIs by default * Tighten up handling of admin secret, more docs Store the admin secret only as a hash to prevent leaking the secret inadvertently, and to prevent timing attacks on the secret. NOTE: best practice for stored user passwords is a function with a tunable cost like bcrypt, but our threat model is quite different (even if we thought we could reasonably protect the secret from an attacker who could read arbitrary regions of memory), and bcrypt is far too slow (by design) to perform on each request. We'd have to rely on our (technically savvy) users to choose high entropy passwords in any case. Referencing #4736 * server/docs: add instructions to fix loss of float precision in PostgreSQL <= 11 (#5187) This adds a server flag, --pg-connection-options, that can be used to set a PostgreSQL connection parameter, extra_float_digits, that needs to be used to avoid loss of data on older versions of PostgreSQL, which have odd default behavior when returning float values. (fixes #5092) * [skip ci] Add new commits from master to the commit diff * [skip ci] serve default directives (skip & include) over introspection * [skip ci] Update non-Haskell assets with the version on master * server: refactor GQL execution check and config API (#5094) Co-authored-by: Vamshi Surabhi <vamshi@hasura.io> Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * [skip ci] fix js issues in tests by pinning dependencies version * [skip ci] bump graphql version * [skip ci] Add note about memory usage * generalize query execution logic on Postgres (#5110) * generalize PGExecCtx to support specialized functions for various operations * fix tests compilation * allow customising PGExecCtx when starting the web server * server: changes catalog initialization and logging for pro customization (#5139) * new typeclass to abstract the logic of QueryLog-ing * abstract the logic of logging websocket-server logs introduce a MonadWSLog typeclass * move catalog initialization to init step expose a helper function to migrate catalog create schema cache in initialiseCtx * expose various modules and functions for pro * [skip ci] cosmetic change * [skip ci] fix test calling a mutation that does not exist * [skip ci] minor text change * [skip ci] refactored input values * [skip ci] remove VString Origin * server: fix updating of headers behaviour in the update cron trigger API and create future events immediately (#5151) * server: fix bug to update headers in an existing cron trigger and create future events Co-authored-by: Tirumarai Selvan <tiru@hasura.io> * Lower stack chunk size in RTS to reduce thread STACK memory (closes #5190) This reduces memory consumption for new idle subscriptions significantly (see linked ticket). The hypothesis is: we fork a lot of threads per websocket, and some of these use slightly more than the initial 1K stack size, so the first overflow balloons to 32K, when significantly less is required. However: running with `+RTS -K1K -xc` did not seem to show evidence of any overflows! So it's a mystery why this improves things. GHC should probably also be doubling the stack buffer at each overflow or doing something even smarter; the knobs we have aren't so helpful. * [skip ci] fix todo and schema generation for aggregate fields * 5087 libpq pool leak (#5089) Shrink libpq buffers to 1MB before returning connection to pool. Closes #5087 See: https://github.com/hasura/pg-client-hs/pull/19 Also related: #3388 #4077 * bump pg-client-hs version (fixes a build issue on some environments) (#5267) * do not use prepared statements for mutations * server: unlock scheduled events on graceful shutdown (#4928) * Fix buggy parsing of new --conn-lifetime flag in 2b0e3774 * [skip ci] remove cherry-picked commit from commit_diff.txt * server: include additional fields in scheduled trigger webhook payload (#5262) * include scheduled triggers metadata in the webhook body Co-authored-by: Tirumarai Selvan <tiru@hasura.io> * server: call the webhook asynchronously in event triggers (#5352) * server: call the webhook asynchronosly in event triggers * Expose all modules in Cabal file (#5371) * [skip ci] update commit_diff.txt * [skip ci] fix cast exp parser & few TODOs * [skip ci] fix remote fields arguments * [skip ci] fix few more TODO, no-op refactor, move resolve/action.hs to execute/action.hs * Pass environment variables around as a data structure, via @sordina (#5374) * Pass environment variables around as a data structure, via @sordina * Resolving build error * Adding Environment passing note to changelog * Removing references to ILTPollerLog as this seems to have been reintroduced from a bad merge * removing commented-out imports * Language pragmas already set by project * Linking async thread * Apply suggestions from code review Use `runQueryTx` instead of `runLazyTx` for queries. * remove the non-user facing entry in the changelog Co-authored-by: Phil Freeman <paf31@cantab.net> Co-authored-by: Phil Freeman <phil@hasura.io> Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * [skip ci] fix: restrict remote relationship field generation for hasura queries * [skip ci] no-op refactor; move insert execution code from schema parser module * server: call the webhook asynchronously in event triggers (#5352) * server: call the webhook asynchronosly in event triggers * Expose all modules in Cabal file (#5371) * [skip ci] update commit_diff.txt * Pass environment variables around as a data structure, via @sordina (#5374) * Pass environment variables around as a data structure, via @sordina * Resolving build error * Adding Environment passing note to changelog * Removing references to ILTPollerLog as this seems to have been reintroduced from a bad merge * removing commented-out imports * Language pragmas already set by project * Linking async thread * Apply suggestions from code review Use `runQueryTx` instead of `runLazyTx` for queries. * remove the non-user facing entry in the changelog Co-authored-by: Phil Freeman <paf31@cantab.net> Co-authored-by: Phil Freeman <phil@hasura.io> Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * [skip ci] implement header checking Probably closes #14 and #3659. * server: refactor 'pollQuery' to have a hook to process 'PollDetails' (#5391) Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * update pg-client (#5421) * [skip ci] update commit_diff * Fix latency buckets for telemetry data These must have gotten messed up during a refactor. As a consequence almost all samples received so far fall into the single erroneous 0 to 1K seconds (originally supposed to be 1ms?) bucket. I also re-thought what the numbers should be, but these are still arbitrary and might want adjusting in the future. * [skip ci] include the latest commit compared against master in commit_diff * [skip ci] include new commits from master in commit_diff * [skip ci] improve description generation * [skip ci] sort all introspect arrays * [skip ci] allow parsers to specify error codes * [skip ci] fix integer and float parsing error code * [skip ci] scalar from json errors are now parse errors * [skip ci] fixed negative integer error message and code * [skip ci] Re-fix nullability in relationships * [skip ci] no-op refactor and removed couple of FIXMEs * [skip ci] uncomment code in 'deleteMetadataObject' * [skip ci] Fix re-fix of nullability for relationships * [skip ci] fix default arguments error code * [skip ci] updated test error message !!! WARNING !!! Since all fields accept `null`, they all are technically optional in the new schema. Meaning there's no such thing as a missing mandatory field anymore: a field that doesn't have a default value, and which therefore isn't labelled as "optional" in the schema, will be assumed to be null if it's missing, meaning it isn't possible anymore to have an error for a missing mandatory field. The only possible error is now when a optional positional argument is omitted but is not the last positional argument. * [skip ci] cleanup of int scalar parser * [skip ci] retro-compatibility of offset as string * [skip ci] Remove commit from commit_diff.txt Although strictly speaking we don't know if this will work correctly in PDV if we would implement query plan caching, the fact is that in the theoretical case that we would have the same issue in PDV, it would probably apply not just to introspection, and the fix would be written completely differently. So this old commit is of no value to us other than the heads-up "make sure query plan caching works correctly even in the presence of unused variables", which is already part of the test suite. * Add MonadTrace and MonadExecuteQuery abstractions (#5383) * [skip ci] Fix accumulation of input object types Just like object types, interface types, and union types, we have to avoid circularities when collecting input types from the GraphQL AST. Additionally, this fixes equality checks for input object types (whose fields are unordered, and hence should be compared as sets) and enum types (ditto). * [skip ci] fix fragment error path * [skip ci] fix node error code * [skip ci] fix paths in insert queries * [skip ci] fix path in objects * [skip ci] manually alter node id path for consistency * [skip ci] more node error fixups * [skip ci] one last relay error message fix * [skip ci] update commit_diff * Propagate the trace context to event triggers (#5409) * Propagate the trace context to event triggers * Handle missing trace and span IDs * Store trace context as one LOCAL * Add migrations * Documentation * changelog * Fix warnings * Respond to code review suggestions * Respond to code review * Undo changelog * Update CHANGELOG.md Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * server: log request/response sizes for event triggers (#5463) * server: log request/response sizes for event triggers event triggers (and scheduled triggers) now have request/response size in their logs. * add changelog entry * Tracing: Simplify HTTP traced request (#5451) Remove the Inversion of Control (SuspendRequest) and simplify the tracing of HTTP Requests. Co-authored-by: Phil Freeman <phil@hasura.io> * Attach request ID as tracing metadata (#5456) * Propagate the trace context to event triggers * Handle missing trace and span IDs * Store trace context as one LOCAL * Add migrations * Documentation * Include the request ID as trace metadata * changelog * Fix warnings * Respond to code review suggestions * Respond to code review * Undo changelog * Update CHANGELOG.md * Typo Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * server: add logging for action handlers (#5471) * server: add logging for action handlers * add changelog entry * change action-handler log type from internal to non-internal * fix action-handler-log name * server: pass http and websocket request to logging context (#5470) * pass request body to logging context in all cases * add message size logging on the websocket API this is required by graphql-engine-pro/#416 * message size logging on websocket API As we need to log all messages recieved/sent by the websocket server, it makes sense to log them as part of the websocket server event logs. Previously message recieved were logged inside the onMessage handler, and messages sent were logged only for "data" messages (as a server event log) * fix review comments Co-authored-by: Phil Freeman <phil@hasura.io> * server: stop eventing subsystem threads when shutting down (#5479) * server: stop eventing subsystem threads when shutting down * Apply suggestions from code review Co-authored-by: Karthikeyan Chinnakonda <chkarthikeyan95@gmail.com> Co-authored-by: Phil Freeman <phil@hasura.io> Co-authored-by: Phil Freeman <paf31@cantab.net> Co-authored-by: Karthikeyan Chinnakonda <chkarthikeyan95@gmail.com> * [skip ci] update commit_diff with new commits added in master * Bugfix to support 0-size HASURA_GRAPHQL_QUERY_PLAN_CACHE_SIZE Also some minor refactoring of bounded cache module: - the maxBound check in `trim` was confusing and unnecessary - consequently trim was unnecessary for lookupPure Also add some basic tests * Support only the bounded cache, with default HASURA_GRAPHQL_QUERY_PLAN_CACHE_SIZE of 4000. Closes #5363 * [skip ci] remove merge commit from commit_diff * server: Fix compiler warning caused by GHC upgrade (#5489) Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * [skip ci] update all non server code from master * [skip ci] aligned object field error message with master * [skip ci] fix remaining undefined? * [skip ci] remove unused import * [skip ci] revert to previous error message, fix tests * Move nullableType/nonNullableType to Schema.hs These are functions on Types, not on Parsers. * [skip ci] fix setup to fix backend only test the order in which permission checks are performed on the branch is slightly different than on master, resulting in a slightly different error if there are no other mutations the user has access to. By adding update permissions, we go back to the expected case. * [skip ci] fix insert geojson tests to reflect new paths * [skip ci] fix enum test for better error message * [skip ci] fix header test for better error message * [skip ci] fix fragment cycle test for better error message * [skip ci] fix error message for type mismatch * [skip ci] fix variable path in test * [skip ci] adjust tests after bug fix * [skip ci] more tests fixing * Add hdb_catalog.current_setting abstraction for reading Hasura settings As the comment in the function’s definition explains, this is needed to work around an awkward Postgres behavior. * [skip ci] Update CONTRIBUTING.md to mention Node setup for Python tests * [skip ci] Add missing Python tests env var to CONTRIBUTING.md * [skip ci] fix order of result when subscription is run with multiple nodes * [skip ci] no-op refactor: fix a warning in Internal/Parser.hs * [skip ci] throw error when a subscription contains remote joins * [skip ci] Enable easier profiling by hiding AssertNF behind a flag In order to compile a profiling build, run: $ cabal new-build -f profiling --enable-profiling * [skip ci] Fix two warnings We used to lookup the objects that implement a given interface by filtering all objects in the schema document. However, one of the tests expects us to generate a warning if the provided `implements` field of an introspection query specifies an object not implementing some interface. So we use that field instead. * [skip ci] Fix warnings by commenting out query plan caching * [skip ci] improve masking/commenting query caching related code & few warning fixes * [skip ci] Fixed compiler warnings in graphql-parser-hs * Sync non-Haskell assets with master * [skip ci] add a test inserting invalid GraphQL but valid JSON value in a jsonb column * [skip ci] Avoid converting to/from Map * [skip ci] Apply some hlint suggestions * [skip ci] remove redundant constraints from buildLiveQueryPlan and explainGQLQuery * [skip ci] add NOTEs about missing Tracing constraints in PDV from master * Remove -fdefer-typed-holes, fix warnings * Update cabal.project.freeze * Limit GHC’s heap size to 8GB in CI to avoid the OOM killer * Commit package-lock.json for Python tests’ remote schema server * restrict env variables start with HASURA_GRAPHQL_ for headers configuration in actions, event triggers & remote schemas (#5519) * restrict env variables start with HASURA_GRAPHQL_ for headers definition in actions & event triggers * update CHANGELOG.md * Apply suggestions from code review Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * add test for table_by_pk node when roles doesn't have permission to PK * [skip ci] fix introspection query if any enum column present in primary key (fix #5200) (#5522) * [skip ci] test case fix for a6450e126bc2d98bcfd3791501986e4627ce6c6f * [skip ci] add tests to agg queries when role doesn't have access to any cols * fix backend test * Simplify subscription execution * [skip ci] add test to check if required headers are present while querying * Suppose, table B is related to table A and to query B certain headers are necessary, then the test checks that we are throwing error when the header is not set when B is queried through A * fix mutations not checking for view mutability * [skip ci] add variable type checking and corresponding tests * [skip ci] add test to check if update headers are present while doing an upsert * [skip ci] add positive counterparts to some of the negative permission tests * fix args missing their description in introspect * [skip ci] Remove unused function; insert missing markNotReusable call * [skip ci] Add a Note about InputValue * [skip ci] Delete LegacySchema/ 🎉 * [skip ci] Delete GraphQL/{Resolve,Validate}/ 🎉 * [skip ci] Delete top-level Resolve/Validate modules; tidy .cabal file * [skip ci] Delete LegacySchema top-level module Somehow I missed this one. * fix input value to json * [skip ci] elaborate on JSON objects in GraphQL * [skip ci] add missing file * [skip ci] add a test with subscription containing remote joins * add a test with remote joins in mutation output * [skip ci] Add some comments to Schema/Mutation.hs * [skip ci] Remove no longer needed code from RemoteServer.hs * [skip ci] Use a helper function to generate conflict clause parsers * [skip ci] fix type checker error in fields with default value * capitalize the header keys in select_articles_without_required_headers * Somehow, this was the reason the tests were failing. I have no idea, why! * [skip ci] Add a long Note about optional fields and nullability * Improve comments a bit; simplify Schema/Common.hs a bit * [skip ci] full implementation of 5.8.5 type checking. * [skip ci] fix validation test teardown * [skip ci] fix schema stitching test * fix remote schema ignoring enum nullability * [skip ci] fix fieldOptional to not discard nullability * revert nullability of use_spheroid * fix comment * add required remote fields with arguments for tests * [skip ci] add missing docstrings * [skip ci] fixed description of remote fields * [skip ci] change docstring for consistency * fix several schema inconsistencies * revert behaviour change in function arguments parsing * fix remaining nullability issues in new schema * minor no-op refactor; use isListType from graphql-parser-hs * use nullability of remote schema node, while creating a Remote reln * fix 'ID' input coercing & action 'ID' type relationship mapping * include ASTs in MonadExecuteQuery * needed for PRO code-base * Delete code for "interfaces implementing ifaces" (draft GraphQL spec) Previously I started writing some code that adds support for a future GraphQL feature where interfaces may themselves be sub-types of other interfaces. However, this code was incomplete, and partially incorrect. So this commit deletes support for that entirely. * Ignore a remote schema test during the upgrade/downgrade test The PDV refactor does a better job at exposing a minimal set of types through introspection. In particular, not every type that is present in a remote schema is re-exposed by Hasura. The test test_schema_stitching.py::TestRemoteSchemaBasic::test_introspection assumed that all types were re-exposed, which is not required for GraphQL compatibility, in order to test some aspect of our support for remote schemas. So while this particular test has been updated on PDV, the PDV branch now does not pass the old test, which we argue to be incorrect. Hence this test is disabled while we await a release, after which we can re-enable it. This also re-enables a test that was previously disabled for similar, though unrelated, reasons. * add haddock documentation to the action's field parsers * Deslecting some tests in server-upgrade Some tests with current build are failing on server upgrade which it should not. The response is more accurate than what it was. Also the upgrade tests were not throwing errors when the test is expected to return an error, but succeeds. The test framework is patched to catch this case. * [skip ci] Add a long Note about interfaces and object types * send the response headers back to client after running a query * Deselect a few more tests during upgrade/downgrade test * Update commit_diff.txt * change log kind from db_migrate to catalog_migrate (#5531) * Show method and complete URI in traced HTTP calls (#5525) Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * restrict env variables start with HASURA_GRAPHQL_ for headers configuration in actions, event triggers & remote schemas (#5519) * restrict env variables start with HASURA_GRAPHQL_ for headers definition in actions & event triggers * update CHANGELOG.md * Apply suggestions from code review Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * fix introspection query if any enum column present in primary key (fix #5200) (#5522) * Fix telemetry reporting of transport (websocket was reported as http) * add log kinds in cli-migrations image (#5529) * add log kinds in cli-migrations image * give hint to resolve timeout error * minor changes and CHANGELOG * server: set hasura.tracecontext in RQL mutations [#5542] (#5555) * server: set hasura.tracecontext in RQL mutations [#5542] * Update test suite Co-authored-by: Tirumarai Selvan <tiru@hasura.io> * Add bulldozer auto-merge and -update configuration We still need to add the github app (as of time of opening this PR) Afterwards devs should be able to allow bulldozer to automatically "update" the branch, merging in parent when it changes, as well as automatically merge when all checks pass. This is opt-in by adding the `auto-update-auto-merge` label to the PR. * Remove 'bulldozer' config, try 'kodiak' for auto-merge see: https://github.com/chdsbd/kodiak The main issue that bit us was not being able to auto update forked branches, also: https://github.com/palantir/bulldozer/issues/66 https://github.com/palantir/bulldozer/issues/145 * Cherry-picked all commits * [skip ci] Slightly improve formatting * Revert "fix introspection query if any enum column present in primary key (fix #5200) (#5522)" This reverts commit 0f9a5afa59a88f6824f4d63d58db246a5ba3fb03. This undoes a cherry-pick of 34288e1eb5f2c5dad9e6d1e05453dd52397dc970 that was already done previously in a6450e126bc2d98bcfd3791501986e4627ce6c6f, and subsequently fixed for PDV in 70e89dc250f8ddc6e2b7930bbe2b3eeaa6dbe1db * Do a small bit of tidying in Hasura.GraphQL.Parser.Collect * Fix cherry-picking work Some previous cherry-picks ended up modifying code that is commented out * [skip ci] clarified comment regarding insert representation * [skip ci] removed obsolete todos * cosmetic change * fix action error message * [skip ci] remove obsolete comment * [skip ci] synchronize stylish haskell extensions list * use previously defined scalar names in parsers rather than ad-hoc literals * Apply most syntax hlint hints. * Clarify comment on update mutation. * [skip ci] Clarify what fields should be specified for objects * Update "_inc" description. * Use record types rather than tuples fo IntrospectionResult and ParsedIntrospection * Get rid of checkFieldNamesUnique (use Data.List.Extended.duplicates) * Throw more errors when collecting query root names * [skip ci] clean column parser comment * Remove dead code inserted in ab65b39 * avoid converting to non-empty list where not needed * add note and TODO about the disabled checks in PDV * minor refactor in remoteField' function * Unify two getObject methods * Nitpicks in Remote.hs * Update CHANGELOG.md * Revert "Unify two getObject methods" This reverts commit bd6bb40355b3d189a46c0312eb52225e18be57b3. We do need two different getObject functions as the corresponding error message is different * Fix error message in Remote.hs * Update CHANGELOG.md Co-authored-by: Auke Booij <auke@tulcod.com> * Apply suggested Changelog fix. Co-authored-by: Auke Booij <auke@tulcod.com> * Fix typo in Changelog. * [skip ci] Update changelog. * reuse type names to avoid duplication * Fix Hashable instance for Definition The presence of `Maybe Unique`, and an optional description, as part of `Definition`s, means that `Definition`s that are considered `Eq`ual may get different hashes. This can happen, for instance, when one object is memoized but another is not. * [skip ci] Update commit_diff.txt * Bump parser version. * Bump freeze file after changes in parser. * [skip ci] Incorporate commits from master * Fix developer flag in server/cabal.project.freeze Co-authored-by: Auke Booij <auke@tulcod.com> * Deselect a changed ENUM test for upgrade/downgrade CI * Deselect test here as well * [skip ci] remove dead code * Disable more tests for upgrade/downgrade * Fix which test gets deselected * Revert "Add hdb_catalog.current_setting abstraction for reading Hasura settings" This reverts commit 66e85ab9fbd56cca2c28a80201f6604fbe811b85. * Remove circular reference in cabal.project.freeze Co-authored-by: Karthikeyan Chinnakonda <karthikeyan@hasura.io> Co-authored-by: Auke Booij <auke@hasura.io> Co-authored-by: Tirumarai Selvan <tiru@hasura.io> Co-authored-by: Marion Schleifer <marion@hasura.io> Co-authored-by: Aleksandra Sikora <ola.zxcvbnm@gmail.com> Co-authored-by: Brandon Simmons <brandon.m.simmons@gmail.com> Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> Co-authored-by: Anon Ray <rayanon004@gmail.com> Co-authored-by: rakeshkky <12475069+rakeshkky@users.noreply.github.com> Co-authored-by: Anon Ray <ecthiender@users.noreply.github.com> Co-authored-by: Vamshi Surabhi <vamshi@hasura.io> Co-authored-by: Antoine Leblanc <antoine@hasura.io> Co-authored-by: Brandon Simmons <brandon@hasura.io> Co-authored-by: Phil Freeman <phil@hasura.io> Co-authored-by: Lyndon Maydwell <lyndon@sordina.net> Co-authored-by: Phil Freeman <paf31@cantab.net> Co-authored-by: Naveen Naidu <naveennaidu479@gmail.com> Co-authored-by: Karthikeyan Chinnakonda <chkarthikeyan95@gmail.com> Co-authored-by: Nizar Malangadan <nizar-m@users.noreply.github.com> Co-authored-by: Antoine Leblanc <crucuny@gmail.com> Co-authored-by: Auke Booij <auke@tulcod.com>
2020-08-21 20:27:01 +03:00
returnA -< (actionCache', resolvedCustomTypes)
-- If the custom types themselves are inconsistent, we cant really do
-- anything with actions, so just mark them all inconsistent.
Nothing -> do
recordInconsistencies
-<
( map mkActionMetadataObject actionList,
"custom types are inconsistent"
)
returnA -< (mempty, mempty)
allow custom mutations through actions (#3042) * basic doc for actions * custom_types, sync and async actions * switch to graphql-parser-hs on github * update docs * metadata import/export * webhook calls are now supported * relationships in sync actions * initialise.sql is now in sync with the migration file * fix metadata tests * allow specifying arguments of actions * fix blacklist check on check_build_worthiness job * track custom_types and actions related tables * handlers are now triggered on async actions * default to pgjson unless a field is involved in relationships, for generating definition list * use 'true' for action filter for non admin role * fix create_action_permission sql query * drop permissions when dropping an action * add a hdb_role view (and relationships) to fetch all roles in the system * rename 'webhook' key in action definition to 'handler' * allow templating actions wehook URLs with env vars * add 'update_action' /v1/query type * allow forwarding client headers by setting `forward_client_headers` in action definition * add 'headers' configuration in action definition * handle webhook error response based on status codes * support array relationships for custom types * implement single row mutation, see https://github.com/hasura/graphql-engine/issues/3731 * single row mutation: rename 'pk_columns' -> 'columns' and no-op refactor * use top level primary key inputs for delete_by_pk & account select permissions for single row mutations * use only REST semantics to resolve the webhook response * use 'pk_columns' instead of 'columns' for update_by_pk input * add python basic tests for single row mutations * add action context (name) in webhook payload * Async action response is accessible for non admin roles only if the request session vars equals to action's * clean nulls, empty arrays for actions, custom types in export metadata * async action mutation returns only the UUID of the action * unit tests for URL template parser * Basic sync actions python tests * fix output in async query & add async tests * add admin secret header in async actions python test * document async action architecture in Resolve/Action.hs file * support actions returning array of objects * tests for list type response actions * update docs with actions and custom types metadata API reference * update actions python tests as per #f8e1330 Co-authored-by: Tirumarai Selvan <tirumarai.selvan@gmail.com> Co-authored-by: Aravind Shankar <face11301@gmail.com> Co-authored-by: Rakesh Emmadi <12475069+rakeshkky@users.noreply.github.com>
2020-02-13 20:38:23 +03:00
Avoid GraphQL schema rebuild when changing irrelevant Metadata This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to: - allowlists - query collections - cron triggers - REST endpoints - API limits - metrics config - GraphQL introspection options - TLS allow lists - OpenTelemetry When is construction of the in-memory GraphQL schema cached between Metadata operations? Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation. However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction. The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.) We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance. So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps. That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema. Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613 GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
returnA
-<
BuildOutputs
{ _boSources = M.map fst sourcesOutput,
_boActions = actionCache,
_boRemoteSchemas = remoteSchemaCache,
_boCustomTypes = annotatedCustomTypes,
_boRoles = mapFromL _rRoleName $ _unOrderedRoles orderedRoles,
_boBackendCache = backendCache
}
mkEndpointMetadataObject (name, createEndpoint) =
let objectId = MOEndpoint name
in MetadataObject objectId (toJSON createEndpoint)
Avoid GraphQL schema rebuild when changing irrelevant Metadata This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to: - allowlists - query collections - cron triggers - REST endpoints - API limits - metrics config - GraphQL introspection options - TLS allow lists - OpenTelemetry When is construction of the in-memory GraphQL schema cached between Metadata operations? Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation. However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction. The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.) We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance. So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps. That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema. Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613 GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
buildOpenTelemetry ::
( ArrowChoice arr,
Inc.ArrowCache m arr,
ArrowWriter (Seq (Either InconsistentMetadata md)) arr,
MonadError QErr m
) =>
OpenTelemetryConfig `arr` OpenTelemetryInfo
buildOpenTelemetry = proc openTelemetryConfig -> do
case _ocStatus openTelemetryConfig of
OtelDisabled ->
-- Disable all components if OpenTelemetry export not enabled
returnA -< OpenTelemetryInfo Nothing Nothing
OtelEnabled -> do
mOtelExporterInfo <-
let exporterOtlp = _ocExporterOtlp openTelemetryConfig
in (|
withRecordInconsistency
( bindErrorA -< liftEither (parseOtelExporterConfig env exporterOtlp)
)
|) (MetadataObject (MOOpenTelemetry OtelSubobjectExporterOtlp) (toJSON exporterOtlp))
mOtelBatchSpanProcessorInfo <-
let batchSpanProcessor = _ocBatchSpanProcessor openTelemetryConfig
in (|
withRecordInconsistency
( bindErrorA -< liftEither (parseOtelBatchSpanProcessorConfig batchSpanProcessor)
)
|) (MetadataObject (MOOpenTelemetry OtelSubobjectBatchSpanProcessor) (toJSON batchSpanProcessor))
returnA
-<
OpenTelemetryInfo
mOtelExporterInfo
-- Disable data types if they are not in the enabled set
( if OtelTraces `S.member` _ocEnabledDataTypes openTelemetryConfig
then mOtelBatchSpanProcessorInfo
else Nothing
)
buildEndpoint ::
Avoid GraphQL schema rebuild when changing irrelevant Metadata This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to: - allowlists - query collections - cron triggers - REST endpoints - API limits - metrics config - GraphQL introspection options - TLS allow lists - OpenTelemetry When is construction of the in-memory GraphQL schema cached between Metadata operations? Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation. However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction. The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.) We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance. So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps. That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema. Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613 GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
(ArrowChoice arr, ArrowKleisli m arr, MonadError QErr m, ArrowWriter (Seq (Either InconsistentMetadata md)) arr) =>
(InsOrdHashMap CollectionName CreateCollection, (EndpointName, CreateEndpoint)) `arr` Maybe (EndpointMetadata GQLQueryWithText)
buildEndpoint = proc (collections, e@(name, createEndpoint)) -> do
let endpoint = createEndpoint
-- QueryReference collName queryName = _edQuery endpoint
addContext err = "in endpoint " <> toTxt (unEndpointName name) <> ": " <> err
(|
withRecordInconsistency
(bindErrorA -< modifyErr addContext $ resolveEndpoint collections endpoint)
|) (mkEndpointMetadataObject e)
resolveEndpoint ::
QErrM m =>
InsOrdHashMap CollectionName CreateCollection ->
EndpointMetadata QueryReference ->
m (EndpointMetadata GQLQueryWithText)
resolveEndpoint collections = traverse $ \(QueryReference collName queryName) -> do
collection <-
onNothing
(OMap.lookup collName collections)
(throw400 NotExists $ "collection with name " <> toTxt collName <> " does not exist")
listedQuery <-
flip
onNothing
( throw400 NotExists $
"query with name "
<> toTxt queryName
<> " does not exist in collection "
<> toTxt collName
)
$ find ((== queryName) . _lqName) (_cdQueries (_ccDefinition collection))
let lq@(GQLQueryWithText lqq) = _lqQuery listedQuery
ds = G.getExecutableDefinitions $ unGQLQuery $ snd lqq
case ds of
[G.ExecutableDefinitionOperation (G.OperationDefinitionTyped d)]
| G._todType d == G.OperationTypeSubscription ->
throw405 $ "query with name " <> toTxt queryName <> " is a subscription"
| otherwise -> pure ()
[] -> throw400 BadRequest $ "query with name " <> toTxt queryName <> " has no definitions."
_ -> throw400 BadRequest $ "query with name " <> toTxt queryName <> " has multiple definitions."
pure lq
mkEventTriggerMetadataObject ::
forall b a c.
Backend b =>
(a, SourceName, c, TableName b, RecreateEventTriggers, EventTriggerConf b) ->
MetadataObject
mkEventTriggerMetadataObject (_, source, _, table, _, eventTriggerConf) =
let objectId =
MOSourceObjId source $
AB.mkAnyBackend $
SMOTableObj @b table $
MTOTrigger $
etcName eventTriggerConf
definition = object ["table" .= table, "configuration" .= eventTriggerConf]
in MetadataObject objectId definition
mkCronTriggerMetadataObject catalogCronTrigger =
let definition = toJSON catalogCronTrigger
in MetadataObject
(MOCronTrigger (ctName catalogCronTrigger))
definition
mkActionMetadataObject (ActionMetadata name comment defn _) =
MetadataObject (MOAction name) (toJSON $ CreateAction name defn comment)
mkInheritedRoleMetadataObject inheritedRole@(Role roleName _) =
MetadataObject (MOInheritedRole roleName) (toJSON inheritedRole)
buildTableEventTriggers ::
forall arr m b.
( ArrowChoice arr,
Inc.ArrowDistribute arr,
ArrowWriter (Seq (Either InconsistentMetadata MetadataDependency)) arr,
Inc.ArrowCache m arr,
MonadIO m,
MonadError QErr m,
MonadBaseControl IO m,
MonadReader BuildReason m,
HasServerConfigCtx m,
BackendMetadata b,
BackendEventTrigger b
) =>
( SourceName,
SourceConfig b,
TableCoreInfoG b (ColumnInfo b) (ColumnInfo b),
[EventTriggerConf b],
Inc.Dependency Inc.InvalidationKey,
RecreateEventTriggers
)
`arr` (EventTriggerInfoMap b)
buildTableEventTriggers = proc (sourceName, sourceConfig, tableInfo, eventTriggerConfs, metadataInvalidationKey, migrationRecreateEventTriggers) ->
buildInfoMap (etcName . (^. _6)) (mkEventTriggerMetadataObject @b) buildEventTrigger
-<
(tableInfo, map (metadataInvalidationKey,sourceName,sourceConfig,_tciName tableInfo,migrationRecreateEventTriggers,) eventTriggerConfs)
where
buildEventTrigger = proc (tableInfo, (metadataInvalidationKey, source, sourceConfig, table, migrationRecreateEventTriggers, eventTriggerConf)) -> do
let triggerName = etcName eventTriggerConf
metadataObject = mkEventTriggerMetadataObject @b (metadataInvalidationKey, source, sourceConfig, table, migrationRecreateEventTriggers, eventTriggerConf)
schemaObjectId =
SOSourceObj source $
AB.mkAnyBackend $
SOITableObj @b table $
TOTrigger triggerName
addTriggerContext e = "in event trigger " <> triggerName <<> ": " <> e
buildReason <- bindA -< ask
let reloadMetadataRecreateEventTrigger =
case buildReason of
CatalogSync -> RETDoNothing
CatalogUpdate Nothing -> RETDoNothing
CatalogUpdate (Just sources) -> if source `elem` sources then RETRecreate else RETDoNothing
(|
withRecordInconsistency
( (|
modifyErrA
( do
(info, dependencies) <- bindErrorA -< buildEventTriggerInfo @b env source table eventTriggerConf
serverConfigCtx <- bindA -< askServerConfigCtx
let isCatalogUpdate =
case buildReason of
CatalogUpdate _ -> True
CatalogSync -> False
tableColumns = M.elems $ _tciFieldInfoMap tableInfo
if ( _sccMaintenanceMode serverConfigCtx == MaintenanceModeDisabled
&& _sccReadOnlyMode serverConfigCtx == ReadOnlyModeDisabled
)
then do
bindA
-<
when (reloadMetadataRecreateEventTrigger == RETRecreate) $
-- This is the case when the user sets `recreate_event_triggers`
-- to `true` in `reload_metadata`, in this case, we recreate
-- the SQL trigger by force, even if it may not be necessary
liftEitherM $
createTableEventTrigger
@b
serverConfigCtx
sourceConfig
table
tableColumns
triggerName
(etcDefinition eventTriggerConf)
(_tciPrimaryKey tableInfo)
if isCatalogUpdate || migrationRecreateEventTriggers == RETRecreate
then do
recreateTriggerIfNeeded
-<
( table,
tableColumns,
triggerName,
etcDefinition eventTriggerConf,
sourceConfig,
(_tciPrimaryKey tableInfo)
)
-- We check if the SQL triggers for the event triggers
-- are present. If any SQL triggers are missing, those are
-- created.
bindA
-<
createMissingSQLTriggers
sourceConfig
table
(tableColumns, _tciPrimaryKey tableInfo)
triggerName
(etcDefinition eventTriggerConf)
else bindA -< pure ()
else bindA -< pure ()
recordDependencies -< (metadataObject, schemaObjectId, dependencies)
returnA -< info
)
|) (addTableContext @b table . addTriggerContext)
)
|) metadataObject
recreateTriggerIfNeeded =
-- using `Inc.cache` here means that the response will be cached for the given output and the
-- next time this arrow recieves the same input, the cached response will be returned and the
-- computation will not be done again.
Inc.cache
proc
( tableName,
tableColumns,
triggerName,
triggerDefinition,
sourceConfig,
primaryKey
)
-> do
bindA
-< do
serverConfigCtx <- askServerConfigCtx
liftEitherM $
createTableEventTrigger @b
serverConfigCtx
sourceConfig
tableName
tableColumns
triggerName
triggerDefinition
primaryKey
buildCronTriggers ::
( ArrowChoice arr,
Inc.ArrowDistribute arr,
Avoid GraphQL schema rebuild when changing irrelevant Metadata This increases the speed of `create_query_collection` and `add_collection_to_allowlist` by a factor ~~10~~ 65, by caching the in-memory GraphQL schema. This speedup also applies more broadly to Metadata changes relating to: - allowlists - query collections - cron triggers - REST endpoints - API limits - metrics config - GraphQL introspection options - TLS allow lists - OpenTelemetry When is construction of the in-memory GraphQL schema cached between Metadata operations? Before this PR, **never**! It's rebuilt fully, for every role, on every Metadata operation. However, there are many Metadata operations that don't influence the GraphQL schema. So we should be caching its construction. The `Hasura.Incremental` framework allows us to cache such constructions: whenever we have an arrow `Rule m a b`, where `a` is the input to the arrow and `b` the output, we can use the `Inc.cache` combinator to obtain a new arrow which is only re-executed when the input `a` changes in a material way. To test this, `a` needs an `Eq` instance. (Before hasura/graphql-engine-mono#6877, this was a `Cacheable` type class which has now been removed.) We can't simply apply `Inc.cache` to the "Steps 3 and 4" in `buildSchemaCacheRule`, because the inputs (components of `BuildOutputs` such as `SourceCache`) don't have an `Eq` instance. So the changes to `buildSchemaCacheRule` restructure the code so that the input to "Step 1", namely the Metadata, can be used as a caching key instead, so that `Inc.cache` can be applied to the whole sequence of steps. That works to cache construction of the GraphQL schema, but it means that now only those Metadata operations that _don't_ influence any of the products of steps 1-4 can use a cached build of the GraphQL schema. The most important intermediate product is `BuildOutputs`. So now the exercise becomes to minimize the amount of stuff stored in `BuildOutputs`, so that as many Metadata operations as possible can be handled outside of the codepath that produces a GraphQL schema. Per hasura/graphql-engine-mono#6609, the `BuildOutputs` structure is too big, and stores things unnecessarily. Refer to the PR description there for reasoning - the same logic applies to this PR, and simply goes a few steps further. In doing so, it can benefit from hasura/graphql-engine-mono#6765, which allows us to verify at compile time that certain Schema Cache building steps _don't_ generate "Metadata dependencies". If a certain Metadata dependency is never generated, we don't need to handle that case in `deleteMetadataObject`. Thus such intermediate products don't need to be passed through `resolveDependencies`, and thus they don't need to be stored in `BuildOutputs`, and thus their rebuild won't trigger a GraphQL schema rebuild. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6613 GitOrigin-RevId: 27d2e69d3461bd4c32f08febef9995c0369fab3a
2022-11-22 15:11:32 +03:00
ArrowWriter (Seq (Either InconsistentMetadata md)) arr,
Inc.ArrowCache m arr,
MonadError QErr m
) =>
((), [CronTriggerMetadata])
`arr` HashMap TriggerName CronTriggerInfo
buildCronTriggers = buildInfoMap ctName mkCronTriggerMetadataObject buildCronTrigger
where
buildCronTrigger = proc (_, cronTrigger) -> do
let triggerName = triggerNameToTxt $ ctName cronTrigger
addCronTriggerContext e = "in cron trigger " <> triggerName <> ": " <> e
(|
withRecordInconsistency
(bindErrorA -< modifyErr addCronTriggerContext $ resolveCronTrigger env cronTrigger)
|) (mkCronTriggerMetadataObject cronTrigger)
buildInheritedRoles ::
( ArrowChoice arr,
Inc.ArrowDistribute arr,
ArrowWriter (Seq (Either InconsistentMetadata MetadataDependency)) arr,
Inc.ArrowCache m arr,
MonadError QErr m
) =>
(HashSet RoleName, [InheritedRole])
`arr` HashMap RoleName Role
buildInheritedRoles = buildInfoMap _rRoleName mkInheritedRoleMetadataObject buildInheritedRole
where
buildInheritedRole = proc (allRoles, inheritedRole) -> do
let addInheritedRoleContext e = "in inherited role " <> roleNameToTxt (_rRoleName inheritedRole) <> ": " <> e
metadataObject = mkInheritedRoleMetadataObject inheritedRole
schemaObject = SORole $ _rRoleName inheritedRole
(|
withRecordInconsistency
( do
(resolvedInheritedRole, dependencies) <- bindErrorA -< modifyErr addInheritedRoleContext $ resolveInheritedRole allRoles inheritedRole
recordDependencies -< (metadataObject, schemaObject, dependencies)
returnA -< resolvedInheritedRole
)
|) metadataObject
buildActions ::
( ArrowChoice arr,
Inc.ArrowDistribute arr,
Inc.ArrowCache m arr,
ArrowWriter (Seq (Either InconsistentMetadata MetadataDependency)) arr,
MonadError QErr m
) =>
( (AnnotatedCustomTypes, BackendMap ScalarMap, OrderedRoles),
[ActionMetadata]
)
`arr` HashMap ActionName ActionInfo
buildActions = buildInfoMap _amName mkActionMetadataObject buildAction
where
buildAction = proc ((resolvedCustomTypes, scalarsMap, orderedRoles), action) -> do
let ActionMetadata name comment def actionPermissions = action
addActionContext e = "in action " <> name <<> "; " <> e
(|
withRecordInconsistency
( bindErrorA
-< modifyErr addActionContext $ do
(resolvedDef, outObject) <-
resolveAction env resolvedCustomTypes def scalarsMap
let permissionInfos = map (ActionPermissionInfo . _apmRole) actionPermissions
metadataPermissionMap = mapFromL _apiRole permissionInfos
permissionsMap = mkBooleanPermissionMap ActionPermissionInfo metadataPermissionMap orderedRoles
forwardClientHeaders = _adForwardClientHeaders resolvedDef
outputType = unGraphQLType $ _adOutputType def
return $ ActionInfo name (outputType, outObject) resolvedDef permissionsMap forwardClientHeaders comment
)
|) (mkActionMetadataObject action)
buildRemoteSchemaRemoteRelationship ::
forall arr m.
( ArrowChoice arr,
ArrowWriter (Seq (Either InconsistentMetadata MetadataDependency)) arr,
ArrowKleisli m arr,
MonadError QErr m
) =>
scaffolding for remote-schemas module The main aim of the PR is: 1. To set up a module structure for 'remote-schemas' package. 2. Move parts by the remote schema codebase into the new module structure to validate it. ## Notes to the reviewer Why a PR with large-ish diff? 1. We've been making progress on the MM project but we don't yet know long it is going to take us to get to the first milestone. To understand this better, we need to figure out the unknowns as soon as possible. Hence I've taken a stab at the first two items in the [end-state](https://gist.github.com/0x777/ca2bdc4284d21c3eec153b51dea255c9) document to figure out the unknowns. Unsurprisingly, there are a bunch of issues that we haven't discussed earlier. These are documented in the 'open questions' section. 1. The diff is large but that is only code moved around and I've added a section that documents how things are moved. In addition, there are fair number of PR comments to help with the review process. ## Changes in the PR ### Module structure Sets up the module structure as follows: ``` Hasura/ RemoteSchema/ Metadata/ Types.hs SchemaCache/ Types.hs Permission.hs RemoteRelationship.hs Build.hs MetadataAPI/ Types.hs Execute.hs ``` ### 1. Types representing metadata are moved Types that capture metadata information (currently scattered across several RQL modules) are moved into `Hasura.RemoteSchema.Metadata.Types`. - This new module only depends on very 'core' modules such as `Hasura.Session` for the notion of roles and `Hasura.Incremental` for `Cacheable` typeclass. - The requirement on database modules is avoided by generalizing the remote schemas metadata to accept an arbitrary 'r' for a remote relationship definition. ### 2. SchemaCache related types and build logic have been moved Types that represent remote schemas information in SchemaCache are moved into `Hasura.RemoteSchema.SchemaCache.Types`. Similar to `H.RS.Metadata.Types`, this module depends on 'core' modules except for `Hasura.GraphQL.Parser.Variable`. It has something to do with remote relationships but I haven't spent time looking into it. The validation of 'remote relationships to remote schema' is also something that needs to be looked at. Rips out the logic that builds remote schema's SchemaCache information from the monolithic `buildSchemaCacheRule` and moves it into `Hasura.RemoteSchema.SchemaCache.Build`. Further, the `.SchemaCache.Permission` and `.SchemaCache.RemoteRelationship` have been created from existing modules that capture schema cache building logic for those two components. This was a fair amount of work. On main, currently remote schema's SchemaCache information is built in two phases - in the first phase, 'permissions' and 'remote relationships' are ignored and in the second phase they are filled in. While remote relationships can only be resolved after partially resolving sources and other remote schemas, the same isn't true for permissions. Further, most of the work that is done to resolve remote relationships can be moved to the first phase so that the second phase can be a very simple traversal. This is the approach that was taken - resolve permissions and as much as remote relationships information in the first phase. ### 3. Metadata APIs related types and build logic have been moved The types that represent remote schema related metadata APIs and the execution logic have been moved to `Hasura.RemoteSchema.MetadataAPI.Types` and `.Execute` modules respectively. ## Open questions: 1. `Hasura.RemoteSchema.Metadata.Types` is so called because I was hoping that all of the metadata related APIs of remote schema can be brought in at `Hasura.RemoteSchema.Metadata.API`. However, as metadata APIs depended on functions from `SchemaCache` module (see [1](https://github.com/hasura/graphql-engine-mono/blob/ceba6d62264603ee5d279814677b29bcc43ecaea/server/src-lib/Hasura/RQL/DDL/RemoteSchema.hs#L55) and [2](https://github.com/hasura/graphql-engine-mono/blob/ceba6d62264603ee5d279814677b29bcc43ecaea/server/src-lib/Hasura/RQL/DDL/RemoteSchema.hs#L91), it made more sense to create a separate top-level module for `MetadataAPI`s. Maybe we can just have `Hasura.RemoteSchema.Metadata` and get rid of the extra nesting or have `Hasura.RemoteSchema.Metadata.{Core,Permission,RemoteRelationship}` if we want to break them down further. 1. `buildRemoteSchemas` in `H.RS.SchemaCache.Build` has the following type: ```haskell buildRemoteSchemas :: ( ArrowChoice arr, Inc.ArrowDistribute arr, ArrowWriter (Seq CollectedInfo) arr, Inc.ArrowCache m arr, MonadIO m, HasHttpManagerM m, Inc.Cacheable remoteRelationshipDefinition, ToJSON remoteRelationshipDefinition, MonadError QErr m ) => Env.Environment -> ( (Inc.Dependency (HashMap RemoteSchemaName Inc.InvalidationKey), OrderedRoles), [RemoteSchemaMetadataG remoteRelationshipDefinition] ) `arr` HashMap RemoteSchemaName (PartiallyResolvedRemoteSchemaCtxG remoteRelationshipDefinition, MetadataObject) ``` Note the dependence on `CollectedInfo` which is defined as ```haskell data CollectedInfo = CIInconsistency InconsistentMetadata | CIDependency MetadataObject -- ^ for error reporting on missing dependencies SchemaObjId SchemaDependency deriving (Eq) ``` this pretty much means that remote schemas is dependent on types from databases, actions, .... How do we fix this? Maybe introduce a typeclass such as `ArrowCollectRemoteSchemaDependencies` which is defined in `Hasura.RemoteSchema` and then implemented in graphql-engine? 1. The dependency on `buildSchemaCacheFor` in `.MetadataAPI.Execute` which has the following signature: ```haskell buildSchemaCacheFor :: (QErrM m, CacheRWM m, MetadataM m) => MetadataObjId -> MetadataModifier -> ``` This can be easily resolved if we restrict what the metadata APIs are allowed to do. Currently, they operate in an unfettered access to modify SchemaCache (the `CacheRWM` constraint): ```haskell runAddRemoteSchema :: ( QErrM m, CacheRWM m, MonadIO m, HasHttpManagerM m, MetadataM m, Tracing.MonadTrace m ) => Env.Environment -> AddRemoteSchemaQuery -> m EncJSON ``` This should instead be changed to restrict remote schema APIs to only modify remote schema metadata (but has access to the remote schemas part of the schema cache), this dependency is completely removed. ```haskell runAddRemoteSchema :: ( QErrM m, MonadIO m, HasHttpManagerM m, MonadReader RemoteSchemasSchemaCache m, MonadState RemoteSchemaMetadata m, Tracing.MonadTrace m ) => Env.Environment -> AddRemoteSchemaQuery -> m RemoteSchemeMetadataObjId ``` The idea is that the core graphql-engine would call these functions and then call `buildSchemaCacheFor`. PR-URL: https://github.com/hasura/graphql-engine-mono/pull/6291 GitOrigin-RevId: 51357148c6404afe70219afa71bd1d59bdf4ffc6
2022-10-21 06:13:07 +03:00
( (HashMap SourceName (AB.AnyBackend PartiallyResolvedSource), PartiallyResolvedRemoteSchemaMap),
(RemoteSchemaName, RemoteSchemaIntrospection, G.Name, RemoteRelationship)
)
`arr` Maybe (RemoteFieldInfo G.Name)
buildRemoteSchemaRemoteRelationship =
proc
( (allSources, remoteSchemaMap),
(remoteSchema, remoteSchemaIntrospection, typeName, rr@RemoteRelationship {..})
)
-> do
let metadataObject = mkRemoteSchemaRemoteRelationshipMetadataObject (remoteSchema, typeName, rr)
schemaObj = SORemoteSchemaRemoteRelationship remoteSchema typeName _rrName
addRemoteRelationshipContext e = "in remote relationship" <> _rrName <<> ": " <> e
(|
withRecordInconsistency
( do
(remoteField, rhsDependencies) <-
bindErrorA
-< modifyErr addRemoteRelationshipContext do
allowedLHSJoinFields <- getRemoteSchemaEntityJoinColumns remoteSchema remoteSchemaIntrospection typeName
buildRemoteFieldInfo (remoteSchemaToLHSIdentifier remoteSchema) allowedLHSJoinFields rr allSources remoteSchemaMap
-- buildRemoteFieldInfo only knows how to construct dependencies on the RHS of the join condition,
-- so the dependencies on the remote relationship on the LHS entity have to be computed here
let lhsDependencies =
-- a direct dependency on the remote schema on which this is defined
[SchemaDependency (SORemoteSchema remoteSchema) DRRemoteRelationship]
recordDependencies -< (metadataObject, schemaObj, lhsDependencies <> rhsDependencies)
returnA -< remoteField
)
|) metadataObject
mkRemoteSchemaRemoteRelationshipMetadataObject ::
(RemoteSchemaName, G.Name, RemoteRelationship) ->
MetadataObject
mkRemoteSchemaRemoteRelationshipMetadataObject (remoteSchemaName, typeName, RemoteRelationship {..}) =
let objectId =
MORemoteSchemaRemoteRelationship remoteSchemaName typeName _rrName
in MetadataObject objectId $
toJSON $
CreateRemoteSchemaRemoteRelationship remoteSchemaName typeName _rrName _rrDefinition
data BackendInfoAndSourceMetadata b = BackendInfoAndSourceMetadata
{ _bcasmBackendInfo :: BackendInfo b,
_bcasmSourceMetadata :: SourceMetadata b
}
deriving stock (Generic)
deriving instance (Backend b) => Show (BackendInfoAndSourceMetadata b)
deriving instance (Backend b) => Eq (BackendInfoAndSourceMetadata b)
joinBackendInfosToSources ::
BackendCache ->
InsOrdHashMap SourceName BackendSourceMetadata ->
InsOrdHashMap SourceName (AB.AnyBackend BackendInfoAndSourceMetadata)
joinBackendInfosToSources backendInfos sources =
flip OMap.map sources $ \abSourceMetadata ->
AB.dispatchAnyBackend @Backend (unBackendSourceMetadata abSourceMetadata) $ \(sourceMetadata :: SourceMetadata b) ->
let _bcasmBackendInfo = maybe mempty unBackendInfoWrapper (BackendMap.lookup @b backendInfos)
_bcasmSourceMetadata = sourceMetadata
in AB.mkAnyBackend @b BackendInfoAndSourceMetadata {..}
{- Note [Keep invalidation keys for inconsistent objects]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
After building the schema cache, we prune InvalidationKeys for objects
that no longer exist in the schema to avoid leaking memory for objects
that have been dropped. However, note that we *dont* want to drop
keys for objects that are simply inconsistent!
Why? The object is still in the metadata, so next time we reload it,
well reprocess that object. We want to reuse the cache if its
definition hasnt changed, but if we dropped the invalidation key, it
will incorrectly be reprocessed (since the invalidation key changed
from present to absent). -}