graphql-engine/server/src-lib/Hasura/GraphQL/Schema/Common.hs
David Overton 346804fc67 Support nested object fields in DC API and use this to implement nest…
## Description

This change adds support for nested object fields in HGE IR and Schema Cache, the Data Connectors backend and API, and the MongoDB agent.

### Data Connector API changes

- The `/schema` endpoint response now includes an optional set of GraphQL type definitions. Table column types can refer to these definitions by name.
- Queries can now include a new field type `object` which contains a column name and a nested query. This allows querying into a nested object within a field.

### MongoDB agent changes

- Add support for querying into nested documents using the new `object` field type.

### HGE changes

- The `Backend` type class has a new type family `XNestedObjects b` which controls whether or not a backend supports querying into nested objects. This is currently enabled only for the `DataConnector` backend.
- For backends that support nested objects, the `FieldInfo` type gets a new constructor `FINestedObject`, and the `AnnFieldG` type gets a new constructor `AFNestedObject`.
- If the DC `/schema` endpoint returns any custom GraphQL type definitions they are stored in the `TableInfo` for each table in the source.
- During schema cache building, the function `addNonColumnFields` will check whether any column types match custom GraphQL object types stored in the `TableInfo`. If so, they are converted into `FINestedObject` instead of `FIColumn` in the `FieldInfoMap`.
- When building the `FieldParser`s from `FieldInfo` (function `fieldSelection`) any `FINestedObject` fields are converted into nested object parsers returning `AFNestedObject`.
- The `DataConnector` query planner converts `AFNestedObject` fields into `object` field types in the query sent to the agent.

## Limitations

### HGE not yet implemented:
- Support for nested arrays
- Support for nested objects/arrays in mutations
- Support for nested objects/arrays in order-by
- Support for filters (`where`) in nested objects/arrays
- Support for adding custom GraphQL types via track table metadata API
- Support for interface and union types
- Tests for nested objects

### Mongo agent not yet implemented:

- Generate nested object types from validation schema
- Support for aggregates
- Support for order-by
- Configure agent port
- Build agent in CI
- Agent tests for nested objects and MongoDB agent

PR-URL: https://github.com/hasura/graphql-engine-mono/pull/7844
GitOrigin-RevId: aec9ec1e4216293286a68f9b1af6f3f5317db423
2023-04-11 01:30:37 +00:00

500 lines
19 KiB
Haskell
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{-# LANGUAGE TemplateHaskell #-}
module Hasura.GraphQL.Schema.Common
( SchemaContext (..),
SchemaKind (..),
RemoteRelationshipParserBuilder (..),
NodeInterfaceParserBuilder (..),
MonadBuildSchemaBase,
retrieve,
SchemaT (..),
MonadBuildSourceSchema,
MonadBuildRemoteSchema,
MonadBuildActionSchema,
runSourceSchema,
runRemoteSchema,
runActionSchema,
ignoreRemoteRelationship,
isHasuraSchema,
AggSelectExp,
AnnotatedField,
AnnotatedFields,
ConnectionFields,
ConnectionSelectExp,
AnnotatedActionField,
AnnotatedActionFields,
AnnotatedNestedObjectSelect,
EdgeFields,
Scenario (..),
SelectArgs,
SelectStreamArgs,
SelectExp,
StreamSelectExp,
TablePerms,
getTableRoles,
getCustomReturnTypeRoles,
askTableInfo,
comparisonAggOperators,
mapField,
mkDescriptionWith,
numericAggOperators,
optionalFieldParser,
parsedSelectionsToFields,
partialSQLExpToUnpreparedValue,
requiredFieldParser,
takeValidLogicalModels,
takeValidFunctions,
takeValidTables,
textToName,
textToGQLIdentifier,
RemoteSchemaParser (..),
mkEnumTypeName,
addEnumSuffix,
peelWithOrigin,
getIntrospectionResult,
)
where
import Data.Either (isRight)
import Data.Has
import Data.HashMap.Strict qualified as Map
import Data.HashMap.Strict.InsOrd qualified as OMap
import Data.List (uncons)
import Data.Text qualified as T
import Data.Text.Casing (GQLNameIdentifier)
import Data.Text.Casing qualified as C
import Data.Text.Extended
import Hasura.Backends.Postgres.SQL.Types qualified as Postgres
import Hasura.Base.Error
import Hasura.CustomReturnType.Cache (CustomReturnTypeInfo (_crtiPermissions))
import Hasura.Function.Cache
import Hasura.GraphQL.Namespace (NamespacedField)
import Hasura.GraphQL.Parser.Internal.TypeChecking qualified as P
import Hasura.GraphQL.Schema.Node
import Hasura.GraphQL.Schema.Options (SchemaOptions)
import Hasura.GraphQL.Schema.Options qualified as Options
import Hasura.GraphQL.Schema.Parser qualified as P
import Hasura.GraphQL.Schema.Typename
import Hasura.LogicalModel.Cache (LogicalModelCache)
import Hasura.Prelude
import Hasura.RQL.IR qualified as IR
import Hasura.RQL.IR.BoolExp
import Hasura.RQL.Types.Backend
import Hasura.RQL.Types.Common
import Hasura.RQL.Types.Relationships.Remote
import Hasura.RQL.Types.SchemaCache hiding (askTableInfo)
import Hasura.RQL.Types.Source
import Hasura.RQL.Types.SourceCustomization
import Hasura.RemoteSchema.SchemaCache.Types
import Hasura.SQL.AnyBackend qualified as AB
import Hasura.Session (RoleName, adminRoleName)
import Language.GraphQL.Draft.Syntax qualified as G
-------------------------------------------------------------------------------
-- | Aggregation of contextual information required to build the schema.
data SchemaContext = SchemaContext
{ -- | the kind of schema being built
scSchemaKind :: SchemaKind,
-- | how to process remote relationships
scRemoteRelationshipParserBuilder :: RemoteRelationshipParserBuilder,
-- | the role for which the schema is being built
scRole :: RoleName
}
-- | The kind of schema we're building, and its associated options.
data SchemaKind
= HasuraSchema
| RelaySchema NodeInterfaceParserBuilder
isHasuraSchema :: SchemaKind -> Bool
isHasuraSchema = \case
HasuraSchema -> True
RelaySchema _ -> False
-- | The set of common constraints required to build the schema.
type MonadBuildSchemaBase m n =
( MonadError QErr m,
P.MonadMemoize m,
P.MonadParse n
)
-- | How a remote relationship field should be processed when building a
-- schema. Injecting this function from the top level avoids having to know how
-- to do top-level dispatch from deep within the schema code.
--
-- Note: the inner function type uses an existential qualifier: it is expected
-- that the given function will work for _any_ monad @m@ that has the relevant
-- constraints. This prevents us from passing a function that is specfic to the
-- monad in which the schema construction will run, but avoids having to
-- propagate type annotations to each call site.
newtype RemoteRelationshipParserBuilder
= RemoteRelationshipParserBuilder
( forall lhsJoinField r n m.
MonadBuildSchemaBase m n =>
RemoteFieldInfo lhsJoinField ->
SchemaT r m (Maybe [P.FieldParser n (IR.RemoteRelationshipField IR.UnpreparedValue)])
)
-- | A 'RemoteRelationshipParserBuilder' that ignores the field altogether, that can
-- be used in tests or to build a source or remote schema in isolation.
ignoreRemoteRelationship :: RemoteRelationshipParserBuilder
ignoreRemoteRelationship = RemoteRelationshipParserBuilder $ const $ pure Nothing
-- | How to build the 'Relay' node.
--
-- Similarly to what we do for remote relationships, we pass in the context the
-- builder function required to build the 'Node' interface, in order to avoid
-- the cross-sources cycles it creates otherwise.
newtype NodeInterfaceParserBuilder = NodeInterfaceParserBuilder
{ runNodeBuilder ::
( forall m n.
MonadBuildSchemaBase m n =>
SchemaContext ->
SchemaOptions ->
m (P.Parser 'P.Output n NodeMap)
)
}
-- TODO: move this to Prelude?
retrieve ::
(MonadReader r m, Has a r) =>
(a -> b) ->
m b
retrieve f = asks $ f . getter
-------------------------------------------------------------------------------
{- Note [SchemaT and stacking]
The schema is explicitly built in `SchemaT`, rather than in an arbitrary monad
`m` that happens to have the desired properties (`MonadReader`, `MonadMemoize`,
`MonadError`, and so on). The main reason why we do this is that we want to
avoid a specific performance issue that arises out of two specific constraints:
- we want to build each part of the schema (such as sources and remote
schemas) with its own dedicated minimal reader context (i.e. not using a
shared reader context that is the union of all the information required);
- we want to be able to process remote-relationships, which means "altering"
the reader context when jumping from one "part" of the schema to another.
What that means, in practice, is that we have to call `runReaderT` (or an
equivalent) every time we build a part of the schema (at the root level or as
part of a remote relationship) so that the part we build has access to its
context. When processing a remote relationship, the calling code is *already* in
a monad stack that contains a `ReaderT`, since we were processing a given part
of the schema. If we directly call `runReaderT` to process the RHS of the remote
relationship, we implicitly make it so that the monad stack of the LHS is the
base underneath the `ReaderT` of the RHS; in other terms, we stack another
reader on top of the existing monad stack.
As the schema is built in a "depth-first" way, in a complicated schema with a
lot of remote relationships we would end up with several readers stacked upon
one another. A manually run benchmark showed that this could significantly
impact performance in complicated schemas. We do now have a benchmark set to
replicate this specific case (see the "deep_schema" benchmark set for more
information).
To prevent this stacking, we need to be able to "bring back" the result of the
`runReaderT` back into the calling monad, rather than defaulting to having the
calling monad be the base of the reader. The simplest way of doing this is to
enforce that we are always building the schema in a monad stack that has the
reader on top of some arbitrary *shared* base. This gives us the guarantee that
the LHS of any remote relationship, the calling context for `runReaderT`, is
itself a `ReaderT` on top og that known shared base, meaning that after a call
to `runReaderT` on another part of the schema, we can always go back to the
calling monad with a simple `lift`, as demonstrated in
'remoteRelationshipField'.
-}
-- | The monad in which the schema is built.
--
-- The implementation of 'SchemaT' is intended to be opaque: running a
-- computation in 'SchemaT' is intended to be done via calls to
-- 'runSourceSchema' and 'runRemoteSchema', which also enforce what the @r@
-- parameter should be in each case.
--
-- The reason why we want to enforce that the schema is built in a reader on top
-- of an arbitrary base monad is for performance: see Note [SchemaT and
-- stacking] for more information.
--
-- In the future, we might monomorphize this further to make `MemoizeT` explicit.
newtype SchemaT r m a = SchemaT {runSchemaT :: ReaderT r m a}
deriving newtype (Functor, Applicative, Monad, MonadReader r, P.MonadMemoize, MonadTrans, MonadError e)
type MonadBuildSourceSchema b r m n =
( MonadBuildSchemaBase m n,
Has SchemaContext r,
Has SchemaOptions r,
Has (SourceInfo b) r
)
-- | Runs a schema-building computation with all the context required to build a source.
runSourceSchema ::
forall b m a.
SchemaContext ->
SchemaOptions ->
SourceInfo b ->
SchemaT
( SchemaContext,
SchemaOptions,
SourceInfo b
)
m
a ->
m a
runSourceSchema context options sourceInfo (SchemaT action) = runReaderT action (context, options, sourceInfo)
type MonadBuildRemoteSchema r m n =
( MonadBuildSchemaBase m n,
Has SchemaContext r,
Has CustomizeRemoteFieldName r,
Has MkTypename r
)
-- | Runs a schema-building computation with all the context required to build a remote schema.
runRemoteSchema ::
SchemaContext ->
SchemaT
( SchemaContext,
MkTypename,
CustomizeRemoteFieldName
)
m
a ->
m a
runRemoteSchema context (SchemaT action) = runReaderT action (context, mempty, mempty)
type MonadBuildActionSchema r m n =
( MonadBuildSchemaBase m n,
Has SchemaContext r,
Has SchemaOptions r
)
-- | Runs a schema-building computation with all the context required to build actions.
runActionSchema ::
SchemaContext ->
SchemaOptions ->
SchemaT
( SchemaContext,
SchemaOptions
)
m
a ->
m a
runActionSchema context options (SchemaT action) = runReaderT action (context, options)
-------------------------------------------------------------------------------
type SelectExp b = IR.AnnSimpleSelectG b (IR.RemoteRelationshipField IR.UnpreparedValue) (IR.UnpreparedValue b)
type StreamSelectExp b = IR.AnnSimpleStreamSelectG b (IR.RemoteRelationshipField IR.UnpreparedValue) (IR.UnpreparedValue b)
type AggSelectExp b = IR.AnnAggregateSelectG b (IR.RemoteRelationshipField IR.UnpreparedValue) (IR.UnpreparedValue b)
type ConnectionSelectExp b = IR.ConnectionSelect b (IR.RemoteRelationshipField IR.UnpreparedValue) (IR.UnpreparedValue b)
type SelectArgs b = IR.SelectArgsG b (IR.UnpreparedValue b)
type SelectStreamArgs b = IR.SelectStreamArgsG b (IR.UnpreparedValue b)
type TablePerms b = IR.TablePermG b (IR.UnpreparedValue b)
type AnnotatedFields b = IR.AnnFieldsG b (IR.RemoteRelationshipField IR.UnpreparedValue) (IR.UnpreparedValue b)
type AnnotatedField b = IR.AnnFieldG b (IR.RemoteRelationshipField IR.UnpreparedValue) (IR.UnpreparedValue b)
type ConnectionFields b = IR.ConnectionFields b (IR.RemoteRelationshipField IR.UnpreparedValue) (IR.UnpreparedValue b)
type EdgeFields b = IR.EdgeFields b (IR.RemoteRelationshipField IR.UnpreparedValue) (IR.UnpreparedValue b)
type AnnotatedActionFields = IR.ActionFieldsG (IR.RemoteRelationshipField IR.UnpreparedValue)
type AnnotatedActionField = IR.ActionFieldG (IR.RemoteRelationshipField IR.UnpreparedValue)
type AnnotatedNestedObjectSelect b = IR.AnnNestedObjectSelectG b (IR.RemoteRelationshipField IR.UnpreparedValue) (IR.UnpreparedValue b)
-------------------------------------------------------------------------------
data RemoteSchemaParser n = RemoteSchemaParser
{ piQuery :: [P.FieldParser n (NamespacedField (IR.RemoteSchemaRootField (IR.RemoteRelationshipField IR.UnpreparedValue) RemoteSchemaVariable))],
piMutation :: Maybe [P.FieldParser n (NamespacedField (IR.RemoteSchemaRootField (IR.RemoteRelationshipField IR.UnpreparedValue) RemoteSchemaVariable))],
piSubscription :: Maybe [P.FieldParser n (NamespacedField (IR.RemoteSchemaRootField (IR.RemoteRelationshipField IR.UnpreparedValue) RemoteSchemaVariable))]
}
getTableRoles :: BackendSourceInfo -> [RoleName]
getTableRoles bsi = AB.dispatchAnyBackend @Backend bsi go
where
go si = Map.keys . _tiRolePermInfoMap =<< Map.elems (_siTables si)
getCustomReturnTypeRoles :: BackendSourceInfo -> [RoleName]
getCustomReturnTypeRoles bsi = AB.dispatchAnyBackend @Backend bsi go
where
go si = Map.keys . _crtiPermissions =<< Map.elems (_siCustomReturnTypes si)
-- | Looks up table information for the given table name. This function
-- should never fail, since the schema cache construction process is
-- supposed to ensure all dependencies are resolved.
-- TODO: deduplicate this with `CacheRM`.
askTableInfo ::
forall b r m.
(Backend b, MonadError QErr m, MonadReader r m, Has (SourceInfo b) r) =>
TableName b ->
m (TableInfo b)
askTableInfo tableName = do
SourceInfo {..} <- asks getter
Map.lookup tableName _siTables
`onNothing` throw500 ("askTableInfo: no info for table " <> dquote tableName <> " in source " <> dquote _siName)
-- | Whether the request is sent with `x-hasura-use-backend-only-permissions` set to `true`.
data Scenario = Backend | Frontend deriving (Enum, Show, Eq)
textToName :: MonadError QErr m => Text -> m G.Name
textToName textName =
G.mkName textName
`onNothing` throw400
ValidationFailed
( "cannot include "
<> textName <<> " in the GraphQL schema because "
<> " it is not a valid GraphQL identifier"
)
textToGQLIdentifier :: MonadError QErr m => Text -> m GQLNameIdentifier
textToGQLIdentifier textName = do
let gqlIdents = do
(pref, suffs) <- uncons (C.fromSnake textName)
prefName <- G.mkName pref
suffNames <- traverse G.mkNameSuffix suffs
pure $ C.fromAutogeneratedTuple (prefName, suffNames)
gqlIdents
`onNothing` throw400
ValidationFailed
( "cannot include "
<> textName <<> " in the GraphQL schema because "
<> " it is not a valid GraphQL identifier"
)
partialSQLExpToUnpreparedValue :: PartialSQLExp b -> IR.UnpreparedValue b
partialSQLExpToUnpreparedValue (PSESessVar pftype var) = IR.UVSessionVar pftype var
partialSQLExpToUnpreparedValue PSESession = IR.UVSession
partialSQLExpToUnpreparedValue (PSESQLExp sqlExp) = IR.UVLiteral sqlExp
mapField ::
Functor m =>
P.InputFieldsParser m (Maybe a) ->
(a -> b) ->
P.InputFieldsParser m (Maybe b)
mapField fp f = fmap (fmap f) fp
parsedSelectionsToFields ::
-- | how to handle @__typename@ fields
(Text -> a) ->
OMap.InsOrdHashMap G.Name (P.ParsedSelection a) ->
Fields a
parsedSelectionsToFields mkTypenameFromText =
OMap.toList
>>> map (FieldName . G.unName *** P.handleTypename (mkTypenameFromText . G.unName))
numericAggOperators :: [C.GQLNameIdentifier]
numericAggOperators =
[ C.fromAutogeneratedName $$(G.litName "sum"),
C.fromAutogeneratedName $$(G.litName "avg"),
C.fromAutogeneratedName $$(G.litName "stddev"),
C.fromAutogeneratedTuple $$(G.litGQLIdentifier ["stddev", "samp"]),
C.fromAutogeneratedTuple $$(G.litGQLIdentifier ["stddev", "pop"]),
C.fromAutogeneratedName $$(G.litName "variance"),
C.fromAutogeneratedTuple $$(G.litGQLIdentifier ["var", "samp"]),
C.fromAutogeneratedTuple $$(G.litGQLIdentifier ["var", "pop"])
]
comparisonAggOperators :: [C.GQLNameIdentifier]
comparisonAggOperators =
[ C.fromAutogeneratedName $$(G.litName "max"),
C.fromAutogeneratedName $$(G.litName "min")
]
mkDescriptionWith :: Maybe Postgres.PGDescription -> Text -> G.Description
mkDescriptionWith descM defaultTxt = G.Description $ case descM of
Nothing -> defaultTxt
Just (Postgres.PGDescription descTxt) -> T.unlines [descTxt, "\n", defaultTxt]
-- TODO why do we do these validations at this point? What does it mean to track
-- a function but not add it to the schema...?
-- Auke:
-- I believe the intention is simply to allow the console to do postgres data management
-- Karthikeyan: Yes, this is correct. We allowed this pre PDV but somehow
-- got removed in PDV. OTOH, Im not sure how prevalent this feature
-- actually is
takeValidTables :: forall b. Backend b => TableCache b -> TableCache b
takeValidTables = Map.filterWithKey graphQLTableFilter
where
graphQLTableFilter tableName tableInfo =
-- either the table name should be GraphQL compliant
-- or it should have a GraphQL custom name set with it
isRight (tableGraphQLName @b tableName)
|| isJust (_tcCustomName $ _tciCustomConfig $ _tiCoreInfo tableInfo)
-- TODO and what about graphql-compliant function names here too?
takeValidFunctions :: forall b. FunctionCache b -> FunctionCache b
takeValidFunctions = Map.filter functionFilter
where
functionFilter = not . isSystemDefined . _fiSystemDefined
-- | Currently we do no validation on logical models in schema. Should we?
takeValidLogicalModels :: forall b. LogicalModelCache b -> LogicalModelCache b
takeValidLogicalModels = id
-- root field builder helpers
requiredFieldParser ::
(Functor n, Functor m) =>
(a -> b) ->
m (P.FieldParser n a) ->
m (Maybe (P.FieldParser n b))
requiredFieldParser f = fmap $ Just . fmap f
optionalFieldParser ::
(Functor n, Functor m) =>
(a -> b) ->
m (Maybe (P.FieldParser n a)) ->
m (Maybe (P.FieldParser n b))
optionalFieldParser = fmap . fmap . fmap
-- | Builds the type name for referenced enum tables.
mkEnumTypeName :: forall b r m. (Backend b, MonadError QErr m, Has (SourceInfo b) r) => TableName b -> Maybe G.Name -> SchemaT r m G.Name
mkEnumTypeName enumTableName enumTableCustomName = do
customization <- retrieve $ _siCustomization @b
enumTableGQLName <- getTableIdentifier @b enumTableName `onLeft` throwError
pure $ addEnumSuffix customization enumTableGQLName enumTableCustomName
addEnumSuffix :: ResolvedSourceCustomization -> GQLNameIdentifier -> Maybe G.Name -> G.Name
addEnumSuffix customization enumTableGQLName enumTableCustomName =
runMkTypename (_rscTypeNames customization) $
applyTypeNameCaseIdentifier (_rscNamingConvention customization) $
mkEnumTableTypeName enumTableGQLName enumTableCustomName
-- TODO: figure out what the purpose of this method is.
peelWithOrigin :: P.MonadParse m => P.Parser 'P.Both m a -> P.Parser 'P.Both m (IR.ValueWithOrigin a)
peelWithOrigin parser =
parser
{ P.pParser = \case
P.GraphQLValue (G.VVariable var@P.Variable {vInfo, vValue}) -> do
-- Check types c.f. 5.8.5 of the June 2018 GraphQL spec
P.typeCheck False (P.toGraphQLType $ P.pType parser) var
IR.ValueWithOrigin vInfo <$> P.pParser parser (absurd <$> vValue)
value -> IR.ValueNoOrigin <$> P.pParser parser value
}
getIntrospectionResult :: Options.RemoteSchemaPermissions -> RoleName -> RemoteSchemaCtxG remoteFieldInfo -> Maybe IntrospectionResult
getIntrospectionResult remoteSchemaPermsCtx role remoteSchemaContext =
if
| -- admin doesn't have a custom annotated introspection, defaulting to the original one
role == adminRoleName ->
pure $ _rscIntroOriginal remoteSchemaContext
| -- if permissions are disabled, the role map will be empty, defaulting to the original one
remoteSchemaPermsCtx == Options.DisableRemoteSchemaPermissions ->
pure $ _rscIntroOriginal remoteSchemaContext
| -- otherwise, look the role up in the map; if we find nothing, then the role doesn't have access
otherwise ->
Map.lookup role (_rscPermissions remoteSchemaContext)