server: query remote server in a remote join query iff all arguments are not null (#31)

* server: query remote server in a remote join query iff all arguments are not null

Co-authored-by: Karthikeyan Chinnakonda <karthikeyan@hasura.io>
GITHUB_PR_NUMBER: 6199
GITHUB_PR_URL: https://github.com/hasura/graphql-engine/pull/6199

* Update server/src-lib/Hasura/Backends/Postgres/Execute/RemoteJoin.hs

Co-authored-by: Brandon Simmons <brandon@hasura.io>

* Apply suggestions from code review

Co-authored-by: Brandon Simmons <brandon@hasura.io>

* Update server/tests-py/test_remote_relationships.py

Co-authored-by: Brandon Simmons <brandon@hasura.io>

* Apply suggestions from code review

Co-authored-by: Brandon Simmons <brandon@hasura.io>

* use guard instead of case match

* add comment about the remote relationship joining

* add a comment about the design discussion

* update CHANGELOG

Co-authored-by: Karthikeyan Chinnakonda <karthikeyan@hasura.io>
Co-authored-by: Brandon Simmons <brandon@hasura.io>
GitOrigin-RevId: ce7e8288d37ad7c32705f96cb6363f6ad68f3c0a
This commit is contained in:
hasura-bot 2020-11-18 14:29:16 +05:30
parent 320835a6f2
commit 8eebcb9bdf
8 changed files with 170 additions and 22 deletions

View File

@ -123,6 +123,26 @@ const newTable: TableEntry = {
![](./contrib/metadata-types/json-schema-typecheck-demo.gif)
### Breaking changes
#### PDV
This release contains the [PDV refactor (#4111)](https://github.com/hasura/graphql-engine/pull/4111), a significant rewrite of the internals of the server, which did include some breaking changes:
- The semantics of explicit `null` values in `where` filters have changed according to the discussion in [issue 704](https://github.com/hasura/graphql-engine/issues/704#issuecomment-635571407): an explicit `null` value in a comparison input object will be treated as an error rather than resulting in the expression being evaluated to `True`. For instance: `delete_users(where: {id: {_eq: $userId}}) { name }` will yield an error if `$userId` is `null` instead of deleting all users.
- The validation of required headers has been fixed (closing #14 and #3659):
- if a query selects table `bar` through table `foo` via a relationship, the required permissions headers will be the union of the required headers of table `foo` and table `bar` (we used to only check the headers of the root table);
- if an insert does not have an `on_conflict` clause, it will not require the update permissions headers.
#### Remote Relationship
In this release, a breaking change has been introduced:
In a remote relationship query, the remote schema will be queried when all of the joining arguments
are **not** `null` values. When there are `null` value(s), the remote schema won't be queried and the
response of the remote relationship field will be `null`. Earlier, the remote schema
was queried with the `null` value arguments and the response depended upon how the remote schema handled the `null`
arguments.
### Bug fixes and improvements

View File

@ -49,7 +49,6 @@ import Hasura.RQL.Types
import Hasura.Server.Version (HasVersion)
import Hasura.Session
-- | Executes given query and fetch response JSON from Postgres. Substitutes remote relationship fields.
executeQueryWithRemoteJoins
:: ( HasVersion
@ -89,7 +88,8 @@ processRemoteJoins env manager reqHdrs userInfo pgRes rjs = do
jsonRes <- onLeft (AO.eitherDecode pgRes) (throw500 . T.pack)
-- Step 2: Traverse through the JSON obtained in above step and generate composite JSON value with remote joins
compositeJson <- traverseQueryResponseJSON rjMap jsonRes
let remoteJoins = collectRemoteFields compositeJson
-- The remote server is queried only when all of the joining fields are *not* NULL:
let remoteJoins = catMaybes $ collectRemoteFields compositeJson
-- Step 3: Make queries to remote server and fetch graphql response
remoteServerResp <- fetchRemoteJoinFields env manager reqHdrs userInfo remoteJoins
-- Step 4: Replace remote fields in composite json with remote join values
@ -313,7 +313,7 @@ data RemoteJoinField
-- from remote join map and query response JSON from Postgres.
traverseQueryResponseJSON
:: (MonadError QErr m)
=> RemoteJoinMap 'Postgres -> AO.Value -> m (CompositeValue RemoteJoinField)
=> RemoteJoinMap 'Postgres -> AO.Value -> m (CompositeValue (Maybe RemoteJoinField))
traverseQueryResponseJSON rjm =
flip runReaderT rjm . flip evalStateT (Counter 0) . traverseValue mempty
where
@ -322,24 +322,72 @@ traverseQueryResponseJSON rjm =
askRemoteJoins path = asks (Map.lookup path)
traverseValue :: (MonadError QErr m, MonadReader (RemoteJoinMap 'Postgres) m, MonadState Counter m)
=> FieldPath -> AO.Value -> m (CompositeValue RemoteJoinField)
=> FieldPath -> AO.Value -> m (CompositeValue (Maybe RemoteJoinField))
traverseValue path = \case
AO.Object obj -> traverseObject obj
AO.Array arr -> CVObjectArray <$> mapM (traverseValue path) (toList arr)
v -> pure $ CVOrdValue v
where
mkRemoteSchemaField siblingFields remoteJoin = do
mkRemoteSchemaField
:: (MonadError QErr m, MonadState Counter m)
=> AO.Object
-> RemoteJoin 'Postgres
-> m (Maybe RemoteJoinField)
mkRemoteSchemaField siblingFields remoteJoin = runMaybeT $ do
counter <- getCounter
let RemoteJoin fieldName inputArgs selSet hasuraFields fieldCall rsi _ = remoteJoin
hasuraFieldVariables <- mapM (parseGraphQLName . getFieldNameTxt) $ toList hasuraFields
siblingFieldArgsVars <- mapM (\(k,val) -> do
(,) <$> parseGraphQLName k <*> ordJSONValueToGValue val)
$ siblingFields
-- when any of the joining fields are `NULL`, we don't query
-- the remote schema
--
-- The rationale for performing such a join is that, postgres
-- implements joins in the same way. For example:
-- Let's say we have the following tables
--
-- test_db=# select * from users;
-- id | first_name | last_name
-- ----+------------+-----------
-- 4 | foo |
-- 5 | baz | bar
-- 6 | hello |
-- (3 rows)
-- test_db=# select * from address;
-- id | first_name | last_name | address
-- ----+------------+-----------+----------
-- 4 | foo | | address1
-- 5 | baz | bar | address2
-- 6 | | hello | address3
--
-- Executing the following query:
-- select u.first_name,u.last_name,a.address
-- from users u
-- left join address a on u.first_name = a.first_name
-- and u.last_name = a.last_name;
--
-- gives the following result:
--
-- first_name | last_name | address
-- -----------+-----------+----------
-- baz | bar | address2
-- foo | |
-- hello | |
--
-- The data from the `address` table is fetched only when all
-- of the arguments are **NOT** NULL.
--
-- For discussion of this design here
-- see: https://github.com/hasura/graphql-engine-mono/pull/31#issuecomment-728230307
for_ hasuraFields $ \(FieldName fieldNameTxt) -> do
fldValue <- hoistMaybe $ AO.lookup fieldNameTxt siblingFields
guard $ fldValue /= AO.Null
hasuraFieldVariables <- lift $ mapM (parseGraphQLName . getFieldNameTxt) $ toList hasuraFields
siblingFieldArgsVars <- lift $ mapM (\(k,val) -> do
(,) <$> parseGraphQLName k <*> ordJSONValueToGValue val)
$ AO.toList siblingFields
let siblingFieldArgs = Map.fromList $ siblingFieldArgsVars
hasuraFieldArgs = flip Map.filterWithKey siblingFieldArgs $ \k _ -> k `elem` hasuraFieldVariables
fieldAlias <- pathToAlias (appendPath fieldName path) counter
queryField <- fieldCallsToField (inputArgsToMap inputArgs) hasuraFieldArgs selSet fieldAlias fieldCall
fieldAlias <- lift $ pathToAlias (appendPath fieldName path) counter
queryField <- lift $ fieldCallsToField (inputArgsToMap inputArgs) hasuraFieldArgs selSet fieldAlias fieldCall
pure $ RemoteJoinField rsi
fieldAlias
queryField
@ -364,9 +412,9 @@ traverseQueryResponseJSON rjm =
phantomColumnFields = map (fromPGCol . pgiColumn) $
concatMap _rjPhantomFields remoteJoins
if | fieldName `elem` phantomColumnFields -> pure Nothing
| otherwise ->
| otherwise -> do
case find ((== fieldName) . _rjName) remoteJoins of
Just rj -> Just . CVFromRemote <$> mkRemoteSchemaField fields rj
Just rj -> Just . CVFromRemote <$> mkRemoteSchemaField obj rj
Nothing -> Just <$> traverseValue fieldPath value
pure $ CVObject $ OMap.fromList processedFields
@ -499,16 +547,18 @@ fetchRemoteJoinFields env manager reqHdrs userInfo remoteJoins = do
-- | Replace 'RemoteJoinField' in composite JSON with it's json value from remote server response.
replaceRemoteFields
:: MonadError QErr m
=> CompositeValue RemoteJoinField
=> CompositeValue (Maybe RemoteJoinField)
-> AO.Object
-> m AO.Value
replaceRemoteFields compositeJson remoteServerResponse =
compositeValueToJSON <$> traverse replaceValue compositeJson
where
replaceValue rj = do
let alias = _rjfAlias rj
fieldCall = _rjfFieldCall rj
extractAtPath (alias:fieldCall) $ AO.Object remoteServerResponse
-- `Nothing` below signifies that at-least one of the joining fields was NULL
-- , when that happens we have to manually insert the `NULL` value for the
-- remoteField value in the response.
replaceValue Nothing = pure $ AO.Null
replaceValue (Just RemoteJoinField {_rjfAlias, _rjfFieldCall}) =
extractAtPath (_rjfAlias:_rjfFieldCall) $ AO.Object remoteServerResponse
-- | 'FieldCall' is path to remote relationship value in remote server response.
-- 'extractAtPath' traverse through the path and extracts the json value
@ -606,9 +656,8 @@ substituteVariables values = traverse go
where
go = \case
G.VVariable variableName ->
case Map.lookup variableName values of
Nothing -> Failure ["Value for variable " <> variableName <<> " not provided"]
Just value -> pure value
onNothing (Map.lookup variableName values) $
Failure ["Value for variable " <> variableName <<> " not provided"]
G.VList listValue ->
fmap G.VList (traverse go listValue)
G.VObject objectValue ->

View File

@ -15,6 +15,7 @@ module Hasura.Prelude
, base64Decode
, spanMaybeM
, liftEitherM
, hoistMaybe
-- * Efficient coercions
, coerce
, findWithIndex
@ -179,3 +180,7 @@ startTimer = do
return $ do
aft <- liftIO Clock.getMonotonicTimeNSec
return $ nanoseconds $ fromIntegral (aft - bef)
-- copied from http://hackage.haskell.org/package/errors-2.3.0/docs/src/Control.Error.Util.html#hoistMaybe
hoistMaybe :: (Monad m) => Maybe b -> MaybeT m b
hoistMaybe = MaybeT . return

View File

@ -0,0 +1,33 @@
description: Remote join with null joining fields
url: /v1/graphql
status: 200
response:
data:
employees:
- id: 1
name: alice
employeeMessages:
- id: 1
name: alice
msg: You win!
- id: 2
name: null
# since the join argument (name) was `NULL`, we don't perform the remote query
# and just return `null` for the joined field:
employeeMessages: null
- id: 3
name: bob
employeeMessages: []
query:
query: |
query {
employees {
id
name
employeeMessages {
id
name
msg
}
}
}

View File

@ -52,3 +52,17 @@ args:
args:
schema: public
name: authors
- type: run_sql
args:
sql: |
create table employees (
id serial primary key,
name text
);
insert into employees (name) values ('alice'),(NULL),('bob');
- type: track_table
args:
schema: public
name: employees

View File

@ -0,0 +1,16 @@
type: create_remote_relationship
args:
name: employeeMessages
table: employees
hasura_fields:
- id
- name
remote_schema: my-remote-schema
remote_field:
messages:
arguments:
where:
name:
eq: "$name"
includes:
id: ["$id"]

View File

@ -15,6 +15,11 @@ args:
sql: |
drop table if exists authors
- type: run_sql
args:
sql: |
drop table if exists employees
# also drops remote relationship as direct dep
- type: remove_remote_schema
args:

View File

@ -186,6 +186,12 @@ class TestExecution:
assert st_code == 200, resp
check_query_f(hge_ctx, self.dir() + 'basic_multiple_fields.yaml')
# https://github.com/hasura/graphql-engine/issues/5448
def test_remote_join_fields_with_null_joining_fields(self, hge_ctx):
st_code, resp = hge_ctx.v1q_f(self.dir() + 'setup_remote_rel_null_joining_fields.yaml')
assert st_code == 200, resp
check_query_f(hge_ctx, self.dir() + 'remote_rel_with_null_joining_fields.yaml')
def test_nested_fields(self, hge_ctx):
st_code, resp = hge_ctx.v1q_f(self.dir() + 'setup_remote_rel_nested_fields.yaml')
assert st_code == 200, resp