graphql-engine/server/src-lib/Hasura/Backends/BigQuery/Instances/Execute.hs

254 lines
9.3 KiB
Haskell
Raw Normal View History

{-# OPTIONS_GHC -fno-warn-orphans #-}
module Hasura.Backends.BigQuery.Instances.Execute () where
import Data.Aeson qualified as J
import Data.Aeson.Text qualified as J
import Data.HashMap.Strict qualified as HashMap
import Data.HashMap.Strict.InsOrd qualified as OMap
import Data.Text qualified as T
import Data.Text.Lazy qualified as LT
import Data.Text.Lazy.Builder qualified as LT
import Data.Vector qualified as V
import Hasura.Backends.BigQuery.Execute qualified as DataLoader
import Hasura.Backends.BigQuery.FromIr qualified as BigQuery
import Hasura.Backends.BigQuery.Plan
import Hasura.Backends.BigQuery.ToQuery qualified as ToQuery
import Hasura.Backends.BigQuery.Types qualified as BigQuery
import Hasura.Base.Error
import Hasura.Base.Error qualified as E
import Hasura.EncJSON
import Hasura.Function.Cache
import Hasura.GraphQL.Execute.Backend
import Hasura.GraphQL.Namespace (RootFieldAlias)
import Hasura.Prelude
import Hasura.QueryTags
( emptyQueryTagsComment,
)
import Hasura.RQL.IR
import Hasura.RQL.IR.Select qualified as IR
import Hasura.RQL.IR.Value qualified as IR (Provenance (Unknown))
import Hasura.RQL.Types.Backend
import Hasura.RQL.Types.BackendType
import Hasura.RQL.Types.Column
import Hasura.RQL.Types.Common
import Hasura.RQL.Types.Schema.Options qualified as Options
import Hasura.SQL.AnyBackend qualified as AB
import Hasura.Session
import Language.GraphQL.Draft.Syntax qualified as G
import Network.HTTP.Types qualified as HTTP
instance BackendExecute 'BigQuery where
  type PreparedQuery 'BigQuery = Text
  type MultiplexedQuery 'BigQuery = Void

  -- 'ExecutionMonad' describes what must be stacked on top of the base app
  -- monad in order to execute; BigQuery needs nothing extra, hence the
  -- identity transformer.
  type ExecutionMonad 'BigQuery = IdentityT

  mkDBQueryPlan = bqDBQueryPlan
  mkDBMutationPlan = bqDBMutationPlan

  -- Subscriptions (live queries and streaming) are rejected with a 500.
  mkLiveQuerySubscriptionPlan _ _ _ _ _ _ _ =
    throw500 "Cannot currently perform subscriptions on BigQuery sources."
  mkDBStreamingSubscriptionPlan _ _ _ _ _ _ =
    throw500 "Cannot currently perform subscriptions on BigQuery sources."

  mkDBQueryExplain = bqDBQueryExplain
  mkSubscriptionExplain _ =
    throw500 "Cannot currently retrieve query execution plans on BigQuery sources."

  -- Remote relationship plans are delegated to 'bqDBRemoteRelationshipPlan',
  -- which builds a regular query plan over the left-hand-side rows.
  mkDBRemoteRelationshipPlan =
    bqDBRemoteRelationshipPlan
-- query
-- | Build the execution step for a query root field.
--
-- The select is planned up front (planning failures surface in @m@). The
-- returned 'DBStepInfo' carries the rendered SQL text of the select and an
-- action that, when run, executes the select through the dataloader and
-- encodes the resulting record set as JSON.
--
-- The request headers and the operation name are currently ignored.
bqDBQueryPlan ::
  forall m.
  ( MonadError E.QErr m
  ) =>
  UserInfo ->
  SourceName ->
  SourceConfig 'BigQuery ->
  QueryDB 'BigQuery Void (UnpreparedValue 'BigQuery) ->
  [HTTP.Header] ->
  Maybe G.Name ->
  m (DBStepInfo 'BigQuery)
bqDBQueryPlan userInfo sourceName sourceConfig qrf _ _ = do
  -- TODO (naveen): Append query tags to the query
  select <- planNoPlan (BigQuery.bigQuerySourceConfigToFromIrConfig sourceConfig) userInfo qrf
  let action = OnBaseMonad do
        result <-
          DataLoader.runExecute
            sourceConfig
            (DataLoader.executeSelect select)
        case result of
          -- Execution problems become a 500 whose message is rendered with
          -- details hidden; the problem itself is attached as JSON detail.
          Left err ->
            throw500WithDetail
              (DataLoader.executeProblemMessage DataLoader.HideDetails err)
              (J.toJSON err)
          Right (job, recordSet) ->
            pure
              ActionResult
                { arStatistics = Just BigQuery.ExecutionStatistics {_esJob = job},
                  arResult = recordSetToEncJSON (BigQuery.selectCardinality select) recordSet
                }
  pure $ DBStepInfo @'BigQuery sourceName sourceConfig (Just (selectSQLTextForExplain select)) action ()
-- | Encode a dataloader 'RecordSet' as JSON.
--
-- With cardinality 'BigQuery.One' the first row is emitted as a single
-- object; when there is no first row, or with cardinality 'BigQuery.Many',
-- every row is emitted as an element of a list.
recordSetToEncJSON :: BigQuery.Cardinality -> DataLoader.RecordSet -> EncJSON
recordSetToEncJSON cardinality DataLoader.RecordSet {rows} =
  case cardinality of
    BigQuery.Many -> everyRow
    BigQuery.One -> maybe everyRow recordToEnc (rows V.!? 0)
  where
    -- All rows, encoded as a list of objects.
    everyRow = encJFromList (fmap recordToEnc (toList rows))

    -- A single record, encoded as an object keyed by its field names.
    recordToEnc =
      encJFromInsOrdHashMap . fmap valueToEnc . OMap.mapKeys coerce

    -- A single output value; arrays and nested records recurse.
    valueToEnc DataLoader.NullOutputValue = encJFromJValue J.Null
    valueToEnc (DataLoader.DecimalOutputValue v) = encJFromJValue v
    valueToEnc (DataLoader.BigDecimalOutputValue v) = encJFromJValue v
    valueToEnc (DataLoader.FloatOutputValue v) = encJFromJValue v
    valueToEnc (DataLoader.TextOutputValue v) = encJFromJValue v
    valueToEnc (DataLoader.BytesOutputValue v) = encJFromJValue v
    valueToEnc (DataLoader.DateOutputValue v) = encJFromJValue v
    valueToEnc (DataLoader.TimestampOutputValue v) = encJFromJValue v
    valueToEnc (DataLoader.TimeOutputValue v) = encJFromJValue v
    valueToEnc (DataLoader.DatetimeOutputValue v) = encJFromJValue v
    valueToEnc (DataLoader.GeographyOutputValue v) = encJFromJValue v
    valueToEnc (DataLoader.BoolOutputValue v) = encJFromJValue v
    valueToEnc (DataLoader.IntegerOutputValue v) = encJFromJValue v
    valueToEnc (DataLoader.JsonOutputValue v) = encJFromJValue v
    valueToEnc (DataLoader.ArrayOutputValue elems) =
      encJFromList (fmap valueToEnc (toList elems))
    -- A nested record is not really expected here, but it is harmless:
    -- the output simply contains an object at this position.
    valueToEnc (DataLoader.RecordOutputValue nested) = recordToEnc nested
-- mutation
-- | Mutation planning for BigQuery: always fails with a 500, whatever the
-- arguments, since mutations are not supported on this backend.
bqDBMutationPlan ::
  MonadError E.QErr m =>
  UserInfo ->
  Options.StringifyNumbers ->
  SourceName ->
  SourceConfig 'BigQuery ->
  MutationDB 'BigQuery Void (UnpreparedValue 'BigQuery) ->
  [HTTP.Header] ->
  Maybe G.Name ->
  m (DBStepInfo 'BigQuery)
bqDBMutationPlan _ _ _ _ _ _ _ =
  throw500 "mutations are not supported in BigQuery; this should be unreachable"
-- explain
-- | Build the "explain" step for a query root field.
--
-- The select is planned and rendered to SQL text; the returned step's action
-- does not execute anything, it just yields an 'ExplainPlan' holding the
-- field name, the SQL text, and that same text split into lines.
--
-- The request headers and the operation name are currently ignored.
bqDBQueryExplain ::
  MonadError E.QErr m =>
  RootFieldAlias ->
  UserInfo ->
  SourceName ->
  SourceConfig 'BigQuery ->
  QueryDB 'BigQuery Void (UnpreparedValue 'BigQuery) ->
  [HTTP.Header] ->
  Maybe G.Name ->
  m (AB.AnyBackend DBStepInfo)
bqDBQueryExplain fieldName userInfo sourceName sourceConfig qrf _ _ = do
  select <- planNoPlan (BigQuery.bigQuerySourceConfigToFromIrConfig sourceConfig) userInfo qrf
  let textSQL = selectSQLTextForExplain select
  pure $
    AB.mkAnyBackend $
      DBStepInfo @'BigQuery
        sourceName
        sourceConfig
        Nothing
        ( OnBaseMonad $
            pure $
              withNoStatistics $
                encJFromJValue $
                  ExplainPlan
                    fieldName
                    (Just textSQL)
                    (Just (T.lines textSQL))
        )
        ()
-- | Render a select to strict SQL 'Text' using the pretty builder, with
-- parameters left as $1, $2, .. holes.
selectSQLTextForExplain :: BigQuery.Select -> Text
selectSQLTextForExplain select =
  LT.toStrict (LT.toLazyText sqlBuilder)
  where
    -- Only the builder half of the rendered pair is needed here.
    (sqlBuilder, _) = ToQuery.renderBuilderPretty (ToQuery.fromSelect select)
--------------------------------------------------------------------------------
-- Remote Relationships (e.g. DB-to-DB Joins, remote schema joins, etc.)
--------------------------------------------------------------------------------
-- | Construct an action (i.e. 'DBStepInfo') which can marshal some remote
-- relationship information into a form that BigQuery can query against.
--
-- The left-hand-side rows are JSON-encoded into a single string parameter,
-- which is fed to an @unnest@ function call used as the @FROM@ of the
-- selection. (The Postgres implementation uses @jsonb_to_recordset@ for the
-- same purpose; that function doesn't exist in BigQuery.) The resulting root
-- selection is then planned like any other query via 'bqDBQueryPlan'.
--
-- NOTE: query tags are not propagated: the plan is built in a reader
-- environment holding 'emptyQueryTagsComment'.
bqDBRemoteRelationshipPlan ::
  forall m.
  ( MonadError QErr m
  ) =>
  UserInfo ->
  SourceName ->
  SourceConfig 'BigQuery ->
  -- | List of json objects, each of which becomes a row of the table.
  NonEmpty J.Object ->
  -- | The above objects have this schema: for every join field, the column it
  -- maps to and that column's scalar type. The columns are handed to
  -- 'convertRemoteSourceRelationship' as the join column mapping, and the
  -- scalar types go into the record set definition of the @unnest@ call.
  HashMap FieldName (Column 'BigQuery, ScalarType 'BigQuery) ->
  -- | This is a field name from the lhs that *has* to be selected in the
  -- response along with the relationship. It is declared as an integer column
  -- of the record set.
  FieldName ->
  (FieldName, SourceRelationshipSelection 'BigQuery Void UnpreparedValue) ->
  [HTTP.Header] ->
  Maybe G.Name ->
  Options.StringifyNumbers ->
  m (DBStepInfo 'BigQuery)
bqDBRemoteRelationshipPlan userInfo sourceName sourceConfig lhs lhsSchema argumentId relationship reqHeaders operationName stringifyNumbers = do
  flip runReaderT emptyQueryTagsComment $ bqDBQueryPlan userInfo sourceName sourceConfig rootSelection reqHeaders operationName
  where
    coerceToColumn = BigQuery.ColumnName . getFieldNameTxt

    -- The lhs schema, re-keyed by BigQuery column name.
    joinColumnMapping = mapKeys coerceToColumn lhsSchema

    -- All lhs rows, JSON-encoded and passed as one string-typed parameter.
    rowsArgument :: UnpreparedValue 'BigQuery
    rowsArgument =
      UVParameter IR.Unknown $
        ColumnValue (ColumnScalar BigQuery.StringScalarType) $
          BigQuery.StringValue . LT.toStrict $
            J.encodeToLazyText lhs

    -- Column name/type pairs of the record set: the argument id column
    -- first, followed by the join columns.
    recordSetDefinitionList =
      (coerceToColumn argumentId, BigQuery.IntegerScalarType) : HashMap.toList (fmap snd joinColumnMapping)

    -- The @unnest@ call over the encoded rows, used as the selection source.
    jsonToRecordSet :: IR.SelectFromG ('BigQuery) (UnpreparedValue 'BigQuery)
    jsonToRecordSet =
      IR.FromFunction
        (BigQuery.FunctionName "unnest" Nothing)
        ( FunctionArgsExp
            [BigQuery.AEInput rowsArgument]
            mempty
        )
        (Just recordSetDefinitionList)

    rootSelection =
      convertRemoteSourceRelationship
        (fst <$> joinColumnMapping)
        jsonToRecordSet
        (BigQuery.ColumnName $ getFieldNameTxt argumentId)
        (ColumnScalar BigQuery.IntegerScalarType)
        relationship
        stringifyNumbers