graphql-engine/server/src-lib/Hasura/Backends/BigQuery/DDL/Source.hs

181 lines
7.0 KiB
Haskell
Raw Normal View History

{-# LANGUAGE DuplicateRecordFields #-}
module Hasura.Backends.BigQuery.DDL.Source
( resolveSource,
postDropSourceHook,
resolveSourceConfig,
restTypeToScalarType,
)
where
import Data.Aeson qualified as J
import Data.ByteString.Lazy qualified as L
import Data.Environment qualified as Env
import Data.HashMap.Strict.Extended qualified as HM
import Data.Int qualified as Int
import Data.Text qualified as T
import Data.Text.Encoding qualified as T
import Data.Time.Clock.System
import Hasura.Backends.BigQuery.Connection
import Hasura.Backends.BigQuery.Meta
import Hasura.Backends.BigQuery.Source
import Hasura.Backends.BigQuery.Types
import Hasura.Base.Error
import Hasura.Function.Cache (FunctionOverloads (..))
import Hasura.Prelude
import Hasura.RQL.Types.Backend (BackendConfig)
import Hasura.RQL.Types.Column
import Hasura.RQL.Types.Common
import Hasura.RQL.Types.Source
import Hasura.RQL.Types.Table
import Hasura.SQL.Backend
defaultGlobalSelectLimit :: Int.Int64
defaultGlobalSelectLimit = 1000
defaultRetryLimit :: Int
defaultRetryLimit = 5
defaultRetryBaseDelay :: Microseconds
defaultRetryBaseDelay = 500000
resolveSourceConfig ::
MonadIO m =>
SourceName ->
BigQueryConnSourceConfig ->
BackendSourceKind 'BigQuery ->
BackendConfig 'BigQuery ->
Env.Environment ->
manager ->
m (Either QErr BigQuerySourceConfig)
resolveSourceConfig _name BigQueryConnSourceConfig {..} _backendKind _backendConfig env _manager = runExceptT $ do
eSA <- resolveConfigurationJson env _cscServiceAccount
case eSA of
Left e -> throw400 Unexpected $ T.pack e
Right serviceAccount -> do
projectId <- BigQueryProjectId <$> resolveConfigurationInput env _cscProjectId
retryOptions <- do
numRetries <-
resolveConfigurationInput env `mapM` _cscRetryLimit >>= \case
Nothing -> pure defaultRetryLimit
Just v -> readNonNegative v "retry limit"
if numRetries == 0
then pure Nothing
else do
let _retryNumRetries = numRetries
_retryBaseDelay <-
resolveConfigurationInput env `mapM` _cscRetryBaseDelay >>= \case
Nothing -> pure defaultRetryBaseDelay
Just v -> fromInteger <$> readNonNegative v "retry base delay"
pure $ Just RetryOptions {..}
_scConnection <- initConnection serviceAccount projectId retryOptions
_scDatasets <- fmap BigQueryDataset <$> resolveConfigurationInputs env _cscDatasets
_scGlobalSelectLimit <-
resolveConfigurationInput env `mapM` _cscGlobalSelectLimit >>= \case
Nothing -> pure defaultGlobalSelectLimit
Just v -> readNonNegative v "global select limit"
pure BigQuerySourceConfig {..}
readNonNegative :: (MonadError QErr m, Num a, Ord a, J.FromJSON a, Read a) => Text -> Text -> m a
readNonNegative i paramName =
-- This works around the inconsistency between JSON and
-- environment variables. The config handling module should be
-- reworked to handle non-text values better.
case readMaybe (T.unpack i) <|> J.decode (L.fromStrict (T.encodeUtf8 i)) of
Nothing -> throw400 Unexpected $ "Need a non-negative integer for " <> paramName
Just i' -> do
when (i' < 0) $ throw400 Unexpected $ "Need the integer for the " <> paramName <> " to be non-negative"
pure i'
resolveSource ::
(MonadIO m) =>
BigQuerySourceConfig ->
server: plumb `StoredIntrospection` while building the Schema Cache We'd like to be able to build a Schema Cache from only serializable data. We already have Metadata. The data that's missing to build a Schema Cache is referred to as "stored introspection", and this includes: - DB introspection - User-defined enum values (i.e. contents of specific DB tables) - Remote schema introspection This PR introduces a new `StoredIntrospection` container that holds that data, and plumbs it through to the right parts of the schema cache building process, so that stored introspection can be used as a substitute for fresh introspection requests against live data sources. The serialization of `StoredIntrospection` is intended to be straightforward: just take the serialized source introspection results, and put them in an appropriate JSON object. Though I don't think that this PR achieves that entirely. In order for `StoredIntrospection` to be deserializable (through `aeson` instances), while keeping the required code changes low, this piggy-backs off of the `ResolvedSource` data type. `ResolvedSource` is _almost_ exactly what we want, and _almost_ deserializable, so this PR brings it across the finish line by moving a few things out of that type, and adding a `FromJSON (RawFunctionInfo b)` context to the `Backend` type class. [PLAT-270]: https://hasurahq.atlassian.net/browse/PLAT-270?atlOrigin=eyJpIjoiNWRkNTljNzYxNjVmNDY3MDlhMDU5Y2ZhYzA5YTRkZjUiLCJwIjoiZ2l0aHViLWNvbS1KU1cifQ [PLAT-270]: https://hasurahq.atlassian.net/browse/PLAT-270?atlOrigin=eyJpIjoiNWRkNTljNzYxNjVmNDY3MDlhMDU5Y2ZhYzA5YTRkZjUiLCJwIjoiZ2l0aHViLWNvbS1KU1cifQ [PLAT-276]: https://hasurahq.atlassian.net/browse/PLAT-276?atlOrigin=eyJpIjoiNWRkNTljNzYxNjVmNDY3MDlhMDU5Y2ZhYzA5YTRkZjUiLCJwIjoiZ2l0aHViLWNvbS1KU1cifQ [PLAT-276]: https://hasurahq.atlassian.net/browse/PLAT-276?atlOrigin=eyJpIjoiNWRkNTljNzYxNjVmNDY3MDlhMDU5Y2ZhYzA5YTRkZjUiLCJwIjoiZ2l0aHViLWNvbS1KU1cifQ PR-URL: https://github.com/hasura/graphql-engine-mono/pull/7053 GitOrigin-RevId: 5001b4ea086195cb5e65886747eac2a0a657b64c
2023-01-20 17:51:11 +03:00
m (Either QErr (DBObjectsIntrospection 'BigQuery))
resolveSource sourceConfig =
runExceptT $ do
tables <- getTables sourceConfig
routines <- getRoutines sourceConfig
let result = (,) <$> tables <*> routines
case result of
Left err ->
throw400 Unexpected $
"unexpected exception while connecting to database: " <> tshow err
Right (restTables, restRoutines) -> do
seconds <- liftIO $ fmap systemSeconds getSystemTime
let functions = FunctionOverloads <$> HM.groupOnNE (routineReferenceToFunctionName . routineReference) restRoutines
pure
server: plumb `StoredIntrospection` while building the Schema Cache We'd like to be able to build a Schema Cache from only serializable data. We already have Metadata. The data that's missing to build a Schema Cache is referred to as "stored introspection", and this includes: - DB introspection - User-defined enum values (i.e. contents of specific DB tables) - Remote schema introspection This PR introduces a new `StoredIntrospection` container that holds that data, and plumbs it through to the right parts of the schema cache building process, so that stored introspection can be used as a substitute for fresh introspection requests against live data sources. The serialization of `StoredIntrospection` is intended to be straightforward: just take the serialized source introspection results, and put them in an appropriate JSON object. Though I don't think that this PR achieves that entirely. In order for `StoredIntrospection` to be deserializable (through `aeson` instances), while keeping the required code changes low, this piggy-backs off of the `ResolvedSource` data type. `ResolvedSource` is _almost_ exactly what we want, and _almost_ deserializable, so this PR brings it across the finish line by moving a few things out of that type, and adding a `FromJSON (RawFunctionInfo b)` context to the `Backend` type class. [PLAT-270]: https://hasurahq.atlassian.net/browse/PLAT-270?atlOrigin=eyJpIjoiNWRkNTljNzYxNjVmNDY3MDlhMDU5Y2ZhYzA5YTRkZjUiLCJwIjoiZ2l0aHViLWNvbS1KU1cifQ [PLAT-270]: https://hasurahq.atlassian.net/browse/PLAT-270?atlOrigin=eyJpIjoiNWRkNTljNzYxNjVmNDY3MDlhMDU5Y2ZhYzA5YTRkZjUiLCJwIjoiZ2l0aHViLWNvbS1KU1cifQ [PLAT-276]: https://hasurahq.atlassian.net/browse/PLAT-276?atlOrigin=eyJpIjoiNWRkNTljNzYxNjVmNDY3MDlhMDU5Y2ZhYzA5YTRkZjUiLCJwIjoiZ2l0aHViLWNvbS1KU1cifQ [PLAT-276]: https://hasurahq.atlassian.net/browse/PLAT-276?atlOrigin=eyJpIjoiNWRkNTljNzYxNjVmNDY3MDlhMDU5Y2ZhYzA5YTRkZjUiLCJwIjoiZ2l0aHViLWNvbS1KU1cifQ PR-URL: https://github.com/hasura/graphql-engine-mono/pull/7053 GitOrigin-RevId: 5001b4ea086195cb5e65886747eac2a0a657b64c
2023-01-20 17:51:11 +03:00
( DBObjectsIntrospection
{ _rsTables =
HM.fromList
[ ( restTableReferenceToTableName tableReference,
DBTableMetadata
{ _ptmiOid = OID (fromIntegral seconds + index :: Int), -- TODO: The seconds are used for uniqueness. BigQuery doesn't support a "stable" ID for a table.
_ptmiColumns =
[ RawColumnInfo
{ rciName = ColumnName name,
rciPosition = position,
rciType = restTypeToScalarType type',
rciIsNullable =
case mode of
Nullable -> True
_ -> False,
rciDescription = Nothing,
rciMutability = ColumnMutability {_cmIsInsertable = True, _cmIsUpdatable = True}
}
| (position, RestFieldSchema {name, type', mode}) <-
zip [1 ..] fields -- TODO: Same trouble as Oid above.
],
_ptmiPrimaryKey = Nothing,
_ptmiUniqueConstraints = mempty,
_ptmiForeignKeys = mempty,
_ptmiViewInfo = Just $ ViewInfo False False False,
_ptmiDescription = Nothing,
Support nested object fields in DC API and use this to implement nest… ## Description This change adds support for nested object fields in HGE IR and Schema Cache, the Data Connectors backend and API, and the MongoDB agent. ### Data Connector API changes - The `/schema` endpoint response now includes an optional set of GraphQL type definitions. Table column types can refer to these definitions by name. - Queries can now include a new field type `object` which contains a column name and a nested query. This allows querying into a nested object within a field. ### MongoDB agent changes - Add support for querying into nested documents using the new `object` field type. ### HGE changes - The `Backend` type class has a new type family `XNestedObjects b` which controls whether or not a backend supports querying into nested objects. This is currently enabled only for the `DataConnector` backend. - For backends that support nested objects, the `FieldInfo` type gets a new constructor `FINestedObject`, and the `AnnFieldG` type gets a new constructor `AFNestedObject`. - If the DC `/schema` endpoint returns any custom GraphQL type definitions they are stored in the `TableInfo` for each table in the source. - During schema cache building, the function `addNonColumnFields` will check whether any column types match custom GraphQL object types stored in the `TableInfo`. If so, they are converted into `FINestedObject` instead of `FIColumn` in the `FieldInfoMap`. - When building the `FieldParser`s from `FieldInfo` (function `fieldSelection`) any `FINestedObject` fields are converted into nested object parsers returning `AFNestedObject`. - The `DataConnector` query planner converts `AFNestedObject` fields into `object` field types in the query sent to the agent. ## Limitations ### HGE not yet implemented: - Support for nested arrays - Support for nested objects/arrays in mutations - Support for nested objects/arrays in order-by - Support for filters (`where`) in nested objects/arrays - Support for adding custom GraphQL types via track table metadata API - Support for interface and union types - Tests for nested objects ### Mongo agent not yet implemented: - Generate nested object types from validation schema - Support for aggregates - Support for order-by - Configure agent port - Build agent in CI - Agent tests for nested objects and MongoDB agent PR-URL: https://github.com/hasura/graphql-engine-mono/pull/7844 GitOrigin-RevId: aec9ec1e4216293286a68f9b1af6f3f5317db423
2023-04-11 04:29:05 +03:00
_ptmiExtraTableMetadata = (),
_ptmiCustomObjectTypes = mempty
}
)
| (index, RestTable {tableReference, schema}) <-
zip [0 ..] restTables,
let RestTableSchema fields = schema
],
_rsFunctions = functions,
_rsScalars = mempty
}
)
restTypeToScalarType :: RestType -> ScalarType
restTypeToScalarType =
\case
STRING -> StringScalarType
BYTES -> BytesScalarType
INTEGER -> IntegerScalarType
FLOAT -> FloatScalarType
BOOL -> BoolScalarType
TIMESTAMP -> TimestampScalarType
DATE -> DateScalarType
TIME -> TimeScalarType
DATETIME -> DatetimeScalarType
GEOGRAPHY -> GeographyScalarType
STRUCT -> StructScalarType
BIGDECIMAL -> BigDecimalScalarType
DECIMAL -> DecimalScalarType
JSON -> JsonScalarType
-- Hierarchy: Project / Dataset / Table
-- see <https://cloud.google.com/bigquery/docs/datasets-intro>
restTableReferenceToTableName :: RestTableReference -> TableName
restTableReferenceToTableName RestTableReference {..} =
TableName {tableName = tableId, tableNameSchema = datasetId}
-- We ignore project id and push that requirement up to the top to
-- the data source level.
postDropSourceHook ::
(MonadIO m) =>
BigQuerySourceConfig ->
TableEventTriggers 'BigQuery ->
m ()
postDropSourceHook _ _ =
-- On BigQuery we don't keep connections open.
pure ()