graphql-engine/server/src-lib/Hasura/Backends/BigQuery/DDL/Source.hs

{-# LANGUAGE DuplicateRecordFields #-}

module Hasura.Backends.BigQuery.DDL.Source
  ( resolveSource,
    postDropSourceHook,
    resolveSourceConfig,
  )
where

import Data.Aeson qualified as J
import Data.ByteString.Lazy qualified as L
import Data.Environment qualified as Env
import Data.HashMap.Strict qualified as HM
import Data.Int qualified as Int
import Data.Text qualified as T
import Data.Text.Encoding qualified as T
import Data.Time.Clock.System
import Hasura.Backends.BigQuery.Connection
import Hasura.Backends.BigQuery.Meta
import Hasura.Backends.BigQuery.Source
import Hasura.Backends.BigQuery.Types
import Hasura.Base.Error
import Hasura.Prelude
import Hasura.RQL.Types.Column
import Hasura.RQL.Types.Common
import Hasura.RQL.Types.Source
import Hasura.RQL.Types.SourceCustomization
import Hasura.RQL.Types.Table
import Hasura.SQL.Backend

defaultGlobalSelectLimit :: Int.Int64
defaultGlobalSelectLimit = 1000

resolveSourceConfig ::
  MonadIO m =>
  SourceName ->
  BigQueryConnSourceConfig ->
  Env.Environment ->
  m (Either QErr BigQuerySourceConfig)
resolveSourceConfig _name BigQueryConnSourceConfig {..} env = runExceptT $ do
  eSA <- resolveConfigurationJson env _cscServiceAccount
  case eSA of
    Left e -> throw400 Unexpected $ T.pack e
    Right serviceAccount -> do
      projectId <- resolveConfigurationInput env _cscProjectId
      _scConnection <- initConnection serviceAccount projectId
      _scDatasets <- resolveConfigurationInputs env _cscDatasets
      _scGlobalSelectLimit <-
        resolveConfigurationInput env `mapM` _cscGlobalSelectLimit >>= \case
          Nothing -> pure defaultGlobalSelectLimit
          Just i ->
            -- This works around the inconsistency between JSON and
            -- environment variables. The config handling module should be
            -- reworked to handle non-text values better.
            case readMaybe (T.unpack i) <|> J.decode (L.fromStrict (T.encodeUtf8 i)) of
              Nothing -> throw400 Unexpected $ "Need a non-negative integer for global select limit"
              Just i' -> do
                when (i' < 0) $ throw400 Unexpected "Need the integer for the global select limit to be non-negative"
                pure i'
      pure BigQuerySourceConfig {..}
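
-- A sketch of how the dual parse above behaves (illustrative GHCi
-- session, not part of this module; "100" stands in for a limit that
-- arrived either as an environment-variable string or as a JSON number):
--
-- >>> readMaybe (T.unpack "100") :: Maybe Int.Int64
-- Just 100
-- >>> J.decode (L.fromStrict (T.encodeUtf8 "100")) :: Maybe Int.Int64
-- Just 100
-- >>> readMaybe (T.unpack "not a number") :: Maybe Int.Int64
-- Nothing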

resolveSource ::
  (MonadIO m) =>
  BigQuerySourceConfig ->
  SourceTypeCustomization ->
  m (Either QErr (ResolvedSource 'BigQuery))
resolveSource sourceConfig customization =
  runExceptT $ do
    result <- getTables sourceConfig
    case result of
      Left err ->
        throw400 Unexpected $
          "unexpected exception while connecting to database: " <> tshow err
      Right restTables -> do
        seconds <- liftIO $ fmap systemSeconds getSystemTime
        pure
          ( ResolvedSource
              { _rsConfig = sourceConfig,
                _rsCustomization = customization,
                _rsTables =
                  HM.fromList
                    [ ( restTableReferenceToTableName tableReference,
                        DBTableMetadata
                          { _ptmiOid = OID (fromIntegral seconds + index :: Int), -- TODO: The seconds are used for uniqueness. BigQuery doesn't support a "stable" ID for a table.
                            _ptmiColumns =
                              [ RawColumnInfo
                                  { rciName = ColumnName name,
                                    rciPosition = position,
                                    rciType = restTypeToScalarType type',
                                    rciIsNullable =
                                      case mode of
                                        Nullable -> True
                                        _ -> False,
                                    rciDescription = Nothing,
                                    rciMutability = ColumnMutability {_cmIsInsertable = True, _cmIsUpdatable = True}
                                  }
                                | (position, RestFieldSchema {name, type', mode}) <-
                                    zip [1 ..] fields -- TODO: Same trouble as Oid above.
                              ],
                            _ptmiPrimaryKey = Nothing,
                            _ptmiUniqueConstraints = mempty,
                            _ptmiForeignKeys = mempty,
                            _ptmiViewInfo = Just $ ViewInfo False False False,
                            _ptmiDescription = Nothing,
                            _ptmiExtraTableMetadata = ()
                          }
                      )
                      | (index, RestTable {tableReference, schema}) <-
                          zip [0 ..] restTables,
                        let RestTableSchema fields = schema
                    ],
                _rsFunctions = mempty,
                _rsPgScalars = mempty
              }
          )
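
-- A worked illustration of the OID scheme above (hypothetical numbers):
-- if getSystemTime returns 1600000000 seconds and the metadata fetch
-- yields two tables, they receive OIDs 1600000000 and 1600000001. These
-- are unique within a single resolve pass but, as the TODO notes, not
-- stable across runs.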

restTypeToScalarType :: RestType -> ScalarType
restTypeToScalarType =
  \case
    STRING -> StringScalarType
    BYTES -> BytesScalarType
    INTEGER -> IntegerScalarType
    FLOAT -> FloatScalarType
    BOOL -> BoolScalarType
    TIMESTAMP -> TimestampScalarType
    DATE -> DateScalarType
    TIME -> TimeScalarType
    DATETIME -> DatetimeScalarType
    GEOGRAPHY -> GeographyScalarType
    STRUCT -> StructScalarType
    BIGDECIMAL -> BigDecimalScalarType
    DECIMAL -> DecimalScalarType
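
-- Illustrative GHCi check of the mapping (assumes ScalarType's derived
-- Show instance):
--
-- >>> restTypeToScalarType TIMESTAMP
-- TimestampScalarType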

-- Hierarchy: Project / Dataset / Table
-- see <https://cloud.google.com/bigquery/docs/datasets-intro>
restTableReferenceToTableName :: RestTableReference -> TableName
restTableReferenceToTableName RestTableReference {..} =
  TableName {tableName = tableId, tableNameSchema = datasetId}

-- We ignore the project id and push that requirement up to the
-- data source level.
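
-- For example (hypothetical values; RestTableReference also carries a
-- projectId field, which is dropped here as noted above): a reference
-- with datasetId = "sales" and tableId = "orders" becomes
-- TableName {tableName = "orders", tableNameSchema = "sales"}.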

postDropSourceHook ::
  (MonadIO m) =>
  BigQuerySourceConfig ->
  m ()
postDropSourceHook _ =
  -- On BigQuery we don't keep connections open.
  pure ()