mirror of
https://github.com/hasura/graphql-engine.git
synced 2025-01-07 08:13:18 +03:00
2e940f88c4
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/5042 GitOrigin-RevId: 45efb6dff9fbe8e2ec10356921e5c61b2af1bcda
462 lines
14 KiB
Haskell
462 lines
14 KiB
Haskell
{-# LANGUAGE DuplicateRecordFields #-}
|
|
|
|
module Hasura.Backends.BigQuery.Meta
|
|
( MetadataError (..),
|
|
getTables,
|
|
RestTableReference (..),
|
|
RestTable (..),
|
|
RestTableSchema (..),
|
|
RestFieldSchema (..),
|
|
RestType (..),
|
|
Mode (..),
|
|
RestRoutineType (..),
|
|
RestArgument (..),
|
|
RestStandardSqlField (..),
|
|
RestStandardSqlTableType (..),
|
|
RestRoutineReference (..),
|
|
routineReferenceToFunctionName,
|
|
RestRoutine (..),
|
|
getRoutines,
|
|
)
|
|
where
|
|
|
|
import Control.Exception.Safe
|
|
import Control.Monad.IO.Class
|
|
import Control.Monad.Trans.Except
|
|
import Data.Aeson
|
|
import Data.Aeson qualified as Aeson
|
|
import Data.Foldable
|
|
import Data.Maybe
|
|
import Data.Sequence qualified as Seq
|
|
import Data.Text qualified as T
|
|
import GHC.Generics
|
|
import Hasura.Backends.BigQuery.Connection
|
|
import Hasura.Backends.BigQuery.Source
|
|
import Hasura.Backends.BigQuery.Types
|
|
import Hasura.Prelude
|
|
import Network.HTTP.Simple
|
|
import Network.HTTP.Types
|
|
|
|
--------------------------------------------------------------------------------
|
|
-- Types
|
|
|
|
data MetadataError
|
|
= RestProblem RestProblem
|
|
deriving (Show)
|
|
|
|
data RestProblem
|
|
= GetTablesProblem SomeException
|
|
| GetTableProblem SomeException
|
|
| GetRoutineProblem SomeException
|
|
| GetMetaDecodeProblem String
|
|
| GetTablesBigQueryProblem BigQueryProblem
|
|
| GetRoutinesBigQueryProblem BigQueryProblem
|
|
| RESTRequestNonOK Status
|
|
deriving (Show)
|
|
|
|
data RestTableList = RestTableList
|
|
{ nextPageToken :: Maybe Text,
|
|
tables :: [RestTableBrief]
|
|
}
|
|
deriving (Show)
|
|
|
|
instance FromJSON RestTableList where
|
|
parseJSON =
|
|
withObject
|
|
"RestTableList"
|
|
( \o -> do
|
|
kind <- o .: "kind"
|
|
case kind of
|
|
("bigquery#tableList" :: Text) -> do
|
|
nextPageToken <- o .:? "nextPageToken"
|
|
tables <- o .:? "tables" .!= []
|
|
pure RestTableList {..}
|
|
_ -> fail "Expected kind of bigquery#tableList"
|
|
)
|
|
|
|
data RestTableBrief = RestTableBrief
|
|
{ tableReference :: RestTableReference
|
|
}
|
|
deriving (Show, Generic)
|
|
|
|
instance FromJSON RestTableBrief
|
|
|
|
data RestTableReference = RestTableReference
|
|
{ datasetId :: Text,
|
|
projectId :: Text,
|
|
tableId :: Text
|
|
}
|
|
deriving (Show, Generic)
|
|
|
|
instance FromJSON RestTableReference
|
|
|
|
data RestTable = RestTable
|
|
{ tableReference :: RestTableReference,
|
|
schema :: RestTableSchema
|
|
}
|
|
deriving (Show, Generic)
|
|
|
|
instance FromJSON RestTable
|
|
|
|
data RestTableSchema = RestTableSchema
|
|
{ fields :: [RestFieldSchema]
|
|
}
|
|
deriving (Show, Generic)
|
|
|
|
instance FromJSON RestTableSchema
|
|
|
|
data RestFieldSchema = RestFieldSchema
|
|
{ name :: Text,
|
|
-- | The field data type. Possible values include STRING, BYTES,
|
|
-- INTEGER, INT64 (same as INTEGER), FLOAT, FLOAT64 (same as
|
|
-- FLOAT), BOOLEAN, BOOL (same as BOOLEAN), TIMESTAMP, DATE, TIME,
|
|
-- DATETIME, GEOGRAPHY, NUMERIC, RECORD (where RECORD indicates
|
|
-- that the field contains a nested schema) or STRUCT (same as
|
|
-- RECORD).
|
|
type' :: RestType,
|
|
mode :: Mode
|
|
-- The field mode. Possible values include NULLABLE, REQUIRED and
|
|
-- REPEATED. The default value is NULLABLE.
|
|
}
|
|
deriving (Show, Generic)
|
|
|
|
instance FromJSON RestFieldSchema where
|
|
parseJSON =
|
|
withObject
|
|
"RestFieldSchema"
|
|
( \o -> do
|
|
type' <- o .: "type"
|
|
name <- o .: "name"
|
|
mode <- o .:? "mode" .!= Nullable
|
|
pure RestFieldSchema {..}
|
|
)
|
|
|
|
data Mode = Nullable | Required | Repeated deriving (Show)
|
|
|
|
instance FromJSON Mode where
|
|
parseJSON j = do
|
|
s <- parseJSON j
|
|
case s :: Text of
|
|
"NULLABLE" -> pure Nullable
|
|
"REQUIRED" -> pure Required
|
|
"REPEATED" -> pure Repeated
|
|
_ -> fail ("invalid mode " ++ show s)
|
|
|
|
data RestType
|
|
= STRING
|
|
| BYTES
|
|
| INTEGER
|
|
| FLOAT
|
|
| BOOL
|
|
| TIMESTAMP
|
|
| DATE
|
|
| TIME
|
|
| DATETIME
|
|
| GEOGRAPHY
|
|
| DECIMAL
|
|
| BIGDECIMAL
|
|
| STRUCT -- (same as RECORD).
|
|
deriving (Show)
|
|
|
|
instance FromJSON RestType where
|
|
parseJSON j = do
|
|
s <- parseJSON j
|
|
case s :: Text of
|
|
"STRING" -> pure STRING
|
|
"BYTES" -> pure BYTES
|
|
"INTEGER" -> pure INTEGER
|
|
"INT64" -> pure INTEGER
|
|
"FLOAT" -> pure FLOAT
|
|
"FLOAT64" -> pure FLOAT
|
|
"BOOLEAN" -> pure BOOL
|
|
"BOOL" -> pure BOOL
|
|
"TIMESTAMP" -> pure TIMESTAMP
|
|
"DATE" -> pure DATE
|
|
"TIME" -> pure TIME
|
|
"DATETIME" -> pure DATETIME
|
|
"GEOGRAPHY" -> pure GEOGRAPHY
|
|
"NUMERIC" -> pure DECIMAL
|
|
"DECIMAL" -> pure DECIMAL
|
|
"BIGNUMERIC" -> pure BIGDECIMAL
|
|
"BIGDECIMAL" -> pure BIGDECIMAL
|
|
"RECORD" -> pure STRUCT
|
|
"STRUCT" -> pure STRUCT
|
|
_ -> fail ("invalid type " ++ show s)
|
|
|
|
--------------------------------------------------------------------------------
|
|
-- REST request
|
|
|
|
-- | Get all tables from all specified data sets.
|
|
getTables ::
|
|
MonadIO m =>
|
|
BigQuerySourceConfig ->
|
|
m (Either RestProblem [RestTable])
|
|
getTables BigQuerySourceConfig {..} =
|
|
runExceptT
|
|
(fmap concat (traverse (ExceptT . getTablesForDataSet _scConnection) _scDatasets))
|
|
|
|
-- | Get tables in the dataset.
|
|
getTablesForDataSet ::
|
|
MonadIO m =>
|
|
BigQueryConnection ->
|
|
Text ->
|
|
m (Either RestProblem [RestTable])
|
|
getTablesForDataSet conn dataSet = do
|
|
result <-
|
|
liftIO (catchAny (run Nothing mempty) (pure . Left . GetTablesProblem))
|
|
case result of
|
|
Left e -> pure (Left e)
|
|
Right briefs ->
|
|
fmap
|
|
sequence
|
|
( traverse
|
|
( \RestTableBrief {tableReference = RestTableReference {tableId}} ->
|
|
getTable conn dataSet tableId
|
|
)
|
|
briefs
|
|
)
|
|
where
|
|
run pageToken acc = do
|
|
let req =
|
|
setRequestHeader "Content-Type" ["application/json"] $
|
|
parseRequest_ url
|
|
eResp <- runBigQuery conn req
|
|
case eResp of
|
|
Left e -> pure (Left (GetTablesBigQueryProblem e))
|
|
Right resp ->
|
|
case getResponseStatusCode resp of
|
|
200 ->
|
|
case Aeson.eitherDecode (getResponseBody resp) of
|
|
Left e -> pure (Left (GetMetaDecodeProblem e))
|
|
Right RestTableList {nextPageToken, tables} ->
|
|
case nextPageToken of
|
|
Nothing -> pure (Right (toList (acc <> Seq.fromList tables)))
|
|
Just token -> run (pure token) (acc <> Seq.fromList tables)
|
|
_ -> pure (Left (RESTRequestNonOK (getResponseStatus resp)))
|
|
where
|
|
url =
|
|
"GET https://bigquery.googleapis.com/bigquery/v2/projects/"
|
|
<> T.unpack (_bqProjectId conn)
|
|
<> "/datasets/"
|
|
<> T.unpack dataSet
|
|
<> "/tables?alt=json&"
|
|
<> T.unpack (encodeParams extraParameters)
|
|
extraParameters = pageTokenParam
|
|
where
|
|
pageTokenParam =
|
|
case pageToken of
|
|
Nothing -> []
|
|
Just token -> [("pageToken", token)]
|
|
|
|
-- | Get tables in the schema.
|
|
getTable ::
|
|
MonadIO m =>
|
|
BigQueryConnection ->
|
|
Text ->
|
|
Text ->
|
|
m (Either RestProblem RestTable)
|
|
getTable conn dataSet tableId = do
|
|
liftIO (catchAny run (pure . Left . GetTableProblem))
|
|
where
|
|
run = do
|
|
let req =
|
|
setRequestHeader "Content-Type" ["application/json"] $
|
|
parseRequest_ url
|
|
eResp <- runBigQuery conn req
|
|
case eResp of
|
|
Left e -> pure (Left (GetTablesBigQueryProblem e))
|
|
Right resp ->
|
|
case getResponseStatusCode resp of
|
|
200 ->
|
|
case Aeson.eitherDecode (getResponseBody resp) of
|
|
Left e -> pure (Left (GetMetaDecodeProblem e))
|
|
Right table -> pure (Right table)
|
|
_ -> pure (Left (RESTRequestNonOK (getResponseStatus resp)))
|
|
where
|
|
url =
|
|
"GET https://bigquery.googleapis.com/bigquery/v2/projects/"
|
|
<> T.unpack (_bqProjectId conn)
|
|
<> "/datasets/"
|
|
<> T.unpack dataSet
|
|
<> "/tables/"
|
|
<> T.unpack tableId
|
|
<> "?alt=json&"
|
|
<> T.unpack (encodeParams extraParameters)
|
|
extraParameters = []
|
|
|
|
encodeParams :: [(Text, Text)] -> Text
|
|
encodeParams = T.intercalate "&" . map (\(k, v) -> k <> "=" <> v)
|
|
|
|
-- Routines related
|
|
|
|
-- | The fine-grained type of the routine
|
|
-- Ref: https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#RoutineType
|
|
data RestRoutineType
|
|
= ROUTINE_TYPE_UNSPECIFIED
|
|
| SCALAR_FUNCTION
|
|
| PROCEDURE
|
|
| TABLE_VALUED_FUNCTION
|
|
deriving (Show, Eq, Generic)
|
|
|
|
instance FromJSON RestRoutineType
|
|
|
|
-- | Input argument of a function/routine.
|
|
-- Ref: https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#Argument
|
|
data RestArgument = RestArgument
|
|
{ -- | The name of this argument. Can be absent for function return argument.
|
|
_raName :: Maybe Text,
|
|
_raDataType :: Maybe RestType
|
|
}
|
|
deriving (Show, Generic)
|
|
|
|
instance FromJSON RestArgument where
|
|
parseJSON =
|
|
withObject
|
|
"RestArgument"
|
|
( \o -> do
|
|
name <- o .:? "name"
|
|
typeObject <- o .:? "dataType"
|
|
|
|
-- (Hopefully) very temporary fix: right now, we don't have an
|
|
-- understanding of @ARRAY@ as a 'RestType', which is causing issues
|
|
-- in production. With this change, we ignore any BigQuery argument
|
|
-- types that we don't recognise. While not ideal, this should be
|
|
-- safe, as we shouldn't get types from BigQuery that it can't itself
|
|
-- understand.
|
|
type' <- mapM (.: "typeKind") typeObject <|> pure Nothing
|
|
pure $ RestArgument name type'
|
|
)
|
|
|
|
-- | A field or a column.
|
|
-- Ref: https://cloud.google.com/bigquery/docs/reference/rest/v2/StandardSqlField
|
|
data RestStandardSqlField = RestStandardSqlField
|
|
{ -- | The field name is optional and is absent for fields with STRUCT type.
|
|
_rssfName :: Maybe Text,
|
|
_rssType :: Maybe RestType
|
|
}
|
|
deriving (Show, Generic)
|
|
|
|
instance FromJSON RestStandardSqlField where
|
|
parseJSON =
|
|
withObject
|
|
"RestStandardSqlField"
|
|
( \o -> do
|
|
name <- o .:? "name"
|
|
typeObject <- o .:? "type"
|
|
|
|
-- (Hopefully) very temporary fix: right now, we don't have an
|
|
-- understanding of @ARRAY@ as a 'RestType', which is causing issues
|
|
-- in production. With this change, we ignore any BigQuery argument
|
|
-- types that we don't recognise. While not ideal, this should be
|
|
-- safe, as we shouldn't get types from BigQuery that it can't itself
|
|
-- understand.
|
|
type' <- mapM (.: "typeKind") typeObject <|> pure Nothing
|
|
pure $ RestStandardSqlField name type'
|
|
)
|
|
|
|
-- | A table type, which has only list of columns with names and types.
|
|
-- Ref: https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#StandardSqlTableType
|
|
data RestStandardSqlTableType = RestStandardSqlTableType
|
|
{ _rrttColumns :: [RestStandardSqlField]
|
|
}
|
|
deriving (Show, Generic)
|
|
|
|
instance FromJSON RestStandardSqlTableType where
|
|
parseJSON = genericParseJSON hasuraJSON
|
|
|
|
-- | Id path of a routine.
|
|
-- Ref: https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#RoutineReference
|
|
data RestRoutineReference = RestRoutineReference
|
|
{ datasetId :: Text,
|
|
projectId :: Text,
|
|
routineId :: Text
|
|
}
|
|
deriving (Show, Generic)
|
|
|
|
instance FromJSON RestRoutineReference
|
|
|
|
routineReferenceToFunctionName :: RestRoutineReference -> FunctionName
|
|
routineReferenceToFunctionName RestRoutineReference {..} =
|
|
FunctionName {functionName = routineId, functionNameSchema = Just datasetId}
|
|
|
|
-- | A user-defined function.
|
|
-- Ref: https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#Routine
|
|
data RestRoutine = RestRoutine
|
|
{ -- | Reference describing the ID of this routine
|
|
routineReference :: RestRoutineReference,
|
|
-- | The type of routine
|
|
routineType :: RestRoutineType,
|
|
-- | List of arguments defined
|
|
arguments :: Maybe [RestArgument],
|
|
-- | Routines defined with 'RETURNS TABLE' clause has this information
|
|
returnTableType :: Maybe RestStandardSqlTableType
|
|
}
|
|
deriving (Show, Generic)
|
|
|
|
instance FromJSON RestRoutine
|
|
|
|
-- | List of routines
|
|
-- Ref: https://cloud.google.com/bigquery/docs/reference/rest/v2/routines/list
|
|
data RestRoutineList = RestRoutineList
|
|
{ _rrlRoutines :: [RestRoutine],
|
|
_rrlNextPageToken :: Maybe Text
|
|
}
|
|
deriving (Show)
|
|
|
|
instance FromJSON RestRoutineList where
|
|
parseJSON = withObject "Object" $ \o ->
|
|
RestRoutineList
|
|
<$> o .:? "routines" .!= [] -- "routine" field is absent when there are no routines defined
|
|
<*> o .:? "nextPageToken"
|
|
|
|
-- | Get all routines from all specified data sets.
|
|
getRoutines ::
|
|
MonadIO m =>
|
|
BigQuerySourceConfig ->
|
|
m (Either RestProblem [RestRoutine])
|
|
getRoutines BigQuerySourceConfig {..} =
|
|
runExceptT
|
|
(fmap concat (traverse (ExceptT . getRoutinesForDataSet _scConnection) _scDatasets))
|
|
|
|
-- | Get routines in the dataset.
|
|
getRoutinesForDataSet ::
|
|
MonadIO m =>
|
|
BigQueryConnection ->
|
|
Text ->
|
|
m (Either RestProblem [RestRoutine])
|
|
getRoutinesForDataSet conn dataSet = do
|
|
liftIO (catchAny (run Nothing mempty) (pure . Left . GetRoutineProblem))
|
|
where
|
|
run pageToken acc = do
|
|
let req =
|
|
setRequestHeader "Content-Type" ["application/json"] $
|
|
parseRequest_ url
|
|
eResp <- runBigQuery conn req
|
|
case eResp of
|
|
Left e -> pure (Left (GetRoutinesBigQueryProblem e))
|
|
Right resp ->
|
|
case getResponseStatusCode resp of
|
|
200 ->
|
|
case Aeson.eitherDecode (getResponseBody resp) of
|
|
Left e -> pure (Left (GetMetaDecodeProblem e))
|
|
Right RestRoutineList {_rrlRoutines = routines, _rrlNextPageToken = nextPageToken} ->
|
|
case nextPageToken of
|
|
Nothing -> pure (Right (toList (acc <> Seq.fromList routines)))
|
|
Just token -> run (pure token) (acc <> Seq.fromList routines)
|
|
_ -> pure (Left (RESTRequestNonOK (getResponseStatus resp)))
|
|
where
|
|
url =
|
|
"GET https://bigquery.googleapis.com/bigquery/v2/projects/"
|
|
<> T.unpack (_bqProjectId conn)
|
|
<> "/datasets/"
|
|
<> T.unpack dataSet
|
|
<> "/routines?alt=json&"
|
|
<> T.unpack (encodeParams extraParameters)
|
|
extraParameters = pageTokenParam <> readMaskParam
|
|
where
|
|
pageTokenParam =
|
|
case pageToken of
|
|
Nothing -> []
|
|
Just token -> [("pageToken", token)]
|
|
readMaskParam = [("readMask", "routineType,arguments,returnTableType")]
|