2021-07-13 18:49:19 +03:00
{- # LANGUAGE DeriveAnyClass # -}
2022-03-16 03:39:21 +03:00
{- # LANGUAGE TemplateHaskell # -}
2021-04-12 13:18:29 +03:00
{- # OPTIONS_GHC - fno - warn - orphans # -}
2021-11-04 19:08:33 +03:00
module Hasura.Backends.BigQuery.Source
( BigQueryConnSourceConfig ( .. ) ,
2022-02-14 12:45:46 +03:00
RetryOptions ( .. ) ,
2022-12-16 16:17:41 +03:00
BigQueryProjectId ( .. ) ,
BigQueryDataset ( .. ) ,
2022-02-09 18:26:14 +03:00
BigQueryConnection ( .. ) ,
2021-11-04 19:08:33 +03:00
BigQuerySourceConfig ( .. ) ,
ConfigurationInput ( .. ) ,
ConfigurationInputs ( .. ) ,
ConfigurationJSON ( .. ) ,
GoogleAccessToken ( GoogleAccessToken ) ,
PKey ( unPKey ) ,
ServiceAccount ( .. ) ,
TokenResp ( .. ) ,
)
where
2021-04-12 13:18:29 +03:00
2022-10-12 19:28:51 +03:00
import Autodocodec
2021-07-30 18:42:36 +03:00
import Control.Concurrent.MVar
2021-04-12 13:18:29 +03:00
import Crypto.PubKey.RSA.Types qualified as Cry
2021-07-13 18:49:19 +03:00
import Data.Aeson qualified as J
import Data.Aeson.Casing qualified as J
2022-06-08 18:31:28 +03:00
import Data.Aeson.KeyMap qualified as KM
2021-07-13 18:49:19 +03:00
import Data.Aeson.TH qualified as J
import Data.ByteString.Lazy qualified as BL
2022-01-17 13:01:25 +03:00
import Data.Int qualified as Int
2022-10-12 19:28:51 +03:00
import Data.Scientific ( Scientific )
2021-07-13 18:49:19 +03:00
import Data.Text.Encoding qualified as TE
import Data.X509 qualified as X509
import Data.X509.Memory qualified as X509
2022-10-12 19:28:51 +03:00
import Hasura.Metadata.DTO.Utils ( fromEnvCodec )
2021-07-30 18:42:36 +03:00
import Hasura.Prelude
2021-04-12 13:18:29 +03:00
2022-12-16 16:17:41 +03:00
newtype BigQueryProjectId = BigQueryProjectId { getBigQueryProjectId :: Text }
deriving newtype ( Eq , Show , NFData , Hashable , J . FromJSON , J . ToJSON )
deriving stock ( Data , Generic )
instance HasCodec BigQueryProjectId where
codec = bimapCodec ( Right . BigQueryProjectId ) getBigQueryProjectId textCodec
newtype BigQueryDataset = BigQueryDataset { getBigQueryDataset :: Text }
deriving newtype ( Eq , Show , NFData , Hashable , J . FromJSON , J . ToJSON )
deriving stock ( Data , Generic )
instance HasCodec BigQueryDataset where
codec = bimapCodec ( Right . BigQueryDataset ) getBigQueryDataset textCodec
2021-04-12 13:18:29 +03:00
data PKey = PKey
2021-07-13 18:49:19 +03:00
{ unPKey :: Cry . PrivateKey ,
2021-04-12 13:18:29 +03:00
originalBS :: Text
}
deriving ( Show , Eq , Data , Generic , NFData , Hashable )
2021-09-24 01:56:37 +03:00
2021-04-12 13:18:29 +03:00
deriving instance Generic Cry . PrivateKey -- orphan
2021-09-24 01:56:37 +03:00
2021-04-12 13:18:29 +03:00
deriving instance Generic Cry . PublicKey -- orphan
2021-09-24 01:56:37 +03:00
2021-04-12 13:18:29 +03:00
deriving instance J . ToJSON Cry . PrivateKey -- orphan
2021-09-24 01:56:37 +03:00
2021-04-12 13:18:29 +03:00
deriving instance J . ToJSON Cry . PublicKey -- orphan
2021-09-24 01:56:37 +03:00
2021-04-12 13:18:29 +03:00
deriving instance Hashable Cry . PrivateKey -- orphan
2021-09-24 01:56:37 +03:00
2021-04-12 13:18:29 +03:00
deriving instance Hashable Cry . PublicKey -- orphan
2021-09-24 01:56:37 +03:00
2022-10-12 19:28:51 +03:00
instance HasCodec PKey where
codec = bimapCodec dec originalBS codec
where
dec k = case X509 . readKeyFileFromMemory $ TE . encodeUtf8 k of
[ X509 . PrivKeyRSA k' ] -> Right $ PKey k' k
_ -> Left " unable to parse private key "
2021-04-12 13:18:29 +03:00
instance J . FromJSON PKey where
parseJSON = J . withText " private_key " $ \ k ->
case X509 . readKeyFileFromMemory $ TE . encodeUtf8 k of
[ X509 . PrivKeyRSA k' ] -> return $ PKey k' k
_ -> fail " unable to parse private key "
instance J . ToJSON PKey where
toJSON PKey { .. } = J . toJSON originalBS
newtype GoogleAccessToken
= GoogleAccessToken Text
2022-12-16 16:17:41 +03:00
deriving stock ( Show , Eq , Generic , Data )
deriving anyclass ( J . FromJSON , J . ToJSON , Hashable , NFData )
2021-04-12 13:18:29 +03:00
data TokenResp = TokenResp
2022-07-29 17:05:03 +03:00
{ _trAccessToken :: GoogleAccessToken ,
_trExpiresAt :: Integer -- Number of seconds until expiry from `now`, but we add `now` seconds to this for easy tracking
2021-09-24 01:56:37 +03:00
}
2021-04-12 13:18:29 +03:00
deriving ( Eq , Show , Data , NFData , Generic , Hashable )
instance J . FromJSON TokenResp where
parseJSON = J . withObject " TokenResp " $ \ o ->
TokenResp
<$> o J ..: " access_token "
<*> o J ..: " expires_in "
2021-07-13 18:49:19 +03:00
data ServiceAccount = ServiceAccount
2022-07-29 17:05:03 +03:00
{ _saClientEmail :: Text ,
_saPrivateKey :: PKey ,
2022-12-16 16:17:41 +03:00
_saProjectId :: BigQueryProjectId
2021-09-24 01:56:37 +03:00
}
2021-04-12 13:18:29 +03:00
deriving ( Eq , Show , Data , NFData , Generic , Hashable )
2022-10-12 19:28:51 +03:00
instance HasCodec ServiceAccount where
codec =
object " BigQueryServiceAccount " $
ServiceAccount
<$> requiredField' " client_email " .= _saClientEmail
<*> requiredField' " private_key " .= _saPrivateKey
<*> requiredField' " project_id " .= _saProjectId
2021-04-12 13:18:29 +03:00
$ ( J . deriveJSON ( J . aesonDrop 3 J . snakeCase ) { J . omitNothingFields = False } ''ServiceAccount )
data ConfigurationJSON a
= FromEnvJSON Text
| FromYamlJSON a
deriving stock ( Show , Eq , Generic )
deriving ( NFData , Hashable )
2021-09-24 01:56:37 +03:00
2022-10-12 19:28:51 +03:00
-- This codec has straightforward encoding, but on decoding there is
-- a possibility of receiving a string that contains JSON that is recursively
-- handled by this codec. There is also the issue that decoding the
-- @FromYamlJSON@ case should be attempted last because there is a possibility
-- that the decoding for @a@ is not disjoint from the other decoding cases. This
-- presents some asymmetry that is a little tricky to capture in a codec.
instance HasCodec a => HasCodec ( ConfigurationJSON a ) where
codec = parseAlternative ( parseAlternative mainCodec fromEnvEncodedAsNestedJSON ) yamlJSONCodec
where
-- This is the only codec in this implementation that is used for
-- encoding. It must cover both the @FromEnvJSON@ and @FromYamlJSON@ cases
-- because Autodocodec does not support codecs that are partial in
-- encoding.
mainCodec :: JSONCodec ( ConfigurationJSON a )
mainCodec =
dimapCodec dec enc $
eitherCodec
fromEnvCodec
( bimapCodec
-- Fail parsing at this point because @codec \@a@ should only be
-- used for parsing after trying @fromEnvEncodedAsNestedJSON@.
( const $ Left " not used for parsing " )
id
$ codec @ a
)
where
dec ( Left text ) = FromEnvJSON text
dec ( Right a ) = FromYamlJSON a
enc ( FromEnvJSON i ) = Left i
enc ( FromYamlJSON j ) = Right j
-- The JSON-encoded string case is used as an alternative in
-- a 'parseAlternative' because we can implement the decoding direction,
-- but not the encoding direction. (There isn't a good way to implement
-- @ConfigurationJSON a -> Text@.) Fortunately an alternative in
-- a 'parseAlternative' is only used for decoding so we don't need to
-- implement encoding logic here.
fromEnvEncodedAsNestedJSON :: ValueCodec Text ( ConfigurationJSON a )
fromEnvEncodedAsNestedJSON =
bimapCodec
( eitherDecodeJSONViaCodec . BL . fromStrict . TE . encodeUtf8 )
id
$ codec @ Text <?> " JSON-encoded string "
yamlJSONCodec :: ValueCodec a ( ConfigurationJSON a )
yamlJSONCodec = FromYamlJSON <$> codec @ a
2021-04-12 13:18:29 +03:00
instance J . FromJSON a => J . FromJSON ( ConfigurationJSON a ) where
parseJSON = \ case
2022-06-08 18:31:28 +03:00
J . Object o | Just ( J . String text ) <- KM . lookup " from_env " o -> pure ( FromEnvJSON text )
2021-04-12 13:18:29 +03:00
J . String s -> case J . eitherDecode . BL . fromStrict . TE . encodeUtf8 $ s of
2021-07-13 18:49:19 +03:00
Left { } -> fail " error parsing configuration json "
2021-04-12 13:18:29 +03:00
Right sa -> pure sa
j -> fmap FromYamlJSON ( J . parseJSON j )
2021-09-24 01:56:37 +03:00
2021-04-12 13:18:29 +03:00
instance J . ToJSON a => J . ToJSON ( ConfigurationJSON a ) where
toJSON = \ case
2021-07-13 18:49:19 +03:00
FromEnvJSON i -> J . object [ " from_env " J ..= i ]
2021-04-12 13:18:29 +03:00
FromYamlJSON j -> J . toJSON j
2022-10-12 19:28:51 +03:00
-- | Configuration inputs when they are a YAML array or an Env var whose value is
2021-04-12 13:18:29 +03:00
-- a comma-separated string
data ConfigurationInputs
2022-07-29 17:05:03 +03:00
= FromYamls [ Text ]
| FromEnvs Text
2021-04-12 13:18:29 +03:00
deriving stock ( Show , Eq , Generic )
deriving ( NFData , Hashable )
2021-09-24 01:56:37 +03:00
2022-10-12 19:28:51 +03:00
instance HasCodec ConfigurationInputs where
codec =
dimapCodec
( either FromYamls FromEnvs )
( \ case FromYamls i -> Left i ; FromEnvs i -> Right i )
$ disjointEitherCodec
( codec @ [ Text ] )
fromEnvCodec
2021-04-12 13:18:29 +03:00
instance J . ToJSON ConfigurationInputs where
toJSON = \ case
FromYamls i -> J . toJSON i
2021-07-13 18:49:19 +03:00
FromEnvs i -> J . object [ " from_env " J ..= i ]
2021-09-24 01:56:37 +03:00
2021-04-12 13:18:29 +03:00
instance J . FromJSON ConfigurationInputs where
parseJSON = \ case
2021-07-13 18:49:19 +03:00
J . Object o -> FromEnvs <$> o J ..: " from_env "
2021-04-12 13:18:29 +03:00
s @ ( J . Array _ ) -> FromYamls <$> J . parseJSON s
2021-07-13 18:49:19 +03:00
_ -> fail " one of array or object must be provided "
2021-04-12 13:18:29 +03:00
-- | Configuration input when the YAML value as well as the Env var have
2022-10-12 19:28:51 +03:00
-- singular values
2021-04-12 13:18:29 +03:00
data ConfigurationInput
2022-07-29 17:05:03 +03:00
= FromYaml Text
| FromEnv Text
2021-04-12 13:18:29 +03:00
deriving stock ( Show , Eq , Generic )
deriving ( NFData , Hashable )
2021-09-24 01:56:37 +03:00
2022-10-12 19:28:51 +03:00
instance HasCodec ConfigurationInput where
codec =
dimapCodec
( either FromYaml FromEnv )
( \ case FromYaml i -> Left i ; FromEnv i -> Right i )
$ disjointEitherCodec fromYamls fromEnvCodec
where
fromYamls =
parseAlternative
( codec @ Text )
( tshow <$> codec @ Scientific )
2021-04-12 13:18:29 +03:00
instance J . ToJSON ConfigurationInput where
toJSON = \ case
FromYaml i -> J . toJSON i
2021-07-13 18:49:19 +03:00
FromEnv i -> J . object [ " from_env " J ..= i ]
2021-09-24 01:56:37 +03:00
2021-04-12 13:18:29 +03:00
instance J . FromJSON ConfigurationInput where
parseJSON = \ case
2021-07-13 18:49:19 +03:00
J . Object o -> FromEnv <$> o J ..: " from_env "
2021-04-12 13:18:29 +03:00
s @ ( J . String _ ) -> FromYaml <$> J . parseJSON s
2021-07-30 10:54:50 +03:00
( J . Number n ) -> FromYaml <$> J . parseJSON ( J . String ( tshow n ) )
2021-07-13 18:49:19 +03:00
_ -> fail " one of string or number or object must be provided "
2021-09-24 01:56:37 +03:00
2021-04-12 13:18:29 +03:00
data BigQueryConnSourceConfig = BigQueryConnSourceConfig
2022-02-14 12:45:46 +03:00
{ _cscServiceAccount :: ConfigurationJSON ServiceAccount ,
_cscDatasets :: ConfigurationInputs ,
2022-12-16 16:17:41 +03:00
_cscProjectId :: ConfigurationInput , -- we use this projectId instead of the one from the service account as a service account may have access to multiple projects and we wish to choose which one to use
2022-02-14 12:45:46 +03:00
_cscGlobalSelectLimit :: Maybe ConfigurationInput ,
_cscRetryBaseDelay :: Maybe ConfigurationInput ,
_cscRetryLimit :: Maybe ConfigurationInput
2021-04-12 13:18:29 +03:00
}
deriving ( Eq , Generic , NFData )
2021-09-24 01:56:37 +03:00
2021-04-12 13:18:29 +03:00
$ ( J . deriveJSON ( J . aesonDrop 4 J . snakeCase ) { J . omitNothingFields = True } ''BigQueryConnSourceConfig )
2021-09-24 01:56:37 +03:00
server: polymorphic codec for metadata sources
This PR expands the OpenAPI specification generated for metadata to include separate definitions for `SourceMetadata` for each native database type, and for DataConnector.
For the most part the changes add `HasCodec` implementations, and don't modify existing code otherwise.
The generated OpenAPI spec can be used to generate TypeScript definitions that distinguish different source metadata types based on the value of the `kind` properly. There is a problem: because the specified `kind` value for a data connector source is any string, when TypeScript gets a source with a `kind` value of, say, `"postgres"`, it cannot unambiguously determine whether the source is postgres, or a data connector. For example,
```ts
function consumeSourceMetadata(source: SourceMetadata) {
if (source.kind === "postgres" || source.kind === "pg") {
// At this point TypeScript infers that `source` is either an instance
// of `PostgresSourceMetadata`, or `DataconnectorSourceMetadata`. It
// can't narrow further.
source
}
if (source.kind === "something else") {
// TypeScript infers that this `source` must be an instance of
// `DataconnectorSourceMetadata` because `source.kind` does not match
// any of the other options.
source
}
}
```
The simplest way I can think of to fix this would be to add a boolean property to the `SourceMetadata` type along the lines of `isNative` or `isDataConnector`. This could be a field that only exists in serialized data, like the metadata version field. The combination of one of the native database names for `kind`, and a true value for `isNative` would be enough for TypeScript to unambiguously distinguish the source kinds.
But note that in the current state TypeScript is able to reference the short `"pg"` name correctly!
~~Tests are not passing yet due to some discrepancies in DTO serialization vs existing Metadata serialization. I'm working on that.~~
The placeholders that I used for table and function metadata are not compatible with the ordered JSON serialization in use. I think the best solution is to write compatible codecs for those types in another PR. For now I have disabled some DTO tests for this PR.
Here are the generated [OpenAPI spec](https://github.com/hasura/graphql-engine-mono/files/9397333/openapi.tar.gz) based on these changes, and the generated [TypeScript client code](https://github.com/hasura/graphql-engine-mono/files/9397339/client-typescript.tar.gz) based on that spec.
Ticket: [MM-66](https://hasurahq.atlassian.net/browse/MM-66)
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/5582
GitOrigin-RevId: e1446191c6c832879db04f129daa397a3be03f62
2022-08-25 21:34:44 +03:00
-- TODO: Write a proper codec, and use it to derive FromJSON and ToJSON
-- instances.
instance HasCodec BigQueryConnSourceConfig where
2022-10-12 19:28:51 +03:00
codec =
object " BigQueryConnSourceConfig " $
BigQueryConnSourceConfig
<$> requiredField' " service_account " .= _cscServiceAccount
<*> requiredField' " datasets " .= _cscDatasets
<*> requiredField' " project_id " .= _cscProjectId
<*> optionalFieldOrNull' " global_select_limit " .= _cscGlobalSelectLimit
<*> optionalFieldOrNull' " retry_base_delay " .= _cscRetryBaseDelay
<*> optionalFieldOrNull' " retry_limit " .= _cscRetryLimit
server: polymorphic codec for metadata sources
This PR expands the OpenAPI specification generated for metadata to include separate definitions for `SourceMetadata` for each native database type, and for DataConnector.
For the most part the changes add `HasCodec` implementations, and don't modify existing code otherwise.
The generated OpenAPI spec can be used to generate TypeScript definitions that distinguish different source metadata types based on the value of the `kind` properly. There is a problem: because the specified `kind` value for a data connector source is any string, when TypeScript gets a source with a `kind` value of, say, `"postgres"`, it cannot unambiguously determine whether the source is postgres, or a data connector. For example,
```ts
function consumeSourceMetadata(source: SourceMetadata) {
if (source.kind === "postgres" || source.kind === "pg") {
// At this point TypeScript infers that `source` is either an instance
// of `PostgresSourceMetadata`, or `DataconnectorSourceMetadata`. It
// can't narrow further.
source
}
if (source.kind === "something else") {
// TypeScript infers that this `source` must be an instance of
// `DataconnectorSourceMetadata` because `source.kind` does not match
// any of the other options.
source
}
}
```
The simplest way I can think of to fix this would be to add a boolean property to the `SourceMetadata` type along the lines of `isNative` or `isDataConnector`. This could be a field that only exists in serialized data, like the metadata version field. The combination of one of the native database names for `kind`, and a true value for `isNative` would be enough for TypeScript to unambiguously distinguish the source kinds.
But note that in the current state TypeScript is able to reference the short `"pg"` name correctly!
~~Tests are not passing yet due to some discrepancies in DTO serialization vs existing Metadata serialization. I'm working on that.~~
The placeholders that I used for table and function metadata are not compatible with the ordered JSON serialization in use. I think the best solution is to write compatible codecs for those types in another PR. For now I have disabled some DTO tests for this PR.
Here are the generated [OpenAPI spec](https://github.com/hasura/graphql-engine-mono/files/9397333/openapi.tar.gz) based on these changes, and the generated [TypeScript client code](https://github.com/hasura/graphql-engine-mono/files/9397339/client-typescript.tar.gz) based on that spec.
Ticket: [MM-66](https://hasurahq.atlassian.net/browse/MM-66)
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/5582
GitOrigin-RevId: e1446191c6c832879db04f129daa397a3be03f62
2022-08-25 21:34:44 +03:00
2022-12-16 16:17:41 +03:00
deriving stock instance Show BigQueryConnSourceConfig
2021-09-24 01:56:37 +03:00
2021-04-12 13:18:29 +03:00
deriving instance Hashable BigQueryConnSourceConfig
2021-09-24 01:56:37 +03:00
2022-02-14 12:45:46 +03:00
data RetryOptions = RetryOptions
{ _retryBaseDelay :: Microseconds ,
_retryNumRetries :: Int
}
deriving ( Eq )
2022-02-09 18:26:14 +03:00
data BigQueryConnection = BigQueryConnection
2022-02-14 12:45:46 +03:00
{ _bqServiceAccount :: ServiceAccount ,
2022-12-16 16:17:41 +03:00
_bqProjectId :: BigQueryProjectId , -- we use this projectId instead of the one from the service account as a service account may have access to multiple projects and we wish to choose which one to use
2022-02-14 12:45:46 +03:00
_bqRetryOptions :: Maybe RetryOptions ,
_bqAccessTokenMVar :: MVar ( Maybe TokenResp )
2022-02-09 18:26:14 +03:00
}
deriving ( Eq )
2021-04-12 13:18:29 +03:00
data BigQuerySourceConfig = BigQuerySourceConfig
2022-02-14 12:45:46 +03:00
{ _scConnection :: BigQueryConnection ,
2022-12-16 16:17:41 +03:00
_scDatasets :: [ BigQueryDataset ] ,
2022-02-14 12:45:46 +03:00
_scGlobalSelectLimit :: Int . Int64
2021-09-24 01:56:37 +03:00
}
2021-07-30 18:42:36 +03:00
deriving ( Eq )
2021-04-12 13:18:29 +03:00
2023-01-04 11:28:19 +03:00
instance Show BigQuerySourceConfig where
show _ = " (BigQuerySourceConfig <details>) "
2021-07-30 18:42:36 +03:00
instance J . ToJSON BigQuerySourceConfig where
toJSON BigQuerySourceConfig { .. } =
2022-02-14 12:45:46 +03:00
J . object $
2022-02-09 18:26:14 +03:00
[ " service_account " J ..= _bqServiceAccount _scConnection ,
2021-07-30 18:42:36 +03:00
" datasets " J ..= _scDatasets ,
2022-02-09 18:26:14 +03:00
" project_id " J ..= _bqProjectId _scConnection ,
2021-07-30 18:42:36 +03:00
" global_select_limit " J ..= _scGlobalSelectLimit
]
2022-02-14 12:45:46 +03:00
<> case _bqRetryOptions _scConnection of
Just RetryOptions { .. } ->
[ " base_delay " J ..= diffTimeToMicroSeconds ( microseconds _retryBaseDelay ) ,
" retry_limit " J ..= _retryNumRetries
]
Nothing -> []