{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
-- | Postgres DDL Source
-- A Source is a connected database. One can have multiple sources of the same
-- kind (e.g. Postgres).
-- This module provides ways to fetch, update, and deal with table and function
-- metadata and hdb_catalog migrations for a Postgres Source.
-- NOTE: Please have a look at the `server/documentation/migration-guidelines.md`
-- before adding any new migration if you haven't already looked at it.
module Hasura.Backends.Postgres.DDL.Source
( ToMetadataFetchQuery,
import Control.Concurrent.Extended (sleep)
import Control.Monad.Trans.Control (MonadBaseControl)
import Data.Aeson (ToJSON, toJSON)
import Data.Aeson.TH
import Data.Environment qualified as Env
import Data.FileEmbed (makeRelativeToProject)
import Data.HashMap.Strict qualified as Map
import Data.HashMap.Strict.InsOrd qualified as OMap
import Data.List.Extended qualified as LE
import Data.List.NonEmpty qualified as NE
import Data.Time.Clock (UTCTime, getCurrentTime)
import Database.PG.Query qualified as Q
import Hasura.Backends.Postgres.Connection
import Hasura.Backends.Postgres.DDL.Source.Version
import Hasura.Backends.Postgres.SQL.Types hiding (FunctionName)
import Hasura.Backends.Postgres.Types.ComputedFieldDefinition
import Hasura.Base.Error
import Hasura.Logging
import Hasura.Prelude
import Hasura.RQL.Types.Backend
import Hasura.RQL.Types.Common
import Hasura.RQL.Types.EventTrigger (RecreateEventTriggers (..))
import Hasura.RQL.Types.Function
import Hasura.RQL.Types.Metadata (SourceMetadata (..), TableMetadata (..), _cfmDefinition)
import Hasura.RQL.Types.Source
import Hasura.RQL.Types.SourceCustomization
import Hasura.RQL.Types.Table
import Hasura.SQL.Backend
import Hasura.Server.Migrate.Internal
import Language.Haskell.TH.Lib qualified as TH
import Language.Haskell.TH.Syntax qualified as TH
-- | We differentiate the handling of metadata between Citus and Vanilla
-- Postgres because Citus imposes limitations on the types of joins that it
-- permits, which then limits the types of relations that we can track.
class ToMetadataFetchQuery (pgKind :: PostgresKind) where
  -- | The SQL query that 'fetchTableMetadata' runs for this Postgres kind.
  tableMetadata :: Q.Query
-- | Vanilla Postgres: the query is embedded at compile time from
-- @src-rsr/pg_table_metadata.sql@.
instance ToMetadataFetchQuery 'Vanilla where
  tableMetadata = $(makeRelativeToProject "src-rsr/pg_table_metadata.sql" >>= Q.sqlFromFile)
-- | Citus: the query is embedded at compile time from
-- @src-rsr/citus_table_metadata.sql@.
instance ToMetadataFetchQuery 'Citus where
  tableMetadata = $(makeRelativeToProject "src-rsr/citus_table_metadata.sql" >>= Q.sqlFromFile)
-- | Resolve the connection configuration of a Postgres source into a
-- 'SourceConfig'.
--
-- The resolver is obtained from 'getPGSourceResolver' and applied to the
-- source name and connection configuration. The backend kind, backend config
-- and environment arguments are accepted but not used here. Resolver failures
-- are returned as 'Left'.
resolveSourceConfig ::
  (MonadIO m, MonadResolveSource m) =>
  SourceName ->
  PostgresConnConfiguration ->
  BackendSourceKind ('Postgres pgKind) ->
  BackendConfig ('Postgres pgKind) ->
  Env.Environment ->
  m (Either QErr (SourceConfig ('Postgres pgKind)))
resolveSourceConfig name config _backendKind _backendConfig _env = runExceptT do
  sourceResolver <- getPGSourceResolver
  liftEitherM $ liftIO $ sourceResolver name config
-- | 'PGSourceLockQuery' is a data type which represents the contents of a single object of the
--   locked queries which are queried from the `pg_stat_activity`. See `logPGSourceCatalogMigrationLockedQueries`.
data PGSourceLockQuery = PGSourceLockQuery
  { -- | Text of the waiting query.
    _psqaQuery :: !Text,
    -- | Whether the lock has been granted; absent when there is no matching
    -- @pg_locks@ row (the query uses a LEFT JOIN).
    _psqaLockGranted :: !(Maybe Bool),
    _psqaLockMode :: !Text,
    _psqaTransactionStartTime :: !UTCTime,
    _psqaQueryStartTime :: !UTCTime,
    _psqaWaitEventType :: !Text,
    -- | Truncated text of the query that is blocking this one.
    _psqaBlockingQuery :: !Text
  }

-- JSON instances are derived with the shared 'hasuraJSON' options.
$(deriveJSON hasuraJSON ''PGSourceLockQuery)
-- | Locked queries are logged at info level under the source catalog
-- migration log type.
instance ToEngineLog [PGSourceLockQuery] Hasura where
  toEngineLog resp = (LevelInfo, sourceCatalogMigrationLogType, toJSON resp)
-- | Wrapper for an error raised while fetching the locked queries, so that it
-- can be given its own 'ToEngineLog' instance.
newtype PGSourceLockQueryError = PGSourceLockQueryError QErr
  deriving (ToJSON)
-- | Failures to fetch the locked queries are logged at error level under the
-- source catalog migration log type.
instance ToEngineLog PGSourceLockQueryError Hasura where
  toEngineLog resp = (LevelError, sourceCatalogMigrationLogType, toJSON resp)
-- | 'logPGSourceCatalogMigrationLockedQueries' as the name suggests logs
--   the queries which are blocking in the database. This function is called
--   asynchronously from `initCatalogIfNeeded` while the source catalog is being
--   migrated.
--
--   The function never returns: each iteration runs the lock query in a read
--   transaction, logs the outcome and then sleeps for 5 seconds.
--
--   NOTE: When there are no locking queries present in the database, nothing will be logged.
logPGSourceCatalogMigrationLockedQueries ::
  MonadIO m =>
  Logger Hasura ->
  PGSourceConfig ->
  m Void
logPGSourceCatalogMigrationLockedQueries logger sourceConfig = forever $ do
  dbStats <- liftIO $ runPgSourceReadTx sourceConfig fetchLockedQueriesTx
  case dbStats of
    -- The lock query itself failed: report it as an error and keep polling.
    Left err -> unLogger logger $ PGSourceLockQueryError err
    Right (val :: (Maybe [PGSourceLockQuery])) ->
      case val of
        Nothing -> pure ()
        Just [] -> pure ()
        Just val' -> liftIO $ unLogger logger $ val'
  liftIO $ sleep $ seconds 5
  where
    -- The blocking query in the below transaction is truncated to the first 20 characters because it may contain
    -- sensitive info.
    --
    -- NOTE(review): the error handler, the empty parameter tuple and the
    -- trailing prepared-statement flag were missing from the damaged text;
    -- they are restored to match the other 'Q' calls in this module. Confirm
    -- the flag value.
    fetchLockedQueriesTx =
      (Q.getAltJ . runIdentity . Q.getRow)
        <$> Q.withQE
          defaultTxErrorHandler
          [Q.sql|
            SELECT COALESCE(json_agg(DISTINCT jsonb_build_object('query', psa.query, 'lock_granted', pl.granted, 'lock_mode', pl.mode, 'transaction_start_time', psa.xact_start, 'query_start_time', psa.query_start, 'wait_event_type', psa.wait_event_type, 'blocking_query', (SUBSTRING(blocking.query, 1, 20) || '...') )), '[]'::json)
            FROM pg_stat_activity psa
            JOIN pg_stat_activity blocking ON blocking.pid = ANY(pg_blocking_pids(psa.pid))
            LEFT JOIN pg_locks pl ON psa.pid = pl.pid
            WHERE psa.query ILIKE '%hdb_catalog%' AND psa.wait_event_type IS NOT NULL
            AND psa.query ILIKE any (array ['%create%', '%drop%', '%alter%']);
          |]
          ()
          False
-- | Fetch the database-side metadata needed to resolve a Postgres source.
--
-- Inside a single read-only transaction this fetches:
--
--   * metadata for the tables tracked in the source metadata,
--   * metadata for the tracked functions plus the functions backing computed
--     fields of the tracked tables,
--   * the scalar types known to the database.
--
-- The results are packaged, together with the given source config and
-- customization, into a 'ResolvedSource'. Transaction errors are returned as
-- 'Left'.
resolveDatabaseMetadata ::
  forall pgKind m.
  (Backend ('Postgres pgKind), ToMetadataFetchQuery pgKind, MonadIO m, MonadBaseControl IO m) =>
  SourceMetadata ('Postgres pgKind) ->
  SourceConfig ('Postgres pgKind) ->
  SourceTypeCustomization ->
  m (Either QErr (ResolvedSource ('Postgres pgKind)))
resolveDatabaseMetadata sourceMetadata sourceConfig sourceCustomization = runExceptT do
  (tablesMeta, functionsMeta, pgScalars) <- runTx (_pscExecCtx sourceConfig) Q.ReadOnly $ do
    tablesMeta <- fetchTableMetadata $ OMap.keys $ _smTables sourceMetadata
    let allFunctions =
          OMap.keys (_smFunctions sourceMetadata) -- Tracked functions
            <> concatMap getComputedFieldFunctionsMetadata (OMap.elems $ _smTables sourceMetadata) -- Computed field functions
    functionsMeta <- fetchFunctionMetadata allFunctions
    pgScalars <- fetchPgScalars
    pure (tablesMeta, functionsMeta, pgScalars)
  pure $ ResolvedSource sourceConfig sourceCustomization tablesMeta functionsMeta pgScalars
  where
    -- A helper function to list all functions underpinning computed fields from a table metadata
    getComputedFieldFunctionsMetadata :: TableMetadata ('Postgres pgKind) -> [FunctionName ('Postgres pgKind)]
    getComputedFieldFunctionsMetadata =
      map (_cfdFunction . _cfmDefinition) . OMap.elems . _tmComputedFields
-- | Initialise catalog tables for a source, including those required by the event delivery subsystem.
--
-- Runs in a single read-write transaction and distinguishes four situations,
-- based on which of the @hdb_catalog@ schema, the @event_log@ table and the
-- @hdb_source_catalog_version@ table already exist. The result tells the
-- caller whether existing event triggers need to be recreated.
prepareCatalog ::
  (MonadIO m, MonadBaseControl IO m) =>
  SourceConfig ('Postgres pgKind) ->
  ExceptT QErr m RecreateEventTriggers
prepareCatalog sourceConfig = runTx (_pscExecCtx sourceConfig) Q.ReadWrite do
  hdbCatalogExist <- doesSchemaExist "hdb_catalog"
  eventLogTableExist <- doesTableExist "hdb_catalog" "event_log"
  sourceVersionTableExist <- doesTableExist "hdb_catalog" "hdb_source_catalog_version"
  if
      -- Fresh database
      | not hdbCatalogExist -> liftTx do
        Q.unitQE defaultTxErrorHandler "CREATE SCHEMA hdb_catalog" () False
        -- A freshly created schema is empty, so the source catalog must be
        -- initialised here as well, exactly as in the "only schema defined"
        -- case below. Otherwise the source is left without catalog tables
        -- until the next call.
        initPgSourceCatalog
        return RETDoNothing
      -- Only 'hdb_catalog' schema defined
      | not sourceVersionTableExist && not eventLogTableExist -> do
        liftTx initPgSourceCatalog
        return RETDoNothing
      -- Source is initialised by pre multisource support servers
      | not sourceVersionTableExist && eventLogTableExist -> do
        -- Update the Source Catalog to v43 to include the new migration
        -- changes. Skipping this step will result in errors.
        currMetadataCatalogVersionFloat <- liftTx getCatalogVersion
        -- we migrate to the 43 version, which is the migration where
        -- metadata separation is introduced
        migrateTo43MetadataCatalog currMetadataCatalogVersionFloat
        liftTx createVersionTable
        -- Migrate the catalog from initial version i.e '0'
        migrateSourceCatalogFrom "0"
      | otherwise -> migrateSourceCatalog
  where
    -- Run the embedded initialisation script and record the catalog version.
    --
    -- NOTE(review): the statement following the script was missing from the
    -- damaged text. 'setSourceCatalogVersion' is assumed to be exported by
    -- "Hasura.Backends.Postgres.DDL.Source.Version" -- confirm.
    initPgSourceCatalog = do
      () <- Q.multiQE defaultTxErrorHandler $(makeRelativeToProject "src-rsr/init_pg_source.sql" >>= Q.sqlFromFile)
      setSourceCatalogVersion

    -- Create the single-row table holding the source catalog version. The
    -- unique index on a constant expression prevents a second row.
    --
    -- NOTE(review): the column after @version@ was missing from the damaged
    -- text (only the trailing comma survived). @upgraded_on@ is restored on
    -- the assumption that the version setter writes it -- confirm.
    createVersionTable = do
      () <-
        Q.multiQE
          defaultTxErrorHandler
          [Q.sql|
            CREATE TABLE hdb_catalog.hdb_source_catalog_version(
              version TEXT NOT NULL,
              upgraded_on TIMESTAMPTZ NOT NULL
            );

            CREATE UNIQUE INDEX hdb_source_catalog_version_one_row
            ON hdb_catalog.hdb_source_catalog_version((version IS NOT NULL));
          |]
      pure ()

    -- Bring the metadata catalog up to version 43 starting at 'prevVersion'.
    migrateTo43MetadataCatalog prevVersion = do
      let neededMigrations = dropWhile ((< prevVersion) . fst) upMigrationsUntil43
      case NE.nonEmpty neededMigrations of
        Just nonEmptyNeededMigrations -> do
          -- Migrations aren't empty. We need to update the catalog version after migrations
          migrationTime <- liftIO getCurrentTime
          liftTx $ traverse_ snd nonEmptyNeededMigrations
          setCatalogVersion "43" migrationTime
        Nothing ->
          -- No migrations exists, implies the database is migrated to latest metadata catalog version
          pure ()
-- NOTE (rakesh):
-- Down migrations for postgres sources are not supported in this PR. We need an
-- exhaustive discussion to make a call as I think, as of now, it is not
-- trivial. For metadata catalog migrations, we have a separate downgrade
-- command in the graphql-engine exe.
--
-- I can think of two ways:
--
--  - Just like downgrade, we need to have a new command path for downgrading
--  pg sources (command design should support other backends too,
--  graphql-engine source-downgrade postgres --to-catalog-version 1 --
--  downgrade all available pg sources to 1)
--
--  - Have online documentation with the necessary SQL to help users
--  downgrade pg sources themselves. Improve the error message by referring
--  to the URL of that documentation.
-- | Migrate the source catalog starting from whatever version is currently
-- recorded in the database. See 'migrateSourceCatalogFrom'.
migrateSourceCatalog :: MonadTx m => m RecreateEventTriggers
migrateSourceCatalog =
  getSourceCatalogVersion >>= migrateSourceCatalogFrom
-- | `migrateSourceCatalogFrom` migrates the catalog from a lower to a higher version.
--   When there are any changes in the source catalog, then re-create the existing event
--   triggers in the metadata. This is done so that the event triggers be compatible with the
--   changes introduced in the newly added source catalog migrations. When the source is already
--   in the latest catalog version, we do nothing because nothing has changed w.r.t the source catalog
--   so recreating the event triggers will only be extraneous.
--
--   Throws a 400 'NotSupported' error when the given version is not the
--   starting point of any known migration (for instance, a version newer than
--   this server supports).
migrateSourceCatalogFrom :: (MonadTx m) => Text -> m RecreateEventTriggers
migrateSourceCatalogFrom prevVersion
  | prevVersion == latestSourceCatalogVersionText = pure RETDoNothing
  | [] <- neededMigrations =
    throw400 NotSupported $
      "Expected source catalog version <= "
        <> latestSourceCatalogVersionText
        <> ", but the current version is "
        <> prevVersion
  | otherwise = do
    liftTx $ traverse_ snd neededMigrations
    -- Record the new version; without this the same migrations would be
    -- selected again on the next run.
    --
    -- NOTE(review): this statement was missing from the damaged text.
    -- 'setSourceCatalogVersion' is assumed to be exported by
    -- "Hasura.Backends.Postgres.DDL.Source.Version" -- confirm.
    setSourceCatalogVersion
    pure RETRecreate
  where
    -- Every migration from the one starting at 'prevVersion' onwards.
    neededMigrations =
      dropWhile ((/= prevVersion) . fst) sourceMigrations
-- | All source catalog migrations, in order. Each entry pairs the version a
-- migration starts from (as text) with the transaction that applies it.
--
-- The list is built at compile time: for every version @n@ from 0 up to (but
-- excluding) 'latestSourceCatalogVersion', the file
-- @src-rsr/pg_source_migrations/<n>_to_<n+1>.sql@ is embedded.
sourceMigrations :: [(Text, Q.TxE QErr ())]
sourceMigrations =
  $( let migrationFromFile from =
           let to = from + 1
               path = "src-rsr/pg_source_migrations/" <> show from <> "_to_" <> show to <> ".sql"
            in [|Q.multiQE defaultTxErrorHandler $(makeRelativeToProject path >>= Q.sqlFromFile)|]

         migrationsFromFile = map $ \(from :: Integer) ->
           [|($(TH.lift $ tshow from), $(migrationFromFile from))|]
      in TH.listE $ migrationsFromFile [0 .. (latestSourceCatalogVersion - 1)]
   )
-- | Upgrade the hdb_catalog schema to v43 (Metadata catalog).
--
-- Each entry pairs the version a migration starts from with the transaction
-- that applies it. Files are embedded at compile time from
-- @src-rsr/migrations/<from>_to_<to>.sql@; the 3 -> 4 step is the in-code
-- migration 'from3To4' instead of a file.
upMigrationsUntil43 :: [(Float, Q.TxE QErr ())]
upMigrationsUntil43 =
  $( let migrationFromFile from to =
           let path = "src-rsr/migrations/" <> from <> "_to_" <> to <> ".sql"
            in [|Q.multiQE defaultTxErrorHandler $(makeRelativeToProject path >>= Q.sqlFromFile)|]

         migrationsFromFile = map $ \(to :: Float) ->
           let from = to - 1
            in [|
                 ( $(TH.lift from),
                   $(migrationFromFile (show (floor from :: Integer)) (show (floor to :: Integer)))
                 )
                 |]
      in -- version 0.8 is the only non-integral catalog version
         -- The 41st migration which included only source catalog migration
         -- was introduced before metadata separation changes were introduced
         -- in the graphql-engine. Now the earlier 41st migration has been
         -- moved to source catalog migrations and the 41st up migration is removed
         -- entirely.
         TH.listE $
           [|(0.8, $(migrationFromFile "08" "1"))|] :
           migrationsFromFile [2 .. 3]
             ++ [|(3, from3To4)|] :
           migrationsFromFile [5 .. 40]
             ++ migrationsFromFile [42 .. 43]
   )
-- | Fetch Postgres metadata of all user tables
--
-- The query is chosen by the 'ToMetadataFetchQuery' instance for @pgKind@ and
-- receives the de-duplicated list of tables as a single JSON parameter. Each
-- result row is keyed by its schema and table name.
--
-- NOTE(review): @Q.withQE@, its error handler and the trailing
-- prepared-statement flag were missing from the damaged text and are restored
-- to match the other 'Q' calls in this module -- confirm the flag value.
fetchTableMetadata ::
  forall pgKind m.
  (Backend ('Postgres pgKind), ToMetadataFetchQuery pgKind, MonadTx m) =>
  [QualifiedTable] ->
  m (DBTablesMetadata ('Postgres pgKind))
fetchTableMetadata tables = do
  results <-
    liftTx $
      Q.withQE
        defaultTxErrorHandler
        (tableMetadata @pgKind)
        [Q.AltJ $ LE.uniques tables]
        True
  pure $
    Map.fromList $
      flip map results $
        \(schema, table, Q.AltJ info) -> (QualifiedObject schema table, info)
-- | Fetch Postgres metadata for all user functions
--
-- Runs the query embedded from @src-rsr/pg_function_metadata.sql@ with the
-- de-duplicated list of functions as a single JSON parameter. Each result row
-- is keyed by its schema and function name.
--
-- NOTE(review): @Q.withQE@, its error handler and the trailing
-- prepared-statement flag were missing from the damaged text and are restored
-- to match the other 'Q' calls in this module -- confirm the flag value.
fetchFunctionMetadata :: (MonadTx m) => [QualifiedFunction] -> m (DBFunctionsMetadata ('Postgres pgKind))
fetchFunctionMetadata functions = do
  results <-
    liftTx $
      Q.withQE
        defaultTxErrorHandler
        $(makeRelativeToProject "src-rsr/pg_function_metadata.sql" >>= Q.sqlFromFile)
        [Q.AltJ $ LE.uniques functions]
        True
  pure $
    Map.fromList $
      flip map results $
        \(schema, function, Q.AltJ infos) -> (QualifiedObject schema function, infos)
-- | Fetch all scalar types from Postgres
--
-- Selects the names of all @pg_type@ entries whose @typtype@ is @'b'@,
-- aggregated into one JSON array (an empty array when there are none).
--
-- NOTE(review): the error handler, the quasi-quote delimiters, the empty
-- parameter tuple and the trailing prepared-statement flag were missing from
-- the damaged text and are restored to match the other 'Q' calls in this
-- module -- confirm the flag value.
fetchPgScalars :: MonadTx m => m (HashSet PGScalarType)
fetchPgScalars =
  liftTx $
    Q.getAltJ . runIdentity . Q.getRow
      <$> Q.withQE
        defaultTxErrorHandler
        [Q.sql|
          SELECT coalesce(json_agg(typname), '[]')
          FROM pg_catalog.pg_type where typtype = 'b'
        |]
        ()
        True
-- | Clean source database after dropping in metadata
--
-- Removes Hasura's traces from the source database in a write transaction,
-- then destroys the source connection and finally runs the post-drop hook
-- stored in the source config. A failing cleanup transaction is rethrown via
-- 'MonadError' before the connection is touched.
postDropSourceHook ::
  (MonadIO m, MonadError QErr m, MonadBaseControl IO m) =>
  PGSourceConfig ->
  m ()
postDropSourceHook sourceConfig = do
  -- Clean traces of Hasura in source database
  --
  -- There are three type of database we have to consider here, which we
  -- refer to as types 1, 2, and 3 below:
  --   1. default postgres source (no separate metadata database)
  --      In this case, we want to only drop source-related tables ("event_log",
  --      "hdb_source_catalog_version", etc), leaving the rest of the schema
  --      intact.
  --
  --   2. dedicated metadata database
  --      Ideally a dedicated metadata database won't have any source related
  --      tables. But if it does, then drop only source-related tables, leaving the
  --      rest of schema intact.
  --
  --   3. non-default postgres source (necessarily without metadata tables)
  --      In this case, we want to drop the entire "hdb_catalog" schema.
  liftEitherM $
    runPgSourceWriteTx sourceConfig $ do
      hdbMetadataTableExist <- doesTableExist "hdb_catalog" "hdb_metadata"
      if
          -- If "hdb_metadata" exists, we have one of two possible cases:
          --   * this is a metadata database (type 2)
          --   * this is a default database (type 1)
          --
          -- Both of the possible cases might have source-related tables. And in
          -- both the cases we only want to drop the source-related tables
          -- leaving rest of the schema intact.
          --
          -- To adhere to the spec described above, we use DROP IF EXISTS
          -- statements for all source-related tables. The IF EXISTS lets us
          -- handle both cases uniformly, doing "ideally" nothing in the type 2
          -- database, and for default databases, we drop only source-related
          -- tables from the database's "hdb_catalog" schema.
          | hdbMetadataTableExist ->
            Q.multiQE
              defaultTxErrorHandler
              $(makeRelativeToProject "src-rsr/drop_pg_source.sql" >>= Q.sqlFromFile)
          -- Otherwise, we have a non-default postgres source, which has no metadata tables.
          -- We drop the entire "hdb_catalog" schema as discussed above.
          --
          -- NOTE(review): the body of this branch was missing from the
          -- damaged text; the statement below implements what the comment
          -- describes. If the project already has a helper for dropping the
          -- schema, prefer it.
          | otherwise ->
            Q.unitQE defaultTxErrorHandler "DROP SCHEMA IF EXISTS hdb_catalog CASCADE" () False

  -- Destroy postgres source connection
  liftIO $ _pecDestroyConn $ _pscExecCtx sourceConfig

  -- Run other drop hooks configured at source creation time
  liftIO $ _pscPostDropHook sourceConfig