graphql-engine/server/src-lib/Hasura/RQL/DDL/Schema.hs
{-| This module (along with the various @Hasura.RQL.DDL.Schema.*@ modules) provides operations to
load and modify the Hasura catalog and schema cache.

* The /catalog/ refers to the set of PostgreSQL tables and views that store all schema information
  known by Hasura. This includes any tracked Postgres tables, views, and functions, all remote
  schemas, and any additional Hasura-specific information such as permissions and relationships.

  Primitive functions for loading and modifying the catalog are defined in
  "Hasura.RQL.DDL.Schema.Catalog", but most uses are wrapped by other functions to synchronize
  catalog information with the information in the schema cache.

* The /schema cache/ is a process-global value of type 'SchemaCache' that stores an in-memory
  representation of the data stored in the catalog. The in-memory representation is not identical
  to the data in the catalog, since it has some post-processing applied to it in order to make it
  easier to consume for other parts of the system, such as GraphQL schema generation. For example,
  although column information is represented by 'RawColumnInfo', the schema cache contains
  “processed” 'ColumnInfo' values, instead.

Ultimately, the catalog is the source of truth for all information contained in the schema
cache, but to avoid rebuilding the entire schema cache on every change to the catalog, various
functions incrementally update the cache when they modify the catalog.
-}

module Hasura.RQL.DDL.Schema
  ( module Hasura.RQL.DDL.Schema.Cache
  , module Hasura.RQL.DDL.Schema.Catalog
  , module Hasura.RQL.DDL.Schema.Function
  , module Hasura.RQL.DDL.Schema.Rename
  , module Hasura.RQL.DDL.Schema.Table
  , RunSQL(..)
  , runRunSQL
  , isSchemaCacheBuildRequiredRunSQL
  , RunSQLRes(..)
  ) where

import           Hasura.Prelude

import qualified Data.Text.Encoding                  as TE
import qualified Database.PG.Query                   as Q
import qualified Database.PostgreSQL.LibPQ           as PQ
import qualified Text.Regex.TDFA                     as TDFA

import           Control.Monad.Trans.Control         (MonadBaseControl)
import           Data.Aeson
import           Data.Aeson.TH

import           Hasura.Backends.Postgres.DDL.RunSQL
import           Hasura.Backends.Postgres.DDL.Source (ToMetadataFetchQuery)
import           Hasura.Base.Error
import           Hasura.Base.Instances               ()
import           Hasura.EncJSON
import           Hasura.RQL.DDL.Schema.Cache
import           Hasura.RQL.DDL.Schema.Catalog
import           Hasura.RQL.DDL.Schema.Function
import           Hasura.RQL.DDL.Schema.Rename
import           Hasura.RQL.DDL.Schema.Table
import           Hasura.RQL.Types
import           Hasura.Server.Utils                 (quoteRegex)

data RunSQL
  = RunSQL
  { rSql                      :: Text
  , rSource                   :: !SourceName
  , rCascade                  :: !Bool
  , rCheckMetadataConsistency :: !(Maybe Bool)
  , rTxAccessMode             :: !Q.TxAccess
  } deriving (Show, Eq)

instance FromJSON RunSQL where
  parseJSON = withObject "RunSQL" $ \o -> do
    rSql <- o .: "sql"
    rSource <- o .:? "source" .!= defaultSource
    rCascade <- o .:? "cascade" .!= False
    rCheckMetadataConsistency <- o .:? "check_metadata_consistency"
    isReadOnly <- o .:? "read_only" .!= False
    let rTxAccessMode = if isReadOnly then Q.ReadOnly else Q.ReadWrite
    pure RunSQL{..}

instance ToJSON RunSQL where
  toJSON RunSQL {..} =
    object
      [ "sql" .= rSql
      , "source" .= rSource
      , "cascade" .= rCascade
      , "check_metadata_consistency" .= rCheckMetadataConsistency
      , "read_only" .=
        case rTxAccessMode of
          Q.ReadOnly  -> True
          Q.ReadWrite -> False
      ]
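
-- A sample run_sql payload, as accepted by the 'FromJSON' instance above.
-- This is an illustrative sketch (the SQL statement and source name are
-- hypothetical); only the "sql" key is required, the rest have defaults:
--
-- > {
-- >   "sql": "ALTER TABLE authors ADD COLUMN age integer",
-- >   "source": "default",
-- >   "cascade": false,
-- >   "check_metadata_consistency": true,
-- >   "read_only": false
-- > }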

-- | see Note [Checking metadata consistency in run_sql]
isSchemaCacheBuildRequiredRunSQL :: RunSQL -> Bool
isSchemaCacheBuildRequiredRunSQL RunSQL {..} =
  case rTxAccessMode of
    Q.ReadOnly  -> False
    Q.ReadWrite -> fromMaybe (containsDDLKeyword rSql) rCheckMetadataConsistency
  where
    containsDDLKeyword :: Text -> Bool
    containsDDLKeyword = TDFA.match $$(quoteRegex
      TDFA.defaultCompOpt
        { TDFA.caseSensitive  = False
        , TDFA.multiline      = True
        , TDFA.lastStarGreedy = True }
      TDFA.defaultExecOpt
        { TDFA.captureGroups = False }
      "\\balter\\b|\\bdrop\\b|\\breplace\\b|\\bcreate function\\b|\\bcomment on\\b")

runRunSQL
  :: forall (pgKind :: PostgresKind) m
   . ( BackendMetadata ('Postgres pgKind)
     , ToMetadataFetchQuery pgKind
     , CacheRWM m
     , HasServerConfigCtx m
     , MetadataM m
     , MonadBaseControl IO m
     , MonadError QErr m
     , MonadIO m
     )
  => RunSQL
  -> m EncJSON
runRunSQL q@RunSQL {..}
  -- see Note [Checking metadata consistency in run_sql]
  | isSchemaCacheBuildRequiredRunSQL q
  = withMetadataCheck @pgKind rSource rCascade rTxAccessMode $ execRawSQL rSql
  | otherwise
  = askSourceConfig @('Postgres pgKind) rSource >>= \sourceConfig ->
      liftEitherM $ runExceptT $
        runLazyTx (_pscExecCtx sourceConfig) rTxAccessMode $ execRawSQL rSql
  where
    execRawSQL :: (MonadTx n) => Text -> n EncJSON
    execRawSQL =
      fmap (encJFromJValue @RunSQLRes) . liftTx . Q.multiQE rawSqlErrHandler . Q.fromText
      where
        rawSqlErrHandler txe =
          (err400 PostgresError "query execution failed") { qeInternal = Just $ toJSON txe }
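
-- A minimal usage sketch, assuming a monad @m@ satisfying the constraints
-- above and that 'Vanilla is the plain-Postgres 'PostgresKind' (hypothetical
-- values throughout):
--
-- > res <- runRunSQL @'Vanilla RunSQL
-- >   { rSql                      = "SELECT 1"
-- >   , rSource                   = defaultSource
-- >   , rCascade                  = False
-- >   , rCheckMetadataConsistency = Nothing
-- >   , rTxAccessMode             = Q.ReadOnly -- read-only: no metadata check
-- >   }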

{- Note [Checking metadata consistency in run_sql]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
SQL queries executed by run_sql may change the Postgres schema in arbitrary
ways. We attempt to automatically update the metadata to reflect those changes
as much as possible---for example, if a table is renamed, we want to update the
metadata to track the table under its new name instead of its old one. This
schema diffing (plus some integrity checking) is handled by withMetadataCheck.

But this process has overhead---it involves reloading the metadata, diffing it,
and rebuilding the schema cache---so we don't want to do it if it isn't
necessary. The user can explicitly disable the check via the
check_metadata_consistency option, and we also skip it if the current
transaction is in READ ONLY mode, since the schema can't be modified in that
case, anyway.

However, even if neither read_only nor check_metadata_consistency is passed,
lots of queries may not modify the schema at all. As a (fairly stupid)
heuristic, we check if the query contains any keywords for DDL operations, and
if not, we skip the metadata check as well. -}

data RunSQLRes
  = RunSQLRes
  { rrResultType :: !Text
  , rrResult     :: !Value
  } deriving (Show, Eq)
$(deriveJSON hasuraJSON ''RunSQLRes)

instance Q.FromRes RunSQLRes where
  fromRes (Q.ResultOkEmpty _) =
    return $ RunSQLRes "CommandOk" Null
  fromRes (Q.ResultOkData res) = do
    csvRows <- resToCSV res
    return $ RunSQLRes "TuplesOk" $ toJSON csvRows
    where
      resToCSV :: PQ.Result -> ExceptT Text IO [[Text]]
      resToCSV r = do
        nr <- liftIO $ PQ.ntuples r
        nc <- liftIO $ PQ.nfields r
        hdr <- forM [0..pred nc] $ \ic -> do
          colNameBS <- liftIO $ PQ.fname r ic
          maybe (return "unknown") decodeBS colNameBS
        rows <- forM [0..pred nr] $ \ir ->
          forM [0..pred nc] $ \ic -> do
            cellValBS <- liftIO $ PQ.getvalue r ir ic
            maybe (return "NULL") decodeBS cellValBS
        return $ hdr:rows
      decodeBS = either (throwError . tshow) return . TE.decodeUtf8'
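
-- For reference, 'hasuraJSON' snake-cases the record fields, so a "TuplesOk"
-- result serializes roughly as follows (row values hypothetical):
--
-- > {"result_type":"TuplesOk","result":[["id","name"],["1","alice"]]}
--
-- and an empty result as:
--
-- > {"result_type":"CommandOk","result":null}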