graphql-engine/server/src-lib/Hasura/App.hs

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1256 lines
48 KiB
Haskell
Raw Normal View History

{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE ViewPatterns #-}
-- | Imported by 'server/src-exec/Main.hs'.
module Hasura.App
( ExitCode (DatabaseMigrationError, DowngradeProcessError, MetadataCleanError, MetadataExportError, SchemaCacheInitError),
ExitException (ExitException),
GlobalCtx (..),
Loggers (..),
PGMetadataStorageAppT (runPGMetadataStorageAppT),
ServeCtx (ServeCtx, _scLoggers, _scMetadataDbPool, _scShutdownLatch),
ShutdownLatch,
accessDeniedErrMsg,
flushLogger,
getCatalogStateTx,
initGlobalCtx,
initialiseServeCtx,
migrateCatalogSchema,
mkLoggers,
mkPGLogger,
newShutdownLatch,
notifySchemaCacheSyncTx,
parseArgs,
throwErrExit,
throwErrJExit,
printJSON,
printYaml,
readTlsAllowlist,
resolvePostgresConnInfo,
runHGEServer,
setCatalogStateTx,
shutdownGracefully,
waitForShutdown,
shuttingDown,
-- * Exported for testing
mkHGEServer,
mkPgSourceResolver,
mkMSSQLSourceResolver,
)
where
import Control.Concurrent.Async.Lifted.Safe qualified as LA
import Control.Concurrent.Extended qualified as C
import Control.Concurrent.STM qualified as STM
import Control.Concurrent.STM.TVar (readTVarIO)
import Control.Exception (bracket_, throwIO)
import Control.Monad.Catch
( Exception,
MonadCatch,
MonadMask,
MonadThrow,
onException,
)
import Control.Monad.Morph (hoist)
import Control.Monad.STM (atomically)
import Control.Monad.Stateless
import Control.Monad.Trans.Control (MonadBaseControl (..))
import Control.Monad.Trans.Managed (ManagedT (..), allocate_)
import Control.Retry qualified as Retry
import Data.Aeson qualified as A
import Data.ByteString.Char8 qualified as BC
import Data.ByteString.Lazy qualified as BL
import Data.ByteString.Lazy.Char8 qualified as BLC
import Data.Environment qualified as Env
import Data.FileEmbed (makeRelativeToProject)
import Data.HashMap.Strict qualified as HM
import Data.Set.NonEmpty qualified as NE
import Data.Text qualified as T
import Data.Time.Clock (UTCTime)
import Data.Time.Clock qualified as Clock
import Data.Yaml qualified as Y
import Database.PG.Query qualified as PG
import GHC.AssertNF.CPP
import Hasura.Backends.MSSQL.Connection
import Hasura.Backends.Postgres.Connection
import Hasura.Base.Error
import Hasura.Eventing.Common
import Hasura.Eventing.EventTrigger
import Hasura.Eventing.ScheduledTrigger
import Hasura.GraphQL.Execute
( ExecutionStep (..),
MonadGQLExecutionCheck (..),
checkQueryInAllowlist,
)
import Hasura.GraphQL.Execute.Action
import Hasura.GraphQL.Execute.Action.Subscription
import Hasura.GraphQL.Execute.Backend qualified as EB
import Hasura.GraphQL.Execute.Subscription.Poll qualified as ES
import Hasura.GraphQL.Logging (MonadQueryLog (..))
import Hasura.GraphQL.Schema.Options qualified as Options
import Hasura.GraphQL.Transport.HTTP
( CacheStoreSuccess (CacheStoreSkipped),
MonadExecuteQuery (..),
)
import Hasura.GraphQL.Transport.HTTP.Protocol (toParsed)
import Hasura.GraphQL.Transport.WebSocket.Server qualified as WS
import Hasura.Logging
import Hasura.Metadata.Class
import Hasura.Prelude
import Hasura.QueryTags
import Hasura.RQL.DDL.EventTrigger (MonadEventLogCleanup (..))
import Hasura.RQL.DDL.Schema.Cache
import Hasura.RQL.DDL.Schema.Cache.Common
import Hasura.RQL.DDL.Schema.Catalog
import Hasura.RQL.Types.Allowlist
import Hasura.RQL.Types.Backend
import Hasura.RQL.Types.Common
import Hasura.RQL.Types.Eventing.Backend
import Hasura.RQL.Types.Metadata
import Hasura.RQL.Types.Network
import Hasura.RQL.Types.SchemaCache
import Hasura.RQL.Types.SchemaCache.Build
import Hasura.RQL.Types.Source
import Hasura.SQL.AnyBackend qualified as AB
import Hasura.SQL.Backend
import Hasura.Server.API.Query (requiresAdmin)
import Hasura.Server.App
import Hasura.Server.Auth
import Hasura.Server.CheckUpdates (checkForUpdates)
import Hasura.Server.Init
server: operation timeout with postgres cancelling ### Description This PR implements operation timeouts, as specced in #1232. RFC: [rfcs/operation-timeout-api-limits.md](https://github.com/hasura/graphql-engine-mono/blob/c025a90fe9779436bc0188a2bbf0ad95b5ed1f32/rfcs/operation-timeout-api-limits.md) There's still some things to be done (tests and docs most notably), but apart from that it can be reviewed. I'd still appreciate feedback on the RFC! TODO: - [x] break out the `ApiLimits` refactoring into a separate PR: #2103 - [x] finish the `pg-client-hs` PR: https://github.com/hasura/pg-client-hs/pull/39 - [x] remove configurability, after testing, prior to merging - [ ] tests: #2390 has some tests that I've run locally to confirm things work on a fundamental level - [x] changelog - [x] documentation - [x] fill in the detailed PR checklist ### Changelog - [x] `CHANGELOG.md` is updated with user-facing content relevant to this PR. If no changelog is required, then add the `no-changelog-required` label. ### Affected components - [x] Server - [ ] Console - [ ] CLI - [x] Docs - [ ] Tests ### Related Issues Product spec: #1232. ### Solution and Design Compare `rfcs/operation-timeout-api-limits.md`. ### Steps to test and verify Configure operation timeouts, e.g. by posting ``` { "type": "set_api_limits", "args": { "operation_timeout": { "global": 3 } } } ``` to `v1/metadata` to set an operation timeout of 3s. Then verify that 1. non-admin queries that take longer than 3s time out with a nice error message 2. that those queries return after ~3s (at least for postgres) 3. also that everything else still works as usual ### Limitations, known bugs & workarounds - while this will cause slow queries against any backends to fail, it's only verified to actually interrupt queries against postgres - this will only successfully short-cut (cancel) queries to postgres if the database server is responsive #### Catalog upgrade Does this PR change Hasura Catalog version? - [x] No #### Metadata Does this PR add a new Metadata feature? - [x] Yes - Does `run_sql` auto manages the new metadata through schema diffing? - [x] Not required - Does `run_sql` auto manages the definitions of metadata on renaming? - [x] Not required - Does `export_metadata`/`replace_metadata` supports the new metadata added? - [x] Yes #### GraphQL - [x] No new GraphQL schema is generated #### Breaking changes - [x] No Breaking changes PR-URL: https://github.com/hasura/graphql-engine-mono/pull/1593 GitOrigin-RevId: f0582d0be3ed9fadf89e0c4aaf96344d18331dc4
2021-09-29 19:20:06 +03:00
import Hasura.Server.Limits
import Hasura.Server.Logging
import Hasura.Server.Metrics (ServerMetrics (..))
import Hasura.Server.Migrate (migrateCatalog)
import Hasura.Server.Prometheus
( PrometheusMetrics (..),
decWarpThreads,
incWarpThreads,
)
import Hasura.Server.SchemaCacheRef
( SchemaCacheRef,
getSchemaCache,
initialiseSchemaCacheRef,
logInconsistentMetadata,
)
import Hasura.Server.SchemaUpdate
import Hasura.Server.Telemetry
import Hasura.Server.Types
import Hasura.Server.Version
import Hasura.Session
import Hasura.Tracing qualified as Tracing
import Network.HTTP.Client.Blocklisting (Blocklist)
server: http ip blocklist (closes #2449) ## Description This PR is in reference to #2449 (support IP blacklisting for multitenant) *RFC Update: Add support for IPv6 blocking* ### Solution and Design Using [http-client-restricted](https://hackage.haskell.org/package/http-client-restricted) package, we're creating the HTTP manager with restricting capabilities. The IPs can be supplied from the CLI arguments as `--ipv4BlocklistCidrs cidr1, cidr2...` or `--disableDefaultIPv4Blocklist` for a default IP list. The new manager will block all requests to the provided CIDRs. We are extracting the error message string to show the end-user that given IP is blocked from being set as a webhook. There are 2 ways to extract the error message "connection to IP address is blocked". Given below are the responses from event trigger to a blocked IP for these implementations: - 6d74fde316f61e246c861befcca5059d33972fa7 - We return the error message string as a HTTPErr(HOther) from `Hasura/Eventing/HTTP.hs`. ``` { "data": { "message": "blocked connection to private IP address " }, "version": "2", "type": "client_error" } ``` - 88e17456345cbb449a5ecd4877c84c9f319dbc25 - We case match on HTTPExceptionContent for InternaException in `Hasura/HTTP.hs` and extract the error message string from it. (this is implemented as it handles all the cases where pro engine makes webhook requests) ``` { "data": { "message": { "type": "http_exception", "message": "blocked connection to private IP address ", "request": { "secure": false, "path": "/webhook", "responseTimeout": "ResponseTimeoutMicro 60000000", "queryString": "", "method": "POST", "requestHeaders": { "Content-Type": "application/json", "X-B3-ParentSpanId": "5ae6573edb2a6b36", "X-B3-TraceId": "29ea7bd6de6ebb8f", "X-B3-SpanId": "303137d9f1d4f341", "User-Agent": "hasura-graphql-engine/cerebushttp-ip-blacklist-a793a0e41-dirty" }, "host": "139.59.90.109", "port": 8000 } } }, "version": "2", "type": "client_error" } ``` ### Steps to test and verify The restricted IPs can be used as webhooks in event triggers, and hasura will return an error message in reponse. ### Limitations, known bugs & workarounds - The `http-client-restricted` has a needlessly complex interface, and puts effort into implementing proxy support which we don't want, so we've inlined a stripped down version. - Performance constraint: As the blocking is checked for each request, if a long list of blocked CIDRs is supplied, iterating through all of them is not what we would prefer. Using trie is suggested to overcome this. (Added to RFC) - Calls to Lux endpoints are inconsistent: We use either the http manager from the ProServeCtx which is unrestricted, or the http manager from the ServeCtx which is restricted (the latter through the instances for MonadMetadataApiAuthorization and UserAuthentication). (The failure scenario here would be: cloud sets PRO_ENDPOINT to something that resolves to an internal address, and then restricted requests to those endpoints fail, causing auth to fail on user requests. This is about HTTP requests to lux auth endpoints.) ## Changelog - ✅ `CHANGELOG.md` is updated with user-facing content relevant to this PR. ## Affected components - ✅ Server - ✅ Tests PR-URL: https://github.com/hasura/graphql-engine-mono/pull/3186 Co-authored-by: Robert <132113+robx@users.noreply.github.com> GitOrigin-RevId: 5bd2de2d028bc416b02c99e996c7bebce56fb1e7
2022-02-25 16:29:55 +03:00
import Network.HTTP.Client.CreateManager (mkHttpManager)
import Network.HTTP.Client.Manager (HasHttpManagerM (..))
import Network.HTTP.Client.Transformable qualified as HTTP
import Network.Wai (Application)
import Network.Wai.Handler.Warp qualified as Warp
import Options.Applicative
import Refined (unrefine)
import System.Environment (getEnvironment)
import System.Log.FastLogger qualified as FL
import System.Metrics qualified as EKG
import System.Metrics.Gauge qualified as EKG.Gauge
import Text.Mustache.Compile qualified as M
import Web.Spock.Core qualified as Spock
data ExitCode
= -- these are used during server initialization:
InvalidEnvironmentVariableOptionsError
| InvalidDatabaseConnectionParamsError
| AuthConfigurationError
| EventSubSystemError
| DatabaseMigrationError
| -- | used by MT because it initialises the schema cache only
-- these are used in app/Main.hs:
SchemaCacheInitError
| MetadataExportError
| MetadataCleanError
| ExecuteProcessError
| DowngradeProcessError
deriving (Show)
data ExitException = ExitException
{ eeCode :: !ExitCode,
eeMessage :: !BC.ByteString
}
deriving (Show)
instance Exception ExitException
throwErrExit :: (MonadIO m) => forall a. ExitCode -> String -> m a
throwErrExit reason = liftIO . throwIO . ExitException reason . BC.pack
throwErrJExit :: (A.ToJSON a, MonadIO m) => forall b. ExitCode -> a -> m b
throwErrJExit reason = liftIO . throwIO . ExitException reason . BLC.toStrict . A.encode
--------------------------------------------------------------------------------
-- TODO(SOLOMON): Move Into `Hasura.Server.Init`. Unable to do so
-- currently due `throwErrExit`.
-- | Parse cli arguments to graphql-engine executable.
parseArgs :: EnabledLogTypes impl => IO (HGEOptions (ServeOptions impl))
parseArgs = do
rawHGEOpts <- execParser opts
env <- getEnvironment
let eitherOpts = runWithEnv env $ mkHGEOptions rawHGEOpts
onLeft eitherOpts $ throwErrExit InvalidEnvironmentVariableOptionsError
where
opts =
info
(helper <*> parseHgeOpts)
( fullDesc
<> header "Hasura GraphQL Engine: Blazing fast, instant realtime GraphQL APIs on your DB with fine grained access control, also trigger webhooks on database events."
<> footerDoc (Just mainCmdFooter)
)
--------------------------------------------------------------------------------
printJSON :: (A.ToJSON a, MonadIO m) => a -> m ()
printJSON = liftIO . BLC.putStrLn . A.encode
printYaml :: (A.ToJSON a, MonadIO m) => a -> m ()
printYaml = liftIO . BC.putStrLn . Y.encode
mkPGLogger :: Logger Hasura -> PG.PGLogger
mkPGLogger (Logger logger) (PG.PLERetryMsg msg) =
logger $ PGLog LevelWarn msg
-- | Context required for all graphql-engine CLI commands
data GlobalCtx = GlobalCtx
{ _gcMetadataDbConnInfo :: !PG.ConnInfo,
-- | --database-url option, @'UrlConf' is required to construct default source configuration
-- and optional retries
_gcDefaultPostgresConnInfo :: !(Maybe (UrlConf, PG.ConnInfo), Maybe Int)
}
readTlsAllowlist :: SchemaCacheRef -> IO [TlsAllow]
readTlsAllowlist scRef = scTlsAllowlist <$> getSchemaCache scRef
initGlobalCtx ::
(MonadIO m) =>
Env.Environment ->
-- | the metadata DB URL
Maybe String ->
-- | the user's DB URL
PostgresConnInfo (Maybe UrlConf) ->
m GlobalCtx
initGlobalCtx env metadataDbUrl defaultPgConnInfo = do
let PostgresConnInfo dbUrlConf maybeRetries = defaultPgConnInfo
server: multitenant metadata storage The metadata storage implementation for graphql-engine-multitenant. - It uses a centralized PG database to store metadata of all tenants (instead of per tenant database) - Similarly, it uses a single schema-sync listener thread per MT worker (instead of listener thread per tenant) (PS: although, the processor thread is spawned per tenant) - 2 new flags are introduced - `--metadataDatabaseUrl` and (optional) `--metadataDatabaseRetries` Internally, a "metadata mode" is introduced to indicate an external/managed store vs a store managed by each pro-server. To run : - obtain the schema file (located at `pro/server/res/cloud/metadata_db_schema.sql`) - apply the schema on a PG database - set the `--metadataDatabaseUrl` flag to point to the above database - run the MT executable The schema (and its migrations) for the metadata db is managed outside the MT worker. ### New metadata The following is the new portion of `Metadata` added : ```yaml version: 3 metrics_config: analyze_query_variables: true analyze_response_body: false api_limits: disabled: false depth_limit: global: 5 per_role: user: 7 editor: 9 rate_limit: per_role: user: unique_params: - x-hasura-user-id - x-hasura-team-id max_reqs_per_min: 20 global: unique_params: IP max_reqs_per_min: 10 ``` - In Pro, the code around fetching/updating/syncing pro-config is removed - That also means, `hdb_pro_catalog` for keeping the config cache is not required. Hence the `hdb_pro_catalog` is also removed - The required config comes from metadata / schema cache ### New Metadata APIs - `set_api_limits` - `remove_api_limits` - `set_metrics_config` - `remove_metrics_config` #### `set_api_limits` ```yaml type: set_api_limits args: disabled: false depth_limit: global: 5 per_role: user: 7 editor: 9 rate_limit: per_role: anonymous: max_reqs_per_min: 10 unique_params: "ip" editor: max_reqs_per_min: 30 unique_params: - x-hasura-user-id user: unique_params: - x-hasura-user-id - x-hasura-team-id max_reqs_per_min: 20 global: unique_params: IP max_reqs_per_min: 10 ``` #### `remove_api_limits` ```yaml type: remove_api_limits args: {} ``` #### `set_metrics_config` ```yaml type: set_metrics_config args: analyze_query_variables: true analyze_response_body: false ``` #### `remove_metrics_config` ```yaml type: remove_metrics_config args: {} ``` #### TODO - [x] on-prem pro implementation for `MonadMetadataStorage` - [x] move the project config from Lux to pro metadata (PR: #379) - [ ] console changes for pro config/api limits, subscription workers (cc @soorajshankar @beerose) - [x] address other minor TODOs - [x] TxIso for `MonadSourceResolver` - [x] enable EKG connection pool metrics - [x] add logging of connection info when sources are added? - [x] confirm if the `buildReason` for schema cache is correct - [ ] testing - [x] 1.3 -> 1.4 cloud migration script (#465; PR: #508) - [x] one-time migration of existing metadata from users' db to centralized PG - [x] one-time migration of pro project config + api limits + regression tests from metrics API to metadata - [ ] integrate with infra team (WIP - cc @hgiasac) - [x] benchmark with 1000+ tenants + each tenant making read/update metadata query every second (PR: https://github.com/hasura/graphql-engine-mono/pull/411) - [ ] benchmark with few tenants having large metadata (100+ tables etc.) - [ ] when user moves regions (https://github.com/hasura/lux/issues/1717) - [ ] metadata has to be migrated from one regional PG to another - [ ] migrate metrics data as well ? - [ ] operation logs - [ ] regression test runs - [ ] find a way to share the schema files with the infra team Co-authored-by: Naveen Naidu <30195193+Naveenaidu@users.noreply.github.com> GitOrigin-RevId: 39e8361f2c0e96e0f9e8f8fb45e6cc14857f31f1
2021-02-11 20:54:25 +03:00
mkConnInfoFromSource dbUrl = do
resolvePostgresConnInfo env dbUrl maybeRetries
mkConnInfoFromMDb mdbUrl =
let retries = fromMaybe 1 maybeRetries
in (PG.ConnInfo retries . PG.CDDatabaseURI . txtToBs . T.pack) mdbUrl
server: multitenant metadata storage The metadata storage implementation for graphql-engine-multitenant. - It uses a centralized PG database to store metadata of all tenants (instead of per tenant database) - Similarly, it uses a single schema-sync listener thread per MT worker (instead of listener thread per tenant) (PS: although, the processor thread is spawned per tenant) - 2 new flags are introduced - `--metadataDatabaseUrl` and (optional) `--metadataDatabaseRetries` Internally, a "metadata mode" is introduced to indicate an external/managed store vs a store managed by each pro-server. To run : - obtain the schema file (located at `pro/server/res/cloud/metadata_db_schema.sql`) - apply the schema on a PG database - set the `--metadataDatabaseUrl` flag to point to the above database - run the MT executable The schema (and its migrations) for the metadata db is managed outside the MT worker. ### New metadata The following is the new portion of `Metadata` added : ```yaml version: 3 metrics_config: analyze_query_variables: true analyze_response_body: false api_limits: disabled: false depth_limit: global: 5 per_role: user: 7 editor: 9 rate_limit: per_role: user: unique_params: - x-hasura-user-id - x-hasura-team-id max_reqs_per_min: 20 global: unique_params: IP max_reqs_per_min: 10 ``` - In Pro, the code around fetching/updating/syncing pro-config is removed - That also means, `hdb_pro_catalog` for keeping the config cache is not required. Hence the `hdb_pro_catalog` is also removed - The required config comes from metadata / schema cache ### New Metadata APIs - `set_api_limits` - `remove_api_limits` - `set_metrics_config` - `remove_metrics_config` #### `set_api_limits` ```yaml type: set_api_limits args: disabled: false depth_limit: global: 5 per_role: user: 7 editor: 9 rate_limit: per_role: anonymous: max_reqs_per_min: 10 unique_params: "ip" editor: max_reqs_per_min: 30 unique_params: - x-hasura-user-id user: unique_params: - x-hasura-user-id - x-hasura-team-id max_reqs_per_min: 20 global: unique_params: IP max_reqs_per_min: 10 ``` #### `remove_api_limits` ```yaml type: remove_api_limits args: {} ``` #### `set_metrics_config` ```yaml type: set_metrics_config args: analyze_query_variables: true analyze_response_body: false ``` #### `remove_metrics_config` ```yaml type: remove_metrics_config args: {} ``` #### TODO - [x] on-prem pro implementation for `MonadMetadataStorage` - [x] move the project config from Lux to pro metadata (PR: #379) - [ ] console changes for pro config/api limits, subscription workers (cc @soorajshankar @beerose) - [x] address other minor TODOs - [x] TxIso for `MonadSourceResolver` - [x] enable EKG connection pool metrics - [x] add logging of connection info when sources are added? - [x] confirm if the `buildReason` for schema cache is correct - [ ] testing - [x] 1.3 -> 1.4 cloud migration script (#465; PR: #508) - [x] one-time migration of existing metadata from users' db to centralized PG - [x] one-time migration of pro project config + api limits + regression tests from metrics API to metadata - [ ] integrate with infra team (WIP - cc @hgiasac) - [x] benchmark with 1000+ tenants + each tenant making read/update metadata query every second (PR: https://github.com/hasura/graphql-engine-mono/pull/411) - [ ] benchmark with few tenants having large metadata (100+ tables etc.) - [ ] when user moves regions (https://github.com/hasura/lux/issues/1717) - [ ] metadata has to be migrated from one regional PG to another - [ ] migrate metrics data as well ? - [ ] operation logs - [ ] regression test runs - [ ] find a way to share the schema files with the infra team Co-authored-by: Naveen Naidu <30195193+Naveenaidu@users.noreply.github.com> GitOrigin-RevId: 39e8361f2c0e96e0f9e8f8fb45e6cc14857f31f1
2021-02-11 20:54:25 +03:00
mkGlobalCtx mdbConnInfo sourceConnInfo =
pure $ GlobalCtx mdbConnInfo (sourceConnInfo, maybeRetries)
server: multitenant metadata storage The metadata storage implementation for graphql-engine-multitenant. - It uses a centralized PG database to store metadata of all tenants (instead of per tenant database) - Similarly, it uses a single schema-sync listener thread per MT worker (instead of listener thread per tenant) (PS: although, the processor thread is spawned per tenant) - 2 new flags are introduced - `--metadataDatabaseUrl` and (optional) `--metadataDatabaseRetries` Internally, a "metadata mode" is introduced to indicate an external/managed store vs a store managed by each pro-server. To run : - obtain the schema file (located at `pro/server/res/cloud/metadata_db_schema.sql`) - apply the schema on a PG database - set the `--metadataDatabaseUrl` flag to point to the above database - run the MT executable The schema (and its migrations) for the metadata db is managed outside the MT worker. ### New metadata The following is the new portion of `Metadata` added : ```yaml version: 3 metrics_config: analyze_query_variables: true analyze_response_body: false api_limits: disabled: false depth_limit: global: 5 per_role: user: 7 editor: 9 rate_limit: per_role: user: unique_params: - x-hasura-user-id - x-hasura-team-id max_reqs_per_min: 20 global: unique_params: IP max_reqs_per_min: 10 ``` - In Pro, the code around fetching/updating/syncing pro-config is removed - That also means, `hdb_pro_catalog` for keeping the config cache is not required. Hence the `hdb_pro_catalog` is also removed - The required config comes from metadata / schema cache ### New Metadata APIs - `set_api_limits` - `remove_api_limits` - `set_metrics_config` - `remove_metrics_config` #### `set_api_limits` ```yaml type: set_api_limits args: disabled: false depth_limit: global: 5 per_role: user: 7 editor: 9 rate_limit: per_role: anonymous: max_reqs_per_min: 10 unique_params: "ip" editor: max_reqs_per_min: 30 unique_params: - x-hasura-user-id user: unique_params: - x-hasura-user-id - x-hasura-team-id max_reqs_per_min: 20 global: unique_params: IP max_reqs_per_min: 10 ``` #### `remove_api_limits` ```yaml type: remove_api_limits args: {} ``` #### `set_metrics_config` ```yaml type: set_metrics_config args: analyze_query_variables: true analyze_response_body: false ``` #### `remove_metrics_config` ```yaml type: remove_metrics_config args: {} ``` #### TODO - [x] on-prem pro implementation for `MonadMetadataStorage` - [x] move the project config from Lux to pro metadata (PR: #379) - [ ] console changes for pro config/api limits, subscription workers (cc @soorajshankar @beerose) - [x] address other minor TODOs - [x] TxIso for `MonadSourceResolver` - [x] enable EKG connection pool metrics - [x] add logging of connection info when sources are added? - [x] confirm if the `buildReason` for schema cache is correct - [ ] testing - [x] 1.3 -> 1.4 cloud migration script (#465; PR: #508) - [x] one-time migration of existing metadata from users' db to centralized PG - [x] one-time migration of pro project config + api limits + regression tests from metrics API to metadata - [ ] integrate with infra team (WIP - cc @hgiasac) - [x] benchmark with 1000+ tenants + each tenant making read/update metadata query every second (PR: https://github.com/hasura/graphql-engine-mono/pull/411) - [ ] benchmark with few tenants having large metadata (100+ tables etc.) - [ ] when user moves regions (https://github.com/hasura/lux/issues/1717) - [ ] metadata has to be migrated from one regional PG to another - [ ] migrate metrics data as well ? - [ ] operation logs - [ ] regression test runs - [ ] find a way to share the schema files with the infra team Co-authored-by: Naveen Naidu <30195193+Naveenaidu@users.noreply.github.com> GitOrigin-RevId: 39e8361f2c0e96e0f9e8f8fb45e6cc14857f31f1
2021-02-11 20:54:25 +03:00
case (metadataDbUrl, dbUrlConf) of
(Nothing, Nothing) ->
throwErrExit
InvalidDatabaseConnectionParamsError
"Fatal Error: Either of --metadata-database-url or --database-url option expected"
server: multitenant metadata storage The metadata storage implementation for graphql-engine-multitenant. - It uses a centralized PG database to store metadata of all tenants (instead of per tenant database) - Similarly, it uses a single schema-sync listener thread per MT worker (instead of listener thread per tenant) (PS: although, the processor thread is spawned per tenant) - 2 new flags are introduced - `--metadataDatabaseUrl` and (optional) `--metadataDatabaseRetries` Internally, a "metadata mode" is introduced to indicate an external/managed store vs a store managed by each pro-server. To run : - obtain the schema file (located at `pro/server/res/cloud/metadata_db_schema.sql`) - apply the schema on a PG database - set the `--metadataDatabaseUrl` flag to point to the above database - run the MT executable The schema (and its migrations) for the metadata db is managed outside the MT worker. ### New metadata The following is the new portion of `Metadata` added : ```yaml version: 3 metrics_config: analyze_query_variables: true analyze_response_body: false api_limits: disabled: false depth_limit: global: 5 per_role: user: 7 editor: 9 rate_limit: per_role: user: unique_params: - x-hasura-user-id - x-hasura-team-id max_reqs_per_min: 20 global: unique_params: IP max_reqs_per_min: 10 ``` - In Pro, the code around fetching/updating/syncing pro-config is removed - That also means, `hdb_pro_catalog` for keeping the config cache is not required. Hence the `hdb_pro_catalog` is also removed - The required config comes from metadata / schema cache ### New Metadata APIs - `set_api_limits` - `remove_api_limits` - `set_metrics_config` - `remove_metrics_config` #### `set_api_limits` ```yaml type: set_api_limits args: disabled: false depth_limit: global: 5 per_role: user: 7 editor: 9 rate_limit: per_role: anonymous: max_reqs_per_min: 10 unique_params: "ip" editor: max_reqs_per_min: 30 unique_params: - x-hasura-user-id user: unique_params: - x-hasura-user-id - x-hasura-team-id max_reqs_per_min: 20 global: unique_params: IP max_reqs_per_min: 10 ``` #### `remove_api_limits` ```yaml type: remove_api_limits args: {} ``` #### `set_metrics_config` ```yaml type: set_metrics_config args: analyze_query_variables: true analyze_response_body: false ``` #### `remove_metrics_config` ```yaml type: remove_metrics_config args: {} ``` #### TODO - [x] on-prem pro implementation for `MonadMetadataStorage` - [x] move the project config from Lux to pro metadata (PR: #379) - [ ] console changes for pro config/api limits, subscription workers (cc @soorajshankar @beerose) - [x] address other minor TODOs - [x] TxIso for `MonadSourceResolver` - [x] enable EKG connection pool metrics - [x] add logging of connection info when sources are added? - [x] confirm if the `buildReason` for schema cache is correct - [ ] testing - [x] 1.3 -> 1.4 cloud migration script (#465; PR: #508) - [x] one-time migration of existing metadata from users' db to centralized PG - [x] one-time migration of pro project config + api limits + regression tests from metrics API to metadata - [ ] integrate with infra team (WIP - cc @hgiasac) - [x] benchmark with 1000+ tenants + each tenant making read/update metadata query every second (PR: https://github.com/hasura/graphql-engine-mono/pull/411) - [ ] benchmark with few tenants having large metadata (100+ tables etc.) - [ ] when user moves regions (https://github.com/hasura/lux/issues/1717) - [ ] metadata has to be migrated from one regional PG to another - [ ] migrate metrics data as well ? - [ ] operation logs - [ ] regression test runs - [ ] find a way to share the schema files with the infra team Co-authored-by: Naveen Naidu <30195193+Naveenaidu@users.noreply.github.com> GitOrigin-RevId: 39e8361f2c0e96e0f9e8f8fb45e6cc14857f31f1
2021-02-11 20:54:25 +03:00
-- If no metadata storage specified consider use default database as
-- metadata storage
(Nothing, Just dbUrl) -> do
connInfo <- mkConnInfoFromSource dbUrl
mkGlobalCtx connInfo $ Just (dbUrl, connInfo)
(Just mdUrl, Nothing) -> do
let mdConnInfo = mkConnInfoFromMDb mdUrl
mkGlobalCtx mdConnInfo Nothing
(Just mdUrl, Just dbUrl) -> do
srcConnInfo <- mkConnInfoFromSource dbUrl
let mdConnInfo = mkConnInfoFromMDb mdUrl
mkGlobalCtx mdConnInfo (Just (dbUrl, srcConnInfo))
-- | Context required for the 'serve' CLI command.
data ServeCtx = ServeCtx
{ _scHttpManager :: !HTTP.Manager,
_scInstanceId :: !InstanceId,
_scLoggers :: !Loggers,
_scEnabledLogTypes :: !(HashSet (EngineLogType Hasura)),
_scMetadataDbPool :: !PG.PGPool,
_scShutdownLatch :: !ShutdownLatch,
_scSchemaCache :: !RebuildableSchemaCache,
_scSchemaCacheRef :: !SchemaCacheRef,
_scMetaVersionRef :: !(STM.TMVar MetadataResourceVersion)
}
-- | Collection of the LoggerCtx, the regular Logger and the PGLogger
Rewrite GraphQL schema generation and query parsing (close #2801) (#4111) Aka “the PDV refactor.” History is preserved on the branch 2801-graphql-schema-parser-refactor. * [skip ci] remove stale benchmark commit from commit_diff * [skip ci] Check for root field name conflicts between remotes * [skip ci] Additionally check for conflicts between remotes and DB * [skip ci] Check for conflicts in schema when tracking a table * [skip ci] Fix equality checking in GraphQL AST * server: fix mishandling of GeoJSON inputs in subscriptions (fix #3239) (#4551) * Add support for multiple top-level fields in a subscription to improve testability of subscriptions * Add an internal flag to enable multiple subscriptions * Add missing call to withConstructorFn in live queries (fix #3239) Co-authored-by: Alexis King <lexi.lambda@gmail.com> * Scheduled triggers (close #1914) (#3553) server: add scheduled triggers Co-authored-by: Alexis King <lexi.lambda@gmail.com> Co-authored-by: Marion Schleifer <marion@hasura.io> Co-authored-by: Karthikeyan Chinnakonda <karthikeyan@hasura.io> Co-authored-by: Aleksandra Sikora <ola.zxcvbnm@gmail.com> * dev.sh: bump version due to addition of croniter python dependency * server: fix an introspection query caching issue (fix #4547) (#4661) Introspection queries accept variables, but we need to make sure to also touch the variables that we ignore, so that an introspection query is marked not reusable if we are not able to build a correct query plan for it. A better solution here would be to deal with such unused variables correctly, so that more introspection queries become reusable. An even better solution would be to type-safely track *how* to reuse which variables, rather than to split the reusage marking from the planning. Co-authored-by: Tirumarai Selvan <tiru@hasura.io> * flush log buffer on exception in mkWaiApp ( fix #4772 ) (#4801) * flush log buffer on exception in mkWaiApp * add comment to explain the introduced change * add changelog * allow logging details of a live query polling thread (#4959) * changes for poller-log add various multiplexed query info in poller-log * minor cleanup, also fixes a bug which will return duplicate data * Live query poller stats can now be logged This also removes in-memory stats that are collected about batched query execution as the log lines when piped into an monitoring tool will give us better insights. * allow poller-log to be configurable * log minimal information in the livequery-poller-log Other information can be retrieved from /dev/subscriptions/extended * fix few review comments * avoid marshalling and unmarshalling from ByteString to EncJSON * separate out SubscriberId and SubscriberMetadata Co-authored-by: Anon Ray <rayanon004@gmail.com> * Don't compile in developer APIs by default * Tighten up handling of admin secret, more docs Store the admin secret only as a hash to prevent leaking the secret inadvertently, and to prevent timing attacks on the secret. NOTE: best practice for stored user passwords is a function with a tunable cost like bcrypt, but our threat model is quite different (even if we thought we could reasonably protect the secret from an attacker who could read arbitrary regions of memory), and bcrypt is far too slow (by design) to perform on each request. We'd have to rely on our (technically savvy) users to choose high entropy passwords in any case. Referencing #4736 * server/docs: add instructions to fix loss of float precision in PostgreSQL <= 11 (#5187) This adds a server flag, --pg-connection-options, that can be used to set a PostgreSQL connection parameter, extra_float_digits, that needs to be used to avoid loss of data on older versions of PostgreSQL, which have odd default behavior when returning float values. (fixes #5092) * [skip ci] Add new commits from master to the commit diff * [skip ci] serve default directives (skip & include) over introspection * [skip ci] Update non-Haskell assets with the version on master * server: refactor GQL execution check and config API (#5094) Co-authored-by: Vamshi Surabhi <vamshi@hasura.io> Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * [skip ci] fix js issues in tests by pinning dependencies version * [skip ci] bump graphql version * [skip ci] Add note about memory usage * generalize query execution logic on Postgres (#5110) * generalize PGExecCtx to support specialized functions for various operations * fix tests compilation * allow customising PGExecCtx when starting the web server * server: changes catalog initialization and logging for pro customization (#5139) * new typeclass to abstract the logic of QueryLog-ing * abstract the logic of logging websocket-server logs introduce a MonadWSLog typeclass * move catalog initialization to init step expose a helper function to migrate catalog create schema cache in initialiseCtx * expose various modules and functions for pro * [skip ci] cosmetic change * [skip ci] fix test calling a mutation that does not exist * [skip ci] minor text change * [skip ci] refactored input values * [skip ci] remove VString Origin * server: fix updating of headers behaviour in the update cron trigger API and create future events immediately (#5151) * server: fix bug to update headers in an existing cron trigger and create future events Co-authored-by: Tirumarai Selvan <tiru@hasura.io> * Lower stack chunk size in RTS to reduce thread STACK memory (closes #5190) This reduces memory consumption for new idle subscriptions significantly (see linked ticket). The hypothesis is: we fork a lot of threads per websocket, and some of these use slightly more than the initial 1K stack size, so the first overflow balloons to 32K, when significantly less is required. However: running with `+RTS -K1K -xc` did not seem to show evidence of any overflows! So it's a mystery why this improves things. GHC should probably also be doubling the stack buffer at each overflow or doing something even smarter; the knobs we have aren't so helpful. * [skip ci] fix todo and schema generation for aggregate fields * 5087 libpq pool leak (#5089) Shrink libpq buffers to 1MB before returning connection to pool. Closes #5087 See: https://github.com/hasura/pg-client-hs/pull/19 Also related: #3388 #4077 * bump pg-client-hs version (fixes a build issue on some environments) (#5267) * do not use prepared statements for mutations * server: unlock scheduled events on graceful shutdown (#4928) * Fix buggy parsing of new --conn-lifetime flag in 2b0e3774 * [skip ci] remove cherry-picked commit from commit_diff.txt * server: include additional fields in scheduled trigger webhook payload (#5262) * include scheduled triggers metadata in the webhook body Co-authored-by: Tirumarai Selvan <tiru@hasura.io> * server: call the webhook asynchronously in event triggers (#5352) * server: call the webhook asynchronosly in event triggers * Expose all modules in Cabal file (#5371) * [skip ci] update commit_diff.txt * [skip ci] fix cast exp parser & few TODOs * [skip ci] fix remote fields arguments * [skip ci] fix few more TODO, no-op refactor, move resolve/action.hs to execute/action.hs * Pass environment variables around as a data structure, via @sordina (#5374) * Pass environment variables around as a data structure, via @sordina * Resolving build error * Adding Environment passing note to changelog * Removing references to ILTPollerLog as this seems to have been reintroduced from a bad merge * removing commented-out imports * Language pragmas already set by project * Linking async thread * Apply suggestions from code review Use `runQueryTx` instead of `runLazyTx` for queries. * remove the non-user facing entry in the changelog Co-authored-by: Phil Freeman <paf31@cantab.net> Co-authored-by: Phil Freeman <phil@hasura.io> Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * [skip ci] fix: restrict remote relationship field generation for hasura queries * [skip ci] no-op refactor; move insert execution code from schema parser module * server: call the webhook asynchronously in event triggers (#5352) * server: call the webhook asynchronosly in event triggers * Expose all modules in Cabal file (#5371) * [skip ci] update commit_diff.txt * Pass environment variables around as a data structure, via @sordina (#5374) * Pass environment variables around as a data structure, via @sordina * Resolving build error * Adding Environment passing note to changelog * Removing references to ILTPollerLog as this seems to have been reintroduced from a bad merge * removing commented-out imports * Language pragmas already set by project * Linking async thread * Apply suggestions from code review Use `runQueryTx` instead of `runLazyTx` for queries. * remove the non-user facing entry in the changelog Co-authored-by: Phil Freeman <paf31@cantab.net> Co-authored-by: Phil Freeman <phil@hasura.io> Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * [skip ci] implement header checking Probably closes #14 and #3659. * server: refactor 'pollQuery' to have a hook to process 'PollDetails' (#5391) Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * update pg-client (#5421) * [skip ci] update commit_diff * Fix latency buckets for telemetry data These must have gotten messed up during a refactor. As a consequence almost all samples received so far fall into the single erroneous 0 to 1K seconds (originally supposed to be 1ms?) bucket. I also re-thought what the numbers should be, but these are still arbitrary and might want adjusting in the future. * [skip ci] include the latest commit compared against master in commit_diff * [skip ci] include new commits from master in commit_diff * [skip ci] improve description generation * [skip ci] sort all introspect arrays * [skip ci] allow parsers to specify error codes * [skip ci] fix integer and float parsing error code * [skip ci] scalar from json errors are now parse errors * [skip ci] fixed negative integer error message and code * [skip ci] Re-fix nullability in relationships * [skip ci] no-op refactor and removed couple of FIXMEs * [skip ci] uncomment code in 'deleteMetadataObject' * [skip ci] Fix re-fix of nullability for relationships * [skip ci] fix default arguments error code * [skip ci] updated test error message !!! WARNING !!! Since all fields accept `null`, they all are technically optional in the new schema. Meaning there's no such thing as a missing mandatory field anymore: a field that doesn't have a default value, and which therefore isn't labelled as "optional" in the schema, will be assumed to be null if it's missing, meaning it isn't possible anymore to have an error for a missing mandatory field. The only possible error is now when a optional positional argument is omitted but is not the last positional argument. * [skip ci] cleanup of int scalar parser * [skip ci] retro-compatibility of offset as string * [skip ci] Remove commit from commit_diff.txt Although strictly speaking we don't know if this will work correctly in PDV if we would implement query plan caching, the fact is that in the theoretical case that we would have the same issue in PDV, it would probably apply not just to introspection, and the fix would be written completely differently. So this old commit is of no value to us other than the heads-up "make sure query plan caching works correctly even in the presence of unused variables", which is already part of the test suite. * Add MonadTrace and MonadExecuteQuery abstractions (#5383) * [skip ci] Fix accumulation of input object types Just like object types, interface types, and union types, we have to avoid circularities when collecting input types from the GraphQL AST. Additionally, this fixes equality checks for input object types (whose fields are unordered, and hence should be compared as sets) and enum types (ditto). * [skip ci] fix fragment error path * [skip ci] fix node error code * [skip ci] fix paths in insert queries * [skip ci] fix path in objects * [skip ci] manually alter node id path for consistency * [skip ci] more node error fixups * [skip ci] one last relay error message fix * [skip ci] update commit_diff * Propagate the trace context to event triggers (#5409) * Propagate the trace context to event triggers * Handle missing trace and span IDs * Store trace context as one LOCAL * Add migrations * Documentation * changelog * Fix warnings * Respond to code review suggestions * Respond to code review * Undo changelog * Update CHANGELOG.md Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * server: log request/response sizes for event triggers (#5463) * server: log request/response sizes for event triggers event triggers (and scheduled triggers) now have request/response size in their logs. * add changelog entry * Tracing: Simplify HTTP traced request (#5451) Remove the Inversion of Control (SuspendRequest) and simplify the tracing of HTTP Requests. Co-authored-by: Phil Freeman <phil@hasura.io> * Attach request ID as tracing metadata (#5456) * Propagate the trace context to event triggers * Handle missing trace and span IDs * Store trace context as one LOCAL * Add migrations * Documentation * Include the request ID as trace metadata * changelog * Fix warnings * Respond to code review suggestions * Respond to code review * Undo changelog * Update CHANGELOG.md * Typo Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * server: add logging for action handlers (#5471) * server: add logging for action handlers * add changelog entry * change action-handler log type from internal to non-internal * fix action-handler-log name * server: pass http and websocket request to logging context (#5470) * pass request body to logging context in all cases * add message size logging on the websocket API this is required by graphql-engine-pro/#416 * message size logging on websocket API As we need to log all messages recieved/sent by the websocket server, it makes sense to log them as part of the websocket server event logs. Previously message recieved were logged inside the onMessage handler, and messages sent were logged only for "data" messages (as a server event log) * fix review comments Co-authored-by: Phil Freeman <phil@hasura.io> * server: stop eventing subsystem threads when shutting down (#5479) * server: stop eventing subsystem threads when shutting down * Apply suggestions from code review Co-authored-by: Karthikeyan Chinnakonda <chkarthikeyan95@gmail.com> Co-authored-by: Phil Freeman <phil@hasura.io> Co-authored-by: Phil Freeman <paf31@cantab.net> Co-authored-by: Karthikeyan Chinnakonda <chkarthikeyan95@gmail.com> * [skip ci] update commit_diff with new commits added in master * Bugfix to support 0-size HASURA_GRAPHQL_QUERY_PLAN_CACHE_SIZE Also some minor refactoring of bounded cache module: - the maxBound check in `trim` was confusing and unnecessary - consequently trim was unnecessary for lookupPure Also add some basic tests * Support only the bounded cache, with default HASURA_GRAPHQL_QUERY_PLAN_CACHE_SIZE of 4000. Closes #5363 * [skip ci] remove merge commit from commit_diff * server: Fix compiler warning caused by GHC upgrade (#5489) Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * [skip ci] update all non server code from master * [skip ci] aligned object field error message with master * [skip ci] fix remaining undefined? * [skip ci] remove unused import * [skip ci] revert to previous error message, fix tests * Move nullableType/nonNullableType to Schema.hs These are functions on Types, not on Parsers. * [skip ci] fix setup to fix backend only test the order in which permission checks are performed on the branch is slightly different than on master, resulting in a slightly different error if there are no other mutations the user has access to. By adding update permissions, we go back to the expected case. * [skip ci] fix insert geojson tests to reflect new paths * [skip ci] fix enum test for better error message * [skip ci] fix header test for better error message * [skip ci] fix fragment cycle test for better error message * [skip ci] fix error message for type mismatch * [skip ci] fix variable path in test * [skip ci] adjust tests after bug fix * [skip ci] more tests fixing * Add hdb_catalog.current_setting abstraction for reading Hasura settings As the comment in the function’s definition explains, this is needed to work around an awkward Postgres behavior. * [skip ci] Update CONTRIBUTING.md to mention Node setup for Python tests * [skip ci] Add missing Python tests env var to CONTRIBUTING.md * [skip ci] fix order of result when subscription is run with multiple nodes * [skip ci] no-op refactor: fix a warning in Internal/Parser.hs * [skip ci] throw error when a subscription contains remote joins * [skip ci] Enable easier profiling by hiding AssertNF behind a flag In order to compile a profiling build, run: $ cabal new-build -f profiling --enable-profiling * [skip ci] Fix two warnings We used to lookup the objects that implement a given interface by filtering all objects in the schema document. However, one of the tests expects us to generate a warning if the provided `implements` field of an introspection query specifies an object not implementing some interface. So we use that field instead. * [skip ci] Fix warnings by commenting out query plan caching * [skip ci] improve masking/commenting query caching related code & few warning fixes * [skip ci] Fixed compiler warnings in graphql-parser-hs * Sync non-Haskell assets with master * [skip ci] add a test inserting invalid GraphQL but valid JSON value in a jsonb column * [skip ci] Avoid converting to/from Map * [skip ci] Apply some hlint suggestions * [skip ci] remove redundant constraints from buildLiveQueryPlan and explainGQLQuery * [skip ci] add NOTEs about missing Tracing constraints in PDV from master * Remove -fdefer-typed-holes, fix warnings * Update cabal.project.freeze * Limit GHC’s heap size to 8GB in CI to avoid the OOM killer * Commit package-lock.json for Python tests’ remote schema server * restrict env variables start with HASURA_GRAPHQL_ for headers configuration in actions, event triggers & remote schemas (#5519) * restrict env variables start with HASURA_GRAPHQL_ for headers definition in actions & event triggers * update CHANGELOG.md * Apply suggestions from code review Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * add test for table_by_pk node when roles doesn't have permission to PK * [skip ci] fix introspection query if any enum column present in primary key (fix #5200) (#5522) * [skip ci] test case fix for a6450e126bc2d98bcfd3791501986e4627ce6c6f * [skip ci] add tests to agg queries when role doesn't have access to any cols * fix backend test * Simplify subscription execution * [skip ci] add test to check if required headers are present while querying * Suppose, table B is related to table A and to query B certain headers are necessary, then the test checks that we are throwing error when the header is not set when B is queried through A * fix mutations not checking for view mutability * [skip ci] add variable type checking and corresponding tests * [skip ci] add test to check if update headers are present while doing an upsert * [skip ci] add positive counterparts to some of the negative permission tests * fix args missing their description in introspect * [skip ci] Remove unused function; insert missing markNotReusable call * [skip ci] Add a Note about InputValue * [skip ci] Delete LegacySchema/ 🎉 * [skip ci] Delete GraphQL/{Resolve,Validate}/ 🎉 * [skip ci] Delete top-level Resolve/Validate modules; tidy .cabal file * [skip ci] Delete LegacySchema top-level module Somehow I missed this one. * fix input value to json * [skip ci] elaborate on JSON objects in GraphQL * [skip ci] add missing file * [skip ci] add a test with subscription containing remote joins * add a test with remote joins in mutation output * [skip ci] Add some comments to Schema/Mutation.hs * [skip ci] Remove no longer needed code from RemoteServer.hs * [skip ci] Use a helper function to generate conflict clause parsers * [skip ci] fix type checker error in fields with default value * capitalize the header keys in select_articles_without_required_headers * Somehow, this was the reason the tests were failing. I have no idea, why! * [skip ci] Add a long Note about optional fields and nullability * Improve comments a bit; simplify Schema/Common.hs a bit * [skip ci] full implementation of 5.8.5 type checking. * [skip ci] fix validation test teardown * [skip ci] fix schema stitching test * fix remote schema ignoring enum nullability * [skip ci] fix fieldOptional to not discard nullability * revert nullability of use_spheroid * fix comment * add required remote fields with arguments for tests * [skip ci] add missing docstrings * [skip ci] fixed description of remote fields * [skip ci] change docstring for consistency * fix several schema inconsistencies * revert behaviour change in function arguments parsing * fix remaining nullability issues in new schema * minor no-op refactor; use isListType from graphql-parser-hs * use nullability of remote schema node, while creating a Remote reln * fix 'ID' input coercing & action 'ID' type relationship mapping * include ASTs in MonadExecuteQuery * needed for PRO code-base * Delete code for "interfaces implementing ifaces" (draft GraphQL spec) Previously I started writing some code that adds support for a future GraphQL feature where interfaces may themselves be sub-types of other interfaces. However, this code was incomplete, and partially incorrect. So this commit deletes support for that entirely. * Ignore a remote schema test during the upgrade/downgrade test The PDV refactor does a better job at exposing a minimal set of types through introspection. In particular, not every type that is present in a remote schema is re-exposed by Hasura. The test test_schema_stitching.py::TestRemoteSchemaBasic::test_introspection assumed that all types were re-exposed, which is not required for GraphQL compatibility, in order to test some aspect of our support for remote schemas. So while this particular test has been updated on PDV, the PDV branch now does not pass the old test, which we argue to be incorrect. Hence this test is disabled while we await a release, after which we can re-enable it. This also re-enables a test that was previously disabled for similar, though unrelated, reasons. * add haddock documentation to the action's field parsers * Deslecting some tests in server-upgrade Some tests with current build are failing on server upgrade which it should not. The response is more accurate than what it was. Also the upgrade tests were not throwing errors when the test is expected to return an error, but succeeds. The test framework is patched to catch this case. * [skip ci] Add a long Note about interfaces and object types * send the response headers back to client after running a query * Deselect a few more tests during upgrade/downgrade test * Update commit_diff.txt * change log kind from db_migrate to catalog_migrate (#5531) * Show method and complete URI in traced HTTP calls (#5525) Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * restrict env variables start with HASURA_GRAPHQL_ for headers configuration in actions, event triggers & remote schemas (#5519) * restrict env variables start with HASURA_GRAPHQL_ for headers definition in actions & event triggers * update CHANGELOG.md * Apply suggestions from code review Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> * fix introspection query if any enum column present in primary key (fix #5200) (#5522) * Fix telemetry reporting of transport (websocket was reported as http) * add log kinds in cli-migrations image (#5529) * add log kinds in cli-migrations image * give hint to resolve timeout error * minor changes and CHANGELOG * server: set hasura.tracecontext in RQL mutations [#5542] (#5555) * server: set hasura.tracecontext in RQL mutations [#5542] * Update test suite Co-authored-by: Tirumarai Selvan <tiru@hasura.io> * Add bulldozer auto-merge and -update configuration We still need to add the github app (as of time of opening this PR) Afterwards devs should be able to allow bulldozer to automatically "update" the branch, merging in parent when it changes, as well as automatically merge when all checks pass. This is opt-in by adding the `auto-update-auto-merge` label to the PR. * Remove 'bulldozer' config, try 'kodiak' for auto-merge see: https://github.com/chdsbd/kodiak The main issue that bit us was not being able to auto update forked branches, also: https://github.com/palantir/bulldozer/issues/66 https://github.com/palantir/bulldozer/issues/145 * Cherry-picked all commits * [skip ci] Slightly improve formatting * Revert "fix introspection query if any enum column present in primary key (fix #5200) (#5522)" This reverts commit 0f9a5afa59a88f6824f4d63d58db246a5ba3fb03. This undoes a cherry-pick of 34288e1eb5f2c5dad9e6d1e05453dd52397dc970 that was already done previously in a6450e126bc2d98bcfd3791501986e4627ce6c6f, and subsequently fixed for PDV in 70e89dc250f8ddc6e2b7930bbe2b3eeaa6dbe1db * Do a small bit of tidying in Hasura.GraphQL.Parser.Collect * Fix cherry-picking work Some previous cherry-picks ended up modifying code that is commented out * [skip ci] clarified comment regarding insert representation * [skip ci] removed obsolete todos * cosmetic change * fix action error message * [skip ci] remove obsolete comment * [skip ci] synchronize stylish haskell extensions list * use previously defined scalar names in parsers rather than ad-hoc literals * Apply most syntax hlint hints. * Clarify comment on update mutation. * [skip ci] Clarify what fields should be specified for objects * Update "_inc" description. * Use record types rather than tuples fo IntrospectionResult and ParsedIntrospection * Get rid of checkFieldNamesUnique (use Data.List.Extended.duplicates) * Throw more errors when collecting query root names * [skip ci] clean column parser comment * Remove dead code inserted in ab65b39 * avoid converting to non-empty list where not needed * add note and TODO about the disabled checks in PDV * minor refactor in remoteField' function * Unify two getObject methods * Nitpicks in Remote.hs * Update CHANGELOG.md * Revert "Unify two getObject methods" This reverts commit bd6bb40355b3d189a46c0312eb52225e18be57b3. We do need two different getObject functions as the corresponding error message is different * Fix error message in Remote.hs * Update CHANGELOG.md Co-authored-by: Auke Booij <auke@tulcod.com> * Apply suggested Changelog fix. Co-authored-by: Auke Booij <auke@tulcod.com> * Fix typo in Changelog. * [skip ci] Update changelog. * reuse type names to avoid duplication * Fix Hashable instance for Definition The presence of `Maybe Unique`, and an optional description, as part of `Definition`s, means that `Definition`s that are considered `Eq`ual may get different hashes. This can happen, for instance, when one object is memoized but another is not. * [skip ci] Update commit_diff.txt * Bump parser version. * Bump freeze file after changes in parser. * [skip ci] Incorporate commits from master * Fix developer flag in server/cabal.project.freeze Co-authored-by: Auke Booij <auke@tulcod.com> * Deselect a changed ENUM test for upgrade/downgrade CI * Deselect test here as well * [skip ci] remove dead code * Disable more tests for upgrade/downgrade * Fix which test gets deselected * Revert "Add hdb_catalog.current_setting abstraction for reading Hasura settings" This reverts commit 66e85ab9fbd56cca2c28a80201f6604fbe811b85. * Remove circular reference in cabal.project.freeze Co-authored-by: Karthikeyan Chinnakonda <karthikeyan@hasura.io> Co-authored-by: Auke Booij <auke@hasura.io> Co-authored-by: Tirumarai Selvan <tiru@hasura.io> Co-authored-by: Marion Schleifer <marion@hasura.io> Co-authored-by: Aleksandra Sikora <ola.zxcvbnm@gmail.com> Co-authored-by: Brandon Simmons <brandon.m.simmons@gmail.com> Co-authored-by: Vamshi Surabhi <0x777@users.noreply.github.com> Co-authored-by: Anon Ray <rayanon004@gmail.com> Co-authored-by: rakeshkky <12475069+rakeshkky@users.noreply.github.com> Co-authored-by: Anon Ray <ecthiender@users.noreply.github.com> Co-authored-by: Vamshi Surabhi <vamshi@hasura.io> Co-authored-by: Antoine Leblanc <antoine@hasura.io> Co-authored-by: Brandon Simmons <brandon@hasura.io> Co-authored-by: Phil Freeman <phil@hasura.io> Co-authored-by: Lyndon Maydwell <lyndon@sordina.net> Co-authored-by: Phil Freeman <paf31@cantab.net> Co-authored-by: Naveen Naidu <naveennaidu479@gmail.com> Co-authored-by: Karthikeyan Chinnakonda <chkarthikeyan95@gmail.com> Co-authored-by: Nizar Malangadan <nizar-m@users.noreply.github.com> Co-authored-by: Antoine Leblanc <crucuny@gmail.com> Co-authored-by: Auke Booij <auke@tulcod.com>
2020-08-21 20:27:01 +03:00
-- TODO (from master): better naming?
data Loggers = Loggers
{ _lsLoggerCtx :: !(LoggerCtx Hasura),
_lsLogger :: !(Logger Hasura),
_lsPgLogger :: !PG.PGLogger
}
-- | An application with Postgres database as a metadata storage
newtype PGMetadataStorageAppT m a = PGMetadataStorageAppT {runPGMetadataStorageAppT :: (PG.PGPool, PG.PGLogger) -> m a}
deriving
( Functor,
Applicative,
Monad,
MonadIO,
Rewrite OpenAPI ### Description This PR rewrites OpenAPI to be more idiomatic. Some noteworthy changes: - we accumulate all required information during the Analyze phase, to avoid having to do a single lookup in the schema cache during the OpenAPI generation phase (we now only need the schema cache as input to run the analysis) - we no longer build intermediary endpoint information and aggregate it, we directly build the the `PathItem` for each endpoint; additionally, that means we no longer have to assume that different methods have the same metadata - we no longer have to first declare types, then craft references: we do everything in one step - we now properly deal with nullability by treating "typeName" and "typeName!" as different - we add a bunch of additional fields in the generated "schema", such as title - we do now support enum values in both input and output positions - checking whether the request body is required is now performed on the fly rather than by introspecting the generated schema - the methods in the file are sorted by topic ### Controversial point However, this PR creates some additional complexity, that we might not want to keep. The main complexity is _knot-tying_: to avoid lookups when generating the OpenAPI, it builds an actual graph of input types, which means that we need something similar to (but simpler than) `MonadSchema`, to avoid infinite recursions when analyzing the input types of a query. To do this, this PR introduces `CircularT`, a lesser `SchemaT` that aims at avoiding ever having to reinvent this particular wheel ever again. ### Remaining work - [x] fix existing tests (they are all failing due to some of the schema changes) - [ ] add tests to cover the new features: - [x] tests for `CircularT` - [ ] tests for enums in output schemas - [x] extract / document `CircularT` if we wish to keep it - [x] add more comments to `OpenAPI` - [x] have a second look at `buildVariableSchema` - [x] fix all missing diagnostics in `Analyze` - [x] add a Changelog entry? PR-URL: https://github.com/hasura/graphql-engine-mono/pull/4654 Co-authored-by: David Overton <7734777+dmoverton@users.noreply.github.com> GitOrigin-RevId: f4a9191f22dfcc1dccefd6a52f5c586b6ad17172
2022-06-30 15:55:56 +03:00
MonadFix,
MonadCatch,
MonadThrow,
MonadMask,
HasHttpManagerM,
HasServerConfigCtx,
MonadReader (PG.PGPool, PG.PGLogger),
MonadBase b,
MonadBaseControl b
)
via (ReaderT (PG.PGPool, PG.PGLogger) m)
deriving
( MonadTrans
)
via (ReaderT (PG.PGPool, PG.PGLogger))
resolvePostgresConnInfo ::
(MonadIO m) => Env.Environment -> UrlConf -> Maybe Int -> m PG.ConnInfo
resolvePostgresConnInfo env dbUrlConf maybeRetries = do
dbUrlText <-
runExcept (resolveUrlConf env dbUrlConf) `onLeft` \err ->
liftIO (throwErrJExit InvalidDatabaseConnectionParamsError err)
pure $ PG.ConnInfo retries $ PG.CDDatabaseURI $ txtToBs dbUrlText
where
retries = fromMaybe 1 maybeRetries
-- | Initializes or migrates the catalog and returns the context required to start the server.
initialiseServeCtx ::
(C.ForkableMonadIO m, MonadCatch m) =>
Env.Environment ->
GlobalCtx ->
ServeOptions Hasura ->
ServerMetrics ->
ManagedT m ServeCtx
initialiseServeCtx env GlobalCtx {..} so@ServeOptions {..} serverMetrics = do
instanceId <- liftIO generateInstanceId
latch <- liftIO newShutdownLatch
loggers@(Loggers loggerCtx logger pgLogger) <- mkLoggers soEnabledLogTypes soLogLevel
when (null soAdminSecret) $ do
let errMsg :: Text
errMsg = "WARNING: No admin secret provided"
unLogger logger $
StartupLog
{ slLogLevel = LevelWarn,
slKind = "no_admin_secret",
slInfo = A.toJSON errMsg
}
-- log serve options
unLogger logger $ serveOptsToLog so
-- log postgres connection info
unLogger logger $ connInfoToLog _gcMetadataDbConnInfo
metadataDbPool <- liftIO $ PG.initPGPool _gcMetadataDbConnInfo soConnParams pgLogger
let maybeDefaultSourceConfig =
fst _gcDefaultPostgresConnInfo <&> \(dbUrlConf, _) ->
let connSettings =
PostgresPoolSettings
{ _ppsMaxConnections = Just $ PG.cpConns soConnParams,
_ppsIdleTimeout = Just $ PG.cpIdleTime soConnParams,
_ppsRetries = snd _gcDefaultPostgresConnInfo <|> Just 1,
_ppsPoolTimeout = PG.cpTimeout soConnParams,
_ppsConnectionLifetime = PG.cpMbLifetime soConnParams
}
sourceConnInfo = PostgresSourceConnInfo dbUrlConf (Just connSettings) (PG.cpAllowPrepare soConnParams) soTxIso Nothing
in PostgresConnConfiguration sourceConnInfo Nothing defaultPostgresExtensionsSchema
optimizePermissionFilters
| EFOptimizePermissionFilters `elem` soExperimentalFeatures = Options.OptimizePermissionFilters
| otherwise = Options.Don'tOptimizePermissionFilters
sqlGenCtx = SQLGenCtx soStringifyNum soDangerousBooleanCollapse optimizePermissionFilters
let serverConfigCtx =
ServerConfigCtx
soInferFunctionPermissions
soEnableRemoteSchemaPermissions
sqlGenCtx
soEnableMaintenanceMode
soExperimentalFeatures
soEventingMode
soReadOnlyMode
soDefaultNamingConvention
rebuildableSchemaCache <-
lift . flip onException (flushLogger loggerCtx) $
migrateCatalogSchema
env
logger
metadataDbPool
maybeDefaultSourceConfig
mempty
serverConfigCtx
(mkPgSourceResolver pgLogger)
mkMSSQLSourceResolver
soExtensionsSchema
-- Start a background thread for listening schema sync events from other server instances,
metaVersionRef <- liftIO $ STM.newEmptyTMVarIO
Disable schema sync with an interval of 0 instead of an explicit flag Removing `schemaSyncDisable` flag and interpreting `schemaPollInterval` of `0` as disabling schema sync. This change brings the convention in line with how action and other intervals are used to disable processes. There is an opportunity to abstract the notion of an optional interval similar to how actions uses `AsyncActionsFetchInterval`. This can be used for the following fields of ServeOptions, with RawServeOptions having a milliseconds value where `0` is interpreted as disable. OptionalInterval: ``` -- | Sleep time interval for activities data OptionalInterval = Skip -- ^ No polling | Interval !Milliseconds -- ^ Interval time deriving (Show, Eq) ``` ServeOptions: ``` data ServeOptions impl = ServeOptions { ... , soEventsFetchInterval :: !OptionalInterval , soAsyncActionsFetchInterval :: !OptionalInterval , soSchemaPollInterval :: !OptionalInterval ... } ``` Rather than encoding a `Maybe OptionalInterval` in RawServeOptions, instead a `Maybe Milliseconds` can be used to more directly express the input format, with the ServeOptions constructor interpreting `0` as `Skip`. Current inconsistencies: * `soEventsFetchInterval` has no value interpreted as disabling the fetches * `soAsyncActionsFetchInterval` uses an `OptionalInterval` analog in `RawServeOptions` instead of `Milliseconds` * `soSchemaPollInterval` currently uses `Milliseconds` directly in `ServeOptions` --- ### Kodiak commit message Information used by [Kodiak bot](https://kodiakhq.com/) while merging this PR. #### Commit title Same as the title of this pull request GitOrigin-RevId: 3cda1656ae39ae95ba142512ed4e123d6ffeb7fe
2021-04-07 12:59:48 +03:00
-- An interval of 0 indicates that no schema sync is required
case soSchemaPollInterval of
Skip -> unLogger logger $ mkGenericStrLog LevelInfo "schema-sync" "Schema sync disabled"
Interval interval -> do
unLogger logger $ mkGenericStrLog LevelInfo "schema-sync" ("Schema sync enabled. Polling at " <> show interval)
void $ startSchemaSyncListenerThread logger metadataDbPool instanceId interval metaVersionRef
schemaCacheRef <- initialiseSchemaCacheRef serverMetrics rebuildableSchemaCache
server: multitenant metadata storage The metadata storage implementation for graphql-engine-multitenant. - It uses a centralized PG database to store metadata of all tenants (instead of per tenant database) - Similarly, it uses a single schema-sync listener thread per MT worker (instead of listener thread per tenant) (PS: although, the processor thread is spawned per tenant) - 2 new flags are introduced - `--metadataDatabaseUrl` and (optional) `--metadataDatabaseRetries` Internally, a "metadata mode" is introduced to indicate an external/managed store vs a store managed by each pro-server. To run : - obtain the schema file (located at `pro/server/res/cloud/metadata_db_schema.sql`) - apply the schema on a PG database - set the `--metadataDatabaseUrl` flag to point to the above database - run the MT executable The schema (and its migrations) for the metadata db is managed outside the MT worker. ### New metadata The following is the new portion of `Metadata` added : ```yaml version: 3 metrics_config: analyze_query_variables: true analyze_response_body: false api_limits: disabled: false depth_limit: global: 5 per_role: user: 7 editor: 9 rate_limit: per_role: user: unique_params: - x-hasura-user-id - x-hasura-team-id max_reqs_per_min: 20 global: unique_params: IP max_reqs_per_min: 10 ``` - In Pro, the code around fetching/updating/syncing pro-config is removed - That also means, `hdb_pro_catalog` for keeping the config cache is not required. Hence the `hdb_pro_catalog` is also removed - The required config comes from metadata / schema cache ### New Metadata APIs - `set_api_limits` - `remove_api_limits` - `set_metrics_config` - `remove_metrics_config` #### `set_api_limits` ```yaml type: set_api_limits args: disabled: false depth_limit: global: 5 per_role: user: 7 editor: 9 rate_limit: per_role: anonymous: max_reqs_per_min: 10 unique_params: "ip" editor: max_reqs_per_min: 30 unique_params: - x-hasura-user-id user: unique_params: - x-hasura-user-id - x-hasura-team-id max_reqs_per_min: 20 global: unique_params: IP max_reqs_per_min: 10 ``` #### `remove_api_limits` ```yaml type: remove_api_limits args: {} ``` #### `set_metrics_config` ```yaml type: set_metrics_config args: analyze_query_variables: true analyze_response_body: false ``` #### `remove_metrics_config` ```yaml type: remove_metrics_config args: {} ``` #### TODO - [x] on-prem pro implementation for `MonadMetadataStorage` - [x] move the project config from Lux to pro metadata (PR: #379) - [ ] console changes for pro config/api limits, subscription workers (cc @soorajshankar @beerose) - [x] address other minor TODOs - [x] TxIso for `MonadSourceResolver` - [x] enable EKG connection pool metrics - [x] add logging of connection info when sources are added? - [x] confirm if the `buildReason` for schema cache is correct - [ ] testing - [x] 1.3 -> 1.4 cloud migration script (#465; PR: #508) - [x] one-time migration of existing metadata from users' db to centralized PG - [x] one-time migration of pro project config + api limits + regression tests from metrics API to metadata - [ ] integrate with infra team (WIP - cc @hgiasac) - [x] benchmark with 1000+ tenants + each tenant making read/update metadata query every second (PR: https://github.com/hasura/graphql-engine-mono/pull/411) - [ ] benchmark with few tenants having large metadata (100+ tables etc.) - [ ] when user moves regions (https://github.com/hasura/lux/issues/1717) - [ ] metadata has to be migrated from one regional PG to another - [ ] migrate metrics data as well ? - [ ] operation logs - [ ] regression test runs - [ ] find a way to share the schema files with the infra team Co-authored-by: Naveen Naidu <30195193+Naveenaidu@users.noreply.github.com> GitOrigin-RevId: 39e8361f2c0e96e0f9e8f8fb45e6cc14857f31f1
2021-02-11 20:54:25 +03:00
server: http ip blocklist (closes #2449) ## Description This PR is in reference to #2449 (support IP blacklisting for multitenant) *RFC Update: Add support for IPv6 blocking* ### Solution and Design Using [http-client-restricted](https://hackage.haskell.org/package/http-client-restricted) package, we're creating the HTTP manager with restricting capabilities. The IPs can be supplied from the CLI arguments as `--ipv4BlocklistCidrs cidr1, cidr2...` or `--disableDefaultIPv4Blocklist` for a default IP list. The new manager will block all requests to the provided CIDRs. We are extracting the error message string to show the end-user that given IP is blocked from being set as a webhook. There are 2 ways to extract the error message "connection to IP address is blocked". Given below are the responses from event trigger to a blocked IP for these implementations: - 6d74fde316f61e246c861befcca5059d33972fa7 - We return the error message string as a HTTPErr(HOther) from `Hasura/Eventing/HTTP.hs`. ``` { "data": { "message": "blocked connection to private IP address " }, "version": "2", "type": "client_error" } ``` - 88e17456345cbb449a5ecd4877c84c9f319dbc25 - We case match on HTTPExceptionContent for InternaException in `Hasura/HTTP.hs` and extract the error message string from it. (this is implemented as it handles all the cases where pro engine makes webhook requests) ``` { "data": { "message": { "type": "http_exception", "message": "blocked connection to private IP address ", "request": { "secure": false, "path": "/webhook", "responseTimeout": "ResponseTimeoutMicro 60000000", "queryString": "", "method": "POST", "requestHeaders": { "Content-Type": "application/json", "X-B3-ParentSpanId": "5ae6573edb2a6b36", "X-B3-TraceId": "29ea7bd6de6ebb8f", "X-B3-SpanId": "303137d9f1d4f341", "User-Agent": "hasura-graphql-engine/cerebushttp-ip-blacklist-a793a0e41-dirty" }, "host": "139.59.90.109", "port": 8000 } } }, "version": "2", "type": "client_error" } ``` ### Steps to test and verify The restricted IPs can be used as webhooks in event triggers, and hasura will return an error message in reponse. ### Limitations, known bugs & workarounds - The `http-client-restricted` has a needlessly complex interface, and puts effort into implementing proxy support which we don't want, so we've inlined a stripped down version. - Performance constraint: As the blocking is checked for each request, if a long list of blocked CIDRs is supplied, iterating through all of them is not what we would prefer. Using trie is suggested to overcome this. (Added to RFC) - Calls to Lux endpoints are inconsistent: We use either the http manager from the ProServeCtx which is unrestricted, or the http manager from the ServeCtx which is restricted (the latter through the instances for MonadMetadataApiAuthorization and UserAuthentication). (The failure scenario here would be: cloud sets PRO_ENDPOINT to something that resolves to an internal address, and then restricted requests to those endpoints fail, causing auth to fail on user requests. This is about HTTP requests to lux auth endpoints.) ## Changelog - ✅ `CHANGELOG.md` is updated with user-facing content relevant to this PR. ## Affected components - ✅ Server - ✅ Tests PR-URL: https://github.com/hasura/graphql-engine-mono/pull/3186 Co-authored-by: Robert <132113+robx@users.noreply.github.com> GitOrigin-RevId: 5bd2de2d028bc416b02c99e996c7bebce56fb1e7
2022-02-25 16:29:55 +03:00
srvMgr <- liftIO $ mkHttpManager (readTlsAllowlist schemaCacheRef) mempty
pure $
ServeCtx
srvMgr
instanceId
loggers
soEnabledLogTypes
metadataDbPool
latch
rebuildableSchemaCache
schemaCacheRef
metaVersionRef
mkLoggers ::
(MonadIO m, MonadBaseControl IO m) =>
HashSet (EngineLogType Hasura) ->
LogLevel ->
ManagedT m Loggers
mkLoggers enabledLogs logLevel = do
loggerCtx <- mkLoggerCtx (defaultLoggerSettings True logLevel) enabledLogs
let logger = mkLogger loggerCtx
pgLogger = mkPGLogger logger
return $ Loggers loggerCtx logger pgLogger
-- | helper function to initialize or migrate the @hdb_catalog@ schema (used by pro as well)
migrateCatalogSchema ::
(MonadIO m, MonadBaseControl IO m) =>
Env.Environment ->
Logger Hasura ->
PG.PGPool ->
Maybe (SourceConnConfiguration ('Postgres 'Vanilla)) ->
Blocklist ->
ServerConfigCtx ->
SourceResolver ('Postgres 'Vanilla) ->
SourceResolver ('MSSQL) ->
ExtensionsSchema ->
m RebuildableSchemaCache
migrateCatalogSchema
env
logger
pool
defaultSourceConfig
blockList
serverConfigCtx
pgSourceResolver
mssqlSourceResolver
extensionsSchema = do
initialiseResult <- runExceptT $ do
-- TODO: should we allow the migration to happen during maintenance mode?
-- Allowing this can be a sanity check, to see if the hdb_catalog in the
-- DB has been set correctly
currentTime <- liftIO Clock.getCurrentTime
(migrationResult, metadata) <-
PG.runTx pool (PG.Serializable, Just PG.ReadWrite) $
migrateCatalog
defaultSourceConfig
extensionsSchema
(_sccMaintenanceMode serverConfigCtx)
currentTime
let tlsAllowList = networkTlsAllowlist $ _metaNetwork metadata
httpManager <- liftIO $ mkHttpManager (pure tlsAllowList) blockList
let cacheBuildParams =
CacheBuildParams httpManager pgSourceResolver mssqlSourceResolver serverConfigCtx
buildReason = CatalogSync
schemaCache <-
runCacheBuild cacheBuildParams $
buildRebuildableSchemaCacheWithReason buildReason logger env metadata
pure (migrationResult, schemaCache)
(migrationResult, schemaCache) <-
initialiseResult `onLeft` \err -> do
unLogger
logger
StartupLog
{ slLogLevel = LevelError,
slKind = "catalog_migrate",
slInfo = A.toJSON err
}
liftIO (throwErrJExit DatabaseMigrationError err)
unLogger logger migrationResult
pure schemaCache
-- | A latch for the graceful shutdown of a server process.
newtype ShutdownLatch = ShutdownLatch {unShutdownLatch :: C.MVar ()}
-- | Event triggers live in the user's DB and other events
-- (cron, one-off and async actions)
-- live in the metadata DB, so we need a way to differentiate the
-- type of shutdown action
data ShutdownAction
= EventTriggerShutdownAction (IO ())
| MetadataDBShutdownAction (MetadataStorageT IO ())
newShutdownLatch :: IO ShutdownLatch
newShutdownLatch = fmap ShutdownLatch C.newEmptyMVar
-- | Block the current thread, waiting on the latch.
waitForShutdown :: ShutdownLatch -> IO ()
waitForShutdown = C.readMVar . unShutdownLatch
-- | Initiate a graceful shutdown of the server associated with the provided
-- latch.
shutdownGracefully :: ShutdownLatch -> IO ()
shutdownGracefully = void . flip C.tryPutMVar () . unShutdownLatch
-- | Returns True if the latch is set for shutdown and vice-versa
shuttingDown :: ShutdownLatch -> IO Bool
shuttingDown latch = not <$> C.isEmptyMVar (unShutdownLatch latch)
-- | If an exception is encountered , flush the log buffer and
-- rethrow If we do not flush the log buffer on exception, then log lines
-- may be missed
-- See: https://github.com/hasura/graphql-engine/issues/4772
flushLogger :: MonadIO m => LoggerCtx impl -> m ()
flushLogger = liftIO . FL.flushLogStr . _lcLoggerSet
-- | This function acts as the entrypoint for the graphql-engine webserver.
--
-- Note: at the exit of this function, or in case of a graceful server shutdown
-- (SIGTERM, or more generally, whenever the shutdown latch is set), we need to
-- make absolutely sure that we clean up any resources which were allocated during
-- server setup. In the case of a multitenant process, failure to do so can lead to
-- resource leaks.
--
-- To track these resources, we use the ManagedT monad, and attach finalizers at
-- the same point in the code where we allocate resources. If you fork a new
-- long-lived thread, or create a connection pool, or allocate any other
-- long-lived resource, make sure to pair the allocator with its finalizer.
-- There are plenty of examples throughout the code. For example, see
-- 'C.forkManagedT'.
--
-- Note also: the order in which the finalizers run can be important. Specifically,
-- we want the finalizers for the logger threads to run last, so that we retain as
-- many "thread stopping" log messages as possible. The order in which the
-- finalizers is run is determined by the order in which they are introduced in the
-- code.
{- HLINT ignore runHGEServer "Avoid lambda" -}
runHGEServer ::
forall m impl.
( MonadIO m,
Rewrite OpenAPI ### Description This PR rewrites OpenAPI to be more idiomatic. Some noteworthy changes: - we accumulate all required information during the Analyze phase, to avoid having to do a single lookup in the schema cache during the OpenAPI generation phase (we now only need the schema cache as input to run the analysis) - we no longer build intermediary endpoint information and aggregate it, we directly build the the `PathItem` for each endpoint; additionally, that means we no longer have to assume that different methods have the same metadata - we no longer have to first declare types, then craft references: we do everything in one step - we now properly deal with nullability by treating "typeName" and "typeName!" as different - we add a bunch of additional fields in the generated "schema", such as title - we do now support enum values in both input and output positions - checking whether the request body is required is now performed on the fly rather than by introspecting the generated schema - the methods in the file are sorted by topic ### Controversial point However, this PR creates some additional complexity, that we might not want to keep. The main complexity is _knot-tying_: to avoid lookups when generating the OpenAPI, it builds an actual graph of input types, which means that we need something similar to (but simpler than) `MonadSchema`, to avoid infinite recursions when analyzing the input types of a query. To do this, this PR introduces `CircularT`, a lesser `SchemaT` that aims at avoiding ever having to reinvent this particular wheel ever again. ### Remaining work - [x] fix existing tests (they are all failing due to some of the schema changes) - [ ] add tests to cover the new features: - [x] tests for `CircularT` - [ ] tests for enums in output schemas - [x] extract / document `CircularT` if we wish to keep it - [x] add more comments to `OpenAPI` - [x] have a second look at `buildVariableSchema` - [x] fix all missing diagnostics in `Analyze` - [x] add a Changelog entry? PR-URL: https://github.com/hasura/graphql-engine-mono/pull/4654 Co-authored-by: David Overton <7734777+dmoverton@users.noreply.github.com> GitOrigin-RevId: f4a9191f22dfcc1dccefd6a52f5c586b6ad17172
2022-06-30 15:55:56 +03:00
MonadFix m,
MonadMask m,
MonadStateless IO m,
LA.Forall (LA.Pure m),
UserAuthentication (Tracing.TraceT m),
HttpLog m,
ConsoleRenderer m,
MonadMetadataApiAuthorization m,
MonadGQLExecutionCheck m,
MonadConfigApiHandler m,
MonadQueryLog m,
WS.MonadWSLog m,
MonadExecuteQuery m,
Tracing.HasReporter m,
HasResourceLimits m,
MonadMetadataStorage (MetadataStorageT m),
MonadResolveSource m,
EB.MonadQueryTags m,
MonadEventLogCleanup m
) =>
(ServerCtx -> Spock.SpockT m ()) ->
Env.Environment ->
ServeOptions impl ->
ServeCtx ->
-- and mutations
-- | start time
UTCTime ->
Maybe ES.SubscriptionPostPollHook ->
ServerMetrics ->
EKG.Store EKG.EmptyMetrics ->
-- | A hook which can be called to indicate when the server is started succesfully
Maybe (IO ()) ->
PrometheusMetrics ->
ManagedT m ()
runHGEServer setupHook env serveOptions serveCtx initTime postPollHook serverMetrics ekgStore startupStatusHook prometheusMetrics = do
waiApplication <-
mkHGEServer setupHook env serveOptions serveCtx initTime postPollHook serverMetrics ekgStore prometheusMetrics
-- `startupStatusHook`: add `Service started successfully` message to config_status
-- table when a tenant starts up in multitenant
let warpSettings :: Warp.Settings
warpSettings =
Warp.setPort (_getPort $ soPort serveOptions)
. Warp.setHost (soHost serveOptions)
. Warp.setGracefulShutdownTimeout (Just 30) -- 30s graceful shutdown
. Warp.setInstallShutdownHandler shutdownHandler
. Warp.setBeforeMainLoop (onJust startupStatusHook id)
. setForkIOWithMetrics
$ Warp.defaultSettings
setForkIOWithMetrics :: Warp.Settings -> Warp.Settings
setForkIOWithMetrics = Warp.setFork \f -> do
void $
C.forkIOWithUnmask
( \unmask ->
bracket_
( do
EKG.Gauge.inc (smWarpThreads serverMetrics)
incWarpThreads (pmConnections prometheusMetrics)
)
( do
EKG.Gauge.dec (smWarpThreads serverMetrics)
decWarpThreads (pmConnections prometheusMetrics)
)
(f unmask)
)
shutdownHandler :: IO () -> IO ()
shutdownHandler closeSocket =
LA.link =<< LA.async do
waitForShutdown $ _scShutdownLatch serveCtx
let logger = _lsLogger $ _scLoggers serveCtx
unLogger logger $ mkGenericStrLog LevelInfo "server" "gracefully shutting down server"
closeSocket
-- Here we block until the shutdown latch 'MVar' is filled, and then
-- shut down the server. Once this blocking call returns, we'll tidy up
-- any resources using the finalizers attached using 'ManagedT' above.
-- Structuring things using the shutdown latch in this way lets us decide
-- elsewhere exactly how we want to control shutdown.
liftIO $ Warp.runSettings warpSettings waiApplication
-- | Part of a factorization of 'runHGEServer' to expose the constructed WAI
-- application for testing purposes. See 'runHGEServer' for documentation.
mkHGEServer ::
forall m impl.
( MonadIO m,
Rewrite OpenAPI ### Description This PR rewrites OpenAPI to be more idiomatic. Some noteworthy changes: - we accumulate all required information during the Analyze phase, to avoid having to do a single lookup in the schema cache during the OpenAPI generation phase (we now only need the schema cache as input to run the analysis) - we no longer build intermediary endpoint information and aggregate it, we directly build the the `PathItem` for each endpoint; additionally, that means we no longer have to assume that different methods have the same metadata - we no longer have to first declare types, then craft references: we do everything in one step - we now properly deal with nullability by treating "typeName" and "typeName!" as different - we add a bunch of additional fields in the generated "schema", such as title - we do now support enum values in both input and output positions - checking whether the request body is required is now performed on the fly rather than by introspecting the generated schema - the methods in the file are sorted by topic ### Controversial point However, this PR creates some additional complexity, that we might not want to keep. The main complexity is _knot-tying_: to avoid lookups when generating the OpenAPI, it builds an actual graph of input types, which means that we need something similar to (but simpler than) `MonadSchema`, to avoid infinite recursions when analyzing the input types of a query. To do this, this PR introduces `CircularT`, a lesser `SchemaT` that aims at avoiding ever having to reinvent this particular wheel ever again. ### Remaining work - [x] fix existing tests (they are all failing due to some of the schema changes) - [ ] add tests to cover the new features: - [x] tests for `CircularT` - [ ] tests for enums in output schemas - [x] extract / document `CircularT` if we wish to keep it - [x] add more comments to `OpenAPI` - [x] have a second look at `buildVariableSchema` - [x] fix all missing diagnostics in `Analyze` - [x] add a Changelog entry? PR-URL: https://github.com/hasura/graphql-engine-mono/pull/4654 Co-authored-by: David Overton <7734777+dmoverton@users.noreply.github.com> GitOrigin-RevId: f4a9191f22dfcc1dccefd6a52f5c586b6ad17172
2022-06-30 15:55:56 +03:00
MonadFix m,
MonadMask m,
MonadStateless IO m,
LA.Forall (LA.Pure m),
UserAuthentication (Tracing.TraceT m),
HttpLog m,
ConsoleRenderer m,
MonadMetadataApiAuthorization m,
MonadGQLExecutionCheck m,
MonadConfigApiHandler m,
MonadQueryLog m,
WS.MonadWSLog m,
MonadExecuteQuery m,
Tracing.HasReporter m,
HasResourceLimits m,
MonadMetadataStorage (MetadataStorageT m),
MonadResolveSource m,
EB.MonadQueryTags m,
MonadEventLogCleanup m
) =>
(ServerCtx -> Spock.SpockT m ()) ->
Env.Environment ->
ServeOptions impl ->
ServeCtx ->
-- and mutations
-- | start time
UTCTime ->
Maybe ES.SubscriptionPostPollHook ->
ServerMetrics ->
EKG.Store EKG.EmptyMetrics ->
PrometheusMetrics ->
ManagedT m Application
mkHGEServer setupHook env ServeOptions {..} ServeCtx {..} initTime postPollHook serverMetrics ekgStore prometheusMetrics = do
-- Comment this to enable expensive assertions from "GHC.AssertNF". These
-- will log lines to STDOUT containing "not in normal form". In the future we
-- could try to integrate this into our tests. For now this is a development
-- tool.
--
-- NOTE: be sure to compile WITHOUT code coverage, for this to work properly.
liftIO disableAssertNF
let optimizePermissionFilters
| EFOptimizePermissionFilters `elem` soExperimentalFeatures = Options.OptimizePermissionFilters
| otherwise = Options.Don'tOptimizePermissionFilters
sqlGenCtx = SQLGenCtx soStringifyNum soDangerousBooleanCollapse optimizePermissionFilters
Loggers loggerCtx logger _ = _scLoggers
authModeRes <-
runExceptT $
setupAuthMode
soAdminSecret
soAuthHook
soJwtSecret
soUnAuthRole
_scHttpManager
logger
authMode <- onLeft authModeRes (throwErrExit AuthConfigurationError . T.unpack)
HasuraApp app cacheRef actionSubState stopWsServer <-
lift $
flip onException (flushLogger loggerCtx) $
mkWaiApp
setupHook
env
logger
sqlGenCtx
soEnableAllowlist
_scHttpManager
authMode
soCorsConfig
soEnableConsole
soConsoleAssetsDir
soEnableTelemetry
_scInstanceId
soEnabledAPIs
soLiveQueryOpts
soStreamingQueryOpts
soResponseInternalErrorsConfig
postPollHook
_scSchemaCacheRef
ekgStore
serverMetrics
prometheusMetrics
soEnableRemoteSchemaPermissions
soInferFunctionPermissions
soConnectionOptions
soWebSocketKeepAlive
soEnableMaintenanceMode
soEventingMode
soReadOnlyMode
soExperimentalFeatures
_scEnabledLogTypes
soWebSocketConnectionInitTimeout
soEnableMetadataQueryLogging
soDefaultNamingConvention
let serverConfigCtx =
ServerConfigCtx
soInferFunctionPermissions
soEnableRemoteSchemaPermissions
sqlGenCtx
soEnableMaintenanceMode
soExperimentalFeatures
soEventingMode
soReadOnlyMode
soDefaultNamingConvention
-- Log Warning if deprecated environment variables are used
sources <- scSources <$> liftIO (getSchemaCache cacheRef)
liftIO $ logDeprecatedEnvVars logger env sources
-- log inconsistent schema objects
inconsObjs <- scInconsistentObjs <$> liftIO (getSchemaCache cacheRef)
liftIO $ logInconsistentMetadata logger inconsObjs
-- NOTE: `newLogTVar` is being used to make sure that the metadata logger runs only once
-- while logging errors or any `inconsistent_metadata` logs.
newLogTVar <- liftIO $ STM.newTVarIO False
-- Start a background thread for processing schema sync event present in the '_sscSyncEventRef'
_ <-
startSchemaSyncProcessorThread
logger
_scHttpManager
_scMetaVersionRef
cacheRef
_scInstanceId
serverConfigCtx
newLogTVar
lockedEventsCtx <-
liftIO $
LockedEventsCtx
<$> STM.newTVarIO mempty
<*> STM.newTVarIO mempty
<*> STM.newTVarIO mempty
<*> STM.newTVarIO mempty
case soEventingMode of
EventingEnabled -> do
startEventTriggerPollerThread logger lockedEventsCtx cacheRef
startAsyncActionsPollerThread logger lockedEventsCtx cacheRef actionSubState
-- start a background thread to create new cron events
_cronEventsThread <-
C.forkManagedT "runCronEventsGenerator" logger $
runCronEventsGenerator logger (getSchemaCache cacheRef)
startScheduledEventsPollerThread logger lockedEventsCtx cacheRef
EventingDisabled ->
unLogger logger $ mkGenericStrLog LevelInfo "server" "starting in eventing disabled mode"
-- start a background thread to check for updates
_updateThread <-
C.forkManagedT "checkForUpdates" logger $
liftIO $ checkForUpdates loggerCtx _scHttpManager
-- start a background thread for telemetry
_telemetryThread <-
if soEnableTelemetry
then do
lift . unLogger logger $ mkGenericStrLog LevelInfo "telemetry" telemetryNotice
dbUid <-
runMetadataStorageT getMetadataDbUid
>>= (`onLeft` throwErrJExit DatabaseMigrationError)
pgVersion <-
liftIO (runExceptT $ PG.runTx _scMetadataDbPool (PG.ReadCommitted, Nothing) $ getPgVersion)
>>= (`onLeft` throwErrJExit DatabaseMigrationError)
telemetryThread <-
C.forkManagedT "runTelemetry" logger $
liftIO $ runTelemetry logger _scHttpManager (getSchemaCache cacheRef) dbUid _scInstanceId pgVersion
return $ Just telemetryThread
else return Nothing
finishTime <- liftIO Clock.getCurrentTime
let apiInitTime = realToFrac $ Clock.diffUTCTime finishTime initTime
unLogger logger $
mkGenericLog LevelInfo "server" $ StartupTimeInfo "starting API server" apiInitTime
-- These cleanup actions are not directly associated with any
-- resource, but we still need to make sure we clean them up here.
allocate_ (pure ()) (liftIO stopWsServer)
pure app
where
isRetryRequired _ resp = do
return $ case resp of
Right _ -> False
Left err -> qeCode err == ConcurrentUpdate
prepareScheduledEvents (Logger logger) = do
liftIO $ logger $ mkGenericStrLog LevelInfo "scheduled_triggers" "preparing data"
res <- Retry.retrying Retry.retryPolicyDefault isRetryRequired (return $ runMetadataStorageT unlockAllLockedScheduledEvents)
onLeft res (\err -> logger $ mkGenericStrLog LevelError "scheduled_triggers" (show $ qeError err))
getProcessingScheduledEventsCount :: LockedEventsCtx -> IO Int
getProcessingScheduledEventsCount LockedEventsCtx {..} = do
processingCronEvents <- readTVarIO leCronEvents
processingOneOffEvents <- readTVarIO leOneOffEvents
return $ length processingOneOffEvents + length processingCronEvents
shutdownEventTriggerEvents ::
[BackendSourceInfo] ->
Logger Hasura ->
LockedEventsCtx ->
IO ()
shutdownEventTriggerEvents sources (Logger logger) LockedEventsCtx {..} = do
-- TODO: is this correct?
-- event triggers should be tied to the life cycle of a source
lockedEvents <- readTVarIO leEvents
forM_ sources $ \backendSourceInfo -> do
AB.dispatchAnyBackend @BackendEventTrigger backendSourceInfo \(SourceInfo sourceName _ _ sourceConfig _ _ :: SourceInfo b) -> do
let sourceNameText = sourceNameToText sourceName
logger $ mkGenericLog LevelInfo "event_triggers" $ "unlocking events of source: " <> sourceNameText
onJust (HM.lookup sourceName lockedEvents) $ \sourceLockedEvents -> do
-- No need to execute unlockEventsTx when events are not present
onJust (NE.nonEmptySet sourceLockedEvents) $ \nonEmptyLockedEvents -> do
res <- Retry.retrying Retry.retryPolicyDefault isRetryRequired (return $ unlockEventsInSource @b sourceConfig nonEmptyLockedEvents)
case res of
Left err ->
logger $
mkGenericLog LevelWarn "event_trigger" $
"Error while unlocking event trigger events of source: " <> sourceNameText <> " error:" <> showQErr err
Right count ->
logger $
mkGenericLog LevelInfo "event_trigger" $
tshow count <> " events of source " <> sourceNameText <> " were successfully unlocked"
shutdownAsyncActions ::
LockedEventsCtx ->
MetadataStorageT m ()
shutdownAsyncActions lockedEventsCtx = do
lockedActionEvents <- liftIO $ readTVarIO $ leActionEvents lockedEventsCtx
setProcessingActionLogsToPending (LockedActionIdArray $ toList lockedActionEvents)
-- This function is a helper function to do couple of things:
--
-- 1. When the value of the `graceful-shutdown-timeout` > 0, we poll
-- the in-flight events queue we maintain using the `processingEventsCountAction`
-- number of in-flight processing events, in case of actions it is the
-- actions which are in 'processing' state and in scheduled events
-- it is the events which are in 'locked' state. The in-flight events queue is polled
-- every 5 seconds until either the graceful shutdown time is exhausted
-- or the number of in-flight processing events is 0.
-- 2. After step 1, we unlock all the events which were attempted to process by the current
-- graphql-engine instance that are still in the processing
-- state. In actions, it means to set the status of such actions to 'pending'
-- and in scheduled events, the status will be set to 'unlocked'.
waitForProcessingAction ::
Logger Hasura ->
String ->
IO Int ->
ShutdownAction ->
Seconds ->
IO ()
waitForProcessingAction l@(Logger logger) actionType processingEventsCountAction' shutdownAction maxTimeout
| maxTimeout <= 0 = do
case shutdownAction of
EventTriggerShutdownAction userDBShutdownAction -> userDBShutdownAction
MetadataDBShutdownAction metadataDBShutdownAction ->
runMetadataStorageT metadataDBShutdownAction >>= \case
Left err ->
logger $
mkGenericLog LevelWarn (T.pack actionType) $
"Error while unlocking the processing "
<> tshow actionType
<> " err - "
<> showQErr err
Right () -> pure ()
| otherwise = do
processingEventsCount <- processingEventsCountAction'
if (processingEventsCount == 0)
then
logger $
mkGenericStrLog LevelInfo (T.pack actionType) $
"All in-flight events have finished processing"
else unless (processingEventsCount == 0) $ do
C.sleep (5) -- sleep for 5 seconds and then repeat
waitForProcessingAction l actionType processingEventsCountAction' shutdownAction (maxTimeout - (Seconds 5))
startEventTriggerPollerThread logger lockedEventsCtx cacheRef = do
let maxEventThreads = unrefine soEventsHttpPoolSize
fetchInterval = milliseconds $ unrefine soEventsFetchInterval
allSources = HM.elems $ scSources $ lastBuiltSchemaCache _scSchemaCache
unless (unrefine soEventsFetchBatchSize == 0 || fetchInterval == 0) $ do
-- Don't start the events poller thread when fetchBatchSize or fetchInterval is 0
-- prepare event triggers data
eventEngineCtx <- liftIO $ atomically $ initEventEngineCtx maxEventThreads fetchInterval soEventsFetchBatchSize
let eventsGracefulShutdownAction =
waitForProcessingAction
logger
"event_triggers"
(length <$> readTVarIO (leEvents lockedEventsCtx))
(EventTriggerShutdownAction (shutdownEventTriggerEvents allSources logger lockedEventsCtx))
(unrefine soGracefulShutdownTimeout)
unLogger logger $ mkGenericStrLog LevelInfo "event_triggers" "starting workers"
void $
C.forkManagedTWithGracefulShutdown
"processEventQueue"
logger
(C.ThreadShutdown (liftIO eventsGracefulShutdownAction))
$ processEventQueue
logger
_scHttpManager
(getSchemaCache cacheRef)
eventEngineCtx
lockedEventsCtx
serverMetrics
(pmEventTriggerMetrics prometheusMetrics)
soEnableMaintenanceMode
startAsyncActionsPollerThread logger lockedEventsCtx cacheRef actionSubState = do
-- start a background thread to handle async actions
case soAsyncActionsFetchInterval of
Skip -> pure () -- Don't start the poller thread
Interval (unrefine -> sleepTime) -> do
let label = "asyncActionsProcessor"
asyncActionGracefulShutdownAction =
( liftWithStateless \lowerIO ->
( waitForProcessingAction
logger
"async_actions"
(length <$> readTVarIO (leActionEvents lockedEventsCtx))
(MetadataDBShutdownAction (hoist lowerIO (shutdownAsyncActions lockedEventsCtx)))
(unrefine soGracefulShutdownTimeout)
)
)
void $
C.forkManagedTWithGracefulShutdown
label
logger
(C.ThreadShutdown asyncActionGracefulShutdownAction)
$ asyncActionsProcessor
env
logger
(getSchemaCache cacheRef)
(leActionEvents lockedEventsCtx)
_scHttpManager
sleepTime
Nothing
-- start a background thread to handle async action live queries
void $
C.forkManagedT "asyncActionSubscriptionsProcessor" logger $
asyncActionSubscriptionsProcessor actionSubState
startScheduledEventsPollerThread logger lockedEventsCtx cacheRef = do
-- prepare scheduled triggers
lift $ prepareScheduledEvents logger
-- start a background thread to deliver the scheduled events
-- _scheduledEventsThread <- do
let scheduledEventsGracefulShutdownAction =
( liftWithStateless \lowerIO ->
( waitForProcessingAction
logger
"scheduled_events"
(getProcessingScheduledEventsCount lockedEventsCtx)
(MetadataDBShutdownAction (hoist lowerIO unlockAllLockedScheduledEvents))
(unrefine soGracefulShutdownTimeout)
)
)
void $
C.forkManagedTWithGracefulShutdown
"processScheduledTriggers"
logger
(C.ThreadShutdown scheduledEventsGracefulShutdownAction)
$ processScheduledTriggers
env
logger
_scHttpManager
(getSchemaCache cacheRef)
lockedEventsCtx
instance (Monad m) => Tracing.HasReporter (PGMetadataStorageAppT m)
instance (Monad m) => HasResourceLimits (PGMetadataStorageAppT m) where
server: operation timeout with postgres cancelling ### Description This PR implements operation timeouts, as specced in #1232. RFC: [rfcs/operation-timeout-api-limits.md](https://github.com/hasura/graphql-engine-mono/blob/c025a90fe9779436bc0188a2bbf0ad95b5ed1f32/rfcs/operation-timeout-api-limits.md) There's still some things to be done (tests and docs most notably), but apart from that it can be reviewed. I'd still appreciate feedback on the RFC! TODO: - [x] break out the `ApiLimits` refactoring into a separate PR: #2103 - [x] finish the `pg-client-hs` PR: https://github.com/hasura/pg-client-hs/pull/39 - [x] remove configurability, after testing, prior to merging - [ ] tests: #2390 has some tests that I've run locally to confirm things work on a fundamental level - [x] changelog - [x] documentation - [x] fill in the detailed PR checklist ### Changelog - [x] `CHANGELOG.md` is updated with user-facing content relevant to this PR. If no changelog is required, then add the `no-changelog-required` label. ### Affected components - [x] Server - [ ] Console - [ ] CLI - [x] Docs - [ ] Tests ### Related Issues Product spec: #1232. ### Solution and Design Compare `rfcs/operation-timeout-api-limits.md`. ### Steps to test and verify Configure operation timeouts, e.g. by posting ``` { "type": "set_api_limits", "args": { "operation_timeout": { "global": 3 } } } ``` to `v1/metadata` to set an operation timeout of 3s. Then verify that 1. non-admin queries that take longer than 3s time out with a nice error message 2. that those queries return after ~3s (at least for postgres) 3. also that everything else still works as usual ### Limitations, known bugs & workarounds - while this will cause slow queries against any backends to fail, it's only verified to actually interrupt queries against postgres - this will only successfully short-cut (cancel) queries to postgres if the database server is responsive #### Catalog upgrade Does this PR change Hasura Catalog version? - [x] No #### Metadata Does this PR add a new Metadata feature? - [x] Yes - Does `run_sql` auto manages the new metadata through schema diffing? - [x] Not required - Does `run_sql` auto manages the definitions of metadata on renaming? - [x] Not required - Does `export_metadata`/`replace_metadata` supports the new metadata added? - [x] Yes #### GraphQL - [x] No new GraphQL schema is generated #### Breaking changes - [x] No Breaking changes PR-URL: https://github.com/hasura/graphql-engine-mono/pull/1593 GitOrigin-RevId: f0582d0be3ed9fadf89e0c4aaf96344d18331dc4
2021-09-29 19:20:06 +03:00
askHTTPHandlerLimit = pure $ ResourceLimits id
askGraphqlOperationLimit _ = pure $ \_ _ -> ResourceLimits id
instance (MonadIO m) => HttpLog (PGMetadataStorageAppT m) where
type ExtraHttpLogMetadata (PGMetadataStorageAppT m) = ()
emptyExtraHttpLogMetadata = ()
buildExtraHttpLogMetadata _ = ()
logHttpError logger loggingSettings userInfoM reqId waiReq req qErr headers =
unLogger logger $
mkHttpLog $
mkHttpErrorLogContext userInfoM loggingSettings reqId waiReq req qErr Nothing Nothing headers
logHttpSuccess logger loggingSettings userInfoM reqId waiReq reqBody response compressedResponse qTime cType headers (CommonHttpLogMetadata rb batchQueryOpLogs, ()) =
unLogger logger $
mkHttpLog $
mkHttpAccessLogContext userInfoM loggingSettings reqId waiReq reqBody (BL.length response) compressedResponse qTime cType headers rb batchQueryOpLogs
instance (Monad m) => MonadExecuteQuery (PGMetadataStorageAppT m) where
cacheLookup _ _ _ _ = pure ([], Nothing)
cacheStore _ _ _ = pure (Right CacheStoreSkipped)
instance (MonadIO m, MonadBaseControl IO m) => UserAuthentication (Tracing.TraceT (PGMetadataStorageAppT m)) where
resolveUserInfo logger manager headers authMode reqs =
runExceptT $ getUserInfoWithExpTime logger manager headers authMode reqs
accessDeniedErrMsg :: Text
accessDeniedErrMsg =
"restricted access : admin only"
instance (Monad m) => MonadMetadataApiAuthorization (PGMetadataStorageAppT m) where
authorizeV1QueryApi query handlerCtx = runExceptT do
let currRole = _uiRole $ hcUser handlerCtx
backend only insert permissions (rfc #4120) (#4224) * move user info related code to Hasura.User module * the RFC #4120 implementation; insert permissions with admin secret * revert back to old RoleName based schema maps An attempt made to avoid duplication of schema contexts in types if any role doesn't possess any admin secret specific schema * fix compile errors in haskell test * keep 'user_vars' for session variables in http-logs * no-op refacto * tests for admin only inserts * update docs for admin only inserts * updated CHANGELOG.md * default behaviour when admin secret is not set * fix x-hasura-role to X-Hasura-Role in pytests * introduce effective timeout in actions async tests * update docs for admin-secret not configured case * Update docs/graphql/manual/api-reference/schema-metadata-api/permission.rst Co-Authored-By: Marion Schleifer <marion@hasura.io> * Apply suggestions from code review Co-Authored-By: Marion Schleifer <marion@hasura.io> * a complete iteration backend insert permissions accessable via 'x-hasura-backend-privilege' session variable * console changes for backend-only permissions * provide tooltip id; update labels and tooltips; * requested changes * requested changes - remove className from Toggle component - use appropriate function name (capitalizeFirstChar -> capitalize) * use toggle props from definitelyTyped * fix accidental commit * Revert "introduce effective timeout in actions async tests" This reverts commit b7a59c19d643520cfde6af579889e1038038438a. * generate complete schema for both 'default' and 'backend' sessions * Apply suggestions from code review Co-Authored-By: Marion Schleifer <marion@hasura.io> * remove unnecessary import, export Toggle as is * update session variable in tooltip * 'x-hasura-use-backend-only-permissions' variable to switch * update help texts * update docs * update docs * update console help text * regenerate package-lock * serve no backend schema when backend_only: false and header set to true - Few type name refactor as suggested by @0x777 * update CHANGELOG.md * Update CHANGELOG.md * Update CHANGELOG.md * fix a merge bug where a certain entity didn't get removed Co-authored-by: Marion Schleifer <marion@hasura.io> Co-authored-by: Rishichandra Wawhal <rishi@hasura.io> Co-authored-by: rikinsk <rikin.kachhia@gmail.com> Co-authored-by: Tirumarai Selvan <tiru@hasura.io>
2020-04-24 12:10:53 +03:00
when (requiresAdmin query && currRole /= adminRoleName) $
withPathK "args" $ throw400 AccessDenied accessDeniedErrMsg
authorizeV1MetadataApi _ handlerCtx = runExceptT do
let currRole = _uiRole $ hcUser handlerCtx
when (currRole /= adminRoleName) $
withPathK "args" $ throw400 AccessDenied accessDeniedErrMsg
authorizeV2QueryApi _ handlerCtx = runExceptT do
let currRole = _uiRole $ hcUser handlerCtx
when (currRole /= adminRoleName) $
withPathK "args" $ throw400 AccessDenied accessDeniedErrMsg
instance (Monad m) => ConsoleRenderer (PGMetadataStorageAppT m) where
renderConsole path authMode enableTelemetry consoleAssetsDir =
return $ mkConsoleHTML path authMode enableTelemetry consoleAssetsDir
instance (Monad m) => MonadGQLExecutionCheck (PGMetadataStorageAppT m) where
checkGQLExecution userInfo _ enableAL sc query _ = runExceptT $ do
req <- toParsed query
checkQueryInAllowlist enableAL AllowlistModeGlobalOnly userInfo req sc
return req
executeIntrospection _ introspectionQuery _ =
pure $ Right $ ExecStepRaw introspectionQuery
instance (MonadIO m, MonadBaseControl IO m) => MonadConfigApiHandler (PGMetadataStorageAppT m) where
runConfigApiHandler = configApiGetHandler
instance (MonadIO m) => MonadQueryLog (PGMetadataStorageAppT m) where
logQueryLog logger = unLogger logger
instance (MonadIO m) => WS.MonadWSLog (PGMetadataStorageAppT m) where
logWSLog logger = unLogger logger
instance (Monad m) => MonadResolveSource (PGMetadataStorageAppT m) where
getPGSourceResolver = mkPgSourceResolver <$> asks snd
getMSSQLSourceResolver = return mkMSSQLSourceResolver
instance (Monad m) => EB.MonadQueryTags (PGMetadataStorageAppT m) where
createQueryTags _attributes _qtSourceConfig = return $ emptyQueryTagsComment
instance (Monad m) => MonadEventLogCleanup (PGMetadataStorageAppT m) where
runLogCleaner _ = pure $ throw400 NotSupported "Event log cleanup feature is enterprise edition only"
generateCleanupSchedules _ _ _ = pure $ Right ()
runInSeparateTx ::
(MonadIO m) =>
PG.TxE QErr a ->
MetadataStorageT (PGMetadataStorageAppT m) a
runInSeparateTx tx = do
pool <- lift $ asks fst
liftEitherM $ liftIO $ runExceptT $ PG.runTx pool (PG.RepeatableRead, Nothing) tx
notifySchemaCacheSyncTx :: MetadataResourceVersion -> InstanceId -> CacheInvalidations -> PG.TxE QErr ()
notifySchemaCacheSyncTx (MetadataResourceVersion resourceVersion) instanceId invalidations = do
PG.Discard () <-
PG.withQE
defaultTxErrorHandler
[PG.sql|
INSERT INTO hdb_catalog.hdb_schema_notifications(id, notification, resource_version, instance_id)
VALUES (1, $1::json, $2, $3::uuid)
ON CONFLICT (id) DO UPDATE SET
notification = $1::json,
resource_version = $2,
instance_id = $3::uuid
|]
(PG.ViaJSON invalidations, resourceVersion, instanceId)
True
pure ()
getCatalogStateTx :: PG.TxE QErr CatalogState
getCatalogStateTx =
mkCatalogState . PG.getRow
<$> PG.withQE
defaultTxErrorHandler
[PG.sql|
SELECT hasura_uuid::text, cli_state::json, console_state::json
FROM hdb_catalog.hdb_version
|]
()
False
where
mkCatalogState (dbId, PG.ViaJSON cliState, PG.ViaJSON consoleState) =
CatalogState dbId cliState consoleState
setCatalogStateTx :: CatalogStateType -> A.Value -> PG.TxE QErr ()
setCatalogStateTx stateTy stateValue =
case stateTy of
CSTCli ->
PG.unitQE
defaultTxErrorHandler
[PG.sql|
UPDATE hdb_catalog.hdb_version
SET cli_state = $1
|]
(Identity $ PG.ViaJSON stateValue)
False
CSTConsole ->
PG.unitQE
defaultTxErrorHandler
[PG.sql|
UPDATE hdb_catalog.hdb_version
SET console_state = $1
|]
(Identity $ PG.ViaJSON stateValue)
False
-- | Each of the function in the type class is executed in a totally separate transaction.
--
-- To learn more about why the instance is derived as following, see Note [Generic MetadataStorageT transformer]
instance {-# OVERLAPPING #-} MonadIO m => MonadMetadataStorage (MetadataStorageT (PGMetadataStorageAppT m)) where
fetchMetadataResourceVersion = runInSeparateTx fetchMetadataResourceVersionFromCatalog
fetchMetadata = runInSeparateTx fetchMetadataAndResourceVersionFromCatalog
fetchMetadataNotifications a b = runInSeparateTx $ fetchMetadataNotificationsFromCatalog a b
setMetadata r = runInSeparateTx . setMetadataInCatalog r
notifySchemaCacheSync a b c = runInSeparateTx $ notifySchemaCacheSyncTx a b c
getCatalogState = runInSeparateTx getCatalogStateTx
setCatalogState a b = runInSeparateTx $ setCatalogStateTx a b
getMetadataDbUid = runInSeparateTx getDbId
checkMetadataStorageHealth = runInSeparateTx $ checkDbConnection
getDeprivedCronTriggerStats = runInSeparateTx . getDeprivedCronTriggerStatsTx
getScheduledEventsForDelivery = runInSeparateTx getScheduledEventsForDeliveryTx
insertCronEvents = runInSeparateTx . insertCronEventsTx
insertOneOffScheduledEvent = runInSeparateTx . insertOneOffScheduledEventTx
insertScheduledEventInvocation a b = runInSeparateTx $ insertInvocationTx a b
setScheduledEventOp a b c = runInSeparateTx $ setScheduledEventOpTx a b c
unlockScheduledEvents a b = runInSeparateTx $ unlockScheduledEventsTx a b
unlockAllLockedScheduledEvents = runInSeparateTx unlockAllLockedScheduledEventsTx
clearFutureCronEvents = runInSeparateTx . dropFutureCronEventsTx
getOneOffScheduledEvents a b c = runInSeparateTx $ getOneOffScheduledEventsTx a b c
getCronEvents a b c d = runInSeparateTx $ getCronEventsTx a b c d
getInvocations a = runInSeparateTx $ getInvocationsTx a
deleteScheduledEvent a b = runInSeparateTx $ deleteScheduledEventTx a b
insertAction a b c d = runInSeparateTx $ insertActionTx a b c d
fetchUndeliveredActionEvents = runInSeparateTx fetchUndeliveredActionEventsTx
setActionStatus a b = runInSeparateTx $ setActionStatusTx a b
fetchActionResponse = runInSeparateTx . fetchActionResponseTx
clearActionData = runInSeparateTx . clearActionDataTx
setProcessingActionLogsToPending = runInSeparateTx . setProcessingActionLogsToPendingTx
instance MonadMetadataStorageQueryAPI (MetadataStorageT (PGMetadataStorageAppT CacheBuild))
--- helper functions ---
mkConsoleHTML :: Text -> AuthMode -> Bool -> Maybe Text -> Either String Text
mkConsoleHTML path authMode enableTelemetry consoleAssetsDir =
renderHtmlTemplate consoleTmplt $
-- variables required to render the template
A.object
[ "isAdminSecretSet" A..= isAdminSecretSet authMode,
"consolePath" A..= consolePath,
"enableTelemetry" A..= boolToText enableTelemetry,
"cdnAssets" A..= boolToText (isNothing consoleAssetsDir),
"assetsVersion" A..= consoleAssetsVersion,
"serverVersion" A..= currentVersion,
"consoleSentryDsn" A..= ("" :: Text)
]
where
consolePath = case path of
"" -> "/console"
r -> "/console/" <> r
consoleTmplt = $(makeRelativeToProject "src-rsr/console.html" >>= M.embedSingleTemplate)
telemetryNotice :: String
telemetryNotice =
"Help us improve Hasura! The graphql-engine server collects anonymized "
<> "usage stats which allows us to keep improving Hasura at warp speed. "
<> "To read more or opt-out, visit https://hasura.io/docs/latest/graphql/core/guides/telemetry.html"
Clean metadata arguments ## Description Thanks to #1664, the Metadata API types no longer require a `ToJSON` instance. This PR follows up with a cleanup of the types of the arguments to the metadata API: - whenever possible, it moves those argument types to where they're used (RQL.DDL.*) - it removes all unrequired instances (mostly `ToJSON`) This PR does not attempt to do it for _all_ such argument types. For some of the metadata operations, the type used to describe the argument to the API and used to represent the value in the metadata are one and the same (like for `CreateEndpoint`). Sometimes, the two types are intertwined in complex ways (`RemoteRelationship` and `RemoteRelationshipDef`). In the spirit of only doing uncontroversial cleaning work, this PR only moves types that are not used outside of RQL.DDL. Furthermore, this is a small step towards separating the different types all jumbled together in RQL.Types. ## Notes This PR also improves several `FromJSON` instances to make use of `withObject`, and to use a human readable string instead of a type name in error messages whenever possible. For instance: - before: `expected Object for Object, but encountered X` after: `expected Object for add computed field, but encountered X` - before: `Expecting an object for update query` after: `expected Object for update query, but encountered X` This PR also renames `CreateFunctionPermission` to `FunctionPermissionArgument`, to remove the quite surprising `type DropFunctionPermission = CreateFunctionPermission`. This PR also deletes some dead code, mostly in RQL.DML. This PR also moves a PG-specific source resolving function from DDL.Schema.Source to the only place where it is used: App.hs. https://github.com/hasura/graphql-engine-mono/pull/1844 GitOrigin-RevId: a594521194bb7fe6a111b02a9e099896f9fed59c
2021-07-27 13:41:42 +03:00
mkPgSourceResolver :: PG.PGLogger -> SourceResolver ('Postgres 'Vanilla)
Clean metadata arguments ## Description Thanks to #1664, the Metadata API types no longer require a `ToJSON` instance. This PR follows up with a cleanup of the types of the arguments to the metadata API: - whenever possible, it moves those argument types to where they're used (RQL.DDL.*) - it removes all unrequired instances (mostly `ToJSON`) This PR does not attempt to do it for _all_ such argument types. For some of the metadata operations, the type used to describe the argument to the API and used to represent the value in the metadata are one and the same (like for `CreateEndpoint`). Sometimes, the two types are intertwined in complex ways (`RemoteRelationship` and `RemoteRelationshipDef`). In the spirit of only doing uncontroversial cleaning work, this PR only moves types that are not used outside of RQL.DDL. Furthermore, this is a small step towards separating the different types all jumbled together in RQL.Types. ## Notes This PR also improves several `FromJSON` instances to make use of `withObject`, and to use a human readable string instead of a type name in error messages whenever possible. For instance: - before: `expected Object for Object, but encountered X` after: `expected Object for add computed field, but encountered X` - before: `Expecting an object for update query` after: `expected Object for update query, but encountered X` This PR also renames `CreateFunctionPermission` to `FunctionPermissionArgument`, to remove the quite surprising `type DropFunctionPermission = CreateFunctionPermission`. This PR also deletes some dead code, mostly in RQL.DML. This PR also moves a PG-specific source resolving function from DDL.Schema.Source to the only place where it is used: App.hs. https://github.com/hasura/graphql-engine-mono/pull/1844 GitOrigin-RevId: a594521194bb7fe6a111b02a9e099896f9fed59c
2021-07-27 13:41:42 +03:00
mkPgSourceResolver pgLogger _ config = runExceptT do
env <- lift Env.getEnvironment
let PostgresSourceConnInfo urlConf poolSettings allowPrepare isoLevel _ = _pccConnectionInfo config
-- If the user does not provide values for the pool settings, then use the default values
let (maxConns, idleTimeout, retries) = getDefaultPGPoolSettingIfNotExists poolSettings defaultPostgresPoolSettings
urlText <- resolveUrlConf env urlConf
let connInfo = PG.ConnInfo retries $ PG.CDDatabaseURI $ txtToBs urlText
connParams =
PG.defaultConnParams
{ PG.cpIdleTime = idleTimeout,
PG.cpConns = maxConns,
PG.cpAllowPrepare = allowPrepare,
PG.cpMbLifetime = _ppsConnectionLifetime =<< poolSettings,
PG.cpTimeout = _ppsPoolTimeout =<< poolSettings
}
pgPool <- liftIO $ PG.initPGPool connInfo connParams pgLogger
Clean metadata arguments ## Description Thanks to #1664, the Metadata API types no longer require a `ToJSON` instance. This PR follows up with a cleanup of the types of the arguments to the metadata API: - whenever possible, it moves those argument types to where they're used (RQL.DDL.*) - it removes all unrequired instances (mostly `ToJSON`) This PR does not attempt to do it for _all_ such argument types. For some of the metadata operations, the type used to describe the argument to the API and used to represent the value in the metadata are one and the same (like for `CreateEndpoint`). Sometimes, the two types are intertwined in complex ways (`RemoteRelationship` and `RemoteRelationshipDef`). In the spirit of only doing uncontroversial cleaning work, this PR only moves types that are not used outside of RQL.DDL. Furthermore, this is a small step towards separating the different types all jumbled together in RQL.Types. ## Notes This PR also improves several `FromJSON` instances to make use of `withObject`, and to use a human readable string instead of a type name in error messages whenever possible. For instance: - before: `expected Object for Object, but encountered X` after: `expected Object for add computed field, but encountered X` - before: `Expecting an object for update query` after: `expected Object for update query, but encountered X` This PR also renames `CreateFunctionPermission` to `FunctionPermissionArgument`, to remove the quite surprising `type DropFunctionPermission = CreateFunctionPermission`. This PR also deletes some dead code, mostly in RQL.DML. This PR also moves a PG-specific source resolving function from DDL.Schema.Source to the only place where it is used: App.hs. https://github.com/hasura/graphql-engine-mono/pull/1844 GitOrigin-RevId: a594521194bb7fe6a111b02a9e099896f9fed59c
2021-07-27 13:41:42 +03:00
let pgExecCtx = mkPGExecCtx isoLevel pgPool
pure $ PGSourceConfig pgExecCtx connInfo Nothing mempty $ _pccExtensionsSchema config
mkMSSQLSourceResolver :: SourceResolver ('MSSQL)
mkMSSQLSourceResolver _name (MSSQLConnConfiguration connInfo _) = runExceptT do
env <- lift Env.getEnvironment
(connString, mssqlPool) <- createMSSQLPool connInfo env
let mssqlExecCtx = mkMSSQLExecCtx mssqlPool
pure $ MSSQLSourceConfig connString mssqlExecCtx