mirror of
https://github.com/hasura/graphql-engine.git
synced 2024-12-15 01:12:56 +03:00
cf531b05cb
This PR is on top of #7789. ### Description This PR entirely rewrites the API of the Tracing library, to make `interpTraceT` a thing of the past. Before this change, we ran traces by sticking a `TraceT` on top of whatever we were doing. This had several major drawbacks: - we were carrying a bunch of `TraceT` across the codebase, and the entire codebase had to know about it - we needed to carry a second class constraint around (`HasReporterM`) to be able to run all of those traces - we kept having to do stack rewriting with `interpTraceT`, which went from inconvenient to horrible - we had to declare several behavioral instances on `TraceT m` This PR rewrites all of `Tracing` using a more conventional model: there is ONE `TraceT` at the bottom of the stack, and there is an associated class constraint `MonadTrace`: any part of the code that happens to satisfy `MonadTrace` is able to create new traces. We NEVER have to do stack rewriting, `interpTraceT` is gone, and `TraceT` and `Reporter` become implementation details that 99% of the code is blissfully unaware of: code that needs to do tracing only needs to declare that the monad in which it operates implements `MonadTrace`. In doing so, this PR revealed **several bugs in the codebase**: places where we were expecting to trace something, but due to the default instance of `HasReporterM IO` we would actually not do anything. This PR also splits the code of `Tracing` into more bite-sized modules, with the goal of potentially moving to `server/lib` down the line.
### Remaining work This PR is a draft; what's left to do is: - [x] make Pro compile; i haven't updated `HasuraPro/Main` yet - [x] document Tracing by writing a note that explains how to use the library, and the meaning of "reporter", "trace" and "span", as well as the pitfalls - [x] discuss some of the trade-offs in the implementation, which is why i'm opening this PR already despite it not fully building yet - [x] it depends on #7789 being merged first PR-URL: https://github.com/hasura/graphql-engine-mono/pull/7791 GitOrigin-RevId: cadd32d039134c93ddbf364599a2f4dd988adea8
168 lines
7.2 KiB
Haskell
168 lines
7.2 KiB
Haskell
module Main
|
|
( main,
|
|
)
|
|
where
|
|
|
|
import Control.Concurrent.Extended qualified as C
|
|
import Control.Exception
|
|
import Control.Monad.Trans.Managed (ManagedT (..), lowerManagedT)
|
|
import Data.ByteString.Char8 qualified as BC
|
|
import Data.Environment qualified as Env
|
|
import Data.Int (Int64)
|
|
import Data.Kind (Type)
|
|
import Data.Text.Conversions (convertText)
|
|
import Data.Time.Clock (getCurrentTime)
|
|
import Data.Time.Clock.POSIX (getPOSIXTime)
|
|
import Database.PG.Query qualified as PG
|
|
import GHC.Debug.Stub
|
|
import GHC.TypeLits (Symbol)
|
|
import Hasura.App
|
|
import Hasura.App.State
|
|
import Hasura.Backends.Postgres.Connection.MonadTx
|
|
import Hasura.Backends.Postgres.Connection.Settings
|
|
import Hasura.GC qualified as GC
|
|
import Hasura.Logging (Hasura, LogLevel (..), defaultEnabledEngineLogTypes)
|
|
import Hasura.Prelude
|
|
import Hasura.RQL.DDL.Schema
|
|
import Hasura.Server.Init
|
|
import Hasura.Server.Metrics (ServerMetricsSpec, createServerMetrics)
|
|
import Hasura.Server.Migrate (downgradeCatalog)
|
|
import Hasura.Server.Prometheus (makeDummyPrometheusMetrics)
|
|
import Hasura.Server.Version
|
|
import Hasura.ShutdownLatch
|
|
import Hasura.Tracing (ignoreTraceT, sampleAlways)
|
|
import System.Environment (getEnvironment, lookupEnv, unsetEnv)
|
|
import System.Exit qualified as Sys
|
|
import System.Metrics qualified as EKG
|
|
import System.Posix.Signals qualified as Signals
|
|
|
|
{-# ANN main ("HLINT: ignore Use env_from_function_argument" :: String) #-}
-- | Program entry point.
--
-- Captures the process environment, wipes it, parses the command-line
-- arguments against the captured copy and hands both over to 'runApp'. An
-- 'ExitException' escaping from any of those steps has its message printed
-- before the process exits with a failure status.
main :: IO ()
main = maybeWithGhcDebug (startEngine `catch` reportAndExit)
  where
    -- The environment is snapshotted *before* it is cleared, so everything
    -- downstream reads variables from the snapshot only.
    startEngine :: IO ()
    startEngine = do
      env <- Env.getEnvironment
      clearEnvironment
      parseArgs env >>= runApp env

    -- Print the message carried by the exception; its exit code is ignored
    -- and the process always terminates via 'Sys.exitFailure'.
    reportAndExit (ExitException _code msg) = do
      BC.putStrLn msg
      Sys.exitFailure

    -- Environment variables are handled differently by the Cloud and the OSS
    -- versions, so every variable is unset here to make sure nothing reads
    -- directly from the operating system environment by accident.
    clearEnvironment :: IO ()
    clearEnvironment = getEnvironment >>= traverse_ (unsetEnv . fst)
|
|
|
|
-- | Execute the command selected by the parsed options, with the whole
-- computation wrapped in 'ignoreTraceT'.
--
-- * 'HCServe' sets up the EKG store and metrics, initialises the application
--   context, installs the shutdown signal handlers, forks the idle-GC thread
--   and finally runs the server.
-- * 'HCExport', 'HCClean' and 'HCDowngrade' each run one transaction through
--   a single-connection pool built from @_gcMetadataDbConnInfo@ and exit
--   through 'throwErrJExit' when that transaction fails.
-- * 'HCVersion' prints the engine version.
runApp :: Env.Environment -> HGEOptions (ServeOptions Hasura) -> IO ()
runApp env (HGEOptions rci metadataDbUrl hgeCmd) = ignoreTraceT do
  initTime <- liftIO getCurrentTime

  case hgeCmd of
    HCServe serveOptions -> do
      globalCtx@GlobalCtx {} <- initGlobalCtx env metadataDbUrl rci

      (ekgStore, serverMetrics) <- liftIO do
        metricsStore <- EKG.newStore @AppMetricsSpec
        void $ EKG.register (EKG.subset GcSubset metricsStore) EKG.registerGcMetrics

        -- Publish the current POSIX time, in milliseconds, as a counter.
        let currentTimeMs :: IO Int64
            currentTimeMs = round . (* 1000) <$> getPOSIXTime
        void $ EKG.register metricsStore $ EKG.registerCounter ServerTimestampMs () currentTimeMs

        metrics <- createServerMetrics (EKG.subset ServerSubset metricsStore)

        pure (EKG.subset EKG.emptyOf metricsStore, metrics)

      prometheusMetrics <- lift makeDummyPrometheusMetrics

      runManagedT (initialiseContext env globalCtx serveOptions Nothing serverMetrics prometheusMetrics sampleAlways) \(appCtx, appEnv) -> do
        -- SIGTERM and SIGINT both trigger a graceful shutdown through the
        -- shutdown latch. Graceful shutdown of regular HTTP requests is
        -- already implemented in Warp and is triggered by invoking the
        -- 'closeSocket' callback. The handlers are installed with
        -- 'Signals.CatchOnce': a signal is only caught the first time, so
        -- hitting CTRL-C once more terminates the process immediately.
        let installShutdownHandler sig =
              void $ Signals.installHandler sig (Signals.CatchOnce (shutdownGracefully $ appEnvShutdownLatch appEnv)) Nothing
        liftIO $ traverse_ installShutdownHandler [Signals.sigTERM, Signals.sigINT]

        let Loggers _ logger _ = appEnvLoggers appEnv

        _idleGCThread <-
          lift . C.forkImmortal "ourIdleGC" logger $
            GC.ourIdleGC logger (seconds 0.3) (seconds 10) (seconds 60)

        runPGMetadataStorageAppT (appCtx, appEnv) $
          lowerManagedT $
            runHGEServer (const $ pure ()) appCtx appEnv initTime Nothing ekgStore
    HCExport -> do
      GlobalCtx {..} <- initGlobalCtx env metadataDbUrl rci
      runTxWithMinimalPool _gcMetadataDbConnInfo fetchMetadataFromCatalog
        >>= either (throwErrJExit MetadataExportError) printJSON
    HCClean -> do
      GlobalCtx {..} <- initGlobalCtx env metadataDbUrl rci
      cleanResult <- runTxWithMinimalPool _gcMetadataDbConnInfo dropHdbCatalogSchema
      case cleanResult of
        Left err -> throwErrJExit MetadataCleanError err
        Right _ -> liftIO $ putStrLn "successfully cleaned graphql-engine related data"
    HCDowngrade opts -> do
      GlobalCtx {..} <- initGlobalCtx env metadataDbUrl rci
      let (maybeDefaultPgConnInfo, maybeRetries) = _gcDefaultPostgresConnInfo
          -- Fall back to a single retry when none was configured.
          poolSettings = setPostgresPoolSettings {_ppsRetries = maybeRetries <|> Just 1}
          mkSourceConfig (dbUrlConf, _) =
            PostgresConnConfiguration
              (PostgresSourceConnInfo dbUrlConf (Just poolSettings) False PG.ReadCommitted Nothing)
              Nothing
              defaultPostgresExtensionsSchema
              Nothing
              mempty
          defaultSourceConfig = mkSourceConfig <$> maybeDefaultPgConnInfo
      runTxWithMinimalPool _gcMetadataDbConnInfo (downgradeCatalog defaultSourceConfig opts initTime)
        >>= either (throwErrJExit DowngradeProcessError) (liftIO . print)
    HCVersion ->
      liftIO . putStrLn $ "Hasura GraphQL Engine: " <> convertText currentVersion
  where
    -- Run one transaction at 'PG.ReadCommitted' on a freshly created minimal
    -- pool, returning the 'ExceptT' outcome as an 'Either'.
    runTxWithMinimalPool connInfo tx = lowerManagedT do
      pool <- mkMinimalPool connInfo
      liftIO . runExceptT $ PG.runTx pool (PG.ReadCommitted, Nothing) tx

    -- Build a pool limited to a single connection, logging through the
    -- Postgres logger of a default set of loggers at 'LevelInfo'.
    mkMinimalPool connInfo = do
      loggers <- mkLoggers defaultEnabledEngineLogTypes LevelInfo
      let singleConnParams = PG.defaultConnParams {PG.cpConns = 1}
      liftIO $ PG.initPGPool connInfo singleConnParams (_lsPgLogger loggers)
|
|
|
|
-- | A specification of all EKG metrics tracked in `runApp`.
--
-- The three indices are, in order: the metric name, the metric type (e.g.
-- Counter, Gauge) and the tag structure.
data AppMetricsSpec :: Symbol -> EKG.MetricType -> Type -> Type where
  -- | Every metric described by 'ServerMetricsSpec'.
  ServerSubset :: ServerMetricsSpec name metricType tags -> AppMetricsSpec name metricType tags
  -- | Every metric described by 'EKG.GcMetrics'.
  GcSubset :: EKG.GcMetrics name metricType tags -> AppMetricsSpec name metricType tags
  -- | An untagged counter published as @ekg.server_timestamp_ms@.
  ServerTimestampMs :: AppMetricsSpec "ekg.server_timestamp_ms" 'EKG.CounterType ()
|
|
|
|
-- | Conditional variant of 'withGhcDebug', controlled by the environment
-- variable @HASURA_GHC_DEBUG@. When it is set to exactly @true@ a banner is
-- printed and the given action runs under 'withGhcDebug', which opens a debug
-- socket; for any other value, or when it is unset, the action runs
-- unchanged. This must only be called once, and its argument should be the
-- program's @main@.
maybeWithGhcDebug :: IO a -> IO a
maybeWithGhcDebug theMain = do
  debugSetting <- lookupEnv "HASURA_GHC_DEBUG"
  if debugSetting == Just "true"
    then do
      traverse_ putStrLn banner
      withGhcDebug theMain
    else theMain
  where
    -- Loud notice printed before the debug socket is opened.
    banner :: [String]
    banner =
      [ "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!",
        "!!!!! Opening a ghc-debug socket !!!!!",
        "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
      ]
|