mirror of
https://github.com/hasura/graphql-engine.git
synced 2024-12-18 21:12:09 +03:00
c52bfc540d
This is the result of a general audit of how we fork threads, with a detour into how we're using mutable state especially in websocket codepaths, making more robust to async exceptions and exceptions resulting from bugs. Some highlights: - use a wrapper around 'immortal' so threads that die due to bugs are restarted, and log the error - use 'withAsync' some places - use bracket a few places where we might break invariants - log some codepaths that represent bugs - export UnstructuredLog for ad hoc logging (the alternative is we continue not logging useful stuff) I had to timebox this. There are a few TODOs I didn't want to address. And we'll wait until this is merged to attempt #3705 for Control.Concurrent.Extended
165 lines
5.9 KiB
Haskell
-- | Top-level management of live query poller threads. The implementation of the polling itself is
-- in "Hasura.GraphQL.Execute.LiveQuery.Poll". See "Hasura.GraphQL.Execute.LiveQuery" for high-level
-- details.
module Hasura.GraphQL.Execute.LiveQuery.State
  ( LiveQueriesState
  , initLiveQueriesState
  , dumpLiveQueriesState

  , LiveQueryId
  , addLiveQuery
  , removeLiveQuery
  ) where
|
|
|
|
import           Hasura.Prelude

import qualified Control.Concurrent.STM                        as STM
import qualified Control.Immortal                              as Immortal
import qualified Data.Aeson.Extended                           as J
import qualified StmContainers.Map                             as STMMap

import           Control.Concurrent.Extended                   (forkImmortal, sleep)
import           Control.Exception                             (mask_)
import           Data.String

import qualified Hasura.GraphQL.Execute.LiveQuery.TMap         as TMap
import qualified Hasura.Logging                                as L

import           Hasura.Db
import           Hasura.GraphQL.Execute.LiveQuery.Options
import           Hasura.GraphQL.Execute.LiveQuery.Plan
import           Hasura.GraphQL.Execute.LiveQuery.Poll
|
|
|
|
-- | The top-level datatype that holds the state for all active live queries.
--
-- NOTE!: This must be kept consistent with a websocket connection's 'OperationMap', in 'onClose'
-- and 'onStart'.
data LiveQueriesState
  = LiveQueriesState
  { _lqsOptions      :: !LiveQueriesOptions
  -- ^ tuning knobs (batch size, refetch interval) shared by all pollers
  , _lqsPGExecTx     :: !PGExecCtx
  -- ^ Postgres execution context the pollers run their queries against
  , _lqsLiveQueryMap :: !PollerMap
  -- ^ all currently-active pollers, keyed by their 'PollerKey'
  }
|
|
|
|
-- | Construct a fresh 'LiveQueriesState' with an empty poller map.
initLiveQueriesState :: LiveQueriesOptions -> PGExecCtx -> IO LiveQueriesState
initLiveQueriesState options pgCtx = do
  pollerMap <- STMMap.newIO
  pure $ LiveQueriesState options pgCtx pollerMap
|
|
|
|
-- | Render the current live-query state as a JSON value, for debugging and
-- introspection. The 'Bool' selects the extended (more verbose) dump of the
-- poller map.
dumpLiveQueriesState :: Bool -> LiveQueriesState -> IO J.Value
dumpLiveQueriesState extended (LiveQueriesState opts _ lqMap) =
  mkDump <$> dumpPollerMap extended lqMap
  where
    mkDump pollerMapJ = J.object
      [ "options"          J..= opts
      , "live_queries_map" J..= pollerMapJ
      ]
|
|
|
|
-- | A handle identifying one subscriber of one live query, returned by
-- 'addLiveQuery' and consumed by 'removeLiveQuery' to tear the subscription
-- back down.
data LiveQueryId
  = LiveQueryId
  { _lqiPoller     :: !PollerKey
  -- ^ which poller the subscriber is attached to
  , _lqiCohort     :: !CohortKey
  -- ^ which cohort (variable set) within that poller
  , _lqiSubscriber :: !SubscriberId
  -- ^ the subscriber itself
  } deriving Show
|
|
|
|
-- | Register a new subscriber for a live query, creating the shared poller
-- (and its polling thread) if this is the first subscriber for the query.
addLiveQuery
  :: L.Logger L.Hasura
  -> LiveQueriesState
  -> LiveQueryPlan
  -> OnChange
  -- ^ the action to be executed when result changes
  -> IO LiveQueryId
addLiveQuery logger lqState plan onResultAction = do
  -- CAREFUL!: It's absolutely crucial that we can't throw any exceptions here!

  -- disposable UUIDs:
  responseId <- newCohortId
  sinkId     <- newSinkId

  -- Atomically register the subscriber. A poller is handed back only when we
  -- created it just now inside this transaction, i.e. when it still needs a
  -- polling thread attached.
  newPollerM <- STM.atomically $ do
    existingM <- STMMap.lookup handlerId lqMap
    case existingM of
      Just poller -> do
        existingCohortM <- TMap.lookup cohortKey (_pCohorts poller)
        case existingCohortM of
          Just cohort -> addToCohort sinkId cohort
          Nothing     -> addToPoller sinkId responseId poller
        return Nothing
      Nothing -> do
        freshPoller <- newPoller
        addToPoller sinkId responseId freshPoller
        STMMap.insert freshPoller handlerId lqMap
        return (Just freshPoller)

  -- we can then attach a polling thread if it is new
  -- the livequery can only be cancelled after putTMVar
  onJust newPollerM $ \poller -> do
    metrics   <- initRefetchMetrics
    threadRef <- forkImmortal ("pollQuery."<>show sinkId) logger $ forever $ do
      pollQuery metrics batchSize pgExecCtx query poller
      sleep (unRefetchInterval refetchInterval)
    STM.atomically $
      STM.putTMVar (_pIOState poller) (PollerIOState threadRef metrics)

  pure $ LiveQueryId handlerId cohortKey sinkId
  where
    LiveQueriesState lqOpts pgExecCtx lqMap = lqState
    LiveQueriesOptions batchSize refetchInterval = lqOpts
    LiveQueryPlan (ParameterizedLiveQueryPlan role alias query) cohortKey = plan

    -- Key under which all subscribers of the same (role, query) pair share one poller.
    handlerId = PollerKey role query

    -- Record the subscriber in a cohort's set of not-yet-polled subscribers.
    addToCohort sid cohort =
      TMap.insert (Subscriber alias onResultAction) sid (_cNewSubscribers cohort)

    -- Create a fresh cohort for this variable set and register the subscriber in it.
    addToPoller sid respId poller = do
      freshCohort <- Cohort respId <$> STM.newTVar Nothing <*> TMap.new <*> TMap.new
      addToCohort sid freshCohort
      TMap.insert freshCohort cohortKey (_pCohorts poller)

    -- A poller with no cohorts and no IO state (thread/metrics) installed yet.
    newPoller = Poller <$> TMap.new <*> STM.newEmptyTMVar
|
|
|
|
-- | Detach a subscriber previously registered with 'addLiveQuery',
-- garbage-collecting its cohort — and the whole poller, including its polling
-- thread — once they become empty.
removeLiveQuery
  :: L.Logger L.Hasura
  -> LiveQueriesState
  -- the query and the associated operation
  -> LiveQueryId
  -> IO ()
removeLiveQuery logger lqState lqId@(LiveQueryId handlerId cohortId sinkId) = mask_ $ do
  -- The STM transaction removes the subscriber and, when it was the last one,
  -- yields an IO action (stopping the poll thread, or logging) to be run only
  -- after the transaction has committed.
  deferredIOM <- STM.atomically $ do
    foundM <- lookupPollerAndCohort
    fmap join $ forM foundM $ \(Poller cohorts ioState, cohort) ->
      unsubscribe cohorts ioState cohort
  sequence_ deferredIOM
  where
    lqMap = _lqsLiveQueryMap lqState

    -- Locate the poller for this query together with the subscriber's cohort,
    -- if both still exist.
    lookupPollerAndCohort = do
      pollerM <- STMMap.lookup handlerId lqMap
      fmap join $ forM pollerM $ \poller -> do
        cohortM <- TMap.lookup cohortId (_pCohorts poller)
        return $ fmap (\cohort -> (poller, cohort)) cohortM

    -- Drop the subscriber from its cohort; delete the cohort when it has no
    -- subscribers left, and the poller when it has no cohorts left. Returns
    -- 'Just' a deferred IO action exactly when the poller was removed.
    unsubscribe cohortMap ioState cohort = do
      let existing = _cExistingSubscribers cohort
          fresh    = _cNewSubscribers cohort
      TMap.delete sinkId existing
      TMap.delete sinkId fresh
      cohortIsEmpty <- (&&) <$> TMap.null existing <*> TMap.null fresh
      when cohortIsEmpty $ TMap.delete cohortId cohortMap
      handlerIsEmpty <- TMap.null cohortMap
      -- when there is no need for handler i.e, this happens to be the last
      -- operation, take the ref for the polling thread to cancel it
      if not handlerIsEmpty
        then return Nothing
        else do
          STMMap.delete handlerId lqMap
          threadRefM <- fmap _pThread <$> STM.tryReadTMVar ioState
          return $ Just $ -- deferred IO:
            case threadRefM of
              Just threadRef -> Immortal.stop threadRef
              -- This would seem to imply addLiveQuery broke or a bug
              -- elsewhere. Be paranoid and log:
              Nothing -> L.unLogger logger $ L.UnstructuredLog L.LevelError $ fromString $
                "In removeLiveQuery no worker thread installed. Please report this as a bug: "<>
                show lqId
|