1
0
mirror of https://github.com/hasura/graphql-engine.git synced 2024-12-18 21:12:09 +03:00
graphql-engine/server/src-lib/Hasura/GraphQL/Execute/LiveQuery/State.hs
Vamshi Surabhi c52bfc540d
More robust forking, exception safety. Closes ()
This is the result of a general audit of how we fork threads, with a
detour into how we're using mutable state, especially in websocket
codepaths, making them more robust to async exceptions and exceptions
resulting from bugs.

Some highlights:
- use a wrapper around 'immortal' so threads that die due to bugs are
  restarted, and log the error
- use 'withAsync' some places
- use bracket a few places where we might break invariants
- log some codepaths that represent bugs
- export UnstructuredLog for ad hoc logging (the alternative is we
  continue not logging useful stuff)

I had to timebox this. There are a few TODOs I didn't want to address.
And we'll wait until this is merged to attempt  for
Control.Concurrent.Extended
2020-03-05 23:29:26 +05:30

165 lines
5.9 KiB
Haskell

-- | Top-level management of live query poller threads. The implementation of the polling itself is
-- in "Hasura.GraphQL.Execute.LiveQuery.Poll". See "Hasura.GraphQL.Execute.LiveQuery" for high-level
-- details.
module Hasura.GraphQL.Execute.LiveQuery.State
( LiveQueriesState
, initLiveQueriesState
, dumpLiveQueriesState
, LiveQueryId
, addLiveQuery
, removeLiveQuery
) where
import Hasura.Prelude
import qualified Control.Concurrent.STM as STM
import qualified Control.Immortal as Immortal
import qualified Data.Aeson.Extended as J
import qualified StmContainers.Map as STMMap
import Control.Concurrent.Extended (sleep, forkImmortal)
import Control.Exception (mask_)
import Data.String
import qualified Hasura.Logging as L
import qualified Hasura.GraphQL.Execute.LiveQuery.TMap as TMap
import Hasura.Db
import Hasura.GraphQL.Execute.LiveQuery.Options
import Hasura.GraphQL.Execute.LiveQuery.Plan
import Hasura.GraphQL.Execute.LiveQuery.Poll
-- | Everything needed to manage the set of currently active live queries: the configured
-- options, the Postgres execution context handed to the pollers, and the map of pollers itself.
--
-- NOTE!: This must be kept consistent with a websocket connection's 'OperationMap', in 'onClose'
-- and 'onStart'.
data LiveQueriesState = LiveQueriesState
  { _lqsOptions      :: !LiveQueriesOptions
  -- ^ options that 'addLiveQuery' unpacks into a batch size and a refetch interval
  , _lqsPGExecTx     :: !PGExecCtx
  -- ^ execution context passed on to 'pollQuery'
  , _lqsLiveQueryMap :: !PollerMap
  -- ^ the pollers, looked up by 'PollerKey'
  }
-- | Build a 'LiveQueriesState' from the given options and execution context, starting out with a
-- freshly allocated poller map.
initLiveQueriesState :: LiveQueriesOptions -> PGExecCtx -> IO LiveQueriesState
initLiveQueriesState options pgCtx = do
  pollerMap <- STMMap.newIO
  return $ LiveQueriesState options pgCtx pollerMap
-- | Render the live query state as a JSON object with two keys: @options@ (the configured
-- 'LiveQueriesOptions') and @live_queries_map@ (whatever 'dumpPollerMap' produces for the poller
-- map). The 'Bool' is forwarded unchanged to 'dumpPollerMap'.
dumpLiveQueriesState :: Bool -> LiveQueriesState -> IO J.Value
dumpLiveQueriesState extended lqState =
  toDump <$> dumpPollerMap extended (_lqsLiveQueryMap lqState)
  where
    toDump pollersJ = J.object
      [ "options" J..= _lqsOptions lqState
      , "live_queries_map" J..= pollersJ
      ]
-- | Handle for a single registered subscriber. It is returned by 'addLiveQuery' and consumed by
-- 'removeLiveQuery', which uses the three components to find the poller, then the cohort inside
-- that poller, then the subscriber inside that cohort.
data LiveQueryId = LiveQueryId
  { _lqiPoller     :: !PollerKey
  -- ^ key of the poller in the poller map
  , _lqiCohort     :: !CohortKey
  -- ^ key of the cohort within the poller
  , _lqiSubscriber :: !SubscriberId
  -- ^ key of the subscriber within the cohort
  } deriving (Show)
-- | Register a new subscriber for the given live query plan and return the 'LiveQueryId' that
-- 'removeLiveQuery' needs in order to unregister it again.
--
-- The subscriber is attached to the poller keyed by the plan's role and query and, within it, to
-- the cohort keyed by the plan's 'CohortKey'; whichever of the two does not exist yet is created.
-- A polling thread is forked only when the poller itself was created by this call.
addLiveQuery
  :: L.Logger L.Hasura
  -> LiveQueriesState
  -> LiveQueryPlan
  -> OnChange
  -- ^ the action to be executed when result changes
  -> IO LiveQueryId
addLiveQuery logger lqState plan onResultAction = do
  -- CAREFUL!: It's absolutely crucial that we can't throw any exceptions here!

  -- disposable UUIDs:
  freshCohortId <- newCohortId
  subscriberId <- newSinkId

  -- 'Just' only when the poller had to be created by this call
  createdPollerM <- STM.atomically $ do
    existingPollerM <- STMMap.lookup pollerKey pollerMap
    case existingPollerM of
      Nothing -> do
        freshPoller <- Poller <$> TMap.new <*> STM.newEmptyTMVar
        subscribeViaNewCohort subscriberId freshCohortId freshPoller
        STMMap.insert freshPoller pollerKey pollerMap
        return $ Just freshPoller
      Just poller -> do
        existingCohortM <- TMap.lookup cohortKey $ _pCohorts poller
        case existingCohortM of
          Nothing     -> subscribeViaNewCohort subscriberId freshCohortId poller
          Just cohort -> subscribe subscriberId cohort
        return Nothing

  -- A brand new poller still needs its polling thread. 'removeLiveQuery' reads the thread
  -- reference out of '_pIOState', so the thread can only be stopped once the 'putTMVar' below
  -- has run.
  onJust createdPollerM $ \poller -> do
    metrics <- initRefetchMetrics
    threadRef <-
      forkImmortal ("pollQuery." <> show subscriberId) logger $
        forever $ do
          pollQuery metrics batchSize pgExecCtx query poller
          sleep $ unRefetchInterval refetchInterval
    STM.atomically $
      STM.putTMVar (_pIOState poller) (PollerIOState threadRef metrics)

  pure $ LiveQueryId pollerKey cohortKey subscriberId
  where
    LiveQueriesState liveQueryOpts pgExecCtx pollerMap = lqState
    LiveQueriesOptions batchSize refetchInterval = liveQueryOpts
    LiveQueryPlan (ParameterizedLiveQueryPlan role alias query) cohortKey = plan

    pollerKey = PollerKey role query

    -- record the subscriber among the cohort's new subscribers
    subscribe newSubscriberId cohort =
      TMap.insert (Subscriber alias onResultAction) newSubscriberId $ _cNewSubscribers cohort

    -- create a cohort holding just this subscriber and register it with the poller
    subscribeViaNewCohort newSubscriberId newId poller = do
      cohort <- Cohort newId <$> STM.newTVar Nothing <*> TMap.new <*> TMap.new
      subscribe newSubscriberId cohort
      TMap.insert cohort cohortKey $ _pCohorts poller
-- | Unregister the subscriber identified by the given 'LiveQueryId'.
--
-- The subscriber is deleted from its cohort; a cohort left without subscribers is deleted from
-- its poller; and a poller left without cohorts is deleted from the poller map and has its
-- polling thread stopped. Nothing happens when the poller or the cohort cannot be found.
removeLiveQuery
  :: L.Logger L.Hasura
  -> LiveQueriesState
  -- the query and the associated operation
  -> LiveQueryId
  -> IO ()
removeLiveQuery logger lqState lqId@(LiveQueryId handlerId cohortId sinkId) = mask_ $ do
  -- All bookkeeping happens in one transaction; any IO that has to follow it is handed back
  -- and run afterwards.
  deferredIO <- STM.atomically $ do
    targetM <- findPollerAndCohort
    case targetM of
      Nothing                               -> return Nothing
      Just (Poller cohorts ioState, cohort) -> unsubscribe cohorts ioState cohort
  case deferredIO of
    Nothing -> return ()
    Just io -> io
  where
    pollerMap = _lqsLiveQueryMap lqState

    -- the poller together with the subscriber's cohort, provided both still exist
    findPollerAndCohort = do
      pollerM <- STMMap.lookup handlerId pollerMap
      case pollerM of
        Nothing     -> return Nothing
        Just poller ->
          fmap (\cohort -> (poller, cohort)) <$> TMap.lookup cohortId (_pCohorts poller)

    unsubscribe cohortMap ioState cohort = do
      let existingSubs = _cExistingSubscribers cohort
          pendingSubs  = _cNewSubscribers cohort
      TMap.delete sinkId existingSubs
      TMap.delete sinkId pendingSubs

      noExistingSubs <- TMap.null existingSubs
      noPendingSubs <- TMap.null pendingSubs
      when (noExistingSubs && noPendingSubs) $ TMap.delete cohortId cohortMap

      -- Once the last cohort is gone the poller is no longer needed: drop it from the map and
      -- hand back the IO action that stops its polling thread.
      pollerIsEmpty <- TMap.null cohortMap
      if pollerIsEmpty
        then do
          STMMap.delete handlerId pollerMap
          pollerIOStateM <- STM.tryReadTMVar ioState
          return $ Just $ -- deferred IO:
            case pollerIOStateM of
              Just pollerIOState -> Immortal.stop (_pThread pollerIOState)
              -- An empty TMVar here would seem to imply addLiveQuery broke or a bug
              -- elsewhere. Be paranoid and log:
              Nothing -> L.unLogger logger $ L.UnstructuredLog L.LevelError $ fromString $
                "In removeLiveQuery no worker thread installed. Please report this as a bug: "<>
                show lqId
        else return Nothing