{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE UndecidableInstances #-} -- | -- = Reasonably efficient PostgreSQL live queries -- -- The module implements /query multiplexing/, which is our implementation strategy for live queries -- (i.e. GraphQL subscriptions) made against Postgres. Fundamentally, our implementation is built -- around polling, which is never ideal, but it’s a lot easier to implement than trying to do something -- event-based. To minimize the resource cost of polling, we use /multiplexing/, which is essentially -- a two-tier batching strategy. -- -- == The high-level idea -- -- The objective is to minimize the number of concurrent polling workers to reduce database load as -- much as possible. A very naïve strategy would be to group identical queries together so we only have -- one poller per /unique/ active subscription. That’s a good start, but of course, in practice, most -- queries differ slightly. However, it happens that they very frequently /only differ in their -- variables/ (that is, GraphQL query variables and session variables), and in those cases, we try to -- generate parameterized SQL. This means that the same prepared SQL query can be reused, just with a -- different set of variables. -- -- To give a concrete example, consider the following query: -- -- > subscription vote_count($post_id: Int!) { -- > vote_count(where: {post_id: {_eq: $post_id}}) { -- > votes -- > } -- > } -- -- No matter what the client provides for @$post_id@, we will always generate the same SQL: -- -- > SELECT votes FROM vote_count WHERE post_id = $1 -- -- If multiple clients subscribe to @vote_count@, we can certainly reuse the same prepared query. 
For -- example, imagine we had 10 concurrent subscribers, each listening on a distinct @$post_id@: -- -- > let postIds = [3, 11, 32, 56, 13, 97, 24, 43, 109, 48] -- -- We could iterate over @postIds@ in Haskell, executing the same prepared query 10 times: -- -- > for postIds $ \postId -> -- > Q.listQE defaultTxErrorHandler preparedQuery (Identity postId) True -- -- Sadly, that on its own isn’t good enough. The overhead of running each query is large enough that -- Postgres becomes overwhelmed if we have to serve lots of concurrent subscribers. Therefore, what we -- want to be able to do is somehow make one query instead of ten. -- -- === Multiplexing -- -- This is where multiplexing comes in. By taking advantage of Postgres -- lateral joins, -- we can do the iteration in Postgres rather than in Haskell, allowing us to pay the query overhead -- just once for all ten subscribers. Essentially, lateral joins add 'map'-like functionality to SQL, -- so we can run our query once per @$post_id@: -- -- > SELECT results.votes -- > FROM unnest($1::integer[]) query_variables (post_id) -- > LEFT JOIN LATERAL ( -- > SELECT coalesce(json_agg(votes), '[]') -- > FROM vote_count WHERE vote_count.post_id = query_variables.post_id -- > ) results ON true -- -- If we generalize this approach just a little bit more, we can apply this transformation to arbitrary -- queries parameterized over arbitrary session and query variables! -- -- == Implementation overview -- -- To support query multiplexing, we maintain a tree of the following types, where @>@ should be read -- as “contains”: -- -- @ -- 'SubscriptionsState' > 'Poller' > 'Cohort' > 'Subscriber' -- @ -- -- Here’s a brief summary of each type’s role: -- -- * A 'Subscriber' is an actual client with an open websocket connection. -- -- * A 'Cohort' is a set of 'Subscriber's that are all subscribed to the same query /with the exact -- same variables/. 
(By batching these together, we can do better than multiplexing, since we can -- just query the data once.) -- -- * A 'Poller' is a worker thread for a single, multiplexed query. It fetches data for a set of -- 'Cohort's that all use the same parameterized query, but have different sets of variables. -- -- * Finally, the 'SubscriptionsState' is the top-level container that holds all the active 'Poller's. -- -- Additional details are provided by the documentation for individual bindings. module Hasura.GraphQL.Execute.Subscription.Plan ( CohortId, dummyCohortId, newCohortId, CohortIdArray (..), CohortVariablesArray (..), CohortVariables, _cvCursorVariables, mkCohortVariables, ValidatedVariables (..), mkUnsafeValidateVariables, modifyCursorCohortVariables, ValidatedQueryVariables, ValidatedSyntheticVariables, ValidatedCursorVariables, SubscriptionQueryPlan (..), SubscriptionQueryPlanExplanation (..), ParameterizedSubscriptionQueryPlan (..), CursorVariableValues (..), cvSessionVariables, cvCursorVariables, cvQueryVariables, cvSyntheticVariables, unValidatedVariables, ) where import Control.Lens (makeLenses) import Data.Aeson.Extended qualified as J import Data.Aeson.TH qualified as J import Data.HashMap.Strict qualified as Map import Data.HashSet qualified as Set import Data.UUID (UUID) import Data.UUID qualified as UUID import Data.UUID.V4 qualified as UUID import Database.PG.Query qualified as Q import Database.PG.Query.PTI qualified as PTI import Hasura.Backends.Postgres.SQL.Value import Hasura.Prelude import Hasura.RQL.Types.Backend import Hasura.SQL.Backend import Hasura.Session import Language.GraphQL.Draft.Syntax qualified as G import PostgreSQL.Binary.Encoding qualified as PE ---------------------------------------------------------------------------------------------------- -- Variable validation -- | When running multiplexed queries, we have to be especially careful about user -- input, since invalid values will cause the query to fail, causing collateral -- 
-- damage for anyone else multiplexed into the same query. Therefore, we
-- pre-validate variables against Postgres by executing a no-op query of the shape
--
-- > SELECT 'v1'::t1, 'v2'::t2, ..., 'vn'::tn
--
-- so if any variable values are invalid, the error will be caught early.
newtype ValidatedVariables f = ValidatedVariables {_unValidatedVariables :: f TxtEncodedVal}

-- Every instance below simply reuses the corresponding instance of the wrapped
-- @f TxtEncodedVal@ container.
deriving instance (Show (f TxtEncodedVal)) => Show (ValidatedVariables f)

deriving instance (Eq (f TxtEncodedVal)) => Eq (ValidatedVariables f)

deriving instance (Hashable (f TxtEncodedVal)) => Hashable (ValidatedVariables f)

deriving instance (J.ToJSON (f TxtEncodedVal)) => J.ToJSON (ValidatedVariables f)

deriving instance (Semigroup (f TxtEncodedVal)) => Semigroup (ValidatedVariables f)

deriving instance (Monoid (f TxtEncodedVal)) => Monoid (ValidatedVariables f)

$(makeLenses 'ValidatedVariables)

-- | Query variables, keyed by their GraphQL name.
type ValidatedQueryVariables = ValidatedVariables (Map.HashMap G.Name)

-- | Synthetic variables, held as a plain list.
type ValidatedSyntheticVariables = ValidatedVariables []

-- | Cursor variables, keyed by their GraphQL name.
type ValidatedCursorVariables = ValidatedVariables (Map.HashMap G.Name)

-- | Wraps the given values in 'ValidatedVariables' /without/ performing any
-- validation itself (hence “unsafe”); it is just the bare constructor.
mkUnsafeValidateVariables :: f TxtEncodedVal -> ValidatedVariables f
mkUnsafeValidateVariables = ValidatedVariables

----------------------------------------------------------------------------------------------------
-- Cohort

-- | Identifier of a cohort; a newtype around a 'UUID'.
newtype CohortId = CohortId {unCohortId :: UUID}
  deriving (Show, Eq, Hashable, J.ToJSON, J.FromJSON, Q.FromCol)

-- | Generates a fresh 'CohortId' from a random (version 4) 'UUID'.
newCohortId :: (MonadIO m) => m CohortId
newCohortId = liftIO (fmap CohortId UUID.nextRandom)

-- | A fixed 'CohortId' built from the nil 'UUID'.
dummyCohortId :: CohortId
dummyCohortId = CohortId UUID.nil

-- | The complete set of variables that a cohort's query is run with.
data CohortVariables = CohortVariables
  { -- | Session variables. When the value is built with 'mkCohortVariables', only
    -- the session variables required by the subscription are kept.
    _cvSessionVariables :: !SessionVariables,
    -- | GraphQL query variables, keyed by variable name.
    _cvQueryVariables :: !ValidatedQueryVariables,
    -- | To allow more queries to be multiplexed together, we introduce “synthetic”
    -- variables for /all/ SQL literals in a query, even if they don’t correspond to
    -- any GraphQL variable. For example, the query
    --
    -- > subscription latest_tracks($condition: tracks_bool_exp!) {
    -- >   tracks(where: $condition) {
    -- >     id
    -- >     title
    -- >   }
    -- > }
    --
    -- might be executed with similar values for @$condition@, such as @{"album_id":
    -- {"_eq": "1"}}@ and @{"album_id": {"_eq": "2"}}@.
    --
    -- Normally, we wouldn’t bother parameterizing over the @1@ and @2@ literals in the
    -- resulting query because we can’t cache that query plan (since different
    -- @$condition@ values could lead to different SQL). However, for live queries, we
    -- can still take advantage of the similarity between the two queries by
    -- multiplexing them together, so we replace them with references to synthetic
    -- variables.
    _cvSyntheticVariables :: !ValidatedSyntheticVariables,
    -- | Cursor variables contain the latest value of the cursor.
    -- The cursor variables are updated after every poll, if the value has
    -- changed; see [Streaming subscription polling].
    -- Cursor variables are only used in the case of streaming subscriptions;
    -- for live queries they will be empty.
    _cvCursorVariables :: !ValidatedCursorVariables
  }
  deriving (Show, Eq, Generic)

instance Hashable CohortVariables

$(makeLenses 'CohortVariables)

-- | Replaces the cursor variables of the given 'CohortVariables', leaving the
-- session, query and synthetic variables untouched.
modifyCursorCohortVariables ::
  ValidatedCursorVariables ->
  CohortVariables ->
  CohortVariables
modifyCursorCohortVariables newCursorVariables existingVariables =
  existingVariables {_cvCursorVariables = newCursorVariables}

-- | Builds a cohort's variables by only using the session variables that
-- are required for the subscription
mkCohortVariables ::
  Set.HashSet SessionVariable ->
  SessionVariables ->
  ValidatedQueryVariables ->
  ValidatedSyntheticVariables ->
  ValidatedCursorVariables ->
  CohortVariables
mkCohortVariables requiredSessionVariables sessionVariableValues =
  -- Partially applied: the query, synthetic and cursor variables are passed
  -- straight through to the constructor.
  CohortVariables relevantSessionVariables
  where
    relevantSessionVariables = filterSessionVariables isRequired sessionVariableValues
    isRequired name _ = name `Set.member` requiredSessionVariables

-- | Serialises to an object with the keys @session@, @query@, @synthetic@ and
-- @cursor@.
instance J.ToJSON CohortVariables where
  toJSON (CohortVariables session query synthetic cursor) =
    J.object
      [ "session" J..= session,
        "query" J..= query,
        "synthetic" J..= synthetic,
        "cursor" J..= cursor
      ]

-- These types exist only to use the Postgres array encoding.

newtype CohortIdArray = CohortIdArray {unCohortIdArray :: [CohortId]}
  deriving (Show, Eq)

-- | Encodes the cohort ids as a one-dimensional Postgres array of @uuid@.
instance Q.ToPrepArg CohortIdArray where
  toPrepVal (CohortIdArray cohortIds) =
    Q.toPrepValHelper PTI.unknown encodeUuidArray (unCohortId <$> cohortIds)
    where
      -- 2950 is PostgreSQL's built-in OID for @uuid@, given to 'PE.array' as the
      -- element type of the array.
      encodeUuidArray = PE.array 2950 . PE.dimensionArray foldl' (PE.encodingArray . PE.uuid)

newtype CohortVariablesArray = CohortVariablesArray {unCohortVariablesArray :: [CohortVariables]}
  deriving (Show, Eq)

-- | Encodes the cohort variables as a one-dimensional Postgres array of @json@,
-- using the 'J.ToJSON' instance of 'CohortVariables' for each element.
instance Q.ToPrepArg CohortVariablesArray where
  toPrepVal (CohortVariablesArray cohortVariables) =
    Q.toPrepValHelper PTI.unknown encodeJsonArray (J.toJSON <$> cohortVariables)
    where
      -- 114 is PostgreSQL's built-in OID for @json@, given to 'PE.array' as the
      -- element type of the array.
      encodeJsonArray = PE.array 114 . PE.dimensionArray foldl' (PE.encodingArray . PE.json_ast)

----------------------------------------------------------------------------------------------------
-- Live query plans

-- | A self-contained, ready-to-execute subscription plan. Contains enough information
-- to find an existing poller that this can be added to /or/ to create a new poller
-- if necessary.
data SubscriptionQueryPlan (b :: BackendType) q = SubscriptionQueryPlan
  { -- | The role and query that this plan runs.
    _sqpParameterizedPlan :: !(ParameterizedSubscriptionQueryPlan b q),
    -- | Configuration of the backend source.
    _sqpSourceConfig :: !(SourceConfig b),
    -- | The cohort variables of this plan.
    _sqpVariables :: !CohortVariables,
    -- | We need to know if the source has a namespace so that we can wrap it around
    -- the response from the DB
    _sqpNamespace :: !(Maybe G.Name)
  }

-- | A role together with a query of the backend-specific type @q@.
data ParameterizedSubscriptionQueryPlan (b :: BackendType) q = ParameterizedSubscriptionQueryPlan
  { _plqpRole :: !RoleName,
    _plqpQuery :: !q
  }
  deriving (Show)

$(J.deriveToJSON hasuraJSON ''ParameterizedSubscriptionQueryPlan)

-- | SQL text and a list of plan lines, together with the cohort variables they
-- belong to.
-- NOTE(review): presumably '_sqpePlan' holds the lines of the database's
-- @EXPLAIN@ output; confirm against the code that builds this value.
data SubscriptionQueryPlanExplanation = SubscriptionQueryPlanExplanation
  { _sqpeSql :: !Text,
    _sqpePlan :: ![Text],
    _sqpeVariables :: !CohortVariables
  }
  deriving (Show)

$(J.deriveToJSON hasuraJSON ''SubscriptionQueryPlanExplanation)

--------------------------------------------------------------------------
--- Streaming Subscriptions

-- | 'TxtEncodedVal's of cursor variables, keyed by their GraphQL name.
newtype CursorVariableValues = CursorVariableValues (Map.HashMap G.Name TxtEncodedVal)
  deriving (Show, Eq, J.FromJSON, J.ToJSON)