graphql-engine/server/src-lib/Control/Monad/Memoize.hs
kodiakhq[bot] dd3bbef2fe Jberryman/ghc 9.6.4
PR-URL: https://github.com/hasura/graphql-engine-mono/pull/10615
GitOrigin-RevId: 8a45c92bec8444d5ec0aed125f4aa96d9c684728
2024-01-23 19:47:53 +00:00

229 lines
9.0 KiB
Haskell
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{-# LANGUAGE UndecidableInstances #-}
-- ghc 9.6 seems to be doing something screwy with...
{-# OPTIONS_GHC -Wno-redundant-constraints #-}
module Control.Monad.Memoize
( MonadMemoize (..),
memoize,
MemoizeT,
runMemoizeT,
)
where
import Control.Monad.Except
import Data.Dependent.Map (DMap)
import Data.Dependent.Map qualified as DM
import Data.Functor.Identity
import Data.GADT.Compare.Extended
import Data.IORef
import Data.Kind qualified as K
import Hasura.Prelude
import Language.Haskell.TH qualified as TH
import System.IO.Unsafe (unsafeInterleaveIO)
import Type.Reflection (Typeable, typeRep, (:~:) (Refl))
{- Note [Tying the knot]
~~~~~~~~~~~~~~~~~~~~~~~~
GraphQL type definitions can be mutually recursive, and indeed, they quite often
are! For example, two tables that reference one another will be represented by
types such as the following:
type author {
id: Int!
name: String!
articles: [article!]!
}
type article {
id: Int!
title: String!
content: String!
author: author!
}
This doesnt cause any trouble if the schema is represented by a mapping from
type names to type definitions, but the Parser abstraction is all about avoiding
that kind of indirection to improve type safety — parsers refer to their
sub-parsers directly. This presents two problems during schema generation:
1. Schema generation needs to terminate in finite time, so we need to ensure
we dont try to eagerly construct an infinitely-large schema due to the
mutually-recursive structure.
2. To serve introspection queries, we do eventually need to construct a
mapping from names to types (a TypeMap), so we need to be able to
recursively walk the entire schema in finite time.
Solving point number 1 could be done with either laziness or sharing, but
neither of those are enough to solve point number 2, which requires /observable/
sharing. We need to construct a Parser graph that contains enough information to
detect cycles during traversal.
It may seem appealing to just use type names to detect cycles, which would allow
us to get away with using laziness rather than true sharing. Unfortunately, that
leads to two further problems:
* Its possible to end up with two different types with the same name, which
is an error and should be reported as such. Using names to break cycles
prevents us from doing that, since we have no way to check that two types
with the same name are actually the same.
* Some Parser constructors can fail — the `column` parser checks that the type
name is a valid GraphQL name, for example. This extra validation means lazy
schema construction isnt viable, since we need to eagerly build the schema
to ensure all the validation checks hold.
So were forced to use sharing. But how do we do it? Somehow, we have to /tie
the knot/ — we have to build a cyclic data structure — and some of the cycles
may be quite large. Doing all this knot-tying by hand would be incredibly
tricky, and it would require a lot of inversion of control to thread the shared
parsers around.
To avoid contorting the program, we instead implement a form of memoization. The
MonadMemoize class provides a mechanism to memoize a parser constructor function,
which allows us to get sharing mostly for free. The memoization strategy also
annotates cached parsers with a Unique that can be used to break cycles while
traversing the graph, so we get observable sharing as well. -}
class (Monad m) => MonadMemoize m where
-- | Memoizes a parser constructor function for the extent of a single schema
-- construction process. This is mostly useful for recursive parsers;
-- see Note [Tying the knot] for more details.
--
-- The generality of the type here allows us to use this with multiple concrete
-- parser types:
--
-- @
-- 'memoizeOn' :: ('MonadMemoize' m, MonadParse n) => 'TH.Name' -> a -> m (Parser n b) -> m (Parser n b)
-- 'memoizeOn' :: ('MonadMemoize' m, MonadParse n) => 'TH.Name' -> a -> m (FieldParser n b) -> m (FieldParser n b)
-- @
memoizeOn ::
forall a p.
(Ord a, Typeable a, Typeable p) =>
-- | A unique name used to identify the function being memoized. There isnt
-- really any metaprogramming going on here, we just use a Template Haskell
-- 'TH.Name' as a convenient source for a static, unique identifier.
TH.Name ->
-- | The value to use as the memoization key. Its the callers
-- responsibility to ensure multiple calls to the same function dont use
-- the same key.
a ->
m p ->
m p
instance
(MonadMemoize m) =>
MonadMemoize (ReaderT a m)
where
memoizeOn name key = mapReaderT (memoizeOn name key)
-- | A wrapper around 'memoizeOn' that memoizes a function by using its argument
-- as the key.
memoize ::
(MonadMemoize m, Ord a, Typeable a, Typeable p) =>
TH.Name ->
(a -> m p) ->
(a -> m p)
memoize name f a = memoizeOn name a (f a)
newtype MemoizeT m a = MemoizeT
{ unMemoizeT :: StateT (DMap MemoizationKey Identity) m a
}
deriving (Functor, Applicative, Monad, MonadError e, MonadReader r, MonadTrans)
-- | Allow code in 'MemoizeT' to have access to any underlying state capabilities,
-- hiding the fact that 'MemoizeT' itself is a state monad.
instance (MonadState s m) => MonadState s (MemoizeT m) where
get = lift get
put = lift . put
runMemoizeT :: forall m a. (Monad m) => MemoizeT m a -> m a
runMemoizeT = flip evalStateT mempty . unMemoizeT
-- | see Note [MemoizeT requires MonadIO]
instance
(MonadIO m) =>
MonadMemoize (MemoizeT m)
where
memoizeOn name key buildParser = MemoizeT do
let parserId = MemoizationKey name key
parsersById <- get
case DM.lookup parserId parsersById of
Just (Identity parser) -> pure parser
Nothing -> do
-- We manually do eager blackholing here using a MutVar rather than
-- relying on MonadFix and ordinary thunk blackholing. Why? A few
-- reasons:
--
-- 1. We have more control. We arent at the whims of whatever
-- MonadFix instance happens to get used.
--
-- 2. We can be more precise. GHCs lazy blackholing doesnt always
-- kick in when youd expect.
--
-- 3. We can provide more useful error reporting if things go wrong.
-- Most usefully, we can include a HasCallStack source location.
cell <- liftIO $ newIORef Nothing
-- We use unsafeInterleaveIO here, which sounds scary, but
-- unsafeInterleaveIO is actually far more safe than unsafePerformIO.
-- unsafeInterleaveIO just defers the execution of the action until its
-- result is needed, adding some laziness.
--
-- That laziness can be dangerous if the action has side-effects, since
-- the point at which the effect is performed can be unpredictable. But
-- this action just reads, never writes, so that isnt a concern.
parserById <-
liftIO
$ unsafeInterleaveIO
$ readIORef cell
>>= \case
Just parser -> pure $ Identity parser
Nothing ->
error
$ unlines
[ "memoize: parser was forced before being fully constructed",
" parser constructor: " ++ TH.pprint name
]
put $! DM.insert parserId parserById parsersById
parser <- unMemoizeT buildParser
liftIO $ writeIORef cell (Just parser)
pure parser
{- Note [MemoizeT requires MonadIO]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The MonadMemoize instance for MemoizeT requires MonadIO, which is unsatisfying.
The only reason the constraint is needed is to implement knot-tying via IORefs
(see Note [Tying the knot] above), which really only requires the power of
ST. Alternatively, it might be possible to use the ST monad instead, but that
has not been done for historical reasons.
-}
-- | A key used to distinguish calls to 'memoize'd functions. The 'TH.Name'
-- distinguishes calls to completely different parsers, and the @a@ value
-- records the arguments.
data MemoizationKey (t :: K.Type) where
MemoizationKey :: (Ord a, Typeable a, Typeable p) => TH.Name -> a -> MemoizationKey p
instance GEq MemoizationKey where
geq
(MemoizationKey name1 (arg1 :: a1) :: MemoizationKey t1)
(MemoizationKey name2 (arg2 :: a2) :: MemoizationKey t2)
| name1 == name2,
Just Refl <- typeRep @a1 `geq` typeRep @a2,
arg1 == arg2,
Just Refl <- typeRep @t1 `geq` typeRep @t2 =
Just Refl
| otherwise = Nothing
instance GCompare MemoizationKey where
gcompare
(MemoizationKey name1 (arg1 :: a1) :: MemoizationKey t1)
(MemoizationKey name2 (arg2 :: a2) :: MemoizationKey t2) =
strengthenOrdering (compare name1 name2)
`extendGOrdering` gcompare (typeRep @a1) (typeRep @a2)
`extendGOrdering` strengthenOrdering (compare arg1 arg2)
`extendGOrdering` gcompare (typeRep @t1) (typeRep @t2)
`extendGOrdering` GEQ