graphql-engine/server/src-lib/Hasura/Incremental.hs

174 lines
7.7 KiB
Haskell
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- | A simple implementation of /incremental build rules/, which can be used to avoid unnecessary
-- recomputation on incrementally-changing input. See 'Rule' for more details.
module Hasura.Incremental
( Rule
, Result
, rule
, build
, rebuild
, result
, cache
, keyed
) where
import Hasura.Prelude hiding (id, (.))
import qualified Data.HashMap.Strict as M
import Control.Applicative
import Control.Arrow
import Control.Category
import Data.Profunctor
-- | A value of type @'Rule' m a b@ is a /build rule/: a computation that describes how to build a
-- value of type @b@ from a value of type @a@ in a monad @m@. What distinguishes @'Rule' m a b@ from
-- an ordinary function of type @a -> m b@ is that it can be made /incremental/ (in the sense of
-- “incremental compilation”) — after executing it, future executions can perform a subset of the
-- required work if only a portion of the input changed.
--
-- To achieve this, 'Rule's have a more restrictive interface: there is no @Monad ('Rule' m a)@
-- instance, for example. Instead, 'Rule's are composed using the 'Arrow' hierarchy of operations,
-- which ensures that the dependency graph of build rules is mostly static (though it may contain
-- conditional branches, and combinators such as 'keyed' can express restricted forms of dynamic
-- dependencies). Each atomic rule may be defined using the 'Monad' instance for @m@, but
-- incrementalization is not supported inside those rules — they are treated as a single, monolithic
-- computation.
--
-- Atomic rules are created with the 'rule' function, and caching can be added to a rule using the
-- 'cache' combinator. Rules can be executed using the 'build' function, which returns a 'Result'. A
-- 'Result' contains the built value, accessible via 'result', but it also allows supplying a new
-- input value using 'rebuild' to produce a new result incrementally.
newtype Rule m a b
= Rule { build :: a -> m (Result m a b) }
deriving (Functor)
-- | Creates a 'Rule' that produces an @b@ from an @a@ using the given monadic function. No caching
-- is applied by default, so the rule will be re-executed on every subsequent rebuild unless it is
-- explicitly wrapped in 'cache'.
rule :: (Functor m) => (a -> m b) -> Rule m a b
rule f = Rule $ \input -> f input <&> \result ->
Result { rebuild = build (rule f), result }
instance (Applicative m) => Applicative (Rule m a) where
pure a = Rule . const . pure $ pure a
rule1 <*> rule2 = Rule $ \input -> liftA2 (<*>) (build rule1 input) (build rule2 input)
instance (Functor m) => Profunctor (Rule m) where
dimap f g (Rule build) = Rule (fmap (dimap f g) . build . f)
instance (Functor m) => Strong (Rule m) where
first' (Rule build) = Rule $ \(a, b) -> resultFirst b <$> build a
where
resultFirst b Result { rebuild, result } = Result
{ rebuild = \(a, b') -> resultFirst b' <$> rebuild a
, result = (result, b)
}
instance (Applicative m) => Choice (Rule m) where
-- This is significantly trickier to implement than 'first'! Heres how to think about it: the
-- first time the rule executes, we know nothing about previous runs, so if were given 'Left',
-- we have to call the original rule were given. At that point, as long as we are still given
-- 'Left' on every rebuild, we can take advantage of whatever caching happened on the previous
-- run, so we keep recursively calling 'leftResult'.
--
-- However, as soon as we get 'Right', we have to bail out. We return the input were given, and
-- we forget about any previous executions of the rule completely. If were given 'Left' on a
-- subsequent rebuild, we start over from the original rule again.
left' (Rule build) = Rule eitherResult
where
eitherResult = either (fmap leftResult . build) rightResult
leftResult Result { rebuild, result } = Result
{ rebuild = either (fmap leftResult . rebuild) rightResult
, result = Left result
}
rightResult input = pure $ Result
{ rebuild = eitherResult
, result = Right input
}
instance (Monad m) => Category (Rule m) where
id = Rule . fix $ \build -> pure . Result build
rule2 . rule1 = Rule $ \input -> do
result1 <- build rule1 input
result2 <- build rule2 (result result1)
pure $ Result
{ rebuild = build (Rule (rebuild result2) . Rule (rebuild result1))
, result = result result2
}
instance (Monad m) => Arrow (Rule m) where
arr f = Rule . fix $ \build -> pure . Result build . f
first = first'
instance (Monad m) => ArrowChoice (Rule m) where
left = left'
data Result m a b
= Result
{ rebuild :: !(a -> m (Result m a b))
, result :: !b
} deriving (Functor)
instance (Applicative m) => Applicative (Result m a) where
pure a = fix $ \result -> Result
{ rebuild = const $ pure result
, result = a
}
result1 <*> result2 = Result
{ rebuild = \input -> liftA2 (<*>) (rebuild result1 input) (rebuild result2 input)
, result = result result1 $ result result2
}
instance (Functor m) => Profunctor (Result m) where
dimap f g Result { rebuild, result } = Result
{ rebuild = fmap (dimap f g) . rebuild . f
, result = g result
}
-- | Adds equality-based caching to the given rule. After each execution of the rule, its input and
-- result values are cached. On the next rebuild, the input value is compared via '==' to the
-- previous input value. If they are the same, the previous build result is returned /without/
-- re-executing the rule. Otherwise, the old cached values are discarded, and the rule is
-- re-executed to produce a new set of cached values.
--
-- Note that it is important that the given rule does not perform any side-effects (other than
-- perhaps raising an error on invalid input), as they will not be re-executed if the cached result
-- is used!
--
-- Indescriminate use of 'cache' is likely to have little effect except to increase memory usage,
-- since the input and result of each rule execution must be retained in memory. Avoid using 'cache'
-- around rules with large input or output that is likely to change often unless profiling
-- indicates it is computationally expensive enough to be worth the memory overhead.
cache :: forall a b m. (Eq a, Applicative m) => Rule m a b -> Rule m a b
cache (Rule build) = Rule $ \input -> cacheResult input <$> build input
where
cacheResult :: a -> Result m a b -> Result m a b
cacheResult oldInput Result { rebuild, result } = fix $ \cachedBuild -> Result
{ rebuild = \newInput -> if
| oldInput == newInput -> pure cachedBuild
| otherwise -> cacheResult newInput <$> rebuild newInput
, result
}
-- | Given a 'Rule' that operates on key-value pairs, produces a 'Rule' that operates on a
-- 'M.HashMap'. If the input rule is incremental in its argument, the resulting rule will be
-- incremental as well for any entries in the map that do not change between builds.
keyed
:: forall a b k m. (Eq k, Hashable k, Applicative m)
=> Rule m (k, a) b -> Rule m (M.HashMap k a) (M.HashMap k b)
keyed baseRule = buildWith M.empty
where
buildWith :: M.HashMap k (Rule m a b) -> Rule m (M.HashMap k a) (M.HashMap k b)
buildWith !ruleMap = Rule $ \valueMap ->
M.traverseWithKey processEntry valueMap <&> \resultMap -> Result
{ rebuild = build (buildWith (Rule . rebuild <$> resultMap))
, result = result <$> resultMap
}
where
processEntry :: k -> a -> m (Result m a b)
processEntry k v =
let ruleForKey = case M.lookup k ruleMap of
Just existingRule -> existingRule
Nothing -> lmap (k,) baseRule
in build ruleForKey v