{-| An in-memory, capability-local cache, bounded by an LRU eviction strategy.
Because the cache is capability-local, data may be recomputed up to once per
capability (which usually means up to once per OS thread), but write
contention from multiple threads is unlikely.
-}
module Hasura.Cache.Bounded
  ( BoundedCache
  , CacheSize(..)
  , parseCacheSize

  , initialise
  , initialiseStripes
  , insertAllStripes

  , lookup
  , insert
  , clear
  , getEntries

  -- * Exposed for testing
  , checkInvariants
  , getEntriesRecency
  ) where

import           Hasura.Prelude     hiding (lookup)

import qualified Data.Aeson         as J
import qualified Data.HashPSQ       as HashPSQ
import qualified Data.IORef         as IORef
import qualified Data.Vector        as V

import           Control.Concurrent (getNumCapabilities, myThreadId,
                                     threadCapability)
import           Data.Word          (Word16, Word64)
import           GHC.Natural        (Natural)

-- MISC TODO:
--  - benchmark and consider unsafeLookupIncreasePriority and
--    unsafeInsertIncreasePriorityView
--  - our own concurrent cache that doesn't need redundant stripes
--    - this would save significant memory
--  - we could choose something more sophisticated than LRU if warranted
--  - we could probably keep things simple by evicting on hash collisions

newtype CacheSize
  = CacheSize { unCacheSize :: Word16 }
  deriving (Show, Read, Eq, Ord, Bounded, Num, Real, Integral, Enum, J.ToJSON, J.FromJSON)

parseCacheSize :: String -> Either String CacheSize
parseCacheSize v =
  -- NOTE: naively parsing with readMaybe at Word16 would silently wrap on
  -- overflow, so we go through Natural and check the bound ourselves:
  case readMaybe v :: Maybe Natural of
    Just n | n <= max16 -> return (CacheSize $ fromIntegral n)
    _ -> throwError "cache size must be given as a number between 0 and 65535"
  where
    max16 = fromIntegral (maxBound :: Word16) :: Natural

newtype Tick
  = Tick { unTick :: Word64 }
  deriving (Show, Eq, Ord, Hashable, Bounded, Num, Enum)

-- | An LRU cache based on hashing.
-- Based on https://hackage.haskell.org/package/lrucaching
data LruCache k v = LruCache
  { _lcCapacity :: !CacheSize
  -- ^ The maximum number of elements in the cache
  , _lcSize     :: !CacheSize
  -- ^ The current number of elements in the cache. We maintain this
  -- ourselves, since 'HashPSQ.size' is O(n).
  , _lcTick     :: !Tick
  -- ^ The priority is drawn from this tick; it is incremented
  -- after every insert and every successful lookup
  , _lcQueue    :: !(HashPSQ.HashPSQ k Tick v)
  -- ^ Underlying priority queue
  } deriving (Show, Eq, Functor, Foldable, Traversable)

-- | Create an empty 'LruCache' of the given size.
emptyCache :: CacheSize -> LruCache k v
emptyCache capacity =
  LruCache
  { _lcCapacity = capacity
  , _lcSize     = 0
  , _lcTick     = Tick 0
  , _lcQueue    = HashPSQ.empty
  }

-- | Insert an element into the 'LruCache'.
insertPure :: (Hashable k, Ord k) => k -> v -> LruCache k v -> LruCache k v
insertPure key val c =
  -- NOTE: we assume rollover of the 64-bit tick counter to be impossible:
  let (mbOldVal, queue) = HashPSQ.insertView key (_lcTick c) val (_lcQueue c)
      cTicked = c { _lcTick = _lcTick c + 1 }
   in case mbOldVal of
        Nothing
          -- at capacity; evict the LRU entry to maintain _lcSize:
          | _lcSize c == _lcCapacity c ->
              cTicked { _lcQueue = HashPSQ.deleteMin queue }
          -- under capacity; just record the new size after the insert:
          | otherwise ->
              cTicked { _lcQueue = queue, _lcSize = _lcSize c + 1 }
        _valueAlreadyInserted ->
          -- the key was already present; _lcSize stays the same
          cTicked { _lcQueue = queue }
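-- A minimal sketch of the pure LRU semantics above (illustrative only; the
-- name '_evictionExample' is hypothetical and not part of this module's API):
-- with capacity 2, inserting a third distinct key evicts the entry with the
-- lowest tick, i.e. the least recently used key "a".
_evictionExample :: LruCache String Int
_evictionExample =
    insertPure "c" 3  -- at capacity: "a" (lowest tick) is evicted
  . insertPure "b" 2
  . insertPure "a" 1
  $ emptyCache (CacheSize 2)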
-- | Look up an element in an 'LruCache' and mark it as the most
-- recently accessed.
lookupPure :: (Hashable k, Ord k) => k -> LruCache k v -> Maybe (v, LruCache k v)
lookupPure k c =
  case HashPSQ.alter lookupAndBump k (_lcQueue c) of
    (Nothing, _) -> Nothing
    (Just x, q)  ->
      let !c' = c { _lcTick = _lcTick c + 1, _lcQueue = q }
       in Just (x, c')
  where
    lookupAndBump Nothing       = (Nothing, Nothing)
    lookupAndBump (Just (_, x)) = (Just x, Just (_lcTick c, x))

newtype LocalCacheRef k v = LocalCacheRef (IORef.IORef (LruCache k v))

getLocalEntries :: (Hashable k, Ord k) => LocalCacheRef k v -> IO [(k, v)]
getLocalEntries (LocalCacheRef ioRef) =
  map (\(k, _, v) -> (k, v)) . HashPSQ.toList . _lcQueue
    <$> IORef.readIORef ioRef

-- | Create a new local cache of the given size.
initLocalCache :: CacheSize -> IO (LocalCacheRef k v)
initLocalCache capacity = LocalCacheRef <$> IORef.newIORef (emptyCache capacity)

-- | Clear a local cache.
clearLocal :: LocalCacheRef k v -> IO ()
clearLocal (LocalCacheRef ref) =
  IORef.atomicModifyIORef' ref $
    \currentCache -> (emptyCache (_lcCapacity currentCache), ())

-- | Look up a key in the local cache.
lookupLocal :: (Hashable k, Ord k) => LocalCacheRef k v -> k -> IO (Maybe v)
lookupLocal (LocalCacheRef ref) k =
  -- Return the result, replacing the cache with the bumped version on a hit:
  IORef.atomicModifyIORef' ref $ \currentCache ->
    case lookupPure k currentCache of
      Just (v, newCache) -> (newCache, Just v)
      Nothing            -> (currentCache, Nothing)

-- | Insert into a local cache.
insertLocal :: (Hashable k, Ord k) => LocalCacheRef k v -> k -> v -> IO ()
insertLocal (LocalCacheRef ref) k v =
  IORef.atomicModifyIORef' ref $ \c -> (insertPure k v c, ())

-- | A vector of stripes, one per capability. Striping improves performance
-- under concurrent access, since threads on different capabilities can work
-- on different handles in parallel.
newtype BoundedCache k v = BoundedCache (V.Vector (LocalCacheRef k v))

lookup :: (Hashable k, Ord k) => k -> BoundedCache k v -> IO (Maybe v)
lookup k striped = do
  localHandle <- getLocal striped
  lookupLocal localHandle k

insert :: (Hashable k, Ord k) => k -> v -> BoundedCache k v -> IO ()
insert k v striped = do
  localHandle <- getLocal striped
  insertLocal localHandle k v

clear :: BoundedCache k v -> IO ()
clear (BoundedCache caches) =
  V.mapM_ clearLocal caches

getEntries :: (Hashable k, Ord k) => BoundedCache k v -> IO [[(k, v)]]
getEntries (BoundedCache localCaches) =
  mapM getLocalEntries $ V.toList localCaches

-- | Creates a new 'BoundedCache' of the specified size, with one stripe per
-- capability.
initialise :: CacheSize -> IO (BoundedCache k v)
initialise sz = do
  caps <- getNumCapabilities
  initialiseStripes caps sz

-- | Creates a new 'BoundedCache' with the given number of stripes, each of
-- the specified size.
initialiseStripes
  :: Int
  -- ^ Stripes; to minimize contention this should probably match the number
  -- of capabilities.
  -> CacheSize
  -> IO (BoundedCache k v)
initialiseStripes stripes capacity =
  BoundedCache <$> V.replicateM stripes (initLocalCache capacity)

{-# INLINE getLocal #-}
getLocal :: BoundedCache k v -> IO (LocalCacheRef k v)
getLocal (BoundedCache handles) = do
  (i, _) <- myThreadId >>= threadCapability
  -- The number of capabilities can grow dynamically, so make sure we wrap
  -- around when indexing.
  let j = i `mod` V.length handles
  return $ handles V.! j
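-- A hedged usage sketch (the name '_usageSketch' and the key/value types are
-- hypothetical, not part of the API): initialise a striped cache, insert on
-- the calling thread's stripe, and read the value back. The lookup hits the
-- same stripe as the insert as long as the thread stays on one capability.
_usageSketch :: IO (Maybe Int)
_usageSketch = do
  cache <- initialise (CacheSize 100)
  insert ("answer" :: String) (42 :: Int) cache
  lookup "answer" cache  -- Just 42 if this thread kept its capability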
-- | Insert into all stripes (non-atomically).
insertAllStripes :: (Hashable k, Ord k) => k -> v -> BoundedCache k v -> IO ()
insertAllStripes k v (BoundedCache handles) =
  forM_ handles $ \localHandle ->
    insertLocal localHandle k v

-- | Check internal invariants, throwing an error if anything is off:
checkInvariants :: (Hashable k, Ord k) => BoundedCache k v -> IO ()
checkInvariants (BoundedCache handles) =
  forM_ handles $ \(LocalCacheRef ref) -> do
    LruCache{..} <- IORef.readIORef ref
    -- check internal invariants (in case we use unsafe* functions that rely
    -- on us maintaining HashPSQ invariants):
    unless (HashPSQ.valid _lcQueue) $
      error "invalid HashPSQ!"
    unless (HashPSQ.size _lcQueue == fromIntegral _lcSize) $
      error "Size incorrect!"
    unless (fromIntegral _lcSize <= _lcTick) $
      error "Somehow tick wasn't incremented properly!"
    when (_lcSize > _lcCapacity) $
      error "Size > capacity!"

getEntriesRecency :: (Hashable k, Ord k) => BoundedCache k v -> IO [[(k, Tick, v)]]
getEntriesRecency (BoundedCache localCaches) =
  forM (V.toList localCaches) $ \(LocalCacheRef ref) ->
    HashPSQ.toList . _lcQueue <$> IORef.readIORef ref
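-- A test-style sketch under the same assumptions (the name
-- '_invariantSmokeTest' and the chosen stripe count, size, key, and value are
-- hypothetical): seed every stripe via 'insertAllStripes', validate the
-- internal invariants, and confirm the key shows up in each stripe's
-- recency listing.
_invariantSmokeTest :: IO ()
_invariantSmokeTest = do
  cache <- initialiseStripes 2 (CacheSize 10)
  insertAllStripes ("k" :: String) (1 :: Int) cache
  checkInvariants cache
  entries <- getEntriesRecency cache
  -- insertAllStripes wrote to every stripe, so each listing contains the key
  unless (all (any (\(k, _, _) -> k == "k")) entries) $
    error "expected the key in every stripe"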