add a BlobCache

Summary: Add a BlobCache with a maximum cache size and a minimum entry count and interest-based eviction.

Reviewed By: strager

Differential Revision: D12972062

fbshipit-source-id: 1958f7f500c051a5bc0b39b5b89a6f0fc1774b0f
Chad Austin, 2018-11-21 19:41:56 -08:00 (committed by Facebook Github Bot)
Parent: ea61afec34
Commit: a51606565b
4 changed files with 662 additions and 0 deletions
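For orientation, here is a minimal usage sketch of the new API. It is not part of the diff, and example() is a hypothetical caller:

#include "eden/fs/model/Blob.h"
#include "eden/fs/store/BlobCache.h"

using namespace facebook::eden;

void example(std::shared_ptr<const Blob> blob) {
  // Budget of 10 MB, but always keep at least 8 entries resident.
  auto cache = BlobCache::create(10 * 1024 * 1024, 8);
  // WantHandle: the returned handle holds a reference that keeps the entry
  // in cache until the handle is reset or destroyed.
  auto handle = cache->insert(blob, BlobCache::Interest::WantHandle);
  // Prefer reading through the handle; it works even if the cache has
  // already evicted its reference, as long as the blob is still in memory.
  if (auto cached = handle.getBlob()) {
    // ... read from *cached ...
  }
  // Dropping the last interest releases the entry for eviction.
  handle.reset();
}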

eden/fs/model/Blob.h

@@ -28,6 +28,15 @@ class Blob {
contents_{contents},
size_{contents_.computeChainDataLength()} {}
/**
* Convenience constructor for unit tests. Always copies the given
* StringPiece.
*/
Blob(const Hash& hash, folly::StringPiece contents)
: hash_{hash},
contents_{folly::IOBuf::COPY_BUFFER, contents.data(), contents.size()},
size_{contents.size()} {}
const Hash& getHash() const {
return hash_;
}

eden/fs/store/BlobCache.cpp (new file, 207 lines)

@@ -0,0 +1,207 @@
/*
* Copyright (c) 2018-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*
*/
#include "BlobCache.h"
#include <folly/MapUtil.h>
#include <folly/logging/xlog.h>
#include "eden/fs/model/Blob.h"
namespace facebook {
namespace eden {
BlobInterestHandle::BlobInterestHandle(std::weak_ptr<const Blob> blob)
: blob_{std::move(blob)} {
// No need to initialize hash_ because blobCache_ is unset.
}
BlobInterestHandle::BlobInterestHandle(
std::weak_ptr<BlobCache> blobCache,
const Hash& hash,
std::weak_ptr<const Blob> blob)
: blobCache_{std::move(blobCache)}, hash_{hash}, blob_{std::move(blob)} {}
void BlobInterestHandle::reset() noexcept {
if (auto blobCache = blobCache_.lock()) {
blobCache->dropInterestHandle(hash_);
}
blobCache_.reset();
}
std::shared_ptr<const Blob> BlobInterestHandle::getBlob() const {
auto blobCache = blobCache_.lock();
if (blobCache) {
// UnlikelyNeededAgain because there's no need to create a new interest
// handle nor bump the refcount.
auto blob =
blobCache->get(hash_, BlobCache::Interest::UnlikelyNeededAgain).blob;
if (blob) {
return blob;
}
}
// If the blob is no longer in cache, at least see if it's still in memory.
return blob_.lock();
}
std::shared_ptr<BlobCache> BlobCache::create(
size_t maximumCacheSizeBytes,
size_t minimumEntryCount) {
// Allow make_shared with private constructor.
struct BC : BlobCache {
BC(size_t x, size_t y) : BlobCache{x, y} {}
};
return std::make_shared<BC>(maximumCacheSizeBytes, minimumEntryCount);
}
BlobCache::BlobCache(size_t maximumCacheSizeBytes, size_t minimumEntryCount)
: maximumCacheSizeBytes_{maximumCacheSizeBytes},
minimumEntryCount_{minimumEntryCount} {}
BlobCache::~BlobCache() {}
BlobCache::GetResult BlobCache::get(const Hash& hash, Interest interest) {
// The interest handle acquires the BlobCache's lock upon destruction (by
// calling dropInterestHandle), so declare it before taking state_'s lock:
// if an exception is thrown below, ~BlobInterestHandle must run after the
// lock is released.
BlobInterestHandle interestHandle;
auto state = state_.wlock();
auto* item = folly::get_ptr(state->items, hash);
if (!item) {
return GetResult{};
}
switch (interest) {
case Interest::UnlikelyNeededAgain:
interestHandle.blob_ = item->blob;
break;
case Interest::WantHandle:
interestHandle = BlobInterestHandle{shared_from_this(), hash, item->blob};
++item->referenceCount;
break;
case Interest::LikelyNeededAgain:
interestHandle.blob_ = item->blob;
// Bump the reference count without allocating an interest handle - this
// will cause the reference count to never reach zero, avoiding early
// eviction.
//
// TODO: One possible optimization here is to set a bit (reference count
// to UINT64_MAX) after which new interest handles never need to be
// created.
++item->referenceCount;
break;
}
// TODO: Should we avoid promoting if interest is UnlikelyNeededAgain?
// For now, we'll try not to be too clever.
state->evictionQueue.splice(
state->evictionQueue.end(), state->evictionQueue, item->index);
return GetResult{item->blob, std::move(interestHandle)};
}
BlobInterestHandle BlobCache::insert(
std::shared_ptr<const Blob> blob,
Interest interest) {
// The interest handle acquires the BlobCache's lock upon destruction (by
// calling dropInterestHandle), so declare it before taking state_'s lock:
// if an exception is thrown below, ~BlobInterestHandle must run after the
// lock is released.
BlobInterestHandle interestHandle;
auto hash = blob->getHash();
auto size = blob->getSize();
if (interest == Interest::WantHandle) {
// This can throw, so do it before inserting into items.
interestHandle = BlobInterestHandle{shared_from_this(), hash, blob};
} else {
interestHandle.blob_ = blob;
}
auto state = state_.wlock();
auto [iter, inserted] = state->items.try_emplace(hash, std::move(blob));
// noexcept from here until `try`
switch (interest) {
case Interest::UnlikelyNeededAgain:
break;
case Interest::WantHandle:
case Interest::LikelyNeededAgain:
++iter->second.referenceCount;
break;
}
if (inserted) {
auto* itemPtr = &iter->second;
try {
state->evictionQueue.push_back(itemPtr);
} catch (std::exception&) {
state->items.erase(iter);
throw;
}
iter->second.index = std::prev(state->evictionQueue.end());
state->totalSize += size;
evictUntilFits(*state);
} else {
state->evictionQueue.splice(
state->evictionQueue.end(), state->evictionQueue, iter->second.index);
}
return interestHandle;
}
size_t BlobCache::getTotalSize() const {
return state_.rlock()->totalSize;
}
void BlobCache::dropInterestHandle(const Hash& hash) noexcept {
auto state = state_.wlock();
auto* item = folly::get_ptr(state->items, hash);
if (!item) {
// Cached item already evicted.
return;
}
if (item->referenceCount == 0) {
XLOG(WARN)
<< "Reference count on item for " << hash
<< " was already zero: an exception must have been thrown during get()";
return;
}
if (--item->referenceCount == 0) {
state->evictionQueue.erase(item->index);
evictItem(*state, item);
}
}
void BlobCache::evictUntilFits(State& state) noexcept {
while (state.totalSize > maximumCacheSizeBytes_ &&
state.evictionQueue.size() > minimumEntryCount_) {
evictOne(state);
}
}
void BlobCache::evictOne(State& state) noexcept {
CacheItem* front = state.evictionQueue.front();
state.evictionQueue.pop_front();
evictItem(state, front);
}
void BlobCache::evictItem(State& state, CacheItem* item) noexcept {
auto size = item->blob->getSize();
// TODO: Releasing this BlobPtr here can run arbitrary deleters which could,
// in theory, try to reacquire the BlobCache's lock. The blob could be
// scheduled for deletion in a deletion queue but then it's hard to ensure
// that scheduling is noexcept. Instead, BlobPtr should be replaced with a
// refcounted pointer that doesn't allow running custom deleters.
state.items.erase(item->blob->getHash());
state.totalSize -= size;
}
} // namespace eden
} // namespace facebook
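Both get() and insert() move an entry to the back of the eviction queue with std::list::splice, which relinks the node in O(1) and leaves iterators (including CacheItem::index) valid. A self-contained sketch of that idiom, using only the standard library:

#include <cassert>
#include <iterator>
#include <list>

int main() {
  std::list<int> queue{1, 2, 3};
  auto it = std::next(queue.begin()); // refers to 2
  // Relink the node at `it` to the back of the same list. Nothing is
  // copied, and `it` still refers to the moved node.
  queue.splice(queue.end(), queue, it);
  assert((queue == std::list<int>{1, 3, 2}));
  assert(*it == 2);
  return 0;
}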

eden/fs/store/BlobCache.h (new file, 214 lines)

@@ -0,0 +1,214 @@
/*
* Copyright (c) 2018-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*
*/
#pragma once
#include <folly/Synchronized.h>
#include <cstddef>
#include <list>
#include <unordered_map>
#include "eden/fs/model/Hash.h"
namespace facebook {
namespace eden {
class Blob;
class BlobCache;
/**
* Cache lookups return a BlobInterestHandle which should be held as long as the
* blob remains interesting.
*/
class BlobInterestHandle {
public:
BlobInterestHandle() = default;
~BlobInterestHandle() {
reset();
}
BlobInterestHandle(BlobInterestHandle&& other) noexcept
    : blobCache_{std::move(other.blobCache_)},
      hash_{other.hash_},
      blob_{std::move(other.blob_)} {
  // We don't need to clear other.hash_ because it's only referenced when
  // blobCache_ is not expired.
}
BlobInterestHandle& operator=(BlobInterestHandle&& other) noexcept {
  if (this != &other) {
    reset();
    blobCache_ = std::move(other.blobCache_);
    hash_ = other.hash_;
    blob_ = std::move(other.blob_);
  }
  return *this;
}
/**
 * If this is a valid interest handle, and the blob is still in cache, return
 * the corresponding blob and move it to the back of the eviction queue.
 *
 * If the blob has been evicted but is still alive in memory, return it
 * anyway. Otherwise, return nullptr.
 */
std::shared_ptr<const Blob> getBlob() const;
void reset() noexcept;
private:
explicit BlobInterestHandle(std::weak_ptr<const Blob> blob);
BlobInterestHandle(
std::weak_ptr<BlobCache> blobCache,
const Hash& hash,
std::weak_ptr<const Blob> blob);
std::weak_ptr<BlobCache> blobCache_;
// hash_ is only accessed if blobCache_ is non-expired.
Hash hash_;
// In the situation that the Blob exists even if it's been evicted, allow
// retrieving it anyway.
std::weak_ptr<const Blob> blob_;
friend class BlobCache;
};
/**
 * An in-memory LRU cache for loaded blobs. It is parameterized by both a
 * maximum cache size and a minimum entry count. The cache tries to evict
 * entries when the total size of the cached blobs exceeds the maximum cache
 * size, except that it always keeps the minimum entry count around.
*
* The intent of the minimum entry count is to avoid having to reload
* frequently-accessed large blobs when they are larger than the maximum cache
* size.
*
* It is safe to use this object from arbitrary threads.
*/
class BlobCache : public std::enable_shared_from_this<BlobCache> {
public:
using BlobPtr = std::shared_ptr<const Blob>;
enum class Interest {
/**
 * Will return a blob if it is cached, but not add a reference to it.
 * (get() currently still moves the entry to the back of the eviction
 * queue; see the TODO there.)
*/
UnlikelyNeededAgain,
/**
 * If a blob is cached, its reference count is incremented and a handle is
 * returned that, when dropped, releases the reference; when the count drops
 * to zero, the item is evicted from cache. Intended for satisfying a series
 * of blob reads from cache until the inode is unloaded, after which the
 * blob can be evicted from cache, freeing space.
*/
WantHandle,
/**
* If a blob is cached, its reference count is incremented, but no interest
* handle is returned. It is assumed to be worth caching until it is
* naturally evicted.
*/
LikelyNeededAgain,
};
struct GetResult {
  BlobPtr blob;
  BlobInterestHandle interestHandle;
  GetResult() = default;
  GetResult(BlobPtr b, BlobInterestHandle h)
      : blob{std::move(b)}, interestHandle{std::move(h)} {}
  GetResult(GetResult&&) = default;
  GetResult& operator=(GetResult&&) = default;
};
static std::shared_ptr<BlobCache> create(
size_t maximumCacheSizeBytes,
size_t minimumEntryCount);
~BlobCache();
/**
* If a blob for the given hash is in cache, return it. If the blob is not in
* cache, return nullptr (and an empty interest handle).
*
* If a blob is returned and interest is WantHandle, then a movable handle
* object is also returned. When the interest handle is destroyed, the cached
* blob may be evicted.
*
 * After fetching a blob, prefer calling getBlob() on the returned
 * BlobInterestHandle before calling get() again: it avoids some lookup
 * overhead, and it can return the blob even after the BlobCache has evicted
 * its reference, as long as the blob is still in memory.
*/
GetResult get(
const Hash& hash,
Interest interest = Interest::LikelyNeededAgain);
/**
 * Inserts a blob into the cache for future lookup. If the new total size
 * exceeds the maximum cache size and the cache holds more than the minimum
 * entry count, old entries are evicted.
*
* Optionally returns an interest handle that, when dropped, evicts the
* inserted blob.
*/
BlobInterestHandle insert(
BlobPtr blob,
Interest interest = Interest::LikelyNeededAgain);
/**
* Returns the sum of all of the cached blob sizes.
*/
size_t getTotalSize() const;
private:
/*
* TODO: This data structure could be implemented more efficiently. But since
* most of the data will be held in the blobs themselves and not in this
* index, the overhead is not worrisome.
*
* But should we ever decide to optimize it, storing the array of CacheItem
* nodes in a std::vector with indices to its siblings and to the next node
* in the hash chain would be more efficient, especially since the indices
* could be smaller than a pointer.
*/
struct CacheItem {
// WARNING: leaves index unset. Since the items map and evictionQueue refer
// to each other, initialization of index must happen after the CacheItem is
// constructed.
explicit CacheItem(BlobPtr b) : blob{std::move(b)} {}
BlobPtr blob;
std::list<CacheItem*>::iterator index;
/// Incremented on every LikelyNeededAgain or WantHandle interest.
/// Decremented on every dropInterestHandle. Evicted if it reaches zero.
uint64_t referenceCount{0};
};
struct State {
size_t totalSize{0};
std::unordered_map<Hash, CacheItem> items;
/// Entries are evicted from the front of the queue.
std::list<CacheItem*> evictionQueue;
};
void dropInterestHandle(const Hash& hash) noexcept;
explicit BlobCache(size_t maximumCacheSizeBytes, size_t minimumEntryCount);
void evictUntilFits(State& state) noexcept;
void evictOne(State& state) noexcept;
void evictItem(State&, CacheItem* item) noexcept;
const size_t maximumCacheSizeBytes_;
const size_t minimumEntryCount_;
folly::Synchronized<State> state_;
friend class BlobInterestHandle;
};
} // namespace eden
} // namespace facebook
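Because BlobInterestHandle calls shared_from_this(), a BlobCache must always be owned by a shared_ptr, so the constructor is private and create() is the only entry point. A minimal sketch of the local-subclass trick create() uses to keep std::make_shared working with a private constructor (Widget is a hypothetical stand-in):

#include <memory>

class Widget : public std::enable_shared_from_this<Widget> {
 public:
  static std::shared_ptr<Widget> create(int size) {
    // make_shared cannot call a private constructor, so route it through a
    // local subclass that, being defined inside a member function, can.
    struct W : Widget {
      explicit W(int s) : Widget{s} {}
    };
    return std::make_shared<W>(size);
  }

 private:
  explicit Widget(int size) : size_{size} {}
  int size_;
};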

eden/fs/store/test/BlobCacheTest.cpp (new file, 232 lines)

@@ -0,0 +1,232 @@
/*
* Copyright (c) 2018-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*
*/
#include "eden/fs/store/BlobCache.h"
#include <gtest/gtest.h>
#include "eden/fs/model/Blob.h"
using namespace folly::literals;
using namespace facebook::eden;
namespace {
const auto hash3 = Hash{"0000000000000000000000000000000000000000"_sp};
const auto hash4 = Hash{"0000000000000000000000000000000000000001"_sp};
const auto hash5 = Hash{"0000000000000000000000000000000000000002"_sp};
const auto hash6 = Hash{"0000000000000000000000000000000000000003"_sp};
const auto hash9 = Hash{"0000000000000000000000000000000000000004"_sp};
// Each blob's name corresponds to its length in bytes.
const auto blob3 = std::make_shared<Blob>(hash3, "333"_sp);
const auto blob4 = std::make_shared<Blob>(hash4, "4444"_sp);
const auto blob5 = std::make_shared<Blob>(hash5, "55555"_sp);
const auto blob6 = std::make_shared<Blob>(hash6, "666666"_sp);
const auto blob9 = std::make_shared<Blob>(hash9, "999999999"_sp);
} // namespace
TEST(BlobCache, evicts_oldest_on_insertion) {
auto cache = BlobCache::create(10, 0);
cache->insert(blob3);
cache->insert(blob4); // blob4 is considered more recent than blob3
EXPECT_EQ(7, cache->getTotalSize());
cache->insert(blob5); // evicts blob3
EXPECT_EQ(9, cache->getTotalSize());
EXPECT_EQ(nullptr, cache->get(hash3).blob)
<< "Inserting blob5 should evict oldest (blob3)";
EXPECT_EQ(blob4, cache->get(hash4).blob) << "But blob4 still fits";
cache->insert(blob3); // evicts blob5
EXPECT_EQ(7, cache->getTotalSize());
EXPECT_EQ(nullptr, cache->get(hash5).blob)
<< "Inserting blob3 again evicts blob5 because blob4 was accessed";
EXPECT_EQ(blob4, cache->get(hash4).blob);
}
TEST(BlobCache, inserting_large_blob_evicts_multiple_small_blobs) {
auto cache = BlobCache::create(10, 0);
cache->insert(blob3);
cache->insert(blob4);
cache->insert(blob9);
EXPECT_FALSE(cache->get(hash3).blob);
EXPECT_FALSE(cache->get(hash4).blob);
EXPECT_EQ(blob9, cache->get(hash9).blob);
}
TEST(BlobCache, inserting_existing_blob_moves_it_to_back_of_eviction_queue) {
auto cache = BlobCache::create(8, 0);
cache->insert(blob3);
cache->insert(blob4);
cache->insert(blob3);
cache->insert(blob5); // evicts 4
EXPECT_EQ(blob3, cache->get(hash3).blob);
EXPECT_FALSE(cache->get(hash4).blob);
EXPECT_EQ(blob5, cache->get(hash5).blob);
}
TEST(
BlobCache,
preserves_minimum_number_of_entries_despite_exceeding_size_limit) {
auto cache = BlobCache::create(1, 3);
cache->insert(blob3);
cache->insert(blob4);
cache->insert(blob5);
EXPECT_EQ(12, cache->getTotalSize());
EXPECT_TRUE(cache->get(hash3).blob);
EXPECT_TRUE(cache->get(hash4).blob);
EXPECT_TRUE(cache->get(hash5).blob);
}
TEST(BlobCache, preserves_minimum_number_of_entries) {
auto cache = BlobCache::create(1, 3);
cache->insert(blob3);
cache->insert(blob4);
cache->insert(blob5);
cache->insert(blob6);
EXPECT_EQ(15, cache->getTotalSize());
EXPECT_FALSE(cache->get(hash3).blob);
EXPECT_TRUE(cache->get(hash4).blob);
EXPECT_TRUE(cache->get(hash5).blob);
EXPECT_TRUE(cache->get(hash6).blob);
}
TEST(BlobCache, can_forget_cached_entries) {
auto cache = BlobCache::create(100, 0);
auto handle3 = cache->insert(
std::make_shared<Blob>(hash3, "blob3"_sp),
BlobCache::Interest::WantHandle);
auto handle4 = cache->insert(
std::make_shared<Blob>(hash4, "blob4"_sp),
BlobCache::Interest::WantHandle);
// The use of WantHandle causes these reset() calls to evict from the cache.
handle3.reset();
handle4.reset();
EXPECT_FALSE(cache->get(hash3).blob);
EXPECT_FALSE(cache->get(hash4).blob);
}
TEST(BlobCache, can_forget_cached_entries_in_reverse_insertion_order) {
auto cache = BlobCache::create(100, 0);
auto handle3 = cache->insert(
std::make_shared<Blob>(hash3, "blob3"_sp),
BlobCache::Interest::WantHandle);
auto handle4 = cache->insert(
std::make_shared<Blob>(hash4, "blob4"_sp),
BlobCache::Interest::WantHandle);
handle4.reset();
handle3.reset();
EXPECT_FALSE(cache->get(hash3).blob);
EXPECT_FALSE(cache->get(hash4).blob);
}
TEST(BlobCache, can_forget_cached_entry_in_middle) {
auto cache = BlobCache::create(100, 0);
auto handle3 = cache->insert(
std::make_shared<Blob>(hash3, "blob3"_sp),
BlobCache::Interest::WantHandle);
auto handle4 = cache->insert(
std::make_shared<Blob>(hash4, "blob4"_sp),
BlobCache::Interest::WantHandle);
auto handle5 = cache->insert(
std::make_shared<Blob>(hash5, "blob5"_sp),
BlobCache::Interest::WantHandle);
handle4.reset();
EXPECT_TRUE(cache->get(hash3).blob);
EXPECT_FALSE(cache->get(hash4).blob);
EXPECT_TRUE(cache->get(hash5).blob);
}
TEST(BlobCache, duplicate_insertion_with_interest_forgets_on_last_drop) {
auto cache = BlobCache::create(100, 0);
auto blob = std::make_shared<Blob>(hash3, "blob"_sp);
auto weak = std::weak_ptr{blob};
auto handle1 = cache->insert(blob, BlobCache::Interest::WantHandle);
auto handle2 = cache->insert(blob, BlobCache::Interest::WantHandle);
blob.reset();
EXPECT_TRUE(weak.lock());
handle1.reset();
EXPECT_TRUE(weak.lock());
handle2.reset();
EXPECT_FALSE(weak.lock());
}
TEST(BlobCache, does_not_forget_blob_until_last_handle_is_forgotten) {
auto cache = BlobCache::create(100, 0);
cache->insert(
std::make_shared<Blob>(hash6, "newblob"_sp),
BlobCache::Interest::UnlikelyNeededAgain);
auto result1 = cache->get(hash6, BlobCache::Interest::WantHandle);
auto result2 = cache->get(hash6, BlobCache::Interest::WantHandle);
EXPECT_TRUE(result1.blob);
EXPECT_TRUE(result2.blob);
EXPECT_EQ(result1.blob, result2.blob);
auto weak = std::weak_ptr{result1.blob};
result1.blob.reset();
result2.blob.reset();
EXPECT_TRUE(weak.lock());
result1.interestHandle.reset();
EXPECT_TRUE(weak.lock());
result2.interestHandle.reset();
EXPECT_FALSE(weak.lock());
}
TEST(BlobCache, redundant_inserts_are_ignored) {
auto cache = BlobCache::create(10, 0);
auto blob = std::make_shared<Blob>(Hash{}, "not ready"_sp);
cache->insert(blob);
EXPECT_EQ(9, cache->getTotalSize());
cache->insert(blob);
EXPECT_EQ(9, cache->getTotalSize());
cache->insert(blob);
EXPECT_EQ(9, cache->getTotalSize());
}
TEST(
BlobCache,
fetching_blob_from_interest_handle_moves_to_back_of_eviction_queue) {
auto cache = BlobCache::create(10, 0);
auto handle3 = cache->insert(
std::make_shared<Blob>(hash3, "333"_sp), BlobCache::Interest::WantHandle);
auto handle4 = cache->insert(
std::make_shared<Blob>(hash4, "444"_sp), BlobCache::Interest::WantHandle);
// Normally, inserting blob5 would evict blob3, since blob3 was inserted
// first. But accessing blob3 through its interest handle moves it to the
// back of the eviction queue, so blob4 is evicted instead.
EXPECT_TRUE(handle3.getBlob());
cache->insert(blob5);
EXPECT_TRUE(handle3.getBlob());
EXPECT_EQ(nullptr, handle4.getBlob());
}
TEST(BlobCache, interest_handle_can_return_blob_even_if_it_was_evicted) {
auto cache = BlobCache::create(10, 0);
// Insert blobs that stay alive (the globals above hold references), and
// don't ask for scoped interest.
auto handle3 = cache->insert(blob3);
auto handle4 = cache->insert(blob4);
auto handle5 = cache->insert(blob5);
EXPECT_FALSE(cache->get(hash3).blob) << "Inserting blob5 evicts blob3";
EXPECT_EQ(blob3, handle3.getBlob())
<< "Blob accessible even though it's been evicted";
EXPECT_EQ(blob4, handle4.getBlob());
EXPECT_EQ(blob5, handle5.getBlob());
}
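One property the suite does not pin down directly: get() with UnlikelyNeededAgain takes no reference, so such an entry is kept alive only by LRU order. A hedged sketch of such a test, following the conventions above (not part of the diff):

TEST(BlobCache, unlikely_needed_again_takes_no_reference) {
  auto cache = BlobCache::create(100, 0);
  cache->insert(
      std::make_shared<Blob>(hash3, "blob3"_sp),
      BlobCache::Interest::UnlikelyNeededAgain);
  // The insert took no reference, so the WantHandle get() below holds the
  // only one; resetting it evicts the entry.
  auto result = cache->get(hash3, BlobCache::Interest::WantHandle);
  EXPECT_TRUE(result.blob);
  result.blob.reset();
  result.interestHandle.reset();
  EXPECT_FALSE(cache->get(hash3).blob);
}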