sapling/eden/fs/store/ObjectStore.cpp
Katie Mancini be0cd8da1e enable skipping Metadata prefetches during eden prefetches
Summary:
This is the plumbing to allow us to skip metadata prefetching during eden
prefetches. Metadata prefetches can trigger a bunch of wasted network requests
when we are already fetching the files. (These requests are wasted since we
fetch the file contents anyway, and most of them end up being throttled on sandcastle.)

We won't always want to skip metadata prefetching: we still want it for the
watchman queries, but for `eden prefetch` we will probably want to skip it. This
is why we are making it an option in GlobParams.

Reviewed By: chadaustin

Differential Revision: D24640754

fbshipit-source-id: 20db62d4c0e59fe17cb6535c86ac8f1e3877879c
2020-11-11 16:30:02 -08:00

/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
#include "ObjectStore.h"
#include <folly/Conv.h>
#include <folly/Executor.h>
#include <folly/Format.h>
#include <folly/futures/Future.h>
#include <folly/io/IOBuf.h>
#include <stdexcept>
#include "eden/fs/model/Blob.h"
#include "eden/fs/model/Tree.h"
#include "eden/fs/store/BackingStore.h"
#include "eden/fs/store/LocalStore.h"
#include "eden/fs/store/ObjectFetchContext.h"
#include "eden/fs/telemetry/EdenStats.h"
using folly::Future;
using folly::makeFuture;
using std::shared_ptr;
using std::string;
using std::unique_ptr;
namespace facebook {
namespace eden {
std::shared_ptr<ObjectStore> ObjectStore::create(
shared_ptr<LocalStore> localStore,
shared_ptr<BackingStore> backingStore,
shared_ptr<EdenStats> stats,
folly::Executor::KeepAlive<folly::Executor> executor,
std::shared_ptr<ProcessNameCache> processNameCache,
std::shared_ptr<StructuredLogger> structuredLogger,
std::shared_ptr<const EdenConfig> edenConfig) {
return std::shared_ptr<ObjectStore>{new ObjectStore{std::move(localStore),
std::move(backingStore),
std::move(stats),
executor,
processNameCache,
structuredLogger,
edenConfig}};
}
ObjectStore::ObjectStore(
shared_ptr<LocalStore> localStore,
shared_ptr<BackingStore> backingStore,
shared_ptr<EdenStats> stats,
folly::Executor::KeepAlive<folly::Executor> executor,
std::shared_ptr<ProcessNameCache> processNameCache,
std::shared_ptr<StructuredLogger> structuredLogger,
std::shared_ptr<const EdenConfig> edenConfig)
: metadataCache_{folly::in_place, kCacheSize},
localStore_{std::move(localStore)},
backingStore_{std::move(backingStore)},
stats_{std::move(stats)},
executor_{executor},
pidFetchCounts_{std::make_unique<PidFetchCounts>()},
processNameCache_(processNameCache),
structuredLogger_(structuredLogger),
edenConfig_(edenConfig) {}
ObjectStore::~ObjectStore() {}
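
// Attributes a fetch to the client process recorded in the fetch context.
// Whenever that process's cumulative fetch count hits a multiple of the
// configured fetch-heavy threshold, a FetchHeavy event is logged.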
void ObjectStore::updateProcessFetch(
const ObjectFetchContext& fetchContext) const {
if (auto pid = fetchContext.getClientPid()) {
auto fetch_count = pidFetchCounts_->recordProcessFetch(pid.value());
auto threshold = edenConfig_->fetchHeavyThreshold.getValue();
if (fetch_count && threshold && !(fetch_count % threshold)) {
sendFetchHeavyEvent(pid.value(), fetch_count);
}
}
}
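
// Logs a FetchHeavy structured event for the given process; the event is
// skipped if the process name can no longer be resolved.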
void ObjectStore::sendFetchHeavyEvent(pid_t pid, uint64_t fetch_count) const {
auto processName = processNameCache_->getSpacedProcessName(pid);
if (processName.has_value()) {
structuredLogger_->logEvent(
FetchHeavy{processName.value(), pid, fetch_count});
}
}
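
// Lowers the import priority of this fetch if the requesting process has
// already reached the configured fetch-heavy threshold.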
void ObjectStore::deprioritizeWhenFetchHeavy(
ObjectFetchContext& context) const {
auto pid = context.getClientPid();
if (pid.has_value()) {
auto fetch_count = pidFetchCounts_->getCountByPid(pid.value());
auto threshold = edenConfig_->fetchHeavyThreshold.getValue();
if (threshold && fetch_count >= threshold) {
context.deprioritize(importPriorityDeprioritizeAmount);
}
}
}
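
// Loads a tree by id. The LocalStore is consulted first; on a miss the tree
// is fetched from the BackingStore, written back to the LocalStore, and the
// fetch is attributed to the requesting process. Throws std::domain_error if
// the tree cannot be found in either store.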
Future<shared_ptr<const Tree>> ObjectStore::getTree(
const Hash& id,
ObjectFetchContext& fetchContext) const {
// Check in the LocalStore first
return localStore_->getTree(id).thenValue([self = shared_from_this(),
id,
&fetchContext](
shared_ptr<const Tree> tree) {
if (tree) {
XLOG(DBG4) << "tree " << id << " found in local store";
fetchContext.didFetch(
ObjectFetchContext::Tree, id, ObjectFetchContext::FromDiskCache);
self->updateProcessFetch(fetchContext);
return makeFuture(std::move(tree));
}
self->deprioritizeWhenFetchHeavy(fetchContext);
// Note: We don't currently have logic here to avoid duplicate work if
// multiple callers request the same tree at once. We could store a map
// of pending lookups as (Hash --> std::list<Promise<unique_ptr<Tree>>>),
// and just add a new Promise to the list if this Hash already exists in
// the pending list.
//
// However, de-duplication of object loads will already be done at the
// Inode layer. Therefore we currently don't bother de-duping loads at
// this layer.
// Load the tree from the BackingStore.
return self->backingStore_->getTree(id, fetchContext)
.via(self->executor_)
.thenValue([self, id, &fetchContext, localStore = self->localStore_](
unique_ptr<const Tree> loadedTree) {
if (!loadedTree) {
// TODO: Perhaps we should do some short-term negative
// caching?
XLOG(DBG2) << "unable to find tree " << id;
throw std::domain_error(
folly::to<string>("tree ", id.toString(), " not found"));
}
localStore->putTree(loadedTree.get());
XLOG(DBG3) << "tree " << id << " retrieved from backing store";
fetchContext.didFetch(
ObjectFetchContext::Tree,
id,
ObjectFetchContext::FromBackingStore);
self->updateProcessFetch(fetchContext);
return shared_ptr<const Tree>(std::move(loadedTree));
});
});
}
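
// Resolves the root tree for the given commit via the BackingStore and
// caches the result in the LocalStore.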
Future<shared_ptr<const Tree>> ObjectStore::getTreeForCommit(
const Hash& commitID,
ObjectFetchContext& context) const {
XLOG(DBG3) << "getTreeForCommit(" << commitID << ")";
return backingStore_->getTreeForCommit(commitID, context)
.via(executor_)
.thenValue([commitID,
localStore = localStore_](std::shared_ptr<const Tree> tree) {
if (!tree) {
throw std::domain_error(folly::to<string>(
"unable to import commit ", commitID.toString()));
}
localStore->putTree(tree.get());
return tree;
});
}
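
// Like getTreeForCommit(), but the caller supplies the manifest (root tree)
// id along with the commit id.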
Future<shared_ptr<const Tree>> ObjectStore::getTreeForManifest(
const Hash& commitID,
const Hash& manifestID,
ObjectFetchContext& context) const {
XLOG(DBG3) << "getTreeForManifest(" << commitID << ", " << manifestID << ")";
return backingStore_->getTreeForManifest(commitID, manifestID, context)
.via(executor_)
.thenValue([commitID, manifestID, localStore = localStore_](
std::shared_ptr<const Tree> tree) {
if (!tree) {
throw std::domain_error(folly::to<string>(
"unable to import commit ",
commitID.toString(),
" with manifest node ",
manifestID.toString()));
}
localStore->putTree(tree.get());
return tree;
});
}
folly::Future<folly::Unit> ObjectStore::prefetchBlobs(
const std::vector<Hash>& ids,
ObjectFetchContext& fetchContext) const {
// In theory we could/should ask the localStore_ to filter the list
// of ids down to just the set that we need to load, but there is no
// bulk key existence check in rocksdb, so we would need to cause it
// to load all the blocks of those keys into memory.
// So for the moment we are committing a layering violation in the
// interest of making things faster in practice by just asking the
// mercurial backing store to ensure that its local hgcache storage
// has entries for all of the requested keys.
if (ids.empty()) {
return folly::unit;
}
return backingStore_->prefetchBlobs(ids, fetchContext).via(executor_);
}
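
// Loads a blob by id. The LocalStore is consulted first; on a miss the blob
// is fetched from the BackingStore, written back to the LocalStore (which
// also yields the metadata stored in the in-memory cache), and the fetch is
// attributed to the requesting process. Throws std::domain_error if the blob
// cannot be found in either store.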
Future<shared_ptr<const Blob>> ObjectStore::getBlob(
const Hash& id,
ObjectFetchContext& fetchContext) const {
auto self = shared_from_this();
return localStore_->getBlob(id).thenValue([id, &fetchContext, self](
shared_ptr<const Blob> blob) {
if (blob) {
// Not computing the BlobMetadata here because if the blob was found
// in the local store, the LocalStore probably also has the metadata
// already, and the caller may not even need the SHA-1 here. (If the
// caller needed the SHA-1, they would have called getBlobMetadata
// instead.)
XLOG(DBG4) << "blob " << id << " found in local store";
self->updateBlobStats(true, false);
fetchContext.didFetch(
ObjectFetchContext::Blob, id, ObjectFetchContext::FromDiskCache);
self->updateProcessFetch(fetchContext);
return makeFuture(shared_ptr<const Blob>(std::move(blob)));
}
self->deprioritizeWhenFetchHeavy(fetchContext);
// Look in the BackingStore
return self->backingStore_->getBlob(id, fetchContext)
.via(self->executor_)
.thenValue([self, &fetchContext, id](
unique_ptr<const Blob> loadedBlob) {
if (loadedBlob) {
XLOG(DBG3) << "blob " << id << " retrieved from backing store";
self->updateBlobStats(false, true);
fetchContext.didFetch(
ObjectFetchContext::Blob,
id,
ObjectFetchContext::FromBackingStore);
self->updateProcessFetch(fetchContext);
auto metadata = self->localStore_->putBlob(id, loadedBlob.get());
self->metadataCache_.wlock()->set(id, metadata);
return shared_ptr<const Blob>(std::move(loadedBlob));
}
XLOG(DBG2) << "unable to find blob " << id;
self->updateBlobStats(false, false);
// TODO: Perhaps we should do some short-term negative caching?
throw std::domain_error(
folly::to<string>("blob ", id.toString(), " not found"));
});
});
}
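
// Records per-thread counters for whether a blob lookup was satisfied by the
// LocalStore and/or the BackingStore.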
void ObjectStore::updateBlobStats(bool local, bool backing) const {
ObjectStoreThreadStats& stats = stats_->getObjectStoreStatsForCurrentThread();
stats.getBlobFromLocalStore.addValue(local);
stats.getBlobFromBackingStore.addValue(backing);
}
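
// Loads blob metadata (SHA-1 and size) by id. The in-memory cache is checked
// first, then the LocalStore; as a last resort the full blob is fetched from
// the BackingStore so the metadata can be computed and cached.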
Future<BlobMetadata> ObjectStore::getBlobMetadata(
const Hash& id,
ObjectFetchContext& context) const {
// Check in-memory cache
{
auto metadataCache = metadataCache_.wlock();
auto cacheIter = metadataCache->find(id);
if (cacheIter != metadataCache->end()) {
updateBlobMetadataStats(true, false, false);
context.didFetch(
ObjectFetchContext::BlobMetadata,
id,
ObjectFetchContext::FromMemoryCache);
updateProcessFetch(context);
return cacheIter->second;
}
}
auto self = shared_from_this();
// Check local store
return localStore_->getBlobMetadata(id).thenValue(
[self, id, &context](std::optional<BlobMetadata>&& metadata) {
if (metadata) {
self->updateBlobMetadataStats(false, true, false);
self->metadataCache_.wlock()->set(id, *metadata);
context.didFetch(
ObjectFetchContext::BlobMetadata,
id,
ObjectFetchContext::FromDiskCache);
self->updateProcessFetch(context);
return makeFuture(*metadata);
}
self->deprioritizeWhenFetchHeavy(context);
// Check backing store
//
// TODO: It would be nice to add a smarter API to the BackingStore so
// that we can query it just for the blob metadata if it supports
// getting that without retrieving the full blob data.
//
// TODO: This should probably check the LocalStore for the blob first,
// especially when we begin to expire entries in RocksDB.
return self->backingStore_->getBlob(id, context)
.via(self->executor_)
.thenValue([self, id, &context](std::unique_ptr<Blob> blob) {
if (blob) {
self->updateBlobMetadataStats(false, false, true);
auto metadata = self->localStore_->putBlob(id, blob.get());
self->metadataCache_.wlock()->set(id, metadata);
// I could see an argument for recording this fetch with
// type Blob instead of BlobMetadata, but it's probably more
// useful in context to know how many metadata fetches
// occurred. Also, since backing stores don't directly
// support fetching metadata, it should be clear.
context.didFetch(
ObjectFetchContext::BlobMetadata,
id,
ObjectFetchContext::FromBackingStore);
self->updateProcessFetch(context);
return makeFuture(metadata);
}
self->updateBlobMetadataStats(false, false, false);
throw std::domain_error(
folly::to<string>("blob ", id.toString(), " not found"));
});
});
}
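
// Records per-thread counters for which layer (in-memory cache, LocalStore,
// or BackingStore) satisfied a blob metadata lookup.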
void ObjectStore::updateBlobMetadataStats(bool memory, bool local, bool backing)
const {
ObjectStoreThreadStats& stats = stats_->getObjectStoreStatsForCurrentThread();
stats.getBlobMetadataFromMemory.addValue(memory);
stats.getBlobMetadataFromLocalStore.addValue(local);
stats.getBlobMetadataFromBackingStore.addValue(backing);
}
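
// Returns the SHA-1 of the blob's contents, via getBlobMetadata().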
Future<Hash> ObjectStore::getBlobSha1(
const Hash& id,
ObjectFetchContext& context) const {
return getBlobMetadata(id, context)
.thenValue([](const BlobMetadata& metadata) { return metadata.sha1; });
}
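
// Returns the size of the blob in bytes, via getBlobMetadata().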
Future<uint64_t> ObjectStore::getBlobSize(
const Hash& id,
ObjectFetchContext& context) const {
return getBlobMetadata(id, context)
.thenValue([](const BlobMetadata& metadata) { return metadata.size; });
}
} // namespace eden
} // namespace facebook