eden: add batch loading interface for HgProxyHash

Summary: as above

Reviewed By: chadaustin

Differential Revision: D8065370

fbshipit-source-id: 08462c5bfb71aa969ee70f5c27c652e5baea6354
This commit is contained in:
Wez Furlong 2018-05-25 13:47:54 -07:00 committed by Facebook Github Bot
parent 8be54b4a1b
commit 85539ca950
5 changed files with 130 additions and 9 deletions

View File

@ -103,6 +103,18 @@ folly::Future<StoreResult> LocalStore::getFuture(
[keySpace, key, this] { return get(keySpace, key); });
}
/**
 * Default batch lookup: fetch every key with get() and return the results in
 * the same order as `keys`.
 *
 * The work is done synchronously inside makeFutureWith(), so anything thrown
 * by get() is delivered through the returned future rather than thrown to
 * the caller.
 */
folly::Future<std::vector<StoreResult>> LocalStore::getBatch(
    KeySpace keySpace,
    const std::vector<folly::ByteRange>& keys) const {
  // makeFutureWith() invokes the callable before it returns, so `keys` can be
  // captured by reference; there is no need to copy the whole vector.
  return folly::makeFutureWith([keySpace, &keys, this] {
    std::vector<StoreResult> results;
    results.reserve(keys.size());
    for (const auto& key : keys) {
      results.emplace_back(get(keySpace, key));
    }
    return results;
  });
}
// TODO(mbolin): Currently, all objects in our RocksDB are Git objects. We
// probably want to namespace these by column family going forward, at which
// point we might want to have a GitLocalStore that delegates to an

View File

@ -87,6 +87,10 @@ class LocalStore {
KeySpace keySpace,
folly::ByteRange key) const;
/**
 * Look up several keys from the same key space in one call.
 *
 * The returned future yields a vector of StoreResult objects. The base
 * class implementation calls get() once per key, in order, so result i
 * corresponds to keys[i]. The method is virtual so that a subclass can
 * provide a more efficient bulk lookup.
 */
FOLLY_NODISCARD virtual folly::Future<std::vector<StoreResult>> getBatch(
KeySpace keySpace,
const std::vector<folly::ByteRange>& keys) const;
/**
* Get a Tree from the store.
*

View File

@ -252,6 +252,78 @@ FOLLY_NODISCARD folly::Future<StoreResult> RocksDbLocalStore::getFuture(
});
}
/**
 * Fetch many keys from a single key space using RocksDB's MultiGet().
 *
 * The keys are copied into owned strings, so the caller's ByteRanges only
 * need to stay valid for the duration of this call. The copies are split into
 * batches of at most kMaxKeysPerBatch keys, and each batch is looked up on
 * ioPool_. The per-batch results are then concatenated in batch order.
 *
 * A key for which RocksDB reports NotFound yields a default-constructed
 * StoreResult. Any other non-OK status causes the exception built by
 * RocksException::build() to be thrown inside the batch task, which fails the
 * returned future.
 */
FOLLY_NODISCARD folly::Future<std::vector<StoreResult>>
RocksDbLocalStore::getBatch(
    KeySpace keySpace,
    const std::vector<folly::ByteRange>& keys) const {
  // Upper bound on the number of keys handed to one MultiGet() call.
  constexpr size_t kMaxKeysPerBatch = 2048;

  // Each batch is held by shared_ptr so that it can be moved into the task
  // that is scheduled on the I/O pool.
  std::vector<std::shared_ptr<std::vector<std::string>>> batches;
  batches.reserve(keys.size() / kMaxKeysPerBatch + 1);
  batches.emplace_back(std::make_shared<std::vector<std::string>>());
  for (const auto& key : keys) {
    if (batches.back()->size() >= kMaxKeysPerBatch) {
      batches.emplace_back(std::make_shared<std::vector<std::string>>());
    }
    batches.back()->emplace_back(
        reinterpret_cast<const char*>(key.data()), key.size());
  }

  std::vector<folly::Future<std::vector<StoreResult>>> futures;
  futures.reserve(batches.size());
  for (auto& batch : batches) {
    futures.emplace_back(
        folly::via(&ioPool_, [this, keySpace, batchKeys = std::move(batch)] {
          const size_t numKeys = batchKeys->size();

          std::vector<Slice> keySlices;
          keySlices.reserve(numKeys);
          for (const auto& key : *batchKeys) {
            keySlices.emplace_back(key);
          }

          // Every key in the batch lives in the same column family, so look
          // the handle up once instead of once per key.
          rocksdb::ColumnFamilyHandle* const column =
              dbHandles_.columns[keySpace].get();
          std::vector<rocksdb::ColumnFamilyHandle*> columns(numKeys, column);

          std::vector<std::string> values;
          auto statuses = dbHandles_.db->MultiGet(
              ReadOptions(), columns, keySlices, &values);

          std::vector<StoreResult> results;
          results.reserve(numKeys);
          for (size_t i = 0; i < numKeys; ++i) {
            const auto& status = statuses[i];
            if (!status.ok()) {
              if (status.IsNotFound()) {
                // Return an empty StoreResult
                results.emplace_back();
                continue;
              }

              // TODO: RocksDB can return a "TryAgain" error.
              // Should we try again for the user, rather than re-throwing the
              // error?

              // We don't use RocksException::check(), since we don't want to
              // waste our time computing the hex string of the key if we
              // succeeded.
              throw RocksException::build(
                  status,
                  "failed to get ",
                  folly::hexlify(batchKeys->at(i)),
                  " from local store");
            }
            results.emplace_back(std::move(values[i]));
          }
          return results;
        }));
  }

  return folly::collect(futures).then(
      [](std::vector<std::vector<StoreResult>>&& batchResults) {
        size_t total = 0;
        for (const auto& batch : batchResults) {
          total += batch.size();
        }

        // Flatten the per-batch vectors into a single vector, preserving
        // batch order.
        std::vector<StoreResult> results;
        results.reserve(total);
        for (auto& batch : batchResults) {
          results.insert(
              results.end(),
              std::make_move_iterator(batch.begin()),
              std::make_move_iterator(batch.end()));
        }
        return results;
      });
}
bool RocksDbLocalStore::hasKey(
LocalStore::KeySpace keySpace,
folly::ByteRange key) const {

View File

@ -28,6 +28,9 @@ class RocksDbLocalStore : public LocalStore {
FOLLY_NODISCARD folly::Future<StoreResult> getFuture(
KeySpace keySpace,
folly::ByteRange key) const override;
/**
 * Batched lookup backed by RocksDB's MultiGet().
 *
 * The implementation copies the keys, splits them into chunks of at most
 * 2048 keys, and issues one MultiGet() per chunk on the I/O thread pool.
 * Keys that RocksDB reports as NotFound produce a default-constructed
 * StoreResult; any other error status results in an exception.
 */
FOLLY_NODISCARD folly::Future<std::vector<StoreResult>> getBatch(
KeySpace keySpace,
const std::vector<folly::ByteRange>& keys) const override;
bool hasKey(LocalStore::KeySpace keySpace, folly::ByteRange key)
const override;
void put(

View File

@ -16,6 +16,7 @@
#include <folly/container/Array.h>
#include <folly/dynamic.h>
#include <folly/experimental/EnvUtil.h>
#include <folly/futures/Future.h>
#include <folly/io/Cursor.h>
#include <folly/io/IOBuf.h>
#include <folly/json.h>
@ -152,6 +153,29 @@ struct HgProxyHash {
return revHash_;
}
/**
 * Load the proxy-hash data for several blob hashes with one batched
 * LocalStore lookup.
 *
 * @param store       the LocalStore to query (KeySpace::HgProxyHashFamily)
 * @param blobHashes  the eden blob hashes to resolve
 * @return a future yielding one (path, hg revision hash) pair per input
 *         hash, in input order.
 *
 * Each result is turned into an HgProxyHash using the StoreResult-taking
 * constructor; per that constructor's own comment, an invalid StoreResult is
 * expected to make extractValue() throw, which fails the whole future.
 */
static folly::Future<std::vector<std::pair<RelativePath, Hash>>> getBatch(
    LocalStore* store,
    const std::vector<Hash>& blobHashes) {
  // Keep a private copy of the hashes. They back the ByteRanges handed to the
  // store and are needed again in the continuation, which may run after the
  // caller's vector has gone away.
  auto hashCopies = std::make_shared<std::vector<Hash>>(blobHashes);

  std::vector<folly::ByteRange> byteRanges;
  byteRanges.reserve(hashCopies->size());
  for (const auto& hash : *hashCopies) {
    byteRanges.push_back(hash.getBytes());
  }

  return store->getBatch(KeySpace::HgProxyHashFamily, byteRanges)
      .then([blobHashes = hashCopies](std::vector<StoreResult>&& data) {
        std::vector<std::pair<RelativePath, Hash>> results;
        results.reserve(blobHashes->size());

        for (size_t i = 0; i < blobHashes->size(); ++i) {
          // Use at() on both vectors: if the store ever returns fewer results
          // than keys we want an exception, not an out-of-bounds read.
          HgProxyHash hgInfo(
              blobHashes->at(i), data.at(i), "prefetchFiles getBatch");

          results.emplace_back(hgInfo.path().copy(), hgInfo.revHash());
        }

        return results;
      });
}
/**
* Store HgProxyHash data in the LocalStore.
*
@ -214,6 +238,20 @@ struct HgProxyHash {
HgProxyHash(HgProxyHash&&) = delete;
HgProxyHash& operator=(HgProxyHash&&) = delete;
/**
 * Build an HgProxyHash from a StoreResult that has already been fetched
 * from the LocalStore.
 *
 * The value is extracted (moved out) from infoResult and then parsed. The
 * context string is only used in the error log message.
 */
HgProxyHash(
    Hash edenBlobHash,
    StoreResult& infoResult,
    folly::StringPiece context) {
  const bool haveValue = infoResult.isValid();
  if (!haveValue) {
    // Only log here; the extractValue() call below is what is relied on to
    // throw for the missing entry.
    XLOG(ERR) << "received unknown mercurial proxy hash "
              << edenBlobHash.toString() << " in " << context;
  }

  value_ = infoResult.extractValue();
  parseValue(edenBlobHash);
}
/**
* Serialize the (path, hgRevHash) data into a buffer that will be stored in
* the LocalStore.
@ -810,15 +848,7 @@ IOBuf HgImporter::importFileContents(Hash blobHash) {
}
void HgImporter::prefetchFiles(const std::vector<Hash>& blobHashes) {
std::vector<std::pair<RelativePath, Hash>> files;
files.reserve(blobHashes.size());
for (auto& blobHash : blobHashes) {
// TODO: add batch lookup interface to HgProxyHash
HgProxyHash hgInfo(store_, blobHash, "prefetchFiles");
files.emplace_back(hgInfo.path().copy(), hgInfo.revHash());
}
auto files = HgProxyHash::getBatch(store_, blobHashes).get();
sendPrefetchFilesRequest(files);