prjfs: make readdir more asynchronous

Summary:
The makeImmediateFuture is merely a glorified try/catch, and thus the entirety
of the readdir callback would run immediately, and would block on every size
sequentially. This is inefficient and may cause files to be fetched one at a time.

By reworking the inner logic, we can collect all the sizes beforehand prior to
servicing the callback, allowing for sizes to be fetched concurrently, and for
completing the callback asynchronously.

Reviewed By: chadaustin

Differential Revision: D31782916

fbshipit-source-id: d6315347492e969ffa79037dc2a4f275f4b95a8d
This commit is contained in:
Xavier Deguillard 2021-11-05 12:54:45 -07:00 committed by Facebook GitHub Bot
parent 6dd3693628
commit 87ef50135e
11 changed files with 189 additions and 134 deletions

View File

@ -86,7 +86,6 @@ class ServerState;
class Tree;
class TreePrefetchLease;
class UnboundedQueueExecutor;
struct FileMetadata;
template <typename T>
class ImmediateFuture;
class TreeEntry;

View File

@ -55,7 +55,7 @@ PrjfsDispatcherImpl::PrjfsDispatcherImpl(EdenMount* mount)
mount_{mount},
dotEdenConfig_{makeDotEdenConfig(*mount)} {}
ImmediateFuture<std::vector<FileMetadata>> PrjfsDispatcherImpl::opendir(
ImmediateFuture<std::vector<PrjfsDirEntry>> PrjfsDispatcherImpl::opendir(
RelativePath path,
ObjectFetchContext& context) {
return mount_->getInode(path, context).thenValue([](const InodePtr inode) {

View File

@ -17,7 +17,7 @@ class PrjfsDispatcherImpl : public PrjfsDispatcher {
public:
explicit PrjfsDispatcherImpl(EdenMount* mount);
ImmediateFuture<std::vector<FileMetadata>> opendir(
ImmediateFuture<std::vector<PrjfsDirEntry>> opendir(
RelativePath path,
ObjectFetchContext& context) override;

View File

@ -1946,8 +1946,8 @@ std::tuple<NfsDirList, bool> TreeInode::nfsReaddir(
#else
std::vector<FileMetadata> TreeInode::readdir() {
vector<FileMetadata> ret;
std::vector<PrjfsDirEntry> TreeInode::readdir() {
vector<PrjfsDirEntry> ret;
auto dir = contents_.rlock();
auto& entries = dir->entries;
@ -1955,7 +1955,6 @@ std::vector<FileMetadata> TreeInode::readdir() {
for (auto& [name, entry] : entries) {
auto isDir = entry.getDtype() == dtype_t::Dir;
auto winName = name.wide();
if (!isDir) {
// We only populates the file size for non-materialized files. For
@ -1966,14 +1965,14 @@ std::vector<FileMetadata> TreeInode::readdir() {
ObjectFetchContext::getNullContextWithCauseDetail(
"TreeInode::readdir");
ret.emplace_back(
std::move(winName),
false,
name,
isDir,
getMount()->getObjectStore()->getBlobSize(hash.value(), *context));
continue;
}
}
ret.emplace_back(std::move(winName), isDir, ImmediateFuture<uint64_t>(0));
ret.emplace_back(name, isDir, ImmediateFuture<uint64_t>(0));
}
return ret;

View File

@ -34,7 +34,7 @@ class RenameLock;
class Tree;
class TreeEntry;
class TreeInodeDebugInfo;
struct FileMetadata;
class PrjfsDirEntry;
constexpr folly::StringPiece kDotEdenName{".eden"};
@ -178,7 +178,7 @@ class TreeInode final : public InodeBaseMetadata<DirContents> {
* way to get it, so in this function as an optimization we don't populate the
* size of materialized files.
*/
FOLLY_NODISCARD std::vector<FileMetadata> readdir();
FOLLY_NODISCARD std::vector<PrjfsDirEntry> readdir();
#endif
const folly::Synchronized<TreeInodeState>& getContents() const {

View File

@ -94,8 +94,8 @@ TEST(TreeInode, readdirTest) {
auto result = root->readdir();
ASSERT_EQ(2, result.size());
EXPECT_EQ(L".eden", result[0].name);
EXPECT_EQ(L"file", result[1].name);
EXPECT_EQ(L".eden", result[0].getName());
EXPECT_EQ(L"file", result[1].getName());
}
TEST(TreeInode, updateAndReaddir) {
@ -110,18 +110,18 @@ TEST(TreeInode, updateAndReaddir) {
auto result = somedir->readdir();
ASSERT_EQ(3, result.size());
EXPECT_EQ(L"file1", result[0].name);
EXPECT_EQ(L"file2", result[1].name);
EXPECT_EQ(L"file3", result[2].name);
EXPECT_EQ(L"file1", result[0].getName());
EXPECT_EQ(L"file2", result[1].getName());
EXPECT_EQ(L"file3", result[2].getName());
auto resultInode =
somedir->mknod("newfile.txt"_pc, S_IFREG, 0, InvalidationRequired::No);
result = somedir->readdir();
ASSERT_EQ(4, result.size());
EXPECT_EQ(L"file1", result[0].name);
EXPECT_EQ(L"file2", result[1].name);
EXPECT_EQ(L"file3", result[2].name);
EXPECT_EQ(L"newfile.txt", result[3].name);
EXPECT_EQ(L"file1", result[0].getName());
EXPECT_EQ(L"file2", result[1].getName());
EXPECT_EQ(L"file3", result[2].getName());
EXPECT_EQ(L"newfile.txt", result[3].getName());
somedir
->unlink(
@ -131,9 +131,9 @@ TEST(TreeInode, updateAndReaddir) {
.get(0ms);
result = somedir->readdir();
ASSERT_EQ(3, result.size());
EXPECT_EQ(L"file1", result[0].name);
EXPECT_EQ(L"file3", result[1].name);
EXPECT_EQ(L"newfile.txt", result[2].name);
EXPECT_EQ(L"file1", result[0].getName());
EXPECT_EQ(L"file3", result[1].getName());
EXPECT_EQ(L"newfile.txt", result[2].getName());
somedir
->rename(
@ -145,9 +145,9 @@ TEST(TreeInode, updateAndReaddir) {
.get(0ms);
result = somedir->readdir();
ASSERT_EQ(3, result.size());
EXPECT_EQ(L"file1", result[0].name);
EXPECT_EQ(L"newfile.txt", result[1].name);
EXPECT_EQ(L"renamedfile.txt", result[2].name);
EXPECT_EQ(L"file1", result[0].getName());
EXPECT_EQ(L"newfile.txt", result[1].getName());
EXPECT_EQ(L"renamedfile.txt", result[2].getName());
}
#else

View File

@ -7,42 +7,71 @@
#ifdef _WIN32
#include "folly/portability/Windows.h"
#include <ProjectedFSLib.h> // @manual
#include "eden/fs/prjfs/Enumerator.h"
namespace facebook {
namespace eden {
#include <ProjectedFSLib.h> // @manual
#include <folly/executors/GlobalExecutor.h>
#include <folly/portability/Windows.h>
Enumerator::Enumerator(std::vector<FileMetadata>&& entryList)
: metadataList_(std::move(entryList)) {
std::sort(
metadataList_.begin(),
metadataList_.end(),
[](const FileMetadata& first, const FileMetadata& second) -> bool {
return (
PrjFileNameCompare(first.name.c_str(), second.name.c_str()) < 0);
namespace facebook::eden {
PrjfsDirEntry::PrjfsDirEntry(
PathComponentPiece name,
bool isDir,
ImmediateFuture<uint64_t> sizeFuture)
: name_{name.wide()},
// In the case where the future isn't ready yet, we want to start
// driving it immediately, thus convert it to a Future.
sizeFuture_{
std::move(sizeFuture).semi().via(folly::getGlobalCPUExecutor())},
isDir_{isDir} {}
bool PrjfsDirEntry::matchPattern(const std::wstring& pattern) const {
return PrjFileNameMatch(name_.c_str(), pattern.c_str());
}
ImmediateFuture<PrjfsDirEntry::Ready> PrjfsDirEntry::getFuture() {
return ImmediateFuture{sizeFuture_.getSemiFuture()}.thenValue(
[name = name_, isDir = isDir_](uint64_t size) {
return Ready{std::move(name), size, isDir};
});
}
FileMetadata* Enumerator::current() {
for (; listIndex_ < metadataList_.size(); listIndex_++) {
if (PrjFileNameMatch(
metadataList_[listIndex_].name.c_str(),
searchExpression_.c_str())) {
//
// Don't increment the index here because we don't know if the caller
// would be able to use this. The caller should instead call advance() on
// success.
//
return &metadataList_[listIndex_];
}
}
return nullptr;
bool PrjfsDirEntry::operator<(const PrjfsDirEntry& other) const {
return PrjFileNameCompare(name_.c_str(), other.name_.c_str()) < 0;
}
} // namespace eden
} // namespace facebook
Enumerator::Enumerator(std::vector<PrjfsDirEntry>&& entryList)
: metadataList_(std::move(entryList)), iter_{metadataList_.begin()} {
std::sort(
metadataList_.begin(),
metadataList_.end(),
[](const PrjfsDirEntry& first, const PrjfsDirEntry& second) -> bool {
return first < second;
});
}
void Enumerator::advanceEnumeration() {
XDCHECK_NE(iter_, metadataList_.end());
++iter_;
while (iter_ != metadataList_.end() &&
!iter_->matchPattern(searchExpression_)) {
++iter_;
}
}
std::vector<ImmediateFuture<PrjfsDirEntry::Ready>>
Enumerator::getPendingDirEntries() {
std::vector<ImmediateFuture<PrjfsDirEntry::Ready>> ret;
for (auto it = iter_; it != metadataList_.end(); it++) {
if (it->matchPattern(searchExpression_)) {
ret.push_back(it->getFuture());
}
}
return ret;
}
} // namespace facebook::eden
#endif

View File

@ -13,42 +13,61 @@
#include <vector>
#include "eden/fs/model/Hash.h"
#include "eden/fs/utils/ImmediateFuture.h"
#include "eden/fs/utils/PathFuncs.h"
namespace facebook {
namespace eden {
struct FileMetadata {
//
// File name : final component
//
std::wstring name;
class PrjfsDirEntry {
public:
PrjfsDirEntry() = delete;
//
// isDirectory will be set only for the directories
// For files it will be ignored
//
bool isDirectory{false};
PrjfsDirEntry(
PathComponentPiece name,
bool isDir,
ImmediateFuture<uint64_t> sizeFuture);
folly::Future<uint64_t> getSize() {
return sizeFuture_.getFuture();
/**
* An entry whose size future has been resolved.
*/
struct Ready {
/** Name of the directory entry. */
std::wstring name;
/** Size of the file, 0 for a directory. */
uint64_t size;
/** Whether this entry is a directory. */
bool isDir;
};
/**
* Test whether this entry matches the given pattern.
*/
bool matchPattern(const std::wstring& pattern) const;
/**
* Return a future that completes when the size of this entry becomes
* available.
*/
ImmediateFuture<Ready> getFuture();
/**
* Do a lexicographical comparison of the entry.
*
* Return true if this entry is lexicographically before the other.
*/
bool operator<(const PrjfsDirEntry& other) const;
/**
* Return the name of this entry.
*/
const std::wstring& getName() const {
return name_;
}
FileMetadata(
std::wstring&& name,
bool isDir,
ImmediateFuture<uint64_t> sizeFuture)
: name(std::move(name)),
isDirectory(isDir),
// In the case where the future isn't ready yet, we want to start
// driving it immediately, thus convert it to a Future.
sizeFuture_(std::move(sizeFuture)
.semi()
.via(&folly::QueuedImmediateExecutor::instance())) {}
FileMetadata() = delete;
private:
std::wstring name_;
folly::FutureSplitter<uint64_t> sizeFuture_;
bool isDir_;
};
class Enumerator {
@ -56,23 +75,17 @@ class Enumerator {
Enumerator(const Enumerator&) = delete;
Enumerator& operator=(const Enumerator&) = delete;
Enumerator(std::vector<FileMetadata>&& entryList);
Enumerator(Enumerator&& other) noexcept
: searchExpression_(std::move(other.searchExpression_)),
metadataList_(std::move(other.metadataList_)),
listIndex_(std::move(other.listIndex_)) {}
explicit Enumerator(std::vector<PrjfsDirEntry>&& entryList);
Enumerator(Enumerator&& other) = default;
explicit Enumerator() = delete;
FileMetadata* current();
std::vector<ImmediateFuture<PrjfsDirEntry::Ready>> getPendingDirEntries();
void advance() {
++listIndex_;
}
void advanceEnumeration();
void restart() {
listIndex_ = 0;
void restartEnumeration() {
iter_ = metadataList_.begin();
}
bool isSearchExpressionEmpty() const {
@ -85,13 +98,12 @@ class Enumerator {
private:
std::wstring searchExpression_;
std::vector<FileMetadata> metadataList_;
std::vector<PrjfsDirEntry> metadataList_;
//
// use the listIndex_ to return entries when the enumeration is done over
// multiple calls
//
size_t listIndex_ = 0;
/**
* Iterator on the first directory entry that didn't get send to ProjectedFS.
*/
std::vector<PrjfsDirEntry>::iterator iter_;
};
} // namespace eden
} // namespace facebook

View File

@ -242,7 +242,7 @@ HRESULT PrjfsChannelInner::getEnumerationData(
}
if (shouldRestart) {
enumerator->restart();
enumerator->restartEnumeration();
}
auto fut = makeImmediateFutureWith([this,
@ -254,42 +254,58 @@ HRESULT PrjfsChannelInner::getEnumerationData(
auto stat = &ChannelThreadStats::readDir;
context->startRequest(dispatcher_->getStats(), stat, requestWatch);
bool added = false;
for (FileMetadata* entry; (entry = enumerator->current());
enumerator->advance()) {
auto fileInfo = PRJ_FILE_BASIC_INFO();
// TODO(xavierd): there is a potential quadratic cost to the following code
// in the case where the buffer can only hold a single entry. The linear
// getPendingDirEntries would thus be called for as many entries, causing
// the quadratic complexity. In practice, ProjectedFS doesn't do this and
// thus we can afford a bit of redundant work.
auto pendingDirEntries = enumerator->getPendingDirEntries();
return collectAll(std::move(pendingDirEntries))
.thenValue([enumerator = std::move(enumerator),
buffer,
context = std::move(context)](
std::vector<folly::Try<PrjfsDirEntry::Ready>> entries) {
bool added = false;
for (auto& try_ : entries) {
if (try_.hasException()) {
return folly::Try<folly::Unit>{try_.exception()};
}
auto& entry = try_.value();
fileInfo.IsDirectory = entry->isDirectory;
fileInfo.FileSize = entry->getSize().get();
auto fileInfo = PRJ_FILE_BASIC_INFO();
fileInfo.IsDirectory = entry.isDir;
fileInfo.FileSize = entry.size;
XLOGF(
DBG6,
"Directory entry: {}, {}, size={}",
fileInfo.IsDirectory ? "Dir" : "File",
PathComponent(entry->name),
fileInfo.FileSize);
XLOGF(
DBG6,
"Directory entry: {}, {}, size={}",
fileInfo.IsDirectory ? "Dir" : "File",
PathComponent(entry.name),
fileInfo.FileSize);
auto result =
PrjFillDirEntryBuffer(entry->name.c_str(), &fileInfo, buffer);
if (FAILED(result)) {
if (result == HRESULT_FROM_WIN32(ERROR_INSUFFICIENT_BUFFER) && added) {
// We are out of buffer space. This entry didn't make it. Return
// without increment.
break;
} else {
return folly::Try<folly::Unit>{makeHResultErrorExplicit(
result,
fmt::format(
FMT_STRING("Adding directory entry {}"),
PathComponent(entry->name)))};
}
}
auto result =
PrjFillDirEntryBuffer(entry.name.c_str(), &fileInfo, buffer);
if (FAILED(result)) {
if (result == HRESULT_FROM_WIN32(ERROR_INSUFFICIENT_BUFFER) &&
added) {
// We are out of buffer space. This entry didn't make it. Return
// without increment.
break;
} else {
return folly::Try<folly::Unit>{makeHResultErrorExplicit(
result,
fmt::format(
FMT_STRING("Adding directory entry {}"),
PathComponent(entry.name)))};
}
}
added = true;
enumerator->advanceEnumeration();
}
added = true;
}
context->sendEnumerationSuccess(buffer);
return folly::Try{folly::unit};
context->sendEnumerationSuccess(buffer);
return folly::Try{folly::unit};
});
});
context->catchErrors(std::move(fut))

View File

@ -204,7 +204,7 @@ class PrjfsChannelInner {
return *straceLogger_;
}
void addDirectoryEnumeration(Guid guid, std::vector<FileMetadata> dirents) {
void addDirectoryEnumeration(Guid guid, std::vector<PrjfsDirEntry> dirents) {
auto [iterator, inserted] = enumSessions_.wlock()->emplace(
std::move(guid), std::make_shared<Enumerator>(std::move(dirents)));
XDCHECK(inserted);

View File

@ -45,7 +45,7 @@ class PrjfsDispatcher {
/**
* Open a directory
*/
virtual ImmediateFuture<std::vector<FileMetadata>> opendir(
virtual ImmediateFuture<std::vector<PrjfsDirEntry>> opendir(
RelativePath path,
ObjectFetchContext& context) = 0;