nfs: implement the READDIR RPC

Summary:
The NFS readdir turns out to be pretty similar to the FUSE one, with a couple
of differences. For one, it only populates the directory entry name. For
another, it puts a limit on the total size of the serialized result, including
all the NFS/XDR overhead.

It is not specified if the . and .. entries need to be returned, but since the
NFS spec is usually pretty explicit about these and makes it clear that this is
for the most part a client burden, I didn't add these. I may have to revisit
this later when I get to manually browse a repository.

Since the READDIR RPC doesn't populate any filehandle, the client will have to
issue a LOOKUP RPC for each entry, potentially leading to some
inefficiencies. A future diff will implement READDIRPLUS to fix these.

Reviewed By: chadaustin

Differential Revision: D26802310

fbshipit-source-id: b821b57021d0c2dca33427975b1acd665173bc5c
This commit is contained in:
Xavier Deguillard 2021-03-18 10:06:28 -07:00 committed by Facebook GitHub Bot
parent a7a8778dcf
commit ec5a6ef1f0
10 changed files with 306 additions and 18 deletions

View File

@ -223,6 +223,19 @@ folly::Future<NfsDispatcher::RenameRes> NfsDispatcherImpl::rename(
});
}
/**
 * Enumerate the directory `dir` on behalf of the NFS READDIR RPC.
 *
 * The TreeInode for `dir` is looked up first; it is then asked to fill an
 * NfsDirList constructed from `count`, starting at `offset`. The filled list
 * and the end-of-directory flag are packaged into a ReaddirRes.
 *
 * `context` is captured by reference by the continuation.
 */
folly::Future<NfsDispatcher::ReaddirRes> NfsDispatcherImpl::readdir(
    InodeNumber dir,
    off_t offset,
    uint32_t count,
    ObjectFetchContext& context) {
  auto fillFromInode = [&context, offset, count](const TreeInodePtr& tree) {
    auto listing = tree->nfsReaddir(NfsDirList{count}, offset, context);
    auto& entries = std::get<0>(listing);
    const bool reachedEnd = std::get<1>(listing);
    return ReaddirRes{std::move(entries), reachedEnd};
  };
  return inodeMap_->lookupTreeInode(dir).thenValue(std::move(fillFromInode));
}
folly::Future<struct statfs> NfsDispatcherImpl::statfs(
InodeNumber /*dir*/,
ObjectFetchContext& /*context*/) {

View File

@ -83,6 +83,12 @@ class NfsDispatcherImpl : public NfsDispatcher {
PathComponent toName,
ObjectFetchContext& context) override;
/**
 * NFS READDIR: list the entries of the directory `dir`, starting at `offset`.
 *
 * `count` is used to construct the NfsDirList that receives the entries.
 */
folly::Future<NfsDispatcher::ReaddirRes> readdir(
InodeNumber dir,
off_t offset,
uint32_t count,
ObjectFetchContext& context) override;
folly::Future<struct statfs> statfs(
InodeNumber ino,
ObjectFetchContext& context) override;

View File

@ -1791,10 +1791,8 @@ void TreeInode::TreeRenameLocks::lockDestChild(PathComponentPiece destName) {
}
#ifndef _WIN32
FuseDirList TreeInode::fuseReaddir(
FuseDirList&& list,
off_t off,
ObjectFetchContext& context) {
template <typename Fn>
bool TreeInode::readdirImpl(off_t off, ObjectFetchContext& context, Fn add) {
/*
* Implementing readdir correctly in the presence of concurrent modifications
* to the directory is nontrivial. This function will be called multiple
@ -1851,9 +1849,9 @@ FuseDirList TreeInode::fuseReaddir(
// 2: start after ..
// 2+N: start after inode N
if (off <= 0) {
if (!list.add(".", getNodeId().get(), dtype_t::Dir, 1)) {
return std::move(list);
if (off == 0) {
if (!add(".", DirEntry{dtype_to_mode(dtype_t::Dir), getNodeId()}, 1)) {
return false;
}
}
if (off <= 1) {
@ -1864,8 +1862,8 @@ FuseDirList TreeInode::fuseReaddir(
// For the root of the mount point, just add its own inode ID as its parent.
// FUSE seems to overwrite the parent inode number on the root dir anyway.
auto parentNodeId = parent ? parent->getNodeId() : getNodeId();
if (!list.add("..", parentNodeId.get(), dtype_t::Dir, 2)) {
return std::move(list);
if (!add("..", DirEntry{dtype_to_mode(dtype_t::Dir), parentNodeId}, 2)) {
return false;
}
}
@ -1892,18 +1890,44 @@ FuseDirList TreeInode::fuseReaddir(
auto& [name, entry] = entries.begin()[indices.back().second];
indices.pop_back();
if (!list.add(
name.stringPiece(),
entry.getInodeNumber().get(),
entry.getDtype(),
entry.getInodeNumber().get() + 2)) {
if (!add(name.stringPiece(), entry, entry.getInodeNumber().get() + 2)) {
break;
}
}
return indices.size() == 0;
}
/**
 * FUSE flavour of readdir: feed the directory entries, starting at `off`,
 * into `list` and hand the list back to the caller.
 *
 * The completeness flag returned by readdirImpl is not used here.
 */
FuseDirList TreeInode::fuseReaddir(
    FuseDirList&& list,
    off_t off,
    ObjectFetchContext& context) {
  auto appendEntry =
      [&list](StringPiece name, const DirEntry& entry, uint64_t offset) {
        const auto ino = entry.getInodeNumber().get();
        return list.add(name, ino, entry.getDtype(), offset);
      };
  readdirImpl(off, context, appendEntry);
  // `list` is an rvalue reference parameter, so it must be moved explicitly.
  return std::move(list);
}
/**
 * NFS flavour of readdir: update the access time, then feed the directory
 * entries, starting at `off`, into `list`.
 *
 * Returns the list together with the boolean reported by readdirImpl.
 */
std::tuple<NfsDirList, bool> TreeInode::nfsReaddir(
    NfsDirList&& list,
    off_t off,
    ObjectFetchContext& context) {
  updateAtime();

  auto appendEntry =
      [&list](StringPiece name, const DirEntry& entry, uint64_t offset) {
        return list.add(name, entry.getInodeNumber(), offset);
      };
  const bool reachedEnd = readdirImpl(off, context, appendEntry);
  return {std::move(list), reachedEnd};
}
#else
std::vector<FileMetadata> TreeInode::readdir() {

View File

@ -22,6 +22,7 @@ class CheckoutAction;
class CheckoutContext;
class DiffContext;
class FuseDirList;
class NfsDirList;
class EdenMount;
class GitIgnoreStack;
class DiffCallback;
@ -148,6 +149,16 @@ class TreeInode final : public InodeBaseMetadata<DirContents> {
#ifndef _WIN32
FuseDirList
fuseReaddir(FuseDirList&& list, off_t off, ObjectFetchContext& context);
/**
 * Populate the list with as many directory entries as `list` accepts,
 * resuming the enumeration at the offset `off`.
 *
 * Return the filled directory list as well as a boolean indicating if the
 * listing is complete, i.e. whether the end of the directory was reached.
 */
std::tuple<NfsDirList, bool>
nfsReaddir(NfsDirList&& list, off_t off, ObjectFetchContext& context);
#else
/**
* The following readdir() is for responding to Projected FS's directory
@ -496,6 +507,15 @@ class TreeInode final : public InodeBaseMetadata<DirContents> {
return TreeInodePtr::newPtrFromExisting(this);
}
/**
 * Helper function to implement both fuseReaddir and nfsReaddir.
 *
 * Directory entries are handed one at a time to `add`, which is invoked as
 * add(name, entry, offset) and returns a boolean. A false return value stops
 * the enumeration.
 *
 * Returns a boolean that indicates if readdir finished reading the entire
 * directory.
 */
template <typename Fn>
bool readdirImpl(off_t offset, ObjectFetchContext& context, Fn add);
/**
* createImpl() is a helper function for creating new children inodes.
*

View File

@ -80,10 +80,23 @@ target_link_libraries(
PUBLIC
eden_inodes_inodenumber
eden_inode_metadata
eden_nfs_dirlist
eden_store
eden_utils
)
add_library(
eden_nfs_dirlist STATIC
"DirList.cpp" "DirList.h"
)
target_link_libraries(
eden_nfs_dirlist
PUBLIC
eden_nfs_nfsd_rpc
eden_inodes_inodenumber
)
add_subdirectory(portmap)
add_subdirectory(rpc)
add_subdirectory(xdr)

65
eden/fs/nfs/DirList.cpp Normal file
View File

@ -0,0 +1,65 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
#ifndef _WIN32
#include "eden/fs/nfs/DirList.h"
namespace facebook::eden {
namespace {
/**
 * Overhead of READDIR3resok before adding any entries:
 * - A filled post_op_attr
 * - The cookieverf,
 * - The eof boolean and the end of the list marker.
 *
 * The value is derived from the XDR serialized sizes rather than hard-coded.
 */
constexpr size_t kInitialOverhead =
// dir_attributes, assumed to be present
XdrTrait<post_op_attr>::serializedSize(post_op_attr{fattr3{}}) +
// cookieverf
XdrTrait<uint64_t>::serializedSize(0) +
// eof flag + end of list marker
2 * XdrTrait<bool>::serializedSize(false);
/**
 * Compute how many bytes are left for directory entries.
 *
 * NFS expresses the maximum number of entries to return as the total size of
 * the READDIR3resok structure, so the fixed overhead has to be subtracted
 * from `count` up front.
 *
 * Throws std::length_error when `count` cannot even hold the fixed overhead.
 */
uint32_t computeInitialRemaining(uint32_t count) {
  const bool holdsOverhead = count >= kInitialOverhead;
  if (!holdsOverhead) {
    throw std::length_error(
        "NFS READDIR overhead is bigger than the passed in size");
  }
  // Safe to narrow: the difference is never larger than `count`.
  return static_cast<uint32_t>(count - kInitialOverhead);
}
} // namespace
NfsDirList::NfsDirList(uint32_t count)
: remaining_(computeInitialRemaining(count)) {}
/**
 * Try to append the entry (ino, name, offset) to the list.
 *
 * The entry is appended only if its serialized size fits in the remaining
 * byte budget, which is then reduced accordingly. Returns true when the
 * entry was appended, false when it did not fit.
 */
bool NfsDirList::add(
    folly::StringPiece name,
    InodeNumber ino,
    uint64_t offset) {
  entry3 candidate{ino.get(), name.str(), offset};

  // Besides the entry itself, account for the boolean indicating that this is
  // not the end of the list.
  const auto entryBytes = XdrTrait<entry3>::serializedSize(candidate);
  const auto markerBytes = XdrTrait<bool>::serializedSize(true);
  const auto required = entryBytes + markerBytes;

  if (required <= remaining_) {
    remaining_ -= required;
    list_.list.push_back(std::move(candidate));
    return true;
  }
  return false;
}
} // namespace facebook::eden
#endif

52
eden/fs/nfs/DirList.h Normal file
View File

@ -0,0 +1,52 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2.
*/
#pragma once
#ifndef _WIN32
#include "eden/fs/inodes/InodeNumber.h"
#include "eden/fs/nfs/NfsdRpc.h"
namespace facebook::eden {
/**
 * Accumulates directory entries while keeping track of a byte budget, so
 * that only as many entries as fit into the given amount of memory are added.
 *
 * The type is move-only and cannot be default constructed.
 */
class NfsDirList {
 public:
  /** Create an empty list sized from `count`. */
  explicit NfsDirList(uint32_t count);

  NfsDirList() = delete;

  NfsDirList(const NfsDirList&) = delete;
  NfsDirList& operator=(const NfsDirList&) = delete;

  NfsDirList(NfsDirList&&) = default;
  NfsDirList& operator=(NfsDirList&&) = default;

  /**
   * Try to append one entry. Returns true when the entry was appended,
   * false otherwise.
   */
  bool add(folly::StringPiece name, InodeNumber ino, uint64_t offset);

  /**
   * Hand the accumulated entries over to the caller by moving them out of
   * this NfsDirList.
   */
  XdrList<entry3> extractList() {
    return std::move(list_);
  }

 private:
  /** Budget left for additional entries. */
  uint32_t remaining_;
  /** Entries accepted so far. */
  XdrList<entry3> list_{};
};
} // namespace facebook::eden
#endif

View File

@ -19,6 +19,7 @@
#include "eden/fs/inodes/InodeMetadata.h"
#include "eden/fs/inodes/InodeNumber.h"
#include "eden/fs/nfs/DirList.h"
#include "eden/fs/store/ObjectFetchContext.h"
#include "eden/fs/utils/PathFuncs.h"
@ -275,6 +276,31 @@ class NfsDispatcher {
PathComponent toName,
ObjectFetchContext& context) = 0;
/**
 * Return value of the readdir method.
 */
struct ReaddirRes {
/**
 * List of directory entries. The underlying entries can be moved out with
 * NfsDirList::extractList().
 */
NfsDirList entries;
/**
 * Has the readdir reached the end of the directory. When false, not all
 * the entries were returned.
 */
bool isEof;
};
/**
 * Read the content of the directory referenced by the InodeNumber dir. A
 * maximum of count bytes will be added to the returned NfsDirList.
 *
 * For very large directories, it is possible that more than count bytes are
 * necessary to return all the directory entries. In this case, the returned
 * ReaddirRes has isEof set to false and a subsequent readdir call will be
 * made by the NFS client to restart the enumeration at offset. The first
 * readdir will have an offset of 0.
 */
virtual folly::Future<ReaddirRes> readdir(
InodeNumber dir,
off_t offset,
uint32_t count,
ObjectFetchContext& context) = 0;
virtual folly::Future<struct statfs> statfs(
InodeNumber dir,
ObjectFetchContext& context) = 0;

View File

@ -1001,12 +1001,81 @@ folly::Future<folly::Unit> Nfsd3ServerProcessor::link(
});
}
/**
 * Check whether the cookie verifier sent by the client is acceptable.
 *
 * The verifier lets the server detect that a directory was modified between
 * two readdir calls, in which case the enumeration has to be restarted.
 *
 * TODO(xavierd): For now, the only accepted verifier is 0. In the future, we
 * may want to compare it against a global counter that is incremented for
 * each update operation. The assumption being that: "The client should be
 * careful to avoid holding directory entry cookies across operations that
 * modify the directory contents, such as REMOVE and CREATE.", thus we only
 * need to protect against concurrent update and readdir operations since
 * there is only one client per mount.
 */
bool isReaddirCookieverfValid(uint64_t verf) {
  constexpr uint64_t kOnlyAcceptedVerifier = 0;
  return kOnlyAcceptedVerifier == verf;
}
/**
 * Return the current global cookie verifier, which is always 0 for now.
 *
 * See isReaddirCookieverfValid() for the meaning of the cookie verifier.
 */
uint64_t getReaddirCookieverf() {
  constexpr uint64_t kCurrentVerifier = 0;
  return kCurrentVerifier;
}
/**
 * Handle the READDIR RPC.
 *
 * Deserializes READDIR3args from `deser`, rejects an invalid cookie verifier
 * with NFS3ERR_BAD_COOKIE, and otherwise asks the dispatcher to enumerate the
 * directory. The directory attributes are fetched afterwards so they can be
 * attached to both the success and the failure reply. The READDIR3res is
 * serialized into `ser`.
 */
folly::Future<folly::Unit> Nfsd3ServerProcessor::readdir(
folly::io::Cursor /*deser*/,
folly::io::Cursor deser,
folly::io::QueueAppender ser,
uint32_t xid) {
serializeReply(ser, accept_stat::PROC_UNAVAIL, xid);
return folly::unit;
// The RPC itself is accepted; NFS level errors are carried by READDIR3res.
serializeReply(ser, accept_stat::SUCCESS, xid);
auto args = XdrTrait<READDIR3args>::deserialize(deser);
// A single context object is shared by all the readdir requests.
static auto context =
ObjectFetchContext::getNullContextWithCauseDetail("readdir");
// Refuse to continue an enumeration started with a verifier we don't know.
if (!isReaddirCookieverfValid(args.cookieverf)) {
READDIR3res res{{{nfsstat3::NFS3ERR_BAD_COOKIE, READDIR3resfail{}}}};
XdrTrait<READDIR3res>::serialize(ser, res);
return folly::unit;
}
return dispatcher_->readdir(args.dir.ino, args.cookie, args.count, *context)
.thenTry([this, ino = args.dir.ino, ser = std::move(ser)](
folly::Try<NfsDispatcher::ReaddirRes> try_) mutable {
// The attributes are requested whether or not the readdir succeeded, the
// readdir result is carried along in try_.
return dispatcher_->getattr(ino, *context)
.thenTry([ser = std::move(ser), try_ = std::move(try_)](
folly::Try<struct stat> tryStat) mutable {
if (try_.hasException()) {
// Failure: translate the exception to an NFS error code.
READDIR3res res{
{{exceptionToNfsError(try_.exception()),
READDIR3resfail{statToPostOpAttr(std::move(tryStat))}}}};
XdrTrait<READDIR3res>::serialize(ser, res);
} else {
auto readdirRes = std::move(try_).value();
// extractList() only moves the entry list out of readdirRes.entries,
// readdirRes.isEof is still read from readdirRes afterwards.
READDIR3res res{
{{nfsstat3::NFS3_OK,
READDIR3resok{
/*dir_attributes*/ statToPostOpAttr(
std::move(tryStat)),
/*cookieverf*/ getReaddirCookieverf(),
/*reply*/
dirlist3{
/*entries*/ std::move(readdirRes)
.entries.extractList(),
/*eof*/ readdirRes.isEof,
}}}}};
XdrTrait<READDIR3res>::serialize(ser, res);
}
return folly::unit;
});
});
}
folly::Future<folly::Unit> Nfsd3ServerProcessor::readdirplus(

View File

@ -486,7 +486,7 @@ struct XdrOptionalVariant : public XdrVariant<UnionTypeT, TrueVariantT> {
static constexpr UnionType TestValue = TestValueV;
XdrOptionalVariant() = default;
/* implicit */ XdrOptionalVariant(TrueVariant&& set)
/* implicit */ constexpr XdrOptionalVariant(TrueVariant&& set)
: XdrVariant<UnionType, TrueVariantT>{TestValue, std::move(set)} {}
};