inodes: return an eof flag from FileInode::read

Summary:
The NFS protocol needs to know if a read reached the end-of-file to avoid a
subsequent read and thus reduce the chattyness of the protocol.

On top of avoiding RPC calls, this should also halve the amount of data read
from Mercurial due to the BlobCache freeing the in-memory cached blob when the
FS has read the file in its entirety. This meant that the second READ would
always force the blob to be reloaded from the Mercurial store, which would also
force that blob to be kept in memory until being evicted (due to it not being
fully read).

Reviewed By: kmancini

Differential Revision: D30871422

fbshipit-source-id: 8acf4e21ea22b2dfd7f81d2fdd1b137a6e90cc8f
This commit is contained in:
Xavier Deguillard 2021-09-16 10:32:40 -07:00 committed by Facebook GitHub Bot
parent b562bb0344
commit 1ebf805e18
5 changed files with 49 additions and 26 deletions

View File

@ -860,24 +860,31 @@ void FileInode::materialize() {
}
#else
Future<BufVec>
Future<std::tuple<BufVec, bool>>
FileInode::read(size_t size, off_t off, ObjectFetchContext& context) {
XDCHECK_GE(off, 0);
return runWhileDataLoaded<Future<BufVec>>(
return runWhileDataLoaded<Future<std::tuple<BufVec, bool>>>(
LockedState{this},
BlobCache::Interest::WantHandle,
// This function is only called by FUSE.
context,
nullptr,
[size, off, self = inodePtrFromThis()](
LockedState&& state, std::shared_ptr<const Blob> blob) -> BufVec {
LockedState&& state,
std::shared_ptr<const Blob> blob) -> std::tuple<BufVec, bool> {
SCOPE_SUCCESS {
self->updateAtimeLocked(*state);
};
// Materialized either before or during blob load.
if (state->tag == State::MATERIALIZED_IN_OVERLAY) {
return self->getOverlayFileAccess(state)->read(*self, size, off);
// TODO(xavierd): For materialized files, only return EOF when read
// returned no bytes. This will force some FS Channel (like NFS) to
// issue at least 2 read calls: one for reading the entire file, and
// the second one to get the EOF bit.
auto buf = self->getOverlayFileAccess(state)->read(*self, size, off);
auto eof = size != 0 && buf->empty();
return {std::move(buf), eof};
}
// runWhileDataLoaded() ensures that the state is either
@ -899,7 +906,7 @@ FileInode::read(size_t size, off_t off, ObjectFetchContext& context) {
if (!cursor.canAdvance(off)) {
// Seek beyond EOF. Return an empty result.
return BufVec{folly::IOBuf::wrapBuffer("", 0)};
return {BufVec{folly::IOBuf::wrapBuffer("", 0)}, true};
}
cursor.skip(off);
@ -907,7 +914,7 @@ FileInode::read(size_t size, off_t off, ObjectFetchContext& context) {
std::unique_ptr<folly::IOBuf> result;
cursor.cloneAtMost(result, size);
return BufVec{std::move(result)};
return {BufVec{std::move(result)}, cursor.isAtEnd()};
});
}

View File

@ -254,15 +254,17 @@ class FileInode final : public InodeBaseMetadata<FileInodeState> {
/**
* Read up to size bytes from the file at the specified offset.
*
* Returns a BufVec containing the data. This may return fewer bytes than
* Returns a tuple of a BufVec containing the data and a boolean indicating
* if the end-of-file was reached. This may return fewer bytes than
* requested. If the specified offset is at or past the end of the buffer an
* empty IOBuf will be returned. Otherwise between 1 and size bytes will be
* returned. If fewer than size bytes are returned this does *not* guarantee
* that the end of the file was reached.
* that the end of the file was reached, the boolean should be checked for
* this.
*
* May throw exceptions on error.
*/
folly::Future<BufVec>
folly::Future<std::tuple<BufVec, bool>>
read(size_t size, off_t off, ObjectFetchContext& context);
folly::Future<size_t>

View File

@ -249,7 +249,11 @@ ImmediateFuture<BufVec> FuseDispatcherImpl::read(
ObjectFetchContext& context) {
return inodeMap_->lookupFileInode(ino).thenValue(
[&context, size, off](FileInodePtr&& inode) {
return inode->read(size, off, context).semi();
return inode->read(size, off, context)
.thenValue([](std::tuple<BufVec, bool>&& readRes) {
return std::get<BufVec>(std::move(readRes));
})
.semi();
});
}

View File

@ -90,15 +90,11 @@ ImmediateFuture<NfsDispatcher::ReadRes> NfsDispatcherImpl::read(
return inodeMap_->lookupFileInode(ino).thenValue(
[&context, size, offset](const FileInodePtr& inode) {
return inode->read(size, offset, context)
.thenValue([size](std::unique_ptr<folly::IOBuf>&& data) {
// TODO(xavierd): Detect an empty file when a empty read is
// performed. This forces the client to issue 2 reads: one to
// read the file, and the second to validate it is at the end of
// the file. If we could detect an EOF without the second read we
// can half the number of READ RPC.
auto isEof = size != 0 && data->empty();
return ReadRes{std::move(data), isEof};
})
.thenValue(
[](std::tuple<std::unique_ptr<folly::IOBuf>, bool>&& res) {
auto [data, isEof] = std::move(res);
return ReadRes{std::move(data), isEof};
})
.semi();
});
}

View File

@ -451,7 +451,12 @@ TEST(FileInode, readDuringLoad) {
// Load the inode and start reading the contents
auto inode = mount_.getFileInode("notready.txt");
auto dataFuture = inode->read(4096, 0, ObjectFetchContext::getNullContext());
auto dataFuture = inode->read(4096, 0, ObjectFetchContext::getNullContext())
.thenValue([](std::tuple<BufVec, bool> readRes) {
auto [data, isEof] = std::move(readRes);
EXPECT_EQ(true, isEof);
return data->moveToFbString();
});
EXPECT_FALSE(dataFuture.isReady());
@ -460,7 +465,7 @@ TEST(FileInode, readDuringLoad) {
// The read() operation should have completed now.
ASSERT_TRUE(dataFuture.isReady());
EXPECT_EQ(contents, std::move(dataFuture).get()->moveToFbString());
EXPECT_EQ(contents, std::move(dataFuture).get());
}
TEST(FileInode, writeDuringLoad) {
@ -499,7 +504,12 @@ TEST(FileInode, truncateDuringLoad) {
auto inode = mount_.getFileInode("notready.txt");
// Start reading the contents
auto dataFuture = inode->read(4096, 0, ObjectFetchContext::getNullContext());
auto dataFuture = inode->read(4096, 0, ObjectFetchContext::getNullContext())
.thenValue([](std::tuple<BufVec, bool> readRes) {
auto [data, isEof] = std::move(readRes);
EXPECT_EQ(true, isEof);
return data->moveToFbString();
});
EXPECT_FALSE(dataFuture.isReady());
// Truncate the file while the initial read is in progress. This should
@ -511,15 +521,19 @@ TEST(FileInode, truncateDuringLoad) {
// The read should complete now too.
ASSERT_TRUE(dataFuture.isReady());
EXPECT_EQ("", std::move(dataFuture).get()->moveToFbString());
EXPECT_EQ("", std::move(dataFuture).get());
// For good measure, test reading and writing some more.
inode->write("foobar\n"_sp, 5, ObjectFetchContext::getNullContext()).get(0ms);
dataFuture = inode->read(4096, 0, ObjectFetchContext::getNullContext());
dataFuture = inode->read(4096, 0, ObjectFetchContext::getNullContext())
.thenValue([](std::tuple<BufVec, bool> readRes) {
auto [data, isEof] = std::move(readRes);
EXPECT_EQ(false, isEof);
return data->moveToFbString();
});
ASSERT_TRUE(dataFuture.isReady());
EXPECT_EQ(
"\0\0\0\0\0foobar\n"_sp, std::move(dataFuture).get()->moveToFbString());
EXPECT_EQ("\0\0\0\0\0foobar\n"_sp, std::move(dataFuture).get());
EXPECT_FILE_INODE(inode, "\0\0\0\0\0foobar\n"_sp, 0644);
}