prjfs: properly handle directory hierarchy replaced by a file

Summary:
When walking up the path that has been detected as not being present on disk,
EdenFS would assume that the returned path is a directory, but this assumption
doesn't hold true if the directory has been replaced by a file. Thus, instead
of walking up until finding a file/directory that is present on disk, the code
needs to walk up until a directory is found.

Reviewed By: kmancini

Differential Revision: D37927926

fbshipit-source-id: 5da55f503f4df8158c61435a1afa2cdd3f24d997
This commit is contained in:
Xavier Deguillard 2022-08-02 13:03:34 -07:00 committed by Facebook GitHub Bot
parent 7ba8264a8a
commit 03004f5a41
3 changed files with 163 additions and 63 deletions

View File

@ -348,10 +348,9 @@ ImmediateFuture<OnDiskState> getOnDiskState(
}
}
ImmediateFuture<folly::Unit> handleMaterializedFileNotification(
EdenMount& mount,
ImmediateFuture<folly::Unit> fileNotificationImpl(
const EdenMount& mount,
RelativePath path,
InodeType inodeType,
std::chrono::steady_clock::time_point receivedAt,
ObjectFetchContext& context);
@ -361,26 +360,25 @@ ImmediateFuture<folly::Unit> handleNotPresentFileNotification(
std::chrono::steady_clock::time_point receivedAt,
ObjectFetchContext& context) {
/**
* Allows finding the first directory that is present on disk. This must be
* heap allocated and kept alive until compute returns.
* Allows finding the first directory that is not present on disk. This must
* be heap allocated and kept alive until compute returns.
*/
class GetFirstPresent {
class GetFirstDirectoryNotPresent {
public:
explicit GetFirstPresent(RelativePath path)
explicit GetFirstDirectoryNotPresent(RelativePath path)
: fullPath_{std::move(path)}, currentPrefix_{fullPath_} {}
GetFirstPresent(GetFirstPresent&&) = delete;
GetFirstPresent(const GetFirstPresent&) = delete;
GetFirstDirectoryNotPresent(GetFirstDirectoryNotPresent&&) = delete;
GetFirstDirectoryNotPresent(const GetFirstDirectoryNotPresent&) = delete;
ImmediateFuture<RelativePath> compute(
const EdenMount& mount,
std::chrono::steady_clock::time_point receivedAt) {
auto dirname = currentPrefix_.dirname();
return getOnDiskState(mount, dirname, receivedAt)
return getOnDiskState(mount, currentPrefix_.dirname(), receivedAt)
.thenValue(
[this, &mount, receivedAt](
OnDiskState state) mutable -> ImmediateFuture<RelativePath> {
if (state != OnDiskState::NotPresent) {
if (state == OnDiskState::MaterializedDirectory) {
return currentPrefix_.copy();
}
@ -397,11 +395,12 @@ ImmediateFuture<folly::Unit> handleNotPresentFileNotification(
};
// First, we need to figure out how far down this path has been removed.
auto getFirstPresent = std::make_unique<GetFirstPresent>(std::move(path));
auto fut = getFirstPresent->compute(mount, receivedAt);
auto getFirstNotPresent =
std::make_unique<GetFirstDirectoryNotPresent>(std::move(path));
auto fut = getFirstNotPresent->compute(mount, receivedAt);
return std::move(fut)
.ensure([getFirstPresent = std::move(getFirstPresent)] {})
.thenValue([&mount, &context](RelativePath path) {
.ensure([getFirstNotPresent = std::move(getFirstNotPresent)] {})
.thenValue([&mount, &context, receivedAt](RelativePath path) {
auto basename = path.basename();
auto dirname = path.dirname();
@ -412,6 +411,17 @@ ImmediateFuture<folly::Unit> handleNotPresentFileNotification(
return treeInode->removeRecursively(
basename, InvalidationRequired::No, context);
})
.thenValue([&mount, &context, path = std::move(path), receivedAt](
auto&&) mutable {
// Now that the mismatch has been removed, make sure to also
// trigger a notification on that path. A file might have been
// created. Note that this may trigger a recursion into
// handleNotPresentFileNotification, which will be caught by the
// thenTry below due to the file/directory no longer being
// present in the TreeInode.
return fileNotificationImpl(
mount, std::move(path), receivedAt, context);
})
.thenTry([](folly::Try<folly::Unit> try_) {
if (auto* exc = try_.tryGetExceptionObject<std::system_error>()) {
if (isEnoent(*exc)) {
@ -427,30 +437,8 @@ ImmediateFuture<folly::Unit> handleNotPresentFileNotification(
});
}
ImmediateFuture<folly::Unit> fileNotificationImpl(
EdenMount& mount,
RelativePath path,
std::chrono::steady_clock::time_point receivedAt,
ObjectFetchContext& context) {
return getOnDiskState(mount, path, receivedAt)
.thenValue([&mount, path = std::move(path), receivedAt, &context](
OnDiskState state) mutable {
switch (state) {
case OnDiskState::MaterializedFile:
return handleMaterializedFileNotification(
mount, std::move(path), InodeType::FILE, receivedAt, context);
case OnDiskState::MaterializedDirectory:
return handleMaterializedFileNotification(
mount, std::move(path), InodeType::TREE, receivedAt, context);
case OnDiskState::NotPresent:
return handleNotPresentFileNotification(
mount, std::move(path), receivedAt, context);
}
});
}
ImmediateFuture<folly::Unit> recursivelyUpdateChildrens(
EdenMount& mount,
const EdenMount& mount,
TreeInodePtr tree,
RelativePath path,
std::chrono::steady_clock::time_point receivedAt,
@ -500,7 +488,7 @@ ImmediateFuture<folly::Unit> recursivelyUpdateChildrens(
}
ImmediateFuture<folly::Unit> handleMaterializedFileNotification(
EdenMount& mount,
const EdenMount& mount,
RelativePath path,
InodeType inodeType,
std::chrono::steady_clock::time_point receivedAt,
@ -553,13 +541,13 @@ ImmediateFuture<folly::Unit> handleMaterializedFileNotification(
if (auto inodePtr = inode.asTreePtrOrNull()) {
// In the case where this is already a directory, we
// still need to recursively add all the childrens.
// Consider the case where a directory is renamed and a
// file is added in it after it. If EdenFS handles the
// file creation prior to the renaming the directory
// will be created above in createDirInode, but we also
// need to make sure that all the files in the renamed
// directory are added too, hence the call to
// recursivelyAddAllChildrens.
// Consider the case where a directory is renamed and
// a file is added in it after it. If EdenFS handles
// the file creation prior to the renaming the
// directory will be created above in createDirInode,
// but we also need to make sure that all the files in
// the renamed directory are added too, hence the call
// to recursivelyAddAllChildrens.
return recursivelyUpdateChildrens(
mount,
std::move(inodePtr),
@ -632,6 +620,28 @@ ImmediateFuture<folly::Unit> handleMaterializedFileNotification(
});
}
ImmediateFuture<folly::Unit> fileNotificationImpl(
const EdenMount& mount,
RelativePath path,
std::chrono::steady_clock::time_point receivedAt,
ObjectFetchContext& context) {
return getOnDiskState(mount, path, receivedAt)
.thenValue([&mount, path = std::move(path), receivedAt, &context](
OnDiskState state) mutable {
switch (state) {
case OnDiskState::MaterializedFile:
return handleMaterializedFileNotification(
mount, std::move(path), InodeType::FILE, receivedAt, context);
case OnDiskState::MaterializedDirectory:
return handleMaterializedFileNotification(
mount, std::move(path), InodeType::TREE, receivedAt, context);
case OnDiskState::NotPresent:
return handleNotPresentFileNotification(
mount, std::move(path), receivedAt, context);
}
});
}
ImmediateFuture<folly::Unit> fileNotification(
EdenMount& mount,
RelativePath path,

View File

@ -350,6 +350,10 @@ class EdenTestCase(EdenTestCaseBase):
"""Unlink the file at the specified path relative to the clone."""
os.unlink(self.get_path(path))
def rmdir(self, path: str) -> None:
"""Unlink the directory at the specified path relative to the clone."""
os.rmdir(self.get_path(path))
def select_storage_engine(self) -> str:
"""
Prefer to use memory in the integration tests, but allow

View File

@ -5,9 +5,10 @@
# GNU General Public License version 2.
import os
import stat
from contextlib import contextmanager
from pathlib import Path
from typing import Dict, Generator, Optional, Set
from typing import Dict, Generator, Optional, Set, Tuple
from eden.fs.cli import util
from facebook.eden.constants import DIS_REQUIRE_MATERIALIZED
@ -53,13 +54,16 @@ class PrjFSStress(testcase.EdenRepoTest):
for _ in range(numToUnblock):
util.poll_until(unblock, timeout=30)
def getAllMaterialized(self) -> Set[Path]:
def getAllMaterialized(self, waitTime: int = 5) -> Set[Tuple[Path, int]]:
"""Return all the materialized files/directories minus .hg and .eden"""
res = set()
with self.eden.get_thrift_client_legacy() as client:
inodes = client.debugInodeStatus(
self.mount_path_bytes, b"", DIS_REQUIRE_MATERIALIZED, SyncBehavior(5)
self.mount_path_bytes,
b"",
DIS_REQUIRE_MATERIALIZED,
SyncBehavior(waitTime),
)
for tree_inode in inodes:
@ -68,21 +72,29 @@ class PrjFSStress(testcase.EdenRepoTest):
dirent_path = parent_dir / Path(os.fsdecode(dirent.name))
top_level_parent = dirent_path.parts[0]
if top_level_parent != ".hg" and top_level_parent != ".eden":
res.add(dirent_path)
res.add((dirent_path, dirent.mode))
return res
def assertNotMaterialized(self, path: str) -> None:
materialized = self.getAllMaterialized()
self.assertNotIn(Path(path), materialized, msg=f"{path} is materialized")
def assertNotMaterialized(self, path: str, waitTime: int = 5) -> None:
materialized = self.getAllMaterialized(waitTime)
self.assertNotIn(
Path(path),
{materialized_path for materialized_path, mode in materialized},
msg=f"{path} is materialized",
)
def assertMaterialized(self, path: str) -> None:
materialized = self.getAllMaterialized()
self.assertIn(Path(path), materialized, msg=f"{path} is not materialized")
def assertMaterialized(self, path: str, mode: int, waitTime: int = 5) -> None:
materialized = self.getAllMaterialized(waitTime)
self.assertIn(
(Path(path), mode), materialized, msg=f"{path} is not materialized"
)
def assertAllMaterialized(self, paths: Set[str]) -> None:
materialized = self.getAllMaterialized()
self.assertSetEqual(materialized, {Path(path) for path in paths})
def assertAllMaterialized(
self, paths: Set[Tuple[str, int]], waitTime: int = 5
) -> None:
materialized = self.getAllMaterialized(waitTime)
self.assertSetEqual(materialized, {(Path(path), mode) for path, mode in paths})
@contextmanager
def run_with_fault(
@ -143,7 +155,16 @@ class PrjFSStress(testcase.EdenRepoTest):
self.mkdir("foo")
self.wait_on_fault_unblock(3)
self.assertMaterialized("foo")
self.assertMaterialized("foo", stat.S_IFDIR)
def test_create_directory_to_file(self) -> None:
with self.run_with_fault():
self.mkdir("foo")
self.rmdir("foo")
self.touch("foo")
self.wait_on_fault_unblock(3)
self.assertMaterialized("foo", stat.S_IFREG)
def test_rename_hierarchy(self) -> None:
with self.run_with_fault():
@ -157,7 +178,25 @@ class PrjFSStress(testcase.EdenRepoTest):
2
) # A rename is a total removal and a total creation
self.assertMaterialized("bar")
self.assertMaterialized("bar", stat.S_IFDIR)
self.assertNotMaterialized("foo")
def test_rename_to_file(self) -> None:
with self.run_with_fault():
self.mkdir("foo")
self.touch("foo/bar")
self.touch("foo/baz")
self.wait_on_fault_unblock(3)
self.rename("foo", "bar")
self.rm("bar/bar")
self.rm("bar/baz")
self.rmdir("bar")
self.touch("bar")
self.wait_on_fault_unblock(6)
self.assertMaterialized("bar", stat.S_IFREG)
self.assertNotMaterialized("foo")
def test_rename_and_replace(self) -> None:
@ -174,5 +213,52 @@ class PrjFSStress(testcase.EdenRepoTest):
self.wait_on_fault_unblock(4)
self.assertAllMaterialized(
{"bar", "bar/bar", "bar/baz", "foo", "foo/hello", "hello"}
{
("bar", stat.S_IFDIR),
("bar/bar", stat.S_IFREG),
("bar/baz", stat.S_IFREG),
("foo", stat.S_IFDIR),
("foo/hello", stat.S_IFDIR),
("hello", stat.S_IFREG),
}
)
def test_out_of_order_file_removal(self) -> None:
with self.run_with_fault():
self.mkdir("a/b")
self.touch("a/b/c")
self.wait_on_fault_unblock(3)
self.rm("a/b/c")
# A wait_on_fault_unblock(1) below will just wait for the rm to be
# unblocked, not for it to terminate. This is usually not an issue
# due to Thrift APIs waiting on all IO when a positive SyncBehavior
# is used, but since we'll need to pass a SyncBehavior of 0
# seconds, the only way to guarantee the rm above would have
# completed is by forcing some IO and unblocking these.
self.touch("foo")
self.rm("foo")
self.rmdir("a/b")
self.touch("a/b")
# Unblock rm("a/b/c") touch("foo") and rm("foo")
self.wait_on_fault_unblock(3)
self.assertAllMaterialized(
{
("a/b", stat.S_IFREG),
("a", stat.S_IFDIR),
("hello", stat.S_IFREG),
},
waitTime=0,
)
self.wait_on_fault_unblock(2)
self.assertAllMaterialized(
{
("a/b", stat.S_IFREG),
("a", stat.S_IFDIR),
("hello", stat.S_IFREG),
}
)