Allow rm of files with corrupt overlay

Summary:
Sometimes, Eden's overlay (in `$client_dir/local/`) gets corrupt. In
particular, sometimes overlay files can be truncated or missing after a hard
reboot where the underlying filesystem state was not flushed to disk.

For such files, open(), stat(), unlink(), etc. from Eden report ENOENT, yet
readdir() on the containing directory shows that the file does exist.

In other words, the problematic file is undeletable:

```
$ ls -la dir/
/bin/ls: cannot access dir/corrupt_file: No such file or directory
total 0
drwxr-xr-x.  3 strager 0 Jul 10 21:41 .
drwxr-xr-x. 48 strager 0 Jul 10 21:41 ..
-??????????  ? ?       ?            ? corrupt_file
$ rm dir/corrupt_file
rm: cannot remove ‘dir/corrupt_file’: No such file or directory
```

Allow users to delete these problematic files (if the file was a regular file
and not a directory) by doing the following:

* Allow corrupt regular files to be unlink()d successfully.
* Allow corrupt regular files to be stat()d.

Making stat() succeed is a requirement by FUSE:

* For unlink(), FUSE performs FUSE_LOOKUP before FUSE_UNLINK. If FUSE_LOOKUP
  fails, unlink() fails. Therefore, we must make FUSE_LOOKUP succeed for
  corrupt files.
* For stat(), FUSE performs FUSE_LOOKUP and sometimes FUSE_GETATTR. Since we
  must make FUSE_LOOKUP succeed (for unlink()), it's natural to make
  FUSE_GETATTR succeed too.

A future diff will fix corrupted directories.

Reviewed By: chadaustin

Differential Revision: D8884793

fbshipit-source-id: 1100037bf52475fcca66f39946b917ce604f12dc
This commit is contained in:
Adam Simpkins 2018-07-25 20:52:58 -07:00 committed by Facebook Github Bot
parent 4620a3af43
commit ea2a6034d4
7 changed files with 270 additions and 33 deletions

View File

@ -14,6 +14,7 @@
#include <folly/executors/GlobalExecutor.h>
#include <folly/logging/xlog.h>
#include <gflags/gflags.h>
#include <cstring>
#include <shared_mutex>
#include "eden/fs/fuse/DirHandle.h"
@ -23,6 +24,7 @@
#include "eden/fs/inodes/EdenMount.h"
#include "eden/fs/inodes/FileInode.h"
#include "eden/fs/inodes/InodeMap.h"
#include "eden/fs/inodes/Overlay.h"
#include "eden/fs/inodes/TreeInode.h"
#include "eden/fs/utils/SystemError.h"
@ -63,6 +65,13 @@ fuse_entry_out computeEntryParam(
entry.entry_valid_nsec = fuse_attr.attr_valid_nsec;
return entry;
}
Dispatcher::Attr attrForInodeWithCorruptOverlay() noexcept {
struct stat st;
std::memset(&st, 0, sizeof(st));
st.st_mode = S_IFREG;
return Dispatcher::Attr{st};
}
} // namespace
folly::Future<Dispatcher::Attr> EdenDispatcher::getattr(InodeNumber ino) {
@ -89,12 +98,36 @@ folly::Future<fuse_entry_out> EdenDispatcher::lookup(
return tree->getOrLoadChild(name);
})
.then([](const InodePtr& inode) {
return inode->getattr().then([inode](Dispatcher::Attr attr) {
inode->incFuseRefcount();
// Preserve inode's life for the duration of the prefetch.
inode->prefetch().ensure([inode] {});
return computeEntryParam(inode->getNodeId(), attr);
});
return folly::makeFutureWith([&]() { return inode->getattr(); })
.then([inode](folly::Try<Dispatcher::Attr> maybeAttr) {
if (maybeAttr.hasValue()) {
// Preserve inode's life for the duration of the prefetch.
inode->prefetch().ensure([inode] {});
inode->incFuseRefcount();
return computeEntryParam(inode->getNodeId(), maybeAttr.value());
} else {
// The most common case for getattr() failing is if this file is
// materialized but the data for it in the overlay is missing
// or corrupt. This can happen after a hard reboot where the
// overlay data was not synced to disk first.
//
// We intentionally want to return a result here rather than
// failing; otherwise we can't return the inode number to the
// kernel at all. This blocks other operations on the file,
// like FUSE_UNLINK. By successfully returning from the
// lookup we allow clients to remove this corrupt file with an
// unlink operation. (Even though FUSE_UNLINK does not require
// the child inode number, the kernel does not appear to send a
// FUSE_UNLINK request to us if it could not get the child inode
// number first.)
XLOG(WARN) << "error getting attributes for inode "
<< inode->getNodeId() << " (" << inode->getLogPath()
<< "): " << maybeAttr.exception().what();
inode->incFuseRefcount();
return computeEntryParam(
inode->getNodeId(), attrForInodeWithCorruptOverlay());
}
});
})
.onError([](const std::system_error& err) {
// Translate ENOENT into a successful response with an

View File

@ -0,0 +1,101 @@
#!/usr/bin/env python3
#
# Copyright (c) 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
import abc
import contextlib
import os
import pathlib
import subprocess
import typing
import eden.integration.lib.overlay as overlay_mod
from eden.integration.lib import testcase
class CorruptOverlayTestBase(
testcase.HgRepoTestMixin, testcase.EdenRepoTest, metaclass=abc.ABCMeta
):
"""Test file operations when Eden's overlay is corrupted.
Tests in this class apply to all types of files (regular files, directories,
etc.).
"""
def test_unmount_succeeds(self) -> None:
with self.assert_does_not_raise():
self.eden.unmount(self.mount_path)
@property
@abc.abstractmethod
def corrupted_path(self) -> pathlib.Path:
raise NotImplementedError()
@abc.abstractmethod
def corrupt_overlay_file(self, path: pathlib.Path) -> None:
raise NotImplementedError()
@contextlib.contextmanager
def assert_does_not_raise(self) -> typing.Iterator[None]:
yield
class CorruptOverlayRegularFileTestBase(CorruptOverlayTestBase):
"""Test a regular file whose overlay was corrupted."""
def setUp(self) -> None:
super().setUp()
self.overlay = overlay_mod.OverlayStore(self.eden, self.mount_path)
self.overlay.corrupt_file(self.corrupted_path, self.corrupt_overlay_file)
def populate_repo(self) -> None:
self.repo.write_file("committed_file", "committed_file content")
self.repo.commit("Initial commit.")
def test_unlink_deletes_corrupted_file(self) -> None:
path = self.mount_path / self.corrupted_path
path.unlink()
self.assertFalse(path.exists(), f"{path} should not exist after being deleted")
def test_rm_program_with_force_deletes_corrupted_file(self) -> None:
path = self.mount_path / self.corrupted_path
subprocess.check_call(["rm", "-f", "--", path])
self.assertFalse(path.exists(), f"{path} should not exist after being deleted")
class DeleteTrackedFile(CorruptOverlayRegularFileTestBase):
@property
def corrupted_path(self) -> pathlib.Path:
return pathlib.Path("committed_file")
def corrupt_overlay_file(self, path: pathlib.Path) -> None:
path.unlink()
class DeleteUntrackedFile(CorruptOverlayRegularFileTestBase):
@property
def corrupted_path(self) -> pathlib.Path:
return pathlib.Path("new_file")
def corrupt_overlay_file(self, path: pathlib.Path) -> None:
path.unlink()
class TruncateTrackedFile(CorruptOverlayRegularFileTestBase):
@property
def corrupted_path(self) -> pathlib.Path:
return pathlib.Path("committed_file")
def corrupt_overlay_file(self, path: pathlib.Path) -> None:
os.truncate(str(path), 0)
del CorruptOverlayTestBase
del CorruptOverlayRegularFileTestBase

View File

@ -0,0 +1,27 @@
#!/usr/bin/env python3
#
# Copyright (c) 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
import pathlib
from .lib import testcase
@testcase.eden_repo_test
class EdenClientTest(testcase.EdenRepoTest):
def populate_repo(self) -> None:
self.repo.write_file("hello", "hola\n")
self.repo.commit("Initial commit.")
def test_client_dir_for_mount(self) -> None:
clone_path = pathlib.Path(self.tmp_dir, "test_checkout")
self.eden.run_cmd("clone", self.repo_name, clone_path)
self.assertEqual(
self.eden.client_dir_for_mount(clone_path),
pathlib.Path(self.eden_dir, "clients", "test_checkout"),
)

View File

@ -9,7 +9,7 @@
import os
from .lib import repobase, testcase
from .lib import overlay, repobase, testcase
class FsckTest(testcase.EdenRepoTest):
@ -31,35 +31,18 @@ class FsckTest(testcase.EdenRepoTest):
def setup_eden_test(self) -> None:
super().setup_eden_test()
self.client_dir = os.readlink(os.path.join(self.mount, ".eden", "client"))
self.overlay_dir = os.path.join(self.client_dir, "local")
def _update_file(self, path: str, contents: str) -> int:
"""
Update a file by path and return its inode number.
Updating the file contents ensures it will be materialized and present in the
overlay.
"""
with open(os.path.join(self.mount, path), "w") as f:
f.write(contents)
stat_info = os.fstat(f.fileno())
return stat_info.st_ino
def _get_overlay_path(self, inode_number: int) -> str:
subdir = "{:02x}".format(inode_number % 256)
return os.path.join(self.overlay_dir, subdir, str(inode_number))
self.overlay = overlay.OverlayStore(self.eden, self.mount_path)
def test_fsck_no_issues(self) -> None:
output = self.eden.run_cmd("fsck", self.mount)
self.assertIn("No issues found", output)
def test_fsck_empty_overlay_file(self) -> None:
inode_number = self._update_file("doc/foo.txt", "new contents\n")
overlay_path = self.overlay.materialize_file("doc/foo.txt")
self.eden.run_cmd("unmount", self.mount)
# Truncate the file to 0 length
with open(self._get_overlay_path(inode_number), "w"):
with overlay_path.open("wb"):
pass
self.eden.run_cmd("mount", self.mount)

View File

@ -9,6 +9,7 @@
import logging
import os
import pathlib
import shlex
import shutil
import subprocess
@ -363,6 +364,19 @@ class EdenFS(object):
with self.get_thrift_client() as client:
client.debugSetLogLevel(category, level)
def client_dir_for_mount(self, mount_path: pathlib.Path) -> pathlib.Path:
client_link = mount_path / ".eden" / "client"
return pathlib.Path(os.readlink(str(client_link)))
def overlay_dir_for_mount(self, mount_path: pathlib.Path) -> pathlib.Path:
return self.client_dir_for_mount(mount_path) / "local"
def mount(self, mount_path: pathlib.Path) -> None:
self.run_cmd("mount", "--", str(mount_path))
def unmount(self, mount_path: pathlib.Path) -> None:
self.run_cmd("unmount", "--", str(mount_path))
class EdenCommandError(subprocess.CalledProcessError):
def __init__(self, ex: subprocess.CalledProcessError) -> None:

View File

@ -0,0 +1,66 @@
#!/usr/bin/env python3
#
# Copyright (c) 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
import os
import pathlib
import stat
import tempfile
import typing
import eden.integration.lib.edenclient as edenclient
class OverlayStore:
def __init__(self, eden: edenclient.EdenFS, mount: pathlib.Path) -> None:
self.eden = eden
self.mount = mount
self.overlay_dir = eden.overlay_dir_for_mount(mount)
def materialize_file(self, path: pathlib.Path) -> pathlib.Path:
"""Force the file inode at the specified path to be materialized and recorded in
the overlay. Returns the path to the overlay file that stores the data for this
inode in the overlay.
"""
path_in_mount = self.mount / path
# Opening the file in write mode will materialize it
with path_in_mount.open("w+b") as f:
s = os.fstat(f.fileno())
return self._get_overlay_path(s.st_ino)
def materialize_dir(self, path: pathlib.Path) -> pathlib.Path:
"""Force the directory inode at the specified path to be materialized and
recorded in the overlay. Returns the path to the overlay file that stores the
data for this inode in the overlay.
"""
s = os.lstat(path)
assert stat.S_ISDIR(s.st_mode)
# Creating and then removing a file inside the directory will materialize it
with tempfile.NamedTemporaryFile(dir=str(path)):
pass
return self._get_overlay_path(s.st_ino)
def _get_overlay_path(self, inode_number: int) -> pathlib.Path:
subdir = "{:02x}".format(inode_number % 256)
return self.overlay_dir / subdir / str(inode_number)
def corrupt_file(
self,
path: pathlib.Path,
corrupt_function: typing.Callable[[pathlib.Path], None],
) -> None:
"""Given a relative path to a regular file in the checkout, ensure that the file
is materialized, unmount the checkout, corrupt the file in the overlay by
calling the specified corrupt_functoin and then remount the checkout.
"""
overlay_file_path = self.materialize_file(path)
self.eden.unmount(self.mount)
corrupt_function(overlay_file_path)
self.eden.mount(self.mount)

View File

@ -13,6 +13,7 @@ import errno
import inspect
import logging
import os
import pathlib
import shutil
import tempfile
import time
@ -232,6 +233,10 @@ class EdenTestCase(TestParent):
self.mount = os.path.join(self.mounts_dir, "main")
@property
def mount_path(self) -> pathlib.Path:
return pathlib.Path(self.mount)
def get_thrift_client(self) -> eden.thrift.EdenClient:
"""
Get a thrift client to the edenfs daemon.
@ -441,13 +446,11 @@ def test_replicator(
def _replicate_eden_repo_test(
test_class: Type[EdenRepoTest]
) -> Iterable[Tuple[str, Type[EdenRepoTest]]]:
class HgRepoTest(test_class):
def create_repo(self, name: str) -> repobase.Repository:
return self.create_hg_repo(name)
class HgRepoTest(HgRepoTestMixin, test_class):
pass
class GitRepoTest(test_class):
def create_repo(self, name: str) -> repobase.Repository:
return self.create_git_repo(name)
class GitRepoTest(GitRepoTestMixin, test_class):
pass
return [("Hg", HgRepoTest), ("Git", GitRepoTest)]
@ -459,3 +462,13 @@ def _replicate_eden_repo_test(
# classes named "MyTestHg" and "MyTestGit", which run the tests with
# mercurial and git repositories, respectively.
eden_repo_test = test_replicator(_replicate_eden_repo_test)
class HgRepoTestMixin:
def create_repo(self, name: str) -> repobase.Repository:
return self.create_hg_repo(name)
class GitRepoTestMixin:
def create_repo(self, name: str) -> repobase.Repository:
return self.create_git_repo(name)