mirror of
https://github.com/facebook/sapling.git
synced 2024-10-09 00:14:35 +03:00
fix eden doctor
to avoid interfering with other edenfs instances
Summary: Only treat edenfs mount points as stale if attempting to access them fails with ENOTCONN. If we can successfully access the mount point then it cannot be stale and does not need to be unmounted. This prevents `eden doctor` from incorrectly unmounting mount points owned by another edenfs process running on this system. Reviewed By: bolinfest Differential Revision: D7683503 fbshipit-source-id: 412cc17442fac1e8030dd40de0e0f642c15d1253
This commit is contained in:
parent
2812d73d2c
commit
c21571f23a
@ -93,8 +93,8 @@ def cure_what_ails_you(
|
||||
if mount_path not in config.get_mount_paths():
|
||||
# TODO: if there are mounts in active_mount_points that aren't in
|
||||
# config.get_mount_paths(), should we try to add them to the config?
|
||||
# I've only seen this happen in the wild if a clone fails partway, for
|
||||
# example, if a post-clone hook fails.
|
||||
# I've only seen this happen in the wild if a clone fails partway,
|
||||
# for example, if a post-clone hook fails.
|
||||
continue
|
||||
|
||||
# For now, we assume that each mount_path is actively mounted. We should
|
||||
@ -178,10 +178,9 @@ class StaleMountsCheck(Check):
|
||||
self._mount_table = mount_table
|
||||
|
||||
def do_check(self, dry_run: bool) -> CheckResult:
|
||||
active_mount_devices: List[int] = []
|
||||
for amp in self._active_mount_points:
|
||||
try:
|
||||
active_mount_devices.append(self._mount_table.lstat(amp).st_dev)
|
||||
self._mount_table.lstat(amp).st_dev
|
||||
except OSError as e:
|
||||
# If dry_run, should this return NOT_FIXED_BECAUSE_DRY_RUN?
|
||||
return CheckResult(
|
||||
@ -189,14 +188,13 @@ class StaleMountsCheck(Check):
|
||||
f'Failed to lstat active eden mount {amp}\n'
|
||||
)
|
||||
|
||||
stale_mounts = self.get_all_stale_eden_mount_points(
|
||||
active_mount_devices
|
||||
)
|
||||
stale_mounts = self.get_all_stale_eden_mount_points()
|
||||
if not stale_mounts:
|
||||
return CheckResult(CheckResultType.NO_ISSUE, '')
|
||||
|
||||
if dry_run:
|
||||
message = f'Found {len(stale_mounts)} stale edenfs mount point{"s" if len(stale_mounts) != 1 else ""}:\n'
|
||||
message = (f'Found {len(stale_mounts)} stale edenfs mount '
|
||||
f'point{"s" if len(stale_mounts) != 1 else ""}:\n')
|
||||
for mp in stale_mounts:
|
||||
message += f' {printable_bytes(mp)}\n'
|
||||
message += 'Not unmounting because dry run.\n'
|
||||
@ -216,7 +214,7 @@ class StaleMountsCheck(Check):
|
||||
|
||||
# Use a refreshed list -- it's possible MNT_DETACH succeeded on some of
|
||||
# the points.
|
||||
for mp in self.get_all_stale_eden_mount_points(active_mount_devices):
|
||||
for mp in self.get_all_stale_eden_mount_points():
|
||||
if self._mount_table.unmount_force(mp):
|
||||
unmounted.append(mp)
|
||||
else:
|
||||
@ -225,27 +223,33 @@ class StaleMountsCheck(Check):
|
||||
if failed_to_unmount:
|
||||
message = ''
|
||||
if len(unmounted):
|
||||
message += f'Successfully unmounted {len(unmounted)} mount point{"s" if len(unmounted) != 1 else ""}:\n'
|
||||
message += (f'Successfully unmounted {len(unmounted)} mount '
|
||||
f'point{"s" if len(unmounted) != 1 else ""}:\n')
|
||||
for mp in sorted(unmounted):
|
||||
message += f' {printable_bytes(mp)}\n'
|
||||
message += f'Failed to unmount {len(failed_to_unmount)} mount point{"s" if len(failed_to_unmount) != 1 else ""}:\n'
|
||||
message += (f'Failed to unmount {len(failed_to_unmount)} mount '
|
||||
f'point{"s" if len(failed_to_unmount) != 1 else ""}:\n')
|
||||
for mp in sorted(failed_to_unmount):
|
||||
message += f' {printable_bytes(mp)}\n'
|
||||
return CheckResult(CheckResultType.FAILED_TO_FIX, message)
|
||||
else:
|
||||
message = f'Unmounted {len(stale_mounts)} stale edenfs mount point{"s" if len(stale_mounts) != 1 else ""}:\n'
|
||||
message = (f'Unmounted {len(stale_mounts)} stale edenfs mount '
|
||||
f'point{"s" if len(stale_mounts) != 1 else ""}:\n')
|
||||
for mp in sorted(unmounted):
|
||||
message += f' {printable_bytes(mp)}\n'
|
||||
return CheckResult(CheckResultType.FIXED, message)
|
||||
|
||||
def get_all_stale_eden_mount_points(self, active_mount_devices: List[int]
|
||||
) -> List[bytes]:
|
||||
me = os.getuid()
|
||||
|
||||
def get_all_stale_eden_mount_points(self) -> List[bytes]:
|
||||
stale_eden_mount_points: Set[bytes] = set()
|
||||
for mount_point in self.get_all_eden_mount_points():
|
||||
try:
|
||||
st = self._mount_table.lstat(mount_point)
|
||||
# All eden mounts should have a .eden directory.
|
||||
# If the edenfs daemon serving this mount point has died we
|
||||
# will get ENOTCONN when trying to access it. (Simply calling
|
||||
# lstat() on the root directory itself can succeed even in this
|
||||
# case.)
|
||||
eden_dir = os.path.join(mount_point, b'.eden')
|
||||
self._mount_table.lstat(eden_dir)
|
||||
except OSError as e:
|
||||
if e.errno == errno.ENOTCONN:
|
||||
stale_eden_mount_points.add(mount_point)
|
||||
@ -254,13 +258,6 @@ class StaleMountsCheck(Check):
|
||||
f"Unclear whether {printable_bytes(mount_point)} "
|
||||
f"is stale or not. lstat() failed: {e}"
|
||||
)
|
||||
else:
|
||||
# Exclude any mounts that aren't owned by the current user and whose
|
||||
# device ID matches the device ID of any existing mounts. Avoid
|
||||
# excluding by path because the same FUSE mount can show up in the
|
||||
# mount table multiple times if it's underneath a bind mount.
|
||||
if st.st_uid == me and st.st_dev not in active_mount_devices:
|
||||
stale_eden_mount_points.add(mount_point)
|
||||
|
||||
return sorted(stale_eden_mount_points)
|
||||
|
||||
|
@ -615,26 +615,19 @@ class StaleMountsCheckTest(unittest.TestCase):
|
||||
self.assertEqual([], self.mount_table.unmount_lazy_calls)
|
||||
self.assertEqual([], self.mount_table.unmount_force_calls)
|
||||
|
||||
def test_stale_nonactive_mount_is_unmounted(self):
|
||||
def test_working_nonactive_mount_is_not_unmounted(self):
|
||||
# Add a working edenfs mount that is not part of our active list
|
||||
self.mount_table.add_mount('/mnt/stale1')
|
||||
self.mount_table.add_mount('/mnt/other1')
|
||||
|
||||
result = self.check.do_check(dry_run=False)
|
||||
self.assertEqual(doctor.CheckResultType.FIXED, result.result_type)
|
||||
self.assertEqual(
|
||||
dedent(
|
||||
'''\
|
||||
Unmounted 1 stale edenfs mount point:
|
||||
/mnt/stale1
|
||||
'''
|
||||
), result.message
|
||||
)
|
||||
self.assertEqual([b'/mnt/stale1'], self.mount_table.unmount_lazy_calls)
|
||||
self.assertEqual('', result.message)
|
||||
self.assertEqual(doctor.CheckResultType.NO_ISSUE, result.result_type)
|
||||
self.assertEqual([], self.mount_table.unmount_lazy_calls)
|
||||
self.assertEqual([], self.mount_table.unmount_force_calls)
|
||||
|
||||
def test_force_unmounts_if_lazy_fails(self):
|
||||
self.mount_table.add_mount('/mnt/stale1')
|
||||
self.mount_table.add_mount('/mnt/stale2')
|
||||
self.mount_table.add_stale_mount('/mnt/stale1')
|
||||
self.mount_table.add_stale_mount('/mnt/stale2')
|
||||
self.mount_table.fail_unmount_lazy(b'/mnt/stale1')
|
||||
|
||||
result = self.check.do_check(dry_run=False)
|
||||
@ -655,8 +648,8 @@ class StaleMountsCheckTest(unittest.TestCase):
|
||||
self.assertEqual([b'/mnt/stale1'], self.mount_table.unmount_force_calls)
|
||||
|
||||
def test_dry_run_prints_stale_mounts_and_does_not_unmount(self):
|
||||
self.mount_table.add_mount('/mnt/stale1')
|
||||
self.mount_table.add_mount('/mnt/stale2')
|
||||
self.mount_table.add_stale_mount('/mnt/stale1')
|
||||
self.mount_table.add_stale_mount('/mnt/stale2')
|
||||
|
||||
result = self.check.do_check(dry_run=True)
|
||||
self.assertEqual(
|
||||
@ -676,8 +669,8 @@ class StaleMountsCheckTest(unittest.TestCase):
|
||||
self.assertEqual([], self.mount_table.unmount_force_calls)
|
||||
|
||||
def test_fails_if_unmount_fails(self):
|
||||
self.mount_table.add_mount('/mnt/stale1')
|
||||
self.mount_table.add_mount('/mnt/stale2')
|
||||
self.mount_table.add_stale_mount('/mnt/stale1')
|
||||
self.mount_table.add_stale_mount('/mnt/stale2')
|
||||
self.mount_table.fail_unmount_lazy(b'/mnt/stale1', b'/mnt/stale2')
|
||||
self.mount_table.fail_unmount_force(b'/mnt/stale1')
|
||||
|
||||
@ -713,7 +706,9 @@ class StaleMountsCheckTest(unittest.TestCase):
|
||||
self.assertEqual([], self.mount_table.unmount_force_calls)
|
||||
|
||||
def test_gives_up_if_cannot_stat_active_mount(self):
|
||||
del self.mount_table.stats['/mnt/active1']
|
||||
self.mount_table.fail_access('/mnt/active1', errno.ENOENT)
|
||||
self.mount_table.fail_access('/mnt/active1/.eden', errno.ENOENT)
|
||||
|
||||
result = self.check.do_check(dry_run=False)
|
||||
self.assertEqual(
|
||||
doctor.CheckResultType.FAILED_TO_FIX, result.result_type
|
||||
@ -727,6 +722,7 @@ class StaleMountsCheckTest(unittest.TestCase):
|
||||
def test_does_not_unmount_if_cannot_stat_stale_mount(self, warning):
|
||||
self.mount_table.add_mount('/mnt/stale1')
|
||||
self.mount_table.fail_access('/mnt/stale1', errno.EACCES)
|
||||
self.mount_table.fail_access('/mnt/stale1/.eden', errno.EACCES)
|
||||
|
||||
result = self.check.do_check(dry_run=False)
|
||||
self.assertEqual(doctor.CheckResultType.NO_ISSUE, result.result_type)
|
||||
@ -740,8 +736,7 @@ class StaleMountsCheckTest(unittest.TestCase):
|
||||
)
|
||||
|
||||
def test_does_unmount_if_stale_mount_is_unconnected(self):
|
||||
self.mount_table.add_mount('/mnt/stale1')
|
||||
self.mount_table.fail_access('/mnt/stale1', errno.ENOTCONN)
|
||||
self.mount_table.add_stale_mount('/mnt/stale1')
|
||||
|
||||
result = self.check.do_check(dry_run=False)
|
||||
self.assertEqual(doctor.CheckResultType.FIXED, result.result_type)
|
||||
@ -883,6 +878,23 @@ class FakeMountTable(mtab.MountTable):
|
||||
|
||||
self._add_mount_info(path, device=device, vfstype=vfstype)
|
||||
self.stats[path] = mtab.MTStat(st_uid=uid, st_dev=dev)
|
||||
if device == 'edenfs':
|
||||
self.stats[os.path.join(path, '.eden')] = mtab.MTStat(
|
||||
st_uid=uid, st_dev=dev
|
||||
)
|
||||
|
||||
def add_stale_mount(
|
||||
self,
|
||||
path: str,
|
||||
uid: Optional[int] = None,
|
||||
dev: Optional[int] = None,
|
||||
) -> None:
|
||||
# Stale mounts are always edenfs FUSE mounts
|
||||
self.add_mount(path, uid=uid, dev=dev)
|
||||
# Stale mounts still successfully respond to stat() calls for the root
|
||||
# directory itself, but fail stat() calls to any other path with
|
||||
# ENOTCONN
|
||||
self.fail_access(os.path.join(path, '.eden'), errno.ENOTCONN)
|
||||
|
||||
def fail_access(self, path: str, errnum: int) -> None:
|
||||
self.stats[path] = OSError(errnum, os.strerror(errnum))
|
||||
|
Loading…
Reference in New Issue
Block a user