unmount stale edenfs mounts in eden doctor

Summary:
If the Eden process dies or malfunctions, it's possible to end up with stale
edenfs mounts on the system.  Change `eden doctor` to correct them.

Reviewed By: simpkins

Differential Revision: D6659193

fbshipit-source-id: d9fcf2e68663295e4f43b2c11fd4503a1dfac038
This commit is contained in:
Chad Austin 2018-01-19 10:46:34 -08:00 committed by Facebook Github Bot
parent c2eb5f3f3f
commit b6b2a08998
9 changed files with 375 additions and 26 deletions

View File

@ -19,6 +19,7 @@ python_library(
"config.py",
"configinterpolator.py",
"doctor.py",
"mtab.py",
"rage.py",
"stats.py",
"stats_print.py",

View File

@ -7,6 +7,7 @@
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
import abc
import binascii
import json
import os
@ -18,6 +19,7 @@ from textwrap import dedent
from typing import Dict, List, Set, TextIO, Union
from . import config as config_mod
from . import version
from . import mtab
class CheckResultType(Enum):
@ -36,19 +38,18 @@ class CheckResult:
self.message = message
class Check:
class Check(abc.ABC):
@abc.abstractmethod
def do_check(self, dry_run: bool) -> CheckResult:
pass
def cure_what_ails_you(
config: config_mod.Config, dry_run: bool, out: TextIO
config: config_mod.Config,
dry_run: bool,
out: TextIO,
mount_table: mtab.MountTable,
) -> int:
mount_paths = config.get_mount_paths()
if len(mount_paths) == 0:
out.write('No mounts points to assess.\n')
return 1
is_healthy = config.check_health().is_healthy()
if not is_healthy:
out.write(
@ -62,9 +63,16 @@ def cure_what_ails_you(
'''
)
)
active_mount_points: List[str] = []
else:
with config.get_thrift_client() as client:
active_mount_points = [
mount.mountPoint for mount in client.listMounts()]
# This list is a mix of messages to print to stdout and checks to perform.
checks_and_messages: List[Union[str, Check]] = []
checks_and_messages: List[Union[str, Check]] = [
StaleMountsCheck(active_mount_points, mount_table),
]
if is_healthy:
checks_and_messages.append(EdenfsIsLatest(config))
else:
@ -74,7 +82,7 @@ def cure_what_ails_you(
)
watchman_roots = _get_watch_roots_for_watchman()
for mount_path in mount_paths:
for mount_path in active_mount_points:
# For now, we assume that each mount_path is actively mounted. We should
# update the listMounts() Thrift API to return information that notes
# whether a mount point is active and use it here.
@ -143,6 +151,76 @@ def cure_what_ails_you(
return 0
def printable_bytes(b: bytes) -> str:
    """Render raw bytes as text that is safe to print.

    The input is decoded as UTF-8; bytes that do not form valid UTF-8 are
    shown as backslash escapes instead of raising an error.
    """
    return b.decode(encoding='utf-8', errors='backslashreplace')
class StaleMountsCheck(Check):
    """Doctor check that finds and unmounts stale edenfs mounts.

    An edenfs FUSE entry in the system mount table is considered stale when
    its mount point is not one of the active mount points given to this check.
    """

    def __init__(self, active_mount_points: List[str],
                 mount_table: mtab.MountTable) -> None:
        """Store the inputs of the check.

        Args:
            active_mount_points: mount points that are currently active and
                must therefore never be unmounted by this check.
            mount_table: used to read the system mount table and to perform
                the unmounts.
        """
        self._active_mount_points = active_mount_points
        self._mount_table = mount_table

    def do_check(self, dry_run: bool) -> CheckResult:
        """Look for stale edenfs mounts and, unless dry_run, unmount them.

        Returns:
            NO_ISSUE when nothing is stale, NOT_FIXED_BECAUSE_DRY_RUN when
            stale mounts were found during a dry run, FIXED when no unmount
            attempt failed, and FAILED_TO_FIX otherwise.
        """
        stale_mounts = self.get_all_stale_eden_mount_points()
        if not stale_mounts:
            return CheckResult(CheckResultType.NO_ISSUE, '')

        if dry_run:
            message = self._describe(
                'Found', 'stale edenfs mount point', stale_mounts)
            message += 'Not unmounting because dry run.\n'
            return CheckResult(
                CheckResultType.NOT_FIXED_BECAUSE_DRY_RUN,
                message)

        # A set, so that a mount point handled by both passes is reported once.
        unmounted: Set[bytes] = set()
        failed_to_unmount: List[bytes] = []

        # Attempt to lazy unmount all of them first. For some reason,
        # lazy unmount can sometimes release any bind mounts inside.
        for mp in stale_mounts:
            if self._mount_table.unmount_lazy(mp):
                unmounted.add(mp)

        # Use a refreshed list -- it's possible MNT_DETACH succeeded on some of
        # the points.
        for mp in self.get_all_stale_eden_mount_points():
            if self._mount_table.unmount_force(mp):
                unmounted.add(mp)
            else:
                # Still mounted, so never report it as successfully unmounted.
                unmounted.discard(mp)
                failed_to_unmount.append(mp)

        if failed_to_unmount:
            message = ''
            if unmounted:
                message += self._describe(
                    'Successfully unmounted', 'mount point', unmounted)
            message += self._describe(
                'Failed to unmount', 'mount point', failed_to_unmount)
            return CheckResult(CheckResultType.FAILED_TO_FIX, message)

        # Count and listing both come from `unmounted` so they always agree.
        message = self._describe(
            'Unmounted', 'stale edenfs mount point', unmounted)
        return CheckResult(CheckResultType.FIXED, message)

    @staticmethod
    def _describe(
        action: str,
        noun: str,
        mounts: Union[List[bytes], Set[bytes]],
    ) -> str:
        """Format a '<action> N <noun>(s):' header plus one sorted line per mount."""
        plural = '' if len(mounts) == 1 else 's'
        listing = ''.join(
            f' {printable_bytes(mp)}\n' for mp in sorted(mounts))
        return f'{action} {len(mounts)} {noun}{plural}:\n{listing}'

    def get_all_stale_eden_mount_points(self) -> List[bytes]:
        """Return the sorted edenfs mount points that are not active."""
        # The mount table reports mount points as bytes while the active
        # mount points are given as str.  Without normalizing to bytes the
        # set difference would never remove anything and every live mount
        # would look stale.  os.fsencode() leaves bytes values untouched.
        active = {os.fsencode(mp) for mp in self._active_mount_points}
        return sorted(self.get_all_eden_mount_points() - active)

    def get_all_eden_mount_points(self) -> Set[bytes]:
        """Return the mount point of every edenfs FUSE entry in the mount table."""
        return {
            mount.mount_point
            for mount in self._mount_table.read()
            if mount.device == b'edenfs' and mount.vfstype == b'fuse'
        }
class WatchmanUsingEdenSubscriptionCheck(Check):
def __init__(self, path: str, watchman_roots: Set[str],
is_healthy: bool) -> None:

View File

@ -19,6 +19,7 @@ import sys
from . import config as config_mod
from . import debug as debug_mod
from . import doctor as doctor_mod
from . import mtab
from . import rage as rage_mod
from . import stats as stats_mod
from . import version as version_mod
@ -256,7 +257,8 @@ def do_config(args):
def do_doctor(args) -> int:
config = create_config(args)
return doctor_mod.cure_what_ails_you(config, args.dry_run, out=sys.stdout)
return doctor_mod.cure_what_ails_you(config, args.dry_run, out=sys.stdout,
mount_table=mtab.LinuxMountTable())
def do_mount(args):

71
eden/cli/mtab.py Normal file
View File

@ -0,0 +1,71 @@
#!/usr/bin/env python3
# Copyright (c) 2018-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
import abc
import logging
import re
import subprocess
from typing import List, NamedTuple
log = logging.getLogger('eden.cli.mtab')
class MountInfo(NamedTuple):
    """One entry of the system mount table, kept as raw bytes."""

    device: bytes
    mount_point: bytes
    vfstype: bytes
class MountTable(abc.ABC):
    """Abstract access to the system mount table.

    Concrete subclasses supply the way the table is read and the way mount
    points are unmounted.
    """

    @abc.abstractmethod
    def read(self) -> List[MountInfo]:
        "Returns the list of system mounts."

    @abc.abstractmethod
    def unmount_lazy(self, mount_point: bytes) -> bool:
        """Corresponds to `umount -l` on Linux.

        The bool result reports whether the unmount succeeded.
        """

    @abc.abstractmethod
    def unmount_force(self, mount_point: bytes) -> bool:
        """Corresponds to `umount -f` on Linux.

        The bool result reports whether the unmount succeeded.
        """
def _unescape_mtab_field(field: bytes) -> bytes:
    """Decode the three-digit octal escapes used inside mount table fields.

    Characters that would break the column layout (space, tab, newline and
    backslash) are stored as escapes such as ``\\040``; turn them back into
    the bytes they stand for.
    """
    # Anything that is not bytes is passed through untouched so callers that
    # (incorrectly) hand in text keep working exactly as before.
    if not isinstance(field, bytes) or b'\\' not in field:
        return field
    return re.sub(
        rb'\\([0-3][0-7]{2})',
        lambda match: bytes([int(match.group(1), 8)]),
        field,
    )


def parse_mtab(contents: bytes) -> List[MountInfo]:
    """Parse the raw contents of a mount table such as /proc/self/mounts.

    Args:
        contents: the mount table, one mount per line with six columns
            separated by whitespace.

    Returns:
        One MountInfo per well-formed line, in file order.  Blank lines are
        ignored; any other line without exactly six columns is skipped after
        logging a warning.
    """
    mounts = []
    for line in contents.splitlines():
        # columns split by space or tab per man page
        entries = line.split()
        if not entries:
            continue
        if len(entries) != 6:
            log.warning(
                'mount table line has %d entries instead of 6', len(entries))
            continue
        # Only the first three columns are kept; the mount options, dump
        # frequency and fsck pass number are not needed by any caller here.
        device, mount_point, vfstype = entries[:3]
        mounts.append(MountInfo(
            device=_unescape_mtab_field(device),
            mount_point=_unescape_mtab_field(mount_point),
            vfstype=vfstype,
        ))
    return mounts
class LinuxMountTable(MountTable):
    """MountTable that reads /proc/self/mounts and unmounts via `sudo umount`."""

    def read(self) -> List[MountInfo]:
        """Return the parsed entries of /proc/self/mounts."""
        # What's the most portable mtab path? I've seen both /etc/mtab and
        # /proc/self/mounts. CentOS 6 in particular does not symlink /etc/mtab
        # to /proc/self/mounts so go directly to /proc/self/mounts.
        # This code could eventually fall back to /proc/mounts and /etc/mtab.
        with open('/proc/self/mounts', 'rb') as mounts_file:
            raw_table = mounts_file.read()
        return parse_mtab(raw_table)

    def unmount_lazy(self, mount_point: bytes) -> bool:
        """Run `sudo umount -l`; True when the command exits with status 0."""
        # MNT_DETACH
        exit_status = subprocess.call(['sudo', 'umount', '-l', mount_point])
        return exit_status == 0

    def unmount_force(self, mount_point: bytes) -> bool:
        """Run `sudo umount -f`; True when the command exits with status 0."""
        # MNT_FORCE
        exit_status = subprocess.call(['sudo', 'umount', '-f', mount_point])
        return exit_status == 0

View File

@ -14,6 +14,7 @@ import subprocess
from . import debug as debug_mod
from . import doctor as doctor_mod
from . import mtab
from . import stats as stats_mod
from typing import IO
@ -66,7 +67,9 @@ def print_rpm_version(out: IO[bytes]):
def print_eden_doctor_report(config, out: IO[bytes]):
dry_run = True
doctor_output = io.StringIO()
doctor_rc = doctor_mod.cure_what_ails_you(config, dry_run, doctor_output)
doctor_rc = doctor_mod.cure_what_ails_you(
config, dry_run, doctor_output,
mount_table=mtab.LinuxMountTable())
out.write(
b'\neden doctor --dry-run (exit code %d):\n%s\n' %
(doctor_rc, doctor_output.getvalue().encode())

View File

@ -1,6 +1,7 @@
python_unittest(
name = "test",
srcs = glob(["*.py"]),
check_types = True,
deps = [
"//eden/cli:lib",
"//eden/py:py",

View File

@ -14,13 +14,15 @@ import tempfile
import unittest
from collections import OrderedDict
from textwrap import dedent
from typing import Any, Dict, Iterable, List, Optional
from typing import Any, Dict, Iterable, List, Optional, Set
from unittest.mock import call, patch
import eden.cli.doctor as doctor
import eden.cli.config as config_mod
from eden.cli.doctor import CheckResultType
from eden.cli import mtab
from fb303.ttypes import fb_status
import eden.dirstate
import facebook.eden.ttypes as eden_ttypes
class DoctorTest(unittest.TestCase):
@ -85,6 +87,10 @@ class DoctorTest(unittest.TestCase):
'client-dir': '/I_DO_NOT_EXIST2'
}
config = FakeConfig(mount_paths, is_healthy=True)
config.get_thrift_client()._mounts = [
eden_ttypes.MountInfo(mountPoint=edenfs_path1),
eden_ttypes.MountInfo(mountPoint=edenfs_path2),
]
os.mkdir(edenfs_path1)
hg_dir = os.path.join(edenfs_path1, '.hg')
@ -95,7 +101,8 @@ class DoctorTest(unittest.TestCase):
with open(dirstate, 'wb') as f:
eden.dirstate.write(f, parents, tuples_dict={}, copymap={})
exit_code = doctor.cure_what_ails_you(config, dry_run, out)
exit_code = doctor.cure_what_ails_you(
config, dry_run, out, FakeMountTable())
finally:
shutil.rmtree(tmp_dir)
@ -152,7 +159,12 @@ Number of issues that could not be fixed: 2.
'client-dir': '/I_DO_NOT_EXIST'
}
config = FakeConfig(mount_paths, is_healthy=True)
exit_code = doctor.cure_what_ails_you(config, dry_run, out)
config.get_thrift_client()._mounts = [
eden_ttypes.MountInfo(mountPoint=edenfs_path),
eden_ttypes.MountInfo(mountPoint=edenfs_path_not_watched),
]
exit_code = doctor.cure_what_ails_you(
config, dry_run, out, FakeMountTable())
self.assertEqual(
'Performing 2 checks for /path/to/eden-mount.\n'
@ -184,7 +196,8 @@ Number of issues that could not be fixed: 2.
}
}
config = FakeConfig(mount_paths, is_healthy=False)
exit_code = doctor.cure_what_ails_you(config, dry_run, out)
exit_code = doctor.cure_what_ails_you(
config, dry_run, out, FakeMountTable())
self.assertEqual(
dedent(
@ -195,7 +208,6 @@ To start Eden, run:
eden daemon
Cannot check if running latest edenfs because the daemon is not running.
Performing 3 checks for /path/to/eden-mount.
All is well.
'''
), out.getvalue()
@ -203,16 +215,6 @@ All is well.
mock_watchman.assert_has_calls(calls)
self.assertEqual(0, exit_code)
def test_fails_if_no_mount_points(self):
out = io.StringIO()
dry_run = False
mount_paths = {}
config = FakeConfig(mount_paths, is_healthy=False)
exit_code = doctor.cure_what_ails_you(config, dry_run, out)
self.assertEqual('No mounts points to assess.\n', out.getvalue())
self.assertEqual(1, exit_code)
@patch('eden.cli.doctor._call_watchman')
def test_no_issue_when_watchman_using_eden_watcher(self, mock_watchman):
self._test_watchman_watcher_check(
@ -457,6 +459,99 @@ All is well.
mock_rpm_q.assert_has_calls(calls)
class StaleMountsCheckTest(unittest.TestCase):
    """Exercises doctor.StaleMountsCheck against an in-memory FakeMountTable."""

    # Show complete diffs when a message comparison fails.
    maxDiff = None

    def setUp(self):
        # Two mount points are treated as active; each test decides what the
        # fake mount table contains in addition to (or instead of) them.
        self.active_mounts: List[bytes] = [b'/mnt/active1', b'/mnt/active2']
        self.mount_table = FakeMountTable()
        self.check = doctor.StaleMountsCheck(
            active_mount_points=self.active_mounts,
            mount_table=self.mount_table)

    def test_does_not_unmount_active_mounts(self):
        # Only active mounts are present, so no unmount may be attempted.
        self.mount_table.set_eden_mounts(self.active_mounts)

        result = self.check.do_check(dry_run=False)
        self.assertEqual(doctor.CheckResultType.NO_ISSUE, result.result_type)
        self.assertEqual([], self.mount_table.unmount_lazy_calls)
        self.assertEqual([], self.mount_table.unmount_force_calls)

    def test_stale_nonactive_mount_is_unmounted(self):
        # The lazy unmount succeeds, so the forced unmount is never needed.
        self.mount_table.set_eden_mounts(self.active_mounts + [b'/mnt/stale1'])

        result = self.check.do_check(dry_run=False)
        self.assertEqual(doctor.CheckResultType.FIXED, result.result_type)
        self.assertEqual(dedent('''\
Unmounted 1 stale edenfs mount point:
/mnt/stale1
'''), result.message)
        self.assertEqual([b'/mnt/stale1'], self.mount_table.unmount_lazy_calls)
        self.assertEqual([], self.mount_table.unmount_force_calls)

    def test_force_unmounts_if_lazy_fails(self):
        # Only the mount whose lazy unmount failed is retried with force.
        self.mount_table.set_eden_mounts(
            self.active_mounts + [b'/mnt/stale1', b'/mnt/stale2'])
        self.mount_table.fail_unmount_lazy(b'/mnt/stale1')

        result = self.check.do_check(dry_run=False)
        self.assertEqual(doctor.CheckResultType.FIXED, result.result_type)
        self.assertEqual(dedent('''\
Unmounted 2 stale edenfs mount points:
/mnt/stale1
/mnt/stale2
'''), result.message)
        self.assertEqual(
            [b'/mnt/stale1', b'/mnt/stale2'],
            self.mount_table.unmount_lazy_calls)
        self.assertEqual([b'/mnt/stale1'], self.mount_table.unmount_force_calls)

    def test_dry_run_prints_stale_mounts_and_does_not_unmount(self):
        # The mounts are registered out of order to check the sorted listing.
        self.mount_table.set_eden_mounts(
            self.active_mounts + [b'/mnt/stale2', b'/mnt/stale1'])

        result = self.check.do_check(dry_run=True)
        self.assertEqual(
            doctor.CheckResultType.NOT_FIXED_BECAUSE_DRY_RUN,
            result.result_type)
        self.assertEqual(dedent('''\
Found 2 stale edenfs mount points:
/mnt/stale1
/mnt/stale2
Not unmounting because dry run.
'''), result.message)
        self.assertEqual([], self.mount_table.unmount_lazy_calls)
        self.assertEqual([], self.mount_table.unmount_force_calls)

    def test_fails_if_unmount_fails(self):
        # Both lazy unmounts fail; the forced unmount then fails for stale1
        # only, so the result reports one success and one failure.
        self.mount_table.set_eden_mounts(
            self.active_mounts + [b'/mnt/stale1', b'/mnt/stale2'])
        self.mount_table.fail_unmount_lazy(b'/mnt/stale1', b'/mnt/stale2')
        self.mount_table.fail_unmount_force(b'/mnt/stale1')

        result = self.check.do_check(dry_run=False)
        self.assertEqual(doctor.CheckResultType.FAILED_TO_FIX, result.result_type)
        self.assertEqual(dedent('''\
Successfully unmounted 1 mount point:
/mnt/stale2
Failed to unmount 1 mount point:
/mnt/stale1
'''), result.message)
        self.assertEqual(
            [b'/mnt/stale1', b'/mnt/stale2'],
            self.mount_table.unmount_lazy_calls)
        self.assertEqual(
            [b'/mnt/stale1', b'/mnt/stale2'],
            self.mount_table.unmount_force_calls)

    def test_ignores_noneden_mounts(self):
        # An ext4 root filesystem is not an edenfs FUSE mount and must be
        # left alone.
        self.mount_table.set_mounts([
            mtab.MountInfo(device=b'/dev/sda1', mount_point=b'/', vfstype=b'ext4'),
        ])

        result = self.check.do_check(dry_run=False)
        self.assertEqual(doctor.CheckResultType.NO_ISSUE, result.result_type)
        self.assertEqual('', result.message)
        self.assertEqual([], self.mount_table.unmount_lazy_calls)
        self.assertEqual([], self.mount_table.unmount_force_calls)
def _create_watchman_subscription(
filewatcher_subscription: Optional[str] = None,
include_primary_subscription: bool = True,
@ -494,6 +589,20 @@ def _create_watchman_subscription(
}
class FakeClient:
    """Minimal stand-in for the Thrift client used by the doctor tests.

    It works as a context manager and reports whatever mounts a test has
    stored in ``_mounts``.
    """

    def __init__(self):
        # Tests overwrite this with the mount entries listMounts() should return.
        self._mounts = list()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        # Nothing to release; returning None lets any exception propagate.
        return None

    def listMounts(self):
        """Return the mounts configured on this fake."""
        return self._mounts
class FakeConfig:
def __init__(
self,
@ -504,6 +613,7 @@ class FakeConfig:
self._mount_paths = mount_paths
self._is_healthy = is_healthy
self._build_info = build_info if build_info else {}
self._fake_client = FakeClient()
def get_mount_paths(self) -> Iterable[str]:
return self._mount_paths.keys()
@ -517,3 +627,56 @@ class FakeConfig:
def get_server_build_info(self) -> Dict[str, str]:
return dict(self._build_info)
def get_thrift_client(self) -> FakeClient:
    """Return the FakeClient held by this config.

    The same object is returned on every call, so mounts a test stores on it
    are visible to the code under test.
    """
    return self._fake_client
class FakeMountTable(mtab.MountTable):
    """In-memory MountTable that records every unmount request.

    Tests choose which mounts exist and which unmount attempts should fail,
    then inspect the recorded calls.
    """

    def __init__(self):
        self.mounts: List[mtab.MountInfo] = []
        # Every mount point passed to unmount_lazy / unmount_force, in order.
        self.unmount_lazy_calls: List[bytes] = []
        self.unmount_force_calls: List[bytes] = []
        # Mount points for which the corresponding unmount reports failure.
        self.unmount_lazy_fails: Set[bytes] = set()
        self.unmount_force_fails: Set[bytes] = set()

    def set_eden_mounts(self, mounts: List[bytes]):
        """Replace the table with edenfs FUSE entries for the given mount points."""
        eden_entries = []
        for mount_point in mounts:
            eden_entries.append(
                mtab.MountInfo(
                    device=b'edenfs',
                    mount_point=mount_point,
                    vfstype=b'fuse'))
        self.set_mounts(eden_entries)

    def set_mounts(self, mounts: List[mtab.MountInfo]):
        """Replace the table contents in place."""
        self.mounts[:] = mounts

    def fail_unmount_lazy(self, *mounts: bytes):
        """Make unmount_lazy() report failure for these mount points."""
        self.unmount_lazy_fails.update(mounts)

    def fail_unmount_force(self, *mounts: bytes):
        """Make unmount_force() report failure for these mount points."""
        self.unmount_force_fails.update(mounts)

    def read(self) -> List[mtab.MountInfo]:
        return self.mounts

    def unmount_lazy(self, mount_point: bytes) -> bool:
        self.unmount_lazy_calls.append(mount_point)
        succeeded = mount_point not in self.unmount_lazy_fails
        if succeeded:
            self._remove_mount(mount_point)
        return succeeded

    def unmount_force(self, mount_point: bytes) -> bool:
        self.unmount_force_calls.append(mount_point)
        succeeded = mount_point not in self.unmount_force_fails
        if succeeded:
            self._remove_mount(mount_point)
        return succeeded

    def _remove_mount(self, mount_point: bytes):
        """Drop every table entry mounted at mount_point."""
        remaining = [
            entry for entry in self.mounts
            if entry.mount_point != mount_point
        ]
        self.mounts[:] = remaining

View File

@ -0,0 +1,30 @@
#!/usr/bin/env python3
#
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
import unittest
from eden.cli import mtab
class MTabTest(unittest.TestCase):
    """Tests for mtab.parse_mtab."""

    # Show complete diffs when a comparison fails.
    maxDiff = None

    def test_parse_mtab(self):
        # parse_mtab() is declared to take the raw bytes of the mount table
        # and its callers compare the parsed fields against bytes, so the
        # sample input and the expectations are bytes as well.
        contents = b'''\
homedir.eden.com:/home109/chadaustin/public_html /mnt/public/chadaustin nfs rw,context=user_u:object_r:user_home_dir_t,relatime,vers=3,rsize=65536,wsize=65536,namlen=255,soft,nosharecache,proto=tcp6,timeo=100,retrans=2,sec=krb5i,mountaddr=2401:db00:fffe:1007:face:0000:0:4007,mountvers=3,mountport=635,mountproto=udp6,local_lock=none,addr=2401:db00:fffe:1007:0000:b00c:0:4007 0 0
squashfuse_ll /mnt/xarfuse/uid-0/2c071047-ns-4026531840 fuse.squashfuse_ll rw,nosuid,nodev,relatime,user_id=0,group_id=0 0 0
bogus line here
edenfs /tmp/eden_test.4rec6drf/mounts/main fuse rw,nosuid,relatime,user_id=138655,group_id=100,default_permissions,allow_other 0 0
'''
        mount_infos = mtab.parse_mtab(contents)
        # The malformed "bogus line here" entry must be skipped.
        self.assertEqual(3, len(mount_infos))
        one, two, three = mount_infos
        self.assertEqual(b'nfs', one.vfstype)
        self.assertEqual(b'fuse.squashfuse_ll', two.vfstype)
        self.assertEqual(b'edenfs', three.device)
        self.assertEqual(
            b'/tmp/eden_test.4rec6drf/mounts/main', three.mount_point)
        self.assertEqual(b'fuse', three.vfstype)

0
eden/scripts/force-unmount-all.sh Executable file → Normal file
View File