unmount stale edenfs mounts in eden doctor

Summary: If the Eden process dies or malfunctions, it's possible to end up with stale edenfs mounts on the system. Change `eden doctor` to detect and unmount them. Reviewed By: simpkins Differential Revision: D6659193 fbshipit-source-id: d9fcf2e68663295e4f43b2c11fd4503a1dfac038
2024-10-06 14:58:03 +03:00 · 2018-01-19 10:46:34 -08:00 · 2018-01-19 10:46:34 -08:00 · b6b2a08998
commit b6b2a08998
parent c2eb5f3f3f
9 changed files with 375 additions and 26 deletions
--- a/eden/cli/TARGETS
+++ b/eden/cli/TARGETS
@ -19,6 +19,7 @@ python_library(
        "config.py",
        "configinterpolator.py",
        "doctor.py",
+        "mtab.py",
        "rage.py",
        "stats.py",
        "stats_print.py",
--- a/eden/cli/doctor.py
+++ b/eden/cli/doctor.py
@ -7,6 +7,7 @@
 # LICENSE file in the root directory of this source tree. An additional grant
 # of patent rights can be found in the PATENTS file in the same directory.

+import abc
 import binascii
 import json
 import os
@ -18,6 +19,7 @@ from textwrap import dedent
 from typing import Dict, List, Set, TextIO, Union
 from . import config as config_mod
 from . import version
+from . import mtab


 class CheckResultType(Enum):
@ -36,19 +38,18 @@ class CheckResult:
        self.message = message


-class Check:
+class Check(abc.ABC):
+    @abc.abstractmethod
    def do_check(self, dry_run: bool) -> CheckResult:
        pass


 def cure_what_ails_you(
-    config: config_mod.Config, dry_run: bool, out: TextIO
+    config: config_mod.Config,
+    dry_run: bool,
+    out: TextIO,
+    mount_table: mtab.MountTable,
 ) -> int:
-    mount_paths = config.get_mount_paths()
-    if len(mount_paths) == 0:
-        out.write('No mounts points to assess.\n')
-        return 1
-
    is_healthy = config.check_health().is_healthy()
    if not is_healthy:
        out.write(
@ -62,9 +63,16 @@ def cure_what_ails_you(
        '''
            )
        )
+        active_mount_points: List[str] = []
+    else:
+        with config.get_thrift_client() as client:
+            active_mount_points = [
+                mount.mountPoint for mount in client.listMounts()]

    # This list is a mix of messages to print to stdout and checks to perform.
-    checks_and_messages: List[Union[str, Check]] = []
+    checks_and_messages: List[Union[str, Check]] = [
+        StaleMountsCheck(active_mount_points, mount_table),
+    ]
    if is_healthy:
        checks_and_messages.append(EdenfsIsLatest(config))
    else:
@ -74,7 +82,7 @@ def cure_what_ails_you(
        )

    watchman_roots = _get_watch_roots_for_watchman()
-    for mount_path in mount_paths:
+    for mount_path in active_mount_points:
        # For now, we assume that each mount_path is actively mounted. We should
        # update the listMounts() Thrift API to return information that notes
        # whether a mount point is active and use it here.
@ -143,6 +151,76 @@ def cure_what_ails_you(
        return 0


+def printable_bytes(b: bytes) -> str:
+    """Render a raw byte path as text that is always safe to print.
+
+    The bytes are decoded as UTF-8; any byte that is not valid UTF-8 is shown
+    as a backslash escape instead of raising UnicodeDecodeError.
+    """
+    text = b.decode(encoding='utf-8', errors='backslashreplace')
+    return text
+
+
+class StaleMountsCheck(Check):
+    """Find edenfs FUSE mounts that are not active and unmount them.
+
+    A mount is considered stale when the system mount table lists it with
+    device ``edenfs`` and filesystem type ``fuse`` but it is not one of the
+    active mount points given to the constructor.
+    """
+
+    def __init__(self, active_mount_points: List[str],
+                 mount_table: mtab.MountTable) -> None:
+        """
+        active_mount_points: mount points that must be left alone.
+        mount_table: used to read the system mounts and to unmount.
+        """
+        self._active_mount_points = active_mount_points
+        self._mount_table = mount_table
+
+    def do_check(self, dry_run: bool) -> CheckResult:
+        """Report stale edenfs mounts and, unless dry_run is set, unmount them.
+
+        Returns NO_ISSUE when nothing is stale, NOT_FIXED_BECAUSE_DRY_RUN when
+        dry_run is set, FAILED_TO_FIX when a forced unmount failed, and FIXED
+        otherwise.
+        """
+        stale_mounts = self.get_all_stale_eden_mount_points()
+        if not stale_mounts:
+            return CheckResult(CheckResultType.NO_ISSUE, '')
+
+        if dry_run:
+            message = self._describe(
+                'Found', 'stale edenfs mount point', stale_mounts)
+            message += 'Not unmounting because dry run.\n'
+            return CheckResult(
+                CheckResultType.NOT_FIXED_BECAUSE_DRY_RUN, message)
+
+        # A set, so that a mount point handled by both passes below is only
+        # reported once.
+        unmounted: Set[bytes] = set()
+        failed_to_unmount: List[bytes] = []
+
+        # Attempt to lazy unmount all of them first. For some reason,
+        # lazy unmount can sometimes release any bind mounts inside.
+        for mp in stale_mounts:
+            if self._mount_table.unmount_lazy(mp):
+                unmounted.add(mp)
+
+        # Use a refreshed list -- it's possible MNT_DETACH succeeded on some of
+        # the points.
+        for mp in self.get_all_stale_eden_mount_points():
+            if self._mount_table.unmount_force(mp):
+                unmounted.add(mp)
+            else:
+                # Still mounted: never report it as unmounted, even if the
+                # lazy unmount claimed success.
+                unmounted.discard(mp)
+                failed_to_unmount.append(mp)
+
+        if failed_to_unmount:
+            message = ''
+            if unmounted:
+                message += self._describe(
+                    'Successfully unmounted', 'mount point', unmounted)
+            message += self._describe(
+                'Failed to unmount', 'mount point', failed_to_unmount)
+            return CheckResult(CheckResultType.FAILED_TO_FIX, message)
+
+        # Count what was actually unmounted so the header always agrees with
+        # the list printed below it.
+        message = self._describe(
+            'Unmounted', 'stale edenfs mount point', unmounted)
+        return CheckResult(CheckResultType.FIXED, message)
+
+    @staticmethod
+    def _describe(verb: str, noun: str,
+                  mount_points: Union[List[bytes], Set[bytes]]) -> str:
+        """Return a '<verb> <count> <noun>[s]:' header followed by the mount
+        points, sorted, one per indented line."""
+        count = len(mount_points)
+        plural = '' if count == 1 else 's'
+        lines = [f'{verb} {count} {noun}{plural}:\n']
+        lines.extend(
+            f'  {printable_bytes(mp)}\n' for mp in sorted(mount_points))
+        return ''.join(lines)
+
+    def get_all_stale_eden_mount_points(self) -> List[bytes]:
+        """Return, sorted, the edenfs mount points that are not active."""
+        # The mount table yields bytes while the active mount points are
+        # declared as str, and bytes never compare equal to str. Normalize to
+        # bytes first; otherwise no active mount is ever subtracted and every
+        # live mount would be treated as stale. os.fsencode() returns bytes
+        # input unchanged, so callers passing bytes keep working.
+        active = {os.fsencode(mp) for mp in self._active_mount_points}
+        return sorted(self.get_all_eden_mount_points() - active)
+
+    def get_all_eden_mount_points(self) -> Set[bytes]:
+        """Return the mount points of every edenfs FUSE mount on the system."""
+        return {
+            mount.mount_point
+            for mount in self._mount_table.read()
+            if mount.device == b'edenfs' and mount.vfstype == b'fuse'
+        }
+
+
 class WatchmanUsingEdenSubscriptionCheck(Check):
    def __init__(self, path: str, watchman_roots: Set[str],
                 is_healthy: bool) -> None:
--- a/eden/cli/main.py
+++ b/eden/cli/main.py
@ -19,6 +19,7 @@ import sys
 from . import config as config_mod
 from . import debug as debug_mod
 from . import doctor as doctor_mod
+from . import mtab
 from . import rage as rage_mod
 from . import stats as stats_mod
 from . import version as version_mod
@ -256,7 +257,8 @@ def do_config(args):

 def do_doctor(args) -> int:
    config = create_config(args)
-    return doctor_mod.cure_what_ails_you(config, args.dry_run, out=sys.stdout)
+    return doctor_mod.cure_what_ails_you(config, args.dry_run, out=sys.stdout,
+                                         mount_table=mtab.LinuxMountTable())


 def do_mount(args):
--- a/eden/cli/mtab.py
+++ b/eden/cli/mtab.py
@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+# Copyright (c) 2018-present, Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree. An additional grant
+# of patent rights can be found in the PATENTS file in the same directory.
+
+import abc
+import logging
+import subprocess
+from typing import List, NamedTuple
+
+
+# Logger used to report mount table lines that cannot be parsed.
+log = logging.getLogger('eden.cli.mtab')
+
+
+class MountInfo(NamedTuple):
+    # One entry of the system mount table. All fields are raw bytes.
+    device: bytes
+    mount_point: bytes
+    vfstype: bytes
+
+
+class MountTable(abc.ABC):
+    """Interface for reading the system mount table and unmounting entries."""
+
+    @abc.abstractmethod
+    def read(self) -> List[MountInfo]:
+        """Returns the list of system mounts."""
+
+    @abc.abstractmethod
+    def unmount_lazy(self, mount_point: bytes) -> bool:
+        """Lazily unmount mount_point; corresponds to `umount -l` on Linux.
+
+        Returns True if the unmount succeeded.
+        """
+
+    @abc.abstractmethod
+    def unmount_force(self, mount_point: bytes) -> bool:
+        """Forcibly unmount mount_point; corresponds to `umount -f` on Linux.
+
+        Returns True if the unmount succeeded.
+        """
+
+
+def parse_mtab(contents: bytes) -> List[MountInfo]:
+    """Parse mount table text into MountInfo records.
+
+    Every line is split on whitespace. A line that does not have exactly six
+    fields is logged as a warning and skipped; for the others only the first
+    three fields (device, mount point, filesystem type) are kept.
+
+    NOTE(review): fields are returned verbatim. getmntent(3) documents that
+    whitespace inside a field is stored as an octal escape such as \\040, so
+    such mount points presumably come back still escaped -- confirm.
+    """
+    parsed: List[MountInfo] = []
+    for raw_line in contents.splitlines():
+        # columns split by space or tab per man page
+        fields = raw_line.split()
+        if len(fields) == 6:
+            parsed.append(MountInfo(
+                device=fields[0],
+                mount_point=fields[1],
+                vfstype=fields[2],
+            ))
+        else:
+            log.warning(f'mount table line has {len(fields)} entries instead of 6')
+    return parsed
+
+
+class LinuxMountTable(MountTable):
+    """MountTable that reads /proc/self/mounts and unmounts by running
+    `sudo umount`."""
+
+    def read(self) -> List[MountInfo]:
+        # What's the most portable mtab path? I've seen both /etc/mtab and
+        # /proc/self/mounts.  CentOS 6 in particular does not symlink /etc/mtab
+        # to /proc/self/mounts so go directly to /proc/self/mounts.
+        # This code could eventually fall back to /proc/mounts and /etc/mtab.
+        with open('/proc/self/mounts', 'rb') as mounts_file:
+            raw_table = mounts_file.read()
+        return parse_mtab(raw_table)
+
+    def unmount_lazy(self, mount_point: bytes) -> bool:
+        # MNT_DETACH
+        return self._umount('-l', mount_point)
+
+    def unmount_force(self, mount_point: bytes) -> bool:
+        # MNT_FORCE
+        return self._umount('-f', mount_point)
+
+    @staticmethod
+    def _umount(flag: str, mount_point: bytes) -> bool:
+        """Run `sudo umount <flag> <mount_point>`; True if it exited with 0."""
+        exit_status = subprocess.call(['sudo', 'umount', flag, mount_point])
+        return exit_status == 0
--- a/eden/cli/rage.py
+++ b/eden/cli/rage.py
@ -14,6 +14,7 @@ import subprocess

 from . import debug as debug_mod
 from . import doctor as doctor_mod
+from . import mtab
 from . import stats as stats_mod
 from typing import IO

@ -66,7 +67,9 @@ def print_rpm_version(out: IO[bytes]):
 def print_eden_doctor_report(config, out: IO[bytes]):
    dry_run = True
    doctor_output = io.StringIO()
-    doctor_rc = doctor_mod.cure_what_ails_you(config, dry_run, doctor_output)
+    doctor_rc = doctor_mod.cure_what_ails_you(
+        config, dry_run, doctor_output,
+        mount_table=mtab.LinuxMountTable())
    out.write(
        b'\neden doctor --dry-run (exit code %d):\n%s\n' %
        (doctor_rc, doctor_output.getvalue().encode())
--- a/eden/cli/test/TARGETS
+++ b/eden/cli/test/TARGETS
@ -1,6 +1,7 @@
 python_unittest(
    name = "test",
    srcs = glob(["*.py"]),
+    check_types = True,
    deps = [
        "//eden/cli:lib",
        "//eden/py:py",
--- a/eden/cli/test/doctor_test.py
+++ b/eden/cli/test/doctor_test.py
@ -14,13 +14,15 @@ import tempfile
 import unittest
 from collections import OrderedDict
 from textwrap import dedent
-from typing import Any, Dict, Iterable, List, Optional
+from typing import Any, Dict, Iterable, List, Optional, Set
 from unittest.mock import call, patch
 import eden.cli.doctor as doctor
 import eden.cli.config as config_mod
 from eden.cli.doctor import CheckResultType
+from eden.cli import mtab
 from fb303.ttypes import fb_status
 import eden.dirstate
+import facebook.eden.ttypes as eden_ttypes


 class DoctorTest(unittest.TestCase):
@ -85,6 +87,10 @@ class DoctorTest(unittest.TestCase):
                'client-dir': '/I_DO_NOT_EXIST2'
            }
            config = FakeConfig(mount_paths, is_healthy=True)
+            config.get_thrift_client()._mounts = [
+                eden_ttypes.MountInfo(mountPoint=edenfs_path1),
+                eden_ttypes.MountInfo(mountPoint=edenfs_path2),
+            ]

            os.mkdir(edenfs_path1)
            hg_dir = os.path.join(edenfs_path1, '.hg')
@ -95,7 +101,8 @@ class DoctorTest(unittest.TestCase):
            with open(dirstate, 'wb') as f:
                eden.dirstate.write(f, parents, tuples_dict={}, copymap={})

-            exit_code = doctor.cure_what_ails_you(config, dry_run, out)
+            exit_code = doctor.cure_what_ails_you(
+                config, dry_run, out, FakeMountTable())
        finally:
            shutil.rmtree(tmp_dir)

@ -152,7 +159,12 @@ Number of issues that could not be fixed: 2.
            'client-dir': '/I_DO_NOT_EXIST'
        }
        config = FakeConfig(mount_paths, is_healthy=True)
-        exit_code = doctor.cure_what_ails_you(config, dry_run, out)
+        config.get_thrift_client()._mounts = [
+            eden_ttypes.MountInfo(mountPoint=edenfs_path),
+            eden_ttypes.MountInfo(mountPoint=edenfs_path_not_watched),
+        ]
+        exit_code = doctor.cure_what_ails_you(
+            config, dry_run, out, FakeMountTable())

        self.assertEqual(
            'Performing 2 checks for /path/to/eden-mount.\n'
@ -184,7 +196,8 @@ Number of issues that could not be fixed: 2.
            }
        }
        config = FakeConfig(mount_paths, is_healthy=False)
-        exit_code = doctor.cure_what_ails_you(config, dry_run, out)
+        exit_code = doctor.cure_what_ails_you(
+            config, dry_run, out, FakeMountTable())

        self.assertEqual(
            dedent(
@ -195,7 +208,6 @@ To start Eden, run:
    eden daemon

 Cannot check if running latest edenfs because the daemon is not running.
-Performing 3 checks for /path/to/eden-mount.
 All is well.
 '''
            ), out.getvalue()
@ -203,16 +215,6 @@ All is well.
        mock_watchman.assert_has_calls(calls)
        self.assertEqual(0, exit_code)

-    def test_fails_if_no_mount_points(self):
-        out = io.StringIO()
-        dry_run = False
-        mount_paths = {}
-        config = FakeConfig(mount_paths, is_healthy=False)
-
-        exit_code = doctor.cure_what_ails_you(config, dry_run, out)
-        self.assertEqual('No mounts points to assess.\n', out.getvalue())
-        self.assertEqual(1, exit_code)
-
    @patch('eden.cli.doctor._call_watchman')
    def test_no_issue_when_watchman_using_eden_watcher(self, mock_watchman):
        self._test_watchman_watcher_check(
@ -457,6 +459,99 @@ All is well.
        mock_rpm_q.assert_has_calls(calls)


+class StaleMountsCheckTest(unittest.TestCase):
+    """Tests for doctor.StaleMountsCheck, run against a FakeMountTable so
+    that nothing is really unmounted."""
+    maxDiff = None
+
+    def setUp(self):
+        # NOTE(review): these are bytes, while StaleMountsCheck annotates
+        # active_mount_points as List[str].
+        self.active_mounts: List[bytes] = [b'/mnt/active1', b'/mnt/active2']
+        self.mount_table = FakeMountTable()
+        self.check = doctor.StaleMountsCheck(
+            active_mount_points=self.active_mounts,
+            mount_table=self.mount_table)
+
+    def test_does_not_unmount_active_mounts(self):
+        # Only the active mounts are present, so nothing is stale.
+        self.mount_table.set_eden_mounts(self.active_mounts)
+        result = self.check.do_check(dry_run=False)
+        self.assertEqual(doctor.CheckResultType.NO_ISSUE, result.result_type)
+        self.assertEqual([], self.mount_table.unmount_lazy_calls)
+        self.assertEqual([], self.mount_table.unmount_force_calls)
+
+    def test_stale_nonactive_mount_is_unmounted(self):
+        # The lazy unmount succeeds, so no forced unmount is expected.
+        self.mount_table.set_eden_mounts(self.active_mounts + [b'/mnt/stale1'])
+        result = self.check.do_check(dry_run=False)
+        self.assertEqual(doctor.CheckResultType.FIXED, result.result_type)
+        self.assertEqual(dedent('''\
+            Unmounted 1 stale edenfs mount point:
+              /mnt/stale1
+        '''), result.message)
+        self.assertEqual([b'/mnt/stale1'], self.mount_table.unmount_lazy_calls)
+        self.assertEqual([], self.mount_table.unmount_force_calls)
+
+    def test_force_unmounts_if_lazy_fails(self):
+        # The lazy unmount fails for stale1 only, so only stale1 should get a
+        # forced unmount.
+        self.mount_table.set_eden_mounts(
+            self.active_mounts + [b'/mnt/stale1', b'/mnt/stale2'])
+        self.mount_table.fail_unmount_lazy(b'/mnt/stale1')
+
+        result = self.check.do_check(dry_run=False)
+        self.assertEqual(doctor.CheckResultType.FIXED, result.result_type)
+        self.assertEqual(dedent('''\
+            Unmounted 2 stale edenfs mount points:
+              /mnt/stale1
+              /mnt/stale2
+        '''), result.message)
+        self.assertEqual(
+            [b'/mnt/stale1', b'/mnt/stale2'],
+            self.mount_table.unmount_lazy_calls)
+        self.assertEqual([b'/mnt/stale1'], self.mount_table.unmount_force_calls)
+
+    def test_dry_run_prints_stale_mounts_and_does_not_unmount(self):
+        # The mounts are registered out of order; the message lists them
+        # sorted.
+        self.mount_table.set_eden_mounts(
+            self.active_mounts + [b'/mnt/stale2', b'/mnt/stale1'])
+        result = self.check.do_check(dry_run=True)
+        self.assertEqual(
+            doctor.CheckResultType.NOT_FIXED_BECAUSE_DRY_RUN,
+            result.result_type)
+        self.assertEqual(dedent('''\
+            Found 2 stale edenfs mount points:
+              /mnt/stale1
+              /mnt/stale2
+            Not unmounting because dry run.
+        '''), result.message)
+        self.assertEqual([], self.mount_table.unmount_lazy_calls)
+        self.assertEqual([], self.mount_table.unmount_force_calls)
+
+    def test_fails_if_unmount_fails(self):
+        # Both lazy unmounts fail, and the forced unmount of stale1 fails too.
+        self.mount_table.set_eden_mounts(
+            self.active_mounts + [b'/mnt/stale1', b'/mnt/stale2'])
+        self.mount_table.fail_unmount_lazy(b'/mnt/stale1', b'/mnt/stale2')
+        self.mount_table.fail_unmount_force(b'/mnt/stale1')
+
+        result = self.check.do_check(dry_run=False)
+        self.assertEqual(doctor.CheckResultType.FAILED_TO_FIX, result.result_type)
+        self.assertEqual(dedent('''\
+            Successfully unmounted 1 mount point:
+              /mnt/stale2
+            Failed to unmount 1 mount point:
+              /mnt/stale1
+        '''), result.message)
+        self.assertEqual(
+            [b'/mnt/stale1', b'/mnt/stale2'],
+            self.mount_table.unmount_lazy_calls)
+        self.assertEqual(
+            [b'/mnt/stale1', b'/mnt/stale2'],
+            self.mount_table.unmount_force_calls)
+
+    def test_ignores_noneden_mounts(self):
+        # A mount whose device is not edenfs must never be touched.
+        self.mount_table.set_mounts([
+            mtab.MountInfo(device=b'/dev/sda1', mount_point=b'/', vfstype=b'ext4'),
+        ])
+        result = self.check.do_check(dry_run=False)
+        self.assertEqual(doctor.CheckResultType.NO_ISSUE, result.result_type)
+        self.assertEqual('', result.message)
+        self.assertEqual([], self.mount_table.unmount_lazy_calls)
+        self.assertEqual([], self.mount_table.unmount_force_calls)
+
+
 def _create_watchman_subscription(
    filewatcher_subscription: Optional[str] = None,
    include_primary_subscription: bool = True,
@ -494,6 +589,20 @@ def _create_watchman_subscription(
    }


+class FakeClient:
+    """Test stand-in for the Thrift client: a context manager whose
+    listMounts() returns whatever the test stored in ``_mounts``."""
+
+    def __init__(self):
+        self._mounts = []
+
+    def listMounts(self):
+        return self._mounts
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, exc_traceback):
+        # Nothing to release; returning None lets any exception propagate.
+        return None
+
+
 class FakeConfig:
    def __init__(
        self,
@ -504,6 +613,7 @@ class FakeConfig:
        self._mount_paths = mount_paths
        self._is_healthy = is_healthy
        self._build_info = build_info if build_info else {}
+        self._fake_client = FakeClient()

    def get_mount_paths(self) -> Iterable[str]:
        return self._mount_paths.keys()
@ -517,3 +627,56 @@ class FakeConfig:

    def get_server_build_info(self) -> Dict[str, str]:
        return dict(self._build_info)
+
+    def get_thrift_client(self) -> FakeClient:
+        return self._fake_client
+
+
+class FakeMountTable(mtab.MountTable):
+    """In-memory MountTable for tests.
+
+    Records every unmount request and lets a test choose which mount points
+    refuse to unmount.
+    """
+
+    def __init__(self):
+        self.mounts: List[mtab.MountInfo] = []
+        self.unmount_lazy_calls: List[bytes] = []
+        self.unmount_force_calls: List[bytes] = []
+        self.unmount_lazy_fails: Set[bytes] = set()
+        self.unmount_force_fails: Set[bytes] = set()
+
+    def set_eden_mounts(self, mounts: List[bytes]):
+        # Present each given mount point as an edenfs FUSE mount.
+        eden_infos = []
+        for mount_point in mounts:
+            eden_infos.append(mtab.MountInfo(
+                device=b'edenfs', mount_point=mount_point, vfstype=b'fuse'))
+        self.set_mounts(eden_infos)
+
+    def set_mounts(self, mounts: List[mtab.MountInfo]):
+        # Replace the contents in place so the list object stays the same.
+        self.mounts[:] = mounts
+
+    def fail_unmount_lazy(self, *mounts: bytes):
+        self.unmount_lazy_fails.update(mounts)
+
+    def fail_unmount_force(self, *mounts: bytes):
+        self.unmount_force_fails.update(mounts)
+
+    def read(self) -> List[mtab.MountInfo]:
+        return self.mounts
+
+    def unmount_lazy(self, mount_point: bytes) -> bool:
+        return self._record_and_unmount(
+            mount_point, self.unmount_lazy_calls, self.unmount_lazy_fails)
+
+    def unmount_force(self, mount_point: bytes) -> bool:
+        return self._record_and_unmount(
+            mount_point, self.unmount_force_calls, self.unmount_force_fails)
+
+    def _record_and_unmount(self, mount_point: bytes, calls: List[bytes],
+                            fails: Set[bytes]) -> bool:
+        # Log the request first, so that failed attempts are recorded too.
+        calls.append(mount_point)
+        if mount_point in fails:
+            return False
+        self._remove_mount(mount_point)
+        return True
+
+    def _remove_mount(self, mount_point: bytes):
+        remaining = [
+            info for info in self.mounts if info.mount_point != mount_point]
+        self.mounts[:] = remaining
--- a/eden/cli/test/mtab_test.py
+++ b/eden/cli/test/mtab_test.py
@ -0,0 +1,30 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2017-present, Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree. An additional grant
+# of patent rights can be found in the PATENTS file in the same directory.
+
+import unittest
+from eden.cli import mtab
+
+
+class MTabTest(unittest.TestCase):
+    """Tests for mtab.parse_mtab."""
+    # The diffs for what is written to stdout can be large.
+    maxDiff = None
+
+    def test_parse_mtab(self):
+        # parse_mtab() is declared to take bytes, so feed it bytes and expect
+        # bytes fields back. The "bogus line here" entry has 3 fields instead
+        # of 6 and must be skipped.
+        contents = b'''\
homedir.eden.com:/home109/chadaustin/public_html /mnt/public/chadaustin nfs rw,context=user_u:object_r:user_home_dir_t,relatime,vers=3,rsize=65536,wsize=65536,namlen=255,soft,nosharecache,proto=tcp6,timeo=100,retrans=2,sec=krb5i,mountaddr=2401:db00:fffe:1007:face:0000:0:4007,mountvers=3,mountport=635,mountproto=udp6,local_lock=none,addr=2401:db00:fffe:1007:0000:b00c:0:4007 0 0
squashfuse_ll /mnt/xarfuse/uid-0/2c071047-ns-4026531840 fuse.squashfuse_ll rw,nosuid,nodev,relatime,user_id=0,group_id=0 0 0
bogus line here
edenfs /tmp/eden_test.4rec6drf/mounts/main fuse rw,nosuid,relatime,user_id=138655,group_id=100,default_permissions,allow_other 0 0
+'''
+        mount_infos = mtab.parse_mtab(contents)
+        self.assertEqual(3, len(mount_infos))
+        one, two, three = mount_infos
+        self.assertEqual(b'edenfs', three.device)
+        self.assertEqual(b'/tmp/eden_test.4rec6drf/mounts/main', three.mount_point)
+        self.assertEqual(b'fuse', three.vfstype)
--- a/eden/scripts/force-unmount-all.sh
+++ b/eden/scripts/force-unmount-all.sh