sapling/eden/fs/cli/doctor/__init__.py
Adam Simpkins a26b8e9930 update edenfsctl to report the compile-time version
Summary:
Update most locations in edenfsctl to report the version number that was built
into the edenfsctl binary at build time, rather than querying the RPM database
for the installed RPM version.  The RPM behavior only works on to RedHat-based
Linux systems, and the currently running process doesn't necessarily have to
have come from the RPM.

The one place where we do still attempt to print the RPM version is in the
`edenfsctl rage` report, when running on Linux.

Reviewed By: wez

Differential Revision: D21000168

fbshipit-source-id: 0fb747e71b6950d74f22c458efa0dfcbd45270bd
2020-04-22 12:48:48 -07:00

580 lines
20 KiB
Python

#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2.
# pyre-strict
import os
import shlex
from pathlib import Path
from textwrap import dedent
from typing import Dict, Optional
from eden.fs.cli import (
config as config_mod,
filesystem,
mtab,
proc_utils as proc_utils_mod,
ui,
version,
)
from eden.fs.cli.config import EdenCheckout, EdenInstance
from facebook.eden.ttypes import MountState
from fb303_core.ttypes import fb303_status
from . import (
check_filesystems,
check_hg,
check_os,
check_rogue_edenfs,
check_stale_mounts,
check_watchman,
)
from .problem import (
DryRunFixer,
FixableProblem,
Problem,
ProblemFixer,
ProblemSeverity,
ProblemTracker,
)
# working_directory_was_stale may be set to True by the CLI main module
# if the original working directory referred to a stale eden mount point.
working_directory_was_stale = False
def cure_what_ails_you(
instance: EdenInstance,
dry_run: bool,
mount_table: Optional[mtab.MountTable] = None,
fs_util: Optional[filesystem.FsUtil] = None,
proc_utils: Optional[proc_utils_mod.ProcUtils] = None,
out: Optional[ui.Output] = None,
) -> int:
return EdenDoctor(
instance, dry_run, mount_table, fs_util, proc_utils, out
).cure_what_ails_you()
class CheckoutInfo:
def __init__(
self,
instance: EdenInstance,
path: Path,
running_state_dir: Optional[Path] = None,
configured_state_dir: Optional[Path] = None,
state: Optional[MountState] = None,
) -> None:
self.instance = instance
self.path = path
self.running_state_dir = running_state_dir
self.configured_state_dir = configured_state_dir
self.state = state
def get_checkout(self) -> EdenCheckout:
state_dir = (
self.running_state_dir
if self.running_state_dir is not None
else self.configured_state_dir
)
assert state_dir is not None
return EdenCheckout(self.instance, self.path, state_dir)
class EdenDoctorChecker:
"""EdenDoctorChecker is a base class for EdenDoctor, and only supports
running checks, without reporting or fixing problems.
"""
instance: EdenInstance
mount_table: mtab.MountTable
fs_util: filesystem.FsUtil
proc_utils: proc_utils_mod.ProcUtils
tracker: ProblemTracker
out: ui.Output
# Setting run_system_wide_checks to False causes EdenDoctor to skip checks that
# try to detect system-wide problems (e.g., stale mount points, old OS/kernel
# version, low disk space, etc.). It is often desirable to disable this during
# integration tests, since tests normally only want to report issues with their
# specific EdenFS instance under test.
run_system_wide_checks: bool = True
def __init__(
self,
instance: EdenInstance,
tracker: ProblemTracker,
mount_table: Optional[mtab.MountTable] = None,
fs_util: Optional[filesystem.FsUtil] = None,
proc_utils: Optional[proc_utils_mod.ProcUtils] = None,
out: Optional[ui.Output] = None,
) -> None:
self.instance = instance
self.tracker = tracker
self.mount_table = mount_table if mount_table is not None else mtab.new()
self.fs_util = fs_util if fs_util is not None else filesystem.LinuxFsUtil()
self.proc_utils = proc_utils if proc_utils is not None else proc_utils_mod.new()
self.out = out if out is not None else ui.get_output()
def run_checks(self) -> None:
self.check_working_directory()
if self.run_system_wide_checks:
# check OS type, kernel version etc.
check_os.run_operating_system_checks(self.tracker, self.instance, self.out)
# check multiple edenfs running with some rogue stale PIDs
check_rogue_edenfs.check_many_edenfs_are_running(
self.tracker, self.proc_utils
)
status = self.instance.check_health()
if status.status == fb303_status.ALIVE:
self.run_normal_checks()
elif status.status == fb303_status.STARTING:
self.tracker.add_problem(EdenfsStarting())
elif status.status == fb303_status.STOPPING:
self.tracker.add_problem(EdenfsStopping())
elif status.status == fb303_status.DEAD:
self.run_edenfs_not_healthy_checks()
else:
self.tracker.add_problem(EdenfsUnexpectedStatus(status))
def check_working_directory(self) -> None:
problem = check_for_working_directory_problem()
if problem:
self.tracker.add_problem(problem)
def run_edenfs_not_healthy_checks(self) -> None:
configured_mounts = self.instance.get_mount_paths()
if configured_mounts:
self.tracker.add_problem(EdenfsNotHealthy())
else:
self.tracker.using_edenfs = False
return
if self.run_system_wide_checks:
check_stale_mounts.check_for_stale_mounts(self.tracker, self.mount_table)
check_filesystems.check_disk_usage(
self.tracker,
list(configured_mounts),
self.instance,
fs_util=self.fs_util,
)
def _get_checkouts_info(self) -> Dict[Path, CheckoutInfo]:
checkouts: Dict[Path, CheckoutInfo] = {}
# Get information about the checkouts currently known to the running
# edenfs process
with self.instance.get_thrift_client() as client:
for mount in client.listMounts():
# Old versions of edenfs did not return a mount state field.
# These versions only listed running mounts, so treat the mount state
# as running in this case.
mount_state = (
mount.state if mount.state is not None else MountState.RUNNING
)
path = Path(os.fsdecode(mount.mountPoint))
checkout = CheckoutInfo(
self.instance,
path,
running_state_dir=Path(os.fsdecode(mount.edenClientPath)),
state=mount_state,
)
checkouts[path] = checkout
# Get information about the checkouts listed in the config file
for configured_checkout in self.instance.get_checkouts():
checkout_info = checkouts.get(configured_checkout.path, None)
if checkout_info is None:
checkout_info = CheckoutInfo(self.instance, configured_checkout.path)
checkout_info.configured_state_dir = configured_checkout.state_dir
checkouts[checkout_info.path] = checkout_info
checkout_info.configured_state_dir = configured_checkout.state_dir
return checkouts
def run_normal_checks(self) -> None:
check_edenfs_version(self.tracker, self.instance)
checkouts = self._get_checkouts_info()
if self.run_system_wide_checks:
check_filesystems.check_eden_directory(self.tracker, self.instance)
check_stale_mounts.check_for_stale_mounts(self.tracker, self.mount_table)
check_filesystems.check_disk_usage(
self.tracker,
list(self.instance.get_mount_paths()),
self.instance,
fs_util=self.fs_util,
)
watchman_info = check_watchman.pre_check()
for path, checkout in sorted(checkouts.items()):
self.out.writeln(f"Checking {path}")
try:
check_mount(self.tracker, checkout, watchman_info)
except Exception as ex:
self.tracker.add_problem(
Problem(f"unexpected error while checking {path}: {ex}")
)
class EdenDoctor(EdenDoctorChecker):
fixer: ProblemFixer
dry_run: bool
def __init__(
self,
instance: EdenInstance,
dry_run: bool,
mount_table: Optional[mtab.MountTable] = None,
fs_util: Optional[filesystem.FsUtil] = None,
proc_utils: Optional[proc_utils_mod.ProcUtils] = None,
out: Optional[ui.Output] = None,
) -> None:
self.dry_run = dry_run
out = out if out is not None else ui.get_output()
if dry_run:
self.fixer = DryRunFixer(out)
else:
self.fixer = ProblemFixer(out)
super().__init__(
instance,
tracker=self.fixer,
mount_table=mount_table,
fs_util=fs_util,
proc_utils=proc_utils,
out=out,
)
def cure_what_ails_you(self) -> int:
self.run_checks()
return self._report_problems()
def _report_problems(self) -> int:
fixer = self.fixer
out = self.out
if not self.dry_run:
self.instance.log_sample(
"eden_doctor",
num_problems=fixer.num_problems,
problems=fixer.problem_types,
)
if fixer.num_problems == 0:
if not fixer.using_edenfs:
out.writeln("EdenFS is not in use.")
else:
out.writeln("No issues detected.", fg=out.GREEN)
return 0
def problem_count(num: int) -> str:
if num == 1:
return "1 problem"
return f"{num} problems"
if self.dry_run:
out.writeln(
f"Discovered {problem_count(fixer.num_problems)} during --dry-run",
fg=out.YELLOW,
)
return 1
if fixer.num_fixed_problems:
out.writeln(
f"Successfully fixed {problem_count(fixer.num_fixed_problems)}.",
fg=out.YELLOW,
)
if fixer.num_failed_fixes:
out.writeln(
f"Failed to fix {problem_count(fixer.num_failed_fixes)}.", fg=out.RED
)
if fixer.num_manual_fixes:
if fixer.num_manual_fixes == 1:
msg = f"1 issue requires manual attention."
else:
msg = f"{fixer.num_manual_fixes} issues require manual attention."
out.writeln(msg, fg=out.YELLOW)
if fixer.num_fixed_problems == fixer.num_problems:
return 0
out.write(
"Ask in the Eden Users group if you need help fixing issues with Eden:\n"
"https://fb.facebook.com/groups/eden.users/\n"
)
return 1
class EdenfsNotHealthy(Problem):
def __init__(self) -> None:
super().__init__(
"Eden is not running.", remediation="To start Eden, run:\n\n eden start"
)
class EdenfsStarting(Problem):
def __init__(self) -> None:
remediation = '''\
Please wait for edenfs to finish starting.
If Eden seems to be taking too long to start you can try restarting it
with "eden restart"'''
super().__init__("Eden is currently still starting.", remediation=remediation)
class EdenfsStopping(Problem):
def __init__(self) -> None:
remediation = """\
Either wait for edenfs to exit, or to forcibly kill Eden, run:
eden stop --kill"""
super().__init__("Eden is currently shutting down.", remediation=remediation)
class EdenfsUnexpectedStatus(Problem):
def __init__(self, status: config_mod.HealthStatus) -> None:
msg = f"Unexpected health status reported by edenfs: {status}"
remediation = 'You can try restarting Eden by running "eden restart"'
super().__init__(msg, remediation=remediation)
def check_mount(
tracker: ProblemTracker,
checkout: CheckoutInfo,
watchman_info: check_watchman.WatchmanCheckInfo,
) -> None:
if checkout.state is None:
# This checkout is configured but not currently running.
tracker.add_problem(CheckoutNotMounted(checkout))
elif checkout.state == MountState.RUNNING:
check_running_mount(tracker, checkout, watchman_info)
elif checkout.state in (
MountState.UNINITIALIZED,
MountState.INITIALIZING,
MountState.INITIALIZED,
MountState.STARTING,
):
tracker.add_problem(
Problem(
f"Checkout {checkout.path} is currently starting up.",
f"If this checkout does not successfully finish starting soon, "
'try running "eden restart"',
severity=ProblemSeverity.ADVICE,
)
)
elif checkout.state in (
MountState.SHUTTING_DOWN,
MountState.SHUT_DOWN,
MountState.DESTROYING,
):
tracker.add_problem(
Problem(
f"Checkout {checkout.path} is currently shutting down.",
f"If this checkout does not successfully finish shutting down soon, "
'try running "eden restart"',
)
)
elif checkout.state == MountState.FUSE_ERROR:
# TODO: We could potentially try automatically unmounting and remounting.
# In general mounts shouldn't remain in this state for long, so we probably
# don't need to worry too much about this case.
tracker.add_problem(
Problem(f"Checkout {checkout.path} encountered a FUSE error while mounting")
)
else:
tracker.add_problem(
Problem(
f"edenfs reports that checkout {checkout.path} is in "
"unknown state {checkout.state}"
)
)
def check_running_mount(
tracker: ProblemTracker,
checkout_info: CheckoutInfo,
watchman_info: check_watchman.WatchmanCheckInfo,
) -> None:
if checkout_info.configured_state_dir is None:
tracker.add_problem(CheckoutNotConfigured(checkout_info))
return
elif checkout_info.configured_state_dir != checkout_info.running_state_dir:
tracker.add_problem(CheckoutConfigurationMismatch(checkout_info))
return
checkout = checkout_info.get_checkout()
try:
config = checkout.get_config()
except Exception as ex:
tracker.add_problem(
Problem(f"error parsing the configuration for {checkout_info.path}: {ex}")
)
# Just skip the remaining checks.
# Most of them rely on values from the configuration.
return
check_filesystems.check_using_nfs_path(tracker, checkout.path)
check_watchman.check_active_mount(tracker, str(checkout.path), watchman_info)
if config.scm_type == "hg":
check_hg.check_hg(tracker, checkout)
class CheckoutNotConfigured(Problem):
def __init__(self, checkout_info: CheckoutInfo) -> None:
msg = (
f"Checkout {checkout_info.path} is running but not "
"listed in Eden's configuration file."
)
# TODO: Maybe we could use some better suggestions here, depending on
# common cases that might lead to this situation. (At the moment I believe this
# can occur if `eden clone` fails to set up the .hg directory after mounting.)
quoted_path = shlex.quote(str(checkout_info.path))
remediation = (
f'Running "eden unmount {quoted_path}" will unmount this checkout.'
)
super().__init__(msg, remediation)
class CheckoutConfigurationMismatch(Problem):
def __init__(self, checkout_info: CheckoutInfo) -> None:
msg = f"""\
The running configuration for {checkout_info.path} is different than "
the on-disk state in Eden's configuration file:
- Running state directory: {checkout_info.running_state_dir}
- Configured state directory: {checkout_info.configured_state_dir}"""
remediation = f"""\
Running `eden restart` will cause Eden to restart and use the data from the
on-disk configuration."""
super().__init__(msg, remediation)
class CheckoutNotMounted(FixableProblem):
_instance: EdenInstance
_mount_path: Path
def __init__(self, checkout_info: CheckoutInfo) -> None:
self._instance = checkout_info.instance
self._mount_path = checkout_info.path
def description(self) -> str:
return f"{self._mount_path} is not currently mounted"
def dry_run_msg(self) -> str:
return f"Would remount {self._mount_path}"
def start_msg(self) -> str:
return f"Remounting {self._mount_path}"
def perform_fix(self) -> None:
try:
self._instance.mount(str(self._mount_path), False)
except Exception as ex:
if "is too short for header" in str(ex):
raise Exception(
f"""\
{ex}
{self._mount_path} appears to have been corrupted.
This can happen if your devserver was hard-rebooted.
To recover, you will need to remove and reclone the repo.
You will lose uncommitted work or shelves, but all your local
commits are safe.
If you have non-trivial uncommitted work that you need to recover
you may be able to restore it from your system backup.
To remove the corrupted repo, run: `eden rm {self._mount_path}`"""
)
raise
class StaleWorkingDirectory(Problem):
def __init__(self, msg: str) -> None:
remediation = f"""\
Run "cd / && cd -" to update your shell's working directory."""
super().__init__(msg, remediation)
def check_for_working_directory_problem() -> Optional[Problem]:
# Report an issue if the working directory points to a stale mount point
if working_directory_was_stale:
msg = "Your current working directory appears to be a stale Eden mount point"
return StaleWorkingDirectory(msg)
# If the $PWD environment variable is set, confirm that it points our current
# working directory.
#
# This helps catch problems where the current working directory has been replaced
# with a new mount point but the user hasn't cd'ed into the new mount yet. For
# instance this can happen if the user cd'ed into a checkout directory before Eden
# was running, and then started Eden. The user will still need to cd again to see
# the Eden checkout contents.
pwd = os.environ.get("PWD")
if pwd is None:
# If $PWD isn't set we can't check anything else
return None
try:
pwd_stat = os.stat(pwd)
cwd_stat = os.stat(".")
except Exception:
# If we fail to stat either directory just ignore the error and don't report a
# problem for now. If we ever see a real issue in practice hit this scenario we
# can add an appropriate error message at that point in time.
#
# We've already handled stale mounts above, and `os.stat()` checks normally
# succeed in this case anyway.
#
# Users can get into situations where they don't have permissions to
# stat the current working directory in some cases, but this should be very
# rare and they will probably be able to identify the issue themselves in this
# case.
return None
if (pwd_stat.st_dev, pwd_stat.st_ino) == (cwd_stat.st_dev, cwd_stat.st_ino):
# Everything looks okay
return None
msg = """\
Your current working directory is out-of-date.
This can happen if you have (re)started Eden but your shell is still pointing to
the old directory from before the Eden checkouts were mounted.
"""
return StaleWorkingDirectory(msg)
def check_edenfs_version(tracker: ProblemTracker, instance: EdenInstance) -> None:
rver, release = instance.get_running_version_parts()
if not rver or not release:
# This could be a dev build that returns the empty
# string for both of these values.
return
running_version = version.format_eden_version((rver, release))
installed_version = version.get_current_version()
if running_version == installed_version:
return
tracker.add_problem(
Problem(
dedent(
f"""\
The version of Eden that is installed on your machine is:
fb-eden-{installed_version}.x86_64
but the version of Eden that is currently running is:
fb-eden-{running_version}.x86_64
Consider running `eden restart` to migrate to the newer version, which
may have important bug fixes or performance improvements.
"""
),
severity=ProblemSeverity.ADVICE,
)
)