sapling/eden/fs/cli/config.py

#!/usr/bin/env python3
#
# Copyright (c) 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.

import collections
import configparser
import errno
import fcntl
import hashlib
import json
import os
import shutil
import stat
import subprocess
import tempfile
import time

from . import util
import eden.thrift
import facebook.eden.ttypes as eden_ttypes
from fb303.ttypes import fb_status
import thrift

# These are INI files that hold config data.
SYSTEM_CONFIG_DIR = '/etc/eden/config.d'
USER_CONFIG = '.edenrc'

# These paths are relative to the user's client directory.
CLIENTS_DIR = 'clients'
STORAGE_DIR = 'storage'
ROCKS_DB_DIR = os.path.join(STORAGE_DIR, 'rocks-db')
CONFIG_JSON = 'config.json'

# These are files in a client directory.
LOCAL_CONFIG = 'edenrc'
SNAPSHOT = 'SNAPSHOT'

# In our test environment, when we need to run as root, we
# may need to launch via a helper script that is whitelisted
# by the local sudo configuration.
SUDO_HELPER = '/var/www/scripts/testinfra/run_eden.sh'


class EdenStartError(Exception):
    pass


class UsageError(Exception):
    pass


class Config:
    def __init__(self, config_dir, system_config_dir, home_dir):
        self._config_dir = config_dir
        self._system_config_dir = system_config_dir
        if not self._system_config_dir:
            self._system_config_dir = SYSTEM_CONFIG_DIR
        self._user_config_path = os.path.join(home_dir, USER_CONFIG)

    def get_rc_files(self):
        result = []
        if os.path.isdir(self._system_config_dir):
            result = os.listdir(self._system_config_dir)
            result = [os.path.join(self._system_config_dir, f) for f in result]
            result.sort()
        result.append(self._user_config_path)
        return result

    def get_repository_list(self, parser=None):
        result = []
        if not parser:
            parser = configparser.ConfigParser()
            parser.read(self.get_rc_files())
        for section in parser.sections():
            header = section.split(' ')
            if len(header) == 2 and header[0] == 'repository':
                result.append(header[1])
        return sorted(result)

    def get_repo_data(self, name):
        '''
        Returns a dictionary containing the metadata and the bind mounts of the
        repository specified by name and raises an exception if the repository
        data could not be found. The expected keys in the returned dictionary
        are: 'repo_type', 'repo_source', 'bind-mounts'.
        '''
        result = {}
        rc_files = self.get_rc_files()
        parser = configparser.ConfigParser()
        parser.read(rc_files)
        repository_header = 'repository ' + name
        if repository_header in parser:
            result.update(parser[repository_header])
        bind_mounts_header = 'bindmounts ' + name
        if bind_mounts_header in parser:
            # Convert the ConfigParser section into a dict so it is JSON
            # serializable for the `eden info` command.
            result['bind-mounts'] = dict(parser[bind_mounts_header].items())
        else:
            result['bind-mounts'] = {}
        if not result:
            raise Exception('repository %s does not exist.' % name)
        return result

    def get_mount_paths(self):
        '''Return the paths of the set mount points stored in config.json'''
        return self._get_directory_map().keys()

    def get_all_client_config_info(self):
        info = {}
        for path in self.get_mount_paths():
            info[path] = self.get_client_info(path)

        return info

    def get_thrift_client(self):
        return eden.thrift.create_thrift_client(self._config_dir)

    def get_client_info(self, path):
        path = os.path.realpath(path)
        client_dir = self._get_client_dir_for_mount_point(path)
        repo_name = self._get_repo_name(client_dir)
        repo_data = self.get_repo_data(repo_name)

        snapshot_file = os.path.join(client_dir, SNAPSHOT)
        with open(snapshot_file) as f:
            snapshot = f.read().strip()

        return collections.OrderedDict([
            ['bind-mounts', repo_data['bind-mounts']],
            ['mount', path],
            ['snapshot', snapshot],
            ['client-dir', client_dir],
        ])

    def checkout(self, path, snapshot_id):
        '''Switch the active snapshot id for a given client'''
        with self.get_thrift_client() as client:
            client.checkOutRevision(path, snapshot_id)

    def add_repository(self, name, repo_type, source, with_buck=False):
        # Check if repository already exists
        with ConfigUpdater(self._user_config_path) as config:
            if name in self.get_repository_list(config):
                raise UsageError('''\
repository %s already exists. You will need to edit the ~/.edenrc config file \
by hand to make changes to the repository or remove it.''' % name)

            # Create a directory for client to store repository metadata
            bind_mounts = {}
            if with_buck:
                bind_mount_name = 'buck-out'
                bind_mounts[bind_mount_name] = 'buck-out'

            # Add repository to INI file
            config['repository ' + name] = {'type': repo_type, 'path': source}
            if bind_mounts:
                config['bindmounts ' + name] = bind_mounts
            config.save()

    def clone(self, repo_name, path, snapshot_id):
        if path in self._get_directory_map():
            raise Exception('mount path %s already exists.' % path)

        # Make sure that path is a valid destination for the clone.
        st = None
        try:
            st = os.stat(path)
        except OSError as ex:
            if ex.errno == errno.ENOENT:
                # Note that this could also throw if path is /a/b/c and /a
                # exists, but it is a file.
                util.mkdir_p(path)
            else:
                raise

        # Note that st will be None if `mkdir_p` was run in the catch block.
        if st:
            if stat.S_ISDIR(st.st_mode):
                # If an existing directory was specified, then verify it is
                # empty.
                if len(os.listdir(path)) > 0:
                    raise OSError(errno.ENOTEMPTY, os.strerror(errno.ENOTEMPTY),
                                  path)
            else:
                # Throw because it exists, but it is not a directory.
                raise OSError(errno.ENOTDIR, os.strerror(errno.ENOTDIR), path)

        # Create client directory
        dir_name = hashlib.sha1(path.encode('utf-8')).hexdigest()
        client_dir = os.path.join(self._get_clients_dir(), dir_name)
        util.mkdir_p(client_dir)

        # Store repository name in local edenrc config file
        self._store_repo_name(client_dir, repo_name)

        # Store snapshot ID
        if snapshot_id:
            client_snapshot = os.path.join(client_dir, SNAPSHOT)
            with open(client_snapshot, 'w') as f:
                f.write(snapshot_id + '\n')
        else:
            raise Exception('snapshot id not provided')

        # Create bind mounts directories
        repo_data = self.get_repo_data(repo_name)
        bind_mounts_dir = os.path.join(client_dir, 'bind-mounts')
        util.mkdir_p(bind_mounts_dir)
        for mount in repo_data['bind-mounts']:
            util.mkdir_p(os.path.join(bind_mounts_dir, mount))

        # Create local storage directory
        self._get_or_create_write_dir(dir_name)

        # Prepare to mount
        mount_info = eden_ttypes.MountInfo(mountPoint=path,
                                           edenClientPath=client_dir)
        with self.get_thrift_client() as client:
            client.mount(mount_info)

        # Add mapping of mount path to client directory in config.json
        self._add_path_to_directory_map(path, dir_name)

    def mount(self, path):
        # Load the config info for this client, to make sure we
        # know about the client.
        path = os.path.realpath(path)
        client_dir = self._get_client_dir_for_mount_point(path)
        self._get_repo_name(client_dir)

        # Make sure the mount path exists
        util.mkdir_p(path)

        # Ask eden to mount the path
        mount_info = eden_ttypes.MountInfo(mountPoint=path,
                                           edenClientPath=client_dir)
        with self.get_thrift_client() as client:
            client.mount(mount_info)

    def unmount(self, path, delete_config=True):
        path = os.path.realpath(path)
        with self.get_thrift_client() as client:
            client.unmount(path)

        if delete_config:
            shutil.rmtree(self._get_client_dir_for_mount_point(path))
            self._remove_path_from_directory_map(path)

            # Delete the now empty mount point
            os.rmdir(path)

    def check_health(self):
        '''
        Get the status of the edenfs daemon.

        Returns a HealthStatus object containing health information.
        '''
        pid = None
        status = fb_status.DEAD
        try:
            with self.get_thrift_client() as client:
                pid = client.getPid()
                status = client.getStatus()
        except eden.thrift.EdenNotRunningError:
            return HealthStatus(fb_status.DEAD, pid=None,
                                detail='edenfs not running')
        except thrift.Thrift.TException as ex:
            detail = 'error talking to edenfs: ' + str(ex)
            return HealthStatus(status, pid, detail)

        status_name = fb_status._VALUES_TO_NAMES.get(status)
        detail = 'edenfs running (pid {}); status is {}'.format(
            pid, status_name)
        return HealthStatus(status, pid, detail)

    def spawn(self,
              daemon_binary,
              extra_args=None,
              gdb=False,
              gdb_args=None,
              foreground=False):
        '''
        Start edenfs.

        If foreground is True this function never returns (edenfs is exec'ed
        directly in the current process).

        Otherwise, this function waits for edenfs to become healthy, and
        returns a HealthStatus object.  On error an exception will be raised.
        '''
        # Check to see if edenfs is already running
        health_info = self.check_health()
        if health_info.is_healthy():
            raise EdenStartError('edenfs is already running (pid {})'.format(
                health_info.pid))

        # Run the eden server.
        cmd = [daemon_binary, '--edenDir', self._config_dir,
               '--systemConfigDir', self._system_config_dir,
               '--configPath', self._user_config_path, ]
        if gdb:
            gdb_args = gdb_args or []
            cmd = ['gdb'] + gdb_args + ['--args'] + cmd
            foreground = True
        if extra_args:
            cmd.extend(extra_args)

        eden_env = self._build_eden_environment()

        # Run edenfs using sudo, unless we already have root privileges,
        # or the edenfs binary is setuid root.
        if os.geteuid() != 0:
            s = os.stat(daemon_binary)
            if not (s.st_uid == 0 and (s.st_mode & stat.S_ISUID)):
                # We need to run edenfs under sudo
                sudo_cmd = ['/usr/bin/sudo']
                # Add environment variable settings
                # Depending on the sudo configuration, these may not
                # necessarily get passed through automatically even when
                # using "sudo -E".
                for key, value in eden_env.items():
                    sudo_cmd.append('%s=%s' % (key, value))

                if ('SANDCASTLE' in os.environ) and os.path.exists(SUDO_HELPER):
                    cmd = [SUDO_HELPER] + cmd
                cmd = sudo_cmd + cmd

        if foreground:
            # This call does not return
            os.execve(cmd[0], cmd, eden_env)

        # Open the log file
        log_path = self.get_log_path()
        util.mkdir_p(os.path.dirname(log_path))
        log_file = open(log_path, 'a')
        startup_msg = time.strftime('%Y-%m-%d %H:%M:%S: starting edenfs\n')
        log_file.write(startup_msg)

        # Start edenfs
        proc = subprocess.Popen(cmd, env=eden_env, preexec_fn=os.setsid,
                                stdout=log_file, stderr=log_file)
        log_file.close()

        # Wait for edenfs to start
        return self._wait_for_daemon_healthy(proc)

    def _wait_for_daemon_healthy(self, proc):
        '''
        Wait for edenfs to become healthy.
        '''
        def check_health():
            # Check the thrift status
            health_info = self.check_health()
            if health_info.is_healthy():
                return health_info

            # Make sure that edenfs is still running
            status = proc.poll()
            if status is not None:
                if status < 0:
                    msg = 'terminated with signal {}'.format(-status)
                else:
                    msg = 'exit status {}'.format(status)
                raise EdenStartError('edenfs exited before becoming healthy: ' +
                                     msg)

            # Still starting
            return None

        timeout_ex = EdenStartError('timed out waiting for edenfs to become '
                                    'healthy')
        return util.poll_until(check_health, timeout=5, timeout_ex=timeout_ex)

    def get_log_path(self):
        return os.path.join(self._config_dir, 'logs', 'edenfs.log')

    def _build_eden_environment(self):
        # Reset $PATH to the following contents, so that everyone has the
        # same consistent settings.
        path_dirs = [
            '/usr/local/bin',
            '/bin',
            '/usr/bin',
        ]

        eden_env = {
            'PATH': ':'.join(path_dirs),
        }

        # Preserve the following environment settings
        preserve = [
            'USER',
            'LOGNAME',
            'HOME',
            'EMAIL',
            'NAME',
            # When we import data from mercurial, the remotefilelog extension
            # may need to SSH to a remote mercurial server to get the file
            # contents.  Preserve SSH environment variables needed to do this.
            'SSH_AUTH_SOCK',
            'SSH_AGENT_PID',
        ]

        for name, value in os.environ.items():
            # Preserve any environment variable starting with "TESTPILOT_".
            # TestPilot uses a few environment variables to keep track of
            # processes started during test runs, so it can track down and kill
            # runaway processes that weren't cleaned up by the test itself.
            # We want to make sure this behavior works during the eden
            # integration tests.
            if name.startswith('TESTPILOT_'):
                eden_env[name] = value
            elif name in preserve:
                eden_env[name] = value
            else:
                # Drop any environment variable not matching the above cases
                pass

        return eden_env

    def get_or_create_path_to_rocks_db(self):
        rocks_db_dir = os.path.join(self._config_dir, ROCKS_DB_DIR)
        return util.mkdir_p(rocks_db_dir)

    def _store_repo_name(self, client_dir, repo_name):
        config_path = os.path.join(client_dir, LOCAL_CONFIG)
        with ConfigUpdater(config_path) as config:
            config['repository'] = {'name': repo_name}
            config.save()

    def _get_repo_name(self, client_dir):
        config = os.path.join(client_dir, LOCAL_CONFIG)
        parser = configparser.ConfigParser()
        parser.read(config)
        name = parser.get('repository', 'name')
        if name:
            return name
        raise Exception('could not find repository for %s' % client_dir)

    def _get_directory_map(self):
        '''
        Parse config.json which holds a mapping of mount paths to their
        respective client directory and return contents in a dictionary.
        '''
        directory_map = os.path.join(self._config_dir, CONFIG_JSON)
        if os.path.isfile(directory_map):
            with open(directory_map) as f:
                return json.load(f)
        return {}

    def _add_path_to_directory_map(self, path, dir_name):
        config_data = self._get_directory_map()
        if path in config_data:
            raise Exception('mount path %s already exists.' % path)
        config_data[path] = dir_name
        self._write_directory_map(config_data)

    def _remove_path_from_directory_map(self, path):
        config_data = self._get_directory_map()
        if path in config_data:
            del config_data[path]
            self._write_directory_map(config_data)

    def _write_directory_map(self, config_data):
        directory_map = os.path.join(self._config_dir, CONFIG_JSON)
        with open(directory_map, 'w') as f:
            json.dump(config_data, f, indent=2, sort_keys=True)
            f.write('\n')

    def _get_client_dir_for_mount_point(self, path):
        # The caller is responsible for making sure the path is already
        # a normalized, absolute path.
        assert os.path.isabs(path)

        config_data = self._get_directory_map()
        if path not in config_data:
            raise Exception('could not find mount path %s' % path)
        return os.path.join(self._get_clients_dir(), config_data[path])

    def _get_clients_dir(self):
        return os.path.join(self._config_dir, CLIENTS_DIR)

    def _get_or_create_write_dir(self, dir_name):
        ''' Returns the local storage directory that is used to
            hold writes that are not part of a snapshot '''
        local_dir = os.path.join(self._get_clients_dir(),
                                 dir_name, 'local')
        return util.mkdir_p(local_dir)


class HealthStatus(object):
    def __init__(self, status, pid, detail):
        self.status = status
        self.pid = pid  # The process ID, or None if not running
        self.detail = detail  # a human-readable message

    def is_healthy(self):
        return self.status == fb_status.ALIVE


class ConfigUpdater(object):
    '''
    A helper class to safely update an eden config file.

    This acquires a lock on the config file, reads it in, and then provide APIs
    to save it back.  This ensures that another process cannot change the file
    in between the time that we read it and when we write it back.

    This also saves the file to a temporary name first, then renames it into
    place, so that the main config file is always in a good state, and never
    has partially written contents.
    '''
    def __init__(self, path):
        self.path = path
        self._lock_path = self.path + '.lock'
        self._lock_file = None
        self.config = configparser.ConfigParser()
        self.config.read(self.path)

        # Acquire a lock.
        # This makes sure that another process can't modify the config in the
        # middle of a read-modify-write operation.  (We can't stop a user
        # from manually editing the file while we work, but we can stop
        # other eden CLI processes.)
        self._acquire_lock()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        self.close()

    def __del__(self):
        self.close()

    def sections(self):
        return self.config.sections()

    def __getitem__(self, key):
        return self.config[key]

    def __setitem__(self, key, value):
        self.config[key] = value

    def _acquire_lock(self):
        while True:
            self._lock_file = open(self._lock_path, 'w+')
            fcntl.flock(self._lock_file.fileno(), fcntl.LOCK_EX)
            # The original creator of the lock file will unlink it when
            # it is finished.  Make sure we grab the lock on the file still on
            # disk, and not an unlinked file.
            st1 = os.fstat(self._lock_file.fileno())
            st2 = os.lstat(self._lock_path)
            if st1.st_dev == st2.st_dev and st1.st_ino == st2.st_ino:
                # We got the real lock
                return

            # We acquired a lock on an old deleted file.
            # Close it, and try to acquire the current lock file again.
            self._lock_file.close()
            self._lock_file = None
            continue

    def _unlock(self):
        # Remove the file on disk before we unlock it.
        # This way processes currently waiting in _acquire_lock() that already
        # opened our lock file will see that it isn't the current file on disk
        # once they acquire the lock.
        os.unlink(self._lock_path)
        self._lock_file.close()
        self._lock_file = None

    def close(self):
        if self._lock_file is not None:
            self._unlock()

    def save(self):
        if self._lock_file is None:
            raise Exception('Cannot save the config without holding the lock')

        try:
            st = os.stat(self.path)
            perms = (st.st_mode & 0o777)
        except OSError as ex:
            if ex.errno != errno.ENOENT:
                raise
            perms = 0o644

        # Write the contents to a temporary file first, then atomically rename
        # it to the desired destination.  This makes sure the .edenrc file
        # always has valid contents at all points in time.
        prefix = USER_CONFIG + '.tmp.'
        dirname = os.path.dirname(self.path)
        tmpf = tempfile.NamedTemporaryFile('w', dir=dirname, prefix=prefix,
                                           delete=False)
        try:
            self.config.write(tmpf)
            tmpf.close()
            os.chmod(tmpf.name, perms)
            os.rename(tmpf.name, self.path)
        except BaseException:
            # Remove temporary file on error
            try:
                os.unlink(tmpf.name)
            except Exception:
                pass
            raise


def _verify_mount_point(mount_point):
    if os.path.isdir(mount_point):
        return
    parent_dir = os.path.dirname(mount_point)
    if os.path.isdir(parent_dir):
        os.mkdir(mount_point)
    else:
        raise Exception(
            ('%s must be a directory in order to mount a client at %s. ' +
             'If this is the correct location, run `mkdir -p %s` to create ' +
             'the directory.') % (parent_dir, mount_point, parent_dir))