increase shutdown timeout in integration tests and avoid EPERM when started with sudo

Summary: 30 seconds is not enough time on heavily contented systems, including CI. Bump the shutdown timeout to 120. Also, correctly send SIGKILL to the daemon process when it's been started with sudo. Reviewed By: simpkins Differential Revision: D22422784 fbshipit-source-id: dc7be0962705f1feb9643990309f570e352b68a0
2024-12-25 14:05:00 +03:00 · 2020-09-15 15:11:47 -07:00 · 2020-09-15 15:11:47 -07:00 · a303d6a399
commit a303d6a399
parent 62f06c645a
1 changed files with 16 additions and 5 deletions
--- a/eden/integration/lib/edenclient.py
+++ b/eden/integration/lib/edenclient.py
@ -10,6 +10,7 @@ import os
 import pathlib
 import shlex
 import shutil
+import signal
 import subprocess
 import sys
 import tempfile
@ -334,21 +335,31 @@ class EdenFS(object):
        process = self._process
        assert process is not None

+        # Before shutting down, get the current pid. This may differ from process.pid when
+        # edenfs is started with sudo.
+        daemon_pid = util.check_health(
+            self.get_thrift_client, self.eden_dir, timeout=30
+        ).pid
+
        # Run "edenfsctl stop" with a timeout of 0 to tell it not to wait for the EdenFS
        # process to exit.  Since we are running it directly (self._process) we will
        # need to wait on it.  Depending on exactly how it is being run the process may
        # not go away until we wait on it.
        self.run_cmd("stop", "-t", "0")

-        # Wait up to 30 seconds for EdenFS to exit.
+        # Wait up to 120 seconds for EdenFS to exit. It generally won't take this long,
+        # but under heavy contention on CI machines, 30 seconds isn't long enough.
        self._process = None
        try:
-            return_code = process.wait(timeout=30)
+            return_code = process.wait(timeout=120)
        except subprocess.TimeoutExpired:
            # EdenFS did not exit normally on its own.
-            process.kill()
-            return_code = process.wait(timeout=10)
-            raise Exception("EdenFS did not exit on its own; had to send SIGKILL")
+            if can_run_sudo() and daemon_pid is not None:
+                os.kill(daemon_pid, signal.SIGKILL)
+            else:
+                process.kill()
+            process.wait(timeout=10)
+            raise Exception("EdenFS did not shutdown on time; had to send SIGKILL")

        if return_code != 0:
            raise Exception(