sapling/tests/killdaemons.py
Matt Harbison 66cdc3e9cb killdaemons: close pid file before killing processes
With #serve enabled on Windows, I was getting occasional stacktraces like this:

  Errored test-hgweb-json.t: Traceback (most recent call last):
    File "./run-tests.py", line 724, in run
      self.tearDown()
    File "./run-tests.py", line 805, in tearDown
      killdaemons(entry)
    File "./run-tests.py", line 540, in killdaemons
      logfn=vlog)
    File "...\tests\killdaemons.py", line 94, in killdaemons
      os.unlink(pidfile)
  WindowsError: [Error 32] The process cannot access the file because it is
     being used by another process: '...\\hgtests.zmpqj3\\child80\\daemon.pids'

Adrian suggested using util.posixfile, which works.  However, the 'mercurial'
package isn't in sys.path when invoking run-tests.py, and it isn't clear that
hacking[1] it in is a good thing (especially for test-run-tests.t, which uses an
installation in a temp folder).

I tried using ProcessMonitor to figure out what the other process is, but that
monitoring slows things down to such a degree that the issue doesn't occur.  I
was ready to blame the virus scanner, but it happens without that too.

Looking at the code, I don't see anything that would have the pid file open.
But I was able to get through about 20 full test runs without an issue with this
minor change, whereas before it was pretty certain to hit this at least once in
two or three runs.

[1] https://www.mercurial-scm.org/pipermail/mercurial-devel/2017-May/097907.html
2017-05-22 21:45:02 -04:00

107 lines
3.4 KiB
Python
Executable File

#!/usr/bin/env python
from __future__ import absolute_import
import errno
import os
import signal
import sys
import time
if os.name =='nt':
import ctypes
def _check(ret, expectederr=None):
if ret == 0:
winerrno = ctypes.GetLastError()
if winerrno == expectederr:
return True
raise ctypes.WinError(winerrno)
def kill(pid, logfn, tryhard=True):
    """Terminate the daemon process ``pid`` through the kernel32 process API.

    The process is opened, waited on for up to 100 ms, terminated with
    ``TerminateProcess`` if that wait times out, and then waited on for
    another 100 ms.  If it still has not exited, a "stuck" message is
    logged and the function gives up.

    pid     -- id of the process to stop
    logfn   -- callable taking a single message string
    tryhard -- accepted so the signature matches the POSIX variant, but
               currently unused here (see the TODO below)

    Returns None.  Raises whatever ``_check`` raises when a kernel32 call
    reports failure with an error other than the tolerated one.
    """
    logfn('# Killing daemon process %d' % pid)
    # Access rights requested from OpenProcess.
    PROCESS_TERMINATE = 1
    PROCESS_QUERY_INFORMATION = 0x400
    SYNCHRONIZE = 0x00100000
    # WaitForSingleObject results that are handled explicitly.
    WAIT_OBJECT_0 = 0
    WAIT_TIMEOUT = 258
    kernel32 = ctypes.windll.kernel32
    handle = kernel32.OpenProcess(
        PROCESS_TERMINATE | SYNCHRONIZE | PROCESS_QUERY_INFORMATION,
        False, pid)
    if handle == 0:
        _check(0, 87)  # err 87 when process not found
        return  # process not found, already finished
    try:
        r = kernel32.WaitForSingleObject(handle, 100)
        if r == WAIT_OBJECT_0:
            pass  # terminated, but process handle still available
        elif r == WAIT_TIMEOUT:
            _check(kernel32.TerminateProcess(handle, -1))
        else:
            # NOTE(review): _check only raises when its argument is 0, and 0
            # is already handled above as WAIT_OBJECT_0, so an unexpected
            # wait result passes through silently here - confirm intent.
            _check(r)

        # TODO?: forcefully kill when timeout
        #   and ?shorter waiting time? when tryhard==True
        r = kernel32.WaitForSingleObject(handle, 100)  # timeout = 100 ms
        if r == WAIT_OBJECT_0:
            pass  # process is terminated
        elif r == WAIT_TIMEOUT:
            # The pid must be interpolated; without it the log would show a
            # literal "%d".
            logfn('# Daemon process %d is stuck' % pid)
        else:
            _check(r)  # any error (see NOTE(review) above)
    except:  # re-raises
        # Release the handle but do not _check the result, so the original
        # error is the one that propagates.
        kernel32.CloseHandle(handle)
        raise
    _check(kernel32.CloseHandle(handle))
else:
def kill(pid, logfn, tryhard=True):
    """Stop the daemon process ``pid``, escalating from SIGTERM to SIGKILL.

    pid     -- id of the process to stop
    logfn   -- callable taking a single message string
    tryhard -- when true, probe the process ten times at 50 ms intervals
               after SIGTERM; otherwise probe once after 100 ms

    Returns None.  An OSError with errno ESRCH raised by any of the
    ``os.kill`` calls ends the function quietly; any other OSError
    propagates to the caller.
    """
    try:
        # Probe first: ESRCH here means there is nothing to kill, and
        # nothing gets logged.
        os.kill(pid, 0)
        logfn('# Killing daemon process %d' % pid)
        os.kill(pid, signal.SIGTERM)
        polls, pause = (10, 0.05) if tryhard else (1, 0.1)
        for _ in range(polls):
            time.sleep(pause)
            # Raises ESRCH (leaving through the handler below) once the
            # process is gone.
            os.kill(pid, 0)
        # Every probe succeeded, so the process outlived SIGTERM.
        logfn('# Daemon process %d is stuck - really killing it' % pid)
        os.kill(pid, signal.SIGKILL)
    except OSError as err:
        if err.errno == errno.ESRCH:
            return
        raise
def killdaemons(pidfile, tryhard=True, remove=False, logfn=None):
    """Kill every daemon whose pid is listed, one per line, in ``pidfile``.

    pidfile -- path of the file holding the pids
    tryhard -- forwarded unchanged to ``kill``
    remove  -- when true, delete ``pidfile`` after the kill pass
    logfn   -- callable taking a single message string; a falsy value
               disables logging

    Lines that do not parse as a positive integer are reported through
    ``logfn`` and skipped.  The file is read completely and closed before
    any process is killed.  An IOError raised anywhere in this sequence
    (for example when ``pidfile`` cannot be opened) is silently ignored.
    Returns None.
    """
    if not logfn:
        def logfn(message):
            return message

    # Kill off any leftover daemon processes
    try:
        targets = []
        with open(pidfile) as pidfp:
            for entry in pidfp:
                try:
                    candidate = int(entry)
                except ValueError:
                    # Unparsable text is rejected the same way as a
                    # non-positive number.
                    candidate = 0
                if candidate > 0:
                    targets.append(candidate)
                else:
                    logfn('# Not killing daemon process %s - invalid pid'
                          % entry.rstrip())
        # The pid file is closed at this point, before any kill() call.
        for target in targets:
            kill(target, logfn, tryhard)
        if remove:
            os.unlink(pidfile)
    except IOError:
        pass
if __name__ == '__main__':
    # The pid file path is the sole command-line argument when one is given
    # (more than one makes the unpacking below raise ValueError); otherwise
    # it is read from the DAEMON_PIDS environment variable.
    cliargs = sys.argv[1:]
    if cliargs:
        path, = cliargs
    else:
        path = os.environ["DAEMON_PIDS"]

    killdaemons(path)