sapling/contrib/chg/util.c
Jun Wu 5d9fba271f chg: restart server automatically if handshake takes too long
Summary:
Usually the handshake process is pretty quick (<0.01 seconds):

  chg: debug: 0.000148 try connect to ...
  chg: debug: 0.000338 connected to server
  chg: debug: 0.000359 initialize context buffer with size 4096
  chg: debug: 0.008225 hello received: ...
  chg: debug: 0.008269 capflags=0x7b03, pid=31941
  chg: debug: 0.008282 request setprocname, block size 17
  chg: debug: 0.008316 request attachio
  chg: debug: 0.008978 response read from channel r, size 4
  chg: debug: 0.009045 request chdir, block size 45
  chg: debug: 0.009092 version matched (6119653365548183087)

However, we have seen some OSX cases where the handshake and basically
everything takes much longer:

  chg: debug: 0.000139 try connect to ...
  chg: debug: 0.000297 connected to server
  chg: debug: 0.000321 initialize context buffer with size 4096
  chg: debug: 0.192316 hello received: ...
  chg: debug: 0.192362 capflags=0x7b03, pid=55634
  chg: debug: 0.192373 request setprocname, block size 17
  chg: debug: 0.192420 request attachio
  chg: debug: 0.229009 response read from channel r, size 4
  chg: debug: 0.229072 request chdir, block size 34
  chg: debug: 0.229111 version matched (6119653365548183087)

(See P59677258 for the full paste)

If restart the chg server, the problem goes away and commands will be fast
again.

Unfortunately I'm not sure about the root cause of the problem. Maybe it's
Python's GC doing something very expensive? Maybe it's OSX thinking the server
process is "inactive" and put it to some state that's very slow to recover? Or maybe
it's some weird 3rdparty service?

For now, what we do know are:
- The slowness *sometimes* reproduces with chg.
- The slowness goes away if chg server is restarted.

As a last resort, detect the slowness by measuring the handshake time, then
restart the server accordingly. To avoid an infinite restart loop on slow machines,
the restart can only happen once.

The threshold is set to 0.05 seconds, which is roughly 5x the normal value, and
can be disabled by `CHGSTARTTIMECHECK=0`.

Reviewed By: phillco

Differential Revision: D8294468

fbshipit-source-id: 75246ea4d872045664e7feadb0acc47dfa1d8eae
2018-06-05 22:02:50 -07:00

192 lines
4.4 KiB
C

/*
* Utility functions
*
* Copyright (c) 2011 Yuya Nishihara <yuya@tcha.org>
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2 or any later version.
*/
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include "util.h"
static int colorenabled = 0;
static inline void fsetcolor(FILE* fp, const char* code) {
if (!colorenabled)
return;
fprintf(fp, "\033[%sm", code);
}
static void vabortmsgerrno(int no, const char* fmt, va_list args) {
fsetcolor(stderr, "1;31");
fputs("chg: abort: ", stderr);
vfprintf(stderr, fmt, args);
if (no != 0)
fprintf(stderr, " (errno = %d, %s)", no, strerror(no));
fsetcolor(stderr, "");
fputc('\n', stderr);
exit(255);
}
void abortmsg(const char* fmt, ...) {
va_list args;
va_start(args, fmt);
vabortmsgerrno(0, fmt, args);
va_end(args);
}
void abortmsgerrno(const char* fmt, ...) {
int no = errno;
va_list args;
va_start(args, fmt);
vabortmsgerrno(no, fmt, args);
va_end(args);
}
static int debugmsgenabled = 0;
static double debugstart = 0;
static double now() {
struct timeval t;
gettimeofday(&t, NULL);
return t.tv_usec / 1e6 + t.tv_sec;
}
double chg_now() {
return now();
}
void enablecolor(void) {
colorenabled = 1;
}
void enabledebugmsg(void) {
debugmsgenabled = 1;
debugstart = now();
}
void debugmsg(const char* fmt, ...) {
if (!debugmsgenabled)
return;
va_list args;
va_start(args, fmt);
fsetcolor(stderr, "1;30");
fprintf(stderr, "chg: debug: %4.6f ", now() - debugstart);
vfprintf(stderr, fmt, args);
fsetcolor(stderr, "");
fputc('\n', stderr);
va_end(args);
}
void fchdirx(int dirfd) {
int r = fchdir(dirfd);
if (r == -1)
abortmsgerrno("failed to fchdir");
}
void fsetcloexec(int fd) {
int flags = fcntl(fd, F_GETFD);
if (flags < 0)
abortmsgerrno("cannot get flags of fd %d", fd);
if (fcntl(fd, F_SETFD, flags | FD_CLOEXEC) < 0)
abortmsgerrno("cannot set flags of fd %d", fd);
}
void* chg_mallocx(size_t size) {
void* result = malloc(size);
if (!result)
abortmsg("failed to malloc");
return result;
}
void* chg_reallocx(void* ptr, size_t size) {
void* result = realloc(ptr, size);
if (!result)
abortmsg("failed to realloc");
return result;
}
/*
* Execute a shell command in mostly the same manner as system(), with the
* give environment variables, after chdir to the given cwd. Returns a status
* code compatible with the Python subprocess module.
*/
int runshellcmd(const char* cmd, const char* envp[], const char* cwd) {
enum { F_SIGINT = 1, F_SIGQUIT = 2, F_SIGMASK = 4, F_WAITPID = 8 };
unsigned int doneflags = 0;
int status = 0;
struct sigaction newsa, oldsaint, oldsaquit;
sigset_t oldmask;
/* block or mask signals just as system() does */
memset(&newsa, 0, sizeof(newsa));
newsa.sa_handler = SIG_IGN;
newsa.sa_flags = 0;
if (sigemptyset(&newsa.sa_mask) < 0)
goto done;
if (sigaction(SIGINT, &newsa, &oldsaint) < 0)
goto done;
doneflags |= F_SIGINT;
if (sigaction(SIGQUIT, &newsa, &oldsaquit) < 0)
goto done;
doneflags |= F_SIGQUIT;
if (sigaddset(&newsa.sa_mask, SIGCHLD) < 0)
goto done;
if (sigprocmask(SIG_BLOCK, &newsa.sa_mask, &oldmask) < 0)
goto done;
doneflags |= F_SIGMASK;
pid_t pid = fork();
if (pid < 0)
goto done;
if (pid == 0) {
sigaction(SIGINT, &oldsaint, NULL);
sigaction(SIGQUIT, &oldsaquit, NULL);
sigprocmask(SIG_SETMASK, &oldmask, NULL);
if (cwd && chdir(cwd) < 0)
_exit(127);
const char* argv[] = {"sh", "-c", cmd, NULL};
if (envp) {
execve("/bin/sh", (char**)argv, (char**)envp);
} else {
execv("/bin/sh", (char**)argv);
}
_exit(127);
} else {
if (waitpid(pid, &status, 0) < 0)
goto done;
doneflags |= F_WAITPID;
}
done:
if (doneflags & F_SIGINT)
sigaction(SIGINT, &oldsaint, NULL);
if (doneflags & F_SIGQUIT)
sigaction(SIGQUIT, &oldsaquit, NULL);
if (doneflags & F_SIGMASK)
sigprocmask(SIG_SETMASK, &oldmask, NULL);
/* no way to report other errors, use 127 (= shell termination) */
if (!(doneflags & F_WAITPID))
return 127;
if (WIFEXITED(status))
return WEXITSTATUS(status);
if (WIFSIGNALED(status))
return -WTERMSIG(status);
return 127;
}