chg: remove hashstate and validation logic

Summary:
After the D7840236 stack, it's possible to have a single chg server that handles
different [extensions] configurations. The 'validate' step and 'hashstate' were
mainly designed to detect changes to [extensions] and to the source code of the
extensions. That becomes unnecessary with the latest design.

Remove them to simplify the logic.

chg no longer creates symlink `server2 -> server2-hash`. Bump the name
to `server3` to explicitly break compatibility.

Reviewed By: xavierd

Differential Revision: D16866463

fbshipit-source-id: 5e1d00e6f895d9b8ead0bcabefcea11756f57c94
This commit is contained in:
Jun Wu 2019-08-28 19:22:46 -07:00 committed by Facebook Github Bot
parent 1a6bc5ff62
commit 74d5e02eb8
3 changed files with 10 additions and 290 deletions

View File

@ -106,7 +106,7 @@ static void setcmdserveropts(struct cmdserveropts* opts) {
}
const char* basename = (envsockname) ? envsockname : sockdir;
const char* sockfmt = (envsockname) ? "%s" : "%s/server2";
const char* sockfmt = (envsockname) ? "%s" : "%s/server3";
r = snprintf(opts->sockname, sizeof(opts->sockname), sockfmt, basename);
if (r < 0 || (size_t)r >= sizeof(opts->sockname))
abortmsg("too long TMPDIR or CHGSOCKNAME (r = %d)", r);
@ -174,10 +174,10 @@ static hgclient_t* retryconnectcmdserver(
for (unsigned int i = 0; !timeoutsec || i < timeoutsec * 100; i++) {
hgclient_t* hgc = hgc_open(opts->initsockname);
if (hgc) {
debugmsg("rename %s to %s", opts->initsockname, opts->sockname);
int r = rename(opts->initsockname, opts->sockname);
debugmsg("unlink %s", opts->initsockname);
int r = unlink(opts->initsockname);
if (r != 0)
abortmsgerrno("cannot rename");
abortmsgerrno("cannot unlink");
return hgc;
}
@ -252,43 +252,6 @@ static void killcmdserver(const struct cmdserveropts* opts) {
}
}
/*
 * Carry out the instructions sent by the server.
 *
 * insts is a NULL-terminated array of strings; a NULL insts means there is
 * nothing to do. Recognized instructions:
 *   "unlink PATH"   - unlink PATH (the result of unlink() is not checked)
 *   "redirect PATH" - copy PATH into opts->redirectsockname; needs reconnect
 *   "exit N"        - call exit(N)
 *   "reconnect"     - needs reconnect
 * Any other instruction is reported through abortmsg().
 *
 * Return 1 if reconnect is needed, otherwise 0.
 */
static int runinstructions(struct cmdserveropts* opts, const char** insts) {
  if (insts == NULL)
    return 0;

  int reconnect = 0;
  /* start with an empty redirect target; only "redirect" fills it in */
  opts->redirectsockname[0] = '\0';

  for (const char** cur = insts; *cur != NULL; cur++) {
    const char* inst = *cur;
    debugmsg("instruction: %s", inst);

    if (strncmp(inst, "unlink ", 7) == 0) {
      unlink(inst + 7);
      continue;
    }
    if (strncmp(inst, "redirect ", 9) == 0) {
      const char* target = inst + 9;
      int len = snprintf(
          opts->redirectsockname,
          sizeof(opts->redirectsockname),
          "%s",
          target);
      /* snprintf returns the untruncated length: len >= size means cut off */
      if (len < 0 || len >= (int)sizeof(opts->redirectsockname))
        abortmsg("redirect path is too long (%d)", len);
      reconnect = 1;
      continue;
    }
    if (strncmp(inst, "exit ", 5) == 0) {
      int code = 0;
      if (sscanf(inst + 5, "%d", &code) != 1)
        abortmsg("cannot read the exit code");
      exit(code);
    }
    if (strcmp(inst, "reconnect") == 0) {
      reconnect = 1;
      continue;
    }
    abortmsg("unknown instruction: %s", inst);
  }
  return reconnect;
}
/*
* Test whether the command is unsupported or not. This is not designed to
* cover all cases. But it's fast, does not depend on the server and does
@ -389,12 +352,6 @@ int chg_main(int argc, const char* argv[], const char* envp[]) {
#endif
if (!needreconnect) {
hgc_setenv(hgc, envp);
double validate_start = hgc_elapsed(hgc);
const char** insts = hgc_validate(hgc, argv + 1, argc - 1);
double validate_interval = hgc_elapsed(hgc) - validate_start;
needreconnect = runinstructions(&opts, insts);
free(insts);
debugmsg("validate took %.4f seconds", validate_interval);
}
if (!needreconnect)
break;

View File

@ -34,7 +34,6 @@ enum {
CAP_CHDIR = 0x0200,
CAP_SETENV = 0x0800,
CAP_SETUMASK = 0x1000,
CAP_VALIDATE = 0x2000,
CAP_SETPROCNAME = 0x4000,
};
@ -50,7 +49,6 @@ static const cappair_t captable[] = {
{"chdir", CAP_CHDIR},
{"setenv", CAP_SETENV},
{"setumask", CAP_SETUMASK},
{"validate", CAP_VALIDATE},
{"setprocname", CAP_SETPROCNAME},
{NULL, 0}, /* terminator */
};
@ -519,39 +517,6 @@ unsigned long long hgc_versionhash(const hgclient_t* hgc) {
return hgc->versionhash;
}
/*!
 * Ask the server to validate itself against our command line arguments.
 *
 * The arguments are packed and sent with the "validate" command so the
 * server can load the repo config and decide whether it can process our
 * request directly.
 * Make sure hgc_setenv is called before calling this.
 *
 * @return - NULL, the server believes it can handle our request, or does not
 *           support "validate" command.
 *         - a list of strings, the server probably cannot handle our request
 *           and it sent instructions telling us what to do next. See
 *           chgserver.py for possible instruction formats.
 *           the list should be freed by the caller.
 *           the last string is guaranteed to be NULL.
 */
const char**
hgc_validate(hgclient_t* hgc, const char* const args[], size_t argsize) {
  assert(hgc);
  if ((hgc->capflags & CAP_VALIDATE) == 0)
    return NULL;

  packcmdargs(&hgc->ctx, args, argsize);
  writeblockrequest(hgc, "validate");
  handleresponse(hgc);

  /* anything longer than the lone '\0' reply is a list of instructions */
  if (hgc->ctx.datasize > 1) {
    /* make sure the buffer is '\0' terminated */
    enlargecontext(&hgc->ctx, hgc->ctx.datasize + 1);
    hgc->ctx.data[hgc->ctx.datasize] = '\0';
    return unpackcmdargsnul(&hgc->ctx);
  }

  /* the server returns '\0' if it can handle our request */
  return NULL;
}
/*!
* Execute the specified Mercurial command
*

View File

@ -57,117 +57,6 @@ from .i18n import _
_log = commandserver.log
def _hashlist(items):
"""return sha1 hexdigest for a list"""
return hashlib.sha1(str(items)).hexdigest()
# Names of the sensitive environment variables that feed into confighash:
# CHGHG, HG, HGEMITWARNINGS and HGENCODING. The pattern is anchored at both
# ends (\A ... \Z), so only these exact names match; re.X lets the
# alternatives be spread over several lines.
_envre = re.compile(
r"""\A(?:
CHGHG
|HG(?:EMITWARNINGS|ENCODING)?
)\Z""",
re.X,
)
def _confighash(ui):
    """return a quick hash for detecting config/env changes

    confighash is the hash of the sensitive environment variables (the ones
    whose name matches _envre); no config section is hashed here.

    for chgserver, it is designed that once confighash changes, the server is
    not qualified to serve its client and should redirect the client to a new
    server. different from mtimehash, confighash change will not mark the
    server outdated and exit since the user can have different configs at the
    same time.
    """
    # no more sensitive config sections with dispatch.runchgserver()

    # If $CHGHG is set, the change to $HG should not trigger a new chg server
    ignored = {"HG"} if "CHGHG" in encoding.environ else set()

    envitems = []
    for name, value in encoding.environ.iteritems():
        if name in ignored:
            continue
        if _envre.match(name):
            envitems.append((name, value))
    envitems.sort()

    # only the first 6 hex digits of the digest are kept
    return _hashlist(envitems)[:6]
def _getmtimepaths(ui):
    """get a list of paths that should be checked to detect change

    The list will include:
    - mercurial/__version__.py
    - python binary

    The result is sorted and free of duplicates. The ``ui`` argument is not
    used by this function.
    """
    try:
        from . import __version__ as versionmod
    except ImportError:
        # __version__ may be missing; then only the python binary is checked
        versionmod = None

    paths = {pycompat.sysexecutable}
    if versionmod is not None:
        try:
            paths.add(inspect.getabsfile(versionmod))
        except TypeError:
            # inspect could not map the module to a file; skip it
            pass
    return sorted(paths)
def _mtimehash(paths):
    """return a quick hash for detecting file changes

    mtimehash calls stat on given paths and calculate a hash based on size and
    mtime of each file. mtimehash does not read file content because reading is
    expensive. therefore it's not 100% reliable for detecting content changes.
    it's possible to return different hashes for same file contents.
    it's also possible to return a same hash for different file contents for
    some carefully crafted situation.

    for chgserver, it is designed that once mtimehash changes, the server is
    considered outdated immediately and should no longer provide service.

    mtimehash is not included in confighash because we only know the paths of
    extensions after importing them (there is imp.find_module but that faces
    race conditions). We need to calculate confighash without importing.

    A path that cannot be stat-ed contributes ``None`` to the hashed list.
    The result is the first 12 hex digits of the digest.
    """

    def trystat(path):
        try:
            st = os.stat(path)
        except OSError:
            # could be ENOENT, EPERM etc. not fatal in any case
            return None
        return (st.st_mtime, st.st_size)

    # Build a real list before hashing. Where map() returns an iterator
    # (Python 3), str() of it would describe the iterator object instead of
    # the stat results, and the hash would no longer reflect the files.
    stats = [trystat(p) for p in paths]
    return _hashlist(stats)[:12]
class hashstate(object):
    """a structure storing confighash, mtimehash, paths used for mtimehash"""

    def __init__(self, confighash, mtimehash, mtimepaths):
        # plain value holder: the three values are stored as given
        self.confighash = confighash
        self.mtimehash = mtimehash
        self.mtimepaths = mtimepaths

    @staticmethod
    def fromui(ui, mtimepaths=None):
        """build a hashstate for ``ui``

        When ``mtimepaths`` is None the paths come from _getmtimepaths(ui);
        otherwise the given paths are used as-is. Both hashes are logged
        through _log before the new hashstate is returned.
        """
        paths = _getmtimepaths(ui) if mtimepaths is None else mtimepaths
        chash = _confighash(ui)
        mhash = _mtimehash(paths)
        _log("confighash = %s mtimehash = %s\n" % (chash, mhash))
        return hashstate(chash, mhash, paths)
def _newchgui(srcui, csystem, attachio):
class chgui(srcui.__class__):
def __init__(self, src=None):
@ -203,28 +92,6 @@ def _newchgui(srcui, csystem, attachio):
return chgui(srcui)
def _loadnewui(srcui, args):
    """load a fresh ui (and local ui) configured from command line ``args``

    A new ui is created with ``srcui.__class__.load()`` and takes over
    srcui's fin/fout/ferr/environ (plus ``_csystem`` when srcui has one).
    The early options in ``args`` are then parsed to apply ``--config`` and
    to locate the working directory and repository.

    Returns a ``(newui, newlui)`` tuple, where ``newlui`` is the second item
    returned by ``dispatch._getlocal``.
    """
    from . import dispatch  # avoid cycle

    newui = srcui.__class__.load()
    # the new ui shares the source ui's streams and environment
    for attr in ("fin", "fout", "ferr", "environ"):
        setattr(newui, attr, getattr(srcui, attr))
    if util.safehasattr(srcui, "_csystem"):
        newui._csystem = srcui._csystem

    # command line args
    options = dispatch._earlyparseopts(newui, args)
    dispatch._parseconfig(newui, options["config"])

    # load wd and repo config, copied from dispatch.py
    cwd = options["cwd"]
    # an unset/empty --cwd (or an empty realpath result) means "no wd"
    wd = (os.path.realpath(cwd) or None) if cwd else None
    _path, newlui = dispatch._getlocal(newui, options["repository"], wd=wd)

    return (newui, newlui)
class channeledsystem(object):
"""Propagate ui.system() request in the following format:
@ -289,7 +156,7 @@ _iochannels = [
class chgcmdserver(commandserver.server):
def __init__(self, ui, repo, fin, fout, sock, hashstate, baseaddress):
def __init__(self, ui, repo, fin, fout, sock, baseaddress):
super(chgcmdserver, self).__init__(
_newchgui(ui, channeledsystem(fin, fout, "S"), self.attachio),
repo,
@ -298,11 +165,7 @@ class chgcmdserver(commandserver.server):
)
self.clientsock = sock
self._oldios = [] # original (self.ch, ui.fp, fd) before "attachio"
self.hashstate = hashstate
self.baseaddress = baseaddress
if hashstate is not None:
self.capabilities = self.capabilities.copy()
self.capabilities["validate"] = chgcmdserver.validate
def cleanup(self):
super(chgcmdserver, self).cleanup()
@ -376,51 +239,6 @@ class chgcmdserver(commandserver.server):
setattr(ui, fn, fp)
del self._oldios[:]
def validate(self):
    """Reload the config and check if the server is up to date

    Read a list of NUL separated arguments.
    Write a non-empty list of NUL separated instruction strings, or a
    single NUL byte if the list is empty.
    An instruction string could be either:
        - "unlink $path", the client should unlink the path to stop the
          outdated server.
        - "redirect $path", the client should attempt to connect to $path
          first. If it does not work, start a new server. It implies
          "reconnect".
        - "exit $n", the client should exit directly with code n.
          This may happen if we cannot parse the config.
        - "reconnect", the client should close the connection and
          reconnect.
    If neither "reconnect" nor "redirect" is included in the instruction
    list, the client can continue with this server after completing all
    the instructions.
    """
    from . import dispatch  # avoid cycle

    args = self._readlist()
    try:
        newui, lui = _loadnewui(self.ui, args)
    except error.ParseError as inst:
        # the config could not be parsed: report it and make the client exit
        dispatch._formatparse(self.ui.warn, inst)
        self.ui.flush()
        self.cresult.write("exit 255")
        return
    self.ui = newui

    current = self.hashstate
    fresh = hashstate.fromui(lui, current.mtimepaths)

    insts = []
    if fresh.mtimehash != current.mtimehash:
        stale = _hashaddress(self.baseaddress, current.confighash)
        insts.append("unlink %s" % stale)
        # mtimehash is empty if one or more extensions fail to load.
        # to be compatible with hg, still serve the client this time.
        if current.mtimehash:
            insts.append("reconnect")
    if fresh.confighash != current.confighash:
        target = _hashaddress(self.baseaddress, fresh.confighash)
        insts.append("redirect %s" % target)

    _log("validate: %s\n" % insts)
    payload = "\0".join(insts)
    # an empty instruction list is sent as a single NUL byte
    self.cresult.write(payload if payload else "\0")
def chdir(self):
"""Change current directory
@ -494,13 +312,13 @@ def _tempaddress(address):
return "%s.%d.tmp" % (address, os.getpid())
def _hashaddress(address, hashstr):
def _realaddress(address):
# if the basename of address contains '.', use only the left part. this
# makes it possible for the client to pass 'server.tmp$PID' and follow by
# an atomic rename to avoid locking when spawning new servers.
dirname, basename = os.path.split(address)
basename = basename.split(".", 1)[0]
return "%s-%s" % (os.path.join(dirname, basename), hashstr)
return os.path.join(dirname, basename)
class chgunixservicehandler(object):
@ -514,30 +332,12 @@ class chgunixservicehandler(object):
self._lastactive = time.time()
def bindsocket(self, sock, address):
self._inithashstate(address)
self._checkextensions()
self._baseaddress = address
self._realaddress = _realaddress(address)
self._bind(sock)
self._createsymlink()
# no "listening at" message should be printed to simulate hg behavior
def _inithashstate(self, address):
    """record the base address and derive the hash state and real address

    With ``chgserver.skiphash`` enabled no hash state is kept and the real
    address is ``address`` itself. Otherwise the hash state is computed from
    ``self.ui`` and the real address is ``_hashaddress`` of ``address`` and
    the confighash.
    """
    self._baseaddress = address
    if not self.ui.configbool("chgserver", "skiphash"):
        self._hashstate = hashstate.fromui(self.ui)
        self._realaddress = _hashaddress(address, self._hashstate.confighash)
    else:
        self._hashstate = None
        self._realaddress = address
def _checkextensions(self):
    """invalidate mtimehash when some extensions were not loaded

    Does nothing when there is no hash state.
    """
    # one or more extensions failed to load. mtimehash becomes
    # meaningless because we do not know the paths of those extensions.
    # set mtimehash to an illegal hash value to invalidate the server.
    if self._hashstate and extensions.notloaded():
        self._hashstate.mtimehash = ""
def _bind(self, sock):
# use a unique temp address so we can stat the file and do ownership
# check later
@ -588,9 +388,7 @@ class chgunixservicehandler(object):
self._lastactive = time.time()
def createcmdserver(self, repo, conn, fin, fout):
return chgcmdserver(
self.ui, repo, fin, fout, conn, self._hashstate, self._baseaddress
)
return chgcmdserver(self.ui, repo, fin, fout, conn, self._baseaddress)
def chgunixservice(ui, repo, opts):