restart hg_import_helper.py when the repository needs to be reopened

Summary:
Update hg_import_helper.py to throw a new ResetRepoError type when it decides
that its local mercurial state is invalid and that it needs to reopen the
repository.  The C++ code will catch this exception, restart the
hg_import_helper.py and retry the request.  (It will only retry once for each
request.)

The previous behavior of simply closing and re-opening the repository object
in Python has been problematic, as this has resulted in resource leaks.  The
hg_import_helper.py process itself ends up leaking memory when re-opening the
repository.  This also appears to result in many scmmemcache helper processes
being created and not cleaned up.  Presumably these are associated with the
old repository state and not cleaned up properly when we reopen the
repository.

Reviewed By: quark-zju

Differential Revision: D9510664

fbshipit-source-id: 449dfa9e2e21aabf8b3ce640749d32aa8f8e4052
This commit is contained in:
Adam Simpkins 2018-08-26 00:16:46 -07:00 committed by Facebook Github Bot
parent acf4660012
commit 05394747ae
3 changed files with 80 additions and 72 deletions

View File

@ -537,22 +537,26 @@ unique_ptr<Tree> HgImporter::importTreeImpl(
try { try {
header = readChunkHeader(requestID, "CMD_FETCH_TREE"); header = readChunkHeader(requestID, "CMD_FETCH_TREE");
} catch (const HgImportPyError& ex) { } catch (const HgImportPyError& ex) {
// For now translate any error thrown into a MissingKeyError, if (FLAGS_allow_flatmanifest_fallback) {
// so that our caller will retry this tree import using flatmanifest // For now translate any error thrown into a MissingKeyError,
// import if possible. // so that our caller will retry this tree import using flatmanifest
// // import if possible.
// The mercurial code can throw a wide variety of errors here that all //
// effectively mean mean it couldn't fetch the tree data. // The mercurial code can throw a wide variety of errors here that all
// // effectively mean mean it couldn't fetch the tree data.
// We most commonly expect to get a MissingNodesError if the remote //
// server does not know about these trees (for instance if they are only // We most commonly expect to get a MissingNodesError if the remote
// available locally, but simply only have flatmanifest information // server does not know about these trees (for instance if they are only
// rather than treemanifest info). // available locally, but simply only have flatmanifest information
// // rather than treemanifest info).
// However we can also get lots of other errors: no remote server //
// configured, remote repository does not exist, remote repository does // However we can also get lots of other errors: no remote server
// not support fetching tree info, etc. // configured, remote repository does not exist, remote repository does
throw MissingKeyError(ex.what()); // not support fetching tree info, etc.
throw MissingKeyError(ex.what());
} else {
throw;
}
} }
if (header.dataLength != 0) { if (header.dataLength != 0) {
@ -1183,6 +1187,37 @@ HgImporterManager::HgImporterManager(
clientCertificate_{clientCertificate}, clientCertificate_{clientCertificate},
useMononoke_{useMononoke} {} useMononoke_{useMononoke} {}
template <typename Fn>
auto HgImporterManager::retryOnError(Fn&& fn) {
bool retried = false;
auto retryableError = [this, &retried](const std::exception& ex) {
resetHgImporter(ex);
if (retried) {
throw;
} else {
XLOG(INFO) << "restarting hg_import_helper and retrying operation";
retried = true;
}
};
while (true) {
try {
return fn(getImporter());
} catch (const HgImportPyError& ex) {
if (ex.errorType() == "ResetRepoError") {
// The python code thinks its repository state has gone bad, and
// is requesting to be restarted
retryableError(ex);
} else {
throw;
}
} catch (const HgImporterError& ex) {
retryableError(ex);
}
}
}
Hash HgImporterManager::importManifest(StringPiece revName) { Hash HgImporterManager::importManifest(StringPiece revName) {
return retryOnError( return retryOnError(
[&](HgImporter* importer) { return importer->importManifest(revName); }); [&](HgImporter* importer) { return importer->importManifest(revName); });
@ -1213,7 +1248,7 @@ HgImporter* HgImporterManager::getImporter() {
return importer_.get(); return importer_.get();
} }
void HgImporterManager::resetHgImporter(const HgImporterError& ex) { void HgImporterManager::resetHgImporter(const std::exception& ex) {
importer_.reset(); importer_.reset();
XLOG(WARN) << "error communicating with hg_import_helper.py: " << ex.what(); XLOG(WARN) << "error communicating with hg_import_helper.py: " << ex.what();
} }

View File

@ -401,24 +401,10 @@ class HgImporterManager : public Importer {
private: private:
template <typename Fn> template <typename Fn>
auto retryOnError(Fn&& fn) { auto retryOnError(Fn&& fn);
bool retried = false;
while (true) {
try {
return fn(getImporter());
} catch (const HgImporterError& ex) {
resetHgImporter(ex);
if (retried) {
throw;
} else {
retried = true;
}
}
}
}
HgImporter* getImporter(); HgImporter* getImporter();
void resetHgImporter(const HgImporterError& ex); void resetHgImporter(const std::exception& ex);
std::unique_ptr<HgImporter> importer_; std::unique_ptr<HgImporter> importer_;

View File

@ -143,6 +143,24 @@ class Request(object):
self.body = body self.body = body
class ResetRepoError(Exception):
"""This exception type indicates that the internal mercurial repo object state
is likely bad, and should be re-opened. We throw this error up to the C++ code,
and it will restart us when this happens.
Completely restarting the python hg_import_helper.py script in this case appears to
be much better than trying to close and re-open the repository in the same python
process. The python code tends to leak memory and other resources (e.g.,
remotefilelog.cacheprocess subprocesses) if we do not restart the process
completely.
"""
def __init__(self, original_error):
super(ResetRepoError, self).__init__(
"hg repository needs to be re-opened: %s" % (original_error,)
)
def cmd(command_id): def cmd(command_id):
""" """
A helper function for identifying command functions A helper function for identifying command functions
@ -515,7 +533,7 @@ class HgServer(object):
try: try:
self._fetch_tree_impl(path, manifest_node) self._fetch_tree_impl(path, manifest_node)
except Exception: except Exception as ex:
# Ugh. Mercurial sometimes throws spurious KeyErrors # Ugh. Mercurial sometimes throws spurious KeyErrors
# if this tree was created since we first initialized our # if this tree was created since we first initialized our
# connection to the server. # connection to the server.
@ -523,8 +541,7 @@ class HgServer(object):
# These errors come from the server-side; there doesn't seem to be # These errors come from the server-side; there doesn't seem to be
# a good way to force the server to re-read the data other than # a good way to force the server to re-read the data other than
# recreating our repo object. # recreating our repo object.
self._reopen_repo() raise ResetRepoError(ex)
self._fetch_tree_impl(path, manifest_node)
def _fetch_tree_impl(self, path, manifest_node): def _fetch_tree_impl(self, path, manifest_node):
mfnodes = set([manifest_node]) mfnodes = set([manifest_node])
@ -674,7 +691,7 @@ class HgServer(object):
# #
# For now, if we receive empty file contents perform some additional checks # For now, if we receive empty file contents perform some additional checks
# to try and make sure this really seems like the right result. # to try and make sure this really seems like the right result.
contents = self._check_empty_file(path, rev_hash) self._check_empty_file(path, rev_hash)
return contents return contents
@ -687,7 +704,7 @@ class HgServer(object):
try: try:
return fctx.data() return fctx.data()
except Exception: except Exception as ex:
# Ugh. The server-side remotefilelog code can sometimes # Ugh. The server-side remotefilelog code can sometimes
# incorrectly fail to return data here. I believe this occurs if # incorrectly fail to return data here. I believe this occurs if
# the file data is new since we first opened our connection to the # the file data is new since we first opened our connection to the
@ -696,9 +713,7 @@ class HgServer(object):
# Completely re-initialize our repo object and try again, in hopes # Completely re-initialize our repo object and try again, in hopes
# that this will make the server return data correctly when we # that this will make the server return data correctly when we
# retry. # retry.
self._reopen_repo() raise ResetRepoError(ex)
fctx = self.repo.filectx(path, fileid=rev_hash)
return fctx.data()
def _check_empty_file(self, path, rev_hash): def _check_empty_file(self, path, rev_hash):
# The rev_hash is computed from the file contents plus the file's parent history # The rev_hash is computed from the file contents plus the file's parent history
@ -716,7 +731,7 @@ class HgServer(object):
b"\xb8\r\xe5\xd18u\x85A\xc5\xf0Re\xad\x14J\xb9\xfa\x86\xd1\xdb" b"\xb8\r\xe5\xd18u\x85A\xc5\xf0Re\xad\x14J\xb9\xfa\x86\xd1\xdb"
) )
if rev_hash == no_history_rev_hash: if rev_hash == no_history_rev_hash:
return b"" return
# If the remotefilelog extension is in use we can check with it to get the # If the remotefilelog extension is in use we can check with it to get the
# expected file length. # expected file length.
@ -733,37 +748,9 @@ class HgServer(object):
rev_hash, rev_hash,
remotefilelog_file_size, remotefilelog_file_size,
) )
raise ResetRepoError("inconsistent file length")
# Re-opening the repository and get the file contents again, to really confirm return
# that the file is empty.
self._reopen_repo()
contents = self._get_file_impl(path, rev_hash)
if contents:
# We received non-empty contents, so the original import attempt presumably
# got incorrect data. Log an error message about this before we return the
# contents.
logging.error(
"hg_import_helper incorrectly obtained empty contents for for %s:%s, "
"but non-empty contents after re-opening the repository. "
"remotefilelog size=%s",
path,
hex(rev_hash),
remotefilelog_file_size,
)
elif remotefilelog_file_size:
raise Exception(
(
"mercurial repeatedly incorrectly returned empty contents for "
"%s:%s; remotefilelog size=%s"
)
% (path, hex(rev_hash), remotefilelog_file_size)
)
else:
logging.info(
"confirmed import of %s:%s as an empty file", path, hex(rev_hash)
)
return contents
def prefetch(self, rev): def prefetch(self, rev):
if not hasattr(self.repo, "prefetch"): if not hasattr(self.repo, "prefetch"):