do not try to parse CMD_PREFETCH_FILES data that looks too large

Summary:
Update the hg_import_helper.py code to ignore CMD_PREFETCH_FILES requests with
more than 4M files.

Old edenfs daemons that send JSON data for this field can sometimes end up
starting newer versions of the hg_import_helper.py script.  When that happens
we want to make sure the `hg_import_helper.py` script does not actually try to
parse the request, since deserializing data that isn't in the expected binary
format can consume a lot of CPU and memory.  The older JSON data format should
always start with '[' (byte 0x5B), so when the first 4 bytes are read as a
big-endian file count the request appears to contain at least 1,526,726,656
files.
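
As a quick illustration (the request body below is a made-up example, not an
actual edenfs payload), here is how a JSON-style body decodes to a huge file
count:

import struct

# "[" is byte 0x5B (decimal 91).  As the most significant byte of a
# big-endian uint32, any body starting with "[" decodes to a file count
# of at least 0x5B000000 == 91 * 2**24 == 1526726656, far above the
# 4,000,000 cutoff that this change adds.
body = b'["some/example/path"]'  # hypothetical old-style JSON payload
[num_files] = struct.unpack_from(b">I", body, 0)
assert num_files >= 0x5B000000 > 4000000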

Reviewed By: chadaustin

Differential Revision: D8508651

fbshipit-source-id: c1e7726398517f97ccc1deafd30620306a9ad80d
Adam Simpkins 2018-06-19 12:42:30 -07:00 committed by Facebook Github Bot
parent bfad766a21
commit 6f65f4543b


@@ -619,16 +619,35 @@ class HgServer(object):
 
     @cmd(CMD_PREFETCH_FILES)
     def prefetch_files(self, request):
+        self._do_prefetch(request)
+        self.send_chunk(request, "")
+
+    def _do_prefetch(self, request):
         # Some repos may not have remotefilelog enabled; for example,
         # the watchman integration tests have no remote server and no
         # remotefilelog.
         if not hasattr(self.repo, "fileservice"):
             logging.debug("ignoring prefetch request in non-remotefilelog repository")
-            self.send_chunk(request, "")
             return
 
         logging.debug("got prefetch request, parsing")
         [num_files] = struct.unpack_from(b">I", request.body, 0)
+        if num_files > 4000000:
+            # Ignore requests with an extremely large number of files to prefetch,
+            # to prevent us from consuming lots of memory and CPU trying to deserialize
+            # garbage data.
+            #
+            # This is likely a request from an older edenfs daemon that sends JSON data
+            # here rather than our binary serialization format.  We just return a
+            # successful response and ignore the request in this case rather than
+            # responding with an error.  Responding with an error will cause these older
+            # edenfs versions to propagate the error back to clients in some cases;
+            # ignoring the request will allow things to proceed normally, but just
+            # slower than if the data had been prefetched.
+            logging.debug(
+                "ignoring prefetch request with too many files: %r", num_files
+            )
+            return
         offset = 4  # struct.calcsize(">I")
         lengths_fmt = b">" + (num_files * b"I")
         path_lengths = struct.unpack_from(lengths_fmt, request.body, offset)
@@ -644,8 +663,6 @@ class HgServer(object):
         logging.debug("will prefetch %d files" % len(files))
         self.repo.fileservice.prefetch(files)
-
-        self.send_chunk(request, "")
 
 
 def always_allow_pending(root):
     return True