importer - call p4 where with multiple paths

Summary:
We currently call `p4 where` with one path at a time, even though the command
accepts a list of paths. This change takes advantage of that by batching the
`p4 where` calls, which speeds up importing.

Differential Revision: D7676378

fbshipit-source-id: 4a6747458555a60dd5f385604f2a25d595af947d
This commit is contained in:
Alexandre Marin 2018-04-19 17:05:34 -07:00 committed by Facebook Github Bot
parent 9cf21d7754
commit 69005698f4
5 changed files with 69 additions and 63 deletions

View File

@ -3,7 +3,6 @@ from __future__ import absolute_import
import collections
import gzip
import json
import os
import re
@ -17,7 +16,7 @@ from mercurial import (
)
from . import lfs, p4
from .util import caseconflict, localpath, runworker
from .util import caseconflict, localpath
KEYWORD_REGEX = "\$(Id|Header|DateTime|" + \
"Date|Change|File|" + \
@ -26,21 +25,6 @@ KEYWORD_REGEX = "\$(Id|Header|DateTime|" + \
#TODO: make p4 user configurable
P4_ADMIN_USER = 'p4admin'
def relpath(client, depotfile, ignore_nonexisting=False):
    """Translate a depot path into a path relative to the client root.

    Looks up ``depotfile`` through ``p4.parse_where`` and reads the
    ``clientFile`` entry of the result. The ``//<client>/`` prefix is removed
    and the remainder is passed through ``p4.decodefilename``.

    Returns None when the lookup has no ``clientFile`` entry and
    ``ignore_nonexisting`` is true; raises ``error.Abort`` in that situation
    when ``ignore_nonexisting`` is false.
    """
    clientfile = p4.parse_where(client, depotfile).get('clientFile')
    if clientfile is None:
        if not ignore_nonexisting:
            raise error.Abort('Could not find file %s' % (depotfile))
        return None
    stripped = clientfile.replace('//%s/' % client, '')
    return p4.decodefilename(stripped)
def get_localname(client, p4filelogs):
    """Yield ``(1, json)`` pairs mapping each depot file to its local name.

    For every filelog in ``p4filelogs`` the depot path is resolved with
    ``relpath`` and emitted as a JSON-encoded single-entry dict
    ``{depotfile: localname}``. The constant ``1`` is the first element of
    every yielded tuple.
    """
    for filelog in p4filelogs:
        depot = filelog.depotfile
        payload = {depot: relpath(client, depot)}
        yield 1, json.dumps(payload)
def get_p4_file_content(storepath, p4filelog, p4cl, skipp4revcheck=False):
p4path = p4filelog._depotfile
p4storepath = os.path.join(storepath, localpath(p4path))
@ -78,20 +62,14 @@ def get_filelogs_to_sync(ui, client, repo, p1ctx, cl, p4filelogs):
# it represents files not in the parent's commit
p1 = repo[p1ctx.node()]
hgfilelogs = p1.manifest().copy()
p4flmapping = collections.defaultdict()
p4flmapping = {p4fl.depotfile: p4fl for p4fl in p4filelogs}
addedp4filelogs = []
reusep4filelogs = []
addedp4flheadcls = set()
wargs = (client,)
for p4fl in p4filelogs:
p4flmapping[p4fl.depotfile] = p4fl
ui.debug('%d p4 filelogs to read\n' % (len(p4filelogs)))
# parallelize calls to translate each p4 filepath into hg filepath
for i, serialized in runworker(ui, get_localname, wargs, p4filelogs):
data = json.loads(serialized)
localfile = data.values()[0].encode('utf-8')
p4file = data.keys()[0].encode('utf-8')
mapping = p4.parse_where_multiple(client, p4flmapping.keys())
for p4file, localfile in mapping.items():
if localfile in hgfilelogs:
reusep4filelogs.append(localfile)
else:
@ -436,7 +414,7 @@ class FileImporter(object):
@util.propertycache
def relpath(self):
return relpath(self._importset.client, self.depotfile)
return p4.parse_where(self._importset.client, self.depotfile)
@property
def depotfile(self):
@ -566,7 +544,7 @@ class SyncFileImporter(FileImporter):
if self._localfile:
return self._localfile
else:
return relpath(self._client, self._p4filelog.depotfile)
return p4.parse_where(self._client, self._p4filelog.depotfile)
def create(self, tr):
assert tr is not None

View File

@ -9,6 +9,7 @@ import time
from .util import runworker
from mercurial import (
error,
util,
)
@ -139,22 +140,46 @@ def parse_filelist_at_cl(client, cl=None):
if c:
yield d
def parse_where(client, depotname):
    """Run ``p4 where`` for one depot path and return the unmarshalled record.

    The command is built with both ``client`` and ``depotname`` shell-quoted
    and executed through ``util.popen``; its output is decoded with
    ``marshal.load``. The call is wrapped in ``retry(num=3, sleeps=0.3)``
    (presumably re-invoking the helper on failure -- confirm against the
    decorator's definition).

    Raises ``P4Exception`` carrying the most recently opened pipe (or ``''``
    if no pipe was opened) when the helper ultimately fails.
    """
    # TODO: investigate if we replace this with exactly one call to
    # where //clientname/...
    cmd = 'p4 --client %s -G where %s' % (
        util.shellquote(client),
        util.shellquote(depotname))
    # A one-slot list lets the nested helper publish the pipe to this scope.
    # A ``global`` declaration in the helper would bind a module-level name
    # instead, leaving the value seen by the except clause untouched.
    lastpipe = ['']

    @retry(num=3, sleeps=0.3)
    def helper():
        lastpipe[0] = util.popen(cmd, mode='rb')
        return marshal.load(lastpipe[0])

    try:
        return helper()
    except Exception:
        raise P4Exception(lastpipe[0])
def parse_where(client, depotname, ignore_nonexisting=False):
    """Resolve a single depot path via ``parse_where_multiple``.

    Returns the value stored for ``depotname`` in the mapping produced by
    ``parse_where_multiple``, or None when the mapping has no such key.
    ``ignore_nonexisting`` is forwarded unchanged.
    """
    resolved = parse_where_multiple(client, [depotname], ignore_nonexisting)
    return resolved.get(depotname)
MAX_CMD_LEN = 2 ** 12  # 4K


def batch_and_run_where(client, p4paths):
    """Run ``p4 where`` over ``p4paths`` in batches, yielding each output.

    Paths are shell-quoted and grouped so that each generated command line
    stays below ``MAX_CMD_LEN`` characters where possible. One ``util.popen``
    result (opened in ``'rb'`` mode) is yielded per batch, in input order.
    Nothing is yielded for an empty ``p4paths``.

    A single path whose quoted form alone exceeds the budget is still sent,
    in a batch of its own, rather than being dropped or producing a
    ``p4 where`` invocation with no path arguments.
    """
    # Quote the client as well as the paths: both end up in a shell command.
    base_cmd = 'p4 -c %s -G where ' % util.shellquote(client)
    quoted = [util.shellquote(p) for p in p4paths]
    max_length = MAX_CMD_LEN - len(base_cmd)

    @retry(num=3, sleeps=0.3)
    def run_for(start, end=None):
        return util.popen(base_cmd + ' '.join(quoted[start:end]), mode='rb')

    start = 0
    cmd_len = 0
    # Measure the quoted form, since that is what is actually placed on the
    # command line; "+ 1" accounts for the separating space.
    for index, path in enumerate(quoted):
        cost = len(path) + 1
        # Flush only a non-empty batch, so an over-long first path cannot
        # trigger a command with no paths.
        if index > start and cmd_len + cost >= max_length:
            yield run_for(start, index)
            start = index
            cmd_len = 0
        # The current path always belongs to the (possibly new) batch, so
        # its length must be counted toward that batch.
        cmd_len += cost

    # Deal with the last few paths
    if start < len(quoted):
        yield run_for(start)
def parse_where_multiple(client, p4paths, ignore_nonexisting=False):
    """Map depot paths to client-relative paths using batched ``p4 where``.

    Every record read (via ``loaditer``) from the outputs produced by
    ``batch_and_run_where`` contributes one entry keyed by its ``depotFile``.
    The value is the record's ``clientFile`` with the leading
    ``//<client>/`` portion sliced off and passed through ``decodefilename``.

    Records without a ``clientFile`` are skipped when ``ignore_nonexisting``
    is true; otherwise ``error.Abort`` is raised for the first such record.
    """
    prefix_len = len('//%s/' % client)
    result = {}
    for output in batch_and_run_where(client, p4paths):
        for record in loaditer(output):
            clientfile = record.get('clientFile')
            if clientfile is None:
                if ignore_nonexisting:
                    continue
                raise error.Abort('Could not find file %s' % (record))
            relative = clientfile[prefix_len:]
            result[record['depotFile']] = decodefilename(relative)
    return result
def get_file(path, rev=None, clnum=None):
"""Returns a file from Perforce"""

View File

@ -35,11 +35,13 @@ class ChangelistImporter(object):
added_or_modified = []
removed = set()
p4flogs = {}
p4paths = [info['depotFile'] for info in fstat]
hgpaths = p4.parse_where_multiple(self.client, p4paths)
for info in fstat:
action = info['action']
p4path = info['depotFile']
hgpath = hgpaths[p4path]
data = {p4cl.cl: {'action': action, 'type': info['type']}}
hgpath = importer.relpath(self.client, p4path)
p4flogs[hgpath] = p4.P4Filelog(p4path, data)
if action in p4.ACTION_DELETE + p4.ACTION_ARCHIVE:
@ -67,30 +69,30 @@ class ChangelistImporter(object):
def _get_move_info(self, p4cl, p4flogs):
'''Returns a dict where entries are (dst, src)'''
moves = {}
files_in_clientspec = {
p4flog._depotfile: hgpath for hgpath, p4flog in p4flogs.items()
}
hgdst_to_p4src = {}
for filename, info in p4cl.parsed['files'].items():
if filename not in files_in_clientspec:
continue
src = info.get('src')
if src:
hgdst = files_in_clientspec[filename]
# The below could return None if the source of the move is
# outside of client view. That is expected.
# This info will be used when creating the commit, and value of
# None in the moves dictionary is a no-op, it will treat it as
# an add in hg. As it just came into the client view we cannot
# store any move info for it in hg (even though it was a legit
# move in perforce).
hgsrc = importer.relpath(
self.client,
src,
ignore_nonexisting=True,
)
moves[hgdst] = hgsrc
return moves
hgdst_to_p4src[files_in_clientspec[filename]] = src
w_map = p4.parse_where_multiple(
self.client,
hgdst_to_p4src.values(),
ignore_nonexisting=True,
)
# The 'get' below could return None if the source of the move is outside
# of client view. That is expected. This info will be used when creating
# the commit, and a value of None in this dictionary is a no-op, it will
# treat it as an add in hg. As it just came into the client view we
# cannot store any move info for it in hg (even though it was a legit
# move in perforce).
return {
hgdst: w_map.get(p4src) for hgdst, p4src in hgdst_to_p4src.items()
}
def _create_commit(self, p4cl, p4flogs, removed, moved):
'''Uses a memory context to commit files into the repo'''

View File

@ -10,6 +10,7 @@ New errors are not allowed. Warnings are strongly discouraged.
$ testrepohg files -I . \
> -X contrib/python-zstandard \
> -X hgext/fsmonitor/pywatchman \
> -X hgext/p4fastimport \
> -X lib/cdatapack \
> -X lib/third-party \
> -X mercurial/thirdparty \

View File

@ -98,8 +98,8 @@ outputs, which should be fixed later.
hgext/p4fastimport/__init__.py:42: symbol import follows non-symbol import: mercurial.i18n
hgext/p4fastimport/__init__.py:43: symbol import follows non-symbol import: mercurial.node
hgext/p4fastimport/__init__.py:43: imports from mercurial.node not lexically sorted: hex < short
hgext/p4fastimport/importer.py:20: direct symbol import caseconflict, localpath, runworker from hgext.p4fastimport.util
hgext/p4fastimport/importer.py:20: symbol import follows non-symbol import: hgext.p4fastimport.util
hgext/p4fastimport/importer.py:19: direct symbol import caseconflict, localpath from hgext.p4fastimport.util
hgext/p4fastimport/importer.py:19: symbol import follows non-symbol import: hgext.p4fastimport.util
hgext/p4fastimport/p4.py:10: direct symbol import runworker from hgext.p4fastimport.util
hgext/pushrebase.py:27: multiple imported names: errno, os, tempfile, mmap, time
hgext/pushrebase.py:49: direct symbol import wrapcommand, wrapfunction, unwrapfunction from mercurial.extensions