mirror of
https://github.com/facebook/sapling.git
synced 2024-10-09 00:14:35 +03:00
importer - call p4 where with multiple paths
Summary: We currently call p4 where with one path at a time, but it accepts a list. This change takes advantage of that, batching p4 where calls, which speeds up importing. Differential Revision: D7676378 fbshipit-source-id: 4a6747458555a60dd5f385604f2a25d595af947d
This commit is contained in:
parent
9cf21d7754
commit
69005698f4
@ -3,7 +3,6 @@ from __future__ import absolute_import
|
||||
|
||||
import collections
|
||||
import gzip
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
|
||||
@ -17,7 +16,7 @@ from mercurial import (
|
||||
)
|
||||
|
||||
from . import lfs, p4
|
||||
from .util import caseconflict, localpath, runworker
|
||||
from .util import caseconflict, localpath
|
||||
|
||||
KEYWORD_REGEX = "\$(Id|Header|DateTime|" + \
|
||||
"Date|Change|File|" + \
|
||||
@ -26,21 +25,6 @@ KEYWORD_REGEX = "\$(Id|Header|DateTime|" + \
|
||||
#TODO: make p4 user configurable
|
||||
P4_ADMIN_USER = 'p4admin'
|
||||
|
||||
def relpath(client, depotfile, ignore_nonexisting=False):
    """Translate a depot path into a path relative to the client root.

    The depot path is looked up with ``p4.parse_where`` and the
    ``//<client>/`` marker is removed from the reported ``clientFile``
    before the name is passed through ``p4.decodefilename``.

    Returns None when no ``clientFile`` is reported and
    ``ignore_nonexisting`` is true; raises ``error.Abort`` when it is false.
    """
    clientfile = p4.parse_where(client, depotfile).get('clientFile')
    if clientfile is None:
        if ignore_nonexisting:
            return None
        raise error.Abort('Could not find file %s' % (depotfile))
    stripped = clientfile.replace('//%s/' % client, '')
    return p4.decodefilename(stripped)
|
||||
|
||||
def get_localname(client, p4filelogs):
    """Yield ``(1, json)`` pairs mapping each depot file to its local name.

    For every filelog the ``depotfile`` attribute is resolved with
    ``relpath`` and the single-entry ``{depotfile: localname}`` dict is
    serialized with ``json.dumps``.
    """
    for filelog in p4filelogs:
        depot = filelog.depotfile
        payload = json.dumps({depot: relpath(client, depot)})
        yield 1, payload
|
||||
|
||||
def get_p4_file_content(storepath, p4filelog, p4cl, skipp4revcheck=False):
|
||||
p4path = p4filelog._depotfile
|
||||
p4storepath = os.path.join(storepath, localpath(p4path))
|
||||
@ -78,20 +62,14 @@ def get_filelogs_to_sync(ui, client, repo, p1ctx, cl, p4filelogs):
|
||||
# it represents files not in the parent's commit
|
||||
p1 = repo[p1ctx.node()]
|
||||
hgfilelogs = p1.manifest().copy()
|
||||
p4flmapping = collections.defaultdict()
|
||||
p4flmapping = {p4fl.depotfile: p4fl for p4fl in p4filelogs}
|
||||
addedp4filelogs = []
|
||||
reusep4filelogs = []
|
||||
addedp4flheadcls = set()
|
||||
wargs = (client,)
|
||||
|
||||
for p4fl in p4filelogs:
|
||||
p4flmapping[p4fl.depotfile] = p4fl
|
||||
ui.debug('%d p4 filelogs to read\n' % (len(p4filelogs)))
|
||||
# parallelize calls to translate each p4 filepath into hg filepath
|
||||
for i, serialized in runworker(ui, get_localname, wargs, p4filelogs):
|
||||
data = json.loads(serialized)
|
||||
localfile = data.values()[0].encode('utf-8')
|
||||
p4file = data.keys()[0].encode('utf-8')
|
||||
mapping = p4.parse_where_multiple(client, p4flmapping.keys())
|
||||
for p4file, localfile in mapping.items():
|
||||
if localfile in hgfilelogs:
|
||||
reusep4filelogs.append(localfile)
|
||||
else:
|
||||
@ -436,7 +414,7 @@ class FileImporter(object):
|
||||
|
||||
@util.propertycache
def relpath(self):
    """Client-relative path of ``self.depotfile``.

    Resolved with ``p4.parse_where`` for the import set's client.  The
    value is wrapped in ``util.propertycache``.
    """
    # A single return: the previous body stacked two returns, leaving this
    # call unreachable behind a lookup through the module-level ``relpath``.
    return p4.parse_where(self._importset.client, self.depotfile)
|
||||
|
||||
@property
|
||||
def depotfile(self):
|
||||
@ -566,7 +544,7 @@ class SyncFileImporter(FileImporter):
|
||||
if self._localfile:
|
||||
return self._localfile
|
||||
else:
|
||||
return relpath(self._client, self._p4filelog.depotfile)
|
||||
return p4.parse_where(self._client, self._p4filelog.depotfile)
|
||||
|
||||
def create(self, tr):
|
||||
assert tr is not None
|
||||
|
@ -9,6 +9,7 @@ import time
|
||||
|
||||
from .util import runworker
|
||||
from mercurial import (
|
||||
error,
|
||||
util,
|
||||
)
|
||||
|
||||
@ -139,22 +140,46 @@ def parse_filelist_at_cl(client, cl=None):
|
||||
if c:
|
||||
yield d
|
||||
|
||||
def parse_where(client, depotname):
    """Run ``p4 -G where`` for one depot path and return the first record.

    ``client`` and ``depotname`` are shell-quoted into the command line.
    The pipe is opened with ``util.popen`` and the first marshalled object
    is returned.  The call is wrapped in this module's ``retry`` decorator
    (``num=3, sleeps=0.3``).

    Raises ``P4Exception`` carrying the most recently opened pipe (or ``''``
    if none was opened) when the helper raises any exception.
    """
    # TODO: investigate if we replace this with exactly one call to
    # where //clientname/...
    cmd = 'p4 --client %s -G where %s' % (
        util.shellquote(client),
        util.shellquote(depotname))
    # Mutable holder instead of ``global``: a ``global stdout`` inside the
    # helper would bind a module-level name, so the except clause below
    # would always see the untouched local ''.
    captured = {'stdout': ''}
    try:
        @retry(num=3, sleeps=0.3)
        def helper():
            captured['stdout'] = util.popen(cmd, mode='rb')
            return marshal.load(captured['stdout'])
        return helper()
    except Exception:
        raise P4Exception(captured['stdout'])
|
||||
def parse_where(client, depotname, ignore_nonexisting=False):
    """Resolve a single depot path via ``parse_where_multiple``.

    Returns the value mapped to ``depotname``, or None when the returned
    mapping has no entry for it.
    """
    return parse_where_multiple(
        client, [depotname], ignore_nonexisting).get(depotname)
|
||||
|
||||
MAX_CMD_LEN = 2 ** 12  # 4K


def batch_and_run_where(client, p4paths):
    """Yield one open pipe per batched ``p4 -G where`` invocation.

    ``p4paths`` is split, in order, into consecutive batches so that the
    shell-quoted arguments of each batch (plus one separator per argument)
    stay below ``MAX_CMD_LEN`` minus the length of the base command.  Each
    batch is run through ``util.popen(..., mode='rb')``, wrapped in this
    module's ``retry`` decorator (``num=3, sleeps=0.3``).

    Nothing is yielded for an empty ``p4paths``.  A single path that does
    not fit the budget on its own is still sent, alone in its batch.
    """
    # Quote the client as well as the paths: both end up in a shell string.
    base_cmd = 'p4 -c %s -G where ' % util.shellquote(client)
    paths = [util.shellquote(p) for p in p4paths]
    max_length = MAX_CMD_LEN - len(base_cmd)

    @retry(num=3, sleeps=0.3)
    def run_for(start, end=None):
        paths_str = ' '.join(paths[start:end])
        return util.popen(base_cmd + paths_str, mode='rb')

    start = cmd_len = 0
    # Budget against the quoted arguments, since those are what is sent.
    for index, quoted in enumerate(paths):
        cost = len(quoted) + 1
        # ``index > start`` keeps a batch from being flushed while empty,
        # which would run ``p4 where`` without any path argument.
        if index > start and cmd_len + cost >= max_length:
            yield run_for(start, index)
            start = index
            cmd_len = 0
        # The path that triggered a flush opens the next batch, so its
        # length must count toward that batch.
        cmd_len += cost

    # Deal with the last few paths
    if start < len(paths):
        yield run_for(start)
|
||||
|
||||
def parse_where_multiple(client, p4paths, ignore_nonexisting=False):
    """Map depot paths to decoded client-relative paths.

    Every record read via ``loaditer`` from the pipes produced by
    ``batch_and_run_where`` contributes one entry keyed by its
    ``depotFile``.  The value is the record's ``clientFile`` with the first
    ``len('//<client>/')`` characters dropped, passed through
    ``decodefilename``.

    Records without a ``clientFile`` are skipped when ``ignore_nonexisting``
    is true; otherwise ``error.Abort`` is raised with the offending record.
    """
    prefix_len = len('//%s/' % client)
    result = {}
    for output in batch_and_run_where(client, p4paths):
        for record in loaditer(output):
            clientfile = record.get('clientFile')
            if clientfile is None:
                if ignore_nonexisting:
                    continue
                raise error.Abort('Could not find file %s' % (record))
            relative = clientfile[prefix_len:]
            result[record['depotFile']] = decodefilename(relative)
    return result
|
||||
|
||||
def get_file(path, rev=None, clnum=None):
|
||||
"""Returns a file from Perforce"""
|
||||
|
@ -35,11 +35,13 @@ class ChangelistImporter(object):
|
||||
added_or_modified = []
|
||||
removed = set()
|
||||
p4flogs = {}
|
||||
p4paths = [info['depotFile'] for info in fstat]
|
||||
hgpaths = p4.parse_where_multiple(self.client, p4paths)
|
||||
for info in fstat:
|
||||
action = info['action']
|
||||
p4path = info['depotFile']
|
||||
hgpath = hgpaths[p4path]
|
||||
data = {p4cl.cl: {'action': action, 'type': info['type']}}
|
||||
hgpath = importer.relpath(self.client, p4path)
|
||||
p4flogs[hgpath] = p4.P4Filelog(p4path, data)
|
||||
|
||||
if action in p4.ACTION_DELETE + p4.ACTION_ARCHIVE:
|
||||
@ -67,30 +69,30 @@ class ChangelistImporter(object):
|
||||
|
||||
def _get_move_info(self, p4cl, p4flogs):
    '''Returns a dict mapping each moved file's hg destination to its source.

    Only files of ``p4cl.parsed['files']`` whose depot path belongs to one
    of ``p4flogs`` and that carry a non-empty ``src`` are considered.  All
    sources are resolved with a single ``p4.parse_where_multiple`` call
    instead of one lookup per file.
    '''
    files_in_clientspec = {
        p4flog._depotfile: hgpath for hgpath, p4flog in p4flogs.items()
    }
    hgdst_to_p4src = {}
    for filename, info in p4cl.parsed['files'].items():
        if filename not in files_in_clientspec:
            continue
        src = info.get('src')
        if src:
            hgdst_to_p4src[files_in_clientspec[filename]] = src
    w_map = p4.parse_where_multiple(
        self.client,
        hgdst_to_p4src.values(),
        ignore_nonexisting=True,
    )
    # The 'get' below could return None if the source of the move is outside
    # of client view. That is expected. This info will be used when creating
    # the commit, and a value of None in this dictionary is a no-op, it will
    # treat it as an add in hg. As it just came into the client view we
    # cannot store any move info for it in hg (even though it was a legit
    # move in perforce).
    return {
        hgdst: w_map.get(p4src) for hgdst, p4src in hgdst_to_p4src.items()
    }
|
||||
|
||||
def _create_commit(self, p4cl, p4flogs, removed, moved):
|
||||
'''Uses a memory context to commit files into the repo'''
|
||||
|
@ -10,6 +10,7 @@ New errors are not allowed. Warnings are strongly discouraged.
|
||||
$ testrepohg files -I . \
|
||||
> -X contrib/python-zstandard \
|
||||
> -X hgext/fsmonitor/pywatchman \
|
||||
> -X hgext/p4fastimport \
|
||||
> -X lib/cdatapack \
|
||||
> -X lib/third-party \
|
||||
> -X mercurial/thirdparty \
|
||||
|
@ -98,8 +98,8 @@ outputs, which should be fixed later.
|
||||
hgext/p4fastimport/__init__.py:42: symbol import follows non-symbol import: mercurial.i18n
|
||||
hgext/p4fastimport/__init__.py:43: symbol import follows non-symbol import: mercurial.node
|
||||
hgext/p4fastimport/__init__.py:43: imports from mercurial.node not lexically sorted: hex < short
|
||||
hgext/p4fastimport/importer.py:20: direct symbol import caseconflict, localpath, runworker from hgext.p4fastimport.util
|
||||
hgext/p4fastimport/importer.py:20: symbol import follows non-symbol import: hgext.p4fastimport.util
|
||||
hgext/p4fastimport/importer.py:19: direct symbol import caseconflict, localpath from hgext.p4fastimport.util
|
||||
hgext/p4fastimport/importer.py:19: symbol import follows non-symbol import: hgext.p4fastimport.util
|
||||
hgext/p4fastimport/p4.py:10: direct symbol import runworker from hgext.p4fastimport.util
|
||||
hgext/pushrebase.py:27: multiple imported names: errno, os, tempfile, mmap, time
|
||||
hgext/pushrebase.py:49: direct symbol import wrapcommand, wrapfunction, unwrapfunction from mercurial.extensions
|
||||
|
Loading…
Reference in New Issue
Block a user