Use memctx to create commit without working copy

Summary:
So far, p4seqimport has used very high-level Mercurial abstractions (almost
equivalent to running hg add / mv / rm / commit on the command line). This is
very easy to grasp because we use those commands day to day. However, it is not
performant enough for our importer:
- It does the work twice (it writes to the working copy, then commits, which
  changes the hg metadata)
- It requires a working copy (this would force us to update between revs,
  materializing a prohibitively large number of files)

This change makes use of memctx, which is basically an in-memory commit. This way
we don't need a working copy, and we save both time and a lot of space.

For a high-level overview of p4seqimport, please check https://our.intern.facebook.com/intern/wiki/IDI/p4seqimport/

Differential Revision: D7176903

fbshipit-source-id: 2773d7c001b615837496ea9db3229d9afc020124
This commit is contained in:
Alexandre Marin 2018-04-02 14:59:31 -07:00 committed by Saurabh Singh
parent 50cae88150
commit e2fc55f8a6
3 changed files with 60 additions and 66 deletions

View File

@ -374,8 +374,9 @@ def p4seqimport(ui, repo, client, **opts):
sanitizeopts(repo, opts)
startcl = None
ctx = repo['tip']
if len(repo) > 0:
startcl = startfrom(ui, repo, opts)[1]
ctx, startcl = startfrom(ui, repo, opts)[:2]
changelists = getchangelists(ui, client, startcl, limit=opts.get('limit'))
if len(changelists) == 0:
@ -385,6 +386,7 @@ def p4seqimport(ui, repo, client, **opts):
climporter = seqimporter.ChangelistImporter(
ui,
repo,
ctx,
client,
opts.get('path'),
opts.get('bookmark'),

View File

@ -1,78 +1,66 @@
# (c) 2017-present Facebook Inc.
from __future__ import absolute_import
import collections
import errno
import os
from mercurial.i18n import _
from mercurial import (
bookmarks,
commands,
context,
)
from . import importer, lfs, p4
MoveInfo = collections.namedtuple('MoveInfo', ['src', 'dst'])
class ChangelistImporter(object):
def __init__(self, ui, repo, client, storepath, bookmark):
def __init__(self, ui, repo, ctx, client, storepath, bookmark):
self.ui = ui
self.repo = repo
self.node = self.repo[ctx].node()
self.client = client
self.storepath = storepath
self.bookmark = bookmark
def importcl(self, p4cl, bookmark=None):
try:
node, largefiles = self._import(p4cl)
self._update_bookmark(node)
return node, largefiles
ctx, largefiles = self._import(p4cl)
self.node = self.repo[ctx].node()
self._update_bookmark()
return ctx, largefiles
except Exception as e:
self.ui.write_err(_('Failed importing CL%d: %s\n') % (p4cl.cl, e))
raise
def _update_bookmark(self, rev):
def _update_bookmark(self):
if not self.bookmark:
return
tr = self.repo.currenttransaction()
bookmarks.addbookmarks(self.repo, tr, [self.bookmark], rev, force=True)
changes = [(self.bookmark, self.node)]
self.repo._bookmarks.applychanges(self.repo, tr, changes)
def _import(self, p4cl):
'''Converts the provided p4 CL into a commit in hg.
Returns a tuple containing hg node and largefiles for new commit'''
self.ui.debug('importing CL%d\n' % p4cl.cl)
fstat = p4.parse_fstat(p4cl.cl, self.client)
added, removed = [], []
added_or_modified = []
removed = set()
p4flogs = {}
for info in fstat:
action = info['action']
p4path = info['depotFile']
data = {p4cl.cl: {'action': action, 'type': info['type']}}
p4flog = p4.P4Filelog(p4path, data)
hgpath = importer.relpath(self.client, p4path)
p4flogs[hgpath] = p4.P4Filelog(p4path, data)
if action in p4.ACTION_DELETE + p4.ACTION_ARCHIVE:
removed.append(hgpath)
removed.add(hgpath)
else:
added_or_modified.append((p4path, hgpath))
file_content = self._get_file_content(p4path, p4cl.cl)
if p4flog.issymlink(p4cl.cl):
target = file_content.rstrip()
os.symlink(target, hgpath)
else:
if os.path.islink(hgpath):
os.remove(hgpath)
with self._safe_open(hgpath) as f:
f.write(file_content)
if action in p4.ACTION_ADD:
added.append(hgpath)
moved = self._get_move_info(p4cl)
move_dsts = set(mi.dst for mi in moved)
added = [fname for fname in added if fname not in move_dsts]
node = self._create_commit(p4cl, added, moved, removed)
node = self._create_commit(p4cl, p4flogs, removed, moved)
largefiles = self._get_largefiles(p4cl, added_or_modified)
return node, largefiles
def _get_largefiles(self, p4cl, files):
@ -97,34 +85,49 @@ class ChangelistImporter(object):
return open(path, 'w')
def _get_move_info(self, p4cl):
'''Returns a list of MoveInfo, i.e. (src, dst) for each moved file'''
moves = []
'''Returns a dict where entries are (dst, src)'''
moves = {}
for filename, info in p4cl.parsed['files'].items():
src = info.get('src')
if src:
hgsrc = importer.relpath(self.client, src)
hgdst = importer.relpath(self.client, filename)
moves.append(MoveInfo(hgsrc, hgdst))
moves[hgdst] = hgsrc
return moves
def _get_file_content(self, p4path, clnum):
'''Returns file content for file in p4path'''
# TODO try to get file from local stores instead of resorting to
# p4 print, similar to what importer.FileImporter does
return p4.get_file(p4path, clnum=clnum)
def _create_commit(self, p4cl, p4flogs, removed, moved):
'''Uses a memory context to commit files into the repo'''
def getfile(repo, memctx, path):
if path in removed:
# A path that shows up in files (below) but returns None in this
# function implies a deletion.
return None
def _create_commit(self, p4cl, added, moved, removed):
'''Performs all hg add/mv/rm and creates a commit'''
if added:
commands.add(self.ui, self.repo, *added)
for mi in moved:
commands.copy(self.ui, self.repo, mi.src, mi.dst, after=True)
if removed:
commands.remove(self.ui, self.repo, *removed)
p4flog = p4flogs[path]
data = p4.get_file(p4flog._depotfile, clnum=p4cl.cl)
islink = p4flog.issymlink(p4cl.cl)
if islink:
# p4 will give us content with a trailing newline, symlinks
# cannot end with newline
data = data.rstrip()
return self.repo.commit(
text=p4cl.description,
date=p4cl.hgdate,
user=p4cl.user,
extra={'p4changelist': p4cl.cl},
)
return context.memfilectx(
repo,
memctx,
path,
data,
islink=islink,
copied=moved.get(path),
# TODO deal with executable files
)
return context.memctx(
self.repo, # repository
(self.node, None), # parents
p4cl.description, # commit message
p4flogs.keys(), # files affected by this change
getfile, # fn - see above
user=p4cl.user, # commit author
date=p4cl.hgdate, # commit date
extra={'p4changelist': p4cl.cl}, # commit extras
).commit()

View File

@ -72,15 +72,11 @@ Add a largefile and change symlink to be a regular file
Run seqimport limiting to one changelist
$ cd $hgwd
$ hg init --config 'format.usefncache=False'
$ hg p4seqimport --debug -P $P4ROOT -B master $P4CLIENT --limit 1
$ hg p4seqimport --debug -P $P4ROOT -B master $P4CLIENT --limit 1 --traceback
loading changelist numbers.
3 changelists to import.
importing 1 only because of --limit.
importing CL1
adding Main/a
adding Main/b
adding Main/symlink
adding Main/symlinktosymlink
committing files:
Main/a
Main/b
@ -90,7 +86,6 @@ Run seqimport limiting to one changelist
committing changelog
writing metadata to sqlite
updating the branch cache
calling hook commit.lfs: hgext.lfs.checkrequireslfs
Assert bookmark was written
$ hg log -r master -T '{desc}\n'
@ -108,14 +103,11 @@ Confirm Main/symlink is a link to Main/b in hg as well
symlink (no-eol)
Run seqimport again for up to 50 changelists
$ hg p4seqimport --debug -P $P4ROOT -B master $P4CLIENT --limit 50
$ hg p4seqimport --debug -P $P4ROOT -B master $P4CLIENT --limit 50 --traceback
incremental import from changelist: 2, node: * (glob)
loading changelist numbers.
2 changelists to import.
importing CL2
adding Main/c
copying Main/a to Main/amove
removing Main/a
committing files:
Main/amove
Main/amove: copy Main/a:* (glob)
@ -125,7 +117,6 @@ Run seqimport again for up to 50 changelists
committing changelog
writing metadata to sqlite
importing CL3
adding Main/largefile
committing files:
Main/largefile
Main/symlink
@ -135,8 +126,6 @@ Run seqimport again for up to 50 changelists
writing lfs metadata to sqlite
writing metadata to sqlite
updating the branch cache
calling hook commit.lfs: hgext.lfs.checkrequireslfs
calling hook commit.lfs: hgext.lfs.checkrequireslfs
Main/symlink is no longer a symlink
$ hg manifest -vr tip | grep Main/symlink
@ -175,7 +164,7 @@ Confirm p4changelist is in commit extras
1 3 6 -1 1 57fe91e2a37a 1e88685f5dde 000000000000
Ensure Main/amove was moved and modified
$ hg cat Main/amove
$ hg cat -r tip Main/amove
a
modified