# (c) 2017-present Facebook Inc.
"""p4fastimport - A fast importer from Perforce to Mercurial
Config example:
[p4fastimport]
# whether use worker or not
useworker = false
# trace copies?
copytrace = false
# if LFS is enabled, write only the metadata to disk, do not write the
# blob itself to the local cache.
lfspointeronly = false
# path to sqlite output file for lfs metadata
lfsmetadata = PATH
# path to sqlite output file for metadata
metadata = PATH
"""
from __future__ import absolute_import

import collections
import json
import sqlite3

from mercurial.i18n import _
from mercurial.node import hex, short
from mercurial import (
    cmdutil,
    error,
    extensions,
    scmutil,
    verify,
)

from . import (
    importer,
    p4,
)
from .util import decodefileflags, lastcl, runworker

def reposetup(ui, repo):
    def nothing(orig, *args, **kwargs):
        pass

    def yoloverify(orig, *args, **kwargs):
        # We have to set this directly, as the repo reads the lfs.bypass
        # config during its own setup.
        repo.svfs.options['lfsbypass'] = True
        return orig(*args, **kwargs)

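    # When the lfs extension is loaded, stub out its local blobstore reads
    # and writes; the import only needs to produce pointer metadata (cf. the
    # lfspointeronly option in the module docstring).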
def handlelfs(loaded):
if loaded:
lfs = extensions.find('lfs')
extensions.wrapfunction(lfs.blobstore.local, 'write', nothing)
extensions.wrapfunction(lfs.blobstore.local, 'read', nothing)

    extensions.wrapfunction(verify.verifier, 'verify', yoloverify)
    extensions.afterloaded('lfs', handlelfs)

def writebookmark(tr, repo, revisions, name):
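    """Point the bookmark 'name' at the newest imported revision, if any."""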
if len(revisions) > 0:
marks = repo._bookmarks
__, hexnode = revisions[-1]
marks[name] = repo[hexnode].node()
marks.recordchange(tr)

def writerevmetadata(revisions, outfile):
    """Write the mapping from Perforce changelist numbers to the imported
    Mercurial nodes into sqlite.
    """
with sqlite3.connect(outfile, isolation_level=None) as conn:
cur = conn.cursor()
cur.execute("BEGIN TRANSACTION")
cur.execute("""
CREATE TABLE IF NOT EXISTS revision_mapping (
"id" INTEGER PRIMARY KEY AUTOINCREMENT,
"cl" INTEGER NOT NULL,
"node" BLOB
)""")
cur.executemany(
"INSERT INTO revision_mapping(cl, node) VALUES (?,?)",
revisions)
cur.execute("COMMIT")

def writelfsmetadata(largefiles, revisions, outfile):
    """Write the LFS mappings from OID to a depot path and its changelist
    number into sqlite. This way the LFS server can import the correct file
    from Perforce and map it to the correct OID.
    """
with sqlite3.connect(outfile, isolation_level=None) as conn:
cur = conn.cursor()
cur.execute("BEGIN TRANSACTION")
cur.execute("""
CREATE TABLE IF NOT EXISTS p4_lfs_map(
"id" INTEGER PRIMARY KEY AUTOINCREMENT,
"cl" INTEGER NOT NULL,
"node" BLOB,
"oid" TEXT,
"path" BLOB
)""")
inserts = []
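        # revisions is a list of (cl, hexnode) pairs; index it by changelist
        # so each largefile row below can be joined with its node.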
revdict = dict(revisions)
for cl, path, oid in largefiles:
inserts.append((cl, path, oid, revdict[cl]))
cur.executemany(
"INSERT INTO p4_lfs_map(cl, path, oid, node) VALUES (?,?,?,?)",
inserts)
cur.execute("COMMIT")

def create(tr, ui, repo, importset, filelogs):
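    """Worker target: import each filelog and yield a (progress, payload)
    pair per filelog. The payload is JSON-serialized so it can be passed
    back across the worker process boundary (see runworker below).
    """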
for filelog in filelogs:
        # If the Perforce server is case insensitive, a filelog can map to
        # multiple filenames. For example, A.txt and a.txt would show up in
        # the same filelog. It would be more appropriate to update the file
        # list after receiving the initial file list, but that would not be
        # parallel.
fi = importer.FileImporter(ui, repo, importset, filelog)
fileflags, largefiles, oldtiprev, newtiprev = fi.create(tr)
yield 1, json.dumps({
'newtiprev': newtiprev,
'oldtiprev': oldtiprev,
'fileflags': fileflags,
'largefiles': largefiles,
'depotname': filelog.depotfile,
'localname': fi.relpath,
})

cmdtable = {}
command = cmdutil.command(cmdtable)

@command(
'p4fastimport',
[('P', 'path', '.', _('path to the local depot store'), _('PATH')),
('B', 'bookmark', '', _('bookmark to set'), _('NAME')),
('', 'limit', '',
_('number of changelists to import at a time'), _('N'))],
_('[-P PATH] [-B NAME] [--limit N] [CLIENT]'),
inferrepo=True)
def p4fastimport(ui, repo, client, **opts):
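    """import changelists from Perforce into the current repository

    CLIENT is the name of the Perforce client spec whose views define which
    depot paths are imported and where they map to on local disk.
    """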
if 'fncache' in repo.requirements:
raise error.Abort(_('fncache must be disabled'))
if opts.get('bookmark'):
scmutil.checknewlabel(repo, opts['bookmark'], 'bookmark')
startcl = None
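    # For incremental imports, resume from the changelist recorded in the
    # p4changelist extra of the most recently imported commit.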
if len(repo) > 0 and startcl is None:
latestctx = list(repo.set("last(extra(p4changelist))"))
if latestctx:
startcl = lastcl(latestctx[0])
ui.note(_('incremental import from changelist: %d, node: %s\n') %
(startcl, short(latestctx[0].node())))
    # A client defines checkout behavior for a user. It contains a list of
    # views. A view defines a set of files and directories to check out from
    # a Perforce server and their mappings to local disk, e.g.:
    #   //depot/foo/... //client/x/...
    # would map the files that are stored on the server under foo/* to x/*
    # locally.
# 1. Return all the changelists touching files in a given client view.
ui.note(_('loading changelist numbers.\n'))
changelists = sorted(p4.parse_changes(client, startcl=startcl))
ui.note(_('%d changelists to import.\n') % len(changelists))
limit = len(changelists)
if opts.get('limit'):
limit = int(opts.get('limit'))
run_import(ui, repo, client, changelists[0:limit], **opts)

def run_import(ui, repo, client, changelists, **opts):
if len(changelists) == 0:
return
basepath = opts.get('path')
startcl, endcl = changelists[0].cl, changelists[-1].cl
    # 2. Get the list of files that we will have to import from the depot,
    # each with its full path in the depot.
ui.note(_('loading list of files.\n'))
filelist = set()
for fileinfo in p4.parse_filelist(client, startcl=startcl, endcl=endcl):
if fileinfo['action'] in p4.SUPPORTED_ACTIONS:
filelist.add(fileinfo['depotFile'])
else:
ui.warn(_('unknown action %s: %s\n') % (fileinfo['action'],
fileinfo['depotFile']))
ui.note(_('%d files to import.\n') % len(filelist))
importset = importer.ImportSet(repo, client, changelists,
filelist, basepath)
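    # Materialize the filelogs up front so that we know the total for
    # progress reporting and can hand them to workers below.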
p4filelogs = []
for i, f in enumerate(importset.filelogs()):
ui.debug('reading filelog %s\n' % f.depotfile)
ui.progress(_('reading filelog'), i, unit=_('filelogs'),
total=len(filelist))
p4filelogs.append(f)
ui.progress(_('reading filelog'), None)
    # runlist is used to topologically order files which were branched
    # (Perforce uses per-file branching, not per-repo branching). If we do
    # copytracing, a file A' that was branched off A is considered a copy of
    # A, so we need to import A before A'. A' will have a dependency counter
    # one higher than A's and is therefore imported after A. If copytracing
    # is disabled this is not needed and we can import files in arbitrary
    # order.
runlist = collections.OrderedDict()
if ui.configbool('p4fastimport', 'copytrace', False):
raise error.Abort(_('copytracing is broken'))
else:
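        # copytracing disabled: import all filelogs in a single pass, in
        # arbitrary order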
runlist[0] = p4filelogs
ui.note(_('importing repository.\n'))
with repo.wlock(), repo.lock():
tr = repo.transaction('import')
try:
for a, b in importset.caseconflicts:
ui.warn(_('case conflict: %s and %s\n') % (a, b))
# 3. Import files.
count = 0
fileinfo = {}
largefiles = []
for filelogs in map(sorted, runlist.values()):
wargs = (tr, ui, repo, importset)
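            # Fan the filelogs out across workers (see the useworker option);
            # each result comes back as a (progress, json payload) pair.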
for i, serialized in runworker(ui, create, wargs, filelogs):
data = json.loads(serialized)
ui.progress(_('importing'), count,
item=data['depotname'], unit='file',
total=len(p4filelogs))
                # JSON decodes strings as unicode and turns int keys into
                # strings, so we have to convert back.
                # TODO: Find a better way to handle this.
fileinfo[data['depotname']] = {
'localname': data['localname'].encode('utf-8'),
'flags': decodefileflags(data['fileflags']),
'baserev': data['oldtiprev'],
}
largefiles.extend(data['largefiles'])
count += i
ui.progress(_('importing'), None)
            # 4. Generate the manifest and changelog based on the filelogs
            # we imported.
clog = importer.ChangeManifestImporter(ui, repo, importset)
revisions = []
for cl, hgnode in clog.creategen(tr, fileinfo):
revisions.append((cl, hex(hgnode)))
if opts.get('bookmark'):
ui.note(_('writing bookmark\n'))
writebookmark(tr, repo, revisions, opts['bookmark'])
if ui.config('p4fastimport', 'lfsmetadata', None) is not None:
ui.note(_('writing lfs metadata to sqlite\n'))
writelfsmetadata(largefiles, revisions,
ui.config('p4fastimport', 'lfsmetadata', None))
if ui.config('p4fastimport', 'metadata', None) is not None:
ui.note(_('writing metadata to sqlite\n'))
writerevmetadata(revisions,
ui.config('p4fastimport', 'metadata', None))
tr.close()
ui.note(_('%d revision(s), %d file(s) imported.\n') % (
len(changelists), count))
finally:
tr.release()