sapling/edenscm/hgext/p4fastimport/__init__.py
Jun Wu 9dc21f8d0b codemod: import from the edenscm package
Summary:
D13853115 adds `edenscm/` to `sys.path` while code still uses `import mercurial`.
That has nasty problems if both `import mercurial` and
`import edenscm.mercurial` are used, because Python treats `mercurial.foo`
and `edenscm.mercurial.foo` as different modules, so code like
`try: ... except mercurial.error.Foo: ...` or `isinstance(x, mercurial.foo.Bar)`
would fail to handle the `edenscm.mercurial` version. There is also some
module-level state (e.g. `extensions._extensions`) that causes trouble when
multiple versions exist in a single process.

Change imports to use the `edenscm` package so that ideally `mercurial` is no
longer imported at all. Add checks in extensions.py to catch unexpected
extensions importing modules from the old (wrong) locations when running tests.

Reviewed By: phillco

Differential Revision: D13868981

fbshipit-source-id: f4e2513766957fd81d85407994f7521a08e4de48
2019-01-29 17:25:32 -08:00

# Copyright 2017 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
# (c) 2017-present Facebook Inc.
"""p4fastimport - A fast importer from Perforce to Mercurial
Config example:
[p4fastimport]
# Instead of uploading to LFS, store lfs metadata in this sqlite output
# file. Some other process will upload from there to the LFS server later.
lfsmetadata = PATH
# path to sqlite output file for metadata
metadata = PATH
# certain commits by certain users should be igored so that
# p4fastimporter imports the actual commits we want
ignore-user = None
# heuristic time difference between a ignored user commit
# and a p4fastimporter import
ignore-time-delta = None
"""
from __future__ import absolute_import

import itertools
import sqlite3

from edenscm.mercurial import error, extensions, progress, registrar, revlog, scmutil
from edenscm.mercurial.i18n import _
from edenscm.mercurial.node import hex, short

from . import importer, p4, seqimporter, syncimporter
from .util import getcl, lastcl


def extsetup():
# Writing multiple changelog entries in one transaction can lead to revlog
# caching issues when the inlined .i data is separated into a .d file. We
# workaround by not allowing inlined revlogs at all.
revlog.REVLOG_DEFAULT_VERSION = revlog.REVLOG_DEFAULT_FORMAT


def writebookmark(tr, repo, revisions, name):
if len(revisions) > 0:
__, hexnode = revisions[-1]
repo._bookmarks.applychanges(repo, tr, [(name, repo[hexnode].node())])


def writerevmetadata(revisions, outfile):
    """Write the mapping from Perforce changelist numbers to Mercurial nodes
    into sqlite so that other tools can later look up which node a given
    changelist was imported as.
    """
with sqlite3.connect(outfile, isolation_level=None) as conn:
cur = conn.cursor()
cur.execute("BEGIN TRANSACTION")
cur.execute(
"""
CREATE TABLE IF NOT EXISTS revision_mapping (
"id" INTEGER PRIMARY KEY AUTOINCREMENT,
"cl" INTEGER NOT NULL,
"node" BLOB
)"""
)
cur.executemany(
"INSERT INTO revision_mapping(cl, node) VALUES (?,?)", revisions
)
cur.execute("COMMIT")


def writelfsmetadata(largefiles, revisions, outfile):
    """Write the LFS mappings from OID to depotpath and its CL number into
    sqlite. This way the LFS server can import the correct file from Perforce
    and map it to the correct OID.
    """
with sqlite3.connect(outfile, isolation_level=None) as conn:
cur = conn.cursor()
cur.execute("BEGIN TRANSACTION")
cur.execute(
"""
CREATE TABLE IF NOT EXISTS p4_lfs_map(
"id" INTEGER PRIMARY KEY AUTOINCREMENT,
"cl" INTEGER NOT NULL,
"node" BLOB,
"oid" TEXT,
"path" BLOB
)"""
)
inserts = []
revdict = dict(revisions)
for cl, path, oid in largefiles:
inserts.append((cl, path, oid, revdict[cl]))
cur.executemany(
"INSERT INTO p4_lfs_map(cl, path, oid, node) VALUES (?,?,?,?)", inserts
)
cur.execute("COMMIT")


def enforce_p4_client_exists(client):
    # A client defines checkout behavior for a user. It contains a list of
    # views. A view defines a set of files and directories to check out from a
    # Perforce server and their mappings to local disk, e.g.:
    #   //depot/foo/... //client/x/...
    # would map the files that are stored on the server under foo/* locally
    # under x/*.
if not p4.exists_client(client):
raise error.Abort(_("p4 client %s does not exist.") % client)


def getchangelists(ui, client, startcl, limit=None):
    """Return a sorted list of changelists affecting client, starting at
    startcl. If a limit N is provided, return only the first N changelists.
    """
ui.note(_("loading changelist numbers.\n"))
ignore_user = ui.config("p4fastimport", "ignore-user")
    ignore_time_delta = ui.configint("p4fastimport", "ignore-time-delta")
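    # When both ignore options are set, stop at the first changelist authored
    # by ignore-user whose commit-time delta falls within ignore-time-delta;
    # per the module docstring these are commits we do not want to import.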
if ignore_user is None or ignore_time_delta is None:
changelists = sorted(p4.parse_changes(client, startcl=startcl))
else:
changelists = list(
itertools.takewhile(
lambda cl: not (
cl._user == ignore_user and cl._commit_time_diff < ignore_time_delta
),
sorted(p4.parse_changes(client, startcl=startcl)),
)
)
ui.note(_("%d changelists to import.\n") % len(changelists))
if limit:
limit = int(limit)
if limit < len(changelists):
ui.debug("importing %d only because of --limit.\n" % limit)
changelists = changelists[:limit]
return changelists


def sanitizeopts(repo, opts):
if opts.get("base") and not opts.get("bookmark"):
raise error.Abort(_("must set --bookmark when using --base"))
if opts.get("bookmark"):
scmutil.checknewlabel(repo, opts["bookmark"], "bookmark")
limit = opts.get("limit")
if limit:
try:
limit = int(limit)
except ValueError:
raise error.Abort(_("--limit should be an integer, got %s") % limit)
if limit <= 0:
raise error.Abort(_("--limit should be > 0, got %d") % limit)


def startfrom(ui, repo, opts):
base, dest = "null", "tip"
if opts.get("bookmark"):
dest = opts.get("bookmark")
if opts.get("base"):
base = opts["base"]
if opts.get("bookmark") not in repo:
dest = base
basectx = scmutil.revsingle(repo, base)
destctx = scmutil.revsingle(repo, dest)
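    # Find the newest commit between base and dest that carries a p4 import
    # marker (the p4changelist or p4fullimportbasechangelist extra); the
    # incremental import resumes from the changelist after it.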
ctx = list(
repo.set(
"""
last(
%n::%n and (
extra(p4changelist) or
extra(p4fullimportbasechangelist)))""",
basectx.node(),
destctx.node(),
)
)
if ctx:
ctx = ctx[0]
startcl = lastcl(ctx)
ui.note(
_("incremental import from changelist: %d, node: %s\n")
% (startcl, short(ctx.node()))
)
if ctx.node() == basectx.node():
ui.note(_("creating branchpoint, base %s\n") % short(basectx.node()))
return ctx, startcl, True
return ctx, startcl, False
raise error.Abort(_("no valid p4 changelist number."))


def updatemetadata(ui, revisions, largefiles):
lfsmetadata = ui.config("p4fastimport", "lfsmetadata", None)
if lfsmetadata is not None:
if len(largefiles) > 0:
ui.note(_("writing lfs metadata to sqlite\n"))
writelfsmetadata(largefiles, revisions, lfsmetadata)
metadata = ui.config("p4fastimport", "metadata", None)
if metadata is not None:
if len(revisions) > 0:
ui.note(_("writing metadata to sqlite\n"))
writerevmetadata(revisions, metadata)


cmdtable = {}
command = registrar.command(cmdtable)


@command(
"p4seqimport",
[
("P", "path", ".", _("path to the local depot store"), _("PATH")),
("B", "bookmark", "", _("bookmark to set"), _("NAME")),
("", "base", "", _("base changeset (must exist in the repository)")),
("", "limit", "", _("max number of changelists to import"), _("N")),
],
_("[-P PATH] [-B NAME] client"),
)
def p4seqimport(ui, repo, client, **opts):
"""Sequentially import changelists"""
if "fncache" in repo.requirements:
raise error.Abort(_("fncache must be disabled"))
enforce_p4_client_exists(client)
sanitizeopts(repo, opts)
startcl = None
ctx = repo["tip"]
if len(repo) > 0:
ctx, startcl = startfrom(ui, repo, opts)[:2]
changelists = getchangelists(ui, client, startcl, limit=opts.get("limit"))
if len(changelists) == 0:
ui.note(_("no changes to import, exiting.\n"))
return
climporter = seqimporter.ChangelistImporter(ui, repo, ctx, client, opts.get("path"))
with repo.wlock(), repo.lock(), repo.transaction("seqimport") as tr:
node = None
for p4cl in changelists:
node, largefiles = climporter.importcl(p4cl)
updatemetadata(ui, [(p4cl.cl, hex(node))], largefiles)
if node is not None and opts.get("bookmark"):
writebookmark(tr, repo, [(None, hex(node))], opts["bookmark"])


@command(
"p4syncimport",
[
("P", "path", ".", _("path to the local depot store"), _("PATH")),
("B", "bookmark", "", _("bookmark to set"), _("NAME")),
],
_("[-P PATH] [-B NAME] oldclient newclient"),
)
def p4syncimport(ui, repo, oldclient, newclient, **opts):
    """Import the difference between two client specs as a single commit"""
    sanitizeopts(repo, opts)
storepath = opts.get("path")
if len(repo) == 0:
raise error.Abort(_("p4 sync commit does not support empty repo yet."))
p1ctx, startcl, __ = startfrom(ui, repo, opts)
# Fail if the specified client does not exist
enforce_p4_client_exists(oldclient)
enforce_p4_client_exists(newclient)
# Get a list of files that we will have to import
oldcl = p4.get_latest_cl(oldclient)
latestcl = p4.get_latest_cl(newclient)
lastimportedcl = getcl(p1ctx)
if latestcl is None:
raise error.Abort(_("cannot find latest p4 changelist number"))
ui.debug(
"%r (current client) %r (requested client) "
"%r (latest imported)\n" % (oldcl, latestcl, lastimportedcl)
)
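    # The old client must already be fully imported: its latest CL has to
    # match the last imported CL, otherwise intermediate changelists would be
    # skipped.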
if oldcl != lastimportedcl:
# Consider running p4fastimport from here
raise error.Abort(_("repository must contain most recent changes"))
ui.note(_("latest change list number %s\n") % latestcl)
filesadd, filesdel = syncimporter.get_filelogs_to_sync(
ui, oldclient, oldcl, newclient, latestcl
)
if not filesadd and not filesdel:
ui.warn(_("nothing to import.\n"))
return
# sync import
simporter = syncimporter.SyncImporter(
ui, repo, p1ctx, storepath, latestcl, filesadd, filesdel
)
with repo.wlock(), repo.lock(), repo.transaction("syncimport") as tr:
node, largefiles = simporter.sync_commit()
updatemetadata(ui, [(latestcl, hex(node))], largefiles)
if node is not None and opts.get("bookmark"):
writebookmark(tr, repo, [(None, hex(node))], opts["bookmark"])


@command(
"debugscanlfs",
[
("r", "rev", ".", _("display LFS files in REV")),
("A", "all", None, _("display LFS files all revisions")),
],
)
def debugscanlfs(ui, repo, **opts):
lfs = extensions.find("lfs")

    def display(repo, filename, flog, rev):
filenode = flog.node(rev)
rawtext = flog.revision(filenode, raw=True)
ptr = lfs.pointer.deserialize(rawtext)
linkrev = flog.linkrev(rev)
cl = int(repo[linkrev].extra()["p4changelist"])
        return _("%d %s %s %d %s\n") % (
            linkrev,
            hex(filenode),
            ptr.oid(),
            cl,
            filename,
        )
if opts.get("all"):
prefix, suffix = "data/", ".i"
plen, slen = len(prefix), len(suffix)
for fn, b, size in repo.store.datafiles():
if size == 0 or fn[-slen:] != suffix or fn[:plen] != prefix:
continue
fn = fn[plen:-slen]
flog = repo.file(fn)
for rev in range(0, len(flog)):
flags = flog.flags(rev)
if bool(flags & revlog.REVIDX_EXTSTORED):
ui.write(display(repo, fn, flog, rev))
else:
revisions = repo.set(opts.get("rev", "."))
for ctx in revisions:
for fn in ctx.manifest():
fctx = ctx[fn]
flog = fctx.filelog()
flags = flog.flags(fctx.filerev())
if bool(flags & revlog.REVIDX_EXTSTORED):
ui.write(display(repo, fn, flog, fctx.filerev()))