sapling/eden/scm/edenscm/mercurial/bundlerepo.py


# Portions Copyright (c) Facebook, Inc. and its affiliates.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2.
# bundlerepo.py - repository class for viewing uncompressed bundles
#
# Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""Repository class for viewing uncompressed bundles.
This provides a read-only repository interface to bundles as if they
were part of the actual repository.
"""
from __future__ import absolute_import
import os
import shutil
import tempfile
from typing import IO, Any, Optional, Union
from . import (
bundle2,
changegroup,
changelog,
cmdutil,
discovery,
error,
exchange,
filelog,
localrepo,
manifest,
mdiff,
mutation,
node as nodemod,
pathutil,
phases,
pycompat,
revlog,
util,
vfs as vfsmod,
visibility,
)
from .i18n import _
from .node import nullid, nullrev
class bundlerevlog(revlog.revlog):
def __init__(self, opener, indexfile, cgunpacker, linkmapper):
# How it works:
# To retrieve a revision, we need to know the offset of the revision in
# the bundle (an unbundle object). We store this offset in the index
# (start). The base of the delta is stored in the base field.
#
# To differentiate a rev in the bundle from a rev in the revlog, we
# check revision against repotiprev.
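        #
        # Revisions with rev <= repotiprev live in the parent on-disk revlog
        # and are served by the base class; revisions above repotiprev come
        # from the bundle stream and are rebuilt by walking their delta chain.
        # Illustrative single-step sketch only (the real chain walk is in
        # revision() below):
        #
        #   base = self.index[r][3]                      # delta base revision
        #   text = self.baserevision(base)               # full text of the base
        #   text = mdiff.patches(text, [self._chunk(r)])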
opener = vfsmod.readonlyvfs(opener)
# bundlechangelog might have called revlog.revlog.__init__ already.
# avoid re-init the revlog.
if not util.safehasattr(self, "opener"):
index2 = indexfile.startswith("00changelog")
revlog.revlog.__init__(self, opener, indexfile, index2=index2)
inner = getattr(self, "inner", None)
index2 = getattr(self, "index2", None)
self.bundle = cgunpacker
n = len(self)
self.repotiprev = n - 1
self.bundlerevs = set() # used by 'bundle()' revset expression
self.bundleheads = set() # used by visibility
for deltadata in cgunpacker.deltaiter():
node, p1, p2, cs, deltabase, delta, flags = deltadata
size = len(delta)
start = cgunpacker.tell() - size
link = linkmapper(cs)
if node in self.nodemap:
# this can happen if two branches make the same change
self.bundlerevs.add(self.nodemap[node])
continue
for p in (p1, p2):
if p not in self.nodemap:
raise error.LookupError(p, self.indexfile, _("unknown parent"))
if deltabase not in self.nodemap:
                raise error.LookupError(
                    deltabase, self.indexfile, _("unknown delta base")
                )
baserev = self.rev(deltabase)
p1rev = self.rev(p1)
p2rev = self.rev(p2)
            # start, size, full unc. size (-1), delta base rev, link, p1, p2, node
e = (
revlog.offset_type(start, flags),
size,
-1,
baserev,
link,
p1rev,
p2rev,
node,
)
if self.index is not None:
self.index.insert(-1, e)
if index2 is not None:
index2.insert(node, [p for p in (p1rev, p2rev) if p >= 0])
if inner is not None:
parentnodes = [p for p in (p1, p2) if p != nullid]
basetext = self.revision(deltabase)
text = mdiff.patches(basetext, [delta])
inner.addcommits([(node, parentnodes, bytes(text))])
self.nodemap[node] = n
self.bundlerevs.add(n)
self.bundleheads.add(n)
self.bundleheads.discard(p1rev)
self.bundleheads.discard(p2rev)
n += 1
def _chunk(self, rev, df=None):
# Warning: in case of bundle, the diff is against what we stored as
# delta base, not against rev - 1
# XXX: could use some caching
if rev <= self.repotiprev:
return revlog.revlog._chunk(self, rev)
self.bundle.seek(self.start(rev))
return self.bundle.read(self.length(rev))
def revdiff(self, rev1, rev2):
"""return or calculate a delta between two revisions"""
if rev1 > self.repotiprev and rev2 > self.repotiprev:
# hot path for bundle
revb = self.index[rev2][3]
if revb == rev1:
return self._chunk(rev2)
elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
return revlog.revlog.revdiff(self, rev1, rev2)
return mdiff.textdiff(
self.revision(rev1, raw=True), self.revision(rev2, raw=True)
)
def revision(self, nodeorrev, _df=None, raw=False):
# type: (Union[int, bytes], Optional[IO], bool) -> bytes
"""return an uncompressed revision of a given node or revision
number.
"""
if isinstance(nodeorrev, int):
rev = nodeorrev
node = self.node(rev)
else:
node = nodeorrev
rev = self.rev(node)
if node == nullid:
return b""
rawtext = None
chain = []
iterrev = rev
cache = self._cache
# reconstruct the revision if it is from a changegroup
while iterrev > self.repotiprev:
if cache is not None:
if cache[1] == iterrev:
rawtext = cache[2]
break
chain.append(iterrev)
iterrev = self.index[iterrev][3]
if rawtext is None:
rawtext = self.baserevision(iterrev)
while chain:
delta = self._chunk(chain.pop())
rawtext = mdiff.patches(rawtext, [delta])
text, validatehash = self._processflags(
rawtext, self.flags(rev), "read", raw=raw
)
if validatehash:
self.checkhash(text, node, rev=rev)
self._cache = (node, rev, rawtext)
return text
def baserevision(self, nodeorrev):
# Revlog subclasses may override 'revision' method to modify format of
# content retrieved from revlog. To use bundlerevlog with such class one
# needs to override 'baserevision' and make more specific call here.
return revlog.revlog.revision(self, nodeorrev, raw=True)
def addrevision(self, *args, **kwargs):
raise NotImplementedError
def addgroup(self, *args, **kwargs):
raise NotImplementedError
def strip(self, *args, **kwargs):
raise NotImplementedError
def checksize(self):
raise NotImplementedError
class bundlechangelog(bundlerevlog, changelog.changelog):
def __init__(self, opener, cgunpacker, uiconfig):
changelog.changelog.__init__(self, opener, uiconfig)
linkmapper = lambda x: x
bundlerevlog.__init__(self, opener, self.indexfile, cgunpacker, linkmapper)
self._visibleheads.addbundleheads([self.node(r) for r in self.bundleheads])
def baserevision(self, nodeorrev):
# Although changelog doesn't override 'revision' method, some extensions
# may replace this class with another that does. Same story with
# manifest and filelog classes.
return changelog.changelog.revision(self, nodeorrev, raw=True)
def revision(self, nodeorrev, raw=False):
if self.userust("revision"):
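            # With the Rust-backed changelog, the raw text of bundle-only
            # commits was registered through inner.addcommits() in
            # bundlerevlog.__init__, so it can be fetched directly by node.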
if nodeorrev in {nullid, nullrev}:
return b""
if isinstance(nodeorrev, int):
node = self.node(nodeorrev)
else:
node = nodeorrev
text = self.inner.getcommitrawtext(node)
if text is None:
raise error.LookupError(node, self.indexfile, _("no node"))
return text
else:
return super(bundlechangelog, self).revision(nodeorrev, raw)
def _loadvisibleheads(self, opener):
return visibility.bundlevisibleheads(opener)
class bundlemanifest(bundlerevlog, manifest.manifestrevlog):
def __init__(self, opener, cgunpacker, linkmapper, dirlogstarts=None, dir=""):
manifest.manifestrevlog.__init__(self, opener, dir=dir)
bundlerevlog.__init__(self, opener, self.indexfile, cgunpacker, linkmapper)
if dirlogstarts is None:
dirlogstarts = {}
if self.bundle.version == "03":
dirlogstarts = _getfilestarts(self.bundle)
self._dirlogstarts = dirlogstarts
self._linkmapper = linkmapper
def baserevision(self, nodeorrev):
node = nodeorrev
if isinstance(node, int):
node = self.node(node)
if node in self.fulltextcache:
result = b"%s" % self.fulltextcache[node]
else:
result = manifest.manifestrevlog.revision(self, nodeorrev, raw=True)
return result
def dirlog(self, d):
if d in self._dirlogstarts:
self.bundle.seek(self._dirlogstarts[d])
return bundlemanifest(
self.opener, self.bundle, self._linkmapper, self._dirlogstarts, dir=d
)
return super(bundlemanifest, self).dirlog(d)
class bundlefilelog(bundlerevlog, filelog.filelog):
def __init__(self, opener, path, cgunpacker, linkmapper):
filelog.filelog.__init__(self, opener, path)
bundlerevlog.__init__(self, opener, self.indexfile, cgunpacker, linkmapper)
def baserevision(self, nodeorrev):
return filelog.filelog.revision(self, nodeorrev, raw=True)
class bundlepeer(localrepo.localpeer):
def canpush(self):
# type: () -> bool
return False
class bundlephasecache(phases.phasecache):
def __init__(self, *args, **kwargs):
super(bundlephasecache, self).__init__(*args, **kwargs)
if util.safehasattr(self, "opener"):
self.opener = vfsmod.readonlyvfs(self.opener)
def write(self):
raise NotImplementedError
def _write(self, fp):
raise NotImplementedError
def _updateroots(self, phase, newroots, tr):
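        # write()/_write() are disabled above, so phase root updates live only
        # in memory for the lifetime of the bundle repository.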
self.phaseroots[phase] = newroots
self.invalidate()
self.dirty = True
def _getfilestarts(cgunpacker):
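    # A changegroup lays filelog sections out back to back: each section
    # starts with a filelogheader naming the file, followed by that file's
    # delta chunks.  Record the stream offset right after each header so
    # callers (e.g. bundlerepository.file) can seek straight to a file's
    # deltas later.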
filespos = {}
for chunkdata in iter(cgunpacker.filelogheader, {}):
fname = chunkdata["filename"]
filespos[fname] = cgunpacker.tell()
for chunk in iter(lambda: cgunpacker.deltachunk(None), {}):
pass
return filespos
class bundlerepository(localrepo.localrepository):
"""A repository instance that is a union of a local repo and a bundle.
Instances represent a read-only repository composed of a local repository
with the contents of a bundle file applied. The repository instance is
conceptually similar to the state of a repository after an
``hg unbundle`` operation. However, the contents of the bundle are never
applied to the actual base repository.
"""
def __init__(self, ui, repopath, bundlepath):
self._tempparent = None
try:
localrepo.localrepository.__init__(self, ui, repopath)
except error.RepoError:
self._tempparent = tempfile.mkdtemp()
localrepo.instance(ui, self._tempparent, 1)
localrepo.localrepository.__init__(self, ui, self._tempparent)
self.ui.setconfig("phases", "publish", False, "bundlerepo")
if repopath:
self._url = "bundle:" + util.expandpath(repopath) + "+" + bundlepath
else:
self._url = "bundle:" + bundlepath
self.tempfile = None
f = util.posixfile(bundlepath, "rb")
bundle = exchange.readbundle(ui, f, bundlepath)
if isinstance(bundle, bundle2.unbundle20):
self._bundlefile = bundle
self._cgunpacker = None
cgpart = None
for part in bundle.iterparts(seekable=True):
if part.type == "changegroup":
if cgpart:
                        raise NotImplementedError(
                            "can't process multiple changegroups"
                        )
cgpart = part
self._handlebundle2part(bundle, part)
if not cgpart:
raise error.Abort(_("No changegroups found"))
# This is required to placate a later consumer, which expects
# the payload offset to be at the beginning of the changegroup.
# We need to do this after the iterparts() generator advances
# because iterparts() will seek to end of payload after the
# generator returns control to iterparts().
cgpart.seek(0, os.SEEK_SET)
elif isinstance(bundle, changegroup.cg1unpacker):
if bundle.compressed():
f = self._writetempbundle(bundle.read, ".hg10un", header="HG10UN")
bundle = exchange.readbundle(ui, f, bundlepath, self.localvfs)
self._bundlefile = bundle
self._cgunpacker = bundle
else:
raise error.Abort(_("bundle type %s cannot be read") % type(bundle))
# dict with the mapping 'filename' -> position in the changegroup.
self._cgfilespos = {}
self.firstnewrev = self.changelog.repotiprev + 1
phases.retractboundary(
self, None, phases.draft, [ctx.node() for ctx in self[self.firstnewrev :]]
)
def _handlebundle2part(self, bundle, part):
if part.type != "changegroup":
return
cgstream = part
version = part.params.get("version", "01")
legalcgvers = changegroup.supportedincomingversions(self)
if version not in legalcgvers:
msg = _("Unsupported changegroup version: %s")
raise error.Abort(msg % version)
cgstream = self._writetempbundle(part.read, ".cg%sun" % version)
self._cgunpacker = changegroup.getunbundler(version, cgstream, "UN")
def _writetempbundle(self, readfn, suffix, header=""):
"""Write a temporary file to disk
"""
fdtemp, temp = self.localvfs.mkstemp(prefix="hg-bundle-", suffix=suffix)
self.tempfile = temp
with util.fdopen(fdtemp, "wb") as fptemp:
fptemp.write(pycompat.encodeutf8(header))
while True:
chunk = readfn(2 ** 18)
if not chunk:
break
fptemp.write(chunk)
return self.localvfs.open(self.tempfile, mode="rb")
@util.propertycache
def _phasecache(self):
return bundlephasecache(self, self._phasedefaults)
@util.propertycache
def _mutationstore(self):
return mutation.bundlemutationstore(self)
@util.propertycache
def changelog(self):
# consume the header if it exists
self._cgunpacker.changelogheader()
c = bundlechangelog(self.svfs, self._cgunpacker, self.ui.uiconfig())
self.manstart = self._cgunpacker.tell()
return c
@util.propertycache
def manifestlog(self):
return super(bundlerepository, self).manifestlog
def _constructmanifest(self):
self._cgunpacker.seek(self.manstart)
# consume the header if it exists
self._cgunpacker.manifestheader()
linkmapper = self.changelog.rev
m = bundlemanifest(self.svfs, self._cgunpacker, linkmapper)
self.filestart = self._cgunpacker.tell()
return m
def _consumemanifest(self):
"""Consumes the manifest portion of the bundle, setting filestart so the
file portion can be read."""
self._cgunpacker.seek(self.manstart)
self._cgunpacker.manifestheader()
for delta in self._cgunpacker.deltaiter():
pass
# Changegroup v3 supports additional manifest entries that we need to
# skip.
if self._cgunpacker.version == "03":
for chunkdata in iter(self._cgunpacker.filelogheader, {}):
# If we get here, there are directory manifests in the changegroup
for delta in self._cgunpacker.deltaiter():
pass
self.filestart = self._cgunpacker.tell()
@util.propertycache
def manstart(self):
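        # Accessing self.changelog (a propertycache above) consumes the
        # changelog part of the changegroup and stores the manifest offset as
        # an instance attribute named manstart, shadowing this propertycache.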
self.changelog
return self.manstart
@util.propertycache
def filestart(self):
self.manifestlog
# If filestart was not set by self.manifestlog, that means the
# manifestlog implementation did not consume the manifests from the
# changegroup (ex: it might be consuming trees from a separate bundle2
# part instead). So we need to manually consume it.
if "filestart" not in self.__dict__:
self._consumemanifest()
return self.filestart
def url(self):
# type: () -> str
return self._url
def file(self, f):
        # type: (str) -> filelog.filelog
if not self._cgfilespos:
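            # Lazily index the file section of the changegroup on first use.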
self._cgunpacker.seek(self.filestart)
self._cgfilespos = _getfilestarts(self._cgunpacker)
if f in self._cgfilespos:
self._cgunpacker.seek(self._cgfilespos[f])
linkmapper = self.changelog.rev
return bundlefilelog(self.svfs, f, self._cgunpacker, linkmapper)
else:
return filelog.filelog(self.svfs, f)
def close(self):
# type: () -> None
"""Close assigned bundle file immediately."""
self._bundlefile.close()
if self.tempfile is not None:
self.localvfs.unlink(self.tempfile)
path = self._tempparent
if path is not None:
shutil.rmtree(path, True)
def cancopy(self):
# type: () -> bool
return False
def peer(self):
# type: () -> localrepo.localpeer
return bundlepeer(self)
def getcwd(self):
# type: () -> str
return pycompat.getcwd() # always outside the repo
# Check if parents exist in localrepo before setting
def setparents(self, p1, p2=nullid):
# type: (bytes, bytes) -> None
p1rev = self.changelog.rev(p1)
p2rev = self.changelog.rev(p2)
msg = _("setting parent to node %s that only exists in the bundle\n")
if self.changelog.repotiprev < p1rev:
self.ui.warn(msg % nodemod.hex(p1))
if self.changelog.repotiprev < p2rev:
self.ui.warn(msg % nodemod.hex(p2))
return super(bundlerepository, self).setparents(p1, p2)
def instance(ui, path, create):
if create:
raise error.Abort(_("cannot create new bundle repository"))
# internal config: bundle.mainreporoot
parentpath = ui.config("bundle", "mainreporoot")
if not parentpath:
# try to find the correct path to the working directory repo
parentpath = cmdutil.findrepo(pycompat.getcwd())
if parentpath is None:
parentpath = ""
if parentpath:
# Try to make the full path relative so we get a nice, short URL.
# In particular, we don't want temp dir names in test outputs.
cwd = pycompat.getcwd()
if parentpath == cwd:
parentpath = ""
else:
cwd = pathutil.normasprefix(cwd)
if parentpath.startswith(cwd):
parentpath = parentpath[len(cwd) :]
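    # Accepted path forms (illustrative):
    #   bundle:/path/to/changes.hg                  - parent repo found from cwd
    #   bundle:/path/to/repo+/path/to/changes.hg    - explicit parent repo
    #   /path/to/changes.hg                         - plain path, parent from cwd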
u = util.url(path)
path = u.localpath()
if u.scheme == "bundle":
s = path.split("+", 1)
if len(s) == 1:
repopath, bundlename = parentpath, s[0]
else:
repopath, bundlename = s
else:
repopath, bundlename = parentpath, path
return bundlerepository(ui, repopath, bundlename)
class bundletransactionmanager(object):
def transaction(self):
return None
def close(self):
raise NotImplementedError
def release(self):
raise NotImplementedError
def getremotechanges(ui, repo, other, onlyheads=None, bundlename=None, force=False):
"""obtains a bundle of changes incoming from other
"onlyheads" restricts the returned changes to those reachable from the
specified heads.
"bundlename", if given, stores the bundle to this file path permanently;
otherwise it's stored to a temp file and gets deleted again when you call
the returned "cleanupfn".
"force" indicates whether to proceed on unrelated repos.
Returns a tuple (local, csets, cleanupfn):
"local" is a local repo from which to obtain the actual incoming
changesets; it is a bundlerepo for the obtained bundle when the
original "other" is remote.
"csets" lists the incoming changeset node ids.
"cleanupfn" must be called without arguments when you're done processing
the changes; it closes both the original "other" and the one returned
here.
"""
tmp = discovery.findcommonincoming(repo, other, heads=onlyheads, force=force)
common, incoming, rheads = tmp
if not incoming:
try:
if bundlename:
os.unlink(bundlename)
except OSError:
pass
return repo, [], other.close
commonset = set(common)
rheads = [x for x in rheads if x not in commonset]
bundle = None
bundlerepo = None
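    # Note: this local variable shadows the localrepo module imported at the
    # top of the file for the remainder of this function.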
localrepo = other.local()
if bundlename or not localrepo:
# create a bundle (uncompressed if other repo is not local)
# developer config: devel.legacy.exchange
legexc = ui.configlist("devel", "legacy.exchange")
forcebundle1 = "bundle2" not in legexc and "bundle1" in legexc
canbundle2 = (
not forcebundle1 and other.capable("getbundle") and other.capable("bundle2")
)
if canbundle2:
kwargs = {}
kwargs[r"common"] = common
kwargs[r"heads"] = rheads
kwargs[r"bundlecaps"] = exchange.caps20to10(repo)
kwargs[r"cg"] = True
b2 = other.getbundle("incoming", **kwargs)
fname = bundle = changegroup.writechunks(
ui, b2._forwardchunks(), bundlename
)
else:
if other.capable("getbundle"):
cg = other.getbundle("incoming", common=common, heads=rheads)
elif onlyheads is None and not other.capable("changegroupsubset"):
# compat with older servers when pulling all remote heads
cg = other.changegroup(incoming, "incoming")
rheads = None
else:
cg = other.changegroupsubset(incoming, rheads, "incoming")
if localrepo:
bundletype = "HG10BZ"
else:
bundletype = "HG10UN"
fname = bundle = bundle2.writebundle(ui, cg, bundlename, bundletype)
# keep written bundle?
if bundlename:
bundle = None
if not localrepo:
# use the created uncompressed bundlerepo
localrepo = bundlerepo = bundlerepository(repo.baseui, repo.root, fname)
# this repo contains local and other now, so filter out local again
common = repo.heads()
csets = localrepo.changelog.findmissing(common, rheads)
if bundlerepo:
reponodes = [ctx.node() for ctx in bundlerepo[bundlerepo.firstnewrev :]]
remotephases = other.listkeys("phases")
pullop = exchange.pulloperation(bundlerepo, other, heads=reponodes)
pullop.trmanager = bundletransactionmanager()
exchange._pullapplyphases(pullop, remotephases)
def cleanup():
if bundlerepo:
bundlerepo.close()
if bundle:
os.unlink(bundle)
other.close()
return (localrepo, csets, cleanup)