2013-03-20 09:44:01 +04:00
|
|
|
# This file contains code dealing specifically with converting Mercurial
|
|
|
|
# repositories to Git repositories. Code in this file is meant to be a generic
|
|
|
|
# library and should be usable outside the context of hg-git or an hg command.
|
|
|
|
|
|
|
|
import os
|
|
|
|
import stat
|
|
|
|
|
2014-11-24 03:06:21 +03:00
|
|
|
import compat
|
2018-07-06 03:45:27 +03:00
|
|
|
import dulwich.objects as dulobjs
|
2013-03-20 09:44:01 +04:00
|
|
|
import util
|
2019-01-30 03:25:33 +03:00
|
|
|
from edenscm.mercurial import util as hgutil
|
|
|
|
from edenscm.mercurial.i18n import _
|
2013-03-20 09:44:01 +04:00
|
|
|
|
2018-05-30 12:16:33 +03:00
|
|
|
|
2014-02-20 08:18:43 +04:00
|
|
|
def parse_subrepos(ctx):
    """Load the subrepo description files of a changeset.

    ctx: a changeset context supporting ``in`` and item lookup by file
         name; looked-up entries must provide ``data()``.

    Returns a 2-tuple ``(sub, substate)``:

    * ``sub`` is ``util.parse_hgsub`` applied to the lines of ``.hgsub``
    * ``substate`` is ``util.parse_hgsubstate`` applied to the lines of
      ``.hgsubstate``

    When a file is absent from the changeset, an empty ``util.OrderedDict``
    is returned in its place.
    """
    if ".hgsub" in ctx:
        hgsub_lines = ctx[".hgsub"].data().splitlines()
        sub = util.parse_hgsub(hgsub_lines)
    else:
        sub = util.OrderedDict()

    if ".hgsubstate" in ctx:
        hgsubstate_lines = ctx[".hgsubstate"].data().splitlines()
        substate = util.parse_hgsubstate(hgsubstate_lines)
    else:
        substate = util.OrderedDict()

    return sub, substate
|
|
|
|
|
2018-05-30 12:16:33 +03:00
|
|
|
|
2014-11-24 03:06:21 +03:00
|
|
|
def audit_git_path(ui, path):
    r"""Check for path components that case-fold to .git.

    A component of ``path`` (split on ``os.path.sep``) is considered
    dangerous when ``compat.hfsignoreclean`` maps it to ``.git``, or when
    it has the form ``<base>~<digits>`` with ``<base>`` starting with
    ``git`` in any letter case (presumably a short-name alias of ``.git``
    -- see the CVE mentioned in the abort hint).

    When a dangerous component is found and the boolean config
    ``git.blockdotgit`` is set, ``hgutil.Abort`` is raised. Otherwise a
    warning is emitted through ``ui.warn``. Safe paths produce no output.

    >>> class fakeui(object):
    ...     def configbool(*args):
    ...         return False
    ...     def warn(self, s):
    ...         print s
    >>> u = fakeui()
    >>> audit_git_path(u, 'foo/git~100/wat')
    warning: path 'foo/git~100/wat' contains a dangerous path component.
    It may not be legal to check out in Git.
    It may also be rejected by some git server configurations.
    <BLANKLINE>
    >>> audit_git_path(u, u'foo/.gi\u200ct'.encode('utf-8'))
    warning: path 'foo/.gi\xe2\x80\x8ct' contains a dangerous path component.
    It may not be legal to check out in Git.
    It may also be rejected by some git server configurations.
    <BLANKLINE>
    >>> audit_git_path(u, 'this/is/safe')
    """

    def _is_risky(component):
        # Direct hit: the cleaned-up component is exactly ".git".
        if compat.hfsignoreclean(component) == ".git":
            return True
        # Otherwise look for "git...~<digits>"; only the first "~" counts.
        base, tilde, tail = component.partition("~")
        return bool(tilde) and tail.isdigit() and base.upper().startswith("GIT")

    # any() stops at the first risky component, like an early break would.
    if not any(_is_risky(part) for part in path.split(os.path.sep)):
        return

    if compat.config(ui, "bool", "git", "blockdotgit"):
        raise hgutil.Abort(
            _("Refusing to export likely-dangerous path %r") % path,
            hint=_(
                "If you need to continue, read about CVE-2014-9390 and "
                "then set '[git] blockdotgit = false' in your hgrc."
            ),
        )

    # Blocking is disabled: let the export continue, but tell the user.
    message = _(
        "warning: path %r contains a dangerous path component.\n"
        "It may not be legal to check out in Git.\n"
        "It may also be rejected by some git server configurations.\n"
    )
    ui.warn(message % path)
|
|
|
|
|
2014-11-24 03:06:21 +03:00
|
|
|
|
2018-10-26 21:23:52 +03:00
|
|
|
class GitTreeDict(object):
    """Mutable, dictionary-like view of fullpath -> tree for one commit.

    Only the root tree (key "") is loaded up front. Every other tree is
    fetched from the object store the first time its path is requested and
    then kept in ``self.trees``. Assigning a path makes sure its parent
    paths are loaded (or created empty) too.
    """

    def __init__(self, store, commit):
        # store: object store, indexed by object id.
        # commit: commit whose tree becomes the root, or None to start
        #         from a fresh empty tree.
        self.store = store
        root = dulobjs.Tree() if commit is None else store[commit.tree]
        self.trees = {"": root}

    def __getitem__(self, path):
        """Return the tree at ``path``; raise KeyError when there is none."""
        tree = self.get(path)
        if tree is not None:
            return tree
        raise KeyError("no path %s" % path)

    def get(self, path, default=None):
        """Return the tree at ``path``, loading it on demand.

        ``default`` is returned when the parent of ``path`` is unknown or
        has no entry for the last path component. KeyError is raised when
        the root entry is missing or when the entry exists but does not
        have directory mode.
        """
        cached = self.trees.get(path)
        if cached is not None:
            return cached

        # Not cached yet: the entry has to be resolved through its parent,
        # which is impossible for the root itself.
        if path == "":
            raise KeyError("missing required '' root")

        dirname, leaf = os.path.split(path)
        parent = self.get(dirname)
        if parent is None or leaf not in parent:
            return default

        # Fetch the child tree from the store and remember it.
        mode, sha = parent[leaf]
        if mode != stat.S_IFDIR:
            raise KeyError("trying to set non-tree child %s" % path)

        loaded = self.store[sha]
        self.trees[path] = loaded
        return loaded

    def __setitem__(self, path, value):
        """Store ``value`` as the tree for ``path``.

        Missing parents are created as empty trees. The parent is NOT
        given an entry pointing at the child here: such an entry needs the
        child's id, which is not final while the tree is still being
        mutated. Linking children into their parents is left to a later
        bottom-up pass by the user of this class.
        """
        dirname = os.path.split(path)[0]
        if self.get(dirname) is None:
            # Placeholder parent; its entries are filled in later.
            self[dirname] = dulobjs.Tree()
        self.trees[path] = value

    def __delitem__(self, path):
        """Forget the loaded tree for ``path``; the root cannot be deleted."""
        if path == "":
            raise KeyError("cannot delete root path")
        del self.trees[path]

    def setdefault(self, path, default):
        """Return the tree at ``path``, storing ``default`` there if absent."""
        existing = self.get(path)
        if existing is not None:
            return existing
        self[path] = default
        return default
|
|
|
|
|
|
|
|
|
2013-03-20 09:44:01 +04:00
|
|
|
class IncrementalChangesetExporter(object):
    """Incrementally export Mercurial changesets to Git trees.

    The purpose of this class is to facilitate Git tree export that is more
    optimal than brute force.

    A "dumb" implementation of Mercurial to Git export would iterate over
    every file present in a Mercurial changeset and would convert each to
    a Git blob and then conditionally add it to a Git repository if it didn't
    yet exist. This is suboptimal because the overhead associated with
    obtaining every file's raw content and converting it to a Git blob is
    not trivial!

    This class works around the suboptimality of brute force export by
    leveraging the information stored in Mercurial - the knowledge of what
    changed between changesets - to only export Git objects corresponding to
    changes in Mercurial. In the context of converting Mercurial repositories
    to Git repositories, we only export objects Git (possibly) hasn't seen yet.
    This prevents a lot of redundant work and is thus faster.

    Callers instantiate an instance of this class against a mercurial.localrepo
    instance. They then associate it with a specific changeset by calling
    update_changeset(). On each call to update_changeset(), the instance
    computes the difference between the current and new changesets and emits
    Git objects that haven't yet been encountered during the lifetime of the
    class instance. In other words, it expresses Mercurial changeset deltas in
    terms of Git objects. Callers then (usually) take this set of Git objects
    and add them to the Git repository.

    This class only emits Git blobs and trees, not commits.

    The tree calculation part of this class is essentially a reimplementation
    of dulwich.index.commit_tree. However, since our implementation reuses
    Tree instances and only recalculates SHA-1 when things change, we are
    more efficient.
    """

    def __init__(self, hg_repo, start_ctx, git_store, git_commit):
        """Create an instance against a mercurial.localrepo.

        hg_repo: the Mercurial repository; its status() and ui are used.
        start_ctx: the context for a Mercurial commit that has a Git
                   equivalent, passed in as git_commit. The incremental
                   computation will be started from this commit.
        git_store: the Git object store the commit comes from.
        git_commit: the Git commit matching start_ctx, or None.

        start_ctx can be repo[nullid], in which case git_commit should be None.
        """
        self._hg = hg_repo

        # Our current revision's context.
        self._ctx = start_ctx

        # Path to dulwich.objects.Tree.
        self._dirs = GitTreeDict(git_store, git_commit)

        # Mercurial file nodeid to Git blob SHA-1. Used to prevent redundant
        # blob calculation.
        self._blob_cache = {}

    @property
    def root_tree_sha(self):
        """The SHA-1 of the root Git tree.

        This is needed to construct a Git commit object.
        """
        return self._dirs[""].id

    def update_changeset(self, newctx):
        """Set the tree to track a new Mercurial changeset.

        This is a generator of 2-tuples. The first item in each tuple is a
        dulwich object, either a Blob or a Tree. The second item is the
        corresponding Mercurial nodeid for the item, if any. Only blobs will
        have nodeids. Trees do not correspond to a specific nodeid, so it does
        not make sense to emit a nodeid for them.

        When exporting trees from Mercurial, callers typically write the
        returned dulwich object to the Git repo via the store's add_object().

        Some emitted objects may already exist in the Git repository. This
        class does not know about the Git repository, so it's up to the caller
        to conditionally add the object, etc.

        Emitted objects are those that have changed since the last call to
        update_changeset. If this is the first call to update_changeset, all
        objects in the tree are emitted.

        Because this is a generator, the tracked changeset only advances to
        newctx once the generator has been fully consumed.
        """
        # Our general strategy is to accumulate dulwich.objects.Blob and
        # dulwich.objects.Tree instances for the current Mercurial changeset.
        # We do this incrementally by iterating over the Mercurial-reported
        # changeset delta. We rely on the behavior of dulwich to lazily
        # calculate a Tree's SHA-1 when we modify it. This is critical to
        # performance.

        # In theory we should be able to look at changectx.files(). This is
        # *much* faster. However, it may not be accurate, especially with older
        # repositories, which may not record things like deleted files
        # explicitly in the manifest (which is where files() gets its data).
        # The only reliable way to get the full set of changes is by looking at
        # the full manifest. And, the easy way to compare two manifests is
        # localrepo.status().
        modified, added, removed = self._hg.status(self._ctx, newctx)[0:3]

        # We track which directories/trees have modified in this update and we
        # only export those.
        dirty_trees = set()

        subadded, subremoved = [], []

        # Subrepo changes are computed once, up front, so that the result
        # does not depend on the order in which files are visited below.
        for s in modified, added, removed:
            if ".hgsub" in s or ".hgsubstate" in s:
                subadded, subremoved = self._handle_subrepos(newctx)
                break

        # We first process subrepo and file removals so we can prune dead
        # trees.
        for path in subremoved:
            self._remove_path(path, dirty_trees)

        for path in removed:
            if path == ".hgsubstate" or path == ".hgsub":
                continue

            self._remove_path(path, dirty_trees)

        for path, sha in subadded:
            d = os.path.dirname(path)
            tree = self._dirs.setdefault(d, dulobjs.Tree())
            dirty_trees.add(d)
            tree.add(os.path.basename(path), dulobjs.S_IFGITLINK, sha)

        # For every file that changed or was added, we need to calculate the
        # corresponding Git blob and its tree entry. We emit the blob
        # immediately and update trees to be aware of its presence.
        for path in set(modified) | set(added):
            audit_git_path(self._hg.ui, path)
            if path == ".hgsubstate" or path == ".hgsub":
                continue

            d = os.path.dirname(path)
            tree = self._dirs.setdefault(d, dulobjs.Tree())
            dirty_trees.add(d)

            fctx = newctx[path]

            func = IncrementalChangesetExporter.tree_entry
            entry, blob = func(fctx, self._blob_cache)
            # blob is None when the blob id came from the cache, i.e. the
            # blob was already emitted earlier.
            if blob is not None:
                yield (blob, fctx.filenode())

            tree.add(*entry)

        # Now that all the trees represent the current changeset, recalculate
        # the tree IDs and emit them. Note that we wait until now to calculate
        # tree SHA-1s. This is an important difference between us and
        # dulwich.index.commit_tree(), which builds new Tree instances for each
        # series of blobs.
        for obj in self._populate_tree_entries(dirty_trees):
            yield (obj, None)

        self._ctx = newctx

    def _remove_path(self, path, dirty_trees):
        """Remove a path (file or git link) from the current changeset.

        The directory containing the path is added to dirty_trees. If the
        tree containing this path is empty, it might be removed.
        """
        d = os.path.dirname(path)
        tree = self._dirs.get(d, dulobjs.Tree())

        del tree[os.path.basename(path)]
        dirty_trees.add(d)

        # If removing this file made the tree empty, we should delete this
        # tree. This could result in parent trees losing their only child
        # and so on.
        if not len(tree):
            self._remove_tree(d)
        else:
            self._dirs[d] = tree

    def _remove_tree(self, path):
        """Remove a (presumably empty) tree from the current changeset.

        A now-empty tree may be the only child of its parent. So, we traverse
        up the chain towards the root tree, deleting any empty trees along the
        way. The root tree itself is never deleted: if it ends up empty it is
        kept as an empty tree.
        """
        try:
            del self._dirs[path]
        except KeyError:
            # Unknown path, or the root (which refuses deletion).
            return

        # Now we traverse up to the parent and delete any references.
        if path == "":
            return

        basename = os.path.basename(path)
        parent = os.path.dirname(path)
        while True:
            tree = self._dirs.get(parent, None)

            # No parent entry. Nothing to remove or update.
            if tree is None:
                return

            try:
                del tree[basename]
            except KeyError:
                return

            if len(tree):
                return

            # The parent is empty as well. When it is the root we stop here
            # and keep it: the root cannot be deleted (self._dirs raises
            # KeyError for ""), and an empty root tree is what gets exported
            # for an empty changeset anyway.
            if parent == "":
                return

            # The parent tree is empty. So, we can delete it.
            del self._dirs[parent]

            basename = os.path.basename(parent)
            parent = os.path.dirname(parent)

    def _populate_tree_entries(self, dirty_trees):
        """Link dirty trees into their parents and yield them bottom-up.

        dirty_trees is extended in place with every ancestor directory of
        its members, plus the root. Each tree that still exists is yielded,
        deepest path first, and its id is then written into its parent tree.
        """
        self._dirs.setdefault("", dulobjs.Tree())

        # A change in a directory changes the id of every ancestor too.
        for dirty in list(dirty_trees):
            parent = os.path.dirname(dirty)

            while parent != "":
                if parent in dirty_trees:
                    # Ancestors of an already-dirty parent are covered.
                    break

                dirty_trees.add(parent)
                parent = os.path.dirname(parent)

        # The root tree is always dirty but doesn't always get updated.
        dirty_trees.add("")

        # We only need to recalculate and export dirty trees. Longest paths
        # go first so children are finalized before their parents.
        for d in sorted(dirty_trees, key=len, reverse=True):
            # Only happens for deleted directories.
            try:
                tree = self._dirs[d]
            except KeyError:
                continue

            yield tree

            if d == "":
                continue

            parent_tree = self._dirs[os.path.dirname(d)]

            # Accessing the tree's ID is what triggers SHA-1 calculation and is
            # the expensive part (at least if the tree has been modified since
            # the last time we retrieved its ID). Also, assigning an entry to a
            # tree (even if it already exists) invalidates the existing tree
            # and incurs SHA-1 recalculation. So, it's in our interest to avoid
            # invalidating trees. Since we only update the entries of dirty
            # trees, this should hold true.
            parent_tree[os.path.basename(d)] = (stat.S_IFDIR, tree.id)

    def _handle_subrepos(self, newctx):
        """Compute git-link changes between the current changeset and newctx.

        Nothing is applied here; the caller applies the result.

        Returns a 2-tuple (added, removed): added is a list of (path, sha)
        pairs to (re)link, removed is a list of paths whose link must go.
        """
        sub, substate = parse_subrepos(self._ctx)
        newsub, newsubstate = parse_subrepos(newctx)

        # For each path, the logic is described by the following table. 'no'
        # stands for 'the subrepo doesn't exist', 'git' stands for 'git
        # subrepo', and 'hg' stands for 'hg or other subrepo'.
        #
        #  old  new  |  action
        #   *   git  |   link    (1)
        #  git   hg  |  delete   (2)
        #  git   no  |  delete   (3)
        #
        # All other combinations are 'do nothing'.
        #
        # git links without corresponding submodule paths are stored as
        # subrepos with a substate but without an entry in .hgsub.

        # 'added' is both modified and added
        added, removed = [], []

        def isgit(sub, path):
            return path not in sub or sub[path].startswith("[git]")

        for path, sha in substate.iteritems():
            if not isgit(sub, path):
                # old = hg -- will be handled in next loop
                continue
            # old = git
            if path not in newsubstate or not isgit(newsub, path):
                # new = hg or no, case (2) or (3)
                removed.append(path)

        for path, sha in newsubstate.iteritems():
            if not isgit(newsub, path):
                # new = hg or no; the only cases we care about are handled
                # above
                continue

            # case (1)
            added.append((path, sha))

        return added, removed

    @staticmethod
    def tree_entry(fctx, blob_cache):
        """Compute a dulwich TreeEntry from a filectx.

        A side effect is that the blob id is stored in the passed cache,
        keyed by the file's nodeid.

        Returns a 2-tuple of (dulwich.objects.TreeEntry, dulwich.objects.Blob).
        The blob is None when its id was already in the cache, in which case
        no blob object is built.
        """
        blob_id = blob_cache.get(fctx.filenode(), None)
        blob = None

        if blob_id is None:
            blob = dulobjs.Blob.from_string(fctx.data())
            blob_id = blob.id
            blob_cache[fctx.filenode()] = blob_id

        flags = fctx.flags()

        # Mode by file flag: 'l' -> 0o120000, 'x' -> 0o100755, else 0o100644.
        if "l" in flags:
            mode = 0o120000
        elif "x" in flags:
            mode = 0o100755
        else:
            mode = 0o100644

        return (dulobjs.TreeEntry(os.path.basename(fctx.path()), mode, blob_id), blob)
|