git_handler: fix hgsubstate generation

Before this patch, in the git-to-hg conversion, .hgsubstate, once created, is
never deleted, even if no submodules are present any longer. This is a broken
state, as shown by the test whose SHA changes in this patch. Fix that by looking
at the diff instead of just at which submodules are present.

Since 'gitlinks' now contains only the *changed* gitlinks, not *all* gitlinks,
it no longer makes sense to gate the gitmodules checks on 'gitlinks' being
non-empty.

This patch simply demonstrates that the test was broken; an upcoming patch will
introduce more tests.

Bonus: this also makes the import process faster because we no longer need to
walk the entire tree to collect gitlinks.

This will cause the SHAs of repos that have submodules added and then removed
to change.
This commit is contained in:
Siddharth Agarwal 2014-02-14 15:44:50 -08:00
parent 94f67b719d
commit 8d0c4fe9f2
2 changed files with 33 additions and 15 deletions

View File

@ -685,25 +685,39 @@ class GitHandler(object):
# get a list of the changed, added, removed files and gitlinks
files, gitlinks = self.get_files_changed(commit)
# Handle gitlinks: collect
gitlinks = self.collect_gitlinks(commit.tree)
git_commit_tree = self.git[commit.tree]
# Analyze hgsubstate and build an updated version
# using SHAs from gitlinks
hgsubstate = None
if gitlinks:
hgsubstate = util.parse_hgsubstate(self.git_file_readlines(git_commit_tree, '.hgsubstate'))
for path, sha in gitlinks:
# Analyze hgsubstate and build an updated version using SHAs from
# gitlinks. Order of application:
# - preexisting .hgsubstate in git tree
# - .hgsubstate from hg parent
# - changes in gitlinks
hgsubstate = util.parse_hgsubstate(
self.git_file_readlines(git_commit_tree, '.hgsubstate'))
parentsubdata = ''
if gparents:
p1ctx = self.repo.changectx(gparents[0])
if '.hgsubstate' in p1ctx:
parentsubdata = p1ctx.filectx('.hgsubstate').data().splitlines()
parentsubstate = util.parse_hgsubstate(parentsubdata)
for path, sha in parentsubstate.iteritems():
hgsubstate[path] = sha
for path, sha in gitlinks.iteritems():
if sha is None:
hgsubstate.pop(path, None)
else:
hgsubstate[path] = sha
# in case .hgsubstate wasn't among changed files
# force its inclusion
files['.hgsubstate'] = (False, 0100644, None)
if not hgsubstate and parentsubdata:
files['.hgsubstate'] = True, None, None
elif util.serialize_hgsubstate(hgsubstate) != parentsubdata:
files['.hgsubstate'] = False, 0100644, None
# Analyze .hgsub and merge with .gitmodules
hgsub = None
gitmodules = self.parse_gitmodules(git_commit_tree)
if gitmodules or gitlinks:
if gitmodules:
hgsub = util.parse_hgsub(self.git_file_readlines(git_commit_tree, '.hgsub'))
for (sm_path, sm_url, sm_name) in gitmodules:
hgsub[sm_path] = '[git]' + sm_url

View File

@ -51,9 +51,9 @@ while older git will use the full normalized path for .)
$ hg clone gitrepo2 hgrepo | grep -v '^updating'
importing git objects into hg
2 files updated, 0 files merged, 0 files removed, 0 files unresolved
1 files updated, 0 files merged, 0 files removed, 0 files unresolved
$ hg -R hgrepo log --graph | grep -v ': *master'
@ changeset: 2:76fda365fbbb
@ changeset: 2:8f93c04eafda
| tag: default/master
| tag: tip
| user: test <test@example.org>
@ -73,4 +73,8 @@ while older git will use the full normalized path for .)
we should have some bookmarks
$ hg -R hgrepo book
* master 2:76fda365fbbb
* master 2:8f93c04eafda
check that .hgsubstate and .hgsub aren't present
$ hg -R hgrepo manifest
alpha