Refactor branch handling to be much more dynamic (and hopefully robust).

This should allow fixing of several outstanding issues with branch handling. Note that this is a *massive* change to one of the oldest parts of hgsubversion, so it might introduce bugs not caught by the testsuite.
This commit is contained in:
Augie Fackler 2009-03-02 23:58:38 -06:00
parent 9cbde47428
commit 5771c60c73
4 changed files with 220 additions and 71 deletions

View File

@ -69,7 +69,7 @@ def fetch_revisions(ui, svn_url, hg_repo_path, skipto_rev=0, stupid=None,
# start converting revisions # start converting revisions
for r in svn.revisions(start=start): for r in svn.revisions(start=start):
valid = False valid = True
hg_editor.update_branch_tag_map_for_rev(r) hg_editor.update_branch_tag_map_for_rev(r)
for p in r.paths: for p in r.paths:
if hg_editor._is_path_valid(p): if hg_editor._is_path_valid(p):
@ -224,11 +224,12 @@ def stupid_diff_branchrev(ui, svn, hg_editor, branch, r, parentctx):
callable to retrieve individual file information. Raise BadPatchApply upon callable to retrieve individual file information. Raise BadPatchApply upon
error. error.
""" """
def make_diff_path(b): def make_diff_path(branch):
if b == None: if branch == 'trunk' or branch is None:
return 'trunk' return 'trunk'
return 'branches/' + b elif branch.startswith('../'):
return branch[3:]
return 'branches/%s' % branch
parent_rev, br_p = hg_editor.get_parent_svn_branch_and_rev(r.revnum, branch) parent_rev, br_p = hg_editor.get_parent_svn_branch_and_rev(r.revnum, branch)
diff_path = make_diff_path(branch) diff_path = make_diff_path(branch)
try: try:
@ -554,8 +555,41 @@ def stupid_fetch_branchrev(svn, hg_editor, branch, branchpath, r, parentctx):
def stupid_svn_server_pull_rev(ui, svn, hg_editor, r): def stupid_svn_server_pull_rev(ui, svn, hg_editor, r):
# this server fails at replay # this server fails at replay
branches = hg_editor.branches_in_paths(r.paths) branches = hg_editor.branches_in_paths(r.paths, r.revnum, svn.checkpath, svn.list_files)
deleted_branches = {} deleted_branches = {}
brpaths = branches.values()
bad_branch_paths = {}
for br, bp in branches.iteritems():
bad_branch_paths[br] = []
# This next block might be needed, but for now I'm omitting it until it can be
# proven necessary.
# for bad in brpaths:
# if bad.startswith(bp) and len(bad) > len(bp):
# bad_branch_paths[br].append(bad[len(bp)+1:])
# We've go a branch that contains other branches. We have to be careful to
# get results similar to real replay in this case.
for existingbr in hg_editor.branches:
bad = hg_editor._remotename(existingbr)
if bad.startswith(bp) and len(bad) > len(bp):
bad_branch_paths[br].append(bad[len(bp)+1:])
for p in r.paths:
if hg_editor._is_path_tag(p):
continue
branch = hg_editor._localname(p)
if r.paths[p].action == 'R' and branch in hg_editor.branches:
branchedits = sorted(filter(lambda x: x[0][1] == branch and x[0][0] < r.revnum,
hg_editor.revmap.iteritems()), reverse=True)
is_closed = False
if len(branchedits) > 0:
branchtip = branchedits[0][1]
for child in hg_editor.repo[branchtip].children():
if child.branch() == 'closed-branches':
is_closed = True
break
if not is_closed:
deleted_branches[branch] = branchtip
date = r.date.replace('T', ' ').replace('Z', '').split('.')[0] date = r.date.replace('T', ' ').replace('Z', '').split('.')[0]
date += ' -0000' date += ' -0000'
check_deleted_branches = set() check_deleted_branches = set()
@ -589,12 +623,15 @@ def stupid_svn_server_pull_rev(ui, svn, hg_editor, r):
raise IOError() raise IOError()
return context.memfilectx(path=path, data=externals.write(), return context.memfilectx(path=path, data=externals.write(),
islink=False, isexec=False, copied=None) islink=False, isexec=False, copied=None)
for bad in bad_branch_paths[b]:
if path.startswith(bad):
raise IOError()
return filectxfn2(repo, memctx, path) return filectxfn2(repo, memctx, path)
extra = util.build_extra(r.revnum, b, svn.uuid, svn.subdir) extra = util.build_extra(r.revnum, b, svn.uuid, svn.subdir)
if '' in files_touched: if '' in files_touched:
files_touched.remove('') files_touched.remove('')
excluded = [f for f in files_touched excluded = [f for f in files_touched
if not hg_editor._is_file_included(f)] if not hg_editor._is_file_included(f)]
for f in excluded: for f in excluded:
files_touched.remove(f) files_touched.remove(f)
@ -612,6 +649,9 @@ def stupid_svn_server_pull_rev(ui, svn, hg_editor, r):
date, date,
extra) extra)
ha = hg_editor.repo.commitctx(current_ctx) ha = hg_editor.repo.commitctx(current_ctx)
branch = extra.get('branch', None)
if not branch in hg_editor.branches:
hg_editor.branches[branch] = None, 0, r.revnum
hg_editor.add_to_revmap(r.revnum, b, ha) hg_editor.add_to_revmap(r.revnum, b, ha)
hg_editor._save_metadata() hg_editor._save_metadata()
util.describe_commit(ui, ha, b) util.describe_commit(ui, ha, b)

View File

@ -166,41 +166,111 @@ class HgChangeReceiver(delta.Editor):
pickle_atomic(self.branches, self.branch_info_file, self.meta_data_dir) pickle_atomic(self.branches, self.branch_info_file, self.meta_data_dir)
pickle_atomic(self.tags, self.tag_info_file, self.meta_data_dir) pickle_atomic(self.tags, self.tag_info_file, self.meta_data_dir)
def branches_in_paths(self, paths): def branches_in_paths(self, paths, revnum, checkpath, listdir):
'''Given a list of paths, return mapping of all branches touched '''Given a list of paths, return mapping of all branches touched
to their branch path. to their branch path.
''' '''
branches = {} branches = {}
paths_need_discovery = []
for p in paths: for p in paths:
relpath, branch, branchpath = self._split_branch_path(p) relpath, branch, branchpath = self._split_branch_path(p)
if relpath is not None: if relpath is not None:
branches[branch] = branchpath branches[branch] = branchpath
elif paths[p].action == 'D' and not self._is_path_tag(p):
ln = self._localname(p)
# must check in branches_to_delete as well, because this runs after we
# already updated the branch map
if ln in self.branches or ln in self.branches_to_delete:
branches[self._localname(p)] = p
else:
paths_need_discovery.append(p)
if paths_need_discovery:
paths_need_discovery = [(len(p), p) for p in paths_need_discovery]
paths_need_discovery.sort()
paths_need_discovery = [p[1] for p in paths_need_discovery]
actually_files = []
while paths_need_discovery:
p = paths_need_discovery.pop(0)
path_could_be_file = True
# TODO(augie) Figure out if you can use break here in a for loop, quick
# testing of that failed earlier.
ind = 0
while ind < len(paths_need_discovery) and not paths_need_discovery:
if op.startswith(p):
path_could_be_file = False
ind += 1
if path_could_be_file:
if checkpath(p, revnum) == 'f':
actually_files.append(p)
# if there's a copyfrom_path and there were files inside that copyfrom,
# we need to detect those branches. It's a little thorny and slow, but
# seems to be the best option.
elif paths[p].copyfrom_path and not p.startswith('tags/'):
paths_need_discovery.extend(['%s/%s' % (p,x[0])
for x in listdir(p, revnum)
if x[1] == 'f'])
if actually_files:
filepaths = [p.split('/') for p in actually_files]
filepaths = [(len(p), p) for p in filepaths]
filepaths.sort()
filepaths = [p[1] for p in filepaths]
while filepaths:
path = filepaths.pop(0)
parentdir = '/'.join(path[:-1])
filepaths = [p for p in filepaths if not '/'.join(p).startswith(parentdir)]
branchpath = self._normalize_path(parentdir)
branchname = self._localname(branchpath)
branches[branchname] = branchpath
return branches return branches
def _path_and_branch_for_path(self, path): def _path_and_branch_for_path(self, path, existing=True):
return self._split_branch_path(path)[:2] return self._split_branch_path(path, existing=existing)[:2]
def _split_branch_path(self, path): def _localname(self, path):
"""Compute the local name for a branch located at path.
"""
assert not path.startswith('tags/')
if path == 'trunk':
return None
elif path.startswith('branches/'):
return path[len('branches/'):]
return '../%s' % path
def _remotename(self, branch):
if branch == 'default' or branch is None:
return 'trunk'
elif branch.startswith('../'):
return branch[3:]
return 'branches/%s' % branch
def _split_branch_path(self, path, existing=True):
"""Figure out which branch inside our repo this path represents, and """Figure out which branch inside our repo this path represents, and
also figure out which path inside that branch it is. also figure out which path inside that branch it is.
Raises an exception if it can't perform its job. Returns a tuple of (path within branch, local branch name, server-side branch path).
If existing=True, will return None, None, None if the file isn't on some known
branch. If existing=False, then it will guess what the branch would be if it were
known.
""" """
path = self._normalize_path(path) path = self._normalize_path(path)
if path.startswith('trunk'): if path.startswith('tags/'):
p = path[len('trunk'):] return None, None, None
if p and p[0] == '/': test = ''
p = p[1:] path_comps = path.split('/')
return p, None, 'trunk' while self._localname(test) not in self.branches and len(path_comps):
elif path.startswith('branches/'): if not test:
p = path[len('branches/'):] test = path_comps.pop(0)
br = p.split('/')[0] else:
if br: test += '/%s' % path_comps.pop(0)
p = p[len(br)+1:] if self._localname(test) in self.branches:
if p and p[0] == '/': return path[len(test)+1:], self._localname(test), test
p = p[1:] if existing:
return p, br, 'branches/' + br return None, None, None
return None, None, None path = test.split('/')[-1]
test = '/'.join(test.split('/')[:-1])
return path, self._localname(test), test
def set_current_rev(self, rev): def set_current_rev(self, rev):
"""Set the revision we're currently converting. """Set the revision we're currently converting.
@ -234,7 +304,7 @@ class HgChangeReceiver(delta.Editor):
if path and path[0] == '/': if path and path[0] == '/':
path = path[1:] path = path[1:]
return path return path
def _is_file_included(self, subpath): def _is_file_included(self, subpath):
def checkpathinmap(path, mapping): def checkpathinmap(path, mapping):
def rpairs(name): def rpairs(name):
@ -243,14 +313,14 @@ class HgChangeReceiver(delta.Editor):
while e != -1: while e != -1:
yield name[:e], name[e+1:] yield name[:e], name[e+1:]
e = name.rfind('/', 0, e) e = name.rfind('/', 0, e)
for pre, suf in rpairs(path): for pre, suf in rpairs(path):
try: try:
return mapping[pre] return mapping[pre]
except KeyError, err: except KeyError, err:
pass pass
return None return None
if len(self.includepaths) and len(subpath): if len(self.includepaths) and len(subpath):
inc = checkpathinmap(subpath, self.includepaths) inc = checkpathinmap(subpath, self.includepaths)
else: else:
@ -268,10 +338,13 @@ class HgChangeReceiver(delta.Editor):
if subpath is None: if subpath is None:
return False return False
return self._is_file_included(subpath) return self._is_file_included(subpath)
def _is_path_tag(self, path): def _is_path_tag(self, path):
"""If path represents the path to a tag, returns the tag name. """If path could represent the path to a tag, returns the potential tag name.
Note that it's only a tag if it was copied from the path '' in a branch (or tag)
we have, for our purposes.
Otherwise, returns False. Otherwise, returns False.
""" """
@ -279,7 +352,7 @@ class HgChangeReceiver(delta.Editor):
for tags_path in self.tag_locations: for tags_path in self.tag_locations:
if path and (path.startswith(tags_path) and if path and (path.startswith(tags_path) and
len(path) > len('%s/' % tags_path)): len(path) > len('%s/' % tags_path)):
return path[len(tags_path)+1:].split('/')[0] return path[len(tags_path)+1:]
return False return False
def get_parent_svn_branch_and_rev(self, number, branch): def get_parent_svn_branch_and_rev(self, number, branch):
@ -321,6 +394,28 @@ class HgChangeReceiver(delta.Editor):
return self.revmap[r, br] return self.revmap[r, br]
return revlog.nullid return revlog.nullid
def _svnpath(self, branch):
"""Return the relative path in svn of branch.
"""
if branch == None or branch == 'default':
return 'trunk'
elif branch.startswith('../'):
return branch[3:]
return 'branches/%s' % branch
def __determine_parent_branch(self, p, src_path, src_rev, revnum):
if src_path is not None:
src_file, src_branch = self._path_and_branch_for_path(src_path)
src_tag = self._is_path_tag(src_path)
if src_tag != False:
# also case 2
src_branch, src_rev = self.tags[src_tag]
return {self._localname(p): (src_branch, src_rev, revnum )}
if src_file == '':
# case 2
return {self._localname(p): (src_branch, src_rev, revnum )}
return {}
def update_branch_tag_map_for_rev(self, revision): def update_branch_tag_map_for_rev(self, revision):
paths = revision.paths paths = revision.paths
added_branches = {} added_branches = {}
@ -328,37 +423,8 @@ class HgChangeReceiver(delta.Editor):
self.branches_to_delete = set() self.branches_to_delete = set()
tags_to_delete = set() tags_to_delete = set()
for p in sorted(paths): for p in sorted(paths):
fi, br = self._path_and_branch_for_path(p) t_name = self._is_path_tag(p)
if fi is not None: if t_name != False:
if fi == '' and paths[p].action != 'D':
src_p = paths[p].copyfrom_path
src_rev = paths[p].copyfrom_rev
src_tag = self._is_path_tag(src_p)
if not ((src_p and self._is_path_valid(src_p)) or
(src_tag and src_tag in self.tags)):
# The branch starts here and is not a copy
src_branch = None
src_rev = 0
elif src_tag:
# this is a branch created from a tag. Note that this
# really does happen (see Django)
src_branch, src_rev = self.tags[src_tag]
else:
# Not from a tag, and from a valid repo path
(src_p,
src_branch) = self._path_and_branch_for_path(src_p)
if src_p is None:
continue
if (br not in self.branches or
not (src_rev == 0 and src_branch == None)):
added_branches[br] = src_branch, src_rev, revision.revnum
elif fi == '' and br in self.branches:
self.branches_to_delete.add(br)
else:
t_name = self._is_path_tag(p)
if t_name == False:
continue
src_p, src_rev = paths[p].copyfrom_path, paths[p].copyfrom_rev src_p, src_rev = paths[p].copyfrom_path, paths[p].copyfrom_rev
# if you commit to a tag, I'm calling you stupid and ignoring # if you commit to a tag, I'm calling you stupid and ignoring
# you. # you.
@ -378,6 +444,41 @@ class HgChangeReceiver(delta.Editor):
elif (paths[p].action == 'D' and p.endswith(t_name) elif (paths[p].action == 'D' and p.endswith(t_name)
and t_name in self.tags): and t_name in self.tags):
tags_to_delete.add(t_name) tags_to_delete.add(t_name)
continue
# At this point we know the path is not a tag. In that case, we only care if it
# is the root of a new branch (in this function). This is determined by the
# following checks:
# 1. Is the file located inside any currently known branch?
# If yes, then we're done with it, this isn't interesting.
# 2. Does the file have copyfrom information that means it is a copy from the root
# of some other branch?
# If yes, then we're done: this is a new branch, and we record the copyfrom in
# added_branches
# 3. Neither of the above. This could be a branch, but it might never work out for
# us. It's only ever a branch (as far as we're concerned) if it gets committed
# to, which we have to detect at file-write time anyway. So we do nothing here.
# 4. It's the root of an already-known branch, with an action of 'D'. We mark the
# branch as deleted.
# 5. It's the parent directory of one or more already-known branches, so we mark them
# as deleted.
# 6. It's a branch being replaced by another branch - the action will be 'R'.
fi, br = self._path_and_branch_for_path(p)
if fi is not None:
if fi == '':
if paths[p].action == 'D':
self.branches_to_delete.add(br) # case 4
elif paths[p].action == 'R':
added_branches.update(self.__determine_parent_branch(p, paths[p].copyfrom_path,
paths[p].copyfrom_rev,
revision.revnum))
continue # case 1
if paths[p].action == 'D':
# check for case 5
for known in self.branches:
if self._svnpath(known).startswith(p):
self.branches_to_delete.add(br) # case 5
added_branches.update(self.__determine_parent_branch(p, paths[p].copyfrom_path,
paths[p].copyfrom_rev, revision.revnum))
for t in tags_to_delete: for t in tags_to_delete:
del self.tags[t] del self.tags[t]
for br in self.branches_to_delete: for br in self.branches_to_delete:
@ -556,6 +657,7 @@ class HgChangeReceiver(delta.Editor):
our_util.describe_commit(self.ui, new_hash, branch) our_util.describe_commit(self.ui, new_hash, branch)
if (rev.revnum, branch) not in self.revmap: if (rev.revnum, branch) not in self.revmap:
self.add_to_revmap(rev.revnum, branch, new_hash) self.add_to_revmap(rev.revnum, branch, new_hash)
self._save_metadata()
self.clear_current_info() self.clear_current_info()
def authorforsvnauthor(self, author): def authorforsvnauthor(self, author):
@ -741,9 +843,12 @@ class HgChangeReceiver(delta.Editor):
self.base_revision = None self.base_revision = None
if path in self.deleted_files: if path in self.deleted_files:
del self.deleted_files[path] del self.deleted_files[path]
fpath, branch = self._path_and_branch_for_path(path) fpath, branch = self._path_and_branch_for_path(path, existing=False)
if not fpath: if not fpath:
return return
if branch not in self.branches:
# we know this branch will exist now, because it has at least one file. Rock.
self.branches[branch] = None, 0, self.current_rev.revnum
self.current_file = path self.current_file = path
self.should_edit_most_recent_plaintext = False self.should_edit_most_recent_plaintext = False
if not copyfrom_path: if not copyfrom_path:
@ -790,13 +895,14 @@ class HgChangeReceiver(delta.Editor):
if not self._is_path_valid(copyfrom_path) and not tag: if not self._is_path_valid(copyfrom_path) and not tag:
self.missing_plaintexts.add('%s/' % path) self.missing_plaintexts.add('%s/' % path)
return path return path
if tag: if tag:
source_branch, source_rev = self.tags[tag] source_branch, source_rev = self.tags[tag]
cp_f = '' cp_f = ''
else: else:
source_rev = copyfrom_revision source_rev = copyfrom_revision
cp_f, source_branch = self._path_and_branch_for_path(copyfrom_path) cp_f, source_branch = self._path_and_branch_for_path(copyfrom_path)
if cp_f == '' and br_path == '':
self.branches[branch] = source_branch, source_rev, self.current_rev.revnum
new_hash = self.get_parent_revision(source_rev + 1, new_hash = self.get_parent_revision(source_rev + 1,
source_branch) source_branch)
if new_hash == node.nullid: if new_hash == node.nullid:

View File

@ -82,9 +82,12 @@ class TestBasicRepoLayout(test_util.TestBase):
def test_file_mixed_with_branches(self): def test_file_mixed_with_branches(self):
repo = self._load_fixture_and_fetch('file_mixed_with_branches.svndump') repo = self._load_fixture_and_fetch('file_mixed_with_branches.svndump')
self.assertEqual(node.hex(repo['tip'].node()), self.assertEqual(node.hex(repo['default'].node()),
'434ed487136c1b47c1e8f952edb4dc5a8e6328df') '434ed487136c1b47c1e8f952edb4dc5a8e6328df')
assert 'README' not in repo assert 'README' not in repo
self.assertEqual(repo['tip'].branch(),
'../branches')
def test_files_copied_from_outside_btt(self): def test_files_copied_from_outside_btt(self):
repo = self._load_fixture_and_fetch( repo = self._load_fixture_and_fetch(

View File

@ -53,7 +53,7 @@ class MapTests(test_util.TestBase):
def test_author_map_closing_author_stupid(self): def test_author_map_closing_author_stupid(self):
self.test_author_map_closing_author(True) self.test_author_map_closing_author(True)
def test_file_map(self, stupid=False): def test_file_map(self, stupid=False):
test_util.load_svndump_fixture(self.repo_path, 'replace_trunk_with_branch.svndump') test_util.load_svndump_fixture(self.repo_path, 'replace_trunk_with_branch.svndump')
filemap = open(self.filemap, 'w') filemap = open(self.filemap, 'w')
@ -66,10 +66,10 @@ class MapTests(test_util.TestBase):
filemap=self.filemap) filemap=self.filemap)
self.assertEqual(node.hex(self.repo[0].node()), '88e2c7492d83e4bf30fbb2dcbf6aa24d60ac688d') self.assertEqual(node.hex(self.repo[0].node()), '88e2c7492d83e4bf30fbb2dcbf6aa24d60ac688d')
self.assertEqual(node.hex(self.repo['default'].node()), 'e524296152246b3837fe9503c83b727075835155') self.assertEqual(node.hex(self.repo['default'].node()), 'e524296152246b3837fe9503c83b727075835155')
def test_file_map_stupid(self): def test_file_map_stupid(self):
self.test_file_map(True) self.test_file_map(True)
def test_file_map_exclude(self, stupid=False): def test_file_map_exclude(self, stupid=False):
test_util.load_svndump_fixture(self.repo_path, 'replace_trunk_with_branch.svndump') test_util.load_svndump_fixture(self.repo_path, 'replace_trunk_with_branch.svndump')
filemap = open(self.filemap, 'w') filemap = open(self.filemap, 'w')
@ -81,8 +81,8 @@ class MapTests(test_util.TestBase):
stupid=stupid, stupid=stupid,
filemap=self.filemap) filemap=self.filemap)
self.assertEqual(node.hex(self.repo[0].node()), '2c48f3525926ab6c8b8424bcf5eb34b149b61841') self.assertEqual(node.hex(self.repo[0].node()), '2c48f3525926ab6c8b8424bcf5eb34b149b61841')
self.assertEqual(node.hex(self.repo['default'].node()), '86fc12d173716139d5bd1d36866038d355009f45') self.assertEqual(node.hex(self.repo['default'].node()), 'b37a3c0297b71f989064d9b545b5a478bbed7cc1')
def test_file_map_exclude_stupid(self): def test_file_map_exclude_stupid(self):
self.test_file_map_exclude(True) self.test_file_map_exclude(True)