mirror of
https://github.com/facebook/sapling.git
synced 2024-10-11 01:07:15 +03:00
530 lines
21 KiB
Python
530 lines
21 KiB
Python
import cStringIO
|
|
import re
|
|
import os
|
|
import shutil
|
|
import tempfile
|
|
|
|
from mercurial import patch
|
|
from mercurial import node
|
|
from mercurial import context
|
|
from mercurial import revlog
|
|
from mercurial import util as merc_util
|
|
from svn import core
|
|
from svn import delta
|
|
|
|
import hg_delta_editor
|
|
import svnwrap
|
|
import util
|
|
|
|
|
|
def print_your_svn_is_old_message(ui): #pragma: no cover
    """Tell the user we are degrading to diff-based pulls because the
    Subversion server cannot support replay."""
    message = ("In light of that, I'll fall back and do diffs, but it won't do "
               "as good a job. You should really upgrade your server.\n")
    ui.status(message)
|
|
|
|
|
|
@util.register_subcommand('pull')
def fetch_revisions(ui, svn_url, hg_repo_path, skipto_rev=0, stupid=None,
                    tag_locations='tags',
                    **opts):
    """Pull new revisions from Subversion.

    ui            -- Mercurial ui object used for status output.
    svn_url       -- URL of the Subversion repository to pull from.
    hg_repo_path  -- local path of the target Mercurial repository.
    skipto_rev    -- svn revision to start from when initializing a repo.
    stupid        -- when truthy, skip svn replay and use diff-based pulls.
    tag_locations -- comma-separated list of svn tag directory names.
    """
    # Force Mercurial's global encoding to UTF-8 for the duration of the
    # pull (svn metadata is UTF-8); restored at the end of the function.
    old_encoding = merc_util._encoding
    merc_util._encoding = 'UTF-8'
    skipto_rev=int(skipto_rev)
    have_replay = not stupid
    # Old SWIG bindings hand back a non-callable txdelta handler, which
    # makes replay unusable; fall back to the slower diff-based method.
    if have_replay and not callable(delta.svn_txdelta_apply(None, None,
                                                           None)[0]): #pragma: no cover
        ui.status('You are using old Subversion SWIG bindings. Replay will not'
                  ' work until you upgrade to 1.5.0 or newer. Falling back to'
                  ' a slower method that may be buggier. Please upgrade, or'
                  ' contribute a patch to use the ctypes bindings instead'
                  ' of SWIG.\n')
        have_replay = False
    initializing_repo = False
    svn = svnwrap.SubversionRepo(svn_url, username=merc_util.getuser())
    author_host = "@%s" % svn.uuid
    tag_locations = tag_locations.split(',')
    hg_editor = hg_delta_editor.HgChangeReceiver(hg_repo_path,
                                                 ui_=ui,
                                                 subdir=svn.subdir,
                                                 author_host=author_host,
                                                 tag_locations=tag_locations)
    if os.path.exists(hg_editor.uuid_file):
        # Resuming an earlier conversion: sanity-check the repository
        # identity and continue from the last revision fully handled.
        uuid = open(hg_editor.uuid_file).read()
        assert uuid == svn.uuid
        start = int(open(hg_editor.last_revision_handled_file, 'r').read())
    else:
        # Fresh conversion: record repository identity and progress state.
        open(hg_editor.uuid_file, 'w').write(svn.uuid)
        open(hg_editor.svn_url_file, 'w').write(svn_url)
        open(hg_editor.last_revision_handled_file, 'w').write(str(0))
        initializing_repo = True
        start = skipto_rev

    # start converting revisions
    for r in svn.revisions(start=start):
        valid = False
        hg_editor.update_branch_tag_map_for_rev(r)
        # NOTE(review): this keeps scanning after the first valid path is
        # found; 'break' instead of 'continue' would be equivalent.
        for p in r.paths:
            if hg_editor._is_path_valid(p):
                valid = True
                continue
        if initializing_repo and start > 0:
            assert False, 'This feature not ready yet.'
        if valid:
            # got a 502? Try more than once!
            tries = 0
            converted = False
            while not converted and tries < 3:
                try:
                    ui.status('converting %s\n' % r)
                    if have_replay:
                        try:
                            replay_convert_rev(hg_editor, svn, r)
                        except svnwrap.SubversionRepoCanNotReplay, e: #pragma: no cover
                            ui.status('%s\n' % e.message)
                            print_your_svn_is_old_message(ui)
                            # Replay failed once; disable it for the rest of
                            # the pull and convert this rev via diffs instead.
                            have_replay = False
                            stupid_svn_server_pull_rev(ui, svn, hg_editor, r)
                    else:
                        stupid_svn_server_pull_rev(ui, svn, hg_editor, r)
                    converted = True
                    # Record progress atomically: write the revnum to a temp
                    # file and rename it over the progress file.
                    tmpfile = '%s_tmp' % hg_editor.last_revision_handled_file
                    fp = open(tmpfile, 'w')
                    fp.write(str(r.revnum))
                    fp.close()
                    merc_util.rename(tmpfile,
                                     hg_editor.last_revision_handled_file)
                except core.SubversionException, e: #pragma: no cover
                    # Retry (up to 3 times) only on 502 Bad Gateway from a
                    # flaky HTTP frontend; any other svn error is fatal.
                    if hasattr(e, 'message') and (
                        'Server sent unexpected return value (502 Bad Gateway)'
                        ' in response to PROPFIND') in e.message:
                        tries += 1
                        ui.status('Got a 502, retrying (%s)\n' % tries)
                    else:
                        raise
    merc_util._encoding = old_encoding
|
|
|
|
|
|
def cleanup_file_handles(svn, count):
    """Reinitialize the svn RA and client sessions on every 50th call.

    Called from file-fetch loops with a running counter; presumably this
    keeps long pulls from accumulating stale connections/handles.
    """
    if not count % 50:
        svn.init_ra_and_client()
|
|
|
|
def replay_convert_rev(hg_editor, svn, r):
    """Convert revision r via svn replay, then fetch as plaintext any
    files the replay stream could not provide, and commit the delta."""
    hg_editor.set_current_rev(r)
    svn.get_replay(r.revnum, hg_editor)
    handled = 1
    if hg_editor.missing_plaintexts:
        ui = hg_editor.ui
        ui.status('Fetching %s files that could not use replay.\n' %
                  len(hg_editor.missing_plaintexts))
        if svn.subdir:
            rootpath = svn.subdir[1:]
        else:
            rootpath = ''
        # Expand missing directories into their files; record plain
        # files directly. Paths are made relative to rootpath.
        wanted = set()
        for missing in hg_editor.missing_plaintexts:
            ui.status('.')
            ui.flush()
            relative = missing[len(rootpath):]
            if missing[-1] == '/':
                wanted.update(relative + name
                              for name, kind in svn.list_files(missing,
                                                               r.revnum)
                              if kind == 'f')
            else:
                wanted.add(relative)
        ui.status('\nFetching files...\n')
        for filepath in wanted:
            ui.status('.')
            ui.flush()
            # Periodically reset svn handles during long fetch loops.
            cleanup_file_handles(svn, handled)
            handled += 1
            data, mode = svn.get_file(filepath, r.revnum)
            hg_editor.set_file(filepath, data, 'x' in mode, 'l' in mode)
        hg_editor.missing_plaintexts = set()
        ui.status('\n')
    hg_editor.commit_current_delta()
|
|
|
|
|
|
# Regexes that pick apart "svn diff" output for the diff-based
# (non-replay) fetch path. Leading whitespace inside these raw strings
# is significant for matching; keep it exactly as svn emits it.

# Binary file entry: svn prints this header instead of a diff body.
# Group 1 captures the file path.
binary_file_re = re.compile(r'''Index: ([^\n]*)
=*
Cannot display: file marked as a binary type.''')

# Property-change block that sets svn:executable on a file (group 1).
property_exec_set_re = re.compile(r'''Property changes on: ([^\n]*)
_*
(?:Added|Name): svn:executable
\+ \*
''')

# Property-change block that removes svn:executable from a file (group 1).
property_exec_removed_re = re.compile(r'''Property changes on: ([^\n]*)
_*
(?:Deleted|Name): svn:executable
- \*
''')

# Index header immediately followed by another Index header: an entry
# with no hunks at all, which patch(1) cannot apply; used to strip them.
empty_file_patch_wont_make_re = re.compile(r'''Index: ([^\n]*)\n=*\n(?=Index:)''')

# Any Index header in the diff; group 1 is the file path.
any_file_re = re.compile(r'''^Index: ([^\n]*)\n=*\n''', re.MULTILINE)

# Property-change block that sets svn:special (symlink) on a file (group 1).
property_special_set_re = re.compile(r'''Property changes on: ([^\n]*)
_*
(?:Added|Name): svn:special
\+ \*
''')

# Property-change block that removes svn:special from a file (group 1).
property_special_removed_re = re.compile(r'''Property changes on: ([^\n]*)
_*
(?:Deleted|Name): svn:special
\- \*
''')
|
|
|
|
def make_diff_path(b):
    """Return the repository-relative directory for branch b.

    A branch of None denotes the trunk; any named branch lives under
    'branches/'.
    """
    # Fix: use 'is None' (identity) rather than '== None' for the trunk
    # sentinel -- the idiomatic and intent-revealing test.
    if b is None:
        return 'trunk'
    return 'branches/' + b
|
|
|
|
def makecopyfinder(r, branchpath, rootdir):
    """Build a copy detector for one branch of revision r.

    The returned callable maps a branch-relative path to None when no
    copy information applies, or to ((source, sourcerev), sourcepath),
    where "sourcepath" is the branch-relative copy source, "sourcerev"
    the svn revision copied from, and "source" the copy record that
    caused the copy (the file itself for single-file copies, the copied
    directory for files detected through a directory copy).
    """
    prefix = branchpath + '/'
    fullprefix = rootdir + prefix
    # Collect (dest, (source, rev)) records for same-branch copies only.
    records = []
    for path, entry in r.paths.iteritems():
        if not entry.copyfrom_path:
            continue
        if not path.startswith(prefix):
            continue
        if not entry.copyfrom_path.startswith(fullprefix):
            # ignore cross branch copies
            continue
        records.append((path[len(prefix):],
                        (entry.copyfrom_path[len(fullprefix):],
                         entry.copyfrom_rev)))

    # Reverse-sorted so that directory lookups below hit the most
    # specific (longest) destination first.
    records.sort()
    records.reverse()
    exact = dict(records)

    def finder(path):
        # Exact single-file copy record?
        if path in exact:
            source = exact[path]
            return source, source[0]
        # Otherwise look for an enclosing directory copy, most specific
        # first (thanks to the reversed sort above).
        for dest, (source, sourcerev) in records:
            destdir = dest + '/'
            if path.startswith(destdir):
                return ((source, sourcerev),
                        source + '/' + path[len(destdir):])
        return None

    return finder
|
|
|
|
def getcopies(svn, hg_editor, branch, branchpath, r, files, parentid):
    """Return a mapping {dest: source} for every file copied into r."""
    # A null parent means nothing can have been copied from anywhere.
    if parentid == revlog.nullid:
        return {}

    # Group svn copy records by copy source, mirroring replay behaviour
    # where one copy event covers an entire directory copy and a
    # separate event covers each single-file copy.
    finder = makecopyfinder(r, branchpath, svn.subdir)
    grouped = {}
    for fname in files:
        found = finder(fname)
        if found:
            key, srcfile = found
            grouped.setdefault(key, []).append((fname, srcfile))
    if not grouped:
        return {}

    parentctx = hg_editor.repo.changectx(parentid)

    # Lazily resolve and memoize the changeset context corresponding to
    # a source svn revision; None when no hg changeset maps to it.
    ctxcache = {}
    def getctx(svnrev):
        if svnrev not in ctxcache:
            changeid = hg_editor.get_parent_revision(svnrev + 1, branch)
            if changeid != revlog.nullid:
                ctxcache[svnrev] = hg_editor.repo.changectx(changeid)
            else:
                ctxcache[svnrev] = None
        return ctxcache[svnrev]

    # Keep only the copies whose source files are identical between the
    # source changeset and the current parent -- i.e. copies that still
    # make sense on the Mercurial side.
    result = {}
    for (srcpath, srcrev), entries in grouped.iteritems():
        srcctx = getctx(srcrev)
        if srcctx is None:
            continue
        sources = [entry[1] for entry in entries]
        if hg_editor.aresamefiles(srcctx, parentctx, sources):
            result.update(entries)
    return result
|
|
|
|
def stupid_fetch_branchrev(svn, hg_editor, branch, branchpath, r, parentid):
    """Extract all 'branch' content at a given revision.

    Return a tuple (files, filectxfn) where 'files' is the list of all files
    in the branch at the given revision, and 'filectxfn' is a memctx compatible
    callable to retrieve individual file information.
    """
    parentctx = hg_editor.repo[parentid]
    kind = svn.checkpath(branchpath, r.revnum)
    if kind is None:
        # Branch does not exist at this revision. Get parent revision and
        # remove everything.
        files = parentctx.manifest().keys()
        def filectxfn_rm(repo, memctx, path):
            # Raising IOError from a memctx callback marks the file removed.
            raise IOError()
        return files, filectxfn_rm

    files = []
    if parentid == revlog.nullid:
        # Initial revision, fetch all files
        for path, kind in svn.list_files(branchpath, r.revnum):
            if kind == 'f':
                files.append(path)
    else:
        # Incremental: walk only the paths this revision touched inside
        # the branch, classifying each as file, directory, or deleted.
        branchprefix = branchpath + '/'
        for path, e in r.paths.iteritems():
            if not path.startswith(branchprefix):
                continue
            kind = svn.checkpath(path, r.revnum)
            path = path[len(branchprefix):]
            if kind == 'f':
                files.append(path)
            elif kind == 'd':
                if e.action == 'M':
                    # Ignore property changes for now
                    continue
                # New/replaced directory: pull in every file under it.
                dirpath = branchprefix + path
                for child, k in svn.list_files(dirpath, r.revnum):
                    if k == 'f':
                        files.append(path + '/' + child)
            else:
                # Path no longer exists at this revision: a deleted file
                # if the parent had it...
                if path in parentctx:
                    files.append(path)
                    continue
                # Assume it's a deleted directory
                path = path + '/'
                deleted = [f for f in parentctx if f.startswith(path)]
                files += deleted

    copies = getcopies(svn, hg_editor, branch, branchpath, r, files, parentid)

    def filectxfn(repo, memctx, path):
        # Fetch the file fulltext and mode straight from svn; 'x'/'l'
        # flags in mode map to executable/symlink in Mercurial.
        data, mode = svn.get_file(branchpath + '/' + path, r.revnum)
        isexec = 'x' in mode
        islink = 'l' in mode
        copied = copies.get(path)
        return context.memfilectx(path=path, data=data, islink=islink,
                                  isexec=isexec, copied=copied)

    return files, filectxfn
|
|
|
|
def stupid_svn_server_pull_rev(ui, svn, hg_editor, r):
    """Convert revision r by fetching and applying unified diffs.

    Used when the server (or the local bindings) cannot do svn replay.
    For each branch touched by r, apply a unified diff from svn on top
    of the branch's parent changeset in a temp directory; on any failure
    (BadPatchApply, svn errors) fall back to exporting the complete
    branch content via stupid_fetch_branchrev, then commit.
    """
    # this server fails at replay
    branches = hg_editor.branches_in_paths(r.paths)
    temp_location = os.path.join(hg_editor.path, '.hg', 'svn', 'temp')
    if not os.path.exists(temp_location):
        os.makedirs(temp_location)
    for b in branches:
        our_tempdir = None
        diff_path = make_diff_path(b)
        parent_rev, br_p = hg_editor.get_parent_svn_branch_and_rev(r.revnum, b)
        parent_ha = hg_editor.get_parent_revision(r.revnum, b)
        files_touched = set()
        try:
            if br_p == b:
                # letting patch handle binaries sounded
                # cool, but it breaks patch in sad ways
                d = svn.get_unified_diff(diff_path, r.revnum, deleted=False,
                                         ignore_type=False)
            else:
                # Branch creation: diff against the parent branch instead.
                d = svn.get_unified_diff(diff_path, r.revnum,
                                         other_path=make_diff_path(br_p),
                                         other_rev=parent_rev,
                                         deleted=True, ignore_type=True)
                if d:
                    ui.status('Branch creation with mods, pulling full rev.\n')
                    raise BadPatchApply()

            our_tempdir = tempfile.mkdtemp('svn_fetch_temp', dir=temp_location)
            opener = merc_util.opener(our_tempdir)
            for m in binary_file_re.findall(d):
                # we have to pull each binary file by hand as a fulltext,
                # which sucks but we've got no choice
                files_touched.add(m)
                try:
                    f = opener(m, 'w')
                    f.write(svn.get_file(diff_path+'/'+m, r.revnum)[0])
                    f.close()
                except IOError:
                    pass
            # Strip sections patch(1) cannot apply: hunk-less entries and
            # pure property changes.
            d2 = empty_file_patch_wont_make_re.sub('', d)
            d2 = property_exec_set_re.sub('', d2)
            d2 = property_exec_removed_re.sub('', d2)
            # Seed the temp dir with the parent's version of every file
            # mentioned in the diff so the patch has something to apply to.
            for f in any_file_re.findall(d):
                if f in files_touched:
                    # this check is here because modified binary files will get
                    # created before here.
                    continue
                files_touched.add(f)
                data = ''
                if f in hg_editor.repo[parent_ha]:
                    data = hg_editor.repo[parent_ha][f].data()
                fp = opener(f, 'w')
                fp.write(data)
                fp.close()
            # Only invoke patch if the stripped diff still has hunk lines.
            if d2.strip() and len(re.findall('\n[-+]', d2.strip())) > 0:
                old_cwd = os.getcwd()
                os.chdir(our_tempdir)
                changed = {}
                try:
                    patch_st = patch.applydiff(ui, cStringIO.StringIO(d2),
                                               changed, strip=0)
                except patch.PatchError:
                    # TODO: this happens if the svn server has the wrong mime
                    # type stored and doesn't know a file is binary. It would
                    # be better to do one file at a time and only do a
                    # full fetch on files that had problems.
                    os.chdir(old_cwd)
                    raise BadPatchApply()
                for x in changed.iterkeys():
                    ui.status('M %s\n' % x)
                    files_touched.add(x)
                os.chdir(old_cwd)
                # if this patch didn't apply right, fall back to exporting the
                # entire rev.
                if patch_st == -1:
                    parent_ctx = hg_editor.repo[parent_ha]
                    parent_manifest = parent_ctx.manifest()
                    for fn in files_touched:
                        if (fn in parent_manifest and
                            'l' in parent_ctx.filectx(fn).flags()):
                            # I think this might be an underlying bug in svn -
                            # I get diffs of deleted symlinks even though I
                            # specifically said no deletes above.
                            ui.status('Pulling whole rev because of a deleted'
                                      'symlink')
                            raise BadPatchApply()
                    assert False, ('This should only happen on case-insensitive'
                                   ' volumes.')
                elif patch_st == 1:
                    # When converting Django, I saw fuzz on .po files that was
                    # causing revisions to end up failing verification. If that
                    # can be fixed, maybe this won't ever be reached.
                    ui.status('There was some fuzz, not using diff after all.')
                    raise BadPatchApply()
            else:
                ui.status('Not using patch for %s, diff had no hunks.\n' %
                          r.revnum)

            # we create the files if they don't exist here because we know
            # that we'll never have diff info for a deleted file, so if the
            # property is set, we should force the file to exist no matter what.
            exec_files = {}
            for m in property_exec_removed_re.findall(d):
                exec_files[m] = False
            for m in property_exec_set_re.findall(d):
                exec_files[m] = True
            for m in exec_files:
                files_touched.add(m)
                f = os.path.join(our_tempdir, m)
                if not os.path.exists(f):
                    data = ''
                    if m in hg_editor.repo[parent_ha]:
                        data = hg_editor.repo[parent_ha][m].data()
                    fp = opener(m, 'w')
                    fp.write(data)
                    fp.close()
            link_files = {}
            for m in property_special_set_re.findall(d):
                # TODO(augie) when a symlink is removed, patching will fail.
                # We're seeing that above - there's gotta be a better
                # workaround than just bailing like that.
                path = os.path.join(our_tempdir, m)
                assert os.path.exists(path)
                # svn stores symlinks as 'link <target>'; strip the prefix.
                link_path = open(path).read()
                link_path = link_path[len('link '):]
                os.remove(path)
                link_files[m] = link_path
                files_touched.add(m)

            deleted_files = set()
            for p in r.paths:
                if p.startswith(diff_path) and r.paths[p].action == 'D':
                    p2 = p[len(diff_path)+1:].strip('/')
                    if p2 in hg_editor.repo[parent_ha]:
                        deleted_files.add(p2)
                        continue
                    # If this isn't in the parent ctx, it must've been a dir
                    deleted_files.update([f for f in hg_editor.repo[parent_ha]
                                          if f.startswith(p2 + '/')])
            files_touched.update(deleted_files)

            copies = getcopies(svn, hg_editor, b, branches[b], r, files_touched,
                               parent_ha)

            def filectxfn(repo, memctx, path):
                # IOError from a memctx callback marks the file as removed.
                if path in deleted_files:
                    raise IOError()
                if path in link_files:
                    return context.memfilectx(path=path, data=link_files[path],
                                              islink=True, isexec=False,
                                              copied=False)
                data = opener(path).read()
                # Preserve the parent's executable bit unless the diff's
                # property changes said otherwise.
                exe = exec_files.get(path, None)
                if exe is None and path in hg_editor.repo[parent_ha]:
                    exe = 'x' in hg_editor.repo[parent_ha].filectx(path).flags()
                copied = copies.get(path)
                return context.memfilectx(path=path, data=data, islink=False,
                                          isexec=exe, copied=copied)
        except (core.SubversionException,
                BadPatchApply,
                svnwrap.SubversionRepoCanNotDiff), e:
            # apr_err 160013 is "path not found"; anything else is fatal.
            if (hasattr(e, 'apr_err') and e.apr_err != 160013):
                raise
            # Either this revision or the previous one does not exist.
            ui.status("fetching entire rev previous rev does not exist.\n")
            files_touched, filectxfn = stupid_fetch_branchrev(
                svn, hg_editor, b, branches[b], r, parent_ha)

        # svn dates look like 2008-01-01T12:00:00.000000Z; reformat for hg.
        date = r.date.replace('T', ' ').replace('Z', '').split('.')[0]
        date += ' -0000'
        extra = {}
        if b:
            extra['branch'] = b
        if '' in files_touched:
            files_touched.remove('')
        if parent_ha != node.nullid or files_touched:
            # TODO(augie) remove this debug code? Or maybe it's sane to have it.
            for f in files_touched:
                if f:
                    assert f[0] != '/'
            current_ctx = context.memctx(hg_editor.repo,
                                         [parent_ha, revlog.nullid],
                                         r.message or '...',
                                         files_touched,
                                         filectxfn,
                                         '%s%s' % (r.author,
                                                   hg_editor.author_host),
                                         date,
                                         extra)
            ha = hg_editor.repo.commitctx(current_ctx)
            hg_editor.add_to_revmap(r.revnum, b, ha)
            hg_editor._save_metadata()
            ui.status('committed as %s on branch %s\n' %
                      (node.hex(ha), b or 'default'))
        if our_tempdir is not None:
            shutil.rmtree(our_tempdir)
|
|
|
|
|
|
class BadPatchApply(Exception):
    """Raised when a unified diff cannot be applied cleanly, signalling
    the caller to fall back to fetching the entire branch revision."""
|