patch: support diff data loss detection and upgrade

In the worst case, generating a diff in upgrade mode can be twice as expensive
as generating it in git mode directly: the whole diff may have to be
regenerated whenever a git feature is detected. Also, the first diff attempt is
completely buffered instead of being streamed. That said, even without having
profiled it yet, I am convinced we can fast-path the upgrade mode, should that
become necessary, if it is ever used in regular diff commands and not only in
mq, where avoiding data loss is worth the price.
This commit is contained in:
Patrick Mezard 2010-01-01 20:54:05 +01:00
parent 9bddfaea09
commit d6ce43b965
5 changed files with 393 additions and 39 deletions

View File

@ -27,7 +27,9 @@ class diffopts(object):
nodates removes dates from diff headers
ignorews ignores all whitespace changes in the diff
ignorewsamount ignores changes in the amount of whitespace
ignoreblanklines ignores changes whose lines are all blank'''
ignoreblanklines ignores changes whose lines are all blank
upgrade generates git diffs to avoid data loss
'''
defaults = {
'context': 3,
@ -38,6 +40,7 @@ class diffopts(object):
'ignorews': False,
'ignorewsamount': False,
'ignoreblanklines': False,
'upgrade': False,
}
__slots__ = defaults.keys()

View File

@ -1246,17 +1246,25 @@ def b85diff(to, tn):
ret.append('\n')
return ''.join(ret)
def _addmodehdr(header, omode, nmode):
if omode != nmode:
header.append('old mode %s\n' % omode)
header.append('new mode %s\n' % nmode)
class GitDiffRequired(Exception):
    '''signals that the current diff cannot be expressed in the regular
    patch format and has to be regenerated in git format'''
def diff(repo, node1=None, node2=None, match=None, changes=None, opts=None):
def diff(repo, node1=None, node2=None, match=None, changes=None, opts=None,
losedatafn=None):
'''yields diff of changes to files between two nodes, or node and
working directory.
if node1 is None, use first dirstate parent instead.
if node2 is None, compare node1 with working directory.'''
if node2 is None, compare node1 with working directory.
losedatafn(**kwarg) is a callable run when opts.upgrade=True and
every time some change cannot be represented with the current
patch format. Return False to upgrade to git patch format, True to
accept the loss or raise an exception to abort the diff. It is
called with the name of current file being diffed as 'fn'. If set
to None, patches will always be upgraded to git format when
necessary.
'''
if opts is None:
opts = mdiff.defaultopts
@ -1288,25 +1296,51 @@ def diff(repo, node1=None, node2=None, match=None, changes=None, opts=None):
modified, added, removed = changes[:3]
if not modified and not added and not removed:
return
date1 = util.datestr(ctx1.date())
man1 = ctx1.manifest()
return []
revs = None
if not repo.ui.quiet and not opts.git:
if not repo.ui.quiet:
hexfunc = repo.ui.debugflag and hex or short
revs = [hexfunc(node) for node in [node1, node2] if node]
if opts.git:
copy, diverge = copies.copies(repo, ctx1, ctx2, repo[nullid])
copy = {}
if opts.git or opts.upgrade:
copy = copies.copies(repo, ctx1, ctx2, repo[nullid])[0]
copy = copy.copy()
for k, v in copy.items():
copy[v] = k
difffn = lambda opts, losedata: trydiff(repo, revs, ctx1, ctx2,
modified, added, removed, copy, getfilectx, opts, losedata)
if opts.upgrade and not opts.git:
try:
def losedata(fn):
if not losedatafn or not losedatafn(fn=fn):
raise GitDiffRequired()
# Buffer the whole output until we are sure it can be generated
return list(difffn(opts.copy(git=False), losedata))
except GitDiffRequired:
return difffn(opts.copy(git=True), None)
else:
return difffn(opts, None)
def _addmodehdr(header, omode, nmode):
if omode != nmode:
header.append('old mode %s\n' % omode)
header.append('new mode %s\n' % nmode)
def trydiff(repo, revs, ctx1, ctx2, modified, added, removed,
copy, getfilectx, opts, losedatafn):
date1 = util.datestr(ctx1.date())
man1 = ctx1.manifest()
gone = set()
gitmode = {'l': '120000', 'x': '100755', '': '100644'}
if opts.git:
revs = None
for f in sorted(modified + added + removed):
to = None
tn = None
@ -1317,39 +1351,61 @@ def diff(repo, node1=None, node2=None, match=None, changes=None, opts=None):
if f not in removed:
tn = getfilectx(f, ctx2).data()
a, b = f, f
if opts.git:
if opts.git or losedatafn:
if f in added:
mode = gitmode[ctx2.flags(f)]
if f in copy:
a = copy[f]
omode = gitmode[man1.flags(a)]
_addmodehdr(header, omode, mode)
if a in removed and a not in gone:
op = 'rename'
gone.add(a)
if opts.git:
a = copy[f]
omode = gitmode[man1.flags(a)]
_addmodehdr(header, omode, mode)
if a in removed and a not in gone:
op = 'rename'
gone.add(a)
else:
op = 'copy'
header.append('%s from %s\n' % (op, a))
header.append('%s to %s\n' % (op, f))
to = getfilectx(a, ctx1).data()
else:
op = 'copy'
header.append('%s from %s\n' % (op, a))
header.append('%s to %s\n' % (op, f))
to = getfilectx(a, ctx1).data()
losedatafn(f)
else:
header.append('new file mode %s\n' % mode)
if opts.git:
header.append('new file mode %s\n' % mode)
elif ctx2.flags(f):
losedatafn(f)
if util.binary(tn):
dodiff = 'binary'
if opts.git:
dodiff = 'binary'
else:
losedatafn(f)
if not opts.git and not tn:
# regular diffs cannot represent new empty file
losedatafn(f)
elif f in removed:
# have we already reported a copy above?
if f in copy and copy[f] in added and copy[copy[f]] == f:
dodiff = False
else:
header.append('deleted file mode %s\n' %
gitmode[man1.flags(f)])
if opts.git:
# have we already reported a copy above?
if f in copy and copy[f] in added and copy[copy[f]] == f:
dodiff = False
else:
header.append('deleted file mode %s\n' %
gitmode[man1.flags(f)])
elif not to:
# regular diffs cannot represent empty file deletion
losedatafn(f)
else:
omode = gitmode[man1.flags(f)]
nmode = gitmode[ctx2.flags(f)]
_addmodehdr(header, omode, nmode)
if util.binary(to) or util.binary(tn):
dodiff = 'binary'
header.insert(0, mdiff.diffline(revs, a, b, opts))
oflag = man1.flags(f)
nflag = ctx2.flags(f)
binary = util.binary(to) or util.binary(tn)
if opts.git:
_addmodehdr(header, gitmode[oflag], gitmode[nflag])
if binary:
dodiff = 'binary'
elif binary or nflag != oflag:
losedatafn(f)
if opts.git:
header.insert(0, mdiff.diffline(revs, a, b, opts))
if dodiff:
if dodiff == 'binary':
text = b85diff(to, tn)

46
tests/autodiff.py Normal file
View File

@ -0,0 +1,46 @@
# Extension dedicated to test patch.diff() upgrade modes
#
#
from mercurial import cmdutil, patch, util
def autodiff(ui, repo, *pats, **opts):
    '''write a diff to ui using the upgrade mode selected with --git

    Accepted --git values:

    yes    always generate a git diff
    no     always generate a regular diff (used when --git is not given)
    auto   regular diff, upgraded to git format when data would be lost
    warn   regular diff, followed by the list of files losing data
    abort  regular diff, aborting on the first file losing data

    Any other value aborts. The diffed revisions are those returned by
    cmdutil.revpair(repo, []), restricted to the files matching pats.
    '''
    diffopts = patch.diffopts(ui, opts)
    # The option table declares '' as the default for --git, so an unset
    # option presumably reaches us as an empty string rather than as a
    # missing key; treat every false value as 'no'.
    git = opts.get('git') or 'no'
    brokenfiles = set()
    losedatafn = None

    if git in ('yes', 'no'):
        diffopts.git = git == 'yes'
        diffopts.upgrade = False
    elif git in ('auto', 'warn', 'abort'):
        diffopts.git = False
        diffopts.upgrade = True
        # 'auto' keeps losedatafn=None, which patch.diff() documents as
        # "always upgrade to git format when necessary".
        if git == 'warn':
            def losedatafn(fn=None, **kwargs):
                # returning True accepts the loss; remember the file so it
                # can be reported after the diff
                brokenfiles.add(fn)
                return True
        elif git == 'abort':
            def losedatafn(fn=None, **kwargs):
                raise util.Abort('losing data for %s' % fn)
    else:
        raise util.Abort('--git must be yes, no, auto, warn or abort')

    node1, node2 = cmdutil.revpair(repo, [])
    m = cmdutil.match(repo, pats, opts)
    it = patch.diff(repo, node1, node2, match=m, opts=diffopts,
                    losedatafn=losedatafn)
    for chunk in it:
        ui.write(chunk)
    # only populated in 'warn' mode
    for fn in sorted(brokenfiles):
        ui.write('data lost for: %s\n' % fn)
# Maps the command name to (function, option list, synopsis string).
# The single option is --git, a string option whose declared default is ''.
cmdtable = {
    "autodiff": (
        autodiff,
        [
            ('', 'git', '', 'git upgrade mode (yes/no/auto/warn/abort)'),
        ],
        '[OPTION]... [FILE]...',
    ),
}

63
tests/test-diff-upgrade Executable file
View File

@ -0,0 +1,63 @@
#!/bin/sh
# Exercise the autodiff test extension: build a repository mixing regular,
# executable, binary and empty files, then diff the pending changes with
# each --git mode (yes/no/auto/warn/abort).
#
# Enable the extension and remove dates from diff headers so the output
# is stable.
echo "[extensions]" >> $HGRCPATH
echo "autodiff=$TESTDIR/autodiff.py" >> $HGRCPATH
echo "[diff]" >> $HGRCPATH
echo "nodates=1" >> $HGRCPATH
hg init repo
cd repo
echo '% make a combination of new, changed and deleted file'
# First revision: files that are later modified, removed or have their
# execute bit flipped.
echo regular > regular
echo rmregular > rmregular
touch rmempty
echo exec > exec
chmod +x exec
echo rmexec > rmexec
chmod +x rmexec
echo setexec > setexec
echo unsetexec > unsetexec
chmod +x unsetexec
echo binary > binary
# a single NUL byte makes the file binary
python -c "file('rmbinary', 'wb').write('\0')"
hg ci -Am addfiles
# Uncommitted changes: one case per kind of change being diffed.
echo regular >> regular
echo newregular >> newregular
rm rmempty
touch newempty
rm rmregular
# content change only, the execute bit is left as committed
echo exec >> exec
echo newexec > newexec
chmod +x newexec
rm rmexec
# mode changes only, the content is untouched
chmod +x setexec
chmod -x unsetexec
python -c "file('binary', 'wb').write('\0\0')"
python -c "file('newbinary', 'wb').write('\0')"
rm rmbinary
hg addremove
echo '% git=no: regular diff for all files'
hg autodiff --git=no
# NOTE(review): the label below says git=no but the command passes
# --git=yes; the label is part of the recorded expected output, so both
# must be changed together if it is ever corrected.
echo '% git=no: git diff for single regular file'
hg autodiff --git=yes regular
echo '% git=auto: regular diff for regular files and removals'
hg autodiff --git=auto regular newregular rmregular rmbinary rmexec
# One run per file so that an upgrade triggered by one file cannot hide
# the behaviour of another. Per the expected output, "exec" stays a regular
# diff since only its content changed.
for f in exec newexec setexec unsetexec binary newbinary newempty rmempty; do
echo '% git=auto: git diff for' $f
hg autodiff --git=auto $f
done
echo '% git=warn: regular diff with data loss warnings'
hg autodiff --git=warn
echo '% git=abort: fail on execute bit change'
hg autodiff --git=abort regular setexec
echo '% git=abort: succeed on regular file'
hg autodiff --git=abort regular
cd ..

186
tests/test-diff-upgrade.out Normal file
View File

@ -0,0 +1,186 @@
% make a combination of new, changed and deleted file
adding binary
adding exec
adding regular
adding rmbinary
adding rmempty
adding rmexec
adding rmregular
adding setexec
adding unsetexec
adding newbinary
adding newempty
adding newexec
adding newregular
removing rmbinary
removing rmempty
removing rmexec
removing rmregular
% git=no: regular diff for all files
diff -r b3f053cd7c7f binary
Binary file binary has changed
diff -r b3f053cd7c7f exec
--- a/exec
+++ b/exec
@@ -1,1 +1,2 @@
exec
+exec
diff -r b3f053cd7c7f newbinary
Binary file newbinary has changed
diff -r b3f053cd7c7f newexec
--- /dev/null
+++ b/newexec
@@ -0,0 +1,1 @@
+newexec
diff -r b3f053cd7c7f newregular
--- /dev/null
+++ b/newregular
@@ -0,0 +1,1 @@
+newregular
diff -r b3f053cd7c7f regular
--- a/regular
+++ b/regular
@@ -1,1 +1,2 @@
regular
+regular
diff -r b3f053cd7c7f rmbinary
Binary file rmbinary has changed
diff -r b3f053cd7c7f rmexec
--- a/rmexec
+++ /dev/null
@@ -1,1 +0,0 @@
-rmexec
diff -r b3f053cd7c7f rmregular
--- a/rmregular
+++ /dev/null
@@ -1,1 +0,0 @@
-rmregular
% git=no: git diff for single regular file
diff --git a/regular b/regular
--- a/regular
+++ b/regular
@@ -1,1 +1,2 @@
regular
+regular
% git=auto: regular diff for regular files and removals
diff -r b3f053cd7c7f newregular
--- /dev/null
+++ b/newregular
@@ -0,0 +1,1 @@
+newregular
diff -r b3f053cd7c7f regular
--- a/regular
+++ b/regular
@@ -1,1 +1,2 @@
regular
+regular
diff -r b3f053cd7c7f rmbinary
Binary file rmbinary has changed
diff -r b3f053cd7c7f rmexec
--- a/rmexec
+++ /dev/null
@@ -1,1 +0,0 @@
-rmexec
diff -r b3f053cd7c7f rmregular
--- a/rmregular
+++ /dev/null
@@ -1,1 +0,0 @@
-rmregular
% git=auto: git diff for exec
diff -r b3f053cd7c7f exec
--- a/exec
+++ b/exec
@@ -1,1 +1,2 @@
exec
+exec
% git=auto: git diff for newexec
diff --git a/newexec b/newexec
new file mode 100755
--- /dev/null
+++ b/newexec
@@ -0,0 +1,1 @@
+newexec
% git=auto: git diff for setexec
diff --git a/setexec b/setexec
old mode 100644
new mode 100755
% git=auto: git diff for unsetexec
diff --git a/unsetexec b/unsetexec
old mode 100755
new mode 100644
% git=auto: git diff for binary
diff --git a/binary b/binary
index a9128c283485202893f5af379dd9beccb6e79486..09f370e38f498a462e1ca0faa724559b6630c04f
GIT binary patch
literal 2
Jc${Nk0000200961
% git=auto: git diff for newbinary
diff --git a/newbinary b/newbinary
new file mode 100644
index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d
GIT binary patch
literal 1
Ic${MZ000310RR91
% git=auto: git diff for newempty
diff --git a/newempty b/newempty
new file mode 100644
% git=auto: git diff for rmempty
diff --git a/rmempty b/rmempty
deleted file mode 100644
% git=warn: regular diff with data loss warnings
diff -r b3f053cd7c7f binary
Binary file binary has changed
diff -r b3f053cd7c7f exec
--- a/exec
+++ b/exec
@@ -1,1 +1,2 @@
exec
+exec
diff -r b3f053cd7c7f newbinary
Binary file newbinary has changed
diff -r b3f053cd7c7f newexec
--- /dev/null
+++ b/newexec
@@ -0,0 +1,1 @@
+newexec
diff -r b3f053cd7c7f newregular
--- /dev/null
+++ b/newregular
@@ -0,0 +1,1 @@
+newregular
diff -r b3f053cd7c7f regular
--- a/regular
+++ b/regular
@@ -1,1 +1,2 @@
regular
+regular
diff -r b3f053cd7c7f rmbinary
Binary file rmbinary has changed
diff -r b3f053cd7c7f rmexec
--- a/rmexec
+++ /dev/null
@@ -1,1 +0,0 @@
-rmexec
diff -r b3f053cd7c7f rmregular
--- a/rmregular
+++ /dev/null
@@ -1,1 +0,0 @@
-rmregular
data lost for: binary
data lost for: newbinary
data lost for: newempty
data lost for: newexec
data lost for: rmempty
data lost for: setexec
data lost for: unsetexec
% git=abort: fail on execute bit change
abort: losing data for setexec
% git=abort: succeed on regular file
diff -r b3f053cd7c7f regular
--- a/regular
+++ b/regular
@@ -1,1 +1,2 @@
regular
+regular