mirror of
https://github.com/facebook/sapling.git
synced 2024-10-06 23:07:18 +03:00
patch: support diff data loss detection and upgrade
In the worst case, generating a diff in upgrade mode can be twice as expensive as generating it in git mode directly: the whole diff may have to be regenerated once a change requiring the git format is detected. Also, the first diff attempt is completely buffered instead of being streamed. That said, even without having profiled it yet, I am convinced we can fast-path the upgrade mode if necessary, should it ever be used in regular diff commands and not only in mq, where avoiding data loss is worth the price.
This commit is contained in:
parent
9bddfaea09
commit
d6ce43b965
@ -27,7 +27,9 @@ class diffopts(object):
|
||||
nodates removes dates from diff headers
|
||||
ignorews ignores all whitespace changes in the diff
|
||||
ignorewsamount ignores changes in the amount of whitespace
|
||||
ignoreblanklines ignores changes whose lines are all blank'''
|
||||
ignoreblanklines ignores changes whose lines are all blank
|
||||
upgrade generates git diffs to avoid data loss
|
||||
'''
|
||||
|
||||
defaults = {
|
||||
'context': 3,
|
||||
@ -38,6 +40,7 @@ class diffopts(object):
|
||||
'ignorews': False,
|
||||
'ignorewsamount': False,
|
||||
'ignoreblanklines': False,
|
||||
'upgrade': False,
|
||||
}
|
||||
|
||||
__slots__ = defaults.keys()
|
||||
|
@ -1246,17 +1246,25 @@ def b85diff(to, tn):
|
||||
ret.append('\n')
|
||||
return ''.join(ret)
|
||||
|
||||
def _addmodehdr(header, omode, nmode):
|
||||
if omode != nmode:
|
||||
header.append('old mode %s\n' % omode)
|
||||
header.append('new mode %s\n' % nmode)
|
||||
class GitDiffRequired(Exception):
|
||||
pass
|
||||
|
||||
def diff(repo, node1=None, node2=None, match=None, changes=None, opts=None):
|
||||
def diff(repo, node1=None, node2=None, match=None, changes=None, opts=None,
|
||||
losedatafn=None):
|
||||
'''yields diff of changes to files between two nodes, or node and
|
||||
working directory.
|
||||
|
||||
if node1 is None, use first dirstate parent instead.
|
||||
if node2 is None, compare node1 with working directory.'''
|
||||
if node2 is None, compare node1 with working directory.
|
||||
|
||||
losedatafn(**kwarg) is a callable run when opts.upgrade=True and
|
||||
every time some change cannot be represented with the current
|
||||
patch format. Return False to upgrade to git patch format, True to
|
||||
accept the loss or raise an exception to abort the diff. It is
|
||||
called with the name of current file being diffed as 'fn'. If set
|
||||
to None, patches will always be upgraded to git format when
|
||||
necessary.
|
||||
'''
|
||||
|
||||
if opts is None:
|
||||
opts = mdiff.defaultopts
|
||||
@ -1288,25 +1296,51 @@ def diff(repo, node1=None, node2=None, match=None, changes=None, opts=None):
|
||||
modified, added, removed = changes[:3]
|
||||
|
||||
if not modified and not added and not removed:
|
||||
return
|
||||
|
||||
date1 = util.datestr(ctx1.date())
|
||||
man1 = ctx1.manifest()
|
||||
return []
|
||||
|
||||
revs = None
|
||||
if not repo.ui.quiet and not opts.git:
|
||||
if not repo.ui.quiet:
|
||||
hexfunc = repo.ui.debugflag and hex or short
|
||||
revs = [hexfunc(node) for node in [node1, node2] if node]
|
||||
|
||||
if opts.git:
|
||||
copy, diverge = copies.copies(repo, ctx1, ctx2, repo[nullid])
|
||||
copy = {}
|
||||
if opts.git or opts.upgrade:
|
||||
copy = copies.copies(repo, ctx1, ctx2, repo[nullid])[0]
|
||||
copy = copy.copy()
|
||||
for k, v in copy.items():
|
||||
copy[v] = k
|
||||
|
||||
difffn = lambda opts, losedata: trydiff(repo, revs, ctx1, ctx2,
|
||||
modified, added, removed, copy, getfilectx, opts, losedata)
|
||||
if opts.upgrade and not opts.git:
|
||||
try:
|
||||
def losedata(fn):
|
||||
if not losedatafn or not losedatafn(fn=fn):
|
||||
raise GitDiffRequired()
|
||||
# Buffer the whole output until we are sure it can be generated
|
||||
return list(difffn(opts.copy(git=False), losedata))
|
||||
except GitDiffRequired:
|
||||
return difffn(opts.copy(git=True), None)
|
||||
else:
|
||||
return difffn(opts, None)
|
||||
|
||||
def _addmodehdr(header, omode, nmode):
|
||||
if omode != nmode:
|
||||
header.append('old mode %s\n' % omode)
|
||||
header.append('new mode %s\n' % nmode)
|
||||
|
||||
def trydiff(repo, revs, ctx1, ctx2, modified, added, removed,
|
||||
copy, getfilectx, opts, losedatafn):
|
||||
|
||||
date1 = util.datestr(ctx1.date())
|
||||
man1 = ctx1.manifest()
|
||||
|
||||
gone = set()
|
||||
gitmode = {'l': '120000', 'x': '100755', '': '100644'}
|
||||
|
||||
if opts.git:
|
||||
revs = None
|
||||
|
||||
for f in sorted(modified + added + removed):
|
||||
to = None
|
||||
tn = None
|
||||
@ -1317,39 +1351,61 @@ def diff(repo, node1=None, node2=None, match=None, changes=None, opts=None):
|
||||
if f not in removed:
|
||||
tn = getfilectx(f, ctx2).data()
|
||||
a, b = f, f
|
||||
if opts.git:
|
||||
if opts.git or losedatafn:
|
||||
if f in added:
|
||||
mode = gitmode[ctx2.flags(f)]
|
||||
if f in copy:
|
||||
a = copy[f]
|
||||
omode = gitmode[man1.flags(a)]
|
||||
_addmodehdr(header, omode, mode)
|
||||
if a in removed and a not in gone:
|
||||
op = 'rename'
|
||||
gone.add(a)
|
||||
if opts.git:
|
||||
a = copy[f]
|
||||
omode = gitmode[man1.flags(a)]
|
||||
_addmodehdr(header, omode, mode)
|
||||
if a in removed and a not in gone:
|
||||
op = 'rename'
|
||||
gone.add(a)
|
||||
else:
|
||||
op = 'copy'
|
||||
header.append('%s from %s\n' % (op, a))
|
||||
header.append('%s to %s\n' % (op, f))
|
||||
to = getfilectx(a, ctx1).data()
|
||||
else:
|
||||
op = 'copy'
|
||||
header.append('%s from %s\n' % (op, a))
|
||||
header.append('%s to %s\n' % (op, f))
|
||||
to = getfilectx(a, ctx1).data()
|
||||
losedatafn(f)
|
||||
else:
|
||||
header.append('new file mode %s\n' % mode)
|
||||
if opts.git:
|
||||
header.append('new file mode %s\n' % mode)
|
||||
elif ctx2.flags(f):
|
||||
losedatafn(f)
|
||||
if util.binary(tn):
|
||||
dodiff = 'binary'
|
||||
if opts.git:
|
||||
dodiff = 'binary'
|
||||
else:
|
||||
losedatafn(f)
|
||||
if not opts.git and not tn:
|
||||
# regular diffs cannot represent new empty file
|
||||
losedatafn(f)
|
||||
elif f in removed:
|
||||
# have we already reported a copy above?
|
||||
if f in copy and copy[f] in added and copy[copy[f]] == f:
|
||||
dodiff = False
|
||||
else:
|
||||
header.append('deleted file mode %s\n' %
|
||||
gitmode[man1.flags(f)])
|
||||
if opts.git:
|
||||
# have we already reported a copy above?
|
||||
if f in copy and copy[f] in added and copy[copy[f]] == f:
|
||||
dodiff = False
|
||||
else:
|
||||
header.append('deleted file mode %s\n' %
|
||||
gitmode[man1.flags(f)])
|
||||
elif not to:
|
||||
# regular diffs cannot represent empty file deletion
|
||||
losedatafn(f)
|
||||
else:
|
||||
omode = gitmode[man1.flags(f)]
|
||||
nmode = gitmode[ctx2.flags(f)]
|
||||
_addmodehdr(header, omode, nmode)
|
||||
if util.binary(to) or util.binary(tn):
|
||||
dodiff = 'binary'
|
||||
header.insert(0, mdiff.diffline(revs, a, b, opts))
|
||||
oflag = man1.flags(f)
|
||||
nflag = ctx2.flags(f)
|
||||
binary = util.binary(to) or util.binary(tn)
|
||||
if opts.git:
|
||||
_addmodehdr(header, gitmode[oflag], gitmode[nflag])
|
||||
if binary:
|
||||
dodiff = 'binary'
|
||||
elif binary or nflag != oflag:
|
||||
losedatafn(f)
|
||||
if opts.git:
|
||||
header.insert(0, mdiff.diffline(revs, a, b, opts))
|
||||
|
||||
if dodiff:
|
||||
if dodiff == 'binary':
|
||||
text = b85diff(to, tn)
|
||||
|
46
tests/autodiff.py
Normal file
46
tests/autodiff.py
Normal file
@ -0,0 +1,46 @@
|
||||
# Extension dedicated to test patch.diff() upgrade modes
|
||||
#
|
||||
#
|
||||
from mercurial import cmdutil, patch, util
|
||||
|
||||
def autodiff(ui, repo, *pats, **opts):
    '''write a diff to ui, selecting the patch.diff() upgrade mode from --git

    Accepted --git values:

    yes    git diffs, upgrade disabled
    no     regular diffs, upgrade disabled
    auto   upgrade enabled with no losedatafn callback
    warn   upgrade enabled; the callback records each file name and
           returns True, and the recorded names are written after the diff
    abort  upgrade enabled; the callback raises util.Abort naming the file

    Any other value aborts with a message listing the accepted values.
    '''
    diffopts = patch.diffopts(ui, opts)
    # NOTE(review): the option table declares '' as the default for --git,
    # so the 'no' fallback here only applies when 'git' is missing from
    # opts altogether -- confirm this is the intended default.
    git = opts.get('git', 'no')
    brokenfiles = set()
    losedatafn = None
    if git in ('yes', 'no'):
        diffopts.git = git == 'yes'
        diffopts.upgrade = False
    elif git in ('auto', 'warn', 'abort'):
        # All three modes start from a regular diff with upgrade enabled;
        # they only differ in the callback handed to patch.diff().
        diffopts.git = False
        diffopts.upgrade = True
        if git == 'warn':
            def losedatafn(fn=None, **kwargs):
                brokenfiles.add(fn)
                return True
        elif git == 'abort':
            def losedatafn(fn=None, **kwargs):
                raise util.Abort('losing data for %s' % fn)
    else:
        # List every accepted value, not only the first three.
        raise util.Abort('--git must be yes, no, auto, warn or abort')

    node1, node2 = cmdutil.revpair(repo, [])
    m = cmdutil.match(repo, pats, opts)
    it = patch.diff(repo, node1, node2, match=m, opts=diffopts,
                    losedatafn=losedatafn)
    for chunk in it:
        ui.write(chunk)
    # Only the 'warn' callback ever adds to brokenfiles.
    for fn in sorted(brokenfiles):
        ui.write('data lost for: %s\n' % fn)
|
||||
|
||||
# Command table entry for autodiff: (implementation, option list, synopsis).
cmdtable = {
    'autodiff': (
        autodiff,
        [('', 'git', '', 'git upgrade mode (yes/no/auto/warn/abort)')],
        '[OPTION]... [FILE]...',
    ),
}
|
63
tests/test-diff-upgrade
Executable file
63
tests/test-diff-upgrade
Executable file
@ -0,0 +1,63 @@
|
||||
#!/bin/sh
# Exercise the autodiff test extension in each of its --git modes
# (yes/no/auto/warn/abort) against a working copy containing regular,
# executable, binary and empty files that are added, changed or removed.

# Quote $HGRCPATH so a configuration path containing whitespace still works.
echo "[extensions]" >> "$HGRCPATH"
echo "autodiff=$TESTDIR/autodiff.py" >> "$HGRCPATH"
echo "[diff]" >> "$HGRCPATH"
echo "nodates=1" >> "$HGRCPATH"

hg init repo
cd repo
echo '% make a combination of new, changed and deleted file'
echo regular > regular
echo rmregular > rmregular
touch rmempty
echo exec > exec
chmod +x exec
echo rmexec > rmexec
chmod +x rmexec
echo setexec > setexec
echo unsetexec > unsetexec
chmod +x unsetexec
echo binary > binary
python -c "file('rmbinary', 'wb').write('\0')"
hg ci -Am addfiles
echo regular >> regular
echo newregular >> newregular
rm rmempty
touch newempty
rm rmregular
echo exec >> exec
echo newexec > newexec
chmod +x newexec
rm rmexec
chmod +x setexec
chmod -x unsetexec
python -c "file('binary', 'wb').write('\0\0')"
python -c "file('newbinary', 'wb').write('\0')"
rm rmbinary
hg addremove

echo '% git=no: regular diff for all files'
hg autodiff --git=no

# NOTE: the label below says git=no although --git=yes is what runs; it is
# kept verbatim because the recorded expected output contains the same text.
echo '% git=no: git diff for single regular file'
hg autodiff --git=yes regular

echo '% git=auto: regular diff for regular files and removals'
hg autodiff --git=auto regular newregular rmregular rmbinary rmexec

for f in exec newexec setexec unsetexec binary newbinary newempty rmempty; do
    echo '% git=auto: git diff for' "$f"
    hg autodiff --git=auto "$f"
done

echo '% git=warn: regular diff with data loss warnings'
hg autodiff --git=warn

echo '% git=abort: fail on execute bit change'
hg autodiff --git=abort regular setexec

echo '% git=abort: succeed on regular file'
hg autodiff --git=abort regular

cd ..
|
186
tests/test-diff-upgrade.out
Normal file
186
tests/test-diff-upgrade.out
Normal file
@ -0,0 +1,186 @@
|
||||
% make a combination of new, changed and deleted file
|
||||
adding binary
|
||||
adding exec
|
||||
adding regular
|
||||
adding rmbinary
|
||||
adding rmempty
|
||||
adding rmexec
|
||||
adding rmregular
|
||||
adding setexec
|
||||
adding unsetexec
|
||||
adding newbinary
|
||||
adding newempty
|
||||
adding newexec
|
||||
adding newregular
|
||||
removing rmbinary
|
||||
removing rmempty
|
||||
removing rmexec
|
||||
removing rmregular
|
||||
% git=no: regular diff for all files
|
||||
diff -r b3f053cd7c7f binary
|
||||
Binary file binary has changed
|
||||
diff -r b3f053cd7c7f exec
|
||||
--- a/exec
|
||||
+++ b/exec
|
||||
@@ -1,1 +1,2 @@
|
||||
exec
|
||||
+exec
|
||||
diff -r b3f053cd7c7f newbinary
|
||||
Binary file newbinary has changed
|
||||
diff -r b3f053cd7c7f newexec
|
||||
--- /dev/null
|
||||
+++ b/newexec
|
||||
@@ -0,0 +1,1 @@
|
||||
+newexec
|
||||
diff -r b3f053cd7c7f newregular
|
||||
--- /dev/null
|
||||
+++ b/newregular
|
||||
@@ -0,0 +1,1 @@
|
||||
+newregular
|
||||
diff -r b3f053cd7c7f regular
|
||||
--- a/regular
|
||||
+++ b/regular
|
||||
@@ -1,1 +1,2 @@
|
||||
regular
|
||||
+regular
|
||||
diff -r b3f053cd7c7f rmbinary
|
||||
Binary file rmbinary has changed
|
||||
diff -r b3f053cd7c7f rmexec
|
||||
--- a/rmexec
|
||||
+++ /dev/null
|
||||
@@ -1,1 +0,0 @@
|
||||
-rmexec
|
||||
diff -r b3f053cd7c7f rmregular
|
||||
--- a/rmregular
|
||||
+++ /dev/null
|
||||
@@ -1,1 +0,0 @@
|
||||
-rmregular
|
||||
% git=no: git diff for single regular file
|
||||
diff --git a/regular b/regular
|
||||
--- a/regular
|
||||
+++ b/regular
|
||||
@@ -1,1 +1,2 @@
|
||||
regular
|
||||
+regular
|
||||
% git=auto: regular diff for regular files and removals
|
||||
diff -r b3f053cd7c7f newregular
|
||||
--- /dev/null
|
||||
+++ b/newregular
|
||||
@@ -0,0 +1,1 @@
|
||||
+newregular
|
||||
diff -r b3f053cd7c7f regular
|
||||
--- a/regular
|
||||
+++ b/regular
|
||||
@@ -1,1 +1,2 @@
|
||||
regular
|
||||
+regular
|
||||
diff -r b3f053cd7c7f rmbinary
|
||||
Binary file rmbinary has changed
|
||||
diff -r b3f053cd7c7f rmexec
|
||||
--- a/rmexec
|
||||
+++ /dev/null
|
||||
@@ -1,1 +0,0 @@
|
||||
-rmexec
|
||||
diff -r b3f053cd7c7f rmregular
|
||||
--- a/rmregular
|
||||
+++ /dev/null
|
||||
@@ -1,1 +0,0 @@
|
||||
-rmregular
|
||||
% git=auto: git diff for exec
|
||||
diff -r b3f053cd7c7f exec
|
||||
--- a/exec
|
||||
+++ b/exec
|
||||
@@ -1,1 +1,2 @@
|
||||
exec
|
||||
+exec
|
||||
% git=auto: git diff for newexec
|
||||
diff --git a/newexec b/newexec
|
||||
new file mode 100755
|
||||
--- /dev/null
|
||||
+++ b/newexec
|
||||
@@ -0,0 +1,1 @@
|
||||
+newexec
|
||||
% git=auto: git diff for setexec
|
||||
diff --git a/setexec b/setexec
|
||||
old mode 100644
|
||||
new mode 100755
|
||||
% git=auto: git diff for unsetexec
|
||||
diff --git a/unsetexec b/unsetexec
|
||||
old mode 100755
|
||||
new mode 100644
|
||||
% git=auto: git diff for binary
|
||||
diff --git a/binary b/binary
|
||||
index a9128c283485202893f5af379dd9beccb6e79486..09f370e38f498a462e1ca0faa724559b6630c04f
|
||||
GIT binary patch
|
||||
literal 2
|
||||
Jc${Nk0000200961
|
||||
|
||||
% git=auto: git diff for newbinary
|
||||
diff --git a/newbinary b/newbinary
|
||||
new file mode 100644
|
||||
index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d
|
||||
GIT binary patch
|
||||
literal 1
|
||||
Ic${MZ000310RR91
|
||||
|
||||
% git=auto: git diff for newempty
|
||||
diff --git a/newempty b/newempty
|
||||
new file mode 100644
|
||||
% git=auto: git diff for rmempty
|
||||
diff --git a/rmempty b/rmempty
|
||||
deleted file mode 100644
|
||||
% git=warn: regular diff with data loss warnings
|
||||
diff -r b3f053cd7c7f binary
|
||||
Binary file binary has changed
|
||||
diff -r b3f053cd7c7f exec
|
||||
--- a/exec
|
||||
+++ b/exec
|
||||
@@ -1,1 +1,2 @@
|
||||
exec
|
||||
+exec
|
||||
diff -r b3f053cd7c7f newbinary
|
||||
Binary file newbinary has changed
|
||||
diff -r b3f053cd7c7f newexec
|
||||
--- /dev/null
|
||||
+++ b/newexec
|
||||
@@ -0,0 +1,1 @@
|
||||
+newexec
|
||||
diff -r b3f053cd7c7f newregular
|
||||
--- /dev/null
|
||||
+++ b/newregular
|
||||
@@ -0,0 +1,1 @@
|
||||
+newregular
|
||||
diff -r b3f053cd7c7f regular
|
||||
--- a/regular
|
||||
+++ b/regular
|
||||
@@ -1,1 +1,2 @@
|
||||
regular
|
||||
+regular
|
||||
diff -r b3f053cd7c7f rmbinary
|
||||
Binary file rmbinary has changed
|
||||
diff -r b3f053cd7c7f rmexec
|
||||
--- a/rmexec
|
||||
+++ /dev/null
|
||||
@@ -1,1 +0,0 @@
|
||||
-rmexec
|
||||
diff -r b3f053cd7c7f rmregular
|
||||
--- a/rmregular
|
||||
+++ /dev/null
|
||||
@@ -1,1 +0,0 @@
|
||||
-rmregular
|
||||
data lost for: binary
|
||||
data lost for: newbinary
|
||||
data lost for: newempty
|
||||
data lost for: newexec
|
||||
data lost for: rmempty
|
||||
data lost for: setexec
|
||||
data lost for: unsetexec
|
||||
% git=abort: fail on execute bit change
|
||||
abort: losing data for setexec
|
||||
% git=abort: succeed on regular file
|
||||
diff -r b3f053cd7c7f regular
|
||||
--- a/regular
|
||||
+++ b/regular
|
||||
@@ -1,1 +1,2 @@
|
||||
regular
|
||||
+regular
|
Loading…
Reference in New Issue
Block a user