copies: rewrite copy detection for non-merge users

The existing copy detection API was designed with merge in mind and
was ill-suited for doing status/diff. The new pathcopies
implementation gives more accurate, easier to use results for
comparing two revisions, and is much simpler to understand.

Test notes:

- test-mv-cp-st.t now finds more renames in the reverse direction
- test-mq-merge.t was always wrong and duplicated a copy in diff that
  was already present in one of the parent revisions
This commit is contained in:
Matt Mackall 2012-01-04 17:55:30 -06:00
parent 9bfa890ee6
commit 48738cad22
4 changed files with 92 additions and 19 deletions

View File

@ -5206,17 +5206,7 @@ def status(ui, repo, *pats, **opts):
changestates = zip(states, 'MAR!?IC', stat)
if (opts.get('all') or opts.get('copies')) and not opts.get('no_status'):
ctx1 = repo[node1]
ctx2 = repo[node2]
added = stat[1]
if node2 is None:
added = stat[0] + stat[1] # merged?
for k, v in copies.pathcopies(ctx1, ctx2).iteritems():
if k in added:
copy[k] = v
elif v in added:
copy[v] = k
copy = copies.pathcopies(repo[node1], repo[node2])
for state, char, files in changestates:
if state in show:

View File

@ -84,8 +84,89 @@ def _findlimit(repo, a, b):
return None
return limit
def pathcopies(c1, c2):
    '''return the first element of mergecopies() for c1 and c2

    mergecopies() is called with the "null" changeset as ancestor and
    with checkdirs disabled.

    NOTE(review): a second pathcopies definition appears further down in
    this view; this one looks like the pre-rewrite version - confirm.
    '''
    repo = c1._repo
    return mergecopies(repo, c1, c2, repo["null"], False)[0]
def _chain(src, dst, a, b):
'''chain two sets of copies a->b'''
t = a.copy()
for k, v in b.iteritems():
if v in t:
# found a chain
if t[v] != k:
# file wasn't renamed back to itself
t[k] = t[v]
if v not in dst:
# chain was a rename, not a copy
del t[v]
if v in src:
# file is a copy of an existing file
t[k] = v
return t
def _tracefile(fctx, actx):
'''return file context that is the ancestor of fctx present in actx'''
stop = actx.rev()
am = actx.manifest()
for f in fctx.ancestors():
if am.get(f.path(), None) == f.filenode():
return f
if f.rev() < stop:
return None
def _dirstatecopies(d):
ds = d._repo.dirstate
c = ds.copies().copy()
for k in c.keys():
if ds[k] not in 'anm':
del c[k]
return c
def _forwardcopies(a, b):
    '''find {dst@b: src@a} copy mapping where a is an ancestor of b

    When b.rev() is None, b is treated as the working copy: files are
    traced from its first parent and the result is chained with the
    copies recorded in the dirstate.
    '''
    # check for working copy
    wctx = None
    if b.rev() is None:
        wctx = b
        b = wctx.p1()
        if a == b:
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(wctx)

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    found = {}
    for path in b:
        if path in a:
            continue
        origin = _tracefile(b[path], a)
        if origin:
            found[path] = origin.path()

    if wctx is None:
        return found
    # combine copies from dirstate if necessary
    return _chain(a, wctx, found, _dirstatecopies(wctx))
def _backwardcopies(a, b):
    '''find copy mapping from a to b where b is an ancestor of a

    Computed by inverting _forwardcopies(b, a): every value of the
    forward mapping becomes a key pointing at its former key.
    '''
    # because the forward mapping is 1:n, we can lose renames here
    # in particular, we find renames better than copies
    forward = _forwardcopies(b, a)
    # items() instead of iteritems() so this also runs where dicts have no
    # iteritems() method; when several keys share a value, the last one
    # iterated wins, as with item-by-item assignment
    return dict((v, k) for k, v in forward.items())
def pathcopies(x, y):
    '''find {dst@y: src@x} copy mapping for directed compare

    Returns an empty dict when x and y are equal or either one is false.
    Otherwise the ancestor of the two decides the direction: forward when
    it is x, backward when it is y, and a backward walk from x to the
    ancestor chained with a forward walk from the ancestor to y in any
    other case.
    '''
    if x == y or not x or not y:
        return {}

    base = y.ancestor(x)
    if base == x:
        result = _forwardcopies(x, y)
    elif base == y:
        result = _backwardcopies(x, y)
    else:
        result = _chain(x, y,
                        _backwardcopies(x, base),
                        _forwardcopies(base, y))
    return result
def mergecopies(repo, c1, c2, ca, checkdirs=True):
"""

View File

@ -149,13 +149,11 @@ Check patcha is still a git patch:
-b
+a
+c
diff --git a/a b/aa
copy from a
copy to aa
--- a/a
diff --git a/aa b/aa
new file mode 100644
--- /dev/null
+++ b/aa
@@ -1,1 +1,1 @@
-b
@@ -0,0 +1,1 @@
+a
Check patcha2 is still a regular patch:

View File

@ -560,6 +560,7 @@ $4 - test description
- parent to root: --rev . --rev 0
M a
b
R b
diff --git a/a b/a
@ -611,6 +612,7 @@ $4 - test description
- parent to branch: --rev . --rev 2
M a
b
A x/y
R b
@ -906,6 +908,7 @@ $4 - test description
- parent to root: --rev . --rev 0
M a
b
R b
R c
@ -975,6 +978,7 @@ $4 - test description
- parent to branch: --rev . --rev 2
M a
b
A x/y
R b
R c