copytrace: add fast copytracing for amends

This commit is contained in:
Mark Thomas 2017-07-18 08:52:20 +01:00
parent 4fdf562a93
commit 9d389170da
2 changed files with 422 additions and 0 deletions

View File

@ -22,20 +22,34 @@
# limits the number of heuristically found move candidates to check # limits the number of heuristically found move candidates to check
maxmovescandidatestocheck = 5 maxmovescandidatestocheck = 5
# whether to enable fast copytracing during amends (requires fastcopytrace
# to be enabled).
enableamendcopytrace = True
# how many previous commits to search through when looking for amend
# copytrace data.
amendcopytracecommitlimit = 100
''' '''
from collections import defaultdict from collections import defaultdict
from mercurial import ( from mercurial import (
cmdutil,
commands, commands,
copies as copiesmod, copies as copiesmod,
dispatch, dispatch,
error,
extensions, extensions,
filemerge, filemerge,
node, node,
phases,
util, util,
) )
from mercurial.i18n import _ from mercurial.i18n import _
import anydbm
import json
import os import os
import time import time
@ -71,6 +85,7 @@ def extsetup(ui):
extensions.wrapfunction(filemerge, '_filemerge', _filemerge) extensions.wrapfunction(filemerge, '_filemerge', _filemerge)
extensions.wrapfunction(copiesmod, 'mergecopies', _mergecopies) extensions.wrapfunction(copiesmod, 'mergecopies', _mergecopies)
extensions.wrapfunction(cmdutil, 'amend', _amend)
def _filemerge(origfunc, premerge, repo, mynode, orig, fcd, fco, fca, def _filemerge(origfunc, premerge, repo, mynode, orig, fcd, fco, fca,
labels=None, *args, **kwargs): labels=None, *args, **kwargs):
@ -110,6 +125,155 @@ def _runcommand(orig, lui, repo, cmd, fullargs, ui, *args, **kwargs):
False, "--tracecopies") False, "--tracecopies")
return orig(lui, repo, cmd, fullargs, ui, *args, **kwargs) return orig(lui, repo, cmd, fullargs, ui, *args, **kwargs)
def _amend(orig, ui, repo, commitfunc, old, extra, pats, opts):
    """Wraps amend to collect copytrace data on amend

    If a file is created in one commit, modified in a subsequent commit, and
    then renamed or copied by amending the original commit, restacking the
    commits that modify the file will fail:

    file modified here    B     B'  restack of B to B' will fail
                          |     :
    file created here     A --> A'  file renamed in amended commit
                          |    /
                          o --

    This function collects information about copies and renames from amend
    commits, and saves it for use during rebases onto the amend commit.  This
    lets rebases onto files that have been renamed or copied in an amend
    commit work without conflicts.

    When an amend commit is created, mercurial first creates a temporary
    intermediate commit that contains the amendments, and then merges these
    two commits into the single amended commit:

        intermediate_ctx  o
                          |
                orig_ctx  o   o  amended_ctx
                          | /
                          o  parent of original commit

    This function finds the intermediate commit and stores its copytrace
    information against the amended commit in a separate dbm file. Later,
    in _domergecopies, this information will be merged with the rebase
    copytrace data to incorporate renames and copies made during the amend.

    All arguments after ``orig`` are forwarded unchanged to the wrapped
    amend function, and its return value (the node of the amended commit)
    is always returned unchanged.  Failures to open, read or write the
    amendcopytrace database are logged and otherwise ignored, and the
    database handle is closed on every path once it has been opened.
    """
    node = orig(ui, repo, commitfunc, old, extra, pats, opts)

    # Check if amend copytracing has been disabled.
    if not ui.configbool("copytrace", "enableamendcopytrace", True):
        return node

    # Find the amended commit context and the intermediate context that
    # was used for the amend.  The two commits were created in sequence, so
    # the intermediate commit will be the commit with a revision number one
    # before the amended commit.  This commit is filtered, so look at the
    # unfiltered view of the repo to access it.
    amended_ctx = repo[node]
    intermediate_ctx = repo.unfiltered()[amended_ctx.rev() - 1]

    # Sanity check that both contexts have a single parent.  The intermediate
    # context's parent is the original commit, so its parent should be the
    # same as the amended commit's, and it should have the temporary commit
    # message that the amend function gave it.  If any of this is not true,
    # then bail out.  Otherwise, find the original commit.
    if (len(amended_ctx.parents()) != 1 or
            len(intermediate_ctx.parents()) != 1 or
            intermediate_ctx.p1().parents() != amended_ctx.parents() or
            intermediate_ctx.description() !=
            'temporary amend commit for %s' % intermediate_ctx.p1()):
        return node
    orig_ctx = intermediate_ctx.p1()

    # Find the amend-copies, and store them against the amended context.
    amend_copies = copiesmod.pathcopies(orig_ctx, intermediate_ctx)
    if not amend_copies:
        return node

    path = repo.vfs.join('amendcopytrace')
    try:
        # Open the database, creating it if it doesn't already exist.
        db = anydbm.open(path, 'c')
    except anydbm.error as e:
        # Database locked, can't record these amend-copies.
        ui.log('copytrace', 'Failed to open amendcopytrace db: %s' % e)
        return node

    # From here on the database is open; the finally clause guarantees that
    # it is closed again on every exit path, including early returns and
    # unexpected exceptions.
    try:
        # Merge in any existing amend copies from any previous amends.
        try:
            orig_data = db.get(orig_ctx.node(), '{}')
        except anydbm.error as e:
            ui.log('copytrace',
                   'Failed to read key %s from amendcopytrace db: %s' %
                   (orig_ctx.hex(), e))
            return node
        orig_encoded = json.loads(orig_data)
        orig_amend_copies = dict((k.decode('base64'), v.decode('base64'))
                                 for (k, v) in orig_encoded.iteritems())

        # Copytrace information is not valid if it refers to a file that
        # doesn't exist in a commit.  We need to update or remove entries
        # that refer to files that might have only existed in the previous
        # amend commit.
        #
        # Find chained copies and renames (a -> b -> c) and collapse them to
        # (a -> c).  Delete the entry for b if this was a rename.
        #
        # The deletions are deferred until every destination has been
        # processed: several destinations may share one renamed-away source
        # (e.g. "cp b d" together with "mv b c"), and each of them must
        # still be able to look that source up to be collapsed.
        renamed_away = set()
        for dst, src in amend_copies.items():
            if src in orig_amend_copies:
                amend_copies[dst] = orig_amend_copies[src]
                if src not in amended_ctx:
                    renamed_away.add(src)
        for src in renamed_away:
            del orig_amend_copies[src]

        # Copy any left over copies from the previous context.
        for dst, src in orig_amend_copies.iteritems():
            if dst not in amend_copies:
                amend_copies[dst] = src

        # Write out the entry for the new amend commit.
        encoded = dict((k.encode('base64'), v.encode('base64'))
                       for (k, v) in amend_copies.iteritems())
        try:
            db[node] = json.dumps(encoded)
        except anydbm.error as e:
            # Recording amend copies is best-effort: the amend itself has
            # already succeeded, so log and carry on like the other
            # database failures above.
            ui.log('copytrace',
                   'Failed to write key %s to amendcopytrace db: %s' %
                   (amended_ctx.hex(), e))
    finally:
        try:
            db.close()
        except Exception as e:
            # Database corruption.  Not much we can do, so just log.
            ui.log('copytrace', 'Failed to close amendcopytrace db: %s' % e)

    return node
def _getamendcopies(repo, dest, ancestor):
    """Load amend copytrace data recorded for ``dest`` or one of its ancestors

    Walks first parents starting at ``dest`` until a commit whose node is a
    key in the ``amendcopytrace`` dbm file is found, then returns that
    entry decoded into a dict (used by the caller as destination path ->
    source path).

    An empty dict is returned when the database cannot be opened, when the
    walk reaches ``ancestor`` or a public commit, when more than
    ``copytrace.amendcopytracecommitlimit`` (default 100) parents have been
    visited, or when anything fails while reading or decoding the entry
    (which is also logged).
    """
    dbpath = repo.vfs.join('amendcopytrace')
    try:
        db = anydbm.open(dbpath, 'r')
    except anydbm.error:
        return {}

    try:
        maxsteps = repo.ui.configint('copytrace',
                                     'amendcopytracecommitlimit', 100)
        current = dest
        steps = 0

        # Search for the ancestor commit that has amend copytrace data.  This
        # will be the most recent amend commit if we are rebasing onto an
        # amend commit.
        while True:
            if current.node() in db:
                break
            current = current.p1()
            steps += 1
            # Reaching the common ancestor, the search limit or a public
            # commit means there is no amend copytrace data to be found.
            if current == ancestor:
                return {}
            if steps > maxsteps:
                return {}
            if current.phase() == phases.public:
                return {}

        # Load the amend copytrace data from this commit.  Keys and values
        # are stored base64-encoded inside a JSON object.
        stored = json.loads(db[current.node()])
        result = {}
        for enckey, encvalue in stored.iteritems():
            result[enckey.decode('base64')] = encvalue.decode('base64')
        return result
    except Exception:
        repo.ui.log('copytrace',
                    'Failed to load amend copytrace for %s' % dest.hex())
        return {}
    finally:
        # Closing is best-effort; a failure here must not mask the result.
        try:
            db.close()
        except anydbm.error:
            pass
def _mergecopies(orig, repo, cdst, csrc, base): def _mergecopies(orig, repo, cdst, csrc, base):
start = time.time() start = time.time()
try: try:
@ -259,6 +423,15 @@ def _domergecopies(orig, repo, cdst, csrc, base):
repo.ui.log("copytrace", msg=msg, repo.ui.log("copytrace", msg=msg,
reponame=_getreponame(repo, repo.ui)) reponame=_getreponame(repo, repo.ui))
if repo.ui.configbool("copytrace", "enableamendcopytrace", True):
# Look for additional amend-copies.
amend_copies = _getamendcopies(repo, cdst, base.p1())
if amend_copies:
repo.ui.debug('Loaded amend copytrace for %s' % cdst)
for dst, src in amend_copies.iteritems():
if dst not in copies:
copies[dst] = src
return copies, {}, {}, {}, {} return copies, {}, {}, {}, {}
def _fastcopytraceenabled(ui): def _fastcopytraceenabled(ui):

View File

@ -0,0 +1,249 @@
$ . "$TESTDIR/copytrace.sh"
$ cat >> $HGRCPATH << EOF
> [extensions]
> copytrace=$TESTDIR/../hgext3rd/copytrace.py
> fbamend=$TESTDIR/../hgext3rd/fbamend
> rebase=
> shelve=
> [fbamend]
> userestack=True
> [experimental]
> disablecopytrace=True
> evolution=createmarkers
> EOF
Test amend copytrace
$ hg init repo
$ initclient repo
$ cd repo
$ echo x > x
$ hg add x
$ hg ci -m initial
$ echo a > a
$ hg add a
$ hg ci -m "create a"
$ echo b > a
$ hg ci -qm "mod a"
$ hg up -q ".^"
$ hg mv a b
$ hg amend
warning: the changeset's children were left behind
(use 'hg restack' to rebase them)
$ hg rebase --restack
rebasing 2:ad25e018afa9 "mod a"
merging b and a to b
$ ls
b
x
$ cat b
a
$ hg update 5
1 files updated, 0 files merged, 0 files removed, 0 files unresolved
$ cat b
b
$ cd ..
$ rm -rf repo
Test amend copytrace with multiple stacked commits
$ hg init repo
$ initclient repo
$ cd repo
$ echo x > x
$ hg add x
$ hg ci -m initial
$ echo a > a
$ echo b > b
$ echo c > c
$ hg add a b c
$ hg ci -m "create a b c"
$ echo a1 > a
$ hg ci -qm "mod a"
$ echo b2 > b
$ hg ci -qm "mod b"
$ echo c3 > c
$ hg ci -qm "mod c"
$ hg bookmark test-top
$ hg up -q '.~3'
$ hg mv a a1
$ hg mv b b2
$ hg amend
warning: the changeset's children were left behind
(use 'hg restack' to rebase them)
$ hg mv c c3
$ hg amend
$ hg rebase --restack
rebasing 2:797127d4e250 "mod a"
merging a1 and a to a1
rebasing 3:e2aabbfe749a "mod b"
merging b2 and b to b2
rebasing 4:4f8d18558559 "mod c" (test-top)
merging c3 and c to c3
$ hg up test-top
3 files updated, 0 files merged, 0 files removed, 0 files unresolved
(activating bookmark test-top)
$ cat a1 b2 c3
a1
b2
c3
$ cd ..
$ rm -rf repo
Test amend copytrace with multiple renames of the same file
$ hg init repo
$ initclient repo
$ cd repo
$ echo x > x
$ hg add x
$ hg ci -m initial
$ echo a > a
$ hg add a
$ hg ci -m "create a"
$ echo b > a
$ hg ci -qm "mod a"
$ hg up -q ".^"
$ hg mv a b
$ hg amend
warning: the changeset's children were left behind
(use 'hg restack' to rebase them)
$ hg mv b c
$ hg amend
$ hg rebase --restack
rebasing 2:ad25e018afa9 "mod a"
merging c and a to c
$ hg update 7
1 files updated, 0 files merged, 0 files removed, 0 files unresolved
$ cat c
b
$ cd ..
$ rm -rf repo
Test amend copytrace with copies
$ hg init repo
$ initclient repo
$ cd repo
$ echo x > x
$ hg add x
$ hg ci -m initial
$ echo a > a
$ echo i > i
$ hg add a i
$ hg ci -m "create a i"
$ echo b > a
$ hg ci -qm "mod a"
$ echo j > i
$ hg ci -qm "mod i"
$ hg bookmark test-top
$ hg up -q ".~2"
$ hg cp a b
$ hg amend
warning: the changeset's children were left behind
(use 'hg restack' to rebase them)
$ hg cp i j
$ hg amend
$ hg cp b c
$ hg amend
$ hg rebase --restack
rebasing 2:6938f0d82b23 "mod a"
merging b and a to b
merging c and a to c
rebasing 3:df8dfcb1d237 "mod i" (test-top)
merging j and i to j
$ hg up test-top
5 files updated, 0 files merged, 0 files removed, 0 files unresolved
(activating bookmark test-top)
$ cat a b c i j
b
b
b
j
j
$ cd ..
$ rm -rf repo
Test rebase after amend deletion of copy
$ hg init repo
$ initclient repo
$ cd repo
$ echo x > x
$ hg add x
$ hg ci -m initial
$ echo a > a
$ hg add a
$ hg ci -m "create a"
$ echo b > a
$ hg ci -qm "mod a"
$ hg up -q ".^"
$ hg cp a b
$ hg amend
warning: the changeset's children were left behind
(use 'hg restack' to rebase them)
$ hg rm b
$ hg amend
$ hg rebase --restack
rebasing 2:ad25e018afa9 "mod a"
$ cd ..
$ rm -rf repo
Test failure to rebase deletion after rename
$ hg init repo
$ initclient repo
$ cd repo
$ echo x > x
$ hg add x
$ hg ci -m initial
$ echo a > a
$ hg add a
$ hg ci -m "create a"
$ echo b > a
$ hg ci -qm "mod a"
$ hg rm a
$ hg ci -m "delete a"
$ hg up -q ".~2"
$ hg mv a b
$ hg amend
warning: the changeset's children were left behind
(use 'hg restack' to rebase them)
$ hg rebase --restack
rebasing 2:ad25e018afa9 "mod a"
merging b and a to b
rebasing 3:ba0395f0e180 "delete a"
transaction abort!
rollback completed
abort: a@ba0395f0e180: not found in manifest!
[255]
$ hg rebase --abort
rebase aborted (no revision is removed, only broken state is cleared)
$ cd ..
$ rm -rf repo
Test amend copytrace can be disabled
$ cat >> $HGRCPATH << EOF
> [copytrace]
> enableamendcopytrace=false
> EOF
$ hg init repo
$ initclient repo
$ cd repo
$ echo x > x
$ hg add x
$ hg ci -m initial
$ echo a > a
$ hg add a
$ hg ci -m "create a"
$ echo b > a
$ hg ci -qm "mod a"
$ hg up -q ".^"
$ hg mv a b
$ hg amend
warning: the changeset's children were left behind
(use 'hg restack' to rebase them)
$ hg rebase --restack
rebasing 2:ad25e018afa9 "mod a"
other [source] changed a which local [dest] deleted
hint: if this message is due to a moved file, you can ask mercurial to attempt to automatically resolve this change by re-running with the --tracecopies flag, but this will significantly slow down the operation, so you will need to be patient.
Source control team is working on fixing this problem.
use (c)hanged version, leave (d)eleted, or leave (u)nresolved? u
unresolved conflicts (see hg resolve, then hg rebase --continue)
[1]
$ cd ..
$ rm -rf repo