copytrace: retrieve moves by package to reduce back and forths to the database

Summary: Instead of retrieving moves one by one during copytracing, retrieve all the moves for the whole ctxstack in a single batched query

Test Plan: The existing tests still pass

Reviewers: #sourcecontrol, rmcelroy

Differential Revision: https://phabricator.fb.com/D2661088

Tasks: 8660367
This commit is contained in:
Cecile Berillon 2015-11-16 16:31:22 -08:00
parent 67eae4b104
commit ad9beb1dc1
3 changed files with 63 additions and 49 deletions

View File

@ -50,9 +50,13 @@ def _forwardrenamesandpaths(repo, ctxstack, m):
paths = {}
# Retrieve the move data for all the ctx
ctxhash = [ctx.hex() for ctx in ctxstack]
datapkg = dbutil.retrievedatapkg(repo, ctxhash, move=True)
while ctxstack:
ctx = ctxstack.pop()
data = dbutil.retrievedata(repo, ctx, move=True)
data = datapkg[ctx.hex()]
pk = paths.keys()
delsrc = []
for dst, src in data.iteritems():
@ -241,16 +245,13 @@ def _dirstaterenames(ctx):
return copies
def _processrenames(repo, ctx, renamed, move=False):
def _processrenames(repo, ctx, datapkg, renamed, move=False):
"""
Adds the renames {dst: src} to the 'renamed' dictionary if the source is
in files
"""
data = dbutil.retrievedata(repo, ctx, move=True)
data = datapkg[ctx.hex()]
movedsrc = []
# moves and copies
if not move:
data.update(dbutil.retrievedata(repo, ctx, move=False))
for dst, src in data.iteritems():
# checks if the source file is to be considered
@ -284,13 +285,23 @@ def _forwardrenameswithdb(a, b, match=None, move=False):
if a == b:
# short-circuit to avoid issues with merge states
return dirstatefunc(w)
repo = b._repo
ctxstack = _createctxstack(repo, b, a)
ctxhash = [ctx.hex() for ctx in ctxstack]
ctxstack = _createctxstack(b._repo, b, a)
# Retrieve the move data for all the ctx
# move-only data
datapkg = dbutil.retrievedatapkg(repo, ctxhash, move=True)
# adding the copies
if not move:
cppkg = dbutil.retrievedatapkg(repo, ctxhash, move=False)
for ctx, dic in cppkg.iteritems():
datapkg.setdefault(ctx, {}).update(dic)
renamed = {}
while ctxstack:
ctx = ctxstack.pop()
_processrenames(b._repo, ctx, renamed, move)
_processrenames(repo, ctx, datapkg, renamed, move)
# combine renames from dirstate if necessary
if w is not None:

View File

@ -90,45 +90,6 @@ def insertdata(repo, ctx, mvdict, cpdict, remote=False):
_close(conn)
def retrievedata(repo, ctx, move=False, remote=False, askserver=True):
    """
    Returns the {dst: src} dictionary of moves (if move=True) or of copies
    (if move=False) recorded for ctx.

    repo      -- repository whose move database is queried
    ctx       -- a changectx, or the string '0' (temporary data storage slot)
    move      -- select move records instead of copy records
    remote    -- query the remote database instead of the local one
    askserver -- on a local miss, request the data from the server once and
                 retry with askserver=False

    Raises util.Abort if the database cannot be read.
    """
    dbname, conn, c = _connect(repo, remote)
    # '0' is used as temp data storage and has no real changeset hash
    if ctx == '0':
        ctxhash = '0'
    else:
        ctxhash = str(ctx.hex())
    mv = '1' if move else '0'
    try:
        c.execute('SELECT DISTINCT source, destination FROM Moves ' +
                  'WHERE hash = ? AND mv = ?', [ctxhash, mv])
    # Narrowed from a bare except: a bare except would also swallow
    # KeyboardInterrupt/SystemExit; those must propagate.
    except Exception:
        raise util.Abort('could not access data from the %s database' % dbname)
    all_rows = c.fetchall()
    _close(conn)
    ret = {}
    # The local database doesn't have the data for this ctx and hasn't tried
    # to retrieve it yet (askserver): fetch from the server, then retry once
    if askserver and not remote and not all_rows:
        _requestdata(repo, [ctx])
        return retrievedata(repo, ctx, move=move, remote=remote,
                            askserver=False)
    for src, dst in all_rows:
        # an empty destination marks a ctx registered with no move data;
        # the query is scoped to a single hash, so stop here
        if not dst:
            break
        ret[dst.encode('utf8')] = src.encode('utf8')
    return ret
def insertrawdata(repo, dic, remote=False):
"""
inserts dict = {ctxhash: [src, dst, mv]} for moves and copies into the
@ -150,10 +111,52 @@ def insertrawdata(repo, dic, remote=False):
_close(conn)
def retrieverawdata(repo, ctxlist, remote=False, askserver=True):
def retrievedatapkg(repo, ctxlist, move=False, remote=False, askserver=True):
    """
    Retrieves {ctxhash: {dst: src}} for each ctxhash in ctxlist, for moves
    (move=True) or copies (move=False), in a single batched query.

    repo      -- repository whose move database is queried
    ctxlist   -- list of ctx hash strings to look up
    move      -- select move records instead of copy records
    remote    -- query the remote database instead of the local one
    askserver -- for hashes missing locally, request them from the server
                 once and retry with askserver=False

    Raises util.Abort if the database cannot be read.
    """
    # 'hash IN ()' is invalid SQL, so short-circuit an empty request
    if not ctxlist:
        return {}
    # Do we want moves or copies
    mv = '1' if move else '0'
    dbname, conn, c = _connect(repo, remote)
    try:
        # one parameterized IN (...) query for the whole batch instead of
        # one round trip per ctx
        c.execute('SELECT DISTINCT hash, source, destination FROM Moves' +
                  ' WHERE hash IN (%s) AND mv = ?'
                  % (','.join('?' * len(ctxlist))), ctxlist + [mv])
    # Narrowed from a bare except: a bare except would also swallow
    # KeyboardInterrupt/SystemExit; those must propagate.
    except Exception:
        raise util.Abort('could not access data from the %s database' % dbname)
    all_rows = c.fetchall()
    _close(conn)
    ret = {}
    # Building the {dst: src} dict for each ctxhash
    for ctxhash, src, dst in all_rows:
        if not dst:
            # ctx is registered but has no move/copy data: record an
            # empty dict so it is not treated as missing below
            ret.setdefault(ctxhash.encode('utf8'), {})
        else:
            ret.setdefault(ctxhash.encode('utf8'), {})[dst.encode('utf8')] = \
                src.encode('utf8')
    # use a set for O(1) membership tests instead of scanning a key list
    processed = set(ret)
    missing = [f for f in ctxlist if f not in processed]
    # The local database doesn't have the data for these ctx and hasn't
    # tried to retrieve it yet (askserver): fetch once, then retry
    if askserver and not remote and missing:
        _requestdata(repo, missing)
        add = retrievedatapkg(repo, missing, move=move, remote=remote,
                              askserver=False)
        ret.update(add)
    return ret
def retrieverawdata(repo, ctxlist, remote=False, askserver=True):
"""
retrieves {ctxhash: [src, dst, mv]} for ctxhash in ctxlist for moves or copies
"""
dbname, conn, c = _connect(repo, remote)
try:
c.execute('SELECT DISTINCT hash, source, destination, mv FROM Moves' +

View File

@ -84,7 +84,7 @@ def concludenode(orig, repo, rev, p1, p2, **kwargs):
"""
# this allows to trace rename information from the rebase which mercurial
# doesn't do today
cp = dbutil.retrievedata(repo, '0', move=False)
cp = dbutil.retrievedatapkg(repo, ['0'], move=False, askserver=False)['0']
_markchanges(repo, cp)
ret = orig(repo, rev, p1, p2, **kwargs)