copytrace: retrieve moves by package to reduce back and forths to the database

Summary: Instead of retrieving moves one by one during copytracing, retrieve all the moves for the whole ctxstack in a single batched query

Test Plan: The existing tests still pass

Reviewers: #sourcecontrol, rmcelroy

Differential Revision: https://phabricator.fb.com/D2661088

Tasks: 8660367
This commit is contained in:
Cecile Berillon 2015-11-16 16:31:22 -08:00
parent 67eae4b104
commit ad9beb1dc1
3 changed files with 63 additions and 49 deletions

View File

@ -50,9 +50,13 @@ def _forwardrenamesandpaths(repo, ctxstack, m):
paths = {}
# Retrieve the move data for all the ctx
ctxhash = [ctx.hex() for ctx in ctxstack]
datapkg = dbutil.retrievedatapkg(repo, ctxhash, move=True)
while ctxstack:
ctx = ctxstack.pop()
data = dbutil.retrievedata(repo, ctx, move=True)
data = datapkg[ctx.hex()]
pk = paths.keys()
delsrc = []
for dst, src in data.iteritems():
@ -241,16 +245,13 @@ def _dirstaterenames(ctx):
return copies
def _processrenames(repo, ctx, renamed, move=False):
def _processrenames(repo, ctx, datapkg, renamed, move=False):
"""
Adds the renames {dst: src} to the 'renamed' dictionary if the source is
in files
"""
data = dbutil.retrievedata(repo, ctx, move=True)
data = datapkg[ctx.hex()]
movedsrc = []
# moves and copies
if not move:
data.update(dbutil.retrievedata(repo, ctx, move=False))
for dst, src in data.iteritems():
# checks if the source file is to be considered
@ -284,13 +285,23 @@ def _forwardrenameswithdb(a, b, match=None, move=False):
if a == b:
# short-circuit to avoid issues with merge states
return dirstatefunc(w)
repo = b._repo
ctxstack = _createctxstack(repo, b, a)
ctxhash = [ctx.hex() for ctx in ctxstack]
ctxstack = _createctxstack(b._repo, b, a)
# Retrieve the move data for all the ctx
# move-only data
datapkg = dbutil.retrievedatapkg(repo, ctxhash, move=True)
# adding the copies
if not move:
cppkg = dbutil.retrievedatapkg(repo, ctxhash, move=False)
for ctx, dic in cppkg.iteritems():
datapkg.setdefault(ctx, {}).update(dic)
renamed = {}
while ctxstack:
ctx = ctxstack.pop()
_processrenames(b._repo, ctx, renamed, move)
_processrenames(repo, ctx, datapkg, renamed, move)
# combine renames from dirstate if necessary
if w is not None:

View File

@ -90,45 +90,6 @@ def insertdata(repo, ctx, mvdict, cpdict, remote=False):
_close(conn)
def retrievedata(repo, ctx, move=False, remote=False, askserver=True):
    """
    Returns the {dst: src} dictionary of moves (if move=True) or of copies
    (if move=False) recorded for ctx.

    repo      -- repository whose move database is queried
    ctx       -- a changectx, or the string '0' (temporary data storage slot)
    move      -- select move records instead of copy records
    remote    -- query the remote database instead of the local one
    askserver -- on a local miss, request the data from the server once and
                 retry with askserver=False

    Raises util.Abort if the database cannot be read.
    """
    dbname, conn, c = _connect(repo, remote)
    # '0' is used as temp data storage and has no real changeset hash
    if ctx == '0':
        ctxhash = '0'
    else:
        ctxhash = str(ctx.hex())
    mv = '1' if move else '0'
    try:
        c.execute('SELECT DISTINCT source, destination FROM Moves ' +
                  'WHERE hash = ? AND mv = ?', [ctxhash, mv])
    # Narrowed from a bare except: a bare except would also swallow
    # KeyboardInterrupt/SystemExit; those must propagate.
    except Exception:
        raise util.Abort('could not access data from the %s database' % dbname)
    all_rows = c.fetchall()
    _close(conn)
    ret = {}
    # The local database doesn't have the data for this ctx and hasn't tried
    # to retrieve it yet (askserver): fetch from the server, then retry once
    if askserver and not remote and not all_rows:
        _requestdata(repo, [ctx])
        return retrievedata(repo, ctx, move=move, remote=remote,
                            askserver=False)
    for src, dst in all_rows:
        # an empty destination marks a ctx registered with no move data;
        # the query is scoped to a single hash, so stop here
        if not dst:
            break
        ret[dst.encode('utf8')] = src.encode('utf8')
    return ret
def insertrawdata(repo, dic, remote=False):
"""
inserts dict = {ctxhash: [src, dst, mv]} for moves and copies into the
@ -150,10 +111,52 @@ def insertrawdata(repo, dic, remote=False):
_close(conn)
def retrieverawdata(repo, ctxlist, remote=False, askserver=True):
def retrievedatapkg(repo, ctxlist, move=False, remote=False, askserver=True):
    """
    Retrieves {ctxhash: {dst: src}} for each ctxhash in ctxlist, for moves
    (move=True) or copies (move=False), in a single batched query.

    repo      -- repository whose move database is queried
    ctxlist   -- list of ctx hash strings to look up
    move      -- select move records instead of copy records
    remote    -- query the remote database instead of the local one
    askserver -- for hashes missing locally, request them from the server
                 once and retry with askserver=False

    Raises util.Abort if the database cannot be read.
    """
    # 'hash IN ()' is invalid SQL, so short-circuit an empty request
    if not ctxlist:
        return {}
    # Do we want moves or copies
    mv = '1' if move else '0'
    dbname, conn, c = _connect(repo, remote)
    try:
        # one parameterized IN (...) query for the whole batch instead of
        # one round trip per ctx
        c.execute('SELECT DISTINCT hash, source, destination FROM Moves' +
                  ' WHERE hash IN (%s) AND mv = ?'
                  % (','.join('?' * len(ctxlist))), ctxlist + [mv])
    # Narrowed from a bare except: a bare except would also swallow
    # KeyboardInterrupt/SystemExit; those must propagate.
    except Exception:
        raise util.Abort('could not access data from the %s database' % dbname)
    all_rows = c.fetchall()
    _close(conn)
    ret = {}
    # Building the {dst: src} dict for each ctxhash
    for ctxhash, src, dst in all_rows:
        if not dst:
            # ctx is registered but has no move/copy data: record an
            # empty dict so it is not treated as missing below
            ret.setdefault(ctxhash.encode('utf8'), {})
        else:
            ret.setdefault(ctxhash.encode('utf8'), {})[dst.encode('utf8')] = \
                src.encode('utf8')
    # use a set for O(1) membership tests instead of scanning a key list
    processed = set(ret)
    missing = [f for f in ctxlist if f not in processed]
    # The local database doesn't have the data for these ctx and hasn't
    # tried to retrieve it yet (askserver): fetch once, then retry
    if askserver and not remote and missing:
        _requestdata(repo, missing)
        add = retrievedatapkg(repo, missing, move=move, remote=remote,
                              askserver=False)
        ret.update(add)
    return ret
def retrieverawdata(repo, ctxlist, remote=False, askserver=True):
"""
retrieves {ctxhash: [src, dst, mv]} for ctxhash in ctxlist for moves or copies
"""
dbname, conn, c = _connect(repo, remote)
try:
c.execute('SELECT DISTINCT hash, source, destination, mv FROM Moves' +

View File

@ -84,7 +84,7 @@ def concludenode(orig, repo, rev, p1, p2, **kwargs):
"""
# this allows to trace rename information from the rebase which mercurial
# doesn't do today
cp = dbutil.retrievedata(repo, '0', move=False)
cp = dbutil.retrievedatapkg(repo, ['0'], move=False, askserver=False)['0']
_markchanges(repo, cp)
ret = orig(repo, rev, p1, p2, **kwargs)