mirror of
https://github.com/facebook/sapling.git
synced 2024-10-09 16:31:02 +03:00
fastannotate: implement the annotate algorithm
Summary: This diff implements the `annotate` algorithm. Unlike the vanilla one, the annotate method takes 2 revisions: the revision specified for annotating, and the head of the main branch. The algorithm does a "hybrid" annotate: it incrementally updates the linelog (the cache) so it can answer queries for any revision in the main branch, and it uses the traditional algorithm to deal with revisions not in the main branch, such as a side branch of a merge commit, or a user-specified revision that is not in the main branch. The main branch is supposed to be something like `master` or `@`, and their p1s. Building up the linelog (with merges handled reasonably for the main branch) and the non-linelog part that produces the final result share a lot of internal state and logic, so they are deeply coupled. Splitting them would probably reduce performance, or make it difficult (there is no clean way) to share internal state. If the caller only wants to build the linelog without annotating anything, just pass `rev = master`. While some attempts are made to support "merge" changesets, the result can still sometimes differ from the vanilla one. In those cases, both results make sense. It's really hard, if not impossible, to make the new implementation 100% the same as the vanilla one because of the linear history restriction of linelog, so I guess currently it's good enough. The differences will be covered by a `.t` test later. Test Plan: Code Review. A `.t` file will be added. Reviewers: #sourcecontrol, stash Reviewed By: stash Subscribers: stash, mjpieters Differential Revision: https://phabricator.intern.facebook.com/D3836438 Signature: t1:3836438:1473778829:27978479a01920833fa146f427178292ea1f5306
This commit is contained in:
parent
943e37217d
commit
60eb8a2c22
@ -12,12 +12,17 @@ import os
|
||||
|
||||
from fastannotate import (
|
||||
revmap as revmapmod,
|
||||
error as faerror,
|
||||
)
|
||||
|
||||
from mercurial import (
|
||||
lock as lockmod,
|
||||
mdiff,
|
||||
node,
|
||||
scmutil,
|
||||
util,
|
||||
)
|
||||
from mercurial.i18n import _
|
||||
|
||||
import linelog as linelogmod
|
||||
|
||||
@ -116,6 +121,302 @@ class _annotatecontext(object):
|
||||
self.revmap = revmap
|
||||
self.opts = opts
|
||||
|
||||
def annotate(self, rev, master=None, showpath=False, showlines=False):
    """incrementally update the cache so it includes revisions in the main
    branch till 'master'. and run annotate on 'rev', which may or may not be
    included in the main branch.

    rev is passed to canannotatedirectly to be resolved.

    if master is None (or otherwise falsy), do not update linelog. if master
    is a callable, call it to get the actual master, which can save some
    time if we don't need to resolve the master (the callable is not invoked
    when the fast path applies).

    the first value returned is the annotate result, it is [(node, linenum)]
    by default. [(node, linenum, path)] if showpath is True.

    if showlines is True, a second value will be returned, it is a list of
    corresponding line contents.

    when master is given and needs to be added to the linelog,
    _checklastmasterhead is consulted and raises if the linelog cannot be
    continued incrementally.
    """

    # fast path: if rev is in the main branch already
    directly, revfctx = self.canannotatedirectly(rev)
    if directly:
        if self.ui.debugflag:
            self.ui.debug('fastannotate: %s: no need to update linelog\n'
                          % self.path)
        return self.annotatedirectly(revfctx, showpath, showlines)

    # resolve master
    masterfctx = None
    if master:
        if callable(master):
            master = master()
        masterfctx = _getbase(scmutil.revsingle(self.repo,
                                                master)[self.path])
        if masterfctx in self.revmap:
            # already covered by the linelog, nothing to append
            masterfctx = None

    #                  ... - @ <- rev (can be an arbitrary changeset,
    #                 /                not necessarily a descendant
    #      master -> o                 of master)
    #                |
    #     a merge -> o         'o': new changesets in the main branch
    #                |\        '#': revisions in the main branch that
    #                o *            exist in linelog / revmap
    #                | .       '*': changesets in side branches, or
    # last master -> # .            descendants of master
    #                | .
    #                # *       joint: '#', and is a parent of a '*'
    #                |/
    #     a joint -> #         ^^^^ --- side branches
    #                |
    #                ^ --- main branch (in linelog)

    # these DFSes are similar to the traditional annotate algorithm.
    # we cannot really reuse the code for perf reason.

    # 1st DFS calculates merges, joint points, and needed.
    # "needed" is a simple reference counting dict to free items in
    # "hist", reducing its memory usage otherwise could be huge.
    initvisit = [revfctx]
    if masterfctx:
        initvisit.append(masterfctx)
    visit = initvisit[:]
    pcache = {}  # {fctx: [parent fctx]}
    needed = {revfctx: 1}
    hist = {}  # {fctx: ([(llrev or fctx, linenum)], text)}
    while visit:
        f = visit.pop()
        if f in pcache or f in hist:
            continue
        if f in self.revmap:  # in the old main branch, it's a joint
            llrev = self.revmap.hsh2rev(f.node())
            self.linelog.annotate(llrev)
            result = self.linelog.annotateresult
            hist[f] = (result, f.data())
            continue
        pl = self._parentfunc(f)
        pcache[f] = pl
        for p in pl:
            needed[p] = needed.get(p, 0) + 1
            if p not in pcache:
                visit.append(p)

    # 2nd (simple) DFS calculates new changesets in the main branch
    # ('o' nodes in the above graph), so we know when to update linelog.
    # only first parents are followed.
    newmainbranch = set()
    f = masterfctx
    while f and f not in self.revmap:
        newmainbranch.add(f)
        pl = pcache[f]
        if pl:
            f = pl[0]
        else:
            f = None
            break

    # f, if present, is the position where the last build stopped at, and
    # should be the "master" last time. check to see if we can continue
    # building the linelog incrementally. (we cannot if diverged)
    if masterfctx is not None:
        self._checklastmasterhead(f)

    if self.ui.debugflag:
        self.ui.debug('fastannotate: %s: %d new changesets in the main '
                      'branch\n' % (self.path, len(newmainbranch)))

    # prepare annotateresult so we can update linelog incrementally
    self.linelog.annotate(self.linelog.maxrev)

    # 3rd DFS does the actual annotate
    visit = initvisit[:]
    progress = 0
    while visit:
        f = visit[-1]
        if f in hist or f in self.revmap:
            visit.pop()
            continue

        # a changeset can only be processed once all its parents have
        # been; otherwise push the missing parents and revisit it later
        ready = True
        pl = pcache[f]
        for p in pl:
            if p not in hist:
                ready = False
                visit.append(p)
        if not ready:
            continue

        visit.pop()
        blocks = None  # mdiff blocks, used for appending linelog
        ismainbranch = (f in newmainbranch)
        # curr is the same as the traditional annotate algorithm,
        # if we only care about linear history (do not follow merge),
        # then curr is not actually used.
        assert f not in hist
        curr = _decorate(f)
        for i, p in enumerate(pl):
            bs = list(mdiff.allblocks(hist[p][1], curr[1]))
            if i == 0 and ismainbranch:
                # the diff against p1 is what gets written to linelog
                blocks = bs
            curr = _pair(hist[p], curr, bs)
            # drop the parent's entry once its last consumer is done
            if needed[p] == 1:
                del hist[p]
                del needed[p]
            else:
                needed[p] -= 1

        hist[f] = curr
        del pcache[f]

        if ismainbranch:  # need to write to linelog
            if not self.ui.quiet:
                progress += 1
                self.ui.progress(_('building cache'), progress,
                                 total=len(newmainbranch))
            bannotated = None
            if len(pl) == 2 and self.opts.followmerge:  # merge
                bannotated = curr[0]
            if blocks is None:  # no parents, add an empty one
                blocks = list(mdiff.allblocks('', curr[1]))
            self._appendrev(f, blocks, bannotated)

    if progress:  # clean progress bar
        self.ui.write()

    # entries are either linelog revisions (int) or fctxs; turn both into
    # nodes. a separate name is used so the loop variable above is not
    # shadowed.
    result = [
        ((self.revmap.rev2hsh(fr) if isinstance(fr, int) else fr.node()), l)
        for fr, l in hist[revfctx][0]]
    return self._refineannotateresult(result, revfctx, showpath, showlines)
|
||||
|
||||
def canannotatedirectly(self, rev):
    """(str) -> bool, fctx or node.

    return (True, f) if we can annotate without updating the linelog, pass
    f to annotatedirectly.

    return (False, f) if we need extra calculation. f is the fctx resolved
    from rev.

    rev may also be an integer revision number or None; those can never be
    a raw hash, so they always go through the revision lookup.
    """
    hsh = None
    # only sized, string-like values can be a binary (20) or hex (40) node;
    # calling len() on an int or None would raise TypeError
    if rev is not None and not isinstance(rev, int):
        if len(rev) == 20:
            hsh = rev
        elif len(rev) == 40:
            hsh = node.bin(rev)
    if hsh is not None and hsh in self.revmap:
        # best case: no filelog read, no fctx construction
        return True, hsh
    f = _getbase(scmutil.revsingle(self.repo, rev)[self.path])
    return f in self.revmap, f
|
||||
|
||||
def annotatedirectly(self, f, showpath, showlines):
    """like annotate, but when we know that f is in linelog.
    f can be either a 20-char str (node) or a fctx. this is for perf - in
    the best case, the user provides a node and we don't need to read the
    filelog or construct any filecontext.

    returns what _refineannotateresult returns for the linelog result.
    """
    # a raw node is bytes on Python 3 and str on Python 2 (where bytes is
    # str), so accept both
    if isinstance(f, (bytes, str)):
        hsh = f
    else:
        hsh = f.node()
    llrev = self.revmap.hsh2rev(hsh)
    # the caller guarantees f is known to revmap and is not a side branch
    assert llrev
    assert (self.revmap.rev2flag(llrev) & revmapmod.sidebranchflag) == 0
    self.linelog.annotate(llrev)
    # translate linelog revisions back to nodes
    result = [(self.revmap.rev2hsh(r), l)
              for r, l in self.linelog.annotateresult]
    return self._refineannotateresult(result, f, showpath, showlines)
|
||||
|
||||
def _refineannotateresult(self, result, f, showpath, showlines):
|
||||
"""add the missing path or line contents, they can be expensive.
|
||||
f could be either node or fctx.
|
||||
"""
|
||||
if showpath:
|
||||
result = self._addpathtoresult(result)
|
||||
if showlines:
|
||||
if isinstance(f, str): # f: node or fctx
|
||||
llrev = self.revmap.hsh2rev(f)
|
||||
fctx = self.repo[f][self.revmap.rev2path(llrev)]
|
||||
else:
|
||||
fctx = f
|
||||
lines = mdiff.splitnewlines(fctx.data())
|
||||
assert len(lines) == len(result)
|
||||
result = (result, lines)
|
||||
return result
|
||||
|
||||
def _appendrev(self, fctx, blocks, bannotated=None):
|
||||
self._doappendrev(self.linelog, self.revmap, fctx, blocks, bannotated)
|
||||
|
||||
@staticmethod
|
||||
def _doappendrev(linelog, revmap, fctx, blocks, bannotated=None):
|
||||
"""append a revision to linelog and revmap"""
|
||||
|
||||
def getllrev(f):
|
||||
"""(fctx) -> int"""
|
||||
# f should not be a linelog revision
|
||||
assert not isinstance(f, int)
|
||||
# f is a fctx, allocate linelog rev on demand
|
||||
hsh = f.node()
|
||||
rev = revmap.hsh2rev(hsh)
|
||||
if rev is None:
|
||||
rev = revmap.append(hsh, sidebranch=True, path=f.path())
|
||||
return rev
|
||||
|
||||
# append sidebranch revisions to revmap
|
||||
siderevs = []
|
||||
siderevmap = {} # node: int
|
||||
if bannotated is not None:
|
||||
for (a1, a2, b1, b2), op in blocks:
|
||||
if op != '=':
|
||||
# f could be either linelong rev, or fctx.
|
||||
siderevs += [f for f, l in bannotated[b1:b2]
|
||||
if not isinstance(f, int)]
|
||||
siderevs = set(siderevs)
|
||||
if fctx in siderevs: # mainnode must be appended seperately
|
||||
siderevs.remove(fctx)
|
||||
for f in siderevs:
|
||||
siderevmap[f] = getllrev(f)
|
||||
|
||||
# the changeset in the main branch, could be a merge
|
||||
llrev = revmap.append(fctx.node(), path=fctx.path())
|
||||
siderevmap[fctx] = llrev
|
||||
|
||||
for (a1, a2, b1, b2), op in reversed(blocks):
|
||||
if op == '=':
|
||||
continue
|
||||
if bannotated is None:
|
||||
linelog.replacelines(llrev, a1, a2, b1, b2)
|
||||
else:
|
||||
blines = [((r if isinstance(r, int) else siderevmap[r]), l)
|
||||
for r, l in bannotated[b1:b2]]
|
||||
linelog.replacelines_vec(llrev, a1, a2, blines)
|
||||
|
||||
def _addpathtoresult(self, annotateresult, revmap=None):
|
||||
"""(revmap, [(node, linenum)]) -> [(node, linenum, path)]"""
|
||||
if revmap is None:
|
||||
revmap = self.revmap
|
||||
nodes = set([n for n, l in annotateresult])
|
||||
paths = dict((n, revmap.rev2path(revmap.hsh2rev(n))) for n in nodes)
|
||||
return [(n, l, paths[n]) for n, l in annotateresult]
|
||||
|
||||
def _checklastmasterhead(self, fctx):
|
||||
"""check if fctx is the master's head last time, raise if not"""
|
||||
if fctx is None:
|
||||
llrev = 0
|
||||
else:
|
||||
llrev = self.revmap.hsh2rev(fctx.node())
|
||||
assert llrev
|
||||
if self.linelog.maxrev != llrev:
|
||||
raise faerror.CannotReuseError()
|
||||
|
||||
@util.propertycache
def _parentfunc(self):
    """-> (fctx) -> [fctx]

    build the function used to get the parents of a fctx. the options are
    read once here so the returned closure does not touch self.opts on
    every call.
    """
    followrename = self.opts.followrename
    followmerge = self.opts.followmerge

    def parents(f):
        pl = _parents(f, follow=followrename)
        if not followmerge:
            # linear history only: keep just the first parent
            pl = pl[:1]
        return pl

    return parents
|
||||
|
||||
def _unlinkpaths(paths):
|
||||
"""silent, best-effort unlink"""
|
||||
for path in paths:
|
||||
|
Loading…
Reference in New Issue
Block a user