fastannotate: add a --deleted option

Summary:
This feature uses the linelog to show all lines that ever existed (including
deleted ones) in a file. It is helpful for seeing the history all the way back
to the beginning.

Sadly it has to be inefficient currently as we have chosen to not store line
content (but only numbers) in linelog. Calculating the revisions and line
numbers is very fast because of linelog but resolving the line contents is
painfully slow. We may want a key-value database in the future, answering the
query:

  (path, node, linenum) -> content

How slow is it? With the linelog pre-built, generating the output for
`mercurial/commands.py` requires resolving 400+ revisions and takes 10+
seconds.

Test Plan: Run the changed `test-fastannotate.t`

Reviewers: #sourcecontrol, rmcelroy

Reviewed By: rmcelroy

Subscribers: rmcelroy, mjpieters

Differential Revision: https://phabricator.intern.facebook.com/D3849503

Signature: t1:3849503:1475086235:83077c571746a7515b5ba75c4df37a1a400d9232
This commit is contained in:
Jun Wu 2016-09-11 22:52:29 +01:00
parent 58826b52c2
commit 89c9da2b06
4 changed files with 153 additions and 6 deletions

View File

@ -32,6 +32,7 @@ fastannotatecommandargs = {
('c', 'changeset', None, _('list the changeset')),
('l', 'line-number', None, _('show line number at the first '
'appearance')),
('e', 'deleted', None, _('show deleted lines (slow)')),
('h', 'no-content', None, _('do not show file content')),
('', 'no-follow', None, _("don't follow copies and renames")),
('', 'linear', None, _('enforce linear history, ignore second parent '
@ -88,6 +89,7 @@ def fastannotate(ui, repo, *pats, **opts):
opts[name] = True
formatter = faformatter.defaultformatter(ui, repo, opts)
showdeleted = opts.get('deleted', False)
showlines = not bool(opts.get('no_content'))
showpath = opts.get('file', False)
@ -99,12 +101,17 @@ def fastannotate(ui, repo, *pats, **opts):
master = rev
for path in ctx.walk(m):
result = lines = None
result = lines = existinglines = None
while True:
try:
with facontext.annotatecontext(repo, path, aopts, rebuild) as a:
result = a.annotate(rev, master=master, showpath=showpath,
showlines=showlines)
showlines=(showlines and
not showdeleted))
if showdeleted:
existinglines = set((l[0], l[1]) for l in result)
result = a.annotatealllines(
rev, showpath=showpath, showlines=showlines)
break
except faerror.CannotReuseError: # happens if master moves backwards
if rebuild: # give up since we have tried rebuild alreadyraise
@ -116,7 +123,7 @@ def fastannotate(ui, repo, *pats, **opts):
if showlines:
result, lines = result
formatter.write(result, lines)
formatter.write(result, lines, existinglines=existinglines)
_newopts = set([])
_knownopts = set([opt[1].replace('-', '_') for opt in

View File

@ -7,6 +7,7 @@
from __future__ import absolute_import
from collections import defaultdict
import contextlib
import os
@ -305,6 +306,122 @@ class _annotatecontext(object):
result = f in self.revmap
return result, f
def annotatealllines(self, rev, showpath=False, showlines=False):
    """(rev : str) -> [(node : str, linenum : int, path : str)]

    same output format as annotate, except that every line up to rev is
    listed, deleted ones included. calling annotate(rev, ...) before this
    gives better performance and accuracy.

    with showpath, the result goes through _addpathtoresult. with
    showlines, a (result, linecontents) pair is returned instead of the
    bare result list.
    """
    target = _getbase(scmutil.revsingle(self.repo, rev)[self.path])

    if target in self.revmap:
        # fastpath: nothing is written here, so the existing linelog and
        # revmap can be used as they are
        linelog = self.linelog
        revmap = self.revmap
    else:
        # follow first parents from rev until reaching something that is
        # already in the mainbranch (or until running out of parents)
        pending = [target]
        basetext = ''
        while True:
            parents = self._parentfunc(pending[-1])
            if not parents:
                break
            firstparent = parents[0]
            if firstparent in self.revmap:
                basetext = firstparent.data()
                break
            pending.append(firstparent)

        # self.linelog and self.revmap are both backed by the filesystem.
        # they need to be modified here, but the changes must not be
        # written back to the files. so work on in-memory copies - sort
        # of a "fork".
        linelog = linelogmod.linelog()
        linelog.copyfrom(self.linelog)
        revmap = revmapmod.revmap()
        revmap.copyfrom(self.revmap)

        # replay the chain, oldest first, onto the in-memory copies
        for fctx in reversed(pending):
            text = fctx.data()
            diffblocks = list(mdiff.allblocks(basetext, text))
            self._doappendrev(linelog, revmap, fctx, diffblocks)
            basetext = text

    alllines = linelog.getalllines()
    llrev = revmap.hsh2rev(target.node())
    # keep only the lines introduced at or before rev
    result = []
    for linerev, linenum in alllines:
        if linerev <= llrev:
            result.append((revmap.rev2hsh(linerev), linenum))

    # _refineannotateresult cannot be used here because resolving line
    # contents needs custom logic
    if showpath:
        result = self._addpathtoresult(result, revmap)
    if showlines:
        result = (result, self._resolvelines(result, revmap, linelog))
    return result
def _resolvelines(self, annotateresult, revmap, linelog):
    """(annotateresult) -> [line]. designed for annotatealllines.

    annotateresult is a list whose entries start with (node, linenum). the
    returned list has the same length; its i-th item is the content of the
    line described by annotateresult[i].

    this is probably the most inefficient code in the whole fastannotate
    directory. but we have made a decision that the linelog does not
    store line contents. so getting them requires random accesses to
    the revlog data, since they can be many, it can be very slow.
    """
    result = [None] * len(annotateresult)
    # {(llrev, linenum): [index into annotateresult / result]}
    key2idxs = defaultdict(list)
    for i, entry in enumerate(annotateresult):
        key2idxs[(revmap.hsh2rev(entry[0]), entry[1])].append(i)
    while key2idxs:
        # find an unresolved line that is not flagged as side branch,
        # and remember its linelog rev (left in "rev") to annotate.
        # a plain "for" loop never raises StopIteration, so running out
        # of candidates is detected by hsh staying None.
        hsh = None
        for (rev, _linenum), idxs in key2idxs.iteritems():
            if revmap.rev2flag(rev) & revmapmod.sidebranchflag:
                continue
            hsh = annotateresult[idxs[0]][0]
            break
        if hsh is None:
            # the remaining key2idxs are not in main branch, resolving them
            # using the hard way...
            revlines = {}  # {llrev: [line]}, each revision is read once
            for (rev, linenum), idxs in key2idxs.iteritems():
                if rev not in revlines:
                    hsh = annotateresult[idxs[0]][0]
                    if self.ui.debugflag:
                        self.ui.debug('fastannotate: reading %s line #%d '
                                      'to resolve lines %r\n'
                                      % (node.short(hsh), linenum, idxs))
                    fctx = self.repo[hsh][revmap.rev2path(rev)]
                    lines = mdiff.splitnewlines(fctx.data())
                    revlines[rev] = lines
                for idx in idxs:
                    result[idx] = revlines[rev][linenum]
            assert all(x is not None for x in result)
            return result

        # run the annotate and the lines should match to the file content
        self.ui.debug('fastannotate: annotate %s to resolve lines\n'
                      % node.short(hsh))
        linelog.annotate(rev)
        fctx = self.repo[hsh][revmap.rev2path(rev)]
        annotated = linelog.annotateresult
        lines = mdiff.splitnewlines(fctx.data())
        assert len(lines) == len(annotated)
        # resolve lines from the annotate result: every key seen at this
        # revision gets its content filled in and is dropped from the
        # unresolved set
        for i, line in enumerate(lines):
            k = annotated[i]
            if k in key2idxs:
                for idx in key2idxs[k]:
                    result[idx] = line
                del key2idxs[k]
    return result
def annotatedirectly(self, f, showpath, showlines):
"""like annotate, but when we know that f is in linelog.
f can be either a 20-char str (node) or a fctx. this is for perf - in

View File

@ -41,8 +41,8 @@ class defaultformatter(object):
self.ui = ui
self.funcmap = funcmap
def write(self, annotatedresult, lines=None):
"""(annotateresult, [str]) -> None. write content out.
def write(self, annotatedresult, lines=None, existinglines=None):
"""(annotateresult, [str], set([rev, linenum])) -> None. write output.
annotateresult can be [(node, linenum, path)], or [(node, linenum)]
"""
pieces = [] # [[str]]
@ -64,7 +64,15 @@ class defaultformatter(object):
padding = ' ' * (maxwidths[j] - len(p[i]))
msg += sep + padding + p[i]
if lines:
msg += ': ' + lines[i]
if existinglines is None:
msg += ': ' + lines[i]
else: # extra formatting showing whether a line exists
key = (annotatedresult[i][0], annotatedresult[i][1])
if key in existinglines:
msg += ': ' + lines[i]
else:
msg += ': ' + self.ui.label('-' + lines[i],
'diff.deleted')
if msg[-1] != '\n':
msg += '\n'

View File

@ -150,3 +150,18 @@ rename
0: 1
1: 2
2: 3
fastannotate --deleted
$ hg fastannotate --deleted -nf b
3 a: 0
5 b: 11
0 a: -1
1 a: -2
2 a: 3
5 b: 44
4 a: -4
$ hg fastannotate --deleted -r 3 -nf a
3 a: 0
0 a: 1
1 a: 2