sapling/hgext3rd/grepdiff.py
Jun Wu 9fea0c3f0c doc: replace "commit" where it is used as a noun with "changeset"
Summary:
"changeset" is a more official term and let's use it. Note that this patch
only changes documentation / i18n messages visible to the users and header
comment blocks to developers. Other places like comments in the code are
untouched.

With the "dialect" extension enabled, users will still see the more friendly
term - "commit".

Test Plan:
`arc unit`. Note the remotefilelog failure is probably unrelated - seems
related to ongoing / upcoming manifest refactoring upstream.

Reviewers: #sourcecontrol, rmcelroy

Reviewed By: rmcelroy

Subscribers: mjpieters

Differential Revision: https://phabricator.intern.facebook.com/D3900394

Signature: t1:3900394:1474470348:6a1b5691e2599cc47df18b227d56d1f9d3c7c906
2016-09-21 15:45:25 +01:00

169 lines
6.9 KiB
Python

# grepdiff.py
#
# Copyright 2016 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
import re
from mercurial import pathutil, registrar, revset, util
from mercurial.i18n import _
revsetpredicate = registrar.revsetpredicate()

# prefix that is assumed when a pattern does not carry a usable one
touchprefix = 'touch'

# Maps a pattern prefix to a predicate taking (adds, removes) - the pair of
# counters produced by ctxaddsremoves() - and telling whether a changeset
# with those counters should be selected.
prefixtoprocessors = {
    # presence checks
    "add": lambda adds, removes: adds > 0,
    "remove": lambda adds, removes: removes > 0,
    touchprefix: lambda adds, removes: adds > 0 or removes > 0,
    # comparisons between the two counters
    "delta": lambda adds, removes: adds != removes,
    "inc": lambda adds, removes: adds > removes,
    "dec": lambda adds, removes: adds < removes,
}
def getpatternandprocessor(args, repo=None):
    """Parse prefix and pattern from the provided arguments

    'args' is the revset argument list; args[0][1] holds the raw pattern
    string, for example 'add:hello world'.
    'repo' is optional. When it is given, its ui is used to warn the user
    about an unrecognized prefix; without it the fallback happens silently.

    The text before the first ':' is interpreted as follows:
      - no ':' at all: the whole string is the pattern, prefix is 'touch'
      - empty prefix (':foo'): 'foo' is the pattern, prefix is 'touch'
      - known prefix ('add:foo'): 'foo' is the pattern
      - unknown prefix ('http://x'): the whole string, colon included, is
        the pattern and prefix is 'touch'

    Returns a (compiled regex, processor) pair where processor is the
    matching entry of prefixtoprocessors."""
    pattern = args[0][1]
    prefix, colon, rest = pattern.partition(':')
    if not colon:
        # nothing to strip, the pattern is used verbatim
        prefix = touchprefix
    elif not prefix:
        # a leading ':' means "no prefix"; falling through with '' would
        # end up as a KeyError on the processor lookup below
        prefix, pattern = touchprefix, rest
    elif prefix in prefixtoprocessors:
        pattern = rest
    else:
        if repo is not None:
            repo.ui.warn(_('treating %s as a part of pattern\n')
                         % (prefix + ':'))
        prefix = touchprefix
    processor = prefixtoprocessors[prefix]
    # currently this regex always has re.M and re.I flags, we might
    # want to make it configurable in future
    pattern = util.re.compile(pattern, re.M | re.I)
    return pattern, processor

@revsetpredicate('grepdiff(pattern, [file], ...)')
def grepdiffpredicate(repo, subset, x):
    """grepdiff: a revset for code archeology

    Sample usages are:

      $ hg log --rev "grepdiff('add:command')" mercurial/commands.py
        will only match changesets that add 'command' somewhere in the diff

      $ hg log --rev "grepdiff('remove:command')" mercurial/commands.py
        will match changesets which remove 'command' somewhere in the diff

      $ hg log --rev "grepdiff('delta:command')" mercurial/commands.py
        will match changesets where the number of 'command' adds is different
        from the number of 'command' removes in the diff

      $ hg log --rev "grepdiff('touch:command')"
        will only match changesets which either add or remove 'command' at
        least once in the diff

      $ hg log --rev "grepdiff('inc:command')" folder/file1.py folder/file2.py
        will match changesets which increase the number of occurrences
        of 'command' in the specified files

      $ hg log --rev "grepdiff('dec:command')"
        will match changesets which decrease the number of occurrences
        of 'command'
    """
    err = _("wrong set of arguments passed to grepdiff revset")
    # at least one argument (the pattern) is required, any further
    # arguments are file names
    args = revset.getargs(x, 1, -1, err)
    files = None
    if len(args) > 1:
        files = set(pathutil.canonpath(repo.root, repo.getcwd(), arg[1])
                    for arg in args[1:])
    # repo is handed over so that an unknown prefix can be reported via ui
    pattern, processor = getpatternandprocessor(args, repo)

    def matcher(rev):
        # the processor decides based on the (adds, removes) counters
        return processor(*ctxaddsremoves(repo[rev], files, pattern))

    return subset.filter(matcher)
def _changeblocks(lines):
    """Yield (sign, text) for every run of consecutive same-sign diff lines

    'lines' are the lines of a single file diff with the two filename lines
    already removed. A changeblock is a set of consecutive change lines which
    share the same sign (+/-); their contents (sign stripped) are joined with
    '\\n' so that multi-line regex matches are possible."""
    sign, block = '', []
    for line in lines:
        if not line:
            # empty strings (e.g. the tail produced by splitting text that
            # ends with '\n') carry no sign and do not end the current block
            continue
        if line[0] == sign:
            # current block continues
            block.append(line[1:])
            continue
        if sign:
            # we know that current block is over so we should emit it
            yield sign, "\n".join(block)
        if line[0] in '+-':
            # new block starts here
            sign, block = line[0], [line[1:]]
        else:
            # other lines include the ones that start with @@ and
            # contain context line numbers or unchanged context lines
            # from source file.
            sign, block = '', []
    if sign:
        # the input ended while a block was still open
        yield sign, "\n".join(block)

def ctxaddsremoves(ctx, files, regexp):
    """Count the added and removed change blocks of a context that match

    'ctx' is a context to check
    'files' is a set of repo-based filenames we're interested in (None
    indicates all files)
    'regexp' is a compiled regular expression against which to match

    Returns an (addcount, removecount) pair: the number of '+' blocks and
    the number of '-' blocks in which 'regexp' was found. A block is counted
    once no matter how many times the expression matches inside it."""
    addcount = 0
    removecount = 0
    for diffitem in ctx.diff():
        # ctx.diff() is a generator that returns a list of strings that are
        # supposed to be printed and some of them are concatenations of
        # multiple '\n'-separated lines. Here's an example of such a list:
        # ["diff --git a/setup.py b/setup.py\n",
        #  "--- a/setup.py\n" +\
        #  "+++ b/setup.py\n" +\
        #  "@@ -1,7 +1,7 @@\n" +\
        #  " from distutils.core import setup, Extension\n" +\
        #  " \n" +\
        #  " setup(\n" +\
        #  "-    name='fbhgextensions',\n" +\
        #  "+    name='fbhgext',\n" +\
        #  "     version='0.1.0',\n" +\
        #  "     author='Durham Goode',\n" +\
        #  "     maintainer='Durham Goode',\n"]
        # Please note that this list in fact contains just two elements, the
        # second string is manually separated into individual lines as they
        # would've been printed.
        # It can be seen that the first element of the list starts with 'diff'
        # and is of no interest since it does not contain the actual changes.
        # The second element however has the changes that happened to some
        # file separated by '\n', so we want to parse that, find which ones
        # start with '+' or '-', group them into blocks and match the regex
        # against those blocks.
        if diffitem.startswith('diff'):
            # title line that starts the diff for some file, does not contain
            # the diff itself. the next iteration of this loop will hit the
            # actual diff line
            continue
        lines = diffitem.split('\n')
        if len(lines) < 2:
            # a minimum of two lines is needed to contain filenames
            continue
        if files:
            # check the filenames before doing any parsing so that diffs of
            # files we're not interested in cost next to nothing
            beforetablines = (ln.split("\t", 1)[0] for ln in lines[:2])
            filenames = (ln.split('/', 1)[1]
                         for ln in beforetablines if '/' in ln)
            if not any(fn for fn in filenames if fn in files):
                # this part of diff does not touch any of the files we're
                # interested in
                continue
        for mod, change in _changeblocks(lines[2:]):
            if not regexp.search(change):
                continue
            if mod == '+':
                addcount += 1
            else:
                removecount += 1
    return addcount, removecount