sapling/hgext/grepdiff.py
Jun Wu 584656dff3 codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).

Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.

Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.

One drawback of the auto-formatter is that lines are no longer guaranteed to be
<= 80 chars. But that seems less important compared with the benefit the
auto-formatter provides.

As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.

Reviewed By: wez, phillco, simpkins, pkaush, singhsrb

Differential Revision: D8173629

fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-25 22:17:29 -07:00

174 lines
6.8 KiB
Python

# grepdiff.py
#
# Copyright 2016 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
import re
from mercurial import pathutil, registrar, revset, util
from mercurial.i18n import _
# Decorator factory used further down to declare grepdiff as a revset
# predicate of this extension.
revsetpredicate = registrar.revsetpredicate()
# Prefix that is assumed when a pattern does not start with a known
# "<prefix>:" qualifier.
touchprefix = "touch"

# Maps every supported pattern prefix to a predicate. Each predicate is called
# with two positional integers - the number of matching added blocks and the
# number of matching removed blocks - and tells whether a changeset with these
# counts should be selected.
prefixtoprocessors = {
    # the pattern shows up in added text
    "add": lambda added, removed: added > 0,
    # the pattern shows up in removed text
    "remove": lambda added, removed: removed > 0,
    # the pattern is added a different number of times than it is removed
    "delta": lambda added, removed: added != removed,
    # the pattern shows up in added or in removed text
    touchprefix: lambda added, removed: added > 0 or removed > 0,
    # the pattern is added more often than it is removed
    "inc": lambda added, removed: added > removed,
    # the pattern is removed more often than it is added
    "dec": lambda added, removed: added < removed,
}
def getpatternandprocessor(repo, args):
    """Parse prefix and pattern from the provided arguments

    ``args[0][1]`` is the raw pattern string handed to the revset, optionally
    qualified with one of the ``prefixtoprocessors`` keys followed by a colon,
    for example ``'add:hello world'``.

    - no colon: the whole string is the pattern, the default prefix is used
    - known prefix: the prefix is stripped from the pattern
    - empty prefix (``':foo'``): the colon is stripped, the default prefix is
      used
    - unknown prefix: a warning is printed through ``repo.ui``, the whole
      string (prefix and colon included) is the pattern and the default
      prefix is used

    Returns a ``(pattern, processor)`` tuple where ``pattern`` is the compiled
    regular expression and ``processor`` is the matching callable from
    ``prefixtoprocessors``.
    """
    pattern = args[0][1]
    prefix, colon, remainder = pattern.partition(":")
    if not colon:
        # unqualified pattern
        prefix = touchprefix
    elif not prefix:
        # nothing in front of the colon: there is no processor registered
        # under the empty name, so fall back to the default one
        prefix = touchprefix
        pattern = remainder
    elif prefix in prefixtoprocessors:
        pattern = remainder
    else:
        # ui objects expose warn(), there is no warning() method
        repo.ui.warn(_("treating %s as a part of pattern") % (prefix + ":"))
        prefix = touchprefix
    processor = prefixtoprocessors[prefix]
    # currently this regex always has re.M and re.I flags, we might
    # want to make it configurable in future
    pattern = util.re.compile(pattern, re.M | re.I)
    return pattern, processor
@revsetpredicate("grepdiff(pattern, [file], ...)")
def grepdiffpredicate(repo, subset, x):
    """grepdiff: a revset for code archeology

    Sample usages are:
      $ hg log --rev "grepdiff('add:command')" mercurial/commands.py
          will only match changesets that add 'command' somewhere in the diff
      $ hg log --rev "grepdiff('remove:command')" mercurial/commands.py
          will match changesets which remove 'command' somewhere in the diff
      $ hg log --rev "grepdiff('delta:command')" mercurial/commands.py
          will match changesets where the number of 'command' adds is different
          from the number of 'command' removes in the diff
      $ hg log --rev "grepdiff('touch:command')"
          will only match changesets which either add or remove 'command' at
          least once in the diff
      $ hg log --rev "grepdiff('inc:command')" folder/file1.py folder/file2.py
          will match changesets which increase the number of occurrences
          of 'command' in the specified files
      $ hg log --rev "grepdiff('dec:command')"
          will match changesets which decrease the number of occurrences
          of 'command'
    """
    err = _("wrong set of arguments passed to grepdiff revset")
    # one mandatory argument (the pattern), any number of optional ones
    args = revset.getargs(x, 1, -1, err)
    files = None
    if len(args) > 1:
        # every argument after the pattern is a file name; canonicalize it
        # against the repo root and the current working directory
        files = {
            pathutil.canonpath(repo.root, repo.getcwd(), arg[1]) for arg in args[1:]
        }
    pattern, processor = getpatternandprocessor(repo, args)

    def matcher(rev):
        # the processor decides, based on the add/remove counts of the
        # revision, whether the revision is selected
        adds, removes = ctxaddsremoves(repo[rev], files, pattern)
        return processor(adds, removes)

    return subset.filter(matcher)
def _headerfilenames(header):
    """Return the file names found on the '--- ' and '+++ ' lines of a header

    The lines are looked up by their marker rather than by position, because
    a header may carry extra lines (e.g. 'new file mode 100644') between the
    'diff' line and the file name lines. Anything after a tab (a timestamp)
    is dropped, and so is everything up to the first '/' (the 'a/' or 'b/'
    part). Lines without a '/' yield no name."""
    names = []
    for line in header.split("\n"):
        if not line.startswith(("--- ", "+++ ")):
            continue
        beforetab = line.split("\t", 1)[0]
        if "/" in beforetab:
            names.append(beforetab.split("/", 1)[1])
    return names


def _changeblocks(hunk):
    """Yield a (sign, text) pair for every change block of a diff hunk

    A change block is a run of consecutive change lines which share the same
    sign ('+' or '-'). The lines of a block are joined with '\\n' (their sign
    stripped) so that multi-line regex matches are possible. Lines starting
    with anything else ('@@' range lines, unchanged context lines) end the
    current block; empty strings produced by the split are ignored."""
    sign, block = "", []
    for line in hunk.split("\n"):
        if not line:
            continue
        linesign = line[0] if line[0] in "+-" else ""
        if linesign != sign:
            if sign:
                # the previous block is over, hand it out
                yield sign, "\n".join(block)
            sign, block = linesign, []
        if linesign:
            block.append(line[1:])
    if sign:
        # the hunk ended while a block was still open
        yield sign, "\n".join(block)


def ctxaddsremoves(ctx, files, regexp):
    """Count the change blocks of a context which match a given pattern

    'ctx' is a context to check
    'files' is a set of repo-based filenames we're interested in (None
    or an empty set indicates all files)
    'regexp' is a compiled regular expression against which to match

    Returns an (addcount, removecount) tuple: the number of added and of
    removed change blocks in which 'regexp' was found. A block counts once
    no matter how many times the pattern occurs in it."""
    addcount = 0
    removecount = 0
    # ctx.diff() is a generator that returns a list of strings that are
    # supposed to be printed and some of them are concatenations of
    # multiple '\n'-separated lines. Here's an example of such a list:
    # ["diff --git a/setup.py b/setup.py\n" +\
    #  "--- a/setup.py\n" +\
    #  "+++ b/setup.py\n",
    #  "@@ -1,7 +1,7 @@\n" +\
    #  " from distutils.core import setup, Extension\n" +\
    #  " \n" +\
    #  " setup(\n" +\
    #  "- name='fbhgextensions',\n" +\
    #  "+ name='fbhgext',\n" +\
    #  " version='0.1.0',\n" +\
    #  " author='Durham Goode',\n" +\
    #  " maintainer='Durham Goode',\n"]
    # Please note that this list in fact contains just two elements, the
    # second string is manually separated into individual lines as they
    # would've been printed.
    # It can be seen that the first element of the list starts with 'diff'
    # and contains the filenames for the upcoming chunks.
    # The second element however has the changes that happened to the
    # file separated by '\n', so we want to parse that, find which ones
    # start with '+' or '-', group them into blocks and match the regex
    # against those blocks.

    # as long as no header has been seen there is no file name to compare
    # against, so with a file filter in place nothing can be selected
    skipfile = bool(files)
    for diffitem in ctx.diff():
        if diffitem.startswith("diff"):
            # title lines that start the diff for some file, they do not
            # contain the diff itself. the next iterations of this loop will
            # hit the actual diff lines
            if files:
                skipfile = not any(
                    fn in files for fn in _headerfilenames(diffitem)
                )
            continue
        if skipfile:
            # this part of diff does not touch any of the files we're
            # interested in
            continue
        for sign, change in _changeblocks(diffitem):
            if not regexp.search(change):
                continue
            if sign == "+":
                addcount += 1
            else:
                removecount += 1
    return addcount, removecount