mirror of
https://github.com/facebook/sapling.git
synced 2024-10-06 23:07:18 +03:00
grepdiff: a revset for code archeology
Differential Revision: https://phabricator.intern.facebook.com/D3392198
This commit is contained in:
parent
13e132c771
commit
5628e94ca0
168
grepdiff.py
Normal file
168
grepdiff.py
Normal file
@ -0,0 +1,168 @@
|
||||
# grepdiff.py
|
||||
#
|
||||
# Copyright 2016 Facebook, Inc.
|
||||
#
|
||||
# This software may be used and distributed according to the terms of the
|
||||
# GNU General Public License version 2 or any later version.
|
||||
|
||||
import re
|
||||
|
||||
from mercurial import pathutil, registrar, revset, util
|
||||
from mercurial.i18n import _
|
||||
|
||||
revsetpredicate = registrar.revsetpredicate()
|
||||
|
||||
# Prefix that is assumed when a pattern does not name one explicitly.
touchprefix = 'touch'

# Maps every supported pattern prefix to a predicate. Each predicate receives
# the pair (matches found in added text, matches found in removed text) and
# tells whether a changeset with those counts should be selected.
prefixtoprocessors = {
    # something matching was added
    'add': lambda added, removed: added > 0,
    # something matching was removed
    'remove': lambda added, removed: removed > 0,
    # the added and removed counts differ
    'delta': lambda added, removed: added != removed,
    # something matching was either added or removed
    touchprefix: lambda added, removed: added > 0 or removed > 0,
    # more was added than removed
    'inc': lambda added, removed: added > removed,
    # more was removed than added
    'dec': lambda added, removed: removed > added,
}
|
||||
|
||||
def getpatternandprocessor(args, ui=None):
    """Parse prefix and pattern from the provided arguments

    'args' is the revset argument list; args[0][1] holds the raw pattern
    string, for example 'add:hello world'.
    'ui' is optional; when given, its warn() method is used to report a
    prefix that is not recognized. Callers that do not pass it get the same
    fallback behavior, just without the message.

    The text before the first ':' selects the processor:
      - no ':' at all      -> the whole string is the pattern, 'touch' is used
      - ':pattern'         -> empty prefix, 'touch' is used for 'pattern'
      - 'known:pattern'    -> the processor registered for 'known' is used
      - 'unknown:pattern'  -> the whole string (colon included) is treated
                              as the pattern and 'touch' is used

    Returns a (compiled regex, processor) tuple where processor is one of
    the callables stored in prefixtoprocessors."""
    pattern = args[0][1]
    prefix, colon, rest = pattern.partition(':')
    if not colon:
        # plain pattern without any prefix
        prefix = touchprefix
    elif not prefix:
        # an explicitly empty prefix previously fell through to a KeyError
        # on prefixtoprocessors['']; treat it as the default one instead
        prefix, pattern = touchprefix, rest
    elif prefix in prefixtoprocessors:
        pattern = rest
    else:
        # unknown prefix: keep 'prefix:' as a part of the pattern. This
        # function has no repo in scope, so the warning can only be issued
        # through an explicitly provided ui object.
        if ui is not None:
            ui.warn(_('treating %s as a part of pattern\n') % (prefix + ':'))
        prefix = touchprefix
    processor = prefixtoprocessors[prefix]
    # currently this regex always has re.M and re.I flags, we might
    # want to make it configurable in future
    pattern = util.re.compile(pattern, re.M | re.I)
    return pattern, processor
|
||||
|
||||
@revsetpredicate('grepdiff(pattern, [file], ...)')
def grepdiffpredicate(repo, subset, x):
    """grepdiff: a revset for code archeology

    Sample usages are:
      $ hg log --rev "grepdiff('add:command')" mercurial/commands.py
        will only match commits that add 'command' somewhere in the diff
      $ hg log --rev "grepdiff('remove:command')" mercurial/commands.py
        will match commits which remove 'command' somewhere in the diff
      $ hg log --rev "grepdiff('delta:command')" mercurial/commands.py
        will match commits where the number of 'command' adds is different
        from the number of 'command' removes in the diff
      $ hg log --rev "grepdiff('touch:command')"
        will only match commits which either add or remove 'command' at
        least once in the diff
      $ hg log --rev "grepdiff('inc:command')" folder/file1.py folder/file2.py
        will match commits which increase the number of occurrences
        of 'command' in the specified files
      $ hg log --rev "grepdiff('dec:command')"
        will match commits which decrease the number of occurrences
        of 'command'
    """
    err = _("wrong set of arguments passed to grepdiff revset")
    # at least the pattern is required, any number of files may follow it
    args = revset.getargs(x, 1, -1, err)
    files = None
    if len(args) > 1:
        # every argument after the pattern is a file name given relative to
        # the current directory; turn each into a repo-relative path
        files = set(pathutil.canonpath(repo.root, repo.getcwd(), arg[1])
                    for arg in args[1:])
    pattern, processor = getpatternandprocessor(args)

    def matcher(rev):
        # the processor decides on the (adds, removes) pair of this revision
        return processor(*ctxaddsremoves(repo[rev], files, pattern))

    return subset.filter(matcher)
|
||||
|
||||
def _diffchangeblocks(difflines):
    """Group consecutive change lines sharing a sign (+/-) into blocks

    'difflines' are the lines of one file diff that follow the two filename
    lines. Returns a list of (sign, text) tuples: sign is '+' or '-' and
    text is the block content joined with '\\n', with the leading sign
    character stripped from every line."""
    blocks = []
    sign, current = '', []
    for line in difflines:
        if not line:
            # empty strings (such as the one split() leaves after a
            # trailing '\n') carry no sign and do not end a block
            continue
        if line[0] == sign:
            # current block continues
            current.append(line[1:])
            continue
        if sign:
            # the sign has changed, so the current block is over
            blocks.append((sign, '\n'.join(current)))
        if line[0] in ('+', '-'):
            # new block starts here
            sign, current = line[0], [line[1:]]
        else:
            # other lines include the ones that start with @@ and contain
            # line numbers, or unchanged context lines from the source file
            sign, current = '', []
    if sign:
        # the diff ended while a block was still open
        blocks.append((sign, '\n'.join(current)))
    return blocks


def ctxaddsremoves(ctx, files, regexp):
    """Count pattern matches in the added and removed parts of a context

    'ctx' is a context to check; only its diff() method is used
    'files' is a set of repo-based filenames we're interested in (None
            or an empty set indicates all files)
    'regexp' is a compiled regular expression against which to match

    Returns an (addcount, removecount) tuple with the number of
    non-overlapping matches of 'regexp' found in added and in removed text
    respectively. Every occurrence is counted, so the 'inc', 'dec' and
    'delta' processors compare real occurrence counts."""
    addcount = 0
    removecount = 0
    for diffitem in ctx.diff():
        # ctx.diff() is a generator of strings that are supposed to be
        # printed; some of them are concatenations of multiple
        # '\n'-separated lines. Here's an example of two consecutive items:
        #   "diff --git a/setup.py b/setup.py\n"
        # and
        #   "--- a/setup.py\n"
        #   "+++ b/setup.py\n"
        #   "@@ -1,7 +1,7 @@\n"
        #   " setup(\n"
        #   "-      name='fbhgextensions',\n"
        #   "+      name='fbhgext',\n"
        #   "       version='0.1.0',\n"
        # The first item starts with 'diff' and is of no interest since it
        # does not contain the actual changes. The second one starts with
        # two filename lines followed by the changes themselves.
        if diffitem.startswith('diff'):
            # title line that starts the diff for some file; the next
            # iteration of this loop will hit the actual diff
            continue
        lines = diffitem.split('\n')
        if len(lines) < 2:
            # a minimum of two lines is needed to contain filenames
            continue
        if files:
            # filename lines look like '--- a/path\t<date>': drop whatever
            # follows a tab, then drop everything up to the first '/'.
            # Checking this before parsing skips the grouping work for
            # files nobody asked about.
            beforetab = (ln.split('\t', 1)[0] for ln in lines[:2])
            filenames = (ln.split('/', 1)[1] for ln in beforetab if '/' in ln)
            if not any(fn in files for fn in filenames):
                # this part of the diff does not touch any of the files
                # we're interested in
                continue
        # lines are joined into blocks in order to be able to perform
        # multi-line regex matches
        for sign, change in _diffchangeblocks(lines[2:]):
            hits = sum(1 for _m in regexp.finditer(change))
            if sign == '+':
                addcount += hits
            else:
                removecount += hits
    return addcount, removecount
|
||||
|
127
tests/test-grepdiff.t
Normal file
127
tests/test-grepdiff.t
Normal file
@ -0,0 +1,127 @@
|
||||
$ PYTHONPATH=$TESTDIR/..:$PYTHONPATH
|
||||
$ echo "[extensions]" >> $HGRCPATH
|
||||
$ echo "grepdiff = $TESTDIR/../grepdiff.py" >> $HGRCPATH
|
||||
|
||||
Setup repo
|
||||
|
||||
$ hg init repo
|
||||
$ cd repo
|
||||
|
||||
Commit some things
|
||||
$ echo "string one" > root
|
||||
$ hg ci -Am "string one in root"
|
||||
adding root
|
||||
$ echo "string one" > a
|
||||
$ hg ci -Am "string one in a"
|
||||
adding a
|
||||
$ echo "string two" > root
|
||||
$ hg ci -m "string two in root"
|
||||
$ echo "string three" >> a
|
||||
$ hg ci -m "string three in a"
|
||||
$ echo "int" >> root
|
||||
$ hg ci -m "int in root"
|
||||
$ echo "string" >> a
|
||||
$ hg ci -m "string in a"
|
||||
|
||||
Perform a grepdiff without a modifier over the whole repo
|
||||
$ hg log --rev "grepdiff('string \wne')" -p
|
||||
changeset: 0:66a661e5ba18
|
||||
user: test
|
||||
date: Thu Jan 01 00:00:00 1970 +0000
|
||||
summary: string one in root
|
||||
|
||||
diff -r 000000000000 -r 66a661e5ba18 root
|
||||
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
|
||||
+++ b/root Thu Jan 01 00:00:00 1970 +0000
|
||||
@@ -0,0 +1,1 @@
|
||||
+string one
|
||||
|
||||
changeset: 1:e4e29c42d1c9
|
||||
user: test
|
||||
date: Thu Jan 01 00:00:00 1970 +0000
|
||||
summary: string one in a
|
||||
|
||||
diff -r 66a661e5ba18 -r e4e29c42d1c9 a
|
||||
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
|
||||
+++ b/a Thu Jan 01 00:00:00 1970 +0000
|
||||
@@ -0,0 +1,1 @@
|
||||
+string one
|
||||
|
||||
changeset: 2:f90b5c1dcd6f
|
||||
user: test
|
||||
date: Thu Jan 01 00:00:00 1970 +0000
|
||||
summary: string two in root
|
||||
|
||||
diff -r e4e29c42d1c9 -r f90b5c1dcd6f root
|
||||
--- a/root Thu Jan 01 00:00:00 1970 +0000
|
||||
+++ b/root Thu Jan 01 00:00:00 1970 +0000
|
||||
@@ -1,1 +1,1 @@
|
||||
-string one
|
||||
+string two
|
||||
|
||||
Perform a "remove" grepdiff over a limited set of files
|
||||
$ hg log --rev "grepdiff('remove:string', root)" -p
|
||||
changeset: 2:f90b5c1dcd6f
|
||||
user: test
|
||||
date: Thu Jan 01 00:00:00 1970 +0000
|
||||
summary: string two in root
|
||||
|
||||
diff -r e4e29c42d1c9 -r f90b5c1dcd6f root
|
||||
--- a/root Thu Jan 01 00:00:00 1970 +0000
|
||||
+++ b/root Thu Jan 01 00:00:00 1970 +0000
|
||||
@@ -1,1 +1,1 @@
|
||||
-string one
|
||||
+string two
|
||||
|
||||
|
||||
Perform an "add" grepdiff over the whole repo
|
||||
$ hg log --rev "grepdiff('add:two')" -p
|
||||
changeset: 2:f90b5c1dcd6f
|
||||
user: test
|
||||
date: Thu Jan 01 00:00:00 1970 +0000
|
||||
summary: string two in root
|
||||
|
||||
diff -r e4e29c42d1c9 -r f90b5c1dcd6f root
|
||||
--- a/root Thu Jan 01 00:00:00 1970 +0000
|
||||
+++ b/root Thu Jan 01 00:00:00 1970 +0000
|
||||
@@ -1,1 +1,1 @@
|
||||
-string one
|
||||
+string two
|
||||
|
||||
|
||||
Perform a "delta" grepdiff over the whole repo with another revset used
|
||||
$ hg log --rev "(4:0) and grepdiff('delta:string')" -p
|
||||
changeset: 3:0173332b5f0e
|
||||
user: test
|
||||
date: Thu Jan 01 00:00:00 1970 +0000
|
||||
summary: string three in a
|
||||
|
||||
diff -r f90b5c1dcd6f -r 0173332b5f0e a
|
||||
--- a/a Thu Jan 01 00:00:00 1970 +0000
|
||||
+++ b/a Thu Jan 01 00:00:00 1970 +0000
|
||||
@@ -1,1 +1,2 @@
|
||||
string one
|
||||
+string three
|
||||
|
||||
changeset: 1:e4e29c42d1c9
|
||||
user: test
|
||||
date: Thu Jan 01 00:00:00 1970 +0000
|
||||
summary: string one in a
|
||||
|
||||
diff -r 66a661e5ba18 -r e4e29c42d1c9 a
|
||||
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
|
||||
+++ b/a Thu Jan 01 00:00:00 1970 +0000
|
||||
@@ -0,0 +1,1 @@
|
||||
+string one
|
||||
|
||||
changeset: 0:66a661e5ba18
|
||||
user: test
|
||||
date: Thu Jan 01 00:00:00 1970 +0000
|
||||
summary: string one in root
|
||||
|
||||
diff -r 000000000000 -r 66a661e5ba18 root
|
||||
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
|
||||
+++ b/root Thu Jan 01 00:00:00 1970 +0000
|
||||
@@ -0,0 +1,1 @@
|
||||
+string one
|
||||
|
Loading…
Reference in New Issue
Block a user