mirror of
https://github.com/facebook/sapling.git
synced 2024-10-11 09:17:30 +03:00
make hg grep aware of big grep
Summary: This isn't 100% finished and doesn't yet handle some error cases, but I wanted to put it out there to get some knee-jerk reactions and suggestions on how to handle some things. This diff adds a `grep.usebiggrep` config option to `hg grep`. The idea is that we'll default this to on when the repo requires `eden` (or when we pull in the eden-specific site configuration). When run in big-grep mode, we first ask big grep for the results, then compute which files differ locally from the big grep corpus revision and run only those files through the local grep process, which avoids materializing files locally. I'm not 100% sure whether the current `repo.status` call will yield the correct results when the current rev is behind the big grep corpus revision. Reviewed By: quark-zju Differential Revision: D8416360 fbshipit-source-id: 952badb7a7ec74096b5c77cd79aa25e2327a7659
This commit is contained in:
parent
8fc182554c
commit
70cdd4a413
@ -59,6 +59,10 @@ Config::
|
|||||||
|
|
||||||
# output new hashes when nodes get updated
|
# output new hashes when nodes get updated
|
||||||
showupdated = False
|
showupdated = False
|
||||||
|
|
||||||
|
[grep]
|
||||||
|
# Use external grep index
|
||||||
|
usebiggrep = False
|
||||||
"""
|
"""
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
@ -91,7 +95,7 @@ from mercurial import (
|
|||||||
util,
|
util,
|
||||||
)
|
)
|
||||||
from mercurial.i18n import _
|
from mercurial.i18n import _
|
||||||
from mercurial.node import short
|
from mercurial.node import bin, short
|
||||||
|
|
||||||
from . import rebase
|
from . import rebase
|
||||||
|
|
||||||
@ -669,6 +673,34 @@ def histgrep(ui, repo, pattern, *pats, **opts):
|
|||||||
return commands.grep(ui, repo, pattern, *pats, **opts)
|
return commands.grep(ui, repo, pattern, *pats, **opts)
|
||||||
|
|
||||||
|
|
||||||
|
# Pattern matching the terminal escape sequences that stripansiescapes()
# removes.  Every alternative is anchored on a leading ESC (\x1b) byte and
# matching is case-insensitive.
ansiregex = re.compile(
    (
        r"\x1b("
        # SGR (colour/attribute) sequences with any number of ";"-separated
        # parameters, e.g. "\x1b[01;31m" or "\x1b[38;5;196m".  This has to be
        # tried before the two-parameter alternative further down, which
        # would otherwise consume "\x1b[01;31" and leave a stray "m" behind
        # in the stripped text.
        r"(\[\d*(;\d*)*m)|"
        r"(\[\??\d+[hl])|"
        r"([=<>a-kzNM78])|"
        r"([\(\)][a-b0-2])|"
        r"(\[\d{0,2}[ma-dgkjqi])|"
        r"(\[\d+;\d+[hfy]?)|"
        r"(\[;?[hf])|"
        r"(#[3-68])|"
        r"([01356]n)|"
        r"(O[mlnp-z]?)|"
        r"(/Z)|"
        r"(\d+)|"
        r"(\[\?\d;\d0c)|"
        r"(\d;\dR))"
    ),
    flags=re.IGNORECASE,
)


def stripansiescapes(s):
    """Removes ANSI escape sequences from a string.

    Every substring of ``s`` matched by ``ansiregex`` is deleted and the
    remaining text is returned unchanged.  Multi-parameter colour sequences
    such as ``"\\x1b[01;31m"`` are removed in full.

    Borrowed from https://stackoverflow.com/a/45448194/149111
    """
    return ansiregex.sub("", s)
|
||||||
|
|
||||||
|
|
||||||
del commands.table["grep"]
|
del commands.table["grep"]
|
||||||
|
|
||||||
|
|
||||||
@ -729,36 +761,60 @@ def grep(ui, repo, pattern, *pats, **opts):
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# If true, we'll use the `bgr` tool to perform the grep against some
|
||||||
|
# externally maintained index. We don't provide an implementation
|
||||||
|
# of that tool with this repo, just the optional client interface.
|
||||||
|
biggrep = ui.configbool("grep", "usebiggrep")
|
||||||
|
|
||||||
|
# Ask big grep to strip out the corpus dir (stripdir) and to include
|
||||||
|
# the corpus revision on the first line.
|
||||||
|
biggrepcmd = ["bgr", "--stripdir", "-r", "--expression", pattern]
|
||||||
|
|
||||||
|
args = []
|
||||||
|
|
||||||
if opts.get("after_context"):
|
if opts.get("after_context"):
|
||||||
cmd.append("-A")
|
args.append("-A")
|
||||||
cmd.append(opts.get("after_context"))
|
args.append(opts.get("after_context"))
|
||||||
if opts.get("before_context"):
|
if opts.get("before_context"):
|
||||||
cmd.append("-B")
|
args.append("-B")
|
||||||
cmd.append(opts.get("before_context"))
|
args.append(opts.get("before_context"))
|
||||||
if opts.get("context"):
|
if opts.get("context"):
|
||||||
cmd.append("-C")
|
args.append("-C")
|
||||||
cmd.append(opts.get("context"))
|
args.append(opts.get("context"))
|
||||||
if opts.get("ignore_case"):
|
if opts.get("ignore_case"):
|
||||||
cmd.append("-i")
|
args.append("-i")
|
||||||
if opts.get("files_with_matches"):
|
if opts.get("files_with_matches"):
|
||||||
cmd.append("-l")
|
args.append("-l")
|
||||||
if opts.get("line_number"):
|
if opts.get("line_number"):
|
||||||
cmd.append("-n")
|
cmd.append("-n")
|
||||||
if opts.get("invert_match"):
|
if opts.get("invert_match"):
|
||||||
|
if biggrep:
|
||||||
|
raise error.Abort("Cannot use invert_match option with big grep")
|
||||||
cmd.append("-v")
|
cmd.append("-v")
|
||||||
if opts.get("word_regexp"):
|
if opts.get("word_regexp"):
|
||||||
cmd.append("-w")
|
cmd.append("-w")
|
||||||
|
biggrepcmd[4] = "\\b%s\\b" % pattern
|
||||||
if opts.get("extended_regexp"):
|
if opts.get("extended_regexp"):
|
||||||
cmd.append("-E")
|
cmd.append("-E")
|
||||||
|
# re2 is already mostly compatible by default, so there are no options
|
||||||
|
# to apply for this.
|
||||||
if opts.get("fixed_strings"):
|
if opts.get("fixed_strings"):
|
||||||
cmd.append("-F")
|
cmd.append("-F")
|
||||||
|
# using bgs rather than bgr switches the engine to fixed string matches
|
||||||
|
biggrepcmd[0] = "bgs"
|
||||||
if opts.get("perl_regexp"):
|
if opts.get("perl_regexp"):
|
||||||
cmd.append("-P")
|
cmd.append("-P")
|
||||||
|
# re2 is already mostly pcre compatible, so there are no options
|
||||||
|
# to apply for this.
|
||||||
|
|
||||||
|
biggrepcmd += args
|
||||||
|
cmd += args
|
||||||
|
|
||||||
# color support, using the color extension
|
# color support, using the color extension
|
||||||
colormode = getattr(ui, "_colormode", "")
|
colormode = getattr(ui, "_colormode", "")
|
||||||
if colormode == "ansi":
|
if colormode == "ansi":
|
||||||
cmd.append("--color=always")
|
cmd.append("--color=always")
|
||||||
|
biggrepcmd.append("--color=on")
|
||||||
|
|
||||||
# Copy match specific options
|
# Copy match specific options
|
||||||
match_opts = {}
|
match_opts = {}
|
||||||
@ -778,21 +834,129 @@ def grep(ui, repo, pattern, *pats, **opts):
|
|||||||
# (passed in by xargs) as filenames.
|
# (passed in by xargs) as filenames.
|
||||||
cmd.append("--")
|
cmd.append("--")
|
||||||
ui.pager("grep")
|
ui.pager("grep")
|
||||||
p = subprocess.Popen(
|
|
||||||
cmd, bufsize=-1, close_fds=util.closefds, stdin=subprocess.PIPE
|
|
||||||
)
|
|
||||||
|
|
||||||
write = p.stdin.write
|
if biggrep:
|
||||||
|
reporoot = os.path.dirname(repo.path)
|
||||||
|
p = subprocess.Popen(
|
||||||
|
biggrepcmd,
|
||||||
|
bufsize=-1,
|
||||||
|
close_fds=util.closefds,
|
||||||
|
stdout=subprocess.PIPE,
|
||||||
|
cwd=reporoot,
|
||||||
|
)
|
||||||
|
out, err = p.communicate()
|
||||||
|
lines = out.rstrip().split("\n")
|
||||||
|
# the first line has the revision for the corpus; parse it out
|
||||||
|
# the format is "#HASH:timestamp"
|
||||||
|
corpusrev = lines[0][1:41]
|
||||||
|
lines = lines[1:]
|
||||||
|
|
||||||
|
resultsbyfile = {}
|
||||||
|
includelineno = opts.get("line_number")
|
||||||
|
|
||||||
|
for line in lines:
|
||||||
|
filename, lineno, colno, context = line.split(":", 3)
|
||||||
|
unescapedfilename = stripansiescapes(filename)
|
||||||
|
|
||||||
|
# filter to just the files that match the list supplied
|
||||||
|
# by the caller
|
||||||
|
if m(unescapedfilename):
|
||||||
|
# relativize the path to the CWD. Note that `filename` will
|
||||||
|
# often have escape sequences, so we do a substring replacement
|
||||||
|
filename = filename.replace(unescapedfilename, m.rel(unescapedfilename))
|
||||||
|
|
||||||
|
if unescapedfilename not in resultsbyfile:
|
||||||
|
resultsbyfile[unescapedfilename] = []
|
||||||
|
|
||||||
|
# re-assemble the output, but omit the column number
|
||||||
|
if includelineno:
|
||||||
|
resultsbyfile[unescapedfilename].append(
|
||||||
|
"%s:%s:%s\n" % (filename, lineno, context)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
resultsbyfile[unescapedfilename].append(
|
||||||
|
"%s:%s\n" % (filename, context)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Now check to see what has changed since the corpusrev
|
||||||
|
# we're going to need to grep those and stitch the results together
|
||||||
|
try:
|
||||||
|
changes = repo.status(bin(corpusrev), None, m)
|
||||||
|
except error.RepoLookupError:
|
||||||
|
# TODO: can we trigger a commit cloud fetch for this case?
|
||||||
|
|
||||||
|
# print the results we've gathered so far. We're not sure
|
||||||
|
# how things differ, so we'll follow up with a warning.
|
||||||
|
for lines in resultsbyfile.values():
|
||||||
|
for line in lines:
|
||||||
|
ui.write(line)
|
||||||
|
|
||||||
|
ui.warn(
|
||||||
|
_(
|
||||||
|
"The results above are based on revision %s\n"
|
||||||
|
"which is not available locally and thus may be inaccurate.\n"
|
||||||
|
"To get accurate results, run `hg pull` and re-run "
|
||||||
|
"your grep.\n"
|
||||||
|
)
|
||||||
|
% corpusrev
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
|
# which files we're going to search locally
|
||||||
|
filestogrep = set()
|
||||||
|
|
||||||
|
# files that have been changed or added need to be searched again
|
||||||
|
for f in changes.modified:
|
||||||
|
resultsbyfile.pop(f, None)
|
||||||
|
filestogrep.add(f)
|
||||||
|
for f in changes.added:
|
||||||
|
resultsbyfile.pop(f, None)
|
||||||
|
filestogrep.add(f)
|
||||||
|
|
||||||
|
# files that have been removed since the corpus rev cannot match
|
||||||
|
for f in changes.removed:
|
||||||
|
resultsbyfile.pop(f, None)
|
||||||
|
for f in changes.deleted:
|
||||||
|
resultsbyfile.pop(f, None)
|
||||||
|
|
||||||
|
# Having filtered out the changed files from the big grep results,
|
||||||
|
# we can now print those that remain.
|
||||||
|
for lines in resultsbyfile.values():
|
||||||
|
for line in lines:
|
||||||
|
ui.write(line)
|
||||||
|
|
||||||
|
# pass on any changed files to the local grep
|
||||||
|
if len(filestogrep) > 0:
|
||||||
|
# Ensure that the biggrep results are flushed before we
|
||||||
|
# start to intermingle with the local grep process output
|
||||||
|
ui.flush()
|
||||||
|
return _rungrep(cmd, filestogrep, m)
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
ds = repo.dirstate
|
ds = repo.dirstate
|
||||||
getkind = stat.S_IFMT
|
getkind = stat.S_IFMT
|
||||||
lnkkind = stat.S_IFLNK
|
lnkkind = stat.S_IFLNK
|
||||||
results = ds.walk(m, subrepos=[], unknown=False, ignored=False)
|
results = ds.walk(m, subrepos=[], unknown=False, ignored=False)
|
||||||
|
|
||||||
|
files = []
|
||||||
for f in sorted(results.keys()):
|
for f in sorted(results.keys()):
|
||||||
st = results[f]
|
st = results[f]
|
||||||
# skip symlinks and removed files
|
# skip symlinks and removed files
|
||||||
if st is None or getkind(st.st_mode) == lnkkind:
|
if st is None or getkind(st.st_mode) == lnkkind:
|
||||||
continue
|
continue
|
||||||
write(m.rel(f) + "\0")
|
files.append(f)
|
||||||
|
|
||||||
|
return _rungrep(cmd, files, m)
|
||||||
|
|
||||||
|
|
||||||
|
def _rungrep(cmd, files, match):
    """Run ``cmd`` and feed it the given files on its standard input.

    ``cmd`` is the argument list handed to ``subprocess.Popen``.  Each entry
    of ``files`` is converted with ``match.rel`` and written to the child's
    stdin followed by a NUL byte.  Once all names are written, stdin is
    closed so the child sees end-of-input.

    Returns the child's exit status as reported by ``wait()``.
    """
    proc = subprocess.Popen(
        cmd, stdin=subprocess.PIPE, bufsize=-1, close_fds=util.closefds
    )
    childstdin = proc.stdin

    for path in files:
        # NUL-terminate each name so the reader can split the stream
        # unambiguously.
        childstdin.write(match.rel(path) + "\0")

    childstdin.close()
    return proc.wait()
|
||||||
|
Loading…
Reference in New Issue
Block a user