matcher: use tree matcher if possible

Summary:
For glob patterns, the tree matcher has a better 'visitdir' implementation that
can greatly speed up some pattern matching, so try to use it where possible.

This should make things like `hg status -n -c "glob:path/to/something**"` much
faster.

With this change:

  % ~/hg/hg files 'glob:skycastle/src/test/resources**' --time --pager=off | wc -l
  time: real 0.110 secs (user 0.070+0.000 sys 0.020+0.000)
  118

Without this change:

  % hg files 'glob:skycastle/src/test/resources**' --time --pager=off | wc -l
  time: real 16.700 secs (user 15.660+0.000 sys 1.310+0.000)
  118

A config (`experimental.treematcher`, enabled by default) was added so we can
turn this off if it causes issues.

The fsmonitor code was slightly changed since treematcher.visitdir does not
accept a trailing slash.

Reviewed By: matthewdippel

Differential Revision: D18305373

fbshipit-source-id: 2ff927041ba8bab6912f125a7ae465be1a0ea659
This commit is contained in:
Jun Wu 2019-11-12 15:33:03 -08:00 committed by Facebook Github Bot
parent 04613287e2
commit 0cb781519d
8 changed files with 181 additions and 64 deletions

View File

@ -365,7 +365,7 @@ def _walk(self, match, event):
ignorevisitdir = self.dirstate._ignore.visitdir
def dirfilter(path):
result = ignorevisitdir(path)
result = ignorevisitdir(path.rstrip("/"))
return result == "all"
nonnormalset = self.dirstate._map.nonnormalsetfiltered(dirfilter)

View File

@ -4496,7 +4496,7 @@ def parents(ui, repo, file_=None, **opts):
if file_:
m = scmutil.match(ctx, (file_,), opts)
if m.anypats() or len(m.files()) != 1:
if len(m.files()) != 1:
raise error.Abort(_("can only specify an explicit filename"))
file_ = m.files()[0]
filenodes = []

View File

@ -289,6 +289,7 @@ coreconfigitem("experimental", "sparse-read", default=False)
coreconfigitem("experimental", "sparse-read.density-threshold", default=0.25)
coreconfigitem("experimental", "sparse-read.min-gap-size", default="256K")
coreconfigitem("experimental", "treemanifest", default=False)
coreconfigitem("experimental", "treematcher", default=True)
coreconfigitem("experimental", "uncommitondirtywdir", default=True)
coreconfigitem("experimental", "xdiff", default=True)
coreconfigitem("extensions", ".*", default=None, generic=True)

View File

@ -1173,9 +1173,10 @@ def _dispatch(req):
elif rpath:
ui.warn(_("warning: --repository ignored\n"))
from . import mdiff
from . import mdiff, match as matchmod
mdiff.init(ui)
matchmod.init(ui)
ui.log("command", "%s\n", msg)
if repo:

View File

@ -794,13 +794,109 @@ def normalizerootdir(dir, funcname):
return dir
def _kindpatstoglobs(kindpats, recursive=False):
    """Translate 'kindpats' into glob rules that a treematcher can consume.

    kindpats are expected to be normalized already, i.e. relative to the
    repo root.

    With recursive=True, `glob:a*` matches `a1` as well as `a1/b`. With
    recursive=False, `glob:a*` matches `a1` only. `path:` patterns are made
    recursive in either mode.

    Return None when the tree matcher is turned off, or when any pattern
    cannot be expressed as globs (ex. an unsupported pattern kind, or a
    regular expression that _convertretoglobs cannot convert).
    """
    if not _usetreematcher:
        return None

    rules = []
    for entry in kindpats:
        kind, pat = entry[0:2]
        if kind == "glob":
            # The treematcher (man gitignore) does not support csh-style
            # brackets (ex. "{a,b,c}"), so expand them into separate rules.
            for piece in pathmatcher.expandcurlybrackets(pat):
                rule = pathmatcher.normalizeglob(piece)
                rules.append(_makeglobrecursive(rule) if recursive else rule)
        elif kind == "path":
            # "." is a special case that comes from `util.normpath`; it
            # stands for the repo root and becomes the empty glob.
            rule = "" if pat == "." else pathmatcher.plaintoglob(pat)
            rules.append(_makeglobrecursive(rule))
        elif kind == "re":
            # Only a few regular expression shapes can be converted.
            converted = _convertretoglobs(pat)
            if converted is None:
                return None
            rules.extend(converted)
        else:
            return None
    return rules
def _makeglobrecursive(pat):
    """Make a glob pattern recursive by giving it a trailing "/**".

    The "/" separator is left out when 'pat' is empty or already ends with
    "/", so only "**" is appended in those cases.
    """
    separator = "" if (not pat or pat.endswith("/")) else "/"
    return pat + separator + "**"
# re:x/(?!y/)
# meaning: include x, but not x/y.
# NOTE: the second group is greedy and its character class contains "/", so
# it captures any trailing slash itself (the "/?" after it never consumes
# one). _convertretoglobs strips that slash from the captured name.
_repat1 = re.compile(r"^\^?([\w._/]+)/\(\?\!([\w._/]+)/?\)$")

# re:x/(?:.*/)?y
# meaning: glob:x/**/y
# A literal "(?:/|$)" suffix after y is accepted as well.
_repat2 = re.compile(r"^\^?([\w._/]+)/\(\?:\.\*/\)\?([\w._]+)(?:\(\?\:\/\|\$\))?$")


def _convertretoglobs(repat):
    """Attempt to convert a regular expression pattern to glob patterns.

    A single regular expression pattern might be converted into multiple
    glob patterns. A rule starting with "!" is a negative (exclude) rule.

    Return None if conversion is unsupported.

    >>> _convertretoglobs("abc*") is None
    True
    >>> _convertretoglobs("xx/yy/(?!zz/kk)")
    ['xx/yy/**', '!xx/yy/zz/kk/**']
    >>> _convertretoglobs("x/(?!y/)")
    ['x/**', '!x/y/**']
    >>> _convertretoglobs("x/y/(?:.*/)?BUCK")
    ['x/y/**/BUCK']
    """
    m = _repat1.match(repat)
    if m:
        prefix, excluded = m.groups()
        # Drop the trailing slash captured by the greedy group; otherwise
        # the negative rule would contain "//" (ex. "!x/y//**").
        excluded = excluded.rstrip("/")
        if not excluded:
            # Nothing but slashes was excluded; treat it as unsupported so
            # the caller falls back to regular expression matching.
            return None
        return ["%s/**" % prefix, "!%s/%s/**" % (prefix, excluded)]
    m = _repat2.match(repat)
    if m:
        prefix, name = m.groups()
        return ["%s/**/%s" % (prefix, name)]
    return None
class patternmatcher(basematcher):
def __init__(self, root, cwd, kindpats, ctx=None, badfn=None):
super(patternmatcher, self).__init__(root, cwd, badfn)
# kindpats are already normalized to be relative to repo-root.
# Can we use tree matcher?
rules = _kindpatstoglobs(kindpats, recursive=False)
if rules is not None:
matcher = treematcher(root, cwd, badfn=badfn, rules=rules)
# Replace self to 'matcher'.
self.__dict__ = matcher.__dict__
self.__class__ = matcher.__class__
else:
self._prefix = _prefix(kindpats)
self._pats, self.matchfn = _buildmatch(ctx, kindpats, "$", root)
self._files = _explicitfiles(kindpats)
self._prefix = _prefix(kindpats)
self._pats, self.matchfn = _buildmatch(ctx, kindpats, "$", root)
@propertycache
def _dirs(self):
@ -829,27 +925,35 @@ class includematcher(basematcher):
def __init__(self, root, cwd, kindpats, ctx=None, badfn=None):
super(includematcher, self).__init__(root, cwd, badfn)
self._pats, self.matchfn = _buildmatch(ctx, kindpats, "(?:/|$)", root)
# prefix is True if all patterns are recursive, so certain fast paths
# can be enabled. Unfortunately, it's too easy to break it (ex. by
# using "glob:*.c", "re:...", etc).
self._prefix = _prefix(kindpats)
roots, dirs = _rootsanddirs(kindpats)
# roots are directories which are recursively included.
# If self._prefix is True, then _roots can have a fast path for
# visitdir to return "all", marking things included unconditionally.
# If self._prefix is False, then that optimization is unsound because
# "roots" might contain entries that is not recursive (ex. roots will
# include "foo/bar" for pattern "glob:foo/bar/*.c").
self._roots = set(roots)
# dirs are directories which are non-recursively included.
# That is, files under that directory are included. But not
# subdirectories.
self._dirs = set(dirs)
# Try to use a more efficient visitdir implementation
visitdir = _buildvisitdir(kindpats)
if visitdir:
self.visitdir = visitdir
# Can we use tree matcher?
rules = _kindpatstoglobs(kindpats, recursive=True)
if rules is not None:
matcher = treematcher(root, cwd, badfn=badfn, rules=rules)
# Replace self to 'matcher'.
self.__dict__ = matcher.__dict__
self.__class__ = matcher.__class__
else:
self._pats, self.matchfn = _buildmatch(ctx, kindpats, "(?:/|$)", root)
# prefix is True if all patterns are recursive, so certain fast paths
# can be enabled. Unfortunately, it's too easy to break it (ex. by
# using "glob:*.c", "re:...", etc).
self._prefix = _prefix(kindpats)
roots, dirs = _rootsanddirs(kindpats)
# roots are directories which are recursively included.
# If self._prefix is True, then _roots can have a fast path for
# visitdir to return "all", marking things included unconditionally.
# If self._prefix is False, then that optimization is unsound because
# "roots" might contain entries that is not recursive (ex. roots will
# include "foo/bar" for pattern "glob:foo/bar/*.c").
self._roots = set(roots)
# dirs are directories which are non-recursively included.
# That is, files under that directory are included. But not
# subdirectories.
self._dirs = set(dirs)
# Try to use a more efficient visitdir implementation
visitdir = _buildvisitdir(kindpats)
if visitdir:
self.visitdir = visitdir
def visitdir(self, dir):
dir = normalizerootdir(dir, "visitdir")
@ -1518,3 +1622,11 @@ def readpatternfile(filepath, warn, sourceinfo=False):
patterns.append(linesyntax + line)
fp.close()
return patterns
# Module-wide switch read by _kindpatstoglobs. It stays on until init()
# replaces it with the configured value.
_usetreematcher = True


def init(ui):
    """Load the "experimental.treematcher" setting into this module.

    'ui' must provide configbool(section, name). The value it returns is
    stored in the module-level _usetreematcher flag.
    """
    global _usetreematcher
    enabled = ui.configbool("experimental", "treematcher")
    _usetreematcher = enabled

View File

@ -93,8 +93,11 @@ cd dir; hg parents -r 2 ../a
$ cd ..
$ hg parents -r 2 glob:a
abort: can only specify an explicit filename
[255]
changeset: 1:d786049f033a
user: test
date: Thu Jan 01 00:00:01 1970 +0000
summary: a
merge working dir with 2 parents, hg parents c

View File

@ -7,5 +7,5 @@
The root directory ("") should not be ignored
$ hg debugshell -c 'print(repo.dirstate._ignore(""))'
False
$ hg debugshell -c 'print(repo.dirstate._ignore.visitdir(""))'
True

View File

@ -44,7 +44,7 @@
f mammals/Procyonidae/raccoon mammals/Procyonidae/raccoon
f mammals/skunk mammals/skunk
$ hg debugwalk -I.
matcher: <includematcher includes='(?:)'>
matcher: <treematcher rules=['**']>
f beans/black beans/black
f beans/borlotti beans/borlotti
f beans/kidney beans/kidney
@ -76,7 +76,7 @@
f mammals/Procyonidae/raccoon Procyonidae/raccoon
f mammals/skunk skunk
$ hg debugwalk -X ../beans
matcher: <differencematcher m1=<alwaysmatcher>, m2=<includematcher includes='(?:beans(?:/|$))'>>
matcher: <differencematcher m1=<alwaysmatcher>, m2=<treematcher rules=['beans/**']>>
f fennel ../fennel
f fenugreek ../fenugreek
f fiddlehead ../fiddlehead
@ -85,10 +85,10 @@
f mammals/Procyonidae/raccoon Procyonidae/raccoon
f mammals/skunk skunk
$ hg debugwalk -I '*k'
matcher: <includematcher includes='(?:mammals\\/[^/]*k(?:/|$))'>
matcher: <treematcher rules=['mammals/*k/**']>
f mammals/skunk skunk
$ hg debugwalk -I 'glob:*k'
matcher: <includematcher includes='(?:mammals\\/[^/]*k(?:/|$))'>
matcher: <treematcher rules=['mammals/*k/**']>
f mammals/skunk skunk
$ hg debugwalk -I 'relglob:*k'
matcher: <includematcher includes='(?:(?:|.*/)[^/]*k(?:/|$))'>
@ -109,7 +109,7 @@
f fenugreek ../fenugreek
f mammals/skunk skunk
$ hg debugwalk -I 'path:beans'
matcher: <includematcher includes='(?:beans(?:/|$))'>
matcher: <treematcher rules=['beans/**']>
f beans/black ../beans/black
f beans/borlotti ../beans/borlotti
f beans/kidney ../beans/kidney
@ -215,7 +215,7 @@
f mammals/Procyonidae/raccoon Procyonidae/raccoon
f mammals/skunk skunk
$ hg debugwalk -I.
matcher: <includematcher includes='(?:mammals(?:/|$))'>
matcher: <treematcher rules=['mammals/**']>
f mammals/Procyonidae/cacomistle Procyonidae/cacomistle
f mammals/Procyonidae/coatimundi Procyonidae/coatimundi
f mammals/Procyonidae/raccoon Procyonidae/raccoon
@ -263,7 +263,7 @@
$ cd ..
$ hg debugwalk -Ibeans
matcher: <includematcher includes='(?:beans(?:/|$))'>
matcher: <treematcher rules=['beans/**']>
f beans/black beans/black
f beans/borlotti beans/borlotti
f beans/kidney beans/kidney
@ -271,56 +271,56 @@
f beans/pinto beans/pinto
f beans/turtle beans/turtle
$ hg debugwalk -I '{*,{b,m}*/*}k'
matcher: <includematcher includes='(?:(?:[^/]*|(?:b|m)[^/]*\\/[^/]*)k(?:/|$))'>
matcher: <treematcher rules=['*k/**', 'b*/*k/**', 'm*/*k/**']>
f beans/black beans/black
f fenugreek fenugreek
f mammals/skunk mammals/skunk
$ hg debugwalk -Ibeans mammals
matcher: <intersectionmatcher m1=<patternmatcher patterns='(?:mammals(?:/|$))'>, m2=<includematcher includes='(?:beans(?:/|$))'>>
matcher: <intersectionmatcher m1=<patternmatcher patterns='(?:mammals(?:/|$))'>, m2=<treematcher rules=['beans/**']>>
$ hg debugwalk -Inon-existent
matcher: <includematcher includes='(?:non\\-existent(?:/|$))'>
matcher: <treematcher rules=['non-existent/**']>
$ hg debugwalk -Inon-existent -Ibeans/black
matcher: <includematcher includes='(?:non\\-existent(?:/|$)|beans\\/black(?:/|$))'>
matcher: <treematcher rules=['non-existent/**', 'beans/black/**']>
f beans/black beans/black
$ hg debugwalk -Ibeans beans/black
matcher: <intersectionmatcher m1=<patternmatcher patterns='(?:beans\\/black(?:/|$))'>, m2=<includematcher includes='(?:beans(?:/|$))'>>
matcher: <intersectionmatcher m1=<patternmatcher patterns='(?:beans\\/black(?:/|$))'>, m2=<treematcher rules=['beans/**']>>
f beans/black beans/black exact
$ hg debugwalk -Ibeans/black beans
matcher: <intersectionmatcher m1=<patternmatcher patterns='(?:beans(?:/|$))'>, m2=<includematcher includes='(?:beans\\/black(?:/|$))'>>
matcher: <intersectionmatcher m1=<patternmatcher patterns='(?:beans(?:/|$))'>, m2=<treematcher rules=['beans/black/**']>>
f beans/black beans/black
$ hg debugwalk -Xbeans/black beans
matcher: <differencematcher m1=<patternmatcher patterns='(?:beans(?:/|$))'>, m2=<includematcher includes='(?:beans\\/black(?:/|$))'>>
matcher: <differencematcher m1=<patternmatcher patterns='(?:beans(?:/|$))'>, m2=<treematcher rules=['beans/black/**']>>
f beans/borlotti beans/borlotti
f beans/kidney beans/kidney
f beans/navy beans/navy
f beans/pinto beans/pinto
f beans/turtle beans/turtle
$ hg debugwalk -Xbeans/black -Ibeans
matcher: <differencematcher m1=<includematcher includes='(?:beans(?:/|$))'>, m2=<includematcher includes='(?:beans\\/black(?:/|$))'>>
matcher: <differencematcher m1=<treematcher rules=['beans/**']>, m2=<treematcher rules=['beans/black/**']>>
f beans/borlotti beans/borlotti
f beans/kidney beans/kidney
f beans/navy beans/navy
f beans/pinto beans/pinto
f beans/turtle beans/turtle
$ hg debugwalk -Xbeans/black beans/black
matcher: <differencematcher m1=<patternmatcher patterns='(?:beans\\/black(?:/|$))'>, m2=<includematcher includes='(?:beans\\/black(?:/|$))'>>
matcher: <differencematcher m1=<patternmatcher patterns='(?:beans\\/black(?:/|$))'>, m2=<treematcher rules=['beans/black/**']>>
f beans/black beans/black exact
$ hg debugwalk -Xbeans/black -Ibeans/black
matcher: <differencematcher m1=<includematcher includes='(?:beans\\/black(?:/|$))'>, m2=<includematcher includes='(?:beans\\/black(?:/|$))'>>
matcher: <differencematcher m1=<treematcher rules=['beans/black/**']>, m2=<treematcher rules=['beans/black/**']>>
$ hg debugwalk -Xbeans beans/black
matcher: <differencematcher m1=<patternmatcher patterns='(?:beans\\/black(?:/|$))'>, m2=<includematcher includes='(?:beans(?:/|$))'>>
matcher: <differencematcher m1=<patternmatcher patterns='(?:beans\\/black(?:/|$))'>, m2=<treematcher rules=['beans/**']>>
f beans/black beans/black exact
$ hg debugwalk -Xbeans -Ibeans/black
matcher: <differencematcher m1=<includematcher includes='(?:beans\\/black(?:/|$))'>, m2=<includematcher includes='(?:beans(?:/|$))'>>
matcher: <differencematcher m1=<treematcher rules=['beans/black/**']>, m2=<treematcher rules=['beans/**']>>
$ hg debugwalk 'glob:mammals/../beans/b*'
matcher: <patternmatcher patterns='(?:beans\\/b[^/]*$)'>
matcher: <treematcher rules=['beans/b*']>
f beans/black beans/black
f beans/borlotti beans/borlotti
$ hg debugwalk '-X*/Procyonidae' mammals
matcher: <differencematcher m1=<patternmatcher patterns='(?:mammals(?:/|$))'>, m2=<includematcher includes='(?:[^/]*\\/Procyonidae(?:/|$))'>>
matcher: <differencematcher m1=<patternmatcher patterns='(?:mammals(?:/|$))'>, m2=<treematcher rules=['*/Procyonidae/**']>>
f mammals/skunk mammals/skunk
$ hg debugwalk path:mammals
matcher: <patternmatcher patterns='(?:mammals(?:/|$))'>
matcher: <treematcher rules=['mammals/**']>
f mammals/Procyonidae/cacomistle mammals/Procyonidae/cacomistle
f mammals/Procyonidae/coatimundi mammals/Procyonidae/coatimundi
f mammals/Procyonidae/raccoon mammals/Procyonidae/raccoon
@ -361,7 +361,7 @@ Test absolute paths:
Test patterns:
$ hg debugwalk glob:\*
matcher: <patternmatcher patterns='(?:[^/]*$)'>
matcher: <treematcher rules=['*']>
f fennel fennel
f fenugreek fenugreek
f fiddlehead fiddlehead
@ -371,19 +371,19 @@ Test patterns:
adding glob:glob
warning: filename contains ':', which is reserved on Windows: 'glob:glob'
$ hg debugwalk glob:\*
matcher: <patternmatcher patterns='(?:[^/]*$)'>
matcher: <treematcher rules=['*']>
f fennel fennel
f fenugreek fenugreek
f fiddlehead fiddlehead
f glob:glob glob:glob
$ hg debugwalk glob:glob
matcher: <patternmatcher patterns='(?:glob$)'>
matcher: <treematcher rules=['glob']>
glob: $ENOENT$
$ hg debugwalk glob:glob:glob
matcher: <patternmatcher patterns='(?:glob\\:glob$)'>
matcher: <treematcher rules=['glob:glob']>
f glob:glob glob:glob exact
$ hg debugwalk path:glob:glob
matcher: <patternmatcher patterns='(?:glob\\:glob(?:/|$))'>
matcher: <treematcher rules=['glob:glob/**']>
f glob:glob glob:glob exact
$ rm glob:glob
$ hg addremove
@ -391,7 +391,7 @@ Test patterns:
#endif
$ hg debugwalk 'glob:**e'
matcher: <patternmatcher patterns='(?:.*e$)'>
matcher: <treematcher rules=['**/*e']>
f beans/turtle beans/turtle
f mammals/Procyonidae/cacomistle mammals/Procyonidae/cacomistle
@ -402,10 +402,10 @@ Test patterns:
f mammals/skunk mammals/skunk
$ hg debugwalk path:beans/black
matcher: <patternmatcher patterns='(?:beans\\/black(?:/|$))'>
matcher: <treematcher rules=['beans/black/**']>
f beans/black beans/black exact
$ hg debugwalk path:beans//black
matcher: <patternmatcher patterns='(?:beans\\/black(?:/|$))'>
matcher: <treematcher rules=['beans/black/**']>
f beans/black beans/black exact
$ hg debugwalk relglob:Procyonidae
@ -430,7 +430,7 @@ Test patterns:
f beans/pinto beans/pinto
f beans/turtle beans/turtle
$ hg debugwalk 'glob:mamm**'
matcher: <patternmatcher patterns='(?:mamm.*$)'>
matcher: <treematcher rules=['mamm*/**']>
f mammals/Procyonidae/cacomistle mammals/Procyonidae/cacomistle
f mammals/Procyonidae/coatimundi mammals/Procyonidae/coatimundi
f mammals/Procyonidae/raccoon mammals/Procyonidae/raccoon
@ -443,7 +443,7 @@ Test patterns:
f mammals/Procyonidae/raccoon mammals/Procyonidae/raccoon
f mammals/skunk mammals/skunk
$ hg debugwalk 'glob:j*'
matcher: <patternmatcher patterns='(?:j[^/]*$)'>
matcher: <treematcher rules=['j*']>
$ hg debugwalk NOEXIST
matcher: <patternmatcher patterns='(?:NOEXIST(?:/|$))'>
NOEXIST: * (glob)
@ -481,12 +481,12 @@ Test listfile and listfile0
$ $PYTHON -c "file('listfile0', 'wb').write('fenugreek\0new\0')"
$ hg debugwalk -I 'listfile0:listfile0'
matcher: <includematcher includes='(?:fenugreek(?:/|$)|new(?:/|$))'>
matcher: <treematcher rules=['fenugreek/**', 'new/**']>
f fenugreek fenugreek
f new new
$ $PYTHON -c "file('listfile', 'wb').write('fenugreek\nnew\r\nmammals/skunk\n')"
$ hg debugwalk -I 'listfile:listfile'
matcher: <includematcher includes='(?:fenugreek(?:/|$)|new(?:/|$)|mammals\\/skunk(?:/|$))'>
matcher: <treematcher rules=['fenugreek/**', 'new/**', 'mammals/skunk/**']>
f fenugreek fenugreek
f mammals/skunk mammals/skunk
f new new