fsmonitor: add threshold of nonnormal files to update

Summary:
Previously, the fsmonitor state update logic would skip updating treestate if the wlock
could not be obtained. D17468790 (8d4d0a66a2) made it wait for wlock for the painful "watchman fresh
instance" case. But performance can still be poor when it's not a "fresh instance" but there are
simply too many nonnormal files.

This diff makes it so that exceeding a threshold of nonnormal files will trigger an fsmonitor
state write in an attempt to reduce the number of nonnormal files. In addition,
`--debug` was changed to print more internal state for debugging.

This should hopefully address cases where people have a large "nonnormal"
treestate, suffer from the resulting bad performance, and cannot recover from it
automatically.

Reviewed By: DurhamG

Differential Revision: D25794083

fbshipit-source-id: 741426cf31484d9318f9cfcab11d38da33ab5067
This commit is contained in:
Jun Wu 2021-01-12 16:23:59 -08:00 committed by Facebook GitHub Bot
parent cdda325255
commit f11d555a3a
4 changed files with 67 additions and 6 deletions

View File

@ -117,6 +117,16 @@ leading to better performance, at the cost of disk usage. Set this to a large
value would update treestate less frequently, with the downside that
performance might regress in some cases. (default: 200)
::
[fsmonitor]
dirstate-nonnormal-file-threshold = 200
Number of nonnormal files to force obtaining the wlock to update treestate.
Usually status will skip updating treestate if it cannot obtain the wlock,
in some cases that can cause performance issues. This setting allows
status to wait to obtain the wlock to avoid such issues. (default: 200)
::
[fsmonitor]
@ -214,6 +224,7 @@ configitem("fsmonitor", "mode", default="on")
configitem("fsmonitor", "timeout", default=10)
configitem("fsmonitor", "track-ignore-files", default=True)
configitem("fsmonitor", "walk_on_invalidate", default=False)
configitem("fsmonitor", "dirstate-nonnormal-file-threshold", default=200)
configitem("fsmonitor", "watchman-changed-file-threshold", default=200)
configitem("fsmonitor", "warn-fresh-instance", default=False)
configitem("fsmonitor", "fallback-on-watchman-exception", default=True)
@ -404,6 +415,7 @@ def _walk(self, match, event, span):
event["old_clock"] = clock
event["old_files"] = blackbox.shortlist(sorted(nonnormalset))
span.record(oldclock=clock, oldfileslen=len(nonnormalset))
state.setlastnonnormalfilecount(len(nonnormalset))
copymap = self.dirstate._map.copymap
getkind = stat.S_IFMT

View File

@ -27,7 +27,10 @@ class state(object):
self._rootdir = pathutil.normasprefix(repo.root)
self._lastclock = None
self._lastisfresh = False
# File count reported by watchman
self._lastchangedfilecount = 0
# Non-normal file count stored in dirstate
self._lastnonnormalcount = 0
self.mode = self._ui.config("fsmonitor", "mode")
self.walk_on_invalidate = self._ui.configbool("fsmonitor", "walk_on_invalidate")
@ -167,5 +170,8 @@ class state(object):
def setwatchmanchangedfilecount(self, filecount):
self._lastchangedfilecount = filecount
def setlastnonnormalfilecount(self, count):
self._lastnonnormalcount = count
def getlastclock(self):
return self._lastclock

View File

@ -1216,19 +1216,33 @@ class dirstate(object):
# since not updating watchman state leads to very painful
# performance.
freshinstance = False
nonnormalcount = 0
try:
# pyre-fixme[16]: physicalfilesystem has no attr _fsmonitorstate
freshinstance = self._fs._fsmonitorstate._lastisfresh
nonnormalcount = self._fs._fsmonitorstate._lastnonnormalcount
except Exception:
pass
waitforlock = False
nonnormalthreshold = self._repo.ui.configint(
"fsmonitor", "dirstate-nonnormal-file-threshold"
)
if (
nonnormalthreshold is not None
and nonnormalcount >= nonnormalthreshold
):
ui.debug(
"poststatusfixup decides to wait for wlock since nonnormal file count %s >= %s\n"
% (nonnormalcount, nonnormalthreshold)
)
waitforlock = True
if freshinstance:
waitforlock = True
ui.debug(
"poststatusfixup decides to wait for wlock since watchman reported fresh instance\n"
)
with self._repo.disableeventreporting(), self._repo.wlock(
freshinstance
):
with self._repo.disableeventreporting(), self._repo.wlock(waitforlock):
identity = self._repo.dirstate.identity()
if identity == oldid:
if poststatusbefore:
@ -1247,14 +1261,28 @@ class dirstate(object):
if poststatusafter:
for ps in poststatusafter:
ps(wctx, status)
except error.LockError:
if freshinstance:
elif not util.istest():
# Too noisy in tests.
ui.debug(
"poststatusfixup did not write dirstate because identity changed %s != %s\n"
% (oldid, identity)
)
except error.LockError as ex:
if waitforlock:
ui.write_err(
_(
"warning: failed to update watchman state because wlock cannot be obtained\n"
"warning: failed to update watchman state because wlock cannot be obtained (%s)\n"
)
% (ex,)
)
ui.write_err(slowstatuswarning)
else:
ui.debug(
"poststatusfixup did not write dirstate because wlock cannot be obtained (%s)\n"
% (ex,)
)
finally:
# Even if the wlock couldn't be grabbed, clear out the list.
self._repo.clearpostdsstatus()

View File

@ -11,9 +11,24 @@ A warning is printed for the first use
$ hg status --debug
Force waiting for the lock
$ touch A
$ hg add A
$ echo 1 > A
$ hg status --debug --config fsmonitor.dirstate-nonnormal-file-threshold=0
poststatusfixup decides to wait for wlock since nonnormal file count 1 >= 0
A A
$ hg debugstatus
len(dirstate) = 1
len(nonnormal) = 1
len(filtered nonnormal) = 1
clock = * (glob)
hg status on a non-utf8 filename
$ touch foo
$ python2 -c 'open(b"\xc3\x28", "wb+").write("asdf")'
$ hg status --traceback
skipping invalid utf-8 filename: '*' (glob)
A A
? foo