fsmonitor: add threshold of nonnormal files to update

Summary: Previously, the fsmonitor state update logic skipped updating treestate if the wlock could not be obtained. D17468790 (8d4d0a66a2) made it wait for the wlock in the painful "watchman fresh instance" case. But things can still suck if it is not a "fresh instance" but there are just too many nonnormal files. This diff makes exceeding a threshold of nonnormal files trigger an fsmonitor state write, as an attempt to reduce the number of nonnormal files. In addition, `--debug` was changed to print more internal state for debugging. This will hopefully address issues where people have a large "nonnormal" treestate, suffer from the resulting bad performance, and cannot recover from it automatically. Reviewed By: DurhamG Differential Revision: D25794083 fbshipit-source-id: 741426cf31484d9318f9cfcab11d38da33ab5067
2024-10-06 06:47:41 +03:00 · 2021-01-12 16:23:59 -08:00 · 2021-01-12 16:23:59 -08:00 · f11d555a3a
commit f11d555a3a
parent cdda325255
4 changed files with 67 additions and 6 deletions
--- a/eden/scm/edenscm/hgext/fsmonitor/init.py
+++ b/eden/scm/edenscm/hgext/fsmonitor/init.py
@ -117,6 +117,16 @@ leading to better performance, at the cost of disk usage. Set this to a large
 value would update treestate less frequently, with the downside that
 performance might regress in some cases. (default: 200)

+::
+
+    [fsmonitor]
+    dirstate-nonnormal-file-threshold = 200
+
+Number of nonnormal files at or above which status waits for the wlock to
+update treestate. Usually status skips updating treestate if it cannot obtain
+the wlock, which in some cases can cause performance issues. This setting
+allows status to wait to obtain the wlock to avoid such issues. (default: 200)
+
 ::

    [fsmonitor]
@ -214,6 +224,7 @@ configitem("fsmonitor", "mode", default="on")
 configitem("fsmonitor", "timeout", default=10)
 configitem("fsmonitor", "track-ignore-files", default=True)
 configitem("fsmonitor", "walk_on_invalidate", default=False)
+configitem("fsmonitor", "dirstate-nonnormal-file-threshold", default=200)
 configitem("fsmonitor", "watchman-changed-file-threshold", default=200)
 configitem("fsmonitor", "warn-fresh-instance", default=False)
 configitem("fsmonitor", "fallback-on-watchman-exception", default=True)
@ -404,6 +415,7 @@ def _walk(self, match, event, span):
    event["old_clock"] = clock
    event["old_files"] = blackbox.shortlist(sorted(nonnormalset))
    span.record(oldclock=clock, oldfileslen=len(nonnormalset))
+    state.setlastnonnormalfilecount(len(nonnormalset))

    copymap = self.dirstate._map.copymap
    getkind = stat.S_IFMT
--- a/eden/scm/edenscm/hgext/fsmonitor/state.py
+++ b/eden/scm/edenscm/hgext/fsmonitor/state.py
@ -27,7 +27,10 @@ class state(object):
        self._rootdir = pathutil.normasprefix(repo.root)
        self._lastclock = None
        self._lastisfresh = False
+        # File count reported by watchman
        self._lastchangedfilecount = 0
+        # Non-normal file count stored in dirstate
+        self._lastnonnormalcount = 0

        self.mode = self._ui.config("fsmonitor", "mode")
        self.walk_on_invalidate = self._ui.configbool("fsmonitor", "walk_on_invalidate")
@ -167,5 +170,8 @@ class state(object):
    def setwatchmanchangedfilecount(self, filecount):
        self._lastchangedfilecount = filecount

+    def setlastnonnormalfilecount(self, count):
+        self._lastnonnormalcount = count
+
    def getlastclock(self):
        return self._lastclock
--- a/eden/scm/edenscm/mercurial/dirstate.py
+++ b/eden/scm/edenscm/mercurial/dirstate.py
@ -1216,19 +1216,33 @@ class dirstate(object):
                # since not updating watchman state leads to very painful
                # performance.
                freshinstance = False
+                nonnormalcount = 0
                try:
                    # pyre-fixme[16]: physicalfilesystem has no attr _fsmonitorstate
                    freshinstance = self._fs._fsmonitorstate._lastisfresh
+                    nonnormalcount = self._fs._fsmonitorstate._lastnonnormalcount
                except Exception:
                    pass
+                waitforlock = False
+                nonnormalthreshold = self._repo.ui.configint(
+                    "fsmonitor", "dirstate-nonnormal-file-threshold"
+                )
+                if (
+                    nonnormalthreshold is not None
+                    and nonnormalcount >= nonnormalthreshold
+                ):
+                    ui.debug(
+                        "poststatusfixup decides to wait for wlock since nonnormal file count %s >= %s\n"
+                        % (nonnormalcount, nonnormalthreshold)
+                    )
+                    waitforlock = True
                if freshinstance:
+                    waitforlock = True
                    ui.debug(
                        "poststatusfixup decides to wait for wlock since watchman reported fresh instance\n"
                    )

-                with self._repo.disableeventreporting(), self._repo.wlock(
-                    freshinstance
-                ):
+                with self._repo.disableeventreporting(), self._repo.wlock(waitforlock):
                    identity = self._repo.dirstate.identity()
                    if identity == oldid:
                        if poststatusbefore:
@ -1247,14 +1261,28 @@ class dirstate(object):
                        if poststatusafter:
                            for ps in poststatusafter:
                                ps(wctx, status)
-            except error.LockError:
-                if freshinstance:
+                    elif not util.istest():
+                        # Too noisy in tests.
+                        ui.debug(
+                            "poststatusfixup did not write dirstate because identity changed %s != %s\n"
+                            % (oldid, identity)
+                        )
+
+            except error.LockError as ex:
+                if waitforlock:
                    ui.write_err(
                        _(
-                            "warning: failed to update watchman state because wlock cannot be obtained\n"
+                            "warning: failed to update watchman state because wlock cannot be obtained (%s)\n"
                        )
+                        % (ex,)
                    )
                    ui.write_err(slowstatuswarning)
+                else:
+                    ui.debug(
+                        "poststatusfixup did not write dirstate because wlock cannot be obtained (%s)\n"
+                        % (ex,)
+                    )
+
            finally:
                # Even if the wlock couldn't be grabbed, clear out the list.
                self._repo.clearpostdsstatus()
--- a/eden/scm/tests/test-fsmonitor-warn-fresh.t
+++ b/eden/scm/tests/test-fsmonitor-warn-fresh.t
@ -11,9 +11,24 @@ A warning is printed for the first use

  $ hg status --debug

+Force waiting for the lock
+
+  $ touch A
+  $ hg add A
+  $ echo 1 > A
+  $ hg status --debug --config fsmonitor.dirstate-nonnormal-file-threshold=0
+  poststatusfixup decides to wait for wlock since nonnormal file count 1 >= 0
+  A A
+  $ hg debugstatus
+  len(dirstate) = 1
+  len(nonnormal) = 1
+  len(filtered nonnormal) = 1
+  clock = * (glob)
+
 hg status on a non-utf8 filename
  $ touch foo
  $ python2 -c 'open(b"\xc3\x28", "wb+").write("asdf")'
  $ hg status --traceback
  skipping invalid utf-8 filename: '*' (glob)
+  A A
  ? foo