From 1145e2541b1bb9a0cdc23377534382bc214a2e11 Mon Sep 17 00:00:00 2001 From: Jun Wu Date: Sat, 6 Jul 2019 02:41:45 -0700 Subject: [PATCH] logging: migrate fsmonitor to new blackbox API Summary: The new log is typed and concise. Old log: [fsmonitor]> clock='c:1559600325:3956709:1:34762' len(nonnormal)=0 [fsmonitor]> setlastclock 'c:1559600325:3956709:1:36405' [fsmonitor]> setlastisfresh False [fsmonitor]> watchman returned ["x"] [fsmonitor]> getlastclock 'c:1559600325:3956709:1:36405' [fsmonitor]> set clock='c:1559600325:3956709:1:36405' notefiles=["x"] New log: [fsmonitor] clock: "c:1559600325:3956709:1:34762" -> "c:1559600325:3956709:1:36405"; need check: [] + ["x"] In JSON form: {"fsmonitor":{"new_clock":"c:1559600325:3956709:1:36425","new_files":{"len":1,"short_list":["x"]},"old_clock":"c:1559600325:3956709:1:34762"} The new logging does not cover every information exposed by the old logging. For example: - Non-treestate events like why fsmonitor state gets invalidated. Assuming most clients are on treestate now. These are removed. - "fsmonitor_state = normal | unavailable | fresh" scuba logging. This can be inferred, and will be added in a later diff. - New "notefiles". The next "fsmoniotr" event will log the information. Reviewed By: markbt Differential Revision: D15710672 fbshipit-source-id: 5c4cad08c0072c7dc711e5c1e65aa7552940699e --- edenscm/hgext/fsmonitor/__init__.py | 50 +++++++---- edenscm/hgext/fsmonitor/fsmonitorutil.py | 16 ++-- edenscm/hgext/fsmonitor/state.py | 40 --------- lib/blackbox/src/event.rs | 108 +++++++++++++++++++++++ tests/test-fsmonitor-blackbox.t | 13 +-- tests/test-fsmonitor-bumpclock.t | 26 ++---- 6 files changed, 164 insertions(+), 89 deletions(-) diff --git a/edenscm/hgext/fsmonitor/__init__.py b/edenscm/hgext/fsmonitor/__init__.py index 86a7fb08ed..4be72b2eb7 100644 --- a/edenscm/hgext/fsmonitor/__init__.py +++ b/edenscm/hgext/fsmonitor/__init__.py @@ -154,6 +154,7 @@ import sys import weakref from edenscm.mercurial import ( + blackbox, context, encoding, error, @@ -205,11 +206,6 @@ def _handleunavailable(ui, state, ex): ui.warn(str(ex) + "\n") if ex.invalidate: state.invalidate(reason="exception") - ui.log("fsmonitor", "watchman unavailable: %s\n", ex.msg) - ui.log("fsmonitor_status", fsmonitor_status="unavailable") - else: - ui.log("fsmonitor", "watchman exception: %s\n", ex) - ui.log("fsmonitor_status", fsmonitor_status="exception") def _hashignore(ignore): @@ -303,6 +299,18 @@ def wrappurge(orig, repo, match, findfiles, finddirs, includeignored): @util.timefunction("fsmonitorwalk", 1, "_ui") def overridewalk(orig, self, match, unknown, ignored, full=True): + fsmonitorevent = {} + try: + return _walk(orig, self, match, unknown, ignored, full, event=fsmonitorevent) + finally: + try: + blackbox.log({"fsmonitor": fsmonitorevent}) + except UnicodeDecodeError: + # test-adding-invalid-utf8.t hits this path + pass + + +def _walk(orig, self, match, unknown, ignored, full, event): """Replacement for dirstate.walk, hooking into Watchman. Whenever full is False, ignored is False, and the Watchman client is @@ -381,7 +389,9 @@ def overridewalk(orig, self, match, unknown, ignored, full=True): nonnormalset = self._map.nonnormalsetfiltered(dirfilter) else: nonnormalset = self._map.nonnormalset - self._ui.log("fsmonitor", "clock=%r len(nonnormal)=%d" % (clock, len(nonnormalset))) + + event["old_clock"] = clock + event["old_files"] = fsmonitorutil.shortlist(nonnormalset) copymap = self._map.copymap getkind = stat.S_IFMT @@ -438,12 +448,18 @@ def overridewalk(orig, self, match, unknown, ignored, full=True): }, ) except Exception as ex: + event["is_error"] = True _handleunavailable(self._ui, state, ex) self._watchmanclient.clearconnection() + # XXX: Legacy scuba logging. Remove this once the source of truth + # is moved to the Rust Event. + self._ui.log("fsmonitor_status", fsmonitor_status="exception") return bail("exception during run") else: # We need to propagate the last observed clock up so that we # can use it for our next query + event["new_clock"] = result["clock"] + event["is_fresh"] = result["is_fresh_instance"] state.setlastclock(result["clock"]) state.setlastisfresh(result["is_fresh_instance"]) if result["is_fresh_instance"]: @@ -453,17 +469,18 @@ def overridewalk(orig, self, match, unknown, ignored, full=True): fresh_instance = True # Ignore any prior noteable files from the state info notefiles = [] - - if fresh_instance: - self._ui.log("fsmonitor_status", fsmonitor_status="fresh") - else: - self._ui.log("fsmonitor_status", fsmonitor_status="normal") - if "fsmonitor" in getattr(self._ui, "track", ()): - filelist = [e["name"] for e in result["files"]] - self._ui.log( - "fsmonitor", "watchman returned %s" % fsmonitorutil.reprshort(filelist) + else: + count = len(result["files"]) + state.setwatchmanchangedfilecount(count) + event["new_files"] = fsmonitorutil.shortlist( + (e["name"] for e in result["files"]), count ) - state.setwatchmanchangedfilecount(len(result["files"])) + # XXX: Legacy scuba logging. Remove this once the source of truth + # is moved to the Rust Event. + if event["is_fresh"]: + self._ui.log("fsmonitor_status", fsmonitor_status="fresh") + else: + self._ui.log("fsmonitor_status", fsmonitor_status="normal") # for file paths which require normalization and we encounter a case # collision, we store our own foldmap @@ -891,7 +908,6 @@ def _racedetect(orig, self, other, s, match, listignored, listclean, listunknown msg = _( "[race-detector] files changed when scanning changes in working copy:\n%s" ) % "".join(" %s\n" % name for name in sorted(racenames)) - repo.ui.log("fsmonitor", msg) raise error.WorkingCopyRaced( msg, hint=_( diff --git a/edenscm/hgext/fsmonitor/fsmonitorutil.py b/edenscm/hgext/fsmonitor/fsmonitorutil.py index bced65226a..11ec441b47 100644 --- a/edenscm/hgext/fsmonitor/fsmonitorutil.py +++ b/edenscm/hgext/fsmonitor/fsmonitorutil.py @@ -6,9 +6,13 @@ # GNU General Public License version 2 or any later version. -def reprshort(filelist, limit=20): - """Like repr(filelist). But truncate it if it is too long""" - if len(filelist) <= limit: - return repr(filelist) - else: - return "%r and %s more entries" % (filelist[:limit], len(filelist) - limit) +def shortlist(listlike, count=None, limit=4): + """Return a value that can be converted to Rust blackbox::event::ShortList""" + shortlist = [] + for name in listlike: + shortlist.append(name) + if len(shortlist) > limit: + break + if count is None: + count = len(listlike) + return {"short_list": shortlist, "len": count} diff --git a/edenscm/hgext/fsmonitor/state.py b/edenscm/hgext/fsmonitor/state.py index 242d7de1c7..ffa723cbc8 100644 --- a/edenscm/hgext/fsmonitor/state.py +++ b/edenscm/hgext/fsmonitor/state.py @@ -15,8 +15,6 @@ import struct from edenscm.mercurial import pathutil, util from edenscm.mercurial.i18n import _ -from . import fsmonitorutil - _version = 4 _versionformat = ">I" @@ -61,22 +59,12 @@ class state(object): versionbytes = file.read(4) if len(versionbytes) < 4: - self._ui.log( - "fsmonitor", - "fsmonitor: state file only has %d bytes, " - "nuking state\n" % len(versionbytes), - ) self.invalidate(reason="state_file_truncated") return None, None, None try: diskversion = struct.unpack(_versionformat, versionbytes)[0] if diskversion != _version: # different version, nuke state and start over - self._ui.log( - "fsmonitor", - "fsmonitor: version switch from %d to " - "%d, nuking state\n" % (diskversion, _version), - ) self.invalidate(reason="state_file_wrong_version") return None, None, None @@ -84,24 +72,12 @@ class state(object): # state = hostname\0clock\0ignorehash\0 + list of files, each # followed by a \0 if len(state) < 3: - self._ui.log( - "fsmonitor", - "fsmonitor: state file truncated (expected " - "3 chunks, found %d), nuking state\n", - len(state), - ) self.invalidate(reason="state_file_truncated") return None, None, None diskhostname = state[0] hostname = socket.gethostname() if diskhostname != hostname: # file got moved to a different host - self._ui.log( - "fsmonitor", - 'fsmonitor: stored hostname "%s" ' - 'different from current "%s", nuking state\n' - % (diskhostname, hostname), - ) self.invalidate(reason="hostname_mismatch") return None, None, None @@ -113,11 +89,6 @@ class state(object): finally: file.close() - self._ui.log( - "fsmonitor", - "get clock=%r notefiles=%s" % (clock, fsmonitorutil.reprshort(notefiles)), - ) - return clock, ignorehash, notefiles def setdroplist(self, droplist): @@ -134,11 +105,6 @@ class state(object): self._ignorelist = ignorelist def set(self, clock, ignorehash, notefiles): - self._ui.log( - "fsmonitor", - "set clock=%r notefiles=%s" % (clock, fsmonitorutil.reprshort(notefiles)), - ) - if self._usetreestate: ds = self._repo.dirstate dmap = ds._map @@ -186,26 +152,20 @@ class state(object): file.write("\0") def invalidate(self, reason=None): - if reason: - self._ui.log("command_info", watchman_invalidate_reason=reason) try: os.unlink(os.path.join(self._rootdir, ".hg", "fsmonitor.state")) except OSError as inst: if inst.errno != errno.ENOENT: raise - self._ui.log("fsmonitor", "state invalidated") def setlastclock(self, clock): - self._ui.log("fsmonitor", "setlastclock %r" % clock) self._lastclock = clock def setlastisfresh(self, isfresh): - self._ui.log("fsmonitor", "setlastisfresh %r" % isfresh) self._lastisfresh = isfresh def setwatchmanchangedfilecount(self, filecount): self._lastchangedfilecount = filecount def getlastclock(self): - self._ui.log("fsmonitor", "getlastclock %r" % self._lastclock) return self._lastclock diff --git a/lib/blackbox/src/event.rs b/lib/blackbox/src/event.rs index a277a23be6..d51bf80555 100644 --- a/lib/blackbox/src/event.rs +++ b/lib/blackbox/src/event.rs @@ -105,6 +105,58 @@ pub enum Event { duration_ms: u64, }, + /// A watchman query with related context. + #[serde(rename = "FQ", alias = "fsmonitor")] + FsmonitorQuery { + #[serde( + rename = "1", + alias = "old_clock", + default, + skip_serializing_if = "is_default" + )] + old_clock: String, + + #[serde( + rename = "2", + alias = "old_files", + default, + skip_serializing_if = "is_default" + )] + old_files: ShortList, + + #[serde( + rename = "3", + alias = "new_clock", + default, + skip_serializing_if = "is_default" + )] + new_clock: String, + + #[serde( + rename = "4", + alias = "new_files", + default, + skip_serializing_if = "is_default" + )] + new_files: ShortList, + + #[serde( + rename = "F", + alias = "is_fresh", + default, + skip_serializing_if = "is_default" + )] + is_fresh: bool, + + #[serde( + rename = "E", + alias = "is_error", + default, + skip_serializing_if = "is_default" + )] + is_error: bool, + }, + /// Legacy blackbox message for compatibility. #[serde(rename = "L", alias = "legacy_log")] LegacyLog { @@ -272,6 +324,27 @@ pub enum Event { }, } +/// A truncated (file) list. +#[serde_alt] +#[derive(Serialize, Deserialize, Debug, PartialEq, Default)] +pub struct ShortList { + #[serde( + rename = "S", + alias = "short_list", + default, + skip_serializing_if = "is_default" + )] + short_list: Vec, + + #[serde( + rename = "L", + alias = "len", + default, + skip_serializing_if = "is_default" + )] + len: usize, +} + #[serde_alt] #[derive(Serialize, Deserialize, Debug, PartialEq)] pub enum NetworkOp { @@ -353,6 +426,18 @@ impl ToValue for Event { } } +impl fmt::Display for ShortList { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.short_list.len() >= self.len { + write!(f, "{:?}", self.short_list)?; + } else { + let remaining = self.len - self.short_list.len(); + write!(f, "{:?} and {} entries", self.short_list, remaining)?; + } + Ok(()) + } +} + impl fmt::Display for Event { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { use Event::*; @@ -400,6 +485,29 @@ impl fmt::Display for Event { exit_code, duration_ms, max_rss )?; } + FsmonitorQuery { + old_clock, + old_files, + new_clock, + new_files, + is_error, + is_fresh, + } => { + let msg = if *is_error { + format!("query failed") + } else if *is_fresh { + format!( + "clock: {:?} -> {:?}; need check: {} + all files", + old_clock, new_clock, old_files + ) + } else { + format!( + "clock: {:?} -> {:?}; need check: {} + {}", + old_clock, new_clock, old_files, new_files + ) + }; + write!(f, "[fsmonitor] {}", msg)?; + } LegacyLog { service, msg, diff --git a/tests/test-fsmonitor-blackbox.t b/tests/test-fsmonitor-blackbox.t index 91b2376785..a0db9b10ae 100644 --- a/tests/test-fsmonitor-blackbox.t +++ b/tests/test-fsmonitor-blackbox.t @@ -1,8 +1,6 @@ #require fsmonitor $ newrepo - $ enable blackbox - $ setconfig blackbox.track=fsmonitor $ hg status $ touch x $ hg status @@ -35,10 +33,7 @@ ? 8 ? 9 ? x - $ grep returned .hg/blackbox.log - *> watchman returned ['x'] (glob) - *> watchman returned [*] and 5 more entries (glob) - $ grep 'set clock' .hg/blackbox.log - *> set clock='*' notefiles=[] (glob) - *> set clock='*' notefiles=['x'] (glob) - *> set clock='*' notefiles=[*] and 6 more entries (glob) + $ hg blackbox --pattern '{"fsmonitor":"_"}' --no-timestamp --no-sid + [fsmonitor] clock: "c:0:0" -> "*"; need check: [] + all files (glob) + [fsmonitor] clock: "*" -> "*"; need check: [] + ["x"] (glob) + [fsmonitor] clock: "*" -> "*"; need check: ["x"] + [*] and 20 entries (glob) diff --git a/tests/test-fsmonitor-bumpclock.t b/tests/test-fsmonitor-bumpclock.t index 94ed3564d1..e2ca741b39 100644 --- a/tests/test-fsmonitor-bumpclock.t +++ b/tests/test-fsmonitor-bumpclock.t @@ -25,27 +25,19 @@ many files: len(filtered nonnormal) = 0 clock = * (glob) - $ enable blackbox - $ setconfig blackbox.track=watchman,fsmonitor - $ rm -rf .hg/blackbox* $ hg status - $ hg blackbox --no-timestamp --no-sid --pattern '{"legacy_log":{"service":"fsmonitor"}}' | grep -v command | sed "s/^.*\\] //;s/c:[0-9][0-9:]*/c:x/" - clock='c:x' len(nonnormal)=0 - setlastclock 'c:x' - setlastisfresh False - watchman returned ['a', 'b', 'c', 'd', 'e', 'f'] - getlastclock 'c:x' - set clock='c:x' notefiles=[] + $ hg blackbox --no-timestamp --no-sid --pattern '{"fsmonitor":"_"}' + [fsmonitor] clock: "c:*" -> "c:*"; need check: * + ["a", "b", "c", "d", "e"] and 1 entries (glob) -The watchman clock remains unchanged. Watchman still returns 4 files, which +The watchman clock remains unchanged. Watchman still returns 6 files, which means the "status" command could still be slow. $ rm -rf .hg/blackbox* $ hg status - $ hg blackbox --no-timestamp --no-sid --pattern '{"legacy_log":{"service":"fsmonitor"}}' | grep returned | sed "s/^.*\\] //;s/c:[0-9][0-9:]*/c:x/" - watchman returned ['a', 'b', 'c', 'd', 'e', 'f'] + $ hg blackbox --no-timestamp --no-sid --pattern '{"fsmonitor":"_"}' + [fsmonitor] clock: "c:*" -> "c:*"; need check: * + ["a", "b", "c", "d", "e"] and 1 entries (glob) With watchman-changed-file-threshold set, clock is bumped and watchman can return an empty list: @@ -55,12 +47,12 @@ return an empty list: $ rm -rf .hg/blackbox* $ hg status - $ hg blackbox --no-timestamp --no-sid --pattern '{"legacy_log":{"service":"fsmonitor"}}' | grep returned | sed "s/^.*\\] //;s/c:[0-9][0-9:]*/c:x/" - watchman returned ['a', 'b', 'c', 'd', 'e', 'f'] + $ hg blackbox --no-timestamp --no-sid --pattern '{"fsmonitor":"_"}' + [fsmonitor] clock: "c:*" -> "c:*"; need check: * + ["a", "b", "c", "d", "e"] and 1 entries (glob) $ sleep 1 $ rm -rf .hg/blackbox* $ hg status - $ hg blackbox --no-timestamp --no-sid --pattern '{"legacy_log":{"service":"fsmonitor"}}' | grep returned | sed "s/^.*\\] //;s/c:[0-9][0-9:]*/c:x/" - watchman returned [] + $ hg blackbox --no-timestamp --no-sid --pattern '{"fsmonitor":"_"}' + [fsmonitor] clock: "c:*" -> "c:*"; need check: [] + [] (glob)