From 556c49918ce5e7773a0b0307eff7b0ab2c575cb6 Mon Sep 17 00:00:00 2001 From: Jun Wu Date: Thu, 14 Jun 2018 21:23:07 -0700 Subject: [PATCH] fsmonitor: store fsmonitor state in treestate Summary: The new treestate was designed to store fsmonitor state. Use it to make fsmonitor and dirstate state consistent, and avoid fsmonitor state invalidation. The "fsmonitor identity" check was removed as we now rely on the dirstate identity check - dirstate and fsmonitor state must be updated consistently - both updated or neither updated. Since this is the first dirstate that tracks "untracked" files, several places are adjusted (ex. dmap.dropfile, dmap.keys) to take the new untracked files into consideration. Reviewed By: wez Differential Revision: D7909172 fbshipit-source-id: 05fd64b25c67ae4b07bc8cfee2731c748205975e --- hgext/fsmonitor/__init__.py | 74 ++++++++++++++++++++------ hgext/fsmonitor/state.py | 52 +++++++++++++----- mercurial/context.py | 15 +++--- mercurial/dirstate.py | 27 ++++++++++ mercurial/rust/treestate/src/python.rs | 23 +++++++- mercurial/treestate.py | 55 +++++++++++++++++-- tests/test-dirstate-migrate.t | 12 +++-- tests/test-dirstate-race.t | 28 ---------- tests/test-dirstate.t | 2 +- 9 files changed, 217 insertions(+), 71 deletions(-) diff --git a/hgext/fsmonitor/__init__.py b/hgext/fsmonitor/__init__.py index 5798c411af..719d1ff890 100644 --- a/hgext/fsmonitor/__init__.py +++ b/hgext/fsmonitor/__init__.py @@ -308,7 +308,7 @@ def overridewalk(orig, self, match, subrepos, unknown, ignored, full=True): if unknown: # experimental config: experimental.fsmonitor.skipignore if not self._ui.configbool("experimental", "fsmonitor.skipignore"): - if _hashignore(ignore) != ignorehash and clock != "c:0:0": + if ignorehash and _hashignore(ignore) != ignorehash and clock != "c:0:0": # ignore list changed -- can't rely on Watchman state any more if state.walk_on_invalidate: return bail("ignore rules changed") @@ -327,6 +327,9 @@ def overridewalk(orig, self, match, subrepos, unknown, ignored, full=True): # standard dirstate implementation is in use. dmap = dmap._map nonnormalset = self._map.nonnormalset + self._ui.log( + "fsmonitor", "clock = %r len(nonnormal) = %d" % (clock, len(nonnormalset)) + ) copymap = self._map.copymap getkind = stat.S_IFMT @@ -497,10 +500,24 @@ def overridewalk(orig, self, match, subrepos, unknown, ignored, full=True): results[f] = None nf = iter(auditpass).next + droplist = [] + droplistappend = droplist.append for st in util.statfiles([join(f) for f in auditpass]): f = nf() if st or f in dmap: results[f] = st + else: + # '?' (untracked) file was deleted from the filesystem - remove it + # from treestate. + # + # We can only update the dirstate (and treestate) while holding the + # wlock. That happens inside poststatus.__call__ -> state.set. So + # buffer what files to "drop" so state.set can clean them up. + entry = dmap.get(f, None) + if entry and entry[0] == "?": + droplistappend(f) + # The droplist needs to match setlastclock() + state.setdroplist(droplist) for s in subrepos: del results[s] @@ -606,21 +623,33 @@ def overridestatus( stateunknown = listunknown if updatestate: - # Invalidate fsmonitor.state if dirstate changes. This avoids the - # following issue: - # 1. pid 11 writes dirstate - # 2. pid 22 reads dirstate and inconsistent fsmonitor.state - # 3. pid 22 calculates a wrong state - # 4. pid 11 writes fsmonitor.state - # Because before 1, - # 0. pid 11 invalidates fsmonitor.state - # will happen. - psbefore = lambda *args, **kwds: self._fsmonitorstate.invalidate( - reason="dirstate_change" - ) - self.addpostdsstatus(psbefore, afterdirstatewrite=False) - psafter = poststatus(startclock) - self.addpostdsstatus(psafter, afterdirstatewrite=True) + if "treestate" in self.requirements: + # No need to invalidate fsmonitor state. + # state.set needs to run before dirstate write, since it changes + # dirstate (treestate). + self.addpostdsstatus(poststatustreestate, afterdirstatewrite=False) + else: + # Invalidate fsmonitor.state if dirstate changes. This avoids the + # following issue: + # 1. pid 11 writes dirstate + # 2. pid 22 reads dirstate and inconsistent fsmonitor.state + # 3. pid 22 calculates a wrong state + # 4. pid 11 writes fsmonitor.state + # Because before 1, + # 0. pid 11 invalidates fsmonitor.state + # will happen. + # + # To avoid race conditions when reading without a lock, do things + # in this order: + # 1. Invalidate fsmonitor state + # 2. Write dirstate + # 3. Write fsmonitor state + psbefore = lambda *args, **kwds: self._fsmonitorstate.invalidate( + reason="dirstate_change" + ) + self.addpostdsstatus(psbefore, afterdirstatewrite=False) + psafter = poststatus(startclock) + self.addpostdsstatus(psafter, afterdirstatewrite=True) r = orig(node1, node2, match, listignored, listclean, stateunknown, listsubrepos) modified, added, removed, deleted, unknown, ignored, clean = r @@ -656,6 +685,19 @@ def overridestatus( return scmutil.status(modified, added, removed, deleted, unknown, ignored, clean) +def poststatustreestate(wctx, status): + clock = wctx.repo()._fsmonitorstate.getlastclock() + hashignore = None + notefiles = ( + status.modified + + status.added + + status.removed + + status.deleted + + status.unknown + ) + wctx.repo()._fsmonitorstate.set(clock, hashignore, notefiles) + + class poststatus(object): def __init__(self, startclock): self._startclock = startclock diff --git a/hgext/fsmonitor/state.py b/hgext/fsmonitor/state.py index 83c4362c1b..94e6995e12 100644 --- a/hgext/fsmonitor/state.py +++ b/hgext/fsmonitor/state.py @@ -26,23 +26,31 @@ class state(object): self._ui = repo.ui self._rootdir = pathutil.normasprefix(repo.root) self._lastclock = None - self._identity = util.filestat(None) self.mode = self._ui.config("fsmonitor", "mode") self.walk_on_invalidate = self._ui.configbool("fsmonitor", "walk_on_invalidate") self.timeout = float(self._ui.config("fsmonitor", "timeout")) + self._repo = repo + self._usetreestate = "treestate" in repo.requirements + self._droplist = [] def get(self): + """return clock, ignorehash, notefiles""" + if self._usetreestate: + clock = self._repo.dirstate.getclock() + # XXX: ignorehash is already broken, so return None + ignorehash = None + # note files are already included in nonnormalset, so they will be + # processed anyway, do not return a separate notefiles. + notefiles = [] + return clock, ignorehash, notefiles try: file = self._vfs("fsmonitor.state", "rb") except IOError as inst: - self._identity = util.filestat(None) if inst.errno != errno.ENOENT: raise return None, None, None - self._identity = util.filestat.fromfp(file) - versionbytes = file.read(4) if len(versionbytes) < 4: self._ui.log( @@ -104,17 +112,38 @@ class state(object): return clock, ignorehash, notefiles + def setdroplist(self, droplist): + """set a list of files to be dropped from dirstate upon 'set'. + + This is used to clean up deleted untracked files from treestate, which + tracks untracked files. + """ + self._droplist = droplist + def set(self, clock, ignorehash, notefiles): + if self._usetreestate: + ds = self._repo.dirstate + dmap = ds._map + changed = bool(self._droplist) + for path in self._droplist: + dmap.dropfile(path, None, real=True) + self._droplist = [] + for path in notefiles: + changed |= ds.needcheck(path) + # Avoid updating dirstate frequently if nothing changed. + # But do update dirstate if the clock is reset to None, or is + # moving away from None. + if not clock or changed or not ds.getclock(): + ds.setclock(clock) + return + if clock is None: self.invalidate(reason="no_clock") return - # Read the identity from the file on disk rather than from the open file - # pointer below, because the latter is actually a brand new file. - identity = util.filestat.frompath(self._vfs.join("fsmonitor.state")) - if identity != self._identity: - self._ui.debug("skip updating fsmonitor.state: identity mismatch\n") - return + # The code runs with a wlock taken, and dirstate has passed its + # identity check. So we can update both dirstate and fsmonitor state. + # See _poststatusfixup in context.py try: file = self._vfs("fsmonitor.state", "wb", atomictemp=True, checkambig=True) @@ -147,7 +176,6 @@ class state(object): raise if "fsmonitor_details" in getattr(self._ui, "track", ()): self._ui.log("fsmonitor_details", "fsmonitor state invalidated") - self._identity = util.filestat(None) def setlastclock(self, clock): if "fsmonitor_details" in getattr(self._ui, "track", ()): @@ -157,4 +185,4 @@ class state(object): def getlastclock(self): if "fsmonitor_details" in getattr(self._ui, "track", ()): self._ui.log("fsmonitor_details", "getlastclock: %r" % self._lastclock) - return self._lastclock + self._lastclock diff --git a/mercurial/context.py b/mercurial/context.py index 48160385e0..e0df0452a6 100644 --- a/mercurial/context.py +++ b/mercurial/context.py @@ -1890,12 +1890,15 @@ class workingctx(committablectx): normal = self._repo.dirstate.normal for f in fixup: normal(f) - # write changes out explicitly, because nesting - # wlock at runtime may prevent 'wlock.release()' - # after this block from doing so for subsequent - # changing files - tr = self._repo.currenttransaction() - self._repo.dirstate.write(tr) + + # write changes out explicitly, because nesting + # wlock at runtime may prevent 'wlock.release()' + # after this block from doing so for subsequent + # changing files + # + # This is a no-op if dirstate is not dirty. + tr = self._repo.currenttransaction() + self._repo.dirstate.write(tr) if poststatusafter: for ps in poststatusafter: diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py index 717d5f9b4c..cf679b82bf 100644 --- a/mercurial/dirstate.py +++ b/mercurial/dirstate.py @@ -427,6 +427,29 @@ class dirstate(object): def copies(self): return self._map.copymap + def needcheck(self, file): + """Mark file as need-check""" + if not self._istreestate: + raise error.ProgrammingError("needcheck is only supported by treestate") + changed = self._map.needcheck(file) + self._dirty |= changed + return changed + + def setclock(self, clock): + """Set fsmonitor clock""" + if not self._istreestate: + raise error.ProgrammingError("setclock is only supported by treestate") + clock = clock or "" + if clock != self._map._clock: + self._map._clock = clock + self._dirty = True + + def getclock(self): + """Get fsmonitor clock""" + if not self._istreestate: + raise error.ProgrammingError("getclock is only supported by treestate") + return self._map._clock or None + def _addpath(self, f, state, mode, size, mtime): oldstate = self[f] if state == "a" or oldstate == "r": @@ -1101,6 +1124,10 @@ class dirstate(object): ).iteritems(): try: t = dget(fn) + # This "?" state is only tracked by treestate, emulate the old + # behavior - KeyError. + if t[0] == "?": + raise KeyError except KeyError: if (listignored or mexact(fn)) and dirignore(fn): if listignored: diff --git a/mercurial/rust/treestate/src/python.rs b/mercurial/rust/treestate/src/python.rs index 54a512180f..f753d0f5c0 100644 --- a/mercurial/rust/treestate/src/python.rs +++ b/mercurial/rust/treestate/src/python.rs @@ -546,7 +546,7 @@ py_class!(class treestate |py| { } def walk(&self, setbits: u16, unsetbits: u16) -> PyResult> { - // Get all file paths with `setbits` set and `unsetbits` unset. + // Get all file paths with `setbits` all set, `unsetbits` all unset. assert_eq!(setbits & unsetbits, 0, "setbits cannot overlap with unsetbits"); let setbits = StateFlags::from_bits_truncate(setbits); let unsetbits = StateFlags::from_bits_truncate(unsetbits); @@ -569,6 +569,27 @@ py_class!(class treestate |py| { Ok(result) } + def tracked(&self) -> PyResult> { + // Not ideal as a special case. But the returned list is large and it needs to be fast. + // It's basically walk(EXIST_P1, 0) + walk(EXIST_P2, 0) + walk(EXIST_NEXT). + let mut state = self.state(py).borrow_mut(); + let mut result = Vec::new(); + let mask = StateFlags::EXIST_P1 | StateFlags::EXIST_P2 | StateFlags::EXIST_NEXT; + convert_result(py, state.visit( + &mut |components, _state| { + let path = PyBytes::new(py, &components.concat()); + result.push(path); + Ok(VisitorResult::NotChanged) + }, + &|_, dir| match dir.get_aggregated_state() { + None => true, + Some(state) => state.union.intersects(mask), + }, + &|_, file| file.state.intersects(mask), + ))?; + Ok(result) + } + def getfiltered( &self, path: PyBytes, filter: PyObject, filterid: u64 ) -> PyResult> { diff --git a/mercurial/treestate.py b/mercurial/treestate.py index ae0234cfff..57c7791209 100644 --- a/mercurial/treestate.py +++ b/mercurial/treestate.py @@ -147,7 +147,9 @@ class treestatemap(object): return result def keys(self): - return self._tree.walk(0, 0) + # Exclude untracked files, since the matcher interface expects __iter__ + # to not include untracked files. + return self._tree.tracked() def preload(self): pass @@ -181,8 +183,32 @@ class treestatemap(object): size = 0 self._tree.insert(f, state, mode, size, mtime, copied) - def dropfile(self, f, oldstate): - return self._tree.remove(f) + def dropfile(self, f, oldstate, real=False): + if real or not self._clock: + # If watchman clock is not set, watchman is not used, drop + # untracked files directly. This is also correct if watchman + # clock is reset to empty, since the next query will do a full + # crawl. + return self._tree.remove(f) + else: + # If watchman is used, treestate tracks "untracked" files before + # watchman clock. So only remove EXIST_* bits from the file. + # fsmonitor will do a stat check and drop(real=True) later. + # + # Typically, dropfile is used in 2 cases: + # - "hg forget": mark the file as "untracked". + # - "hg update": remove files only tracked by old commit. + entry = self._tree.get(f, None) + if not entry: + return False + else: + state, mode, size, mtime, copied = entry + state ^= state & ( + treestate.EXIST_NEXT | treestate.EXIST_P1 | treestate.EXIST_P2 + ) + state |= treestate.NEED_CHECK + self._tree.insert(f, state, mode, size, mtime, copied) + return True def clearambiguoustimes(self, _files, now): # TODO(quark): could _files be different from those with mtime = -1 @@ -357,3 +383,26 @@ class treestatemap(object): self._tree.insert(dest, state, mode, size, mtime, source) else: raise error.ProgrammingError("copy dest %r does not exist" % dest) + + # treestate specific methods + + def needcheck(self, path): + """Mark a file as NEED_CHECK, so it will be included by 'nonnormalset' + + Return True if the file was changed, False if it's already marked. + """ + existing = self._tree.get(path, None) + if not existing: + # The file was not in dirstate (untracked). Add it. + state = treestate.NEED_CHECK + mode = 0o666 + size = -1 + mtime = -1 + copied = None + else: + state, mode, size, mtime, copied = existing + if treestate.NEED_CHECK & state: + return False + state |= treestate.NEED_CHECK + self._tree.insert(path, state, mode, size, mtime, copied) + return True diff --git a/tests/test-dirstate-migrate.t b/tests/test-dirstate-migrate.t index 3faf18f97e..2b3a4cd9ca 100644 --- a/tests/test-dirstate-migrate.t +++ b/tests/test-dirstate-migrate.t @@ -39,7 +39,8 @@ R removed ! deleted ? untracked - dirstate v2 (using treestate*, offset *, 4 files tracked) (glob) + dirstate v2 (using treestate*, offset *, 4 files tracked) (glob) (no-fsmonitor !) + dirstate v2 (using treestate*, offset *, 5 files tracked) (glob) (fsmonitor !) ==== Migrating dirstate v1 to v0 ==== M modified R removed @@ -61,13 +62,15 @@ R removed ! deleted ? untracked - dirstate v2 (using treestate* offset *, 4 files tracked) (glob) + dirstate v2 (using treestate*, offset *, 4 files tracked) (glob) (no-fsmonitor !) + dirstate v2 (using treestate*, offset *, 5 files tracked) (glob) (fsmonitor !) ==== Migrating dirstate v2 to v0 ==== M modified R removed ! deleted ? untracked - dirstate v2 (using treestate*, offset *, 4 files tracked) (glob) + dirstate v2 (using treestate*, offset *, 4 files tracked) (glob) (no-fsmonitor !) + dirstate v2 (using treestate*, offset *, 5 files tracked) (glob) (fsmonitor !) M modified R removed ! deleted @@ -78,7 +81,8 @@ R removed ! deleted ? untracked - dirstate v2 (using treestate*, offset *, 4 files tracked) (glob) + dirstate v2 (using treestate*, offset *, 4 files tracked) (glob) (no-fsmonitor !) + dirstate v2 (using treestate*, offset *, 5 files tracked) (glob) (fsmonitor !) M modified R removed ! deleted diff --git a/tests/test-dirstate-race.t b/tests/test-dirstate-race.t index 0bd415458a..2f5f302e5e 100644 --- a/tests/test-dirstate-race.t +++ b/tests/test-dirstate-race.t @@ -174,34 +174,6 @@ treated differently in _checklookup() according to runtime platform. $ rm b -#if fsmonitor - -Create fsmonitor state. - - $ hg status - $ f --type .hg/fsmonitor.state - .hg/fsmonitor.state: file - -Test that invalidating fsmonitor state in the middle (which doesn't require the -wlock) causes the fsmonitor update to be skipped. -hg debugrebuilddirstate ensures that the dirstaterace hook will be called, but -it also invalidates the fsmonitor state. So back it up and restore it. - - $ mv .hg/fsmonitor.state .hg/fsmonitor.state.tmp - $ hg debugrebuilddirstate - $ mv .hg/fsmonitor.state.tmp .hg/fsmonitor.state - - $ cat > $TESTTMP/dirstaterace.sh < rm .hg/fsmonitor.state - > EOF - - $ hg status --config extensions.dirstaterace=$TESTTMP/dirstaterace.py --debug - skip updating fsmonitor.state: identity mismatch - $ f .hg/fsmonitor.state - .hg/fsmonitor.state: file not found - -#endif - Set up a rebase situation for issue5581. $ echo c2 > a diff --git a/tests/test-dirstate.t b/tests/test-dirstate.t index a0a3b5aa3f..3236c765a0 100644 --- a/tests/test-dirstate.t +++ b/tests/test-dirstate.t @@ -57,7 +57,7 @@ Prepare test repo: adding a $ hg ci -m1 -#if treestate-off +#if no-v2 Set mtime of a into the future: $ touch -t 202101011200 a