fsmonitor: store fsmonitor state in treestate

Summary:
The new treestate was designed to store fsmonitor state. Use it to make
fsmonitor and dirstate state consistent, and avoid fsmonitor state
invalidation.

The "fsmonitor identity" check was removed as we now rely on the dirstate
identity check - dirstate and fsmonitor state must be updated consistently -
both updated or neither updated.

Since this is the first dirstate that tracks "untracked" files, several places
are adjusted (ex. dmap.dropfile, dmap.keys) to take the new untracked files
into consideration.

Reviewed By: wez

Differential Revision: D7909172

fbshipit-source-id: 05fd64b25c67ae4b07bc8cfee2731c748205975e
This commit is contained in:
Jun Wu 2018-06-14 21:23:07 -07:00 committed by Facebook Github Bot
parent a1c5a8fc76
commit 556c49918c
9 changed files with 217 additions and 71 deletions

View File

@ -308,7 +308,7 @@ def overridewalk(orig, self, match, subrepos, unknown, ignored, full=True):
if unknown:
# experimental config: experimental.fsmonitor.skipignore
if not self._ui.configbool("experimental", "fsmonitor.skipignore"):
if _hashignore(ignore) != ignorehash and clock != "c:0:0":
if ignorehash and _hashignore(ignore) != ignorehash and clock != "c:0:0":
# ignore list changed -- can't rely on Watchman state any more
if state.walk_on_invalidate:
return bail("ignore rules changed")
@ -327,6 +327,9 @@ def overridewalk(orig, self, match, subrepos, unknown, ignored, full=True):
# standard dirstate implementation is in use.
dmap = dmap._map
nonnormalset = self._map.nonnormalset
self._ui.log(
"fsmonitor", "clock = %r len(nonnormal) = %d" % (clock, len(nonnormalset))
)
copymap = self._map.copymap
getkind = stat.S_IFMT
@ -497,10 +500,24 @@ def overridewalk(orig, self, match, subrepos, unknown, ignored, full=True):
results[f] = None
nf = iter(auditpass).next
droplist = []
droplistappend = droplist.append
for st in util.statfiles([join(f) for f in auditpass]):
f = nf()
if st or f in dmap:
results[f] = st
else:
# '?' (untracked) file was deleted from the filesystem - remove it
# from treestate.
#
# We can only update the dirstate (and treestate) while holding the
# wlock. That happens inside poststatus.__call__ -> state.set. So
# buffer what files to "drop" so state.set can clean them up.
entry = dmap.get(f, None)
if entry and entry[0] == "?":
droplistappend(f)
# The droplist needs to match setlastclock()
state.setdroplist(droplist)
for s in subrepos:
del results[s]
@ -606,21 +623,33 @@ def overridestatus(
stateunknown = listunknown
if updatestate:
# Invalidate fsmonitor.state if dirstate changes. This avoids the
# following issue:
# 1. pid 11 writes dirstate
# 2. pid 22 reads dirstate and inconsistent fsmonitor.state
# 3. pid 22 calculates a wrong state
# 4. pid 11 writes fsmonitor.state
# Because before 1,
# 0. pid 11 invalidates fsmonitor.state
# will happen.
psbefore = lambda *args, **kwds: self._fsmonitorstate.invalidate(
reason="dirstate_change"
)
self.addpostdsstatus(psbefore, afterdirstatewrite=False)
psafter = poststatus(startclock)
self.addpostdsstatus(psafter, afterdirstatewrite=True)
if "treestate" in self.requirements:
# No need to invalidate fsmonitor state.
# state.set needs to run before dirstate write, since it changes
# dirstate (treestate).
self.addpostdsstatus(poststatustreestate, afterdirstatewrite=False)
else:
# Invalidate fsmonitor.state if dirstate changes. This avoids the
# following issue:
# 1. pid 11 writes dirstate
# 2. pid 22 reads dirstate and inconsistent fsmonitor.state
# 3. pid 22 calculates a wrong state
# 4. pid 11 writes fsmonitor.state
# Because before 1,
# 0. pid 11 invalidates fsmonitor.state
# will happen.
#
# To avoid race conditions when reading without a lock, do things
# in this order:
# 1. Invalidate fsmonitor state
# 2. Write dirstate
# 3. Write fsmonitor state
psbefore = lambda *args, **kwds: self._fsmonitorstate.invalidate(
reason="dirstate_change"
)
self.addpostdsstatus(psbefore, afterdirstatewrite=False)
psafter = poststatus(startclock)
self.addpostdsstatus(psafter, afterdirstatewrite=True)
r = orig(node1, node2, match, listignored, listclean, stateunknown, listsubrepos)
modified, added, removed, deleted, unknown, ignored, clean = r
@ -656,6 +685,19 @@ def overridestatus(
return scmutil.status(modified, added, removed, deleted, unknown, ignored, clean)
def poststatustreestate(wctx, status):
clock = wctx.repo()._fsmonitorstate.getlastclock()
hashignore = None
notefiles = (
status.modified
+ status.added
+ status.removed
+ status.deleted
+ status.unknown
)
wctx.repo()._fsmonitorstate.set(clock, hashignore, notefiles)
class poststatus(object):
def __init__(self, startclock):
self._startclock = startclock

View File

@ -26,23 +26,31 @@ class state(object):
self._ui = repo.ui
self._rootdir = pathutil.normasprefix(repo.root)
self._lastclock = None
self._identity = util.filestat(None)
self.mode = self._ui.config("fsmonitor", "mode")
self.walk_on_invalidate = self._ui.configbool("fsmonitor", "walk_on_invalidate")
self.timeout = float(self._ui.config("fsmonitor", "timeout"))
self._repo = repo
self._usetreestate = "treestate" in repo.requirements
self._droplist = []
def get(self):
"""return clock, ignorehash, notefiles"""
if self._usetreestate:
clock = self._repo.dirstate.getclock()
# XXX: ignorehash is already broken, so return None
ignorehash = None
# note files are already included in nonnormalset, so they will be
# processed anyway, do not return a separate notefiles.
notefiles = []
return clock, ignorehash, notefiles
try:
file = self._vfs("fsmonitor.state", "rb")
except IOError as inst:
self._identity = util.filestat(None)
if inst.errno != errno.ENOENT:
raise
return None, None, None
self._identity = util.filestat.fromfp(file)
versionbytes = file.read(4)
if len(versionbytes) < 4:
self._ui.log(
@ -104,17 +112,38 @@ class state(object):
return clock, ignorehash, notefiles
def setdroplist(self, droplist):
"""set a list of files to be dropped from dirstate upon 'set'.
This is used to clean up deleted untracked files from treestate, which
tracks untracked files.
"""
self._droplist = droplist
def set(self, clock, ignorehash, notefiles):
if self._usetreestate:
ds = self._repo.dirstate
dmap = ds._map
changed = bool(self._droplist)
for path in self._droplist:
dmap.dropfile(path, None, real=True)
self._droplist = []
for path in notefiles:
changed |= ds.needcheck(path)
# Avoid updating dirstate frequently if nothing changed.
# But do update dirstate if the clock is reset to None, or is
# moving away from None.
if not clock or changed or not ds.getclock():
ds.setclock(clock)
return
if clock is None:
self.invalidate(reason="no_clock")
return
# Read the identity from the file on disk rather than from the open file
# pointer below, because the latter is actually a brand new file.
identity = util.filestat.frompath(self._vfs.join("fsmonitor.state"))
if identity != self._identity:
self._ui.debug("skip updating fsmonitor.state: identity mismatch\n")
return
# The code runs with a wlock taken, and dirstate has passed its
# identity check. So we can update both dirstate and fsmonitor state.
# See _poststatusfixup in context.py
try:
file = self._vfs("fsmonitor.state", "wb", atomictemp=True, checkambig=True)
@ -147,7 +176,6 @@ class state(object):
raise
if "fsmonitor_details" in getattr(self._ui, "track", ()):
self._ui.log("fsmonitor_details", "fsmonitor state invalidated")
self._identity = util.filestat(None)
def setlastclock(self, clock):
if "fsmonitor_details" in getattr(self._ui, "track", ()):
@ -157,4 +185,4 @@ class state(object):
def getlastclock(self):
if "fsmonitor_details" in getattr(self._ui, "track", ()):
self._ui.log("fsmonitor_details", "getlastclock: %r" % self._lastclock)
return self._lastclock
self._lastclock

View File

@ -1890,12 +1890,15 @@ class workingctx(committablectx):
normal = self._repo.dirstate.normal
for f in fixup:
normal(f)
# write changes out explicitly, because nesting
# wlock at runtime may prevent 'wlock.release()'
# after this block from doing so for subsequent
# changing files
tr = self._repo.currenttransaction()
self._repo.dirstate.write(tr)
# write changes out explicitly, because nesting
# wlock at runtime may prevent 'wlock.release()'
# after this block from doing so for subsequent
# changing files
#
# This is a no-op if dirstate is not dirty.
tr = self._repo.currenttransaction()
self._repo.dirstate.write(tr)
if poststatusafter:
for ps in poststatusafter:

View File

@ -427,6 +427,29 @@ class dirstate(object):
def copies(self):
return self._map.copymap
def needcheck(self, file):
"""Mark file as need-check"""
if not self._istreestate:
raise error.ProgrammingError("needcheck is only supported by treestate")
changed = self._map.needcheck(file)
self._dirty |= changed
return changed
def setclock(self, clock):
"""Set fsmonitor clock"""
if not self._istreestate:
raise error.ProgrammingError("setclock is only supported by treestate")
clock = clock or ""
if clock != self._map._clock:
self._map._clock = clock
self._dirty = True
def getclock(self):
"""Get fsmonitor clock"""
if not self._istreestate:
raise error.ProgrammingError("getclock is only supported by treestate")
return self._map._clock or None
def _addpath(self, f, state, mode, size, mtime):
oldstate = self[f]
if state == "a" or oldstate == "r":
@ -1101,6 +1124,10 @@ class dirstate(object):
).iteritems():
try:
t = dget(fn)
# This "?" state is only tracked by treestate, emulate the old
# behavior - KeyError.
if t[0] == "?":
raise KeyError
except KeyError:
if (listignored or mexact(fn)) and dirignore(fn):
if listignored:

View File

@ -546,7 +546,7 @@ py_class!(class treestate |py| {
}
def walk(&self, setbits: u16, unsetbits: u16) -> PyResult<Vec<PyBytes>> {
// Get all file paths with `setbits` set and `unsetbits` unset.
// Get all file paths with `setbits` all set, `unsetbits` all unset.
assert_eq!(setbits & unsetbits, 0, "setbits cannot overlap with unsetbits");
let setbits = StateFlags::from_bits_truncate(setbits);
let unsetbits = StateFlags::from_bits_truncate(unsetbits);
@ -569,6 +569,27 @@ py_class!(class treestate |py| {
Ok(result)
}
def tracked(&self) -> PyResult<Vec<PyBytes>> {
// Not ideal as a special case. But the returned list is large and it needs to be fast.
// It's basically walk(EXIST_P1, 0) + walk(EXIST_P2, 0) + walk(EXIST_NEXT).
let mut state = self.state(py).borrow_mut();
let mut result = Vec::new();
let mask = StateFlags::EXIST_P1 | StateFlags::EXIST_P2 | StateFlags::EXIST_NEXT;
convert_result(py, state.visit(
&mut |components, _state| {
let path = PyBytes::new(py, &components.concat());
result.push(path);
Ok(VisitorResult::NotChanged)
},
&|_, dir| match dir.get_aggregated_state() {
None => true,
Some(state) => state.union.intersects(mask),
},
&|_, file| file.state.intersects(mask),
))?;
Ok(result)
}
def getfiltered(
&self, path: PyBytes, filter: PyObject, filterid: u64
) -> PyResult<Option<PyBytes>> {

View File

@ -147,7 +147,9 @@ class treestatemap(object):
return result
def keys(self):
return self._tree.walk(0, 0)
# Exclude untracked files, since the matcher interface expects __iter__
# to not include untracked files.
return self._tree.tracked()
def preload(self):
pass
@ -181,8 +183,32 @@ class treestatemap(object):
size = 0
self._tree.insert(f, state, mode, size, mtime, copied)
def dropfile(self, f, oldstate):
return self._tree.remove(f)
def dropfile(self, f, oldstate, real=False):
if real or not self._clock:
# If watchman clock is not set, watchman is not used, drop
# untracked files directly. This is also correct if watchman
# clock is reset to empty, since the next query will do a full
# crawl.
return self._tree.remove(f)
else:
# If watchman is used, treestate tracks "untracked" files before
# watchman clock. So only remove EXIST_* bits from the file.
# fsmonitor will do a stat check and drop(real=True) later.
#
# Typically, dropfile is used in 2 cases:
# - "hg forget": mark the file as "untracked".
# - "hg update": remove files only tracked by old commit.
entry = self._tree.get(f, None)
if not entry:
return False
else:
state, mode, size, mtime, copied = entry
state ^= state & (
treestate.EXIST_NEXT | treestate.EXIST_P1 | treestate.EXIST_P2
)
state |= treestate.NEED_CHECK
self._tree.insert(f, state, mode, size, mtime, copied)
return True
def clearambiguoustimes(self, _files, now):
# TODO(quark): could _files be different from those with mtime = -1
@ -357,3 +383,26 @@ class treestatemap(object):
self._tree.insert(dest, state, mode, size, mtime, source)
else:
raise error.ProgrammingError("copy dest %r does not exist" % dest)
# treestate specific methods
def needcheck(self, path):
"""Mark a file as NEED_CHECK, so it will be included by 'nonnormalset'
Return True if the file was changed, False if it's already marked.
"""
existing = self._tree.get(path, None)
if not existing:
# The file was not in dirstate (untracked). Add it.
state = treestate.NEED_CHECK
mode = 0o666
size = -1
mtime = -1
copied = None
else:
state, mode, size, mtime, copied = existing
if treestate.NEED_CHECK & state:
return False
state |= treestate.NEED_CHECK
self._tree.insert(path, state, mode, size, mtime, copied)
return True

View File

@ -39,7 +39,8 @@
R removed
! deleted
? untracked
dirstate v2 (using treestate*, offset *, 4 files tracked) (glob)
dirstate v2 (using treestate*, offset *, 4 files tracked) (glob) (no-fsmonitor !)
dirstate v2 (using treestate*, offset *, 5 files tracked) (glob) (fsmonitor !)
==== Migrating dirstate v1 to v0 ====
M modified
R removed
@ -61,13 +62,15 @@
R removed
! deleted
? untracked
dirstate v2 (using treestate* offset *, 4 files tracked) (glob)
dirstate v2 (using treestate*, offset *, 4 files tracked) (glob) (no-fsmonitor !)
dirstate v2 (using treestate*, offset *, 5 files tracked) (glob) (fsmonitor !)
==== Migrating dirstate v2 to v0 ====
M modified
R removed
! deleted
? untracked
dirstate v2 (using treestate*, offset *, 4 files tracked) (glob)
dirstate v2 (using treestate*, offset *, 4 files tracked) (glob) (no-fsmonitor !)
dirstate v2 (using treestate*, offset *, 5 files tracked) (glob) (fsmonitor !)
M modified
R removed
! deleted
@ -78,7 +81,8 @@
R removed
! deleted
? untracked
dirstate v2 (using treestate*, offset *, 4 files tracked) (glob)
dirstate v2 (using treestate*, offset *, 4 files tracked) (glob) (no-fsmonitor !)
dirstate v2 (using treestate*, offset *, 5 files tracked) (glob) (fsmonitor !)
M modified
R removed
! deleted

View File

@ -174,34 +174,6 @@ treated differently in _checklookup() according to runtime platform.
$ rm b
#if fsmonitor
Create fsmonitor state.
$ hg status
$ f --type .hg/fsmonitor.state
.hg/fsmonitor.state: file
Test that invalidating fsmonitor state in the middle (which doesn't require the
wlock) causes the fsmonitor update to be skipped.
hg debugrebuilddirstate ensures that the dirstaterace hook will be called, but
it also invalidates the fsmonitor state. So back it up and restore it.
$ mv .hg/fsmonitor.state .hg/fsmonitor.state.tmp
$ hg debugrebuilddirstate
$ mv .hg/fsmonitor.state.tmp .hg/fsmonitor.state
$ cat > $TESTTMP/dirstaterace.sh <<EOF
> rm .hg/fsmonitor.state
> EOF
$ hg status --config extensions.dirstaterace=$TESTTMP/dirstaterace.py --debug
skip updating fsmonitor.state: identity mismatch
$ f .hg/fsmonitor.state
.hg/fsmonitor.state: file not found
#endif
Set up a rebase situation for issue5581.
$ echo c2 > a

View File

@ -57,7 +57,7 @@ Prepare test repo:
adding a
$ hg ci -m1
#if treestate-off
#if no-v2
Set mtime of a into the future:
$ touch -t 202101011200 a