mirror of
https://github.com/facebook/sapling.git
synced 2024-10-09 16:31:02 +03:00
locks: add logging for invalid lock order taking
Summary: We've been seeing some issues where non-write processes (like clone) are taking the wlock/lock without taking the sqllock. This causes processes which do want the sqllock (like pushes) to hang waiting. Streaming clones are one known case of this, but this extra logging will help us catch other cases. Once we're confident that nothing takes the wrong lock ordering, we can change this to throw an exception instead of logging the error. This required a minor refactor the synclimit code, so we can detect if we're in a sync lock (which is a case where it's ok to take the wlock/lock without taking the sqlock). And it configures the tests to use streaming clones when possible, which trigger the current known bug. Test Plan: Added a test Reviewers: #mercurial, quark Reviewed By: quark Subscribers: quark Differential Revision: https://phabricator.intern.facebook.com/D4287384 Signature: t1:4287384:1481065929:dfac78e3537c3cdf44c810bc99c5f07ed5b12f4c
This commit is contained in:
parent
593af04245
commit
a6491a0b9c
37
hgsql.py
37
hgsql.py
@ -473,6 +473,35 @@ def wraprepo(repo):
|
||||
callback()
|
||||
self.sqlpostrelease = []
|
||||
|
||||
def lock(self, *args, **kwargs):
|
||||
wl = self._wlockref and self._wlockref()
|
||||
if (not self._synclock and
|
||||
not (wl is not None and wl.held) and
|
||||
not self.hassqllock(writelock, checkserver=False)):
|
||||
self._recordbadlockorder()
|
||||
return super(sqllocalrepo, self).lock(*args, **kwargs)
|
||||
|
||||
def wlock(self, *args, **kwargs):
|
||||
if (not self._synclock and
|
||||
not self.hassqllock(writelock, checkserver=False)):
|
||||
self._recordbadlockorder()
|
||||
return super(sqllocalrepo, self).wlock(*args, **kwargs)
|
||||
|
||||
def _recordbadlockorder(self):
|
||||
self.ui.debug("hgsql: invalid lock order\n")
|
||||
|
||||
outputdir = self.ui.config('hgsql', 'profileoutput')
|
||||
if outputdir:
|
||||
import random, traceback
|
||||
pid = os.getpid()
|
||||
rand = random.random()
|
||||
timestamp = time.time()
|
||||
path = os.path.join(outputdir, 'hgsql-lockorder-%s-%s-%s' %
|
||||
(timestamp, pid, rand))
|
||||
with open(path, 'a+') as f:
|
||||
stack = traceback.format_stack()
|
||||
f.write(''.join(stack))
|
||||
|
||||
def transaction(self, *args, **kwargs):
|
||||
tr = super(sqllocalrepo, self).transaction(*args, **kwargs)
|
||||
if tr.count > 1:
|
||||
@ -528,9 +557,12 @@ def wraprepo(repo):
|
||||
is returned, it means the limiter was not acquired, and readonly
|
||||
clients should not attempt to perform a sync."""
|
||||
try:
|
||||
def release():
|
||||
self._synclock = None
|
||||
wait = False
|
||||
return self._lock(self.svfs, "synclimiter", wait, None,
|
||||
None, _('repository %s') % self.origroot)
|
||||
self._synclock = self._lock(self.svfs, "synclimiter", wait, release,
|
||||
None, _('repository %s') % self.origroot)
|
||||
return self._synclock
|
||||
except error.LockHeld:
|
||||
return None
|
||||
|
||||
@ -1050,6 +1082,7 @@ def wraprepo(repo):
|
||||
repo.pendingrevs = []
|
||||
repo.heldlocks = set()
|
||||
repo.sqlpostrelease = []
|
||||
repo._synclock = None
|
||||
|
||||
repo.__class__ = sqllocalrepo
|
||||
|
||||
|
@ -58,6 +58,10 @@ password = $DBPASS
|
||||
port = $DBPORT
|
||||
reponame = $2
|
||||
|
||||
[server]
|
||||
preferuncompressed=True
|
||||
uncompressed=True
|
||||
|
||||
[ui]
|
||||
ssh=python "$TESTDIR/dummyssh"
|
||||
EOF
|
||||
|
@ -9,6 +9,10 @@
|
||||
$ cd ..
|
||||
|
||||
$ initserver master masterrepo
|
||||
$ cat >> master/.hg/hgrc <<EOF
|
||||
> [hgsql]
|
||||
> profileoutput=$TESTTMP/
|
||||
> EOF
|
||||
$ hg -R master log
|
||||
$ hg -R master pull -q client
|
||||
|
||||
@ -45,6 +49,8 @@
|
||||
$ printf "[hooks]\npre-changegroup.sleep = sleep 2\n" >> master/.hg/hgrc
|
||||
$ initclient client2
|
||||
$ hg pull -q -R client2 ssh://user@dummy/master
|
||||
$ ls -l $TESTTMP/hgsql-lockorder*
|
||||
* $TESTTMP/hgsql-lockorder-* (glob)
|
||||
|
||||
$ cd client
|
||||
$ hg up -q 1
|
||||
@ -70,3 +76,18 @@
|
||||
|
||||
$ sed -i '/hooks/d' master/.hg/hgrc
|
||||
$ sed -i '/sleep/d' master/.hg/hgrc
|
||||
|
||||
# Verify incorrect lock order taking is logged.
|
||||
# We know that streaming clones currently hit this code path, and the hg pull in
|
||||
# the above test used that code path.
|
||||
$ cat $TESTTMP/hgsql-lockorder* | tail -n 10
|
||||
File *, line *, in getstream (glob)
|
||||
for chunk in it:
|
||||
File *, line *, in generatev1wireproto (glob)
|
||||
filecount, bytecount, it = generatev1(repo)
|
||||
File *, line *, in generatev1 (glob)
|
||||
with repo.lock():
|
||||
File *, line *, in lock (glob)
|
||||
self._recordbadlockorder()
|
||||
File *, line *, in _recordbadlockorder (glob)
|
||||
stack = traceback.format_stack()
|
||||
|
Loading…
Reference in New Issue
Block a user