vere: use fcntl advisory locks on pidfile (#697)

The PR replaces vere's current (buggy, insufficient) lockfile mechanism
with an advisory `fcntl()` write lock (`F_SETLK`, `F_WRLCK`) on the
first byte of vere's PID file (`$pier/.vere.lock`). It does not (yet?)
include the functionality to kill the lock-holder and take over the
pier, as I'm not sure that's desirable.

There have been many problems with vere's current lockfile
implementation. The biggest advantage of this approach is that the lock
is automatically released if the process is killed or the host system
crashes. The biggest question is: does it work reliably across docker
shared volumes, virtual machines, &c.
This commit is contained in:
Pyry Kovanen 2024-08-06 20:36:46 +03:00 committed by GitHub
commit 47d2fedefa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 115 additions and 65 deletions

View File

@ -833,86 +833,134 @@ _disk_lock(c3_c* pax_c)
return paf_c;
}
/* u3_disk_acquire(): acquire a lockfile, killing anything that holds it.
/* _disk_acquire(): acquire a lockfile, killing anything that holds it.
*/
static void
u3_disk_acquire(c3_c* pax_c)
static c3_i
_disk_acquire(c3_c* pax_c)
{
c3_c* paf_c = _disk_lock(pax_c);
c3_w pid_w;
FILE* loq_u;
c3_c* paf_c = _disk_lock(pax_c);
c3_y dat_y[12] = {0};
c3_w pid_w = 0;
c3_i fid_i, ret_i;
if ( NULL != (loq_u = c3_fopen(paf_c, "r")) ) {
if ( 1 != fscanf(loq_u, "%" SCNu32, &pid_w) ) {
u3l_log("lockfile %s is corrupt!", paf_c);
kill(getpid(), SIGTERM);
sleep(1); u3_assert(0);
}
else if (pid_w != getpid()) {
c3_w i_w;
int ret = kill(pid_w, SIGTERM);
if ( -1 == ret && errno == EPERM ) {
u3l_log("disk: permission denied when trying to kill process %d!", pid_w);
kill(getpid(), SIGTERM);
sleep(1); u3_assert(0);
}
if ( -1 != ret ) {
u3l_log("disk: stopping process %d, live in %s...",
pid_w, pax_c);
for ( i_w = 0; i_w < 16; i_w++ ) {
sleep(1);
if ( -1 == kill(pid_w, SIGTERM) ) {
break;
}
}
if ( 16 == i_w ) {
for ( i_w = 0; i_w < 16; i_w++ ) {
if ( -1 == kill(pid_w, SIGKILL) ) {
break;
}
sleep(1);
}
}
if ( 16 == i_w ) {
u3l_log("disk: process %d seems unkillable!", pid_w);
u3_assert(0);
}
u3l_log("disk: stopped old process %u", pid_w);
}
}
fclose(loq_u);
c3_unlink(paf_c);
if ( -1 == (fid_i = c3_open(paf_c, O_RDWR|O_CREAT, 0666)) ) {
fprintf(stderr, "disk: failed to open/create lock file\r\n");
goto fail;
}
if ( NULL == (loq_u = c3_fopen(paf_c, "w")) ) {
u3l_log("disk: unable to open %s: %s", paf_c, strerror(errno));
u3_assert(0);
}
fprintf(loq_u, "%u\n", getpid());
{
c3_i fid_i = fileno(loq_u);
c3_sync(fid_i);
c3_y len_y = 0;
c3_y* buf_y = dat_y;
do {
c3_zs ret_zs;
if ( -1 == (ret_zs = read(fid_i, buf_y, 1)) ) {
if ( EINTR == errno ) continue;
fprintf(stderr, "disk: failed to read lockfile: %s\r\n",
strerror(errno));
goto fail;
}
if ( !ret_zs ) break;
else if ( 1 != ret_zs ) {
fprintf(stderr, "disk: strange lockfile read %zd\r\n", ret_zs);
goto fail;
}
len_y++;
buf_y++;
}
while ( len_y < sizeof(dat_y) );
if ( len_y ) {
if ( (1 != sscanf((c3_c*)dat_y, "%" SCNu32 "%n", &pid_w, &ret_i))
|| (0 >= ret_i)
|| ('\n' != *(dat_y + ret_i)) )
{
fprintf(stderr, "disk: lockfile is corrupt\r\n");
}
}
}
{
struct flock lok_u;
memset((void *)&lok_u, 0, sizeof(lok_u));
lok_u.l_type = F_WRLCK;
lok_u.l_whence = SEEK_SET;
lok_u.l_start = 0;
lok_u.l_len = 1;
while ( (ret_i = fcntl(fid_i, F_SETLK, &lok_u))
&& (EINTR == (ret_i = errno)) );
if ( ret_i ) {
if ( pid_w ) {
fprintf(stderr, "pier: locked by PID %u\r\n", pid_w);
}
else {
fprintf(stderr, "pier: strange: locked by empty lockfile\r\n");
}
goto fail;
}
}
ret_i = snprintf((c3_c*)dat_y, sizeof(dat_y), "%u\n", getpid());
if ( 0 >= ret_i ) {
fprintf(stderr, "disk: failed to write lockfile\r\n");
goto fail;
}
{
c3_y len_y = (c3_y)ret_i;
c3_y* buf_y = dat_y;
do {
c3_zs ret_zs;
if ( (-1 == (ret_zs = write(fid_i, buf_y, len_y)))
&& (EINTR != errno) )
{
fprintf(stderr, "disk: lockfile write failed %s\r\n",
strerror(errno));
goto fail;
}
if ( 0 < ret_zs ) {
len_y -= ret_zs;
buf_y += ret_zs;
}
}
while ( len_y );
}
if ( -1 == c3_sync(fid_i) ) {
fprintf(stderr, "disk: failed to sync lockfile: %s\r\n",
strerror(errno));
goto fail;
}
fclose(loq_u);
c3_free(paf_c);
return fid_i;
fail:
kill(getpid(), SIGTERM);
sleep(1); u3_assert(0);
}
/* u3_disk_release(): release a lockfile.
/* _disk_release(): release a lockfile.
*/
static void
u3_disk_release(c3_c* pax_c)
_disk_release(c3_c* pax_c, c3_i fid_i)
{
c3_c* paf_c = _disk_lock(pax_c);
c3_unlink(paf_c);
c3_free(paf_c);
close(fid_i);
}
/* u3_disk_exit(): close the log.
@ -959,7 +1007,7 @@ u3_disk_exit(u3_disk* log_u)
}
}
u3_disk_release(log_u->dir_u->pax_c);
_disk_release(log_u->dir_u->pax_c, log_u->lok_i);
u3_dire_free(log_u->dir_u);
u3_dire_free(log_u->urb_u);
@ -1817,6 +1865,7 @@ u3_disk*
u3_disk_init(c3_c* pax_c, u3_disk_cb cb_u)
{
u3_disk* log_u = c3_calloc(sizeof(*log_u));
log_u->lok_i = -1;
log_u->liv_o = c3n;
log_u->ted_o = c3n;
log_u->cb_u = cb_u;
@ -1835,7 +1884,7 @@ u3_disk_init(c3_c* pax_c, u3_disk_cb cb_u)
// acquire lockfile.
//
u3_disk_acquire(pax_c);
log_u->lok_i = _disk_acquire(pax_c);
// create/load $pier/.urb
//

View File

@ -553,6 +553,7 @@
u3_dire* dir_u; // main pier directory
u3_dire* urb_u; // urbit system data
u3_dire* com_u; // log directory
c3_i lok_i; // lockfile
c3_o liv_o; // live
c3_w ver_w; // version (see version.h)
void* mdb_u; // lmdb env of current epoch