Kernel+LibPthread+LibC: Add a naive futex and use it for pthread_cond_t

This patch implements a simple version of the futex (fast userspace
mutex) API in the kernel and uses it to make the pthread_cond_t APIs
block instead of busy-waiting with sched_yield().

An arbitrary userspace address is passed to the kernel as a "token"
that identifies the futex and you can then FUTEX_WAIT and FUTEX_WAKE
that specific userspace address.

FUTEX_WAIT corresponds to pthread_cond_wait() and FUTEX_WAKE is used
for pthread_cond_signal() and pthread_cond_broadcast().

I'm pretty sure I'm missing something in this implementation, but it's
hopefully okay for a start. :^)
This commit is contained in:
Andreas Kling 2019-12-22 21:29:47 +01:00
parent 4b8b100b83
commit 4a8683ea68
Notes: sideshowbarker 2024-07-19 10:40:17 +09:00
9 changed files with 99 additions and 55 deletions

View File

@ -3807,3 +3807,47 @@ Thread& Process::any_thread()
ASSERT(found_thread);
return *found_thread;
}
// Returns the wait queue associated with the given userspace address, creating it on first use.
// The address serves purely as a lookup key into m_futex_queues; it is not dereferenced here.
WaitQueue& Process::futex_queue(i32* userspace_address)
{
    const u32 key = (u32)userspace_address;
    auto& slot = m_futex_queues.ensure(key);
    if (slot)
        return *slot;
    // First time this address is used as a futex: allocate its queue.
    slot = make<WaitQueue>();
    return *slot;
}
// Naive futex syscall: block on, or wake the waiters of, a userspace address.
//
// params->userspace_address is the token that selects the wait queue (see futex_queue()).
//
// Operations (params->futex_op):
// - FUTEX_WAIT: if *userspace_address still equals params->val, block the current thread on the
//   queue for that address; otherwise return -EAGAIN without blocking.
// - FUTEX_WAKE: params->val of 0 wakes nobody, 1 wakes one waiter, and any other value
//   currently wakes every waiter on the queue.
//
// Returns 0 on success, -EFAULT if params, the futex word or a non-null timeout fails
// validation, -EAGAIN for a FUTEX_WAIT whose expected value no longer matches, and -ENOSYS
// for an unrecognized futex_op.
int Process::sys$futex(const Syscall::SC_futex_params* params)
{
    if (!validate_read_typed(params))
        return -EFAULT;

    // Copy the fields out of the parameter block once. Binding references into *params would
    // re-read userspace memory on every use, so another thread could swap the pointers after
    // they have been validated below.
    auto [userspace_address, futex_op, value, timeout] = *params;

    if (!validate_read_typed(userspace_address))
        return -EFAULT;

    if (timeout && !validate_read_typed(timeout))
        return -EFAULT;

    switch (futex_op) {
    case FUTEX_WAIT:
        // Only sleep if the futex word still holds the value the caller observed.
        if (*userspace_address != value)
            return -EAGAIN;
        // FIXME: This is supposed to be interruptible by a signal, but right now WaitQueue cannot be interrupted.
        // FIXME: Support timeout!
        current->wait_on(futex_queue(userspace_address));
        break;
    case FUTEX_WAKE:
        if (value == 0)
            return 0;
        if (value == 1) {
            futex_queue(userspace_address).wake_one();
        } else {
            // FIXME: Wake exactly (value) waiters.
            futex_queue(userspace_address).wake_all();
        }
        break;
    default:
        // Previously an unknown operation fell through and reported success.
        return -ENOSYS;
    }

    return 0;
}

View File

@ -233,6 +233,7 @@ public:
int sys$profiling_enable(pid_t);
int sys$profiling_disable(pid_t);
void* sys$get_kernel_info_page();
int sys$futex(const Syscall::SC_futex_params*);
static void initialize();
@ -391,6 +392,9 @@ private:
u64 m_alarm_deadline { 0 };
int m_icon_id { -1 };
WaitQueue& futex_queue(i32*);
HashMap<u32, OwnPtr<WaitQueue>> m_futex_queues;
};
class ProcessInspectionHandle {

View File

@ -150,7 +150,8 @@ typedef u32 socklen_t;
__ENUMERATE_SYSCALL(set_shared_buffer_volatile) \
__ENUMERATE_SYSCALL(profiling_enable) \
__ENUMERATE_SYSCALL(profiling_disable) \
__ENUMERATE_SYSCALL(get_kernel_info_page)
__ENUMERATE_SYSCALL(get_kernel_info_page) \
__ENUMERATE_SYSCALL(futex)
namespace Syscall {
@ -260,6 +261,13 @@ struct SC_setsockopt_params {
socklen_t value_size;
};
// Parameter block passed from userspace to the futex syscall.
struct SC_futex_params {
// Address of the futex word; also the token that identifies the wait queue.
i32* userspace_address;
// Operation selector: FUTEX_WAIT or FUTEX_WAKE.
int futex_op;
// FUTEX_WAIT: value the futex word is expected to still hold.
// FUTEX_WAKE: number of waiters to wake (0 = none, 1 = one, anything else = all for now).
i32 val;
// Optional timeout; the kernel validates it when non-null but does not honor it yet.
const timespec* timeout;
};
struct SC_create_thread_params {
unsigned int m_detach_state = 0; // JOINABLE or DETACHED
int m_schedule_priority = 2; // ThreadPriority::Normal

View File

@ -46,6 +46,9 @@
#define FD_CLOEXEC 1
#define FUTEX_WAIT 1
#define FUTEX_WAKE 2
/* c_cc characters */
#define VINTR 0
#define VQUIT 1

View File

@ -27,4 +27,12 @@ int profiling_disable(pid_t pid)
int rc = syscall(SC_profiling_disable, pid);
__RETURN_WITH_ERRNO(rc, rc, -1);
}
// Userspace entry point for the futex syscall.
// Packs the arguments into a parameter block, issues SC_futex and hands the result to
// __RETURN_WITH_ERRNO together with the failure value -1.
int futex(int32_t* userspace_address, int futex_op, int32_t value, const struct timespec* timeout)
{
    Syscall::SC_futex_params params;
    params.userspace_address = userspace_address;
    params.futex_op = futex_op;
    params.val = value;
    params.timeout = timeout;

    int rc = syscall(SC_futex, &params);
    __RETURN_WITH_ERRNO(rc, rc, -1);
}
}

View File

@ -44,4 +44,9 @@ int module_unload(const char* name, size_t name_length);
int profiling_enable(pid_t);
int profiling_disable(pid_t);
#define FUTEX_WAIT 1
#define FUTEX_WAKE 2
int futex(int32_t* userspace_address, int futex_op, int32_t value, const struct timespec* timeout);
__END_DECLS

View File

@ -77,7 +77,9 @@ typedef struct __pthread_mutexattr_t {
} pthread_mutexattr_t;
/* Condition variable state. */
/* NOTE(review): `storage` looks like the field from the old heap-backed implementation and the
   three fields after it like its futex-based replacement; PTHREAD_COND_INITIALIZER supplies
   three values - confirm `storage` is gone in the final header. */
typedef struct __pthread_cond_t {
/* Pointer to the heap-allocated ConditionVariable set up by pthread_cond_init(). */
void* storage;
/* Futex word: waiters sleep on its address, signal/broadcast store a new value into it. */
int32_t value;
/* Value of the futex word as last snapshotted by a waiter in pthread_cond_wait(). */
uint32_t previous;
/* Clock chosen at init time: the attribute's clockid, or CLOCK_MONOTONIC without attributes. */
int clockid; // clockid_t
} pthread_cond_t;
typedef void* pthread_rwlock_t;

View File

@ -1,10 +1,10 @@
#include <AK/Assertions.h>
#include <AK/Atomic.h>
#include <AK/InlineLinkedList.h>
#include <AK/StdLibExtras.h>
#include <Kernel/Syscall.h>
#include <limits.h>
#include <pthread.h>
#include <serenity.h>
#include <signal.h>
#include <stdio.h>
#include <sys/mman.h>
@ -418,42 +418,27 @@ int pthread_setschedparam(pthread_t thread, int policy, const struct sched_param
return 0;
}
// One entry per thread waiting on a condition variable in the sched_yield()-based implementation.
// NOTE(review): presumably removed by this commit in favor of the futex - confirm.
struct WaitNode : public InlineLinkedListNode<WaitNode> {
// Starts out true; pthread_cond_signal()/pthread_cond_broadcast() clear it to release the waiter.
volatile bool waiting { true };
// Link pointers, presumably the ones InlineLinkedListNode expects - verify against AK.
WaitNode* m_next { nullptr };
WaitNode* m_prev { nullptr };
};
// Heap-side state of a pthread_cond_t in the sched_yield()-based implementation:
// allocated in pthread_cond_init() and deleted in pthread_cond_destroy().
struct ConditionVariable {
// Threads currently waiting, in arrival order (signal removes from the head).
InlineLinkedList<WaitNode> waiters;
// Clock used by pthread_cond_timedwait(); overridden by the attribute's clockid when given.
clockid_t clock { CLOCK_MONOTONIC };
};
// Initializes `cond`, optionally taking the clock from `attr`. Always returns 0.
// NOTE(review): this span appears to contain both the old heap-allocating body (ConditionVariable
// / storage) and the new in-place field initialization - confirm which lines are live.
int pthread_cond_init(pthread_cond_t* cond, const pthread_condattr_t* attr)
{
// Old path: allocate the out-of-line state and remember it in cond->storage.
auto& condvar = *new ConditionVariable;
cond->storage = &condvar;
if (attr)
condvar.clock = attr->clockid;
// New path: the futex word and its snapshot both start at 0.
cond->value = 0;
cond->previous = 0;
// Fall back to CLOCK_MONOTONIC when no attributes are supplied.
cond->clockid = attr ? attr->clockid : CLOCK_MONOTONIC;
return 0;
}
// Destroys a condition variable. Always returns 0.
// NOTE(review): two signatures are visible here; the one with the unnamed parameter presumably
// belongs to the futex-based version, which has nothing to free - confirm.
int pthread_cond_destroy(pthread_cond_t* cond)
int pthread_cond_destroy(pthread_cond_t*)
{
// Old path: release the ConditionVariable allocated by pthread_cond_init().
delete static_cast<ConditionVariable*>(cond->storage);
return 0;
}
// Waits on `cond`, releasing `mutex` while waiting and re-acquiring it before returning 0.
// NOTE(review): this span appears to contain both the old sched_yield() busy-wait (the WaitNode
// loop) and the futex-based code the commit message says replaces it - confirm which lines are live.
int pthread_cond_wait(pthread_cond_t* cond, pthread_mutex_t* mutex)
{
// Old path: enqueue a node and spin, yielding the CPU, until a signaller clears node.waiting.
WaitNode node;
auto& condvar = *(ConditionVariable*)cond->storage;
condvar.waiters.append(&node);
while (node.waiting) {
pthread_mutex_unlock(mutex);
sched_yield();
pthread_mutex_lock(mutex);
}
// New path: snapshot the futex word and record it in `previous`, so that
// pthread_cond_signal()/pthread_cond_broadcast() can store previous + 1 before waking.
i32 value = cond->value;
cond->previous = value;
pthread_mutex_unlock(mutex);
// FUTEX_WAIT only blocks while cond->value still equals the snapshot taken above.
int rc = futex(&cond->value, FUTEX_WAIT, value, nullptr);
// NOTE(review): sys$futex returns -EAGAIN when the word no longer matches, which futex()
// reports as -1. A signal landing between the unlock above and the futex() call would
// therefore trip this assertion - consider tolerating EAGAIN here.
ASSERT(rc == 0);
pthread_mutex_lock(mutex);
return 0;
}
@ -476,42 +461,27 @@ int pthread_condattr_setclock(pthread_condattr_t* attr, clockid_t clock)
// Waits on `cond` with an absolute deadline `abstime`.
// NOTE(review): this span appears to contain both the old polling implementation (which honored
// the deadline) and the new body, which ignores `abstime` and simply calls pthread_cond_wait() -
// confirm which lines are live.
int pthread_cond_timedwait(pthread_cond_t* cond, pthread_mutex_t* mutex, const struct timespec* abstime)
{
// Old path: spin with sched_yield() until signalled, checking the clock on every iteration.
WaitNode node;
auto& condvar = *(ConditionVariable*)cond->storage;
condvar.waiters.append(&node);
while (node.waiting) {
struct timespec now;
if (clock_gettime(condvar.clock, &now) < 0) {
dbgprintf("pthread_cond_timedwait: clock_gettime() failed\n");
return errno;
}
// Deadline reached (or already in the past): give up with ETIMEDOUT.
if ((abstime->tv_sec < now.tv_sec) || (abstime->tv_sec == now.tv_sec && abstime->tv_nsec <= now.tv_nsec)) {
return ETIMEDOUT;
}
pthread_mutex_unlock(mutex);
sched_yield();
pthread_mutex_lock(mutex);
}
// New path: no timeout support yet, so this waits indefinitely.
// FIXME: Implement timeout.
(void)abstime;
pthread_cond_wait(cond, mutex);
return 0;
}
// Wakes one thread waiting on `cond`. Returns 0.
// NOTE(review): this span appears to contain both the old waiter-list implementation and the
// futex-based replacement - confirm which lines are live.
int pthread_cond_signal(pthread_cond_t* cond)
{
// Old path: pop the oldest waiter, if any, and clear its flag so its busy-wait loop exits.
auto& condvar = *(ConditionVariable*)cond->storage;
if (condvar.waiters.is_empty())
return 0;
auto* node = condvar.waiters.remove_head();
node->waiting = false;
// New path: move the futex word past the last snapshot taken by a waiter, so a FUTEX_WAIT
// issued with that snapshot returns instead of blocking, then wake a single waiter.
u32 value = cond->previous + 1;
cond->value = value;
int rc = futex(&cond->value, FUTEX_WAKE, 1, nullptr);
ASSERT(rc == 0);
return 0;
}
// Wakes every thread waiting on `cond`. Returns 0.
// NOTE(review): this span appears to contain both the old waiter-list implementation and the
// futex-based replacement - confirm which lines are live.
int pthread_cond_broadcast(pthread_cond_t* cond)
{
// Old path: drain the waiter list, clearing each node's flag so its busy-wait loop exits.
auto& condvar = *(ConditionVariable*)cond->storage;
while (!condvar.waiters.is_empty()) {
auto* node = condvar.waiters.remove_head();
node->waiting = false;
}
// New path: move the futex word past the last snapshot taken by a waiter, then ask the kernel
// to wake INT32_MAX waiters (sys$futex treats any count other than 0 or 1 as "wake all").
u32 value = cond->previous + 1;
cond->value = value;
int rc = futex(&cond->value, FUTEX_WAKE, INT32_MAX, nullptr);
ASSERT(rc == 0);
return 0;
}

View File

@ -53,7 +53,7 @@ int pthread_setschedparam(pthread_t thread, int policy, const struct sched_param
#define PTHREAD_MUTEX_RECURSIVE 1
#define PTHREAD_MUTEX_DEFAULT PTHREAD_MUTEX_NORMAL
#define PTHREAD_MUTEX_INITIALIZER { 0, 0, 0, PTHREAD_MUTEX_DEFAULT }
#define PTHREAD_COND_INITIALIZER { NULL }
#define PTHREAD_COND_INITIALIZER { 0, 0, CLOCK_MONOTONIC }
int pthread_key_create(pthread_key_t* key, void (*destructor)(void*));
int pthread_key_delete(pthread_key_t key);