/* * Copyright (c) 2018-2021, Andreas Kling * Copyright (c) 2022, Idan Horowitz * * SPDX-License-Identifier: BSD-2-Clause */ #include #include #include #include #include namespace Kernel { static Singleton>>> s_global_futex_queues; void Process::clear_futex_queues_on_exec() { s_global_futex_queues->with([this](auto& queues) { auto const* address_space = this->address_space().with([](auto& space) { return space.ptr(); }); queues.remove_all_matching([address_space](auto& futex_key, auto& futex_queue) { if ((futex_key.raw.offset & futex_key_private_flag) == 0) return false; if (futex_key.private_.address_space != address_space) return false; bool did_wake_all; futex_queue->wake_all(did_wake_all); VERIFY(did_wake_all); // No one should be left behind... return true; }); }); } ErrorOr Process::get_futex_key(FlatPtr user_address, bool shared) { if (user_address & 0b11) // user_address points to a u32, so must be 4byte aligned return EINVAL; auto range = Memory::VirtualRange { VirtualAddress(user_address), sizeof(u32) }; if (!Kernel::Memory::is_user_range(range)) return EFAULT; if (!shared) { // If this is thread-shared, we can skip searching the matching region return GlobalFutexKey { .private_ = { .address_space = this->address_space().with([](auto& space) { return space.ptr(); }), .user_address = user_address | futex_key_private_flag, } }; } return address_space().with([&](auto& space) -> ErrorOr { auto* matching_region = space->find_region_containing(range); if (!matching_region) return EFAULT; // The user wants to share this futex, but if the address doesn't point to a shared resource, there's not // much sharing to be done, so let's mark this as private if (!matching_region->is_shared()) { return GlobalFutexKey { .private_ = { .address_space = space.ptr(), .user_address = user_address | futex_key_private_flag, } }; } // This address is backed by a shared VMObject, if it's an AnonymousVMObject, it can be shared between processes // via forking, and shared regions that are cloned during a fork retain their original AnonymousVMObject. // On the other hand, if it's a SharedInodeVMObject, it can be shared by two processes mapping the same file as // MAP_SHARED, but since they are deduplicated based on the inode, in all cases the VMObject pointer should be // a unique global identifier. // NOTE: This assumes that a program will not unmap the only region keeping the vmobject alive while waiting on it, // if it does, it will get stuck waiting forever until interrupted by a signal, but since that use case is defined as // a programmer error, we are fine with it. auto const& vmobject = matching_region->vmobject(); if (vmobject.is_inode()) VERIFY(vmobject.is_shared_inode()); return GlobalFutexKey { .shared = { .vmobject = &vmobject, .offset = matching_region->offset_in_vmobject_from_vaddr(range.base()) } }; }); } ErrorOr Process::sys$futex(Userspace user_params) { VERIFY_PROCESS_BIG_LOCK_ACQUIRED(this); auto params = TRY(copy_typed_from_user(user_params)); Thread::BlockTimeout timeout; u32 cmd = params.futex_op & FUTEX_CMD_MASK; bool use_realtime_clock = (params.futex_op & FUTEX_CLOCK_REALTIME) != 0; if (use_realtime_clock && cmd != FUTEX_WAIT && cmd != FUTEX_WAIT_BITSET) { return ENOSYS; } bool shared = (params.futex_op & FUTEX_PRIVATE_FLAG) == 0; switch (cmd) { case FUTEX_WAIT: case FUTEX_WAIT_BITSET: case FUTEX_REQUEUE: case FUTEX_CMP_REQUEUE: { if (params.timeout) { auto timeout_time = TRY(copy_time_from_user(params.timeout)); bool is_absolute = cmd != FUTEX_WAIT; clockid_t clock_id = use_realtime_clock ? CLOCK_REALTIME_COARSE : CLOCK_MONOTONIC_COARSE; timeout = Thread::BlockTimeout(is_absolute, &timeout_time, nullptr, clock_id); } if (cmd == FUTEX_WAIT_BITSET && params.val3 == FUTEX_BITSET_MATCH_ANY) cmd = FUTEX_WAIT; break; case FUTEX_WAKE_BITSET: if (params.val3 == FUTEX_BITSET_MATCH_ANY) cmd = FUTEX_WAKE; break; } } auto find_futex_queue = [&](GlobalFutexKey futex_key, bool create_if_not_found, bool* did_create = nullptr) -> ErrorOr> { VERIFY(!create_if_not_found || did_create != nullptr); return s_global_futex_queues->with([&](auto& queues) -> ErrorOr> { auto it = queues.find(futex_key); if (it != queues.end()) return it->value; if (!create_if_not_found) return nullptr; *did_create = true; auto futex_queue = TRY(adopt_nonnull_lock_ref_or_enomem(new (nothrow) FutexQueue)); auto result = TRY(queues.try_set(futex_key, futex_queue)); VERIFY(result == AK::HashSetResult::InsertedNewEntry); return futex_queue; }); }; auto remove_futex_queue = [&](GlobalFutexKey futex_key) { return s_global_futex_queues->with([&](auto& queues) { auto it = queues.find(futex_key); if (it == queues.end()) return; if (it->value->try_remove()) queues.remove(it); }); }; auto do_wake = [&](FlatPtr user_address, u32 count, Optional const& bitmask) -> ErrorOr { if (count == 0) return 0; auto futex_key = TRY(get_futex_key(user_address, shared)); auto futex_queue = TRY(find_futex_queue(futex_key, false)); if (!futex_queue) return 0; bool is_empty; u32 woke_count = futex_queue->wake_n(count, bitmask, is_empty); if (is_empty) { // If there are no more waiters, we want to get rid of the futex! remove_futex_queue(futex_key); } return (int)woke_count; }; auto user_address = FlatPtr(params.userspace_address); auto user_address2 = FlatPtr(params.userspace_address2); auto do_wait = [&](u32 bitset) -> ErrorOr { bool did_create; LockRefPtr futex_queue; auto futex_key = TRY(get_futex_key(user_address, shared)); do { auto user_value = user_atomic_load_relaxed(params.userspace_address); if (!user_value.has_value()) return EFAULT; if (user_value.value() != params.val) { dbgln_if(FUTEX_DEBUG, "futex wait: EAGAIN. user value: {:p} @ {:p} != val: {}", user_value.value(), params.userspace_address, params.val); return EAGAIN; } atomic_thread_fence(AK::MemoryOrder::memory_order_acquire); did_create = false; futex_queue = TRY(find_futex_queue(futex_key, true, &did_create)); VERIFY(futex_queue); // We need to try again if we didn't create this queue and the existing queue // was removed before we were able to queue an imminent wait. } while (!did_create && !futex_queue->queue_imminent_wait()); // We must not hold the lock before blocking. But we have a reference // to the FutexQueue so that we can keep it alive. Thread::BlockResult block_result = futex_queue->wait_on(timeout, bitset); if (futex_queue->is_empty_and_no_imminent_waits()) { // If there are no more waiters, we want to get rid of the futex! remove_futex_queue(futex_key); } if (block_result == Thread::BlockResult::InterruptedByTimeout) { return ETIMEDOUT; } return 0; }; auto do_requeue = [&](Optional val3) -> ErrorOr { auto user_value = user_atomic_load_relaxed(params.userspace_address); if (!user_value.has_value()) return EFAULT; if (val3.has_value() && val3.value() != user_value.value()) return EAGAIN; atomic_thread_fence(AK::MemoryOrder::memory_order_acquire); auto futex_key = TRY(get_futex_key(user_address, shared)); auto futex_queue = TRY(find_futex_queue(futex_key, false)); if (!futex_queue) return 0; LockRefPtr target_futex_queue; bool is_empty = false; bool is_target_empty = false; auto futex_key2 = TRY(get_futex_key(user_address2, shared)); auto woken_or_requeued = TRY(futex_queue->wake_n_requeue( params.val, [&]() -> ErrorOr { // NOTE: futex_queue's lock is being held while this callback is called // The reason we're doing this in a callback is that we don't want to always // create a target queue, only if we actually have anything to move to it! target_futex_queue = TRY(find_futex_queue(futex_key2, true)); return target_futex_queue.ptr(); }, params.val2, is_empty, is_target_empty)); if (is_empty) remove_futex_queue(futex_key); if (is_target_empty && target_futex_queue) remove_futex_queue(futex_key2); return woken_or_requeued; }; switch (cmd) { case FUTEX_WAIT: return do_wait(0); case FUTEX_WAKE: return TRY(do_wake(user_address, params.val, {})); case FUTEX_WAKE_OP: { Optional oldval; u32 op_arg = _FUTEX_OP_ARG(params.val3); auto op = _FUTEX_OP(params.val3); if (op & FUTEX_OP_ARG_SHIFT) { op_arg = 1 << op_arg; op &= FUTEX_OP_ARG_SHIFT; } atomic_thread_fence(AK::MemoryOrder::memory_order_release); switch (op) { case FUTEX_OP_SET: oldval = user_atomic_exchange_relaxed(params.userspace_address2, op_arg); break; case FUTEX_OP_ADD: oldval = user_atomic_fetch_add_relaxed(params.userspace_address2, op_arg); break; case FUTEX_OP_OR: oldval = user_atomic_fetch_or_relaxed(params.userspace_address2, op_arg); break; case FUTEX_OP_ANDN: oldval = user_atomic_fetch_and_not_relaxed(params.userspace_address2, op_arg); break; case FUTEX_OP_XOR: oldval = user_atomic_fetch_xor_relaxed(params.userspace_address2, op_arg); break; default: return EINVAL; } if (!oldval.has_value()) return EFAULT; atomic_thread_fence(AK::MemoryOrder::memory_order_acquire); auto result = TRY(do_wake(user_address, params.val, {})); if (params.val2 > 0) { bool compare_result; switch (_FUTEX_CMP(params.val3)) { case FUTEX_OP_CMP_EQ: compare_result = (oldval.value() == _FUTEX_CMP_ARG(params.val3)); break; case FUTEX_OP_CMP_NE: compare_result = (oldval.value() != _FUTEX_CMP_ARG(params.val3)); break; case FUTEX_OP_CMP_LT: compare_result = (oldval.value() < _FUTEX_CMP_ARG(params.val3)); break; case FUTEX_OP_CMP_LE: compare_result = (oldval.value() <= _FUTEX_CMP_ARG(params.val3)); break; case FUTEX_OP_CMP_GT: compare_result = (oldval.value() > _FUTEX_CMP_ARG(params.val3)); break; case FUTEX_OP_CMP_GE: compare_result = (oldval.value() >= _FUTEX_CMP_ARG(params.val3)); break; default: return EINVAL; } if (compare_result) result += TRY(do_wake(user_address2, params.val2, {})); } return result; } case FUTEX_REQUEUE: return do_requeue({}); case FUTEX_CMP_REQUEUE: return do_requeue(params.val3); case FUTEX_WAIT_BITSET: VERIFY(params.val3 != FUTEX_BITSET_MATCH_ANY); // we should have turned it into FUTEX_WAIT if (params.val3 == 0) return EINVAL; return do_wait(params.val3); case FUTEX_WAKE_BITSET: VERIFY(params.val3 != FUTEX_BITSET_MATCH_ANY); // we should have turned it into FUTEX_WAKE if (params.val3 == 0) return EINVAL; return TRY(do_wake(user_address, params.val, params.val3)); } return ENOSYS; } }