Kernel: Refactor scheduler to use dynamic thread priorities

Threads now have numeric priorities with a base priority in the 1-99
range.

Whenever a runnable thread is *not* scheduled, its effective priority
is incremented by 1. This is tracked in Thread::m_extra_priority.
The effective priority of a thread is m_priority + m_extra_priority.

When a runnable thread *is* scheduled, its m_extra_priority is reset to
zero and the effective priority returns to base.

This means that a lower-priority thread will always eventually get
scheduled to run, once its effective priority becomes high enough to
exceed the base priority of threads "above" it.

The previous values for ThreadPriority (Low, Normal and High) are now
replaced as follows:

    Low -> 10
    Normal -> 30
    High -> 50

In other words, a runnable "Low" priority thread must be passed over by
the scheduler 20 times to get to "Normal" effective priority, and
another 20 times to reach "High".

This is not perfect, and I've used some quite naive data structures,
but I think the mechanism will allow us to build various new and
interesting optimizations, and we can figure out better data structures
later on. :^)
This commit is contained in:
Andreas Kling 2019-12-30 18:46:17 +01:00
parent 816d3e6208
commit 50677bf806
Notes: sideshowbarker 2024-07-19 10:31:54 +09:00
16 changed files with 109 additions and 149 deletions

View File

@ -55,6 +55,8 @@ String ProcessModel::column_name(int column) const
return "User";
case Column::Priority:
return "Pr";
case Column::EffectivePriority:
return "EPr";
case Column::Virtual:
return "Virtual";
case Column::Physical:
@ -108,7 +110,9 @@ GModel::ColumnMetadata ProcessModel::column_metadata(int column) const
case Column::State:
return { 75, TextAlignment::CenterLeft };
case Column::Priority:
return { 16, TextAlignment::CenterLeft };
return { 16, TextAlignment::CenterRight };
case Column::EffectivePriority:
return { 16, TextAlignment::CenterRight };
case Column::User:
return { 50, TextAlignment::CenterLeft };
case Column::Virtual:
@ -177,16 +181,9 @@ GVariant ProcessModel::data(const GModelIndex& index, Role role) const
case Column::User:
return thread.current_state.user;
case Column::Priority:
if (thread.current_state.priority == "Idle")
return 0;
if (thread.current_state.priority == "Low")
return 1;
if (thread.current_state.priority == "Normal")
return 2;
if (thread.current_state.priority == "High")
return 3;
ASSERT_NOT_REACHED();
return 3;
return thread.current_state.priority;
case Column::EffectivePriority:
return thread.current_state.effective_priority;
case Column::Virtual:
return (int)thread.current_state.amount_virtual;
case Column::Physical:
@ -249,15 +246,9 @@ GVariant ProcessModel::data(const GModelIndex& index, Role role) const
case Column::User:
return thread.current_state.user;
case Column::Priority:
if (thread.current_state.priority == "Idle")
return String::empty();
if (thread.current_state.priority == "High")
return *m_high_priority_icon;
if (thread.current_state.priority == "Low")
return *m_low_priority_icon;
if (thread.current_state.priority == "Normal")
return *m_normal_priority_icon;
return thread.current_state.priority;
case Column::EffectivePriority:
return thread.current_state.effective_priority;
case Column::Virtual:
return pretty_byte_size(thread.current_state.amount_virtual);
case Column::Physical:
@ -338,6 +329,7 @@ void ProcessModel::update()
state.tid = thread.tid;
state.times_scheduled = thread.times_scheduled;
state.priority = thread.priority;
state.effective_priority = thread.effective_priority;
state.state = thread.state;
sum_times_scheduled += thread.times_scheduled;
{

View File

@ -25,6 +25,7 @@ public:
CPU,
State,
Priority,
EffectivePriority,
User,
PID,
TID,
@ -71,7 +72,8 @@ private:
String name;
String state;
String user;
String priority;
u32 priority;
u32 effective_priority;
size_t amount_virtual;
size_t amount_resident;
size_t amount_dirty_private;

View File

@ -767,7 +767,8 @@ Optional<KBuffer> procfs$all(InodeIdentifier)
thread_object.add("times_scheduled", thread.times_scheduled());
thread_object.add("ticks", thread.ticks());
thread_object.add("state", thread.state_string());
thread_object.add("priority", to_string(thread.priority()));
thread_object.add("priority", thread.priority());
thread_object.add("effective_priority", thread.effective_priority());
thread_object.add("syscall_count", thread.syscall_count());
thread_object.add("inode_faults", thread.inode_faults());
thread_object.add("zero_faults", thread.zero_faults());

View File

@ -2831,10 +2831,10 @@ int Process::sys$sched_setparam(pid_t pid, const struct sched_param* param)
if (!is_superuser() && m_euid != peer->m_uid && m_uid != peer->m_uid)
return -EPERM;
if (param->sched_priority < (int)ThreadPriority::First || param->sched_priority > (int)ThreadPriority::Last)
if (param->sched_priority < THREAD_PRIORITY_MIN || param->sched_priority > THREAD_PRIORITY_MAX)
return -EINVAL;
peer->any_thread().set_priority((ThreadPriority)param->sched_priority);
peer->any_thread().set_priority((u32)param->sched_priority);
return 0;
}
@ -3078,13 +3078,10 @@ int Process::sys$create_thread(void* (*entry)(void*), void* argument, const Sysc
// FIXME: return EAGAIN if Thread::all_threads().size() is greater than PTHREAD_THREADS_MAX
ThreadPriority requested_thread_priority = static_cast<ThreadPriority>(params->m_schedule_priority);
if (requested_thread_priority < ThreadPriority::First || requested_thread_priority > ThreadPriority::Last)
int requested_thread_priority = params->m_schedule_priority;
if (requested_thread_priority < THREAD_PRIORITY_MIN || requested_thread_priority > THREAD_PRIORITY_MAX)
return -EINVAL;
if (requested_thread_priority != ThreadPriority::Normal && !is_superuser())
return -EPERM;
bool is_thread_joinable = (0 == params->m_detach_state);
// FIXME: Do something with guard pages?

View File

@ -432,8 +432,6 @@ private:
Process& m_process;
};
const char* to_string(ThreadPriority);
extern InlineLinkedList<Process>* g_processes;
template<typename Callback>

View File

@ -1,3 +1,4 @@
#include <AK/QuickSort.h>
#include <AK/TemporaryChange.h>
#include <Kernel/Arch/i386/PIT.h>
#include <Kernel/FileSystem/FileDescription.h>
@ -21,7 +22,7 @@ void Scheduler::init_thread(Thread& thread)
void Scheduler::update_state_for_thread(Thread& thread)
{
ASSERT_INTERRUPTS_DISABLED();
auto& list = g_scheduler_data->thread_list_for_state_and_priority(thread.state(), thread.priority());
auto& list = g_scheduler_data->thread_list_for_state(thread.state());
if (list.contains(thread))
return;
@ -29,35 +30,12 @@ void Scheduler::update_state_for_thread(Thread& thread)
list.append(thread);
}
template<typename Callback>
static inline IterationDecision for_each_runnable_with_priority(ThreadPriority priority, Callback callback)
{
ASSERT_INTERRUPTS_DISABLED();
auto& tl = g_scheduler_data->m_runnable_threads[(u8)priority - (u8)ThreadPriority::First];
for (auto it = tl.begin(); it != tl.end();) {
auto& thread = *it;
it = ++it;
if (callback(thread) == IterationDecision::Break)
return IterationDecision::Break;
}
return IterationDecision::Continue;
}
static u32 time_slice_for(ThreadPriority priority)
static u32 time_slice_for(const Thread& thread)
{
// One time slice unit == 1ms
switch (priority) {
case ThreadPriority::High:
return 20;
case ThreadPriority::Normal:
return 15;
case ThreadPriority::Low:
return 5;
case ThreadPriority::Idle:
if (&thread == g_colonel)
return 1;
}
ASSERT_NOT_REACHED();
return 10;
}
Thread* current;
@ -364,46 +342,54 @@ bool Scheduler::pick_next()
#ifdef SCHEDULER_RUNNABLE_DEBUG
dbgprintf("Non-runnables:\n");
Scheduler::for_each_nonrunnable([](Thread& thread) -> IterationDecision {
auto& process = thread.process();
dbgprintf("[K%x] %-12s %s(%u:%u) @ %w:%x\n", &process, thread.state_string(), process.name().characters(), process.pid(), thread.tid(), thread.tss().cs, thread.tss().eip);
dbgprintf(" %-12s %s(%u:%u) @ %w:%x\n", thread.state_string(), thread.name().characters(), thread.pid(), thread.tid(), thread.tss().cs, thread.tss().eip);
return IterationDecision::Continue;
});
for (u8 priority = (u8)ThreadPriority::Last; priority >= (u8)ThreadPriority::First; --priority) {
dbgprintf("Runnables (%s):\n", to_string((ThreadPriority)priority));
for_each_runnable_with_priority((ThreadPriority)priority, [](Thread& thread) -> IterationDecision {
auto& process = thread.process();
dbgprintf("[K%x] %-12s %s(%u:%u) @ %w:%x\n", &process, thread.state_string(), process.name().characters(), process.pid(), thread.tid(), thread.tss().cs, thread.tss().eip);
return IterationDecision::Continue;
});
}
dbgprintf("Runnables:\n");
Scheduler::for_each_runnable([](Thread& thread) -> IterationDecision {
dbgprintf(" %3u/%2u %-12s %s(%u:%u) @ %w:%x\n", thread.effective_priority(), thread.priority(), thread.state_string(), thread.name().characters(), thread.pid(), thread.tid(), thread.tss().cs, thread.tss().eip);
return IterationDecision::Continue;
});
#endif
for (u8 priority = (u8)ThreadPriority::Last; priority >= (u8)ThreadPriority::First; --priority) {
auto& runnable_list = g_scheduler_data->m_runnable_threads[priority - (u8)ThreadPriority::First];
if (runnable_list.is_empty())
Vector<Thread*, 128> sorted_runnables;
for_each_runnable([&sorted_runnables](auto& thread) {
sorted_runnables.append(&thread);
return IterationDecision::Continue;
});
quick_sort(sorted_runnables.begin(), sorted_runnables.end(), [](auto& a, auto& b) { return a->effective_priority() >= b->effective_priority(); });
Thread* thread_to_schedule = nullptr;
for (auto* thread : sorted_runnables) {
if (thread->process().is_being_inspected())
continue;
auto* previous_head = runnable_list.first();
for (;;) {
// Move head to tail.
runnable_list.append(*runnable_list.first());
auto* thread = runnable_list.first();
ASSERT(thread->state() == Thread::Runnable || thread->state() == Thread::Running);
if (!thread->process().is_being_inspected() && (thread->state() == Thread::Runnable || thread->state() == Thread::Running)) {
#ifdef SCHEDULER_DEBUG
dbgprintf("switch to %s(%u:%u) @ %w:%x\n", thread->process().name().characters(), thread->process().pid(), thread->tid(), thread->tss().cs, thread->tss().eip);
#endif
return context_switch(*thread);
}
if (thread == previous_head)
break;
if (!thread_to_schedule) {
thread->m_extra_priority = 0;
thread_to_schedule = thread;
} else {
thread->m_extra_priority++;
}
}
// Nothing wants to run. Send in the colonel!
return context_switch(*g_colonel);
if (!thread_to_schedule)
thread_to_schedule = g_colonel;
#ifdef SCHEDULER_DEBUG
dbgprintf("switch to %s(%u:%u) @ %w:%x\n",
thread_to_schedule->name().characters(),
thread_to_schedule->pid(),
thread_to_schedule->tid(),
thread_to_schedule->tss().cs,
thread_to_schedule->tss().eip);
#endif
return context_switch(*thread_to_schedule);
}
bool Scheduler::donate_to(Thread* beneficiary, const char* reason)
@ -417,7 +403,7 @@ bool Scheduler::donate_to(Thread* beneficiary, const char* reason)
if (!beneficiary || beneficiary->state() != Thread::Runnable || ticks_left <= 1)
return yield();
unsigned ticks_to_donate = min(ticks_left - 1, time_slice_for(beneficiary->priority()));
unsigned ticks_to_donate = min(ticks_left - 1, time_slice_for(*beneficiary));
#ifdef SCHEDULER_DEBUG
dbgprintf("%s(%u:%u) donating %u ticks to %s(%u:%u), reason=%s\n", current->process().name().characters(), current->pid(), current->tid(), ticks_to_donate, beneficiary->process().name().characters(), beneficiary->pid(), beneficiary->tid(), reason);
#endif
@ -459,7 +445,7 @@ void Scheduler::switch_now()
bool Scheduler::context_switch(Thread& thread)
{
thread.set_ticks_left(time_slice_for(thread.priority()));
thread.set_ticks_left(time_slice_for(thread));
thread.did_schedule();
if (current == &thread)
@ -472,10 +458,10 @@ bool Scheduler::context_switch(Thread& thread)
current->set_state(Thread::Runnable);
#ifdef LOG_EVERY_CONTEXT_SWITCH
dbgprintf("Scheduler: %s(%u:%u) -> %s(%u:%u) [%s] %w:%x\n",
dbgprintf("Scheduler: %s(%u:%u) -> %s(%u:%u) [%u] %w:%x\n",
current->process().name().characters(), current->process().pid(), current->tid(),
thread.process().name().characters(), thread.process().pid(), thread.tid(),
to_string(thread.priority()),
thread.priority(),
thread.tss().cs, thread.tss().eip);
#endif
}
@ -552,8 +538,7 @@ void Scheduler::initialize()
s_redirection.selector = gdt_alloc_entry();
initialize_redirection();
s_colonel_process = Process::create_kernel_process(g_colonel, "colonel", nullptr);
// Make sure the colonel uses a smallish time slice.
g_colonel->set_priority(ThreadPriority::Idle);
g_colonel->set_priority(THREAD_PRIORITY_MIN);
load_task_register(s_redirection.selector);
}

View File

@ -270,7 +270,7 @@ struct SC_futex_params {
struct SC_create_thread_params {
unsigned int m_detach_state = 0; // JOINABLE or DETACHED
int m_schedule_priority = 2; // ThreadPriority::Normal
int m_schedule_priority = 30; // THREAD_PRIORITY_NORMAL
// FIXME: Implement guard pages in create_thread (unreadable pages at "overflow" end of stack)
// "If an implementation rounds up the value of guardsize to a multiple of {PAGESIZE},
// a call to pthread_attr_getguardsize() specifying attr shall store in the guardsize

View File

@ -765,23 +765,6 @@ const LogStream& operator<<(const LogStream& stream, const Thread& value)
return stream << value.process().name() << "(" << value.pid() << ":" << value.tid() << ")";
}
const char* to_string(ThreadPriority priority)
{
switch (priority) {
case ThreadPriority::Idle:
return "Idle";
case ThreadPriority::Low:
return "Low";
case ThreadPriority::Normal:
return "Normal";
case ThreadPriority::High:
return "High";
}
dbg() << "to_string(ThreadPriority): Invalid priority: " << (u32)priority;
ASSERT_NOT_REACHED();
return nullptr;
}
void Thread::wait_on(WaitQueue& queue, Thread* beneficiary, const char* reason)
{
bool did_unlock = unlock_process_if_locked();

View File

@ -35,14 +35,11 @@ struct ThreadSpecificData {
ThreadSpecificData* self;
};
enum class ThreadPriority : u8 {
Idle,
Low,
Normal,
High,
First = Low,
Last = High,
};
#define THREAD_PRIORITY_MIN 1
#define THREAD_PRIORITY_LOW 10
#define THREAD_PRIORITY_NORMAL 30
#define THREAD_PRIORITY_HIGH 50
#define THREAD_PRIORITY_MAX 99
class Thread {
friend class Process;
@ -61,8 +58,10 @@ public:
int tid() const { return m_tid; }
int pid() const;
void set_priority(ThreadPriority p) { m_priority = p; }
ThreadPriority priority() const { return m_priority; }
void set_priority(u32 p) { m_priority = p; }
u32 priority() const { return m_priority; }
u32 effective_priority() const { return m_priority + m_extra_priority; }
void set_joinable(bool j) { m_is_joinable = j; }
bool is_joinable() const { return m_is_joinable; }
@ -451,7 +450,8 @@ private:
FPUState* m_fpu_state { nullptr };
State m_state { Invalid };
String m_name;
ThreadPriority m_priority { ThreadPriority::Normal };
u32 m_priority { THREAD_PRIORITY_NORMAL };
u32 m_extra_priority { 0 };
bool m_has_used_fpu { false };
bool m_dump_backtrace_on_finalization { false };
bool m_should_die { false };
@ -501,15 +501,13 @@ const LogStream& operator<<(const LogStream&, const Thread&);
struct SchedulerData {
typedef IntrusiveList<Thread, &Thread::m_runnable_list_node> ThreadList;
static constexpr size_t num_thread_priorities = (size_t)ThreadPriority::Last - (size_t)ThreadPriority::First + 1;
ThreadList m_runnable_threads[num_thread_priorities];
ThreadList m_runnable_threads;
ThreadList m_nonrunnable_threads;
ThreadList& thread_list_for_state_and_priority(Thread::State state, ThreadPriority priority)
ThreadList& thread_list_for_state(Thread::State state)
{
if (Thread::is_runnable_state(state))
return m_runnable_threads[(u8)priority - (u8)ThreadPriority::First];
return m_runnable_threads;
return m_nonrunnable_threads;
}
};
@ -518,13 +516,12 @@ template<typename Callback>
inline IterationDecision Scheduler::for_each_runnable(Callback callback)
{
ASSERT_INTERRUPTS_DISABLED();
for (auto& tl : g_scheduler_data->m_runnable_threads) {
for (auto it = tl.begin(); it != tl.end();) {
auto& thread = *it;
it = ++it;
if (callback(thread) == IterationDecision::Break)
return IterationDecision::Break;
}
auto& tl = g_scheduler_data->m_runnable_threads;
for (auto it = tl.begin(); it != tl.end();) {
auto& thread = *it;
it = ++it;
if (callback(thread) == IterationDecision::Break)
return IterationDecision::Break;
}
return IterationDecision::Continue;

View File

@ -175,7 +175,7 @@ VFS* vfs;
kprintf("init_stage2: error spawning Shell: %d\n", error);
hang();
}
thread->set_priority(ThreadPriority::High);
thread->set_priority(THREAD_PRIORITY_HIGH);
} else {
tty0->set_graphical(true);
Thread* thread = nullptr;
@ -184,7 +184,7 @@ VFS* vfs;
kprintf("init_stage2: error spawning SystemServer: %d\n", error);
hang();
}
thread->set_priority(ThreadPriority::High);
thread->set_priority(THREAD_PRIORITY_HIGH);
}
{
Thread* thread = nullptr;
@ -326,7 +326,7 @@ extern "C" [[noreturn]] void init(u32 physical_address_for_kernel_page_tables)
});
Process::create_kernel_process(g_finalizer, "Finalizer", [] {
current->set_priority(ThreadPriority::Low);
current->set_priority(THREAD_PRIORITY_LOW);
for (;;) {
current->wait_on(*g_finalizer_wait_queue);
Thread::finalize_dying_threads();

View File

@ -44,6 +44,12 @@ int module_unload(const char* name, size_t name_length);
int profiling_enable(pid_t);
int profiling_disable(pid_t);
#define THREAD_PRIORITY_MIN 1
#define THREAD_PRIORITY_LOW 10
#define THREAD_PRIORITY_NORMAL 30
#define THREAD_PRIORITY_HIGH 50
#define THREAD_PRIORITY_MAX 99
#define FUTEX_WAIT 1
#define FUTEX_WAKE 2

View File

@ -54,7 +54,8 @@ HashMap<pid_t, CProcessStatistics> CProcessStatisticsReader::get_all()
thread.name = thread_object.get("name").to_string();
thread.state = thread_object.get("state").to_string();
thread.ticks = thread_object.get("ticks").to_u32();
thread.priority = thread_object.get("priority").to_string();
thread.priority = thread_object.get("priority").to_u32();
thread.effective_priority = thread_object.get("effective_priority").to_u32();
thread.syscall_count = thread_object.get("syscall_count").to_u32();
thread.inode_faults = thread_object.get("inode_faults").to_u32();
thread.zero_faults = thread_object.get("zero_faults").to_u32();

View File

@ -19,7 +19,8 @@ struct CThreadStatistics {
unsigned file_read_bytes;
unsigned file_write_bytes;
String state;
String priority;
u32 priority;
u32 effective_priority;
String name;
};

View File

@ -302,8 +302,7 @@ int pthread_attr_setschedparam(pthread_attr_t* attributes, const struct sched_pa
if (!attributes_impl || !p_sched_param)
return EINVAL;
// NOTE: This must track sched_get_priority_[min,max] and ThreadPriority enum in Thread.h
if (p_sched_param->sched_priority < 0 || p_sched_param->sched_priority > 3)
if (p_sched_param->sched_priority < THREAD_PRIORITY_MIN || p_sched_param->sched_priority > THREAD_PRIORITY_MAX)
return ENOTSUP;
attributes_impl->m_schedule_priority = p_sched_param->sched_priority;

View File

@ -211,14 +211,12 @@ Service::Service(const CConfigFile& config, const StringView& name)
m_stdio_file_path = config.read_entry(name, "StdIO");
String prio = config.read_entry(name, "Priority");
if (prio == "idle")
m_priority = 0;
else if (prio == "low")
m_priority = 1;
if (prio == "low")
m_priority = 10;
else if (prio == "normal" || prio.is_null())
m_priority = 2;
m_priority = 30;
else if (prio == "high")
m_priority = 3;
m_priority = 50;
else
ASSERT_NOT_REACHED();

View File

@ -37,7 +37,7 @@ struct ThreadData {
unsigned cpu_percent { 0 };
unsigned cpu_percent_decimal { 0 };
String priority;
u32 priority;
String username;
String state;
};
@ -146,10 +146,10 @@ int main(int, char**)
});
for (auto* thread : threads) {
printf("%6d %3d %c %-8s %-10s %6zu %6zu %2u.%1u %s\n",
printf("%6d %3d %2u %-8s %-10s %6zu %6zu %2u.%1u %s\n",
thread->pid,
thread->tid,
thread->priority[0],
thread->priority,
thread->username.characters(),
thread->state.characters(),
thread->amount_virtual / 1024,