ladybird/Kernel/Syscall.cpp
Andreas Kling 9eef39d68a Kernel: Start implementing x86 SMAP support
Supervisor Mode Access Prevention (SMAP) is an x86 CPU feature that
prevents the kernel from accessing userspace memory. With SMAP enabled,
trying to read/write a userspace memory address while in the kernel
will now generate a page fault.

Since it's sometimes necessary to read/write userspace memory, there
are two new instructions that quickly switch the protection on/off:
STAC (disables protection) and CLAC (enables protection.)
These are exposed in kernel code via the stac() and clac() helpers.

There's also a SmapDisabler RAII object that can be used to ensure
that you don't forget to re-enable protection before returning to
userspace code.

This patch also adds copy_to_user(), copy_from_user() and memset_user()
which are the "correct" way of doing things. These functions allow us
to briefly disable protection for a specific purpose, and then turn it
back on immediately after it's done. Going forward, all kernel code
should be moved to using these, and all uses of SmapDisabler are to be
considered FIXMEs.

Note that we're not realizing the full potential of this feature since
I've used SmapDisabler quite liberally in this initial bring-up patch.
2020-01-05 18:14:51 +01:00

139 lines
4.2 KiB
C++

#include <Kernel/Arch/i386/CPU.h>
#include <Kernel/Process.h>
#include <Kernel/ProcessTracer.h>
#include <Kernel/Random.h>
#include <Kernel/Syscall.h>
#include <Kernel/VM/MemoryManager.h>
// C++ half of the syscall entry path. It is called by syscall_asm_entry (below)
// and receives the register state that the stub pushed on the stack as a
// by-value RegisterDump.
extern "C" void syscall_handler(RegisterDump);
// Assembly entry stub, defined in the asm() block below and installed as the
// int 0x82 handler by Syscall::initialize().
extern "C" void syscall_asm_entry();
// The stub saves the caller's registers, switches the data segments, calls
// syscall_handler(), and then restores everything in reverse order before
// returning with iret. Because the saved registers are popped back afterwards,
// any change syscall_handler() makes to its RegisterDump (e.g. the return value
// stored in regs.eax) is what the caller sees.
// NOTE(review): the push order presumably mirrors the field layout of
// RegisterDump in Kernel/Arch/i386/CPU.h -- confirm before reordering anything.
asm(
".globl syscall_asm_entry\n"
"syscall_asm_entry:\n"
// Push a zero dword; presumably a placeholder for the exception-code slot of
// RegisterDump (TODO confirm). It is discarded again right before iret.
" pushl $0x0\n"
// Save all general-purpose registers.
" pusha\n"
// Save the segment registers.
" pushl %ds\n"
" pushl %es\n"
" pushl %fs\n"
" pushl %gs\n"
" pushl %ss\n"
// Load selector 0x10 into ds and es (presumably the kernel data segment --
// verify against the GDT setup).
" mov $0x10, %ax\n"
" mov %ax, %ds\n"
" mov %ax, %es\n"
// Clear the direction flag before entering compiled C++ code.
" cld\n"
" call syscall_handler\n"
// Skip over the saved ss instead of popping it back.
" add $0x4, %esp\n"
// Restore the remaining segment registers in reverse push order.
" popl %gs\n"
" popl %fs\n"
" popl %es\n"
" popl %ds\n"
// Restore the general-purpose registers (including any eax written by the handler).
" popa\n"
// Drop the zero dword pushed at the top of the stub.
" add $0x4, %esp\n"
" iret\n");
namespace Syscall {

// Dispatches a single syscall; defined at the bottom of this namespace.
static int handle(RegisterDump&, u32 function, u32 arg1, u32 arg2, u32 arg3);

// Installs syscall_asm_entry as the user-callable handler for interrupt 0x82
// and logs that the handler is in place.
void initialize()
{
    register_user_callable_interrupt_handler(0x82, syscall_asm_entry);
    kprintf("Syscall: int 0x82 handler installed\n");
}

// The table below reinterpret_casts every Process::sys$* member function to a
// single pointer-to-member type, which GCC would otherwise warn about.
#pragma GCC diagnostic ignored "-Wcast-function-type"
typedef int (Process::*Handler)(u32, u32, u32);

// Dispatch table indexed by syscall function number. ENUMERATE_SYSCALLS is
// defined outside this file; each of its entries expands through one of the
// two macros below, so removed syscalls keep their slot but hold a nullptr.
#define __ENUMERATE_REMOVED_SYSCALL(x) nullptr,
#define __ENUMERATE_SYSCALL(x) reinterpret_cast<Handler>(&Process::sys$##x),
static Handler s_syscall_table[] = {
    ENUMERATE_SYSCALLS
};
#undef __ENUMERATE_SYSCALL
#undef __ENUMERATE_REMOVED_SYSCALL

// Runs syscall number `function` on the current process with the three raw
// register arguments and returns the handler's result.
//
// exit and exit_thread never return to the caller; fork and sigreturn need the
// full register dump. Both groups are therefore handled before the generic
// table dispatch. Returns -ENOSYS if `function` is out of range or refers to a
// removed syscall (a nullptr table slot).
int handle(RegisterDump& regs, u32 function, u32 arg1, u32 arg2, u32 arg3)
{
    ASSERT_INTERRUPTS_ENABLED();
    auto& process = current->process();
    current->did_syscall();

    if (function == SC_exit || function == SC_exit_thread) {
        // These syscalls need special handling since they never return to the caller.
        cli();
        // The normal post-syscall tracer notification in syscall_handler() is
        // never reached for these, so notify the tracer here with a result of 0.
        if (auto* tracer = process.tracer())
            tracer->did_syscall(function, arg1, arg2, arg3, 0);
        if (function == SC_exit)
            process.sys$exit((int)arg1);
        else
            process.sys$exit_thread((void*)arg1);
        ASSERT_NOT_REACHED();
        return 0;
    }

    // These two take the register dump itself rather than the plain arguments.
    if (function == SC_fork)
        return process.sys$fork(regs);

    if (function == SC_sigreturn)
        return process.sys$sigreturn(regs);

    if (function >= Function::__Count) {
        // dbg() is a stream, not printf: the number has to be streamed in,
        // a "%u" placeholder would just be printed literally.
        dbg() << process << ": Unknown syscall " << function << " requested (" << arg1 << ", " << arg2 << ", " << arg3 << ")";
        return -ENOSYS;
    }

    auto handler = s_syscall_table[function];
    if (handler == nullptr) {
        dbg() << process << ": Null syscall " << function << " requested: \"" << to_string((Function)function) << "\", you probably need to rebuild this program.";
        return -ENOSYS;
    }

    return (process.*handler)(arg1, arg2, arg3);
}

}
// C++ side of the syscall entry path, called from syscall_asm_entry with the
// saved user register state. Validates where the syscall came from, dispatches
// it through Syscall::handle() while holding the process big lock, and stores
// the result in regs.eax.
void syscall_handler(RegisterDump regs)
{
    // Turn SMAP protection back on first thing, whatever state we were entered in.
    clac();

    // Make kernel stack layout less predictable: move the stack pointer down by
    // a random amount of 0-255 bytes. The empty asm claims to write the
    // allocation so the compiler cannot drop it.
    auto* stack_padding = (char*)__builtin_alloca(get_fast_random<u8>());
    asm volatile(""
                 : "=m"(*stack_padding));

    auto& caller = current->process();

    // The userspace stack pointer must pass the memory manager's user stack check.
    VirtualAddress user_stack_pointer(regs.esp_if_crossRing);
    if (!MM.validate_user_stack(caller, user_stack_pointer)) {
        dbgprintf("Invalid stack pointer: %p\n", regs.esp_if_crossRing);
        handle_crash(regs, "Bad stack on syscall entry", SIGSTKFLT);
        ASSERT_NOT_REACHED();
    }

    // The calling instruction must live in a known region of the process...
    VirtualAddress call_site(regs.eip);
    auto* origin_region = MM.region_from_vaddr(caller, call_site);
    if (origin_region == nullptr) {
        dbgprintf("Syscall from %p which has no region\n", regs.eip);
        handle_crash(regs, "Syscall from unknown region", SIGSEGV);
        ASSERT_NOT_REACHED();
    }

    // ...and that region must not be writable.
    if (origin_region->is_writable()) {
        dbgprintf("Syscall from writable memory at %p\n", regs.eip);
        handle_crash(regs, "Syscall from writable memory", SIGSEGV);
        ASSERT_NOT_REACHED();
    }

    caller.big_lock().lock();

    // Syscall ABI: number in eax, arguments in edx, ecx, ebx.
    const u32 syscall_number = regs.eax;
    const u32 first_arg = regs.edx;
    const u32 second_arg = regs.ecx;
    const u32 third_arg = regs.ebx;

    // The result is written into the saved eax of the register dump.
    const int result = Syscall::handle(regs, syscall_number, first_arg, second_arg, third_arg);
    regs.eax = (u32)result;

    auto* tracer = caller.tracer();
    if (tracer)
        tracer->did_syscall(syscall_number, first_arg, second_arg, third_arg, regs.eax);

    caller.big_lock().unlock();

    // Either go back to userspace, or die here if that is what's pending.
    current->die_if_needed();
}