Implement fork()!

This is quite cool! The syscall entry point plumbs the register dump
down to sys$fork(), which uses it to set up the child process's TSS
in order to resume execution right after the int 0x80 fork() call. :^)

This works pretty well, although there is still an unresolved problem with the
kernel alias mappings used to clone the parent process's regions. If I disable
the MM::release_page_directory() code, the problem goes away, so a physical
page is probably being freed prematurely somewhere.
This commit is contained in:
Andreas Kling 2018-11-02 20:41:58 +01:00
parent 10b666f69a
commit 8accc92c3c
Notes: sideshowbarker 2024-07-19 18:34:22 +09:00
16 changed files with 228 additions and 78 deletions

View File

@ -407,3 +407,23 @@ bool MemoryManager::validate_user_write(const Process& process, LinearAddress la
return false;
return true;
}
// Builds an eager copy of this region: a fresh zone of the same size is
// created, wrapped in a new Region with the same linear address, size and
// name, and `size` bytes are copied across through two kernel aliases.
// Returns the newly created region.
RetainPtr<Region> Region::clone()
{
    // Both scope objects are declared before any memory-manager call is made
    // and stay in scope until the function returns.
    InterruptDisabler interrupts_off;
    KernelPagingScope kernel_paging;

    // FIXME: Implement COW regions.
    auto fresh_zone = MM.createZone(zone->size());
    auto duplicate = adopt(*new Region(linearAddress, size, move(fresh_zone), String(name)));

    // FIXME: It would be cool to make the src_alias a read-only mapping.
    byte* from = MM.create_kernel_alias_for_region(*this);
    byte* to = MM.create_kernel_alias_for_region(*duplicate);

    memcpy(to, from, size);

    // Aliases are removed in the reverse order of their creation.
    MM.remove_kernel_alias_for_region(*duplicate, to);
    MM.remove_kernel_alias_for_region(*this, from);

    return duplicate;
}

View File

@ -40,6 +40,8 @@ private:
struct Region : public Retainable<Region> {
Region(LinearAddress, size_t, RetainPtr<Zone>&&, String&&);
~Region();
RetainPtr<Region> clone();
LinearAddress linearAddress;
size_t size { 0 };
RetainPtr<Zone> zone;

View File

@ -105,30 +105,6 @@ void Process::initialize()
loadTaskRegister(s_kernelProcess->selector());
}
void Process::allocateLDT()
{
ASSERT(!m_tss.ldt);
static const WORD numLDTEntries = 4;
m_ldt_selector = gdt_alloc_entry();
m_ldtEntries = new Descriptor[numLDTEntries];
#if 0
kprintf("new ldt selector = %x\n", m_ldt_selector);
kprintf("new ldt table at = %p\n", m_ldtEntries);
kprintf("new ldt table size = %u\n", (numLDTEntries * 8) - 1);
#endif
Descriptor& ldt = getGDTEntry(m_ldt_selector);
ldt.setBase(m_ldtEntries);
ldt.setLimit(numLDTEntries * 8 - 1);
ldt.dpl = 0;
ldt.segment_present = 1;
ldt.granularity = 0;
ldt.zero = 0;
ldt.operation_size = 1;
ldt.descriptor_type = 0;
ldt.type = Descriptor::LDT;
m_tss.ldt = m_ldt_selector;
}
template<typename Callback>
static void forEachProcess(Callback callback)
{
@ -235,6 +211,77 @@ int Process::sys$gethostname(char* buffer, size_t size)
return 0;
}
// Creates a child process that duplicates this one and is set up to resume
// from the register state captured in `regs`.
//
// Steps:
//  - constructs the child with this process passed as `fork_parent`;
//  - clones every region of this process, re-creates the subregions that
//    refer to each region on top of the clone, and maps them into the child;
//  - fills the child's TSS from `regs`, except eax, which is forced to 0 so
//    that fork() returns 0 in the child;
//  - registers the child with ProcFileSystem and the global process list.
//
// Returns the newly created child process.
Process* Process::fork(RegisterDump& regs)
{
    auto* child = new Process(String(m_name), m_uid, m_gid, m_pid, m_ring, m_cwd.copyRef(), m_executable.copyRef(), m_tty, this);
#ifdef FORK_DEBUG
    dbgprintf("fork: child=%p\n", child);
#endif

#if 0
    // FIXME: An honest fork() would copy these. Needs a Vector copy ctor.
    child->m_arguments = m_arguments;
    child->m_initialEnvironment = m_initialEnvironment;
#endif

    for (auto& region : m_regions) {
#ifdef FORK_DEBUG
        dbgprintf("fork: cloning Region{%p}\n", region.ptr());
#endif
        auto cloned_region = region->clone();
        // FIXME: Move subregions into Region?
        for (auto& subregion : m_subregions) {
            // Only subregions backed by the region being cloned are relevant here.
            if (subregion->region.ptr() != region.ptr())
                continue;
#ifdef FORK_DEBUG
            dbgprintf("fork: cloning Subregion{%p}\n", subregion.ptr());
#endif
            auto cloned_subregion = make<Subregion>(*cloned_region, subregion->offset, subregion->size, subregion->linearAddress, String(subregion->name));
            child->m_subregions.append(move(cloned_subregion));
            MM.mapSubregion(*child, *child->m_subregions.last());
        }
        child->m_regions.append(move(cloned_region));
        MM.mapRegion(*child, *child->m_regions.last());
    }

    child->m_tss.eax = 0; // fork() returns 0 in the child :^)
    child->m_tss.ebx = regs.ebx;
    child->m_tss.ecx = regs.ecx;
    child->m_tss.edx = regs.edx;
    child->m_tss.ebp = regs.ebp;
    child->m_tss.esp = regs.esp_if_crossRing;
    child->m_tss.esi = regs.esi;
    child->m_tss.edi = regs.edi;
    child->m_tss.eflags = regs.eflags;
    child->m_tss.eip = regs.eip;
    child->m_tss.cs = regs.cs;
    child->m_tss.ds = regs.ds;
    child->m_tss.es = regs.es;
    child->m_tss.fs = regs.fs;
    child->m_tss.gs = regs.gs;
    child->m_tss.ss = regs.ss_if_crossRing;

#ifdef FORK_DEBUG
    dbgprintf("fork: child will begin executing at %w:%x with stack %w:%x\n", child->m_tss.cs, child->m_tss.eip, child->m_tss.ss, child->m_tss.esp);
#endif

    ProcFileSystem::the().addProcess(*child);

    {
        // This function is reached from the syscall handler, which runs with
        // interrupts enabled. The global process list and process counter are
        // shared state (presumably also walked by the scheduler), so they are
        // updated with interrupts disabled. The child's TSS is fully
        // initialized above, before the child becomes visible in the list.
        InterruptDisabler disabler;
        s_processes->prepend(child);
        system.nprocess++;
    }
#ifdef TASK_DEBUG
    kprintf("Process %u (%s) forked from %u @ %p\n", child->pid(), child->name().characters(), m_pid, child->m_tss.eip);
#endif
    return child;
}
// fork() syscall handler: forks the calling process using the register state
// in `regs` and returns the pid of the resulting child.
pid_t Process::sys$fork(RegisterDump& regs)
{
    Process* forked = fork(regs);
    ASSERT(forked);
    return forked->pid();
}
int Process::sys$spawn(const char* path, const char** args)
{
if (args) {
@ -413,9 +460,9 @@ Process* Process::createKernelProcess(void (*e)(), String&& name)
return process;
}
Process::Process(String&& name, uid_t uid, gid_t gid, pid_t parentPID, RingLevel ring, RetainPtr<VirtualFileSystem::Node>&& cwd, RetainPtr<VirtualFileSystem::Node>&& executable, TTY* tty)
Process::Process(String&& name, uid_t uid, gid_t gid, pid_t parentPID, RingLevel ring, RetainPtr<VirtualFileSystem::Node>&& cwd, RetainPtr<VirtualFileSystem::Node>&& executable, TTY* tty, Process* fork_parent)
: m_name(move(name))
, m_pid(next_pid++)
, m_pid(next_pid++) // FIXME: RACE: This variable looks racy!
, m_uid(uid)
, m_gid(gid)
, m_state(Runnable)
@ -425,57 +472,71 @@ Process::Process(String&& name, uid_t uid, gid_t gid, pid_t parentPID, RingLevel
, m_tty(tty)
, m_parentPID(parentPID)
{
{
if (fork_parent) {
m_sid = fork_parent->m_sid;
m_pgid = fork_parent->m_pgid;
} else {
// FIXME: Use a ProcessHandle? Presumably we're executing *IN* the parent right now though..
InterruptDisabler disabler;
if (auto* parent = Process::fromPID(m_parentPID)) {
m_sid = parent->m_sid;
m_pgid = parent->m_pgid;
}
}
m_page_directory = (PageDirectory*)kmalloc_page_aligned(sizeof(PageDirectory));
MM.populate_page_directory(*this);
m_file_descriptors.resize(m_max_open_file_descriptors);
if (tty) {
m_file_descriptors[0] = tty->open(O_RDONLY);
m_file_descriptors[1] = tty->open(O_WRONLY);
m_file_descriptors[2] = tty->open(O_WRONLY);
}
m_nextRegion = LinearAddress(0x10000000);
memset(&m_tss, 0, sizeof(m_tss));
if (isRing3()) {
memset(&m_ldtEntries, 0, sizeof(m_ldtEntries));
allocateLDT();
}
// Only IF is set when a process boots.
m_tss.eflags = 0x0202;
word cs, ds, ss;
if (isRing0()) {
cs = 0x08;
ds = 0x10;
ss = 0x10;
if (fork_parent) {
m_file_descriptors.resize(fork_parent->m_file_descriptors.size());
for (size_t i = 0; i < fork_parent->m_file_descriptors.size(); ++i) {
if (!fork_parent->m_file_descriptors[i])
continue;
#ifdef FORK_DEBUG
dbgprintf("fork: cloning fd %u... (%p) istty? %um\n", i, fork_parent->m_file_descriptors[i].ptr(), fork_parent->m_file_descriptors[i]->isTTY());
#endif
m_file_descriptors[i] = fork_parent->m_file_descriptors[i]->clone();
}
} else {
cs = 0x1b;
ds = 0x23;
ss = 0x23;
m_file_descriptors.resize(m_max_open_file_descriptors);
if (tty) {
m_file_descriptors[0] = tty->open(O_RDONLY);
m_file_descriptors[1] = tty->open(O_WRONLY);
m_file_descriptors[2] = tty->open(O_WRONLY);
}
}
m_tss.ds = ds;
m_tss.es = ds;
m_tss.fs = ds;
m_tss.gs = ds;
m_tss.ss = ss;
m_tss.cs = cs;
if (fork_parent)
m_nextRegion = fork_parent->m_nextRegion;
else
m_nextRegion = LinearAddress(0x10000000);
if (fork_parent) {
memcpy(&m_tss, &fork_parent->m_tss, sizeof(m_tss));
} else {
memset(&m_tss, 0, sizeof(m_tss));
// Only IF is set when a process boots.
m_tss.eflags = 0x0202;
word cs, ds, ss;
if (isRing0()) {
cs = 0x08;
ds = 0x10;
ss = 0x10;
} else {
cs = 0x1b;
ds = 0x23;
ss = 0x23;
}
m_tss.ds = ds;
m_tss.es = ds;
m_tss.fs = ds;
m_tss.gs = ds;
m_tss.ss = ss;
m_tss.cs = cs;
}
m_tss.cr3 = (dword)m_page_directory;
@ -486,10 +547,14 @@ Process::Process(String&& name, uid_t uid, gid_t gid, pid_t parentPID, RingLevel
m_stackTop0 = (stackBottom + defaultStackSize) & 0xffffff8;
m_tss.esp = m_stackTop0;
} else {
auto* region = allocateRegion(defaultStackSize, "stack");
ASSERT(region);
m_stackTop3 = region->linearAddress.offset(defaultStackSize).get() & 0xfffffff8;
m_tss.esp = m_stackTop3;
if (fork_parent) {
m_stackTop3 = fork_parent->m_stackTop3;
} else {
auto* region = allocateRegion(defaultStackSize, "stack");
ASSERT(region);
m_stackTop3 = region->linearAddress.offset(defaultStackSize).get() & 0xfffffff8;
m_tss.esp = m_stackTop3;
}
}
if (isRing3()) {
@ -511,12 +576,6 @@ Process::~Process()
ProcFileSystem::the().removeProcess(*this);
system.nprocess--;
if (isRing3()) {
delete [] m_ldtEntries;
m_ldtEntries = nullptr;
gdt_free_entry(m_ldt_selector);
}
gdt_free_entry(selector());
if (m_kernelStack) {

View File

@ -119,6 +119,7 @@ public:
int sys$uname(utsname*);
int sys$readlink(const char*, char*, size_t);
int sys$ttyname_r(int fd, char*, size_t);
pid_t sys$fork(RegisterDump&);
static void initialize();
@ -155,11 +156,13 @@ public:
void send_signal(int signal, Process* sender);
Process* fork(RegisterDump&);
private:
friend class MemoryManager;
friend bool scheduleNewProcess();
Process(String&& name, uid_t, gid_t, pid_t parentPID, RingLevel, RetainPtr<VirtualFileSystem::Node>&& cwd = nullptr, RetainPtr<VirtualFileSystem::Node>&& executable = nullptr, TTY* = nullptr);
Process(String&& name, uid_t, gid_t, pid_t parentPID, RingLevel, RetainPtr<VirtualFileSystem::Node>&& cwd = nullptr, RetainPtr<VirtualFileSystem::Node>&& executable = nullptr, TTY* = nullptr, Process* fork_parent = nullptr);
void allocateLDT();
@ -183,8 +186,6 @@ private:
State m_state { Invalid };
DWORD m_wakeupTime { 0 };
TSS32 m_tss;
word m_ldt_selector { 0 };
Descriptor* m_ldtEntries { nullptr };
Vector<OwnPtr<FileHandle>> m_file_descriptors;
RingLevel m_ring { Ring0 };
int m_error { 0 };

View File

@ -43,7 +43,7 @@ void initialize()
kprintf("syscall: int 0x80 handler installed\n");
}
DWORD handle(DWORD function, DWORD arg1, DWORD arg2, DWORD arg3)
static DWORD handle(RegisterDump& regs, DWORD function, DWORD arg1, DWORD arg2, DWORD arg3)
{
ASSERT_INTERRUPTS_ENABLED();
switch (function) {
@ -128,6 +128,8 @@ DWORD handle(DWORD function, DWORD arg1, DWORD arg2, DWORD arg3)
return current->sys$tcgetpgrp((int)arg1);
case Syscall::PosixTcsetpgrp:
return current->sys$tcsetpgrp((int)arg1, (pid_t)arg2);
case Syscall::PosixFork:
return current->sys$fork(regs);
default:
kprintf("<%u> int0x80: Unknown function %x requested {%x, %x, %x}\n", current->pid(), function, arg1, arg2, arg3);
break;
@ -143,5 +145,6 @@ void syscall_entry(RegisterDump& regs)
DWORD arg1 = regs.edx;
DWORD arg2 = regs.ecx;
DWORD arg3 = regs.ebx;
regs.eax = Syscall::handle(function, arg1, arg2, arg3);
regs.eax = Syscall::handle(regs, function, arg1, arg2, arg3);
}

View File

@ -47,6 +47,7 @@ enum Function {
PosixGetpgrp = 0x2015,
PosixTcsetpgrp = 0x2016,
PosixTcgetpgrp = 0x2017,
PosixFork = 0x2018,
};
void initialize();

View File

@ -26,6 +26,8 @@ typedef struct
#define ETERNAL_BASE_PHYSICAL 0x200000
#define BASE_PHYS 0x100000
#define RANGE_SIZE 0x100000
PRIVATE BYTE alloc_map[POOL_SIZE / CHUNK_SIZE / 8];
volatile DWORD sum_alloc = 0;
@ -36,6 +38,9 @@ volatile size_t kmalloc_sum_page_aligned = 0;
static byte* s_next_eternal_ptr;
static byte* s_next_page_aligned_ptr;
static byte* s_end_of_eternal_range;
static byte* s_end_of_page_aligned_range;
bool is_kmalloc_address(void* ptr)
{
if (ptr >= (byte*)ETERNAL_BASE_PHYSICAL && ptr < s_next_eternal_ptr)
@ -58,12 +63,16 @@ kmalloc_init()
s_next_eternal_ptr = (byte*)ETERNAL_BASE_PHYSICAL;
s_next_page_aligned_ptr = (byte*)PAGE_ALIGNED_BASE_PHYSICAL;
s_end_of_eternal_range = s_next_eternal_ptr + RANGE_SIZE;
s_end_of_page_aligned_range = s_next_page_aligned_ptr + RANGE_SIZE;
}
void* kmalloc_eternal(size_t size)
{
void* ptr = s_next_eternal_ptr;
s_next_eternal_ptr += size;
ASSERT(s_next_eternal_ptr < s_end_of_eternal_range);
kmalloc_sum_eternal += size;
return ptr;
}
@ -73,6 +82,7 @@ void* kmalloc_page_aligned(size_t size)
ASSERT((size % 4096) == 0);
void* ptr = s_next_page_aligned_ptr;
s_next_page_aligned_ptr += size;
ASSERT(s_next_page_aligned_ptr < s_end_of_page_aligned_range);
kmalloc_sum_page_aligned += size;
return ptr;
}

View File

@ -18,6 +18,7 @@ cp ../Userland/cat mnt/bin/cat
cp ../Userland/uname mnt/bin/uname
cp ../Userland/clear mnt/bin/clear
cp ../Userland/tst mnt/bin/tst
cp ../Userland/ft mnt/bin/ft
cp ../Userland/mm mnt/bin/mm
cp ../Userland/kill mnt/bin/kill
cp ../Userland/tty mnt/bin/tty

View File

@ -5,6 +5,11 @@
extern "C" {
// Userspace fork(): issues the PosixFork syscall and returns its result
// unchanged.
pid_t fork()
{
    const pid_t result = Syscall::invoke(Syscall::PosixFork);
    return result;
}
uid_t getuid()
{
return Syscall::invoke(Syscall::PosixGetuid);

View File

@ -8,6 +8,7 @@ __BEGIN_DECLS
extern char** environ;
/* Returns the page size in bytes; the value is hard-coded to 4096. */
inline int getpagesize() { return 4096; }
pid_t fork();
pid_t getsid(pid_t);
pid_t setsid();
int setpgid(pid_t pid, pid_t pgid);

1
Userland/.gitignore vendored
View File

@ -16,3 +16,4 @@ tst
mm
kill
tty
ft

View File

@ -14,6 +14,7 @@ OBJS = \
tst.o \
mm.o \
kill.o \
ft.o \
tty.o
APPS = \
@ -32,6 +33,7 @@ APPS = \
tst \
mm \
kill \
ft \
tty
ARCH_FLAGS =
@ -91,6 +93,9 @@ clear: clear.o
tst: tst.o
$(LD) -o $@ $(LDFLAGS) $< ../LibC/LibC.a
ft: ft.o
$(LD) -o $@ $(LDFLAGS) $< ../LibC/LibC.a
mm: mm.o
$(LD) -o $@ $(LDFLAGS) $< ../LibC/LibC.a

14
Userland/ft.cpp Normal file
View File

@ -0,0 +1,14 @@
#include <stdio.h>
#include <unistd.h>
// Small test program for fork(): prints a banner, forks once, and then
// prints a different line depending on whether it is running as the child
// (fork() returned 0) or as the parent (fork() returned the child's pid).
// Returns 0 on success and 1 if fork() reports failure.
int main(int argc, char** argv)
{
    // The command line is not used by this test.
    (void)argc;
    (void)argv;

    printf("Testing fork()...\n");
    pid_t pid = fork();
    if (pid < 0) {
        // A negative value means no child was created; do not report it as
        // if it were a child pid.
        printf("fork() failed\n");
        return 1;
    }
    if (pid == 0) {
        printf("child, pid=%d\n", getpid());
    } else {
        printf("parent, child pid=%d\n", pid);
    }
    return 0;
}

View File

@ -32,6 +32,13 @@ static int sh_pwd(int, const char**)
return 0;
}
// Handler for the shell's "fork" command: calls fork() and prints the
// caller's own pid next to the value fork() returned. Arguments are ignored;
// always returns 0.
static int sh_fork(int, const char**)
{
    const pid_t fork_result = fork();
    // getpid() is queried after fork(), exactly as when it is evaluated as a
    // printf argument.
    const auto self = getpid();
    printf("getpid()=%d, fork()=%d\n", self, fork_result);
    return 0;
}
static int sh_exit(int, const char**)
{
printf("Good-bye!\n");
@ -94,6 +101,11 @@ static bool handle_builtin(int argc, const char** argv, int& retval)
retval = sh_exit(argc, argv);
return true;
}
if (!strcmp(argv[0], "fork")) {
retval = sh_fork(argc, argv);
return true;
}
return false;
}

View File

@ -15,6 +15,19 @@ FileHandle::~FileHandle()
{
}
// Creates a new FileHandle on the same vnode and copies this handle's current
// offset into it (plus the fd and blocking flag when built with SERENITY).
// Returns nullptr if the new handle could not be created.
OwnPtr<FileHandle> FileHandle::clone()
{
    auto duplicate = make<FileHandle>(m_vnode.copyRef());
    if (!duplicate)
        return nullptr;

    auto& target = *duplicate;
    target.m_currentOffset = m_currentOffset;
#ifdef SERENITY
    target.m_fd = m_fd;
    target.m_isBlocking = m_isBlocking;
#endif
    return duplicate;
}
#ifndef SERENITY
bool additionWouldOverflow(Unix::off_t a, Unix::off_t b)
{

View File

@ -11,6 +11,8 @@ public:
explicit FileHandle(RetainPtr<VirtualFileSystem::Node>&&);
~FileHandle();
OwnPtr<FileHandle> clone();
int close();
Unix::off_t seek(Unix::off_t, int whence);