Kernel: Start implementing purgeable memory support

It's now possible to get purgeable memory by using mmap(MAP_PURGEABLE).
Purgeable memory has a "volatile" flag that can be set and cleared with madvise():

- madvise(..., MADV_SET_VOLATILE)
- madvise(..., MADV_SET_NONVOLATILE)

When in the "volatile" state, the kernel may take away the underlying
physical memory pages at any time, without notifying the owner.
This gives you a guilt discount when caching very large things. :^)

Setting a purgeable region to non-volatile returns whether the memory
was taken away by the kernel while it was volatile: if
madvise(..., MADV_SET_NONVOLATILE) returns 1, the memory was purged
while volatile, and whatever was in that piece of memory needs to be
reconstructed before use.
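
For example, a cache might use it roughly like this (an illustrative
sketch; the mapping size and fill data are made up, and the <mman.h>
include path just mirrors the LibC header touched below):

#include <mman.h>
#include <stdio.h>
#include <string.h>

int main()
{
    // Illustrative size; a real cache would pick whatever it needs.
    size_t size = 1024 * 1024;

    void* cache = mmap(nullptr, size, PROT_READ | PROT_WRITE,
        MAP_ANONYMOUS | MAP_PRIVATE | MAP_PURGEABLE, 0, 0);
    if (cache == MAP_FAILED) {
        perror("mmap");
        return 1;
    }

    // Fill the cache with something expensive to recompute.
    memset(cache, 'x', size);

    // Done with it for now: let the kernel reclaim the pages whenever
    // it wants to.
    if (madvise(cache, size, MADV_SET_VOLATILE) < 0)
        perror("madvise(MADV_SET_VOLATILE)");

    // ...later, before touching the cache again, take it back.
    // A return value of 1 means the pages were purged while volatile,
    // so the contents must be rebuilt before use.
    int rc = madvise(cache, size, MADV_SET_NONVOLATILE);
    if (rc < 0)
        perror("madvise(MADV_SET_NONVOLATILE)");
    else if (rc == 1)
        memset(cache, 'x', size); // reconstruct the cached data

    return 0;
}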
Andreas Kling 2019-12-09 19:12:38 +01:00
parent 7248c34e35
commit dbb644f20c
13 changed files with 196 additions and 9 deletions


@@ -24,6 +24,7 @@
#include <Kernel/Net/UDPSocket.h>
#include <Kernel/PCI.h>
#include <Kernel/VM/MemoryManager.h>
#include <Kernel/VM/PurgeableVMObject.h>
#include <LibC/errno_numbers.h>
enum ProcParentDirectory {
@@ -262,6 +263,10 @@ Optional<KBuffer> procfs$pid_vm(InodeIdentifier identifier)
region_object.add("writable", region.is_writable());
region_object.add("stack", region.is_stack());
region_object.add("shared", region.is_shared());
region_object.add("purgeable", region.vmobject().is_purgeable());
if (region.vmobject().is_purgeable()) {
region_object.add("volatile", static_cast<const PurgeableVMObject&>(region.vmobject()).is_volatile());
}
region_object.add("purgeable", region.vmobject().is_purgeable());
region_object.add("address", region.vaddr().get());
region_object.add("size", (u32)region.size());
region_object.add("amount_resident", (u32)region.amount_resident());
@@ -716,6 +722,8 @@ Optional<KBuffer> procfs$all(InodeIdentifier)
process_object.add("amount_virtual", (u32)process.amount_virtual());
process_object.add("amount_resident", (u32)process.amount_resident());
process_object.add("amount_shared", (u32)process.amount_shared());
process_object.add("amount_purgeable_volatile", (u32)process.amount_purgeable_volatile());
process_object.add("amount_purgeable_nonvolatile", (u32)process.amount_purgeable_nonvolatile());
process_object.add("icon_id", process.icon_id());
auto thread_array = process_object.add_array("threads");
process.for_each_thread([&](const Thread& thread) {

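The new fields also make purgeable state observable from userspace. A
minimal sketch, assuming the per-process dump produced by
procfs$pid_vm() is exposed at /proc/self/vm (the mount path is an
assumption; only the generator function is shown in this diff):

#include <stdio.h>

int main()
{
    // Dump the JSON emitted by procfs$pid_vm for this process. Each
    // region object now has a "purgeable" flag and, for purgeable
    // regions, a "volatile" flag; procfs$all additionally reports
    // "amount_purgeable_volatile" and "amount_purgeable_nonvolatile".
    FILE* file = fopen("/proc/self/vm", "r");
    if (!file) {
        perror("fopen");
        return 1;
    }
    char buffer[4096];
    size_t nread;
    while ((nread = fread(buffer, 1, sizeof(buffer), file)) > 0)
        fwrite(buffer, 1, nread, stdout);
    fclose(file);
    return 0;
}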

@@ -93,6 +93,7 @@ CXX_OBJS = \
VM/PageDirectory.o \
VM/PhysicalPage.o \
VM/PhysicalRegion.o \
VM/PurgeableVMObject.o \
VM/RangeAllocator.o \
VM/Region.o \
VM/VMObject.o \


@@ -36,6 +36,7 @@
#include <Kernel/TTY/MasterPTY.h>
#include <Kernel/Thread.h>
#include <Kernel/VM/InodeVMObject.h>
#include <Kernel/VM/PurgeableVMObject.h>
#include <LibC/errno_numbers.h>
#include <LibC/signal_numbers.h>
#include <LibELF/ELFLoader.h>
@@ -224,6 +225,18 @@ void* Process::sys$mmap(const Syscall::SC_mmap_params* params)
return (void*)-EINVAL;
// FIXME: The rest of this function seems like it could share more code..
if (flags & MAP_PURGEABLE) {
auto vmobject = PurgeableVMObject::create_with_size(size);
auto* region = allocate_region_with_vmo(VirtualAddress((u32)addr), size, vmobject, 0, name ? name : "mmap (purgeable)", prot);
if (!region)
return (void*)-ENOMEM;
if (flags & MAP_SHARED)
region->set_shared(true);
region->set_mmap(true);
return region->vaddr().as_ptr();
}
if (flags & MAP_ANONYMOUS) {
auto* region = allocate_region(VirtualAddress((u32)addr), size, name ? name : "mmap", prot, false);
if (!region)
@@ -312,6 +325,52 @@ int Process::sys$mprotect(void* addr, size_t size, int prot)
return 0;
}
int Process::sys$madvise(void* address, size_t size, int advice)
{
auto* region = region_from_range({ VirtualAddress((u32)address), size });
if (!region)
return -EINVAL;
if (!region->is_mmap())
return -EPERM;
if ((advice & MADV_SET_VOLATILE) && (advice & MADV_SET_NONVOLATILE))
return -EINVAL;
if (advice & MADV_SET_VOLATILE) {
if (!region->vmobject().is_purgeable())
return -EPERM;
auto& vmobject = static_cast<PurgeableVMObject&>(region->vmobject());
vmobject.set_volatile(true);
return 0;
}
if (advice & MADV_SET_NONVOLATILE) {
if (!region->vmobject().is_purgeable())
return -EPERM;
auto& vmobject = static_cast<PurgeableVMObject&>(region->vmobject());
vmobject.set_volatile(false);
bool was_purged = vmobject.was_purged();
vmobject.set_was_purged(false);
return was_purged ? 1 : 0;
}
return -EINVAL;
}
int Process::sys$purge()
{
NonnullRefPtrVector<PurgeableVMObject> vmobjects;
{
InterruptDisabler disabler;
MM.for_each_vmobject([&](auto& vmobject) {
if (vmobject.is_purgeable())
vmobjects.append(static_cast<PurgeableVMObject&>(vmobject));
return IterationDecision::Continue;
});
}
int purged_page_count = 0;
for (auto& vmobject : vmobjects) {
purged_page_count += vmobject.purge();
}
return purged_page_count;
}
int Process::sys$gethostname(char* buffer, ssize_t size)
{
if (size < 0)
@@ -842,7 +901,7 @@ void Process::dump_regions()
kprintf("Process %s(%u) regions:\n", name().characters(), pid());
kprintf("BEGIN END SIZE ACCESS NAME\n");
for (auto& region : m_regions) {
kprintf("%08x -- %08x %08x %c%c%c%c%c %s\n",
kprintf("%08x -- %08x %08x %c%c%c%c%c%c %s\n",
region.vaddr().get(),
region.vaddr().offset(region.size() - 1).get(),
region.size(),
@@ -851,6 +910,7 @@ void Process::dump_regions()
region.is_executable() ? 'X' : ' ',
region.is_shared() ? 'S' : ' ',
region.is_stack() ? 'T' : ' ',
region.vmobject().is_purgeable() ? 'P' : ' ',
region.name().characters());
}
}
@@ -2410,6 +2470,26 @@ size_t Process::amount_shared() const
return amount;
}
size_t Process::amount_purgeable_volatile() const
{
size_t amount = 0;
for (auto& region : m_regions) {
if (region.vmobject().is_purgeable() && static_cast<const PurgeableVMObject&>(region.vmobject()).is_volatile())
amount += region.amount_resident();
}
return amount;
}
size_t Process::amount_purgeable_nonvolatile() const
{
size_t amount = 0;
for (auto& region : m_regions) {
if (region.vmobject().is_purgeable() && !static_cast<const PurgeableVMObject&>(region.vmobject()).is_volatile())
amount += region.amount_resident();
}
return amount;
}
int Process::sys$socket(int domain, int type, int protocol)
{
int fd = alloc_fd();


@@ -139,6 +139,8 @@ public:
int sys$munmap(void*, size_t size);
int sys$set_mmap_name(void*, size_t, const char*);
int sys$mprotect(void*, size_t, int prot);
int sys$madvise(void*, size_t, int advice);
int sys$purge();
int sys$select(const Syscall::SC_select_params*);
int sys$poll(pollfd*, int nfds, int timeout);
ssize_t sys$get_dir_entries(int fd, void*, ssize_t);
@@ -266,6 +268,8 @@ public:
size_t amount_virtual() const;
size_t amount_resident() const;
size_t amount_shared() const;
size_t amount_purgeable_volatile() const;
size_t amount_purgeable_nonvolatile() const;
Process* fork(RegisterDump&);
int exec(String path, Vector<String> arguments, Vector<String> environment);


@@ -144,7 +144,9 @@ typedef u32 socklen_t;
__ENUMERATE_SYSCALL(module_unload) \
__ENUMERATE_SYSCALL(detach_thread) \
__ENUMERATE_SYSCALL(set_thread_name) \
__ENUMERATE_SYSCALL(get_thread_name)
__ENUMERATE_SYSCALL(get_thread_name) \
__ENUMERATE_SYSCALL(madvise) \
__ENUMERATE_SYSCALL(purge)
namespace Syscall {

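There's no LibC wrapper for the purge syscall in this change, so a
caller that wants to trigger a system-wide purge would presumably go
through the raw syscall interface, the same way the madvise() wrapper
further down does. A hypothetical sketch (the purge_all_volatile()
helper is made up for illustration and assumes the usual syscall()
helper from <Kernel/Syscall.h> is available to userspace):

#include <Kernel/Syscall.h>

// Hypothetical helper, not part of this commit: ask the kernel to
// purge all volatile purgeable memory in the system. sys$purge()
// returns the number of pages it purged, which is passed straight
// through to the caller.
static int purge_all_volatile()
{
    return syscall(SC_purge);
}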

@@ -27,12 +27,16 @@
#define MAP_ANONYMOUS 0x20
#define MAP_ANON MAP_ANONYMOUS
#define MAP_STACK 0x40
#define MAP_PURGEABLE 0x80
#define PROT_READ 0x1
#define PROT_WRITE 0x2
#define PROT_EXEC 0x4
#define PROT_NONE 0x0
#define MADV_SET_VOLATILE 0x100
#define MADV_SET_NONVOLATILE 0x200
#define F_DUPFD 0
#define F_GETFD 1
#define F_SETFD 2


@@ -3,7 +3,7 @@
#include <Kernel/VM/PhysicalAddress.h>
#include <Kernel/VM/VMObject.h>
class AnonymousVMObject final : public VMObject {
class AnonymousVMObject : public VMObject {
public:
virtual ~AnonymousVMObject() override;
@@ -11,9 +11,11 @@ public:
static NonnullRefPtr<AnonymousVMObject> create_for_physical_range(PhysicalAddress, size_t);
virtual NonnullRefPtr<VMObject> clone() override;
private:
protected:
explicit AnonymousVMObject(size_t);
explicit AnonymousVMObject(const AnonymousVMObject&);
private:
AnonymousVMObject(PhysicalAddress, size_t);
AnonymousVMObject& operator=(const AnonymousVMObject&) = delete;


@@ -0,0 +1,41 @@
#include <Kernel/VM/PurgeableVMObject.h>
#include <Kernel/VM/PhysicalPage.h>
NonnullRefPtr<PurgeableVMObject> PurgeableVMObject::create_with_size(size_t size)
{
return adopt(*new PurgeableVMObject(size));
}
PurgeableVMObject::PurgeableVMObject(size_t size)
: AnonymousVMObject(size)
{
}
PurgeableVMObject::PurgeableVMObject(const PurgeableVMObject& other)
: AnonymousVMObject(other)
{
}
PurgeableVMObject::~PurgeableVMObject()
{
}
NonnullRefPtr<VMObject> PurgeableVMObject::clone()
{
return adopt(*new PurgeableVMObject(*this));
}
int PurgeableVMObject::purge()
{
LOCKER(m_paging_lock);
if (!m_volatile)
return 0;
int purged_page_count = 0;
for (size_t i = 0; i < m_physical_pages.size(); ++i) {
if (m_physical_pages[i])
++purged_page_count;
m_physical_pages[i] = nullptr;
}
m_was_purged = true;
return purged_page_count;
}


@@ -0,0 +1,32 @@
#pragma once
#include <Kernel/VM/AnonymousVMObject.h>
class PurgeableVMObject final : public AnonymousVMObject {
public:
virtual ~PurgeableVMObject() override;
static NonnullRefPtr<PurgeableVMObject> create_with_size(size_t);
virtual NonnullRefPtr<VMObject> clone() override;
int purge();
bool was_purged() const { return m_was_purged; }
void set_was_purged(bool b) { m_was_purged = b; }
bool is_volatile() const { return m_volatile; }
void set_volatile(bool b) { m_volatile = b; }
private:
explicit PurgeableVMObject(size_t);
explicit PurgeableVMObject(const PurgeableVMObject&);
PurgeableVMObject& operator=(const PurgeableVMObject&) = delete;
PurgeableVMObject& operator=(PurgeableVMObject&&) = delete;
PurgeableVMObject(PurgeableVMObject&&) = delete;
virtual bool is_purgeable() const override { return true; }
bool m_was_purged { false };
bool m_volatile { false };
};


@@ -299,10 +299,11 @@ PageFaultResponse Region::handle_zero_fault(size_t page_index_in_region)
ASSERT_INTERRUPTS_DISABLED();
ASSERT(vmobject().is_anonymous());
auto& vmobject_physical_page_entry = vmobject().physical_pages()[first_page_index() + page_index_in_region];
sti();
LOCKER(vmobject().m_paging_lock);
cli();
// NOTE: We don't need to acquire the VMObject's lock.
// This function is already exclusive due to interrupts being blocked.
auto& vmobject_physical_page_entry = vmobject().physical_pages()[first_page_index() + page_index_in_region];
if (!vmobject_physical_page_entry.is_null()) {
#ifdef PAGE_FAULT_DEBUG


@@ -22,6 +22,7 @@ public:
virtual NonnullRefPtr<VMObject> clone() = 0;
virtual bool is_anonymous() const { return false; }
virtual bool is_purgeable() const { return false; }
virtual bool is_inode() const { return false; }
size_t page_count() const { return m_physical_pages.size(); }
@@ -42,11 +43,10 @@ protected:
void for_each_region(Callback);
FixedArray<RefPtr<PhysicalPage>> m_physical_pages;
Lock m_paging_lock { "VMObject" };
private:
VMObject& operator=(const VMObject&) = delete;
VMObject& operator=(VMObject&&) = delete;
VMObject(VMObject&&) = delete;
Lock m_paging_lock { "VMObject" };
};


@@ -56,4 +56,11 @@ int shm_unlink(const char* name)
int rc = syscall(SC_shm_unlink, name);
__RETURN_WITH_ERRNO(rc, rc, -1);
}
int madvise(void* address, size_t size, int advice)
{
int rc = syscall(SC_madvise, address, size, advice);
__RETURN_WITH_ERRNO(rc, rc, -1);
}
}


@@ -9,6 +9,7 @@
#define MAP_ANONYMOUS 0x20
#define MAP_ANON MAP_ANONYMOUS
#define MAP_STACK 0x40
#define MAP_PURGEABLE 0x80
#define PROT_READ 0x1
#define PROT_WRITE 0x2
@@ -17,6 +18,9 @@
#define MAP_FAILED ((void*)-1)
#define MADV_SET_VOLATILE 0x100
#define MADV_SET_NONVOLATILE 0x200
__BEGIN_DECLS
void* mmap(void* addr, size_t, int prot, int flags, int fd, off_t);
@@ -26,5 +30,6 @@ int mprotect(void*, size_t, int prot);
int set_mmap_name(void*, size_t, const char*);
int shm_open(const char* name, int flags, mode_t);
int shm_unlink(const char* name);
int madvise(void*, size_t, int advice);
__END_DECLS