From f7a4c34929361d3df905d49e7ae20d2eb5f90ff3 Mon Sep 17 00:00:00 2001 From: Andreas Kling Date: Sat, 25 Dec 2021 17:23:18 +0100 Subject: [PATCH] Kernel: Make kmalloc heap expansion kmalloc-free Previously, the heap expansion logic could end up calling kmalloc recursively, which was quite messy and hard to reason about. This patch redesigns heap expansion so that it's kmalloc-free: - We make a single large virtual range allocation at startup - When expanding, we bump allocate VM from that region - When expanding, we populate page tables directly ourselves, instead of going via MemoryManager. This makes heap expansion a great deal simpler. However, do note that it introduces two new flaws that we'll need to deal with eventually: - The single virtual range allocation is limited to 64 MiB and once exhausted, kmalloc() will fail. (Actually, it will PANIC for now..) - The kmalloc heap can no longer shrink once expanded. Subheaps stay in place once constructed. --- Kernel/Heap/Heap.h | 210 ------------------------ Kernel/Heap/kmalloc.cpp | 297 ++++++++++++++++------------------ Kernel/Memory/MemoryManager.h | 3 + 3 files changed, 140 insertions(+), 370 deletions(-) diff --git a/Kernel/Heap/Heap.h b/Kernel/Heap/Heap.h index 725d7d5d64f..5370560205a 100644 --- a/Kernel/Heap/Heap.h +++ b/Kernel/Heap/Heap.h @@ -144,214 +144,4 @@ private: Bitmap m_bitmap; }; -template -struct ExpandableHeapTraits { - static bool add_memory(ExpandHeap& expand, size_t allocation_request) - { - return expand.add_memory(allocation_request); - } - - static bool remove_memory(ExpandHeap& expand, void* memory) - { - return expand.remove_memory(memory); - } -}; - -struct DefaultExpandHeap { - bool add_memory(size_t) - { - // Requires explicit implementation - return false; - } - - bool remove_memory(void*) - { - return false; - } -}; - -template -class ExpandableHeap { - AK_MAKE_NONCOPYABLE(ExpandableHeap); - AK_MAKE_NONMOVABLE(ExpandableHeap); - -public: - using ExpandHeapType = ExpandHeap; - using HeapType = Heap; - - struct SubHeap { - HeapType heap; - SubHeap* next { nullptr }; - size_t memory_size { 0 }; - - template - SubHeap(size_t memory_size, Args&&... args) - : heap(forward(args)...) - , memory_size(memory_size) - { - } - }; - - ExpandableHeap(u8* memory, size_t memory_size, const ExpandHeapType& expand = ExpandHeapType()) - : m_heaps(memory_size, memory, memory_size) - , m_expand(expand) - { - } - ~ExpandableHeap() - { - // We don't own the main heap, only remove memory that we added previously - SubHeap* next; - for (auto* heap = m_heaps.next; heap; heap = next) { - next = heap->next; - - heap->~SubHeap(); - ExpandableHeapTraits::remove_memory(m_expand, (void*)heap); - } - } - - static size_t calculate_memory_for_bytes(size_t bytes) - { - return sizeof(SubHeap) + HeapType::calculate_memory_for_bytes(bytes); - } - - bool expand_memory(size_t size) - { - if (m_expanding) - return false; - - // Allocating more memory itself may trigger allocations and deallocations - // on this heap. We need to prevent recursive expansion. We also disable - // removing memory while trying to expand the heap. - TemporaryChange change(m_expanding, true); - return ExpandableHeapTraits::add_memory(m_expand, size); - } - - void* allocate(size_t size) - { - int attempt = 0; - do { - for (auto* subheap = &m_heaps; subheap; subheap = subheap->next) { - if (void* ptr = subheap->heap.allocate(size)) - return ptr; - } - - // We need to loop because we won't know how much memory was added. - // Even though we make a best guess how much memory needs to be added, - // it doesn't guarantee that enough will be available after adding it. - // This is especially true for the kmalloc heap, where adding memory - // requires several other objects to be allocated just to be able to - // expand the heap. - - // To avoid an infinite expansion loop, limit to two attempts - if (attempt++ >= 2) - break; - } while (expand_memory(size)); - return nullptr; - } - - void deallocate(void* ptr) - { - if (!ptr) - return; - for (auto* subheap = &m_heaps; subheap; subheap = subheap->next) { - if (subheap->heap.contains(ptr)) { - subheap->heap.deallocate(ptr); - if (subheap->heap.allocated_chunks() == 0 && subheap != &m_heaps && !m_expanding) { - // remove_memory expects the memory to be unused and - // may deallocate the memory. We need to therefore first - // unlink the subheap and destroy it. If remove_memory - // ends up not not removing the memory, we'll initialize - // a new subheap and re-add it. - // We need to remove the subheap before calling remove_memory - // because it's possible that remove_memory itself could - // cause a memory allocation that we don't want to end up - // potentially being made in the subheap we're about to remove. - { - auto* subheap2 = m_heaps.next; - auto** subheap_link = &m_heaps.next; - while (subheap2 != subheap) { - subheap_link = &subheap2->next; - subheap2 = subheap2->next; - } - *subheap_link = subheap->next; - } - - auto memory_size = subheap->memory_size; - subheap->~SubHeap(); - - if (!ExpandableHeapTraits::remove_memory(m_expand, subheap)) { - // Removal of the subheap was rejected, add it back in and - // re-initialize with a clean subheap. - add_subheap(subheap, memory_size); - } - } - return; - } - } - VERIFY_NOT_REACHED(); - } - - HeapType& add_subheap(void* memory, size_t memory_size) - { - VERIFY(memory_size > sizeof(SubHeap)); - - // Place the SubHeap structure at the beginning of the new memory block - memory_size -= sizeof(SubHeap); - SubHeap* new_heap = (SubHeap*)memory; - new (new_heap) SubHeap(memory_size, (u8*)(new_heap + 1), memory_size); - - // Add the subheap to the list (but leave the main heap where it is) - SubHeap* next_heap = m_heaps.next; - SubHeap** next_heap_link = &m_heaps.next; - while (next_heap) { - if (new_heap->heap.memory() < next_heap->heap.memory()) - break; - next_heap_link = &next_heap->next; - next_heap = next_heap->next; - } - new_heap->next = *next_heap_link; - *next_heap_link = new_heap; - return new_heap->heap; - } - - bool contains(const void* ptr) const - { - for (auto* subheap = &m_heaps; subheap; subheap = subheap->next) { - if (subheap->heap.contains(ptr)) - return true; - } - return false; - } - - size_t total_chunks() const - { - size_t total = 0; - for (auto* subheap = &m_heaps; subheap; subheap = subheap->next) - total += subheap->heap.total_chunks(); - return total; - } - size_t total_bytes() const { return total_chunks() * CHUNK_SIZE; } - size_t free_chunks() const - { - size_t total = 0; - for (auto* subheap = &m_heaps; subheap; subheap = subheap->next) - total += subheap->heap.free_chunks(); - return total; - } - size_t free_bytes() const { return free_chunks() * CHUNK_SIZE; } - size_t allocated_chunks() const - { - size_t total = 0; - for (auto* subheap = &m_heaps; subheap; subheap = subheap->next) - total += subheap->heap.allocated_chunks(); - return total; - } - size_t allocated_bytes() const { return allocated_chunks() * CHUNK_SIZE; } - -private: - SubHeap m_heaps; - ExpandHeap m_expand; - bool m_expanding { false }; -}; - } diff --git a/Kernel/Heap/kmalloc.cpp b/Kernel/Heap/kmalloc.cpp index 683aa9f1587..4c4e8946d8e 100644 --- a/Kernel/Heap/kmalloc.cpp +++ b/Kernel/Heap/kmalloc.cpp @@ -10,7 +10,6 @@ */ #include -#include #include #include #include @@ -38,159 +37,137 @@ const nothrow_t nothrow; static RecursiveSpinlock s_lock; // needs to be recursive because of dump_backtrace() -static void kmalloc_allocate_backup_memory(); - -struct KmallocGlobalHeap { - struct ExpandGlobalHeap { - KmallocGlobalHeap& m_global_heap; - - ExpandGlobalHeap(KmallocGlobalHeap& global_heap) - : m_global_heap(global_heap) - { - } - - bool m_adding { false }; - bool add_memory(size_t allocation_request) - { - if (!Memory::MemoryManager::is_initialized()) { - if constexpr (KMALLOC_DEBUG) { - dmesgln("kmalloc: Cannot expand heap before MM is initialized!"); - } - return false; - } - VERIFY(!m_adding); - TemporaryChange change(m_adding, true); - // At this point we have very little memory left. Any attempt to - // kmalloc() could fail, so use our backup memory first, so we - // can't really reliably allocate even a new region of memory. - // This is why we keep a backup region, which we can - auto region = move(m_global_heap.m_backup_memory); - if (!region) { - // Be careful to not log too much here. We don't want to trigger - // any further calls to kmalloc(). We're already out of memory - // and don't have any backup memory, either! - if constexpr (KMALLOC_DEBUG) { - dmesgln("kmalloc: Cannot expand heap: no backup memory"); - } - return false; - } - - // At this point we should have at least enough memory from the - // backup region to be able to log properly - if constexpr (KMALLOC_DEBUG) { - dmesgln("kmalloc: Adding memory to heap at {}, bytes: {}", region->vaddr(), region->size()); - } - - auto& subheap = m_global_heap.m_heap.add_subheap(region->vaddr().as_ptr(), region->size()); - m_global_heap.m_subheap_memory.append(region.release_nonnull()); - - // Since we pulled in our backup heap, make sure we allocate another - // backup heap before returning. Otherwise we potentially lose - // the ability to expand the heap next time we get called. - ScopeGuard guard([&]() { - // We may need to defer allocating backup memory because the - // heap expansion may have been triggered while holding some - // other spinlock. If the expansion happens to need the same - // spinlock we would deadlock. So, if we're in any lock, defer - Processor::deferred_call_queue(kmalloc_allocate_backup_memory); - }); - - // Now that we added our backup memory, check if the backup heap - // was big enough to likely satisfy the request - if (subheap.free_bytes() < allocation_request) { - // Looks like we probably need more - size_t memory_size = Memory::page_round_up(decltype(m_global_heap.m_heap)::calculate_memory_for_bytes(allocation_request)); - // Add some more to the new heap. We're already using it for other - // allocations not including the original allocation_request - // that triggered heap expansion. If we don't allocate - memory_size += 1 * MiB; - - auto new_region_or_error = MM.allocate_kernel_region(memory_size, "kmalloc subheap", Memory::Region::Access::ReadWrite, AllocationStrategy::AllocateNow); - if (new_region_or_error.is_error()) { - dbgln("kmalloc: Could not expand heap to satisfy allocation of {} bytes", allocation_request); - return false; - } - - region = new_region_or_error.release_value(); - dbgln("kmalloc: Adding even more memory to heap at {}, bytes: {}", region->vaddr(), region->size()); - - m_global_heap.m_heap.add_subheap(region->vaddr().as_ptr(), region->size()); - m_global_heap.m_subheap_memory.append(region.release_nonnull()); - } - return true; - } - - bool remove_memory(void* memory) - { - // This is actually relatively unlikely to happen, because it requires that all - // allocated memory in a subheap to be freed. Only then the subheap can be removed... - for (size_t i = 0; i < m_global_heap.m_subheap_memory.size(); i++) { - if (m_global_heap.m_subheap_memory[i].vaddr().as_ptr() == memory) { - auto region = m_global_heap.m_subheap_memory.take(i); - if (!m_global_heap.m_backup_memory) { - if constexpr (KMALLOC_DEBUG) { - dmesgln("kmalloc: Using removed memory as backup: {}, bytes: {}", region->vaddr(), region->size()); - } - m_global_heap.m_backup_memory = move(region); - } else { - if constexpr (KMALLOC_DEBUG) { - dmesgln("kmalloc: Queue removing memory from heap at {}, bytes: {}", region->vaddr(), region->size()); - } - Processor::deferred_call_queue([this, region = move(region)]() mutable { - // We need to defer freeing the region to prevent a potential - // deadlock since we are still holding the kmalloc lock - // We don't really need to do anything other than holding - // onto the region. Unless we already used the backup - // memory, in which case we want to use the region as the - // new backup. - SpinlockLocker lock(s_lock); - if (!m_global_heap.m_backup_memory) { - if constexpr (KMALLOC_DEBUG) { - dmesgln("kmalloc: Queued memory region at {}, bytes: {} will be used as new backup", region->vaddr(), region->size()); - } - m_global_heap.m_backup_memory = move(region); - } else { - if constexpr (KMALLOC_DEBUG) { - dmesgln("kmalloc: Queued memory region at {}, bytes: {} will be freed now", region->vaddr(), region->size()); - } - } - }); - } - return true; - } - } - - if constexpr (KMALLOC_DEBUG) { - dmesgln("kmalloc: Cannot remove memory from heap: {}", VirtualAddress(memory)); - } - return false; - } - }; - using HeapType = ExpandableHeap; - - HeapType m_heap; - NonnullOwnPtrVector m_subheap_memory; - OwnPtr m_backup_memory; - - KmallocGlobalHeap(u8* memory, size_t memory_size) - : m_heap(memory, memory_size, ExpandGlobalHeap(*this)) +struct KmallocSubheap { + KmallocSubheap(u8* base, size_t size) + : allocator(base, size) { } - void allocate_backup_memory() - { - if (m_backup_memory) - return; - m_backup_memory = MM.allocate_kernel_region(1 * MiB, "kmalloc subheap", Memory::Region::Access::ReadWrite, AllocationStrategy::AllocateNow).release_value(); - } - size_t backup_memory_bytes() const - { - return m_backup_memory ? m_backup_memory->size() : 0; - } + IntrusiveListNode list_node; + Heap allocator; }; -READONLY_AFTER_INIT static KmallocGlobalHeap* g_kmalloc_global; -alignas(KmallocGlobalHeap) static u8 g_kmalloc_global_heap[sizeof(KmallocGlobalHeap)]; +struct KmallocGlobalData { + KmallocGlobalData(u8* initial_heap, size_t initial_heap_size) + { + add_subheap(initial_heap, initial_heap_size); + } + + void add_subheap(u8* storage, size_t storage_size) + { + auto* subheap = new (storage) KmallocSubheap(storage + PAGE_SIZE, storage_size - PAGE_SIZE); + subheaps.append(*subheap); + } + + void* allocate(size_t size) + { + VERIFY(!expansion_in_progress); + + for (auto& subheap : subheaps) { + if (auto* ptr = subheap.allocator.allocate(size)) + return ptr; + } + + if (!try_expand()) { + PANIC("OOM when trying to expand kmalloc heap."); + } + + return allocate(size); + } + + void deallocate(void* ptr) + { + VERIFY(!expansion_in_progress); + + for (auto& subheap : subheaps) { + if (subheap.allocator.contains(ptr)) { + subheap.allocator.deallocate(ptr); + return; + } + } + + PANIC("Bogus pointer {:p} passed to kfree()", ptr); + } + + size_t allocated_bytes() const + { + size_t total = 0; + for (auto const& subheap : subheaps) + total += subheap.allocator.allocated_bytes(); + return total; + } + + size_t free_bytes() const + { + size_t total = 0; + for (auto const& subheap : subheaps) + total += subheap.allocator.free_bytes(); + return total; + } + + bool try_expand() + { + VERIFY(!expansion_in_progress); + TemporaryChange change(expansion_in_progress, true); + + auto new_subheap_base = expansion_data->next_virtual_address; + size_t new_subheap_size = 1 * MiB; + + if (!expansion_data->virtual_range.contains(new_subheap_base, new_subheap_size)) { + // FIXME: Dare to return false and allow kmalloc() to fail! + PANIC("Out of address space when expanding kmalloc heap."); + } + + auto physical_pages_or_error = MM.commit_user_physical_pages(new_subheap_size / PAGE_SIZE); + if (physical_pages_or_error.is_error()) { + // FIXME: Dare to return false! + PANIC("Out of physical pages when expanding kmalloc heap."); + } + auto physical_pages = physical_pages_or_error.release_value(); + + expansion_data->next_virtual_address = expansion_data->next_virtual_address.offset(new_subheap_size); + + SpinlockLocker mm_locker(Memory::s_mm_lock); + SpinlockLocker pd_locker(MM.kernel_page_directory().get_lock()); + + for (auto vaddr = new_subheap_base; !physical_pages.is_empty(); vaddr = vaddr.offset(PAGE_SIZE)) { + // FIXME: We currently leak physical memory when mapping it into the kmalloc heap. + auto& page = physical_pages.take_one().leak_ref(); + auto* pte = MM.ensure_pte(MM.kernel_page_directory(), vaddr); + if (!pte) { + // FIXME: If ensure_pte() fails due to lazy page directory construction, it returns nullptr + // and we're in trouble. Find a way to avoid getting into that situation. + // Perhaps we could do a dry run through the address range and ensure_pte() for each + // virtual address to ensure that each PTE is available. Not maximally efficient, + // but could work.. Needs more thought. + PANIC("Unable to acquire PTE during heap expansion"); + } + pte->set_physical_page_base(page.paddr().get()); + pte->set_global(true); + pte->set_user_allowed(false); + pte->set_writable(true); + pte->set_present(true); + } + + MM.flush_tlb(&MM.kernel_page_directory(), new_subheap_base, new_subheap_size / PAGE_SIZE); + + add_subheap(new_subheap_base.as_ptr(), new_subheap_size); + return true; + } + + struct ExpansionData { + Memory::VirtualRange virtual_range; + VirtualAddress next_virtual_address; + }; + Optional expansion_data; + + IntrusiveList<&KmallocSubheap::list_node> subheaps; + + bool expansion_in_progress { false }; +}; + +READONLY_AFTER_INIT static KmallocGlobalData* g_kmalloc_global; +alignas(KmallocGlobalData) static u8 g_kmalloc_global_heap[sizeof(KmallocGlobalData)]; // Treat the heap as logically separate from .bss __attribute__((section(".heap"))) static u8 kmalloc_eternal_heap[ETERNAL_RANGE_SIZE]; @@ -205,14 +182,14 @@ bool g_dump_kmalloc_stacks; static u8* s_next_eternal_ptr; READONLY_AFTER_INIT static u8* s_end_of_eternal_range; -static void kmalloc_allocate_backup_memory() -{ - g_kmalloc_global->allocate_backup_memory(); -} - void kmalloc_enable_expand() { - g_kmalloc_global->allocate_backup_memory(); + // FIXME: This range can be much bigger on 64-bit, but we need to figure something out for 32-bit. + auto virtual_range = MM.kernel_page_directory().range_allocator().try_allocate_anywhere(64 * MiB, 1 * MiB); + g_kmalloc_global->expansion_data = KmallocGlobalData::ExpansionData { + .virtual_range = virtual_range.value(), + .next_virtual_address = virtual_range.value().base(), + }; } static inline void kmalloc_verify_nospinlock_held() @@ -228,7 +205,7 @@ UNMAP_AFTER_INIT void kmalloc_init() // Zero out heap since it's placed after end_of_kernel_bss. memset(kmalloc_eternal_heap, 0, sizeof(kmalloc_eternal_heap)); memset(kmalloc_pool_heap, 0, sizeof(kmalloc_pool_heap)); - g_kmalloc_global = new (g_kmalloc_global_heap) KmallocGlobalHeap(kmalloc_pool_heap, sizeof(kmalloc_pool_heap)); + g_kmalloc_global = new (g_kmalloc_global_heap) KmallocGlobalData(kmalloc_pool_heap, sizeof(kmalloc_pool_heap)); s_lock.initialize(); @@ -261,7 +238,7 @@ void* kmalloc(size_t size) Kernel::dump_backtrace(); } - void* ptr = g_kmalloc_global->m_heap.allocate(size); + void* ptr = g_kmalloc_global->allocate(size); Thread* current_thread = Thread::current(); if (!current_thread) @@ -296,7 +273,7 @@ void kfree(void* ptr) PerformanceManager::add_kfree_perf_event(*current_thread, 0, (FlatPtr)ptr); } - g_kmalloc_global->m_heap.deallocate(ptr); + g_kmalloc_global->deallocate(ptr); --g_nested_kfree_calls; } @@ -383,8 +360,8 @@ void operator delete[](void* ptr, size_t size) noexcept void get_kmalloc_stats(kmalloc_stats& stats) { SpinlockLocker lock(s_lock); - stats.bytes_allocated = g_kmalloc_global->m_heap.allocated_bytes(); - stats.bytes_free = g_kmalloc_global->m_heap.free_bytes() + g_kmalloc_global->backup_memory_bytes(); + stats.bytes_allocated = g_kmalloc_global->allocated_bytes(); + stats.bytes_free = g_kmalloc_global->free_bytes(); stats.bytes_eternal = g_kmalloc_bytes_eternal; stats.kmalloc_call_count = g_kmalloc_call_count; stats.kfree_call_count = g_kfree_call_count; diff --git a/Kernel/Memory/MemoryManager.h b/Kernel/Memory/MemoryManager.h index da11268c0fe..ccf058fc548 100644 --- a/Kernel/Memory/MemoryManager.h +++ b/Kernel/Memory/MemoryManager.h @@ -22,6 +22,8 @@ namespace Kernel { class PageDirectoryEntry; } +struct KmallocGlobalData; + namespace Kernel::Memory { constexpr bool page_round_up_would_wrap(FlatPtr x) @@ -140,6 +142,7 @@ class MemoryManager { friend class AnonymousVMObject; friend class Region; friend class VMObject; + friend struct ::KmallocGlobalData; public: static MemoryManager& the();