From 2617adac526fd2ac897f81d26d9203b179ec7abe Mon Sep 17 00:00:00 2001
From: Andreas Kling
Date: Sat, 2 Apr 2022 01:28:01 +0200
Subject: [PATCH] Kernel: Store AddressSpace memory regions in an
 IntrusiveRedBlackTree

This means we never need to allocate when inserting/removing regions
from the address space.
---
 Kernel/Coredump.cpp               | 48 ++++++++++----------
 Kernel/Memory/AddressSpace.cpp    | 74 ++++++++++++++++++-------------
 Kernel/Memory/AddressSpace.h      |  9 ++--
 Kernel/Memory/Region.h            |  1 +
 Kernel/PerformanceEventBuffer.cpp |  2 +-
 Kernel/ProcessSpecificExposed.cpp | 40 ++++++++---------
 Kernel/Syscalls/fork.cpp          |  6 +--
 7 files changed, 99 insertions(+), 81 deletions(-)

diff --git a/Kernel/Coredump.cpp b/Kernel/Coredump.cpp
index 47958440f1a..2cddc6bf468 100644
--- a/Kernel/Coredump.cpp
+++ b/Kernel/Coredump.cpp
@@ -48,11 +48,11 @@ Coredump::Coredump(NonnullRefPtr<Process> process, NonnullRefPtr<OpenFileDescription>
     for (auto& region : m_process->address_space().regions()) {
 #if !INCLUDE_USERSPACE_HEAP_MEMORY_IN_COREDUMPS
-        if (looks_like_userspace_heap_region(*region))
+        if (looks_like_userspace_heap_region(region))
             continue;
 #endif
-        if (region->access() == Memory::Region::Access::None)
+        if (region.access() == Memory::Region::Access::None)
             continue;
         ++m_num_program_headers;
     }
@@ -127,28 +127,28 @@ ErrorOr<void> Coredump::write_program_headers(size_t notes_size)
     for (auto& region : m_process->address_space().regions()) {
 #if !INCLUDE_USERSPACE_HEAP_MEMORY_IN_COREDUMPS
-        if (looks_like_userspace_heap_region(*region))
+        if (looks_like_userspace_heap_region(region))
             continue;
 #endif
-        if (region->access() == Memory::Region::Access::None)
+        if (region.access() == Memory::Region::Access::None)
             continue;
 
         ElfW(Phdr) phdr {};
 
         phdr.p_type = PT_LOAD;
         phdr.p_offset = offset;
-        phdr.p_vaddr = region->vaddr().get();
+        phdr.p_vaddr = region.vaddr().get();
         phdr.p_paddr = 0;
 
-        phdr.p_filesz = region->page_count() * PAGE_SIZE;
-        phdr.p_memsz = region->page_count() * PAGE_SIZE;
+        phdr.p_filesz = region.page_count() * PAGE_SIZE;
+        phdr.p_memsz = region.page_count() * PAGE_SIZE;
         phdr.p_align = 0;
 
-        phdr.p_flags = region->is_readable() ? PF_R : 0;
-        if (region->is_writable())
+        phdr.p_flags = region.is_readable() ? PF_R : 0;
+        if (region.is_writable())
             phdr.p_flags |= PF_W;
-        if (region->is_executable())
+        if (region.is_executable())
             phdr.p_flags |= PF_X;
 
         offset += phdr.p_filesz;
@@ -176,28 +176,28 @@ ErrorOr<void> Coredump::write_regions()
     u8 zero_buffer[PAGE_SIZE] = {};
 
     for (auto& region : m_process->address_space().regions()) {
-        VERIFY(!region->is_kernel());
+        VERIFY(!region.is_kernel());
 
 #if !INCLUDE_USERSPACE_HEAP_MEMORY_IN_COREDUMPS
-        if (looks_like_userspace_heap_region(*region))
+        if (looks_like_userspace_heap_region(region))
             continue;
 #endif
-        if (region->access() == Memory::Region::Access::None)
+        if (region.access() == Memory::Region::Access::None)
             continue;
 
         // If we crashed in the middle of mapping in Regions, they do not have a page directory yet, and will crash on a remap() call
-        if (!region->is_mapped())
+        if (!region.is_mapped())
             continue;
 
-        region->set_readable(true);
-        region->remap();
+        region.set_readable(true);
+        region.remap();
 
-        for (size_t i = 0; i < region->page_count(); i++) {
-            auto const* page = region->physical_page(i);
+        for (size_t i = 0; i < region.page_count(); i++) {
+            auto const* page = region.physical_page(i);
             auto src_buffer = [&]() -> ErrorOr<UserOrKernelBuffer> {
                 if (page)
-                    return UserOrKernelBuffer::for_user_buffer(reinterpret_cast<u8*>((region->vaddr().as_ptr() + (i * PAGE_SIZE))), PAGE_SIZE);
+                    return UserOrKernelBuffer::for_user_buffer(reinterpret_cast<u8*>((region.vaddr().as_ptr() + (i * PAGE_SIZE))), PAGE_SIZE);
                 // If the current page is not backed by a physical page, we zero it in the coredump file.
                 return UserOrKernelBuffer::for_kernel_buffer(zero_buffer);
             }();
@@ -267,24 +267,24 @@ ErrorOr<void> Coredump::create_notes_regions_data(auto& builder) const
     for (auto const& region : m_process->address_space().regions()) {
 #if !INCLUDE_USERSPACE_HEAP_MEMORY_IN_COREDUMPS
-        if (looks_like_userspace_heap_region(*region))
+        if (looks_like_userspace_heap_region(region))
             continue;
 #endif
-        if (region->access() == Memory::Region::Access::None)
+        if (region.access() == Memory::Region::Access::None)
             continue;
 
         ELF::Core::MemoryRegionInfo info {};
         info.header.type = ELF::Core::NotesEntryHeader::Type::MemoryRegionInfo;
 
-        info.region_start = region->vaddr().get();
-        info.region_end = region->vaddr().offset(region->size()).get();
+        info.region_start = region.vaddr().get();
+        info.region_end = region.vaddr().offset(region.size()).get();
         info.program_header_index = region_index++;
 
         TRY(builder.append_bytes(ReadonlyBytes { (void*)&info, sizeof(info) }));
 
         // NOTE: The region name *is* null-terminated, so the following is ok:
-        auto name = region->name();
+        auto name = region.name();
         if (name.is_empty())
             TRY(builder.append('\0'));
         else
diff --git a/Kernel/Memory/AddressSpace.cpp b/Kernel/Memory/AddressSpace.cpp
index e8cb21caac2..cccc3163c48 100644
--- a/Kernel/Memory/AddressSpace.cpp
+++ b/Kernel/Memory/AddressSpace.cpp
@@ -30,7 +30,20 @@ AddressSpace::AddressSpace(NonnullRefPtr<PageDirectory> page_directory)
 {
 }
 
-AddressSpace::~AddressSpace() = default;
+AddressSpace::~AddressSpace()
+{
+    delete_all_regions_assuming_they_are_unmapped();
+}
+
+void AddressSpace::delete_all_regions_assuming_they_are_unmapped()
+{
+    // FIXME: This could definitely be done in a more efficient manner.
+    while (!m_regions.is_empty()) {
+        auto& region = *m_regions.begin();
+        m_regions.remove(region.vaddr().get());
+        delete &region;
+    }
+}
 
 ErrorOr<void> AddressSpace::unmap_mmap_range(VirtualAddress addr, size_t size)
 {
@@ -208,9 +221,9 @@ void AddressSpace::deallocate_region(Region& region)
 NonnullOwnPtr<Region> AddressSpace::take_region(Region& region)
 {
     SpinlockLocker lock(m_lock);
-    auto found_region = m_regions.unsafe_remove(region.vaddr().get());
-    VERIFY(found_region.ptr() == &region);
-    return found_region;
+    auto did_remove = m_regions.remove(region.vaddr().get());
+    VERIFY(did_remove);
+    return NonnullOwnPtr<Region> { NonnullOwnPtr<Region>::Adopt, region };
 }
 
 Region* AddressSpace::find_region_from_range(VirtualRange const& range)
@@ -221,9 +234,9 @@ Region* AddressSpace::find_region_from_range(VirtualRange const& range)
         return nullptr;
     auto& region = *found_region;
     auto rounded_range_size = page_round_up(range.size());
-    if (rounded_range_size.is_error() || region->size() != rounded_range_size.value())
+    if (rounded_range_size.is_error() || region.size() != rounded_range_size.value())
         return nullptr;
-    return region;
+    return &region;
 }
 
 Region* AddressSpace::find_region_containing(VirtualRange const& range)
@@ -232,7 +245,7 @@ Region* AddressSpace::find_region_containing(VirtualRange const& range)
     auto* candidate = m_regions.find_largest_not_above(range.base().get());
     if (!candidate)
         return nullptr;
-    return (*candidate)->range().contains(range) ? candidate->ptr() : nullptr;
+    return (*candidate).range().contains(range) ? candidate : nullptr;
 }
 
 ErrorOr<Vector<Region*>> AddressSpace::find_regions_intersecting(VirtualRange const& range)
@@ -245,12 +258,12 @@ ErrorOr<Vector<Region*>> AddressSpace::find_regions_intersecting(VirtualRange co
     auto* found_region = m_regions.find_largest_not_above(range.base().get());
     if (!found_region)
         return regions;
-    for (auto iter = m_regions.begin_from((*found_region)->vaddr().get()); !iter.is_end(); ++iter) {
-        auto const& iter_range = (*iter)->range();
+    for (auto iter = m_regions.begin_from((*found_region).vaddr().get()); !iter.is_end(); ++iter) {
+        auto const& iter_range = (*iter).range();
         if (iter_range.base() < range.end() && iter_range.end() > range.base()) {
-            TRY(regions.try_append(*iter));
+            TRY(regions.try_append(&*iter));
 
-            total_size_collected += (*iter)->size() - iter_range.intersect(range).size();
+            total_size_collected += (*iter).size() - iter_range.intersect(range).size();
             if (total_size_collected == range.size())
                 break;
         }
     }
@@ -261,9 +274,10 @@ ErrorOr<Vector<Region*>> AddressSpace::find_regions_intersecting(VirtualRange co
 
 ErrorOr<Region*> AddressSpace::add_region(NonnullOwnPtr<Region> region)
 {
-    auto* ptr = region.ptr();
     SpinlockLocker lock(m_lock);
-    TRY(m_regions.try_insert(region->vaddr().get(), move(region)));
+    // NOTE: We leak the region into the IRBT here. It must be deleted or readopted when removed from the tree.
+    auto* ptr = region.leak_ptr();
+    m_regions.insert(ptr->vaddr().get(), *ptr);
     return ptr;
 }
 
@@ -300,8 +314,7 @@ void AddressSpace::dump_regions()
 
     SpinlockLocker lock(m_lock);
 
-    for (auto const& sorted_region : m_regions) {
-        auto const& region = *sorted_region;
+    for (auto const& region : m_regions) {
         dbgln("{:p} -- {:p} {:p} {:c}{:c}{:c}{:c}{:c}{:c} {}", region.vaddr().get(), region.vaddr().offset(region.size() - 1).get(), region.size(),
             region.is_readable() ? 'R' : ' ',
             region.is_writable() ? 'W' : ' ',
@@ -322,9 +335,10 @@ void AddressSpace::remove_all_regions(Badge<Process>)
         SpinlockLocker pd_locker(m_page_directory->get_lock());
         SpinlockLocker mm_locker(s_mm_lock);
         for (auto& region : m_regions)
-            (*region).unmap_with_locks_held(Region::ShouldDeallocateVirtualRange::No, ShouldFlushTLB::No, pd_locker, mm_locker);
+            region.unmap_with_locks_held(Region::ShouldDeallocateVirtualRange::No, ShouldFlushTLB::No, pd_locker, mm_locker);
     }
-    m_regions.clear();
+
+    delete_all_regions_assuming_they_are_unmapped();
 }
 
 size_t AddressSpace::amount_dirty_private() const
@@ -335,8 +349,8 @@ size_t AddressSpace::amount_dirty_private() const
     // That's probably a situation that needs to be looked at in general.
     size_t amount = 0;
     for (auto const& region : m_regions) {
-        if (!region->is_shared())
-            amount += region->amount_dirty();
+        if (!region.is_shared())
+            amount += region.amount_dirty();
     }
     return amount;
 }
@@ -346,8 +360,8 @@ ErrorOr<size_t> AddressSpace::amount_clean_inode() const
     SpinlockLocker lock(m_lock);
    HashTable<InodeVMObject const*> vmobjects;
     for (auto const& region : m_regions) {
-        if (region->vmobject().is_inode())
-            TRY(vmobjects.try_set(&static_cast<InodeVMObject const&>(region->vmobject())));
+        if (region.vmobject().is_inode())
+            TRY(vmobjects.try_set(&static_cast<InodeVMObject const&>(region.vmobject())));
     }
     size_t amount = 0;
     for (auto& vmobject : vmobjects)
@@ -360,7 +374,7 @@ size_t AddressSpace::amount_virtual() const
     SpinlockLocker lock(m_lock);
     size_t amount = 0;
     for (auto const& region : m_regions) {
-        amount += region->size();
+        amount += region.size();
     }
     return amount;
 }
@@ -371,7 +385,7 @@ size_t AddressSpace::amount_resident() const
     // FIXME: This will double count if multiple regions use the same physical page.
     size_t amount = 0;
     for (auto const& region : m_regions) {
-        amount += region->amount_resident();
+        amount += region.amount_resident();
     }
     return amount;
 }
@@ -385,7 +399,7 @@ size_t AddressSpace::amount_shared() const
     // so that every Region contributes +1 ref to each of its PhysicalPages.
     size_t amount = 0;
     for (auto const& region : m_regions) {
-        amount += region->amount_shared();
+        amount += region.amount_shared();
     }
     return amount;
 }
@@ -395,11 +409,11 @@ size_t AddressSpace::amount_purgeable_volatile() const
     SpinlockLocker lock(m_lock);
     size_t amount = 0;
     for (auto const& region : m_regions) {
-        if (!region->vmobject().is_anonymous())
+        if (!region.vmobject().is_anonymous())
             continue;
-        auto const& vmobject = static_cast<AnonymousVMObject const&>(region->vmobject());
+        auto const& vmobject = static_cast<AnonymousVMObject const&>(region.vmobject());
         if (vmobject.is_purgeable() && vmobject.is_volatile())
-            amount += region->amount_resident();
+            amount += region.amount_resident();
     }
     return amount;
 }
@@ -409,11 +423,11 @@ size_t AddressSpace::amount_purgeable_nonvolatile() const
     SpinlockLocker lock(m_lock);
     size_t amount = 0;
     for (auto const& region : m_regions) {
-        if (!region->vmobject().is_anonymous())
+        if (!region.vmobject().is_anonymous())
             continue;
-        auto const& vmobject = static_cast<AnonymousVMObject const&>(region->vmobject());
+        auto const& vmobject = static_cast<AnonymousVMObject const&>(region.vmobject());
         if (vmobject.is_purgeable() && !vmobject.is_volatile())
-            amount += region->amount_resident();
+            amount += region.amount_resident();
     }
     return amount;
 }
diff --git a/Kernel/Memory/AddressSpace.h b/Kernel/Memory/AddressSpace.h
index 0c08647749f..a4c04a3d178 100644
--- a/Kernel/Memory/AddressSpace.h
+++ b/Kernel/Memory/AddressSpace.h
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include
 #include
 
 namespace Kernel::Memory {
@@ -28,8 +29,8 @@ public:
 
     size_t region_count() const { return m_regions.size(); }
 
-    RedBlackTree<FlatPtr, NonnullOwnPtr<Region>>& regions() { return m_regions; }
-    RedBlackTree<FlatPtr, NonnullOwnPtr<Region>> const& regions() const { return m_regions; }
+    auto& regions() { return m_regions; }
+    auto const& regions() const { return m_regions; }
 
     void dump_regions();
 
@@ -68,11 +69,13 @@ public:
 private:
     explicit AddressSpace(NonnullRefPtr<PageDirectory>);
 
+    void delete_all_regions_assuming_they_are_unmapped();
+
     mutable RecursiveSpinlock m_lock;
 
     RefPtr<PageDirectory> m_page_directory;
 
-    RedBlackTree<FlatPtr, NonnullOwnPtr<Region>> m_regions;
+    IntrusiveRedBlackTree<&Region::m_tree_node> m_regions;
 
     bool m_enforces_syscall_regions { false };
 };
diff --git a/Kernel/Memory/Region.h b/Kernel/Memory/Region.h
index dcaeae18a0f..29c264c480e 100644
--- a/Kernel/Memory/Region.h
+++ b/Kernel/Memory/Region.h
@@ -31,6 +31,7 @@ enum class ShouldFlushTLB {
 
 class Region final
     : public Weakable<Region> {
+    friend class AddressSpace;
     friend class MemoryManager;
 
 public:
diff --git a/Kernel/PerformanceEventBuffer.cpp b/Kernel/PerformanceEventBuffer.cpp
index 4ae7df427b2..8f930cda6b5 100644
--- a/Kernel/PerformanceEventBuffer.cpp
+++ b/Kernel/PerformanceEventBuffer.cpp
@@ -362,7 +362,7 @@ ErrorOr<void> PerformanceEventBuffer::add_process(Process const& process, Proces
     for (auto const& region : process.address_space().regions()) {
         TRY(append_with_ip_and_bp(process.pid(), 0,
-            0, 0, PERF_EVENT_MMAP, 0, region->range().base().get(), region->range().size(), region->name()));
+            0, 0, PERF_EVENT_MMAP, 0, region.range().base().get(), region.range().size(), region.name()));
     }
 
     return {};
diff --git a/Kernel/ProcessSpecificExposed.cpp b/Kernel/ProcessSpecificExposed.cpp
index f101084e74f..c43a016a412 100644
--- a/Kernel/ProcessSpecificExposed.cpp
+++ b/Kernel/ProcessSpecificExposed.cpp
@@ -234,31 +234,31 @@ ErrorOr<void> Process::procfs_get_virtual_memory_stats(KBufferBuilder& builder)
     {
         SpinlockLocker lock(address_space().get_lock());
         for (auto const& region : address_space().regions()) {
-            if (!region->is_user() && !Process::current().is_superuser())
+            if (!region.is_user() && !Process::current().is_superuser())
                 continue;
             auto region_object = TRY(array.add_object());
-            TRY(region_object.add("readable", region->is_readable()));
-            TRY(region_object.add("writable", region->is_writable()));
-            TRY(region_object.add("executable", region->is_executable()));
-            TRY(region_object.add("stack", region->is_stack()));
-            TRY(region_object.add("shared", region->is_shared()));
-            TRY(region_object.add("syscall", region->is_syscall_region()));
-            TRY(region_object.add("purgeable", region->vmobject().is_anonymous()));
-            if (region->vmobject().is_anonymous()) {
-                TRY(region_object.add("volatile", static_cast<Memory::AnonymousVMObject const&>(region->vmobject()).is_volatile()));
+            TRY(region_object.add("readable", region.is_readable()));
+            TRY(region_object.add("writable", region.is_writable()));
+            TRY(region_object.add("executable", region.is_executable()));
+            TRY(region_object.add("stack", region.is_stack()));
+            TRY(region_object.add("shared", region.is_shared()));
+            TRY(region_object.add("syscall", region.is_syscall_region()));
+            TRY(region_object.add("purgeable", region.vmobject().is_anonymous()));
+            if (region.vmobject().is_anonymous()) {
+                TRY(region_object.add("volatile", static_cast<Memory::AnonymousVMObject const&>(region.vmobject()).is_volatile()));
             }
-            TRY(region_object.add("cacheable", region->is_cacheable()));
-            TRY(region_object.add("address", region->vaddr().get()));
-            TRY(region_object.add("size", region->size()));
-            TRY(region_object.add("amount_resident", region->amount_resident()));
-            TRY(region_object.add("amount_dirty", region->amount_dirty()));
-            TRY(region_object.add("cow_pages", region->cow_pages()));
-            TRY(region_object.add("name", region->name()));
-            TRY(region_object.add("vmobject", region->vmobject().class_name()));
+            TRY(region_object.add("cacheable", region.is_cacheable()));
+            TRY(region_object.add("address", region.vaddr().get()));
+            TRY(region_object.add("size", region.size()));
+            TRY(region_object.add("amount_resident", region.amount_resident()));
+            TRY(region_object.add("amount_dirty", region.amount_dirty()));
+            TRY(region_object.add("cow_pages", region.cow_pages()));
+            TRY(region_object.add("name", region.name()));
+            TRY(region_object.add("vmobject", region.vmobject().class_name()));
 
             StringBuilder pagemap_builder;
-            for (size_t i = 0; i < region->page_count(); ++i) {
-                auto const* page = region->physical_page(i);
+            for (size_t i = 0; i < region.page_count(); ++i) {
+                auto const* page = region.physical_page(i);
                 if (!page)
                     pagemap_builder.append('N');
                 else if (page->is_shared_zero_page() || page->is_lazy_committed_page())
diff --git a/Kernel/Syscalls/fork.cpp b/Kernel/Syscalls/fork.cpp
index 505cbcc1fc9..0b106360766 100644
--- a/Kernel/Syscalls/fork.cpp
+++ b/Kernel/Syscalls/fork.cpp
@@ -109,12 +109,12 @@ ErrorOr<FlatPtr> Process::sys$fork(RegisterState& regs)
     {
         SpinlockLocker lock(address_space().get_lock());
         for (auto& region : address_space().regions()) {
-            dbgln_if(FORK_DEBUG, "fork: cloning Region({}) '{}' @ {}", region, region->name(), region->vaddr());
-            auto region_clone = TRY(region->try_clone());
+            dbgln_if(FORK_DEBUG, "fork: cloning Region({}) '{}' @ {}", region, region.name(), region.vaddr());
+            auto region_clone = TRY(region.try_clone());
             TRY(region_clone->map(child->address_space().page_directory(), Memory::ShouldFlushTLB::No));
             auto* child_region = TRY(child->address_space().add_region(move(region_clone)));
 
-            if (region == m_master_tls_region.unsafe_ptr())
+            if (&region == m_master_tls_region.unsafe_ptr())
                 child->m_master_tls_region = TRY(child_region->try_make_weak_ptr());
         }
     }
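
A note on the data structure, for readers unfamiliar with intrusive containers: with the old RedBlackTree<FlatPtr, NonnullOwnPtr<Region>>, every insert had to heap-allocate a tree node (and could therefore fail), while IntrusiveRedBlackTree<&Region::m_tree_node> links the tree through a node embedded in the Region itself, so insertion and removal only rewire pointers. The sketch below illustrates the idea outside the kernel; it is not SerenityOS code. It uses Boost.Intrusive's set (a red-black tree) as a stand-in for AK::IntrusiveRedBlackTree, and its Region is a deliberately simplified stand-in for Kernel::Memory::Region.

// Sketch: an address-sorted region tree whose node lives inside the element,
// so linking and unlinking a Region never allocates.
#include <boost/intrusive/set.hpp>
#include <cstddef>
#include <cstdint>
#include <iostream>

struct Region {
    Region(std::uintptr_t base, std::size_t size)
        : base(base)
        , size(size)
    {
    }

    std::uintptr_t base { 0 };
    std::size_t size { 0 };

    // The equivalent of Region::m_tree_node: the link fields are part of the object.
    boost::intrusive::set_member_hook<> tree_node;

    // Ordered by base address, like the FlatPtr key in the kernel's tree.
    bool operator<(Region const& other) const { return base < other.base; }
};

using RegionTree = boost::intrusive::set<
    Region,
    boost::intrusive::member_hook<Region, boost::intrusive::set_member_hook<>, &Region::tree_node>>;

int main()
{
    Region code { 0x10000, 0x4000 };
    Region stack { 0x80000, 0x2000 };

    RegionTree regions;
    regions.insert(code);  // only rewires pointers; cannot fail for lack of memory
    regions.insert(stack);

    for (auto const& region : regions)
        std::cout << std::hex << region.base << " +" << region.size << '\n';

    regions.erase(regions.iterator_to(stack)); // unlinking frees nothing
    regions.clear();                           // unlink the rest; element storage is untouched
}

The tradeoff is that an intrusive container never owns or destroys its elements, which is what the rest of the patch has to account for.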
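
Because the tree only links Regions and never owns them, the patch moves ownership around by hand: add_region() leaks the NonnullOwnPtr into the tree, take_region() re-adopts the raw Region into a NonnullOwnPtr, and the new delete_all_regions_assuming_they_are_unmapped() deletes whatever is still linked when the AddressSpace dies. Below is a minimal stand-in for that ownership handoff, using std::unique_ptr and a plain std::map in place of the AK types; all names are illustrative, not kernel API.

// Sketch (not SerenityOS code) of the ownership handoff in add_region(),
// take_region() and ~AddressSpace(): the smart pointer is released into the
// container and re-adopted when the region is taken back out.
#include <cstddef>
#include <cstdint>
#include <map>
#include <memory>

struct Region {
    std::uintptr_t base { 0 };
    std::size_t size { 0 };
};

class AddressSpace {
public:
    ~AddressSpace()
    {
        // Mirrors delete_all_regions_assuming_they_are_unmapped(): anything
        // still linked into the container is owned by it and must be deleted.
        for (auto& entry : m_regions)
            delete entry.second;
    }

    // Like add_region(): ownership moves into the container ("leaked"),
    // the caller gets a raw pointer back.
    Region* add_region(std::unique_ptr<Region> region)
    {
        Region* ptr = region.release();
        m_regions.emplace(ptr->base, ptr);
        return ptr;
    }

    // Like take_region(): unlink from the container, then re-adopt the raw
    // pointer into an owning pointer for the caller.
    std::unique_ptr<Region> take_region(Region& region)
    {
        m_regions.erase(region.base);
        return std::unique_ptr<Region>(&region);
    }

private:
    std::map<std::uintptr_t, Region*> m_regions; // stand-in for the intrusive tree
};

int main()
{
    AddressSpace space;
    auto* region = space.add_region(std::unique_ptr<Region>(new Region { 0x10000, 0x4000 }));
    auto taken = space.take_region(*region); // ownership is back with the caller
    // "taken" frees the Region when it goes out of scope; anything left in
    // "space" would be freed by ~AddressSpace() instead.
}

This is why every removal path in the patch must either delete the Region or re-own it, and why add_region() carries the NOTE comment about the region being leaked into the tree.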