Kernel+Profiler: Capture metadata about all profiled processes

The perfcore file format was previously limited to a single process
since the pid/executable/regions data was top-level in the JSON.

This patch moves the process-specific data into a top-level array
named "processes" and we now add entries for each process that has
been sampled during the profile run.

This makes it possible to see samples from multiple threads when
viewing a perfcore file with Profiler. This is extremely cool! :^)
This commit is contained in:
Andreas Kling 2021-03-02 19:01:02 +01:00
parent ea500dd3e3
commit 5e7abea31e
Notes: sideshowbarker 2024-07-18 21:45:59 +09:00
11 changed files with 223 additions and 102 deletions

View File

@ -488,16 +488,10 @@ static bool procfs$pid_perf_events(InodeIdentifier identifier, KBufferBuilder& b
auto process = Process::from_pid(to_pid(identifier));
if (!process)
return false;
InterruptDisabler disabler;
if (!process->executable())
return false;
if (!process->perf_events())
return false;
return process->perf_events()->to_json(builder, process->pid(), process->executable()->absolute_path());
return process->perf_events()->to_json(builder);
}
static bool procfs$net_adapters(InodeIdentifier, KBufferBuilder& builder)

View File

@ -28,6 +28,7 @@
#include <AK/JsonObject.h>
#include <AK/JsonObjectSerializer.h>
#include <Kernel/Arch/x86/SmapDisabler.h>
#include <Kernel/FileSystem/Custody.h>
#include <Kernel/KBufferBuilder.h>
#include <Kernel/PerformanceEventBuffer.h>
#include <Kernel/Process.h>
@ -111,14 +112,6 @@ PerformanceEvent& PerformanceEventBuffer::at(size_t index)
return events[index];
}
OwnPtr<KBuffer> PerformanceEventBuffer::to_json(ProcessID pid, const String& executable_path) const
{
KBufferBuilder builder;
if (!to_json(builder, pid, executable_path))
return {};
return builder.build();
}
template<typename Serializer>
bool PerformanceEventBuffer::to_json_impl(Serializer& object) const
{
@ -154,33 +147,28 @@ bool PerformanceEventBuffer::to_json_impl(Serializer& object) const
return true;
}
bool PerformanceEventBuffer::to_json(KBufferBuilder& builder)
bool PerformanceEventBuffer::to_json(KBufferBuilder& builder) const
{
JsonObjectSerializer object(builder);
return to_json_impl(object);
}
bool PerformanceEventBuffer::to_json(KBufferBuilder& builder, ProcessID pid, const String& executable_path) const
{
auto process = Process::from_pid(pid);
VERIFY(process);
ScopedSpinLock locker(process->space().get_lock());
auto processes_array = object.add_array("processes");
for (auto& it : m_processes) {
auto& process = *it.value;
auto process_object = processes_array.add_object();
process_object.add("pid", process.pid.value());
process_object.add("executable", process.executable);
JsonObjectSerializer object(builder);
object.add("pid", pid.value());
object.add("executable", executable_path);
{
auto region_array = object.add_array("regions");
for (const auto& region : process->space().regions()) {
auto region_object = region_array.add_object();
region_object.add("base", region.vaddr().get());
region_object.add("size", region.size());
region_object.add("name", region.name());
auto regions_array = process_object.add_array("regions");
for (auto& region : process.regions) {
auto region_object = regions_array.add_object();
region_object.add("name", region.name);
region_object.add("base", region.range.base().get());
region_object.add("size", region.range.size());
}
region_array.finish();
}
processes_array.finish();
return to_json_impl(object);
}
@ -192,4 +180,35 @@ OwnPtr<PerformanceEventBuffer> PerformanceEventBuffer::try_create_with_size(size
return adopt_own(*new PerformanceEventBuffer(buffer.release_nonnull()));
}
void PerformanceEventBuffer::add_process(const Process& process)
{
// FIXME: What about threads that have died?
ScopedSpinLock locker(process.space().get_lock());
String executable;
if (process.executable())
executable = process.executable()->absolute_path();
auto sampled_process = adopt_own(*new SampledProcess {
.pid = process.pid().value(),
.executable = executable,
.threads = {},
.regions = {},
});
process.for_each_thread([&](auto& thread) {
sampled_process->threads.set(thread.tid());
return IterationDecision::Continue;
});
for (auto& region : process.space().regions()) {
sampled_process->regions.append(SampledProcess::Region {
.name = region.name(),
.range = region.range(),
});
}
m_processes.set(process.pid(), move(sampled_process));
}
}

View File

@ -75,15 +75,25 @@ public:
return const_cast<PerformanceEventBuffer&>(*this).at(index);
}
OwnPtr<KBuffer> to_json(ProcessID, const String& executable_path) const;
bool to_json(KBufferBuilder&, ProcessID, const String& executable_path) const;
bool to_json(KBufferBuilder&) const;
// Used by full-system profile (/proc/profile)
bool to_json(KBufferBuilder&);
void add_process(const Process&);
private:
explicit PerformanceEventBuffer(NonnullOwnPtr<KBuffer>);
// Snapshot of one process, as recorded by add_process() and later written
// out as an entry of the "processes" array by to_json().
struct SampledProcess {
// Process ID, taken from Process::pid() at snapshot time.
ProcessID pid;
// Absolute path of the process's executable; left default-constructed
// when the process has no executable.
String executable;
// IDs of the threads seen via for_each_thread() at snapshot time.
// NOTE(review): not emitted by the to_json() code visible in this change.
HashTable<ThreadID> threads;
// One address space region: its name and its virtual address range.
struct Region {
String name;
Range range;
};
// All regions of the process's address space at snapshot time.
Vector<Region> regions;
};
template<typename Serializer>
bool to_json_impl(Serializer&) const;
@ -91,6 +101,8 @@ private:
size_t m_count { 0 };
NonnullOwnPtr<KBuffer> m_buffer;
HashMap<ProcessID, NonnullOwnPtr<SampledProcess>> m_processes;
};
}

View File

@ -448,10 +448,13 @@ bool Process::dump_perfcore()
if (description_or_error.is_error())
return false;
auto& description = description_or_error.value();
auto json = m_perf_event_buffer->to_json(m_pid, m_executable ? m_executable->absolute_path() : "");
if (!json)
KBufferBuilder builder;
if (!m_perf_event_buffer->to_json(builder))
return false;
auto json = builder.build();
if (!json)
return false;
auto json_buffer = UserOrKernelBuffer::for_kernel_buffer(json->data());
return !description->write(json_buffer, json->size()).is_error();
}
@ -671,6 +674,7 @@ bool Process::create_perf_events_buffer_if_needed()
{
    // Lazily allocates this process's perf event buffer and registers the
    // process itself as the first sampled process.
    // Returns true if a buffer exists afterwards, false if allocation failed.
    if (!m_perf_event_buffer) {
        m_perf_event_buffer = PerformanceEventBuffer::try_create_with_size(4 * MiB);
        // try_create_with_size() may hand back a null OwnPtr (this function
        // already reports that case through its return value), so only
        // register the process when the buffer was actually created.
        if (m_perf_event_buffer)
            m_perf_event_buffer->add_process(*this);
    }
    return !!m_perf_event_buffer;
}

View File

@ -552,8 +552,15 @@ void Scheduler::timer_tick(const RegisterState& regs)
VERIFY(g_global_perf_events);
// FIXME: We currently don't collect samples while idle.
// That will be an interesting mode to add in the future. :^)
if (current_thread != Processor::current().idle_thread())
if (current_thread != Processor::current().idle_thread()) {
perf_events = g_global_perf_events;
if (current_thread->process().space().enforces_syscall_regions()) {
// FIXME: This is very nasty! We dump the current process's address
// space layout *every time* it's sampled. We should figure out
// a way to do this less often.
perf_events->add_process(current_thread->process());
}
}
} else if (current_thread->process().is_profiling()) {
VERIFY(current_thread->process().perf_events());
perf_events = current_thread->process().perf_events();

View File

@ -26,6 +26,7 @@
#include <AK/WeakPtr.h>
#include <Kernel/FileSystem/FileDescription.h>
#include <Kernel/PerformanceEventBuffer.h>
#include <Kernel/Process.h>
#include <Kernel/VM/MemoryManager.h>
#include <Kernel/VM/PageDirectory.h>

View File

@ -68,7 +68,8 @@ DisassemblyModel::DisassemblyModel(Profile& profile, ProfileNode& node)
kernel_elf = make<ELF::Image>((const u8*)m_kernel_file->data(), m_kernel_file->size());
elf = kernel_elf.ptr();
} else {
auto library_data = profile.libraries().library_containing(node.address());
// FIXME: This is kinda rickety looking with all the -> -> ->
auto library_data = node.process(profile)->library_metadata->library_containing(node.address());
if (!library_data) {
dbgln("no library data");
return;

View File

@ -47,10 +47,9 @@ static void sort_profile_nodes(Vector<NonnullRefPtr<ProfileNode>>& nodes)
child->sort_children();
}
Profile::Profile(String executable_path, Vector<Event> events, NonnullOwnPtr<LibraryMetadata> library_metadata)
: m_executable_path(move(executable_path))
Profile::Profile(Vector<Process> processes, Vector<Event> events)
: m_processes(move(processes))
, m_events(move(events))
, m_library_metadata(move(library_metadata))
{
m_first_timestamp = m_events.first().timestamp;
m_last_timestamp = m_events.last().timestamp;
@ -84,14 +83,14 @@ void Profile::rebuild_tree()
u32 filtered_event_count = 0;
Vector<NonnullRefPtr<ProfileNode>> roots;
auto find_or_create_root = [&roots](FlyString object_name, String symbol, u32 address, u32 offset, u64 timestamp) -> ProfileNode& {
auto find_or_create_root = [&roots](FlyString object_name, String symbol, u32 address, u32 offset, u64 timestamp, pid_t pid) -> ProfileNode& {
for (size_t i = 0; i < roots.size(); ++i) {
auto& root = roots[i];
if (root->symbol() == symbol) {
return root;
}
}
auto new_root = ProfileNode::create(move(object_name), move(symbol), address, offset, timestamp);
auto new_root = ProfileNode::create(move(object_name), move(symbol), address, offset, timestamp, pid);
roots.append(new_root);
return new_root;
};
@ -149,10 +148,11 @@ void Profile::rebuild_tree()
if (symbol.is_empty())
return IterationDecision::Break;
// FIXME: More cheating with intentional mixing of TID/PID here:
if (!node)
node = &find_or_create_root(object_name, symbol, address, offset, event.timestamp);
node = &find_or_create_root(object_name, symbol, address, offset, event.timestamp, event.tid);
else
node = &node->find_or_create_child(object_name, symbol, address, offset, event.timestamp);
node = &node->find_or_create_child(object_name, symbol, address, offset, event.timestamp, event.tid);
node->increment_event_count();
if (is_innermost_frame) {
@ -174,12 +174,13 @@ void Profile::rebuild_tree()
if (symbol.is_empty())
break;
// FIXME: More PID/TID mixing cheats here:
if (!node) {
node = &find_or_create_root(object_name, symbol, address, offset, event.timestamp);
node = &find_or_create_root(object_name, symbol, address, offset, event.timestamp, event.tid);
root = node;
root->will_track_seen_events(m_events.size());
} else {
node = &node->find_or_create_child(object_name, symbol, address, offset, event.timestamp);
node = &node->find_or_create_child(object_name, symbol, address, offset, event.timestamp, event.tid);
}
if (!root->has_seen_event(event_index)) {
@ -219,11 +220,45 @@ Result<NonnullOwnPtr<Profile>, String> Profile::load_from_perfcore_file(const St
return String { "Invalid perfcore format (not a JSON object)" };
auto& object = json.value().as_object();
auto executable_path = object.get("executable").to_string();
auto pid = object.get("pid");
if (!pid.is_u32())
return String { "Invalid perfcore format (no process ID)" };
auto processes_value = object.get("processes");
if (processes_value.is_null())
return String { "Invalid perfcore format (no processes)" };
if (!processes_value.is_array())
return String { "Invalid perfcore format (processes is not an array)" };
Vector<Process> sampled_processes;
for (auto& process_value : processes_value.as_array().values()) {
if (!process_value.is_object())
return String { "Invalid perfcore format (process value is not an object)" };
auto& process = process_value.as_object();
auto regions_value = process.get("regions");
if (!regions_value.is_array())
return String { "Invalid perfcore format (regions is not an array)" };
Process sampled_process {
.pid = (pid_t)process.get("pid").to_i32(),
.executable = process.get("executable").to_string(),
.threads = {},
.regions = {},
.library_metadata = make<LibraryMetadata>(regions_value.as_array()),
};
for (auto& region_value : regions_value.as_array().values()) {
if (!region_value.is_object())
return String { "Invalid perfcore format (region is not an object)" };
auto& region = region_value.as_object();
sampled_process.regions.append(Process::Region {
.name = region.get("name").to_string(),
.base = region.get("base").to_u32(),
.size = region.get("size").to_u32(),
});
}
sampled_processes.append(move(sampled_process));
}
auto file_or_error = MappedFile::map("/boot/Kernel");
OwnPtr<ELF::Image> kernel_elf;
@ -234,16 +269,10 @@ Result<NonnullOwnPtr<Profile>, String> Profile::load_from_perfcore_file(const St
if (!events_value.is_array())
return String { "Malformed profile (events is not an array)" };
auto regions_value = object.get("regions");
if (!regions_value.is_array() || regions_value.as_array().is_empty())
return String { "Malformed profile (regions is not an array, or it is empty)" };
auto& perf_events = events_value.as_array();
if (perf_events.is_empty())
return String { "No events captured (targeted process was never on CPU)" };
auto library_metadata = make<LibraryMetadata>(regions_value.as_array());
Vector<Event> events;
for (auto& perf_event_value : perf_events.values()) {
@ -253,6 +282,7 @@ Result<NonnullOwnPtr<Profile>, String> Profile::load_from_perfcore_file(const St
event.timestamp = perf_event.get("timestamp").to_number<u64>();
event.type = perf_event.get("type").to_string();
event.tid = perf_event.get("tid").to_i32();
if (event.type == "malloc") {
event.ptr = perf_event.get("ptr").to_number<FlatPtr>();
@ -276,7 +306,15 @@ Result<NonnullOwnPtr<Profile>, String> Profile::load_from_perfcore_file(const St
symbol = "??";
}
} else {
if (auto* library = library_metadata->library_containing(ptr)) {
auto it = sampled_processes.find_if([&](auto& entry) {
// FIXME: This doesn't support multi-threaded programs!
return entry.pid == event.tid;
});
// FIXME: This logic is kinda gnarly, find a way to clean it up.
LibraryMetadata* library_metadata {};
if (!it.is_end())
library_metadata = it->library_metadata.ptr();
if (auto* library = library_metadata ? library_metadata->library_containing(ptr) : nullptr) {
object_name = library->name;
symbol = library->elf.symbolicate(ptr - library->base, &offset);
} else {
@ -296,7 +334,7 @@ Result<NonnullOwnPtr<Profile>, String> Profile::load_from_perfcore_file(const St
events.append(move(event));
}
return adopt_own(*new Profile(executable_path, move(events), move(library_metadata)));
return adopt_own(*new Profile(move(sampled_processes), move(events)));
}
void ProfileNode::sort_children()
@ -363,7 +401,7 @@ GUI::Model* Profile::disassembly_model()
return m_disassembly_model;
}
Profile::LibraryMetadata::LibraryMetadata(JsonArray regions)
LibraryMetadata::LibraryMetadata(JsonArray regions)
: m_regions(move(regions))
{
for (auto& region_value : m_regions.values()) {
@ -391,12 +429,12 @@ Profile::LibraryMetadata::LibraryMetadata(JsonArray regions)
auto elf = ELF::Image(file_or_error.value()->bytes());
if (!elf.is_valid())
continue;
auto library = make<Library>(base, size, name, file_or_error.release_value(), move(elf));
auto library = adopt_own(*new Library { base, size, name, file_or_error.release_value(), move(elf) });
m_libraries.set(name, move(library));
}
}
const Profile::LibraryMetadata::Library* Profile::LibraryMetadata::library_containing(FlatPtr ptr) const
const LibraryMetadata::Library* LibraryMetadata::library_containing(FlatPtr ptr) const
{
for (auto& it : m_libraries) {
if (!it.value)
@ -408,8 +446,9 @@ const Profile::LibraryMetadata::Library* Profile::LibraryMetadata::library_conta
return nullptr;
}
ProfileNode::ProfileNode(const String& object_name, String symbol, u32 address, u32 offset, u64 timestamp)
ProfileNode::ProfileNode(const String& object_name, String symbol, u32 address, u32 offset, u64 timestamp, pid_t pid)
: m_symbol(move(symbol))
, m_pid(pid)
, m_address(address)
, m_offset(offset)
, m_timestamp(timestamp)
@ -422,3 +461,8 @@ ProfileNode::ProfileNode(const String& object_name, String symbol, u32 address,
}
m_object_name = LexicalPath(object).basename();
}
// Looks up the process this node was attributed to by passing the node's
// stored m_pid to Profile::find_process(). The result is forwarded as-is,
// so it is nullptr when the profile has no process with that PID.
// NOTE(review): rebuild_tree() currently passes event.tid as the pid when
// creating nodes, so the lookup only matches where TID == PID.
const Process* ProfileNode::process(Profile& profile) const
{
return profile.find_process(m_pid);
}

View File

@ -39,15 +39,50 @@
#include <LibGUI/Forward.h>
#include <LibGUI/ModelIndex.h>
class ProfileModel;
class DisassemblyModel;
class Profile;
class ProfileModel;
class SamplesModel;
// Symbolication data for one process, built from the "regions" JSON array
// of that process's entry in a perfcore file.
class LibraryMetadata {
public:
// Takes ownership of the regions array and populates the library table
// from it; regions whose ELF image is not valid are skipped.
explicit LibraryMetadata(JsonArray regions);
// One loaded library: where it is mapped, its name, the backing file
// mapping, and the ELF image used to symbolicate addresses.
struct Library {
FlatPtr base;
size_t size;
String name;
NonnullRefPtr<MappedFile> file;
ELF::Image elf;
};
// Returns the library matching the given address, or nullptr if there is none.
// NOTE(review): presumably matches on [base, base + size) — the range check
// itself is not visible in this change; confirm in the implementation.
const Library* library_containing(FlatPtr) const;
private:
// Loaded libraries, keyed by library name.
mutable HashMap<String, OwnPtr<Library>> m_libraries;
// The regions array this object was constructed from.
JsonArray m_regions;
};
// One entry of the perfcore file's top-level "processes" array, as loaded
// by Profile::load_from_perfcore_file().
struct Process {
// Value of the entry's "pid" field.
pid_t pid {};
// Value of the entry's "executable" field.
String executable;
// NOTE(review): initialized empty by the loader and not populated by any
// code visible in this change.
HashTable<int> threads;
// One entry of the process's "regions" array ("name", "base", "size").
struct Region {
String name;
FlatPtr base {};
size_t size {};
};
// All regions listed for this process in the perfcore file.
Vector<Region> regions;
// Library table built from the same "regions" array; used to symbolicate
// addresses sampled in this process.
NonnullOwnPtr<LibraryMetadata> library_metadata;
};
class ProfileNode : public RefCounted<ProfileNode> {
public:
static NonnullRefPtr<ProfileNode> create(FlyString object_name, String symbol, u32 address, u32 offset, u64 timestamp)
static NonnullRefPtr<ProfileNode> create(FlyString object_name, String symbol, u32 address, u32 offset, u64 timestamp, pid_t pid)
{
return adopt(*new ProfileNode(move(object_name), move(symbol), address, offset, timestamp));
return adopt(*new ProfileNode(move(object_name), move(symbol), address, offset, timestamp, pid));
}
// These functions are only relevant for root nodes
@ -80,7 +115,7 @@ public:
m_children.append(child);
}
ProfileNode& find_or_create_child(FlyString object_name, String symbol, u32 address, u32 offset, u64 timestamp)
ProfileNode& find_or_create_child(FlyString object_name, String symbol, u32 address, u32 offset, u64 timestamp, pid_t pid)
{
for (size_t i = 0; i < m_children.size(); ++i) {
auto& child = m_children[i];
@ -88,7 +123,7 @@ public:
return child;
}
}
auto new_child = ProfileNode::create(move(object_name), move(symbol), address, offset, timestamp);
auto new_child = ProfileNode::create(move(object_name), move(symbol), address, offset, timestamp, pid);
add_child(new_child);
return new_child;
};
@ -111,12 +146,17 @@ public:
m_events_per_address.set(address, it->value + 1);
}
pid_t pid() const { return m_pid; }
const Process* process(Profile&) const;
private:
explicit ProfileNode(const String& object_name, String symbol, u32 address, u32 offset, u64 timestamp);
explicit ProfileNode(const String& object_name, String symbol, u32 address, u32 offset, u64 timestamp, pid_t);
ProfileNode* m_parent { nullptr };
FlyString m_object_name;
String m_symbol;
pid_t m_pid { 0 };
u32 m_address { 0 };
u32 m_offset { 0 };
u32 m_event_count { 0 };
@ -136,6 +176,14 @@ public:
GUI::Model& samples_model();
GUI::Model* disassembly_model();
// Returns the first loaded process whose pid equals `pid`,
// or nullptr when the profile contains no such process.
const Process* find_process(pid_t pid) const
{
    for (auto& candidate : m_processes) {
        if (candidate.pid == pid)
            return &candidate;
    }
    return nullptr;
}
void set_disassembly_index(const GUI::ModelIndex&);
const Vector<NonnullRefPtr<ProfileNode>>& roots() const { return m_roots; }
@ -152,6 +200,7 @@ public:
String type;
FlatPtr ptr { 0 };
size_t size { 0 };
int tid { 0 };
bool in_kernel { false };
Vector<Frame> frames;
};
@ -178,29 +227,6 @@ public:
bool show_percentages() const { return m_show_percentages; }
void set_show_percentages(bool);
const String& executable_path() const { return m_executable_path; }
class LibraryMetadata {
public:
LibraryMetadata(JsonArray regions);
struct Library {
FlatPtr base;
size_t size;
String name;
NonnullRefPtr<MappedFile> file;
ELF::Image elf;
};
const Library* library_containing(FlatPtr) const;
private:
mutable HashMap<String, OwnPtr<Library>> m_libraries;
JsonArray m_regions;
};
const LibraryMetadata& libraries() const { return *m_library_metadata; }
template<typename Callback>
void for_each_event_in_filter_range(Callback callback)
{
@ -215,12 +241,10 @@ public:
}
private:
Profile(String executable_path, Vector<Event>, NonnullOwnPtr<LibraryMetadata>);
Profile(Vector<Process>, Vector<Event>);
void rebuild_tree();
String m_executable_path;
RefPtr<ProfileModel> m_model;
RefPtr<SamplesModel> m_samples_model;
RefPtr<DisassemblyModel> m_disassembly_model;
@ -233,10 +257,9 @@ private:
u64 m_first_timestamp { 0 };
u64 m_last_timestamp { 0 };
Vector<Process> m_processes;
Vector<Event> m_events;
NonnullOwnPtr<LibraryMetadata> m_library_metadata;
bool m_has_timestamp_filter_range { false };
u64 m_timestamp_filter_range_start { 0 };
u64 m_timestamp_filter_range_end { 0 };

View File

@ -57,6 +57,10 @@ String SamplesModel::column_name(int column) const
return "#";
case Column::Timestamp:
return "Timestamp";
case Column::ThreadID:
return "TID";
case Column::ExecutableName:
return "Executable";
case Column::InnermostStackFrame:
return "Innermost Frame";
default:
@ -77,6 +81,16 @@ GUI::Variant SamplesModel::data(const GUI::ModelIndex& index, GUI::ModelRole rol
if (index.column() == Column::SampleIndex)
return event_index;
if (index.column() == Column::ThreadID)
return event.tid;
if (index.column() == Column::ExecutableName) {
// FIXME: More abuse of the PID/TID relationship:
if (auto* process = m_profile.find_process(event.tid))
return process->executable;
return "";
}
if (index.column() == Column::Timestamp) {
return (u32)event.timestamp;
}

View File

@ -40,6 +40,8 @@ public:
// Columns of the samples table; header texts come from column_name().
enum Column {
// Index of the sample (header "#").
SampleIndex,
// Timestamp of the sample (header "Timestamp").
Timestamp,
// Thread ID recorded for the sample (header "TID").
ThreadID,
// Executable of the process matched to the sample (header "Executable").
ExecutableName,
// Innermost stack frame of the sample (header "Innermost Frame").
InnermostStackFrame,
// NOTE(review): presumably the number of columns; its use is not visible here.
__Count
};