mirror of
https://github.com/facebook/sapling.git
synced 2024-10-11 01:07:15 +03:00
3b9d8cf876
Summary: Previously, the tracer extracts file name, function name from Python code objects and stores them as std::string. And frame the de-duplication logic hashes those strings, which is actually quite expensive. `hg id` takes 1.0 seconds without frame de-duplication, and 2.5 seconds with de-duplication. This diff assumes Python code object is unique (i.e. Python won't generate two code objects for a single code segment). That seems reasonable and `lsprof` seems to make a same assumption. With that assumption, just store `PyCodeObject` (contains file, method name and line number) instead of `std::string` (for file, method names) and line numbers for code identity and use the raw address of `PyCodeObject` to do frame de-duplication. That is helpful during profiling and de-duplication: - During profiling, no need to copy strings (convert to std::string), therefore less overhead - During frame de-duplication, no need to read long strings and hash them, therefore much faster Debug flag during compilation is added to make debugging easier as the code starts to mix of C++ and Python objects, which means mistakes could be made more easily. Test Plan: With this patch, `hg id` with frame de-duplication takes 1.0 seconds. There is no visible overhead doing de-duplication. Cheers! Reviewers: #mercurial, rmcelroy Reviewed By: rmcelroy Subscribers: rmcelroy, mjpieters Differential Revision: https://phabricator.intern.facebook.com/D5112636 Signature: t1:5112636:1495562232:dd0724198ebb7f2eb4e9fd3a83517fc9160fa01e
390 lines
9.2 KiB
C++
390 lines
9.2 KiB
C++
/* traceprofimpl.cpp - main implementation of traceprofile
|
|
*
|
|
* Copyright 2017 Facebook, Inc.
|
|
*
|
|
* This software may be used and distributed according to the terms of the
|
|
* GNU General Public License version 2 or any later version. */
|
|
#include "Python.h"
|
|
#include "frameobject.h"
|
|
|
|
#include <algorithm>
|
|
#include <cassert>
|
|
#include <cstdint>
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <list>
|
|
#include <string>
|
|
#include <unordered_map>
|
|
#include <vector>
|
|
|
|
#include <sys/time.h>
|
|
#include <unistd.h>
|
|
|
|
typedef int lineno_t;
|
|
typedef uint64_t frameid_t;
|
|
typedef uint64_t rdtsc_t;
|
|
|
|
/* tracing ------------------------------------------------------------------ */
|
|
|
|
/* information about a raw Python frame */
|
|
struct FrameInfo {
|
|
PyCodeObject *code; /* PyCodeObject */
|
|
frameid_t back;
|
|
|
|
/* needed by older C++ map which has difficulty on zero-copy assignment */
|
|
FrameInfo() { code = NULL; back = 0; }
|
|
|
|
void assign(PyFrameObject *frame, frameid_t backfid) {
|
|
back = backfid;
|
|
code = frame->f_code;
|
|
Py_XINCREF(code);
|
|
}
|
|
|
|
lineno_t line() {
|
|
if (!code) return 0;
|
|
return code->co_firstlineno;
|
|
}
|
|
|
|
const char *file() {
|
|
if (!code) return NULL;
|
|
return PyString_AsString(code->co_filename);
|
|
}
|
|
|
|
const char *name() {
|
|
if (!code) return NULL;
|
|
return PyString_AsString(code->co_name);
|
|
}
|
|
|
|
~FrameInfo() {
|
|
Py_XDECREF(code);
|
|
code = NULL;
|
|
}
|
|
|
|
private:
|
|
/* forbid copy */
|
|
FrameInfo(const FrameInfo& rhs);
|
|
FrameInfo(const FrameInfo&& rhs);
|
|
};
|
|
|
|
/* samples */
|
|
struct Sample {
|
|
rdtsc_t time;
|
|
frameid_t frameid;
|
|
int op; /* PyTrace_{CALL,EXCEPTION,LINE,RETURN,C_CALL,C_EXCEPTION,C_RETURN} */
|
|
};
|
|
|
|
/* global state, collected frames and samples */
|
|
static std::unordered_map<frameid_t, FrameInfo> frames;
|
|
static std::list<Sample> samples;
|
|
|
|
/* for measuring wall time / rdtsc ratio */
|
|
static struct timeval t1, t2;
|
|
static rdtsc_t r1, r2;
|
|
static double rdtscratio; /* set by disable() */
|
|
|
|
/* fast way to get wall time */
|
|
inline static rdtsc_t rdtsc() {
|
|
unsigned long lo, hi;
|
|
asm volatile ("rdtsc" : "=a" (lo), "=d" (hi));
|
|
return (rdtsc_t)(lo | (hi << 32));
|
|
}
|
|
|
|
/* fast, but inaccurate hashing of a Python frame */
|
|
inline static uint64_t hashframe(PyFrameObject *frame) {
|
|
uint64_t v = ((uint64_t)frame ^ ((uint64_t)(frame->f_back) << 16) ^
|
|
((uint64_t)frame->f_code << 32));
|
|
/* f_code is usually immutable (lsprof use its address as keys) */
|
|
return v;
|
|
}
|
|
|
|
/* hash, and store a Python frame, return its ID */
|
|
static frameid_t hashandstoreframe(PyFrameObject *frame) {
|
|
if (!frame) return 0;
|
|
frameid_t frameid = (frameid_t)hashframe(frame);
|
|
if (frames.count(frameid) == 0) {
|
|
frames[frameid].assign(frame,hashandstoreframe(frame->f_back));
|
|
}
|
|
return frameid;
|
|
}
|
|
|
|
/* record a sample */
|
|
inline static void recordframe(PyFrameObject *frame, int op) {
|
|
frameid_t fid = hashandstoreframe(frame);
|
|
samples.push_back({rdtsc(), fid, op});
|
|
}
|
|
|
|
static int tracefunc(PyObject *o, PyFrameObject *frame, int op, PyObject *a) {
|
|
(void)o;
|
|
(void)a;
|
|
recordframe(frame, op);
|
|
return 0;
|
|
}
|
|
|
|
static void enable() {
|
|
r1 = rdtsc();
|
|
gettimeofday(&t1, NULL);
|
|
PyEval_SetProfile((Py_tracefunc) tracefunc, NULL);
|
|
}
|
|
|
|
static void disable() {
|
|
PyEval_SetProfile(NULL, NULL);
|
|
r2 = rdtsc();
|
|
gettimeofday(&t2, NULL);
|
|
/* calculate rdtscratio */
|
|
double dt = 0; /* ms */
|
|
dt += (double)(t2.tv_sec - t1.tv_sec) * 1000.0; /* sec to ms */
|
|
dt += (double)(t2.tv_usec - t1.tv_usec) / 1000.0; /* us to ms */
|
|
rdtscratio = dt / (double)(r2 - r1);
|
|
}
|
|
|
|
/* reporting ---------------------------------------------------------------- */
|
|
|
|
struct FrameSummary {
|
|
rdtsc_t time;
|
|
unsigned int count;
|
|
};
|
|
|
|
static std::unordered_map<frameid_t, FrameSummary> summaries;
|
|
static std::unordered_map<frameid_t, std::list<frameid_t> > framechildren;
|
|
|
|
/* for dedup */
|
|
static std::unordered_map<frameid_t, uint64_t> fid2hash;
|
|
static std::unordered_map<uint64_t, frameid_t> hash2fid;
|
|
|
|
/* hash FrameInfo, do not be affected by frame addresses */
|
|
static uint64_t hashframeinfo(frameid_t fid) {
|
|
if (fid == 0) {
|
|
return 0;
|
|
}
|
|
if (!fid2hash.count(fid)) {
|
|
auto& fi = frames[fid];
|
|
uint64_t v = (uint64_t)fi.code;
|
|
v ^= hashframeinfo(fi.back) << 1;
|
|
fid2hash[fid] = v;
|
|
}
|
|
return fid2hash[fid];
|
|
}
|
|
|
|
/* fill hash2fid */
|
|
static void buildframededup() {
|
|
for (auto& s : samples) {
|
|
frameid_t fid = s.frameid;
|
|
while (fid) {
|
|
uint64_t v = hashframeinfo(fid);
|
|
if (hash2fid.count(v) == 0) {
|
|
hash2fid[v] = fid;
|
|
fid = frames[fid].back;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static frameid_t dedupfid(frameid_t fid) {
|
|
if (fid2hash.count(fid) == 0) {
|
|
return fid; /* no information available */
|
|
} else {
|
|
return hash2fid[fid2hash[fid]];
|
|
}
|
|
}
|
|
|
|
/* fill calltimes and summaries */
|
|
static void buildsummaries() {
|
|
std::unordered_map<frameid_t, std::list<Sample*>> calls;
|
|
|
|
for (auto& s : samples) {
|
|
frameid_t fid = dedupfid(s.frameid);
|
|
if (s.op == PyTrace_CALL) {
|
|
calls[fid].push_back(&s);
|
|
} else if (s.op == PyTrace_RETURN) {
|
|
auto& entries = calls[fid];
|
|
if (entries.empty())
|
|
continue;
|
|
/* frame was entered before */
|
|
Sample* prev = entries.back();
|
|
entries.pop_back();
|
|
auto &sum = summaries[fid];
|
|
sum.count += 1;
|
|
if (entries.empty())
|
|
sum.time += s.time - prev->time;
|
|
} /* s.op */
|
|
}
|
|
}
|
|
|
|
/* fill framechildren */
|
|
static void buildframetree() {
|
|
for (auto& s : samples) {
|
|
if (s.op != PyTrace_CALL && s.op != PyTrace_C_CALL) {
|
|
continue; /* only interested in call */
|
|
}
|
|
for (frameid_t fid = s.frameid; fid;) {
|
|
fid = dedupfid(fid);
|
|
auto& fi = frames[fid];
|
|
frameid_t pfid = dedupfid(fi.back);
|
|
auto& children = framechildren[pfid];
|
|
int existed = 0;
|
|
for (auto& c : children) {
|
|
if (c == fid) {
|
|
existed = 1;
|
|
break;
|
|
}
|
|
}
|
|
if (existed) {
|
|
break;
|
|
}
|
|
children.push_back(fid);
|
|
fid = pfid;
|
|
} /* for fid */
|
|
}
|
|
}
|
|
|
|
static std::string shortname(std::string path) {
|
|
size_t p = path.rfind('/');
|
|
if (p == std::string::npos)
|
|
return path;
|
|
/* special handling __init__.py, include its dirname */
|
|
if (p > 0 && path.substr(p + 1, p + 1 + 11) == "__init__.py") {
|
|
p = path.rfind('/', p - 1);
|
|
if (p == std::string::npos)
|
|
return path;
|
|
}
|
|
return path.substr(p + 1);
|
|
}
|
|
|
|
/* width needed to output time (in ms) */
|
|
static int timelen() {
|
|
static int n = 0;
|
|
if (n)
|
|
return n;
|
|
n = 1;
|
|
rdtsc_t maxframetime = 0;
|
|
for (auto& s : summaries) {
|
|
if (s.second.time > maxframetime)
|
|
maxframetime = s.second.time;
|
|
}
|
|
for (double t = (double)maxframetime * rdtscratio; t >= 10; t /= 10) {
|
|
n ++;
|
|
}
|
|
return n;
|
|
}
|
|
|
|
/* printf format to output time */
|
|
static const char *timefmt() {
|
|
static char fmt[20] = { 0 };
|
|
if (fmt[0] == 0) {
|
|
int n = timelen();
|
|
snprintf(fmt, sizeof fmt, "%%%d.0f", n);
|
|
}
|
|
return fmt;
|
|
}
|
|
|
|
static inline int fprintindent(FILE *fp, int indent) {
|
|
for (int i = 0; i < indent; ++i) fputc(' ', fp);
|
|
return (indent > 0 ? indent : 0);
|
|
}
|
|
|
|
/* config items */
|
|
static double timethreshold = 2;
|
|
static size_t countthreshold = 2;
|
|
static int dedup = 1;
|
|
|
|
static void settimethreshold(double ms) {
|
|
timethreshold = ms;
|
|
}
|
|
|
|
static void setcountthreshold(size_t count) {
|
|
countthreshold = count;
|
|
}
|
|
|
|
static void setdedup(int value) {
|
|
dedup = value;
|
|
}
|
|
|
|
static void fprintframetree(FILE *fp = stderr, frameid_t fid = 0,
|
|
int indent = 0, char ch = '|') {
|
|
auto& f = frames[fid];
|
|
auto& s = summaries[fid];
|
|
|
|
/* collect (> 2ms) child frames to print */
|
|
std::vector<frameid_t> cfids;
|
|
rdtsc_t ctotaltime = 0;
|
|
for (auto& cfid : framechildren[fid]) {
|
|
auto& cs = summaries[cfid];
|
|
if ((double)cs.time * rdtscratio >= timethreshold || cs.count == 0)
|
|
cfids.push_back(cfid);
|
|
ctotaltime += cs.time;
|
|
}
|
|
|
|
/* hot frame? frame time > 2 * sum(child frame time) and frame time > 30ms */
|
|
int hot = (s.time > ctotaltime * 2 && (double)s.time * rdtscratio > 30);
|
|
|
|
if (fid != 0) {
|
|
int ncol = 0;
|
|
|
|
/* hot symbol */
|
|
if (hot) {
|
|
ncol += fprintf(fp, "* ");
|
|
} else {
|
|
ncol += fprintf(fp, " ");
|
|
}
|
|
|
|
/* time in ms */
|
|
if (s.count > 0) {
|
|
ncol += fprintf(fp, timefmt(), (double)s.time * rdtscratio);
|
|
} else {
|
|
ncol += fprintindent(fp, timelen()); /* call not recorded */
|
|
}
|
|
|
|
/* symbol and indent */
|
|
ncol += fprintindent(fp, indent + 1);
|
|
ncol += fprintf(fp, "%c ", ch);
|
|
|
|
/* frame name */
|
|
ncol += fprintf(fp, "%s ", f.name());
|
|
|
|
/* call count */
|
|
if (s.count >= countthreshold) {
|
|
ncol += fprintf(fp, "(%d times) ", s.count);
|
|
}
|
|
|
|
/* file path */
|
|
fprintindent(fp, 48 - ncol);
|
|
std::string path = f.file();
|
|
ncol += fprintf(fp, "%s:%d", shortname(path).c_str(), f.line());
|
|
|
|
/* end of line */
|
|
fprintf(fp, "\n");
|
|
}
|
|
|
|
/* children */
|
|
indent += (ch == '\\');
|
|
if (cfids.size() > 1) {
|
|
indent += 1;
|
|
ch = '\\';
|
|
} else {
|
|
ch = '|';
|
|
}
|
|
for (auto& cfid : cfids) {
|
|
fprintframetree(fp, cfid, indent, ch);
|
|
}
|
|
}
|
|
|
|
static void clear() {
|
|
summaries.clear();
|
|
framechildren.clear();
|
|
fid2hash.clear();
|
|
hash2fid.clear();
|
|
samples.clear();
|
|
frames.clear();
|
|
}
|
|
|
|
static void report(FILE *fp = stderr) {
|
|
if (dedup)
|
|
buildframededup();
|
|
buildsummaries();
|
|
buildframetree();
|
|
fprintframetree(fp, dedupfid(0));
|
|
fprintf(fp, "Total time: %.0f ms\n", (double)(r2 - r1) * rdtscratio);
|
|
}
|