/* traceprofimpl.cpp - main implementation of traceprofile * * Copyright 2017 Facebook, Inc. * * This software may be used and distributed according to the terms of the * GNU General Public License version 2 or any later version. */ #include "Python.h" #include "frameobject.h" #include #include #include #include #include #include #include #include #include #include #include typedef int lineno_t; typedef uint64_t frameid_t; typedef uint64_t rdtsc_t; /* tracing ------------------------------------------------------------------ */ /* information about a raw Python frame */ struct FrameInfo { PyCodeObject *code; /* PyCodeObject */ frameid_t back; /* needed by older C++ map which has difficulty on zero-copy assignment */ FrameInfo() { code = NULL; back = 0; } void assign(PyFrameObject *frame, frameid_t backfid) { back = backfid; code = frame->f_code; Py_XINCREF(code); } lineno_t line() { if (!code) return 0; return code->co_firstlineno; } const char *file() { if (!code) return NULL; return PyString_AsString(code->co_filename); } const char *name() { if (!code) return NULL; return PyString_AsString(code->co_name); } ~FrameInfo() { Py_XDECREF(code); code = NULL; } private: /* forbid copy */ FrameInfo(const FrameInfo& rhs); FrameInfo(const FrameInfo&& rhs); }; /* samples */ struct Sample { rdtsc_t time; frameid_t frameid; int op; /* PyTrace_{CALL,EXCEPTION,LINE,RETURN,C_CALL,C_EXCEPTION,C_RETURN} */ }; /* global state, collected frames and samples */ static std::unordered_map frames; static std::list samples; /* for measuring wall time / rdtsc ratio */ static struct timeval t1, t2; static rdtsc_t r1, r2; static double rdtscratio; /* set by disable() */ /* fast way to get wall time */ inline static rdtsc_t rdtsc() { unsigned long lo, hi; asm volatile ("rdtsc" : "=a" (lo), "=d" (hi)); return (rdtsc_t)(lo | (hi << 32)); } /* fast, but inaccurate hashing of a Python frame */ inline static uint64_t hashframe(PyFrameObject *frame) { uint64_t v = ((uint64_t)frame ^ ((uint64_t)(frame->f_back) << 16) ^ ((uint64_t)frame->f_code << 32)); /* f_code is usually immutable (lsprof use its address as keys) */ return v; } /* hash, and store a Python frame, return its ID */ static frameid_t hashandstoreframe(PyFrameObject *frame) { if (!frame) return 0; frameid_t frameid = (frameid_t)hashframe(frame); if (frames.count(frameid) == 0) { frames[frameid].assign(frame,hashandstoreframe(frame->f_back)); } return frameid; } /* record a sample */ inline static void recordframe(PyFrameObject *frame, int op) { frameid_t fid = hashandstoreframe(frame); samples.push_back({rdtsc(), fid, op}); } static int tracefunc(PyObject *o, PyFrameObject *frame, int op, PyObject *a) { (void)o; (void)a; recordframe(frame, op); return 0; } static void enable() { r1 = rdtsc(); gettimeofday(&t1, NULL); PyEval_SetProfile((Py_tracefunc) tracefunc, NULL); } static void disable() { PyEval_SetProfile(NULL, NULL); r2 = rdtsc(); gettimeofday(&t2, NULL); /* calculate rdtscratio */ double dt = 0; /* ms */ dt += (double)(t2.tv_sec - t1.tv_sec) * 1000.0; /* sec to ms */ dt += (double)(t2.tv_usec - t1.tv_usec) / 1000.0; /* us to ms */ rdtscratio = dt / (double)(r2 - r1); } /* reporting ---------------------------------------------------------------- */ struct FrameSummary { rdtsc_t time; unsigned int count; }; static std::unordered_map summaries; static std::unordered_map > framechildren; /* for dedup */ static std::unordered_map fid2hash; static std::unordered_map hash2fid; /* hash FrameInfo, do not be affected by frame addresses */ static uint64_t hashframeinfo(frameid_t fid) { if (fid == 0) { return 0; } if (!fid2hash.count(fid)) { auto& fi = frames[fid]; uint64_t v = (uint64_t)fi.code; v ^= hashframeinfo(fi.back) << 1; fid2hash[fid] = v; } return fid2hash[fid]; } /* fill hash2fid */ static void buildframededup() { for (auto& s : samples) { frameid_t fid = s.frameid; while (fid) { uint64_t v = hashframeinfo(fid); if (hash2fid.count(v) == 0) { hash2fid[v] = fid; fid = frames[fid].back; } else { break; } } } } static frameid_t dedupfid(frameid_t fid) { if (fid2hash.count(fid) == 0) { return fid; /* no information available */ } else { return hash2fid[fid2hash[fid]]; } } /* fill calltimes and summaries */ static void buildsummaries() { std::unordered_map> calls; for (auto& s : samples) { frameid_t fid = dedupfid(s.frameid); if (s.op == PyTrace_CALL) { calls[fid].push_back(&s); } else if (s.op == PyTrace_RETURN) { auto& entries = calls[fid]; if (entries.empty()) continue; /* frame was entered before */ Sample* prev = entries.back(); entries.pop_back(); auto &sum = summaries[fid]; sum.count += 1; if (entries.empty()) sum.time += s.time - prev->time; } /* s.op */ } } /* fill framechildren */ static void buildframetree() { for (auto& s : samples) { if (s.op != PyTrace_CALL && s.op != PyTrace_C_CALL) { continue; /* only interested in call */ } for (frameid_t fid = s.frameid; fid;) { fid = dedupfid(fid); auto& fi = frames[fid]; frameid_t pfid = dedupfid(fi.back); auto& children = framechildren[pfid]; int existed = 0; for (auto& c : children) { if (c == fid) { existed = 1; break; } } if (existed) { break; } children.push_back(fid); fid = pfid; } /* for fid */ } } static std::string shortname(std::string path) { size_t p = path.rfind('/'); if (p == std::string::npos) return path; /* special handling __init__.py, include its dirname */ if (p > 0 && path.substr(p + 1, p + 1 + 11) == "__init__.py") { p = path.rfind('/', p - 1); if (p == std::string::npos) return path; } return path.substr(p + 1); } /* width needed to output time (in ms) */ static int timelen() { static int n = 0; if (n) return n; n = 1; rdtsc_t maxframetime = 0; for (auto& s : summaries) { if (s.second.time > maxframetime) maxframetime = s.second.time; } for (double t = (double)maxframetime * rdtscratio; t >= 10; t /= 10) { n ++; } return n; } /* printf format to output time */ static const char *timefmt() { static char fmt[20] = { 0 }; if (fmt[0] == 0) { int n = timelen(); snprintf(fmt, sizeof fmt, "%%%d.0f", n); } return fmt; } static inline int fprintindent(FILE *fp, int indent) { for (int i = 0; i < indent; ++i) fputc(' ', fp); return (indent > 0 ? indent : 0); } /* config items */ static double timethreshold = 2; static size_t countthreshold = 2; static int dedup = 1; static void settimethreshold(double ms) { timethreshold = ms; } static void setcountthreshold(size_t count) { countthreshold = count; } static void setdedup(int value) { dedup = value; } static void fprintframetree(FILE *fp = stderr, frameid_t fid = 0, int indent = 0, char ch = '|') { auto& f = frames[fid]; auto& s = summaries[fid]; /* collect (> 2ms) child frames to print */ std::vector cfids; rdtsc_t ctotaltime = 0; for (auto& cfid : framechildren[fid]) { auto& cs = summaries[cfid]; if ((double)cs.time * rdtscratio >= timethreshold || cs.count == 0) cfids.push_back(cfid); ctotaltime += cs.time; } /* hot frame? frame time > 2 * sum(child frame time) and frame time > 30ms */ int hot = (s.time > ctotaltime * 2 && (double)s.time * rdtscratio > 30); if (fid != 0) { int ncol = 0; /* hot symbol */ if (hot) { ncol += fprintf(fp, "* "); } else { ncol += fprintf(fp, " "); } /* time in ms */ if (s.count > 0) { ncol += fprintf(fp, timefmt(), (double)s.time * rdtscratio); } else { ncol += fprintindent(fp, timelen()); /* call not recorded */ } /* symbol and indent */ ncol += fprintindent(fp, indent + 1); ncol += fprintf(fp, "%c ", ch); /* frame name */ ncol += fprintf(fp, "%s ", f.name()); /* call count */ if (s.count >= countthreshold) { ncol += fprintf(fp, "(%d times) ", s.count); } /* file path */ fprintindent(fp, 48 - ncol); std::string path = f.file(); ncol += fprintf(fp, "%s:%d", shortname(path).c_str(), f.line()); /* end of line */ fprintf(fp, "\n"); } /* children */ indent += (ch == '\\'); if (cfids.size() > 1) { indent += 1; ch = '\\'; } else { ch = '|'; } for (auto& cfid : cfids) { fprintframetree(fp, cfid, indent, ch); } } static void clear() { summaries.clear(); framechildren.clear(); fid2hash.clear(); hash2fid.clear(); samples.clear(); frames.clear(); } static void report(FILE *fp = stderr) { if (dedup) buildframededup(); buildsummaries(); buildframetree(); fprintframetree(fp, dedupfid(0)); fprintf(fp, "Total time: %.0f ms\n", (double)(r2 - r1) * rdtscratio); }