traceprof: move to hgext and make it more portable

Summary:
Get rid of `gettimeofday` and switch to C++11 `std::chrono` for Windows/rare
platform compatibility.

Also format the code using clang-format.

Test Plan:
Make sure it build on both x64 and Power8 platform.
`make local` and try it in IPython:

```
In [1]: from hgext import traceprof

In [2]: def f():
   ...:     g(1)
   ...:

In [3]: def g(x):
   ...:     print(x+1)
   ...:

In [4]: from mercurial import ui

In [7]: with traceprof.profile(ui.ui(), sys.stderr):
   ...:     f()
   ...:
2
    | <module>                                  ipython2:3
    | start_ipython                             IPython/__init__.py:93
    | launch_instance                           application.py:650
    | start                                     ipapp.py:342
    | mainloop                                  interactiveshell.py:479
    | interact                                  interactiveshell.py:459
    | run_cell                                  interactiveshell.py:2591
    | run_ast_nodes                             interactiveshell.py:2770
    | run_code                                  interactiveshell.py:2851
    | <module>                                  <ipython-input-7-2e5a012739d1>:1
    | __exit__                                  contextlib.py:21
Total time: 0 ms
```

Note: it crashes on Windows, which will be workarounded in a later patch.

Reviewers: rmcelroy, #mercurial

Reviewed By: rmcelroy

Subscribers: fried

Differential Revision: https://phabricator.intern.facebook.com/D6681062

Tags: aarch64

Signature: 6681062:1515488414:6b7a51eda9e9764560d415350630590e4817fae2
This commit is contained in:
Jun Wu 2018-01-08 17:45:51 -08:00
parent f446276fe1
commit a5976a9d50
6 changed files with 456 additions and 397 deletions

View File

@ -63,6 +63,7 @@ locale/*/LC_MESSAGES/hg.mo
hgext/__index__.py
hgext/clindex.c
hgext/extlib/linelog.c
hgext/traceprof.cpp
# Rust libraries and extensions
lib/*/Cargo.lock

View File

@ -2,7 +2,7 @@
from __future__ import absolute_import, print_function
from hgext3rd import traceprof
from hgext import traceprof
from mercurial import ui
import os

View File

@ -1,395 +0,0 @@
/* traceprofimpl.cpp - main implementation of traceprofile
*
* Copyright 2017 Facebook, Inc.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2 or any later version. */
#include "Python.h"
#include "frameobject.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <list>
#include <string>
#include <unordered_map>
#include <vector>
#include <sys/time.h>
#include <unistd.h>
typedef int lineno_t;
typedef uint64_t frameid_t;
typedef uint64_t rdtsc_t;
/* tracing ------------------------------------------------------------------ */
/* information about a raw Python frame */
struct FrameInfo {
PyCodeObject *code; /* PyCodeObject */
frameid_t back;
/* needed by older C++ map which has difficulty on zero-copy assignment */
FrameInfo() { code = NULL; back = 0; }
void assign(PyFrameObject *frame, frameid_t backfid) {
back = backfid;
code = frame->f_code;
Py_XINCREF(code);
}
lineno_t line() {
if (!code) return 0;
return code->co_firstlineno;
}
const char *file() {
if (!code) return NULL;
return PyString_AsString(code->co_filename);
}
const char *name() {
if (!code) return NULL;
return PyString_AsString(code->co_name);
}
~FrameInfo() {
Py_XDECREF(code);
code = NULL;
}
private:
/* forbid copy */
FrameInfo(const FrameInfo& rhs);
FrameInfo(const FrameInfo&& rhs);
};
/* samples */
struct Sample {
rdtsc_t time;
frameid_t frameid;
int op; /* PyTrace_{CALL,EXCEPTION,LINE,RETURN,C_CALL,C_EXCEPTION,C_RETURN} */
};
/* global state, collected frames and samples */
static std::unordered_map<frameid_t, FrameInfo> frames;
static std::list<Sample> samples;
/* for measuring wall time / rdtsc ratio */
static struct timeval t1, t2;
static rdtsc_t r1, r2;
static double rdtscratio; /* set by disable() */
/* fast way to get wall time */
inline static rdtsc_t rdtsc() {
#ifdef __aarch64__
unsigned long long val;
asm volatile("mrs %0, cntvct_el0" : "=r" (val));
return val;
#else
unsigned long lo, hi;
asm volatile ("rdtsc" : "=a" (lo), "=d" (hi));
return (rdtsc_t)(lo | (hi << 32));
#endif
}
/* fast, but inaccurate hashing of a Python frame */
inline static uint64_t hashframe(PyFrameObject *frame) {
uint64_t v = ((uint64_t)frame ^ ((uint64_t)(frame->f_back) << 16) ^
((uint64_t)frame->f_code << 32));
/* f_code is usually immutable (lsprof use its address as keys) */
return v;
}
/* hash, and store a Python frame, return its ID */
static frameid_t hashandstoreframe(PyFrameObject *frame) {
if (!frame) return 0;
frameid_t frameid = (frameid_t)hashframe(frame);
if (frames.count(frameid) == 0) {
frames[frameid].assign(frame,hashandstoreframe(frame->f_back));
}
return frameid;
}
/* record a sample */
inline static void recordframe(PyFrameObject *frame, int op) {
frameid_t fid = hashandstoreframe(frame);
samples.push_back({rdtsc(), fid, op});
}
static int tracefunc(PyObject *o, PyFrameObject *frame, int op, PyObject *a) {
(void)o;
(void)a;
recordframe(frame, op);
return 0;
}
static void enable() {
r1 = rdtsc();
gettimeofday(&t1, NULL);
PyEval_SetProfile((Py_tracefunc) tracefunc, NULL);
}
static void disable() {
PyEval_SetProfile(NULL, NULL);
r2 = rdtsc();
gettimeofday(&t2, NULL);
/* calculate rdtscratio */
double dt = 0; /* ms */
dt += (double)(t2.tv_sec - t1.tv_sec) * 1000.0; /* sec to ms */
dt += (double)(t2.tv_usec - t1.tv_usec) / 1000.0; /* us to ms */
rdtscratio = dt / (double)(r2 - r1);
}
/* reporting ---------------------------------------------------------------- */
struct FrameSummary {
rdtsc_t time;
unsigned int count;
};
static std::unordered_map<frameid_t, FrameSummary> summaries;
static std::unordered_map<frameid_t, std::list<frameid_t> > framechildren;
/* for dedup */
static std::unordered_map<frameid_t, uint64_t> fid2hash;
static std::unordered_map<uint64_t, frameid_t> hash2fid;
/* hash FrameInfo, do not be affected by frame addresses */
static uint64_t hashframeinfo(frameid_t fid) {
if (fid == 0) {
return 0;
}
if (!fid2hash.count(fid)) {
auto& fi = frames[fid];
uint64_t v = (uint64_t)fi.code;
v ^= hashframeinfo(fi.back) << 1;
fid2hash[fid] = v;
}
return fid2hash[fid];
}
/* fill hash2fid */
static void buildframededup() {
for (auto& s : samples) {
frameid_t fid = s.frameid;
while (fid) {
uint64_t v = hashframeinfo(fid);
if (hash2fid.count(v) == 0) {
hash2fid[v] = fid;
fid = frames[fid].back;
} else {
break;
}
}
}
}
static frameid_t dedupfid(frameid_t fid) {
if (fid2hash.count(fid) == 0) {
return fid; /* no information available */
} else {
return hash2fid[fid2hash[fid]];
}
}
/* fill calltimes and summaries */
static void buildsummaries() {
std::unordered_map<frameid_t, std::list<Sample*>> calls;
for (auto& s : samples) {
frameid_t fid = dedupfid(s.frameid);
if (s.op == PyTrace_CALL) {
calls[fid].push_back(&s);
} else if (s.op == PyTrace_RETURN) {
auto& entries = calls[fid];
if (entries.empty())
continue;
/* frame was entered before */
Sample* prev = entries.back();
entries.pop_back();
auto &sum = summaries[fid];
sum.count += 1;
if (entries.empty())
sum.time += s.time - prev->time;
} /* s.op */
}
}
/* fill framechildren */
static void buildframetree() {
for (auto& s : samples) {
if (s.op != PyTrace_CALL && s.op != PyTrace_C_CALL) {
continue; /* only interested in call */
}
for (frameid_t fid = s.frameid; fid;) {
fid = dedupfid(fid);
auto& fi = frames[fid];
frameid_t pfid = dedupfid(fi.back);
auto& children = framechildren[pfid];
int existed = 0;
for (auto& c : children) {
if (c == fid) {
existed = 1;
break;
}
}
if (existed) {
break;
}
children.push_back(fid);
fid = pfid;
} /* for fid */
}
}
static std::string shortname(std::string path) {
size_t p = path.rfind('/');
if (p == std::string::npos)
return path;
/* special handling __init__.py, include its dirname */
if (p > 0 && path.substr(p + 1, p + 1 + 11) == "__init__.py") {
p = path.rfind('/', p - 1);
if (p == std::string::npos)
return path;
}
return path.substr(p + 1);
}
/* width needed to output time (in ms) */
static int timelen() {
static int n = 0;
if (n)
return n;
n = 1;
rdtsc_t maxframetime = 0;
for (auto& s : summaries) {
if (s.second.time > maxframetime)
maxframetime = s.second.time;
}
for (double t = (double)maxframetime * rdtscratio; t >= 10; t /= 10) {
n ++;
}
return n;
}
/* printf format to output time */
static const char *timefmt() {
static char fmt[20] = { 0 };
if (fmt[0] == 0) {
int n = timelen();
snprintf(fmt, sizeof fmt, "%%%d.0f", n);
}
return fmt;
}
static inline int fprintindent(FILE *fp, int indent) {
for (int i = 0; i < indent; ++i) fputc(' ', fp);
return (indent > 0 ? indent : 0);
}
/* config items */
static double timethreshold = 2;
static size_t countthreshold = 2;
static int dedup = 1;
static void settimethreshold(double ms) {
timethreshold = ms;
}
static void setcountthreshold(size_t count) {
countthreshold = count;
}
static void setdedup(int value) {
dedup = value;
}
static void fprintframetree(FILE *fp = stderr, frameid_t fid = 0,
int indent = 0, char ch = '|') {
auto& f = frames[fid];
auto& s = summaries[fid];
/* collect (> 2ms) child frames to print */
std::vector<frameid_t> cfids;
rdtsc_t ctotaltime = 0;
for (auto& cfid : framechildren[fid]) {
auto& cs = summaries[cfid];
if ((double)cs.time * rdtscratio >= timethreshold || cs.count == 0)
cfids.push_back(cfid);
ctotaltime += cs.time;
}
/* hot frame? frame time > 2 * sum(child frame time) and frame time > 30ms */
int hot = (s.time > ctotaltime * 2 && (double)s.time * rdtscratio > 30);
if (fid != 0) {
int ncol = 0;
/* hot symbol */
if (hot) {
ncol += fprintf(fp, "* ");
} else {
ncol += fprintf(fp, " ");
}
/* time in ms */
if (s.count > 0) {
ncol += fprintf(fp, timefmt(), (double)s.time * rdtscratio);
} else {
ncol += fprintindent(fp, timelen()); /* call not recorded */
}
/* symbol and indent */
ncol += fprintindent(fp, indent + 1);
ncol += fprintf(fp, "%c ", ch);
/* frame name */
ncol += fprintf(fp, "%s ", f.name());
/* call count */
if (s.count >= countthreshold) {
ncol += fprintf(fp, "(%d times) ", s.count);
}
/* file path */
fprintindent(fp, 48 - ncol);
std::string path = f.file();
ncol += fprintf(fp, "%s:%d", shortname(path).c_str(), f.line());
/* end of line */
fprintf(fp, "\n");
}
/* children */
indent += (ch == '\\');
if (cfids.size() > 1) {
indent += 1;
ch = '\\';
} else {
ch = '|';
}
for (auto& cfid : cfids) {
fprintframetree(fp, cfid, indent, ch);
}
}
static void clear() {
summaries.clear();
framechildren.clear();
fid2hash.clear();
hash2fid.clear();
samples.clear();
frames.clear();
}
static void report(FILE *fp = stderr) {
if (dedup)
buildframededup();
buildsummaries();
buildframetree();
fprintframetree(fp, dedupfid(0));
fprintf(fp, "Total time: %.0f ms\n", (double)(r2 - r1) * rdtscratio);
}

View File

@ -0,0 +1,447 @@
/* traceprofimpl.cpp - main implementation of traceprofile
*
* Copyright 2017 Facebook, Inc.
*
* This software may be used and distributed according to the terms of the
* GNU General Public License version 2 or any later version. */
#include "Python.h"
#include "frameobject.h"
#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <list>
#include <string>
#include <unordered_map>
#include <vector>
#ifndef _WIN32
#include <sys/time.h>
#include <unistd.h>
#endif
typedef int lineno_t;
typedef uint64_t frameid_t;
typedef uint64_t rdtsc_t;
/* tracing ------------------------------------------------------------------ */
/* information about a raw Python frame */
struct FrameInfo {
PyCodeObject *code; /* PyCodeObject */
frameid_t back;
/* needed by older C++ map which has difficulty on zero-copy assignment
*/
FrameInfo()
{
code = NULL;
back = 0;
}
void assign(PyFrameObject *frame, frameid_t backfid)
{
back = backfid;
code = frame->f_code;
Py_XINCREF(code);
}
lineno_t line()
{
if (!code)
return 0;
return code->co_firstlineno;
}
const char *file()
{
if (!code)
return NULL;
return PyString_AsString(code->co_filename);
}
const char *name()
{
if (!code)
return NULL;
return PyString_AsString(code->co_name);
}
~FrameInfo()
{
Py_XDECREF(code);
code = NULL;
}
private:
/* forbid copy */
FrameInfo(const FrameInfo &rhs);
FrameInfo(const FrameInfo &&rhs);
};
/* samples */
struct Sample {
rdtsc_t time;
frameid_t frameid;
int op; /* PyTrace_{CALL,EXCEPTION,LINE,RETURN,C_CALL,C_EXCEPTION,C_RETURN}
*/
};
/* global state, collected frames and samples */
static std::unordered_map<frameid_t, FrameInfo> frames;
static std::list<Sample> samples;
/* for measuring wall time / rdtsc ratio */
static uint64_t t1, t2;
static rdtsc_t r1, r2;
static double rdtscratio; /* set by disable() */
/* read microseconds using std::chrono */
static uint64_t now_microseconds()
{
using namespace std::chrono;
auto now = high_resolution_clock::now();
return duration_cast<microseconds>(now.time_since_epoch()).count();
}
/* fast (best-effort) way to get wall time */
inline static rdtsc_t rdtsc()
{
#if defined(__aarch64__) && !defined(_MSC_VER) /* aarch64 fast path */
unsigned long long val;
asm volatile("mrs %0, cntvct_el0" : "=r"(val));
return val;
#elif defined(__amd64__) && !defined(_MSC_VER) /* x64 fast path */
unsigned long lo, hi;
asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
return (rdtsc_t)(lo | (hi << 32));
#else /* other platform or MSVC (no inline asm support) */
return (rdtsc_t)(now_microseconds());
#endif
}
/* fast, but inaccurate hashing of a Python frame */
inline static uint64_t hashframe(PyFrameObject *frame)
{
uint64_t v = ((uint64_t)frame ^ ((uint64_t)(frame->f_back) << 16) ^
((uint64_t)frame->f_code << 32));
/* f_code is usually immutable (lsprof use its address as keys) */
return v;
}
/* hash, and store a Python frame, return its ID */
static frameid_t hashandstoreframe(PyFrameObject *frame)
{
if (!frame)
return 0;
frameid_t frameid = (frameid_t)hashframe(frame);
if (frames.count(frameid) == 0) {
frames[frameid].assign(frame, hashandstoreframe(frame->f_back));
}
return frameid;
}
/* record a sample */
inline static void recordframe(PyFrameObject *frame, int op)
{
frameid_t fid = hashandstoreframe(frame);
samples.push_back({rdtsc(), fid, op});
}
static int tracefunc(PyObject *o, PyFrameObject *frame, int op, PyObject *a)
{
(void)o;
(void)a;
recordframe(frame, op);
return 0;
}
static void enable()
{
r1 = rdtsc();
t1 = now_microseconds() / 1000;
PyEval_SetProfile((Py_tracefunc)tracefunc, NULL);
}
static void disable()
{
PyEval_SetProfile(NULL, NULL);
r2 = rdtsc();
t2 = now_microseconds() / 1000;
/* calculate rdtscratio */
rdtscratio = (t2 - t1) / (double)(r2 - r1);
}
/* reporting ---------------------------------------------------------------- */
struct FrameSummary {
rdtsc_t time;
unsigned int count;
};
static std::unordered_map<frameid_t, FrameSummary> summaries;
static std::unordered_map<frameid_t, std::list<frameid_t>> framechildren;
/* for dedup */
static std::unordered_map<frameid_t, uint64_t> fid2hash;
static std::unordered_map<uint64_t, frameid_t> hash2fid;
/* hash FrameInfo, do not be affected by frame addresses */
static uint64_t hashframeinfo(frameid_t fid)
{
if (fid == 0) {
return 0;
}
if (!fid2hash.count(fid)) {
auto &fi = frames[fid];
uint64_t v = (uint64_t)fi.code;
v ^= hashframeinfo(fi.back) << 1;
fid2hash[fid] = v;
}
return fid2hash[fid];
}
/* fill hash2fid */
static void buildframededup()
{
for (auto &s : samples) {
frameid_t fid = s.frameid;
while (fid) {
uint64_t v = hashframeinfo(fid);
if (hash2fid.count(v) == 0) {
hash2fid[v] = fid;
fid = frames[fid].back;
} else {
break;
}
}
}
}
static frameid_t dedupfid(frameid_t fid)
{
if (fid2hash.count(fid) == 0) {
return fid; /* no information available */
} else {
return hash2fid[fid2hash[fid]];
}
}
/* fill calltimes and summaries */
static void buildsummaries()
{
std::unordered_map<frameid_t, std::list<Sample *>> calls;
for (auto &s : samples) {
frameid_t fid = dedupfid(s.frameid);
if (s.op == PyTrace_CALL) {
calls[fid].push_back(&s);
} else if (s.op == PyTrace_RETURN) {
auto &entries = calls[fid];
if (entries.empty())
continue;
/* frame was entered before */
Sample *prev = entries.back();
entries.pop_back();
auto &sum = summaries[fid];
sum.count += 1;
if (entries.empty())
sum.time += s.time - prev->time;
} /* s.op */
}
}
/* fill framechildren */
static void buildframetree()
{
for (auto &s : samples) {
if (s.op != PyTrace_CALL && s.op != PyTrace_C_CALL) {
continue; /* only interested in call */
}
for (frameid_t fid = s.frameid; fid;) {
fid = dedupfid(fid);
auto &fi = frames[fid];
frameid_t pfid = dedupfid(fi.back);
auto &children = framechildren[pfid];
int existed = 0;
for (auto &c : children) {
if (c == fid) {
existed = 1;
break;
}
}
if (existed) {
break;
}
children.push_back(fid);
fid = pfid;
} /* for fid */
}
}
static std::string shortname(std::string path)
{
size_t p = path.rfind('/');
if (p == std::string::npos)
return path;
/* special handling __init__.py, include its dirname */
if (p > 0 && path.substr(p + 1, p + 1 + 11) == "__init__.py") {
p = path.rfind('/', p - 1);
if (p == std::string::npos)
return path;
}
return path.substr(p + 1);
}
/* width needed to output time (in ms) */
static int timelen()
{
static int n = 0;
if (n)
return n;
n = 1;
rdtsc_t maxframetime = 0;
for (auto &s : summaries) {
if (s.second.time > maxframetime)
maxframetime = s.second.time;
}
for (double t = (double)maxframetime * rdtscratio; t >= 10; t /= 10) {
n++;
}
return n;
}
/* printf format to output time */
static const char *timefmt()
{
static char fmt[20] = {0};
if (fmt[0] == 0) {
int n = timelen();
snprintf(fmt, sizeof fmt, "%%%d.0f", n);
}
return fmt;
}
static inline int fprintindent(FILE *fp, int indent)
{
for (int i = 0; i < indent; ++i)
fputc(' ', fp);
return (indent > 0 ? indent : 0);
}
/* config items */
static double timethreshold = 2;
static size_t countthreshold = 2;
static int dedup = 1;
static void settimethreshold(double ms)
{
timethreshold = ms;
}
static void setcountthreshold(size_t count)
{
countthreshold = count;
}
static void setdedup(int value)
{
dedup = value;
}
static void fprintframetree(FILE *fp = stderr, frameid_t fid = 0,
int indent = 0, char ch = '|')
{
auto &f = frames[fid];
auto &s = summaries[fid];
/* collect (> 2ms) child frames to print */
std::vector<frameid_t> cfids;
rdtsc_t ctotaltime = 0;
for (auto &cfid : framechildren[fid]) {
auto &cs = summaries[cfid];
if ((double)cs.time * rdtscratio >= timethreshold ||
cs.count == 0)
cfids.push_back(cfid);
ctotaltime += cs.time;
}
/* hot frame? frame time > 2 * sum(child frame time) and frame time >
* 30ms */
int hot = (s.time > ctotaltime * 2 && (double)s.time * rdtscratio > 30);
if (fid != 0) {
int ncol = 0;
/* hot symbol */
if (hot) {
ncol += fprintf(fp, "* ");
} else {
ncol += fprintf(fp, " ");
}
/* time in ms */
if (s.count > 0) {
ncol +=
fprintf(fp, timefmt(), (double)s.time * rdtscratio);
} else {
ncol +=
fprintindent(fp, timelen()); /* call not recorded */
}
/* symbol and indent */
ncol += fprintindent(fp, indent + 1);
ncol += fprintf(fp, "%c ", ch);
/* frame name */
ncol += fprintf(fp, "%s ", f.name());
/* call count */
if (s.count >= countthreshold) {
ncol += fprintf(fp, "(%d times) ", s.count);
}
/* file path */
fprintindent(fp, 48 - ncol);
std::string path = f.file();
ncol += fprintf(fp, "%s:%d", shortname(path).c_str(), f.line());
/* end of line */
fprintf(fp, "\n");
}
/* children */
indent += (ch == '\\');
if (cfids.size() > 1) {
indent += 1;
ch = '\\';
} else {
ch = '|';
}
for (auto &cfid : cfids) {
fprintframetree(fp, cfid, indent, ch);
}
}
static void clear()
{
summaries.clear();
framechildren.clear();
fid2hash.clear();
hash2fid.clear();
samples.clear();
frames.clear();
}
static void report(FILE *fp = stderr)
{
if (dedup)
buildframededup();
buildsummaries();
buildframetree();
fprintframetree(fp, dedupfid(0));
fprintf(fp, "Total time: %.0f ms\n", (double)(r2 - r1) * rdtscratio);
}

View File

@ -33,7 +33,7 @@ from cpython.object cimport PyObject
import contextlib
import gc
cdef extern from "traceprofimpl.cpp":
cdef extern from "extlib/traceprofimpl.cpp":
void enable()
void disable()
void report(FILE *)

View File

@ -5,6 +5,7 @@
# 'python setup.py --help' for more options
from distutils.version import LooseVersion
import itertools
import os
supportedpy = '~= 2.7'
@ -189,6 +190,7 @@ SHA1_LIBRARY = "sha1detectcoll"
SHA1LIB_DEFINE = "/DSHA1_USE_SHA1DC" if iswindows else "-DSHA1_USE_SHA1DC"
STDC99 = "" if iswindows else "-std=c99"
STDCPP0X = "" if iswindows else "-std=c++0x"
STDCPP11 = "" if iswindows else "-std=c++11"
WALL = "/Wall" if iswindows else "-Wall"
WERROR = "/WX" if iswindows else "-Werror"
WSTRICTPROTOTYPES = None if iswindows else "-Werror=strict-prototypes"
@ -1053,6 +1055,10 @@ extmodules += cythonize([
Extension('hgext.clindex',
sources=['hgext/clindex.pyx'],
extra_compile_args=filter(None, [STDC99, PRODUCEDEBUGSYMBOLS])),
Extension('hgext.traceprof',
sources=['hgext/traceprof.pyx'],
include_dirs=['hgext'],
extra_compile_args=filter(None, [STDCPP11, PRODUCEDEBUGSYMBOLS])),
Extension('hgext.extlib.linelog',
sources=['hgext/extlib/linelog.pyx'],
extra_compile_args=filter(None, [STDC99, PRODUCEDEBUGSYMBOLS])),