2017-06-15 22:55:17 +03:00
|
|
|
from distutils.version import LooseVersion
|
2017-05-02 19:12:56 +03:00
|
|
|
from distutils.cmd import Command
|
2016-08-11 00:26:05 +03:00
|
|
|
from distutils.core import setup, Extension
|
2017-02-24 01:03:02 +03:00
|
|
|
import distutils
|
2017-05-02 19:12:56 +03:00
|
|
|
import fnmatch
|
2016-07-08 15:15:42 +03:00
|
|
|
from glob import glob
|
|
|
|
|
2017-06-15 22:55:17 +03:00
|
|
|
import os, shutil, sys
|
2016-09-18 23:45:50 +03:00
|
|
|
|
2017-03-17 19:04:19 +03:00
|
|
|
iswindows = os.name == 'nt'
|
2017-04-06 01:00:31 +03:00
|
|
|
WERROR = "/WX" if iswindows else "-Werror"
|
|
|
|
WSTRICTPROTOTYPES = None if iswindows else "-Werror=strict-prototypes"
|
|
|
|
WALL = "/Wall" if iswindows else "-Wall"
|
|
|
|
STDC99 = "" if iswindows else "-std=c99"
|
|
|
|
STDCPP0X = "" if iswindows else "-std=c++0x"
|
|
|
|
WEXTRA = "" if iswindows else "-Wextra"
|
|
|
|
WCONVERSION = "" if iswindows else "-Wconversion"
|
|
|
|
PEDANTIC = "" if iswindows else "-pedantic"
|
2017-08-23 05:09:07 +03:00
|
|
|
SHA1LIB_DEFINE = "/DSHA1_USE_SHA1DC" if iswindows else "-DSHA1_USE_SHA1DC"
|
|
|
|
SHA1_LIBRARY = "sha1detectcoll"
|
2017-04-06 01:00:31 +03:00
|
|
|
NOOPTIMIZATION = "/Od" if iswindows else "-O0"
|
traceprof: new extension to perform accurate profiling
Summary:
Instead of sampling periodically like the default profiler `statprof`, trace
every function call so the report is accurate. The output is designed to be
similar to `statprof`.
Written in C++ and inline assembly to minimize overhead. Cython is used as a
glue layer.
Comparing to `lsprof`, the overhead is similarly small. The major difference is
this profiler cares about "frame object" and records `frame->f_back` (parent
frame) so it could draw a statprof-like tree of the callgraph. `lsprof` records
"code object" instead and could have difficulty to deal with recursive calls.
The following code could demostrate the difference:
```
def fib(x):
if x < 2:
return x
return fib(x - 1) + fib(x - 2)
# lsprof - recorded 1 code object for fib and no way to figure out callgraph
import _lsprof
p = _lsprof.Profiler()
p.enable(subcalls=True)
fib(5)
p.disable()
for s in p.getstats():
print(s)
'''output:
_lsprof.profiler_entry(code="<method 'disable' of '_lsprof.Profiler' objects>", callcount=1, reccallcount=0, totaltime=0.0, inlinetime=0.0, calls=None)
_lsprof.profiler_entry(code=<code object fib at 0x7f27ca587530, file "a.py", line 1>, callcount=15, reccallcount=14, totaltime=8e-06, inlinetime=8e-06, calls=[_lsprof.profiler_subentry(code=<code object fib at 0x7f27ca587530, file "a.py", line 1>, callcount=14, reccallcount=12, totaltime=6e-06, inlinetime=6e-06)])
'''
# traceprof (with timethreshold set to 0) - callgraph drawn correctly
import sys
from hgext3rd import traceprof
with traceprof.profile(None, sys.stderr):
fib(5)
'''output:
| <module> a.py:1
0 \ fib a.py:1
0 \ fib a.py:1
0 \ fib a.py:1
0 \ fib a.py:1
0 | fib a.py:1
0 \ fib a.py:1
0 \ fib a.py:1
0 | fib a.py:1
0 \ fib a.py:1
0 | fib a.py:1
\ __exit__ contextlib.py:21
'''
```
There are some performance / correctness tradeoffs. The current implementation
takes `frame` address and several (but not theoretically complete) fields to
hash a frame. That could in theory lead to inaccurate frame information being
recorded. But hashing all fields recursively incurs significant overhead. The
related logic is the `hashframe` function, which could be tweaked later.
This is an initial version. It probably does not cover all corner cases like
exception handling well. And it may duplicate a same frame multiple times
because of the way it hashes a frame. The duplicated frame issue could be
solved by adding post-profiling filtering though.
Here is a real-world example of `hg log -r .` on `hg-committed`:
```
| <module> hg:10
| run dispatch.py:81
| dispatch dispatch.py:132
| _runcatch dispatch.py:202
| _callcatch dispatch.py:309
| callcatch scmutil.py:133
| _runcatchfunc dispatch.py:215
| _dispatch dispatch.py:744
2 \ repository hg.py:166
2 | _peerorrepo hg.py:151
2 | instance localrepo.py:2025
2 | __init__ localrepo.py:275
863 \ runcommand dispatch.py:656
863 | _runcommand dispatch.py:926
863 | <lambda> dispatch.py:918
863 | check util.py:1059
863 | log commands.py:3262
480 \ getlogrevs cmdutil.py:2236
479 | _logrevs cmdutil.py:2190
479 | revrange scmutil.py:429
479 | anyrevs localrepo.py:622
478 | mfunc revset.py:2275
478 | __init__ (4 times) smartset.py:1033
478 | __init__ (4 times) smartset.py:919
478 | __len__ (4 times) localrepo.py:587
478 | changelog repoview.py:309
477 | filterrevs (11 times) repoview.py:260
477 | computehidden repoview.py:171
455 \ hideablerevs repoview.py:24
455 | getrevs obsolete.py:1114
455 | _computeobsoleteset obsolete.py:1143
3 \ __get__ (2 times) localrepo.py:76
7 \ node (5760 times) changelog.py:359
4 | node (8938 times) revlog.py:482
9 \ __get__ (5760 times) localrepo.py:76
428 \ __get__ util.py:798
428 | successors obsolete.py:672
225 \ __get__ util.py:798
224 | _all obsolete.py:662
6 \ tryread vfs.py:32
6 | read vfs.py:60
200 \ _readmarkers obsolete.py:442
* 199 | _fm1readmarkers obsolete.py:430
15 \ _checkinvalidmarkers obsolete.py:523
* 203 \ _addsuccessors obsolete.py:504
3 \ tryreadcache repoview.py:166
3 | cachehash repoview.py:98
3 | heads localrepo.py:1884
17 \ <genexpr> (5225 times) repoview.py:191
15 | __contains__ (5226 times) ancestor.py:334
5 | parentrevs (4010 times) changelog.py:371
2 | parentrevs (4332 times) revlog.py:479
9 \ pager ui.py:843
9 | _runpager ui.py:906
8 | __init__ subprocess.py:330
8 | _execute_child subprocess.py:880
5 | _eintr_retry_call subprocess.py:118
374 \ show cmdutil.py:1308
374 | _show cmdutil.py:1316
365 \ _changesetlabels cmdutil.py:1266
365 | troubled context.py:225
12 \ unstable (2 times) context.py:207
12 | getrevs obsolete.py:1114
12 | _computeunstableset obsolete.py:1154
3 \ set (322 times) localrepo.py:610
3 \ revs (3 times) localrepo.py:593
8 \ __getitem__ (963 times) localrepo.py:566
4 \ parents (321 times) context.py:246
4 | __get__ (321 times) util.py:798
4 | _parents (321 times) context.py:562
3 \ <genexpr> (642 times) obsolete.py:1164
3 | obsolete (321 times) context.py:199
2 | getrevs (321 times) obsolete.py:1114
319 \ bumped (2 times) context.py:211
319 | getrevs obsolete.py:1114
319 | _computebumpedset obsolete.py:1181
4 \ set (322 times) localrepo.py:610
312 \ allprecursors (2021 times) obsolete.py:850
308 | __get__ util.py:798
308 | precursors obsolete.py:678
* 308 | _addprecursors obsolete.py:509
34 \ divergent (2 times) context.py:218
34 | getrevs obsolete.py:1114
33 | _computedivergentset obsolete.py:1204
4 \ set (322 times) localrepo.py:610
24 \ successorssets (1368 times) obsolete.py:899
3 \ __get__ (1368 times) localrepo.py:76
4 \ __contains__ (200 times) localrepo.py:575
9 \ names (2 times) namespaces.py:184
9 | <lambda> namespaces.py:43
9 | nodetags localrepo.py:741
8 | __get__ util.py:798
8 | _tagscache localrepo.py:646
8 | _findtags localrepo.py:685
7 | findglobaltags tags.py:170
7 | _readtagcache tags.py:370
6 | filteredhash scmutil.py:307
\ __exit__ contextlib.py:21
| maybeprofile profiling.py:199
| __exit__ contextlib.py:21
| profile profiling.py:148
| __exit__ contextlib.py:21
Total time: 867 ms
```
Some example conclusions from reading the above output include:
1. `_fm1readmarkers`, `_addsuccessors`, `_addprecursors` are slow
2. `changelog.node` and `changelog.parentrevs` are called thousands of times,
but they are just a few milliseconds.
3. Reading the `obsstore` file (`vfs.read`) takes 5ms exactly
While `statprof` may get 1 right, `lsprof` may get 2 right, neither of them is
helpful for 3. `statprof` is not accurate enough to be confident about 5ms,
`lsprof` will include other `vfs.read` calls so it's impossible to see just the
read for obsstore.
Test Plan:
With the upstream [patch](https://patchwork.mercurial-scm.org/patch/20822/), run:
```
HGPROF=traceprof hg id --config extensions.traceprof= --profile
```
Reviewers: #mercurial, rmcelroy
Reviewed By: rmcelroy
Subscribers: rmcelroy, mjpieters, jiangyuan
Differential Revision: https://phabricator.intern.facebook.com/D5104851
Signature: t1:5104851:1495480386:666847ea08bb6a94658bc10d7f0e91c225d56836
2017-05-23 01:49:45 +03:00
|
|
|
OPTIMIZATION = "" if iswindows else "-O2"
|
2017-04-06 01:00:31 +03:00
|
|
|
PRODUCEDEBUGSYMBOLS = "/DEBUG:FULL" if iswindows else "-g"
|
|
|
|
|
2017-06-15 22:55:17 +03:00
|
|
|
# whether to use Cython to recompile .pyx to .c/.cpp at build time.
|
|
|
|
# if False, fallback to .c/.cpp in the repo and .pyx files are ignored.
|
|
|
|
# if True, re-compile .c/.cpp from .pyx files, require cython at build time.
|
|
|
|
if 'USECYTHON' in os.environ:
|
|
|
|
USECYTHON = int(os.environ['USECYTHON'])
|
|
|
|
else:
|
|
|
|
try:
|
|
|
|
import Cython
|
|
|
|
except ImportError:
|
|
|
|
USECYTHON = False
|
|
|
|
else:
|
|
|
|
USECYTHON = (LooseVersion(Cython.__version__) >= LooseVersion('0.22'))
|
|
|
|
|
2016-10-08 19:08:29 +03:00
|
|
|
# --component allows the caller to specify what components they want. We can't
|
|
|
|
# use argparse like normal, because setup() at the bottom has it's own argument
|
|
|
|
# logic.
|
|
|
|
components = []
|
|
|
|
args = []
|
|
|
|
skip = False
|
|
|
|
for i, arg in enumerate(sys.argv):
|
|
|
|
if skip:
|
|
|
|
skip = False
|
|
|
|
continue
|
|
|
|
|
|
|
|
if arg == '--component' and len(sys.argv) > i + 1:
|
|
|
|
components.extend(sys.argv[i + 1].split(','))
|
|
|
|
skip = True
|
|
|
|
else:
|
|
|
|
args.append(arg)
|
|
|
|
|
|
|
|
sys.argv = args
|
2016-07-22 22:44:59 +03:00
|
|
|
|
2017-08-23 05:09:07 +03:00
|
|
|
cflags = [SHA1LIB_DEFINE]
|
2017-02-24 01:03:03 +03:00
|
|
|
|
2016-09-18 23:45:50 +03:00
|
|
|
# if this is set, compile all C extensions with -O0 -g for easy debugging. note
|
|
|
|
# that this is not manifested in any way in the Makefile dependencies.
|
|
|
|
# therefore, if you already have build products, they won't be rebuilt!
|
|
|
|
if os.getenv('FB_HGEXT_CDEBUG') is not None:
|
2017-04-06 01:00:31 +03:00
|
|
|
cflags.extend([NOOPTIMIZATION, PRODUCEDEBUGSYMBOLS])
|
2016-09-18 23:45:50 +03:00
|
|
|
else:
|
2017-04-06 01:00:31 +03:00
|
|
|
cflags.append(WERROR)
|
2016-09-18 23:45:50 +03:00
|
|
|
|
[setup.py] support parsing INCLUDE_DIRS and LIBRARY_DIRS environment variables
Summary:
Update the code to read additional include and library directories from the
INCLUDE_DIRS and LIBRARY_DIRS environment variables, if they are set.
This allows us to build the extensions using custom include/library paths.
While these could have been set through the CFLAGS and LDFLAGS variables,
distutils already includes many default settings in CFLAGS and LDFLAGS by
default (taken from the settings used to build python), and setting CFLAGS and
LDFLAGS ourself would override these defaults. We do still want all of the
flags in CFLAGS and LDFLAGS by default, so it is easier to add support for
separate INCLUDE_DIRS and LIBRARY_DIRS variables.
Test Plan: Tested building fb-mercurial-omnibus on CentOS.
Reviewers: ikostia, simonfar, ttung, durham, mbolin, wez, quark
Reviewed By: quark
Subscribers: ttung, net-systems-diffs@, yogeshwer, mjpieters
Differential Revision: https://phabricator.intern.facebook.com/D3916198
Signature: t1:3916198:1475091590:97904ef87a8a2b78e178c87d9d49bdadcce94e44
2016-09-29 02:08:21 +03:00
|
|
|
def get_env_path_list(var_name, default=None):
|
|
|
|
'''Get a path list from an environment variable. The variable is parsed as
|
|
|
|
a colon-separated list.'''
|
|
|
|
value = os.environ.get(var_name)
|
|
|
|
if not value:
|
|
|
|
return default
|
|
|
|
return value.split(os.path.pathsep)
|
|
|
|
|
|
|
|
include_dirs = get_env_path_list('INCLUDE_DIRS')
|
|
|
|
library_dirs = get_env_path_list('LIBRARY_DIRS')
|
|
|
|
|
2017-05-08 17:53:23 +03:00
|
|
|
def filter_existing_dirs(dirs):
|
|
|
|
'''Filters the given list and keeps only existing directory names.'''
|
|
|
|
return [d for d in dirs if os.path.isdir(d)]
|
|
|
|
|
[setup.py] support parsing INCLUDE_DIRS and LIBRARY_DIRS environment variables
Summary:
Update the code to read additional include and library directories from the
INCLUDE_DIRS and LIBRARY_DIRS environment variables, if they are set.
This allows us to build the extensions using custom include/library paths.
While these could have been set through the CFLAGS and LDFLAGS variables,
distutils already includes many default settings in CFLAGS and LDFLAGS by
default (taken from the settings used to build python), and setting CFLAGS and
LDFLAGS ourself would override these defaults. We do still want all of the
flags in CFLAGS and LDFLAGS by default, so it is easier to add support for
separate INCLUDE_DIRS and LIBRARY_DIRS variables.
Test Plan: Tested building fb-mercurial-omnibus on CentOS.
Reviewers: ikostia, simonfar, ttung, durham, mbolin, wez, quark
Reviewed By: quark
Subscribers: ttung, net-systems-diffs@, yogeshwer, mjpieters
Differential Revision: https://phabricator.intern.facebook.com/D3916198
Signature: t1:3916198:1475091590:97904ef87a8a2b78e178c87d9d49bdadcce94e44
2016-09-29 02:08:21 +03:00
|
|
|
# Historical default values.
|
|
|
|
# We should perhaps clean these up in the future after verifying that it
|
|
|
|
# doesn't break the build on any platforms.
|
|
|
|
#
|
|
|
|
# The /usr/local/* directories shouldn't actually be needed--the compiler
|
|
|
|
# should already use these directories when appropriate (e.g., if we are
|
|
|
|
# using the standard system compiler that has them in its default paths).
|
|
|
|
#
|
|
|
|
# The /opt/local paths may be necessary on Darwin builds.
|
|
|
|
if include_dirs is None:
|
2017-04-06 01:00:31 +03:00
|
|
|
if iswindows:
|
|
|
|
include_dirs = []
|
|
|
|
else:
|
2017-05-08 17:53:23 +03:00
|
|
|
include_dirs = filter_existing_dirs([
|
2017-04-06 01:00:31 +03:00
|
|
|
'/usr/local/include',
|
|
|
|
'/opt/local/include',
|
|
|
|
'/opt/homebrew/include/',
|
2017-05-08 17:53:23 +03:00
|
|
|
])
|
2017-02-24 01:03:02 +03:00
|
|
|
|
|
|
|
def distutils_dir_name(dname):
|
|
|
|
"""Returns the name of a distutils build directory"""
|
|
|
|
f = "{dirname}.{platform}-{version}"
|
|
|
|
return f.format(dirname=dname,
|
|
|
|
platform=distutils.util.get_platform(),
|
|
|
|
version=sys.version[:3])
|
|
|
|
|
[setup.py] support parsing INCLUDE_DIRS and LIBRARY_DIRS environment variables
Summary:
Update the code to read additional include and library directories from the
INCLUDE_DIRS and LIBRARY_DIRS environment variables, if they are set.
This allows us to build the extensions using custom include/library paths.
While these could have been set through the CFLAGS and LDFLAGS variables,
distutils already includes many default settings in CFLAGS and LDFLAGS by
default (taken from the settings used to build python), and setting CFLAGS and
LDFLAGS ourself would override these defaults. We do still want all of the
flags in CFLAGS and LDFLAGS by default, so it is easier to add support for
separate INCLUDE_DIRS and LIBRARY_DIRS variables.
Test Plan: Tested building fb-mercurial-omnibus on CentOS.
Reviewers: ikostia, simonfar, ttung, durham, mbolin, wez, quark
Reviewed By: quark
Subscribers: ttung, net-systems-diffs@, yogeshwer, mjpieters
Differential Revision: https://phabricator.intern.facebook.com/D3916198
Signature: t1:3916198:1475091590:97904ef87a8a2b78e178c87d9d49bdadcce94e44
2016-09-29 02:08:21 +03:00
|
|
|
if library_dirs is None:
|
2017-04-06 01:00:31 +03:00
|
|
|
if iswindows:
|
|
|
|
library_dirs = []
|
|
|
|
else:
|
2017-05-08 17:53:23 +03:00
|
|
|
library_dirs = filter_existing_dirs([
|
2017-04-06 01:00:31 +03:00
|
|
|
'/usr/local/lib',
|
|
|
|
'/opt/local/lib',
|
|
|
|
'/opt/homebrew/lib/',
|
2017-05-08 17:53:23 +03:00
|
|
|
])
|
2017-04-06 01:00:31 +03:00
|
|
|
library_dirs.append('build/' + distutils_dir_name('lib'))
|
[setup.py] support parsing INCLUDE_DIRS and LIBRARY_DIRS environment variables
Summary:
Update the code to read additional include and library directories from the
INCLUDE_DIRS and LIBRARY_DIRS environment variables, if they are set.
This allows us to build the extensions using custom include/library paths.
While these could have been set through the CFLAGS and LDFLAGS variables,
distutils already includes many default settings in CFLAGS and LDFLAGS by
default (taken from the settings used to build python), and setting CFLAGS and
LDFLAGS ourself would override these defaults. We do still want all of the
flags in CFLAGS and LDFLAGS by default, so it is easier to add support for
separate INCLUDE_DIRS and LIBRARY_DIRS variables.
Test Plan: Tested building fb-mercurial-omnibus on CentOS.
Reviewers: ikostia, simonfar, ttung, durham, mbolin, wez, quark
Reviewed By: quark
Subscribers: ttung, net-systems-diffs@, yogeshwer, mjpieters
Differential Revision: https://phabricator.intern.facebook.com/D3916198
Signature: t1:3916198:1475091590:97904ef87a8a2b78e178c87d9d49bdadcce94e44
2016-09-29 02:08:21 +03:00
|
|
|
|
2017-02-24 01:03:02 +03:00
|
|
|
# Override the default c static library building code in distutils since it
|
|
|
|
# doesn't pass enough args, like libraries and extra args.
|
|
|
|
import distutils.command.build_clib
|
|
|
|
from distutils.errors import DistutilsSetupError
|
|
|
|
def build_libraries(self, libraries):
|
|
|
|
for (lib_name, build_info) in libraries:
|
|
|
|
sources = build_info.get('sources')
|
|
|
|
if sources is None or not isinstance(sources, (list, tuple)):
|
|
|
|
raise DistutilsSetupError(
|
|
|
|
"in 'libraries' option (library '%s'), " +
|
|
|
|
"'sources' must be present and must be " +
|
|
|
|
"a list of source filenames") % lib_name
|
|
|
|
sources = list(sources)
|
|
|
|
|
|
|
|
# First, compile the source code to object files in the library
|
|
|
|
# directory. (This should probably change to putting object
|
|
|
|
# files in a temporary build directory.)
|
|
|
|
macros = build_info.get('macros')
|
|
|
|
include_dirs = build_info.get('include_dirs')
|
|
|
|
extra_args = build_info.get('extra_args')
|
|
|
|
objects = self.compiler.compile(sources,
|
|
|
|
output_dir=self.build_temp,
|
|
|
|
macros=macros,
|
|
|
|
include_dirs=include_dirs,
|
|
|
|
debug=self.debug,
|
|
|
|
extra_postargs=extra_args)
|
|
|
|
|
|
|
|
# Now "link" the object files together into a static library.
|
|
|
|
# (On Unix at least, this isn't really linking -- it just
|
|
|
|
# builds an archive. Whatever.)
|
2017-02-24 01:03:03 +03:00
|
|
|
libraries = build_info.get('libraries', [])
|
2017-02-24 01:03:02 +03:00
|
|
|
for lib in libraries:
|
|
|
|
self.compiler.add_library(lib)
|
|
|
|
self.compiler.create_static_lib(objects, lib_name,
|
|
|
|
output_dir=self.build_clib,
|
|
|
|
debug=self.debug)
|
|
|
|
distutils.command.build_clib.build_clib.build_libraries = build_libraries
|
|
|
|
|
|
|
|
# Static c libaries
|
2017-03-17 19:04:19 +03:00
|
|
|
if iswindows:
|
2017-05-06 01:30:00 +03:00
|
|
|
availablelibraries = {}
|
2017-03-17 19:04:19 +03:00
|
|
|
else:
|
2017-05-06 01:30:00 +03:00
|
|
|
availablelibraries = {
|
|
|
|
'datapack': {
|
2017-03-17 19:04:19 +03:00
|
|
|
"sources" : ["cdatapack/cdatapack.c"],
|
2017-08-26 02:46:07 +03:00
|
|
|
"include_dirs" : ["."] + include_dirs,
|
2017-08-23 05:09:07 +03:00
|
|
|
"libraries" : ["lz4", SHA1_LIBRARY],
|
2017-04-06 01:00:31 +03:00
|
|
|
"extra_args" : filter(None,
|
|
|
|
[STDC99, WALL, WERROR, WSTRICTPROTOTYPES] + cflags),
|
2017-05-06 01:30:00 +03:00
|
|
|
},
|
|
|
|
'mpatch': {
|
2017-08-26 02:46:07 +03:00
|
|
|
"sources": ["cstore/mpatch.c"],
|
|
|
|
"include_dirs" : ["."] + include_dirs,
|
2017-05-06 01:30:00 +03:00
|
|
|
},
|
2017-08-23 05:09:07 +03:00
|
|
|
"sha1detectcoll": {
|
|
|
|
"sources" : [
|
|
|
|
"third-party/sha1dc/sha1.c",
|
|
|
|
"third-party/sha1dc/ubc_check.c",
|
|
|
|
],
|
|
|
|
"include_dirs" : ["third-party"] + include_dirs,
|
sha1: switch to new implementation
Summary:
This diff changes our code to use the new SHA1 library. See the previous diff
for why we do this.
Test Plan:
Run related tests manually:
```
$ make local PYTHON=python2
$ rt test-remotefilelog-*.t
.........................
# Ran 25 tests, 0 skipped, 0 warned, 0 failed.
$ rt test-treemanifest*.t
........
# Ran 8 tests, 0 skipped, 0 warned, 0 failed.
$ rt test-fastmanifest*.t
.........
# Ran 9 tests, 0 skipped, 0 warned, 0 failed.
```
Reviewers: #sourcecontrol, durham
Reviewed By: durham
Subscribers: mjpieters
Differential Revision: https://phabricator.intern.facebook.com/D4945025
Signature: t1:4945025:1493154873:844e55a51ab250354fc08163e0949eed47b0a861
2017-04-26 00:53:32 +03:00
|
|
|
"extra_args" : filter(None,
|
|
|
|
[STDC99, WALL, WERROR, WSTRICTPROTOTYPES] + cflags),
|
2017-05-06 01:30:00 +03:00
|
|
|
},
|
|
|
|
}
|
2017-02-24 01:03:02 +03:00
|
|
|
|
2017-04-03 20:40:31 +03:00
|
|
|
# modules that are single files in hgext3rd
|
2016-10-08 19:08:29 +03:00
|
|
|
hgext3rd = [
|
|
|
|
p[:-3].replace('/', '.')
|
|
|
|
for p in glob('hgext3rd/*.py')
|
|
|
|
if p != 'hgext3rd/__init__.py'
|
|
|
|
]
|
|
|
|
|
2017-04-04 21:48:27 +03:00
|
|
|
# packages that are directories in hgext3rd
|
|
|
|
hgext3rdpkgs = [
|
2017-04-03 20:40:31 +03:00
|
|
|
p[:-12].replace('/', '.')
|
|
|
|
for p in glob('hgext3rd/*/__init__.py')
|
2017-04-04 21:48:27 +03:00
|
|
|
]
|
2017-04-03 20:40:31 +03:00
|
|
|
|
2017-05-23 21:46:25 +03:00
|
|
|
availablepymodules = hgext3rd
|
2016-10-08 19:08:29 +03:00
|
|
|
|
2017-04-04 21:48:27 +03:00
|
|
|
availablepackages = hgext3rdpkgs + [
|
2016-10-08 19:08:29 +03:00
|
|
|
'infinitepush',
|
|
|
|
'phabricator',
|
|
|
|
'sqldirstate',
|
|
|
|
'remotefilelog',
|
|
|
|
]
|
|
|
|
|
2017-05-19 07:12:26 +03:00
|
|
|
if iswindows:
|
|
|
|
availablepackages += [
|
|
|
|
'linelog',
|
|
|
|
]
|
|
|
|
else:
|
2017-03-17 19:04:19 +03:00
|
|
|
availablepackages += [
|
|
|
|
'fastmanifest',
|
|
|
|
'treemanifest',
|
|
|
|
'linelog',
|
|
|
|
]
|
|
|
|
|
2017-02-24 01:03:03 +03:00
|
|
|
def distutils_dir_name(dname):
|
|
|
|
"""Returns the name of a distutils build directory"""
|
|
|
|
f = "{dirname}.{platform}-{version}"
|
|
|
|
return f.format(dirname=dname,
|
|
|
|
platform=distutils.util.get_platform(),
|
|
|
|
version=sys.version[:3])
|
|
|
|
|
2017-03-17 19:04:19 +03:00
|
|
|
if iswindows:
|
2016-12-14 04:20:13 +03:00
|
|
|
# The modules that successfully compile on Windows
|
2017-05-19 07:12:26 +03:00
|
|
|
availableextmodules = {
|
|
|
|
'linelog' : [
|
|
|
|
Extension('linelog',
|
2017-06-16 20:25:28 +03:00
|
|
|
sources=['linelog/pyext/linelog.pyx'],
|
2017-05-19 07:12:26 +03:00
|
|
|
extra_compile_args=filter(None, [
|
|
|
|
STDC99, WALL, WEXTRA, WCONVERSION, PEDANTIC,
|
|
|
|
]),
|
|
|
|
),
|
|
|
|
],
|
|
|
|
}
|
2016-12-14 04:20:13 +03:00
|
|
|
else:
|
|
|
|
availableextmodules = {
|
2017-02-24 01:03:02 +03:00
|
|
|
'cstore' : [
|
|
|
|
Extension('cstore',
|
2016-12-14 04:20:13 +03:00
|
|
|
sources=[
|
2017-02-24 01:03:03 +03:00
|
|
|
'cstore/datapackstore.cpp',
|
2017-02-24 01:03:02 +03:00
|
|
|
'cstore/py-cstore.cpp',
|
2017-03-07 22:39:46 +03:00
|
|
|
'cstore/pythonutil.cpp',
|
2017-02-24 01:03:03 +03:00
|
|
|
'cstore/uniondatapackstore.cpp',
|
2016-12-14 04:20:13 +03:00
|
|
|
'ctreemanifest/manifest.cpp',
|
|
|
|
'ctreemanifest/manifest_entry.cpp',
|
|
|
|
'ctreemanifest/manifest_fetcher.cpp',
|
2017-08-26 02:46:07 +03:00
|
|
|
'ctreemanifest/manifest_ptr.cpp',
|
2016-12-14 04:20:13 +03:00
|
|
|
'ctreemanifest/treemanifest.cpp',
|
|
|
|
],
|
|
|
|
include_dirs=[
|
2017-08-23 05:09:07 +03:00
|
|
|
'.',
|
2017-08-23 05:09:07 +03:00
|
|
|
'third-party',
|
2016-12-14 04:20:13 +03:00
|
|
|
] + include_dirs,
|
2017-02-24 01:03:03 +03:00
|
|
|
library_dirs=[
|
|
|
|
'build/' + distutils_dir_name('lib'),
|
|
|
|
] + library_dirs,
|
2016-12-14 04:20:13 +03:00
|
|
|
libraries=[
|
2017-02-24 01:03:03 +03:00
|
|
|
'datapack',
|
|
|
|
'lz4',
|
2017-05-09 04:35:53 +03:00
|
|
|
'mpatch',
|
2017-08-23 05:09:07 +03:00
|
|
|
SHA1_LIBRARY,
|
2016-12-14 04:20:13 +03:00
|
|
|
],
|
2017-04-06 01:00:31 +03:00
|
|
|
extra_compile_args=filter(None, [STDCPP0X, WALL] + cflags),
|
2016-12-14 04:20:13 +03:00
|
|
|
),
|
|
|
|
],
|
|
|
|
'cfastmanifest' : [
|
|
|
|
Extension('cfastmanifest',
|
|
|
|
sources=['cfastmanifest.c',
|
|
|
|
'cfastmanifest/bsearch.c',
|
|
|
|
'clib/buffer.c',
|
|
|
|
'cfastmanifest/checksum.c',
|
|
|
|
'cfastmanifest/node.c',
|
|
|
|
'cfastmanifest/tree.c',
|
|
|
|
'cfastmanifest/tree_arena.c',
|
|
|
|
'cfastmanifest/tree_convert.c',
|
|
|
|
'cfastmanifest/tree_copy.c',
|
|
|
|
'cfastmanifest/tree_diff.c',
|
|
|
|
'cfastmanifest/tree_disk.c',
|
|
|
|
'cfastmanifest/tree_iterator.c',
|
|
|
|
'cfastmanifest/tree_path.c',
|
|
|
|
],
|
|
|
|
include_dirs=[
|
2017-08-23 05:09:07 +03:00
|
|
|
'.',
|
2016-12-14 04:20:13 +03:00
|
|
|
'cfastmanifest',
|
|
|
|
'clib',
|
2017-08-23 05:09:07 +03:00
|
|
|
'third-party',
|
2016-12-14 04:20:13 +03:00
|
|
|
] + include_dirs,
|
|
|
|
library_dirs=library_dirs,
|
2017-08-23 05:09:07 +03:00
|
|
|
libraries=[SHA1_LIBRARY],
|
2017-04-06 01:00:31 +03:00
|
|
|
extra_compile_args=filter(None, [
|
|
|
|
STDC99,
|
|
|
|
WALL,
|
|
|
|
WSTRICTPROTOTYPES,
|
|
|
|
] + cflags),
|
2016-12-14 04:20:13 +03:00
|
|
|
),
|
|
|
|
],
|
|
|
|
'linelog' : [
|
|
|
|
Extension('linelog',
|
|
|
|
sources=['linelog/pyext/linelog.pyx'],
|
2017-04-06 01:00:31 +03:00
|
|
|
extra_compile_args=filter(None, [
|
|
|
|
STDC99, WALL, WEXTRA, WCONVERSION, PEDANTIC,
|
|
|
|
]),
|
2016-12-14 04:20:13 +03:00
|
|
|
),
|
|
|
|
],
|
2017-03-16 04:55:48 +03:00
|
|
|
'patchrmdir': [
|
|
|
|
Extension('hgext3rd.patchrmdir',
|
|
|
|
sources=['hgext3rd/patchrmdir.pyx'],
|
2017-04-06 01:00:31 +03:00
|
|
|
extra_compile_args=filter(None, [
|
|
|
|
STDC99, WALL, WEXTRA, WCONVERSION, PEDANTIC,
|
|
|
|
]),
|
2017-03-16 04:55:48 +03:00
|
|
|
),
|
|
|
|
],
|
traceprof: new extension to perform accurate profiling
Summary:
Instead of sampling periodically like the default profiler `statprof`, trace
every function call so the report is accurate. The output is designed to be
similar to `statprof`.
Written in C++ and inline assembly to minimize overhead. Cython is used as a
glue layer.
Comparing to `lsprof`, the overhead is similarly small. The major difference is
this profiler cares about "frame object" and records `frame->f_back` (parent
frame) so it could draw a statprof-like tree of the callgraph. `lsprof` records
"code object" instead and could have difficulty to deal with recursive calls.
The following code could demostrate the difference:
```
def fib(x):
if x < 2:
return x
return fib(x - 1) + fib(x - 2)
# lsprof - recorded 1 code object for fib and no way to figure out callgraph
import _lsprof
p = _lsprof.Profiler()
p.enable(subcalls=True)
fib(5)
p.disable()
for s in p.getstats():
print(s)
'''output:
_lsprof.profiler_entry(code="<method 'disable' of '_lsprof.Profiler' objects>", callcount=1, reccallcount=0, totaltime=0.0, inlinetime=0.0, calls=None)
_lsprof.profiler_entry(code=<code object fib at 0x7f27ca587530, file "a.py", line 1>, callcount=15, reccallcount=14, totaltime=8e-06, inlinetime=8e-06, calls=[_lsprof.profiler_subentry(code=<code object fib at 0x7f27ca587530, file "a.py", line 1>, callcount=14, reccallcount=12, totaltime=6e-06, inlinetime=6e-06)])
'''
# traceprof (with timethreshold set to 0) - callgraph drawn correctly
import sys
from hgext3rd import traceprof
with traceprof.profile(None, sys.stderr):
fib(5)
'''output:
| <module> a.py:1
0 \ fib a.py:1
0 \ fib a.py:1
0 \ fib a.py:1
0 \ fib a.py:1
0 | fib a.py:1
0 \ fib a.py:1
0 \ fib a.py:1
0 | fib a.py:1
0 \ fib a.py:1
0 | fib a.py:1
\ __exit__ contextlib.py:21
'''
```
There are some performance / correctness tradeoffs. The current implementation
takes `frame` address and several (but not theoretically complete) fields to
hash a frame. That could in theory lead to inaccurate frame information being
recorded. But hashing all fields recursively incurs significant overhead. The
related logic is the `hashframe` function, which could be tweaked later.
This is an initial version. It probably does not cover all corner cases like
exception handling well. And it may duplicate a same frame multiple times
because of the way it hashes a frame. The duplicated frame issue could be
solved by adding post-profiling filtering though.
Here is a real-world example of `hg log -r .` on `hg-committed`:
```
| <module> hg:10
| run dispatch.py:81
| dispatch dispatch.py:132
| _runcatch dispatch.py:202
| _callcatch dispatch.py:309
| callcatch scmutil.py:133
| _runcatchfunc dispatch.py:215
| _dispatch dispatch.py:744
2 \ repository hg.py:166
2 | _peerorrepo hg.py:151
2 | instance localrepo.py:2025
2 | __init__ localrepo.py:275
863 \ runcommand dispatch.py:656
863 | _runcommand dispatch.py:926
863 | <lambda> dispatch.py:918
863 | check util.py:1059
863 | log commands.py:3262
480 \ getlogrevs cmdutil.py:2236
479 | _logrevs cmdutil.py:2190
479 | revrange scmutil.py:429
479 | anyrevs localrepo.py:622
478 | mfunc revset.py:2275
478 | __init__ (4 times) smartset.py:1033
478 | __init__ (4 times) smartset.py:919
478 | __len__ (4 times) localrepo.py:587
478 | changelog repoview.py:309
477 | filterrevs (11 times) repoview.py:260
477 | computehidden repoview.py:171
455 \ hideablerevs repoview.py:24
455 | getrevs obsolete.py:1114
455 | _computeobsoleteset obsolete.py:1143
3 \ __get__ (2 times) localrepo.py:76
7 \ node (5760 times) changelog.py:359
4 | node (8938 times) revlog.py:482
9 \ __get__ (5760 times) localrepo.py:76
428 \ __get__ util.py:798
428 | successors obsolete.py:672
225 \ __get__ util.py:798
224 | _all obsolete.py:662
6 \ tryread vfs.py:32
6 | read vfs.py:60
200 \ _readmarkers obsolete.py:442
* 199 | _fm1readmarkers obsolete.py:430
15 \ _checkinvalidmarkers obsolete.py:523
* 203 \ _addsuccessors obsolete.py:504
3 \ tryreadcache repoview.py:166
3 | cachehash repoview.py:98
3 | heads localrepo.py:1884
17 \ <genexpr> (5225 times) repoview.py:191
15 | __contains__ (5226 times) ancestor.py:334
5 | parentrevs (4010 times) changelog.py:371
2 | parentrevs (4332 times) revlog.py:479
9 \ pager ui.py:843
9 | _runpager ui.py:906
8 | __init__ subprocess.py:330
8 | _execute_child subprocess.py:880
5 | _eintr_retry_call subprocess.py:118
374 \ show cmdutil.py:1308
374 | _show cmdutil.py:1316
365 \ _changesetlabels cmdutil.py:1266
365 | troubled context.py:225
12 \ unstable (2 times) context.py:207
12 | getrevs obsolete.py:1114
12 | _computeunstableset obsolete.py:1154
3 \ set (322 times) localrepo.py:610
3 \ revs (3 times) localrepo.py:593
8 \ __getitem__ (963 times) localrepo.py:566
4 \ parents (321 times) context.py:246
4 | __get__ (321 times) util.py:798
4 | _parents (321 times) context.py:562
3 \ <genexpr> (642 times) obsolete.py:1164
3 | obsolete (321 times) context.py:199
2 | getrevs (321 times) obsolete.py:1114
319 \ bumped (2 times) context.py:211
319 | getrevs obsolete.py:1114
319 | _computebumpedset obsolete.py:1181
4 \ set (322 times) localrepo.py:610
312 \ allprecursors (2021 times) obsolete.py:850
308 | __get__ util.py:798
308 | precursors obsolete.py:678
* 308 | _addprecursors obsolete.py:509
34 \ divergent (2 times) context.py:218
34 | getrevs obsolete.py:1114
33 | _computedivergentset obsolete.py:1204
4 \ set (322 times) localrepo.py:610
24 \ successorssets (1368 times) obsolete.py:899
3 \ __get__ (1368 times) localrepo.py:76
4 \ __contains__ (200 times) localrepo.py:575
9 \ names (2 times) namespaces.py:184
9 | <lambda> namespaces.py:43
9 | nodetags localrepo.py:741
8 | __get__ util.py:798
8 | _tagscache localrepo.py:646
8 | _findtags localrepo.py:685
7 | findglobaltags tags.py:170
7 | _readtagcache tags.py:370
6 | filteredhash scmutil.py:307
\ __exit__ contextlib.py:21
| maybeprofile profiling.py:199
| __exit__ contextlib.py:21
| profile profiling.py:148
| __exit__ contextlib.py:21
Total time: 867 ms
```
Some example conclusions from reading the above output include:
1. `_fm1readmarkers`, `_addsuccessors`, `_addprecursors` are slow
2. `changelog.node` and `changelog.parentrevs` are called thousands of times,
but they are just a few milliseconds.
3. Reading the `obsstore` file (`vfs.read`) takes 5ms exactly
While `statprof` may get 1 right, `lsprof` may get 2 right, neither of them is
helpful for 3. `statprof` is not accurate enough to be confident about 5ms,
`lsprof` will include other `vfs.read` calls so it's impossible to see just the
read for obsstore.
Test Plan:
With the upstream [patch](https://patchwork.mercurial-scm.org/patch/20822/), run:
```
HGPROF=traceprof hg id --config extensions.traceprof= --profile
```
Reviewers: #mercurial, rmcelroy
Reviewed By: rmcelroy
Subscribers: rmcelroy, mjpieters, jiangyuan
Differential Revision: https://phabricator.intern.facebook.com/D5104851
Signature: t1:5104851:1495480386:666847ea08bb6a94658bc10d7f0e91c225d56836
2017-05-23 01:49:45 +03:00
|
|
|
'traceprof': [
|
|
|
|
Extension('hgext3rd.traceprof',
|
|
|
|
sources=['hgext3rd/traceprof.pyx'],
|
2017-08-26 02:46:07 +03:00
|
|
|
include_dirs=['hgext3rd'],
|
traceprof: new extension to perform accurate profiling
Summary:
Instead of sampling periodically like the default profiler `statprof`, trace
every function call so the report is accurate. The output is designed to be
similar to `statprof`.
Written in C++ and inline assembly to minimize overhead. Cython is used as a
glue layer.
Comparing to `lsprof`, the overhead is similarly small. The major difference is
this profiler cares about "frame object" and records `frame->f_back` (parent
frame) so it could draw a statprof-like tree of the callgraph. `lsprof` records
"code object" instead and could have difficulty to deal with recursive calls.
The following code could demostrate the difference:
```
def fib(x):
if x < 2:
return x
return fib(x - 1) + fib(x - 2)
# lsprof - recorded 1 code object for fib and no way to figure out callgraph
import _lsprof
p = _lsprof.Profiler()
p.enable(subcalls=True)
fib(5)
p.disable()
for s in p.getstats():
print(s)
'''output:
_lsprof.profiler_entry(code="<method 'disable' of '_lsprof.Profiler' objects>", callcount=1, reccallcount=0, totaltime=0.0, inlinetime=0.0, calls=None)
_lsprof.profiler_entry(code=<code object fib at 0x7f27ca587530, file "a.py", line 1>, callcount=15, reccallcount=14, totaltime=8e-06, inlinetime=8e-06, calls=[_lsprof.profiler_subentry(code=<code object fib at 0x7f27ca587530, file "a.py", line 1>, callcount=14, reccallcount=12, totaltime=6e-06, inlinetime=6e-06)])
'''
# traceprof (with timethreshold set to 0) - callgraph drawn correctly
import sys
from hgext3rd import traceprof
with traceprof.profile(None, sys.stderr):
fib(5)
'''output:
| <module> a.py:1
0 \ fib a.py:1
0 \ fib a.py:1
0 \ fib a.py:1
0 \ fib a.py:1
0 | fib a.py:1
0 \ fib a.py:1
0 \ fib a.py:1
0 | fib a.py:1
0 \ fib a.py:1
0 | fib a.py:1
\ __exit__ contextlib.py:21
'''
```
There are some performance / correctness tradeoffs. The current implementation
takes `frame` address and several (but not theoretically complete) fields to
hash a frame. That could in theory lead to inaccurate frame information being
recorded. But hashing all fields recursively incurs significant overhead. The
related logic is the `hashframe` function, which could be tweaked later.
This is an initial version. It probably does not cover all corner cases like
exception handling well. And it may duplicate a same frame multiple times
because of the way it hashes a frame. The duplicated frame issue could be
solved by adding post-profiling filtering though.
Here is a real-world example of `hg log -r .` on `hg-committed`:
```
| <module> hg:10
| run dispatch.py:81
| dispatch dispatch.py:132
| _runcatch dispatch.py:202
| _callcatch dispatch.py:309
| callcatch scmutil.py:133
| _runcatchfunc dispatch.py:215
| _dispatch dispatch.py:744
2 \ repository hg.py:166
2 | _peerorrepo hg.py:151
2 | instance localrepo.py:2025
2 | __init__ localrepo.py:275
863 \ runcommand dispatch.py:656
863 | _runcommand dispatch.py:926
863 | <lambda> dispatch.py:918
863 | check util.py:1059
863 | log commands.py:3262
480 \ getlogrevs cmdutil.py:2236
479 | _logrevs cmdutil.py:2190
479 | revrange scmutil.py:429
479 | anyrevs localrepo.py:622
478 | mfunc revset.py:2275
478 | __init__ (4 times) smartset.py:1033
478 | __init__ (4 times) smartset.py:919
478 | __len__ (4 times) localrepo.py:587
478 | changelog repoview.py:309
477 | filterrevs (11 times) repoview.py:260
477 | computehidden repoview.py:171
455 \ hideablerevs repoview.py:24
455 | getrevs obsolete.py:1114
455 | _computeobsoleteset obsolete.py:1143
3 \ __get__ (2 times) localrepo.py:76
7 \ node (5760 times) changelog.py:359
4 | node (8938 times) revlog.py:482
9 \ __get__ (5760 times) localrepo.py:76
428 \ __get__ util.py:798
428 | successors obsolete.py:672
225 \ __get__ util.py:798
224 | _all obsolete.py:662
6 \ tryread vfs.py:32
6 | read vfs.py:60
200 \ _readmarkers obsolete.py:442
* 199 | _fm1readmarkers obsolete.py:430
15 \ _checkinvalidmarkers obsolete.py:523
* 203 \ _addsuccessors obsolete.py:504
3 \ tryreadcache repoview.py:166
3 | cachehash repoview.py:98
3 | heads localrepo.py:1884
17 \ <genexpr> (5225 times) repoview.py:191
15 | __contains__ (5226 times) ancestor.py:334
5 | parentrevs (4010 times) changelog.py:371
2 | parentrevs (4332 times) revlog.py:479
9 \ pager ui.py:843
9 | _runpager ui.py:906
8 | __init__ subprocess.py:330
8 | _execute_child subprocess.py:880
5 | _eintr_retry_call subprocess.py:118
374 \ show cmdutil.py:1308
374 | _show cmdutil.py:1316
365 \ _changesetlabels cmdutil.py:1266
365 | troubled context.py:225
12 \ unstable (2 times) context.py:207
12 | getrevs obsolete.py:1114
12 | _computeunstableset obsolete.py:1154
3 \ set (322 times) localrepo.py:610
3 \ revs (3 times) localrepo.py:593
8 \ __getitem__ (963 times) localrepo.py:566
4 \ parents (321 times) context.py:246
4 | __get__ (321 times) util.py:798
4 | _parents (321 times) context.py:562
3 \ <genexpr> (642 times) obsolete.py:1164
3 | obsolete (321 times) context.py:199
2 | getrevs (321 times) obsolete.py:1114
319 \ bumped (2 times) context.py:211
319 | getrevs obsolete.py:1114
319 | _computebumpedset obsolete.py:1181
4 \ set (322 times) localrepo.py:610
312 \ allprecursors (2021 times) obsolete.py:850
308 | __get__ util.py:798
308 | precursors obsolete.py:678
* 308 | _addprecursors obsolete.py:509
34 \ divergent (2 times) context.py:218
34 | getrevs obsolete.py:1114
33 | _computedivergentset obsolete.py:1204
4 \ set (322 times) localrepo.py:610
24 \ successorssets (1368 times) obsolete.py:899
3 \ __get__ (1368 times) localrepo.py:76
4 \ __contains__ (200 times) localrepo.py:575
9 \ names (2 times) namespaces.py:184
9 | <lambda> namespaces.py:43
9 | nodetags localrepo.py:741
8 | __get__ util.py:798
8 | _tagscache localrepo.py:646
8 | _findtags localrepo.py:685
7 | findglobaltags tags.py:170
7 | _readtagcache tags.py:370
6 | filteredhash scmutil.py:307
\ __exit__ contextlib.py:21
| maybeprofile profiling.py:199
| __exit__ contextlib.py:21
| profile profiling.py:148
| __exit__ contextlib.py:21
Total time: 867 ms
```
Some example conclusions from reading the above output include:
1. `_fm1readmarkers`, `_addsuccessors`, `_addprecursors` are slow
2. `changelog.node` and `changelog.parentrevs` are called thousands of times,
but they are just a few milliseconds.
3. Reading the `obsstore` file (`vfs.read`) takes 5ms exactly
While `statprof` may get 1 right, `lsprof` may get 2 right, neither of them is
helpful for 3. `statprof` is not accurate enough to be confident about 5ms,
`lsprof` will include other `vfs.read` calls so it's impossible to see just the
read for obsstore.
Test Plan:
With the upstream [patch](https://patchwork.mercurial-scm.org/patch/20822/), run:
```
HGPROF=traceprof hg id --config extensions.traceprof= --profile
```
Reviewers: #mercurial, rmcelroy
Reviewed By: rmcelroy
Subscribers: rmcelroy, mjpieters, jiangyuan
Differential Revision: https://phabricator.intern.facebook.com/D5104851
Signature: t1:5104851:1495480386:666847ea08bb6a94658bc10d7f0e91c225d56836
2017-05-23 01:49:45 +03:00
|
|
|
extra_compile_args=filter(None, [
|
|
|
|
OPTIMIZATION, STDCPP0X, WALL, WEXTRA, WCONVERSION, PEDANTIC,
|
traceprof: store raw Python code object
Summary:
Previously, the tracer extracts file name, function name from Python code
objects and stores them as std::string. And frame the de-duplication logic
hashes those strings, which is actually quite expensive.
`hg id` takes 1.0 seconds without frame de-duplication, and 2.5 seconds with
de-duplication.
This diff assumes Python code object is unique (i.e. Python won't generate two
code objects for a single code segment). That seems reasonable and `lsprof`
seems to make a same assumption.
With that assumption, just store `PyCodeObject` (contains file, method name and
line number) instead of `std::string` (for file, method names) and line numbers
for code identity and use the raw address of `PyCodeObject` to do frame
de-duplication.
That is helpful during profiling and de-duplication:
- During profiling, no need to copy strings (convert to std::string),
therefore less overhead
- During frame de-duplication, no need to read long strings and hash them,
therefore much faster
Debug flag during compilation is added to make debugging easier as the code
starts to mix of C++ and Python objects, which means mistakes could be made
more easily.
Test Plan:
With this patch, `hg id` with frame de-duplication takes 1.0 seconds.
There is no visible overhead doing de-duplication. Cheers!
Reviewers: #mercurial, rmcelroy
Reviewed By: rmcelroy
Subscribers: rmcelroy, mjpieters
Differential Revision: https://phabricator.intern.facebook.com/D5112636
Signature: t1:5112636:1495562232:dd0724198ebb7f2eb4e9fd3a83517fc9160fa01e
2017-05-23 21:51:10 +03:00
|
|
|
PRODUCEDEBUGSYMBOLS
|
traceprof: new extension to perform accurate profiling
Summary:
Instead of sampling periodically like the default profiler `statprof`, trace
every function call so the report is accurate. The output is designed to be
similar to `statprof`.
Written in C++ and inline assembly to minimize overhead. Cython is used as a
glue layer.
Comparing to `lsprof`, the overhead is similarly small. The major difference is
this profiler cares about "frame object" and records `frame->f_back` (parent
frame) so it could draw a statprof-like tree of the callgraph. `lsprof` records
"code object" instead and could have difficulty to deal with recursive calls.
The following code could demostrate the difference:
```
def fib(x):
if x < 2:
return x
return fib(x - 1) + fib(x - 2)
# lsprof - recorded 1 code object for fib and no way to figure out callgraph
import _lsprof
p = _lsprof.Profiler()
p.enable(subcalls=True)
fib(5)
p.disable()
for s in p.getstats():
print(s)
'''output:
_lsprof.profiler_entry(code="<method 'disable' of '_lsprof.Profiler' objects>", callcount=1, reccallcount=0, totaltime=0.0, inlinetime=0.0, calls=None)
_lsprof.profiler_entry(code=<code object fib at 0x7f27ca587530, file "a.py", line 1>, callcount=15, reccallcount=14, totaltime=8e-06, inlinetime=8e-06, calls=[_lsprof.profiler_subentry(code=<code object fib at 0x7f27ca587530, file "a.py", line 1>, callcount=14, reccallcount=12, totaltime=6e-06, inlinetime=6e-06)])
'''
# traceprof (with timethreshold set to 0) - callgraph drawn correctly
import sys
from hgext3rd import traceprof
with traceprof.profile(None, sys.stderr):
fib(5)
'''output:
| <module> a.py:1
0 \ fib a.py:1
0 \ fib a.py:1
0 \ fib a.py:1
0 \ fib a.py:1
0 | fib a.py:1
0 \ fib a.py:1
0 \ fib a.py:1
0 | fib a.py:1
0 \ fib a.py:1
0 | fib a.py:1
\ __exit__ contextlib.py:21
'''
```
There are some performance / correctness tradeoffs. The current implementation
takes `frame` address and several (but not theoretically complete) fields to
hash a frame. That could in theory lead to inaccurate frame information being
recorded. But hashing all fields recursively incurs significant overhead. The
related logic is the `hashframe` function, which could be tweaked later.
This is an initial version. It probably does not cover all corner cases like
exception handling well. And it may duplicate a same frame multiple times
because of the way it hashes a frame. The duplicated frame issue could be
solved by adding post-profiling filtering though.
Here is a real-world example of `hg log -r .` on `hg-committed`:
```
| <module> hg:10
| run dispatch.py:81
| dispatch dispatch.py:132
| _runcatch dispatch.py:202
| _callcatch dispatch.py:309
| callcatch scmutil.py:133
| _runcatchfunc dispatch.py:215
| _dispatch dispatch.py:744
2 \ repository hg.py:166
2 | _peerorrepo hg.py:151
2 | instance localrepo.py:2025
2 | __init__ localrepo.py:275
863 \ runcommand dispatch.py:656
863 | _runcommand dispatch.py:926
863 | <lambda> dispatch.py:918
863 | check util.py:1059
863 | log commands.py:3262
480 \ getlogrevs cmdutil.py:2236
479 | _logrevs cmdutil.py:2190
479 | revrange scmutil.py:429
479 | anyrevs localrepo.py:622
478 | mfunc revset.py:2275
478 | __init__ (4 times) smartset.py:1033
478 | __init__ (4 times) smartset.py:919
478 | __len__ (4 times) localrepo.py:587
478 | changelog repoview.py:309
477 | filterrevs (11 times) repoview.py:260
477 | computehidden repoview.py:171
455 \ hideablerevs repoview.py:24
455 | getrevs obsolete.py:1114
455 | _computeobsoleteset obsolete.py:1143
3 \ __get__ (2 times) localrepo.py:76
7 \ node (5760 times) changelog.py:359
4 | node (8938 times) revlog.py:482
9 \ __get__ (5760 times) localrepo.py:76
428 \ __get__ util.py:798
428 | successors obsolete.py:672
225 \ __get__ util.py:798
224 | _all obsolete.py:662
6 \ tryread vfs.py:32
6 | read vfs.py:60
200 \ _readmarkers obsolete.py:442
* 199 | _fm1readmarkers obsolete.py:430
15 \ _checkinvalidmarkers obsolete.py:523
* 203 \ _addsuccessors obsolete.py:504
3 \ tryreadcache repoview.py:166
3 | cachehash repoview.py:98
3 | heads localrepo.py:1884
17 \ <genexpr> (5225 times) repoview.py:191
15 | __contains__ (5226 times) ancestor.py:334
5 | parentrevs (4010 times) changelog.py:371
2 | parentrevs (4332 times) revlog.py:479
9 \ pager ui.py:843
9 | _runpager ui.py:906
8 | __init__ subprocess.py:330
8 | _execute_child subprocess.py:880
5 | _eintr_retry_call subprocess.py:118
374 \ show cmdutil.py:1308
374 | _show cmdutil.py:1316
365 \ _changesetlabels cmdutil.py:1266
365 | troubled context.py:225
12 \ unstable (2 times) context.py:207
12 | getrevs obsolete.py:1114
12 | _computeunstableset obsolete.py:1154
3 \ set (322 times) localrepo.py:610
3 \ revs (3 times) localrepo.py:593
8 \ __getitem__ (963 times) localrepo.py:566
4 \ parents (321 times) context.py:246
4 | __get__ (321 times) util.py:798
4 | _parents (321 times) context.py:562
3 \ <genexpr> (642 times) obsolete.py:1164
3 | obsolete (321 times) context.py:199
2 | getrevs (321 times) obsolete.py:1114
319 \ bumped (2 times) context.py:211
319 | getrevs obsolete.py:1114
319 | _computebumpedset obsolete.py:1181
4 \ set (322 times) localrepo.py:610
312 \ allprecursors (2021 times) obsolete.py:850
308 | __get__ util.py:798
308 | precursors obsolete.py:678
* 308 | _addprecursors obsolete.py:509
34 \ divergent (2 times) context.py:218
34 | getrevs obsolete.py:1114
33 | _computedivergentset obsolete.py:1204
4 \ set (322 times) localrepo.py:610
24 \ successorssets (1368 times) obsolete.py:899
3 \ __get__ (1368 times) localrepo.py:76
4 \ __contains__ (200 times) localrepo.py:575
9 \ names (2 times) namespaces.py:184
9 | <lambda> namespaces.py:43
9 | nodetags localrepo.py:741
8 | __get__ util.py:798
8 | _tagscache localrepo.py:646
8 | _findtags localrepo.py:685
7 | findglobaltags tags.py:170
7 | _readtagcache tags.py:370
6 | filteredhash scmutil.py:307
\ __exit__ contextlib.py:21
| maybeprofile profiling.py:199
| __exit__ contextlib.py:21
| profile profiling.py:148
| __exit__ contextlib.py:21
Total time: 867 ms
```
Some example conclusions from reading the above output include:
1. `_fm1readmarkers`, `_addsuccessors`, `_addprecursors` are slow
2. `changelog.node` and `changelog.parentrevs` are called thousands of times,
but they are just a few milliseconds.
3. Reading the `obsstore` file (`vfs.read`) takes 5ms exactly
While `statprof` may get 1 right, `lsprof` may get 2 right, neither of them is
helpful for 3. `statprof` is not accurate enough to be confident about 5ms,
`lsprof` will include other `vfs.read` calls so it's impossible to see just the
read for obsstore.
Test Plan:
With the upstream [patch](https://patchwork.mercurial-scm.org/patch/20822/), run:
```
HGPROF=traceprof hg id --config extensions.traceprof= --profile
```
Reviewers: #mercurial, rmcelroy
Reviewed By: rmcelroy
Subscribers: rmcelroy, mjpieters, jiangyuan
Differential Revision: https://phabricator.intern.facebook.com/D5104851
Signature: t1:5104851:1495480386:666847ea08bb6a94658bc10d7f0e91c225d56836
2017-05-23 01:49:45 +03:00
|
|
|
]),
|
|
|
|
),
|
|
|
|
]
|
2016-12-14 04:20:13 +03:00
|
|
|
}
|
2016-10-08 19:08:29 +03:00
|
|
|
|
2017-04-04 23:41:54 +03:00
|
|
|
allnames = availablepackages + availableextmodules.keys() + availablepymodules
|
|
|
|
COMPONENTS = sorted(name.split('.')[-1] for name in allnames)
|
2016-10-08 19:08:29 +03:00
|
|
|
|
|
|
|
if not components:
|
|
|
|
components = COMPONENTS
|
|
|
|
|
|
|
|
dependencies = {
|
|
|
|
'absorb' : ['linelog'],
|
2017-02-24 01:03:03 +03:00
|
|
|
'cstore' : ['ctreemanifest', 'cdatapack'],
|
2016-10-08 19:08:29 +03:00
|
|
|
'fastannotate' : ['linelog'],
|
2016-11-21 11:53:37 +03:00
|
|
|
'infinitepush' : ['extutil'],
|
2017-02-24 01:03:02 +03:00
|
|
|
'remotefilelog' : ['cstore', 'extutil'],
|
2017-02-24 01:03:03 +03:00
|
|
|
'treemanifest' : ['cstore'],
|
2016-10-08 19:08:29 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
processdep = True
|
|
|
|
while processdep:
|
|
|
|
processdep = False
|
|
|
|
for name, deps in dependencies.iteritems():
|
|
|
|
if name in components:
|
|
|
|
for dep in deps:
|
|
|
|
if dep not in components:
|
|
|
|
components.append(dep)
|
|
|
|
processdep = True
|
|
|
|
|
2017-03-17 19:04:19 +03:00
|
|
|
if iswindows:
|
2016-12-14 04:20:13 +03:00
|
|
|
# The modules that successfully compile on Windows
|
2017-05-19 07:12:26 +03:00
|
|
|
cythonmodules = ['linelog']
|
2016-12-14 04:20:13 +03:00
|
|
|
else:
|
|
|
|
cythonmodules = [
|
|
|
|
'linelog',
|
2017-03-16 04:55:48 +03:00
|
|
|
'patchrmdir',
|
traceprof: new extension to perform accurate profiling
Summary:
Instead of sampling periodically like the default profiler `statprof`, trace
every function call so the report is accurate. The output is designed to be
similar to `statprof`.
Written in C++ and inline assembly to minimize overhead. Cython is used as a
glue layer.
Comparing to `lsprof`, the overhead is similarly small. The major difference is
this profiler cares about "frame object" and records `frame->f_back` (parent
frame) so it could draw a statprof-like tree of the callgraph. `lsprof` records
"code object" instead and could have difficulty to deal with recursive calls.
The following code could demostrate the difference:
```
def fib(x):
if x < 2:
return x
return fib(x - 1) + fib(x - 2)
# lsprof - recorded 1 code object for fib and no way to figure out callgraph
import _lsprof
p = _lsprof.Profiler()
p.enable(subcalls=True)
fib(5)
p.disable()
for s in p.getstats():
print(s)
'''output:
_lsprof.profiler_entry(code="<method 'disable' of '_lsprof.Profiler' objects>", callcount=1, reccallcount=0, totaltime=0.0, inlinetime=0.0, calls=None)
_lsprof.profiler_entry(code=<code object fib at 0x7f27ca587530, file "a.py", line 1>, callcount=15, reccallcount=14, totaltime=8e-06, inlinetime=8e-06, calls=[_lsprof.profiler_subentry(code=<code object fib at 0x7f27ca587530, file "a.py", line 1>, callcount=14, reccallcount=12, totaltime=6e-06, inlinetime=6e-06)])
'''
# traceprof (with timethreshold set to 0) - callgraph drawn correctly
import sys
from hgext3rd import traceprof
with traceprof.profile(None, sys.stderr):
fib(5)
'''output:
| <module> a.py:1
0 \ fib a.py:1
0 \ fib a.py:1
0 \ fib a.py:1
0 \ fib a.py:1
0 | fib a.py:1
0 \ fib a.py:1
0 \ fib a.py:1
0 | fib a.py:1
0 \ fib a.py:1
0 | fib a.py:1
\ __exit__ contextlib.py:21
'''
```
There are some performance / correctness tradeoffs. The current implementation
takes `frame` address and several (but not theoretically complete) fields to
hash a frame. That could in theory lead to inaccurate frame information being
recorded. But hashing all fields recursively incurs significant overhead. The
related logic is the `hashframe` function, which could be tweaked later.
This is an initial version. It probably does not cover all corner cases like
exception handling well. And it may duplicate a same frame multiple times
because of the way it hashes a frame. The duplicated frame issue could be
solved by adding post-profiling filtering though.
Here is a real-world example of `hg log -r .` on `hg-committed`:
```
| <module> hg:10
| run dispatch.py:81
| dispatch dispatch.py:132
| _runcatch dispatch.py:202
| _callcatch dispatch.py:309
| callcatch scmutil.py:133
| _runcatchfunc dispatch.py:215
| _dispatch dispatch.py:744
2 \ repository hg.py:166
2 | _peerorrepo hg.py:151
2 | instance localrepo.py:2025
2 | __init__ localrepo.py:275
863 \ runcommand dispatch.py:656
863 | _runcommand dispatch.py:926
863 | <lambda> dispatch.py:918
863 | check util.py:1059
863 | log commands.py:3262
480 \ getlogrevs cmdutil.py:2236
479 | _logrevs cmdutil.py:2190
479 | revrange scmutil.py:429
479 | anyrevs localrepo.py:622
478 | mfunc revset.py:2275
478 | __init__ (4 times) smartset.py:1033
478 | __init__ (4 times) smartset.py:919
478 | __len__ (4 times) localrepo.py:587
478 | changelog repoview.py:309
477 | filterrevs (11 times) repoview.py:260
477 | computehidden repoview.py:171
455 \ hideablerevs repoview.py:24
455 | getrevs obsolete.py:1114
455 | _computeobsoleteset obsolete.py:1143
3 \ __get__ (2 times) localrepo.py:76
7 \ node (5760 times) changelog.py:359
4 | node (8938 times) revlog.py:482
9 \ __get__ (5760 times) localrepo.py:76
428 \ __get__ util.py:798
428 | successors obsolete.py:672
225 \ __get__ util.py:798
224 | _all obsolete.py:662
6 \ tryread vfs.py:32
6 | read vfs.py:60
200 \ _readmarkers obsolete.py:442
* 199 | _fm1readmarkers obsolete.py:430
15 \ _checkinvalidmarkers obsolete.py:523
* 203 \ _addsuccessors obsolete.py:504
3 \ tryreadcache repoview.py:166
3 | cachehash repoview.py:98
3 | heads localrepo.py:1884
17 \ <genexpr> (5225 times) repoview.py:191
15 | __contains__ (5226 times) ancestor.py:334
5 | parentrevs (4010 times) changelog.py:371
2 | parentrevs (4332 times) revlog.py:479
9 \ pager ui.py:843
9 | _runpager ui.py:906
8 | __init__ subprocess.py:330
8 | _execute_child subprocess.py:880
5 | _eintr_retry_call subprocess.py:118
374 \ show cmdutil.py:1308
374 | _show cmdutil.py:1316
365 \ _changesetlabels cmdutil.py:1266
365 | troubled context.py:225
12 \ unstable (2 times) context.py:207
12 | getrevs obsolete.py:1114
12 | _computeunstableset obsolete.py:1154
3 \ set (322 times) localrepo.py:610
3 \ revs (3 times) localrepo.py:593
8 \ __getitem__ (963 times) localrepo.py:566
4 \ parents (321 times) context.py:246
4 | __get__ (321 times) util.py:798
4 | _parents (321 times) context.py:562
3 \ <genexpr> (642 times) obsolete.py:1164
3 | obsolete (321 times) context.py:199
2 | getrevs (321 times) obsolete.py:1114
319 \ bumped (2 times) context.py:211
319 | getrevs obsolete.py:1114
319 | _computebumpedset obsolete.py:1181
4 \ set (322 times) localrepo.py:610
312 \ allprecursors (2021 times) obsolete.py:850
308 | __get__ util.py:798
308 | precursors obsolete.py:678
* 308 | _addprecursors obsolete.py:509
34 \ divergent (2 times) context.py:218
34 | getrevs obsolete.py:1114
33 | _computedivergentset obsolete.py:1204
4 \ set (322 times) localrepo.py:610
24 \ successorssets (1368 times) obsolete.py:899
3 \ __get__ (1368 times) localrepo.py:76
4 \ __contains__ (200 times) localrepo.py:575
9 \ names (2 times) namespaces.py:184
9 | <lambda> namespaces.py:43
9 | nodetags localrepo.py:741
8 | __get__ util.py:798
8 | _tagscache localrepo.py:646
8 | _findtags localrepo.py:685
7 | findglobaltags tags.py:170
7 | _readtagcache tags.py:370
6 | filteredhash scmutil.py:307
\ __exit__ contextlib.py:21
| maybeprofile profiling.py:199
| __exit__ contextlib.py:21
| profile profiling.py:148
| __exit__ contextlib.py:21
Total time: 867 ms
```
Some example conclusions from reading the above output include:
1. `_fm1readmarkers`, `_addsuccessors`, `_addprecursors` are slow
2. `changelog.node` and `changelog.parentrevs` are called thousands of times,
but they are just a few milliseconds.
3. Reading the `obsstore` file (`vfs.read`) takes 5ms exactly
While `statprof` may get 1 right, `lsprof` may get 2 right, neither of them is
helpful for 3. `statprof` is not accurate enough to be confident about 5ms,
`lsprof` will include other `vfs.read` calls so it's impossible to see just the
read for obsstore.
Test Plan:
With the upstream [patch](https://patchwork.mercurial-scm.org/patch/20822/), run:
```
HGPROF=traceprof hg id --config extensions.traceprof= --profile
```
Reviewers: #mercurial, rmcelroy
Reviewed By: rmcelroy
Subscribers: rmcelroy, mjpieters, jiangyuan
Differential Revision: https://phabricator.intern.facebook.com/D5104851
Signature: t1:5104851:1495480386:666847ea08bb6a94658bc10d7f0e91c225d56836
2017-05-23 01:49:45 +03:00
|
|
|
'traceprof',
|
2016-12-14 04:20:13 +03:00
|
|
|
]
|
2017-06-15 22:55:17 +03:00
|
|
|
|
|
|
|
if USECYTHON:
|
|
|
|
# see http://cython.readthedocs.io/en/latest/src/reference/compilation.html
|
|
|
|
compileroptions = {
|
|
|
|
'unraisable_tracebacks': False,
|
|
|
|
'c_string_type': 'bytes',
|
|
|
|
}
|
|
|
|
for cythonmodule in cythonmodules:
|
|
|
|
if cythonmodule in components:
|
|
|
|
module = availableextmodules[cythonmodule]
|
|
|
|
try:
|
|
|
|
from Cython.Build import cythonize
|
|
|
|
availableextmodules[cythonmodule] = cythonize(
|
|
|
|
module,
|
|
|
|
compiler_directives=compileroptions,
|
|
|
|
)
|
|
|
|
except Exception:
|
|
|
|
# ImportError or Cython.Compiler.Errors.CompileError
|
|
|
|
sys.stderr.write(
|
|
|
|
'+------------------------------------------------+\n'
|
|
|
|
'| Failed to run cythonize. |\n'
|
|
|
|
'| Make sure you have Cython >= 0.21.1 installed. |\n'
|
|
|
|
'+------------------------------------------------+\n')
|
|
|
|
raise SystemExit(255)
|
|
|
|
else:
|
|
|
|
# use prebuilt files under prebuilt/cython
|
|
|
|
# change module sources from .pyx to .c or .cpp files
|
|
|
|
for cythonmodule in cythonmodules:
|
|
|
|
for m in availableextmodules[cythonmodule]:
|
|
|
|
sources = m.sources
|
|
|
|
iscpp = 'c++' in open(sources[0]).readline()
|
|
|
|
ext = iscpp and '.cpp' or '.c'
|
|
|
|
dstpaths = []
|
|
|
|
for src in sources:
|
|
|
|
dst = src.replace('.pyx', ext)
|
|
|
|
dstpaths.append(dst)
|
|
|
|
shutil.copy(os.path.join('prebuilt', 'cython',
|
|
|
|
os.path.basename(dst)), dst)
|
|
|
|
m.sources = dstpaths
|
2016-10-08 19:08:29 +03:00
|
|
|
|
|
|
|
packages = []
|
|
|
|
for package in availablepackages:
|
2017-04-04 21:48:27 +03:00
|
|
|
if package.split('.')[-1] in components:
|
2016-10-08 19:08:29 +03:00
|
|
|
packages.append(package)
|
|
|
|
|
2017-05-06 01:30:00 +03:00
|
|
|
librarynames = set()
|
2016-10-08 19:08:29 +03:00
|
|
|
ext_modules = []
|
|
|
|
for ext_module in availableextmodules:
|
|
|
|
if ext_module in components:
|
2017-05-06 01:30:00 +03:00
|
|
|
modules = availableextmodules[ext_module]
|
|
|
|
ext_modules.extend(modules)
|
|
|
|
librarynames.update(l for m in modules for l in m.libraries)
|
|
|
|
|
|
|
|
libraries = [(n, availablelibraries[n])
|
|
|
|
for n in librarynames if n in availablelibraries]
|
2016-10-08 19:08:29 +03:00
|
|
|
|
2017-02-24 01:03:02 +03:00
|
|
|
# Dependencies between our native libraries means we need to build in order
|
|
|
|
ext_order = {
|
|
|
|
'libdatapack' : 0,
|
|
|
|
'cstore' : 3,
|
|
|
|
}
|
|
|
|
ext_modules = sorted(ext_modules, key=lambda k: ext_order.get(k.name, 999))
|
|
|
|
|
2016-10-08 19:08:29 +03:00
|
|
|
requires = []
|
|
|
|
requireslz4 = ['remotefilelog', 'cdatapack']
|
|
|
|
if any(c for c in components if c in requireslz4):
|
|
|
|
requires.append('lz4')
|
|
|
|
|
|
|
|
py_modules = []
|
|
|
|
for module in availablepymodules:
|
2017-04-04 21:48:27 +03:00
|
|
|
if module.split('.')[-1] in components:
|
|
|
|
py_modules.append(module)
|
2016-10-08 19:08:29 +03:00
|
|
|
|
2017-05-02 19:12:56 +03:00
|
|
|
# Extra clean command cleaning up non-Python extensions
|
|
|
|
class CleanExtCommand(Command):
|
|
|
|
description = 'remove extra build files'
|
|
|
|
user_options = []
|
|
|
|
|
|
|
|
def initialize_options(self):
|
|
|
|
pass
|
|
|
|
|
|
|
|
def finalize_options(self):
|
|
|
|
pass
|
|
|
|
|
|
|
|
def run(self):
|
|
|
|
root = os.path.dirname(os.path.abspath(__file__))
|
|
|
|
os.chdir(root)
|
|
|
|
|
|
|
|
# removed counter (ext: count)
|
|
|
|
removed = {}
|
|
|
|
|
|
|
|
def removepath(path):
|
|
|
|
try:
|
|
|
|
os.unlink(path)
|
|
|
|
except OSError: # ENOENT
|
|
|
|
pass
|
|
|
|
else:
|
|
|
|
ext = path.split('.')[-1]
|
|
|
|
removed.setdefault(ext, 0)
|
|
|
|
removed[ext] += 1
|
|
|
|
|
|
|
|
# remove *.o not belonging to Python extensions, and .py[cdo], .so files
|
|
|
|
for pat in ['*.o', '*.py[cdo]', '*.so']:
|
|
|
|
for path in self._rglob(pat):
|
|
|
|
removepath(path)
|
|
|
|
|
|
|
|
# remove .c generated from Cython .pyx
|
|
|
|
for path in self._rglob('*.pyx'):
|
|
|
|
cpath = '%s.c' % path[:-4]
|
|
|
|
removepath(cpath)
|
traceprof: new extension to perform accurate profiling
Summary:
Instead of sampling periodically like the default profiler `statprof`, trace
every function call so the report is accurate. The output is designed to be
similar to `statprof`.
Written in C++ and inline assembly to minimize overhead. Cython is used as a
glue layer.
Comparing to `lsprof`, the overhead is similarly small. The major difference is
this profiler cares about "frame object" and records `frame->f_back` (parent
frame) so it could draw a statprof-like tree of the callgraph. `lsprof` records
"code object" instead and could have difficulty to deal with recursive calls.
The following code could demostrate the difference:
```
def fib(x):
if x < 2:
return x
return fib(x - 1) + fib(x - 2)
# lsprof - recorded 1 code object for fib and no way to figure out callgraph
import _lsprof
p = _lsprof.Profiler()
p.enable(subcalls=True)
fib(5)
p.disable()
for s in p.getstats():
print(s)
'''output:
_lsprof.profiler_entry(code="<method 'disable' of '_lsprof.Profiler' objects>", callcount=1, reccallcount=0, totaltime=0.0, inlinetime=0.0, calls=None)
_lsprof.profiler_entry(code=<code object fib at 0x7f27ca587530, file "a.py", line 1>, callcount=15, reccallcount=14, totaltime=8e-06, inlinetime=8e-06, calls=[_lsprof.profiler_subentry(code=<code object fib at 0x7f27ca587530, file "a.py", line 1>, callcount=14, reccallcount=12, totaltime=6e-06, inlinetime=6e-06)])
'''
# traceprof (with timethreshold set to 0) - callgraph drawn correctly
import sys
from hgext3rd import traceprof
with traceprof.profile(None, sys.stderr):
fib(5)
'''output:
| <module> a.py:1
0 \ fib a.py:1
0 \ fib a.py:1
0 \ fib a.py:1
0 \ fib a.py:1
0 | fib a.py:1
0 \ fib a.py:1
0 \ fib a.py:1
0 | fib a.py:1
0 \ fib a.py:1
0 | fib a.py:1
\ __exit__ contextlib.py:21
'''
```
There are some performance / correctness tradeoffs. The current implementation
takes `frame` address and several (but not theoretically complete) fields to
hash a frame. That could in theory lead to inaccurate frame information being
recorded. But hashing all fields recursively incurs significant overhead. The
related logic is the `hashframe` function, which could be tweaked later.
This is an initial version. It probably does not cover all corner cases like
exception handling well. And it may duplicate a same frame multiple times
because of the way it hashes a frame. The duplicated frame issue could be
solved by adding post-profiling filtering though.
Here is a real-world example of `hg log -r .` on `hg-committed`:
```
| <module> hg:10
| run dispatch.py:81
| dispatch dispatch.py:132
| _runcatch dispatch.py:202
| _callcatch dispatch.py:309
| callcatch scmutil.py:133
| _runcatchfunc dispatch.py:215
| _dispatch dispatch.py:744
2 \ repository hg.py:166
2 | _peerorrepo hg.py:151
2 | instance localrepo.py:2025
2 | __init__ localrepo.py:275
863 \ runcommand dispatch.py:656
863 | _runcommand dispatch.py:926
863 | <lambda> dispatch.py:918
863 | check util.py:1059
863 | log commands.py:3262
480 \ getlogrevs cmdutil.py:2236
479 | _logrevs cmdutil.py:2190
479 | revrange scmutil.py:429
479 | anyrevs localrepo.py:622
478 | mfunc revset.py:2275
478 | __init__ (4 times) smartset.py:1033
478 | __init__ (4 times) smartset.py:919
478 | __len__ (4 times) localrepo.py:587
478 | changelog repoview.py:309
477 | filterrevs (11 times) repoview.py:260
477 | computehidden repoview.py:171
455 \ hideablerevs repoview.py:24
455 | getrevs obsolete.py:1114
455 | _computeobsoleteset obsolete.py:1143
3 \ __get__ (2 times) localrepo.py:76
7 \ node (5760 times) changelog.py:359
4 | node (8938 times) revlog.py:482
9 \ __get__ (5760 times) localrepo.py:76
428 \ __get__ util.py:798
428 | successors obsolete.py:672
225 \ __get__ util.py:798
224 | _all obsolete.py:662
6 \ tryread vfs.py:32
6 | read vfs.py:60
200 \ _readmarkers obsolete.py:442
* 199 | _fm1readmarkers obsolete.py:430
15 \ _checkinvalidmarkers obsolete.py:523
* 203 \ _addsuccessors obsolete.py:504
3 \ tryreadcache repoview.py:166
3 | cachehash repoview.py:98
3 | heads localrepo.py:1884
17 \ <genexpr> (5225 times) repoview.py:191
15 | __contains__ (5226 times) ancestor.py:334
5 | parentrevs (4010 times) changelog.py:371
2 | parentrevs (4332 times) revlog.py:479
9 \ pager ui.py:843
9 | _runpager ui.py:906
8 | __init__ subprocess.py:330
8 | _execute_child subprocess.py:880
5 | _eintr_retry_call subprocess.py:118
374 \ show cmdutil.py:1308
374 | _show cmdutil.py:1316
365 \ _changesetlabels cmdutil.py:1266
365 | troubled context.py:225
12 \ unstable (2 times) context.py:207
12 | getrevs obsolete.py:1114
12 | _computeunstableset obsolete.py:1154
3 \ set (322 times) localrepo.py:610
3 \ revs (3 times) localrepo.py:593
8 \ __getitem__ (963 times) localrepo.py:566
4 \ parents (321 times) context.py:246
4 | __get__ (321 times) util.py:798
4 | _parents (321 times) context.py:562
3 \ <genexpr> (642 times) obsolete.py:1164
3 | obsolete (321 times) context.py:199
2 | getrevs (321 times) obsolete.py:1114
319 \ bumped (2 times) context.py:211
319 | getrevs obsolete.py:1114
319 | _computebumpedset obsolete.py:1181
4 \ set (322 times) localrepo.py:610
312 \ allprecursors (2021 times) obsolete.py:850
308 | __get__ util.py:798
308 | precursors obsolete.py:678
* 308 | _addprecursors obsolete.py:509
34 \ divergent (2 times) context.py:218
34 | getrevs obsolete.py:1114
33 | _computedivergentset obsolete.py:1204
4 \ set (322 times) localrepo.py:610
24 \ successorssets (1368 times) obsolete.py:899
3 \ __get__ (1368 times) localrepo.py:76
4 \ __contains__ (200 times) localrepo.py:575
9 \ names (2 times) namespaces.py:184
9 | <lambda> namespaces.py:43
9 | nodetags localrepo.py:741
8 | __get__ util.py:798
8 | _tagscache localrepo.py:646
8 | _findtags localrepo.py:685
7 | findglobaltags tags.py:170
7 | _readtagcache tags.py:370
6 | filteredhash scmutil.py:307
\ __exit__ contextlib.py:21
| maybeprofile profiling.py:199
| __exit__ contextlib.py:21
| profile profiling.py:148
| __exit__ contextlib.py:21
Total time: 867 ms
```
Some example conclusions from reading the above output include:
1. `_fm1readmarkers`, `_addsuccessors`, `_addprecursors` are slow
2. `changelog.node` and `changelog.parentrevs` are called thousands of times,
but they are just a few milliseconds.
3. Reading the `obsstore` file (`vfs.read`) takes 5ms exactly
While `statprof` may get 1 right, `lsprof` may get 2 right, neither of them is
helpful for 3. `statprof` is not accurate enough to be confident about 5ms,
`lsprof` will include other `vfs.read` calls so it's impossible to see just the
read for obsstore.
Test Plan:
With the upstream [patch](https://patchwork.mercurial-scm.org/patch/20822/), run:
```
HGPROF=traceprof hg id --config extensions.traceprof= --profile
```
Reviewers: #mercurial, rmcelroy
Reviewed By: rmcelroy
Subscribers: rmcelroy, mjpieters, jiangyuan
Differential Revision: https://phabricator.intern.facebook.com/D5104851
Signature: t1:5104851:1495480386:666847ea08bb6a94658bc10d7f0e91c225d56836
2017-05-23 01:49:45 +03:00
|
|
|
cpppath = cpath + 'pp'
|
|
|
|
removepath(cpppath)
|
2017-05-02 19:12:56 +03:00
|
|
|
|
|
|
|
# print short summary
|
|
|
|
if removed:
|
|
|
|
summary = 'removed %s files' % (
|
|
|
|
', '.join('%s .%s' % (count, ext)
|
|
|
|
for ext, count in sorted(removed.iteritems())))
|
|
|
|
self.announce(summary, level=distutils.log.INFO)
|
|
|
|
|
|
|
|
def _rglob(self, patten):
|
|
|
|
# recursive glob
|
|
|
|
for dirname, dirs, files in os.walk('.'):
|
|
|
|
for name in fnmatch.filter(files, patten):
|
|
|
|
yield os.path.join(dirname, name)
|
|
|
|
|
2013-06-20 23:16:36 +04:00
|
|
|
setup(
|
2013-06-20 23:57:57 +04:00
|
|
|
name='fbhgext',
|
2016-08-26 16:27:59 +03:00
|
|
|
version='1.0',
|
|
|
|
author='Facebook Source Control Team',
|
|
|
|
maintainer='Facebook Source Control Team',
|
|
|
|
maintainer_email='sourcecontrol-dev@fb.com',
|
|
|
|
url='https://bitbucket.org/facebook/hg-experimental/',
|
|
|
|
description='Facebook mercurial extensions',
|
2013-06-20 23:16:36 +04:00
|
|
|
long_description="",
|
2016-08-26 16:27:59 +03:00
|
|
|
keywords='facebook fb hg mercurial shallow remote filelog',
|
|
|
|
license='GPLv2+',
|
2016-10-08 19:08:29 +03:00
|
|
|
packages=packages,
|
|
|
|
install_requires=requires,
|
|
|
|
py_modules=py_modules,
|
|
|
|
ext_modules = ext_modules,
|
2017-02-24 01:03:03 +03:00
|
|
|
libraries=libraries,
|
2017-05-02 19:12:56 +03:00
|
|
|
cmdclass={
|
|
|
|
'clean_ext': CleanExtCommand,
|
|
|
|
}
|
2013-06-20 23:16:36 +04:00
|
|
|
)
|