# Copyright (c) Facebook, Inc. and its affiliates. # # This software may be used and distributed according to the terms of the # GNU General Public License version 2. from __future__ import absolute_import # Attention: Modules imported are not traceable. Keep the list minimal. import sys import types import bindings enabled = False class ModuleLoader(object): # load_module: (fullname) -> module # See find_module below for why it's implemented in this way. load_module = sys.modules.__getitem__ class TraceImporter(object): """Trace time spent on importing modules. In additional, wrap functions so they get traced. """ # Excluded modules - Tracing them might yield huge amount of # uninteresting data. _blocklist = { # unicodedata.east_asian_width can be called very frequently. "unicodedata", # parsers.isasciistr can be called very frequently. "edenscmnative.parsers", # encoding.tolocal can be called very frequently. "edenscm.mercurial.encoding", } def __init__(self, shouldtrace=lambda _name: True): """ shouldtrace: (name) -> bool. If shouldtrace(modulename) is True, trace functions in module. If shouldtrace("import") is True, trace import statements. """ # Function parameters are used below for performance. # They changed LOAD_GLOBAL to LOAD_FAST. _modules = sys.modules _loader = ModuleLoader() _attempted = set() def _import( name, _shouldwrap=shouldtrace, _rawimport=__import__, _get=_modules.__getitem__, _wrap=tracemodule, _blocklist=self._blocklist, ): _rawimport(name) if _shouldwrap(name) and name not in _blocklist: mod = _get(name) _wrap(mod) try: pass except Exception as ex: # Failing to wrap the module is not fatal. More importantly, # Do not translate this into an ImportError, which might # trigger surprising behaviors, including importing (aka. # executing code) on an already imported module again. # While most modules are fine, some modules are definitely # not ready for it. For example: # # # module foo.py # import time # origtime = time.time # def newtime(): # return origtime() + 1 # time.time = newtime # # When importing again, `newtime()` will stack overflow, # since `origtime = time.time` gets executed, and `origtime` # used in `newtime` is `newtime` itself. # # The same effect can be achieved using `reload` from # stdlib: `reload(foo)`. But most modules are not tested # about `reload` friendliness. # But, still surface the error, since normally traceimport # should be able to wrap modules just fine. sys.stderr.write( "traceimport: fail to instrument module %s: %r\n" % (name, ex) ) sys.stderr.flush() if shouldtrace("import"): _import = bindings.tracing.wrapfunc( _import, meta=lambda name: [("name", "import %s" % name), ("cat", "import")], ) # importer.find_module(fullname, path=None) is defined by PEP 302. # Note: Python 3.4 introduced find_spec, and deprecated this API. def find_module( fullname, path=None, _import=_import, _attempted=_attempted, _loader=_loader, _modules=_modules, ): # Example arguments: # - fullname = "contextlib", path = None # - fullname = "io", path = None # - fullname = "edenscm.mercurial.blackbox", path = ["/data/edenscm"] # - fullname = "email.errors", path = ["/lib/python/email"] # PEP 302 says "find_module" returns either None or a "loader" that has # "load_module(fullname)" to actually load the module. # # Abuse the interface by actually importing the module now. if fullname not in _attempted: assert fullname not in _modules _attempted.add(fullname) _import(fullname) # Since we just imported the module (to sys.modules). # The loader can read it from sys.modules directly. return _loader # Try the next importer. return None self.find_module = find_module _functypes = (types.FunctionType, types.BuiltinFunctionType) _isheaptype = bindings.tracing.isheaptype _tracedclasses = {object, type, types.ModuleType, dict} _wrapfunc = bindings.tracing.wrapfunc def traceclass(cls): """Annotate functions in a class so they get traced.""" bases = getattr(cls, "__mro__", []) for obj in bases: # It's possible to have recursive classes (ex. ctypes). So avoid # wrapping a same class again. if obj in _tracedclasses: continue _tracedclasses.add(obj) # Don't bother with non-heap types. `setattr` does not work on them. if not isinstance(obj, type) or not _isheaptype(obj): continue container = obj.__dict__ name = obj.__name__ for k, v in container.items(): if isinstance(v, type): traceclass(v) elif isinstance(v, _functypes): # `container` is likely a read-only `dict_proxy`. # So `container[k] = v` does not work. Use `setattr` instead. # See https://stackoverflow.com/questions/25440694. setattr(obj, k, _wrapfunc(v, classname=name)) def tracemodule(mod): """Annotate functions and classes in a module so they get traced.""" modname = mod.__name__ container = mod.__dict__ for k, v in container.items(): if getattr(v, "__module__", None) != modname: continue if isinstance(v, type): traceclass(v) elif isinstance(v, _functypes): container[k] = _wrapfunc(v) def enable(config=None): """Enable traceimport. 'config' is space separated names. Space separated names. A name can be one of the following forms: - "import": Trace import. - "foo.bar": Attempt to trace functions in module "foo.bar" without its submodules. - "foo.bar.*": Attempt to trace functions in module"foo.bar" and its submodules. - "*": Attempt to trace everything. If config is not specified, it's read from `os.getenv("EDENSCM_TRACE_PY")`. If 'printatexit' is True, print a ASCII graph at the end of program (for quick-adhoc performance analysis). """ if config is None: import os config = os.getenv("EDENSCM_TRACE_PY") if config in {None, ""}: return names = config.split() prefixes = [n[:-1] for n in names if n.endswith("*")] exactnames = {n for n in names if not n.endswith("*")} if "" in prefixes: def shouldtrace(name): return True else: def shouldtrace(name, _exact=exactnames, _prefix=prefixes, _any=any): if name in _exact: return True startswith = name.startswith return _any(startswith(p) for p in _prefix) sys.meta_path.insert(0, TraceImporter(shouldtrace)) global enabled enabled = True def registeratexit(threshold=20000): """Register an atexit handler that prints ASCII tracing output. This is for quick ad-hoc performance analysis. """ import atexit def printtrace(): tracer = bindings.tracing.singleton sys.stderr.write(tracer.ascii(threshold)) sys.stderr.flush() atexit.register(printtrace)