sapling/edenscm/hgext/sampling.py
Mark Thomas f337e0d5e4 sampling: fail gracefully when ui.log arguments are not a valid format
Summary:
`ui.log` expects to be called with valid format arguments.  If the arguments
are not a valid format string, or the number of arguments doesn't match the
number of format placeholders, formatting will fail.

In this case, catch the exception and fail gracefully.  Don't even bother
formatting if there is exactly one argument.

The `blackbox` extension already does this, so extend to the `sampling`
extension.

Also fix the place where `perftrace` calls `ui.log` with a string that might
contain formatting placeholders.

Reviewed By: quark-zju

Differential Revision: D14938952

fbshipit-source-id: 1d9802308dba925109c018124d51273c348526b4
2019-04-16 01:21:55 -07:00

221 lines
7.5 KiB
Python

# sampling.py - sample collection extension
#
# Copyright 2016 Facebook
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
#
# Usage:
# - This extension enhances ui.log(category, message, key=value, ...)
# to also append filtered logged events as JSON to a file.
# - The events are separated by NULL characters: '\0'.
# - The file is either specified with the SCM_SAMPLING_FILEPATH environment
# variable or the sampling.filepath configuration.
# - If the file cannot be created or accessed, sampling fails silently.
#
# The configuration details can be found in the documentation of ui.log below
import json
import os
import weakref
from edenscm.mercurial import encoding, localrepo, pycompat, registrar, util
# Table that collects the config items declared by this extension;
# `configitem` is the registrar helper bound to it.
configtable = {}
configitem = registrar.configitem(configtable)
# sampling.filepath: location of the file that sampled events are appended
# to. Defaults to the empty string; the SCM_SAMPLING_FILEPATH environment
# variable is consulted before this option (see _getcandidatelocation).
configitem("sampling", "filepath", default="")
def _parentfolderexists(f):
return f is not None and os.path.exists(os.path.dirname(os.path.normpath(f)))
def _getcandidatelocation(ui):
    """Pick the file that sampled events should be appended to.

    The SCM_SAMPLING_FILEPATH environment variable is considered first, then
    the sampling.filepath config. The first candidate whose parent folder
    exists is returned; None is returned when neither qualifies.
    """
    candidates = (
        encoding.environ.get("SCM_SAMPLING_FILEPATH", None),
        ui.config("sampling", "filepath"),
    )
    usable = (path for path in candidates if _parentfolderexists(path))
    return next(usable, None)
def _formatlogmessage(msg):
    """Render the positional message arguments of a ``ui.log`` call.

    ``msg`` is the non-empty tuple of positional arguments that followed the
    event name: a format string, optionally followed by its format arguments.

    A single item is returned as-is, without formatting. Otherwise the first
    item is %-formatted with the remaining items; when that fails, every item
    is stringified and the results are joined with single spaces.
    """
    if len(msg) == 1:
        # don't try to format if there is only one item.
        return msg[0]
    try:
        # ui.log treats msg as a format string + format args.
        return msg[0] % msg[1:]
    except (TypeError, ValueError):
        # TypeError: the arguments don't match the placeholders (or msg[0]
        # is not a format string at all). ValueError: the format string
        # itself is malformed, e.g. it ends with a bare "%".
        # Formatting failed - just log each item of the message separately.
        # The items need not be strings, so stringify them before joining.
        return " ".join("%s" % (item,) for item in msg)


def uisetup(ui):
    """Make ``ui.log`` also append filtered events to the sampling file.

    The class of ``ui`` is replaced by a subclass whose ``log`` method writes
    every event named in the ``[sampling]`` config to the sampling file before
    delegating to the original ``log`` implementation.
    """

    class logtofile(ui.__class__):
        @classmethod
        def computesamplingfilters(cls, self):
            """Return a dict mapping sampled event names to their reference.

            Every ``key.<eventname> = <value>`` entry of the ``[sampling]``
            section contributes ``{<eventname>: <value>}``. The entries are
            read from the ``ui`` that was passed to ``uisetup``; the ``self``
            argument is not consulted.
            """
            filtermap = {}
            for k in ui.configitems("sampling"):
                if not k[0].startswith("key."):
                    continue  # not a key
                filtermap[k[0][len("key.") :]] = k[1]
            return filtermap

        def log(self, event, *msg, **opts):
            r"""Redirect filtered log event to a sampling file

            The configuration looks like:
                [sampling]
                filepath = path/to/file
                key.eventname = value
                key.eventname2 = value2

            If an event name appears in the config, it is logged to the
            sampling file, tagged with the configured value as its category.

            Example:
                [sampling]
                filepath = path/to/file
                key.perfstatus = perf_status

            Assuming that we call:
                ui.log('perfstatus', t=3)
                ui.log('perfcommit', t=3)
                ui.log('perfstatus', t=42)

            Then we will log in path/to/file, two JSON strings separated by \0
            one for each perfstatus, like:
                {"data":{"t":3,"metrics_type":"perfstatus"},
                 "category":"perf_status"}\0
                {"data":{"t":42,"metrics_type":"perfstatus"},
                 "category":"perf_status"}\0

            When a message is given (and the event is not "metrics"), it is
            stored under the "msg" key of "data".

            We will also log any given environmental vars to the env_vars log,
            if configured::

                [sampling]
                env_vars = PATH,SHELL
            """
            # The filters are computed lazily, once per ui instance.
            if not util.safehasattr(self, "samplingfilters"):
                self.samplingfilters = logtofile.computesamplingfilters(self)
            if event not in self.samplingfilters:
                return super(logtofile, self).log(event, *msg, **opts)

            # special case: remove less interesting blocked fields starting
            # with "unknown_" or "alias_".
            if event == "measuredtimes":
                opts = {
                    k: v
                    for k, v in opts.items()
                    if (not k.startswith("alias_") and not k.startswith("unknown_"))
                }

            ref = self.samplingfilters[event]
            script = _getcandidatelocation(ui)
            if script:
                try:
                    opts["metrics_type"] = event
                    if msg and event != "metrics":
                        # do not keep message for "metrics", which only wants
                        # to log key/value dict.
                        opts["msg"] = _formatlogmessage(msg)
                    with open(script, "a") as outfile:
                        outfile.write(json.dumps({"data": opts, "category": ref}))
                        outfile.write("\0")
                except EnvironmentError:
                    # Sampling is best-effort: an unwritable file must not
                    # break the command being run.
                    pass

            # NOTE: opts, including the "metrics_type" and "msg" entries added
            # above, is forwarded to the wrapped implementation.
            return super(logtofile, self).log(event, *msg, **opts)

    # Replace the class for this instance and all clones created from it:
    ui.__class__ = logtofile
def getrelativecwd(repo):
    """Returns the current directory relative to the working copy root, or
    None if it's not in the working copy.

    When the current directory is the working copy root itself, the result
    is the normalized empty path (".").
    """
    cwd = pycompat.getcwdsafe()
    root = repo.root
    sep = pycompat.ossep
    # Match against the root followed by exactly one separator. A bare prefix
    # test would wrongly accept sibling directories such as "/repo2" for a
    # root of "/repo", and a fixed "+ 1" offset would drop a character when
    # the root already ends with a separator (e.g. "/").
    prefix = root.rstrip(sep) + sep
    if cwd == root or cwd.startswith(prefix):
        return os.path.normpath(cwd[len(prefix) :])
    return None
def gettopdir(repo):
    """Returns the top-level directory of the working copy that contains the
    current directory.

    None is returned when the current directory is outside the working copy
    or is the working copy root itself.
    """
    reldir = getrelativecwd(repo)
    if not reldir:
        return None
    # str.split always yields at least one component.
    first = reldir.split(pycompat.ossep)[0]
    return first if first != "." else None
def telemetry(reporef):
    """Log repository telemetry through ``ui.log``.

    ``reporef`` is a callable returning the repository, or None once the
    repository is gone (``reposetup`` passes a ``weakref.ref``); nothing is
    logged in the latter case.

    The repository requirements, the LFS metrics (when available) and the
    maximum RSS of the process are logged. Any failure while collecting them
    is itself logged as ``sampling_failure`` under ``command_info`` instead of
    being raised.
    """
    repo = reporef()
    if repo is None:
        return
    # Bind ui before entering the try block so that the failure handler below
    # can always rely on it.
    ui = repo.ui
    try:
        if util.safehasattr(repo, "requirements"):
            ui.log(
                "requirements",
                generaldelta=str("generaldelta" in repo.requirements).lower(),
            )
            ui.log(
                "requirements",
                remotefilelog=str("remotefilelog" in repo.requirements).lower(),
            )

        try:
            lfsmetrics = repo.svfs.lfsremoteblobstore.getlfsmetrics()
            ui.log("command_metrics", **lfsmetrics)
        except Exception:
            # LFS metrics are optional: skip them when they can't be obtained.
            pass

        maxrss = util.getmaxrss()

        # Log maxrss from within the hg process. The wrapper logs its own
        # value (which is incorrect if chg is used) so the column is
        # prefixed.
        ui.log("command_info", hg_maxrss=maxrss)
    except Exception as e:
        # str(e) instead of e.message: exceptions have no "message" attribute
        # on Python 3, so reading it would make this handler raise.
        ui.log("command_info", sampling_failure=str(e))
def reposetup(ui, repo):
    """Register exit-time telemetry for ``repo`` and log cheap metrics.

    Nothing is done for repositories that are not local repositories.
    Otherwise ``telemetry`` is scheduled to run at exit with a weak reference
    to the repo, the top-level directory of the current directory is logged
    when sampling.logtopdir is set, and the environment variables listed in
    sampling.env_vars are logged under the "env_vars" event.
    """
    # Remote peers (e.g. sshpeer) get no telemetry.
    if not isinstance(repo, localrepo.localrepository):
        return

    repo.ui.atexit(telemetry, weakref.ref(repo))

    # The remaining information is cheap to compute and is not logged by the
    # wrapper.

    # Bucket the current directory by top-level directory, if enabled. This
    # gives a very rough approximation of the area the user works in.
    # developer config: sampling.logtopdir
    topdir = gettopdir(repo) if repo.ui.config("sampling", "logtopdir") else None
    if topdir:
        ui.log("command_info", topdir=topdir)

    # Map the configured environment variables directly to metrics columns.
    # The column name is "env_" followed by the lowercased variable name; in
    # the future, an override config could be used to customize it.
    environ = encoding.environ
    columns = {
        "env_" + name.lower(): environ[name]
        for name in ui.configlist("sampling", "env_vars")
        if name in environ
    }
    ui.log("env_vars", **columns)