sapling/fastmanifest/metrics.py
Laurent Charignon b6eae920e2 fastmanifest: fix logic error for debugmetrics and simplify test
Summary:
Before this patch we were using config instead of configbool for reading
the debugmetrics config causing "False" to be evaluated as a truthy value
for the config. This patch fixes the issue and sets the config to false for
some of the tests to reduce the noise of the output.

Test Plan: Tests pass

Reviewers: ttung

Differential Revision: https://phabricator.intern.facebook.com/D3524501
2016-07-06 13:42:32 -07:00

145 lines
5.4 KiB
Python

# metrics.py
#
# Copyright 2016 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
#
# To log a new metric, add it to the list FASTMANIFEST_METRICS
# Then from the code, use metrics.metricscollector.get() to get the shared
# metrics `collector`, and call
# collector.recordsample(metricsname, key=value, key2=value2, ...) to
# record a sample.
#
# When the command ends the sample will be relayed with ui.log unless
# it is in the list FASTMANIFEST_DONOTREPORT_METRICS.
# You would put a metric in that list if you do some computation with it
# and are not interested in the individual samples but only their aggregation.
# For example, if you want to record the cache hit ratio, you can record
# all the cache hit and cache miss, not report them but compute and report their
# ratio.
#
# To debug metrics use fastmanifest.debugmetrics = True, this will print
# the metrics collected for each command with ui.status at the end of each
# command.
# Raw hit/miss samples: they only feed the aggregate hit ratios and are
# skipped when the collected samples are reported.
FASTMANIFEST_DONOTREPORT_METRICS = {
    "cachehit",
    "diffcachehit",
    "filesnotincachehit",
}
# Every metric kind that may be recorded; recording any other kind trips an
# assertion in the collector.
FASTMANIFEST_METRICS = {
    # ---- Individual metrics ----

    # ondiskcachestats: state of the on-disk cache.
    #   keys: "bytes", "entries", "limit", "freespace" (all numbers;
    #   freespace and limit are in MB)
    "ondiskcachestats",

    # revsetsize: number of revisions in 'fastmanifesttocache()'.
    #   key: "size" (a number)
    "revsetsize",

    # trigger: what caused caching to start.
    #   key: "source", one of ("commit", "remotenames", "bookmark")
    "trigger",

    # cacheoverflow: the cache did not have enough space to store the
    # revisions; useful to decide how to resize the cache.
    #   key: "hit", always True
    "cacheoverflow",

    # Cache hit/miss events at three levels (global, diff, filesnotin).
    # These are only aggregated into the ratios below.
    #   key: "hit", True for a cache hit, False for a cache miss
    "cachehit",
    "diffcachehit",
    "filesnotincachehit",

    # ---- Aggregate metrics ----

    # Cache hit ratios (global, diff, filesnotin) as an integer percentage
    # between 0 and 100; -1 means no operation was recorded.
    #   key: the metric's own name (e.g. "cachehitratio"), a number
    # examples:
    #   -1 for cachehitratio     => no manifest was accessed by the command
    #   30 for cachehitratio     => 30% of manifest accesses hit the cache
    #   45 for diffcachehitratio => 45% of manifest diffs hit the cache
    "cachehitratio",
    "diffcachehitratio",
    "filesnotincachehitratio",
}
class metricscollector(object):
    """Accumulates fastmanifest metric samples and relays them through a ui.

    Samples are stored in ``self.samples`` as ``(kind, kwargs)`` tuples, in
    the order they were recorded. ``get()`` hands out one shared instance
    cached on the class; instances can also be created directly and folded
    into another collector with ``mergesamples``.
    """

    # Shared instance handed out by get(); created lazily.
    _instance = None

    @classmethod
    def get(cls):
        """Return the shared collector, creating it on first call."""
        if cls._instance is None:
            cls._instance = metricscollector()
        return cls._instance

    def __init__(self):
        # List of (kind, kwargs) tuples, in recording order.
        self.samples = []

    def recordsample(self, kind, **kwargs):
        """Append one sample of the given kind.

        ``kind`` must be a member of FASTMANIFEST_METRICS (checked with an
        assertion); ``kwargs`` is the sample payload and is stored as is.
        """
        assert kind in FASTMANIFEST_METRICS, "unknown metric kind: %r" % kind
        self.samples.append((kind, kwargs))

    def mergesamples(self, collector):
        """Append the samples of ``collector`` to this one and return self.

        Merging a collector with itself is a no-op, so samples are not
        duplicated.
        """
        if collector is not self:
            self.samples.extend(collector.samples)
        return self

    def _addaggregatesamples(self):
        """Record one hit-ratio sample per hit/miss metric.

        Every call appends three new aggregate samples computed from the
        hit/miss samples present in ``self.samples`` at that time.
        """
        def _addhitratio(key, aggkey, dedupe=False):
            # Aggregate the hits and misses recorded under `key` into an
            # integer percentage and record it as a sample of kind `aggkey`
            # whose payload is {aggkey: ratio}. The ratio is -1 when there
            # is neither a hit nor a miss.
            # If dedupe is set, hits and misses are each counted by distinct
            # "node" value instead of by number of samples.
            payloads = [kw for kind, kw in self.samples if kind == key]
            hits = [kw for kw in payloads if kw["hit"]]
            misses = [kw for kw in payloads if not kw["hit"]]
            if dedupe:
                hit = len(set(kw["node"] for kw in hits))
                miss = len(set(kw["node"] for kw in misses))
            else:
                hit = len(hits)
                miss = len(misses)

            total = hit + miss
            if total == 0:
                ratio = -1
            else:
                # Truncate (not round) to an integer percentage.
                ratio = int(float(hit) * 100 / total)
            self.recordsample(aggkey, **{aggkey: ratio})

        _addhitratio("cachehit", "cachehitratio", dedupe=True)
        _addhitratio("diffcachehit", "diffcachehitratio")
        _addhitratio("filesnotincachehit", "filesnotincachehitratio")

    def logsamples(self, ui):
        """Compute the aggregate samples, then relay every sample via ui.log.

        Samples whose kind is in FASTMANIFEST_DONOTREPORT_METRICS are
        skipped. When the ``fastmanifest.debugmetrics`` config is true, each
        reported sample is also printed with ui.status, between a begin and
        an end marker.

        ``ui`` must provide ``configbool``, ``status`` and ``log``.
        """
        self._addaggregatesamples()
        debug = ui.configbool("fastmanifest", "debugmetrics", False)
        if debug:
            ui.status(("[FM-METRICS] Begin metrics\n"))

        for kind, kwargs in self.samples:
            if kind in FASTMANIFEST_DONOTREPORT_METRICS:
                continue

            if debug:
                # Not removing freespace and limit would make the output of
                # tests machine dependent. Filter into a separate dict so
                # the stored sample and the payload given to ui.log below
                # keep these fields.
                dispkw = dict((k, v) for k, v in kwargs.items()
                              if k not in ("freespace", "limit"))
                # Here we sort to make test output stable
                ui.status(("[FM-METRICS] kind: %s, kwargs: %s\n"
                           % (kind, sorted(dispkw.items()))))

            ui.log('fastmanifest-%s' % kind,
                   "",  # ui.log requires a format string as args[0].
                   **kwargs)

        if debug:
            ui.status(("[FM-METRICS] End metrics\n"))