sapling/fastmanifest/metrics.py
Tony Tung 38faebe31d [fastmanifest] the key of the hit ratio should be the aggregating key
Summary: If the key is always 'ratio', then all the ratios are aliased when we send off aggregate stats.  That means we end up sending the last entry, rather than each individual entry.

Test Plan:
along with D3504940

run FB_HG_DIAGS= hg diff -c .:

```
    "int": {
        "builddate": 1467289392,
        "cachehitratio": 100,
        "consumed": 489,
        "diffcachehitratio": -1,
        "elapsed": 280,
        "errorcode": 0,
        "filesnotincachehitratio": 0,
        "time": 1467318481
    },
```

Reviewers: lcharignon

Reviewed By: lcharignon

Subscribers: mitrandir, mjpieters

Differential Revision: https://phabricator.intern.facebook.com/D3505464

Tasks: 12019647

Signature: t1:3505464:1467320144:327e0d306b9afa90ed9fc2d704a8ca02a79f3038
2016-06-30 13:59:40 -07:00

139 lines
5.1 KiB
Python

# metrics.py
#
# Copyright 2016 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
#
# To log a new metric, add it to the FASTMANIFEST_METRICS set.
# Then from the code, use metrics.metricscollector.get() to get a metrics
# `collector`.
# Call collector.recordsample(metricsname, key=value, key2=value2, ...) to
# record a sample.
#
# When the command ends the sample will be relayed with ui.log unless
# it is in the list FASTMANIFEST_DONOTREPORT_METRICS.
# You would put a metric in that list if you do some computation with it
# and are not interested in the individual samples but only their aggregation.
# For example, if you want to record the cache hit ratio, you can record
# all the cache hits and cache misses, not report them, but compute and report
# their ratio.
#
# To debug metrics use fastmanifest.debugmetrics = True, this will print
# the metrics collected for each command with ui.status at the end of each
# command.
from mercurial import util
# Metrics that are recorded but never relayed individually: they only feed
# the aggregate hit ratios computed when the samples are logged.
FASTMANIFEST_DONOTREPORT_METRICS = set((
    "cachehit",
    "diffcachehit",
    "filesnotincachehit",
))
# Every metric kind that may be handed to metricscollector.recordsample().
FASTMANIFEST_METRICS = set((
    ## Individual Metrics
    # ondiskcachestats has information about the cache on disk
    # => keys are "bytes", "entries", "limit" and "freespace", all numbers,
    #    freespace and limit are in MB
    "ondiskcachestats",
    # revsetsize is the number of revisions in the 'fastmanifesttocache()'
    # => key is "size", a number
    "revsetsize",
    # trigger is what caused caching to trigger
    # => key is "source", one of ("commit", "remotenames", "bookmark")
    "trigger",
    # cacheoverflow logs a cache overflow event: there was not enough space
    # in the cache to store revisions. It tells us how to resize the cache
    # if needed
    # => key is "hit", always True
    "cacheoverflow",
    # The following three metrics are aggregated as ratios. They register
    # cache hits and misses at different levels: global, diff and during
    # filesnotin operations
    # => key is "hit", True or False: True is a cache hit, False a cache miss
    "cachehit",
    "diffcachehit",
    "filesnotincachehit",
    ## Aggregate Metrics
    # Cache hit ratio (global, diff and filesnotin), expressed as a
    # percentage, so between 0 and 100. -1 means no operations.
    # => key is the name of the metric itself (e.g. "cachehitratio"), so
    #    that the ratios do not alias each other; the value is a number
    # examples:
    # -1 for cachehitratio => we never accessed a manifest for the command
    # 30 for cachehitratio => 30% of manifest accesses hit the cache
    # 45 for diffcachehitratio => 45% of manifest diffs hit the cache
    "cachehitratio",
    "diffcachehitratio",
    "filesnotincachehitratio",
))
class metricscollector(object):
    """Collector of fastmanifest metric samples.

    Samples are kept in ``self.samples`` as ``(kind, kwargs)`` tuples, in
    the order they were recorded, until ``logsamples`` relays them.
    """

    # Shared instance handed out by get(); created lazily on first use.
    _instance = None

    @classmethod
    def get(cls):
        """Return the shared collector, creating it on the first call."""
        if not cls._instance:
            cls._instance = metricscollector()
        return cls._instance

    def __init__(self):
        self.samples = []

    def recordsample(self, kind, **kwargs):
        """Record one sample of `kind` with the given key/value payload.

        `kind` must be one of FASTMANIFEST_METRICS.
        """
        assert kind in FASTMANIFEST_METRICS
        self.samples.append((kind, kwargs))

    def mergesamples(self, collector):
        """Append the samples of another collector to this one.

        Merging a collector into itself is a no-op. Returns self.
        """
        if collector is not self:
            self.samples.extend(collector.samples)
        return self

    def _addaggregatesamples(self):
        """Append one hit-ratio sample per hit/miss metric to self.samples."""

        def _addhitratio(key, aggkey):
            # Aggregate the cache hits and misses recorded under `key` into
            # a percentage and store it as the sample aggkey: {aggkey: ratio}
            outcomes = [kwargs["hit"] for kind, kwargs in self.samples
                        if kind == key]
            if not outcomes:
                # No operation of that kind was recorded.
                ratio = -1
            else:
                hit = len([o for o in outcomes if o])
                ratio = float(hit) * 100 / len(outcomes)
            self.recordsample(aggkey, **{aggkey: ratio})

        _addhitratio("cachehit", "cachehitratio")
        _addhitratio("diffcachehit", "diffcachehitratio")
        _addhitratio("filesnotincachehit", "filesnotincachehitratio")

    def logsamples(self, ui):
        """Compute the aggregate samples, then relay every sample to ui.log.

        Kinds listed in FASTMANIFEST_DONOTREPORT_METRICS are skipped. When
        fastmanifest.debugmetrics is enabled, each reported sample is also
        printed with ui.status.
        """
        self._addaggregatesamples()
        # configbool, not config: config returns the raw string, so an
        # explicit "debugmetrics = False" would be truthy and enable debug.
        debug = ui.configbool("fastmanifest", "debugmetrics", False)
        if debug:
            ui.status(("[FM-METRICS] Begin metrics\n"))

        for kind, kwargs in self.samples:
            if kind in FASTMANIFEST_DONOTREPORT_METRICS:
                continue

            if debug:
                # Work on a copy so that the sample handed to ui.log below
                # (and kept in self.samples) is left untouched.
                dispkw = dict(kwargs)
                # Not removing freespace and limit would make the output of
                # tests machine dependent
                dispkw.pop("freespace", None)
                dispkw.pop("limit", None)
                # Here we sort to make test output stable
                ui.status(("[FM-METRICS] kind: %s, kwargs: %s\n"
                           % (kind, sorted(dispkw.items()))))

            ui.log('fastmanifest-%s' % kind,
                   "",  # ui.log requires a format string as args[0].
                   **kwargs)

        if debug:
            ui.status(("[FM-METRICS] End metrics\n"))