sapling/eden/scm/edenscm/hgext/clienttelemetry.py
Xavier Deguillard 472ec19699 clienttelemetry: add wantslfspointers
Summary:
Rolling out LFSv2 on fbsource is a bit complex. Initially, the intent was to
roll it out via a server-only config and send pointers to say 5% of the
clients. The big snag in this is that LFS pointers are stored in Memcache, and
thus, a client who wasn't supposed to use LFS may end up reading a pointer from
Memcache, and issuing a request to the LFS server. Thanks to the way Memcache
works, this may lead to an avalanche effect where everyone is fetching LFS
blobs, even if the server rollout is at a small percentage.

There are several solutions to this, the first obvious one would be to simply
not use Memcache for pointers, but that also means a forced connection to the
server, and a higher latency, i.e. not a very desirable situation. An
alternative would be to have a proper capability exchange at connection time,
but that's unfortunately not feasible today due to the need to support the old
Mercurial server. Long term this is definitely the approach we want to take,
and depending on the exchanged capabilities, we can even imagine using
different memcache keys automatically.

For now, we can hack this up by re-using the only free-form channel that the
client has to the server: clienttelemetry. Mononoke can then use the passed in
information to decide on whether to send LFS pointers (or not). This
unfortunately means that the rollout will be entirely client-side driven. To alleviate
the issue of Memcache keys being shared between clients wanting LFS pointers
and the ones not wanting them, a different Memcache key space will be used.

Reviewed By: StanislavGlebik

Differential Revision: D21765065

fbshipit-source-id: aebda3c567a827342b2fa96d374a06a23ea0ca34
2020-05-28 14:16:35 -07:00

143 lines
4.3 KiB
Python

# Copyright (c) Facebook, Inc. and its affiliates.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2.
# clienttelemetry: provide information about the client in server telemetry
"""provide information about the client in server telemetry
[clienttelemetry]
# whether or not to announce the remote hostname when connecting
announceremotehostname = False
"""
from __future__ import absolute_import
import socket
import string
from edenscm.mercurial import (
blackbox,
dispatch,
extensions,
hg,
perftrace,
util,
wireproto,
)
from edenscm.mercurial.i18n import _
from edenscm.mercurial.pycompat import decodeutf8
# Registry of client telemetry functions, keyed by function name. These
# functions generate client telemetry data at connection time.
_clienttelemetryfuncs = {}


def clienttelemetryfunc(f):
    """Decorator that registers ``f`` as a client telemetry function.

    The function is stored in the registry under its ``__name__`` and is
    returned unchanged.
    """
    name = f.__name__
    _clienttelemetryfuncs[name] = f
    return f
@clienttelemetryfunc
def hostname(ui):
    """Telemetry value: the hostname reported by ``socket.gethostname()``.

    ``ui`` is accepted because every telemetry function is called with it,
    but it is not used here.
    """
    localname = socket.gethostname()
    return localname
# Module-level cache for the correlator; stays None until first requested.
_correlator = None


@clienttelemetryfunc
def correlator(ui):
    """
    Return the correlator, creating and logging it on first use.

    The correlator is a random string that is logged on both the client and
    server, so that client logging can be correlated with server logging.
    Later calls return the same cached value without logging again.
    """
    global _correlator
    if _correlator is not None:
        return _correlator
    _correlator = util.makerandomidentifier()
    ui.log("clienttelemetry", client_correlator=_correlator)
    return _correlator
@clienttelemetryfunc
def wantslfspointers(ui):
    """
    Tell the server whether this client wants LFS pointers to be sent in
    getpackv2.

    Only applies while the repository is being migrated to sending LFS
    pointers; it does not apply to repositories that are already converted.

    This is a hack. Hopefully, once the Mercurial servers are gone, a real
    capability exchange can happen when establishing a connection.

    The value is the string form of the ``lfs.wantslfspointers`` boolean
    config.
    """
    wanted = ui.configbool("lfs", "wantslfspointers")
    return str(wanted)
# Client telemetry data is generated before connection and stored here.
# _runcommand fills in the "command" and "fullcommand" entries, and
# _peersetup merges this dict into the arguments it sends to the peer.
_clienttelemetrydata = {}
def _clienttelemetry(repo, proto, args):
    """Handle received client telemetry.

    Each key of ``args`` is prefixed with ``client_``. The resulting fields
    are logged to the "clienttelemetry" service and stored on
    ``repo.clienttelemetry``. ``proto`` is not used.

    Returns the hostname reported by ``socket.gethostname()``.
    """
    logargs = {}
    for key, value in args.items():
        logargs["client_%s" % key] = value
    ui = repo.ui
    ui.log("clienttelemetry", **logargs)
    # Make them available to other extensions
    repo.clienttelemetry = logargs
    return socket.gethostname()
def getclienttelemetry(repo):
    """Return selected client telemetry fields recorded on ``repo``.

    Only the "client_fullcommand" and "client_hostname" entries of
    ``repo.clienttelemetry`` are returned, and only those that are present.
    The result is a new dict; it is empty when ``repo`` has no
    ``clienttelemetry`` attribute.
    """
    # One getattr with a default replaces the separate has-attribute check
    # followed by a second attribute read.
    clienttelemetry = getattr(repo, "clienttelemetry", {})
    fields = ("client_fullcommand", "client_hostname")
    return {f: clienttelemetry[f] for f in fields if f in clienttelemetry}
def _capabilities(orig, repo, proto):
    """Wrapper that adds "clienttelemetry" to the capabilities from ``orig``.

    The entry is appended in place to the list returned by
    ``orig(repo, proto)``, and that same list is returned.
    """
    caps = orig(repo, proto)
    caps.append("clienttelemetry")
    return caps
def _runcommand(orig, lui, repo, cmd, fullargs, ui, options, d, cmdpats, cmdoptions):
    """Wrapper that records the running command before delegating to ``orig``.

    Stores ``cmd`` under "command" and the formatted full argument list under
    "fullcommand" in the client telemetry data, then calls ``orig`` with the
    unchanged arguments and returns its result.
    """
    # Record the command that is running in the client telemetry data.
    _clienttelemetrydata["command"] = cmd
    limit = 256
    formatted = dispatch._formatargs(fullargs)
    # Long invocations can occupy a lot of space in the logs.
    _clienttelemetrydata["fullcommand"] = (
        formatted if len(formatted) <= limit else formatted[:limit] + " (truncated)"
    )
    return orig(lui, repo, cmd, fullargs, ui, options, d, cmdpats, cmdoptions)
def _peersetup(ui, peer):
    """Send client telemetry to a peer that supports it.

    Does nothing unless the peer reports the "clienttelemetry" capability.
    Otherwise the results of all registered telemetry functions are combined
    with the recorded command data and sent through the "clienttelemetry"
    wire call. The decoded reply is used as the peer's name: it is stored on
    the peer, logged, and optionally announced to the user.
    """
    if peer.capable("clienttelemetry"):
        # Call every registered telemetry function, then overlay the command
        # data; on a key clash the command data wins.
        logargs = {name: f(ui) for name, f in _clienttelemetryfuncs.items()}
        logargs.update(_clienttelemetrydata)
        # The handler in this file replies with the hostname of the machine
        # that served the request.
        peername = decodeutf8(peer._call("clienttelemetry", **logargs))
        peer._realhostname = peername
        blackbox.log({"clienttelemetry": {"peername": peername}})
        util.info("client-telemetry", peername=peername)
        # None means the config is unset; then announce only when the ui is
        # not plain and ui._isatty(ui.ferr) is true.
        ann = ui.configbool("clienttelemetry", "announceremotehostname", None)
        if ann is None:
            ann = not ui.plain() and ui._isatty(ui.ferr)
        if ann and not ui.quiet:
            ui.warn(_("connected to %s\n") % peername)
            # NOTE(review): nesting was reconstructed from a listing with its
            # indentation stripped; confirm this trace call belongs under
            # the announce condition rather than one level out.
            perftrace.tracevalue("Server", peername)
def uisetup(ui):
    """Install this extension's wire command, wrappers and peer setup hook.

    ``ui`` is not used.
    """
    # Expose _clienttelemetry as the "clienttelemetry" wire command and
    # include it in the capabilities list.
    register = wireproto.wireprotocommand("clienttelemetry", "*")
    register(_clienttelemetry)
    extensions.wrapfunction(wireproto, "_capabilities", _capabilities)
    # Run _peersetup for wire peers, and record the running command around
    # dispatch.runcommand.
    hg.wirepeersetupfuncs.append(_peersetup)
    extensions.wrapfunction(dispatch, "runcommand", _runcommand)