Commit Graph

221 Commits

Author SHA1 Message Date
Jun Wu
e47f7dc2fa codemod: register core configitems using a script
This is done by a script [2] using RedBaron [1], a tool designed for doing
code refactoring. All "default" values are decided by the script and are
strongly consistent with the existing code.

There are 2 changes done manually to fix tests:

  [warn] mercurial/exchange.py: experimental.bundle2-output-capture: default needs manual removal
  [warn] mercurial/localrepo.py: experimental.hook-track-tags: default needs manual removal

Since RedBaron is not confident about how to indent things [2].

[1]: https://github.com/PyCQA/redbaron
[2]: https://github.com/PyCQA/redbaron/issues/100
[3]:

#!/usr/bin/env python
# codemod_configitems.py - codemod tool to fill configitems
#
# Copyright 2017 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import, print_function

import os
import sys

import redbaron

def readpath(path):
    with open(path) as f:
        return f.read()

def writepath(path, content):
    with open(path, 'w') as f:
        f.write(content)

_configmethods = {'config', 'configbool', 'configint', 'configbytes',
                  'configlist', 'configdate'}

def extractstring(rnode):
    """get the string from a RedBaron string or call_argument node"""
    while rnode.type != 'string':
        rnode = rnode.value
    return rnode.value[1:-1]  # unquote, "'str'" -> "str"

def uiconfigitems(red):
    """match *.ui.config* pattern, yield (node, method, args, section, name)"""
    for node in red.find_all('atomtrailers'):
        entry = None
        try:
            obj = node[-3].value
            method = node[-2].value
            args = node[-1]
            section = args[0].value
            name = args[1].value
            if (obj in ('ui', 'self') and method in _configmethods
                and section.type == 'string' and name.type == 'string'):
                entry = (node, method, args, extractstring(section),
                         extractstring(name))
        except Exception:
            pass
        else:
            if entry:
                yield entry

def coreconfigitems(red):
    """match coreconfigitem(...) pattern, yield (node, args, section, name)"""
    for node in red.find_all('atomtrailers'):
        entry = None
        try:
            args = node[1]
            section = args[0].value
            name = args[1].value
            if (node[0].value == 'coreconfigitem' and section.type == 'string'
                and name.type == 'string'):
                entry = (node, args, extractstring(section),
                         extractstring(name))
        except Exception:
            pass
        else:
            if entry:
                yield entry

def registercoreconfig(cfgred, section, name, defaultrepr):
    """insert coreconfigitem to cfgred AST

    section and name are plain string, defaultrepr is a string
    """
    # find a place to insert the "coreconfigitem" item
    entries = list(coreconfigitems(cfgred))
    for node, args, nodesection, nodename in reversed(entries):
        if (nodesection, nodename) < (section, name):
            # insert after this entry
            node.insert_after(
                'coreconfigitem(%r, %r,\n'
                '    default=%s,\n'
                ')' % (section, name, defaultrepr))
            return

def main(argv):
    if not argv:
        print('Usage: codemod_configitems.py FILES\n'
              'For example, FILES could be "{hgext,mercurial}/*/**.py"')
    dirname = os.path.dirname
    reporoot = dirname(dirname(dirname(os.path.abspath(__file__))))

    # register configitems to this destination
    cfgpath = os.path.join(reporoot, 'mercurial', 'configitems.py')
    cfgred = redbaron.RedBaron(readpath(cfgpath))

    # state about what to do
    registered = set((s, n) for n, a, s, n in coreconfigitems(cfgred))
    toregister = {} # {(section, name): defaultrepr}
    coreconfigs = set() # {(section, name)}, whether it's used in core

    # first loop: scan all files before taking any action
    for i, path in enumerate(argv):
        print('(%d/%d) scanning %s' % (i + 1, len(argv), path))
        iscore = ('mercurial' in path) and ('hgext' not in path)
        red = redbaron.RedBaron(readpath(path))
        # find all repo.ui.config* and ui.config* calls, and collect their
        # section, name and default value information.
        for node, method, args, section, name in uiconfigitems(red):
            if section == 'web':
                # [web] section has some weirdness, ignore them for now
                continue
            defaultrepr = None
            key = (section, name)
            if len(args) == 2:
                if key in registered:
                    continue
                if method == 'configlist':
                    defaultrepr = 'list'
                elif method == 'configbool':
                    defaultrepr = 'False'
                else:
                    defaultrepr = 'None'
            elif len(args) >= 3 and (args[2].target is None or
                                     args[2].target.value == 'default'):
                # try to understand the "default" value
                dnode = args[2].value
                if dnode.type == 'name':
                    if dnode.value in {'None', 'True', 'False'}:
                        defaultrepr = dnode.value
                elif dnode.type == 'string':
                    defaultrepr = repr(dnode.value[1:-1])
                elif dnode.type in ('int', 'float'):
                    defaultrepr = dnode.value
            # inconsistent default
            if key in toregister and toregister[key] != defaultrepr:
                defaultrepr = None
            # interesting to rewrite
            if key not in registered:
                if defaultrepr is None:
                    print('[note] %s: %s.%s: unsupported default'
                          % (path, section, name))
                    registered.add(key) # skip checking it again
                else:
                    toregister[key] = defaultrepr
                    if iscore:
                        coreconfigs.add(key)

    # second loop: rewrite files given "toregister" result
    for path in argv:
        # reconstruct redbaron - trade CPU for memory
        red = redbaron.RedBaron(readpath(path))
        changed = False
        for node, method, args, section, name in uiconfigitems(red):
            key = (section, name)
            defaultrepr = toregister.get(key)
            if defaultrepr is None or key not in coreconfigs:
                continue
            if len(args) >= 3 and (args[2].target is None or
                                   args[2].target.value == 'default'):
                try:
                    del args[2]
                    changed = True
                except Exception:
                    # redbaron fails to do the rewrite due to indentation
                    # see https://github.com/PyCQA/redbaron/issues/100
                    print('[warn] %s: %s.%s: default needs manual removal'
                          % (path, section, name))
            if key not in registered:
                print('registering %s.%s' % (section, name))
                registercoreconfig(cfgred, section, name, defaultrepr)
                registered.add(key)
        if changed:
            print('updating %s' % path)
            writepath(path, red.dumps())

    if toregister:
        print('updating configitems.py')
        writepath(cfgpath, cfgred.dumps())

if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
2017-07-14 14:22:40 -07:00
Pierre-Yves David
a13e2abdc0 configitems: register the 'server.preferuncompressed' config 2017-06-30 03:44:12 +02:00
Pierre-Yves David
7b7818c09a configitems: register the 'server.maxhttpheaderlen' config 2017-06-30 03:44:11 +02:00
Pierre-Yves David
3369f427a0 configitems: register the 'server.disablefullbundle' config 2017-06-30 03:44:10 +02:00
Pierre-Yves David
834e358808 configitems: register the 'server.bundle1gd' config 2017-06-30 03:44:07 +02:00
Pierre-Yves David
548593d9ae configitems: register the 'server.bundle1' config 2017-06-30 03:44:06 +02:00
Martin von Zweigbergk
dc0d2c3515 wireproto: update reference to deleted addchangegroup()
Thanks to Yuya for catching this.
2017-06-16 09:37:22 -07:00
Gregory Szorc
bc8582fc01 streamclone: consider secret changesets (BC) (issue5589)
Previously, a repo containing secret changesets would be served via
stream clone, transferring those secret changesets. While secret
changesets aren't meant to imply strong security (if you really
want to keep them secret, others shouldn't have read access to the
repo), we should at least make an effort to protect secret changesets
when possible.

After this commit, we no longer serve stream clones for repos
containing secret changesets by default. This is backwards
incompatible behavior. In case anyone is relying on the behavior,
we provide a config option to opt into the old behavior.

Note that this defense is only beneficial for remote repos
accessed via the wire protocol: if a client has access to the
files backing a repo, they can get to the raw data and see secret
revisions.
2017-06-09 10:41:13 -07:00
Martin von Zweigbergk
c3406ac3db cleanup: use set literals
We no longer support Python 2.6, so we can now use set literals.
2017-02-10 16:56:29 -08:00
Siddharth Agarwal
0c7305054f clone: add a server-side option to disable full getbundles (pull-based clones)
For large enough repositories, pull-based clones take too long, and an attempt
to use them indicates some sort of configuration or other issue or maybe an
outdated Mercurial. Add a config option to disable them.
2017-05-11 10:50:05 -07:00
Yuya Nishihara
02022fc3c5 util: wrap s.encode('string_escape') call for future py3 compatibility 2017-03-15 23:06:50 +09:00
Pierre-Yves David
1be9d3fb99 clonebundle: use 'repo.vfs' instead of 'repo.opener'
The later is a 5 years old form.
2017-03-02 03:23:18 +01:00
Jun Wu
22df174ea7 wireproto: remove unused code
Removed an unused line introduced by 251ec05dc288.
2017-02-22 10:14:18 -08:00
Pulkit Goyal
07314d0686 py3: convert the mode argument of os.fdopen to unicodes (1 of 2)
os.fdopen() does not accepts bytes as its second argument which represent the
mode in which the file is to be opened. This patch makes sure unicodes are
passed in py3 by using pycompat.sysstr().
2017-02-13 20:06:38 +05:30
Pierre-Yves David
43b1ef004c wireproto: properly report server Abort during 'getbundle'
Previously Abort raised during 'getbundle' call poorly reported (HTTP-500 for
http, some scary messages for ssh). Abort error have been properly reported for
"push" for a long time, there is not reason to be different for 'getbundle'. We
properly catch such error and report them back the best way available. For
bundle, we issue a valid bundle2 reply (as expected by the client) with an
'error:abort' part. With bundle1 we do as best as we can depending of http or
ssh.
2017-02-10 18:20:58 +01:00
Pierre-Yves David
d00dbd00d9 bundle1: fix bundle1-denied reporting for pull over ssh
Changeset a0966f529e1b introduced a config option to have the server deny pull
using bundle1. The original protocol has not really been design to allow that
kind of error reporting so some hack was used. It turned the hack only works on
HTTP and that ssh server hangs forever when this is used. After further
digging, there is no way to report the error in a unified way. Using `ooberror`
freeze ssh and raising 'Abort' makes HTTP return a HTTP-500 without further
details. So with sadness we implement a version that dispatch according to the
protocol used.

Now the error is properly reported, but we still have ungraceful abort after
that. The protocol do not allow anything better to happen using bundle1.
2017-02-10 18:06:08 +01:00
Pierre-Yves David
5b07cfa3b3 bundle1: display server abort hint during unbundle
The code was printing the abort message but not the hint. This is now fixed.
2017-02-10 17:56:52 +01:00
Pierre-Yves David
64f57e513b bundle1: fix bundle1-denied reporting for push over ssh
Changeset a0966f529e1b introduced a config option to have the server deny push
using bundle1. The original protocol has not really be design to allow such kind
of error reporting so some hack was used. It turned the hack only works on HTTP
and that ssh wire peer hangs forever when the same hack is used. After further
digging, there is no way to report the error in a unified way. Using 'ooberror'
freeze ssh and raising 'Abort' makes HTTP return a HTTP500 without further
details. So with sadness we implement a version that dispatch according to the
protocol used.

We also add a test for pushing over ssh to make sure we won't regress in the
future. That test show that the hint is missing, this is another bug fixed in
the next changeset.
2017-02-10 17:56:59 +01:00
Gregory Szorc
a95fb0b61b wireproto: advertise supported media types and compression formats
This commit introduces support for advertising a server's support for
media types and compression formats in accordance with the spec defined
in internals.wireproto.

The bulk of the new code is a helper function in wireproto.py to
obtain a prioritized list of compression engines available to the
wire protocol. While not utilized yet, we implement support
for obtaining the list of compression engines advertised by the
client.

The upcoming HTTP protocol enhancements are a bit lower-level than
existing tests (most existing tests are command centric). So,
this commit establishes a new test file that will be appropriate
for holding tests around the functionality of the HTTP protocol
itself.

Rounding out this change, `hg debuginstall` now prints compression
engines available to the server.
2016-12-24 15:21:46 -07:00
Gregory Szorc
6a4fd5ab05 wireproto: only advertise HTTP-specific capabilities to HTTP peers (BC)
Previously, the capabilities list was protocol agnostic and we
advertised the same capabilities list to all clients, regardless of
transport protocol.

A few capabilities are specific to HTTP. I see no good reason why we
should advertise them to SSH clients. So this patch limits their
advertisement to HTTP clients.

This patch is BC, but SSH clients shouldn't be using the removed
capabilities so there should be no impact.
2016-11-28 20:46:42 -08:00
Yuya Nishihara
47f9c8b52e py3: bulk replace sys.stdin/out/err by util's
Almost all sys.stdin/out/err in hgext/ and mercurial/ are replaced by util's.
There are a few exceptions:

 - lsprof.py and statprof.py are untouched since they are a kind of vendor
   code and they never import mercurial modules right now.
 - ui._readline() needs to replace sys.stdin and stdout to pass them to
   raw_input(). We'll need another workaround here.
2016-10-20 23:53:36 +09:00
Gregory Szorc
2112fb0fd2 wireproto: perform chunking and compression at protocol layer (API)
Currently, the "streamres" response type is populated with a generator
of chunks with compression possibly already applied. This puts the onus
on commands to perform chunking and compression. Architecturally, I
think this is the wrong place to perform this work. I think commands
should say "here is the data" and the protocol layer should take care
of encoding the final bytes to put on the wire.

Additionally, upcoming commits will improve wire protocol support for
compression. Having a central place for performing compression in the
protocol transport layer will be easier than having to deal with
compression at the commands layer.

This commit refactors the "streamres" response type to accept either
a generator or an object with "read." Additionally, the type now
accepts a flag indicating whether the response is a "version 1
compressible" response. This basically identifies all commands
currently performing compression. I could have used a special type
for this, but a flag works just as well. The argument name
foreshadows the introduction of wire protocol changes, hence the "v1."

The code for chunking and compressing has been moved to the output
generation function for each protocol transport. Some code has been
inlined, resulting in the deletion of now unused methods.
2016-11-20 13:50:45 -08:00
Gregory Szorc
1538b87cfc wireproto: compress data from a generator
Currently, the "getbundle" wire protocol command obtains a generator of
data, converts it to a util.chunkbuffer, then converts it back to a
generator via the protocol's groupchunks() implementation. For the SSH
protocol, groupchunks() simply reads 4kb chunks then write()s the
data to a file descriptor. For the HTTP protocol, groupchunks() reads
32kb chunks, feeds those into a zlib compressor, emits compressed data
as it is available, and that is sent to the WSGI layer, where it is
likely turned into HTTP chunked transfer chunks as is or further
buffered and turned into a larger chunk.

For both the SSH and HTTP protocols, there is inefficiency from using
util.chunkbuffer.

For SSH, emitting consistent 4kb chunks sounds nice. However, the file
descriptor it is writing to is almost certainly buffered. That means
that a Python .write() probably doesn't translate into exactly what is
written to the I/O layer.

For HTTP, we're going through an intermediate layer to zlib compress
data. So all util.chunkbuffer is doing is ensuring that the chunks we
feed into the zlib compressor are of uniform size. This means more CPU
time in Python buffering and emitting chunks in util.chunkbuffer but
fewer function calls to zlib.

This patch introduces and implements a new wire protocol abstract
method: compresschunks(). It is like groupchunks() except it operates
on a generator instead of something with a .read(). The SSH
implementation simply proxies chunks. The HTTP implementation uses
zlib compression.

To avoid duplicate code, the HTTP groupchunks() has been reimplemented
in terms of compresschunks().

To prove this all works, the "getbundle" wire protocol command has been
switched to compresschunks(). This removes the util.chunkbuffer from
that command. Now, data essentially streams straight from the
changegroup emitter to the wire, possibly through a zlib compressor.
Generators all the way, baby.

There were slim to no performance changes on the server as measured
with the mozilla-central repository. This is likely because CPU
time is dominated by reading revlogs, producing the changegroup, and
zlib compressing the output stream. Still, this brings us a little
closer to our ideal of using generators everywhere.
2016-10-16 11:10:21 -07:00
Gregory Szorc
26f6f03d4c exchange: refactor APIs to obtain bundle data (API)
Currently, exchange.getbundle() returns either a cg1unpacker or a
util.chunkbuffer (in the case of bundle2). This is kinda OK, as
both expose a .read() to consumers. However, localpeer.getbundle()
has code inferring what the response type is based on arguments and
converts the util.chunkbuffer returned in the bundle2 case to a
bundle2.unbundle20 instance. This is a sign that the API for
exchange.getbundle() is not ideal because it doesn't consistently
return an "unbundler" instance.

In addition, unbundlers mask the fact that there is an underlying
generator of changegroup data. In both cg1 and bundle2, this generator
is being fed into a util.chunkbuffer so it can be re-exposed as a
file object.

util.chunkbuffer is a nice abstraction. However, it should only be
used "at the edges." This is because keeping data as a generator is
more efficient than converting it to a chunkbuffer, especially if we
convert that chunkbuffer back to a generator (as is the case in some
code paths currently).

This patch refactors exchange.getbundle() into
exchange.getbundlechunks(). The new API returns an iterator of chunks
instead of a file-like object.

Callers of exchange.getbundle() have been updated to use the new API.

There is a minor change of behavior in test-getbundle.t. This is
because `hg debuggetbundle` isn't defining bundlecaps. As a result,
a cg1 data stream and unpacker is being produced. This is getting fed
into a new bundle20 instance via bundle2.writebundle(), which uses
a backchannel mechanism between changegroup generation to add the
"nbchanges" part parameter. I never liked this backchannel mechanism
and I plan to remove it someday. `hg bundle` still produces the
"nbchanges" part parameter, so there should be no user-visible
change of behavior. I consider this "regression" a bug in
`hg debuggetbundle`. And that bug is captured by an existing
"TODO" in the code to use bundle2 capabilities.
2016-10-16 10:38:52 -07:00
Gregory Szorc
36f039b85b wireproto: rename argument to groupchunks()
groupchunks() is a generic "turn a file object into a generator"
function. It isn't limited to changegroups. Rename the argument
and update the docstring to reflect this.
2016-09-25 12:20:31 -07:00
Gregory Szorc
6fcb5a026b wireproto: remove gboptslist (API)
This variable has been unused since 4e11f86d8ce7, which was over
2 years ago. gboptsmap should be used instead.

Marking as API because this could break extensions.
2016-08-06 15:00:34 -07:00
Gregory Szorc
b3c092eb61 wireproto: unescape argument names in batch command (BC)
Clients escape both argument names and values when using the
batch command. Yet the server was only unescaping argument values.

Fortunately we don't have any argument names that need escaped. But
that isn't an excuse to lack symmetry in the code.

Since the server wasn't properly unescaping argument names, this
means we can never introduce an argument to a batchable command that
needs escaped because an old server wouldn't properly decode its name.
So we've introduced an assertion to detect the accidental introduction
of this in the future. Of course, we could introduce a server
capability that says the server knows how to decode argument names and
allow special argument names to go through. But until there is a need
for it (which I doubt there will be), we shouldn't bother with adding
an unused capability.
2016-08-06 13:55:21 -07:00
Gregory Szorc
7315f10dde wireproto: consolidate code for obtaining "cmds" argument value
Both wireproto.py and sshpeer.py had code for producing the value to
the "cmds" argument used by the "batch" command. This patch extracts
that code to a standalone function and uses it.
2016-08-06 13:46:28 -07:00
Augie Fackler
d19a4d923c wirepeer: rename confusing source parameter
It's named "url" everyplace else this method is defined, so let's be
consistent.
2016-08-05 16:34:30 -04:00
Gregory Szorc
589c58d0cd wireproto: extract repo filtering to standalone function
As part of teaching Mozilla's replication extension to better handle
repositories with obsolescence data, I encountered a few scenarios
where I wanted built-in wire protocol commands from replication clients
to operate on unfiltered repositories so they could have access to
obsolete changesets.

While the undocumented "web.view" config option provides a mechanism
to choose what filter/view hgweb operates on, this doesn't apply
to wire protocol commands because wireproto.dispatch() is always
operating on the "served" repo.

This patch extracts the line for obtaining the repo that
wireproto commands operate on to its own function so extensions
can monkeypatch it to e.g. return an unfiltered repo.

I stopped short of exposing a config option because I view the use
case for changing this as a niche feature, best left to the domain
of extensions.
2016-07-15 13:41:34 -07:00
Augie Fackler
ad67b99d20 cleanup: replace uses of util.(md5|sha1|sha256|sha512) with hashlib.\1
All versions of Python we support or hope to support make the hash
functions available in the same way under the same name, so we may as
well drop the util forwards.
2016-06-10 00:12:33 -04:00
timeless
a1cb3173a2 py3: convert to next() function
next(..) was introduced in py2.6 and .next() is not available in py3

https://docs.python.org/2/library/functions.html#next
2016-05-16 21:30:53 +00:00
Augie Fackler
6a7aaa4bf4 wireproto: optimize handling of large batch responses
Now that batch can be used by remotefilelog, the quadratic string
copying this was doing was actually disastrous. In my local testing,
fetching a 56 meg file used to take 3 minutes, and now takes only a
few seconds.
2016-05-12 09:39:14 -04:00
timeless
109fcbc79e pycompat: switch to util.urlreq/util.urlerr for py3 compat 2016-04-06 23:22:12 +00:00
Martin von Zweigbergk
4cc86f7b27 bundle: move writebundle() from changegroup.py to bundle2.py (API)
writebundle() writes a bundle2 bundle or a plain changegroup1. Imagine
away the "2" in "bundle2.py" for a moment and this change should makes
sense. The bundle wraps the changegroup, so it makes sense that it
knows about it. Another sign that this is correct is that the delayed
import of bundle2 in changegroup goes away.

I'll leave it for another time to remove the "2" in "bundle2.py"
(alternatively, extract a new bundle.py from it).
2016-03-28 14:41:29 -07:00
Augie Fackler
b3f8347d29 http: support sending hgargs via POST body instead of in GET or headers
narrowhg (for its narrow spec) and remotefilelog (for its large batch
requests) would like to be able to make requests with argument sets so
absurdly large that they blow out total request size limit on some
http servers. As a workaround, support stuffing args at the start
of the POST body.

We will probably want to leave this behavior off by default in servers
forever, because it makes the old "POSTs are only for writes"
assumption wrong, which might break some of the simpler authentication
configurations.
2016-03-11 11:37:00 -05:00
Augie Fackler
af1601947d wireproto: make iterbatcher behave streamily over http(s)
Unfortunately, the ssh and http implementations are slightly different
due to differences in their _callstream implementations, which
prevents ssh from behaving streamily. We should probably introduce a
new batch command that can stream results over ssh at some point in
the near future.

The streamy behavior of batch over http(s) is an enormous win for
remotefilelog over http: in my testing, it's saving about 40% on file
fetches with a cold cache against a server on localhost.
2016-03-01 18:41:43 -05:00
Augie Fackler
19d5a9a428 peer: add an iterbatcher interface
This is very much like ordinary batch(), but it will let me add a mode
for batch where we have pathologically large requests which are then
handled streamily. This will be a significant improvement for things
like remotefilelog, which may want to request thousands of entities at
once.
2016-03-01 18:39:25 -05:00
Augie Fackler
44da57a9e4 wireproto: document quirk of _callstream between http and ssh
This tripped me up when trying to use it, so it feels like we should
document this to avoid future pain.
2016-03-02 14:18:43 -05:00
Gregory Szorc
b5e53cc1d6 wireproto: support disabling bundle1 only if repo is generaldelta
I recently implemented the server.bundle1* options to control whether
bundle1 exchange is allowed.

After thinking about Mozilla's strategy for handling generaldelta
rollout a bit more, I think server operators need an additional
lever: disable bundle1 if and only if the repo is generaldelta.

bundle1 exchange for non-generaldelta repos will not have the potential
for CPU explosion that generaldelta repos do. Therefore, it makes sense
for server operators to continue to allow bundle1 exchange for
non-generaldelta repos without having to set a per-repo hgrc option
to change the policy depending on whether the repo is generaldelta.

This patch introduces a new set of options to control bundle1 behavior
for generaldelta repos. These options enable server operators to limit
bundle1 restrictions to the class of repos that can be performance
issues. It also allows server operators to tie bundle1 access to store
format. In many server environments (including Mozilla's), legacy repos
will not be generaldelta and new repos will or might be. New repos often
aren't bound by legacy access requirements, so setting a global policy
that disallows access to new/generaldelta repos via bundle1 could be a
reasonable policy in many server environments. This patch makes this
policy very easy to implement (modify global hgrc, add options to
existing generaldelta repos to grandfather them in).
2015-12-20 11:56:24 -08:00
Gregory Szorc
9af952ad6e wireproto: config options to disable bundle1
bundle2 is the new and preferred wire protocol format. For various
reasons, server operators may wish to force clients to use it.

One reason is performance. If a repository is stored in generaldelta,
the server must recompute deltas in order to produce the bundle1
changegroup. This can be extremely expensive. For mozilla-central,
bundle generation typically takes a few minutes. However, generating
a non-gd bundle from a generaldelta encoded mozilla-central requires
over 30 minutes of CPU! If a large repository like mozilla-central
were encoded in generaldelta and non-gd clients connected, they could
easily flood a server by cloning.

This patch gives server operators config knobs to control whether
bundle1 is allowed for push and pull operations. The default is to
support legacy bundle1 clients, making this patch backwards compatible.
2015-12-04 15:12:11 -08:00
Gregory Szorc
49ce77c867 wireproto: add docstring for wirepeer 2015-12-04 13:15:14 -08:00
Pierre-Yves David
2e9e629297 stream: sort stream capability before serialisation
We want that capability to be stable in our testing. This is currently not an
issue because the set is size 1, but this will be once generaldelta related
data gets in there.
2015-10-20 12:28:42 +02:00
Gregory Szorc
61b9ffeec8 wireproto: move clonebundles command from extension (issue4931)
The SSH peer class accesses wireproto.commands[cmd] as part of encoding
command arguments. Previously, the wire protocol command was defined in
the clonebundles extension. If the client didn't have this extension
enabled (which it likely doesn't since it is meant as a server-side
extension), then clients attempting to clone via ssh:// would get a
crash due to a KeyError accessing wireproto.commands['clonebundles']
when cloning from a server that is advertising clone bundles.

Moving the definition of the wire protocol command to wireproto.py makes
this problem go away.

A side effect of this code move is servers will always respond to
"clonebundles" wire protocol command requests. This should be fine: the
server will return an empty response unless a clone bundles manifest
file is present and clients shouldn't call the command unless the server
is advertising the capability, which only happens if the clonebundles
extension is enabled and the manifest file exists.
2015-11-03 12:31:33 -08:00
Gregory Szorc
9f94bd29c6 exchange: advertise if a clone bundle was attempted
The client now sends a "cbattempted" boolean flag to the "getbundle"
wire protocol command to tell the server whether a clone bundle was
attempted.

The presence of this flag will enable the server to conditionally emit a
bundle2 "output" part advertising the availability of clone bundles to
compatible clients that don't have it enabled.
2015-10-14 10:36:20 -07:00
Gregory Szorc
b794e37a76 wireproto: properly parse false boolean args (BC)
The client represents boolean arguments as '0' and '1'.
bool('0') == bool('1') == True, so a simple bool(val) isn't sufficient
for converting the argument back to a bool type.

Currently, "obsmarkers" is the only boolean argument to getbundle.

I /think/ the only place where we currently set the "obsmarkers"
argument is during bundle2 pulls. As a result of this bug, the server
/might/ be sending obsolete markers bundle2 part(s) to clients that
don't request them. That is why I marked this BC.

Surprisingly there was no test fall out from this change. I suspect a
lapse in test coverage.
2015-10-14 10:58:35 -07:00
Pierre-Yves David
30913031d4 error: get Abort from 'error' instead of 'util'
The home of 'Abort' is 'error' not 'util' however, a lot of code seems to be
confused about that and gives all the credit to 'util' instead of the
hardworking 'error'. In a spirit of equity, we break the cycle of injustice and
give back to 'error' the respect it deserves. And screw that 'util' poser.

For great justice.
2015-10-08 12:55:45 -07:00
Gregory Szorc
9250e99393 streamclone: move payload header generation into own function
The stream clone data over the wire protocol contains a header line
indicating total file count and data size. In bundle2, this metadata can
be captured by a part parameter and doesn't need to be in the body.

In preparation for bundle2, have generatev1() return the raw metadata
and move the header generation to its own function.
2015-10-04 19:06:06 -07:00
Gregory Szorc
40483f6f59 streamclone: move _allowstream() from wireproto
While we're moving things into streamclone.py...
2015-10-02 16:24:56 -07:00
Gregory Szorc
d8e74180f0 streamclone: move code out of exchange.py
We bulk move functions from exchange.py related to streaming clones.

Function names were renamed slightly to drop a component redundant with
the module name. Docstrings and comments referencing old names and
locations were updated accordingly.
2015-10-02 16:05:52 -07:00