discovery: remove unused discovery logic

Summary:
We have `experimental.findcommonheadsnew` set to True in all tests, and
Rust commit backends force the `findcommonheadsnew` paths, which is
pretty much everywhere except hgsql repos. Remove `_findcommonheadsold`.

The fast discovery is also unnecessary. Remove it too.

Reviewed By: DurhamG

Differential Revision: D27630496

fbshipit-source-id: ab1948f03a8c84e75e3b5c9ff4769e17533447d2
This commit is contained in:
Jun Wu 2021-04-12 17:10:28 -07:00 committed by Facebook GitHub Bot
parent 1776efce9c
commit 2480bfcef5
7 changed files with 13 additions and 294 deletions

View File

@ -78,7 +78,6 @@ configitem("remotenames", "calculatedistance", default=True)
configitem("remotenames", "disallowedbookmarks", default=[])
configitem("remotenames", "disallowedhint", default=None)
configitem("remotenames", "disallowedto", default=None)
configitem("remotenames", "fastheaddiscovery", default=False)
configitem("remotenames", "forcecompat", default=False)
configitem("remotenames", "forceto", default=False)
configitem("remotenames", "hoist", default="default")
@ -208,86 +207,12 @@ def _expull(orig, repo, remote, heads=None, force=False, **kwargs):
else:
bookmarks = remote.listkeys("bookmarks")
with extensions.wrappedfunction(setdiscovery, "findcommonheads", exfindcommonheads):
res = orig(repo, remote, heads, force, **kwargs)
res = orig(repo, remote, heads, force, **kwargs)
pullremotenames(repo, remote, bookmarks)
return res
def exfindcommonheads(orig, ui, local, remote, **kwargs):
"""Return a tuple (common, anyincoming, remoteheads) used to identify
missing nodes from or in remote.
"""
# The normal findcommonheads implementation tries to find the exact boundary
# between what the client has and what the server has. With remotenames, we
# have pretty good knowledge about what local commits already exist on the
# server, so we can short circuit all the discovery logic by just assuming
# the current remotenames are representative of what's on the server. In the
# worst case the data might be slightly out of sync and the server sends us
# more data than necessary, but this should be rare.
if not ui.configbool("remotenames", "fastheaddiscovery"):
return orig(ui, local, remote, **kwargs)
remotepath = activepath(local.ui, remote)
remotenodes = []
for node, nametype, remotename, rname in readremotenames(local):
# Note: It's important that this excludes hidden commits (by doing
# node in local), since the callers assume all nodes in common are
# visible.
node = bin(node)
if remotename == remotepath and node in local:
remotenodes.append(node)
# If we have no remotenames, fallback to normal discovery.
if not remotenodes:
return orig(ui, local, remote, **kwargs)
remotenodes = set(remotenodes)
# Check which remote nodes still exist on the server
ui.status_err(_("searching for changes\n"))
batch = remote.iterbatch()
batch.heads()
batch.known(remotenodes)
batch.submit()
srvheadhashes, yesno = batch.results()
common = list(n for i, n in enumerate(remotenodes) if yesno[i])
# If we don't know of any server commits, fall back to legacy discovery
if not common:
# If this path is hit, it will print "searching for changes" twice,
# which is weird. This should be very rare though, since it only happens
# if the client has remote names, but none of those names exist on the
# server (i.e. the server has been completely replaced, or stripped).
ui.status_err(
_(
"server has changed since last pull - falling back to the "
"default search strategy\n"
)
)
return orig(ui, local, remote, **kwargs)
# We only want to use this for existence checks. We don't want hidden
# commits to result in throwing an exception here.
cl = local.changelog
if cl.tip() == nullid:
if srvheadhashes != [nullid]:
return [nullid], True, srvheadhashes
return ([nullid], False, [])
# early exit if we know all the specified remote heads already
clrev = cl.rev
clcontains = cl.nodemap.__contains__
srvheads = list(clrev(n) for n in srvheadhashes if clcontains(n))
if len(srvheads) == len(srvheadhashes):
ui.debug("all remote heads known locally\n")
return (srvheadhashes, False, srvheadhashes)
return (common, True, srvheadhashes)
def pullremotenames(repo, remote, bookmarks):
# when working between multiple local repos which do not all have
# remotenames enabled, do this work only for those with it enabled

View File

@ -230,8 +230,6 @@ coreconfigitem("diff", "ignoreblanklines", default=False)
coreconfigitem("diff", "ignorewseol", default=False)
coreconfigitem("diff", "nobinary", default=False)
coreconfigitem("diff", "noprefix", default=False)
coreconfigitem("discovery", "fastdiscovery", False)
coreconfigitem("discovery", "knownserverbookmarks", default=[])
coreconfigitem("doctor", "check-lag-name", "master")
coreconfigitem("doctor", "check-lag-threshold", 50)
coreconfigitem("doctor", "check-too-many-names-threshold", 20)
@ -278,7 +276,6 @@ coreconfigitem(
coreconfigitem("experimental", "evolution.exchange", default=None)
coreconfigitem("experimental", "evolution.bundle-obsmarker", default=False)
coreconfigitem("experimental", "evolution.track-operation", default=True)
coreconfigitem("experimental", "findcommonheadsnew", default=util.istest())
coreconfigitem("experimental", "worddiff", default=False)
coreconfigitem("experimental", "mmapindexthreshold", default=1)
coreconfigitem("experimental", "new-clone-path", default=True)

View File

@ -18,9 +18,7 @@ from . import bookmarks, branchmap, phases, pycompat, setdiscovery, util
from .node import hex, nullid
def findcommonincoming(
repo, remote, heads=None, force=False, ancestorsof=None, needlargestcommonset=True
):
def findcommonincoming(repo, remote, heads=None, force=False, ancestorsof=None):
"""Return a tuple (common, anyincoming, heads) used to identify the common
subset of nodes between repo and remote.
@ -34,9 +32,6 @@ def findcommonincoming(
"ancestorsof" if not None, restrict the discovery to a subset defined by
these nodes. Changeset outside of this set won't be considered (and
won't appears in "common")
"needlargestcommonset" if set to True then it will return the largest set of common nodes.
Otherwise heuristics can be used to speed up discovery but return a smaller
common set.
If you pass heads and they are all known locally, the response lists just
these heads in "common" and in "heads".
@ -62,7 +57,6 @@ def findcommonincoming(
abortwhenunrelated=not force,
ancestorsof=ancestorsof,
explicitremoteheads=heads,
needlargestcommonset=needlargestcommonset,
)
common, anyinc, srvheads = res
unfi = repo
@ -151,9 +145,7 @@ def findcommonoutgoing(
# get common set if not provided
if commoninc is None:
commoninc = findcommonincoming(
repo, other, force=force, ancestorsof=onlyheads, needlargestcommonset=True
)
commoninc = findcommonincoming(repo, other, force=force, ancestorsof=onlyheads)
og.commonheads, _any, _hds = commoninc
# compute outgoing

View File

@ -1498,7 +1498,6 @@ def _pulldiscoverychangegroup(pullop):
pullop.remote,
heads=pullop.heads,
force=pullop.force,
needlargestcommonset=False,
)
common, fetch, rheads = tmp
nm = pullop.repo.changelog.nodemap

View File

@ -960,21 +960,6 @@ for related options for the annotate command.
Ignore copies or renames if the source path is outside file patterns.
``discovery``
-------------
Options that control how discovery of commits to push/pull works
The following options apply to all hosts.
``fastdiscovery``
Special mode that makes pull time discovery faster but less precise i.e.
client can pull more commits than necessary.
``knownserverbookmarks``
Optional. Bookmarks that should normally be present on client and server.
Can be used to make fastdiscovery more precise
``edenfs``
---------

View File

@ -136,86 +136,6 @@ def _limitsample(sample, desiredlen):
return sample
def fastdiscovery(ui, local, remote):
# The normal findcommonheads implementation tries to find the exact boundary
# between what the client has and what the server has. But normally we
# have pretty good knowledge about what local commits already exist on the
# server, so we can short circuit all the discovery logic by just assuming
# the current public heads are representative of what's on the server. In the
# worst case the data might be slightly out of sync and the server sends us
# more data than necessary, but this should be rare.
cl = local.changelog
publicheads = []
# That should be equivalent to "heads(public())" but much faster
revs = list(local.revs("head() & public() + parents(roots(draft()))"))
for r in revs:
publicheads.append(local[r].node())
bookmarks = ui.configlist("discovery", "knownserverbookmarks")
knownbookmarksvalues = []
for book in bookmarks:
if book in local:
knownbookmarksvalues.append(local[book].node())
# If we have no remotenames, fallback to normal discovery.
if not publicheads:
return None
publicheads = set(publicheads)
# Check which remote nodes still exist on the server
ui.status_err(_("searching for changes\n"))
batch = remote.iterbatch()
batch.heads()
batch.known(knownbookmarksvalues)
batch.known(publicheads)
batch.submit()
srvheadhashes, yesnoknownbookmarks, yesnopublicheads = batch.results()
if knownbookmarksvalues and not any(yesnoknownbookmarks):
ui.status_err(_("No known server bookmarks\n"))
# Server doesn't know any remote bookmarks. That's odd and it's better
# to fallback to normal discovery process. Otherwise we might request
# too many commits from the server
return None
common = list(n for i, n in enumerate(publicheads) if yesnopublicheads[i])
common.extend(
(n for i, n in enumerate(knownbookmarksvalues) if yesnoknownbookmarks[i])
)
# If we don't know of any server commits, fall back to legacy discovery
if not common:
# If this path is hit, it will print "searching for changes" twice,
# which is weird. This should be very rare though, since it only happens
# if the client has remote names, but none of those names exist on the
# server (i.e. the server has been completely replaced, or stripped).
ui.status_err(
_(
"server has changed since last pull - falling back to the "
"default search strategy\n"
)
)
return None
ui.debug("using fastdiscovery\n")
if cl.tip() == nullid:
if srvheadhashes != [nullid]:
return [nullid], True, srvheadhashes
return ([nullid], False, [])
# early exit if we know all the specified remote heads already
clcontains = cl.nodemap.__contains__
srvheads = list(n for n in srvheadhashes if clcontains(n))
if len(srvheads) == len(srvheadhashes):
ui.debug("all remote heads known locally\n")
return (srvheadhashes, False, srvheadhashes)
return (common, True, srvheadhashes)
def findcommonheads(
ui,
local,
@ -225,7 +145,6 @@ def findcommonheads(
abortwhenunrelated=True,
ancestorsof=None,
explicitremoteheads=None,
needlargestcommonset=True,
):
"""Return a tuple (commonheads, anyincoming, remoteheads) used to
identify missing nodes from or in remote.
@ -246,8 +165,6 @@ def findcommonheads(
'::ancestorsof'.
- explicitremoteheads: if not None, a list of nodes that are known to exist
on the remote server.
- needlargestcommonset: do not try heuristic algorithm that can categorize
some 'common' commits as 'missing'.
Return values:
- 'anyincoming' is a boolean. Its usefulness is questionable.
@ -257,38 +174,16 @@ def findcommonheads(
remote repo. 'remoteheads' might include commit hashes unknown to the
local repo.
"""
# fastdiscovery might return *some* common set, but not necessarily the
# largest common set. In some cases (e.g. during `hg push`)
# we actually want the largest common set.
if ui.configbool("discovery", "fastdiscovery") and not needlargestcommonset:
res = fastdiscovery(ui, local, remote)
if res is not None:
return res
# The Rust changelog is incompatible with the old discovery logic
# that uses revlog details. Enforce the new discovery logic.
if ui.configbool("experimental", "findcommonheadsnew") or local.changelog.userust():
return _findcommonheadsnew(
ui,
local,
remote,
initialsamplesize,
fullsamplesize,
abortwhenunrelated,
ancestorsof,
explicitremoteheads,
)
else:
return _findcommonheadsold(
ui,
local,
remote,
initialsamplesize,
fullsamplesize,
abortwhenunrelated,
ancestorsof,
)
return _findcommonheadsnew(
ui,
local,
remote,
initialsamplesize,
fullsamplesize,
abortwhenunrelated,
ancestorsof,
explicitremoteheads,
)
def _findcommonheadsnew(

View File

@ -1,74 +0,0 @@
#chg-compatible
$ disable treemanifest
$ setconfig discovery.fastdiscovery=True
$ . $TESTDIR/library.sh
$ . "$TESTDIR/infinitepush/library.sh"
$ setupcommon
Setup remotefilelog server
$ hg init server
$ cd server
$ setupserver
$ setconfig remotefilelog.server=true
$ mkcommit initial
$ hg bookmark master
$ cd ..
Make client shallow clone
$ hgcloneshallow ssh://user@dummy/server client
streaming all changes
* files to transfer, * bytes of data (glob)
transferred * bytes in * seconds (* KB/sec) (glob)
searching for changes
no changes found
updating to branch default
* files updated, * files merged, * files removed, * files unresolved (glob)
* files fetched over * fetches - (* misses, * hit ratio) over * (glob) (?)
$ cd server
$ mkcommit first
$ mkcommit second
$ mkcommit third
Make sure that fastdiscovery is used for pull
$ cd ../client
$ hg pull --debug 2>&1 | grep fastdiscovery
using fastdiscovery
Make sure that fastdiscovery is used for push
$ hg up -q tip
3 files fetched over 1 fetches - (3 misses, * hit ratio) over * (glob) (?)
$ mkcommit clientcommit
$ hg push --debug 2>&1 | grep fastdiscovery || echo "no fastdiscovery"
no fastdiscovery
Make public head on the client - fastdiscovery is NOT used because no common nodes found
$ mkcommit publichead
$ hg debugmakepublic .
$ hg pull
pulling from ssh://user@dummy/server
searching for changes
server has changed since last pull - falling back to the default search strategy
searching for changes
no changes found
Set knownserverbookmarks - fastdiscovery is used
$ hg book -r ".^" master_bookmark
$ hg pull --config discovery.knownserverbookmarks=master_bookmark
pulling from ssh://user@dummy/server
searching for changes
no changes found
$ cd ../server
$ mkcommit newcommit
$ cd ../client
$ hg pull --config discovery.knownserverbookmarks=master_bookmark
pulling from ssh://user@dummy/server
searching for changes
adding changesets
adding manifests
adding file changes
added 1 changesets with 0 changes to 0 files
updating bookmark master