discovery: remove unused discovery logic

Summary:
We have `experimental.findcommonheadsnew` set to True in all tests, and
Rust commit backends force the `findcommonheadsnew` paths, which is
pretty much everywhere except hgsql repos. Remove `_findcommonheadsold`.

The fast discovery is also unnecessary. Remove it too.

Reviewed By: DurhamG

Differential Revision: D27630496

fbshipit-source-id: ab1948f03a8c84e75e3b5c9ff4769e17533447d2
This commit is contained in:
Jun Wu 2021-04-12 17:10:28 -07:00 committed by Facebook GitHub Bot
parent 1776efce9c
commit 2480bfcef5
7 changed files with 13 additions and 294 deletions

View File

@ -78,7 +78,6 @@ configitem("remotenames", "calculatedistance", default=True)
configitem("remotenames", "disallowedbookmarks", default=[])
configitem("remotenames", "disallowedhint", default=None)
configitem("remotenames", "disallowedto", default=None)
configitem("remotenames", "fastheaddiscovery", default=False)
configitem("remotenames", "forcecompat", default=False)
configitem("remotenames", "forceto", default=False)
configitem("remotenames", "hoist", default="default")
@ -208,86 +207,12 @@ def _expull(orig, repo, remote, heads=None, force=False, **kwargs):
else:
bookmarks = remote.listkeys("bookmarks")
with extensions.wrappedfunction(setdiscovery, "findcommonheads", exfindcommonheads):
res = orig(repo, remote, heads, force, **kwargs)
res = orig(repo, remote, heads, force, **kwargs)
pullremotenames(repo, remote, bookmarks)
return res
def exfindcommonheads(orig, ui, local, remote, **kwargs):
"""Return a tuple (common, anyincoming, remoteheads) used to identify
missing nodes from or in remote.
"""
# The normal findcommonheads implementation tries to find the exact boundary
# between what the client has and what the server has. With remotenames, we
# have pretty good knowledge about what local commits already exist on the
# server, so we can short circuit all the discovery logic by just assuming
# the current remotenames are representative of what's on the server. In the
# worst case the data might be slightly out of sync and the server sends us
# more data than necessary, but this should be rare.
if not ui.configbool("remotenames", "fastheaddiscovery"):
return orig(ui, local, remote, **kwargs)
remotepath = activepath(local.ui, remote)
remotenodes = []
for node, nametype, remotename, rname in readremotenames(local):
# Note: It's important that this excludes hidden commits (by doing
# node in local), since the callers assume all nodes in common are
# visible.
node = bin(node)
if remotename == remotepath and node in local:
remotenodes.append(node)
# If we have no remotenames, fallback to normal discovery.
if not remotenodes:
return orig(ui, local, remote, **kwargs)
remotenodes = set(remotenodes)
# Check which remote nodes still exist on the server
ui.status_err(_("searching for changes\n"))
batch = remote.iterbatch()
batch.heads()
batch.known(remotenodes)
batch.submit()
srvheadhashes, yesno = batch.results()
common = list(n for i, n in enumerate(remotenodes) if yesno[i])
# If we don't know of any server commits, fall back to legacy discovery
if not common:
# If this path is hit, it will print "searching for changes" twice,
# which is weird. This should be very rare though, since it only happens
# if the client has remote names, but none of those names exist on the
# server (i.e. the server has been completely replaced, or stripped).
ui.status_err(
_(
"server has changed since last pull - falling back to the "
"default search strategy\n"
)
)
return orig(ui, local, remote, **kwargs)
# We only want to use this for existence checks. We don't want hidden
# commits to result in throwing an exception here.
cl = local.changelog
if cl.tip() == nullid:
if srvheadhashes != [nullid]:
return [nullid], True, srvheadhashes
return ([nullid], False, [])
# early exit if we know all the specified remote heads already
clrev = cl.rev
clcontains = cl.nodemap.__contains__
srvheads = list(clrev(n) for n in srvheadhashes if clcontains(n))
if len(srvheads) == len(srvheadhashes):
ui.debug("all remote heads known locally\n")
return (srvheadhashes, False, srvheadhashes)
return (common, True, srvheadhashes)
def pullremotenames(repo, remote, bookmarks):
# when working between multiple local repos which do not all have
# remotenames enabled, do this work only for those with it enabled

View File

@ -230,8 +230,6 @@ coreconfigitem("diff", "ignoreblanklines", default=False)
coreconfigitem("diff", "ignorewseol", default=False)
coreconfigitem("diff", "nobinary", default=False)
coreconfigitem("diff", "noprefix", default=False)
coreconfigitem("discovery", "fastdiscovery", False)
coreconfigitem("discovery", "knownserverbookmarks", default=[])
coreconfigitem("doctor", "check-lag-name", "master")
coreconfigitem("doctor", "check-lag-threshold", 50)
coreconfigitem("doctor", "check-too-many-names-threshold", 20)
@ -278,7 +276,6 @@ coreconfigitem(
coreconfigitem("experimental", "evolution.exchange", default=None)
coreconfigitem("experimental", "evolution.bundle-obsmarker", default=False)
coreconfigitem("experimental", "evolution.track-operation", default=True)
coreconfigitem("experimental", "findcommonheadsnew", default=util.istest())
coreconfigitem("experimental", "worddiff", default=False)
coreconfigitem("experimental", "mmapindexthreshold", default=1)
coreconfigitem("experimental", "new-clone-path", default=True)

View File

@ -18,9 +18,7 @@ from . import bookmarks, branchmap, phases, pycompat, setdiscovery, util
from .node import hex, nullid
def findcommonincoming(
repo, remote, heads=None, force=False, ancestorsof=None, needlargestcommonset=True
):
def findcommonincoming(repo, remote, heads=None, force=False, ancestorsof=None):
"""Return a tuple (common, anyincoming, heads) used to identify the common
subset of nodes between repo and remote.
@ -34,9 +32,6 @@ def findcommonincoming(
"ancestorsof" if not None, restrict the discovery to a subset defined by
these nodes. Changeset outside of this set won't be considered (and
won't appears in "common")
"needlargestcommonset" if set to True then it will return the largest set of common nodes.
Otherwise heuristics can be used to speed up discovery but return a smaller
common set.
If you pass heads and they are all known locally, the response lists just
these heads in "common" and in "heads".
@ -62,7 +57,6 @@ def findcommonincoming(
abortwhenunrelated=not force,
ancestorsof=ancestorsof,
explicitremoteheads=heads,
needlargestcommonset=needlargestcommonset,
)
common, anyinc, srvheads = res
unfi = repo
@ -151,9 +145,7 @@ def findcommonoutgoing(
# get common set if not provided
if commoninc is None:
commoninc = findcommonincoming(
repo, other, force=force, ancestorsof=onlyheads, needlargestcommonset=True
)
commoninc = findcommonincoming(repo, other, force=force, ancestorsof=onlyheads)
og.commonheads, _any, _hds = commoninc
# compute outgoing

View File

@ -1498,7 +1498,6 @@ def _pulldiscoverychangegroup(pullop):
pullop.remote,
heads=pullop.heads,
force=pullop.force,
needlargestcommonset=False,
)
common, fetch, rheads = tmp
nm = pullop.repo.changelog.nodemap

View File

@ -960,21 +960,6 @@ for related options for the annotate command.
Ignore copies or renames if the source path is outside file patterns.
``discovery``
-------------
Options that control how discovery of commits to push/pull works
The following options apply to all hosts.
``fastdiscovery``
Special mode that makes pull time discovery faster but less precise i.e.
client can pull more commits than necessary.
``knownserverbookmarks``
Optional. Bookmarks that should normally be present on client and server.
Can be used to make fastdiscovery more precise
``edenfs``
---------

View File

@ -136,86 +136,6 @@ def _limitsample(sample, desiredlen):
return sample
def fastdiscovery(ui, local, remote):
# The normal findcommonheads implementation tries to find the exact boundary
# between what the client has and what the server has. But normally we
# have pretty good knowledge about what local commits already exist on the
# server, so we can short circuit all the discovery logic by just assuming
# the current public heads are representative of what's on the server. In the
# worst case the data might be slightly out of sync and the server sends us
# more data than necessary, but this should be rare.
cl = local.changelog
publicheads = []
# That should be equivalent to "heads(public())" but much faster
revs = list(local.revs("head() & public() + parents(roots(draft()))"))
for r in revs:
publicheads.append(local[r].node())
bookmarks = ui.configlist("discovery", "knownserverbookmarks")
knownbookmarksvalues = []
for book in bookmarks:
if book in local:
knownbookmarksvalues.append(local[book].node())
# If we have no remotenames, fallback to normal discovery.
if not publicheads:
return None
publicheads = set(publicheads)
# Check which remote nodes still exist on the server
ui.status_err(_("searching for changes\n"))
batch = remote.iterbatch()
batch.heads()
batch.known(knownbookmarksvalues)
batch.known(publicheads)
batch.submit()
srvheadhashes, yesnoknownbookmarks, yesnopublicheads = batch.results()
if knownbookmarksvalues and not any(yesnoknownbookmarks):
ui.status_err(_("No known server bookmarks\n"))
# Server doesn't know any remote bookmarks. That's odd and it's better
# to fallback to normal discovery process. Otherwise we might request
# too many commits from the server
return None
common = list(n for i, n in enumerate(publicheads) if yesnopublicheads[i])
common.extend(
(n for i, n in enumerate(knownbookmarksvalues) if yesnoknownbookmarks[i])
)
# If we don't know of any server commits, fall back to legacy discovery
if not common:
# If this path is hit, it will print "searching for changes" twice,
# which is weird. This should be very rare though, since it only happens
# if the client has remote names, but none of those names exist on the
# server (i.e. the server has been completely replaced, or stripped).
ui.status_err(
_(
"server has changed since last pull - falling back to the "
"default search strategy\n"
)
)
return None
ui.debug("using fastdiscovery\n")
if cl.tip() == nullid:
if srvheadhashes != [nullid]:
return [nullid], True, srvheadhashes
return ([nullid], False, [])
# early exit if we know all the specified remote heads already
clcontains = cl.nodemap.__contains__
srvheads = list(n for n in srvheadhashes if clcontains(n))
if len(srvheads) == len(srvheadhashes):
ui.debug("all remote heads known locally\n")
return (srvheadhashes, False, srvheadhashes)
return (common, True, srvheadhashes)
def findcommonheads(
ui,
local,
@ -225,7 +145,6 @@ def findcommonheads(
abortwhenunrelated=True,
ancestorsof=None,
explicitremoteheads=None,
needlargestcommonset=True,
):
"""Return a tuple (commonheads, anyincoming, remoteheads) used to
identify missing nodes from or in remote.
@ -246,8 +165,6 @@ def findcommonheads(
'::ancestorsof'.
- explicitremoteheads: if not None, a list of nodes that are known to exist
on the remote server.
- needlargestcommonset: do not try heuristic algorithm that can categorize
some 'common' commits as 'missing'.
Return values:
- 'anyincoming' is a boolean. Its usefulness is questionable.
@ -257,38 +174,16 @@ def findcommonheads(
remote repo. 'remoteheads' might include commit hashes unknown to the
local repo.
"""
# fastdiscovery might return *some* common set, but not necessarily the
# largest common set. In some cases (e.g. during `hg push`)
# we actually want the largest common set.
if ui.configbool("discovery", "fastdiscovery") and not needlargestcommonset:
res = fastdiscovery(ui, local, remote)
if res is not None:
return res
# The Rust changelog is incompatible with the old discovery logic
# that uses revlog details. Enforce the new discovery logic.
if ui.configbool("experimental", "findcommonheadsnew") or local.changelog.userust():
return _findcommonheadsnew(
ui,
local,
remote,
initialsamplesize,
fullsamplesize,
abortwhenunrelated,
ancestorsof,
explicitremoteheads,
)
else:
return _findcommonheadsold(
ui,
local,
remote,
initialsamplesize,
fullsamplesize,
abortwhenunrelated,
ancestorsof,
)
return _findcommonheadsnew(
ui,
local,
remote,
initialsamplesize,
fullsamplesize,
abortwhenunrelated,
ancestorsof,
explicitremoteheads,
)
def _findcommonheadsnew(

View File

@ -1,74 +0,0 @@
#chg-compatible
$ disable treemanifest
$ setconfig discovery.fastdiscovery=True
$ . $TESTDIR/library.sh
$ . "$TESTDIR/infinitepush/library.sh"
$ setupcommon
Setup remotefilelog server
$ hg init server
$ cd server
$ setupserver
$ setconfig remotefilelog.server=true
$ mkcommit initial
$ hg bookmark master
$ cd ..
Make client shallow clone
$ hgcloneshallow ssh://user@dummy/server client
streaming all changes
* files to transfer, * bytes of data (glob)
transferred * bytes in * seconds (* KB/sec) (glob)
searching for changes
no changes found
updating to branch default
* files updated, * files merged, * files removed, * files unresolved (glob)
* files fetched over * fetches - (* misses, * hit ratio) over * (glob) (?)
$ cd server
$ mkcommit first
$ mkcommit second
$ mkcommit third
Make sure that fastdiscovery is used for pull
$ cd ../client
$ hg pull --debug 2>&1 | grep fastdiscovery
using fastdiscovery
Make sure that fastdiscovery is used for push
$ hg up -q tip
3 files fetched over 1 fetches - (3 misses, * hit ratio) over * (glob) (?)
$ mkcommit clientcommit
$ hg push --debug 2>&1 | grep fastdiscovery || echo "no fastdiscovery"
no fastdiscovery
Make public head on the client - fastdiscovery is NOT used because no common nodes found
$ mkcommit publichead
$ hg debugmakepublic .
$ hg pull
pulling from ssh://user@dummy/server
searching for changes
server has changed since last pull - falling back to the default search strategy
searching for changes
no changes found
Set knownserverbookmarks - fastdiscovery is used
$ hg book -r ".^" master_bookmark
$ hg pull --config discovery.knownserverbookmarks=master_bookmark
pulling from ssh://user@dummy/server
searching for changes
no changes found
$ cd ../server
$ mkcommit newcommit
$ cd ../client
$ hg pull --config discovery.knownserverbookmarks=master_bookmark
pulling from ssh://user@dummy/server
searching for changes
adding changesets
adding manifests
adding file changes
added 1 changesets with 0 changes to 0 files
updating bookmark master