diff --git a/eden/scm/edenscm/hgext/hgsql.py b/eden/scm/edenscm/hgext/hgsql.py index c922ca380e..72bcb532fe 100644 --- a/eden/scm/edenscm/hgext/hgsql.py +++ b/eden/scm/edenscm/hgext/hgsql.py @@ -1116,6 +1116,17 @@ def wraprepo(repo): self._filecache.pop("manifestlog", None) self._filecache.pop("_phasecache", None) + # hgsql only writes to revlogs. Sync revlog to zstore. + # This is for test compatibility. Do not use it in production. + if "zstorecommitdata" in self.storerequirements: + if not util.istest(): + raise error.Abort( + _( + "zstore commit data is forbidden for hgsql production use due to performance concerns" + ) + ) + self._syncrevlogtozstore() + # Refill the cache. We can't just reuse the exact contents of # the old cached ctx, since the old ctx contains a reference to # the old revlog, which is now out of date. diff --git a/eden/scm/edenscm/mercurial/configitems.py b/eden/scm/edenscm/mercurial/configitems.py index 4e14670bf5..25efd932c1 100644 --- a/eden/scm/edenscm/mercurial/configitems.py +++ b/eden/scm/edenscm/mercurial/configitems.py @@ -311,6 +311,9 @@ coreconfigitem("format", "obsstore-version", default=None) coreconfigitem("format", "usefncache", default=True) coreconfigitem("format", "usegeneraldelta", default=True) coreconfigitem("format", "usestore", default=True) +coreconfigitem("format", "use-zstore-commit-data", default=False) +coreconfigitem("format", "use-zstore-commit-data-revlog-fallback", default=False) +coreconfigitem("format", "use-zstore-commit-data-server-fallback", default=False) coreconfigitem("fsmonitor", "warn_when_unused", default=True) coreconfigitem("fsmonitor", "warn_update_file_count", default=50000) coreconfigitem("hint", "ack", default=list) diff --git a/eden/scm/edenscm/mercurial/helptext.py b/eden/scm/edenscm/mercurial/helptext.py index 162890566c..449c64dd44 100644 --- a/eden/scm/edenscm/mercurial/helptext.py +++ b/eden/scm/edenscm/mercurial/helptext.py @@ -1101,6 +1101,23 @@ Example for ``~/.hgrc``:: Enabled by 
default. +``use-zstore-commit-data`` + + Use zstore (a SHA1 content store) to store commit metadata (user, date, + message, extras, but not the parent order). This means that + "00changelog.d" is no longer used for reading commit content. If + "00changelog.i" is still used, "00changelog.d" is still written to when + adding new commits. + +``use-zstore-commit-data-revlog-fallback`` + + When data is not found in zstore, fall back to looking it up in revlog. + +``use-zstore-commit-data-server-fallback`` + + When data is not found in zstore, fall back to looking it up from the server. + (Not implemented yet) + ``dirstate`` Dirstate format version to use. One of 0 (flat dirstate), 1 (treedirstate), and 2 (treestate). Default is 1. diff --git a/eden/scm/edenscm/mercurial/hg.py b/eden/scm/edenscm/mercurial/hg.py index bc4660113f..efe12ed707 100644 --- a/eden/scm/edenscm/mercurial/hg.py +++ b/eden/scm/edenscm/mercurial/hg.py @@ -678,6 +678,13 @@ def clone( raise destlock = copystore(ui, srcrepo, destpath) + # repo initialization might also take a lock. Keeping destlock + # outside the repo object can cause deadlock. To avoid deadlock, + # we just release destlock here. The lock will be re-acquired + # soon by `destpeer`, or `local.lock()` below. + if destlock is not None: + destlock.release() + # copy bookmarks over srcbookmarks = srcrepo.svfs.join("bookmarks") dstbookmarks = os.path.join(destpath, "store", "bookmarks") @@ -688,11 +695,6 @@ def clone( # into it destpeer = peer(srcrepo, peeropts, dest) srcrepo.hook("outgoing", source="clone", node=node.hex(node.nullid)) - # Attach "destlock" to the repo. So 'repo.lock()' wouldn't - # deadlock. wlock needs to be taken first. 
- if destlock: - destlockw = destpeer.local().wlock() - destpeer.local()._lockref = weakref.ref(destlock) else: try: destpeer = peer(srcrepo or ui, peeropts, dest, create=True) @@ -755,6 +757,8 @@ def clone( fp.close() destrepo.ui.setconfig("paths", "default", defaulturl, "clone") + if destrepo.ui.configbool("format", "use-zstore-commit-data"): + destrepo._syncrevlogtozstore() if update: if update is not True: diff --git a/eden/scm/edenscm/mercurial/localrepo.py b/eden/scm/edenscm/mercurial/localrepo.py index 05da21d565..ed6a87a01c 100644 --- a/eden/scm/edenscm/mercurial/localrepo.py +++ b/eden/scm/edenscm/mercurial/localrepo.py @@ -20,6 +20,8 @@ import random import time import weakref +# pyre-fixme[21]: Could not find `bindings`. +import bindings from edenscm.hgext.extlib.phabricator import diffprops from . import ( @@ -52,6 +54,7 @@ from . import ( pathutil, peer, phases, + progress, pushkey, pycompat, repository, @@ -356,7 +359,7 @@ class localrepository(object): "treestate", "storerequirements", } - _basestoresupported = {"visibleheads", "narrowheads"} + _basestoresupported = {"visibleheads", "narrowheads", "zstorecommitdata"} openerreqs = {"revlogv1", "generaldelta", "treemanifest"} # sets of (ui, featureset) functions for repo and store features. 
@@ -588,6 +591,7 @@ class localrepository(object): self.svfs.write(name, self.sharedvfs.read(name)) self._narrowheadsmigration() + self._zstorecommitdatamigration() def _narrowheadsmigration(self): """Migrate if 'narrow-heads' config has changed.""" @@ -648,6 +652,58 @@ class localrepository(object): self.storerequirements.remove("narrowheads") self._writestorerequirements() + def _zstorecommitdatamigration(self): + """Migrate if 'use-zstore-commit-data' config has changed.""" + zstorecommitdatadesired = self.ui.configbool("format", "use-zstore-commit-data") + zstorecommitdatacurrent = "zstorecommitdata" in self.storerequirements + if zstorecommitdatadesired != zstorecommitdatacurrent: + if zstorecommitdatadesired: + # Migrating up. Read all commits in revlog and store them in + # zstore. + with self.lock(): + self._syncrevlogtozstore() + self.storerequirements.add("zstorecommitdata") + self._writestorerequirements() + else: + # Migrating down is just removing the store requirement. + with self.lock(): + self.storerequirements.remove("zstorecommitdata") + self._writestorerequirements() + + def _syncrevlogtozstore(self): + """Sync commit data from revlog to zstore""" + zstore = bindings.zstore.zstore(self.svfs.join("hgcommits/v1")) + self.unfiltered().changelog.zstore = zstore + + if self.ui.configbool( + "format", "use-zstore-commit-data-revlog-fallback" + ) or self.ui.configbool("format", "use-zstore-commit-data-server-fallback"): + return + + with progress.bar( + self.ui, _("migrating commit data"), _("commits"), len(self) + ) as prog: + cl = self.changelog + cl.zstore = None + textwithheader = revlog.textwithheader + clrevision = cl.revision + clparents = cl.parents + clnode = cl.node + insert = zstore.insert + contains = zstore.__contains__ + for rev in self: + prog.value += 1 + node = clnode(rev) + if contains(node): + continue + text = clrevision(rev) + p1, p2 = clparents(node) + newnode = insert(textwithheader(text, p1, p2)) + assert node == newnode + if (rev + 1) % 
1000000 == 0: + zstore.flush() + zstore.flush() + @property def vfs(self): self.ui.develwarn( @@ -852,10 +908,15 @@ class localrepository(object): @storecache("00changelog.i", "visibleheads", "remotenames") def changelog(self): def loadchangelog(self): + if "zstorecommitdata" in self.storerequirements: + zstore = bindings.zstore.zstore(self.svfs.join("hgcommits/v1")) + else: + zstore = None return changelog.changelog( self.svfs, uiconfig=self.ui.uiconfig(), trypending=txnutil.mayhavesharedpending(self.root, self.sharedroot), + zstore=zstore, ) cl = loadchangelog(self) @@ -1317,6 +1378,11 @@ class localrepository(object): def releasefn(tr, success): repo = reporef() + # Flush changelog zstore unconditionally. This makes the commit + # data available even if the transaction gets rolled back. + zstore = repo.changelog.zstore + if zstore is not None: + zstore.flush() if success: # this should be explicitly invoked here, because # in-memory changes aren't written out at closing @@ -2661,4 +2727,7 @@ def newrepostorerequirements(repo): if ui.configbool("experimental", "narrow-heads"): requirements.add("narrowheads") + if ui.configbool("format", "use-zstore-commit-data"): + requirements.add("zstorecommitdata") + return requirements diff --git a/eden/scm/edenscm/mercurial/streamclone.py b/eden/scm/edenscm/mercurial/streamclone.py index 5c6ab7553f..e0c064e435 100644 --- a/eden/scm/edenscm/mercurial/streamclone.py +++ b/eden/scm/edenscm/mercurial/streamclone.py @@ -162,6 +162,9 @@ def maybeperformlegacystreamclone(pullop): repo.invalidate() + if "zstorecommitdata" in repo.storerequirements: + repo._syncrevlogtozstore() + return True diff --git a/eden/scm/tests/test-fb-hgext-fixcorrupt.t b/eden/scm/tests/test-fb-hgext-fixcorrupt.t index 013cfd17b0..8be47f1347 100644 --- a/eden/scm/tests/test-fb-hgext-fixcorrupt.t +++ b/eden/scm/tests/test-fb-hgext-fixcorrupt.t @@ -4,6 +4,11 @@ $ SKIPREMOTEFILELOGCHECK=1 $ export SKIPREMOTEFILELOGCHECK +The fixcorrupt extension fixes pure 
revlog-based changelog. It is incompatible +with zstore-backed changelog.d: + + $ setconfig format.use-zstore-commit-data=false + $ cat > noinline.py << EOF > from edenscm.mercurial import revlog > revlog.REVLOG_DEFAULT_FLAGS = 0 diff --git a/eden/scm/tests/test-help.t b/eden/scm/tests/test-help.t index 7fdd413cfb..50b54b1528 100644 --- a/eden/scm/tests/test-help.t +++ b/eden/scm/tests/test-help.t @@ -1212,6 +1212,12 @@ Separate sections from subsections "usestore" + "use-zstore-commit-data" + + "use-zstore-commit-data-revlog-fallback" + + "use-zstore-commit-data-server-fallback" + "dirstate" "uselz4" diff --git a/eden/scm/tests/test-verify.t b/eden/scm/tests/test-verify.t index 4048a5b357..425dec5ced 100644 --- a/eden/scm/tests/test-verify.t +++ b/eden/scm/tests/test-verify.t @@ -1,6 +1,11 @@ #chg-compatible $ setconfig extensions.treemanifest=! + +Verify works on revlog repos. It is incompatible with zstore-backed changelog.d: + + $ setconfig format.use-zstore-commit-data=false + prepare repo $ hg init a diff --git a/eden/scm/tests/test-zstore-commit-data-migration-t.py b/eden/scm/tests/test-zstore-commit-data-migration-t.py new file mode 100644 index 0000000000..9fec034706 --- /dev/null +++ b/eden/scm/tests/test-zstore-commit-data-migration-t.py @@ -0,0 +1,87 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2. + +from __future__ import absolute_import + +from testutil.dott import feature, sh, testtmp # noqa: F401 + + +# Test turning zstore-commit-data on and off + +sh % "setconfig format.use-zstore-commit-data=off" + +sh % "newrepo" +sh % "drawdag" << r""" +B C +|/ +A +""" + +# Migrate up (double-writes to zstore and 00changelog.d). + +sh % "setconfig format.use-zstore-commit-data=on" +sh % 'hg log -r "$C" -T "{desc}\\n"' == "C" + +# Create new commits. 
+ +sh % "drawdag" << r""" + F + /| +D E +| | +desc(C) +""" + +# With zstore-commit-data, 00changelog.d is not used for reading commits. + +sh % "mv .hg/store/00changelog.d .hg/store/00changelog.d.bak" +sh % 'hg log -GT "{desc}"' == r""" + o F + |\ + | o E + | | + o | D + |/ + o C + | + | o B + |/ + o A""" + +# Migrate down. 00changelog.d becomes required. + +sh % "setconfig format.use-zstore-commit-data=off" +sh % 'hg log -GT "{desc}"' == r""" + abort: *00changelog.d* (glob) + [255]""" + +sh % "mv .hg/store/00changelog.d.bak .hg/store/00changelog.d" +sh % 'hg log -GT "{desc}"' == r""" + o F + |\ + | o E + | | + o | D + |/ + o C + | + | o B + |/ + o A""" + +# Create new commits. + +sh % "drawdag" << r""" +H +| +G +| +desc(B) +""" + +# Migrate up (double-writes to zstore and 00changelog.d). + +sh % "setconfig format.use-zstore-commit-data=on" +sh % 'hg log -r "$H" -T "{desc}\\n"' == "H"