changelog: add a way to migrate up and down with zstore-commit-data format

Summary:
This makes it possible to use zstore instead of 00changelog.d to store commit
data. Right now it double writes to zstore and 00changelog.d if turned on.
In the future we can switch to only writing to zstore once an alternative to
00changelog.i is available.

Some related configs were added for fallback strategy: revlog or server.
The revlog fallback allows us to do fast migration. The server fallback
(not implemented in this diff) allows us to stop depending on revlog.d.

Reviewed By: DurhamG

Differential Revision: D18737260

fbshipit-source-id: 3c9605f0babd8a215ee74bdf1275cc4e9dbd766c
This commit is contained in:
Jun Wu 2020-01-10 18:59:42 -08:00 committed by Facebook Github Bot
parent cbb65388aa
commit ffc85f7222
10 changed files with 216 additions and 6 deletions

View File

@ -1116,6 +1116,17 @@ def wraprepo(repo):
self._filecache.pop("manifestlog", None) self._filecache.pop("manifestlog", None)
self._filecache.pop("_phasecache", None) self._filecache.pop("_phasecache", None)
# hgsql only writes to revlogs. Sync revlog to zstore.
# This is for test compatibility. Do not use it in production.
if "zstorecommitdata" in self.storerequirements:
if not util.istest():
raise error.Abort(
_(
"zstore commit data is forbidden for hgsql production use due to performance concerns"
)
)
self._syncrevlogtozstore()
# Refill the cache. We can't just reuse the exact contents of # Refill the cache. We can't just reuse the exact contents of
# the old cached ctx, since the old ctx contains a reference to # the old cached ctx, since the old ctx contains a reference to
# the old revlog, which is now out of date. # the old revlog, which is now out of date.

View File

@ -311,6 +311,9 @@ coreconfigitem("format", "obsstore-version", default=None)
coreconfigitem("format", "usefncache", default=True) coreconfigitem("format", "usefncache", default=True)
coreconfigitem("format", "usegeneraldelta", default=True) coreconfigitem("format", "usegeneraldelta", default=True)
coreconfigitem("format", "usestore", default=True) coreconfigitem("format", "usestore", default=True)
coreconfigitem("format", "use-zstore-commit-data", default=False)
coreconfigitem("format", "use-zstore-commit-data-revlog-fallback", default=False)
coreconfigitem("format", "use-zstore-commit-data-server-fallback", default=False)
coreconfigitem("fsmonitor", "warn_when_unused", default=True) coreconfigitem("fsmonitor", "warn_when_unused", default=True)
coreconfigitem("fsmonitor", "warn_update_file_count", default=50000) coreconfigitem("fsmonitor", "warn_update_file_count", default=50000)
coreconfigitem("hint", "ack", default=list) coreconfigitem("hint", "ack", default=list)

View File

@ -1101,6 +1101,23 @@ Example for ``~/.hgrc``::
Enabled by default. Enabled by default.
``use-zstore-commit-data``
Use zstore (a SHA1 content store) to store commit metadata (user, date,
message, extras, but not the parent order). This means that
"00changelog.d" is no longer used for reading commit contents. If
"00changelog.i" is still used, "00changelog.d" is still written to when
adding new commits.
``use-zstore-commit-data-revlog-fallback``
When data is not found in zstore, fall back to looking it up in the revlog.
``use-zstore-commit-data-server-fallback``
When data is not found in zstore, fall back to fetching it from the server.
(Not implemented yet)
``dirstate`` ``dirstate``
Dirstate format version to use. One of 0 (flat dirstate), 1 Dirstate format version to use. One of 0 (flat dirstate), 1
(treedirstate), and 2 (treestate). Default is 1. (treedirstate), and 2 (treestate). Default is 1.

View File

@ -678,6 +678,13 @@ def clone(
raise raise
destlock = copystore(ui, srcrepo, destpath) destlock = copystore(ui, srcrepo, destpath)
# repo initialization might also take a lock. Keeping destlock
# outside the repo object can cause deadlock. To avoid deadlock,
# we just release destlock here. The lock will be re-acquired
# soon by `destpeer`, or `local.lock()` below.
if destlock is not None:
destlock.release()
# copy bookmarks over # copy bookmarks over
srcbookmarks = srcrepo.svfs.join("bookmarks") srcbookmarks = srcrepo.svfs.join("bookmarks")
dstbookmarks = os.path.join(destpath, "store", "bookmarks") dstbookmarks = os.path.join(destpath, "store", "bookmarks")
@ -688,11 +695,6 @@ def clone(
# into it # into it
destpeer = peer(srcrepo, peeropts, dest) destpeer = peer(srcrepo, peeropts, dest)
srcrepo.hook("outgoing", source="clone", node=node.hex(node.nullid)) srcrepo.hook("outgoing", source="clone", node=node.hex(node.nullid))
# Attach "destlock" to the repo. So 'repo.lock()' wouldn't
# deadlock. wlock needs to be taken first.
if destlock:
destlockw = destpeer.local().wlock()
destpeer.local()._lockref = weakref.ref(destlock)
else: else:
try: try:
destpeer = peer(srcrepo or ui, peeropts, dest, create=True) destpeer = peer(srcrepo or ui, peeropts, dest, create=True)
@ -755,6 +757,8 @@ def clone(
fp.close() fp.close()
destrepo.ui.setconfig("paths", "default", defaulturl, "clone") destrepo.ui.setconfig("paths", "default", defaulturl, "clone")
if destrepo.ui.configbool("format", "use-zstore-commit-data"):
destrepo._syncrevlogtozstore()
if update: if update:
if update is not True: if update is not True:

View File

@ -20,6 +20,8 @@ import random
import time import time
import weakref import weakref
# pyre-fixme[21]: Could not find `bindings`.
import bindings
from edenscm.hgext.extlib.phabricator import diffprops from edenscm.hgext.extlib.phabricator import diffprops
from . import ( from . import (
@ -52,6 +54,7 @@ from . import (
pathutil, pathutil,
peer, peer,
phases, phases,
progress,
pushkey, pushkey,
pycompat, pycompat,
repository, repository,
@ -356,7 +359,7 @@ class localrepository(object):
"treestate", "treestate",
"storerequirements", "storerequirements",
} }
_basestoresupported = {"visibleheads", "narrowheads"} _basestoresupported = {"visibleheads", "narrowheads", "zstorecommitdata"}
openerreqs = {"revlogv1", "generaldelta", "treemanifest"} openerreqs = {"revlogv1", "generaldelta", "treemanifest"}
# sets of (ui, featureset) functions for repo and store features. # sets of (ui, featureset) functions for repo and store features.
@ -588,6 +591,7 @@ class localrepository(object):
self.svfs.write(name, self.sharedvfs.read(name)) self.svfs.write(name, self.sharedvfs.read(name))
self._narrowheadsmigration() self._narrowheadsmigration()
self._zstorecommitdatamigration()
def _narrowheadsmigration(self): def _narrowheadsmigration(self):
"""Migrate if 'narrow-heads' config has changed.""" """Migrate if 'narrow-heads' config has changed."""
@ -648,6 +652,58 @@ class localrepository(object):
self.storerequirements.remove("narrowheads") self.storerequirements.remove("narrowheads")
self._writestorerequirements() self._writestorerequirements()
def _zstorecommitdatamigration(self):
"""Migrate if the 'format.use-zstore-commit-data' config has changed.

Compares the configured value against the presence of the
"zstorecommitdata" store requirement. When they differ, takes the repo
lock and either syncs commit data from the revlog into zstore and adds
the requirement (migrating up), or only removes the requirement
(migrating down). The store requirements are written back in both cases.
"""
zstorecommitdatadesired = self.ui.configbool("format", "use-zstore-commit-data")
zstorecommitdatacurrent = "zstorecommitdata" in self.storerequirements
# NOTE(review): this comparison runs before the lock is taken; the
# requirement is not re-checked once the lock is held.
if zstorecommitdatadesired != zstorecommitdatacurrent:
if zstorecommitdatadesired:
# Migrating up. Read all commits in revlog and store them in
# zstore.
with self.lock():
self._syncrevlogtozstore()
self.storerequirements.add("zstorecommitdata")
self._writestorerequirements()
else:
# Migrating down is just removing the store requirement.
with self.lock():
self.storerequirements.remove("zstorecommitdata")
self._writestorerequirements()
def _syncrevlogtozstore(self):
"""Sync commit data from revlog to zstore.

Opens the zstore under the store path "hgcommits/v1" and attaches it to
the unfiltered changelog. If either the revlog-fallback or the
server-fallback config is enabled, returns right away without copying
anything. Otherwise walks the revisions of this repo and inserts the
revlog text (with its parent header) of every commit whose node is not
already in zstore, flushing zstore periodically and once more at the end.
"""
zstore = bindings.zstore.zstore(self.svfs.join("hgcommits/v1"))
self.unfiltered().changelog.zstore = zstore
# With a fallback configured there is no bulk copy; only the zstore
# attachment above takes effect.
if self.ui.configbool(
"format", "use-zstore-commit-data-revlog-fallback"
) or self.ui.configbool("format", "use-zstore-commit-data-server-fallback"):
return
with progress.bar(
self.ui, _("migrating commit data"), _("commits"), len(self)
) as prog:
cl = self.changelog
# Detach zstore on this changelog object while copying -- presumably so
# that cl.revision() reads from the revlog instead of zstore; TODO confirm.
# NOTE(review): cl.zstore is not reassigned later in this function.
# Whether self.changelog and self.unfiltered().changelog are the same
# object is not visible here; confirm zstore stays attached afterwards.
cl.zstore = None
# Bind the callables used per commit to local names once, outside the loop.
textwithheader = revlog.textwithheader
clrevision = cl.revision
clparents = cl.parents
clnode = cl.node
insert = zstore.insert
contains = zstore.__contains__
for rev in self:
prog.value += 1
node = clnode(rev)
# Already present in zstore: nothing to copy for this commit.
if contains(node):
continue
text = clrevision(rev)
p1, p2 = clparents(node)
# insert() returns the key under which the blob was stored; it is
# expected to equal the commit node (presumably the SHA1 of the
# header + text -- verify against the zstore bindings).
newnode = insert(textwithheader(text, p1, p2))
assert node == newnode
# Flush every 1,000,000 revisions -- presumably to bound how much
# unflushed data accumulates during a large migration.
if (rev + 1) % 1000000 == 0:
zstore.flush()
zstore.flush()
@property @property
def vfs(self): def vfs(self):
self.ui.develwarn( self.ui.develwarn(
@ -852,10 +908,15 @@ class localrepository(object):
@storecache("00changelog.i", "visibleheads", "remotenames") @storecache("00changelog.i", "visibleheads", "remotenames")
def changelog(self): def changelog(self):
def loadchangelog(self): def loadchangelog(self):
if "zstorecommitdata" in self.storerequirements:
zstore = bindings.zstore.zstore(self.svfs.join("hgcommits/v1"))
else:
zstore = None
return changelog.changelog( return changelog.changelog(
self.svfs, self.svfs,
uiconfig=self.ui.uiconfig(), uiconfig=self.ui.uiconfig(),
trypending=txnutil.mayhavesharedpending(self.root, self.sharedroot), trypending=txnutil.mayhavesharedpending(self.root, self.sharedroot),
zstore=zstore,
) )
cl = loadchangelog(self) cl = loadchangelog(self)
@ -1317,6 +1378,11 @@ class localrepository(object):
def releasefn(tr, success): def releasefn(tr, success):
repo = reporef() repo = reporef()
# Flush changelog zstore unconditionally. This makes the commit
# data available even if the transaction gets rolled back.
zstore = repo.changelog.zstore
if zstore is not None:
zstore.flush()
if success: if success:
# this should be explicitly invoked here, because # this should be explicitly invoked here, because
# in-memory changes aren't written out at closing # in-memory changes aren't written out at closing
@ -2661,4 +2727,7 @@ def newrepostorerequirements(repo):
if ui.configbool("experimental", "narrow-heads"): if ui.configbool("experimental", "narrow-heads"):
requirements.add("narrowheads") requirements.add("narrowheads")
if ui.configbool("format", "use-zstore-commit-data"):
requirements.add("zstorecommitdata")
return requirements return requirements

View File

@ -162,6 +162,9 @@ def maybeperformlegacystreamclone(pullop):
repo.invalidate() repo.invalidate()
if "zstorecommitdata" in repo.storerequirements:
repo._syncrevlogtozstore()
return True return True

View File

@ -4,6 +4,11 @@
$ SKIPREMOTEFILELOGCHECK=1 $ SKIPREMOTEFILELOGCHECK=1
$ export SKIPREMOTEFILELOGCHECK $ export SKIPREMOTEFILELOGCHECK
The fixcorrupt extension fixes a pure revlog-based changelog. It is incompatible
with a zstore-backed changelog.d:
$ setconfig format.use-zstore-commit-data=false
$ cat > noinline.py << EOF $ cat > noinline.py << EOF
> from edenscm.mercurial import revlog > from edenscm.mercurial import revlog
> revlog.REVLOG_DEFAULT_FLAGS = 0 > revlog.REVLOG_DEFAULT_FLAGS = 0

View File

@ -1212,6 +1212,12 @@ Separate sections from subsections
"usestore" "usestore"
"use-zstore-commit-data"
"use-zstore-commit-data-revlog-fallback"
"use-zstore-commit-data-server-fallback"
"dirstate" "dirstate"
"uselz4" "uselz4"

View File

@ -1,6 +1,11 @@
#chg-compatible #chg-compatible
$ setconfig extensions.treemanifest=! $ setconfig extensions.treemanifest=!
Verify works on revlog repos. It is incompatible with zstore-backed changelog.d:
$ setconfig format.use-zstore-commit-data=false
prepare repo prepare repo
$ hg init a $ hg init a

View File

@ -0,0 +1,87 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2.
from __future__ import absolute_import
from testutil.dott import feature, sh, testtmp # noqa: F401
# Test turning zstore-commit-data on and off
# Start with the feature off, so the first commits are created without it.
sh % "setconfig format.use-zstore-commit-data=off"
sh % "newrepo"
sh % "drawdag" << r"""
B C
|/
A
"""
# Migrate up (double-writes to zstore and 00changelog.d).
# Presumably the next hg command performs the migration when it opens the
# repo; the log output checks that commit C is still readable afterwards.
sh % "setconfig format.use-zstore-commit-data=on"
sh % 'hg log -r "$C" -T "{desc}\\n"' == "C"
# Create new commits.
sh % "drawdag" << r"""
F
/|
D E
| |
desc(C)
"""
# With zstore-commit-data, 00changelog.d is not used for reading commits.
# Move the file away to show that log still works without it.
sh % "mv .hg/store/00changelog.d .hg/store/00changelog.d.bak"
sh % 'hg log -GT "{desc}"' == r"""
o F
|\
| o E
| |
o | D
|/
o C
|
| o B
|/
o A"""
# Migrate down. 00changelog.d becomes required.
# The file is still moved away, so the command is expected to abort.
sh % "setconfig format.use-zstore-commit-data=off"
sh % 'hg log -GT "{desc}"' == r"""
abort: *00changelog.d* (glob)
[255]"""
# Put 00changelog.d back; log succeeds again with the feature off.
sh % "mv .hg/store/00changelog.d.bak .hg/store/00changelog.d"
sh % 'hg log -GT "{desc}"' == r"""
o F
|\
| o E
| |
o | D
|/
o C
|
| o B
|/
o A"""
# Create new commits.
# These are created while the feature is off.
sh % "drawdag" << r"""
H
|
G
|
desc(B)
"""
# Migrate up (double-writes to zstore and 00changelog.d).
# Commit H was created while the feature was off; check it is readable after
# turning the feature back on.
sh % "setconfig format.use-zstore-commit-data=on"
sh % 'hg log -r "$H" -T "{desc}\\n"' == "H"